Asset module

Queries

Set of Asset queries

Source code in kili/queries/asset/__init__.py

class QueriesAsset:
    """
    Set of Asset queries
    """

    # pylint: disable=too-many-arguments,too-many-locals

    def __init__(self, auth):
        """Initialize the subclass.

        Args:
            auth: KiliAuth object
        """
        self.auth = auth

    @staticmethod
    def _build_asset_where(
        project_id: str,
        asset_id: Optional[str] = None,
        asset_id_in: Optional[List[str]] = None,
        external_id_contains: Optional[List[str]] = None,
        metadata_where: Optional[dict] = None,
        status_in: Optional[List[str]] = None,
        consensus_mark_gt: Optional[float] = None,
        consensus_mark_lt: Optional[float] = None,
        honeypot_mark_gt: Optional[float] = None,
        honeypot_mark_lt: Optional[float] = None,
        label_type_in: Optional[List[str]] = None,
        label_author_in: Optional[List[str]] = None,
        label_consensus_mark_gt: Optional[float] = None,
        label_consensus_mark_lt: Optional[float] = None,
        label_created_at: Optional[str] = None,
        label_created_at_gt: Optional[str] = None,
        label_created_at_lt: Optional[str] = None,
        label_honeypot_mark_gt: Optional[float] = None,
        label_honeypot_mark_lt: Optional[float] = None,
        skipped: Optional[bool] = None,
        updated_at_gte: Optional[str] = None,
        updated_at_lte: Optional[str] = None,
        label_category_search: Optional[str] = None,
    ) -> dict:
        """Build the `where` filter of the GraphQL payload from the filter arguments.

        `assets` and `count_assets` both use it, so that the paginated query and
        the count that drives its pagination always filter on the same criteria.
        The parameters are the ones of `count_assets`.

        Note that the `*_gt` / `*_lt` arguments are sent under the `...Gte` / `...Lte`
        keys, and `external_id_contains` under `externalIdStrictlyIn`.

        Returns:
            The dictionary to send as the `where` variable of the query.
        """
        return {
            "id": asset_id,
            "project": {
                "id": project_id,
            },
            "externalIdStrictlyIn": external_id_contains,
            "statusIn": status_in,
            "consensusMarkGte": consensus_mark_gt,
            "consensusMarkLte": consensus_mark_lt,
            "honeypotMarkGte": honeypot_mark_gt,
            "honeypotMarkLte": honeypot_mark_lt,
            "idIn": asset_id_in,
            "metadata": metadata_where,
            "label": {
                "typeIn": label_type_in,
                "authorIn": label_author_in,
                "consensusMarkGte": label_consensus_mark_gt,
                "consensusMarkLte": label_consensus_mark_lt,
                "createdAt": label_created_at,
                "createdAtGte": label_created_at_gt,
                "createdAtLte": label_created_at_lt,
                "honeypotMarkGte": label_honeypot_mark_gt,
                "honeypotMarkLte": label_honeypot_mark_lt,
                "search": label_category_search,
            },
            "skipped": skipped,
            "updatedAtGte": updated_at_gte,
            "updatedAtLte": updated_at_lte,
        }

    # pylint: disable=dangerous-default-value
    def assets(
        self,
        project_id: str,
        asset_id: Optional[str] = None,
        skip: int = 0,
        fields: List[str] = [
            "content",
            "createdAt",
            "externalId",
            "id",
            "isHoneypot",
            "jsonMetadata",
            "labels.author.id",
            "labels.author.email",
            "labels.createdAt",
            "labels.id",
            "labels.jsonResponse",
            "skipped",
            "status",
        ],
        asset_id_in: Optional[List[str]] = None,
        consensus_mark_gt: Optional[float] = None,
        consensus_mark_lt: Optional[float] = None,
        disable_tqdm: bool = False,
        external_id_contains: Optional[List[str]] = None,
        first: Optional[int] = None,
        format: Optional[str] = None,  # pylint: disable=redefined-builtin
        honeypot_mark_gt: Optional[float] = None,
        honeypot_mark_lt: Optional[float] = None,
        label_author_in: Optional[List[str]] = None,
        label_consensus_mark_gt: Optional[float] = None,
        label_consensus_mark_lt: Optional[float] = None,
        label_created_at: Optional[str] = None,
        label_created_at_gt: Optional[str] = None,
        label_created_at_lt: Optional[str] = None,
        label_honeypot_mark_gt: Optional[float] = None,
        label_honeypot_mark_lt: Optional[float] = None,
        label_type_in: Optional[List[str]] = None,
        metadata_where: Optional[dict] = None,
        skipped: Optional[bool] = None,
        status_in: Optional[List[str]] = None,
        updated_at_gte: Optional[str] = None,
        updated_at_lte: Optional[str] = None,
        as_generator: bool = False,
        label_category_search: Optional[str] = None,
        download_media: bool = False,
        local_media_dir: Optional[str] = None,
    ) -> Iterable[Dict]:
        # pylint: disable=line-too-long
        """Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.

        Args:
            project_id: Identifier of the project.
            asset_id: Identifier of the asset to retrieve.
            asset_id_in: A list of the IDs of the assets to retrieve.
            skip: Number of assets to skip (they are ordered by their date of creation, first to last).
            fields: All the fields to request among the possible fields for the assets.
                    See [the documentation](https://docs.kili-technology.com/reference/graphql-api#asset) for all possible fields.
            first: Maximum number of assets to return.
            consensus_mark_gt: Minimum amount of consensus for the asset.
            consensus_mark_lt: Maximum amount of consensus for the asset.
            external_id_contains: Returned assets have an external id that belongs to that list, if given.
            metadata_where: Filters by the values of the metadata of the asset.
            honeypot_mark_gt: Minimum amount of honeypot for the asset.
            honeypot_mark_lt: Maximum amount of honeypot for the asset.
            status_in: Returned assets should have a status that belongs to that list, if given.
                Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
            label_type_in: Returned assets should have a label whose type belongs to that list, if given.
            label_author_in: Returned assets should have a label whose author belongs to that list, if given.
            label_consensus_mark_gt: Returned assets should have a label whose consensus is greater than this number.
            label_consensus_mark_lt: Returned assets should have a label whose consensus is lower than this number.
            label_created_at: Returned assets should have a label whose creation date is equal to this date.
            label_created_at_gt: Returned assets should have a label whose creation date is greater than this date.
            label_created_at_lt: Returned assets should have a label whose creation date is lower than this date.
            label_honeypot_mark_gt: Returned assets should have a label whose honeypot is greater than this number.
            label_honeypot_mark_lt: Returned assets should have a label whose honeypot is lower than this number.
            skipped: Returned assets should be skipped.
            updated_at_gte: Returned assets should have a label whose update date is greater or equal to this date.
            updated_at_lte: Returned assets should have a label whose update date is lower or equal to this date.
            format: If equal to 'pandas', returns a pandas DataFrame.
            disable_tqdm: If `True`, the progress bar will be disabled.
                It is always disabled when `as_generator` is `True`.
            as_generator: If `True`, a generator on the assets is returned.
            label_category_search: Returned assets should have a label that follows this category search query.
            download_media: Whether the media have to be downloaded.
            local_media_dir: Directory where the media is downloaded if `download_media` is True.

        !!! info "Dates format"
            Date strings should have format: "YYYY-MM-DD"

        Returns:
            A list of assets by default, a generator of assets if `as_generator` is `True`,
                or a pandas DataFrame if `format` is 'pandas'.

        Raises:
            ValueError: If `format` is 'pandas' and `as_generator` is `True`.

        Example:
            ```
            # returns the assets list of the project
            >>> kili.assets(project_id)
            >>> kili.assets(project_id, asset_id=asset_id)
            # returns a generator of the project assets
            >>> kili.assets(project_id, as_generator=True)
            ```

        !!! example "How to filter based on Metadata"
            - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
                have key "key1" with value "value1"
            - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
                have key "key1" with value "value1" or value "value2"
            - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
                have key "key2" with a value between 2 and 10.

        !!! example "How to filter based on label categories"
            The search query is composed of logical expressions following this format:

                [job_name].[category_name].count [comparison_operator] [value]
            where:

            - `[job_name]` is the name of the job in the interface
            - `[category_name]` is the name of the category in the interface for this job
            - `[comparison_operator]` can be one of: [`==`, `>=`, `<=`, `<`, `>`]
            - `[value]` is an integer that represents the count of such objects of the given category in the label

            These operations can be separated by OR and AND operators

            Example:

                label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
                label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
                label_category_search = `(JOB_CLASSIF.CATEGORY_A.count == 1 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
        """
        if format == "pandas" and as_generator:
            raise ValueError(
                'Argument values as_generator==True and format=="pandas" are not compatible.'
            )

        # Snapshot the call arguments before any other local variable is created,
        # so that the mapping only holds the parameters of this method.
        saved_args = locals()
        # Keep the filter arguments only: what remains is the set of keyword
        # arguments accepted by `count_assets` (and by `_build_asset_where`).
        count_args = {
            k: v
            for (k, v) in saved_args.items()
            if k
            not in {
                "skip",
                "first",
                "disable_tqdm",
                "format",
                "fields",
                "self",
                "as_generator",
                "download_media",
                "local_media_dir",
            }
        }

        # using tqdm with a generator is messy, so it is always disabled
        disable_tqdm = disable_tqdm or as_generator
        if label_category_search:
            validate_category_search_query(label_category_search)

        payload_query = {"where": self._build_asset_where(**count_args)}

        post_call_process = get_post_assets_call_process(
            download_media, local_media_dir, project_id
        )

        asset_generator = row_generator_from_paginated_calls(
            skip,
            first,
            self.count_assets,
            count_args,
            self._query_assets,
            payload_query,
            fields,
            disable_tqdm,
            post_call_process,
        )

        if format == "pandas":
            return pd.DataFrame(list(asset_generator))
        if as_generator:
            return asset_generator
        return list(asset_generator)

    def _query_assets(self, skip: int, first: int, payload: dict, fields: List[str]):
        """Execute the assets query for one page of results.

        Args:
            skip: Value sent as the `skip` variable of the query.
            first: Value sent as the `first` variable of the query.
            payload: Variables of the query (the `where` filter). It is not modified.
            fields: Fields used to build the fragment of the query.

        Returns:
            The `data` part of the response, formatted by `format_result`
                as `List[Asset]`.
        """
        # Build a new mapping instead of updating `payload` in place: the dict is
        # owned by the caller (`assets` hands its `payload_query` to the pagination
        # helper), so pagination values must not leak into it.
        paginated_payload = {**payload, "skip": skip, "first": first}
        _gql_assets = gql_assets(fragment_builder(fields, AssetType))
        result = self.auth.client.execute(_gql_assets, paginated_payload)
        assets = format_result("data", result, _object=List[Asset])
        return assets

    @typechecked
    def count_assets(
        self,
        project_id: str,
        asset_id: Optional[str] = None,
        asset_id_in: Optional[List[str]] = None,
        external_id_contains: Optional[List[str]] = None,
        metadata_where: Optional[dict] = None,
        status_in: Optional[List[str]] = None,
        consensus_mark_gt: Optional[float] = None,
        consensus_mark_lt: Optional[float] = None,
        honeypot_mark_gt: Optional[float] = None,
        honeypot_mark_lt: Optional[float] = None,
        label_type_in: Optional[List[str]] = None,
        label_author_in: Optional[List[str]] = None,
        label_consensus_mark_gt: Optional[float] = None,
        label_consensus_mark_lt: Optional[float] = None,
        label_created_at: Optional[str] = None,
        label_created_at_gt: Optional[str] = None,
        label_created_at_lt: Optional[str] = None,
        label_honeypot_mark_gt: Optional[float] = None,
        label_honeypot_mark_lt: Optional[float] = None,
        skipped: Optional[bool] = None,
        updated_at_gte: Optional[str] = None,
        updated_at_lte: Optional[str] = None,
        label_category_search: Optional[str] = None,
    ) -> int:
        """Count and return the number of assets with the given constraints.

        Parameters beginning with 'label_' apply to labels, others apply to assets.

        Args:
            project_id: Identifier of the project
            asset_id: The unique id of the asset to retrieve.
            asset_id_in: A list of the ids of the assets to retrieve.
            external_id_contains: Returned assets should have an external id
                that belongs to that list, if given.
            metadata_where: Filters by the values of the metadata of the asset.
            status_in: Returned assets should have a status that belongs to that list, if given.
                Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
            consensus_mark_gt: Minimum amount of consensus for the asset.
            consensus_mark_lt: Maximum amount of consensus for the asset.
            honeypot_mark_gt: Minimum amount of honeypot for the asset.
            honeypot_mark_lt: Maximum amount of honeypot for the asset.
            label_type_in: Returned assets should have a label
                whose type belongs to that list, if given.
            label_author_in: Returned assets should have a label
                whose author belongs to that list, if given.
            label_consensus_mark_gt: Returned assets should have a label
                whose consensus is greater than this number.
            label_consensus_mark_lt: Returned assets should have a label
                whose consensus is lower than this number.
            label_created_at: Returned assets should have a label
                whose creation date is equal to this date.
            label_created_at_gt: Returned assets should have a label
                whose creation date is greater than this date.
            label_created_at_lt: Returned assets should have a label
                whose creation date is lower than this date.
            label_honeypot_mark_gt: Returned assets should have a label
                whose honeypot is greater than this number.
            label_honeypot_mark_lt: Returned assets should have a label
                whose honeypot is lower than this number.
            skipped: Returned assets should be skipped
            updated_at_gte: Returned assets should have a label
                whose update date is greater or equal to this date.
            updated_at_lte: Returned assets should have a label
                whose update date is lower or equal to this date.
            label_category_search: Returned assets should have a label
                that follows this category search query.

        !!! info "Dates format"
            Date strings should have format: "YYYY-MM-DD"

        Returns:
            The number of assets that match the given constraints.

        Examples:
            >>> kili.count_assets(project_id=project_id)
            250
            >>> kili.count_assets(project_id=project_id, asset_id=asset_id)
            1

        !!! example "How to filter based on Metadata"
            - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
                have key "key1" with value "value1"
            - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
                have key "key1" with value "value1" or value "value2"
            - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
                have key "key2" with a value between 2 and 10.
        """
        if label_category_search:
            validate_category_search_query(label_category_search)

        variables = {
            "where": self._build_asset_where(
                project_id=project_id,
                asset_id=asset_id,
                asset_id_in=asset_id_in,
                external_id_contains=external_id_contains,
                metadata_where=metadata_where,
                status_in=status_in,
                consensus_mark_gt=consensus_mark_gt,
                consensus_mark_lt=consensus_mark_lt,
                honeypot_mark_gt=honeypot_mark_gt,
                honeypot_mark_lt=honeypot_mark_lt,
                label_type_in=label_type_in,
                label_author_in=label_author_in,
                label_consensus_mark_gt=label_consensus_mark_gt,
                label_consensus_mark_lt=label_consensus_mark_lt,
                label_created_at=label_created_at,
                label_created_at_gt=label_created_at_gt,
                label_created_at_lt=label_created_at_lt,
                label_honeypot_mark_gt=label_honeypot_mark_gt,
                label_honeypot_mark_lt=label_honeypot_mark_lt,
                skipped=skipped,
                updated_at_gte=updated_at_gte,
                updated_at_lte=updated_at_lte,
                label_category_search=label_category_search,
            )
        }
        result = self.auth.client.execute(GQL_ASSETS_COUNT, variables)
        return format_result("data", result, int)

assets(self, project_id, asset_id=None, skip=0, fields=['content', 'createdAt', 'externalId', 'id', 'isHoneypot', 'jsonMetadata', 'labels.author.id', 'labels.author.email', 'labels.createdAt', 'labels.id', 'labels.jsonResponse', 'skipped', 'status'], asset_id_in=None, consensus_mark_gt=None, consensus_mark_lt=None, disable_tqdm=False, external_id_contains=None, first=None, format=None, honeypot_mark_gt=None, honeypot_mark_lt=None, label_author_in=None, label_consensus_mark_gt=None, label_consensus_mark_lt=None, label_created_at=None, label_created_at_gt=None, label_created_at_lt=None, label_honeypot_mark_gt=None, label_honeypot_mark_lt=None, label_type_in=None, metadata_where=None, skipped=None, status_in=None, updated_at_gte=None, updated_at_lte=None, as_generator=False, label_category_search=None, download_media=False, local_media_dir=None)

Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.

Parameters:

Name	Type	Description	Default
`project_id`	`str`	Identifier of the project.	required
`asset_id`	`Optional[str]`	Identifier of the asset to retrieve.	`None`
`asset_id_in`	`Optional[List[str]]`	A list of the IDs of the assets to retrieve.	`None`
`skip`	`int`	Number of assets to skip (they are ordered by their date of creation, first to last).	`0`
`fields`	`List[str]`	All the fields to request among the possible fields for the assets. See the documentation for all possible fields.	`['content', 'createdAt', 'externalId', 'id', 'isHoneypot', 'jsonMetadata', 'labels.author.id', 'labels.author.email', 'labels.createdAt', 'labels.id', 'labels.jsonResponse', 'skipped', 'status']`
`first`	`Optional[int]`	Maximum number of assets to return.	`None`
`consensus_mark_gt`	`Optional[float]`	Minimum amount of consensus for the asset.	`None`
`consensus_mark_lt`	`Optional[float]`	Maximum amount of consensus for the asset.	`None`
`external_id_contains`	`Optional[List[str]]`	Returned assets have an external id that belongs to that list, if given.	`None`
`metadata_where`	`Optional[dict]`	Filters by the values of the metadata of the asset.	`None`
`honeypot_mark_gt`	`Optional[float]`	Minimum amount of honeypot for the asset.	`None`
`honeypot_mark_lt`	`Optional[float]`	Maximum amount of honeypot for the asset.	`None`
`status_in`	`Optional[List[str]]`	Returned assets should have a status that belongs to that list, if given. Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`	`None`
`label_type_in`	`Optional[List[str]]`	Returned assets should have a label whose type belongs to that list, if given.	`None`
`label_author_in`	`Optional[List[str]]`	Returned assets should have a label whose author belongs to that list, if given.	`None`
`label_consensus_mark_gt`	`Optional[float]`	Returned assets should have a label whose consensus is greater than this number.	`None`
`label_consensus_mark_lt`	`Optional[float]`	Returned assets should have a label whose consensus is lower than this number.	`None`
`label_created_at`	`Optional[str]`	Returned assets should have a label whose creation date is equal to this date.	`None`
`label_created_at_gt`	`Optional[str]`	Returned assets should have a label whose creation date is greater than this date.	`None`
`label_created_at_lt`	`Optional[str]`	Returned assets should have a label whose creation date is lower than this date.	`None`
`label_honeypot_mark_gt`	`Optional[float]`	Returned assets should have a label whose honeypot is greater than this number	`None`
`label_honeypot_mark_lt`	`Optional[float]`	Returned assets should have a label whose honeypot is lower than this number	`None`
`skipped`	`Optional[bool]`	Returned assets should be skipped	`None`
`updated_at_gte`	`Optional[str]`	Returned assets should have a label whose update date is greater or equal to this date.	`None`
`updated_at_lte`	`Optional[str]`	Returned assets should have a label whose update date is lower or equal to this date.	`None`
`format`	`Optional[str]`	If equal to 'pandas', returns a pandas DataFrame	`None`
`disable_tqdm`	`bool`	If `True`, the progress bar will be disabled	`False`
`as_generator`	`bool`	If `True`, a generator on the assets is returned.	`False`
`label_category_search`	`Optional[str]`	Returned assets should have a label that follows this category search query.	`None`
`download_media`	`bool`	Whether the media have to be downloaded.	`False`
`local_media_dir`	`Optional[str]`	Directory where the media is downloaded if `download_media` is True.	`None`

Dates format

Date strings should have format: "YYYY-MM-DD"

Returns:

Type	Description
`Iterable[Dict]`	A list of assets by default, a generator of assets if `as_generator` is `True`, or a pandas DataFrame if `format` is 'pandas'.

Examples:

# returns the assets list of the project
>>> kili.assets(project_id)
>>> kili.assets(project_id, asset_id=asset_id)
# returns a generator of the project assets
>>> kili.assets(project_id, as_generator=True)

How to filter based on Metadata

metadata_where = {key1: "value1"} to filter on assets whose metadata have key "key1" with value "value1"
metadata_where = {key1: ["value1", "value2"]} to filter on assets whose metadata have key "key1" with value "value1" or value "value2"
metadata_where = {key2: [2, 10]} to filter on assets whose metadata have key "key2" with a value between 2 and 10.

How to filter based on label categories

The search query is composed of logical expressions following this format:

[job_name].[category_name].count [comparison_operator] [value]

where:

[job_name] is the name of the job in the interface
[category_name] is the name of the category in the interface for this job
[comparison_operator] can be one of: [==, >=, <=, <, >]
[value] is an integer that represents the count of such objects of the given category in the label

These operations can be separated by OR and AND operators

Example:

label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
label_category_search = `(JOB_CLASSIF.CATEGORY_A.count == 1 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`

Source code in kili/queries/asset/__init__.py

def assets(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    skip: int = 0,
    fields: List[str] = [
        "content",
        "createdAt",
        "externalId",
        "id",
        "isHoneypot",
        "jsonMetadata",
        "labels.author.id",
        "labels.author.email",
        "labels.createdAt",
        "labels.id",
        "labels.jsonResponse",
        "skipped",
        "status",
    ],
    asset_id_in: Optional[List[str]] = None,
    consensus_mark_gt: Optional[float] = None,
    consensus_mark_lt: Optional[float] = None,
    disable_tqdm: bool = False,
    external_id_contains: Optional[List[str]] = None,
    first: Optional[int] = None,
    format: Optional[str] = None,  # pylint: disable=redefined-builtin
    honeypot_mark_gt: Optional[float] = None,
    honeypot_mark_lt: Optional[float] = None,
    label_author_in: Optional[List[str]] = None,
    label_consensus_mark_gt: Optional[float] = None,
    label_consensus_mark_lt: Optional[float] = None,
    label_created_at: Optional[str] = None,
    label_created_at_gt: Optional[str] = None,
    label_created_at_lt: Optional[str] = None,
    label_honeypot_mark_gt: Optional[float] = None,
    label_honeypot_mark_lt: Optional[float] = None,
    label_type_in: Optional[List[str]] = None,
    metadata_where: Optional[dict] = None,
    skipped: Optional[bool] = None,
    status_in: Optional[List[str]] = None,
    updated_at_gte: Optional[str] = None,
    updated_at_lte: Optional[str] = None,
    as_generator: bool = False,
    label_category_search: Optional[str] = None,
    download_media: bool = False,
    local_media_dir: Optional[str] = None,
) -> Iterable[Dict]:
    # pylint: disable=line-too-long
    """Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.

    Args:
        project_id: Identifier of the project.
        asset_id: Identifier of the asset to retrieve.
        asset_id_in: A list of the IDs of the assets to retrieve.
        skip: Number of assets to skip (they are ordered by their date of creation, first to last).
        fields: All the fields to request among the possible fields for the assets.
                See [the documentation](https://docs.kili-technology.com/reference/graphql-api#asset) for all possible fields.
        first: Maximum number of assets to return.
        consensus_mark_gt: Minimum amount of consensus for the asset.
        consensus_mark_lt: Maximum amount of consensus for the asset.
        external_id_contains: Returned assets have an external id that belongs to that list, if given.
        metadata_where: Filters by the values of the metadata of the asset.
        honeypot_mark_gt: Minimum amount of honeypot for the asset.
        honeypot_mark_lt: Maximum amount of honeypot for the asset.
        status_in: Returned assets should have a status that belongs to that list, if given.
            Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
        label_type_in: Returned assets should have a label whose type belongs to that list, if given.
        label_author_in: Returned assets should have a label whose author belongs to that list, if given.
        label_consensus_mark_gt: Returned assets should have a label whose consensus is greater than this number.
        label_consensus_mark_lt: Returned assets should have a label whose consensus is lower than this number.
        label_created_at: Returned assets should have a label whose creation date is equal to this date.
        label_created_at_gt: Returned assets should have a label whose creation date is greater than this date.
        label_created_at_lt: Returned assets should have a label whose creation date is lower than this date.
        label_honeypot_mark_gt: Returned assets should have a label whose honeypot is greater than this number.
        label_honeypot_mark_lt: Returned assets should have a label whose honeypot is lower than this number.
        skipped: Returned assets should be skipped.
        updated_at_gte: Returned assets should have a label whose update date is greater or equal to this date.
        updated_at_lte: Returned assets should have a label whose update date is lower or equal to this date.
        format: If equal to 'pandas', returns a pandas DataFrame.
        disable_tqdm: If `True`, the progress bar will be disabled.
            It is always disabled when `as_generator` is `True`.
        as_generator: If `True`, a generator on the assets is returned.
        label_category_search: Returned assets should have a label that follows this category search query.
        download_media: Whether the media have to be downloaded.
        local_media_dir: Directory where the media is downloaded if `download_media` is True.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        A list of assets by default, a generator of assets if `as_generator` is `True`,
            or a pandas DataFrame if `format` is 'pandas'.

    Raises:
        ValueError: If `format` is 'pandas' and `as_generator` is `True`.

    Example:
        ```
        # returns the assets list of the project
        >>> kili.assets(project_id)
        >>> kili.assets(project_id, asset_id=asset_id)
        # returns a generator of the project assets
        >>> kili.assets(project_id, as_generator=True)
        ```

    !!! example "How to filter based on Metadata"
        - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
            have key "key1" with value "value1"
        - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
            have key "key1" with value "value1" or value "value2"
        - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
            have key "key2" with a value between 2 and 10.

    !!! example "How to filter based on label categories"
        The search query is composed of logical expressions following this format:

            [job_name].[category_name].count [comparison_operator] [value]
        where:

        - `[job_name]` is the name of the job in the interface
        - `[category_name]` is the name of the category in the interface for this job
        - `[comparison_operator]` can be one of: [`==`, `>=`, `<=`, `<`, `>`]
        - `[value]` is an integer that represents the count of such objects of the given category in the label

        These operations can be separated by OR and AND operators

        Example:

            label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
            label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
            label_category_search = `(JOB_CLASSIF.CATEGORY_A.count == 1 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
    """
    if format == "pandas" and as_generator:
        raise ValueError(
            'Argument values as_generator==True and format=="pandas" are not compatible.'
        )

    # Snapshot the call arguments before any other local variable is created,
    # so that the mapping only holds the parameters of this function.
    saved_args = locals()
    # Keep the filter arguments only: they are forwarded as keyword arguments
    # to `self.count_assets` by the pagination helper.
    count_args = {
        k: v
        for (k, v) in saved_args.items()
        if k
        not in {
            "skip",
            "first",
            "disable_tqdm",
            "format",
            "fields",
            "self",
            "as_generator",
            "download_media",
            "local_media_dir",
        }
    }

    # using tqdm with a generator is messy, so it is always disabled
    disable_tqdm = disable_tqdm or as_generator
    if label_category_search:
        validate_category_search_query(label_category_search)

    # Note: the `*_gt` / `*_lt` arguments are sent under the `...Gte` / `...Lte` keys.
    payload_query = {
        "where": {
            "id": asset_id,
            "project": {
                "id": project_id,
            },
            "externalIdStrictlyIn": external_id_contains,
            "statusIn": status_in,
            "consensusMarkGte": consensus_mark_gt,
            "consensusMarkLte": consensus_mark_lt,
            "honeypotMarkGte": honeypot_mark_gt,
            "honeypotMarkLte": honeypot_mark_lt,
            "idIn": asset_id_in,
            "metadata": metadata_where,
            "label": {
                "typeIn": label_type_in,
                "authorIn": label_author_in,
                "consensusMarkGte": label_consensus_mark_gt,
                "consensusMarkLte": label_consensus_mark_lt,
                "createdAt": label_created_at,
                "createdAtGte": label_created_at_gt,
                "createdAtLte": label_created_at_lt,
                "honeypotMarkGte": label_honeypot_mark_gt,
                "honeypotMarkLte": label_honeypot_mark_lt,
                "search": label_category_search,
            },
            "skipped": skipped,
            "updatedAtGte": updated_at_gte,
            "updatedAtLte": updated_at_lte,
        },
    }

    post_call_process = get_post_assets_call_process(
        download_media, local_media_dir, project_id
    )

    asset_generator = row_generator_from_paginated_calls(
        skip,
        first,
        self.count_assets,
        count_args,
        self._query_assets,
        payload_query,
        fields,
        disable_tqdm,
        post_call_process,
    )

    if format == "pandas":
        return pd.DataFrame(list(asset_generator))
    if as_generator:
        return asset_generator
    return list(asset_generator)

count_assets(self, project_id, asset_id=None, asset_id_in=None, external_id_contains=None, metadata_where=None, status_in=None, consensus_mark_gt=None, consensus_mark_lt=None, honeypot_mark_gt=None, honeypot_mark_lt=None, label_type_in=None, label_author_in=None, label_consensus_mark_gt=None, label_consensus_mark_lt=None, label_created_at=None, label_created_at_gt=None, label_created_at_lt=None, label_honeypot_mark_gt=None, label_honeypot_mark_lt=None, skipped=None, updated_at_gte=None, updated_at_lte=None, label_category_search=None)

Count and return the number of assets with the given constraints.

Parameters beginning with 'label_' apply to labels, others apply to assets.

Parameters:

Name	Type	Description	Default
`project_id`	`str`	Identifier of the project	required
`asset_id`	`Optional[str]`	The unique id of the asset to retrieve.	`None`
`asset_id_in`	`Optional[List[str]]`	A list of the ids of the assets to retrieve.	`None`
`external_id_contains`	`Optional[List[str]]`	Returned assets should have an external id that belongs to that list, if given.	`None`
`metadata_where`	`Optional[dict]`	Filters by the values of the metadata of the asset.	`None`
`status_in`	`Optional[List[str]]`	Returned assets should have a status that belongs to that list, if given. Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`	`None`
`consensus_mark_gt`	`Optional[float]`	Minimum amount of consensus for the asset.	`None`
`consensus_mark_lt`	`Optional[float]`	Maximum amount of consensus for the asset.	`None`
`honeypot_mark_gt`	`Optional[float]`	Minimum amount of honeypot for the asset.	`None`
`honeypot_mark_lt`	`Optional[float]`	Maximum amount of honeypot for the asset.	`None`
`label_type_in`	`Optional[List[str]]`	Returned assets should have a label whose type belongs to that list, if given.	`None`
`label_author_in`	`Optional[List[str]]`	Returned assets should have a label whose author belongs to that list, if given.	`None`
`label_consensus_mark_gt`	`Optional[float]`	Returned assets should have a label whose consensus is greater than this number.	`None`
`label_consensus_mark_lt`	`Optional[float]`	Returned assets should have a label whose consensus is lower than this number.	`None`
`label_created_at`	`Optional[str]`	Returned assets should have a label whose creation date is equal to this date.	`None`
`label_created_at_gt`	`Optional[str]`	Returned assets should have a label whose creation date is greater than this date.	`None`
`label_created_at_lt`	`Optional[str]`	Returned assets should have a label whose creation date is lower than this date.	`None`
`label_honeypot_mark_gt`	`Optional[float]`	Returned assets should have a label whose honeypot is greater than this number.	`None`
`label_honeypot_mark_lt`	`Optional[float]`	Returned assets should have a label whose honeypot is lower than this number.	`None`
`skipped`	`Optional[bool]`	Returned assets should be skipped	`None`
`updated_at_gte`	`Optional[str]`	Returned assets should have a label whose update date is greater than or equal to this date.	`None`
`updated_at_lte`	`Optional[str]`	Returned assets should have a label whose update date is lower than or equal to this date.	`None`

Dates format

Date strings should have format: "YYYY-MM-DD"

Returns:

Type	Description
`int`	The number of assets matching the given constraints.

Examples:

>>> kili.count_assets(project_id=project_id)
250
>>> kili.count_assets(project_id=project_id, asset_id=asset_id)
1

How to filter based on Metadata

metadata_where = {key1: "value1"} to filter on assets whose metadata have key "key1" with value "value1"
metadata_where = {key1: ["value1", "value2"]} to filter on assets whose metadata have key "key1" with value "value1" or value "value2"
metadata_where = {key2: [2, 10]} to filter on assets whose metadata have key "key2" with a value between 2 and 10.

Source code in kili/queries/asset/__init__.py

@typechecked
def count_assets(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_id_in: Optional[List[str]] = None,
    external_id_contains: Optional[List[str]] = None,
    metadata_where: Optional[dict] = None,
    status_in: Optional[List[str]] = None,
    consensus_mark_gt: Optional[float] = None,
    consensus_mark_lt: Optional[float] = None,
    honeypot_mark_gt: Optional[float] = None,
    honeypot_mark_lt: Optional[float] = None,
    label_type_in: Optional[List[str]] = None,
    label_author_in: Optional[List[str]] = None,
    label_consensus_mark_gt: Optional[float] = None,
    label_consensus_mark_lt: Optional[float] = None,
    label_created_at: Optional[str] = None,
    label_created_at_gt: Optional[str] = None,
    label_created_at_lt: Optional[str] = None,
    label_honeypot_mark_gt: Optional[float] = None,
    label_honeypot_mark_lt: Optional[float] = None,
    skipped: Optional[bool] = None,
    updated_at_gte: Optional[str] = None,
    updated_at_lte: Optional[str] = None,
    label_category_search: Optional[str] = None,
) -> int:
    """Count and return the number of assets with the given constraints.

    Parameters beginning with 'label_' apply to labels, others apply to assets.

    Args:
        project_id: Identifier of the project
        asset_id: The unique id of the asset to retrieve.
        asset_id_in: A list of the ids of the assets to retrieve.
        external_id_contains: Returned assets should have an external id
            that belongs to that list, if given.
        metadata_where: Filters by the values of the metadata of the asset.
        status_in: Returned assets should have a status that belongs to that list, if given.
            Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
        consensus_mark_gt: Minimum amount of consensus for the asset.
        consensus_mark_lt: Maximum amount of consensus for the asset.
        honeypot_mark_gt: Minimum amount of honeypot for the asset.
        honeypot_mark_lt: Maximum amount of honeypot for the asset.
        label_type_in: Returned assets should have a label
            whose type belongs to that list, if given.
        label_author_in: Returned assets should have a label
            whose author belongs to that list, if given.
        label_consensus_mark_gt: Returned assets should have a label
            whose consensus is greater than this number.
        label_consensus_mark_lt: Returned assets should have a label
            whose consensus is lower than this number.
        label_created_at: Returned assets should have a label
            whose creation date is equal to this date.
        label_created_at_gt: Returned assets should have a label
            whose creation date is greater than this date.
        label_created_at_lt: Returned assets should have a label
            whose creation date is lower than this date.
        label_honeypot_mark_gt: Returned assets should have a label
            whose honeypot is greater than this number.
        label_honeypot_mark_lt: Returned assets should have a label
            whose honeypot is lower than this number.
        skipped: Returned assets should be skipped
        updated_at_gte: Returned assets should have a label
            whose update date is greater than or equal to this date.
        updated_at_lte: Returned assets should have a label
            whose update date is lower than or equal to this date.
        label_category_search: Search query forwarded as the `search` entry of
            the label filter. When given, it is first checked by
            `validate_category_search_query`.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        The number of assets matching the given constraints.

    Examples:
        >>> kili.count_assets(project_id=project_id)
        250
        >>> kili.count_assets(project_id=project_id, asset_id=asset_id)
        1

    !!! example "How to filter based on Metadata"
        - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
            have key "key1" with value "value1"
        - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
            have key "key1" with value "value1" or value "value2"
        - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
            have key "key2" with a value between 2 and 10.
    """
    # An absent or empty search query is passed through without validation.
    if label_category_search:
        validate_category_search_query(label_category_search)

    # Constraints that apply to the labels attached to an asset.
    label_filter = {
        "typeIn": label_type_in,
        "authorIn": label_author_in,
        "consensusMarkGte": label_consensus_mark_gt,
        "consensusMarkLte": label_consensus_mark_lt,
        "createdAt": label_created_at,
        "createdAtGte": label_created_at_gt,
        "createdAtLte": label_created_at_lt,
        "honeypotMarkGte": label_honeypot_mark_gt,
        "honeypotMarkLte": label_honeypot_mark_lt,
        "search": label_category_search,
    }

    # Constraints that apply to the asset itself; unset filters stay `None`.
    asset_filter = {
        "id": asset_id,
        "project": {"id": project_id},
        "externalIdStrictlyIn": external_id_contains,
        "statusIn": status_in,
        "consensusMarkGte": consensus_mark_gt,
        "consensusMarkLte": consensus_mark_lt,
        "honeypotMarkGte": honeypot_mark_gt,
        "honeypotMarkLte": honeypot_mark_lt,
        "idIn": asset_id_in,
        "metadata": metadata_where,
        "label": label_filter,
        "skipped": skipped,
        "updatedAtGte": updated_at_gte,
        "updatedAtLte": updated_at_lte,
    }

    response = self.auth.client.execute(GQL_ASSETS_COUNT, {"where": asset_filter})
    return format_result("data", response, int)

Mutations

Set of Asset mutations

Source code in kili/mutations/asset/__init__.py

class MutationsAsset:
    """
    Set of Asset mutations
    """

    # pylint: disable=too-many-arguments,too-many-locals

    def __init__(self, auth):
        """Initialize the subclass.

        Args:
            auth: KiliAuth object
        """
        self.auth = auth

    @staticmethod
    def _where_id_in(batch: Dict) -> Dict:
        """Build the mutation variables that select the assets of a batch by id.

        Args:
            batch: A batch of properties holding the ids under the key `asset_ids`.

        Returns:
            The variables `{"where": {"idIn": [...]}}` for that batch.
        """
        return {"where": {"idIn": batch["asset_ids"]}}

    @typechecked
    def append_many_to_dataset(
        self,
        project_id: str,
        content_array: Optional[List[str]] = None,
        external_id_array: Optional[List[str]] = None,
        id_array: Optional[List[str]] = None,
        is_honeypot_array: Optional[List[bool]] = None,
        status_array: Optional[List[str]] = None,
        json_content_array: Optional[List[List[Union[dict, str]]]] = None,
        json_metadata_array: Optional[List[dict]] = None,
        disable_tqdm: bool = False,
    ) -> Dict[str, str]:
        # pylint: disable=line-too-long
        """Append assets to a project.

        Args:
            project_id: Identifier of the project
            content_array: List of elements added to the assets of the project
                Must not be None except if you provide json_content_array.

                - For a `TEXT` project, the content can be either raw text, or URLs to TEXT assets.
                - For an `IMAGE` / `PDF` project, the content can be either URLs or paths to existing
                    images/pdf on your computer.
                - For a VIDEO project, the content can be either URLs pointing to videos hosted on a web server or paths to
                existing video files on your computer. If you want to import video from frames, look at the json_content
                section below.
                - For a `VIDEO_LEGACY` project, the content can only be URLs
            external_id_array: List of external ids given to identify the assets.
                If None, random identifiers are created.
            id_array: List of values passed as the `id` field of each imported asset.
            is_honeypot_array:  Whether to use the asset for honeypot
            status_array: By default, all imported assets are set to `TODO`. Other options:
                `ONGOING`, `LABELED`, `REVIEWED`.
            json_content_array: Useful for `VIDEO` or `TEXT` projects only.

                - For `VIDEO` projects, each element is a sequence of frames, i.e. a
                    list of URLs to images or a list of paths to images.
                - For `TEXT` projects, each element is a json_content dict,
                    formatted according to documentation [on how to import
                rich-text assets](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb)
            json_metadata_array: The metadata given to each asset should be stored in a json like dict with keys.

                - Add metadata visible on the asset with the following keys: `imageUrl`, `text`, `url`.
                    Example for one asset: `json_metadata_array = [{'imageUrl': '','text': '','url': ''}]`.
                - For VIDEO projects (and not VIDEO_LEGACY), you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30).
                    Example for one asset: `json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}]`.
            disable_tqdm: If `True`, the progress bar will be disabled

        Returns:
            A result object which indicates if the mutation was successful, or an error message.

        Raises:
            ValueError: If both `content_array` and `json_content_array` are None,
                or if the provided arrays do not all have the same length.

        Examples:
            >>> kili.append_many_to_dataset(
                    project_id=project_id,
                    content_array=['https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'])

        !!! example "Recipe"
            - For more detailed examples on how to import assets,
                see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_assets.ipynb)
                or [other examples](https://docs.kili-technology.com/recipes/importing-data) in our documentation.
            - For more detailed examples on how to import text assets,
                see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb).
        """

        if content_array is None and json_content_array is None:
            raise ValueError("Variables content_array and json_content_array cannot be both None.")
        # The number of assets is driven by content_array when given,
        # otherwise by json_content_array.
        nb_data = (
            len(content_array)
            if content_array is not None
            else len(json_content_array)  # type:ignore
        )
        field_mapping = {
            "content": content_array,
            "json_content": json_content_array,
            "external_id": external_id_array,
            "id": id_array,
            "status": status_array,
            "json_metadata": json_metadata_array,
            "is_honeypot": is_honeypot_array,
        }
        # Only the fields the caller actually supplied end up in the assets.
        provided = {key: value for key, value in field_mapping.items() if value is not None}
        # Reject mismatched lengths up front: a longer array would otherwise be
        # silently truncated and a shorter one would fail with an IndexError.
        for key, value in provided.items():
            if len(value) != nb_data:
                raise ValueError(
                    f"Argument {key}_array has {len(value)} element(s) but {nb_data} asset(s)"
                    " are being imported: all arrays must have the same length."
                )
        assets = [{key: value[i] for key, value in provided.items()} for i in range(nb_data)]
        return import_assets(
            self.auth, project_id=project_id, assets=assets, disable_tqdm=disable_tqdm
        )

    @typechecked
    def update_properties_in_assets(
        self,
        asset_ids: List[str],
        external_ids: Optional[List[str]] = None,
        priorities: Optional[List[int]] = None,
        json_metadatas: Optional[List[Union[dict, str]]] = None,
        consensus_marks: Optional[List[float]] = None,
        honeypot_marks: Optional[List[float]] = None,
        to_be_labeled_by_array: Optional[List[List[str]]] = None,
        contents: Optional[List[str]] = None,
        json_contents: Optional[List[str]] = None,
        status_array: Optional[List[str]] = None,
        is_used_for_consensus_array: Optional[List[bool]] = None,
        is_honeypot_array: Optional[List[bool]] = None,
    ) -> List[dict]:
        """Update the properties of one or more assets.

        Args:
            asset_ids : The asset IDs to modify
            external_ids: Change the external id of the assets
            priorities : You can change the priority of the assets
                By default, all assets have a priority of 0.
            json_metadatas: The metadata given to an asset should be stored
                in a json like dict with keys `imageUrl`, `text`, `url`:
                `json_metadata = {'imageUrl': '','text': '','url': ''}`
            consensus_marks: Should be between 0 and 1
            honeypot_marks: Should be between 0 and 1
            to_be_labeled_by_array: If given, each element of the list should contain the emails of
                the labelers authorized to label the asset.
            contents: - For a NLP project, the content can be directly in text format
                - For an Image / Video / Pdf project, the content must be hosted on a web server,
                and you point Kili to your data by giving the URLs
            json_contents: - For a NLP project, the `json_content`
                is a text formatted using RichText
                - For a Video project, the `json_content` is a json containing urls pointing
                    to each frame of the video.
            status_array: Each element should be in `TODO`, `ONGOING`, `LABELED`, `REVIEWED`
            is_used_for_consensus_array: Whether to use the asset to compute consensus kpis or not
            is_honeypot_array: Whether to use the asset for honeypot

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message.

        Examples:
            >>> kili.update_properties_in_assets(
                    asset_ids=["ckg22d81r0jrg0885unmuswj8",
                        "ckg22d81s0jrh0885pdxfd03n"],
                    consensus_marks=[1, 0.7],
                    contents=[None, 'https://to/second/asset.png'],
                    external_ids=['external-id-of-your-choice-1',
                        'external-id-of-your-choice-2'],
                    honeypot_marks=[0.8, 0.5],
                    is_honeypot_array=[True, True],
                    is_used_for_consensus_array=[True, False],
                    priorities=[None, 2],
                    status_array=['LABELED', 'REVIEWED'],
                    to_be_labeled_by_array=[
                        ['test+pierre@kili-technology.com'], None],
            )
        """

        # Explicit mapping, listed in signature order, of every per-asset
        # property handed to the batching helper.
        parameters = {
            "asset_ids": asset_ids,
            "external_ids": external_ids,
            "priorities": priorities,
            "json_metadatas": json_metadatas,
            "consensus_marks": consensus_marks,
            "honeypot_marks": honeypot_marks,
            "to_be_labeled_by_array": to_be_labeled_by_array,
            "contents": contents,
            "json_contents": json_contents,
            "status_array": status_array,
            "is_used_for_consensus_array": is_used_for_consensus_array,
            "is_honeypot_array": is_honeypot_array,
        }
        properties_to_batch = process_update_properties_in_assets_parameters(parameters)

        def generate_variables(batch: Dict) -> Dict:
            """Turn one batch of column-wise properties into mutation variables."""
            data = {
                "externalId": batch["external_ids"],
                "priority": batch["priorities"],
                "jsonMetadata": batch["json_metadatas"],
                "consensusMark": batch["consensus_marks"],
                "honeypotMark": batch["honeypot_marks"],
                "toBeLabeledBy": batch["to_be_labeled_by_array"],
                "shouldResetToBeLabeledBy": batch["should_reset_to_be_labeled_by_array"],
                "content": batch["contents"],
                "jsonContent": batch["json_contents"],
                "status": batch["status_array"],
                "isUsedForConsensus": batch["is_used_for_consensus_array"],
                "isHoneypot": batch["is_honeypot_array"],
            }
            # Transpose the column-wise lists into one dict per asset.
            data_array = [dict(zip(data, t)) for t in zip(*data.values())]
            return {
                "whereArray": [{"id": asset_id} for asset_id in batch["asset_ids"]],
                "dataArray": data_array,
            }

        results = _mutate_from_paginated_call(
            self,
            properties_to_batch,
            generate_variables,
            GQL_UPDATE_PROPERTIES_IN_ASSETS,
        )
        formated_results = [format_result("data", result, Asset) for result in results]
        # Flatten the per-call result lists into a single list.
        return [item for batch_list in formated_results for item in batch_list]

    @typechecked
    def delete_many_from_dataset(self, asset_ids: List[str]) -> Asset:
        """Delete assets from a project.

        Args:
            asset_ids: The list of identifiers of the assets to delete.

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message.
        """
        properties_to_batch: Dict[str, Optional[List[Any]]] = {"asset_ids": asset_ids}

        results = _mutate_from_paginated_call(
            self, properties_to_batch, self._where_id_in, GQL_DELETE_MANY_FROM_DATASET
        )
        # NOTE(review): only the first element of `results` is returned; if the
        # helper issues several calls, the later results are dropped - confirm.
        return format_result("data", results[0], Asset)

    @typechecked
    def add_to_review(self, asset_ids: List[str]) -> Dict:
        """Add assets to review.

        !!! warning
            Assets without any label will be ignored.

        Args:
            asset_ids: The asset IDs to add to review

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message.

        Examples:
            >>> kili.add_to_review(
                    asset_ids=[
                        "ckg22d81r0jrg0885unmuswj8",
                        "ckg22d81s0jrh0885pdxfd03n"
                        ],
                )
        """
        properties_to_batch: Dict[str, Optional[List[Any]]] = {"asset_ids": asset_ids}

        results = _mutate_from_paginated_call(
            self,
            properties_to_batch,
            self._where_id_in,
            GQL_ADD_ALL_LABELED_ASSETS_TO_REVIEW,
        )
        # NOTE(review): only the first element of `results` is returned - confirm
        # that this is intended when the helper issues several calls.
        return format_result("data", results[0])

    @typechecked
    def send_back_to_queue(self, asset_ids: List[str]) -> Dict[str, str]:
        """Send assets back to queue.

        Args:
            asset_ids: The asset IDs to send back to the queue

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message.

        Examples:
            >>> kili.send_back_to_queue(
                    asset_ids=[
                        "ckg22d81r0jrg0885unmuswj8",
                        "ckg22d81s0jrh0885pdxfd03n"
                        ],
                )
        """
        properties_to_batch: Dict[str, Optional[List[Any]]] = {"asset_ids": asset_ids}

        results = _mutate_from_paginated_call(
            self, properties_to_batch, self._where_id_in, GQL_SEND_BACK_ASSETS_TO_QUEUE
        )
        # NOTE(review): only the first element of `results` is returned - confirm
        # that this is intended when the helper issues several calls.
        return format_result("data", results[0])

`add_to_review(self, asset_ids)`

Add assets to review.

Warning

Assets without any label will be ignored.

Parameters:

Name	Type	Description	Default
`asset_ids`	`List[str]`	The asset IDs to add to review	required

Returns:

Type	Description
`Dict`	A result object which indicates if the mutation was successful, or an error message.

Examples:

>>> kili.add_to_review(
        asset_ids=[
            "ckg22d81r0jrg0885unmuswj8",
            "ckg22d81s0jrh0885pdxfd03n"
            ],
    )

Source code in kili/mutations/asset/__init__.py

@typechecked
def add_to_review(self, asset_ids: List[str]) -> Dict:
    """Add assets to review.

    !!! warning
        Assets without any label will be ignored.

    Args:
        asset_ids: The asset IDs to add to review

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.

    Examples:
        >>> kili.add_to_review(
                asset_ids=[
                    "ckg22d81r0jrg0885unmuswj8",
                    "ckg22d81s0jrh0885pdxfd03n"
                    ],
            )
    """

    def build_variables(chunk):
        # Select the assets of the current chunk by id.
        id_filter = {"idIn": chunk["asset_ids"]}
        return {"where": id_filter}

    ids_to_batch: Dict[str, Optional[List[Any]]] = {"asset_ids": asset_ids}
    responses = _mutate_from_paginated_call(
        self, ids_to_batch, build_variables, GQL_ADD_ALL_LABELED_ASSETS_TO_REVIEW
    )
    # Only the first response is formatted and returned.
    first_response = responses[0]
    return format_result("data", first_response)

`append_many_to_dataset(self, project_id, content_array=None, external_id_array=None, id_array=None, is_honeypot_array=None, status_array=None, json_content_array=None, json_metadata_array=None, disable_tqdm=False)`

Append assets to a project.

Parameters:

Name	Type	Description	Default
`project_id`	`str`	Identifier of the project	required
`content_array`	`Optional[List[str]]`	List of elements added to the assets of the project Must not be None except if you provide json_content_array. For a `TEXT` project, the content can be either raw text, or URLs to TEXT assets. For an `IMAGE` / `PDF` project, the content can be either URLs or paths to existing images/pdf on your computer. For a VIDEO project, the content can be either URLs pointing to videos hosted on a web server or paths to existing video files on your computer. If you want to import video from frames, look at the json_content section below. For a `VIDEO_LEGACY` project, the content can only be URLs	`None`
`external_id_array`	`Optional[List[str]]`	List of external ids given to identify the assets. If None, random identifiers are created.	`None`
`is_honeypot_array`	`Optional[List[bool]]`	Whether to use the asset for honeypot	`None`
`status_array`	`Optional[List[str]]`	By default, all imported assets are set to `TODO`. Other options: `ONGOING`, `LABELED`, `REVIEWED`.	`None`
`json_content_array`	`Optional[List[List[Union[dict, str]]]]`	Useful for `VIDEO` or `TEXT` projects only. For `VIDEO` projects, each element is a sequence of frames, i.e. a list of URLs to images or a list of paths to images. For `TEXT` projects, each element is a json_content dict, formatted according to documentation on how to import rich-text assets	`None`
`json_metadata_array`	`Optional[List[dict]]`	The metadata given to each asset should be stored in a json like dict with keys. Add metadata visible on the asset with the following keys: `imageUrl`, `text`, `url`. Example for one asset: `json_metadata_array = [{'imageUrl': '','text': '','url': ''}]`. For VIDEO projects (and not VIDEO_LEGACY), you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30). Example for one asset: `json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}]`.	`None`
`disable_tqdm`	`bool`	If `True`, the progress bar will be disabled	`False`

Returns:

Type	Description
`Dict[str, str]`	A result object which indicates if the mutation was successful, or an error message.

Examples:

>>> kili.append_many_to_dataset(
        project_id=project_id,
        content_array=['https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'])

Recipe

For more detailed examples on how to import assets, see the recipe or other examples in our documentation.
For more detailed examples on how to import text assets, see the recipe.

Source code in kili/mutations/asset/__init__.py

@typechecked
def append_many_to_dataset(
    self,
    project_id: str,
    content_array: Optional[List[str]] = None,
    external_id_array: Optional[List[str]] = None,
    id_array: Optional[List[str]] = None,
    is_honeypot_array: Optional[List[bool]] = None,
    status_array: Optional[List[str]] = None,
    json_content_array: Optional[List[List[Union[dict, str]]]] = None,
    json_metadata_array: Optional[List[dict]] = None,
    disable_tqdm: bool = False,
) -> Dict[str, str]:
    # pylint: disable=line-too-long
    """Append assets to a project.

    Args:
        project_id: Identifier of the project
        content_array: List of elements added to the assets of the project
            Must not be None except if you provide json_content_array.

            - For a `TEXT` project, the content can be either raw text, or URLs to TEXT assets.
            - For an `IMAGE` / `PDF` project, the content can be either URLs or paths to existing
                images/pdf on your computer.
            - For a VIDEO project, the content can be either URLs pointing to videos hosted on a web server or paths to
            existing video files on your computer. If you want to import video from frames, look at the json_content
            section below.
            - For a `VIDEO_LEGACY` project, the content can only be URLs
        external_id_array: List of external ids given to identify the assets.
            If None, random identifiers are created.
        id_array: List of values passed as the `id` field of each imported asset.
        is_honeypot_array:  Whether to use the asset for honeypot
        status_array: By default, all imported assets are set to `TODO`. Other options:
            `ONGOING`, `LABELED`, `REVIEWED`.
        json_content_array: Useful for `VIDEO` or `TEXT` projects only.

            - For `VIDEO` projects, each element is a sequence of frames, i.e. a
                list of URLs to images or a list of paths to images.
            - For `TEXT` projects, each element is a json_content dict,
                formatted according to documentation [on how to import
            rich-text assets](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb)
        json_metadata_array: The metadata given to each asset should be stored in a json like dict with keys.

            - Add metadata visible on the asset with the following keys: `imageUrl`, `text`, `url`.
                Example for one asset: `json_metadata_array = [{'imageUrl': '','text': '','url': ''}]`.
            - For VIDEO projects (and not VIDEO_LEGACY), you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30).
                Example for one asset: `json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}]`.
        disable_tqdm: If `True`, the progress bar will be disabled

    Returns:
        A result object which indicates if the mutation was successful, or an error message.

    Raises:
        ValueError: If both `content_array` and `json_content_array` are None,
            or if the provided arrays do not all have the same length.

    Examples:
        >>> kili.append_many_to_dataset(
                project_id=project_id,
                content_array=['https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'])

    !!! example "Recipe"
        - For more detailed examples on how to import assets,
            see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_assets.ipynb)
            or [other examples](https://docs.kili-technology.com/recipes/importing-data) in our documentation.
        - For more detailed examples on how to import text assets,
            see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb).
    """

    if content_array is None and json_content_array is None:
        raise ValueError("Variables content_array and json_content_array cannot be both None.")
    # The number of assets is driven by content_array when given,
    # otherwise by json_content_array.
    nb_data = (
        len(content_array)
        if content_array is not None
        else len(json_content_array)  # type:ignore
    )
    field_mapping = {
        "content": content_array,
        "json_content": json_content_array,
        "external_id": external_id_array,
        "id": id_array,
        "status": status_array,
        "json_metadata": json_metadata_array,
        "is_honeypot": is_honeypot_array,
    }
    # Only the fields the caller actually supplied end up in the assets.
    provided = {key: value for key, value in field_mapping.items() if value is not None}
    # Reject mismatched lengths up front: a longer array would otherwise be
    # silently truncated and a shorter one would fail with an IndexError.
    for key, value in provided.items():
        if len(value) != nb_data:
            raise ValueError(
                f"Argument {key}_array has {len(value)} element(s) but {nb_data} asset(s)"
                " are being imported: all arrays must have the same length."
            )
    assets = [{key: value[i] for key, value in provided.items()} for i in range(nb_data)]
    return import_assets(
        self.auth, project_id=project_id, assets=assets, disable_tqdm=disable_tqdm
    )

`delete_many_from_dataset(self, asset_ids)`

Delete assets from a project.

Parameters:

Name	Type	Description	Default
`asset_ids`	`List[str]`	The list of identifiers of the assets to delete.	required

Returns:

Type	Description
`Asset`	A result object which indicates if the mutation was successful, or an error message.

Source code in kili/mutations/asset/__init__.py

@typechecked
def delete_many_from_dataset(self, asset_ids: List[str]) -> Asset:
    """Remove the given assets from their project.

    The identifiers are passed to `_mutate_from_paginated_call` together with
    the `GQL_DELETE_MANY_FROM_DATASET` mutation; only the first result it
    returns is formatted and handed back to the caller.

    Args:
        asset_ids: Identifiers of the assets to delete.

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.
    """

    def build_payload(chunk):
        # One "where" filter per batch, matching on that batch's identifiers.
        id_filter = {"idIn": chunk["asset_ids"]}
        return {"where": id_filter}

    batched_properties: Dict[str, Optional[List[Any]]] = {"asset_ids": asset_ids}
    mutation_results = _mutate_from_paginated_call(
        self, batched_properties, build_payload, GQL_DELETE_MANY_FROM_DATASET
    )
    first_result = mutation_results[0]
    return format_result("data", first_result, Asset)

`send_back_to_queue(self, asset_ids)`

Send assets back to queue.

Parameters:

Name	Type	Description	Default
`asset_ids`	`List[str]`	The IDs of the assets to send back to the queue	required

Returns:

Type	Description
`Dict[str, str]`	A result object which indicates if the mutation was successful, or an error message.

Examples:

>>> kili.send_back_to_queue(
        asset_ids=[
            "ckg22d81r0jrg0885unmuswj8",
            "ckg22d81s0jrh0885pdxfd03n"
            ],
)

Source code in kili/mutations/asset/__init__.py

@typechecked
def send_back_to_queue(self, asset_ids: List[str]) -> Dict[str, str]:
    """Send assets back to the labeling queue.

    The identifiers are passed to `_mutate_from_paginated_call` together with
    the `GQL_SEND_BACK_ASSETS_TO_QUEUE` mutation; only the first result it
    returns is formatted and handed back to the caller.

    Args:
        asset_ids: The IDs of the assets to send back to the queue.

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.

    Examples:
        >>> kili.send_back_to_queue(
                asset_ids=[
                    "ckg22d81r0jrg0885unmuswj8",
                    "ckg22d81s0jrh0885pdxfd03n"
                    ],
        )
    """

    def build_payload(chunk):
        # One "where" filter per batch, matching on that batch's identifiers.
        id_filter = {"idIn": chunk["asset_ids"]}
        return {"where": id_filter}

    batched_properties: Dict[str, Optional[List[Any]]] = {"asset_ids": asset_ids}
    mutation_results = _mutate_from_paginated_call(
        self, batched_properties, build_payload, GQL_SEND_BACK_ASSETS_TO_QUEUE
    )
    first_result = mutation_results[0]
    return format_result("data", first_result)

`update_properties_in_assets(self, asset_ids, external_ids=None, priorities=None, json_metadatas=None, consensus_marks=None, honeypot_marks=None, to_be_labeled_by_array=None, contents=None, json_contents=None, status_array=None, is_used_for_consensus_array=None, is_honeypot_array=None)`

Update the properties of one or more assets.

Parameters:

Name	Type	Description	Default
`asset_ids`		The asset IDs to modify	required
`external_ids`	`Optional[List[str]]`	Change the external id of the assets	`None`
`priorities`		You can change the priority of the assets. By default, all assets have a priority of 0.	`None`
`json_metadatas`	`Optional[List[Union[dict, str]]]`	The metadata given to an asset should be stored in a json like dict with keys `imageUrl`, `text`, `url`: `json_metadata = {'imageUrl': '','text': '','url': ''}`	`None`
`consensus_marks`	`Optional[List[float]]`	Should be between 0 and 1	`None`
`honeypot_marks`	`Optional[List[float]]`	Should be between 0 and 1	`None`
`to_be_labeled_by_array`	`Optional[List[List[str]]]`	If given, each element of the list should contain the emails of the labelers authorized to label the asset.	`None`
`contents`	`Optional[List[str]]`	For a NLP project, the content can be directly in text format. For an Image / Video / Pdf project, the content must be hosted on a web server, and you point Kili to your data by giving the URLs.	`None`
`json_contents`	`Optional[List[str]]`	For a NLP project, the `json_content` is a text formatted using RichText. For a Video project, the `json_content` is a json containing urls pointing to each frame of the video.	`None`
`status_array`	`Optional[List[str]]`	Each element should be in `TODO`, `ONGOING`, `LABELED`, `REVIEWED`	`None`
`is_used_for_consensus_array`	`Optional[List[bool]]`	Whether to use the asset to compute consensus kpis or not	`None`
`is_honeypot_array`	`Optional[List[bool]]`	Whether to use the asset for honeypot	`None`

Returns:

Type	Description
`List[dict]`	A result object which indicates if the mutation was successful, or an error message.

Examples:

>>> kili.update_properties_in_assets(
        asset_ids=["ckg22d81r0jrg0885unmuswj8",
            "ckg22d81s0jrh0885pdxfd03n"],
        consensus_marks=[1, 0.7],
        contents=[None, 'https://to/second/asset.png'],
        external_ids=['external-id-of-your-choice-1',
            'external-id-of-your-choice-2'],
        honeypot_marks=[0.8, 0.5],
        is_honeypot_array=[True, True],
        is_used_for_consensus_array=[True, False],
        priorities=[None, 2],
        status_array=['LABELED', 'REVIEWED'],
        to_be_labeled_by_array=[
            ['test+pierre@kili-technology.com'], None],
)

Source code in kili/mutations/asset/__init__.py

@typechecked
def update_properties_in_assets(
    self,
    asset_ids: List[str],
    external_ids: Optional[List[str]] = None,
    priorities: Optional[List[int]] = None,
    json_metadatas: Optional[List[Union[dict, str]]] = None,
    consensus_marks: Optional[List[float]] = None,
    honeypot_marks: Optional[List[float]] = None,
    to_be_labeled_by_array: Optional[List[List[str]]] = None,
    contents: Optional[List[str]] = None,
    json_contents: Optional[List[str]] = None,
    status_array: Optional[List[str]] = None,
    is_used_for_consensus_array: Optional[List[bool]] = None,
    is_honeypot_array: Optional[List[bool]] = None,
) -> List[dict]:
    """Update the properties of one or more assets.

    Every list argument is positional with respect to `asset_ids`: the i-th
    element of a list applies to the i-th asset.

    Args:
        asset_ids: The asset IDs to modify.
        external_ids: Change the external id of the assets.
        priorities: You can change the priority of the assets.
            By default, all assets have a priority of 0.
        json_metadatas: The metadata given to an asset should be stored
            in a json like dict with keys `imageUrl`, `text`, `url`:
            `json_metadata = {'imageUrl': '','text': '','url': ''}`
        consensus_marks: Should be between 0 and 1.
        honeypot_marks: Should be between 0 and 1.
        to_be_labeled_by_array: If given, each element of the list should contain the emails of
            the labelers authorized to label the asset.
        contents: - For a NLP project, the content can be directly in text format.
            - For an Image / Video / Pdf project, the content must be hosted on a web server,
            and you point Kili to your data by giving the URLs.
        json_contents: - For a NLP project, the `json_content`
            is a text formatted using RichText.
            - For a Video project, the `json_content` is a json containing urls pointing
                to each frame of the video.
        status_array: Each element should be in `TODO`, `ONGOING`, `LABELED`, `REVIEWED`.
        is_used_for_consensus_array: Whether to use the asset to compute consensus kpis or not.
        is_honeypot_array: Whether to use the asset for honeypot.

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message. The formatted results of all batches are
            concatenated into a single flat list.

    Examples:
        >>> kili.update_properties_in_assets(
                asset_ids=["ckg22d81r0jrg0885unmuswj8",
                    "ckg22d81s0jrh0885pdxfd03n"],
                consensus_marks=[1, 0.7],
                contents=[None, 'https://to/second/asset.png'],
                external_ids=['external-id-of-your-choice-1',
                    'external-id-of-your-choice-2'],
                honeypot_marks=[0.8, 0.5],
                is_honeypot_array=[True, True],
                is_used_for_consensus_array=[True, False],
                priorities=[None, 2],
                status_array=['LABELED', 'REVIEWED'],
                to_be_labeled_by_array=[
                    ['test+pierre@kili-technology.com'], None],
        )
    """

    # Per-asset arguments gathered by name, in signature order.
    parameters = {
        "asset_ids": asset_ids,
        "external_ids": external_ids,
        "priorities": priorities,
        "json_metadatas": json_metadatas,
        "consensus_marks": consensus_marks,
        "honeypot_marks": honeypot_marks,
        "to_be_labeled_by_array": to_be_labeled_by_array,
        "contents": contents,
        "json_contents": json_contents,
        "status_array": status_array,
        "is_used_for_consensus_array": is_used_for_consensus_array,
        "is_honeypot_array": is_honeypot_array,
    }
    properties_to_batch = process_update_properties_in_assets_parameters(parameters)

    # (GraphQL field name, key read from each batch) pairs.
    # NOTE(review): "should_reset_to_be_labeled_by_array" is not an argument of
    # this method; presumably process_update_properties_in_assets_parameters
    # adds it -- confirm against that helper.
    field_to_batch_key = (
        ("externalId", "external_ids"),
        ("priority", "priorities"),
        ("jsonMetadata", "json_metadatas"),
        ("consensusMark", "consensus_marks"),
        ("honeypotMark", "honeypot_marks"),
        ("toBeLabeledBy", "to_be_labeled_by_array"),
        ("shouldResetToBeLabeledBy", "should_reset_to_be_labeled_by_array"),
        ("content", "contents"),
        ("jsonContent", "json_contents"),
        ("status", "status_array"),
        ("isUsedForConsensus", "is_used_for_consensus_array"),
        ("isHoneypot", "is_honeypot_array"),
    )

    def generate_variables(batch: Dict) -> Dict:
        columns = {field: batch[key] for field, key in field_to_batch_key}
        field_names = list(columns)
        # Transpose the per-field columns into one dict per asset.
        data_array = [
            {field: value for field, value in zip(field_names, row)}
            for row in zip(*columns.values())
        ]
        where_array = [{"id": identifier} for identifier in batch["asset_ids"]]
        return {"whereArray": where_array, "dataArray": data_array}

    results = _mutate_from_paginated_call(
        self,
        properties_to_batch,
        generate_variables,
        GQL_UPDATE_PROPERTIES_IN_ASSETS,
    )
    # Format every batch result first, then flatten them into a single list.
    per_batch = [format_result("data", batch_result, Asset) for batch_result in results]
    flattened = []
    for batch_assets in per_batch:
        flattened.extend(batch_assets)
    return flattened