Skip to content

Asset module

Queries

Set of Asset queries

Source code in kili/queries/asset/__init__.py
class QueriesAsset:
    """Set of Asset queries."""

    # pylint: disable=too-many-arguments,too-many-locals

    def __init__(self, auth):
        """Initialize the subclass.

        Args:
            auth: KiliAuth object
        """
        self.auth = auth

    # pylint: disable=dangerous-default-value
    @typechecked
    def assets(
        self,
        project_id: str,
        asset_id: Optional[str] = None,
        skip: int = 0,
        fields: List[str] = [
            "content",
            "createdAt",
            "externalId",
            "id",
            "isHoneypot",
            "jsonMetadata",
            "labels.author.id",
            "labels.author.email",
            "labels.createdAt",
            "labels.id",
            "labels.jsonResponse",
            "skipped",
            "status",
        ],
        asset_id_in: Optional[List[str]] = None,
        consensus_mark_gt: Optional[float] = None,
        consensus_mark_lt: Optional[float] = None,
        disable_tqdm: bool = False,
        external_id_contains: Optional[List[str]] = None,
        first: Optional[int] = None,
        format: Optional[str] = None,  # pylint: disable=redefined-builtin
        honeypot_mark_gt: Optional[float] = None,
        honeypot_mark_lt: Optional[float] = None,
        label_author_in: Optional[List[str]] = None,
        label_consensus_mark_gt: Optional[float] = None,
        label_consensus_mark_lt: Optional[float] = None,
        label_created_at: Optional[str] = None,
        label_created_at_gt: Optional[str] = None,
        label_created_at_lt: Optional[str] = None,
        label_honeypot_mark_gt: Optional[float] = None,
        label_honeypot_mark_lt: Optional[float] = None,
        label_type_in: Optional[List[str]] = None,
        metadata_where: Optional[dict] = None,
        skipped: Optional[bool] = None,
        status_in: Optional[List[str]] = None,
        updated_at_gte: Optional[str] = None,
        updated_at_lte: Optional[str] = None,
        as_generator: bool = False,
        label_category_search: Optional[str] = None,
        download_media: bool = False,
        local_media_dir: Optional[str] = None,
    ) -> Union[Iterable[Dict], pd.DataFrame]:
        # pylint: disable=line-too-long
        """Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.

        Args:
            project_id: Identifier of the project.
            asset_id: Identifier of the asset to retrieve.
            asset_id_in: A list of the IDs of the assets to retrieve.
            skip: Number of assets to skip (they are ordered by their date of creation, first to last).
            fields: All the fields to request among the possible fields for the assets.
                    See [the documentation](https://docs.kili-technology.com/reference/graphql-api#asset) for all possible fields.
            first: Maximum number of assets to return.
            consensus_mark_gt: Minimum amount of consensus for the asset.
            consensus_mark_lt: Maximum amount of consensus for the asset.
            external_id_contains: Returned assets have an external id that belongs to that list, if given.
            metadata_where: Filters by the values of the metadata of the asset.
            honeypot_mark_gt: Minimum amount of honeypot for the asset.
            honeypot_mark_lt: Maximum amount of honeypot for the asset.
            status_in: Returned assets should have a status that belongs to that list, if given.
                Possible choices: `TODO`, `ONGOING`, `LABELED`, `TO_REVIEW` or `REVIEWED`
            label_type_in: Returned assets should have a label whose type belongs to that list, if given.
            label_author_in: Returned assets should have a label whose author belongs to that list, if given.
            label_consensus_mark_gt: Returned assets should have a label whose consensus is greater than this number.
            label_consensus_mark_lt: Returned assets should have a label whose consensus is lower than this number.
            label_created_at: Returned assets should have a label whose creation date is equal to this date.
            label_created_at_gt: Returned assets should have a label whose creation date is greater than this date.
            label_created_at_lt: Returned assets should have a label whose creation date is lower than this date.
            label_honeypot_mark_gt: Returned assets should have a label whose honeypot is greater than this number
            label_honeypot_mark_lt: Returned assets should have a label whose honeypot is lower than this number
            skipped: Returned assets should be skipped
            updated_at_gte: Returned assets should have a label whose update date is greater or equal to this date.
            updated_at_lte: Returned assets should have a label whose update date is lower or equal to this date.
            format: If equal to 'pandas', returns a pandas DataFrame
            disable_tqdm: If `True`, the progress bar will be disabled
            as_generator: If `True`, a generator on the assets is returned.
            label_category_search: Returned assets should have a label that follows this category search query.
            download_media: Tells whether the media have to be downloaded or not.
            local_media_dir: Directory where the media are downloaded if `download_media` is True.

        !!! info "Dates format"
            Date strings should have format: "YYYY-MM-DD"

        Returns:
            A list of assets by default, a generator of assets if `as_generator` is `True`,
                or a pandas DataFrame if `format` is `"pandas"`.

        Raises:
            ValueError: If `as_generator` is `True` and `format` is `"pandas"` at the same time.

        Example:
            ```
            # returns the assets list of the project
            >>> kili.assets(project_id)
            >>> kili.assets(project_id, asset_id=asset_id)
            # returns a generator of the project assets
            >>> kili.assets(project_id, as_generator=True)
            ```

        !!! example "How to filter based on Metadata"
            - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
                have key "key1" with value "value1"
            - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
                have key "key1" with value "value1" or value "value2"
            - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
                have key "key2" with a value between 2 and 10.

        !!! example "How to filter based on label categories"
            The search query is composed of logical expressions following this format:

                [job_name].[category_name].count [comparison_operator] [value]
            where:

            - `[job_name]` is the name of the job in the interface
            - `[category_name]` is the name of the category in the interface for this job
            - `[comparison_operator]` can be one of: [`==`, `>=`, `<=`, `<`, `>`]
            - `[value]` is an integer that represents the count of such objects of the given category in the label

            These operations can be separated by OR and AND operators

            Example:

                label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
                label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
                label_category_search = `(JOB_CLASSIF.CATEGORY_A.count == 1 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
        """
        if format == "pandas" and as_generator:
            raise ValueError(
                'Argument values as_generator==True and format=="pandas" are not compatible.'
            )

        # Snapshot the call arguments before any other local is bound, so the
        # snapshot holds the parameters only. Everything that is not a
        # pagination/output option is forwarded to `count_assets` as a filter.
        saved_args = locals()
        count_args = {
            k: v
            for (k, v) in saved_args.items()
            if k
            not in (
                "skip",
                "first",
                "disable_tqdm",
                "format",
                "fields",
                "self",
                "as_generator",
                "download_media",
                "local_media_dir",
            )
        }

        # using tqdm with a generator is messy, so it is always disabled
        disable_tqdm = disable_tqdm or as_generator
        if label_category_search:
            validate_category_search_query(label_category_search)

        payload_query = {
            "where": {
                "id": asset_id,
                "project": {
                    "id": project_id,
                },
                "externalIdStrictlyIn": external_id_contains,
                "statusIn": status_in,
                "consensusMarkGte": consensus_mark_gt,
                "consensusMarkLte": consensus_mark_lt,
                "honeypotMarkGte": honeypot_mark_gt,
                "honeypotMarkLte": honeypot_mark_lt,
                "idIn": asset_id_in,
                "metadata": metadata_where,
                "label": {
                    "typeIn": label_type_in,
                    "authorIn": label_author_in,
                    "consensusMarkGte": label_consensus_mark_gt,
                    "consensusMarkLte": label_consensus_mark_lt,
                    "createdAt": label_created_at,
                    "createdAtGte": label_created_at_gt,
                    "createdAtLte": label_created_at_lt,
                    "honeypotMarkGte": label_honeypot_mark_gt,
                    "honeypotMarkLte": label_honeypot_mark_lt,
                    "search": label_category_search,
                },
                "skipped": skipped,
                "updatedAtGte": updated_at_gte,
                "updatedAtLte": updated_at_lte,
            },
        }

        post_call_process = get_post_assets_call_process(
            download_media, local_media_dir, project_id
        )

        asset_generator = row_generator_from_paginated_calls(
            skip,
            first,
            self.count_assets,
            count_args,
            self._query_assets,
            payload_query,
            # Pass a copy: the default `fields` list is a single object shared by
            # every call, so downstream code must never get a chance to alter it.
            list(fields),
            disable_tqdm,
            post_call_process,
        )

        if format == "pandas":
            return pd.DataFrame(list(asset_generator))
        if as_generator:
            return asset_generator
        return list(asset_generator)

    def _query_assets(self, skip: int, first: int, payload: dict, fields: List[str]):
        """Execute the assets query for one page and return the formatted result.

        Args:
            skip: Value sent as the `skip` variable of the query.
            first: Value sent as the `first` variable of the query.
            payload: Query variables (the `where` filter); left unmodified.
            fields: Asset fields used to build the query fragment.

        Returns:
            The `data` part of the response, formatted by `format_result`.
        """
        # Build a new dict instead of updating `payload` in place, so the
        # caller's dict is not polluted with pagination keys.
        paginated_payload = {**payload, "skip": skip, "first": first}
        _gql_assets = gql_assets(fragment_builder(fields, AssetType))
        result = self.auth.client.execute(_gql_assets, paginated_payload)
        return format_result("data", result, _object=List[Asset])

    @typechecked
    def count_assets(
        self,
        project_id: str,
        asset_id: Optional[str] = None,
        asset_id_in: Optional[List[str]] = None,
        external_id_contains: Optional[List[str]] = None,
        metadata_where: Optional[dict] = None,
        status_in: Optional[List[str]] = None,
        consensus_mark_gt: Optional[float] = None,
        consensus_mark_lt: Optional[float] = None,
        honeypot_mark_gt: Optional[float] = None,
        honeypot_mark_lt: Optional[float] = None,
        label_type_in: Optional[List[str]] = None,
        label_author_in: Optional[List[str]] = None,
        label_consensus_mark_gt: Optional[float] = None,
        label_consensus_mark_lt: Optional[float] = None,
        label_created_at: Optional[str] = None,
        label_created_at_gt: Optional[str] = None,
        label_created_at_lt: Optional[str] = None,
        label_honeypot_mark_gt: Optional[float] = None,
        label_honeypot_mark_lt: Optional[float] = None,
        skipped: Optional[bool] = None,
        updated_at_gte: Optional[str] = None,
        updated_at_lte: Optional[str] = None,
        label_category_search: Optional[str] = None,
    ) -> int:
        """Count and return the number of assets with the given constraints.

        Parameters beginning with 'label_' apply to labels, others apply to assets.

        Args:
            project_id: Identifier of the project
            asset_id: The unique id of the asset to retrieve.
            asset_id_in: A list of the ids of the assets to retrieve.
            external_id_contains: Returned assets should have an external id
                that belongs to that list, if given.
            metadata_where: Filters by the values of the metadata of the asset.
            status_in: Returned assets should have a status that belongs to that list, if given.
                Possible choices: `TODO`, `ONGOING`, `LABELED`, `TO_REVIEW` or `REVIEWED`
            consensus_mark_gt: Minimum amount of consensus for the asset.
            consensus_mark_lt: Maximum amount of consensus for the asset.
            honeypot_mark_gt: Minimum amount of honeypot for the asset.
            honeypot_mark_lt: Maximum amount of honeypot for the asset.
            label_type_in: Returned assets should have a label
                whose type belongs to that list, if given.
            label_author_in: Returned assets should have a label
                whose author belongs to that list, if given.
            label_consensus_mark_gt: Returned assets should have a label
                whose consensus is greater than this number.
            label_consensus_mark_lt: Returned assets should have a label
                whose consensus is lower than this number.
            label_created_at: Returned assets should have a label
                whose creation date is equal to this date.
            label_created_at_gt: Returned assets should have a label
                whose creation date is greater than this date.
            label_created_at_lt: Returned assets should have a label
                whose creation date is lower than this date.
            label_honeypot_mark_gt: Returned assets should have a label
                whose honeypot is greater than this number.
            label_honeypot_mark_lt: Returned assets should have a label
                whose honeypot is lower than this number.
            skipped: Returned assets should be skipped
            updated_at_gte: Returned assets should have a label
                whose update date is greater or equal to this date.
            updated_at_lte: Returned assets should have a label
                whose update date is lower or equal to this date.
            label_category_search: Returned assets should have a label
                that follows this category search query.

        !!! info "Dates format"
            Date strings should have format: "YYYY-MM-DD"

        Returns:
            The number of assets that match the given constraints.

        Examples:
            >>> kili.count_assets(project_id=project_id)
            250
            >>> kili.count_assets(project_id=project_id, asset_id=asset_id)
            1

        !!! example "How to filter based on Metadata"
            - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
                have key "key1" with value "value1"
            - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
                have key "key1" with value "value1" or value "value2"
            - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
                have key "key2" with a value between 2 and 10.
        """
        if label_category_search:
            validate_category_search_query(label_category_search)

        variables = {
            "where": {
                "id": asset_id,
                "project": {
                    "id": project_id,
                },
                "externalIdStrictlyIn": external_id_contains,
                "statusIn": status_in,
                "consensusMarkGte": consensus_mark_gt,
                "consensusMarkLte": consensus_mark_lt,
                "honeypotMarkGte": honeypot_mark_gt,
                "honeypotMarkLte": honeypot_mark_lt,
                "idIn": asset_id_in,
                "metadata": metadata_where,
                "label": {
                    "typeIn": label_type_in,
                    "authorIn": label_author_in,
                    "consensusMarkGte": label_consensus_mark_gt,
                    "consensusMarkLte": label_consensus_mark_lt,
                    "createdAt": label_created_at,
                    "createdAtGte": label_created_at_gt,
                    "createdAtLte": label_created_at_lt,
                    "honeypotMarkGte": label_honeypot_mark_gt,
                    "honeypotMarkLte": label_honeypot_mark_lt,
                    "search": label_category_search,
                },
                "skipped": skipped,
                "updatedAtGte": updated_at_gte,
                "updatedAtLte": updated_at_lte,
            }
        }
        result = self.auth.client.execute(GQL_ASSETS_COUNT, variables)
        return format_result("data", result, int)

assets(self, project_id, asset_id=None, skip=0, fields=['content', 'createdAt', 'externalId', 'id', 'isHoneypot', 'jsonMetadata', 'labels.author.id', 'labels.author.email', 'labels.createdAt', 'labels.id', 'labels.jsonResponse', 'skipped', 'status'], asset_id_in=None, consensus_mark_gt=None, consensus_mark_lt=None, disable_tqdm=False, external_id_contains=None, first=None, format=None, honeypot_mark_gt=None, honeypot_mark_lt=None, label_author_in=None, label_consensus_mark_gt=None, label_consensus_mark_lt=None, label_created_at=None, label_created_at_gt=None, label_created_at_lt=None, label_honeypot_mark_gt=None, label_honeypot_mark_lt=None, label_type_in=None, metadata_where=None, skipped=None, status_in=None, updated_at_gte=None, updated_at_lte=None, as_generator=False, label_category_search=None, download_media=False, local_media_dir=None)

Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.

Parameters:

Name Type Description Default
project_id str

Identifier of the project.

required
asset_id Optional[str]

Identifier of the asset to retrieve.

None
asset_id_in Optional[List[str]]

A list of the IDs of the assets to retrieve.

None
skip int

Number of assets to skip (they are ordered by their date of creation, first to last).

0
fields List[str]

All the fields to request among the possible fields for the assets. See the documentation for all possible fields.

['content', 'createdAt', 'externalId', 'id', 'isHoneypot', 'jsonMetadata', 'labels.author.id', 'labels.author.email', 'labels.createdAt', 'labels.id', 'labels.jsonResponse', 'skipped', 'status']
first Optional[int]

Maximum number of assets to return.

None
consensus_mark_gt Optional[float]

Minimum amount of consensus for the asset.

None
consensus_mark_lt Optional[float]

Maximum amount of consensus for the asset.

None
external_id_contains Optional[List[str]]

Returned assets have an external id that belongs to that list, if given.

None
metadata_where Optional[dict]

Filters by the values of the metadata of the asset.

None
honeypot_mark_gt Optional[float]

Minimum amount of honeypot for the asset.

None
honeypot_mark_lt Optional[float]

Maximum amount of honeypot for the asset.

None
status_in Optional[List[str]]

Returned assets should have a status that belongs to that list, if given. Possible choices: TODO, ONGOING, LABELED, TO_REVIEW or REVIEWED

None
label_type_in Optional[List[str]]

Returned assets should have a label whose type belongs to that list, if given.

None
label_author_in Optional[List[str]]

Returned assets should have a label whose author belongs to that list, if given.

None
label_consensus_mark_gt Optional[float]

Returned assets should have a label whose consensus is greater than this number.

None
label_consensus_mark_lt Optional[float]

Returned assets should have a label whose consensus is lower than this number.

None
label_created_at Optional[str]

Returned assets should have a label whose creation date is equal to this date.

None
label_created_at_gt Optional[str]

Returned assets should have a label whose creation date is greater than this date.

None
label_created_at_lt Optional[str]

Returned assets should have a label whose creation date is lower than this date.

None
label_honeypot_mark_gt Optional[float]

Returned assets should have a label whose honeypot is greater than this number

None
label_honeypot_mark_lt Optional[float]

Returned assets should have a label whose honeypot is lower than this number

None
skipped Optional[bool]

Returned assets should be skipped

None
updated_at_gte Optional[str]

Returned assets should have a label whose update date is greater or equal to this date.

None
updated_at_lte Optional[str]

Returned assets should have a label whose update date is lower or equal to this date.

None
format Optional[str]

If equal to 'pandas', returns a pandas DataFrame

None
disable_tqdm bool

If True, the progress bar will be disabled

False
as_generator bool

If True, a generator on the assets is returned.

False
label_category_search Optional[str]

Returned assets should have a label that follows this category search query.

None
download_media bool

Tells whether the media have to be downloaded or not.

False
local_media_dir Optional[str]

Directory where the media are downloaded if download_media is True.

None

Dates format

Date strings should have format: "YYYY-MM-DD"

Returns:

Type Description
Union[Iterable[Dict], pandas.core.frame.DataFrame]

A list of assets by default, a generator of assets if as_generator is True, or a pandas DataFrame if format is "pandas".

Examples:

# returns the assets list of the project
>>> kili.assets(project_id)
>>> kili.assets(project_id, asset_id=asset_id)
# returns a generator of the project assets
>>> kili.assets(project_id, as_generator=True)

How to filter based on Metadata

  • metadata_where = {key1: "value1"} to filter on assets whose metadata have key "key1" with value "value1"
  • metadata_where = {key1: ["value1", "value2"]} to filter on assets whose metadata have key "key1" with value "value1" or value "value2"
  • metadata_where = {key2: [2, 10]} to filter on assets whose metadata have key "key2" with a value between 2 and 10.

How to filter based on label categories

The search query is composed of logical expressions following this format:

[job_name].[category_name].count [comparison_operator] [value]

where:

  • [job_name] is the name of the job in the interface
  • [category_name] is the name of the category in the interface for this job
  • [comparison_operator] can be one of: [==, >=, <=, <, >]
  • [value] is an integer that represents the count of such objects of the given category in the label

These operations can be separated by OR and AND operators

Example:

label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
label_category_search = `(JOB_CLASSIF.CATEGORY_A.count == 1 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
Source code in kili/queries/asset/__init__.py
@typechecked
def assets(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    skip: int = 0,
    fields: List[str] = [
        "content",
        "createdAt",
        "externalId",
        "id",
        "isHoneypot",
        "jsonMetadata",
        "labels.author.id",
        "labels.author.email",
        "labels.createdAt",
        "labels.id",
        "labels.jsonResponse",
        "skipped",
        "status",
    ],
    asset_id_in: Optional[List[str]] = None,
    consensus_mark_gt: Optional[float] = None,
    consensus_mark_lt: Optional[float] = None,
    disable_tqdm: bool = False,
    external_id_contains: Optional[List[str]] = None,
    first: Optional[int] = None,
    format: Optional[str] = None,  # pylint: disable=redefined-builtin
    honeypot_mark_gt: Optional[float] = None,
    honeypot_mark_lt: Optional[float] = None,
    label_author_in: Optional[List[str]] = None,
    label_consensus_mark_gt: Optional[float] = None,
    label_consensus_mark_lt: Optional[float] = None,
    label_created_at: Optional[str] = None,
    label_created_at_gt: Optional[str] = None,
    label_created_at_lt: Optional[str] = None,
    label_honeypot_mark_gt: Optional[float] = None,
    label_honeypot_mark_lt: Optional[float] = None,
    label_type_in: Optional[List[str]] = None,
    metadata_where: Optional[dict] = None,
    skipped: Optional[bool] = None,
    status_in: Optional[List[str]] = None,
    updated_at_gte: Optional[str] = None,
    updated_at_lte: Optional[str] = None,
    as_generator: bool = False,
    label_category_search: Optional[str] = None,
    download_media: bool = False,
    local_media_dir: Optional[str] = None,
) -> Union[Iterable[Dict], pd.DataFrame]:
    # pylint: disable=line-too-long
    """Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.

    Args:
        project_id: Identifier of the project.
        asset_id: Identifier of the asset to retrieve.
        asset_id_in: A list of the IDs of the assets to retrieve.
        skip: Number of assets to skip (they are ordered by their date of creation, first to last).
        fields: All the fields to request among the possible fields for the assets.
                See [the documentation](https://docs.kili-technology.com/reference/graphql-api#asset) for all possible fields.
        first: Maximum number of assets to return.
        consensus_mark_gt: Minimum amount of consensus for the asset.
        consensus_mark_lt: Maximum amount of consensus for the asset.
        external_id_contains: Returned assets have an external id that belongs to that list, if given.
        metadata_where: Filters by the values of the metadata of the asset.
        honeypot_mark_gt: Minimum amount of honeypot for the asset.
        honeypot_mark_lt: Maximum amount of honeypot for the asset.
        status_in: Returned assets should have a status that belongs to that list, if given.
            Possible choices: `TODO`, `ONGOING`, `LABELED`, `TO_REVIEW` or `REVIEWED`
        label_type_in: Returned assets should have a label whose type belongs to that list, if given.
        label_author_in: Returned assets should have a label whose author belongs to that list, if given.
        label_consensus_mark_gt: Returned assets should have a label whose consensus is greater than this number.
        label_consensus_mark_lt: Returned assets should have a label whose consensus is lower than this number.
        label_created_at: Returned assets should have a label whose creation date is equal to this date.
        label_created_at_gt: Returned assets should have a label whose creation date is greater than this date.
        label_created_at_lt: Returned assets should have a label whose creation date is lower than this date.
        label_honeypot_mark_gt: Returned assets should have a label whose honeypot is greater than this number
        label_honeypot_mark_lt: Returned assets should have a label whose honeypot is lower than this number
        skipped: Returned assets should be skipped
        updated_at_gte: Returned assets should have a label whose update date is greater or equal to this date.
        updated_at_lte: Returned assets should have a label whose update date is lower or equal to this date.
        format: If equal to 'pandas', returns a pandas DataFrame
        disable_tqdm: If `True`, the progress bar will be disabled
        as_generator: If `True`, a generator on the assets is returned.
        label_category_search: Returned assets should have a label that follows this category search query.
        download_media: Tells whether the media have to be downloaded or not.
        local_media_dir: Directory where the media are downloaded if `download_media` is True.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        A list of assets by default, a generator of assets if `as_generator` is `True`,
            or a pandas DataFrame if `format` is `"pandas"`.

    Raises:
        ValueError: If `as_generator` is `True` and `format` is `"pandas"` at the same time.

    Example:
        ```
        # returns the assets list of the project
        >>> kili.assets(project_id)
        >>> kili.assets(project_id, asset_id=asset_id)
        # returns a generator of the project assets
        >>> kili.assets(project_id, as_generator=True)
        ```

    !!! example "How to filter based on Metadata"
        - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
            have key "key1" with value "value1"
        - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
            have key "key1" with value "value1" or value "value2"
        - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
            have key "key2" with a value between 2 and 10.

    !!! example "How to filter based on label categories"
        The search query is composed of logical expressions following this format:

            [job_name].[category_name].count [comparison_operator] [value]
        where:

        - `[job_name]` is the name of the job in the interface
        - `[category_name]` is the name of the category in the interface for this job
        - `[comparison_operator]` can be one of: [`==`, `>=`, `<=`, `<`, `>`]
        - `[value]` is an integer that represents the count of such objects of the given category in the label

        These operations can be separated by OR and AND operators

        Example:

            label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
            label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
            label_category_search = `(JOB_CLASSIF.CATEGORY_A.count == 1 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
    """
    if format == "pandas" and as_generator:
        raise ValueError(
            'Argument values as_generator==True and format=="pandas" are not compatible.'
        )

    # Snapshot the call arguments before any other local is bound, so the
    # snapshot holds the parameters only. Everything that is not a
    # pagination/output option is forwarded to `count_assets` as a filter.
    saved_args = locals()
    count_args = {
        k: v
        for (k, v) in saved_args.items()
        if k
        not in (
            "skip",
            "first",
            "disable_tqdm",
            "format",
            "fields",
            "self",
            "as_generator",
            "download_media",
            "local_media_dir",
        )
    }

    # using tqdm with a generator is messy, so it is always disabled
    disable_tqdm = disable_tqdm or as_generator
    if label_category_search:
        validate_category_search_query(label_category_search)

    payload_query = {
        "where": {
            "id": asset_id,
            "project": {
                "id": project_id,
            },
            "externalIdStrictlyIn": external_id_contains,
            "statusIn": status_in,
            "consensusMarkGte": consensus_mark_gt,
            "consensusMarkLte": consensus_mark_lt,
            "honeypotMarkGte": honeypot_mark_gt,
            "honeypotMarkLte": honeypot_mark_lt,
            "idIn": asset_id_in,
            "metadata": metadata_where,
            "label": {
                "typeIn": label_type_in,
                "authorIn": label_author_in,
                "consensusMarkGte": label_consensus_mark_gt,
                "consensusMarkLte": label_consensus_mark_lt,
                "createdAt": label_created_at,
                "createdAtGte": label_created_at_gt,
                "createdAtLte": label_created_at_lt,
                "honeypotMarkGte": label_honeypot_mark_gt,
                "honeypotMarkLte": label_honeypot_mark_lt,
                "search": label_category_search,
            },
            "skipped": skipped,
            "updatedAtGte": updated_at_gte,
            "updatedAtLte": updated_at_lte,
        },
    }

    post_call_process = get_post_assets_call_process(
        download_media, local_media_dir, project_id
    )

    asset_generator = row_generator_from_paginated_calls(
        skip,
        first,
        self.count_assets,
        count_args,
        self._query_assets,
        payload_query,
        # Pass a copy: the default `fields` list is a single object shared by
        # every call, so downstream code must never get a chance to alter it.
        list(fields),
        disable_tqdm,
        post_call_process,
    )

    if format == "pandas":
        return pd.DataFrame(list(asset_generator))
    if as_generator:
        return asset_generator
    return list(asset_generator)

count_assets(self, project_id, asset_id=None, asset_id_in=None, external_id_contains=None, metadata_where=None, status_in=None, consensus_mark_gt=None, consensus_mark_lt=None, honeypot_mark_gt=None, honeypot_mark_lt=None, label_type_in=None, label_author_in=None, label_consensus_mark_gt=None, label_consensus_mark_lt=None, label_created_at=None, label_created_at_gt=None, label_created_at_lt=None, label_honeypot_mark_gt=None, label_honeypot_mark_lt=None, skipped=None, updated_at_gte=None, updated_at_lte=None, label_category_search=None)

Count and return the number of assets with the given constraints.

Parameters beginning with 'label_' apply to labels, others apply to assets.

Parameters:

Name Type Description Default
project_id str

Identifier of the project

required
asset_id Optional[str]

The unique id of the asset to retrieve.

None
asset_id_in Optional[List[str]]

A list of the ids of the assets to retrieve.

None
external_id_contains Optional[List[str]]

Returned assets should have an external id that belongs to that list, if given.

None
metadata_where Optional[dict]

Filters by the values of the metadata of the asset.

None
status_in Optional[List[str]]

Returned assets should have a status that belongs to that list, if given. Possible choices: TODO, ONGOING, LABELED or REVIEWED

None
consensus_mark_gt Optional[float]

Minimum amount of consensus for the asset.

None
consensus_mark_lt Optional[float]

Maximum amount of consensus for the asset.

None
honeypot_mark_gt Optional[float]

Minimum amount of honeypot for the asset.

None
honeypot_mark_lt Optional[float]

Maximum amount of honeypot for the asset.

None
label_type_in Optional[List[str]]

Returned assets should have a label whose type belongs to that list, if given.

None
label_author_in Optional[List[str]]

Returned assets should have a label whose author belongs to that list, if given.

None
label_consensus_mark_gt Optional[float]

Returned assets should have a label whose consensus is greater than this number.

None
label_consensus_mark_lt Optional[float]

Returned assets should have a label whose consensus is lower than this number.

None
label_created_at Optional[str]

Returned assets should have a label whose creation date is equal to this date.

None
label_created_at_gt Optional[str]

Returned assets should have a label whose creation date is greater than this date.

None
label_created_at_lt Optional[str]

Returned assets should have a label whose creation date is lower than this date.

None
label_honeypot_mark_gt Optional[float]

Returned assets should have a label whose honeypot is greater than this number.

None
label_honeypot_mark_lt Optional[float]

Returned assets should have a label whose honeypot is lower than this number.

None
skipped Optional[bool]

Returned assets should be skipped

None
updated_at_gte Optional[str]

Returned assets should have a label whose update date is greater or equal to this date.

None
updated_at_lte Optional[str]

Returned assets should have a label whose update date is lower or equal to this date.

None

Dates format

Date strings should have format: "YYYY-MM-DD"

Returns:

Type Description
int

The number of assets matching the given constraints.

Examples:

>>> kili.count_assets(project_id=project_id)
250
>>> kili.count_assets(asset_id=asset_id)
1

How to filter based on Metadata

  • metadata_where = {key1: "value1"} to filter on assets whose metadata have key "key1" with value "value1"
  • metadata_where = {key1: ["value1", "value2"]} to filter on assets whose metadata have key "key1" with value "value1" or value "value2"
  • metadata_where = {key2: [2, 10]} to filter on assets whose metadata have key "key2" with a value between 2 and 10.
Source code in kili/queries/asset/__init__.py
@typechecked
def count_assets(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_id_in: Optional[List[str]] = None,
    external_id_contains: Optional[List[str]] = None,
    metadata_where: Optional[dict] = None,
    status_in: Optional[List[str]] = None,
    consensus_mark_gt: Optional[float] = None,
    consensus_mark_lt: Optional[float] = None,
    honeypot_mark_gt: Optional[float] = None,
    honeypot_mark_lt: Optional[float] = None,
    label_type_in: Optional[List[str]] = None,
    label_author_in: Optional[List[str]] = None,
    label_consensus_mark_gt: Optional[float] = None,
    label_consensus_mark_lt: Optional[float] = None,
    label_created_at: Optional[str] = None,
    label_created_at_gt: Optional[str] = None,
    label_created_at_lt: Optional[str] = None,
    label_honeypot_mark_gt: Optional[float] = None,
    label_honeypot_mark_lt: Optional[float] = None,
    skipped: Optional[bool] = None,
    updated_at_gte: Optional[str] = None,
    updated_at_lte: Optional[str] = None,
    label_category_search: Optional[str] = None,
) -> int:
    """Count the assets of a project that match the given filters.

    Arguments whose name starts with `label_` are sent inside the `label`
    sub-filter; every other argument is sent as an asset-level filter.
    The `*_gt` / `*_lt` arguments are forwarded under the `...Gte` / `...Lte`
    filter keys of the query.

    Args:
        project_id: Identifier of the project
        asset_id: The unique id of the asset to count.
        asset_id_in: A list of the ids of the assets to count.
        external_id_contains: Counted assets should have an external id
            that belongs to that list, if given.
        metadata_where: Filters by the values of the metadata of the asset.
        status_in: Counted assets should have a status that belongs to that list, if given.
            Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
        consensus_mark_gt: Minimum amount of consensus for the asset.
        consensus_mark_lt: Maximum amount of consensus for the asset.
        honeypot_mark_gt: Minimum amount of honeypot for the asset.
        honeypot_mark_lt: Maximum amount of honeypot for the asset.
        label_type_in: Counted assets should have a label
            whose type belongs to that list, if given.
        label_author_in: Counted assets should have a label
            whose author belongs to that list, if given.
        label_consensus_mark_gt: Counted assets should have a label
            whose consensus is greater than this number.
        label_consensus_mark_lt: Counted assets should have a label
            whose consensus is lower than this number.
        label_created_at: Counted assets should have a label
            whose creation date is equal to this date.
        label_created_at_gt: Counted assets should have a label
            whose creation date is greater than this date.
        label_created_at_lt: Counted assets should have a label
            whose creation date is lower than this date.
        label_honeypot_mark_gt: Counted assets should have a label
            whose honeypot is greater than this number.
        label_honeypot_mark_lt: Counted assets should have a label
            whose honeypot is lower than this number.
        skipped: Counted assets should be skipped
        updated_at_gte: Counted assets should have a label
            whose update date is greater or equal to this date.
        updated_at_lte: Counted assets should have a label
            whose update date is lower or equal to this date.
        label_category_search: Search query sent as the `search` key of the label
            filter. When non-empty, it is first checked with
            `validate_category_search_query`.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        The number of assets matching the given constraints.

    Examples:
        >>> kili.count_assets(project_id=project_id)
        250
        >>> kili.count_assets(asset_id=asset_id)
        1

    !!! example "How to filter based on Metadata"
        - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
            have key "key1" with value "value1"
        - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
            have key "key1" with value "value1" or value "value2"
        - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
            have key "key2" with a value between 2 and 10.
    """
    # Only validate when a search query was actually supplied.
    if label_category_search:
        validate_category_search_query(label_category_search)

    # Label-level criteria, nested under the "label" key of the asset filter.
    label_filter = {
        "typeIn": label_type_in,
        "authorIn": label_author_in,
        "consensusMarkGte": label_consensus_mark_gt,
        "consensusMarkLte": label_consensus_mark_lt,
        "createdAt": label_created_at,
        "createdAtGte": label_created_at_gt,
        "createdAtLte": label_created_at_lt,
        "honeypotMarkGte": label_honeypot_mark_gt,
        "honeypotMarkLte": label_honeypot_mark_lt,
        "search": label_category_search,
    }

    # Asset-level criteria; unset arguments are forwarded as None.
    asset_filter = {
        "id": asset_id,
        "project": {
            "id": project_id,
        },
        "externalIdStrictlyIn": external_id_contains,
        "statusIn": status_in,
        "consensusMarkGte": consensus_mark_gt,
        "consensusMarkLte": consensus_mark_lt,
        "honeypotMarkGte": honeypot_mark_gt,
        "honeypotMarkLte": honeypot_mark_lt,
        "idIn": asset_id_in,
        "metadata": metadata_where,
        "label": label_filter,
        "skipped": skipped,
        "updatedAtGte": updated_at_gte,
        "updatedAtLte": updated_at_lte,
    }

    response = self.auth.client.execute(GQL_ASSETS_COUNT, {"where": asset_filter})
    return format_result("data", response, int)

Mutations

Set of Asset mutations

Source code in kili/mutations/asset/__init__.py
class MutationsAsset:
    """
    Set of Asset mutations
    """

    # pylint: disable=too-many-arguments,too-many-locals

    def __init__(self, auth: KiliAuth):
        """Initialize the subclass.

        Args:
            auth: KiliAuth object
        """
        self.auth = auth

    @staticmethod
    def _asset_id_in_variables(batch: Dict) -> Dict:
        """Build the mutation variables selecting one batch of assets by internal ID."""
        return {"where": {"idIn": batch["asset_ids"]}}

    @staticmethod
    def _asset_update_variables(batch: Dict, data: Dict) -> Dict:
        """Pair each asset of a batch with its per-asset update payload.

        Args:
            batch: One batch of properties; must contain the `asset_ids` key.
            data: Mapping from GraphQL field name to the batch column holding
                one value per asset.

        Returns:
            A dict with `whereArray` (one `{"id": ...}` per asset) and
            `dataArray` (one dict of field values per asset).
        """
        # Transpose the column-oriented `data` into one dict per asset.
        data_array = [dict(zip(data, t)) for t in zip(*data.values())]
        return {
            "whereArray": [{"id": asset_id} for asset_id in batch["asset_ids"]],
            "dataArray": data_array,
        }

    @staticmethod
    def _flatten_asset_batches(results) -> List[Dict]:
        """Format every batch result as assets and concatenate them into one list."""
        formated_results = [format_result("data", result, Asset) for result in results]
        return [item for batch_list in formated_results for item in batch_list]

    @typechecked
    def append_many_to_dataset(
        self,
        project_id: str,
        content_array: Optional[List[str]] = None,
        external_id_array: Optional[List[str]] = None,
        id_array: Optional[List[str]] = None,
        is_honeypot_array: Optional[List[bool]] = None,
        status_array: Optional[List[str]] = None,
        json_content_array: Optional[List[List[Union[dict, str]]]] = None,
        json_metadata_array: Optional[List[dict]] = None,
        disable_tqdm: bool = False,
    ) -> Dict[str, str]:
        # pylint: disable=line-too-long
        """Append assets to a project.

        The number of imported assets is the length of `content_array` when it is
        given, otherwise the length of `json_content_array`. Every other provided
        array is read at the same indices: elements beyond that length are ignored.

        Args:
            project_id: Identifier of the project
            content_array: List of elements added to the assets of the project
                Must not be None except if you provide json_content_array.

                - For a `TEXT` project, the content can be either raw text, or URLs to TEXT assets.
                - For an `IMAGE` / `PDF` project, the content can be either URLs or paths to existing
                    images/pdf on your computer.
                - For a VIDEO project, the content can be either URLs pointing to videos hosted on a web server or paths to
                existing video files on your computer. If you want to import video from frames, look at the json_content
                section below.
                - For a `VIDEO_LEGACY` project, the content can only be URLs
            external_id_array: List of external ids given to identify the assets.
                If None, random identifiers are created.
            id_array: Optional list of ids, passed as the `id` field of each imported asset.
            is_honeypot_array:  Whether to use the asset for honeypot
            status_array: By default, all imported assets are set to `TODO`. Other options:
                `ONGOING`, `LABELED`, `REVIEWED`.
            json_content_array: Useful for `VIDEO` or `TEXT` projects only.

                - For `VIDEO` projects, each element is a sequence of frames, i.e. a
                    list of URLs to images or a list of paths to images.
                - For `TEXT` projects, each element is a json_content dict,
                    formatted according to documentation [on how to import
                rich-text assets](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb)
            json_metadata_array: The metadata given to each asset should be stored in a json like dict with keys.

                - Add metadata visible on the asset with the following keys: `imageUrl`, `text`, `url`.
                    Example for one asset: `json_metadata_array = [{'imageUrl': '','text': '','url': ''}]`.
                - For VIDEO projects (and not VIDEO_LEGACY), you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30).
                    Example for one asset: `json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}]`.
            disable_tqdm: If `True`, the progress bar will be disabled

        Returns:
            A result object which indicates if the mutation was successful, or an error message.

        Raises:
            ValueError: If both `content_array` and `json_content_array` are None.
            IndexError: If a provided array is shorter than the number of assets to import.

        Examples:
            >>> kili.append_many_to_dataset(
                    project_id=project_id,
                    content_array=['https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'])

        !!! example "Recipe"
            - For more detailed examples on how to import assets,
                see [the recipe](https://docs.kili-technology.com/recipes/importing-data).
            - For more detailed examples on how to import text assets,
                see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb).
        """

        if content_array is None and json_content_array is None:
            raise ValueError("Variables content_array and json_content_array cannot be both None.")
        nb_data = (
            len(content_array)
            if content_array is not None
            else len(json_content_array)  # type:ignore
        )
        field_mapping = {
            "content": content_array,
            "json_content": json_content_array,
            "external_id": external_id_array,
            "id": id_array,
            "status": status_array,
            "json_metadata": json_metadata_array,
            "is_honeypot": is_honeypot_array,
        }
        # Keep only the fields the caller supplied, then build each asset dict
        # exactly once (no shared `{}` seed, no per-field rebuild of the list).
        provided_fields = {
            key: values for key, values in field_mapping.items() if values is not None
        }
        assets = [
            {key: values[i] for key, values in provided_fields.items()}
            for i in range(nb_data)
        ]
        return import_assets(
            self.auth, project_id=project_id, assets=assets, disable_tqdm=disable_tqdm
        )

    @typechecked
    def update_properties_in_assets(
        self,
        asset_ids: Optional[List[str]] = None,
        external_ids: Optional[List[str]] = None,
        priorities: Optional[List[int]] = None,
        json_metadatas: Optional[List[Union[dict, str]]] = None,
        consensus_marks: Optional[List[float]] = None,
        honeypot_marks: Optional[List[float]] = None,
        to_be_labeled_by_array: Optional[List[List[str]]] = None,
        contents: Optional[List[str]] = None,
        json_contents: Optional[List[str]] = None,
        status_array: Optional[List[str]] = None,
        is_used_for_consensus_array: Optional[List[bool]] = None,
        is_honeypot_array: Optional[List[bool]] = None,
        project_id: Optional[str] = None,
    ) -> List[Dict]:
        """Update the properties of one or more assets.

        Args:
            asset_ids: The internal asset IDs to modify.
            external_ids: The external asset IDs to modify (if `asset_ids` is not already provided).
            priorities: You can change the priority of the assets.
                By default, all assets have a priority of 0.
            json_metadatas: The metadata given to an asset should be stored
                in a json like dict with keys `imageUrl`, `text`, `url`:
                `json_metadata = {'imageUrl': '','text': '','url': ''}`
            consensus_marks: Should be between 0 and 1.
            honeypot_marks: Should be between 0 and 1.
            to_be_labeled_by_array: If given, each element of the list should contain the emails of
                the labelers authorized to label the asset.
            contents: - For a NLP project, the content can be directly in text format.
                - For an Image / Video / Pdf project, the content must be hosted on a web server,
                and you point Kili to your data by giving the URLs.
            json_contents: - For a NLP project, the `json_content`
                is a text formatted using RichText.
                - For a Video project, the `json_content` is a json containing urls pointing
                    to each frame of the video.
            status_array: Each element should be in `TODO`, `ONGOING`, `LABELED`,
                `TO_REVIEW`, `REVIEWED`.
            is_used_for_consensus_array: Whether to use the asset to compute consensus kpis or not.
            is_honeypot_array: Whether to use the asset for honeypot.
            project_id: The project ID. Only required if `external_ids` argument is provided.

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message.

        Raises:
            MissingArgumentError: If both `asset_ids` and `external_ids` are provided
                (a `DeprecationWarning` is emitted first).

        Examples:
            >>> kili.update_properties_in_assets(
                    asset_ids=["ckg22d81r0jrg0885unmuswj8", "ckg22d81s0jrh0885pdxfd03n"],
                    consensus_marks=[1, 0.7],
                    contents=[None, 'https://to/second/asset.png'],
                    honeypot_marks=[0.8, 0.5],
                    is_honeypot_array=[True, True],
                    is_used_for_consensus_array=[True, False],
                    priorities=[None, 2],
                    status_array=['LABELED', 'REVIEWED'],
                    to_be_labeled_by_array=[['test+pierre@kili-technology.com'], None],
                )
        """
        if asset_ids is not None and external_ids is not None:
            warnings.warn(
                "The use of `external_ids` argument has changed. It is now used to identify which"
                " properties of which assets to update. Please use"
                " `kili.change_asset_external_ids()` method instead to change asset external IDs.",
                DeprecationWarning,
            )
            raise MissingArgumentError("Please provide either `asset_ids` or `external_ids`.")

        asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)

        # Explicit mapping (instead of filtering a `locals()` snapshot) so that the
        # set of forwarded properties cannot silently change when a local is added.
        parameters = {
            "asset_ids": asset_ids,
            "priorities": priorities,
            "json_metadatas": json_metadatas,
            "consensus_marks": consensus_marks,
            "honeypot_marks": honeypot_marks,
            "to_be_labeled_by_array": to_be_labeled_by_array,
            "contents": contents,
            "json_contents": json_contents,
            "status_array": status_array,
            "is_used_for_consensus_array": is_used_for_consensus_array,
            "is_honeypot_array": is_honeypot_array,
        }
        properties_to_batch = process_update_properties_in_assets_parameters(parameters)

        def generate_variables(batch: Dict) -> Dict:
            # `should_reset_to_be_labeled_by_array` is not a caller argument: it is read
            # from the batch produced by `process_update_properties_in_assets_parameters`.
            data = {
                "priority": batch["priorities"],
                "jsonMetadata": batch["json_metadatas"],
                "consensusMark": batch["consensus_marks"],
                "honeypotMark": batch["honeypot_marks"],
                "toBeLabeledBy": batch["to_be_labeled_by_array"],
                "shouldResetToBeLabeledBy": batch["should_reset_to_be_labeled_by_array"],
                "content": batch["contents"],
                "jsonContent": batch["json_contents"],
                "status": batch["status_array"],
                "isUsedForConsensus": batch["is_used_for_consensus_array"],
                "isHoneypot": batch["is_honeypot_array"],
            }
            return self._asset_update_variables(batch, data)

        results = _mutate_from_paginated_call(
            self,
            properties_to_batch,
            generate_variables,
            GQL_UPDATE_PROPERTIES_IN_ASSETS,
        )
        return self._flatten_asset_batches(results)

    @typechecked
    def change_asset_external_ids(
        self,
        new_external_ids: List[str],
        asset_ids: Optional[List[str]] = None,
        external_ids: Optional[List[str]] = None,
        project_id: Optional[str] = None,
    ) -> List[Dict]:
        """Update the external IDs of one or more assets.

        Args:
            new_external_ids: The new external IDs of the assets.
            asset_ids: The asset IDs to modify.
            external_ids: The external asset IDs to modify (if `asset_ids` is not already provided).
            project_id: The project ID. Only required if `external_ids` argument is provided.

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message.

        Examples:
            >>> kili.change_asset_external_ids(
                    new_external_ids=["asset1", "asset2"],
                    asset_ids=["ckg22d81r0jrg0885unmuswj8", "ckg22d81s0jrh0885pdxfd03n"],
                )
        """
        asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)

        # `json_metadatas` and `to_be_labeled_by_array` are passed as None: this
        # method only changes external IDs.
        parameters = {
            "asset_ids": asset_ids,
            "new_external_ids": new_external_ids,
            "json_metadatas": None,
            "to_be_labeled_by_array": None,
        }
        properties_to_batch = process_update_properties_in_assets_parameters(parameters)

        def generate_variables(batch: Dict) -> Dict:
            data = {
                "externalId": batch["new_external_ids"],
                "jsonMetadata": batch["json_metadatas"],
                "toBeLabeledBy": batch["to_be_labeled_by_array"],
                "shouldResetToBeLabeledBy": batch["should_reset_to_be_labeled_by_array"],
            }
            return self._asset_update_variables(batch, data)

        results = _mutate_from_paginated_call(
            self,
            properties_to_batch,
            generate_variables,
            GQL_UPDATE_PROPERTIES_IN_ASSETS,
        )
        return self._flatten_asset_batches(results)

    @typechecked
    def delete_many_from_dataset(
        self,
        asset_ids: Optional[List[str]] = None,
        external_ids: Optional[List[str]] = None,
        project_id: Optional[str] = None,
    ) -> Asset:
        """Delete assets from a project.

        Args:
            asset_ids: The list of asset internal IDs to delete.
            external_ids: The list of asset external IDs to delete.
            project_id: The project ID. Only required if `external_ids` argument is provided.

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message.
        """
        asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)

        properties_to_batch: Dict[str, Optional[List[Any]]] = {"asset_ids": asset_ids}

        results = _mutate_from_paginated_call(
            self,
            properties_to_batch,
            self._asset_id_in_variables,
            GQL_DELETE_MANY_FROM_DATASET,
        )
        # Only the result of the first batch is formatted and returned.
        return format_result("data", results[0], Asset)

    @typechecked
    def add_to_review(
        self,
        asset_ids: Optional[List[str]] = None,
        external_ids: Optional[List[str]] = None,
        project_id: Optional[str] = None,
    ) -> Optional[Dict[str, Any]]:
        """Add assets to review.

        !!! warning
            Assets without any label will be ignored.

        Args:
            asset_ids: The asset internal IDs to add to review.
            external_ids: The asset external IDs to add to review.
            project_id: The project ID. Only required if `external_ids` argument is provided.

        Returns:
            A dict object with the project `id` and the `asset_ids` of assets moved to review.
            `None` if no assets have changed status (already had `TO_REVIEW` status for example).
            An error message if mutation failed.

        Examples:
            >>> kili.add_to_review(
                    asset_ids=[
                        "ckg22d81r0jrg0885unmuswj8",
                        "ckg22d81s0jrh0885pdxfd03n",
                        ],
                )
        """
        asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)

        properties_to_batch: Dict[str, Optional[List[Any]]] = {"asset_ids": asset_ids}

        results = _mutate_from_paginated_call(
            self,
            properties_to_batch,
            self._asset_id_in_variables,
            GQL_ADD_ALL_LABELED_ASSETS_TO_REVIEW,
        )
        result = format_result("data", results[0])
        if isinstance(result, dict) and "id" in result:
            # Re-query to report which of the requested assets are now in TO_REVIEW.
            assets_in_review = QueriesAsset(self.auth).assets(
                project_id=result["id"],
                asset_id_in=asset_ids,
                fields=["id"],
                disable_tqdm=True,
                status_in=["TO_REVIEW"],
            )
            result["asset_ids"] = [asset["id"] for asset in assets_in_review]
        return result

    @typechecked
    def send_back_to_queue(
        self,
        asset_ids: Optional[List[str]] = None,
        external_ids: Optional[List[str]] = None,
        project_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Send assets back to queue.

        Args:
            asset_ids: List of internal IDs of assets to send back to queue.
            external_ids: List of external IDs of assets to send back to queue.
            project_id: The project ID. Only required if `external_ids` argument is provided.

        Returns:
            A dict object with the project `id` and the `asset_ids` of assets moved to queue.
            An error message if mutation failed.

        Examples:
            >>> kili.send_back_to_queue(
                    asset_ids=[
                        "ckg22d81r0jrg0885unmuswj8",
                        "ckg22d81s0jrh0885pdxfd03n",
                        ],
                )
        """
        asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)

        properties_to_batch: Dict[str, Optional[List[Any]]] = {"asset_ids": asset_ids}

        results = _mutate_from_paginated_call(
            self,
            properties_to_batch,
            self._asset_id_in_variables,
            GQL_SEND_BACK_ASSETS_TO_QUEUE,
        )
        result = format_result("data", results[0])
        # Re-query to report which of the requested assets now have the ONGOING status.
        assets_in_queue = QueriesAsset(self.auth).assets(
            project_id=result["id"],
            asset_id_in=asset_ids,
            fields=["id"],
            disable_tqdm=True,
            status_in=["ONGOING"],
        )
        result["asset_ids"] = [asset["id"] for asset in assets_in_queue]
        return result

add_to_review(self, asset_ids=None, external_ids=None, project_id=None)

Add assets to review.

Warning

Assets without any label will be ignored.

Parameters:

Name Type Description Default
asset_ids Optional[List[str]]

The asset internal IDs to add to review.

None
external_ids Optional[List[str]]

The asset external IDs to add to review.

None
project_id Optional[str]

The project ID. Only required if external_ids argument is provided.

None

Returns:

Type Description
Optional[Dict[str, Any]]

A dict object with the project id and the asset_ids of assets moved to review. None if no assets have changed status (already had TO_REVIEW status for example). An error message if mutation failed.

Examples:

>>> kili.add_to_review(
        asset_ids=[
            "ckg22d81r0jrg0885unmuswj8",
            "ckg22d81s0jrh0885pdxfd03n",
            ],
    )
Source code in kili/mutations/asset/__init__.py
@typechecked
def add_to_review(
    self,
    asset_ids: Optional[List[str]] = None,
    external_ids: Optional[List[str]] = None,
    project_id: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """Move assets to the review step.

    !!! warning
        Assets without any label will be ignored.

    Args:
        asset_ids: Internal IDs of the assets to add to review.
        external_ids: External IDs of the assets to add to review.
        project_id: The project ID. Only required if `external_ids` argument is provided.

    Returns:
        A dict object with the project `id` and the `asset_ids` of assets moved to review.
        `None` if no assets have changed status (already had `TO_REVIEW` status for example).
        An error message if mutation failed.

    Examples:
        >>> kili.add_to_review(
                asset_ids=[
                    "ckg22d81r0jrg0885unmuswj8",
                    "ckg22d81s0jrh0885pdxfd03n",
                    ],
            )
    """
    asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)

    def generate_variables(batch):
        # One batch of internal IDs becomes an `idIn` filter.
        return {"where": {"idIn": batch["asset_ids"]}}

    batched_ids: Dict[str, Optional[List[Any]]] = {"asset_ids": asset_ids}
    batch_results = _mutate_from_paginated_call(
        self,
        batched_ids,
        generate_variables,
        GQL_ADD_ALL_LABELED_ASSETS_TO_REVIEW,
    )

    # Only the first batch result is formatted.
    outcome = format_result("data", batch_results[0])
    if not isinstance(outcome, dict) or "id" not in outcome:
        # Nothing to enrich: hand the formatted result back unchanged.
        return outcome

    # Re-query to report which of the requested assets are now in TO_REVIEW.
    reviewed_assets = QueriesAsset(self.auth).assets(
        project_id=outcome["id"],
        asset_id_in=asset_ids,
        fields=["id"],
        disable_tqdm=True,
        status_in=["TO_REVIEW"],
    )
    outcome["asset_ids"] = [asset["id"] for asset in reviewed_assets]
    return outcome

append_many_to_dataset(self, project_id, content_array=None, external_id_array=None, id_array=None, is_honeypot_array=None, status_array=None, json_content_array=None, json_metadata_array=None, disable_tqdm=False)

Append assets to a project.

Parameters:

Name Type Description Default
project_id str

Identifier of the project

required
content_array Optional[List[str]]

List of elements added to the assets of the project Must not be None except if you provide json_content_array.

  • For a TEXT project, the content can be either raw text, or URLs to TEXT assets.
  • For an IMAGE / PDF project, the content can be either URLs or paths to existing images/pdf on your computer.
  • For a VIDEO project, the content can be either URLs pointing to videos hosted on a web server or paths to existing video files on your computer. If you want to import video from frames, look at the json_content section below.
  • For a VIDEO_LEGACY project, the content can only be URLs
None
external_id_array Optional[List[str]]

List of external ids given to identify the assets. If None, random identifiers are created.

None
is_honeypot_array Optional[List[bool]]

Whether to use the asset for honeypot

None
status_array Optional[List[str]]

By default, all imported assets are set to TODO. Other options: ONGOING, LABELED, REVIEWED.

None
json_content_array Optional[List[List[Union[dict, str]]]]

Useful for VIDEO or TEXT projects only.

  • For VIDEO projects, each element is a sequence of frames, i.e. a list of URLs to images or a list of paths to images.
  • For TEXT projects, each element is a json_content dict, formatted according to documentation on how to import rich-text assets
None
json_metadata_array Optional[List[dict]]

The metadata given to each asset should be stored in a json like dict with keys.

  • Add metadata visible on the asset with the following keys: imageUrl, text, url. Example for one asset: json_metadata_array = [{'imageUrl': '','text': '','url': ''}].
  • For VIDEO projects (and not VIDEO_LEGACY), you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30). Example for one asset: json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}].
None
disable_tqdm bool

If True, the progress bar will be disabled

False

Returns:

Type Description
Dict[str, str]

A result object which indicates if the mutation was successful, or an error message.

Examples:

>>> kili.append_many_to_dataset(
        project_id=project_id,
        content_array=['https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'])

Recipe

  • For more detailed examples on how to import assets, see the recipe.
  • For more detailed examples on how to import text assets, see the recipe.
Source code in kili/mutations/asset/__init__.py
@typechecked
def append_many_to_dataset(
    self,
    project_id: str,
    content_array: Optional[List[str]] = None,
    external_id_array: Optional[List[str]] = None,
    id_array: Optional[List[str]] = None,
    is_honeypot_array: Optional[List[bool]] = None,
    status_array: Optional[List[str]] = None,
    json_content_array: Optional[List[List[Union[dict, str]]]] = None,
    json_metadata_array: Optional[List[dict]] = None,
    disable_tqdm: bool = False,
) -> Dict[str, str]:
    # pylint: disable=line-too-long
    """Import a batch of assets into a project.

    Every `*_array` argument is read position by position: the i-th entry of each
    provided array describes the i-th imported asset. The number of assets is the
    length of `content_array` when it is given, otherwise the length of
    `json_content_array`.

    Args:
        project_id: Identifier of the project
        content_array: List of elements added to the assets of the project.
            Must not be None except if you provide json_content_array.

            - For a `TEXT` project, the content can be either raw text, or URLs to TEXT assets.
            - For an `IMAGE` / `PDF` project, the content can be either URLs or paths to existing
                images/pdf on your computer.
            - For a `VIDEO` project, the content can be either URLs pointing to videos hosted on a web server
                or paths to existing video files on your computer. If you want to import video from frames,
                look at the json_content section below.
            - For a `VIDEO_LEGACY` project, the content can only be URLs.
        external_id_array: List of external ids given to identify the assets.
            If None, random identifiers are created.
        id_array: Optional list of values stored under the `id` key of each imported asset
            (presumably the internal asset IDs; confirm against `import_assets`).
        is_honeypot_array: Whether to use the asset for honeypot
        status_array: By default, all imported assets are set to `TODO`. Other options:
            `ONGOING`, `LABELED`, `REVIEWED`.
        json_content_array: Useful for `VIDEO` or `TEXT` projects only.

            - For `VIDEO` projects, each element is a sequence of frames, i.e. a
                list of URLs to images or a list of paths to images.
            - For `TEXT` projects, each element is a json_content dict,
                formatted according to documentation [on how to import
            rich-text assets](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb)
        json_metadata_array: The metadata given to each asset should be stored in a json like dict with keys.

            - Add metadata visible on the asset with the following keys: `imageUrl`, `text`, `url`.
                Example for one asset: `json_metadata_array = [{'imageUrl': '','text': '','url': ''}]`.
            - For `VIDEO` projects (and not `VIDEO_LEGACY`), you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30).
                Example for one asset: `json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}]`.
        disable_tqdm: If `True`, the progress bar will be disabled

    Returns:
        A result object which indicates if the mutation was successful, or an error message.

    Raises:
        ValueError: If both `content_array` and `json_content_array` are None.

    Examples:
        >>> kili.append_many_to_dataset(
                project_id=project_id,
                content_array=['https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'])

    !!! example "Recipe"
        - For more detailed examples on how to import assets,
            see [the recipe](https://docs.kili-technology.com/recipes/importing-data).
        - For more detailed examples on how to import text assets,
            see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb).
    """

    # content_array takes precedence over json_content_array for sizing the import.
    reference_array = content_array if content_array is not None else json_content_array
    if reference_array is None:
        raise ValueError("Variables content_array and json_content_array cannot be both None.")
    nb_data = len(reference_array)

    per_asset_columns = {
        "content": content_array,
        "json_content": json_content_array,
        "external_id": external_id_array,
        "id": id_array,
        "status": status_array,
        "json_metadata": json_metadata_array,
        "is_honeypot": is_honeypot_array,
    }
    # Arguments left to None contribute no key at all to the asset dicts.
    provided_columns = {
        field: values for field, values in per_asset_columns.items() if values is not None
    }
    # One dict per asset, holding the index-th entry of every provided array.
    assets = [
        {field: values[index] for field, values in provided_columns.items()}
        for index in range(nb_data)
    ]
    return import_assets(
        self.auth, project_id=project_id, assets=assets, disable_tqdm=disable_tqdm
    )

change_asset_external_ids(self, new_external_ids, asset_ids=None, external_ids=None, project_id=None)

Update the external IDs of one or more assets.

Parameters:

Name Type Description Default
new_external_ids List[str]

The new external IDs of the assets.

required
asset_ids Optional[List[str]]

The asset IDs to modify.

None
external_ids Optional[List[str]]

The external asset IDs to modify (if asset_ids is not already provided).

None
project_id Optional[str]

The project ID. Only required if external_ids argument is provided.

None

Returns:

Type Description
List[Dict]

A result object which indicates if the mutation was successful, or an error message.

Examples:

>>> kili.change_asset_external_ids(
        new_external_ids=["asset1", "asset2"],
        asset_ids=["ckg22d81r0jrg0885unmuswj8", "ckg22d81s0jrh0885pdxfd03n"],
    )
Source code in kili/mutations/asset/__init__.py
@typechecked
def change_asset_external_ids(
    self,
    new_external_ids: List[str],
    asset_ids: Optional[List[str]] = None,
    external_ids: Optional[List[str]] = None,
    project_id: Optional[str] = None,
) -> List[Dict]:
    """Replace the external IDs of one or more assets.

    The target assets are resolved by `get_asset_ids_or_throw_error` from
    `asset_ids`, or from `external_ids` together with `project_id`.

    Args:
        new_external_ids: The new external IDs of the assets.
        asset_ids: The asset IDs to modify.
        external_ids: The external asset IDs to modify (if `asset_ids` is not already provided).
        project_id: The project ID. Only required if `external_ids` argument is provided.

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.

    Examples:
        >>> kili.change_asset_external_ids(
                new_external_ids=["asset1", "asset2"],
                asset_ids=["ckg22d81r0jrg0885unmuswj8", "ckg22d81s0jrh0885pdxfd03n"],
            )
    """
    resolved_asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)

    # NOTE(review): "should_reset_to_be_labeled_by_array" is read from each batch below but
    # is not set here; presumably process_update_properties_in_assets_parameters adds it.
    properties_to_batch = process_update_properties_in_assets_parameters(
        {
            "asset_ids": resolved_asset_ids,
            "new_external_ids": new_external_ids,
            "json_metadatas": None,
            "to_be_labeled_by_array": None,
        }
    )

    def to_mutation_variables(batch: Dict) -> Dict:
        """Turn one batch of column-wise properties into the mutation variables."""
        columns = {
            "externalId": batch["new_external_ids"],
            "jsonMetadata": batch["json_metadatas"],
            "toBeLabeledBy": batch["to_be_labeled_by_array"],
            "shouldResetToBeLabeledBy": batch["should_reset_to_be_labeled_by_array"],
        }
        field_names = list(columns)
        # Transpose the columns into one {field: value} dict per asset.
        rows = [dict(zip(field_names, row)) for row in zip(*columns.values())]
        where_clauses = [{"id": asset_id} for asset_id in batch["asset_ids"]]
        return {"whereArray": where_clauses, "dataArray": rows}

    batch_results = _mutate_from_paginated_call(
        self,
        properties_to_batch,
        to_mutation_variables,
        GQL_UPDATE_PROPERTIES_IN_ASSETS,
    )
    formatted_batches = [
        format_result("data", batch_result, Asset) for batch_result in batch_results
    ]
    # Concatenate the per-batch results into a single flat list.
    flattened = []
    for formatted_batch in formatted_batches:
        flattened.extend(formatted_batch)
    return flattened

delete_many_from_dataset(self, asset_ids=None, external_ids=None, project_id=None)

Delete assets from a project.

Parameters:

Name Type Description Default
asset_ids Optional[List[str]]

The list of asset internal IDs to delete.

None
external_ids Optional[List[str]]

The list of asset external IDs to delete.

None
project_id Optional[str]

The project ID. Only required if external_ids argument is provided.

None

Returns:

Type Description
Asset

A result object which indicates if the mutation was successful, or an error message.

Source code in kili/mutations/asset/__init__.py
@typechecked
def delete_many_from_dataset(
    self,
    asset_ids: Optional[List[str]] = None,
    external_ids: Optional[List[str]] = None,
    project_id: Optional[str] = None,
) -> Asset:
    """Remove assets from a project.

    The assets to delete are resolved by `get_asset_ids_or_throw_error` from
    `asset_ids`, or from `external_ids` together with `project_id`.

    Args:
        asset_ids: The list of asset internal IDs to delete.
        external_ids: The list of asset external IDs to delete.
        project_id: The project ID. Only required if `external_ids` argument is provided.

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.
    """
    ids_to_delete = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)
    batchable_properties: Dict[str, Optional[List[Any]]] = {"asset_ids": ids_to_delete}

    def to_mutation_variables(batch):
        """Build the `where` filter selecting the assets of one batch."""
        id_filter = {"idIn": batch["asset_ids"]}
        return {"where": id_filter}

    batch_results = _mutate_from_paginated_call(
        self,
        batchable_properties,
        to_mutation_variables,
        GQL_DELETE_MANY_FROM_DATASET,
    )
    # Only the first batch's result is formatted and returned.
    # NOTE(review): assumes at least one batch result exists - confirm for empty ID lists.
    first_batch_result = batch_results[0]
    return format_result("data", first_batch_result, Asset)

send_back_to_queue(self, asset_ids=None, external_ids=None, project_id=None)

Send assets back to queue.

Parameters:

Name Type Description Default
asset_ids Optional[List[str]]

List of internal IDs of assets to send back to queue.

None
external_ids Optional[List[str]]

List of external IDs of assets to send back to queue.

None
project_id Optional[str]

The project ID. Only required if external_ids argument is provided.

None

Returns:

Type Description
Dict[str, Any]

A dict object with the project id and the asset_ids of assets moved to queue. An error message if mutation failed.

Examples:

>>> kili.send_back_to_queue(
        asset_ids=[
            "ckg22d81r0jrg0885unmuswj8",
            "ckg22d81s0jrh0885pdxfd03n",
            ],
    )
Source code in kili/mutations/asset/__init__.py
@typechecked
def send_back_to_queue(
    self,
    asset_ids: Optional[List[str]] = None,
    external_ids: Optional[List[str]] = None,
    project_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Move assets back to the labeling queue.

    After the mutation, the project is queried for the targeted assets whose status
    is `ONGOING`; their IDs are returned under the `asset_ids` key of the result.

    Args:
        asset_ids: List of internal IDs of assets to send back to queue.
        external_ids: List of external IDs of assets to send back to queue.
        project_id: The project ID. Only required if `external_ids` argument is provided.

    Returns:
        A dict object with the project `id` and the `asset_ids` of assets moved to queue.
        An error message if mutation failed.

    Examples:
        >>> kili.send_back_to_queue(
                asset_ids=[
                    "ckg22d81r0jrg0885unmuswj8",
                    "ckg22d81s0jrh0885pdxfd03n",
                    ],
            )
    """
    asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)
    batchable_properties: Dict[str, Optional[List[Any]]] = {"asset_ids": asset_ids}

    def to_mutation_variables(batch):
        """Build the `where` filter selecting the assets of one batch."""
        id_filter = {"idIn": batch["asset_ids"]}
        return {"where": id_filter}

    batch_results = _mutate_from_paginated_call(
        self,
        batchable_properties,
        to_mutation_variables,
        GQL_SEND_BACK_ASSETS_TO_QUEUE,
    )
    # Only the first batch's result is formatted; it provides the project id used below.
    result = format_result("data", batch_results[0])

    # Re-query the targeted assets, keeping those now reported with the ONGOING status.
    asset_queries = QueriesAsset(self.auth)
    assets_in_queue = asset_queries.assets(
        project_id=result["id"],
        asset_id_in=asset_ids,
        fields=["id"],
        disable_tqdm=True,
        status_in=["ONGOING"],
    )
    queued_ids = [queued_asset["id"] for queued_asset in assets_in_queue]
    result["asset_ids"] = queued_ids
    return result

update_properties_in_assets(self, asset_ids=None, external_ids=None, priorities=None, json_metadatas=None, consensus_marks=None, honeypot_marks=None, to_be_labeled_by_array=None, contents=None, json_contents=None, status_array=None, is_used_for_consensus_array=None, is_honeypot_array=None, project_id=None)

Update the properties of one or more assets.

Parameters:

Name Type Description Default
asset_ids Optional[List[str]]

The internal asset IDs to modify.

None
external_ids Optional[List[str]]

The external asset IDs to modify (if asset_ids is not already provided).

None
priorities Optional[List[int]]

You can change the priority of the assets. By default, all assets have a priority of 0.

None
json_metadatas Optional[List[Union[dict, str]]]

The metadata given to an asset should be stored in a json like dict with keys imageUrl, text, url: json_metadata = {'imageUrl': '','text': '','url': ''}

None
consensus_marks Optional[List[float]]

Should be between 0 and 1.

None
honeypot_marks Optional[List[float]]

Should be between 0 and 1.

None
to_be_labeled_by_array Optional[List[List[str]]]

If given, each element of the list should contain the emails of the labelers authorized to label the asset.

None
contents Optional[List[str]]
  • For an NLP project, the content can be directly in text format.
  • For an Image / Video / Pdf project, the content must be hosted on a web server, and you point Kili to your data by giving the URLs.
None
json_contents Optional[List[str]]
  • For an NLP project, the json_content is a text formatted using RichText.
  • For a Video project, the json_content is a JSON containing URLs pointing to each frame of the video.
None
status_array Optional[List[str]]

Each element should be in TODO, ONGOING, LABELED, TO_REVIEW, REVIEWED.

None
is_used_for_consensus_array Optional[List[bool]]

Whether to use the asset to compute consensus kpis or not.

None
is_honeypot_array Optional[List[bool]]

Whether to use the asset for honeypot.

None
project_id Optional[str]

The project ID. Only required if external_ids argument is provided.

None

Returns:

Type Description
List[Dict]

A result object which indicates if the mutation was successful, or an error message.

Examples:

>>> kili.update_properties_in_assets(
        asset_ids=["ckg22d81r0jrg0885unmuswj8", "ckg22d81s0jrh0885pdxfd03n"],
        consensus_marks=[1, 0.7],
        contents=[None, 'https://to/second/asset.png'],
        honeypot_marks=[0.8, 0.5],
        is_honeypot_array=[True, True],
        is_used_for_consensus_array=[True, False],
        priorities=[None, 2],
        status_array=['LABELED', 'REVIEWED'],
        to_be_labeled_by_array=[['test+pierre@kili-technology.com'], None],
    )
Source code in kili/mutations/asset/__init__.py
@typechecked
def update_properties_in_assets(
    self,
    asset_ids: Optional[List[str]] = None,
    external_ids: Optional[List[str]] = None,
    priorities: Optional[List[int]] = None,
    json_metadatas: Optional[List[Union[dict, str]]] = None,
    consensus_marks: Optional[List[float]] = None,
    honeypot_marks: Optional[List[float]] = None,
    to_be_labeled_by_array: Optional[List[List[str]]] = None,
    contents: Optional[List[str]] = None,
    json_contents: Optional[List[str]] = None,
    status_array: Optional[List[str]] = None,
    is_used_for_consensus_array: Optional[List[bool]] = None,
    is_honeypot_array: Optional[List[bool]] = None,
    project_id: Optional[str] = None,
) -> List[Dict]:
    """Modify properties of one or more assets.

    The assets are identified either by `asset_ids`, or by `external_ids` together
    with `project_id`; passing both `asset_ids` and `external_ids` is rejected.

    Args:
        asset_ids: The internal asset IDs to modify.
        external_ids: The external asset IDs to modify (if `asset_ids` is not already provided).
        priorities: You can change the priority of the assets.
            By default, all assets have a priority of 0.
        json_metadatas: The metadata given to an asset should be stored
            in a json like dict with keys `imageUrl`, `text`, `url`:
            `json_metadata = {'imageUrl': '','text': '','url': ''}`
        consensus_marks: Should be between 0 and 1.
        honeypot_marks: Should be between 0 and 1.
        to_be_labeled_by_array: If given, each element of the list should contain the emails of
            the labelers authorized to label the asset.
        contents: - For an NLP project, the content can be directly in text format.
            - For an Image / Video / Pdf project, the content must be hosted on a web server,
            and you point Kili to your data by giving the URLs.
        json_contents: - For an NLP project, the `json_content`
            is a text formatted using RichText.
            - For a Video project, the `json_content` is a JSON containing URLs pointing
                to each frame of the video.
        status_array: Each element should be in `TODO`, `ONGOING`, `LABELED`,
            `TO_REVIEW`, `REVIEWED`.
        is_used_for_consensus_array: Whether to use the asset to compute consensus kpis or not.
        is_honeypot_array: Whether to use the asset for honeypot.
        project_id: The project ID. Only required if `external_ids` argument is provided.

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.

    Raises:
        MissingArgumentError: If both `asset_ids` and `external_ids` are provided.

    Examples:
        >>> kili.update_properties_in_assets(
                asset_ids=["ckg22d81r0jrg0885unmuswj8", "ckg22d81s0jrh0885pdxfd03n"],
                consensus_marks=[1, 0.7],
                contents=[None, 'https://to/second/asset.png'],
                honeypot_marks=[0.8, 0.5],
                is_honeypot_array=[True, True],
                is_used_for_consensus_array=[True, False],
                priorities=[None, 2],
                status_array=['LABELED', 'REVIEWED'],
                to_be_labeled_by_array=[['test+pierre@kili-technology.com'], None],
            )
    """
    both_identifiers_given = not (asset_ids is None or external_ids is None)
    if both_identifiers_given:
        # Point callers at the dedicated method for renaming external IDs before rejecting.
        warnings.warn(
            "The use of `external_ids` argument has changed. It is now used to identify which"
            " properties of which assets to update. Please use"
            " `kili.change_asset_external_ids()` method instead to change asset external IDs.",
            DeprecationWarning,
        )
        raise MissingArgumentError("Please provide either `asset_ids` or `external_ids`.")

    asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)

    # Column-wise properties handed to the batching helper; asset_ids holds the resolved IDs.
    parameters = {
        "asset_ids": asset_ids,
        "priorities": priorities,
        "json_metadatas": json_metadatas,
        "consensus_marks": consensus_marks,
        "honeypot_marks": honeypot_marks,
        "to_be_labeled_by_array": to_be_labeled_by_array,
        "contents": contents,
        "json_contents": json_contents,
        "status_array": status_array,
        "is_used_for_consensus_array": is_used_for_consensus_array,
        "is_honeypot_array": is_honeypot_array,
    }
    # NOTE(review): "should_reset_to_be_labeled_by_array" is read from each batch below but
    # is not set here; presumably process_update_properties_in_assets_parameters adds it.
    properties_to_batch = process_update_properties_in_assets_parameters(parameters)

    def to_mutation_variables(batch: Dict) -> Dict:
        """Turn one batch of column-wise properties into the mutation variables."""
        columns = {
            "priority": batch["priorities"],
            "jsonMetadata": batch["json_metadatas"],
            "consensusMark": batch["consensus_marks"],
            "honeypotMark": batch["honeypot_marks"],
            "toBeLabeledBy": batch["to_be_labeled_by_array"],
            "shouldResetToBeLabeledBy": batch["should_reset_to_be_labeled_by_array"],
            "content": batch["contents"],
            "jsonContent": batch["json_contents"],
            "status": batch["status_array"],
            "isUsedForConsensus": batch["is_used_for_consensus_array"],
            "isHoneypot": batch["is_honeypot_array"],
        }
        field_names = list(columns)
        # Transpose the columns into one {field: value} dict per asset.
        rows = [dict(zip(field_names, row)) for row in zip(*columns.values())]
        where_clauses = [{"id": asset_id} for asset_id in batch["asset_ids"]]
        return {"whereArray": where_clauses, "dataArray": rows}

    batch_results = _mutate_from_paginated_call(
        self,
        properties_to_batch,
        to_mutation_variables,
        GQL_UPDATE_PROPERTIES_IN_ASSETS,
    )
    formatted_batches = [
        format_result("data", batch_result, Asset) for batch_result in batch_results
    ]
    # Concatenate the per-batch results into a single flat list.
    flattened = []
    for formatted_batch in formatted_batches:
        flattened.extend(formatted_batch)
    return flattened