Skip to content

Asset module

Queries

Set of Asset queries

Source code in kili/queries/asset/__init__.py
class QueriesAsset:
    """
    Set of Asset queries

    Groups the methods that list (`assets`) and count (`count_assets`) assets
    through the client held by the `auth` object.
    """

    # pylint: disable=too-many-arguments,too-many-locals

    def __init__(self, auth):
        """Initialize the subclass.

        Args:
            auth: KiliAuth object
        """
        self.auth = auth

    # pylint: disable=dangerous-default-value
    @Compatible(["v1", "v2"])
    @typechecked
    def assets(
        self,
        project_id: str,
        asset_id: Optional[str] = None,
        skip: int = 0,
        fields: List[str] = [
            "content",
            "createdAt",
            "externalId",
            "id",
            "isHoneypot",
            "jsonMetadata",
            "labels.author.id",
            "labels.author.email",
            "labels.createdAt",
            "labels.id",
            "labels.jsonResponse",
            "skipped",
            "status",
        ],
        asset_id_in: Optional[List[str]] = None,
        consensus_mark_gt: Optional[float] = None,
        consensus_mark_lt: Optional[float] = None,
        disable_tqdm: bool = False,
        external_id_contains: Optional[List[str]] = None,
        first: Optional[int] = None,
        format: Optional[str] = None,  # pylint: disable=redefined-builtin
        honeypot_mark_gt: Optional[float] = None,
        honeypot_mark_lt: Optional[float] = None,
        label_author_in: Optional[List[str]] = None,
        label_consensus_mark_gt: Optional[float] = None,
        label_consensus_mark_lt: Optional[float] = None,
        label_created_at: Optional[str] = None,
        label_created_at_gt: Optional[str] = None,
        label_created_at_lt: Optional[str] = None,
        label_honeypot_mark_gt: Optional[float] = None,
        label_honeypot_mark_lt: Optional[float] = None,
        label_type_in: Optional[List[str]] = None,
        metadata_where: Optional[dict] = None,
        skipped: Optional[bool] = None,
        status_in: Optional[List[str]] = None,
        updated_at_gte: Optional[str] = None,
        updated_at_lte: Optional[str] = None,
        as_generator: bool = False,
        label_category_search: Optional[str] = None,
    ) -> Union[List[dict], Generator[dict, None, None], pd.DataFrame]:
        # pylint: disable=line-too-long
        """Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.

        Args:
            project_id: Identifier of the project.
            asset_id: Identifier of the asset to retrieve.
            asset_id_in: A list of the IDs of the assets to retrieve.
            skip: Number of assets to skip (they are ordered by their date of creation, first to last).
            fields: All the fields to request among the possible fields for the assets.
                    See [the documentation](https://docs.kili-technology.com/reference/graphql-api#asset) for all possible fields.
            first: Maximum number of assets to return.
            consensus_mark_gt: Minimum amount of consensus for the asset.
            consensus_mark_lt: Maximum amount of consensus for the asset.
            external_id_contains: Returned assets have an external id that belongs to that list, if given.
            metadata_where: Filters by the values of the metadata of the asset.
            honeypot_mark_gt: Minimum amount of honeypot for the asset.
            honeypot_mark_lt: Maximum amount of honeypot for the asset.
            status_in: Returned assets should have a status that belongs to that list, if given.
                Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
            label_type_in: Returned assets should have a label whose type belongs to that list, if given.
            label_author_in: Returned assets should have a label whose author belongs to that list, if given.
            label_consensus_mark_gt: Returned assets should have a label whose consensus is greater than this number.
            label_consensus_mark_lt: Returned assets should have a label whose consensus is lower than this number.
            label_created_at: Returned assets should have a label whose creation date is equal to this date.
            label_created_at_gt: Returned assets should have a label whose creation date is greater than this date.
            label_created_at_lt: Returned assets should have a label whose creation date is lower than this date.
            label_honeypot_mark_gt: Returned assets should have a label whose honeypot is greater than this number
            label_honeypot_mark_lt: Returned assets should have a label whose honeypot is lower than this number
            skipped: Returned assets should be skipped
            updated_at_gte: Returned assets should have a label whose update date is greater than or equal to this date.
            updated_at_lte: Returned assets should have a label whose update date is lower than or equal to this date.
            format: If equal to 'pandas', returns a pandas DataFrame
            disable_tqdm: If `True`, the progress bar will be disabled
            as_generator: If `True`, a generator on the assets is returned.
            label_category_search: Returned assets should have a label that follows this category search query.

        !!! info "Dates format"
            Date strings should have format: "YYYY-MM-DD"

        Returns:
            A list of assets by default, a generator on the assets if `as_generator` is `True`,
                or a pandas DataFrame if `format` is 'pandas'.

        Raises:
            ValueError: If `as_generator` is `True` while `format` is 'pandas'.

        Example:
            ```
            # returns the assets list of the project
            >>> kili.assets(project_id)
            >>> kili.assets(project_id, asset_id=asset_id)
            # returns a generator of the project assets
            >>> kili.assets(project_id, as_generator=True)
            ```

        !!! example "How to filter based on Metadata"
            - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
                have key "key1" with value "value1"
            - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
                have key "key1" with value "value1" or value "value2"
            - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
                have key "key2" with a value between 2 and 10.

        !!! example "How to filter based on label categories"
            The search query is composed of logical expressions following this format:

                [job_name].[category_name].count [comparison_operator] [value]
            where:

            - `[job_name]` is the name of the job in the interface
            - `[category_name]` is the name of the category in the interface for this job
            - `[comparison_operator]` can be one of: [`==`, `>=`, `<=`, `<`, `>`]
            - `[value]` is an integer that represents the count of such objects of the given category in the label

            These operations can be separated by OR and AND operators

            Example:

                label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
                label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
                label_category_search = `(JOB_CLASSIF.CATEGORY_A.count == 1 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
        """
        if format == "pandas" and as_generator:
            raise ValueError(
                'Argument values as_generator==True and format=="pandas" are not compatible.'
            )

        # Copy the call arguments right away, before any other local is bound:
        # on CPython < 3.13 the mapping returned by locals() is live and may be
        # re-synced later (e.g. under tracing), which would leak extra names
        # into the keyword arguments forwarded to `count_assets`.
        call_args = dict(locals())
        # Only the filters are forwarded; pagination, output format and display
        # options are not parameters of `count_assets`.
        count_args = {
            name: value
            for (name, value) in call_args.items()
            if name
            not in (
                "skip",
                "first",
                "disable_tqdm",
                "format",
                "fields",
                "self",
                "as_generator",
            )
        }

        # using tqdm with a generator is messy, so it is always disabled
        disable_tqdm = disable_tqdm or as_generator
        if label_category_search:
            validate_category_search_query(label_category_search)

        payload_query = {
            "where": {
                "id": asset_id,
                "project": {
                    "id": project_id,
                },
                "externalIdIn": external_id_contains,
                "statusIn": status_in,
                "consensusMarkGte": consensus_mark_gt,
                "consensusMarkLte": consensus_mark_lt,
                "honeypotMarkGte": honeypot_mark_gt,
                "honeypotMarkLte": honeypot_mark_lt,
                "idIn": asset_id_in,
                "metadata": metadata_where,
                "label": {
                    "typeIn": label_type_in,
                    "authorIn": label_author_in,
                    "consensusMarkGte": label_consensus_mark_gt,
                    "consensusMarkLte": label_consensus_mark_lt,
                    "createdAt": label_created_at,
                    "createdAtGte": label_created_at_gt,
                    "createdAtLte": label_created_at_lt,
                    "honeypotMarkGte": label_honeypot_mark_gt,
                    "honeypotMarkLte": label_honeypot_mark_lt,
                    "search": label_category_search,
                },
                "skipped": skipped,
                "updatedAtGte": updated_at_gte,
                "updatedAtLte": updated_at_lte,
            },
        }

        asset_generator = row_generator_from_paginated_calls(
            skip,
            first,
            self.count_assets,
            count_args,
            self._query_assets,
            payload_query,
            fields,
            disable_tqdm,
        )

        if format == "pandas":
            return pd.DataFrame(list(asset_generator))
        if as_generator:
            return asset_generator
        return list(asset_generator)

    def _query_assets(self, skip: int, first: int, payload: dict, fields: List[str]):
        """Run the assets query for one page and return the formatted result.

        Args:
            skip: Value sent as the `skip` query variable.
            first: Value sent as the `first` query variable.
            payload: Query variables holding the `where` filter; it is not modified.
            fields: Asset fields used to build the query fragment.

        Returns:
            The output of `format_result` applied to the response of the query.
        """
        # Work on a new dict instead of updating `payload` in place, so the
        # caller's dict (presumably reused by the pagination helper for each
        # page) never carries over state from a previous call.
        paged_payload = {**payload, "skip": skip, "first": first}
        _gql_assets = gql_assets(fragment_builder(fields, AssetType))
        result = self.auth.client.execute(_gql_assets, paged_payload)
        assets = format_result("data", result, Asset)
        return assets

    @Compatible(["v1", "v2"])
    @typechecked
    def count_assets(
        self,
        project_id: str,
        asset_id: Optional[str] = None,
        asset_id_in: Optional[List[str]] = None,
        external_id_contains: Optional[List[str]] = None,
        metadata_where: Optional[dict] = None,
        status_in: Optional[List[str]] = None,
        consensus_mark_gt: Optional[float] = None,
        consensus_mark_lt: Optional[float] = None,
        honeypot_mark_gt: Optional[float] = None,
        honeypot_mark_lt: Optional[float] = None,
        label_type_in: Optional[List[str]] = None,
        label_author_in: Optional[List[str]] = None,
        label_consensus_mark_gt: Optional[float] = None,
        label_consensus_mark_lt: Optional[float] = None,
        label_created_at: Optional[str] = None,
        label_created_at_gt: Optional[str] = None,
        label_created_at_lt: Optional[str] = None,
        label_honeypot_mark_gt: Optional[float] = None,
        label_honeypot_mark_lt: Optional[float] = None,
        skipped: Optional[bool] = None,
        updated_at_gte: Optional[str] = None,
        updated_at_lte: Optional[str] = None,
        label_category_search: Optional[str] = None,
    ) -> int:
        """Count and return the number of assets with the given constraints.

        Parameters beginning with 'label_' apply to labels, others apply to assets.

        Args:
            project_id: Identifier of the project
            asset_id: The unique id of the asset to retrieve.
            asset_id_in: A list of the ids of the assets to retrieve.
            external_id_contains: Returned assets should have an external id
                that belongs to that list, if given.
            metadata_where: Filters by the values of the metadata of the asset.
            status_in: Returned assets should have a status that belongs to that list, if given.
                Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
            consensus_mark_gt: Minimum amount of consensus for the asset.
            consensus_mark_lt: Maximum amount of consensus for the asset.
            honeypot_mark_gt: Minimum amount of honeypot for the asset.
            honeypot_mark_lt: Maximum amount of honeypot for the asset.
            label_type_in: Returned assets should have a label
                whose type belongs to that list, if given.
            label_author_in: Returned assets should have a label
                whose author belongs to that list, if given.
            label_consensus_mark_gt: Returned assets should have a label
                whose consensus is greater than this number.
            label_consensus_mark_lt: Returned assets should have a label
                whose consensus is lower than this number.
            label_created_at: Returned assets should have a label
                whose creation date is equal to this date.
            label_created_at_gt: Returned assets should have a label
                whose creation date is greater than this date.
            label_created_at_lt: Returned assets should have a label
                whose creation date is lower than this date.
            label_honeypot_mark_gt: Returned assets should have a label
                whose honeypot is greater than this number.
            label_honeypot_mark_lt: Returned assets should have a label
                whose honeypot is lower than this number.
            skipped: Returned assets should be skipped
            updated_at_gte: Returned assets should have a label
                whose update date is greater than or equal to this date.
            updated_at_lte: Returned assets should have a label
                whose update date is lower than or equal to this date.
            label_category_search: Returned assets should have a label
                that follows this category search query.

        !!! info "Dates format"
            Date strings should have format: "YYYY-MM-DD"

        Returns:
            The number of assets that match the given constraints.

        Examples:
            >>> kili.count_assets(project_id=project_id)
            250
            >>> kili.count_assets(project_id=project_id, asset_id=asset_id)
            1

        !!! example "How to filter based on Metadata"
            - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
                have key "key1" with value "value1"
            - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
                have key "key1" with value "value1" or value "value2"
            - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
                have key "key2" with a value between 2 and 10.
        """
        # Only a non-empty search query is validated; None and "" are sent as is.
        if label_category_search:
            validate_category_search_query(label_category_search)

        variables = {
            "where": {
                "id": asset_id,
                "project": {
                    "id": project_id,
                },
                "externalIdIn": external_id_contains,
                "statusIn": status_in,
                "consensusMarkGte": consensus_mark_gt,
                "consensusMarkLte": consensus_mark_lt,
                "honeypotMarkGte": honeypot_mark_gt,
                "honeypotMarkLte": honeypot_mark_lt,
                "idIn": asset_id_in,
                "metadata": metadata_where,
                "label": {
                    "typeIn": label_type_in,
                    "authorIn": label_author_in,
                    "consensusMarkGte": label_consensus_mark_gt,
                    "consensusMarkLte": label_consensus_mark_lt,
                    "createdAt": label_created_at,
                    "createdAtGte": label_created_at_gt,
                    "createdAtLte": label_created_at_lt,
                    "honeypotMarkGte": label_honeypot_mark_gt,
                    "honeypotMarkLte": label_honeypot_mark_lt,
                    "search": label_category_search,
                },
                "skipped": skipped,
                "updatedAtGte": updated_at_gte,
                "updatedAtLte": updated_at_lte,
            }
        }
        result = self.auth.client.execute(GQL_ASSETS_COUNT, variables)
        count = format_result("data", result)
        return count

assets(self, project_id, asset_id=None, skip=0, fields=['content', 'createdAt', 'externalId', 'id', 'isHoneypot', 'jsonMetadata', 'labels.author.id', 'labels.author.email', 'labels.createdAt', 'labels.id', 'labels.jsonResponse', 'skipped', 'status'], asset_id_in=None, consensus_mark_gt=None, consensus_mark_lt=None, disable_tqdm=False, external_id_contains=None, first=None, format=None, honeypot_mark_gt=None, honeypot_mark_lt=None, label_author_in=None, label_consensus_mark_gt=None, label_consensus_mark_lt=None, label_created_at=None, label_created_at_gt=None, label_created_at_lt=None, label_honeypot_mark_gt=None, label_honeypot_mark_lt=None, label_type_in=None, metadata_where=None, skipped=None, status_in=None, updated_at_gte=None, updated_at_lte=None, as_generator=False, label_category_search=None)

Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.

Parameters:

Name Type Description Default
project_id str

Identifier of the project.

required
asset_id Optional[str]

Identifier of the asset to retrieve.

None
asset_id_in Optional[List[str]]

A list of the IDs of the assets to retrieve.

None
skip int

Number of assets to skip (they are ordered by their date of creation, first to last).

0
fields List[str]

All the fields to request among the possible fields for the assets. See the documentation for all possible fields.

['content', 'createdAt', 'externalId', 'id', 'isHoneypot', 'jsonMetadata', 'labels.author.id', 'labels.author.email', 'labels.createdAt', 'labels.id', 'labels.jsonResponse', 'skipped', 'status']
first Optional[int]

Maximum number of assets to return.

None
consensus_mark_gt Optional[float]

Minimum amount of consensus for the asset.

None
consensus_mark_lt Optional[float]

Maximum amount of consensus for the asset.

None
external_id_contains Optional[List[str]]

Returned assets have an external id that belongs to that list, if given.

None
metadata_where Optional[dict]

Filters by the values of the metadata of the asset.

None
honeypot_mark_gt Optional[float]

Minimum amount of honeypot for the asset.

None
honeypot_mark_lt Optional[float]

Maximum amount of honeypot for the asset.

None
status_in Optional[List[str]]

Returned assets should have a status that belongs to that list, if given. Possible choices: TODO, ONGOING, LABELED or REVIEWED

None
label_type_in Optional[List[str]]

Returned assets should have a label whose type belongs to that list, if given.

None
label_author_in Optional[List[str]]

Returned assets should have a label whose author belongs to that list, if given.

None
label_consensus_mark_gt Optional[float]

Returned assets should have a label whose consensus is greater than this number.

None
label_consensus_mark_lt Optional[float]

Returned assets should have a label whose consensus is lower than this number.

None
label_created_at Optional[str]

Returned assets should have a label whose creation date is equal to this date.

None
label_created_at_gt Optional[str]

Returned assets should have a label whose creation date is greater than this date.

None
label_created_at_lt Optional[str]

Returned assets should have a label whose creation date is lower than this date.

None
label_honeypot_mark_gt Optional[float]

Returned assets should have a label whose honeypot is greater than this number

None
label_honeypot_mark_lt Optional[float]

Returned assets should have a label whose honeypot is lower than this number

None
skipped Optional[bool]

Returned assets should be skipped

None
updated_at_gte Optional[str]

Returned assets should have a label whose update date is greater than or equal to this date.

None
updated_at_lte Optional[str]

Returned assets should have a label whose update date is lower or equal to this date.

None
format Optional[str]

If equal to 'pandas', returns a pandas DataFrame

None
disable_tqdm bool

If True, the progress bar will be disabled

False
as_generator bool

If True, a generator on the assets is returned.

False
label_category_search Optional[str]

Returned assets should have a label that follows this category search query.

None

Dates format

Date strings should have format: "YYYY-MM-DD"

Returns:

Type Description
Union[List[dict], Generator[dict, NoneType], pandas.core.frame.DataFrame]

A list of assets by default, a generator on the assets if as_generator is True, or a pandas DataFrame if format is 'pandas'.

Examples:

# returns the assets list of the project
>>> kili.assets(project_id)
>>> kili.assets(project_id, asset_id=asset_id)
# returns a generator of the project assets
>>> kili.assets(project_id, as_generator=True)

How to filter based on Metadata

  • metadata_where = {key1: "value1"} to filter on assets whose metadata have key "key1" with value "value1"
  • metadata_where = {key1: ["value1", "value2"]} to filter on assets whose metadata have key "key1" with value "value1" or value "value2"
  • metadata_where = {key2: [2, 10]} to filter on assets whose metadata have key "key2" with a value between 2 and 10.

How to filter based on label categories

The search query is composed of logical expressions following this format:

[job_name].[category_name].count [comparison_operator] [value]

where:

  • [job_name] is the name of the job in the interface
  • [category_name] is the name of the category in the interface for this job
  • [comparison_operator] can be one of: [==, >=, <=, <, >]
  • [value] is an integer that represents the count of such objects of the given category in the label

These operations can be separated by OR and AND operators

Example:

label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
label_category_search = `(JOB_CLASSIF.CATEGORY_A.count == 1 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
Source code in kili/queries/asset/__init__.py
@Compatible(["v1", "v2"])
@typechecked
def assets(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    skip: int = 0,
    fields: List[str] = [
        "content",
        "createdAt",
        "externalId",
        "id",
        "isHoneypot",
        "jsonMetadata",
        "labels.author.id",
        "labels.author.email",
        "labels.createdAt",
        "labels.id",
        "labels.jsonResponse",
        "skipped",
        "status",
    ],
    asset_id_in: Optional[List[str]] = None,
    consensus_mark_gt: Optional[float] = None,
    consensus_mark_lt: Optional[float] = None,
    disable_tqdm: bool = False,
    external_id_contains: Optional[List[str]] = None,
    first: Optional[int] = None,
    format: Optional[str] = None,  # pylint: disable=redefined-builtin
    honeypot_mark_gt: Optional[float] = None,
    honeypot_mark_lt: Optional[float] = None,
    label_author_in: Optional[List[str]] = None,
    label_consensus_mark_gt: Optional[float] = None,
    label_consensus_mark_lt: Optional[float] = None,
    label_created_at: Optional[str] = None,
    label_created_at_gt: Optional[str] = None,
    label_created_at_lt: Optional[str] = None,
    label_honeypot_mark_gt: Optional[float] = None,
    label_honeypot_mark_lt: Optional[float] = None,
    label_type_in: Optional[List[str]] = None,
    metadata_where: Optional[dict] = None,
    skipped: Optional[bool] = None,
    status_in: Optional[List[str]] = None,
    updated_at_gte: Optional[str] = None,
    updated_at_lte: Optional[str] = None,
    as_generator: bool = False,
    label_category_search: Optional[str] = None,
) -> Union[List[dict], Generator[dict, None, None], pd.DataFrame]:
    # pylint: disable=line-too-long
    """Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.

    Args:
        project_id: Identifier of the project.
        asset_id: Identifier of the asset to retrieve.
        asset_id_in: A list of the IDs of the assets to retrieve.
        skip: Number of assets to skip (they are ordered by their date of creation, first to last).
        fields: All the fields to request among the possible fields for the assets.
                See [the documentation](https://docs.kili-technology.com/reference/graphql-api#asset) for all possible fields.
        first: Maximum number of assets to return.
        consensus_mark_gt: Minimum amount of consensus for the asset.
        consensus_mark_lt: Maximum amount of consensus for the asset.
        external_id_contains: Returned assets have an external id that belongs to that list, if given.
        metadata_where: Filters by the values of the metadata of the asset.
        honeypot_mark_gt: Minimum amount of honeypot for the asset.
        honeypot_mark_lt: Maximum amount of honeypot for the asset.
        status_in: Returned assets should have a status that belongs to that list, if given.
            Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
        label_type_in: Returned assets should have a label whose type belongs to that list, if given.
        label_author_in: Returned assets should have a label whose author belongs to that list, if given.
        label_consensus_mark_gt: Returned assets should have a label whose consensus is greater than this number.
        label_consensus_mark_lt: Returned assets should have a label whose consensus is lower than this number.
        label_created_at: Returned assets should have a label whose creation date is equal to this date.
        label_created_at_gt: Returned assets should have a label whose creation date is greater than this date.
        label_created_at_lt: Returned assets should have a label whose creation date is lower than this date.
        label_honeypot_mark_gt: Returned assets should have a label whose honeypot is greater than this number
        label_honeypot_mark_lt: Returned assets should have a label whose honeypot is lower than this number
        skipped: Returned assets should be skipped
        updated_at_gte: Returned assets should have a label whose update date is greater than or equal to this date.
        updated_at_lte: Returned assets should have a label whose update date is lower than or equal to this date.
        format: If equal to 'pandas', returns a pandas DataFrame
        disable_tqdm: If `True`, the progress bar will be disabled
        as_generator: If `True`, a generator on the assets is returned.
        label_category_search: Returned assets should have a label that follows this category search query.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        A list of assets by default, a generator on the assets if `as_generator` is `True`,
            or a pandas DataFrame if `format` is 'pandas'.

    Raises:
        ValueError: If `as_generator` is `True` while `format` is 'pandas'.

    Example:
        ```
        # returns the assets list of the project
        >>> kili.assets(project_id)
        >>> kili.assets(project_id, asset_id=asset_id)
        # returns a generator of the project assets
        >>> kili.assets(project_id, as_generator=True)
        ```

    !!! example "How to filter based on Metadata"
        - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
            have key "key1" with value "value1"
        - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
            have key "key1" with value "value1" or value "value2"
        - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
            have key "key2" with a value between 2 and 10.

    !!! example "How to filter based on label categories"
        The search query is composed of logical expressions following this format:

            [job_name].[category_name].count [comparison_operator] [value]
        where:

        - `[job_name]` is the name of the job in the interface
        - `[category_name]` is the name of the category in the interface for this job
        - `[comparison_operator]` can be one of: [`==`, `>=`, `<=`, `<`, `>`]
        - `[value]` is an integer that represents the count of such objects of the given category in the label

        These operations can be separated by OR and AND operators

        Example:

            label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
            label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
            label_category_search = `(JOB_CLASSIF.CATEGORY_A.count == 1 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
    """
    if format == "pandas" and as_generator:
        raise ValueError(
            'Argument values as_generator==True and format=="pandas" are not compatible.'
        )

    # Copy the call arguments right away, before any other local is bound:
    # on CPython < 3.13 the mapping returned by locals() is live and may be
    # re-synced later (e.g. under tracing), which would leak extra names
    # into the keyword arguments forwarded to `count_assets`.
    call_args = dict(locals())
    # Only the filters are forwarded; pagination, output format and display
    # options are not parameters of `count_assets`.
    count_args = {
        name: value
        for (name, value) in call_args.items()
        if name
        not in (
            "skip",
            "first",
            "disable_tqdm",
            "format",
            "fields",
            "self",
            "as_generator",
        )
    }

    # using tqdm with a generator is messy, so it is always disabled
    disable_tqdm = disable_tqdm or as_generator
    if label_category_search:
        validate_category_search_query(label_category_search)

    payload_query = {
        "where": {
            "id": asset_id,
            "project": {
                "id": project_id,
            },
            "externalIdIn": external_id_contains,
            "statusIn": status_in,
            "consensusMarkGte": consensus_mark_gt,
            "consensusMarkLte": consensus_mark_lt,
            "honeypotMarkGte": honeypot_mark_gt,
            "honeypotMarkLte": honeypot_mark_lt,
            "idIn": asset_id_in,
            "metadata": metadata_where,
            "label": {
                "typeIn": label_type_in,
                "authorIn": label_author_in,
                "consensusMarkGte": label_consensus_mark_gt,
                "consensusMarkLte": label_consensus_mark_lt,
                "createdAt": label_created_at,
                "createdAtGte": label_created_at_gt,
                "createdAtLte": label_created_at_lt,
                "honeypotMarkGte": label_honeypot_mark_gt,
                "honeypotMarkLte": label_honeypot_mark_lt,
                "search": label_category_search,
            },
            "skipped": skipped,
            "updatedAtGte": updated_at_gte,
            "updatedAtLte": updated_at_lte,
        },
    }

    asset_generator = row_generator_from_paginated_calls(
        skip,
        first,
        self.count_assets,
        count_args,
        self._query_assets,
        payload_query,
        fields,
        disable_tqdm,
    )

    if format == "pandas":
        return pd.DataFrame(list(asset_generator))
    if as_generator:
        return asset_generator
    return list(asset_generator)

count_assets(self, project_id, asset_id=None, asset_id_in=None, external_id_contains=None, metadata_where=None, status_in=None, consensus_mark_gt=None, consensus_mark_lt=None, honeypot_mark_gt=None, honeypot_mark_lt=None, label_type_in=None, label_author_in=None, label_consensus_mark_gt=None, label_consensus_mark_lt=None, label_created_at=None, label_created_at_gt=None, label_created_at_lt=None, label_honeypot_mark_gt=None, label_honeypot_mark_lt=None, skipped=None, updated_at_gte=None, updated_at_lte=None, label_category_search=None)

Count and return the number of assets with the given constraints.

Parameters beginning with 'label_' apply to labels, others apply to assets.

Parameters:

Name Type Description Default
project_id str

Identifier of the project

required
asset_id Optional[str]

The unique id of the asset to retrieve.

None
asset_id_in Optional[List[str]]

A list of the ids of the assets to retrieve.

None
external_id_contains Optional[List[str]]

Returned assets should have an external id that belongs to that list, if given.

None
metadata_where Optional[dict]

Filters by the values of the metadata of the asset.

None
status_in Optional[List[str]]

Returned assets should have a status that belongs to that list, if given. Possible choices: TODO, ONGOING, LABELED or REVIEWED

None
consensus_mark_gt Optional[float]

Minimum amount of consensus for the asset.

None
consensus_mark_lt Optional[float]

Maximum amount of consensus for the asset.

None
honeypot_mark_gt Optional[float]

Minimum amount of honeypot for the asset.

None
honeypot_mark_lt Optional[float]

Maximum amount of honeypot for the asset.

None
label_type_in Optional[List[str]]

Returned assets should have a label whose type belongs to that list, if given.

None
label_author_in Optional[List[str]]

Returned assets should have a label whose author belongs to that list, if given.

None
label_consensus_mark_gt Optional[float]

Returned assets should have a label whose consensus is greater than this number.

None
label_consensus_mark_lt Optional[float]

Returned assets should have a label whose consensus is lower than this number.

None
label_created_at Optional[str]

Returned assets should have a label whose creation date is equal to this date.

None
label_created_at_gt Optional[str]

Returned assets should have a label whose creation date is greater than this date.

None
label_created_at_lt Optional[str]

Returned assets should have a label whose creation date is lower than this date.

None
label_honeypot_mark_gt Optional[float]

Returned assets should have a label whose honeypot is greater than this number.

None
label_honeypot_mark_lt Optional[float]

Returned assets should have a label whose honeypot is lower than this number.

None
skipped Optional[bool]

Returned assets should be skipped

None
updated_at_gte Optional[str]

Returned assets should have a label whose update date is greater than or equal to this date.

None
updated_at_lte Optional[str]

Returned assets should have a label whose update date is lower or equal to this date.

None

Dates format

Date strings should have format: "YYYY-MM-DD"

Returns:

Type Description
int

The number of assets that match the given constraints.

Examples:

>>> kili.count_assets(project_id=project_id)
250
>>> kili.count_assets(project_id=project_id, asset_id=asset_id)
1

How to filter based on Metadata

  • metadata_where = {key1: "value1"} to filter on assets whose metadata have key "key1" with value "value1"
  • metadata_where = {key1: ["value1", "value2"]} to filter on assets whose metadata have key "key1" with value "value1" or value "value2"
  • metadata_where = {key2: [2, 10]} to filter on assets whose metadata have key "key2" with a value between 2 and 10.
Source code in kili/queries/asset/__init__.py
@Compatible(["v1", "v2"])
@typechecked
def count_assets(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_id_in: Optional[List[str]] = None,
    external_id_contains: Optional[List[str]] = None,
    metadata_where: Optional[dict] = None,
    status_in: Optional[List[str]] = None,
    consensus_mark_gt: Optional[float] = None,
    consensus_mark_lt: Optional[float] = None,
    honeypot_mark_gt: Optional[float] = None,
    honeypot_mark_lt: Optional[float] = None,
    label_type_in: Optional[List[str]] = None,
    label_author_in: Optional[List[str]] = None,
    label_consensus_mark_gt: Optional[float] = None,
    label_consensus_mark_lt: Optional[float] = None,
    label_created_at: Optional[str] = None,
    label_created_at_gt: Optional[str] = None,
    label_created_at_lt: Optional[str] = None,
    label_honeypot_mark_gt: Optional[float] = None,
    label_honeypot_mark_lt: Optional[float] = None,
    skipped: Optional[bool] = None,
    updated_at_gte: Optional[str] = None,
    updated_at_lte: Optional[str] = None,
    label_category_search: Optional[str] = None,
) -> int:
    """Count the assets of a project that match the given filters.

    Filters whose name starts with 'label_' apply to the labels of the assets,
    every other filter applies to the assets themselves.

    Args:
        project_id: Identifier of the project
        asset_id: The unique id of the asset to count.
        asset_id_in: A list of the ids of the assets to count.
        external_id_contains: Counted assets should have an external id
            that belongs to that list, if given.
        metadata_where: Filters by the values of the metadata of the asset.
        status_in: Counted assets should have a status that belongs to that list, if given.
            Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
        consensus_mark_gt: Minimum amount of consensus for the asset.
        consensus_mark_lt: Maximum amount of consensus for the asset.
        honeypot_mark_gt: Minimum amount of honeypot for the asset.
        honeypot_mark_lt: Maximum amount of honeypot for the asset.
        label_type_in: Counted assets should have a label
            whose type belongs to that list, if given.
        label_author_in: Counted assets should have a label
            whose author belongs to that list, if given.
        label_consensus_mark_gt: Counted assets should have a label
            whose consensus is greater than this number.
        label_consensus_mark_lt: Counted assets should have a label
            whose consensus is lower than this number.
        label_created_at: Counted assets should have a label
            whose creation date is equal to this date.
        label_created_at_gt: Counted assets should have a label
            whose creation date is greater than this date.
        label_created_at_lt: Counted assets should have a label
            whose creation date is lower than this date.
        label_honeypot_mark_gt: Counted assets should have a label
            whose honeypot is greater than this number.
        label_honeypot_mark_lt: Counted assets should have a label
            whose honeypot is lower than this number.
        skipped: Counted assets should be skipped
        updated_at_gte: Counted assets should have a label
            whose update date is greater or equal to this date.
        updated_at_lte: Counted assets should have a label
            whose update date is lower or equal to this date.
        label_category_search: Forwarded as the `search` filter of the labels.
            When given, it is first checked by `validate_category_search_query`.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        The number of assets matching the given constraints.

    Examples:
        >>> kili.count_assets(project_id=project_id)
        250
        >>> kili.count_assets(asset_id=asset_id)
        1

    !!! example "How to filter based on Metadata"
        - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
            have key "key1" with value "value1"
        - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
            have key "key1" with value "value1" or value "value2"
        - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
            have key "key2" with a value between 2 and 10.
    """
    # Validate the search query before anything is sent to the API.
    if label_category_search:
        validate_category_search_query(label_category_search)

    # Note: the `*_gt` / `*_lt` arguments are forwarded under the `...Gte` / `...Lte` keys.
    label_filters = {
        "typeIn": label_type_in,
        "authorIn": label_author_in,
        "consensusMarkGte": label_consensus_mark_gt,
        "consensusMarkLte": label_consensus_mark_lt,
        "createdAt": label_created_at,
        "createdAtGte": label_created_at_gt,
        "createdAtLte": label_created_at_lt,
        "honeypotMarkGte": label_honeypot_mark_gt,
        "honeypotMarkLte": label_honeypot_mark_lt,
        "search": label_category_search,
    }
    asset_filters = {
        "id": asset_id,
        "project": {"id": project_id},
        "externalIdIn": external_id_contains,
        "statusIn": status_in,
        "consensusMarkGte": consensus_mark_gt,
        "consensusMarkLte": consensus_mark_lt,
        "honeypotMarkGte": honeypot_mark_gt,
        "honeypotMarkLte": honeypot_mark_lt,
        "idIn": asset_id_in,
        "metadata": metadata_where,
        "label": label_filters,
        "skipped": skipped,
        "updatedAtGte": updated_at_gte,
        "updatedAtLte": updated_at_lte,
    }

    response = self.auth.client.execute(GQL_ASSETS_COUNT, {"where": asset_filters})
    return format_result("data", response)

Mutations

Set of Asset mutations

Source code in kili/mutations/asset/__init__.py
class MutationsAsset:
    """
    Set of Asset mutations
    """

    # pylint: disable=too-many-arguments,too-many-locals

    def __init__(self, auth):
        """Initialize the subclass.

        Args:
            auth: KiliAuth object
        """
        self.auth = auth

    @Compatible(["v1", "v2"])
    @typechecked
    def append_many_to_dataset(
        self,
        project_id: str,
        content_array: Optional[List[str]] = None,
        external_id_array: Optional[List[str]] = None,
        is_honeypot_array: Optional[List[bool]] = None,
        status_array: Optional[List[str]] = None,
        json_content_array: Optional[List[List[Union[dict, str]]]] = None,
        json_metadata_array: Optional[List[dict]] = None,
    ):
        # pylint: disable=line-too-long
        """Append assets to a project.

        Args:
            project_id: Identifier of the project
            content_array: List of elements added to the assets of the project
                Must not be None except if you provide json_content_array.

                - For a `TEXT` project, the content can be either raw text, or URLs to TEXT assets.
                - For an `IMAGE` / `PDF` project, the content can be either URLs or paths to existing
                    images/pdf on your computer.
                - For a `VIDEO`  project, the content must be hosted on a web server,
                    and you point Kili to your data by giving the URLs.
            external_id_array: List of external ids given to identify the assets.
                If None, random identifiers are created.
            is_honeypot_array:  Whether to use the asset for honeypot
            status_array: By default, all imported assets are set to `TODO`. Other options:
                `ONGOING`, `LABELED`, `REVIEWED`.
            json_content_array: Useful for `VIDEO` or `TEXT` projects only.

                - For `FRAME` projects, each element is a sequence of frames, i.e. a
                    list of URLs to images or a list of paths to images.
                - For `TEXT` projects, each element is a json_content dict,
                    formatted according to documentation [on how to import
                rich-text assets](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb)
            json_metadata_array: The metadata given to each asset should be stored in a json like dict with keys.

                - Add metadata visible on the asset with the following keys: `imageUrl`, `text`, `url`.
                    Example for one asset: `json_metadata_array = [{'imageUrl': '','text': '','url': ''}]`.
                - For video, you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30).
                    Example for one asset: `json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}]`.

        Returns:
            A result object which indicates if the mutation was successful, or an error message.
                Only the first result of the paginated call is formatted and returned.

        Raises:
            AssertionError: If the project query does not return exactly one project.

        Examples:
            >>> kili.append_many_to_dataset(
                    project_id=project_id,
                    content_array=['https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'])

        !!! example "Recipe"
            - For more detailed examples on how to import assets,
                see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_assets.ipynb)
                or [other examples](https://docs.kili-technology.com/recipes/importing-data) in our documentation.
            - For more detailed examples on how to import text assets,
                see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb).
        """
        kili = QueriesProject(self.auth)
        projects = kili.projects(project_id, disable_tqdm=True)
        # Explicit raise instead of `assert`: assert statements are removed under
        # `python -O`, which would silently skip this check. The exception type is
        # kept so existing callers catching AssertionError still work.
        if len(projects) != 1:
            raise AssertionError(NO_ACCESS_RIGHT)
        input_type = projects[0]["inputType"]
        (properties_to_batch, upload_type, request,) = process_append_many_to_dataset_parameters(
            input_type,
            content_array,
            external_id_array,
            is_honeypot_array,
            status_array,
            json_content_array,
            json_metadata_array,
        )

        def generate_variables(batch):
            # The frames mutation takes an upload type and no honeypot/status/jsonContent.
            if request == GQL_APPEND_MANY_FRAMES_TO_DATASET:
                payload_data = {
                    "contentArray": batch["content_array"],
                    "externalIDArray": batch["external_id_array"],
                    "jsonMetadataArray": batch["json_metadata_array"],
                    "uploadType": upload_type,
                }
            else:
                payload_data = {
                    "contentArray": batch["content_array"],
                    "externalIDArray": batch["external_id_array"],
                    "isHoneypotArray": batch["is_honeypot_array"],
                    "statusArray": batch["status_array"],
                    "jsonContentArray": batch["json_content_array"],
                    "jsonMetadataArray": batch["json_metadata_array"],
                }
            return {"data": payload_data, "where": {"id": project_id}}

        results = _mutate_from_paginated_call(
            self, properties_to_batch, generate_variables, request
        )
        return format_result("data", results[0], Asset)

    @Compatible(["v2"])
    @typechecked
    def update_properties_in_assets(
        self,
        asset_ids: List[str],
        external_ids: Optional[List[str]] = None,
        priorities: Optional[List[int]] = None,
        json_metadatas: Optional[List[Union[dict, str]]] = None,
        consensus_marks: Optional[List[float]] = None,
        honeypot_marks: Optional[List[float]] = None,
        to_be_labeled_by_array: Optional[List[List[str]]] = None,
        contents: Optional[List[str]] = None,
        json_contents: Optional[List[str]] = None,
        status_array: Optional[List[str]] = None,
        is_used_for_consensus_array: Optional[List[bool]] = None,
        is_honeypot_array: Optional[List[bool]] = None,
    ) -> List[dict]:
        """Update the properties of one or more assets.

        Args:
            asset_ids: The asset IDs to modify
            external_ids: Change the external id of the assets
            priorities: You can change the priority of the assets
                By default, all assets have a priority of 0.
            json_metadatas: The metadata given to an asset should be stored
                in a json like dict with keys `imageUrl`, `text`, `url`:
                `json_metadata = {'imageUrl': '','text': '','url': ''}`
            consensus_marks: Should be between 0 and 1
            honeypot_marks: Should be between 0 and 1
            to_be_labeled_by_array: If given, each element of the list should contain the emails of
                the labelers authorized to label the asset.
            contents: - For a NLP project, the content can be directly in text format
                - For an Image / Video / Pdf project, the content must be hosted on a web server,
                and you point Kili to your data by giving the URLs
            json_contents: - For a NLP project, the `json_content`
                is a text formatted using RichText
                - For a Video project, the `json_content` is a json containing urls pointing
                    to each frame of the video.
            status_array: Each element should be in `TODO`, `ONGOING`, `LABELED`, `REVIEWED`
            is_used_for_consensus_array: Whether to use the asset to compute consensus kpis or not
            is_honeypot_array: Whether to use the asset for honeypot

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message. The formatted results of all the batches
                are concatenated into a single flat list.

        Examples:
            >>> kili.update_properties_in_assets(
                    asset_ids=["ckg22d81r0jrg0885unmuswj8",
                        "ckg22d81s0jrh0885pdxfd03n"],
                    consensus_marks=[1, 0.7],
                    contents=[None, 'https://to/second/asset.png'],
                    external_ids=['external-id-of-your-choice-1',
                        'external-id-of-your-choice-2'],
                    honeypot_marks=[0.8, 0.5],
                    is_honeypot_array=[True, True],
                    is_used_for_consensus_array=[True, False],
                    priorities=[None, 2],
                    status_array=['LABELED', 'REVIEWED'],
                    to_be_labeled_by_array=[
                        ['test+pierre@kili-technology.com'], None],
            )
        """
        # Explicit mapping rather than filtering `locals()`: same keys in the same
        # order, but the forwarded arguments are visible to readers and linters.
        parameters = {
            "asset_ids": asset_ids,
            "external_ids": external_ids,
            "priorities": priorities,
            "json_metadatas": json_metadatas,
            "consensus_marks": consensus_marks,
            "honeypot_marks": honeypot_marks,
            "to_be_labeled_by_array": to_be_labeled_by_array,
            "contents": contents,
            "json_contents": json_contents,
            "status_array": status_array,
            "is_used_for_consensus_array": is_used_for_consensus_array,
            "is_honeypot_array": is_honeypot_array,
        }
        properties_to_batch = process_update_properties_in_assets_parameters(parameters)

        def generate_variables(batch):
            data = {
                "externalId": batch["external_ids"],
                "priority": batch["priorities"],
                "jsonMetadata": batch["json_metadatas"],
                "consensusMark": batch["consensus_marks"],
                "honeypotMark": batch["honeypot_marks"],
                "toBeLabeledBy": batch["to_be_labeled_by_array"],
                "shouldResetToBeLabeledBy": batch["should_reset_to_be_labeled_by_array"],
                "content": batch["contents"],
                "jsonContent": batch["json_contents"],
                "status": batch["status_array"],
                "isUsedForConsensus": batch["is_used_for_consensus_array"],
                "isHoneypot": batch["is_honeypot_array"],
            }
            # Transpose the per-property columns into one dict per asset.
            data_array = [dict(zip(data, t)) for t in zip(*data.values())]
            return {
                "whereArray": [{"id": asset_id} for asset_id in batch["asset_ids"]],
                "dataArray": data_array,
            }

        results = _mutate_from_paginated_call(
            self,
            properties_to_batch,
            generate_variables,
            GQL_UPDATE_PROPERTIES_IN_ASSETS,
        )
        formated_results = [format_result("data", result, Asset) for result in results]
        # Flatten the per-batch lists into a single list.
        return [item for batch_list in formated_results for item in batch_list]

    @Compatible(["v1", "v2"])
    @typechecked
    def delete_many_from_dataset(self, asset_ids: List[str]):
        """Delete assets from a project.

        Args:
            asset_ids: The list of identifiers of the assets to delete.

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message. Only the first result of the paginated
                call is formatted and returned.
        """
        properties_to_batch = {"asset_ids": asset_ids}

        def generate_variables(batch):
            return {"where": {"idIn": batch["asset_ids"]}}

        results = _mutate_from_paginated_call(
            self, properties_to_batch, generate_variables, GQL_DELETE_MANY_FROM_DATASET
        )
        return format_result("data", results[0], Asset)

    @Compatible(["v1", "v2"])
    @typechecked
    def add_to_review(self, asset_ids: List[str]) -> dict:
        """Add assets to review.

        !!! warning
            Assets without any label will be ignored.

        Args:
            asset_ids: The asset IDs to add to review

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message. Only the first result of the paginated
                call is formatted and returned.

        Examples:
            >>> kili.add_to_review(
                    asset_ids=[
                        "ckg22d81r0jrg0885unmuswj8",
                        "ckg22d81s0jrh0885pdxfd03n"
                        ],
            )
        """
        properties_to_batch = {"asset_ids": asset_ids}

        def generate_variables(batch):
            return {"where": {"idIn": batch["asset_ids"]}}

        results = _mutate_from_paginated_call(
            self,
            properties_to_batch,
            generate_variables,
            GQL_ADD_ALL_LABELED_ASSETS_TO_REVIEW,
        )
        return format_result("data", results[0])

    @Compatible(["v2"])
    @typechecked
    def send_back_to_queue(self, asset_ids: List[str]):
        """Send assets back to queue.

        Args:
            asset_ids: The asset IDs to send back to queue

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message. Only the first result of the paginated
                call is formatted and returned.

        Examples:
            >>> kili.send_back_to_queue(
                    asset_ids=[
                        "ckg22d81r0jrg0885unmuswj8",
                        "ckg22d81s0jrh0885pdxfd03n"
                        ],
            )
        """
        properties_to_batch = {"asset_ids": asset_ids}

        def generate_variables(batch):
            return {"where": {"idIn": batch["asset_ids"]}}

        results = _mutate_from_paginated_call(
            self, properties_to_batch, generate_variables, GQL_SEND_BACK_ASSETS_TO_QUEUE
        )
        return format_result("data", results[0])

add_to_review(self, asset_ids)

Add assets to review.

Warning

Assets without any label will be ignored.

Parameters:

Name Type Description Default
asset_ids List[str]

The asset IDs to add to review

required

Returns:

Type Description
dict

A result object which indicates if the mutation was successful, or an error message.

Examples:

>>> kili.add_to_review(
        asset_ids=[
            "ckg22d81r0jrg0885unmuswj8",
            "ckg22d81s0jrh0885pdxfd03n"
            ],
)
Source code in kili/mutations/asset/__init__.py
@Compatible(["v1", "v2"])
@typechecked
def add_to_review(self, asset_ids: List[str]) -> dict:
    """Put the given assets in review.

    !!! warning
        Assets without any label will be ignored.

    Args:
        asset_ids: The IDs of the assets to add to review

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message. It is built from the first result
            of the paginated mutation call.

    Examples:
        >>> kili.add_to_review(
                asset_ids=[
                    "ckg22d81r0jrg0885unmuswj8",
                    "ckg22d81s0jrh0885pdxfd03n"
                    ],
        )
    """

    def to_gql_variables(chunk):
        # Each batch is selected through an `idIn` filter on its asset ids.
        return {"where": {"idIn": chunk["asset_ids"]}}

    batch_results = _mutate_from_paginated_call(
        self,
        {"asset_ids": asset_ids},
        to_gql_variables,
        GQL_ADD_ALL_LABELED_ASSETS_TO_REVIEW,
    )
    first_result = batch_results[0]
    return format_result("data", first_result)

append_many_to_dataset(self, project_id, content_array=None, external_id_array=None, is_honeypot_array=None, status_array=None, json_content_array=None, json_metadata_array=None)

Append assets to a project.

Parameters:

Name Type Description Default
project_id str

Identifier of the project

required
content_array Optional[List[str]]

List of elements added to the assets of the project Must not be None except if you provide json_content_array.

  • For a TEXT project, the content can be either raw text, or URLs to TEXT assets.
  • For an IMAGE / PDF project, the content can be either URLs or paths to existing images/pdf on your computer.
  • For a VIDEO project, the content must be hosted on a web server, and you point Kili to your data by giving the URLs.
None
external_id_array Optional[List[str]]

List of external ids given to identify the assets. If None, random identifiers are created.

None
is_honeypot_array Optional[List[bool]]

Whether to use the asset for honeypot

None
status_array Optional[List[str]]

By default, all imported assets are set to TODO. Other options: ONGOING, LABELED, REVIEWED.

None
json_content_array Optional[List[List[Union[dict, str]]]]

Useful for VIDEO or TEXT projects only.

  • For FRAME projects, each element is a sequence of frames, i.e. a list of URLs to images or a list of paths to images.
  • For TEXT projects, each element is a json_content dict, formatted according to documentation on how to import rich-text assets
None
json_metadata_array Optional[List[dict]]

The metadata given to each asset should be stored in a json like dict with keys.

  • Add metadata visible on the asset with the following keys: imageUrl, text, url. Example for one asset: json_metadata_array = [{'imageUrl': '','text': '','url': ''}].
  • For video, you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30). Example for one asset: json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}].
None

Returns:

Type Description

A result object which indicates if the mutation was successful, or an error message.

Examples:

>>> kili.append_many_to_dataset(
        project_id=project_id,
        content_array=['https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'])

Recipe

  • For more detailed examples on how to import assets, see the recipe or other examples in our documentation.
  • For more detailed examples on how to import text assets, see the recipe.
Source code in kili/mutations/asset/__init__.py
@Compatible(["v1", "v2"])
@typechecked
def append_many_to_dataset(
    self,
    project_id: str,
    content_array: Optional[List[str]] = None,
    external_id_array: Optional[List[str]] = None,
    is_honeypot_array: Optional[List[bool]] = None,
    status_array: Optional[List[str]] = None,
    json_content_array: Optional[List[List[Union[dict, str]]]] = None,
    json_metadata_array: Optional[List[dict]] = None,
):
    # pylint: disable=line-too-long
    """Append assets to a project.

    Args:
        project_id: Identifier of the project
        content_array: List of elements added to the assets of the project
            Must not be None except if you provide json_content_array.

            - For a `TEXT` project, the content can be either raw text, or URLs to TEXT assets.
            - For an `IMAGE` / `PDF` project, the content can be either URLs or paths to existing
                images/pdf on your computer.
            - For a `VIDEO`  project, the content must be hosted on a web server,
                and you point Kili to your data by giving the URLs.
        external_id_array: List of external ids given to identify the assets.
            If None, random identifiers are created.
        is_honeypot_array:  Whether to use the asset for honeypot
        status_array: By default, all imported assets are set to `TODO`. Other options:
            `ONGOING`, `LABELED`, `REVIEWED`.
        json_content_array: Useful for `VIDEO` or `TEXT` projects only.

            - For `FRAME` projects, each element is a sequence of frames, i.e. a
                list of URLs to images or a list of paths to images.
            - For `TEXT` projects, each element is a json_content dict,
                formatted according to documentation [on how to import
            rich-text assets](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb)
        json_metadata_array: The metadata given to each asset should be stored in a json like dict with keys.

            - Add metadata visible on the asset with the following keys: `imageUrl`, `text`, `url`.
                Example for one asset: `json_metadata_array = [{'imageUrl': '','text': '','url': ''}]`.
            - For video, you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30).
                Example for one asset: `json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}]`.

    Returns:
        A result object which indicates if the mutation was successful, or an error message.
            Only the first result of the paginated call is formatted and returned.

    Raises:
        AssertionError: If the project query does not return exactly one project.

    Examples:
        >>> kili.append_many_to_dataset(
                project_id=project_id,
                content_array=['https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'])

    !!! example "Recipe"
        - For more detailed examples on how to import assets,
            see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_assets.ipynb)
            or [other examples](https://docs.kili-technology.com/recipes/importing-data) in our documentation.
        - For more detailed examples on how to import text assets,
            see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb).
    """
    kili = QueriesProject(self.auth)
    projects = kili.projects(project_id, disable_tqdm=True)
    # Explicit raise instead of `assert`: assert statements are removed under
    # `python -O`, which would silently skip this check. The exception type is
    # kept so existing callers catching AssertionError still work.
    if len(projects) != 1:
        raise AssertionError(NO_ACCESS_RIGHT)
    input_type = projects[0]["inputType"]
    (properties_to_batch, upload_type, request,) = process_append_many_to_dataset_parameters(
        input_type,
        content_array,
        external_id_array,
        is_honeypot_array,
        status_array,
        json_content_array,
        json_metadata_array,
    )

    def generate_variables(batch):
        # The frames mutation takes an upload type and no honeypot/status/jsonContent.
        if request == GQL_APPEND_MANY_FRAMES_TO_DATASET:
            payload_data = {
                "contentArray": batch["content_array"],
                "externalIDArray": batch["external_id_array"],
                "jsonMetadataArray": batch["json_metadata_array"],
                "uploadType": upload_type,
            }
        else:
            payload_data = {
                "contentArray": batch["content_array"],
                "externalIDArray": batch["external_id_array"],
                "isHoneypotArray": batch["is_honeypot_array"],
                "statusArray": batch["status_array"],
                "jsonContentArray": batch["json_content_array"],
                "jsonMetadataArray": batch["json_metadata_array"],
            }
        return {"data": payload_data, "where": {"id": project_id}}

    results = _mutate_from_paginated_call(
        self, properties_to_batch, generate_variables, request
    )
    return format_result("data", results[0], Asset)

delete_many_from_dataset(self, asset_ids)

Delete assets from a project.

Parameters:

Name Type Description Default
asset_ids List[str]

The list of identifiers of the assets to delete.

required

Returns:

Type Description

A result object which indicates if the mutation was successful, or an error message.

Source code in kili/mutations/asset/__init__.py
@Compatible(["v1", "v2"])
@typechecked
def delete_many_from_dataset(self, asset_ids: List[str]):
    """Remove assets from a project.

    Args:
        asset_ids: Identifiers of the assets to delete.

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message. It is built from the first result
            of the paginated mutation call, formatted with the `Asset` type.
    """

    def to_gql_variables(chunk):
        # Each batch is selected through an `idIn` filter on its asset ids.
        return {"where": {"idIn": chunk["asset_ids"]}}

    batch_results = _mutate_from_paginated_call(
        self,
        {"asset_ids": asset_ids},
        to_gql_variables,
        GQL_DELETE_MANY_FROM_DATASET,
    )
    first_result = batch_results[0]
    return format_result("data", first_result, Asset)

send_back_to_queue(self, asset_ids)

Send assets back to queue.

Parameters:

Name Type Description Default
asset_ids List[str]

The asset IDs to send back to queue

required

Returns:

Type Description

A result object which indicates if the mutation was successful, or an error message.

Examples:

>>> kili.send_back_to_queue(
        asset_ids=[
            "ckg22d81r0jrg0885unmuswj8",
            "ckg22d81s0jrh0885pdxfd03n"
            ],
)
Source code in kili/mutations/asset/__init__.py
@Compatible(["v2"])
@typechecked
def send_back_to_queue(self, asset_ids: List[str]):
    """Put the given assets back in the queue.

    Args:
        asset_ids: The IDs of the assets to send back to queue

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message. It is built from the first result
            of the paginated mutation call.

    Examples:
        >>> kili.send_back_to_queue(
                asset_ids=[
                    "ckg22d81r0jrg0885unmuswj8",
                    "ckg22d81s0jrh0885pdxfd03n"
                    ],
        )
    """

    def to_gql_variables(chunk):
        # Each batch is selected through an `idIn` filter on its asset ids.
        return {"where": {"idIn": chunk["asset_ids"]}}

    batch_results = _mutate_from_paginated_call(
        self,
        {"asset_ids": asset_ids},
        to_gql_variables,
        GQL_SEND_BACK_ASSETS_TO_QUEUE,
    )
    first_result = batch_results[0]
    return format_result("data", first_result)

update_properties_in_assets(self, asset_ids, external_ids=None, priorities=None, json_metadatas=None, consensus_marks=None, honeypot_marks=None, to_be_labeled_by_array=None, contents=None, json_contents=None, status_array=None, is_used_for_consensus_array=None, is_honeypot_array=None)

Update the properties of one or more assets.

Parameters:

Name Type Description Default
asset_ids List[str]

The asset IDs to modify

required
external_ids Optional[List[str]]

Change the external id of the assets

None
priorities Optional[List[int]]

You can change the priority of the assets By default, all assets have a priority of 0.

None
json_metadatas Optional[List[Union[dict, str]]]

The metadata given to an asset should be stored in a json like dict with keys imageUrl, text, url: json_metadata = {'imageUrl': '','text': '','url': ''}

None
consensus_marks Optional[List[float]]

Should be between 0 and 1

None
honeypot_marks Optional[List[float]]

Should be between 0 and 1

None
to_be_labeled_by_array Optional[List[List[str]]]

If given, each element of the list should contain the emails of the labelers authorized to label the asset.

None
contents Optional[List[str]]
  • For a NLP project, the content can be directly in text format
  • For an Image / Video / Pdf project, the content must be hosted on a web server, and you point Kili to your data by giving the URLs
None
json_contents Optional[List[str]]
  • For a NLP project, the json_content is a text formatted using RichText
  • For a Video project, the json_content is a json containing urls pointing to each frame of the video.
None
status_array Optional[List[str]]

Each element should be in TODO, ONGOING, LABELED, REVIEWED

None
is_used_for_consensus_array Optional[List[bool]]

Whether to use the asset to compute consensus kpis or not

None
is_honeypot_array Optional[List[bool]]

Whether to use the asset for honeypot

None

Returns:

Type Description
List[dict]

A result object which indicates if the mutation was successful, or an error message.

Examples:

>>> kili.update_properties_in_assets(
        asset_ids=["ckg22d81r0jrg0885unmuswj8",
            "ckg22d81s0jrh0885pdxfd03n"],
        consensus_marks=[1, 0.7],
        contents=[None, 'https://to/second/asset.png'],
        external_ids=['external-id-of-your-choice-1',
            'external-id-of-your-choice-2'],
        honeypot_marks=[0.8, 0.5],
        is_honeypot_array=[True, True],
        is_used_for_consensus_array=[True, False],
        priorities=[None, 2],
        status_array=['LABELED', 'REVIEWED'],
        to_be_labeled_by_array=[
            ['test+pierre@kili-technology.com'], None],
)
Source code in kili/mutations/asset/__init__.py
@Compatible(["v2"])
@typechecked
def update_properties_in_assets(
    self,
    asset_ids: List[str],
    external_ids: Optional[List[str]] = None,
    priorities: Optional[List[int]] = None,
    json_metadatas: Optional[List[Union[dict, str]]] = None,
    consensus_marks: Optional[List[float]] = None,
    honeypot_marks: Optional[List[float]] = None,
    to_be_labeled_by_array: Optional[List[List[str]]] = None,
    contents: Optional[List[str]] = None,
    json_contents: Optional[List[str]] = None,
    status_array: Optional[List[str]] = None,
    is_used_for_consensus_array: Optional[List[bool]] = None,
    is_honeypot_array: Optional[List[bool]] = None,
) -> List[dict]:
    """Update the properties of one or more assets.

    Args:
        asset_ids: The IDs of the assets to modify
        external_ids: New external IDs for the assets
        priorities: New priorities for the assets.
            By default, all assets have a priority of 0.
        json_metadatas: The metadata given to an asset should be stored
            in a JSON-like dict with keys `imageUrl`, `text`, `url`:
            `json_metadata = {'imageUrl': '','text': '','url': ''}`
        consensus_marks: Should be between 0 and 1
        honeypot_marks: Should be between 0 and 1
        to_be_labeled_by_array: If given, each element of the list should contain
            the emails of the labelers authorized to label the asset.
        contents: - For a NLP project, the content can be directly in text format
            - For an Image / Video / Pdf project, the content must be hosted on a web server,
            and you point Kili to your data by giving the URLs
        json_contents: - For a NLP project, the `json_content`
            is a text formatted using RichText
            - For a Video project, the `json_content` is a JSON containing URLs pointing
                to each frame of the video.
        status_array: Each element should be in `TODO`, `ONGOING`, `LABELED`, `REVIEWED`
        is_used_for_consensus_array: Whether to use the asset to compute consensus KPIs or not
        is_honeypot_array: Whether to use the asset for honeypot

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.

    Examples:
        >>> kili.update_properties_in_assets(
                asset_ids=["ckg22d81r0jrg0885unmuswj8",
                    "ckg22d81s0jrh0885pdxfd03n"],
                consensus_marks=[1, 0.7],
                contents=[None, 'https://to/second/asset.png'],
                external_ids=['external-id-of-your-choice-1',
                    'external-id-of-your-choice-2'],
                honeypot_marks=[0.8, 0.5],
                is_honeypot_array=[True, True],
                is_used_for_consensus_array=[True, False],
                priorities=[None, 2],
                status_array=['LABELED', 'REVIEWED'],
                to_be_labeled_by_array=[
                    ['test+pierre@kili-technology.com'], None],
        )
    """
    # Gather every user-facing argument under its own name, in signature order,
    # so the shared pre-processing helper receives them as a single mapping.
    raw_properties = {
        "asset_ids": asset_ids,
        "external_ids": external_ids,
        "priorities": priorities,
        "json_metadatas": json_metadatas,
        "consensus_marks": consensus_marks,
        "honeypot_marks": honeypot_marks,
        "to_be_labeled_by_array": to_be_labeled_by_array,
        "contents": contents,
        "json_contents": json_contents,
        "status_array": status_array,
        "is_used_for_consensus_array": is_used_for_consensus_array,
        "is_honeypot_array": is_honeypot_array,
    }
    properties_to_batch = process_update_properties_in_assets_parameters(raw_properties)

    def generate_variables(batch):
        """Build the GraphQL variables (`whereArray` / `dataArray`) for one batch."""
        # (GraphQL field name, key of `batch` holding that field's values).
        # NOTE: "should_reset_to_be_labeled_by_array" is not an argument of this method;
        # it is presumably produced by process_update_properties_in_assets_parameters.
        field_sources = (
            ("externalId", "external_ids"),
            ("priority", "priorities"),
            ("jsonMetadata", "json_metadatas"),
            ("consensusMark", "consensus_marks"),
            ("honeypotMark", "honeypot_marks"),
            ("toBeLabeledBy", "to_be_labeled_by_array"),
            ("shouldResetToBeLabeledBy", "should_reset_to_be_labeled_by_array"),
            ("content", "contents"),
            ("jsonContent", "json_contents"),
            ("status", "status_array"),
            ("isUsedForConsensus", "is_used_for_consensus_array"),
            ("isHoneypot", "is_honeypot_array"),
        )
        columns = {gql_field: batch[batch_key] for gql_field, batch_key in field_sources}

        # Transpose the per-field columns into one dict per position in the batch.
        gql_fields = list(columns)
        data_rows = [dict(zip(gql_fields, values)) for values in zip(*columns.values())]
        where_rows = [{"id": asset_id} for asset_id in batch["asset_ids"]]
        return {"whereArray": where_rows, "dataArray": data_rows}

    raw_results = _mutate_from_paginated_call(
        self,
        properties_to_batch,
        generate_variables,
        GQL_UPDATE_PROPERTIES_IN_ASSETS,
    )
    formatted_batches = [format_result("data", raw_result, Asset) for raw_result in raw_results]

    # Each formatted batch is itself a list; flatten them into a single list.
    flattened = []
    for formatted_batch in formatted_batches:
        for item in formatted_batch:
            flattened.append(item)
    return flattened