Skip to content

Asset module

Queries

Set of Asset queries

Source code in kili/queries/asset/__init__.py
class QueriesAsset:
    """
    Set of Asset queries
    """
    # pylint: disable=too-many-arguments,too-many-locals

    def __init__(self, auth):
        """Initialize the subclass.

        Args:
            auth: KiliAuth object
        """
        self.auth = auth

    # pylint: disable=dangerous-default-value
    @Compatible(['v1', 'v2'])
    @typechecked
    @deprecate(removed_in="2.116")
    def assets(self,
               asset_id: Optional[str] = None,
               project_id: Optional[str] = None,
               skip: int = 0,
               fields: List[str] = ['content',
                                    'createdAt',
                                    'externalId',
                                    'id',
                                    'isHoneypot',
                                    'jsonMetadata',
                                    'labels.author.id',
                                    'labels.author.email',
                                    'labels.createdAt',
                                    'labels.id',
                                    'labels.jsonResponse',
                                    'skipped',
                                    'status'],
               asset_id_in: Optional[List[str]] = None,
               consensus_mark_gt: Optional[float] = None,
               consensus_mark_lt: Optional[float] = None,
               disable_tqdm: bool = False,
               external_id_contains: Optional[List[str]] = None,
               first: Optional[int] = None,
               format: Optional[str] = None,  # pylint: disable=redefined-builtin
               honeypot_mark_gt: Optional[float] = None,
               honeypot_mark_lt: Optional[float] = None,
               label_author_in: Optional[List[str]] = None,
               label_consensus_mark_gt: Optional[float] = None,
               label_consensus_mark_lt: Optional[float] = None,
               label_created_at: Optional[str] = None,
               label_created_at_gt: Optional[str] = None,
               label_created_at_lt: Optional[str] = None,
               label_honeypot_mark_gt: Optional[float] = None,
               label_honeypot_mark_lt: Optional[float] = None,
               label_type_in: Optional[List[str]] = None,
               metadata_where: Optional[dict] = None,
               skipped: Optional[bool] = None,
               status_in: Optional[List[str]] = None,
               updated_at_gte: Optional[str] = None,
               updated_at_lte: Optional[str] = None,
               as_generator: bool = False,
               label_category_search: Optional[str] = None,
               ) -> Union[List[dict], Generator[dict, None, None], pd.DataFrame]:
        # pylint: disable=line-too-long
        """Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.

        Parameters beginning with 'label_' are sent as label filters, others as asset filters.

        Args:
            asset_id: Identifier of the asset to retrieve.
            asset_id_in: A list of the IDs of the assets to retrieve.
            project_id: Identifier of the project.
            skip: Number of assets to skip (they are ordered by their date of creation, first to last).
            fields: All the fields to request among the possible fields for the assets.
                    See [the documentation](https://docs.kili-technology.com/reference/graphql-api#asset) for all possible fields.
            first: Maximum number of assets to return.
            consensus_mark_gt: Minimum amount of consensus for the asset.
            consensus_mark_lt: Maximum amount of consensus for the asset.
            external_id_contains: Returned assets have an external id that belongs to that list, if given.
            metadata_where: Filters by the values of the metadata of the asset.
            honeypot_mark_gt: Minimum amount of honeypot for the asset.
            honeypot_mark_lt: Maximum amount of honeypot for the asset.
            status_in: Returned assets should have a status that belongs to that list, if given.
                Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
            label_type_in: Returned assets should have a label whose type belongs to that list, if given.
            label_author_in: Returned assets should have a label whose author belongs to that list, if given.
            label_consensus_mark_gt: Returned assets should have a label whose consensus is greater than this number.
            label_consensus_mark_lt: Returned assets should have a label whose consensus is lower than this number.
            label_created_at: Returned assets should have a label whose creation date is equal to this date.
            label_created_at_gt: Returned assets should have a label whose creation date is greater than this date.
            label_created_at_lt: Returned assets should have a label whose creation date is lower than this date.
            label_honeypot_mark_gt: Returned assets should have a label whose honeypot is greater than this number.
            label_honeypot_mark_lt: Returned assets should have a label whose honeypot is lower than this number.
            skipped: Returned assets should be skipped
            updated_at_gte: Returned assets should have an update date greater than or equal to this date
                (sent as the asset-level `updatedAtGte` filter).
            updated_at_lte: Returned assets should have an update date lower than or equal to this date
                (sent as the asset-level `updatedAtLte` filter).
            format: If equal to 'pandas', returns a pandas DataFrame
            disable_tqdm: If `True`, the progress bar will be disabled
            as_generator: If `True`, a generator on the assets is returned.
            label_category_search: Returned assets should have a label that follows this category search query.

        !!! info "Dates format"
            Date strings should have format: "YYYY-MM-DD"

        Returns:
            A list of assets, a generator on the assets if `as_generator` is `True`,
                or a pandas DataFrame if `format` is 'pandas'.

        Raises:
            ValueError: If `as_generator` is `True` while `format` is 'pandas'.

        Example:
            ```
            # returns the assets list of the project
            >>> kili.assets(project_id=project_id)
            >>> kili.assets(project_id=project_id, asset_id=asset_id)
            # returns a generator of the project assets
            >>> kili.assets(project_id=project_id, as_generator=True)
            ```

        !!! example "How to filter based on Metadata"
            - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
                have key "key1" with value "value1"
            - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
                have key "key1" with value "value1" or value "value2"
            - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
                have key "key2" with a value between 2 and 10.

        !!! example "How to filter based on label categories"
            The search query is composed of logical expressions following this format:

                [job_name].[category_name].count [comparison_operator] [value]
            where:

            - `[job_name]` is the name of the job in the interface
            - `[category_name]` is the name of the category in the interface for this job
            - `[comparison_operator]` can be one of: [`==`, `>=`, `<=`, `<`, `>`]
            - `[value]` is an integer that represents the count of such objects of the given category in the label

            These operations can be separated by OR and AND operators

            Example:

                label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
                label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
                label_category_search = `(JOB_CLASSIF.CATEGORY_A.count == 1 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
        """
        # Snapshot the call arguments first, before any other local name is bound,
        # so that only real parameters can end up in the filters forwarded to
        # `count_assets`. Pagination/output options are not count filters.
        call_args = locals()
        count_args = {name: value for (name, value) in call_args.items()
                      if name not in ('skip', 'first', 'disable_tqdm', 'format',
                                      'fields', 'self', 'as_generator')}

        if project_id is None:
            message = """
                The field `project_id` must be specified since: 2.115
                It will be made mandatory in: 2.116
                If your workflow involves getting these entities over several projects,
                please iterate on your projects with .projects and concatenate the results.
                """
            warnings.warn(message, DeprecationWarning)
        if format == "pandas" and as_generator:
            raise ValueError(
                "Argument values as_generator==True and format==\"pandas\" are not compatible.")

        # using tqdm with a generator is messy, so it is always disabled
        disable_tqdm = disable_tqdm or as_generator
        if label_category_search:
            validate_category_search_query(label_category_search)

        # Work on a copy so that nothing downstream can alter the shared default list
        # declared in the signature (or the list object owned by the caller).
        fields = list(fields)

        # NOTE: the `*_gt` / `*_lt` arguments are sent under the `...Gte` / `...Lte` keys.
        payload_query = {
            'where': {
                'id': asset_id,
                'project': {
                    'id': project_id,
                },
                'externalIdIn': external_id_contains,
                'statusIn': status_in,
                'consensusMarkGte': consensus_mark_gt,
                'consensusMarkLte': consensus_mark_lt,
                'honeypotMarkGte': honeypot_mark_gt,
                'honeypotMarkLte': honeypot_mark_lt,
                'idIn': asset_id_in,
                'metadata': metadata_where,
                'label': {
                    'typeIn': label_type_in,
                    'authorIn': label_author_in,
                    'consensusMarkGte': label_consensus_mark_gt,
                    'consensusMarkLte': label_consensus_mark_lt,
                    'createdAt': label_created_at,
                    'createdAtGte': label_created_at_gt,
                    'createdAtLte': label_created_at_lt,
                    'honeypotMarkGte': label_honeypot_mark_gt,
                    'honeypotMarkLte': label_honeypot_mark_lt,
                    'search': label_category_search
                },
                'skipped': skipped,
                'updatedAtGte': updated_at_gte,
                'updatedAtLte': updated_at_lte,
            },
        }

        asset_generator = row_generator_from_paginated_calls(
            skip,
            first,
            self.count_assets,
            count_args,
            self._query_assets,
            payload_query,
            fields,
            disable_tqdm
        )

        if format == "pandas":
            return pd.DataFrame(list(asset_generator))
        if as_generator:
            return asset_generator
        return list(asset_generator)

    def _query_assets(self,
                      skip: int,
                      first: int,
                      payload: dict,
                      fields: List[str]):
        """Run the asset query for one page of results.

        Args:
            skip: Value sent as the `skip` query variable.
            first: Value sent as the `first` query variable.
            payload: Query variables shared by every page; it is left untouched.
            fields: Fields used to build the query fragment.

        Returns:
            The result of the query, formatted by `format_result`.
        """
        # Build per-call variables instead of updating `payload` in place: the same
        # dict is handed to every paginated call and must not carry paging state.
        variables = dict(payload, skip=skip, first=first)
        _gql_assets = gql_assets(fragment_builder(fields, AssetType))
        result = self.auth.client.execute(_gql_assets, variables)
        assets = format_result('data', result, Asset)
        return assets

    @Compatible(['v1', 'v2'])
    @typechecked
    @deprecate(removed_in="2.116")
    def count_assets(self, asset_id: Optional[str] = None,
                     project_id: Optional[str] = None,
                     asset_id_in: Optional[List[str]] = None,
                     external_id_contains: Optional[List[str]] = None,
                     metadata_where: Optional[dict] = None,
                     status_in: Optional[List[str]] = None,
                     consensus_mark_gt: Optional[float] = None,
                     consensus_mark_lt: Optional[float] = None,
                     honeypot_mark_gt: Optional[float] = None,
                     honeypot_mark_lt: Optional[float] = None,
                     label_type_in: Optional[List[str]] = None,
                     label_author_in: Optional[List[str]] = None,
                     label_consensus_mark_gt: Optional[float] = None,
                     label_consensus_mark_lt: Optional[float] = None,
                     label_created_at: Optional[str] = None,
                     label_created_at_gt: Optional[str] = None,
                     label_created_at_lt: Optional[str] = None,
                     label_honeypot_mark_gt: Optional[float] = None,
                     label_honeypot_mark_lt: Optional[float] = None,
                     skipped: Optional[bool] = None,
                     updated_at_gte: Optional[str] = None,
                     updated_at_lte: Optional[str] = None,
                     label_category_search: Optional[str] = None) -> int:
        """Count and return the number of assets with the given constraints.

        Parameters beginning with 'label_' apply to labels, others apply to assets.

        Args:
            asset_id: The unique id of the asset to retrieve.
            asset_id_in: A list of the ids of the assets to retrieve.
            project_id: Identifier of the project
            external_id_contains: Returned assets should have an external id
                that belongs to that list, if given.
            metadata_where: Filters by the values of the metadata of the asset.
            status_in: Returned assets should have a status that belongs to that list, if given.
                Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
            consensus_mark_gt: Minimum amount of consensus for the asset.
            consensus_mark_lt: Maximum amount of consensus for the asset.
            honeypot_mark_gt: Minimum amount of honeypot for the asset.
            honeypot_mark_lt: Maximum amount of honeypot for the asset.
            label_type_in: Returned assets should have a label
                whose type belongs to that list, if given.
            label_author_in: Returned assets should have a label
                whose author belongs to that list, if given.
            label_consensus_mark_gt: Returned assets should have a label
                whose consensus is greater than this number.
            label_consensus_mark_lt: Returned assets should have a label
                whose consensus is lower than this number.
            label_created_at: Returned assets should have a label
                whose creation date is equal to this date.
            label_created_at_gt: Returned assets should have a label
                whose creation date is greater than this date.
            label_created_at_lt: Returned assets should have a label
                whose creation date is lower than this date.
            label_honeypot_mark_gt: Returned assets should have a label
                whose honeypot is greater than this number.
            label_honeypot_mark_lt: Returned assets should have a label
                whose honeypot is lower than this number.
            skipped: Returned assets should be skipped
            updated_at_gte: Returned assets should have an update date
                greater than or equal to this date
                (sent as the asset-level `updatedAtGte` filter).
            updated_at_lte: Returned assets should have an update date
                lower than or equal to this date
                (sent as the asset-level `updatedAtLte` filter).
            label_category_search: Returned assets should have a label
                that follows this category search query.

        !!! info "Dates format"
            Date strings should have format: "YYYY-MM-DD"

        Returns:
            The number of assets that match the given constraints.

        Examples:
            >>> kili.count_assets(project_id=project_id)
            250
            >>> kili.count_assets(asset_id=asset_id)
            1

        !!! example "How to filter based on Metadata"
            - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
                have key "key1" with value "value1"
            - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
                have key "key1" with value "value1" or value "value2"
            - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
                have key "key2" with a value between 2 and 10.
        """
        if project_id is None:
            message = """
                The field `project_id` must be specified since: 2.115
                It will be made mandatory in: 2.116
                If your workflow involves getting these entities over several projects,
                please iterate on your projects with .projects and concatenate the results.
                """
            warnings.warn(message, DeprecationWarning)

        if label_category_search:
            validate_category_search_query(label_category_search)

        # NOTE: the `*_gt` / `*_lt` arguments are sent under the `...Gte` / `...Lte` keys.
        variables = {
            'where': {
                'id': asset_id,
                'project': {
                    'id': project_id,
                },
                'externalIdIn': external_id_contains,
                'statusIn': status_in,
                'consensusMarkGte': consensus_mark_gt,
                'consensusMarkLte': consensus_mark_lt,
                'honeypotMarkGte': honeypot_mark_gt,
                'honeypotMarkLte': honeypot_mark_lt,
                'idIn': asset_id_in,
                'metadata': metadata_where,
                'label': {
                    'typeIn': label_type_in,
                    'authorIn': label_author_in,
                    'consensusMarkGte': label_consensus_mark_gt,
                    'consensusMarkLte': label_consensus_mark_lt,
                    'createdAt': label_created_at,
                    'createdAtGte': label_created_at_gt,
                    'createdAtLte': label_created_at_lt,
                    'honeypotMarkGte': label_honeypot_mark_gt,
                    'honeypotMarkLte': label_honeypot_mark_lt,
                    'search': label_category_search
                },
                'skipped': skipped,
                'updatedAtGte': updated_at_gte,
                'updatedAtLte': updated_at_lte,
            }
        }
        result = self.auth.client.execute(GQL_ASSETS_COUNT, variables)
        count = format_result('data', result)
        return count

assets(self, asset_id=None, project_id=None, skip=0, fields=['content', 'createdAt', 'externalId', 'id', 'isHoneypot', 'jsonMetadata', 'labels.author.id', 'labels.author.email', 'labels.createdAt', 'labels.id', 'labels.jsonResponse', 'skipped', 'status'], asset_id_in=None, consensus_mark_gt=None, consensus_mark_lt=None, disable_tqdm=False, external_id_contains=None, first=None, format=None, honeypot_mark_gt=None, honeypot_mark_lt=None, label_author_in=None, label_consensus_mark_gt=None, label_consensus_mark_lt=None, label_created_at=None, label_created_at_gt=None, label_created_at_lt=None, label_honeypot_mark_gt=None, label_honeypot_mark_lt=None, label_type_in=None, metadata_where=None, skipped=None, status_in=None, updated_at_gte=None, updated_at_lte=None, as_generator=False, label_category_search=None)

Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.

Parameters:

Name Type Description Default
asset_id Optional[str]

Identifier of the asset to retrieve.

None
asset_id_in Optional[List[str]]

A list of the IDs of the assets to retrieve.

None
project_id Optional[str]

Identifier of the project.

None
skip int

Number of assets to skip (they are ordered by their date of creation, first to last).

0
fields List[str]

All the fields to request among the possible fields for the assets. See the documentation for all possible fields.

['content', 'createdAt', 'externalId', 'id', 'isHoneypot', 'jsonMetadata', 'labels.author.id', 'labels.author.email', 'labels.createdAt', 'labels.id', 'labels.jsonResponse', 'skipped', 'status']
first Optional[int]

Maximum number of assets to return.

None
consensus_mark_gt Optional[float]

Minimum amount of consensus for the asset.

None
consensus_mark_lt Optional[float]

Maximum amount of consensus for the asset.

None
external_id_contains Optional[List[str]]

Returned assets have an external id that belongs to that list, if given.

None
metadata_where Optional[dict]

Filters by the values of the metadata of the asset.

None
honeypot_mark_gt Optional[float]

Minimum amount of honeypot for the asset.

None
honeypot_mark_lt Optional[float]

Maximum amount of honeypot for the asset.

None
status_in Optional[List[str]]

Returned assets should have a status that belongs to that list, if given. Possible choices: TODO, ONGOING, LABELED or REVIEWED

None
label_type_in Optional[List[str]]

Returned assets should have a label whose type belongs to that list, if given.

None
label_author_in Optional[List[str]]

Returned assets should have a label whose author belongs to that list, if given.

None
label_consensus_mark_gt Optional[float]

Returned assets should have a label whose consensus is greater than this number.

None
label_consensus_mark_lt Optional[float]

Returned assets should have a label whose consensus is lower than this number.

None
label_created_at Optional[str]

Returned assets should have a label whose creation date is equal to this date.

None
label_created_at_gt Optional[str]

Returned assets should have a label whose creation date is greater than this date.

None
label_created_at_lt Optional[str]

Returned assets should have a label whose creation date is lower than this date.

None
label_honeypot_mark_gt Optional[float]

Returned assets should have a label whose honeypot is greater than this number

None
label_honeypot_mark_lt Optional[float]

Returned assets should have a label whose honeypot is lower than this number

None
skipped Optional[bool]

Returned assets should be skipped

None
updated_at_gte Optional[str]

Returned assets should have a label whose update date is greater than or equal to this date.

None
updated_at_lte Optional[str]

Returned assets should have a label whose update date is lower or equal to this date.

None
format Optional[str]

If equal to 'pandas', returns a pandas DataFrame

None
disable_tqdm bool

If True, the progress bar will be disabled

False
as_generator bool

If True, a generator on the assets is returned.

False
label_category_search Optional[str]

Returned assets should have a label that follows this category search query.

None

Dates format

Date strings should have format: "YYYY-MM-DD"

Returns:

Type Description
Union[List[dict], Generator[dict, NoneType], pandas.core.frame.DataFrame]

A list of assets, a generator on the assets (if as_generator is True), or a pandas DataFrame (if format is 'pandas').

Examples:

# returns the assets list of the project
>>> kili.assets(project_id=project_id)
>>> kili.assets(project_id=project_id, asset_id=asset_id)
# returns a generator of the project assets
>>> kili.assets(project_id=project_id, as_generator=True)

How to filter based on Metadata

  • metadata_where = {key1: "value1"} to filter on assets whose metadata have key "key1" with value "value1"
  • metadata_where = {key1: ["value1", "value2"]} to filter on assets whose metadata have key "key1" with value "value1" or value "value2"
  • metadata_where = {key2: [2, 10]} to filter on assets whose metadata have key "key2" with a value between 2 and 10.

How to filter based on label categories

The search query is composed of logical expressions following this format:

[job_name].[category_name].count [comparison_operator] [value]

where:

  • [job_name] is the name of the job in the interface
  • [category_name] is the name of the category in the interface for this job
  • [comparison_operator] can be one of: [==, >=, <=, <, >]
  • [value] is an integer that represents the count of such objects of the given category in the label

These operations can be separated by OR and AND operators

Example:

label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
label_category_search = `(JOB_CLASSIF.CATEGORY_A.count == 1 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
Source code in kili/queries/asset/__init__.py
@Compatible(['v1', 'v2'])
@typechecked
@deprecate(removed_in="2.116")
def assets(self,
           asset_id: Optional[str] = None,
           project_id: Optional[str] = None,
           skip: int = 0,
           fields: List[str] = ['content',
                                'createdAt',
                                'externalId',
                                'id',
                                'isHoneypot',
                                'jsonMetadata',
                                'labels.author.id',
                                'labels.author.email',
                                'labels.createdAt',
                                'labels.id',
                                'labels.jsonResponse',
                                'skipped',
                                'status'],
           asset_id_in: Optional[List[str]] = None,
           consensus_mark_gt: Optional[float] = None,
           consensus_mark_lt: Optional[float] = None,
           disable_tqdm: bool = False,
           external_id_contains: Optional[List[str]] = None,
           first: Optional[int] = None,
           format: Optional[str] = None,  # pylint: disable=redefined-builtin
           honeypot_mark_gt: Optional[float] = None,
           honeypot_mark_lt: Optional[float] = None,
           label_author_in: Optional[List[str]] = None,
           label_consensus_mark_gt: Optional[float] = None,
           label_consensus_mark_lt: Optional[float] = None,
           label_created_at: Optional[str] = None,
           label_created_at_gt: Optional[str] = None,
           label_created_at_lt: Optional[str] = None,
           label_honeypot_mark_gt: Optional[float] = None,
           label_honeypot_mark_lt: Optional[float] = None,
           label_type_in: Optional[List[str]] = None,
           metadata_where: Optional[dict] = None,
           skipped: Optional[bool] = None,
           status_in: Optional[List[str]] = None,
           updated_at_gte: Optional[str] = None,
           updated_at_lte: Optional[str] = None,
           as_generator: bool = False,
           label_category_search: Optional[str] = None,
           ) -> Union[List[dict], Generator[dict, None, None], pd.DataFrame]:
    # pylint: disable=line-too-long
    """Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.

    Args:
        asset_id: Identifier of the asset to retrieve.
        asset_id_in: A list of the IDs of the assets to retrieve.
        project_id: Identifier of the project.
            A `DeprecationWarning` is emitted when it is not given.
        skip: Number of assets to skip (they are ordered by their date of creation, first to last).
        fields: All the fields to request among the possible fields for the assets.
                See [the documentation](https://docs.kili-technology.com/reference/graphql-api#asset) for all possible fields.
        first: Maximum number of assets to return.
        consensus_mark_gt: Minimum amount of consensus for the asset.
        consensus_mark_lt: Maximum amount of consensus for the asset.
        external_id_contains: Returned assets have an external id that belongs to that list, if given.
        metadata_where: Filters by the values of the metadata of the asset.
        honeypot_mark_gt: Minimum amount of honeypot for the asset.
        honeypot_mark_lt: Maximum amount of honeypot for the asset.
        status_in: Returned assets should have a status that belongs to that list, if given.
            Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
        label_type_in: Returned assets should have a label whose type belongs to that list, if given.
        label_author_in: Returned assets should have a label whose author belongs to that list, if given.
        label_consensus_mark_gt: Returned assets should have a label whose consensus is greater than this number.
        label_consensus_mark_lt: Returned assets should have a label whose consensus is lower than this number.
        label_created_at: Returned assets should have a label whose creation date is equal to this date.
        label_created_at_gt: Returned assets should have a label whose creation date is greater than this date.
        label_created_at_lt: Returned assets should have a label whose creation date is lower than this date.
        label_honeypot_mark_gt: Returned assets should have a label whose honeypot is greater than this number
        label_honeypot_mark_lt: Returned assets should have a label whose honeypot is lower than this number
        skipped: Returned assets should be skipped
        updated_at_gte: Returned assets should have a label whose update date is greater or equal to this date.
        updated_at_lte: Returned assets should have a label whose update date is lower or equal to this date.
        format: If equal to 'pandas', returns a pandas DataFrame
        disable_tqdm: If `True`, the progress bar will be disabled
        as_generator: If `True`, a generator on the assets is returned.
        label_category_search: Returned assets should have a label that follows this category search query.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        A pandas DataFrame if `format == "pandas"`, a generator on the assets
            if `as_generator` is `True`, and a list of assets otherwise.

    Raises:
        ValueError: If `as_generator` is `True` while `format` is `"pandas"`.

    Example:
        ```
        # returns the assets list of the project
        >>> kili.assets(project_id)
        >>> kili.assets(project_id, asset_id=asset_id)
        # returns a generator of the project assets
        >>> kili.assets(project_id, as_generator=True)
        ```

    !!! example "How to filter based on Metadata"
        - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
            have key "key1" with value "value1"
        - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
            have key "key1" with value "value1" or value "value2"
        - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
            have key "key2" with a value between 2 and 10.

    !!! example "How to filter based on label categories"
        The search query is composed of logical expressions following this format:

            [job_name].[category_name].count [comparison_operator] [value]
        where:

        - `[job_name]` is the name of the job in the interface
        - `[category_name]` is the name of the category in the interface for this job
        - `[comparison_operator]` can be one of: [`==`, `>=`, `<=`, `<`, `>`]
        - `[value]` is an integer that represents the count of such objects of the given category in the label

        These operations can be separated by OR and AND operators

        Example:

            label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
            label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
            label_category_search = `(JOB_CLASSIF.CATEGORY_A.count == 1 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
    """
    # Copy the call arguments before any other local variable exists: at this
    # point locals() only holds `self` and the parameters, so no helper
    # variable defined further down can leak into the count filters.
    call_args = dict(locals())
    # Keep only the filtering arguments; pagination/display options and `self`
    # are dropped (presumably because these are forwarded to `count_assets`,
    # which does not accept them -- confirm in row_generator_from_paginated_calls).
    count_args = {k: v for (k, v) in call_args.items()
                  if k not in ['skip', 'first', 'disable_tqdm', 'format', 'fields', 'self', 'as_generator']}

    if project_id is None:
        message = """
            The field `project_id` must be specified since: 2.115
            It will be made mandatory in: 2.116
            If your workflow involves getting these entities over several projects,
            please iterate on your projects with .projects and concatenate the results.
            """
        warnings.warn(message, DeprecationWarning)
    if format == "pandas" and as_generator:
        raise ValueError(
            "Argument values as_generator==True and format==\"pandas\" are not compatible.")

    # using tqdm with a generator is messy, so it is always disabled
    disable_tqdm = disable_tqdm or as_generator
    if label_category_search:
        validate_category_search_query(label_category_search)

    payload_query = {
        'where': {
            'id': asset_id,
            'project': {
                'id': project_id,
            },
            'externalIdIn': external_id_contains,
            'statusIn': status_in,
            'consensusMarkGte': consensus_mark_gt,
            'consensusMarkLte': consensus_mark_lt,
            'honeypotMarkGte': honeypot_mark_gt,
            'honeypotMarkLte': honeypot_mark_lt,
            'idIn': asset_id_in,
            'metadata': metadata_where,
            'label': {
                'typeIn': label_type_in,
                'authorIn': label_author_in,
                'consensusMarkGte': label_consensus_mark_gt,
                'consensusMarkLte': label_consensus_mark_lt,
                'createdAt': label_created_at,
                'createdAtGte': label_created_at_gt,
                'createdAtLte': label_created_at_lt,
                'honeypotMarkGte': label_honeypot_mark_gt,
                'honeypotMarkLte': label_honeypot_mark_lt,
                'search': label_category_search
            },
            'skipped': skipped,
            'updatedAtGte': updated_at_gte,
            'updatedAtLte': updated_at_lte,
        },
    }

    # NOTE(review): `fields` has a mutable list default; it is only forwarded
    # here -- confirm the pagination helper never mutates it.
    asset_generator = row_generator_from_paginated_calls(
        skip,
        first,
        self.count_assets,
        count_args,
        self._query_assets,
        payload_query,
        fields,
        disable_tqdm
    )

    if format == "pandas":
        return pd.DataFrame(list(asset_generator))
    if as_generator:
        return asset_generator
    return list(asset_generator)

count_assets(self, asset_id=None, project_id=None, asset_id_in=None, external_id_contains=None, metadata_where=None, status_in=None, consensus_mark_gt=None, consensus_mark_lt=None, honeypot_mark_gt=None, honeypot_mark_lt=None, label_type_in=None, label_author_in=None, label_consensus_mark_gt=None, label_consensus_mark_lt=None, label_created_at=None, label_created_at_gt=None, label_created_at_lt=None, label_honeypot_mark_gt=None, label_honeypot_mark_lt=None, skipped=None, updated_at_gte=None, updated_at_lte=None, label_category_search=None)

Count and return the number of assets with the given constraints.

Parameters beginning with 'label_' apply to labels, others apply to assets.

Parameters:

Name Type Description Default
asset_id Optional[str]

The unique id of the asset to retrieve.

None
asset_id_in Optional[List[str]]

A list of the ids of the assets to retrieve.

None
project_id Optional[str]

Identifier of the project

None
external_id_contains Optional[List[str]]

Returned assets should have an external id that belongs to that list, if given.

None
metadata_where Optional[dict]

Filters by the values of the metadata of the asset.

None
status_in Optional[List[str]]

Returned assets should have a status that belongs to that list, if given. Possible choices: TODO, ONGOING, LABELED or REVIEWED

None
consensus_mark_gt Optional[float]

Minimum amount of consensus for the asset.

None
consensus_mark_lt Optional[float]

Maximum amount of consensus for the asset.

None
honeypot_mark_gt Optional[float]

Minimum amount of honeypot for the asset.

None
honeypot_mark_lt Optional[float]

Maximum amount of honeypot for the asset.

None
label_type_in Optional[List[str]]

Returned assets should have a label whose type belongs to that list, if given.

None
label_author_in Optional[List[str]]

Returned assets should have a label whose author belongs to that list, if given.

None
label_consensus_mark_gt Optional[float]

Returned assets should have a label whose consensus is greater than this number.

None
label_consensus_mark_lt Optional[float]

Returned assets should have a label whose consensus is lower than this number.

None
label_created_at Optional[str]

Returned assets should have a label whose creation date is equal to this date.

None
label_created_at_gt Optional[str]

Returned assets should have a label whose creation date is greater than this date.

None
label_created_at_lt Optional[str]

Returned assets should have a label whose creation date is lower than this date.

None
label_honeypot_mark_gt Optional[float]

Returned assets should have a label whose honeypot is greater than this number.

None
label_honeypot_mark_lt Optional[float]

Returned assets should have a label whose honeypot is lower than this number.

None
skipped Optional[bool]

Returned assets should be skipped

None
updated_at_gte Optional[str]

Returned assets should have a label whose update date is greater or equal to this date.

None
updated_at_lte Optional[str]

Returned assets should have a label whose update date is lower or equal to this date.

None

Dates format

Date strings should have format: "YYYY-MM-DD"

Returns:

Type Description
int

The number of assets that match the given constraints.

Examples:

>>> kili.count_assets(project_id=project_id)
250
>>> kili.count_assets(asset_id=asset_id)
1

How to filter based on Metadata

  • metadata_where = {key1: "value1"} to filter on assets whose metadata have key "key1" with value "value1"
  • metadata_where = {key1: ["value1", "value2"]} to filter on assets whose metadata have key "key1" with value "value1" or value "value2"
  • metadata_where = {key2: [2, 10]} to filter on assets whose metadata have key "key2" with a value between 2 and 10.
Source code in kili/queries/asset/__init__.py
@Compatible(['v1', 'v2'])
@typechecked
@deprecate(removed_in="2.116")
def count_assets(self, asset_id: Optional[str] = None,
                 project_id: Optional[str] = None,
                 asset_id_in: Optional[List[str]] = None,
                 external_id_contains: Optional[List[str]] = None,
                 metadata_where: Optional[dict] = None,
                 status_in: Optional[List[str]] = None,
                 consensus_mark_gt: Optional[float] = None,
                 consensus_mark_lt: Optional[float] = None,
                 honeypot_mark_gt: Optional[float] = None,
                 honeypot_mark_lt: Optional[float] = None,
                 label_type_in: Optional[List[str]] = None,
                 label_author_in: Optional[List[str]] = None,
                 label_consensus_mark_gt: Optional[float] = None,
                 label_consensus_mark_lt: Optional[float] = None,
                 label_created_at: Optional[str] = None,
                 label_created_at_gt: Optional[str] = None,
                 label_created_at_lt: Optional[str] = None,
                 label_honeypot_mark_gt: Optional[float] = None,
                 label_honeypot_mark_lt: Optional[float] = None,
                 skipped: Optional[bool] = None,
                 updated_at_gte: Optional[str] = None,
                 updated_at_lte: Optional[str] = None,
                 label_category_search: Optional[str] = None) -> int:
    """Count the assets that match the given constraints.

    Parameters beginning with 'label_' apply to labels, others apply to assets.

    Args:
        asset_id: The unique id of the asset to retrieve.
        asset_id_in: A list of the ids of the assets to retrieve.
        project_id: Identifier of the project.
            A `DeprecationWarning` is emitted when it is not given.
        external_id_contains: Returned assets should have an external id
            that belongs to that list, if given.
        metadata_where: Filters by the values of the metadata of the asset.
        status_in: Returned assets should have a status that belongs to that list, if given.
            Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
        consensus_mark_gt: Minimum amount of consensus for the asset.
        consensus_mark_lt: Maximum amount of consensus for the asset.
        honeypot_mark_gt: Minimum amount of honeypot for the asset.
        honeypot_mark_lt: Maximum amount of honeypot for the asset.
        label_type_in: Returned assets should have a label
            whose type belongs to that list, if given.
        label_author_in: Returned assets should have a label
            whose author belongs to that list, if given.
        label_consensus_mark_gt: Returned assets should have a label
            whose consensus is greater than this number.
        label_consensus_mark_lt: Returned assets should have a label
            whose consensus is lower than this number.
        label_created_at: Returned assets should have a label
            whose creation date is equal to this date.
        label_created_at_gt: Returned assets should have a label
            whose creation date is greater than this date.
        label_created_at_lt: Returned assets should have a label
            whose creation date is lower than this date.
        label_honeypot_mark_gt: Returned assets should have a label
            whose honeypot is greater than this number.
        label_honeypot_mark_lt: Returned assets should have a label
            whose honeypot is lower than this number.
        skipped: Returned assets should be skipped
        updated_at_gte: Returned assets should have a label
            whose update date is greater or equal to this date.
        updated_at_lte: Returned assets should have a label
            whose update date is lower or equal to this date.
        label_category_search: Returned assets should have a label
            that follows this category search query.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        The number of assets that match the given constraints.

    Examples:
        >>> kili.count_assets(project_id=project_id)
        250
        >>> kili.count_assets(asset_id=asset_id)
        1

    !!! example "How to filter based on Metadata"
        - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
            have key "key1" with value "value1"
        - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
            have key "key1" with value "value1" or value "value2"
        - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
            have key "key2" with a value between 2 and 10.
    """
    if project_id is None:
        warnings.warn("""
            The field `project_id` must be specified since: 2.115
            It will be made mandatory in: 2.116
            If your workflow involves getting these entities over several projects,
            please iterate on your projects with .projects and concatenate the results.
            """, DeprecationWarning)

    # Reject a malformed category search before any request is sent.
    if label_category_search:
        validate_category_search_query(label_category_search)

    # Constraints that apply to the labels of the assets.
    label_filter = {
        'typeIn': label_type_in,
        'authorIn': label_author_in,
        'consensusMarkGte': label_consensus_mark_gt,
        'consensusMarkLte': label_consensus_mark_lt,
        'createdAt': label_created_at,
        'createdAtGte': label_created_at_gt,
        'createdAtLte': label_created_at_lt,
        'honeypotMarkGte': label_honeypot_mark_gt,
        'honeypotMarkLte': label_honeypot_mark_lt,
        'search': label_category_search
    }
    # Constraints that apply to the assets themselves.
    asset_filter = {
        'id': asset_id,
        'project': {'id': project_id},
        'externalIdIn': external_id_contains,
        'statusIn': status_in,
        'consensusMarkGte': consensus_mark_gt,
        'consensusMarkLte': consensus_mark_lt,
        'honeypotMarkGte': honeypot_mark_gt,
        'honeypotMarkLte': honeypot_mark_lt,
        'idIn': asset_id_in,
        'metadata': metadata_where,
        'label': label_filter,
        'skipped': skipped,
        'updatedAtGte': updated_at_gte,
        'updatedAtLte': updated_at_lte,
    }

    response = self.auth.client.execute(GQL_ASSETS_COUNT, {'where': asset_filter})
    return format_result('data', response)

Mutations

Set of Asset mutations

Source code in kili/mutations/asset/__init__.py
class MutationsAsset:
    """
    Set of Asset mutations
    """
    # pylint: disable=too-many-arguments,too-many-locals

    def __init__(self, auth):
        """Initialize the subclass.

        Args:
            auth: KiliAuth object
        """
        self.auth = auth

    @Compatible(['v1', 'v2'])
    @typechecked
    def append_many_to_dataset(
            self,
            project_id: str,
            content_array: Optional[List[str]] = None,
            external_id_array: Optional[List[str]] = None,
            is_honeypot_array: Optional[List[bool]] = None,
            status_array: Optional[List[str]] = None,
            json_content_array: Optional[List[List[Union[dict, str]]]] = None,
            json_metadata_array: Optional[List[dict]] = None):
        # pylint: disable=line-too-long
        """Append assets to a project.

        Args:
            project_id: Identifier of the project
            content_array: List of elements added to the assets of the project.
                Must not be None except if you provide json_content_array.

                - For a `TEXT` project, the content can be either raw text, or URLs to TEXT assets.
                - For an `IMAGE` / `PDF` project, the content can be either URLs or paths to existing
                    images/pdf on your computer.
                - For a `VIDEO`  project, the content must be hosted on a web server,
                    and you point Kili to your data by giving the URLs.
            external_id_array: List of external ids given to identify the assets.
                If None, random identifiers are created.
            is_honeypot_array:  Whether to use the asset for honeypot
            status_array: By default, all imported assets are set to `TODO`. Other options:
                `ONGOING`, `LABELED`, `REVIEWED`.
            json_content_array: Useful for `VIDEO` or `TEXT` projects only.

                - For `FRAME` projects, each element is a sequence of frames, i.e. a
                    list of URLs to images or a list of paths to images.
                - For `TEXT` projects, each element is a json_content dict,
                    formatted according to documentation [on how to import
                rich-text assets](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb)
            json_metadata_array: The metadata given to each asset should be stored in a json like dict with keys.

                - Add metadata visible on the asset with the following keys: `imageUrl`, `text`, `url`.
                    Example for one asset: `json_metadata_array = [{'imageUrl': '','text': '','url': ''}]`.
                - For video, you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30).
                    Example for one asset: `json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}]`.

        Returns:
            A result object which indicates if the mutation was successful, or an error message.

        Raises:
            AssertionError: If the query on `project_id` does not return exactly one project.

        Examples:
            >>> kili.append_many_to_dataset(
                    project_id=project_id,
                    content_array=['https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'])

        !!! example "Recipe"
            - For more detailed examples on how to import assets,
                see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_assets.ipynb)
                or [other examples](https://docs.kili-technology.com/recipes/importing-data) in our documentation.
            - For more detailed examples on how to import text assets,
                see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb).
        """
        kili = QueriesProject(self.auth)
        projects = kili.projects(project_id, disable_tqdm=True)
        if len(projects) != 1:
            # Explicit raise instead of `assert` so the check is not stripped by
            # `python -O`; AssertionError is kept so callers see the same type.
            raise AssertionError(NO_ACCESS_RIGHT)
        input_type = projects[0]['inputType']
        properties_to_batch, upload_type, request = process_append_many_to_dataset_parameters(
            input_type,
            content_array,
            external_id_array,
            is_honeypot_array,
            status_array,
            json_content_array,
            json_metadata_array)

        def generate_variables(batch):
            # The frames mutation takes a reduced payload plus the upload type.
            if request == GQL_APPEND_MANY_FRAMES_TO_DATASET:
                payload_data = {'contentArray': batch['content_array'],
                                'externalIDArray': batch['external_id_array'],
                                'jsonMetadataArray': batch['json_metadata_array'],
                                'uploadType': upload_type}
            else:
                payload_data = {'contentArray': batch['content_array'],
                                'externalIDArray': batch['external_id_array'],
                                'isHoneypotArray': batch['is_honeypot_array'],
                                'statusArray': batch['status_array'],
                                'jsonContentArray': batch['json_content_array'],
                                'jsonMetadataArray': batch['json_metadata_array']}
            return {
                'data': payload_data,
                'where': {'id': project_id}
            }

        results = _mutate_from_paginated_call(
            self, properties_to_batch, generate_variables, request)
        # Only the first element of `results` is formatted and returned.
        return format_result('data', results[0], Asset)

    @Compatible(['v2'])
    @typechecked
    # pylint: disable=unused-argument
    def update_properties_in_assets(self,
                                    asset_ids: List[str],
                                    external_ids: Optional[List[str]] = None,
                                    priorities: Optional[List[int]] = None,
                                    json_metadatas: Optional[List[Union[dict, str]]] = None,
                                    consensus_marks: Optional[List[float]] = None,
                                    honeypot_marks: Optional[List[float]] = None,
                                    to_be_labeled_by_array: Optional[List[List[str]]] = None,
                                    contents: Optional[List[str]] = None,
                                    json_contents: Optional[List[str]] = None,
                                    status_array: Optional[List[str]] = None,
                                    is_used_for_consensus_array: Optional[List[bool]] = None,
                                    is_honeypot_array: Optional[List[bool]] = None) -> List[dict]:
        """Update the properties of one or more assets.

        Args:
            asset_ids: The asset IDs to modify
            external_ids: Change the external id of the assets
            priorities: You can change the priority of the assets
                By default, all assets have a priority of 0.
            json_metadatas: The metadata given to an asset should be stored
                in a json like dict with keys `imageUrl`, `text`, `url`:
                `json_metadata = {'imageUrl': '','text': '','url': ''}`
            consensus_marks: Should be between 0 and 1
            honeypot_marks: Should be between 0 and 1
            to_be_labeled_by_array: If given, each element of the list should contain the emails of
                the labelers authorized to label the asset.
            contents: - For a NLP project, the content can be directly in text format
                - For an Image / Video / Pdf project, the content must be hosted on a web server,
                and you point Kili to your data by giving the URLs
            json_contents: - For a NLP project, the `json_content`
                is a text formatted using RichText
                - For a Video project, the `json_content` is a json containing urls pointing
                    to each frame of the video.
            status_array: Each element should be in `TODO`, `ONGOING`, `LABELED`, `REVIEWED`
            is_used_for_consensus_array: Whether to use the asset to compute consensus kpis or not
            is_honeypot_array: Whether to use the asset for honeypot

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message.

        Examples:
            >>> kili.update_properties_in_assets(
                    asset_ids=["ckg22d81r0jrg0885unmuswj8",
                        "ckg22d81s0jrh0885pdxfd03n"],
                    consensus_marks=[1, 0.7],
                    contents=[None, 'https://to/second/asset.png'],
                    external_ids=['external-id-of-your-choice-1',
                        'external-id-of-your-choice-2'],
                    honeypot_marks=[0.8, 0.5],
                    is_honeypot_array=[True, True],
                    is_used_for_consensus_array=[True, False],
                    priorities=[None, 2],
                    status_array=['LABELED', 'REVIEWED'],
                    to_be_labeled_by_array=[
                        ['test+pierre@kili-technology.com'], None],
            )
        """

        # The parameters are read through locals() rather than by name, hence
        # the `unused-argument` pylint exemption above the signature.
        saved_args = locals()
        parameters = {k: v for (k, v) in saved_args.items() if k in
                      ['asset_ids',
                       'external_ids',
                       'priorities',
                       'json_metadatas',
                       'consensus_marks',
                       'honeypot_marks',
                       'to_be_labeled_by_array',
                       'contents',
                       'json_contents',
                       'status_array',
                       'is_used_for_consensus_array',
                       'is_honeypot_array']}
        properties_to_batch = process_update_properties_in_assets_parameters(
            parameters)

        def generate_variables(batch):
            data = {
                'externalId': batch['external_ids'],
                'priority': batch['priorities'],
                'jsonMetadata': batch['json_metadatas'],
                'consensusMark': batch['consensus_marks'],
                'honeypotMark': batch['honeypot_marks'],
                'toBeLabeledBy': batch['to_be_labeled_by_array'],
                'shouldResetToBeLabeledBy': batch['should_reset_to_be_labeled_by_array'],
                'content': batch['contents'],
                'jsonContent': batch['json_contents'],
                'status': batch['status_array'],
                'isUsedForConsensus': batch['is_used_for_consensus_array'],
                'isHoneypot': batch['is_honeypot_array']
            }
            # Turn the mapping of per-property sequences into one dict per
            # position (assumes every batch value is an equally long
            # sequence -- TODO confirm in the parameter processing helper).
            data_array = [dict(zip(data, t)) for t in zip(*data.values())]
            return {
                'whereArray': [{'id': asset_id} for asset_id in batch['asset_ids']],
                'dataArray': data_array
            }

        results = _mutate_from_paginated_call(
            self, properties_to_batch, generate_variables, GQL_UPDATE_PROPERTIES_IN_ASSETS)
        formatted_results = [format_result(
            'data', result, Asset) for result in results]
        # Flatten the per-call result lists into a single list.
        return [item for batch_list in formatted_results for item in batch_list]

    @Compatible(['v1', 'v2'])
    @typechecked
    def delete_many_from_dataset(self, asset_ids: List[str]):
        """Delete assets from a project.

        Args:
            asset_ids: The list of identifiers of the assets to delete.

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message.
        """
        properties_to_batch = {'asset_ids': asset_ids}

        def generate_variables(batch):
            return {'where': {'idIn': batch['asset_ids']}}

        results = _mutate_from_paginated_call(self,
                                              properties_to_batch,
                                              generate_variables,
                                              GQL_DELETE_MANY_FROM_DATASET)
        # Only the first element of `results` is formatted and returned.
        return format_result('data', results[0], Asset)

    @Compatible(['v1', 'v2'])
    @typechecked
    def add_to_review(
            self,
            asset_ids: List[str]) -> dict:
        """Add assets to review.

        !!! warning
            Assets without any label will be ignored.

        Args:
            asset_ids: The asset IDs to add to review

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message.

        Examples:
            >>> kili.add_to_review(
                    asset_ids=[
                        "ckg22d81r0jrg0885unmuswj8",
                        "ckg22d81s0jrh0885pdxfd03n"
                        ],
                )
        """
        properties_to_batch = {'asset_ids': asset_ids}

        def generate_variables(batch):
            return {'where': {'idIn': batch['asset_ids']}}

        results = _mutate_from_paginated_call(self,
                                              properties_to_batch,
                                              generate_variables,
                                              GQL_ADD_ALL_LABELED_ASSETS_TO_REVIEW)
        # Only the first element of `results` is formatted and returned.
        return format_result('data', results[0])

    @Compatible(['v2'])
    @typechecked
    def send_back_to_queue(
            self,
            asset_ids: List[str]):
        """Send assets back to queue.

        Args:
            asset_ids: The asset IDs to send back to queue

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message.

        Examples:
            >>> kili.send_back_to_queue(
                    asset_ids=[
                        "ckg22d81r0jrg0885unmuswj8",
                        "ckg22d81s0jrh0885pdxfd03n"
                        ],
                )
        """
        properties_to_batch = {'asset_ids': asset_ids}

        def generate_variables(batch):
            return {'where': {'idIn': batch['asset_ids']}}

        results = _mutate_from_paginated_call(self,
                                              properties_to_batch,
                                              generate_variables,
                                              GQL_SEND_BACK_ASSETS_TO_QUEUE)
        # Only the first element of `results` is formatted and returned.
        return format_result('data', results[0])

add_to_review(self, asset_ids)

Add assets to review.

Warning

Assets without any label will be ignored.

Parameters:

Name Type Description Default
asset_ids List[str]

The asset IDs to add to review

required

Returns:

Type Description
dict

A result object which indicates if the mutation was successful, or an error message.

Examples:

>>> kili.add_to_review(
        asset_ids=[
            "ckg22d81r0jrg0885unmuswj8",
            "ckg22d81s0jrh0885pdxfd03n"
            ],
    )
Source code in kili/mutations/asset/__init__.py
@Compatible(['v1', 'v2'])
@typechecked
def add_to_review(
        self,
        asset_ids: List[str]) -> dict:
    """Add assets to review.

    !!! warning
        Assets without any label will be ignored.

    Args:
        asset_ids: The IDs of the assets to add to review.

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.

    Examples:
        >>> kili.add_to_review(
                asset_ids=[
                    "ckg22d81r0jrg0885unmuswj8",
                    "ckg22d81s0jrh0885pdxfd03n"
                    ],
            )
    """
    def _to_variables(batch):
        # Select the assets of the current batch by their IDs.
        id_filter = {'idIn': batch['asset_ids']}
        return {'where': id_filter}

    mutation_results = _mutate_from_paginated_call(
        self,
        {'asset_ids': asset_ids},
        _to_variables,
        GQL_ADD_ALL_LABELED_ASSETS_TO_REVIEW)
    # Only the first entry of the returned results is formatted.
    first_result = mutation_results[0]
    return format_result('data', first_result)

append_many_to_dataset(self, project_id, content_array=None, external_id_array=None, is_honeypot_array=None, status_array=None, json_content_array=None, json_metadata_array=None)

Append assets to a project.

Parameters:

Name Type Description Default
project_id str

Identifier of the project

required
content_array Optional[List[str]]

List of elements added to the assets of the project. Must not be None unless you provide json_content_array.

  • For a TEXT project, the content can be either raw text, or URLs to TEXT assets.
  • For an IMAGE / PDF project, the content can be either URLs or paths to existing images/pdf on your computer.
  • For a VIDEO project, the content must be hosted on a web server, and you point Kili to your data by giving the URLs.
None
external_id_array Optional[List[str]]

List of external ids given to identify the assets. If None, random identifiers are created.

None
is_honeypot_array Optional[List[bool]]

Whether to use the asset for honeypot

None
status_array Optional[List[str]]

By default, all imported assets are set to TODO. Other options: ONGOING, LABELED, REVIEWED.

None
json_content_array Optional[List[List[Union[dict, str]]]]

Useful for VIDEO or TEXT projects only.

  • For FRAME projects, each element is a sequence of frames, i.e. a list of URLs to images or a list of paths to images.
  • For TEXT projects, each element is a json_content dict, formatted according to documentation on how to import rich-text assets
None
json_metadata_array Optional[List[dict]]

The metadata given to each asset should be stored in a json like dict with keys.

  • Add metadata visible on the asset with the following keys: imageUrl, text, url. Example for one asset: json_metadata_array = [{'imageUrl': '','text': '','url': ''}].
  • For video, you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30). Example for one asset: json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}].
None

Returns:

Type Description

A result object which indicates if the mutation was successful, or an error message.

Examples:

>>> kili.append_many_to_dataset(
        project_id=project_id,
        content_array=['https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'])

Recipe

  • For more detailed examples on how to import assets, see the recipe or other examples in our documentation.
  • For more detailed examples on how to import text assets, see the recipe.
Source code in kili/mutations/asset/__init__.py
@Compatible(['v1', 'v2'])
@typechecked
def append_many_to_dataset(
        self,
        project_id: str,
        content_array: Optional[List[str]] = None,
        external_id_array: Optional[List[str]] = None,
        is_honeypot_array: Optional[List[bool]] = None,
        status_array: Optional[List[str]] = None,
        json_content_array: Optional[List[List[Union[dict, str]]]] = None,
        json_metadata_array: Optional[List[dict]] = None):
    # pylint: disable=line-too-long
    """Append assets to a project.

    The project is first looked up to read its `inputType`, which is passed,
    together with the arrays, to `process_append_many_to_dataset_parameters`
    to obtain the properties to send, the upload type and the request to use.

    Args:
        project_id: Identifier of the project
        content_array: List of elements added to the assets of the project.
            Must not be None unless you provide json_content_array.

            - For a `TEXT` project, the content can be either raw text, or URLs to TEXT assets.
            - For an `IMAGE` / `PDF` project, the content can be either URLs or paths to existing
                images/pdf on your computer.
            - For a `VIDEO`  project, the content must be hosted on a web server,
                and you point Kili to your data by giving the URLs.
        external_id_array: List of external ids given to identify the assets.
            If None, random identifiers are created.
        is_honeypot_array:  Whether to use the asset for honeypot
        status_array: By default, all imported assets are set to `TODO`. Other options:
            `ONGOING`, `LABELED`, `REVIEWED`.
        json_content_array: Useful for `VIDEO` or `TEXT` projects only.

            - For `FRAME` projects, each element is a sequence of frames, i.e. a
                list of URLs to images or a list of paths to images.
            - For `TEXT` projects, each element is a json_content dict,
                formatted according to documentation [on how to import
            rich-text assets](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb)
        json_metadata_array: The metadata given to each asset should be stored in a json like dict with keys.

            - Add metadata visible on the asset with the following keys: `imageUrl`, `text`, `url`.
                Example for one asset: `json_metadata_array = [{'imageUrl': '','text': '','url': ''}]`.
            - For video, you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30).
                Example for one asset: `json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}]`.

    Returns:
        A result object which indicates if the mutation was successful, or an error message.

    Raises:
        AssertionError: If the project lookup does not return exactly one project
            (the message is `NO_ACCESS_RIGHT`).

    Examples:
        >>> kili.append_many_to_dataset(
                project_id=project_id,
                content_array=['https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'])

    !!! example "Recipe"
        - For more detailed examples on how to import assets,
            see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_assets.ipynb)
            or [other examples](https://docs.kili-technology.com/recipes/importing-data) in our documentation.
        - For more detailed examples on how to import text assets,
            see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb).
    """
    kili = QueriesProject(self.auth)
    projects = kili.projects(project_id, disable_tqdm=True)
    # Raise explicitly rather than using `assert`: assert statements are
    # removed when Python runs with -O, which would silently skip this check.
    # AssertionError is kept so existing callers see the same exception type.
    if len(projects) != 1:
        raise AssertionError(NO_ACCESS_RIGHT)
    input_type = projects[0]['inputType']
    properties_to_batch, upload_type, request = process_append_many_to_dataset_parameters(input_type,
                                                                                          content_array,
                                                                                          external_id_array,
                                                                                          is_honeypot_array,
                                                                                          status_array,
                                                                                          json_content_array,
                                                                                          json_metadata_array)

    def generate_variables(batch):
        # The frames mutation takes an upload type and no honeypot / status /
        # json content arrays; every other request takes the full set of arrays.
        if request == GQL_APPEND_MANY_FRAMES_TO_DATASET:
            payload_data = {'contentArray': batch['content_array'],
                            'externalIDArray': batch['external_id_array'],
                            'jsonMetadataArray': batch['json_metadata_array'],
                            'uploadType': upload_type}
        else:
            payload_data = {'contentArray': batch['content_array'],
                            'externalIDArray': batch['external_id_array'],
                            'isHoneypotArray': batch['is_honeypot_array'],
                            'statusArray': batch['status_array'],
                            'jsonContentArray': batch['json_content_array'],
                            'jsonMetadataArray': batch['json_metadata_array']}
        return {
            'data': payload_data,
            'where': {'id': project_id}
        }

    results = _mutate_from_paginated_call(
        self, properties_to_batch, generate_variables, request)
    # Only the first entry of the returned results is formatted.
    return format_result('data', results[0], Asset)

delete_many_from_dataset(self, asset_ids)

Delete assets from a project.

Parameters:

Name Type Description Default
asset_ids List[str]

The list of identifiers of the assets to delete.

required

Returns:

Type Description

A result object which indicates if the mutation was successful, or an error message.

Source code in kili/mutations/asset/__init__.py
@Compatible(['v1', 'v2'])
@typechecked
def delete_many_from_dataset(self, asset_ids: List[str]):
    """Delete assets from a project.

    Args:
        asset_ids: The list of identifiers of the assets to delete.

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.
    """
    def _to_variables(batch):
        # Select the assets of the current batch by their IDs.
        id_filter = {'idIn': batch['asset_ids']}
        return {'where': id_filter}

    mutation_results = _mutate_from_paginated_call(
        self,
        {'asset_ids': asset_ids},
        _to_variables,
        GQL_DELETE_MANY_FROM_DATASET)
    # Only the first entry of the returned results is formatted.
    first_result = mutation_results[0]
    return format_result('data', first_result, Asset)

send_back_to_queue(self, asset_ids)

Send assets back to queue.

Parameters:

Name Type Description Default
asset_ids List[str]

The asset IDs to send back to the queue

required

Returns:

Type Description

A result object which indicates if the mutation was successful, or an error message.

Examples:

>>> kili.send_back_to_queue(
        asset_ids=[
            "ckg22d81r0jrg0885unmuswj8",
            "ckg22d81s0jrh0885pdxfd03n"
            ],
    )
Source code in kili/mutations/asset/__init__.py
@Compatible(['v2'])
@typechecked
def send_back_to_queue(
        self,
        asset_ids: List[str]):
    """Send assets back to queue.

    Args:
        asset_ids: The IDs of the assets to send back to the queue.

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.

    Examples:
        >>> kili.send_back_to_queue(
                asset_ids=[
                    "ckg22d81r0jrg0885unmuswj8",
                    "ckg22d81s0jrh0885pdxfd03n"
                    ],
            )
    """
    def _to_variables(batch):
        # Select the assets of the current batch by their IDs.
        id_filter = {'idIn': batch['asset_ids']}
        return {'where': id_filter}

    mutation_results = _mutate_from_paginated_call(
        self,
        {'asset_ids': asset_ids},
        _to_variables,
        GQL_SEND_BACK_ASSETS_TO_QUEUE)
    # Only the first entry of the returned results is formatted.
    first_result = mutation_results[0]
    return format_result('data', first_result)

update_properties_in_assets(self, asset_ids, external_ids=None, priorities=None, json_metadatas=None, consensus_marks=None, honeypot_marks=None, to_be_labeled_by_array=None, contents=None, json_contents=None, status_array=None, is_used_for_consensus_array=None, is_honeypot_array=None)

Update the properties of one or more assets.

Parameters:

Name Type Description Default
asset_ids

The asset IDs to modify

required
external_ids Optional[List[str]]

Change the external id of the assets

None
priorities

You can change the priority of the assets. By default, all assets have a priority of 0.

None
json_metadatas Optional[List[Union[dict, str]]]

The metadata given to an asset should be stored in a json like dict with keys imageUrl, text, url: json_metadata = {'imageUrl': '','text': '','url': ''}

None
consensus_marks Optional[List[float]]

Should be between 0 and 1

None
honeypot_marks Optional[List[float]]

Should be between 0 and 1

None
to_be_labeled_by_array Optional[List[List[str]]]

If given, each element of the list should contain the emails of the labelers authorized to label the asset.

None
contents Optional[List[str]]
  • For a NLP project, the content can be directly in text format
  • For an Image / Video / Pdf project, the content must be hosted on a web server, and you point Kili to your data by giving the URLs
None
json_contents Optional[List[str]]
  • For a NLP project, the json_content is a text formatted using RichText
  • For a Video project, the json_content is a JSON containing URLs pointing to each frame of the video.
None
status_array Optional[List[str]]

Each element should be in TODO, ONGOING, LABELED, REVIEWED

None
is_used_for_consensus_array Optional[List[bool]]

Whether to use the asset to compute consensus kpis or not

None
is_honeypot_array Optional[List[bool]]

Whether to use the asset for honeypot

None

Returns:

Type Description
List[dict]

A result object which indicates if the mutation was successful, or an error message.

Examples:

>>> kili.update_properties_in_assets(
        asset_ids=["ckg22d81r0jrg0885unmuswj8",
            "ckg22d81s0jrh0885pdxfd03n"],
        consensus_marks=[1, 0.7],
        contents=[None, 'https://to/second/asset.png'],
        external_ids=['external-id-of-your-choice-1',
            'external-id-of-your-choice-2'],
        honeypot_marks=[0.8, 0.5],
        is_honeypot_array=[True, True],
        is_used_for_consensus_array=[True, False],
        priorities=[None, 2],
        status_array=['LABELED', 'REVIEWED'],
        to_be_labeled_by_array=[
            ['test+pierre@kili-technology.com'], None],
)
Source code in kili/mutations/asset/__init__.py
@Compatible(['v2'])
@typechecked
# pylint: disable=unused-argument
def update_properties_in_assets(
        self,
        asset_ids: List[str],
        external_ids: Optional[List[str]] = None,
        priorities: Optional[List[int]] = None,
        json_metadatas: Optional[List[Union[dict, str]]] = None,
        consensus_marks: Optional[List[float]] = None,
        honeypot_marks: Optional[List[float]] = None,
        to_be_labeled_by_array: Optional[List[List[str]]] = None,
        contents: Optional[List[str]] = None,
        json_contents: Optional[List[str]] = None,
        status_array: Optional[List[str]] = None,
        is_used_for_consensus_array: Optional[List[bool]] = None,
        is_honeypot_array: Optional[List[bool]] = None) -> List[dict]:
    """Update the properties of one or more assets.

    Args:
        asset_ids: The asset IDs to modify
        external_ids: Change the external id of the assets
        priorities: You can change the priority of the assets.
            By default, all assets have a priority of 0.
        json_metadatas: The metadata given to an asset should be stored
            in a json like dict with keys `imageUrl`, `text`, `url`:
            `json_metadata = {'imageUrl': '','text': '','url': ''}`
        consensus_marks: Should be between 0 and 1
        honeypot_marks: Should be between 0 and 1
        to_be_labeled_by_array: If given, each element of the list should contain the emails of
            the labelers authorized to label the asset.
        contents: - For a NLP project, the content can be directly in text format
            - For an Image / Video / Pdf project, the content must be hosted on a web server,
            and you point Kili to your data by giving the URLs
        json_contents: - For a NLP project, the `json_content`
            is a text formatted using RichText
            - For a Video project, the `json_content` is a JSON containing URLs pointing
                to each frame of the video.
        status_array: Each element should be in `TODO`, `ONGOING`, `LABELED`, `REVIEWED`
        is_used_for_consensus_array: Whether to use the asset to compute consensus kpis or not
        is_honeypot_array: Whether to use the asset for honeypot

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.

    Examples:
        >>> kili.update_properties_in_assets(
                asset_ids=["ckg22d81r0jrg0885unmuswj8",
                    "ckg22d81s0jrh0885pdxfd03n"],
                consensus_marks=[1, 0.7],
                contents=[None, 'https://to/second/asset.png'],
                external_ids=['external-id-of-your-choice-1',
                    'external-id-of-your-choice-2'],
                honeypot_marks=[0.8, 0.5],
                is_honeypot_array=[True, True],
                is_used_for_consensus_array=[True, False],
                priorities=[None, 2],
                status_array=['LABELED', 'REVIEWED'],
                to_be_labeled_by_array=[
                    ['test+pierre@kili-technology.com'], None],
        )
    """
    # Gather the caller-supplied arrays under their argument names, in
    # signature order, before handing them to the parameter processor.
    parameters = {
        'asset_ids': asset_ids,
        'external_ids': external_ids,
        'priorities': priorities,
        'json_metadatas': json_metadatas,
        'consensus_marks': consensus_marks,
        'honeypot_marks': honeypot_marks,
        'to_be_labeled_by_array': to_be_labeled_by_array,
        'contents': contents,
        'json_contents': json_contents,
        'status_array': status_array,
        'is_used_for_consensus_array': is_used_for_consensus_array,
        'is_honeypot_array': is_honeypot_array,
    }
    properties_to_batch = process_update_properties_in_assets_parameters(
        parameters)

    def _to_variables(batch):
        # Column-oriented view: one GraphQL field name per batched array.
        columns = {
            'externalId': batch['external_ids'],
            'priority': batch['priorities'],
            'jsonMetadata': batch['json_metadatas'],
            'consensusMark': batch['consensus_marks'],
            'honeypotMark': batch['honeypot_marks'],
            'toBeLabeledBy': batch['to_be_labeled_by_array'],
            'shouldResetToBeLabeledBy': batch['should_reset_to_be_labeled_by_array'],
            'content': batch['contents'],
            'jsonContent': batch['json_contents'],
            'status': batch['status_array'],
            'isUsedForConsensus': batch['is_used_for_consensus_array'],
            'isHoneypot': batch['is_honeypot_array']
        }
        # Transpose the columns into one dict per row.
        field_names = list(columns)
        per_asset_data = [dict(zip(field_names, row))
                          for row in zip(*columns.values())]
        where_array = [{'id': asset_id} for asset_id in batch['asset_ids']]
        return {
            'whereArray': where_array,
            'dataArray': per_asset_data
        }

    results = _mutate_from_paginated_call(
        self, properties_to_batch, _to_variables, GQL_UPDATE_PROPERTIES_IN_ASSETS)
    formatted_batches = [format_result('data', result, Asset)
                         for result in results]
    # Flatten the per-result lists into a single list.
    flattened = []
    for formatted_batch in formatted_batches:
        for item in formatted_batch:
            flattened.append(item)
    return flattened