Asset module
Set of Asset queries
Source code in kili/queries/asset/
class QueriesAsset:
Set of Asset queries
# pylint: disable=too-many-arguments,too-many-locals
def __init__(self, auth):
"""Initialize the subclass.
auth: KiliAuth object
self.auth = auth
# pylint: disable=dangerous-default-value
@Compatible(['v1', 'v2'])
def assets(self,
asset_id: Optional[str] = None,
project_id: Optional[str] = None,
skip: int = 0,
fields: List[str] = ['content',
asset_id_in: Optional[List[str]] = None,
consensus_mark_gt: Optional[float] = None,
consensus_mark_lt: Optional[float] = None,
disable_tqdm: bool = False,
external_id_contains: Optional[List[str]] = None,
first: Optional[int] = None,
format: Optional[str] = None, # pylint: disable=redefined-builtin
honeypot_mark_gt: Optional[float] = None,
honeypot_mark_lt: Optional[float] = None,
label_author_in: Optional[List[str]] = None,
label_consensus_mark_gt: Optional[float] = None,
label_consensus_mark_lt: Optional[float] = None,
label_created_at: Optional[str] = None,
label_created_at_gt: Optional[str] = None,
label_created_at_lt: Optional[str] = None,
label_honeypot_mark_gt: Optional[float] = None,
label_honeypot_mark_lt: Optional[float] = None,
label_json_response_contains: Optional[List[str]] = None,
label_type_in: Optional[List[str]] = None,
metadata_where: Optional[dict] = None,
skipped: Optional[bool] = None,
status_in: Optional[List[str]] = None,
updated_at_gte: Optional[str] = None,
updated_at_lte: Optional[str] = None,
as_generator: bool = False,
) -> Union[List[dict], Generator[dict, None, None], pd.DataFrame]:
# pylint: disable=line-too-long
"""Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.
asset_id: Identifier of the asset to retrieve.
asset_id_in: A list of the IDs of the assets to retrieve.
project_id: Identifier of the project.
skip: Number of assets to skip (they are ordered by their date of creation, first to last).
fields: All the fields to request among the possible fields for the assets.
See the documentation for all possible fields.
first: Maximum number of assets to return.
consensus_mark_gt: Minimum amount of consensus for the asset.
consensus_mark_lt: Maximum amount of consensus for the asset.
external_id_contains: Returned assets have an external id that belongs to that list, if given.
metadata_where: Filters by the values of the metadata of the asset.
honeypot_mark_gt: Minimum amount of honeypot for the asset.
honeypot_mark_lt: Maximum amount of honeypot for the asset.
status_in: Returned assets should have a status that belongs to that list, if given.
Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
label_type_in: Returned assets should have a label whose type belongs to that list, if given.
label_author_in: Returned assets should have a label whose author belongs to that list, if given.
label_consensus_mark_gt: Returned assets should have a label whose consensus is greater than this number.
label_consensus_mark_lt: Returned assets should have a label whose consensus is lower than this number.
label_created_at: Returned assets should have a label whose creation date is equal to this date.
label_created_at_gt: Returned assets should have a label whose creation date is greater than this date.
label_created_at_lt: Returned assets should have a label whose creation date is lower than this date.
label_json_response_contains: Returned assets should have a substring of the label's jsonResponse
that belongs to that list, if given.
label_honeypot_mark_gt: Returned assets should have a label whose honeypot is greater than this number
label_honeypot_mark_lt: Returned assets should have a label whose honeypot is lower than this number
skipped: Returned assets should be skipped
updated_at_gte: Returned assets should have a label whose update date is greater or equal to this date.
updated_at_lte: Returned assets should have a label whose update date is lower or equal to this date.
format: If equal to 'pandas', returns a pandas DataFrame
disable_tqdm: If `True`, the progress bar will be disabled
as_generator: If `True`, a generator on the assets is returned.
!!! info "Dates format"
Date strings should have format: "YYYY-MM-DD"
A result object which contains the query if it was successful,
or an error message.
>>> kili.assets(project_id=project_id) # returns the assets list of the project
>>> kili.assets(asset_id=asset_id)
>>> kili.assets(project_id=project_id, as_generator=True) # returns a generator of the project assets
!!! example "How to filter based on Metadata"
- `metadata_where = {key1: "value1"}` to filter on assets whose metadata
have key "key1" with value "value1"
- `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
have key "key1" with value "value1" or value "value2"
- `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
have key "key2" with a value between 2 and 10.
if format == "pandas" and as_generator:
raise ValueError(
"Argument values as_generator==True and format==\"pandas\" are not compatible.")
saved_args = locals()
count_args = {k: v for (k, v) in saved_args.items()
if k not in ['skip', 'first', 'disable_tqdm', 'format', 'fields', 'self', 'as_generator']}
# using tqdm with a generator is messy, so it is always disabled
disable_tqdm = disable_tqdm or as_generator
payload_query = {
'where': {
'id': asset_id,
'project': {
'id': project_id,
'externalIdIn': external_id_contains,
'statusIn': status_in,
'consensusMarkGte': consensus_mark_gt,
'consensusMarkLte': consensus_mark_lt,
'honeypotMarkGte': honeypot_mark_gt,
'honeypotMarkLte': honeypot_mark_lt,
'idIn': asset_id_in,
'metadata': metadata_where,
'label': {
'typeIn': label_type_in,
'authorIn': label_author_in,
'consensusMarkGte': label_consensus_mark_gt,
'consensusMarkLte': label_consensus_mark_lt,
'createdAt': label_created_at,
'createdAtGte': label_created_at_gt,
'createdAtLte': label_created_at_lt,
'honeypotMarkGte': label_honeypot_mark_gt,
'honeypotMarkLte': label_honeypot_mark_lt,
'jsonResponseContains': label_json_response_contains,
'skipped': skipped,
'updatedAtGte': updated_at_gte,
'updatedAtLte': updated_at_lte,
asset_generator = row_generator_from_paginated_calls(
if format == "pandas":
return pd.DataFrame(list(asset_generator))
if as_generator:
return asset_generator
return list(asset_generator)
def _query_assets(self,
                  skip: int,
                  first: int,
                  payload: dict,
                  fields: List[str]):
    """Execute one page of the assets GraphQL query and format the response.

    Args:
        skip: Value stored in ``payload`` under the ``"skip"`` key.
        first: Value stored in ``payload`` under the ``"first"`` key.
        payload: Query variables. This dict is modified in place: the
            ``"skip"`` and ``"first"`` keys are set (or overwritten).
        fields: Asset fields used to build the query fragment.

    Returns:
        Whatever ``format_result('data', result, Asset)`` produces for the
        client's response.
    """
    # Pagination bounds travel with the rest of the query variables.
    payload.update(skip=skip, first=first)
    query = gql_assets(fragment_builder(fields, AssetType))
    response = self.auth.client.execute(query, payload)
    return format_result('data', response, Asset)
@Compatible(['v1', 'v2'])
def count_assets(self, asset_id: Optional[str] = None,
project_id: Optional[str] = None,
asset_id_in: Optional[List[str]] = None,
external_id_contains: Optional[List[str]] = None,
metadata_where: Optional[dict] = None,
status_in: Optional[List[str]] = None,
consensus_mark_gt: Optional[float] = None,
consensus_mark_lt: Optional[float] = None,
honeypot_mark_gt: Optional[float] = None,
honeypot_mark_lt: Optional[float] = None,
label_type_in: Optional[List[str]] = None,
label_author_in: Optional[List[str]] = None,
label_consensus_mark_gt: Optional[float] = None,
label_consensus_mark_lt: Optional[float] = None,
label_created_at: Optional[str] = None,
label_created_at_gt: Optional[str] = None,
label_created_at_lt: Optional[str] = None,
label_honeypot_mark_gt: Optional[float] = None,
label_honeypot_mark_lt: Optional[float] = None,
label_json_response_contains: Optional[List[str]] = None,
skipped: Optional[bool] = None,
updated_at_gte: Optional[str] = None,
updated_at_lte: Optional[str] = None) -> int:
"""Count and return the number of assets with the given constraints.
Parameters beginning with 'label_' apply to labels, others apply to assets.
asset_id: The unique id of the asset to retrieve.
asset_id_in: A list of the ids of the assets to retrieve.
project_id: Identifier of the project
external_id_contains: Returned assets should have an external id
that belongs to that list, if given.
metadata_where: Filters by the values of the metadata of the asset.
status_in: Returned assets should have a status that belongs to that list, if given.
Possible choices : `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
consensus_mark_gt: Minimum amount of consensus for the asset.
consensus_mark_lt: Maximum amount of consensus for the asset.
honeypot_mark_gt: Minimum amount of honeypot for the asset.
honeypot_mark_lt: Maximum amount of honeypot for the asset.
label_type_in: Returned assets should have a label
whose type belongs to that list, if given.
label_author_in: Returned assets should have a label
whose author belongs to that list, if given.
label_consensus_mark_gt: Returned assets should have a label
whose consensus is greater than this number.
label_consensus_mark_lt: Returned assets should have a label
whose consensus is lower than this number.
label_created_at: Returned assets should have a label
whose creation date is equal to this date.
label_created_at_gt: Returned assets should have a label
whose creation date is greater than this date.
label_created_at_lt: Returned assets should have a label
whose creation date is lower than this date.
label_honeypot_mark_gt: Returned assets should have a label
whose honeypot is greater than this number.
label_honeypot_mark_lt: Returned assets should have a label
whose honeypot is lower than this number.
label_json_response_contains: Returned assets should have
a substring of the label's jsonResponse that belongs
to that list, if given.
skipped: Returned assets should be skipped
updated_at_gte: Returned assets should have a label
whose update date is greater or equal to this date.
updated_at_lte: Returned assets should have a label
whose update date is lower or equal to this date.
!!! info "Dates format"
Date strings should have format: "YYYY-MM-DD"
A result object which contains the query if it was successful,
or an error message.
>>> kili.count_assets(project_id=project_id)
>>> kili.count_assets(asset_id=asset_id)
!!! example "How to filter based on Metadata"
- `metadata_where = {key1: "value1"}` to filter on assets whose metadata
have key "key1" with value "value1"
- `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
have key "key1" with value "value1" or value "value2"
- `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
have key "key2" with a value between 2 and 10.
variables = {
'where': {
'id': asset_id,
'project': {
'id': project_id,
'externalIdIn': external_id_contains,
'statusIn': status_in,
'consensusMarkGte': consensus_mark_gt,
'consensusMarkLte': consensus_mark_lt,
'honeypotMarkGte': honeypot_mark_gt,
'honeypotMarkLte': honeypot_mark_lt,
'idIn': asset_id_in,
'metadata': metadata_where,
'label': {
'typeIn': label_type_in,
'authorIn': label_author_in,
'consensusMarkGte': label_consensus_mark_gt,
'consensusMarkLte': label_consensus_mark_lt,
'createdAt': label_created_at,
'createdAtGte': label_created_at_gt,
'createdAtLte': label_created_at_lt,
'honeypotMarkGte': label_honeypot_mark_gt,
'honeypotMarkLte': label_honeypot_mark_lt,
'jsonResponseContains': label_json_response_contains,
'skipped': skipped,
'updatedAtGte': updated_at_gte,
'updatedAtLte': updated_at_lte,
result = self.auth.client.execute(GQL_ASSETS_COUNT, variables)
count = format_result('data', result)
return count
assets(self, asset_id=None, project_id=None, skip=0, fields=['content', 'createdAt', 'externalId', 'id', 'isHoneypot', 'jsonMetadata', '', '', 'labels.createdAt', '', 'labels.jsonResponse', 'skipped', 'status'], asset_id_in=None, consensus_mark_gt=None, consensus_mark_lt=None, disable_tqdm=False, external_id_contains=None, first=None, format=None, honeypot_mark_gt=None, honeypot_mark_lt=None, label_author_in=None, label_consensus_mark_gt=None, label_consensus_mark_lt=None, label_created_at=None, label_created_at_gt=None, label_created_at_lt=None, label_honeypot_mark_gt=None, label_honeypot_mark_lt=None, label_json_response_contains=None, label_type_in=None, metadata_where=None, skipped=None, status_in=None, updated_at_gte=None, updated_at_lte=None, as_generator=False)
Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.
Name | Type | Description | Default |
asset_id |
Optional[str] |
Identifier of the asset to retrieve. |
None |
asset_id_in |
Optional[List[str]] |
A list of the IDs of the assets to retrieve. |
None |
project_id |
Optional[str] |
Identifier of the project. |
None |
skip |
int |
Number of assets to skip (they are ordered by their date of creation, first to last). |
0 |
fields |
List[str] |
All the fields to request among the possible fields for the assets. See the documentation for all possible fields. |
['content', 'createdAt', 'externalId', 'id', 'isHoneypot', 'jsonMetadata', '', '', 'labels.createdAt', '', 'labels.jsonResponse', 'skipped', 'status'] |
first |
Optional[int] |
Maximum number of assets to return. |
None |
consensus_mark_gt |
Optional[float] |
Minimum amount of consensus for the asset. |
None |
consensus_mark_lt |
Optional[float] |
Maximum amount of consensus for the asset. |
None |
external_id_contains |
Optional[List[str]] |
Returned assets have an external id that belongs to that list, if given. |
None |
metadata_where |
Optional[dict] |
Filters by the values of the metadata of the asset. |
None |
honeypot_mark_gt |
Optional[float] |
Minimum amount of honeypot for the asset. |
None |
honeypot_mark_lt |
Optional[float] |
Maximum amount of honeypot for the asset. |
None |
status_in |
Optional[List[str]] |
Returned assets should have a status that belongs to that list, if given.
Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED` |
None |
label_type_in |
Optional[List[str]] |
Returned assets should have a label whose type belongs to that list, if given. |
None |
label_author_in |
Optional[List[str]] |
Returned assets should have a label whose author belongs to that list, if given. |
None |
label_consensus_mark_gt |
Optional[float] |
Returned assets should have a label whose consensus is greater than this number. |
None |
label_consensus_mark_lt |
Optional[float] |
Returned assets should have a label whose consensus is lower than this number. |
None |
label_created_at |
Optional[str] |
Returned assets should have a label whose creation date is equal to this date. |
None |
label_created_at_gt |
Optional[str] |
Returned assets should have a label whose creation date is greater than this date. |
None |
label_created_at_lt |
Optional[str] |
Returned assets should have a label whose creation date is lower than this date. |
None |
label_json_response_contains |
Optional[List[str]] |
Returned assets should have a substring of the label's jsonResponse that belongs to that list, if given. |
None |
label_honeypot_mark_gt |
Optional[float] |
Returned assets should have a label whose honeypot is greater than this number |
None |
label_honeypot_mark_lt |
Optional[float] |
Returned assets should have a label whose honeypot is lower than this number |
None |
skipped |
Optional[bool] |
Returned assets should be skipped |
None |
updated_at_gte |
Optional[str] |
Returned assets should have a label whose update date is greater or equal to this date. |
None |
updated_at_lte |
Optional[str] |
Returned assets should have a label whose update date is lower or equal to this date. |
None |
format |
Optional[str] |
If equal to 'pandas', returns a pandas DataFrame |
None |
disable_tqdm |
bool |
If `True`, the progress bar will be disabled |
False |
as_generator |
bool |
If `True`, a generator on the assets is returned. |
False |
Dates format
Date strings should have format: "YYYY-MM-DD"
Type | Description |
Union[List[dict], Generator[dict, NoneType], pandas.core.frame.DataFrame] |
A result object which contains the query if it was successful, or an error message. |
>>> kili.assets(project_id=project_id) # returns the assets list of the project
>>> kili.assets(asset_id=asset_id)
>>> kili.assets(project_id=project_id, as_generator=True) # returns a generator of the project assets
How to filter based on Metadata
metadata_where = {key1: "value1"}
to filter on assets whose metadata have key "key1" with value "value1"
metadata_where = {key1: ["value1", "value2"]}
to filter on assets whose metadata have key "key1" with value "value1" or value "value2"
metadata_where = {key2: [2, 10]}
to filter on assets whose metadata have key "key2" with a value between 2 and 10.
Source code in kili/queries/asset/
@Compatible(['v1', 'v2'])
def assets(self,
asset_id: Optional[str] = None,
project_id: Optional[str] = None,
skip: int = 0,
fields: List[str] = ['content',
asset_id_in: Optional[List[str]] = None,
consensus_mark_gt: Optional[float] = None,
consensus_mark_lt: Optional[float] = None,
disable_tqdm: bool = False,
external_id_contains: Optional[List[str]] = None,
first: Optional[int] = None,
format: Optional[str] = None, # pylint: disable=redefined-builtin
honeypot_mark_gt: Optional[float] = None,
honeypot_mark_lt: Optional[float] = None,
label_author_in: Optional[List[str]] = None,
label_consensus_mark_gt: Optional[float] = None,
label_consensus_mark_lt: Optional[float] = None,
label_created_at: Optional[str] = None,
label_created_at_gt: Optional[str] = None,
label_created_at_lt: Optional[str] = None,
label_honeypot_mark_gt: Optional[float] = None,
label_honeypot_mark_lt: Optional[float] = None,
label_json_response_contains: Optional[List[str]] = None,
label_type_in: Optional[List[str]] = None,
metadata_where: Optional[dict] = None,
skipped: Optional[bool] = None,
status_in: Optional[List[str]] = None,
updated_at_gte: Optional[str] = None,
updated_at_lte: Optional[str] = None,
as_generator: bool = False,
) -> Union[List[dict], Generator[dict, None, None], pd.DataFrame]:
# pylint: disable=line-too-long
"""Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.
asset_id: Identifier of the asset to retrieve.
asset_id_in: A list of the IDs of the assets to retrieve.
project_id: Identifier of the project.
skip: Number of assets to skip (they are ordered by their date of creation, first to last).
fields: All the fields to request among the possible fields for the assets.
See the documentation for all possible fields.
first: Maximum number of assets to return.
consensus_mark_gt: Minimum amount of consensus for the asset.
consensus_mark_lt: Maximum amount of consensus for the asset.
external_id_contains: Returned assets have an external id that belongs to that list, if given.
metadata_where: Filters by the values of the metadata of the asset.
honeypot_mark_gt: Minimum amount of honeypot for the asset.
honeypot_mark_lt: Maximum amount of honeypot for the asset.
status_in: Returned assets should have a status that belongs to that list, if given.
Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
label_type_in: Returned assets should have a label whose type belongs to that list, if given.
label_author_in: Returned assets should have a label whose author belongs to that list, if given.
label_consensus_mark_gt: Returned assets should have a label whose consensus is greater than this number.
label_consensus_mark_lt: Returned assets should have a label whose consensus is lower than this number.
label_created_at: Returned assets should have a label whose creation date is equal to this date.
label_created_at_gt: Returned assets should have a label whose creation date is greater than this date.
label_created_at_lt: Returned assets should have a label whose creation date is lower than this date.
label_json_response_contains: Returned assets should have a substring of the label's jsonResponse
that belongs to that list, if given.
label_honeypot_mark_gt: Returned assets should have a label whose honeypot is greater than this number
label_honeypot_mark_lt: Returned assets should have a label whose honeypot is lower than this number
skipped: Returned assets should be skipped
updated_at_gte: Returned assets should have a label whose update date is greater or equal to this date.
updated_at_lte: Returned assets should have a label whose update date is lower or equal to this date.
format: If equal to 'pandas', returns a pandas DataFrame
disable_tqdm: If `True`, the progress bar will be disabled
as_generator: If `True`, a generator on the assets is returned.
!!! info "Dates format"
Date strings should have format: "YYYY-MM-DD"
A result object which contains the query if it was successful,
or an error message.
>>> kili.assets(project_id=project_id) # returns the assets list of the project
>>> kili.assets(asset_id=asset_id)
>>> kili.assets(project_id=project_id, as_generator=True) # returns a generator of the project assets
!!! example "How to filter based on Metadata"
- `metadata_where = {key1: "value1"}` to filter on assets whose metadata
have key "key1" with value "value1"
- `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
have key "key1" with value "value1" or value "value2"
- `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
have key "key2" with a value between 2 and 10.
if format == "pandas" and as_generator:
raise ValueError(
"Argument values as_generator==True and format==\"pandas\" are not compatible.")
saved_args = locals()
count_args = {k: v for (k, v) in saved_args.items()
if k not in ['skip', 'first', 'disable_tqdm', 'format', 'fields', 'self', 'as_generator']}
# using tqdm with a generator is messy, so it is always disabled
disable_tqdm = disable_tqdm or as_generator
payload_query = {
'where': {
'id': asset_id,
'project': {
'id': project_id,
'externalIdIn': external_id_contains,
'statusIn': status_in,
'consensusMarkGte': consensus_mark_gt,
'consensusMarkLte': consensus_mark_lt,
'honeypotMarkGte': honeypot_mark_gt,
'honeypotMarkLte': honeypot_mark_lt,
'idIn': asset_id_in,
'metadata': metadata_where,
'label': {
'typeIn': label_type_in,
'authorIn': label_author_in,
'consensusMarkGte': label_consensus_mark_gt,
'consensusMarkLte': label_consensus_mark_lt,
'createdAt': label_created_at,
'createdAtGte': label_created_at_gt,
'createdAtLte': label_created_at_lt,
'honeypotMarkGte': label_honeypot_mark_gt,
'honeypotMarkLte': label_honeypot_mark_lt,
'jsonResponseContains': label_json_response_contains,
'skipped': skipped,
'updatedAtGte': updated_at_gte,
'updatedAtLte': updated_at_lte,
asset_generator = row_generator_from_paginated_calls(
if format == "pandas":
return pd.DataFrame(list(asset_generator))
if as_generator:
return asset_generator
return list(asset_generator)
count_assets(self, asset_id=None, project_id=None, asset_id_in=None, external_id_contains=None, metadata_where=None, status_in=None, consensus_mark_gt=None, consensus_mark_lt=None, honeypot_mark_gt=None, honeypot_mark_lt=None, label_type_in=None, label_author_in=None, label_consensus_mark_gt=None, label_consensus_mark_lt=None, label_created_at=None, label_created_at_gt=None, label_created_at_lt=None, label_honeypot_mark_gt=None, label_honeypot_mark_lt=None, label_json_response_contains=None, skipped=None, updated_at_gte=None, updated_at_lte=None)
Count and return the number of assets with the given constraints.
Parameters beginning with 'label_' apply to labels, others apply to assets.
Name | Type | Description | Default |
asset_id |
Optional[str] |
The unique id of the asset to retrieve. |
None |
asset_id_in |
Optional[List[str]] |
A list of the ids of the assets to retrieve. |
None |
project_id |
Optional[str] |
Identifier of the project |
None |
external_id_contains |
Optional[List[str]] |
Returned assets should have an external id that belongs to that list, if given. |
None |
metadata_where |
Optional[dict] |
Filters by the values of the metadata of the asset. |
None |
status_in |
Optional[List[str]] |
Returned assets should have a status that belongs to that list, if given.
Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED` |
None |
consensus_mark_gt |
Optional[float] |
Minimum amount of consensus for the asset. |
None |
consensus_mark_lt |
Optional[float] |
Maximum amount of consensus for the asset. |
None |
honeypot_mark_gt |
Optional[float] |
Minimum amount of honeypot for the asset. |
None |
honeypot_mark_lt |
Optional[float] |
Maximum amount of honeypot for the asset. |
None |
label_type_in |
Optional[List[str]] |
Returned assets should have a label whose type belongs to that list, if given. |
None |
label_author_in |
Optional[List[str]] |
Returned assets should have a label whose author belongs to that list, if given. |
None |
label_consensus_mark_gt |
Optional[float] |
Returned assets should have a label whose consensus is greater than this number. |
None |
label_consensus_mark_lt |
Optional[float] |
Returned assets should have a label whose consensus is lower than this number. |
None |
label_created_at |
Optional[str] |
Returned assets should have a label whose creation date is equal to this date. |
None |
label_created_at_gt |
Optional[str] |
Returned assets should have a label whose creation date is greater than this date. |
None |
label_created_at_lt |
Optional[str] |
Returned assets should have a label whose creation date is lower than this date. |
None |
label_honeypot_mark_gt |
Optional[float] |
Returned assets should have a label whose honeypot is greater than this number. |
None |
label_honeypot_mark_lt |
Optional[float] |
Returned assets should have a label whose honeypot is lower than this number. |
None |
label_json_response_contains |
Optional[List[str]] |
Returned assets should have a substring of the label's jsonResponse that belongs to that list, if given. |
None |
skipped |
Optional[bool] |
Returned assets should be skipped |
None |
updated_at_gte |
Optional[str] |
Returned assets should have a label whose update date is greater or equal to this date. |
None |
updated_at_lte |
Optional[str] |
Returned assets should have a label whose update date is lower or equal to this date. |
None |
Dates format
Date strings should have format: "YYYY-MM-DD"
Type | Description |
int |
A result object which contains the query if it was successful, or an error message. |
>>> kili.count_assets(project_id=project_id)
>>> kili.count_assets(asset_id=asset_id)
How to filter based on Metadata
metadata_where = {key1: "value1"}
to filter on assets whose metadata have key "key1" with value "value1"
metadata_where = {key1: ["value1", "value2"]}
to filter on assets whose metadata have key "key1" with value "value1" or value "value2"
metadata_where = {key2: [2, 10]}
to filter on assets whose metadata have key "key2" with a value between 2 and 10.
Source code in kili/queries/asset/
@Compatible(['v1', 'v2'])
def count_assets(self, asset_id: Optional[str] = None,
project_id: Optional[str] = None,
asset_id_in: Optional[List[str]] = None,
external_id_contains: Optional[List[str]] = None,
metadata_where: Optional[dict] = None,
status_in: Optional[List[str]] = None,
consensus_mark_gt: Optional[float] = None,
consensus_mark_lt: Optional[float] = None,
honeypot_mark_gt: Optional[float] = None,
honeypot_mark_lt: Optional[float] = None,
label_type_in: Optional[List[str]] = None,
label_author_in: Optional[List[str]] = None,
label_consensus_mark_gt: Optional[float] = None,
label_consensus_mark_lt: Optional[float] = None,
label_created_at: Optional[str] = None,
label_created_at_gt: Optional[str] = None,
label_created_at_lt: Optional[str] = None,
label_honeypot_mark_gt: Optional[float] = None,
label_honeypot_mark_lt: Optional[float] = None,
label_json_response_contains: Optional[List[str]] = None,
skipped: Optional[bool] = None,
updated_at_gte: Optional[str] = None,
updated_at_lte: Optional[str] = None) -> int:
"""Count and return the number of assets with the given constraints.
Parameters beginning with 'label_' apply to labels, others apply to assets.
asset_id: The unique id of the asset to retrieve.
asset_id_in: A list of the ids of the assets to retrieve.
project_id: Identifier of the project
external_id_contains: Returned assets should have an external id
that belongs to that list, if given.
metadata_where: Filters by the values of the metadata of the asset.
status_in: Returned assets should have a status that belongs to that list, if given.
Possible choices : `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
consensus_mark_gt: Minimum amount of consensus for the asset.
consensus_mark_lt: Maximum amount of consensus for the asset.
honeypot_mark_gt: Minimum amount of honeypot for the asset.
honeypot_mark_lt: Maximum amount of honeypot for the asset.
label_type_in: Returned assets should have a label
whose type belongs to that list, if given.
label_author_in: Returned assets should have a label
whose author belongs to that list, if given.
label_consensus_mark_gt: Returned assets should have a label
whose consensus is greater than this number.
label_consensus_mark_lt: Returned assets should have a label
whose consensus is lower than this number.
label_created_at: Returned assets should have a label
whose creation date is equal to this date.
label_created_at_gt: Returned assets should have a label
whose creation date is greater than this date.
label_created_at_lt: Returned assets should have a label
whose creation date is lower than this date.
label_honeypot_mark_gt: Returned assets should have a label
whose honeypot is greater than this number.
label_honeypot_mark_lt: Returned assets should have a label
whose honeypot is lower than this number.
label_json_response_contains: Returned assets should have
a substring of the label's jsonResponse that belongs
to that list, if given.
skipped: Returned assets should be skipped
updated_at_gte: Returned assets should have a label
whose update date is greater or equal to this date.
updated_at_lte: Returned assets should have a label
whose update date is lower or equal to this date.
!!! info "Dates format"
Date strings should have format: "YYYY-MM-DD"
A result object which contains the query if it was successful,
or an error message.
>>> kili.count_assets(project_id=project_id)
>>> kili.count_assets(asset_id=asset_id)
!!! example "How to filter based on Metadata"
- `metadata_where = {key1: "value1"}` to filter on assets whose metadata
have key "key1" with value "value1"
- `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
have key "key1" with value "value1" or value "value2"
- `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
have key "key2" with a value between 2 and 10.
variables = {
'where': {
'id': asset_id,
'project': {
'id': project_id,
'externalIdIn': external_id_contains,
'statusIn': status_in,
'consensusMarkGte': consensus_mark_gt,
'consensusMarkLte': consensus_mark_lt,
'honeypotMarkGte': honeypot_mark_gt,
'honeypotMarkLte': honeypot_mark_lt,
'idIn': asset_id_in,
'metadata': metadata_where,
'label': {
'typeIn': label_type_in,
'authorIn': label_author_in,
'consensusMarkGte': label_consensus_mark_gt,
'consensusMarkLte': label_consensus_mark_lt,
'createdAt': label_created_at,
'createdAtGte': label_created_at_gt,
'createdAtLte': label_created_at_lt,
'honeypotMarkGte': label_honeypot_mark_gt,
'honeypotMarkLte': label_honeypot_mark_lt,
'jsonResponseContains': label_json_response_contains,
'skipped': skipped,
'updatedAtGte': updated_at_gte,
'updatedAtLte': updated_at_lte,
result = self.auth.client.execute(GQL_ASSETS_COUNT, variables)
count = format_result('data', result)
return count
Set of Asset mutations
Source code in kili/mutations/asset/
class MutationsAsset:
Set of Asset mutations
# pylint: disable=too-many-arguments,too-many-locals
def __init__(self, auth):
"""Initialize the subclass.
auth: KiliAuth object
self.auth = auth
@Compatible(['v1', 'v2'])
def append_many_to_dataset(
project_id: str,
content_array: Optional[List[str]] = None,
external_id_array: Optional[List[str]] = None,
is_honeypot_array: Optional[List[bool]] = None,
status_array: Optional[List[str]] = None,
json_content_array: Optional[List[List[Union[dict, str]]]] = None,
json_metadata_array: Optional[List[dict]] = None):
# pylint: disable=line-too-long
"""Append assets to a project.
project_id: Identifier of the project
content_array: List of elements added to the assets of the project
Must not be None except if you provide json_content_array.
- For a `TEXT` project, the content can be either raw text, or URLs to TEXT assets.
- For an `IMAGE` / `PDF` project, the content can be either URLs or paths to existing
images/pdf on your computer.
- For a `VIDEO` project, the content must be hosted on a web server,
and you point Kili to your data by giving the URLs.
external_id_array: List of external ids given to identify the assets.
If None, random identifiers are created.
is_honeypot_array: Whether to use the asset for honeypot
status_array: By default, all imported assets are set to `TODO`. Other options: `ONGOING`, `LABELED`, `REVIEWED`.
json_content_array: Useful for `FRAME` or `TEXT` projects only.
- For `FRAME` projects, each element is a sequence of frames, i.e. a
list of URLs to images or a list of paths to images.
- For `TEXT` projects, each element is a json_content dict,
formatted according to the documentation on how to import
rich-text assets.
json_metadata_array: The metadata given to each asset should be stored in a json like dict with keys.
- Add metadata visible on the asset with the following keys: `imageUrl`, `text`, `url`.
Example for one asset: `json_metadata_array = [{'imageUrl': '','text': '','url': ''}]`.
- For video, you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30).
Example for one asset: `json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}]`.
A result object which indicates if the mutation was successful, or an error message.
>>> kili.append_many_to_dataset(
!!! example "Recipe"
- For more detailed examples on how to import assets,
see the recipe or other examples in our documentation.
- For more detailed examples on how to import text assets,
see the recipe.
kili = QueriesProject(self.auth)
projects = kili.projects(project_id, disable_tqdm=True)
assert len(projects) == 1, NO_ACCESS_RIGHT
input_type = projects[0]['inputType']
properties_to_batch, upload_type, request = process_append_many_to_dataset_parameters(input_type,
def generate_variables(batch):
payload_data = {'contentArray': batch['content_array'],
'externalIDArray': batch['external_id_array'],
'jsonMetadataArray': batch['json_metadata_array'],
'uploadType': upload_type}
payload_data = {'contentArray': batch['content_array'],
'externalIDArray': batch['external_id_array'],
'isHoneypotArray': batch['is_honeypot_array'],
'statusArray': batch['status_array'],
'jsonContentArray': batch['json_content_array'],
'jsonMetadataArray': batch['json_metadata_array']}
return {
'data': payload_data,
'where': {'id': project_id}
results = _mutate_from_paginated_call(
self, properties_to_batch, generate_variables, request)
return format_result('data', results[0], Asset)
#pylint: disable=unused-argument
def update_properties_in_assets(self,
asset_ids: List[str],
external_ids: Optional[List[str]] = None,
priorities: Optional[List[int]] = None,
json_metadatas: Optional[List[Union[dict, str]]] = None,
consensus_marks: Optional[List[float]] = None,
honeypot_marks: Optional[List[float]] = None,
to_be_labeled_by_array: Optional[List[List[str]]] = None,
contents: Optional[List[str]] = None,
json_contents: Optional[List[str]] = None,
status_array: Optional[List[str]] = None,
is_used_for_consensus_array: Optional[List[bool]] = None,
is_honeypot_array: Optional[List[bool]] = None) -> List[dict]:
"""Update the properties of one or more assets.
asset_ids : The asset IDs to modify
external_ids: Change the external id of the assets
priorities : You can change the priority of the assets
By default, all assets have a priority of 0.
json_metadatas: The metadata given to an asset should be stored
in a json like dict with keys `imageUrl`, `text`, `url`:
`json_metadata = {'imageUrl': '','text': '','url': ''}`
consensus_marks: Should be between 0 and 1
honeypot_marks: Should be between 0 and 1
to_be_labeled_by_array: If given, each element of the list should contain the emails of
the labelers authorized to label the asset.
contents: - For an NLP project, the content can be directly in text format
- For an Image / Video / Pdf project, the content must be hosted on a web server,
and you point Kili to your data by giving the URLs
json_contents: - For an NLP project, the `json_content`
is a text formatted using RichText
- For a Video project, the `json_content` is a JSON containing URLs pointing
to each frame of the video.
status_array: Each element should be in `TODO`, `ONGOING`, `LABELED`, `REVIEWED`
is_used_for_consensus_array: Whether to use the asset to compute consensus kpis or not
is_honeypot_array: Whether to use the asset for honeypot
A result object which indicates if the mutation was successful,
or an error message.
>>> kili.update_properties_in_assets(
asset_ids=["ckg22d81r0jrg0885unmuswj8", "ckg22d81s0jrh0885pdxfd03n"],
consensus_marks=[1, 0.7],
contents=[None, 'https://to/second/asset.png'],
external_ids=['external-id-of-your-choice-1', 'external-id-of-your-choice-2'],
honeypot_marks=[0.8, 0.5],
is_honeypot_array=[True, True],
is_used_for_consensus_array=[True, False],
priorities=[None, 2],
status_array=['LABELED', 'REVIEWED'],
to_be_labeled_by_array=[[''], None],
saved_args = locals()
parameters = {k: v for (k, v) in saved_args.items() if k in
properties_to_batch = process_update_properties_in_assets_parameters(
def generate_variables(batch):
data = {
'externalId': batch['external_ids'],
'priority': batch['priorities'],
'jsonMetadata': batch['json_metadatas'],
'consensusMark': batch['consensus_marks'],
'honeypotMark': batch['honeypot_marks'],
'toBeLabeledBy': batch['to_be_labeled_by_array'],
'shouldResetToBeLabeledBy': batch['should_reset_to_be_labeled_by_array'],
'content': batch['contents'],
'jsonContent': batch['json_contents'],
'status': batch['status_array'],
'isUsedForConsensus': batch['is_used_for_consensus_array'],
'isHoneypot': batch['is_honeypot_array']
data_array = [dict(zip(data, t)) for t in zip(*data.values())]
return {
'whereArray': [{'id': asset_id} for asset_id in batch['asset_ids']],
'dataArray': data_array
results = _mutate_from_paginated_call(
self, properties_to_batch, generate_variables, GQL_UPDATE_PROPERTIES_IN_ASSETS)
formated_results = [format_result(
'data', result, Asset) for result in results]
return [item for batch_list in formated_results for item in batch_list]
@Compatible(['v1', 'v2'])
def delete_many_from_dataset(self, asset_ids: List[str]):
"""Delete assets from a project.
asset_ids: The list of identifiers of the assets to delete.
A result object which indicates if the mutation was successful,
or an error message.
properties_to_batch = {'asset_ids': asset_ids}
def generate_variables(batch):
return {'where': {'idIn': batch['asset_ids']}}
results = _mutate_from_paginated_call(self,
return format_result('data', results[0], Asset)
append_many_to_dataset(self, project_id, content_array=None, external_id_array=None, is_honeypot_array=None, status_array=None, json_content_array=None, json_metadata_array=None)
Append assets to a project.
Name | Type | Description | Default |
project_id |
str |
Identifier of the project |
required |
content_array |
Optional[List[str]] |
List of elements added to the assets of the project. Must not be None except if you provide json_content_array.
None |
external_id_array |
Optional[List[str]] |
List of external ids given to identify the assets. If None, random identifiers are created. |
None |
is_honeypot_array |
Optional[List[bool]] |
Whether to use the asset for honeypot |
None |
status_array |
Optional[List[str]] |
By default, all imported assets are set to `TODO`. Other options: `ONGOING`, `LABELED`, `REVIEWED`. |
None |
json_content_array |
Optional[List[List[Union[dict, str]]]] |
Useful for `FRAME` or `TEXT` projects only.
None |
json_metadata_array |
Optional[List[dict]] |
The metadata given to each asset should be stored in a json like dict with keys.
None |
Type | Description |
A result object which indicates if the mutation was successful, or an error message. |
>>> kili.append_many_to_dataset(
- For more detailed examples on how to import assets, see the recipe or other examples in our documentation.
- For more detailed examples on how to import text assets, see the recipe.
Source code in kili/mutations/asset/
@Compatible(['v1', 'v2'])
def append_many_to_dataset(
project_id: str,
content_array: Optional[List[str]] = None,
external_id_array: Optional[List[str]] = None,
is_honeypot_array: Optional[List[bool]] = None,
status_array: Optional[List[str]] = None,
json_content_array: Optional[List[List[Union[dict, str]]]] = None,
json_metadata_array: Optional[List[dict]] = None):
# pylint: disable=line-too-long
"""Append assets to a project.
project_id: Identifier of the project
content_array: List of elements added to the assets of the project
Must not be None except if you provide json_content_array.
- For a `TEXT` project, the content can be either raw text, or URLs to TEXT assets.
- For an `IMAGE` / `PDF` project, the content can be either URLs or paths to existing
images/pdf on your computer.
- For a `VIDEO` project, the content must be hosted on a web server,
and you point Kili to your data by giving the URLs.
external_id_array: List of external ids given to identify the assets.
If None, random identifiers are created.
is_honeypot_array: Whether to use the asset for honeypot
status_array: By default, all imported assets are set to `TODO`. Other options: `ONGOING`, `LABELED`, `REVIEWED`.
json_content_array: Useful for `FRAME` or `TEXT` projects only.
- For `FRAME` projects, each element is a sequence of frames, i.e. a
list of URLs to images or a list of paths to images.
- For `TEXT` projects, each element is a json_content dict,
formatted according to the documentation on how to import
rich-text assets.
json_metadata_array: The metadata given to each asset should be stored in a json like dict with keys.
- Add metadata visible on the asset with the following keys: `imageUrl`, `text`, `url`.
Example for one asset: `json_metadata_array = [{'imageUrl': '','text': '','url': ''}]`.
- For video, you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30).
Example for one asset: `json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}]`.
A result object which indicates if the mutation was successful, or an error message.
>>> kili.append_many_to_dataset(
!!! example "Recipe"
- For more detailed examples on how to import assets,
see the recipe or other examples in our documentation.
- For more detailed examples on how to import text assets,
see the recipe.
kili = QueriesProject(self.auth)
projects = kili.projects(project_id, disable_tqdm=True)
assert len(projects) == 1, NO_ACCESS_RIGHT
input_type = projects[0]['inputType']
properties_to_batch, upload_type, request = process_append_many_to_dataset_parameters(input_type,
def generate_variables(batch):
payload_data = {'contentArray': batch['content_array'],
'externalIDArray': batch['external_id_array'],
'jsonMetadataArray': batch['json_metadata_array'],
'uploadType': upload_type}
payload_data = {'contentArray': batch['content_array'],
'externalIDArray': batch['external_id_array'],
'isHoneypotArray': batch['is_honeypot_array'],
'statusArray': batch['status_array'],
'jsonContentArray': batch['json_content_array'],
'jsonMetadataArray': batch['json_metadata_array']}
return {
'data': payload_data,
'where': {'id': project_id}
results = _mutate_from_paginated_call(
self, properties_to_batch, generate_variables, request)
return format_result('data', results[0], Asset)
delete_many_from_dataset(self, asset_ids)
Delete assets from a project.
Name | Type | Description | Default |
asset_ids |
List[str] |
The list of identifiers of the assets to delete. |
required |
Type | Description |
A result object which indicates if the mutation was successful, or an error message. |
Source code in kili/mutations/asset/
@Compatible(['v1', 'v2'])
def delete_many_from_dataset(self, asset_ids: List[str]):
"""Delete assets from a project.
asset_ids: The list of identifiers of the assets to delete.
A result object which indicates if the mutation was successful,
or an error message.
properties_to_batch = {'asset_ids': asset_ids}
def generate_variables(batch):
return {'where': {'idIn': batch['asset_ids']}}
results = _mutate_from_paginated_call(self,
return format_result('data', results[0], Asset)
update_properties_in_assets(self, asset_ids, external_ids=None, priorities=None, json_metadatas=None, consensus_marks=None, honeypot_marks=None, to_be_labeled_by_array=None, contents=None, json_contents=None, status_array=None, is_used_for_consensus_array=None, is_honeypot_array=None)
Update the properties of one or more assets.
Name | Type | Description | Default |
asset_ids |
List[str] |
The asset IDs to modify |
required |
external_ids |
Optional[List[str]] |
Change the external id of the assets |
None |
priorities |
Optional[List[int]] |
You can change the priority of the assets. By default, all assets have a priority of 0. |
None |
json_metadatas |
Optional[List[Union[dict, str]]] |
The metadata given to an asset should be stored
in a json like dict with keys `imageUrl`, `text`, `url`. |
None |
consensus_marks |
Optional[List[float]] |
Should be between 0 and 1 |
None |
honeypot_marks |
Optional[List[float]] |
Should be between 0 and 1 |
None |
to_be_labeled_by_array |
Optional[List[List[str]]] |
If given, each element of the list should contain the emails of the labelers authorized to label the asset. |
None |
contents |
Optional[List[str]] |
None |
json_contents |
Optional[List[str]] |
None |
status_array |
Optional[List[str]] |
Each element should be in `TODO`, `ONGOING`, `LABELED`, `REVIEWED` |
None |
is_used_for_consensus_array |
Optional[List[bool]] |
Whether to use the asset to compute consensus kpis or not |
None |
is_honeypot_array |
Optional[List[bool]] |
Whether to use the asset for honeypot |
None |
Type | Description |
List[dict] |
A result object which indicates if the mutation was successful, or an error message. |
>>> kili.update_properties_in_assets(
asset_ids=["ckg22d81r0jrg0885unmuswj8", "ckg22d81s0jrh0885pdxfd03n"],
consensus_marks=[1, 0.7],
contents=[None, 'https://to/second/asset.png'],
external_ids=['external-id-of-your-choice-1', 'external-id-of-your-choice-2'],
honeypot_marks=[0.8, 0.5],
is_honeypot_array=[True, True],
is_used_for_consensus_array=[True, False],
priorities=[None, 2],
status_array=['LABELED', 'REVIEWED'],
to_be_labeled_by_array=[[''], None],
Source code in kili/mutations/asset/
#pylint: disable=unused-argument
def update_properties_in_assets(self,
asset_ids: List[str],
external_ids: Optional[List[str]] = None,
priorities: Optional[List[int]] = None,
json_metadatas: Optional[List[Union[dict, str]]] = None,
consensus_marks: Optional[List[float]] = None,
honeypot_marks: Optional[List[float]] = None,
to_be_labeled_by_array: Optional[List[List[str]]] = None,
contents: Optional[List[str]] = None,
json_contents: Optional[List[str]] = None,
status_array: Optional[List[str]] = None,
is_used_for_consensus_array: Optional[List[bool]] = None,
is_honeypot_array: Optional[List[bool]] = None) -> List[dict]:
"""Update the properties of one or more assets.
asset_ids : The asset IDs to modify
external_ids: Change the external id of the assets
priorities : You can change the priority of the assets
By default, all assets have a priority of 0.
json_metadatas: The metadata given to an asset should be stored
in a json like dict with keys `imageUrl`, `text`, `url`:
`json_metadata = {'imageUrl': '','text': '','url': ''}`
consensus_marks: Should be between 0 and 1
honeypot_marks: Should be between 0 and 1
to_be_labeled_by_array: If given, each element of the list should contain the emails of
the labelers authorized to label the asset.
contents: - For an NLP project, the content can be directly in text format
- For an Image / Video / Pdf project, the content must be hosted on a web server,
and you point Kili to your data by giving the URLs
json_contents: - For an NLP project, the `json_content`
is a text formatted using RichText
- For a Video project, the `json_content` is a JSON containing URLs pointing
to each frame of the video.
status_array: Each element should be in `TODO`, `ONGOING`, `LABELED`, `REVIEWED`
is_used_for_consensus_array: Whether to use the asset to compute consensus kpis or not
is_honeypot_array: Whether to use the asset for honeypot
A result object which indicates if the mutation was successful,
or an error message.
>>> kili.update_properties_in_assets(
asset_ids=["ckg22d81r0jrg0885unmuswj8", "ckg22d81s0jrh0885pdxfd03n"],
consensus_marks=[1, 0.7],
contents=[None, 'https://to/second/asset.png'],
external_ids=['external-id-of-your-choice-1', 'external-id-of-your-choice-2'],
honeypot_marks=[0.8, 0.5],
is_honeypot_array=[True, True],
is_used_for_consensus_array=[True, False],
priorities=[None, 2],
status_array=['LABELED', 'REVIEWED'],
to_be_labeled_by_array=[[''], None],
saved_args = locals()
parameters = {k: v for (k, v) in saved_args.items() if k in
properties_to_batch = process_update_properties_in_assets_parameters(
def generate_variables(batch):
data = {
'externalId': batch['external_ids'],
'priority': batch['priorities'],
'jsonMetadata': batch['json_metadatas'],
'consensusMark': batch['consensus_marks'],
'honeypotMark': batch['honeypot_marks'],
'toBeLabeledBy': batch['to_be_labeled_by_array'],
'shouldResetToBeLabeledBy': batch['should_reset_to_be_labeled_by_array'],
'content': batch['contents'],
'jsonContent': batch['json_contents'],
'status': batch['status_array'],
'isUsedForConsensus': batch['is_used_for_consensus_array'],
'isHoneypot': batch['is_honeypot_array']
data_array = [dict(zip(data, t)) for t in zip(*data.values())]
return {
'whereArray': [{'id': asset_id} for asset_id in batch['asset_ids']],
'dataArray': data_array
results = _mutate_from_paginated_call(
self, properties_to_batch, generate_variables, GQL_UPDATE_PROPERTIES_IN_ASSETS)
formated_results = [format_result(
'data', result, Asset) for result in results]
return [item for batch_list in formated_results for item in batch_list]