Asset module
Queries
Set of Asset queries
Source code in kili/queries/asset/__init__.py
class QueriesAsset:
    """Set of Asset queries."""

    # pylint: disable=too-many-arguments,too-many-locals

    def __init__(self, auth):
        """Keep a reference to the authentication object used by the queries.

        Args:
            auth: KiliAuth object
        """
        self.auth = auth
# pylint: disable=dangerous-default-value
def assets(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    skip: int = 0,
    fields: List[str] = [
        "content",
        "createdAt",
        "externalId",
        "id",
        "isHoneypot",
        "jsonMetadata",
        "labels.author.id",
        "labels.author.email",
        "labels.createdAt",
        "labels.id",
        "labels.jsonResponse",
        "skipped",
        "status",
    ],
    asset_id_in: Optional[List[str]] = None,
    consensus_mark_gt: Optional[float] = None,
    consensus_mark_lt: Optional[float] = None,
    disable_tqdm: bool = False,
    external_id_contains: Optional[List[str]] = None,
    first: Optional[int] = None,
    format: Optional[str] = None,  # pylint: disable=redefined-builtin
    honeypot_mark_gt: Optional[float] = None,
    honeypot_mark_lt: Optional[float] = None,
    label_author_in: Optional[List[str]] = None,
    label_consensus_mark_gt: Optional[float] = None,
    label_consensus_mark_lt: Optional[float] = None,
    label_created_at: Optional[str] = None,
    label_created_at_gt: Optional[str] = None,
    label_created_at_lt: Optional[str] = None,
    label_honeypot_mark_gt: Optional[float] = None,
    label_honeypot_mark_lt: Optional[float] = None,
    label_type_in: Optional[List[str]] = None,
    metadata_where: Optional[dict] = None,
    skipped: Optional[bool] = None,
    status_in: Optional[List[str]] = None,
    updated_at_gte: Optional[str] = None,
    updated_at_lte: Optional[str] = None,
    as_generator: bool = False,
    label_category_search: Optional[str] = None,
    download_media: bool = False,
    local_media_dir: Optional[str] = None,
) -> Iterable[Dict]:
    # pylint: disable=line-too-long
    """Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.

    Args:
        project_id: Identifier of the project.
        asset_id: Identifier of the asset to retrieve.
        asset_id_in: A list of the IDs of the assets to retrieve.
        skip: Number of assets to skip (they are ordered by their date of creation, first to last).
        fields: All the fields to request among the possible fields for the assets.
            See [the documentation](https://docs.kili-technology.com/reference/graphql-api#asset) for all possible fields.
        first: Maximum number of assets to return.
        consensus_mark_gt: Minimum amount of consensus for the asset.
        consensus_mark_lt: Maximum amount of consensus for the asset.
        external_id_contains: Returned assets have an external id that belongs to that list, if given.
        metadata_where: Filters by the values of the metadata of the asset.
        honeypot_mark_gt: Minimum amount of honeypot for the asset.
        honeypot_mark_lt: Maximum amount of honeypot for the asset.
        status_in: Returned assets should have a status that belongs to that list, if given.
            Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
        label_type_in: Returned assets should have a label whose type belongs to that list, if given.
        label_author_in: Returned assets should have a label whose author belongs to that list, if given.
        label_consensus_mark_gt: Returned assets should have a label whose consensus is greater than this number.
        label_consensus_mark_lt: Returned assets should have a label whose consensus is lower than this number.
        label_created_at: Returned assets should have a label whose creation date is equal to this date.
        label_created_at_gt: Returned assets should have a label whose creation date is greater than this date.
        label_created_at_lt: Returned assets should have a label whose creation date is lower than this date.
        label_honeypot_mark_gt: Returned assets should have a label whose honeypot is greater than this number.
        label_honeypot_mark_lt: Returned assets should have a label whose honeypot is lower than this number.
        skipped: Returned assets should be skipped
        updated_at_gte: Returned assets should have a label whose update date is greater or equal to this date.
        updated_at_lte: Returned assets should have a label whose update date is lower or equal to this date.
        format: If equal to 'pandas', returns a pandas DataFrame
        disable_tqdm: If `True`, the progress bar will be disabled
        as_generator: If `True`, a generator on the assets is returned.
        label_category_search: Returned assets should have a label that follows this category search query.
        download_media: Whether the media should be downloaded.
        local_media_dir: Directory where the media is downloaded if `download_media` is True.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        A list of assets by default, a generator over the assets if `as_generator`
        is `True`, or a pandas DataFrame if `format` is 'pandas'.

    Raises:
        ValueError: If `as_generator` is `True` and `format` is 'pandas' at the same time.

    Example:
        ```
        # returns the assets list of the project
        >>> kili.assets(project_id)
        >>> kili.assets(project_id, asset_id=asset_id)
        # returns a generator of the project assets
        >>> kili.assets(project_id, as_generator=True)
        ```

    !!! example "How to filter based on Metadata"
        - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
            have key "key1" with value "value1"
        - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
            have key "key1" with value "value1" or value "value2"
        - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
            have key "key2" with a value between 2 and 10.

    !!! example "How to filter based on label categories"
        The search query is composed of logical expressions following this format:

            [job_name].[category_name].count [comparison_operator] [value]
        where:

        - `[job_name]` is the name of the job in the interface
        - `[category_name]` is the name of the category in the interface for this job
        - `[comparison_operator]` can be one of: [`==`, `>=`, `<=`, `<`, `>`]
        - `[value]` is an integer that represents the count of such objects of the given category in the label

        These operations can be separated by OR and AND operators

        Example:

            label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
            label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
            label_category_search = `(JOB_CLASSIF.CATEGORY_A.count == 1 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
    """
    if format == "pandas" and as_generator:
        raise ValueError(
            'Argument values as_generator==True and format=="pandas" are not compatible.'
        )

    # Snapshot the call arguments BEFORE binding any other local: everything that
    # is not a pagination/output option is forwarded as a keyword filter to
    # `count_assets`. `dict(...)` makes the snapshot independent of later updates
    # of the frame's locals mapping.
    saved_args = dict(locals())
    non_filter_args = {
        "skip",
        "first",
        "disable_tqdm",
        "format",
        "fields",
        "self",
        "as_generator",
        "download_media",
        "local_media_dir",
    }
    count_args = {k: v for k, v in saved_args.items() if k not in non_filter_args}

    # The default `fields` list is shared between calls; hand a private copy to the
    # helpers so that nothing downstream can alter the default.
    fields = list(fields)

    # using tqdm with a generator is messy, so it is always disabled
    disable_tqdm = disable_tqdm or as_generator

    if label_category_search:
        validate_category_search_query(label_category_search)

    payload_query = {
        "where": {
            "id": asset_id,
            "project": {
                "id": project_id,
            },
            "externalIdIn": external_id_contains,
            "statusIn": status_in,
            "consensusMarkGte": consensus_mark_gt,
            "consensusMarkLte": consensus_mark_lt,
            "honeypotMarkGte": honeypot_mark_gt,
            "honeypotMarkLte": honeypot_mark_lt,
            "idIn": asset_id_in,
            "metadata": metadata_where,
            "label": {
                "typeIn": label_type_in,
                "authorIn": label_author_in,
                "consensusMarkGte": label_consensus_mark_gt,
                "consensusMarkLte": label_consensus_mark_lt,
                "createdAt": label_created_at,
                "createdAtGte": label_created_at_gt,
                "createdAtLte": label_created_at_lt,
                "honeypotMarkGte": label_honeypot_mark_gt,
                "honeypotMarkLte": label_honeypot_mark_lt,
                "search": label_category_search,
            },
            "skipped": skipped,
            "updatedAtGte": updated_at_gte,
            "updatedAtLte": updated_at_lte,
        },
    }

    post_call_process = get_post_assets_call_process(
        download_media, local_media_dir, project_id
    )

    asset_generator = row_generator_from_paginated_calls(
        skip,
        first,
        self.count_assets,
        count_args,
        self._query_assets,
        payload_query,
        fields,
        disable_tqdm,
        post_call_process,
    )

    if format == "pandas":
        return pd.DataFrame(list(asset_generator))
    if as_generator:
        return asset_generator
    return list(asset_generator)
def _query_assets(self, skip: int, first: int, payload: dict, fields: List[str]):
    """Execute the assets GraphQL query for one page and return the formatted result.

    Args:
        skip: Value sent as the `skip` variable of the query.
        first: Value sent as the `first` variable of the query.
        payload: Query variables (the `where` filter); it is left untouched.
        fields: Asset fields used to build the GraphQL fragment.

    Returns:
        The `data` part of the response, formatted as a list of assets.
    """
    # Build a fresh variables dict rather than updating `payload` in place, so the
    # caller's filter dict is never polluted with pagination keys.
    variables = {**payload, "skip": skip, "first": first}
    _gql_assets = gql_assets(fragment_builder(fields, AssetType))
    result = self.auth.client.execute(_gql_assets, variables)
    return format_result("data", result, _object=List[Asset])
@typechecked
def count_assets(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_id_in: Optional[List[str]] = None,
    external_id_contains: Optional[List[str]] = None,
    metadata_where: Optional[dict] = None,
    status_in: Optional[List[str]] = None,
    consensus_mark_gt: Optional[float] = None,
    consensus_mark_lt: Optional[float] = None,
    honeypot_mark_gt: Optional[float] = None,
    honeypot_mark_lt: Optional[float] = None,
    label_type_in: Optional[List[str]] = None,
    label_author_in: Optional[List[str]] = None,
    label_consensus_mark_gt: Optional[float] = None,
    label_consensus_mark_lt: Optional[float] = None,
    label_created_at: Optional[str] = None,
    label_created_at_gt: Optional[str] = None,
    label_created_at_lt: Optional[str] = None,
    label_honeypot_mark_gt: Optional[float] = None,
    label_honeypot_mark_lt: Optional[float] = None,
    skipped: Optional[bool] = None,
    updated_at_gte: Optional[str] = None,
    updated_at_lte: Optional[str] = None,
    label_category_search: Optional[str] = None,
) -> int:
    """Count and return the number of assets with the given constraints.

    Parameters beginning with 'label_' apply to labels, others apply to assets.

    Args:
        project_id: Identifier of the project
        asset_id: The unique id of the asset to retrieve.
        asset_id_in: A list of the ids of the assets to retrieve.
        external_id_contains: Returned assets should have an external id
            that belongs to that list, if given.
        metadata_where: Filters by the values of the metadata of the asset.
        status_in: Returned assets should have a status that belongs to that list, if given.
            Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
        consensus_mark_gt: Minimum amount of consensus for the asset.
        consensus_mark_lt: Maximum amount of consensus for the asset.
        honeypot_mark_gt: Minimum amount of honeypot for the asset.
        honeypot_mark_lt: Maximum amount of honeypot for the asset.
        label_type_in: Returned assets should have a label
            whose type belongs to that list, if given.
        label_author_in: Returned assets should have a label
            whose author belongs to that list, if given.
        label_consensus_mark_gt: Returned assets should have a label
            whose consensus is greater than this number.
        label_consensus_mark_lt: Returned assets should have a label
            whose consensus is lower than this number.
        label_created_at: Returned assets should have a label
            whose creation date is equal to this date.
        label_created_at_gt: Returned assets should have a label
            whose creation date is greater than this date.
        label_created_at_lt: Returned assets should have a label
            whose creation date is lower than this date.
        label_honeypot_mark_gt: Returned assets should have a label
            whose honeypot is greater than this number.
        label_honeypot_mark_lt: Returned assets should have a label
            whose honeypot is lower than this number.
        skipped: Returned assets should be skipped
        updated_at_gte: Returned assets should have a label
            whose update date is greater or equal to this date.
        updated_at_lte: Returned assets should have a label
            whose update date is lower or equal to this date.
        label_category_search: Returned assets should have a label
            that follows this category search query.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        The number of assets that match the given constraints.

    Examples:
        >>> kili.count_assets(project_id=project_id)
        250
        >>> kili.count_assets(project_id=project_id, asset_id=asset_id)
        1

    !!! example "How to filter based on Metadata"
        - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
            have key "key1" with value "value1"
        - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
            have key "key1" with value "value1" or value "value2"
        - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
            have key "key2" with a value between 2 and 10.
    """
    # The category search query is validated before anything is sent.
    if label_category_search:
        validate_category_search_query(label_category_search)

    # Constraints that apply to the labels of the assets.
    label_filter = {
        "typeIn": label_type_in,
        "authorIn": label_author_in,
        "consensusMarkGte": label_consensus_mark_gt,
        "consensusMarkLte": label_consensus_mark_lt,
        "createdAt": label_created_at,
        "createdAtGte": label_created_at_gt,
        "createdAtLte": label_created_at_lt,
        "honeypotMarkGte": label_honeypot_mark_gt,
        "honeypotMarkLte": label_honeypot_mark_lt,
        "search": label_category_search,
    }

    # Constraints that apply to the assets themselves.
    asset_filter = {
        "id": asset_id,
        "project": {"id": project_id},
        "externalIdIn": external_id_contains,
        "statusIn": status_in,
        "consensusMarkGte": consensus_mark_gt,
        "consensusMarkLte": consensus_mark_lt,
        "honeypotMarkGte": honeypot_mark_gt,
        "honeypotMarkLte": honeypot_mark_lt,
        "idIn": asset_id_in,
        "metadata": metadata_where,
        "label": label_filter,
        "skipped": skipped,
        "updatedAtGte": updated_at_gte,
        "updatedAtLte": updated_at_lte,
    }

    response = self.auth.client.execute(GQL_ASSETS_COUNT, {"where": asset_filter})
    return format_result("data", response, int)
assets(self, project_id, asset_id=None, skip=0, fields=['content', 'createdAt', 'externalId', 'id', 'isHoneypot', 'jsonMetadata', 'labels.author.id', 'labels.author.email', 'labels.createdAt', 'labels.id', 'labels.jsonResponse', 'skipped', 'status'], asset_id_in=None, consensus_mark_gt=None, consensus_mark_lt=None, disable_tqdm=False, external_id_contains=None, first=None, format=None, honeypot_mark_gt=None, honeypot_mark_lt=None, label_author_in=None, label_consensus_mark_gt=None, label_consensus_mark_lt=None, label_created_at=None, label_created_at_gt=None, label_created_at_lt=None, label_honeypot_mark_gt=None, label_honeypot_mark_lt=None, label_type_in=None, metadata_where=None, skipped=None, status_in=None, updated_at_gte=None, updated_at_lte=None, as_generator=False, label_category_search=None, download_media=False, local_media_dir=None)
Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
project_id |
str |
Identifier of the project. |
required |
asset_id |
Optional[str] |
Identifier of the asset to retrieve. |
None |
asset_id_in |
Optional[List[str]] |
A list of the IDs of the assets to retrieve. |
None |
skip |
int |
Number of assets to skip (they are ordered by their date of creation, first to last). |
0 |
fields |
List[str] |
All the fields to request among the possible fields for the assets. See the documentation for all possible fields. |
['content', 'createdAt', 'externalId', 'id', 'isHoneypot', 'jsonMetadata', 'labels.author.id', 'labels.author.email', 'labels.createdAt', 'labels.id', 'labels.jsonResponse', 'skipped', 'status'] |
first |
Optional[int] |
Maximum number of assets to return. |
None |
consensus_mark_gt |
Optional[float] |
Minimum amount of consensus for the asset. |
None |
consensus_mark_lt |
Optional[float] |
Maximum amount of consensus for the asset. |
None |
external_id_contains |
Optional[List[str]] |
Returned assets have an external id that belongs to that list, if given. |
None |
metadata_where |
Optional[dict] |
Filters by the values of the metadata of the asset. |
None |
honeypot_mark_gt |
Optional[float] |
Minimum amount of honeypot for the asset. |
None |
honeypot_mark_lt |
Optional[float] |
Maximum amount of honeypot for the asset. |
None |
status_in |
Optional[List[str]] |
Returned assets should have a status that belongs to that list, if given.
Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED` |
None |
label_type_in |
Optional[List[str]] |
Returned assets should have a label whose type belongs to that list, if given. |
None |
label_author_in |
Optional[List[str]] |
Returned assets should have a label whose author belongs to that list, if given. |
None |
label_consensus_mark_gt |
Optional[float] |
Returned assets should have a label whose consensus is greater than this number. |
None |
label_consensus_mark_lt |
Optional[float] |
Returned assets should have a label whose consensus is lower than this number. |
None |
label_created_at |
Optional[str] |
Returned assets should have a label whose creation date is equal to this date. |
None |
label_created_at_gt |
Optional[str] |
Returned assets should have a label whose creation date is greater than this date. |
None |
label_created_at_lt |
Optional[str] |
Returned assets should have a label whose creation date is lower than this date. |
None |
label_honeypot_mark_gt |
Optional[float] |
Returned assets should have a label whose honeypot is greater than this number |
None |
label_honeypot_mark_lt |
Optional[float] |
Returned assets should have a label whose honeypot is lower than this number |
None |
skipped |
Optional[bool] |
Returned assets should be skipped |
None |
updated_at_gte |
Optional[str] |
Returned assets should have a label whose update date is greater or equal to this date. |
None |
updated_at_lte |
Optional[str] |
Returned assets should have a label whose update date is lower or equal to this date. |
None |
format |
Optional[str] |
If equal to 'pandas', returns a pandas DataFrame |
None |
disable_tqdm |
bool |
If `True`, the progress bar will be disabled |
False |
as_generator |
bool |
If `True`, a generator on the assets is returned. |
False |
label_category_search |
Optional[str] |
Returned assets should have a label that follows this category search query. |
None |
download_media |
bool |
Whether the media should be downloaded. |
False |
local_media_dir |
Optional[str] |
Directory where the media is downloaded if `download_media` is True. |
None |
Dates format
Date strings should have format: "YYYY-MM-DD"
Returns:
Type | Description |
---|---|
Iterable[Dict] |
A list of assets by default, a generator over the assets if `as_generator` is `True`, or a pandas DataFrame if `format` is 'pandas'. |
Examples:
# returns the assets list of the project
>>> kili.assets(project_id)
>>> kili.assets(project_id, asset_id=asset_id)
# returns a generator of the project assets
>>> kili.assets(project_id, as_generator=True)
How to filter based on Metadata
- `metadata_where = {key1: "value1"}` to filter on assets whose metadata have key "key1" with value "value1"
- `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata have key "key1" with value "value1" or value "value2"
- `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata have key "key2" with a value between 2 and 10.
How to filter based on label categories
The search query is composed of logical expressions following this format:
[job_name].[category_name].count [comparison_operator] [value]
where:
- `[job_name]` is the name of the job in the interface
- `[category_name]` is the name of the category in the interface for this job
- `[comparison_operator]` can be one of: [`==`, `>=`, `<=`, `<`, `>`]
- `[value]` is an integer that represents the count of such objects of the given category in the label
These operations can be separated by OR and AND operators
Example:
label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
label_category_search = `(JOB_CLASSIF.CATEGORY_A.count == 1 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
Source code in kili/queries/asset/__init__.py
def assets(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    skip: int = 0,
    fields: List[str] = [
        "content",
        "createdAt",
        "externalId",
        "id",
        "isHoneypot",
        "jsonMetadata",
        "labels.author.id",
        "labels.author.email",
        "labels.createdAt",
        "labels.id",
        "labels.jsonResponse",
        "skipped",
        "status",
    ],
    asset_id_in: Optional[List[str]] = None,
    consensus_mark_gt: Optional[float] = None,
    consensus_mark_lt: Optional[float] = None,
    disable_tqdm: bool = False,
    external_id_contains: Optional[List[str]] = None,
    first: Optional[int] = None,
    format: Optional[str] = None,  # pylint: disable=redefined-builtin
    honeypot_mark_gt: Optional[float] = None,
    honeypot_mark_lt: Optional[float] = None,
    label_author_in: Optional[List[str]] = None,
    label_consensus_mark_gt: Optional[float] = None,
    label_consensus_mark_lt: Optional[float] = None,
    label_created_at: Optional[str] = None,
    label_created_at_gt: Optional[str] = None,
    label_created_at_lt: Optional[str] = None,
    label_honeypot_mark_gt: Optional[float] = None,
    label_honeypot_mark_lt: Optional[float] = None,
    label_type_in: Optional[List[str]] = None,
    metadata_where: Optional[dict] = None,
    skipped: Optional[bool] = None,
    status_in: Optional[List[str]] = None,
    updated_at_gte: Optional[str] = None,
    updated_at_lte: Optional[str] = None,
    as_generator: bool = False,
    label_category_search: Optional[str] = None,
    download_media: bool = False,
    local_media_dir: Optional[str] = None,
) -> Iterable[Dict]:
    # pylint: disable=line-too-long
    """Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.

    Args:
        project_id: Identifier of the project.
        asset_id: Identifier of the asset to retrieve.
        asset_id_in: A list of the IDs of the assets to retrieve.
        skip: Number of assets to skip (they are ordered by their date of creation, first to last).
        fields: All the fields to request among the possible fields for the assets.
            See [the documentation](https://docs.kili-technology.com/reference/graphql-api#asset) for all possible fields.
        first: Maximum number of assets to return.
        consensus_mark_gt: Minimum amount of consensus for the asset.
        consensus_mark_lt: Maximum amount of consensus for the asset.
        external_id_contains: Returned assets have an external id that belongs to that list, if given.
        metadata_where: Filters by the values of the metadata of the asset.
        honeypot_mark_gt: Minimum amount of honeypot for the asset.
        honeypot_mark_lt: Maximum amount of honeypot for the asset.
        status_in: Returned assets should have a status that belongs to that list, if given.
            Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
        label_type_in: Returned assets should have a label whose type belongs to that list, if given.
        label_author_in: Returned assets should have a label whose author belongs to that list, if given.
        label_consensus_mark_gt: Returned assets should have a label whose consensus is greater than this number.
        label_consensus_mark_lt: Returned assets should have a label whose consensus is lower than this number.
        label_created_at: Returned assets should have a label whose creation date is equal to this date.
        label_created_at_gt: Returned assets should have a label whose creation date is greater than this date.
        label_created_at_lt: Returned assets should have a label whose creation date is lower than this date.
        label_honeypot_mark_gt: Returned assets should have a label whose honeypot is greater than this number.
        label_honeypot_mark_lt: Returned assets should have a label whose honeypot is lower than this number.
        skipped: Returned assets should be skipped
        updated_at_gte: Returned assets should have a label whose update date is greater or equal to this date.
        updated_at_lte: Returned assets should have a label whose update date is lower or equal to this date.
        format: If equal to 'pandas', returns a pandas DataFrame
        disable_tqdm: If `True`, the progress bar will be disabled
        as_generator: If `True`, a generator on the assets is returned.
        label_category_search: Returned assets should have a label that follows this category search query.
        download_media: Whether the media should be downloaded.
        local_media_dir: Directory where the media is downloaded if `download_media` is True.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        A list of assets by default, a generator over the assets if `as_generator`
        is `True`, or a pandas DataFrame if `format` is 'pandas'.

    Raises:
        ValueError: If `as_generator` is `True` and `format` is 'pandas' at the same time.

    Example:
        ```
        # returns the assets list of the project
        >>> kili.assets(project_id)
        >>> kili.assets(project_id, asset_id=asset_id)
        # returns a generator of the project assets
        >>> kili.assets(project_id, as_generator=True)
        ```

    !!! example "How to filter based on Metadata"
        - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
            have key "key1" with value "value1"
        - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
            have key "key1" with value "value1" or value "value2"
        - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
            have key "key2" with a value between 2 and 10.

    !!! example "How to filter based on label categories"
        The search query is composed of logical expressions following this format:

            [job_name].[category_name].count [comparison_operator] [value]
        where:

        - `[job_name]` is the name of the job in the interface
        - `[category_name]` is the name of the category in the interface for this job
        - `[comparison_operator]` can be one of: [`==`, `>=`, `<=`, `<`, `>`]
        - `[value]` is an integer that represents the count of such objects of the given category in the label

        These operations can be separated by OR and AND operators

        Example:

            label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
            label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
            label_category_search = `(JOB_CLASSIF.CATEGORY_A.count == 1 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
    """
    if format == "pandas" and as_generator:
        raise ValueError(
            'Argument values as_generator==True and format=="pandas" are not compatible.'
        )

    # Snapshot the call arguments BEFORE binding any other local: everything that
    # is not a pagination/output option is forwarded as a keyword filter to
    # `count_assets`. `dict(...)` makes the snapshot independent of later updates
    # of the frame's locals mapping.
    saved_args = dict(locals())
    non_filter_args = {
        "skip",
        "first",
        "disable_tqdm",
        "format",
        "fields",
        "self",
        "as_generator",
        "download_media",
        "local_media_dir",
    }
    count_args = {k: v for k, v in saved_args.items() if k not in non_filter_args}

    # The default `fields` list is shared between calls; hand a private copy to the
    # helpers so that nothing downstream can alter the default.
    fields = list(fields)

    # using tqdm with a generator is messy, so it is always disabled
    disable_tqdm = disable_tqdm or as_generator

    if label_category_search:
        validate_category_search_query(label_category_search)

    payload_query = {
        "where": {
            "id": asset_id,
            "project": {
                "id": project_id,
            },
            "externalIdIn": external_id_contains,
            "statusIn": status_in,
            "consensusMarkGte": consensus_mark_gt,
            "consensusMarkLte": consensus_mark_lt,
            "honeypotMarkGte": honeypot_mark_gt,
            "honeypotMarkLte": honeypot_mark_lt,
            "idIn": asset_id_in,
            "metadata": metadata_where,
            "label": {
                "typeIn": label_type_in,
                "authorIn": label_author_in,
                "consensusMarkGte": label_consensus_mark_gt,
                "consensusMarkLte": label_consensus_mark_lt,
                "createdAt": label_created_at,
                "createdAtGte": label_created_at_gt,
                "createdAtLte": label_created_at_lt,
                "honeypotMarkGte": label_honeypot_mark_gt,
                "honeypotMarkLte": label_honeypot_mark_lt,
                "search": label_category_search,
            },
            "skipped": skipped,
            "updatedAtGte": updated_at_gte,
            "updatedAtLte": updated_at_lte,
        },
    }

    post_call_process = get_post_assets_call_process(
        download_media, local_media_dir, project_id
    )

    asset_generator = row_generator_from_paginated_calls(
        skip,
        first,
        self.count_assets,
        count_args,
        self._query_assets,
        payload_query,
        fields,
        disable_tqdm,
        post_call_process,
    )

    if format == "pandas":
        return pd.DataFrame(list(asset_generator))
    if as_generator:
        return asset_generator
    return list(asset_generator)
count_assets(self, project_id, asset_id=None, asset_id_in=None, external_id_contains=None, metadata_where=None, status_in=None, consensus_mark_gt=None, consensus_mark_lt=None, honeypot_mark_gt=None, honeypot_mark_lt=None, label_type_in=None, label_author_in=None, label_consensus_mark_gt=None, label_consensus_mark_lt=None, label_created_at=None, label_created_at_gt=None, label_created_at_lt=None, label_honeypot_mark_gt=None, label_honeypot_mark_lt=None, skipped=None, updated_at_gte=None, updated_at_lte=None, label_category_search=None)
Count and return the number of assets with the given constraints.
Parameters beginning with 'label_' apply to labels, others apply to assets.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
project_id |
str |
Identifier of the project |
required |
asset_id |
Optional[str] |
The unique id of the asset to retrieve. |
None |
asset_id_in |
Optional[List[str]] |
A list of the ids of the assets to retrieve. |
None |
external_id_contains |
Optional[List[str]] |
Returned assets should have an external id that belongs to that list, if given. |
None |
metadata_where |
Optional[dict] |
Filters by the values of the metadata of the asset. |
None |
status_in |
Optional[List[str]] |
Returned assets should have a status that belongs to that list, if given.
Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED` |
None |
consensus_mark_gt |
Optional[float] |
Minimum amount of consensus for the asset. |
None |
consensus_mark_lt |
Optional[float] |
Maximum amount of consensus for the asset. |
None |
honeypot_mark_gt |
Optional[float] |
Minimum amount of honeypot for the asset. |
None |
honeypot_mark_lt |
Optional[float] |
Maximum amount of honeypot for the asset. |
None |
label_type_in |
Optional[List[str]] |
Returned assets should have a label whose type belongs to that list, if given. |
None |
label_author_in |
Optional[List[str]] |
Returned assets should have a label whose author belongs to that list, if given. |
None |
label_consensus_mark_gt |
Optional[float] |
Returned assets should have a label whose consensus is greater than this number. |
None |
label_consensus_mark_lt |
Optional[float] |
Returned assets should have a label whose consensus is lower than this number. |
None |
label_created_at |
Optional[str] |
Returned assets should have a label whose creation date is equal to this date. |
None |
label_created_at_gt |
Optional[str] |
Returned assets should have a label whose creation date is greater than this date. |
None |
label_created_at_lt |
Optional[str] |
Returned assets should have a label whose creation date is lower than this date. |
None |
label_honeypot_mark_gt |
Optional[float] |
Returned assets should have a label whose honeypot is greater than this number. |
None |
label_honeypot_mark_lt |
Optional[float] |
Returned assets should have a label whose honeypot is lower than this number. |
None |
skipped |
Optional[bool] |
Returned assets should be skipped |
None |
updated_at_gte |
Optional[str] |
Returned assets should have a label whose update date is greater or equal to this date. |
None |
updated_at_lte |
Optional[str] |
Returned assets should have a label whose update date is lower or equal to this date. |
None |
Dates format
Date strings should have format: "YYYY-MM-DD"
Returns:
Type | Description |
---|---|
int |
The number of assets that match the given constraints. |
Examples:
>>> kili.count_assets(project_id=project_id)
250
>>> kili.count_assets(project_id=project_id, asset_id=asset_id)
1
How to filter based on Metadata
- `metadata_where = {key1: "value1"}` to filter on assets whose metadata have key "key1" with value "value1"
- `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata have key "key1" with value "value1" or value "value2"
- `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata have key "key2" with a value between 2 and 10.
Source code in kili/queries/asset/__init__.py
@typechecked
def count_assets(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_id_in: Optional[List[str]] = None,
    external_id_contains: Optional[List[str]] = None,
    metadata_where: Optional[dict] = None,
    status_in: Optional[List[str]] = None,
    consensus_mark_gt: Optional[float] = None,
    consensus_mark_lt: Optional[float] = None,
    honeypot_mark_gt: Optional[float] = None,
    honeypot_mark_lt: Optional[float] = None,
    label_type_in: Optional[List[str]] = None,
    label_author_in: Optional[List[str]] = None,
    label_consensus_mark_gt: Optional[float] = None,
    label_consensus_mark_lt: Optional[float] = None,
    label_created_at: Optional[str] = None,
    label_created_at_gt: Optional[str] = None,
    label_created_at_lt: Optional[str] = None,
    label_honeypot_mark_gt: Optional[float] = None,
    label_honeypot_mark_lt: Optional[float] = None,
    skipped: Optional[bool] = None,
    updated_at_gte: Optional[str] = None,
    updated_at_lte: Optional[str] = None,
    label_category_search: Optional[str] = None,
) -> int:
    """Count the assets that satisfy the given constraints.

    Parameters beginning with 'label_' apply to labels, others apply to assets.

    Args:
        project_id: Identifier of the project
        asset_id: The unique id of the asset to retrieve.
        asset_id_in: A list of the ids of the assets to retrieve.
        external_id_contains: Returned assets should have an external id
            that belongs to that list, if given.
        metadata_where: Filters by the values of the metadata of the asset.
        status_in: Returned assets should have a status that belongs to that list, if given.
            Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
        consensus_mark_gt: Minimum amount of consensus for the asset.
        consensus_mark_lt: Maximum amount of consensus for the asset.
        honeypot_mark_gt: Minimum amount of honeypot for the asset.
        honeypot_mark_lt: Maximum amount of honeypot for the asset.
        label_type_in: Returned assets should have a label
            whose type belongs to that list, if given.
        label_author_in: Returned assets should have a label
            whose author belongs to that list, if given.
        label_consensus_mark_gt: Returned assets should have a label
            whose consensus is greater than this number.
        label_consensus_mark_lt: Returned assets should have a label
            whose consensus is lower than this number.
        label_created_at: Returned assets should have a label
            whose creation date is equal to this date.
        label_created_at_gt: Returned assets should have a label
            whose creation date is greater than this date.
        label_created_at_lt: Returned assets should have a label
            whose creation date is lower than this date.
        label_honeypot_mark_gt: Returned assets should have a label
            whose honeypot is greater than this number.
        label_honeypot_mark_lt: Returned assets should have a label
            whose honeypot is lower than this number.
        skipped: Returned assets should be skipped
        updated_at_gte: Returned assets should have a label
            whose update date is greater or equal to this date.
        updated_at_lte: Returned assets should have a label
            whose update date is lower or equal to this date.
        label_category_search: Search query on the labels. When given, it is first
            checked with `validate_category_search_query` and then sent as the
            `search` entry of the label filter.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        The number of assets matching the constraints, i.e. the `data` part of
        the query result formatted as an `int`.

    Examples:
        >>> kili.count_assets(project_id=project_id)
        250
        >>> kili.count_assets(asset_id=asset_id)
        1

    !!! example "How to filter based on Metadata"
        - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
            have key "key1" with value "value1"
        - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
            have key "key1" with value "value1" or value "value2"
        - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
            have key "key2" with a value between 2 and 10.
    """
    if label_category_search:
        validate_category_search_query(label_category_search)

    # Label-level constraints. The `*_gt` / `*_lt` arguments are sent under the
    # `...Gte` / `...Lte` filter keys.
    label_filter = {
        "typeIn": label_type_in,
        "authorIn": label_author_in,
        "consensusMarkGte": label_consensus_mark_gt,
        "consensusMarkLte": label_consensus_mark_lt,
        "createdAt": label_created_at,
        "createdAtGte": label_created_at_gt,
        "createdAtLte": label_created_at_lt,
        "honeypotMarkGte": label_honeypot_mark_gt,
        "honeypotMarkLte": label_honeypot_mark_lt,
        "search": label_category_search,
    }

    # Asset-level constraints; `external_id_contains` is sent as `externalIdIn`.
    asset_filter = {
        "id": asset_id,
        "project": {
            "id": project_id,
        },
        "externalIdIn": external_id_contains,
        "statusIn": status_in,
        "consensusMarkGte": consensus_mark_gt,
        "consensusMarkLte": consensus_mark_lt,
        "honeypotMarkGte": honeypot_mark_gt,
        "honeypotMarkLte": honeypot_mark_lt,
        "idIn": asset_id_in,
        "metadata": metadata_where,
        "label": label_filter,
        "skipped": skipped,
        "updatedAtGte": updated_at_gte,
        "updatedAtLte": updated_at_lte,
    }

    response = self.auth.client.execute(GQL_ASSETS_COUNT, {"where": asset_filter})
    return format_result("data", response, int)
Mutations
Set of Asset mutations
Source code in kili/mutations/asset/__init__.py
class MutationsAsset:
    """
    Set of Asset mutations
    """

    # pylint: disable=too-many-arguments,too-many-locals

    def __init__(self, auth):
        """Initialize the subclass.

        Args:
            auth: KiliAuth object
        """
        self.auth = auth

    @typechecked
    def append_many_to_dataset(
        self,
        project_id: str,
        content_array: Optional[List[str]] = None,
        external_id_array: Optional[List[str]] = None,
        id_array: Optional[List[str]] = None,
        is_honeypot_array: Optional[List[bool]] = None,
        status_array: Optional[List[str]] = None,
        json_content_array: Optional[List[List[Union[dict, str]]]] = None,
        json_metadata_array: Optional[List[dict]] = None,
        disable_tqdm: bool = False,
    ):
        # pylint: disable=line-too-long
        """Append assets to a project.

        Args:
            project_id: Identifier of the project
            content_array: List of elements added to the assets of the project
                Must not be None except if you provide json_content_array.

                - For a `TEXT` project, the content can be either raw text, or URLs to TEXT assets.
                - For an `IMAGE` / `PDF` project, the content can be either URLs or paths to existing
                    images/pdf on your computer.
                - For a `VIDEO` project, the content must be hosted on a web server,
                    and you point Kili to your data by giving the URLs.
            external_id_array: List of external ids given to identify the assets.
                If None, random identifiers are created.
            id_array: Optional list whose elements are set as the `id` field of the
                imported assets.
            is_honeypot_array: Whether to use the asset for honeypot
            status_array: By default, all imported assets are set to `TODO`. Other options:
                `ONGOING`, `LABELED`, `REVIEWED`.
            json_content_array: Useful for `VIDEO` or `TEXT` projects only.

                - For `FRAME` projects, each element is a sequence of frames, i.e. a
                    list of URLs to images or a list of paths to images.
                - For `TEXT` projects, each element is a json_content dict,
                    formatted according to documentation [on how to import
                    rich-text assets](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb)
            json_metadata_array: The metadata given to each asset should be stored in a json like dict with keys.

                - Add metadata visible on the asset with the following keys: `imageUrl`, `text`, `url`.
                    Example for one asset: `json_metadata_array = [{'imageUrl': '','text': '','url': ''}]`.
                - For video, you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30).
                    Example for one asset: `json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}]`.
            disable_tqdm: If `True`, the progress bar will be disabled

        Returns:
            A result object which indicates if the mutation was successful, or an error message.

        Raises:
            ValueError: If both `content_array` and `json_content_array` are None, or if
                a provided array does not hold exactly one element per asset.

        Examples:
            >>> kili.append_many_to_dataset(
                    project_id=project_id,
                    content_array=['https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'])

        !!! example "Recipe"
            - For more detailed examples on how to import assets,
                see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_assets.ipynb)
                or [other examples](https://docs.kili-technology.com/recipes/importing-data) in our documentation.
            - For more detailed examples on how to import text assets,
                see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb).
        """
        if content_array is None and json_content_array is None:
            raise ValueError("Variables content_array and json_content_array cannot be both None.")

        # The number of assets is given by content_array, else by json_content_array.
        nb_data = (
            len(content_array)
            if content_array is not None
            else len(json_content_array)  # type:ignore
        )
        field_mapping = {
            "content": content_array,
            "json_content": json_content_array,
            "external_id": external_id_array,
            "id": id_array,
            "status": status_array,
            "json_metadata": json_metadata_array,
            "is_honeypot": is_honeypot_array,
        }
        provided_fields = {
            key: values for key, values in field_mapping.items() if values is not None
        }

        # Every provided array is indexed once per asset: a shorter array would fail
        # with a bare IndexError and a longer one would be silently truncated, so
        # reject mismatched lengths explicitly before importing anything.
        for key, values in provided_fields.items():
            if len(values) != nb_data:
                raise ValueError(
                    f"Variable {key}_array has {len(values)} elements but {nb_data} assets"
                    " are being imported: all provided arrays must have the same length."
                )

        # One dict per asset, holding only the fields that were provided.
        assets = [
            {key: values[index] for key, values in provided_fields.items()}
            for index in range(nb_data)
        ]
        return import_assets(
            self.auth, project_id=project_id, assets=assets, disable_tqdm=disable_tqdm
        )

    @typechecked
    # pylint: disable=unused-argument
    def update_properties_in_assets(
        self,
        asset_ids: List[str],
        external_ids: Optional[List[str]] = None,
        priorities: Optional[List[int]] = None,
        json_metadatas: Optional[List[Union[dict, str]]] = None,
        consensus_marks: Optional[List[float]] = None,
        honeypot_marks: Optional[List[float]] = None,
        to_be_labeled_by_array: Optional[List[List[str]]] = None,
        contents: Optional[List[str]] = None,
        json_contents: Optional[List[str]] = None,
        status_array: Optional[List[str]] = None,
        is_used_for_consensus_array: Optional[List[bool]] = None,
        is_honeypot_array: Optional[List[bool]] = None,
    ) -> List[dict]:
        """Update the properties of one or more assets.

        Args:
            asset_ids: The asset IDs to modify
            external_ids: Change the external id of the assets
            priorities: You can change the priority of the assets
                By default, all assets have a priority of 0.
            json_metadatas: The metadata given to an asset should be stored
                in a json like dict with keys `imageUrl`, `text`, `url`:
                `json_metadata = {'imageUrl': '','text': '','url': ''}`
            consensus_marks: Should be between 0 and 1
            honeypot_marks: Should be between 0 and 1
            to_be_labeled_by_array: If given, each element of the list should contain the emails of
                the labelers authorized to label the asset.
            contents: - For a NLP project, the content can be directly in text format
                - For an Image / Video / Pdf project, the content must be hosted on a web server,
                and you point Kili to your data by giving the URLs
            json_contents: - For a NLP project, the `json_content`
                is a text formatted using RichText
                - For a Video project, the `json_content` is a json containing urls pointing
                to each frame of the video.
            status_array: Each element should be in `TODO`, `ONGOING`, `LABELED`, `REVIEWED`
            is_used_for_consensus_array: Whether to use the asset to compute consensus kpis or not
            is_honeypot_array: Whether to use the asset for honeypot

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message. The formatted results of all batches are
                returned as a single flat list.

        Examples:
            >>> kili.update_properties_in_assets(
                    asset_ids=["ckg22d81r0jrg0885unmuswj8",
                        "ckg22d81s0jrh0885pdxfd03n"],
                    consensus_marks=[1, 0.7],
                    contents=[None, 'https://to/second/asset.png'],
                    external_ids=['external-id-of-your-choice-1',
                        'external-id-of-your-choice-2'],
                    honeypot_marks=[0.8, 0.5],
                    is_honeypot_array=[True, True],
                    is_used_for_consensus_array=[True, False],
                    priorities=[None, 2],
                    status_array=['LABELED', 'REVIEWED'],
                    to_be_labeled_by_array=[
                        ['test+pierre@kili-technology.com'], None],
                )
        """
        # NOTE(review): the example above passes None elements (priorities, contents,
        # to_be_labeled_by_array) although the annotated element types do not include
        # None - confirm whether @typechecked accepts such calls.

        # Explicit mapping of the caller-supplied arguments, in signature order.
        parameters = {
            "asset_ids": asset_ids,
            "external_ids": external_ids,
            "priorities": priorities,
            "json_metadatas": json_metadatas,
            "consensus_marks": consensus_marks,
            "honeypot_marks": honeypot_marks,
            "to_be_labeled_by_array": to_be_labeled_by_array,
            "contents": contents,
            "json_contents": json_contents,
            "status_array": status_array,
            "is_used_for_consensus_array": is_used_for_consensus_array,
            "is_honeypot_array": is_honeypot_array,
        }
        properties_to_batch = process_update_properties_in_assets_parameters(parameters)

        def generate_variables(batch: Dict) -> Dict:
            # "should_reset_to_be_labeled_by_array" is not an argument of this method;
            # presumably it is added by process_update_properties_in_assets_parameters
            # - TODO confirm.
            data = {
                "externalId": batch["external_ids"],
                "priority": batch["priorities"],
                "jsonMetadata": batch["json_metadatas"],
                "consensusMark": batch["consensus_marks"],
                "honeypotMark": batch["honeypot_marks"],
                "toBeLabeledBy": batch["to_be_labeled_by_array"],
                "shouldResetToBeLabeledBy": batch["should_reset_to_be_labeled_by_array"],
                "content": batch["contents"],
                "jsonContent": batch["json_contents"],
                "status": batch["status_array"],
                "isUsedForConsensus": batch["is_used_for_consensus_array"],
                "isHoneypot": batch["is_honeypot_array"],
            }
            # Transpose the per-property lists into one dict per asset.
            data_array = [dict(zip(data, t)) for t in zip(*data.values())]
            return {
                "whereArray": [{"id": asset_id} for asset_id in batch["asset_ids"]],
                "dataArray": data_array,
            }

        results = _mutate_from_paginated_call(
            self,
            properties_to_batch,
            generate_variables,
            GQL_UPDATE_PROPERTIES_IN_ASSETS,
        )
        formated_results = [format_result("data", result, Asset) for result in results]
        # Flatten the per-batch lists into a single list.
        return [item for batch_list in formated_results for item in batch_list]

    @typechecked
    def delete_many_from_dataset(self, asset_ids: List[str]):
        """Delete assets from a project.

        Args:
            asset_ids: The list of identifiers of the assets to delete.

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message.
        """
        properties_to_batch: Dict[str, Optional[List[Any]]] = {"asset_ids": asset_ids}

        def generate_variables(batch):
            return {"where": {"idIn": batch["asset_ids"]}}

        results = _mutate_from_paginated_call(
            self, properties_to_batch, generate_variables, GQL_DELETE_MANY_FROM_DATASET
        )
        # Only the first element of the results is formatted and returned.
        return format_result("data", results[0], Asset)

    @typechecked
    def add_to_review(self, asset_ids: List[str]) -> dict:
        """Add assets to review.

        !!! warning
            Assets without any label will be ignored.

        Args:
            asset_ids: The asset IDs to add to review

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message.

        Examples:
            >>> kili.add_to_review(
                    asset_ids=[
                        "ckg22d81r0jrg0885unmuswj8",
                        "ckg22d81s0jrh0885pdxfd03n"
                        ],
                )
        """
        properties_to_batch: Dict[str, Optional[List[Any]]] = {"asset_ids": asset_ids}

        def generate_variables(batch):
            return {"where": {"idIn": batch["asset_ids"]}}

        results = _mutate_from_paginated_call(
            self,
            properties_to_batch,
            generate_variables,
            GQL_ADD_ALL_LABELED_ASSETS_TO_REVIEW,
        )
        # Only the first element of the results is formatted and returned.
        return format_result("data", results[0])

    @typechecked
    def send_back_to_queue(self, asset_ids: List[str]):
        """Send assets back to queue.

        Args:
            asset_ids: The asset IDs to send back to queue

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message.

        Examples:
            >>> kili.send_back_to_queue(
                    asset_ids=[
                        "ckg22d81r0jrg0885unmuswj8",
                        "ckg22d81s0jrh0885pdxfd03n"
                        ],
                )
        """
        properties_to_batch: Dict[str, Optional[List[Any]]] = {"asset_ids": asset_ids}

        def generate_variables(batch):
            return {"where": {"idIn": batch["asset_ids"]}}

        results = _mutate_from_paginated_call(
            self, properties_to_batch, generate_variables, GQL_SEND_BACK_ASSETS_TO_QUEUE
        )
        # Only the first element of the results is formatted and returned.
        return format_result("data", results[0])
add_to_review(self, asset_ids)
Add assets to review.
Warning
Assets without any label will be ignored.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
asset_ids |
List[str] |
The asset IDs to add to review |
required |
Returns:
Type | Description |
---|---|
dict |
A result object which indicates if the mutation was successful, or an error message. |
Examples:
>>> kili.add_to_review(
asset_ids=[
"ckg22d81r0jrg0885unmuswj8",
"ckg22d81s0jrh0885pdxfd03n"
],
)
Source code in kili/mutations/asset/__init__.py
@typechecked
def add_to_review(self, asset_ids: List[str]) -> dict:
    """Add assets to review.

    !!! warning
        Assets without any label will be ignored.

    Args:
        asset_ids: The asset IDs to add to review

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.

    Examples:
        >>> kili.add_to_review(
                asset_ids=[
                    "ckg22d81r0jrg0885unmuswj8",
                    "ckg22d81s0jrh0885pdxfd03n"
                    ],
            )
    """

    def _where_for_batch(batch):
        # Select the assets of the batch by their identifiers.
        return {"where": {"idIn": batch["asset_ids"]}}

    batched_ids: Dict[str, Optional[List[Any]]] = {"asset_ids": asset_ids}
    responses = _mutate_from_paginated_call(
        self,
        batched_ids,
        _where_for_batch,
        GQL_ADD_ALL_LABELED_ASSETS_TO_REVIEW,
    )
    # Only the first element of the responses is formatted and returned.
    first_response = responses[0]
    return format_result("data", first_response)
append_many_to_dataset(self, project_id, content_array=None, external_id_array=None, id_array=None, is_honeypot_array=None, status_array=None, json_content_array=None, json_metadata_array=None, disable_tqdm=False)
Append assets to a project.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
project_id |
str |
Identifier of the project |
required |
content_array |
Optional[List[str]] |
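List of elements added to the assets of the project. Must not be None except if you provide json_content_array.
- For a `TEXT` project, the content can be either raw text, or URLs to TEXT assets.
- For an `IMAGE` / `PDF` project, the content can be either URLs or paths to existing images/pdf on your computer.
- For a `VIDEO` project, the content must be hosted on a web server, and you point Kili to your data by giving the URLs.
|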
None |
external_id_array |
Optional[List[str]] |
List of external ids given to identify the assets. If None, random identifiers are created. |
None |
is_honeypot_array |
Optional[List[bool]] |
Whether to use the asset for honeypot |
None |
status_array |
Optional[List[str]] |
By default, all imported assets are set to `TODO`. Other options: `ONGOING`, `LABELED`, `REVIEWED`. |
None |
json_content_array |
Optional[List[List[Union[dict, str]]]] |
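Useful for `VIDEO` or `TEXT` projects only.
- For `FRAME` projects, each element is a sequence of frames, i.e. a list of URLs to images or a list of paths to images.
- For `TEXT` projects, each element is a json_content dict, formatted according to the documentation on how to import rich-text assets.
|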
None |
json_metadata_array |
Optional[List[dict]] |
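The metadata given to each asset should be stored in a json like dict with keys.
- Add metadata visible on the asset with the following keys: `imageUrl`, `text`, `url`. Example for one asset: `json_metadata_array = [{'imageUrl': '','text': '','url': ''}]`.
- For video, you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30). Example for one asset: `json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}]`.
|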
None |
disable_tqdm |
bool |
If `True`, the progress bar will be disabled |
False |
Returns:
Type | Description |
---|---|
A result object which indicates if the mutation was successful, or an error message. |
Examples:
>>> kili.append_many_to_dataset(
project_id=project_id,
content_array=['https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'])
Recipe
- For more detailed examples on how to import assets, see the recipe or other examples in our documentation.
- For more detailed examples on how to import text assets, see the recipe.
Source code in kili/mutations/asset/__init__.py
@typechecked
def append_many_to_dataset(
    self,
    project_id: str,
    content_array: Optional[List[str]] = None,
    external_id_array: Optional[List[str]] = None,
    id_array: Optional[List[str]] = None,
    is_honeypot_array: Optional[List[bool]] = None,
    status_array: Optional[List[str]] = None,
    json_content_array: Optional[List[List[Union[dict, str]]]] = None,
    json_metadata_array: Optional[List[dict]] = None,
    disable_tqdm: bool = False,
):
    # pylint: disable=line-too-long
    """Append assets to a project.

    Args:
        project_id: Identifier of the project
        content_array: List of elements added to the assets of the project
            Must not be None except if you provide json_content_array.

            - For a `TEXT` project, the content can be either raw text, or URLs to TEXT assets.
            - For an `IMAGE` / `PDF` project, the content can be either URLs or paths to existing
                images/pdf on your computer.
            - For a `VIDEO` project, the content must be hosted on a web server,
                and you point Kili to your data by giving the URLs.
        external_id_array: List of external ids given to identify the assets.
            If None, random identifiers are created.
        id_array: Optional list whose elements are set as the `id` field of the
            imported assets.
        is_honeypot_array: Whether to use the asset for honeypot
        status_array: By default, all imported assets are set to `TODO`. Other options:
            `ONGOING`, `LABELED`, `REVIEWED`.
        json_content_array: Useful for `VIDEO` or `TEXT` projects only.

            - For `FRAME` projects, each element is a sequence of frames, i.e. a
                list of URLs to images or a list of paths to images.
            - For `TEXT` projects, each element is a json_content dict,
                formatted according to documentation [on how to import
                rich-text assets](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb)
        json_metadata_array: The metadata given to each asset should be stored in a json like dict with keys.

            - Add metadata visible on the asset with the following keys: `imageUrl`, `text`, `url`.
                Example for one asset: `json_metadata_array = [{'imageUrl': '','text': '','url': ''}]`.
            - For video, you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30).
                Example for one asset: `json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}]`.
        disable_tqdm: If `True`, the progress bar will be disabled

    Returns:
        A result object which indicates if the mutation was successful, or an error message.

    Raises:
        ValueError: If both `content_array` and `json_content_array` are None, or if
            a provided array does not hold exactly one element per asset.

    Examples:
        >>> kili.append_many_to_dataset(
                project_id=project_id,
                content_array=['https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'])

    !!! example "Recipe"
        - For more detailed examples on how to import assets,
            see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_assets.ipynb)
            or [other examples](https://docs.kili-technology.com/recipes/importing-data) in our documentation.
        - For more detailed examples on how to import text assets,
            see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb).
    """
    if content_array is None and json_content_array is None:
        raise ValueError("Variables content_array and json_content_array cannot be both None.")

    # The number of assets is given by content_array, else by json_content_array.
    nb_data = (
        len(content_array)
        if content_array is not None
        else len(json_content_array)  # type:ignore
    )
    field_mapping = {
        "content": content_array,
        "json_content": json_content_array,
        "external_id": external_id_array,
        "id": id_array,
        "status": status_array,
        "json_metadata": json_metadata_array,
        "is_honeypot": is_honeypot_array,
    }
    provided_fields = {
        key: values for key, values in field_mapping.items() if values is not None
    }

    # Every provided array is indexed once per asset: a shorter array would fail
    # with a bare IndexError and a longer one would be silently truncated, so
    # reject mismatched lengths explicitly before importing anything.
    for key, values in provided_fields.items():
        if len(values) != nb_data:
            raise ValueError(
                f"Variable {key}_array has {len(values)} elements but {nb_data} assets"
                " are being imported: all provided arrays must have the same length."
            )

    # One dict per asset, holding only the fields that were provided.
    assets = [
        {key: values[index] for key, values in provided_fields.items()}
        for index in range(nb_data)
    ]
    return import_assets(
        self.auth, project_id=project_id, assets=assets, disable_tqdm=disable_tqdm
    )
delete_many_from_dataset(self, asset_ids)
Delete assets from a project.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
asset_ids |
List[str] |
The list of identifiers of the assets to delete. |
required |
Returns:
Type | Description |
---|---|
A result object which indicates if the mutation was successful, or an error message. |
Source code in kili/mutations/asset/__init__.py
@typechecked
def delete_many_from_dataset(self, asset_ids: List[str]):
    """Delete assets from a project.

    Args:
        asset_ids: The list of identifiers of the assets to delete.

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.
    """

    def _where_for_batch(batch):
        # Select the assets of the batch by their identifiers.
        return {"where": {"idIn": batch["asset_ids"]}}

    batched_ids: Dict[str, Optional[List[Any]]] = {"asset_ids": asset_ids}
    responses = _mutate_from_paginated_call(
        self, batched_ids, _where_for_batch, GQL_DELETE_MANY_FROM_DATASET
    )
    # Only the first element of the responses is formatted and returned.
    first_response = responses[0]
    return format_result("data", first_response, Asset)
send_back_to_queue(self, asset_ids)
Send assets back to queue.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
asset_ids |
List[str] |
The asset IDs to send back to queue |
required |
Returns:
Type | Description |
---|---|
A result object which indicates if the mutation was successful, or an error message. |
Examples:
>>> kili.send_back_to_queue(
asset_ids=[
"ckg22d81r0jrg0885unmuswj8",
"ckg22d81s0jrh0885pdxfd03n"
],
)
Source code in kili/mutations/asset/__init__.py
@typechecked
def send_back_to_queue(self, asset_ids: List[str]):
    """Send assets back to queue.

    Args:
        asset_ids: The asset IDs to send back to queue

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.

    Examples:
        >>> kili.send_back_to_queue(
                asset_ids=[
                    "ckg22d81r0jrg0885unmuswj8",
                    "ckg22d81s0jrh0885pdxfd03n"
                    ],
            )
    """

    def _where_for_batch(batch):
        # Select the assets of the batch by their identifiers.
        return {"where": {"idIn": batch["asset_ids"]}}

    batched_ids: Dict[str, Optional[List[Any]]] = {"asset_ids": asset_ids}
    responses = _mutate_from_paginated_call(
        self, batched_ids, _where_for_batch, GQL_SEND_BACK_ASSETS_TO_QUEUE
    )
    # Only the first element of the responses is formatted and returned.
    first_response = responses[0]
    return format_result("data", first_response)
update_properties_in_assets(self, asset_ids, external_ids=None, priorities=None, json_metadatas=None, consensus_marks=None, honeypot_marks=None, to_be_labeled_by_array=None, contents=None, json_contents=None, status_array=None, is_used_for_consensus_array=None, is_honeypot_array=None)
Update the properties of one or more assets.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
asset_ids |
List[str] |
The asset IDs to modify |
required |
external_ids |
Optional[List[str]] |
Change the external id of the assets |
None |
priorities |
Optional[List[int]] |
You can change the priority of the assets. By default, all assets have a priority of 0. |
None |
json_metadatas |
Optional[List[Union[dict, str]]] |
The metadata given to an asset should be stored in a json like dict with keys `imageUrl`, `text`, `url`: `json_metadata = {'imageUrl': '','text': '','url': ''}` |
None |
consensus_marks |
Optional[List[float]] |
Should be between 0 and 1 |
None |
honeypot_marks |
Optional[List[float]] |
Should be between 0 and 1 |
None |
to_be_labeled_by_array |
Optional[List[List[str]]] |
If given, each element of the list should contain the emails of the labelers authorized to label the asset. |
None |
contents |
Optional[List[str]] |
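- For a NLP project, the content can be directly in text format.
- For an Image / Video / Pdf project, the content must be hosted on a web server, and you point Kili to your data by giving the URLs. |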
None |
json_contents |
Optional[List[str]] |
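- For a NLP project, the `json_content` is a text formatted using RichText.
- For a Video project, the `json_content` is a json containing urls pointing to each frame of the video. |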
None |
status_array |
Optional[List[str]] |
Each element should be in `TODO`, `ONGOING`, `LABELED`, `REVIEWED` |
None |
is_used_for_consensus_array |
Optional[List[bool]] |
Whether to use the asset to compute consensus kpis or not |
None |
is_honeypot_array |
Optional[List[bool]] |
Whether to use the asset for honeypot |
None |
Returns:
Type | Description |
---|---|
List[dict] |
A result object which indicates if the mutation was successful, or an error message. |
Examples:
>>> kili.update_properties_in_assets(
asset_ids=["ckg22d81r0jrg0885unmuswj8",
"ckg22d81s0jrh0885pdxfd03n"],
consensus_marks=[1, 0.7],
contents=[None, 'https://to/second/asset.png'],
external_ids=['external-id-of-your-choice-1',
'external-id-of-your-choice-2'],
honeypot_marks=[0.8, 0.5],
is_honeypot_array=[True, True],
is_used_for_consensus_array=[True, False],
priorities=[None, 2],
status_array=['LABELED', 'REVIEWED'],
to_be_labeled_by_array=[
['test+pierre@kili-technology.com'], None],
)
Source code in kili/mutations/asset/__init__.py
@typechecked
# pylint: disable=unused-argument
def update_properties_in_assets(
    self,
    asset_ids: List[str],
    external_ids: Optional[List[str]] = None,
    priorities: Optional[List[int]] = None,
    json_metadatas: Optional[List[Union[dict, str]]] = None,
    consensus_marks: Optional[List[float]] = None,
    honeypot_marks: Optional[List[float]] = None,
    to_be_labeled_by_array: Optional[List[List[str]]] = None,
    contents: Optional[List[str]] = None,
    json_contents: Optional[List[str]] = None,
    status_array: Optional[List[str]] = None,
    is_used_for_consensus_array: Optional[List[bool]] = None,
    is_honeypot_array: Optional[List[bool]] = None,
) -> List[dict]:
    """Update the properties of one or more assets.

    Args:
        asset_ids: The asset IDs to modify
        external_ids: Change the external id of the assets
        priorities: You can change the priority of the assets
            By default, all assets have a priority of 0.
        json_metadatas: The metadata given to an asset should be stored
            in a json like dict with keys `imageUrl`, `text`, `url`:
            `json_metadata = {'imageUrl': '','text': '','url': ''}`
        consensus_marks: Should be between 0 and 1
        honeypot_marks: Should be between 0 and 1
        to_be_labeled_by_array: If given, each element of the list should contain the emails of
            the labelers authorized to label the asset.
        contents: - For a NLP project, the content can be directly in text format
            - For an Image / Video / Pdf project, the content must be hosted on a web server,
            and you point Kili to your data by giving the URLs
        json_contents: - For a NLP project, the `json_content`
            is a text formatted using RichText
            - For a Video project, the `json_content` is a json containing urls pointing
            to each frame of the video.
        status_array: Each element should be in `TODO`, `ONGOING`, `LABELED`, `REVIEWED`
        is_used_for_consensus_array: Whether to use the asset to compute consensus kpis or not
        is_honeypot_array: Whether to use the asset for honeypot

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message. The formatted results of all batches are
            returned as a single flat list.

    Examples:
        >>> kili.update_properties_in_assets(
                asset_ids=["ckg22d81r0jrg0885unmuswj8",
                    "ckg22d81s0jrh0885pdxfd03n"],
                consensus_marks=[1, 0.7],
                contents=[None, 'https://to/second/asset.png'],
                external_ids=['external-id-of-your-choice-1',
                    'external-id-of-your-choice-2'],
                honeypot_marks=[0.8, 0.5],
                is_honeypot_array=[True, True],
                is_used_for_consensus_array=[True, False],
                priorities=[None, 2],
                status_array=['LABELED', 'REVIEWED'],
                to_be_labeled_by_array=[
                    ['test+pierre@kili-technology.com'], None],
            )
    """
    # NOTE(review): the example above passes None elements (priorities, contents,
    # to_be_labeled_by_array) although the annotated element types do not include
    # None - confirm whether @typechecked accepts such calls.

    # Caller-supplied arguments, keyed by parameter name in signature order.
    parameters = {
        "asset_ids": asset_ids,
        "external_ids": external_ids,
        "priorities": priorities,
        "json_metadatas": json_metadatas,
        "consensus_marks": consensus_marks,
        "honeypot_marks": honeypot_marks,
        "to_be_labeled_by_array": to_be_labeled_by_array,
        "contents": contents,
        "json_contents": json_contents,
        "status_array": status_array,
        "is_used_for_consensus_array": is_used_for_consensus_array,
        "is_honeypot_array": is_honeypot_array,
    }
    properties_to_batch = process_update_properties_in_assets_parameters(parameters)

    def generate_variables(batch: Dict) -> Dict:
        # "should_reset_to_be_labeled_by_array" is not an argument of this method;
        # presumably it is added by process_update_properties_in_assets_parameters
        # - TODO confirm.
        columns = {
            "externalId": batch["external_ids"],
            "priority": batch["priorities"],
            "jsonMetadata": batch["json_metadatas"],
            "consensusMark": batch["consensus_marks"],
            "honeypotMark": batch["honeypot_marks"],
            "toBeLabeledBy": batch["to_be_labeled_by_array"],
            "shouldResetToBeLabeledBy": batch["should_reset_to_be_labeled_by_array"],
            "content": batch["contents"],
            "jsonContent": batch["json_contents"],
            "status": batch["status_array"],
            "isUsedForConsensus": batch["is_used_for_consensus_array"],
            "isHoneypot": batch["is_honeypot_array"],
        }
        # Transpose the per-property lists into one dict per asset.
        rows = [dict(zip(columns, row)) for row in zip(*columns.values())]
        where_array = [{"id": asset_id} for asset_id in batch["asset_ids"]]
        return {
            "whereArray": where_array,
            "dataArray": rows,
        }

    batch_results = _mutate_from_paginated_call(
        self,
        properties_to_batch,
        generate_variables,
        GQL_UPDATE_PROPERTIES_IN_ASSETS,
    )
    formatted_batches = [format_result("data", result, Asset) for result in batch_results]
    # Flatten the per-batch lists into a single list.
    return [item for formatted in formatted_batches for item in formatted]