Asset module
Queries
Set of Asset queries
Source code in kili/queries/asset/__init__.py
class QueriesAsset:
    """Set of Asset queries."""

    # pylint: disable=too-many-arguments,too-many-locals

    def __init__(self, auth):
        """Initialize the subclass.

        Args:
            auth: KiliAuth object
        """
        self.auth = auth

    # pylint: disable=dangerous-default-value
    @typechecked
    def assets(
        self,
        project_id: str,
        asset_id: Optional[str] = None,
        skip: int = 0,
        fields: List[str] = [
            "content",
            "createdAt",
            "externalId",
            "id",
            "isHoneypot",
            "jsonMetadata",
            "labels.author.id",
            "labels.author.email",
            "labels.createdAt",
            "labels.id",
            "labels.jsonResponse",
            "skipped",
            "status",
        ],
        asset_id_in: Optional[List[str]] = None,
        consensus_mark_gt: Optional[float] = None,
        consensus_mark_lt: Optional[float] = None,
        disable_tqdm: bool = False,
        external_id_contains: Optional[List[str]] = None,
        first: Optional[int] = None,
        format: Optional[str] = None,  # pylint: disable=redefined-builtin
        honeypot_mark_gt: Optional[float] = None,
        honeypot_mark_lt: Optional[float] = None,
        label_author_in: Optional[List[str]] = None,
        label_consensus_mark_gt: Optional[float] = None,
        label_consensus_mark_lt: Optional[float] = None,
        label_created_at: Optional[str] = None,
        label_created_at_gt: Optional[str] = None,
        label_created_at_lt: Optional[str] = None,
        label_honeypot_mark_gt: Optional[float] = None,
        label_honeypot_mark_lt: Optional[float] = None,
        label_type_in: Optional[List[str]] = None,
        metadata_where: Optional[dict] = None,
        skipped: Optional[bool] = None,
        status_in: Optional[List[str]] = None,
        updated_at_gte: Optional[str] = None,
        updated_at_lte: Optional[str] = None,
        as_generator: bool = False,
        label_category_search: Optional[str] = None,
        download_media: bool = False,
        local_media_dir: Optional[str] = None,
    ) -> Union[Iterable[Dict], pd.DataFrame]:
        # pylint: disable=line-too-long
        """Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.

        Args:
            project_id: Identifier of the project.
            asset_id: Identifier of the asset to retrieve.
            asset_id_in: A list of the IDs of the assets to retrieve.
            skip: Number of assets to skip (they are ordered by their date of creation, first to last).
            fields: All the fields to request among the possible fields for the assets.
                See [the documentation](https://docs.kili-technology.com/reference/graphql-api#asset) for all possible fields.
            first: Maximum number of assets to return.
            consensus_mark_gt: Minimum amount of consensus for the asset.
            consensus_mark_lt: Maximum amount of consensus for the asset.
            external_id_contains: Returned assets have an external id that belongs to that list, if given.
            metadata_where: Filters by the values of the metadata of the asset.
            honeypot_mark_gt: Minimum amount of honeypot for the asset.
            honeypot_mark_lt: Maximum amount of honeypot for the asset.
            status_in: Returned assets should have a status that belongs to that list, if given.
                Possible choices: `TODO`, `ONGOING`, `LABELED`, `TO_REVIEW` or `REVIEWED`
            label_type_in: Returned assets should have a label whose type belongs to that list, if given.
            label_author_in: Returned assets should have a label whose author belongs to that list, if given.
            label_consensus_mark_gt: Returned assets should have a label whose consensus is greater than this number.
            label_consensus_mark_lt: Returned assets should have a label whose consensus is lower than this number.
            label_created_at: Returned assets should have a label whose creation date is equal to this date.
            label_created_at_gt: Returned assets should have a label whose creation date is greater than this date.
            label_created_at_lt: Returned assets should have a label whose creation date is lower than this date.
            label_honeypot_mark_gt: Returned assets should have a label whose honeypot is greater than this number.
            label_honeypot_mark_lt: Returned assets should have a label whose honeypot is lower than this number.
            skipped: Returned assets should be skipped.
            updated_at_gte: Returned assets should have a label whose update date is greater or equal to this date.
            updated_at_lte: Returned assets should have a label whose update date is lower or equal to this date.
            format: If equal to 'pandas', returns a pandas DataFrame.
            disable_tqdm: If `True`, the progress bar will be disabled.
            as_generator: If `True`, a generator on the assets is returned.
            label_category_search: Returned assets should have a label that follows this category search query.
            download_media: Whether the media have to be downloaded or not.
            local_media_dir: Directory where the media are downloaded if `download_media` is True.

        !!! info "Dates format"
            Date strings should have format: "YYYY-MM-DD"

        Returns:
            A pandas DataFrame built from the query result if `format` is `"pandas"`;
            otherwise the query result itself (the matching assets, as a generator
            when `as_generator` is `True`).

        Raises:
            ValueError: If `as_generator` is `True` while `format` is `"pandas"`.

        Example:
            ```
            # returns the assets list of the project
            >>> kili.assets(project_id)
            >>> kili.assets(project_id, asset_id=asset_id)
            # returns a generator of the project assets
            >>> kili.assets(project_id, as_generator=True)
            ```

        !!! example "How to filter based on Metadata"
            - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
                have key "key1" with value "value1"
            - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
                have key "key1" with value "value1" or value "value2"
            - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
                have key "key2" with a value between 2 and 10.

        !!! example "How to filter based on label categories"
            The search query is composed of logical expressions following this format:

                [job_name].[category_name].count [comparison_operator] [value]
            where:

            - `[job_name]` is the name of the job in the interface
            - `[category_name]` is the name of the category in the interface for this job
            - `[comparison_operator]` can be one of: [`==`, `>=`, `<=`, `<`, `>`]
            - `[value]` is an integer that represents the count of such objects of the given category in the label

            These operations can be separated by OR and AND operators

            Example:

                label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
                label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
                label_category_search = `(JOB_CLASSIF.CATEGORY_A.count == 1 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
        """
        if format == "pandas" and as_generator:
            raise ValueError(
                'Argument values as_generator==True and format=="pandas" are not compatible.'
            )
        if label_category_search:
            validate_category_search_query(label_category_search)

        # Work on a private copy: `fields` defaults to a list literal shared by every
        # call, and the very same list object is handed to both the download helper
        # and the query below. Any in-place change made downstream (NOTE(review): the
        # download helper may extend the list - confirm) would otherwise leak into
        # later calls or into the caller's own list.
        fields = list(fields)

        where = AssetWhere(
            project_id=project_id,
            asset_id=asset_id,
            asset_id_in=asset_id_in,
            consensus_mark_gt=consensus_mark_gt,
            consensus_mark_lt=consensus_mark_lt,
            external_id_contains=external_id_contains,
            honeypot_mark_gt=honeypot_mark_gt,
            honeypot_mark_lt=honeypot_mark_lt,
            label_author_in=label_author_in,
            label_consensus_mark_gt=label_consensus_mark_gt,
            label_consensus_mark_lt=label_consensus_mark_lt,
            label_created_at=label_created_at,
            label_created_at_gt=label_created_at_gt,
            label_created_at_lt=label_created_at_lt,
            label_honeypot_mark_gt=label_honeypot_mark_gt,
            label_honeypot_mark_lt=label_honeypot_mark_lt,
            label_type_in=label_type_in,
            metadata_where=metadata_where,
            skipped=skipped,
            status_in=status_in,
            updated_at_gte=updated_at_gte,
            updated_at_lte=updated_at_lte,
            label_category_search=label_category_search,
        )
        options = QueryOptions(disable_tqdm, first, skip, as_generator)
        post_call_function = get_download_assets_function(
            self, download_media, fields, project_id, local_media_dir
        )
        assets = AssetQuery(self.auth.client)(where, fields, options, post_call_function)

        if format == "pandas":
            return pd.DataFrame(assets)
        return assets

    @typechecked
    def count_assets(
        self,
        project_id: str,
        asset_id: Optional[str] = None,
        asset_id_in: Optional[List[str]] = None,
        external_id_contains: Optional[List[str]] = None,
        metadata_where: Optional[dict] = None,
        status_in: Optional[List[str]] = None,
        consensus_mark_gt: Optional[float] = None,
        consensus_mark_lt: Optional[float] = None,
        honeypot_mark_gt: Optional[float] = None,
        honeypot_mark_lt: Optional[float] = None,
        label_type_in: Optional[List[str]] = None,
        label_author_in: Optional[List[str]] = None,
        label_consensus_mark_gt: Optional[float] = None,
        label_consensus_mark_lt: Optional[float] = None,
        label_created_at: Optional[str] = None,
        label_created_at_gt: Optional[str] = None,
        label_created_at_lt: Optional[str] = None,
        label_honeypot_mark_gt: Optional[float] = None,
        label_honeypot_mark_lt: Optional[float] = None,
        skipped: Optional[bool] = None,
        updated_at_gte: Optional[str] = None,
        updated_at_lte: Optional[str] = None,
        label_category_search: Optional[str] = None,
    ) -> int:
        """Count and return the number of assets with the given constraints.

        Parameters beginning with 'label_' apply to labels, others apply to assets.

        Args:
            project_id: Identifier of the project
            asset_id: The unique id of the asset to retrieve.
            asset_id_in: A list of the ids of the assets to retrieve.
            external_id_contains: Returned assets should have an external id
                that belongs to that list, if given.
            metadata_where: Filters by the values of the metadata of the asset.
            status_in: Returned assets should have a status that belongs to that list, if given.
                Possible choices: `TODO`, `ONGOING`, `LABELED`, `TO_REVIEW` or `REVIEWED`
            consensus_mark_gt: Minimum amount of consensus for the asset.
            consensus_mark_lt: Maximum amount of consensus for the asset.
            honeypot_mark_gt: Minimum amount of honeypot for the asset.
            honeypot_mark_lt: Maximum amount of honeypot for the asset.
            label_type_in: Returned assets should have a label
                whose type belongs to that list, if given.
            label_author_in: Returned assets should have a label
                whose author belongs to that list, if given.
            label_consensus_mark_gt: Returned assets should have a label
                whose consensus is greater than this number.
            label_consensus_mark_lt: Returned assets should have a label
                whose consensus is lower than this number.
            label_created_at: Returned assets should have a label
                whose creation date is equal to this date.
            label_created_at_gt: Returned assets should have a label
                whose creation date is greater than this date.
            label_created_at_lt: Returned assets should have a label
                whose creation date is lower than this date.
            label_honeypot_mark_gt: Returned assets should have a label
                whose honeypot is greater than this number.
            label_honeypot_mark_lt: Returned assets should have a label
                whose honeypot is lower than this number.
            skipped: Returned assets should be skipped
            updated_at_gte: Returned assets should have a label
                whose update date is greater or equal to this date.
            updated_at_lte: Returned assets should have a label
                whose update date is lower or equal to this date.
            label_category_search: Returned assets should have a label
                that follows this category search query.

        !!! info "Dates format"
            Date strings should have format: "YYYY-MM-DD"

        Returns:
            The number of assets that match the given constraints.

        Examples:
            >>> kili.count_assets(project_id=project_id)
            250
            >>> kili.count_assets(project_id=project_id, asset_id=asset_id)
            1

        !!! example "How to filter based on Metadata"
            - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
                have key "key1" with value "value1"
            - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
                have key "key1" with value "value1" or value "value2"
            - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
                have key "key2" with a value between 2 and 10.
        """
        if label_category_search:
            validate_category_search_query(label_category_search)

        where = AssetWhere(
            project_id=project_id,
            asset_id=asset_id,
            asset_id_in=asset_id_in,
            consensus_mark_gt=consensus_mark_gt,
            consensus_mark_lt=consensus_mark_lt,
            external_id_contains=external_id_contains,
            honeypot_mark_gt=honeypot_mark_gt,
            honeypot_mark_lt=honeypot_mark_lt,
            label_author_in=label_author_in,
            label_consensus_mark_gt=label_consensus_mark_gt,
            label_consensus_mark_lt=label_consensus_mark_lt,
            label_created_at=label_created_at,
            label_created_at_gt=label_created_at_gt,
            label_created_at_lt=label_created_at_lt,
            label_honeypot_mark_gt=label_honeypot_mark_gt,
            label_honeypot_mark_lt=label_honeypot_mark_lt,
            label_type_in=label_type_in,
            metadata_where=metadata_where,
            skipped=skipped,
            status_in=status_in,
            updated_at_gte=updated_at_gte,
            updated_at_lte=updated_at_lte,
            label_category_search=label_category_search,
        )
        return AssetQuery(self.auth.client).count(where)
assets(self, project_id, asset_id=None, skip=0, fields=['content', 'createdAt', 'externalId', 'id', 'isHoneypot', 'jsonMetadata', 'labels.author.id', 'labels.author.email', 'labels.createdAt', 'labels.id', 'labels.jsonResponse', 'skipped', 'status'], asset_id_in=None, consensus_mark_gt=None, consensus_mark_lt=None, disable_tqdm=False, external_id_contains=None, first=None, format=None, honeypot_mark_gt=None, honeypot_mark_lt=None, label_author_in=None, label_consensus_mark_gt=None, label_consensus_mark_lt=None, label_created_at=None, label_created_at_gt=None, label_created_at_lt=None, label_honeypot_mark_gt=None, label_honeypot_mark_lt=None, label_type_in=None, metadata_where=None, skipped=None, status_in=None, updated_at_gte=None, updated_at_lte=None, as_generator=False, label_category_search=None, download_media=False, local_media_dir=None)
Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
project_id |
str |
Identifier of the project. |
required |
asset_id |
Optional[str] |
Identifier of the asset to retrieve. |
None |
asset_id_in |
Optional[List[str]] |
A list of the IDs of the assets to retrieve. |
None |
skip |
int |
Number of assets to skip (they are ordered by their date of creation, first to last). |
0 |
fields |
List[str] |
All the fields to request among the possible fields for the assets. See the documentation for all possible fields. |
['content', 'createdAt', 'externalId', 'id', 'isHoneypot', 'jsonMetadata', 'labels.author.id', 'labels.author.email', 'labels.createdAt', 'labels.id', 'labels.jsonResponse', 'skipped', 'status'] |
first |
Optional[int] |
Maximum number of assets to return. |
None |
consensus_mark_gt |
Optional[float] |
Minimum amount of consensus for the asset. |
None |
consensus_mark_lt |
Optional[float] |
Maximum amount of consensus for the asset. |
None |
external_id_contains |
Optional[List[str]] |
Returned assets have an external id that belongs to that list, if given. |
None |
metadata_where |
Optional[dict] |
Filters by the values of the metadata of the asset. |
None |
honeypot_mark_gt |
Optional[float] |
Minimum amount of honeypot for the asset. |
None |
honeypot_mark_lt |
Optional[float] |
Maximum amount of honeypot for the asset. |
None |
status_in |
Optional[List[str]] |
Returned assets should have a status that belongs to that list, if given.
Possible choices: TODO, ONGOING, LABELED, TO_REVIEW or REVIEWED |
None |
label_type_in |
Optional[List[str]] |
Returned assets should have a label whose type belongs to that list, if given. |
None |
label_author_in |
Optional[List[str]] |
Returned assets should have a label whose author belongs to that list, if given. |
None |
label_consensus_mark_gt |
Optional[float] |
Returned assets should have a label whose consensus is greater than this number. |
None |
label_consensus_mark_lt |
Optional[float] |
Returned assets should have a label whose consensus is lower than this number. |
None |
label_created_at |
Optional[str] |
Returned assets should have a label whose creation date is equal to this date. |
None |
label_created_at_gt |
Optional[str] |
Returned assets should have a label whose creation date is greater than this date. |
None |
label_created_at_lt |
Optional[str] |
Returned assets should have a label whose creation date is lower than this date. |
None |
label_honeypot_mark_gt |
Optional[float] |
Returned assets should have a label whose honeypot is greater than this number |
None |
label_honeypot_mark_lt |
Optional[float] |
Returned assets should have a label whose honeypot is lower than this number |
None |
skipped |
Optional[bool] |
Returned assets should be skipped |
None |
updated_at_gte |
Optional[str] |
Returned assets should have a label whose update date is greater or equal to this date. |
None |
updated_at_lte |
Optional[str] |
Returned assets should have a label whose update date is lower or equal to this date. |
None |
format |
Optional[str] |
If equal to 'pandas', returns a pandas DataFrame |
None |
disable_tqdm |
bool |
If True, the progress bar will be disabled. |
False |
as_generator |
bool |
If True, a generator on the assets is returned. |
False |
label_category_search |
Optional[str] |
Returned assets should have a label that follows this category search query. |
None |
download_media |
bool |
Whether the media have to be downloaded or not. |
False |
local_media_dir |
Optional[str] |
Directory where the media are downloaded if download_media is True. |
None |
Dates format
Date strings should have format: "YYYY-MM-DD"
Returns:
Type | Description |
---|---|
Union[Iterable[Dict], pandas.core.frame.DataFrame] |
A pandas DataFrame if format is 'pandas'; otherwise the assets that match the constraints, returned as a generator when as_generator is True. |
Examples:
# returns the assets list of the project
>>> kili.assets(project_id)
>>> kili.assets(project_id, asset_id=asset_id)
# returns a generator of the project assets
>>> kili.assets(project_id, as_generator=True)
How to filter based on Metadata
metadata_where = {key1: "value1"}
to filter on assets whose metadata have key "key1" with value "value1"
metadata_where = {key1: ["value1", "value2"]}
to filter on assets whose metadata have key "key1" with value "value1" or value "value2"
metadata_where = {key2: [2, 10]}
to filter on assets whose metadata have key "key2" with a value between 2 and 10.
How to filter based on label categories
The search query is composed of logical expressions following this format:
[job_name].[category_name].count [comparison_operator] [value]
where:
[job_name] is the name of the job in the interface
[category_name] is the name of the category in the interface for this job
[comparison_operator] can be one of: [==, >=, <=, <, >]
[value] is an integer that represents the count of such objects of the given category in the label
These operations can be separated by OR and AND operators
Example:
label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
label_category_search = `(JOB_CLASSIF.CATEGORY_A.count == 1 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
Source code in kili/queries/asset/__init__.py
@typechecked
def assets(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    skip: int = 0,
    fields: List[str] = [
        "content",
        "createdAt",
        "externalId",
        "id",
        "isHoneypot",
        "jsonMetadata",
        "labels.author.id",
        "labels.author.email",
        "labels.createdAt",
        "labels.id",
        "labels.jsonResponse",
        "skipped",
        "status",
    ],
    asset_id_in: Optional[List[str]] = None,
    consensus_mark_gt: Optional[float] = None,
    consensus_mark_lt: Optional[float] = None,
    disable_tqdm: bool = False,
    external_id_contains: Optional[List[str]] = None,
    first: Optional[int] = None,
    format: Optional[str] = None,  # pylint: disable=redefined-builtin
    honeypot_mark_gt: Optional[float] = None,
    honeypot_mark_lt: Optional[float] = None,
    label_author_in: Optional[List[str]] = None,
    label_consensus_mark_gt: Optional[float] = None,
    label_consensus_mark_lt: Optional[float] = None,
    label_created_at: Optional[str] = None,
    label_created_at_gt: Optional[str] = None,
    label_created_at_lt: Optional[str] = None,
    label_honeypot_mark_gt: Optional[float] = None,
    label_honeypot_mark_lt: Optional[float] = None,
    label_type_in: Optional[List[str]] = None,
    metadata_where: Optional[dict] = None,
    skipped: Optional[bool] = None,
    status_in: Optional[List[str]] = None,
    updated_at_gte: Optional[str] = None,
    updated_at_lte: Optional[str] = None,
    as_generator: bool = False,
    label_category_search: Optional[str] = None,
    download_media: bool = False,
    local_media_dir: Optional[str] = None,
) -> Union[Iterable[Dict], pd.DataFrame]:
    # pylint: disable=line-too-long
    """Get an asset list, an asset generator or a pandas DataFrame that match a set of constraints.

    Args:
        project_id: Identifier of the project.
        asset_id: Identifier of the asset to retrieve.
        asset_id_in: A list of the IDs of the assets to retrieve.
        skip: Number of assets to skip (they are ordered by their date of creation, first to last).
        fields: All the fields to request among the possible fields for the assets.
            See [the documentation](https://docs.kili-technology.com/reference/graphql-api#asset) for all possible fields.
        first: Maximum number of assets to return.
        consensus_mark_gt: Minimum amount of consensus for the asset.
        consensus_mark_lt: Maximum amount of consensus for the asset.
        external_id_contains: Returned assets have an external id that belongs to that list, if given.
        metadata_where: Filters by the values of the metadata of the asset.
        honeypot_mark_gt: Minimum amount of honeypot for the asset.
        honeypot_mark_lt: Maximum amount of honeypot for the asset.
        status_in: Returned assets should have a status that belongs to that list, if given.
            Possible choices: `TODO`, `ONGOING`, `LABELED`, `TO_REVIEW` or `REVIEWED`
        label_type_in: Returned assets should have a label whose type belongs to that list, if given.
        label_author_in: Returned assets should have a label whose author belongs to that list, if given.
        label_consensus_mark_gt: Returned assets should have a label whose consensus is greater than this number.
        label_consensus_mark_lt: Returned assets should have a label whose consensus is lower than this number.
        label_created_at: Returned assets should have a label whose creation date is equal to this date.
        label_created_at_gt: Returned assets should have a label whose creation date is greater than this date.
        label_created_at_lt: Returned assets should have a label whose creation date is lower than this date.
        label_honeypot_mark_gt: Returned assets should have a label whose honeypot is greater than this number.
        label_honeypot_mark_lt: Returned assets should have a label whose honeypot is lower than this number.
        skipped: Returned assets should be skipped.
        updated_at_gte: Returned assets should have a label whose update date is greater or equal to this date.
        updated_at_lte: Returned assets should have a label whose update date is lower or equal to this date.
        format: If equal to 'pandas', returns a pandas DataFrame.
        disable_tqdm: If `True`, the progress bar will be disabled.
        as_generator: If `True`, a generator on the assets is returned.
        label_category_search: Returned assets should have a label that follows this category search query.
        download_media: Whether the media have to be downloaded or not.
        local_media_dir: Directory where the media are downloaded if `download_media` is True.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        A pandas DataFrame built from the query result if `format` is `"pandas"`;
        otherwise the query result itself (the matching assets, as a generator
        when `as_generator` is `True`).

    Raises:
        ValueError: If `as_generator` is `True` while `format` is `"pandas"`.

    Example:
        ```
        # returns the assets list of the project
        >>> kili.assets(project_id)
        >>> kili.assets(project_id, asset_id=asset_id)
        # returns a generator of the project assets
        >>> kili.assets(project_id, as_generator=True)
        ```

    !!! example "How to filter based on Metadata"
        - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
            have key "key1" with value "value1"
        - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
            have key "key1" with value "value1" or value "value2"
        - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
            have key "key2" with a value between 2 and 10.

    !!! example "How to filter based on label categories"
        The search query is composed of logical expressions following this format:

            [job_name].[category_name].count [comparison_operator] [value]
        where:

        - `[job_name]` is the name of the job in the interface
        - `[category_name]` is the name of the category in the interface for this job
        - `[comparison_operator]` can be one of: [`==`, `>=`, `<=`, `<`, `>`]
        - `[value]` is an integer that represents the count of such objects of the given category in the label

        These operations can be separated by OR and AND operators

        Example:

            label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
            label_category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
            label_category_search = `(JOB_CLASSIF.CATEGORY_A.count == 1 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
    """
    if format == "pandas" and as_generator:
        raise ValueError(
            'Argument values as_generator==True and format=="pandas" are not compatible.'
        )
    if label_category_search:
        validate_category_search_query(label_category_search)

    # Work on a private copy: `fields` defaults to a list literal shared by every
    # call, and the very same list object is handed to both the download helper
    # and the query below. Any in-place change made downstream (NOTE(review): the
    # download helper may extend the list - confirm) would otherwise leak into
    # later calls or into the caller's own list.
    fields = list(fields)

    where = AssetWhere(
        project_id=project_id,
        asset_id=asset_id,
        asset_id_in=asset_id_in,
        consensus_mark_gt=consensus_mark_gt,
        consensus_mark_lt=consensus_mark_lt,
        external_id_contains=external_id_contains,
        honeypot_mark_gt=honeypot_mark_gt,
        honeypot_mark_lt=honeypot_mark_lt,
        label_author_in=label_author_in,
        label_consensus_mark_gt=label_consensus_mark_gt,
        label_consensus_mark_lt=label_consensus_mark_lt,
        label_created_at=label_created_at,
        label_created_at_gt=label_created_at_gt,
        label_created_at_lt=label_created_at_lt,
        label_honeypot_mark_gt=label_honeypot_mark_gt,
        label_honeypot_mark_lt=label_honeypot_mark_lt,
        label_type_in=label_type_in,
        metadata_where=metadata_where,
        skipped=skipped,
        status_in=status_in,
        updated_at_gte=updated_at_gte,
        updated_at_lte=updated_at_lte,
        label_category_search=label_category_search,
    )
    options = QueryOptions(disable_tqdm, first, skip, as_generator)
    post_call_function = get_download_assets_function(
        self, download_media, fields, project_id, local_media_dir
    )
    assets = AssetQuery(self.auth.client)(where, fields, options, post_call_function)

    if format == "pandas":
        return pd.DataFrame(assets)
    return assets
count_assets(self, project_id, asset_id=None, asset_id_in=None, external_id_contains=None, metadata_where=None, status_in=None, consensus_mark_gt=None, consensus_mark_lt=None, honeypot_mark_gt=None, honeypot_mark_lt=None, label_type_in=None, label_author_in=None, label_consensus_mark_gt=None, label_consensus_mark_lt=None, label_created_at=None, label_created_at_gt=None, label_created_at_lt=None, label_honeypot_mark_gt=None, label_honeypot_mark_lt=None, skipped=None, updated_at_gte=None, updated_at_lte=None, label_category_search=None)
Count and return the number of assets with the given constraints.
Parameters beginning with 'label_' apply to labels, others apply to assets.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
project_id |
str |
Identifier of the project |
required |
asset_id |
Optional[str] |
The unique id of the asset to retrieve. |
None |
asset_id_in |
Optional[List[str]] |
A list of the ids of the assets to retrieve. |
None |
external_id_contains |
Optional[List[str]] |
Returned assets should have an external id that belongs to that list, if given. |
None |
metadata_where |
Optional[dict] |
Filters by the values of the metadata of the asset. |
None |
status_in |
Optional[List[str]] |
Returned assets should have a status that belongs to that list, if given.
Possible choices: TODO, ONGOING, LABELED, TO_REVIEW or REVIEWED |
None |
consensus_mark_gt |
Optional[float] |
Minimum amount of consensus for the asset. |
None |
consensus_mark_lt |
Optional[float] |
Maximum amount of consensus for the asset. |
None |
honeypot_mark_gt |
Optional[float] |
Minimum amount of honeypot for the asset. |
None |
honeypot_mark_lt |
Optional[float] |
Maximum amount of honeypot for the asset. |
None |
label_type_in |
Optional[List[str]] |
Returned assets should have a label whose type belongs to that list, if given. |
None |
label_author_in |
Optional[List[str]] |
Returned assets should have a label whose author belongs to that list, if given. |
None |
label_consensus_mark_gt |
Optional[float] |
Returned assets should have a label whose consensus is greater than this number. |
None |
label_consensus_mark_lt |
Optional[float] |
Returned assets should have a label whose consensus is lower than this number. |
None |
label_created_at |
Optional[str] |
Returned assets should have a label whose creation date is equal to this date. |
None |
label_created_at_gt |
Optional[str] |
Returned assets should have a label whose creation date is greater than this date. |
None |
label_created_at_lt |
Optional[str] |
Returned assets should have a label whose creation date is lower than this date. |
None |
label_honeypot_mark_gt |
Optional[float] |
Returned assets should have a label whose honeypot is greater than this number. |
None |
label_honeypot_mark_lt |
Optional[float] |
Returned assets should have a label whose honeypot is lower than this number. |
None |
skipped |
Optional[bool] |
Returned assets should be skipped |
None |
updated_at_gte |
Optional[str] |
Returned assets should have a label whose update date is greater or equal to this date. |
None |
updated_at_lte |
Optional[str] |
Returned assets should have a label whose update date is lower or equal to this date. |
None |
Dates format
Date strings should have format: "YYYY-MM-DD"
Returns:
Type | Description |
---|---|
int |
The number of assets that match the given constraints. |
Examples:
>>> kili.count_assets(project_id=project_id)
250
>>> kili.count_assets(project_id=project_id, asset_id=asset_id)
1
How to filter based on Metadata
metadata_where = {key1: "value1"}
to filter on assets whose metadata have key "key1" with value "value1"
metadata_where = {key1: ["value1", "value2"]}
to filter on assets whose metadata have key "key1" with value "value1" or value "value2"
metadata_where = {key2: [2, 10]}
to filter on assets whose metadata have key "key2" with a value between 2 and 10.
Source code in kili/queries/asset/__init__.py
@typechecked
def count_assets(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_id_in: Optional[List[str]] = None,
    external_id_contains: Optional[List[str]] = None,
    metadata_where: Optional[dict] = None,
    status_in: Optional[List[str]] = None,
    consensus_mark_gt: Optional[float] = None,
    consensus_mark_lt: Optional[float] = None,
    honeypot_mark_gt: Optional[float] = None,
    honeypot_mark_lt: Optional[float] = None,
    label_type_in: Optional[List[str]] = None,
    label_author_in: Optional[List[str]] = None,
    label_consensus_mark_gt: Optional[float] = None,
    label_consensus_mark_lt: Optional[float] = None,
    label_created_at: Optional[str] = None,
    label_created_at_gt: Optional[str] = None,
    label_created_at_lt: Optional[str] = None,
    label_honeypot_mark_gt: Optional[float] = None,
    label_honeypot_mark_lt: Optional[float] = None,
    skipped: Optional[bool] = None,
    updated_at_gte: Optional[str] = None,
    updated_at_lte: Optional[str] = None,
    label_category_search: Optional[str] = None,
) -> int:
    """Count the assets of a project that satisfy the given constraints.

    Parameters beginning with 'label_' apply to labels, others apply to assets.

    Args:
        project_id: Identifier of the project
        asset_id: The unique id of the asset to retrieve.
        asset_id_in: A list of the ids of the assets to retrieve.
        external_id_contains: Returned assets should have an external id
            that belongs to that list, if given.
        metadata_where: Filters by the values of the metadata of the asset.
        status_in: Returned assets should have a status that belongs to that list, if given.
            Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`
        consensus_mark_gt: Minimum amount of consensus for the asset.
        consensus_mark_lt: Maximum amount of consensus for the asset.
        honeypot_mark_gt: Minimum amount of honeypot for the asset.
        honeypot_mark_lt: Maximum amount of honeypot for the asset.
        label_type_in: Returned assets should have a label
            whose type belongs to that list, if given.
        label_author_in: Returned assets should have a label
            whose author belongs to that list, if given.
        label_consensus_mark_gt: Returned assets should have a label
            whose consensus is greater than this number.
        label_consensus_mark_lt: Returned assets should have a label
            whose consensus is lower than this number.
        label_created_at: Returned assets should have a label
            whose creation date is equal to this date.
        label_created_at_gt: Returned assets should have a label
            whose creation date is greater than this date.
        label_created_at_lt: Returned assets should have a label
            whose creation date is lower than this date.
        label_honeypot_mark_gt: Returned assets should have a label
            whose honeypot is greater than this number.
        label_honeypot_mark_lt: Returned assets should have a label
            whose honeypot is lower than this number.
        skipped: Returned assets should be skipped
        updated_at_gte: Returned assets should have a label
            whose update date is greater or equal to this date.
        updated_at_lte: Returned assets should have a label
            whose update date is lower or equal to this date.
        label_category_search: Label category search query. When provided (non-empty),
            it is checked with `validate_category_search_query` before the count is run.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        The number of assets matching the given constraints.

    Examples:
        >>> kili.count_assets(project_id=project_id)
        250
        >>> kili.count_assets(asset_id=asset_id)
        1

    !!! example "How to filter based on Metadata"
        - `metadata_where = {key1: "value1"}` to filter on assets whose metadata
            have key "key1" with value "value1"
        - `metadata_where = {key1: ["value1", "value2"]}` to filter on assets whose metadata
            have key "key1" with value "value1" or value "value2"
        - `metadata_where = {key2: [2, 10]}` to filter on assets whose metadata
            have key "key2" with a value between 2 and 10.
    """
    # Reject a malformed category search before building the filter object.
    if label_category_search:
        validate_category_search_query(label_category_search)

    # Every filter argument is forwarded unchanged, under the same name.
    filters = AssetWhere(
        project_id=project_id,
        asset_id=asset_id,
        asset_id_in=asset_id_in,
        external_id_contains=external_id_contains,
        metadata_where=metadata_where,
        status_in=status_in,
        consensus_mark_gt=consensus_mark_gt,
        consensus_mark_lt=consensus_mark_lt,
        honeypot_mark_gt=honeypot_mark_gt,
        honeypot_mark_lt=honeypot_mark_lt,
        label_type_in=label_type_in,
        label_author_in=label_author_in,
        label_consensus_mark_gt=label_consensus_mark_gt,
        label_consensus_mark_lt=label_consensus_mark_lt,
        label_created_at=label_created_at,
        label_created_at_gt=label_created_at_gt,
        label_created_at_lt=label_created_at_lt,
        label_honeypot_mark_gt=label_honeypot_mark_gt,
        label_honeypot_mark_lt=label_honeypot_mark_lt,
        skipped=skipped,
        updated_at_gte=updated_at_gte,
        updated_at_lte=updated_at_lte,
        label_category_search=label_category_search,
    )
    asset_query = AssetQuery(self.auth.client)
    return asset_query.count(filters)
Mutations
Set of Asset mutations
Source code in kili/mutations/asset/__init__.py
class MutationsAsset:
"""
Set of Asset mutations
"""
# pylint: disable=too-many-arguments,too-many-locals
def __init__(self, auth: KiliAuth):
"""Initialize the set of asset mutations.
Args:
auth: KiliAuth object, kept on the instance as `self.auth`
for use by the mutation methods of this class.
"""
self.auth = auth
@typechecked
def append_many_to_dataset(
    self,
    project_id: str,
    content_array: Optional[List[str]] = None,
    external_id_array: Optional[List[str]] = None,
    id_array: Optional[List[str]] = None,
    is_honeypot_array: Optional[List[bool]] = None,
    status_array: Optional[List[str]] = None,
    json_content_array: Optional[List[List[Union[dict, str]]]] = None,
    json_metadata_array: Optional[List[dict]] = None,
    disable_tqdm: bool = False,
) -> Dict[str, str]:
    # pylint: disable=line-too-long
    """Append assets to a project.

    Args:
        project_id: Identifier of the project
        content_array: List of elements added to the assets of the project
            Must not be None except if you provide json_content_array.

            - For a `TEXT` project, the content can be either raw text, or URLs to TEXT assets.
            - For an `IMAGE` / `PDF` project, the content can be either URLs or paths to existing
                images/pdf on your computer.
            - For a VIDEO project, the content can be either URLs pointing to videos hosted on a web server or paths to
                existing video files on your computer. If you want to import video from frames, look at the json_content
                section below.
            - For a `VIDEO_LEGACY` project, the content can only be URLs
        external_id_array: List of external ids given to identify the assets.
            If None, random identifiers are created.
        id_array: Optional list of values stored under the `id` key of each imported asset.
        is_honeypot_array: Whether to use the asset for honeypot
        status_array: By default, all imported assets are set to `TODO`. Other options:
            `ONGOING`, `LABELED`, `REVIEWED`.
        json_content_array: Useful for `VIDEO` or `TEXT` projects only.

            - For `VIDEO` projects, each element is a sequence of frames, i.e. a
                list of URLs to images or a list of paths to images.
            - For `TEXT` projects, each element is a json_content dict,
                formatted according to documentation [on how to import
                rich-text assets](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb)
        json_metadata_array: The metadata given to each asset should be stored in a json like dict with keys.

            - Add metadata visible on the asset with the following keys: `imageUrl`, `text`, `url`.
                Example for one asset: `json_metadata_array = [{'imageUrl': '','text': '','url': ''}]`.
            - For VIDEO projects (and not VIDEO_LEGACY), you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30).
                Example for one asset: `json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}]`.
        disable_tqdm: If `True`, the progress bar will be disabled

    Returns:
        A result object which indicates if the mutation was successful, or an error message.

    Raises:
        ValueError: If both `content_array` and `json_content_array` are None.

    Examples:
        >>> kili.append_many_to_dataset(
                project_id=project_id,
                content_array=['https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'])

    !!! example "Recipe"
        - For more detailed examples on how to import assets,
            see [the recipe](https://docs.kili-technology.com/recipes/importing-data).
        - For more detailed examples on how to import text assets,
            see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb).
    """
    if content_array is None and json_content_array is None:
        raise ValueError("Variables content_array and json_content_array cannot be both None.")

    # The number of assets is driven by content_array when present, else by json_content_array.
    if content_array is not None:
        nb_data = len(content_array)
    else:
        nb_data = len(json_content_array)  # type:ignore

    # Keep only the per-asset columns the caller actually supplied, in a fixed key order.
    candidate_columns = (
        ("content", content_array),
        ("json_content", json_content_array),
        ("external_id", external_id_array),
        ("id", id_array),
        ("status", status_array),
        ("json_metadata", json_metadata_array),
        ("is_honeypot", is_honeypot_array),
    )
    supplied_columns = {name: values for name, values in candidate_columns if values is not None}

    # One dict per asset; a column shorter than nb_data raises IndexError here.
    assets = [
        {name: values[index] for name, values in supplied_columns.items()}
        for index in range(nb_data)
    ]
    return import_assets(
        self.auth, project_id=project_id, assets=assets, disable_tqdm=disable_tqdm
    )
@typechecked
def update_properties_in_assets(
    self,
    asset_ids: Optional[List[str]] = None,
    external_ids: Optional[List[str]] = None,
    priorities: Optional[List[int]] = None,
    json_metadatas: Optional[List[Union[dict, str]]] = None,
    consensus_marks: Optional[List[float]] = None,
    honeypot_marks: Optional[List[float]] = None,
    to_be_labeled_by_array: Optional[List[List[str]]] = None,
    contents: Optional[List[str]] = None,
    json_contents: Optional[List[str]] = None,
    status_array: Optional[List[str]] = None,
    is_used_for_consensus_array: Optional[List[bool]] = None,
    is_honeypot_array: Optional[List[bool]] = None,
    project_id: Optional[str] = None,
) -> List[Dict]:
    """Update the properties of one or more assets.

    Args:
        asset_ids: The internal asset IDs to modify.
        external_ids: The external asset IDs to modify (if `asset_ids` is not already provided).
        priorities: You can change the priority of the assets.
            By default, all assets have a priority of 0.
        json_metadatas: The metadata given to an asset should be stored
            in a json like dict with keys `imageUrl`, `text`, `url`:
            `json_metadata = {'imageUrl': '','text': '','url': ''}`
        consensus_marks: Should be between 0 and 1.
        honeypot_marks: Should be between 0 and 1.
        to_be_labeled_by_array: If given, each element of the list should contain the emails of
            the labelers authorized to label the asset.
        contents: - For a NLP project, the content can be directly in text format.
            - For an Image / Video / Pdf project, the content must be hosted on a web server,
            and you point Kili to your data by giving the URLs.
        json_contents: - For a NLP project, the `json_content`
            is a text formatted using RichText.
            - For a Video project, the `json_content` is a json containing urls pointing
            to each frame of the video.
        status_array: Each element should be in `TODO`, `ONGOING`, `LABELED`,
            `TO_REVIEW`, `REVIEWED`.
        is_used_for_consensus_array: Whether to use the asset to compute consensus kpis or not.
        is_honeypot_array: Whether to use the asset for honeypot.
        project_id: The project ID. Only required if `external_ids` argument is provided.

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.

    Raises:
        MissingArgumentError: If both `asset_ids` and `external_ids` are provided.

    Examples:
        >>> kili.update_properties_in_assets(
                asset_ids=["ckg22d81r0jrg0885unmuswj8", "ckg22d81s0jrh0885pdxfd03n"],
                consensus_marks=[1, 0.7],
                contents=[None, 'https://to/second/asset.png'],
                honeypot_marks=[0.8, 0.5],
                is_honeypot_array=[True, True],
                is_used_for_consensus_array=[True, False],
                priorities=[None, 2],
                status_array=['LABELED', 'REVIEWED'],
                to_be_labeled_by_array=[['test+pierre@kili-technology.com'], None],
            )
    """
    both_id_kinds_given = not (asset_ids is None or external_ids is None)
    if both_id_kinds_given:
        # Passing both used to mean "rename"; warn about the new meaning, then refuse.
        warnings.warn(
            "The use of `external_ids` argument has changed. It is now used to identify which"
            " properties of which assets to update. Please use"
            " `kili.change_asset_external_ids()` method instead to change asset external IDs.",
            DeprecationWarning,
        )
        raise MissingArgumentError("Please provide either `asset_ids` or `external_ids`.")

    asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)

    # Explicit list of the per-asset columns handed to the batching helper.
    parameters = {
        "asset_ids": asset_ids,
        "priorities": priorities,
        "json_metadatas": json_metadatas,
        "consensus_marks": consensus_marks,
        "honeypot_marks": honeypot_marks,
        "to_be_labeled_by_array": to_be_labeled_by_array,
        "contents": contents,
        "json_contents": json_contents,
        "status_array": status_array,
        "is_used_for_consensus_array": is_used_for_consensus_array,
        "is_honeypot_array": is_honeypot_array,
    }
    properties_to_batch = process_update_properties_in_assets_parameters(parameters)

    # Pairs of (field name sent in the mutation payload, key read from the batch).
    payload_field_to_batch_key = (
        ("priority", "priorities"),
        ("jsonMetadata", "json_metadatas"),
        ("consensusMark", "consensus_marks"),
        ("honeypotMark", "honeypot_marks"),
        ("toBeLabeledBy", "to_be_labeled_by_array"),
        ("shouldResetToBeLabeledBy", "should_reset_to_be_labeled_by_array"),
        ("content", "contents"),
        ("jsonContent", "json_contents"),
        ("status", "status_array"),
        ("isUsedForConsensus", "is_used_for_consensus_array"),
        ("isHoneypot", "is_honeypot_array"),
    )

    def generate_variables(batch: Dict) -> Dict:
        # Turn the column-oriented batch into one payload dict per asset.
        columns = {field: batch[key] for field, key in payload_field_to_batch_key}
        rows = zip(*columns.values())
        return {
            "whereArray": [{"id": asset_id} for asset_id in batch["asset_ids"]],
            "dataArray": [dict(zip(columns, row)) for row in rows],
        }

    results = _mutate_from_paginated_call(
        self,
        properties_to_batch,
        generate_variables,
        GQL_UPDATE_PROPERTIES_IN_ASSETS,
    )
    formatted_batches = [format_result("data", result, Asset) for result in results]

    # Flatten the per-batch lists into a single list of assets.
    updated_assets = []
    for batch_assets in formatted_batches:
        updated_assets.extend(batch_assets)
    return updated_assets
@typechecked
def change_asset_external_ids(
    self,
    new_external_ids: List[str],
    asset_ids: Optional[List[str]] = None,
    external_ids: Optional[List[str]] = None,
    project_id: Optional[str] = None,
) -> List[Dict]:
    """Update the external IDs of one or more assets.

    Args:
        new_external_ids: The new external IDs of the assets.
        asset_ids: The asset IDs to modify.
        external_ids: The external asset IDs to modify (if `asset_ids` is not already provided).
        project_id: The project ID. Only required if `external_ids` argument is provided.

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.

    Examples:
        >>> kili.change_asset_external_ids(
                new_external_ids=["asset1", "asset2"],
                asset_ids=["ckg22d81r0jrg0885unmuswj8", "ckg22d81s0jrh0885pdxfd03n"],
            )
    """
    resolved_asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)

    # json_metadatas / to_be_labeled_by_array are passed as None: only the external ID changes.
    properties_to_batch = process_update_properties_in_assets_parameters(
        {
            "asset_ids": resolved_asset_ids,
            "new_external_ids": new_external_ids,
            "json_metadatas": None,
            "to_be_labeled_by_array": None,
        }
    )

    # Pairs of (field name sent in the mutation payload, key read from the batch).
    payload_field_to_batch_key = (
        ("externalId", "new_external_ids"),
        ("jsonMetadata", "json_metadatas"),
        ("toBeLabeledBy", "to_be_labeled_by_array"),
        ("shouldResetToBeLabeledBy", "should_reset_to_be_labeled_by_array"),
    )

    def generate_variables(batch: Dict) -> Dict:
        # Turn the column-oriented batch into one payload dict per asset.
        columns = {field: batch[key] for field, key in payload_field_to_batch_key}
        rows = zip(*columns.values())
        return {
            "whereArray": [{"id": asset_id} for asset_id in batch["asset_ids"]],
            "dataArray": [dict(zip(columns, row)) for row in rows],
        }

    results = _mutate_from_paginated_call(
        self,
        properties_to_batch,
        generate_variables,
        GQL_UPDATE_PROPERTIES_IN_ASSETS,
    )
    formatted_batches = [format_result("data", result, Asset) for result in results]

    # Flatten the per-batch lists into a single list of assets.
    renamed_assets = []
    for batch_assets in formatted_batches:
        renamed_assets.extend(batch_assets)
    return renamed_assets
@typechecked
def delete_many_from_dataset(
    self,
    asset_ids: Optional[List[str]] = None,
    external_ids: Optional[List[str]] = None,
    project_id: Optional[str] = None,
) -> Asset:
    """Delete assets from a project.

    Args:
        asset_ids: The list of asset internal IDs to delete.
        external_ids: The list of asset external IDs to delete.
        project_id: The project ID. Only required if `external_ids` argument is provided.

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.
    """
    resolved_asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)
    properties_to_batch: Dict[str, Optional[List[Any]]] = {"asset_ids": resolved_asset_ids}

    def build_variables(batch):
        id_filter = {"idIn": batch["asset_ids"]}
        return {"where": id_filter}

    results = _mutate_from_paginated_call(
        self, properties_to_batch, build_variables, GQL_DELETE_MANY_FROM_DATASET
    )
    # Only the result of the first batch is formatted and returned.
    first_result = results[0]
    return format_result("data", first_result, Asset)
@typechecked
def add_to_review(
    self,
    asset_ids: Optional[List[str]] = None,
    external_ids: Optional[List[str]] = None,
    project_id: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """Add assets to review.

    !!! warning
        Assets without any label will be ignored.

    Args:
        asset_ids: The asset internal IDs to add to review.
        external_ids: The asset external IDs to add to review.
        project_id: The project ID. Only required if `external_ids` argument is provided.

    Returns:
        A dict object with the project `id` and the `asset_ids` of assets moved to review.
            `None` if no assets have changed status (already had `TO_REVIEW` status for example).
            An error message if mutation failed.

    Examples:
        >>> kili.add_to_review(
                asset_ids=[
                    "ckg22d81r0jrg0885unmuswj8",
                    "ckg22d81s0jrh0885pdxfd03n",
                ],
            )
    """
    resolved_asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)
    properties_to_batch: Dict[str, Optional[List[Any]]] = {"asset_ids": resolved_asset_ids}

    def build_variables(batch):
        id_filter = {"idIn": batch["asset_ids"]}
        return {"where": id_filter}

    results = _mutate_from_paginated_call(
        self,
        properties_to_batch,
        build_variables,
        GQL_ADD_ALL_LABELED_ASSETS_TO_REVIEW,
    )
    # Only the result of the first batch is inspected.
    result = format_result("data", results[0])

    # Anything that is not a dict carrying the project id is handed back untouched.
    if not isinstance(result, dict) or "id" not in result:
        return result

    # Look up which of the requested assets now have the TO_REVIEW status.
    asset_query = AssetQuery(self.auth.client)
    review_filter = AssetWhere(
        project_id=result["id"], asset_id_in=resolved_asset_ids, status_in=["TO_REVIEW"]
    )
    assets_in_review = asset_query(review_filter, ["id"], QueryOptions(disable_tqdm=True))
    result["asset_ids"] = [asset["id"] for asset in assets_in_review]
    return result
@typechecked
def send_back_to_queue(
    self,
    asset_ids: Optional[List[str]] = None,
    external_ids: Optional[List[str]] = None,
    project_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Send assets back to queue.

    Args:
        asset_ids: List of internal IDs of assets to send back to queue.
        external_ids: List of external IDs of assets to send back to queue.
        project_id: The project ID. Only required if `external_ids` argument is provided.

    Returns:
        A dict object with the project `id` and the `asset_ids` of assets moved to queue.
            An error message if mutation failed.

    Examples:
        >>> kili.send_back_to_queue(
                asset_ids=[
                    "ckg22d81r0jrg0885unmuswj8",
                    "ckg22d81s0jrh0885pdxfd03n",
                ],
            )
    """
    resolved_asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)
    properties_to_batch: Dict[str, Optional[List[Any]]] = {"asset_ids": resolved_asset_ids}

    def build_variables(batch):
        id_filter = {"idIn": batch["asset_ids"]}
        return {"where": id_filter}

    results = _mutate_from_paginated_call(
        self, properties_to_batch, build_variables, GQL_SEND_BACK_ASSETS_TO_QUEUE
    )
    # Only the result of the first batch is inspected; it is indexed by "id" unconditionally.
    result = format_result("data", results[0])

    # Look up which of the requested assets now have the ONGOING status.
    asset_query = AssetQuery(self.auth.client)
    queue_filter = AssetWhere(
        project_id=result["id"], asset_id_in=resolved_asset_ids, status_in=["ONGOING"]
    )
    assets_in_queue = asset_query(queue_filter, ["id"], QueryOptions(disable_tqdm=True))
    result["asset_ids"] = [asset["id"] for asset in assets_in_queue]
    return result
add_to_review(self, asset_ids=None, external_ids=None, project_id=None)
Add assets to review.
Warning
Assets without any label will be ignored.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
asset_ids |
Optional[List[str]] |
The asset internal IDs to add to review. |
None |
external_ids |
Optional[List[str]] |
The asset external IDs to add to review. |
None |
project_id |
Optional[str] |
The project ID. Only required if `external_ids` argument is provided. |
None |
Returns:
Type | Description |
---|---|
Optional[Dict[str, Any]] |
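A dict object with the project `id` and the `asset_ids` of assets moved to review. `None` if no assets have changed status (already had `TO_REVIEW` status for example). An error message if mutation failed. |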
Examples:
>>> kili.add_to_review(
asset_ids=[
"ckg22d81r0jrg0885unmuswj8",
"ckg22d81s0jrh0885pdxfd03n",
],
)
Source code in kili/mutations/asset/__init__.py
@typechecked
def add_to_review(
    self,
    asset_ids: Optional[List[str]] = None,
    external_ids: Optional[List[str]] = None,
    project_id: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """Add assets to review.

    !!! warning
        Assets without any label will be ignored.

    Args:
        asset_ids: The asset internal IDs to add to review.
        external_ids: The asset external IDs to add to review.
        project_id: The project ID. Only required if `external_ids` argument is provided.

    Returns:
        A dict object with the project `id` and the `asset_ids` of assets moved to review.
            `None` if no assets have changed status (already had `TO_REVIEW` status for example).
            An error message if mutation failed.

    Examples:
        >>> kili.add_to_review(
                asset_ids=[
                    "ckg22d81r0jrg0885unmuswj8",
                    "ckg22d81s0jrh0885pdxfd03n",
                ],
            )
    """
    resolved_asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)
    properties_to_batch: Dict[str, Optional[List[Any]]] = {"asset_ids": resolved_asset_ids}

    def build_variables(batch):
        id_filter = {"idIn": batch["asset_ids"]}
        return {"where": id_filter}

    results = _mutate_from_paginated_call(
        self,
        properties_to_batch,
        build_variables,
        GQL_ADD_ALL_LABELED_ASSETS_TO_REVIEW,
    )
    # Only the result of the first batch is inspected.
    result = format_result("data", results[0])

    # Anything that is not a dict carrying the project id is handed back untouched.
    if not isinstance(result, dict) or "id" not in result:
        return result

    # Look up which of the requested assets now have the TO_REVIEW status.
    asset_query = AssetQuery(self.auth.client)
    review_filter = AssetWhere(
        project_id=result["id"], asset_id_in=resolved_asset_ids, status_in=["TO_REVIEW"]
    )
    assets_in_review = asset_query(review_filter, ["id"], QueryOptions(disable_tqdm=True))
    result["asset_ids"] = [asset["id"] for asset in assets_in_review]
    return result
append_many_to_dataset(self, project_id, content_array=None, external_id_array=None, id_array=None, is_honeypot_array=None, status_array=None, json_content_array=None, json_metadata_array=None, disable_tqdm=False)
Append assets to a project.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
project_id |
str |
Identifier of the project |
required |
content_array |
Optional[List[str]] |
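List of elements added to the assets of the project. Must not be None except if you provide json_content_array. For a `TEXT` project, the content can be either raw text, or URLs to TEXT assets. For an `IMAGE` / `PDF` project, the content can be either URLs or paths to existing images/pdf on your computer. For a VIDEO project, the content can be either URLs pointing to videos hosted on a web server or paths to existing video files on your computer. For a `VIDEO_LEGACY` project, the content can only be URLs.
|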
None |
external_id_array |
Optional[List[str]] |
List of external ids given to identify the assets. If None, random identifiers are created. |
None |
is_honeypot_array |
Optional[List[bool]] |
Whether to use the asset for honeypot |
None |
status_array |
Optional[List[str]] |
By default, all imported assets are set to `TODO`. Other options: `ONGOING`, `LABELED`, `REVIEWED`. |
None |
json_content_array |
Optional[List[List[Union[dict, str]]]] |
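Useful for `VIDEO` or `TEXT` projects only. For `VIDEO` projects, each element is a sequence of frames, i.e. a list of URLs to images or a list of paths to images. For `TEXT` projects, each element is a json_content dict, formatted according to the documentation on how to import rich-text assets.
|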
None |
json_metadata_array |
Optional[List[dict]] |
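The metadata given to each asset should be stored in a json like dict with keys. Add metadata visible on the asset with the following keys: `imageUrl`, `text`, `url`. For VIDEO projects (and not VIDEO_LEGACY), you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30).
|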
None |
disable_tqdm |
bool |
If `True`, the progress bar will be disabled |
False |
Returns:
Type | Description |
---|---|
Dict[str, str] |
A result object which indicates if the mutation was successful, or an error message. |
Examples:
>>> kili.append_many_to_dataset(
project_id=project_id,
content_array=['https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'])
Recipe
- For more detailed examples on how to import assets, see the recipe.
- For more detailed examples on how to import text assets, see the recipe.
Source code in kili/mutations/asset/__init__.py
@typechecked
def append_many_to_dataset(
    self,
    project_id: str,
    content_array: Optional[List[str]] = None,
    external_id_array: Optional[List[str]] = None,
    id_array: Optional[List[str]] = None,
    is_honeypot_array: Optional[List[bool]] = None,
    status_array: Optional[List[str]] = None,
    json_content_array: Optional[List[List[Union[dict, str]]]] = None,
    json_metadata_array: Optional[List[dict]] = None,
    disable_tqdm: bool = False,
) -> Dict[str, str]:
    # pylint: disable=line-too-long
    """Append assets to a project.

    Args:
        project_id: Identifier of the project
        content_array: List of elements added to the assets of the project
            Must not be None except if you provide json_content_array.

            - For a `TEXT` project, the content can be either raw text, or URLs to TEXT assets.
            - For an `IMAGE` / `PDF` project, the content can be either URLs or paths to existing
                images/pdf on your computer.
            - For a VIDEO project, the content can be either URLs pointing to videos hosted on a web server or paths to
                existing video files on your computer. If you want to import video from frames, look at the json_content
                section below.
            - For a `VIDEO_LEGACY` project, the content can only be URLs
        external_id_array: List of external ids given to identify the assets.
            If None, random identifiers are created.
        id_array: Optional list of values stored under the `id` key of each imported asset.
        is_honeypot_array: Whether to use the asset for honeypot
        status_array: By default, all imported assets are set to `TODO`. Other options:
            `ONGOING`, `LABELED`, `REVIEWED`.
        json_content_array: Useful for `VIDEO` or `TEXT` projects only.

            - For `VIDEO` projects, each element is a sequence of frames, i.e. a
                list of URLs to images or a list of paths to images.
            - For `TEXT` projects, each element is a json_content dict,
                formatted according to documentation [on how to import
                rich-text assets](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb)
        json_metadata_array: The metadata given to each asset should be stored in a json like dict with keys.

            - Add metadata visible on the asset with the following keys: `imageUrl`, `text`, `url`.
                Example for one asset: `json_metadata_array = [{'imageUrl': '','text': '','url': ''}]`.
            - For VIDEO projects (and not VIDEO_LEGACY), you can specify a value with key 'processingParameters' to specify the sampling rate (default: 30).
                Example for one asset: `json_metadata_array = [{'processingParameters': {'framesPlayedPerSecond': 10}}]`.
        disable_tqdm: If `True`, the progress bar will be disabled

    Returns:
        A result object which indicates if the mutation was successful, or an error message.

    Raises:
        ValueError: If both `content_array` and `json_content_array` are None.

    Examples:
        >>> kili.append_many_to_dataset(
                project_id=project_id,
                content_array=['https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png'])

    !!! example "Recipe"
        - For more detailed examples on how to import assets,
            see [the recipe](https://docs.kili-technology.com/recipes/importing-data).
        - For more detailed examples on how to import text assets,
            see [the recipe](https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/import_text_assets.ipynb).
    """
    if content_array is None and json_content_array is None:
        raise ValueError("Variables content_array and json_content_array cannot be both None.")

    # The number of assets is driven by content_array when present, else by json_content_array.
    if content_array is not None:
        nb_data = len(content_array)
    else:
        nb_data = len(json_content_array)  # type:ignore

    # Keep only the per-asset columns the caller actually supplied, in a fixed key order.
    candidate_columns = (
        ("content", content_array),
        ("json_content", json_content_array),
        ("external_id", external_id_array),
        ("id", id_array),
        ("status", status_array),
        ("json_metadata", json_metadata_array),
        ("is_honeypot", is_honeypot_array),
    )
    supplied_columns = {name: values for name, values in candidate_columns if values is not None}

    # One dict per asset; a column shorter than nb_data raises IndexError here.
    assets = [
        {name: values[index] for name, values in supplied_columns.items()}
        for index in range(nb_data)
    ]
    return import_assets(
        self.auth, project_id=project_id, assets=assets, disable_tqdm=disable_tqdm
    )
change_asset_external_ids(self, new_external_ids, asset_ids=None, external_ids=None, project_id=None)
Update the external IDs of one or more assets.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
new_external_ids |
List[str] |
The new external IDs of the assets. |
required |
asset_ids |
Optional[List[str]] |
The asset IDs to modify. |
None |
external_ids |
Optional[List[str]] |
The external asset IDs to modify (if `asset_ids` is not already provided). |
None |
project_id |
Optional[str] |
The project ID. Only required if `external_ids` argument is provided. |
None |
Returns:
Type | Description |
---|---|
List[Dict] |
A result object which indicates if the mutation was successful, or an error message. |
Examples:
>>> kili.change_asset_external_ids(
new_external_ids=["asset1", "asset2"],
asset_ids=["ckg22d81r0jrg0885unmuswj8", "ckg22d81s0jrh0885pdxfd03n"],
)
Source code in kili/mutations/asset/__init__.py
@typechecked
def change_asset_external_ids(
    self,
    new_external_ids: List[str],
    asset_ids: Optional[List[str]] = None,
    external_ids: Optional[List[str]] = None,
    project_id: Optional[str] = None,
) -> List[Dict]:
    """Update the external IDs of one or more assets.

    Args:
        new_external_ids: The new external IDs of the assets.
        asset_ids: The asset IDs to modify.
        external_ids: The external asset IDs to modify (if `asset_ids` is not already provided).
        project_id: The project ID. Only required if `external_ids` argument is provided.

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.

    Examples:
        >>> kili.change_asset_external_ids(
                new_external_ids=["asset1", "asset2"],
                asset_ids=["ckg22d81r0jrg0885unmuswj8", "ckg22d81s0jrh0885pdxfd03n"],
            )
    """
    resolved_asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)

    # json_metadatas / to_be_labeled_by_array are passed as None: only the external ID changes.
    properties_to_batch = process_update_properties_in_assets_parameters(
        {
            "asset_ids": resolved_asset_ids,
            "new_external_ids": new_external_ids,
            "json_metadatas": None,
            "to_be_labeled_by_array": None,
        }
    )

    # Pairs of (field name sent in the mutation payload, key read from the batch).
    payload_field_to_batch_key = (
        ("externalId", "new_external_ids"),
        ("jsonMetadata", "json_metadatas"),
        ("toBeLabeledBy", "to_be_labeled_by_array"),
        ("shouldResetToBeLabeledBy", "should_reset_to_be_labeled_by_array"),
    )

    def generate_variables(batch: Dict) -> Dict:
        # Turn the column-oriented batch into one payload dict per asset.
        columns = {field: batch[key] for field, key in payload_field_to_batch_key}
        rows = zip(*columns.values())
        return {
            "whereArray": [{"id": asset_id} for asset_id in batch["asset_ids"]],
            "dataArray": [dict(zip(columns, row)) for row in rows],
        }

    results = _mutate_from_paginated_call(
        self,
        properties_to_batch,
        generate_variables,
        GQL_UPDATE_PROPERTIES_IN_ASSETS,
    )
    formatted_batches = [format_result("data", result, Asset) for result in results]

    # Flatten the per-batch lists into a single list of assets.
    renamed_assets = []
    for batch_assets in formatted_batches:
        renamed_assets.extend(batch_assets)
    return renamed_assets
delete_many_from_dataset(self, asset_ids=None, external_ids=None, project_id=None)
Delete assets from a project.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
asset_ids |
Optional[List[str]] |
The list of asset internal IDs to delete. |
None |
external_ids |
Optional[List[str]] |
The list of asset external IDs to delete. |
None |
project_id |
Optional[str] |
The project ID. Only required if `external_ids` argument is provided. |
None |
Returns:
Type | Description |
---|---|
Asset |
A result object which indicates if the mutation was successful, or an error message. |
Source code in kili/mutations/asset/__init__.py
@typechecked
def delete_many_from_dataset(
    self,
    asset_ids: Optional[List[str]] = None,
    external_ids: Optional[List[str]] = None,
    project_id: Optional[str] = None,
) -> Asset:
    """Delete assets from a project.

    Args:
        asset_ids: The list of asset internal IDs to delete.
        external_ids: The list of asset external IDs to delete.
        project_id: The project ID. Only required if `external_ids` argument is provided.

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.
    """
    resolved_asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)
    properties_to_batch: Dict[str, Optional[List[Any]]] = {"asset_ids": resolved_asset_ids}

    def build_variables(batch):
        id_filter = {"idIn": batch["asset_ids"]}
        return {"where": id_filter}

    results = _mutate_from_paginated_call(
        self, properties_to_batch, build_variables, GQL_DELETE_MANY_FROM_DATASET
    )
    # Only the result of the first batch is formatted and returned.
    first_result = results[0]
    return format_result("data", first_result, Asset)
send_back_to_queue(self, asset_ids=None, external_ids=None, project_id=None)
Send assets back to queue.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
asset_ids |
Optional[List[str]] |
List of internal IDs of assets to send back to queue. |
None |
external_ids |
Optional[List[str]] |
List of external IDs of assets to send back to queue. |
None |
project_id |
Optional[str] |
The project ID. Only required if `external_ids` argument is provided. |
None |
Returns:
Type | Description |
---|---|
Dict[str, Any] |
A dict object with the project `id` and the `asset_ids` of assets moved to queue. An error message if mutation failed. |
Examples:
>>> kili.send_back_to_queue(
asset_ids=[
"ckg22d81r0jrg0885unmuswj8",
"ckg22d81s0jrh0885pdxfd03n",
],
)
Source code in kili/mutations/asset/__init__.py
@typechecked
def send_back_to_queue(
    self,
    asset_ids: Optional[List[str]] = None,
    external_ids: Optional[List[str]] = None,
    project_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Send assets back to queue.

    Args:
        asset_ids: List of internal IDs of assets to send back to queue.
        external_ids: List of external IDs of assets to send back to queue.
        project_id: The project ID. Only required if `external_ids` argument is provided.

    Returns:
        A dict object with the project `id` and the `asset_ids` of assets moved to queue.
            An error message if mutation failed.

    Examples:
        >>> kili.send_back_to_queue(
                asset_ids=[
                    "ckg22d81r0jrg0885unmuswj8",
                    "ckg22d81s0jrh0885pdxfd03n",
                ],
            )
    """
    resolved_asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)
    properties_to_batch: Dict[str, Optional[List[Any]]] = {"asset_ids": resolved_asset_ids}

    def build_variables(batch):
        id_filter = {"idIn": batch["asset_ids"]}
        return {"where": id_filter}

    results = _mutate_from_paginated_call(
        self, properties_to_batch, build_variables, GQL_SEND_BACK_ASSETS_TO_QUEUE
    )
    # Only the result of the first batch is inspected; it is indexed by "id" unconditionally.
    result = format_result("data", results[0])

    # Look up which of the requested assets now have the ONGOING status.
    asset_query = AssetQuery(self.auth.client)
    queue_filter = AssetWhere(
        project_id=result["id"], asset_id_in=resolved_asset_ids, status_in=["ONGOING"]
    )
    assets_in_queue = asset_query(queue_filter, ["id"], QueryOptions(disable_tqdm=True))
    result["asset_ids"] = [asset["id"] for asset in assets_in_queue]
    return result
update_properties_in_assets(self, asset_ids=None, external_ids=None, priorities=None, json_metadatas=None, consensus_marks=None, honeypot_marks=None, to_be_labeled_by_array=None, contents=None, json_contents=None, status_array=None, is_used_for_consensus_array=None, is_honeypot_array=None, project_id=None)
Update the properties of one or more assets.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
asset_ids |
Optional[List[str]] |
The internal asset IDs to modify. |
None |
external_ids |
Optional[List[str]] |
The external asset IDs to modify (if `asset_ids` is not already provided). |
None |
priorities |
Optional[List[int]] |
You can change the priority of the assets. By default, all assets have a priority of 0. |
None |
json_metadatas |
Optional[List[Union[dict, str]]] |
The metadata given to an asset should be stored
in a json like dict with keys `imageUrl`, `text`, `url`: `json_metadata = {'imageUrl': '','text': '','url': ''}` |
None |
consensus_marks |
Optional[List[float]] |
Should be between 0 and 1. |
None |
honeypot_marks |
Optional[List[float]] |
Should be between 0 and 1. |
None |
to_be_labeled_by_array |
Optional[List[List[str]]] |
If given, each element of the list should contain the emails of the labelers authorized to label the asset. |
None |
contents |
Optional[List[str]] |
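For a NLP project, the content can be directly in text format. For an Image / Video / Pdf project, the content must be hosted on a web server, and you point Kili to your data by giving the URLs. |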
None |
json_contents |
Optional[List[str]] |
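For a NLP project, the `json_content` is a text formatted using RichText. For a Video project, the `json_content` is a json containing urls pointing to each frame of the video. |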
None |
status_array |
Optional[List[str]] |
Each element should be in `TODO`, `ONGOING`, `LABELED`, `TO_REVIEW`, `REVIEWED`. |
None |
is_used_for_consensus_array |
Optional[List[bool]] |
Whether to use the asset to compute consensus kpis or not. |
None |
is_honeypot_array |
Optional[List[bool]] |
Whether to use the asset for honeypot. |
None |
project_id |
Optional[str] |
The project ID. Only required if `external_ids` argument is provided. |
None |
Returns:
Type | Description |
---|---|
List[Dict] |
A result object which indicates if the mutation was successful, or an error message. |
Examples:
>>> kili.update_properties_in_assets(
asset_ids=["ckg22d81r0jrg0885unmuswj8", "ckg22d81s0jrh0885pdxfd03n"],
consensus_marks=[1, 0.7],
contents=[None, 'https://to/second/asset.png'],
honeypot_marks=[0.8, 0.5],
is_honeypot_array=[True, True],
is_used_for_consensus_array=[True, False],
priorities=[None, 2],
status_array=['LABELED', 'REVIEWED'],
to_be_labeled_by_array=[['test+pierre@kili-technology.com'], None],
)
Source code in kili/mutations/asset/__init__.py
@typechecked
def update_properties_in_assets(
    self,
    asset_ids: Optional[List[str]] = None,
    external_ids: Optional[List[str]] = None,
    priorities: Optional[List[int]] = None,
    json_metadatas: Optional[List[Union[dict, str]]] = None,
    consensus_marks: Optional[List[float]] = None,
    honeypot_marks: Optional[List[float]] = None,
    to_be_labeled_by_array: Optional[List[List[str]]] = None,
    contents: Optional[List[str]] = None,
    json_contents: Optional[List[str]] = None,
    status_array: Optional[List[str]] = None,
    is_used_for_consensus_array: Optional[List[bool]] = None,
    is_honeypot_array: Optional[List[bool]] = None,
    project_id: Optional[str] = None,
) -> List[Dict]:
    """Update the properties of one or more assets.

    Args:
        asset_ids: The internal asset IDs to modify.
        external_ids: The external asset IDs to modify (if `asset_ids` is not already provided).
        priorities: You can change the priority of the assets.
            By default, all assets have a priority of 0.
        json_metadatas: The metadata given to an asset should be stored
            in a json like dict with keys `imageUrl`, `text`, `url`:
            `json_metadata = {'imageUrl': '','text': '','url': ''}`
        consensus_marks: Should be between 0 and 1.
        honeypot_marks: Should be between 0 and 1.
        to_be_labeled_by_array: If given, each element of the list should contain the emails of
            the labelers authorized to label the asset.
        contents: - For a NLP project, the content can be directly in text format.
            - For an Image / Video / Pdf project, the content must be hosted on a web server,
            and you point Kili to your data by giving the URLs.
        json_contents: - For a NLP project, the `json_content`
            is a text formatted using RichText.
            - For a Video project, the `json_content` is a json containing urls pointing
            to each frame of the video.
        status_array: Each element should be in `TODO`, `ONGOING`, `LABELED`,
            `TO_REVIEW`, `REVIEWED`.
        is_used_for_consensus_array: Whether to use the asset to compute consensus kpis or not.
        is_honeypot_array: Whether to use the asset for honeypot.
        project_id: The project ID. Only required if `external_ids` argument is provided.

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.

    Raises:
        MissingArgumentError: If both `asset_ids` and `external_ids` are provided.

    Examples:
        >>> kili.update_properties_in_assets(
                asset_ids=["ckg22d81r0jrg0885unmuswj8", "ckg22d81s0jrh0885pdxfd03n"],
                consensus_marks=[1, 0.7],
                contents=[None, 'https://to/second/asset.png'],
                honeypot_marks=[0.8, 0.5],
                is_honeypot_array=[True, True],
                is_used_for_consensus_array=[True, False],
                priorities=[None, 2],
                status_array=['LABELED', 'REVIEWED'],
                to_be_labeled_by_array=[['test+pierre@kili-technology.com'], None],
            )
    """
    # Passing both kinds of identifiers is rejected: a deprecation warning
    # explains the changed meaning of `external_ids`, then the call fails.
    if not (asset_ids is None or external_ids is None):
        warnings.warn(
            "The use of `external_ids` argument has changed. It is now used to identify which"
            " properties of which assets to update. Please use"
            " `kili.change_asset_external_ids()` method instead to change asset external IDs.",
            DeprecationWarning,
        )
        raise MissingArgumentError("Please provide either `asset_ids` or `external_ids`.")

    asset_ids = get_asset_ids_or_throw_error(self, asset_ids, external_ids, project_id)

    # Per-asset property lists handed to the parameter pre-processing helper;
    # `asset_ids` is the resolved list of internal IDs from the line above.
    parameters = {
        "asset_ids": asset_ids,
        "priorities": priorities,
        "json_metadatas": json_metadatas,
        "consensus_marks": consensus_marks,
        "honeypot_marks": honeypot_marks,
        "to_be_labeled_by_array": to_be_labeled_by_array,
        "contents": contents,
        "json_contents": json_contents,
        "status_array": status_array,
        "is_used_for_consensus_array": is_used_for_consensus_array,
        "is_honeypot_array": is_honeypot_array,
    }
    properties_to_batch = process_update_properties_in_assets_parameters(parameters)

    def build_variables(batch: Dict) -> Dict:
        # Map each GraphQL field name to the batch column that feeds it.
        columns = {
            "priority": batch["priorities"],
            "jsonMetadata": batch["json_metadatas"],
            "consensusMark": batch["consensus_marks"],
            "honeypotMark": batch["honeypot_marks"],
            "toBeLabeledBy": batch["to_be_labeled_by_array"],
            "shouldResetToBeLabeledBy": batch["should_reset_to_be_labeled_by_array"],
            "content": batch["contents"],
            "jsonContent": batch["json_contents"],
            "status": batch["status_array"],
            "isUsedForConsensus": batch["is_used_for_consensus_array"],
            "isHoneypot": batch["is_honeypot_array"],
        }
        # Transpose the column-wise lists into one dict of fields per asset.
        per_asset_data = [
            {field: value for field, value in zip(columns, row)}
            for row in zip(*columns.values())
        ]
        where_array = [{"id": asset_id} for asset_id in batch["asset_ids"]]
        return {"whereArray": where_array, "dataArray": per_asset_data}

    results = _mutate_from_paginated_call(
        self,
        properties_to_batch,
        build_variables,
        GQL_UPDATE_PROPERTIES_IN_ASSETS,
    )

    # Format every batch response first, then flatten them into a single list.
    formatted_batches = [format_result("data", result, Asset) for result in results]
    updated_assets = []
    for formatted_batch in formatted_batches:
        updated_assets.extend(formatted_batch)
    return updated_assets