Label module
Queries
Set of Label queries.
Source code in kili/entrypoints/queries/label/__init__.py
class QueriesLabel(BaseOperationEntrypointMixin):
"""Set of Label queries."""
# pylint: disable=too-many-arguments,too-many-locals
# NOTE: the default `fields` tuple of every overload stub mirrors the default of
# the implementation below, so that IDEs and type checkers display the defaults
# that are actually used at runtime.
@overload
def labels(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_status_in: Optional[List[str]] = None,
    asset_external_id_in: Optional[List[str]] = None,
    author_in: Optional[List[str]] = None,
    created_at: Optional[str] = None,
    created_at_gte: Optional[str] = None,
    created_at_lte: Optional[str] = None,
    fields: ListOrTuple[str] = (
        "author.email",
        "author.id",
        "id",
        "jsonResponse",
        "labelType",
        "secondsToLabel",
        "isLatestLabelForUser",
        "assetId",
    ),
    first: Optional[int] = None,
    honeypot_mark_gte: Optional[float] = None,
    honeypot_mark_lte: Optional[float] = None,
    id_contains: Optional[List[str]] = None,
    label_id: Optional[str] = None,
    skip: int = 0,
    type_in: Optional[List[str]] = None,
    user_id: Optional[str] = None,
    disable_tqdm: Optional[bool] = None,
    category_search: Optional[str] = None,
    output_format: Literal["dict"] = "dict",
    *,
    as_generator: Literal[True],
) -> Generator[Dict, None, None]: ...


@overload
def labels(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_status_in: Optional[List[str]] = None,
    asset_external_id_in: Optional[List[str]] = None,
    author_in: Optional[List[str]] = None,
    created_at: Optional[str] = None,
    created_at_gte: Optional[str] = None,
    created_at_lte: Optional[str] = None,
    fields: ListOrTuple[str] = (
        "author.email",
        "author.id",
        "id",
        "jsonResponse",
        "labelType",
        "secondsToLabel",
        "isLatestLabelForUser",
        "assetId",
    ),
    first: Optional[int] = None,
    honeypot_mark_gte: Optional[float] = None,
    honeypot_mark_lte: Optional[float] = None,
    id_contains: Optional[List[str]] = None,
    label_id: Optional[str] = None,
    skip: int = 0,
    type_in: Optional[List[str]] = None,
    user_id: Optional[str] = None,
    disable_tqdm: Optional[bool] = None,
    category_search: Optional[str] = None,
    output_format: Literal["dict"] = "dict",
    *,
    as_generator: Literal[False] = False,
) -> List[Dict]: ...


@overload
def labels(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_status_in: Optional[List[str]] = None,
    asset_external_id_in: Optional[List[str]] = None,
    author_in: Optional[List[str]] = None,
    created_at: Optional[str] = None,
    created_at_gte: Optional[str] = None,
    created_at_lte: Optional[str] = None,
    fields: ListOrTuple[str] = (
        "author.email",
        "author.id",
        "id",
        "jsonResponse",
        "labelType",
        "secondsToLabel",
        "isLatestLabelForUser",
        "assetId",
    ),
    first: Optional[int] = None,
    honeypot_mark_gte: Optional[float] = None,
    honeypot_mark_lte: Optional[float] = None,
    id_contains: Optional[List[str]] = None,
    label_id: Optional[str] = None,
    skip: int = 0,
    type_in: Optional[List[str]] = None,
    user_id: Optional[str] = None,
    disable_tqdm: Optional[bool] = None,
    category_search: Optional[str] = None,
    output_format: Literal["parsed_label"] = "parsed_label",
    *,
    as_generator: Literal[False] = False,
) -> List[ParsedLabel]: ...


@overload
def labels(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_status_in: Optional[List[str]] = None,
    asset_external_id_in: Optional[List[str]] = None,
    author_in: Optional[List[str]] = None,
    created_at: Optional[str] = None,
    created_at_gte: Optional[str] = None,
    created_at_lte: Optional[str] = None,
    fields: ListOrTuple[str] = (
        "author.email",
        "author.id",
        "id",
        "jsonResponse",
        "labelType",
        "secondsToLabel",
        "isLatestLabelForUser",
        "assetId",
    ),
    first: Optional[int] = None,
    honeypot_mark_gte: Optional[float] = None,
    honeypot_mark_lte: Optional[float] = None,
    id_contains: Optional[List[str]] = None,
    label_id: Optional[str] = None,
    skip: int = 0,
    type_in: Optional[List[str]] = None,
    user_id: Optional[str] = None,
    disable_tqdm: Optional[bool] = None,
    category_search: Optional[str] = None,
    output_format: Literal["parsed_label"] = "parsed_label",
    *,
    # No default here: the runtime default of `as_generator` is False, so the
    # generator variant only applies when the caller passes True explicitly.
    as_generator: Literal[True],
) -> Generator[ParsedLabel, None, None]: ...


@typechecked
def labels(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_status_in: Optional[List[str]] = None,
    asset_external_id_in: Optional[List[str]] = None,
    author_in: Optional[List[str]] = None,
    created_at: Optional[str] = None,
    created_at_gte: Optional[str] = None,
    created_at_lte: Optional[str] = None,
    fields: ListOrTuple[str] = (
        "author.email",
        "author.id",
        "id",
        "jsonResponse",
        "labelType",
        "secondsToLabel",
        "isLatestLabelForUser",
        "assetId",
    ),
    first: Optional[int] = None,
    honeypot_mark_gte: Optional[float] = None,
    honeypot_mark_lte: Optional[float] = None,
    id_contains: Optional[List[str]] = None,
    label_id: Optional[str] = None,
    skip: int = 0,
    type_in: Optional[List[str]] = None,
    user_id: Optional[str] = None,
    disable_tqdm: Optional[bool] = None,
    category_search: Optional[str] = None,
    output_format: Literal["dict", "parsed_label"] = "dict",
    *,
    as_generator: bool = False,
) -> Iterable[Union[Dict, ParsedLabel]]:
    # pylint: disable=line-too-long
    """Get a label list or a label generator from a project based on a set of criteria.

    Args:
        project_id: Identifier of the project.
        asset_id: Identifier of the asset.
        asset_status_in: Returned labels should have a status that belongs to that list, if given.
            Possible choices : `TODO`, `ONGOING`, `LABELED`, `TO REVIEW` or `REVIEWED`.
        asset_external_id_in: Returned labels should have an external id that belongs to that list, if given.
        author_in: Returned labels should have been made by authors in that list, if given.
            An author can be designated by the first name, the last name, or the first name + last name.
        created_at: Returned labels should have their creation date equal to this date.
        created_at_gte: Returned labels should have their creation date greater or equal to this date.
        created_at_lte: Returned labels should have their creation date lower or equal to this date.
        fields: All the fields to request among the possible fields for the labels.
            See [the documentation](https://docs.kili-technology.com/reference/graphql-api#label) for all possible fields.
        first: Maximum number of labels to return.
        honeypot_mark_gte: Returned labels should have a label whose honeypot is greater than this number.
        honeypot_mark_lte: Returned labels should have a label whose honeypot is lower than this number.
        id_contains: Filters out labels not belonging to that list. If empty, no filtering is applied.
        label_id: Identifier of the label.
        skip: Number of labels to skip (they are ordered by their date of creation, first to last).
        type_in: Returned labels should have a label whose type belongs to that list, if given.
        user_id: Identifier of the user.
        disable_tqdm: If `True`, the progress bar will be disabled.
        category_search: Query to filter labels based on the content of their jsonResponse.
        output_format: If `dict`, the output is an iterable of Python dictionaries.
            If `parsed_label`, the output is an iterable of parsed labels objects. More information on parsed labels in the [documentation](https://python-sdk-docs.kili-technology.com/latest/sdk/tutorials/label_parsing/).
        as_generator: If `True`, a generator on the labels is returned.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        An iterable of labels.

    Raises:
        ValueError: If `output_format` is `parsed_label` and `jsonResponse` is not part of `fields`.

    Examples:
        >>> kili.labels(project_id=project_id, fields=['jsonResponse', 'labelOf.externalId']) # returns a list of all labels of a project and their assets external ID
        >>> kili.labels(project_id=project_id, fields=['jsonResponse'], as_generator=True) # returns a generator of all labels of a project

    !!! example "How to filter based on label categories"
        The search query is composed of logical expressions following this format:

            [job_name].[category_name].count [comparison_operator] [value]

        where:

        - `[job_name]` is the name of the job in the interface
        - `[category_name]` is the name of the category in the interface for this job
        - `[comparison_operator]` can be one of: [`==`, `>=`, `<=`, `<`, `>`]
        - `[value]` is an integer that represents the count of such objects of the given category in the label

        These operations can be separated by OR and AND operators

        Example:

            category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
            category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
            category_search = `(JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
    """
    # Validate the search expression before building the query.
    if category_search:
        validate_category_search_query(category_search)

    where = LabelWhere(
        project_id=project_id,
        asset_id=asset_id,
        asset_status_in=asset_status_in,
        asset_external_id_in=asset_external_id_in,
        author_in=author_in,
        created_at=created_at,
        created_at_gte=created_at_gte,
        created_at_lte=created_at_lte,
        honeypot_mark_gte=honeypot_mark_gte,
        honeypot_mark_lte=honeypot_mark_lte,
        id_contains=id_contains,
        label_id=label_id,
        type_in=type_in,
        user_id=user_id,
        category_search=category_search,
    )

    post_call_function = None
    if output_format == "parsed_label":
        if "jsonResponse" not in fields:
            raise ValueError(
                "The field 'jsonResponse' is required to parse labels. Please add it to the"
                " 'fields' argument."
            )

        # Parsing is given the project's JSON interface and input type, so they
        # are fetched once and bound to the post-processing callback.
        project = get_project(self, project_id, ["jsonInterface", "inputType"])
        post_call_function = partial(
            parse_labels,
            json_interface=project["jsonInterface"],
            input_type=project["inputType"],
        )

    # The helper derives the effective progress-bar setting from `as_generator`.
    disable_tqdm = disable_tqdm_if_as_generator(as_generator, disable_tqdm)
    options = QueryOptions(disable_tqdm, first, skip)
    labels_gen = LabelQuery(self.graphql_client, self.http_client)(
        where, fields, options, post_call_function
    )

    if as_generator:
        return labels_gen
    return list(labels_gen)
@overload
def predictions(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_status_in: Optional[List[str]] = None,
    asset_external_id_in: Optional[List[str]] = None,
    author_in: Optional[List[str]] = None,
    created_at: Optional[str] = None,
    created_at_gte: Optional[str] = None,
    created_at_lte: Optional[str] = None,
    fields: ListOrTuple[str] = (
        "author.email",
        "author.id",
        "id",
        "jsonResponse",
        "labelType",
        "modelName",
    ),
    first: Optional[int] = None,
    honeypot_mark_gte: Optional[float] = None,
    honeypot_mark_lte: Optional[float] = None,
    id_contains: Optional[List[str]] = None,
    label_id: Optional[str] = None,
    skip: int = 0,
    user_id: Optional[str] = None,
    disable_tqdm: Optional[bool] = None,
    category_search: Optional[str] = None,
    *,
    as_generator: Literal[True],
) -> Generator[Dict, None, None]: ...


@overload
def predictions(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_status_in: Optional[List[str]] = None,
    asset_external_id_in: Optional[List[str]] = None,
    author_in: Optional[List[str]] = None,
    created_at: Optional[str] = None,
    created_at_gte: Optional[str] = None,
    created_at_lte: Optional[str] = None,
    fields: ListOrTuple[str] = (
        "author.email",
        "author.id",
        "id",
        "jsonResponse",
        "labelType",
        "modelName",
    ),
    first: Optional[int] = None,
    honeypot_mark_gte: Optional[float] = None,
    honeypot_mark_lte: Optional[float] = None,
    id_contains: Optional[List[str]] = None,
    label_id: Optional[str] = None,
    skip: int = 0,
    user_id: Optional[str] = None,
    disable_tqdm: Optional[bool] = None,
    category_search: Optional[str] = None,
    *,
    as_generator: Literal[False] = False,
) -> List[Dict]: ...


@typechecked
def predictions(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_status_in: Optional[List[str]] = None,
    asset_external_id_in: Optional[List[str]] = None,
    author_in: Optional[List[str]] = None,
    created_at: Optional[str] = None,
    created_at_gte: Optional[str] = None,
    created_at_lte: Optional[str] = None,
    fields: ListOrTuple[str] = (
        "author.email",
        "author.id",
        "id",
        "jsonResponse",
        "labelType",
        "modelName",
    ),
    first: Optional[int] = None,
    honeypot_mark_gte: Optional[float] = None,
    honeypot_mark_lte: Optional[float] = None,
    id_contains: Optional[List[str]] = None,
    label_id: Optional[str] = None,
    skip: int = 0,
    user_id: Optional[str] = None,
    disable_tqdm: Optional[bool] = None,
    category_search: Optional[str] = None,
    *,
    as_generator: bool = False,
) -> Iterable[Dict]:
    # pylint: disable=line-too-long
    """Get prediction labels from a project based on a set of criteria.

    This method is equivalent to the `kili.labels()` method, but it only returns labels of type "PREDICTION".

    Args:
        project_id: Identifier of the project.
        asset_id: Identifier of the asset.
        asset_status_in: Returned labels should have a status that belongs to that list, if given.
            Possible choices : `TODO`, `ONGOING`, `LABELED`, `TO REVIEW` or `REVIEWED`.
        asset_external_id_in: Returned labels should have an external id that belongs to that list, if given.
        author_in: Returned labels should have been made by authors in that list, if given.
            An author can be designated by the first name, the last name, or the first name + last name.
        created_at: Returned labels should have their creation date equal to this date.
        created_at_gte: Returned labels should have their creation date greater or equal to this date.
        created_at_lte: Returned labels should have their creation date lower or equal to this date.
        fields: All the fields to request among the possible fields for the labels.
            See [the documentation](https://docs.kili-technology.com/reference/graphql-api#label) for all possible fields.
        first: Maximum number of labels to return.
        honeypot_mark_gte: Returned labels should have a label whose honeypot is greater than this number.
        honeypot_mark_lte: Returned labels should have a label whose honeypot is lower than this number.
        id_contains: Filters out labels not belonging to that list. If empty, no filtering is applied.
        label_id: Identifier of the label.
        skip: Number of labels to skip (they are ordered by their date of creation, first to last).
        user_id: Identifier of the user.
        disable_tqdm: If `True`, the progress bar will be disabled.
        category_search: Query to filter labels based on the content of their jsonResponse.
        as_generator: If `True`, a generator on the labels is returned.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        An iterable of prediction labels.

    Examples:
        >>> kili.predictions(project_id=project_id) # returns a list of prediction labels of a project
    """
    # Pure delegation to `labels()`: every filter is forwarded unchanged and
    # only the label type is pinned.
    return self.labels(
        project_id=project_id,
        asset_id=asset_id,
        asset_status_in=asset_status_in,
        asset_external_id_in=asset_external_id_in,
        author_in=author_in,
        created_at=created_at,
        created_at_gte=created_at_gte,
        created_at_lte=created_at_lte,
        fields=fields,
        first=first,
        honeypot_mark_gte=honeypot_mark_gte,
        honeypot_mark_lte=honeypot_mark_lte,
        id_contains=id_contains,
        label_id=label_id,
        skip=skip,
        type_in=["PREDICTION"],
        user_id=user_id,
        disable_tqdm=disable_tqdm,
        category_search=category_search,
        as_generator=as_generator,  # type: ignore
    )
@overload
def inferences(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_status_in: Optional[List[str]] = None,
    asset_external_id_in: Optional[List[str]] = None,
    author_in: Optional[List[str]] = None,
    created_at: Optional[str] = None,
    created_at_gte: Optional[str] = None,
    created_at_lte: Optional[str] = None,
    fields: ListOrTuple[str] = (
        "author.email",
        "author.id",
        "id",
        "jsonResponse",
        "labelType",
        "modelName",
    ),
    first: Optional[int] = None,
    honeypot_mark_gte: Optional[float] = None,
    honeypot_mark_lte: Optional[float] = None,
    id_contains: Optional[List[str]] = None,
    label_id: Optional[str] = None,
    skip: int = 0,
    user_id: Optional[str] = None,
    disable_tqdm: Optional[bool] = None,
    category_search: Optional[str] = None,
    *,
    as_generator: Literal[True],
) -> Generator[Dict, None, None]: ...


@overload
def inferences(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_status_in: Optional[List[str]] = None,
    asset_external_id_in: Optional[List[str]] = None,
    author_in: Optional[List[str]] = None,
    created_at: Optional[str] = None,
    created_at_gte: Optional[str] = None,
    created_at_lte: Optional[str] = None,
    fields: ListOrTuple[str] = (
        "author.email",
        "author.id",
        "id",
        "jsonResponse",
        "labelType",
        "modelName",
    ),
    first: Optional[int] = None,
    honeypot_mark_gte: Optional[float] = None,
    honeypot_mark_lte: Optional[float] = None,
    id_contains: Optional[List[str]] = None,
    label_id: Optional[str] = None,
    skip: int = 0,
    user_id: Optional[str] = None,
    disable_tqdm: Optional[bool] = None,
    category_search: Optional[str] = None,
    *,
    as_generator: Literal[False] = False,
) -> List[Dict]: ...


@typechecked
def inferences(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_status_in: Optional[List[str]] = None,
    asset_external_id_in: Optional[List[str]] = None,
    author_in: Optional[List[str]] = None,
    created_at: Optional[str] = None,
    created_at_gte: Optional[str] = None,
    created_at_lte: Optional[str] = None,
    fields: ListOrTuple[str] = (
        "author.email",
        "author.id",
        "id",
        "jsonResponse",
        "labelType",
        "modelName",
    ),
    first: Optional[int] = None,
    honeypot_mark_gte: Optional[float] = None,
    honeypot_mark_lte: Optional[float] = None,
    id_contains: Optional[List[str]] = None,
    label_id: Optional[str] = None,
    skip: int = 0,
    user_id: Optional[str] = None,
    disable_tqdm: Optional[bool] = None,
    category_search: Optional[str] = None,
    *,
    as_generator: bool = False,
) -> Iterable[Dict]:
    # pylint: disable=line-too-long
    """Get inference labels from a project based on a set of criteria.

    This method is equivalent to the `kili.labels()` method, but it only returns labels of type "INFERENCE".

    Args:
        project_id: Identifier of the project.
        asset_id: Identifier of the asset.
        asset_status_in: Returned labels should have a status that belongs to that list, if given.
            Possible choices : `TODO`, `ONGOING`, `LABELED`, `TO REVIEW` or `REVIEWED`.
        asset_external_id_in: Returned labels should have an external id that belongs to that list, if given.
        author_in: Returned labels should have been made by authors in that list, if given.
            An author can be designated by the first name, the last name, or the first name + last name.
        created_at: Returned labels should have their creation date equal to this date.
        created_at_gte: Returned labels should have their creation date greater or equal to this date.
        created_at_lte: Returned labels should have their creation date lower or equal to this date.
        fields: All the fields to request among the possible fields for the labels.
            See [the documentation](https://docs.kili-technology.com/reference/graphql-api#label) for all possible fields.
        first: Maximum number of labels to return.
        honeypot_mark_gte: Returned labels should have a label whose honeypot is greater than this number.
        honeypot_mark_lte: Returned labels should have a label whose honeypot is lower than this number.
        id_contains: Filters out labels not belonging to that list. If empty, no filtering is applied.
        label_id: Identifier of the label.
        skip: Number of labels to skip (they are ordered by their date of creation, first to last).
        user_id: Identifier of the user.
        disable_tqdm: If `True`, the progress bar will be disabled.
        category_search: Query to filter labels based on the content of their jsonResponse.
        as_generator: If `True`, a generator on the labels is returned.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        An iterable of inference labels.

    Examples:
        >>> kili.inferences(project_id=project_id) # returns a list of inference labels of a project
    """
    # Pure delegation to `labels()`: every filter is forwarded unchanged and
    # only the label type is pinned.
    return self.labels(
        project_id=project_id,
        asset_id=asset_id,
        asset_status_in=asset_status_in,
        asset_external_id_in=asset_external_id_in,
        author_in=author_in,
        created_at=created_at,
        created_at_gte=created_at_gte,
        created_at_lte=created_at_lte,
        fields=fields,
        first=first,
        honeypot_mark_gte=honeypot_mark_gte,
        honeypot_mark_lte=honeypot_mark_lte,
        id_contains=id_contains,
        label_id=label_id,
        skip=skip,
        type_in=["INFERENCE"],
        user_id=user_id,
        disable_tqdm=disable_tqdm,
        category_search=category_search,
        as_generator=as_generator,  # type: ignore
    )
@typechecked
def export_labels_as_df(
    self,
    project_id: str,
    fields: ListOrTuple[str] = ("author.email", "author.id", "createdAt", "id", "labelType"),
    asset_fields: ListOrTuple[str] = ("externalId",),
) -> pd.DataFrame:
    # pylint: disable=line-too-long
    """Build a pandas DataFrame with one row per label of a project.

    Each row holds the requested label fields, plus the requested asset fields
    of the asset the label belongs to, stored under `asset_`-prefixed columns.

    Args:
        project_id: Identifier of the project
        fields: All the fields to request among the possible fields for the labels.
            See [the documentation](https://docs.kili-technology.com/reference/graphql-api#label) for all possible fields.
        asset_fields: All the fields to request among the possible fields for the assets.
            See [the documentation](https://docs.kili-technology.com/reference/graphql-api#asset) for all possible fields.

    Returns:
        A pandas DataFrame containing the labels.
    """
    # The result is not used: the project lookup runs before assets are listed.
    # NOTE(review): presumably it fails early on an unknown project — confirm.
    get_project(self, project_id, ["id"])

    # Label fields are requested through the assets, nested under "labels.".
    nested_label_fields = tuple("labels." + field for field in fields)
    assets = self.kili_api_gateway.list_assets(
        AssetFilters(project_id=project_id),
        tuple(asset_fields) + nested_label_fields,
        QueryOptions(disable_tqdm=False),
    )

    rows = []
    for asset in assets:
        # Every asset value except the nested labels becomes an "asset_*" column.
        asset_columns = {
            f"asset_{key}": value for key, value in asset.items() if key != "labels"
        }
        for label in asset["labels"]:
            rows.append(dict(label, **asset_columns))

    return pd.DataFrame(rows)
@typechecked
def count_labels(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_status_in: Optional[List[str]] = None,
    asset_external_id_in: Optional[List[str]] = None,
    author_in: Optional[List[str]] = None,
    created_at: Optional[str] = None,
    created_at_gte: Optional[str] = None,
    created_at_lte: Optional[str] = None,
    honeypot_mark_gte: Optional[float] = None,
    honeypot_mark_lte: Optional[float] = None,
    label_id: Optional[str] = None,
    type_in: Optional[List[str]] = None,
    user_id: Optional[str] = None,
    category_search: Optional[str] = None,
    id_contains: Optional[List[str]] = None,
) -> int:
    # pylint: disable=line-too-long
    """Get the number of labels for the given parameters.

    Args:
        project_id: Identifier of the project.
        asset_id: Identifier of the asset.
        asset_status_in: Returned labels should have a status that belongs to that list, if given.
            Possible choices : `TODO`, `ONGOING`, `LABELED`, `TO REVIEW` or `REVIEWED`.
        asset_external_id_in: Returned labels should have an external id that belongs to that list, if given.
        author_in: Returned labels should have been made by authors in that list, if given.
            An author can be designated by the first name, the last name, or the first name + last name.
        created_at: Returned labels should have their creation date equal to this date.
        created_at_gte: Returned labels should have their creation date greater or equal to this date.
        created_at_lte: Returned labels should have their creation date lower or equal to this date.
        honeypot_mark_gte: Returned labels should have a label whose honeypot is greater than this number.
        honeypot_mark_lte: Returned labels should have a label whose honeypot is lower than this number.
        label_id: Identifier of the label.
        type_in: Returned labels should have a label whose type belongs to that list, if given.
        user_id: Identifier of the user.
        category_search: Query to filter labels based on the content of their jsonResponse.
        id_contains: Filters out labels not belonging to that list. If empty, no filtering is applied.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        The number of labels with the parameters provided
    """
    # Validate the search expression before building the query.
    if category_search:
        validate_category_search_query(category_search)

    label_filters = LabelWhere(
        project_id=project_id,
        asset_id=asset_id,
        asset_status_in=asset_status_in,
        asset_external_id_in=asset_external_id_in,
        author_in=author_in,
        created_at=created_at,
        created_at_gte=created_at_gte,
        created_at_lte=created_at_lte,
        honeypot_mark_gte=honeypot_mark_gte,
        honeypot_mark_lte=honeypot_mark_lte,
        id_contains=id_contains,
        label_id=label_id,
        type_in=type_in,
        user_id=user_id,
        category_search=category_search,
    )
    label_query = LabelQuery(self.graphql_client, self.http_client)
    return label_query.count(label_filters)
def export_labels(
    self,
    project_id: str,
    filename: str,
    fmt: LabelFormat,
    asset_ids: Optional[List[str]] = None,
    layout: SplitOption = "split",
    single_file: bool = False,
    disable_tqdm: Optional[bool] = None,
    with_assets: bool = True,
    external_ids: Optional[List[str]] = None,
    annotation_modifier: Optional[CocoAnnotationModifier] = None,
    asset_filter_kwargs: Optional[Dict[str, object]] = None,
    normalized_coordinates: Optional[bool] = None,
) -> None:
    # pylint: disable=line-too-long
    """Export the project labels with the requested format into the requested output path.

    Args:
        project_id: Identifier of the project.
        filename: Relative or full path of the archive that will contain
            the exported data.
        fmt: Format of the exported labels.
        asset_ids: Optional list of the assets internal IDs from which to export the labels.
        layout: Layout of the exported files. "split" means there is one folder
            per job, "merged" that there is one folder with every labels.
        single_file: Layout of the exported labels. Single file mode is
            only available for some specific formats (COCO and Kili).
        disable_tqdm: Disable the progress bar if True.
        with_assets: Download the assets in the export.
        external_ids: Optional list of the assets external IDs from which to export the labels.
            Only used when `asset_ids` is not given.
        annotation_modifier: (For COCO export only) function that takes the COCO annotation, the
            COCO image, and the Kili annotation, and should return an updated COCO annotation.
            This can be used if you want to add a new attribute to the COCO annotation. For
            example, you can add a method that computes if the annotation is a rectangle or not
            and add it to the COCO annotation (see example).
        asset_filter_kwargs: Optional dictionary of arguments to pass to `kili.assets()` in order to filter the assets the labels are exported from. The supported arguments are:

            - `consensus_mark_gte`
            - `consensus_mark_lte`
            - `external_id_strictly_in`
            - `external_id_in`
            - `honeypot_mark_gte`
            - `honeypot_mark_lte`
            - `label_author_in`
            - `label_reviewer_in`
            - `skipped`
            - `status_in`
            - `label_category_search`
            - `created_at_gte`
            - `created_at_lte`
            - `issue_type`
            - `issue_status`
            - `inference_mark_gte`
            - `inference_mark_lte`
            - `metadata_where`

            See the documentation of [`kili.assets()`](https://python-sdk-docs.kili-technology.com/latest/sdk/asset/#kili.queries.asset.__init__.QueriesAsset.assets) for more information.
        normalized_coordinates: This parameter is only effective on the Kili (a.k.a raw) format.
            If True, the coordinates of the `(x, y)` vertices are normalized between 0 and 1.
            If False, the json response will contain additional fields with coordinates in absolute values, that is, in pixels.

    !!! Info
        The supported formats are:

        - Yolo V4, V5, V7, V8 for object detection tasks.
        - Kili (a.k.a raw) for all tasks.
        - COCO for object detection tasks (bounding box and semantic segmentation).
        - Pascal VOC for object detection tasks (bounding box).

    !!! warning "Cloud storage"
        Export with asset download (`with_assets=True`) is not allowed for projects connected to a cloud storage.

    !!! Example
        ```python
        kili.export_labels("your_project_id", "export.zip", "yolo_v4")
        ```

    !!! Example
        ```python
        def add_is_rectangle(coco_annotation, coco_image, kili_annotation):
            is_rectangle = ...
            return {**coco_annotation, "attributes": {"is_rectangle": is_rectangle}}

        kili.export_labels(
            "your_project_id",
            "export.zip",
            "coco",
            annotation_modifier=add_is_rectangle
        )
        ```
    """
    # External IDs are only resolved when no internal IDs were given; explicit
    # `asset_ids` always take precedence.
    if external_ids is not None and asset_ids is None:
        id_map = infer_ids_from_external_ids(
            kili_api_gateway=self.kili_api_gateway,
            asset_external_ids=external_ids,
            project_id=project_id,
        )
        # Keep the caller's ordering of external IDs.
        asset_ids = [id_map[external_id] for external_id in external_ids]

    try:
        # Resolves to the module-level `export_labels` service function, not
        # to this method.
        export_labels(
            self,
            asset_ids=asset_ids,
            project_id=cast(ProjectId, project_id),
            export_type="latest",
            label_format=fmt,
            split_option=layout,
            single_file=single_file,
            output_file=filename,
            disable_tqdm=disable_tqdm,
            log_level="WARNING",
            with_assets=with_assets,
            annotation_modifier=annotation_modifier,
            asset_filter_kwargs=asset_filter_kwargs,
            normalized_coordinates=normalized_coordinates,
        )
    except NoCompatibleJobError as excp:
        # NOTE(review): the error is only printed, so callers get no signal
        # that the export did not complete — confirm this is intended.
        print(str(excp))
count_labels(self, project_id, asset_id=None, asset_status_in=None, asset_external_id_in=None, author_in=None, created_at=None, created_at_gte=None, created_at_lte=None, honeypot_mark_gte=None, honeypot_mark_lte=None, label_id=None, type_in=None, user_id=None, category_search=None, id_contains=None)
Get the number of labels for the given parameters.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
project_id |
str |
Identifier of the project. |
required |
asset_id |
Optional[str] |
Identifier of the asset. |
None |
asset_status_in |
Optional[List[str]] |
Returned labels should have a status that belongs to that list, if given.
Possible choices: `TODO`, `ONGOING`, `LABELED` or `REVIEWED`. |
None |
asset_external_id_in |
Optional[List[str]] |
Returned labels should have an external id that belongs to that list, if given. |
None |
author_in |
Optional[List[str]] |
Returned labels should have been made by authors in that list, if given. An author can be designated by the first name, the last name, or the first name + last name. |
None |
created_at |
Optional[str] |
Returned labels should have a label whose creation date is equal to this date. |
None |
created_at_gte |
Optional[str] |
Returned labels should have a label whose creation date is greater than this date. |
None |
created_at_lte |
Optional[str] |
Returned labels should have a label whose creation date is lower than this date. |
None |
honeypot_mark_gte |
Optional[float] |
Returned labels should have a label whose honeypot is greater than this number. |
None |
honeypot_mark_lte |
Optional[float] |
Returned labels should have a label whose honeypot is lower than this number. |
None |
label_id |
Optional[str] |
Identifier of the label. |
None |
type_in |
Optional[List[str]] |
Returned labels should have a label whose type belongs to that list, if given. |
None |
user_id |
Optional[str] |
Identifier of the user. |
None |
category_search |
Optional[str] |
Query to filter labels based on the content of their jsonResponse |
None |
id_contains |
Optional[List[str]] |
Filters out labels not belonging to that list. If empty, no filtering is applied. |
None |
Dates format
Date strings should have format: "YYYY-MM-DD"
Returns:
Type | Description |
---|---|
int |
The number of labels with the parameters provided |
Source code in kili/entrypoints/queries/label/__init__.py
@typechecked
def count_labels(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_status_in: Optional[List[str]] = None,
    asset_external_id_in: Optional[List[str]] = None,
    author_in: Optional[List[str]] = None,
    created_at: Optional[str] = None,
    created_at_gte: Optional[str] = None,
    created_at_lte: Optional[str] = None,
    honeypot_mark_gte: Optional[float] = None,
    honeypot_mark_lte: Optional[float] = None,
    label_id: Optional[str] = None,
    type_in: Optional[List[str]] = None,
    user_id: Optional[str] = None,
    category_search: Optional[str] = None,
    id_contains: Optional[List[str]] = None,
) -> int:
    # pylint: disable=line-too-long
    """Get the number of labels for the given parameters.

    Args:
        project_id: Identifier of the project.
        asset_id: Identifier of the asset.
        asset_status_in: Returned labels should have a status that belongs to that list, if given.
            Possible choices : `TODO`, `ONGOING`, `LABELED`, `TO REVIEW` or `REVIEWED`.
        asset_external_id_in: Returned labels should have an external id that belongs to that list, if given.
        author_in: Returned labels should have been made by authors in that list, if given.
            An author can be designated by the first name, the last name, or the first name + last name.
        created_at: Returned labels should have their creation date equal to this date.
        created_at_gte: Returned labels should have their creation date greater or equal to this date.
        created_at_lte: Returned labels should have their creation date lower or equal to this date.
        honeypot_mark_gte: Returned labels should have a label whose honeypot is greater than this number.
        honeypot_mark_lte: Returned labels should have a label whose honeypot is lower than this number.
        label_id: Identifier of the label.
        type_in: Returned labels should have a label whose type belongs to that list, if given.
        user_id: Identifier of the user.
        category_search: Query to filter labels based on the content of their jsonResponse.
        id_contains: Filters out labels not belonging to that list. If empty, no filtering is applied.

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        The number of labels with the parameters provided
    """
    # Validate the search expression before building the query.
    if category_search:
        validate_category_search_query(category_search)

    label_filters = LabelWhere(
        project_id=project_id,
        asset_id=asset_id,
        asset_status_in=asset_status_in,
        asset_external_id_in=asset_external_id_in,
        author_in=author_in,
        created_at=created_at,
        created_at_gte=created_at_gte,
        created_at_lte=created_at_lte,
        honeypot_mark_gte=honeypot_mark_gte,
        honeypot_mark_lte=honeypot_mark_lte,
        id_contains=id_contains,
        label_id=label_id,
        type_in=type_in,
        user_id=user_id,
        category_search=category_search,
    )
    label_query = LabelQuery(self.graphql_client, self.http_client)
    return label_query.count(label_filters)
export_labels(self, project_id, filename, fmt, asset_ids=None, layout='split', single_file=False, disable_tqdm=None, with_assets=True, external_ids=None, annotation_modifier=None, asset_filter_kwargs=None, normalized_coordinates=None)
Export the project labels with the requested format into the requested output path.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
project_id |
str |
Identifier of the project. |
required |
filename |
str |
Relative or full path of the archive that will contain the exported data. |
required |
fmt |
Literal['raw', 'kili', 'yolo_v4', 'yolo_v5', 'yolo_v7', 'yolo_v8', 'coco', 'pascal_voc', 'geojson'] |
Format of the exported labels. |
required |
asset_ids |
Optional[List[str]] |
Optional list of the assets internal IDs from which to export the labels. |
None |
layout |
Literal['split', 'merged'] |
Layout of the exported files. "split" means there is one folder per job, "merged" that there is one folder with every labels. |
'split' |
single_file |
bool |
Layout of the exported labels. Single file mode is only available for some specific formats (COCO and Kili). |
False |
disable_tqdm |
Optional[bool] |
Disable the progress bar if True. |
None |
with_assets |
bool |
Download the assets in the export. |
True |
external_ids |
Optional[List[str]] |
Optional list of the assets external IDs from which to export the labels. |
None |
annotation_modifier |
Optional[Callable[[Dict, Dict, Dict], Dict]] |
(For COCO export only) function that takes the COCO annotation, the COCO image, and the Kili annotation, and should return an updated COCO annotation. This can be used if you want to add a new attribute to the COCO annotation. For example, you can add a method that computes if the annotation is a rectangle or not and add it to the COCO annotation (see example). |
None |
asset_filter_kwargs |
Optional[Dict[str, object]] |
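Optional dictionary of arguments to pass to `kili.assets()` in order to filter the assets the labels are exported from.
See the documentation of `kili.assets()` for the supported arguments and more information. |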
None |
normalized_coordinates |
Optional[bool] |
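This parameter is only effective on the Kili (a.k.a raw) format.
If True, the coordinates of the `(x, y)` vertices are normalized between 0 and 1. If False, the json response will contain additional fields with coordinates in absolute values, that is, in pixels. |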
None |
Info
The supported formats are:
- Yolo V4, V5, V7, V8 for object detection tasks.
- Kili (a.k.a raw) for all tasks.
- COCO for object detection tasks (bounding box and semantic segmentation).
- Pascal VOC for object detection tasks (bounding box).
Cloud storage
Export with asset download (`with_assets=True`) is not allowed for projects connected to a cloud storage.
Example
kili.export_labels("your_project_id", "export.zip", "yolo_v4")
Example
def add_is_rectangle(coco_annotation, coco_image, kili_annotation):
is_rectangle = ...
return {**coco_annotation, "attributes": {"is_rectangle": is_rectangle}}
kili.export_labels(
"your_project_id",
"export.zip",
"coco",
annotation_modifier=add_is_rectangle
)
Source code in kili/entrypoints/queries/label/__init__.py
def export_labels(
    self,
    project_id: str,
    filename: str,
    fmt: LabelFormat,
    asset_ids: Optional[List[str]] = None,
    layout: SplitOption = "split",
    single_file: bool = False,
    disable_tqdm: Optional[bool] = None,
    with_assets: bool = True,
    external_ids: Optional[List[str]] = None,
    annotation_modifier: Optional[CocoAnnotationModifier] = None,
    asset_filter_kwargs: Optional[Dict[str, object]] = None,
    normalized_coordinates: Optional[bool] = None,
) -> None:
    # pylint: disable=line-too-long
    """Export the project labels with the requested format into the requested output path.

    Args:
        project_id: Identifier of the project.
        filename: Relative or full path of the archive that will contain
            the exported data.
        fmt: Format of the exported labels.
        asset_ids: Optional list of the assets internal IDs from which to export the labels.
        layout: Layout of the exported files. "split" means there is one folder
            per job, "merged" that there is one folder with every labels.
        single_file: Layout of the exported labels. Single file mode is
            only available for some specific formats (COCO and Kili).
        disable_tqdm: Disable the progress bar if True.
        with_assets: Download the assets in the export.
        external_ids: Optional list of the assets external IDs from which to export the labels.
            It is only used when `asset_ids` is not provided.
        annotation_modifier: (For COCO export only) function that takes the COCO annotation, the
            COCO image, and the Kili annotation, and should return an updated COCO annotation.
            This can be used if you want to add a new attribute to the COCO annotation. For
            example, you can add a method that computes if the annotation is a rectangle or not
            and add it to the COCO annotation (see example).
        asset_filter_kwargs: Optional dictionary of arguments to pass to `kili.assets()` in order to filter the assets the labels are exported from. The supported arguments are:

            - `consensus_mark_gte`
            - `consensus_mark_lte`
            - `external_id_strictly_in`
            - `external_id_in`
            - `honeypot_mark_gte`
            - `honeypot_mark_lte`
            - `label_author_in`
            - `label_reviewer_in`
            - `skipped`
            - `status_in`
            - `label_category_search`
            - `created_at_gte`
            - `created_at_lte`
            - `issue_type`
            - `issue_status`
            - `inference_mark_gte`
            - `inference_mark_lte`
            - `metadata_where`

            See the documentation of [`kili.assets()`](https://python-sdk-docs.kili-technology.com/latest/sdk/asset/#kili.queries.asset.__init__.QueriesAsset.assets) for more information.
        normalized_coordinates: This parameter is only effective on the Kili (a.k.a raw) format.
            If True, the coordinates of the `(x, y)` vertices are normalized between 0 and 1.
            If False, the json response will contain additional fields with coordinates in absolute values, that is, in pixels.

    Returns:
        None. If the export raises `NoCompatibleJobError`, the error is not
        propagated: its message is printed instead.

    !!! Info
        The supported formats are:

        - Yolo V4, V5, V7, V8 for object detection tasks.
        - Kili (a.k.a raw) for all tasks.
        - COCO for object detection tasks (bounding box and semantic segmentation).
        - Pascal VOC for object detection tasks (bounding box).

    !!! warning "Cloud storage"
        Export with asset download (`with_assets=True`) is not allowed for projects connected to a cloud storage.

    !!! Example
        ```python
        kili.export_labels("your_project_id", "export.zip", "yolo_v4")
        ```

    !!! Example
        ```python
        def add_is_rectangle(coco_annotation, coco_image, kili_annotation):
            is_rectangle = ...
            return {**coco_annotation, "attributes": {"is_rectangle": is_rectangle}}

        kili.export_labels(
            "your_project_id",
            "export.zip",
            "coco",
            annotation_modifier=add_is_rectangle
        )
        ```
    """
    # External IDs are only resolved when no internal IDs were given:
    # an explicit `asset_ids` always takes precedence.
    if external_ids is not None and asset_ids is None:
        id_map = infer_ids_from_external_ids(
            kili_api_gateway=self.kili_api_gateway,
            asset_external_ids=external_ids,
            project_id=project_id,
        )
        # Keep the caller's ordering of external IDs in the resolved list.
        asset_ids = [id_map[external_id] for external_id in external_ids]

    try:
        # Inside the class, this name resolves to the module-level export
        # service function, not to this method.
        export_labels(
            self,
            asset_ids=asset_ids,
            project_id=cast(ProjectId, project_id),
            export_type="latest",
            label_format=fmt,
            split_option=layout,
            single_file=single_file,
            output_file=filename,
            disable_tqdm=disable_tqdm,
            log_level="WARNING",
            with_assets=with_assets,
            annotation_modifier=annotation_modifier,
            asset_filter_kwargs=asset_filter_kwargs,
            normalized_coordinates=normalized_coordinates,
        )
    except NoCompatibleJobError as excp:
        # Deliberate best effort: report the problem without raising.
        print(str(excp))
export_labels_as_df(self, project_id, fields=('author.email', 'author.id', 'createdAt', 'id', 'labelType'), asset_fields=('externalId',))
Get the labels of a project as a pandas DataFrame.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
project_id |
str |
Identifier of the project |
required |
fields |
Union[List[str], Tuple[str, ...]] |
All the fields to request among the possible fields for the labels. See the documentation for all possible fields. |
('author.email', 'author.id', 'createdAt', 'id', 'labelType') |
asset_fields |
Union[List[str], Tuple[str, ...]] |
All the fields to request among the possible fields for the assets. See the documentation for all possible fields. |
('externalId',) |
Returns:
Type | Description |
---|---|
DataFrame |
A pandas DataFrame containing the labels. |
Source code in kili/entrypoints/queries/label/__init__.py
@typechecked
def export_labels_as_df(
    self,
    project_id: str,
    fields: ListOrTuple[str] = ("author.email", "author.id", "createdAt", "id", "labelType"),
    asset_fields: ListOrTuple[str] = ("externalId",),
) -> pd.DataFrame:
    # pylint: disable=line-too-long
    """Return every label of a project as rows of a pandas DataFrame.

    Each row holds the requested label fields, plus one `asset_<field>` column
    per requested asset field of the asset the label belongs to.

    Args:
        project_id: Identifier of the project
        fields: All the fields to request among the possible fields for the labels.
            See [the documentation](https://docs.kili-technology.com/reference/graphql-api#label) for all possible fields.
        asset_fields: All the fields to request among the possible fields for the assets.
            See [the documentation](https://docs.kili-technology.com/reference/graphql-api#asset) for all possible fields.

    Returns:
        A pandas DataFrame containing the labels.
    """
    # The result is discarded; presumably this call is only here to fail early
    # when the project cannot be fetched (to be confirmed against `get_project`).
    get_project(self, project_id, ["id"])

    asset_filters = AssetFilters(project_id=project_id)
    # Label fields are requested through their parent asset, hence the prefix.
    requested_fields = tuple(asset_fields) + tuple("labels." + field for field in fields)
    query_options = QueryOptions(disable_tqdm=False)
    assets = self.kili_api_gateway.list_assets(asset_filters, requested_fields, query_options)

    rows = []
    for asset in assets:
        for label in asset["labels"]:
            row = dict(label)
            # Copy every asset-level value next to the label, with a prefix.
            for key in asset:
                if key != "labels":
                    row[f"asset_{key}"] = asset[key]
            rows.append(row)
    return pd.DataFrame(rows)
inferences(self, project_id, asset_id=None, asset_status_in=None, asset_external_id_in=None, author_in=None, created_at=None, created_at_gte=None, created_at_lte=None, fields=('author.email', 'author.id', 'id', 'jsonResponse', 'labelType', 'modelName'), first=None, honeypot_mark_gte=None, honeypot_mark_lte=None, id_contains=None, label_id=None, skip=0, user_id=None, disable_tqdm=None, category_search=None, *, as_generator=False)
Get inference labels from a project based on a set of criteria.
This method is equivalent to the `kili.labels()` method, but it only returns labels of type "INFERENCE".
Parameters:
Name | Type | Description | Default |
---|---|---|---|
project_id |
str |
Identifier of the project. |
required |
asset_id |
Optional[str] |
Identifier of the asset. |
None |
asset_status_in |
Optional[List[str]] |
Returned labels should have a status that belongs to that list, if given.
Possible choices: `TODO`, `ONGOING`, `LABELED`, `TO REVIEW` or `REVIEWED`. |
None |
asset_external_id_in |
Optional[List[str]] |
Returned labels should have an external id that belongs to that list, if given. |
None |
author_in |
Optional[List[str]] |
Returned labels should have been made by authors in that list, if given. An author can be designated by the first name, the last name, or the first name + last name. |
None |
created_at |
Optional[str] |
Returned labels should have a label whose creation date is equal to this date. |
None |
created_at_gte |
Optional[str] |
Returned labels should have a label whose creation date is greater than this date. |
None |
created_at_lte |
Optional[str] |
Returned labels should have a label whose creation date is lower than this date. |
None |
fields |
Union[List[str], Tuple[str, ...]] |
All the fields to request among the possible fields for the labels. See the documentation for all possible fields. |
('author.email', 'author.id', 'id', 'jsonResponse', 'labelType', 'modelName') |
first |
Optional[int] |
Maximum number of labels to return. |
None |
honeypot_mark_gte |
Optional[float] |
Returned labels should have a label whose honeypot is greater than this number. |
None |
honeypot_mark_lte |
Optional[float] |
Returned labels should have a label whose honeypot is lower than this number. |
None |
id_contains |
Optional[List[str]] |
Filters out labels not belonging to that list. If empty, no filtering is applied. |
None |
label_id |
Optional[str] |
Identifier of the label. |
None |
skip |
int |
Number of labels to skip (they are ordered by their date of creation, first to last). |
0 |
user_id |
Optional[str] |
Identifier of the user. |
None |
disable_tqdm |
Optional[bool] |
If `True`, the progress bar will be disabled. |
None |
as_generator |
bool |
If `True`, a generator on the labels is returned. |
False |
category_search |
Optional[str] |
Query to filter labels based on the content of their jsonResponse |
None |
Returns:
Type | Description |
---|---|
Iterable[Dict] |
An iterable of inference labels. |
Examples:
>>> kili.inferences(project_id=project_id) # returns a list of inference labels of a project
Source code in kili/entrypoints/queries/label/__init__.py
@typechecked
def inferences(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_status_in: Optional[List[str]] = None,
    asset_external_id_in: Optional[List[str]] = None,
    author_in: Optional[List[str]] = None,
    created_at: Optional[str] = None,
    created_at_gte: Optional[str] = None,
    created_at_lte: Optional[str] = None,
    fields: ListOrTuple[str] = (
        "author.email",
        "author.id",
        "id",
        "jsonResponse",
        "labelType",
        "modelName",
    ),
    first: Optional[int] = None,
    honeypot_mark_gte: Optional[float] = None,
    honeypot_mark_lte: Optional[float] = None,
    id_contains: Optional[List[str]] = None,
    label_id: Optional[str] = None,
    skip: int = 0,
    user_id: Optional[str] = None,
    disable_tqdm: Optional[bool] = None,
    category_search: Optional[str] = None,
    *,
    as_generator: bool = False,
) -> Iterable[Dict]:
    # pylint: disable=line-too-long
    """Get inference labels from a project based on a set of criteria.

    Thin wrapper around `kili.labels()`: every argument is forwarded unchanged
    and the label type filter is forced to `["INFERENCE"]`.

    Args:
        project_id: Identifier of the project.
        asset_id: Identifier of the asset.
        asset_status_in: Returned labels should have a status that belongs to that list, if given.
            Possible choices: `TODO`, `ONGOING`, `LABELED`, `TO REVIEW` or `REVIEWED`.
        asset_external_id_in: Returned labels should have an external id that belongs to that list, if given.
        author_in: Returned labels should have been made by authors in that list, if given.
            An author can be designated by the first name, the last name, or the first name + last name.
        created_at: Returned labels should have their creation date equal to this date.
        created_at_gte: Returned labels should have their creation date greater or equal to this date.
        created_at_lte: Returned labels should have their creation date lower or equal to this date.
        fields: All the fields to request among the possible fields for the labels.
            See [the documentation](https://docs.kili-technology.com/reference/graphql-api#label) for all possible fields.
        first: Maximum number of labels to return.
        honeypot_mark_gte: Returned labels should have a label whose honeypot is greater than this number.
        honeypot_mark_lte: Returned labels should have a label whose honeypot is lower than this number.
        id_contains: Filters out labels not belonging to that list. If empty, no filtering is applied.
        label_id: Identifier of the label.
        skip: Number of labels to skip (they are ordered by their date of creation, first to last).
        user_id: Identifier of the user.
        disable_tqdm: If `True`, the progress bar will be disabled.
        as_generator: If `True`, a generator on the labels is returned.
        category_search: Query to filter labels based on the content of their jsonResponse.

    Returns:
        An iterable of inference labels.

    Examples:
        >>> kili.inferences(project_id=project_id) # returns a list of inference labels of a project
    """
    # Everything the caller supplied is passed through by keyword; only the
    # label type is imposed here.
    forwarded = {
        "project_id": project_id,
        "asset_id": asset_id,
        "asset_status_in": asset_status_in,
        "asset_external_id_in": asset_external_id_in,
        "author_in": author_in,
        "created_at": created_at,
        "created_at_gte": created_at_gte,
        "created_at_lte": created_at_lte,
        "fields": fields,
        "first": first,
        "honeypot_mark_gte": honeypot_mark_gte,
        "honeypot_mark_lte": honeypot_mark_lte,
        "id_contains": id_contains,
        "label_id": label_id,
        "skip": skip,
        "type_in": ["INFERENCE"],
        "user_id": user_id,
        "disable_tqdm": disable_tqdm,
        "category_search": category_search,
    }
    return self.labels(**forwarded, as_generator=as_generator)  # type: ignore
labels(self, project_id, asset_id=None, asset_status_in=None, asset_external_id_in=None, author_in=None, created_at=None, created_at_gte=None, created_at_lte=None, fields=('author.email', 'author.id', 'id', 'jsonResponse', 'labelType', 'secondsToLabel', 'isLatestLabelForUser', 'assetId'), first=None, honeypot_mark_gte=None, honeypot_mark_lte=None, id_contains=None, label_id=None, skip=0, type_in=None, user_id=None, disable_tqdm=None, category_search=None, output_format='dict', *, as_generator=False)
Get a label list or a label generator from a project based on a set of criteria.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
project_id |
str |
Identifier of the project. |
required |
asset_id |
Optional[str] |
Identifier of the asset. |
None |
asset_status_in |
Optional[List[str]] |
Returned labels should have a status that belongs to that list, if given.
Possible choices: `TODO`, `ONGOING`, `LABELED`, `TO REVIEW` or `REVIEWED`. |
None |
asset_external_id_in |
Optional[List[str]] |
Returned labels should have an external id that belongs to that list, if given. |
None |
author_in |
Optional[List[str]] |
Returned labels should have been made by authors in that list, if given. An author can be designated by the first name, the last name, or the first name + last name. |
None |
created_at |
Optional[str] |
Returned labels should have their creation date equal to this date. |
None |
created_at_gte |
Optional[str] |
Returned labels should have their creation date greater or equal to this date. |
None |
created_at_lte |
Optional[str] |
Returned labels should have their creation date lower or equal to this date. |
None |
fields |
Union[List[str], Tuple[str, ...]] |
All the fields to request among the possible fields for the labels. See the documentation for all possible fields. |
('author.email', 'author.id', 'id', 'jsonResponse', 'labelType', 'secondsToLabel', 'isLatestLabelForUser', 'assetId') |
first |
Optional[int] |
Maximum number of labels to return. |
None |
honeypot_mark_gte |
Optional[float] |
Returned labels should have a label whose honeypot is greater than this number. |
None |
honeypot_mark_lte |
Optional[float] |
Returned labels should have a label whose honeypot is lower than this number. |
None |
id_contains |
Optional[List[str]] |
Filters out labels not belonging to that list. If empty, no filtering is applied. |
None |
label_id |
Optional[str] |
Identifier of the label. |
None |
skip |
int |
Number of labels to skip (they are ordered by their date of creation, first to last). |
0 |
type_in |
Optional[List[str]] |
Returned labels should have a label whose type belongs to that list, if given. |
None |
user_id |
Optional[str] |
Identifier of the user. |
None |
disable_tqdm |
Optional[bool] |
If `True`, the progress bar will be disabled. |
None |
as_generator |
bool |
If `True`, a generator on the labels is returned. |
False |
category_search |
Optional[str] |
Query to filter labels based on the content of their jsonResponse. |
None |
output_format |
Literal['dict', 'parsed_label'] |
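If `dict`, the output is an iterable of Python dictionaries. If `parsed_label`, the output is an iterable of parsed labels objects. More information on parsed labels in the documentation. |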
'dict' |
Dates format
Date strings should have format: "YYYY-MM-DD"
Returns:
Type | Description |
---|---|
Iterable[Union[Dict, kili.utils.labels.parsing.ParsedLabel]] |
An iterable of labels. |
Examples:
>>> kili.labels(project_id=project_id, fields=['jsonResponse', 'labelOf.externalId']) # returns a list of all labels of a project and their assets external ID
>>> kili.labels(project_id=project_id, fields=['jsonResponse'], as_generator=True) # returns a generator of all labels of a project
How to filter based on label categories
The search query is composed of logical expressions following this format:
[job_name].[category_name].count [comparison_operator] [value]
where:
- [job_name] is the name of the job in the interface
- [category_name] is the name of the category in the interface for this job
- [comparison_operator] can be one of: [==, >=, <=, <, >]
- [value] is an integer that represents the count of such objects of the given category in the label
These operations can be separated by OR and AND operators
Examples:
category_search = JOB_CLASSIF.CATEGORY_A.count > 0
category_search = JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0
category_search = (JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10
Source code in kili/entrypoints/queries/label/__init__.py
@typechecked
def labels(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_status_in: Optional[List[str]] = None,
    asset_external_id_in: Optional[List[str]] = None,
    author_in: Optional[List[str]] = None,
    created_at: Optional[str] = None,
    created_at_gte: Optional[str] = None,
    created_at_lte: Optional[str] = None,
    fields: ListOrTuple[str] = (
        "author.email",
        "author.id",
        "id",
        "jsonResponse",
        "labelType",
        "secondsToLabel",
        "isLatestLabelForUser",
        "assetId",
    ),
    first: Optional[int] = None,
    honeypot_mark_gte: Optional[float] = None,
    honeypot_mark_lte: Optional[float] = None,
    id_contains: Optional[List[str]] = None,
    label_id: Optional[str] = None,
    skip: int = 0,
    type_in: Optional[List[str]] = None,
    user_id: Optional[str] = None,
    disable_tqdm: Optional[bool] = None,
    category_search: Optional[str] = None,
    output_format: Literal["dict", "parsed_label"] = "dict",
    *,
    as_generator: bool = False,
) -> Iterable[Union[Dict, ParsedLabel]]:
    # pylint: disable=line-too-long
    """Get a label list or a label generator from a project based on a set of criteria.

    Args:
        project_id: Identifier of the project.
        asset_id: Identifier of the asset.
        asset_status_in: Returned labels should have a status that belongs to that list, if given.
            Possible choices: `TODO`, `ONGOING`, `LABELED`, `TO REVIEW` or `REVIEWED`.
        asset_external_id_in: Returned labels should have an external id that belongs to that list, if given.
        author_in: Returned labels should have been made by authors in that list, if given.
            An author can be designated by the first name, the last name, or the first name + last name.
        created_at: Returned labels should have their creation date equal to this date.
        created_at_gte: Returned labels should have their creation date greater or equal to this date.
        created_at_lte: Returned labels should have their creation date lower or equal to this date.
        fields: All the fields to request among the possible fields for the labels.
            See [the documentation](https://docs.kili-technology.com/reference/graphql-api#label) for all possible fields.
        first: Maximum number of labels to return.
        honeypot_mark_gte: Returned labels should have a label whose honeypot is greater than this number.
        honeypot_mark_lte: Returned labels should have a label whose honeypot is lower than this number.
        id_contains: Filters out labels not belonging to that list. If empty, no filtering is applied.
        label_id: Identifier of the label.
        skip: Number of labels to skip (they are ordered by their date of creation, first to last).
        type_in: Returned labels should have a label whose type belongs to that list, if given.
        user_id: Identifier of the user.
        disable_tqdm: If `True`, the progress bar will be disabled.
        as_generator: If `True`, a generator on the labels is returned.
        category_search: Query to filter labels based on the content of their jsonResponse.
        output_format: If `dict`, the output is an iterable of Python dictionaries.
            If `parsed_label`, the output is an iterable of parsed labels objects. More information on parsed labels in the [documentation](https://python-sdk-docs.kili-technology.com/latest/sdk/tutorials/label_parsing/).

    !!! info "Dates format"
        Date strings should have format: "YYYY-MM-DD"

    Returns:
        An iterable of labels: the query result itself when `as_generator` is
        `True`, otherwise a list built from it.

    Raises:
        ValueError: if `output_format` is `parsed_label` and `jsonResponse` is
            not part of `fields`.

    Examples:
        >>> kili.labels(project_id=project_id, fields=['jsonResponse', 'labelOf.externalId']) # returns a list of all labels of a project and their assets external ID
        >>> kili.labels(project_id=project_id, fields=['jsonResponse'], as_generator=True) # returns a generator of all labels of a project

    !!! example "How to filter based on label categories"
        The search query is composed of logical expressions following this format:

            [job_name].[category_name].count [comparison_operator] [value]
        where:

        - `[job_name]` is the name of the job in the interface
        - `[category_name]` is the name of the category in the interface for this job
        - `[comparison_operator]` can be one of: [`==`, `>=`, `<=`, `<`, `>`]
        - `[value]` is an integer that represents the count of such objects of the given category in the label

        These operations can be separated by OR and AND operators

        Example:

            category_search = `JOB_CLASSIF.CATEGORY_A.count > 0`
            category_search = `JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0`
            category_search = `(JOB_CLASSIF.CATEGORY_A.count > 0 OR JOB_NER.CATEGORY_B.count > 0) AND JOB_BBOX.CATEGORY_C.count > 10`
    """
    # An empty or missing search string is not validated at all.
    if category_search:
        validate_category_search_query(category_search)

    label_filter = LabelWhere(
        project_id=project_id,
        asset_id=asset_id,
        asset_status_in=asset_status_in,
        asset_external_id_in=asset_external_id_in,
        author_in=author_in,
        created_at=created_at,
        created_at_gte=created_at_gte,
        created_at_lte=created_at_lte,
        honeypot_mark_gte=honeypot_mark_gte,
        honeypot_mark_lte=honeypot_mark_lte,
        id_contains=id_contains,
        label_id=label_id,
        type_in=type_in,
        user_id=user_id,
        category_search=category_search,
    )

    # Only the "parsed_label" format needs a hook applied to the query results;
    # it is built from the project's interface and input type.
    parser = None
    if output_format == "parsed_label":
        if "jsonResponse" not in fields:
            raise ValueError(
                "The field 'jsonResponse' is required to parse labels. Please add it to the"
                " 'fields' argument."
            )
        project_info = get_project(self, project_id, ["jsonInterface", "inputType"])
        parser = partial(
            parse_labels,
            json_interface=project_info["jsonInterface"],
            input_type=project_info["inputType"],
        )

    disable_tqdm = disable_tqdm_if_as_generator(as_generator, disable_tqdm)
    query_options = QueryOptions(disable_tqdm, first, skip)
    run_query = LabelQuery(self.graphql_client, self.http_client)
    results = run_query(label_filter, fields, query_options, parser)
    return results if as_generator else list(results)
predictions(self, project_id, asset_id=None, asset_status_in=None, asset_external_id_in=None, author_in=None, created_at=None, created_at_gte=None, created_at_lte=None, fields=('author.email', 'author.id', 'id', 'jsonResponse', 'labelType', 'modelName'), first=None, honeypot_mark_gte=None, honeypot_mark_lte=None, id_contains=None, label_id=None, skip=0, user_id=None, disable_tqdm=None, category_search=None, *, as_generator=False)
Get prediction labels from a project based on a set of criteria.
This method is equivalent to the `kili.labels()` method, but it only returns labels of type "PREDICTION".
Parameters:
Name | Type | Description | Default |
---|---|---|---|
project_id |
str |
Identifier of the project. |
required |
asset_id |
Optional[str] |
Identifier of the asset. |
None |
asset_status_in |
Optional[List[str]] |
Returned labels should have a status that belongs to that list, if given.
Possible choices: `TODO`, `ONGOING`, `LABELED`, `TO REVIEW` or `REVIEWED`. |
None |
asset_external_id_in |
Optional[List[str]] |
Returned labels should have an external id that belongs to that list, if given. |
None |
author_in |
Optional[List[str]] |
Returned labels should have been made by authors in that list, if given. An author can be designated by the first name, the last name, or the first name + last name. |
None |
created_at |
Optional[str] |
Returned labels should have a label whose creation date is equal to this date. |
None |
created_at_gte |
Optional[str] |
Returned labels should have a label whose creation date is greater than this date. |
None |
created_at_lte |
Optional[str] |
Returned labels should have a label whose creation date is lower than this date. |
None |
fields |
Union[List[str], Tuple[str, ...]] |
All the fields to request among the possible fields for the labels. See the documentation for all possible fields. |
('author.email', 'author.id', 'id', 'jsonResponse', 'labelType', 'modelName') |
first |
Optional[int] |
Maximum number of labels to return. |
None |
honeypot_mark_gte |
Optional[float] |
Returned labels should have a label whose honeypot is greater than this number. |
None |
honeypot_mark_lte |
Optional[float] |
Returned labels should have a label whose honeypot is lower than this number. |
None |
id_contains |
Optional[List[str]] |
Filters out labels not belonging to that list. If empty, no filtering is applied. |
None |
label_id |
Optional[str] |
Identifier of the label. |
None |
skip |
int |
Number of labels to skip (they are ordered by their date of creation, first to last). |
0 |
user_id |
Optional[str] |
Identifier of the user. |
None |
disable_tqdm |
Optional[bool] |
If `True`, the progress bar will be disabled. |
None |
as_generator |
bool |
If `True`, a generator on the labels is returned. |
False |
category_search |
Optional[str] |
Query to filter labels based on the content of their jsonResponse |
None |
Returns:
Type | Description |
---|---|
Iterable[Dict] |
An iterable of labels. |
Examples:
>>> kili.predictions(project_id=project_id) # returns a list of prediction labels of a project
Source code in kili/entrypoints/queries/label/__init__.py
@typechecked
def predictions(
    self,
    project_id: str,
    asset_id: Optional[str] = None,
    asset_status_in: Optional[List[str]] = None,
    asset_external_id_in: Optional[List[str]] = None,
    author_in: Optional[List[str]] = None,
    created_at: Optional[str] = None,
    created_at_gte: Optional[str] = None,
    created_at_lte: Optional[str] = None,
    fields: ListOrTuple[str] = (
        "author.email",
        "author.id",
        "id",
        "jsonResponse",
        "labelType",
        "modelName",
    ),
    first: Optional[int] = None,
    honeypot_mark_gte: Optional[float] = None,
    honeypot_mark_lte: Optional[float] = None,
    id_contains: Optional[List[str]] = None,
    label_id: Optional[str] = None,
    skip: int = 0,
    user_id: Optional[str] = None,
    disable_tqdm: Optional[bool] = None,
    category_search: Optional[str] = None,
    *,
    as_generator: bool = False,
) -> Iterable[Dict]:
    # pylint: disable=line-too-long
    """Get prediction labels from a project based on a set of criteria.

    Thin wrapper around `kili.labels()`: every argument is forwarded unchanged
    and the label type filter is forced to `["PREDICTION"]`.

    Args:
        project_id: Identifier of the project.
        asset_id: Identifier of the asset.
        asset_status_in: Returned labels should have a status that belongs to that list, if given.
            Possible choices: `TODO`, `ONGOING`, `LABELED`, `TO REVIEW` or `REVIEWED`.
        asset_external_id_in: Returned labels should have an external id that belongs to that list, if given.
        author_in: Returned labels should have been made by authors in that list, if given.
            An author can be designated by the first name, the last name, or the first name + last name.
        created_at: Returned labels should have their creation date equal to this date.
        created_at_gte: Returned labels should have their creation date greater or equal to this date.
        created_at_lte: Returned labels should have their creation date lower or equal to this date.
        fields: All the fields to request among the possible fields for the labels.
            See [the documentation](https://docs.kili-technology.com/reference/graphql-api#label) for all possible fields.
        first: Maximum number of labels to return.
        honeypot_mark_gte: Returned labels should have a label whose honeypot is greater than this number.
        honeypot_mark_lte: Returned labels should have a label whose honeypot is lower than this number.
        id_contains: Filters out labels not belonging to that list. If empty, no filtering is applied.
        label_id: Identifier of the label.
        skip: Number of labels to skip (they are ordered by their date of creation, first to last).
        user_id: Identifier of the user.
        disable_tqdm: If `True`, the progress bar will be disabled.
        as_generator: If `True`, a generator on the labels is returned.
        category_search: Query to filter labels based on the content of their jsonResponse.

    Returns:
        An iterable of prediction labels.

    Examples:
        >>> kili.predictions(project_id=project_id) # returns a list of prediction labels of a project
    """
    # Everything the caller supplied is passed through by keyword; only the
    # label type is imposed here.
    forwarded = {
        "project_id": project_id,
        "asset_id": asset_id,
        "asset_status_in": asset_status_in,
        "asset_external_id_in": asset_external_id_in,
        "author_in": author_in,
        "created_at": created_at,
        "created_at_gte": created_at_gte,
        "created_at_lte": created_at_lte,
        "fields": fields,
        "first": first,
        "honeypot_mark_gte": honeypot_mark_gte,
        "honeypot_mark_lte": honeypot_mark_lte,
        "id_contains": id_contains,
        "label_id": label_id,
        "skip": skip,
        "type_in": ["PREDICTION"],
        "user_id": user_id,
        "disable_tqdm": disable_tqdm,
        "category_search": category_search,
    }
    return self.labels(**forwarded, as_generator=as_generator)  # type: ignore
Mutations
Set of Label mutations.
Source code in kili/entrypoints/mutations/label/__init__.py
class MutationsLabel(BaseOperationEntrypointMixin):
    """Set of Label mutations."""

    # pylint: disable=too-many-arguments

    @typechecked
    def create_predictions(
        self,
        project_id: str,
        external_id_array: Optional[List[str]] = None,
        model_name_array: Optional[List[str]] = None,
        json_response_array: Optional[List[dict]] = None,
        model_name: Optional[str] = None,
        asset_id_array: Optional[List[str]] = None,
        disable_tqdm: Optional[bool] = None,
        overwrite: bool = False,
    ) -> Dict[Literal["id"], str]:
        # pylint: disable=line-too-long
        """Create predictions for specific assets.

        Args:
            project_id: Identifier of the project.
            external_id_array: The external IDs of the assets for which we want to add predictions.
            model_name_array: Deprecated, use `model_name` instead.
            json_response_array: The predictions are given here. For examples,
                see [the recipe](https://docs.kili-technology.com/recipes/importing-labels-and-predictions).
            model_name: The name of the model that generated the predictions
            asset_id_array: The internal IDs of the assets for which we want to add predictions.
            disable_tqdm: Disable tqdm progress bar.
            overwrite: if True, it will overwrite existing predictions of
                the same model name on the targeted assets.

        Returns:
            A dictionary with the project `id`.

        Raises:
            ValueError: If `json_response_array` is missing or empty, if neither
                `model_name` nor `model_name_array` is given, or if `model_name_array`
                holds more than one distinct model name.

        !!! example "Recipe"
            For more detailed examples on how to create predictions, see [the recipe](https://docs.kili-technology.com/recipes/importing-labels-and-predictions).

        !!! warning "model name"
            The use of `model_name_array` is deprecated. Creating predictions from different
            models is not supported anymore. Please use `model_name` argument instead to
            provide the predictions model name.
        """
        if json_response_array is None or len(json_response_array) == 0:
            raise ValueError(
                "json_response_array is empty, you must provide at least one prediction to upload"
            )

        assert_all_arrays_have_same_size(
            [external_id_array, json_response_array, model_name_array, asset_id_array]
        )

        if model_name is None:
            # Legacy path: derive the single model name from the deprecated array.
            if model_name_array is None:
                raise ValueError("You must provide a model name with the model_name argument ")
            if len(set(model_name_array)) > 1:
                raise ValueError(
                    "Creating predictions from different models is not supported anymore. Separate"
                    " your calls by models."
                )
            # stacklevel=2 attributes the warning to the frame that invoked this
            # function instead of to this line inside the SDK.
            warnings.warn(
                "The use of `model_name_array` is deprecated. Creating predictions from"
                " different models is not supported anymore. Please use `model_name` argument"
                " instead to provide the predictions model name.",
                DeprecationWarning,
                stacklevel=2,
            )
            model_name = model_name_array[0]

        nb_predictions = len(json_response_array)
        # Identifier arrays that are missing or empty are replaced by a run of None.
        labels = [
            {
                "asset_id": asset_id,
                "asset_external_id": asset_external_id,
                "json_response": json_response,
            }
            for asset_id, asset_external_id, json_response in zip(
                asset_id_array or [None] * nb_predictions,
                external_id_array or [None] * nb_predictions,
                json_response_array,
            )
        ]
        import_labels_from_dict(
            self, project_id, labels, "PREDICTION", overwrite, model_name, disable_tqdm
        )
        return {"id": project_id}

    @deprecate(
        msg=(
            "append_to_labels method is deprecated. Please use append_labels instead. This new"
            " function allows to import several labels 10 times faster."
        )
    )
    @typechecked
    def append_to_labels(
        self,
        json_response: dict,
        author_id: Optional[str] = None,
        label_asset_external_id: Optional[str] = None,
        label_asset_id: Optional[str] = None,
        label_type: LabelType = "DEFAULT",
        project_id: Optional[str] = None,
        seconds_to_label: Optional[int] = 0,
    ):
        """!!! danger "[DEPRECATED]"
            append_to_labels method is deprecated. Please use append_labels instead.
            This new function allows to import several labels 10 times faster.

        Append a label to an asset.

        Args:
            json_response: Label is given here.
            author_id: ID of the author of the label.
                When omitted, the id returned by `self.get_user()` is used.
            label_asset_external_id: External identifier of the asset.
            label_asset_id: Identifier of the asset.
            project_id: Identifier of the project.
            label_type: Can be one of `AUTOSAVE`, `DEFAULT`, `PREDICTION`, `REVIEW` or `INFERENCE`.
            seconds_to_label: Time to create the label.

        !!! warning
            Either provide `label_asset_id` or `label_asset_external_id` and `project_id`

        Returns:
            A result object which indicates if the mutation was successful,
                or an error message.

        Examples:
            >>> kili.append_to_labels(label_asset_id=asset_id, json_response={...})
        """
        if author_id is None:
            author_id = self.get_user()["id"]  # type: ignore # pylint: disable=no-member

        asset_ids = [label_asset_id] if label_asset_id else None
        external_ids = [label_asset_external_id] if label_asset_external_id else None
        check_asset_identifier_arguments(project_id, asset_ids, external_ids)

        if label_asset_id is None:
            # Resolve the internal asset id from the external id within the project.
            assert label_asset_external_id
            assert project_id
            id_by_external_id = infer_ids_from_external_ids(
                self.kili_api_gateway, [label_asset_external_id], project_id
            )
            label_asset_id = id_by_external_id[label_asset_external_id]

        label_data = {
            "authorID": author_id,
            "jsonResponse": dumps(json_response),
            "labelType": label_type,
            "secondsToLabel": seconds_to_label,
        }
        variables = {"data": label_data, "where": {"id": label_asset_id}}
        result = self.graphql_client.execute(GQL_APPEND_TO_LABELS, variables)
        return self.format_result("data", result, Label)

    @typechecked
    def append_labels(
        self,
        asset_id_array: Optional[List[str]] = None,
        json_response_array: ListOrTuple[Dict] = (),
        author_id_array: Optional[List[str]] = None,
        seconds_to_label_array: Optional[List[int]] = None,
        model_name: Optional[str] = None,
        label_type: LabelType = "DEFAULT",
        project_id: Optional[str] = None,
        asset_external_id_array: Optional[List[str]] = None,
        disable_tqdm: Optional[bool] = None,
        overwrite: bool = False,
    ) -> List[Dict[Literal["id"], str]]:
        """Append labels to assets.

        Args:
            asset_id_array: list of asset internal ids to append labels on.
            json_response_array: list of labels to append.
            author_id_array: list of the author id of the labels.
            seconds_to_label_array: list of times taken to produce the label, in seconds.
            model_name: Name of the model that generated the labels.
                Only useful when uploading PREDICTION or INFERENCE labels.
            label_type: Can be one of `AUTOSAVE`, `DEFAULT`, `PREDICTION`, `REVIEW` or `INFERENCE`.
            project_id: Identifier of the project.
            asset_external_id_array: list of asset external ids to append labels on.
            disable_tqdm: Disable tqdm progress bar.
            overwrite: when uploading prediction or inference labels, if True,
                it will overwrite existing labels with the same model name
                and of the same label type, on the targeted assets.

        Returns:
            A list of dictionaries with the label ids.

        Raises:
            ValueError: If `json_response_array` is empty.

        Examples:
            >>> kili.append_labels(
                    asset_id_array=['cl9wmlkuc00050qsz6ut39g8h', 'cl9wmlkuw00080qsz2kqh8aiy'],
                    json_response_array=[{...}, {...}]
                )
        """
        nb_labels = len(json_response_array)
        if nb_labels == 0:
            raise ValueError(
                "json_response_array is empty, you must provide at least one label to upload"
            )

        check_asset_identifier_arguments(project_id, asset_id_array, asset_external_id_array)
        assert_all_arrays_have_same_size(
            [
                seconds_to_label_array,
                author_id_array,
                json_response_array,
                asset_external_id_array,
                asset_id_array,
            ]
        )

        # Optional arrays that are missing or empty are replaced by a run of None
        # so that every label dictionary carries the same keys.
        no_values = [None] * nb_labels
        rows = zip(
            asset_id_array or no_values,
            asset_external_id_array or no_values,
            json_response_array,
            seconds_to_label_array or no_values,
            author_id_array or no_values,
        )
        labels = [
            {
                "asset_id": asset_id,
                "asset_external_id": asset_external_id,
                "json_response": json_response,
                "seconds_to_label": seconds_to_label,
                "author_id": author_id,
            }
            for asset_id, asset_external_id, json_response, seconds_to_label, author_id in rows
        ]
        return import_labels_from_dict(
            self, project_id, labels, label_type, overwrite, model_name, disable_tqdm
        )

    @typechecked
    def update_properties_in_label(
        self,
        label_id: str,
        seconds_to_label: Optional[int] = None,
        model_name: Optional[str] = None,
        json_response: Optional[dict] = None,
    ) -> Dict[Literal["id"], str]:
        """Update properties of a label.

        Args:
            label_id: Identifier of the label
            seconds_to_label: Time to create the label
            model_name: Name of the model
            json_response: The label is given here

        Returns:
            A dictionary with the label `id`.

        Examples:
            >>> kili.update_properties_in_label(label_id=label_id, json_response={...})
        """
        # The JSON response is serialized to a string; None is forwarded untouched.
        if json_response is None:
            serialized_response = None
        else:
            serialized_response = dumps(json_response)

        variables = {
            "labelID": label_id,
            "secondsToLabel": seconds_to_label,
            "modelName": model_name,
            "jsonResponse": serialized_response,
        }
        result = self.graphql_client.execute(GQL_UPDATE_PROPERTIES_IN_LABEL, variables)
        return self.format_result("data", result)

    @typechecked
    def create_honeypot(
        self,
        json_response: dict,
        asset_external_id: Optional[str] = None,
        asset_id: Optional[str] = None,
        project_id: Optional[str] = None,
    ) -> Label:
        """Create honeypot for an asset.

        !!! info
            Uses the given `json_response` to create a `REVIEW` label.
            This enables Kili to compute a `honeypotMark`,
            which measures the similarity between this label and other labels.

        Args:
            json_response: The JSON response of the honeypot label of the asset.
            asset_id: Identifier of the asset.
                Either provide `asset_id` or `asset_external_id` and `project_id`.
            asset_external_id: External identifier of the asset.
                Either provide `asset_id` or `asset_external_id` and `project_id`.
            project_id: Identifier of the project.
                Either provide `asset_id` or `asset_external_id` and `project_id`.

        Returns:
            A dictionary-like object representing the created label.

        Raises:
            ValueError: If `asset_id` is not given and either `asset_external_id`
                or `project_id` is missing.
        """
        if asset_id is None:
            if asset_external_id is None or project_id is None:
                raise ValueError(
                    "Either provide `asset_id` or `asset_external_id` and `project_id`."
                )
            # Resolve the internal asset id from the external id within the project.
            id_by_external_id = infer_ids_from_external_ids(
                self.kili_api_gateway, [asset_external_id], project_id
            )
            asset_id = id_by_external_id[asset_external_id]

        variables = {
            "data": {"jsonResponse": dumps(json_response)},
            "where": {"id": asset_id},
        }
        result = self.graphql_client.execute(GQL_CREATE_HONEYPOT, variables)
        return self.format_result("data", result, Label)

    @typechecked
    def delete_labels(self, ids: List[str]) -> List[str]:
        """Delete labels.

        Currently, only `PREDICTION` and `INFERENCE` labels can be deleted.

        Args:
            ids: List of label ids to delete.

        Returns:
            The deleted label ids.
        """
        if is_empty_list_with_warning("delete_labels", "ids", ids):
            return []

        def build_variables(batch):
            return {"ids": batch["ids"]}

        batch_results = mutate_from_paginated_call(
            self,
            {"ids": ids},  # type: ignore
            build_variables,
            GQL_DELETE_LABELS,
        )
        # NOTE(review): only the first element of the paginated result is formatted;
        # confirm that it covers every deleted id when several batches are sent.
        return self.format_result("data", batch_results[0])
append_labels(self, asset_id_array=None, json_response_array=(), author_id_array=None, seconds_to_label_array=None, model_name=None, label_type='DEFAULT', project_id=None, asset_external_id_array=None, disable_tqdm=None, overwrite=False)
Append labels to assets.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
asset_id_array |
Optional[List[str]] |
list of asset internal ids to append labels on. |
None |
json_response_array |
Union[List[Dict], Tuple[Dict, ...]] |
list of labels to append. |
() |
author_id_array |
Optional[List[str]] |
list of the author id of the labels. |
None |
seconds_to_label_array |
Optional[List[int]] |
list of times taken to produce the label, in seconds. |
None |
model_name |
Optional[str] |
Name of the model that generated the labels. Only useful when uploading PREDICTION or INFERENCE labels. |
None |
label_type |
Literal['AUTOSAVE', 'DEFAULT', 'PREDICTION', 'REVIEW', 'INFERENCE'] |
Can be one of `AUTOSAVE`, `DEFAULT`, `PREDICTION`, `REVIEW` or `INFERENCE`. |
'DEFAULT' |
project_id |
Optional[str] |
Identifier of the project. |
None |
asset_external_id_array |
Optional[List[str]] |
list of asset external ids to append labels on. |
None |
disable_tqdm |
Optional[bool] |
Disable tqdm progress bar. |
None |
overwrite |
bool |
when uploading prediction or inference labels, if True, it will overwrite existing labels with the same model name and of the same label type, on the targeted assets. |
False |
Returns:
Type | Description |
---|---|
List[Dict[Literal['id'], str]] |
A list of dictionaries with the label ids. |
Examples:
>>> kili.append_labels(
asset_id_array=['cl9wmlkuc00050qsz6ut39g8h', 'cl9wmlkuw00080qsz2kqh8aiy'],
json_response_array=[{...}, {...}]
)
Source code in kili/entrypoints/mutations/label/__init__.py
@typechecked
def append_labels(
    self,
    asset_id_array: Optional[List[str]] = None,
    json_response_array: ListOrTuple[Dict] = (),
    author_id_array: Optional[List[str]] = None,
    seconds_to_label_array: Optional[List[int]] = None,
    model_name: Optional[str] = None,
    label_type: LabelType = "DEFAULT",
    project_id: Optional[str] = None,
    asset_external_id_array: Optional[List[str]] = None,
    disable_tqdm: Optional[bool] = None,
    overwrite: bool = False,
) -> List[Dict[Literal["id"], str]]:
    """Append labels to assets.

    Args:
        asset_id_array: list of asset internal ids to append labels on.
        json_response_array: list of labels to append.
        author_id_array: list of the author id of the labels.
        seconds_to_label_array: list of times taken to produce the label, in seconds.
        model_name: Name of the model that generated the labels.
            Only useful when uploading PREDICTION or INFERENCE labels.
        label_type: Can be one of `AUTOSAVE`, `DEFAULT`, `PREDICTION`, `REVIEW` or `INFERENCE`.
        project_id: Identifier of the project.
        asset_external_id_array: list of asset external ids to append labels on.
        disable_tqdm: Disable tqdm progress bar.
        overwrite: when uploading prediction or inference labels, if True,
            it will overwrite existing labels with the same model name
            and of the same label type, on the targeted assets.

    Returns:
        A list of dictionaries with the label ids.

    Raises:
        ValueError: If `json_response_array` is empty.

    Examples:
        >>> kili.append_labels(
                asset_id_array=['cl9wmlkuc00050qsz6ut39g8h', 'cl9wmlkuw00080qsz2kqh8aiy'],
                json_response_array=[{...}, {...}]
            )
    """
    nb_labels = len(json_response_array)
    if nb_labels == 0:
        raise ValueError(
            "json_response_array is empty, you must provide at least one label to upload"
        )

    check_asset_identifier_arguments(project_id, asset_id_array, asset_external_id_array)
    assert_all_arrays_have_same_size(
        [
            seconds_to_label_array,
            author_id_array,
            json_response_array,
            asset_external_id_array,
            asset_id_array,
        ]
    )

    # Optional arrays that are missing or empty are replaced by a run of None
    # so that every label dictionary carries the same keys.
    no_values = [None] * nb_labels
    rows = zip(
        asset_id_array or no_values,
        asset_external_id_array or no_values,
        json_response_array,
        seconds_to_label_array or no_values,
        author_id_array or no_values,
    )
    labels = [
        {
            "asset_id": asset_id,
            "asset_external_id": asset_external_id,
            "json_response": json_response,
            "seconds_to_label": seconds_to_label,
            "author_id": author_id,
        }
        for asset_id, asset_external_id, json_response, seconds_to_label, author_id in rows
    ]
    return import_labels_from_dict(
        self, project_id, labels, label_type, overwrite, model_name, disable_tqdm
    )
append_to_labels(self, json_response, author_id=None, label_asset_external_id=None, label_asset_id=None, label_type='DEFAULT', project_id=None, seconds_to_label=0)
[DEPRECATED]
append_to_labels method is deprecated. Please use append_labels instead. This new function allows to import several labels 10 times faster.
Append a label to an asset.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
json_response |
dict |
Label is given here. |
required |
author_id |
Optional[str] |
ID of the author of the label. |
None |
label_asset_external_id |
Optional[str] |
External identifier of the asset. |
None |
label_asset_id |
Optional[str] |
Identifier of the asset. |
None |
project_id |
Optional[str] |
Identifier of the project. |
None |
label_type |
Literal['AUTOSAVE', 'DEFAULT', 'PREDICTION', 'REVIEW', 'INFERENCE'] |
Can be one of `AUTOSAVE`, `DEFAULT`, `PREDICTION`, `REVIEW` or `INFERENCE`. |
'DEFAULT' |
seconds_to_label |
Optional[int] |
Time to create the label. |
0 |
Warning
Either provide `label_asset_id` or `label_asset_external_id` and `project_id`
Returns:
Type | Description |
---|---|
A result object which indicates if the mutation was successful, or an error message. |
Examples:
>>> kili.append_to_labels(label_asset_id=asset_id, json_response={...})
Source code in kili/entrypoints/mutations/label/__init__.py
@deprecate(
    msg=(
        "append_to_labels method is deprecated. Please use append_labels instead. This new"
        " function allows to import several labels 10 times faster."
    )
)
@typechecked
def append_to_labels(
    self,
    json_response: dict,
    author_id: Optional[str] = None,
    label_asset_external_id: Optional[str] = None,
    label_asset_id: Optional[str] = None,
    label_type: LabelType = "DEFAULT",
    project_id: Optional[str] = None,
    seconds_to_label: Optional[int] = 0,
):
    """!!! danger "[DEPRECATED]"
        append_to_labels method is deprecated. Please use append_labels instead.
        This new function allows to import several labels 10 times faster.

    Append a label to an asset.

    Args:
        json_response: Label is given here.
        author_id: ID of the author of the label.
            When omitted, the id returned by `self.get_user()` is used.
        label_asset_external_id: External identifier of the asset.
        label_asset_id: Identifier of the asset.
        project_id: Identifier of the project.
        label_type: Can be one of `AUTOSAVE`, `DEFAULT`, `PREDICTION`, `REVIEW` or `INFERENCE`.
        seconds_to_label: Time to create the label.

    !!! warning
        Either provide `label_asset_id` or `label_asset_external_id` and `project_id`

    Returns:
        A result object which indicates if the mutation was successful,
            or an error message.

    Examples:
        >>> kili.append_to_labels(label_asset_id=asset_id, json_response={...})
    """
    if author_id is None:
        author_id = self.get_user()["id"]  # type: ignore # pylint: disable=no-member

    asset_ids = [label_asset_id] if label_asset_id else None
    external_ids = [label_asset_external_id] if label_asset_external_id else None
    check_asset_identifier_arguments(project_id, asset_ids, external_ids)

    if label_asset_id is None:
        # Resolve the internal asset id from the external id within the project.
        assert label_asset_external_id
        assert project_id
        id_by_external_id = infer_ids_from_external_ids(
            self.kili_api_gateway, [label_asset_external_id], project_id
        )
        label_asset_id = id_by_external_id[label_asset_external_id]

    label_data = {
        "authorID": author_id,
        "jsonResponse": dumps(json_response),
        "labelType": label_type,
        "secondsToLabel": seconds_to_label,
    }
    variables = {"data": label_data, "where": {"id": label_asset_id}}
    result = self.graphql_client.execute(GQL_APPEND_TO_LABELS, variables)
    return self.format_result("data", result, Label)
create_honeypot(self, json_response, asset_external_id=None, asset_id=None, project_id=None)
Create honeypot for an asset.
Info
Uses the given `json_response` to create a `REVIEW` label.
This enables Kili to compute a `honeypotMark`,
which measures the similarity between this label and other labels.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
json_response |
dict |
The JSON response of the honeypot label of the asset. |
required |
asset_id |
Optional[str] |
Identifier of the asset.
Either provide `asset_id` or `asset_external_id` and `project_id`. |
None |
asset_external_id |
Optional[str] |
External identifier of the asset.
Either provide `asset_id` or `asset_external_id` and `project_id`. |
None |
project_id |
Optional[str] |
Identifier of the project.
Either provide `asset_id` or `asset_external_id` and `project_id`. |
None |
Returns:
Type | Description |
---|---|
Label |
A dictionary-like object representing the created label. |
Source code in kili/entrypoints/mutations/label/__init__.py
@typechecked
def create_honeypot(
    self,
    json_response: dict,
    asset_external_id: Optional[str] = None,
    asset_id: Optional[str] = None,
    project_id: Optional[str] = None,
) -> Label:
    """Create honeypot for an asset.

    !!! info
        Uses the given `json_response` to create a `REVIEW` label.
        This enables Kili to compute a `honeypotMark`,
        which measures the similarity between this label and other labels.

    Args:
        json_response: The JSON response of the honeypot label of the asset.
        asset_id: Identifier of the asset.
            Either provide `asset_id` or `asset_external_id` and `project_id`.
        asset_external_id: External identifier of the asset.
            Either provide `asset_id` or `asset_external_id` and `project_id`.
        project_id: Identifier of the project.
            Either provide `asset_id` or `asset_external_id` and `project_id`.

    Returns:
        A dictionary-like object representing the created label.

    Raises:
        ValueError: If `asset_id` is not given and either `asset_external_id`
            or `project_id` is missing.
    """
    if asset_id is None:
        if asset_external_id is None or project_id is None:
            raise ValueError(
                "Either provide `asset_id` or `asset_external_id` and `project_id`."
            )
        # Resolve the internal asset id from the external id within the project.
        id_by_external_id = infer_ids_from_external_ids(
            self.kili_api_gateway, [asset_external_id], project_id
        )
        asset_id = id_by_external_id[asset_external_id]

    honeypot_data = {"jsonResponse": dumps(json_response)}
    variables = {"data": honeypot_data, "where": {"id": asset_id}}
    result = self.graphql_client.execute(GQL_CREATE_HONEYPOT, variables)
    return self.format_result("data", result, Label)
create_predictions(self, project_id, external_id_array=None, model_name_array=None, json_response_array=None, model_name=None, asset_id_array=None, disable_tqdm=None, overwrite=False)
Create predictions for specific assets.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
project_id |
str |
Identifier of the project. |
required |
external_id_array |
Optional[List[str]] |
The external IDs of the assets for which we want to add predictions. |
None |
model_name_array |
Optional[List[str]] |
Deprecated, use `model_name` instead. |
None |
json_response_array |
Optional[List[dict]] |
The predictions are given here. For examples, see the recipe. |
None |
model_name |
Optional[str] |
The name of the model that generated the predictions |
None |
asset_id_array |
Optional[List[str]] |
The internal IDs of the assets for which we want to add predictions. |
None |
disable_tqdm |
Optional[bool] |
Disable tqdm progress bar. |
None |
overwrite |
bool |
if True, it will overwrite existing predictions of the same model name on the targeted assets. |
False |
Returns:
Type | Description |
---|---|
Dict[Literal['id'], str] |
A dictionary with the project `id`. |
Recipe
For more detailed examples on how to create predictions, see the recipe.
model name
The use of `model_name_array` is deprecated. Creating predictions from different
models is not supported anymore. Please use `model_name` argument instead to
provide the predictions model name.
Source code in kili/entrypoints/mutations/label/__init__.py
@typechecked
def create_predictions(
    self,
    project_id: str,
    external_id_array: Optional[List[str]] = None,
    model_name_array: Optional[List[str]] = None,
    json_response_array: Optional[List[dict]] = None,
    model_name: Optional[str] = None,
    asset_id_array: Optional[List[str]] = None,
    disable_tqdm: Optional[bool] = None,
    overwrite: bool = False,
) -> Dict[Literal["id"], str]:
    # pylint: disable=line-too-long
    """Create predictions for specific assets.

    Args:
        project_id: Identifier of the project.
        external_id_array: The external IDs of the assets for which we want to add predictions.
        model_name_array: Deprecated, use `model_name` instead.
        json_response_array: The predictions are given here. For examples,
            see [the recipe](https://docs.kili-technology.com/recipes/importing-labels-and-predictions).
        model_name: The name of the model that generated the predictions
        asset_id_array: The internal IDs of the assets for which we want to add predictions.
        disable_tqdm: Disable tqdm progress bar.
        overwrite: if True, it will overwrite existing predictions of
            the same model name on the targeted assets.

    Returns:
        A dictionary with the project `id`.

    Raises:
        ValueError: If `json_response_array` is missing or empty, if neither
            `model_name` nor `model_name_array` is given, or if `model_name_array`
            holds more than one distinct model name.

    !!! example "Recipe"
        For more detailed examples on how to create predictions, see [the recipe](https://docs.kili-technology.com/recipes/importing-labels-and-predictions).

    !!! warning "model name"
        The use of `model_name_array` is deprecated. Creating predictions from different
        models is not supported anymore. Please use `model_name` argument instead to
        provide the predictions model name.
    """
    if json_response_array is None or len(json_response_array) == 0:
        raise ValueError(
            "json_response_array is empty, you must provide at least one prediction to upload"
        )

    assert_all_arrays_have_same_size(
        [external_id_array, json_response_array, model_name_array, asset_id_array]
    )

    if model_name is None:
        # Legacy path: derive the single model name from the deprecated array.
        if model_name_array is None:
            raise ValueError("You must provide a model name with the model_name argument ")
        if len(set(model_name_array)) > 1:
            raise ValueError(
                "Creating predictions from different models is not supported anymore. Separate"
                " your calls by models."
            )
        # stacklevel=2 attributes the warning to the frame that invoked this
        # function instead of to this line inside the SDK.
        warnings.warn(
            "The use of `model_name_array` is deprecated. Creating predictions from"
            " different models is not supported anymore. Please use `model_name` argument"
            " instead to provide the predictions model name.",
            DeprecationWarning,
            stacklevel=2,
        )
        model_name = model_name_array[0]

    nb_predictions = len(json_response_array)
    # Identifier arrays that are missing or empty are replaced by a run of None.
    labels = [
        {
            "asset_id": asset_id,
            "asset_external_id": asset_external_id,
            "json_response": json_response,
        }
        for asset_id, asset_external_id, json_response in zip(
            asset_id_array or [None] * nb_predictions,
            external_id_array or [None] * nb_predictions,
            json_response_array,
        )
    ]
    import_labels_from_dict(
        self, project_id, labels, "PREDICTION", overwrite, model_name, disable_tqdm
    )
    return {"id": project_id}
delete_labels(self, ids)
Delete labels.
Currently, only `PREDICTION` and `INFERENCE` labels can be deleted.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
ids |
List[str] |
List of label ids to delete. |
required |
Returns:
Type | Description |
---|---|
List[str] |
The deleted label ids. |
Source code in kili/entrypoints/mutations/label/__init__.py
@typechecked
def delete_labels(self, ids: List[str]) -> List[str]:
    """Delete labels.

    Currently, only `PREDICTION` and `INFERENCE` labels can be deleted.

    Args:
        ids: List of label ids to delete.

    Returns:
        The deleted label ids.
    """
    if is_empty_list_with_warning("delete_labels", "ids", ids):
        return []

    def build_variables(batch):
        return {"ids": batch["ids"]}

    batch_results = mutate_from_paginated_call(
        self,
        {"ids": ids},  # type: ignore
        build_variables,
        GQL_DELETE_LABELS,
    )
    # NOTE(review): only the first element of the paginated result is formatted;
    # confirm that it covers every deleted id when several batches are sent.
    return self.format_result("data", batch_results[0])
update_properties_in_label(self, label_id, seconds_to_label=None, model_name=None, json_response=None)
Update properties of a label.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
label_id |
str |
Identifier of the label |
required |
seconds_to_label |
Optional[int] |
Time to create the label |
None |
model_name |
Optional[str] |
Name of the model |
None |
json_response |
Optional[dict] |
The label is given here |
None |
Returns:
Type | Description |
---|---|
Dict[Literal['id'], str] |
A dictionary with the label `id`. |
Examples:
>>> kili.update_properties_in_label(label_id=label_id, json_response={...})
Source code in kili/entrypoints/mutations/label/__init__.py
@typechecked
def update_properties_in_label(
    self,
    label_id: str,
    seconds_to_label: Optional[int] = None,
    model_name: Optional[str] = None,
    json_response: Optional[dict] = None,
) -> Dict[Literal["id"], str]:
    """Update properties of a label.

    Args:
        label_id: Identifier of the label
        seconds_to_label: Time to create the label
        model_name: Name of the model
        json_response: The label is given here

    Returns:
        A dictionary with the label `id`.

    Examples:
        >>> kili.update_properties_in_label(label_id=label_id, json_response={...})
    """
    # The JSON response is serialized to a string; None is forwarded untouched.
    if json_response is None:
        serialized_response = None
    else:
        serialized_response = dumps(json_response)

    payload = {
        "labelID": label_id,
        "secondsToLabel": seconds_to_label,
        "modelName": model_name,
        "jsonResponse": serialized_response,
    }
    response = self.graphql_client.execute(GQL_UPDATE_PROPERTIES_IN_LABEL, payload)
    return self.format_result("data", response)
Subscriptions
Set of Label subscriptions.
Source code in kili/entrypoints/subscriptions/label/__init__.py
class SubscriptionsLabel:
    """Set of Label subscriptions."""

    graphql_client: GraphQLClient

    @typechecked
    def label_created_or_updated(
        self, project_id: str, callback: Callable[[str, str], None]
    ) -> SubscriptionGraphQLClient:
        # pylint: disable=line-too-long
        """Subscribe a callback to a project, which is executed when a label is created or updated.

        Args:
            project_id: Identifier of the project
            callback: This function takes as input the id of the asset and its content.

        Returns:
            A subscription client.

        !!! example "Recipe"
            For more detailed examples on how to use Webhooks,
            See [the related recipe](https://github.com/kili-technology/kili-python-sdk/blob/main/recipes/webhooks_example.ipynb)
        """
        # Only the first "http" (the URL scheme) is rewritten: http -> ws, https -> wss.
        # An unbounded replace would also alter any "http" found later in the host or path.
        ws_endpoint = self.graphql_client.endpoint.replace("http", "ws", 1)
        websocket = SubscriptionGraphQLClient(ws_endpoint)

        authorization = f"X-API-Key: {self.api_key}"  # type: ignore # pylint: disable=no-member
        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": authorization,
        }
        websocket.subscribe(
            GQL_LABEL_CREATED_OR_UPDATED,
            variables={"projectID": project_id},
            callback=callback,
            headers=headers,
            authorization=authorization,
        )
        return websocket
label_created_or_updated(self, project_id, callback)
Subscribe a callback to a project, which is executed when a label is created or updated.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
project_id |
str |
Identifier of the project |
required |
callback |
Callable[[str, str], NoneType] |
This function takes as input the id of the asset and its content. |
required |
Returns:
Type | Description |
---|---|
SubscriptionGraphQLClient |
A subscription client. |
Recipe
For more detailed examples on how to use Webhooks, See the related recipe
Source code in kili/entrypoints/subscriptions/label/__init__.py
@typechecked
def label_created_or_updated(
    self, project_id: str, callback: Callable[[str, str], None]
) -> SubscriptionGraphQLClient:
    # pylint: disable=line-too-long
    """Subscribe a callback to a project, which is executed when a label is created or updated.

    Args:
        project_id: Identifier of the project
        callback: This function takes as input the id of the asset and its content.

    Returns:
        A subscription client.

    !!! example "Recipe"
        For more detailed examples on how to use Webhooks,
        See [the related recipe](https://github.com/kili-technology/kili-python-sdk/blob/main/recipes/webhooks_example.ipynb)
    """
    # Only the first "http" (the URL scheme) is rewritten: http -> ws, https -> wss.
    # An unbounded replace would also alter any "http" found later in the host or path.
    ws_endpoint = self.graphql_client.endpoint.replace("http", "ws", 1)
    websocket = SubscriptionGraphQLClient(ws_endpoint)

    authorization = f"X-API-Key: {self.api_key}"  # type: ignore # pylint: disable=no-member
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Authorization": authorization,
    }
    websocket.subscribe(
        GQL_LABEL_CREATED_OR_UPDATED,
        variables={"projectID": project_id},
        callback=callback,
        headers=headers,
        authorization=authorization,
    )
    return websocket