Cloud storage module

Alpha feature

The cloud storage feature is currently in alpha. It is under active development: methods and behaviors may change until the feature is complete.

Cloud Storage Integration and Connection

A cloud storage integration is a connection between a Kili organization and a cloud storage (AWS, GCP or Azure). Once a cloud storage integration is created, it can be used in any project of the organization. Adding a cloud storage integration from the SDK is currently not supported. More information about how to create a cloud storage integration can be found here.

A cloud storage connection is a cloud storage integration used in a Kili project. It is used to import data from a cloud storage into a project. More information about how to use a cloud storage integration in a project can be found here.

Azure

It is recommended to install the Azure dependencies to use the Azure cloud storage integration and connection.

pip install kili[azure]

Queries

Set of cloud storage integration queries.

Source code in kili/entrypoints/queries/data_integration/__init__.py

class QueriesDataIntegration:
    """Queries to list and count the cloud storage integrations."""

    graphql_client: GraphQLClient

    # pylint: disable=too-many-arguments,dangerous-default-value

    @overload
    def cloud_storage_integrations(
        self,
        cloud_storage_integration_id: Optional[str] = None,
        name: Optional[str] = None,
        platform: Optional[Literal["AWS", "Azure", "GCP"]] = None,
        status: Optional[Literal["CONNECTED", "DISCONNECTED", "CHECKING"]] = None,
        organization_id: Optional[str] = None,
        fields: List[str] = ["name", "id", "platform", "status"],
        first: Optional[int] = None,
        skip: int = 0,
        disable_tqdm: bool = False,
        *,
        as_generator: Literal[True],
    ) -> Generator[Dict, None, None]:
        ...

    @overload
    def cloud_storage_integrations(
        self,
        cloud_storage_integration_id: Optional[str] = None,
        name: Optional[str] = None,
        platform: Optional[Literal["AWS", "Azure", "GCP"]] = None,
        status: Optional[Literal["CONNECTED", "DISCONNECTED", "CHECKING"]] = None,
        organization_id: Optional[str] = None,
        fields: List[str] = ["name", "id", "platform", "status"],
        first: Optional[int] = None,
        skip: int = 0,
        disable_tqdm: bool = False,
        *,
        as_generator: Literal[False] = False,
    ) -> List[Dict]:
        ...

    @typechecked
    def cloud_storage_integrations(
        self,
        cloud_storage_integration_id: Optional[str] = None,
        name: Optional[str] = None,
        platform: Optional[Literal["AWS", "Azure", "GCP"]] = None,
        status: Optional[Literal["CONNECTED", "DISCONNECTED", "CHECKING"]] = None,
        organization_id: Optional[str] = None,
        fields: List[str] = ["name", "id", "platform", "status"],
        first: Optional[int] = None,
        skip: int = 0,
        disable_tqdm: bool = False,
        *,
        as_generator: bool = False,
    ) -> Iterable[Dict]:
        # pylint: disable=line-too-long
        """Fetch the cloud storage integrations matching the given filters.

        Args:
            cloud_storage_integration_id: ID of the cloud storage integration.
            name: Name of the cloud storage integration.
            platform: Platform of the cloud storage integration.
            status: Status of the cloud storage integration.
            organization_id: ID of the organization.
            fields: Fields to request for each cloud storage integration.
                See [the documentation](https://docs.kili-technology.com/reference/graphql-api#dataintegration) for all possible fields.
            first: Maximum number of cloud storage integrations to return.
            skip: Number of cloud storage integrations to skip.
            disable_tqdm: If `True`, the progress bar will be disabled.
            as_generator: If `True`, a generator on the cloud storage integrations is returned
                instead of a list.

        Returns:
            A list or a generator of the cloud storage integrations that match the filters.

        Examples:
            >>> kili.cloud_storage_integrations()
            [{'name': 'My bucket', 'id': '123456789', 'platform': 'AWS', 'status': 'CONNECTED'}]
        """
        filters = DataIntegrationWhere(
            data_integration_id=cloud_storage_integration_id,
            name=name,
            platform=platform,
            status=status,
            organization_id=organization_id,
        )
        query_options = QueryOptions(
            disable_tqdm_if_as_generator(as_generator, disable_tqdm), first, skip
        )
        integrations = DataIntegrationsQuery(self.graphql_client)(filters, fields, query_options)

        # The query result is handed back as is, or materialized into a list.
        return integrations if as_generator else list(integrations)

    @typechecked
    def count_cloud_storage_integrations(
        self,
        cloud_storage_integration_id: Optional[str] = None,
        name: Optional[str] = None,
        platform: Optional[Literal["AWS", "Azure", "GCP"]] = None,
        status: Optional[Literal["CONNECTED", "DISCONNECTED", "CHECKING"]] = None,
        organization_id: Optional[str] = None,
    ) -> int:
        """Count the cloud storage integrations matching the given filters.

        Args:
            cloud_storage_integration_id: ID of the cloud storage integration.
            name: Name of the cloud storage integration.
            platform: Platform of the cloud storage integration.
            status: Status of the cloud storage integration.
            organization_id: ID of the organization.

        Returns:
            The number of cloud storage integrations that match the filters.
        """
        filters = DataIntegrationWhere(
            data_integration_id=cloud_storage_integration_id,
            name=name,
            platform=platform,
            status=status,
            organization_id=organization_id,
        )
        integrations_query = DataIntegrationsQuery(self.graphql_client)
        return integrations_query.count(filters)

`cloud_storage_integrations(self, cloud_storage_integration_id=None, name=None, platform=None, status=None, organization_id=None, fields=['name', 'id', 'platform', 'status'], first=None, skip=0, disable_tqdm=False, *, as_generator=False)`

Get a generator or a list of cloud storage integrations that match a set of criteria.

Parameters:

Name	Type	Description	Default
`cloud_storage_integration_id`	`Optional[str]`	ID of the cloud storage integration.	`None`
`name`	`Optional[str]`	Name of the cloud storage integration.	`None`
`platform`	`Optional[typing_extensions.Literal['AWS', 'Azure', 'GCP']]`	Platform of the cloud storage integration.	`None`
`status`	`Optional[typing_extensions.Literal['CONNECTED', 'DISCONNECTED', 'CHECKING']]`	Status of the cloud storage integration.	`None`
`organization_id`	`Optional[str]`	ID of the organization.	`None`
`fields`	`List[str]`	All the fields to request among the possible fields for the cloud storage integrations. See the documentation for all possible fields.	`['name', 'id', 'platform', 'status']`
`first`	`Optional[int]`	Maximum number of cloud storage integrations to return.	`None`
`skip`	`int`	Number of skipped cloud storage integrations.	`0`
`disable_tqdm`	`bool`	If `True`, the progress bar will be disabled.	`False`
`as_generator`	`bool`	If `True`, a generator on the cloud storage integrations is returned.	`False`

Returns:

Type	Description
`Iterable[Dict]`	A list or a generator of the cloud storage integrations that match the criteria.

Examples:

>>> kili.cloud_storage_integrations()
[{'name': 'My bucket', 'id': '123456789', 'platform': 'AWS', 'status': 'CONNECTED'}]

Source code in kili/entrypoints/queries/data_integration/__init__.py

@typechecked
def cloud_storage_integrations(
    self,
    cloud_storage_integration_id: Optional[str] = None,
    name: Optional[str] = None,
    platform: Optional[Literal["AWS", "Azure", "GCP"]] = None,
    status: Optional[Literal["CONNECTED", "DISCONNECTED", "CHECKING"]] = None,
    organization_id: Optional[str] = None,
    fields: List[str] = ["name", "id", "platform", "status"],
    first: Optional[int] = None,
    skip: int = 0,
    disable_tqdm: bool = False,
    *,
    as_generator: bool = False,
) -> Iterable[Dict]:
    # pylint: disable=line-too-long
    """Fetch the cloud storage integrations matching the given filters.

    Args:
        cloud_storage_integration_id: ID of the cloud storage integration.
        name: Name of the cloud storage integration.
        platform: Platform of the cloud storage integration.
        status: Status of the cloud storage integration.
        organization_id: ID of the organization.
        fields: Fields to request for each cloud storage integration.
            See [the documentation](https://docs.kili-technology.com/reference/graphql-api#dataintegration) for all possible fields.
        first: Maximum number of cloud storage integrations to return.
        skip: Number of cloud storage integrations to skip.
        disable_tqdm: If `True`, the progress bar will be disabled.
        as_generator: If `True`, a generator on the cloud storage integrations is returned
            instead of a list.

    Returns:
        A list or a generator of the cloud storage integrations that match the filters.

    Examples:
        >>> kili.cloud_storage_integrations()
        [{'name': 'My bucket', 'id': '123456789', 'platform': 'AWS', 'status': 'CONNECTED'}]
    """
    filters = DataIntegrationWhere(
        data_integration_id=cloud_storage_integration_id,
        name=name,
        platform=platform,
        status=status,
        organization_id=organization_id,
    )
    query_options = QueryOptions(
        disable_tqdm_if_as_generator(as_generator, disable_tqdm), first, skip
    )
    integrations = DataIntegrationsQuery(self.graphql_client)(filters, fields, query_options)

    # The query result is handed back as is, or materialized into a list.
    return integrations if as_generator else list(integrations)

`count_cloud_storage_integrations(self, cloud_storage_integration_id=None, name=None, platform=None, status=None, organization_id=None)`

Count and return the number of cloud storage integrations that match a set of criteria.

Parameters:

Name	Type	Description	Default
`cloud_storage_integration_id`	`Optional[str]`	ID of the cloud storage integration.	`None`
`name`	`Optional[str]`	Name of the cloud storage integration.	`None`
`platform`	`Optional[typing_extensions.Literal['AWS', 'Azure', 'GCP']]`	Platform of the cloud storage integration.	`None`
`status`	`Optional[typing_extensions.Literal['CONNECTED', 'DISCONNECTED', 'CHECKING']]`	Status of the cloud storage integration.	`None`
`organization_id`	`Optional[str]`	ID of the organization.	`None`

Returns:

Type	Description
`int`	The number of cloud storage integrations that match the criteria.

Source code in kili/entrypoints/queries/data_integration/__init__.py

@typechecked
def count_cloud_storage_integrations(
    self,
    cloud_storage_integration_id: Optional[str] = None,
    name: Optional[str] = None,
    platform: Optional[Literal["AWS", "Azure", "GCP"]] = None,
    status: Optional[Literal["CONNECTED", "DISCONNECTED", "CHECKING"]] = None,
    organization_id: Optional[str] = None,
) -> int:
    """Count the cloud storage integrations matching the given filters.

    Args:
        cloud_storage_integration_id: ID of the cloud storage integration.
        name: Name of the cloud storage integration.
        platform: Platform of the cloud storage integration.
        status: Status of the cloud storage integration.
        organization_id: ID of the organization.

    Returns:
        The number of cloud storage integrations that match the filters.
    """
    filters = DataIntegrationWhere(
        data_integration_id=cloud_storage_integration_id,
        name=name,
        platform=platform,
        status=status,
        organization_id=organization_id,
    )
    integrations_query = DataIntegrationsQuery(self.graphql_client)
    return integrations_query.count(filters)

Set of cloud storage connection queries.

Source code in kili/entrypoints/queries/data_connection/__init__.py

class QueriesDataConnection:
    """Set of cloud storage connection queries."""

    graphql_client: GraphQLClient

    # pylint: disable=too-many-arguments,dangerous-default-value

    @overload
    def cloud_storage_connections(
        self,
        cloud_storage_connection_id: Optional[str] = None,
        cloud_storage_integration_id: Optional[str] = None,
        project_id: Optional[str] = None,
        fields: List[str] = [
            "id",
            "lastChecked",
            "numberOfAssets",
            "selectedFolders",
            "projectId",
        ],
        first: Optional[int] = None,
        skip: int = 0,
        disable_tqdm: bool = False,
        *,
        as_generator: Literal[True],
    ) -> Generator[Dict, None, None]:
        ...

    @overload
    def cloud_storage_connections(
        self,
        cloud_storage_connection_id: Optional[str] = None,
        cloud_storage_integration_id: Optional[str] = None,
        project_id: Optional[str] = None,
        fields: List[str] = [
            "id",
            "lastChecked",
            "numberOfAssets",
            "selectedFolders",
            "projectId",
        ],
        first: Optional[int] = None,
        skip: int = 0,
        disable_tqdm: bool = False,
        *,
        as_generator: Literal[False] = False,
    ) -> List[Dict]:
        ...

    @typechecked
    def cloud_storage_connections(
        self,
        cloud_storage_connection_id: Optional[str] = None,
        cloud_storage_integration_id: Optional[str] = None,
        project_id: Optional[str] = None,
        fields: List[str] = [
            "id",
            "lastChecked",
            "numberOfAssets",
            "selectedFolders",
            "projectId",
        ],
        first: Optional[int] = None,
        skip: int = 0,
        disable_tqdm: bool = False,
        *,
        as_generator: bool = False,
    ) -> Iterable[Dict]:
        # pylint: disable=line-too-long
        """Get a generator or a list of cloud storage connections that match a set of criteria.

        At least one of `cloud_storage_connection_id`, `cloud_storage_integration_id` or
        `project_id` must be provided.

        When `cloud_storage_connection_id` is provided, that single connection is fetched and
        `cloud_storage_integration_id`, `project_id`, `first`, `skip` and `disable_tqdm` are
        not used.

        Args:
            cloud_storage_connection_id: ID of the cloud storage connection.
            cloud_storage_integration_id: ID of the cloud storage integration.
            project_id: ID of the project.
            fields: All the fields to request among the possible fields for the cloud storage connections.
                See [the documentation](https://docs.kili-technology.com/reference/graphql-api#dataconnection) for all possible fields.
            first: Maximum number of cloud storage connections to return.
            skip: Number of skipped cloud storage connections.
            disable_tqdm: If `True`, the progress bar will be disabled.
            as_generator: If `True`, a generator on the cloud storage connections is returned.

        Returns:
            A list or a generator of the cloud storage connections that match the criteria.

        Raises:
            ValueError: If none of `cloud_storage_connection_id`,
                `cloud_storage_integration_id` and `project_id` is specified.

        Examples:
            >>> kili.cloud_storage_connections(project_id="789465123")
            [{'id': '123456789', 'lastChecked': '2023-02-21T14:49:35.606Z', 'numberOfAssets': 42, 'selectedFolders': ['folder1', 'folder2'], 'projectId': '789465123'}]
        """
        if (
            cloud_storage_connection_id is None
            and cloud_storage_integration_id is None
            and project_id is None
        ):
            raise ValueError(
                "At least one of cloud_storage_connection_id, cloud_storage_integration_id or"
                " project_id must be specified"
            )

        # call dataConnection resolver
        if cloud_storage_connection_id is not None:
            data_connection = services.get_data_connection(
                self, cloud_storage_connection_id, fields
            )
            if as_generator:
                # Return a real generator rather than `iter([...])` (a list_iterator), so
                # the result matches the `Generator` type declared by the
                # `as_generator=True` overload.
                return (connection for connection in [data_connection])
            return [data_connection]

        # call dataConnections resolver
        where = DataConnectionsWhere(
            project_id=project_id, data_integration_id=cloud_storage_integration_id
        )
        disable_tqdm = disable_tqdm_if_as_generator(as_generator, disable_tqdm)
        options = QueryOptions(disable_tqdm, first, skip)
        data_connections_gen = DataConnectionsQuery(self.graphql_client)(where, fields, options)

        if as_generator:
            return data_connections_gen
        return list(data_connections_gen)

`cloud_storage_connections(self, cloud_storage_connection_id=None, cloud_storage_integration_id=None, project_id=None, fields=['id', 'lastChecked', 'numberOfAssets', 'selectedFolders', 'projectId'], first=None, skip=0, disable_tqdm=False, *, as_generator=False)`

Get a generator or a list of cloud storage connections that match a set of criteria.

Parameters:

Name	Type	Description	Default
`cloud_storage_connection_id`	`Optional[str]`	ID of the cloud storage connection.	`None`
`cloud_storage_integration_id`	`Optional[str]`	ID of the cloud storage integration.	`None`
`project_id`	`Optional[str]`	ID of the project.	`None`
`fields`	`List[str]`	All the fields to request among the possible fields for the cloud storage connections. See the documentation for all possible fields.	`['id', 'lastChecked', 'numberOfAssets', 'selectedFolders', 'projectId']`
`first`	`Optional[int]`	Maximum number of cloud storage connections to return.	`None`
`skip`	`int`	Number of skipped cloud storage connections.	`0`
`disable_tqdm`	`bool`	If `True`, the progress bar will be disabled.	`False`
`as_generator`	`bool`	If `True`, a generator on the cloud storage connections is returned.	`False`

Returns:

Type	Description
`Iterable[Dict]`	A list or a generator of the cloud storage connections that match the criteria.

Examples:

>>> kili.cloud_storage_connections(project_id="789465123")
[{'id': '123456789', 'lastChecked': '2023-02-21T14:49:35.606Z', 'numberOfAssets': 42, 'selectedFolders': ['folder1', 'folder2'], 'projectId': '789465123'}]

Source code in kili/entrypoints/queries/data_connection/__init__.py

@typechecked
def cloud_storage_connections(
    self,
    cloud_storage_connection_id: Optional[str] = None,
    cloud_storage_integration_id: Optional[str] = None,
    project_id: Optional[str] = None,
    fields: List[str] = [
        "id",
        "lastChecked",
        "numberOfAssets",
        "selectedFolders",
        "projectId",
    ],
    first: Optional[int] = None,
    skip: int = 0,
    disable_tqdm: bool = False,
    *,
    as_generator: bool = False,
) -> Iterable[Dict]:
    # pylint: disable=line-too-long
    """Get a generator or a list of cloud storage connections that match a set of criteria.

    At least one of `cloud_storage_connection_id`, `cloud_storage_integration_id` or
    `project_id` must be provided.

    When `cloud_storage_connection_id` is provided, that single connection is fetched and
    `cloud_storage_integration_id`, `project_id`, `first`, `skip` and `disable_tqdm` are
    not used.

    Args:
        cloud_storage_connection_id: ID of the cloud storage connection.
        cloud_storage_integration_id: ID of the cloud storage integration.
        project_id: ID of the project.
        fields: All the fields to request among the possible fields for the cloud storage connections.
            See [the documentation](https://docs.kili-technology.com/reference/graphql-api#dataconnection) for all possible fields.
        first: Maximum number of cloud storage connections to return.
        skip: Number of skipped cloud storage connections.
        disable_tqdm: If `True`, the progress bar will be disabled.
        as_generator: If `True`, a generator on the cloud storage connections is returned.

    Returns:
        A list or a generator of the cloud storage connections that match the criteria.

    Raises:
        ValueError: If none of `cloud_storage_connection_id`,
            `cloud_storage_integration_id` and `project_id` is specified.

    Examples:
        >>> kili.cloud_storage_connections(project_id="789465123")
        [{'id': '123456789', 'lastChecked': '2023-02-21T14:49:35.606Z', 'numberOfAssets': 42, 'selectedFolders': ['folder1', 'folder2'], 'projectId': '789465123'}]
    """
    if (
        cloud_storage_connection_id is None
        and cloud_storage_integration_id is None
        and project_id is None
    ):
        raise ValueError(
            "At least one of cloud_storage_connection_id, cloud_storage_integration_id or"
            " project_id must be specified"
        )

    # call dataConnection resolver
    if cloud_storage_connection_id is not None:
        data_connection = services.get_data_connection(
            self, cloud_storage_connection_id, fields
        )
        if as_generator:
            # Return a real generator rather than `iter([...])` (a list_iterator), so
            # the result matches the `Generator` type declared by the
            # `as_generator=True` overload.
            return (connection for connection in [data_connection])
        return [data_connection]

    # call dataConnections resolver
    where = DataConnectionsWhere(
        project_id=project_id, data_integration_id=cloud_storage_integration_id
    )
    disable_tqdm = disable_tqdm_if_as_generator(as_generator, disable_tqdm)
    options = QueryOptions(disable_tqdm, first, skip)
    data_connections_gen = DataConnectionsQuery(self.graphql_client)(where, fields, options)

    if as_generator:
        return data_connections_gen
    return list(data_connections_gen)

Mutations

Set of cloud storage connection mutations.

Source code in kili/entrypoints/mutations/data_connection/__init__.py

class MutationsDataConnection:
    """Set of cloud storage connection mutations."""

    graphql_client: GraphQLClient

    @typechecked
    def add_cloud_storage_connection(
        self,
        project_id: str,
        cloud_storage_integration_id: str,
        selected_folders: Optional[List[str]] = None,
    ) -> Dict:
        """Connect a cloud storage to a project.

        The cloud storage integration is first looked up by ID. Once the connection is
        created, the computation of the differences for the new connection is triggered.

        Args:
            project_id: ID of the project.
            cloud_storage_integration_id: ID of the cloud storage integration.
            selected_folders: List of folders of the cloud storage integration to connect to
                the project. If not provided, all folders of the cloud storage integration
                will be connected.

        Returns:
            A dict with the cloud storage connection ID.

        Raises:
            ValueError: If no cloud storage integration is found for
                `cloud_storage_integration_id`.
        """
        # Local import: only `datetime` is known to be imported at module level.
        from datetime import timezone  # pylint: disable=import-outside-toplevel

        data_integrations = list(
            DataIntegrationsQuery(self.graphql_client)(
                where=DataIntegrationWhere(data_integration_id=cloud_storage_integration_id),
                fields=["id"],
                options=QueryOptions(disable_tqdm=True, first=1, skip=0),
            )
        )
        if not data_integrations:
            raise ValueError(
                f"Cloud storage integration with id {cloud_storage_integration_id} not found."
            )

        # The "Z" suffix denotes UTC, so the timestamp must be taken in UTC and not in the
        # local timezone of the machine. The tzinfo is dropped to keep isoformat() from
        # appending "+00:00" before the "Z".
        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
        last_checked = now_utc.isoformat(sep="T", timespec="milliseconds") + "Z"

        variables = {
            "data": {
                "projectId": project_id,
                "integrationId": cloud_storage_integration_id,
                "isChecking": False,
                "lastChecked": last_checked,
                "selectedFolders": selected_folders,
            }
        }
        result = self.graphql_client.execute(GQL_ADD_PROJECT_DATA_CONNECTION, variables)
        result = format_result("data", result)

        # We trigger data difference computation (same behavior as in the frontend)
        services.compute_differences(self, result["id"])

        return result

    @typechecked
    def synchronize_cloud_storage_connection(
        self,
        cloud_storage_connection_id: str,
        delete_extraneous_files: bool = False,
        dry_run: bool = False,
    ) -> Dict:
        """Synchronize a cloud storage connection.

        This method will compute differences between the cloud storage connection and the
        project, and then validate the differences.

        If `delete_extraneous_files` is `True`, it will also delete files that are no longer
        in the cloud storage integration but that are still in the project.

        Args:
            cloud_storage_connection_id: ID of the cloud storage connection.
            delete_extraneous_files: If `True`, delete extraneous files.
            dry_run: If `True`, will not synchronize the cloud storage connection but only
                print the differences. This is useful to check the differences before
                applying them to the project.

        Returns:
            A dict with the cloud storage connection ID.
        """
        return services.synchronize_data_connection(
            self, cloud_storage_connection_id, delete_extraneous_files, dry_run
        )

`add_cloud_storage_connection(self, project_id, cloud_storage_integration_id, selected_folders=None)`

Connect a cloud storage to a project.

Parameters:

Name	Type	Description	Default
`project_id`	`str`	ID of the project.	required
`cloud_storage_integration_id`	`str`	ID of the cloud storage integration.	required
`selected_folders`	`Optional[List[str]]`	List of folders of the data integration to connect to the project. If not provided, all folders of the data integration will be connected.	`None`

Returns:

Type	Description
`Dict`	A dict with the DataConnection ID.

Source code in kili/entrypoints/mutations/data_connection/__init__.py

@typechecked
def add_cloud_storage_connection(
    self,
    project_id: str,
    cloud_storage_integration_id: str,
    selected_folders: Optional[List[str]] = None,
) -> Dict:
    """Connect a cloud storage to a project.

    The cloud storage integration is first looked up by ID. Once the connection is
    created, the computation of the differences for the new connection is triggered.

    Args:
        project_id: ID of the project.
        cloud_storage_integration_id: ID of the cloud storage integration.
        selected_folders: List of folders of the cloud storage integration to connect to
            the project. If not provided, all folders of the cloud storage integration
            will be connected.

    Returns:
        A dict with the cloud storage connection ID.

    Raises:
        ValueError: If no cloud storage integration is found for
            `cloud_storage_integration_id`.
    """
    # Local import: only `datetime` is known to be imported at module level.
    from datetime import timezone  # pylint: disable=import-outside-toplevel

    data_integrations = list(
        DataIntegrationsQuery(self.graphql_client)(
            where=DataIntegrationWhere(data_integration_id=cloud_storage_integration_id),
            fields=["id"],
            options=QueryOptions(disable_tqdm=True, first=1, skip=0),
        )
    )
    if not data_integrations:
        raise ValueError(
            f"Cloud storage integration with id {cloud_storage_integration_id} not found."
        )

    # The "Z" suffix denotes UTC, so the timestamp must be taken in UTC and not in the
    # local timezone of the machine. The tzinfo is dropped to keep isoformat() from
    # appending "+00:00" before the "Z".
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    last_checked = now_utc.isoformat(sep="T", timespec="milliseconds") + "Z"

    variables = {
        "data": {
            "projectId": project_id,
            "integrationId": cloud_storage_integration_id,
            "isChecking": False,
            "lastChecked": last_checked,
            "selectedFolders": selected_folders,
        }
    }
    result = self.graphql_client.execute(GQL_ADD_PROJECT_DATA_CONNECTION, variables)
    result = format_result("data", result)

    # We trigger data difference computation (same behavior as in the frontend)
    services.compute_differences(self, result["id"])

    return result

`synchronize_cloud_storage_connection(self, cloud_storage_connection_id, delete_extraneous_files=False, dry_run=False)`

Synchronize a cloud storage connection.

This method will compute differences between the cloud storage connection and the project, and then validate the differences.

If `delete_extraneous_files` is `True`, it will also delete files that are no longer in the cloud storage integration but are still in the project.

Parameters:

Name	Type	Description	Default
`cloud_storage_connection_id`	`str`	ID of the cloud storage connection.	required
`delete_extraneous_files`	`bool`	If True, delete extraneous files.	`False`
`dry_run`	`bool`	If True, will not synchronize the data connection but only print the differences. This is useful to check the differences before applying them to the project.	`False`

Returns:

Type	Description
`Dict`	A dict with the cloud storage connection ID.

Source code in kili/entrypoints/mutations/data_connection/__init__.py

@typechecked
def synchronize_cloud_storage_connection(
    self,
    cloud_storage_connection_id: str,
    delete_extraneous_files: bool = False,
    dry_run: bool = False,
) -> Dict:
    """Bring a project in sync with its cloud storage connection.

    The differences between the cloud storage connection and the project are computed,
    and then validated.

    When `delete_extraneous_files` is `True`, files that are still in the project but are
    no longer in the cloud storage integration are deleted as well.

    Args:
        cloud_storage_connection_id: ID of the cloud storage connection.
        delete_extraneous_files: If `True`, delete extraneous files.
        dry_run: If `True`, the cloud storage connection is not synchronized and the
            differences are only printed. Useful to review the differences before applying
            them to the project.

    Returns:
        A dict with the cloud storage connection ID.
    """
    sync_result = services.synchronize_data_connection(
        self,
        cloud_storage_connection_id,
        delete_extraneous_files,
        dry_run,
    )
    return sync_result