Skip to content

Open In Colab

How to upload medical images to Kili, and visualize segmentation labels with matplotlib

In this tutorial, we will learn how to:

  • upload medical images to Kili using pydicom
  • upload dicom tags as metadata to our assets
  • download segmentation labels from Kili, and convert them to Numpy masks for visualization with matplotlib.

Data used in this tutorial comes from the RSNA Pneumonia Detection Challenge hosted on Kaggle.

First, let's install the required packages (pydicom, matplotlib, Pillow, etc.) in case you don't have them yet, and then import them.

%pip install pydicom matplotlib Pillow wget numpy pandas kili
import getpass
import os
import pickle
from functools import reduce
from pathlib import Path

import matplotlib.colors as mcolors
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pydicom
import wget
from PIL import Image

from kili.client import Kili

Get data

Let's download some dicom images:

# Sample DICOM files hosted in the Kili SDK repository.
dicom_urls = [
    "https://github.com/kili-technology/kili-python-sdk/blob/main/recipes/datasets/0000a175-0e68-4ca4-b1af-167204a7e0bc.dcm?raw=true",
    "https://github.com/kili-technology/kili-python-sdk/blob/main/recipes/datasets/0005d3cc-3c3f-40b9-93c3-46231c3eb813.dcm?raw=true",
]

data_folder = Path(".")
for url in dicom_urls:
    # Local file name = last path segment of the URL, without the "?raw=true" query string.
    target = data_folder / url.rsplit("/", 1)[-1].split("?", 1)[0]
    # wget.download does not overwrite an existing file: it stores a renamed copy
    # ("name (1).dcm") instead, which would break the file count check below when
    # this cell is run a second time. Only download what is missing.
    if not target.exists():
        wget.download(url, out=str(target))

files = list(data_folder.glob("*.dcm"))
assert len(files) == 2, files

Process data

A DICOM image contains not only pixels (or voxels), but also DICOM tags that can hold information about the patient, the scanner, etc.

Below, we extract the dicom tags and add them to metadata_array.

We also convert all images to JPEG format.

def extract_dicom_tags(img_dicom):
    """Collect the tags of a DICOM dataset as a ``{tag name: value}`` dictionary.

    Args:
        img_dicom: dataset-like object exposing ``keys()`` (tags with a
            ``group`` attribute) and ``get(key)`` (data elements), such as
            the object returned by ``pydicom.dcmread``.

    Returns:
        A dict mapping each element name to ``str(element.value)``. Elements
        of the pixel data group, elements without a value and elements
        without a name are skipped.
    """
    pixel_data_group = 0x7FE0  # == 32736, the group containing the image pixels

    metadata = {}
    for key in img_dicom.keys():
        if key.group == pixel_data_group:
            continue
        item = img_dicom.get(key)
        if not hasattr(item, "value"):
            continue
        # Prefer the ``name`` attribute and fall back to the ``description()``
        # accessor, so elements exposing only one of the two are still kept.
        # NOTE(review): ``description()`` is reportedly deprecated/removed in
        # recent pydicom releases -- confirm against the installed version.
        name = getattr(item, "name", None)
        if name is None and hasattr(item, "description"):
            name = item.description()
        if name is not None:
            metadata[name] = str(item.value)
    return metadata
processed_imgs = []
metadata_array = []

# For every DICOM file: write a JPEG copy of its pixels next to it and keep
# the extracted tags, so both lists stay aligned index by index.
for dicom_path in files:
    dataset = pydicom.dcmread(str(dicom_path))

    jpeg_path = data_folder / f"{dicom_path.stem}.jpeg"
    Image.fromarray(dataset.pixel_array).save(str(jpeg_path))

    processed_imgs.append(str(jpeg_path))
    metadata_array.append(extract_dicom_tags(dataset))

print(metadata_array[0])
{'Specific Character Set': 'ISO_IR 100', 'SOP Class UID': '1.2.840.10008.5.1.4.1.1.7', 'SOP Instance UID': '1.2.276.0.7230010.3.1.4.8323329.1747.1517874292.605928', 'Study Date': '19010101', 'Study Time': '000000.00', 'Accession Number': '', 'Modality': 'CR', 'Conversion Type': 'WSD', "Referring Physician's Name": '', 'Series Description': 'view: PA', "Patient's Name": '0005d3cc-3c3f-40b9-93c3-46231c3eb813', 'Patient ID': '0005d3cc-3c3f-40b9-93c3-46231c3eb813', "Patient's Birth Date": '', "Patient's Sex": 'F', "Patient's Age": '22', 'Body Part Examined': 'CHEST', 'View Position': 'PA', 'Study Instance UID': '1.2.276.0.7230010.3.1.2.8323329.1747.1517874292.605927', 'Series Instance UID': '1.2.276.0.7230010.3.1.3.8323329.1747.1517874292.605926', 'Study ID': '', 'Series Number': '1', 'Instance Number': '1', 'Patient Orientation': '', 'Samples per Pixel': '1', 'Photometric Interpretation': 'MONOCHROME2', 'Rows': '1024', 'Columns': '1024', 'Pixel Spacing': '[0.14300000000000002, 0.14300000000000002]', 'Bits Allocated': '8', 'Bits Stored': '8', 'High Bit': '7', 'Pixel Representation': '0', 'Lossy Image Compression': '01', 'Lossy Image Compression Method': 'ISO_10918_1'}

Create the Kili project

Next, we need to connect to Kili, create a project, and define the annotation interface (ontology).

# Read the API key from the environment, prompting for it only when it is not set.
try:
    KILI_API_KEY = os.environ["KILI_API_KEY"]
except KeyError:
    KILI_API_KEY = getpass.getpass("Please enter your API key: ")

kili = Kili(
    # no need to pass the API_KEY if it is already in your environment variables
    api_key=KILI_API_KEY,
    # For an on-premise version of Kili, uncomment and adapt the endpoint below:
    # api_endpoint="https://cloud.kili-technology.com/api/label/v2/graphql",
)
# Annotation interface (ontology) passed to kili.create_project: it declares two jobs.
json_interface = {
    "jobs": {
        # Classification job answering the "Healthy ?" instruction with a Yes/No radio input.
        "CLASSIFICATION_JOB": {
            "mlTask": "CLASSIFICATION",
            "content": {
                "categories": {"YES": {"name": "Yes"}, "NO": {"name": "No"}},
                "input": "radio",
            },
            # NOTE(review): "required" is 1 here but True in JOB_0 -- presumably
            # equivalent; confirm against the Kili interface schema.
            "required": 1,
            "isChild": False,
            "instruction": "Healthy ?",
        },
        # Object detection job drawn with the polygon tool; its category keys
        # (BONE, TISSUE, LUNG, RIB) are the class names used for the masks.
        "JOB_0": {
            "mlTask": "OBJECT_DETECTION",
            "content": {
                "categories": {
                    "BONE": {"name": "bone"},
                    "TISSUE": {"name": "tissue"},
                    "LUNG": {"name": "lung"},
                    "RIB": {"name": "rib"},
                },
                "input": "radio",
            },
            "required": True,
            "tools": ["polygon"],
            "isChild": False,
            "instruction": "Segmentation",
        },
    }
}

We can now use the Kili SDK to create our project and upload our images to the project.

# Project settings.
title = "[Kili SDK Notebook]: Medical Imaging with Kili Technology"
description = "This is a test project"
input_type = "IMAGE"

# Create the project with the annotation interface and keep its ID for later calls.
project = kili.create_project(
    json_interface=json_interface,
    input_type=input_type,
    description=description,
    title=title,
)
project_id = project["id"]

# Upload the JPEG files; each file path doubles as the asset's external ID and
# the DICOM tags extracted earlier are attached as asset metadata.
kili.append_many_to_dataset(
    json_metadata_array=metadata_array,
    external_id_array=processed_imgs,
    content_array=processed_imgs,
    project_id=project_id,
)

Done! Your images and their metadata are in the project:

image.png

image.png

All that remains is to start labeling! To learn more about how to label images in Kili, check out our documentation.

Convert Kili labels to numpy masks

Once your assets are labeled, you might want to download them and visualize them using matplotlib.

To download your labels, simply use kili.labels(project_id). You can also export your labels to a zip file using kili.export_labels(project_id). For more information, see the documentation.

In this tutorial, we assume that our labels have already been downloaded and stored in a file medical-labels.pkl.

from kili.utils.labels.parsing import ParsedLabel
wget.download(
    "https://github.com/kili-technology/kili-python-sdk/blob/main/recipes/conf/medical-labels.pkl?raw=true"
)

# NOTE: unpickling can execute arbitrary code -- only load pickle files that
# come from a source you trust.
with open("medical-labels.pkl", "rb") as pickle_file:
    json_response = pickle.load(pickle_file)

# Wrap the raw JSON response so jobs, categories and annotations can be
# accessed as attributes.
label = ParsedLabel(
    label={"jsonResponse": json_response},
    json_interface=json_interface,
    input_type="IMAGE",
)

healthy = label.jobs["CLASSIFICATION_JOB"].category.name
annotations = label.jobs["JOB_0"].annotations
print(healthy)
YES

In this example, annotations is a list containing 10 masks.

A mask is represented by a list of vertices, each vertex being a dictionary holding its normalized coordinates under the keys "x" and "y" (values between 0 and 1, relative to the image size).

# Number of annotations and the container type holding them.
print(len(annotations))
print(type(annotations))

# Vertex count and first vertex of the first annotation's polygon.
first_polygon = annotations[0].bounding_poly[0].normalized_vertices
print(len(first_polygon))
print(first_polygon[0])
10
<class 'kili.services.label_data_parsing.annotation.AnnotationList'>
255
{'x': 0.401891, 'y': 0.024966000000015254}

We assign a color to each class:

# Convert matplotlib's default color cycle (hex strings) to 0-255 RGB tuples.
colors = [
    tuple(int(channel * 255) for channel in mcolors.hex2color(hex_color))
    for hex_color in plt.rcParams["axes.prop_cycle"].by_key()["color"]
]

# Pair each segmentation category of JOB_0 with one color, in declaration order.
CLASS_TO_COLOR = dict(zip(json_interface["jobs"]["JOB_0"]["content"]["categories"], colors))
print(CLASS_TO_COLOR)
{'BONE': (31, 119, 180), 'TISSUE': (255, 127, 14), 'LUNG': (44, 160, 44), 'RIB': (214, 39, 40)}

We convert those labels using the kili.utils.labels module, and plot them using matplotlib:

from kili.utils.labels.image import normalized_vertices_to_mask
im = Image.open(processed_imgs[0])
img_width, img_height = im.size

class_names = []
masks = []
for annotation in annotations:
    category = annotation.category.name
    polygon = annotation.bounding_poly[0].normalized_vertices

    # Rasterize the normalized polygon into a numpy mask of the image size.
    binary_mask = normalized_vertices_to_mask(polygon, img_width, img_height)

    # Paint the pixels covered by the polygon with the color of its class;
    # every other pixel stays (0, 0, 0).
    colored_mask = np.zeros(binary_mask.shape + (3,), dtype=np.int32)
    colored_mask[binary_mask > 0] = CLASS_TO_COLOR[category]

    class_names.append(category)
    masks.append(colored_mask)

Let's merge all masks into a single one:

def _overlay_masks(mask_1, mask_2):
    """Return mask_1 where it is painted, and mask_2 everywhere else.

    A pixel counts as painted when at least one of its RGB channels is
    non-zero. Deciding per pixel (rather than per channel) keeps a class
    color that contains a zero channel, e.g. (255, 0, 0), from being blended
    with the channels of the other mask.
    """
    painted = (mask_1 != 0).any(axis=-1, keepdims=True)
    return np.where(painted, mask_1, mask_2)


# Fold all colored masks into one image; earlier masks take precedence on overlaps.
merged_masks = reduce(_overlay_masks, masks)

Plot the image and masks

Finally, we can plot the image as well as the masks converted from our Kili labels:

# One legend entry per class; matplotlib expects colors as floats in [0, 1].
labels = list(CLASS_TO_COLOR)
handles = [
    mpatches.Patch(color=tuple(channel / 255 for channel in rgb), label=name)
    for name, rgb in CLASS_TO_COLOR.items()
]

fig, ax = plt.subplots(figsize=(10, 10))
# Draw the image first, then the merged masks half-transparent on top of it.
ax.imshow(im, cmap="gray")
ax.imshow(merged_masks, alpha=0.5)
ax.set_title(f"Healthy: {healthy}")
ax.legend(handles=handles, labels=labels, fontsize=16, loc="upper left")
plt.show()

png

Congrats! 👏

In this tutorial, we have seen how to upload medical images to Kili, and how to download the segmentation labels and convert them to Numpy masks.

Project cleanup

# Delete the project identified by project_id (the one created in this tutorial).
kili.delete_project(project_id)