Skip to content

Open In Colab

How to upload medical images to Kili, and visualize segmentation labels with matplotlib

In this tutorial, we will learn how to:

  • upload medical images to Kili using pydicom
  • upload dicom tags as metadata to our assets
  • download segmentation labels from Kili, and convert them to Numpy masks for visualization with matplotlib.

Data used in this tutorial comes from the RSNA Pneumonia Detection Challenge hosted on Kaggle.

First of all, let's install the required packages (including pydicom, in case you don't have it yet) and import them.

!pip install pydicom matplotlib Pillow wget numpy pandas kili
import os
import pickle
import pydicom
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import getpass
import wget
import matplotlib.colors as mcolors
from pathlib import Path
from functools import reduce
from kili.client import Kili
from PIL import Image

Get data

Let's download some dicom images:

# Sample chest X-rays (DICOM files) hosted in the kili-python-sdk repository.
DATASET_URL = "https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/datasets"
DICOM_FILENAMES = (
    "0000a175-0e68-4ca4-b1af-167204a7e0bc.dcm",
    "0005d3cc-3c3f-40b9-93c3-46231c3eb813.dcm",
)

data_folder = Path(".")

for dicom_filename in DICOM_FILENAMES:
    target = data_folder / dicom_filename
    # wget.download never overwrites an existing file: it saves a renamed
    # duplicate instead. Skipping files that are already present keeps this
    # cell re-runnable without breaking the sanity check below.
    if not target.exists():
        wget.download(f"{DATASET_URL}/{dicom_filename}?raw=true", out=str(target))

# Sanity check: the folder is expected to contain exactly the two samples.
files = list(data_folder.glob("*.dcm"))
assert len(files) == 2, files

Process data

A DICOM image contains not only pixels (or voxels) but also DICOM tags, which can hold information about the patient, the scanner, etc.

Below, we extract the dicom tags and add them to metadata_array.

We also convert all images to JPEG format.

def extract_dicom_tags(img_dicom):
    """Collect the DICOM tags of a dataset as a flat ``{tag name: value}`` dict.

    Args:
        img_dicom: A pydicom dataset (or any object exposing ``keys()`` and
            ``get(key)``, where each key has a ``group`` attribute).

    Returns:
        A dict mapping each element's human-readable name to its value
        converted with ``str``. Elements of the pixel-data group are skipped.
    """
    pixel_data_group = 0x7FE0  # group holding the image pixels (32736)

    metadata = {}
    for key in img_dicom.keys():
        if key.group == pixel_data_group:
            continue

        item = img_dicom.get(key)
        if item is None or not hasattr(item, "value"):
            continue

        # `name` is the supported accessor; `description()` is deprecated in
        # recent pydicom releases (and removed in pydicom 3), so it is only
        # used as a fallback for elements that do not expose `name`.
        label = getattr(item, "name", None)
        if label is None and hasattr(item, "description"):
            label = item.description()
        if label is None:
            continue

        metadata[label] = str(item.value)
    return metadata
# Both lists are filled in the same loop, so entry i of each one refers to
# the same source DICOM file.
processed_imgs = []
metadata_array = []

for dicom_path in files:
    dataset = pydicom.dcmread(str(dicom_path))

    # Write the pixel data next to the source file as <stem>.jpeg.
    jpeg_path = data_folder / f"{dicom_path.stem}.jpeg"
    Image.fromarray(dataset.pixel_array).save(str(jpeg_path))
    processed_imgs.append(str(jpeg_path))

    # Keep the DICOM tags so they can be uploaded as asset metadata.
    metadata_array.append(extract_dicom_tags(dataset))

print(metadata_array[0])
{'Specific Character Set': 'ISO_IR 100', 'SOP Class UID': '1.2.840.10008.5.1.4.1.1.7', 'SOP Instance UID': '1.2.276.0.7230010.3.1.4.8323329.1747.1517874292.605928', 'Study Date': '19010101', 'Study Time': '000000.00', 'Accession Number': '', 'Modality': 'CR', 'Conversion Type': 'WSD', "Referring Physician's Name": '', 'Series Description': 'view: PA', "Patient's Name": '0005d3cc-3c3f-40b9-93c3-46231c3eb813', 'Patient ID': '0005d3cc-3c3f-40b9-93c3-46231c3eb813', "Patient's Birth Date": '', "Patient's Sex": 'F', "Patient's Age": '22', 'Body Part Examined': 'CHEST', 'View Position': 'PA', 'Study Instance UID': '1.2.276.0.7230010.3.1.2.8323329.1747.1517874292.605927', 'Series Instance UID': '1.2.276.0.7230010.3.1.3.8323329.1747.1517874292.605926', 'Study ID': '', 'Series Number': '1', 'Instance Number': '1', 'Patient Orientation': '', 'Samples per Pixel': '1', 'Photometric Interpretation': 'MONOCHROME2', 'Rows': '1024', 'Columns': '1024', 'Pixel Spacing': '[0.14300000000000002, 0.14300000000000002]', 'Bits Allocated': '8', 'Bits Stored': '8', 'High Bit': '7', 'Pixel Representation': '0', 'Lossy Image Compression': '01', 'Lossy Image Compression Method': 'ISO_10918_1'}

Create the Kili project

Next, we need to connect to Kili, create a project, and define the annotation interface (ontology).

# Read the API key from the environment when it is set there; otherwise
# prompt for it without echoing it to the screen.
try:
    KILI_API_KEY = os.environ["KILI_API_KEY"]
except KeyError:
    KILI_API_KEY = getpass.getpass("Please enter your API key: ")

kili = Kili(
    # Passing api_key is optional when KILI_API_KEY is set in the environment.
    api_key=KILI_API_KEY,
    # On-premise installations: uncomment and adapt the endpoint below.
    # api_endpoint="https://cloud.kili-technology.com/api/label/v2/graphql",
)
# Radio-button classification job: the question asked for every image.
classification_job = {
    "mlTask": "CLASSIFICATION",
    "content": {
        "categories": {"YES": {"name": "Yes"}, "NO": {"name": "No"}},
        "input": "radio",
    },
    "required": 1,
    "isChild": False,
    "instruction": "Healthy ?",
}

# Segmentation classes; the display name is the lower-cased category key.
# The order matters: it is the iteration order of the categories dict.
segmentation_categories = {
    category: {"name": category.lower()}
    for category in ("BONE", "TISSUE", "LUNG", "RIB")
}

# Segmentation job, drawn with the "semantic" tool.
segmentation_job = {
    "mlTask": "OBJECT_DETECTION",
    "content": {"categories": segmentation_categories, "input": "radio"},
    "required": True,
    "tools": ["semantic"],
    "instruction": "Segmentation",
}

# Annotation interface (ontology) passed to Kili when creating the project.
json_interface = {
    "jobs": {
        "CLASSIFICATION_JOB": classification_job,
        "JOB_0": segmentation_job,
    }
}

We can now use the Kili SDK to create our project and upload our images to the project.

# Settings of the new Kili project.
input_type = "IMAGE"
title = "Medical Imaging with Kili Technology"
description = "This is a test project"

# Create the project with the annotation interface defined earlier and keep
# its id for the following calls.
project = kili.create_project(
    input_type=input_type,
    title=title,
    description=description,
    json_interface=json_interface,
)
project_id = project["id"]

# Upload the JPEG files; each file path doubles as the asset's external id,
# and the extracted DICOM tags are attached as metadata.
kili.append_many_to_dataset(
    project_id=project_id,
    content_array=processed_imgs,
    external_id_array=processed_imgs,
    json_metadata_array=metadata_array,
)

Done! Your images and their metadata are in the project:

image.png

image.png

All that remains is to start labeling! To learn more about how to label images in Kili, check out our documentation.

Convert Kili labels to numpy masks

Once your assets are labeled, you might want to download them and visualize them using matplotlib.

To download your labels, simply use kili.labels(project_id). You can also export your labels to a zip file using kili.export_labels(project_id). For more information, see the documentation.

In this tutorial, we assume that our labels have already been downloaded and stored in a file medical-labels.pkl.

LABELS_FILENAME = "medical-labels.pkl"

# wget.download never overwrites an existing file (it saves a renamed
# duplicate), so only download when the file is not there yet. This keeps
# the cell re-runnable without cluttering the folder.
if not Path(LABELS_FILENAME).exists():
    wget.download(
        "https://github.com/kili-technology/kili-python-sdk/blob/master/recipes/conf/medical-labels.pkl?raw=true",
        out=LABELS_FILENAME,
    )

# SECURITY: unpickling can execute arbitrary code. This is acceptable here
# only because the file comes from the official kili-python-sdk repository;
# never call pickle.load on a file from an untrusted source.
with open(LABELS_FILENAME, "rb") as f:
    labels = pickle.load(f)

# Answer of the classification job, and the list of segmentation annotations.
healthy = labels["CLASSIFICATION_JOB"]["categories"][0]["name"]
annotations = labels["JOB_0"]["annotations"]
print(healthy)
YES

In this example, annotations is a list containing 10 masks.

A mask is represented by a Python dictionary where the vertices are stored in the boundingPoly key. See the documentation for more information about the json response format.

# Quick look at the annotations: how many there are, the container type, and
# the keys available on a single annotation.
annotation_count = len(annotations)
print(annotation_count, type(annotations))
print(annotations[0].keys())
10 <class 'list'>
dict_keys(['boundingPoly', 'categories', 'mid', 'score', 'type'])

We assign a color to each class:

# Convert matplotlib's default colour cycle into 0-255 RGB tuples.
# The scaled channels are rounded (not truncated) so that floating-point
# round-off cannot turn a channel into the next lower integer.
colors = [
    tuple(int(round(channel * 255)) for channel in mcolors.hex2color(hex_color))
    for hex_color in plt.rcParams["axes.prop_cycle"].by_key()["color"]
]

# Pair each segmentation class with one colour, in declaration order.
# zip stops at the shorter input, so classes beyond the number of colours in
# the cycle would get no entry.
CLASS_TO_COLOR = dict(
    zip(json_interface["jobs"]["JOB_0"]["content"]["categories"], colors)
)
print(CLASS_TO_COLOR)
{'BONE': (31, 119, 180), 'TISSUE': (255, 127, 14), 'LUNG': (44, 160, 44), 'RIB': (214, 39, 40)}

We convert those labels using the kili.utils.labels module, and plot them using matplotlib:

from kili.utils.labels.image import normalized_vertices_to_mask

im = Image.open(processed_imgs[0])
img_width, img_height = im.size  # PIL reports size as (width, height)

# One entry per annotation, in the same order as `annotations`.
class_names = []
masks = []

for annotation in annotations:
    class_name = annotation["categories"][0]["name"]
    polygon = annotation["boundingPoly"][0]["normalizedVertices"]

    # Rasterise the normalized polygon into a numpy mask.
    mask = normalized_vertices_to_mask(polygon, img_width, img_height)

    # Paint the class colour on every pixel covered by the mask; all other
    # pixels stay at zero.
    mask_rgb = np.zeros(mask.shape + (3,), dtype=np.int32)
    mask_rgb[mask > 0] = CLASS_TO_COLOR[class_name]

    masks.append(mask_rgb)
    class_names.append(class_name)

Let's merge all masks into a single one:

# Overlay all masks into one image; where masks overlap, the earlier mask in
# the list wins. The test is done per pixel (any non-zero channel) and then
# broadcast over the channel axis: a per-channel test would blend the
# channels of two masks whenever a class colour contains a zero channel.
# `masks` must not be empty (reduce is called without an initial value).
merged_masks = reduce(
    lambda mask_1, mask_2: np.where(
        np.any(mask_1 != 0, axis=-1, keepdims=True), mask_1, mask_2
    ),
    masks,
)

Plot the image and masks

Finally, we can plot the image as well as the masks converted from our Kili labels:

# One legend patch per class; matplotlib expects colour channels in [0, 1].
handles = [
    mpatches.Patch(color=tuple(channel / 255 for channel in color), label=class_name)
    for class_name, color in CLASS_TO_COLOR.items()
]
# Kept under its own name so the `labels` dict loaded from the pickle file is
# not overwritten and the earlier cells stay re-runnable.
legend_labels = list(CLASS_TO_COLOR)

fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(im, cmap="gray")
# Draw the merged masks semi-transparently on top of the image.
ax.imshow(merged_masks, alpha=0.5)
ax.set_title(f"Healthy: {healthy}")
ax.legend(handles=handles, labels=legend_labels, fontsize=16, loc="upper left")
plt.show()

png

Congrats! 👏

In this tutorial, we have seen how to upload medical images to Kili, and how to download the segmentation labels and convert them to Numpy masks.

Project cleanup

# Delete the project created for this tutorial.
kili.delete_project(project_id)