How to upload medical images to Kili, and visualize segmentation labels with matplotlib
In this tutorial, we will learn how to:
- upload medical images to Kili using pydicom
- upload DICOM tags as metadata to our assets
- download segmentation labels from Kili and convert them to NumPy masks for visualization with matplotlib.
Data used in this tutorial comes from the RSNA Pneumonia Detection Challenge hosted on Kaggle.
First, let's install the required packages in case you don't have them already, and then import them.
%pip install pydicom matplotlib Pillow wget numpy pandas kili
import pickle
from functools import reduce
from pathlib import Path
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pydicom
import wget
from PIL import Image
from kili.client import Kili
Get data
Let's download some DICOM images:
wget.download(
"https://github.com/kili-technology/kili-python-sdk/blob/main/recipes/datasets/0000a175-0e68-4ca4-b1af-167204a7e0bc.dcm?raw=true"
)
wget.download(
"https://github.com/kili-technology/kili-python-sdk/blob/main/recipes/datasets/0005d3cc-3c3f-40b9-93c3-46231c3eb813.dcm?raw=true"
)
data_folder = Path()
files = list(data_folder.glob("*.dcm"))
assert len(files) == 2, files
Process data
A DICOM image contains not only pixels (or voxels), but also DICOM tags, which can hold information about the patient, the scanner, and so on.
Below, we extract the DICOM tags and add them to metadata_array. We also convert all images to JPEG format.
def extract_dicom_tags(img_dicom):
    metadata = {}
    for key in img_dicom.keys():
        if key.group == 0x7FE0:  # group 0x7FE0 (32736) contains the image pixel data
            continue
        item = img_dicom.get(key)
        if hasattr(item, "description") and hasattr(item, "value"):
            metadata[item.description()] = str(item.value)
    return metadata
metadata_array = []
processed_imgs = []
for file in files:
    sample = pydicom.dcmread(str(file))
    im = Image.fromarray(sample.pixel_array)
    fpath = data_folder / f"{file.stem}.jpeg"
    im.save(str(fpath))
    processed_imgs.append(str(fpath))
    metadata_array.append(extract_dicom_tags(sample))
print(metadata_array[0])
{'Specific Character Set': 'ISO_IR 100', 'SOP Class UID': '1.2.840.10008.5.1.4.1.1.7', 'SOP Instance UID': '1.2.276.0.7230010.3.1.4.8323329.1747.1517874292.605928', 'Study Date': '19010101', 'Study Time': '000000.00', 'Accession Number': '', 'Modality': 'CR', 'Conversion Type': 'WSD', "Referring Physician's Name": '', 'Series Description': 'view: PA', "Patient's Name": '0005d3cc-3c3f-40b9-93c3-46231c3eb813', 'Patient ID': '0005d3cc-3c3f-40b9-93c3-46231c3eb813', "Patient's Birth Date": '', "Patient's Sex": 'F', "Patient's Age": '22', 'Body Part Examined': 'CHEST', 'View Position': 'PA', 'Study Instance UID': '1.2.276.0.7230010.3.1.2.8323329.1747.1517874292.605927', 'Series Instance UID': '1.2.276.0.7230010.3.1.3.8323329.1747.1517874292.605926', 'Study ID': '', 'Series Number': '1', 'Instance Number': '1', 'Patient Orientation': '', 'Samples per Pixel': '1', 'Photometric Interpretation': 'MONOCHROME2', 'Rows': '1024', 'Columns': '1024', 'Pixel Spacing': '[0.14300000000000002, 0.14300000000000002]', 'Bits Allocated': '8', 'Bits Stored': '8', 'High Bit': '7', 'Pixel Representation': '0', 'Lossy Image Compression': '01', 'Lossy Image Compression Method': 'ISO_10918_1'}
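As a side note, pydicom also exposes standard DICOM tags as attributes of the dataset, which can be handy when you only need a few specific fields instead of iterating over all keys. A small sketch (the keywords below are standard DICOM keywords; getattr with a default avoids an error for tags absent from a file):
ds = pydicom.dcmread(str(files[0]))
# access individual tags by their DICOM keyword, with None as a fallback for missing tags
print(getattr(ds, "Modality", None), getattr(ds, "BodyPartExamined", None), getattr(ds, "ViewPosition", None))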
Create the Kili project
Next, we need to connect to Kili, create a project, and define the annotation interface (ontology).
kili = Kili(
    # api_endpoint="https://cloud.kili-technology.com/api/label/v2/graphql",
    # the line above can be uncommented and changed if you are working with an on-premise version of Kili
    # if no api_key argument is provided, the client reads it from the KILI_API_KEY environment variable
)
json_interface = {
"jobs": {
"CLASSIFICATION_JOB": {
"mlTask": "CLASSIFICATION",
"content": {
"categories": {"YES": {"name": "Yes"}, "NO": {"name": "No"}},
"input": "radio",
},
"required": 1,
"isChild": False,
"instruction": "Healthy ?",
},
"JOB_0": {
"mlTask": "OBJECT_DETECTION",
"content": {
"categories": {
"BONE": {"name": "bone"},
"TISSUE": {"name": "tissue"},
"LUNG": {"name": "lung"},
"RIB": {"name": "rib"},
},
"input": "radio",
},
"required": True,
"tools": ["polygon"],
"isChild": False,
"instruction": "Segmentation",
},
}
}
We can now use the Kili SDK to create the project and upload our images to it.
title = "[Kili SDK Notebook]: Medical Imaging with Kili Technology"
description = "This is a test project"
input_type = "IMAGE"
project = kili.create_project(
title=title, description=description, input_type=input_type, json_interface=json_interface
)
project_id = project["id"]
kili.append_many_to_dataset(
project_id=project_id,
content_array=processed_imgs,
external_id_array=processed_imgs,
json_metadata_array=metadata_array,
)
Done! Your images and their metadata are now in the project.
All that remains is to start labeling! To learn more about how to label images in Kili, check out our documentation.
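If you want to double-check programmatically that the assets and their metadata were uploaded, you can query them back. A minimal sketch (the fields listed below are just an example of what can be retrieved):
# fetch a few assets back from the project to verify the upload
assets = kili.assets(project_id=project_id, fields=["externalId", "jsonMetadata"], first=5)
print(len(assets), assets[0]["externalId"])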
Convert Kili labels to NumPy masks
Once your assets are labeled, you might want to download them and visualize them using matplotlib.
To download your labels, simply use kili.labels(project_id). You can also export your labels to a zip file using kili.export_labels(project_id). For more information, see the documentation.
In this tutorial, we assume that our labels have already been downloaded and stored in a file named medical-labels.pkl.
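For reference, a file like this could be produced from your own project once it contains at least one labeled asset. This is only an optional sketch (the output file name is arbitrary), not a step of this tutorial:
# optional sketch: fetch the json response of one label from your own project and pickle it
own_labels = kili.labels(project_id=project_id, fields=["jsonResponse"], first=1)
if own_labels:
    with open("my-own-labels.pkl", "wb") as f:
        pickle.dump(own_labels[0]["jsonResponse"], f)
Let's now download and parse the labels prepared for this tutorial: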
from kili.utils.labels.parsing import ParsedLabel
wget.download(
"https://github.com/kili-technology/kili-python-sdk/blob/main/recipes/conf/medical-labels.pkl?raw=true"
)
with open("medical-labels.pkl", "rb") as f:
label = pickle.load(f)
label = ParsedLabel(
label={"jsonResponse": label}, json_interface=json_interface, input_type="IMAGE"
)
healthy = label.jobs["CLASSIFICATION_JOB"].category.name
annotations = label.jobs["JOB_0"].annotations
print(healthy)
YES
In this example, annotations is a list-like object containing 10 annotations, one per mask.
A mask is represented by a polygon, i.e. a list of normalized vertices, each vertex being a dictionary with x and y coordinates in the [0, 1] range.
print(len(annotations))
print(type(annotations))
print(len(annotations[0].bounding_poly[0].normalized_vertices))
print(annotations[0].bounding_poly[0].normalized_vertices[0])
10
<class 'kili.services.label_data_parsing.annotation.AnnotationList'>
255
{'x': 0.401891, 'y': 0.024966000000015254}
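These coordinates are normalized to the [0, 1] range relative to the image dimensions, and each vertex behaves like a dictionary with x and y keys, as shown above. If you ever need pixel coordinates, you can scale them by the image width and height; for illustration (plain arithmetic, not a Kili helper):
# scale the first normalized vertex of the first annotation to pixel coordinates
img_w, img_h = Image.open(processed_imgs[0]).size
vertex = annotations[0].bounding_poly[0].normalized_vertices[0]
print(vertex["x"] * img_w, vertex["y"] * img_h)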
We assign a color to each class:
import matplotlib.colors as mcolors
# take the default matplotlib color cycle and convert its hex colors to RGB tuples in [0, 255]
colors = [
    tuple(int(x * 255) for x in mcolors.hex2color(hex_color))
    for hex_color in plt.rcParams["axes.prop_cycle"].by_key()["color"]
]
CLASS_TO_COLOR = {}
for class_name, color in zip(
    json_interface["jobs"]["JOB_0"]["content"]["categories"].keys(), colors
):
    CLASS_TO_COLOR[class_name] = color
print(CLASS_TO_COLOR)
{'BONE': (31, 119, 180), 'TISSUE': (255, 127, 14), 'LUNG': (44, 160, 44), 'RIB': (214, 39, 40)}
We convert those labels using the kili.utils.labels module, and plot them using matplotlib:
from kili.utils.labels.image import normalized_vertices_to_mask
im = Image.open(processed_imgs[0])
img_width, img_height = im.size
class_names = []
masks = []
for annotation in annotations:
    class_name = annotation.category.name
    normalized_vertices = annotation.bounding_poly[0].normalized_vertices
    # convert the label normalized vertices to a numpy mask
    mask = normalized_vertices_to_mask(normalized_vertices, img_width, img_height)
    # add color to the mask
    mask_rgb = np.zeros((*mask.shape, 3), dtype=np.int32)
    mask_rgb[mask > 0] = CLASS_TO_COLOR[class_name]
    class_names.append(class_name)
    masks.append(mask_rgb)
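Optionally, if you want to keep these masks on disk rather than only displaying them, each colored mask can be saved as a PNG (the file naming below is arbitrary):
# optional: write each colored mask to disk as a PNG
for i, (name, colored_mask) in enumerate(zip(class_names, masks)):
    Image.fromarray(colored_mask.astype(np.uint8)).save(f"mask_{i}_{name}.png")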
Let's merge all masks into a single one:
# for each pixel, keep the value of the first mask that is not black (0, 0, 0), otherwise fall back to the next one
merged_masks = reduce(lambda mask_1, mask_2: np.where(mask_1 != (0, 0, 0), mask_1, mask_2), masks)
Plot the image and masks
Finally, we can plot the image as well as the masks converted from our Kili labels:
handles = []
labels = []
for class_name, color in CLASS_TO_COLOR.items():
    patch = mpatches.Patch(color=tuple(x / 255 for x in color), label=class_name)
    handles.append(patch)
    labels.append(class_name)
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(im, cmap="gray")
ax.imshow(merged_masks, alpha=0.5)
ax.set_title(f"Healthy: {healthy}")
ax.legend(handles=handles, labels=labels, fontsize=16, loc="upper left")
plt.show()
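If you want to keep the overlay as an image file, the figure can also be saved to disk (the file name is arbitrary):
# optionally persist the overlay figure to disk
fig.savefig("segmentation_overlay.png", bbox_inches="tight")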
Congrats! 👏
In this tutorial, we have seen how to upload medical images and their DICOM tags to Kili, and how to download segmentation labels and convert them to NumPy masks for visualization.
Project cleanup
kili.delete_project(project_id)