Skip to content

Instantly share code, notes, and snippets.

@prerakmody
Last active July 1, 2024 12:27
Show Gist options
  • Save prerakmody/7987e8b62e7c5237747a840b19200692 to your computer and use it in GitHub Desktop.
Save prerakmody/7987e8b62e7c5237747a840b19200692 to your computer and use it in GitHub Desktop.
TCIA scripts

Datasets present in OHIF dicom server

Samples are writen down with their StudyInstanceUID as the header.

Other Referrences

  1. To get all collections in TCIA: link

Samples

  1. [StageII-Colorectal-CT --> CT-005] 1.3.6.1.4.1.14519.5.2.1.256467663913010332776401703474716742458

  1. [HCC-TACE-SEG --> HCC_004] 1.3.6.1.4.1.14519.5.2.1.1706.8374.643249677828306008300337414785

  1. [QIN-PROSTATE-Repeatability -- PCAMPMRI-00012] 1.3.6.1.4.1.14519.5.2.1.3671.4754.298665348758363466150039312520

  1. [FreeSurfer -- ??] 1.3.12.2.1107.5.2.32.35162.30000015050317233592200000046

    • in ohif repo (cant find)
    • in tcia (not present)
    • in ohif-dicomweb
    • [in postman] to download data ---> ??
  2. [NSCLC-Radiomics -- LUNG1-008] 1.3.6.1.4.1.32722.99.99.62087908186665265759322018723889952421

# Inspired by https://github.com/jzagoli/ProstateX2nii. Modified by Prerak Mody
# This processes this dataset --> https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=61080779#61080779ccf02ddde6884e4ba1ec73d08be362c6
# Alternatively, one could also look here --> https://github.com/rcuocolo/PROSTATEx_masks/tree/master/Files/prostate for this dataset (https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=23691656)
import pdb
import tqdm # pip install tqdm
import urllib
import zipfile
import nibabel # pip install nibabel
import pydicom # pip install pydicom
import skimage
import traceback
import subprocess
import numpy as np
import urllib.request
import skimage.transform
from pathlib import Path
import matplotlib.pyplot as plt
DIR_MAIN = Path(__file__).parent.absolute()
DIRNAME_RAW = 'raw'
DIRNAME_PROCESSED = 'processed'
DIRNAME_PROCESSED2 = 'processed2'
DIRNAME_TMP = '_tmp'
EXT_ZIP = '.zip'
EXT_NII = '.nii'
EXT_NRRD = '.nrrd'
FOLDERNAME_MRI = '{}_mri{}'
FOLDERNAME_MASK = '{}_mask{}'
def read_zip(filepath_zip, filepath_output=None, leave=False):
# Step 0 - Init
if Path(filepath_zip).exists():
if filepath_output is None:
filepath_zip_parts = list(Path(filepath_zip).parts)
filepath_zip_name = filepath_zip_parts[-1].split(EXT_ZIP)[0]
filepath_zip_parts[-1] = filepath_zip_name
filepath_output = Path(*filepath_zip_parts)
zip_fp = zipfile.ZipFile(filepath_zip, 'r')
zip_fp_members = zip_fp.namelist()
with tqdm.tqdm(total=len(zip_fp_members), desc=' - [Unzip] ' + str(filepath_zip.parts[-1]), leave=leave) as pbar_zip:
for member in zip_fp_members:
zip_fp.extract(member, filepath_output)
pbar_zip.update(1)
return filepath_output
else:
print (' - [ERROR][utils.read_zip()] Path does not exist: ', filepath_zip)
return None
class TCIAClient:
"""
- This has now been DEPRECATED for the NBIA-API: https://wiki.cancerimagingarchive.net/display/Public/NBIA+Search+with+Authentication+REST+API+Guide
- Ref: https://wiki.cancerimagingarchive.net/display/Public/TCIA+Programmatic+Interface+%28REST+API%29+Usage+Guide
- Ref: https://github.com/TCIA-Community/TCIA-API-SDK/tree/master/tcia-rest-client-python/src
"""
GET_IMAGE = "getImage"
GET_MANUFACTURER_VALUES = "getManufacturerValues"
GET_MODALITY_VALUES = "getModalityValues"
GET_COLLECTION_VALUES = "getCollectionValues"
GET_BODY_PART_VALUES = "getBodyPartValues"
GET_PATIENT_STUDY = "getPatientStudy"
GET_SERIES = "getSeries"
GET_PATIENT = "getPatient"
GET_SERIES_SIZE = "getSeriesSize"
CONTENTS_BY_NAME = "ContentsByName"
def __init__(self, baseUrl, resource):
self.baseUrl = baseUrl + "/" + resource
self.STATUS_OK = 200
self.DECODER = 'utf-8'
def execute(self, url, queryParameters={}, verbose=False):
queryParameters = dict((k, v) for k, v in queryParameters.items() if v)
queryString = "?%s" % urllib.parse.urlencode(queryParameters)
requestUrl = url + queryString
if verbose:
print (' - [execute()] URL: ', requestUrl)
request = urllib.request.Request(url=requestUrl, headers={})
resp = urllib.request.urlopen(request)
return resp
def read_response(self, resp):
if resp.status == self.STATUS_OK:
return eval(resp.read().decode(self.DECODER))
else:
return None
def get_patient(self,collection = None , outputFormat = "json" ):
serviceUrl = self.baseUrl + "/query/" + self.GET_PATIENT
queryParameters = {"Collection" : collection , "format" : outputFormat }
resp = self.execute(serviceUrl , queryParameters)
return resp
def get_patient_study(self,collection = None , patientId = None , studyInstanceUid = None , outputFormat = "json" ):
serviceUrl = self.baseUrl + "/query/" + self.GET_PATIENT_STUDY
queryParameters = {"Collection" : collection , "PatientID" : patientId , "StudyInstanceUID" : studyInstanceUid , "format" : outputFormat }
resp = self.execute(serviceUrl , queryParameters)
return resp
def get_series(self, collection=None, patientId=None, modality=None, studyInstanceUid=None, seriesInstanceUid=None, outputFormat = "json" ):
serviceUrl = self.baseUrl + "/query/" + self.GET_SERIES
queryParameters = {"Collection" : collection, "patientId": patientId, "StudyInstanceUID" : studyInstanceUid, 'SeriesInstanceUID': seriesInstanceUid,"Modality" : modality, "format" : outputFormat }
resp = self.execute(serviceUrl , queryParameters)
return resp
def get_image(self , seriesInstanceUid , downloadPath, zipFileName):
try:
serviceUrl = self.baseUrl + "/query/" + self.GET_IMAGE
queryParameters = { "SeriesInstanceUID" : seriesInstanceUid }
resp = self.execute(serviceUrl, queryParameters)
filepath = Path(downloadPath).joinpath(zipFileName)
data = resp.read()
with open(filepath, 'wb') as fp:
fp.write(data)
tmp = list(Path(filepath).parts)
tmp[-1] = tmp[-1].split('.zip')[0]
filepath_output = Path(*tmp)
read_zip(filepath, filepath_output, leave=False)
Path(filepath).unlink()
except:
traceback.print_exc()
def download_dcm2niix(DIR_TMP):
# Ref: https://github.com/rordenlab/dcm2niix/releases/tag/v1.0.20220720
try:
import sys
path_file = Path(DIR_TMP).joinpath('dcm2niix')
if sys.platform == 'win32':
if not Path(path_file).exists():
path_zip = Path(DIR_TMP).joinpath('dcm2niix_win.zip')
urllib.request.urlretrieve(url='https://github.com/rordenlab/dcm2niix/releases/download/v1.0.20220720/dcm2niix_win.zip', filename=str(path_zip))
read_zip(path_zip, path_file)
Path(path_zip).unlink()
elif sys.platform == 'linux':
pass
return Path(path_file).joinpath('dcm2niix.exe')
except:
traceback.print_exc()
pdb.set_trace()
return None
def freqhist_bins(array, bins=100):
"""
A numpy based function to split the range of pixel values into groups, such that each group has around the same number of pixels
Ref: https://github.com/AIEMMU/MRI_Prostate/blob/master/Pre%20processing%20Promise12.ipynb
Ref: https://github.com/fastai/fastai/blob/master/fastai/medical/imaging.py
"""
imsd = np.sort(array.flatten())
t = np.array([0.001])
t = np.append(t, np.arange(bins)/bins+(1/2/bins))
t = np.append(t, 0.999)
t = (len(imsd)*t+0.5).astype(np.int)
return np.unique(imsd[t])
def hist_scaled(array, brks=None, bins=100):
"""
Scales a tensor using `freqhist_bins` to values between 0 and 1
Ref: https://github.com/AIEMMU/MRI_Prostate/blob/master/Pre%20processing%20Promise12.ipynb
Ref: https://github.com/fastai/fastai/blob/master/fastai/medical/imaging.py
"""
if brks is None:
brks = freqhist_bins(array, bins=bins)
ys = np.linspace(0., 1., len(brks))
x = array.flatten()
x = np.interp(x, brks, ys)
if 0:
plt.plot(brks, ys, color='blue', label='Non-Normalized', marker='x', markersize=2)
plt.xlim([0,np.max(array)])
brks2 = freqhist_bins(x, bins=bins)*np.max(array)
ys2 = np.linspace(0., 1., len(brks2))
plt.plot(brks2, ys2, color='orange', label='Histogram Equalization')
plt.legend()
plt.show()
array_tmp = np.reshape(x, array.shape)
array_tmp = np.clip(array_tmp, 0.,1.)
return array_tmp
if __name__ == "__main__":
# Step 0.1 - Init TCIA client
baseUrl = 'https://services.cancerimagingarchive.net/services/v4'
resource = 'TCIA'
collection = 'PROSTATEx'
client = TCIAClient(baseUrl=baseUrl, resource=resource)
# Step 0.2 - Init seriesids from https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=61080779#61080779ccf02ddde6884e4ba1ec73d08be362c6
seriesids_mask = ['1.2.276.0.7230010.3.1.3.1070885483.26120.1599221759.302','1.2.276.0.7230010.3.1.3.1070885483.24432.1599221753.549','1.2.276.0.7230010.3.1.3.1070885483.5008.1599221756.491','1.2.276.0.7230010.3.1.3.1070885483.27140.1599221767.245','1.2.276.0.7230010.3.1.3.1070885483.27624.1599221764.667','1.2.276.0.7230010.3.1.3.1070885483.16240.1599221761.962','1.2.276.0.7230010.3.1.3.1070885483.8220.1599221770.50','1.2.276.0.7230010.3.1.3.1070885483.9460.1599221773.50','1.2.276.0.7230010.3.1.3.1070885483.24532.1599221775.720','1.2.276.0.7230010.3.1.3.1070885483.24608.1599221781.105','1.2.276.0.7230010.3.1.3.1070885483.24736.1599221778.322','1.2.276.0.7230010.3.1.3.1070885483.22584.1599221783.667','1.2.276.0.7230010.3.1.3.1070885483.24012.1599221786.269','1.2.276.0.7230010.3.1.3.1070885483.21792.1599221788.891','1.2.276.0.7230010.3.1.3.1070885483.5464.1599221791.439','1.2.276.0.7230010.3.1.3.1070885483.5672.1599221794.52','1.2.276.0.7230010.3.1.3.1070885483.16948.1599221799.294','1.2.276.0.7230010.3.1.3.1070885483.16324.1599221796.636','1.2.276.0.7230010.3.1.3.1070885483.26880.1599221801.878','1.2.276.0.7230010.3.1.3.1070885483.26756.1599221804.455','1.2.276.0.7230010.3.1.3.1070885483.21116.1599221807.124','1.2.276.0.7230010.3.1.3.1070885483.23772.1599221809.995','1.2.276.0.7230010.3.1.3.1070885483.23924.1599221812.620','1.2.276.0.7230010.3.1.3.1070885483.24056.1599221815.833','1.2.276.0.7230010.3.1.3.1070885483.23100.1599221818.605','1.2.276.0.7230010.3.1.3.1070885483.4648.1599221821.338','1.2.276.0.7230010.3.1.3.1070885483.5584.1599221824.60','1.2.276.0.7230010.3.1.3.1070885483.17604.1599221827.76','1.2.276.0.7230010.3.1.3.1070885483.23788.1599221829.820','1.2.276.0.7230010.3.1.3.1070885483.26076.1599221832.601','1.2.276.0.7230010.3.1.3.1070885483.22052.1599221835.360','1.2.276.0.7230010.3.1.3.1070885483.26444.1599221837.990','1.2.276.0.7230010.3.1.3.1070885483.27528.1599221841.604','1.2.276.0.7230010.3.1.3.1070885483.3400.1599221844.204','1.2.276.0.7230010.3.1.3.1070885483.15416.1599221846.853','1.2.276.0.7230010.3.1.3.1070885483.22008.1599221849.372','1.2.276.0.7230010.3.1.3.1070885483.16016.1599221851.990','1.2.276.0.7230010.3.1.3.1070885483.8088.1599221854.809','1.2.276.0.7230010.3.1.3.1070885483.5396.1599221858.335','1.2.276.0.7230010.3.1.3.1070885483.25220.1599221863.968','1.2.276.0.7230010.3.1.3.1070885483.5464.1599221860.888','1.2.276.0.7230010.3.1.3.1070885483.7992.1599221866.575','1.2.276.0.7230010.3.1.3.1070885483.16948.1599221869.130','1.2.276.0.7230010.3.1.3.1070885483.24588.1599221871.775','1.2.276.0.7230010.3.1.3.1070885483.24928.1599221874.342','1.2.276.0.7230010.3.1.3.1070885483.14224.1599221879.641','1.2.276.0.7230010.3.1.3.1070885483.25300.1599221882.469','1.2.276.0.7230010.3.1.3.1070885483.21512.1599221877.6','1.2.276.0.7230010.3.1.3.1070885483.5808.1599221885.265','1.2.276.0.7230010.3.1.3.1070885483.22380.1599221887.327','1.2.276.0.7230010.3.1.3.1070885483.23580.1599221890.67','1.2.276.0.7230010.3.1.3.1070885483.15904.1599221892.237','1.2.276.0.7230010.3.1.3.1070885483.4220.1599221894.379','1.2.276.0.7230010.3.1.3.1070885483.25552.1599221897.694','1.2.276.0.7230010.3.1.3.1070885483.26088.1599221900.380','1.2.276.0.7230010.3.1.3.1070885483.5296.1599221903.189','1.2.276.0.7230010.3.1.3.1070885483.6284.1599221908.467','1.2.276.0.7230010.3.1.3.1070885483.17416.1599221911.141','1.2.276.0.7230010.3.1.3.1070885483.1780.1599221905.866','1.2.276.0.7230010.3.1.3.1070885483.24236.1599221913.873','1.2.276.0.7230010.3.1.3.1070885483.24632.1599221916.651','1.2.276.0.7230010.3.1.3.1070885483.25044.1599221924.876','1.2.276.0.7230010.3.1.3.1070885483.8668.1599221919.430','1.2.276.0.7230010.3.1.3.1070885483.24644.1599221922.113','1.2.276.0.7230010.3.1.3.1070885483.27560.1599221927.547','1.2.276.0.7230010.3.1.3.1070885483.25100.1599221930.643']
seriesids_mri = ['1.3.6.1.4.1.14519.5.2.1.7310.5101.107276353018221365492863559094','1.3.6.1.4.1.14519.5.2.1.7310.5101.127076787998581344993055912043','1.3.6.1.4.1.14519.5.2.1.7310.5101.128915395083536972793214199559','1.3.6.1.4.1.14519.5.2.1.7310.5101.156355474235536930096765953749','1.3.6.1.4.1.14519.5.2.1.7310.5101.182666056353308077379356745296','1.3.6.1.4.1.14519.5.2.1.7310.5101.217705819956691748023253683197','1.3.6.1.4.1.14519.5.2.1.7310.5101.233894782007809884236855995183','1.3.6.1.4.1.14519.5.2.1.7310.5101.234424615181029312892423746307','1.3.6.1.4.1.14519.5.2.1.7310.5101.259467270859277582122403078829','1.3.6.1.4.1.14519.5.2.1.7310.5101.260386623049829078909727548239','1.3.6.1.4.1.14519.5.2.1.7310.5101.306748150836286513093400468929','1.3.6.1.4.1.14519.5.2.1.7310.5101.318856235542799349557600990165','1.3.6.1.4.1.14519.5.2.1.7310.5101.337896560664468718350275750005','1.3.6.1.4.1.14519.5.2.1.7310.5101.568951414433420775694157731047','1.3.6.1.4.1.14519.5.2.1.7310.5101.877607202381357458819664904037','1.3.6.1.4.1.14519.5.2.1.7310.5101.898676446450078900613536879876','1.3.6.1.4.1.14519.5.2.1.7311.5101.101130890931274399577478152963','1.3.6.1.4.1.14519.5.2.1.7311.5101.101130934168942593154270621032','1.3.6.1.4.1.14519.5.2.1.7311.5101.105766764724449379107432362750','1.3.6.1.4.1.14519.5.2.1.7311.5101.109832262779867794170417053903','1.3.6.1.4.1.14519.5.2.1.7311.5101.119868581724901379856626647643','1.3.6.1.4.1.14519.5.2.1.7311.5101.124933244802704100684011471210','1.3.6.1.4.1.14519.5.2.1.7311.5101.126808882287123482193592987944','1.3.6.1.4.1.14519.5.2.1.7311.5101.146422450520894659910416624671','1.3.6.1.4.1.14519.5.2.1.7311.5101.149728379277305470281540696443','1.3.6.1.4.1.14519.5.2.1.7311.5101.155875154686610653777230856177','1.3.6.1.4.1.14519.5.2.1.7311.5101.156910737340231640626298466189','1.3.6.1.4.1.14519.5.2.1.7311.5101.160225377533762960695041069832','1.3.6.1.4.1.14519.5.2.1.7311.5101.165636727522505811947315188478','1.3.6.1.4.1.14519.5.2.1.7311.5101.166265209513832781969059787909','1.3.6.1.4.1.14519.5.2.1.7311.5101.175570448971296597162147241386','1.3.6.1.4.1.14519.5.2.1.7311.5101.178750617003896154912438088527','1.3.6.1.4.1.14519.5.2.1.7311.5101.180041601751316950966041961765','1.3.6.1.4.1.14519.5.2.1.7311.5101.180650601474055581355948988643','1.3.6.1.4.1.14519.5.2.1.7311.5101.186553403053805718209125341361','1.3.6.1.4.1.14519.5.2.1.7311.5101.196511587017551386049158542619','1.3.6.1.4.1.14519.5.2.1.7311.5101.206828891270520544417996275680','1.3.6.1.4.1.14519.5.2.1.7311.5101.226939533786316417608293472819','1.3.6.1.4.1.14519.5.2.1.7311.5101.236967836312224538274213979128','1.3.6.1.4.1.14519.5.2.1.7311.5101.242424835264181527414562151046','1.3.6.1.4.1.14519.5.2.1.7311.5101.245315537897059083725251245833','1.3.6.1.4.1.14519.5.2.1.7311.5101.248203933751895346486742820088','1.3.6.1.4.1.14519.5.2.1.7311.5101.252185783543855817242399041825','1.3.6.1.4.1.14519.5.2.1.7311.5101.252385246988168654915352850239','1.3.6.1.4.1.14519.5.2.1.7311.5101.257928599770116866513356683535','1.3.6.1.4.1.14519.5.2.1.7311.5101.258067469374761412596368241889','1.3.6.1.4.1.14519.5.2.1.7311.5101.262719318163046156031519223454','1.3.6.1.4.1.14519.5.2.1.7311.5101.266720221212035669954581062639','1.3.6.1.4.1.14519.5.2.1.7311.5101.271991298059338527584681788043','1.3.6.1.4.1.14519.5.2.1.7311.5101.282008038881694967871335639325','1.3.6.1.4.1.14519.5.2.1.7311.5101.287403883614425048490255475041','1.3.6.1.4.1.14519.5.2.1.7311.5101.289711251504454079274667690291','1.3.6.1.4.1.14519.5.2.1.7311.5101.304058605325520782900293953679','1.3.6.1.4.1.14519.5.2.1.7311.5101.312442050391027773578890036831','1.3.6.1.4.1.14519.5.2.1.7311.5101.318660391556220519803669869815','1.3.6.1.4.1.14519.5.2.1.7311.5101.322192860849906453257530108507','1.3.6.1.4.1.14519.5.2.1.7311.5101.333332554059735467244825907777','1.3.6.1.4.1.14519.5.2.1.7311.5101.334326985258868181144176796864','1.3.6.1.4.1.14519.5.2.1.7311.5101.357054868848226402548612686613','1.3.6.1.4.1.14519.5.2.1.7311.5101.460453169764361555745878708935','1.3.6.1.4.1.14519.5.2.1.7311.5101.472900684997509231929411950536','1.3.6.1.4.1.14519.5.2.1.7311.5101.666342438883185464672112795625','1.3.6.1.4.1.14519.5.2.1.7311.5101.785303327607349403635620326985','1.3.6.1.4.1.14519.5.2.1.7311.5101.813054628707353396925223754592','1.3.6.1.4.1.14519.5.2.1.7311.5101.905763996607212880296069425861','1.3.6.1.4.1.14519.5.2.1.7311.5101.928012760939767746081753156593']
# Step 0.3 - Make folders
DIR_RAW = Path(DIR_MAIN).joinpath(DIRNAME_RAW)
DIR_PROCESSED = Path(DIR_MAIN).joinpath(DIRNAME_PROCESSED)
DIR_PROCESSED2 = Path(DIR_MAIN).joinpath(DIRNAME_PROCESSED2)
DIR_TMP = Path(DIR_MAIN).joinpath(DIRNAME_TMP)
Path(DIR_RAW).mkdir(exist_ok=True, parents=True)
Path(DIR_PROCESSED).mkdir(exist_ok=True, parents=True)
Path(DIR_PROCESSED2).mkdir(exist_ok=True, parents=True)
Path(DIR_TMP).mkdir(exist_ok=True, parents=True)
# Step 1 - Loop over all seriesids_mask
if 1:
print ('')
with tqdm.tqdm(total=len(seriesids_mask), desc=' - [MASKS]') as pbar_mask:
for id_, seriesid_mask in enumerate(seriesids_mask):
try:
resp_series = client.get_series(collection=collection, seriesInstanceUid=seriesid_mask)
if resp_series.status == 200:
obj_series = client.read_response(resp_series)
if len(obj_series):
patient_id = obj_series[0]['PatientID']
downloadPath = Path(DIR_RAW).joinpath(patient_id)
Path(downloadPath).mkdir(exist_ok=True, parents=True)
client.get_image(seriesInstanceUid=seriesid_mask, downloadPath=str(downloadPath), zipFileName=FOLDERNAME_MASK.format(patient_id, EXT_ZIP))
else:
print (' - [ERROR] No patient info for seriesid_mask: ', seriesid_mask)
else:
print (' - [ERROR] Could not read seriesid_mask: ', seriesid_mask)
except:
print (' - [ERROR] Some error with seriesid_mask', seriesid_mask)
traceback.print_exc()
pdb.set_trace()
pbar_mask.update(1)
# Step 2 - Loop over all seriesids_mri
if 1:
print ('')
with tqdm.tqdm(total=len(seriesids_mri), desc='\n - [MRI]') as pbar_mri:
for id_, seriesid_mri in enumerate(seriesids_mri):
try:
resp_series = client.get_series(collection=collection, seriesInstanceUid=seriesid_mri)
if resp_series.status == 200:
obj_series = client.read_response(resp_series)
if len(obj_series):
patient_id = obj_series[0]['PatientID']
downloadPath = Path(DIR_RAW).joinpath(patient_id)
Path(downloadPath).mkdir(exist_ok=True, parents=True)
client.get_image(seriesInstanceUid=seriesid_mri, downloadPath=str(downloadPath), zipFileName=FOLDERNAME_MRI.format(patient_id, EXT_ZIP))
else:
print (' - [ERROR] No patient info for seriesid_mri: ', seriesid_mri)
else:
print (' - [ERROR] Could not read seriesid_mri: ', seriesid_mri)
except:
print (' - [ERROR] Some error with seriesid_mri', seriesid_mri)
traceback.print_exc()
pdb.set_trace()
pbar_mri.update(1)
# Step 3 - Convert to NIFTI using dcm2niix
if 1:
print ('')
path_dcm2niix = download_dcm2niix(DIR_TMP)
if path_dcm2niix is not None:
with tqdm.tqdm(total=len(list(Path(DIR_RAW).glob('*'))), desc=' - [Convert to volume] ') as pbar_convert:
for path_patient in Path(DIR_RAW).iterdir():
if Path(path_patient).is_dir():
# Step 3.1 - Init vars
patient_id = Path(path_patient).parts[-1]
foldername_mri = FOLDERNAME_MRI.format(patient_id, '')
path_mri = Path(path_patient).joinpath(foldername_mri)
foldername_mask = FOLDERNAME_MASK.format(patient_id, '')
path_masks = list(Path(path_patient).joinpath(foldername_mask).glob('*')) # there should only be 1, but I am still looping over it
# Step 3.2 - Volumize MRI
if 1:
try:
launch_mri = [str(path_dcm2niix), "-e", "n", "-b", "n", "-v", "0" , "-o", str(path_patient), "-f", foldername_mri, str(path_mri)]
# e.g. .\dcm2niix.exe -e n -b n -v 0 -o . -f ProstateX-0323_mri .\ProstateX-0323_mri
_ = subprocess.run(launch_mri, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
# parameters for conversion.
# -e : export as NRRD (y) or MGH (o) instead of NIfTI (y/n/o, default n)
# -o : output directory (omit to save to input folder)
# -b : BIDS sidecar (y/n/o [o=only: no NIfTI], default y)
# -f : filename (%a=antenna (coil) name, %b=basename, %c=comments, %d=description, %e=echo number, %f=folder name,
except:
print (' - [ERROR][MRI] patient_id: ', patient_id)
traceback.print_exc()
pdb.set_trace()
# Step 3.3 - Volumize MASK
if 1:
try:
for path_dcm in path_masks:
# Step 3.3.1 - Read data
ds_mask = pydicom.dcmread(str(path_dcm))
# len(ds_mask.ReferencedSeriesSequence[0].ReferencedInstanceSequence) == len(vol_mri)
# e.g. --> ds_mask.ReferencedSeriesSequence[0].ReferencedInstanceSequence[0].ReferencedSOPInstanceUID
# len(ds_mask.PerFrameFunctionalGroupsSequence) == len(vol_mask)
vol_mask = ds_mask.pixel_array # [D,H,W]
nii_mri = nibabel.load(str(Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII))))
vol_mri = nii_mri.get_fdata() # [H,W,D] e.g. [384,384,19]
if 1:
print ('\n - [convert to nifti][mask] pixelspacing: {} | slicespacing: {}'.format(
ds_mask.SharedFunctionalGroupsSequence[0].PixelMeasuresSequence[0].PixelSpacing
, ds_mask.SharedFunctionalGroupsSequence[0].PixelMeasuresSequence[0].SliceThickness))
print (' - [convert to nifti][vol] pixelspacing: {} | slicespacing: {}'.format(nii_mri.header['pixdim'][1:3], nii_mri.header['pixdim'][3] ))
# Step 3.3.2 - Downsample segmentation
if 0:
end = vol_mask.shape[0] # .shape=[D,H,W]
step = end // vol_mri.shape[2]
vol_mask = vol_mask[0:end:step, ...]
diff = vol_mask.shape[0] - vol_mri.shape[2]
if diff != 0:
vol_mask = vol_mask[:-diff, ...]
if vol_mri.shape != vol_mask.shape:
# order = 0 is nearest neighbour interpolation
vol_mask = skimage.transform.resize(vol_mask, vol_mri.shape, preserve_range=True, order=0)
else:
idxs = [idx for idx, item in enumerate(ds_mask.PerFrameFunctionalGroupsSequence) if len(ds_mask.PerFrameFunctionalGroupsSequence[idx].DerivationImageSequence)]
vol_mask = vol_mask[idxs, :, :]
vol_mask = np.rot90(vol_mask, k=-1, axes=(1, 2))
vol_mask = np.swapaxes(vol_mask, 0, 1)
vol_mask = np.swapaxes(vol_mask, 1, 2)
assert vol_mask.shape == vol_mri.shape
# saving segmentations
niiobj = nibabel.Nifti1Image(vol_mask, nii_mri.affine)
path_mask = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII))
nibabel.save(niiobj, str(path_mask))
except:
print (' - [ERROR][MASK] patient_id: ', patient_id)
traceback.print_exc()
pdb.set_trace()
pbar_convert.update(1)
# Step 4 - Do voxel spacing analysis
if 0:
spacings = []
with tqdm.tqdm(total=len(list(Path(DIR_RAW).glob('*'))), desc=' - [ProstateX Analysis] ') as pbar_analysis:
for path_patient in Path(DIR_RAW).iterdir():
if Path(path_patient).is_dir():
patient_id = Path(path_patient).parts[-1]
path_mask = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII))
path_raw = Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII))
nii_mri = nibabel.load(str(path_raw))
header_mri = nii_mri.header
spacing_mri = list(nii_mri.header['pixdim'][1:4])
origin_mri = [nii_mri.header['qoffset_x'], nii_mri.header['qoffset_y'], nii_mri.header['qoffset_z'] ]
# vol_mri = nii_mri.get_fdata()
spacings.append(spacing_mri)
pbar_analysis.update(1)
if len(spacings):
import matplotlib.pyplot as plt
spacings = np.array(spacings)
f,axarr = plt.subplots(1,3)
axarr[0].hist(spacings[:,0], bins=10); axarr[0].set_title('Voxel Spacing (X)')
axarr[1].hist(spacings[:,1], bins=10); axarr[1].set_title('Voxel Spacing (Y)')
axarr[2].hist(spacings[:,2], bins=10); axarr[2].set_title('Voxel Spacing (Z)')
plt.savefig('prostatex_spacings.png', bbox_inches='tight')
plt.show() # resmaple every image to [0.5, 0.5, 3] as per this image
# Step 5 - Do resampling to (0.5, 0.5, 3.0)
if 1:
SPACING_NEW = (0.5, 0.5, 3.0)
import medloader.dataloader.utils as medUtils
with tqdm.tqdm(total=len(list(Path(DIR_RAW).glob('*'))), desc=' - [ProstateX Analysis] ') as pbar_resample:
for path_patient in Path(DIR_RAW).iterdir():
if Path(path_patient).is_dir():
patient_id = Path(path_patient).parts[-1]
path_mask_raw = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII))
path_img_raw = Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII))
if 0:
path_img_proc = Path(DIR_PROCESSED).joinpath(patient_id, FOLDERNAME_MRI.format(patient_id, EXT_NRRD))
path_mask_proc = Path(DIR_PROCESSED).joinpath(patient_id, FOLDERNAME_MASK.format(patient_id, EXT_NRRD))
nrrd_img = medUtils.resample_img(path_img=str(path_img_raw), path_new_img=str(path_img_proc), spacing=SPACING_NEW, verbose=True)
nrrd_mask = medUtils.resample_mask(path_mask=str(path_mask_raw), path_new_mask=str(path_mask_proc), spacing=SPACING_NEW, size=nrrd_img.GetSize(), labels_allowed = [1], verbose=True)
elif 1:
path_img_proc = Path(DIR_PROCESSED).joinpath(patient_id, FOLDERNAME_MRI.format(patient_id, EXT_NII))
path_mask_proc = Path(DIR_PROCESSED).joinpath(patient_id, FOLDERNAME_MASK.format(patient_id, EXT_NII))
import math
import skimage.transform as skTrans
img_og = nibabel.load(path_img_raw)
ratio = [spacing_old / SPACING_NEW[i] for i, spacing_old in enumerate(img_og.header['pixdim'][1:4])]
size_new = tuple(math.ceil(size_old * ratio[i]) - 1 for i, size_old in enumerate(img_og.header['dim'][1:4]))
img_og.header['pixdim'][1:4] = SPACING_NEW
img_resampled = skTrans.resize(img_og.get_fdata(), size_new, order=3, preserve_range=True)
img_resampled = img_resampled.astype(np.int16)
niiobj = nibabel.Nifti1Image(img_resampled, img_og.affine, img_og.header)
Path(path_img_proc).parent.mkdir(exist_ok=True, parents=True)
nibabel.save(niiobj, str(path_img_proc))
mask_og = nibabel.load(path_mask_raw)
mask_og.header['pixdim'][1:4] = SPACING_NEW
mask_resampled = skTrans.resize(mask_og.get_fdata(), size_new, order=0, preserve_range=True) # 0 = Nearest Neighbour
mask_resampled = mask_resampled.astype(np.uint8)
print (' - [{}] np.unique(mask_resampled): {}'.format(patient_id, np.unique(mask_resampled)))
niiobj = nibabel.Nifti1Image(mask_resampled, mask_og.affine, mask_og.header)
Path(path_mask_proc).parent.mkdir(exist_ok=True, parents=True)
nibabel.save(niiobj, str(path_mask_proc))
pbar_resample.update(1)
# Step 6 - Do image cropping analysis
if 0:
"""
Notes
- ProstateX-0241 has 31 slices (25 slices=prostate)
- ProstateX-0198 has 18 slices (11 slices=prostate)
- Result: (200, 200, 28) --> [100,100, 28] --> [50,50,14] --> [25,25,7]
"""
prostate_slice_count = []
prostate_voxel_width = []
prostate_voxel_breadth = []
import matplotlib.pyplot as plt
with tqdm.tqdm(total=len(list(Path(DIR_PROCESSED).glob('*'))), desc=' - [ProstateX Cropping] ') as pbar_cropping:
for path_patient in Path(DIR_PROCESSED).iterdir():
if Path(path_patient).is_dir():
patient_id = Path(path_patient).parts[-1]
path_mask_proc = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII))
path_img_proc = Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII))
# img = nibabel.load(path_img_proc)
# img_array = img.get_fdata()
mask = nibabel.load(path_mask_proc)
mask_array = mask.get_fdata()
prostate_idxs = np.argwhere(mask_array > 0)
prostate_slice_count.append(np.max(prostate_idxs[:,2]) - np.min(prostate_idxs[:,2]))
prostate_voxel_width.append(np.max(prostate_idxs[:,1]) - np.min(prostate_idxs[:,1]))
prostate_voxel_breadth.append(np.max(prostate_idxs[:,0]) - np.min(prostate_idxs[:,0]))
print (' - [{}] mask: {} (height,width,breadth of prostate=[{},{},{}])'.format(patient_id, mask_array.shape, prostate_slice_count[-1], prostate_voxel_width[-1], prostate_voxel_breadth[-1]))
if len(prostate_slice_count):
f,axarr = plt.subplots(1,4)
axarr[0].hist(prostate_slice_count, bins=10); axarr[0].set_title('Height')
axarr[1].hist(prostate_voxel_width, bins=10); axarr[1].set_title('Width')
axarr[2].hist(prostate_voxel_breadth, bins=10); axarr[2].set_title('Breadth')
axarr[3].imshow(mask_array[:,:,prostate_slice_count[-1]//2])
plt.savefig('prostatex_prostatesize.png', bbox_inches='tight')
plt.show()
# Step 7 - Do cropping, padding and save in DIR_PROCESSED2
# Highly custom logic, please change values with caution
if 1:
VOXELS_X = 100
VOXELS_Y = 100
VOXELS_Z_MAX = 28
mri_maxvals = []
with tqdm.tqdm(total=len(list(Path(DIR_PROCESSED).glob('*'))), desc=' - [ProstateX Padding] ') as pbar_cropping:
for path_patient in Path(DIR_PROCESSED).iterdir():
if Path(path_patient).is_dir():
# Step 7.1 - Get paths
patient_id = Path(path_patient).parts[-1]
path_mask_proc = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII))
path_mri_proc = Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII))
# Step 7.1 - Get mask (crop and pad)
mri = nibabel.load(path_mri_proc)
mri_array = mri.get_fdata()
mri_maxval = np.max(mri_array)
mri_maxvals.append(mri_maxval)
if 0:
import matplotlib.pyplot as plt
f,axarr = plt.subplots(1,2); axarr[0].imshow(mri_array[:,:,18], cmap='gray'); axarr[1].imshow(hist_scaled(mri_array, bins=100)[:,:,18], cmap='gray', vmin=0., vmax=1.); plt.suptitle('Bins=100');plt.show(block=False)
f,axarr = plt.subplots(1,2); axarr[0].imshow(mri_array[:,:,18], cmap='gray'); axarr[1].imshow(hist_scaled(mri_array, bins=1000)[:,:,18], cmap='gray', vmin=0., vmax=1.); plt.suptitle('Bins=1000');plt.show(block=False)
f,axarr = plt.subplots(1,2); axarr[0].imshow(mri_array[:,:,18], cmap='gray'); axarr[1].imshow(hist_scaled(mri_array, bins=10000)[:,:,18], cmap='gray', vmin=0., vmax=1.); plt.suptitle('Bins=10000');plt.show(block=False)
pdb.set_trace()
mri_array = hist_scaled(mri_array, bins=10000)
mask = nibabel.load(path_mask_proc)
mask_array = mask.get_fdata()
mask_idxs = np.argwhere(mask_array > 0)
mask_midpt = np.array([np.mean(mask_idxs[:,0]), np.mean(mask_idxs[:,1]), np.mean(mask_idxs[:,2])]).astype(np.uint8).tolist()
# Step 7.2 - Crop
mask_array_new = np.array(mask_array[mask_midpt[0] - VOXELS_X:mask_midpt[0] + VOXELS_X, mask_midpt[1] - VOXELS_Y:mask_midpt[1] + VOXELS_Y, :], copy=True)
mri_array_new = np.array(mri_array[mask_midpt[0] - VOXELS_X:mask_midpt[0] + VOXELS_X, mask_midpt[1] - VOXELS_Y:mask_midpt[1] + VOXELS_Y, :], copy=True)
# if patient_id == 'ProstateX-0309':
# pdb.set_trace()
# Step 7.3 - Pad (in z)
mask_array_height = mask_array_new.shape[2]
mask_array_diff = np.abs(mask_array_height - VOXELS_Z_MAX)
pad_left = mask_array_diff//2
pad_right = mask_array_diff-mask_array_diff//2
if mask_array_height > VOXELS_Z_MAX:
mask_array_new = mask_array_new[:,:,pad_left:-pad_right]
mri_array_new = mri_array_new[:,:,pad_left:-pad_right]
else:
if mask_array_diff:
mask_array_new = np.pad(mask_array_new, (pad_left, pad_right), 'constant')
mask_array_new = mask_array_new[pad_left:-pad_right, pad_left:-pad_right, :]
mri_array_new = np.pad(mri_array_new, (pad_left, pad_right), 'constant')
mri_array_new = mri_array_new[pad_left:-pad_right, pad_left:-pad_right, :]
print (' - [crop/pad][{}] mask_array: {} || mask_array_new: {}'.format(patient_id, mask_array.shape, mask_array_new.shape))
print (' - [crop/pad][{}] mri_array : {} || mri_array_new : {} || maxval: {}'.format(patient_id, mri_array.shape, mri_array_new.shape, mri_maxval))
# Step 7.4 - Save to disk
path_mask_proc2 = Path(DIR_PROCESSED2).joinpath(patient_id, FOLDERNAME_MASK.format(patient_id, EXT_NII))
Path(path_mask_proc2).parent.mkdir(exist_ok=True, parents=True)
niiobj = nibabel.Nifti1Image(mask_array_new, mask.affine, mask.header)
nibabel.save(niiobj, str(path_mask_proc2))
path_mri_proc2 = Path(DIR_PROCESSED2).joinpath(patient_id, FOLDERNAME_MRI.format(patient_id, EXT_NII))
Path(path_mri_proc2).parent.mkdir(exist_ok=True, parents=True)
niiobj = nibabel.Nifti1Image(mri_array_new, mri.affine, mri.header)
nibabel.save(niiobj, str(path_mri_proc2))
if len(mri_maxvals):
import matplotlib.pyplot as plt
plt.hist(mri_maxvals)
plt.title('ProstateX: Max Intensity Vals \n Histogram showing the need for intensity transformations in MRI')
plt.show()
pdb.set_trace()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment