|
# Inspired by https://github.com/jzagoli/ProstateX2nii. Modified by Prerak Mody |
|
# This processes this dataset --> https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=61080779#61080779ccf02ddde6884e4ba1ec73d08be362c6 |
|
# Alternatively, one could also look here --> https://github.com/rcuocolo/PROSTATEx_masks/tree/master/Files/prostate for this dataset (https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=23691656) |
|
|
|
import pdb |
|
import tqdm # pip install tqdm |
|
import urllib |
|
import zipfile |
|
import nibabel # pip install nibabel |
|
import pydicom # pip install pydicom |
|
import skimage |
|
import traceback |
|
import subprocess |
|
import numpy as np |
|
import urllib.request |
|
import skimage.transform |
|
from pathlib import Path |
|
import matplotlib.pyplot as plt |
|
|
|
DIR_MAIN = Path(__file__).parent.absolute() |
|
DIRNAME_RAW = 'raw' |
|
DIRNAME_PROCESSED = 'processed' |
|
DIRNAME_PROCESSED2 = 'processed2' |
|
DIRNAME_TMP = '_tmp' |
|
EXT_ZIP = '.zip' |
|
EXT_NII = '.nii' |
|
EXT_NRRD = '.nrrd' |
|
|
|
FOLDERNAME_MRI = '{}_mri{}' |
|
FOLDERNAME_MASK = '{}_mask{}' |
|
|
|
def read_zip(filepath_zip, filepath_output=None, leave=False): |
|
|
|
# Step 0 - Init |
|
if Path(filepath_zip).exists(): |
|
if filepath_output is None: |
|
filepath_zip_parts = list(Path(filepath_zip).parts) |
|
filepath_zip_name = filepath_zip_parts[-1].split(EXT_ZIP)[0] |
|
filepath_zip_parts[-1] = filepath_zip_name |
|
filepath_output = Path(*filepath_zip_parts) |
|
|
|
zip_fp = zipfile.ZipFile(filepath_zip, 'r') |
|
zip_fp_members = zip_fp.namelist() |
|
with tqdm.tqdm(total=len(zip_fp_members), desc=' - [Unzip] ' + str(filepath_zip.parts[-1]), leave=leave) as pbar_zip: |
|
for member in zip_fp_members: |
|
zip_fp.extract(member, filepath_output) |
|
pbar_zip.update(1) |
|
|
|
return filepath_output |
|
else: |
|
print (' - [ERROR][utils.read_zip()] Path does not exist: ', filepath_zip) |
|
return None |
|
|
|
class TCIAClient: |
|
""" |
|
- This has now been DEPRECATED for the NBIA-API: https://wiki.cancerimagingarchive.net/display/Public/NBIA+Search+with+Authentication+REST+API+Guide |
|
- Ref: https://wiki.cancerimagingarchive.net/display/Public/TCIA+Programmatic+Interface+%28REST+API%29+Usage+Guide |
|
- Ref: https://github.com/TCIA-Community/TCIA-API-SDK/tree/master/tcia-rest-client-python/src |
|
""" |
|
GET_IMAGE = "getImage" |
|
GET_MANUFACTURER_VALUES = "getManufacturerValues" |
|
GET_MODALITY_VALUES = "getModalityValues" |
|
GET_COLLECTION_VALUES = "getCollectionValues" |
|
GET_BODY_PART_VALUES = "getBodyPartValues" |
|
GET_PATIENT_STUDY = "getPatientStudy" |
|
GET_SERIES = "getSeries" |
|
GET_PATIENT = "getPatient" |
|
GET_SERIES_SIZE = "getSeriesSize" |
|
CONTENTS_BY_NAME = "ContentsByName" |
|
|
|
def __init__(self, baseUrl, resource): |
|
self.baseUrl = baseUrl + "/" + resource |
|
self.STATUS_OK = 200 |
|
self.DECODER = 'utf-8' |
|
|
|
def execute(self, url, queryParameters={}, verbose=False): |
|
queryParameters = dict((k, v) for k, v in queryParameters.items() if v) |
|
queryString = "?%s" % urllib.parse.urlencode(queryParameters) |
|
requestUrl = url + queryString |
|
if verbose: |
|
print (' - [execute()] URL: ', requestUrl) |
|
request = urllib.request.Request(url=requestUrl, headers={}) |
|
resp = urllib.request.urlopen(request) |
|
return resp |
|
|
|
def read_response(self, resp): |
|
if resp.status == self.STATUS_OK: |
|
return eval(resp.read().decode(self.DECODER)) |
|
else: |
|
return None |
|
|
|
def get_patient(self,collection = None , outputFormat = "json" ): |
|
serviceUrl = self.baseUrl + "/query/" + self.GET_PATIENT |
|
queryParameters = {"Collection" : collection , "format" : outputFormat } |
|
resp = self.execute(serviceUrl , queryParameters) |
|
return resp |
|
|
|
def get_patient_study(self,collection = None , patientId = None , studyInstanceUid = None , outputFormat = "json" ): |
|
serviceUrl = self.baseUrl + "/query/" + self.GET_PATIENT_STUDY |
|
queryParameters = {"Collection" : collection , "PatientID" : patientId , "StudyInstanceUID" : studyInstanceUid , "format" : outputFormat } |
|
resp = self.execute(serviceUrl , queryParameters) |
|
return resp |
|
|
|
def get_series(self, collection=None, patientId=None, modality=None, studyInstanceUid=None, seriesInstanceUid=None, outputFormat = "json" ): |
|
serviceUrl = self.baseUrl + "/query/" + self.GET_SERIES |
|
queryParameters = {"Collection" : collection, "patientId": patientId, "StudyInstanceUID" : studyInstanceUid, 'SeriesInstanceUID': seriesInstanceUid,"Modality" : modality, "format" : outputFormat } |
|
resp = self.execute(serviceUrl , queryParameters) |
|
return resp |
|
|
|
def get_image(self , seriesInstanceUid , downloadPath, zipFileName): |
|
try: |
|
serviceUrl = self.baseUrl + "/query/" + self.GET_IMAGE |
|
queryParameters = { "SeriesInstanceUID" : seriesInstanceUid } |
|
resp = self.execute(serviceUrl, queryParameters) |
|
filepath = Path(downloadPath).joinpath(zipFileName) |
|
data = resp.read() |
|
with open(filepath, 'wb') as fp: |
|
fp.write(data) |
|
|
|
tmp = list(Path(filepath).parts) |
|
tmp[-1] = tmp[-1].split('.zip')[0] |
|
filepath_output = Path(*tmp) |
|
read_zip(filepath, filepath_output, leave=False) |
|
|
|
Path(filepath).unlink() |
|
|
|
except: |
|
traceback.print_exc() |
|
|
|
def download_dcm2niix(DIR_TMP): |
|
# Ref: https://github.com/rordenlab/dcm2niix/releases/tag/v1.0.20220720 |
|
|
|
try: |
|
import sys |
|
path_file = Path(DIR_TMP).joinpath('dcm2niix') |
|
if sys.platform == 'win32': |
|
if not Path(path_file).exists(): |
|
path_zip = Path(DIR_TMP).joinpath('dcm2niix_win.zip') |
|
urllib.request.urlretrieve(url='https://github.com/rordenlab/dcm2niix/releases/download/v1.0.20220720/dcm2niix_win.zip', filename=str(path_zip)) |
|
read_zip(path_zip, path_file) |
|
Path(path_zip).unlink() |
|
|
|
|
|
|
|
elif sys.platform == 'linux': |
|
pass |
|
|
|
return Path(path_file).joinpath('dcm2niix.exe') |
|
|
|
except: |
|
traceback.print_exc() |
|
pdb.set_trace() |
|
return None |
|
|
|
def freqhist_bins(array, bins=100): |
|
""" |
|
A numpy based function to split the range of pixel values into groups, such that each group has around the same number of pixels |
|
|
|
Ref: https://github.com/AIEMMU/MRI_Prostate/blob/master/Pre%20processing%20Promise12.ipynb |
|
Ref: https://github.com/fastai/fastai/blob/master/fastai/medical/imaging.py |
|
""" |
|
|
|
imsd = np.sort(array.flatten()) |
|
t = np.array([0.001]) |
|
t = np.append(t, np.arange(bins)/bins+(1/2/bins)) |
|
t = np.append(t, 0.999) |
|
t = (len(imsd)*t+0.5).astype(np.int) |
|
return np.unique(imsd[t]) |
|
|
|
def hist_scaled(array, brks=None, bins=100): |
|
""" |
|
Scales a tensor using `freqhist_bins` to values between 0 and 1 |
|
|
|
Ref: https://github.com/AIEMMU/MRI_Prostate/blob/master/Pre%20processing%20Promise12.ipynb |
|
Ref: https://github.com/fastai/fastai/blob/master/fastai/medical/imaging.py |
|
""" |
|
|
|
if brks is None: |
|
brks = freqhist_bins(array, bins=bins) |
|
ys = np.linspace(0., 1., len(brks)) |
|
x = array.flatten() |
|
x = np.interp(x, brks, ys) |
|
|
|
if 0: |
|
plt.plot(brks, ys, color='blue', label='Non-Normalized', marker='x', markersize=2) |
|
plt.xlim([0,np.max(array)]) |
|
brks2 = freqhist_bins(x, bins=bins)*np.max(array) |
|
ys2 = np.linspace(0., 1., len(brks2)) |
|
plt.plot(brks2, ys2, color='orange', label='Histogram Equalization') |
|
plt.legend() |
|
plt.show() |
|
|
|
array_tmp = np.reshape(x, array.shape) |
|
array_tmp = np.clip(array_tmp, 0.,1.) |
|
return array_tmp |
|
|
|
if __name__ == "__main__": |
|
|
|
# Step 0.1 - Init TCIA client |
|
baseUrl = 'https://services.cancerimagingarchive.net/services/v4' |
|
resource = 'TCIA' |
|
collection = 'PROSTATEx' |
|
client = TCIAClient(baseUrl=baseUrl, resource=resource) |
|
|
|
# Step 0.2 - Init seriesids from https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=61080779#61080779ccf02ddde6884e4ba1ec73d08be362c6 |
|
seriesids_mask = ['1.2.276.0.7230010.3.1.3.1070885483.26120.1599221759.302','1.2.276.0.7230010.3.1.3.1070885483.24432.1599221753.549','1.2.276.0.7230010.3.1.3.1070885483.5008.1599221756.491','1.2.276.0.7230010.3.1.3.1070885483.27140.1599221767.245','1.2.276.0.7230010.3.1.3.1070885483.27624.1599221764.667','1.2.276.0.7230010.3.1.3.1070885483.16240.1599221761.962','1.2.276.0.7230010.3.1.3.1070885483.8220.1599221770.50','1.2.276.0.7230010.3.1.3.1070885483.9460.1599221773.50','1.2.276.0.7230010.3.1.3.1070885483.24532.1599221775.720','1.2.276.0.7230010.3.1.3.1070885483.24608.1599221781.105','1.2.276.0.7230010.3.1.3.1070885483.24736.1599221778.322','1.2.276.0.7230010.3.1.3.1070885483.22584.1599221783.667','1.2.276.0.7230010.3.1.3.1070885483.24012.1599221786.269','1.2.276.0.7230010.3.1.3.1070885483.21792.1599221788.891','1.2.276.0.7230010.3.1.3.1070885483.5464.1599221791.439','1.2.276.0.7230010.3.1.3.1070885483.5672.1599221794.52','1.2.276.0.7230010.3.1.3.1070885483.16948.1599221799.294','1.2.276.0.7230010.3.1.3.1070885483.16324.1599221796.636','1.2.276.0.7230010.3.1.3.1070885483.26880.1599221801.878','1.2.276.0.7230010.3.1.3.1070885483.26756.1599221804.455','1.2.276.0.7230010.3.1.3.1070885483.21116.1599221807.124','1.2.276.0.7230010.3.1.3.1070885483.23772.1599221809.995','1.2.276.0.7230010.3.1.3.1070885483.23924.1599221812.620','1.2.276.0.7230010.3.1.3.1070885483.24056.1599221815.833','1.2.276.0.7230010.3.1.3.1070885483.23100.1599221818.605','1.2.276.0.7230010.3.1.3.1070885483.4648.1599221821.338','1.2.276.0.7230010.3.1.3.1070885483.5584.1599221824.60','1.2.276.0.7230010.3.1.3.1070885483.17604.1599221827.76','1.2.276.0.7230010.3.1.3.1070885483.23788.1599221829.820','1.2.276.0.7230010.3.1.3.1070885483.26076.1599221832.601','1.2.276.0.7230010.3.1.3.1070885483.22052.1599221835.360','1.2.276.0.7230010.3.1.3.1070885483.26444.1599221837.990','1.2.276.0.7230010.3.1.3.1070885483.27528.1599221841.604','1.2.276.0.7230010.3.1.3.1070885483.3400.1599221844.204','1.2.276.0.7230010.3.1.3.1070885483.15416.1599221846.853','1.2.276.0.7230010.3.1.3.1070885483.22008.1599221849.372','1.2.276.0.7230010.3.1.3.1070885483.16016.1599221851.990','1.2.276.0.7230010.3.1.3.1070885483.8088.1599221854.809','1.2.276.0.7230010.3.1.3.1070885483.5396.1599221858.335','1.2.276.0.7230010.3.1.3.1070885483.25220.1599221863.968','1.2.276.0.7230010.3.1.3.1070885483.5464.1599221860.888','1.2.276.0.7230010.3.1.3.1070885483.7992.1599221866.575','1.2.276.0.7230010.3.1.3.1070885483.16948.1599221869.130','1.2.276.0.7230010.3.1.3.1070885483.24588.1599221871.775','1.2.276.0.7230010.3.1.3.1070885483.24928.1599221874.342','1.2.276.0.7230010.3.1.3.1070885483.14224.1599221879.641','1.2.276.0.7230010.3.1.3.1070885483.25300.1599221882.469','1.2.276.0.7230010.3.1.3.1070885483.21512.1599221877.6','1.2.276.0.7230010.3.1.3.1070885483.5808.1599221885.265','1.2.276.0.7230010.3.1.3.1070885483.22380.1599221887.327','1.2.276.0.7230010.3.1.3.1070885483.23580.1599221890.67','1.2.276.0.7230010.3.1.3.1070885483.15904.1599221892.237','1.2.276.0.7230010.3.1.3.1070885483.4220.1599221894.379','1.2.276.0.7230010.3.1.3.1070885483.25552.1599221897.694','1.2.276.0.7230010.3.1.3.1070885483.26088.1599221900.380','1.2.276.0.7230010.3.1.3.1070885483.5296.1599221903.189','1.2.276.0.7230010.3.1.3.1070885483.6284.1599221908.467','1.2.276.0.7230010.3.1.3.1070885483.17416.1599221911.141','1.2.276.0.7230010.3.1.3.1070885483.1780.1599221905.866','1.2.276.0.7230010.3.1.3.1070885483.24236.1599221913.873','1.2.276.0.7230010.3.1.3.1070885483.24632.1599221916.651','1.2.276.0.7230010.3.1.3.1070885483.25044.1599221924.876','1.2.276.0.7230010.3.1.3.1070885483.8668.1599221919.430','1.2.276.0.7230010.3.1.3.1070885483.24644.1599221922.113','1.2.276.0.7230010.3.1.3.1070885483.27560.1599221927.547','1.2.276.0.7230010.3.1.3.1070885483.25100.1599221930.643'] |
|
seriesids_mri = ['1.3.6.1.4.1.14519.5.2.1.7310.5101.107276353018221365492863559094','1.3.6.1.4.1.14519.5.2.1.7310.5101.127076787998581344993055912043','1.3.6.1.4.1.14519.5.2.1.7310.5101.128915395083536972793214199559','1.3.6.1.4.1.14519.5.2.1.7310.5101.156355474235536930096765953749','1.3.6.1.4.1.14519.5.2.1.7310.5101.182666056353308077379356745296','1.3.6.1.4.1.14519.5.2.1.7310.5101.217705819956691748023253683197','1.3.6.1.4.1.14519.5.2.1.7310.5101.233894782007809884236855995183','1.3.6.1.4.1.14519.5.2.1.7310.5101.234424615181029312892423746307','1.3.6.1.4.1.14519.5.2.1.7310.5101.259467270859277582122403078829','1.3.6.1.4.1.14519.5.2.1.7310.5101.260386623049829078909727548239','1.3.6.1.4.1.14519.5.2.1.7310.5101.306748150836286513093400468929','1.3.6.1.4.1.14519.5.2.1.7310.5101.318856235542799349557600990165','1.3.6.1.4.1.14519.5.2.1.7310.5101.337896560664468718350275750005','1.3.6.1.4.1.14519.5.2.1.7310.5101.568951414433420775694157731047','1.3.6.1.4.1.14519.5.2.1.7310.5101.877607202381357458819664904037','1.3.6.1.4.1.14519.5.2.1.7310.5101.898676446450078900613536879876','1.3.6.1.4.1.14519.5.2.1.7311.5101.101130890931274399577478152963','1.3.6.1.4.1.14519.5.2.1.7311.5101.101130934168942593154270621032','1.3.6.1.4.1.14519.5.2.1.7311.5101.105766764724449379107432362750','1.3.6.1.4.1.14519.5.2.1.7311.5101.109832262779867794170417053903','1.3.6.1.4.1.14519.5.2.1.7311.5101.119868581724901379856626647643','1.3.6.1.4.1.14519.5.2.1.7311.5101.124933244802704100684011471210','1.3.6.1.4.1.14519.5.2.1.7311.5101.126808882287123482193592987944','1.3.6.1.4.1.14519.5.2.1.7311.5101.146422450520894659910416624671','1.3.6.1.4.1.14519.5.2.1.7311.5101.149728379277305470281540696443','1.3.6.1.4.1.14519.5.2.1.7311.5101.155875154686610653777230856177','1.3.6.1.4.1.14519.5.2.1.7311.5101.156910737340231640626298466189','1.3.6.1.4.1.14519.5.2.1.7311.5101.160225377533762960695041069832','1.3.6.1.4.1.14519.5.2.1.7311.5101.165636727522505811947315188478','1.3.6.1.4.1.14519.5.2.1.7311.5101.166265209513832781969059787909','1.3.6.1.4.1.14519.5.2.1.7311.5101.175570448971296597162147241386','1.3.6.1.4.1.14519.5.2.1.7311.5101.178750617003896154912438088527','1.3.6.1.4.1.14519.5.2.1.7311.5101.180041601751316950966041961765','1.3.6.1.4.1.14519.5.2.1.7311.5101.180650601474055581355948988643','1.3.6.1.4.1.14519.5.2.1.7311.5101.186553403053805718209125341361','1.3.6.1.4.1.14519.5.2.1.7311.5101.196511587017551386049158542619','1.3.6.1.4.1.14519.5.2.1.7311.5101.206828891270520544417996275680','1.3.6.1.4.1.14519.5.2.1.7311.5101.226939533786316417608293472819','1.3.6.1.4.1.14519.5.2.1.7311.5101.236967836312224538274213979128','1.3.6.1.4.1.14519.5.2.1.7311.5101.242424835264181527414562151046','1.3.6.1.4.1.14519.5.2.1.7311.5101.245315537897059083725251245833','1.3.6.1.4.1.14519.5.2.1.7311.5101.248203933751895346486742820088','1.3.6.1.4.1.14519.5.2.1.7311.5101.252185783543855817242399041825','1.3.6.1.4.1.14519.5.2.1.7311.5101.252385246988168654915352850239','1.3.6.1.4.1.14519.5.2.1.7311.5101.257928599770116866513356683535','1.3.6.1.4.1.14519.5.2.1.7311.5101.258067469374761412596368241889','1.3.6.1.4.1.14519.5.2.1.7311.5101.262719318163046156031519223454','1.3.6.1.4.1.14519.5.2.1.7311.5101.266720221212035669954581062639','1.3.6.1.4.1.14519.5.2.1.7311.5101.271991298059338527584681788043','1.3.6.1.4.1.14519.5.2.1.7311.5101.282008038881694967871335639325','1.3.6.1.4.1.14519.5.2.1.7311.5101.287403883614425048490255475041','1.3.6.1.4.1.14519.5.2.1.7311.5101.289711251504454079274667690291','1.3.6.1.4.1.14519.5.2.1.7311.5101.304058605325520782900293953679','1.3.6.1.4.1.14519.5.2.1.7311.5101.312442050391027773578890036831','1.3.6.1.4.1.14519.5.2.1.7311.5101.318660391556220519803669869815','1.3.6.1.4.1.14519.5.2.1.7311.5101.322192860849906453257530108507','1.3.6.1.4.1.14519.5.2.1.7311.5101.333332554059735467244825907777','1.3.6.1.4.1.14519.5.2.1.7311.5101.334326985258868181144176796864','1.3.6.1.4.1.14519.5.2.1.7311.5101.357054868848226402548612686613','1.3.6.1.4.1.14519.5.2.1.7311.5101.460453169764361555745878708935','1.3.6.1.4.1.14519.5.2.1.7311.5101.472900684997509231929411950536','1.3.6.1.4.1.14519.5.2.1.7311.5101.666342438883185464672112795625','1.3.6.1.4.1.14519.5.2.1.7311.5101.785303327607349403635620326985','1.3.6.1.4.1.14519.5.2.1.7311.5101.813054628707353396925223754592','1.3.6.1.4.1.14519.5.2.1.7311.5101.905763996607212880296069425861','1.3.6.1.4.1.14519.5.2.1.7311.5101.928012760939767746081753156593'] |
|
|
|
# Step 0.3 - Make folders |
|
DIR_RAW = Path(DIR_MAIN).joinpath(DIRNAME_RAW) |
|
DIR_PROCESSED = Path(DIR_MAIN).joinpath(DIRNAME_PROCESSED) |
|
DIR_PROCESSED2 = Path(DIR_MAIN).joinpath(DIRNAME_PROCESSED2) |
|
DIR_TMP = Path(DIR_MAIN).joinpath(DIRNAME_TMP) |
|
Path(DIR_RAW).mkdir(exist_ok=True, parents=True) |
|
Path(DIR_PROCESSED).mkdir(exist_ok=True, parents=True) |
|
Path(DIR_PROCESSED2).mkdir(exist_ok=True, parents=True) |
|
Path(DIR_TMP).mkdir(exist_ok=True, parents=True) |
|
|
|
# Step 1 - Loop over all seriesids_mask |
|
if 1: |
|
print ('') |
|
with tqdm.tqdm(total=len(seriesids_mask), desc=' - [MASKS]') as pbar_mask: |
|
for id_, seriesid_mask in enumerate(seriesids_mask): |
|
|
|
try: |
|
resp_series = client.get_series(collection=collection, seriesInstanceUid=seriesid_mask) |
|
if resp_series.status == 200: |
|
obj_series = client.read_response(resp_series) |
|
if len(obj_series): |
|
patient_id = obj_series[0]['PatientID'] |
|
downloadPath = Path(DIR_RAW).joinpath(patient_id) |
|
Path(downloadPath).mkdir(exist_ok=True, parents=True) |
|
client.get_image(seriesInstanceUid=seriesid_mask, downloadPath=str(downloadPath), zipFileName=FOLDERNAME_MASK.format(patient_id, EXT_ZIP)) |
|
else: |
|
print (' - [ERROR] No patient info for seriesid_mask: ', seriesid_mask) |
|
else: |
|
print (' - [ERROR] Could not read seriesid_mask: ', seriesid_mask) |
|
|
|
except: |
|
print (' - [ERROR] Some error with seriesid_mask', seriesid_mask) |
|
traceback.print_exc() |
|
pdb.set_trace() |
|
|
|
pbar_mask.update(1) |
|
|
|
# Step 2 - Loop over all seriesids_mri |
|
if 1: |
|
print ('') |
|
with tqdm.tqdm(total=len(seriesids_mri), desc='\n - [MRI]') as pbar_mri: |
|
for id_, seriesid_mri in enumerate(seriesids_mri): |
|
|
|
try: |
|
resp_series = client.get_series(collection=collection, seriesInstanceUid=seriesid_mri) |
|
if resp_series.status == 200: |
|
obj_series = client.read_response(resp_series) |
|
if len(obj_series): |
|
patient_id = obj_series[0]['PatientID'] |
|
downloadPath = Path(DIR_RAW).joinpath(patient_id) |
|
Path(downloadPath).mkdir(exist_ok=True, parents=True) |
|
client.get_image(seriesInstanceUid=seriesid_mri, downloadPath=str(downloadPath), zipFileName=FOLDERNAME_MRI.format(patient_id, EXT_ZIP)) |
|
else: |
|
print (' - [ERROR] No patient info for seriesid_mri: ', seriesid_mri) |
|
else: |
|
print (' - [ERROR] Could not read seriesid_mri: ', seriesid_mri) |
|
|
|
except: |
|
print (' - [ERROR] Some error with seriesid_mri', seriesid_mri) |
|
traceback.print_exc() |
|
pdb.set_trace() |
|
|
|
pbar_mri.update(1) |
|
|
|
# Step 3 - Convert to NIFTI using dcm2niix |
|
if 1: |
|
print ('') |
|
path_dcm2niix = download_dcm2niix(DIR_TMP) |
|
if path_dcm2niix is not None: |
|
with tqdm.tqdm(total=len(list(Path(DIR_RAW).glob('*'))), desc=' - [Convert to volume] ') as pbar_convert: |
|
for path_patient in Path(DIR_RAW).iterdir(): |
|
if Path(path_patient).is_dir(): |
|
|
|
# Step 3.1 - Init vars |
|
patient_id = Path(path_patient).parts[-1] |
|
foldername_mri = FOLDERNAME_MRI.format(patient_id, '') |
|
path_mri = Path(path_patient).joinpath(foldername_mri) |
|
foldername_mask = FOLDERNAME_MASK.format(patient_id, '') |
|
path_masks = list(Path(path_patient).joinpath(foldername_mask).glob('*')) # there should only be 1, but I am still looping over it |
|
|
|
# Step 3.2 - Volumize MRI |
|
if 1: |
|
try: |
|
launch_mri = [str(path_dcm2niix), "-e", "n", "-b", "n", "-v", "0" , "-o", str(path_patient), "-f", foldername_mri, str(path_mri)] |
|
# e.g. .\dcm2niix.exe -e n -b n -v 0 -o . -f ProstateX-0323_mri .\ProstateX-0323_mri |
|
_ = subprocess.run(launch_mri, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT) |
|
|
|
# parameters for conversion. |
|
# -e : export as NRRD (y) or MGH (o) instead of NIfTI (y/n/o, default n) |
|
# -o : output directory (omit to save to input folder) |
|
# -b : BIDS sidecar (y/n/o [o=only: no NIfTI], default y) |
|
# -f : filename (%a=antenna (coil) name, %b=basename, %c=comments, %d=description, %e=echo number, %f=folder name, |
|
|
|
except: |
|
print (' - [ERROR][MRI] patient_id: ', patient_id) |
|
traceback.print_exc() |
|
pdb.set_trace() |
|
|
|
# Step 3.3 - Volumize MASK |
|
if 1: |
|
try: |
|
|
|
for path_dcm in path_masks: |
|
|
|
# Step 3.3.1 - Read data |
|
ds_mask = pydicom.dcmread(str(path_dcm)) |
|
# len(ds_mask.ReferencedSeriesSequence[0].ReferencedInstanceSequence) == len(vol_mri) |
|
# e.g. --> ds_mask.ReferencedSeriesSequence[0].ReferencedInstanceSequence[0].ReferencedSOPInstanceUID |
|
# len(ds_mask.PerFrameFunctionalGroupsSequence) == len(vol_mask) |
|
|
|
vol_mask = ds_mask.pixel_array # [D,H,W] |
|
nii_mri = nibabel.load(str(Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII)))) |
|
vol_mri = nii_mri.get_fdata() # [H,W,D] e.g. [384,384,19] |
|
if 1: |
|
print ('\n - [convert to nifti][mask] pixelspacing: {} | slicespacing: {}'.format( |
|
ds_mask.SharedFunctionalGroupsSequence[0].PixelMeasuresSequence[0].PixelSpacing |
|
, ds_mask.SharedFunctionalGroupsSequence[0].PixelMeasuresSequence[0].SliceThickness)) |
|
print (' - [convert to nifti][vol] pixelspacing: {} | slicespacing: {}'.format(nii_mri.header['pixdim'][1:3], nii_mri.header['pixdim'][3] )) |
|
|
|
# Step 3.3.2 - Downsample segmentation |
|
if 0: |
|
end = vol_mask.shape[0] # .shape=[D,H,W] |
|
step = end // vol_mri.shape[2] |
|
vol_mask = vol_mask[0:end:step, ...] |
|
diff = vol_mask.shape[0] - vol_mri.shape[2] |
|
if diff != 0: |
|
vol_mask = vol_mask[:-diff, ...] |
|
|
|
if vol_mri.shape != vol_mask.shape: |
|
# order = 0 is nearest neighbour interpolation |
|
vol_mask = skimage.transform.resize(vol_mask, vol_mri.shape, preserve_range=True, order=0) |
|
|
|
else: |
|
idxs = [idx for idx, item in enumerate(ds_mask.PerFrameFunctionalGroupsSequence) if len(ds_mask.PerFrameFunctionalGroupsSequence[idx].DerivationImageSequence)] |
|
vol_mask = vol_mask[idxs, :, :] |
|
|
|
vol_mask = np.rot90(vol_mask, k=-1, axes=(1, 2)) |
|
vol_mask = np.swapaxes(vol_mask, 0, 1) |
|
vol_mask = np.swapaxes(vol_mask, 1, 2) |
|
|
|
assert vol_mask.shape == vol_mri.shape |
|
|
|
# saving segmentations |
|
niiobj = nibabel.Nifti1Image(vol_mask, nii_mri.affine) |
|
path_mask = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII)) |
|
nibabel.save(niiobj, str(path_mask)) |
|
|
|
except: |
|
print (' - [ERROR][MASK] patient_id: ', patient_id) |
|
traceback.print_exc() |
|
pdb.set_trace() |
|
|
|
pbar_convert.update(1) |
|
|
|
# Step 4 - Do voxel spacing analysis |
|
if 0: |
|
spacings = [] |
|
with tqdm.tqdm(total=len(list(Path(DIR_RAW).glob('*'))), desc=' - [ProstateX Analysis] ') as pbar_analysis: |
|
for path_patient in Path(DIR_RAW).iterdir(): |
|
if Path(path_patient).is_dir(): |
|
patient_id = Path(path_patient).parts[-1] |
|
path_mask = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII)) |
|
path_raw = Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII)) |
|
|
|
nii_mri = nibabel.load(str(path_raw)) |
|
header_mri = nii_mri.header |
|
spacing_mri = list(nii_mri.header['pixdim'][1:4]) |
|
origin_mri = [nii_mri.header['qoffset_x'], nii_mri.header['qoffset_y'], nii_mri.header['qoffset_z'] ] |
|
# vol_mri = nii_mri.get_fdata() |
|
|
|
spacings.append(spacing_mri) |
|
|
|
pbar_analysis.update(1) |
|
|
|
if len(spacings): |
|
import matplotlib.pyplot as plt |
|
spacings = np.array(spacings) |
|
f,axarr = plt.subplots(1,3) |
|
axarr[0].hist(spacings[:,0], bins=10); axarr[0].set_title('Voxel Spacing (X)') |
|
axarr[1].hist(spacings[:,1], bins=10); axarr[1].set_title('Voxel Spacing (Y)') |
|
axarr[2].hist(spacings[:,2], bins=10); axarr[2].set_title('Voxel Spacing (Z)') |
|
plt.savefig('prostatex_spacings.png', bbox_inches='tight') |
|
plt.show() # resmaple every image to [0.5, 0.5, 3] as per this image |
|
|
|
# Step 5 - Do resampling to (0.5, 0.5, 3.0) |
|
if 1: |
|
SPACING_NEW = (0.5, 0.5, 3.0) |
|
import medloader.dataloader.utils as medUtils |
|
with tqdm.tqdm(total=len(list(Path(DIR_RAW).glob('*'))), desc=' - [ProstateX Analysis] ') as pbar_resample: |
|
for path_patient in Path(DIR_RAW).iterdir(): |
|
if Path(path_patient).is_dir(): |
|
patient_id = Path(path_patient).parts[-1] |
|
path_mask_raw = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII)) |
|
path_img_raw = Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII)) |
|
|
|
if 0: |
|
path_img_proc = Path(DIR_PROCESSED).joinpath(patient_id, FOLDERNAME_MRI.format(patient_id, EXT_NRRD)) |
|
path_mask_proc = Path(DIR_PROCESSED).joinpath(patient_id, FOLDERNAME_MASK.format(patient_id, EXT_NRRD)) |
|
nrrd_img = medUtils.resample_img(path_img=str(path_img_raw), path_new_img=str(path_img_proc), spacing=SPACING_NEW, verbose=True) |
|
nrrd_mask = medUtils.resample_mask(path_mask=str(path_mask_raw), path_new_mask=str(path_mask_proc), spacing=SPACING_NEW, size=nrrd_img.GetSize(), labels_allowed = [1], verbose=True) |
|
elif 1: |
|
|
|
path_img_proc = Path(DIR_PROCESSED).joinpath(patient_id, FOLDERNAME_MRI.format(patient_id, EXT_NII)) |
|
path_mask_proc = Path(DIR_PROCESSED).joinpath(patient_id, FOLDERNAME_MASK.format(patient_id, EXT_NII)) |
|
|
|
import math |
|
import skimage.transform as skTrans |
|
|
|
img_og = nibabel.load(path_img_raw) |
|
ratio = [spacing_old / SPACING_NEW[i] for i, spacing_old in enumerate(img_og.header['pixdim'][1:4])] |
|
size_new = tuple(math.ceil(size_old * ratio[i]) - 1 for i, size_old in enumerate(img_og.header['dim'][1:4])) |
|
img_og.header['pixdim'][1:4] = SPACING_NEW |
|
|
|
img_resampled = skTrans.resize(img_og.get_fdata(), size_new, order=3, preserve_range=True) |
|
img_resampled = img_resampled.astype(np.int16) |
|
niiobj = nibabel.Nifti1Image(img_resampled, img_og.affine, img_og.header) |
|
Path(path_img_proc).parent.mkdir(exist_ok=True, parents=True) |
|
nibabel.save(niiobj, str(path_img_proc)) |
|
|
|
mask_og = nibabel.load(path_mask_raw) |
|
mask_og.header['pixdim'][1:4] = SPACING_NEW |
|
|
|
mask_resampled = skTrans.resize(mask_og.get_fdata(), size_new, order=0, preserve_range=True) # 0 = Nearest Neighbour |
|
mask_resampled = mask_resampled.astype(np.uint8) |
|
print (' - [{}] np.unique(mask_resampled): {}'.format(patient_id, np.unique(mask_resampled))) |
|
niiobj = nibabel.Nifti1Image(mask_resampled, mask_og.affine, mask_og.header) |
|
Path(path_mask_proc).parent.mkdir(exist_ok=True, parents=True) |
|
nibabel.save(niiobj, str(path_mask_proc)) |
|
|
|
pbar_resample.update(1) |
|
|
|
# Step 6 - Do image cropping analysis |
|
if 0: |
|
""" |
|
Notes |
|
- ProstateX-0241 has 31 slices (25 slices=prostate) |
|
- ProstateX-0198 has 18 slices (11 slices=prostate) |
|
|
|
- Result: (200, 200, 28) --> [100,100, 28] --> [50,50,14] --> [25,25,7] |
|
""" |
|
prostate_slice_count = [] |
|
prostate_voxel_width = [] |
|
prostate_voxel_breadth = [] |
|
import matplotlib.pyplot as plt |
|
with tqdm.tqdm(total=len(list(Path(DIR_PROCESSED).glob('*'))), desc=' - [ProstateX Cropping] ') as pbar_cropping: |
|
for path_patient in Path(DIR_PROCESSED).iterdir(): |
|
if Path(path_patient).is_dir(): |
|
patient_id = Path(path_patient).parts[-1] |
|
path_mask_proc = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII)) |
|
path_img_proc = Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII)) |
|
|
|
# img = nibabel.load(path_img_proc) |
|
# img_array = img.get_fdata() |
|
|
|
mask = nibabel.load(path_mask_proc) |
|
mask_array = mask.get_fdata() |
|
prostate_idxs = np.argwhere(mask_array > 0) |
|
|
|
prostate_slice_count.append(np.max(prostate_idxs[:,2]) - np.min(prostate_idxs[:,2])) |
|
prostate_voxel_width.append(np.max(prostate_idxs[:,1]) - np.min(prostate_idxs[:,1])) |
|
prostate_voxel_breadth.append(np.max(prostate_idxs[:,0]) - np.min(prostate_idxs[:,0])) |
|
|
|
print (' - [{}] mask: {} (height,width,breadth of prostate=[{},{},{}])'.format(patient_id, mask_array.shape, prostate_slice_count[-1], prostate_voxel_width[-1], prostate_voxel_breadth[-1])) |
|
|
|
|
|
if len(prostate_slice_count): |
|
f,axarr = plt.subplots(1,4) |
|
axarr[0].hist(prostate_slice_count, bins=10); axarr[0].set_title('Height') |
|
axarr[1].hist(prostate_voxel_width, bins=10); axarr[1].set_title('Width') |
|
axarr[2].hist(prostate_voxel_breadth, bins=10); axarr[2].set_title('Breadth') |
|
axarr[3].imshow(mask_array[:,:,prostate_slice_count[-1]//2]) |
|
plt.savefig('prostatex_prostatesize.png', bbox_inches='tight') |
|
plt.show() |
|
|
|
# Step 7 - Do cropping, padding and save in DIR_PROCESSED2 |
|
# Highly custom logic, please change values with caution |
|
if 1: |
|
VOXELS_X = 100 |
|
VOXELS_Y = 100 |
|
VOXELS_Z_MAX = 28 |
|
mri_maxvals = [] |
|
with tqdm.tqdm(total=len(list(Path(DIR_PROCESSED).glob('*'))), desc=' - [ProstateX Padding] ') as pbar_cropping: |
|
for path_patient in Path(DIR_PROCESSED).iterdir(): |
|
if Path(path_patient).is_dir(): |
|
|
|
# Step 7.1 - Get paths |
|
patient_id = Path(path_patient).parts[-1] |
|
path_mask_proc = Path(path_patient).joinpath(FOLDERNAME_MASK.format(patient_id, EXT_NII)) |
|
path_mri_proc = Path(path_patient).joinpath(FOLDERNAME_MRI.format(patient_id, EXT_NII)) |
|
|
|
# Step 7.1 - Get mask (crop and pad) |
|
mri = nibabel.load(path_mri_proc) |
|
mri_array = mri.get_fdata() |
|
mri_maxval = np.max(mri_array) |
|
mri_maxvals.append(mri_maxval) |
|
if 0: |
|
import matplotlib.pyplot as plt |
|
f,axarr = plt.subplots(1,2); axarr[0].imshow(mri_array[:,:,18], cmap='gray'); axarr[1].imshow(hist_scaled(mri_array, bins=100)[:,:,18], cmap='gray', vmin=0., vmax=1.); plt.suptitle('Bins=100');plt.show(block=False) |
|
f,axarr = plt.subplots(1,2); axarr[0].imshow(mri_array[:,:,18], cmap='gray'); axarr[1].imshow(hist_scaled(mri_array, bins=1000)[:,:,18], cmap='gray', vmin=0., vmax=1.); plt.suptitle('Bins=1000');plt.show(block=False) |
|
f,axarr = plt.subplots(1,2); axarr[0].imshow(mri_array[:,:,18], cmap='gray'); axarr[1].imshow(hist_scaled(mri_array, bins=10000)[:,:,18], cmap='gray', vmin=0., vmax=1.); plt.suptitle('Bins=10000');plt.show(block=False) |
|
pdb.set_trace() |
|
|
|
mri_array = hist_scaled(mri_array, bins=10000) |
|
mask = nibabel.load(path_mask_proc) |
|
mask_array = mask.get_fdata() |
|
mask_idxs = np.argwhere(mask_array > 0) |
|
mask_midpt = np.array([np.mean(mask_idxs[:,0]), np.mean(mask_idxs[:,1]), np.mean(mask_idxs[:,2])]).astype(np.uint8).tolist() |
|
|
|
# Step 7.2 - Crop |
|
mask_array_new = np.array(mask_array[mask_midpt[0] - VOXELS_X:mask_midpt[0] + VOXELS_X, mask_midpt[1] - VOXELS_Y:mask_midpt[1] + VOXELS_Y, :], copy=True) |
|
mri_array_new = np.array(mri_array[mask_midpt[0] - VOXELS_X:mask_midpt[0] + VOXELS_X, mask_midpt[1] - VOXELS_Y:mask_midpt[1] + VOXELS_Y, :], copy=True) |
|
|
|
# if patient_id == 'ProstateX-0309': |
|
# pdb.set_trace() |
|
|
|
# Step 7.3 - Pad (in z) |
|
mask_array_height = mask_array_new.shape[2] |
|
mask_array_diff = np.abs(mask_array_height - VOXELS_Z_MAX) |
|
pad_left = mask_array_diff//2 |
|
pad_right = mask_array_diff-mask_array_diff//2 |
|
if mask_array_height > VOXELS_Z_MAX: |
|
mask_array_new = mask_array_new[:,:,pad_left:-pad_right] |
|
mri_array_new = mri_array_new[:,:,pad_left:-pad_right] |
|
else: |
|
if mask_array_diff: |
|
mask_array_new = np.pad(mask_array_new, (pad_left, pad_right), 'constant') |
|
mask_array_new = mask_array_new[pad_left:-pad_right, pad_left:-pad_right, :] |
|
|
|
mri_array_new = np.pad(mri_array_new, (pad_left, pad_right), 'constant') |
|
mri_array_new = mri_array_new[pad_left:-pad_right, pad_left:-pad_right, :] |
|
|
|
print (' - [crop/pad][{}] mask_array: {} || mask_array_new: {}'.format(patient_id, mask_array.shape, mask_array_new.shape)) |
|
print (' - [crop/pad][{}] mri_array : {} || mri_array_new : {} || maxval: {}'.format(patient_id, mri_array.shape, mri_array_new.shape, mri_maxval)) |
|
|
|
# Step 7.4 - Save to disk |
|
path_mask_proc2 = Path(DIR_PROCESSED2).joinpath(patient_id, FOLDERNAME_MASK.format(patient_id, EXT_NII)) |
|
Path(path_mask_proc2).parent.mkdir(exist_ok=True, parents=True) |
|
niiobj = nibabel.Nifti1Image(mask_array_new, mask.affine, mask.header) |
|
nibabel.save(niiobj, str(path_mask_proc2)) |
|
|
|
path_mri_proc2 = Path(DIR_PROCESSED2).joinpath(patient_id, FOLDERNAME_MRI.format(patient_id, EXT_NII)) |
|
Path(path_mri_proc2).parent.mkdir(exist_ok=True, parents=True) |
|
niiobj = nibabel.Nifti1Image(mri_array_new, mri.affine, mri.header) |
|
nibabel.save(niiobj, str(path_mri_proc2)) |
|
|
|
if len(mri_maxvals): |
|
import matplotlib.pyplot as plt |
|
plt.hist(mri_maxvals) |
|
plt.title('ProstateX: Max Intensity Vals \n Histogram showing the need for intensity transformations in MRI') |
|
plt.show() |
|
|
|
pdb.set_trace() |