Coverage for src/ensae_projects/datainc/data_medical.py: 88%
58 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-07-20 04:37 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2023-07-20 04:37 +0200
1"""
2@file
3@brief Functions to handle data coming from
4:epkg:`Cancer Imaging Archive`.
5"""
6import os
7import pydicom
8import pandas
9import cv2
10from pyquickhelper.filehelper.synchelper import explore_folder_iterfile # pylint: disable=C0411
def _recurse_fill(obs, dataset, parent=""):
    """
    Flattens the elements of a dataset into a dictionary of strings.

    @param obs dictionary updated in place, it receives one entry
        per element which is neither raw bytes nor a sequence
    @param dataset iterable of elements exposing the attributes
        *value*, *VR* and *name*
    @param parent prefix added to the keys, empty at the top level

    An element whose *VR* is ``"SQ"`` is a sequence: each of its items
    is processed recursively with the prefix ``{parent}.{name}[{i}]``.
    Every other value is stored as ``str(value)`` under the key *name*
    or ``parent + "." + name`` when *parent* is not empty.
    """
    for element in dataset:
        value = element.value
        if isinstance(value, bytes):
            # raw binary content is not kept
            continue
        if element.VR != "SQ":
            text = str(value)
            label = str(element.name)
            key = label if parent == '' else parent + "." + label
            obs[key] = text
            continue
        # a sequence: one recursive call per item, the index is part of the prefix
        seq_name = element.name
        for index, item in enumerate(value):
            prefix = "{parent}.{name}[{i}]".format(
                parent=parent, name=seq_name, i=index)
            _recurse_fill(obs, item, parent=prefix)
def convert_dcm2png(folder, dest, fLOG=None):
    """
    Converts every :epkg:`dcm` image found in a folder
    into a :epkg:`png` image.

    @param folder source folder, searched with *explore_folder_iterfile*
        and the pattern ``.*[.]dcm$``
    @param dest destination folder, it must already exist
    @param fLOG logging function, None to disable logging
    @return :epkg:`pandas:DataFrame`, one row per image read

    Images are read with :epkg:`pydicom` and written with *cv2.imwrite*.
    Every image is named ``img_<number on six digits>.png``. When the first
    component of the relative path of the source contains ``_``, the image
    is stored in a subfolder of *dest* named after the part which precedes
    the first underscore; the subfolder is created if it is missing.
    An image which already exists in *dest* is not written again.
    Each row holds ``_src``, ``_dest``, ``_size`` (``len(ds.pixel_array)``)
    and the fields collected by *_recurse_fill*. The dataframe is also
    saved as ``_summary.csv`` in *dest*.

    Raises *FileNotFoundError* if *dest* does not exist.
    """
    if not os.path.exists(dest):
        raise FileNotFoundError("Unable to find folder '{}'.".format(dest))

    def log(template, *args):
        # messages are only formatted when a logging function is given
        if fLOG is not None:
            fLOG(template.format(*args))

    log("[convert_dcm2png] convert dcm files from '{}'.", folder)
    log("[convert_dcm2png] into '{}'.", dest)

    converted = {}
    records = []
    for source in explore_folder_iterfile(folder, ".*[.]dcm$"):
        relname = os.path.relpath(source, folder)
        log("[convert_dcm2png] read {}: '{}'.", len(records) + 1, relname)

        # the first component of the relative path decides the subfolder
        top = relname.replace("\\", "/").partition("/")[0]
        png_name = "img_%06d.png" % len(converted)
        prefix, underscore, _ = top.partition("_")
        if underscore:
            subfolder = os.path.join(dest, prefix)
            if not os.path.exists(subfolder):
                log("[convert_dcm2png] create folder '{}'.", prefix)
                os.mkdir(subfolder)
            new_name = os.path.join(prefix, png_name)
        else:
            new_name = png_name

        # read
        ds = pydicom.dcmread(source)

        # data
        record = {"_src": relname, "_dest": new_name,
                  "_size": len(ds.pixel_array)}
        _recurse_fill(record, ds)
        records.append(record)

        # image, an existing file is kept as it is
        target = os.path.join(dest, new_name)
        if not os.path.exists(target):
            cv2.imwrite(target, ds.pixel_array)  # pylint: disable=E1101
        converted[source] = target

    summary = os.path.join(dest, "_summary.csv")
    log("[convert_dcm2png] converted {} images.", len(records))
    log("[convert_dcm2png] write '{}'.", summary)
    df = pandas.DataFrame(records)
    df.to_csv(summary, index=False, encoding="utf-8")
    return df