Coverage for src/ensae_projects/datainc/data_medical.py: 88%

58 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-07-20 04:37 +0200

1""" 

2@file 

3@brief Functions to handle data coming from 

4:epkg:`Cancer Imaging Archive`. 

5""" 

6import os 

7import pydicom 

8import pandas 

9import cv2 

10from pyquickhelper.filehelper.synchelper import explore_folder_iterfile # pylint: disable=C0411 

11 

12 

13def _recurse_fill(obs, dataset, parent=""): 

14 for data_element in dataset: 

15 if isinstance(data_element.value, bytes): 

16 continue 

17 if data_element.VR == "SQ": # a sequence 

18 name = data_element.name 

19 for i, ds in enumerate(data_element.value): 

20 _recurse_fill(obs, ds, 

21 parent="{parent}.{name}[{i}]".format( 

22 parent=parent, name=name, i=i)) 

23 else: 

24 text = str(data_element.value) 

25 name = str(data_element.name) 

26 key = name if parent == '' else parent + "." + name 

27 obs[key] = text 

28 

29 

def convert_dcm2png(folder, dest, fLOG=None):
    """
    Converts every :epkg:`dcm` image found in a folder
    into a :epkg:`png` image.

    @param      folder      source folder, explored for files
                            whose name ends with ``.dcm``
    @param      dest        destination folder, it must exist
    @param      fLOG        logging function, None to stay silent
    @return                 :epkg:`pandas:DataFrame`, one row per file read

    Images are named ``img_<number>.png``. When the first folder of the
    path relative to *folder* contains ``_``, the image goes into a
    subfolder of *dest* named after the part before the first ``_``,
    the subfolder is created if needed. An image which already exists
    is not written again. The dataframe is also saved as
    ``_summary.csv`` in *dest*.
    The function uses module :epkg:`pydicom` to read the files.
    It raises ``FileNotFoundError`` if *dest* does not exist.
    """
    if not os.path.exists(dest):
        raise FileNotFoundError("Unable to find folder '{}'.".format(dest))

    verbose = fLOG is not None
    if verbose:
        fLOG("[convert_dcm2png] convert dcm files from '{}'.".format(folder))
        fLOG("[convert_dcm2png] into '{}'.".format(dest))

    converted = {}
    records = []
    for dcm_path in explore_folder_iterfile(folder, ".*[.]dcm$"):
        rel_path = os.path.relpath(dcm_path, folder)
        if verbose:
            fLOG("[convert_dcm2png] read {}: '{}'.".format(
                len(records) + 1, rel_path))

        # destination name, the index is the number of files already handled
        top_folder = rel_path.replace("\\", "/").split("/")[0]
        png_name = "img_%06d.png" % len(converted)
        if "_" not in top_folder:
            target = png_name
        else:
            group = top_folder.split('_')[0]
            group_dir = os.path.join(dest, group)
            if not os.path.exists(group_dir):
                if verbose:
                    fLOG("[convert_dcm2png] create folder '{}'.".format(group))
                os.mkdir(group_dir)
            target = os.path.join(group, png_name)

        # read
        ds = pydicom.dcmread(dcm_path)

        # data: source, destination, length of the pixel array,
        # then every non binary element of the file
        record = dict(_src=rel_path, _dest=target, _size=len(ds.pixel_array))
        _recurse_fill(record, ds)
        records.append(record)

        # image: only written if not already there
        out_path = os.path.join(dest, target)
        if not os.path.exists(out_path):
            pixels = ds.pixel_array
            cv2.imwrite(out_path, pixels)  # pylint: disable=E1101
        converted[dcm_path] = out_path

    summary_path = os.path.join(dest, "_summary.csv")
    if verbose:
        fLOG("[convert_dcm2png] converted {} images.".format(len(records)))
        fLOG("[convert_dcm2png] write '{}'.".format(summary_path))
    summary = pandas.DataFrame(records)
    summary.to_csv(summary_path, index=False, encoding="utf-8")
    return summary