Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2""" 

3@file 

4@brief various basic functions often needed 

5""" 

6 

7import os 

8import re 

9import random 

10from pyquickhelper.loghelper.flog import fLOG, GetSepLine 

11from pyquickhelper.filehelper.synchelper import explore_folder_iterfile 

12 

13 

# Matches any single character which is not allowed in a variable name
# (anything but an ASCII letter, a digit or an underscore).
_keep_var_character = re.compile("[^a-zA-Z0-9_]")


def _clean_name_variable(st):
    """
    Cleans a string so that it only contains characters
    allowed in a variable name.

    Every character which is not an ASCII letter, a digit or an underscore
    is replaced by an underscore, one for one: ``"a b-c"`` becomes
    ``"a_b_c"`` and ``"a--b"`` becomes ``"a__b"``.

    @param      st      string to clean
    @return             another string
    """
    # The pattern matches exactly one character, so substituting "_" is the
    # same as splitting on it and joining the pieces with "_". A regex split
    # never returns None, hence no failure branch is needed.
    return _keep_var_character.sub("_", st)

28 

29 

def _get_format_zero_nb_integer(nb):
    """
    Builds a zero-padded ``%d`` format wide enough to hold integer *nb*.

    The width is the number of decimal digits of *nb*: 7 gives ``"%01d"``,
    250 gives ``"%03d"``. Zero or a negative value gives ``"%00d"``.

    @param      nb      integer
    @return             format string such as ``"%03d"``
    @raise      Exception   if *nb* has more than 20 digits
    """
    h = nb
    c = 0
    while h > 0:
        # Floor division stays exact for big integers, a float division
        # rounds values above 2**53 and may count one digit too many.
        h //= 10
        c += 1
        if c > 20:
            raise Exception(
                "this should not be that high %s (nb=%s)" % (str(c), str(nb)))
    return "%0" + str(int(c)) + "d"

40 

41 

def test_regular_expression(exp=".*", text="", fLOG=fLOG):
    """
    Searches a text with a regular expression and logs the outcome.

    The pattern and the text are logged first, then either the groups
    captured by the first match or ``"no result"`` if nothing matches.

    @param      exp     regular expression
    @param      text    text to check
    @param      fLOG    logging function
    """
    fLOG("regex", exp)
    fLOG("text", text)
    found = re.compile(exp).search(text)
    fLOG("no result" if found is None else found.groups())

57 

58 

def IsEmptyString(s):
    """
    Checks whether a string holds nothing.

    @param      s       string or None
    @return             boolean, True if *s* is None or its length is zero
    """
    return s is None or len(s) == 0

69 

70 

def is_empty_string(s):
    """
    Checks whether a string holds nothing.

    @param      s       string or None
    @return             boolean, True if *s* is None or its length is zero
    """
    return True if s is None else len(s) == 0

81 

82 

def file_head(file="",
              head=1000,
              out=""):
    """
    Keeps the head of a file.

    @param      file        file name
    @param      head        number of lines to keep, nothing is copied
                            if it is zero or negative
    @param      out         output file, if == None or empty, then, it becomes:
                                ``file + ".head.%d" % head + ext`` where *ext*
                                is the extension of *file*
    @return                 out
    @raise      Exception   if *file* does not exist
    """
    if not os.path.exists(file):
        raise Exception("unable to find file %s" % file)
    if not out:
        ext = os.path.splitext(file)[1]
        out = "%s.head.%d%s" % (file, head, ext)

    # Both handles are released even if reading or writing fails.
    with open(file, "r") as source, open(out, "w") as target:
        for i, line in enumerate(source):
            if i >= head:
                break
            target.write(line)
    return out

110 

111 

def file_split(file="", nb=2, out="", header=False, rnd=False):
    """
    Splits a file.

    Without *rnd*, consecutive lines go to the same output and the
    output index grows with the amount of text already read, so that
    the outputs receive contiguous pieces of the input.

    @param      file        file name or stream, a stream is read into memory
                            when *rnd* is False because its total size is
                            needed to choose the destination of each line
    @param      nb          number of files
    @param      out         output file, if == None or empty, then, it becomes:
                                ``file + ".split.%d.ext" % i``, it must contain ``%d``
                                or it must be a list of strings or streams
    @param      header      consider a header or not, the first line is then
                            copied into every output
    @param      rnd         randomly draw the file which receives the current line
    @return                 index of the last processed line
                            (0 if the input is empty or has a single line)
    @raise      Exception   if *file* is a name and does not exist
    @raise      ValueError  if *out* is a string without ``%d`` or
                            if *out* is empty while *file* is a stream
    """
    from_path = isinstance(file, str)
    if from_path and not os.path.exists(file):
        raise Exception("unable to find file %s" % file)

    if out is None or len(out) == 0:
        if not from_path:
            raise ValueError("out must be specified when file is a stream")
        ext = os.path.splitext(file)[1]
        out = "%s.split.%s%s" % (file, _get_format_zero_nb_integer(nb), ext)
    elif not isinstance(out, list) and "%d" not in out:
        raise ValueError("%d should be present in out='{0}'".format(out))

    if from_path:
        size = os.stat(file).st_size
        source = open(file, "r")
    elif rnd:
        # The size is only reported in the log in that case.
        size = 0
        source = file
    else:
        # A stream has no size on disk: buffer it to measure it.
        source = list(file)
        size = sum(len(line) for line in source)

    outputs = {}
    opened = []  # only the streams created here are closed at the end

    def output_for(index):
        "Returns the stream which receives part *index*, opens it on first use."
        if index not in outputs:
            target = out[index] if isinstance(out, list) else out % index
            if isinstance(target, str):
                outputs[index] = open(target, "w")
                opened.append(outputs[index])
            else:
                outputs[index] = target
        return outputs[index]

    tot = 0
    last_line = 0
    try:
        for i, line in enumerate(source):
            last_line = i
            if i == 0 and header:
                for n in range(0, nb):
                    output_for(n).write(line)
                continue

            if rnd:
                n = random.randint(0, nb - 1)
            elif size > 0:
                # Clamped to the last valid index, nb itself is out of range.
                n = min(nb - 1, tot * nb // size)
            else:
                n = 0
            tot += len(line)
            output_for(n).write(line)

            if (i + 1) % 10000 == 0:
                fLOG("    processed ", i, " bytes ", tot,
                     " out of ", size, " lines in ", out)
    finally:
        if from_path:
            source.close()
        for stream in opened:
            stream.close()
    return last_line

180 

181 

def file_list(folder, out=""):
    """
    Prints the list of files and sub files in a text file.

    Every name yielded by ``explore_folder_iterfile(folder)`` is written,
    followed by the separator returned by ``GetSepLine()``.

    @param      folder      folder
    @param      out         result, a file name or an object with a method
                            *write*, if == None or empty, it becomes
                            ``folder + "_.list_of_files.txt"``
    @return                 out (the file name or the given stream)
    """
    owns_stream = out is None or isinstance(out, str)
    if owns_stream:
        if not out:
            out = "%s_.list_of_files.txt" % folder
        stream = open(out, "w")
    else:
        stream = out

    try:
        for li in explore_folder_iterfile(folder):
            stream.write(li)
            stream.write(GetSepLine())
    finally:
        # A stream given by the caller is left open,
        # a file opened here is closed even if the exploration fails.
        if owns_stream:
            stream.close()

    return out

205 

206 

def file_grep(file="", regex=".*", out="", head=-1):
    """
    Grep: copies the lines of a file which match a regular expression.

    @param      file        file name
    @param      regex       regular expression, a line is kept
                            if ``re.search`` finds it anywhere in the line
    @param      out         output file, if == None or empty, then, it becomes:
                                ``file + ".regex.%d" % head + ext`` where *ext*
                                is the extension of *file*
    @param      head        stops after the first *head* matching lines
                            (or -1 if not stop), 0 produces an empty file
    @return                 out
    @raise      Exception   if *file* does not exist
    """
    if not os.path.exists(file):
        raise Exception("unable to find file %s" % file)
    if not out:
        ext = os.path.splitext(file)[1]
        out = "%s.regex.%d%s" % (file, head, ext)

    exp = re.compile(regex)

    nb = 0
    # Both handles are released even if reading or writing fails.
    with open(file, "r") as source, open(out, "w") as target:
        for line in source:
            # Checked before writing so that head=0 keeps no line at all.
            if 0 <= head <= nb:
                break
            if exp.search(line):
                target.write(line)
                nb += 1
    return out

237 return out