Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief various basic functions often needed
5"""
7import os
8import re
9import random
10from pyquickhelper.loghelper.flog import fLOG, GetSepLine
11from pyquickhelper.filehelper.synchelper import explore_folder_iterfile
_keep_var_character = re.compile("[^a-zA-Z0-9_]")


def _clean_name_variable(st):
    """
    Cleans a string: every character which is not an ASCII letter,
    a digit or an underscore is replaced by an underscore.

    @param      st      string to clean
    @return             another string
    """
    # Splitting on every forbidden character and joining the pieces with
    # "_" is the same as substituting each forbidden character by "_".
    # ``split`` never returns None, so no failure branch is needed.
    return _keep_var_character.sub("_", st)
def _get_format_zero_nb_integer(nb):
    """
    Builds a zero-padded integer format such as ``"%03d"`` whose width
    is the number of decimal digits of *nb*.

    @param      nb      number whose digits are counted
    @return             format string ``"%0<c>d"``, ``<c>`` is the number
                        of digits of *nb* (0 if *nb* is not positive)
    @raise      Exception   if *nb* has more than 20 digits
    """
    remaining = nb
    digits = 0
    while remaining > 0:
        # Floor division keeps integers exact; a float division rounds
        # big integers and may count one digit too many.
        remaining //= 10
        digits += 1
        if digits > 20:
            raise Exception(
                "this should not be that high %s (nb=%s)" % (str(digits), str(nb)))
    return "%0" + str(digits) + "d"
def test_regular_expression(exp=".*", text="", fLOG=fLOG):
    """
    Searches a text with a regular expression and logs the outcome:
    the groups of the first match, or ``"no result"`` when nothing matches.

    @param      exp     regular expression
    @param      text    text to check
    @param      fLOG    logging function
    """
    fLOG("regex", exp)
    fLOG("text", text)
    found = re.compile(exp).search(text)
    if found is not None:
        fLOG(found.groups())
        return
    fLOG("no result")
def IsEmptyString(s):
    """
    Tells if a string is empty, ``None`` is considered as empty.

    @param      s       string
    @return             boolean
    """
    return s is None or len(s) == 0
def is_empty_string(s):
    """
    Tells if a string is empty, ``None`` is considered as empty.

    @param      s       string
    @return             boolean
    """
    return True if s is None else len(s) == 0
def file_head(file="",
              head=1000,
              out=""):
    """
    Keeps the head of a file: copies its first lines into another file.

    @param      file        file name
    @param      head        number of lines to keep
    @param      out         output file, if == None or empty, then, it becomes:
                            ``file + ".head.%d" % head + ext`` where *ext*
                            is the extension of *file*
    @return                 out
    @raise      Exception   if *file* does not exist
    """
    if not os.path.exists(file):
        raise Exception("unable to find file %s" % file)
    if not out:
        # the default name is appended to the full input name,
        # the extension is repeated at the end
        ext = os.path.splitext(file)[1]
        out = "%s.head.%d%s" % (file, head, ext)

    # both handles are released even if reading or writing fails
    with open(file, "r") as source, open(out, "w") as target:
        for i, line in enumerate(source):
            if i >= head:
                break
            target.write(line)
    return out
def _file_split_stream_size(stream):
    """
    Returns the end position of a seekable stream and restores its
    current position, returns 0 if the stream cannot be measured.
    """
    try:
        position = stream.tell()
        stream.seek(0, os.SEEK_END)
        size = stream.tell()
        stream.seek(position)
    except (AttributeError, OSError, ValueError):
        # io.UnsupportedOperation inherits from both OSError and ValueError
        return 0
    return size


def file_split(file="", nb=2, out="", header=False, rnd=False):
    """
    Splits a file into *nb* files, line by line.

    Without *rnd*, the lines are dispatched in contiguous chunks: the
    output index grows with the share of the input already read.
    If the size of the input is unknown (reported as 0), every line
    goes to the first output.

    @param      file        file name or stream
    @param      nb          number of files
    @param      out         output file, if == None or empty, then, it becomes:
                            ``file + ".split.%d.ext" % i``, it must contain ``%d``
                            or it must be a list of strings or streams,
                            it is required when *file* is a stream
    @param      header      consider a header or not, the first line is then
                            copied into every output
    @param      rnd         randomly draw the file which receives the current line
    @return                 index of the last line read, which is the number of
                            lines minus one (0 if the input is empty)
    @raise      Exception   if *file* is a name and does not exist
    @raise      ValueError  if *out* is a string without ``%d`` or if *out*
                            is empty while *file* is a stream
    """
    is_name = isinstance(file, str)
    if is_name and not os.path.exists(file):
        raise Exception("unable to find file %s" % file)

    if not out:
        if not is_name:
            raise ValueError("out must be specified when file is a stream")
        ext = os.path.splitext(file)[1]
        # the result is used as a format later on,
        # any '%' coming from the file name must be escaped
        out = "%s.split.%s%s" % (file.replace("%", "%%"),
                                 _get_format_zero_nb_integer(nb),
                                 ext.replace("%", "%%"))
    elif not isinstance(out, list) and "%d" not in out:
        raise ValueError("%d should be present in out='{0}'".format(out))

    size = os.stat(file).st_size if is_name else _file_split_stream_size(file)
    targets = {}

    def target(n):
        # opens (or picks) the n-th output the first time it is needed
        if n not in targets:
            if isinstance(out, list):
                dest = out[n]
                targets[n] = open(dest, "w") if isinstance(dest, str) else dest
            else:
                targets[n] = open(out % n, "w")
        return targets[n]

    tot = 0
    last_line = 0
    source = open(file, "r") if is_name else file
    try:
        for i, line in enumerate(source):
            last_line = i
            if i == 0 and header:
                for n in range(0, nb):
                    target(n).write(line)
                continue

            if rnd:
                n = random.randint(0, nb - 1)
            elif size > 0:
                # valid indices are 0..nb-1, the ratio is clamped accordingly
                n = int(min(nb - 1, tot * nb / size))
            else:
                n = 0
            tot += len(line)
            target(n).write(line)

            if (i + 1) % 10000 == 0:
                fLOG("    processed ", i, " bytes ", tot,
                     " out of ", size, " lines in ", out)
    finally:
        # only the handles opened by this function are closed,
        # streams given by the caller are left open
        if is_name:
            source.close()
        for k, stream in targets.items():
            if not isinstance(out, list) or isinstance(out[k], str):
                stream.close()
    return last_line
def file_list(folder, out=""):
    """
    Prints the list of files and sub files in a text file:
    writes every item produced by ``explore_folder_iterfile(folder)``,
    each one followed by ``GetSepLine()``.

    @param      folder      folder
    @param      out         result, a file name or a stream with a method
                            ``write``, if None or empty, it becomes
                            ``folder + "_.list_of_files.txt"``
    @return                 out (the file name or the given stream)
    """
    owns_stream = out is None or isinstance(out, str)
    if owns_stream:
        if is_empty_string(out):
            out = "%s_.list_of_files.txt" % folder
        stream = open(out, "w")
    else:
        stream = out

    try:
        for name in explore_folder_iterfile(folder):
            stream.write(name)
            stream.write(GetSepLine())
    finally:
        # the file opened here is closed even if the exploration fails,
        # a stream given by the caller is left open
        if owns_stream:
            stream.close()

    return out
def file_grep(file="", regex=".*", out="", head=-1):
    """
    Grep: copies the lines of a file matching a regular expression
    into another file.

    @param      file        file name
    @param      regex       regular expression, searched in every line
    @param      out         output file, if == None or empty, then, it becomes:
                            ``file + ".regex.%d" % head + ext`` where *ext*
                            is the extension of *file*
    @param      head        stops after the first *head* matching lines
                            (or -1 if not stop)
    @return                 out
    @raise      Exception   if *file* does not exist
    """
    if not os.path.exists(file):
        raise Exception("unable to find file %s" % file)
    if not out:
        ext = os.path.splitext(file)[1]
        out = "%s.regex.%d%s" % (file, head, ext)

    exp = re.compile(regex)

    # both handles are released even if reading or writing fails
    with open(file, "r") as source, open(out, "w") as target:
        nb = 0
        for line in source:
            # checked before writing so that head == 0 keeps no line
            if 0 <= head <= nb:
                break
            if exp.search(line):
                target.write(line)
                nb += 1
    return out