Coverage for src/pyrsslocal/xmlhelper/xmlfilewalk.py: 26%
96 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-04-23 08:45 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2024-04-23 08:45 +0200
1"""
2@file
4@brief functions related to XML files representing objects
5"""
7from pyquickhelper.loghelper.flog import GetSepLine
8from .xml_tree import XMLHandlerDict, XMLIterParser
11def _iteration_values(values):
12 """
13 Iterators on all possible tuple of values taken into a list.
14 Let's assume you have two rows:
16 ::
18 a1 a2 a3
19 b1 b2
21 The function will produce:
23 ::
25 a1 b1
26 a1 b2
27 a2 b1
28 a2 b2
29 a3 b1
30 a3 b2
33 The function is used by @see fn table_extraction_from_xml_files_iterator.
35 @param values list of rows
36 @return iterator on rows
37 """
38 co = []
39 for v in values:
40 if isinstance(v, list):
41 co.append(v)
42 else:
43 co.append([v])
45 ind = [0 for _ in co]
46 while ind[0] < len(co[0]):
47 line = [c[i] for c, i in zip(co, ind)]
48 yield line
50 ind[-1] += 1
51 i = len(ind) - 1
52 while i > 0:
53 if ind[i] >= len(co[i]):
54 ind[i] = 0
55 ind[i - 1] += 1
56 i -= 1
def table_extraction_from_xml_files_iterator(file, fields, log=False, fLOG=None,
                                             encoding="utf-8", errors=None):
    """
    Goes through a XML file, extracts values and put
    them into an iterator.

    @param      file        a file name (opened and closed by the function)
                            or an already opened file object (left open)
    @param      fields      list of fields to get from the XML files (see below)
    @param      log         do logs if True (and if *fLOG* is not None)
    @param      fLOG        logging function
    @param      errors      error handling scheme given to ``open``
                            when *file* is a file name
    @param      encoding    encoding given to ``open`` when *file* is a file name
    @return                 iterator on lines (values joined with tabulations)

    One example for fields:

    ::

        [ ("tag1/tag2", "all"),
          ("tag1/tag2/tag3/_", "one"),
          ...
        ]

    For every object returned by the parser, a field of type ``"one"`` takes
    the first value found for the path and raises ``ValueError`` if there is
    none. A field of type ``"all"`` takes every value; when it finds more than
    one, one line per combination of values is produced.
    Any other type raises ``ValueError``.
    """
    # Logging requires both the flag and a function: without this guard,
    # log=True combined with the default fLOG=None called None.
    do_log = bool(log) and fLOG is not None

    own_file = isinstance(file, str)
    fileh = open(file, "r", encoding=encoding, errors=errors) if own_file else file

    # try/finally: the file must be closed even if a ValueError is raised or
    # if the consumer stops iterating before the end.
    try:
        parser = XMLIterParser()
        handler = XMLHandlerDict(no_content=True)
        parser.setContentHandler(handler)

        fields = [(a.split("/"), b) for a, b in fields]
        if do_log:
            fLOG("table_extraction_from_xml_files: begin")

        for i_, o in enumerate(parser.parse(fileh)):

            values = []
            nb = 0  # number of fields holding more than one value
            for look, typ in fields:
                path = o.find_node_value(look)

                if typ == "one":
                    if len(path) == 0:
                        if do_log:
                            fLOG(o.get_xml_content())
                        raise ValueError(
                            "unable to find a value for path %s" %
                            "/".join(look))
                    val = path[0]
                    if val is None:
                        val = ""
                elif typ == "all":
                    # NOTE(review): unlike "one", a None value is not replaced
                    # by "" here -- confirm whether find_node_value can return
                    # None entries for these fields.
                    if len(path) == 1:
                        val = path[0]
                    elif len(path) == 0:
                        val = ""
                    else:
                        val = path
                        nb += 1
                else:
                    raise ValueError(
                        "the type must in (one, all) %s,%s" %
                        (look, typ))
                values.append(val)

            if nb == 0:
                yield "\t".join(values)
            else:
                # at least one field has several values: one line per combination
                for v in _iteration_values(values):
                    yield "\t".join(v)

            if do_log and (i_ + 1) % 1000 == 0:
                fLOG("table_extraction_from_xml_files reading ", i_)
    finally:
        if own_file:
            fileh.close()

    if do_log:
        fLOG("table_extraction_from_xml_files: end")
def table_extraction_from_xml_files(file, output, fields, log=False,
                                    encoding="utf-8", errors=None, fLOG=None):
    """
    Goes through a :epkg:`XML` file, extracts values and
    put them into a flat file.

    @param      file        a file
    @param      output      output file, string or file object,
                            a file object is left open
    @param      fields      list of fields to get from the XML files
    @param      log         do logs if True (and if *fLOG* is not None)
    @param      errors      error handling scheme used to open *output*
                            and forwarded to the reader of *file*
    @param      encoding    encoding used to open *output*
                            and forwarded to the reader of *file*
    @param      fLOG        logging function (new, optional); logging used to
                            fail because no function could be given

    One example for fields:

    ::

        [ ("tag1/tag2", "all"),
          ("tag1/tag2/tag3/_", "one"),
          ...
        ]

    Every extracted line is written followed by the value
    returned by ``GetSepLine()``.
    """
    own_output = isinstance(output, str)
    outputh = open(output, "w", encoding=encoding,
                   errors=errors) if own_output else output

    # try/finally: the output file is closed even if the extraction fails.
    try:
        lines = table_extraction_from_xml_files_iterator(
            file, fields,
            # only ask for logs when there is a function to receive them
            log=bool(log) and fLOG is not None, fLOG=fLOG,
            # the input is read with the same settings as the output
            encoding=encoding, errors=errors)
        for line in lines:
            outputh.write(line)
            outputh.write(GetSepLine())
    finally:
        if own_output:
            outputh.close()
def xml_filter_iterator(file, filter_=None, log=False, xmlformat=True,
                        fLOG=None, encoding="utf-8", errors=None):
    """
    Goes through a :epkg:`XML` file,
    returns :epkg:`XML` content if a condition is verified,
    the result is an iterator.

    @param      file        a file name (opened and closed by the function)
                            or an already opened file object (left open)
    @param      filter_     a function which takes a node and returns a boolean,
                            if None, accepts everything
    @param      log         do logs if True (and if *fLOG* is not None)
    @param      xmlformat   if True, return the xml, otherwise return the node
    @param      fLOG        logging function
    @param      encoding    encoding given to ``open`` when *file* is a file name
    @param      errors      error handling scheme given to ``open``
                            when *file* is a file name
    @return                 the xml format or a node depending on the value of xmlformat
    """
    if filter_ is None:
        def filter_(node):
            # no filter given: every node is accepted
            return True

    do_log = bool(log) and bool(fLOG)

    own_file = isinstance(file, str)
    fileh = open(file, "r", encoding=encoding, errors=errors) if own_file else file

    # try/finally: the file must be closed even if the filter raises or
    # if the consumer stops iterating before the end.
    try:
        parser = XMLIterParser()
        handler = XMLHandlerDict()
        parser.setContentHandler(handler)

        for i_, o in enumerate(parser.parse(fileh)):

            if filter_(o):
                yield o.get_xml_content() if xmlformat else o

            if do_log and (i_ + 1) % 1000 == 0:
                # the message used to name table_extraction_from_xml_files
                fLOG("xml_filter_iterator reading ", i_)
    finally:
        if own_file:
            fileh.close()

    if do_log:
        fLOG("xml_filter_iterator: end")
def xml_filter(file, output, filter_, log=False, xmlformat=True, encoding="utf-8", errors=None):
    """
    Goes through a :epkg:`XML` file, returns :epkg:`XML` content
    if a condition is verified, the result is put into a stream.

    @param      file        a file
    @param      output      output file, string or file object,
                            a file object is left open
    @param      filter_     a function which takes a node and returns a boolean
    @param      xmlformat   if True, return the xml, otherwise return the node
    @param      encoding    encoding used to open *output*
                            and forwarded to the reader of *file*
    @param      errors      error handling scheme used to open *output*
                            and forwarded to the reader of *file*
    @param      log         do logs if True

    Every selected item is written followed by the value
    returned by ``GetSepLine()``.
    """
    own_output = isinstance(output, str)
    # The file is written to: it used to be opened with mode "r",
    # which made every call to write fail.
    outputh = open(output, "w", encoding=encoding,
                   errors=errors) if own_output else output

    # try/finally: the output file is closed even if the filtering fails.
    try:
        # NOTE(review): with xmlformat=False the iterator yields nodes, which
        # are given to write as they are -- confirm this combination is meant
        # to be supported.
        for line in xml_filter_iterator(file, filter_, log, xmlformat,
                                        encoding=encoding, errors=errors):
            outputh.write(line)
            outputh.write(GetSepLine())
    finally:
        if own_output:
            outputh.close()