Coverage for pyquickhelper/pandashelper/tblformat.py: 100%
181 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-03 02:21 +0200
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-03 02:21 +0200
1# -*- coding:utf-8 -*-
2"""
3@file
4@brief To format a pandas dataframe
5"""
6import warnings
9def enumerate_split_df(df, common, subsets):
10 """
11 Splits a dataframe by columns to display shorter
12 dataframes.
14 @param df dataframe
15 @param common common columns
16 @param subsets subsets of columns
17 @return split dataframes
19 .. runpython::
20 :showcode:
22 from pandas import DataFrame
23 from pyquickhelper.pandashelper.tblformat import enumerate_split_df
25 df = DataFrame([{'A': 0, 'B': 'text'},
26 {'A': 1e-5, 'C': 'longer text'}])
27 res = list(enumerate_split_df(df, ['A'], [['B'], ['C']]))
28 print(res[0])
29 print('-----')
30 print(res[1])
31 """
32 for sub in subsets:
33 if set(common) & set(sub):
34 raise ValueError("Common columns between common={} and subset={}.".format(
35 common, sub))
36 yield df[common + sub]
39def df2rst(df, add_line=True, align="l", column_size=None, index=False,
40 list_table=False, title=None, header=True, sep=',',
41 number_format=None, replacements=None, split_row=None,
42 split_row_level="+", split_col_common=None,
43 split_col_subsets=None, filter_rows=None,
44 label_pattern=None):
45 """
46 Builds a string in :epkg:`RST` format from a :epkg:`dataframe`.
48 :param df: dataframe
49 :param add_line: (bool) add a line separator between each row
50 :param align: ``r`` or ``l`` or ``c``
51 :param column_size: something like ``[1, 2, 5]`` to multiply the column size,
52 a dictionary (if *list_table* is False) to overwrite
53 a column size like ``{'col_name1': 20}`` or ``{3: 20}``
54 :param index: add the index
55 :param list_table: use the
56 `list_table <http://docutils.sourceforge.net/docs/ref/rst/directives.html#list-table>`_
57 :param title: used only if *list_table* is True
58 :param header: add one header
59 :param sep: separator if *df* is a string and is a filename to load
60 :param number_format: formats number in a specific way, if *number_format*
61 is an integer, the pattern is replaced by
62 ``{numpy.float64: '{:.2g}'}`` (if *number_format* is 2),
63 see also :epkg:`pyformat.info`
64 :param replacements: replacements just before converting into RST (dictionary)
65 :param split_row: displays several table, one column is used as the
66 name of each section
67 :param split_row_level: title level if option *split_row* is used
68 :param split_col_common: splits the dataframe by columns, see @see fn enumerate_split_df
69 :param split_col_subsets: splits the dataframe by columns, see @see fn enumerate_split_df
70 :param filter_rows: None or function to removes rows, signature
71 ``def filter_rows(df: DataFrame) -> DataFrame``
72 :param label_pattern: if *split_row* is used, the function may insert
73 a label in front of every section, example: ``".. _lpy-{section}:"``
74 :return: string
76 If *list_table* is False, the format is the following.
78 *None* values are replaced by empty string (4 spaces).
79 It produces the following results:
81 ::
83 +------------------------+------------+----------+----------+
84 | Header row, column 1 | Header 2 | Header 3 | Header 4 |
85 | (header rows optional) | | | |
86 +========================+============+==========+==========+
87 | body row 1, column 1 | column 2 | column 3 | column 4 |
88 +------------------------+------------+----------+----------+
89 | body row 2 | ... | ... | |
90 +------------------------+------------+----------+----------+
92 If *list_table* is True, the format is the following:
94 ::
96 .. list-table:: title
97 :widths: 15 10 30
98 :header-rows: 1
100 * - Treat
101 - Quantity
102 - Description
103 * - Albatross
104 - 2.99
105 - anythings
106 ...
108 .. exref::
109 :title: Convert a dataframe into RST
111 .. runpython::
112 :showcode:
114 from pandas import DataFrame
115 from pyquickhelper.pandashelper import df2rst
117 df = DataFrame([{'A': 0, 'B': 'text'},
118 {'A': 1e-5, 'C': 'longer text'}])
119 print(df2rst(df))
121 .. exref::
122 :title: Convert a dataframe into markdown
124 .. runpython::
125 :showcode:
127 from io import StringIO
128 from textwrap import dedent
129 import pandas
131 from_excel = dedent('''
132 Op;axes;shape;SpeedUp
133 ReduceMax;(3,);(8, 24, 48, 8);2.96
134 ReduceMax;(3,);(8, 24, 48, 16);2.57
135 ReduceMax;(3,);(8, 24, 48, 32);2.95
136 ReduceMax;(3,);(8, 24, 48, 64);3.28
137 ReduceMax;(3,);(8, 24, 48, 100);3.05
138 ReduceMax;(3,);(8, 24, 48, 128);3.11
139 ReduceMax;(3,);(8, 24, 48, 200);2.86
140 ReduceMax;(3,);(8, 24, 48, 256);2.50
141 ReduceMax;(3,);(8, 24, 48, 400);2.48
142 ReduceMax;(3,);(8, 24, 48, 512);2.90
143 ReduceMax;(3,);(8, 24, 48, 1024);2.76
144 ReduceMax;(0,);(8, 24, 48, 8);19.29
145 ReduceMax;(0,);(8, 24, 48, 16);11.83
146 ReduceMax;(0,);(8, 24, 48, 32);5.69
147 ReduceMax;(0,);(8, 24, 48, 64);5.49
148 ReduceMax;(0,);(8, 24, 48, 100);6.13
149 ReduceMax;(0,);(8, 24, 48, 128);6.27
150 ReduceMax;(0,);(8, 24, 48, 200);5.46
151 ReduceMax;(0,);(8, 24, 48, 256);4.76
152 ReduceMax;(0,);(8, 24, 48, 400);2.21
153 ReduceMax;(0,);(8, 24, 48, 512);4.52
154 ReduceMax;(0,);(8, 24, 48, 1024);4.38
155 ReduceSum;(3,);(8, 24, 48, 8);1.79
156 ReduceSum;(3,);(8, 24, 48, 16);0.79
157 ReduceSum;(3,);(8, 24, 48, 32);1.67
158 ReduceSum;(3,);(8, 24, 48, 64);1.19
159 ReduceSum;(3,);(8, 24, 48, 100);2.08
160 ReduceSum;(3,);(8, 24, 48, 128);2.96
161 ReduceSum;(3,);(8, 24, 48, 200);1.66
162 ReduceSum;(3,);(8, 24, 48, 256);2.26
163 ReduceSum;(3,);(8, 24, 48, 400);1.76
164 ReduceSum;(3,);(8, 24, 48, 512);2.61
165 ReduceSum;(3,);(8, 24, 48, 1024);2.21
166 ReduceSum;(0,);(8, 24, 48, 8);2.56
167 ReduceSum;(0,);(8, 24, 48, 16);2.05
168 ReduceSum;(0,);(8, 24, 48, 32);3.04
169 ReduceSum;(0,);(8, 24, 48, 64);2.57
170 ReduceSum;(0,);(8, 24, 48, 100);2.41
171 ReduceSum;(0,);(8, 24, 48, 128);2.77
172 ReduceSum;(0,);(8, 24, 48, 200);2.02
173 ReduceSum;(0,);(8, 24, 48, 256);1.61
174 ReduceSum;(0,);(8, 24, 48, 400);1.59
175 ReduceSum;(0,);(8, 24, 48, 512);1.48
176 ReduceSum;(0,);(8, 24, 48, 1024);1.50
177 ''')
179 df = pandas.read_csv(StringIO(from_excel), sep=";")
180 print(df.columns)
182 sub = df[["Op", "axes", "shape", "SpeedUp"]]
183 piv = df.pivot_table(values="SpeedUp", index=['axes', "shape"], columns="Op")
184 piv = piv.reset_index(drop=False)
186 print(piv.to_markdown(index=False))
188 Nan value are replaced by empty string even if *number_format* is not None.
189 """
190 if isinstance(df, str):
191 import pandas # pragma: no cover
192 df = pandas.read_csv( # pragma: no cover
193 df, encoding="utf-8", sep=sep)
195 if split_row is not None:
196 gdf = df.groupby(split_row)
197 rows = []
198 for key, g in gdf:
199 key = str(key).strip('()')
200 if ':ref:' in key:
201 try:
202 key = key.split("`")[1].split("<")[0].strip()
203 except IndexError: # pragma: no cover
204 pass
205 if label_pattern is not None:
206 lab = label_pattern.format(section=key.replace(".", "D"))
207 rows.append("")
208 rows.append(lab)
209 rows.append("")
210 rows.append(key)
211 rows.append(split_row_level * len(key))
212 rows.append("")
213 rg = df2rst(g, add_line=add_line, align=align,
214 column_size=column_size, index=index,
215 list_table=list_table,
216 title=title, header=header, sep=sep,
217 number_format=number_format, replacements=replacements,
218 split_row=None, split_row_level=None,
219 split_col_common=split_col_common,
220 split_col_subsets=split_col_subsets,
221 filter_rows=filter_rows,
222 label_pattern=None)
223 rows.append(rg)
224 rows.append("")
225 return "\n".join(rows)
227 if split_col_common is not None:
228 rows = []
229 for sub in enumerate_split_df(df, split_col_common, split_col_subsets):
230 rg = df2rst(sub, add_line=add_line, align=align,
231 column_size=column_size, index=index,
232 list_table=list_table,
233 title=title, header=header, sep=sep,
234 number_format=number_format,
235 replacements=replacements,
236 filter_rows=filter_rows)
237 rows.append(rg)
238 rows.append('')
239 return "\n".join(rows)
241 import numpy
242 typstr = str
244 if filter_rows is not None:
245 df = filter_rows(df).copy()
246 if df.shape[0] == 0:
247 return ""
248 else:
249 df = df.copy()
251 def patternification(value, pattern):
252 if isinstance(value, float) and numpy.isnan(value):
253 return ""
254 return pattern.format(value)
256 def nan2empty(value):
257 if isinstance(value, float) and numpy.isnan(value):
258 return ""
259 return value
261 if number_format is not None:
262 if isinstance(number_format, int):
263 number_format = "{:.%dg}" % number_format
264 import pandas
265 typ1 = numpy.float64
266 _df = pandas.DataFrame({'f': [0.12]})
267 typ2 = list(_df.dtypes)[0]
268 number_format = {typ1: number_format, typ2: number_format}
269 df = df.copy()
270 for name, typ in zip(df.columns, df.dtypes):
271 if name in number_format:
272 pattern = number_format[name]
273 df[name] = df[name].apply(
274 lambda x: patternification(x, pattern))
275 elif typ in number_format:
276 pattern = number_format[typ]
277 df[name] = df[name].apply(
278 lambda x: patternification(x, pattern))
280 # check empty strings
281 col_strings = df.select_dtypes(include=[object]).columns
282 for c in col_strings:
283 df[c] = df[c].apply(nan2empty)
285 if index:
286 df = df.reset_index(drop=False).copy()
287 ind = df.columns[0]
289 def boldify(x):
290 try:
291 return f"**{x}**"
292 except Exception as e: # pragma: no cover
293 raise RuntimeError(
294 f"Unable to boldify type {type(x)}") from e
296 try:
297 values = df[ind].apply(boldify)
298 except Exception: # pragma: no cover
299 warnings.warn("Unable to boldify the index (1).", SyntaxWarning)
301 try:
302 df[ind] = values
303 except Exception: # pragma: no cover
304 warnings.warn("Unable to boldify the index (2).", SyntaxWarning)
306 def align_string(s, align, length):
307 if len(s) < length:
308 if align == "l":
309 return s + " " * (length - len(s))
310 if align == "r":
311 return " " * (length - len(s)) + s
312 if align == "c":
313 m = (length - len(s)) // 2
314 return " " * m + s + " " * (length - m - len(s))
315 raise ValueError( # pragma: no cover
316 f"align should be 'l', 'r', 'c' not '{align}'")
317 return s
319 def complete(cool):
320 if list_table:
321 i, s = cool
322 if s is None:
323 s = "" # pragma: no cover
324 if isinstance(s, float) and numpy.isnan(s):
325 s = ""
326 else:
327 s = typstr(s).replace("\n", " ")
328 if replacements is not None:
329 if s in replacements:
330 s = replacements[s]
331 return (" " + s) if s else s
332 else:
333 i, s = cool
334 if s is None:
335 s = " " * 4 # pragma: no cover
336 if isinstance(s, float) and numpy.isnan(s):
337 s = "" # pragma: no cover
338 else:
339 s = typstr(s).replace("\n", " ")
340 i -= 2
341 if replacements is not None:
342 if s in replacements:
343 s = replacements[s]
344 s = align_string(s.strip(), align, i)
345 return s
347 if list_table:
349 def format_on_row(row):
350 one = "\n -".join(map(complete, enumerate(row)))
351 res = " * -" + one
352 return res
354 rows = [f".. list-table:: {title if title else ''}".strip()]
355 if column_size is None:
356 rows.append(" :widths: auto")
357 else:
358 rows.append(" :widths: " + " ".join(map(str, column_size)))
359 if header:
360 rows.append(" :header-rows: 1")
361 rows.append("")
362 if header:
363 rows.append(format_on_row(df.columns))
364 rows.extend(map(format_on_row, df.values))
365 rows.append("")
366 table = "\n".join(rows)
367 return table
368 else:
369 length = [(len(_) if isinstance(_, typstr) else 5) for _ in df.columns]
370 for row in df.values:
371 for i, v in enumerate(row):
372 length[i] = max(length[i], len(typstr(v).strip()))
373 if column_size is not None:
374 if isinstance(column_size, list):
375 if len(length) != len(column_size):
376 raise ValueError( # pragma: no cover
377 "length and column_size should have the same size {0} != {1}".format(
378 len(length), len(column_size)))
379 for i in range(len(length)):
380 if not isinstance(column_size[i], int):
381 raise TypeError( # pragma: no cover
382 f"column_size[{i}] is not an integer")
383 length[i] *= column_size[i]
384 elif isinstance(column_size, dict):
385 for i, c in enumerate(df.columns):
386 if c in column_size:
387 length[i] = column_size[c]
388 elif i in column_size:
389 length[i] = column_size[i]
390 else:
391 raise TypeError( # pragma: no cover
392 "column_size must be a list or a dictionary not {}".format(
393 type(column_size)))
395 ic = 2
396 length = [_ + ic for _ in length]
397 line = ["-" * lc for lc in length]
398 lineb = ["=" * lc for lc in length]
399 sline = f"+{'+'.join(line)}+"
400 slineb = f"+{'+'.join(lineb)}+"
401 res = [sline]
403 res.append(f"| {' | '.join(map(complete, zip(length, df.columns)))} |")
404 res.append(slineb)
405 res.extend([f"| {' | '.join(map(complete, zip(length, row)))} |"
406 for row in df.values])
407 if add_line:
408 t = len(res)
409 for i in range(t - 1, 3, -1):
410 res.insert(i, sline)
411 res.append(sline)
412 table = "\n".join(res) + "\n"
413 return table
416def df2html(self, class_table=None, class_td=None, class_tr=None,
417 class_th=None):
418 """
419 Converts the table into a :epkg:`html` string.
421 :param self: dataframe (to be added as a class method)
422 :param class_table: adds a class to the tag ``table`` (None for none)
423 :param class_td: adds a class to the tag ``td`` (None for none)
424 :param class_tr: adds a class to the tag ``tr`` (None for none)
425 :param class_th: adds a class to the tag ``th`` (None for none)
426 :return: HTML
427 """
428 clta = f' class="{class_table}"' if class_table is not None else ""
429 cltr = f' class="{class_tr}"' if class_tr is not None else ""
430 cltd = f' class="{class_td}"' if class_td is not None else ""
431 clth = f' class="{class_th}"' if class_th is not None else ""
433 rows = [f"<table{clta}>"]
434 rows.append(f"<tr{cltr}><th{clth}>" + ("</th><th%s>" %
435 clth).join(self.columns) + "</th></tr>")
436 septd = f"</td><td{cltd}>"
437 strtd = f"<tr{cltr}><td{cltd}>"
439 typstr = str
441 def conv(s):
442 if s is None:
443 return "" # pragma: no cover
444 return typstr(s)
446 for row in self.values:
447 s = septd.join(conv(_) for _ in row)
448 rows.append(strtd + s + "</td></tr>")
449 rows.append("</table>")
450 rows.append("")
451 return "\n".join(rows)