Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding:utf-8 -*-
2"""
3@file
4@brief To format a pandas dataframe
5"""
6import warnings
9def enumerate_split_df(df, common, subsets):
10 """
11 Splits a dataframe by columns to display shorter
12 dataframes.
14 @param df dataframe
15 @param common common columns
16 @param subsets subsets of columns
17 @return split dataframes
19 .. runpython::
20 :showcode:
22 from pandas import DataFrame
23 from pyquickhelper.pandashelper.tblformat import enumerate_split_df
25 df = DataFrame([{'A': 0, 'B': 'text'},
26 {'A': 1e-5, 'C': 'longer text'}])
27 res = list(enumerate_split_df(df, ['A'], [['B'], ['C']]))
28 print(res[0])
29 print('-----')
30 print(res[1])
31 """
32 for sub in subsets:
33 if set(common) & set(sub):
34 raise ValueError("Common columns between common={} and subset={}.".format(
35 common, sub))
36 yield df[common + sub]
39def df2rst(df, add_line=True, align="l", column_size=None, index=False,
40 list_table=False, title=None, header=True, sep=',',
41 number_format=None, replacements=None, split_row=None,
42 split_row_level="+", split_col_common=None,
43 split_col_subsets=None, filter_rows=None,
44 label_pattern=None):
45 """
46 Builds a string in :epkg:`RST` format from a :epkg:`dataframe`.
48 :param df: dataframe
49 :param add_line: (bool) add a line separator between each row
50 :param align: ``r`` or ``l`` or ``c``
51 :param column_size: something like ``[1, 2, 5]`` to multiply the column size,
52 a dictionary (if *list_table* is False) to overwrite
53 a column size like ``{'col_name1': 20}`` or ``{3: 20}``
54 :param index: add the index
55 :param list_table: use the
56 `list_table <http://docutils.sourceforge.net/docs/ref/rst/directives.html#list-table>`_
57 :param title: used only if *list_table* is True
58 :param header: add one header
59 :param sep: separator if *df* is a string and is a filename to load
60 :param number_format: formats number in a specific way, if *number_format*
61 is an integer, the pattern is replaced by
62 ``{numpy.float64: '{:.2g}'}`` (if *number_format* is 2),
63 see also :epkg:`pyformat.info`
64 :param replacements: replacements just before converting into RST (dictionary)
65 :param split_row: displays several table, one column is used as the
66 name of each section
67 :param split_row_level: title level if option *split_row* is used
68 :param split_col_common: splits the dataframe by columns, see @see fn enumerate_split_df
69 :param split_col_subsets: splits the dataframe by columns, see @see fn enumerate_split_df
70 :param filter_rows: None or function to removes rows, signature
71 ``def filter_rows(df: DataFrame) -> DataFrame``
72 :param label_pattern: if *split_row* is used, the function may insert
73 a label in front of every section, example: ``".. _lpy-{section}:"``
74 :return: string
76 If *list_table* is False, the format is the following.
78 *None* values are replaced by empty string (4 spaces).
79 It produces the following results:
81 ::
83 +------------------------+------------+----------+----------+
84 | Header row, column 1 | Header 2 | Header 3 | Header 4 |
85 | (header rows optional) | | | |
86 +========================+============+==========+==========+
87 | body row 1, column 1 | column 2 | column 3 | column 4 |
88 +------------------------+------------+----------+----------+
89 | body row 2 | ... | ... | |
90 +------------------------+------------+----------+----------+
92 If *list_table* is True, the format is the following:
94 ::
96 .. list-table:: title
97 :widths: 15 10 30
98 :header-rows: 1
100 * - Treat
101 - Quantity
102 - Description
103 * - Albatross
104 - 2.99
105 - anythings
106 ...
108 .. exref::
109 :title: Convert a dataframe into RST
111 .. runpython::
112 :showcode:
114 from pandas import DataFrame
115 from pyquickhelper.pandashelper import df2rst
117 df = DataFrame([{'A': 0, 'B': 'text'},
118 {'A': 1e-5, 'C': 'longer text'}])
119 print(df2rst(df))
121 .. exref::
122 :title: Convert a dataframe into markdown
124 .. runpython::
125 :showcode:
127 from io import StringIO
128 from textwrap import dedent
129 import pandas
131 from_excel = dedent('''
132 Op;axes;shape;SpeedUp
133 ReduceMax;(3,);(8, 24, 48, 8);2.96
134 ReduceMax;(3,);(8, 24, 48, 16);2.57
135 ReduceMax;(3,);(8, 24, 48, 32);2.95
136 ReduceMax;(3,);(8, 24, 48, 64);3.28
137 ReduceMax;(3,);(8, 24, 48, 100);3.05
138 ReduceMax;(3,);(8, 24, 48, 128);3.11
139 ReduceMax;(3,);(8, 24, 48, 200);2.86
140 ReduceMax;(3,);(8, 24, 48, 256);2.50
141 ReduceMax;(3,);(8, 24, 48, 400);2.48
142 ReduceMax;(3,);(8, 24, 48, 512);2.90
143 ReduceMax;(3,);(8, 24, 48, 1024);2.76
144 ReduceMax;(0,);(8, 24, 48, 8);19.29
145 ReduceMax;(0,);(8, 24, 48, 16);11.83
146 ReduceMax;(0,);(8, 24, 48, 32);5.69
147 ReduceMax;(0,);(8, 24, 48, 64);5.49
148 ReduceMax;(0,);(8, 24, 48, 100);6.13
149 ReduceMax;(0,);(8, 24, 48, 128);6.27
150 ReduceMax;(0,);(8, 24, 48, 200);5.46
151 ReduceMax;(0,);(8, 24, 48, 256);4.76
152 ReduceMax;(0,);(8, 24, 48, 400);2.21
153 ReduceMax;(0,);(8, 24, 48, 512);4.52
154 ReduceMax;(0,);(8, 24, 48, 1024);4.38
155 ReduceSum;(3,);(8, 24, 48, 8);1.79
156 ReduceSum;(3,);(8, 24, 48, 16);0.79
157 ReduceSum;(3,);(8, 24, 48, 32);1.67
158 ReduceSum;(3,);(8, 24, 48, 64);1.19
159 ReduceSum;(3,);(8, 24, 48, 100);2.08
160 ReduceSum;(3,);(8, 24, 48, 128);2.96
161 ReduceSum;(3,);(8, 24, 48, 200);1.66
162 ReduceSum;(3,);(8, 24, 48, 256);2.26
163 ReduceSum;(3,);(8, 24, 48, 400);1.76
164 ReduceSum;(3,);(8, 24, 48, 512);2.61
165 ReduceSum;(3,);(8, 24, 48, 1024);2.21
166 ReduceSum;(0,);(8, 24, 48, 8);2.56
167 ReduceSum;(0,);(8, 24, 48, 16);2.05
168 ReduceSum;(0,);(8, 24, 48, 32);3.04
169 ReduceSum;(0,);(8, 24, 48, 64);2.57
170 ReduceSum;(0,);(8, 24, 48, 100);2.41
171 ReduceSum;(0,);(8, 24, 48, 128);2.77
172 ReduceSum;(0,);(8, 24, 48, 200);2.02
173 ReduceSum;(0,);(8, 24, 48, 256);1.61
174 ReduceSum;(0,);(8, 24, 48, 400);1.59
175 ReduceSum;(0,);(8, 24, 48, 512);1.48
176 ReduceSum;(0,);(8, 24, 48, 1024);1.50
177 ''')
179 df = pandas.read_csv(StringIO(from_excel), sep=";")
180 print(df.columns)
182 sub = df[["Op", "axes", "shape", "SpeedUp"]]
183 piv = df.pivot_table(values="SpeedUp", index=['axes', "shape"], columns="Op")
184 piv = piv.reset_index(drop=False)
186 print(piv.to_markdown(index=False))
188 .. versionchanged:: 1.9
189 Nan value are replaced by empty string even if
190 *number_format* is not None.
191 Parameters *replacements*, *split_row*, *split_col_subsets*,
192 *split_col_common*, *filter_rows* were added.
193 """
194 if isinstance(df, str):
195 import pandas # pragma: no cover
196 df = pandas.read_csv( # pragma: no cover
197 df, encoding="utf-8", sep=sep)
199 if split_row is not None:
200 gdf = df.groupby(split_row)
201 rows = []
202 for key, g in gdf:
203 key = str(key).strip('()')
204 if ':ref:' in key:
205 try:
206 key = key.split("`")[1].split("<")[0].strip()
207 except IndexError: # pragma: no cover
208 pass
209 if label_pattern is not None:
210 lab = label_pattern.format(section=key.replace(".", "D"))
211 rows.append("")
212 rows.append(lab)
213 rows.append("")
214 rows.append(key)
215 rows.append(split_row_level * len(key))
216 rows.append("")
217 rg = df2rst(g, add_line=add_line, align=align,
218 column_size=column_size, index=index,
219 list_table=list_table,
220 title=title, header=header, sep=sep,
221 number_format=number_format, replacements=replacements,
222 split_row=None, split_row_level=None,
223 split_col_common=split_col_common,
224 split_col_subsets=split_col_subsets,
225 filter_rows=filter_rows,
226 label_pattern=None)
227 rows.append(rg)
228 rows.append("")
229 return "\n".join(rows)
231 if split_col_common is not None:
232 rows = []
233 for sub in enumerate_split_df(df, split_col_common, split_col_subsets):
234 rg = df2rst(sub, add_line=add_line, align=align,
235 column_size=column_size, index=index,
236 list_table=list_table,
237 title=title, header=header, sep=sep,
238 number_format=number_format,
239 replacements=replacements,
240 filter_rows=filter_rows)
241 rows.append(rg)
242 rows.append('')
243 return "\n".join(rows)
245 import numpy
246 typstr = str
248 if filter_rows is not None:
249 df = filter_rows(df).copy()
250 if df.shape[0] == 0:
251 return ""
252 else:
253 df = df.copy()
255 def patternification(value, pattern):
256 if isinstance(value, float) and numpy.isnan(value):
257 return ""
258 return pattern.format(value)
260 def nan2empty(value):
261 if isinstance(value, float) and numpy.isnan(value):
262 return ""
263 return value
265 if number_format is not None:
266 if isinstance(number_format, int):
267 number_format = "{:.%dg}" % number_format
268 import pandas
269 typ1 = numpy.float64
270 _df = pandas.DataFrame({'f': [0.12]})
271 typ2 = list(_df.dtypes)[0]
272 number_format = {typ1: number_format, typ2: number_format}
273 df = df.copy()
274 for name, typ in zip(df.columns, df.dtypes):
275 if name in number_format:
276 pattern = number_format[name]
277 df[name] = df[name].apply(
278 lambda x: patternification(x, pattern))
279 elif typ in number_format:
280 pattern = number_format[typ]
281 df[name] = df[name].apply(
282 lambda x: patternification(x, pattern))
284 # check empty strings
285 col_strings = df.select_dtypes(include=[object]).columns
286 for c in col_strings:
287 df[c] = df[c].apply(nan2empty)
289 if index:
290 df = df.reset_index(drop=False).copy()
291 ind = df.columns[0]
293 def boldify(x):
294 try:
295 return "**{0}**".format(x)
296 except Exception as e: # pragma: no cover
297 raise Exception(
298 "Unable to boldify type {0}".format(type(x))) from e
300 try:
301 values = df[ind].apply(boldify)
302 except Exception: # pragma: no cover
303 warnings.warn("Unable to boldify the index (1).", SyntaxWarning)
305 try:
306 df[ind] = values
307 except Exception: # pragma: no cover
308 warnings.warn("Unable to boldify the index (2).", SyntaxWarning)
310 def align_string(s, align, length):
311 if len(s) < length:
312 if align == "l":
313 return s + " " * (length - len(s))
314 if align == "r":
315 return " " * (length - len(s)) + s
316 if align == "c":
317 m = (length - len(s)) // 2
318 return " " * m + s + " " * (length - m - len(s))
319 raise ValueError( # pragma: no cover
320 "align should be 'l', 'r', 'c' not '{0}'".format(align))
321 return s
323 def complete(cool):
324 if list_table:
325 i, s = cool
326 if s is None:
327 s = "" # pragma: no cover
328 if isinstance(s, float) and numpy.isnan(s):
329 s = ""
330 else:
331 s = typstr(s).replace("\n", " ")
332 if replacements is not None:
333 if s in replacements:
334 s = replacements[s]
335 return (" " + s) if s else s
336 else:
337 i, s = cool
338 if s is None:
339 s = " " * 4 # pragma: no cover
340 if isinstance(s, float) and numpy.isnan(s):
341 s = "" # pragma: no cover
342 else:
343 s = typstr(s).replace("\n", " ")
344 i -= 2
345 if replacements is not None:
346 if s in replacements:
347 s = replacements[s]
348 s = align_string(s.strip(), align, i)
349 return s
351 if list_table:
353 def format_on_row(row):
354 one = "\n -".join(map(complete, enumerate(row)))
355 res = " * -" + one
356 return res
358 rows = [".. list-table:: {0}".format(title if title else "").strip()]
359 if column_size is None:
360 rows.append(" :widths: auto")
361 else:
362 rows.append(" :widths: " + " ".join(map(str, column_size)))
363 if header:
364 rows.append(" :header-rows: 1")
365 rows.append("")
366 if header:
367 rows.append(format_on_row(df.columns))
368 rows.extend(map(format_on_row, df.values))
369 rows.append("")
370 table = "\n".join(rows)
371 return table
372 else:
373 length = [(len(_) if isinstance(_, typstr) else 5) for _ in df.columns]
374 for row in df.values:
375 for i, v in enumerate(row):
376 length[i] = max(length[i], len(typstr(v).strip()))
377 if column_size is not None:
378 if isinstance(column_size, list):
379 if len(length) != len(column_size):
380 raise ValueError( # pragma: no cover
381 "length and column_size should have the same size {0} != {1}".format(
382 len(length), len(column_size)))
383 for i in range(len(length)):
384 if not isinstance(column_size[i], int):
385 raise TypeError( # pragma: no cover
386 "column_size[{0}] is not an integer".format(i))
387 length[i] *= column_size[i]
388 elif isinstance(column_size, dict):
389 for i, c in enumerate(df.columns):
390 if c in column_size:
391 length[i] = column_size[c]
392 elif i in column_size:
393 length[i] = column_size[i]
394 else:
395 raise TypeError( # pragma: no cover
396 "column_size must be a list or a dictionary not {}".format(
397 type(column_size)))
399 ic = 2
400 length = [_ + ic for _ in length]
401 line = ["-" * lc for lc in length]
402 lineb = ["=" * lc for lc in length]
403 sline = "+%s+" % ("+".join(line))
404 slineb = "+%s+" % ("+".join(lineb))
405 res = [sline]
407 res.append("| %s |" % " | ".join(
408 map(complete, zip(length, df.columns))))
409 res.append(slineb)
410 res.extend(["| %s |" % " | ".join(map(complete, zip(length, row)))
411 for row in df.values])
412 if add_line:
413 t = len(res)
414 for i in range(t - 1, 3, -1):
415 res.insert(i, sline)
416 res.append(sline)
417 table = "\n".join(res) + "\n"
418 return table
421def df2html(self, class_table=None, class_td=None, class_tr=None,
422 class_th=None):
423 """
424 Converts the table into a :epkg:`html` string.
426 :param self: dataframe (to be added as a class method)
427 :param class_table: adds a class to the tag ``table`` (None for none)
428 :param class_td: adds a class to the tag ``td`` (None for none)
429 :param class_tr: adds a class to the tag ``tr`` (None for none)
430 :param class_th: adds a class to the tag ``th`` (None for none)
431 :return: HTML
432 """
433 clta = ' class="%s"' % class_table if class_table is not None else ""
434 cltr = ' class="%s"' % class_tr if class_tr is not None else ""
435 cltd = ' class="%s"' % class_td if class_td is not None else ""
436 clth = ' class="%s"' % class_th if class_th is not None else ""
438 rows = ["<table%s>" % clta]
439 rows.append(("<tr%s><th%s>" % (cltr, clth)) + ("</th><th%s>" %
440 clth).join(self.columns) + "</th></tr>")
441 septd = "</td><td%s>" % cltd
442 strtd = "<tr%s><td%s>" % (cltr, cltd)
444 typstr = str
446 def conv(s):
447 if s is None:
448 return "" # pragma: no cover
449 return typstr(s)
451 for row in self.values:
452 s = septd.join(conv(_) for _ in row)
453 rows.append(strtd + s + "</td></tr>")
454 rows.append("</table>")
455 rows.append("")
456 return "\n".join(rows)