Coverage for src/pyquickhelper/pandashelper/tblformat.py: 100%

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

1# -*- coding:utf-8 -*-

2"""

3@file

4@brief To format a pandas dataframe

5"""

6import warnings

9def enumerate_split_df(df, common, subsets):

10 """

11 Splits a dataframe by columns to display shorter

12 dataframes.

14 @param df dataframe

15 @param common common columns

16 @param subsets subsets of columns

17 @return split dataframes

19 .. runpython::

20 :showcode:

22 from pandas import DataFrame

23 from pyquickhelper.pandashelper.tblformat import enumerate_split_df

25 df = DataFrame([{'A': 0, 'B': 'text'},

26 {'A': 1e-5, 'C': 'longer text'}])

27 res = list(enumerate_split_df(df, ['A'], [['B'], ['C']]))

28 print(res[0])

29 print('-----')

30 print(res[1])

31 """

32 for sub in subsets:

33 if set(common) & set(sub):

34 raise ValueError("Common columns between common={} and subset={}.".format(

35 common, sub))

36 yield df[common + sub]

39def df2rst(df, add_line=True, align="l", column_size=None, index=False,

40 list_table=False, title=None, header=True, sep=',',

41 number_format=None, replacements=None, split_row=None,

42 split_row_level="+", split_col_common=None,

43 split_col_subsets=None, filter_rows=None,

44 label_pattern=None):

45 """

46 Builds a string in :epkg:`RST` format from a :epkg:`dataframe`.

48 :param df: dataframe

49 :param add_line: (bool) add a line separator between each row

50 :param align: ``r`` or ``l`` or ``c``

51 :param column_size: something like ``[1, 2, 5]`` to multiply the column size,

52 a dictionary (if *list_table* is False) to overwrite

53 a column size like ``{'col_name1': 20}`` or ``{3: 20}``

54 :param index: add the index

55 :param list_table: use the

56 `list_table <http://docutils.sourceforge.net/docs/ref/rst/directives.html#list-table>`_

57 :param title: used only if *list_table* is True

58 :param header: add one header

59 :param sep: separator if *df* is a string and is a filename to load

60 :param number_format: formats number in a specific way, if *number_format*

61 is an integer, the pattern is replaced by

62 ``{numpy.float64: '{:.2g}'}`` (if *number_format* is 2),

63 see also :epkg:`pyformat.info`

64 :param replacements: replacements just before converting into RST (dictionary)

65 :param split_row: displays several table, one column is used as the

66 name of each section

67 :param split_row_level: title level if option *split_row* is used

68 :param split_col_common: splits the dataframe by columns, see @see fn enumerate_split_df

69 :param split_col_subsets: splits the dataframe by columns, see @see fn enumerate_split_df

70 :param filter_rows: None or function to removes rows, signature

71 ``def filter_rows(df: DataFrame) -> DataFrame``

72 :param label_pattern: if *split_row* is used, the function may insert

73 a label in front of every section, example: ``".. _lpy-{section}:"``

74 :return: string

76 If *list_table* is False, the format is the following.

78 *None* values are replaced by empty string (4 spaces).

79 It produces the following results:

81 ::

83 +------------------------+------------+----------+----------+

85 | (header rows optional) | | | |

86 +========================+============+==========+==========+

88 +------------------------+------------+----------+----------+

89 | body row 2 | ... | ... | |

90 +------------------------+------------+----------+----------+

92 If *list_table* is True, the format is the following:

94 ::

96 .. list-table:: title

97 :widths: 15 10 30

98 :header-rows: 1

100 * - Treat

101 - Quantity

102 - Description

103 * - Albatross

104 - 2.99

105 - anythings

106 ...

107

108 .. exref::

109 :title: Convert a dataframe into RST

110

111 .. runpython::

112 :showcode:

113

114 from pandas import DataFrame

115 from pyquickhelper.pandashelper import df2rst

116

117 df = DataFrame([{'A': 0, 'B': 'text'},

118 {'A': 1e-5, 'C': 'longer text'}])

119 print(df2rst(df))

120

121 .. exref::

122 :title: Convert a dataframe into markdown

123

124 .. runpython::

125 :showcode:

126

127 from io import StringIO

128 from textwrap import dedent

129 import pandas

130

131 from_excel = dedent('''

132 Op;axes;shape;SpeedUp

133 ReduceMax;(3,);(8, 24, 48, 8);2.96

134 ReduceMax;(3,);(8, 24, 48, 16);2.57

135 ReduceMax;(3,);(8, 24, 48, 32);2.95

136 ReduceMax;(3,);(8, 24, 48, 64);3.28

137 ReduceMax;(3,);(8, 24, 48, 100);3.05

138 ReduceMax;(3,);(8, 24, 48, 128);3.11

139 ReduceMax;(3,);(8, 24, 48, 200);2.86

140 ReduceMax;(3,);(8, 24, 48, 256);2.50

141 ReduceMax;(3,);(8, 24, 48, 400);2.48

142 ReduceMax;(3,);(8, 24, 48, 512);2.90

143 ReduceMax;(3,);(8, 24, 48, 1024);2.76

144 ReduceMax;(0,);(8, 24, 48, 8);19.29

145 ReduceMax;(0,);(8, 24, 48, 16);11.83

146 ReduceMax;(0,);(8, 24, 48, 32);5.69

147 ReduceMax;(0,);(8, 24, 48, 64);5.49

148 ReduceMax;(0,);(8, 24, 48, 100);6.13

149 ReduceMax;(0,);(8, 24, 48, 128);6.27

150 ReduceMax;(0,);(8, 24, 48, 200);5.46

151 ReduceMax;(0,);(8, 24, 48, 256);4.76

152 ReduceMax;(0,);(8, 24, 48, 400);2.21

153 ReduceMax;(0,);(8, 24, 48, 512);4.52

154 ReduceMax;(0,);(8, 24, 48, 1024);4.38

155 ReduceSum;(3,);(8, 24, 48, 8);1.79

156 ReduceSum;(3,);(8, 24, 48, 16);0.79

157 ReduceSum;(3,);(8, 24, 48, 32);1.67

158 ReduceSum;(3,);(8, 24, 48, 64);1.19

159 ReduceSum;(3,);(8, 24, 48, 100);2.08

160 ReduceSum;(3,);(8, 24, 48, 128);2.96

161 ReduceSum;(3,);(8, 24, 48, 200);1.66

162 ReduceSum;(3,);(8, 24, 48, 256);2.26

163 ReduceSum;(3,);(8, 24, 48, 400);1.76

164 ReduceSum;(3,);(8, 24, 48, 512);2.61

165 ReduceSum;(3,);(8, 24, 48, 1024);2.21

166 ReduceSum;(0,);(8, 24, 48, 8);2.56

167 ReduceSum;(0,);(8, 24, 48, 16);2.05

168 ReduceSum;(0,);(8, 24, 48, 32);3.04

169 ReduceSum;(0,);(8, 24, 48, 64);2.57

170 ReduceSum;(0,);(8, 24, 48, 100);2.41

171 ReduceSum;(0,);(8, 24, 48, 128);2.77

172 ReduceSum;(0,);(8, 24, 48, 200);2.02

173 ReduceSum;(0,);(8, 24, 48, 256);1.61

174 ReduceSum;(0,);(8, 24, 48, 400);1.59

175 ReduceSum;(0,);(8, 24, 48, 512);1.48

176 ReduceSum;(0,);(8, 24, 48, 1024);1.50

177 ''')

178

179 df = pandas.read_csv(StringIO(from_excel), sep=";")

180 print(df.columns)

181

182 sub = df[["Op", "axes", "shape", "SpeedUp"]]

183 piv = df.pivot_table(values="SpeedUp", index=['axes', "shape"], columns="Op")

184 piv = piv.reset_index(drop=False)

185

186 print(piv.to_markdown(index=False))

187

188 .. versionchanged:: 1.9

189 Nan value are replaced by empty string even if

190 *number_format* is not None.

191 Parameters *replacements*, *split_row*, *split_col_subsets*,

192 *split_col_common*, *filter_rows* were added.

193 """

194 if isinstance(df, str):

195 import pandas # pragma: no cover

196 df = pandas.read_csv( # pragma: no cover

197 df, encoding="utf-8", sep=sep)

198

199 if split_row is not None:

200 gdf = df.groupby(split_row)

201 rows = []

202 for key, g in gdf:

203 key = str(key).strip('()')

204 if ':ref:' in key:

205 try:

206 key = key.split("`")[1].split("<")[0].strip()

207 except IndexError: # pragma: no cover

208 pass

209 if label_pattern is not None:

210 lab = label_pattern.format(section=key.replace(".", "D"))

211 rows.append("")

212 rows.append(lab)

213 rows.append("")

214 rows.append(key)

215 rows.append(split_row_level * len(key))

216 rows.append("")

217 rg = df2rst(g, add_line=add_line, align=align,

218 column_size=column_size, index=index,

219 list_table=list_table,

220 title=title, header=header, sep=sep,

221 number_format=number_format, replacements=replacements,

222 split_row=None, split_row_level=None,

223 split_col_common=split_col_common,

224 split_col_subsets=split_col_subsets,

225 filter_rows=filter_rows,

226 label_pattern=None)

227 rows.append(rg)

228 rows.append("")

229 return "\n".join(rows)

230

231 if split_col_common is not None:

232 rows = []

233 for sub in enumerate_split_df(df, split_col_common, split_col_subsets):

234 rg = df2rst(sub, add_line=add_line, align=align,

235 column_size=column_size, index=index,

236 list_table=list_table,

237 title=title, header=header, sep=sep,

238 number_format=number_format,

239 replacements=replacements,

240 filter_rows=filter_rows)

241 rows.append(rg)

242 rows.append('')

243 return "\n".join(rows)

244

245 import numpy

246 typstr = str

247

248 if filter_rows is not None:

249 df = filter_rows(df).copy()

250 if df.shape[0] == 0:

251 return ""

252 else:

253 df = df.copy()

254

255 def patternification(value, pattern):

256 if isinstance(value, float) and numpy.isnan(value):

257 return ""

258 return pattern.format(value)

259

260 def nan2empty(value):

261 if isinstance(value, float) and numpy.isnan(value):

262 return ""

263 return value

264

265 if number_format is not None:

266 if isinstance(number_format, int):

267 number_format = "{:.%dg}" % number_format

268 import pandas

269 typ1 = numpy.float64

270 _df = pandas.DataFrame({'f': [0.12]})

271 typ2 = list(_df.dtypes)[0]

272 number_format = {typ1: number_format, typ2: number_format}

273 df = df.copy()

274 for name, typ in zip(df.columns, df.dtypes):

275 if name in number_format:

276 pattern = number_format[name]

277 df[name] = df[name].apply(

278 lambda x: patternification(x, pattern))

279 elif typ in number_format:

280 pattern = number_format[typ]

281 df[name] = df[name].apply(

282 lambda x: patternification(x, pattern))

283

284 # check empty strings

285 col_strings = df.select_dtypes(include=[object]).columns

286 for c in col_strings:

287 df[c] = df[c].apply(nan2empty)

288

289 if index:

290 df = df.reset_index(drop=False).copy()

291 ind = df.columns[0]

292

293 def boldify(x):

294 try:

295 return "**{0}**".format(x)

296 except Exception as e: # pragma: no cover

297 raise Exception(

298 "Unable to boldify type {0}".format(type(x))) from e

299

300 try:

301 values = df[ind].apply(boldify)

302 except Exception: # pragma: no cover

303 warnings.warn("Unable to boldify the index (1).", SyntaxWarning)

304

305 try:

306 df[ind] = values

307 except Exception: # pragma: no cover

308 warnings.warn("Unable to boldify the index (2).", SyntaxWarning)

309

310 def align_string(s, align, length):

311 if len(s) < length:

312 if align == "l":

313 return s + " " * (length - len(s))

314 if align == "r":

315 return " " * (length - len(s)) + s

316 if align == "c":

317 m = (length - len(s)) // 2

318 return " " * m + s + " " * (length - m - len(s))

319 raise ValueError( # pragma: no cover

320 "align should be 'l', 'r', 'c' not '{0}'".format(align))

321 return s

322

323 def complete(cool):

324 if list_table:

325 i, s = cool

326 if s is None:

327 s = "" # pragma: no cover

328 if isinstance(s, float) and numpy.isnan(s):

329 s = ""

330 else:

331 s = typstr(s).replace("\n", " ")

332 if replacements is not None:

333 if s in replacements:

334 s = replacements[s]

335 return (" " + s) if s else s

336 else:

337 i, s = cool

338 if s is None:

339 s = " " * 4 # pragma: no cover

340 if isinstance(s, float) and numpy.isnan(s):

341 s = "" # pragma: no cover

342 else:

343 s = typstr(s).replace("\n", " ")

344 i -= 2

345 if replacements is not None:

346 if s in replacements:

347 s = replacements[s]

348 s = align_string(s.strip(), align, i)

349 return s

350

351 if list_table:

352

353 def format_on_row(row):

354 one = "\n -".join(map(complete, enumerate(row)))

355 res = " * -" + one

356 return res

357

358 rows = [".. list-table:: {0}".format(title if title else "").strip()]

359 if column_size is None:

360 rows.append(" :widths: auto")

361 else:

362 rows.append(" :widths: " + " ".join(map(str, column_size)))

363 if header:

364 rows.append(" :header-rows: 1")

365 rows.append("")

366 if header:

367 rows.append(format_on_row(df.columns))

368 rows.extend(map(format_on_row, df.values))

369 rows.append("")

370 table = "\n".join(rows)

371 return table

372 else:

373 length = [(len(_) if isinstance(_, typstr) else 5) for _ in df.columns]

374 for row in df.values:

375 for i, v in enumerate(row):

376 length[i] = max(length[i], len(typstr(v).strip()))

377 if column_size is not None:

378 if isinstance(column_size, list):

379 if len(length) != len(column_size):

380 raise ValueError( # pragma: no cover

381 "length and column_size should have the same size {0} != {1}".format(

382 len(length), len(column_size)))

383 for i in range(len(length)):

384 if not isinstance(column_size[i], int):

385 raise TypeError( # pragma: no cover

386 "column_size[{0}] is not an integer".format(i))

387 length[i] *= column_size[i]

388 elif isinstance(column_size, dict):

389 for i, c in enumerate(df.columns):

390 if c in column_size:

391 length[i] = column_size[c]

392 elif i in column_size:

393 length[i] = column_size[i]

394 else:

395 raise TypeError( # pragma: no cover

396 "column_size must be a list or a dictionary not {}".format(

397 type(column_size)))

398

399 ic = 2

400 length = [_ + ic for _ in length]

401 line = ["-" * lc for lc in length]

402 lineb = ["=" * lc for lc in length]

403 sline = "+%s+" % ("+".join(line))

404 slineb = "+%s+" % ("+".join(lineb))

405 res = [sline]

406

407 res.append("| %s |" % " | ".join(

408 map(complete, zip(length, df.columns))))

409 res.append(slineb)

410 res.extend(["| %s |" % " | ".join(map(complete, zip(length, row)))

411 for row in df.values])

412 if add_line:

413 t = len(res)

414 for i in range(t - 1, 3, -1):

415 res.insert(i, sline)

416 res.append(sline)

417 table = "\n".join(res) + "\n"

418 return table

419

420

421def df2html(self, class_table=None, class_td=None, class_tr=None,

422 class_th=None):

423 """

424 Converts the table into a :epkg:`html` string.

425

426 :param self: dataframe (to be added as a class method)

427 :param class_table: adds a class to the tag ``table`` (None for none)

428 :param class_td: adds a class to the tag ``td`` (None for none)

429 :param class_tr: adds a class to the tag ``tr`` (None for none)

430 :param class_th: adds a class to the tag ``th`` (None for none)

431 :return: HTML

432 """

433 clta = ' class="%s"' % class_table if class_table is not None else ""

434 cltr = ' class="%s"' % class_tr if class_tr is not None else ""

435 cltd = ' class="%s"' % class_td if class_td is not None else ""

436 clth = ' class="%s"' % class_th if class_th is not None else ""

437

438 rows = ["<table%s>" % clta]

439 rows.append(("<tr%s><th%s>" % (cltr, clth)) + ("</th><th%s>" %

440 clth).join(self.columns) + "</th></tr>")

441 septd = "</td><td%s>" % cltd

442 strtd = "<tr%s><td%s>" % (cltr, cltd)

443

444 typstr = str

445

446 def conv(s):

447 if s is None:

448 return "" # pragma: no cover

449 return typstr(s)

450

451 for row in self.values:

452 s = septd.join(conv(_) for _ in row)

453 rows.append(strtd + s + "</td></tr>")

454 rows.append("</table>")

455 rows.append("")

456 return "\n".join(rows)

Coverage for src/pyquickhelper/pandashelper/tblformat.py : 100%

181 statements

Coverage for src/pyquickhelper/pandashelper/tblformat.py : 100%

181 statements 181 run 0 missing 18 excluded

181 statements