Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief Saves and reads a :epkg:`dataframe` into a :epkg:`zip` file.
5"""
6import io
7import os
8import zipfile
9import pandas
10import numpy
def to_zip(df, zipfilename, zname="df.csv", **kwargs):
    """
    Saves a :epkg:`Dataframe` or a :epkg:`numpy:array` into a :epkg:`zip` file.
    It can be read by @see fn read_zip.

    :param df: :epkg:`dataframe` or :epkg:`numpy:array`
    :param zipfilename: a :epkg:`*py:zipfile:ZipFile` or a filename
        (a filename must end with ``.zip``, the archive is then
        created in mode ``'w'`` and closed before returning)
    :param zname: a filename in the zipfile, a dataframe cannot use
        extension ``.npy``, an array must use it
    :param kwargs: parameters for :epkg:`pandas:to_csv` or
        :epkg:`numpy:save`
    :return: zipfilename (the value received, filename or opened archive)
    :raises ValueError: if the extension of *zname* does not match
        the type of *df*
    :raises TypeError: if *df* or *zipfilename* has an unsupported type
    :raises NotImplementedError: if *zipfilename* is a filename
        which does not end with ``.zip``

    .. exref::
        :title: Saves and reads a dataframe in a zip file
        :tag: dataframe

        This shows an example on how to save and read a
        :epkg:`pandas:dataframe` directly into a zip file.

        .. runpython::
            :showcode:

            import pandas
            from pandas_streaming.df import to_zip, read_zip

            df = pandas.DataFrame([dict(a=1, b="e"),
                                   dict(b="f", a=5.7)])

            name = "dfs.zip"
            to_zip(df, name, encoding="utf-8", index=False)
            df2 = read_zip(name, encoding="utf-8")
            print(df2)

    .. exref::
        :title: Saves and reads a numpy array in a zip file
        :tag: array

        This shows an example on how to save and read a
        :epkg:`numpy:ndarray` directly into a zip file.

        .. runpython::
            :showcode:

            import numpy
            from pandas_streaming.df import to_zip, read_zip

            arr = numpy.array([[0.5, 1.5], [0.4, 1.6]])

            name = "dfsa.zip"
            to_zip(arr, name, 'arr.npy')
            arr2 = read_zip(name, 'arr.npy')
            print(arr2)
    """
    # Serialize in memory first: text (CSV) for a dataframe,
    # bytes (npy format) for an array.
    if isinstance(df, pandas.DataFrame):
        stb = io.StringIO()
        ext = os.path.splitext(zname)[-1]
        if ext == '.npy':
            raise ValueError(  # pragma: no cover
                "Extension '.npy' cannot be used to save a dataframe.")
        df.to_csv(stb, **kwargs)
    elif isinstance(df, numpy.ndarray):
        stb = io.BytesIO()
        ext = os.path.splitext(zname)[-1]
        if ext != '.npy':
            raise ValueError(  # pragma: no cover
                "Extension '.npy' is required when saving a numpy array.")
        numpy.save(stb, df, **kwargs)
    else:
        raise TypeError(  # pragma: no cover
            "Type not handled {0}".format(type(df)))
    text = stb.getvalue()

    if isinstance(zipfilename, str):
        ext = os.path.splitext(zipfilename)[-1]
        if ext != '.zip':
            raise NotImplementedError(  # pragma: no cover
                "Only zip file are implemented not '{0}'.".format(ext))
        # The archive is owned by this function: the context manager
        # closes it even if writing the member fails.
        with zipfile.ZipFile(zipfilename, 'w') as zf:
            zf.writestr(zname, text)
    elif isinstance(zipfilename, zipfile.ZipFile):
        # The archive is owned by the caller, it is left open.
        zipfilename.writestr(zname, text)
    else:
        raise TypeError(  # pragma: no cover
            "No implementation for type '{0}'".format(type(zipfilename)))
    return zipfilename
def read_zip(zipfilename, zname=None, **kwargs):
    """
    Reads a :epkg:`dataframe` or a :epkg:`numpy:array` from a :epkg:`zip` file.
    It can be saved by @see fn to_zip.

    :param zipfilename: a :epkg:`*py:zipfile:ZipFile` or a filename
        (a filename must end with ``.zip``, the archive is then
        opened in mode ``'r'`` and closed before returning)
    :param zname: a filename in zipfile, if None, takes the first one
    :param kwargs: parameters for :func:`pandas.read_csv`, or for
        :func:`numpy.load` if *zname* ends with ``.npy``
    :return: :func:`pandas.DataFrame` or :epkg:`numpy:array`
    :raises NotImplementedError: if *zipfilename* is a filename
        which does not end with ``.zip``
    :raises TypeError: if *zipfilename* has an unsupported type
    :raises IndexError: if *zname* is None and the archive is empty
    """
    if isinstance(zipfilename, str):
        ext = os.path.splitext(zipfilename)[-1]
        if ext != '.zip':
            raise NotImplementedError(  # pragma: no cover
                "Only zip files are supported not '{0}'.".format(ext))
        zf = zipfile.ZipFile(zipfilename, 'r')  # pylint: disable=R1732
        close = True
    elif isinstance(zipfilename, zipfile.ZipFile):
        zf = zipfilename
        close = False
    else:
        raise TypeError(  # pragma: no cover
            "No implementation for type '{0}'".format(type(zipfilename)))

    # The member is fully loaded in memory, the archive is not needed
    # afterwards. An archive opened here is closed even if the member
    # is missing; an archive given by the caller is left open.
    try:
        if zname is None:
            zname = zf.namelist()[0]
        content = zf.read(zname)
    finally:
        if close:
            zf.close()

    stb = io.BytesIO(content)
    ext = os.path.splitext(zname)[-1]
    if ext == '.npy':
        return numpy.load(stb, **kwargs)
    return pandas.read_csv(stb, **kwargs)