Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2""" 

3@file 

4@brief Saves and reads a :epkg:`dataframe` into a :epkg:`zip` file. 

5""" 

6import io 

7import os 

8import zipfile 

9import pandas 

10import numpy 

11 

12 

def to_zip(df, zipfilename, zname="df.csv", **kwargs):
    """
    Saves a :epkg:`Dataframe` into a :epkg:`zip` file.
    It can be read by @see fn read_zip.

    :param df: :epkg:`dataframe` or :epkg:`numpy:array`
    :param zipfilename: a :epkg:`*py:zipfile:ZipFile` or a filename
    :param zname: a filename in the zipfile
    :param kwargs: parameters for :epkg:`pandas:to_csv` or
        :epkg:`numpy:save`
    :return: zipfilename
    :raises ValueError: if the extension of *zname* does not match
        the type of *df* (``'.npy'`` is reserved for numpy arrays)
    :raises TypeError: if *df* or *zipfilename* has an unsupported type
    :raises NotImplementedError: if *zipfilename* is a filename
        which does not end with ``'.zip'``

    When *zipfilename* is a filename, the archive is created
    (mode ``'w'``) and closed by this function, even if writing fails.
    When it is an already opened :epkg:`*py:zipfile:ZipFile`,
    it is left open and the caller is responsible for closing it.

    .. exref::
        :title: Saves and reads a dataframe in a zip file
        :tag: dataframe

        This shows an example on how to save and read a
        :epkg:`pandas:dataframe` directly into a zip file.

        .. runpython::
            :showcode:

            import pandas
            from pandas_streaming.df import to_zip, read_zip

            df = pandas.DataFrame([dict(a=1, b="e"),
                                   dict(b="f", a=5.7)])

            name = "dfs.zip"
            to_zip(df, name, encoding="utf-8", index=False)
            df2 = read_zip(name, encoding="utf-8")
            print(df2)

    .. exref::
        :title: Saves and reads a numpy array in a zip file
        :tag: array

        This shows an example on how to save and read a
        :epkg:`numpy:ndarray` directly into a zip file.

        .. runpython::
            :showcode:

            import numpy
            from pandas_streaming.df import to_zip, read_zip

            arr = numpy.array([[0.5, 1.5], [0.4, 1.6]])

            name = "dfsa.zip"
            to_zip(arr, name, 'arr.npy')
            arr2 = read_zip(name, 'arr.npy')
            print(arr2)
    """
    # Step 1: serializes the data in memory (text for a dataframe,
    # bytes for an array) before touching the archive.
    if isinstance(df, pandas.DataFrame):
        ext = os.path.splitext(zname)[-1]
        if ext == '.npy':
            raise ValueError(  # pragma: no cover
                "Extension '.npy' cannot be used to save a dataframe.")
        stb = io.StringIO()
        df.to_csv(stb, **kwargs)
    elif isinstance(df, numpy.ndarray):
        ext = os.path.splitext(zname)[-1]
        if ext != '.npy':
            raise ValueError(  # pragma: no cover
                "Extension '.npy' is required when saving a numpy array.")
        stb = io.BytesIO()
        numpy.save(stb, df, **kwargs)
    else:
        raise TypeError(  # pragma: no cover
            "Type not handled {0}".format(type(df)))
    text = stb.getvalue()

    # Step 2: writes the serialized content into the archive.
    if isinstance(zipfilename, str):
        ext = os.path.splitext(zipfilename)[-1]
        if ext != '.zip':
            raise NotImplementedError(  # pragma: no cover
                "Only zip file are implemented not '{0}'.".format(ext))
        # The context manager closes the archive even if writestr fails.
        with zipfile.ZipFile(zipfilename, 'w') as zf:
            zf.writestr(zname, text)
    elif isinstance(zipfilename, zipfile.ZipFile):
        # The archive belongs to the caller, it is not closed here.
        zipfilename.writestr(zname, text)
    else:
        raise TypeError(  # pragma: no cover
            "No implementation for type '{0}'".format(type(zipfilename)))

    return zipfilename

102 

103 

def read_zip(zipfilename, zname=None, **kwargs):
    """
    Reads a :epkg:`dataframe` from a :epkg:`zip` file.
    It can be saved by @see fn to_zip.

    :param zipfilename: a :epkg:`*py:zipfile:ZipFile` or a filename
    :param zname: a filename in zipfile, if None, takes the first one
    :param kwargs: parameters for :func:`pandas.read_csv`, or for
        :epkg:`numpy:load` if *zname* ends with ``'.npy'``
    :return: :func:`pandas.DataFrame` or :epkg:`numpy:array`
    :raises NotImplementedError: if *zipfilename* is a filename
        which does not end with ``'.zip'``
    :raises TypeError: if *zipfilename* has an unsupported type

    When *zipfilename* is a filename, the archive is opened and closed
    by this function, even if reading fails. When it is an already opened
    :epkg:`*py:zipfile:ZipFile`, it is left open.
    """
    if isinstance(zipfilename, str):
        ext = os.path.splitext(zipfilename)[-1]
        if ext != '.zip':
            raise NotImplementedError(  # pragma: no cover
                "Only zip files are supported not '{0}'.".format(ext))
        zf = zipfile.ZipFile(zipfilename, 'r')  # pylint: disable=R1732
        close = True
    elif isinstance(zipfilename, zipfile.ZipFile):
        zf = zipfilename
        close = False
    else:
        raise TypeError(  # pragma: no cover
            "No implementation for type '{0}'".format(type(zipfilename)))

    try:
        if zname is None:
            zname = zf.namelist()[0]
        content = zf.read(zname)
    finally:
        # The member is fully loaded in memory (or the lookup failed),
        # an archive opened here is released in both cases.
        if close:
            zf.close()

    stb = io.BytesIO(content)
    ext = os.path.splitext(zname)[-1]
    if ext == '.npy':
        return numpy.load(stb, **kwargs)
    return pandas.read_csv(stb, **kwargs)