Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding:utf-8 -*-
2"""
3@file
4@brief Various ways to import data into a dataframe
5"""
6import zipfile
7from io import StringIO, BytesIO
8from ..filehelper import read_content_ufs
def read_csv(filepath_or_buffer, compression=None, fvalid=None, **params):
    """
    Reads a csv file into a dataframe, it adds the compression zip
    which was removed in the latest version,
    see :epkg:`pandas:read_csv`.

    @param      filepath_or_buffer      filepath or buffer
    @param      compression             see :epkg:`pandas:read_csv`,
                                        the zip branch is taken when the input
                                        is a string and *compression* is ``"zip"``
                                        or *compression* is None and the string
                                        ends with ``.zip``
    @param      params                  see :epkg:`pandas:read_csv`,
                                        ``encoding`` is also used to decode
                                        the members of a zip archive
                                        (``utf-8`` if missing or None)
    @param      fvalid                  if the zip file contains many files,
                                        this function receives the name of
                                        every member; when it returns a false
                                        value, the member is not parsed and its
                                        raw content (bytes) is kept instead
    @return                             dataframe or a dictionary
                                        (name, dataframe or bytes) if the
                                        archive contains more than one member

    Raises ``FileNotFoundError`` if the archive is empty and ``Exception``
    (chained to the :epkg:`pandas` ``ParserError``) if a member cannot be
    parsed, the message shows the first lines of that member.

    See blog post :ref:`blogpost_read_csv`.
    """
    import pandas

    is_zip = isinstance(filepath_or_buffer, str) and (
        compression == "zip" or (
            compression is None and filepath_or_buffer.endswith(".zip")))
    if not is_zip:
        # Anything else is handled by pandas itself.
        return pandas.read_csv(  # pragma: no cover
            filepath_or_buffer, compression=compression, **params)

    # utf-8 is the default of pandas.read_csv and a superset of ascii,
    # `or` also covers an explicit encoding=None.
    encoding = params.get('encoding') or 'utf-8'

    content = read_content_ufs(filepath_or_buffer, asbytes=True)
    res = {}
    with zipfile.ZipFile(BytesIO(content)) as myzip:
        infos = myzip.infolist()
        if not infos:
            raise FileNotFoundError(  # pragma: no cover
                "unable to find a file in " + filepath_or_buffer)

        for info in infos:
            name = info.filename
            with myzip.open(name, "r") as f:
                data = f.read()

            if fvalid is not None and not fvalid(name):
                # Rejected by the caller: keep the raw bytes, no parsing.
                res[name] = data
                continue

            text = data.decode(encoding=encoding)
            try:
                # The member is already decompressed and decoded, pandas
                # must not be asked to unzip the text buffer again.
                res[name] = pandas.read_csv(
                    StringIO(text), compression=None, **params)
            except pandas.errors.ParserError as e:  # pragma: no cover
                lines = text.split("\n")
                end = min(len(lines), 5)
                mes = "Parsing errors in '{0}', first lines:\n{1}".format(
                    name, "\n".join(lines[:end]))
                raise Exception(mes) from e

    # A single member is returned directly, not wrapped in a dictionary.
    return res if len(res) > 1 else next(iter(res.values()))