Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief Jeux de données reliés aux données carroyées.
5"""
6import os
7import zipfile
8import collections
9import datetime
10import tempfile
11from io import BytesIO
12from dbfread.field_parser import FieldParser
13from dbfread import DBF
14from dbfread.dbf import expand_year
15import pandas
16from .data_helper import get_data_folder
class DBFInMemory(DBF):
    """
    Overwrites `DBF <https://github.com/olemb/dbfread/blob/master/dbfread/dbf.py#L77>`_
    to read data from memory and not from a file. The object
    `DBF <http://dbfread.readthedocs.io/en/latest/dbf_objects.html>`_
    needs a file by default. This class avoids creating an intermediate
    file when the data is compressed in a :epkg:`zip` file.
    """

    def __init__(self, filename, encoding=None, ignorecase=True,
                 lowernames=False, parserclass=FieldParser,
                 recfactory=collections.OrderedDict,
                 load=False, raw=False, ignore_missing_memofile=False,
                 char_decode_errors='strict'):
        # *filename* is either a regular path (str) or the raw bytes of a
        # *.dbf* file already loaded in memory.
        if isinstance(filename, str):
            # Regular file path: delegate everything to dbfread.DBF.
            DBF.__init__(self, filename, encoding=encoding, ignorecase=ignorecase,
                         lowernames=lowernames, parserclass=parserclass,
                         recfactory=recfactory, load=load,
                         raw=raw, ignore_missing_memofile=ignore_missing_memofile,
                         char_decode_errors=char_decode_errors)
        else:
            # In-memory case: replicate DBF.__init__ attribute by attribute,
            # but keep the bytes in self.content instead of opening a file.
            # NOTE(review): this mirrors dbfread's DBF.__init__ statement by
            # statement; keep it in sync with the installed dbfread version.
            self.encoding = encoding
            self.ignorecase = ignorecase
            self.lowernames = lowernames
            self.parserclass = parserclass
            self.raw = raw
            self.ignore_missing_memofile = ignore_missing_memofile
            self.char_decode_errors = char_decode_errors

            if recfactory is None:
                # Identity factory: records stay as lists of (name, value) pairs.
                self.recfactory = lambda items: items
            else:
                self.recfactory = recfactory

            # No file on disk: name/filename stay None, the bytes are kept
            # in self.content and re-wrapped in a BytesIO on each iteration.
            self.name = None
            self.filename = None
            self.content = filename

            self._records = None
            self._deleted = None

            # Filled in by self._read_headers()
            self.memofilename = None
            self.header = None
            self.fields = []       # namedtuples
            self.field_names = []  # strings

            # Parse header, field headers and sanity checks straight from
            # the in-memory buffer (parent methods accept any binary stream).
            obj = BytesIO(filename)
            self._read_header(obj)
            self._read_field_headers(obj)
            self._check_headers()

            try:
                self.date = datetime.date(expand_year(self.header.year),
                                          self.header.month, self.header.day)
            except ValueError:  # pragma: no cover
                # Invalid date or '\x00\x00\x00'.
                self.date = None

            self.memofilename = self._get_memofilename()

            if load:
                self.load()

    def _iter_records(self, record_type=b' '):
        # Same record loop as dbfread.DBF._iter_records, except the input
        # stream is a BytesIO over self.content instead of an open file.
        infile = BytesIO(self.content)
        with self._open_memofile() as memofile:

            # Skip to first record.
            infile.seek(self.header.headerlen, 0)

            if not self.raw:
                field_parser = self.parserclass(self, memofile)
                parse = field_parser.parse

            # Shortcuts for speed.
            skip_record = self._skip_record
            read = infile.read

            while True:
                sep = read(1)

                if sep == record_type:
                    if self.raw:
                        items = [(field.name, read(field.length))
                                 for field in self.fields]
                    else:
                        items = [(field.name,
                                  parse(field, read(field.length)))
                                 for field in self.fields]

                    yield self.recfactory(items)

                elif sep in (b'\x1a', b''):
                    # End of records.
                    break
                else:
                    skip_record(infile)
def load_dbf_from_zip(filename):
    """
    Loads a *.dbf* file compressed into a zip file.
    It only takes the first *.dbf* file from the zip.

    @param filename zip file
    @return dataframe
    """
    with zipfile.ZipFile(filename) as archive:
        # Keep only the entries ending with '.dbf', first one wins.
        dbf_names = [info.filename for info in archive.infolist()
                     if info.filename.endswith(".dbf")]
        if not dbf_names:
            raise FileNotFoundError(  # pragma: no cover
                "No dbf file in '{0}'".format(filename))
        with archive.open(dbf_names[0], "r") as handle:
            raw_bytes = handle.read()
    # DBFInMemory accepts raw bytes, no intermediate file needed.
    rows = list(DBFInMemory(raw_bytes))
    return pandas.DataFrame(rows)
def _read_geopandas_from_bytes(mif, mid, **kwargs):
    """
    Returns a :epkg:`GeoDataFrame` from two sequences of bytes,
    one for file *.mif*, one from file *.mid*.
    Unfortunately, :epkg:`geopandas` does not read from
    a buffer, and :epkg:`fiona` does it after writing
    in a virtual file (not clear if it is a temporary file or not).

    @param mif content of the *.mif* file (bytes)
    @param mid content of the *.mid* file (bytes)
    @param kwargs additional arguments forwarded to
        :meth:`GeoDataFrame.from_file`
    @return :epkg:`GeoDataFrame`
    """
    # Delayed import because the import fails sometimes
    # on Windows.
    from geopandas import GeoDataFrame

    # Write the *.mif* bytes and close the handle before reading:
    # fiona may not be able to reopen a file still held open (Windows).
    with tempfile.NamedTemporaryFile(mode='w+b', delete=False, suffix='.mif') as temp:
        temp.write(mif)
        name_mif = temp.name
    # splitext is safer than str.replace: '.mif' could appear
    # elsewhere in the temporary path.
    name_mid = os.path.splitext(name_mif)[0] + '.mid'
    try:
        with open(name_mid, "wb") as f:
            f.write(mid)
        gdf = GeoDataFrame.from_file(name_mid, **kwargs)
    finally:
        # Always remove both temporary files, even if reading fails,
        # otherwise they leak in the temporary folder.
        for name in (name_mid, name_mif):
            if os.path.exists(name):
                os.remove(name)
    return gdf
def load_shapes_from_zip(filename):
    """
    Loads a *.mif* and a *.mid* file compressed into a zip file.
    It only takes the first *.mid* and *.mif* files from the zip.

    @param filename zip file
    @return dataframe
    """
    with zipfile.ZipFile(filename) as archive:
        entries = [info.filename for info in archive.infolist()]
        # Locate the first *.mif* and the first *.mid* entries.
        mif_names = [name for name in entries if name.endswith(".mif")]
        if not mif_names:
            raise FileNotFoundError(  # pragma: no cover
                "No mif file in '{0}'".format(filename))
        mid_names = [name for name in entries if name.endswith(".mid")]
        if not mid_names:
            raise FileNotFoundError(  # pragma: no cover
                "No mid file in '{0}'".format(filename))
        with archive.open(mif_names[0], "r") as handle:
            mif_bytes = handle.read()
        with archive.open(mid_names[0], "r") as handle:
            mid_bytes = handle.read()
    return _read_geopandas_from_bytes(mif_bytes, mid_bytes)
def load_carreau_from_zip(file_car=None, file_rect=None):
    """
    Returns a sample of gridded data (*données carroyées*).
    The data is available in folder
    `data <https://github.com/sdpython/papierstat/tree/master/
    src/papierstat/datasets/data>`_.
    Notebooks associated with this dataset:

    .. runpython::
        :rst:

        from papierstat.datasets.documentation import list_notebooks_rst_links
        links = list_notebooks_rst_links('visualisation', 'carte_carreau')
        links = [' * %s' % s for s in links]
        print('\\n'.join(links))

    @param file_car the squares (*carreaux*)
    @param file_rect the data
    @return 4 dataframes

    Results:

    * population data per square
    * shapefiles of the squares
    * population data per rectangle
    * shapefiles of the rectangles

    .. note::

        In order to comply with the dissemination rule about households'
        fiscal income, no statistical information (except the total
        number of individuals) is disseminated for squares of fewer than
        11 households. Squares with such small counts are therefore
        grouped into larger rectangles satisfying this rule of
        11 households minimum.
        `source : INSEE <https://www.insee.fr/fr/statistiques/2520034>`_.
    """
    # Apply the default location independently for each file so that a
    # caller may override only one of them; the previous behaviour
    # (both None -> both defaults) is preserved.
    data = None
    if file_car is None:
        data = get_data_folder()
        file_car = os.path.join(data, 'reunion.zip')
    if file_rect is None:
        if data is None:
            data = get_data_folder()
        file_rect = os.path.join(data, 'reunion_rect.zip')
    dfcar = load_dbf_from_zip(file_car)
    shpcar = load_shapes_from_zip(file_car)
    dfrect = load_dbf_from_zip(file_rect)
    shprect = load_shapes_from_zip(file_rect)
    return dfcar, shpcar, dfrect, shprect