Coverage for src/ensae_teaching_cs/data/data_shape_files.py: 100%

24 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-04-28 06:23 +0200

1# coding: utf-8 

2""" 

3@file 

4@brief Shapefiles data. 

5""" 

6import os 

7import pandas 

8from pyquickhelper.filehelper import get_url_content_timeout, unzip_files 

9 

10 

def data_shape_files(name, cache=".", load=True):
    """
    Downloads shape files and loads them with :epkg:`geopandas`.

    :param name: name of the shape file (see below)
    :param cache: cache folder, it receives the downloaded archive
        and the files extracted from it
    :param load: loads the shape files, the function relies on
        :epkg:`geopandas` (NOTE(review): this flag is not read by the
        implementation, the data is always loaded; confirm the
        intended behaviour for ``load=False``)
    :return: shape files, the object returned by ``geopandas.read_file``
        for the first ``.shp`` file of the archive, extended with the
        columns ``centroid``, ``DEPLONG`` (centroid ``x``) and
        ``DEPLAT`` (centroid ``y``)
    :raises ValueError: if *name* is not one of the names listed below
    :raises FileNotFoundError: if no ``.shp`` file was extracted

    List of available shape files:

    * `'depfr2018'`: see `Contours des départements français issus d'OpenStreetMap
      <https://www.data.gouv.fr/en/datasets/contours-des-departements-francais-issus-d-openstreetmap/>`_
    """
    # Reject unknown names first so nothing is downloaded for a bad request.
    if name != 'depfr2018':
        raise ValueError(
            f"Unexpected value for shape files: '{name}'.")

    url = 'https://github.com/sdpython/data/raw/master/shapefiles/france_departements/departements-20180101-shp.zip'
    dest = os.path.join(cache, 'departements-20180101-shp.zip')
    # The archive is fetched only when it is not already in the cache folder.
    if not os.path.exists(dest):
        get_url_content_timeout(url, output=dest, encoding=None)
    extracted = unzip_files(dest, where_to=cache)
    shp = [path for path in extracted if path.endswith('.shp')]
    if not shp:
        raise FileNotFoundError(  # pragma: no cover
            f"Unable to find shp file in '{cache}'.")

    # Imported here so that the module can be imported without geopandas.
    import geopandas
    df = geopandas.read_file(shp[0])
    df['centroid'] = df['geometry'].apply(lambda r: r.centroid)
    df['DEPLONG'] = df['centroid'].apply(lambda r: r.x)
    df['DEPLAT'] = df['centroid'].apply(lambda r: r.y)
    return df

43 

44 

def load_french_departments():
    """
    Reads the CSV file ``data_shp/departement_french_2018.csv``
    stored next to this module and returns it as a dataframe.
    It holds the list of French departments and the center of each.

    :return: dataframe produced by ``pandas.read_csv``
    """
    module_folder = os.path.dirname(__file__)
    csv_path = os.path.join(
        os.path.abspath(module_folder),
        "data_shp",
        "departement_french_2018.csv",
    )
    return pandas.read_csv(csv_path)