Main housing construction period

[1]:
from pynsee.download import get_file_list, get_column_metadata, download_file
from pynsee.geodata import get_geodata, get_geodata_list
from pynsee.geodata import GeoFrDataFrame

import py7zr
import tempfile
import os
import re
import shutil
import math
import urllib.request as request
from contextlib import closing
from pathlib import Path

import numpy as np
import pandas as pd
import geopandas as gpd
from pandas.api.types import CategoricalDtype
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import descartes
[2]:
import logging
import sys
logging.basicConfig(stream=sys.stdout,
                    level=logging.INFO,
                    format="%(message)s")
[3]:
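# list the files available through pynsee.download and get the column metadata
# of the 2016 housing census file (used below to label the ACHL values)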
meta = get_file_list()
metacol = get_column_metadata("RP_LOGEMENT_2016")
pynsee.download's metadata rely on volunteering contributors and their manual updates. get_file_list does not provide data from official Insee's metadata API
Consequently, please report any issue
Column-specific metadata has been found for this file
[4]:
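# keep the ACHL construction-period labels and group every period after 2005
# into a single ">2005" category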
meta_achl = metacol[metacol["column"] == "ACHL"].reset_index(drop=True)
meta_achl.loc[~meta_achl["value"].isin(["A11", "A12", "B11", "B12", "C100"]), "value"] = ">2005"
meta_achl.loc[meta_achl["value"] == ">2005", "value_label_fr"] = "Après 2005"
meta_achl = meta_achl[["value", "value_label_fr"]].drop_duplicates()
meta_achl.columns = ["ACHL", "ACHL_label_fr"]
meta_achl
[4]:
    ACHL   ACHL_label_fr
0    A11      Avant 1919
1    A12  De 1919 à 1945
2    B11  De 1946 à 1970
3    B12  De 1971 à 1990
4   C100  De 1991 à 2005
5  >2005      Après 2005
[5]:
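# download the 2017 housing census file and sum the dwelling weights (IPONDL)
# by commune, IRIS and construction period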
df = download_file("RP_LOGEMENT_2017", variables = ["COMMUNE", "IRIS", "ACHL", "IPONDL"])
DF = df[["COMMUNE", "IRIS", "ACHL", "IPONDL"]].reset_index(drop=True)
DF["IPONDL"] = pd.to_numeric(DF["IPONDL"])
DF = DF.groupby(["COMMUNE", "IRIS", "ACHL"], as_index=False).IPONDL.agg("sum")
Downloading: 100%|██████████| 373M/373M [00:30<00:00, 12.9MiB/s]
Extracting: 100%|██████████| 4.42G/4.42G [00:13<00:00, 354MB/s]
[6]:
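# total dwelling weight per commune and IRIS, used as denominator below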
pondTot = DF.groupby(["COMMUNE", "IRIS"], as_index=False)['IPONDL'].agg('sum')
pondTot = pondTot.rename(columns={'IPONDL': 'pondTot'})
DF = DF.merge(pondTot, on = ["COMMUNE", "IRIS"], how='left')
[7]:
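# share of each construction period within each IRIS; rebuild the 9-character
# CODE_IRIS for communes that are not split into IRIS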
DF["pct"] = DF["IPONDL"] / DF["pondTot"] * 100
DF.loc[~DF["ACHL"].isin(["A11", "A12", "B11", "B12", "C100"]), "ACHL"] = ">2005"
data = DF.groupby(["COMMUNE", "IRIS", "ACHL"], as_index=False).pct.agg("sum")
data = data.merge(meta_achl, on = "ACHL", how="left")
data.loc[data["IRIS"] == "ZZZZZZZZZ", "IRIS"] = "0000"
data["CODE_IRIS"] = data["IRIS"]
data.loc[data["IRIS"] == "0000", "CODE_IRIS"] = data.loc[data["IRIS"] == "0000", "COMMUNE"] + data.loc[data["IRIS"] == "0000", "IRIS"]

#data.head(10)
[8]:
data.head(10)
[8]:
   COMMUNE  IRIS   ACHL        pct   ACHL_label_fr  CODE_IRIS
0    01001  0000  >2005   7.141565      Après 2005  010010000
1    01001  0000    A11  28.765250      Avant 1919  010010000
2    01001  0000    A12   3.160147  De 1919 à 1945  010010000
3    01001  0000    B11   3.445027  De 1946 à 1970  010010000
4    01001  0000    B12  29.377355  De 1971 à 1990  010010000
5    01001  0000   C100  28.110656  De 1991 à 2005  010010000
6    01002  0000  >2005   9.469609      Après 2005  010020000
7    01002  0000    A11  62.087134      Avant 1919  010020000
8    01002  0000    A12  10.058386  De 1919 à 1945  010020000
9    01002  0000    B11   4.744642  De 1946 à 1970  010020000
[9]:
# https://geoservices.ign.fr/contoursiris
url_ign_iris22 = "https://wxs.ign.fr/1yhlj2ehpqf3q6dt6a2y7b64/telechargement/inspire/CONTOURS-IRIS-PACK_2022-01$CONTOURS-IRIS_2-1__SHP__FRA_2022-01-01/file/CONTOURS-IRIS_2-1__SHP__FRA_2022-01-01.7z"
ign_iris_ftp20 = "https://wxs.ign.fr/1yhlj2ehpqf3q6dt6a2y7b64/telechargement/inspire/CONTOURS-IRIS-2020-01-01$CONTOURS-IRIS_2-1__SHP__FRA_2020-01-01/file/CONTOURS-IRIS_2-1__SHP__FRA_2020-01-01.7z"

def get_iris_data(url=ign_iris_ftp20, update=False):
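    """Download the IGN CONTOURS-IRIS 7z archive into the user's home folder,
    extract it and return all departmental IRIS shapefiles concatenated into
    one GeoDataFrame reprojected to EPSG:3857."""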

    filename = url.split("/")[-1]
    filename_clean = filename.split(".")[0]

    home = Path.home()
    dataDir = str(home) + "/" + filename_clean
    if not os.path.exists(dataDir):
        os.mkdir(dataDir)

    file_iris = dataDir + "/" + "file_" + filename_clean

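    # download the archive only if it is not already on disk, or if update=True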
    if (not os.path.exists(file_iris)) or (update):
        with closing(request.urlopen(url, timeout=None)) as r:
            with open(file_iris, 'wb') as f:
                shutil.copyfileobj(r, f)

    with py7zr.SevenZipFile(file_iris, mode='r') as z:
        z.extractall(dataDir)

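    # collect the departmental CONTOURS-IRIS shapefiles found in the extracted folders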
    list_file = []
    list_file_size = []

    for root, d_names, f_names in os.walk(dataDir):
        for f in f_names:
            filename = root + "/" + f
            if re.search("CONTOURS-IRIS.shp", filename):
                list_file += [filename]
                list_file_size += [os.path.getsize(filename)]

    list_shp_df = []

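    # read each shapefile and reproject it to Web Mercator before concatenation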
    for f in list_file:

        shp = gpd.read_file(f)
        shp = shp.to_crs("EPSG:3857")
        list_shp_df += [shp]

    shpFinal = pd.concat(list_shp_df)

    return shpFinal
[10]:
shapefile = get_iris_data(update=False)
[11]:
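# align the IRIS shapefile with the census data before merging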
shp = shapefile.sort_values(by = ["INSEE_COM"]).reset_index(drop=True)
shp.loc[shp["IRIS"] == "0000", "IRIS"] = "ZZZZZZZZZ"
shp.rename(columns={'INSEE_COM':'COMMUNE'}, inplace=True)
[12]:
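# keep, for each IRIS, the construction period with the largest share, and
# derive the department code from the commune code (3 characters for overseas
# departments starting with 97, 2 characters otherwise)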
data['pct_max'] = data.groupby(['COMMUNE', "IRIS"])['pct'].transform('max')
dataMax = data[data["pct"] == data["pct_max"]].reset_index(drop=True)

list_dep_extract = []
for i in dataMax.index:
    dep = dataMax["COMMUNE"].values[i][:3]
    if not re.search("^97", dep):
        dep = dataMax["COMMUNE"].values[i][:2]
    list_dep_extract += [dep]

dataMax["insee_dep"] = list_dep_extract

dataFinal = dataMax.merge(shp[["CODE_IRIS", "geometry", "NOM_COM"]], on=["CODE_IRIS"], how = "left")
[13]:
dataFinal["crsCoord"] = "EPSG:3857"
dataFinal = dataFinal.reset_index(drop=True)
[14]:
# IRIS boundaries are missing for the following IRIS codes
dataFinal.loc[dataFinal["geometry"].isnull(), ['insee_dep', "COMMUNE", "NOM_COM","CODE_IRIS", "geometry", "pct"]]
[14]:
       insee_dep  COMMUNE  NOM_COM  CODE_IRIS  geometry        pct
 8678         21    21213      NaN  212130000      None  33.436533
 9049         21    21507      NaN  215070000      None  35.000000
14199         31    31149      NaN  311490901      None  47.047208
21497         45    45287      NaN  452870000      None  48.279338
34770         69    69072      NaN  690720101      None  47.106233
34771         69    69072      NaN  690720102      None  61.776404
34772         69    69072      NaN  690720103      None  37.821362
47614         94    94022      NaN  940220101      None  27.941306
47616         94    94022      NaN  940220103      None  24.618540
47617         94    94022      NaN  940220104      None  36.287769
48925        973    97353      NaN  973530000      None  49.746619
[15]:
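# commune boundaries, used as a fallback for IRIS with missing geometry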
com = get_geodata('ADMINEXPRESS-COG-CARTO.LATEST:commune')
Previously saved data has been used:
/home/onyxia/.cache/pynsee/pynsee/a4f589755961ab3aa6bdff99605334d1
Set update=True to get the most up-to-date data
[16]:
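# for IRIS without boundaries, fall back on the commune geometry
# (only when the commune has a single row in the data and a single geometry)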
list_com = dataFinal.loc[dataFinal["geometry"].isnull()].COMMUNE.unique()

for c in list_com:
    dc = dataFinal[dataFinal["COMMUNE"] == c].reset_index(drop=True)
    if len(dc.index) == 1:
        geo = com[com["insee_com"] == c]
        if len(geo.index) == 1:
            geo = geo.get_geom()
            dataFinal.loc[(dataFinal["COMMUNE"] == c), "geometry"] = [geo]
[17]:
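# drop rows that still have no geometry, move overseas departments closer to
# mainland France and enlarge the Paris region departments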
dataFinal = dataFinal[~dataFinal.geometry.isnull()]
dataFinal = GeoFrDataFrame(dataFinal).translate()
dataFinal = dataFinal.zoom(["75","92", "93", "91", "77", "78", "95", "94"],
                          factor=1.5, startAngle = math.pi * (1 - 3 * 1/9))
976 is missing from insee_dep column !
[18]:
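# translate the French period labels into English and turn them into an ordered categorical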
dataFinal['ACHL_label_en'] = dataFinal['ACHL_label_fr'].astype(str)
dataFinal = dataFinal.replace({'ACHL_label_en': {"Après": "After", "Avant" : "Before", "De" : "From", "à" : "to"}}, regex=True)
list_ranges_ACHL_label_en = ['Before 1919', 'From 1919 to 1945', 'From 1946 to 1970', 'From 1971 to 1990','From 1991 to 2005', 'After 2005']
dataFinal['ACHL_label_en'] = dataFinal['ACHL_label_en'].astype(CategoricalDtype(categories=list_ranges_ACHL_label_en, ordered=True))

[19]:
dataFinal.head(3)
[19]:
COMMUNE IRIS ACHL pct ACHL_label_fr CODE_IRIS pct_max insee_dep geometry NOM_COM crsCoord ACHL_label_en
0 97101 971010101 B12 40.872978 De 1971 à 1990 971010101 40.872978 971 MULTIPOLYGON (((-706086.0658401633 6311115.277... Les Abymes EPSG:3857 From 1971 to 1990
1 97101 971010102 B12 44.975760 De 1971 à 1990 971010102 44.975760 971 POLYGON ((-707141.4767627036 6308691.888286703... Les Abymes EPSG:3857 From 1971 to 1990
2 97101 971010103 C100 27.177460 De 1991 à 2005 971010103 27.177460 971 POLYGON ((-707141.4767627036 6308691.888286703... Les Abymes EPSG:3857 From 1991 to 2005
[20]:
mapplot = gpd.GeoDataFrame(dataFinal, crs='EPSG:3857')
[21]:
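# plot the dominant construction period of main housing by IRIS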
fig, ax = plt.subplots(1,1,figsize=[15,15])
mapplot.plot(column='ACHL_label_en', cmap=cm.turbo,
    legend=True, ax=ax)
ax.set_axis_off()
ax.set(title='Main housing construction period')
plt.show()
[Output figure: map of the main housing construction period by IRIS]