Upper and middle class workers in Ile de France

[1]:
from pynsee import *

import pandas as pd
import geopandas as gpd
from pathlib import Path
from contextlib import closing
# import urllib.request as request
from urllib.request import Request, urlopen
import os
import py7zr
import re
import shutil

import matplotlib.cm as cm
import matplotlib.pyplot as plt
import descartes
[2]:
import logging
import sys
# Route log records to stdout (instead of the default stderr) so that
# pynsee's informational messages are rendered as regular cell output.
logging.basicConfig(
    format="%(message)s",
    level=logging.INFO,
    stream=sys.stdout,
)
[3]:
# Catalogue of the files that pynsee knows how to download.
meta = get_file_list()

# Keep only the catalogue rows whose id mentions RP_ACTRES_IRIS, then
# transpose and drop empty fields so the record reads as a vertical card.
meta.loc[meta["id"].str.contains("RP_ACTRES_IRIS")].T.dropna()
pynsee.download's metadata rely on volunteering contributors and their manual updates. get_file_list does not provide data from official Insee's metadata API
Consequently, please report any issue
[3]:
1332
id RP_ACTRES_IRIS
name RP_ACTRES_IRIS
label Données du Recensement de Population, données ...
collection RP
link https://www.insee.fr/fr/statistiques/fichier/4...
type xlsx
zip True
big_zip False
data_file base-ic-activite-residents-2017.xlsx
tab IRIS
first_row 6
api_rest False
md5 2df77900dcd6544a454d39abee9070a9
size 60768390
date_ref 2017-01-01
[4]:
# Fetch the dataset identified as "RP_ACTRES_IRIS" in the pynsee file catalogue.
# The cell output shows that pynsee downloads it, extracts it and stores a
# parquet copy in its local cache directory.
dfraw = download_file("RP_ACTRES_IRIS")
Downloading: 100%|██████████| 58.0M/58.0M [00:02<00:00, 20.8MiB/s]
Extracting: 100%|██████████| 58.0M/58.0M [00:00<00:00, 637MB/s]
Data saved:
/home/onyxia/.cache/pynsee/pynsee/f68f175baadf284aadfd854e462a0cce.parquet
[5]:
# Variable dictionary from insee.fr: https://www.insee.fr/fr/statistiques/4799323#dictionnaire
# P17_ACT1564 : number of economically active people aged 15 to 64 (main census processing, "exploitation principale")
# C17_ACT1564 : number of economically active people aged 15 to 64 (complementary census processing, "exploitation complémentaire")
# C17_ACT1564_CS1 : number of economically active farmers aged 15 to 64
# C17_ACT1564_CS2 : number of economically active craftspeople, shopkeepers and business owners aged 15 to 64
# C17_ACT1564_CS3 : number of economically active managers and higher intellectual professionals aged 15 to 64
[6]:
# Keep the identifier columns and the three counts needed for the indicator,
# restricted to region code "11" (the region this notebook maps).
# Selecting rows and columns in a single .loc call and taking an explicit
# copy gives an independent frame, so the column assignments below do not
# trigger pandas' SettingWithCopyWarning.
df = dfraw.loc[
    dfraw["REG"] == "11",
    ["IRIS", "COM", "REG", "DEP", "LIBCOM", "C17_ACT1564", "C17_ACT1564_CS2", "C17_ACT1564_CS3"],
].copy()

# Make sure the counts are numeric before doing arithmetic on them.
df["C17_ACT1564_CS2"] = pd.to_numeric(df["C17_ACT1564_CS2"])
df["C17_ACT1564_CS3"] = pd.to_numeric(df["C17_ACT1564_CS3"])
df["C17_ACT1564"] = pd.to_numeric(df["C17_ACT1564"])

# Share (in %) of the CS2 + CS3 categories among the active population aged 15-64.
# IRIS with no active population give 0 / 0, i.e. NaN, as seen in the output below.
df["pct"] = 100 * (df["C17_ACT1564_CS2"] + df["C17_ACT1564_CS3"]) / df["C17_ACT1564"]

df = df.reset_index(drop=True)
df
[6]:
IRIS COM REG DEP LIBCOM C17_ACT1564 C17_ACT1564_CS2 C17_ACT1564_CS3 pct
0 751010101 75101 11 75 Paris 1er Arrondissement 520.643927 29.702144 190.009987 42.200076
1 751010102 75101 11 75 Paris 1er Arrondissement 71.145629 9.899243 36.903044 65.783784
2 751010103 75101 11 75 Paris 1er Arrondissement 107.952527 18.057574 55.539455 68.175365
3 751010104 75101 11 75 Paris 1er Arrondissement 0.000000 0.000000 0.000000 NaN
4 751010105 75101 11 75 Paris 1er Arrondissement 0.000000 0.000000 0.000000 NaN
... ... ... ... ... ... ... ... ... ...
5257 956800112 95680 11 95 Villiers-le-Bel 831.396737 54.511641 35.417896 10.816681
5258 956800113 95680 11 95 Villiers-le-Bel 885.164069 26.592849 32.183605 6.640176
5259 956800114 95680 11 95 Villiers-le-Bel 664.336730 24.113011 11.521560 5.363932
5260 956820000 95682 11 95 Villiers-le-Sec 117.460317 4.894180 24.470899 25.000000
5261 956900000 95690 11 95 Wy-dit-Joli-Village 193.914518 20.062030 49.941941 36.100428

5262 rows × 9 columns

[7]:
import os
import shutil
from pathlib import Path
from urllib.request import Request, urlopen
from contextlib import closing

ign_iris_2022 = "https://data.geopf.fr/telechargement/download/CONTOURS-IRIS/CONTOURS-IRIS_2-1__SHP__FRA_2022-01-01/CONTOURS-IRIS_2-1__SHP__FRA_2022-01-01.7z"


def _find_iris_shapefiles(data_dir):
    """Return the sorted paths of every ``CONTOURS-IRIS.shp`` file under ``data_dir``."""
    return sorted(
        os.path.join(root, f_name)
        for root, _d_names, f_names in os.walk(data_dir)
        for f_name in f_names
        # Escaped dot and end anchor: only the .shp file itself is matched,
        # never a sidecar whose name merely contains that text.
        if re.search(r"CONTOURS-IRIS\.shp$", f_name)
    )


def get_iris_data(url=ign_iris_2022, update=False):
    """Download, extract and load the IRIS contour shapefiles.

    The 7z archive is stored in a directory under the user's home folder,
    named after the archive (file name up to its first dot). The archive is
    downloaded only when it is missing or when ``update`` is true, and it is
    extracted only after a fresh download or when no extracted shapefile is
    found yet.

    Parameters
    ----------
    url : str
        Location of the 7z archive. Defaults to ``ign_iris_2022``.
    update : bool
        When true, download and extract the archive again even if a local
        copy already exists.

    Returns
    -------
    The concatenation (``pd.concat``) of every ``CONTOURS-IRIS.shp`` layer
    found in the archive, each reprojected to EPSG:3857.

    Raises
    ------
    ValueError
        If no ``CONTOURS-IRIS.shp`` file is found after extraction.
    """
    filename = url.split("/")[-1]
    filename_clean = filename.split(".")[0]

    dataDir = os.path.join(Path.home(), filename_clean)
    os.makedirs(dataDir, exist_ok=True)

    file_iris = os.path.join(dataDir, "file_" + filename_clean)

    downloaded = False
    if not os.path.exists(file_iris) or update:
        # NOTE(review): the explicit User-Agent is presumably required because
        # the server rejects urllib's default one - confirm.
        req = Request(
            url=url,
            headers={'User-Agent': 'Mozilla/5.0'},
        )
        # Write to a temporary name first and rename once complete, so that an
        # interrupted download is never mistaken for a valid cached archive.
        partial_path = file_iris + ".part"
        with closing(urlopen(req)) as r, open(partial_path, 'wb') as f:
            shutil.copyfileobj(r, f)
        os.replace(partial_path, file_iris)
        downloaded = True

    list_file = _find_iris_shapefiles(dataDir)
    if downloaded or not list_file:
        # Extraction is costly: skip it when the shapefiles are already on disk.
        # Call with update=True to force a clean download and extraction.
        with py7zr.SevenZipFile(file_iris, mode='r') as z:
            z.extractall(dataDir)
        list_file = _find_iris_shapefiles(dataDir)

    if not list_file:
        raise ValueError(
            "No CONTOURS-IRIS.shp file found in " + dataDir
        )

    # The archive may hold several layers; put them all in the same CRS
    # before stacking them.
    list_shp_df = [gpd.read_file(f).to_crs("EPSG:3857") for f in list_file]

    shpFinal = pd.concat(list_shp_df)

    return shpFinal

# Load the IRIS contours with the default arguments: the 2022 archive URL
# defined above, without forcing a new download (update=False).
shapefile = get_iris_data()
ERROR 1: PROJ: proj_create_from_database: Open of /opt/conda/share/proj failed
[8]:
# Order the contours by municipality code and renumber the rows from 0.
shp = shapefile.sort_values(by="INSEE_COM")
shp = shp.reset_index(drop=True)
[9]:
# Attach each IRIS polygon to its census row. A left join keeps every census
# row, including those whose IRIS code has no matching contour.
DF = pd.merge(
    df,
    shp[["CODE_IRIS", "geometry"]],
    how="left",
    left_on="IRIS",
    right_on="CODE_IRIS",
)
[10]:
# Wrap the merged table in a GeoDataFrame so it can be plotted, and declare
# the CRS the contours were reprojected to in get_iris_data (EPSG:3857).
mapplot = gpd.GeoDataFrame(DF).set_crs("EPSG:3857")

fig, ax = plt.subplots(1, 1, figsize=(15, 15))
# Choropleth of the 'pct' indicator; 'shrink' reduces the colour bar height.
mapplot.plot(
    column='pct',
    cmap=cm.jet,
    legend=True,
    ax=ax,
    legend_kwds={'shrink': 0.5},
)
ax.set_axis_off()
# Title fixed: the original repeated the word "middle".
ax.set(title='Proportion of upper and middle class workers among working population in 2017')
plt.show()
../_images/examples_example_cadre_iris_idf_10_0.png
[ ]: