Upper and middle class workers in Île-de-France

[1]:
from pynsee import *

import pandas as pd
import geopandas as gpd
from pathlib import Path
from contextlib import closing
import urllib.request as request
import os
import py7zr
import re
import shutil

import matplotlib.cm as cm
import matplotlib.pyplot as plt
import descartes
[2]:
import logging
import sys

# Send log records of level INFO and above to stdout, printing only the
# message text, so that they show up in the notebook cell output.
logging.basicConfig(format="%(message)s", level=logging.INFO, stream=sys.stdout)
[3]:
# Quick workaround for SSL "certificate verify failed" errors: uncomment the two lines below (note that this disables certificate verification)
# https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error
#import ssl
#ssl._create_default_https_context = ssl._create_unverified_context
[4]:
# List the files known to pynsee and show the non-empty metadata fields
# of the entries whose id contains "RP_ACTRES_IRIS".
meta = get_file_list()
is_actres_iris = meta["id"].str.contains("RP_ACTRES_IRIS")
meta.loc[is_actres_iris].T.dropna()
pynsee.download's metadata rely on volunteering contributors and their manual updates. get_file_list does not provide data from official Insee's metadata API
Consequently, please report any issue
[4]:
1332
id RP_ACTRES_IRIS
name RP_ACTRES_IRIS
label Données du Recensement de Population, données ...
collection RP
link https://www.insee.fr/fr/statistiques/fichier/4...
type xlsx
zip True
big_zip False
data_file base-ic-activite-residents-2017.xlsx
tab IRIS
first_row 6
api_rest False
md5 2df77900dcd6544a454d39abee9070a9
size 60768390
date_ref 2017-01-01
[5]:
# Fetch the file registered under the id "RP_ACTRES_IRIS" in pynsee's file list;
# the result is used below as a pandas DataFrame.
dfraw = download_file("RP_ACTRES_IRIS")
Downloading: 100%|██████████| 58.0M/58.0M [00:11<00:00, 5.35MiB/s]
Extracting: 100%|██████████| 58.0M/58.0M [00:00<00:00, 594MB/s]
[6]:
# Metadata from insee.fr : https://www.insee.fr/fr/statistiques/4799323#dictionnaire
# P17_ACT1564 : nombre de personnes actives de 15 à 64 ans (exploitation principale)
# C17_ACT1564 : nombre de personnes actives de 15 à 64 ans (exploitation complémentaire)
# C17_ACT1564_CS1 : nombre d'agriculteurs exploitants actifs de 15 à 64 ans
# C17_ACT1564_CS2 : nombre d'artisans, commerçants, chefs d'entreprise actifs de 15 à 64 ans
# C17_ACT1564_CS3 : nombre de cadres et professions intellectuelles supérieures actifs de 15 à 64 ans
[7]:
# Build the Ile-de-France (REG == "11") working table: one row per IRIS with the
# share of CS2 + CS3 workers among the active population aged 15 to 64.
id_cols = ["IRIS", "COM", "REG", "DEP", "LIBCOM"]
count_cols = ["C17_ACT1564", "C17_ACT1564_CS2", "C17_ACT1564_CS3"]

# Select rows and columns in one .loc call and take an explicit copy, so the
# column assignments below act on an independent frame rather than on a slice
# of dfraw (which would raise SettingWithCopyWarning).
df = dfraw.loc[dfraw["REG"] == "11", id_cols + count_cols].copy()
df[count_cols] = df[count_cols].apply(pd.to_numeric)

# An IRIS without any active resident has no meaningful share: mask the zero
# denominator so the result is NaN (never inf).
active = df["C17_ACT1564"].where(df["C17_ACT1564"] != 0)
df["pct"] = 100 * (df["C17_ACT1564_CS2"] + df["C17_ACT1564_CS3"]) / active

df = df.reset_index(drop=True)
df
[7]:
IRIS COM REG DEP LIBCOM C17_ACT1564 C17_ACT1564_CS2 C17_ACT1564_CS3 pct
0 751010101 75101 11 75 Paris 1er Arrondissement 520.643927 29.702144 190.009987 42.200076
1 751010102 75101 11 75 Paris 1er Arrondissement 71.145629 9.899243 36.903044 65.783784
2 751010103 75101 11 75 Paris 1er Arrondissement 107.952527 18.057574 55.539455 68.175365
3 751010104 75101 11 75 Paris 1er Arrondissement 0.000000 0.000000 0.000000 NaN
4 751010105 75101 11 75 Paris 1er Arrondissement 0.000000 0.000000 0.000000 NaN
... ... ... ... ... ... ... ... ... ...
5257 956800112 95680 11 95 Villiers-le-Bel 831.396737 54.511641 35.417896 10.816681
5258 956800113 95680 11 95 Villiers-le-Bel 885.164069 26.592849 32.183605 6.640176
5259 956800114 95680 11 95 Villiers-le-Bel 664.336730 24.113011 11.521560 5.363932
5260 956820000 95682 11 95 Villiers-le-Sec 117.460317 4.894180 24.470899 25.000000
5261 956900000 95690 11 95 Wy-dit-Joli-Village 193.914518 20.062030 49.941941 36.100428

5262 rows × 9 columns

[8]:
# Download URLs of the IGN "CONTOURS-IRIS" archives (7z, shapefile format).
# Source: https://geoservices.ign.fr/contoursiris
ign_iris_2022 = (
    "https://wxs.ign.fr/1yhlj2ehpqf3q6dt6a2y7b64/telechargement/inspire/"
    "CONTOURS-IRIS-PACK_2022-01$CONTOURS-IRIS_2-1__SHP__FRA_2022-01-01/file/"
    "CONTOURS-IRIS_2-1__SHP__FRA_2022-01-01.7z"
)
ign_iris_2017 = (
    "https://wxs.ign.fr/1yhlj2ehpqf3q6dt6a2y7b64/telechargement/inspire/"
    "CONTOURS-IRIS-2017-01-01$CONTOURS-IRIS_2-1__SHP__FRA_2018-06-08/file/"
    "CONTOURS-IRIS_2-1__SHP__FRA_2018-06-08.7z"
)

def get_iris_data(url=ign_iris_2017, update=False):
    """Download a CONTOURS-IRIS 7z archive, extract it and load its shapefiles.

    The archive is stored in a folder of the user's home directory named after
    the archive (file name up to its first dot). It is downloaded only when it
    is not already there or when ``update`` is true; it is extracted on every
    call.

    Parameters
    ----------
    url : str
        Location of the ``.7z`` archive to download.
    update : bool
        When true, download the archive again even if a local copy exists.

    Returns
    -------
    The ``pd.concat`` of every ``CONTOURS-IRIS.shp`` layer found under the
    extraction folder, each one read with geopandas and reprojected to
    EPSG:3857.

    Raises
    ------
    ValueError
        If no ``CONTOURS-IRIS.shp`` file is found after extraction.
    """
    # The archive name is the last URL segment; its stem names the cache folder.
    archive_name = url.rsplit("/", 1)[-1]
    archive_stem = archive_name.split(".")[0]

    data_dir = os.path.join(str(Path.home()), archive_stem)
    os.makedirs(data_dir, exist_ok=True)

    archive_path = os.path.join(data_dir, "file_" + archive_stem)

    if update or not os.path.exists(archive_path):
        # Download under a temporary name and rename only once the copy has
        # completed, so an interrupted download is never mistaken for a valid
        # cached archive on the next call.
        partial_path = archive_path + ".part"
        with closing(request.urlopen(url, timeout=None)) as response:
            with open(partial_path, "wb") as target:
                shutil.copyfileobj(response, target)
        os.replace(partial_path, archive_path)

    with py7zr.SevenZipFile(archive_path, mode="r") as archive:
        archive.extractall(data_dir)

    # Match the file name exactly: a regex search for "CONTOURS-IRIS.shp" on the
    # full path would also accept sidecar files such as "CONTOURS-IRIS.shp.xml".
    # Sorting makes the concatenation order independent of the directory
    # listing order.
    shp_paths = sorted(
        os.path.join(root, name)
        for root, _dir_names, file_names in os.walk(data_dir)
        for name in file_names
        if name == "CONTOURS-IRIS.shp"
    )

    if not shp_paths:
        raise ValueError(
            "No CONTOURS-IRIS.shp file found under " + data_dir
        )

    # Reproject every layer to the same CRS before stacking them together.
    layers = [gpd.read_file(path).to_crs("EPSG:3857") for path in shp_paths]

    return pd.concat(layers)

# Load the IRIS contours (forcing a fresh download of the archive), then
# order the rows by commune code with a clean 0..n-1 index.
shapefile = get_iris_data(update=True)
shp = shapefile.sort_values("INSEE_COM").reset_index(drop=True)
Found credentials in environment variables.
Found credentials in environment variables.
ERROR 1: PROJ: proj_create_from_database: Open of /opt/mamba/share/proj failed
Found credentials in environment variables.
Found credentials in environment variables.
Found credentials in environment variables.
Found credentials in environment variables.
Found credentials in environment variables.
Found credentials in environment variables.
[9]:
# Attach each IRIS geometry to the statistics table; the left join keeps every
# row of df, including those without a matching contour.
iris_geometry = shp[["CODE_IRIS", "geometry"]]
DF = df.merge(
    iris_geometry,
    how="left",
    left_on="IRIS",
    right_on="CODE_IRIS",
)
[10]:
# Choropleth of the "pct" column over the IRIS contours.
mapplot = gpd.GeoDataFrame(DF)

fig, ax = plt.subplots(1, 1, figsize=[15, 15])
mapplot.plot(
    column="pct",
    cmap=cm.jet,
    legend=True,
    ax=ax,
    legend_kwds={"shrink": 0.5},  # shrink the colour bar to half its default size
)
ax.set_axis_off()
# Title fixed: the original read "upper and middle middle class".
ax.set(title="Proportion of upper and middle class workers among working population in 2017")
plt.show()
../_images/examples_example_cadre_iris_idf_10_0.png