Upper and middle class workers in Île-de-France¶
[1]:
from pynsee import *
import pandas as pd
import geopandas as gpd
from pathlib import Path
from contextlib import closing
import urllib.request as request
import os
import py7zr
import re
import shutil
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import descartes
[2]:
import logging
import sys

# Send log records of level INFO and above to stdout, printing only the
# message text (no timestamp, level or logger name).
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")
[3]:
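# Quick fix for SSL "certificate verify failed" errors: uncomment the two lines
# below. Note that this turns off HTTPS certificate verification for urllib.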
# https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error
#import ssl
#ssl._create_default_https_context = ssl._create_unverified_context
[4]:
# List the files known to pynsee's download module, then display the
# catalogue entry whose id contains "RP_ACTRES_IRIS" (transposed, with empty
# fields dropped, so it reads as a vertical record).
meta = get_file_list()
is_actres_iris = meta["id"].str.contains("RP_ACTRES_IRIS")
meta.loc[is_actres_iris].T.dropna()
pynsee.download's metadata rely on volunteering contributors and their manual updates. get_file_list does not provide data from official Insee's metadata API
Consequently, please report any issue
[4]:
1332 | |
---|---|
id | RP_ACTRES_IRIS |
name | RP_ACTRES_IRIS |
label | Données du Recensement de Population, données ... |
collection | RP |
link | https://www.insee.fr/fr/statistiques/fichier/4... |
type | xlsx |
zip | True |
big_zip | False |
data_file | base-ic-activite-residents-2017.xlsx |
tab | IRIS |
first_row | 6 |
api_rest | False |
md5 | 2df77900dcd6544a454d39abee9070a9 |
size | 60768390 |
date_ref | 2017-01-01 |
[5]:
# Download the file registered under the id "RP_ACTRES_IRIS" (see the catalogue
# entry displayed above) and keep the untouched result as `dfraw`; later cells
# select columns from it, so it is used as a table indexed by column name.
dfraw = download_file("RP_ACTRES_IRIS")
Downloading: 100%|██████████| 58.0M/58.0M [00:11<00:00, 5.35MiB/s]
Extracting: 100%|██████████| 58.0M/58.0M [00:00<00:00, 594MB/s]
[6]:
# Variable dictionary from insee.fr: https://www.insee.fr/fr/statistiques/4799323#dictionnaire
# P17_ACT1564 : number of economically active people aged 15 to 64
# C17_ACT1564 : number of economically active people aged 15 to 64
#               (the P/C prefixes presumably distinguish the main and the complementary
#               census tabulations; check the dictionary linked above)
# C17_ACT1564_CS1 : number of economically active farmers aged 15 to 64
# C17_ACT1564_CS2 : number of economically active craftspeople, shopkeepers and business owners aged 15 to 64
# C17_ACT1564_CS3 : number of economically active managers and higher intellectual professionals aged 15 to 64
[7]:
# Columns kept from the raw census table: identifiers plus the three counts
# needed to compute the share of CS2 + CS3 workers.
id_columns = ["IRIS", "COM", "REG", "DEP", "LIBCOM"]
count_columns = ["C17_ACT1564", "C17_ACT1564_CS2", "C17_ACT1564_CS3"]

# Keep region "11" only. The explicit .copy() makes `df` an independent frame,
# so the column assignments below do not write into a slice of `dfraw`
# (which would raise pandas' SettingWithCopyWarning).
df = dfraw.loc[dfraw["REG"] == "11", id_columns + count_columns].copy()

# The counts must be numeric before doing arithmetic on them.
df[count_columns] = df[count_columns].apply(pd.to_numeric)

# Share (in %) of CS2 + CS3 among the active population aged 15 to 64.
# Rows with no active population get NaN instead of a division by zero.
active_population = df["C17_ACT1564"].where(df["C17_ACT1564"] > 0)
df["pct"] = 100 * (df["C17_ACT1564_CS2"] + df["C17_ACT1564_CS3"]) / active_population

df = df.reset_index(drop=True)
df
[7]:
IRIS | COM | REG | DEP | LIBCOM | C17_ACT1564 | C17_ACT1564_CS2 | C17_ACT1564_CS3 | pct | |
---|---|---|---|---|---|---|---|---|---|
0 | 751010101 | 75101 | 11 | 75 | Paris 1er Arrondissement | 520.643927 | 29.702144 | 190.009987 | 42.200076 |
1 | 751010102 | 75101 | 11 | 75 | Paris 1er Arrondissement | 71.145629 | 9.899243 | 36.903044 | 65.783784 |
2 | 751010103 | 75101 | 11 | 75 | Paris 1er Arrondissement | 107.952527 | 18.057574 | 55.539455 | 68.175365 |
3 | 751010104 | 75101 | 11 | 75 | Paris 1er Arrondissement | 0.000000 | 0.000000 | 0.000000 | NaN |
4 | 751010105 | 75101 | 11 | 75 | Paris 1er Arrondissement | 0.000000 | 0.000000 | 0.000000 | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5257 | 956800112 | 95680 | 11 | 95 | Villiers-le-Bel | 831.396737 | 54.511641 | 35.417896 | 10.816681 |
5258 | 956800113 | 95680 | 11 | 95 | Villiers-le-Bel | 885.164069 | 26.592849 | 32.183605 | 6.640176 |
5259 | 956800114 | 95680 | 11 | 95 | Villiers-le-Bel | 664.336730 | 24.113011 | 11.521560 | 5.363932 |
5260 | 956820000 | 95682 | 11 | 95 | Villiers-le-Sec | 117.460317 | 4.894180 | 24.470899 | 25.000000 |
5261 | 956900000 | 95690 | 11 | 95 | Wy-dit-Joli-Village | 193.914518 | 20.062030 | 49.941941 | 36.100428 |
5262 rows × 9 columns
[8]:
# Download links for the IGN "CONTOURS-IRIS" shapefile archives (7z format).
# Dataset page: https://geoservices.ign.fr/contoursiris
# Note that the archive used for the 2017 contours is the edition dated 2018-06-08.
# NOTE(review): these links embed a fixed access key and point at wxs.ign.fr;
# confirm they are still served before relying on them.
ign_iris_2022 = "https://wxs.ign.fr/1yhlj2ehpqf3q6dt6a2y7b64/telechargement/inspire/CONTOURS-IRIS-PACK_2022-01$CONTOURS-IRIS_2-1__SHP__FRA_2022-01-01/file/CONTOURS-IRIS_2-1__SHP__FRA_2022-01-01.7z"
ign_iris_2017 = "https://wxs.ign.fr/1yhlj2ehpqf3q6dt6a2y7b64/telechargement/inspire/CONTOURS-IRIS-2017-01-01$CONTOURS-IRIS_2-1__SHP__FRA_2018-06-08/file/CONTOURS-IRIS_2-1__SHP__FRA_2018-06-08.7z"
def get_iris_data(url=ign_iris_2017, update=False):
    """Download, extract and load the IRIS contour shapefiles of a 7z archive.

    The archive is cached in a folder of the user's home directory named after
    the archive file (text before the first dot), so later calls can reuse it
    instead of downloading it again.

    Parameters
    ----------
    url : str
        Address of the 7z archive to fetch.
    update : bool
        When True, download the archive again even if a cached copy exists.

    Returns
    -------
    The concatenation (``pd.concat``) of every ``CONTOURS-IRIS.shp`` layer
    found in the extracted archive, each one reprojected to EPSG:3857.

    Raises
    ------
    ValueError
        If no ``CONTOURS-IRIS.shp`` file is found after extraction.
    """
    filename = url.split("/")[-1]
    filename_clean = filename.split(".")[0]

    dataDir = os.path.join(str(Path.home()), filename_clean)
    os.makedirs(dataDir, exist_ok=True)

    file_iris = os.path.join(dataDir, "file_" + filename_clean)

    if update or not os.path.exists(file_iris):
        # Download to a temporary name and rename only once the transfer is
        # complete, so an interrupted download never leaves a truncated
        # archive that a later call would mistake for a valid cache.
        partial_file = file_iris + ".part"
        try:
            with closing(request.urlopen(url, timeout=None)) as r:
                with open(partial_file, "wb") as f:
                    shutil.copyfileobj(r, f)
            os.replace(partial_file, file_iris)
        finally:
            if os.path.exists(partial_file):
                os.remove(partial_file)

    # The archive is extracted on every call, as before, so the extracted
    # tree always reflects the cached archive.
    with py7zr.SevenZipFile(file_iris, mode="r") as z:
        z.extractall(dataDir)

    # Keep only the shapefiles themselves: matching on the exact file-name
    # ending excludes sidecar files such as "CONTOURS-IRIS.shp.xml".
    # Sorting makes the load order independent of the file system.
    list_file = sorted(
        os.path.join(root, f)
        for root, _, f_names in os.walk(dataDir)
        for f in f_names
        if f.endswith("CONTOURS-IRIS.shp")
    )
    if not list_file:
        raise ValueError(
            "No CONTOURS-IRIS.shp file found in " + dataDir + " after extracting " + url
        )

    # Reproject every layer to the same CRS before stacking them.
    list_shp_df = [gpd.read_file(f).to_crs("EPSG:3857") for f in list_file]

    shpFinal = pd.concat(list_shp_df)
    return shpFinal
# Set to True to force a fresh download of the IRIS archive; by default the
# copy cached by get_iris_data is reused so the notebook can be re-run
# without fetching the archive again.
REFRESH_IRIS_CONTOURS = False

shapefile = get_iris_data(update=REFRESH_IRIS_CONTOURS)
# Order the contours by municipality code and renumber the rows.
shp = shapefile.sort_values(by=["INSEE_COM"]).reset_index(drop=True)
Found credentials in environment variables.
Found credentials in environment variables.
ERROR 1: PROJ: proj_create_from_database: Open of /opt/mamba/share/proj failed
Found credentials in environment variables.
Found credentials in environment variables.
Found credentials in environment variables.
Found credentials in environment variables.
Found credentials in environment variables.
Found credentials in environment variables.
[9]:
# Attach each IRIS polygon to its census row. A left join keeps every census
# row, even when no contour carries the same IRIS code.
iris_geometry = shp[["CODE_IRIS", "geometry"]]
DF = df.merge(
    iris_geometry,
    how="left",
    left_on="IRIS",
    right_on="CODE_IRIS",
)
[10]:
# Wrap the merged table in a GeoDataFrame so that it can be drawn as a map.
mapplot = gpd.GeoDataFrame(DF)

fig, ax = plt.subplots(1, 1, figsize=(15, 15))

# Choropleth of the `pct` column, with a colour bar shrunk to half height.
mapplot.plot(
    column="pct",
    cmap=cm.jet,
    legend=True,
    ax=ax,
    legend_kwds={"shrink": 0.5},
)
ax.set_axis_off()
# Title fixed: the original repeated the word "middle".
ax.set(title="Proportion of upper and middle class workers among working population in 2017")
plt.show()