Poverty in Marseille with granular data

Import modules

[1]:
import io
import re
import urllib3
import warnings
import zipfile

import geopandas
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
import py7zr
import requests
from shapely.geometry import Polygon, Point

from pynsee.geodata.get_geodata import get_geodata

Retrieve sample dataset from INSEE (filtered on the Bouches-du-Rhône département, code 13)

[2]:
# Address of the zip archive downloaded below; per its file name it holds the
# Filosofi 2015 200 m grid ("carreaux") data in CSV form, hosted on insee.fr.
URL = "https://www.insee.fr/fr/statistiques/fichier/4176290/Filosofi2015_carreaux_200m_csv.zip"

def _download_data_var_departement(url):
    """Return the INSEE gridded dataset restricted to département 13.

    The filtered table is cached in ``poverty_marseille.parquet`` in the
    current working directory. When that file exists it is read and returned
    without any network access. Otherwise the zip archive at ``url`` is
    downloaded, the ``*metropole.csv`` member of the 7z archive nested inside
    it is parsed with every column read as a string, the rows whose
    ``Depcom`` starts with ``13`` are kept, and the result is written to the
    cache before being returned.

    Despite the function name, the filter selects commune codes starting
    with ``13`` (Bouches-du-Rhône, the département of Marseille), not the
    Var.

    Parameters
    ----------
    url : str
        Address of a zip archive whose first member is a 7z archive
        containing a file whose name ends with ``metropole.csv``.

    Returns
    -------
    pandas.DataFrame
        The filtered rows, re-indexed from 0, all columns as strings.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    IndexError
        If the 7z archive contains no ``*metropole.csv`` member.
    """
    cache_path = "poverty_marseille.parquet"

    try:
        # Note: the dataset will be cached on your local disk at first download
        return pd.read_parquet(cache_path)
    except FileNotFoundError:
        pass

    with warnings.catch_warnings():
        warnings.simplefilter(
            "ignore", urllib3.exceptions.InsecureRequestWarning
        )
        # NOTE(review): certificate verification is disabled here, presumably
        # to work around a certificate problem on the server - confirm that
        # this is still required.
        results = requests.get(url, verify=False)

    # Stop on an HTTP error instead of handing an error page to zipfile,
    # which would fail later with a misleading BadZipFile.
    results.raise_for_status()

    with zipfile.ZipFile(io.BytesIO(results.content), "r") as zip_ref:
        # The zip wraps a single 7z archive: take its first member.
        sevenfile = zip_ref.namelist()[0]
        archive = io.BytesIO(zip_ref.read(sevenfile))

    with py7zr.SevenZipFile(archive, mode="r") as z:
        candidates = [
            f.filename
            for f in z.list()
            if re.match(r".*metropole\.csv$", f.filename)
        ]
        if not candidates:
            raise IndexError(
                "no member ending with 'metropole.csv' found in the 7z archive"
            )
        member = candidates[0]
        with z.read([member])[member] as f:
            csv_bytes = io.BytesIO(f.read())

    data = pd.read_csv(csv_bytes, dtype="str", sep=",")

    # Keep the communes of département 13 (codes starting with "13").
    # na=False treats a missing Depcom as "no match" rather than producing a
    # NaN in the boolean mask, which pandas refuses to index with.
    data13 = (
        data[data.Depcom.str.contains("^13", na=False)]
        .reset_index(drop=True)
        .copy()
    )

    data13.to_parquet(cache_path)

    return data13


# Load the filtered table: read from the local parquet cache when it exists,
# otherwise downloaded from URL and cached for the next run.
data = _download_data_var_departement(URL)

Make geographical squares and construct a GeoDataFrame

[3]:
# Extract the northing ("lat") and easting ("lon") encoded in each INSPIRE
# cell identifier. `str.extract` keeps one row per input row together with the
# original index, so an identifier that does not match the pattern is dropped
# explicitly below instead of silently shifting every following geometry by
# one row when the result is joined back on the index.
coords = (
    data["IdINSPIRE"]
    .str.extract("^CRS3035RES200mN(?P<lat>[0-9]+)E(?P<lon>[0-9]+)$")
    .dropna()
    .astype("float64")
)

# construct squares using translations
# NOTE(review): the extracted point is used as the north-west corner and the
# square extends 200 m to the east and to the south. INSPIRE grid identifiers
# conventionally carry the lower-left (south-west) corner - confirm against
# the INSEE documentation of IdINSPIRE.
sideLength = 200
north_west = geopandas.GeoSeries(
    geopandas.points_from_xy(x=coords["lon"], y=coords["lat"]),
    index=coords.index,  # keep the row labels of `data` for the join below
    crs=3035,
)
north_east = north_west.translate(xoff=sideLength, yoff=0)
south_east = north_east.translate(xoff=0, yoff=-sideLength)
south_west = south_east.translate(xoff=-sideLength, yoff=0)
geoms = pd.concat(
    [north_west, north_east, south_east, south_west], axis=1
).apply(Polygon, axis=1)
geoms.name = "geometry"

# join to initial data
# Dropping a "geometry" column left by a previous execution keeps this cell
# re-runnable: joining twice would otherwise fail on the overlapping column.
data = geopandas.GeoDataFrame(
    pd.DataFrame(data).drop(columns="geometry", errors="ignore").join(geoms),
    crs=3035,
)

Download geographical data from the IGN API using pynsee

[4]:
# Fetch the "arrondissement municipal" layer through pynsee, in EPSG:3035.
municipal_districts_layer = (
    "ADMINEXPRESS-COG-CARTO.LATEST:arrondissement_municipal"
)
arr = get_geodata(municipal_districts_layer, crs="EPSG:3035")

# Keep the districts whose name mentions Marseille, then derive
#   - a single merged geometry, used to test which squares touch the city;
#   - a GeoDataFrame of the districts, used to draw their outlines.
is_marseille_district = arr["nom"].str.contains("Marseille")
marseille = arr[is_marseille_district]
geoMarseille = marseille.geometry.union_all()
marseilleMap = geopandas.GeoDataFrame(marseille, crs=3035)

Compute the percentage of poor households for each 200 m square

[5]:
# Restrict the dataset to Marseille: flag every square that intersects the
# merged city geometry, then keep only the flagged rows.
data["check_marseille"] = data.intersects(geoMarseille)
touches_marseille = data["check_marseille"]
dataMarseille = data[touches_marseille].reset_index(drop=True)

# The household counts were read as text; convert them to numbers, turning
# any value that cannot be parsed into NaN.
cols = ["Men_pauv", "Men"]
dataMarseille[cols] = dataMarseille[cols].apply(
    lambda column: pd.to_numeric(column, errors="coerce")
)

# Share of poor households in each 200m square, expressed in percent.
dataMarseille["Men_pauv_pct"] = (
    dataMarseille["Men_pauv"].div(dataMarseille["Men"]).mul(100)
)

Make the plot

[6]:
# Draw the squares coloured by poverty share, with a shrunk colour bar.
choropleth_options = {
    "column": "Men_pauv_pct",
    "cmap": cm.viridis,
    "legend": True,
    "legend_kwds": {"shrink": 0.5},
    "figsize": (15, 15),
}
ax = dataMarseille.plot(**choropleth_options)
fig = ax.figure

# Overlay the district boundaries as unfilled red outlines on the same axes.
marseilleMap.plot(ax=ax, color="none", edgecolor="red")
ax.set_axis_off()

# Main title on the figure, subtitle on the map axes.
fig.suptitle("Poverty in Marseille in 2015", y=0.85, fontweight="bold")
ax.set_title(
    "percentage of poor households on a 200m-square grid", size=11, x=0.6
)
plt.show()
../_images/examples_example_poverty_marseille_12_0.png