Presidential Election 2022

[1]:
from pynsee import get_geodata, get_geodata_list
import requests
import tempfile
import pandas as pd
/usr/local/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
[2]:
import matplotlib.cm as cm
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
import seaborn as sns
[3]:
import logging
import sys
# Route log records of level INFO and above to stdout, printing only the
# message text (no level name or timestamp prefix).
logging.basicConfig(
    format="%(message)s",
    level=logging.INFO,
    stream=sys.stdout,
)
[4]:
# download data from data.gouv.fr
url = 'https://www.data.gouv.fr/fr/datasets/r/1ffb6125-1cea-4a03-80be-520c1c3a5411'

def download_data(url=url, timeout=60):
    """
    Download the election results file and store it in a fresh temporary directory.

    Parameters
    ----------
    url : str
        Address of the file to download.
    timeout : float
        Number of seconds passed to ``requests.get`` as its ``timeout``
        so that a stalled connection does not hang the notebook forever.

    Returns
    -------
    str
        Path of the downloaded file on disk.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (4xx / 5xx).
    """
    # Local import so that this cell does not rely on another cell for `os`.
    import os

    results = requests.get(url, timeout=timeout)
    # Fail loudly instead of writing an HTML error page that would later be
    # parsed as if it were the results file.
    results.raise_for_status()

    dirpath = tempfile.mkdtemp()
    # Build the path with the platform separator: a hard-coded backslash only
    # works on Windows (elsewhere it becomes part of the file name).
    dwnFile = os.path.join(dirpath, 'fr_presidential_election_1stround_2022.txt')

    # The context manager closes the file; no explicit close() is needed.
    with open(dwnFile, 'wb') as f:
        f.write(results.content)

    return dwnFile

dwnFile = download_data()
[5]:
def make_dataframes(dataFile=dwnFile):
    """
    Build clean dataframes from the data.gouv.fr results file.

    The file is read as semicolon-separated, latin-1 encoded text with every
    value kept as a string. Each row holds a set of commune-level columns
    followed by one block of columns per candidate; the candidate column
    names found in the header row are reused for every candidate block.

    Parameters
    ----------
    dataFile : str
        Path to the results file.

    Returns
    -------
    dfFinal : pandas.DataFrame
        One row per commune and candidate (long format), with 'Voix'
        converted to numeric and the commune-level columns merged in.
    dfCommunes : pandas.DataFrame
        One row per row of the input file with the commune-level aggregated
        columns, plus an 'insee_com' identifier column.

    Raises
    ------
    ValueError
        If no candidate column block can be found in the file.
    """
    nIdCols = 4           # leading columns identifying the commune
    nCommuneCols = 18     # commune-level columns kept in dfCommunes
    firstBallotCol = 19   # position of the first candidate column
    # NOTE(review): column 18 is neither kept in dfCommunes nor treated as a
    # candidate column (same as before this rewrite) - confirm this is intended.

    # import data: the header row and the data rows are read separately
    readOptions = dict(dtype='str', sep=';', encoding='latin-1', header=None)
    data = pd.read_csv(dataFile, skiprows=1, **readOptions)
    colNames = pd.read_csv(dataFile, nrows=1, **readOptions)
    colNamesCommunes = colNames.iloc[0, :nCommuneCols].to_list()
    colNamesBallots = colNames.iloc[0, firstBallotCol:].to_list()

    # make dataframe only on communes aggregated data
    # (.copy() so that later assignments do not write into a slice of `data`)
    dfCommunes = data.iloc[:, :nCommuneCols].copy()
    dfCommunes.columns = colNamesCommunes

    # make dataframe only with detailed data, candidate by candidate
    icols = list(range(nIdCols)) + list(range(firstBallotCol, len(data.columns)))
    dfBallots = data.iloc[:, icols].copy()
    dfBallots.columns = list(range(len(dfBallots.columns)))
    colnamesCandidates = colNamesCommunes[:nIdCols] + colNamesBallots

    # Derive the number of candidate blocks from the actual column count
    # rather than hard-coding it, so that no trailing candidate is dropped.
    nBallotCols = len(colNamesBallots)
    if nBallotCols == 0:
        raise ValueError("no candidate columns found in the header of " + str(dataFile))
    nCandidates = (len(dfBallots.columns) - nIdCols) // nBallotCols
    if nCandidates < 1:
        raise ValueError("no candidate data found in " + str(dataFile))

    # reshape detailed data: stack the per-candidate blocks on top of each other
    list_df = []
    for icandidate in range(nCandidates):
        start = nIdCols + nBallotCols * icandidate
        icols = list(range(nIdCols)) + list(range(start, start + nBallotCols))
        dfBallotsCandidate = dfBallots.iloc[:, icols].copy()
        dfBallotsCandidate.columns = colnamesCandidates
        list_df.append(dfBallotsCandidate)

    dfFinal = pd.concat(list_df).reset_index(drop=True)

    dfFinal['Voix'] = pd.to_numeric(dfFinal['Voix'])

    shortNames = {"Code du département": "dep", "Code de la commune": "com"}
    dfFinal = dfFinal.rename(columns=shortNames)
    dfCommunes = dfCommunes.rename(columns=shortNames)

    # add clean communes id: department codes starting with 'Z' are replaced
    # by '97' before being concatenated with the commune code
    dep = dfCommunes["dep"].astype(str)
    dep = dep.mask(dep.str.startswith('Z'), '97')
    dfCommunes["insee_com"] = dep + dfCommunes["com"].astype(str)

    dfFinal = dfFinal.merge(dfCommunes, on=["dep", "com"], how="left")

    return dfFinal, dfCommunes

dfBallots, dfCommunes = make_dataframes()
[6]:
# For every commune, keep only the row of the candidate with the highest
# number of votes ('Voix').
colList = list(dfBallots.columns[:4]) + ["insee_com"]
FirstCandidate = (
    dfBallots
    .loc[dfBallots.reset_index().groupby(colList)['Voix'].idxmax()]
    .reset_index(drop=True)
)
[7]:
FirstCandidate.head(2)
[7]:
dep Libellé du département_x com Libellé de la commune_x N°Panneau Sexe Nom Prénom Voix % Voix/Ins ... % Vot/Ins Blancs % Blancs/Ins % Blancs/Vot Nuls % Nuls/Ins % Nuls/Vot Exprimés % Exp/Ins insee_com
0 01 Ain 001 L'Abergement-Clémenciat 3 M MACRON Emmanuel 150 23,26 ... 83,26 16 2,48 2,98 1 0,16 0,19 520 80,62 01001
1 01 Ain 002 L'Abergement-de-Varey 3 M MACRON Emmanuel 50 23,47 ... 82,16 3 1,41 1,71 1 0,47 0,57 171 80,28 01002

2 rows × 28 columns

[8]:
# Download the geographical limits of communes from IGN; they are merged with
# the election results in a later cell.
IGNCom = (
    get_geodata('ADMINEXPRESS-COG-CARTO.LATEST:commune')
    .transform_overseas()
    .zoom()
)
# Display the rows whose INSEE code starts with 75.
IGNCom.loc[lambda geo: geo['code_insee'].str.contains('^75')]
Previously saved data has been used:
/home/onyxia/.cache/pynsee/pynsee/6cb5dc200198fcdb00222fe8b8f7945b.parquet
Creation date: 2025-06-28, today
Set update=True to get the most up-to-date data
Previously saved data has been used:
/home/onyxia/.cache/pynsee/pynsee/f2bbdea5546ef02791b1286f1b6b1a51.parquet
Creation date: 2025-06-28, today
Set update=True to get the most up-to-date data
[8]:
geometry cleabs nom_officiel nom_officiel_en_majuscules statut code_insee population date_du_recensement organisme_recenseur code_insee_du_canton code_insee_de_l_arrondissement code_insee_du_departement code_insee_de_la_region code_siren codes_siren_des_epci code_postal superficie_cadastrale
19308 MULTIPOLYGON (((259950.061 6258104.764, 260905... COMMUNE_0000000000075056 Paris PARIS Capitale d'Etat 75056 2113705 2022-01-01Z INSEE NR 751 75 11 217500016 200054781 None 10540
34877 MULTIPOLYGON (((-366033.08 6599847.402, -36412... COMMUNE_0000000000075056 Paris PARIS Capitale d'Etat 75056 2113705 2022-01-01Z INSEE NR 751 75 11 217500016 200054781 None 10540
[9]:
# Keep only the commune code (renamed to match the IGN column) and the name of
# the leading candidate, then attach that name to the commune geometries.
FirstCandidate = (
    FirstCandidate
    .rename(columns={'insee_com': 'code_insee'})
    .loc[:, ['code_insee', 'Nom']]
)
com = IGNCom.merge(FirstCandidate, on='code_insee', how='left')
[10]:
# Build the colour palette: one colour from "Paired" followed by the "Set1"
# colours in a custom order.
set1 = sns.color_palette("Set1")
colors = [
    sns.color_palette("Paired")[4],
    set1[2],
    set1[1],
    *set1[3:5],
    set1[0],
    *set1[6:],
]
cmap = ListedColormap(colors)
[11]:
# Draw the communes coloured by the name of the leading candidate.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 15))
com.plot(column='Nom', legend=True, ax=ax, cmap=cmap)
fig.suptitle(
    'The results of the first round of the 2022 presidential election by commune',
    x=0.3,
    y=0.85,
    horizontalalignment='left',
    fontsize=12,
    fontweight='bold',
)
plt.show()
../_images/examples_example_presidential_election2022_11_0.png