{ "cells": [ { "cell_type": "markdown", "id": "69cd2732-2098-47ff-844e-e8177afa3b6e", "metadata": {}, "source": [ "# Presidential Election 2022" ] }, { "cell_type": "code", "execution_count": 1, "id": "588391a7-1eb2-4122-94f4-7a7ba54fdd17", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "from pynsee import get_geodata, get_geodata_list\n", "import requests\n", "import tempfile\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "id": "d34efd86-120a-4f9d-abf1-5ada86659f79", "metadata": {}, "outputs": [], "source": [ "import matplotlib.cm as cm\n", "from matplotlib.colors import ListedColormap\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns" ] }, { "cell_type": "code", "execution_count": 3, "id": "c7338ae4-48c4-4d80-a698-d4f64f772515", "metadata": {}, "outputs": [], "source": [ "import logging\n", "import sys\n", "logging.basicConfig(stream=sys.stdout,\n", " level=logging.INFO, \n", " format=\"%(message)s\")" ] }, { "cell_type": "code", "execution_count": 4, "id": "c87edfad-f6b0-44ec-85c7-d7752bb9a9a4", "metadata": {}, "outputs": [], "source": [ "# download data from data.gouv.fr\n", "url = 'https://www.data.gouv.fr/fr/datasets/r/1ffb6125-1cea-4a03-80be-520c1c3a5411'\n", "\n", "def download_data(url=url):\n", " \n", " results = requests.get(url)\n", " dirpath = tempfile.mkdtemp()\n", " dwnFile = dirpath + '\\\\fr_presidential_election_1stround_2022.txt'\n", "\n", " with open(dwnFile, 'wb') as f:\n", " f.write(results.content)\n", " f.close()\n", " \n", " return dwnFile\n", "\n", "dwnFile = download_data()" ] }, { "cell_type": "code", "execution_count": 5, "id": "525bc99c-ce24-40f0-a3ae-9d6b35ada53b", "metadata": {}, "outputs": [], "source": [ "def make_dataframes(dataFile=dwnFile):\n", " \"\"\"\n", " function to make clean dataframes from data.gouv.fr file\n", " it returns one dataframe containing data about votes by candidate and commune, \n", " and a dataframe containing communes agregated data\n", " \"\"\" \n", " \n", " # import data\n", " data = pd.read_csv(dataFile, dtype='str', sep=';', encoding='latin-1', skiprows=1, header=None)\n", " colNames = pd.read_csv(dataFile, dtype='str', sep=';', encoding='latin-1', nrows=1, header=None)\n", " colNamesCommunes = colNames.iloc[0,:18].to_list()\n", " colNamesBallots = colNames.iloc[0,19:len(colNames.columns)].to_list()\n", " \n", " # make dataframe only on communes agregated data\n", " dfCommunes = data.iloc[:,:18]\n", " dfCommunes.columns = colNamesCommunes\n", " \n", " # make dataframe only with detailed data, candidate by candidate\n", " icols = [x for x in range(4)] + [x for x in range(19, len(data.columns))]\n", " dfBallots = data.iloc[:, icols]\n", " dfBallots.columns = [x for x in range(len(dfBallots.columns))]\n", " colnamesCandidates = colNamesCommunes[:4] + colNamesBallots\n", " \n", " # reshape detailed data\n", " list_df = []\n", " for icandidate in range(1, 12):\n", " icols = [x for x in range(4)] + [x for x in range(7 * (icandidate - 1) + 4, 7 * icandidate + 4)]\n", " dfBallotsCandidate = dfBallots.iloc[:, icols]\n", " dfBallotsCandidate.columns = colnamesCandidates\n", " list_df += [dfBallotsCandidate]\n", "\n", " dfFinal = pd.concat(list_df).reset_index(drop=True)\n", "\n", " dfFinal['Voix'] = pd.to_numeric(dfFinal['Voix'])\n", " \n", " dfFinal = dfFinal.rename(columns={\"Code du département\":\"dep\", \"Code de la commune\":\"com\"})\n", " dfCommunes = dfCommunes.rename(columns={\"Code du département\":\"dep\", \"Code de la commune\":\"com\"})\n", " \n", " # add clean communes id \n", " for r in dfCommunes.index:\n", " dep = str(dfCommunes.loc[r, \"dep\"])\n", " comId = str(dfCommunes.loc[r, \"com\"])\n", " if dep.startswith('Z'): \n", " dep = '97'\n", " dfCommunes.loc[r,\"insee_com\"] = dep + comId\n", " \n", " dfFinal = dfFinal.merge(dfCommunes, on = [\"dep\", \"com\"], how=\"left\")\n", " \n", " return dfFinal, dfCommunes\n", "\n", "dfBallots, dfCommunes = make_dataframes()" ] }, { "cell_type": "code", "execution_count": 6, "id": "0f146d95-455f-4715-b0dc-bd8e787c6783", "metadata": {}, "outputs": [], "source": [ "# select only the candidate who has won the most votes for each commune\n", "colList = dfBallots.columns[:4].to_list() + [\"insee_com\"]\n", "FirstCandidate = dfBallots.loc[dfBallots.reset_index().groupby(colList)['Voix'].idxmax()].reset_index(drop=True) " ] }, { "cell_type": "code", "execution_count": 7, "id": "38c28af2-98eb-4242-ab12-92b1e502a72f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | dep | \n", "Libellé du département_x | \n", "com | \n", "Libellé de la commune_x | \n", "N°Panneau | \n", "Sexe | \n", "Nom | \n", "Prénom | \n", "Voix | \n", "% Voix/Ins | \n", "... | \n", "% Vot/Ins | \n", "Blancs | \n", "% Blancs/Ins | \n", "% Blancs/Vot | \n", "Nuls | \n", "% Nuls/Ins | \n", "% Nuls/Vot | \n", "Exprimés | \n", "% Exp/Ins | \n", "insee_com | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "01 | \n", "Ain | \n", "001 | \n", "L'Abergement-Clémenciat | \n", "3 | \n", "M | \n", "MACRON | \n", "Emmanuel | \n", "150 | \n", "23,26 | \n", "... | \n", "83,26 | \n", "16 | \n", "2,48 | \n", "2,98 | \n", "1 | \n", "0,16 | \n", "0,19 | \n", "520 | \n", "80,62 | \n", "01001 | \n", "
| 1 | \n", "01 | \n", "Ain | \n", "002 | \n", "L'Abergement-de-Varey | \n", "3 | \n", "M | \n", "MACRON | \n", "Emmanuel | \n", "50 | \n", "23,47 | \n", "... | \n", "82,16 | \n", "3 | \n", "1,41 | \n", "1,71 | \n", "1 | \n", "0,47 | \n", "0,57 | \n", "171 | \n", "80,28 | \n", "01002 | \n", "
2 rows × 28 columns
\n", "| \n", " | geometry | \n", "cleabs | \n", "nom_officiel | \n", "nom_officiel_en_majuscules | \n", "statut | \n", "code_insee | \n", "population | \n", "date_du_recensement | \n", "organisme_recenseur | \n", "code_insee_du_canton | \n", "code_insee_de_l_arrondissement | \n", "code_insee_du_departement | \n", "code_insee_de_la_region | \n", "code_siren | \n", "codes_siren_des_epci | \n", "code_postal | \n", "superficie_cadastrale | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 19308 | \n", "MULTIPOLYGON (((259950.061 6258104.764, 260905... | \n", "COMMUNE_0000000000075056 | \n", "Paris | \n", "PARIS | \n", "Capitale d'Etat | \n", "75056 | \n", "2113705 | \n", "2022-01-01Z | \n", "INSEE | \n", "NR | \n", "751 | \n", "75 | \n", "11 | \n", "217500016 | \n", "200054781 | \n", "None | \n", "10540 | \n", "
| 34877 | \n", "MULTIPOLYGON (((-366033.08 6599847.402, -36412... | \n", "COMMUNE_0000000000075056 | \n", "Paris | \n", "PARIS | \n", "Capitale d'Etat | \n", "75056 | \n", "2113705 | \n", "2022-01-01Z | \n", "INSEE | \n", "NR | \n", "751 | \n", "75 | \n", "11 | \n", "217500016 | \n", "200054781 | \n", "None | \n", "10540 | \n", "