{ "cells": [ { "cell_type": "markdown", "id": "8e79a16a-cda5-4295-bcb2-bbb1bfc8d7ef", "metadata": {}, "source": [ "# INSEE’s premises" ] }, { "cell_type": "code", "execution_count": 1, "id": "97452f4c-771b-4fac-8b9b-31147347546e", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "import geopandas\n", "import re\n", "import pandas as pd\n", "\n", "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "import matplotlib.patches as mpatches\n", "import matplotlib, descartes\n", "\n", "from pynsee.sirene import search_sirene\n", "from pynsee.geodata import get_geodata_list, get_geodata" ] }, { "cell_type": "code", "execution_count": 2, "id": "980335ee-cb80-4124-9805-123304c03b67", "metadata": {}, "outputs": [], "source": [ "import logging\n", "import sys\n", "logging.basicConfig(stream=sys.stdout,\n", " level=logging.INFO, \n", " format=\"%(message)s\")" ] }, { "cell_type": "markdown", "id": "471a2041-65c6-4742-8624-582689d2f2a7", "metadata": {}, "source": [ "Subscribe to api.insee.fr and get your credentials!\n", "\n", "Save your credentials with the init_conn function:\n", "```python\n", "from pynsee.utils import init_conn\n", "init_conn(sirene_key=\"my_sirene_key\")\n", "```\n", "\n", "Beware: any change to the keys should be tested after having cleared the cache \n", "Please do: ``from pynsee.utils import clear_all_cache; clear_all_cache()``" ] }, { "cell_type": "code", "execution_count": 3, "id": "454244e0-3bac-498e-b200-5efc11b45e07", "metadata": {}, "outputs": [], "source": [ "import os\n", "from getpass import getpass\n", "\n", "# Do not hardcode the key in the notebook: reuse the sirene_key environment\n", "# variable when it is already set, otherwise prompt for it without echoing.\n", "# For a non-interactive run, export sirene_key before starting the kernel.\n", "if not os.environ.get('sirene_key'):\n", "    os.environ['sirene_key'] = getpass('INSEE sirene_key: ')" ] }, { "cell_type": "code", "execution_count": 4, "id": "60f0e648-a041-4154-9df7-4304bc054afe", 
"metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/acaae682614742b3fc8d1b32c8e57c06.parquet\n", "Creation date: 2025-06-28, today\n", "Set update=True to get the most up-to-date data\n" ] } ], "source": [ "# Criteria handed to search_sirene\n", "variable = [\n", "    \"denominationUniteLegale\",\n", "    \"sigleUniteLegale\",\n", "    \"categorieJuridiqueUniteLegale\",\n", "]\n", "\n", "insee_pattern = \"INSTITUT NATIONAL DE LA STATISTIQUE ET DES ETUDES ECONOMIQUES\"\n", "\n", "# 7120: central department of a ministry\n", "pattern = [insee_pattern, \"INSEE\", \"7120\"]\n", "\n", "data = search_sirene(variable=variable, pattern=pattern, kind=\"siret\")" ] }, { "cell_type": "code", "execution_count": 5, "id": "5411b645-e328-492c-a39b-87f5452ab68e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "This function returns data made available by OpenStreetMap and its contributors.\n", "Please comply with Openstreetmap's Copyright and ODbL Licence\n", "INSEE API credentials have not been found: please try to reuse pynsee.init_conn to save them locally.\n", "Otherwise, you can still use environment variables as follow:\n", "\n", "import os\n", "os.environ['sirene_key'] = 'my_sirene_key'\n", "Existing environment variable sirene_key used, instead of locally saved credentials\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Getting location: 0%| | 0/32 [00:00, ?it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/6b05d2e8cf8de48b5a73c9a400c8c4c1.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/b8a82eb66c63b83c98f2e2a4bf25bc11.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/8f30fd18cb688dafa93fe22a82443ade.json\n", "Set 
update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/50abc53ac9ba98eabaed84a40aa7ca8e.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/8f614709b2a3e103afda2454a5678ab6.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/bf44d6f7f8d1c27ae4efdb0ae7a239dd.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/31b39d7e833eefa400581c6ba71cb447.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/89a67ed79b14d5183feaae31d4d767c5.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/a1f7fc6de16cbf525641ddc6e13f1b5f.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/5faf9447c506b220c2cba72c480c98e8.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/717dd9025eb4719fe9610d7129df43f0.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/df676ca37c304dddba4f422f8edbbb7e.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/a9a8c3d2c93133cca8d1f469fdd0486f.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/f107a9122f02b785cfe6735b7c1379d6.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", 
"/home/onyxia/.cache/pynsee/pynsee/bf29cb16ce2c18b0c3bafb12591fbb06.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/762bbc311d97cf3946f0339fc36b296c.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/5626619af34d1fefe798615ef5a667b7.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/389bc20b0bb00d25ebe79548de02796b.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/0bac0871892bb556c48ff964d34f20fb.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/3655a69ebe64cbeea6344a886fdd8635.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/46117914b0804652787d9b7d72a45bf7.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/33a6a6bca537cf561fc93826f1001a99.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/ae15ee9ca6adea71ba65a5b79b7d8455.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/0257ca28cbbc2dae2ef8f2df77ab8c3c.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/921feff526c6552202bcd6277ff149a7.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/48be8843ad1a68a69116880ce0338f06.json\n", "Set update=True to get the 
most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/00a74dfb427c73250ad673aecca2ca60.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/7d1ccd74a8d87908fae0624b8968ad64.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/ed72fdfe535f40fff903f1443090787e.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/7fb87565654627619a206fa33789162b.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/e2f5fbd0576802d9c22f655f502a0843.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/7f2b603e6a7eadc11c89407fbf635dc6.json\n", "Set update=True to get the most up-to-date data\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Getting location: 100%|██████████| 32/32 [00:00<00:00, 1292.07it/s]\n", "Getting location: 0%| | 0/3 [00:00, ?it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/76ce91da17c666e71b9ba4ad4020629d.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/f224a1c88bd3afe1d2def2af095851ab.json\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/952870fe54e1acc305187a4692b985e6.json\n", "Set update=True to get the most up-to-date data\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Getting location: 100%|██████████| 3/3 [00:00<00:00, 1585.35it/s]\n" ] } ], "source": [ "gdf = data.get_location()" ] }, { 
"cell_type": "code", "execution_count": 6, "id": "e5a6c13d-caf0-43be-a29b-ca6784c78586", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Converting GeoDataFrame to EPSG:3857.\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/f2bbdea5546ef02791b1286f1b6b1a51.parquet\n", "Creation date: 2025-06-28, today\n", "Set update=True to get the most up-to-date data\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Finding departement: 100%|██████████| 32/32 [00:00<00:00, 1051.00it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "NR is missing from code_insee_du_departement column !\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "# move overseas departments closer to metropolitan France\n", "gdf = gdf.transform_overseas().zoom()" ] }, { "cell_type": "code", "execution_count": 7, "id": "44cd4448-7284-4de6-8414-39da87131ba8", "metadata": {}, "outputs": [], "source": [ "# Number each row from its index label (label + 1) and build its text label.\n", "# Column-wise assignment replaces the former row-by-row loop, so 'i' is an\n", "# integer column instead of an accidental float one.\n", "rank = gdf.index.to_series() + 1\n", "# a missing sign name yields '<number> - ' instead of raising a TypeError\n", "gdf['name'] = rank.astype(str) + ' - ' + gdf['enseigne1Etablissement'].fillna('')\n", "gdf['i'] = rank\n", "\n", "# cleaning: drop the rows without a geometry; copy so that later column\n", "# assignments do not operate on a slice of the unfiltered frame\n", "gdf = gdf[~gdf.geometry.isnull()].copy()" ] }, { "cell_type": "code", "execution_count": 8, "id": "3b9a82b0-ce07-4ef1-9126-410b8c0237b1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/e879822687bab4b12f23762b5d56742b.parquet\n", "Creation date: 2025-06-28, today\n", "Set update=True to get the most up-to-date data\n", "Previously saved data has been used:\n", "/home/onyxia/.cache/pynsee/pynsee/7a91d3072047b4ad17253bc108ef87cf.parquet\n", "Creation date: 2025-06-28, today\n", "Set update=True to get the most up-to-date data\n", "NR is missing from code_insee_du_departement column !\n" ] }, { "data": { "text/html": [ "
| \n", " | geometry | \n", "cleabs | \n", "nom_officiel | \n", "nom_officiel_en_majuscules | \n", "code_insee | \n", "code_insee_de_la_region | \n", "code_siren | \n", "code_insee_du_departement | \n", "
|---|---|---|---|---|---|---|---|---|
| 0 | \n", "MULTIPOLYGON (((277926.054 5709617.925, 277848... | \n", "DEPARTEM0000000000000063 | \n", "Puy-de-Dôme | \n", "PUY-DE-DOME | \n", "63 | \n", "84 | \n", "226300010 | \n", "63 | \n", "
| 1 | \n", "MULTIPOLYGON (((336295.261 6472777.256, 336428... | \n", "DEPARTEM0000000000000059 | \n", "Nord | \n", "NORD | \n", "59 | \n", "32 | \n", "225900018 | \n", "59 | \n", "
| 2 | \n", "MULTIPOLYGON (((-91403.654 6186431.719, -91543... | \n", "DEPARTEM0000000000000061 | \n", "Orne | \n", "ORNE | \n", "61 | \n", "28 | \n", "226100014 | \n", "61 | \n", "
| 3 | \n", "MULTIPOLYGON (((582085.39 5342294.655, 582092.... | \n", "DEPARTEM0000000000000013 | \n", "Bouches-du-Rhône | \n", "BOUCHES-DU-RHONE | \n", "13 | \n", "93 | \n", "221300015 | \n", "13 | \n", "
| 4 | \n", "MULTIPOLYGON (((259950.061 6258104.764, 260905... | \n", "DEPARTEM0000000000000075 | \n", "Paris | \n", "PARIS | \n", "75 | \n", "11 | \n", "227500055 | \n", "75 | \n", "