Automotive industry sites

[1]:
import pandas as pd
import numpy as np
from shapely.ops import transform

#%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.offsetbox import AnchoredText

from pynsee import *
/opt/python/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
[2]:
# Fetch the 'NAF5' activity list
naf5 = get_activity_list('NAF5')

# Search the SIRENE database for establishments (kind='siret') whose
# activitePrincipaleEtablissement field matches the pattern '29.10Z'
data = search_sirene(
    variable="activitePrincipaleEtablissement",
    pattern='29.10Z',
    kind='siret',
    number=2000,
)
[3]:
# Save the raw search result to a CSV file in the working directory
data.to_csv(path_or_buf='sirene.csv')
[4]:
# Preview the first five rows of the search result
data.head(n=5)
[4]:
siren nic siret dateDebut dateCreationEtablissement dateCreationUniteLegale dateFin denominationUniteLegale nomUniteLegale prenomUsuelUniteLegale ... libelleVoie2Etablissement codePostal2Etablissement libelleCommune2Etablissement libelleCommuneEtranger2Etablissement distributionSpeciale2Etablissement codeCommune2Etablissement codeCedex2Etablissement libelleCedex2Etablissement codePaysEtranger2Etablissement libellePaysEtranger2Etablissement
0 006580195 00011 00658019500011 2008-01-01 1965-01-01 1965-01-01 None SOCIETE INDUSTRIELLE POUR LE DEVELOPPEMENT DE ... None None ... None None None None None None None None None None
1 085520195 00109 08552019500109 2022-09-01 2022-06-30 1955-01-01 None SAFRA None None ... None None None None None None None None None None
2 301692307 00020 30169230700020 2020-12-31 2014-08-15 1965-01-01 None GARAGE BEYRIS None None ... None None None None None None None None None None
3 302279229 00025 30227922900025 2014-07-25 1900-01-01 1975-01-01 None RAPIDO None None ... None None None None None None None None None None
4 302279229 00033 30227922900033 2008-01-01 1996-01-01 1975-01-01 None RAPIDO None None ... None None None None None None None None None None

5 rows × 96 columns

[5]:
# Keep only businesses with more than 100 employees, based on the
# effectifsMinEtablissement column, and renumber the rows from zero
has_large_headcount = data['effectifsMinEtablissement'] > 100
df = data.loc[has_large_headcount].reset_index(drop=True)

# Find the latitude and longitude of all remaining businesses ...
gdf = df.get_location()
# ... and order them from the largest headcount to the smallest
gdf = gdf.sort_values(by=['effectifsMinEtablissement'], ascending=False)
Existing environment variable sirene_key used, instead of locally saved credentials
Getting location: 100%|██████████| 49/49 [00:00<00:00, 16367.04it/s]
Getting location: 100%|██████████| 8/8 [00:00<00:00, 7339.11it/s]
[6]:
# Retrieve the geodata list (not used further in this cell)
geodataList = get_geodata_list()

# Get the map of departement limits, then apply transform_overseas()
# to the result before previewing its first rows
mapdep = get_geodata(
    'ADMINEXPRESS-COG-CARTO.LATEST:departement', update=True
).transform_overseas()
mapdep.head()
NR is missing from code_insee_du_departement column !
[6]:
geometry cleabs nom_officiel nom_officiel_en_majuscules code_insee code_insee_de_la_region code_siren code_insee_du_departement
0 MULTIPOLYGON (((277926.054 5709617.925, 277848... DEPARTEM0000000000000063 Puy-de-Dôme PUY-DE-DOME 63 84 226300010 63
1 MULTIPOLYGON (((336295.261 6472777.256, 336428... DEPARTEM0000000000000059 Nord NORD 59 32 225900018 59
2 MULTIPOLYGON (((-91403.654 6186431.719, -91543... DEPARTEM0000000000000061 Orne ORNE 61 28 226100014 61
3 MULTIPOLYGON (((582085.39 5342294.655, 582092.... DEPARTEM0000000000000013 Bouches-du-Rhône BOUCHES-DU-RHONE 13 93 221300015 13
4 MULTIPOLYGON (((259950.061 6258104.764, 260905... DEPARTEM0000000000000075 Paris PARIS 75 11 227500055 75
[7]:
# Make cleaned labels: assign each establishment a plot label based on the
# name of its legal unit (denominationUniteLegale).
#
# Each entry of match_list is searched as a plain substring; values holds the
# label given for the pattern at the same position. np.select keeps the value
# of the FIRST matching condition, so the order of match_list matters.
match_list = ['RENAULT SAS', 'ALPINE', 'BATILLY', 'MAUBEUGE CONSTRUCTION',
              'TOYOTA', 'STELLANTIS AUTO SAS', 'RENAULT TRUCKS']

values = ['RENAULT SAS', 'RENAULT SAS', 'RENAULT SAS', 'RENAULT SAS',
          'TOYOTA', 'STELLANTIS AUTO SAS', 'RENAULT TRUCKS']

legal_names = gdf['denominationUniteLegale']

# regex=False: the patterns are literal text, not regular expressions.
# na=False: a missing name counts as "no match". Without it str.contains
# returns NaN for those rows and np.select rejects the non-boolean condition.
conditions = [
    legal_names.str.contains(x, regex=False, na=False).astype(bool)
    for x in match_list
]

# Rows matching none of the patterns receive the default label. No explicit
# catch-all condition is needed: a '[^...]' regex is a negated character
# class, not a "does not contain this name" test.
gdf['label'] = np.select(conditions, values, default="OTHER")
[8]:
# Reproject the establishments to EPSG:3857 before drawing them on the map
gdf = gdf.to_crs('EPSG:3857')

# Annotation shown in a framed box in the lower-left corner of the map
txt = (
    'Circles are proportionate to the minimum of the employee number range'
    '\nIf headcount is missing in SIRENE database, some factories may not be displayed'
)

# Background layer: departement outlines on a white fill
ax = mapdep.plot(color='white', edgecolor='black', figsize=(15, 7))
ax.set_title('Automotive industry sites in France')

# Legend placement; loc=1 is matplotlib's numeric code for 'upper right'
legend_options = {
    'bbox_to_anchor': (1.1, 1),
    'loc': 1,
    'borderaxespad': 0,
}

# One marker per establishment, coloured by label and sized by headcount
gdf.plot(
    ax=ax,
    column='label',
    edgecolor='white',
    markersize=gdf.effectifsMinEtablissement / 5,
    legend=True,
    legend_kwds=legend_options,
)

at = AnchoredText(txt, prop=dict(size=9), frameon=True, loc='lower left')
ax.add_artist(at)
ax.set_axis_off()
plt.show()
../_images/examples_example_automotive_industry_sirene_8_0.png