From f4d821ad2ad5709910988d05b2ff1867bc39dee3 Mon Sep 17 00:00:00 2001
From: Fabian Neumann
Date: Tue, 22 Aug 2023 14:22:25 +0200
Subject: [PATCH] use country_converter instead of pycountry

---
 scripts/build_industrial_distribution_key.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/scripts/build_industrial_distribution_key.py b/scripts/build_industrial_distribution_key.py
index 3c62ca1b..25d0235a 100644
--- a/scripts/build_industrial_distribution_key.py
+++ b/scripts/build_industrial_distribution_key.py
@@ -93,22 +93,16 @@ def prepare_hotmaps_database(regions):
     gdf.rename(columns={"index_right": "bus"}, inplace=True)
     gdf["country"] = gdf.bus.str[:2]
 
-    # the .sjoin can lead to duplicates if a geom is in two regions
+    # the .sjoin can lead to duplicates if a geom is in two overlapping regions
     if gdf.index.duplicated().any():
-        import pycountry
-
         # get all duplicated entries
         duplicated_i = gdf.index[gdf.index.duplicated()]
         # convert from raw data country name to iso-2-code
-        s = df.loc[duplicated_i, "Country"].apply(
-            lambda x: pycountry.countries.lookup(x).alpha_2
-        )
-        # Get a boolean mask where gdf's country column matches s's values for the same index
-        mask = gdf["country"] == gdf.index.map(s)
-        # Filter gdf using the mask
-        gdf_filtered = gdf[mask]
+        code = cc.convert(gdf.loc[duplicated_i, "Country"], to="iso2")
+        # screen out malformed country allocation
+        gdf_filtered = gdf.loc[duplicated_i].query("country == @code")
         # concat not duplicated and filtered gdf
-        gdf = pd.concat([gdf.drop(duplicated_i), gdf_filtered]).sort_index()
+        gdf = pd.concat([gdf.drop(duplicated_i), gdf_filtered])
 
     return gdf
 