drop duplicates in prepare_hotmaps_database
This commit is contained in:
parent
1fc83b793d
commit
8d09e38627
@ -93,6 +93,20 @@ def prepare_hotmaps_database(regions):
|
||||
gdf.rename(columns={"index_right": "bus"}, inplace=True)
|
||||
gdf["country"] = gdf.bus.str[:2]
|
||||
|
||||
# the .sjoin can lead to duplicates if a geom is in two regions
|
||||
if gdf.index.duplicated().any():
|
||||
import pycountry
|
||||
# get the index labels of all duplicated entries
|
||||
duplicated_i = gdf.index[gdf.index.duplicated()]
|
||||
# convert the raw-data country name to its ISO 3166-1 alpha-2 code
|
||||
s = df.loc[duplicated_i, "Country"].apply(lambda x: pycountry.countries.lookup(x).alpha_2)
|
||||
# Get a boolean mask where gdf's country column matches s's values for the same index
|
||||
mask = gdf['country'] == gdf.index.map(s)
|
||||
# Filter gdf using the mask
|
||||
gdf_filtered = gdf[mask]
|
||||
# concatenate the non-duplicated entries with the filtered duplicates
|
||||
gdf = pd.concat([gdf.drop(duplicated_i), gdf_filtered]).sort_index()
|
||||
|
||||
return gdf
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user