From f0937e203bd71039b1265f249128fa25235ea6a7 Mon Sep 17 00:00:00 2001 From: Tom Brown Date: Mon, 12 Oct 2020 12:07:49 +0200 Subject: [PATCH] industry: separate code for distribution key from nodal production This allows us to reuse the key for today's nodal energy demand from industry. --- Snakefile | 17 +- scripts/build_industrial_distribution_key.py | 153 ++++++++++++++++++ .../build_industrial_production_per_node.py | 147 ++--------------- 3 files changed, 178 insertions(+), 139 deletions(-) create mode 100644 scripts/build_industrial_distribution_key.py diff --git a/Snakefile b/Snakefile index a55a5cca..d55a2853 100644 --- a/Snakefile +++ b/Snakefile @@ -199,12 +199,25 @@ rule build_industrial_production_per_country_tomorrow: script: 'scripts/build_industrial_production_per_country_tomorrow.py' -rule build_industrial_production_per_node: + + +rule build_industrial_distribution_key: input: clustered_pop_layout="resources/pop_layout_{network}_s{simpl}_{clusters}.csv", europe_shape=pypsaeur('resources/europe_shape.geojson'), hotmaps_industrial_database="data/Industrial_Database.csv", - network=pypsaeur('networks/{network}_s{simpl}_{clusters}.nc'), + network=pypsaeur('networks/{network}_s{simpl}_{clusters}.nc') + output: + industrial_distribution_key="resources/industrial_distribution_key_{network}_s{simpl}_{clusters}.csv" + threads: 1 + resources: mem_mb=1000 + script: 'scripts/build_industrial_distribution_key.py' + + + +rule build_industrial_production_per_node: + input: + industrial_distribution_key="resources/industrial_distribution_key_{network}_s{simpl}_{clusters}.csv", industrial_production_per_country_tomorrow="resources/industrial_production_per_country_tomorrow.csv" output: industrial_production_per_node="resources/industrial_production_{network}_s{simpl}_{clusters}.csv" diff --git a/scripts/build_industrial_distribution_key.py b/scripts/build_industrial_distribution_key.py new file mode 100644 index 00000000..c5d55a2c --- /dev/null +++ b/scripts/build_industrial_distribution_key.py @@ -0,0 +1,153 @@ + +import pypsa +import pandas as pd +import geopandas as gpd +from shapely import wkt, prepared +from scipy.spatial import cKDTree as KDTree + + +def prepare_hotmaps_database(): + + df = pd.read_csv(snakemake.input.hotmaps_industrial_database, + sep=";", + index_col=0) + + #remove those sites without valid geometries + df.drop(df.index[df.geom.isna()], + inplace=True) + + #parse geometry + #https://geopandas.org/gallery/create_geopandas_from_pandas.html?highlight=parse#from-wkt-format + df["Coordinates"] = df.geom.apply(lambda x : wkt.loads(x[x.find(";POINT")+1:])) + + gdf = gpd.GeoDataFrame(df, geometry='Coordinates') + + europe_shape = gpd.read_file(snakemake.input.europe_shape).loc[0, 'geometry'] + europe_shape_prepped = prepared.prep(europe_shape) + not_in_europe = gdf.index[~gdf.geometry.apply(europe_shape_prepped.contains)] + print("Removing the following industrial facilities since they are not in European area:") + print(gdf.loc[not_in_europe]) + gdf.drop(not_in_europe, + inplace=True) + + country_to_code = { + 'Belgium' : 'BE', + 'Bulgaria' : 'BG', + 'Czech Republic' : 'CZ', + 'Denmark' : 'DK', + 'Germany' : 'DE', + 'Estonia' : 'EE', + 'Ireland' : 'IE', + 'Greece' : 'GR', + 'Spain' : 'ES', + 'France' : 'FR', + 'Croatia' : 'HR', + 'Italy' : 'IT', + 'Cyprus' : 'CY', + 'Latvia' : 'LV', + 'Lithuania' : 'LT', + 'Luxembourg' : 'LU', + 'Hungary' : 'HU', + 'Malta' : 'MA', + 'Netherland' : 'NL', + 'Austria' : 'AT', + 'Poland' : 'PL', + 'Portugal' : 'PT', + 'Romania' : 'RO', + 'Slovenia' : 'SI', + 'Slovakia' : 'SK', + 'Finland' : 'FI', + 'Sweden' : 'SE', + 'United Kingdom' : 'GB', + 'Iceland' : 'IS', + 'Norway' : 'NO', + 'Montenegro' : 'ME', + 'FYR of Macedonia' : 'MK', + 'Albania' : 'AL', + 'Serbia' : 'RS', + 'Turkey' : 'TU', + 'Bosnia and Herzegovina' : 'BA', + 'Switzerland' : 'CH', + 'Liechtenstein' : 'AT', + } + gdf["country_code"] = gdf.Country.map(country_to_code) + + if gdf["country_code"].isna().any(): + print("Warning, some countries not assigned an ISO code") + + gdf["x"] = gdf.geometry.x + gdf["y"] = gdf.geometry.y + + return gdf + + +def assign_buses(gdf): + + gdf["bus"] = "" + + for c in n.buses.country.unique(): + buses_i = n.buses.index[n.buses.country == c] + kdtree = KDTree(n.buses.loc[buses_i, ['x','y']].values) + + industry_i = gdf.index[(gdf.country_code == c)] + + if industry_i.empty: + print("Skipping country with no industry:",c) + else: + tree_i = kdtree.query(gdf.loc[industry_i, ['x','y']].values)[1] + gdf.loc[industry_i, 'bus'] = buses_i[tree_i] + + if (gdf.bus == "").any(): + print("Some industrial facilities have empty buses") + if gdf.bus.isna().any(): + print("Some industrial facilities have NaN buses") + + +def build_nodal_distribution_key(gdf): + + sectors = ['Iron and steel','Chemical industry','Cement','Non-metallic mineral products','Glass','Paper and printing','Non-ferrous metals'] + + distribution_keys = pd.DataFrame(index=n.buses.index, + columns=sectors, + dtype=float) + + pop_layout = pd.read_csv(snakemake.input.clustered_pop_layout,index_col=0) + pop_layout["ct"] = pop_layout.index.str[:2] + ct_total = pop_layout.total.groupby(pop_layout["ct"]).sum() + pop_layout["ct_total"] = pop_layout["ct"].map(ct_total) + distribution_keys["population"] = pop_layout["total"]/pop_layout["ct_total"] + + for c in n.buses.country.unique(): + buses = n.buses.index[n.buses.country == c] + for sector in sectors: + facilities = gdf.index[(gdf.country_code == c) & (gdf.Subsector == sector)] + if not facilities.empty: + emissions = gdf.loc[facilities,"Emissions_ETS_2014"] + if emissions.sum() == 0: + distribution_key = pd.Series(1/len(facilities), + facilities) + else: + #BEWARE: this is a strong assumption + emissions = emissions.fillna(emissions.mean()) + distribution_key = emissions/emissions.sum() + distribution_key = distribution_key.groupby(gdf.loc[facilities,"bus"]).sum().reindex(buses,fill_value=0.) + else: + distribution_key = distribution_keys.loc[buses,"population"] + + if abs(distribution_key.sum() - 1) > 1e-4: + print(c,sector,distribution_key) + + distribution_keys.loc[buses,sector] = distribution_key + + distribution_keys.to_csv(snakemake.output.industrial_distribution_key) + +if __name__ == "__main__": + + + n = pypsa.Network(snakemake.input.network) + + hotmaps_database = prepare_hotmaps_database() + + assign_buses(hotmaps_database) + + build_nodal_distribution_key(hotmaps_database) diff --git a/scripts/build_industrial_production_per_node.py b/scripts/build_industrial_production_per_node.py index f8efdfd3..9e56e49a 100644 --- a/scripts/build_industrial_production_per_node.py +++ b/scripts/build_industrial_production_per_node.py @@ -1,123 +1,19 @@ -import pypsa import pandas as pd -import geopandas as gpd -from shapely import wkt, prepared -from scipy.spatial import cKDTree as KDTree - -def prepare_hotmaps_database(): - - df = pd.read_csv(snakemake.input.hotmaps_industrial_database, - sep=";", - index_col=0) - - #remove those sites without valid geometries - df.drop(df.index[df.geom.isna()], - inplace=True) - - #parse geometry - #https://geopandas.org/gallery/create_geopandas_from_pandas.html?highlight=parse#from-wkt-format - df["Coordinates"] = df.geom.apply(lambda x : wkt.loads(x[x.find(";POINT")+1:])) - - gdf = gpd.GeoDataFrame(df, geometry='Coordinates') - - europe_shape = gpd.read_file(snakemake.input.europe_shape).loc[0, 'geometry'] - europe_shape_prepped = prepared.prep(europe_shape) - not_in_europe = gdf.index[~gdf.geometry.apply(europe_shape_prepped.contains)] - print("Removing the following industrial facilities since they are not in European area:") - print(gdf.loc[not_in_europe]) - gdf.drop(not_in_europe, - inplace=True) - - country_to_code = { - 'Belgium' : 'BE', - 'Bulgaria' : 'BG', - 'Czech Republic' : 'CZ', - 'Denmark' : 'DK', - 'Germany' : 'DE', - 'Estonia' : 'EE', - 'Ireland' : 'IE', - 'Greece' : 'GR', - 'Spain' : 'ES', - 'France' : 'FR', - 'Croatia' : 'HR', - 'Italy' : 'IT', - 'Cyprus' : 'CY', - 'Latvia' : 'LV', - 'Lithuania' : 'LT', - 'Luxembourg' : 'LU', - 'Hungary' : 'HU', - 'Malta' : 'MA', - 'Netherland' : 'NL', - 'Austria' : 'AT', - 'Poland' : 'PL', - 'Portugal' : 'PT', - 'Romania' : 'RO', - 'Slovenia' : 'SI', - 'Slovakia' : 'SK', - 'Finland' : 'FI', - 'Sweden' : 'SE', - 'United Kingdom' : 'GB', - 'Iceland' : 'IS', - 'Norway' : 'NO', - 'Montenegro' : 'ME', - 'FYR of Macedonia' : 'MK', - 'Albania' : 'AL', - 'Serbia' : 'RS', - 'Turkey' : 'TU', - 'Bosnia and Herzegovina' : 'BA', - 'Switzerland' : 'CH', - 'Liechtenstein' : 'AT', - } - gdf["country_code"] = gdf.Country.map(country_to_code) - - if gdf["country_code"].isna().any(): - print("Warning, some countries not assigned an ISO code") - - gdf["x"] = gdf.geometry.x - gdf["y"] = gdf.geometry.y - - return gdf - - -def assign_buses(gdf): - - gdf["bus"] = "" - - for c in n.buses.country.unique(): - buses_i = n.buses.index[n.buses.country == c] - kdtree = KDTree(n.buses.loc[buses_i, ['x','y']].values) - - industry_i = gdf.index[(gdf.country_code == c)] - - if industry_i.empty: - print("Skipping country with no industry:",c) - else: - tree_i = kdtree.query(gdf.loc[industry_i, ['x','y']].values)[1] - gdf.loc[industry_i, 'bus'] = buses_i[tree_i] - - if (gdf.bus == "").any(): - print("Some industrial facilities have empty buses") - if gdf.bus.isna().any(): - print("Some industrial facilities have NaN buses") - - -def build_nodal_industrial_production(gdf): +def build_nodal_industrial_production(): industrial_production = pd.read_csv(snakemake.input.industrial_production_per_country_tomorrow, index_col=0) - nodal_industrial_production = pd.DataFrame(index=n.buses.index, + distribution_keys = pd.read_csv(snakemake.input.industrial_distribution_key, + index_col=0) + distribution_keys["country"] = distribution_keys.index.str[:2] + + nodal_industrial_production = pd.DataFrame(index=distribution_keys.index, columns=industrial_production.columns, dtype=float) - pop_layout = pd.read_csv(snakemake.input.clustered_pop_layout,index_col=0) - pop_layout["ct"] = pop_layout.index.str[:2] - ct_total = pop_layout.total.groupby(pop_layout["ct"]).sum() - pop_layout["ct_total"] = pop_layout["ct"].map(ct_total) - pop_layout["fraction"] = pop_layout["total"]/pop_layout["ct_total"] - #map JRC/our sectors to hotmaps sector, where mapping exist sector_mapping = {'Electric arc' : 'Iron and steel', 'Integrated steelworks' : 'Iron and steel', @@ -138,37 +34,14 @@ def build_nodal_industrial_production(gdf): 'Other non-ferrous metals' : 'Non-ferrous metals', } - for c in n.buses.country.unique(): - buses = n.buses.index[n.buses.country == c] + for c in distribution_keys.country.unique(): + buses = distribution_keys.index[distribution_keys.country == c] for sector in industrial_production.columns: - facilities = gdf.index[(gdf.country_code == c) & (gdf.Subsector == sector_mapping.get(sector,"None"))] - if (sector in sector_mapping) and not facilities.empty: - emissions = gdf.loc[facilities,"Emissions_ETS_2014"] - if emissions.sum() == 0: - distribution_key = pd.Series(1/len(facilities), - facilities) - else: - #BEWARE: this is a strong assumption - emissions = emissions.fillna(emissions.mean()) - distribution_key = emissions/emissions.sum() - distribution_key = distribution_key.groupby(gdf.loc[facilities,"bus"]).sum().reindex(buses,fill_value=0.) - else: - distribution_key = pop_layout.loc[buses,"fraction"] - - if abs(distribution_key.sum() - 1) > 1e-4: - print(c,sector,distribution_key) - + distribution_key = distribution_keys.loc[buses,sector_mapping.get(sector,"population")] nodal_industrial_production.loc[buses,sector] = industrial_production.at[c,sector]*distribution_key nodal_industrial_production.to_csv(snakemake.output.industrial_production_per_node) if __name__ == "__main__": - - n = pypsa.Network(snakemake.input.network) - - hotmaps_database = prepare_hotmaps_database() - - assign_buses(hotmaps_database) - - build_nodal_industrial_production(hotmaps_database) + build_nodal_industrial_production()