From f0937e203bd71039b1265f249128fa25235ea6a7 Mon Sep 17 00:00:00 2001
From: Tom Brown <tom@nworbmot.org>
Date: Mon, 12 Oct 2020 12:07:49 +0200
Subject: [PATCH] industry: separate code for distribution key from nodal
 production

This allows us to reuse the key for today's nodal energy demand from
industry.
---
 Snakefile                                     |  17 +-
 scripts/build_industrial_distribution_key.py  | 153 ++++++++++++++++++
 .../build_industrial_production_per_node.py   | 147 ++---------------
 3 files changed, 178 insertions(+), 139 deletions(-)
 create mode 100644 scripts/build_industrial_distribution_key.py

diff --git a/Snakefile b/Snakefile
index a55a5cca..d55a2853 100644
--- a/Snakefile
+++ b/Snakefile
@@ -199,12 +199,25 @@ rule build_industrial_production_per_country_tomorrow:
     script: 'scripts/build_industrial_production_per_country_tomorrow.py'
 
 
-rule build_industrial_production_per_node:
+
+
+rule build_industrial_distribution_key:
     input:
         clustered_pop_layout="resources/pop_layout_{network}_s{simpl}_{clusters}.csv",
         europe_shape=pypsaeur('resources/europe_shape.geojson'),
         hotmaps_industrial_database="data/Industrial_Database.csv",
-        network=pypsaeur('networks/{network}_s{simpl}_{clusters}.nc'),
+        network=pypsaeur('networks/{network}_s{simpl}_{clusters}.nc')
+    output:
+        industrial_distribution_key="resources/industrial_distribution_key_{network}_s{simpl}_{clusters}.csv"
+    threads: 1
+    resources: mem_mb=1000
+    script: 'scripts/build_industrial_distribution_key.py'
+
+
+
+rule build_industrial_production_per_node:
+    input:
+        industrial_distribution_key="resources/industrial_distribution_key_{network}_s{simpl}_{clusters}.csv",
         industrial_production_per_country_tomorrow="resources/industrial_production_per_country_tomorrow.csv"
     output:
         industrial_production_per_node="resources/industrial_production_{network}_s{simpl}_{clusters}.csv"
diff --git a/scripts/build_industrial_distribution_key.py b/scripts/build_industrial_distribution_key.py
new file mode 100644
index 00000000..c5d55a2c
--- /dev/null
+++ b/scripts/build_industrial_distribution_key.py
@@ -0,0 +1,153 @@
+
+import pypsa
+import pandas as pd
+import geopandas as gpd
+from shapely import wkt, prepared
+from scipy.spatial import cKDTree as KDTree
+
+
+def prepare_hotmaps_database():
+
+    df = pd.read_csv(snakemake.input.hotmaps_industrial_database,
+                     sep=";",
+                     index_col=0)
+
+    #remove those sites without valid geometries
+    df.drop(df.index[df.geom.isna()],
+            inplace=True)
+
+    #parse geometry
+    #https://geopandas.org/gallery/create_geopandas_from_pandas.html?highlight=parse#from-wkt-format
+    df["Coordinates"] = df.geom.apply(lambda x : wkt.loads(x[x.find(";POINT")+1:]))
+
+    gdf = gpd.GeoDataFrame(df, geometry='Coordinates')
+
+    europe_shape = gpd.read_file(snakemake.input.europe_shape).loc[0, 'geometry']
+    europe_shape_prepped = prepared.prep(europe_shape)
+    not_in_europe = gdf.index[~gdf.geometry.apply(europe_shape_prepped.contains)]
+    print("Removing the following industrial facilities since they are not in European area:")
+    print(gdf.loc[not_in_europe])
+    gdf.drop(not_in_europe,
+             inplace=True)
+
+    country_to_code = {
+        'Belgium' : 'BE',
+        'Bulgaria' : 'BG',
+        'Czech Republic' : 'CZ',
+        'Denmark' : 'DK',
+        'Germany' : 'DE',
+        'Estonia' : 'EE',
+        'Ireland' : 'IE',
+        'Greece' : 'GR',
+        'Spain' : 'ES',
+        'France' : 'FR',
+        'Croatia' : 'HR',
+        'Italy' : 'IT',
+        'Cyprus' : 'CY',
+        'Latvia' : 'LV',
+        'Lithuania' : 'LT',
+        'Luxembourg' : 'LU',
+        'Hungary' : 'HU',
+        'Malta' : 'MA',
+        'Netherland' : 'NL',
+        'Austria' : 'AT',
+        'Poland' : 'PL',
+        'Portugal' : 'PT',
+        'Romania' : 'RO',
+        'Slovenia' : 'SI',
+        'Slovakia' : 'SK',
+        'Finland' : 'FI',
+        'Sweden' : 'SE',
+        'United Kingdom' : 'GB',
+        'Iceland' : 'IS',
+        'Norway' : 'NO',
+        'Montenegro' : 'ME',
+        'FYR of Macedonia' : 'MK',
+        'Albania' : 'AL',
+        'Serbia' : 'RS',
+        'Turkey' : 'TU',
+        'Bosnia and Herzegovina' : 'BA',
+        'Switzerland' : 'CH',
+        'Liechtenstein' : 'AT',
+    }
+    gdf["country_code"] = gdf.Country.map(country_to_code)
+
+    if gdf["country_code"].isna().any():
+        print("Warning, some countries not assigned an ISO code")
+
+    gdf["x"] = gdf.geometry.x
+    gdf["y"] = gdf.geometry.y
+
+    return gdf
+
+
+def assign_buses(gdf):
+
+    gdf["bus"] = ""
+
+    for c in n.buses.country.unique():
+        buses_i = n.buses.index[n.buses.country == c]
+        kdtree = KDTree(n.buses.loc[buses_i, ['x','y']].values)
+
+        industry_i = gdf.index[(gdf.country_code == c)]
+
+        if industry_i.empty:
+            print("Skipping country with no industry:",c)
+        else:
+            tree_i = kdtree.query(gdf.loc[industry_i, ['x','y']].values)[1]
+            gdf.loc[industry_i, 'bus'] = buses_i[tree_i]
+
+    if (gdf.bus == "").any():
+        print("Some industrial facilities have empty buses")
+    if gdf.bus.isna().any():
+        print("Some industrial facilities have NaN buses")
+
+
+def build_nodal_distribution_key(gdf):
+
+    sectors = ['Iron and steel','Chemical industry','Cement','Non-metallic mineral products','Glass','Paper and printing','Non-ferrous metals']
+
+    distribution_keys = pd.DataFrame(index=n.buses.index,
+                                    columns=sectors,
+                                    dtype=float)
+
+    pop_layout = pd.read_csv(snakemake.input.clustered_pop_layout,index_col=0)
+    pop_layout["ct"] = pop_layout.index.str[:2]
+    ct_total = pop_layout.total.groupby(pop_layout["ct"]).sum()
+    pop_layout["ct_total"] = pop_layout["ct"].map(ct_total)
+    distribution_keys["population"] = pop_layout["total"]/pop_layout["ct_total"]
+
+    for c in n.buses.country.unique():
+        buses = n.buses.index[n.buses.country == c]
+        for sector in sectors:
+            facilities = gdf.index[(gdf.country_code == c) & (gdf.Subsector == sector)]
+            if not facilities.empty:
+                emissions = gdf.loc[facilities,"Emissions_ETS_2014"]
+                if emissions.sum() == 0:
+                    distribution_key = pd.Series(1/len(facilities),
+                                                 facilities)
+                else:
+                    #BEWARE: this is a strong assumption
+                    emissions = emissions.fillna(emissions.mean())
+                    distribution_key = emissions/emissions.sum()
+                distribution_key = distribution_key.groupby(gdf.loc[facilities,"bus"]).sum().reindex(buses,fill_value=0.)
+            else:
+                distribution_key = distribution_keys.loc[buses,"population"]
+
+            if abs(distribution_key.sum() - 1) > 1e-4:
+                print(c,sector,distribution_key)
+
+            distribution_keys.loc[buses,sector] = distribution_key
+
+    distribution_keys.to_csv(snakemake.output.industrial_distribution_key)
+
+if __name__ == "__main__":
+
+
+    n = pypsa.Network(snakemake.input.network)
+
+    hotmaps_database = prepare_hotmaps_database()
+
+    assign_buses(hotmaps_database)
+
+    build_nodal_distribution_key(hotmaps_database)
diff --git a/scripts/build_industrial_production_per_node.py b/scripts/build_industrial_production_per_node.py
index f8efdfd3..9e56e49a 100644
--- a/scripts/build_industrial_production_per_node.py
+++ b/scripts/build_industrial_production_per_node.py
@@ -1,123 +1,19 @@
 
-import pypsa
 import pandas as pd
-import geopandas as gpd
-from shapely import wkt, prepared
-from scipy.spatial import cKDTree as KDTree
 
-
-def prepare_hotmaps_database():
-
-    df = pd.read_csv(snakemake.input.hotmaps_industrial_database,
-                     sep=";",
-                     index_col=0)
-
-    #remove those sites without valid geometries
-    df.drop(df.index[df.geom.isna()],
-            inplace=True)
-
-    #parse geometry
-    #https://geopandas.org/gallery/create_geopandas_from_pandas.html?highlight=parse#from-wkt-format
-    df["Coordinates"] = df.geom.apply(lambda x : wkt.loads(x[x.find(";POINT")+1:]))
-
-    gdf = gpd.GeoDataFrame(df, geometry='Coordinates')
-
-    europe_shape = gpd.read_file(snakemake.input.europe_shape).loc[0, 'geometry']
-    europe_shape_prepped = prepared.prep(europe_shape)
-    not_in_europe = gdf.index[~gdf.geometry.apply(europe_shape_prepped.contains)]
-    print("Removing the following industrial facilities since they are not in European area:")
-    print(gdf.loc[not_in_europe])
-    gdf.drop(not_in_europe,
-             inplace=True)
-
-    country_to_code = {
-        'Belgium' : 'BE',
-        'Bulgaria' : 'BG',
-        'Czech Republic' : 'CZ',
-        'Denmark' : 'DK',
-        'Germany' : 'DE',
-        'Estonia' : 'EE',
-        'Ireland' : 'IE',
-        'Greece' : 'GR',
-        'Spain' : 'ES',
-        'France' : 'FR',
-        'Croatia' : 'HR',
-        'Italy' : 'IT',
-        'Cyprus' : 'CY',
-        'Latvia' : 'LV',
-        'Lithuania' : 'LT',
-        'Luxembourg' : 'LU',
-        'Hungary' : 'HU',
-        'Malta' : 'MA',
-        'Netherland' : 'NL',
-        'Austria' : 'AT',
-        'Poland' : 'PL',
-        'Portugal' : 'PT',
-        'Romania' : 'RO',
-        'Slovenia' : 'SI',
-        'Slovakia' : 'SK',
-        'Finland' : 'FI',
-        'Sweden' : 'SE',
-        'United Kingdom' : 'GB',
-        'Iceland' : 'IS',
-        'Norway' : 'NO',
-        'Montenegro' : 'ME',
-        'FYR of Macedonia' : 'MK',
-        'Albania' : 'AL',
-        'Serbia' : 'RS',
-        'Turkey' : 'TU',
-        'Bosnia and Herzegovina' : 'BA',
-        'Switzerland' : 'CH',
-        'Liechtenstein' : 'AT',
-    }
-    gdf["country_code"] = gdf.Country.map(country_to_code)
-
-    if gdf["country_code"].isna().any():
-        print("Warning, some countries not assigned an ISO code")
-
-    gdf["x"] = gdf.geometry.x
-    gdf["y"] = gdf.geometry.y
-
-    return gdf
-
-
-def assign_buses(gdf):
-
-    gdf["bus"] = ""
-
-    for c in n.buses.country.unique():
-        buses_i = n.buses.index[n.buses.country == c]
-        kdtree = KDTree(n.buses.loc[buses_i, ['x','y']].values)
-
-        industry_i = gdf.index[(gdf.country_code == c)]
-
-        if industry_i.empty:
-            print("Skipping country with no industry:",c)
-        else:
-            tree_i = kdtree.query(gdf.loc[industry_i, ['x','y']].values)[1]
-            gdf.loc[industry_i, 'bus'] = buses_i[tree_i]
-
-    if (gdf.bus == "").any():
-        print("Some industrial facilities have empty buses")
-    if gdf.bus.isna().any():
-        print("Some industrial facilities have NaN buses")
-
-
-def build_nodal_industrial_production(gdf):
+def build_nodal_industrial_production():
 
     industrial_production = pd.read_csv(snakemake.input.industrial_production_per_country_tomorrow,
                                         index_col=0)
 
-    nodal_industrial_production = pd.DataFrame(index=n.buses.index,
+    distribution_keys = pd.read_csv(snakemake.input.industrial_distribution_key,
+                                        index_col=0)
+    distribution_keys["country"] = distribution_keys.index.str[:2]
+
+    nodal_industrial_production = pd.DataFrame(index=distribution_keys.index,
                                                columns=industrial_production.columns,
                                                dtype=float)
 
-    pop_layout = pd.read_csv(snakemake.input.clustered_pop_layout,index_col=0)
-    pop_layout["ct"] = pop_layout.index.str[:2]
-    ct_total = pop_layout.total.groupby(pop_layout["ct"]).sum()
-    pop_layout["ct_total"] = pop_layout["ct"].map(ct_total)
-    pop_layout["fraction"] = pop_layout["total"]/pop_layout["ct_total"]
-
     #map JRC/our sectors to hotmaps sector, where mapping exist
     sector_mapping = {'Electric arc' : 'Iron and steel',
                       'Integrated steelworks' : 'Iron and steel',
@@ -138,37 +34,14 @@ def build_nodal_industrial_production(gdf):
                       'Other non-ferrous metals' : 'Non-ferrous metals',
     }
 
-    for c in n.buses.country.unique():
-        buses = n.buses.index[n.buses.country == c]
+    for c in distribution_keys.country.unique():
+        buses = distribution_keys.index[distribution_keys.country == c]
         for sector in industrial_production.columns:
-            facilities = gdf.index[(gdf.country_code == c) & (gdf.Subsector == sector_mapping.get(sector,"None"))]
-            if (sector in sector_mapping) and not facilities.empty:
-                emissions = gdf.loc[facilities,"Emissions_ETS_2014"]
-                if emissions.sum() == 0:
-                    distribution_key = pd.Series(1/len(facilities),
-                                                 facilities)
-                else:
-                    #BEWARE: this is a strong assumption
-                    emissions = emissions.fillna(emissions.mean())
-                    distribution_key = emissions/emissions.sum()
-                distribution_key = distribution_key.groupby(gdf.loc[facilities,"bus"]).sum().reindex(buses,fill_value=0.)
-            else:
-                distribution_key = pop_layout.loc[buses,"fraction"]
-
-            if abs(distribution_key.sum() - 1) > 1e-4:
-                print(c,sector,distribution_key)
-
+            distribution_key = distribution_keys.loc[buses,sector_mapping.get(sector,"population")]
             nodal_industrial_production.loc[buses,sector] = industrial_production.at[c,sector]*distribution_key
 
     nodal_industrial_production.to_csv(snakemake.output.industrial_production_per_node)
 
 if __name__ == "__main__":
 
-
-    n = pypsa.Network(snakemake.input.network)
-
-    hotmaps_database = prepare_hotmaps_database()
-
-    assign_buses(hotmaps_database)
-
-    build_nodal_industrial_production(hotmaps_database)
+    build_nodal_industrial_production()