From 90f5349b7d1eef29e30f5763b68cf3b5ca4e9237 Mon Sep 17 00:00:00 2001
From: Tom Brown <tom@nworbmot.org>
Date: Mon, 5 Oct 2020 20:04:04 +0200
Subject: [PATCH 1/7] Use hotmaps industrial database for distribution in each
 country

I.e. per sector geographical distribution of industrial facilities
within each country.

Drop facilities outside Europe and with no geocoordinates.

Use ETS emissions as a distribution key; where emissions data is
missing, substitute with an average for that sector and that country
(strong assumption).
---
 .gitignore                                    |   1 +
 Snakefile                                     |  15 ++
 .../build_industrial_production_per_node.py   | 174 ++++++++++++++++++
 3 files changed, 190 insertions(+)
 create mode 100644 scripts/build_industrial_production_per_node.py

diff --git a/.gitignore b/.gitignore
index b41436e7..3909265b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,7 @@ gurobi.log
 /data/transport_data.csv
 /data/switzerland*
 /data/.nfs*
+/data/Industrial_Database.csv
 
 *.org
 
diff --git a/Snakefile b/Snakefile
index b01d7edd..a55a5cca 100644
--- a/Snakefile
+++ b/Snakefile
@@ -198,6 +198,21 @@ rule build_industrial_production_per_country_tomorrow:
     resources: mem_mb=1000
     script: 'scripts/build_industrial_production_per_country_tomorrow.py'
 
+
+rule build_industrial_production_per_node:
+    input:
+        clustered_pop_layout="resources/pop_layout_{network}_s{simpl}_{clusters}.csv",
+        europe_shape=pypsaeur('resources/europe_shape.geojson'),
+        hotmaps_industrial_database="data/Industrial_Database.csv",
+        network=pypsaeur('networks/{network}_s{simpl}_{clusters}.nc'),
+        industrial_production_per_country_tomorrow="resources/industrial_production_per_country_tomorrow.csv"
+    output:
+        industrial_production_per_node="resources/industrial_production_{network}_s{simpl}_{clusters}.csv"
+    threads: 1
+    resources: mem_mb=1000
+    script: 'scripts/build_industrial_production_per_node.py'
+
+
 rule build_industrial_energy_demand_per_country_today:
     input:
         ammonia_production="resources/ammonia_production.csv",
diff --git a/scripts/build_industrial_production_per_node.py b/scripts/build_industrial_production_per_node.py
new file mode 100644
index 00000000..f8efdfd3
--- /dev/null
+++ b/scripts/build_industrial_production_per_node.py
@@ -0,0 +1,174 @@
+
+import pypsa
+import pandas as pd
+import geopandas as gpd
+from shapely import wkt, prepared
+from scipy.spatial import cKDTree as KDTree
+
+
+def prepare_hotmaps_database():
+
+    df = pd.read_csv(snakemake.input.hotmaps_industrial_database,
+                     sep=";",
+                     index_col=0)
+
+    #remove those sites without valid geometries
+    df.drop(df.index[df.geom.isna()],
+            inplace=True)
+
+    #parse geometry
+    #https://geopandas.org/gallery/create_geopandas_from_pandas.html?highlight=parse#from-wkt-format
+    df["Coordinates"] = df.geom.apply(lambda x : wkt.loads(x[x.find(";POINT")+1:]))
+
+    gdf = gpd.GeoDataFrame(df, geometry='Coordinates')
+
+    europe_shape = gpd.read_file(snakemake.input.europe_shape).loc[0, 'geometry']
+    europe_shape_prepped = prepared.prep(europe_shape)
+    not_in_europe = gdf.index[~gdf.geometry.apply(europe_shape_prepped.contains)]
+    print("Removing the following industrial facilities since they are not in European area:")
+    print(gdf.loc[not_in_europe])
+    gdf.drop(not_in_europe,
+             inplace=True)
+
+    country_to_code = {
+        'Belgium' : 'BE',
+        'Bulgaria' : 'BG',
+        'Czech Republic' : 'CZ',
+        'Denmark' : 'DK',
+        'Germany' : 'DE',
+        'Estonia' : 'EE',
+        'Ireland' : 'IE',
+        'Greece' : 'GR',
+        'Spain' : 'ES',
+        'France' : 'FR',
+        'Croatia' : 'HR',
+        'Italy' : 'IT',
+        'Cyprus' : 'CY',
+        'Latvia' : 'LV',
+        'Lithuania' : 'LT',
+        'Luxembourg' : 'LU',
+        'Hungary' : 'HU',
+        'Malta' : 'MA',
+        'Netherland' : 'NL',
+        'Austria' : 'AT',
+        'Poland' : 'PL',
+        'Portugal' : 'PT',
+        'Romania' : 'RO',
+        'Slovenia' : 'SI',
+        'Slovakia' : 'SK',
+        'Finland' : 'FI',
+        'Sweden' : 'SE',
+        'United Kingdom' : 'GB',
+        'Iceland' : 'IS',
+        'Norway' : 'NO',
+        'Montenegro' : 'ME',
+        'FYR of Macedonia' : 'MK',
+        'Albania' : 'AL',
+        'Serbia' : 'RS',
+        'Turkey' : 'TU',
+        'Bosnia and Herzegovina' : 'BA',
+        'Switzerland' : 'CH',
+        'Liechtenstein' : 'AT',
+    }
+    gdf["country_code"] = gdf.Country.map(country_to_code)
+
+    if gdf["country_code"].isna().any():
+        print("Warning, some countries not assigned an ISO code")
+
+    gdf["x"] = gdf.geometry.x
+    gdf["y"] = gdf.geometry.y
+
+    return gdf
+
+
+def assign_buses(gdf):
+
+    gdf["bus"] = ""
+
+    for c in n.buses.country.unique():
+        buses_i = n.buses.index[n.buses.country == c]
+        kdtree = KDTree(n.buses.loc[buses_i, ['x','y']].values)
+
+        industry_i = gdf.index[(gdf.country_code == c)]
+
+        if industry_i.empty:
+            print("Skipping country with no industry:",c)
+        else:
+            tree_i = kdtree.query(gdf.loc[industry_i, ['x','y']].values)[1]
+            gdf.loc[industry_i, 'bus'] = buses_i[tree_i]
+
+    if (gdf.bus == "").any():
+        print("Some industrial facilities have empty buses")
+    if gdf.bus.isna().any():
+        print("Some industrial facilities have NaN buses")
+
+
+def build_nodal_industrial_production(gdf):
+
+    industrial_production = pd.read_csv(snakemake.input.industrial_production_per_country_tomorrow,
+                                        index_col=0)
+
+    nodal_industrial_production = pd.DataFrame(index=n.buses.index,
+                                               columns=industrial_production.columns,
+                                               dtype=float)
+
+    pop_layout = pd.read_csv(snakemake.input.clustered_pop_layout,index_col=0)
+    pop_layout["ct"] = pop_layout.index.str[:2]
+    ct_total = pop_layout.total.groupby(pop_layout["ct"]).sum()
+    pop_layout["ct_total"] = pop_layout["ct"].map(ct_total)
+    pop_layout["fraction"] = pop_layout["total"]/pop_layout["ct_total"]
+
+    #map JRC/our sectors to hotmaps sector, where mapping exist
+    sector_mapping = {'Electric arc' : 'Iron and steel',
+                      'Integrated steelworks' : 'Iron and steel',
+                      'DRI + Electric arc' : 'Iron and steel',
+                      'Ammonia' : 'Chemical industry',
+                      'Basic chemicals (without ammonia)' : 'Chemical industry',
+                      'Other chemicals' : 'Chemical industry',
+                      'Pharmaceutical products etc.' : 'Chemical industry',
+                      'Cement' : 'Cement',
+                      'Ceramics & other NMM' : 'Non-metallic mineral products',
+                      'Glass production' : 'Glass',
+                      'Pulp production' : 'Paper and printing',
+                      'Paper production' : 'Paper and printing',
+                      'Printing and media reproduction' : 'Paper and printing',
+                      'Alumina production' : 'Non-ferrous metals',
+                      'Aluminium - primary production' : 'Non-ferrous metals',
+                      'Aluminium - secondary production' : 'Non-ferrous metals',
+                      'Other non-ferrous metals' : 'Non-ferrous metals',
+    }
+
+    for c in n.buses.country.unique():
+        buses = n.buses.index[n.buses.country == c]
+        for sector in industrial_production.columns:
+            facilities = gdf.index[(gdf.country_code == c) & (gdf.Subsector == sector_mapping.get(sector,"None"))]
+            if (sector in sector_mapping) and not facilities.empty:
+                emissions = gdf.loc[facilities,"Emissions_ETS_2014"]
+                if emissions.sum() == 0:
+                    distribution_key = pd.Series(1/len(facilities),
+                                                 facilities)
+                else:
+                    #BEWARE: this is a strong assumption
+                    emissions = emissions.fillna(emissions.mean())
+                    distribution_key = emissions/emissions.sum()
+                distribution_key = distribution_key.groupby(gdf.loc[facilities,"bus"]).sum().reindex(buses,fill_value=0.)
+            else:
+                distribution_key = pop_layout.loc[buses,"fraction"]
+
+            if abs(distribution_key.sum() - 1) > 1e-4:
+                print(c,sector,distribution_key)
+
+            nodal_industrial_production.loc[buses,sector] = industrial_production.at[c,sector]*distribution_key
+
+    nodal_industrial_production.to_csv(snakemake.output.industrial_production_per_node)
+
+if __name__ == "__main__":
+
+
+    n = pypsa.Network(snakemake.input.network)
+
+    hotmaps_database = prepare_hotmaps_database()
+
+    assign_buses(hotmaps_database)
+
+    build_nodal_industrial_production(hotmaps_database)

From f0937e203bd71039b1265f249128fa25235ea6a7 Mon Sep 17 00:00:00 2001
From: Tom Brown <tom@nworbmot.org>
Date: Mon, 12 Oct 2020 12:07:49 +0200
Subject: [PATCH 2/7] industry: separate code for distribution key from nodal
 production

This allows us to reuse the key for today's nodal energy demand from
industry.
---
 Snakefile                                     |  17 +-
 scripts/build_industrial_distribution_key.py  | 153 ++++++++++++++++++
 .../build_industrial_production_per_node.py   | 147 ++---------------
 3 files changed, 178 insertions(+), 139 deletions(-)
 create mode 100644 scripts/build_industrial_distribution_key.py

diff --git a/Snakefile b/Snakefile
index a55a5cca..d55a2853 100644
--- a/Snakefile
+++ b/Snakefile
@@ -199,12 +199,25 @@ rule build_industrial_production_per_country_tomorrow:
     script: 'scripts/build_industrial_production_per_country_tomorrow.py'
 
 
-rule build_industrial_production_per_node:
+
+
+rule build_industrial_distribution_key:
     input:
         clustered_pop_layout="resources/pop_layout_{network}_s{simpl}_{clusters}.csv",
         europe_shape=pypsaeur('resources/europe_shape.geojson'),
         hotmaps_industrial_database="data/Industrial_Database.csv",
-        network=pypsaeur('networks/{network}_s{simpl}_{clusters}.nc'),
+        network=pypsaeur('networks/{network}_s{simpl}_{clusters}.nc')
+    output:
+        industrial_distribution_key="resources/industrial_distribution_key_{network}_s{simpl}_{clusters}.csv"
+    threads: 1
+    resources: mem_mb=1000
+    script: 'scripts/build_industrial_distribution_key.py'
+
+
+
+rule build_industrial_production_per_node:
+    input:
+        industrial_distribution_key="resources/industrial_distribution_key_{network}_s{simpl}_{clusters}.csv",
         industrial_production_per_country_tomorrow="resources/industrial_production_per_country_tomorrow.csv"
     output:
         industrial_production_per_node="resources/industrial_production_{network}_s{simpl}_{clusters}.csv"
diff --git a/scripts/build_industrial_distribution_key.py b/scripts/build_industrial_distribution_key.py
new file mode 100644
index 00000000..c5d55a2c
--- /dev/null
+++ b/scripts/build_industrial_distribution_key.py
@@ -0,0 +1,153 @@
+
+import pypsa
+import pandas as pd
+import geopandas as gpd
+from shapely import wkt, prepared
+from scipy.spatial import cKDTree as KDTree
+
+
+def prepare_hotmaps_database():
+
+    df = pd.read_csv(snakemake.input.hotmaps_industrial_database,
+                     sep=";",
+                     index_col=0)
+
+    #remove those sites without valid geometries
+    df.drop(df.index[df.geom.isna()],
+            inplace=True)
+
+    #parse geometry
+    #https://geopandas.org/gallery/create_geopandas_from_pandas.html?highlight=parse#from-wkt-format
+    df["Coordinates"] = df.geom.apply(lambda x : wkt.loads(x[x.find(";POINT")+1:]))
+
+    gdf = gpd.GeoDataFrame(df, geometry='Coordinates')
+
+    europe_shape = gpd.read_file(snakemake.input.europe_shape).loc[0, 'geometry']
+    europe_shape_prepped = prepared.prep(europe_shape)
+    not_in_europe = gdf.index[~gdf.geometry.apply(europe_shape_prepped.contains)]
+    print("Removing the following industrial facilities since they are not in European area:")
+    print(gdf.loc[not_in_europe])
+    gdf.drop(not_in_europe,
+             inplace=True)
+
+    country_to_code = {
+        'Belgium' : 'BE',
+        'Bulgaria' : 'BG',
+        'Czech Republic' : 'CZ',
+        'Denmark' : 'DK',
+        'Germany' : 'DE',
+        'Estonia' : 'EE',
+        'Ireland' : 'IE',
+        'Greece' : 'GR',
+        'Spain' : 'ES',
+        'France' : 'FR',
+        'Croatia' : 'HR',
+        'Italy' : 'IT',
+        'Cyprus' : 'CY',
+        'Latvia' : 'LV',
+        'Lithuania' : 'LT',
+        'Luxembourg' : 'LU',
+        'Hungary' : 'HU',
+        'Malta' : 'MA',
+        'Netherland' : 'NL',
+        'Austria' : 'AT',
+        'Poland' : 'PL',
+        'Portugal' : 'PT',
+        'Romania' : 'RO',
+        'Slovenia' : 'SI',
+        'Slovakia' : 'SK',
+        'Finland' : 'FI',
+        'Sweden' : 'SE',
+        'United Kingdom' : 'GB',
+        'Iceland' : 'IS',
+        'Norway' : 'NO',
+        'Montenegro' : 'ME',
+        'FYR of Macedonia' : 'MK',
+        'Albania' : 'AL',
+        'Serbia' : 'RS',
+        'Turkey' : 'TU',
+        'Bosnia and Herzegovina' : 'BA',
+        'Switzerland' : 'CH',
+        'Liechtenstein' : 'AT',
+    }
+    gdf["country_code"] = gdf.Country.map(country_to_code)
+
+    if gdf["country_code"].isna().any():
+        print("Warning, some countries not assigned an ISO code")
+
+    gdf["x"] = gdf.geometry.x
+    gdf["y"] = gdf.geometry.y
+
+    return gdf
+
+
+def assign_buses(gdf):
+
+    gdf["bus"] = ""
+
+    for c in n.buses.country.unique():
+        buses_i = n.buses.index[n.buses.country == c]
+        kdtree = KDTree(n.buses.loc[buses_i, ['x','y']].values)
+
+        industry_i = gdf.index[(gdf.country_code == c)]
+
+        if industry_i.empty:
+            print("Skipping country with no industry:",c)
+        else:
+            tree_i = kdtree.query(gdf.loc[industry_i, ['x','y']].values)[1]
+            gdf.loc[industry_i, 'bus'] = buses_i[tree_i]
+
+    if (gdf.bus == "").any():
+        print("Some industrial facilities have empty buses")
+    if gdf.bus.isna().any():
+        print("Some industrial facilities have NaN buses")
+
+
+def build_nodal_distribution_key(gdf):
+
+    sectors = ['Iron and steel','Chemical industry','Cement','Non-metallic mineral products','Glass','Paper and printing','Non-ferrous metals']
+
+    distribution_keys = pd.DataFrame(index=n.buses.index,
+                                    columns=sectors,
+                                    dtype=float)
+
+    pop_layout = pd.read_csv(snakemake.input.clustered_pop_layout,index_col=0)
+    pop_layout["ct"] = pop_layout.index.str[:2]
+    ct_total = pop_layout.total.groupby(pop_layout["ct"]).sum()
+    pop_layout["ct_total"] = pop_layout["ct"].map(ct_total)
+    distribution_keys["population"] = pop_layout["total"]/pop_layout["ct_total"]
+
+    for c in n.buses.country.unique():
+        buses = n.buses.index[n.buses.country == c]
+        for sector in sectors:
+            facilities = gdf.index[(gdf.country_code == c) & (gdf.Subsector == sector)]
+            if not facilities.empty:
+                emissions = gdf.loc[facilities,"Emissions_ETS_2014"]
+                if emissions.sum() == 0:
+                    distribution_key = pd.Series(1/len(facilities),
+                                                 facilities)
+                else:
+                    #BEWARE: this is a strong assumption
+                    emissions = emissions.fillna(emissions.mean())
+                    distribution_key = emissions/emissions.sum()
+                distribution_key = distribution_key.groupby(gdf.loc[facilities,"bus"]).sum().reindex(buses,fill_value=0.)
+            else:
+                distribution_key = distribution_keys.loc[buses,"population"]
+
+            if abs(distribution_key.sum() - 1) > 1e-4:
+                print(c,sector,distribution_key)
+
+            distribution_keys.loc[buses,sector] = distribution_key
+
+    distribution_keys.to_csv(snakemake.output.industrial_distribution_key)
+
+if __name__ == "__main__":
+
+
+    n = pypsa.Network(snakemake.input.network)
+
+    hotmaps_database = prepare_hotmaps_database()
+
+    assign_buses(hotmaps_database)
+
+    build_nodal_distribution_key(hotmaps_database)
diff --git a/scripts/build_industrial_production_per_node.py b/scripts/build_industrial_production_per_node.py
index f8efdfd3..9e56e49a 100644
--- a/scripts/build_industrial_production_per_node.py
+++ b/scripts/build_industrial_production_per_node.py
@@ -1,123 +1,19 @@
 
-import pypsa
 import pandas as pd
-import geopandas as gpd
-from shapely import wkt, prepared
-from scipy.spatial import cKDTree as KDTree
 
-
-def prepare_hotmaps_database():
-
-    df = pd.read_csv(snakemake.input.hotmaps_industrial_database,
-                     sep=";",
-                     index_col=0)
-
-    #remove those sites without valid geometries
-    df.drop(df.index[df.geom.isna()],
-            inplace=True)
-
-    #parse geometry
-    #https://geopandas.org/gallery/create_geopandas_from_pandas.html?highlight=parse#from-wkt-format
-    df["Coordinates"] = df.geom.apply(lambda x : wkt.loads(x[x.find(";POINT")+1:]))
-
-    gdf = gpd.GeoDataFrame(df, geometry='Coordinates')
-
-    europe_shape = gpd.read_file(snakemake.input.europe_shape).loc[0, 'geometry']
-    europe_shape_prepped = prepared.prep(europe_shape)
-    not_in_europe = gdf.index[~gdf.geometry.apply(europe_shape_prepped.contains)]
-    print("Removing the following industrial facilities since they are not in European area:")
-    print(gdf.loc[not_in_europe])
-    gdf.drop(not_in_europe,
-             inplace=True)
-
-    country_to_code = {
-        'Belgium' : 'BE',
-        'Bulgaria' : 'BG',
-        'Czech Republic' : 'CZ',
-        'Denmark' : 'DK',
-        'Germany' : 'DE',
-        'Estonia' : 'EE',
-        'Ireland' : 'IE',
-        'Greece' : 'GR',
-        'Spain' : 'ES',
-        'France' : 'FR',
-        'Croatia' : 'HR',
-        'Italy' : 'IT',
-        'Cyprus' : 'CY',
-        'Latvia' : 'LV',
-        'Lithuania' : 'LT',
-        'Luxembourg' : 'LU',
-        'Hungary' : 'HU',
-        'Malta' : 'MA',
-        'Netherland' : 'NL',
-        'Austria' : 'AT',
-        'Poland' : 'PL',
-        'Portugal' : 'PT',
-        'Romania' : 'RO',
-        'Slovenia' : 'SI',
-        'Slovakia' : 'SK',
-        'Finland' : 'FI',
-        'Sweden' : 'SE',
-        'United Kingdom' : 'GB',
-        'Iceland' : 'IS',
-        'Norway' : 'NO',
-        'Montenegro' : 'ME',
-        'FYR of Macedonia' : 'MK',
-        'Albania' : 'AL',
-        'Serbia' : 'RS',
-        'Turkey' : 'TU',
-        'Bosnia and Herzegovina' : 'BA',
-        'Switzerland' : 'CH',
-        'Liechtenstein' : 'AT',
-    }
-    gdf["country_code"] = gdf.Country.map(country_to_code)
-
-    if gdf["country_code"].isna().any():
-        print("Warning, some countries not assigned an ISO code")
-
-    gdf["x"] = gdf.geometry.x
-    gdf["y"] = gdf.geometry.y
-
-    return gdf
-
-
-def assign_buses(gdf):
-
-    gdf["bus"] = ""
-
-    for c in n.buses.country.unique():
-        buses_i = n.buses.index[n.buses.country == c]
-        kdtree = KDTree(n.buses.loc[buses_i, ['x','y']].values)
-
-        industry_i = gdf.index[(gdf.country_code == c)]
-
-        if industry_i.empty:
-            print("Skipping country with no industry:",c)
-        else:
-            tree_i = kdtree.query(gdf.loc[industry_i, ['x','y']].values)[1]
-            gdf.loc[industry_i, 'bus'] = buses_i[tree_i]
-
-    if (gdf.bus == "").any():
-        print("Some industrial facilities have empty buses")
-    if gdf.bus.isna().any():
-        print("Some industrial facilities have NaN buses")
-
-
-def build_nodal_industrial_production(gdf):
+def build_nodal_industrial_production():
 
     industrial_production = pd.read_csv(snakemake.input.industrial_production_per_country_tomorrow,
                                         index_col=0)
 
-    nodal_industrial_production = pd.DataFrame(index=n.buses.index,
+    distribution_keys = pd.read_csv(snakemake.input.industrial_distribution_key,
+                                        index_col=0)
+    distribution_keys["country"] = distribution_keys.index.str[:2]
+
+    nodal_industrial_production = pd.DataFrame(index=distribution_keys.index,
                                                columns=industrial_production.columns,
                                                dtype=float)
 
-    pop_layout = pd.read_csv(snakemake.input.clustered_pop_layout,index_col=0)
-    pop_layout["ct"] = pop_layout.index.str[:2]
-    ct_total = pop_layout.total.groupby(pop_layout["ct"]).sum()
-    pop_layout["ct_total"] = pop_layout["ct"].map(ct_total)
-    pop_layout["fraction"] = pop_layout["total"]/pop_layout["ct_total"]
-
     #map JRC/our sectors to hotmaps sector, where mapping exist
     sector_mapping = {'Electric arc' : 'Iron and steel',
                       'Integrated steelworks' : 'Iron and steel',
@@ -138,37 +34,14 @@ def build_nodal_industrial_production(gdf):
                       'Other non-ferrous metals' : 'Non-ferrous metals',
     }
 
-    for c in n.buses.country.unique():
-        buses = n.buses.index[n.buses.country == c]
+    for c in distribution_keys.country.unique():
+        buses = distribution_keys.index[distribution_keys.country == c]
         for sector in industrial_production.columns:
-            facilities = gdf.index[(gdf.country_code == c) & (gdf.Subsector == sector_mapping.get(sector,"None"))]
-            if (sector in sector_mapping) and not facilities.empty:
-                emissions = gdf.loc[facilities,"Emissions_ETS_2014"]
-                if emissions.sum() == 0:
-                    distribution_key = pd.Series(1/len(facilities),
-                                                 facilities)
-                else:
-                    #BEWARE: this is a strong assumption
-                    emissions = emissions.fillna(emissions.mean())
-                    distribution_key = emissions/emissions.sum()
-                distribution_key = distribution_key.groupby(gdf.loc[facilities,"bus"]).sum().reindex(buses,fill_value=0.)
-            else:
-                distribution_key = pop_layout.loc[buses,"fraction"]
-
-            if abs(distribution_key.sum() - 1) > 1e-4:
-                print(c,sector,distribution_key)
-
+            distribution_key = distribution_keys.loc[buses,sector_mapping.get(sector,"population")]
             nodal_industrial_production.loc[buses,sector] = industrial_production.at[c,sector]*distribution_key
 
     nodal_industrial_production.to_csv(snakemake.output.industrial_production_per_node)
 
 if __name__ == "__main__":
 
-
-    n = pypsa.Network(snakemake.input.network)
-
-    hotmaps_database = prepare_hotmaps_database()
-
-    assign_buses(hotmaps_database)
-
-    build_nodal_industrial_production(hotmaps_database)
+    build_nodal_industrial_production()

From f2b347334dd629ec660bce47cffadbe3b8983012 Mon Sep 17 00:00:00 2001
From: Tom Brown <tom@nworbmot.org>
Date: Mon, 12 Oct 2020 12:20:04 +0200
Subject: [PATCH 3/7] industry: build nodal energy demand from nodal production

---
 Snakefile                                     | 11 ++++++++
 ...build_industrial_energy_demand_per_node.py | 26 +++++++++++++++++++
 2 files changed, 37 insertions(+)
 create mode 100644 scripts/build_industrial_energy_demand_per_node.py

diff --git a/Snakefile b/Snakefile
index d55a2853..93bc574f 100644
--- a/Snakefile
+++ b/Snakefile
@@ -226,6 +226,17 @@ rule build_industrial_production_per_node:
     script: 'scripts/build_industrial_production_per_node.py'
 
 
+rule build_industrial_energy_demand_per_node:
+    input:
+        industry_sector_ratios="resources/industry_sector_ratios.csv",
+        industrial_production_per_node="resources/industrial_production_{network}_s{simpl}_{clusters}.csv"
+    output:
+        industrial_energy_demand_per_node="resources/industrial_energy_demand_{network}_s{simpl}_{clusters}.csv"
+    threads: 1
+    resources: mem_mb=1000
+    script: 'scripts/build_industrial_energy_demand_per_node.py'
+
+
 rule build_industrial_energy_demand_per_country_today:
     input:
         ammonia_production="resources/ammonia_production.csv",
diff --git a/scripts/build_industrial_energy_demand_per_node.py b/scripts/build_industrial_energy_demand_per_node.py
new file mode 100644
index 00000000..adedbd3c
--- /dev/null
+++ b/scripts/build_industrial_energy_demand_per_node.py
@@ -0,0 +1,26 @@
+
+import pandas as pd
+import numpy as np
+
+# import EU ratios df as csv
+industry_sector_ratios=pd.read_csv(snakemake.input.industry_sector_ratios,
+                                   index_col=0)
+
+#material demand per node and industry (kton/a)
+nodal_production = pd.read_csv(snakemake.input.industrial_production_per_node, index_col=0)
+
+#final energy consumption per node and industry (TWh/a)
+nodal_df = nodal_production.dot(industry_sector_ratios.T)
+nodal_df*= 0.001 #GWh -> TWh (ktCO2 -> MtCO2)
+
+
+rename_sectors = {'elec':'electricity',
+                  'biomass':'solid biomass',
+                  'heat':'low-temperature heat'}
+
+nodal_df.rename(columns=rename_sectors,inplace=True)
+
+nodal_df.index.name = "TWh/a (MtCO2/a)"
+
+nodal_df.to_csv(snakemake.output.industrial_energy_demand_per_node,
+                float_format='%.2f')

From 80cbe986307693fc75f55145f99958c57fad0f1a Mon Sep 17 00:00:00 2001
From: Tom Brown <tom@nworbmot.org>
Date: Mon, 12 Oct 2020 13:26:21 +0200
Subject: [PATCH 4/7] industry: add current nodal electricity demand to
 subtract later

---
 Snakefile                                     | 18 ++++++-
 ...build_industrial_energy_demand_per_node.py |  9 +++-
 ...industrial_energy_demand_per_node_today.py | 54 +++++++++++++++++++
 3 files changed, 78 insertions(+), 3 deletions(-)
 create mode 100644 scripts/build_industrial_energy_demand_per_node_today.py

diff --git a/Snakefile b/Snakefile
index 93bc574f..57792c24 100644
--- a/Snakefile
+++ b/Snakefile
@@ -3,6 +3,7 @@ configfile: "config.yaml"
 
 wildcard_constraints:
     lv="[a-z0-9\.]+",
+    network="[a-zA-Z0-9]*",
     simpl="[a-zA-Z0-9]*",
     clusters="[0-9]+m?",
     sectors="[+a-zA-Z0-9]+",
@@ -229,7 +230,8 @@ rule build_industrial_production_per_node:
 rule build_industrial_energy_demand_per_node:
     input:
         industry_sector_ratios="resources/industry_sector_ratios.csv",
-        industrial_production_per_node="resources/industrial_production_{network}_s{simpl}_{clusters}.csv"
+        industrial_production_per_node="resources/industrial_production_{network}_s{simpl}_{clusters}.csv",
+        industrial_energy_demand_per_node_today="resources/industrial_energy_demand_today_{network}_s{simpl}_{clusters}.csv"
     output:
         industrial_energy_demand_per_node="resources/industrial_energy_demand_{network}_s{simpl}_{clusters}.csv"
     threads: 1
@@ -248,6 +250,18 @@ rule build_industrial_energy_demand_per_country_today:
     script: 'scripts/build_industrial_energy_demand_per_country_today.py'
 
 
+rule build_industrial_energy_demand_per_node_today:
+    input:
+        industrial_distribution_key="resources/industrial_distribution_key_{network}_s{simpl}_{clusters}.csv",
+        industrial_energy_demand_per_country_today="resources/industrial_energy_demand_per_country_today.csv"
+    output:
+        industrial_energy_demand_per_node_today="resources/industrial_energy_demand_today_{network}_s{simpl}_{clusters}.csv"
+    threads: 1
+    resources: mem_mb=1000
+    script: 'scripts/build_industrial_energy_demand_per_node_today.py'
+
+
+
 rule build_industrial_energy_demand_per_country:
     input:
         industry_sector_ratios="resources/industry_sector_ratios.csv",
@@ -287,7 +301,7 @@ rule prepare_sector_network:
         clustermaps=pypsaeur('resources/clustermaps_{network}_s{simpl}_{clusters}.h5'),
         clustered_pop_layout="resources/pop_layout_{network}_s{simpl}_{clusters}.csv",
         simplified_pop_layout="resources/pop_layout_{network}_s{simpl}.csv",
-        industrial_demand="resources/industrial_demand_{network}_s{simpl}_{clusters}.csv",
+        industrial_demand="resources/industrial_energy_demand_{network}_s{simpl}_{clusters}.csv",
         heat_demand_urban="resources/heat_demand_urban_{network}_s{simpl}_{clusters}.nc",
         heat_demand_rural="resources/heat_demand_rural_{network}_s{simpl}_{clusters}.nc",
         heat_demand_total="resources/heat_demand_total_{network}_s{simpl}_{clusters}.nc",
diff --git a/scripts/build_industrial_energy_demand_per_node.py b/scripts/build_industrial_energy_demand_per_node.py
index adedbd3c..0c2300d1 100644
--- a/scripts/build_industrial_energy_demand_per_node.py
+++ b/scripts/build_industrial_energy_demand_per_node.py
@@ -7,7 +7,12 @@ industry_sector_ratios=pd.read_csv(snakemake.input.industry_sector_ratios,
                                    index_col=0)
 
 #material demand per node and industry (kton/a)
-nodal_production = pd.read_csv(snakemake.input.industrial_production_per_node, index_col=0)
+nodal_production = pd.read_csv(snakemake.input.industrial_production_per_node,
+                               index_col=0)
+
+#energy demand today to get current electricity
+nodal_today = pd.read_csv(snakemake.input.industrial_energy_demand_per_node_today,
+                          index_col=0)
 
 #final energy consumption per node and industry (TWh/a)
 nodal_df = nodal_production.dot(industry_sector_ratios.T)
@@ -20,6 +25,8 @@ rename_sectors = {'elec':'electricity',
 
 nodal_df.rename(columns=rename_sectors,inplace=True)
 
+nodal_df["current electricity"] = nodal_today["electricity"]
+
 nodal_df.index.name = "TWh/a (MtCO2/a)"
 
 nodal_df.to_csv(snakemake.output.industrial_energy_demand_per_node,
diff --git a/scripts/build_industrial_energy_demand_per_node_today.py b/scripts/build_industrial_energy_demand_per_node_today.py
new file mode 100644
index 00000000..6caf1f58
--- /dev/null
+++ b/scripts/build_industrial_energy_demand_per_node_today.py
@@ -0,0 +1,54 @@
+
+import pandas as pd
+import numpy as np
+
+def build_nodal_demand():
+
+    industrial_demand = pd.read_csv(snakemake.input.industrial_energy_demand_per_country_today,
+                                    header=[0,1],
+                                    index_col=0)
+
+    distribution_keys = pd.read_csv(snakemake.input.industrial_distribution_key,
+                                        index_col=0)
+    distribution_keys["country"] = distribution_keys.index.str[:2]
+
+    nodal_demand = pd.DataFrame(0.,
+                                index=distribution_keys.index,
+                                columns=industrial_demand.index,
+                                dtype=float)
+
+    #map JRC/our sectors to hotmaps sector, where mapping exist
+    sector_mapping = {'Electric arc' : 'Iron and steel',
+                      'Integrated steelworks' : 'Iron and steel',
+                      'DRI + Electric arc' : 'Iron and steel',
+                      'Ammonia' : 'Chemical industry',
+                      'Basic chemicals (without ammonia)' : 'Chemical industry',
+                      'Other chemicals' : 'Chemical industry',
+                      'Pharmaceutical products etc.' : 'Chemical industry',
+                      'Cement' : 'Cement',
+                      'Ceramics & other NMM' : 'Non-metallic mineral products',
+                      'Glass production' : 'Glass',
+                      'Pulp production' : 'Paper and printing',
+                      'Paper production' : 'Paper and printing',
+                      'Printing and media reproduction' : 'Paper and printing',
+                      'Alumina production' : 'Non-ferrous metals',
+                      'Aluminium - primary production' : 'Non-ferrous metals',
+                      'Aluminium - secondary production' : 'Non-ferrous metals',
+                      'Other non-ferrous metals' : 'Non-ferrous metals',
+    }
+
+    for c in distribution_keys.country.unique():
+        buses = distribution_keys.index[distribution_keys.country == c]
+        for sector in industrial_demand.columns.levels[1]:
+            distribution_key = distribution_keys.loc[buses,sector_mapping.get(sector,"population")]
+            demand = industrial_demand[c,sector]
+            outer = pd.DataFrame(np.outer(distribution_key,demand),index=distribution_key.index,columns=demand.index)
+            nodal_demand.loc[buses] += outer
+
+    nodal_demand.index.name = "TWh/a"
+
+    nodal_demand.to_csv(snakemake.output.industrial_energy_demand_per_node_today)
+
+if __name__ == "__main__":
+
+    build_nodal_demand()

From e8b923e076b7bfb89684138787cae9f01a62878b Mon Sep 17 00:00:00 2001
From: Tom Brown <tom@nworbmot.org>
Date: Mon, 12 Oct 2020 14:56:41 +0200
Subject: [PATCH 5/7] industry: subtract today's ind elec demand, add back new
 demand

Since today's industrial electricity demand is distributed by
population and GDP, subtract this from the regular electricity demand
(which already has space/water heating subtracted).

Now regular electricity demand is only non-heating electricity demand
in residential and tertiary sectors.

Add back new industry electricity demand at the correct locations, as
determined using the hotmaps database.
---
 scripts/prepare_sector_network.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/scripts/prepare_sector_network.py b/scripts/prepare_sector_network.py
index 0bab8487..6e79e897 100644
--- a/scripts/prepare_sector_network.py
+++ b/scripts/prepare_sector_network.py
@@ -666,7 +666,7 @@ def insert_electricity_distribution_grid(network):
                  capital_cost=costs.at['electricity distribution grid','fixed']*snakemake.config["sector"]['electricity_distribution_grid_cost_factor'])
 
 
-    #this catches regular electricity load and "industry new electricity"
+    #this catches regular electricity load and "industry electricity"
     loads = network.loads.index[network.loads.carrier.str.contains("electricity")]
     network.loads.loc[loads,"bus"] += " low voltage"
 
@@ -1635,12 +1635,18 @@ def add_industry(network):
                  carrier="low-temperature heat for industry",
                  p_set=industrial_demand.loc[nodes,"low-temperature heat"]/8760.)
 
+    #remove today's industrial electricity demand by scaling down total electricity demand
+    for ct in n.buses.country.unique():
+        loads = n.loads.index[(n.loads.index.str[:2] == ct) & (n.loads.carrier == "electricity")]
+        factor = 1 - industrial_demand.loc[loads,"current electricity"].sum()/n.loads_t.p_set[loads].sum().sum()
+        n.loads_t.p_set[loads] *= factor
+
     network.madd("Load",
                  nodes,
-                 suffix=" industry new electricity",
+                 suffix=" industry electricity",
                  bus=nodes,
-                 carrier="industry new electricity",
-                 p_set = (industrial_demand.loc[nodes,"electricity"]-industrial_demand.loc[nodes,"current electricity"])/8760.)
+                 carrier="industry electricity",
+                 p_set=industrial_demand.loc[nodes,"electricity"]/8760.)
 
     network.madd("Bus",
                  ["process emissions"],

From 8f6b551efbc793729542c2c50f10240b864dc27c Mon Sep 17 00:00:00 2001
From: Tom Brown <tom@nworbmot.org>
Date: Mon, 12 Oct 2020 14:59:15 +0200
Subject: [PATCH 6/7] Update data bundle to include hotmaps industrial site
 database

---
 doc/installation.rst  | 4 ++--
 doc/release_notes.rst | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/installation.rst b/doc/installation.rst
index 60deb625..c5dc79f3 100644
--- a/doc/installation.rst
+++ b/doc/installation.rst
@@ -69,8 +69,8 @@ To download and extract it on the command line:
 
 .. code:: bash
 
-    projects/pypsa-eur-sec/data % wget "https://nworbmot.org/pypsa-eur-sec-data-bundle-200921.tar.gz"
-    projects/pypsa-eur-sec/data % tar xvzf pypsa-eur-sec-data-bundle-200921.tar.gz
+    projects/pypsa-eur-sec/data % wget "https://nworbmot.org/pypsa-eur-sec-data-bundle-201012.tar.gz"
+    projects/pypsa-eur-sec/data % tar xvzf pypsa-eur-sec-data-bundle-201012.tar.gz
 
 Set up the default configuration
 ================================
diff --git a/doc/release_notes.rst b/doc/release_notes.rst
index 09ff30a6..69b0221f 100644
--- a/doc/release_notes.rst
+++ b/doc/release_notes.rst
@@ -102,4 +102,4 @@ To make a new release of the data bundle, make an archive of the files in ``data
 
 .. code:: bash
 
-    data % tar pczf pypsa-eur-sec-data-bundle-date.tar.gz eea switzerland-sfoe biomass eurostat-energy_balances-* jrc-idees-2015 emobility urban_percent.csv timezone_mappings.csv heat_load_profile_DK_AdamJensen.csv WindWaveWEC_GLTB.xlsx myb1-2017-nitro.xls
+    data % tar pczf pypsa-eur-sec-data-bundle-date.tar.gz eea switzerland-sfoe biomass eurostat-energy_balances-* jrc-idees-2015 emobility urban_percent.csv timezone_mappings.csv heat_load_profile_DK_AdamJensen.csv WindWaveWEC_GLTB.xlsx myb1-2017-nitro.xls Industrial_Database.csv

From 9e91d2c1f09901f543006ca5a47ed4dc97065872 Mon Sep 17 00:00:00 2001
From: Tom Brown <tom@nworbmot.org>
Date: Mon, 12 Oct 2020 15:37:47 +0200
Subject: [PATCH 7/7] doc: Document sources for input data

---
 doc/data.csv         | 19 +++++++++++++++++++
 doc/installation.rst | 21 ++++++++++++++-------
 2 files changed, 33 insertions(+), 7 deletions(-)
 create mode 100644 doc/data.csv

diff --git a/doc/data.csv b/doc/data.csv
new file mode 100644
index 00000000..dd66b491
--- /dev/null
+++ b/doc/data.csv
@@ -0,0 +1,19 @@
+description,file/folder,licence,source
+JRC IDEES database,jrc-idees-2015/,CC BY 4.0,https://ec.europa.eu/jrc/en/potencia/jrc-idees
+urban/rural fraction,urban_percent.csv,unknown,unknown
+JRC biomass potentials,biomass/,unknown,https://doi.org/10.2790/39014
+EEA emission statistics,eea/,unknown,https://www.eea.europa.eu/data-and-maps/data/national-emissions-reported-to-the-unfccc-and-to-the-eu-greenhouse-gas-monitoring-mechanism-14
+Eurostat Energy Balances,eurostat-energy_balances-*/,Eurostat,https://ec.europa.eu/eurostat/web/energy/data/energy-balances
+Swiss energy statistics from Swiss Federal Office of Energy,switzerland-sfoe/,unknown,http://www.bfe.admin.ch/themen/00526/00541/00542/02167/index.html?dossier_id=02169
+BASt emobility statistics,emobility/,unknown,http://www.bast.de/DE/Verkehrstechnik/Fachthemen/v2-verkehrszaehlung/Stundenwerte.html?nn=626916
+timezone mappings,timezone_mappings.csv,CC BY 4.0,Tom Brown
+BDEW heating profile,heat_load_profile_BDEW.csv,unknown,https://github.com/oemof/demandlib
+heating profiles for Aarhus,heat_load_profile_DK_AdamJensen.csv,unknown,Adam Jensen MA thesis at Aarhus University
+George Lavidas wind/wave costs,WindWaveWEC_GLTB.xlsx,unknown,George Lavidas
+country codes,Country_codes.csv,CC BY 4.0,Marta Victoria
+co2 budgets,co2_budget.csv,CC BY 4.0,https://arxiv.org/abs/2004.11009
+existing heating potentials,existing_infrastructure/existing_heating_raw.csv,unknown,https://ec.europa.eu/energy/studies/mapping-and-analyses-current-and-future-2020-2030-heatingcooling-fuel-deployment_en?redir=1
+IRENA existing VRE capacities,existing_infrastructure/{solar|onwind|offwind}_capcity_IRENA.csv,unknown,https://www.irena.org/Statistics/Download-Data
+USGS ammonia production,myb1-2017-nitro.xls,unknown,https://www.usgs.gov/centers/nmic/nitrogen-statistics-and-information
+hydrogen salt cavern potentials,hydrogen_salt_cavern_potentials.csv,CC BY 4.0,https://doi.org/10.1016/j.ijhydene.2019.12.161
+hotmaps industrial site database,Industrial_Database.csv,CC BY 4.0,https://gitlab.com/hotmaps/industrial_sites/industrial_sites_Industrial_Database
diff --git a/doc/installation.rst b/doc/installation.rst
index c5dc79f3..997f5221 100644
--- a/doc/installation.rst
+++ b/doc/installation.rst
@@ -58,20 +58,27 @@ atlite version 0.0.2.
 Data requirements
 =================
 
-The data requirements include the JRC-IDEES-2015 database, JRC biomass
-potentials, EEA emission statistics, Eurostat Energy Balances, urban
-district heating potentials, emobility statistics, timezone mappings
-and heating profiles.
+Small data files are included directly in the git repository, while
+larger ones are archived in a data bundle. The data bundle's size is
+around 640 MB.
 
-The data bundle is about 640 MB.
-
-To download and extract it on the command line:
+To download and extract the data bundle on the command line:
 
 .. code:: bash
 
     projects/pypsa-eur-sec/data % wget "https://nworbmot.org/pypsa-eur-sec-data-bundle-201012.tar.gz"
     projects/pypsa-eur-sec/data % tar xvzf pypsa-eur-sec-data-bundle-201012.tar.gz
 
+
+The data licences and sources are given in the following table.
+
+
+.. csv-table::
+   :header-rows: 1
+   :file: data.csv
+
+
+
 Set up the default configuration
 ================================