From 2563d1277a5daff685b50df9f1a7957f8f78f9e7 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Sun, 11 Jul 2021 17:52:32 +0200
Subject: [PATCH] spatially-explicit biomass potentials from ENSPRESO (NUTS2)

---
 Snakefile                           |  19 ++-
 config.default.yaml                 |  34 ++---
 scripts/build_biomass_potentials.py | 193 ++++++++++++++++++++++------
 scripts/prepare_sector_network.py   |  11 +-
 4 files changed, 186 insertions(+), 71 deletions(-)

diff --git a/Snakefile b/Snakefile
index b91785d9..286df421 100644
--- a/Snakefile
+++ b/Snakefile
@@ -1,4 +1,7 @@
 
+from snakemake.remote.HTTP import RemoteProvider as HTTPRemoteProvider
+HTTP = HTTPRemoteProvider()
+
 configfile: "config.yaml"
 
 
@@ -170,13 +173,19 @@ rule build_energy_totals:
 
 rule build_biomass_potentials:
     input:
-        jrc_potentials="data/biomass/JRC Biomass Potentials.xlsx"
+        enspreso_biomass=HTTP.remote("https://cidportal.jrc.ec.europa.eu/ftp/jrc-opendata/ENSPRESO/ENSPRESO_BIOMASS.xlsx", keep_local=True),
+        nuts2="data/nuts/NUTS_RG_10M_2013_4326_LEVL_2.geojson", # https://gisco-services.ec.europa.eu/distribution/v2/nuts/download/#nuts21
+        regions_onshore=pypsaeur("resources/regions_onshore_elec_s{simpl}_{clusters}.geojson"),
+        nuts3_population=pypsaeur("data/bundle/nama_10r_3popgdp.tsv.gz"),
+        swiss_cantons=pypsaeur("data/bundle/ch_cantons.csv"),
+        swiss_population=pypsaeur("data/bundle/je-e-21.03.02.xls"),
+        country_shapes=pypsaeur('resources/country_shapes.geojson')
     output:
-        biomass_potentials_all='resources/biomass_potentials_all.csv',
-        biomass_potentials='resources/biomass_potentials.csv'
+        biomass_potentials_all='resources/biomass_potentials_all_s{simpl}_{clusters}.csv',
+        biomass_potentials='resources/biomass_potentials_s{simpl}_{clusters}.csv'
     threads: 1
     resources: mem_mb=1000
-    benchmark: "benchmarks/build_biomass_potentials"
+    benchmark: "benchmarks/build_biomass_potentials_s{simpl}_{clusters}"
     script: 'scripts/build_biomass_potentials.py'
 
 
@@ -323,7 +332,7 @@ rule prepare_sector_network:
         transport_name='resources/transport_data.csv',
         traffic_data_KFZ = "data/emobility/KFZ__count",
         traffic_data_Pkw = "data/emobility/Pkw__count",
-        biomass_potentials='resources/biomass_potentials.csv',
+        biomass_potentials='resources/biomass_potentials_s{simpl}_{clusters}.csv',
         heat_profile="data/heat_load_profile_BDEW.csv",
         costs=CDIR + "costs_{planning_horizons}.csv",
         profile_offwind_ac=pypsaeur("resources/profile_offwind-ac.nc"),
diff --git a/config.default.yaml b/config.default.yaml
index 457c3660..4161470e 100644
--- a/config.default.yaml
+++ b/config.default.yaml
@@ -99,28 +99,28 @@ energy:
 
 biomass:
   year: 2030
-  scenario: Med
+  scenario: ENS_Med
   classes:
     solid biomass:
-      - Primary agricultural residues
-      - Forestry energy residue
-      - Secondary forestry residues
-      - Secondary Forestry residues sawdust
-      - Forestry residues from landscape care biomass
+      - Agricultural waste
       - Municipal waste
+      - Residues from landscape care
+      - Sawdust
+      - Secondary Forestry residues - woodchips
     not included:
-      - Bioethanol sugar beet biomass
-      - Rapeseeds for biodiesel
-      - sunflower and soya for Biodiesel
-      - Starchy crops biomass
-      - Grassy crops biomass
-      - Willow biomass
-      - Poplar biomass potential
-      - Roundwood fuelwood
-      - Roundwood Chips & Pellets
+      - Bioethanol barley, wheat, grain maize, oats, other cereals and rye
+      - Fuelwood residues
+      - C&P_RW
+      - FuelwoodRW
+      - Rape seed
+      - Sugar from sugar beet
+      - Miscanthus, switchgrass, RCG
+      - "Sunflower, soya seed "
+      - Poplar
+      - Willow
     biogas:
-      - Manure biomass potential
-      - Sludge biomass
+      - Manure solid, liquid
+      - Sludge
 
 
 solar_thermal:
diff --git a/scripts/build_biomass_potentials.py b/scripts/build_biomass_potentials.py
index f02c9093..59eb0051 100644
--- a/scripts/build_biomass_potentials.py
+++ b/scripts/build_biomass_potentials.py
@@ -1,55 +1,148 @@
 import pandas as pd
-
-rename = {"UK" : "GB", "BH" : "BA"}
+import geopandas as gpd
 
 
-def build_biomass_potentials():
+def build_nuts_population_data(year=2013):
 
-    config = snakemake.config['biomass']
-    year = config["year"]
-    scenario = config["scenario"]
+    pop = pd.read_csv(
+        snakemake.input.nuts3_population,
+        sep=r'\,| \t|\t',
+        engine='python',
+        na_values=[":"],
+        index_col=1
+    )[str(year)]
+
+    # drop the EU28 aggregate row, keep only country/region codes
+    pop.drop("EU28", inplace=True)
 
-    df = pd.read_excel(snakemake.input.jrc_potentials,
-                    "Potentials (PJ)",
-                    index_col=[0,1])
+    # mapping from Cantons to NUTS3
+    cantons = pd.read_csv(snakemake.input.swiss_cantons)
+    cantons = cantons.set_index(cantons.HASC.str[3:]).NUTS
+    cantons = cantons.str.pad(5, side='right', fillchar='0')
 
-    df.rename(columns={"Unnamed: 18": "Municipal waste"}, inplace=True)
-    df.drop(columns="Total", inplace=True)
-    df.replace("-", 0., inplace=True)
+    # get population by NUTS3
+    swiss = pd.read_excel(snakemake.input.swiss_population, skiprows=3, index_col=0).loc["Residents in 1000"]
+    swiss = swiss.rename(cantons).filter(like="CH")
 
-    column = df.iloc[:,0]
-    countries = column.where(column.str.isalpha()).pad()
-    countries = [rename.get(ct, ct) for ct in countries]
-    countries_i = pd.Index(countries, name='country')
-    df.set_index(countries_i, append=True, inplace=True)
+    # aggregate also to higher order NUTS levels
+    swiss = [swiss.groupby(swiss.index.str[:i]).sum() for i in range(2, 6)]
 
-    df.drop(index='MS', level=0, inplace=True)
+    # merge Europe + Switzerland (pd.concat instead of the removed Series.append)
+    pop = pd.concat([pop, *swiss]).to_frame("total")
+
+    # add missing countries manually as rows (.loc: plain [] would add columns)
+    pop.loc["AL"] = 2893
+    pop.loc["BA"] = 3871
+    pop.loc["RS"] = 7210
+
+    pop["ct"] = pop.index.str[:2]
+
+    return pop
 
-    # convert from PJ to MWh
-    df = df / 3.6 * 1e6
 
-    df.to_csv(snakemake.output.biomass_potentials_all)
+def enspreso_biomass_potentials(year=2020, scenario="ENS_Low"):
+    """Return ENSPRESO potentials in TWh/a for one year and scenario, indexed by NUTS2 (NUTS0 where no NUTS2 is given), one column per commodity."""
+    glossary = pd.read_excel(
+        snakemake.input.enspreso_biomass,
+        sheet_name="Glossary",
+        usecols="B:D",
+        skiprows=1,
+        index_col=0
+    )
+
+    df = pd.read_excel(
+        snakemake.input.enspreso_biomass,
+        sheet_name="ENER - NUTS2 BioCom E",
+        usecols="A:H"
+    )
 
-    # solid biomass includes:
-    # Primary agricultural residues (MINBIOAGRW1),
-    # Forestry energy residue (MINBIOFRSF1),
-    # Secondary forestry residues (MINBIOWOOW1),
-    # Secondary Forestry residues – sawdust (MINBIOWOO1a)',
-    # Forestry residues from landscape care biomass (MINBIOFRSF1a),
-    # Municipal waste (MINBIOMUN1)',
+    df["group"] = df["E-Comm"].map(glossary.group)
+    df["commodity"] = df["E-Comm"].map(glossary.description)
 
-    # biogas includes:
-    # Manure biomass potential (MINBIOGAS1),
-    # Sludge biomass (MINBIOSLU1),
+    to_rename = {
+        "NUTS2 Potential available by Bio Commodity": "potential",
+        "NUST2": "NUTS2",  # sic: presumably the sheet's own spelling, do not "fix"
+    }
+    df.rename(columns=to_rename, inplace=True)
+
+    # fill up with NUTS0 if NUTS2 is not given
+    df.NUTS2 = df.NUTS2.where(df.NUTS2 != '-', df.NUTS0)
 
-    df = df.loc[year, scenario, :]
+    # convert PJ to TWh
+    df.potential /= 3.6
+    df.Unit = "TWh/a"
 
-    grouper = {v: k for k, vv in config["classes"].items() for v in vv}
-    df = df.groupby(grouper, axis=1).sum()
+    dff = df.query("Year == @year and Scenario == @scenario")
 
-    df.index.name = "MWh/a"
+    bio = dff.groupby(["NUTS2", "commodity"]).potential.sum().unstack()
+
+    # currently Serbia and Kosovo not split, so aggregate (a NaN on one side must not erase the other)
+    bio.loc["RS"] = bio.loc["RS"].add(bio.loc["XK"], fill_value=0)
+    bio.drop("XK", inplace=True)
+
+    return bio
 
-    df.to_csv(snakemake.output.biomass_potentials)
+
+def disaggregate_nuts0(bio):
+    """Update NUTS2 rows of ``bio`` with the population-weighted share of their country's row; ``bio`` is modified in place and returned."""
+    pop = build_nuts_population_data()
+
+    # get population in nuts2 (copy, so the new column is not written onto a slice of pop)
+    pop_nuts2 = pop.loc[pop.index.str.len() == 4].copy()
+    by_country = pop_nuts2.total.groupby(pop_nuts2.ct).sum()
+    pop_nuts2["fraction"] = pop_nuts2.total / pop_nuts2.ct.map(by_country)
+
+    # distribute nuts0 data to nuts2 by population
+    bio_nodal = bio.loc[pop_nuts2.ct]
+    bio_nodal.index = pop_nuts2.index
+    bio_nodal = bio_nodal.mul(pop_nuts2.fraction, axis=0)
+
+    # update inplace
+    bio.update(bio_nodal)
+
+    return bio
+
+
+def build_nuts2_shapes():
+    """Return NUTS2 geometries indexed by region id, extended by country shapes:
+    - load NUTS2 geometries
+    - add RS, AL, BA country shapes (not covered in NUTS 2013)
+    - consistently name ME, MK
+    """
+
+    nuts2 = gpd.GeoDataFrame(gpd.read_file(snakemake.input.nuts2).set_index('id').geometry)
+
+    countries = gpd.read_file(snakemake.input.country_shapes).set_index('name')
+    missing = countries.loc[["AL", "RS", "BA"]]
+    nuts2.rename(index={"ME00": "ME", "MK00": "MK"}, inplace=True)
+
+    return pd.concat([nuts2, missing])
+
+
+def area(gdf):
+    """Area of each geometry in ``gdf`` in km², measured after reprojecting to EPSG:3035."""
+    return gdf.to_crs(epsg=3035).area / 1e6
+
+
+def convert_nuts2_to_regions(bio_nuts2, regions):
+    """Re-map NUTS2-level values onto ``regions``, weighting each NUTS2 row by the share of its area lying inside a region."""
+    # calculate area of nuts2 regions (on a new frame, so the caller's frame is not modified)
+    bio_nuts2 = bio_nuts2.assign(area_nuts2=area(bio_nuts2))
+
+    overlay = gpd.overlay(regions, bio_nuts2)
+
+    # calculate share of nuts2 area inside region
+    overlay["share"] = area(overlay) / overlay["area_nuts2"]
+
+    # multiply all nuts2-level values with share of nuts2 inside region
+    adjust_cols = overlay.columns.difference({"name", "area_nuts2", "geometry", "share"})
+    overlay[adjust_cols] = overlay[adjust_cols].multiply(overlay["share"], axis=0)
+
+    bio_regions = overlay.drop(columns="geometry").groupby("name").sum()
+
+    bio_regions.drop(["area_nuts2", "share"], axis=1, inplace=True)
+
+    return bio_regions
 
 
 if __name__ == "__main__":
@@ -57,12 +150,28 @@ if __name__ == "__main__":
         from helper import mock_snakemake
         snakemake = mock_snakemake('build_biomass_potentials')
 
+    config = snakemake.config['biomass']
+    year = config["year"]
+    scenario = config["scenario"]
 
-    # This is a hack, to be replaced once snakemake is unicode-conform
+    enspreso = enspreso_biomass_potentials(year, scenario)
 
-    solid_biomass = snakemake.config['biomass']['classes']['solid biomass']
-    if 'Secondary Forestry residues sawdust' in solid_biomass:
-        solid_biomass.remove('Secondary Forestry residues sawdust')
-        solid_biomass.append('Secondary Forestry residues – sawdust')
+    enspreso = disaggregate_nuts0(enspreso)
 
-    build_biomass_potentials()
+    nuts2 = build_nuts2_shapes()
+
+    df_nuts2 = gpd.GeoDataFrame(nuts2.geometry).join(enspreso)
+
+    regions = gpd.read_file(snakemake.input.regions_onshore)
+
+    df = convert_nuts2_to_regions(df_nuts2, regions)
+
+    df.to_csv(snakemake.output.biomass_potentials_all)
+
+    grouper = {v: k for k, vv in config["classes"].items() for v in vv}
+    df = df.groupby(grouper, axis=1).sum()
+
+    df *= 1e6 # TWh/a to MWh/a
+    df.index.name = "MWh/a"
+
+    df.to_csv(snakemake.output.biomass_potentials)
diff --git a/scripts/prepare_sector_network.py b/scripts/prepare_sector_network.py
index 82e15e60..ebf2ee71 100644
--- a/scripts/prepare_sector_network.py
+++ b/scripts/prepare_sector_network.py
@@ -1527,9 +1527,6 @@ def add_biomass(n, costs):
 
     print("adding biomass")
 
-    # biomass distributed at country level - i.e. transport within country allowed
-    countries = n.buses.country.dropna().unique()
-
     biomass_potentials = pd.read_csv(snakemake.input.biomass_potentials, index_col=0)
 
     n.add("Carrier", "biogas")
@@ -1552,18 +1549,18 @@ def add_biomass(n, costs):
         "EU biogas",
         bus="EU biogas",
         carrier="biogas",
-        e_nom=biomass_potentials.loc[countries, "biogas"].sum(),
+        e_nom=biomass_potentials["biogas"].sum(),
         marginal_cost=costs.at['biogas', 'fuel'],
-        e_initial=biomass_potentials.loc[countries, "biogas"].sum()
+        e_initial=biomass_potentials["biogas"].sum()
     )
 
     n.add("Store",
         "EU solid biomass",
         bus="EU solid biomass",
         carrier="solid biomass",
-        e_nom=biomass_potentials.loc[countries, "solid biomass"].sum(),
+        e_nom=biomass_potentials["solid biomass"].sum(),
         marginal_cost=costs.at['solid biomass', 'fuel'],
-        e_initial=biomass_potentials.loc[countries, "solid biomass"].sum()
+        e_initial=biomass_potentials["solid biomass"].sum()
     )
 
     n.add("Link",