From 4cd964b3bdd84a7b2a60ebbfbcf24c5c7ceaf0bc Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Sat, 13 Nov 2021 16:48:08 +0100 Subject: [PATCH] gas_network: use IGGIELGN scigrid dataset --- Snakefile | 8 +- scripts/build_gas_network.py | 149 ++++++++++++++---------------- scripts/cluster_gas_network.py | 4 +- scripts/prepare_sector_network.py | 4 +- 4 files changed, 78 insertions(+), 87 deletions(-) diff --git a/Snakefile b/Snakefile index 68b281c4..ce5be51d 100644 --- a/Snakefile +++ b/Snakefile @@ -85,20 +85,24 @@ if config["sector"]["gas_network"]: "IGGIELGN_LNGs.geojson", "IGGIELGN_BorderPoints.geojson", "IGGIELGN_Productions.geojson", + "IGGIELGN_PipeSegments.geojson", ] + rule retrieve_gas_infrastructure_data: output: expand("data/gas_network/scigrid-gas/data/{files}", files=datafiles) script: 'scripts/retrieve_gas_infrastructure_data.py' + rule build_gas_network: input: - gas_network="data/gas_network/gas_network_dataset.csv" + gas_network="data/gas_network/scigrid-gas/data/IGGIELGN_PipeSegments.geojson" output: cleaned_gas_network="resources/gas_network.csv" resources: mem_mb=4000 script: "scripts/build_gas_network.py" + rule build_gas_input_locations: input: lng="data/gas_network/scigrid-gas/data/IGGIELGN_LNGs.geojson", @@ -112,6 +116,7 @@ if config["sector"]["gas_network"]: resources: mem_mb=2000, script: "scripts/build_gas_input_locations.py" + rule cluster_gas_network: input: cleaned_gas_network="resources/gas_network.csv", @@ -122,6 +127,7 @@ if config["sector"]["gas_network"]: resources: mem_mb=4000 script: "scripts/cluster_gas_network.py" + gas_infrastructure = {**rules.cluster_gas_network.output, **rules.build_gas_input_locations.output} else: gas_infrastructure = {} diff --git a/scripts/build_gas_network.py b/scripts/build_gas_network.py index cf4a86a1..3d4bcb2a 100644 --- a/scripts/build_gas_network.py +++ b/scripts/build_gas_network.py @@ -1,38 +1,15 @@ -""" -Preprocess gas network based on data from: - - [1] the SciGRID Gas project - 
(https://www.gas.scigrid.de/) - - [2] ENTSOG capacity map - (https://www.entsog.eu/sites/default/files/2019-10/Capacities%20for%20Transmission%20Capacity%20Map%20RTS008_NS%20-%20DWH_final.xlsx) -""" +"""Preprocess gas network based on data from the SciGRID Gas project (https://www.gas.scigrid.de/).""" import logging logger = logging.getLogger(__name__) -import re -import json - import pandas as pd +import geopandas as gpd from shapely.geometry import Point from pypsa.geo import haversine_pts -def string2list(string, with_none=True): - """Convert string format to a list.""" - - if with_none: - p2 = re.compile('None') - string = p2.sub('\"None\"', string) - else: - p = re.compile('(? 1.5 GW CH4 pipe capacity (LHV) @@ -65,75 +42,81 @@ def diameter2capacity(pipe_diameter_mm): return a3 + m3 * pipe_diameter_mm -def find_terminal_points(df): - - latlon = [] - - for attr in ["lat", "long"]: - - s = df[attr].apply(string2list) - - s = s.apply(lambda x: [x[0], x[-1]]) - - latlon.append(pd.DataFrame(s.to_list(), - columns=[f"{attr}0", f"{attr}1"] - )) - - latlon = pd.concat(latlon, axis=1) - - points = latlon.apply( - lambda x: { - "point0": Point(x.long0, x.lat0), - "point1": Point(x.long1, x.lat1) - }, - axis=1, - result_type='expand' - ) - - return pd.concat([df, points], axis=1) - - -def process_gas_network_data(fn): - - df = pd.read_csv(fn, sep=',') - - df = find_terminal_points(df) - - to_drop = ["name", "source_id", "country_code", "node_id", - "long", "lat", "lat_mean", "long_mean", "num_compressor"] +def load_dataset(fn): + df = gpd.read_file(fn) + param = df.param.apply(pd.Series) + method = df.method.apply(pd.Series)[["diameter_mm", "max_cap_M_m3_per_d"]] + method.columns = method.columns + "_method" + df = pd.concat([df, param, method], axis=1) + to_drop = ["param", "uncertainty", "method", "tags"] + to_drop = df.columns.intersection(to_drop) df.drop(to_drop, axis=1, inplace=True) + return df + +def prepare_dataset( + df, + length_factor=1.5, 
correction_threshold_length=4, + correction_threshold_p_nom=8, + bidirectional_below=10 +): + + # extract start and end from LineString + df["point0"] = df.geometry.apply(lambda x: Point(x.coords[0])) + df["point1"] = df.geometry.apply(lambda x: Point(x.coords[-1])) + + conversion_factor = 437.5 # MCM/day to MWh/h + df["p_nom"] = df.max_cap_M_m3_per_d * conversion_factor + + # for inferred diameters, assume 500 mm rather than 900 mm (more conservative) + df.loc[df.diameter_mm_method != 'raw', "diameter_mm"] = 500. + + keep = ["name", "diameter_mm", "is_H_gas", "is_bothDirection", + "length_km", "p_nom", "max_pressure_bar", + "start_year", "point0", "point1", "geometry"] to_rename = { "is_bothDirection": "bidirectional", + "is_H_gas": "H_gas", "start_year": "build_year", "length_km": "length", - "Capacity_GWh_h": "p_nom_data", - "id": "tags", } - df.rename(columns=to_rename, inplace=True) - + df = df[keep].rename(columns=to_rename) + df.bidirectional = df.bidirectional.astype(bool) + df.H_gas = df.H_gas.astype(bool) - # convert from GWh/h to MW - df.p_nom_data *= 1e3 + # short lines below 10 km are assumed to be bidirectional + short_lines = df["length"] < bidirectional_below + df.loc[short_lines, "bidirectional"] = True - # for pipes with missing diameter, assume 500 mm - df.loc[df.diameter_mm.isna(), "diameter_mm"] = 500. 
- - # for nord stream and small pipelines take original capacity data - # otherwise inferred values from pipe diameter - df["p_nom"] = df.diameter_mm.map(diameter2capacity) - df.p_nom.update( - df.p_nom_data.where((df.diameter_mm < 500) | (df.max_pressure_bar == 220)) - ) + # correct all capacities that deviate correction_threshold factor + # to diameter-based capacities, unless they are NordStream pipelines + # also all capacities below 0.5 GW are now diameter-based capacities + df["p_nom_diameter"] = df.diameter_mm.apply(diameter_to_capacity) + ratio = df.p_nom / df.p_nom_diameter + not_nordstream = df.max_pressure_bar < 220 + df.p_nom.update(df.p_nom_diameter.where( + (df.p_nom <= 500) | + ((ratio > correction_threshold_p_nom) & not_nordstream) | + ((ratio < 1 / correction_threshold_p_nom) & not_nordstream) + )) + # lines which have way too discrepant line lengths + # get assigned haversine length * length factor df["length_haversine"] = df.apply( - lambda p: 1.5 * haversine_pts([p.point0.x, p.point1.y], [p.point1.x, p.point1.y]), - axis=1 + lambda p: length_factor * haversine_pts( + [p.point0.x, p.point1.y], + [p.point1.x, p.point1.y] + ), axis=1 ) + ratio = df.eval("length / length_haversine") + df["length"].update(df.length_haversine.where( + (df["length"] < 20) | + (ratio > correction_threshold_length) | + (ratio < 1 / correction_threshold_length) + )) - df.length.update(df.length_haversine.where(df.length.isna())) - return df @@ -145,6 +128,8 @@ if __name__ == "__main__": logging.basicConfig(level=snakemake.config['logging_level']) - gas_network = process_gas_network_data(snakemake.input.gas_network) + gas_network = load_dataset(snakemake.input.gas_network) + + gas_network = prepare_dataset(gas_network) gas_network.to_csv(snakemake.output.cleaned_gas_network) \ No newline at end of file diff --git a/scripts/cluster_gas_network.py b/scripts/cluster_gas_network.py index 9f192a92..f6d15af0 100755 --- a/scripts/cluster_gas_network.py +++ 
b/scripts/cluster_gas_network.py @@ -70,12 +70,12 @@ def aggregate_parallel_pipes(df): 'bus0': 'first', 'bus1': 'first', "p_nom": 'sum', - "p_nom_data": 'sum', + "p_nom_diameter": 'sum', "max_pressure_bar": "mean", "build_year": "mean", "diameter_mm": "mean", "length": 'mean', - 'tags': ' '.join, + 'name': ' '.join, "p_min_pu": 'min', } return df.groupby(df.index).agg(strategies) diff --git a/scripts/prepare_sector_network.py b/scripts/prepare_sector_network.py index 35728a8b..2f10443d 100644 --- a/scripts/prepare_sector_network.py +++ b/scripts/prepare_sector_network.py @@ -1115,7 +1115,7 @@ def add_storage_and_grids(n, costs): p_nom_min=gas_pipes.p_nom_min, length=gas_pipes.length, capital_cost=gas_pipes.capital_cost, - tags=gas_pipes.tags, + tags=gas_pipes.name, carrier="gas pipeline", lifetime=costs.at['CH4 (g) pipeline', 'lifetime'] ) @@ -1190,7 +1190,7 @@ def add_storage_and_grids(n, costs): p_nom_extendable=True, length=h2_pipes.length, capital_cost=costs.at['H2 (g) pipeline repurposed', 'fixed'] * h2_pipes.length, - tags=h2_pipes.tags, + tags=h2_pipes.name, carrier="H2 pipeline retrofitted", lifetime=costs.at['H2 (g) pipeline repurposed', 'lifetime'] )