From 50f02f64e8302f85a05545fbea71384c96b5b666 Mon Sep 17 00:00:00 2001 From: lisazeyen Date: Thu, 15 Apr 2021 14:21:48 +0200 Subject: [PATCH] build_gas_network: first steps to map current gas infrastructure to PyPSA-Eur-Sec --- scripts/build_gas_network.py | 334 +++++++++++++++++++++++++++++++++++ 1 file changed, 334 insertions(+) create mode 100755 scripts/build_gas_network.py diff --git a/scripts/build_gas_network.py b/scripts/build_gas_network.py new file mode 100755 index 00000000..166a8b5d --- /dev/null +++ b/scripts/build_gas_network.py @@ -0,0 +1,334 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Builds natural gas network based on data from the SciGRID Gas project +(https://www.gas.scigrid.de/) and ENTSOG capacity map +(https://www.entsog.eu/sites/default/files/2019-10/Capacities%20for%20Transmission%20Capacity%20Map%20RTS008_NS%20-%20DWH_final.xlsx) + +Relevant Settings +----------------- +.. code:: yaml + sector: + + +Inputs +------ +gas network data from SciGRID gas and ENTSOG: + - IGGINL='data/gas_network/IGGINL_PipeSegments.csv' + - entsog_2019='data/gas_network/entsog_2019_dataset.csv' + - EMAP='data/gas_network/EMAP_Raw_PipeSegments.csv' + +Outputs +------- +combined gas network data for corresponding PyPSA-Eur-Sec network + - gas_network='resources/gas_network_{clusters}.csv' + +Description +----------- + +""" + +import logging +logger = logging.getLogger(__name__) +from _helpers import configure_logging + +import re +import pandas as pd +import numpy as np +import json + +from shapely.geometry import LineString,Point + +from sklearn.linear_model import Lasso +from sklearn.metrics import mean_absolute_error + +# helper functions ------------------------------------------------------------ +def map_entsog_point_to_IGG(entsog_index, IGGINL): + """ + maps ENTSOG point to closest IGG pipe segment which is connecting the same + countries + """ + # missing countries are labelled with "XX" in IGG dataset + countries = entsog.loc[entsog_index,["From", "To"]].to_list()+["XX"] + # do not consider direction of pipe + igg_points = IGGINL[(IGGINL["from"].isin(countries)) & (IGGINL["to"].isin(countries))] + + # get from the IGG pipes connecting the same countries as ENTSOG pipe the closest + closest = (igg_points.mid_Point.apply(lambda x: x.distance(entsog.Point[entsog_index])) + .sort_values().iloc[:1].reset_index() + .rename(columns={"index":"IGG_index", "mid_Point":"distance"})) + closest["entsog_index"] = entsog_index + # rename back to original IGG index + closest["IGG_index"] = closest["IGG_index"].apply(lambda x: x% len(IGGINL)) + + return closest + + +def map_EMAP(series, EMAP_Raw, class_dict={'S': 400, 'M': 700, 'L': 1000}, threshold=0.6,): + """ + maps EMAP pipe diameter classes to closest gas network pipes with uncertain + pipe diameter + if the distance is larger than the threshold distance, original values are + kept + """ + if series.loc["uncertain_diameter_mm"]!=0: + + distance = pd.DataFrame() + distance[0] = EMAP_Raw.Point0.apply(lambda x: x.distance(series.Point0)) + distance[1] = EMAP_Raw.Point1.apply(lambda x: x.distance(series.Point1)) + distance[2] = EMAP_Raw.Point0.apply(lambda x: x.distance(series.mid_Point)) + distance[3] = EMAP_Raw.Point1.apply(lambda x: x.distance(series.mid_Point)) + + average_dist = distance.sum(axis=1).sort_values().iloc[:1] / (len(distance.columns)) + + if all(average_dist < threshold): + series['EMAP_class'] = EMAP_Raw.loc[average_dist.index, 'pipe_class_EMap'].values[0] + series['diameter_mm'] = EMAP_Raw.loc[average_dist.index, 'pipe_class_EMap'].map(class_dict).values[0] + return series + +# main functions -------------------------------------------------------------- +def prepare_datasets(): + """ + this function prepares the following 3 dataset to be used in pypsa-eur-sec: + + (1) Scigrid gas data set IGGINL (https://doi.org/10.5281/zenodo.4288440) + (2) ENTSOG capacity map + (3) SciGRID gas data set EMAP + + """ + + # (1) read and prepocess IGGINL dataset ----------------------------------- + IGGINL = pd.read_csv(snakemake.input.IGGINL, sep=';') + # convert json format to columns + IGGINL = pd.concat([IGGINL, IGGINL.param.apply(eval).apply(pd.Series)], + axis=1) + # uncertainty parameters + uncertainty_parameters = ['max_cap_M_m3_per_d', 'diameter_mm', 'max_pressure_bar'] + # convert json to columns and rename to avoid duplicate column names + uncertainty_df = (IGGINL.uncertainty.apply(eval).apply(pd.Series)[uncertainty_parameters] + .rename(columns=lambda x: "uncertain_" + x)) + IGGINL = pd.concat([IGGINL, uncertainty_df], axis=1) + + # add from to country + IGGINL['from'] = IGGINL.country_code.apply(lambda x: x.split("'")[1]).str.strip() + IGGINL['to'] = IGGINL.country_code.apply(lambda x: x.split("'")[3]).str.strip() + + # get the buses + IGGINL["bus0"] = IGGINL.node_id.apply(lambda x: x.split("'")[1]) + IGGINL["bus1"] = IGGINL.node_id.apply(lambda x: x.split("'")[3]) + + # combine long lat to shapely point, take midpoint of pipe segment + long = IGGINL.long.apply(lambda x: sum(eval(x)) / len(eval(x))) + lat = IGGINL.lat.apply(lambda x: sum(eval(x)) / len(eval(x))) + IGGINL['mid_Point'] = (pd.concat([long, lat], axis=1) + .apply(lambda x: Point(x['long'], x['lat']), axis=1)) + IGGINL['Point0'] = IGGINL.apply(lambda x: Point(eval(x['long'])[0], eval(x['lat'])[0]), axis=1) + IGGINL['Point1'] = IGGINL.apply(lambda x: Point(eval(x['long'])[-1], eval(x['lat'])[-1]), axis=1) + + + # convert capacity from 1e6*m^3/day -> MWh/h + # TODO: units NOT really clear in documentation + # documentation p.18: + # max_cap_M_m3_per_d: The maximum annual gas volume that the pipe can transmit in units of [Mm 3 d −1 ]. + # documentation p.50 + # daily gas flow capacity + + # varibales: + # v: velocity, Q: volumetric_flow, d: pipe diameter, A: cross-sectional area + # v = Q /A = Q / (pi * (d/2)^2) + # to get sensefull gas flow velocities (v=5-10 m/s, sometimes 20m/s) volumetric flow should be annual + velocity = IGGINL.max_cap_M_m3_per_d * 1e6 / 8760 / 60 / 60 / (np.pi * (IGGINL.diameter_mm * 1e-3 * 0.5)**2) + + # specific gas constant methane R_s=518.4 J/(kgK) + R_s = 518.4 + # temperature [Kelvin] (assuming 10°Celsius) + T = 10 + 273.15 + # density [kg/m^3]= pressure [kg/ms^2] / (T * R_s), 1 bar = 1e5 kg/(ms^2) + density = IGGINL.max_pressure_bar * 1e5 / (T * R_s) + # mass flow [kg/ h], Mega = 1e6, + mass_flow = IGGINL.max_cap_M_m3_per_d * 1e6 / 8760 * density + # gross calorific value (GCV in ENTSOT table) [kWh/kg] + gcv_lgas = 38.3 / 3.6 + gcv_hgas = 47.3 / 3.6 + # energy cap [MW] = mass_flow [kg/h] * gcv [kWh/kg] * 1e-3 + energy_cap = mass_flow * 1e-3 + energy_cap.loc[IGGINL.is_H_gas==1] *= gcv_hgas + energy_cap.loc[IGGINL.is_H_gas!=1] *= gcv_lgas + IGGINL['max_capacity'] = energy_cap + + + # (2) read and preprocess ENTSOG data ------------------------------------- + entsog = pd.read_csv(snakemake.input.entsog_2019) + entsog.drop(['From_ID', 'To_ID'], axis=1, inplace=True) + # group parallel pipes and take maximum pipe capacity + entsog_wrapping = entsog.groupby(['long', 'lat', 'From', 'To']).max()['Capacity'].reset_index() + # add shapely object + entsog_wrapping['Point'] = entsog_wrapping.apply(lambda x: Point(x['long'], x['lat']), axis=1) + # convert GWh/day to MW + entsog_wrapping["Capacity"] *= 1e3 + + # (3) read and preprocess EMAP data --------------------------------------- + EMAP_Raw = pd.read_csv(snakemake.input.EMAP, sep=';') + # convert json format to columns + EMAP_Raw = pd.concat([EMAP_Raw, EMAP_Raw.param.apply(eval).apply(pd.Series)], + axis=1) + # fill missing pipe size (["S", "M", "L"]) values with "A" + EMAP_Raw.pipe_class_EMap = EMAP_Raw.pipe_class_EMap.fillna('A') + # add shapely object + EMAP_Raw['Point0'] = EMAP_Raw.apply(lambda x: Point(eval(x['long'])[0], eval(x['lat'])[0]), axis=1) + EMAP_Raw['Point1'] = EMAP_Raw.apply(lambda x: Point(eval(x['long'])[-1], eval(x['lat'])[-1]), axis=1) + + return IGGINL, entsog_wrapping, EMAP_Raw + + +def train_lasso(IGG, alpha=0.001): + """ + trains lasso regression with unapproximated data of IGG with known + pipe diameter, pressure and capacity + + normal lasso method is choosen + """ + # ------------preprocessing---------------- + # find all pipe that have not approximated diameter, pressure and capacity data + all_data_i = (IGG.loc[:,IGG.columns.str.contains("uncertain_")]==0).all(axis=1) + train_data = IGG[all_data_i].reset_index(drop=True) + + # capacity depends on squared diameter -> add diameter^2 to training data + train_data['diameter_squared'] = train_data.diameter_mm ** 2 + + # -------------start training-------------- + logger.info('training lasso') + rg_model_normal = Lasso(alpha=alpha) + rg_model_normal.fit(train_data.diameter_mm.values.reshape(-1, 1), + train_data.max_cap_M_m3_per_d) + train_data['predict_normal'] = rg_model_normal.predict( + train_data.diameter_mm.values.reshape(-1, 1)) + + # calculate mean absolute error (MAE) + MAE = str(round(mean_absolute_error(train_data.max_cap_M_m3_per_d, + train_data.predict_normal), 3)) + + logger.info('sucessful training lasso regression, mean absoulte error (MAE) = {}' \ + .format(MAE)) + + + return rg_model_normal + + +def add_entsog_capacity(IGGINL, entsog, distance_threshold=0.5): + ''' + merges IGGINL and entsog crossborder pipe capacities, currently pipe + directions are not considered + + ''' + gas_network = IGGINL.copy() + + # find for every entsog point closest IGG point + distance = (pd.concat([map_entsog_point_to_IGG(i, IGGINL) for i in entsog.index]) + .groupby("IGG_index").min()) + + # get all points within threshold + distance = distance[distance.distance= 600) & (gas_network_diameter < 900)].mean() + gas_network_mean_diameter_l = gas_network_diameter[gas_network_diameter >= 900].mean() + pipe_diameter_dict = {'S': gas_network_mean_diameter_s, + 'M': gas_network_mean_diameter_m, + 'L': gas_network_mean_diameter_l} + + # filter on EMAP, length>50, only keep S M L + EMAP = EMAP[EMAP.length_km > 50] + EMAP = EMAP[EMAP.pipe_class_EMap.isin(['S', 'M', 'L'])] + EMAP = EMAP.reset_index(drop=True) + + # start matching + gas_network['EMAP_class'] = np.nan + gas_network = gas_network.apply(lambda x: map_EMAP(x, EMAP, + pipe_diameter_dict, + threshold=threshold), + axis=1) + + logger.info('adding {} pipe diameters from EMAP ' + .format(gas_network["EMAP_class"].notna().sum())) + + return gas_network + + +def filling_with_lasso(gas_network, regression_model): + """ + fills uncertain values with own lasso regression model + + if diameter of a pipe is still missing, use diameter data from + diameter_mm of the pipe + """ + + uncertain = gas_network['uncertain_max_cap_M_m3_per_d']!=0 + minimum_value = gas_network[~uncertain]['max_cap_M_m3_per_d'].min() + + gas_network.capacity_nan = gas_network.apply( + lambda x: regression_model.predict(np.array([x['diameter_nan']]).reshape(-1, 1))[0] + if (np.isnan(x['capacity_nan'])) & (not np.isnan(x['diameter_nan'])) else x['capacity_nan'], axis=1) + + #remove extremely small value + gas_network.capacity_nan = gas_network.capacity_nan.apply(lambda x: np.nan if x < minimum_value else x) + logger.info('finish filling with lasso') + return gas_network +#%% +if __name__ == "__main__": + + # for testing + if 'snakemake' not in globals(): + from vresutils.snakemake import MockSnakemake + snakemake = MockSnakemake( + wildcards=dict(network='elec', simpl='', clusters='37', lv='1.0', + opts='', planning_horizons='2020', + sector_opts='168H-T-H-B-I'), + + input=dict(IGGINL='data/gas_network/IGGINL_PipeSegments.csv', + entsog_2019='data/gas_network/entsog_2019_dataset.csv', + EMAP='data/gas_network/EMAP_Raw_PipeSegments.csv' + ), + output=dict(gas_network='resources/gas_network_{clusters}.csv'), + ) + import yaml + with open('config.yaml', encoding='utf8') as f: + snakemake.config = yaml.safe_load(f) + + + # prepare the data sets + IGGINL, entsog, EMAP = prepare_datasets() + + # train lasso regression + regression_model = train_lasso(IGGINL) + + # add crossborder capacities from ENTSOG + gas_network = add_entsog_capacity(IGGINL, entsog) + + # TODO ------------------------------------------------------ + # add pipe diameters from EMAP + gas_network = add_EMAP_diameter(gas_network, EMAP) + + # fill other missing values with lasso regression model + IGGINL = filling_with_lasso(IGGINL, regression_model) + + # IGGINL = node_capacity_spread(IGGINL) + # clean_save(IGGINL, output_path)