diff --git a/Snakefile b/Snakefile index 54b4f67e..26d04b6e 100644 --- a/Snakefile +++ b/Snakefile @@ -71,11 +71,12 @@ rule build_simplified_population_layouts: rule build_gas_network: input: - IGGINL_path='data/gas_network/IGGINL_PipeSegments.csv', - entsog_2019_path='data/gas_network/entsog_2019_dataset.csv', - EMAP_path='data/gas_network/EMAP_Raw_PipeSegments.csv' + gas_network="data/gas_network/gas_network_dataset.csv", + country_shapes=pypsaeur("resources/country_shapes.geojson"), + regions_onshore=pypsaeur("resources/regions_onshore_elec_s{simpl}_{clusters}.geojson"), + regions_offshore=pypsaeur("resources/regions_offshore_elec_s{simpl}_{clusters}.geojson") output: - clustered_pop_layout="resources/gas_network_{clusters}.csv" + clustered_gas_network="resources/gas_network_elec_s{simpl}_{clusters}.csv" resources: mem_mb=10000 script: "scripts/build_gas_network.py" @@ -188,7 +189,7 @@ rule build_industrial_production_per_country: input: ammonia_production="resources/ammonia_production.csv" output: - industrial_production_per_country="resources/industrial_production_per_country.csv" + industrial_production_per_country="resources/industrial_production_per_country.csv" threads: 1 resources: mem_mb=1000 script: 'scripts/build_industrial_production_per_country.py' @@ -312,6 +313,7 @@ rule prepare_sector_network: energy_totals_name='resources/energy_totals.csv', co2_totals_name='resources/co2_totals.csv', transport_name='resources/transport_data.csv', + clustered_gas_network="resources/gas_network_elec_s{simpl}_{clusters}.csv", traffic_data = "data/emobility/", biomass_potentials='resources/biomass_potentials.csv', timezone_mappings='data/timezone_mappings.csv', diff --git a/scripts/build_gas_network.py b/scripts/build_gas_network.py index 166a8b5d..9860f608 100755 --- a/scripts/build_gas_network.py +++ b/scripts/build_gas_network.py @@ -1,9 +1,11 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ -Builds natural gas network based on data from the SciGRID Gas project -(https://www.gas.scigrid.de/) and ENTSOG capacity map -(https://www.entsog.eu/sites/default/files/2019-10/Capacities%20for%20Transmission%20Capacity%20Map%20RTS008_NS%20-%20DWH_final.xlsx) +Builds clustered natural gas network based on data from: + [1] the SciGRID Gas project + (https://www.gas.scigrid.de/) + [2] ENTSOG capacity map + (https://www.entsog.eu/sites/default/files/2019-10/Capacities%20for%20Transmission%20Capacity%20Map%20RTS008_NS%20-%20DWH_final.xlsx) Relevant Settings ----------------- @@ -14,321 +16,241 @@ Relevant Settings Inputs ------ gas network data from SciGRID gas and ENTSOG: - - IGGINL='data/gas_network/IGGINL_PipeSegments.csv' - - entsog_2019='data/gas_network/entsog_2019_dataset.csv' - - EMAP='data/gas_network/EMAP_Raw_PipeSegments.csv' + - gas_network="data/gas_network/gas_network_dataset.csv", + combined gas network data set from [1] and [2] + - country_shapes=pypsaeur("resources/country_shapes.geojson"), + - regions_onshore=pypsaeur("resources/regions_onshore_elec_s{simpl}_{clusters}.geojson"), + - regions_offshore=pypsaeur("resources/regions_offshore_elec_s{simpl}_{clusters}.geojson") Outputs ------- -combined gas network data for corresponding PyPSA-Eur-Sec network +clustered gas network data for corresponding PyPSA-Eur-Sec network - gas_network='resources/gas_network_{clusters}.csv' Description ----------- """ +import geoplot +import geoplot.crs as gcrs +import matplotlib.pyplot as plt import logging logger = logging.getLogger(__name__) -from _helpers import configure_logging import re import pandas as pd -import numpy as np import json from shapely.geometry import LineString,Point -from sklearn.linear_model import Lasso -from sklearn.metrics import mean_absolute_error +import geopandas as gpd -# helper functions ------------------------------------------------------------ -def map_entsog_point_to_IGG(entsog_index, IGGINL): +#-----------------########################################################## +# helper functions # +#-----------------# +def concat_gdf(gdf_list, crs = 'EPSG:4326'): + """Convert to gepandas dataframe with given Coordinate Reference System (crs).""" + return gpd.GeoDataFrame(pd.concat(gdf_list),crs=crs) + + +def string2list(string, with_None=True): + """Convert string format to a list.""" + p = re.compile('(? MWh/h - # TODO: units NOT really clear in documentation - # documentation p.18: - # max_cap_M_m3_per_d: The maximum annual gas volume that the pipe can transmit in units of [Mm 3 d −1 ]. - # documentation p.50 - # daily gas flow capacity +def clean_dataset(cross_buses_gas_network): + """Convert units and save only necessary data.""" + cols = ['is_bothDirection', 'Capacity_GWh_h','buses_start', + 'buses_destination', 'id', 'length_km'] + clean_pipes = cross_buses_gas_network[cols].dropna() - # varibales: - # v: velocity, Q: volumetric_flow, d: pipe diameter, A: cross-sectional area - # v = Q /A = Q / (pi * (d/2)^2) - # to get sensefull gas flow velocities (v=5-10 m/s, sometimes 20m/s) volumetric flow should be annual - velocity = IGGINL.max_cap_M_m3_per_d * 1e6 / 8760 / 60 / 60 / (np.pi * (IGGINL.diameter_mm * 1e-3 * 0.5)**2) - - # specific gas constant methane R_s=518.4 J/(kgK) - R_s = 518.4 - # temperature [Kelvin] (assuming 10°Celsius) - T = 10 + 273.15 - # density [kg/m^3]= pressure [kg/ms^2] / (T * R_s), 1 bar = 1e5 kg/(ms^2) - density = IGGINL.max_pressure_bar * 1e5 / (T * R_s) - # mass flow [kg/ h], Mega = 1e6, - mass_flow = IGGINL.max_cap_M_m3_per_d * 1e6 / 8760 * density - # gross calorific value (GCV in ENTSOT table) [kWh/kg] - gcv_lgas = 38.3 / 3.6 - gcv_hgas = 47.3 / 3.6 - # energy cap [MW] = mass_flow [kg/h] * gcv [kWh/kg] * 1e-3 - energy_cap = mass_flow * 1e-3 - energy_cap.loc[IGGINL.is_H_gas==1] *= gcv_hgas - energy_cap.loc[IGGINL.is_H_gas!=1] *= gcv_lgas - IGGINL['max_capacity'] = energy_cap + # convert GW -> MW + clean_pipes.loc[:, 'Capacity_GWh_h'] *= 1e3 + # rename columns + clean_pipes.rename(columns={'Capacity_GWh_h': 'pipe_capacity_MW', + 'buses_start': 'bus0', + 'buses_destination': 'bus1'}, inplace=True) + return clean_pipes - # (2) read and preprocess ENTSOG data ------------------------------------- - entsog = pd.read_csv(snakemake.input.entsog_2019) - entsog.drop(['From_ID', 'To_ID'], axis=1, inplace=True) - # group parallel pipes and take maximum pipe capacity - entsog_wrapping = entsog.groupby(['long', 'lat', 'From', 'To']).max()['Capacity'].reset_index() - # add shapely object - entsog_wrapping['Point'] = entsog_wrapping.apply(lambda x: Point(x['long'], x['lat']), axis=1) - # convert GWh/day to MW - entsog_wrapping["Capacity"] *= 1e3 +# ----------- VISULAISATION -------------------------------------------------- +def create_view_object(cbgn_no_duplicate,buses_region): + """Create object to view gas network data.""" + cbgn_no_duplicate=cbgn_no_duplicate.merge(buses_region,left_on='buses_start',right_on='name') + cbgn_no_duplicate=cbgn_no_duplicate.merge(buses_region,left_on='buses_destination',right_on='name') - # (3) read and preprocess EMAP data --------------------------------------- - EMAP_Raw = pd.read_csv(snakemake.input.EMAP, sep=';') - # convert json format to columns - EMAP_Raw = pd.concat([EMAP_Raw, EMAP_Raw.param.apply(eval).apply(pd.Series)], - axis=1) - # fill missing pipe size (["S", "M", "L"]) values with "A" - EMAP_Raw.pipe_class_EMap = EMAP_Raw.pipe_class_EMap.fillna('A') - # add shapely object - EMAP_Raw['Point0'] = EMAP_Raw.apply(lambda x: Point(eval(x['long'])[0], eval(x['lat'])[0]), axis=1) - EMAP_Raw['Point1'] = EMAP_Raw.apply(lambda x: Point(eval(x['long'])[-1], eval(x['lat'])[-1]), axis=1) + cbgn_no_duplicate.geometry_x=cbgn_no_duplicate.geometry_x.apply(lambda x: x.centroid) + cbgn_no_duplicate.geometry_y=cbgn_no_duplicate.geometry_y.apply(lambda x: x.centroid) + cbgn_no_duplicate['geometry']=list(zip(cbgn_no_duplicate['geometry_x'], + cbgn_no_duplicate['geometry_y'])) - return IGGINL, entsog_wrapping, EMAP_Raw + final = cbgn_no_duplicate[['buses_start', 'buses_destination', + 'Capacity_GWh_h', 'geometry']] + final['geometry'] = final['geometry'].apply(LineString) + final=gpd.GeoDataFrame(final,crs='EPSG:4326') + + return final -def train_lasso(IGG, alpha=0.001): - """ - trains lasso regression with unapproximated data of IGG with known - pipe diameter, pressure and capacity +def view(cbgn_no_duplicate, buses_region, shapes_path): + """Plot gas network.""" + final = create_view_object(cbgn_no_duplicate,buses_region) - normal lasso method is choosen - """ - # ------------preprocessing---------------- - # find all pipe that have not approximated diameter, pressure and capacity data - all_data_i = (IGG.loc[:,IGG.columns.str.contains("uncertain_")]==0).all(axis=1) - train_data = IGG[all_data_i].reset_index(drop=True) + eu=gpd.read_file(shapes_path) + ax = geoplot.webmap(eu, projection=gcrs.WebMercator(), figsize=(20,20), + alpha=0.5) + geoplot.choropleth(buses_region, hue='name',ax=ax, alpha=0.2, + edgecolor='red', linewidth=2) + geoplot.sankey( final, scale='Capacity_GWh_h', hue='Capacity_GWh_h', + cmap='viridis', ax=ax, legend=True, legend_var='hue') + plt.savefig("../graphics/clustered-gas-network_{}.pdf".format(snakemake.wildcards.clusters), + bbox_inches='tight', pad_inches=0.1) - # capacity depends on squared diameter -> add diameter^2 to training data - train_data['diameter_squared'] = train_data.diameter_mm ** 2 - - # -------------start training-------------- - logger.info('training lasso') - rg_model_normal = Lasso(alpha=alpha) - rg_model_normal.fit(train_data.diameter_mm.values.reshape(-1, 1), - train_data.max_cap_M_m3_per_d) - train_data['predict_normal'] = rg_model_normal.predict( - train_data.diameter_mm.values.reshape(-1, 1)) - - # calculate mean absolute error (MAE) - MAE = str(round(mean_absolute_error(train_data.max_cap_M_m3_per_d, - train_data.predict_normal), 3)) - - logger.info('sucessful training lasso regression, mean absoulte error (MAE) = {}' \ - .format(MAE)) - - - return rg_model_normal - - -def add_entsog_capacity(IGGINL, entsog, distance_threshold=0.5): - ''' - merges IGGINL and entsog crossborder pipe capacities, currently pipe - directions are not considered - - ''' - gas_network = IGGINL.copy() - - # find for every entsog point closest IGG point - distance = (pd.concat([map_entsog_point_to_IGG(i, IGGINL) for i in entsog.index]) - .groupby("IGG_index").min()) - - # get all points within threshold - distance = distance[distance.distance= 600) & (gas_network_diameter < 900)].mean() - gas_network_mean_diameter_l = gas_network_diameter[gas_network_diameter >= 900].mean() - pipe_diameter_dict = {'S': gas_network_mean_diameter_s, - 'M': gas_network_mean_diameter_m, - 'L': gas_network_mean_diameter_l} - - # filter on EMAP, length>50, only keep S M L - EMAP = EMAP[EMAP.length_km > 50] - EMAP = EMAP[EMAP.pipe_class_EMap.isin(['S', 'M', 'L'])] - EMAP = EMAP.reset_index(drop=True) - - # start matching - gas_network['EMAP_class'] = np.nan - gas_network = gas_network.apply(lambda x: map_EMAP(x, EMAP, - pipe_diameter_dict, - threshold=threshold), - axis=1) - - logger.info('adding {} pipe diameters from EMAP ' - .format(gas_network["EMAP_class"].notna().sum())) - - return gas_network - - -def filling_with_lasso(gas_network, regression_model): - """ - fills uncertain values with own lasso regression model - - if diameter of a pipe is still missing, use diameter data from - diameter_mm of the pipe - """ - - uncertain = gas_network['uncertain_max_cap_M_m3_per_d']!=0 - minimum_value = gas_network[~uncertain]['max_cap_M_m3_per_d'].min() - - gas_network.capacity_nan = gas_network.apply( - lambda x: regression_model.predict(np.array([x['diameter_nan']]).reshape(-1, 1))[0] - if (np.isnan(x['capacity_nan'])) & (not np.isnan(x['diameter_nan'])) else x['capacity_nan'], axis=1) - - #remove extremely small value - gas_network.capacity_nan = gas_network.capacity_nan.apply(lambda x: np.nan if x < minimum_value else x) - logger.info('finish filling with lasso') - return gas_network #%% if __name__ == "__main__": # for testing if 'snakemake' not in globals(): - from vresutils.snakemake import MockSnakemake - snakemake = MockSnakemake( - wildcards=dict(network='elec', simpl='', clusters='37', lv='1.0', - opts='', planning_horizons='2020', - sector_opts='168H-T-H-B-I'), + from helper import mock_snakemake + snakemake = mock_snakemake('build_gas_network', + network='elec', simpl='', clusters='37', + lv='1.0', opts='', planning_horizons='2020', + sector_opts='168H-T-H-B-I') - input=dict(IGGINL='data/gas_network/IGGINL_PipeSegments.csv', - entsog_2019='data/gas_network/entsog_2019_dataset.csv', - EMAP='data/gas_network/EMAP_Raw_PipeSegments.csv' - ), - output=dict(gas_network='resources/gas_network_{clusters}.csv'), - ) - import yaml - with open('config.yaml', encoding='utf8') as f: - snakemake.config = yaml.safe_load(f) + logging.basicConfig(level=snakemake.config['logging_level']) + # import gas network data + gas_network, points = preprocessing(snakemake.input.gas_network) - # prepare the data sets - IGGINL, entsog, EMAP = prepare_datasets() + # get clustered bus regions + buses_region = load_region(snakemake.input.regions_onshore, + snakemake.input.regions_offshore) + nodes = pd.Index(buses_region.name.unique()) - # train lasso regression - regression_model = train_lasso(IGGINL) + # map gas network points to network buses + points2buses_map = create_points2buses_map(points, buses_region) + # create gas network between pypsa nodes + cross_buses_gas_network = create_cross_regions_network(gas_network, + points2buses_map) - # add crossborder capacities from ENTSOG - gas_network = add_entsog_capacity(IGGINL, entsog) + # view(cross_buses_gas_network, buses_region, snakemake.input.country_shapes) - # TODO ------------------------------------------------------ - # add pipe diameters from EMAP - gas_network = add_EMAP_diameter(gas_network, EMAP) + # check which buses are not connected in gas network + check_missing(nodes, cross_buses_gas_network) - # fill other missing values with lasso regression model - IGGINL = filling_with_lasso(IGGINL, regression_model) - - # IGGINL = node_capacity_spread(IGGINL) - # clean_save(IGGINL, output_path) + # convert units and save only needed data + gas_pipes = clean_dataset(cross_buses_gas_network) + gas_pipes.to_csv(snakemake.output.clustered_gas_network)