comment functions, remove not used input

This commit is contained in:
lisazeyen 2021-06-21 12:34:08 +02:00
parent e4dad6429d
commit 17a1153648
2 changed files with 203 additions and 279 deletions

View File

@ -71,11 +71,12 @@ rule build_simplified_population_layouts:
rule build_gas_network: rule build_gas_network:
input: input:
IGGINL_path='data/gas_network/IGGINL_PipeSegments.csv', gas_network="data/gas_network/gas_network_dataset.csv",
entsog_2019_path='data/gas_network/entsog_2019_dataset.csv', country_shapes=pypsaeur("resources/country_shapes.geojson"),
EMAP_path='data/gas_network/EMAP_Raw_PipeSegments.csv' regions_onshore=pypsaeur("resources/regions_onshore_elec_s{simpl}_{clusters}.geojson"),
regions_offshore=pypsaeur("resources/regions_offshore_elec_s{simpl}_{clusters}.geojson")
output: output:
clustered_pop_layout="resources/gas_network_{clusters}.csv" clustered_gas_network="resources/gas_network_elec_s{simpl}_{clusters}.csv"
resources: mem_mb=10000 resources: mem_mb=10000
script: "scripts/build_gas_network.py" script: "scripts/build_gas_network.py"
@ -188,7 +189,7 @@ rule build_industrial_production_per_country:
input: input:
ammonia_production="resources/ammonia_production.csv" ammonia_production="resources/ammonia_production.csv"
output: output:
industrial_production_per_country="resources/industrial_production_per_country.csv" industrial_production_per_country="resources/industrial_production_per_country.csv"
threads: 1 threads: 1
resources: mem_mb=1000 resources: mem_mb=1000
script: 'scripts/build_industrial_production_per_country.py' script: 'scripts/build_industrial_production_per_country.py'
@ -312,6 +313,7 @@ rule prepare_sector_network:
energy_totals_name='resources/energy_totals.csv', energy_totals_name='resources/energy_totals.csv',
co2_totals_name='resources/co2_totals.csv', co2_totals_name='resources/co2_totals.csv',
transport_name='resources/transport_data.csv', transport_name='resources/transport_data.csv',
clustered_gas_network="resources/gas_network_elec_s{simpl}_{clusters}.csv",
traffic_data = "data/emobility/", traffic_data = "data/emobility/",
biomass_potentials='resources/biomass_potentials.csv', biomass_potentials='resources/biomass_potentials.csv',
timezone_mappings='data/timezone_mappings.csv', timezone_mappings='data/timezone_mappings.csv',

View File

@ -1,9 +1,11 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Builds natural gas network based on data from the SciGRID Gas project Builds clustered natural gas network based on data from:
(https://www.gas.scigrid.de/) and ENTSOG capacity map [1] the SciGRID Gas project
(https://www.entsog.eu/sites/default/files/2019-10/Capacities%20for%20Transmission%20Capacity%20Map%20RTS008_NS%20-%20DWH_final.xlsx) (https://www.gas.scigrid.de/)
[2] ENTSOG capacity map
(https://www.entsog.eu/sites/default/files/2019-10/Capacities%20for%20Transmission%20Capacity%20Map%20RTS008_NS%20-%20DWH_final.xlsx)
Relevant Settings Relevant Settings
----------------- -----------------
@ -14,321 +16,241 @@ Relevant Settings
Inputs Inputs
------ ------
gas network data from SciGRID gas and ENTSOG: gas network data from SciGRID gas and ENTSOG:
- IGGINL='data/gas_network/IGGINL_PipeSegments.csv' - gas_network="data/gas_network/gas_network_dataset.csv",
- entsog_2019='data/gas_network/entsog_2019_dataset.csv' combined gas network data set from [1] and [2]
- EMAP='data/gas_network/EMAP_Raw_PipeSegments.csv' - country_shapes=pypsaeur("resources/country_shapes.geojson"),
- regions_onshore=pypsaeur("resources/regions_onshore_elec_s{simpl}_{clusters}.geojson"),
- regions_offshore=pypsaeur("resources/regions_offshore_elec_s{simpl}_{clusters}.geojson")
Outputs Outputs
------- -------
combined gas network data for corresponding PyPSA-Eur-Sec network clustered gas network data for corresponding PyPSA-Eur-Sec network
- gas_network='resources/gas_network_{clusters}.csv' - gas_network='resources/gas_network_{clusters}.csv'
Description Description
----------- -----------
""" """
import geoplot
import geoplot.crs as gcrs
import matplotlib.pyplot as plt
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
from _helpers import configure_logging
import re import re
import pandas as pd import pandas as pd
import numpy as np
import json import json
from shapely.geometry import LineString,Point from shapely.geometry import LineString,Point
from sklearn.linear_model import Lasso import geopandas as gpd
from sklearn.metrics import mean_absolute_error
# helper functions ------------------------------------------------------------ #-----------------##########################################################
def map_entsog_point_to_IGG(entsog_index, IGGINL): # helper functions #
#-----------------#
def concat_gdf(gdf_list, crs = 'EPSG:4326'):
"""Convert to gepandas dataframe with given Coordinate Reference System (crs)."""
return gpd.GeoDataFrame(pd.concat(gdf_list),crs=crs)
def string2list(string, with_None=True):
"""Convert string format to a list."""
p = re.compile('(?<!\\\\)\'')
string = p.sub('\"', string)
if with_None:
p2 = re.compile('None')
string = p2.sub('\"None\"', string)
return json.loads(string)
#-----------------############################################################
# main functions #
#-----------------#
def preprocessing(df_path):
"""Load and format gas network data."""
df = pd.read_csv(df_path, sep=',')
df.long = df.long.apply(string2list)
df.lat = df.lat.apply(string2list)
df.node_id = df.node_id.apply(string2list)
# pipes which can be used in both directions
both_direct_df = df[df.is_bothDirection == 1].reset_index(drop=True)
both_direct_df.node_id = both_direct_df.node_id.apply(lambda x: [x[1], x[0]])
both_direct_df.long = both_direct_df.long.apply(lambda x: [x[1], x[0]])
both_direct_df.lat = both_direct_df.lat.apply(lambda x: [x[1], x[0]])
df_singledirect = pd.concat([df, both_direct_df]).reset_index(drop=True)
df_singledirect.drop('is_bothDirection', axis=1)
# create shapely geometry points
df['point1'] = df.apply(lambda x: Point((x['long'][0], x['lat'][0])), axis=1)
df['point2'] = df.apply(lambda x: Point((x['long'][1], x['lat'][1])), axis=1)
df['point1_name'] = df.node_id.str[0]
df['point2_name'] = df.node_id.str[1]
part1 = df[['point1', 'point1_name']]
part2 = df[['point2', 'point2_name']]
part1.columns = ['geometry', 'name']
part2.columns = ['geometry', 'name']
points = [part1, part2]
points = concat_gdf(points)
points = points.drop_duplicates()
points.reset_index(drop=True, inplace=True)
return df, points
def load_region(onshore_path, offshore_path):
"""Load pypsa-eur on- and offshore regions and concat."""
buses_region_offshore = gpd.read_file(offshore_path)
buses_region_onshore = gpd.read_file(onshore_path)
buses_region = concat_gdf([buses_region_offshore, buses_region_onshore])
buses_region = buses_region.dissolve(by='name', aggfunc='sum')
buses_region = buses_region.reset_index()
return buses_region
def create_points2buses_map(input_points, buses_region):
"""Map gas network points to network buses depending on bus region."""
points = input_points.copy()
points['bus'] = None
buses_list = set(buses_region.name)
for bus in buses_list:
mask = buses_region[buses_region.name == bus]
index = gpd.clip(points, mask).index
points.loc[index, 'bus'] = bus
return points
def create_cross_regions_network(df, points2buses_map):
"""Create gas network between pypsa buses.
Input:
df : gas network data (pd.DataFrame)
points2buses_map : map gas network points to pypsa buses (pd.DataFrame)
Return:
cross_buses_gas_network : gas network connecting pypsa buses
(pd.DataFrame)
""" """
maps ENTSOG point to closest IGG pipe segment which is connecting the same tmp_df = points2buses_map[['bus', 'name']]
countries tmp_df.columns = ['buses_start','name']
""" cross_buses_gas_network = df.merge(tmp_df, left_on='point1_name',
# missing countries are labelled with "XX" in IGG dataset right_on='name')
countries = entsog.loc[entsog_index,["From", "To"]].to_list()+["XX"] tmp_df.columns = ['buses_destination', 'name']
# do not consider direction of pipe cross_buses_gas_network = cross_buses_gas_network.merge(tmp_df,
igg_points = IGGINL[(IGGINL["from"].isin(countries)) & (IGGINL["to"].isin(countries))] left_on='point2_name',
right_on='name')
# drop all pipes connecting the same bus
cross_buses_gas_network = cross_buses_gas_network[cross_buses_gas_network.buses_start \
!= cross_buses_gas_network.buses_destination]
cross_buses_gas_network.reset_index(drop=True, inplace=True)
cross_buses_gas_network.drop(['point1','point2'], axis=1, inplace=True)
# get from the IGG pipes connecting the same countries as ENTSOG pipe the closest return cross_buses_gas_network
closest = (igg_points.mid_Point.apply(lambda x: x.distance(entsog.Point[entsog_index]))
.sort_values().iloc[:1].reset_index()
.rename(columns={"index":"IGG_index", "mid_Point":"distance"}))
closest["entsog_index"] = entsog_index
# rename back to original IGG index
closest["IGG_index"] = closest["IGG_index"].apply(lambda x: x% len(IGGINL))
return closest
def map_EMAP(series, EMAP_Raw, class_dict={'S': 400, 'M': 700, 'L': 1000}, threshold=0.6,): def check_missing(nodes, cross_buses_gas_network):
""" """Check which nodes are not connected to the gas network."""
maps EMAP pipe diameter classes to closest gas network pipes with uncertain missing0 = nodes[[bus not in cross_buses_gas_network.buses_start.dropna().unique()
pipe diameter for bus in nodes]]
if the distance is larger than the threshold distance, original values are missing1 = nodes[[bus not in cross_buses_gas_network.buses_destination.dropna().unique()
kept for bus in nodes]]
""" logger.info("\n - The following buses are missing in gas network data as a start bus: \n {} \n"
if series.loc["uncertain_diameter_mm"]!=0: "- The following buses are missing in gas network data as an end bus: \n {} \n "
"- The following buses are missing completely: \n {}"
distance = pd.DataFrame() .format(', '.join(map(str, missing0)),
distance[0] = EMAP_Raw.Point0.apply(lambda x: x.distance(series.Point0)) ', '.join(map(str, missing1)),
distance[1] = EMAP_Raw.Point1.apply(lambda x: x.distance(series.Point1)) ', '.join(map(str, missing0.intersection(missing1)))))
distance[2] = EMAP_Raw.Point0.apply(lambda x: x.distance(series.mid_Point))
distance[3] = EMAP_Raw.Point1.apply(lambda x: x.distance(series.mid_Point))
average_dist = distance.sum(axis=1).sort_values().iloc[:1] / (len(distance.columns))
if all(average_dist < threshold):
series['EMAP_class'] = EMAP_Raw.loc[average_dist.index, 'pipe_class_EMap'].values[0]
series['diameter_mm'] = EMAP_Raw.loc[average_dist.index, 'pipe_class_EMap'].map(class_dict).values[0]
return series
# main functions --------------------------------------------------------------
def prepare_datasets():
"""
this function prepares the following 3 dataset to be used in pypsa-eur-sec:
(1) Scigrid gas data set IGGINL (https://doi.org/10.5281/zenodo.4288440)
(2) ENTSOG capacity map
(3) SciGRID gas data set EMAP
"""
# (1) read and prepocess IGGINL dataset -----------------------------------
IGGINL = pd.read_csv(snakemake.input.IGGINL, sep=';')
# convert json format to columns
IGGINL = pd.concat([IGGINL, IGGINL.param.apply(eval).apply(pd.Series)],
axis=1)
# uncertainty parameters
uncertainty_parameters = ['max_cap_M_m3_per_d', 'diameter_mm', 'max_pressure_bar']
# convert json to columns and rename to avoid duplicate column names
uncertainty_df = (IGGINL.uncertainty.apply(eval).apply(pd.Series)[uncertainty_parameters]
.rename(columns=lambda x: "uncertain_" + x))
IGGINL = pd.concat([IGGINL, uncertainty_df], axis=1)
# add from to country
IGGINL['from'] = IGGINL.country_code.apply(lambda x: x.split("'")[1]).str.strip()
IGGINL['to'] = IGGINL.country_code.apply(lambda x: x.split("'")[3]).str.strip()
# get the buses
IGGINL["bus0"] = IGGINL.node_id.apply(lambda x: x.split("'")[1])
IGGINL["bus1"] = IGGINL.node_id.apply(lambda x: x.split("'")[3])
# combine long lat to shapely point, take midpoint of pipe segment
long = IGGINL.long.apply(lambda x: sum(eval(x)) / len(eval(x)))
lat = IGGINL.lat.apply(lambda x: sum(eval(x)) / len(eval(x)))
IGGINL['mid_Point'] = (pd.concat([long, lat], axis=1)
.apply(lambda x: Point(x['long'], x['lat']), axis=1))
IGGINL['Point0'] = IGGINL.apply(lambda x: Point(eval(x['long'])[0], eval(x['lat'])[0]), axis=1)
IGGINL['Point1'] = IGGINL.apply(lambda x: Point(eval(x['long'])[-1], eval(x['lat'])[-1]), axis=1)
# convert capacity from 1e6*m^3/day -> MWh/h def clean_dataset(cross_buses_gas_network):
# TODO: units NOT really clear in documentation """Convert units and save only necessary data."""
# documentation p.18: cols = ['is_bothDirection', 'Capacity_GWh_h','buses_start',
# max_cap_M_m3_per_d: The maximum annual gas volume that the pipe can transmit in units of [Mm 3 d 1 ]. 'buses_destination', 'id', 'length_km']
# documentation p.50 clean_pipes = cross_buses_gas_network[cols].dropna()
# daily gas flow capacity
# varibales: # convert GW -> MW
# v: velocity, Q: volumetric_flow, d: pipe diameter, A: cross-sectional area clean_pipes.loc[:, 'Capacity_GWh_h'] *= 1e3
# v = Q /A = Q / (pi * (d/2)^2) # rename columns
# to get sensefull gas flow velocities (v=5-10 m/s, sometimes 20m/s) volumetric flow should be annual clean_pipes.rename(columns={'Capacity_GWh_h': 'pipe_capacity_MW',
velocity = IGGINL.max_cap_M_m3_per_d * 1e6 / 8760 / 60 / 60 / (np.pi * (IGGINL.diameter_mm * 1e-3 * 0.5)**2) 'buses_start': 'bus0',
'buses_destination': 'bus1'}, inplace=True)
# specific gas constant methane R_s=518.4 J/(kgK) return clean_pipes
R_s = 518.4
# temperature [Kelvin] (assuming 10°Celsius)
T = 10 + 273.15
# density [kg/m^3]= pressure [kg/ms^2] / (T * R_s), 1 bar = 1e5 kg/(ms^2)
density = IGGINL.max_pressure_bar * 1e5 / (T * R_s)
# mass flow [kg/ h], Mega = 1e6,
mass_flow = IGGINL.max_cap_M_m3_per_d * 1e6 / 8760 * density
# gross calorific value (GCV in ENTSOT table) [kWh/kg]
gcv_lgas = 38.3 / 3.6
gcv_hgas = 47.3 / 3.6
# energy cap [MW] = mass_flow [kg/h] * gcv [kWh/kg] * 1e-3
energy_cap = mass_flow * 1e-3
energy_cap.loc[IGGINL.is_H_gas==1] *= gcv_hgas
energy_cap.loc[IGGINL.is_H_gas!=1] *= gcv_lgas
IGGINL['max_capacity'] = energy_cap
# (2) read and preprocess ENTSOG data ------------------------------------- # ----------- VISULAISATION --------------------------------------------------
entsog = pd.read_csv(snakemake.input.entsog_2019) def create_view_object(cbgn_no_duplicate,buses_region):
entsog.drop(['From_ID', 'To_ID'], axis=1, inplace=True) """Create object to view gas network data."""
# group parallel pipes and take maximum pipe capacity cbgn_no_duplicate=cbgn_no_duplicate.merge(buses_region,left_on='buses_start',right_on='name')
entsog_wrapping = entsog.groupby(['long', 'lat', 'From', 'To']).max()['Capacity'].reset_index() cbgn_no_duplicate=cbgn_no_duplicate.merge(buses_region,left_on='buses_destination',right_on='name')
# add shapely object
entsog_wrapping['Point'] = entsog_wrapping.apply(lambda x: Point(x['long'], x['lat']), axis=1)
# convert GWh/day to MW
entsog_wrapping["Capacity"] *= 1e3
# (3) read and preprocess EMAP data --------------------------------------- cbgn_no_duplicate.geometry_x=cbgn_no_duplicate.geometry_x.apply(lambda x: x.centroid)
EMAP_Raw = pd.read_csv(snakemake.input.EMAP, sep=';') cbgn_no_duplicate.geometry_y=cbgn_no_duplicate.geometry_y.apply(lambda x: x.centroid)
# convert json format to columns cbgn_no_duplicate['geometry']=list(zip(cbgn_no_duplicate['geometry_x'],
EMAP_Raw = pd.concat([EMAP_Raw, EMAP_Raw.param.apply(eval).apply(pd.Series)], cbgn_no_duplicate['geometry_y']))
axis=1)
# fill missing pipe size (["S", "M", "L"]) values with "A"
EMAP_Raw.pipe_class_EMap = EMAP_Raw.pipe_class_EMap.fillna('A')
# add shapely object
EMAP_Raw['Point0'] = EMAP_Raw.apply(lambda x: Point(eval(x['long'])[0], eval(x['lat'])[0]), axis=1)
EMAP_Raw['Point1'] = EMAP_Raw.apply(lambda x: Point(eval(x['long'])[-1], eval(x['lat'])[-1]), axis=1)
return IGGINL, entsog_wrapping, EMAP_Raw final = cbgn_no_duplicate[['buses_start', 'buses_destination',
'Capacity_GWh_h', 'geometry']]
final['geometry'] = final['geometry'].apply(LineString)
final=gpd.GeoDataFrame(final,crs='EPSG:4326')
return final
def train_lasso(IGG, alpha=0.001): def view(cbgn_no_duplicate, buses_region, shapes_path):
""" """Plot gas network."""
trains lasso regression with unapproximated data of IGG with known final = create_view_object(cbgn_no_duplicate,buses_region)
pipe diameter, pressure and capacity
normal lasso method is choosen eu=gpd.read_file(shapes_path)
""" ax = geoplot.webmap(eu, projection=gcrs.WebMercator(), figsize=(20,20),
# ------------preprocessing---------------- alpha=0.5)
# find all pipe that have not approximated diameter, pressure and capacity data geoplot.choropleth(buses_region, hue='name',ax=ax, alpha=0.2,
all_data_i = (IGG.loc[:,IGG.columns.str.contains("uncertain_")]==0).all(axis=1) edgecolor='red', linewidth=2)
train_data = IGG[all_data_i].reset_index(drop=True) geoplot.sankey( final, scale='Capacity_GWh_h', hue='Capacity_GWh_h',
cmap='viridis', ax=ax, legend=True, legend_var='hue')
plt.savefig("../graphics/clustered-gas-network_{}.pdf".format(snakemake.wildcards.clusters),
bbox_inches='tight', pad_inches=0.1)
# capacity depends on squared diameter -> add diameter^2 to training data
train_data['diameter_squared'] = train_data.diameter_mm ** 2
# -------------start training--------------
logger.info('training lasso')
rg_model_normal = Lasso(alpha=alpha)
rg_model_normal.fit(train_data.diameter_mm.values.reshape(-1, 1),
train_data.max_cap_M_m3_per_d)
train_data['predict_normal'] = rg_model_normal.predict(
train_data.diameter_mm.values.reshape(-1, 1))
# calculate mean absolute error (MAE)
MAE = str(round(mean_absolute_error(train_data.max_cap_M_m3_per_d,
train_data.predict_normal), 3))
logger.info('sucessful training lasso regression, mean absoulte error (MAE) = {}' \
.format(MAE))
return rg_model_normal
def add_entsog_capacity(IGGINL, entsog, distance_threshold=0.5):
'''
merges IGGINL and entsog crossborder pipe capacities, currently pipe
directions are not considered
'''
gas_network = IGGINL.copy()
# find for every entsog point closest IGG point
distance = (pd.concat([map_entsog_point_to_IGG(i, IGGINL) for i in entsog.index])
.groupby("IGG_index").min())
# get all points within threshold
distance = distance[distance.distance<distance_threshold].reset_index()
# set capacitiy
IGGINL.loc[distance.IGG_index, 'max_capacity'] = entsog.loc[distance.entsog_index, "Capacity"]
IGGINL['distance_to_capacity_point'] = np.nan
IGGINL.loc[distance.IGG_index, 'distance_to_capacity_point'] = distance.set_index("IGG_index")["distance"]
logger.info('adding {} pipe capacities from ENTSOG '
.format(len(distance)))
return gas_network
def add_EMAP_diameter(gas_network, EMAP, threshold=0.6):
"""
add EMAP diameter to the combined data set gas_network for diameters with
uncertainty
"""
# calculate mean value of each class with original diameter data
gas_network_diameter = gas_network.loc[gas_network.uncertain_diameter_mm==0]["diameter_mm"]
# get mean IGG diameter for pipe classes S,M,L
gas_network_mean_diameter_s = gas_network_diameter[gas_network_diameter < 600].mean()
gas_network_mean_diameter_m = gas_network_diameter[(gas_network_diameter >= 600) & (gas_network_diameter < 900)].mean()
gas_network_mean_diameter_l = gas_network_diameter[gas_network_diameter >= 900].mean()
pipe_diameter_dict = {'S': gas_network_mean_diameter_s,
'M': gas_network_mean_diameter_m,
'L': gas_network_mean_diameter_l}
# filter on EMAP, length>50, only keep S M L
EMAP = EMAP[EMAP.length_km > 50]
EMAP = EMAP[EMAP.pipe_class_EMap.isin(['S', 'M', 'L'])]
EMAP = EMAP.reset_index(drop=True)
# start matching
gas_network['EMAP_class'] = np.nan
gas_network = gas_network.apply(lambda x: map_EMAP(x, EMAP,
pipe_diameter_dict,
threshold=threshold),
axis=1)
logger.info('adding {} pipe diameters from EMAP '
.format(gas_network["EMAP_class"].notna().sum()))
return gas_network
def filling_with_lasso(gas_network, regression_model):
"""
fills uncertain values with own lasso regression model
if diameter of a pipe is still missing, use diameter data from
diameter_mm of the pipe
"""
uncertain = gas_network['uncertain_max_cap_M_m3_per_d']!=0
minimum_value = gas_network[~uncertain]['max_cap_M_m3_per_d'].min()
gas_network.capacity_nan = gas_network.apply(
lambda x: regression_model.predict(np.array([x['diameter_nan']]).reshape(-1, 1))[0]
if (np.isnan(x['capacity_nan'])) & (not np.isnan(x['diameter_nan'])) else x['capacity_nan'], axis=1)
#remove extremely small value
gas_network.capacity_nan = gas_network.capacity_nan.apply(lambda x: np.nan if x < minimum_value else x)
logger.info('finish filling with lasso')
return gas_network
#%% #%%
if __name__ == "__main__": if __name__ == "__main__":
# for testing # for testing
if 'snakemake' not in globals(): if 'snakemake' not in globals():
from vresutils.snakemake import MockSnakemake from helper import mock_snakemake
snakemake = MockSnakemake( snakemake = mock_snakemake('build_gas_network',
wildcards=dict(network='elec', simpl='', clusters='37', lv='1.0', network='elec', simpl='', clusters='37',
opts='', planning_horizons='2020', lv='1.0', opts='', planning_horizons='2020',
sector_opts='168H-T-H-B-I'), sector_opts='168H-T-H-B-I')
input=dict(IGGINL='data/gas_network/IGGINL_PipeSegments.csv', logging.basicConfig(level=snakemake.config['logging_level'])
entsog_2019='data/gas_network/entsog_2019_dataset.csv',
EMAP='data/gas_network/EMAP_Raw_PipeSegments.csv'
),
output=dict(gas_network='resources/gas_network_{clusters}.csv'),
)
import yaml
with open('config.yaml', encoding='utf8') as f:
snakemake.config = yaml.safe_load(f)
# import gas network data
gas_network, points = preprocessing(snakemake.input.gas_network)
# prepare the data sets # get clustered bus regions
IGGINL, entsog, EMAP = prepare_datasets() buses_region = load_region(snakemake.input.regions_onshore,
snakemake.input.regions_offshore)
nodes = pd.Index(buses_region.name.unique())
# train lasso regression # map gas network points to network buses
regression_model = train_lasso(IGGINL) points2buses_map = create_points2buses_map(points, buses_region)
# create gas network between pypsa nodes
cross_buses_gas_network = create_cross_regions_network(gas_network,
points2buses_map)
# add crossborder capacities from ENTSOG # view(cross_buses_gas_network, buses_region, snakemake.input.country_shapes)
gas_network = add_entsog_capacity(IGGINL, entsog)
# TODO ------------------------------------------------------ # check which buses are not connected in gas network
# add pipe diameters from EMAP check_missing(nodes, cross_buses_gas_network)
gas_network = add_EMAP_diameter(gas_network, EMAP)
# fill other missing values with lasso regression model # convert units and save only needed data
IGGINL = filling_with_lasso(IGGINL, regression_model) gas_pipes = clean_dataset(cross_buses_gas_network)
gas_pipes.to_csv(snakemake.output.clustered_gas_network)
# IGGINL = node_capacity_spread(IGGINL)
# clean_save(IGGINL, output_path)