From f096592b7f61ea807b650b89e3f8eca3dec8957c Mon Sep 17 00:00:00 2001
From: AmosSchledorn
Date: Fri, 26 Jul 2024 19:45:45 +0200
Subject: [PATCH 01/38] add option to make central heating forward/return
 temperatures country-specific

---
 config/config.default.yaml        | 14 ++++++--
 scripts/_helpers.py               |  4 +++
 scripts/build_cop_profiles/run.py | 60 +++++++++++++++++++++++++++----
 3 files changed, 70 insertions(+), 8 deletions(-)

diff --git a/config/config.default.yaml b/config/config.default.yaml
index a606cd25..16825d59 100644
--- a/config/config.default.yaml
+++ b/config/config.default.yaml
@@ -409,8 +409,18 @@ sector:
       2050: 1.0
     district_heating_loss: 0.15
     # check these numbers!
-    forward_temperature: 90 #C
-    return_temperature: 50 #C
+    forward_temperature:
+      generic: 90
+      DK: 70
+      SE: 70
+      NO: 70
+      FI: 70
+    return_temperature:
+      generic: 50
+      DK: 40
+      SE: 40
+      NO: 40
+      FI: 40
     heat_source_cooling: 6 #K
     heat_pump_cop_approximation:
       refrigerant: ammonia
diff --git a/scripts/_helpers.py b/scripts/_helpers.py
index a3b77c1c..040ba125 100644
--- a/scripts/_helpers.py
+++ b/scripts/_helpers.py
@@ -785,3 +785,7 @@ def get_snapshots(snapshots, drop_leap_day=False, freq="h", **kwargs):
         time = time[~((time.month == 2) & (time.day == 29))]
 
     return time
+
+
+def get_country_from_node_name(node_name: str) -> str:
+    return node_name[:2]
\ No newline at end of file
diff --git a/scripts/build_cop_profiles/run.py b/scripts/build_cop_profiles/run.py
index 12d012bb..0c40a94d 100644
--- a/scripts/build_cop_profiles/run.py
+++ b/scripts/build_cop_profiles/run.py
@@ -5,10 +5,43 @@
 import numpy as np
 import xarray as xr
-from _helpers import set_scenario_config
+from _helpers import set_scenario_config, get_country_from_node_name
 from CentralHeatingCopApproximator import CentralHeatingCopApproximator
 from DecentralHeatingCopApproximator import DecentralHeatingCopApproximator
 
+
+def map_temperature_dict_to_onshore_regions(
+    temperature_dict: dict, onshore_regions: xr.DataArray
+) -> xr.DataArray:
+    """
+    Map dictionary of temperatures to onshore regions.
+
+    Parameters:
+    ----------
+    temperature_dict : dictionary
+        Dictionary with temperatures as values and country keys as keys. One key must be named "generic"
+    onshore_regions : xr.DataArray
+        Names of onshore regions
+
+    Returns:
+    -------
+    xr.DataArray
+        The dictionary values mapped to onshore regions with onshore regions as coordinates.
+ """ + return xr.DataArray( + [ + ( + temperature_dict[get_country_from_node_name(node_name)] + if get_country_from_node_name(node_name) in temperature_dict.keys() + else temperature_dict["generic"] + ) + for node_name in onshore_regions["name"].values + ], + dims=["name"], + coords={"name": onshore_regions["name"]}, + ) + + if __name__ == "__main__": if "snakemake" not in globals(): from _helpers import mock_snakemake @@ -23,12 +56,12 @@ if __name__ == "__main__": for source_type in ["air", "soil"]: # source inlet temperature (air/soil) is based on weather data - source_inlet_temperature_celsius = xr.open_dataarray( + source_inlet_temperature_celsius: xr.DataArray = xr.open_dataarray( snakemake.input[f"temp_{source_type}_total"] ) # Approximate COP for decentral (individual) heating - cop_individual_heating = DecentralHeatingCopApproximator( + cop_individual_heating: xr.DataArray = DecentralHeatingCopApproximator( forward_temperature_celsius=snakemake.params.heat_pump_sink_T_decentral_heating, source_inlet_temperature_celsius=source_inlet_temperature_celsius, source_type=source_type, @@ -37,10 +70,25 @@ if __name__ == "__main__": snakemake.output[f"cop_{source_type}_decentral_heating"] ) + # map forward and return temperatures specified on country-level to onshore regions + onshore_regions: xr.DataArray = source_inlet_temperature_celsius["name"] + forward_temperature_central_heating: xr.DataArray = ( + map_temperature_dict_to_onshore_regions( + temperature_dict=snakemake.params.forward_temperature_central_heating, + onshore_regions=onshore_regions, + ) + ) + return_temperature_central_heating: xr.DataArray = ( + map_temperature_dict_to_onshore_regions( + temperature_dict=snakemake.params.return_temperature_central_heating, + onshore_regions=onshore_regions, + ) + ) + # Approximate COP for central (district) heating - cop_central_heating = CentralHeatingCopApproximator( - forward_temperature_celsius=snakemake.params.forward_temperature_central_heating, - return_temperature_celsius=snakemake.params.return_temperature_central_heating, + cop_central_heating: xr.DataArray = CentralHeatingCopApproximator( + forward_temperature_celsius=forward_temperature_central_heating, + return_temperature_celsius=return_temperature_central_heating, source_inlet_temperature_celsius=source_inlet_temperature_celsius, source_outlet_temperature_celsius=source_inlet_temperature_celsius - snakemake.params.heat_source_cooling_central_heating, From 08c30d8b2ecd2be73168a87e61fdd32f07405d24 Mon Sep 17 00:00:00 2001 From: AmosSchledorn Date: Fri, 26 Jul 2024 20:08:06 +0200 Subject: [PATCH 02/38] ensure correct dimensions in forward/return temperature arayx and change naming from "generic" to "default" --- config/config.default.yaml | 5 ++--- scripts/build_cop_profiles/run.py | 26 ++++++++++++++++---------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/config/config.default.yaml b/config/config.default.yaml index 16825d59..527f68c4 100644 --- a/config/config.default.yaml +++ b/config/config.default.yaml @@ -408,15 +408,14 @@ sector: 2045: 0.8 2050: 1.0 district_heating_loss: 0.15 - # check these numbers! 
     forward_temperature:
-      generic: 90
+      default: 90
       DK: 70
       SE: 70
       NO: 70
       FI: 70
     return_temperature:
-      generic: 50
+      default: 50
       DK: 40
       SE: 40
       NO: 40
       FI: 40
diff --git a/scripts/build_cop_profiles/run.py b/scripts/build_cop_profiles/run.py
index 0c40a94d..d2976b6e 100644
--- a/scripts/build_cop_profiles/run.py
+++ b/scripts/build_cop_profiles/run.py
@@ -11,7 +11,7 @@ from DecentralHeatingCopApproximator import DecentralHeatingCopApproximator
 
 def map_temperature_dict_to_onshore_regions(
-    temperature_dict: dict, onshore_regions: xr.DataArray
+    temperature_dict: dict, onshore_regions: xr.DataArray, snapshots: xr.DataArray
 ) -> xr.DataArray:
     """
     Map dictionary of temperatures to onshore regions.
@@ -19,7 +19,7 @@ def map_temperature_dict_to_onshore_regions(
     Parameters:
     ----------
     temperature_dict : dictionary
-        Dictionary with temperatures as values and country keys as keys. One key must be named "generic"
+        Dictionary with temperatures as values and country keys as keys. One key must be named "default"
     onshore_regions : xr.DataArray
         Names of onshore regions
 
@@ -30,15 +30,19 @@ def map_temperature_dict_to_onshore_regions(
     """
     return xr.DataArray(
         [
-            (
-                temperature_dict[get_country_from_node_name(node_name)]
-                if get_country_from_node_name(node_name) in temperature_dict.keys()
-                else temperature_dict["generic"]
-            )
-            for node_name in onshore_regions["name"].values
+            [
+                (
+                    temperature_dict[get_country_from_node_name(node_name)]
+                    if get_country_from_node_name(node_name) in temperature_dict.keys()
+                    else temperature_dict["default"]
+                )
+                for node_name in onshore_regions["name"].values
+            ]
+            # pass both nodes and snapshots as dimensions to preserve correct data structure
+            for _ in snapshots["time"].values
         ],
-        dims=["name"],
-        coords={"name": onshore_regions["name"]},
+        dims=["time", "name"],
+        coords={"time": snapshots["time"], "name": onshore_regions["name"]},
     )
@@ -76,12 +80,14 @@ if __name__ == "__main__":
         forward_temperature_central_heating: xr.DataArray = (
             map_temperature_dict_to_onshore_regions(
                 temperature_dict=snakemake.params.forward_temperature_central_heating,
                 onshore_regions=onshore_regions,
+                snapshots=source_inlet_temperature_celsius["time"]
             )
         )
         return_temperature_central_heating: xr.DataArray = (
             map_temperature_dict_to_onshore_regions(
                 temperature_dict=snakemake.params.return_temperature_central_heating,
                 onshore_regions=onshore_regions,
+                snapshots=source_inlet_temperature_celsius["time"]
             )
         )

From 9d485a2185ce4a39a27325e070573b2859c1647e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 26 Jul 2024 18:12:18 +0000
Subject: [PATCH 03/38] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 config/config.default.yaml        | 2 +-
 scripts/_helpers.py               | 2 +-
 scripts/build_cop_profiles/run.py | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/config/config.default.yaml b/config/config.default.yaml
index 527f68c4..92a24534 100644
--- a/config/config.default.yaml
+++ b/config/config.default.yaml
@@ -414,7 +414,7 @@ sector:
       SE: 70
       NO: 70
       FI: 70
-    return_temperature: 
+    return_temperature:
       default: 50
       DK: 40
       SE: 40
diff --git a/scripts/_helpers.py b/scripts/_helpers.py
index 040ba125..1c890ea5 100644
--- a/scripts/_helpers.py
+++ b/scripts/_helpers.py
@@ -788,4 +788,4 @@ def get_snapshots(snapshots, drop_leap_day=False, freq="h", **kwargs):
 
 
 def get_country_from_node_name(node_name: str) -> str:
-    return node_name[:2]
\ No newline at end of file
+    return node_name[:2]
diff --git a/scripts/build_cop_profiles/run.py b/scripts/build_cop_profiles/run.py
index d2976b6e..ef6253dd 100644
--- a/scripts/build_cop_profiles/run.py
+++ b/scripts/build_cop_profiles/run.py
@@ -5,7 +5,7 @@
 
 import numpy as np
 import xarray as xr
-from _helpers import set_scenario_config, get_country_from_node_name
+from _helpers import get_country_from_node_name, set_scenario_config
 from CentralHeatingCopApproximator import CentralHeatingCopApproximator
 from DecentralHeatingCopApproximator import DecentralHeatingCopApproximator
 
@@ -80,14 +80,14 @@ if __name__ == "__main__":
         map_temperature_dict_to_onshore_regions(
             temperature_dict=snakemake.params.forward_temperature_central_heating,
             onshore_regions=onshore_regions,
-            snapshots=source_inlet_temperature_celsius["time"]
+            snapshots=source_inlet_temperature_celsius["time"],
         )
     )
     return_temperature_central_heating: xr.DataArray = (
         map_temperature_dict_to_onshore_regions(
             temperature_dict=snakemake.params.return_temperature_central_heating,
             onshore_regions=onshore_regions,
-            snapshots=source_inlet_temperature_celsius["time"]
+            snapshots=source_inlet_temperature_celsius["time"],
         )
     )

From 80603fda6eef226ca48de1b037150bf9faa33495 Mon Sep 17 00:00:00 2001
From: AmosSchledorn
Date: Wed, 7 Aug 2024 13:33:59 +0200
Subject: [PATCH 04/38] update config and script

---
 config/config.default.yaml        | 15 +++++----------
 rules/build_sector.smk            |  2 ++
 scripts/build_cop_profiles/run.py | 30 +++++++++++++++++-------------
 3 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/config/config.default.yaml b/config/config.default.yaml
index f7ee6716..12610771 100644
--- a/config/config.default.yaml
+++ b/config/config.default.yaml
@@ -410,18 +410,13 @@ sector:
       2045: 0.8
       2050: 1.0
     district_heating_loss: 0.15
+    # check these numbers!
     forward_temperature:
-      default: 90
-      DK: 70
-      SE: 70
-      NO: 70
-      FI: 70
+      DE: 90 #C
+      DK: 60 #C
     return_temperature:
-      default: 50
-      DK: 40
-      SE: 40
-      NO: 40
-      FI: 40
+      DE: 50 #C
+      DK: 40 #C
     heat_source_cooling: 6 #K
     heat_pump_cop_approximation:
       refrigerant: ammonia
diff --git a/rules/build_sector.smk b/rules/build_sector.smk
index 5916aa02..c7796bab 100644
--- a/rules/build_sector.smk
+++ b/rules/build_sector.smk
@@ -242,9 +242,11 @@ rule build_cop_profiles:
             "sector", "district_heating", "heat_pump_cop_approximation"
         ),
         heat_pump_sources=config_provider("sector", "heat_pump_sources"),
+        snapshots=config_provider("snapshots"),
     input:
         temp_soil_total=resources("temp_soil_total_elec_s{simpl}_{clusters}.nc"),
         temp_air_total=resources("temp_air_total_elec_s{simpl}_{clusters}.nc"),
+        regions_onshore=resources("regions_onshore_elec_s{simpl}_{clusters}.geojson"),
     output:
         cop_profiles=resources("cop_profiles_elec_s{simpl}_{clusters}.nc"),
     resources:
diff --git a/scripts/build_cop_profiles/run.py b/scripts/build_cop_profiles/run.py
index 44bca6eb..69e2b519 100644
--- a/scripts/build_cop_profiles/run.py
+++ b/scripts/build_cop_profiles/run.py
@@ -8,6 +8,7 @@ import sys
 import numpy as np
 import pandas as pd
 import xarray as xr
+import geopandas as gpd
 from _helpers import get_country_from_node_name, set_scenario_config
 from CentralHeatingCopApproximator import CentralHeatingCopApproximator
 from DecentralHeatingCopApproximator import DecentralHeatingCopApproximator
@@ -17,8 +18,8 @@ from scripts.definitions.heat_system_type import HeatSystemType
 
 def map_temperature_dict_to_onshore_regions(
     supply_temperature_by_country: dict,
-    onshore_regions: xr.DataArray,
-    snapshots: xr.DataArray,
+    regions_onshore: pd.Index,
+    snapshots: pd.DatetimeIndex,
 ) -> xr.DataArray:
     """
     Map dictionary of temperatures to onshore regions.
@@ -27,8 +28,10 @@ def map_temperature_dict_to_onshore_regions(
     ----------
     supply_temperature_by_country : dictionary
         Dictionary with temperatures as values and country keys as keys. One key must be named "default"
-    onshore_regions : xr.DataArray
+    regions_onshore : pd.Index
         Names of onshore regions
+    snapshots : pd.DatetimeIndex
+        Time stamps
 
     Returns:
     -------
     xr.DataArray
@@ -44,13 +47,13 @@ def map_temperature_dict_to_onshore_regions(
                     in supply_temperature_by_country.keys()
                     else supply_temperature_by_country["default"]
                 )
-                for node_name in onshore_regions["name"].values
+                for node_name in regions_onshore.values
             ]
             # pass both nodes and snapshots as dimensions to preserve correct data structure
-            for _ in snapshots["time"].values
+            for _ in snapshots
         ],
         dims=["time", "name"],
-        coords={"time": snapshots["time"], "name": onshore_regions["name"]},
+        coords={"time": snapshots, "name": regions_onshore},
     )
@@ -108,19 +111,20 @@ if __name__ == "__main__":
     set_scenario_config(snakemake)
 
     # map forward and return temperatures specified on country-level to onshore regions
-    onshore_regions: xr.DataArray = source_inlet_temperature_celsius["name"]
+    regions_onshore = gpd.read_file(snakemake.input.regions_onshore)["name"]
+    snapshots = pd.date_range(freq="h", **snakemake.params.snapshots)
     forward_temperature_central_heating_by_node_and_time: xr.DataArray = (
         map_temperature_dict_to_onshore_regions(
-            temperature_dict=snakemake.params.forward_temperature_central_heating,
-            onshore_regions=onshore_regions,
-            snapshots=source_inlet_temperature_celsius["time"],
+            supply_temperature_by_country=snakemake.params.forward_temperature_central_heating,
+            regions_onshore=regions_onshore,
+            snapshots=snapshots,
         )
     )
     return_temperature_central_heating_by_node_and_time: xr.DataArray = (
         map_temperature_dict_to_onshore_regions(
-            temperature_dict=snakemake.params.return_temperature_central_heating,
-            onshore_regions=onshore_regions,
-            snapshots=source_inlet_temperature_celsius["time"],
+            supply_temperature_by_country=snakemake.params.return_temperature_central_heating,
+            regions_onshore=regions_onshore,
+            snapshots=snapshots,
         )
     )
     cop_all_system_types = []

From 7f76bf8e84a7b0a962cf1c4c8bb186d4ae0103da Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 7 Aug 2024 12:37:44 +0000
Subject: [PATCH 05/38] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 scripts/build_cop_profiles/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/build_cop_profiles/run.py b/scripts/build_cop_profiles/run.py
index 69e2b519..d21b863d 100644
--- a/scripts/build_cop_profiles/run.py
+++ b/scripts/build_cop_profiles/run.py
@@ -5,10 +5,10 @@
 
 import sys
 
+import geopandas as gpd
 import numpy as np
 import pandas as pd
 import xarray as xr
-import geopandas as gpd
 from _helpers import get_country_from_node_name, set_scenario_config
 from CentralHeatingCopApproximator import CentralHeatingCopApproximator
 from DecentralHeatingCopApproximator import DecentralHeatingCopApproximator

From ab6415bdc33253796cf4c3b9831a2144bc012ffc Mon Sep 17 00:00:00 2001
From: AmosSchledorn
Date: Wed, 7 Aug 2024 14:54:28 +0200
Subject: [PATCH 06/38] re-add default supply temperature

---
 config/config.default.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/config/config.default.yaml b/config/config.default.yaml
index 12610771..aead3e3d 100644
--- a/config/config.default.yaml
+++ b/config/config.default.yaml
@@ -412,9 +412,11 @@ sector:
     district_heating_loss: 0.15
     # check these numbers!
     forward_temperature:
+      Default: 90
       DE: 90 #C
       DK: 60 #C
     return_temperature:
+      Default: 50
       DE: 50 #C
       DK: 40 #C
     heat_source_cooling: 6 #K

From 69c6ea0e1229a52364c04001222a39eccd3b52a0 Mon Sep 17 00:00:00 2001
From: AmosSchledorn
Date: Wed, 7 Aug 2024 17:55:29 +0200
Subject: [PATCH 07/38] use relative imports of scripts.definitions where
 possible

---
 scripts/add_existing_baseyear.py  | 6 +++---
 scripts/prepare_sector_network.py | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/scripts/add_existing_baseyear.py b/scripts/add_existing_baseyear.py
index f67c38d9..942b1398 100644
--- a/scripts/add_existing_baseyear.py
+++ b/scripts/add_existing_baseyear.py
@@ -24,9 +24,9 @@ from _helpers import (
 from add_electricity import sanitize_carriers
 from prepare_sector_network import cluster_heat_buses, define_spatial, prepare_costs
 
-from scripts.definitions.heat_sector import HeatSector
-from scripts.definitions.heat_system import HeatSystem
-from scripts.definitions.heat_system_type import HeatSystemType
+from definitions.heat_sector import HeatSector
+from definitions.heat_system import HeatSystem
+from definitions.heat_system_type import HeatSystemType
 
 logger = logging.getLogger(__name__)
 cc = coco.CountryConverter()
diff --git a/scripts/prepare_sector_network.py b/scripts/prepare_sector_network.py
index 585b3f25..7c6f7c61 100644
--- a/scripts/prepare_sector_network.py
+++ b/scripts/prepare_sector_network.py
@@ -37,9 +37,9 @@ from pypsa.geo import haversine_pts
 from pypsa.io import import_components_from_dataframe
 from scipy.stats import beta
 
-from scripts.definitions.heat_sector import HeatSector
-from scripts.definitions.heat_system import HeatSystem
-from scripts.definitions.heat_system_type import HeatSystemType
+from definitions.heat_sector import HeatSector
+from definitions.heat_system import HeatSystem
+from definitions.heat_system_type import HeatSystemType
 
 spatial = SimpleNamespace()
 logger = logging.getLogger(__name__)

From cfed67f794ea68f0200bde8023c9e1ae4defe9f0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 8 Aug 2024 14:46:06 +0000
Subject: [PATCH 08/38] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 scripts/add_existing_baseyear.py  | 3 +--
 scripts/prepare_sector_network.py | 7 +++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/scripts/add_existing_baseyear.py b/scripts/add_existing_baseyear.py
index 942b1398..212ae8af 100644
--- a/scripts/add_existing_baseyear.py
+++ b/scripts/add_existing_baseyear.py
@@ -22,11 +22,10 @@ from _helpers import (
     update_config_from_wildcards,
 )
 from add_electricity import sanitize_carriers
-from prepare_sector_network import cluster_heat_buses, define_spatial, prepare_costs
-
 from definitions.heat_sector import HeatSector
 from definitions.heat_system import HeatSystem
 from definitions.heat_system_type import HeatSystemType
+from prepare_sector_network import cluster_heat_buses, define_spatial, prepare_costs
 
 logger = logging.getLogger(__name__)
 cc = coco.CountryConverter()
diff --git a/scripts/prepare_sector_network.py b/scripts/prepare_sector_network.py
index 7c6f7c61..191294b6 100644
--- a/scripts/prepare_sector_network.py
+++ b/scripts/prepare_sector_network.py
@@ -30,6 +30,9 @@ from build_energy_totals import (
     build_eurostat_co2,
 )
 from build_transport_demand import transport_degree_factor
+from definitions.heat_sector import HeatSector
+from definitions.heat_system import HeatSystem
+from definitions.heat_system_type import HeatSystemType
 from networkx.algorithms import complement
 from networkx.algorithms.connectivity.edge_augmentation import k_edge_augmentation
 from prepare_network import maybe_adjust_costs_and_potentials
@@ -37,10 +40,6 @@ from pypsa.geo import haversine_pts
 from pypsa.io import import_components_from_dataframe
 from scipy.stats import beta
 
-from definitions.heat_sector import HeatSector
-from definitions.heat_system import HeatSystem
-from definitions.heat_system_type import HeatSystemType
-
 spatial = SimpleNamespace()
 logger = logging.getLogger(__name__)

From 130ee49dbd132c07713c165b4b43a91676fe47c5 Mon Sep 17 00:00:00 2001
From: AmosSchledorn
Date: Fri, 26 Jul 2024 19:45:45 +0200
Subject: [PATCH 09/38] add option to make central heating forward/return
 temperatures country-specific

---
 scripts/build_cop_profiles/run.py | 37 +++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/scripts/build_cop_profiles/run.py b/scripts/build_cop_profiles/run.py
index d21b863d..1b2c0a35 100644
--- a/scripts/build_cop_profiles/run.py
+++ b/scripts/build_cop_profiles/run.py
@@ -3,6 +3,43 @@
 #
 # SPDX-License-Identifier: MIT
 
+"""
+Approximate heat pump coefficient-of-performance (COP) profiles for different heat sources and systems.
+
+For central heating, this is based on Jensen et al. (2018) (c.f. `CentralHeatingCopApproximator `_) and for decentral heating, the approximation is based on Staffell et al. (2012) (c.f. `DecentralHeatingCopApproximator `_).
+
+Relevant Settings
+-----------------
+
+.. code:: yaml
+    sector:
+        heat_pump_sink_T_decentral_heating:
+        district_heating:
+            forward_temperature:
+            return_temperature:
+            heat_source_cooling:
+            heat_pump_cop_approximation:
+                refrigerant:
+                heat_exchanger_pinch_point_temperature_difference
+                isentropic_compressor_efficiency:
+                heat_loss:
+        heat_pump_sources:
+            urban central:
+            urban decentral:
+            rural:
+    snapshots:
+
+Inputs
+------
+- `resources//regions_onshore.geojson`: Onshore regions
+- `resources//temp_soil_total`: Ground temperature
+- `resources//temp_air_total`: Air temperature
+
+Outputs
+-------
+- `resources//cop_profiles.nc`: Heat pump coefficient-of-performance (COP) profiles
+"""
+
 import sys
 
 import geopandas as gpd

From a914366391b03e6c10a7056e955eccbf21d0520e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 8 Aug 2024 15:13:31 +0000
Subject: [PATCH 10/38] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 scripts/build_cop_profiles/run.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/build_cop_profiles/run.py b/scripts/build_cop_profiles/run.py
index 1b2c0a35..23229e98 100644
--- a/scripts/build_cop_profiles/run.py
+++ b/scripts/build_cop_profiles/run.py
@@ -2,9 +2,9 @@
 # SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors
 #
 # SPDX-License-Identifier: MIT
-
 """
-Approximate heat pump coefficient-of-performance (COP) profiles for different heat sources and systems.
+Approximate heat pump coefficient-of-performance (COP) profiles for different
+heat sources and systems.
 
 For central heating, this is based on Jensen et al. (2018) (c.f. `CentralHeatingCopApproximator `_) and for decentral heating, the approximation is based on Staffell et al. (2012) (c.f. `DecentralHeatingCopApproximator `_).
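
Note: the country lookup implemented by map_temperature_dict_to_onshore_regions resolves each onshore region via the first two characters of its node name and falls back to the "default" entry. A minimal sketch of this behaviour, using hypothetical region names (the patches above contain the authoritative implementation):

    import xarray as xr

    forward_temperature = {"default": 90, "DK": 70, "SE": 70, "NO": 70, "FI": 70}
    node_names = ["DE1 0", "DK1 0", "SE2 3"]  # hypothetical onshore region names

    # get_country_from_node_name: the first two characters encode the country
    temperatures = xr.DataArray(
        [
            forward_temperature.get(name[:2], forward_temperature["default"])
            for name in node_names
        ],
        dims=["name"],
        coords={"name": node_names},
    )
    # temperatures.sel(name="DK1 0") -> 70; "DE1 0" falls back to the default of 90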
From 023cc7529e4ff70d81c64f0027e2553d673f24ec Mon Sep 17 00:00:00 2001
From: AmosSchledorn
Date: Thu, 8 Aug 2024 17:15:01 +0200
Subject: [PATCH 11/38] set lower supply temperatures for Scandinavia

---
 config/config.default.yaml | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/config/config.default.yaml b/config/config.default.yaml
index aead3e3d..adb9ffa1 100644
--- a/config/config.default.yaml
+++ b/config/config.default.yaml
@@ -412,13 +412,17 @@ sector:
     district_heating_loss: 0.15
     # check these numbers!
     forward_temperature:
-      Default: 90
-      DE: 90 #C
-      DK: 60 #C
+      default: 90
+      DK: 70
+      SE: 70
+      NO: 70
+      FI: 70
     return_temperature:
-      Default: 50
-      DE: 50 #C
-      DK: 40 #C
+      default: 50
+      DK: 40
+      SE: 40
+      NO: 40
+      FI: 40
     heat_source_cooling: 6 #K
     heat_pump_cop_approximation:
       refrigerant: ammonia

From 5ee5a43be30dd953685f4f78c291bce3a5d92edc Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 8 Aug 2024 15:16:03 +0000
Subject: [PATCH 12/38] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 config/config.default.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/config/config.default.yaml b/config/config.default.yaml
index adb9ffa1..65006938 100644
--- a/config/config.default.yaml
+++ b/config/config.default.yaml
@@ -413,10 +413,10 @@ sector:
     # check these numbers!
     forward_temperature:
       default: 90
-      DK: 70 
-      SE: 70 
-      NO: 70 
-      FI: 70 
+      DK: 70
+      SE: 70
+      NO: 70
+      FI: 70
     return_temperature:
       default: 50
       DK: 40

From 9a0930419e98a010dba814d2201c4ad639e1d94f Mon Sep 17 00:00:00 2001
From: AmosSchledorn
Date: Thu, 8 Aug 2024 17:17:33 +0200
Subject: [PATCH 13/38] update configtables

---
 doc/configtables/sector.csv | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/configtables/sector.csv b/doc/configtables/sector.csv
index e0deb8ca..9b8ed932 100644
--- a/doc/configtables/sector.csv
+++ b/doc/configtables/sector.csv
@@ -9,8 +9,8 @@ district_heating,--,,`prepare_sector_network.py

Date: Thu, 8 Aug 2024 17:18:48 +0200
Subject: [PATCH 14/38] update release notes

---
 doc/release_notes.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/release_notes.rst b/doc/release_notes.rst
index 9ce418b2..7a815ebc 100644
--- a/doc/release_notes.rst
+++ b/doc/release_notes.rst
@@ -10,6 +10,8 @@ Release Notes
 Upcoming Release
 ================
 
+* Added option to use country-specific district heating forward and return temperatures. Defaults to lower temperatures in Scandinavia.
+
 * Changed heat pump COP approximation for central heating to be based on `Jensen et al. (2018) `__ and a default forward temperature of 90C. This is more realistic for district heating than the previously used approximation method.
 
 * split solid biomass potentials into solid biomass and municipal solid waste. Add option to use municipal solid waste. This option is only activated in combination with the flag ``waste_to_energy``
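
Note: the country-keyed temperature dictionaries introduced above must contain a "default" entry, and the configtables state that the return temperature must stay below the forward temperature. A small illustrative check of both invariants (not part of the patch series itself):

    forward = {"default": 90, "DK": 70, "SE": 70, "NO": 70, "FI": 70}
    ret = {"default": 50, "DK": 40, "SE": 40, "NO": 40, "FI": 40}

    assert "default" in forward and "default" in ret, "one key must be named 'default'"
    for country in forward:
        # return temperature must be lower than the forward temperature
        assert ret.get(country, ret["default"]) < forward.get(country, forward["default"])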
This option is only activated in combination with the flag ``waste_to_energy`` From 7542d57820e84970f90ff2407c13238200156763 Mon Sep 17 00:00:00 2001 From: AmosSchledorn Date: Tue, 13 Aug 2024 17:07:20 +0200 Subject: [PATCH 15/38] style: move get_country_from_node_name function from _helpers tu run --- scripts/_helpers.py | 3 --- scripts/build_cop_profiles/run.py | 4 +++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 1c890ea5..d97144ba 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -786,6 +786,3 @@ def get_snapshots(snapshots, drop_leap_day=False, freq="h", **kwargs): return time - -def get_country_from_node_name(node_name: str) -> str: - return node_name[:2] diff --git a/scripts/build_cop_profiles/run.py b/scripts/build_cop_profiles/run.py index 23229e98..401fdc18 100644 --- a/scripts/build_cop_profiles/run.py +++ b/scripts/build_cop_profiles/run.py @@ -46,7 +46,7 @@ import geopandas as gpd import numpy as np import pandas as pd import xarray as xr -from _helpers import get_country_from_node_name, set_scenario_config +from _helpers import set_scenario_config from CentralHeatingCopApproximator import CentralHeatingCopApproximator from DecentralHeatingCopApproximator import DecentralHeatingCopApproximator @@ -134,6 +134,8 @@ def get_cop( source_type=heat_source, ).approximate_cop() +def get_country_from_node_name(node_name: str) -> str: + return node_name[:2] if __name__ == "__main__": if "snakemake" not in globals(): From 92435678723b01482d544f0606ff5c26ced8063c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 Aug 2024 15:08:47 +0000 Subject: [PATCH 16/38] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/_helpers.py | 1 - scripts/build_cop_profiles/run.py | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index d97144ba..a3b77c1c 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -785,4 +785,3 @@ def get_snapshots(snapshots, drop_leap_day=False, freq="h", **kwargs): time = time[~((time.month == 2) & (time.day == 29))] return time - diff --git a/scripts/build_cop_profiles/run.py b/scripts/build_cop_profiles/run.py index 401fdc18..b4ec3e43 100644 --- a/scripts/build_cop_profiles/run.py +++ b/scripts/build_cop_profiles/run.py @@ -134,9 +134,11 @@ def get_cop( source_type=heat_source, ).approximate_cop() + def get_country_from_node_name(node_name: str) -> str: return node_name[:2] + if __name__ == "__main__": if "snakemake" not in globals(): from _helpers import mock_snakemake From 55370a57fb61f5985dfaf05b322a00153df5e44f Mon Sep 17 00:00:00 2001 From: Fabian Hofmann Date: Thu, 15 Aug 2024 14:51:51 +0200 Subject: [PATCH 17/38] retrieve: remove cutout protection (#1220) --- rules/retrieve.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/retrieve.smk b/rules/retrieve.smk index b2382f51..de84b2fa 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -85,7 +85,7 @@ if config["enable"]["retrieve"] and config["enable"].get("retrieve_cutout", True "https://zenodo.org/records/12791128/files/{cutout}.nc", ), output: - protected("cutouts/" + CDIR + "{cutout}.nc"), + "cutouts/" + CDIR + "{cutout}.nc", log: "logs/" + CDIR + "retrieve_cutout_{cutout}.log", resources: From 0221372b499e9685986ba593fb799c156f144c44 Mon Sep 17 00:00:00 2001 From: cpschau <124347782+cpschau@users.noreply.github.com> 
Date: Thu, 15 Aug 2024 14:52:33 +0200
Subject: [PATCH 18/38] modified environments to include tabula-py update and
 fixed PDF encoding (#1219)

---
 doc/release_notes.rst                    | 2 ++
 envs/environment.fixed.yaml              | 3 ++-
 envs/environment.yaml                    | 3 ++-
 scripts/build_biomass_transport_costs.py | 2 +-
 4 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/doc/release_notes.rst b/doc/release_notes.rst
index ac8c2559..892558b1 100644
--- a/doc/release_notes.rst
+++ b/doc/release_notes.rst
@@ -9,6 +9,8 @@ Release Notes
 Upcoming Release
 ================
 
+* Fixed PDF encoding in ``build_biomass_transport_costs`` with update of tabula-py and jpype1
+
 * More modular and flexible handling of transmission projects. One can now add new transmission projects in a subfolder of `data/transmission projects` similar to the files in the template folder. After adding the new files and updating the config section `transmission_projects:`, transmission projects will be included if they are not duplicates of existing lines or other projects.
 
 * Add option to apply a gaussian kernel density smoothing to wind turbine power curves.
diff --git a/envs/environment.fixed.yaml b/envs/environment.fixed.yaml
index 917deab7..31dc7527 100644
--- a/envs/environment.fixed.yaml
+++ b/envs/environment.fixed.yaml
@@ -391,7 +391,7 @@ dependencies:
 - stack_data=0.6.2
 - statsmodels=0.14.2
 - stopit=1.1.2
-- tabula-py=2.7.0
+- jpype1=1.5.0
 - tabulate=0.9.0
 - tbb=2021.11.0
 - tblib=3.0.0
@@ -466,3 +466,4 @@ dependencies:
   - snakemake-executor-plugin-slurm-jobstep==0.2.1
   - snakemake-storage-plugin-http==0.2.3
   - tsam==2.3.1
+  - tabula-py=2.9.3
diff --git a/envs/environment.yaml b/envs/environment.yaml
index c87de77a..9115ccd9 100644
--- a/envs/environment.yaml
+++ b/envs/environment.yaml
@@ -43,7 +43,7 @@ dependencies:
 - geopy
 - tqdm
 - pytz
-- tabula-py
+- jpype1
 - pyxlsb
 - graphviz
 - pre-commit
@@ -63,3 +63,4 @@ dependencies:
   - snakemake-executor-plugin-slurm
   - snakemake-executor-plugin-cluster-generic
   - highspy
+  - tabula-py
diff --git a/scripts/build_biomass_transport_costs.py b/scripts/build_biomass_transport_costs.py
index 085a0f00..d5650902 100644
--- a/scripts/build_biomass_transport_costs.py
+++ b/scripts/build_biomass_transport_costs.py
@@ -24,7 +24,7 @@ import tabula as tbl
 ENERGY_CONTENT = 4.8  # unit MWh/t (wood pellets)
 
 system = platform.system()
-encoding = "cp1252" if system == "Windows" else None
+encoding = "cp1252" if system == "Windows" else "utf-8"
 
 
 def get_countries():

From 55cca6a0f93308d7500e36dd6c98b672d4501d06 Mon Sep 17 00:00:00 2001
From: Bobby Xiong <36541459+bobbyxng@users.noreply.github.com>
Date: Thu, 15 Aug 2024 19:45:49 +0200
Subject: [PATCH 19/38] Bug fix: Carrier type of added supernodes in
 simplify_network needs to be set to "AC" (#1221)

* Fixed simplify_network.py to handle more complex topologies, i.e. if two links are connected to nearby AC buses separated by an AC line.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Added fix for Corsica node: If region containing Corsica is modelled, include substation on Corsica as supernode (end point).
* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Bug fix: Carrier type of all supernodes corrected to 'AC'

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Bug fix: Carrier type of all supernodes corrected to 'AC'

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Bug fix: Carrier type of all supernodes corrected to 'AC'

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 scripts/simplify_network.py | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py
index 9ab0cb23..66a5d119 100644
--- a/scripts/simplify_network.py
+++ b/scripts/simplify_network.py
@@ -301,19 +301,11 @@ def simplify_links(
     # Only span graph over the DC link components
     G = n.graph(branch_components=["Link"])
 
-    def split_links(nodes):
+    def split_links(nodes, added_supernodes=None):
         nodes = frozenset(nodes)
 
         seen = set()
 
-        # Corsica substation
-        node_corsica = find_closest_bus(
-            n,
-            x=9.44802,
-            y=42.52842,
-            tol=2000,  # Tolerance needed to only return the bus if the region is actually modelled
-        )
-
+        # Supernodes are endpoints of links, identified by having less than two neighbours or being an AC Bus
+        # An example for the latter is if two different links are connected to the same AC bus.
         supernodes = {
             m
             for m in nodes
             if (
                 (len(G.adj[m]) < 2 or (set(G.adj[m]) - nodes))
                 or (n.buses.loc[m, "carrier"] == "AC")
-                or (m == node_corsica)
+                or (m in added_supernodes)
             )
         }
 
@@ -359,6 +351,17 @@ def simplify_links(
         0.0, index=n.buses.index, columns=list(connection_costs_per_link)
     )
 
+    node_corsica = find_closest_bus(
+        n,
+        x=9.44802,
+        y=42.52842,
+        tol=2000,  # Tolerance needed to only return the bus if the region is actually modelled
+    )
+
+    added_supernodes = []
+    if node_corsica is not None:
+        added_supernodes.append(node_corsica)
+
     for lbl in labels.value_counts().loc[lambda s: s > 2].index:
         for b, buses, links in split_links(labels.index[labels == lbl]):
             if len(buses) <= 2:
                 continue
@@ -421,6 +424,9 @@ def simplify_links(
 
     logger.debug("Collecting all components using the busmap")
 
+    # Change carrier type of all added super_nodes to "AC"
+    n.buses.loc[added_supernodes, "carrier"] = "AC"
+
     _aggregate_and_move_components(
         n,
         busmap,

From 8480d62d33bef82aee9f8300d56c1c2427451bc8 Mon Sep 17 00:00:00 2001
From: Philipp Glaum <95913147+p-glaum@users.noreply.github.com>
Date: Fri, 16 Aug 2024 13:16:52 +0200
Subject: [PATCH 20/38] fix bug in simplify network (#1222)

---
 scripts/simplify_network.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py
index 66a5d119..9db3435b 100644
--- a/scripts/simplify_network.py
+++ b/scripts/simplify_network.py
@@ -301,7 +301,7 @@ def simplify_links(
     # Only span graph over the DC link components
     G = n.graph(branch_components=["Link"])
 
-    def split_links(nodes, added_supernodes=None):
+    def split_links(nodes, added_supernodes):
         nodes = frozenset(nodes)
 
         seen = set()
@@ -363,7 +363,9 @@ def simplify_links(
         added_supernodes.append(node_corsica)
 
     for lbl in labels.value_counts().loc[lambda s: s > 2].index:
-        for b, buses, links in split_links(labels.index[labels == lbl]):
+        for b, buses, links in split_links(
+            labels.index[labels == lbl], added_supernodes
+        ):
             if len(buses) <= 2:
                 continue

From ebbb75491bbcd79876fc75d4312b9f4efb2f94ff Mon Sep 17 00:00:00 2001
From: Markus Millinger <50738187+millingermarkus@users.noreply.github.com>
Date: Fri, 16 Aug 2024 13:18:10 +0200
Subject: [PATCH 21/38] Solid biomass to hydrogen process (#1213)

* Added solid biomass to hydrogen, bioH2, process

* Add doc for new config

* add release notes

* enable bioH2 also without biomass spatially resolved

* Update doc/release_notes.rst

Co-authored-by: Fabian Neumann

* Added overnight cost

---------

Co-authored-by: Fabian Neumann
---
 config/config.default.yaml        |  2 ++
 doc/configtables/sector.csv       |  1 +
 doc/release_notes.rst             |  3 +++
 scripts/prepare_sector_network.py | 23 +++++++++++++++++++++++
 4 files changed, 29 insertions(+)

diff --git a/config/config.default.yaml b/config/config.default.yaml
index 14221dac..654745dc 100644
--- a/config/config.default.yaml
+++ b/config/config.default.yaml
@@ -654,6 +654,7 @@ sector:
   biomass_to_liquid: false
   electrobiofuels: false
   biosng: false
+  bioH2: false
   municipal_solid_waste: false
   limit_max_growth:
     enable: false
@@ -1097,6 +1098,7 @@ plotting:
     unsustainable bioliquids: '#32CD32'
     electrobiofuels: 'red'
    BioSNG: '#123456'
+    solid biomass to hydrogen: '#654321'
     # power transmission
     lines: '#6c9459'
     transmission lines: '#6c9459'
diff --git a/doc/configtables/sector.csv b/doc/configtables/sector.csv
index e0deb8ca..573805bb 100644
--- a/doc/configtables/sector.csv
+++ b/doc/configtables/sector.csv
@@ -151,6 +151,7 @@ biogas_upgrading_cc,--,"{true, false}",Add option to capture CO2 from biomass up
 conventional_generation,,,Add a more detailed description of conventional carriers. Any power generation requires the consumption of fuel from nodes representing that fuel.
 biomass_to_liquid,--,"{true, false}",Add option for transforming solid biomass into liquid fuel with the same properties as oil
 biosng,--,"{true, false}",Add option for transforming solid biomass into synthesis gas with the same properties as natural gas
+bioH2,--,"{true, false}",Add option for transforming solid biomass into hydrogen with carbon capture
 municipal_solid_waste,--,"{true, false}",Add option for municipal solid waste
 limit_max_growth,,,
 -- enable,--,"{true, false}",Add option to limit the maximum growth of a carrier
diff --git a/doc/release_notes.rst b/doc/release_notes.rst
index 892558b1..6c9a289c 100644
--- a/doc/release_notes.rst
+++ b/doc/release_notes.rst
@@ -9,6 +9,9 @@ Release Notes
 Upcoming Release
 ================
 
+
+* Add option to produce hydrogen from solid biomass (flag ``solid biomass to hydrogen``), combined with carbon capture
+
 * Fixed PDF encoding in ``build_biomass_transport_costs`` with update of tabula-py and jpype1
 
 * More modular and flexible handling of transmission projects. One can now add new transmission projects in a subfolder of `data/transmission projects` similar to the files in the template folder. After adding the new files and updating the config section `transmission_projects:`, transmission projects will be included if they are not duplicates of existing lines or other projects.
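
Note: the link added in the following diff is sized by its biomass input (bus0), so both cost terms are rescaled to that basis. The conversion plant cost, given per unit of hydrogen output, is multiplied by the hydrogen efficiency, and the capture cost, given per unit of CO2, by the biomass CO2 intensity. An illustrative calculation with made-up numbers (real values come from the technology-data cost table):

    # hypothetical values for illustration only
    efficiency = 0.56        # MWh_H2 per MWh_biomass ('solid biomass to hydrogen')
    co2_intensity = 0.37     # tCO2 per MWh_biomass ('solid biomass')
    fixed_conversion = 40e3  # EUR/MW_H2/a, annualised 'solid biomass to hydrogen' cost
    fixed_capture = 70e3     # EUR/(tCO2/h)/a, annualised 'biomass CHP capture' cost

    # capital cost per MW of biomass input, mirroring the patch's formula
    capital_cost = fixed_conversion * efficiency + fixed_capture * co2_intensity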
diff --git a/scripts/prepare_sector_network.py b/scripts/prepare_sector_network.py
index c73373ee..4f3e3d2b 100644
--- a/scripts/prepare_sector_network.py
+++ b/scripts/prepare_sector_network.py
@@ -2830,6 +2830,29 @@ def add_biomass(n, costs):
             marginal_cost=costs.at["BioSNG", "VOM"] * costs.at["BioSNG", "efficiency"],
         )
 
+    if options["bioH2"]:
+        name = (pd.Index(spatial.biomass.nodes) + " "
+                + pd.Index(spatial.h2.nodes.str.replace(" H2", "")))
+        n.madd(
+            "Link",
+            name,
+            suffix=" solid biomass to hydrogen CC",
+            bus0=spatial.biomass.nodes,
+            bus1=spatial.h2.nodes,
+            bus2=spatial.co2.nodes,
+            bus3="co2 atmosphere",
+            carrier="solid biomass to hydrogen",
+            efficiency=costs.at['solid biomass to hydrogen', 'efficiency'],
+            efficiency2=costs.at['solid biomass', 'CO2 intensity'] * options["cc_fraction"],
+            efficiency3=-costs.at['solid biomass', 'CO2 intensity'] * options["cc_fraction"],
+            p_nom_extendable=True,
+            capital_cost=costs.at['solid biomass to hydrogen', 'fixed'] * costs.at['solid biomass to hydrogen', 'efficiency']
+            + costs.at['biomass CHP capture', 'fixed'] * costs.at['solid biomass', 'CO2 intensity'],
+            overnight_cost=costs.at['solid biomass to hydrogen', 'investment'] * costs.at['solid biomass to hydrogen', 'efficiency']
+            + costs.at['biomass CHP capture', 'investment'] * costs.at['solid biomass', 'CO2 intensity'],
+            marginal_cost=0.,
+        )
+
 
 def add_industry(n, costs):
     logger.info("Add industrial demand")

From eb16d74a47df49dc719e904634c35c50ebf12afb Mon Sep 17 00:00:00 2001
From: lisazeyen <35347358+lisazeyen@users.noreply.github.com>
Date: Fri, 16 Aug 2024 13:30:01 +0200
Subject: [PATCH 22/38] increase min cluster to 38 because of corsica link

---
 config/config.default.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/config.default.yaml b/config/config.default.yaml
index 654745dc..cecd34e5 100644
--- a/config/config.default.yaml
+++ b/config/config.default.yaml
@@ -42,7 +42,7 @@ scenario:
   ll:
   - vopt
   clusters:
-  - 37
+  - 38
   - 128
   - 256
   opts:

From 18759053f01476f6ab2cf624d13ce3d20b3b03c0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 16 Aug 2024 11:42:18 +0000
Subject: [PATCH 23/38] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 scripts/prepare_sector_network.py | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/scripts/prepare_sector_network.py b/scripts/prepare_sector_network.py
index 23863067..71b50439 100644
--- a/scripts/prepare_sector_network.py
+++ b/scripts/prepare_sector_network.py
@@ -2830,8 +2830,11 @@ def add_biomass(n, costs):
         )
 
     if options["bioH2"]:
-        name = (pd.Index(spatial.biomass.nodes) + " "
-                + pd.Index(spatial.h2.nodes.str.replace(" H2", "")))
+        name = (
+            pd.Index(spatial.biomass.nodes)
+            + " "
+            + pd.Index(spatial.h2.nodes.str.replace(" H2", ""))
+        )
         n.madd(
             "Link",
             name,
@@ -2841,16 +2844,22 @@ def add_biomass(n, costs):
             bus2=spatial.co2.nodes,
             bus3="co2 atmosphere",
             carrier="solid biomass to hydrogen",
-            efficiency=costs.at['solid biomass to hydrogen', 'efficiency'],
-            efficiency2=costs.at['solid biomass', 'CO2 intensity'] * options["cc_fraction"],
-            efficiency3=-costs.at['solid biomass', 'CO2 intensity'] * options["cc_fraction"],
+            efficiency=costs.at["solid biomass to hydrogen", "efficiency"],
+            efficiency2=costs.at["solid biomass", "CO2 intensity"]
+            * options["cc_fraction"],
+            efficiency3=-costs.at["solid biomass", "CO2 intensity"]
+            * options["cc_fraction"],
             p_nom_extendable=True,
-            capital_cost=costs.at['solid biomass to hydrogen', 'fixed'] * costs.at['solid biomass to hydrogen', 'efficiency']
-            + costs.at['biomass CHP capture', 'fixed'] * costs.at['solid biomass', 'CO2 intensity'],
-            overnight_cost=costs.at['solid biomass to hydrogen', 'investment'] * costs.at['solid biomass to hydrogen', 'efficiency']
-            + costs.at['biomass CHP capture', 'investment'] * costs.at['solid biomass', 'CO2 intensity'],
-            marginal_cost=0.,
+            capital_cost=costs.at["solid biomass to hydrogen", "fixed"]
+            * costs.at["solid biomass to hydrogen", "efficiency"]
+            + costs.at["biomass CHP capture", "fixed"]
+            * costs.at["solid biomass", "CO2 intensity"],
+            overnight_cost=costs.at["solid biomass to hydrogen", "investment"]
+            * costs.at["solid biomass to hydrogen", "efficiency"]
+            + costs.at["biomass CHP capture", "investment"]
+            * costs.at["solid biomass", "CO2 intensity"],
+            marginal_cost=0.0,
         )
 
 
 def add_industry(n, costs):

From ad100c494b4c562b79f65b75322d2eb34984d7aa Mon Sep 17 00:00:00 2001
From: lisazeyen <35347358+lisazeyen@users.noreply.github.com>
Date: Fri, 16 Aug 2024 13:57:02 +0200
Subject: [PATCH 24/38] increase CH transport data until 2022

---
 scripts/build_energy_totals.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/build_energy_totals.py b/scripts/build_energy_totals.py
index 48004ea2..a4f22220 100644
--- a/scripts/build_energy_totals.py
+++ b/scripts/build_energy_totals.py
@@ -1167,7 +1167,7 @@ def build_transport_data(
 
     if "CH" in countries:
         fn = snakemake.input.swiss_transport
-        swiss_cars = pd.read_csv(fn, index_col=0).loc[2000:2015, ["passenger cars"]]
+        swiss_cars = pd.read_csv(fn, index_col=0).loc[2000:2023, ["passenger cars"]]
 
         swiss_cars.index = pd.MultiIndex.from_product(
             [["CH"], swiss_cars.index], names=["country", "year"]
         )

From 885a881e7824f40b109faedfbf88b46dff9f462b Mon Sep 17 00:00:00 2001
From: lisazeyen <35347358+lisazeyen@users.noreply.github.com>
Date: Fri, 16 Aug 2024 14:39:04 +0200
Subject: [PATCH 25/38] bug fix for reference year<2018 for ammonia

---
 scripts/build_industry_sector_ratios.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/build_industry_sector_ratios.py b/scripts/build_industry_sector_ratios.py
index 760dca76..530ac910 100644
--- a/scripts/build_industry_sector_ratios.py
+++ b/scripts/build_industry_sector_ratios.py
@@ -445,7 +445,7 @@ def chemicals_industry():
 
     # subtract ammonia energy demand (in ktNH3/a)
     ammonia = pd.read_csv(snakemake.input.ammonia_production, index_col=0)
-    ammonia_total = ammonia.loc[ammonia.index.intersection(eu27), str(year)].sum()
+    ammonia_total = ammonia.loc[ammonia.index.intersection(eu27), str(max(2018, year))].sum()
 
     df.loc["methane", sector] -= ammonia_total * params["MWh_CH4_per_tNH3_SMR"]
     df.loc["elec", sector] -= ammonia_total * params["MWh_elec_per_tNH3_SMR"]

From ca47becc873431da30f4770d1cf3e0e330ad50f1 Mon Sep 17 00:00:00 2001
From: Fabian
Date: Mon, 19 Aug 2024 13:33:28 +0200
Subject: [PATCH 26/38] build_transport_data: select swiss years according to
 data selection

use index.unique instead of index.levels where needed
---
 scripts/base_network.py                        |  2 +-
 scripts/build_biomass_potentials.py            |  2 +-
 scripts/build_energy_totals.py                 | 18 ++++++++----------
 ..._industrial_energy_demand_per_node_today.py |  2 +-
 scripts/build_retro_cost.py                    |  2 +-
 scripts/make_summary_perfect.py                | 12 ++++++------
 6 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/scripts/base_network.py b/scripts/base_network.py
index 78cf6984..2b0fee63 100644
--- a/scripts/base_network.py
+++ b/scripts/base_network.py
@@ -95,7 +95,7 @@ logger = logging.getLogger(__name__)
 
 def _get_oid(df):
     if "tags" in df.columns:
-        return df.tags.str.extract('"oid"=>"(\d+)"', expand=False)
+        return df.tags.str.extract(r'"oid"=>"(\d+)"', expand=False)
     else:
         return pd.Series(np.nan, df.index)
 
diff --git a/scripts/build_biomass_potentials.py b/scripts/build_biomass_potentials.py
index 0a2692e8..b69fcfbe 100644
--- a/scripts/build_biomass_potentials.py
+++ b/scripts/build_biomass_potentials.py
@@ -311,7 +311,7 @@ def add_unsustainable_potentials(df):
     # Phase out unsustainable biomass potentials linearly from 2020 to 2035 while phasing in sustainable potentials
     share_unsus = params.get("share_unsustainable_use_retained").get(investment_year)
 
-    df_wo_ch = df.drop(df.filter(regex="CH\d", axis=0).index)
+    df_wo_ch = df.drop(df.filter(regex=r"CH\d", axis=0).index)
 
     # Calculate unsustainable solid biomass
     df_wo_ch["unsustainable solid biomass"] = _calc_unsustainable_potential(
diff --git a/scripts/build_energy_totals.py b/scripts/build_energy_totals.py
index a4f22220..5aefb26b 100644
--- a/scripts/build_energy_totals.py
+++ b/scripts/build_energy_totals.py
@@ -242,7 +242,7 @@ def build_eurostat(
         temp.index = pd.MultiIndex.from_frame(
             temp.index.to_frame().fillna("International aviation")
         )
-        df = pd.concat([temp, df.loc[~int_avia]])
+        df = pd.concat([temp, df.loc[~int_avia]]).sort_index()
 
     # Fill in missing data on "Domestic aviation" for each country.
     for country in countries:
@@ -651,8 +651,8 @@ def build_energy_totals(
     """
     eurostat_fuels = {"electricity": "Electricity", "total": "Total all products"}
 
-    eurostat_countries = eurostat.index.levels[0]
-    eurostat_years = eurostat.index.levels[1]
+    eurostat_countries = eurostat.index.unique(0)
+    eurostat_years = eurostat.index.unique(1)
 
     to_drop = ["passenger cars", "passenger car efficiency"]
     new_index = pd.MultiIndex.from_product(
@@ -1153,13 +1153,14 @@ def build_transport_data(
     ----------
     - Swiss transport data: `BFS `_
     """
+    years = np.arange(2000, 2022)
 
     # first collect number of cars
     transport_data = pd.DataFrame(idees["passenger cars"])
 
     countries_without_ch = set(countries) - {"CH"}
     new_index = pd.MultiIndex.from_product(
-        [countries_without_ch, transport_data.index.levels[1]],
+        [countries_without_ch, transport_data.index.unique(1)],
         names=["country", "year"],
     )
 
@@ -1167,7 +1168,7 @@ def build_transport_data(
 
     if "CH" in countries:
         fn = snakemake.input.swiss_transport
-        swiss_cars = pd.read_csv(fn, index_col=0).loc[2000:2023, ["passenger cars"]]
+        swiss_cars = pd.read_csv(fn, index_col=0).loc[years, ["passenger cars"]]
 
         swiss_cars.index = pd.MultiIndex.from_product(
             [["CH"], swiss_cars.index], names=["country", "year"]
         )
 
     transport_data = pd.concat([transport_data, swiss_cars]).sort_index()
 
-    transport_data.rename(columns={"passenger cars": "number cars"}, inplace=True)
-
+    transport_data = transport_data.rename(columns={"passenger cars": "number cars"})
     # clean up dataframe
-    years = np.arange(2000, 2022)
     transport_data = transport_data[
         transport_data.index.get_level_values(1).isin(years)
     ]
@@ -1188,11 +1187,10 @@ def build_transport_data(
     logger.info(
         f"Missing data on cars from:\n{list(missing)}\nFilling gaps with averaged data."
     )
-
     cars_pp = transport_data["number cars"] / population
 
     fill_values = {
-        year: cars_pp.mean() * population for year in transport_data.index.levels[1]
+        year: cars_pp.mean() * population for year in transport_data.index.unique(1)
     }
     fill_values = pd.DataFrame(fill_values).stack()
     fill_values = pd.DataFrame(fill_values, columns=["number cars"])
diff --git a/scripts/build_industrial_energy_demand_per_node_today.py b/scripts/build_industrial_energy_demand_per_node_today.py
index 7a1ee7ac..aa9e9dff 100644
--- a/scripts/build_industrial_energy_demand_per_node_today.py
+++ b/scripts/build_industrial_energy_demand_per_node_today.py
@@ -66,7 +66,7 @@ def build_nodal_industrial_energy_demand():
     )
 
     countries = keys.country.unique()
-    sectors = industrial_demand.columns.levels[1]
+    sectors = industrial_demand.columns.unique(1)
 
     for country, sector in product(countries, sectors):
         buses = keys.index[keys.country == country]
diff --git a/scripts/build_retro_cost.py b/scripts/build_retro_cost.py
index 44f4a738..c6dd31cf 100755
--- a/scripts/build_retro_cost.py
+++ b/scripts/build_retro_cost.py
@@ -254,7 +254,7 @@ def prepare_building_stock_data():
             index = pd.MultiIndex.from_product([[ct], averaged_data.index.to_list()])
             averaged_data.index = index
             averaged_data["estimated"] = 1
-            if ct not in area_tot.index.levels[0]:
+            if ct not in area_tot.index.unique(0):
                 area_tot = pd.concat([area_tot, averaged_data], sort=True)
             else:
                 area_tot.loc[averaged_data.index] = averaged_data
diff --git a/scripts/make_summary_perfect.py b/scripts/make_summary_perfect.py
index 8e56e5d4..59aa6e74 100644
--- a/scripts/make_summary_perfect.py
+++ b/scripts/make_summary_perfect.py
@@ -40,9 +40,9 @@ def calculate_costs(n, label, costs):
     investments = n.investment_periods
     cols = pd.MultiIndex.from_product(
         [
-            costs.columns.levels[0],
-            costs.columns.levels[1],
-            costs.columns.levels[2],
+            costs.columns.unique(0),
+            costs.columns.unique(1),
+            costs.columns.unique(2),
             investments,
         ],
         names=costs.columns.names[:3] + ["year"],
@@ -339,9 +339,9 @@ def calculate_supply_energy(n, label, supply_energy):
     investments = n.investment_periods
     cols = pd.MultiIndex.from_product(
         [
-            supply_energy.columns.levels[0],
-            supply_energy.columns.levels[1],
-            supply_energy.columns.levels[2],
+            supply_energy.columns.unique(0),
+            supply_energy.columns.unique(1),
+            supply_energy.columns.unique(2),
             investments,
         ],
         names=supply_energy.columns.names[:3] + ["year"],

From 54a2d24517c140fdd9026d4b1e882865f1302d2e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 19 Aug 2024 11:39:34 +0000
Subject: [PATCH 27/38] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 scripts/build_industry_sector_ratios.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/build_industry_sector_ratios.py b/scripts/build_industry_sector_ratios.py
index 530ac910..c2438f91 100644
--- a/scripts/build_industry_sector_ratios.py
+++ b/scripts/build_industry_sector_ratios.py
@@ -445,7 +445,9 @@ def chemicals_industry():
 
     # subtract ammonia energy demand (in ktNH3/a)
     ammonia = pd.read_csv(snakemake.input.ammonia_production, index_col=0)
-    ammonia_total = ammonia.loc[ammonia.index.intersection(eu27), str(max(2018, year))].sum()
+    ammonia_total = ammonia.loc[
+        ammonia.index.intersection(eu27), str(max(2018, year))
+    ].sum()
 
     df.loc["methane", sector] -= ammonia_total * params["MWh_CH4_per_tNH3_SMR"]
     df.loc["elec", sector] -= ammonia_total * params["MWh_elec_per_tNH3_SMR"]

From 50c2fee6ce9eb320f31196354579295c2d794e8d Mon Sep 17 00:00:00 2001
From: lisazeyen
Date: Tue, 20 Aug 2024 10:14:59 +0200
Subject: [PATCH 28/38] avoid infinity in sector ratios

---
 scripts/build_industry_sector_ratios_intermediate.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/build_industry_sector_ratios_intermediate.py b/scripts/build_industry_sector_ratios_intermediate.py
index 42e8a7dc..d4d46c39 100644
--- a/scripts/build_industry_sector_ratios_intermediate.py
+++ b/scripts/build_industry_sector_ratios_intermediate.py
@@ -80,6 +80,7 @@ Unit of the output file is MWh/t.
 """
 
 import pandas as pd
+import numpy as np
 
 from prepare_sector_network import get
 
@@ -104,7 +105,7 @@ def build_industry_sector_ratios_intermediate():
         snakemake.input.industry_sector_ratios, index_col=0
     )
 
-    today_sector_ratios = demand.div(production, axis=1)
+    today_sector_ratios = demand.div(production, axis=1).replace([np.inf, -np.inf], 0)
 
     today_sector_ratios.dropna(how="all", axis=1, inplace=True)

From 8b0474e41cb625442a119e0fd01b651e9524cf0e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 20 Aug 2024 08:15:58 +0000
Subject: [PATCH 29/38] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 scripts/build_industry_sector_ratios_intermediate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/build_industry_sector_ratios_intermediate.py b/scripts/build_industry_sector_ratios_intermediate.py
index d4d46c39..0fbaa567 100644
--- a/scripts/build_industry_sector_ratios_intermediate.py
+++ b/scripts/build_industry_sector_ratios_intermediate.py
@@ -79,8 +79,8 @@ with the following carriers are considered:
 Unit of the output file is MWh/t.
 """
 
-import pandas as pd
 import numpy as np
+import pandas as pd
 
 from prepare_sector_network import get
 

From f4819d0b4c605ed59363263835ed88f36e144d6d Mon Sep 17 00:00:00 2001
From: lisazeyen
Date: Tue, 20 Aug 2024 13:51:33 +0200
Subject: [PATCH 30/38] co2 sequestration depending on period

---
 scripts/solve_network.py | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/scripts/solve_network.py b/scripts/solve_network.py
index a2391ea2..5f644bc9 100644
--- a/scripts/solve_network.py
+++ b/scripts/solve_network.py
@@ -43,6 +43,8 @@ from _helpers import (
     set_scenario_config,
     update_config_from_wildcards,
 )
+
+from prepare_sector_network import get
 from pypsa.descriptors import get_activity_mask
 from pypsa.descriptors import get_switchable_as_dense as get_as_dense
 
@@ -287,15 +289,19 @@ def add_solar_potential_constraints(n, config):
     n.model.add_constraints(lhs <= rhs, name="solar_potential")
 
 
-def add_co2_sequestration_limit(n, limit=200):
+def add_co2_sequestration_limit(n, limit_dict):
     """
     Add a global constraint on the amount of Mt CO2 that can be sequestered.
""" if not n.investment_periods.empty: periods = n.investment_periods - names = pd.Index([f"co2_sequestration_limit-{period}" for period in periods]) + limit = pd.Series({f"co2_sequestration_limit-{period}": + limit_dict.get(period, 200) for period in periods}) + names = limit.index else: + limit = get(limit_dict, + int(snakemake.wildcards.planning_horizons)) periods = [np.nan] names = pd.Index(["co2_sequestration_limit"]) @@ -515,8 +521,8 @@ def prepare_network( n = add_max_growth(n) if n.stores.carrier.eq("co2 sequestered").any(): - limit = co2_sequestration_potential - add_co2_sequestration_limit(n, limit=limit) + limit_dict = co2_sequestration_potential + add_co2_sequestration_limit(n, limit_dict=limit_dict) return n @@ -1116,19 +1122,19 @@ def solve_network(n, config, solving, **kwargs): return n - +#%% if __name__ == "__main__": if "snakemake" not in globals(): from _helpers import mock_snakemake snakemake = mock_snakemake( - "solve_sector_network", - configfiles="../config/test/config.perfect.yaml", + "solve_sector_network_myopic", + # configfiles="../config/test/config.perfect.yaml", simpl="", opts="", - clusters="37", - ll="v1.0", - sector_opts="CO2L0-1H-T-H-B-I-A-dist1", + clusters="38", + ll="vopt", + sector_opts="", planning_horizons="2030", ) configure_logging(snakemake) @@ -1140,7 +1146,7 @@ if __name__ == "__main__": np.random.seed(solve_opts.get("seed", 123)) n = pypsa.Network(snakemake.input.network) - + n = prepare_network( n, solve_opts, From dcc84dfb9f623376dfeafe518fdf8dd43770f031 Mon Sep 17 00:00:00 2001 From: lisazeyen Date: Tue, 20 Aug 2024 14:02:12 +0200 Subject: [PATCH 31/38] update config --- config/config.default.yaml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/config/config.default.yaml b/config/config.default.yaml index 3d1292e3..74e672e2 100644 --- a/config/config.default.yaml +++ b/config/config.default.yaml @@ -612,7 +612,14 @@ sector: min_size: 3 max_size: 25 years_of_storage: 25 - co2_sequestration_potential: 200 + co2_sequestration_potential: + 2020: 0 + 2025: 0 + 2030: 50 + 2035: 100 + 2040: 200 + 2045: 200 + 2050: 200 co2_sequestration_cost: 10 co2_sequestration_lifetime: 50 co2_spatial: false From 2682c313ba5d691cb1c232502b58e1fe4faca11c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 20 Aug 2024 12:04:44 +0000 Subject: [PATCH 32/38] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/solve_network.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/scripts/solve_network.py b/scripts/solve_network.py index 5f644bc9..b7d5665b 100644 --- a/scripts/solve_network.py +++ b/scripts/solve_network.py @@ -43,7 +43,6 @@ from _helpers import ( set_scenario_config, update_config_from_wildcards, ) - from prepare_sector_network import get from pypsa.descriptors import get_activity_mask from pypsa.descriptors import get_switchable_as_dense as get_as_dense @@ -296,12 +295,15 @@ def add_co2_sequestration_limit(n, limit_dict): if not n.investment_periods.empty: periods = n.investment_periods - limit = pd.Series({f"co2_sequestration_limit-{period}": - limit_dict.get(period, 200) for period in periods}) + limit = pd.Series( + { + f"co2_sequestration_limit-{period}": limit_dict.get(period, 200) + for period in periods + } + ) names = limit.index else: - limit = get(limit_dict, - int(snakemake.wildcards.planning_horizons)) + limit = get(limit_dict, 
int(snakemake.wildcards.planning_horizons))
         periods = [np.nan]
         names = pd.Index(["co2_sequestration_limit"])

@@ -1122,7 +1124,8 @@ def solve_network(n, config, solving, **kwargs):

     return n

-#%%
+
+# %%
 if __name__ == "__main__":
     if "snakemake" not in globals():
         from _helpers import mock_snakemake
@@ -1146,7 +1149,7 @@ if __name__ == "__main__":
     np.random.seed(solve_opts.get("seed", 123))

     n = pypsa.Network(snakemake.input.network)
-    
+
     n = prepare_network(
         n,
         solve_opts,

From 5fb89068fda761567710fe89a3c41321d02c92e5 Mon Sep 17 00:00:00 2001
From: lisazeyen
Date: Tue, 20 Aug 2024 15:05:30 +0200
Subject: [PATCH 33/38] right indent for co2 seq potential

---
 config/config.default.yaml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/config/config.default.yaml b/config/config.default.yaml
index 74e672e2..9d565b63 100644
--- a/config/config.default.yaml
+++ b/config/config.default.yaml
@@ -612,14 +612,14 @@ sector:
     min_size: 3
     max_size: 25
     years_of_storage: 25
-  co2_sequestration_potential:
-   2020: 0
-   2025: 0
-   2030: 50
-   2035: 100
-   2040: 200
-   2045: 200
-   2050: 200
+  co2_sequestration_potential:
+    2020: 0
+    2025: 0
+    2030: 50
+    2035: 100
+    2040: 200
+    2045: 200
+    2050: 200
   co2_sequestration_cost: 10
   co2_sequestration_lifetime: 50
   co2_spatial: false

From 7490647084f35127d43e253b7a6586fd1ccb2ea5 Mon Sep 17 00:00:00 2001
From: lisazeyen
Date: Tue, 20 Aug 2024 15:05:45 +0200
Subject: [PATCH 34/38] update release notes and doc

---
 doc/configtables/sector.csv | 350 ++++++++++++++++++------------------
 doc/release_notes.rst       |   1 +
 scripts/solve_network.py    |  10 +-
 3 files changed, 181 insertions(+), 180 deletions(-)

diff --git a/doc/configtables/sector.csv b/doc/configtables/sector.csv
index 88f56abe..88442a9e 100644
--- a/doc/configtables/sector.csv
+++ b/doc/configtables/sector.csv
@@ -1,175 +1,175 @@
-,Unit,Values,Description
-transport,--,"{true, false}",Flag to include transport sector.
-heating,--,"{true, false}",Flag to include heating sector.
-biomass,--,"{true, false}",Flag to include biomass sector.
-industry,--,"{true, false}",Flag to include industry sector.
-agriculture,--,"{true, false}",Flag to include agriculture sector.
-fossil_fuels,--,"{true, false}","Flag to include imports of fossil fuels ( [""coal"", ""gas"", ""oil"", ""lignite""])"
-district_heating,--,,`prepare_sector_network.py `_
--- potential,--,float,maximum fraction of urban demand which can be supplied by district heating
--- progress,--,Dictionary with planning horizons as keys., Increase of today's district heating demand to potential maximum district heating share. Progress = 0 means today's district heating share. Progress = 1 means maximum fraction of urban demand is supplied by district heating
--- district_heating_loss,--,float,Share increase in district heat demand in urban central due to heat losses
--- forward_temperature,°C,Dictionary with country codes as keys. One key must be 'default'.,Forward temperature in district heating
--- return_temperature,°C,Dictionary with country codes as keys. One key must be 'default'.,Return temperature in district heating. Must be lower than forward temperature
--- heat_source_cooling,K,float,Cooling of heat source for heat pumps
--- heat_pump_cop_approximation,,,
--- -- refrigerant,--,"{ammonia, isobutane}",Heat pump refrigerant assumed for COP approximation
--- -- heat_exchanger_pinch_point_temperature_difference,K,float,Heat pump pinch point temperature difference in heat exchangers assumed for approximation.
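[Editor's note: For intuition on the COP approximation parameters documented in the rows around here (pinch-point temperature difference, isentropic efficiency, heat losses), a back-of-envelope Carnot-style sketch follows. The function approximate_cop and all values are illustrative only, not the CentralHeatingCopApproximator implementation.]

def approximate_cop(t_sink_c, t_source_c, dt_pinch=5.0, eta_isentropic=0.8, heat_loss=0.0):
    # Condenser must run above the sink, evaporator below the source (in Kelvin);
    # the pinch-point difference shifts both, the efficiency scales the ideal COP.
    t_cond = t_sink_c + 273.15 + dt_pinch
    t_evap = t_source_c + 273.15 - dt_pinch
    cop_carnot = t_cond / (t_cond - t_evap)
    return cop_carnot * eta_isentropic * (1 - heat_loss)

print(round(approximate_cop(t_sink_c=70.0, t_source_c=10.0), 2))  # ~3.98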
--- -- isentropic_compressor_efficiency,--,float,Isentropic efficiency of heat pump compressor assumed for approximation. Must be between 0 and 1. --- -- heat_loss,--,float,Heat pump heat loss assumed for approximation. Must be between 0 and 1. --- heat_pump_sources,--,, --- -- urban central,--,List of heat sources for heat pumps in urban central heating, --- -- urban decentral,--,List of heat sources for heat pumps in urban decentral heating, --- -- rural,--,List of heat sources for heat pumps in rural heating, -cluster_heat_buses,--,"{true, false}",Cluster residential and service heat buses in `prepare_sector_network.py `_ to one to save memory. -,,, -bev_dsm_restriction _value,--,float,Adds a lower state of charge (SOC) limit for battery electric vehicles (BEV) to manage its own energy demand (DSM). Located in `build_transport_demand.py `_. Set to 0 for no restriction on BEV DSM -bev_dsm_restriction _time,--,float,Time at which SOC of BEV has to be dsm_restriction_value -transport_heating _deadband_upper,°C,float,"The maximum temperature in the vehicle. At higher temperatures, the energy required for cooling in the vehicle increases." -transport_heating _deadband_lower,°C,float,"The minimum temperature in the vehicle. At lower temperatures, the energy required for heating in the vehicle increases." -,,, -ICE_lower_degree_factor,--,float,Share increase in energy demand in internal combustion engine (ICE) for each degree difference between the cold environment and the minimum temperature. -ICE_upper_degree_factor,--,float,Share increase in energy demand in internal combustion engine (ICE) for each degree difference between the hot environment and the maximum temperature. -EV_lower_degree_factor,--,float,Share increase in energy demand in electric vehicles (EV) for each degree difference between the cold environment and the minimum temperature. -EV_upper_degree_factor,--,float,Share increase in energy demand in electric vehicles (EV) for each degree difference between the hot environment and the maximum temperature. -bev_dsm,--,"{true, false}",Add the option for battery electric vehicles (BEV) to participate in demand-side management (DSM) -,,, -bev_availability,--,float,The share for battery electric vehicles (BEV) that are able to do demand side management (DSM) -bev_energy,--,float,The average size of battery electric vehicles (BEV) in MWh -bev_charge_efficiency,--,float,Battery electric vehicles (BEV) charge and discharge efficiency -bev_charge_rate,MWh,float,The power consumption for one electric vehicle (EV) in MWh. Value derived from 3-phase charger with 11 kW. -bev_avail_max,--,float,The maximum share plugged-in availability for passenger electric vehicles. -bev_avail_mean,--,float,The average share plugged-in availability for passenger electric vehicles. -v2g,--,"{true, false}",Allows feed-in to grid from EV battery -land_transport_fuel_cell _share,--,Dictionary with planning horizons as keys.,The share of vehicles that uses fuel cells in a given year -land_transport_electric _share,--,Dictionary with planning horizons as keys.,The share of vehicles that uses electric vehicles (EV) in a given year -land_transport_ice _share,--,Dictionary with planning horizons as keys.,The share of vehicles that uses internal combustion engines (ICE) in a given year. What is not EV or FCEV is oil-fuelled ICE. 
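[Editor's note: The three land_transport share rows just above split the vehicle fleet per planning horizon; combined with the MWh/100km efficiency rows that follow, a demand split could look like the sketch below. All numbers are made up.]

# Illustrative only: split fleet distance between drivetrains by share and
# convert to energy demand via per-100km efficiencies (hypothetical values).
shares = {"electric": 0.45, "fuel_cell": 0.05}
shares["ice"] = 1.0 - sum(shares.values())  # remainder is oil-fuelled ICE

efficiency_mwh_per_100km = {"electric": 0.02, "fuel_cell": 0.034, "ice": 0.07}
distance_100km = 1e9  # total fleet distance in units of 100 km (hypothetical)

energy_mwh = {
    tech: distance_100km * share * efficiency_mwh_per_100km[tech]
    for tech, share in shares.items()
}
print(energy_mwh)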
-transport_electric_efficiency,MWh/100km,float,The conversion efficiencies of electric vehicles in transport -transport_fuel_cell_efficiency,MWh/100km,float,The H2 conversion efficiencies of fuel cells in transport -transport_ice_efficiency,MWh/100km,float,The oil conversion efficiencies of internal combustion engine (ICE) in transport -agriculture_machinery _electric_share,--,float,The share for agricultural machinery that uses electricity -agriculture_machinery _oil_share,--,float,The share for agricultural machinery that uses oil -agriculture_machinery _fuel_efficiency,--,float,The efficiency of electric-powered machinery in the conversion of electricity to meet agricultural needs. -agriculture_machinery _electric_efficiency,--,float,The efficiency of oil-powered machinery in the conversion of oil to meet agricultural needs. -Mwh_MeOH_per_MWh_H2,LHV,float,"The energy amount of the produced methanol per energy amount of hydrogen. From `DECHEMA (2017) `_, page 64." -MWh_MeOH_per_tCO2,LHV,float,"The energy amount of the produced methanol per ton of CO2. From `DECHEMA (2017) `_, page 66." -MWh_MeOH_per_MWh_e,LHV,float,"The energy amount of the produced methanol per energy amount of electricity. From `DECHEMA (2017) `_, page 64." -shipping_hydrogen _liquefaction,--,"{true, false}",Whether to include liquefaction costs for hydrogen demand in shipping. -,,, -shipping_hydrogen_share,--,Dictionary with planning horizons as keys.,The share of ships powered by hydrogen in a given year -shipping_methanol_share,--,Dictionary with planning horizons as keys.,The share of ships powered by methanol in a given year -shipping_oil_share,--,Dictionary with planning horizons as keys.,The share of ships powered by oil in a given year -shipping_methanol _efficiency,--,float,The efficiency of methanol-powered ships in the conversion of methanol to meet shipping needs (propulsion). The efficiency increase from oil can be 10-15% higher according to the `IEA `_ -,,, -shipping_oil_efficiency,--,float,The efficiency of oil-powered ships in the conversion of oil to meet shipping needs (propulsion). Base value derived from 2011 -aviation_demand_factor,--,float,The proportion of demand for aviation compared to today's consumption -HVC_demand_factor,--,float,The proportion of demand for high-value chemicals compared to today's consumption -,,, -time_dep_hp_cop,--,"{true, false}",Consider the time dependent coefficient of performance (COP) of the heat pump -heat_pump_sink_T,°C,float,The temperature heat sink used in heat pumps based on DTU / large area radiators. The value is conservatively high to cover hot water and space heating in poorly-insulated buildings -reduce_space_heat _exogenously,--,"{true, false}",Influence on space heating demand by a certain factor (applied before losses in district heating). -reduce_space_heat _exogenously_factor,--,Dictionary with planning horizons as keys.,"A positive factor can mean renovation or demolition of a building. If the factor is negative, it can mean an increase in floor area, increased thermal comfort, population growth. The default factors are determined by the `Eurocalc Homes and buildings decarbonization scenario `_" -retrofitting,,, --- retro_endogen,--,"{true, false}",Add retrofitting as an endogenous system which co-optimise space heat savings. 
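[Editor's note: The retrofitting block beginning with the retro_endogen row above continues below with cost_factor, interest_rate and annualise_cost; annualising an investment presumably uses the standard annuity factor. A sketch under that assumption, not the repository's own helper:]

def annuity(n_years, rate):
    # Textbook annuity factor: converts an overnight cost into equal annual payments.
    if rate == 0:
        return 1 / n_years
    return rate / (1 - (1 + rate) ** -n_years)

capex = 100_000  # EUR, hypothetical renovation cost
print(round(annuity(30, 0.04) * capex))  # ~5783 EUR/a over 30 years at 4 %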
--- cost_factor,--,float,Weight costs for building renovation --- interest_rate,--,float,The interest rate for investment in building components --- annualise_cost,--,"{true, false}",Annualise the investment costs of retrofitting --- tax_weighting,--,"{true, false}",Weight the costs of retrofitting depending on taxes in countries --- construction_index,--,"{true, false}",Weight the costs of retrofitting depending on labour/material costs per country -tes,--,"{true, false}",Add option for storing thermal energy in large water pits associated with district heating systems and individual thermal energy storage (TES) -tes_tau,,,The time constant used to calculate the decay of thermal energy in thermal energy storage (TES): 1- :math:`e^{-1/24τ}`. --- decentral,days,float,The time constant in decentralized thermal energy storage (TES) --- central,days,float,The time constant in centralized thermal energy storage (TES) -boilers,--,"{true, false}",Add option for transforming gas into heat using gas boilers -resistive_heaters,--,"{true, false}",Add option for transforming electricity into heat using resistive heaters (independently from gas boilers) -oil_boilers,--,"{true, false}",Add option for transforming oil into heat using boilers -biomass_boiler,--,"{true, false}",Add option for transforming biomass into heat using boilers -overdimension_individual_heating,--,float,Add option for overdimensioning individual heating systems by a certain factor. This allows them to cover heat demand peaks e.g. 10% higher than those in the data with a setting of 1.1. -chp,--,"{true, false}",Add option for using Combined Heat and Power (CHP) -micro_chp,--,"{true, false}",Add option for using Combined Heat and Power (CHP) for decentral areas. -solar_thermal,--,"{true, false}",Add option for using solar thermal to generate heat. -solar_cf_correction,--,float,The correction factor for the value provided by the solar thermal profile calculations -marginal_cost_storage,currency/MWh ,float,The marginal cost of discharging batteries in distributed grids -methanation,--,"{true, false}",Add option for transforming hydrogen and CO2 into methane using methanation. -coal_cc,--,"{true, false}",Add option for coal CHPs with carbon capture -dac,--,"{true, false}",Add option for Direct Air Capture (DAC) -co2_vent,--,"{true, false}",Add option for vent out CO2 from storages to the atmosphere. -allam_cycle,--,"{true, false}",Add option to include `Allam cycle gas power plants `_ -hydrogen_fuel_cell,--,"{true, false}",Add option to include hydrogen fuel cell for re-electrification. Assuming OCGT technology costs -hydrogen_turbine,--,"{true, false}",Add option to include hydrogen turbine for re-electrification. Assuming OCGT technology costs -SMR,--,"{true, false}",Add option for transforming natural gas into hydrogen and CO2 using Steam Methane Reforming (SMR) -SMR CC,--,"{true, false}",Add option for transforming natural gas into hydrogen and CO2 using Steam Methane Reforming (SMR) and Carbon Capture (CC) -regional_methanol_demand,--,"{true, false}",Spatially resolve methanol demand. Set to true if regional CO2 constraints needed. -regional_oil_demand,--,"{true, false}",Spatially resolve oil demand. Set to true if regional CO2 constraints needed. -regional_co2 _sequestration_potential,,, --- enable,--,"{true, false}",Add option for regionally-resolved geological carbon dioxide sequestration potentials based on `CO2StoP `_. 
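[Editor's note: The tes_tau rows above state the thermal storage standing-loss formula 1 - e^{-1/24τ} explicitly, so it can be checked numerically; the τ values below are examples only.]

from math import exp

def hourly_tes_standing_loss(tau_days):
    # Fraction of stored heat lost per hour for a time constant tau given in days.
    return 1 - exp(-1 / (24 * tau_days))

for tau in (3, 180):  # e.g. a decentral tank vs. a central pit storage
    print(tau, f"{hourly_tes_standing_loss(tau):.5%} per hour")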
--- attribute,--,string or list,Name (or list of names) of the attribute(s) for the sequestration potential --- include_onshore,--,"{true, false}",Add options for including onshore sequestration potentials --- min_size,Gt ,float,Any sites with lower potential than this value will be excluded --- max_size,Gt ,float,The maximum sequestration potential for any one site. --- years_of_storage,years,float,The years until potential exhausted at optimised annual rate -co2_sequestration_potential,MtCO2/a,float,The potential of sequestering CO2 in Europe per year -co2_sequestration_cost,currency/tCO2,float,The cost of sequestering a ton of CO2 -co2_sequestration_lifetime,years,int,The lifetime of a CO2 sequestration site -co2_spatial,--,"{true, false}","Add option to spatially resolve carrier representing stored carbon dioxide. This allows for more detailed modelling of CCUTS, e.g. regarding the capturing of industrial process emissions, usage as feedstock for electrofuels, transport of carbon dioxide, and geological sequestration sites." -,,, -co2network,--,"{true, false}",Add option for planning a new carbon dioxide transmission network -co2_network_cost_factor,p.u.,float,The cost factor for the capital cost of the carbon dioxide transmission network -,,, -cc_fraction,--,float,The default fraction of CO2 captured with post-combustion capture -hydrogen_underground _storage,--,"{true, false}",Add options for storing hydrogen underground. Storage potential depends regionally. -hydrogen_underground _storage_locations,,"{onshore, nearshore, offshore}","The location where hydrogen underground storage can be located. Onshore, nearshore, offshore means it must be located more than 50 km away from the sea, within 50 km of the sea, or within the sea itself respectively." -,,, -ammonia,--,"{true, false, regional}","Add ammonia as a carrrier. It can be either true (copperplated NH3), false (no NH3 carrier) or ""regional"" (regionalised NH3 without network)" -min_part_load_fischer _tropsch,per unit of p_nom ,float,The minimum unit dispatch (``p_min_pu``) for the Fischer-Tropsch process -min_part_load _methanolisation,per unit of p_nom ,float,The minimum unit dispatch (``p_min_pu``) for the methanolisation process -,,, -use_fischer_tropsch _waste_heat,--,"{true, false}",Add option for using waste heat of Fischer Tropsch in district heating networks -use_fuel_cell_waste_heat,--,"{true, false}",Add option for using waste heat of fuel cells in district heating networks -use_electrolysis_waste _heat,--,"{true, false}",Add option for using waste heat of electrolysis in district heating networks -electricity_transmission _grid,--,"{true, false}",Switch for enabling/disabling the electricity transmission grid. -electricity_distribution _grid,--,"{true, false}",Add a simplified representation of the exchange capacity between transmission and distribution grid level through a link. -electricity_distribution _grid_cost_factor,,,Multiplies the investment cost of the electricity distribution grid -,,, -electricity_grid _connection,--,"{true, false}",Add the cost of electricity grid connection for onshore wind and solar -transmission_efficiency,,,Section to specify transmission losses or compression energy demands of bidirectional links. Splits them into two capacity-linked unidirectional links. --- {carrier},--,str,The carrier of the link. --- -- efficiency_static,p.u.,float,Length-independent transmission efficiency. --- -- efficiency_per_1000km,p.u. 
per 1000 km,float,Length-dependent transmission efficiency ($\eta^{\text{length}}$) --- -- compression_per_1000km,p.u. per 1000 km,float,Length-dependent electricity demand for compression ($\eta \cdot \text{length}$) implemented as multi-link to local electricity bus. -H2_network,--,"{true, false}",Add option for new hydrogen pipelines -gas_network,--,"{true, false}","Add existing natural gas infrastructure, incl. LNG terminals, production and entry-points. The existing gas network is added with a lossless transport model. A length-weighted `k-edge augmentation algorithm `_ can be run to add new candidate gas pipelines such that all regions of the model can be connected to the gas network. When activated, all the gas demands are regionally disaggregated as well." -H2_retrofit,--,"{true, false}",Add option for retrofiting existing pipelines to transport hydrogen. -H2_retrofit_capacity _per_CH4,--,float,"The ratio for H2 capacity per original CH4 capacity of retrofitted pipelines. The `European Hydrogen Backbone (April, 2020) p.15 `_ 60% of original natural gas capacity could be used in cost-optimal case as H2 capacity." -gas_network_connectivity _upgrade ,--,float,The number of desired edge connectivity (k) in the length-weighted `k-edge augmentation algorithm `_ used for the gas network -gas_distribution_grid,--,"{true, false}",Add a gas distribution grid -gas_distribution_grid _cost_factor,,,Multiplier for the investment cost of the gas distribution grid -,,, -biomass_spatial,--,"{true, false}",Add option for resolving biomass demand regionally -biomass_transport,--,"{true, false}",Add option for transporting solid biomass between nodes -biogas_upgrading_cc,--,"{true, false}",Add option to capture CO2 from biomass upgrading -conventional_generation,,,Add a more detailed description of conventional carriers. Any power generation requires the consumption of fuel from nodes representing that fuel. -biomass_to_liquid,--,"{true, false}",Add option for transforming solid biomass into liquid fuel with the same properties as oil -biosng,--,"{true, false}",Add option for transforming solid biomass into synthesis gas with the same properties as natural gas -bioH2,--,"{true, false}",Add option for transforming solid biomass into hydrogen with carbon capture -municipal_solid_waste,--,"{true, false}",Add option for municipal solid waste -limit_max_growth,,, --- enable,--,"{true, false}",Add option to limit the maximum growth of a carrier --- factor,p.u.,float,The maximum growth factor of a carrier (e.g. 1.3 allows 30% larger than max historic growth) --- max_growth,,, --- -- {carrier},GW,float,The historic maximum growth of a carrier --- max_relative_growth,,, --- -- {carrier},p.u.,float,The historic maximum relative growth of a carrier -,,, -enhanced_geothermal,,, --- enable,--,"{true, false}",Add option to include Enhanced Geothermal Systems --- flexible,--,"{true, false}",Add option for flexible operation (see Ricks et al. 2024) --- max_hours,--,int,The maximum hours the reservoir can be charged under flexible operation --- max_boost,--,float,The maximum boost in power output under flexible operation --- var_cf,--,"{true, false}",Add option for variable capacity factor (see Ricks et al. 
2024) --- sustainability_factor,--,float,Share of sourced heat that is replenished by the earth's core (see details in `build_egs_potentials.py `_) -solid_biomass_import,,, --- enable,--,"{true, false}",Add option to include solid biomass imports --- price,currency/MWh,float,Price for importing solid biomass --- max_amount,Twh,float,Maximum solid biomass import potential --- upstream_emissions_factor,p.u.,float,Upstream emissions of solid biomass imports +,Unit,Values,Description +transport,--,"{true, false}",Flag to include transport sector. +heating,--,"{true, false}",Flag to include heating sector. +biomass,--,"{true, false}",Flag to include biomass sector. +industry,--,"{true, false}",Flag to include industry sector. +agriculture,--,"{true, false}",Flag to include agriculture sector. +fossil_fuels,--,"{true, false}","Flag to include imports of fossil fuels ( [""coal"", ""gas"", ""oil"", ""lignite""])" +district_heating,--,,`prepare_sector_network.py `_ +-- potential,--,float,maximum fraction of urban demand which can be supplied by district heating +-- progress,--,Dictionary with planning horizons as keys., Increase of today's district heating demand to potential maximum district heating share. Progress = 0 means today's district heating share. Progress = 1 means maximum fraction of urban demand is supplied by district heating +-- district_heating_loss,--,float,Share increase in district heat demand in urban central due to heat losses +-- forward_temperature,°C,Dictionary with country codes as keys. One key must be 'default'.,Forward temperature in district heating +-- return_temperature,°C,Dictionary with country codes as keys. One key must be 'default'.,Return temperature in district heating. Must be lower than forward temperature +-- heat_source_cooling,K,float,Cooling of heat source for heat pumps +-- heat_pump_cop_approximation,,, +-- -- refrigerant,--,"{ammonia, isobutane}",Heat pump refrigerant assumed for COP approximation +-- -- heat_exchanger_pinch_point_temperature_difference,K,float,Heat pump pinch point temperature difference in heat exchangers assumed for approximation. +-- -- isentropic_compressor_efficiency,--,float,Isentropic efficiency of heat pump compressor assumed for approximation. Must be between 0 and 1. +-- -- heat_loss,--,float,Heat pump heat loss assumed for approximation. Must be between 0 and 1. +-- heat_pump_sources,--,, +-- -- urban central,--,List of heat sources for heat pumps in urban central heating, +-- -- urban decentral,--,List of heat sources for heat pumps in urban decentral heating, +-- -- rural,--,List of heat sources for heat pumps in rural heating, +cluster_heat_buses,--,"{true, false}",Cluster residential and service heat buses in `prepare_sector_network.py `_ to one to save memory. +,,, +bev_dsm_restriction _value,--,float,Adds a lower state of charge (SOC) limit for battery electric vehicles (BEV) to manage its own energy demand (DSM). Located in `build_transport_demand.py `_. Set to 0 for no restriction on BEV DSM +bev_dsm_restriction _time,--,float,Time at which SOC of BEV has to be dsm_restriction_value +transport_heating _deadband_upper,°C,float,"The maximum temperature in the vehicle. At higher temperatures, the energy required for cooling in the vehicle increases." +transport_heating _deadband_lower,°C,float,"The minimum temperature in the vehicle. At lower temperatures, the energy required for heating in the vehicle increases." 
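[Editor's note: A minimal sketch of the deadband logic documented in the two rows above: outside the comfort band, specific transport energy demand rises linearly with the temperature gap. Parameter values are placeholders, not the config defaults.]

def demand_correction(t_amb, t_low=15.0, t_high=20.0,
                      lower_factor=0.01, upper_factor=0.005):
    # Below the deadband heating raises demand; above it cooling does.
    if t_amb < t_low:
        return 1.0 + lower_factor * (t_low - t_amb)
    if t_amb > t_high:
        return 1.0 + upper_factor * (t_amb - t_high)
    return 1.0

print(demand_correction(-5.0))  # 1.20 -> 20 % extra demand at -5 °C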
+,,, +ICE_lower_degree_factor,--,float,Share increase in energy demand in internal combustion engine (ICE) for each degree difference between the cold environment and the minimum temperature. +ICE_upper_degree_factor,--,float,Share increase in energy demand in internal combustion engine (ICE) for each degree difference between the hot environment and the maximum temperature. +EV_lower_degree_factor,--,float,Share increase in energy demand in electric vehicles (EV) for each degree difference between the cold environment and the minimum temperature. +EV_upper_degree_factor,--,float,Share increase in energy demand in electric vehicles (EV) for each degree difference between the hot environment and the maximum temperature. +bev_dsm,--,"{true, false}",Add the option for battery electric vehicles (BEV) to participate in demand-side management (DSM) +,,, +bev_availability,--,float,The share for battery electric vehicles (BEV) that are able to do demand side management (DSM) +bev_energy,--,float,The average size of battery electric vehicles (BEV) in MWh +bev_charge_efficiency,--,float,Battery electric vehicles (BEV) charge and discharge efficiency +bev_charge_rate,MWh,float,The power consumption for one electric vehicle (EV) in MWh. Value derived from 3-phase charger with 11 kW. +bev_avail_max,--,float,The maximum share plugged-in availability for passenger electric vehicles. +bev_avail_mean,--,float,The average share plugged-in availability for passenger electric vehicles. +v2g,--,"{true, false}",Allows feed-in to grid from EV battery +land_transport_fuel_cell _share,--,Dictionary with planning horizons as keys.,The share of vehicles that uses fuel cells in a given year +land_transport_electric _share,--,Dictionary with planning horizons as keys.,The share of vehicles that uses electric vehicles (EV) in a given year +land_transport_ice _share,--,Dictionary with planning horizons as keys.,The share of vehicles that uses internal combustion engines (ICE) in a given year. What is not EV or FCEV is oil-fuelled ICE. +transport_electric_efficiency,MWh/100km,float,The conversion efficiencies of electric vehicles in transport +transport_fuel_cell_efficiency,MWh/100km,float,The H2 conversion efficiencies of fuel cells in transport +transport_ice_efficiency,MWh/100km,float,The oil conversion efficiencies of internal combustion engine (ICE) in transport +agriculture_machinery _electric_share,--,float,The share for agricultural machinery that uses electricity +agriculture_machinery _oil_share,--,float,The share for agricultural machinery that uses oil +agriculture_machinery _fuel_efficiency,--,float,The efficiency of electric-powered machinery in the conversion of electricity to meet agricultural needs. +agriculture_machinery _electric_efficiency,--,float,The efficiency of oil-powered machinery in the conversion of oil to meet agricultural needs. +Mwh_MeOH_per_MWh_H2,LHV,float,"The energy amount of the produced methanol per energy amount of hydrogen. From `DECHEMA (2017) `_, page 64." +MWh_MeOH_per_tCO2,LHV,float,"The energy amount of the produced methanol per ton of CO2. From `DECHEMA (2017) `_, page 66." +MWh_MeOH_per_MWh_e,LHV,float,"The energy amount of the produced methanol per energy amount of electricity. From `DECHEMA (2017) `_, page 64." +shipping_hydrogen _liquefaction,--,"{true, false}",Whether to include liquefaction costs for hydrogen demand in shipping. 
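[Editor's note: The three methanolisation factors documented just above can be inverted to read off the inputs per MWh of methanol. The numbers below are placeholders in the right ballpark, not the configured DECHEMA defaults.]

mwh_meoh_per_mwh_h2 = 0.88  # hypothetical, roughly DECHEMA magnitude
mwh_meoh_per_t_co2 = 4.0
mwh_meoh_per_mwh_e = 3.7

per_mwh_meoh = {
    "H2 [MWh]": 1 / mwh_meoh_per_mwh_h2,
    "CO2 [t]": 1 / mwh_meoh_per_t_co2,
    "electricity [MWh]": 1 / mwh_meoh_per_mwh_e,
}
print(per_mwh_meoh)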
+,,, +shipping_hydrogen_share,--,Dictionary with planning horizons as keys.,The share of ships powered by hydrogen in a given year +shipping_methanol_share,--,Dictionary with planning horizons as keys.,The share of ships powered by methanol in a given year +shipping_oil_share,--,Dictionary with planning horizons as keys.,The share of ships powered by oil in a given year +shipping_methanol _efficiency,--,float,The efficiency of methanol-powered ships in the conversion of methanol to meet shipping needs (propulsion). The efficiency increase from oil can be 10-15% higher according to the `IEA `_ +,,, +shipping_oil_efficiency,--,float,The efficiency of oil-powered ships in the conversion of oil to meet shipping needs (propulsion). Base value derived from 2011 +aviation_demand_factor,--,float,The proportion of demand for aviation compared to today's consumption +HVC_demand_factor,--,float,The proportion of demand for high-value chemicals compared to today's consumption +,,, +time_dep_hp_cop,--,"{true, false}",Consider the time dependent coefficient of performance (COP) of the heat pump +heat_pump_sink_T,°C,float,The temperature heat sink used in heat pumps based on DTU / large area radiators. The value is conservatively high to cover hot water and space heating in poorly-insulated buildings +reduce_space_heat _exogenously,--,"{true, false}",Influence on space heating demand by a certain factor (applied before losses in district heating). +reduce_space_heat _exogenously_factor,--,Dictionary with planning horizons as keys.,"A positive factor can mean renovation or demolition of a building. If the factor is negative, it can mean an increase in floor area, increased thermal comfort, population growth. The default factors are determined by the `Eurocalc Homes and buildings decarbonization scenario `_" +retrofitting,,, +-- retro_endogen,--,"{true, false}",Add retrofitting as an endogenous system which co-optimise space heat savings. +-- cost_factor,--,float,Weight costs for building renovation +-- interest_rate,--,float,The interest rate for investment in building components +-- annualise_cost,--,"{true, false}",Annualise the investment costs of retrofitting +-- tax_weighting,--,"{true, false}",Weight the costs of retrofitting depending on taxes in countries +-- construction_index,--,"{true, false}",Weight the costs of retrofitting depending on labour/material costs per country +tes,--,"{true, false}",Add option for storing thermal energy in large water pits associated with district heating systems and individual thermal energy storage (TES) +tes_tau,,,The time constant used to calculate the decay of thermal energy in thermal energy storage (TES): 1- :math:`e^{-1/24τ}`. +-- decentral,days,float,The time constant in decentralized thermal energy storage (TES) +-- central,days,float,The time constant in centralized thermal energy storage (TES) +boilers,--,"{true, false}",Add option for transforming gas into heat using gas boilers +resistive_heaters,--,"{true, false}",Add option for transforming electricity into heat using resistive heaters (independently from gas boilers) +oil_boilers,--,"{true, false}",Add option for transforming oil into heat using boilers +biomass_boiler,--,"{true, false}",Add option for transforming biomass into heat using boilers +overdimension_individual_heating,--,float,Add option for overdimensioning individual heating systems by a certain factor. This allows them to cover heat demand peaks e.g. 10% higher than those in the data with a setting of 1.1. 
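[Editor's note: The overdimensioning described in the row above simply scales individual heating capacities beyond the data-implied peak; a minimal sketch with made-up numbers:]

peak_heat_demand_mw = 12.0   # hypothetical peak from the demand data
overdimension_factor = 1.1   # cover peaks 10 % above the data

p_nom_mw = overdimension_factor * peak_heat_demand_mw
print(f"{p_nom_mw:.1f} MW installed vs. {peak_heat_demand_mw:.1f} MW peak")  # 13.2 vs 12.0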
+chp,--,"{true, false}",Add option for using Combined Heat and Power (CHP) +micro_chp,--,"{true, false}",Add option for using Combined Heat and Power (CHP) for decentral areas. +solar_thermal,--,"{true, false}",Add option for using solar thermal to generate heat. +solar_cf_correction,--,float,The correction factor for the value provided by the solar thermal profile calculations +marginal_cost_storage,currency/MWh ,float,The marginal cost of discharging batteries in distributed grids +methanation,--,"{true, false}",Add option for transforming hydrogen and CO2 into methane using methanation. +coal_cc,--,"{true, false}",Add option for coal CHPs with carbon capture +dac,--,"{true, false}",Add option for Direct Air Capture (DAC) +co2_vent,--,"{true, false}",Add option for vent out CO2 from storages to the atmosphere. +allam_cycle,--,"{true, false}",Add option to include `Allam cycle gas power plants `_ +hydrogen_fuel_cell,--,"{true, false}",Add option to include hydrogen fuel cell for re-electrification. Assuming OCGT technology costs +hydrogen_turbine,--,"{true, false}",Add option to include hydrogen turbine for re-electrification. Assuming OCGT technology costs +SMR,--,"{true, false}",Add option for transforming natural gas into hydrogen and CO2 using Steam Methane Reforming (SMR) +SMR CC,--,"{true, false}",Add option for transforming natural gas into hydrogen and CO2 using Steam Methane Reforming (SMR) and Carbon Capture (CC) +regional_methanol_demand,--,"{true, false}",Spatially resolve methanol demand. Set to true if regional CO2 constraints needed. +regional_oil_demand,--,"{true, false}",Spatially resolve oil demand. Set to true if regional CO2 constraints needed. +regional_co2 _sequestration_potential,,, +-- enable,--,"{true, false}",Add option for regionally-resolved geological carbon dioxide sequestration potentials based on `CO2StoP `_. +-- attribute,--,string or list,Name (or list of names) of the attribute(s) for the sequestration potential +-- include_onshore,--,"{true, false}",Add options for including onshore sequestration potentials +-- min_size,Gt ,float,Any sites with lower potential than this value will be excluded +-- max_size,Gt ,float,The maximum sequestration potential for any one site. +-- years_of_storage,years,float,The years until potential exhausted at optimised annual rate +co2_sequestration_potential,--,Dictionary with planning horizons as keys.,The potential of sequestering CO2 in Europe per year and investment period +co2_sequestration_cost,currency/tCO2,float,The cost of sequestering a ton of CO2 +co2_sequestration_lifetime,years,int,The lifetime of a CO2 sequestration site +co2_spatial,--,"{true, false}","Add option to spatially resolve carrier representing stored carbon dioxide. This allows for more detailed modelling of CCUTS, e.g. regarding the capturing of industrial process emissions, usage as feedstock for electrofuels, transport of carbon dioxide, and geological sequestration sites." +,,, +co2network,--,"{true, false}",Add option for planning a new carbon dioxide transmission network +co2_network_cost_factor,p.u.,float,The cost factor for the capital cost of the carbon dioxide transmission network +,,, +cc_fraction,--,float,The default fraction of CO2 captured with post-combustion capture +hydrogen_underground _storage,--,"{true, false}",Add options for storing hydrogen underground. Storage potential depends regionally. 
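[Editor's note: The co2_sequestration_potential row above now takes a dictionary with planning horizons as keys. In PATCH 30 this feeds one constraint per investment period, with a 200 Mt/a fallback for years missing from the dictionary (limit_dict.get(period, 200)); a minimal reproduction of that lookup:]

limit_dict = {2030: 50, 2035: 100, 2040: 200}
periods = [2030, 2035, 2040, 2045]

limits = {f"co2_sequestration_limit-{p}": limit_dict.get(p, 200) for p in periods}
print(limits)  # 2045 falls back to the 200 Mt/a default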
+hydrogen_underground _storage_locations,,"{onshore, nearshore, offshore}","The location where hydrogen underground storage can be located. Onshore, nearshore, offshore means it must be located more than 50 km away from the sea, within 50 km of the sea, or within the sea itself respectively." +,,, +ammonia,--,"{true, false, regional}","Add ammonia as a carrrier. It can be either true (copperplated NH3), false (no NH3 carrier) or ""regional"" (regionalised NH3 without network)" +min_part_load_fischer _tropsch,per unit of p_nom ,float,The minimum unit dispatch (``p_min_pu``) for the Fischer-Tropsch process +min_part_load _methanolisation,per unit of p_nom ,float,The minimum unit dispatch (``p_min_pu``) for the methanolisation process +,,, +use_fischer_tropsch _waste_heat,--,"{true, false}",Add option for using waste heat of Fischer Tropsch in district heating networks +use_fuel_cell_waste_heat,--,"{true, false}",Add option for using waste heat of fuel cells in district heating networks +use_electrolysis_waste _heat,--,"{true, false}",Add option for using waste heat of electrolysis in district heating networks +electricity_transmission _grid,--,"{true, false}",Switch for enabling/disabling the electricity transmission grid. +electricity_distribution _grid,--,"{true, false}",Add a simplified representation of the exchange capacity between transmission and distribution grid level through a link. +electricity_distribution _grid_cost_factor,,,Multiplies the investment cost of the electricity distribution grid +,,, +electricity_grid _connection,--,"{true, false}",Add the cost of electricity grid connection for onshore wind and solar +transmission_efficiency,,,Section to specify transmission losses or compression energy demands of bidirectional links. Splits them into two capacity-linked unidirectional links. +-- {carrier},--,str,The carrier of the link. +-- -- efficiency_static,p.u.,float,Length-independent transmission efficiency. +-- -- efficiency_per_1000km,p.u. per 1000 km,float,Length-dependent transmission efficiency ($\eta^{\text{length}}$) +-- -- compression_per_1000km,p.u. per 1000 km,float,Length-dependent electricity demand for compression ($\eta \cdot \text{length}$) implemented as multi-link to local electricity bus. +H2_network,--,"{true, false}",Add option for new hydrogen pipelines +gas_network,--,"{true, false}","Add existing natural gas infrastructure, incl. LNG terminals, production and entry-points. The existing gas network is added with a lossless transport model. A length-weighted `k-edge augmentation algorithm `_ can be run to add new candidate gas pipelines such that all regions of the model can be connected to the gas network. When activated, all the gas demands are regionally disaggregated as well." +H2_retrofit,--,"{true, false}",Add option for retrofiting existing pipelines to transport hydrogen. +H2_retrofit_capacity _per_CH4,--,float,"The ratio for H2 capacity per original CH4 capacity of retrofitted pipelines. The `European Hydrogen Backbone (April, 2020) p.15 `_ 60% of original natural gas capacity could be used in cost-optimal case as H2 capacity." 
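[Editor's note: A sketch of the retrofit rule documented in the H2_retrofit_capacity_per_CH4 row above: a retrofitted hydrogen pipeline reuses a fixed fraction of the original CH4 capacity. The pipeline capacity below is illustrative.]

ch4_pipeline_mw = 5000.0      # hypothetical natural gas pipeline capacity
h2_capacity_per_ch4 = 0.6     # European Hydrogen Backbone estimate cited above

h2_pipeline_mw = h2_capacity_per_ch4 * ch4_pipeline_mw
print(f"retrofitted H2 capacity: {h2_pipeline_mw:.0f} MW")  # 3000 MW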
+gas_network_connectivity _upgrade ,--,float,The number of desired edge connectivity (k) in the length-weighted `k-edge augmentation algorithm `_ used for the gas network +gas_distribution_grid,--,"{true, false}",Add a gas distribution grid +gas_distribution_grid _cost_factor,,,Multiplier for the investment cost of the gas distribution grid +,,, +biomass_spatial,--,"{true, false}",Add option for resolving biomass demand regionally +biomass_transport,--,"{true, false}",Add option for transporting solid biomass between nodes +biogas_upgrading_cc,--,"{true, false}",Add option to capture CO2 from biomass upgrading +conventional_generation,,,Add a more detailed description of conventional carriers. Any power generation requires the consumption of fuel from nodes representing that fuel. +biomass_to_liquid,--,"{true, false}",Add option for transforming solid biomass into liquid fuel with the same properties as oil +biosng,--,"{true, false}",Add option for transforming solid biomass into synthesis gas with the same properties as natural gas +bioH2,--,"{true, false}",Add option for transforming solid biomass into hydrogen with carbon capture +municipal_solid_waste,--,"{true, false}",Add option for municipal solid waste +limit_max_growth,,, +-- enable,--,"{true, false}",Add option to limit the maximum growth of a carrier +-- factor,p.u.,float,The maximum growth factor of a carrier (e.g. 1.3 allows 30% larger than max historic growth) +-- max_growth,,, +-- -- {carrier},GW,float,The historic maximum growth of a carrier +-- max_relative_growth,,, +-- -- {carrier},p.u.,float,The historic maximum relative growth of a carrier +,,, +enhanced_geothermal,,, +-- enable,--,"{true, false}",Add option to include Enhanced Geothermal Systems +-- flexible,--,"{true, false}",Add option for flexible operation (see Ricks et al. 2024) +-- max_hours,--,int,The maximum hours the reservoir can be charged under flexible operation +-- max_boost,--,float,The maximum boost in power output under flexible operation +-- var_cf,--,"{true, false}",Add option for variable capacity factor (see Ricks et al. 
2024) +-- sustainability_factor,--,float,Share of sourced heat that is replenished by the earth's core (see details in `build_egs_potentials.py `_) +solid_biomass_import,,, +-- enable,--,"{true, false}",Add option to include solid biomass imports +-- price,currency/MWh,float,Price for importing solid biomass +-- max_amount,Twh,float,Maximum solid biomass import potential +-- upstream_emissions_factor,p.u.,float,Upstream emissions of solid biomass imports diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 6fb6c51c..3ee04284 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -9,6 +9,7 @@ Release Notes Upcoming Release ================ +* Add investment period dependent CO2 sequestration potentials * Add option to produce hydrogen from solid biomass (flag ``solid biomass to hydrogen``), combined with carbon capture diff --git a/scripts/solve_network.py b/scripts/solve_network.py index b7d5665b..99f7b575 100644 --- a/scripts/solve_network.py +++ b/scripts/solve_network.py @@ -1131,14 +1131,14 @@ if __name__ == "__main__": from _helpers import mock_snakemake snakemake = mock_snakemake( - "solve_sector_network_myopic", - # configfiles="../config/test/config.perfect.yaml", + "solve_sector_network_perfect", + configfiles="../config/test/config.perfect.yaml", simpl="", opts="", - clusters="38", - ll="vopt", + clusters="5", + ll="v1.0", sector_opts="", - planning_horizons="2030", + # planning_horizons="2030", ) configure_logging(snakemake) set_scenario_config(snakemake) From 0c36de9bf8b72b474ccde63fb65fa0e97e18a08e Mon Sep 17 00:00:00 2001 From: Bobby Xiong <36541459+bobbyxng@users.noreply.github.com> Date: Thu, 22 Aug 2024 15:01:20 +0200 Subject: [PATCH 35/38] Introducing OpenStreetMap high-voltage grid to PyPSA-Eur (#1079) * Implemented which uses the overpass API to download power features for individual countries. * Extended rule by input. * Bug fixes and improvements to clean_osm_data.py. Added in retrieve_osm_data.py. * Updated clean_osm_data and retrieve_osm_data to create clean substations. * Finished clean_osm_data function. * Added check whether line is a circle. If so, drop it. * Extended build_electricity.smk by build_osm_network.py * Added build_osm_network * Working osm-network-fast * Bug fixes. * Finalised and cleaned including docstrings. * Added try catch to retrieve_osm_data. Allows for parallelisation of downloads. * Updated cleaning process. * Set maximum number of threads for retrieving to 4, wrt. fair usage policy and potential request errors. * Intermediate update on clean_osm_data.py. Added docstrings. * Bug fix. * Bug fix. * Bug fixes in data types out of clean_osm_data * Significant improvements to retrieve_osm_data, clean_osm_data. Cleaned code. Speed improvements * Cleaned config. * Fixes. * Bug fixes. * Updated default config * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Removed overpass from required packages. Not needed anymore. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Added links_relations (route = power, frequency = 0) to retrieval. This will change how HVDC links are extracted in the near future. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Work-in-progress clean_osm_data * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Added clean links output to clean_osm_data. 
Script uses OSM relations to retrieve clean HVDC links.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* New code for integrating HVDC links. Using relations. Base network implementation functioning.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* removed manual line dropping.

* Updated clean script

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* reverted Snakefile to default: sync settings

* added prebuilt functionality.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Updated build_electricity.smk to work with scenario management.

* removed commented-out code.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* removed commented-out code.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fixed bug in pdf export by substituting pdf export with svg.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Bug-fix Snakefile

* dropped not needed columns from build_osm_network.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Updated build_shapes, config.default and clean_osm_data.

* pre-commit changes.

* test

* Added initial prepare_osm_network_release.py script

* Finalised prepare_osm_network_release script to build clean and stable OSM base_network input files.

* Added new rules/development.smk

* Updated clean_osm_data to add substation_centroid to linestrings

* Updated clean_osm_data to add substation_centroid to linestrings

* Updated clean_osm_data to add substation_centroid to linestrings

* Updated clean_osm_data to add substation_centroid to linestrings

* Added osm-prebuilt functionality and zenodo sandbox repository.

* Updated clean_osm_data to geopandas v.1.01

* Made base_network and build_osm_network function more robust for empty links.

* Made base_network and build_osm_network function more robust for empty links.

* Bug fix in base_network. Voltage level null is now kept (relevant e.g. for Corsica)

* Merge with changes in upstream PR 1146. Fixing UA and MD.

* Updated Zenodo and fixed prepare_osm_network_release

* Updated osm network release.

* Updated prepare osm network release.

* Updated MD, UA scripts.

* Cleaned determine_availability_matrix_MD_UA.py, removed redundant code

* Bug fixes.

* Bug fixes for UA MD scripts.

* Rename of build script.

* Bug fix: only distribute load to buses with substation.

* Updated zenodo sandbox repository.

* Updated config.default

* Cleaned config.default.yaml: Related settings grouped together and redundant voltage settings aggregated.

* Cleaned config.default.yaml: Related settings grouped together and redundant voltage settings aggregated. Added release notes.

* Updated Zenodo repositories for OSM-prebuilt to official publication.

* Updated configtables

* Updated links.csv: Under_construction lines to in commission.

* Updated link 8394 and parameter_corrections: Continuation of North-Sea-Link.

* Major update: fix simplify_network, fix Corsica, updated build_osm_network to include lines overpassing nodes.
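[Editor's note: One bullet above mentions downloading power features per country via the Overpass API. For readers unfamiliar with it, the sketch below shows the flavour of request involved; the endpoint and query string are illustrative, not necessarily what retrieve_osm_data.py sends.]

import requests

query = """
[out:json][timeout:180];
area["ISO3166-1"="DE"]->.searchArea;
way["power"="line"](area.searchArea);
out body geom;
"""
r = requests.post(
    "https://overpass-api.de/api/interpreter", data={"data": query}, timeout=200
)
r.raise_for_status()
print(len(r.json()["elements"]), "power line ways found")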
* remove config backup * Bug fix: Carrier type of all supernodes corrected to 'AC' * Bug fix: Carrier type of all supernodes corrected to 'AC' * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Updated rules and base_network for compatibility with TYNDP projects. * Updated Zenodo repository and prebuilt network to include 150 kV HVDC connections. * Removed outdated config backup. * Implemented all comments from PR #1079. Cleaned up OSM implementation. * Bug fix: Added all voltages, 200 kV-750 kV, to default config. * Cleaning and bugfixes. * Updated Zenodo repository to https://zenodo.org/records/13358976. Added converter voltages, 'underground' property for DC lines/cables, and included Konti-Skan HVDC (DK-SE). Added compatibility with https://github.com/PyPSA/pypsa-eur/pull/1079 and https://github.com/PyPSA/pypsa-eur/pull/1085 * Apply suggestions from code review * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * simplify_network: handle complicated transformer topologies * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * syntax fix --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Fabian Neumann --- Snakefile | 1 + config/config.default.yaml | 7 +- doc/configtables/electricity.csv | 3 +- doc/release_notes.rst | 2 + envs/environment.yaml | 1 + rules/build_electricity.smk | 97 +- rules/development.smk | 26 + rules/retrieve.smk | 82 ++ scripts/base_network.py | 305 +++- scripts/build_osm_network.py | 863 +++++++++++ scripts/clean_osm_data.py | 1807 ++++++++++++++++++++++++ scripts/prepare_osm_network_release.py | 146 ++ scripts/retrieve_osm_data.py | 152 ++ scripts/simplify_network.py | 9 +- 14 files changed, 3430 insertions(+), 71 deletions(-) create mode 100644 rules/development.smk create mode 100644 scripts/build_osm_network.py create mode 100644 scripts/clean_osm_data.py create mode 100644 scripts/prepare_osm_network_release.py create mode 100644 scripts/retrieve_osm_data.py diff --git a/Snakefile b/Snakefile index be2545ad..eb99437b 100644 --- a/Snakefile +++ b/Snakefile @@ -54,6 +54,7 @@ include: "rules/build_sector.smk" include: "rules/solve_electricity.smk" include: "rules/postprocess.smk" include: "rules/validate.smk" +include: "rules/development.smk" if config["foresight"] == "overnight": diff --git a/config/config.default.yaml b/config/config.default.yaml index 9d565b63..86689ea7 100644 --- a/config/config.default.yaml +++ b/config/config.default.yaml @@ -42,7 +42,7 @@ scenario: ll: - vopt clusters: - - 38 + - 41 - 128 - 256 opts: @@ -74,7 +74,6 @@ enable: custom_busmap: false drop_leap_day: true - # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#co2-budget co2_budget: 2020: 0.701 @@ -87,7 +86,8 @@ co2_budget: # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#electricity electricity: - voltages: [220., 300., 380., 500., 750.] + voltages: [200., 220., 300., 380., 500., 750.] 
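[Editor's note: The config hunk below adds a 200 kV entry to lines: types, and base_network.py further down selects a line type via _get_linetype_by_voltage, whose definition is not shown in this excerpt. A purely hypothetical reading of such a mapping — pick the standard type with the highest nominal voltage not exceeding the line's v_nom:]

linetypes = {
    200.0: "Al/St 240/40 2-bundle 200.0",
    220.0: "Al/St 240/40 2-bundle 220.0",
    300.0: "Al/St 240/40 3-bundle 300.0",
    380.0: "Al/St 240/40 4-bundle 380.0",
}

def pick_linetype_by_voltage(v_nom, linetypes):
    # Fall back to the smallest configured type if v_nom is below all entries.
    eligible = [v for v in linetypes if v <= v_nom]
    return linetypes[max(eligible)] if eligible else linetypes[min(linetypes)]

print(pick_linetype_by_voltage(330.0, linetypes))  # Al/St 240/40 3-bundle 300.0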
+  base_network: entsoegridkit
   gaslimit_enable: false
   gaslimit: false
   co2limit_enable: false
@@ -276,6 +276,7 @@ conventional:
 # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#lines
 lines:
   types:
+    200.: "Al/St 240/40 2-bundle 200.0"
     220.: "Al/St 240/40 2-bundle 220.0"
     300.: "Al/St 240/40 3-bundle 300.0"
     380.: "Al/St 240/40 4-bundle 380.0"

diff --git a/doc/configtables/electricity.csv b/doc/configtables/electricity.csv
index ee733660..9bad7bfc 100644
--- a/doc/configtables/electricity.csv
+++ b/doc/configtables/electricity.csv
@@ -1,5 +1,6 @@
 ,Unit,Values,Description
-voltages,kV,"Any subset of {220., 300., 380.}",Voltage levels to consider
+voltages,kV,"Any subset of {200., 220., 300., 380., 500., 750.}",Voltage levels to consider
+base_network,--,"Any value in {'entsoegridkit', 'osm-prebuilt', 'osm-raw'}","Specify the underlying base network: GridKit (based on an ENTSO-E web map extract), OSM prebuilt (a prebuilt snapshot from OpenStreetMap data), or OSM raw (built from raw OSM data, which takes longer)."
 gaslimit_enable,bool,true or false,Add an overall absolute gas limit configured in ``electricity: gaslimit``.
 gaslimit,MWhth,float or false,Global gas usage limit
 co2limit_enable,bool,true or false,Add an overall absolute carbon-dioxide emissions limit configured in ``electricity: co2limit`` in :mod:`prepare_network`. **Warning:** This option should currently only be used with electricity-only networks, not for sector-coupled networks..

diff --git a/doc/release_notes.rst b/doc/release_notes.rst
index 3ee04284..b1345e0c 100644
--- a/doc/release_notes.rst
+++ b/doc/release_notes.rst
@@ -74,6 +74,8 @@ Upcoming Release

 * Enable parallelism in :mod:`determine_availability_matrix_MD_UA.py` and remove plots. This requires the use of temporary files.

+* Added a new major feature to create the base_network from OpenStreetMap (OSM) data (PR https://github.com/PyPSA/pypsa-eur/pull/1079). Note that a heuristics-based cleaning process is used for lines and links where electrical parameters are incomplete, missing, or ambiguous. Through ``electricity["base_network"]``, the base network can be set to "entsoegridkit" (original default setting, deprecated soon), "osm-prebuilt" (which downloads the latest prebuilt snapshot based on OSM data from Zenodo), or "osm-raw" (which retrieves the raw OSM data once, cleans it, and subsequently builds the network; note that this process may take a few minutes).
+
 * Updated pre-built `weather data cutouts `__.
These are now merged cutouts with solar irradiation from the new SARAH-3 dataset while taking all other diff --git a/envs/environment.yaml b/envs/environment.yaml index 9115ccd9..98cf858d 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -47,6 +47,7 @@ dependencies: - pyxlsb - graphviz - pre-commit +- geojson # Keep in conda environment when calling ipython - ipython diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index f3505091..4446ef76 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -50,6 +50,19 @@ rule build_powerplants: "../scripts/build_powerplants.py" +def input_base_network(w): + base_network = config_provider("electricity", "base_network")(w) + components = {"buses", "lines", "links", "converters", "transformers"} + if base_network == "osm-raw": + inputs = {c: resources(f"osm-raw/build/{c}.csv") for c in components} + else: + inputs = {c: f"data/{base_network}/{c}.csv" for c in components} + if base_network == "entsoegridkit": + inputs["parameter_corrections"] = "data/parameter_corrections.yaml" + inputs["links_p_nom"] = "data/links_p_nom.csv" + return inputs + + rule base_network: params: countries=config_provider("countries"), @@ -58,13 +71,7 @@ rule base_network: lines=config_provider("lines"), transformers=config_provider("transformers"), input: - eg_buses="data/entsoegridkit/buses.csv", - eg_lines="data/entsoegridkit/lines.csv", - eg_links="data/entsoegridkit/links.csv", - eg_converters="data/entsoegridkit/converters.csv", - eg_transformers="data/entsoegridkit/transformers.csv", - parameter_corrections="data/parameter_corrections.yaml", - links_p_nom="data/links_p_nom.csv", + unpack(input_base_network), country_shapes=resources("country_shapes.geojson"), offshore_shapes=resources("offshore_shapes.geojson"), europe_shape=resources("europe_shape.geojson"), @@ -629,3 +636,79 @@ rule prepare_network: "../envs/environment.yaml" script: "../scripts/prepare_network.py" + + +if config["electricity"]["base_network"] == "osm-raw": + + rule clean_osm_data: + input: + cables_way=expand( + "data/osm-raw/{country}/cables_way.json", + country=config_provider("countries"), + ), + lines_way=expand( + "data/osm-raw/{country}/lines_way.json", + country=config_provider("countries"), + ), + links_relation=expand( + "data/osm-raw/{country}/links_relation.json", + country=config_provider("countries"), + ), + substations_way=expand( + "data/osm-raw/{country}/substations_way.json", + country=config_provider("countries"), + ), + substations_relation=expand( + "data/osm-raw/{country}/substations_relation.json", + country=config_provider("countries"), + ), + offshore_shapes=resources("offshore_shapes.geojson"), + country_shapes=resources("country_shapes.geojson"), + output: + substations=resources("osm-raw/clean/substations.geojson"), + substations_polygon=resources("osm-raw/clean/substations_polygon.geojson"), + lines=resources("osm-raw/clean/lines.geojson"), + links=resources("osm-raw/clean/links.geojson"), + log: + logs("clean_osm_data.log"), + benchmark: + benchmarks("clean_osm_data") + threads: 1 + resources: + mem_mb=4000, + conda: + "../envs/environment.yaml" + script: + "../scripts/clean_osm_data.py" + + +if config["electricity"]["base_network"] == "osm-raw": + + rule build_osm_network: + input: + substations=resources("osm-raw/clean/substations.geojson"), + lines=resources("osm-raw/clean/lines.geojson"), + links=resources("osm-raw/clean/links.geojson"), + country_shapes=resources("country_shapes.geojson"), + output: + 
lines=resources("osm-raw/build/lines.csv"), + links=resources("osm-raw/build/links.csv"), + converters=resources("osm-raw/build/converters.csv"), + transformers=resources("osm-raw/build/transformers.csv"), + substations=resources("osm-raw/build/buses.csv"), + lines_geojson=resources("osm-raw/build/geojson/lines.geojson"), + links_geojson=resources("osm-raw/build/geojson/links.geojson"), + converters_geojson=resources("osm-raw/build/geojson/converters.geojson"), + transformers_geojson=resources("osm-raw/build/geojson/transformers.geojson"), + substations_geojson=resources("osm-raw/build/geojson/buses.geojson"), + log: + logs("build_osm_network.log"), + benchmark: + benchmarks("build_osm_network") + threads: 1 + resources: + mem_mb=4000, + conda: + "../envs/environment.yaml" + script: + "../scripts/build_osm_network.py" diff --git a/rules/development.smk b/rules/development.smk new file mode 100644 index 00000000..46549025 --- /dev/null +++ b/rules/development.smk @@ -0,0 +1,26 @@ +# SPDX-FileCopyrightText: : 2023-2024 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT + +if config["electricity"]["base_network"] == "osm-raw": + + rule prepare_osm_network_release: + input: + base_network=resources("networks/base.nc"), + output: + buses=resources("osm-raw/release/buses.csv"), + converters=resources("osm-raw/release/converters.csv"), + lines=resources("osm-raw/release/lines.csv"), + links=resources("osm-raw/release/links.csv"), + transformers=resources("osm-raw/release/transformers.csv"), + log: + logs("prepare_osm_network_release.log"), + benchmark: + benchmarks("prepare_osm_network_release") + threads: 1 + resources: + mem_mb=1000, + conda: + "../envs/environment.yaml" + script: + "../scripts/prepare_osm_network_release.py" diff --git a/rules/retrieve.smk b/rules/retrieve.smk index de84b2fa..c30696cc 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -390,3 +390,85 @@ if config["enable"]["retrieve"]: "../envs/retrieve.yaml" script: "../scripts/retrieve_monthly_fuel_prices.py" + + +if config["enable"]["retrieve"] and ( + config["electricity"]["base_network"] == "osm-prebuilt" +): + + rule retrieve_osm_prebuilt: + input: + buses=storage("https://zenodo.org/records/13358976/files/buses.csv"), + converters=storage( + "https://zenodo.org/records/13358976/files/converters.csv" + ), + lines=storage("https://zenodo.org/records/13358976/files/lines.csv"), + links=storage("https://zenodo.org/records/13358976/files/links.csv"), + transformers=storage( + "https://zenodo.org/records/13358976/files/transformers.csv" + ), + output: + buses="data/osm-prebuilt/buses.csv", + converters="data/osm-prebuilt/converters.csv", + lines="data/osm-prebuilt/lines.csv", + links="data/osm-prebuilt/links.csv", + transformers="data/osm-prebuilt/transformers.csv", + log: + "logs/retrieve_osm_prebuilt.log", + threads: 1 + resources: + mem_mb=500, + retries: 2 + run: + for key in input.keys(): + move(input[key], output[key]) + validate_checksum(output[key], input[key]) + + + +if config["enable"]["retrieve"] and ( + config["electricity"]["base_network"] == "osm-raw" +): + + rule retrieve_osm_data: + output: + cables_way="data/osm-raw/{country}/cables_way.json", + lines_way="data/osm-raw/{country}/lines_way.json", + links_relation="data/osm-raw/{country}/links_relation.json", + substations_way="data/osm-raw/{country}/substations_way.json", + substations_relation="data/osm-raw/{country}/substations_relation.json", + log: + "logs/retrieve_osm_data_{country}.log", + threads: 1 + conda: + "../envs/retrieve.yaml" + 
script:
+            "../scripts/retrieve_osm_data.py"
+
+
+if config["enable"]["retrieve"] and (
+    config["electricity"]["base_network"] == "osm-raw"
+):
+
+    rule retrieve_osm_data_all:
+        input:
+            expand(
+                "data/osm-raw/{country}/cables_way.json",
+                country=config_provider("countries"),
+            ),
+            expand(
+                "data/osm-raw/{country}/lines_way.json",
+                country=config_provider("countries"),
+            ),
+            expand(
+                "data/osm-raw/{country}/links_relation.json",
+                country=config_provider("countries"),
+            ),
+            expand(
+                "data/osm-raw/{country}/substations_way.json",
+                country=config_provider("countries"),
+            ),
+            expand(
+                "data/osm-raw/{country}/substations_relation.json",
+                country=config_provider("countries"),
+            ),
diff --git a/scripts/base_network.py b/scripts/base_network.py
index 2b0fee63..afb66387 100644
--- a/scripts/base_network.py
+++ b/scripts/base_network.py
@@ -5,7 +5,11 @@
 # coding: utf-8
 """
-Creates the network topology from an `ENTSO-E map extract `_ (March 2022) as a PyPSA network.
+Creates the network topology from an `ENTSO-E map extract
+`_ (March 2022)
+or `OpenStreetMap data `_ (Aug 2024)
+as a PyPSA
+network.
 
 Relevant Settings
 -----------------
@@ -131,45 +135,50 @@ def _find_closest_links(links, new_links, distance_upper_bound=1.5):
     )
 
 
-def _load_buses_from_eg(eg_buses, europe_shape, config_elec):
+def _load_buses(buses, europe_shape, config):
     buses = (
         pd.read_csv(
-            eg_buses,
+            buses,
             quotechar="'",
             true_values=["t"],
             false_values=["f"],
             dtype=dict(bus_id="str"),
         )
         .set_index("bus_id")
-        .drop(["station_id"], axis=1)
         .rename(columns=dict(voltage="v_nom"))
     )
 
+    if "station_id" in buses.columns:
+        buses.drop("station_id", axis=1, inplace=True)
+
     buses["carrier"] = buses.pop("dc").map({True: "DC", False: "AC"})
     buses["under_construction"] = buses.under_construction.where(
         lambda s: s.notnull(), False
     ).astype(bool)
-
-    # remove all buses outside of all countries including exclusive economic zones (offshore)
     europe_shape = gpd.read_file(europe_shape).loc[0, "geometry"]
     europe_shape_prepped = shapely.prepared.prep(europe_shape)
     buses_in_europe_b = buses[["x", "y"]].apply(
        lambda p: europe_shape_prepped.contains(Point(p)), axis=1
    )
 
+    v_nom_min = min(config["electricity"]["voltages"])
+    v_nom_max = max(config["electricity"]["voltages"])
+
     buses_with_v_nom_to_keep_b = (
-        buses.v_nom.isin(config_elec["voltages"]) | buses.v_nom.isnull()
-    )
-    logger.info(
-        f'Removing buses with voltages {pd.Index(buses.v_nom.unique()).dropna().difference(config_elec["voltages"])}'
+        (v_nom_min <= buses.v_nom) & (buses.v_nom <= v_nom_max)
+        | (buses.v_nom.isnull())
+        | (
+            buses.carrier == "DC"
+        )  # Keeping all DC buses from the input dataset independent of voltage (e.g.
150 kV connections) ) + logger.info(f"Removing buses outside of range AC {v_nom_min} - {v_nom_max} V") return pd.DataFrame(buses.loc[buses_in_europe_b & buses_with_v_nom_to_keep_b]) -def _load_transformers_from_eg(buses, eg_transformers): +def _load_transformers(buses, transformers): transformers = pd.read_csv( - eg_transformers, + transformers, quotechar="'", true_values=["t"], false_values=["f"], @@ -181,9 +190,9 @@ def _load_transformers_from_eg(buses, eg_transformers): return transformers -def _load_converters_from_eg(buses, eg_converters): +def _load_converters_from_eg(buses, converters): converters = pd.read_csv( - eg_converters, + converters, quotechar="'", true_values=["t"], false_values=["f"], @@ -197,9 +206,25 @@ def _load_converters_from_eg(buses, eg_converters): return converters -def _load_links_from_eg(buses, eg_links): +def _load_converters_from_osm(buses, converters): + converters = pd.read_csv( + converters, + quotechar="'", + true_values=["t"], + false_values=["f"], + dtype=dict(converter_id="str", bus0="str", bus1="str"), + ).set_index("converter_id") + + converters = _remove_dangling_branches(converters, buses) + + converters["carrier"] = "" + + return converters + + +def _load_links_from_eg(buses, links): links = pd.read_csv( - eg_links, + links, quotechar="'", true_values=["t"], false_values=["f"], @@ -208,7 +233,7 @@ def _load_links_from_eg(buses, eg_links): links["length"] /= 1e3 - # Skagerrak Link is connected to 132kV bus which is removed in _load_buses_from_eg. + # Skagerrak Link is connected to 132kV bus which is removed in _load_buses. # Connect to neighboring 380kV bus links.loc[links.bus1 == "6396", "bus1"] = "6398" @@ -220,10 +245,35 @@ def _load_links_from_eg(buses, eg_links): return links -def _load_lines_from_eg(buses, eg_lines): +def _load_links_from_osm(buses, links): + links = pd.read_csv( + links, + quotechar="'", + true_values=["t"], + false_values=["f"], + dtype=dict( + link_id="str", + bus0="str", + bus1="str", + voltage="int", + p_nom="float", + ), + ).set_index("link_id") + + links["length"] /= 1e3 + + links = _remove_dangling_branches(links, buses) + + # Add DC line parameters + links["carrier"] = "DC" + + return links + + +def _load_lines(buses, lines): lines = ( pd.read_csv( - eg_lines, + lines, quotechar="'", true_values=["t"], false_values=["f"], @@ -240,6 +290,7 @@ def _load_lines_from_eg(buses, eg_lines): ) lines["length"] /= 1e3 + lines["carrier"] = "AC" lines = _remove_dangling_branches(lines, buses) @@ -290,7 +341,7 @@ def _reconnect_crimea(lines): return pd.concat([lines, lines_to_crimea]) -def _set_electrical_parameters_lines(lines, config): +def _set_electrical_parameters_lines_eg(lines, config): v_noms = config["electricity"]["voltages"] linetypes = config["lines"]["types"] @@ -302,16 +353,36 @@ def _set_electrical_parameters_lines(lines, config): return lines +def _set_electrical_parameters_lines_osm(lines, config): + if lines.empty: + lines["type"] = [] + return lines + + v_noms = config["electricity"]["voltages"] + linetypes = _get_linetypes_config(config["lines"]["types"], v_noms) + + lines["carrier"] = "AC" + lines["dc"] = False + + lines.loc[:, "type"] = lines.v_nom.apply( + lambda x: _get_linetype_by_voltage(x, linetypes) + ) + + lines["s_max_pu"] = config["lines"]["s_max_pu"] + + return lines + + def _set_lines_s_nom_from_linetypes(n): n.lines["s_nom"] = ( np.sqrt(3) * n.lines["type"].map(n.line_types.i_nom) * n.lines["v_nom"] - * n.lines.num_parallel + * n.lines["num_parallel"] ) -def _set_electrical_parameters_links(links, 
config, links_p_nom): +def _set_electrical_parameters_links_eg(links, config, links_p_nom): if links.empty: return links @@ -343,12 +414,27 @@ def _set_electrical_parameters_links(links, config, links_p_nom): return links +def _set_electrical_parameters_links_osm(links, config): + if links.empty: + return links + + p_max_pu = config["links"].get("p_max_pu", 1.0) + links["p_max_pu"] = p_max_pu + links["p_min_pu"] = -p_max_pu + links["carrier"] = "DC" + links["dc"] = True + + return links + + def _set_electrical_parameters_converters(converters, config): p_max_pu = config["links"].get("p_max_pu", 1.0) converters["p_max_pu"] = p_max_pu converters["p_min_pu"] = -p_max_pu - converters["p_nom"] = 2000 + # if column "p_nom" does not exist, set to 2000 + if "p_nom" not in converters: + converters["p_nom"] = 2000 # Converters are combined with links converters["under_construction"] = False @@ -463,7 +549,7 @@ def _set_countries_and_substations(n, config, country_shapes, offshore_shapes): buses["substation_lv"] = ( lv_b & onshore_b & (~buses["under_construction"]) & has_connections_b ) - buses["substation_off"] = ((hv_b & offshore_b) | (hv_b & onshore_b)) & ( + buses["substation_off"] = (offshore_b | (hv_b & onshore_b)) & ( ~buses["under_construction"] ) @@ -617,11 +703,11 @@ def _set_shapes(n, country_shapes, offshore_shapes): def base_network( - eg_buses, - eg_converters, - eg_transformers, - eg_lines, - eg_links, + buses, + converters, + transformers, + lines, + links, links_p_nom, europe_shape, country_shapes, @@ -629,29 +715,59 @@ def base_network( parameter_corrections, config, ): - buses = _load_buses_from_eg(eg_buses, europe_shape, config["electricity"]) - links = _load_links_from_eg(buses, eg_links) + base_network = config["electricity"].get("base_network") + assert base_network in { + "entsoegridkit", + "osm-raw", + "osm-prebuilt", + }, f"base_network must be either 'entsoegridkit', 'osm-raw' or 'osm-prebuilt', but got '{base_network}'" + if base_network == "entsoegridkit": + warnings.warn( + "The 'entsoegridkit' base network is deprecated and will be removed in future versions. 
Please use 'osm-raw' or 'osm-prebuilt' instead.", + DeprecationWarning, + ) - converters = _load_converters_from_eg(buses, eg_converters) + logger.info(f"Creating base network using {base_network}.") - lines = _load_lines_from_eg(buses, eg_lines) - transformers = _load_transformers_from_eg(buses, eg_transformers) + buses = _load_buses(buses, europe_shape, config) + transformers = _load_transformers(buses, transformers) + lines = _load_lines(buses, lines) - if config["lines"].get("reconnect_crimea", True) and "UA" in config["countries"]: - lines = _reconnect_crimea(lines) + if base_network == "entsoegridkit": + links = _load_links_from_eg(buses, links) + converters = _load_converters_from_eg(buses, converters) - lines = _set_electrical_parameters_lines(lines, config) + # Optionally reconnect Crimea + if (config["lines"].get("reconnect_crimea", True)) & ( + "UA" in config["countries"] + ): + lines = _reconnect_crimea(lines) + + # Set electrical parameters of lines and links + lines = _set_electrical_parameters_lines_eg(lines, config) + links = _set_electrical_parameters_links_eg(links, config, links_p_nom) + elif base_network in {"osm-prebuilt", "osm-raw"}: + links = _load_links_from_osm(buses, links) + converters = _load_converters_from_osm(buses, converters) + + # Set electrical parameters of lines and links + lines = _set_electrical_parameters_lines_osm(lines, config) + links = _set_electrical_parameters_links_osm(links, config) + else: + raise ValueError( + "base_network must be either 'entsoegridkit', 'osm-raw', or 'osm-prebuilt'" + ) + + # Set electrical parameters of transformers and converters transformers = _set_electrical_parameters_transformers(transformers, config) - links = _set_electrical_parameters_links(links, config, links_p_nom) converters = _set_electrical_parameters_converters(converters, config) n = pypsa.Network() - n.name = "PyPSA-Eur" + n.name = f"PyPSA-Eur ({base_network})" time = get_snapshots(snakemake.params.snapshots, snakemake.params.drop_leap_day) n.set_snapshots(time) - n.madd("Carrier", ["AC", "DC"]) n.import_components_from_dataframe(buses, "Bus") n.import_components_from_dataframe(lines, "Line") @@ -660,8 +776,8 @@ def base_network( n.import_components_from_dataframe(converters, "Link") _set_lines_s_nom_from_linetypes(n) - - _apply_parameter_corrections(n, parameter_corrections) + if config["electricity"].get("base_network") == "entsoegridkit": + _apply_parameter_corrections(n, parameter_corrections) n = _remove_unconnected_components(n) @@ -675,9 +791,64 @@ def base_network( _set_shapes(n, country_shapes, offshore_shapes) + # Add carriers if they are present in buses.carriers + carriers_in_buses = set(n.buses.carrier.dropna().unique()) + carriers = carriers_in_buses.intersection({"AC", "DC"}) + + if carriers: + n.madd("Carrier", carriers) + return n +def _get_linetypes_config(line_types, voltages): + """ + Return the dictionary of linetypes for selected voltages. The dictionary is + a subset of the dictionary line_types, whose keys match the selected + voltages. + + Parameters + ---------- + line_types : dict + Dictionary of linetypes: keys are nominal voltages and values are linetypes. + voltages : list + List of selected voltages. + + Returns + ------- + Dictionary of linetypes for selected voltages. + """ + # get voltages value that are not available in the line types + vnoms_diff = set(voltages).symmetric_difference(set(line_types.keys())) + if vnoms_diff: + logger.warning( + f"Voltages {vnoms_diff} not in the {line_types} or {voltages} list." 
+ ) + return {k: v for k, v in line_types.items() if k in voltages} + + +def _get_linetype_by_voltage(v_nom, d_linetypes): + """ + Return the linetype of a specific line based on its voltage v_nom. + + Parameters + ---------- + v_nom : float + The voltage of the line. + d_linetypes : dict + Dictionary of linetypes: keys are nominal voltages and values are linetypes. + + Returns + ------- + The linetype of the line whose nominal voltage is closest to the line voltage. + """ + v_nom_min, line_type_min = min( + d_linetypes.items(), + key=lambda x: abs(x[0] - v_nom), + ) + return line_type_min + + def voronoi(points, outline, crs=4326): """ Create Voronoi polygons from a set of points within an outline. @@ -793,25 +964,47 @@ if __name__ == "__main__": configure_logging(snakemake) set_scenario_config(snakemake) + countries = snakemake.params.countries + + buses = snakemake.input.buses + converters = snakemake.input.converters + transformers = snakemake.input.transformers + lines = snakemake.input.lines + links = snakemake.input.links + europe_shape = snakemake.input.europe_shape + country_shapes = snakemake.input.country_shapes + offshore_shapes = snakemake.input.offshore_shapes + config = snakemake.config + + if "links_p_nom" in snakemake.input.keys(): + links_p_nom = snakemake.input.links_p_nom + else: + links_p_nom = None + + if "parameter_corrections" in snakemake.input.keys(): + parameter_corrections = snakemake.input.parameter_corrections + else: + parameter_corrections = None + n = base_network( - snakemake.input.eg_buses, - snakemake.input.eg_converters, - snakemake.input.eg_transformers, - snakemake.input.eg_lines, - snakemake.input.eg_links, - snakemake.input.links_p_nom, - snakemake.input.europe_shape, - snakemake.input.country_shapes, - snakemake.input.offshore_shapes, - snakemake.input.parameter_corrections, - snakemake.config, + buses, + converters, + transformers, + lines, + links, + links_p_nom, + europe_shape, + country_shapes, + offshore_shapes, + parameter_corrections, + config, ) onshore_regions, offshore_regions, shapes, offshore_shapes = build_bus_shapes( n, - snakemake.input.country_shapes, - snakemake.input.offshore_shapes, - snakemake.params.countries, + country_shapes, + offshore_shapes, + countries, ) shapes.to_file(snakemake.output.regions_onshore) diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py new file mode 100644 index 00000000..83461a98 --- /dev/null +++ b/scripts/build_osm_network.py @@ -0,0 +1,863 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur and PyPSA-Earth Authors +# +# SPDX-License-Identifier: MIT + +import logging +import os +import string + +import geopandas as gpd +import numpy as np +import pandas as pd +from _helpers import configure_logging, set_scenario_config +from shapely.geometry import LineString, Point +from shapely.ops import linemerge, nearest_points, split +from tqdm import tqdm + +logger = logging.getLogger(__name__) + + +GEO_CRS = "EPSG:4326" +DISTANCE_CRS = "EPSG:3035" +BUS_TOL = ( + 5000 # unit: meters, default 5000 - Buses within this distance are grouped together +) +LINES_COLUMNS = [ + "bus0", + "bus1", + "voltage", + "circuits", + "length", + "underground", + "under_construction", + "geometry", +] +LINKS_COLUMNS = [ + "bus0", + "bus1", + "voltage", + "p_nom", + "length", + "underground", + "under_construction", + "geometry", +] +TRANSFORMERS_COLUMNS = [ + "bus0", + "bus1", + "voltage_bus0", + "voltage_bus1", + "country", + "geometry", +] + + +def 
line_endings_to_bus_conversion(lines):
+    """
+    Converts line endings to bus connections.
+
+    This function takes a DataFrame of lines and converts the line endings to
+    bus connections. It performs the necessary operations to ensure that the
+    line endings are properly connected to the buses in the network.
+
+    Parameters:
+    lines (DataFrame)
+
+    Returns:
+    lines (DataFrame)
+    """
+    # Assign to every line a start and end point
+
+    lines["bounds"] = lines["geometry"].boundary  # create start and end point
+
+    lines["bus_0_coors"] = lines["bounds"].map(lambda p: p.geoms[0])
+    lines["bus_1_coors"] = lines["bounds"].map(lambda p: p.geoms[-1])
+
+    # splits into coordinates
+    lines["bus0_lon"] = lines["bus_0_coors"].x
+    lines["bus0_lat"] = lines["bus_0_coors"].y
+    lines["bus1_lon"] = lines["bus_1_coors"].x
+    lines["bus1_lat"] = lines["bus_1_coors"].y
+
+    return lines
+
+
+# tol in m
+def set_substations_ids(buses, distance_crs, tol=5000):
+    """
+    Function to set substation ids to buses, accounting for location tolerance.
+
+    The algorithm is as follows:
+
+    1. initialize all substation ids to -1
+    2. if the current substation has already been visited [substation_id >= 0], then skip the calculation
+    3. otherwise:
+        1. identify the substations within the specified tolerance (tol)
+        2. when all the substations in tolerance have substation_id < 0, then specify a new substation_id
+        3. otherwise, if one of the substations in tolerance has a substation_id >= 0, then set that substation_id to all the others;
+           in case of multiple substations with substation_ids >= 0, the first value is picked for all
+    """
+
+    buses["station_id"] = -1
+
+    # create temporary series to execute distance calculations using m as reference distances
+    temp_bus_geom = buses.geometry.to_crs(distance_crs)
+
+    # set tqdm options for substation ids
+    tqdm_kwargs_substation_ids = dict(
+        ascii=False,
+        unit=" buses",
+        total=buses.shape[0],
+        desc="Set substation ids ",
+    )
+
+    station_id = 0
+    for i, row in tqdm(buses.iterrows(), **tqdm_kwargs_substation_ids):
+        if buses.loc[i, "station_id"] >= 0:
+            continue
+
+        # get substations within tolerance
+        close_nodes = np.flatnonzero(
+            temp_bus_geom.distance(temp_bus_geom.loc[i]) <= tol
+        )
+
+        if len(close_nodes) == 1:
+            # if only one substation is within tolerance, then it is the current one.
+            # Note that the node cannot have substation_id >= 0, given the preliminary check
+            # at the beginning of the for loop
+            buses.loc[buses.index[i], "station_id"] = station_id
+            # update station id
+            station_id += 1
+        else:
+            # several substations in tolerance
+            # get their ids
+            subset_substation_ids = buses.loc[buses.index[close_nodes], "station_id"]
+            # check if all substation_ids are negative (<0)
+            all_neg = subset_substation_ids.max() < 0
+            # check if at least one substation_id is negative (<0)
+            some_neg = subset_substation_ids.min() < 0
+
+            if all_neg:
+                # when all substation_ids are negative, then this is a new substation id
+                # set the current station_id and increment the counter
+                buses.loc[buses.index[close_nodes], "station_id"] = station_id
+                station_id += 1
+            elif some_neg:
+                # otherwise, when at least one substation_id is non-negative, pick the first such value
+                # and set it to all the other substations within tolerance
+                sub_id = -1
+                for substation_id in subset_substation_ids:
+                    if substation_id >= 0:
+                        sub_id = substation_id
+                        break
+                buses.loc[buses.index[close_nodes], "station_id"] = sub_id
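+
+
+# Illustrative sketch (not part of the pipeline): how set_substations_ids groups
+# nearby buses. The coordinates below are made up; any metric CRS works as
+# distance_crs.
+#
+#     import geopandas as gpd
+#     from shapely.geometry import Point
+#
+#     buses = gpd.GeoDataFrame(
+#         geometry=[Point(10.0, 50.0), Point(10.01, 50.0), Point(11.0, 51.0)],
+#         crs="EPSG:4326",
+#     )
+#     set_substations_ids(buses, "EPSG:3035", tol=5000)
+#     buses["station_id"].tolist()  # -> [0, 0, 1]: the first two buses lie within
+#     # 5 km of each other and share a station id; the third gets its own.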
+
+
+def set_lines_ids(lines, buses, distance_crs):
+    """
+    Function to set line bus ids to the closest bus in the list.
+    """
+    # set tqdm options for set lines ids
+    tqdm_kwargs_line_ids = dict(
+        ascii=False,
+        unit=" lines",
+        total=lines.shape[0],
+        desc="Set line/link bus ids ",
+    )
+
+    # initialization
+    lines["bus0"] = -1
+    lines["bus1"] = -1
+
+    busesepsg = buses.to_crs(distance_crs)
+    linesepsg = lines.to_crs(distance_crs)
+
+    for i, row in tqdm(linesepsg.iterrows(), **tqdm_kwargs_line_ids):
+        # select buses having the voltage level of the current line
+        buses_sel = busesepsg[
+            (buses["voltage"] == row["voltage"]) & (buses["dc"] == row["dc"])
+        ]
+
+        # find the closest node to bus0 of the line
+        bus0_id = buses_sel.geometry.distance(row.geometry.boundary.geoms[0]).idxmin()
+        lines.loc[i, "bus0"] = buses.loc[bus0_id, "bus_id"]
+
+        # check if the line starts exactly at the node, otherwise modify the linestring
+        distance_bus0 = busesepsg.geometry.loc[bus0_id].distance(
+            row.geometry.boundary.geoms[0]
+        )
+
+        if distance_bus0 > 0:
+            # the line does not start at the node, thus modify the linestring
+            line_start_point = lines.geometry.loc[i].boundary.geoms[0]
+            new_segment = LineString([buses.geometry.loc[bus0_id], line_start_point])
+            modified_line = linemerge([new_segment, lines.geometry.loc[i]])
+            lines.loc[i, "geometry"] = modified_line
+
+        # find the closest node to bus1 of the line
+        bus1_id = buses_sel.geometry.distance(row.geometry.boundary.geoms[1]).idxmin()
+        lines.loc[i, "bus1"] = buses.loc[bus1_id, "bus_id"]
+
+        # check if the line ends exactly at the node, otherwise modify the linestring
+        distance_bus1 = busesepsg.geometry.loc[bus1_id].distance(
+            row.geometry.boundary.geoms[1]
+        )
+
+        if distance_bus1 > 0:
+            # the line does not end at the node, thus modify the linestring
+            line_end_point = lines.geometry.loc[i].boundary.geoms[1]
+            new_segment = LineString([line_end_point, buses.geometry.loc[bus1_id]])
+            modified_line = linemerge([lines.geometry.loc[i], new_segment])
+            lines.loc[i, "geometry"] = modified_line
+
+    return lines, buses
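+
+
+# Illustrative sketch (not part of the pipeline): set_lines_ids snaps both line
+# endings to the closest bus of matching voltage and AC/DC type, extending the
+# geometry where an ending does not coincide with a bus. All data below is made up.
+#
+#     import geopandas as gpd
+#     from shapely.geometry import LineString, Point
+#
+#     buses = gpd.GeoDataFrame(
+#         {"bus_id": ["b0", "b1"], "voltage": [380000, 380000], "dc": [False, False]},
+#         geometry=[Point(10.0, 50.0), Point(10.1, 50.0)],
+#         crs="EPSG:4326",
+#     )
+#     lines = gpd.GeoDataFrame(
+#         {"voltage": [380000], "dc": [False]},
+#         geometry=[LineString([(10.001, 50.0), (10.099, 50.0)])],
+#         crs="EPSG:4326",
+#     )
+#     lines, buses = set_lines_ids(lines, buses, "EPSG:3035")
+#     # -> lines.loc[0, ["bus0", "bus1"]] == ["b0", "b1"], with the line geometry
+#     #    extended to start and end exactly at the bus coordinates.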
+
+
+def merge_stations_same_station_id(
+    buses, delta_lon=0.001, delta_lat=0.001, precision=4
+):
+    """
+    Function to merge buses with the same voltage and station_id. This function
+    iterates over all substation ids and creates a bus_id for every substation
+    and voltage level.
+
+    Therefore, a substation with multiple voltage levels is represented
+    with different buses, one per voltage level.
+    """
+    # initialize list of cleaned buses
+    buses_clean = []
+
+    # initialize the number of buses
+    n_buses = 0
+
+    for g_name, g_value in buses.groupby(by="station_id"):
+        # average location of the buses having the same station_id
+        station_point_x = np.round(g_value.geometry.x.mean(), precision)
+        station_point_y = np.round(g_value.geometry.y.mean(), precision)
+        # is_dclink_boundary_point = any(g_value["is_dclink_boundary_point"])
+
+        # loop for every voltage level in the bus
+        # The location of the buses is averaged; in the case of multiple voltage levels for the same station_id,
+        # each bus corresponding to a voltage level and each polarity is located at a distance regulated by delta_lon/delta_lat
+        v_it = 0
+        for v_name, bus_row in g_value.groupby(by=["voltage", "dc"]):
+            lon_bus = np.round(station_point_x + v_it * delta_lon, precision)
+            lat_bus = np.round(station_point_y + v_it * delta_lat, precision)
+
+            bus_data = [
+                n_buses,  # "bus_id"
+                g_name,  # "station_id"
+                v_name[0],  # "voltage"
+                bus_row["dc"].all(),  # "dc"
+                "|".join(bus_row["symbol"].unique()),  # "symbol"
+                bus_row["under_construction"].any(),  # "under_construction"
+                "|".join(bus_row["tag_substation"].unique()),  # "tag_substation"
+                bus_row["tag_area"].sum(),  # "tag_area"
+                lon_bus,  # "lon"
+                lat_bus,  # "lat"
+                bus_row["country"].iloc[0],  # "country"
+                Point(lon_bus, lat_bus),  # "geometry"
+            ]
+
+            # add the bus
+            buses_clean.append(bus_data)
+
+            # increase counters
+            v_it += 1
+            n_buses += 1
+
+    # names of the columns
+    buses_clean_columns = [
+        "bus_id",
+        "station_id",
+        "voltage",
+        "dc",
+        "symbol",
+        "under_construction",
+        "tag_substation",
+        "tag_area",
+        "x",
+        "y",
+        "country",
+        # "is_dclink_boundary_point",
+        "geometry",
+    ]
+
+    gdf_buses_clean = gpd.GeoDataFrame(
+        buses_clean, columns=buses_clean_columns
+    ).set_crs(crs=buses.crs, inplace=True)
+
+    return gdf_buses_clean
+
+
+def get_ac_frequency(df, fr_col="tag_frequency"):
+    """
+    Function to define a default frequency value.
+
+    Attempts to find the most common non-zero frequency across the
+    dataframe; 50 Hz is assumed as a back-up value.
+    """
+
+    # Initialize a default frequency value
+    ac_freq_default = 50
+
+    grid_freq_levels = df[fr_col].value_counts(sort=True, dropna=True)
+    if not grid_freq_levels.empty:
+        # AC lines frequency shouldn't be 0 Hz
+        ac_freq_levels = grid_freq_levels.loc[
+            grid_freq_levels.index.get_level_values(0) != "0"
+        ]
+        ac_freq_default = ac_freq_levels.index.get_level_values(0)[0]
+
+    return ac_freq_default
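+
+
+# Illustrative sketch (not part of the pipeline): get_ac_frequency returns the
+# most common non-zero entry of the frequency tag column. The values below are
+# made up.
+#
+#     import pandas as pd
+#
+#     df = pd.DataFrame({"tag_frequency": ["50", "50", "0", "16.7"]})
+#     get_ac_frequency(df)  # -> "50" ("0" entries are excluded; 50 is the fallback)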
+
+
+def get_transformers(buses, lines):
+    """
+    Function to create fake transformer lines that connect buses of the same
+    station_id at different voltage levels.
+    """
+
+    ac_freq = get_ac_frequency(lines)
+    df_transformers = []
+
+    # Transformers should be added between AC buses only
+    buses_ac = buses[buses["dc"] != True]
+
+    for g_name, g_value in buses_ac.sort_values("voltage", ascending=True).groupby(
+        by="station_id"
+    ):
+        # note: by construction there cannot be more than two buses with the same station_id and same voltage
+        n_voltages = len(g_value)
+
+        if n_voltages > 1:
+            for id in range(n_voltages - 1):
+                # when g_value has more than one node, it means that there are multiple voltages for the same bus
+                transformer_geometry = LineString(
+                    [g_value.geometry.iloc[id], g_value.geometry.iloc[id + 1]]
+                )
+
+                transformer_data = [
+                    f"transf_{g_name}_{id}",  # "line_id"
+                    g_value["bus_id"].iloc[id],  # "bus0"
+                    g_value["bus_id"].iloc[id + 1],  # "bus1"
+                    g_value.voltage.iloc[id],  # "voltage_bus0"
+                    g_value.voltage.iloc[id + 1],  # "voltage_bus1"
+                    g_value.country.iloc[id],  # "country"
+                    transformer_geometry,  # "geometry"
+                ]
+
+                df_transformers.append(transformer_data)
+
+    # name of the columns
+    transformers_columns = [
+        "transformer_id",
+        "bus0",
+        "bus1",
+        "voltage_bus0",
+        "voltage_bus1",
+        "country",
+        "geometry",
+    ]
+
+    df_transformers = gpd.GeoDataFrame(df_transformers, columns=transformers_columns)
+    if not df_transformers.empty:
+        init_index = 0 if lines.empty else lines.index[-1] + 1
+        df_transformers.set_index(init_index + df_transformers.index, inplace=True)
+        # update line endings
+        df_transformers = line_endings_to_bus_conversion(df_transformers)
+        df_transformers.drop(columns=["bounds", "bus_0_coors", "bus_1_coors"], inplace=True)
+
+    gdf_transformers = gpd.GeoDataFrame(df_transformers)
+    gdf_transformers.crs = GEO_CRS
+
+    return gdf_transformers
+
+
+def _find_closest_bus(row, buses, distance_crs, tol=5000):
+    """
+    Find the closest bus to a given bus based on geographical distance and
+    country.
+
+    Parameters:
+    - row: The bus_id of the bus to find the closest bus for.
+    - buses: A GeoDataFrame containing information about all the buses.
+    - distance_crs: The coordinate reference system to use for distance calculations.
+    - tol: The tolerance distance within which a bus is considered closest (default: 5000).
+
+    Returns:
+    - closest_bus_id: The bus_id of the closest bus, or None if no bus is found within the distance and same country.
+    """
+    gdf_buses = buses.copy()
+    gdf_buses = gdf_buses.to_crs(distance_crs)
+    # Get the geometry of the bus with the given bus_id (row)
+    bus = gdf_buses[gdf_buses["bus_id"] == row]
+    bus_geom = bus.geometry.values[0]
+
+    gdf_buses_filtered = gdf_buses[gdf_buses["dc"] == False]
+
+    # Find the closest point in the filtered buses
+    nearest_geom = nearest_points(bus_geom, gdf_buses_filtered.union_all())[1]
+
+    # Get the bus_id of the closest bus
+    closest_bus = gdf_buses_filtered.loc[gdf_buses["geometry"] == nearest_geom]
+
+    # check if closest_bus_id is within the distance
+    within_distance = (
+        closest_bus.to_crs(distance_crs).distance(bus.to_crs(distance_crs), align=False)
+    ).values[0] <= tol
+
+    in_same_country = closest_bus.country.values[0] == bus.country.values[0]
+
+    if within_distance and in_same_country:
+        closest_bus_id = closest_bus.bus_id.values[0]
+    else:
+        closest_bus_id = None
+
+    return closest_bus_id
+
+
+def _get_converters(buses, links, distance_crs):
+    """
+    Get the converters for the given buses and links. Connecting link endings
+    to closest AC bus.
+
+    Parameters:
+    - buses (pandas.DataFrame): DataFrame containing information about buses.
+ - links (pandas.DataFrame): DataFrame containing information about links. + Returns: + - gdf_converters (geopandas.GeoDataFrame): GeoDataFrame containing information about converters. + """ + converters = [] + for idx, row in links.iterrows(): + for conv in range(2): + link_end = row[f"bus{conv}"] + # HVDC Gotland is connected to 130 kV grid, closest HVAC bus is further away + + closest_bus = _find_closest_bus(link_end, buses, distance_crs, tol=40000) + + if closest_bus is None: + continue + + converter_id = f"converter/{row['link_id']}_{conv}" + converter_geometry = LineString( + [ + buses[buses["bus_id"] == link_end].geometry.values[0], + buses[buses["bus_id"] == closest_bus].geometry.values[0], + ] + ) + + logger.info( + f"Added converter #{conv+1}/2 for link {row['link_id']}:{converter_id}." + ) + + converter_data = [ + converter_id, # "line_id" + link_end, # "bus0" + closest_bus, # "bus1" + row["voltage"], # "voltage" + row["p_nom"], # "p_nom" + False, # "underground" + False, # "under_construction" + buses[buses["bus_id"] == closest_bus].country.values[0], # "country" + converter_geometry, # "geometry" + ] + + # Create the converter + converters.append(converter_data) + + conv_columns = [ + "converter_id", + "bus0", + "bus1", + "voltage", + "p_nom", + "underground", + "under_construction", + "country", + "geometry", + ] + + gdf_converters = gpd.GeoDataFrame( + converters, columns=conv_columns, crs=GEO_CRS + ).reset_index() + + return gdf_converters + + +def connect_stations_same_station_id(lines, buses): + """ + Function to create fake links between substations with the same + substation_id. + """ + ac_freq = get_ac_frequency(lines) + station_id_list = buses.station_id.unique() + + add_lines = [] + from shapely.geometry import LineString + + for s_id in station_id_list: + buses_station_id = buses[buses.station_id == s_id] + + if len(buses_station_id) > 1: + for b_it in range(1, len(buses_station_id)): + line_geometry = LineString( + [ + buses_station_id.geometry.iloc[0], + buses_station_id.geometry.iloc[b_it], + ] + ) + line_bounds = line_geometry.bounds + + line_data = [ + f"link{buses_station_id}_{b_it}", # "line_id" + buses_station_id.index[0], # "bus0" + buses_station_id.index[b_it], # "bus1" + 400000, # "voltage" + 1, # "circuits" + 0.0, # "length" + False, # "underground" + False, # "under_construction" + "transmission", # "tag_type" + ac_freq, # "tag_frequency" + buses_station_id.country.iloc[0], # "country" + line_geometry, # "geometry" + line_bounds, # "bounds" + buses_station_id.geometry.iloc[0], # "bus_0_coors" + buses_station_id.geometry.iloc[b_it], # "bus_1_coors" + buses_station_id.lon.iloc[0], # "bus0_lon" + buses_station_id.lat.iloc[0], # "bus0_lat" + buses_station_id.lon.iloc[b_it], # "bus1_lon" + buses_station_id.lat.iloc[b_it], # "bus1_lat" + ] + + add_lines.append(line_data) + + # name of the columns + add_lines_columns = [ + "line_id", + "bus0", + "bus1", + "voltage", + "circuits", + "length", + "underground", + "under_construction", + "tag_type", + "tag_frequency", + "country", + "geometry", + "bounds", + "bus_0_coors", + "bus_1_coors", + "bus0_lon", + "bus0_lat", + "bus1_lon", + "bus1_lat", + ] + + df_add_lines = gpd.GeoDataFrame(pd.concat(add_lines), columns=add_lines_columns) + lines = pd.concat([lines, df_add_lines], ignore_index=True) + + return lines + + +def set_lv_substations(buses): + """ + Function to set what nodes are lv, thereby setting substation_lv The + current methodology is to set lv nodes to buses where multiple voltage + level are found, 
hence when the station_id is duplicated. + """ + # initialize column substation_lv to true + buses["substation_lv"] = True + + # For each station number with multiple buses make lowest voltage `substation_lv = TRUE` + bus_with_stations_duplicates = buses[ + buses.station_id.duplicated(keep=False) + ].sort_values(by=["station_id", "voltage"]) + lv_bus_at_station_duplicates = ( + buses[buses.station_id.duplicated(keep=False)] + .sort_values(by=["station_id", "voltage"]) + .drop_duplicates(subset=["station_id"]) + ) + # Set all buses with station duplicates "False" + buses.loc[bus_with_stations_duplicates.index, "substation_lv"] = False + # Set lv_buses with station duplicates "True" + buses.loc[lv_bus_at_station_duplicates.index, "substation_lv"] = True + + return buses + + +def merge_stations_lines_by_station_id_and_voltage( + lines, links, buses, distance_crs, tol=5000 +): + """ + Function to merge close stations and adapt the line datasets to adhere to + the merged dataset. + """ + + logger.info(" - Setting substation ids with tolerance of %.2f m" % (tol)) + + # bus types (AC != DC) + buses_ac = buses[buses["dc"] == False].reset_index() + buses_dc = buses[buses["dc"] == True].reset_index() + + # set_substations_ids(buses, distance_crs, tol=tol) + set_substations_ids(buses_ac, distance_crs, tol=tol) + set_substations_ids(buses_dc, distance_crs, tol=tol) + + logger.info(" - Merging substations with the same id") + + # merge buses with same station id and voltage + if not buses.empty: + buses_ac = merge_stations_same_station_id(buses_ac) + buses_dc = merge_stations_same_station_id(buses_dc) + buses_dc["bus_id"] = buses_ac["bus_id"].max() + buses_dc["bus_id"] + 1 + buses = pd.concat([buses_ac, buses_dc], ignore_index=True) + set_substations_ids(buses, distance_crs, tol=tol) + + logger.info(" - Specifying the bus ids of the line endings") + + # set the bus ids to the line dataset + lines, buses = set_lines_ids(lines, buses, distance_crs) + links, buses = set_lines_ids(links, buses, distance_crs) + + # drop lines starting and ending in the same node + lines.drop(lines[lines["bus0"] == lines["bus1"]].index, inplace=True) + links.drop(links[links["bus0"] == links["bus1"]].index, inplace=True) + # update line endings + lines = line_endings_to_bus_conversion(lines) + links = line_endings_to_bus_conversion(links) + + # set substation_lv + set_lv_substations(buses) + + # reset index + lines.reset_index(drop=True, inplace=True) + links.reset_index(drop=True, inplace=True) + + return lines, links, buses + + +def _split_linestring_by_point(linestring, points): + """ + Function to split a linestring geometry by multiple inner points. + + Parameters + ---------- + lstring : LineString + Linestring of the line to be split + points : list + List of points to split the linestring + + Return + ------ + list_lines : list + List of linestring to split the line + """ + + list_linestrings = [linestring] + + for p in points: + # execute split to all lines and store results + temp_list = [split(l, p) for l in list_linestrings] + # nest all geometries + list_linestrings = [lstring for tval in temp_list for lstring in tval.geoms] + + return list_linestrings + + +def fix_overpassing_lines(lines, buses, distance_crs, tol=1): + """ + Fix overpassing lines by splitting them at nodes within a given tolerance, + to include the buses being overpassed. + + Parameters: + - lines (GeoDataFrame): The lines to be fixed. + - buses (GeoDataFrame): The buses representing nodes. 
+    - distance_crs (str): The coordinate reference system (CRS) for distance calculations.
+    - tol (float): The tolerance distance in meters for determining if a bus is within a line.
+
+    Returns:
+    - lines (GeoDataFrame): The fixed lines.
+    - buses (GeoDataFrame): The buses representing nodes.
+    """
+
+    lines_to_add = []  # list of lines to be added
+    lines_to_split = []  # list of lines that have been split
+
+    lines_epsgmod = lines.to_crs(distance_crs)
+    buses_epsgmod = buses.to_crs(distance_crs)
+
+    # set tqdm options for substation ids
+    tqdm_kwargs_substation_ids = dict(
+        ascii=False,
+        unit=" lines",
+        total=lines.shape[0],
+        desc="Verify lines overpassing nodes ",
+    )
+
+    for l in tqdm(lines.index, **tqdm_kwargs_substation_ids):
+        # bus indices being within tolerance from the line
+        bus_in_tol_epsg = buses_epsgmod[
+            buses_epsgmod.geometry.distance(lines_epsgmod.geometry.loc[l]) <= tol
+        ]
+
+        # Get boundary points
+        endpoint0 = lines_epsgmod.geometry.loc[l].boundary.geoms[0]
+        endpoint1 = lines_epsgmod.geometry.loc[l].boundary.geoms[1]
+
+        # Calculate distances
+        dist_to_ep0 = bus_in_tol_epsg.geometry.distance(endpoint0)
+        dist_to_ep1 = bus_in_tol_epsg.geometry.distance(endpoint1)
+
+        # exclude endings of the lines
+        bus_in_tol_epsg = bus_in_tol_epsg[(dist_to_ep0 > tol) | (dist_to_ep1 > tol)]
+
+        if not bus_in_tol_epsg.empty:
+            # add index of line to split
+            lines_to_split.append(l)
+
+            buses_locs = buses.geometry.loc[bus_in_tol_epsg.index]
+
+            # get new line geometries
+            new_geometries = _split_linestring_by_point(lines.geometry[l], buses_locs)
+            n_geoms = len(new_geometries)
+
+            # create temporary copies of the line
+            df_append = gpd.GeoDataFrame([lines.loc[l]] * n_geoms)
+            # update geometries
+            df_append["geometry"] = new_geometries
+            # update name of the line if there are multiple line segments
+            df_append["line_id"] = [
+                str(df_append["line_id"].iloc[0])
+                + (f"-{letter}" if n_geoms > 1 else "")
+                for letter in string.ascii_lowercase[:n_geoms]
+            ]
+
+            lines_to_add.append(df_append)
+
+    if not lines_to_add:
+        return lines, buses
+
+    df_to_add = gpd.GeoDataFrame(pd.concat(lines_to_add, ignore_index=True))
+    df_to_add.set_crs(lines.crs, inplace=True)
+    df_to_add.set_index(lines.index[-1] + df_to_add.index, inplace=True)
+
+    # update length
+    df_to_add["length"] = df_to_add.to_crs(distance_crs).geometry.length
+
+    # update line endings
+    df_to_add = line_endings_to_bus_conversion(df_to_add)
+
+    # remove original lines
+    lines.drop(lines_to_split, inplace=True)
+
+    lines = df_to_add if lines.empty else pd.concat([lines, df_to_add])
+
+    lines = gpd.GeoDataFrame(lines.reset_index(drop=True), crs=lines.crs)
+
+    return lines, buses
+
+
+def build_network(inputs, outputs):
+
+    logger.info("Reading input data.")
+    buses = gpd.read_file(inputs["substations"])
+    lines = gpd.read_file(inputs["lines"])
+    links = gpd.read_file(inputs["links"])
+
+    lines = line_endings_to_bus_conversion(lines)
+    links = line_endings_to_bus_conversion(links)
+
+    logger.info(
+        "Fixing lines overpassing nodes: Connecting nodes and splitting lines."
+ ) + lines, buses = fix_overpassing_lines(lines, buses, DISTANCE_CRS, tol=1) + + # Merge buses with same voltage and within tolerance + logger.info(f"Aggregating close substations with a tolerance of {BUS_TOL} m") + + lines, links, buses = merge_stations_lines_by_station_id_and_voltage( + lines, links, buses, DISTANCE_CRS, BUS_TOL + ) + + # Recalculate lengths of lines + utm = lines.estimate_utm_crs(datum_name="WGS 84") + lines["length"] = lines.to_crs(utm).length + links["length"] = links.to_crs(utm).length + + transformers = get_transformers(buses, lines) + converters = _get_converters(buses, links, DISTANCE_CRS) + + logger.info("Saving outputs") + + ### Convert output to pypsa-eur friendly format + # Rename "substation" in buses["symbol"] to "Substation" + buses["symbol"] = buses["symbol"].replace({"substation": "Substation"}) + + # Drop unnecessary index column and set respective element ids as index + lines.set_index("line_id", inplace=True) + if not links.empty: + links.set_index("link_id", inplace=True) + converters.set_index("converter_id", inplace=True) + transformers.set_index("transformer_id", inplace=True) + buses.set_index("bus_id", inplace=True) + + # Convert voltages from V to kV + lines["voltage"] = lines["voltage"] / 1000 + if not links.empty: + links["voltage"] = links["voltage"] / 1000 + if not converters.empty: + converters["voltage"] = converters["voltage"] / 1000 + transformers["voltage_bus0"], transformers["voltage_bus1"] = ( + transformers["voltage_bus0"] / 1000, + transformers["voltage_bus1"] / 1000, + ) + buses["voltage"] = buses["voltage"] / 1000 + + # Convert 'true' and 'false' to 't' and 'f' + lines = lines.replace({True: "t", False: "f"}) + links = links.replace({True: "t", False: "f"}) + converters = converters.replace({True: "t", False: "f"}) + buses = buses.replace({True: "t", False: "f"}) + + # Change column orders + lines = lines[LINES_COLUMNS] + if not links.empty: + links = links[LINKS_COLUMNS] + else: + links = pd.DataFrame(columns=["link_id"] + LINKS_COLUMNS) + links.set_index("link_id", inplace=True) + transformers = transformers[TRANSFORMERS_COLUMNS] + + # Export to csv for base_network + buses.to_csv(outputs["substations"], quotechar="'") + lines.to_csv(outputs["lines"], quotechar="'") + links.to_csv(outputs["links"], quotechar="'") + converters.to_csv(outputs["converters"], quotechar="'") + transformers.to_csv(outputs["transformers"], quotechar="'") + + # Export to GeoJSON for quick validations + buses.to_file(outputs["substations_geojson"]) + lines.to_file(outputs["lines_geojson"]) + links.to_file(outputs["links_geojson"]) + converters.to_file(outputs["converters_geojson"]) + transformers.to_file(outputs["transformers_geojson"]) + + return None + + +if __name__ == "__main__": + # Detect running outside of snakemake and mock snakemake for testing + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake("build_osm_network") + + configure_logging(snakemake) + set_scenario_config(snakemake) + + countries = snakemake.config["countries"] + + build_network(snakemake.input, snakemake.output) diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py new file mode 100644 index 00000000..8669d0af --- /dev/null +++ b/scripts/clean_osm_data.py @@ -0,0 +1,1807 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT +""" +This script is used to clean OpenStreetMap (OSM) data for creating a PyPSA-Eur +ready network. 
+ +The script performs various cleaning operations on the OSM data, including: +- Cleaning voltage, circuits, cables, wires, and frequency columns +- Splitting semicolon-separated cells into new rows +- Distributing values to circuits based on the number of splits +- Adding line endings to substations based on line data +""" + +import json +import logging +import os +import re + +import geopandas as gpd +import numpy as np +import pandas as pd +from _helpers import configure_logging, set_scenario_config +from shapely.geometry import LineString, MultiLineString, Point, Polygon +from shapely.ops import linemerge, unary_union + +logger = logging.getLogger(__name__) + + +def _create_linestring(row): + """ + Create a LineString object from the given row. + + Args: + row (dict): A dictionary containing the row data. + + Returns: + LineString: A LineString object representing the geometry. + """ + coords = [(coord["lon"], coord["lat"]) for coord in row["geometry"]] + return LineString(coords) + + +def _create_polygon(row): + """ + Create a Shapely Polygon from a list of coordinate dictionaries. + + Parameters: + coords (list): List of dictionaries with 'lat' and 'lon' keys + representing coordinates. + + Returns: + shapely.geometry.Polygon: The constructed polygon object. + """ + # Extract coordinates as tuples + point_coords = [(coord["lon"], coord["lat"]) for coord in row["geometry"]] + + # Ensure closure by repeating the first coordinate as the last coordinate + if point_coords[0] != point_coords[-1]: + point_coords.append(point_coords[0]) + + # Create Polygon object + polygon = Polygon(point_coords) + + return polygon + + +def _find_closest_polygon(gdf, point): + """ + Find the closest polygon in a GeoDataFrame to a given point. + + Parameters: + gdf (GeoDataFrame): A GeoDataFrame containing polygons. + point (Point): A Point object representing the target point. + + Returns: + int: The index of the closest polygon in the GeoDataFrame. 
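+
+    Example (illustrative sketch with made-up geometries, not from the pipeline):
+        import geopandas as gpd
+        from shapely.geometry import Point, Polygon
+
+        gdf = gpd.GeoDataFrame(
+            geometry=[Polygon([(0, 0), (1, 0), (1, 1)]), Polygon([(5, 5), (6, 5), (6, 6)])]
+        )
+        _find_closest_polygon(gdf, Point(4.5, 4.5))  # -> 1 (second polygon is nearest)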
+ """ + # Compute the distance to each polygon + gdf["distance"] = gdf["geometry"].apply(lambda geom: point.distance(geom)) + + # Find the index of the closest polygon + closest_idx = gdf["distance"].idxmin() + + # Get the closest polygon's row + closest_polygon = gdf.loc[closest_idx] + + return closest_idx + + +def _clean_voltage(column): + """ + Function to clean the raw voltage column: manual fixing and drop nan values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning voltages.") + column = column.copy() + + column = ( + column.astype(str) + .str.lower() + .str.replace("400/220/110 kV'", "400000;220000;110000") + .str.replace("400/220/110/20_kv", "400000;220000;110000;20000") + .str.replace("2x25000", "25000;25000") + .str.replace("é", ";") + ) + + column = ( + column.astype(str) + .str.lower() + .str.replace("(temp 150000)", "") + .str.replace("low", "1000") + .str.replace("minor", "1000") + .str.replace("medium", "33000") + .str.replace("med", "33000") + .str.replace("m", "33000") + .str.replace("high", "150000") + .str.replace("23000-109000", "109000") + .str.replace("380000>220000", "380000;220000") + .str.replace(":", ";") + .str.replace("<", ";") + .str.replace(",", ";") + .str.replace("kv", "000") + .str.replace("kva", "000") + .str.replace("/", ";") + .str.replace("nan", "") + .str.replace("", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", str(x))) + + column.dropna(inplace=True) + return column + + +def _clean_circuits(column): + """ + Function to clean the raw circuits column: manual fixing and drop nan + values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning circuits.") + column = column.copy() + column = ( + column.astype(str) + .str.replace("partial", "") + .str.replace("1operator=RTE operator:wikidata=Q2178795", "") + .str.lower() + .str.replace("1,5", "3") + .str.replace("1/3", "1") + .str.replace("", "") + .str.replace("nan", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", x)) + + column.dropna(inplace=True) + return column.astype(str) + + +def _clean_cables(column): + """ + Function to clean the raw cables column: manual fixing and drop nan values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning cables.") + column = column.copy() + column = ( + column.astype(str) + .str.lower() + .str.replace("1/3", "1") + .str.replace("3x2;2", "3") + .str.replace("", "") + .str.replace("nan", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", x)) + + column.dropna(inplace=True) + return column.astype(str) + + +def _clean_wires(column): + """ + Function to clean the raw wires column: manual fixing and drop nan values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning wires.") + column = column.copy() + column = ( + column.astype(str) + .str.lower() + .str.replace("?", "") + .str.replace("trzyprzewodowe", "3") + .str.replace("pojedyńcze", "1") + .str.replace("single", "1") + .str.replace("double", "2") + .str.replace("triple", 
"3") + .str.replace("quad", "4") + .str.replace("fivefold", "5") + .str.replace("yes", "3") + .str.replace("1/3", "1") + .str.replace("3x2;2", "3") + .str.replace("_", "") + .str.replace("", "") + .str.replace("nan", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", x)) + + column.dropna(inplace=True) + return column.astype(str) + + +def _check_voltage(voltage, list_voltages): + """ + Check if the given voltage is present in the list of allowed voltages. + + Parameters: + voltage (str): The voltage to check. + list_voltages (list): A list of allowed voltages. + + Returns: + bool: True if the voltage is present in the list of allowed voltages, + False otherwise. + """ + voltages = voltage.split(";") + for v in voltages: + if v in list_voltages: + return True + return False + + +def _clean_frequency(column): + """ + Function to clean the raw frequency column: manual fixing and drop nan + values + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning frequencies.") + column = column.copy() + column = ( + column.astype(str) + .str.lower() + .str.replace("16.67", "16.7") + .str.replace("16,7", "16.7") + .str.replace("?", "") + .str.replace("hz", "") + .str.replace(" ", "") + .str.replace("", "") + .str.replace("nan", "") + ) + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;.]", "", x)) + + column.dropna(inplace=True) + return column.astype(str) + + +def _clean_rating(column): + """ + Function to clean and sum the rating columns: + + Args: + - column: pandas Series, the column to be cleaned + + Returns: + - column: pandas Series, the cleaned column + """ + logger.info("Cleaning ratings.") + column = column.copy() + column = column.astype(str).str.replace("MW", "") + + # Remove all remaining non-numeric characters except for semicolons + column = column.apply(lambda x: re.sub(r"[^0-9;]", "", x)) + + # Sum up all ratings if there are multiple entries + column = column.str.split(";").apply(lambda x: sum([int(i) for i in x])) + + column.dropna(inplace=True) + return column.astype(str) + + +def _split_cells(df, cols=["voltage"]): + """ + Split semicolon separated cells i.e. [66000;220000] and create new + identical rows. 
+ + Parameters + ---------- + df : dataframe + Dataframe under analysis + cols : list + List of target columns over which to perform the analysis + + Example + ------- + Original data: + row 1: '66000;220000', '50' + + After applying split_cells(): + row 1, '66000', '50', 2 + row 2, '220000', '50', 2 + """ + if df.empty: + return df + + # Create a dictionary to store the suffix count for each original ID + suffix_counts = {} + # Create a dictionary to store the number of splits associated with each + # original ID + num_splits = {} + + # Split cells and create new rows + x = df.assign(**{col: df[col].str.split(";") for col in cols}) + x = x.explode(cols, ignore_index=True) + + # Count the number of splits associated with each original ID + num_splits = x.groupby("id").size().to_dict() + + # Update the 'split_elements' column + x["split_elements"] = x["id"].map(num_splits) + + # Function to generate the new ID with suffix and update the number of + # splits + def generate_new_id(row): + original_id = row["id"] + if row["split_elements"] == 1: + return original_id + else: + suffix_counts[original_id] = suffix_counts.get(original_id, 0) + 1 + return f"{original_id}-{suffix_counts[original_id]}" + + # Update the ID column with the new IDs + x["id"] = x.apply(generate_new_id, axis=1) + + return x + + +def _distribute_to_circuits(row): + """ + Distributes the number of circuits or cables to individual circuits based + on the given row data. + + Parameters: + - row: A dictionary representing a row of data containing information about + circuits and cables. + + Returns: + - single_circuit: The number of circuits to be assigned to each individual + circuit. + """ + if row["circuits"] != "": + circuits = int(row["circuits"]) + else: + cables = int(row["cables"]) + circuits = cables / 3 + + single_circuit = int(max(1, np.floor_divide(circuits, row["split_elements"]))) + single_circuit = str(single_circuit) + + return single_circuit + + +def _add_line_endings_to_substations( + df_substations, + gdf_lines, + path_country_shapes, + path_offshore_shapes, + prefix, +): + """ + Add line endings to substations. + + This function takes two pandas DataFrames, `substations` and `lines`, and + adds line endings to the substations based on the information from the + lines DataFrame. + + Parameters: + - substations (pandas DataFrame): DataFrame containing information about + substations. + - lines (pandas DataFrame): DataFrame containing information about lines. + + Returns: + - buses (pandas DataFrame): DataFrame containing the updated information + about substations with line endings. 
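+
+    Example call (illustrative sketch; the file paths and the prefix value are
+    hypothetical, keyword names follow the function signature):
+        buses = _add_line_endings_to_substations(
+            df_substations,
+            gdf_lines,
+            path_country_shapes="resources/country_shapes.geojson",
+            path_offshore_shapes="resources/offshore_shapes.geojson",
+            prefix="line-end",
+        )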
+ """ + if gdf_lines.empty: + return df_substations + + logger.info("Adding line endings to substations") + # extract columns from df_substations + bus_s = pd.DataFrame(columns=df_substations.columns) + bus_e = pd.DataFrame(columns=df_substations.columns) + + # TODO pypsa-eur: fix country code to contain single country code + # Read information from gdf_lines + bus_s[["voltage", "country"]] = gdf_lines[["voltage", "country"]] + bus_s.loc[:, "geometry"] = gdf_lines.geometry.boundary.map( + lambda p: p.geoms[0] if len(p.geoms) >= 2 else None + ) + bus_s.loc[:, "lon"] = bus_s["geometry"].map(lambda p: p.x if p != None else None) + bus_s.loc[:, "lat"] = bus_s["geometry"].map(lambda p: p.y if p != None else None) + bus_s.loc[:, "dc"] = gdf_lines["dc"] + + bus_e[["voltage", "country"]] = gdf_lines[["voltage", "country"]] + bus_e.loc[:, "geometry"] = gdf_lines.geometry.boundary.map( + lambda p: p.geoms[1] if len(p.geoms) >= 2 else None + ) + bus_e.loc[:, "lon"] = bus_e["geometry"].map(lambda p: p.x if p != None else None) + bus_e.loc[:, "lat"] = bus_e["geometry"].map(lambda p: p.y if p != None else None) + bus_e.loc[:, "dc"] = gdf_lines["dc"] + + bus_all = pd.concat([bus_s, bus_e], ignore_index=True) + + # Group gdf_substations by voltage and and geometry (dropping duplicates) + bus_all = bus_all.groupby(["voltage", "lon", "lat", "dc"]).first().reset_index() + bus_all = bus_all[df_substations.columns] + bus_all.loc[:, "bus_id"] = bus_all.apply( + lambda row: f"{prefix}/{row.name + 1}", axis=1 + ) + + # Initialize default values + bus_all["station_id"] = None + # Assuming substations completed for installed lines + bus_all["under_construction"] = False + bus_all["tag_area"] = None + bus_all["symbol"] = "substation" + # TODO: this tag may be improved, maybe depending on voltage levels + bus_all["tag_substation"] = "transmission" + bus_all["tag_source"] = prefix + + buses = pd.concat([df_substations, bus_all], ignore_index=True) + buses.set_index("bus_id", inplace=True) + + # Fix country codes + # TODO pypsa-eur: Temporary solution as long as the shapes have a low, + # incomplete resolution (cf. 
2500 meters for buffering) + bool_multiple_countries = buses["country"].str.contains(";") + gdf_offshore = gpd.read_file(path_offshore_shapes).set_index("name")["geometry"] + gdf_offshore = gpd.GeoDataFrame( + gdf_offshore, geometry=gdf_offshore, crs=gdf_offshore.crs + ) + gdf_countries = gpd.read_file(path_country_shapes).set_index("name")["geometry"] + # reproject to enable buffer + gdf_countries = gpd.GeoDataFrame(geometry=gdf_countries, crs=gdf_countries.crs) + gdf_union = gdf_countries.merge( + gdf_offshore, how="outer", left_index=True, right_index=True + ) + gdf_union["geometry"] = gdf_union.apply( + lambda row: gpd.GeoSeries([row["geometry_x"], row["geometry_y"]]).union_all(), + axis=1, + ) + gdf_union = gpd.GeoDataFrame(geometry=gdf_union["geometry"], crs=crs) + gdf_buses_tofix = gpd.GeoDataFrame( + buses[bool_multiple_countries], geometry="geometry", crs=crs + ) + joined = gpd.sjoin( + gdf_buses_tofix, gdf_union.reset_index(), how="left", predicate="within" + ) + + # For all remaining rows where the country/index_right column is NaN, find + # find the closest polygon index + joined.loc[joined["name"].isna(), "name"] = joined.loc[ + joined["name"].isna(), "geometry" + ].apply(lambda x: _find_closest_polygon(gdf_union, x)) + + joined.reset_index(inplace=True) + joined = joined.drop_duplicates(subset="bus_id") + joined.set_index("bus_id", inplace=True) + + buses.loc[bool_multiple_countries, "country"] = joined.loc[ + bool_multiple_countries, "name" + ] + + return buses.reset_index() + + +def _import_lines_and_cables(path_lines): + """ + Import lines and cables from the given input paths. + + Parameters: + - path_lines (dict): A dictionary containing the input paths for lines and + cables data. + + Returns: + - df_lines (DataFrame): A DataFrame containing the imported lines and + cables data. + """ + columns = [ + "id", + "bounds", + "nodes", + "geometry", + "country", + "power", + "cables", + "circuits", + "frequency", + "voltage", + "wires", + ] + df_lines = pd.DataFrame(columns=columns) + + logger.info("Importing lines and cables") + for key in path_lines: + logger.info(f"Processing {key}...") + for idx, ip in enumerate(path_lines[key]): + if ( + os.path.exists(ip) and os.path.getsize(ip) > 400 + ): # unpopulated OSM json is about 51 bytes + country = os.path.basename(os.path.dirname(path_lines[key][idx])) + + logger.info( + f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(path_lines[key])).zfill(2)}: {ip}" + ) + with open(ip, "r") as f: + data = json.load(f) + + df = pd.DataFrame(data["elements"]) + df["id"] = df["id"].astype(str) + df["country"] = country + + col_tags = [ + "power", + "cables", + "circuits", + "frequency", + "voltage", + "wires", + ] + + tags = pd.json_normalize(df["tags"]).map( + lambda x: str(x) if pd.notnull(x) else x + ) + + for ct in col_tags: + if ct not in tags.columns: + tags[ct] = pd.NA + + tags = tags.loc[:, col_tags] + + df = pd.concat([df, tags], axis="columns") + df.drop(columns=["type", "tags"], inplace=True) + + df_lines = pd.concat([df_lines, df], axis="rows") + + else: + logger.info( + f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(path_lines[key])).zfill(2)} (empty): {ip}" + ) + continue + logger.info("---") + + return df_lines + + +def _import_links(path_links): + """ + Import links from the given input paths. + + Parameters: + - path_links (dict): A dictionary containing the input paths for links. + + Returns: + - df_links (DataFrame): A DataFrame containing the imported links data. 
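+
+    Example (illustrative; the dictionary layout is an assumption, with one
+    JSON file per country as produced by the retrieve_osm_data rule):
+        path_links = {
+            "links_relation": [
+                "data/osm-raw/DE/links_relation.json",
+                "data/osm-raw/FR/links_relation.json",
+            ],
+        }
+        df_links = _import_links(path_links)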
+    """
+    columns = [
+        "id",
+        "bounds",
+        "nodes",
+        "geometry",
+        "country",
+        "circuits",
+        "frequency",
+        "rating",
+        "voltage",
+    ]
+    df_links = pd.DataFrame(columns=columns)
+
+    logger.info("Importing links")
+    for key in path_links:
+        logger.info(f"Processing {key}...")
+        for idx, ip in enumerate(path_links[key]):
+            if (
+                os.path.exists(ip) and os.path.getsize(ip) > 400
+            ):  # unpopulated OSM json is about 51 bytes
+                country = os.path.basename(os.path.dirname(path_links[key][idx]))
+
+                logger.info(
+                    f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(path_links[key])).zfill(2)}: {ip}"
+                )
+                with open(ip, "r") as f:
+                    data = json.load(f)
+
+                df = pd.DataFrame(data["elements"])
+                df["id"] = df["id"].astype(str)
+                df["id"] = df["id"].apply(lambda x: (f"relation/{x}"))
+                df["country"] = country
+
+                col_tags = [
+                    "circuits",
+                    "frequency",
+                    "rating",
+                    "voltage",
+                ]
+
+                tags = pd.json_normalize(df["tags"]).map(
+                    lambda x: str(x) if pd.notnull(x) else x
+                )
+
+                for ct in col_tags:
+                    if ct not in tags.columns:
+                        tags[ct] = pd.NA
+
+                tags = tags.loc[:, col_tags]
+
+                df = pd.concat([df, tags], axis="columns")
+                df.drop(columns=["type", "tags"], inplace=True)
+
+                df_links = pd.concat([df_links, df], axis="rows")
+
+            else:
+                logger.info(
+                    f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(path_links[key])).zfill(2)} (empty): {ip}"
+                )
+                continue
+        logger.info("---")
+
+    logger.info("Dropping links without rating.")
+    len_before = len(df_links)
+    df_links = df_links.dropna(subset=["rating"])
+    len_after = len(df_links)
+    logger.info(
+        f"Dropped {len_before-len_after} elements without rating. "
+        + f"Imported {len_after} elements."
+    )
+
+    return df_links
+
+
+def _create_single_link(row):
+    """
+    Create a single link from multiple rows within an OSM link relation.
+
+    Parameters:
+    - row: A row of OSM data containing information about the link.
+
+    Returns:
+    - single_link: A single LineString representing the link.
+
+    This function takes a row of OSM data and extracts the relevant information
+    to create a single link. It filters out elements (substations, electrodes)
+    with invalid roles, keeps one segment per unique pair of (rounded)
+    endpoints and merges the remaining segments. If the merged result is a
+    MultiLineString, the longest linestring within it is returned.
+    """
+    valid_roles = ["line", "cable"]
+    df = pd.json_normalize(row["members"])
+    df = df[df["role"].isin(valid_roles)]
+    df.loc[:, "geometry"] = df.apply(_create_linestring, axis=1)
+    df.loc[:, "length"] = df["geometry"].apply(lambda x: x.length)
+
+    list_endpoints = []
+    for _, member in df.iterrows():
+        # Sort the segment endpoints and round them to 3 decimals to obtain a
+        # robust key for grouping parallel segments
+        endpoints = sorted([member["geometry"].coords[0], member["geometry"].coords[-1]])
+        endpoints = (
+            round(endpoints[0][0], 3),
+            round(endpoints[0][1], 3),
+            round(endpoints[1][0], 3),
+            round(endpoints[1][1], 3),
+        )
+        list_endpoints.append(endpoints)
+
+    df.loc[:, "endpoints"] = list_endpoints
+    df_longest = df.loc[df.groupby("endpoints")["length"].idxmin()]
+
+    single_link = linemerge(df_longest["geometry"].values.tolist())
+
+    # If the merged geometry is a MultiLineString, extract the longest
+    # linestring from it
+    if isinstance(single_link, MultiLineString):
+        # Find connected components
+        components = list(single_link.geoms)
+
+        # Find the longest connected linestring
+        single_link = max(components, key=lambda x: x.length)
+
+    return single_link
+
+
+def _drop_duplicate_lines(df_lines):
+    """
+    Drop duplicate lines from the given dataframe.
Duplicates are usually
+    cross-border lines or lines slightly outside the country border of focus.
+
+    Parameters:
+    - df_lines (pandas.DataFrame): The dataframe containing lines data.
+
+    Returns:
+    - df_lines (pandas.DataFrame): The dataframe with duplicate lines removed
+      and cleaned data.
+
+    This function drops duplicate lines from the given dataframe based on the
+    'id' column. It groups the duplicate rows by 'id' and aggregates the
+    'country' column to a string split by semicolon, as they appear in multiple
+    country datasets. One representative row is kept per group of duplicates.
+    Finally, the updated dataframe without duplicates is returned.
+    """
+    logger.info("Dropping duplicate lines.")
+    duplicate_rows = df_lines[df_lines.duplicated(subset=["id"], keep=False)].copy()
+
+    # Group rows by id and aggregate the country column to a string split by semicolon
+    grouped_duplicates = (
+        duplicate_rows.groupby("id")["country"].agg(lambda x: ";".join(x)).reset_index()
+    )
+    duplicate_rows.drop_duplicates(subset="id", inplace=True)
+    duplicate_rows.drop(columns=["country"], inplace=True)
+    duplicate_rows = duplicate_rows.join(
+        grouped_duplicates.set_index("id"), on="id", how="left"
+    )
+
+    len_before = len(df_lines)
+    # Drop duplicates and update the df_lines dataframe with the cleaned data
+    df_lines = df_lines[~df_lines["id"].isin(duplicate_rows["id"])]
+    df_lines = pd.concat([df_lines, duplicate_rows], axis="rows")
+    len_after = len(df_lines)
+
+    logger.info(
+        f"Dropped {len_before - len_after} duplicate elements. "
+        + f"Keeping {len_after} elements."
+    )
+
+    return df_lines
+
+
+def _filter_by_voltage(df, min_voltage=200000):
+    """
+    Filter rows in the DataFrame based on the voltage in V.
+
+    Parameters:
+    - df (pandas.DataFrame): The DataFrame containing the substations or lines data.
+    - min_voltage (int, optional): The minimum voltage value to filter the
+      rows. Defaults to 200000 [unit: V].
+
+    Returns:
+    - filtered df (pandas.DataFrame): The filtered DataFrame containing
+      the lines or substations above min_voltage.
+    - list_voltages (list): A list of unique voltage values above min_voltage.
+      The type of the list elements is string.
+    """
+    if df.empty:
+        return df, []
+
+    logger.info(
+        f"Filtering dataframe by voltage. Only keeping rows with a voltage of at least {min_voltage} V."
+    )
+    list_voltages = df["voltage"].str.split(";").explode().unique().astype(str)
+    # Keep numeric strings
+    list_voltages = list_voltages[np.vectorize(str.isnumeric)(list_voltages)]
+    list_voltages = list_voltages.astype(int)
+    list_voltages = list_voltages[list_voltages >= int(min_voltage)]
+    list_voltages = list_voltages.astype(str)
+
+    bool_voltages = df["voltage"].apply(_check_voltage, list_voltages=list_voltages)
+    len_before = len(df)
+    df = df[bool_voltages]
+    len_after = len(df)
+    logger.info(
+        f"Dropped {len_before - len_after} elements with voltage below {min_voltage}. "
+        + f"Keeping {len_after} elements."
+    )
+
+    return df, list_voltages
+
+
+def _clean_substations(df_substations, list_voltages):
+    """
+    Clean the substation data by performing the following steps:
+    - Split cells in the dataframe.
+    - Filter substation data based on specified voltages.
+    - Update the frequency values based on the split count.
+    - Split cells in the 'frequency' column.
+    - Set remaining invalid frequency values that are not in ['0', '50']
+      to '50'.
+
+    Parameters:
+    - df_substations (pandas.DataFrame): The input dataframe containing
+      substation data.
+    - list_voltages (list): A list of voltages above min_voltage to filter the
+      substation data.
+
+    Returns:
+    - df_substations (pandas.DataFrame): The cleaned substation dataframe.
+    """
+    df_substations = df_substations.copy()
+
+    df_substations = _split_cells(df_substations)
+
+    bool_voltages = df_substations["voltage"].apply(
+        _check_voltage, list_voltages=list_voltages
+    )
+    df_substations = df_substations[bool_voltages]
+    df_substations.loc[:, "split_count"] = df_substations["id"].apply(
+        lambda x: x.split("-")[1] if "-" in x else "0"
+    )
+    df_substations.loc[:, "split_count"] = df_substations["split_count"].astype(int)
+
+    bool_split = df_substations["split_elements"] > 1
+    bool_frequency_len = (
+        df_substations["frequency"].apply(lambda x: len(x.split(";")))
+        == df_substations["split_elements"]
+    )
+
+    op_freq = lambda row: row["frequency"].split(";")[row["split_count"] - 1]
+
+    df_substations.loc[bool_frequency_len & bool_split, "frequency"] = (
+        df_substations.loc[bool_frequency_len & bool_split].apply(op_freq, axis=1)
+    )
+
+    df_substations = _split_cells(df_substations, cols=["frequency"])
+    bool_invalid_frequency = df_substations["frequency"].apply(
+        lambda x: x not in ["50", "0"]
+    )
+    df_substations.loc[bool_invalid_frequency, "frequency"] = "50"
+
+    return df_substations
+
+
+def _clean_lines(df_lines, list_voltages):
+    """
+    Clean and process the `df_lines` DataFrame heuristically based on the
+    information available per respective line and cable, with further checks
+    to ensure data consistency and completeness.
+
+    Parameters
+    ----------
+    df_lines : pandas.DataFrame
+        The input DataFrame containing line information with columns such as
+        'voltage', 'circuits', 'frequency', 'cables', 'split_elements', 'id',
+        etc.
+    list_voltages : list
+        A list of unique voltage values above a certain threshold. (type: str)
+
+    Returns
+    -------
+    df_lines : pandas.DataFrame
+        The cleaned DataFrame with updated columns 'circuits', 'frequency', and
+        'cleaned' to reflect the applied transformations.
+
+    Description
+    -----------
+    This function performs the following operations:
+
+    - Initializes a 'cleaned' column with False, step-wise updates to True
+      following the respective cleaning step.
+    - Splits the voltage cells in the DataFrame at semicolons using a helper
+      function `_split_cells`.
+    - Filters the DataFrame to only include rows with valid voltages.
+    - Sets circuits of remaining lines without any applicable heuristic equal
+      to 1.
+
+    The function ensures that the resulting DataFrame has consistent and
+    complete information for further processing or analysis while maintaining
+    the data of the original OSM data set wherever possible.
+    """
+    logger.info("Cleaning lines and determining circuits.")
+    # Initiate boolean with False, only set to true if all cleaning steps are
+    # passed
+    df_lines = df_lines.copy()
+    df_lines["cleaned"] = False
+
+    df_lines["voltage_original"] = df_lines["voltage"]
+    df_lines["circuits_original"] = df_lines["circuits"]
+
+    df_lines = _split_cells(df_lines)
+    bool_voltages = df_lines["voltage"].apply(
+        _check_voltage, list_voltages=list_voltages
+    )
+    df_lines = df_lines[bool_voltages]
+
+    bool_ac = df_lines["frequency"] != "0"
+    bool_dc = ~bool_ac
+    valid_frequency = ["50", "0"]
+    bool_invalid_frequency = df_lines["frequency"].apply(
+        lambda x: x not in valid_frequency
+    )
+
+    bool_noinfo = (df_lines["cables"] == "") & (df_lines["circuits"] == "")
+    # Fill in all values where neither cables nor circuits info exists.
Assuming 1 circuit
+    df_lines.loc[bool_noinfo, "circuits"] = "1"
+    df_lines.loc[bool_noinfo & bool_invalid_frequency, "frequency"] = "50"
+    df_lines.loc[bool_noinfo, "cleaned"] = True
+
+    # Fill in all values where cables info exists and split_elements == 1
+    bool_cables_ac = (
+        (df_lines["cables"] != "")
+        & (df_lines["split_elements"] == 1)
+        & (df_lines["cables"] != "0")
+        & (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1))
+        & (df_lines["circuits"] == "")
+        & (df_lines["cleaned"] == False)
+        & bool_ac
+    )
+
+    df_lines.loc[bool_cables_ac, "circuits"] = df_lines.loc[
+        bool_cables_ac, "cables"
+    ].apply(lambda x: str(int(max(1, np.floor_divide(int(x), 3)))))
+
+    df_lines.loc[bool_cables_ac, "frequency"] = "50"
+    df_lines.loc[bool_cables_ac, "cleaned"] = True
+
+    bool_cables_dc = (
+        (df_lines["cables"] != "")
+        & (df_lines["split_elements"] == 1)
+        & (df_lines["cables"] != "0")
+        & (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1))
+        & (df_lines["circuits"] == "")
+        & (df_lines["cleaned"] == False)
+        & bool_dc
+    )
+
+    df_lines.loc[bool_cables_dc, "circuits"] = df_lines.loc[
+        bool_cables_dc, "cables"
+    ].apply(lambda x: str(int(max(1, np.floor_divide(int(x), 2)))))
+
+    df_lines.loc[bool_cables_dc, "frequency"] = "0"
+    df_lines.loc[bool_cables_dc, "cleaned"] = True
+
+    # Fill in all values where circuits info exists and split_elements == 1
+    bool_lines = (
+        (df_lines["circuits"] != "")
+        & (df_lines["split_elements"] == 1)
+        & (df_lines["circuits"] != "0")
+        & (df_lines["circuits"].apply(lambda x: len(x.split(";")) == 1))
+        & (df_lines["cleaned"] == False)
+    )
+
+    df_lines.loc[bool_lines & bool_ac, "frequency"] = "50"
+    df_lines.loc[bool_lines & bool_dc, "frequency"] = "0"
+    df_lines.loc[bool_lines, "cleaned"] = True
+
+    # Clean those values where the number of voltages split by semicolon is
+    # larger than the number of cables or circuits
+    bool_cables = (
+        (df_lines["voltage_original"].apply(lambda x: len(x.split(";")) > 1))
+        & (df_lines["cables"].apply(lambda x: len(x.split(";")) == 1))
+        & (df_lines["circuits"].apply(lambda x: len(x.split(";")) == 1))
+        & (df_lines["cleaned"] == False)
+    )
+
+    df_lines.loc[bool_cables, "circuits"] = df_lines[bool_cables].apply(
+        _distribute_to_circuits, axis=1
+    )
+    df_lines.loc[bool_cables & bool_ac, "frequency"] = "50"
+    df_lines.loc[bool_cables & bool_dc, "frequency"] = "0"
+    df_lines.loc[bool_cables, "cleaned"] = True
+
+    # Clean those values where multiple circuit values are present, divided by
+    # semicolon
+    has_multiple_circuits = df_lines["circuits"].apply(lambda x: len(x.split(";")) > 1)
+    circuits_match_split_elements = df_lines.apply(
+        lambda row: len(row["circuits"].split(";")) == row["split_elements"],
+        axis=1,
+    )
+    is_not_cleaned = df_lines["cleaned"] == False
+    bool_cables = has_multiple_circuits & circuits_match_split_elements & is_not_cleaned
+
+    df_lines.loc[bool_cables, "circuits"] = df_lines.loc[bool_cables].apply(
+        lambda row: str(row["circuits"].split(";")[int(row["id"].split("-")[-1]) - 1]),
+        axis=1,
+    )
+
+    df_lines.loc[bool_cables & bool_ac, "frequency"] = "50"
+    df_lines.loc[bool_cables & bool_dc, "frequency"] = "0"
+    df_lines.loc[bool_cables, "cleaned"] = True
+
+    # Clean those values where multiple cables values are present, divided by
+    # semicolon
+    has_multiple_cables = df_lines["cables"].apply(lambda x: len(x.split(";")) > 1)
+    cables_match_split_elements = df_lines.apply(
+        lambda row: len(row["cables"].split(";")) == row["split_elements"],
+        axis=1,
+    )
+    is_not_cleaned = df_lines["cleaned"] ==
False
+    bool_cables = has_multiple_cables & cables_match_split_elements & is_not_cleaned
+
+    df_lines.loc[bool_cables, "circuits"] = df_lines.loc[bool_cables].apply(
+        lambda row: str(
+            max(
+                1,
+                np.floor_divide(
+                    int(row["cables"].split(";")[int(row["id"].split("-")[-1]) - 1]), 3
+                ),
+            )
+        ),
+        axis=1,
+    )
+
+    df_lines.loc[bool_cables & bool_ac, "frequency"] = "50"
+    df_lines.loc[bool_cables & bool_dc, "frequency"] = "0"
+    df_lines.loc[bool_cables, "cleaned"] = True
+
+    # All remaining lines to circuits == 1
+    bool_leftover = df_lines["cleaned"] == False
+    if sum(bool_leftover) > 0:
+        str_id = "; ".join(str(id) for id in df_lines.loc[bool_leftover, "id"])
+        logger.info(f"Setting circuits of remaining {sum(bool_leftover)} lines to 1...")
+        logger.info(f"Lines affected: {str_id}")
+
+    df_lines.loc[bool_leftover, "circuits"] = "1"
+    df_lines.loc[bool_leftover & bool_ac, "frequency"] = "50"
+    df_lines.loc[bool_leftover & bool_dc, "frequency"] = "0"
+    df_lines.loc[bool_leftover, "cleaned"] = True
+
+    return df_lines
+
+
+def _create_substations_geometry(df_substations):
+    """
+    Creates a 'polygon' column that preserves the original substation
+    geometries.
+
+    Parameters:
+    df_substations (DataFrame): The input DataFrame containing the substations
+    data.
+
+    Returns:
+    df_substations (DataFrame): A new DataFrame with the
+    polygons ["polygon"] of the substations geometries.
+    """
+    logger.info("Creating substations geometry.")
+    df_substations = df_substations.copy()
+
+    # Keep the original polygons in a dedicated 'polygon' column
+    df_substations.loc[:, "polygon"] = df_substations["geometry"]
+
+    return df_substations
+
+
+def _create_substations_centroid(df_substations):
+    """
+    Creates centroids from geometries and keeps the original polygons.
+
+    Parameters:
+    df_substations (DataFrame): The input DataFrame containing the substations
+    data.
+
+    Returns:
+    df_substations (DataFrame): A new DataFrame with the centroids ["geometry"]
+    and polygons ["polygon"] of the substations geometries.
+    """
+    logger.info("Creating substations centroids.")
+    df_substations = df_substations.copy()
+
+    df_substations.loc[:, "geometry"] = df_substations["polygon"].apply(
+        lambda x: x.centroid
+    )
+
+    df_substations.loc[:, "lon"] = df_substations["geometry"].apply(lambda x: x.x)
+    df_substations.loc[:, "lat"] = df_substations["geometry"].apply(lambda x: x.y)
+
+    return df_substations
+
+
+def _create_lines_geometry(df_lines):
+    """
+    Create line geometry for the given DataFrame of lines.
+
+    Parameters:
+    - df_lines (pandas.DataFrame): DataFrame containing lines data.
+
+    Returns:
+    - df_lines (pandas.DataFrame): DataFrame with transformed 'geometry'
+      column (type: shapely LineString).
+
+    Notes:
+    - This function transforms 'geometry' column in the input DataFrame by
+      applying the '_create_linestring' function to each row.
+    - It then drops rows where the geometry has equal start and end points,
+      as these are usually not lines but outlines of areas.
+    """
+    logger.info("Creating lines geometry.")
+    df_lines = df_lines.copy()
+    df_lines.loc[:, "geometry"] = df_lines.apply(_create_linestring, axis=1)
+
+    bool_circle = df_lines["geometry"].apply(lambda x: x.coords[0] == x.coords[-1])
+    df_lines = df_lines[~bool_circle]
+
+    return df_lines
+
+
+def _add_bus_centroid_to_line(linestring, point):
+    """
+    Adds the centroid of a substation to a linestring by extending the
+    linestring with a new segment.
+
+    Parameters:
+    linestring (LineString): The original linestring to extend.
+    point (Point): The centroid of the bus.
+
+    Returns:
+    merged (LineString): The extended linestring with the new segment.
+    """
+    start = linestring.coords[0]
+    end = linestring.coords[-1]
+
+    dist_to_start = point.distance(Point(start))
+    dist_to_end = point.distance(Point(end))
+
+    if dist_to_start < dist_to_end:
+        new_segment = LineString([point.coords[0], start])
+    else:
+        new_segment = LineString([point.coords[0], end])
+
+    merged = linemerge([linestring, new_segment])
+
+    return merged
+
+
+def _finalise_substations(df_substations):
+    """
+    Finalises the substations column types.
+
+    Args:
+    df_substations (pandas.DataFrame): The input DataFrame
+    containing substations data.
+
+    Returns:
+    df_substations (pandas.DataFrame): The DataFrame with finalised column
+    types and transformed data.
+    """
+    logger.info("Finalising substations column types.")
+    df_substations = df_substations.copy()
+    # rename columns
+    df_substations.rename(
+        columns={
+            "id": "bus_id",
+            "power": "symbol",
+            "substation": "tag_substation",
+        },
+        inplace=True,
+    )
+
+    # Initiate new columns for subsequent build_osm_network step
+    df_substations.loc[:, "symbol"] = "substation"
+    df_substations.loc[:, "tag_substation"] = "transmission"
+    df_substations.loc[:, "dc"] = False
+    df_substations.loc[df_substations["frequency"] == "0", "dc"] = True
+    df_substations.loc[:, "under_construction"] = False
+    df_substations.loc[:, "station_id"] = None
+    df_substations.loc[:, "tag_area"] = None
+    df_substations.loc[:, "tag_source"] = df_substations["bus_id"]
+
+    # Only include needed columns
+    df_substations = df_substations[
+        [
+            "bus_id",
+            "symbol",
+            "tag_substation",
+            "voltage",
+            "lon",
+            "lat",
+            "dc",
+            "under_construction",
+            "station_id",
+            "tag_area",
+            "country",
+            "geometry",
+            "polygon",
+            "tag_source",
+        ]
+    ]
+
+    # Substation data types
+    df_substations["voltage"] = df_substations["voltage"].astype(int)
+
+    return df_substations
+
+
+def _finalise_lines(df_lines):
+    """
+    Finalises the lines column types.
+
+    Args:
+    df_lines (pandas.DataFrame): The input DataFrame containing lines data.
+
+    Returns:
+    df_lines (pandas.DataFrame): The DataFrame with finalised column types
+    and transformed data.
+    """
+    logger.info("Finalising lines column types.")
+    df_lines = df_lines.copy()
+    # Rename columns
+    df_lines.rename(
+        columns={
+            "id": "line_id",
+            "power": "tag_type",
+            "frequency": "tag_frequency",
+        },
+        inplace=True,
+    )
+
+    # Initiate new columns for subsequent build_osm_network step
+    df_lines.loc[:, "bus0"] = None
+    df_lines.loc[:, "bus1"] = None
+    df_lines.loc[:, "length"] = None
+    df_lines.loc[:, "underground"] = False
+    df_lines.loc[df_lines["tag_type"] == "line", "underground"] = False
+    df_lines.loc[df_lines["tag_type"] == "cable", "underground"] = True
+    df_lines.loc[:, "under_construction"] = False
+    df_lines.loc[:, "dc"] = False
+    df_lines.loc[df_lines["tag_frequency"] == "50", "dc"] = False
+    df_lines.loc[df_lines["tag_frequency"] == "0", "dc"] = True
+
+    # Only include needed columns
+    df_lines = df_lines[
+        [
+            "line_id",
+            "circuits",
+            "tag_type",
+            "voltage",
+            "tag_frequency",
+            "bus0",
+            "bus1",
+            "length",
+            "underground",
+            "under_construction",
+            "dc",
+            "country",
+            "geometry",
+        ]
+    ]
+
+    df_lines["circuits"] = df_lines["circuits"].astype(int)
+    df_lines["voltage"] = df_lines["voltage"].astype(int)
+    df_lines["tag_frequency"] = df_lines["tag_frequency"].astype(int)
+
+    return df_lines
+
+
+def _finalise_links(df_links):
+    """
+    Finalises the links column types.
+
+    Args:
+    df_links (pandas.DataFrame): The input DataFrame containing links data.
+
+    Returns:
+    df_links (pandas.DataFrame): The DataFrame with finalised column types
+    and transformed data.
+    """
+    logger.info("Finalising links column types.")
+    df_links = df_links.copy()
+    # Rename columns
+    df_links.rename(
+        columns={
+            "id": "link_id",
+            "rating": "p_nom",
+        },
+        inplace=True,
+    )
+
+    # Initiate new columns for subsequent build_osm_network step
+    df_links["bus0"] = None
+    df_links["bus1"] = None
+    df_links["length"] = None
+    df_links["underground"] = True
+    df_links["under_construction"] = False
+    df_links["dc"] = True
+
+    # Only include needed columns
+    df_links = df_links[
+        [
+            "link_id",
+            "voltage",
+            "p_nom",
+            "bus0",
+            "bus1",
+            "length",
+            "underground",
+            "under_construction",
+            "dc",
+            "country",
+            "geometry",
+        ]
+    ]
+
+    df_links["p_nom"] = df_links["p_nom"].astype(int)
+    df_links["voltage"] = df_links["voltage"].astype(int)
+
+    return df_links
+
+
+def _import_substations(path_substations):
+    """
+    Import substations from the given input paths. This function imports both
+    substations from OSM ways as well as relations that contain nested
+    information on the substations shape and electrical parameters. Ways and
+    relations are subsequently concatenated to form a single DataFrame
+    containing unique bus ids.
+
+    Args:
+    path_substations (dict): A dictionary containing input paths for
+    substations.
+
+    Returns:
+    pd.DataFrame: A DataFrame containing the imported substations data.
+    """
+    cols_substations_way = [
+        "id",
+        "geometry",
+        "country",
+        "power",
+        "substation",
+        "voltage",
+        "frequency",
+    ]
+    cols_substations_relation = [
+        "id",
+        "country",
+        "power",
+        "substation",
+        "voltage",
+        "frequency",
+    ]
+    df_substations_way = pd.DataFrame(columns=cols_substations_way)
+    df_substations_relation = pd.DataFrame(columns=cols_substations_relation)
+
+    logger.info("Importing substations")
+    for key in path_substations:
+        logger.info(f"Processing {key}...")
+        for idx, ip in enumerate(path_substations[key]):
+            if (
+                os.path.exists(ip) and os.path.getsize(ip) > 400
+            ):  # unpopulated OSM json is about 51 bytes
+                country = os.path.basename(os.path.dirname(path_substations[key][idx]))
+                logger.info(
+                    f" - Importing {key} {str(idx+1).zfill(2)}/{str(len(path_substations[key])).zfill(2)}: {ip}"
+                )
+                with open(ip, "r") as f:
+                    data = json.load(f)
+
+                df = pd.DataFrame(data["elements"])
+                df["id"] = df["id"].astype(str)
+                # new string that adds "way/" to id
+                df["id"] = df["id"].apply(
+                    lambda x: (
+                        f"way/{x}" if key == "substations_way" else f"relation/{x}"
+                    )
+                )
+                df["country"] = country
+
+                col_tags = ["power", "substation", "voltage", "frequency"]
+
+                tags = pd.json_normalize(df["tags"]).map(
+                    lambda x: str(x) if pd.notnull(x) else x
+                )
+
+                for ct in col_tags:
+                    if ct not in tags.columns:
+                        tags[ct] = pd.NA
+
+                tags = tags.loc[:, col_tags]
+
+                df = pd.concat([df, tags], axis="columns")
+
+                if key == "substations_way":
+                    df.drop(columns=["type", "tags", "bounds", "nodes"], inplace=True)
+                    df_substations_way = pd.concat(
+                        [df_substations_way, df], axis="rows"
+                    )
+                elif key == "substations_relation":
+                    df.drop(columns=["type", "tags", "bounds"], inplace=True)
+                    df_substations_relation = pd.concat(
+                        [df_substations_relation, df], axis="rows"
+                    )
+
+            else:
+                logger.info(
+                    f" - Skipping {key} {str(idx+1).zfill(2)}/{str(len(path_substations[key])).zfill(2)} (empty): {ip}"
+                )
+                continue
+        logger.info("---")
+
+    df_substations_way.drop_duplicates(subset="id",
keep="first", inplace=True)
+    df_substations_relation.drop_duplicates(subset="id", keep="first", inplace=True)
+
+    df_substations_way["geometry"] = df_substations_way.apply(_create_polygon, axis=1)
+
+    # Normalise the members column of df_substations_relation
+    cols_members = ["id", "type", "ref", "role", "geometry"]
+    df_substations_relation_members = pd.DataFrame(columns=cols_members)
+
+    for _, row in df_substations_relation.iterrows():
+        col_members = ["type", "ref", "role", "geometry"]
+        df = pd.json_normalize(row["members"])
+
+        for cm in col_members:
+            if cm not in df.columns:
+                df[cm] = pd.NA
+
+        df = df.loc[:, col_members]
+        df["id"] = str(row["id"])
+        df["ref"] = df["ref"].astype(str)
+        df = df[df["type"] != "node"]
+        df = df.dropna(subset=["geometry"])
+        df = df[~df["role"].isin(["", "incoming_line", "substation", "inner"])]
+        df_substations_relation_members = pd.concat(
+            [df_substations_relation_members, df], axis="rows"
+        )
+
+    df_substations_relation_members.reset_index(inplace=True)
+    df_substations_relation_members["linestring"] = (
+        df_substations_relation_members.apply(_create_linestring, axis=1)
+    )
+    df_substations_relation_members_grouped = (
+        df_substations_relation_members.groupby("id")["linestring"]
+        .apply(lambda x: linemerge(x.tolist()))
+        .reset_index()
+    )
+    df_substations_relation_members_grouped["geometry"] = (
+        df_substations_relation_members_grouped["linestring"].apply(
+            lambda x: x.convex_hull
+        )
+    )
+
+    df_substations_relation = (
+        df_substations_relation.join(
+            df_substations_relation_members_grouped.set_index("id"), on="id", how="left"
+        )
+        .drop(columns=["members", "linestring"])
+        .dropna(subset=["geometry"])
+    )
+
+    # reorder columns and concatenate
+    df_substations_relation = df_substations_relation[cols_substations_way]
+    df_substations = pd.concat(
+        [df_substations_way, df_substations_relation], axis="rows"
+    )
+
+    return df_substations
+
+
+def _remove_lines_within_substations(gdf_lines, gdf_substations_polygon):
+    """
+    Removes lines that are within substation polygons from the given
+    GeoDataFrame of lines. These are not needed for creating the network
+    (e.g. bus bars, switchgear, etc.).
+
+    Parameters:
+    - gdf_lines (GeoDataFrame): A GeoDataFrame containing lines with 'line_id'
+      and 'geometry' columns.
+    - gdf_substations_polygon (GeoDataFrame): A GeoDataFrame containing
+      substation polygons.
+
+    Returns:
+    GeoDataFrame: A new GeoDataFrame without lines within substation polygons.
+    """
+    logger.info("Identifying and removing lines within substation polygons...")
+    gdf = gpd.sjoin(
+        gdf_lines[["line_id", "geometry"]],
+        gdf_substations_polygon,
+        how="inner",
+        predicate="within",
+    )["line_id"]
+
+    logger.info(
+        f"Removed {len(gdf)} lines within substations of original {len(gdf_lines)} lines."
+    )
+    gdf_lines = gdf_lines[~gdf_lines["line_id"].isin(gdf)]
+
+    return gdf_lines
+
+
+def _merge_touching_polygons(df):
+    """
+    Merge touching polygons in a GeoDataFrame.
+
+    Parameters:
+    - df: pandas.DataFrame or geopandas.GeoDataFrame
+      The input DataFrame containing the polygons to be merged.
+
+    Returns:
+    - gdf: geopandas.GeoDataFrame
+      The GeoDataFrame with merged polygons.
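+
+    Example:
+        A minimal sketch with two touching unit squares and hypothetical bus
+        ids (assumes the module-level ``crs`` is set, as in the main routine):
+
+        >>> from shapely.geometry import box
+        >>> df = pd.DataFrame(
+        ...     {"bus_id": ["way/1", "way/2"], "polygon": [box(0, 0, 1, 1), box(1, 0, 2, 1)]}
+        ... )
+        >>> gdf = _merge_touching_polygons(df)
+        >>> # both rows now carry the same merged 2x1 rectangle in "polygon"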
+    """
+
+    gdf = gpd.GeoDataFrame(df, geometry="polygon", crs=crs)
+    combined_polygons = unary_union(gdf.geometry)
+    if combined_polygons.geom_type == "MultiPolygon":
+        gdf_combined = gpd.GeoDataFrame(
+            geometry=[poly for poly in combined_polygons.geoms], crs=crs
+        )
+    else:
+        gdf_combined = gpd.GeoDataFrame(geometry=[combined_polygons], crs=crs)
+
+    gdf.reset_index(drop=True, inplace=True)
+
+    for i, combined_geom in gdf_combined.iterrows():
+        mask = gdf.intersects(combined_geom.geometry)
+        gdf.loc[mask, "polygon_merged"] = combined_geom.geometry
+
+    gdf.drop(columns=["polygon"], inplace=True)
+    gdf.rename(columns={"polygon_merged": "polygon"}, inplace=True)
+
+    return gdf
+
+
+def _add_endpoints_to_line(linestring, polygon_dict):
+    """
+    Adds endpoints to a line by removing any overlapping areas with polygons.
+
+    Parameters:
+    linestring (LineString): The original line to add endpoints to.
+    polygon_dict (dict): A dictionary of polygons, where the keys are bus IDs and the values are the corresponding polygons.
+
+    Returns:
+    LineString: The modified line with added endpoints.
+    """
+    if not polygon_dict:
+        return linestring
+    polygon_centroids = {
+        bus_id: polygon.centroid for bus_id, polygon in polygon_dict.items()
+    }
+    polygon_unary = unary_union(list(polygon_dict.values()))
+
+    # difference with polygon
+    linestring_new = linestring.difference(polygon_unary)
+
+    if isinstance(linestring_new, MultiLineString):
+        # keep the longest line in the multilinestring
+        linestring_new = max(linestring_new.geoms, key=lambda x: x.length)
+
+    for p in polygon_centroids:
+        linestring_new = _add_bus_centroid_to_line(linestring_new, polygon_centroids[p])
+
+    return linestring_new
+
+
+def _get_polygons_at_endpoints(linestring, polygon_dict):
+    """
+    Get the polygons that contain the endpoints of a given linestring.
+
+    Parameters:
+    linestring (LineString): The linestring for which to find the polygons at the endpoints.
+    polygon_dict (dict): A dictionary containing polygons as values, with bus_ids as keys.
+
+    Returns:
+    dict: A dictionary containing bus_ids as keys and polygons as values, where the polygons contain the endpoints of the linestring.
+    """
+    # Get the endpoints of the linestring
+    start_point = Point(linestring.coords[0])
+    end_point = Point(linestring.coords[-1])
+
+    # Initialize dictionary to store bus_ids as keys and polygons as values
+    bus_id_polygon_dict = {}
+
+    for bus_id, polygon in polygon_dict.items():
+        if polygon.contains(start_point) or polygon.contains(end_point):
+            bus_id_polygon_dict[bus_id] = polygon
+
+    return bus_id_polygon_dict
+
+
+def _extend_lines_to_substations(gdf_lines, gdf_substations_polygon):
+    """
+    Extends the lines in the given GeoDataFrame `gdf_lines` to the centroid of
+    the nearest substations represented by the polygons in the
+    `gdf_substations_polygon` GeoDataFrame.
+
+    Parameters:
+    gdf_lines (GeoDataFrame): A GeoDataFrame containing the lines to be extended.
+    gdf_substations_polygon (GeoDataFrame): A GeoDataFrame containing the polygons representing substations.
+
+    Returns:
+    GeoDataFrame: A new GeoDataFrame with the lines extended to the substations.
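+
+    Example:
+        A minimal sketch with one line touching one substation polygon
+        (hypothetical ids; mirrors how the main routine constructs
+        ``gdf_substations_polygon`` with both a "polygon" and a "geometry"
+        column):
+
+        >>> from shapely.geometry import LineString, box
+        >>> poly = box(1.5, -0.5, 2.5, 0.5)
+        >>> gdf_lines = gpd.GeoDataFrame(
+        ...     {"line_id": ["way/1"]}, geometry=[LineString([(0, 0), (2, 0)])], crs=crs
+        ... )
+        >>> gdf_buses = gpd.GeoDataFrame(
+        ...     {"bus_id": ["way/2"], "polygon": [poly]}, geometry="polygon", crs=crs
+        ... )
+        >>> gdf_buses["geometry"] = gdf_buses.polygon.copy()
+        >>> extended = _extend_lines_to_substations(gdf_lines, gdf_buses)
+        >>> # the line is cut at the polygon border and extended to its centroid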
+ """ + gdf = gpd.sjoin( + gdf_lines, + gdf_substations_polygon.drop_duplicates(subset="polygon", inplace=False), + how="left", + lsuffix="line", + rsuffix="bus", + predicate="intersects", + ).drop(columns="index_bus") + + # Group by 'line_id' and create a dictionary mapping 'bus_id' to 'geometry_bus', excluding the grouping columns + gdf = ( + gdf.groupby("line_id") + .apply( + lambda x: x[["bus_id", "geometry_bus"]] + .dropna() + .set_index("bus_id")["geometry_bus"] + .to_dict(), + include_groups=False, + ) + .reset_index() + ) + gdf.columns = ["line_id", "bus_dict"] + + gdf["intersects_bus"] = gdf.apply(lambda row: len(row["bus_dict"]) > 0, axis=1) + + gdf.loc[:, "line_geometry"] = gdf.join( + gdf_lines.set_index("line_id")["geometry"], on="line_id" + )["geometry"] + + # Polygons at the endpoints of the linestring + gdf["bus_endpoints"] = gdf.apply( + lambda row: _get_polygons_at_endpoints(row["line_geometry"], row["bus_dict"]), + axis=1, + ) + + gdf.loc[:, "line_geometry_new"] = gdf.apply( + lambda row: _add_endpoints_to_line(row["line_geometry"], row["bus_endpoints"]), + axis=1, + ) + + gdf.set_index("line_id", inplace=True) + gdf_lines.set_index("line_id", inplace=True) + + gdf_lines.loc[:, "geometry"] = gdf["line_geometry_new"] + + return gdf_lines + + +# Function to bridge gaps between all lines + + +if __name__ == "__main__": + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake("clean_osm_data") + + configure_logging(snakemake) + set_scenario_config(snakemake) + + # Parameters + crs = "EPSG:4326" # Correct crs for OSM data + min_voltage_ac = 200000 # [unit: V] Minimum voltage value to filter AC lines. + min_voltage_dc = 150000 # [unit: V] Minimum voltage value to filter DC links. + + logger.info("---") + logger.info("SUBSTATIONS") + # Input + path_substations = { + "substations_way": snakemake.input.substations_way, + "substations_relation": snakemake.input.substations_relation, + } + + # Cleaning process + df_substations = _import_substations(path_substations) + df_substations["voltage"] = _clean_voltage(df_substations["voltage"]) + df_substations, list_voltages = _filter_by_voltage( + df_substations, min_voltage=min_voltage_ac + ) + df_substations["frequency"] = _clean_frequency(df_substations["frequency"]) + df_substations = _clean_substations(df_substations, list_voltages) + df_substations = _create_substations_geometry(df_substations) + + # Merge touching polygons + df_substations = _merge_touching_polygons(df_substations) + df_substations = _create_substations_centroid(df_substations) + df_substations = _finalise_substations(df_substations) + + # Create polygon GeoDataFrame to remove lines within substations + gdf_substations_polygon = gpd.GeoDataFrame( + df_substations[["bus_id", "polygon", "voltage"]], + geometry="polygon", + crs=crs, + ) + + gdf_substations_polygon["geometry"] = gdf_substations_polygon.polygon.copy() + + logger.info("---") + logger.info("LINES AND CABLES") + path_lines = { + "lines": snakemake.input.lines_way, + "cables": snakemake.input.cables_way, + } + + # Cleaning process + df_lines = _import_lines_and_cables(path_lines) + df_lines = _drop_duplicate_lines(df_lines) + df_lines.loc[:, "voltage"] = _clean_voltage(df_lines["voltage"]) + df_lines, list_voltages = _filter_by_voltage(df_lines, min_voltage=min_voltage_ac) + df_lines.loc[:, "circuits"] = _clean_circuits(df_lines["circuits"]) + df_lines.loc[:, "cables"] = _clean_cables(df_lines["cables"]) + df_lines.loc[:, "frequency"] = 
_clean_frequency(df_lines["frequency"]) + df_lines.loc[:, "wires"] = _clean_wires(df_lines["wires"]) + df_lines = _clean_lines(df_lines, list_voltages) + + # Drop DC lines, will be added through relations later + len_before = len(df_lines) + df_lines = df_lines[df_lines["frequency"] == "50"] + len_after = len(df_lines) + logger.info( + f"Dropped {len_before - len_after} DC lines. Keeping {len_after} AC lines." + ) + + df_lines = _create_lines_geometry(df_lines) + df_lines = _finalise_lines(df_lines) + + # Create GeoDataFrame + gdf_lines = gpd.GeoDataFrame(df_lines, geometry="geometry", crs=crs) + gdf_lines = _remove_lines_within_substations(gdf_lines, gdf_substations_polygon) + gdf_lines = _extend_lines_to_substations(gdf_lines, gdf_substations_polygon) + + logger.info("---") + logger.info("HVDC LINKS") + path_links = { + "links": snakemake.input.links_relation, + } + + df_links = _import_links(path_links) + + df_links = _drop_duplicate_lines(df_links) + df_links.loc[:, "voltage"] = _clean_voltage(df_links["voltage"]) + df_links, list_voltages = _filter_by_voltage(df_links, min_voltage=min_voltage_dc) + # Keep only highest voltage of split string + df_links.loc[:, "voltage"] = df_links["voltage"].apply( + lambda x: str(max(map(int, x.split(";")))) + ) + df_links.loc[:, "frequency"] = _clean_frequency(df_links["frequency"]) + df_links.loc[:, "rating"] = _clean_rating(df_links["rating"]) + + df_links.loc[:, "geometry"] = df_links.apply(_create_single_link, axis=1) + df_links = _finalise_links(df_links) + gdf_links = gpd.GeoDataFrame(df_links, geometry="geometry", crs=crs).set_index( + "link_id" + ) + + # Add line endings to substations + path_country_shapes = snakemake.input.country_shapes + path_offshore_shapes = snakemake.input.offshore_shapes + + df_substations = _add_line_endings_to_substations( + df_substations, + gdf_lines, + path_country_shapes, + path_offshore_shapes, + prefix="line-end", + ) + + df_substations = _add_line_endings_to_substations( + df_substations, + gdf_links, + path_country_shapes, + path_offshore_shapes, + prefix="link-end", + ) + + # Drop polygons and create GDF + gdf_substations = gpd.GeoDataFrame( + df_substations.drop(columns=["polygon"]), geometry="geometry", crs=crs + ) + + output_substations_polygon = snakemake.output["substations_polygon"] + output_substations = snakemake.output["substations"] + output_lines = snakemake.output["lines"] + output_links = snakemake.output["links"] + + logger.info( + f"Exporting clean substations with polygon shapes to {output_substations_polygon}" + ) + gdf_substations_polygon.drop(columns=["geometry"]).to_file( + output_substations_polygon, driver="GeoJSON" + ) + logger.info(f"Exporting clean substations to {output_substations}") + gdf_substations.to_file(output_substations, driver="GeoJSON") + logger.info(f"Exporting clean lines to {output_lines}") + gdf_lines.to_file(output_lines, driver="GeoJSON") + logger.info(f"Exporting clean links to {output_links}") + gdf_links.to_file(output_links, driver="GeoJSON") + + logger.info("Cleaning OSM data completed.") diff --git a/scripts/prepare_osm_network_release.py b/scripts/prepare_osm_network_release.py new file mode 100644 index 00000000..ac6b2535 --- /dev/null +++ b/scripts/prepare_osm_network_release.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT + +import logging +import os + +import pandas as pd +import pypsa +from _helpers import configure_logging, set_scenario_config + +logger = 
logging.getLogger(__name__)
+
+
+BUSES_COLUMNS = [
+    "bus_id",
+    "voltage",
+    "dc",
+    "symbol",
+    "under_construction",
+    "x",
+    "y",
+    "country",
+    "geometry",
+]
+LINES_COLUMNS = [
+    "line_id",
+    "bus0",
+    "bus1",
+    "voltage",
+    "circuits",
+    "length",
+    "underground",
+    "under_construction",
+    "geometry",
+]
+LINKS_COLUMNS = [
+    "link_id",
+    "bus0",
+    "bus1",
+    "voltage",
+    "p_nom",
+    "length",
+    "underground",
+    "under_construction",
+    "geometry",
+]
+TRANSFORMERS_COLUMNS = [
+    "transformer_id",
+    "bus0",
+    "bus1",
+    "voltage_bus0",
+    "voltage_bus1",
+    "geometry",
+]
+CONVERTERS_COLUMNS = [
+    "converter_id",
+    "bus0",
+    "bus1",
+    "voltage",
+    "geometry",
+]
+
+
+def export_clean_csv(df, columns, output_file):
+    """
+    Export a cleaned DataFrame to a CSV file.
+
+    Args:
+        df (pandas.DataFrame): The DataFrame to be exported.
+        columns (list): A list of column names to include in the exported CSV file.
+        output_file (str): The path to the output CSV file.
+
+    Returns:
+        None
+    """
+    rename_dict = {
+        "Bus": "bus_id",
+        "Line": "line_id",
+        "Link": "link_id",
+        "Transformer": "transformer_id",
+        "v_nom": "voltage",
+        "num_parallel": "circuits",
+    }
+
+    if "converter_id" in columns:
+        rename_dict["Link"] = "converter_id"
+
+    df.reset_index().rename(columns=rename_dict).loc[:, columns].replace(
+        {True: "t", False: "f"}
+    ).to_csv(output_file, index=False, quotechar="'")
+
+    return None
+
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        from _helpers import mock_snakemake
+
+        snakemake = mock_snakemake("prepare_osm_network_release")
+
+    configure_logging(snakemake)
+    set_scenario_config(snakemake)
+
+    network = pypsa.Network(snakemake.input.base_network)
+
+    network.buses["dc"] = network.buses.pop("carrier").map({"DC": "t", "AC": "f"})
+    network.lines.length = network.lines.length * 1e3
+    network.links.length = network.links.length * 1e3
+
+    # Export to clean csv for release
+    logger.info(f"Exporting {len(network.buses)} buses to {snakemake.output.buses}")
+    export_clean_csv(network.buses, BUSES_COLUMNS, snakemake.output.buses)
+
+    logger.info(
+        f"Exporting {len(network.transformers)} transformers to {snakemake.output.transformers}"
+    )
+    export_clean_csv(
+        network.transformers, TRANSFORMERS_COLUMNS, snakemake.output.transformers
+    )
+
+    logger.info(f"Exporting {len(network.lines)} lines to {snakemake.output.lines}")
+    export_clean_csv(network.lines, LINES_COLUMNS, snakemake.output.lines)
+
+    # Boolean that specifies if link element is a converter
+    is_converter = network.links.index.str.startswith("conv")
+
+    logger.info(
+        f"Exporting {len(network.links[~is_converter])} links to {snakemake.output.links}"
+    )
+    export_clean_csv(
+        network.links[~is_converter], LINKS_COLUMNS, snakemake.output.links
+    )
+
+    logger.info(
+        f"Exporting {len(network.links[is_converter])} converters to {snakemake.output.converters}"
+    )
+    export_clean_csv(
+        network.links[is_converter], CONVERTERS_COLUMNS, snakemake.output.converters
+    )
+
+    logger.info("Export of OSM network for release complete.")
diff --git a/scripts/retrieve_osm_data.py b/scripts/retrieve_osm_data.py
new file mode 100644
index 00000000..745533cf
--- /dev/null
+++ b/scripts/retrieve_osm_data.py
@@ -0,0 +1,152 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: MIT
+"""
+Retrieve OSM data for the specified country using the overpass API and save it
+to the specified output files.
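+
+A minimal usage sketch (the output mapping below is hypothetical; in the
+workflow it is provided by the rule's output files)::
+
+    retrieve_osm_data(
+        country="BE",
+        output={"lines_way": "data/osm/raw/BE/lines_way.json"},
+        features=["lines_way"],
+    )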
+
+Note that overpass requests are based on a fair
+use policy. `retrieve_osm_data` is meant to be used in a way that respects this
+policy by fetching the needed data only once.
+"""
+
+import json
+import logging
+import os
+import time
+
+import requests
+from _helpers import (
+    configure_logging,
+    set_scenario_config,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def retrieve_osm_data(
+    country,
+    output,
+    features=[
+        "cables_way",
+        "lines_way",
+        "links_relation",
+        "substations_way",
+        "substations_relation",
+    ],
+):
+    """
+    Retrieve OSM data for the specified country and save it to the specified
+    output files.
+
+    Parameters
+    ----------
+    country : str
+        The country code for which the OSM data should be retrieved.
+    output : dict
+        A dictionary mapping feature names to the corresponding output file
+        paths. Saving the OSM data to .json files.
+    features : list, optional
+        A list of OSM features to retrieve. The default is [
+            "cables_way",
+            "lines_way",
+            "links_relation",
+            "substations_way",
+            "substations_relation",
+        ].
+    """
+    # Overpass API endpoint URL
+    overpass_url = "https://overpass-api.de/api/interpreter"
+
+    features_dict = {
+        "cables_way": 'way["power"="cable"]',
+        "lines_way": 'way["power"="line"]',
+        "links_relation": 'relation["route"="power"]["frequency"="0"]',
+        "substations_way": 'way["power"="substation"]',
+        "substations_relation": 'relation["power"="substation"]',
+    }
+
+    wait_time = 5
+
+    for f in features:
+        if f not in features_dict:
+            logger.info(
+                f"Invalid feature: {f}. Supported features: {list(features_dict.keys())}"
+            )
+            raise ValueError(
+                f"Invalid feature: {f}. Supported features: {list(features_dict.keys())}"
+            )
+
+        retries = 3
+        for attempt in range(retries):
+            logger.info(
+                f" - Fetching OSM data for feature '{f}' in {country} (Attempt {attempt+1})..."
+            )
+
+            # Build the overpass query
+            op_area = f'area["ISO3166-1"="{country}"]'
+            op_query = f"""
+                [out:json];
+                {op_area}->.searchArea;
+                (
+                    {features_dict[f]}(area.searchArea);
+                );
+                out body geom;
+            """
+            try:
+                # Send the request
+                response = requests.post(overpass_url, data=op_query)
+                response.raise_for_status()  # Raise HTTPError for bad responses
+                data = response.json()
+
+                filepath = output[f]
+                parentfolder = os.path.dirname(filepath)
+                if not os.path.exists(parentfolder):
+                    os.makedirs(parentfolder)
+
+                # Use a file handle name that does not shadow the feature
+                # name `f` referenced in the log messages below
+                with open(filepath, mode="w") as fp:
+                    json.dump(data, fp, indent=2)
+                logger.info(" - Done.")
+                break  # Exit the retry loop on success
+            except (json.JSONDecodeError, requests.exceptions.RequestException) as e:
+                logger.error(f"Error for feature '{f}' in country {country}: {e}")
+                logger.debug(
+                    f"Response text: {response.text if response else 'No response'}"
+                )
+                if attempt < retries - 1:
+                    wait_time += 15
+                    logger.info(f"Waiting {wait_time} seconds before retrying...")
+                    time.sleep(wait_time)
+                else:
+                    logger.error(
+                        f"Failed to retrieve data for feature '{f}' in country {country} after {retries} attempts."
+                    )
+            except Exception as e:
+                # For now, catch any other exceptions and log them. Treat this
+ logger.error( + f"Unexpected error for feature '{f}' in country {country}: {e}" + ) + if attempt < retries - 1: + wait_time += 10 + logger.info(f"Waiting {wait_time} seconds before retrying...") + time.sleep(wait_time) + else: + logger.error( + f"Failed to retrieve data for feature '{f}' in country {country} after {retries} attempts." + ) + + +if __name__ == "__main__": + if "snakemake" not in globals(): + from _helpers import mock_snakemake + + snakemake = mock_snakemake("retrieve_osm_data", country="BE") + configure_logging(snakemake) + set_scenario_config(snakemake) + + # Retrieve the OSM data + country = snakemake.wildcards.country + output = snakemake.output + + retrieve_osm_data(country, output) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 9db3435b..741fd324 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -108,7 +108,7 @@ from scipy.sparse.csgraph import connected_components, dijkstra logger = logging.getLogger(__name__) -def simplify_network_to_380(n): +def simplify_network_to_380(n, linetype_380): """ Fix all lines to a voltage level of 380 kV and remove all transformers. @@ -132,8 +132,8 @@ def simplify_network_to_380(n): trafo_map = pd.Series(n.transformers.bus1.values, n.transformers.bus0.values) trafo_map = trafo_map[~trafo_map.index.duplicated(keep="first")] - several_trafo_b = trafo_map.isin(trafo_map.index) - trafo_map[several_trafo_b] = trafo_map[several_trafo_b].map(trafo_map) + while (several_trafo_b := trafo_map.isin(trafo_map.index)).any(): + trafo_map[several_trafo_b] = trafo_map[several_trafo_b].map(trafo_map) missing_buses_i = n.buses.index.difference(trafo_map.index) missing = pd.Series(missing_buses_i, missing_buses_i) trafo_map = pd.concat([trafo_map, missing]) @@ -600,7 +600,8 @@ if __name__ == "__main__": # remove integer outputs for compatibility with PyPSA v0.26.0 n.generators.drop("n_mod", axis=1, inplace=True, errors="ignore") - n, trafo_map = simplify_network_to_380(n) + linetype_380 = snakemake.config["lines"]["types"][380] + n, trafo_map = simplify_network_to_380(n, linetype_380) technology_costs = load_costs( snakemake.input.tech_costs, From 6c9d23ced245b099b2dda38edb3c3cb300694731 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Thu, 22 Aug 2024 15:24:22 +0200 Subject: [PATCH 36/38] config: set correct minimum number of clusters --- config/config.default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/config.default.yaml b/config/config.default.yaml index 86689ea7..0fd6e986 100644 --- a/config/config.default.yaml +++ b/config/config.default.yaml @@ -42,7 +42,7 @@ scenario: ll: - vopt clusters: - - 41 + - 38 - 128 - 256 opts: From 73d13536083218d0ff2bf1c5782c4aef922dd217 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Fri, 23 Aug 2024 09:17:21 +0200 Subject: [PATCH 37/38] cluster_network: fix if no clustering required --- scripts/cluster_network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 79184167..9e1db6a7 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -512,7 +512,7 @@ if __name__ == "__main__": busmap = n.buses.index.to_series() linemap = n.lines.index.to_series() clustering = pypsa.clustering.spatial.Clustering( - n, busmap, linemap, linemap, pd.Series(dtype="O") + n, busmap, linemap ) else: Nyears = n.snapshot_weightings.objective.sum() / 8760 From ef0bbd5f379bae377ae30e8a4fb191503d4bec20 Mon Sep 17 00:00:00 2001 From: Bobby Xiong 
<36541459+bobbyxng@users.noreply.github.com>
Date: Fri, 23 Aug 2024 17:35:46 +0200
Subject: [PATCH 38/38] Custom busmap (#1231)

* Added functionality to set custom_busmaps for different base_networks.

* Updated configtables and location for busmaps.

* Added release notes.

* Removed run parameter in mock_snakemake (only used for debugging).

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: Fabian Neumann
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 doc/configtables/enable.csv |  2 +-
 doc/release_notes.rst       |  2 ++
 rules/build_electricity.smk | 15 ++++++++++-----
 scripts/cluster_network.py  |  6 ++----
 4 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/doc/configtables/enable.csv b/doc/configtables/enable.csv
index 0268319e..5359131d 100644
--- a/doc/configtables/enable.csv
+++ b/doc/configtables/enable.csv
@@ -4,5 +4,5 @@ retrieve_databundle,bool,"{true, false}","Switch to retrieve databundle from zen
 retrieve_cost_data,bool,"{true, false}","Switch to retrieve technology cost data from `technology-data repository `_."
 build_cutout,bool,"{true, false}","Switch to enable the building of cutouts via the rule :mod:`build_cutout`."
 retrieve_cutout,bool,"{true, false}","Switch to enable the retrieval of cutouts from zenodo with :mod:`retrieve_cutout`."
-custom_busmap,bool,"{true, false}","Switch to enable the use of custom busmaps in rule :mod:`cluster_network`. If activated the rule looks for provided busmaps at ``data/custom_busmap_elec_s{simpl}_{clusters}.csv`` which should have the same format as ``resources/busmap_elec_s{simpl}_{clusters}.csv``, i.e. the index should contain the buses of ``networks/elec_s{simpl}.nc``."
+custom_busmap,bool,"{true, false}","Switch to enable the use of custom busmaps in rule :mod:`cluster_network`. If activated the rule looks for provided busmaps at ``data/busmaps/elec_s{simpl}_{clusters}_{base_network}.csv`` which should have the same format as ``resources/busmap_elec_s{simpl}_{clusters}.csv``, i.e. the index should contain the buses of ``networks/elec_s{simpl}.nc``. {base_network} is the name of the selected base_network in electricity, e.g. ``gridkit``, ``osm-prebuilt``, or ``osm-raw``."
 drop_leap_day,bool,"{true, false}","Switch to drop February 29 from all time-dependent data in leap years"
diff --git a/doc/release_notes.rst b/doc/release_notes.rst
index b1345e0c..4df38b01 100644
--- a/doc/release_notes.rst
+++ b/doc/release_notes.rst
@@ -91,6 +91,8 @@ Upcoming Release
 * In :mod:`base_network`, replace own voronoi polygon calculation function with
   Geopandas `gdf.voronoi_polygons` method.
 
+* Move custom busmaps to ``data/busmaps/elec_s{simpl}_{clusters}_{base_network}.csv`` (if enabled). This allows for different busmaps depending on the base network and scenario.
+ PyPSA-Eur 0.11.0 (25th May 2024) ===================================== diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index 4446ef76..1f568e99 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -530,6 +530,15 @@ rule simplify_network: "../scripts/simplify_network.py" +# Optional input when using custom busmaps - Needs to be tailored to selected base_network +def input_cluster_network(w): + if config_provider("enable", "custom_busmap", default=False)(w): + base_network = config_provider("electricity", "base_network")(w) + custom_busmap = f"data/busmaps/elec_s{w.simpl}_{w.clusters}_{base_network}.csv" + return {"custom_busmap": custom_busmap} + return {"custom_busmap": []} + + rule cluster_network: params: cluster_network=config_provider("clustering", "cluster_network"), @@ -546,15 +555,11 @@ rule cluster_network: length_factor=config_provider("lines", "length_factor"), costs=config_provider("costs"), input: + unpack(input_cluster_network), network=resources("networks/elec_s{simpl}.nc"), regions_onshore=resources("regions_onshore_elec_s{simpl}.geojson"), regions_offshore=resources("regions_offshore_elec_s{simpl}.geojson"), busmap=ancient(resources("busmap_elec_s{simpl}.csv")), - custom_busmap=lambda w: ( - "data/custom_busmap_elec_s{simpl}_{clusters}.csv" - if config_provider("enable", "custom_busmap", default=False)(w) - else [] - ), tech_costs=lambda w: resources( f"costs_{config_provider('costs', 'year')(w)}.csv" ), diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 9e1db6a7..b63747c2 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -36,7 +36,7 @@ Inputs - ``resources/regions_offshore_elec_s{simpl}.geojson``: confer :ref:`simplify` - ``resources/busmap_elec_s{simpl}.csv``: confer :ref:`simplify` - ``networks/elec_s{simpl}.nc``: confer :ref:`simplify` -- ``data/custom_busmap_elec_s{simpl}_{clusters}.csv``: optional input +- ``data/custom_busmap_elec_s{simpl}_{clusters}_{base_network}.csv``: optional input Outputs ------- @@ -511,9 +511,7 @@ if __name__ == "__main__": # Fast-path if no clustering is necessary busmap = n.buses.index.to_series() linemap = n.lines.index.to_series() - clustering = pypsa.clustering.spatial.Clustering( - n, busmap, linemap - ) + clustering = pypsa.clustering.spatial.Clustering(n, busmap, linemap) else: Nyears = n.snapshot_weightings.objective.sum() / 8760