pypsa-eur/rules/retrieve.smk

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

492 lines
16 KiB
Plaintext
Raw Normal View History

2024-02-19 15:21:48 +00:00
# SPDX-FileCopyrightText: : 2023-2024 The PyPSA-Eur Authors
#
# SPDX-License-Identifier: MIT
import requests
from datetime import datetime, timedelta
from shutil import move, unpack_archive
2023-07-14 06:53:39 +00:00
# Resolve the "auto" default of config["enable"]["retrieve"] by probing for
# internet access; any explicitly configured value is left as it is.
if config["enable"].get("retrieve", "auto") == "auto":
    config["enable"]["retrieve"] = has_internet_access()

# The retrieval rules in this file are guarded by this flag, so tell the user
# when it ended up disabled.
if config["enable"]["retrieve"] is False:
    print("Datafile downloads disabled in config[retrieve] or no internet access.")
if config["enable"]["retrieve"] and config["enable"].get("retrieve_databundle", True):
    # NOTE(review): all four rules below are assumed to live under this guard
    # (every other rule in this file is guarded as well) -- confirm.

    # Files expected below data/bundle/ once the databundle has been retrieved.
    datafiles = [
        "je-e-21.03.02.xls",
        "NUTS_2013_60M_SH/data/NUTS_RG_60M_2013.shp",
        "nama_10r_3popgdp.tsv.gz",
        "nama_10r_3gdp.tsv.gz",
        "corine/g250_clc06_V18_5.tif",
        "eea/UNFCCC_v23.csv",
        "nuts/NUTS_RG_10M_2013_4326_LEVL_2.geojson",
        "emobility/KFZ__count",
        "emobility/Pkw__count",
        "h2_salt_caverns_GWh_per_sqkm.geojson",
        "natura/natura.tiff",
        "gebco/GEBCO_2014_2D.nc",
        "GDP_per_capita_PPP_1990_2015_v2.nc",
        "ppp_2013_1km_Aggregated.tif",
    ]

    # Produces every file listed in `datafiles` plus the jrc-idees-2015 folder.
    rule retrieve_databundle:
        output:
            expand("data/bundle/{file}", file=datafiles),
            directory("data/bundle/jrc-idees-2015"),
        log:
            "logs/retrieve_databundle.log",
        resources:
            mem_mb=1000,
        retries: 2
        conda:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_databundle.py"

    # Produces the Eurostat balances directory via a dedicated script.
    rule retrieve_eurostat_data:
        output:
            directory("data/eurostat/Balances-April2023"),
        log:
            "logs/retrieve_eurostat_data.log",
        retries: 2
        conda:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_eurostat_data.py"

    # Produces the JRC-IDEES 2021 directory. Unlike its siblings this rule
    # declares no conda environment.
    rule retrieve_jrc_idees:
        output:
            directory("data/jrc-idees-2021"),
        log:
            "logs/retrieve_jrc_idees.log",
        retries: 2
        script:
            "../scripts/retrieve_jrc_idees.py"

    # Produces the Eurostat household energy balances CSV.
    rule retrieve_eurostat_household_data:
        output:
            "data/eurostat/eurostat-household_energy_balances-february_2024.csv",
        log:
            "logs/retrieve_eurostat_household_data.log",
        retries: 2
        conda:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_eurostat_household_data.py"
2023-07-14 06:53:39 +00:00
if config["enable"]["retrieve"] and config["enable"].get("retrieve_cutout", True):

    # Takes {cutout}.nc from Zenodo record 12791128 (via storage()), moves it
    # to its place below cutouts/ and passes both paths to validate_checksum.
    rule retrieve_cutout:
        input:
            storage(
                "https://zenodo.org/records/12791128/files/{cutout}.nc",
            ),
        output:
            "cutouts/" + CDIR + "{cutout}.nc",
        log:
            "logs/" + CDIR + "retrieve_cutout_{cutout}.log",
        resources:
            mem_mb=5000,
        retries: 2
        run:
            fetched, target = input[0], output[0]
            move(fetched, target)
            validate_checksum(target, fetched)
2023-07-14 06:53:39 +00:00
if config["enable"]["retrieve"] and config["enable"].get("retrieve_cost_data", True):

    # Produces costs_{year}.csv; the cost data version is handed to the script
    # through params, read from the ("costs", "version") config entry.
    rule retrieve_cost_data:
        params:
            version=config_provider("costs", "version"),
        output:
            resources("costs_{year}.csv"),
        log:
            logs("retrieve_cost_data_{year}.log"),
        resources:
            mem_mb=1000,
        retries: 2
        conda:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_cost_data.py"
if config["enable"]["retrieve"]:
    # GeoJSON files expected below data/gas_network/scigrid-gas/data/.
    # Note that this rebinds the module-level name `datafiles`.
    datafiles = [
        "IGGIELGN_LNGs.geojson",
        "IGGIELGN_BorderPoints.geojson",
        "IGGIELGN_Productions.geojson",
        "IGGIELGN_Storages.geojson",
        "IGGIELGN_PipeSegments.geojson",
    ]

    # Produces all files listed in `datafiles` via a dedicated script.
    rule retrieve_gas_infrastructure_data:
        output:
            expand("data/gas_network/scigrid-gas/data/{files}", files=datafiles),
        log:
            "logs/retrieve_gas_infrastructure_data.log",
        retries: 2
        conda:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_gas_infrastructure_data.py"
if config["enable"]["retrieve"]:

    # Produces the raw electricity demand CSV; two version strings are passed
    # to the script through params.
    rule retrieve_electricity_demand:
        params:
            versions=["2019-06-05", "2020-10-06"],
        output:
            "data/electricity_demand_raw.csv",
        log:
            "logs/retrieve_electricity_demand.log",
        resources:
            mem_mb=5000,
        retries: 2
        conda:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_electricity_demand.py"
2023-04-29 10:40:55 +00:00
if config["enable"]["retrieve"]:

    # Takes demand_hourly.csv from Zenodo record 10820928 (via storage()) and
    # moves it to data/load_synthetic_raw.csv.
    rule retrieve_synthetic_electricity_demand:
        input:
            storage(
                "https://zenodo.org/records/10820928/files/demand_hourly.csv",
            ),
        output:
            "data/load_synthetic_raw.csv",
        log:
            "logs/retrieve_synthetic_electricity_demand.log",
        resources:
            mem_mb=5000,
        retries: 2
        run:
            fetched, target = input[0], output[0]
            move(fetched, target)
2023-07-14 06:53:39 +00:00
if config["enable"]["retrieve"]:

    # Takes shipdensity_global.zip from Zenodo record 12760663 (via storage()),
    # moves it below data/ and passes both paths to validate_checksum.
    rule retrieve_ship_raster:
        input:
            storage(
                "https://zenodo.org/records/12760663/files/shipdensity_global.zip",
                keep_local=True,
            ),
        output:
            "data/shipdensity_global.zip",
        log:
            "logs/retrieve_ship_raster.log",
        resources:
            mem_mb=5000,
        retries: 2
        run:
            fetched, target = input[0], output[0]
            move(fetched, target)
            validate_checksum(target, fetched)
2023-12-18 10:57:21 +00:00
if config["enable"]["retrieve"]:

    # Copernicus Global Land Cover, used for land cover and land use.
    # Website: https://land.copernicus.eu/global/products/lc
    # The GeoTIFF comes from Zenodo record 3939050 and is stored below data/
    # under a "Copernicus_"-prefixed file name.
    rule download_copernicus_land_cover:
        input:
            storage(
                "https://zenodo.org/records/3939050/files/PROBAV_LC100_global_v3.0.1_2019-nrt_Discrete-Classification-map_EPSG-4326.tif",
            ),
        output:
            "data/Copernicus_LC100_global_v3.0.1_2019-nrt_Discrete-Classification-map_EPSG-4326.tif",
        run:
            fetched, target = input[0], output[0]
            move(fetched, target)
            validate_checksum(target, fetched)
if config["enable"]["retrieve"]:

    # LUISA Base Map, used for land cover and land use.
    # Website: https://ec.europa.eu/jrc/en/luisa
    # The GeoTIFF is taken from the JRC open-data server and moved below data/.
    rule retrieve_luisa_land_cover:
        input:
            storage(
                "https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/LUISA/EUROPE/Basemaps/LandUse/2018/LATEST/LUISA_basemap_020321_50m.tif",
            ),
        output:
            "data/LUISA_basemap_020321_50m.tif",
        run:
            fetched, target = input[0], output[0]
            move(fetched, target)
if config["enable"]["retrieve"]:

    # Downloads the World EEZ v12 low-resolution archive from marineregions.org
    # by submitting its download form, unpacks it next to the temporary zip and
    # removes the zip afterwards.
    rule retrieve_eez:
        params:
            zip="data/eez/World_EEZ_v12_20231025_LR.zip",
        output:
            gpkg="data/eez/World_EEZ_v12_20231025_LR/eez_v12_lowres.gpkg",
        run:
            import os
            import requests
            from pathlib import Path
            from uuid import uuid4

            # The form's name/organisation/email fields are filled with random
            # 8-character UUID fragments.
            name = str(uuid4())[:8]
            org = str(uuid4())[:8]

            response = requests.post(
                "https://www.marineregions.org/download_file.php",
                params={"name": "World_EEZ_v12_20231025_LR.zip"},
                data={
                    "name": name,
                    "organisation": org,
                    "email": f"{name}@{org}.org",
                    "country": "Germany",
                    "user_category": "academia",
                    "purpose_category": "Research",
                    "agree": "1",
                },
            )
            # Fail right here on an HTTP error instead of writing the error
            # body to disk and failing later while unpacking a bogus archive.
            response.raise_for_status()

            with open(params["zip"], "wb") as f:
                f.write(response.content)

            output_folder = Path(params["zip"]).parent
            unpack_archive(params["zip"], output_folder)
            os.remove(params["zip"])
Addition of unsustainable biomass potentials (#1139) * add columns to potential df defined by difference to eurostat * add network components * add unsustainable bioliquids * replaced stores by generators, still infeasible * remove municipal waste * remove separate treatment of waste from biomass potential calculation * phase out unsustainble biomass potentials * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * phase-out unsustainable bioliquids * remove waste_incineration from build_sector rule * multiple potential calculation for different planning horizons * raised costs of unsustainable solid biomass * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * stores instead of generators * change snakemake inputs * add phas-eout to config * add techcolor for unsustainable bioliquids * add config parameter to disable inclusion of unsustainable bioenergy potentials * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add biomass to params * remove call of snakemake object in define_spatial * Quick resolve of review part 1 (config parameters, if-clause-reduction, bioliquid spatial, fix bioliquid link capacity * Quick resolve of review part 2 (config table, helper function, fixed build_eurostat, removed dir-change, forced unsustainable usage, reverted overnight distinction in Snakefile) * Cast of planning_horizon parameter to int type after test run * added JRC fuel costs for solid and liquid biofuels, BtL VOM * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * clean-up after master merge * adressed review (increase threads for build_eurostat, fixed e_max_pu of Stores, changed version of technology-data); added release note --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: lisazeyen 
<35347358+lisazeyen@users.noreply.github.com>
2024-08-07 15:52:00 +00:00
if config["enable"]["retrieve"]:

    # Download directly from naciscdn.org which is a redirect from naturalearth.com
    # (https://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-admin-0-countries/)
    # Use point-of-view (POV) variant of Germany so that Crimea is included.
    #
    # The archive is moved to params.zip, unpacked into the folder of the
    # output shapefile and then deleted.
    rule retrieve_naturalearth_countries:
        input:
            storage(
                "https://naciscdn.org/naturalearth/10m/cultural/ne_10m_admin_0_countries_deu.zip"
            ),
        params:
            zip="data/naturalearth/ne_10m_admin_0_countries_deu.zip",
        output:
            countries="data/naturalearth/ne_10m_admin_0_countries_deu.shp",
        run:
            # Import explicitly (as retrieve_eez does for `os`) rather than
            # relying on these names happening to exist in the surrounding
            # namespace.
            import os
            from pathlib import Path

            move(input[0], params["zip"])
            output_folder = Path(output["countries"]).parent
            unpack_archive(params["zip"], output_folder)
            os.remove(params["zip"])
if config["enable"]["retrieve"]:

    # Downloads the Europe Gas Tracker spreadsheet from Global Energy Monitor
    # with a plain GET request (sent with a browser-like User-Agent header) and
    # writes the response body to the output file.
    rule retrieve_gem_europe_gas_tracker:
        output:
            "data/gem/Europe-Gas-Tracker-2024-05.xlsx",
        run:
            import requests

            response = requests.get(
                "https://globalenergymonitor.org/wp-content/uploads/2024/05/Europe-Gas-Tracker-2024-05.xlsx",
                headers={"User-Agent": "Mozilla/5.0"},
            )
            # Without this check an HTTP error page would be saved as the
            # .xlsx output and the rule would appear to have succeeded.
            response.raise_for_status()

            with open(output[0], "wb") as f:
                f.write(response.content)
if config["enable"]["retrieve"]:

    # Some logic to find the correct file URL
    # Sometimes files are released delayed or ahead of schedule, check which file is currently available

    def check_file_exists(url):
        """Return True if a HEAD request to ``url`` is answered with HTTP 200."""
        response = requests.head(url)
        return response.status_code == 200

    # Basic pattern where WDPA files can be found
    url_pattern = (
        "https://d1gam3xoknrgr2.cloudfront.net/current/WDPA_{bYYYY}_Public_shp.zip"
    )

    # 3-letter month + 4 digit year for current/previous/next month to test.
    # Anchor on the first day of the current month: shifting today's date by a
    # flat 30 days can stay inside the current month (e.g. on the 31st) or jump
    # over February (e.g. from 30 January), so the wrong months would be tested.
    first_of_month = datetime.now().replace(day=1)
    current_monthyear = first_of_month.strftime("%b%Y")
    # One day before the 1st is always the last day of the previous month.
    prev_monthyear = (first_of_month - timedelta(days=1)).strftime("%b%Y")
    # 32 days after the 1st always falls into the following month.
    next_monthyear = (first_of_month + timedelta(days=32)).strftime("%b%Y")

    # Test prioritised: current month -> previous -> next
    for bYYYY in [current_monthyear, prev_monthyear, next_monthyear]:
        if check_file_exists(url := url_pattern.format(bYYYY=bYYYY)):
            break
    else:
        # If none of the three URLs are working
        url = False

    assert (
        url
    ), f"No WDPA files found at {url_pattern} for bY='{current_monthyear}, {prev_monthyear}, or {next_monthyear}'"

    # Downloading protected area database from WDPA
    # extract the main zip and then merge the contained 3 zipped shapefiles
    # Website: https://www.protectedplanet.net/en/thematic-areas/wdpa
    rule download_wdpa:
        input:
            storage(url, keep_local=True),
        params:
            zip="data/WDPA_shp.zip",
            folder=directory("data/WDPA"),
        output:
            gpkg="data/WDPA.gpkg",
        run:
            shell("cp {input} {params.zip}")
            shell("unzip -o {params.zip} -d {params.folder}")
            # `bYYYY` is the month tag selected by the URL check above.
            for i in range(3):
                # vsizip is special driver for directly working with zipped shapefiles in ogr2ogr
                layer_path = (
                    f"/vsizip/{params.folder}/WDPA_{bYYYY}_Public_shp_{i}.zip"
                )
                print(f"Adding layer {i+1} of 3 to combined output file.")
                shell("ogr2ogr -f gpkg -update -append {output.gpkg} {layer_path}")

    rule download_wdpa_marine:
        # Downloading Marine protected area database from WDPA
        # extract the main zip and then merge the contained 3 zipped shapefiles
        # Website: https://www.protectedplanet.net/en/thematic-areas/marine-protected-areas
        # The marine URL reuses the month tag found for the terrestrial file;
        # its availability is not probed separately.
        input:
            storage(
                f"https://d1gam3xoknrgr2.cloudfront.net/current/WDPA_WDOECM_{bYYYY}_Public_marine_shp.zip",
                keep_local=True,
            ),
        params:
            zip="data/WDPA_WDOECM_marine.zip",
            folder=directory("data/WDPA_WDOECM_marine"),
        output:
            gpkg="data/WDPA_WDOECM_marine.gpkg",
        run:
            shell("cp {input} {params.zip}")
            shell("unzip -o {params.zip} -d {params.folder}")
            for i in range(3):
                # vsizip is special driver for directly working with zipped shapefiles in ogr2ogr
                layer_path = f"/vsizip/{params.folder}/WDPA_WDOECM_{bYYYY}_Public_marine_shp_{i}.zip"
                print(f"Adding layer {i+1} of 3 to combined output file.")
                shell("ogr2ogr -f gpkg -update -append {output.gpkg} {layer_path}")
if config["enable"]["retrieve"]:

    # Takes the 2019 EUA spot primary market auction report from eex.com (via
    # storage()) and moves it below data/validation/.
    rule retrieve_monthly_co2_prices:
        input:
            storage(
                "https://www.eex.com/fileadmin/EEX/Downloads/EUA_Emission_Spot_Primary_Market_Auction_Report/Archive_Reports/emission-spot-primary-market-auction-report-2019-data.xls",
                keep_local=True,
            ),
        output:
            "data/validation/emission-spot-primary-market-auction-report-2019-data.xls",
        log:
            "logs/retrieve_monthly_co2_prices.log",
        resources:
            mem_mb=5000,
        retries: 2
        run:
            fetched, target = input[0], output[0]
            move(fetched, target)
if config["enable"]["retrieve"]:

    # Produces the energy price trends spreadsheet below data/validation/ via a
    # dedicated script.
    rule retrieve_monthly_fuel_prices:
        output:
            "data/validation/energy-price-trends-xlsx-5619002.xlsx",
        log:
            "logs/retrieve_monthly_fuel_prices.log",
        resources:
            mem_mb=5000,
        retries: 2
        conda:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_monthly_fuel_prices.py"
Introducing OpenStreetMap high-voltage grid to PyPSA-Eur (#1079) * Implemented which uses the overpass API to download power features for individual countries. * Extended rule by input. * Bug fixes and improvements to clean_osm_data.py. Added in retrieve_osm_data.py. * Updated clean_osm_data and retrieve_osm_data to create clean substations. * Finished clean_osm_data function. * Added check whether line is a circle. If so, drop it. * Extended build_electricity.smk by build_osm_network.py * Added build_osm_network * Working osm-network-fast * Bug fixes. * Finalised and cleaned including docstrings. * Added try catch to retrieve_osm_data. Allows for parallelisation of downloads. * Updated cleaning process. * Set maximum number of threads for retrieving to 4, wrt. fair usage policy and potential request errors. * Intermediate update on clean_osm_data.py. Added docstrings. * Bug fix. * Bug fix. * Bug fixes in data types out of clean_osm_data * Significant improvements to retrieve_osm_data, clean_osm_data. Cleaned code. Speed improvements * Cleaned config. * Fixes. * Bug fixes. * Updated default config * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Removed overpass from required packages. Not needed anymore. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Added links_relations (route = power, frequency = 0) to retrieval. This will change how HVDC links are extracted in the near future. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Work-in-progress clean_osm_data * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Added clean links output to clean_osm_data. Script uses OSM relations to retrieve clean HVDC links. 
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * New code for integrating HVDC links. Using relations. Base network implementation functioning. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removed manual line dropping. * Updated clean script * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * reverted Snakefile to default: sync settings * added prebuilt functionality. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Updated build_electricity.smk to work with scenario management. * removed commented-out code. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removed commented-out code. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fixed bug in pdf export by substituting pdf export with svg. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Bug-fix Snakefile * dropped not needed columns from build_osm_network. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Updated build_shapes, config.default and clean_osm_data. * pre-commit changes. * test * Added initial prepare_osm_network_release.py script * Finalised prepare_osm_network_release script to build clean and stable OSM base_network input files. * Added new rules/development.smk * Updated clean_osm_data to add substation_centroid to linestrings * Updated clean_osm_data to add substation_centroid to linestrings * Updated clean_osm_data to add substation_centroid to linestrings * Updated clean_osm_data to add substation_centroid to linestrings * Added osm-prebuilt functionality and zenodo sandbox repository. 
* Updated clean_osm_data to geopandas v.1.01 * Made base_network and build_osm_network function more robust for empty links. * Made base_network and build_osm_network function more robust for empty links. * Bug fix in base_network. Voltage level null is now kept (relevant e.g. for Corsica) * Merge with hcanges in upstream PR 1146. Fixing UA and MD. * Updated Zenodo and fixed prepare_osm_network_release * Updated osm network release. * Updated prepare osm network release. * Updated MD, UA scripts. * Cleaned determine_availability_matrix_MD_UA.py, removed redundant code * Bug fixes. * Bug fixes for UA MD scripts. * Rename of build script. * Bug fix: only distribute load to buses with substation. * Updated zenodo sandbox repository. * Updated config.default * Cleaned config.default.yaml: Related settings grouped together and redundant voltage settings aggregated. * Cleaned config.default.yaml: Related settings grouped together and redundant voltage settings aggregated. Added release notes. * Updated Zenodo repositories for OSM-prebuilt to offcial publication. * Updated configtables * Updated links.csv: Under_construction lines to in commission. * Updated link 8394 and parameter_corrections: Continuation of North-Sea-Link. * Major update: fix simplify_network, fix Corsica, updated build_osm_network to include lines overpassing nodes. * remove config backup * Bug fix: Carrier type of all supernodes corrected to 'AC' * Bug fix: Carrier type of all supernodes corrected to 'AC' * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Updated rules and base_network for compatibility with TYNDP projects. * Updated Zenodo repository and prebuilt network to include 150 kV HVDC connections. * Removed outdated config backup. * Implemented all comments from PR #1079. Cleaned up OSM implementation. * Bug fix: Added all voltages, 200 kV-750 kV, to default config. * Cleaning and bugfixes. 
* Updated Zenodo repository to https://zenodo.org/records/13358976. Added converter voltages, 'underground' property for DC lines/cables, and included Konti-Skan HVDC (DK-SE). Added compatibility with https://github.com/PyPSA/pypsa-eur/pull/1079 and https://github.com/PyPSA/pypsa-eur/pull/1085 * Apply suggestions from code review * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * simplify_network: handle complicated transformer topologies * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * syntax fix --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Fabian Neumann <fabian.neumann@outlook.de>
2024-08-22 13:01:20 +00:00
if config["enable"]["retrieve"] and (
    config["electricity"]["base_network"] == "osm-prebuilt"
):

    # Takes the five prebuilt network CSVs from Zenodo record 13358976 (via
    # storage()) and moves each one to data/osm-prebuilt/. Inputs and outputs
    # share the same keys, which is what the run block relies on.
    rule retrieve_osm_prebuilt:
        input:
            buses=storage("https://zenodo.org/records/13358976/files/buses.csv"),
            converters=storage(
                "https://zenodo.org/records/13358976/files/converters.csv"
            ),
            lines=storage("https://zenodo.org/records/13358976/files/lines.csv"),
            links=storage("https://zenodo.org/records/13358976/files/links.csv"),
            transformers=storage(
                "https://zenodo.org/records/13358976/files/transformers.csv"
            ),
        output:
            buses="data/osm-prebuilt/buses.csv",
            converters="data/osm-prebuilt/converters.csv",
            lines="data/osm-prebuilt/lines.csv",
            links="data/osm-prebuilt/links.csv",
            transformers="data/osm-prebuilt/transformers.csv",
        log:
            "logs/retrieve_osm_prebuilt.log",
        threads: 1
        resources:
            mem_mb=500,
        retries: 2
        run:
            for key in input.keys():
                fetched, target = input[key], output[key]
                move(fetched, target)
                validate_checksum(target, fetched)
if config["enable"]["retrieve"] and (
    config["electricity"]["base_network"] == "osm-raw"
):

    # Produces five raw OSM JSON files per {country} below data/osm-raw/ via a
    # dedicated script.
    rule retrieve_osm_data:
        output:
            cables_way="data/osm-raw/{country}/cables_way.json",
            lines_way="data/osm-raw/{country}/lines_way.json",
            links_relation="data/osm-raw/{country}/links_relation.json",
            substations_way="data/osm-raw/{country}/substations_way.json",
            substations_relation="data/osm-raw/{country}/substations_relation.json",
        log:
            "logs/retrieve_osm_data_{country}.log",
        threads: 1
        conda:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_osm_data.py"
if config["enable"]["retrieve"] and (
config["electricity"]["base_network"] == "osm-raw"
):
rule retrieve_osm_data_all:
input:
expand(
"data/osm-raw/{country}/cables_way.json",
country=config_provider("countries"),
),
expand(
"data/osm-raw/{country}/lines_way.json",
country=config_provider("countries"),
),
expand(
"data/osm-raw/{country}/links_relation.json",
country=config_provider("countries"),
),
expand(
"data/osm-raw/{country}/substations_way.json",
country=config_provider("countries"),
),
expand(
"data/osm-raw/{country}/substations_relation.json",
country=config_provider("countries"),
),