pypsa-eur/rules/retrieve.smk

# SPDX-FileCopyrightText: : 2023 The PyPSA-Eur Authors
#
# SPDX-License-Identifier: MIT

from shutil import move

import requests
from datetime import datetime, timedelta

# `HTTP` is the remote provider used by the download rules below. In the full
# workflow it (like the LOGS, RESOURCES and CDIR path prefixes) is typically
# defined in the main Snakefile.
from snakemake.remote.HTTP import RemoteProvider as HTTPRemoteProvider

HTTP = HTTPRemoteProvider()
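
# `has_internet_access` is called below but defined in a shared helper of the
# workflow, not in this file. A minimal sketch of such a check, assuming a
# plain HTTP probe suffices (illustrative, not the project's implementation):
def has_internet_access(url="https://www.zenodo.org", timeout=5):
    try:
        # Any response, even an HTTP error status, proves connectivity.
        requests.head(url, timeout=timeout)
        return True
    except requests.exceptions.RequestException:
        return False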

if config["enable"].get("retrieve", "auto") == "auto":
    config["enable"]["retrieve"] = has_internet_access()

if config["enable"]["retrieve"] is False:
    print("Datafile downloads disabled in config[retrieve] or no internet access.")
if config["enable"]["retrieve"] and config["enable"].get("retrieve_databundle", True):
datafiles = [
"ch_cantons.csv",
"je-e-21.03.02.xls",
"eez/World_EEZ_v8_2014.shp",
"hydro_capacities.csv",
"naturalearth/ne_10m_admin_0_countries.shp",
"NUTS_2013_60M_SH/data/NUTS_RG_60M_2013.shp",
"nama_10r_3popgdp.tsv.gz",
"nama_10r_3gdp.tsv.gz",
"corine/g250_clc06_V18_5.tif",
]
if not config.get("tutorial", False):
datafiles.extend(["natura/Natura2000_end2015.shp", "GEBCO_2014_2D.nc"])
rule retrieve_databundle:
output:
protected(expand("data/bundle/{file}", file=datafiles)),
log:
LOGS + "retrieve_databundle.log",
resources:
mem_mb=1000,
retries: 2
2023-03-08 18:22:00 +00:00
conda:
"../envs/environment.yaml"
script:
"../scripts/retrieve_databundle.py"
if config["enable"].get("retrieve_irena"):
rule retrieve_irena:
output:
offwind="data/existing_infrastructure/offwind_capacity_IRENA.csv",
onwind="data/existing_infrastructure/onwind_capacity_IRENA.csv",
solar="data/existing_infrastructure/solar_capacity_IRENA.csv",
log:
LOGS + "retrieve_irena.log",
resources:
mem_mb=1000,
retries: 2
conda:
"../envs/environment.yaml"
script:
"../scripts/retrieve_irena.py"

if config["enable"]["retrieve"] and config["enable"].get("retrieve_cutout", True):

    rule retrieve_cutout:
        input:
            HTTP.remote(
                "zenodo.org/record/6382570/files/{cutout}.nc",
                static=True,
            ),
        output:
            protected("cutouts/" + CDIR + "{cutout}.nc"),
        log:
            "logs/" + CDIR + "retrieve_cutout_{cutout}.log",
        resources:
            mem_mb=5000,
        retries: 2
        run:
            move(input[0], output[0])
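
    # Example (illustrative): request a specific cutout file directly, e.g.
    #   snakemake -call cutouts/europe-2013-era5.nc
    # where the {cutout} wildcard resolves to "europe-2013-era5" (assuming the
    # default, empty CDIR prefix).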

if config["enable"]["retrieve"] and config["enable"].get("retrieve_cost_data", True):

    rule retrieve_cost_data:
        input:
            HTTP.remote(
                "raw.githubusercontent.com/PyPSA/technology-data/{}/outputs/".format(
                    config["costs"]["version"]
                )
                + "costs_{year}.csv",
                keep_local=True,
            ),
        output:
            "data/costs_{year}.csv",
        log:
            LOGS + "retrieve_cost_data_{year}.log",
        resources:
            mem_mb=1000,
        retries: 2
        run:
            move(input[0], output[0])
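
    # Example (illustrative): with config["costs"]["version"] set to, say,
    # "v0.6.0" and the {year} wildcard resolving to 2030, the input becomes
    #   raw.githubusercontent.com/PyPSA/technology-data/v0.6.0/outputs/costs_2030.csv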

if config["enable"]["retrieve"] and config["enable"].get(
    "retrieve_natura_raster", True
):

    rule retrieve_natura_raster:
        input:
            HTTP.remote(
                "zenodo.org/record/4706686/files/natura.tiff",
                keep_local=True,
                static=True,
            ),
        output:
            protected(RESOURCES + "natura.tiff"),
        log:
            LOGS + "retrieve_natura_raster.log",
        resources:
            mem_mb=5000,
        retries: 2
        run:
            move(input[0], output[0])

if config["enable"]["retrieve"] and config["enable"].get(
    "retrieve_sector_databundle", True
):
    datafiles = [
        "eea/UNFCCC_v23.csv",
        "switzerland-sfoe/switzerland-new_format.csv",
        "nuts/NUTS_RG_10M_2013_4326_LEVL_2.geojson",
        "myb1-2017-nitro.xls",
        "Industrial_Database.csv",
        "emobility/KFZ__count",
        "emobility/Pkw__count",
        "h2_salt_caverns_GWh_per_sqkm.geojson",
    ]

    datafolders = [
        protected(
            directory("data/bundle-sector/eurostat-energy_balances-june_2016_edition")
        ),
        protected(
            directory("data/bundle-sector/eurostat-energy_balances-may_2018_edition")
        ),
        protected(directory("data/bundle-sector/jrc-idees-2015")),
    ]

    rule retrieve_sector_databundle:
        output:
            protected(expand("data/bundle-sector/{files}", files=datafiles)),
            *datafolders,
        log:
            LOGS + "retrieve_sector_databundle.log",
        retries: 2
        conda:
            "../envs/environment.yaml"
        script:
            "../scripts/retrieve_sector_databundle.py"

if config["enable"]["retrieve"] and (
    config["sector"]["gas_network"] or config["sector"]["H2_retrofit"]
):
    datafiles = [
        "IGGIELGN_LNGs.geojson",
        "IGGIELGN_BorderPoints.geojson",
        "IGGIELGN_Productions.geojson",
        "IGGIELGN_PipeSegments.geojson",
    ]

    rule retrieve_gas_infrastructure_data:
        output:
            protected(
                expand("data/gas_network/scigrid-gas/data/{files}", files=datafiles)
            ),
        log:
            LOGS + "retrieve_gas_infrastructure_data.log",
        retries: 2
        conda:
            "../envs/environment.yaml"
        script:
            "../scripts/retrieve_gas_infrastructure_data.py"

if config["enable"]["retrieve"]:

    rule retrieve_electricity_demand:
        input:
            HTTP.remote(
                "data.open-power-system-data.org/time_series/{version}/time_series_60min_singleindex.csv".format(
                    version="2019-06-05"
                    if config["snapshots"]["end"] < "2019"
                    else "2020-10-06"
                ),
                keep_local=True,
                static=True,
            ),
        output:
            RESOURCES + "load_raw.csv",
        log:
            LOGS + "retrieve_electricity_demand.log",
        resources:
            mem_mb=5000,
        retries: 2
        run:
            move(input[0], output[0])
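
    # Note: the OPSD release is picked from the snapshot range. Horizons ending
    # before 2019 use the 2019-06-05 release; later horizons use 2020-10-06.
    # Comparing ISO-formatted date strings with "<" works because they sort
    # lexicographically.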
if config["enable"]["retrieve"]:
rule retrieve_ship_raster:
input:
HTTP.remote(
"https://zenodo.org/record/6953563/files/shipdensity_global.zip",
keep_local=True,
static=True,
),
output:
protected("data/shipdensity_global.zip"),
2023-07-14 06:53:39 +00:00
log:
LOGS + "retrieve_ship_raster.log",
resources:
mem_mb=5000,
retries: 2
run:
move(input[0], output[0])

if config["enable"]["retrieve"]:

    # Downloading Copernicus Global Land Cover for land cover and land use:
    # Website: https://land.copernicus.eu/global/products/lc
    rule download_copernicus_land_cover:
        input:
            HTTP.remote(
                "zenodo.org/record/3939050/files/PROBAV_LC100_global_v3.0.1_2019-nrt_Discrete-Classification-map_EPSG-4326.tif",
                static=True,
            ),
        output:
            RESOURCES
            + "Copernicus_LC100_global_v3.0.1_2019-nrt_Discrete-Classification-map_EPSG-4326.tif",
        run:
            move(input[0], output[0])
if config["enable"]["retrieve"]:
2023-12-21 17:39:06 +00:00
# Some logic to find the correct file URL
# Sometimes files are released delayed or ahead of schedule, check which file is currently available
def check_file_exists(url):
response = requests.head(url)
return response.status_code == 200
2023-12-21 17:39:06 +00:00
# Basic pattern where WDPA files can be found
url_pattern = (
"https://d1gam3xoknrgr2.cloudfront.net/current/WDPA_{bYYYY}_Public.zip"
)
2023-12-21 17:39:06 +00:00
# 3-letter month + 4 digit year for current/previous/next month to test
current_monthyear = datetime.now().strftime("%b%Y")
prev_monthyear = (datetime.now() - timedelta(30)).strftime("%b%Y")
next_monthyear = (datetime.now() + timedelta(30)).strftime("%b%Y")
2023-12-21 17:39:06 +00:00
# Test prioritised: current month -> previous -> next
2023-12-22 13:40:30 +00:00
for bYYYY in [current_monthyear, prev_monthyear, next_monthyear]:
if check_file_exists(url := url_pattern.format(bYYYY=bYYYY)):
2023-12-21 17:39:06 +00:00
break
else:
# If None of the three URLs are working
url = False
assert (
url
), f"No WDPA files found at {url_pattern} for bY='{current_monthyear}, {prev_monthyear}, or {next_monthyear}'"

    # Download the protected-area database from WDPA, extract the main zip
    # and merge the three zipped shapefiles it contains.
    # Website: https://www.protectedplanet.net/en/thematic-areas/wdpa
    rule download_wdpa:
        input:
            HTTP.remote(
                url,
                static=True,
                keep_local=True,
            ),
        params:
            zip=RESOURCES + f"WDPA_{bYYYY}_shp.zip",
            folder=directory(RESOURCES + f"WDPA_{bYYYY}"),
        output:
            gpkg=RESOURCES + f"WDPA_{bYYYY}.gpkg",
        run:
            shell("cp {input} {params.zip}")
            shell("unzip -o {params.zip} -d {params.folder}")
            for i in range(3):
                # /vsizip/ is a special GDAL driver that lets ogr2ogr read
                # zipped shapefiles directly
                layer_path = (
                    f"/vsizip/{params.folder}/WDPA_{bYYYY}_Public_shp_{i}.zip"
                )
                print(f"Adding layer {i+1} of 3 to combined output file.")
                shell("ogr2ogr -f gpkg -update -append {output.gpkg} {layer_path}")

    rule download_wdpa_marine:
        # Download the marine protected-area database from WDPA, extract the
        # main zip and merge the three zipped shapefiles it contains.
        # Website: https://www.protectedplanet.net/en/thematic-areas/marine-protected-areas
        input:
            HTTP.remote(
                f"d1gam3xoknrgr2.cloudfront.net/current/WDPA_WDOECM_{bYYYY}_Public_marine_shp.zip",
                static=True,
                keep_local=True,
            ),
        params:
            zip=RESOURCES + f"WDPA_WDOECM_{bYYYY}_marine.zip",
            folder=directory(RESOURCES + f"WDPA_WDOECM_{bYYYY}_marine"),
        output:
            gpkg=RESOURCES + f"WDPA_WDOECM_{bYYYY}_marine.gpkg",
        run:
            shell("cp {input} {params.zip}")
            shell("unzip -o {params.zip} -d {params.folder}")
            for i in range(3):
                # /vsizip/ is a special GDAL driver that lets ogr2ogr read
                # zipped shapefiles directly
                layer_path = f"/vsizip/{params.folder}/WDPA_WDOECM_{bYYYY}_Public_marine_shp_{i}.zip"
                print(f"Adding layer {i+1} of 3 to combined output file.")
                shell("ogr2ogr -f gpkg -update -append {output.gpkg} {layer_path}")

if config["enable"]["retrieve"]:

    rule retrieve_monthly_co2_prices:
        input:
            HTTP.remote(
                "https://www.eex.com/fileadmin/EEX/Downloads/EUA_Emission_Spot_Primary_Market_Auction_Report/Archive_Reports/emission-spot-primary-market-auction-report-2019-data.xls",
                keep_local=True,
                static=True,
            ),
        output:
            "data/validation/emission-spot-primary-market-auction-report-2019-data.xls",
        log:
            LOGS + "retrieve_monthly_co2_prices.log",
        resources:
            mem_mb=5000,
        retries: 2
        run:
            move(input[0], output[0])
if config["enable"]["retrieve"]:
rule retrieve_monthly_fuel_prices:
output:
2023-08-05 14:41:16 +00:00
"data/validation/energy-price-trends-xlsx-5619002.xlsx",
log:
LOGS + "retrieve_monthly_fuel_prices.log",
resources:
mem_mb=5000,
retries: 2
conda:
"../envs/environment.yaml"
script:
"../scripts/retrieve_monthly_fuel_prices.py"