# SPDX-FileCopyrightText: : 2023-2024 The PyPSA-Eur Authors
#
# SPDX-License-Identifier: MIT

import requests
from datetime import datetime, timedelta
from shutil import move, unpack_archive
from zipfile import ZipFile

# Resolve the "auto" setting once at parse time: retrieval is enabled only if
# has_internet_access() reports a connection.
if config["enable"].get("retrieve", "auto") == "auto":
    config["enable"]["retrieve"] = has_internet_access()

if config["enable"]["retrieve"] is False:
    print("Datafile downloads disabled in config[retrieve] or no internet access.")


if config["enable"]["retrieve"] and config["enable"].get("retrieve_databundle", True):
    # Files expected below data/bundle/ after the data bundle has been retrieved.
    datafiles = [
        "je-e-21.03.02.xls",
        "nama_10r_3popgdp.tsv.gz",
        "nama_10r_3gdp.tsv.gz",
        "corine/g250_clc06_V18_5.tif",
        "eea/UNFCCC_v23.csv",
        "emobility/KFZ__count",
        "emobility/Pkw__count",
        "h2_salt_caverns_GWh_per_sqkm.geojson",
        "natura/natura.tiff",
        "gebco/GEBCO_2014_2D.nc",
        "GDP_per_capita_PPP_1990_2015_v2.nc",
        "ppp_2013_1km_Aggregated.tif",
    ]

    # Produces the bundle files listed in `datafiles` and the jrc-idees-2015
    # directory by running retrieve_databundle.py.
    rule retrieve_databundle:
        output:
            expand("data/bundle/{file}", file=datafiles),
            directory("data/bundle/jrc-idees-2015"),
        log:
            "logs/retrieve_databundle.log",
        resources:
            mem_mb=1000,
        retries: 2
        conda:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_databundle.py"

    # Produces the Eurostat balances directory by running retrieve_eurostat_data.py.
    rule retrieve_eurostat_data:
        output:
            directory("data/eurostat/Balances-April2023"),
        log:
            "logs/retrieve_eurostat_data.log",
        retries: 2
        conda:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_eurostat_data.py"

    # Produces the data/jrc-idees-2021 directory by running retrieve_jrc_idees.py.
    rule retrieve_jrc_idees:
        output:
            directory("data/jrc-idees-2021"),
        log:
            "logs/retrieve_jrc_idees.log",
        retries: 2
        script:
            "../scripts/retrieve_jrc_idees.py"

    # Produces the Eurostat household energy balances CSV by running
    # retrieve_eurostat_household_data.py.
    rule retrieve_eurostat_household_data:
        output:
            "data/eurostat/eurostat-household_energy_balances-february_2024.csv",
        log:
            "logs/retrieve_eurostat_household_data.log",
        retries: 2
        conda:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_eurostat_household_data.py"


if config["enable"]["retrieve"]:

    # Fetches the NUTS 2013 archive and extracts the level 3 and level 2
    # GeoJSON files from it; the archive itself is removed afterwards.
    rule retrieve_nuts_shapes:
        input:
            shapes=storage(
                "https://gisco-services.ec.europa.eu/distribution/v2/nuts/download/ref-nuts-2013-03m.geojson.zip"
            ),
        output:
            shapes_level_3="data/nuts/NUTS_RG_03M_2013_4326_LEVL_3.geojson",
            shapes_level_2="data/nuts/NUTS_RG_03M_2013_4326_LEVL_2.geojson",
        params:
            zip_file="data/nuts/ref-nuts-2013-03m.geojson.zip",
        run:
            os.rename(input.shapes, params.zip_file)
            with ZipFile(params.zip_file, "r") as zip_ref:
                for level in ["LEVL_3", "LEVL_2"]:
                    filename = f"NUTS_RG_03M_2013_4326_{level}.geojson"
                    zip_ref.extract(filename, Path(output.shapes_level_3).parent)
                    extracted_file = Path(output.shapes_level_3).parent / filename
                    # The last character of the level name ("3" or "2") selects
                    # the matching named output.
                    extracted_file.rename(
                        getattr(output, f"shapes_level_{level[-1]}")
                    )
            os.remove(params.zip_file)


if config["enable"]["retrieve"] and config["enable"].get("retrieve_cutout", True):

    # Moves the requested cutout from the Zenodo record into the cutouts
    # directory and then calls validate_checksum on it.
    rule retrieve_cutout:
        input:
            storage(
                "https://zenodo.org/records/12791128/files/{cutout}.nc",
            ),
        output:
            "cutouts/" + CDIR + "{cutout}.nc",
        log:
            "logs/" + CDIR + "retrieve_cutout_{cutout}.log",
        resources:
            mem_mb=5000,
        retries: 2
        run:
            move(input[0], output[0])
            validate_checksum(output[0], input[0])


if config["enable"]["retrieve"] and config["enable"].get("retrieve_cost_data", True):

    # Produces costs_{year}.csv by running retrieve_cost_data.py; the cost data
    # version is taken from the "costs" section of the config.
    rule retrieve_cost_data:
        params:
            version=config_provider("costs", "version"),
        output:
            resources("costs_{year}.csv"),
        log:
            logs("retrieve_cost_data_{year}.log"),
        resources:
            mem_mb=1000,
        retries: 2
        conda:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_cost_data.py"


if config["enable"]["retrieve"]:
    # Files expected below data/gas_network/scigrid-gas/data/.
    datafiles = [
        "IGGIELGN_LNGs.geojson",
        "IGGIELGN_BorderPoints.geojson",
        "IGGIELGN_Productions.geojson",
        "IGGIELGN_Storages.geojson",
        "IGGIELGN_PipeSegments.geojson",
    ]

    # Produces the gas infrastructure files listed in `datafiles` by running
    # retrieve_gas_infrastructure_data.py.
    rule retrieve_gas_infrastructure_data:
        output:
            expand("data/gas_network/scigrid-gas/data/{files}", files=datafiles),
        log:
            "logs/retrieve_gas_infrastructure_data.log",
        retries: 2
        conda:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_gas_infrastructure_data.py"


if config["enable"]["retrieve"]:

    # Produces data/electricity_demand_raw.csv by running
    # retrieve_electricity_demand.py with the two listed versions as parameter.
    rule retrieve_electricity_demand:
        params:
            versions=["2019-06-05", "2020-10-06"],
        output:
            "data/electricity_demand_raw.csv",
        log:
            "logs/retrieve_electricity_demand.log",
        resources:
            mem_mb=5000,
        retries: 2
        conda:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_electricity_demand.py"


if config["enable"]["retrieve"]:

    # Moves the hourly demand CSV from the Zenodo record to
    # data/load_synthetic_raw.csv.
    rule retrieve_synthetic_electricity_demand:
        input:
            storage(
                "https://zenodo.org/records/10820928/files/demand_hourly.csv",
            ),
        output:
            "data/load_synthetic_raw.csv",
        log:
            "logs/retrieve_synthetic_electricity_demand.log",
        resources:
            mem_mb=5000,
        retries: 2
        run:
            move(input[0], output[0])


if config["enable"]["retrieve"]:

    # Moves the ship density archive from the Zenodo record into data/ and
    # then calls validate_checksum on it.
    rule retrieve_ship_raster:
        input:
            storage(
                "https://zenodo.org/records/12760663/files/shipdensity_global.zip",
                keep_local=True,
            ),
        output:
            "data/shipdensity_global.zip",
        log:
            "logs/retrieve_ship_raster.log",
        resources:
            mem_mb=5000,
        retries: 2
        run:
            move(input[0], output[0])
            validate_checksum(output[0], input[0])


if config["enable"]["retrieve"]:

    # Moves the ENSPRESO biomass workbook from the Zenodo record into data/.
    rule retrieve_jrc_enspreso_biomass:
        input:
            storage(
                "https://zenodo.org/records/10356004/files/ENSPRESO_BIOMASS.xlsx",
                keep_local=True,
            ),
        output:
            "data/ENSPRESO_BIOMASS.xlsx",
        retries: 1
        run:
            move(input[0], output[0])


if config["enable"]["retrieve"]:

    # Moves the Hotmaps industrial database CSV into data/.
    rule retrieve_hotmaps_industrial_sites:
        input:
            storage(
                "https://gitlab.com/hotmaps/industrial_sites/industrial_sites_Industrial_Database/-/raw/master/data/Industrial_Database.csv",
                keep_local=True,
            ),
        output:
            "data/Industrial_Database.csv",
        retries: 1
        run:
            move(input[0], output[0])


if config["enable"]["retrieve"]:

    # Moves the myb1-2022-nitro-ert.xlsx workbook into data/.
    rule retrieve_usgs_ammonia_production:
        input:
            storage(
                "https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/media/files/myb1-2022-nitro-ert.xlsx"
            ),
        output:
            "data/myb1-2022-nitro-ert.xlsx",
        retries: 1
        run:
            move(input[0], output[0])


if config["enable"]["retrieve"]:

    # Moves the complete_map_2020_unit_Mt.geojson file into data/.
    rule retrieve_geological_co2_storage_potential:
        input:
            storage(
                "https://raw.githubusercontent.com/ericzhou571/Co2Storage/main/resources/complete_map_2020_unit_Mt.geojson",
                keep_local=True,
            ),
        output:
            "data/complete_map_2020_unit_Mt.geojson",
        retries: 1
        run:
            move(input[0], output[0])


if config["enable"]["retrieve"]:

    # Downloading Copernicus Global Land Cover for land cover and land use:
    # Website: https://land.copernicus.eu/global/products/lc
    rule download_copernicus_land_cover:
        input:
            storage(
                "https://zenodo.org/records/3939050/files/PROBAV_LC100_global_v3.0.1_2019-nrt_Discrete-Classification-map_EPSG-4326.tif",
            ),
        output:
            "data/Copernicus_LC100_global_v3.0.1_2019-nrt_Discrete-Classification-map_EPSG-4326.tif",
        run:
            move(input[0], output[0])
            validate_checksum(output[0], input[0])


if config["enable"]["retrieve"]:

    # Downloading LUISA Base Map for land cover and land use:
    # Website: https://ec.europa.eu/jrc/en/luisa
    rule retrieve_luisa_land_cover:
        input:
            storage(
                "https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/LUISA/EUROPE/Basemaps/LandUse/2018/LATEST/LUISA_basemap_020321_50m.tif",
            ),
        output:
            "data/LUISA_basemap_020321_50m.tif",
        run:
            move(input[0], output[0])


if config["enable"]["retrieve"]:

    # Requests the World EEZ v12 low-resolution archive from marineregions.org,
    # unpacks it next to the archive and removes the archive.
    rule retrieve_eez:
        params:
            zip="data/eez/World_EEZ_v12_20231025_LR.zip",
        output:
            gpkg="data/eez/World_EEZ_v12_20231025_LR/eez_v12_lowres.gpkg",
        run:
            import os
            import requests
            from uuid import uuid4

            # The download form is filled with random name/organisation values.
            name = str(uuid4())[:8]
            org = str(uuid4())[:8]

            response = requests.post(
                "https://www.marineregions.org/download_file.php",
                params={"name": "World_EEZ_v12_20231025_LR.zip"},
                data={
                    "name": name,
                    "organisation": org,
                    "email": f"{name}@{org}.org",
                    "country": "Germany",
                    "user_category": "academia",
                    "purpose_category": "Research",
                    "agree": "1",
                },
            )
            # Fail the job instead of saving an HTTP error page as the archive.
            response.raise_for_status()

            with open(params["zip"], "wb") as f:
                f.write(response.content)
            output_folder = Path(params["zip"]).parent
            unpack_archive(params["zip"], output_folder)
            os.remove(params["zip"])


if config["enable"]["retrieve"]:

    # Downloads the World Bank SP.URB.TOTL.IN.ZS indicator archive, unpacks it
    # next to the archive and removes the archive.
    # NOTE(review): the output is named `gpkg` although it is a CSV file; the
    # name is kept unchanged in case other rules refer to it.
    rule retrieve_worldbank_urban_population:
        params:
            zip="data/worldbank/API_SP.URB.TOTL.IN.ZS_DS2_en_csv_v2_3403768.zip",
        output:
            gpkg="data/worldbank/API_SP.URB.TOTL.IN.ZS_DS2_en_csv_v2_3403768.csv",
        run:
            import os
            import requests

            response = requests.get(
                "https://api.worldbank.org/v2/en/indicator/SP.URB.TOTL.IN.ZS?downloadformat=csv",
                params={"name": "API_SP.URB.TOTL.IN.ZS_DS2_en_csv_v2_3403768.zip"},
            )
            # Fail the job instead of saving an HTTP error page as the archive.
            response.raise_for_status()

            with open(params["zip"], "wb") as f:
                f.write(response.content)
            output_folder = Path(params["zip"]).parent
            unpack_archive(params["zip"], output_folder)
            os.remove(params["zip"])


if config["enable"]["retrieve"]:

    # Download directly from naciscdn.org which is a redirect from naturalearth.com
    # (https://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-admin-0-countries/)
    # Use point-of-view (POV) variant of Germany so that Crimea is included.
    rule retrieve_naturalearth_countries:
        input:
            storage(
                "https://naciscdn.org/naturalearth/10m/cultural/ne_10m_admin_0_countries_deu.zip"
            ),
        params:
            zip="data/naturalearth/ne_10m_admin_0_countries_deu.zip",
        output:
            countries="data/naturalearth/ne_10m_admin_0_countries_deu.shp",
        run:
            move(input[0], params["zip"])
            output_folder = Path(output["countries"]).parent
            unpack_archive(params["zip"], output_folder)
            os.remove(params["zip"])


if config["enable"]["retrieve"]:

    # Downloads the Europe Gas Tracker workbook from the mirror URL below.
    rule retrieve_gem_europe_gas_tracker:
        output:
            "data/gem/Europe-Gas-Tracker-2024-05.xlsx",
        run:
            import requests

            # mirror of https://globalenergymonitor.org/wp-content/uploads/2024/05/Europe-Gas-Tracker-2024-05.xlsx
            url = "https://tubcloud.tu-berlin.de/s/LMBJQCsN6Ez5cN2/download/Europe-Gas-Tracker-2024-05.xlsx"
            response = requests.get(url)
            # Fail the job instead of saving an HTTP error page as the workbook.
            response.raise_for_status()
            with open(output[0], "wb") as f:
                f.write(response.content)


if config["enable"]["retrieve"]:

    # Downloads the Global Steel Plant Tracker workbook from the mirror URL below.
    rule retrieve_gem_steel_plant_tracker:
        output:
            "data/gem/Global-Steel-Plant-Tracker-April-2024-Standard-Copy-V1.xlsx",
        run:
            import requests

            # mirror of https://globalenergymonitor.org/wp-content/uploads/2024/04/Global-Steel-Plant-Tracker-April-2024-Standard-Copy-V1.xlsx
            url = "https://tubcloud.tu-berlin.de/s/Aqebo3rrQZWKGsG/download/Global-Steel-Plant-Tracker-April-2024-Standard-Copy-V1.xlsx"
            response = requests.get(url)
            # Fail the job instead of saving an HTTP error page as the workbook.
            response.raise_for_status()
            with open(output[0], "wb") as f:
                f.write(response.content)


if config["enable"]["retrieve"]:

    # Some logic to find the correct file URL
    # Sometimes files are released delayed or ahead of schedule, check which file is currently available

    def check_file_exists(url):
        """Return True if a HEAD request to `url` is answered with HTTP 200."""
        # This runs whenever the workflow is parsed, so bound the wait instead
        # of blocking indefinitely on an unresponsive server.
        response = requests.head(url, timeout=30)
        return response.status_code == 200

    # Basic pattern where WDPA files can be found
    url_pattern = (
        "https://d1gam3xoknrgr2.cloudfront.net/current/WDPA_{bYYYY}_Public_shp.zip"
    )

    # 3-letter month + 4 digit year for current/previous/next month to test.
    # Anchor on the first day of the month: adding or subtracting a flat 30 days
    # from today can stay in the same month (on the 31st) or skip a month
    # (end of January + 30 days lands in March).
    month_start = datetime.now().replace(day=1)
    current_monthyear = month_start.strftime("%b%Y")
    prev_monthyear = (month_start - timedelta(days=1)).strftime("%b%Y")
    next_monthyear = (month_start + timedelta(days=32)).strftime("%b%Y")

    # Test prioritised: current month -> previous -> next
    for bYYYY in [current_monthyear, prev_monthyear, next_monthyear]:
        if check_file_exists(url := url_pattern.format(bYYYY=bYYYY)):
            break
    else:
        # If None of the three URLs are working
        url = False

    assert (
        url
    ), f"No WDPA files found at {url_pattern} for bY='{current_monthyear}, {prev_monthyear}, or {next_monthyear}'"

    # Downloading protected area database from WDPA
    # extract the main zip and then merge the contained 3 zipped shapefiles
    # Website: https://www.protectedplanet.net/en/thematic-areas/wdpa
    rule download_wdpa:
        input:
            storage(url, keep_local=True),
        params:
            zip="data/WDPA_shp.zip",
            folder=directory("data/WDPA"),
        output:
            gpkg="data/WDPA.gpkg",
        run:
            shell("cp {input} {params.zip}")
            shell("unzip -o {params.zip} -d {params.folder}")
            for i in range(3):
                # vsizip is special driver for directly working with zipped shapefiles in ogr2ogr
                layer_path = (
                    f"/vsizip/{params.folder}/WDPA_{bYYYY}_Public_shp_{i}.zip"
                )
                print(f"Adding layer {i+1} of 3 to combined output file.")
                shell("ogr2ogr -f gpkg -update -append {output.gpkg} {layer_path}")

    rule download_wdpa_marine:
        # Downloading Marine protected area database from WDPA
        # extract the main zip and then merge the contained 3 zipped shapefiles
        # Website: https://www.protectedplanet.net/en/thematic-areas/marine-protected-areas
        input:
            storage(
                f"https://d1gam3xoknrgr2.cloudfront.net/current/WDPA_WDOECM_{bYYYY}_Public_marine_shp.zip",
                keep_local=True,
            ),
        params:
            zip="data/WDPA_WDOECM_marine.zip",
            folder=directory("data/WDPA_WDOECM_marine"),
        output:
            gpkg="data/WDPA_WDOECM_marine.gpkg",
        run:
            shell("cp {input} {params.zip}")
            shell("unzip -o {params.zip} -d {params.folder}")
            for i in range(3):
                # vsizip is special driver for directly working with zipped shapefiles in ogr2ogr
                layer_path = f"/vsizip/{params.folder}/WDPA_WDOECM_{bYYYY}_Public_marine_shp_{i}.zip"
                print(f"Adding layer {i+1} of 3 to combined output file.")
                shell("ogr2ogr -f gpkg -update -append {output.gpkg} {layer_path}")


if config["enable"]["retrieve"]:

    # Moves the EEX 2019 auction report workbook into data/validation/.
    rule retrieve_monthly_co2_prices:
        input:
            storage(
                "https://www.eex.com/fileadmin/EEX/Downloads/EUA_Emission_Spot_Primary_Market_Auction_Report/Archive_Reports/emission-spot-primary-market-auction-report-2019-data.xls",
                keep_local=True,
            ),
        output:
            "data/validation/emission-spot-primary-market-auction-report-2019-data.xls",
        log:
            "logs/retrieve_monthly_co2_prices.log",
        resources:
            mem_mb=5000,
        retries: 2
        run:
            move(input[0], output[0])


if config["enable"]["retrieve"]:

    # Produces the energy price trends workbook by running
    # retrieve_monthly_fuel_prices.py.
    rule retrieve_monthly_fuel_prices:
        output:
            "data/validation/energy-price-trends-xlsx-5619002.xlsx",
        log:
            "logs/retrieve_monthly_fuel_prices.log",
        resources:
            mem_mb=5000,
        retries: 2
        conda:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_monthly_fuel_prices.py"


if config["enable"]["retrieve"] and (
    config["electricity"]["base_network"] == "osm-prebuilt"
):

    # Moves the five network CSV files from the Zenodo record into
    # data/osm-prebuilt/ and calls validate_checksum on each of them.
    rule retrieve_osm_prebuilt:
        input:
            buses=storage("https://zenodo.org/records/13358976/files/buses.csv"),
            converters=storage(
                "https://zenodo.org/records/13358976/files/converters.csv"
            ),
            lines=storage("https://zenodo.org/records/13358976/files/lines.csv"),
            links=storage("https://zenodo.org/records/13358976/files/links.csv"),
            transformers=storage(
                "https://zenodo.org/records/13358976/files/transformers.csv"
            ),
        output:
            buses="data/osm-prebuilt/buses.csv",
            converters="data/osm-prebuilt/converters.csv",
            lines="data/osm-prebuilt/lines.csv",
            links="data/osm-prebuilt/links.csv",
            transformers="data/osm-prebuilt/transformers.csv",
        log:
            "logs/retrieve_osm_prebuilt.log",
        threads: 1
        resources:
            mem_mb=500,
        retries: 2
        run:
            # Inputs and outputs share the same keys, so each file is matched
            # to its destination by name.
            for key in input.keys():
                move(input[key], output[key])
                validate_checksum(output[key], input[key])


if config["enable"]["retrieve"] and (
    config["electricity"]["base_network"] == "osm-raw"
):

    # Produces the five raw OSM JSON files of one country by running
    # retrieve_osm_data.py.
    rule retrieve_osm_data:
        output:
            cables_way="data/osm-raw/{country}/cables_way.json",
            lines_way="data/osm-raw/{country}/lines_way.json",
            links_relation="data/osm-raw/{country}/links_relation.json",
            substations_way="data/osm-raw/{country}/substations_way.json",
            substations_relation="data/osm-raw/{country}/substations_relation.json",
        log:
            "logs/retrieve_osm_data_{country}.log",
        threads: 1
        conda:
            "../envs/retrieve.yaml"
        script:
            "../scripts/retrieve_osm_data.py"


if config["enable"]["retrieve"] and (
    config["electricity"]["base_network"] == "osm-raw"
):

    # Aggregate target: requests the raw OSM files of every configured country.
    rule retrieve_osm_data_all:
        input:
            expand(
                "data/osm-raw/{country}/cables_way.json",
                country=config_provider("countries"),
            ),
            expand(
                "data/osm-raw/{country}/lines_way.json",
                country=config_provider("countries"),
            ),
            expand(
                "data/osm-raw/{country}/links_relation.json",
                country=config_provider("countries"),
            ),
            expand(
                "data/osm-raw/{country}/substations_way.json",
                country=config_provider("countries"),
            ),
            expand(
                "data/osm-raw/{country}/substations_relation.json",
                country=config_provider("countries"),
            ),