diff --git a/rules/retrieve.smk b/rules/retrieve.smk index 10ad9684..5b0871ca 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -54,6 +54,15 @@ if config["enable"]["retrieve"] and config["enable"].get("retrieve_databundle", script: "../scripts/retrieve_eurostat_data.py" + rule retrieve_jrc_idees: + output: + directory("data/bundle/jrc-idees-2021"), + log: + "logs/retrieve_jrc_idees.log", + retries: 2 + script: + "../scripts/retrieve_jrc_idees.py" + rule retrieve_eurostat_household_data: output: "data/eurostat/eurostat-household_energy_balances-february_2024.csv", diff --git a/scripts/retrieve_jrc_idees.py b/scripts/retrieve_jrc_idees.py new file mode 100644 index 00000000..09484311 --- /dev/null +++ b/scripts/retrieve_jrc_idees.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: : 2024- The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT +""" +Retrieve and extract JRC IDEES 2021 data. +""" + +import os +import requests +from pathlib import Path +from bs4 import BeautifulSoup +from _helpers import configure_logging, progress_retrieve, set_scenario_config +import zipfile +import logging + + +logger = logging.getLogger(__name__) + +# Define the base URL +url_jrc = "https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/JRC-IDEES/JRC-IDEES-2021_v1/" + +if __name__ == "__main__": + if "snakemake" not in globals(): + from _helpers import mock_snakemake + snakemake = mock_snakemake("retrieve_jrc_idees") + rootpath = ".." + else: + rootpath = "." + + configure_logging(snakemake) + set_scenario_config(snakemake) + + disable_progress = snakemake.config["run"].get("disable_progressbar", False) + + # create a local directory to save the zip files + local_dir = snakemake.output[0] + if not os.path.exists(local_dir): + os.makedirs(local_dir) + + # get the list of zip files from the JRC URL + response = requests.get(url_jrc) + soup = BeautifulSoup(response.text, 'html.parser') + zip_files = [link.get('href') for link in soup.find_all('a') + if link.get('href').endswith('.zip')] + + logger.info(f"Downloading {len(zip_files)} .zip files for JRC IDEES from '{url_jrc}'.") + + # download and unpack each zip file + for zip_file in zip_files: + logger.info(f"Downloading and unpacking {zip_file}") + zip_url = url_jrc + zip_file + to_fn = local_dir+"/" + zip_file[:-4] + progress_retrieve(zip_url, to_fn, disable=disable_progress) +