diff --git a/scripts/build_energy_totals.py b/scripts/build_energy_totals.py
index b3f6ddb7..a93562b2 100644
--- a/scripts/build_energy_totals.py
+++ b/scripts/build_energy_totals.py
@@ -329,9 +329,9 @@ def idees_per_country(ct: str, base_dir: str) -> pd.DataFrame:
     """
     ct_idees = idees_rename.get(ct, ct)
 
-    fn_residential = f"{base_dir}/JRC-IDEES-2021_Residential_{ct_idees}.xlsx"
-    fn_tertiary = f"{base_dir}/JRC-IDEES-2021_Tertiary_{ct_idees}.xlsx"
-    fn_transport = f"{base_dir}/JRC-IDEES-2021_Transport_{ct_idees}.xlsx"
+    fn_residential = f"{base_dir}/{ct_idees}/JRC-IDEES-2021_Residential_{ct_idees}.xlsx"
+    fn_tertiary = f"{base_dir}/{ct_idees}/JRC-IDEES-2021_Tertiary_{ct_idees}.xlsx"
+    fn_transport = f"{base_dir}/{ct_idees}/JRC-IDEES-2021_Transport_{ct_idees}.xlsx"
 
     ct_totals = {}
 
@@ -1103,6 +1103,10 @@ def build_transport_data(
         transport_data = pd.concat([transport_data, swiss_cars]).sort_index()
 
     transport_data.rename(columns={"passenger cars": "number cars"}, inplace=True)
+
+    # clean up dataframe
+    years = np.arange(2000, 2022)
+    transport_data = transport_data[transport_data.index.get_level_values(1).isin(years)]
 
     missing = transport_data.index[transport_data["number cars"].isna()]
     if not missing.empty:
diff --git a/scripts/retrieve_jrc_idees.py b/scripts/retrieve_jrc_idees.py
index e163a163..6c61ee19 100644
--- a/scripts/retrieve_jrc_idees.py
+++ b/scripts/retrieve_jrc_idees.py
@@ -10,22 +10,19 @@ import logging
 import os
 import zipfile
 from pathlib import Path
-
-import requests
 from _helpers import configure_logging, progress_retrieve, set_scenario_config
-from bs4 import BeautifulSoup
+
 
 logger = logging.getLogger(__name__)
 
 # Define the base URL
 url_jrc = (
-    "https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/JRC-IDEES/JRC-IDEES-2021_v1/"
+    "https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/JRC-IDEES/JRC-IDEES-2021_v1/JRC-IDEES-2021.zip"
 )
 
 if __name__ == "__main__":
     if "snakemake" not in globals():
         from _helpers import mock_snakemake
-
         snakemake = mock_snakemake("retrieve_jrc_idees")
         rootpath = ".."
     else:
@@ -33,30 +30,22 @@ if __name__ == "__main__":
     configure_logging(snakemake)
     set_scenario_config(snakemake)
-
     disable_progress = snakemake.config["run"].get("disable_progressbar", False)
 
-    # create a local directory to save the zip files
-    local_dir = snakemake.output[0]
-    if not os.path.exists(local_dir):
-        os.makedirs(local_dir)
-
-    # get the list of zip files from the JRC URL
-    response = requests.get(url_jrc)
-    soup = BeautifulSoup(response.text, "html.parser")
-    zip_files = [
-        link.get("href")
-        for link in soup.find_all("a")
-        if link.get("href").endswith(".zip")
-    ]
+    to_fn = snakemake.output[0]
+    to_fn_zp = to_fn + ".zip"
 
+    # download .zip file
     logger.info(
-        f"Downloading {len(zip_files)} .zip files for JRC IDEES from '{url_jrc}'."
+        f"Downloading JRC IDEES from {url_jrc}."
     )
+    progress_retrieve(url_jrc, to_fn_zp, disable=disable_progress)
+
+    # extract
+    logger.info("Extracting JRC IDEES data.")
+    with zipfile.ZipFile(to_fn_zp, "r") as zip_ref:
+        zip_ref.extractall(to_fn)
 
-    # download and unpack each zip file
-    for zip_file in zip_files:
-        logger.info(f"Downloading and unpacking {zip_file}")
-        zip_url = url_jrc + zip_file
-        to_fn = local_dir + "/" + zip_file[:-4]
-        progress_retrieve(zip_url, to_fn, disable=disable_progress)
+    logger.info(f"JRC IDEES data available in '{to_fn}'.")
+
+
\ No newline at end of file
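
For orientation, a minimal standalone sketch of the new retrieval flow in scripts/retrieve_jrc_idees.py: instead of scraping the JRC directory listing for per-country zip files, the script now fetches the single JRC-IDEES-2021.zip archive and extracts it, which is also why build_energy_totals.py now reads the spreadsheets from per-country subfolders ({ct_idees}/...). The sketch is not part of the patch; it substitutes urllib.request.urlretrieve for the repository's progress_retrieve helper and a hard-coded output path for snakemake.output[0].

# Illustrative sketch only -- not part of the patch above. It mirrors the new
# single-archive flow, with urllib.request.urlretrieve standing in for the
# repository's progress_retrieve helper and a hard-coded path standing in for
# snakemake.output[0].
import zipfile
from pathlib import Path
from urllib.request import urlretrieve

url_jrc = (
    "https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/JRC-IDEES/"
    "JRC-IDEES-2021_v1/JRC-IDEES-2021.zip"
)

to_fn = Path("data/jrc-idees-2021")     # extraction directory (assumed path)
to_fn_zp = to_fn.with_suffix(".zip")    # temporary archive written next to it
to_fn.parent.mkdir(parents=True, exist_ok=True)

# download the single archive in one request
urlretrieve(url_jrc, to_fn_zp)

# extract it; the archive holds one subfolder per country, which is why
# idees_per_country() now builds paths like f"{base_dir}/{ct_idees}/..."
with zipfile.ZipFile(to_fn_zp, "r") as zip_ref:
    zip_ref.extractall(to_fn)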