Avoid double download of JRC-IDEES data
This commit is contained in:
parent
5f009590f5
commit
4c46c57fec
@ -329,9 +329,9 @@ def idees_per_country(ct: str, base_dir: str) -> pd.DataFrame:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
ct_idees = idees_rename.get(ct, ct)
|
ct_idees = idees_rename.get(ct, ct)
|
||||||
fn_residential = f"{base_dir}/JRC-IDEES-2021_Residential_{ct_idees}.xlsx"
|
fn_residential = f"{base_dir}/{ct_idees}/JRC-IDEES-2021_Residential_{ct_idees}.xlsx"
|
||||||
fn_tertiary = f"{base_dir}/JRC-IDEES-2021_Tertiary_{ct_idees}.xlsx"
|
fn_tertiary = f"{base_dir}/{ct_idees}/JRC-IDEES-2021_Tertiary_{ct_idees}.xlsx"
|
||||||
fn_transport = f"{base_dir}/JRC-IDEES-2021_Transport_{ct_idees}.xlsx"
|
fn_transport = f"{base_dir}/{ct_idees}/JRC-IDEES-2021_Transport_{ct_idees}.xlsx"
|
||||||
|
|
||||||
ct_totals = {}
|
ct_totals = {}
|
||||||
|
|
||||||
@ -1104,6 +1104,10 @@ def build_transport_data(
|
|||||||
|
|
||||||
transport_data.rename(columns={"passenger cars": "number cars"}, inplace=True)
|
transport_data.rename(columns={"passenger cars": "number cars"}, inplace=True)
|
||||||
|
|
||||||
|
# clean up dataframe
|
||||||
|
years = np.arange(2000, 2022)
|
||||||
|
transport_data = transport_data[transport_data.index.get_level_values(1).isin(years)]
|
||||||
|
|
||||||
missing = transport_data.index[transport_data["number cars"].isna()]
|
missing = transport_data.index[transport_data["number cars"].isna()]
|
||||||
if not missing.empty:
|
if not missing.empty:
|
||||||
logger.info(
|
logger.info(
|
||||||
|
@ -10,22 +10,19 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import zipfile
|
import zipfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import requests
|
|
||||||
from _helpers import configure_logging, progress_retrieve, set_scenario_config
|
from _helpers import configure_logging, progress_retrieve, set_scenario_config
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Define the base URL
|
# Define the base URL
|
||||||
url_jrc = (
|
url_jrc = (
|
||||||
"https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/JRC-IDEES/JRC-IDEES-2021_v1/"
|
"https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/JRC-IDEES/JRC-IDEES-2021_v1/JRC-IDEES-2021.zip"
|
||||||
)
|
)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if "snakemake" not in globals():
|
if "snakemake" not in globals():
|
||||||
from _helpers import mock_snakemake
|
from _helpers import mock_snakemake
|
||||||
|
|
||||||
snakemake = mock_snakemake("retrieve_jrc_idees")
|
snakemake = mock_snakemake("retrieve_jrc_idees")
|
||||||
rootpath = ".."
|
rootpath = ".."
|
||||||
else:
|
else:
|
||||||
@ -33,30 +30,22 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
configure_logging(snakemake)
|
configure_logging(snakemake)
|
||||||
set_scenario_config(snakemake)
|
set_scenario_config(snakemake)
|
||||||
|
|
||||||
disable_progress = snakemake.config["run"].get("disable_progressbar", False)
|
disable_progress = snakemake.config["run"].get("disable_progressbar", False)
|
||||||
|
|
||||||
# create a local directory to save the zip files
|
to_fn = snakemake.output[0]
|
||||||
local_dir = snakemake.output[0]
|
to_fn_zp = to_fn + ".zip"
|
||||||
if not os.path.exists(local_dir):
|
|
||||||
os.makedirs(local_dir)
|
|
||||||
|
|
||||||
# get the list of zip files from the JRC URL
|
|
||||||
response = requests.get(url_jrc)
|
|
||||||
soup = BeautifulSoup(response.text, "html.parser")
|
|
||||||
zip_files = [
|
|
||||||
link.get("href")
|
|
||||||
for link in soup.find_all("a")
|
|
||||||
if link.get("href").endswith(".zip")
|
|
||||||
]
|
|
||||||
|
|
||||||
|
# download .zip file
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Downloading {len(zip_files)} .zip files for JRC IDEES from '{url_jrc}'."
|
f"Downloading JRC IDEES from {url_jrc}."
|
||||||
)
|
)
|
||||||
|
progress_retrieve(url_jrc, to_fn_zp, disable=disable_progress)
|
||||||
|
|
||||||
|
# extract
|
||||||
|
logger.info("Extracting JRC IDEES data.")
|
||||||
|
with zipfile.ZipFile(to_fn_zp, "r") as zip_ref:
|
||||||
|
zip_ref.extractall(to_fn)
|
||||||
|
|
||||||
|
logger.info(f"JRC IDEES data available in '{to_fn}'.")
|
||||||
|
|
||||||
|
|
||||||
# download and unpack each zip file
|
|
||||||
for zip_file in zip_files:
|
|
||||||
logger.info(f"Downloading and unpacking {zip_file}")
|
|
||||||
zip_url = url_jrc + zip_file
|
|
||||||
to_fn = local_dir + "/" + zip_file[:-4]
|
|
||||||
progress_retrieve(zip_url, to_fn, disable=disable_progress)
|
|
||||||
|
Loading…
Reference in New Issue
Block a user