naturalearth: automatically download and remove from data bundle

This commit is contained in:
Fabian Neumann 2024-08-02 09:53:34 +02:00
parent 89d28ed06e
commit 311c82d65e
2 changed files with 21 additions and 2 deletions

View File

@ -106,7 +106,7 @@ rule build_shapes:
params:
countries=config_provider("countries"),
input:
naturalearth=ancient("data/bundle/naturalearth/ne_10m_admin_0_countries.shp"),
naturalearth=ancient("data/naturalearth/ne_10m_admin_0_countries_deu.shp"),
eez=ancient("data/bundle/eez/World_EEZ_v8_2014.shp"),
nuts3=ancient("data/bundle/NUTS_2013_60M_SH/data/NUTS_RG_60M_2013.shp"),
nuts3pop=ancient("data/bundle/nama_10r_3popgdp.tsv.gz"),

View File

@ -4,6 +4,7 @@
import requests
from datetime import datetime, timedelta
from shutil import move, unpack_archive
if config["enable"].get("retrieve", "auto") == "auto":
config["enable"]["retrieve"] = has_internet_access()
@ -16,7 +17,6 @@ if config["enable"]["retrieve"] and config["enable"].get("retrieve_databundle",
datafiles = [
"je-e-21.03.02.xls",
"eez/World_EEZ_v8_2014.shp",
"naturalearth/ne_10m_admin_0_countries.shp",
"NUTS_2013_60M_SH/data/NUTS_RG_60M_2013.shp",
"nama_10r_3popgdp.tsv.gz",
"nama_10r_3gdp.tsv.gz",
@ -211,6 +211,25 @@ if config["enable"]["retrieve"]:
move(input[0], output[0])
if config["enable"]["retrieve"]:

    # Natural Earth 10m admin-0 country boundaries.
    # The archive is fetched straight from naciscdn.org, which is where the
    # Natural Earth download page redirects to
    # (https://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-admin-0-countries/).
    # The German point-of-view (POV) variant is used so that Crimea is included.
    rule retrieve_naturalearth_countries:
        input:
            storage(
                "https://naciscdn.org/naturalearth/10m/cultural/ne_10m_admin_0_countries_deu.zip"
            ),
        params:
            zip="data/naturalearth/ne_10m_admin_0_countries_deu.zip",
        output:
            countries="data/naturalearth/ne_10m_admin_0_countries_deu.shp",
        run:
            archive = params["zip"]
            target_dir = Path(output["countries"]).parent
            # Put the downloaded archive into the data folder, extract it next
            # to the expected shapefile, then delete the archive again.
            move(input[0], archive)
            unpack_archive(archive, target_dir)
            os.remove(archive)
if config["enable"]["retrieve"]:
# Some logic to find the correct file URL
# Files are sometimes released late or ahead of schedule, so check which file is currently available