2022-09-16 13:04:04 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
2023-02-16 10:50:55 +00:00
|
|
|
# SPDX-FileCopyrightText: : 2017-2023 The PyPSA-Eur Authors
|
2020-05-29 07:50:55 +00:00
|
|
|
#
|
2021-09-14 14:37:41 +00:00
|
|
|
# SPDX-License-Identifier: MIT
|
2019-08-08 13:02:28 +00:00
|
|
|
"""
|
2023-03-09 12:29:13 +00:00
|
|
|
Creates GIS shape files of the countries, exclusive economic zones and `NUTS3 <
|
|
|
|
https://en.wikipedia.org/wiki/Nomenclature_of_Territorial_Units_for_Statistics>
|
|
|
|
`_ areas.
|
2019-08-11 09:40:47 +00:00
|
|
|
|
|
|
|
Relevant Settings
|
|
|
|
-----------------
|
|
|
|
|
2019-08-11 11:17:36 +00:00
|
|
|
.. code:: yaml
|
|
|
|
|
|
|
|
countries:
|
|
|
|
|
2019-11-05 13:29:15 +00:00
|
|
|
.. seealso::
|
2023-04-21 08:41:44 +00:00
|
|
|
Documentation of the configuration file ``config/config.yaml`` at
|
2019-08-13 08:03:46 +00:00
|
|
|
:ref:`toplevel_cf`
|
|
|
|
|
2019-08-11 09:40:47 +00:00
|
|
|
Inputs
|
|
|
|
------
|
|
|
|
|
2019-08-11 20:34:18 +00:00
|
|
|
- ``data/bundle/naturalearth/ne_10m_admin_0_countries.shp``: World country shapes
|
|
|
|
|
2023-03-09 12:28:42 +00:00
|
|
|
.. image:: img/countries.png
|
2019-08-11 20:34:18 +00:00
|
|
|
:scale: 33 %
|
|
|
|
|
|
|
|
- ``data/bundle/eez/World_EEZ_v8_2014.shp``: World `exclusive economic zones <https://en.wikipedia.org/wiki/Exclusive_economic_zone>`_ (EEZ)
|
|
|
|
|
2023-03-09 12:28:42 +00:00
|
|
|
.. image:: img/eez.png
|
2019-08-11 20:34:18 +00:00
|
|
|
:scale: 33 %
|
|
|
|
|
|
|
|
- ``data/bundle/NUTS_2013_60M_SH/data/NUTS_RG_60M_2013.shp``: Europe NUTS3 regions
|
|
|
|
|
2023-03-09 12:28:42 +00:00
|
|
|
.. image:: img/nuts3.png
|
2019-08-11 20:34:18 +00:00
|
|
|
:scale: 33 %
|
|
|
|
|
2021-08-27 10:30:29 +00:00
|
|
|
- ``data/bundle/nama_10r_3popgdp.tsv.gz``: Average annual population by NUTS3 region (`eurostat <http://appsso.eurostat.ec.europa.eu/nui/show.do?dataset=nama_10r_3popgdp&lang=en>`__)
|
|
|
|
- ``data/bundle/nama_10r_3gdp.tsv.gz``: Gross domestic product (GDP) by NUTS 3 regions (`eurostat <http://appsso.eurostat.ec.europa.eu/nui/show.do?dataset=nama_10r_3gdp&lang=en>`__)
|
2019-08-11 20:34:18 +00:00
|
|
|
- ``data/bundle/ch_cantons.csv``: Mapping between Swiss Cantons and NUTS3 regions
|
|
|
|
- ``data/bundle/je-e-21.03.02.xls``: Population and GDP data per Canton (`BFS - Swiss Federal Statistical Office <https://www.bfs.admin.ch/bfs/en/home/news/whats-new.assetdetail.7786557.html>`_ )
|
|
|
|
|
2019-08-11 09:40:47 +00:00
|
|
|
Outputs
|
|
|
|
-------
|
|
|
|
|
2019-08-11 20:34:18 +00:00
|
|
|
- ``resources/country_shapes.geojson``: country shapes out of country selection
|
|
|
|
|
2023-03-09 12:28:42 +00:00
|
|
|
.. image:: img/country_shapes.png
|
2019-08-11 20:34:18 +00:00
|
|
|
:scale: 33 %
|
|
|
|
|
|
|
|
- ``resources/offshore_shapes.geojson``: EEZ shapes out of country selection
|
|
|
|
|
2023-03-09 12:28:42 +00:00
|
|
|
.. image:: img/offshore_shapes.png
|
2019-08-11 20:34:18 +00:00
|
|
|
:scale: 33 %
|
|
|
|
|
|
|
|
- ``resources/europe_shape.geojson``: Shape of Europe including countries and EEZ
|
|
|
|
|
2023-03-09 12:28:42 +00:00
|
|
|
.. image:: img/europe_shape.png
|
2019-08-11 20:34:18 +00:00
|
|
|
:scale: 33 %
|
2019-11-05 13:29:15 +00:00
|
|
|
|
2019-08-13 13:48:21 +00:00
|
|
|
- ``resources/nuts3_shapes.geojson``: NUTS3 shapes out of country selection including population and GDP data.
|
2019-11-05 13:29:15 +00:00
|
|
|
|
2023-03-09 12:28:42 +00:00
|
|
|
.. image:: img/nuts3_shapes.png
|
2019-08-11 20:34:18 +00:00
|
|
|
:scale: 33 %
|
|
|
|
|
2019-08-11 09:40:47 +00:00
|
|
|
Description
|
|
|
|
-----------
|
2019-08-08 13:02:28 +00:00
|
|
|
"""
|
|
|
|
|
2019-11-28 07:22:52 +00:00
|
|
|
import logging
|
2021-05-25 09:29:47 +00:00
|
|
|
from functools import reduce
|
2018-08-03 09:49:23 +00:00
|
|
|
from itertools import takewhile
|
|
|
|
from operator import attrgetter
|
|
|
|
|
|
|
|
import geopandas as gpd
|
|
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
2018-12-10 17:34:22 +00:00
|
|
|
import pycountry as pyc
|
2022-01-24 18:48:26 +00:00
|
|
|
from _helpers import configure_logging
|
2018-08-03 09:49:23 +00:00
|
|
|
from shapely.geometry import MultiPolygon, Polygon
|
2018-12-10 17:34:22 +00:00
|
|
|
|
2019-12-09 20:29:15 +00:00
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
2018-12-10 17:34:22 +00:00
|
|
|
def _get_country(target, **keys):
|
|
|
|
assert len(keys) == 1
|
|
|
|
try:
|
|
|
|
return getattr(pyc.countries.get(**keys), target)
|
2019-02-07 12:03:18 +00:00
|
|
|
except (KeyError, AttributeError):
|
2018-12-10 17:34:22 +00:00
|
|
|
return np.nan
|
2018-08-03 09:49:23 +00:00
|
|
|
|
2020-12-03 18:50:53 +00:00
|
|
|
|
2018-08-27 17:11:36 +00:00
|
|
|
def _simplify_polys(polys, minarea=0.1, tolerance=0.01, filterremote=True):
|
2018-08-03 09:49:23 +00:00
|
|
|
if isinstance(polys, MultiPolygon):
|
2022-01-14 07:43:21 +00:00
|
|
|
polys = sorted(polys.geoms, key=attrgetter("area"), reverse=True)
|
2018-08-03 09:49:23 +00:00
|
|
|
mainpoly = polys[0]
|
|
|
|
mainlength = np.sqrt(mainpoly.area / (2.0 * np.pi))
|
|
|
|
if mainpoly.area > minarea:
|
|
|
|
polys = MultiPolygon(
|
2022-09-16 13:04:04 +00:00
|
|
|
[
|
2018-08-03 09:49:23 +00:00
|
|
|
p
|
|
|
|
for p in takewhile(lambda p: p.area > minarea, polys)
|
2018-08-27 17:11:36 +00:00
|
|
|
if not filterremote or (mainpoly.distance(p) < mainlength)
|
2022-09-16 13:04:04 +00:00
|
|
|
]
|
|
|
|
)
|
2018-08-03 09:49:23 +00:00
|
|
|
else:
|
|
|
|
polys = mainpoly
|
|
|
|
return polys.simplify(tolerance=tolerance)
|
|
|
|
|
2020-12-03 18:50:53 +00:00
|
|
|
|
2022-01-11 09:23:22 +00:00
|
|
|
def countries(naturalearth, country_list):
|
|
|
|
if "RS" in country_list:
|
|
|
|
country_list.append("KV")
|
2018-08-08 14:28:32 +00:00
|
|
|
|
2021-09-14 14:34:02 +00:00
|
|
|
df = gpd.read_file(naturalearth)
|
2018-08-03 09:49:23 +00:00
|
|
|
|
|
|
|
# Names are a hassle in naturalearth, try several fields
|
|
|
|
fieldnames = (
|
|
|
|
df[x].where(lambda s: s != "-99") for x in ("ISO_A2", "WB_A2", "ADM0_A3")
|
2022-09-16 13:04:04 +00:00
|
|
|
)
|
2018-08-03 09:49:23 +00:00
|
|
|
df["name"] = reduce(lambda x, y: x.fillna(y), fieldnames, next(fieldnames)).str[0:2]
|
2022-09-16 13:04:04 +00:00
|
|
|
|
2022-01-11 09:23:22 +00:00
|
|
|
df = df.loc[
|
|
|
|
df.name.isin(country_list) & ((df["scalerank"] == 0) | (df["scalerank"] == 5))
|
|
|
|
]
|
2018-08-03 09:49:23 +00:00
|
|
|
s = df.set_index("name")["geometry"].map(_simplify_polys)
|
2022-01-11 09:23:22 +00:00
|
|
|
if "RS" in country_list:
|
|
|
|
s["RS"] = s["RS"].union(s.pop("KV"))
|
2022-12-28 08:35:11 +00:00
|
|
|
# cleanup shape union
|
|
|
|
s["RS"] = Polygon(s["RS"].exterior.coords)
|
2018-08-03 09:49:23 +00:00
|
|
|
|
|
|
|
return s
|
|
|
|
|
2020-12-03 18:50:53 +00:00
|
|
|
|
2022-01-11 09:24:45 +00:00
|
|
|
def eez(country_shapes, eez, country_list):
|
2021-09-14 14:34:02 +00:00
|
|
|
df = gpd.read_file(eez)
|
2022-01-11 09:24:45 +00:00
|
|
|
df = df.loc[
|
|
|
|
df["ISO_3digit"].isin(
|
|
|
|
[_get_country("alpha_3", alpha_2=c) for c in country_list]
|
2022-09-16 13:04:04 +00:00
|
|
|
)
|
|
|
|
]
|
2018-12-10 17:34:22 +00:00
|
|
|
df["name"] = df["ISO_3digit"].map(lambda c: _get_country("alpha_2", alpha_3=c))
|
2018-08-27 17:11:36 +00:00
|
|
|
s = df.set_index("name").geometry.map(
|
|
|
|
lambda s: _simplify_polys(s, filterremote=False)
|
2018-12-19 14:42:03 +00:00
|
|
|
)
|
|
|
|
s = gpd.GeoSeries(
|
2022-12-27 10:42:32 +00:00
|
|
|
{k: v for k, v in s.items() if v.distance(country_shapes[k]) < 1e-3}
|
2022-09-16 13:04:04 +00:00
|
|
|
)
|
2022-06-23 12:25:30 +00:00
|
|
|
s = s.to_frame("geometry")
|
2018-12-19 14:42:03 +00:00
|
|
|
s.index.name = "name"
|
|
|
|
return s
|
2018-08-03 09:49:23 +00:00
|
|
|
|
2020-12-03 18:50:53 +00:00
|
|
|
|
2018-08-03 09:49:23 +00:00
|
|
|
def country_cover(country_shapes, eez_shapes=None):
|
2022-06-23 12:25:30 +00:00
|
|
|
shapes = country_shapes
|
2018-08-03 09:49:23 +00:00
|
|
|
if eez_shapes is not None:
|
2022-06-23 12:25:30 +00:00
|
|
|
shapes = pd.concat([shapes, eez_shapes])
|
2023-02-07 13:07:44 +00:00
|
|
|
europe_shape = shapes.unary_union
|
2018-08-03 09:49:23 +00:00
|
|
|
if isinstance(europe_shape, MultiPolygon):
|
|
|
|
europe_shape = max(europe_shape, key=attrgetter("area"))
|
|
|
|
return Polygon(shell=europe_shape.exterior)
|
|
|
|
|
2020-12-03 18:50:53 +00:00
|
|
|
|
2021-09-14 14:34:02 +00:00
|
|
|
def nuts3(country_shapes, nuts3, nuts3pop, nuts3gdp, ch_cantons, ch_popgdp):
|
|
|
|
df = gpd.read_file(nuts3)
|
2018-08-03 09:49:23 +00:00
|
|
|
df = df.loc[df["STAT_LEVL_"] == 3]
|
|
|
|
df["geometry"] = df["geometry"].map(_simplify_polys)
|
|
|
|
df = df.rename(columns={"NUTS_ID": "id"})[["id", "geometry"]].set_index("id")
|
2022-09-16 13:04:04 +00:00
|
|
|
|
2021-09-14 14:34:02 +00:00
|
|
|
pop = pd.read_table(nuts3pop, na_values=[":"], delimiter=" ?\t", engine="python")
|
2018-08-03 09:49:23 +00:00
|
|
|
pop = (
|
|
|
|
pop.set_index(
|
|
|
|
pd.MultiIndex.from_tuples(pop.pop("unit,geo\\time").str.split(","))
|
2022-09-16 13:04:04 +00:00
|
|
|
)
|
2018-08-03 09:49:23 +00:00
|
|
|
.loc["THS"]
|
|
|
|
.applymap(lambda x: pd.to_numeric(x, errors="coerce"))
|
|
|
|
.fillna(method="bfill", axis=1)
|
|
|
|
)["2014"]
|
2022-09-16 13:04:04 +00:00
|
|
|
|
2021-09-14 14:34:02 +00:00
|
|
|
gdp = pd.read_table(nuts3gdp, na_values=[":"], delimiter=" ?\t", engine="python")
|
2018-08-03 09:49:23 +00:00
|
|
|
gdp = (
|
|
|
|
gdp.set_index(
|
|
|
|
pd.MultiIndex.from_tuples(gdp.pop("unit,geo\\time").str.split(","))
|
2022-09-16 13:04:04 +00:00
|
|
|
)
|
2018-08-03 09:49:23 +00:00
|
|
|
.loc["EUR_HAB"]
|
|
|
|
.applymap(lambda x: pd.to_numeric(x, errors="coerce"))
|
|
|
|
.fillna(method="bfill", axis=1)
|
|
|
|
)["2014"]
|
|
|
|
|
2021-09-14 14:34:02 +00:00
|
|
|
cantons = pd.read_csv(ch_cantons)
|
2018-08-03 09:49:23 +00:00
|
|
|
cantons = cantons.set_index(cantons["HASC"].str[3:])["NUTS"]
|
|
|
|
cantons = cantons.str.pad(5, side="right", fillchar="0")
|
|
|
|
|
2021-09-14 14:34:02 +00:00
|
|
|
swiss = pd.read_excel(ch_popgdp, skiprows=3, index_col=0)
|
2018-08-03 09:49:23 +00:00
|
|
|
swiss.columns = swiss.columns.to_series().map(cantons)
|
|
|
|
|
2022-01-29 15:17:46 +00:00
|
|
|
swiss_pop = pd.to_numeric(swiss.loc["Residents in 1000", "CH040":])
|
|
|
|
pop = pd.concat([pop, swiss_pop])
|
|
|
|
swiss_gdp = pd.to_numeric(
|
|
|
|
swiss.loc["Gross domestic product per capita in Swiss francs", "CH040":]
|
|
|
|
)
|
|
|
|
gdp = pd.concat([gdp, swiss_gdp])
|
2018-08-03 09:49:23 +00:00
|
|
|
|
|
|
|
df = df.join(pd.DataFrame(dict(pop=pop, gdp=gdp)))
|
|
|
|
|
|
|
|
df["country"] = df.index.to_series().str[:2].replace(dict(UK="GB", EL="GR"))
|
2022-09-16 13:04:04 +00:00
|
|
|
|
2018-08-03 09:49:23 +00:00
|
|
|
excludenuts = pd.Index(
|
2022-09-16 13:04:04 +00:00
|
|
|
(
|
2018-08-03 09:49:23 +00:00
|
|
|
"FRA10",
|
|
|
|
"FRA20",
|
|
|
|
"FRA30",
|
|
|
|
"FRA40",
|
|
|
|
"FRA50",
|
|
|
|
"PT200",
|
|
|
|
"PT300",
|
|
|
|
"ES707",
|
|
|
|
"ES703",
|
|
|
|
"ES704",
|
|
|
|
"ES705",
|
|
|
|
"ES706",
|
|
|
|
"ES708",
|
|
|
|
"ES709",
|
|
|
|
"FI2",
|
|
|
|
"FR9",
|
|
|
|
)
|
2022-09-16 13:04:04 +00:00
|
|
|
)
|
2018-08-03 09:49:23 +00:00
|
|
|
excludecountry = pd.Index(("MT", "TR", "LI", "IS", "CY", "KV"))
|
|
|
|
|
|
|
|
df = df.loc[df.index.difference(excludenuts)]
|
|
|
|
df = df.loc[~df.country.isin(excludecountry)]
|
|
|
|
|
|
|
|
manual = gpd.GeoDataFrame(
|
|
|
|
[["BA1", "BA", 3871.0], ["RS1", "RS", 7210.0], ["AL1", "AL", 2893.0]],
|
|
|
|
columns=["NUTS_ID", "country", "pop"],
|
2023-05-16 11:34:58 +00:00
|
|
|
geometry=gpd.GeoSeries(),
|
2023-02-16 13:04:13 +00:00
|
|
|
)
|
2018-08-03 09:49:23 +00:00
|
|
|
manual["geometry"] = manual["country"].map(country_shapes)
|
2018-08-08 14:28:32 +00:00
|
|
|
manual = manual.dropna()
|
2023-02-16 13:04:13 +00:00
|
|
|
manual = manual.set_index("NUTS_ID")
|
|
|
|
manual = manual.set_crs("ETRS89")
|
2018-08-03 09:49:23 +00:00
|
|
|
|
2022-01-29 15:17:46 +00:00
|
|
|
df = pd.concat([df, manual], sort=False)
|
2018-08-03 09:49:23 +00:00
|
|
|
|
|
|
|
df.loc["ME000", "pop"] = 650.0
|
|
|
|
|
|
|
|
return df
|
|
|
|
|
2020-12-03 18:50:53 +00:00
|
|
|
|
2018-08-03 09:49:23 +00:00
|
|
|
if __name__ == "__main__":
|
|
|
|
if "snakemake" not in globals():
|
2019-12-09 20:29:15 +00:00
|
|
|
from _helpers import mock_snakemake
|
2022-09-16 13:04:04 +00:00
|
|
|
|
2019-12-09 20:29:15 +00:00
|
|
|
snakemake = mock_snakemake("build_shapes")
|
2019-11-28 07:22:52 +00:00
|
|
|
configure_logging(snakemake)
|
|
|
|
|
2022-01-24 18:48:26 +00:00
|
|
|
country_shapes = countries(
|
2023-05-15 08:33:17 +00:00
|
|
|
snakemake.input.naturalearth, snakemake.params["countries"]
|
2022-01-24 18:48:26 +00:00
|
|
|
)
|
2022-06-23 12:25:30 +00:00
|
|
|
country_shapes.reset_index().to_file(snakemake.output.country_shapes)
|
2020-12-03 18:50:53 +00:00
|
|
|
|
2022-01-24 18:48:26 +00:00
|
|
|
offshore_shapes = eez(
|
2023-05-15 08:33:17 +00:00
|
|
|
country_shapes, snakemake.input.eez, snakemake.params["countries"]
|
2022-01-24 18:48:26 +00:00
|
|
|
)
|
2022-06-23 12:25:30 +00:00
|
|
|
offshore_shapes.reset_index().to_file(snakemake.output.offshore_shapes)
|
2018-08-03 09:49:23 +00:00
|
|
|
|
2022-06-23 12:25:30 +00:00
|
|
|
europe_shape = gpd.GeoDataFrame(
|
|
|
|
geometry=[country_cover(country_shapes, offshore_shapes.geometry)]
|
|
|
|
)
|
|
|
|
europe_shape.reset_index().to_file(snakemake.output.europe_shape)
|
2018-08-03 09:49:23 +00:00
|
|
|
|
2022-01-24 18:48:26 +00:00
|
|
|
nuts3_shapes = nuts3(
|
|
|
|
country_shapes,
|
|
|
|
snakemake.input.nuts3,
|
|
|
|
snakemake.input.nuts3pop,
|
|
|
|
snakemake.input.nuts3gdp,
|
|
|
|
snakemake.input.ch_cantons,
|
|
|
|
snakemake.input.ch_popgdp,
|
|
|
|
)
|
2022-06-23 12:25:30 +00:00
|
|
|
nuts3_shapes.reset_index().to_file(snakemake.output.nuts3_shapes)
|