2019-08-08 13:02:28 +00:00
|
|
|
"""
|
2019-08-11 20:34:18 +00:00
|
|
|
Creates GIS shape files of the countries, exclusive economic zones and `NUTS3 <https://en.wikipedia.org/wiki/Nomenclature_of_Territorial_Units_for_Statistics>`_ areas.
|
2019-08-11 09:40:47 +00:00
|
|
|
|
|
|
|
Relevant Settings
|
|
|
|
-----------------
|
|
|
|
|
2019-08-11 11:17:36 +00:00
|
|
|
.. code:: yaml
|
|
|
|
|
|
|
|
countries:
|
|
|
|
|
2019-08-13 08:03:46 +00:00
|
|
|
.. seealso::
|
|
|
|
Documentation of the configuration file ``config.yaml`` at
|
|
|
|
:ref:`toplevel_cf`
|
|
|
|
|
2019-08-11 09:40:47 +00:00
|
|
|
Inputs
|
|
|
|
------
|
|
|
|
|
2019-08-11 20:34:18 +00:00
|
|
|
- ``data/bundle/naturalearth/ne_10m_admin_0_countries.shp``: World country shapes
|
|
|
|
|
2019-08-14 13:36:46 +00:00
|
|
|
.. image:: ../img/countries.png
|
2019-08-11 20:34:18 +00:00
|
|
|
:scale: 33 %
|
|
|
|
|
|
|
|
- ``data/bundle/eez/World_EEZ_v8_2014.shp``: World `exclusive economic zones <https://en.wikipedia.org/wiki/Exclusive_economic_zone>`_ (EEZ)
|
|
|
|
|
2019-08-14 13:36:46 +00:00
|
|
|
.. image:: ../img/eez.png
|
2019-08-11 20:34:18 +00:00
|
|
|
:scale: 33 %
|
|
|
|
|
|
|
|
- ``data/bundle/NUTS_2013_60M_SH/data/NUTS_RG_60M_2013.shp``: Europe NUTS3 regions
|
|
|
|
|
2019-08-14 13:36:46 +00:00
|
|
|
.. image:: ../img/nuts3.png
|
2019-08-11 20:34:18 +00:00
|
|
|
:scale: 33 %
|
|
|
|
|
|
|
|
- ``data/bundle/nama_10r_3popgdp.tsv.gz``: Average annual population by NUTS3 region (`eurostat <http://appsso.eurostat.ec.europa.eu/nui/show.do?dataset=nama_10r_3popgdp&lang=en>`_)
|
|
|
|
- ``data/bundle/nama_10r_3gdp.tsv.gz``: Gross domestic product (GDP) by NUTS 3 regions (`eurostat <http://appsso.eurostat.ec.europa.eu/nui/show.do?dataset=nama_10r_3gdp&lang=en>`_)
|
|
|
|
- ``data/bundle/ch_cantons.csv``: Mapping between Swiss Cantons and NUTS3 regions
|
|
|
|
- ``data/bundle/je-e-21.03.02.xls``: Population and GDP data per Canton (`BFS - Swiss Federal Statistical Office <https://www.bfs.admin.ch/bfs/en/home/news/whats-new.assetdetail.7786557.html>`_ )
|
|
|
|
|
2019-08-11 09:40:47 +00:00
|
|
|
Outputs
|
|
|
|
-------
|
|
|
|
|
2019-08-11 20:34:18 +00:00
|
|
|
- ``resources/country_shapes.geojson``: country shapes out of country selection
|
|
|
|
|
2019-08-14 13:36:46 +00:00
|
|
|
.. image:: ../img/country_shapes.png
|
2019-08-11 20:34:18 +00:00
|
|
|
:scale: 33 %
|
|
|
|
|
|
|
|
- ``resources/offshore_shapes.geojson``: EEZ shapes out of country selection
|
|
|
|
|
2019-08-14 13:36:46 +00:00
|
|
|
.. image:: ../img/offshore_shapes.png
|
2019-08-11 20:34:18 +00:00
|
|
|
:scale: 33 %
|
|
|
|
|
|
|
|
- ``resources/europe_shape.geojson``: Shape of Europe including countries and EEZ
|
|
|
|
|
2019-08-14 13:36:46 +00:00
|
|
|
.. image:: ../img/europe_shape.png
|
2019-08-11 20:34:18 +00:00
|
|
|
:scale: 33 %
|
|
|
|
|
2019-08-13 13:48:21 +00:00
|
|
|
- ``resources/nuts3_shapes.geojson``: NUTS3 shapes out of country selection including population and GDP data.
|
2019-08-11 20:34:18 +00:00
|
|
|
|
2019-08-14 13:36:46 +00:00
|
|
|
.. image:: ../img/nuts3_shapes.png
|
2019-08-11 20:34:18 +00:00
|
|
|
:scale: 33 %
|
|
|
|
|
2019-08-11 09:40:47 +00:00
|
|
|
Description
|
|
|
|
-----------
|
|
|
|
|
2019-08-08 13:02:28 +00:00
|
|
|
"""
|
|
|
|
|
2018-08-03 09:49:23 +00:00
|
|
|
import os
|
|
|
|
import numpy as np
|
|
|
|
from operator import attrgetter
|
|
|
|
from six.moves import reduce
|
|
|
|
from itertools import takewhile
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
import geopandas as gpd
|
|
|
|
from shapely.geometry import MultiPolygon, Polygon
|
|
|
|
from shapely.ops import cascaded_union
|
|
|
|
|
2018-12-10 17:34:22 +00:00
|
|
|
import pycountry as pyc
|
|
|
|
|
|
|
|
def _get_country(target, **keys):
|
|
|
|
assert len(keys) == 1
|
|
|
|
try:
|
|
|
|
return getattr(pyc.countries.get(**keys), target)
|
2019-02-07 12:03:18 +00:00
|
|
|
except (KeyError, AttributeError):
|
2018-12-10 17:34:22 +00:00
|
|
|
return np.nan
|
2018-08-03 09:49:23 +00:00
|
|
|
|
2018-08-27 17:11:36 +00:00
|
|
|
def _simplify_polys(polys, minarea=0.1, tolerance=0.01, filterremote=True):
|
2018-08-03 09:49:23 +00:00
|
|
|
if isinstance(polys, MultiPolygon):
|
|
|
|
polys = sorted(polys, key=attrgetter('area'), reverse=True)
|
|
|
|
mainpoly = polys[0]
|
|
|
|
mainlength = np.sqrt(mainpoly.area/(2.*np.pi))
|
|
|
|
if mainpoly.area > minarea:
|
|
|
|
polys = MultiPolygon([p
|
|
|
|
for p in takewhile(lambda p: p.area > minarea, polys)
|
2018-08-27 17:11:36 +00:00
|
|
|
if not filterremote or (mainpoly.distance(p) < mainlength)])
|
2018-08-03 09:49:23 +00:00
|
|
|
else:
|
|
|
|
polys = mainpoly
|
|
|
|
return polys.simplify(tolerance=tolerance)
|
|
|
|
|
|
|
|
def countries():
|
2018-08-08 14:28:32 +00:00
|
|
|
cntries = snakemake.config['countries']
|
|
|
|
if 'RS' in cntries: cntries.append('KV')
|
|
|
|
|
2018-08-03 09:49:23 +00:00
|
|
|
df = gpd.read_file(snakemake.input.naturalearth)
|
|
|
|
|
|
|
|
# Names are a hassle in naturalearth, try several fields
|
|
|
|
fieldnames = (df[x].where(lambda s: s!='-99') for x in ('ISO_A2', 'WB_A2', 'ADM0_A3'))
|
|
|
|
df['name'] = reduce(lambda x,y: x.fillna(y), fieldnames, next(fieldnames)).str[0:2]
|
|
|
|
|
|
|
|
df = df.loc[df.name.isin(cntries) & (df['scalerank'] == 0)]
|
|
|
|
s = df.set_index('name')['geometry'].map(_simplify_polys)
|
2018-08-08 14:28:32 +00:00
|
|
|
if 'RS' in cntries: s['RS'] = s['RS'].union(s.pop('KV'))
|
2018-08-03 09:49:23 +00:00
|
|
|
|
|
|
|
return s
|
|
|
|
|
|
|
|
def eez(country_shapes):
|
|
|
|
df = gpd.read_file(snakemake.input.eez)
|
2018-12-10 17:34:22 +00:00
|
|
|
df = df.loc[df['ISO_3digit'].isin([_get_country('alpha_3', alpha_2=c) for c in snakemake.config['countries']])]
|
|
|
|
df['name'] = df['ISO_3digit'].map(lambda c: _get_country('alpha_2', alpha_3=c))
|
2018-08-27 17:11:36 +00:00
|
|
|
s = df.set_index('name').geometry.map(lambda s: _simplify_polys(s, filterremote=False))
|
2018-12-19 14:42:03 +00:00
|
|
|
s = gpd.GeoSeries({k:v for k,v in s.iteritems() if v.distance(country_shapes[k]) < 1e-3})
|
|
|
|
s.index.name = "name"
|
|
|
|
return s
|
2018-08-03 09:49:23 +00:00
|
|
|
|
|
|
|
def country_cover(country_shapes, eez_shapes=None):
|
|
|
|
shapes = list(country_shapes)
|
|
|
|
if eez_shapes is not None:
|
|
|
|
shapes += list(eez_shapes)
|
|
|
|
|
|
|
|
europe_shape = cascaded_union(shapes)
|
|
|
|
if isinstance(europe_shape, MultiPolygon):
|
|
|
|
europe_shape = max(europe_shape, key=attrgetter('area'))
|
|
|
|
return Polygon(shell=europe_shape.exterior)
|
|
|
|
|
|
|
|
def nuts3(country_shapes):
|
|
|
|
df = gpd.read_file(snakemake.input.nuts3)
|
|
|
|
df = df.loc[df['STAT_LEVL_'] == 3]
|
|
|
|
df['geometry'] = df['geometry'].map(_simplify_polys)
|
|
|
|
df = df.rename(columns={'NUTS_ID': 'id'})[['id', 'geometry']].set_index('id')
|
|
|
|
|
|
|
|
pop = pd.read_table(snakemake.input.nuts3pop, na_values=[':'], delimiter=' ?\t', engine='python')
|
|
|
|
pop = (pop
|
|
|
|
.set_index(pd.MultiIndex.from_tuples(pop.pop('unit,geo\\time').str.split(','))).loc['THS']
|
|
|
|
.applymap(lambda x: pd.to_numeric(x, errors='coerce'))
|
|
|
|
.fillna(method='bfill', axis=1))['2014']
|
|
|
|
|
|
|
|
gdp = pd.read_table(snakemake.input.nuts3gdp, na_values=[':'], delimiter=' ?\t', engine='python')
|
|
|
|
gdp = (gdp
|
|
|
|
.set_index(pd.MultiIndex.from_tuples(gdp.pop('unit,geo\\time').str.split(','))).loc['EUR_HAB']
|
|
|
|
.applymap(lambda x: pd.to_numeric(x, errors='coerce'))
|
|
|
|
.fillna(method='bfill', axis=1))['2014']
|
|
|
|
|
|
|
|
# Swiss data
|
|
|
|
cantons = pd.read_csv(snakemake.input.ch_cantons)
|
|
|
|
cantons = cantons.set_index(cantons['HASC'].str[3:])['NUTS']
|
|
|
|
cantons = cantons.str.pad(5, side='right', fillchar='0')
|
|
|
|
|
|
|
|
swiss = pd.read_excel(snakemake.input.ch_popgdp, skiprows=3, index_col=0)
|
|
|
|
swiss.columns = swiss.columns.to_series().map(cantons)
|
|
|
|
|
|
|
|
pop = pop.append(pd.to_numeric(swiss.loc['Residents in 1000', 'CH040':]))
|
|
|
|
gdp = gdp.append(pd.to_numeric(swiss.loc['Gross domestic product per capita in Swiss francs', 'CH040':]))
|
|
|
|
|
|
|
|
df = df.join(pd.DataFrame(dict(pop=pop, gdp=gdp)))
|
|
|
|
|
|
|
|
df['country'] = df.index.to_series().str[:2].replace(dict(UK='GB', EL='GR'))
|
|
|
|
|
|
|
|
excludenuts = pd.Index(('FRA10', 'FRA20', 'FRA30', 'FRA40', 'FRA50',
|
|
|
|
'PT200', 'PT300',
|
|
|
|
'ES707', 'ES703', 'ES704','ES705', 'ES706', 'ES708', 'ES709',
|
|
|
|
'FI2', 'FR9'))
|
|
|
|
excludecountry = pd.Index(('MT', 'TR', 'LI', 'IS', 'CY', 'KV'))
|
|
|
|
|
|
|
|
df = df.loc[df.index.difference(excludenuts)]
|
|
|
|
df = df.loc[~df.country.isin(excludecountry)]
|
|
|
|
|
|
|
|
manual = gpd.GeoDataFrame(
|
|
|
|
[['BA1', 'BA', 3871.],
|
|
|
|
['RS1', 'RS', 7210.],
|
|
|
|
['AL1', 'AL', 2893.]],
|
|
|
|
columns=['NUTS_ID', 'country', 'pop']
|
|
|
|
).set_index('NUTS_ID')
|
|
|
|
manual['geometry'] = manual['country'].map(country_shapes)
|
2018-08-08 14:28:32 +00:00
|
|
|
manual = manual.dropna()
|
2018-08-03 09:49:23 +00:00
|
|
|
|
|
|
|
df = df.append(manual)
|
|
|
|
|
|
|
|
df.loc['ME000', 'pop'] = 650.
|
|
|
|
|
|
|
|
return df
|
|
|
|
|
2018-12-19 14:14:21 +00:00
|
|
|
def save_to_geojson(df, fn):
|
2018-08-03 09:49:23 +00:00
|
|
|
if os.path.exists(fn):
|
|
|
|
os.unlink(fn)
|
2018-12-19 14:14:21 +00:00
|
|
|
if not isinstance(df, gpd.GeoDataFrame):
|
|
|
|
df = gpd.GeoDataFrame(dict(geometry=df))
|
|
|
|
df = df.reset_index()
|
|
|
|
schema = {**gpd.io.file.infer_schema(df), 'geometry': 'Unknown'}
|
|
|
|
df.to_file(fn, driver='GeoJSON', schema=schema)
|
2018-08-03 09:49:23 +00:00
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
# Detect running outside of snakemake and mock snakemake for testing
|
|
|
|
if 'snakemake' not in globals():
|
|
|
|
from vresutils.snakemake import MockSnakemake, Dict
|
|
|
|
snakemake = MockSnakemake(
|
|
|
|
path='..',
|
|
|
|
wildcards={},
|
|
|
|
input=Dict(
|
2018-10-22 21:20:18 +00:00
|
|
|
naturalearth='data/bundle/naturalearth/ne_10m_admin_0_countries.shp',
|
|
|
|
eez='data/bundle/eez/World_EEZ_v8_2014.shp',
|
|
|
|
nuts3='data/bundle/NUTS_2013_60M_SH/data/NUTS_RG_60M_2013.shp',
|
|
|
|
nuts3pop='data/bundle/nama_10r_3popgdp.tsv.gz',
|
|
|
|
nuts3gdp='data/bundle/nama_10r_3gdp.tsv.gz',
|
|
|
|
ch_cantons='data/bundle/ch_cantons.csv',
|
|
|
|
ch_popgdp='data/bundle/je-e-21.03.02.xls'
|
2018-08-03 09:49:23 +00:00
|
|
|
),
|
|
|
|
output=Dict(
|
|
|
|
country_shapes='resources/country_shapes.geojson',
|
|
|
|
offshore_shapes='resource/offshore_shapes.geojson',
|
|
|
|
europe_shape='resources/europe_shape.geojson',
|
|
|
|
nuts3_shapes='resources/nuts3_shapes.geojson'
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
country_shapes = countries()
|
|
|
|
save_to_geojson(country_shapes, snakemake.output.country_shapes)
|
|
|
|
|
|
|
|
offshore_shapes = eez(country_shapes)
|
|
|
|
save_to_geojson(offshore_shapes, snakemake.output.offshore_shapes)
|
|
|
|
|
|
|
|
europe_shape = country_cover(country_shapes, offshore_shapes)
|
|
|
|
save_to_geojson(gpd.GeoSeries(europe_shape), snakemake.output.europe_shape)
|
|
|
|
|
|
|
|
nuts3_shapes = nuts3(country_shapes)
|
|
|
|
save_to_geojson(nuts3_shapes, snakemake.output.nuts3_shapes)
|