build_shapes: Build shapes in separate rule from bundled data

- country_shapes - offshore_shapes - nuts3_shapes (nuts3 shapes already include population and gdp data)
2018-08-03 11:49:23 +02:00 · 2018-08-03 11:49:23 +02:00 · 8e4abc3fce
commit 8e4abc3fce
parent 9986c381be
5 changed files with 198 additions and 22 deletions
--- a/25
+++ b/25
@ -31,13 +31,34 @@ rule base_network:
        eg_converters='data/entsoegridkit/converters.csv',
        eg_transformers='data/entsoegridkit/transformers.csv',
        parameter_corrections='data/parameter_corrections.yaml',
-        links_p_nom='data/links_p_nom.csv'
+        links_p_nom='data/links_p_nom.csv',
        country_shapes='resources/country_shapes.geojson',
        offshore_shapes='resources/offshore_shapes.geojson',
        europe_shape='resources/europe_shape.geojson'
    output: "networks/base.nc"
    benchmark: "benchmarks/base_network"
    threads: 1
    resources: mem_mb=500
    script: "scripts/base_network.py"
 rule build_shapes:
    input:
        naturalearth='data/bundle/naturalearth/ne_10m_admin_0_countries.shp',
        eez='data/bundle/eez/World_EEZ_v8_2014.shp',
        nuts3='data/bundle/NUTS_2013_60M_SH/data/NUTS_RG_60M_2013.shp',
        nuts3pop='data/bundle/nama_10r_3popgdp.tsv.gz',
        nuts3gdp='data/bundle/nama_10r_3gdp.tsv.gz',
        ch_cantons='data/bundle/ch_cantons.csv',
        ch_popgdp='data/bundle/je-e-21.03.02.xls'
    output:
        country_shapes='resources/country_shapes.geojson',
        offshore_shapes='resources/offshore_shapes.geojson',
        europe_shape='resources/europe_shape.geojson',
        nuts3_shapes='resources/nuts3_shapes.geojson'
    threads: 1
    resources: mem_mb=500
    script: "scripts/build_shapes.py"
 rule build_powerplants:
    input: base_network="networks/base.nc"
    output: "resources/powerplants.csv"
@ -47,6 +68,8 @@ rule build_powerplants:
 rule build_bus_regions:
    input:
        country_shapes='resources/country_shapes.geojson',
        offshore_shapes='resources/offshore_shapes.geojson',
        base_network="networks/base.nc"
    output:
        regions_onshore="resources/regions_onshore.geojson",
--- a/scripts/base_network.py
+++ b/scripts/base_network.py
@ -2,18 +2,16 @@
 import yaml
 import pandas as pd
 import geopandas as gpd
 import numpy as np
 import scipy as sp, scipy.spatial
 from scipy.sparse import csgraph
 from operator import attrgetter
 from six import iteritems
 from six.moves import filter
 from itertools import count, chain
 import shapely, shapely.prepared, shapely.wkt
 from shapely.geometry import Point
 import shapely, shapely.prepared, shapely.wkt
 from vresutils import shapes as vshapes
 from vresutils.graph import BreadthFirstLevels
 import logging
@ -33,10 +31,9 @@ def _load_buses_from_eg():
    buses['under_construction'] = buses['under_construction'].fillna(False).astype(bool)
    # remove all buses outside of all countries including exclusive economic zones (offshore)
-    europe_shape = vshapes.country_cover(snakemake.config['countries'])
+    europe_shape = gpd.read_file(snakemake.input.europe_shape).loc[0, 'geometry']
-    europe_shape_exterior = shapely.geometry.Polygon(shell=europe_shape.exterior) # no holes
+    europe_shape_prepped = shapely.prepared.prep(europe_shape)
-    europe_shape_exterior_prepped = shapely.prepared.prep(europe_shape_exterior)
+    buses_in_europe_b = buses[['x', 'y']].apply(lambda p: europe_shape_prepped.contains(Point(p)), axis=1)
    buses_in_europe_b = buses[['x', 'y']].apply(lambda p: europe_shape_exterior_prepped.contains(Point(p)), axis=1)
    buses_with_v_nom_to_keep_b = buses.v_nom.isin(snakemake.config['electricity']['voltages']) | buses.v_nom.isnull()
    logger.info("Removing buses with voltages {}".format(pd.Index(buses.v_nom.unique()).dropna().difference(snakemake.config['electricity']['voltages'])))
@ -212,10 +209,8 @@ def _set_countries_and_substations(n):
        )
    countries = snakemake.config['countries']
-    country_shapes = vshapes.countries(subset=countries, add_KV_to_RS=True,
+    country_shapes = gpd.read_file(snakemake.input.country_shapes).set_index('id')['geometry']
-                                       tolerance=0.01, minarea=0.1)
+    offshore_shapes = gpd.read_file(snakemake.input.offshore_shapes).set_index('id')['geometry']
    offshore_shapes = vshapes.eez(subset=countries, tolerance=0.01)
    substation_b = buses['symbol'].str.contains('substation', case=False)
    def prefer_voltage(x, which):
--- a/scripts/build_bus_regions.py
+++ b/scripts/build_bus_regions.py
@ -4,7 +4,6 @@ from operator import attrgetter
 import pandas as pd
 import geopandas as gpd
 from vresutils import shapes as vshapes
 from vresutils.graph import voronoi_partition_pts
 import pypsa
@ -13,9 +12,8 @@ countries = snakemake.config['countries']
 n = pypsa.Network(snakemake.input.base_network)
-country_shapes = vshapes.countries(subset=countries, add_KV_to_RS=True,
+country_shapes = gpd.read_file(snakemake.input.country_shapes).set_index('id')['geometry']
-                                   tolerance=0.01, minarea=0.1)
+offshore_shapes = gpd.read_file(snakemake.input.offshore_shapes).set_index('id')['geometry']
 offshore_shapes = vshapes.eez(subset=countries, tolerance=0.01)
 onshore_regions = []
 offshore_regions = []
@ -30,7 +28,7 @@ for country in countries:
            'country': country
        }, index=onshore_locs.index))
-    if country not in offshore_shapes: continue
+    if country not in offshore_shapes.index: continue
    offshore_shape = offshore_shapes[country]
    offshore_locs = n.buses.loc[c_b & n.buses.substation_off, ["x", "y"]]
    offshore_regions_c = gpd.GeoDataFrame({
--- a/scripts/build_shapes.py
+++ b/scripts/build_shapes.py
@ -0,0 +1,160 @@
 import os
 import numpy as np
 from operator import attrgetter
 from six.moves import reduce
 from itertools import takewhile
 import pandas as pd
 import geopandas as gpd
 from shapely.geometry import MultiPolygon, Polygon
 from shapely.ops import cascaded_union
 try:
    from countrycode.countrycode import countrycode
 except ImportError:
    from countrycode import countrycode
 def _simplify_polys(polys, minarea=0.1, tolerance=0.01):
    if isinstance(polys, MultiPolygon):
        polys = sorted(polys, key=attrgetter('area'), reverse=True)
        mainpoly = polys[0]
        mainlength = np.sqrt(mainpoly.area/(2.*np.pi))
        if mainpoly.area > minarea:
            polys = MultiPolygon([p
                                  for p in takewhile(lambda p: p.area > minarea, polys)
                                  if mainpoly.distance(p) < mainlength])
        else:
            polys = mainpoly
    return polys.simplify(tolerance=tolerance)
 def countries():
    cntries = snakemake.config['countries'] + ['KV']
    df = gpd.read_file(snakemake.input.naturalearth)
    # Names are a hassle in naturalearth, try several fields
    fieldnames = (df[x].where(lambda s: s!='-99') for x in ('ISO_A2', 'WB_A2', 'ADM0_A3'))
    df['name'] = reduce(lambda x,y: x.fillna(y), fieldnames, next(fieldnames)).str[0:2]
    df = df.loc[df.name.isin(cntries) & (df['scalerank'] == 0)]
    s = df.set_index('name')['geometry'].map(_simplify_polys)
    s['RS'] = s['RS'].union(s.pop('KV'))
    return s
 def eez(country_shapes):
    cntries = snakemake.config['countries']
    cntries3 = frozenset(countrycode(cntries, origin='iso2c', target='iso3c'))
    df = gpd.read_file(snakemake.input.eez)
    df = df.loc[df['ISO_3digit'].isin(cntries3)]
    df['name'] = countrycode(df['ISO_3digit'], origin='iso3c', target='iso2c')
    s = df.set_index('name').geometry.map(_simplify_polys)
    return gpd.GeoSeries({k:v for k,v in s.iteritems() if v.distance(country_shapes[k]) < 1e-3})
 def country_cover(country_shapes, eez_shapes=None):
    shapes = list(country_shapes)
    if eez_shapes is not None:
        shapes += list(eez_shapes)
    europe_shape = cascaded_union(shapes)
    if isinstance(europe_shape, MultiPolygon):
        europe_shape = max(europe_shape, key=attrgetter('area'))
    return Polygon(shell=europe_shape.exterior)
 def nuts3(country_shapes):
    df = gpd.read_file(snakemake.input.nuts3)
    df = df.loc[df['STAT_LEVL_'] == 3]
    df['geometry'] = df['geometry'].map(_simplify_polys)
    df = df.rename(columns={'NUTS_ID': 'id'})[['id', 'geometry']].set_index('id')
    pop = pd.read_table(snakemake.input.nuts3pop, na_values=[':'], delimiter=' ?\t', engine='python')
    pop = (pop
           .set_index(pd.MultiIndex.from_tuples(pop.pop('unit,geo\\time').str.split(','))).loc['THS']
           .applymap(lambda x: pd.to_numeric(x, errors='coerce'))
           .fillna(method='bfill', axis=1))['2014']
    gdp = pd.read_table(snakemake.input.nuts3gdp, na_values=[':'], delimiter=' ?\t', engine='python')
    gdp = (gdp
           .set_index(pd.MultiIndex.from_tuples(gdp.pop('unit,geo\\time').str.split(','))).loc['EUR_HAB']
           .applymap(lambda x: pd.to_numeric(x, errors='coerce'))
           .fillna(method='bfill', axis=1))['2014']
    # Swiss data
    cantons = pd.read_csv(snakemake.input.ch_cantons)
    cantons = cantons.set_index(cantons['HASC'].str[3:])['NUTS']
    cantons = cantons.str.pad(5, side='right', fillchar='0')
    swiss = pd.read_excel(snakemake.input.ch_popgdp, skiprows=3, index_col=0)
    swiss.columns = swiss.columns.to_series().map(cantons)
    pop = pop.append(pd.to_numeric(swiss.loc['Residents in 1000', 'CH040':]))
    gdp = gdp.append(pd.to_numeric(swiss.loc['Gross domestic product per capita in Swiss francs', 'CH040':]))
    df = df.join(pd.DataFrame(dict(pop=pop, gdp=gdp)))
    df['country'] = df.index.to_series().str[:2].replace(dict(UK='GB', EL='GR'))
    excludenuts = pd.Index(('FRA10', 'FRA20', 'FRA30', 'FRA40', 'FRA50',
                            'PT200', 'PT300',
                            'ES707', 'ES703', 'ES704','ES705', 'ES706', 'ES708', 'ES709',
                            'FI2', 'FR9'))
    excludecountry = pd.Index(('MT', 'TR', 'LI', 'IS', 'CY', 'KV'))
    df = df.loc[df.index.difference(excludenuts)]
    df = df.loc[~df.country.isin(excludecountry)]
    manual = gpd.GeoDataFrame(
        [['BA1', 'BA', 3871.],
         ['RS1', 'RS', 7210.],
         ['AL1', 'AL', 2893.]],
        columns=['NUTS_ID', 'country', 'pop']
    ).set_index('NUTS_ID')
    manual['geometry'] = manual['country'].map(country_shapes)
    df = df.append(manual)
    df.loc['ME000', 'pop'] = 650.
    return df
 def save_to_geojson(s, fn):
    if os.path.exists(fn):
        os.unlink(fn)
    if isinstance(s, gpd.GeoDataFrame):
        s = s.reset_index()
    s.to_file(fn, driver='GeoJSON')
 if __name__ == "__main__":
    # Detect running outside of snakemake and mock snakemake for testing
    if 'snakemake' not in globals():
        from vresutils.snakemake import MockSnakemake, Dict
        snakemake = MockSnakemake(
            path='..',
            wildcards={},
            input=Dict(
                naturalearth='data/naturalearth/ne_10m_admin_0_countries.shp',
                eez='data/eez/World_EEZ_v8_2014.shp',
                nuts3='data/NUTS_2013_60M_SH/data/NUTS_RG_60M_2013.shp',
                nuts3pop='data/nama_10r_3popgdp.tsv.gz',
                nuts3gdp='data/nama_10r_3gdp.tsv.gz',
                ch_cantons='data/ch_cantons.csv',
                ch_popgdp='data/je-e-21.03.02.xls'
            ),
            output=Dict(
                country_shapes='resources/country_shapes.geojson',
                offshore_shapes='resource/offshore_shapes.geojson',
                europe_shape='resources/europe_shape.geojson',
                nuts3_shapes='resources/nuts3_shapes.geojson'
            )
        )
    country_shapes = countries()
    save_to_geojson(country_shapes, snakemake.output.country_shapes)
    offshore_shapes = eez(country_shapes)
    save_to_geojson(offshore_shapes, snakemake.output.offshore_shapes)
    europe_shape = country_cover(country_shapes, offshore_shapes)
    save_to_geojson(gpd.GeoSeries(europe_shape), snakemake.output.europe_shape)
    nuts3_shapes = nuts3(country_shapes)
    save_to_geojson(nuts3_shapes, snakemake.output.nuts3_shapes)
--- a/scripts/cluster_network.py
+++ b/scripts/cluster_network.py
@ -51,11 +51,11 @@ def weighting_for_country(n, x):
 ## Plot weighting for Germany
-def plot_weighting(n, country):
+def plot_weighting(n, country, country_shape=None):
    n.plot(bus_sizes=(2*weighting_for_country(n.buses.loc[n.buses.country == country])).reindex(n.buses.index, fill_value=1))
-    p = vshapes.countries()['DE']
+    if country_shape is not None:
-    plt.xlim(p.bounds[0], p.bounds[2])
+        plt.xlim(country_shape.bounds[0], country_shape.bounds[2])
-    plt.ylim(p.bounds[1], p.bounds[3])
+        plt.ylim(country_shape.bounds[1], country_shape.bounds[3])
 # # Determining the number of clusters per country