2021-07-01 18:09:04 +00:00
|
|
|
"""Build mapping between grid cells and population (total, urban, rural)"""
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
import multiprocessing as mp
|
2019-04-16 14:03:51 +00:00
|
|
|
import atlite
|
2021-07-01 18:09:04 +00:00
|
|
|
import numpy as np
|
2019-04-16 14:03:51 +00:00
|
|
|
import pandas as pd
|
|
|
|
import xarray as xr
|
|
|
|
import geopandas as gpd
|
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
from vresutils import shapes as vshapes
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
if __name__ == '__main__':
|
|
|
|
if 'snakemake' not in globals():
|
|
|
|
from helper import mock_snakemake
|
|
|
|
snakemake = mock_snakemake('build_population_layouts')
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
cutout = atlite.Cutout(snakemake.config['atlite']['cutout'])
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
grid_cells = cutout.grid_cells()
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
# nuts3 has columns country, gdp, pop, geometry
|
|
|
|
# population is given in dimensions of 1e3=k
|
|
|
|
nuts3 = gpd.read_file(snakemake.input.nuts3_shapes).set_index('index')
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
# Indicator matrix NUTS3 -> grid cells
|
|
|
|
I = atlite.cutout.compute_indicatormatrix(nuts3.geometry, grid_cells)
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
# Indicator matrix grid_cells -> NUTS3; inprinciple Iinv*I is identity
|
|
|
|
# but imprecisions mean not perfect
|
|
|
|
Iinv = cutout.indicatormatrix(nuts3.geometry)
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
countries = np.sort(nuts3.country.unique())
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
urban_fraction = pd.read_csv(snakemake.input.urban_percent,
|
|
|
|
header=None, index_col=0,
|
2022-04-12 12:37:05 +00:00
|
|
|
names=['fraction']).squeeze() / 100.
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
# fill missing Balkans values
|
|
|
|
missing = ["AL", "ME", "MK"]
|
|
|
|
reference = ["RS", "BA"]
|
|
|
|
average = urban_fraction[reference].mean()
|
|
|
|
fill_values = pd.Series({ct: average for ct in missing})
|
|
|
|
urban_fraction = urban_fraction.append(fill_values)
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
# population in each grid cell
|
|
|
|
pop_cells = pd.Series(I.dot(nuts3['pop']))
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
# in km^2
|
|
|
|
with mp.Pool(processes=snakemake.threads) as pool:
|
|
|
|
cell_areas = pd.Series(pool.map(vshapes.area, grid_cells)) / 1e6
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
# pop per km^2
|
|
|
|
density_cells = pop_cells / cell_areas
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
# rural or urban population in grid cell
|
|
|
|
pop_rural = pd.Series(0., density_cells.index)
|
|
|
|
pop_urban = pd.Series(0., density_cells.index)
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
for ct in countries:
|
|
|
|
print(ct, urban_fraction[ct])
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
indicator_nuts3_ct = nuts3.country.apply(lambda x: 1. if x == ct else 0.)
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
indicator_cells_ct = pd.Series(Iinv.T.dot(indicator_nuts3_ct))
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
density_cells_ct = indicator_cells_ct * density_cells
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
pop_cells_ct = indicator_cells_ct * pop_cells
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
# correct for imprecision of Iinv*I
|
|
|
|
pop_ct = nuts3.loc[nuts3.country==ct,'pop'].sum()
|
|
|
|
pop_cells_ct *= pop_ct / pop_cells_ct.sum()
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
# The first low density grid cells to reach rural fraction are rural
|
|
|
|
asc_density_i = density_cells_ct.sort_values().index
|
|
|
|
asc_density_cumsum = pop_cells_ct[asc_density_i].cumsum() / pop_cells_ct.sum()
|
|
|
|
rural_fraction_ct = 1 - urban_fraction[ct]
|
|
|
|
pop_ct_rural_b = asc_density_cumsum < rural_fraction_ct
|
|
|
|
pop_ct_urban_b = ~pop_ct_rural_b
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
pop_ct_rural_b[indicator_cells_ct == 0.] = False
|
|
|
|
pop_ct_urban_b[indicator_cells_ct == 0.] = False
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
pop_rural += pop_cells_ct.where(pop_ct_rural_b, 0.)
|
|
|
|
pop_urban += pop_cells_ct.where(pop_ct_urban_b, 0.)
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
pop_cells = {"total": pop_cells}
|
|
|
|
pop_cells["rural"] = pop_rural
|
|
|
|
pop_cells["urban"] = pop_urban
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
for key, pop in pop_cells.items():
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-08-18 10:17:31 +00:00
|
|
|
ycoords = ('y', cutout.coords['y'].data)
|
|
|
|
xcoords = ('x', cutout.coords['x'].data)
|
2021-07-01 18:09:04 +00:00
|
|
|
values = pop.values.reshape(cutout.shape)
|
|
|
|
layout = xr.DataArray(values, [ycoords, xcoords])
|
2019-04-16 14:03:51 +00:00
|
|
|
|
2021-07-01 18:09:04 +00:00
|
|
|
layout.to_netcdf(snakemake.output[f"pop_layout_{key}"])
|