Copy data preparation scripts from pypsa-eur sector branch

This commit is contained in:
Tom Brown 2019-04-16 16:03:51 +02:00
commit 1b26c3c737
12 changed files with 1022 additions and 0 deletions

32
.gitignore vendored Normal file
View File

@ -0,0 +1,32 @@
# Tooling and runtime artefacts
.snakemake*
.ipynb_checkpoints
__pycache__
gurobi.log
# Local working directories (results, logs, scratch)
/bak
/resources
/results
/networks
/benchmarks
/logs
/notebooks
# Downloaded / derived input data
# NOTE(review): "/data" already ignores the whole directory, so the more
# specific /data/... entries below are redundant unless /data is unignored later
/data
/data/links_p_nom.csv
/data/*totals.csv
/data/emobility/
/data/heating/
/data/eurostat*
/data/odyssee/
/data/transport_data.csv
/data/switzerland*
/data/.nfs*
# Editor/backup files and binary outputs
*.org
*.nc
*~
/scripts/old
*.pyc
/cutouts

126
Snakefile Normal file
View File

@ -0,0 +1,126 @@
# Sector-coupling data-preparation workflow; electricity networks come from
# the pypsa-eur subworkflow in the sibling directory.
configfile: "config.yaml"

subworkflow pypsaeur:
    workdir: "../pypsa-eur"
    snakefile: "../pypsa-eur/Snakefile"
    configfile: "../pypsa-eur/config.yaml"

# Target rule: request all pre-networks for every scenario combination.
rule prepare_sector_networks:
    input:
        pypsaeur(expand(config['results_dir'] + config['run'] + "/prenetworks/elec_s{simpl}_{clusters}_lv{lv}_{opts}.nc",
                        **config['scenario']))

# Gridded population layouts (total / urban / rural) on the cutout grid.
rule build_population_layouts:
    input:
        nuts3_shapes='resources/nuts3_shapes.geojson',
        urban_percent="data/urban_percent.csv"
    output:
        pop_layout_total="resources/pop_layout_total.nc",
        pop_layout_urban="resources/pop_layout_urban.nc",
        pop_layout_rural="resources/pop_layout_rural.nc"
    script: "scripts/build_population_layouts.py"

# Aggregate the gridded layouts to the clustered bus regions.
rule build_clustered_population_layouts:
    input:
        pop_layout_total="resources/pop_layout_total.nc",
        pop_layout_urban="resources/pop_layout_urban.nc",
        pop_layout_rural="resources/pop_layout_rural.nc",
        regions_onshore="resources/regions_onshore_{network}_s{simpl}_{clusters}.geojson"
    output:
        clustered_pop_layout="resources/pop_layout_{network}_s{simpl}_{clusters}.csv"
    script: "scripts/build_clustered_population_layouts.py"

rule build_heat_demand:
    input:
        pop_layout_total="resources/pop_layout_total.nc",
        pop_layout_urban="resources/pop_layout_urban.nc",
        pop_layout_rural="resources/pop_layout_rural.nc",
        regions_onshore="resources/regions_onshore_{network}_s{simpl}_{clusters}.geojson"
    output:
        heat_demand_urban="resources/heat_demand_urban_{network}_s{simpl}_{clusters}.nc",
        heat_demand_rural="resources/heat_demand_rural_{network}_s{simpl}_{clusters}.nc",
        heat_demand_total="resources/heat_demand_total_{network}_s{simpl}_{clusters}.nc"
    script: "scripts/build_heat_demand.py"

rule build_temperature_profiles:
    input:
        pop_layout_total="resources/pop_layout_total.nc",
        pop_layout_urban="resources/pop_layout_urban.nc",
        pop_layout_rural="resources/pop_layout_rural.nc",
        regions_onshore="resources/regions_onshore_{network}_s{simpl}_{clusters}.geojson"
    output:
        temp_soil_total="resources/temp_soil_total_{network}_s{simpl}_{clusters}.nc",
        temp_soil_rural="resources/temp_soil_rural_{network}_s{simpl}_{clusters}.nc",
        temp_soil_urban="resources/temp_soil_urban_{network}_s{simpl}_{clusters}.nc",
        temp_air_total="resources/temp_air_total_{network}_s{simpl}_{clusters}.nc",
        temp_air_rural="resources/temp_air_rural_{network}_s{simpl}_{clusters}.nc",
        temp_air_urban="resources/temp_air_urban_{network}_s{simpl}_{clusters}.nc"
    script: "scripts/build_temperature_profiles.py"

# Heat-pump coefficients of performance from the temperature profiles.
rule build_cop_profiles:
    input:
        temp_soil_total="resources/temp_soil_total_{network}_s{simpl}_{clusters}.nc",
        temp_soil_rural="resources/temp_soil_rural_{network}_s{simpl}_{clusters}.nc",
        temp_soil_urban="resources/temp_soil_urban_{network}_s{simpl}_{clusters}.nc",
        temp_air_total="resources/temp_air_total_{network}_s{simpl}_{clusters}.nc",
        temp_air_rural="resources/temp_air_rural_{network}_s{simpl}_{clusters}.nc",
        temp_air_urban="resources/temp_air_urban_{network}_s{simpl}_{clusters}.nc"
    output:
        cop_soil_total="resources/cop_soil_total_{network}_s{simpl}_{clusters}.nc",
        cop_soil_rural="resources/cop_soil_rural_{network}_s{simpl}_{clusters}.nc",
        cop_soil_urban="resources/cop_soil_urban_{network}_s{simpl}_{clusters}.nc",
        cop_air_total="resources/cop_air_total_{network}_s{simpl}_{clusters}.nc",
        cop_air_rural="resources/cop_air_rural_{network}_s{simpl}_{clusters}.nc",
        cop_air_urban="resources/cop_air_urban_{network}_s{simpl}_{clusters}.nc"
    script: "scripts/build_cop_profiles.py"

rule build_solar_thermal_profiles:
    input:
        pop_layout_total="resources/pop_layout_total.nc",
        pop_layout_urban="resources/pop_layout_urban.nc",
        pop_layout_rural="resources/pop_layout_rural.nc",
        regions_onshore="resources/regions_onshore_{network}_s{simpl}_{clusters}.geojson"
    output:
        solar_thermal_total="resources/solar_thermal_total_{network}_s{simpl}_{clusters}.nc",
        solar_thermal_urban="resources/solar_thermal_urban_{network}_s{simpl}_{clusters}.nc",
        solar_thermal_rural="resources/solar_thermal_rural_{network}_s{simpl}_{clusters}.nc"
    script: "scripts/build_solar_thermal_profiles.py"

rule build_energy_totals:
    input:
        nuts3_shapes='resources/nuts3_shapes.geojson'
    output:
        energy_name='data/energy_totals.csv',
        co2_name='data/co2_totals.csv',
        transport_name='data/transport_data.csv'
    threads: 1
    resources: mem_mb=10000
    script: 'scripts/build_energy_totals.py'

rule build_biomass_potentials:
    input:
        jrc_potentials="data/biomass/JRC Biomass Potentials.xlsx"
    output:
        biomass_potentials='data/biomass_potentials.csv'
    threads: 1
    resources: mem_mb=1000
    script: 'scripts/build_biomass_potentials.py'

rule build_industrial_demand:
    input:
        clustered_pop_layout="resources/pop_layout_{network}_s{simpl}_{clusters}.csv"
    output:
        industrial_demand="resources/industrial_demand_{network}_s{simpl}_{clusters}.csv"
    threads: 1
    resources: mem_mb=1000
    script: 'scripts/build_industrial_demand.py'

12
config.yaml Normal file
View File

@ -0,0 +1,12 @@
results_dir: 'results/'
summary_dir: results
run: '190416-modular-test'

# Wildcard values expanded by the Snakefile's expand(**config['scenario']) call.
# NOTE(review): the scripts also read config keys 'snapshots', 'renewable',
# 'atlite' and 'biomass', which are not defined in this file — presumably
# inherited from ../pypsa-eur/config.yaml; confirm.
scenario:
  sectors: [E] # ,E+EV,E+BEV,E+BEV+V2G] # [ E+EV, E+BEV, E+BEV+V2G ]
  simpl: ['']
  lv: [1.0]#[1.0, 1.125, 1.25, 1.5, 2.0, opt]# or opt
  clusters: [128] #[90, 128, 181] #[45, 64, 90, 128, 181, 256] #, 362] # (2**np.r_[5.5:9:.5]).astype(int) minimum is 37
  opts: [Co2L0-3H-T-H-B-I,Co2L0-3H-T-H-B-I-onwind0,Co2L0-3H-T-H-B-I-onwind0p1]#,Co2L0p05-3H-T-H-B-I,Co2L0p10-3H-T-H-B-I,Co2L0p20-3H-T-H-B-I,Co2L0p30-3H-T-H-B-I,Co2L0p50-3H-T-H-B-I]#[Co2L-3H-T-H,Co2L0p10-3H-T-H,Co2L0-3H-T-H,Co2L0p20-3H-T-H] #Co2L-3H-T-H,Co2L0p10-3H-T-H,Co2L0p20-3H-T-HCo2L-3H-T-H,Co2L0p10-3H-T-H,Co2L0p30-3H-T-H,Co2L0p50-3H-T-H] #Co2L-3H,Co2L-3H-T,, LC-FL, LC-T, Ep-T, Co2L-T]
  # Co2L will give default (5%); Co2L0p25 will give 25% CO2 emissions; Co2Lm0p05 will give 5% negative emissions

View File

@ -0,0 +1,53 @@
import pandas as pd

idx = pd.IndexSlice


def build_biomass_potentials():
    """Read JRC biomass potentials and write per-country totals in MWh/a.

    Groups the JRC spreadsheet columns into "biogas" / "solid biomass" /
    "not included" categories, selects the year and scenario from
    ``snakemake.config['biomass']``, and writes the result to
    ``snakemake.output.biomass_potentials``.
    """
    # delete empty column C from this sheet first before reading it in
    df = pd.read_excel(snakemake.input.jrc_potentials,
                       "Potentials (PJ)")

    df.rename(columns={"Unnamed: 16": "Municipal waste"}, inplace=True)
    df.drop(columns="Total", inplace=True)
    df.replace("-", 0., inplace=True)

    # the sheet stacks one 16-row table per country/scenario block
    df_dict = {}
    for i in range(36):
        df_dict[df.iloc[i * 16, 1]] = df.iloc[1 + i * 16:(i + 1) * 16].astype(float)

    df_new = pd.concat(df_dict)

    # classify each feedstock column for aggregation
    us_type = pd.Series(index=df_new.columns)
    us_type.iloc[0:7] = "not included"
    us_type.iloc[7:8] = "biogas"
    us_type.iloc[8:9] = "solid biomass"
    us_type.iloc[9:11] = "not included"
    us_type.iloc[11:16] = "solid biomass"
    us_type.iloc[16:17] = "biogas"

    # convert from PJ to MWh
    biomass_potentials = df_new.loc[idx[:, snakemake.config['biomass']['year'], snakemake.config['biomass']['scenario']], :].groupby(us_type, axis=1).sum().groupby(level=0).sum().rename({"UK": "GB", "BH": "BA"}) / 3.6 * 1e6

    biomass_potentials.to_csv(snakemake.output.biomass_potentials)


if __name__ == "__main__":
    # Detect running outside of snakemake and mock snakemake for testing
    if 'snakemake' not in globals():
        from vresutils import Dict
        import yaml
        snakemake = Dict()
        snakemake.input = Dict()
        snakemake.input['jrc_potentials'] = "data/biomass/JRC Biomass Potentials.xlsx"
        snakemake.output = Dict()
        snakemake.output['biomass_potentials'] = 'data/biomass_potentials.csv'
        with open('config.yaml') as f:
            # fix: yaml.load without an explicit Loader is unsafe and deprecated
            snakemake.config = yaml.safe_load(f)

    build_biomass_potentials()

View File

@ -0,0 +1,32 @@
# Aggregate the gridded population layouts to clustered bus regions.
import geopandas as gpd
import xarray as xr
import pandas as pd
import atlite

# cutout on the same weather grid the population layouts were built on
cutout = atlite.Cutout(snakemake.config['renewable']['onwind']['cutout'],
                       cutout_dir=snakemake.config['atlite']['cutout_dir'])

clustered_busregions_as_geopd = gpd.read_file(snakemake.input.regions_onshore).set_index('name', drop=True)
clustered_busregions = pd.Series(clustered_busregions_as_geopd.geometry,
                                 index=clustered_busregions_as_geopd.index)

# indicator matrix: bus region -> grid cells
I = cutout.indicatormatrix(clustered_busregions)

items = ["total", "urban", "rural"]
pop = pd.DataFrame(columns=items, index=clustered_busregions.index)

# sum the gridded population inside each bus region
for item in items:
    pop_layout = xr.open_dataarray(snakemake.input['pop_layout_' + item])
    pop[item] = I.dot(pop_layout.stack(spatial=('y', 'x')))

pop.to_csv(snakemake.output.clustered_pop_layout)

View File

@ -0,0 +1,25 @@
# Compute heat-pump COP time series from soil/air temperature profiles.
import xarray as xr

# Quadratic regression based on Staffell et al. (2012)
# https://doi.org/10.1039/C2EE22653G
# COP is a function of the temperature difference between source and sink.
cop_f = {"air": lambda d_t: 6.81 - 0.121 * d_t + 0.000630 * d_t**2,
         "soil": lambda d_t: 8.77 - 0.150 * d_t + 0.000734 * d_t**2}

# sink temperature, based on DTU / large area radiators
sink_T = 55.

for area in ["total", "urban", "rural"]:
    for source in ["air", "soil"]:
        source_T = xr.open_dataarray(snakemake.input["temp_{}_{}".format(source, area)])
        delta_T = sink_T - source_T
        cop = cop_f[source](delta_T)
        cop.to_netcdf(snakemake.output["cop_{}_{}".format(source, area)])

View File

@ -0,0 +1,460 @@
import pandas as pd
import geopandas as gpd

idx = pd.IndexSlice

#translations for Eurostat
# Maps the country name in each Eurostat spreadsheet's first column header
# to a two-letter code used as the index throughout this script.
# NOTE(review): several codes are not ISO 3166 — 'MA' for Malta (eu28 below
# uses 'MT'), 'TU' for Turkey, 'KO' for Kosovo, 'MO' for Moldova — and
# 'Ukraine' maps to 'UK', which collides with Eurostat's 'UK' for the
# United Kingdom (see `rename` below). Confirm these are intentional.
country_to_code = {
'EU28' : 'EU',
'EA19' : 'EA',
'Belgium' : 'BE',
'Bulgaria' : 'BG',
'Czech Republic' : 'CZ',
'Denmark' : 'DK',
'Germany' : 'DE',
'Estonia' : 'EE',
'Ireland' : 'IE',
'Greece' : 'GR',
'Spain' : 'ES',
'France' : 'FR',
'Croatia' : 'HR',
'Italy' : 'IT',
'Cyprus' : 'CY',
'Latvia' : 'LV',
'Lithuania' : 'LT',
'Luxembourg' : 'LU',
'Hungary' : 'HU',
'Malta' : 'MA',
'Netherlands' : 'NL',
'Austria' : 'AT',
'Poland' : 'PL',
'Portugal' : 'PT',
'Romania' : 'RO',
'Slovenia' : 'SI',
'Slovakia' : 'SK',
'Finland' : 'FI',
'Sweden' : 'SE',
'United Kingdom' : 'GB',
'Iceland' : 'IS',
'Norway' : 'NO',
'Montenegro' : 'ME',
'FYR of Macedonia' : 'MK',
'Albania' : 'AL',
'Serbia' : 'RS',
'Turkey' : 'TU',
'Bosnia and Herzegovina' : 'BA',
'Kosovo\n(UNSCR 1244/99)' : 'KO', #2017 version
'Kosovo\n(under United Nations Security Council Resolution 1244/99)' : 'KO', #2016 version
'Moldova' : 'MO',
'Ukraine' : 'UK',
'Switzerland' : 'CH',
}

# countries present in the data but skipped when reading JRC-IDEES (EU28 only)
non_EU = ['NO', 'CH', 'ME', 'MK', 'RS', 'BA', 'AL']

# codes used by JRC-IDEES/EEA file names that differ from the codes above
rename = {"GR" : "EL",
"GB" : "UK"}

eu28 = ['FR', 'DE', 'GB', 'IT', 'ES', 'PL', 'SE', 'NL', 'BE', 'FI', 'CZ',
'DK', 'PT', 'RO', 'AT', 'BG', 'EE', 'GR', 'LV',
'HU', 'IE', 'SK', 'LT', 'HR', 'LU', 'SI'] + ['CY','MT']

# EEA uses 'UK' instead of 'GB' for the United Kingdom
eu28_eea = eu28[:]
eu28_eea.remove("GB")
eu28_eea.append("UK")
def build_eurostat(year):
    """Return multi-index for all countries' energy data in TWh/a."""
    stats_from_year = 2016

    fns = {2016: "data/eurostat-energy_balances-june_2016_edition/{year}-Energy-Balances-June2016edition.xlsx",
           2017: "data/eurostat-energy_balances-june_2017_edition/{year}-ENERGY-BALANCES-June2017edition.xlsx"}

    # 2016 edition includes BA, the 2017 edition doesn't
    # with sheet name None, an ordered dict of all sheets (one per country) is returned
    dfs = pd.read_excel(fns[stats_from_year].format(year=year),
                        None,
                        skiprows=1,
                        index_col=list(range(4)))

    # sort_index is necessary for later slicing with pd.IndexSlice
    df = pd.concat({country_to_code[sheet.columns[0]]: sheet
                    for _, sheet in dfs.items()},
                   sort=True).sort_index()

    # drop non-numeric columns; convert ktoe/a to TWh/a
    return df.drop(df.columns[df.dtypes != float], axis=1) * 11.63 / 1e3
def build_swiss(year):
    """Return Swiss energy totals for `year` in TWh/a from SFOE data (given in PJ/a)."""
    fn = "data/switzerland-sfoe/switzerland-new_format.csv"
    # convert PJ/a to TWh/a
    return (pd.read_csv(fn, index_col=list(range(2))) / 3.6).loc["CH", str(year)]
def build_idees(year):
    """Read residential, services and transport data from JRC-IDEES 2015.

    Returns a DataFrame indexed by country: energy columns in TWh/a, plus
    "passenger cars" (count) and "passenger car efficiency" (kWh/km).
    Relies on module-level `population`, `non_EU` and `rename`.
    """
    base_dir = "data/jrc-idees-2015"

    totals = pd.DataFrame()

    # convert ktoe/a to TWh/a
    factor = 11.63 / 1e3

    for ct in population.index:
        if ct in non_EU:
            print("When reading IDEES, skipping non-EU28 country", ct)
            continue

        # RESIDENTIAL — the asserts guard against row shifts in the spreadsheets
        filename = "{}/JRC-IDEES-2015_Residential_{}.xlsx".format(base_dir, rename.get(ct, ct))

        df = pd.read_excel(filename, "RES_hh_fec")

        assert df.iloc[2, 0] == "Space heating"
        totals.loc[ct, "total residential space"] = df.loc[2, year]

        assert df.iloc[10, 0] == "Advanced electric heating"
        assert df.iloc[11, 0] == "Conventional electric heating"
        totals.loc[ct, "electricity residential space"] = df.loc[[10, 11], year].sum()

        assert df.iloc[15, 0] == "Water heating"
        totals.loc[ct, "total residential water"] = df.loc[15, year]

        assert df.iloc[23, 0] == "Electricity"
        totals.loc[ct, "electricity residential water"] = df.loc[23, year]

        assert df.iloc[25, 0] == "Cooking"
        totals.loc[ct, "total residential cooking"] = df.loc[25, year]

        assert df.iloc[30, 0] == "Electricity"
        totals.loc[ct, "electricity residential cooking"] = df.loc[30, year]

        df = pd.read_excel(filename, "RES_summary")

        assert df.iloc[34, 0] == "Energy consumption by fuel - Eurostat structure (ktoe)"
        totals.loc[ct, "total residential"] = df.loc[34, year]

        assert df.iloc[47, 0] == "Electricity"
        totals.loc[ct, "electricity residential"] = df.loc[47, year]

        # SERVICES
        filename = "{}/JRC-IDEES-2015_Tertiary_{}.xlsx".format(base_dir, rename.get(ct, ct))

        df = pd.read_excel(filename, "SER_hh_fec")

        assert df.iloc[2, 0] == "Space heating"
        totals.loc[ct, "total services space"] = df.loc[2, year]

        assert df.iloc[11, 0] == "Advanced electric heating"
        assert df.iloc[12, 0] == "Conventional electric heating"
        totals.loc[ct, "electricity services space"] = df.loc[[11, 12], year].sum()

        assert df.iloc[17, 0] == "Hot water"
        totals.loc[ct, "total services water"] = df.loc[17, year]

        assert df.iloc[24, 0] == "Electricity"
        totals.loc[ct, "electricity services water"] = df.loc[24, year]

        assert df.iloc[27, 0] == "Catering"
        totals.loc[ct, "total services cooking"] = df.loc[27, year]

        assert df.iloc[31, 0] == "Electricity"
        totals.loc[ct, "electricity services cooking"] = df.loc[31, year]

        df = pd.read_excel(filename, "SER_summary")

        assert df.iloc[37, 0] == "Energy consumption by fuel - Eurostat structure (ktoe)"
        totals.loc[ct, "total services"] = df.loc[37, year]

        assert df.iloc[50, 0] == "Electricity"
        totals.loc[ct, "electricity services"] = df.loc[50, year]

        # TRANSPORT
        filename = "{}/JRC-IDEES-2015_Transport_{}.xlsx".format(base_dir, rename.get(ct, ct))

        df = pd.read_excel(filename, "TrRoad_ene")

        assert df.iloc[2, 0] == "by fuel (EUROSTAT DATA)"
        totals.loc[ct, "total road"] = df.loc[2, year]

        assert df.iloc[13, 0] == "Electricity"
        totals.loc[ct, "electricity road"] = df.loc[13, year]

        assert df.iloc[61, 0] == "Passenger cars"
        totals.loc[ct, "passenger car efficiency"] = df.loc[61, year]

        df = pd.read_excel(filename, "TrRail_ene")

        assert df.iloc[2, 0] == "by fuel (EUROSTAT DATA)"
        totals.loc[ct, "total rail"] = df.loc[2, year]

        assert df.iloc[12, 0] == "Electricity"
        totals.loc[ct, "electricity rail"] = df.loc[12, year]

        df = pd.read_excel(filename, "TrRoad_act")

        assert df.iloc[85, 0] == "Passenger cars"
        totals.loc[ct, "passenger cars"] = df.loc[85, year]

    totals = totals * factor

    # car counts are not an energy quantity — undo the unit conversion
    totals["passenger cars"] = totals["passenger cars"] / factor

    # convert ktoe/100km to kWh per km
    totals["passenger car efficiency"] = 10 * totals["passenger car efficiency"]

    return totals
def build_energy_totals():
    """Combine IDEES/Swiss/Eurostat data into per-country energy totals.

    Fills countries missing from IDEES using Eurostat balances, repairs
    Norway's heating split and Bosnia's zero entries, writes the table to
    snakemake.output.energy_name and returns it. Relies on module-level
    `idees`, `swiss`, `eurostat` and `population`.
    """
    clean_df = idees.reindex(population.index).drop(["passenger cars", "passenger car efficiency"], axis=1)

    clean_df.loc["CH"] = swiss

    # get values for missing countries based on Eurostat EnergyBalances;
    # divide cooking/space/water according to averages in EU28
    missing = clean_df.index[clean_df["total residential"].isnull()]
    missing_in_eurostat = missing.intersection(eurostat.index.levels[0])

    uses = ["space", "cooking", "water"]

    for sector, eurostat_sector in [("residential", "Residential"), ("services", "Services"),
                                    ("road", "Road"), ("rail", "Rail")]:

        # sector totals and electricity from Eurostat balances
        for fuel, eurostat_fuel in [("electricity", "Electricity"), ("total", "Total all products")]:
            clean_df.loc[missing_in_eurostat, "{} {}".format(fuel, sector)] = eurostat.loc[idx[missing_in_eurostat, :, :, eurostat_sector], eurostat_fuel].groupby(level=0).sum()

        # road/rail have no space/cooking/water split
        if sector in ["road", "rail"]:
            continue

        # split electricity by the average use fractions of reporting countries
        fuel = "electricity"
        for use in uses:
            avg = (clean_df["{} {} {}".format(fuel, sector, use)] / clean_df["{} {}".format(fuel, sector)]).mean()
            print("{}: average fraction of {} for {} is {}".format(sector, fuel, use, avg))
            clean_df.loc[missing_in_eurostat, "{} {} {}".format(fuel, sector, use)] = avg * clean_df.loc[missing_in_eurostat, "{} {}".format(fuel, sector)]

        # split the non-electric remainder by its average fractions, then
        # add the electric share back in to get the total per use
        fuel = "total"
        for use in uses:
            avg = ((clean_df["{} {} {}".format("total", sector, use)] - clean_df["{} {} {}".format("electricity", sector, use)]) /
                   (clean_df["{} {}".format("total", sector)] - clean_df["{} {}".format("electricity", sector)])).mean()
            print("{}: average fraction of non-electric for {} is {}".format(sector, use, avg))
            clean_df.loc[missing_in_eurostat, "{} {} {}".format(fuel, sector, use)] = \
                clean_df.loc[missing_in_eurostat, "{} {} {}".format("electricity", sector, use)] \
                + avg * (clean_df.loc[missing_in_eurostat, "{} {}".format("total", sector)] - clean_df.loc[missing_in_eurostat, "{} {}".format("electricity", sector)])

    # Fix Norway space and water heating fractions
    # http://www.ssb.no/en/energi-og-industri/statistikker/husenergi/hvert-3-aar/2014-07-14
    # The main heating source for about 73 per cent of the households is based on electricity
    elec_fraction = 0.73

    without_norway = clean_df.drop("NO")

    for sector in ["residential", "services"]:

        # assume the non-electric energy is all heating
        total_heating = (clean_df.loc["NO", "{} {}".format("total", sector)] - clean_df.loc["NO", "{} {}".format("electricity", sector)]) / (1 - elec_fraction)

        for use in uses:
            fraction = ((without_norway["{} {} {}".format("total", sector, use)] - without_norway["{} {} {}".format("electricity", sector, use)]) /
                        (without_norway["{} {}".format("total", sector)] - without_norway["{} {}".format("electricity", sector)])).mean()
            clean_df.loc["NO", "{} {} {}".format("total", sector, use)] = total_heating * fraction
            clean_df.loc["NO", "{} {} {}".format("electricity", sector, use)] = total_heating * fraction * elec_fraction

    # fix missing data for BA (services and road energy data)
    missing = (clean_df.loc["BA"] == 0.)

    # add back in proportional to RS with ratio of total residential demand
    clean_df.loc["BA", missing] = clean_df.loc["BA", "total residential"] / clean_df.loc["RS", "total residential"] * clean_df.loc["RS", missing]

    clean_df.to_csv(snakemake.output.energy_name)

    return clean_df
def build_eea_co2():
    """Return 1990 CO2 emissions per country and sector from EEA UNFCCC data.

    Values come in kt and are returned divided by 1e3 (Mt).
    """
    # see ../notebooks/compute_1990_Europe_emissions_for_targets.ipynb
    # https://www.eea.europa.eu/data-and-maps/data/national-emissions-reported-to-the-unfccc-and-to-the-eu-greenhouse-gas-monitoring-mechanism-14
    # downloaded 190222 (modified by EEA last on 181130)
    fn = "data/eea/UNFCCC_v21.csv"
    df = pd.read_csv(fn, encoding="latin-1")
    # normalise the one non-numeric year label before casting
    df.loc[df["Year"] == "1985-1987", "Year"] = 1986
    df["Year"] = df["Year"].astype(int)
    df = df.set_index(['Country_code', 'Pollutant_name', 'Year', 'Sector_name']).sort_index()

    # mapping: our sector label -> UNFCCC sector name
    e = pd.Series()
    e["electricity"] = '1.A.1.a - Public Electricity and Heat Production'
    e['residential non-elec'] = '1.A.4.b - Residential'
    e['services non-elec'] = '1.A.4.a - Commercial/Institutional'
    e['rail non-elec'] = "1.A.3.c - Railways"
    e["road non-elec"] = '1.A.3.b - Road Transportation'
    e["domestic navigation"] = "1.A.3.d - Domestic Navigation"
    e['international navigation'] = '1.D.1.b - International Navigation'
    e["domestic aviation"] = '1.A.3.a - Domestic Aviation'
    e["international aviation"] = '1.D.1.a - International Aviation'
    e['total energy'] = '1 - Energy'
    e['industrial processes'] = '2 - Industrial Processes and Product Use'
    e['agriculture'] = '3 - Agriculture'
    e['LULUCF'] = '4 - Land Use, Land-Use Change and Forestry'
    e['waste management'] = '5 - Waste management'
    e['other'] = '6 - Other Sector'
    e['indirect'] = 'ind_CO2 - Indirect CO2'
    e["total wL"] = "Total (with LULUCF, with indirect CO2)"
    e["total woL"] = "Total (without LULUCF, with indirect CO2)"

    pol = "CO2"  # alternative: "All greenhouse gases - (CO2 equivalent)"
    cts = ["CH", "EUA", "NO"] + eu28_eea
    year = 1990

    emissions = df.loc[idx[cts, pol, year, e.values], "emissions"].unstack("Sector_name").rename(columns=pd.Series(e.index, e.values)).rename(index={"All greenhouse gases - (CO2 equivalent)": "GHG"}, level=1)

    # only take level 0, since level 1 (pol) and level 2 (year) are trivial
    emissions = emissions.groupby(level=0, axis=0).sum()

    emissions.rename(index={"EUA": "EU28", "UK": "GB"}, inplace=True)

    # industry = everything in "total energy" not attributed to another sector
    emissions['industrial non-elec'] = emissions['total energy'] - emissions[['electricity', 'services non-elec', 'residential non-elec', 'road non-elec',
                                                                              'rail non-elec', 'domestic aviation', 'international aviation', 'domestic navigation',
                                                                              'international navigation']].sum(axis=1)

    emissions.drop(columns=["total energy", "total wL", "total woL"], inplace=True)

    return emissions / 1e3
def build_eurostat_co2(year=1990):
    """Return per-row CO2 emissions estimated from Eurostat fuel consumption for `year`."""
    eurostat_for_co2 = build_eurostat(year)

    # specific emissions in tCO2_equiv per MWh_th
    se = pd.Series(index=eurostat_for_co2.columns, dtype=float)
    se["Solid fuels"] = 0.36   # approximates coal
    se["Oil (total)"] = 0.285  # average of distillate and residue
    se["Gas"] = 0.2            # natural gas

    # oil values from https://www.eia.gov/tools/faqs/faq.cfm?id=74&t=11
    # Distillate oil (No. 2) 0.276
    # Residual oil (No. 6) 0.298
    # https://www.eia.gov/electricity/annual/html/epa_a_03.html

    # fuels without a factor above contribute nothing (NaN -> dropped by sum)
    eurostat_co2 = eurostat_for_co2.multiply(se).sum(axis=1)

    return eurostat_co2
def build_co2_totals(year=1990):
    """Merge EEA CO2 totals with Eurostat-derived values for Balkan countries.

    Writes the result to snakemake.output.co2_name and returns it. Relies on
    module-level `eea_co2` and `eurostat_co2`. NOTE(review): the `year`
    parameter is unused — the Eurostat year is fixed when `eurostat_co2`
    is built; confirm intended.
    """
    co2 = eea_co2.reindex(["EU28", "NO", "CH", "BA", "RS", "AL", "ME", "MK"] + eu28)

    # countries absent from the EEA data: estimate sectors from Eurostat balances
    for ct in ["BA", "RS", "AL", "ME", "MK"]:
        co2.loc[ct, "electricity"] = eurostat_co2[ct, "+", "Conventional Thermal Power Stations", "of which From Coal"].sum()
        co2.loc[ct, "residential non-elec"] = eurostat_co2[ct, "+", "+", "Residential"].sum()
        co2.loc[ct, "services non-elec"] = eurostat_co2[ct, "+", "+", "Services"].sum()
        co2.loc[ct, "road non-elec"] = eurostat_co2[ct, "+", "+", "Road"].sum()
        co2.loc[ct, "rail non-elec"] = eurostat_co2[ct, "+", "+", "Rail"].sum()
        co2.loc[ct, "domestic navigation"] = eurostat_co2[ct, "+", "+", "Domestic Navigation"].sum()
        co2.loc[ct, 'international navigation'] = eurostat_co2[ct, "-", "Bunkers"].sum()
        co2.loc[ct, "domestic aviation"] = eurostat_co2[ct, "+", "+", "Domestic aviation"].sum()
        co2.loc[ct, "international aviation"] = eurostat_co2[ct, "+", "+", "International aviation"].sum()
        # doesn't include industrial process emissions or fuel processing/refining
        co2.loc[ct, 'industrial non-elec'] = eurostat_co2[ct, "+", "Industry"].sum()
        # doesn't include non-energy emissions
        co2.loc[ct, 'agriculture'] = eurostat_co2[ct, "+", "+", "Agriculture / Forestry"].sum()

    co2.to_csv(snakemake.output.co2_name)

    return co2
def build_transport_data():
    """Collect car counts and average fuel efficiency per country.

    Gaps are filled with the mean cars-per-person ratio and the mean
    efficiency. Writes to snakemake.output.transport_name and returns
    the table. Relies on module-level `idees` and `population`.
    """
    transport_data = pd.DataFrame(columns=["number cars", "average fuel efficiency"],
                                  index=population.index)

    ## collect number of cars
    transport_data["number cars"] = idees["passenger cars"]

    # CH from http://ec.europa.eu/eurostat/statistics-explained/index.php/Passenger_cars_in_the_EU#Luxembourg_has_the_highest_number_of_passenger_cars_per_inhabitant
    transport_data.loc["CH", "number cars"] = 4.136e6

    missing = transport_data.index[transport_data["number cars"].isnull()]
    print("Missing data on cars from:")
    print(missing)

    # fill gaps from the average cars-per-person ratio
    cars_pp = transport_data["number cars"] / population
    transport_data.loc[missing, "number cars"] = cars_pp.mean() * population

    ## collect average fuel efficiency in kWh/km
    transport_data["average fuel efficiency"] = idees["passenger car efficiency"]

    missing = transport_data.index[transport_data["average fuel efficiency"].isnull()]
    print("Missing data on fuel efficiency from:")
    print(missing)

    transport_data.loc[missing, "average fuel efficiency"] = transport_data["average fuel efficiency"].mean()

    transport_data.to_csv(snakemake.output.transport_name)

    return transport_data
if __name__ == "__main__":

    # Detect running outside of snakemake and mock snakemake for testing
    if 'snakemake' not in globals():
        from vresutils import Dict
        snakemake = Dict()
        snakemake.output = Dict()
        snakemake.output['energy_name'] = "data/energy_totals.csv"
        snakemake.output['co2_name'] = "data/co2_totals.csv"
        snakemake.output['transport_name'] = "data/transport_data.csv"
        snakemake.input = Dict()
        snakemake.input['nuts3_shapes'] = 'resources/nuts3_shapes.geojson'

    # per-country population, summed over NUTS3 regions
    nuts3 = gpd.read_file(snakemake.input.nuts3_shapes).set_index('index')
    population = nuts3['pop'].groupby(nuts3.country).sum()

    year = 2011

    # the builders below read these module-level globals
    eurostat = build_eurostat(year)
    swiss = build_swiss(year)
    idees = build_idees(year)
    build_energy_totals()

    eea_co2 = build_eea_co2()
    eurostat_co2 = build_eurostat_co2()
    build_co2_totals()
    build_transport_data()

View File

@ -0,0 +1,40 @@
# Build population-weighted heat-demand time series per clustered bus region.
import geopandas as gpd
import atlite
import pandas as pd
import xarray as xr
import scipy as sp

# Detect running outside of snakemake and mock snakemake for testing
if 'snakemake' not in globals():
    from vresutils import Dict
    import yaml
    snakemake = Dict()
    with open('config.yaml') as f:
        # fix: yaml.load without an explicit Loader is unsafe and deprecated
        snakemake.config = yaml.safe_load(f)
    snakemake.input = Dict()
    snakemake.output = Dict()

time = pd.date_range(freq='m', **snakemake.config['snapshots'])
params = dict(years=slice(*time.year[[0, -1]]), months=slice(*time.month[[0, -1]]))

cutout = atlite.Cutout(snakemake.config['renewable']['onwind']['cutout'],
                       cutout_dir=snakemake.config['atlite']['cutout_dir'],
                       **params)

clustered_busregions_as_geopd = gpd.read_file(snakemake.input.regions_onshore).set_index('name', drop=True)
clustered_busregions = pd.Series(clustered_busregions_as_geopd.geometry,
                                 index=clustered_busregions_as_geopd.index)

# indicator matrix: bus region -> grid cells
I = cutout.indicatormatrix(clustered_busregions)

for item in ["rural", "urban", "total"]:

    pop_layout = xr.open_dataarray(snakemake.input['pop_layout_' + item])

    # population-weighted aggregation matrix (cells weighted by population)
    # NOTE(review): sp.diag is scipy's deprecated numpy alias — confirm it
    # still exists in the pinned scipy version
    M = I.T.dot(sp.diag(I.dot(pop_layout.stack(spatial=('y', 'x')))))

    heat_demand = cutout.heat_demand(matrix=M.T, index=clustered_busregions.index)

    heat_demand.to_netcdf(snakemake.output["heat_demand_" + item])

View File

@ -0,0 +1,35 @@
import pandas as pd

idx = pd.IndexSlice


def build_industrial_demand():
    """Distribute fixed national industry totals to clusters by population share.

    Totals are in TWh/a and converted to MWh/a (*1e6); each cluster receives
    its fraction of the total population. Writes the result to
    snakemake.output.industrial_demand.
    """
    population = pd.read_csv(snakemake.input.clustered_pop_layout,
                             index_col=0)

    # hard-coded annual totals in TWh/a ("naptha" [sic] kept for downstream
    # consumers that read this column name)
    totals = pd.Series(data=[1100., 1814., 586., 400., 580., 186.],
                       index=["industry new electricity", "industry process heat",
                              "naptha feedstock", "shipping H2", "aviation kerosene",
                              "process emissions"])

    industrial_demand = pd.DataFrame({i: population["total"] * totals[i] * 1e6 / population["total"].sum()
                                      for i in totals.index})

    industrial_demand.to_csv(snakemake.output.industrial_demand)


if __name__ == "__main__":
    # Detect running outside of snakemake and mock snakemake for testing
    if 'snakemake' not in globals():
        from vresutils import Dict
        import yaml
        snakemake = Dict()
        snakemake.input = Dict()
        snakemake.input['clustered_pop_layout'] = "resources/pop_layout_elec_s_128.csv"
        snakemake.output = Dict()
        snakemake.output['industrial_demand'] = "resources/industrial_demand_elec_s_128.csv"
        with open('config.yaml') as f:
            # fix: yaml.load without an explicit Loader is unsafe and deprecated
            snakemake.config = yaml.safe_load(f)

    build_industrial_demand()

View File

@ -0,0 +1,104 @@
# Build mapping between grid cells and population (total, urban, rural)

import atlite
import pandas as pd
import xarray as xr
from vresutils import shapes as vshapes
import geopandas as gpd

# Detect running outside of snakemake and mock snakemake for testing
if 'snakemake' not in globals():
    from vresutils import Dict
    import yaml
    snakemake = Dict()
    with open('config.yaml') as f:
        # fix: yaml.load without an explicit Loader is unsafe and deprecated
        snakemake.config = yaml.safe_load(f)
    snakemake.input = Dict()
    snakemake.output = Dict()
    snakemake.input["urban_percent"] = "data/urban_percent.csv"
    # NOTE(review): the mock sets neither nuts3_shapes nor any output path,
    # so running outside snakemake fails below — confirm intended.

cutout = atlite.Cutout(snakemake.config['renewable']['onwind']['cutout'],
                       cutout_dir=snakemake.config['atlite']['cutout_dir'])

grid_cells = cutout.grid_cells()

# nuts3 has columns country, gdp, pop, geometry
# population is given in dimensions of 1e3=k
nuts3 = gpd.read_file(snakemake.input.nuts3_shapes).set_index('index')

# Indicator matrix NUTS3 -> grid cells
I = atlite.cutout.compute_indicatormatrix(nuts3.geometry, grid_cells)

# Indicator matrix grid_cells -> NUTS3; in principle Iinv*I is the identity,
# but imprecisions mean it is not perfect
Iinv = cutout.indicatormatrix(nuts3.geometry)

countries = nuts3.country.value_counts().index.sort_values()

urban_fraction = pd.read_csv(snakemake.input.urban_percent,
                             header=None, index_col=0, squeeze=True) / 100.

# fill missing Balkans values with the mean of the reference countries
missing = ["AL", "ME", "MK"]
reference = ["RS", "BA"]
urban_fraction = urban_fraction.reindex(urban_fraction.index | missing)
urban_fraction.loc[missing] = urban_fraction[reference].mean()

# population in each grid cell
pop_cells = pd.Series(I.dot(nuts3['pop']))

# cell area in km^2
cell_areas = pd.Series(cutout.grid_cells()).map(vshapes.area) / 1e6

# population density per km^2
density_cells = pop_cells / cell_areas

# rural or urban population in grid cell
pop_rural = pd.Series(0., density_cells.index)
pop_urban = pd.Series(0., density_cells.index)

for ct in countries:
    print(ct, urban_fraction[ct])

    indicator_nuts3_ct = pd.Series(0., nuts3.index)
    indicator_nuts3_ct[nuts3.index[nuts3.country == ct]] = 1.

    indicator_cells_ct = pd.Series(Iinv.T.dot(indicator_nuts3_ct))

    density_cells_ct = indicator_cells_ct * density_cells

    pop_cells_ct = indicator_cells_ct * pop_cells

    # correct for imprecision of Iinv*I
    pop_ct = nuts3['pop'][indicator_nuts3_ct.index[indicator_nuts3_ct == 1.]].sum()
    pop_cells_ct = pop_cells_ct * pop_ct / pop_cells_ct.sum()

    # The first low-density grid cells to reach the rural fraction are rural
    index_from_low_d_to_high_d = density_cells_ct.sort_values().index
    pop_ct_rural_b = pop_cells_ct[index_from_low_d_to_high_d].cumsum() / pop_cells_ct.sum() < (1 - urban_fraction[ct])
    pop_ct_urban_b = ~pop_ct_rural_b

    # cells outside this country are neither rural nor urban
    pop_ct_rural_b[indicator_cells_ct == 0.] = False
    pop_ct_urban_b[indicator_cells_ct == 0.] = False

    pop_rural += pop_cells_ct.where(pop_ct_rural_b, 0.)
    pop_urban += pop_cells_ct.where(pop_ct_urban_b, 0.)

pop_cells = {"total": pop_cells}
pop_cells["rural"] = pop_rural
pop_cells["urban"] = pop_urban

# write one gridded layout per category
for key in pop_cells.keys():
    layout = xr.DataArray(pop_cells[key].values.reshape(cutout.shape),
                          [('y', cutout.coords['y']), ('x', cutout.coords['x'])])
    layout.to_netcdf(snakemake.output["pop_layout_" + key])

View File

@ -0,0 +1,55 @@
import sys

# override atlite
# NOTE(review): hard-coded machine-specific checkout path — remove or
# parameterise before sharing; confirm which atlite features it provides.
sys.path = ["/home/vres/data/tom/lib/atlite"] + sys.path

import geopandas as gpd
import atlite
import pandas as pd
import xarray as xr
import scipy as sp

# Detect running outside of snakemake and mock snakemake for testing
if 'snakemake' not in globals():
    from vresutils import Dict
    import yaml
    snakemake = Dict()
    with open('config.yaml') as f:
        # fix: yaml.load without an explicit Loader is unsafe and deprecated
        snakemake.config = yaml.safe_load(f)
    snakemake.input = Dict()
    snakemake.output = Dict()

time = pd.date_range(freq='m', **snakemake.config['snapshots'])
params = dict(years=slice(*time.year[[0, -1]]), months=slice(*time.month[[0, -1]]))

cutout = atlite.Cutout(snakemake.config['renewable']['onwind']['cutout'],
                       cutout_dir=snakemake.config['atlite']['cutout_dir'],
                       **params)

clustered_busregions_as_geopd = gpd.read_file(snakemake.input.regions_onshore).set_index('name', drop=True)
clustered_busregions = pd.Series(clustered_busregions_as_geopd.geometry,
                                 index=clustered_busregions_as_geopd.index)

# indicator matrix: bus region -> grid cells
I = cutout.indicatormatrix(clustered_busregions)

for item in ["total", "rural", "urban"]:

    pop_layout = xr.open_dataarray(snakemake.input['pop_layout_' + item])

    # population-weighted matrix, normalised so each region's weights sum to 1
    M = I.T.dot(sp.diag(I.dot(pop_layout.stack(spatial=('y', 'x')))))
    nonzero_sum = M.sum(axis=0, keepdims=True)
    nonzero_sum[nonzero_sum == 0.] = 1.
    M_tilde = M / nonzero_sum

    solar_thermal_angle = 45.

    # should clearsky_model be "simple" or "enhanced"?
    solar_thermal = cutout.solar_thermal(clearsky_model="simple",
                                         orientation={'slope': solar_thermal_angle, 'azimuth': 180.},
                                         matrix=M_tilde.T,
                                         index=clustered_busregions.index)

    solar_thermal.to_netcdf(snakemake.output["solar_thermal_" + item])

View File

@ -0,0 +1,48 @@
# Build population-weighted air and soil temperature profiles per bus region.
import geopandas as gpd
import atlite
import pandas as pd
import xarray as xr
import scipy as sp

# Detect running outside of snakemake and mock snakemake for testing
if 'snakemake' not in globals():
    from vresutils import Dict
    import yaml
    snakemake = Dict()
    with open('config.yaml') as f:
        # fix: yaml.load without an explicit Loader is unsafe and deprecated
        snakemake.config = yaml.safe_load(f)
    snakemake.input = Dict()
    snakemake.output = Dict()

time = pd.date_range(freq='m', **snakemake.config['snapshots'])
params = dict(years=slice(*time.year[[0, -1]]), months=slice(*time.month[[0, -1]]))

cutout = atlite.Cutout(snakemake.config['renewable']['onwind']['cutout'],
                       cutout_dir=snakemake.config['atlite']['cutout_dir'],
                       **params)

clustered_busregions_as_geopd = gpd.read_file(snakemake.input.regions_onshore).set_index('name', drop=True)
clustered_busregions = pd.Series(clustered_busregions_as_geopd.geometry,
                                 index=clustered_busregions_as_geopd.index)

# indicator matrix: bus region -> grid cells
I = cutout.indicatormatrix(clustered_busregions)

for item in ["total", "rural", "urban"]:

    pop_layout = xr.open_dataarray(snakemake.input['pop_layout_' + item])

    # population-weighted matrix, normalised so each region's weights sum to 1
    M = I.T.dot(sp.diag(I.dot(pop_layout.stack(spatial=('y', 'x')))))
    nonzero_sum = M.sum(axis=0, keepdims=True)
    nonzero_sum[nonzero_sum == 0.] = 1.
    M_tilde = M / nonzero_sum

    temp_air = cutout.temperature(matrix=M_tilde.T, index=clustered_busregions.index)
    temp_air.to_netcdf(snakemake.output["temp_air_" + item])

    temp_soil = cutout.soil_temperature(matrix=M_tilde.T, index=clustered_busregions.index)
    temp_soil.to_netcdf(snakemake.output["temp_soil_" + item])