pypsa-eur/notebooks/entsoe_data.ipynb


{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from entsoe import EntsoePandasClient\n",
"from entsoe.exceptions import NoMatchingDataError\n",
"import pandas as pd\n",
"from pathlib import Path\n",
"import os\n",
"\n",
"Path.cwd()\n",
"os.listdir()\n",
"\n",
"api_key = \"b45ffb86-fea3-49e7-9c01-9ad6429d3ec9\"\n",
"client = EntsoePandasClient(api_key=api_key)\n",
"\n",
"start = pd.Timestamp(\"20190101\", tz=\"Europe/Brussels\")\n",
"end = pd.Timestamp(\"20200101\", tz=\"Europe/Brussels\")\n",
"\n",
"countries = [\n",
" \"AL\",\n",
" \"AT\",\n",
" \"BA\",\n",
" \"BE\",\n",
" \"BG\",\n",
" \"CH\",\n",
" \"CZ\",\n",
" \"DE\",\n",
" \"DK\",\n",
" \"EE\",\n",
" \"ES\",\n",
" \"FI\",\n",
" \"FR\",\n",
" \"GB\",\n",
" \"GR\",\n",
" \"HR\",\n",
" \"HU\",\n",
" \"IE\",\n",
" \"IT\",\n",
" \"LT\",\n",
" \"LU\",\n",
" \"LV\",\n",
" \"ME\",\n",
" \"MK\",\n",
" \"NL\",\n",
" \"NO\",\n",
" \"PL\",\n",
" \"PT\",\n",
" \"RO\",\n",
" \"RS\",\n",
" \"SE\",\n",
" \"SI\",\n",
" \"SK\",\n",
"]\n",
"\n",
"for country in countries:\n",
"    print(f\"Trying country {country}.\")\n",
"\n",
"    country_code = country\n",
"    # generation_country = client.query_generation(country_code, start=start, end=end, psr_type=None)\n",
"    try:\n",
"        gen = client.query_generation(country, start=start, end=end, psr_type=None)\n",
"    except NoMatchingDataError:\n",
"        continue\n",
"\n",
"    gen.to_csv(Path.cwd() / \"generation_data\" / f\"{country}.csv\")\n",
"\n",
"    # generation.append(generation_country)\n",
"    # day_ahead_prices_country = client.query_day_ahead_prices(country_code, start, end)\n",
"\n",
"# generation = pd.concat(generation, keys=countries, axis=1)\n",
"\n",
"# client.query_crossborder_flows(country_code_from, country_code_to, start, end)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# the download loop above leaves the last country's generation table in `gen`\n",
"gen"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gen.transpose().xs(\"Actual Consumption\", level=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pathlib import Path\n",
"import pandas as pd\n",
"\n",
"data_path = Path.cwd() / \"..\" / \"..\" / \"generation_data\"\n",
"\n",
"gen = pd.read_csv(data_path / \"DE.csv\", parse_dates=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gen = pd.read_csv(data_path / \"AT.csv\", parse_dates=True, index_col=0).iloc[1:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gen.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# \"Gas\": [\"Fossil Coal-derived gas\", \"Fossil Gas\"],\n",
"gen[[\"Fossil Coal-derived gas\", \"Fossil Gas\"]].astype(float).sum()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Scout datasets available in pypsa-eur and entsoe"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import re\n",
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"\n",
"def scout_techs():\n",
"    techs = set()\n",
"    valid_consumption_types = [\"Hydro Pumped Storage\"]\n",
"\n",
"    def two_columns_analysis(df, all_techs, country_techs, col_types):\n",
"        for col, col_type in zip(country_techs, col_types):\n",
"            col = re.sub(\"[^A-Za-z ]+\", \"\", col)\n",
"\n",
"            if col_type == \"Actual Consumption\":\n",
"                # only keep consumption columns for storage technologies\n",
"                if col not in valid_consumption_types:\n",
"                    continue\n",
"\n",
"                all_techs.add(col + \" Charging\")\n",
"\n",
"            else:\n",
"                all_techs.add(col)\n",
"\n",
"        return all_techs\n",
"\n",
"    def is_country_generation(filename):\n",
"        return filename.endswith(\".csv\") and len(filename) == 6\n",
"\n",
"    generation_files = [\n",
"        file for file in os.listdir(data_path) if is_country_generation(file)\n",
"    ]\n",
"\n",
"    for i, country in enumerate(generation_files):\n",
"        print(f\"Gathering techs from {country[:2]}; {i+1}/{len(generation_files)}\")\n",
"\n",
"        gen = pd.read_csv(data_path / country, parse_dates=True, index_col=0)\n",
"\n",
"        # a NaN first index entry indicates a second header row holding the column types\n",
"        if pd.isna(gen.index[0]):\n",
"            col_types = gen.iloc[0].tolist()\n",
"            gen = gen.iloc[1:]\n",
"        else:\n",
"            col_types = [\"Actual Aggregated\" for _ in range(gen.shape[1])]\n",
"\n",
"        techs = two_columns_analysis(gen.iloc[1:], techs, gen.columns, col_types)\n",
"\n",
"    return techs\n",
"\n",
"\n",
"techs = scout_techs()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"techs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pypsa\n",
"from pathlib import Path\n",
"import pandas as pd\n",
"\n",
"data_path = Path.cwd() / \"..\" / \"..\" / \"generation_data\"\n",
"\n",
"n = pypsa.Network(data_path / \"elec_s_37.nc\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"n.generators.carrier.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"generation_mapper_entsoe = {\n",
" \"Wind Onshore\": [\"Wind Onshore\"],\n",
" \"Wind Offshore\": [\"Wind Offshore\"],\n",
" \"Solar\": [\"Solar\"],\n",
" \"Biomass\": [\"Waste\", \"Biomass\"],\n",
" \"Hydro\": [\"Hydro Pumped Storage\", \"Hydro Water Reservoir\"],\n",
" \"Run of River\": [\"Hydro Run-of-river and poundage\"],\n",
" \"Nuclear\": [\"Nuclear\"],\n",
" \"Gas\": [\"Fossil Coal-derived gas\", \"Fossil Gas\"],\n",
" \"Oil\": [\"Fossil Oil\"],\n",
" \"Lignite\": [\"Fossil Brown coal/Lignite\"],\n",
" \"Hard Coal\": [\"Fossil Hard coal\"],\n",
"}\n",
"\n",
"generation_mapper_pypsa = {\n",
" \"Solar\": [\"solar\"],\n",
" \"Wind Onshore\": [\"onwind\"],\n",
" \"Wind Offshore\": [\"offwind\"],\n",
" \"Biomass\": [\"biomass\"],\n",
" \"Hydro\": [],\n",
" \"Run of River\": [\"ror\"],\n",
" \"Nuclear\": [\"nuclear\"],\n",
" \"Gas\": [\"OCGT\", \"CCGT\"],\n",
" \"Oil\": [\"oil\"],\n",
" \"Lignite\": [\"lignite\"],\n",
" \"Hard Coal\": [\"coal\"],\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"with open(data_path / \"generation_mapper_entsoe.json\", \"w\") as f:\n",
" json.dump(generation_mapper_entsoe, f)\n",
"\n",
"with open(data_path / \"generation_mapper_pypsa.json\", \"w\") as f:\n",
" json.dump(generation_mapper_pypsa, f)"
]
},
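{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch (not part of the original workflow): invert `generation_mapper_pypsa` so each pypsa-eur carrier points at its harmonised technology name, then group the carriers of the network loaded above by it. Assumes `n` and `generation_mapper_pypsa` from the cells above are still in memory; carriers without an entry (e.g. offshore wind sub-carriers) simply keep their own name."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch: carrier -> harmonised technology name, inverted from generation_mapper_pypsa\n",
"carrier_to_tech = {\n",
"    carrier: tech\n",
"    for tech, carriers in generation_mapper_pypsa.items()\n",
"    for carrier in carriers\n",
"}\n",
"\n",
"# generators per harmonised technology; unmapped carriers fall back to their raw name\n",
"n.generators.carrier.map(lambda c: carrier_to_tech.get(c, c)).value_counts()"
]
},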
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Harmonize Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import pycountry\n",
"\n",
"plt.style.use(\"ggplot\")\n",
"\n",
"import os\n",
"from pathlib import Path\n",
"from pprint import pprint\n",
"import pandas as pd\n",
"import numpy as np\n",
"import json\n",
"import warnings\n",
"import datetime\n",
"\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"\n",
"def intersection(a, b):\n",
"    return [entry for entry in list(set(b)) if entry in a]\n",
"\n",
"\n",
"def harmonise_data(freq=\"h\", quiet=True):\n",
"    data_path = Path.cwd() / \"..\" / \"..\" / \"generation_data\"\n",
"    target_path = Path.cwd() / \"..\" / \"..\" / \"harmonised_generation_data\"\n",
"\n",
"    with open(data_path / \"generation_mapper_entsoe.json\", \"r\") as f:\n",
"        mapper_to_entsoe = json.load(f)\n",
"    pprint(mapper_to_entsoe)\n",
"\n",
"    def is_country_generation(filename):\n",
"        return filename.endswith(\".csv\") and len(filename) == 6\n",
"\n",
"    generation_files = [\n",
"        file for file in os.listdir(data_path) if is_country_generation(file)\n",
"    ]\n",
"    # generation_files = [\"DE.csv\"]\n",
"\n",
"    for i, gen in enumerate(generation_files):\n",
"        print(f\"Harmonising {gen[:2]}; {i+1}/{len(generation_files)}\")\n",
"\n",
"        gen = pd.read_csv(data_path / gen, parse_dates=True, index_col=0)\n",
"\n",
"        # drop the second header row (column types) if the index was not parsed as datetimes\n",
"        if not isinstance(gen.index[0], datetime.datetime):\n",
"            gen = gen.iloc[1:].fillna(0).astype(float)\n",
"\n",
"        def make_tz_time(time):\n",
"            return pd.Timestamp(time).tz_convert(\"utc\")\n",
"\n",
"        gen.index = pd.Series(gen.index).apply(lambda time: make_tz_time(time))\n",
"\n",
"        clean_generation = pd.DataFrame(index=gen.index)\n",
"\n",
"        taken_generators = list()\n",
"        for generator, entsoe_names in mapper_to_entsoe.items():\n",
"            inter = intersection(entsoe_names, gen.columns)\n",
"            taken_generators += inter\n",
"            if len(inter) > 0:\n",
"                clean_generation[generator] = gen[inter].sum(axis=1)\n",
"            else:\n",
"                clean_generation[generator] = np.zeros(gen.shape[0])\n",
"\n",
"        if not quiet:\n",
"            fig, ax = plt.subplots(1, 2, figsize=(16, 4))\n",
"\n",
"            clean_generation.iloc[:800].plot(ax=ax[0])\n",
"\n",
"            totals = clean_generation.sum(axis=0).sort_values(ascending=False)\n",
"\n",
"            shares = totals / totals.sum()\n",
"            ax[1].pie(\n",
"                totals.values,\n",
"                labels=[\n",
"                    f\"{name}: {np.around(share*100, decimals=2)}%, {np.around(value/4*1e-6, decimals=2)} TWh\"\n",
"                    for name, share, value in zip(\n",
"                        totals.index, shares.tolist(), totals.values\n",
"                    )\n",
"                ],\n",
"            )\n",
"            ax[0].set_title(\n",
"                pycountry.countries.get(alpha_2=generation_files[i][:2].upper()).name\n",
"            )\n",
"\n",
"            plt.show()\n",
"\n",
"        clean_generation = clean_generation.resample(freq).mean()\n",
"        clean_generation.columns = [col + \" (MWh)\" for col in clean_generation.columns]\n",
"        clean_generation.to_csv(target_path / (\"prepared_\" + generation_files[i]))\n",
"\n",
"    # return gen, clean_generation, taken_generators\n",
"\n",
"\n",
"# gen, proc, taken = harmonise_data()\n",
"harmonise_data(quiet=False)"
]
},
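{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sanity check (sketch): read one of the harmonised files written above back in and look at its totals per technology. The file name `prepared_DE.csv` is an assumption and only exists if DE was among the downloaded countries."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch: load one harmonised country file and inspect its totals per technology\n",
"target_path = Path.cwd() / \"..\" / \"..\" / \"harmonised_generation_data\"\n",
"\n",
"prepared = pd.read_csv(target_path / \"prepared_DE.csv\", index_col=0, parse_dates=True)\n",
"prepared.sum().sort_values(ascending=False)"
]
},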
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# requires the `return gen, clean_generation, taken_generators` line and\n",
"# `gen, proc, taken = harmonise_data()` in the cell above to be uncommented\n",
"print(proc.sum().sum() / gen.sum().sum())\n",
"\n",
"print(\n",
" proc.sum().sort_values(ascending=False).round(decimals=3) * 100 / proc.sum().sum()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gen.drop(columns=taken).sum().sort_values(ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def make_tz_time(time):\n",
" return pd.Timestamp(time).tz_convert(\"utc\")\n",
"\n",
"\n",
"# type(pd.Timestamp(gen.index[4]).tz_convert(\"utc\"))\n",
"\n",
"index = pd.Series(gen.index).apply(lambda time: make_tz_time(time))\n",
"\n",
"# gen.index[4].tz_convert(\"utc\")\n",
"\n",
"print(index[:10])\n",
"print(pd.DatetimeIndex(index))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Day-Ahead Prices "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from entsoe import EntsoePandasClient\n",
"from entsoe.exceptions import NoMatchingDataError\n",
"import pandas as pd\n",
"from pathlib import Path\n",
"import os\n",
"\n",
"Path.cwd()\n",
"os.listdir()\n",
"\n",
"api_key = \"b45ffb86-fea3-49e7-9c01-9ad6429d3ec9\"\n",
"client = EntsoePandasClient(api_key=api_key)\n",
"\n",
"start = pd.Timestamp(\"20190101\", tz=\"Europe/Brussels\")\n",
"end = pd.Timestamp(\"20200101\", tz=\"Europe/Brussels\")\n",
"\n",
"countries = [\n",
" \"AL\",\n",
" \"AT\",\n",
" \"BA\",\n",
" \"BE\",\n",
" \"BG\",\n",
" \"CH\",\n",
" \"CZ\",\n",
" \"DE\",\n",
" \"DK\",\n",
" \"EE\",\n",
" \"ES\",\n",
" \"FI\",\n",
" \"FR\",\n",
" \"GB\",\n",
" \"GR\",\n",
" \"HR\",\n",
" \"HU\",\n",
" \"IE\",\n",
" \"IT\",\n",
" \"LT\",\n",
" \"LU\",\n",
" \"LV\",\n",
" \"ME\",\n",
" \"MK\",\n",
" \"NL\",\n",
" \"NO\",\n",
" \"PL\",\n",
" \"PT\",\n",
" \"RO\",\n",
" \"RS\",\n",
" \"SE\",\n",
" \"SI\",\n",
" \"SK\",\n",
"]\n",
"\n",
"for country in countries:\n",
"    print(f\"Trying country {country}.\")\n",
"\n",
"    country_code = country\n",
"\n",
"    try:\n",
"        day_ahead_prices_country = client.query_day_ahead_prices(\n",
"            country_code, start=start, end=end\n",
"        )\n",
"    except NoMatchingDataError:\n",
"        continue\n",
"\n",
"    day_ahead_prices_country.to_csv(\n",
"        Path.cwd() / \"..\" / \"..\" / \"price_data\" / f\"{country}.csv\"\n",
"    )\n",
"\n",
"# generation = pd.concat(generation, keys=countries, axis=1)\n",
"# client.query_crossborder_flows(country_code_from, country_code_to, start, end)"
]
},
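{
"cell_type": "markdown",
"metadata": {},
"source": [
"A small follow-up sketch (not in the original notebook): collect the saved day-ahead price series into one DataFrame and compare mean prices per country. Assumes the `price_data` directory was populated by the loop above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch: read all saved price files into a single wide DataFrame (one column per country)\n",
"price_path = Path.cwd() / \"..\" / \"..\" / \"price_data\"\n",
"\n",
"prices = pd.concat(\n",
"    {\n",
"        file[:2]: pd.read_csv(price_path / file, index_col=0, parse_dates=True).iloc[:, 0]\n",
"        for file in os.listdir(price_path)\n",
"        if file.endswith(\".csv\")\n",
"    },\n",
"    axis=1,\n",
")\n",
"\n",
"prices.mean().sort_values(ascending=False)"
]
},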
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "",
"language": "python",
"name": ""
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}