pypsa-eur/notebooks/entsoe_data.ipynb


{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from entsoe import EntsoePandasClient\n",
"from entsoe.exceptions import NoMatchingDataError\n",
"import pandas as pd\n",
"from pathlib import Path\n",
"import os\n",
"\n",
"Path.cwd()\n",
"os.listdir()\n",
"\n",
"api_key = \"b45ffb86-fea3-49e7-9c01-9ad6429d3ec9\"\n",
"client = EntsoePandasClient(api_key=api_key)\n",
"\n",
"start = pd.Timestamp(\"20190101\", tz=\"Europe/Brussels\")\n",
"end = pd.Timestamp(\"20200101\", tz=\"Europe/Brussels\")\n",
"\n",
"countries = [\n",
" \"AL\",\n",
" \"AT\",\n",
" \"BA\",\n",
" \"BE\",\n",
" \"BG\",\n",
" \"CH\",\n",
" \"CZ\",\n",
" \"DE\",\n",
" \"DK\",\n",
" \"EE\",\n",
" \"ES\",\n",
" \"FI\",\n",
" \"FR\",\n",
" \"GB\",\n",
" \"GR\",\n",
" \"HR\",\n",
" \"HU\",\n",
" \"IE\",\n",
" \"IT\",\n",
" \"LT\",\n",
" \"LU\",\n",
" \"LV\",\n",
" \"ME\",\n",
" \"MK\",\n",
" \"NL\",\n",
" \"NO\",\n",
" \"PL\",\n",
" \"PT\",\n",
" \"RO\",\n",
" \"RS\",\n",
" \"SE\",\n",
" \"SI\",\n",
" \"SK\",\n",
"]\n",
"\n",
"for country in countries:\n",
"    print(f\"Trying country {country}.\")\n",
"\n",
"    country_code = country\n",
"    # generation_country = client.query_generation(country_code, start=start, end=end, psr_type=None)\n",
"    try:\n",
"        gen = client.query_generation(country, start=start, end=end, psr_type=None)\n",
"    except NoMatchingDataError:\n",
"        continue\n",
"\n",
"    gen.to_csv(Path.cwd() / \"generation_data\" / f\"{country}.csv\")\n",
"\n",
"    # generation.append(generation_country)\n",
"    # day_ahead_prices_country = client.query_day_ahead_prices(country_code, start, end)\n",
"\n",
"# generation = pd.concat(generation, keys=countries, axis=1)\n",
"\n",
"# client.query_crossborder_flows(country_code_from, country_code_to, start, end)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# the download loop above leaves the last country's generation table in `gen`\n",
"gen"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gen.transpose().xs(\"Actual Consumption\", level=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pathlib import Path\n",
"import pandas as pd\n",
"\n",
"data_path = Path.cwd() / \"..\" / \"..\" / \"generation_data\"\n",
"\n",
"gen = pd.read_csv(data_path / \"DE.csv\", parse_dates=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gen = pd.read_csv(data_path / \"AT.csv\", parse_dates=True, index_col=0).iloc[1:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gen.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# \"Gas\": [\"Fossil Coal-derived gas\", \"Fossil Gas\"],\n",
"gen[[\"Fossil Coal-derived gas\", \"Fossil Gas\"]].astype(float).sum()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Scout datasets available in pypsa-eur and entsoe"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import re\n",
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"\n",
"def scout_techs():\n",
"    techs = set()\n",
"    valid_consumption_types = [\"Hydro Pumped Storage\"]\n",
"\n",
"    def two_columns_analysis(df, all_techs, country_techs, col_types):\n",
"        for col, col_type in zip(country_techs, col_types):\n",
"            col = re.sub(\"[^A-Za-z ]+\", \"\", col)\n",
"\n",
"            if col_type == \"Actual Consumption\":\n",
"                # only keep consumption columns for storage technologies\n",
"                if col not in valid_consumption_types:\n",
"                    continue\n",
"\n",
"                all_techs.add(col + \" Charging\")\n",
"\n",
"            else:\n",
"                all_techs.add(col)\n",
"\n",
"        return all_techs\n",
"\n",
"    def is_country_generation(filename):\n",
"        return filename.endswith(\".csv\") and len(filename) == 6\n",
"\n",
"    generation_files = [\n",
"        file for file in os.listdir(data_path) if is_country_generation(file)\n",
"    ]\n",
"\n",
"    for i, country in enumerate(generation_files):\n",
"        print(f\"Gathering techs from {country[:2]}; {i+1}/{len(generation_files)}\")\n",
"\n",
"        gen = pd.read_csv(data_path / country, parse_dates=True, index_col=0)\n",
"\n",
"        # a NaN first index entry indicates a second header row holding the column types\n",
"        if pd.isna(gen.index[0]):\n",
"            col_types = gen.iloc[0].tolist()\n",
"            gen = gen.iloc[1:]\n",
"        else:\n",
"            col_types = [\"Actual Aggregated\" for _ in range(gen.shape[1])]\n",
"\n",
"        techs = two_columns_analysis(gen.iloc[1:], techs, gen.columns, col_types)\n",
"\n",
"    return techs\n",
"\n",
"\n",
"techs = scout_techs()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"techs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pypsa\n",
"from pathlib import Path\n",
"import pandas as pd\n",
"\n",
"data_path = Path.cwd() / \"..\" / \"..\" / \"generation_data\"\n",
"\n",
"n = pypsa.Network(data_path / \"elec_s_37.nc\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"n.generators.carrier.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"generation_mapper_entsoe = {\n",
" \"Wind Onshore\": [\"Wind Onshore\"],\n",
" \"Wind Offshore\": [\"Wind Offshore\"],\n",
" \"Solar\": [\"Solar\"],\n",
" \"Biomass\": [\"Waste\", \"Biomass\"],\n",
" \"Hydro\": [\"Hydro Pumped Storage\", \"Hydro Water Reservoir\"],\n",
" \"Run of River\": [\"Hydro Run-of-river and poundage\"],\n",
" \"Nuclear\": [\"Nuclear\"],\n",
" \"Gas\": [\"Fossil Coal-derived gas\", \"Fossil Gas\"],\n",
" \"Oil\": [\"Fossil Oil\"],\n",
" \"Lignite\": [\"Fossil Brown coal/Lignite\"],\n",
" \"Hard Coal\": [\"Fossil Hard coal\"],\n",
"}\n",
"\n",
"generation_mapper_pypsa = {\n",
" \"Solar\": [\"solar\"],\n",
" \"Wind Onshore\": [\"onwind\"],\n",
" \"Wind Offshore\": [\"offwind\"],\n",
" \"Biomass\": [\"biomass\"],\n",
" \"Hydro\": [],\n",
" \"Run of River\": [\"ror\"],\n",
" \"Nuclear\": [\"nuclear\"],\n",
" \"Gas\": [\"OCGT\", \"CCGT\"],\n",
" \"Oil\": [\"oil\"],\n",
" \"Lignite\": [\"lignite\"],\n",
" \"Hard Coal\": [\"coal\"],\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"with open(data_path / \"generation_mapper_entsoe.json\", \"w\") as f:\n",
" json.dump(generation_mapper_entsoe, f)\n",
"\n",
"with open(data_path / \"generation_mapper_pypsa.json\", \"w\") as f:\n",
" json.dump(generation_mapper_pypsa, f)"
]
},
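{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch (not part of the original workflow): invert `generation_mapper_pypsa` so each pypsa-eur carrier points at its harmonised technology name, then group the carriers of the network loaded above by it. Assumes `n` and `generation_mapper_pypsa` from the cells above are still in memory; carriers without an entry (e.g. offshore wind sub-carriers) simply keep their own name."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch: carrier -> harmonised technology name, inverted from generation_mapper_pypsa\n",
"carrier_to_tech = {\n",
"    carrier: tech\n",
"    for tech, carriers in generation_mapper_pypsa.items()\n",
"    for carrier in carriers\n",
"}\n",
"\n",
"# generators per harmonised technology; unmapped carriers fall back to their raw name\n",
"n.generators.carrier.map(lambda c: carrier_to_tech.get(c, c)).value_counts()"
]
},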
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Harmonize Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import pycountry\n",
"\n",
"plt.style.use(\"ggplot\")\n",
"\n",
"import os\n",
"from pathlib import Path\n",
"from pprint import pprint\n",
"import pandas as pd\n",
"import numpy as np\n",
"import json\n",
"import warnings\n",
"import datetime\n",
"\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"\n",
"def intersection(a, b):\n",
"    return [entry for entry in list(set(b)) if entry in a]\n",
"\n",
"\n",
"def harmonise_data(freq=\"h\", quiet=True):\n",
"    data_path = Path.cwd() / \"..\" / \"..\" / \"generation_data\"\n",
"    target_path = Path.cwd() / \"..\" / \"..\" / \"harmonised_generation_data\"\n",
"\n",
"    with open(data_path / \"generation_mapper_entsoe.json\", \"r\") as f:\n",
"        mapper_to_entsoe = json.load(f)\n",
"    pprint(mapper_to_entsoe)\n",
"\n",
"    def is_country_generation(filename):\n",
"        return filename.endswith(\".csv\") and len(filename) == 6\n",
"\n",
"    generation_files = [\n",
"        file for file in os.listdir(data_path) if is_country_generation(file)\n",
"    ]\n",
"    # generation_files = [\"DE.csv\"]\n",
"\n",
"    for i, gen in enumerate(generation_files):\n",
"        print(f\"Harmonising {gen[:2]}; {i+1}/{len(generation_files)}\")\n",
"\n",
"        gen = pd.read_csv(data_path / gen, parse_dates=True, index_col=0)\n",
"\n",
"        # drop the second header row (column types) if the index was not parsed as datetimes\n",
"        if not isinstance(gen.index[0], datetime.datetime):\n",
"            gen = gen.iloc[1:].fillna(0).astype(float)\n",
"\n",
"        def make_tz_time(time):\n",
"            return pd.Timestamp(time).tz_convert(\"utc\")\n",
"\n",
"        gen.index = pd.Series(gen.index).apply(lambda time: make_tz_time(time))\n",
"\n",
"        clean_generation = pd.DataFrame(index=gen.index)\n",
"\n",
"        taken_generators = list()\n",
"        for generator, entsoe_names in mapper_to_entsoe.items():\n",
"            inter = intersection(entsoe_names, gen.columns)\n",
"            taken_generators += inter\n",
"            if len(inter) > 0:\n",
"                clean_generation[generator] = gen[inter].sum(axis=1)\n",
"            else:\n",
"                clean_generation[generator] = np.zeros(gen.shape[0])\n",
"\n",
"        if not quiet:\n",
"            fig, ax = plt.subplots(1, 2, figsize=(16, 4))\n",
"\n",
"            clean_generation.iloc[:800].plot(ax=ax[0])\n",
"\n",
"            totals = clean_generation.sum(axis=0).sort_values(ascending=False)\n",
"\n",
"            shares = totals / totals.sum()\n",
"            ax[1].pie(\n",
"                totals.values,\n",
"                labels=[\n",
"                    f\"{name}: {np.around(share*100, decimals=2)}%, {np.around(value/4*1e-6, decimals=2)} TWh\"\n",
"                    for name, share, value in zip(\n",
"                        totals.index, shares.tolist(), totals.values\n",
"                    )\n",
"                ],\n",
"            )\n",
"            ax[0].set_title(\n",
"                pycountry.countries.get(alpha_2=generation_files[i][:2].upper()).name\n",
"            )\n",
"\n",
"            plt.show()\n",
"\n",
"        clean_generation = clean_generation.resample(freq).mean()\n",
"        clean_generation.columns = [col + \" (MWh)\" for col in clean_generation.columns]\n",
"        clean_generation.to_csv(target_path / (\"prepared_\" + generation_files[i]))\n",
"\n",
"    # return gen, clean_generation, taken_generators\n",
"\n",
"\n",
"# gen, proc, taken = harmonise_data()\n",
"harmonise_data(quiet=False)"
]
},
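{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sanity check (sketch): read one of the harmonised files written above back in and look at its totals per technology. The file name `prepared_DE.csv` is an assumption and only exists if DE was among the downloaded countries."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch: load one harmonised country file and inspect its totals per technology\n",
"target_path = Path.cwd() / \"..\" / \"..\" / \"harmonised_generation_data\"\n",
"\n",
"prepared = pd.read_csv(target_path / \"prepared_DE.csv\", index_col=0, parse_dates=True)\n",
"prepared.sum().sort_values(ascending=False)"
]
},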
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# requires the `return gen, clean_generation, taken_generators` line and\n",
"# `gen, proc, taken = harmonise_data()` in the cell above to be uncommented\n",
"print(proc.sum().sum() / gen.sum().sum())\n",
"\n",
"print(\n",
" proc.sum().sort_values(ascending=False).round(decimals=3) * 100 / proc.sum().sum()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gen.drop(columns=taken).sum().sort_values(ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def make_tz_time(time):\n",
" return pd.Timestamp(time).tz_convert(\"utc\")\n",
"\n",
"\n",
"# type(pd.Timestamp(gen.index[4]).tz_convert(\"utc\"))\n",
"\n",
"index = pd.Series(gen.index).apply(lambda time: make_tz_time(time))\n",
"\n",
"# gen.index[4].tz_convert(\"utc\")\n",
"\n",
"print(index[:10])\n",
"print(pd.DatetimeIndex(index))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Day-Ahead Prices "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from entsoe import EntsoePandasClient\n",
"from entsoe.exceptions import NoMatchingDataError\n",
"import pandas as pd\n",
"from pathlib import Path\n",
"import os\n",
"\n",
"Path.cwd()\n",
"os.listdir()\n",
"\n",
"api_key = \"b45ffb86-fea3-49e7-9c01-9ad6429d3ec9\"\n",
"client = EntsoePandasClient(api_key=api_key)\n",
"\n",
"start = pd.Timestamp(\"20190101\", tz=\"Europe/Brussels\")\n",
"end = pd.Timestamp(\"20200101\", tz=\"Europe/Brussels\")\n",
"\n",
"countries = [\n",
" \"AL\",\n",
" \"AT\",\n",
" \"BA\",\n",
" \"BE\",\n",
" \"BG\",\n",
" \"CH\",\n",
" \"CZ\",\n",
" \"DE\",\n",
" \"DK\",\n",
" \"EE\",\n",
" \"ES\",\n",
" \"FI\",\n",
" \"FR\",\n",
" \"GB\",\n",
" \"GR\",\n",
" \"HR\",\n",
" \"HU\",\n",
" \"IE\",\n",
" \"IT\",\n",
" \"LT\",\n",
" \"LU\",\n",
" \"LV\",\n",
" \"ME\",\n",
" \"MK\",\n",
" \"NL\",\n",
" \"NO\",\n",
" \"PL\",\n",
" \"PT\",\n",
" \"RO\",\n",
" \"RS\",\n",
" \"SE\",\n",
" \"SI\",\n",
" \"SK\",\n",
"]\n",
"\n",
"for country in countries:\n",
"    print(f\"Trying country {country}.\")\n",
"\n",
"    country_code = country\n",
"\n",
"    try:\n",
"        day_ahead_prices_country = client.query_day_ahead_prices(\n",
"            country_code, start=start, end=end\n",
"        )\n",
"    except NoMatchingDataError:\n",
"        continue\n",
"\n",
"    day_ahead_prices_country.to_csv(\n",
"        Path.cwd() / \"..\" / \"..\" / \"price_data\" / f\"{country}.csv\"\n",
"    )\n",
"\n",
"# generation = pd.concat(generation, keys=countries, axis=1)\n",
"# client.query_crossborder_flows(country_code_from, country_code_to, start, end)"
]
},
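{
"cell_type": "markdown",
"metadata": {},
"source": [
"A small follow-up sketch (not in the original notebook): collect the saved day-ahead price series into one DataFrame and compare mean prices per country. Assumes the `price_data` directory was populated by the loop above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch: read all saved price files into a single wide DataFrame (one column per country)\n",
"price_path = Path.cwd() / \"..\" / \"..\" / \"price_data\"\n",
"\n",
"prices = pd.concat(\n",
"    {\n",
"        file[:2]: pd.read_csv(price_path / file, index_col=0, parse_dates=True).iloc[:, 0]\n",
"        for file in os.listdir(price_path)\n",
"        if file.endswith(\".csv\")\n",
"    },\n",
"    axis=1,\n",
")\n",
"\n",
"prices.mean().sort_values(ascending=False)"
]
},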
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "",
"language": "python",
"name": ""
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}