In [None]:
from entsoe import EntsoePandasClient
from entsoe.exceptions import NoMatchingDataError
import pandas as pd
from pathlib import Path
import os

# Read the ENTSO-E token from the environment so the secret never lives in the notebook.
# NOTE(review): the key that used to be hardcoded here has been shared with the notebook
# and should be revoked and replaced.
api_key = os.environ.get("ENTSOE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the ENTSOE_API_KEY environment variable to your ENTSO-E API token."
    )
client = EntsoePandasClient(api_key=api_key)

# Query window: 1 Jan 2019 to 1 Jan 2020, both in Europe/Brussels local time.
start = pd.Timestamp("20190101", tz="Europe/Brussels")
end = pd.Timestamp("20200101", tz="Europe/Brussels")

# Two-letter codes of the countries to query, in alphabetical order.
countries = (
    "AL AT BA BE BG CH CZ DE DK EE ES FI FR GB GR HR HU "
    "IE IT LT LU LV ME MK NL NO PL PT RO RS SE SI SK"
).split()

# Download the generation table of every country and store one CSV per country code.
# NOTE(review): later cells read from Path.cwd() / ".." / ".." / "generation_data";
# confirm this cell is run from a directory where both locations coincide.
generation_dir = Path.cwd() / "generation_data"
generation_dir.mkdir(parents=True, exist_ok=True)  # to_csv does not create missing folders

for country in countries:
    print(f"Trying country {country}.")

    try:
        gen = client.query_generation(country, start=start, end=end, psr_type=None)
    except NoMatchingDataError:
        # The query matched no data for this country; move on to the next one.
        print(f"No generation data for {country}; skipping.")
        continue

    gen.to_csv(generation_dir / f"{country}.csv")

In [None]:
# `generation_country` is only ever assigned in commented-out code, so the former
# `gen = generation_country` raised NameError on a fresh kernel. The download loop
# already leaves the most recently fetched table in `gen`; expose it under the old
# name as well so either spelling works.
generation_country = gen

In [None]:
# Flip the table so the column labels become the row index, then keep the rows whose
# second index level is "Actual Consumption".
gen.T.xs("Actual Consumption", level=1)

In [None]:
import os
from pathlib import Path
import pandas as pd

# Folder holding the per-country generation CSVs, two levels above the working directory.
data_path = Path.cwd() / ".." / ".." / "generation_data"

# index_col=0 turns the timestamp column into the index, matching every other read in
# this notebook. Without it parse_dates=True has no date-like index to parse and the
# timestamps stay behind as an ordinary unnamed column.
gen = pd.read_csv(data_path / "DE.csv", parse_dates=True, index_col=0)

In [None]:
# Load the AT table with its first column as the index, then discard the first row.
at_csv = data_path / "AT.csv"
gen = pd.read_csv(at_csv, index_col=0, parse_dates=True)
gen = gen.iloc[1:]

In [None]:
# Display the column labels of the current `gen` frame.
gen.columns

In [None]:
# Total of the two series that make up the "Gas" group.
# The generation mapper in this notebook spells the first one "Fossil Coal-derived gas";
# the hyphen-less spelling only comes out of scout_techs(), whose regex strips every
# non-letter. Columns missing from this country's table are skipped instead of raising.
gas_columns = [
    name for name in ("Fossil Coal-derived gas", "Fossil Gas") if name in gen.columns
]
gen[gas_columns].astype(float).sum()

##### Scout datasets available in pypsa-eur and entsoe

In [None]:
import numpy as np
import re
import warnings

# Install a global filter that ignores warnings of every category.
warnings.filterwarnings("ignore")


def scout_techs(data_dir=None):
    """Collect the technology names found in the per-country generation CSVs.

    Parameters
    ----------
    data_dir : path-like, optional
        Folder containing the ``XX.csv`` files (two-character stem). Defaults to the
        notebook-level ``data_path``.

    Returns
    -------
    set of str
        Column names with every character other than ASCII letters and spaces removed.
        Columns typed "Actual Consumption" are ignored, except for the technologies in
        ``valid_consumption_types``, which are recorded as ``"<name> Charging"``.
    """
    source_dir = data_path if data_dir is None else data_dir
    valid_consumption_types = ["Hydro Pumped Storage"]

    def two_columns_analysis(all_techs, country_techs, col_types):
        """Add the cleaned names of one country's columns to ``all_techs``."""
        for col, col_type in zip(country_techs, col_types):
            # Removes digits and punctuation, e.g. the ".1" pandas appends to a repeated
            # header -- but also hyphens and slashes that are part of the column names.
            col = re.sub("[^A-Za-z ]+", "", col)

            if col_type != "Actual Consumption":
                all_techs.add(col)
            elif col in valid_consumption_types:
                all_techs.add(col + " Charging")

        return all_techs

    def is_country_generation(filename):
        """True for names such as ``DE.csv``: six characters ending in ``.csv``."""
        return filename.endswith(".csv") and len(filename) == 6

    # Sorted so the progress output is reproducible between runs.
    generation_files = sorted(
        file for file in os.listdir(source_dir) if is_country_generation(file)
    )

    techs = set()
    for i, country in enumerate(generation_files):
        print(f"Gathering techs from {country[:2]}; {i+1}/{len(generation_files)}")

        # Read the file of the country being reported, not a fixed one.
        gen = pd.read_csv(
            os.path.join(source_dir, country), parse_dates=True, index_col=0
        )

        # A file written from two-level columns carries the second level as its first
        # data row, which has no timestamp. pd.isna copes with NaN, NaT, strings and
        # Timestamps alike, whereas np.isnan raises TypeError on the latter two.
        has_type_row = len(gen.index) > 0 and pd.isna(gen.index[0])
        if has_type_row:
            col_types = gen.iloc[0].tolist()
        else:
            col_types = ["Actual Aggregated"] * gen.shape[1]

        techs = two_columns_analysis(techs, gen.columns, col_types)

    return techs


# Scan the generation CSVs and keep the resulting set of technology names.
techs = scout_techs()

In [None]:
# Display the collected technology names.
techs

In [None]:
import pypsa
from pathlib import Path
import pandas as pd

# Folder two levels above the working directory that holds the data files.
data_path = Path.cwd().joinpath("..", "..", "generation_data")

# Open the network file stored in that folder.
network_file = data_path / "elec_s_37.nc"
n = pypsa.Network(network_file)

In [None]:
# Count how often each carrier value occurs among the network's generators.
n.generators.carrier.value_counts()

In [None]:
# Harmonised technology name -> ENTSO-E column names that are summed into it.
generation_mapper_entsoe = {
    "Wind Onshore": ["Wind Onshore"],
    "Wind Offshore": ["Wind Offshore"],
    "Solar": ["Solar"],
    "Biomass": ["Waste", "Biomass"],
    "Hydro": ["Hydro Pumped Storage", "Hydro Water Reservoir"],
    "Run of River": ["Hydro Run-of-river and poundage"],
    "Nuclear": ["Nuclear"],
    "Gas": ["Fossil Coal-derived gas", "Fossil Gas"],
    "Oil": ["Fossil Oil"],
    "Lignite": ["Fossil Brown coal/Lignite"],
    "Hard Coal": ["Fossil Hard coal"],
}

# Same harmonised names (in a different order) -> PyPSA carrier names.
# NOTE(review): "Hydro" maps to no carrier here -- presumably hydro is not held in
# n.generators; confirm against the network before relying on this.
generation_mapper_pypsa = {
    "Solar": ["solar"],
    "Wind Onshore": ["onwind"],
    "Wind Offshore": ["offwind"],
    "Biomass": ["biomass"],
    "Hydro": [],
    "Run of River": ["ror"],
    "Nuclear": ["nuclear"],
    "Gas": ["OCGT", "CCGT"],
    "Oil": ["oil"],
    "Lignite": ["lignite"],
    "Hard Coal": ["coal"],
}

In [None]:
import json

# Persist both lookup tables as JSON files in the data folder.
mapper_files = {
    "generation_mapper_entsoe.json": generation_mapper_entsoe,
    "generation_mapper_pypsa.json": generation_mapper_pypsa,
}

for mapper_filename, mapper in mapper_files.items():
    with open(data_path / mapper_filename, "w") as handle:
        json.dump(mapper, handle)

##### Harmonize Data

In [None]:
import matplotlib.pyplot as plt
import pycountry

# Use matplotlib's "ggplot" style sheet for the figures drawn from here on.
plt.style.use("ggplot")

import os
from pathlib import Path
from pprint import pprint
import pandas as pd
import numpy as np
import json
import warnings
import datetime

# Install a global filter that ignores warnings of every category.
warnings.filterwarnings("ignore")


def intersection(a, b):
    """Return the distinct entries of ``b`` that also occur in ``a``.

    The result follows the order in which entries first appear in ``b``. Iterating a
    ``set`` instead would make the order vary between interpreter runs for strings.

    Parameters
    ----------
    a : container
        Values to keep; only membership (``in``) is used.
    b : iterable of hashable
        Candidates, e.g. the columns of a DataFrame.

    Returns
    -------
    list
        Deduplicated entries of ``b`` that are contained in ``a``.
    """
    # dict.fromkeys removes duplicates while preserving first-seen order.
    return [entry for entry in dict.fromkeys(b) if entry in a]


def _plot_generation_overview(clean_generation, country_code):
    """Show a time-series excerpt and a pie chart of technology shares for one country."""
    fig, ax = plt.subplots(1, 2, figsize=(16, 4))

    clean_generation.iloc[:800].plot(ax=ax[0])

    totals = clean_generation.sum(axis=0).sort_values(ascending=False)
    shares = totals / totals.sum()

    # NOTE(review): value/4*1e-6 converts to TWh only if every row is a quarter-hour
    # reading in MW -- confirm for countries that report at another resolution.
    ax[1].pie(
        totals.values,
        labels=[
            f"{name}: {np.around(share*100, decimals=2)}%, {np.around(value/4*1e-6, decimals=2)} TWh"
            for name, share, value in zip(totals.index, shares.tolist(), totals.values)
        ],
    )

    # Fall back to the raw code when pycountry returns no entry for it.
    country = pycountry.countries.get(alpha_2=country_code.upper())
    ax[0].set_title(country.name if country is not None else country_code)

    plt.show()
    plt.close(fig)  # one figure per country; release it once it has been shown


def harmonise_data(freq="h", quiet=True, data_dir=None, target_dir=None):
    """Aggregate every country's generation CSV into the harmonised technology groups.

    For each ``XX.csv`` in the data folder the columns listed in
    ``generation_mapper_entsoe.json`` are summed per group, the index is converted to
    UTC, the result is resampled to ``freq`` by averaging and written to
    ``prepared_XX.csv`` in the target folder.

    Parameters
    ----------
    freq : str, default "h"
        Resampling frequency passed to ``DataFrame.resample``.
    quiet : bool, default True
        When False, a two-panel figure is shown for every country.
    data_dir : path-like, optional
        Folder with the CSVs and the mapper JSON. Defaults to
        ``Path.cwd() / ".." / ".." / "generation_data"``.
    target_dir : path-like, optional
        Output folder, created if missing. Defaults to
        ``Path.cwd() / ".." / ".." / "harmonised_generation_data"``.

    Returns
    -------
    None
    """
    if data_dir is None:
        data_path = Path.cwd() / ".." / ".." / "generation_data"
    else:
        data_path = Path(data_dir)

    if target_dir is None:
        target_path = Path.cwd() / ".." / ".." / "harmonised_generation_data"
    else:
        target_path = Path(target_dir)
    target_path.mkdir(parents=True, exist_ok=True)  # to_csv does not create folders

    with open(data_path / "generation_mapper_entsoe.json", "r") as f:
        mapper_to_entsoe = json.load(f)
    pprint(mapper_to_entsoe)

    def is_country_generation(filename):
        """True for names such as ``DE.csv``: six characters ending in ``.csv``."""
        return filename.endswith(".csv") and len(filename) == 6

    generation_files = sorted(
        file for file in os.listdir(data_path) if is_country_generation(file)
    )

    for i, filename in enumerate(generation_files):
        country_code = filename[:2]
        print(f"Gathering techs from {country_code}; {i+1}/{len(generation_files)}")

        gen = pd.read_csv(data_path / filename, parse_dates=True, index_col=0)

        # Files written from two-level columns carry the second level as a first data
        # row without timestamp. Detect exactly that row; testing the index type would
        # also discard a real reading whenever the timestamps were left as strings.
        if len(gen.index) > 0 and pd.isna(gen.index[0]):
            gen = gen.iloc[1:]
        # Builtin float: the np.float alias no longer exists in current NumPy.
        gen = gen.astype(float).fillna(0)

        # Vectorised conversion; utc=True also copes with mixed UTC offsets.
        gen.index = pd.to_datetime(gen.index, utc=True)

        clean_generation = pd.DataFrame(index=gen.index)
        for generator, entsoe_names in mapper_to_entsoe.items():
            # Columns of this country that belong to the group, in file order.
            inter = [name for name in gen.columns if name in entsoe_names]
            if inter:
                clean_generation[generator] = gen[inter].sum(axis=1)
            else:
                clean_generation[generator] = 0.0

        if not quiet:
            _plot_generation_overview(clean_generation, country_code)

        clean_generation = clean_generation.resample(freq).mean()
        clean_generation.columns = [col + " (MWh)" for col in clean_generation.columns]
        clean_generation.to_csv(target_path / ("prepared_" + filename))


# Harmonise every country file and show the per-country figures.
# NOTE(review): an earlier variant unpacked `gen, proc, taken` from this call, but the
# function's return statement is commented out, so nothing assigns `proc` or `taken`
# for the cells that read them.
harmonise_data(quiet=False)

In [None]:
# NOTE(review): `proc` is not assigned anywhere in the visible notebook (the call that
# produced it is commented out), so this cell needs it defined elsewhere to run.
proc_totals = proc.sum()
proc_grand_total = proc_totals.sum()

# Ratio of the processed grand total to the grand total of `gen`.
print(proc_grand_total / gen.sum().sum())

# Per-column totals, largest first, as a percentage of the processed grand total.
print(
    proc_totals.sort_values(ascending=False).round(decimals=3) * 100 / proc_grand_total
)

In [None]:
# Totals of the `gen` columns that are not listed in `taken`, largest first.
# NOTE(review): `taken` is not assigned anywhere in the visible notebook -- confirm
# where it is meant to come from.
gen.drop(columns=taken).sum().sort_values(ascending=False)

In [None]:
def make_tz_time(time):
    """Build a ``pd.Timestamp`` from ``time`` and convert it to UTC."""
    stamp = pd.Timestamp(time)
    return stamp.tz_convert("utc")


# Convert every index label of `gen` to UTC, one label at a time.
index = pd.Series(gen.index).apply(make_tz_time)

# Show the first ten converted labels, then all of them wrapped in a DatetimeIndex.
print(index[:10])
print(pd.DatetimeIndex(index))

##### Day-Ahead Prices 

In [None]:
from entsoe import EntsoePandasClient
from entsoe.exceptions import NoMatchingDataError
import pandas as pd
from pathlib import Path
import os

# Read the ENTSO-E token from the environment so the secret never lives in the notebook.
# NOTE(review): the key that used to be hardcoded here has been shared with the notebook
# and should be revoked and replaced.
api_key = os.environ.get("ENTSOE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the ENTSOE_API_KEY environment variable to your ENTSO-E API token."
    )
client = EntsoePandasClient(api_key=api_key)

# Query window: 1 Jan 2019 to 1 Jan 2020, both in Europe/Brussels local time.
start = pd.Timestamp("20190101", tz="Europe/Brussels")
end = pd.Timestamp("20200101", tz="Europe/Brussels")

# Two-letter codes of the countries to query, in alphabetical order.
countries = (
    "AL AT BA BE BG CH CZ DE DK EE ES FI FR GB GR HR HU "
    "IE IT LT LU LV ME MK NL NO PL PT RO RS SE SI SK"
).split()

# Download the day-ahead prices of every country and store one CSV per country code.
price_dir = Path.cwd() / ".." / ".." / "price_data"
price_dir.mkdir(parents=True, exist_ok=True)  # to_csv does not create missing folders

for country in countries:
    print(f"Trying country {country}.")

    try:
        day_ahead_prices_country = client.query_day_ahead_prices(
            country, start=start, end=end
        )
    except NoMatchingDataError:
        # The query matched no data for this country; move on to the next one.
        print(f"No day-ahead prices for {country}; skipping.")
        continue

    day_ahead_prices_country.to_csv(price_dir / f"{country}.csv")