import pandas as pd
import geopandas as gpd

#shorthand used to build multi-index slices for .loc[...] lookups below
idx = pd.IndexSlice

#translations for Eurostat
country_to_code = {
'EU28' : 'EU',
'EA19' : 'EA',
'Belgium' : 'BE',
'Bulgaria' : 'BG',
'Czech Republic' : 'CZ',
'Denmark' : 'DK',
'Germany' : 'DE',
'Estonia' : 'EE',
'Ireland' : 'IE',
'Greece' : 'GR',
'Spain' : 'ES',
'France' : 'FR',
'Croatia' : 'HR',
'Italy' : 'IT',
'Cyprus' : 'CY',
'Latvia' : 'LV',
'Lithuania' : 'LT',
'Luxembourg' : 'LU',
'Hungary' : 'HU',
'Malta' : 'MA',
'Netherlands' : 'NL',
'Austria' : 'AT',
'Poland' : 'PL',
'Portugal' : 'PT',
'Romania' : 'RO',
'Slovenia' : 'SI',
'Slovakia' : 'SK',
'Finland' : 'FI',
'Sweden' : 'SE',
'United Kingdom' : 'GB',
'Iceland' : 'IS',
'Norway' : 'NO',
'Montenegro' : 'ME',
'FYR of Macedonia' : 'MK',
'Albania' : 'AL',
'Serbia' : 'RS',
'Turkey' : 'TU',
'Bosnia and Herzegovina' : 'BA',
'Kosovo\n(UNSCR 1244/99)' : 'KO',  #2017 version
'Kosovo\n(under United Nations Security Council Resolution 1244/99)' : 'KO',  #2016 version
'Moldova' : 'MO',
'Ukraine' : 'UK',
'Switzerland' : 'CH',
}


#country codes that build_idees skips when reading the JRC-IDEES workbooks
non_EU = ['NO', 'CH', 'ME', 'MK', 'RS', 'BA', 'AL']

#codes of this file -> codes used in the JRC-IDEES file names and the EEA data
rename = dict(GR="EL", GB="UK")

eu28 = [
    'FR', 'DE', 'GB', 'IT', 'ES', 'PL', 'SE', 'NL', 'BE', 'FI', 'CZ',
    'DK', 'PT', 'RO', 'AT', 'BG', 'EE', 'GR', 'LV',
    'HU', 'IE', 'SK', 'LT', 'HR', 'LU', 'SI',
    'CY', 'MT',
]

#same members as eu28, except that "GB" is taken out and "UK" is put at the end
eu28_eea = [code for code in eu28 if code != "GB"] + ["UK"]


def build_eurostat(year):
    """Load the Eurostat energy balances of all countries for one year.

    Every sheet of the workbook for ``year`` is read with its first four
    columns as index. The sheets are stacked under the country code that
    country_to_code gives for the sheet's first column header, non-float
    columns are dropped, and the values are scaled from ktoe/a to TWh/a.
    """

    #edition of the statistics that is read
    stats_from_year = 2016

    #2016 includes BA, 2017 doesn't
    fns = {2016: "data/eurostat-energy_balances-june_2016_edition/{year}-Energy-Balances-June2016edition.xlsx",
           2017: "data/eurostat-energy_balances-june_2017_edition/{year}-ENERGY-BALANCES-June2017edition.xlsx"}
    workbook = fns[stats_from_year].format(year=year)

    #with sheet as None, an ordered dictionary of all sheets is returned
    sheets = pd.read_excel(workbook,
                           None,
                           skiprows=1,
                           index_col=list(range(4)))

    #key each sheet by the code of the country named in its first column header
    by_country = {}
    for sheet in sheets.values():
        by_country[country_to_code[sheet.columns[0]]] = sheet

    #sorted index necessary for slicing
    balances = pd.concat(by_country,sort=True).sort_index()

    #drop non-numeric columns; convert ktoe/a to TWh/a
    non_numeric = balances.columns[balances.dtypes != float]
    return balances.drop(non_numeric,axis=1)*11.63/1e3


def build_swiss(year):
    """Return the Swiss data selected by ``.loc["CH", str(year)]`` in TWh/a.

    The CSV is read with its first two columns as index and every value is
    divided by 3.6 before the selection.
    """

    fn = "data/switzerland-sfoe/switzerland-new_format.csv"

    raw = pd.read_csv(fn,index_col=[0,1])

    #convert PJ/a to TWh/a
    in_twh = raw/3.6

    return in_twh.loc["CH",str(year)]


def build_idees(year):
    """Collect per-country energy figures from the JRC-IDEES-2015 workbooks.

    Loops over the countries in the module-level ``population`` index (set in
    the ``__main__`` section), skips those listed in ``non_EU``, and reads the
    Residential, Tertiary and Transport workbook of each remaining country.
    Values are picked by hard-coded row position from the column labelled
    ``year``; before each pick an ``assert`` checks that the first column of
    that row carries the expected label, so a changed sheet layout fails
    loudly instead of silently reading the wrong row.

    Returns a DataFrame with one row per processed country. All columns are
    multiplied by 11.63/1e3 (ktoe/a to TWh/a), except "passenger cars", which
    is scaled back to its raw value, and "passenger car efficiency", which is
    additionally multiplied by 10.
    """
    base_dir = "data/jrc-idees-2015"

    totals = pd.DataFrame()

    #convert ktoe/a to TWh/a
    factor = 11.63/1e3

    for ct in population.index:

        if ct in non_EU:
            print("When reading IDEES, skipping non-EU28 country",ct)
            continue

        #RESIDENTIAL

        #file names use the codes from `rename` (EL, UK) where they differ
        filename = "{}/JRC-IDEES-2015_Residential_{}.xlsx".format(base_dir,rename.get(ct,ct))
        df = pd.read_excel(filename,"RES_hh_fec")

        assert df.iloc[2,0] == "Space heating"
        totals.loc[ct,"total residential space"] = df.loc[2,year]

        assert df.iloc[10,0] == "Advanced electric heating"
        assert df.iloc[11,0] == "Conventional electric heating"
        totals.loc[ct,"electricity residential space"] = df.loc[[10,11],year].sum()

        assert df.iloc[15,0] == "Water heating"
        totals.loc[ct,"total residential water"] = df.loc[15,year]
        assert df.iloc[23,0] == "Electricity"
        totals.loc[ct,"electricity residential water"] = df.loc[23,year]

        assert df.iloc[25,0] == "Cooking"
        totals.loc[ct,"total residential cooking"] = df.loc[25,year]
        assert df.iloc[30,0] == "Electricity"
        totals.loc[ct,"electricity residential cooking"] = df.loc[30,year]

        df = pd.read_excel(filename,"RES_summary")

        assert df.iloc[34,0] == "Energy consumption by fuel - Eurostat structure (ktoe)"
        totals.loc[ct,"total residential"] = df.loc[34,year]

        assert df.iloc[47,0] == "Electricity"
        totals.loc[ct,"electricity residential"] = df.loc[47,year]


        #SERVICES

        filename = "{}/JRC-IDEES-2015_Tertiary_{}.xlsx".format(base_dir,rename.get(ct,ct))
        df = pd.read_excel(filename,"SER_hh_fec")

        assert df.iloc[2,0] == "Space heating"
        totals.loc[ct,"total services space"] = df.loc[2,year]

        assert df.iloc[11,0] == "Advanced electric heating"
        assert df.iloc[12,0] == "Conventional electric heating"
        totals.loc[ct,"electricity services space"] = df.loc[[11,12],year].sum()

        assert df.iloc[17,0] == "Hot water"
        totals.loc[ct,"total services water"] = df.loc[17,year]
        assert df.iloc[24,0] == "Electricity"
        totals.loc[ct,"electricity services water"] = df.loc[24,year]

        assert df.iloc[27,0] == "Catering"
        totals.loc[ct,"total services cooking"] = df.loc[27,year]
        assert df.iloc[31,0] == "Electricity"
        totals.loc[ct,"electricity services cooking"] = df.loc[31,year]

        df = pd.read_excel(filename,"SER_summary")

        assert df.iloc[37,0] == "Energy consumption by fuel - Eurostat structure (ktoe)"
        totals.loc[ct,"total services"] = df.loc[37,year]

        assert df.iloc[50,0] == "Electricity"
        totals.loc[ct,"electricity services"] = df.loc[50,year]


        # TRANSPORT

        filename = "{}/JRC-IDEES-2015_Transport_{}.xlsx".format(base_dir,rename.get(ct,ct))

        df = pd.read_excel(filename,"TrRoad_ene")

        assert df.iloc[2,0] == "by fuel (EUROSTAT DATA)"
        totals.loc[ct,"total road"] = df.loc[2,year]
        assert df.iloc[13,0] == "Electricity"
        totals.loc[ct,"electricity road"] = df.loc[13,year]

        assert df.iloc[17,0] == "Powered 2-wheelers (Gasoline)"
        totals.loc[ct,"total two-wheel"] = df.loc[17,year]

        assert df.iloc[19,0] == "Passenger cars"
        totals.loc[ct,"total passenger cars"] = df.loc[19,year]
        assert df.iloc[30,0] == "Battery electric vehicles"
        totals.loc[ct,"electricity passenger cars"] = df.loc[30,year]

        assert df.iloc[31,0] == "Motor coaches, buses and trolley buses"
        totals.loc[ct,"total other road passenger"] = df.loc[31,year]
        assert df.iloc[39,0] == "Battery electric vehicles"
        totals.loc[ct,"electricity other road passenger"] = df.loc[39,year]

        assert df.iloc[41,0] == "Light duty vehicles"
        totals.loc[ct,"total light duty road freight"] = df.loc[41,year]
        assert df.iloc[49,0] == "Battery electric vehicles"
        totals.loc[ct,"electricity light duty road freight"] = df.loc[49,year]

        assert df.iloc[50,0] == "Heavy duty vehicles (Diesel oil incl. biofuels)"
        totals.loc[ct,"total heavy duty road freight"] = df.loc[50,year]

        #second "Passenger cars" row of this sheet; rescaled separately at the end
        assert df.iloc[61,0] == "Passenger cars"
        totals.loc[ct,"passenger car efficiency"] = df.loc[61,year]


        df = pd.read_excel(filename,"TrRail_ene")

        assert df.iloc[2,0] == "by fuel (EUROSTAT DATA)"
        totals.loc[ct,"total rail"] = df.loc[2,year]
        assert df.iloc[12,0] == "Electricity"
        totals.loc[ct,"electricity rail"] = df.loc[12,year]

        assert df.iloc[15,0] == "Passenger transport"
        totals.loc[ct,"total rail passenger"] = df.loc[15,year]
        assert df.iloc[16,0] == "Metro and tram, urban light rail"
        assert df.iloc[19,0] == "Electric"
        assert df.iloc[20,0] == "High speed passenger trains"
        totals.loc[ct,"electricity rail passenger"] = df.loc[[16,19,20],year].sum()

        assert df.iloc[21,0] == "Freight transport"
        totals.loc[ct,"total rail freight"] = df.loc[21,year]
        assert df.iloc[23,0] == "Electric"
        totals.loc[ct,"electricity rail freight"] = df.loc[23,year]


        df = pd.read_excel(filename,"TrAvia_ene")

        assert df.iloc[6,0] == "Passenger transport"
        totals.loc[ct,"total aviation passenger"] = df.loc[6,year]
        assert df.iloc[10,0] == "Freight transport"
        totals.loc[ct,"total aviation freight"] = df.loc[10,year]

        assert df.iloc[7,0] == "Domestic"
        totals.loc[ct,"total domestic aviation passenger"] = df.loc[7,year]
        assert df.iloc[8,0] == "International - Intra-EU"
        assert df.iloc[9,0] == "International - Extra-EU"
        totals.loc[ct,"total international aviation passenger"] = df.loc[[8,9],year].sum()

        assert df.iloc[11,0] == "Domestic and International - Intra-EU"
        totals.loc[ct,"total domestic aviation freight"] = df.loc[11,year]
        assert df.iloc[12,0] == "International - Extra-EU"
        totals.loc[ct,"total international aviation freight"] = df.loc[12,year]

        #domestic/international totals are passenger plus freight
        totals.loc[ct,"total domestic aviation"] = totals.loc[ct,["total domestic aviation freight","total domestic aviation passenger"]].sum()
        totals.loc[ct,"total international aviation"] = totals.loc[ct,["total international aviation freight","total international aviation passenger"]].sum()

        df = pd.read_excel(filename,"TrNavi_ene")

        #coastal and inland
        assert df.iloc[2,0] == "by fuel (EUROSTAT DATA)"
        totals.loc[ct,"total domestic navigation"] = df.loc[2,year]


        df = pd.read_excel(filename,"TrRoad_act")

        #used as "number cars" in build_transport_data
        assert df.iloc[85,0] == "Passenger cars"
        totals.loc[ct,"passenger cars"] = df.loc[85,year]

    totals = totals*factor

    #"passenger cars" is not an energy quantity, so undo the scaling applied above
    totals["passenger cars"] = totals["passenger cars"]/factor

    #convert ktoe/100km to kWh per km
    #NOTE(review): the source unit stated above is not visible here - confirm against the workbook
    totals["passenger car efficiency"] = 10*totals["passenger car efficiency"]

    return totals


def build_energy_totals():
    """Assemble the energy totals per country and write them to CSV.

    Reads the module-level frames ``idees``, ``eurostat``, ``swiss`` and
    ``population`` (all set in the ``__main__`` section). Starting from the
    IDEES data reindexed to ``population.index``, it

    * adds international navigation from the Eurostat "Bunkers" rows,
    * overwrites the "CH" row with the Swiss data,
    * fills countries without IDEES data from Eurostat and splits their
      sector totals by use / vehicle type using the fractions found in the
      countries that do have data,
    * rescales the Norwegian heating split (only if "NO" is in the index),
    * patches zero entries of "BA" proportionally to "RS" (only if "BA" is
      in the index).

    The result is written to ``snakemake.output.energy_name`` and returned.
    """

    clean_df = idees.reindex(population.index).drop(["passenger cars","passenger car efficiency"],axis=1)

    print("International navigation")
    in_eurostat = clean_df.index.intersection(eurostat.index.levels[0])
    clean_df.loc[in_eurostat,"total international navigation"] = eurostat.loc[idx[in_eurostat,:,"Bunkers",:],"Total all products"].groupby(level=0).sum()

    clean_df.loc["CH"] = swiss

    #get values for missing countries based on Eurostat EnergyBalances
    #divide cooking/space/water according to averages in EU28

    missing = clean_df.index[clean_df["total residential"].isnull()]
    missing_in_eurostat = missing.intersection(eurostat.index.levels[0])
    uses = ["space","cooking","water"]

    for sector,eurostat_sector in [("residential","Residential"),("services","Services"),
                                   ("road","Road"),("rail","Rail")]:
        for fuel,eurostat_fuel in [("electricity","Electricity"),("total","Total all products")]:
            clean_df.loc[missing_in_eurostat,"{} {}".format(fuel,sector)] = eurostat.loc[idx[missing_in_eurostat,:,:,eurostat_sector],eurostat_fuel].groupby(level=0).sum()

        #road and rail have no space/cooking/water split
        if sector in ["road","rail"]:
            continue

        fuel = "electricity"
        for use in uses:
            #rows with missing data yield NaN and are skipped by mean()
            avg = (clean_df["{} {} {}".format(fuel,sector,use)]/clean_df["{} {}".format(fuel,sector)]).mean()
            print("{}: average fraction of {} for {} is {}".format(sector,fuel,use,avg))
            clean_df.loc[missing_in_eurostat,"{} {} {}".format(fuel,sector,use)] = avg*clean_df.loc[missing_in_eurostat,"{} {}".format(fuel,sector)]

        fuel = "total"
        for use in uses:
            avg = ((clean_df["{} {} {}".format("total",sector,use)]-clean_df["{} {} {}".format("electricity",sector,use)])/
                   (clean_df["{} {}".format("total",sector)]-clean_df["{} {}".format("electricity",sector)])).mean()
            print("{}: average fraction of non-electric for {} is {}".format(sector,use,avg))
            #total per use = electric part (set above) + share of the non-electric remainder
            clean_df.loc[missing_in_eurostat,"{} {} {}".format(fuel,sector,use)] = \
                   clean_df.loc[missing_in_eurostat,"{} {} {}".format("electricity",sector,use)] \
                   + avg*(clean_df.loc[missing_in_eurostat,"{} {}".format("total",sector)] - clean_df.loc[missing_in_eurostat,"{} {}".format("electricity",sector)])


    #Fix Norway space and water heating fractions
    #http://www.ssb.no/en/energi-og-industri/statistikker/husenergi/hvert-3-aar/2014-07-14
    #The main heating source for about 73 per cent of the households is based on electricity
    #=> 27% is non-electric
    elec_fraction = 0.73

    #guarded like the BA fix below, so a run without Norway does not raise KeyError
    if "NO" in clean_df.index:

        without_norway = clean_df.drop("NO")

        for sector in ["residential","services"]:

            #assume non-electric is heating
            total_heating = (clean_df.loc["NO","{} {}".format("total",sector)]-clean_df.loc["NO","{} {}".format("electricity",sector)])/(1-elec_fraction)

            for use in uses:
                fraction = ((without_norway["{} {} {}".format("total",sector,use)]-without_norway["{} {} {}".format("electricity",sector,use)])/
                            (without_norway["{} {}".format("total",sector)]-without_norway["{} {}".format("electricity",sector)])).mean()
                clean_df.loc["NO","{} {} {}".format("total",sector,use)] = total_heating*fraction
                clean_df.loc["NO","{} {} {}".format("electricity",sector,use)] = total_heating*fraction*elec_fraction

    #Missing aviation
    print("Aviation")
    clean_df.loc[missing_in_eurostat,"total domestic aviation"] = eurostat.loc[idx[missing_in_eurostat,:,:,"Domestic aviation"],"Total all products"].groupby(level=0).sum()
    clean_df.loc[missing_in_eurostat,"total international aviation"] = eurostat.loc[idx[missing_in_eurostat,:,:,"International aviation"],"Total all products"].groupby(level=0).sum()

    print("Domestic navigation")
    clean_df.loc[missing_in_eurostat,"total domestic navigation"] = eurostat.loc[idx[missing_in_eurostat,:,:,"Domestic Navigation"],"Total all products"].groupby(level=0).sum()


    #split road traffic for non-IDEES
    missing = clean_df.index[clean_df["total passenger cars"].isnull()]
    for fuel in ["total","electricity"]:
        selection = [fuel+" passenger cars",fuel+" other road passenger",fuel+" light duty road freight"]
        #two-wheelers and heavy duty freight only have a "total" column
        if fuel == "total":
            selection = [fuel+" two-wheel"] + selection + [fuel+" heavy duty road freight"]
        #shares of each vehicle type, summed over the countries that have data
        road = clean_df[selection].sum()
        road_fraction = road/road.sum()
        for i in road_fraction.index:
            clean_df.loc[missing,i] = road_fraction[i]*clean_df.loc[missing,fuel+" road"]


    #split rail traffic for non-IDEES
    missing = clean_df.index[clean_df["total rail passenger"].isnull()]
    for fuel in ["total","electricity"]:
        selection = [fuel+" rail passenger",fuel+" rail freight"]
        rail = clean_df[selection].sum()
        rail_fraction = rail/rail.sum()
        for i in rail_fraction.index:
            clean_df.loc[missing,i] = rail_fraction[i]*clean_df.loc[missing,fuel+" rail"].values


    #split aviation traffic for non-IDEES
    missing = clean_df.index[clean_df["total domestic aviation passenger"].isnull()]
    for destination in ["domestic","international"]:
        selection = ["total " + destination+" aviation passenger","total " + destination+" aviation freight"]
        aviation = clean_df[selection].sum()
        aviation_fraction = aviation/aviation.sum()
        for i in aviation_fraction.index:
            clean_df.loc[missing,i] = aviation_fraction[i]*clean_df.loc[missing,"total "+ destination + " aviation"].values
    clean_df.loc[missing,"total aviation passenger"] = clean_df.loc[missing,["total domestic aviation passenger","total international aviation passenger"]].sum(axis=1)
    clean_df.loc[missing,"total aviation freight"] = clean_df.loc[missing,["total domestic aviation freight","total international aviation freight"]].sum(axis=1)

    if "BA" in clean_df.index:
        #fix missing data for BA (services and road energy data)
        missing = (clean_df.loc["BA"] == 0.)

        #add back in proportional to RS with ratio of total residential demand
        clean_df.loc["BA",missing] = clean_df.loc["BA","total residential"]/clean_df.loc["RS","total residential"]*clean_df.loc["RS",missing]

    clean_df.to_csv(snakemake.output.energy_name)

    return clean_df


def build_eea_co2(year=1990):
    """Return the EEA/UNFCCC CO2 emissions per country and sector for ``year``.

    Rows are CH, NO, the countries in ``eu28_eea`` and the aggregate "EUA";
    in the result "EUA" is relabelled "EU28" and "UK" is relabelled "GB".
    Columns are the short sector names defined below, plus a derived
    "industrial non-elec" column (total energy minus the listed energy
    sectors); the "total ..." columns are dropped. Values are the file's
    "emissions" column divided by 1e3.
    """
    # see ../notebooks/compute_1990_Europe_emissions_for_targets.ipynb

    #https://www.eea.europa.eu/data-and-maps/data/national-emissions-reported-to-the-unfccc-and-to-the-eu-greenhouse-gas-monitoring-mechanism-16
    #downloaded 201228 (modified by EEA last on 201221)
    fn = "data/eea/UNFCCC_v23.csv"
    df = pd.read_csv(fn, encoding="latin-1")
    #the base-period label is the only non-numeric year; map it to its middle year
    df.loc[df["Year"] == "1985-1987","Year"] = 1986
    df["Year"] = df["Year"].astype(int)
    df = df.set_index(['Country_code', 'Pollutant_name', 'Year', 'Sector_name']).sort_index()

    #short column name -> sector name as written in the EEA file
    #(a plain dict avoids the deprecated dtype-less pd.Series() constructor)
    sectors = {
        "electricity": '1.A.1.a - Public Electricity and Heat Production',
        'residential non-elec': '1.A.4.b - Residential',
        'services non-elec': '1.A.4.a - Commercial/Institutional',
        'rail non-elec': "1.A.3.c - Railways",
        "road non-elec": '1.A.3.b - Road Transportation',
        "domestic navigation": "1.A.3.d - Domestic Navigation",
        'international navigation': '1.D.1.b - International Navigation',
        "domestic aviation": '1.A.3.a - Domestic Aviation',
        "international aviation": '1.D.1.a - International Aviation',
        'total energy': '1 - Energy',
        'industrial processes': '2 - Industrial Processes and Product Use',
        'agriculture': '3 - Agriculture',
        'LULUCF': '4 - Land Use, Land-Use Change and Forestry',
        'waste management': '5 - Waste management',
        'other': '6 - Other Sector',
        'indirect': 'ind_CO2 - Indirect CO2',
        "total wL": "Total (with LULUCF)",
        "total woL": "Total (without LULUCF)",
    }
    short_names = {sector: name for name, sector in sectors.items()}

    pol = "CO2" #["All greenhouse gases - (CO2 equivalent)","CO2"]

    cts = ["CH","EUA","NO"] + eu28_eea

    emissions = df.loc[idx[cts,pol,year,list(sectors.values())],"emissions"].unstack("Sector_name")
    emissions = emissions.rename(columns=short_names).rename(index={"All greenhouse gases - (CO2 equivalent)" : "GHG"},level=1)

    #only take level 0, since level 1 (pol) and level 2 (year) are trivial
    #(axis=0 is the default; passing it explicitly is deprecated in recent pandas)
    emissions = emissions.groupby(level=0).sum()

    emissions = emissions.rename(index={"EUA" : "EU28", "UK" : "GB"})

    energy_sectors = ['electricity', 'services non-elec','residential non-elec', 'road non-elec',
                      'rail non-elec', 'domestic aviation', 'international aviation', 'domestic navigation',
                      'international navigation']
    emissions['industrial non-elec'] = emissions['total energy'] - emissions[energy_sectors].sum(axis=1)

    emissions = emissions.drop(columns=["total energy", "total wL", "total woL"])

    #NOTE(review): presumably converts the file's unit to a 1000x larger one - confirm the unit in the EEA data
    return emissions/1e3


def build_eurostat_co2(year=1990):
    """Estimate emissions from the Eurostat energy balances of ``year``.

    Each row of the balances is multiplied column-wise by a specific
    emission factor and summed over the columns. Only the three fuel columns
    listed below carry a factor; all other columns stay NaN and therefore do
    not contribute to the sum.
    """

    balances = build_eurostat(year)

    #emissions in tCO2_equiv per MWh_th
    #oil values from https://www.eia.gov/tools/faqs/faq.cfm?id=74&t=11
    #Distillate oil (No. 2)  0.276
    #Residual oil (No. 6)  0.298
    #https://www.eia.gov/electricity/annual/html/epa_a_03.html
    specific_emissions = {
        "Solid fuels": 0.36,   #Approximates coal
        "Oil (total)": 0.285,  #Average of distillate and residue
        "Gas": 0.2,            #For natural gas
    }

    se = pd.Series(index=balances.columns,dtype=float)
    for fuel, value in specific_emissions.items():
        se[fuel] = value

    weighted = balances.multiply(se)

    return weighted.sum(axis=1)


def build_co2_totals(eea_co2, eurostat_co2, year=1990):
    """Combine the EEA emissions with Eurostat-based estimates.

    ``eea_co2`` is reindexed to EU28, NO, CH, five further countries and the
    members of ``eu28``. For those five countries (BA, RS, AL, ME, MK) the
    sector columns are filled from ``eurostat_co2``, a series that is looked
    up by (country, ...) index keys. ``year`` is accepted but not used.
    """

    from_eurostat = ["BA","RS","AL","ME","MK"]

    co2 = eea_co2.reindex(["EU28","NO","CH"] + from_eurostat + eu28)

    #column of co2 -> index key of eurostat_co2 (without the leading country)
    lookups = [
        ("electricity", ("+","Conventional Thermal Power Stations","of which From Coal")),
        ("residential non-elec", ("+","+","Residential")),
        ("services non-elec", ("+","+","Services")),
        ("road non-elec", ("+","+","Road")),
        ("rail non-elec", ("+","+","Rail")),
        ("domestic navigation", ("+","+","Domestic Navigation")),
        ('international navigation', ("-","Bunkers")),
        ("domestic aviation", ("+","+","Domestic aviation")),
        ("international aviation", ("+","+","International aviation")),
        #doesn't include industrial process emissions or fuel processing/refining
        ('industrial non-elec', ("+","Industry")),
        #doesn't include non-energy emissions
        ('agriculture', ("+","+","Agriculture / Forestry")),
    ]

    for ct in from_eurostat:
        for column, key in lookups:
            co2.loc[ct,column] = eurostat_co2[(ct,) + key].sum()

    return co2


def build_transport_data():
    """Build the car count and car efficiency per country and write them to CSV.

    Uses the module-level ``idees`` and ``population``. Countries without a
    value get the mean number of cars per person times their population and
    the mean fuel efficiency of the other countries. The table is written to
    ``snakemake.output.transport_name`` and returned.
    """

    transport_data = pd.DataFrame(index=population.index,
                                  columns=["number cars","average fuel efficiency"])

    ## collect number of cars

    transport_data["number cars"] = idees["passenger cars"]

    #CH from http://ec.europa.eu/eurostat/statistics-explained/index.php/Passenger_cars_in_the_EU#Luxembourg_has_the_highest_number_of_passenger_cars_per_inhabitant
    transport_data.loc["CH","number cars"] = 4.136e6

    without_cars = transport_data.index[transport_data["number cars"].isna()]

    print("Missing data on cars from:")

    print(without_cars)

    #fill the gaps with the average cars per person scaled by population
    cars_per_person = transport_data["number cars"]/population
    estimated_cars = cars_per_person.mean()*population

    transport_data.loc[without_cars,"number cars"] = estimated_cars


    ## collect average fuel efficiency in kWh/km

    transport_data["average fuel efficiency"] = idees["passenger car efficiency"]

    without_efficiency = transport_data.index[transport_data["average fuel efficiency"].isna()]

    print("Missing data on fuel efficiency from:")

    print(without_efficiency)

    mean_efficiency = transport_data["average fuel efficiency"].mean()
    transport_data.loc[without_efficiency,"average fuel efficiency"] = mean_efficiency

    transport_data.to_csv(snakemake.output.transport_name)

    return transport_data


#Script entry point. The names assigned here (snakemake, population, eurostat,
#swiss, idees) are module-level globals that the build_* functions above read,
#so they must be set before the corresponding function is called.
if __name__ == "__main__":


    # Detect running outside of snakemake and mock snakemake for testing
    if 'snakemake' not in globals():
        from vresutils import Dict
        snakemake = Dict()
        snakemake.output = Dict()
        snakemake.output['energy_name'] = "data/energy_totals.csv"
        snakemake.output['co2_name'] = "data/co2_totals.csv"
        snakemake.output['transport_name'] = "data/transport_data.csv"

        snakemake.input = Dict()
        snakemake.input['nuts3_shapes'] = '../pypsa-eur/resources/nuts3_shapes.geojson'

    #population per country, summed over the NUTS3 regions
    nuts3 = gpd.read_file(snakemake.input.nuts3_shapes).set_index('index')
    population = nuts3['pop'].groupby(nuts3.country).sum()

    #year of the energy data
    year = 2011

    eurostat = build_eurostat(year)

    swiss = build_swiss(year)

    idees = build_idees(year)

    #writes snakemake.output.energy_name
    build_energy_totals()

    #both CO2 builders are called with their default year (1990), not `year`
    eea_co2 = build_eea_co2()

    eurostat_co2 = build_eurostat_co2()

    #`year` is passed here but build_co2_totals does not use it
    co2=build_co2_totals(eea_co2, eurostat_co2, year)
    co2.to_csv(snakemake.output.co2_name)
    
    #writes snakemake.output.transport_name
    build_transport_data()