include data retrieving in workflow

2019-11-05 12:53:21 +01:00 · 2019-11-05 12:53:21 +01:00 · f6e44d2805
commit f6e44d2805
parent eb14486e29
9 changed files with 145 additions and 24 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -8,14 +8,6 @@ install:
  - sudo apt-get update --fix-missing
  - sudo apt-get install -yq --no-install-recommends curl bzip2 xz-utils git ca-certificates coinor-cbc
  
-  # download and extract data dependencies
-  - mkdir ./resources
-  - curl -L "https://zenodo.org/record/3517921/files/pypsa-eur-tutorial-data-bundle.tar.xz" -o "./bundle.tar.xz"
-  - curl -L "https://zenodo.org/record/3518020/files/pypsa-eur-tutorial-cutouts.tar.xz" -o "./cutouts.tar.xz"
-  - curl -L "https://zenodo.org/record/3518215/files/natura.tiff" -o "./resources/natura.tiff"
-  - tar xJf ./bundle.tar.xz -C ./data 
-  - tar xJf ./cutouts.tar.xz -C .
-  
  # install conda
  # https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/use-conda-with-travis-ci.html
  - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
--- a/61
+++ b/61
@ -1,4 +1,4 @@
-configfile: "config.yaml"
+configfile: "config.tutorial.yaml"

 COSTS="data/costs.csv"

@ -32,6 +32,30 @@ if config['enable']['prepare_links_p_nom']:
        # group: 'nonfeedin_preparation'
        script: 'scripts/prepare_links_p_nom.py'

+
+datafiles = ['ch_cantons.csv', 'je-e-21.03.02.xls',  # file paths relative to data/bundle/
+            'eez/World_EEZ_v8_2014.shp', 'EIA_hydro_generation_2000_2014.csv',
+            'hydro_capacities.csv', 'naturalearth/ne_10m_admin_0_countries.shp',
+            'NUTS_2013_60M_SH/data/NUTS_RG_60M_2013.shp', 'nama_10r_3popgdp.tsv.gz',
+            'nama_10r_3gdp.tsv.gz', 'time_series_60min_singleindex_filtered.csv',
+            'corine/g250_clc06_V18_5.tif']
+if not config.get('tutorial', False):  # a config without the key counts as a non-tutorial run
+    datafiles.extend(["natura/Natura2000_end2015.shp", "GEBCO_2014_2D.nc"])  # extend: add each path, no nested list
+
+rule retrieve_databundle:
+    output:  expand('data/bundle/{file}', file=datafiles)  # one output path per entry of `datafiles`
+        # ch_cantons='data/bundle/ch_cantons.csv',
+        # ch_popgdp='data/bundle/je-e-21.03.02.xls',
+        # eez='data/bundle/eez/World_EEZ_v8_2014.shp',
+        # eia_hydro_generation='data/bundle/EIA_hydro_generation_2000_2014.csv',
+        # hydro_capacities='data/bundle/hydro_capacities.csv',
+        # naturalearth='data/bundle/naturalearth/ne_10m_admin_0_countries.shp',
+        # nuts3='data/bundle/NUTS_2013_60M_SH/data/NUTS_RG_60M_2013.shp',
+        # nuts3pop='data/bundle/nama_10r_3popgdp.tsv.gz',
+        # nuts3gdp='data/bundle/nama_10r_3gdp.tsv.gz',
+        # opsd_load='data/bundle/time_series_60min_singleindex_filtered.csv'
+    script: 'scripts/retrieve_databundle.py'  # downloads the bundle archive and extracts it under ./data
+
 rule build_powerplants:
    input: base_network="networks/base.nc"
    output: "resources/powerplants.csv"
@ -91,18 +115,29 @@ rule build_bus_regions:
    # group: 'nonfeedin_preparation'
    script: "scripts/build_bus_regions.py"

-rule build_cutout:
-    output: directory("cutouts/{cutout}")
-    resources: mem=config['atlite'].get('nprocesses', 4) * 1000
-    threads: config['atlite'].get('nprocesses', 4)
-    benchmark: "benchmarks/build_cutout_{cutout}"
-    # group: 'feedin_preparation'
-    script: "scripts/build_cutout.py"
+if config['enable'].get('build_cutout', False):  # configs without the key fall back to retrieval
+    rule build_cutout:
+        output: directory("cutouts/{cutout}")
+        resources: mem=config['atlite'].get('nprocesses', 4) * 1000
+        threads: config['atlite'].get('nprocesses', 4)
+        benchmark: "benchmarks/build_cutout_{cutout}"
+        # group: 'feedin_preparation'
+        script: "scripts/build_cutout.py"
+else:
+    rule retrieve_cutout:  # same output as build_cutout, so only one of the two rules may exist
+        output: directory("cutouts/{cutout}")
+        script: 'scripts/retrieve_cutout.py'

-rule build_natura_raster:
-    input: "data/bundle/natura/Natura2000_end2015.shp"
-    output: "resources/natura.tiff"
-    script: "scripts/build_natura_raster.py"
+
+if config['enable'].get('build_natura_raster', False):  # configs without the key fall back to retrieval
+    rule build_natura_raster:
+        input: "data/bundle/natura/Natura2000_end2015.shp"
+        output: "resources/natura.tiff"
+        script: "scripts/build_natura_raster.py"
+else:
+    rule retrieve_natura_raster:  # same output as build_natura_raster, so only one of the two rules may exist
+        output: "resources/natura.tiff"
+        script: 'scripts/retrieve_natura_raster.py'

 rule build_renewable_profiles:
    input:
@ -110,7 +145,7 @@ rule build_renewable_profiles:
        corine="data/bundle/corine/g250_clc06_V18_5.tif",
        natura="resources/natura.tiff",
        gebco=lambda wildcards: ("data/bundle/GEBCO_2014_2D.nc"
-                                 if "max_depth" in config["renewable"][wildcards.technology].keys()
+                                 if "max_depth" in config["renewable"][wildcards.technology].keys() 
                                 else []),
        country_shapes='resources/country_shapes.geojson',
        offshore_shapes='resources/offshore_shapes.geojson',
--- a/config.default.yaml
+++ b/config.default.yaml
@ -1,4 +1,5 @@
 version: 0.1
+tutorial: false
 logging_level: INFO

 summary_dir: results
@ -19,6 +20,8 @@ snapshots:

 enable:
  prepare_links_p_nom: false
+  build_cutout: false
+  build_natura_raster: false

 electricity:
  voltages: [220., 300., 380.]
--- a/config.tutorial.yaml
+++ b/config.tutorial.yaml
@ -1,4 +1,5 @@
 version: 0.1
+tutorial: true
 logging_level: INFO

 summary_dir: results
@ -19,6 +20,8 @@ snapshots:

 enable:
  prepare_links_p_nom: false
+  build_cutout: false
+  build_natura_raster: false

 electricity:
  voltages: [220., 300., 380.]
--- a/scripts/_helpers.py
+++ b/scripts/_helpers.py
@ -1,6 +1,7 @@
 import pandas as pd
-import numpy as np
-from six import iteritems, iterkeys, itervalues
+from six import iterkeys, itervalues
+import urllib
+from progressbar import ProgressBar

 import pypsa

@ -104,3 +105,12 @@ def aggregate_costs(n, flatten=False, opts=None, existing_only=False):
        )

    return costs
+
+def progress_retrieve(url, file):
+    """Download `url` to the local path `file`, reporting progress in percent on a progress bar."""
+    import urllib.request  # a bare `import urllib` does not load the `request` submodule
+    pbar = ProgressBar(0, 100)
+    def dlProgress(count, blockSize, totalSize):
+        percent = count * blockSize * 100 // totalSize if totalSize > 0 else 0  # size may be unknown (-1)
+        pbar.update(min(percent, 100))  # the final block can overshoot totalSize
+    urllib.request.urlretrieve(url, file, reporthook=dlProgress)
--- a/scripts/retrieve_cutout.py
+++ b/scripts/retrieve_cutout.py
@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Nov  4 18:37:11 2019
+
+@author: fabian
+"""
+
+import logging, os, tarfile
+from _helpers import progress_retrieve
+
+logger = logging.getLogger(__name__)
+
+# Pick the archive matching the configuration: tutorial cutouts or the regular cutouts.
+if snakemake.config['tutorial']:
+    url = "https://zenodo.org/record/3518020/files/pypsa-eur-tutorial-cutouts.tar.xz"
+else:
+    url = "https://zenodo.org/record/3517949/files/pypsa-eur-cutouts.tar.xz"
+file = "./cutouts.tar.xz"
+
+progress_retrieve(url, file)
+
+# Unpack into the working directory; `with` closes the archive before it is deleted.
+with tarfile.open(file) as archive:
+    archive.extractall()
+os.remove(file)
--- a/scripts/retrieve_databundle.py
+++ b/scripts/retrieve_databundle.py
@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Nov  4 18:37:11 2019
+
+@author: fabian
+"""
+
+import logging, os, tarfile
+from _helpers import progress_retrieve
+
+logger = logging.getLogger(__name__)
+
+
+# Pick the archive matching the configuration: tutorial bundle or the regular bundle.
+if snakemake.config['tutorial']:
+    url = "https://zenodo.org/record/3517921/files/pypsa-eur-tutorial-data-bundle.tar.xz"
+else:
+    url = "https://zenodo.org/record/3517935/files/pypsa-eur-data-bundle.tar.xz"
+
+file = "./bundle.tar.xz"
+
+progress_retrieve(url, file)
+
+# Unpack below ./data; `with` closes the archive before it is deleted.
+with tarfile.open(file) as archive:
+    archive.extractall('./data')
+os.remove(file)
--- a/scripts/retrieve_natura_raster.py
+++ b/scripts/retrieve_natura_raster.py
@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Nov  4 18:37:11 2019
+
+@author: fabian
+"""
+
+import logging, os
+from _helpers import progress_retrieve
+
+logger = logging.getLogger(__name__)
+
+# Download the Natura raster from Zenodo to the path the workflow expects.
+url = "https://zenodo.org/record/3518215/files/natura.tiff"
+file = "resources/natura.tiff"
+
+# Create the target directory if needed; exist_ok avoids the check-then-create race.
+os.makedirs(os.path.dirname(file), exist_ok=True)
+
+progress_retrieve(url, file)
--- a/test/config.test1.yaml
+++ b/test/config.test1.yaml
@ -1,4 +1,5 @@
 version: 0.1
+tutorial: true
 logging_level: INFO

 summary_dir: results
@ -19,7 +20,9 @@ snapshots:

 enable:
  prepare_links_p_nom: false
-
+  build_cutout: false
+  build_natura_raster: false
+  
 electricity:
  voltages: [220., 300., 380.]
  co2limit: 100.e+6