# Source code for pyESD.Weatherstation

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 12 14:01:43 2021


This routine handles the preprocessing of data downloaded directly from DWD. The default time series is monthly; any other frequency must be passed to the function.
1. Extracting only stations with the required number of years
2. Writing additional information into the files (e.g. station name, lat, lon and elevation), since it is downloaded into a separate file using station codes
3. All utility functions to read stations into the pyESD StationOperator class

Note: This routine is specifically designed for data downloaded from DWD (otherwise please contact daniel.boateng@uni-tuebingen.de for assistance with other datasets)

@author: dboateng
"""

# importing packages
import os 
import pandas as pd
import numpy as np

#from StationOperator import StationOperator
# #local packages
try:
    from .StationOperator import StationOperator
except:
    from StationOperator import StationOperator
    



def read_weatherstationnames(path_to_data):
    """
    Read the station number -> station name table of a data directory.

    The table is read from ``stationnames.csv`` inside ``path_to_data``. The
    first row is skipped as a header; the first column is converted to an
    integer station number and the second column is taken as the station name.

    Parameters
    ----------
    path_to_data : TYPE: str
        DESCRIPTION. The directory path to where all the station data are stored

    Returns
    -------
    namedict : TYPE: dict
        DESCRIPTION. Maps each station number (int) to its station name (str)

    """
    # ndmin=2 keeps the result two-dimensional even when the file lists a
    # single station; without it loadtxt collapses that row to 1-D and the
    # station number would be iterated character by character.
    table = np.loadtxt(os.path.join(path_to_data, 'stationnames.csv'),
                       delimiter=',', skiprows=1, usecols=(0, 1),
                       dtype=str, ndmin=2)

    return {int(number): str(name) for number, name in table}


def _mask_sentinel(series, sentinel, lower=None, upper=None):
    """Return ``series`` with ``sentinel`` set to NaN, after checking optional bounds."""
    # Series.mask returns a new object, so the caller's frame is never
    # written through a slice and integer columns are upcast cleanly.
    cleaned = series.mask(series == sentinel)

    # Raised explicitly (rather than with ``assert``) so the checks still run
    # under ``python -O``; the exception type is the one ``assert`` produces.
    if lower is not None and (cleaned < lower).any():
        raise AssertionError(
            "{} contains values below {}".format(series.name, lower))
    if upper is not None and (cleaned > upper).any():
        raise AssertionError(
            "{} contains values above {}".format(series.name, upper))
    return cleaned


def read_station_csv(filename, varname, return_all=False):
    """
    Read a single station file and return its data and metadata.

    The second comma-separated field of the first four lines is read as the
    station name, latitude, longitude and elevation. The first six lines are
    then skipped and the first two columns are read as a time index and the
    values of ``varname``. Rows with missing values are dropped, the
    variable's missing-value sentinel is replaced by NaN and, for
    Precipitation and Temperature, the remaining values are range checked.

    Parameters
    ----------
    filename : TYPE: str
        DESCRIPTION. Path to the station file
    varname : TYPE: str
        DESCRIPTION. The name of the variable to downscale; one of
        "Precipitation", "Temperature" or "O18"
    return_all : TYPE: bool, optional
        DESCRIPTION. If True, return the raw data and coordinates instead of a
        StationOperator. The default is False.

    Raises
    ------
    ValueError
        DESCRIPTION. If ``varname`` is not one of the recognized names
    AssertionError
        DESCRIPTION. If Precipitation values lie outside [-1e-2, 2000] or
        Temperature values lie outside [-50, 80]

    Returns
    -------
    so : TYPE: StationOperator
        DESCRIPTION. Built from the data, name, lat, lon and elevation
        (returned when ``return_all`` is False)
    data, lat, lon : TYPE: dict, float, float
        DESCRIPTION. ``{varname: pandas.Series}`` and the station coordinates
        (returned when ``return_all`` is True)

    """

    # reading headers info with readline
    with open(filename, "r") as f:
        header = [f.readline().split(',')[1].rstrip("\n") for _ in range(4)]

    name = header[0]
    lat, lon, elev = (float(value) for value in header[1:])

    data = pd.read_csv(filename, sep=',', skiprows=6, usecols=[0, 1],
                       parse_dates=[0], index_col=0,
                       names=['Time', varname])

    data = data.dropna()

    if varname == "Precipitation":
        series = _mask_sentinel(data[varname], -9999, lower=-1e-2, upper=2000)
    elif varname == "Temperature":
        # NOTE(review): the sentinel here is -8888 while the other variables
        # use -9999 -- confirm against the preprocessing that writes the files.
        series = _mask_sentinel(data[varname], -8888, lower=-50, upper=80)
    elif varname == "O18":
        series = _mask_sentinel(data[varname], -9999)
    else:
        raise ValueError("The model does not recognize the variable name")

    # Every variable is handed on in the same {varname: Series} form.
    data = {varname: series}

    if return_all:
        return data, lat, lon

    return StationOperator(data, name, lat, lon, elev)
    
    
    
    
def read_weatherstations(path_to_data, varname="Precipitation"):
    """
    Read all the station data in a directory.

    The station names are taken from the station-name table of the directory;
    each station is read from ``<name with spaces replaced by "_">.csv`` in
    the same directory.

    Parameters
    ----------
    path_to_data : TYPE: STR
        DESCRIPTION. relative or absolute path to the station folder
    varname : TYPE: STR, optional
        DESCRIPTION. Name of the variable stored in the station files,
        forwarded to read_station_csv. The default is "Precipitation".

    Returns
    -------
    stations : TYPE: DICT
        DESCRIPTION. Dictionary containing all the datasets, keyed by the
        station number

    """
    namedict = read_weatherstationnames(path_to_data)
    stations = {}
    for number, name in namedict.items():
        filename = name.replace(' ', '_') + '.csv'
        print("Reading", filename)
        # read_station_csv requires the variable name; it was previously
        # omitted here, so every call failed with a TypeError.
        stations[number] = read_station_csv(
            os.path.join(path_to_data, filename), varname)
    return stations