#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 12 14:02:45 2021
@author: dboateng
"""
# importing modules
from abc import ABC, abstractmethod
import os
import sys
import pickle
import numpy as np
import pandas as pd
def _check_data_available(data, daterange):
    """Verify that every entry of ``daterange`` can be looked up in ``data``.

    Each label is fetched individually through ``data.loc``; the fetched
    values are discarded. Nothing is returned -- the first failing lookup
    simply propagates its exception to the caller.

    Parameters
    ----------
    data : object exposing a label-based ``.loc`` indexer
        Presumably a pandas Series or DataFrame -- confirm against callers.
    daterange : iterable
        Labels that must be present in ``data``.

    Raises
    ------
    KeyError
        Raised by the ``.loc`` lookup when a label is absent (this is the
        exception the callers in this module catch).
    """
    for timestamp in daterange:
        # Lookup only: the value is irrelevant, a missing label raises.
        data.loc[timestamp]
class Predictor(ABC):
    """Abstract base class for predictors backed by an on-disk pickle cache.

    Subclasses implement :meth:`_generate`. :meth:`get` stores the generated
    data in ``self.data`` under a key derived from the dataset name and the
    pattern/parameter sources, and persists ``data``, ``params`` and
    ``patterns`` to ``<cachedir>/<longname>.pickle``.

    Parameters
    ----------
    name : str
        Short name of the predictor; used in status messages.
    longname : str, optional
        Name used for the cache file. Defaults to ``name``.
    cachedir : str, optional
        Directory holding the cache file. Defaults to ``'.predictors'``.
    """

    def __init__(self, name, longname=None, cachedir=None):
        self.name = name
        self.longname = longname if longname is not None else name
        self.cachedir = cachedir if cachedir is not None else '.predictors'

        # In-memory state; all three dicts are what save()/load() persist.
        self.data = {}
        self.params = {}
        self.patterns = {}

    def _cache_file(self):
        """Return the path of the pickle file used by save() and load()."""
        return os.path.join(self.cachedir, self.longname + ".pickle")

    @staticmethod
    def _data_key(dataset_name, patterns_from, params_from):
        """Build the ``self.data`` key for one dataset/patterns/params combination."""
        # The pattern source must be part of the key; otherwise results
        # generated with different ``patterns_from`` values overwrite or
        # shadow each other in the cache.
        return ("data=" + dataset_name + "_patterns=" + patterns_from
                + "_params=" + params_from)

    def save(self):
        """Pickle ``data``, ``params`` and ``patterns`` into the cache directory.

        The cache directory is created if it does not exist yet.

        Raises
        ------
        OSError
            If the cache directory cannot be created (a hint is printed to
            stderr first) or the cache file cannot be written.
        """
        try:
            # exist_ok avoids the check-then-create race of a separate isdir test.
            os.makedirs(self.cachedir, exist_ok=True)
        except OSError:
            print("There might be a problem making the directory " + self.cachedir +
                  " which is required to store predictors", file=sys.stderr)
            raise

        predictordata = {"data": self.data, "params": self.params, "patterns": self.patterns}
        with open(self._cache_file(), "wb") as f:
            # Serialize the predictor data with dump().
            pickle.dump(predictordata, f)

    def load(self):
        """Restore ``data``, ``params`` and ``patterns`` from the cache file.

        Raises
        ------
        FileNotFoundError
            If the cache file does not exist.
        """
        filename_to_store = self._cache_file()
        if not os.path.exists(filename_to_store):
            raise FileNotFoundError("Predictor data may not be available in serialize form")

        with open(filename_to_store, "rb") as f:
            # SECURITY: unpickling can execute arbitrary code. Only point
            # ``cachedir`` at files written by this class / trusted sources.
            predictordata = pickle.load(f)

        self.data = predictordata["data"]
        self.params = predictordata["params"]
        self.patterns = predictordata["patterns"]

    def get(self, daterange, dataset, fit, regenerate=False, patterns_from=None, params_from=None):
        """Return the predictor data for ``daterange``, generating it if needed.

        Cached data is used when it covers every entry of ``daterange``;
        otherwise (or when ``regenerate`` is true) :meth:`_generate` is
        called, its result is merged into the cache and the cache is saved.

        Parameters
        ----------
        daterange : iterable of index labels
            Time steps to return; also used to index the data with ``.loc``.
        dataset : object with a ``name`` attribute
            Passed through to :meth:`_generate`.
        fit : bool
            Passed through to :meth:`_generate`.
        regenerate : bool, optional
            Force regeneration even if cached data is available.
        patterns_from, params_from : str, optional
            Names of the datasets whose patterns / params are used. Both
            default to ``dataset.name``.

        Returns
        -------
        The cached data restricted to ``daterange`` (``.loc[daterange]``).

        Raises
        ------
        KeyError
            If, even after regeneration, some entry of ``daterange`` is
            missing from the data.
        """
        if patterns_from is None:
            patterns_from = dataset.name
        if params_from is None:
            params_from = dataset.name

        data_key = self._data_key(dataset.name, patterns_from, params_from)

        # Nothing in memory yet: try the on-disk cache first.
        if not self.data and not self.params and not regenerate:
            try:
                self.load()
            except FileNotFoundError:
                pass  # no cache file yet; data is generated below

        if not self.data:
            regenerate = True

        if not regenerate:
            try:
                _check_data_available(self.data[data_key], daterange)
            except KeyError:
                # Either the key or some time step is missing from the cache.
                regenerate = True

        if regenerate:
            print("Regenerating predictor data for", self.name, "using dataset", dataset.name,
                  "with loading patterns and params from", patterns_from, "and", params_from)
            self.params.setdefault(dataset.name, {})
            data = self._generate(daterange, dataset, fit, patterns_from, params_from)
            if data_key in self.data:
                # Keep previously cached values, fill gaps with the new data.
                self.data[data_key] = self.data[data_key].combine_first(data)
            else:
                self.data[data_key] = data
            self.save()

        try:
            # The selection itself may raise KeyError for missing labels, so
            # it has to sit inside the try for the diagnostic to be printed.
            data = self.data[data_key].loc[daterange]
            _check_data_available(data, daterange)
        except KeyError:
            print("Predictor data for", self.name, "could not be generated for all required timesteps",
                  file=sys.stderr)
            raise
        return data

    def fit(self, daterange, dataset):
        """Call :meth:`get` with ``fit=True``; the returned data is discarded."""
        self.get(daterange, dataset, True)

    @abstractmethod
    def _generate(self, daterange, dataset, fit, patterns_from, params_from):
        """Produce the predictor data; must be implemented by subclasses.

        The result is stored in ``self.data`` by :meth:`get`, which calls
        ``.loc`` and ``.combine_first`` on it.
        """
        ...

    def plot(self, daterange, dataset, fit, regenerate=False, patterns_from=None, params_from=None,
             **plot_kwargs):
        """Fetch the data via :meth:`get` and plot it.

        All arguments except ``plot_kwargs`` are forwarded to :meth:`get`;
        ``plot_kwargs`` are forwarded to the data's ``plot`` method, whose
        return value is returned.
        """
        data = self.get(daterange, dataset, fit, regenerate, patterns_from, params_from)
        handle = data.plot(**plot_kwargs)
        return handle