Source code for alp.appcom.ensembles

"""
Ensembles module
================
"""

import warnings
from time import time

import numpy as np
import pandas as pd
from progressbar import ETA
from progressbar import Bar
from progressbar import DynamicMessage
from progressbar import FormatLabel
from progressbar import Percentage
from progressbar import ProgressBar
from progressbar import SimpleProgress


def get_best(experiments, metric, op, partial=False):
    """Select the best experiment of a collection according to a metric.

    For every experiment exposing a ``full_res`` attribute, ``op`` is applied
    to ``full_res['metrics'][metric]`` to obtain one score per experiment.
    Scores that are NaN are ignored. ``op`` is then applied to the remaining
    scores and the first experiment (in iteration order) whose score equals
    that value is returned.

    Args:
        experiments(dict or list): experiments to compare. A dict maps
            experiment names to experiments; a list or tuple is keyed by
            position.
        metric(str): the name of a metric used in the experiments
        op(function): operation reducing the metric values of one experiment
            to a score, and the scores of all experiments to the best score
            (e.g. ``min`` or ``max``)
        partial(bool): if True, experiments without results are skipped and a
            warning is emitted. An exception is raised otherwise.

    Returns:
        a tuple ``(experiment, key)`` with the selected experiment and the
        key it is registered under.

    Raises:
        Exception: if an experiment has no result and `partial` is False, if
            no experiment has a result, or if every score is NaN.
    """
    if isinstance(experiments, (list, tuple)):
        experiments = dict(enumerate(experiments))

    scores = []
    ready_expes = []
    ready_keys = []
    not_ready = False
    for key, expe in experiments.items():
        if not hasattr(expe, 'full_res'):  # pragma: no cover
            if not partial:
                raise Exception('Results are not ready')
            not_ready = True
            continue
        scores.append(op(expe.full_res['metrics'][metric]))
        ready_expes.append(expe)
        ready_keys.append(key)

    if not_ready:  # pragma: no cover
        warnings.warn('Some results are not ready: Using the best available'
                      ' model.')

    if not ready_expes:
        raise Exception('No result is ready yet')

    score_array = np.array(scores)
    valid = ~np.isnan(score_array)
    if not valid.any():
        raise Exception('The selected metric evaluations are all nans')

    # Keep track of the original positions of the non-NaN scores so that the
    # selection mask stays aligned with the experiments and their keys.
    valid_positions = np.flatnonzero(valid)
    valid_scores = score_array[valid]
    is_best = op(valid_scores) == valid_scores
    best_position = int(valid_positions[is_best][0])

    # Plain lists are indexed (rather than numpy arrays) so that the
    # experiment and its key are returned untouched, whatever their types.
    return ready_expes[best_position], ready_keys[best_position]
# Widgets of the progress bar displayed by `HParamsSearch._fit_cm`. The
# trailing `DynamicMessage('s')` is fed through the `s` keyword of
# `progress.update` and is shown right after the 'job/' label.
widgets = [
    Percentage(),
    ' ',
    SimpleProgress(),
    ' ',
    Bar(marker='=', left='[', right=']'),
    ' ',
    FormatLabel('in: %(elapsed)s'),
    ' ',
    ETA(),
    ' | ',
    'job/',
    DynamicMessage('s'),
]
class Ensemble(object):
    """Base container wrapping several experiments to run them in batch.

    Subclasses are expected to override the `fit`, `fit_gen`, `fit_async`
    and `fit_gen_async` methods; every method defined here other than the
    constructor raises `NotImplementedError`.

    Args:
        experiments(dict or list): experiments to be wrapped. A dictionary
            should map experiment names to experiments; a list is converted
            to a dictionary keyed by position.
    """

    def __init__(self, experiments):
        if isinstance(experiments, list):
            # Lists are keyed by the position of each experiment.
            experiments = dict(enumerate(experiments))
        elif not isinstance(experiments, dict):  # pragma: no cover
            raise TypeError('You must pass either an experiments dict or list')
        self.experiments = experiments

    def fit(self, data, data_val, *args, **kwargs):
        """Not implemented in the base class."""
        raise NotImplementedError

    def fit_gen(self, data, data_val, *args, **kwargs):
        """Not implemented in the base class."""
        raise NotImplementedError

    def fit_async(self, data, data_val, *args, **kwargs):
        """Not implemented in the base class."""
        raise NotImplementedError

    def fit_gen_async(self, data, data_val, *args, **kwargs):
        """Not implemented in the base class."""
        raise NotImplementedError

    def predict(self, data, data_val, *args, **kwargs):
        """Not implemented in the base class."""
        raise NotImplementedError

    def summary(self, metrics, verbose=False):
        """Not implemented in the base class."""
        raise NotImplementedError

    def plt_summary(self):
        """Not implemented in the base class."""
        raise NotImplementedError
class HParamsSearch(Ensemble):
    """Hyper parameters search class

    Train several experiments with different hyperparameters and save
    results. Wraps the training process so that it's possible to access
    results easily.

    Args:
        experiments(dict or list): experiments to be wrapped. If a dictionary
            is passed, it should map experiment names to experiments
        hyperparams(dict): a dict of hyperparameters
        metric(str): the name of a metric used in the experiments; default
            metric used by `predict`
        op(function): an operator used to select a model; default operator
            used by `predict`
    """

    def __init__(self, experiments, hyperparams=None, metric=None, op=None):
        super(HParamsSearch, self).__init__(experiments=experiments)
        self.hyperparams = hyperparams
        self.metric = metric
        self.op = op
        # Maps experiment keys to whatever the experiment's fit method
        # returned; filled by the `fit*` methods.
        self.results = dict()

    def fit(self, data, data_val, *args, **kwargs):
        """Apply the fit method to all the experiments

        Args:
            see `alp.core.Experiment.fit`

        Returns:
            a dict mapping experiment keys to their results"""
        # The flags are passed positionally so that extra positional
        # arguments in `*args` cannot collide with them.
        self._fit_cm(data, data_val, False, False, *args, **kwargs)
        return self.results

    def fit_gen(self, data, data_val, *args, **kwargs):
        """Apply the fit_gen method to all the experiments

        Args:
            see :meth:`alp.appcom.core.Experiment.fit_gen`

        Returns:
            a dict mapping experiment keys to their results"""
        self._fit_cm(data, data_val, True, False, *args, **kwargs)
        return self.results

    def fit_gen_async(self, data, data_val, *args, **kwargs):
        """Apply the fit_gen_async method to all the experiments

        Args:
            see :meth:`alp.appcom.core.Experiment.fit_gen_async`

        Returns:
            a dict mapping experiment keys to their results"""
        self._fit_cm(data, data_val, True, True, *args, **kwargs)
        return self.results

    def fit_async(self, data, data_val, *args, **kwargs):
        """Apply the fit_async method to all the experiments

        Args:
            see :meth:`alp.appcom.core.Experiment.fit_async`

        Returns:
            a dict mapping experiment keys to their results"""
        self._fit_cm(data, data_val, False, True, *args, **kwargs)
        return self.results

    def _fit_cm(self, data, data_val, gen, use_async, *args, **kwargs):
        """Fit every experiment in turn while displaying a progress bar.

        Args:
            data: forwarded to the experiment's fit method
            data_val: forwarded to the experiment's fit method
            gen(bool): if true, use the generator variant (`fit_gen*`)
            use_async(bool): if true, use the asynchronous variant
                (`*_async`). Named `use_async` because `async` is a reserved
                keyword since Python 3.7.
            *args, **kwargs: forwarded to the experiment's fit method

        Returns:
            the dict mapping experiment keys to their results"""
        method_name = 'fit'
        if gen:
            method_name += '_gen'
        if use_async:
            method_name += '_async'

        with ProgressBar(max_value=len(self.experiments),
                         redirect_stdout=True,
                         widgets=widgets, term_width=80) as progress:
            spent = 0.0
            for i, (k, expe) in enumerate(self.experiments.items()):
                begin = time()
                fit_method = getattr(expe, method_name)
                self.results[k] = fit_method(data, data_val, *args, **kwargs)
                spent += time() - begin

                # Average number of jobs handled per second so far; guard
                # against a zero elapsed time on coarse timers.
                done = i + 1
                rate = done / spent if spent > 0 else float('inf')
                progress.update(done, s=float(rate))

                if expe.backend_name == 'keras' and use_async:  # pragma: no cover
                    import keras.backend as K
                    if K.backend() == 'tensorflow':
                        K.clear_session()
        return self.results

    def predict(self, data, metric=None, op=None, partial=False,
                *args, **kwargs):
        """Predict with the best experiment according to a metric

        Args:
            see :meth:`alp.appcom.core.Experiment.predict`
            metric(str): the name of the metric to use; defaults to the
                metric given at construction
            op(function): an operator returning the value to select an
                experiment; defaults to the op given at construction
            partial(bool): forwarded to `get_best`

        Returns:
            a tuple with the key of the selected experiment and its
            predictions

        Raises:
            Exception: if no metric or no op is available"""
        if not metric:
            metric = self.metric
        if not op:
            op = self.op

        if metric is None or op is None:
            raise Exception('You should provide a metric along with an op')
        best_exp, best_key = get_best(self.experiments, metric, op, partial)
        return best_key, best_exp.predict(data, *args, **kwargs)

    def summary(self, metrics, verbose=False):
        """Build a results table using individual results from models

        Args:
            metrics(dict): a dictionary mapping metric's names to ops.
            verbose(bool): if True, print a description of the results

        Returns:
            a pandas DataFrame of results"""
        res_dict = dict()
        for k, res in self.results.items():
            # Each stored result is unpacked as a pair; its second item, when
            # not None, is joined before the metrics are read (presumably the
            # thread of an asynchronous fit).
            _, thread = res
            if thread is not None:
                thread.join()
            expe_metrics = self.experiments[k].full_res['metrics']
            for name, values in expe_metrics.items():
                if isinstance(values, list) and name in metrics:
                    reduce_op = metrics[name]
                    res_dict.setdefault(name, []).append(reduce_op(values))

        res_table = pd.DataFrame(res_dict)
        if verbose is True:
            print(res_table.describe())
        return res_table