# Source code for cRedditscore.evaluation

# -*- coding: utf-8 -*-
'''
A module of tools to evaluate predictive models.
'''
import numpy as np
import pandas as pd
from sklearn import cross_validation
from sklearn import metrics as skmetrics


class EvalError(Exception):
    '''
    Error raised when a model cannot be evaluated, for example
    because it is missing a predict function.

    :param str msg:
        the human-readable description of the problem; it is kept on the
        ``msg`` attribute and returned by ``str()``
    '''
    def __init__(self, msg):
        # Hand the message to Exception as well so that ``args``, ``repr()``
        # and pickling carry it instead of an empty tuple.
        super(EvalError, self).__init__(msg)
        self.msg = msg

    def __str__(self):
        return self.msg


class GenModel(object):
    '''
    A general (binary classification) model class, mostly for
    testing and explanatory purposes.

    The three callables are stored unchanged as instance attributes of the
    same name; any of them may be left as None when it is not needed.

    :param function fit:
        the fit function of the model
    :param function predict:
        the prediction function of the model
    :param function predict_proba:
        the probability prediction function of the model,
        computes the probability that an observation belongs to the first class

    '''

    def __init__(self, fit=None, predict=None, predict_proba=None):
        self.fit = fit
        self.predict = predict
        self.predict_proba = predict_proba


class Evaluate(object):
    '''
    Evaluate a predictive binary classification model
    on a choice of metrics including accuracy, AUC,
    precision and recall. The model is assumed to have functions

    * *fit* (for cross validation): takes as input the training set
      features and responses and fits the model to the training set
    * *predict* (for accuracy, precision, recall
      and cross validation): takes as input a list of observations and
      outputs a list of predictions
    * *predict_proba* (for AUC and drawing the ROC curve): takes as input
      a list of observations and outputs a two-dimensional array of class
      probabilities, one column per class

    For example, we make a dummy test set and model:

    >>> model = GenModel(predict = lambda x : ['blue' for i in x])
    >>> test_features = range(20)
    >>> test_responses = np.array(
    ...     ['blue' if i%2==0 else 'green' for i in range(20)]
    ...     )

    and make an `Evaluate` object to test its accuracy.

    >>> evaluator = Evaluate(model)
    >>> evaluator.accuracy(test_features=test_features,
    ...     test_responses=test_responses)
    0.5

    :param model:
        the model to evaluate, described above
    :param array-like data_features:
        the features of the data to evaluate the model on,
        generally the training set features
    :param array-like data_responses:
        the responses of the data to evaluate the model on
    :param pos_label:
        the class to be considered positive for auc, precision and recall;
        if None, the class of the first response is picked
    :raises EvalError:
        if `data_responses` contains more than two distinct classes
    '''

    def __init__(self,
                 model=None,
                 data_features=None,
                 data_responses=None,
                 pos_label=None):
        self.model = model
        self.data_features = data_features
        self.data_responses = data_responses
        # Always define these so later methods never hit a missing attribute
        # when the object was built without responses.
        self.classes = []
        self.pos_label = pos_label
        if data_responses is not None:
            self.classes = list(set(self.data_responses))
            if len(self.classes) > 2:
                raise EvalError('More than two classes!')
            if pos_label is None:
                # Positional first element: `.iloc` for pandas objects,
                # plain indexing for lists and numpy arrays.
                first = getattr(data_responses, 'iloc', data_responses)
                self.pos_label = first[0]

    @staticmethod
    def _rows(data, indices):
        '''Select the observations of `data` at the given positions.'''
        positional = getattr(data, 'iloc', None)
        if positional is not None:
            # pandas: fold indices are positions, not index labels
            return positional[indices]
        if isinstance(data, (list, tuple)):
            data = np.asarray(data)
        return data[indices]

    def cv_split(self, k=10):
        '''
        Make k folds of the data using stratified cross validation.

        Stores the number of folds as `k` and the fold generator as `cv`.

        :param int k:
            The number of folds to divide the data into
        :raises EvalError:
            if the object was built without `data_responses`
        '''
        if self.data_responses is None:
            raise EvalError('cv_split: no data responses to split!')

        self.k = k
        self.cv = cross_validation.StratifiedKFold(
            self.data_responses,
            n_folds=k
            )

    def compute_scores(
            self,
            test_features=None,
            test_responses=None,
            metrics=('accuracy', 'auc', 'precision', 'recall', 'f1')
            ):
        '''
        Compute the metric scores for the model on the
        cross-validation folds of the training data,
        storing the results in a dataframe.

        For every fold the model is refit on the training part and scored
        on the held-out part. Adds (or replaces) the attribute

        * *scores*,
            the results of the computations as a
            `pandas.core.frame.DataFrame` object, laid out as described
            in `build_scores_df`

        and, when 'auc' is requested, appends one `[fpr, tpr, thresholds]`
        entry per fold to the attribute *roc* (created on first use and
        kept across calls).

        :param array-like test_features:
            The features of the test data. Currently unused: the scores
            are always computed on the cv folds.
        :param array-like test_responses:
            The responses of the test data. Currently unused: the scores
            are always computed on the cv folds.
        :param metrics:
            a metric name or a list/tuple of metric names among
            'accuracy', 'auc', 'precision', 'recall' and 'f1'
        :raises EvalError:
            if `cv_split` has not been called yet
        '''
        # a single metric passed on its own is turned into a list
        if isinstance(metrics, (list, tuple)):
            metrics = list(metrics)
        else:
            metrics = [metrics]

        if getattr(self, 'cv', None) is None:
            raise EvalError('compute_scores: no cross-validation folds,' +
                            ' call cv_split first!')

        self.scores = self.build_scores_df()

        need_predictions = any(name != 'auc' for name in metrics)
        need_probabilities = 'auc' in metrics

        for fold, (train, test) in enumerate(self.cv):
            self.model.fit(
                self._rows(self.data_features, train),
                self._rows(self.data_responses, train)
                )
            fold_features = self._rows(self.data_features, test)
            fold_responses = self._rows(self.data_responses, test)

            # class predictions are only needed by metrics other than auc
            predictions = None
            if need_predictions:
                predictions = self.model.predict(fold_features)

            if 'accuracy' in metrics:
                self.scores.loc[('accuracy', ''), ('fold', fold)] = \
                    self.accuracy(
                        test_responses=fold_responses,
                        predictions=predictions
                        )

            if need_probabilities:
                prob_preds = self.model.predict_proba(fold_features)
                fpr, tpr, thresh = skmetrics.roc_curve(
                    y_true=fold_responses,
                    y_score=prob_preds[:, self._positive_column()],
                    pos_label=self.pos_label
                    )
                if not hasattr(self, 'roc'):
                    self.roc = []
                self.roc.append([fpr, tpr, thresh])
                self.scores.loc[('auc', ''), ('fold', fold)] = \
                    skmetrics.auc(fpr, tpr)

            for name in ('precision', 'recall', 'f1'):
                if name in metrics:
                    self._score_per_class(
                        name, fold, fold_responses, predictions
                        )

        # aggregate once, after every fold has been filled in
        self._aggregate_folds()

    def _positive_column(self):
        '''Return the `predict_proba` column scoring the positive class.'''
        model_classes = getattr(self.model, 'classes_', None)
        if model_classes is not None:
            labels = list(model_classes)
            if self.pos_label in labels:
                return labels.index(self.pos_label)
        # The model does not advertise its class order: keep the historical
        # assumption that column 1 belongs to the positive class.
        return 1

    def _score_per_class(self, metric, fold, y_true, y_pred):
        '''Store one fold's per-class and class-averaged `metric` scores.'''
        score = {
            'precision': skmetrics.precision_score,
            'recall': skmetrics.recall_score,
            'f1': skmetrics.f1_score,
            }[metric]
        total = 0.0
        for cl in self.classes:
            value = score(y_true=y_true, y_pred=y_pred, pos_label=cl)
            row = (metric, 'class {}'.format(cl))
            self.scores.loc[row, ('fold', fold)] = value
            total += value
        if self.classes:
            # unweighted mean over the classes actually present
            self.scores.loc[(metric, 'avg'), ('fold', fold)] = \
                total / len(self.classes)

    def _aggregate_folds(self):
        '''Fill the min, max, mean and std columns from the fold columns.'''
        folds = self.scores['fold']
        self.scores[('min', '')] = folds.min(axis=1)
        self.scores[('max', '')] = folds.max(axis=1)
        self.scores[('mean', '')] = folds.mean(axis=1)
        # population standard deviation, as numpy.std computes by default
        self.scores[('std', '')] = folds.std(axis=1, ddof=0)

    def build_scores_df(self):
        '''
        Build the (empty) scores dataframe.

        Rows are a two-level index: ('accuracy', ''), ('auc', '') and, for
        each of 'precision', 'recall' and 'f1', one row per class labelled
        'class <name>' plus an 'avg' row. Columns are a two-level index:
        ('fold', i) for each of the `k` folds, followed by ('max', ''),
        ('mean', ''), ('min', '') and ('std', ''). All cells start as NaN.
        '''
        per_class = ['class {}'.format(cl) for cl in self.classes] + ['avg']
        rows = [('accuracy', ''), ('auc', '')]
        rows += [
            (metric, label)
            for metric in ('precision', 'recall', 'f1')
            for label in per_class
            ]
        columns = [('fold', i) for i in range(self.k)]
        columns += [(agg, '') for agg in ('max', 'mean', 'min', 'std')]
        return pd.DataFrame(
            index=pd.MultiIndex.from_tuples(rows),
            columns=pd.MultiIndex.from_tuples(columns),
            dtype=float
            )

    def accuracy(self,
                 test_responses,
                 test_features=None,
                 predictions=None,
                 train=None):
        '''
        Find the accuracy of the model on a given test set.

        :param array-like test_responses:
            the responses of the test set
        :param array-like test_features:
            the features of the test set to predict on.
            If None, use the pre-made `predictions`
        :param array-like predictions:
            the predictions to test.
            If None, use `test_features` to predict on
        :param array-like train:
            the data set to train the model on (optional); when given it
            is passed as the single argument of the model's fit function
        :returns:
            the fraction of predictions equal to the matching response
        :raises EvalError:
            if there is no model, the model has no predict function,
            a training set is given but the model has no fit function,
            or the numbers of predictions and responses differ
        '''
        # check if the model and its predict function exist; compare with
        # None because some estimators define __len__ and may look falsy
        if self.model is None:
            raise EvalError('Accuracy: no model to evaluate!')
        if getattr(self.model, 'predict', None) is None:
            raise EvalError('Accuracy: model has no predict function!')

        # train the model if a training set is given and fit function exists
        # (`is not None`: the truth value of an array is ambiguous)
        if train is not None:
            if getattr(self.model, 'fit', None) is None:
                raise EvalError('Accuracy: training set' +
                                ' given but model has no fit function!')
            self.model.fit(train)

        if predictions is None:
            predictions = self.model.predict(test_features)

        # make sure the lengths of predictions and test_responses match up
        if len(predictions) != len(test_responses):
            raise EvalError('The numbers of predictions' +
                            ' and test responses do not match up!')

        # compare element by element, also when both inputs are plain lists
        hits = np.asarray(test_responses) == np.asarray(predictions)
        return np.mean(hits)

    def compute_curves(self):
        '''Plot ROC and precision recall curves (not implemented yet).'''
        pass