Source code for pyanno.measures.covariation

# Copyright (c) 2011, Enthought, Ltd.
# Author: Pietro Berkes <pberkes@enthought.com>
# License: Modified BSD license (2-clause)

"""Standard reliability and covariation measures."""

from __future__ import division

import numpy as np
import scipy.stats
from pyanno.measures.helpers import all_invalid

from pyanno.util import is_valid, MISSING_VALUE

import logging
logger = logging.getLogger(__name__)

def pearsons_rho(annotations1, annotations2, nclasses=None):
    """Compute Pearson's product-moment correlation coefficient.

    Only the items for which both annotators supplied a valid annotation
    enter the computation.

    See also :func:`~pyanno.measures.helpers.pairwise_matrix`.

    **References:**

    * `Wikipedia entry
      <http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient>`_

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Number of annotation classes. Not used in the computation.

    Returns
    -------
    stat : float
        The value of the statistic, or ``np.nan`` if fewer than two items
        carry a valid annotation from both annotators.
    """

    # keep only the items annotated by both annotators
    valid = is_valid(annotations1) & is_valid(annotations2)
    n_valid = np.count_nonzero(valid)

    if n_valid == 0:
        logger.debug('No valid annotations')
        return np.nan

    if n_valid < 2:
        # a correlation needs at least two observations; scipy's pearsonr
        # raises ValueError on shorter input, so report "undefined" instead
        logger.debug('Fewer than two valid annotation pairs')
        return np.nan

    # pearsonr also returns a p-value, which this measure does not report
    rho, _ = scipy.stats.pearsonr(annotations1[valid], annotations2[valid])
    return rho
def spearmans_rho(annotations1, annotations2, nclasses=None):
    """Compute Spearman's rank correlation coefficient.

    Only the items for which both annotators supplied a valid annotation
    enter the computation.

    See also :func:`~pyanno.measures.helpers.pairwise_matrix`.

    **References:**

    * `Wikipedia entry
      <http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient>`_

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Number of annotation classes. Not used in the computation.

    Returns
    -------
    stat : float
        The value of the statistic, or ``np.nan`` if no item carries a valid
        annotation from both annotators.
    """

    # keep only the items annotated by both annotators
    valid = is_valid(annotations1) & is_valid(annotations2)

    # vectorized emptiness test (avoids a Python-level loop over the mask)
    if not np.any(valid):
        logger.debug('No valid annotations')
        return np.nan

    # spearmanr also returns a p-value, which this measure does not report
    rho, _ = scipy.stats.spearmanr(annotations1[valid], annotations2[valid])
    return rho
def cronbachs_alpha(annotations, nclasses=None):
    """Compute Cronbach's alpha.

    **References:**

    * Cronbach, L. J. (1951). "Coefficient alpha and the internal structure
      of tests." Psychometrika, 16(3), 297-334.

    * `Wikipedia entry
      <http://en.wikipedia.org/wiki/Cronbach%27s_alpha>`_

    Arguments
    ---------
    annotations : ndarray, shape = (n_items, n_annotators)
        Array of annotations for multiple annotators. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Number of annotation classes. Not used in the computation.

    Returns
    -------
    stat : float
        The value of the statistic, or ``np.nan`` if there are no valid
        annotations or if `annotations` has fewer than two rows (items).
    """

    if all_invalid(annotations):
        logger.debug('No valid annotations')
        return np.nan

    nitems = annotations.shape[0]
    if nitems < 2:
        # the nitems / (nitems - 1) factor below is undefined for one item;
        # report "undefined" instead of raising ZeroDivisionError
        logger.debug('Fewer than two items')
        return np.nan

    # mask the missing entries so they are left out of sums and variances
    valid_anno = np.ma.masked_equal(annotations, MISSING_VALUE)

    # unbiased variance of every row, taken along axis 1 (annotators)
    item_var = valid_anno.var(1, ddof=1)
    # unbiased variance of the column totals (sum along axis 0, the items)
    total_var = valid_anno.sum(0).var(ddof=1)

    return nitems / (nitems - 1.) * (1. - item_var.sum() / total_var)

Table Of Contents