# Source code for pyanno.measures.covariation
# Copyright (c) 2011, Enthought, Ltd.
# Author: Pietro Berkes <pberkes@enthought.com>
# License: Modified BSD license (2-clause)
"""Standard reliability and covariation measures."""
from __future__ import division
import numpy as np
import scipy.stats
from pyanno.measures.helpers import all_invalid
from pyanno.util import is_valid, MISSING_VALUE
import logging
logger = logging.getLogger(__name__)
def pearsons_rho(annotations1, annotations2, nclasses=None):
    """Compute Pearson's product-moment correlation coefficient.

    Only the items for which both annotators supplied a valid annotation
    (as reported by :func:`pyanno.util.is_valid`) enter the computation.

    See also :func:`~pyanno.measures.helpers.pairwise_matrix`.

    **References:**

    * `Wikipedia entry
      <http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient>`_

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Not used by this function.

    Returns
    -------
    stat : float
        The correlation coefficient, or ``np.nan`` when fewer than two items
        carry a valid annotation from both annotators.
    """
    valid = is_valid(annotations1) & is_valid(annotations2)

    # Count in NumPy rather than looping over the mask with the builtin all().
    n_valid = np.count_nonzero(valid)

    if n_valid == 0:
        logger.debug('No valid annotations')
        return np.nan

    if n_valid < 2:
        # scipy.stats.pearsonr requires at least two paired observations;
        # report the statistic as undefined instead of letting it fail.
        logger.debug('Fewer than two valid annotation pairs')
        return np.nan

    rho, _ = scipy.stats.pearsonr(annotations1[valid], annotations2[valid])
    return rho
def spearmans_rho(annotations1, annotations2, nclasses=None):
    """Compute Spearman's rank correlation coefficient.

    Only the items for which both annotators supplied a valid annotation
    (as reported by :func:`pyanno.util.is_valid`) enter the computation.

    See also :func:`~pyanno.measures.helpers.pairwise_matrix`.

    **References:**

    * `Wikipedia entry
      <http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient>`_

    Arguments
    ---------
    annotations1 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    annotations2 : ndarray, shape = (n_items, )
        Array of annotations for a single annotator. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Not used by this function.

    Returns
    -------
    stat : float
        The correlation coefficient, or ``np.nan`` when fewer than two items
        carry a valid annotation from both annotators.
    """
    valid = is_valid(annotations1) & is_valid(annotations2)

    # Count in NumPy rather than looping over the mask with the builtin all().
    n_valid = np.count_nonzero(valid)

    if n_valid == 0:
        logger.debug('No valid annotations')
        return np.nan

    if n_valid < 2:
        # A rank correlation is undefined for a single paired observation;
        # return nan explicitly rather than relying on scipy's handling.
        logger.debug('Fewer than two valid annotation pairs')
        return np.nan

    rho, _ = scipy.stats.spearmanr(annotations1[valid], annotations2[valid])
    return rho
def cronbachs_alpha(annotations, nclasses=None):
    """Compute Cronbach's alpha.

    Entries equal to :attr:`pyanno.util.MISSING_VALUE` are masked out before
    any variance is computed. The statistic is then::

        n_items / (n_items - 1) * (1 - sum(item variances) / total variance)

    where the item variances are taken along axis 1 (one value per row of
    `annotations`) and the total variance is the variance of the column
    sums. Both variances use ``ddof=1``.

    **References:**

    * Cronbach, L. J. (1951). "Coefficient alpha and the internal structure
      of tests." Psychometrika, 16(3), 297-334.

    * `Wikipedia entry
      <http://en.wikipedia.org/wiki/Cronbach%27s_alpha>`_

    Arguments
    ---------
    annotations : ndarray, shape = (n_items, n_annotators)
        Array of annotations for multiple annotators. Missing values should be
        indicated by :attr:`pyanno.util.MISSING_VALUE`

    nclasses : int
        Not used by this function.

    Returns
    -------
    stat : float
        The value of the statistics, or ``np.nan`` if `annotations` contains
        no valid entry.
    """
    if all_invalid(annotations):
        logger.debug('No valid annotations')
        return np.nan

    # Hide missing entries so they do not contribute to sums or variances.
    masked = np.ma.masked_equal(annotations, MISSING_VALUE)

    # One variance per row (item), computed across the annotators.
    per_item_variance = masked.var(axis=1, ddof=1)

    # Variance of the per-annotator totals (column sums).
    variance_of_totals = masked.sum(axis=0).var(ddof=1)

    n_items = annotations.shape[0]
    scale = n_items / (n_items - 1.)
    variance_ratio = per_item_variance.sum() / variance_of_totals
    return scale * (1. - variance_ratio)