# Copyright (c) 2011, Enthought, Ltd.
# Author: Pietro Berkes <pberkes@enthought.com>
# License: Modified BSD license (2-clause)
from __future__ import division
import unittest
import numpy as np
import pyanno
import pyanno.measures.agreement as pma
import pyanno.measures.covariation as pmc
import pyanno.measures.helpers as pmh
import pyanno.measures.distances as pmd
from pyanno.util import MISSING_VALUE as MV, is_valid, PyannoValueError
class Bunch(object):
    """Simple attribute container used to group test fixtures.

    Every keyword argument passed to the constructor becomes an instance
    attribute with the same name, e.g. ``Bunch(a=1).a == 1``.
    """

    def __init__(self, **kwargs):
        # Store the keyword arguments directly as instance attributes.
        self.__dict__.update(kwargs)

    def __repr__(self):
        # Sorted by attribute name so the output is stable across runs.
        fields = ', '.join('%s=%r' % item
                           for item in sorted(self.__dict__.items()))
        return '%s(%s)' % (type(self).__name__, fields)
class TestMeasures(unittest.TestCase):
    """Tests for the pyanno agreement, covariation and helper measures.

    The fixtures built in `setUp` are stored as `Bunch` objects on the
    test case and shared by the individual tests.
    """

    def setUp(self):
        """Define fixtures."""
        # NOTE(review): the random fixture below is not seeded, so its
        # content differs from run to run.

        # ---- annotations for fully agreeing annotators
        nitems = 100
        nannotators = 5
        nclasses = 4

        # perfect agreement, 2 missing annotations per row
        annotations = np.empty((nitems, nannotators), dtype=int)
        for i in range(nitems):
            # every annotator gives the same random label to item i
            annotations[i, :] = np.random.randint(nclasses)
            # The indices select annotator *columns*, so the permutation
            # must range over the annotators (permuting `nclasses` would
            # never mark the last annotator as missing).
            perm = np.random.permutation(nannotators)
            annotations[i, perm[0:2]] = MV

        self.full_agreement = Bunch(nitems=nitems,
                                    nannotators=nannotators,
                                    nclasses=nclasses,
                                    annotations=annotations)

        # ---- test example from
        # http://en.wikipedia.org/wiki/Krippendorff%27s_Alpha#A_computational_example
        annotations = np.array(
            [
                [MV,  1, MV],
                [MV, MV, MV],
                [MV,  2,  2],
                [MV,  1,  1],
                [MV,  3,  3],
                [ 3,  3,  4],
                [ 4,  4,  4],
                [ 1,  3, MV],
                [ 2, MV,  2],
                [ 1, MV,  1],
                [ 1, MV,  1],
                [ 3, MV,  3],
                [ 3, MV,  3],
                [MV, MV, MV],
                [ 3, MV,  4]
            ]
        )
        # Shift the non-negative labels from 1-based to 0-based.
        # NOTE(review): this assumes MV is negative, so that missing
        # entries are left untouched -- confirm against pyanno.util.
        annotations[annotations >= 0] -= 1

        coincidence = np.array(
            [
                [6, 0, 1, 0],
                [0, 4, 0, 0],
                [1, 0, 7, 2],
                [0, 0, 2, 3]
            ]
        )

        nc = np.array([7, 4, 10, 5])

        self.krippendorff_example = Bunch(annotations=annotations,
                                          coincidence=coincidence,
                                          nc=nc,
                                          nclasses=4,
                                          alpha_diagonal=0.811,
                                          alpha_binary=0.691)

        # ---- Cronbach's alpha Excel example
        annotations = np.array(
            [
                [0, 1, 1, 1, 1],
                [1, 0, 0, 0, 1],
                [0, 0, 0, 1, 1],
                [1, 0, 1, 0, 1],
                [1, 0, 1, 0, 1],
                [1, 0, 1, 1, 1],
                [0, 0, 0, 0, 0],
                [0, 1, 1, 1, 1],
                [1, 1, 1, 0, 1],
                [0, 0, 1, 1, 1]
            ]
        )
        self.cronbach_example = Bunch(annotations=annotations,
                                      nclasses=2,
                                      alpha=0.619658)

        # ---- Cohen's kappa example from
        # http://r.789695.n4.nabble.com/Cohen-s-Kappa-for-beginners-td2229658.html
        # One row per annotator, transposed to one column per annotator and
        # shifted from 1-based to 0-based labels.
        annotations = np.array(
            [
                [1, 2, 3, 1],
                [1, 3, 3, 1]
            ]
        ).T - 1
        self.cohen_example = Bunch(annotations=annotations,
                                   nclasses=3,
                                   kappa=0.6)

        # ---- Test for annotations with no valid entry
        annotations = np.empty((10, 8), dtype=int)
        annotations.fill(MV)
        self.invalid_test = Bunch(annotations=annotations)

    def test_confusion_matrix(self):
        """Test confusion matrix on fully observed annotations."""
        anno1 = np.array([0, 0, 1, 1, 2, 3])
        anno2 = np.array([0, 1, 1, 1, 2, 2])
        expected = np.array(
            [
                [1, 1, 0, 0],
                [0, 2, 0, 0],
                [0, 0, 1, 0],
                [0, 0, 1, 0]
            ])
        cm = pmh.confusion_matrix(anno1, anno2, 4)
        np.testing.assert_array_equal(cm, expected)

    def test_confusion_matrix_missing(self):
        """Test confusion matrix with missing data."""
        anno1 = np.array([0, 0, 1, 1, MV, 3])
        anno2 = np.array([0, MV, 1, 1, 2, 2])
        # pairs in which either annotation is MV are not counted
        expected = np.array(
            [
                [1, 0, 0, 0],
                [0, 2, 0, 0],
                [0, 0, 0, 0],
                [0, 0, 1, 0]
            ])
        cm = pmh.confusion_matrix(anno1, anno2, 4)
        np.testing.assert_array_equal(cm, expected)

    def test_chance_agreement_same_frequency(self):
        """Chance agreement for two annotators sampled from one distribution."""
        distr = np.array([0.1, 0.5, 0.4])
        anno1 = pyanno.util.random_categorical(distr, nsamples=10000)
        anno2 = pyanno.util.random_categorical(distr, nsamples=10000)

        expected = distr ** 2.
        freqs = pmh.chance_agreement_same_frequency(anno1, anno2, len(distr))

        # sampled data: compare with an absolute tolerance only
        np.testing.assert_allclose(freqs, expected, atol=1e-2, rtol=0.)

    def test_chance_agreement_different_frequency(self):
        """Chance agreement for annotators with different distributions."""
        distr1 = np.array([0.1, 0.5, 0.4])
        distr2 = np.array([0.6, 0.2, 0.2])
        anno1 = pyanno.util.random_categorical(distr1, nsamples=10000)
        anno2 = pyanno.util.random_categorical(distr2, nsamples=10000)

        expected = distr1 * distr2
        freqs = pmh.chance_agreement_different_frequency(anno1, anno2,
                                                         len(distr1))

        # sampled data: compare with an absolute tolerance only
        np.testing.assert_allclose(freqs, expected, atol=1e-2, rtol=0.)

    def test_observed_agreement(self):
        """Observed agreement frequencies, normalized by the valid pairs."""
        anno1 = np.array([0, 0, 1, 1, MV, 3])
        anno2 = np.array([0, MV, 1, 1, MV, 2])
        # number of items for which both annotations are valid
        nvalid = np.sum(is_valid(anno1) & is_valid(anno2))

        expected = np.array([1., 2., 0., 0.]) / nvalid
        freqs = pmh.observed_agreement_frequency(anno1, anno2, 4)

        np.testing.assert_array_equal(freqs, expected)

    def test_cohens_kappa(self):
        """Cohen's kappa is 1 on the full-agreement fixture."""
        # test basic functionality with full agreement, missing annotations
        fa = self.full_agreement

        self.assertAlmostEqual(pma.cohens_kappa(fa.annotations[:, 0],
                                                fa.annotations[:, 1]),
                               1.0, 6)

    def test_cohens_weighted_kappa(self):
        """Cohen's weighted kappa is 1 on the full-agreement fixture."""
        # test basic functionality with full agreement, missing annotations
        fa = self.full_agreement

        self.assertAlmostEqual(pma.cohens_weighted_kappa(fa.annotations[:, 0],
                                                         fa.annotations[:, 1]),
                               1.0, 6)

    def test_cohens_weighted_kappa2(self):
        """Weighted kappa with binary distance equals Cohen's kappa."""
        # cohen's weighted kappa is the same as cohen's kappa when
        # the weights are 0. on the diagonal and 1. elsewhere
        anno1 = np.array([0, 0, 1, 2, 1, MV, 3])
        anno2 = np.array([0, MV, 1, 0, 1, MV, 2])

        weighted_kappa = pma.cohens_weighted_kappa(anno1, anno2,
                                                   pmd.binary_distance)
        cohens_kappa = pma.cohens_kappa(anno1, anno2)

        self.assertAlmostEqual(weighted_kappa, cohens_kappa, 6)

    def test_cohens_kappa2(self):
        """Cohen's kappa reproduces the value of the worked example."""
        ce = self.cohen_example
        kappa = pma.cohens_kappa(ce.annotations[:, 0],
                                 ce.annotations[:, 1])
        self.assertAlmostEqual(kappa, ce.kappa, 6)

    def test_scotts_pi(self):
        """Scott's pi is 1 on the full-agreement fixture."""
        # test basic functionality with full agreement, missing annotations
        fa = self.full_agreement

        self.assertAlmostEqual(pma.scotts_pi(fa.annotations[:, 0],
                                             fa.annotations[:, 1]),
                               1.0, 6)

    def test_fleiss_kappa_nannotations(self):
        """Fleiss' kappa computed from a table of per-class counts."""
        # same example as
        # http://en.wikipedia.org/wiki/Fleiss%27_kappa#Worked_example
        nannotations = np.array(
            [
                [0, 0, 0, 0, 14],
                [0, 2, 6, 4, 2],
                [0, 0, 3, 5, 6],
                [0, 3, 9, 2, 0],
                [2, 2, 8, 1, 1],
                [7, 7, 0, 0, 0],
                [3, 2, 6, 3, 0],
                [2, 5, 3, 2, 2],
                [6, 5, 2, 1, 0],
                [0, 2, 2, 3, 7]
            ]
        )
        expected = 0.21
        kappa = pma._fleiss_kappa_nannotations(nannotations)
        # the reference value is only given to two decimal places
        self.assertAlmostEqual(kappa, expected, 2)

    def test_fleiss_kappa(self):
        """Fleiss' kappa on full agreement, and its error on ragged rows."""
        # test basic functionality with full agreement, missing annotations
        fa = self.full_agreement

        self.assertAlmostEqual(pma.fleiss_kappa(fa.annotations, fa.nclasses),
                               1.0, 6)

        # unequal number of annotators per row
        # (the fixture is rebuilt by setUp, so mutating it here is local
        # to this test)
        fa.annotations[0, :] = MV
        self.assertRaises(PyannoValueError, pma.fleiss_kappa, fa.annotations)

    def test_krippendorffs_alpha(self):
        """Krippendorff's alpha is 1 on the full-agreement fixture."""
        # test basic functionality with full agreement, missing annotations
        fa = self.full_agreement

        self.assertAlmostEqual(pma.krippendorffs_alpha(fa.annotations),
                               1.0, 6)

    def test_coincidence_matrix(self):
        """Coincidence matrix matches the one of the worked example."""
        # test example from
        # http://en.wikipedia.org/wiki/Krippendorff%27s_Alpha#A_computational_example
        kr = self.krippendorff_example
        coincidence = pmh.coincidence_matrix(kr.annotations, kr.nclasses)
        np.testing.assert_allclose(coincidence, kr.coincidence)

    def test_krippendorffs_alpha2(self):
        """Krippendorff's alpha for the binary and diagonal distances."""
        # test example from
        # http://en.wikipedia.org/wiki/Krippendorff%27s_Alpha#A_computational_example
        kr = self.krippendorff_example

        alpha = pma.krippendorffs_alpha(kr.annotations,
                                        metric_func=pmd.binary_distance)
        self.assertAlmostEqual(alpha, kr.alpha_binary, 3)

        alpha = pma.krippendorffs_alpha(kr.annotations,
                                        metric_func=pmd.diagonal_distance)
        self.assertAlmostEqual(alpha, kr.alpha_diagonal, 3)

    def test_pearsons_rho(self):
        """Pearson's rho is 1 on the full-agreement fixture."""
        # test basic functionality with full agreement, missing annotations
        fa = self.full_agreement

        self.assertAlmostEqual(pmc.pearsons_rho(fa.annotations[:, 0],
                                                fa.annotations[:, 1]),
                               1.0, 6)

    def test_spearmans_rho(self):
        """Spearman's rho is 1 on the full-agreement fixture."""
        # test basic functionality with full agreement, missing annotations
        fa = self.full_agreement

        self.assertAlmostEqual(pmc.spearmans_rho(fa.annotations[:, 0],
                                                 fa.annotations[:, 1]),
                               1.0, 6)

    def test_cronbachs_alpha(self):
        """Rescaled Cronbach's alpha is 1 on the full-agreement fixture."""
        # test basic functionality with full agreement, missing annotations
        fa = self.full_agreement

        alpha = pmc.cronbachs_alpha(fa.annotations)
        # NOTE(review): the (nitems - 1) / nitems rescaling presumably
        # cancels a normalization factor inside cronbachs_alpha -- confirm
        # against its implementation.
        alpha *= (fa.nitems - 1.) / fa.nitems
        self.assertAlmostEqual(alpha, 1.0, 6)

    def test_cronbachs_alpha2(self):
        """Cronbach's alpha reproduces the value of the Excel example."""
        # compare with the reference value stored in the Cronbach fixture
        ce = self.cronbach_example

        self.assertAlmostEqual(pmc.cronbachs_alpha(ce.annotations),
                               ce.alpha, 6)

    def test_matrix(self):
        """Pairwise matrix entries agree with direct pairwise calls."""
        ke = self.krippendorff_example
        mat = pyanno.measures.pairwise_matrix(pma.cohens_kappa,
                                              ke.annotations, nclasses=4)
        # an annotator compared with itself
        self.assertAlmostEqual(mat[1, 1], 1., 6)

        # entry (2, 0) must equal the measure applied to columns 2 and 0
        kappa = pma.cohens_kappa(ke.annotations[:, 2], ke.annotations[:, 0])
        self.assertAlmostEqual(mat[2, 0], kappa)

    def test_all_invalid(self):
        """All measures return NaN when no annotation is valid."""
        # behavior: all measures should return np.nan for a set of
        # annotations with no valid entry
        anno = self.invalid_test.annotations

        # assertTrue replaces the deprecated `assert_` alias, which newer
        # Python versions no longer provide
        self.assertTrue(
            np.isnan(pma.scotts_pi(anno[:, 0], anno[:, 1], nclasses=4))
        )

        self.assertTrue(
            np.isnan(pma.cohens_kappa(anno[:, 0], anno[:, 1], nclasses=3))
        )

        self.assertTrue(
            np.isnan(pma.cohens_weighted_kappa(anno[:, 0], anno[:, 1],
                                               nclasses=5))
        )

        self.assertTrue(
            np.isnan(pma.fleiss_kappa(anno, nclasses=4))
        )

        self.assertTrue(
            np.isnan(pma.krippendorffs_alpha(anno, nclasses=7))
        )

        self.assertTrue(
            np.isnan(pmc.pearsons_rho(anno[:, 0], anno[:, 1], nclasses=4))
        )

        self.assertTrue(
            np.isnan(pmc.spearmans_rho(anno[:, 0], anno[:, 1], nclasses=4))
        )

        self.assertTrue(
            np.isnan(pmc.cronbachs_alpha(anno, nclasses=4))
        )
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()