# Copyright (c) 2011, Enthought, Ltd.
# Author: Pietro Berkes <pberkes@enthought.com>
# License: Modified BSD license (2-clause)
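
"""Unit tests for the ModelBt annotation model."""
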
import unittest

import numpy as np
from numpy import testing

from pyanno.models import ModelBt, ModelBtLoopDesign
from pyanno.util import (MISSING_VALUE as MV, is_valid, PyannoValueError,
                         labels_frequency)


class TestModelBt(unittest.TestCase):

    def test_create_model(self):
        nclasses = 8
        nannotators = 32
        model = ModelBt.create_initial_state(nclasses, nannotators)
        self.assertEqual(model.nannotators, nannotators)
        self.assertEqual(model.gamma.shape[0], nclasses)
        self.assertEqual(model.theta.shape[0], nannotators)

    def test_mle_estimation(self):
        # test simple model, check that we get to global optimum
        nclasses, nannotators, nitems = 3, 5, 5000

        # create random model and data (this is our ground truth model)
        true_model = ModelBt.create_initial_state(nclasses, nannotators)
        annotations = true_model.generate_annotations(nitems)

        # create a new, empty model and infer back the parameters
        model = ModelBt.create_initial_state(nclasses, nannotators)
        before_llhood = model.log_likelihood(annotations)
        model.mle(annotations)
        after_llhood = model.log_likelihood(annotations)

        testing.assert_allclose(model.gamma, true_model.gamma,
                                atol=0.05, rtol=0.)
        testing.assert_allclose(model.theta, true_model.theta,
                                atol=0.05, rtol=0.)
        self.assertGreater(after_llhood, before_llhood)

    def test_map_estimation(self):
        # test simple model, check that we get to global optimum
        nclasses, nannotators, nitems = 3, 5, 5000

        # create random model and data (this is our ground truth model)
        true_model = ModelBt.create_initial_state(nclasses, nannotators)
        annotations = true_model.generate_annotations(nitems)

        # create a new, empty model and infer back the parameters
        model = ModelBt.create_initial_state(nclasses, nannotators)
        before_obj = model.log_likelihood(annotations) + model._log_prior()
        model.map(annotations)
        after_obj = model.log_likelihood(annotations) + model._log_prior()

        testing.assert_allclose(model.gamma, true_model.gamma,
                                atol=0.05, rtol=0.)
        testing.assert_allclose(model.theta, true_model.theta,
                                atol=0.05, rtol=0.)
        self.assertGreater(after_obj, before_obj)

    def test_log_likelihood_loop_design(self):
        # behavior: the log likelihood of the new class should match the one
        # of the more specialized class
        nclasses, nannotators, nitems = 4, 8, 100

        # create specialized model, draw data
        true_model = ModelBtLoopDesign.create_initial_state(nclasses)
        annotations = true_model.generate_annotations(nitems)
        expect = true_model.log_likelihood(annotations)

        model = ModelBt(nclasses, nannotators,
                        gamma=true_model.gamma, theta=true_model.theta)
        llhood = model.log_likelihood(annotations)

        np.testing.assert_almost_equal(llhood, expect, 10)

    def test_log_likelihood(self):
        # check that log likelihood is maximal at true parameters
        nclasses, nannotators, nitems = 3, 5, 1000

        # create random model and data (this is our ground truth model)
        true_model = ModelBt.create_initial_state(nclasses, nannotators)
        annotations = true_model.generate_annotations(nitems)

        max_llhood = true_model.log_likelihood(annotations)

        # perturb gamma: log likelihood should decrease
        for _ in range(20):
            theta = true_model.theta
            gamma = np.random.normal(loc=true_model.gamma, scale=0.1)
            gamma = np.clip(gamma, 0., 1.)
            gamma /= gamma.sum()
            model = ModelBt(nclasses, nannotators, gamma, theta)
            llhood = model.log_likelihood(annotations)
            self.assertGreater(max_llhood, llhood)

        # perturb theta: log likelihood should decrease
        for _ in range(20):
            gamma = true_model.gamma
            theta = np.random.normal(loc=true_model.theta, scale=0.1)
            theta = np.clip(theta, 0., 1.)
            model = ModelBt(nclasses, nannotators, gamma, theta)
            llhood = model.log_likelihood(annotations)
            self.assertGreater(max_llhood, llhood)

    def test_sampling_theta(self):
        nclasses, nannotators, nitems = 3, 5, 5000
        nsamples = 1000

        # create random model (this is our ground truth model)
        true_model = ModelBt.create_initial_state(nclasses, nannotators)
        # create random data
        annotations = true_model.generate_annotations(nitems)

        # create a new model
        model = ModelBt.create_initial_state(nclasses, nannotators)
        # get optimal parameters (to make sure we're at the optimum)
        model.map(annotations)

        # modify parameters, to give false start to sampler
        real_theta = model.theta.copy()
        model.theta = model._random_theta(model.nannotators)
        # save current parameters
        gamma_before, theta_before = model.gamma.copy(), model.theta.copy()

        samples = model.sample_posterior_over_accuracy(
            annotations,
            nsamples,
            burn_in_samples=100,
            thin_samples=2
        )

        # test: the mean of the sampled parameters is the same as the MLE one
        # (up to 3 standard deviations of the estimate sample distribution)
        testing.assert_array_less(np.absolute(samples.mean(0)-real_theta),
                                  3.*samples.std(0))

        # check that original parameters are intact
        testing.assert_equal(model.gamma, gamma_before)
        testing.assert_equal(model.theta, theta_before)

    def test_inference(self):
        # perfect annotation, check that inferred label is correct
        nclasses, nannotators, nitems = 3, 5, 50*8

        # create random model (this is our ground truth model)
        gamma = np.ones((nclasses,)) / float(nclasses)
        # nearly perfect annotators (theta needs one entry per annotator)
        theta = np.ones((nannotators,)) * 0.999
        true_model = ModelBt(nclasses, nannotators, gamma, theta)

        # create random data
        labels = true_model.generate_labels(nitems)
        annotations = true_model.generate_annotations_from_labels(labels)

        posterior = true_model.infer_labels(annotations)
        testing.assert_allclose(posterior.sum(1), 1., atol=1e-6, rtol=0.)
        inferred = posterior.argmax(1)

        testing.assert_equal(inferred, labels)
        self.assertTrue(np.all(posterior[np.arange(nitems),inferred] > 0.999))

        # at chance annotation, disagreeing annotators: get back prior
        gamma = ModelBt._random_gamma(nclasses)
        theta = np.ones((nannotators,)) / float(nclasses)
        model = ModelBt(nclasses, nannotators, gamma, theta)

        data = np.array([[MV, 0, 1, 2, MV]])
        testing.assert_almost_equal(np.squeeze(model.infer_labels(data)),
                                    model.gamma, 6)

    def test_generate_annotations(self):
        # test to check that annotations are masked correctly when the number
        # of items is not divisible by the number of annotators
        nclasses, nannotators, nitems = 5, 7, 201

        model = ModelBt.create_initial_state(nclasses, nannotators)
        annotations = model.generate_annotations(nitems)

        valid = is_valid(annotations)
        self.assertEqual(annotations.shape, (nitems, nannotators))
        self.assertTrue(model.are_annotations_compatible(annotations))

        # perfect annotators, annotations correspond to prior
        nitems = 20000
        model.theta[:] = 1.
        annotations = model.generate_annotations(nitems)
        freq = labels_frequency(annotations, nclasses)
        np.testing.assert_almost_equal(freq, model.gamma, 2)

    def test_annotations_compatibility(self):
        nclasses = 3
        nannotators = 5
        model = ModelBt.create_initial_state(nclasses, nannotators)

        # test method that checks annotations compatibility
        anno = np.array([[0, 1, MV, MV, MV]])
        self.assertTrue(model.are_annotations_compatible(anno))

        # wrong number of annotators
        anno = np.array([[0, 0, 0, 0]])
        self.assertFalse(model.are_annotations_compatible(anno))

        # label value out of range for nclasses = 3
        anno = np.array([[4, 0, 0, 0, 0]])
        self.assertFalse(model.are_annotations_compatible(anno))

        # negative value that is not the missing-value marker
        anno = np.array([[-2, MV, MV, MV, MV]])
        self.assertFalse(model.are_annotations_compatible(anno))

    def test_raise_error_on_incompatible_annotation(self):
        nclasses, nannotators = 3, 7
        model = ModelBt.create_initial_state(nclasses, nannotators)
        anno = np.array([[MV, MV, 0, 0, 7, MV, MV]])

        with self.assertRaises(PyannoValueError):
            model.mle(anno)

        with self.assertRaises(PyannoValueError):
            model.map(anno)

        with self.assertRaises(PyannoValueError):
            model.sample_posterior_over_accuracy(anno, 10)

        with self.assertRaises(PyannoValueError):
            model.infer_labels(anno)

        with self.assertRaises(PyannoValueError):
            model.log_likelihood(anno)

    def test_missing_annotations(self):
        # test simple model, check that we get to global optimum
        nclasses, nannotators, nitems = 2, 3, 10000

        # create random model and data (this is our ground truth model)
        true_model = ModelBt.create_initial_state(nclasses, nannotators)
        annotations = true_model.generate_annotations(nitems)

        # remove about 10% of the annotations
        for _ in range(nitems*nannotators//10):
            i = np.random.randint(nitems)
            j = np.random.randint(nannotators)
            annotations[i,j] = MV

        # create a new, empty model and infer back the parameters
        model = ModelBt.create_initial_state(nclasses, nannotators)
        before_llhood = (model.log_likelihood(annotations)
                         + model._log_prior(model.theta))
        model.map(annotations)
        after_llhood = (model.log_likelihood(annotations)
                        + model._log_prior(model.theta))

        testing.assert_allclose(model.gamma, true_model.gamma,
                                atol=1e-1, rtol=0.)
        testing.assert_allclose(model.theta, true_model.theta,
                                atol=1e-1, rtol=0.)
        self.assertGreater(after_llhood, before_llhood)


if __name__ == '__main__':
    unittest.main()