Commit c6cdb238 authored by Benjamin Murauer's avatar Benjamin Murauer
Browse files

Merge branch '59-speedup-random-classifier-tests' into 'master'

Resolve "speedup random classifier tests"

Closes #59

See merge request dbis/software/dbispipeline!41
parents d0dc2b6d 69c0a945
Pipeline #42179 passed with stage
in 2 minutes and 29 seconds
"""Test cases for random classifier."""
import unittest
import numpy as np
from dbispipeline.models import RandomClassifier
class TestRandomClassifierUniformDistribution(unittest.TestCase):
    """Testcases for random classifier when using uniform distribution."""

    def test_only_given_classes(self):
        """Testing classes of the training data set are used for prediction."""
        expected = ['a', 'b', 'c', 'd']
        # 100 samples, 25 per class.
        features = list(range(100))
        labels = ['a'] * 25 + ['b'] * 25 + ['c'] * 25 + ['d'] * 25
        classifier = RandomClassifier(uniform=True)
        classifier.fit(features, labels)
        self.assertEqual(expected, sorted(classifier.classes))

    def test_uniform_distribution(self):
        """Testing uniform distribution when flag is True in constructor."""
        size_population = 1000000
        expected = [True] * 4
        train_x = list(range(size_population))
        test_x = list(range(size_population))
        # Training labels are deliberately skewed (15/15/30/40 %); a uniform
        # classifier must still predict each class with equal frequency.
        labels = ['a'] * int(size_population * 0.15) + \
            ['b'] * int(size_population * 0.15) + \
            ['c'] * int(size_population * 0.30) + \
            ['d'] * int(size_population * 0.40)
        classifier = RandomClassifier(uniform=True)
        classifier.fit(train_x, labels)
        predictions = classifier.predict(test_x)
        _, counts = np.unique(predictions, return_counts=True)
        observed = counts / len(test_x)
        target = 1 / len(observed)
        # Accept a +/- 1 percentage point deviation from the uniform share.
        results = [target - 0.01 <= share <= target + 0.01
                   for share in observed]
        self.assertEqual(expected, results)
class TestRandomClassifierProbabilityDistribution(unittest.TestCase):
    """Testcases for random classifier using probability distribution."""

    def test_only_given_classes(self):
        """Testing classes of the training data set are used for prediction."""
        expected = ['a', 'b', 'c', 'd']
        # 100 samples, 25 per class.
        features = list(range(100))
        labels = ['a'] * 25 + ['b'] * 25 + ['c'] * 25 + ['d'] * 25
        classifier = RandomClassifier(uniform=False)
        classifier.fit(features, labels)
        self.assertEqual(expected, sorted(classifier.classes))

    def test_probability_distribution(self):
        """Testing probability distribution when flag is False in constr."""
        size_population = 1000000
        expected = [True] * 4
        train_x = list(range(size_population))
        test_x = list(range(size_population))
        # Class shares in the training labels: 15/15/30/40 %; the classifier
        # should reproduce these frequencies in its predictions.
        labels = ['a'] * int(size_population * 0.15) + \
            ['b'] * int(size_population * 0.15) + \
            ['c'] * int(size_population * 0.30) + \
            ['d'] * int(size_population * 0.40)
        classifier = RandomClassifier(uniform=False)
        classifier.fit(train_x, labels)
        predictions = classifier.predict(test_x)
        _, counts = np.unique(predictions, return_counts=True)
        observed = counts / len(test_x)
        targets = [0.15, 0.15, 0.30, 0.40]
        # Accept a +/- 1 percentage point deviation per class.
        results = [targets[i] - 0.01 <= share <= targets[i] + 0.01
                   for i, share in enumerate(observed)]
        self.assertEqual(expected, results)
"""Test cases for random classifier."""
import pytest
import dbispipeline.models
from dbispipeline.models import RandomClassifier
@pytest.mark.parametrize('uniform', [True, False])
def test_fit(uniform):
    """Testing fit method."""
    expected = ['a', 'b', 'c', 'd']
    # Four classes, evenly represented (25 samples each).
    labels = ['a', 'b', 'c', 'd'] * 25
    features = list(range(len(labels)))
    classifier = RandomClassifier(uniform=uniform)
    classifier.fit(features, labels)
    assert sorted(classifier.classes) == expected
    # NOTE(review): 'probabilites' mirrors the (misspelled) attribute name
    # exposed by the model; renaming it would require a model change.
    assert len(classifier.probabilites) == len(expected)
    # Evenly represented classes must each get probability 1/4.
    assert all(prob == 0.25 for prob in classifier.probabilites)
@pytest.mark.parametrize('uniform', [True, False])
def test_predict(monkeypatch, uniform):
    """Testing predict method."""
    # Records how often np.random.choice was invoked and the arguments of
    # the most recent call.
    mock_call = {
        'call_count': 0,
    }
    classes = ['a', 'b', 'c', 'd']
    propabilities = [0.15, 0.15, 0.3, 0.4]

    def _mock_np_rand_choice(a, size=None, replace=True, p=None):
        """Mocks np.random.choice by returning elements in order."""
        idx = mock_call['call_count']
        mock_call['call_count'] += 1
        # Only the latest call's arguments are kept; the assertions below
        # implicitly assume every call uses the same arguments.
        mock_call['a'] = a
        mock_call['size'] = size
        mock_call['replace'] = replace
        mock_call['p'] = p
        # Cycle through the candidates so predictions are deterministic.
        return a[idx % len(a)]

    with monkeypatch.context() as m:
        # Patch the np.random.choice reference used inside the models module.
        m.setattr(dbispipeline.models.np.random, 'choice',
                  _mock_np_rand_choice)
        # Build training labels with 15/15/30/40 samples per class.
        y = []
        for c, p in zip(classes, propabilities):
            y += [c] * int(p * 100)
        x1 = list(range(len(y)))
        rc = RandomClassifier(uniform=uniform)
        rc.fit(x1, y)
        # With the cycling mock, predictions repeat the class list in order.
        expected = ['a', 'b', 'c', 'd'] * 2
        x2 = list(range(len(expected)))
        actual = rc.predict(x2)
        assert expected == actual
        # One choice() call per prediction is expected.
        assert mock_call['call_count'] == len(expected)
        assert (mock_call['a'] == rc.classes).all()
        assert mock_call['size'] is None
        assert mock_call['replace'] is True
        if uniform is True:
            # Uniform mode must not pass explicit probabilities.
            assert mock_call['p'] is None
        else:
            # Probability mode passes the per-class training frequencies.
            assert len(mock_call['p']) == 4
            for p_actual, p_expected in zip(mock_call['p'], propabilities):
                assert p_actual == p_expected
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment