Commit 69c0a945 authored by Michael Vötter, committed by Benjamin Murauer

Resolve "speedup random classifier tests"

parent d0dc2b6d
"""Test cases for random classifier."""
import unittest
import numpy as np
from dbispipeline.models import RandomClassifier
class TestRandomClassifierUniformDistribution(unittest.TestCase):
"""Testcases for random classifier when using uniform distribution."""
def test_only_given_classes(self):
"""Testing classes of the training data set are used for prediction."""
expected = ['a', 'b', 'c', 'd']
# setup test lists
x = [a for a in range(100)]
y = ['a' for a in range(25)] + \
['b' for b in range(25)] + \
['c' for c in range(25)] + \
['d' for d in range(25)]
rc = RandomClassifier(uniform=True)
rc.fit(x, y)
results = sorted(rc.classes)
self.assertEqual(expected, results)
def test_uniform_distribution(self):
"""Testing uniform distribution when flag is True in constructor."""
size_population = 1000000
expected = [True] * 4
x1 = [a for a in range(size_population)]
x2 = [a for a in range(size_population)]
y = ['a' for a in range(int(size_population * 0.15))] + \
['b' for b in range(int(size_population * 0.15))] + \
['c' for c in range(int(size_population * 0.30))] + \
['d' for d in range(int(size_population * 0.40))]
rc = RandomClassifier(uniform=True)
rc.fit(x1, y)
predictions = rc.predict(x2)
classes, counts = np.unique(predictions, return_counts=True)
probabilites = counts / len(x2)
target_value = 1 / len(probabilites)
results = [(x >= target_value - 0.01 and x <= target_value + 0.01)
for x in probabilites]
self.assertEqual(expected, results)
class TestRandomClassifierProbabilityDistribution(unittest.TestCase):
"""Testcases for random classifier using probability distribution."""
def test_only_given_classes(self):
"""Testing classes of the training data set are used for prediction."""
expected = ['a', 'b', 'c', 'd']
# setup test lists
x = [a for a in range(100)]
y = ['a' for a in range(25)] + \
['b' for b in range(25)] + \
['c' for c in range(25)] + \
['d' for d in range(25)]
rc = RandomClassifier(uniform=False)
rc.fit(x, y)
results = sorted(rc.classes)
self.assertEqual(expected, results)
def test_probability_distribution(self):
"""Testing probability distribution when flag is False in constr."""
size_population = 1000000
expected = [True] * 4
x1 = [a for a in range(size_population)]
x2 = [a for a in range(size_population)]
y = ['a' for a in range(int(size_population * 0.15))] + \
['b' for b in range(int(size_population * 0.15))] + \
['c' for c in range(int(size_population * 0.30))] + \
['d' for d in range(int(size_population * 0.40))]
rc = RandomClassifier(uniform=False)
rc.fit(x1, y)
predictions = rc.predict(x2)
classes, counts = np.unique(predictions, return_counts=True)
probabilites = counts / len(x2)
tar_val = [0.15, 0.15, 0.30, 0.40]
results = [(x >= tar_val[i] - 0.01 and x <= tar_val[i] + 0.01)
for i, x in enumerate(probabilites)]
self.assertEqual(expected, results)
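Both the suite above and the rewritten suite below exercise the same classifier. The following is a minimal sketch of it, inferred only from what the tests touch (fit, predict, the classes and probabilites attributes, the uniform flag, and one np.random.choice call per predicted sample); the actual implementation in dbispipeline.models may differ.

# Hypothetical sketch of the classifier under test, inferred from the
# tests alone; the real dbispipeline.models.RandomClassifier may differ.
import numpy as np


class RandomClassifier:
    """Predicts labels by sampling from the training label distribution."""

    def __init__(self, uniform=True):
        self.uniform = uniform
        self.classes = None
        self.probabilites = None  # spelling matches the attribute the tests read

    def fit(self, x, y):
        """Estimates the class frequencies from the training labels."""
        self.classes, counts = np.unique(y, return_counts=True)
        self.probabilites = counts / len(y)
        return self

    def predict(self, x):
        """Draws one label per sample; p=None yields a uniform draw."""
        p = None if self.uniform else self.probabilites
        return [np.random.choice(self.classes, p=p) for _ in x]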
"""Test cases for random classifier."""
import pytest
import dbispipeline.models
from dbispipeline.models import RandomClassifier
@pytest.mark.parametrize('uniform', [True, False])
def test_fit(uniform):
"""Testing fit method."""
expected = ['a', 'b', 'c', 'd']
# setup test lists
y = ['a', 'b', 'c', 'd'] * 25
x = list(range(len(y)))
rc = RandomClassifier(uniform=uniform)
rc.fit(x, y)
assert expected == sorted(rc.classes)
assert len(rc.probabilites) == len(expected)
for p in rc.probabilites:
assert p == 0.25
@pytest.mark.parametrize('uniform', [True, False])
def test_predict(monkeypatch, uniform):
"""Testing predict method."""
mock_call = {
'call_count': 0,
}
classes = ['a', 'b', 'c', 'd']
propabilities = [0.15, 0.15, 0.3, 0.4]
def _mock_np_rand_choice(a, size=None, replace=True, p=None):
"""Mocks np.random.choice by returning elements in order."""
idx = mock_call['call_count']
mock_call['call_count'] += 1
mock_call['a'] = a
mock_call['size'] = size
mock_call['replace'] = replace
mock_call['p'] = p
return a[idx % len(a)]
with monkeypatch.context() as m:
m.setattr(dbispipeline.models.np.random, 'choice',
_mock_np_rand_choice)
y = []
for c, p in zip(classes, propabilities):
y += [c] * int(p * 100)
x1 = list(range(len(y)))
rc = RandomClassifier(uniform=uniform)
rc.fit(x1, y)
expected = ['a', 'b', 'c', 'd'] * 2
x2 = list(range(len(expected)))
actual = rc.predict(x2)
assert expected == actual
assert mock_call['call_count'] == len(expected)
assert (mock_call['a'] == rc.classes).all()
assert mock_call['size'] is None
assert mock_call['replace'] is True
if uniform is True:
assert mock_call['p'] is None
else:
assert len(mock_call['p']) == 4
for p_actual, p_expected in zip(mock_call['p'], propabilities):
assert p_actual == p_expected
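The speedup comes from trading statistics for determinism: the rewritten suite patches np.random.choice so that predict returns a fixed repeating sequence and the sampler's arguments can be asserted directly, whereas the original suite drew 1,000,000 predictions per test and checked that each empirical class frequency landed within ±0.01 of its target. That tolerance was generous but expensive: with n = 1,000,000 and p = 0.25, the standard error of an empirical frequency is sqrt(p(1 - p) / n) ≈ 0.00043, so ±0.01 is roughly 23 standard errors. Assuming pytest can discover the file (its path is not shown in this diff), the rewritten tests can be run with, for example:

    pytest -q -k 'test_fit or test_predict'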