Commit 83d4424e authored by Manfred Moosleitner
Browse files

Merge branch '49-nan-in-cv_results-leads-to-exception' into 'master'

Resolve "NaN in cv_results leads to exception"

Closes #49

See merge request dbis/software/dbispipeline!33
parents 1f8ef857 c83d8411
Pipeline #38595 passed with stage
in 2 minutes and 54 seconds
......@@ -378,7 +378,9 @@ def get_job_id():
return os.getpid()
def check_serializability(content, nan_replacement=0.0,
                          pos_inf_replacement=0.0,
                          neg_inf_repacement=0.0):
    """
    Returns a copy of an object that can be json-serialized.

    The object is returned unchanged if ``json.dumps`` accepts it with
    ``allow_nan=False``. Otherwise lists, tuples and dicts are searched
    recursively (tuples come back as lists, dict keys are left untouched)
    and every offending value is replaced:

    * NaN and +/- infinity floats, including numpy float scalars, are
      replaced by the configured replacement values,
    * callables are replaced by their stripped source code, or by their
      string representation if no source can be retrieved,
    * anything else is replaced by its string representation.

    Args:
        content: anything
            The object to be checked
        nan_replacement (float):
            Value to replace np.nan for serialization
        pos_inf_replacement (float):
            Value to replace positive infinity for serialization
        neg_inf_repacement (float):
            Value to replace negative infinity for serialization. The
            misspelled name is kept because it is passed by keyword.

    Returns:
        A copy of the results which can be stored in a JSON column in the db,
        or as a pickle for the pickle storage handler
    """
    try:
        # allow_nan=False makes NaN/Infinity raise ValueError instead of
        # silently emitting tokens that are not valid JSON.
        json.dumps(content, allow_nan=False)
    except (ValueError, TypeError):
        pass
    else:
        return content

    replacements = {
        'nan_replacement': nan_replacement,
        'pos_inf_replacement': pos_inf_replacement,
        'neg_inf_repacement': neg_inf_repacement,
    }

    if isinstance(content, (list, tuple)):
        return [check_serializability(x, **replacements) for x in content]

    if isinstance(content, dict):
        return {
            k: check_serializability(v, **replacements)
            for k, v in content.items()
        }

    if callable(content):
        try:
            return inspect.getsource(content).strip()
        except (OSError, TypeError):
            # Builtins and callables without retrievable source have no
            # source text; fall back to the generic representation.
            return str(content)

    # np.floating covers numpy scalars such as np.float32, which are not
    # instances of the builtin float.
    if isinstance(content, (float, np.floating)):
        if np.isnan(content):
            return nan_replacement
        if np.isposinf(content):
            return pos_inf_replacement
        if np.isneginf(content):
            return neg_inf_repacement

    # Last resort: the string representation is always serializable.
    return str(content)
"""Tests for preprocessing results before storing into db."""
import numpy as np
from dbispipeline.utils import check_serializability
def test_serialize_valid_json():
    """Input that is already serializable must come back equal."""
    nested = {
        'more fields!': [0, 1, 2],
        'foo': 'bar',
        'a list of nested stuff': [
            {'a': 1, 'b': []},
        ],
    }
    payload = {
        'some_field': [0, 1, 2],
        'some_other_field': [1, 2, 3],
        'some nested stuff': nested,
    }

    assert payload == check_serializability(payload)
def test_invalid_json_with_nans_default_replacement():
    """NaN entries become 0.0 when no replacement value is passed."""

    def build(marker):
        # Input and expectation share one layout; only the marker differs.
        return {
            'some_field': [0, 1, 2],
            'some_other_field': [1, marker, 3],
            'some nested stuff': {
                'more fields!': [0, 0.0, 3],
                'foo': 'bar',
                'a list of nested stuff': [
                    {'a': marker, 'b': []},
                ],
            },
        }

    result = check_serializability(build(np.nan))
    assert result == build(0.0)
def test_invalid_json_with_nans_custom_replacement():
    """NaN entries become the value given as ``nan_replacement``."""

    def build(marker):
        # Input and expectation share one layout; only the marker differs.
        return {
            'some_field': [0, 1, 2],
            'some_other_field': [1, marker, 3],
            'some nested stuff': {
                'more fields!': [0, 0.0, 3],
                'foo': 'bar',
                'a list of nested stuff': [
                    {'a': marker, 'b': []},
                ],
            },
        }

    result = check_serializability(build(np.nan), nan_replacement=123.0)
    assert result == build(123.0)
def test_invalid_json_with_lambdas():
    """A lambda value is replaced by the stripped text of its source."""
    # The lambda deliberately sits alone on its line, trailing comma
    # included: the expected string below mirrors that line.
    payload = {
        'some_field': [0, 1, 2],
        'some_other_field': [1, 2, np.nan],
        'some nested stuff': {
            'more fields!': [0, 0.0, 3],
            'foo':
                lambda x: 'bar',
            'a list of nested stuff': [
                {'a': 1, 'b': []},
            ],
        },
    }
    wanted_nested = {
        'more fields!': [0, 0.0, 3],
        'foo': 'lambda x: \'bar\',',
        'a list of nested stuff': [
            {'a': 1, 'b': []},
        ],
    }
    wanted = {
        'some_field': [0, 1, 2],
        'some_other_field': [1, 2, 123.0],
        'some nested stuff': wanted_nested,
    }

    result = check_serializability(payload, nan_replacement=123.0)
    assert result == wanted
def test_invalid_json_with_infinities_default_values():
    """Both infinities become 0.0 when no replacements are passed."""

    def build(upper, lower):
        # ``upper`` fills the +inf slot, ``lower`` the -inf slot.
        return {
            'some_field': [0, 1, 2],
            'some_other_field': [1, 2, upper],
            'some nested stuff': {
                'more fields!': [0, 0.0, 3],
                'foo': 'bar',
                'a list of nested stuff': [
                    {'a': lower, 'b': []},
                ],
            },
        }

    result = check_serializability(build(np.inf, -np.inf))
    assert result == build(0.0, 0.0)
def test_invalid_json_with_infinities_custom_values():
    """Each infinity becomes the replacement passed for its sign."""

    def build(upper, lower):
        # ``upper`` fills the +inf slot, ``lower`` the -inf slot.
        return {
            'some_field': [0, 1, 2],
            'some_other_field': [1, 2, upper],
            'some nested stuff': {
                'more fields!': [0, 0.0, 3],
                'foo': 'bar',
                'a list of nested stuff': [
                    {'a': lower, 'b': []},
                ],
            },
        }

    result = check_serializability(
        build(np.inf, -np.inf),
        pos_inf_replacement='INFINITY',
        neg_inf_repacement='NEG INFINITY',
    )
    assert result == build('INFINITY', 'NEG INFINITY')
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment