Commit c4063794 authored by Christian Schneider

Changed saving to HDF5 file format

parent 4e109789
......@@ -8,6 +8,9 @@ import copy
import numpy as np
from .version import __version__
# For saving to .h5 and .nc
import pandas as pd
class data_module_base(object):
"""Base class of DataModule
......@@ -85,14 +88,32 @@ class data_module_base(object):
else:
file_name = path[1]
# Define datatype
if self.dtype == 'data_complex':
dtype = 'dm_complex'
ending = '.dm.h5'
elif self.dtype == 'data_table':
dtype = 'dm_table'
ending = '.dm.h5'
elif self.dtype == 'data_grid':
dtype = 'dm_grid'
ending = '.dm.nc'
else:
raise Exception('Error. Datatype not supported')
# Check for file extension
if path[1][-3:].lower() != '.dm':
file_name += '.dm'
if path[1][-3:].lower() not in ['.h5', '.nc']:
file_name += ending
# Append Folder and be adaptive to windows, etc.
file_name = os.path.normpath(os.path.join(path[0], file_name))
filename = file_name
# Whitelist of allowed parameters to be stored
white_list = (str, dict, bool, float, int, np.float64, np.int64,
np.ndarray)
# Check for Overwrite
# Check for overwrite. If file exists move to duplicates
if not force:
if os.path.isfile(file_name):
from shutil import copyfile
......@@ -102,8 +123,9 @@ class data_module_base(object):
if not os.path.exists(os.path.join(fpath, 'duplicates')):
os.makedirs(os.path.join(fpath, 'duplicates'))
fpath = os.path.join(fpath, 'duplicates')
fn, e = os.path.splitext(fn)
file_name2 = os.path.join(fpath, fn + '%s.dm')
# Split twice since .dm.h5 (two dots)
fn, e = os.path.splitext(os.path.splitext(fn)[0])
file_name2 = os.path.join(fpath, fn + '%s' + file_name[-6:])
number = ''
while os.path.isfile(file_name2 % number):
number = int(number or "0") + 1
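As an aside, the duplicate-numbering scheme above can be read in isolation. A minimal sketch, assuming a hypothetical helper name (next_free_name is not part of the module):

import os

# Mirror of the '%s' placeholder loop above: count up until a free name.
def next_free_name(folder, stem, ending='.dm.h5'):
    template = os.path.join(folder, stem + '%s' + ending)
    number = ''
    while os.path.isfile(template % number):
        number = int(number or "0") + 1
    return template % number

# Yields e.g. duplicates/scan.dm.h5, then duplicates/scan1.dm.h5, ...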
......@@ -114,8 +136,41 @@ class data_module_base(object):
'moved into the subfolder duplicates.\n',
flush=True, end=" ")
with open(file_name, "wb") as f:
pickle.dump(self, f, -1)
# Save as hdf5 file
if dtype in ['dm_complex', 'dm_table']:
# Save with pandas HDF writer
with pd.HDFStore(filename, 'w') as f:
# Save dataframe
f.put(dtype, self.df, format='table') # , data_columns=True)
# Save datamodule metadata
attrs = f.get_storer(dtype).attrs
metadata_dict = {}
for i, k in self.__dict__.items():
if isinstance(k, np.ndarray):
# Special case, since numpy arrays do not
# serialize well
metadata_dict[i] = list(k)
elif isinstance(k, white_list):
metadata_dict[i] = k
attrs['dm_metadata'] = repr(metadata_dict)
# attrs['dm_metadata'] = repr({i: k
# for i,k in self.__dict__.items()
# if isinstance(k, white_list)})
elif dtype in ['dm_grid']:
# Save with xarray netCDF writer
# Save datamodule metadata
attrs = {}
for i, k in self.__dict__.items():
if isinstance(k, white_list):
attrs[i] = k
self.df.attrs['dm_metadata'] = repr(attrs)
self.df.to_netcdf(filename)
return None
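For orientation, a minimal sketch of reading such a file back by hand with pandas; the file path and the 'dm_table' key are placeholders, and eval() should only be applied to trusted files:

import pandas as pd

# Read back a datamodule written as above (file name is illustrative).
with pd.HDFStore('measurement.dm.h5', 'r') as store:
    df = store.get('dm_table')  # the stored dataframe
    # Metadata was written as repr() of a dict; eval() restores it.
    meta = eval(store.get_storer('dm_table').attrs['dm_metadata'])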
def copy(self):
"""Copy datamodule.
......
......@@ -39,6 +39,7 @@ class data_complex(data_table):
self.circuit = None
self.fitresults = None
self.fitresults_full_model = None
self.dtype = 'data_complex'
def _repr_html_(self):
"""Show pandas dataframe as default representation"""
......
......@@ -66,6 +66,7 @@ class data_grid(data_module_base):
self.name_x = df_names[0]
self.name_y = df_names[1]
self.name_v = df_names[-1]
self.dtype = 'data_grid'
# Helpful data functions ###################################################
def return_coord(self, coord_name):
......
......@@ -81,6 +81,7 @@ class data_table(data_module_base):
# Add new fit
self.mfit = fit_plugin(self)
self.dtype = 'data_table'
def _repr_html_(self):
"""Show pandas dataframe as default representation"""
......
......@@ -11,6 +11,7 @@ import os
import pickle
import numpy as np
import pandas as pd
import xarray as xr
from .downwards_compatibility.data_line import data_line,data_2d
from .downwards_compatibility.data_surface import data_surface,data_3d
......@@ -49,84 +50,121 @@ def load_datamodule(filename, upgrade=True):
DataModule
Returns a datamodule
"""
try:
with open(filename, 'rb') as f:
a = pickle.load(f)
if upgrade is True:
return upgrade_dm(a)
else:
return a
## This section is for downwards compatibility ############################
except (AttributeError, NameError, ImportError) as e:
# This is just for compatibility with old datamodule files
import sys
import DataModule.downwards_compatibility.data_line as data_l
import DataModule.downwards_compatibility.data_surface as data_s
import DataModule.downwards_compatibility.data_complex as data_c
import DataModule.data_table as data_t
sys.modules['DataModule.data_line'] = data_l
sys.modules['DataModule.data_surface'] = data_s
sys.modules['DataModule'].data_3d = data_s.data_3d
tmp = sys.modules['DataModule.data_complex']
sys.modules['DataModule.data_complex'] = data_c
sys.modules['DataModule'].data_cplx = data_c.data_cplx
sys.modules['DataModule.data_xy'] = data_t
sys.modules['DataModule.data_xy'].data_xy = data_table
with open(filename, 'rb') as f:
a = pickle.load(f)
# Old data complex class
if isinstance(a, data_c.data_complex):
if filename[-3:] == '.dm':
# Old datamodule storage (pickled)
try:
with open(filename, 'rb') as f:
a = pickle.load(f)
if upgrade is True:
return upgrade_dm(a)
else:
return a
## This section is for downwards compatibility ######################
# Used for very old datamodules. Recommended to replace this with a
# one-time convert-to-.h5 script
except (AttributeError, NameError, ImportError) as e:
# This is just for compatibility with old datamodule files
import sys
import DataModule.downwards_compatibility.data_line as data_l
import DataModule.downwards_compatibility.data_surface as data_s
import DataModule.downwards_compatibility.data_complex as data_c
import DataModule.data_table as data_t
sys.modules['DataModule.data_line'] = data_l
sys.modules['DataModule.data_surface'] = data_s
sys.modules['DataModule'].data_3d = data_s.data_3d
tmp = sys.modules['DataModule.data_complex']
sys.modules['DataModule.data_complex'] = data_c
sys.modules['DataModule'].data_cplx = data_c.data_cplx
sys.modules['DataModule.data_xy'] = data_t
sys.modules['DataModule.data_xy'].data_xy = data_table
with open(filename, 'rb') as f:
a = pickle.load(f)
# Old data complex class
if isinstance(a, data_c.data_complex):
f = a.x
v = a.value
b = data_complex(f, v)
try:
b.idx_min = a.idx_min
b.idx_max = a.idx_max
except AttributeError:
b.idx_min = 0
b.idx_max = None
elif isinstance(a, (data_line, data_surface)):
b = upgrade_dm(a)
elif isinstance(a, (data_table)):
a.name_x = a.df.keys()[0]
a.name_y = a.df.keys()[1]
b = upgrade_dm(a)
# Restore new classes
sys.modules['DataModule.data_complex'] = tmp
del sys.modules['DataModule.data_line']
del sys.modules['DataModule.data_surface']
del sys.modules['DataModule.data_xy']
# Save new datamodules
print('\rUpgrade to datamodule V3. Saved new datamodule',
end=' ', flush=True)
b.par = a.par
b.save(filename, useDate=False)
return b
except NameError:
import sys
with open(filename, 'rb') as f:
a = pickle.load(f)
f = a.x
v = a.value
b = data_complex(f, v)
sys.modules['DataModule.data_complex'] = tmp
data_new = data_complex(f, v)
try:
b.idx_min = a.idx_min
b.idx_max = a.idx_max
data_new.idx_min = a.idx_min
data_new.idx_max = a.idx_max
except AttributeError:
b.idx_min = 0
b.idx_max = None
elif isinstance(a, (data_line, data_surface)):
b = upgrade_dm(a)
elif isinstance(a, (data_table)):
a.name_x = a.df.keys()[0]
a.name_y = a.df.keys()[1]
b = upgrade_dm(a)
# Restore new classes
sys.modules['DataModule.data_complex'] = tmp
del sys.modules['DataModule.data_line']
del sys.modules['DataModule.data_surface']
del sys.modules['DataModule.data_xy']
# Save new datamodules
print('\rUpgrade to datamodule V3. Saved new datamodule',
end=' ', flush=True)
b.par = a.par
b.save(filename, useDate=False)
return b
except NameError:
import sys
with open(filename, 'rb') as f:
a = pickle.load(f)
f = a.x
v = a.value
sys.modules['DataModule.data_complex'] = tmp
data_new = data_complex(f, v)
try:
data_new.idx_min = a.idx_min
data_new.idx_max = a.idx_max
except AttributeError:
data_new.idx_min = 0
data_new.idx_max = None
data_new.save(filename, useDate=False)
return data_new
data_new.idx_min = 0
data_new.idx_max = None
data_new.save(filename, useDate=False)
return data_new
elif filename[-5:] == 'dm.h5':
# Load with pandas HDF reader
with pd.HDFStore(filename, 'r') as f:  # read-only; 'a' would create a file on a bad path
# Determine first if complex data or data_table
if '/dm_complex' in f.keys():
tmp = data_complex()
dtype = 'dm_complex'
elif '/dm_table' in f.keys():
tmp = data_table()
dtype = 'dm_table'
else:
raise Exception('Error. Datatype not supported')
# Get metadata
attrs = eval(f.get_storer(dtype).attrs['dm_metadata'])
# Load dataframe
tmp.df = f.get(dtype)
# Set attributes of dm object
for key, value in attrs.items():
setattr(tmp, key, value)
elif filename[-5:] == 'dm.nc':
# Load with xarray netCDF reader
with xr.open_dataset(filename) as ds:
# Create empty data_grid
tmp = data_grid([[0], [0], [[0]]])
# Save xarray DataArray
tmp.df = ds[list(ds.keys())[0]]
# Set attributes of dm object
for key, value in eval(tmp.df.attrs['dm_metadata']).items():
setattr(tmp, key, value)
# Return datamodule
return tmp
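The data_grid path relies on xarray carrying the metadata string through netCDF attributes. A self-contained sketch of that round-trip, with all names and the file path chosen purely for illustration:

import numpy as np
import xarray as xr

# Write: attach repr()'d metadata to a DataArray and save as netCDF.
da = xr.DataArray(np.zeros((2, 2)), dims=('x', 'y'), name='v')
da.attrs['dm_metadata'] = repr({'name_x': 'x', 'name_y': 'y'})
da.to_netcdf('demo.nc')

# Read: first data variable back, metadata via eval (trusted files only).
with xr.open_dataset('demo.nc') as ds:
    loaded = ds[list(ds.keys())[0]]
    meta = eval(loaded.attrs['dm_metadata'])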
def load_csv(fname, **kwargs):
"""Load
......@@ -693,7 +731,7 @@ def average_data(*args):
def cleandat(x, y):
"""AUtoclean data.
"""Autoclean data.
Take a two row data matrix (x,y) and do the following:
1. Sort data in ascending x order
......
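The ascending-x sort that step 1 describes amounts to an argsort over x; a minimal sketch with a hypothetical helper name:

import numpy as np

# Step 1 of cleandat: sort (x, y) pairs in ascending x order.
def sort_by_x(x, y):
    order = np.argsort(x)
    return np.asarray(x)[order], np.asarray(y)[order]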
__version__ = '3.2.0'
__version__ = '3.3.0'
"""
Last Updates:
v3.3.0 - CHR.
- Updated saving to use .h5 and .nc files (no longer pickle-based)
- Implemented mfit module
v3.1.0 - OSC:
- Fixed the average_data function; at the moment it works only for data_table
......