""" Simple Class to hold data from a Profiler"""
import numpy as np
import warnings
from abc import ABCMeta, abstractmethod
import pandas
from profiler.loading import binned
from profiler import io as p_io
from IPython import embed
class ProfilerData:
    """
    Abstract base class for Profilers

    Measurements are stored as plain attributes.  Which attributes hold
    data is described by four key lists -- ``profile_arrays``,
    ``depth_arrays``, ``profile_depth_arrays`` and ``scalar_keys`` --
    that are NOT defined on this class: `from_dict` installs them from
    its ``darrays`` argument (subclasses or the binned loader presumably
    provide them otherwise -- TODO confirm).

    Attributes:
        dataset (str): The name of the dataset
        datafile (str): Name of the file the data was read from
        meta_keys (list): Names of the attributes holding metadata
        data_keys (list): Names of extra attributes exported by `to_dict`
        in_field (bool): Whether the data is from the in-field processing
    """
    # NOTE: Python 2 style declaration.  Under Python 3 this is an
    # ordinary class attribute, so @abstractmethod is not enforced and
    # the class stays instantiable (profile_subset relies on that).
    __metaclass__ = ABCMeta

    # Max dimension for z/p (just to keep track of the arrays)
    Ndepth:int = None

    # Standard variables
    lat = None
    lon = None
    time = None
    profile_id = None

    # glider offset
    #dist = None
    #offset = None
    distE = None  # Eastward distance from the survey midpoint
    distN = None  # Northward distance from the survey midpoint

    data_keys:list = []

    # Meta
    missid:int = None
    platform:str = None
    pi:str = None  # Principal Investigator
    pdict:dict = None  # dict on the profiler
    dataset = None  # The name of the dataset
    meta_keys:list = []
    in_field:bool = False  # default so every instance has the flag

    # I/O
    datafile:str = None

    # CTD -- Nprof, Ndepth
    s = None
    t = None
    p = None
    theta = None
    depth = None
    qual:dict = None

    # ADCP
    has_adcp:bool = False

    def __init__(self, datafile:str, dataset:str):
        """
        Args:
            datafile (str): Name of the data file.
            dataset (str): The name of the dataset.
        """
        self.datafile = datafile
        self.dataset = dataset

    @classmethod
    def from_binned_file(cls, datafile:str, bin_style:str,
                         dataset:str, in_field:bool=False,
                         missid:int=None, extra_dict:dict=None):
        """
        Build an object and fill it with ``binned.load``.

        Args:
            datafile (str): Name of the binned data file.
            bin_style (str): Passed through to ``binned.load``.
            dataset (str): The name of the dataset.
            in_field (bool): Whether the data is from the in-field processing.
            missid (int): Passed to ``binned.load`` as ``in_missid``.
            extra_dict (dict): Extra attributes to set on the object
                before loading.

        Returns:
            ProfilerData: The loaded object (an instance of ``cls``).
        """
        # Init
        pData = cls(datafile, dataset)
        # Set explicitly as well, in case a subclass __init__ does not
        pData.datafile = datafile
        pData.dataset = dataset
        pData.in_field = in_field
        if extra_dict is not None:
            for key, value in extra_dict.items():
                setattr(pData, key, value)

        # Load
        binned.load(pData, bin_style, in_missid=missid)

        return pData

    @staticmethod
    @abstractmethod
    def raw_loader(datafile:str):
        """
        Read a raw data file.

        `from_rawfile` calls this on the class (``cls.raw_loader(datafile)``)
        and unpacks the result as ``d, darrays`` -- the first two
        arguments of `from_dict`.  Subclasses must therefore override it
        with a static or class method.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError(
            "raw_loader must be implemented by the subclass")

    @classmethod
    def from_rawfile(cls, datafile:str, dataset:str,
                     in_field:bool=False, mdict:dict=None,
                     **kwargs):
        """
        Load a raw data file with the class's ``raw_loader``.

        Parameters:
            datafile (str): The path to the data file.
            dataset (str): The name of the dataset.
            in_field (bool): Whether the data is in-field or not.
            mdict (dict): Optional metadata.  It is copied, not modified;
                ``datafile`` and ``dataset`` entries are added to the copy.
            **kwargs: Accepted but not used.

        Returns:
            ProfilerData: The loaded object (an instance of ``cls``).
        """
        # meta dict -- work on a copy so the caller's dict is untouched
        mdict = {} if mdict is None else dict(mdict)
        mdict['datafile'] = datafile
        mdict['dataset'] = dataset

        # Generate dict
        d, darrays = cls.raw_loader(datafile)

        # Init
        return cls.from_dict(d, darrays, mdict, dataset,
                             in_field=in_field)

    @classmethod
    def from_dict(cls, d:dict, darrays:dict, mdict:dict,
                  dataset:str, in_field:bool=False):
        """
        Create a ProfilerData object from a dictionary of data.

        The object is created without calling ``__init__``.

        Args:
            d (dict): A dictionary of data.  It must hold one entry for
                every name listed in the four ``darrays`` key lists.
            darrays (dict): The key lists.  Every entry is set as an
                attribute; the following are required:
                    profile_arrays (list): Names of the per-profile arrays.
                    depth_arrays (list): Names of the per-depth arrays.
                    profile_depth_arrays (list): Names of the profile + depth arrays.
                    scalar_keys (list): Names of the scalars.
            mdict (dict): A dictionary of metadata.  Every entry is set
                as an attribute and its keys become ``meta_keys``.
            dataset (str): The name of the dataset.
            in_field (bool): Whether the data is from the infield processing.

        Returns:
            ProfilerData: A ProfilerData object containing the data from the dictionary.
        """
        # Init
        pData = cls.__new__(cls)
        pData.dataset = dataset
        pData.in_field = in_field

        # Meta dict (may override dataset set above)
        for key, value in mdict.items():
            setattr(pData, key, value)
        pData.meta_keys = list(mdict.keys())

        # Key lists
        for key, value in darrays.items():
            setattr(pData, key, value)

        # Scalars, depth arrays, profile arrays, profile + depth arrays
        for key_list in (pData.scalar_keys,
                         pData.depth_arrays,
                         pData.profile_arrays,
                         pData.profile_depth_arrays):
            for key in key_list:
                setattr(pData, key, d[key])

        # Return
        return pData

    @property
    def ptime(self):  # pandas time
        """``time`` converted with ``pandas.to_datetime(..., unit='s')``."""
        return pandas.to_datetime(self.time, unit='s')

    @property
    def Nprof(self):
        """Number of profiles, i.e. the length of ``time``."""
        return len(self.time)

    @property
    def darrays(self):
        """dict of the four key lists, in the form `from_dict` expects."""
        return {key: getattr(self, key)
                for key in ('profile_arrays', 'depth_arrays',
                            'profile_depth_arrays', 'scalar_keys')}

    @property
    def meta_dict(self):
        """dict of the metadata attributes named in ``meta_keys``.

        Keys without a matching attribute are skipped.
        """
        return {attr: getattr(self, attr)
                for attr in self.meta_keys if hasattr(self, attr)}

    def cut_on_reltime(self, timecut:tuple):
        """
        Cut the profiler data on relative time.

        Relative time runs from 0 at ``time.min()`` to 1 at
        ``time.max()``.  A profile is kept when its relative time is
        greater than ``timecut[0]`` and less than or equal to
        ``timecut[1]``, so the lower bound is exclusive.

        Variables:
            timecut (tuple): range of times to include 0 to 1

        Returns:
            pData (ProfilerData): A new object holding only the selected
                profiles (see `profile_subset`).
        """
        # Relative time
        # NOTE(review): if all times are equal the span is zero and the
        #  division gives NaN, which presumably selects nothing -- confirm
        min_time = self.time.min()
        max_time = self.time.max()
        reltime = (self.time - min_time) / (max_time - min_time)

        # Cut on time
        keep = (reltime > timecut[0]) & (reltime <= timecut[1])

        # Cut
        return self.profile_subset(np.where(keep)[0])

    def _copy_static_state(self, other):
        """Copy to ``other`` everything that is independent of the profile cut.

        That is: ``in_field``, the key lists (as new lists), and the
        values of the metadata, scalar and depth-array attributes.
        Values are shared by reference, not deep-copied.  Names without
        a matching attribute on ``self`` are skipped.
        """
        other.in_field = self.in_field

        # The key lists themselves -- without these the new object cannot
        # describe (rstr_variables), export (to_dict) or re-cut its data
        for name in ('meta_keys', 'scalar_keys', 'depth_arrays',
                     'profile_arrays', 'profile_depth_arrays'):
            if hasattr(self, name):
                setattr(other, name, list(getattr(self, name)))

        # The values that do not have a profile dimension
        for name in ('meta_keys', 'scalar_keys', 'depth_arrays'):
            for key in getattr(self, name, ()):
                if hasattr(self, key):
                    setattr(other, key, getattr(self, key))

    def profile_subset(self, profiles: np.ndarray,
                       init:bool=True):
        """
        Create a subset of the ProfilerData object based on the given profiles.

        Every array named in ``profile_arrays`` and
        ``profile_depth_arrays`` is indexed with ``profiles`` along its
        first axis.

        Args:
            profiles (np.ndarray): An array of profile indices to
                include in the subset. Or a boolean array
            init (bool): Whether to initialize a new ProfilerData object.
                If True, a new object is built with
                ``self.__class__(self.datafile, self.dataset)`` and the
                metadata, key lists, scalars and depth arrays are carried
                over.  If False, ``self`` is cut in place and returned.

        Returns:
            ProfilerData: The object containing the subset of profiles.
        """
        # Init
        if init:
            pData = self.__class__(self.datafile, self.dataset)
            self._copy_static_state(pData)
        else:
            pData = self

        # Cut on profiles
        for key in self.profile_arrays:
            setattr(pData, key, getattr(self, key)[profiles])
        for key in self.profile_depth_arrays:
            setattr(pData, key, getattr(self, key)[profiles])

        # Return
        return pData

    def rstr_meta(self):
        """Return the header lines of the representation.

        Default used by ``__repr__``; subclasses may override it.

        Returns:
            list: Strings naming the class, dataset and datafile.
        """
        return [f"<{self.__class__.__name__}: dataset={self.dataset}, "
                f"datafile={self.datafile}>\n"]

    def rstr_settings(self):
        """Return the settings lines of the representation.

        Empty here; subclasses may override it (e.g. adcp_on, in_field).
        """
        # Settings (adcp_on, in_field)
        rstr_settings = []
        return rstr_settings

    def rstr_variables(self):
        """Return the variable lines of the representation.

        One line with the name and shape of every depth, profile and
        profile + depth array that is not None.
        """
        # Variables
        rstr_var = [" Variables:\n"]
        for key_list in (self.depth_arrays,
                         self.profile_arrays,
                         self.profile_depth_arrays):
            for key in key_list:
                value = getattr(self, key)
                if value is not None:
                    rstr_var.append(f" {key}: {value.shape}\n")
        #
        return rstr_var

    def to_dict(self):
        """
        Collect the object's contents in a dict.

        Returns:
            dict: The values of all meta, data, scalar and array
                attributes, plus the ``meta_keys``, ``data_keys`` and
                ``scalar_keys`` lists themselves.
        """
        out_dict = {}
        # Meta, Data, Scalars, Arrays
        for key_list in (self.meta_keys,
                         self.data_keys,
                         self.scalar_keys,
                         self.depth_arrays,
                         self.profile_arrays,
                         self.profile_depth_arrays):
            for key in key_list:
                out_dict[key] = getattr(self, key)
        # Key me
        for keys in ['meta_keys', 'data_keys', 'scalar_keys']:
            out_dict[keys] = getattr(self, keys)
        return out_dict

    def write(self, outfile:str, gzip:bool=False):
        """
        Write the object to a JSON file, overwriting any existing file.

        Args:
            outfile (str): Name of the output file.
            gzip (bool): Not implemented.

        Raises:
            NotImplementedError: If ``gzip`` is True; nothing is written.
        """
        # gzip?  Fail before any work is done or any file is written
        if gzip:
            raise NotImplementedError("Ooops")

        # dict me
        odict = self.to_dict()
        # JSON
        jdict = p_io.jsonify(odict)
        # Write
        p_io.savejson(outfile, jdict, overwrite=True)
        # done
        print(f'Wrote: {outfile}')

    def __repr__(self):
        """Join the meta, settings and variables lines into one string."""
        # Grab em and combine
        all_r = (self.rstr_meta()
                 + self.rstr_settings()
                 + self.rstr_variables())
        # Return
        return ''.join(all_r)
class ADCPData(ProfilerData):
    """
    Class to hold ADCP data

    Adds the Doppler velocity attributes and the ``adcp_on`` setting
    to ProfilerData.
    """
    # NOTE: Python 2 style declaration; it has no effect under Python 3
    __metaclass__ = ABCMeta

    # ADCP
    udop = None
    vdop = None
    udopacross = None
    udopalong = None
    has_adcp = True
    adcp_on:bool = None

    def __init__(self, datafile:str, dataset:str, adcp_on:bool=True):
        """
        Args:
            datafile (str): Name of the data file.
            dataset (str): The name of the dataset.
            adcp_on (bool): Stored as the ``adcp_on`` setting.
        """
        # Init
        self.adcp_on = adcp_on
        ProfilerData.__init__(self, datafile, dataset)

    def profile_subset(self, profiles: np.ndarray,
                       init:bool=True):
        """
        Create a subset of the object based on the given profiles.

        Same as the parent implementation, except that ``adcp_on`` is
        carried over.  With ``init=True`` the parent builds the new
        object from ``(datafile, dataset)`` only, which would otherwise
        reset ``adcp_on`` to the constructor default.

        Args:
            profiles (np.ndarray): An array of profile indices to
                include in the subset. Or a boolean array
            init (bool): Whether to initialize a new object.

        Returns:
            ADCPData: The object containing the subset of profiles.
        """
        pData = super().profile_subset(profiles, init=init)
        pData.adcp_on = self.adcp_on
        return pData

    def cut_on_good_velocity(self, init:bool=True):
        """
        Cuts the data based on good velocity values.

        An element is good when both ``udop`` and ``vdop`` are finite
        there.  Every index along the first axis that holds at least one
        good element is kept (the first axis is presumably the profile
        axis, as `profile_subset` indexes it with profile indices --
        TODO confirm).

        Args:
            init (bool): Passed to `profile_subset`.

        Returns:
            pData (ProfilerData): A subset of the original ProfilerData object containing
                only the profiles with good velocity values.
        """
        # Cut on velocity
        good = np.isfinite(self.udop) & np.isfinite(self.vdop)
        gd_profiles = np.unique(np.where(good)[0])

        # Cut
        return self.profile_subset(gd_profiles, init=init)