Source code for profiler.profilerdata

""" Simple Class to hold data from a Profiler"""

import numpy as np
import warnings

from abc import ABCMeta, abstractmethod

import pandas

from profiler.loading import binned
from profiler import io as p_io

from IPython import embed

class ProfilerData:
    """
    Abstract base class for Profilers

    Concrete profilers are expected to provide ``raw_loader`` and the key
    lists ``profile_arrays``, ``depth_arrays``, ``profile_depth_arrays`` and
    ``scalar_keys`` (as class attributes or through ``from_dict``); the
    methods below read those lists to know which attributes hold data.

    Attributes:
        dataset (str): The name of the glider data
    """
    # NOTE: this is the Python 2 spelling; under Python 3 it is an ordinary
    # class attribute, so abstractness is by convention only. Kept as-is so
    # that instantiating the class keeps working.
    __metaclass__ = ABCMeta

    # Max dimension for z/p (just to keep track of the arrays)
    Ndepth:int = None

    # Standard variables
    lat = None
    lon = None
    time = None
    profile_id = None

    # glider offset
    distE = None  # Eastward distance from the survey midpoint
    distN = None  # Northward distance from the survey midpoint

    data_keys:list = []

    # Meta
    missid:int = None
    platform:str = None
    pi:str = None  # Principal Investigator
    pdict:dict = None  # dict on the profiler
    dataset = None  # The name of the dataset
    meta_keys:list = []

    # I/O
    datafile:str = None

    # CTD -- Nprof, Ndepth
    s = None
    t = None
    p = None
    theta = None
    depth = None
    qual:dict = None

    # ADCP
    has_adcp:bool = False

    def __init__(self, datafile:str, dataset:str):
        """
        Args:
            datafile (str): The path to the data file.
            dataset (str): The name of the dataset.
        """
        self.datafile = datafile
        self.dataset = dataset

    @classmethod
    def from_binned_file(cls, datafile:str, bin_style:str,
                         dataset:str, in_field:bool=False,
                         missid:int=None, extra_dict:dict=None):
        """
        Instantiate from a binned file; the loading is done by ``binned.load``.

        Args:
            datafile (str): The path to the data file.
            bin_style (str): Passed through to ``binned.load``.
            dataset (str): The name of the dataset.
            in_field (bool): Stored on the object as ``in_field``.
            missid (int): Passed to ``binned.load`` as ``in_missid``.
            extra_dict (dict): Optional attributes set on the object
                *before* the file is loaded.

        Returns:
            ProfilerData: The populated object (an instance of ``cls``).
        """
        # Init
        pData = cls(datafile, dataset)
        pData.datafile = datafile
        pData.dataset = dataset
        pData.in_field = in_field
        if extra_dict is not None:
            for key, value in extra_dict.items():
                setattr(pData, key, value)

        # Load
        binned.load(pData, bin_style, in_missid=missid)

        return pData

    @staticmethod
    def raw_loader(datafile:str):
        """
        Load a raw data file. Must be overridden by subclasses.

        ``from_rawfile`` calls this on the class, i.e.
        ``cls.raw_loader(datafile)``, and unpacks the result as
        ``(d, darrays)`` which it hands to ``from_dict``.

        Args:
            datafile (str): The path to the data file.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError(
            "raw_loader must be implemented by the subclass")

    @classmethod
    def from_rawfile(cls, datafile:str, dataset:str,
                     in_field:bool=False, mdict:dict=None,
                     **kwargs):
        """ Load a raw data file using the class's ``raw_loader``.

        Parameters:
            datafile (str): The path to the data file.
            dataset (str): The name of the dataset.
            in_field (bool): Whether the data is in-field or not.
            mdict (dict): Optional metadata; ``datafile`` and ``dataset``
                are added to a copy of it (the input is not modified).
            **kwargs: Accepted for compatibility; not used here.

        Returns:
            ProfilerData: The loaded object (an instance of ``cls``).
        """
        # meta dict -- work on a copy so the caller's dict is left alone
        mdict = {} if mdict is None else dict(mdict)
        mdict['datafile'] = datafile
        mdict['dataset'] = dataset

        # Generate dict
        d, darrays = cls.raw_loader(datafile)

        # Init
        return cls.from_dict(d, darrays, mdict, dataset, in_field=in_field)

    @classmethod
    def from_dict(cls, d:dict, darrays:dict, mdict:dict,
                  dataset:str, in_field:bool=False):
        """
        Create a ProfilerData object from a dictionary of data.

        Note that ``__init__`` is bypassed: the object is created with
        ``cls.__new__`` and filled attribute by attribute.

        Args:
            d (dict): A dictionary of data. It must hold an entry for
                every key named in the lists of ``darrays``, e.g.
                    time (np.ndarray): An array of times.
                    lat (np.ndarray): An array of latitudes.
                    lon (np.ndarray): An array of longitudes.
            darrays (dict): Lists of keys describing ``d``.
                Required keys are:
                    profile_arrays (list): Keys of the profile arrays.
                    depth_arrays (list): Keys of the depth arrays.
                    profile_depth_arrays (list): Keys of the
                        profile + depth arrays.
                    scalar_keys (list): Keys of the scalars.
            mdict (dict): A dictionary of metadata. Each entry becomes an
                attribute and its keys become ``meta_keys``.
            dataset (str): The name of the dataset.
            in_field (bool): Whether the data is from the infield processing.

        Returns:
            ProfilerData: A ProfilerData object containing the data
                from the dictionary.
        """
        # Init
        pData = cls.__new__(cls)
        pData.dataset = dataset
        pData.in_field = in_field

        # Meta dict
        for key, value in mdict.items():
            setattr(pData, key, value)
        pData.meta_keys = list(mdict.keys())

        # Key lists (these must be set before they are read just below)
        for key, value in darrays.items():
            setattr(pData, key, value)

        # Scalars, depth arrays, profile arrays, profile + depth arrays
        for key_list in (pData.scalar_keys, pData.depth_arrays,
                         pData.profile_arrays, pData.profile_depth_arrays):
            for key in key_list:
                setattr(pData, key, d[key])

        # Return
        return pData

    @property
    def ptime(self):
        """ ``time`` converted by pandas, interpreting it as seconds """
        return pandas.to_datetime(self.time, unit='s')

    @property
    def Nprof(self):
        """ Number of profiles, i.e. the length of ``time`` """
        return len(self.time)

    @property
    def darrays(self):
        """ dict of the four key lists, as consumed by ``from_dict`` """
        return {key: getattr(self, key)
                for key in ('profile_arrays', 'depth_arrays',
                            'profile_depth_arrays', 'scalar_keys')}

    @property
    def meta_dict(self):
        """ dict of the metadata named in ``meta_keys`` (missing ones skipped) """
        return {attr: getattr(self, attr)
                for attr in self.meta_keys if hasattr(self, attr)}

    def cut_on_reltime(self, timecut:tuple):
        """
        Cut the profiler data on relative time.

        Time is rescaled so the earliest profile is at 0 and the latest
        at 1; profiles with ``timecut[0] < reltime <= timecut[1]`` are kept.
        The lower bound is exclusive, so the earliest profile is dropped
        when ``timecut[0]`` is 0.

        Args:
            timecut (tuple): range of relative times to include, 0 to 1

        Returns:
            pData (ProfilerData): A new object holding only the
                profiles inside the time range.
        """
        # Relative time
        min_time = self.time.min()
        max_time = self.time.max()
        reltime = (self.time - min_time) / (max_time - min_time)

        # Cut on time
        keep = (reltime > timecut[0]) & (reltime <= timecut[1])

        # Cut
        return self.profile_subset(np.where(keep)[0])

    def profile_subset(self, profiles: np.ndarray, init:bool=True):
        """
        Create a subset of the ProfilerData object based on the given profiles.

        Args:
            profiles (np.ndarray): An array of profile indices
                to include in the subset. Or a boolean array
            init (bool): If True, build and return a new object and leave
                this one untouched. If False, cut this object in place.

        Returns:
            ProfilerData: The object holding the subset of profiles
                (``self`` when ``init`` is False).
        """
        key_lists = ('profile_arrays', 'depth_arrays',
                     'profile_depth_arrays', 'scalar_keys')

        # Init
        if init:
            pData = self.__class__(self.datafile, self.dataset)

            # Meta (dict.fromkeys drops duplicates, keeping the order)
            for key in dict.fromkeys(self.meta_keys):
                setattr(pData, key, getattr(self, key))
            pData.meta_keys = list(self.meta_keys)
            if hasattr(self, 'in_field'):
                pData.in_field = self.in_field

            # Key lists -- without these the new object cannot describe
            # itself (repr, to_dict, darrays)
            for name in key_lists:
                if hasattr(self, name):
                    setattr(pData, name, list(getattr(self, name)))

            # Scalars and depth arrays do not depend on the profile
            # selection; they are passed along as-is (not copied)
            for name in ('scalar_keys', 'depth_arrays'):
                for key in getattr(self, name, ()):
                    setattr(pData, key, getattr(self, key))
        else:
            pData = self

        # Cut on profiles; the profile index is the first axis in both
        # cases. Arrays that were never filled (None) stay None.
        for name in ('profile_arrays', 'profile_depth_arrays'):
            for key in getattr(self, name):
                values = getattr(self, key)
                setattr(pData, key,
                        None if values is None else values[profiles])

        # Return
        return pData

    def rstr_meta(self):
        """ Return the metadata lines of the representation, as a list """
        return [
            f"{self.__class__.__name__} object for {self.dataset}\n",
            f" Mission ID: {self.missid}\n",
            f" Number of profiles: {self.Nprof}\n",
            f" Time range: {self.ptime.min()} to {self.ptime.max()}\n",
        ]

    def rstr_settings(self):
        """ Return the settings lines of the representation (none here) """
        # Settings (adcp_on, in_field)
        return []

    def rstr_variables(self):
        """ Return one line per filled array, giving its shape, as a list """
        rstr_var = [" Variables:\n"]
        for key_list in (self.depth_arrays, self.profile_arrays,
                         self.profile_depth_arrays):
            for key in key_list:
                values = getattr(self, key)
                if values is not None:
                    rstr_var.append(f" {key}: {values.shape}\n")
        return rstr_var

    def to_dict(self):
        """
        Collect the object into a dict.

        Holds every attribute named in ``meta_keys``, ``data_keys``,
        ``scalar_keys`` and the three array key lists, plus the
        ``meta_keys``, ``data_keys`` and ``scalar_keys`` lists themselves.

        Returns:
            dict: attribute name -> value
        """
        out_dict = {}

        # Meta, data, scalars, then the arrays
        for key_list in (self.meta_keys, self.data_keys, self.scalar_keys,
                         self.depth_arrays, self.profile_arrays,
                         self.profile_depth_arrays):
            for key in key_list:
                out_dict[key] = getattr(self, key)

        # Key me
        for keys in ('meta_keys', 'data_keys', 'scalar_keys'):
            out_dict[keys] = getattr(self, keys)

        return out_dict

    def write(self, outfile:str, gzip:bool=False):
        """
        Write the object to a JSON file, overwriting any existing one.

        Args:
            outfile (str): The path of the output file.
            gzip (bool): Not supported yet.

        Raises:
            NotImplementedError: If ``gzip`` is True. This is raised
                before anything is written.
        """
        # gzip? Fail before touching the disk
        if gzip:
            raise NotImplementedError("Ooops")

        # dict me
        odict = self.to_dict()

        # JSON
        jdict = p_io.jsonify(odict)

        # Write
        p_io.savejson(outfile, jdict, overwrite=True)

        # done
        print(f'Wrote: {outfile}')

    def __repr__(self):
        """ Join the meta, settings and variables lines """
        return ''.join(self.rstr_meta() + self.rstr_settings()
                       + self.rstr_variables())
class ADCPData(ProfilerData):
    """
    Class to hold ADCP data

    Attributes:
        adcp_on (bool): Set from the constructor argument of the same name.
    """
    # NOTE: Python 2 spelling; it has no effect under Python 3
    __metaclass__ = ABCMeta

    # ADCP
    # udop and vdop are the arrays tested by cut_on_good_velocity
    udop = None
    vdop = None
    udopacross = None
    udopalong = None

    has_adcp = True
    adcp_on:bool = None

    def __init__(self, datafile:str, dataset:str, adcp_on:bool=True):
        """
        Args:
            datafile (str): The path to the data file.
            dataset (str): The name of the dataset.
            adcp_on (bool): Stored on the object as ``adcp_on``.
        """
        # Init
        self.adcp_on = adcp_on
        ProfilerData.__init__(self, datafile, dataset)

    def profile_subset(self, profiles: np.ndarray, init:bool=True):
        """
        Create a subset of the object based on the given profiles.

        Same as the parent method, except that ``adcp_on`` is carried
        over: a new object is built with the constructor default
        (``adcp_on=True``), which would otherwise replace this
        object's setting.

        Args:
            profiles (np.ndarray): An array of profile indices
                to include in the subset. Or a boolean array
            init (bool): Whether to build a new object (True) or
                cut this one in place (False).

        Returns:
            ADCPData: The object holding the subset of profiles.
        """
        pData = super().profile_subset(profiles, init=init)
        pData.adcp_on = self.adcp_on
        return pData

    def cut_on_good_velocity(self, init:bool=True):
        """
        Cuts the data based on good velocity values.

        A profile is kept when at least one of its entries is finite
        in both ``udop`` and ``vdop``.

        Args:
            init (bool): Passed on to ``profile_subset``.

        Returns:
            pData (ProfilerData): The object holding only the profiles
                with good velocity values.
        """
        # Cut on velocity
        good = np.isfinite(self.udop) & np.isfinite(self.vdop)

        # Indices along the first axis are used as the profile indices
        gd_profiles = np.unique(np.nonzero(good)[0])

        # Cut
        return self.profile_subset(gd_profiles, init=init)