import logging
from pathlib import Path
from traceback import format_exc
from typing import Literal
from pydantic import (
BaseModel,
ConfigDict,
field_validator,
model_validator,
)
from pyaerocom import const
from pyaerocom._lowlevel_helpers import LayerLimits
from pyaerocom.exceptions import InitialisationError
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Vertical codes accepted for ``obs_vert_type``.
SUPPORTED_VERT_CODES: tuple[str, str, str] = ("Column", "Profile", "Surface")

# Alternative spellings of vertical codes, mapped to the supported code.
ALT_NAMES_VERT_CODES: dict = {"ModelLevel": "Profile"}

# Location codes accepted for ``instr_vert_loc``.
SUPPORTED_VERT_LOCS: tuple[str, str, str] = ("ground", "space", "airborne")
class ObsEntry(BaseModel):
    """Observation configuration for evaluation (BaseModel)

    Note
    ----
    Only :attr:`obs_id` and :attr:`obs_vars` are mandatory, the rest are optional.

    Attributes
    ----------
    obs_id : str
        ID of observation network in AeroCom database
        (e.g. 'AeronetSunV3Lev2.daily')
        Note that this can also be a custom supplied obs_id if and only if
        obs_aux_requires is provided
    obs_vars : tuple[str, ...]
        tuple of pyaerocom variable names that are supposed to be analysed
        (e.g. ('od550aer', 'ang4487aer'))
    obs_ts_type_read : :obj:`str` or :obj:`dict`, optional
        may be specified to explicitly define the reading frequency of the
        observation data (so far, this does only apply to gridded obsdata such
        as satellites). For ungridded reading, the frequency may be specified
        via :attr:`obs_id`, where applicable (e.g. AeronetSunV3Lev2.daily).
        Can be specified variable specific in form of dictionary.
    obs_vert_type : str, optional
        Aerocom vertical code encoded in the model filenames (only AeroCom 3
        and later).
    obs_aux_requires : dict, optional
        information about required datasets / variables for auxiliary
        variables.
    instr_vert_loc : str, optional
        vertical location code of observation instrument. This is used in
        the aeroval interface for separating different categories of measurements
        such as "ground", "space" or "airborne".
    is_superobs : bool
        if True, this observation is a combination of several others which all
        have to have their own obs config entry.
    only_superobs : bool
        this indicates whether this configuration is only to be used as part
        of a superobs network, and not individually.
    read_opts_ungridded : :obj:`dict`, optional
        dictionary that specifies reading constraints for ungridded reading
        (cf. :class:`pyaerocom.io.ReadUngridded`).
    only_json : bool
        Only to be set if the obs entry already has colocated data files which were
        preprocessed outside of pyaerocom. Setting to True will skip the colocation
        and just create the JSON output.
    coldata_dir : str
        Only to be set if the obs entry already has colocated data files which were
        preprocessed outside of pyaerocom. This is the directory in which the
        colocated data files are located.
    """

    ## Pydantic ConfigDict
    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        extra="allow",
        validate_assignment=True,
    )

    ######################
    ## Required attributes
    ######################
    obs_vars: str | tuple[str, ...]
    obs_id: str | tuple[str, ...]

    ######################
    ## Optional attributes
    ######################
    obs_name: str | None = None  # not expected to be set directly, rather set by ObsCollection
    obs_ts_type_read: str | dict | None = None
    obs_vert_type: Literal["Column", "Profile", "Surface", "ModelLevel"] = "Surface"
    obs_aux_requires: dict[str, dict] = {}
    instr_vert_loc: str | None = None
    is_superobs: bool = False
    only_superobs: bool = False
    # NOTE: the spelling "limts" is kept on purpose; renaming the field would
    # change the public interface of this model.
    colocation_layer_limts: tuple[LayerLimits, ...] | None = None
    profile_layer_limits: tuple[LayerLimits, ...] | None = None
    web_interface_name: str | None = None
    diurnal_only: bool = False
    obs_type: str | None = None
    read_opts_ungridded: dict = {}

    # attributes for reading colocated data files made outside of pyaerocom
    only_json: bool = False
    # TODO: Would like this to be a Path but need to see if it will cause issues down the line
    coldata_dir: str | Path | None = None

    #############
    ## Validators
    #############
    @field_validator("obs_vars")
    @classmethod
    def validate_obs_vars(cls, v):
        """Wrap a single variable name into a 1-tuple; return other input unchanged."""
        if isinstance(v, str):
            return (v,)
        return v

    @field_validator("instr_vert_loc")
    @classmethod
    def validate_instr_vert_loc(cls, v):
        """Check that a string ``instr_vert_loc`` is one of SUPPORTED_VERT_LOCS

        Parameters
        ----------
        v : str or None
            candidate value for ``instr_vert_loc``

        Returns
        -------
        str or None
            the unchanged input value

        Raises
        ------
        AttributeError
            if `v` is a string that is not in SUPPORTED_VERT_LOCS
        """
        if isinstance(v, str) and v not in SUPPORTED_VERT_LOCS:
            # The message must not reference ``cls.obs_id``: obs_id is a
            # required field without a class-level value, so evaluating it
            # here would fail before this message could be built.
            raise AttributeError(
                f"Invalid value for instr_vert_loc: {v}. "
                f"Please choose from: {SUPPORTED_VERT_LOCS}"
            )
        # The validator's return value is what ends up stored on the model, so
        # the value has to be handed back explicitly (otherwise it becomes None).
        return v

    @field_validator("obs_vert_type")
    @classmethod
    def check_obs_vert_type(cls, ovt):
        """Check if obs_vert_type string is valid alias

        Parameters
        ----------
        ovt : str
            obs_vert_type string

        Returns
        -------
        str
            valid obs_vert_type

        Raises
        ------
        ValueError
            if `ovt` is invalid
        """
        if ovt in SUPPORTED_VERT_CODES:
            return ovt
        if ovt in ALT_NAMES_VERT_CODES:
            # Alternative names are accepted but translated to the supported code.
            logger.warning(
                f"Please use {ALT_NAMES_VERT_CODES[ovt]} for obs_vert_code and not {ovt}"
            )
            return ALT_NAMES_VERT_CODES[ovt]
        # SUPPORTED_VERT_CODES is a tuple, so the alias names must be turned
        # into a tuple as well before concatenating.
        valid = SUPPORTED_VERT_CODES + tuple(ALT_NAMES_VERT_CODES)
        raise ValueError(f"Invalid value for obs_vert_type: {ovt}. Supported codes are {valid}.")

    @model_validator(mode="after")
    def check_cfg(self):
        """Validate the assembled entry and run the auxiliary dataset check

        Returns
        -------
        ObsEntry
            this instance

        Raises
        ------
        ValueError
            if this is not a superobs entry and ``obs_id`` is neither str,
            tuple nor dict
        """
        if not self.is_superobs and not isinstance(self.obs_id, str | tuple | dict):
            raise ValueError(
                f"Invalid value for obs_id: {self.obs_id}. Need str, tuple, or dict "
                f"or specification of ids and variables via obs_compute_post"
            )
        self.check_add_obs()
        return self

    ##########
    ## Methods
    ##########
    def check_add_obs(self):
        """Check if this dataset is an auxiliary post dataset

        Does nothing if ``obs_aux_requires`` is empty. Otherwise, if ``obs_id``
        is not in ``const.OBS_IDS_UNGRIDDED``, this entry is passed to
        ``const.add_ungridded_post_dataset``.

        Raises
        ------
        NotImplementedError
            if ``obs_aux_requires`` is set but ``obs_type`` is not "ungridded"
        InitialisationError
            if ``const.add_ungridded_post_dataset`` fails
        """
        if not self.obs_aux_requires:
            return
        if self.obs_type != "ungridded":
            raise NotImplementedError(
                f"Cannot initialise auxiliary setup for {self.obs_id}. "
                f"Aux obs reading is so far only possible for ungridded observations."
            )
        if self.obs_id not in const.OBS_IDS_UNGRIDDED:
            try:
                const.add_ungridded_post_dataset(**self.model_dump())
            except Exception as err:
                # Chain the original exception so the root cause stays
                # attached in addition to the formatted traceback text.
                raise InitialisationError(
                    f"Cannot initialise auxiliary reading setup for {self.obs_id}. "
                    f"Reason:\n{format_exc()}"
                ) from err

    def get_all_vars(self) -> tuple[str, ...]:
        """
        Get a tuple of all variables associated with this entry

        Returns
        -------
        tuple[str, ...]
        """
        return self.obs_vars

    def has_var(self, var_name) -> bool:
        """
        Check if input variable is defined in entry

        Returns
        -------
        bool
            True if entry has variable available, else False
        """
        return var_name in self.get_all_vars()

    def get_vert_code(self, var):
        """Get vertical code name for obs / var combination

        Parameters
        ----------
        var : str
            variable name; only used for lookup if ``obs_vert_type`` is a dict

        Returns
        -------
        str
            vertical code, one of SUPPORTED_VERT_CODES

        Raises
        ------
        ValueError
            if ``obs_vert_type`` is neither a string nor a dict containing
            `var`, or if the resulting code is not in SUPPORTED_VERT_CODES
        """
        vc = self.obs_vert_type
        if isinstance(vc, str):
            val = vc
        elif isinstance(vc, dict) and var in vc:
            val = vc[var]
        else:
            raise ValueError(f"invalid value for obs_vert_type: {vc}")
        if val not in SUPPORTED_VERT_CODES:
            raise ValueError(
                f"invalid value for obs_vert_type: {val}. Choose from {SUPPORTED_VERT_CODES}."
            )
        return val