import logging
from configparser import ConfigParser
import numpy as np
from pyaerocom._lowlevel_helpers import BrowseDict
from pyaerocom.data import resources
logger = logging.getLogger(__name__)
# [docs]  (leftover Sphinx "view source" link marker; not part of the code)
class DataSource(BrowseDict):
    """Dict-like object defining a data source

    On construction all attributes listed below are initialised to None,
    then updated from the keyword arguments. If ``data_id`` is set after
    that, further attributes are read from the packaged ini file (cf.
    :func:`_parse_source_info_from_ini`).

    Attributes
    ----------
    data_id
        name (or ID) of dataset (e.g. AeronetSunV3Lev2.daily)
    dataset_name
        name of dataset (e.g. AERONET)
    data_product
        data product (e.g. SDA, Inv, Sun for Aeronet)
    data_version
        version of data (e.g. 3)
    data_level
        level of data (e.g. 2)
    framework : str
        ID of framework to which data is associated (e.g. ACTRIS, GAW)
    instr_vert_loc : str
        Vertical location of measuring instrument(s).
    revision_date
        last revision date of dataset
    website : str
        website entry of the dataset (stored as string)
    ts_type_src
        sampling frequency as defined in data files (use None if undefined)
    stat_merge_pref_attr : str
        optional, a metadata attribute that is available in data and that
        is used to order the individual stations by relevance in case overlaps
        occur. The associated values of this attribute need to be sortable
        (e.g. revision_date). This is only relevant in case overlaps occur.

    Parameters
    ----------
    **info
        attribute values, applied via :func:`update`.

    Raises
    ------
    OSError
        if ``data_id`` is provided and the ini file is not available.
    """

    # Vertical locations listed for instr_vert_loc. NOTE(review): no check
    # against this list is visible in this class - confirm where it is used.
    SUPPORTED_VERT_LOCS = ["ground", "space", "airborne"]

    # Converters applied to the (string) values read from the ini file.
    # Options not listed here are stored as str.
    _types = dict(
        dataset_name=str,
        data_product=str,
        data_version=float,
        data_level=float,
        framework=str,
        instr_vert_loc=str,
        ts_type_src=str,
        stat_merge_pref_attr=str,
        revision_date=np.datetime64,
        website=str,
    )

    # Name of the ini file looked up in the "pyaerocom.data" package
    _ini_file_name = "data_sources.ini"

    def __init__(self, **info):
        self.data_id = None
        self.dataset_name = None
        self.data_product = None
        self.data_version = None
        self.data_level = None
        self.framework = None
        self.instr_vert_loc = None
        self.revision_date = None
        self.website = None
        self.ts_type_src = None
        self.stat_merge_pref_attr = None

        self.update(**info)
        # Unlike load_dataset_info, errors are NOT suppressed here.
        if self.data_id is not None:
            self._parse_source_info_from_ini()

    @property
    def data_dir(self):
        """Directory containing data files"""
        # Imported locally (presumably to avoid a circular import - confirm)
        from pyaerocom.io.helpers import get_obsnetwork_dir

        return get_obsnetwork_dir(self.data_id)

    def dataset_str(self):
        """Build a short display string for this dataset

        Returns
        -------
        str
            ``<dataset_name>`` followed by the available version / level
            information in parentheses, e.g. ``NAME(v3.0, Lev 2.0)``,
            ``NAME(v3.0)``, ``NAME(Lev 2.0)`` or just ``NAME``. If
            ``dataset_name`` is None, ``data_id`` is returned instead (empty
            string if that is None as well).
        """
        if self.dataset_name is None:
            # Fall back to the ID; avoid concatenating None to a string.
            return "" if self.data_id is None else "" + self.data_id

        details = []
        if self.data_version is not None:
            details.append(f"v{self.data_version}")
        if self.data_level is not None:
            details.append(f"Lev {self.data_level}")

        text = "" + self.dataset_name
        # Only add parentheses when there is something to put inside them
        # (previously a dangling ")" was appended if both were missing).
        if details:
            text += f"({', '.join(details)})"
        return text

    def load_dataset_info(self):
        """Best-effort wrapper for :func:`_parse_source_info_from_ini`

        Exceptions raised while parsing are logged as warning and suppressed,
        so that calling this method never fails.
        """
        try:
            self._parse_source_info_from_ini()
        except Exception as exc:
            logger.warning("Could not load dataset info for data_id %s: %r", self.data_id, exc)

    def _parse_source_info_from_ini(self):
        """Parse source info from ini file

        Reads the section named like :attr:`data_id` (if it exists) and
        assigns each option to this object, converted with the matching
        entry in :attr:`_types` or with ``str`` if there is none.

        Raises
        ------
        OSError
            if the ini file is not available as package resource.
        """
        if not resources.is_resource("pyaerocom.data", self._ini_file_name):
            raise OSError(f"File {self._ini_file_name} does not exist")

        parser = ConfigParser()
        with resources.path("pyaerocom.data", self._ini_file_name) as path:
            parser.read(path)

        # A missing section is not an error: attributes keep their values.
        if self.data_id in parser:
            for k, v in parser[self.data_id].items():
                if k in self._types:
                    self[k] = self._types[k](v)
                else:
                    self[k] = str(v)
# [docs]  (leftover Sphinx "view source" link marker; not part of the code)
class AerocomDataID:
    """
    Class representing a model data ID following AeroCom PhaseIII conventions

    The ID must contain 4 substrings with meta parameters:

    <ModelName>-<MeteoConfigSpecifier>_<ExperimentName>-<PerturbationName>

    E.g.

    NorESM2-met2010_CTRL-AP3

    For more information see `AeroCom diagnostics spreadsheet <https://docs.google.com/spreadsheets/d/1NiHLVTDsBo0JEBSnnDECNI2ojUnCVlxuy2PFrsRJW38/edit#gid=1475397852>`__

    This interface can be used to make sure a provided data ID is following
    this convention and to extract the corresponding meta parameters as
    dictionary (:func:`to_dict`) or to create an data_id from the corresponding
    meta parameters :func:`from_dict`.

    Parameters
    ----------
    data_id : str, optional
        data ID to be evaluated. If provided, `meta_info` is ignored.
    **meta_info
        values for all entries in :attr:`KEYS`, used to build the data ID
        if `data_id` is None.

    Note
    ----
    The class defines ``__eq__`` but no ``__hash__``, so instances are not
    hashable.
    """

    DELIM = "_"
    SUBDELIM = "-"
    KEYS = ["model_name", "meteo", "experiment", "perturbation"]

    def __init__(self, data_id=None, **meta_info):
        self._data_id = None
        self._values = None

        if data_id is not None:
            self.data_id = data_id
        elif meta_info:
            self._values_from_dict(meta_info)

    @property
    def data_id(self):
        """
        str
            AeroCom data ID
        """
        return self._data_id

    @data_id.setter
    def data_id(self, val):
        # Evaluate first so that an invalid input leaves the object unchanged
        self._values = self._eval_data_id(val)
        self._data_id = val

    @property
    def values(self):
        """
        list
            meta values in the order of :attr:`KEYS`

        Raises
        ------
        AttributeError
            if no values are set yet.
        """
        if self._values is not None:
            return self._values
        raise AttributeError("Meta value list is not set.")

    @values.setter
    def values(self, val):
        if not isinstance(val, list) or len(val) != len(self.KEYS):
            raise ValueError(f"Invalid input: need list of length {len(self.KEYS)}")
        # this will first create a data_id string from input values and
        # then call setter method to make sure the input is correct.
        self.data_id = self.from_values(val)

    def to_dict(self):
        """Convert data_id to dictionary

        Raises
        ------
        AttributeError
            if neither a data ID nor meta values have been set.

        Returns
        -------
        dict
            dictionary with metadata information
        """
        # Goes through the property so that an unset instance fails with the
        # same clear error instead of ``len(None)``.
        vals = self.values
        if len(vals) != len(self.KEYS):
            # Re-evaluate and actually keep the result (it used to be dropped)
            vals = self._values = self._eval_data_id(self.data_id)
        return dict(zip(self.KEYS, vals))

    def _values_from_dict(self, meta):
        """Set data ID and values from dictionary containing all :attr:`KEYS`

        Raises
        ------
        KeyError
            if one of the keys is missing in `meta`.
        """
        vals = []
        for key in self.KEYS:
            if key not in meta:
                raise KeyError(f"Missing specification of {key} in input meta dict")
            vals.append(meta[key])
        # NB: the values are stored as provided, the generated ID is not
        # passed through _eval_data_id.
        self._data_id = self.from_values(vals)
        self._values = vals

    @staticmethod
    def from_dict(meta):
        """
        Create instance of AerocomDataID from input meta dictionary

        Parameters
        ----------
        meta : dict
            dictionary containing required keys (cf. :attr:`KEYS`) and
            corresponding values to create an data_id

        Raises
        ------
        KeyError
            if not all information required is provided

        Returns
        -------
        AerocomDataID
        """
        return AerocomDataID(**meta)

    @staticmethod
    def from_values(values):
        """
        Create data_id from list of values

        Note
        ----
        The values have to be in the right order, cf. :attr:`KEYS`

        Parameters
        ----------
        values : list
            list containing values for each key in :attr:`KEYS`

        Raises
        ------
        ValueError
            if length of input list mismatches length of :attr:`KEYS`

        Returns
        -------
        str
            generated data_id
        """
        if len(values) != 4:
            raise ValueError("Need 4 entries model_name, meteo_config, experiment, perturbation")
        return "{}-{}_{}-{}".format(*values)

    def _eval_data_id(self, val):
        """
        Check and extract meta information from input data_id

        IDs that do not follow the format
        <model_name>-<meteo_config>_<experiment>-<perturbation> are not
        rejected: a debug message is logged and the parts that cannot be
        identified are left as empty strings.

        Parameters
        ----------
        val : str
            data_id

        Raises
        ------
        ValueError
            if input is not string

        Returns
        -------
        list
            list of strings with one entry for each key in :attr:`KEYS`
            (empty string where no value could be extracted).
        """
        if not isinstance(val, str):
            raise ValueError(f"Invalid input for data_id. Need str. Got {val}")

        values = [""] * len(self.KEYS)
        spl = val.split(self.DELIM)
        if len(spl) != 2:
            logger.debug(
                "Invalid or old data ID %s. Consider format <model-name>-<meteo-config>_<experiment>-<perturbation>",
                val,
            )
            # whole ID is used as model name
            values[0] = val
            return values

        # first part: <model_name>-<meteo_config>
        sub = spl[0].split(self.SUBDELIM, 1)
        if len(sub) == 2:
            values[0] = sub[0]  # model_name
            meteo = sub[1]
            if meteo.startswith("met"):
                values[1] = meteo  # meteo_config
            else:
                logger.debug(
                    "Meteorology config substring in data_id %s needs to start with met.", meteo
                )
                # no valid meteo config: keep the full first part as model name
                values[0] = spl[0]
        else:
            values[0] = spl[0]

        # second part: <experiment>-<perturbation>
        sub = spl[1].split(self.SUBDELIM, 1)
        if len(sub) == 2:
            values[2] = sub[0]
            values[3] = sub[1]
        else:
            values[2] = spl[1]
        return values

    def __eq__(self, other):
        # Comparison is done against the string representation of `other`
        return self._data_id == str(other)

    def __repr__(self):
        # str() guarantees a string even if no data ID is set (None)
        return str(self._data_id)

    def __str__(self):
        return str(self._data_id)
# List built by iterating over a default-constructed StationMetaData instance.
# NOTE(review): StationMetaData is defined outside the visible part of this
# file; presumably iterating it yields its metadata keys - confirm.
STANDARD_META_KEYS = list(StationMetaData())