Source code for pyaerocom.io.fileconventions

from configparser import ConfigParser
from os.path import basename, splitext

from pyaerocom import const
from pyaerocom.data import resources
from pyaerocom.exceptions import FileConventionError
from pyaerocom.tstype import TsType


class FileConventionRead:
    """Class that represents a file naming convention for reading Aerocom files

    Attributes
    ----------
    name : str
        name of this convention (e.g. "aerocom3")
    file_sep : str
        filename delimiter for accessing different variables
    year_pos : int
        position of year information in filename after splitting using
        delimiter :attr:`file_sep`
    var_pos : int
        position of variable information in filename after splitting using
        delimiter :attr:`file_sep`
    ts_pos : int
        position of information of temporal resolution in filename after
        splitting using delimiter :attr:`file_sep`
    vert_pos : int
        position of information about vertical resolution of data
    data_id_pos : int
        position of data ID
    """

    _io_opts = const

    # Vertical-coordinate identifiers (lower case) accepted in Aerocom 3 file
    # names, grouped by the kind of data they denote.
    AEROCOM3_VERT_INFO = {
        "2d": ["surface", "column", "modellevel", "2d"],
        "3d": ["modellevelatstations"],
    }

    def __init__(
        self,
        name="aerocom3",
        file_sep="_",
        year_pos=None,
        var_pos=None,
        ts_pos=None,
        vert_pos=None,
        data_id_pos=None,
        from_file=None,
    ):
        """Create a convention, optionally identifying it from a file name.

        Parameters
        ----------
        name : str
            name of the convention; used to look up default settings when
            `from_file` is not given
        file_sep : str
            filename delimiter
        year_pos, var_pos, ts_pos, vert_pos, data_id_pos : int, optional
            positions of the respective information in the split filename
        from_file : str, optional
            file path or file name; if given, the convention is identified
            from it via :func:`from_file`
        """
        self.name = name
        self.file_sep = file_sep

        self.year_pos = year_pos
        self.var_pos = var_pos
        self.ts_pos = ts_pos
        self.vert_pos = vert_pos
        self.data_id_pos = data_id_pos

        if from_file is not None:
            self.from_file(from_file)
        else:
            # Best effort: if no defaults can be loaded for `name`, keep the
            # values passed to the constructor.
            try:
                self.import_default(self.name)
            except Exception:
                pass

    @property
    def info_init(self):
        """Empty dictionary containing init values of infos to be
        extracted from filenames

        A new dictionary is created on every access.
        """
        return dict(
            year=None,
            var_name=None,
            ts_type=None,
            vert_code="",
            is_at_stations=False,
            data_id="",
        )

    def from_file(self, file):
        """Identify convention from a file

        Currently only two conventions (aerocom2 and aerocom3) exist that are
        identified by the delimiter used: at least four ``_`` in the file name
        select aerocom3, otherwise at least four ``.`` select aerocom2.

        Parameters
        ----------
        file : str
            file path or file name

        Returns
        -------
        FileConventionRead
            this object (with updated convention)

        Raises
        ------
        FileConventionError
            if convention cannot be identified or the file name is not valid
            for the identified convention (see :func:`check_validity`)

        Example
        -------
        >>> from pyaerocom.io import FileConventionRead
        >>> filename = 'aerocom3_CAM5.3-Oslo_AP3-CTRL2016-PD_od550aer_Column_2010_monthly.nc'
        >>> print(FileConventionRead().from_file(filename))
        pyaeorocom FileConventionRead
        name: aerocom3
        file_sep: _
        year_pos: -2
        var_pos: -4
        ts_pos: -1

        (remaining attributes omitted in this example output)
        """
        fname = basename(file)
        if fname.count("_") >= 4:
            self.import_default("aerocom3")
        elif fname.count(".") >= 4:
            self.import_default("aerocom2")
        else:
            raise FileConventionError(
                f"Could not identify convention from input file {basename(file)}"
            )
        self.check_validity(file)
        return self

    def check_validity(self, file):
        """Check if filename is valid

        Parameters
        ----------
        file : str
            file path or file name

        Raises
        ------
        FileConventionError
            if the temporal resolution encoded in the file name is not a
            valid ts_type, or if the year lies outside the range
            ``const.MIN_YEAR`` to ``const.MAX_YEAR``
        """
        info = self.get_info_from_file(file)
        year = info["year"]
        if not TsType.valid(info["ts_type"]):
            raise FileConventionError(
                f"Invalid ts_type {info['ts_type']} in filename {basename(file)}"
            )
        elif not (const.MIN_YEAR <= year <= const.MAX_YEAR):
            raise FileConventionError(f"Invalid year {info['year']} in filename {basename(file)}")

    def _info_from_aerocom3(self, file: str) -> dict:
        """Extract info from filename Aerocom 3 convention

        Parameters
        -----------
        file : str
            netcdf file name

        Returns
        -------
        dict
            dictionary containing infos that were extracted from filename

        Raises
        ------
        FileConventionError
            if any of the required pieces of information cannot be extracted
        """
        # init result dictionary
        info = self.info_init
        # file name without directory and extension, split at the delimiter
        spl = splitext(basename(file))[0].split(self.file_sep)

        # phase 3 file naming convention
        try:
            info["year"] = int(spl[self.year_pos])
        except Exception as err:
            raise FileConventionError(
                f"Failed to extract year information from file {basename(file)} "
                f"using file convention Aerocom 3 {self.name}"
            ) from err

        try:
            vert_lower = spl[self.vert_pos].lower()
            # include vars for the surface
            if vert_lower in self.AEROCOM3_VERT_INFO["2d"]:
                info["var_name"] = spl[self.var_pos]
            # also include 3d vars that provide station based data
            # and contain the string vmr
            # in this case the variable name has to be slightly changed to the
            # aerocom phase 2 naming
            elif vert_lower in self.AEROCOM3_VERT_INFO["3d"]:
                if "vmr" in spl[self.var_pos]:
                    info["var_name"] = spl[self.var_pos].replace("vmr", "vmr3d")
                else:
                    info["var_name"] = spl[self.var_pos]
            else:
                # NOTE: this error is caught by the handler below and wrapped
                # into the "Failed to extract variable name" error.
                raise FileConventionError(
                    f"Invalid file name (Aerocom 3 naming convention).\n{file}\n"
                    f"Invalid string identifier for vertical coordinate: {spl[self.vert_pos]}"
                )
        except Exception as e:
            raise FileConventionError(
                f"Failed to extract variable name from file {basename(file)} "
                f"using file convention {self.name}.\nError: {repr(e)}"
            ) from e

        try:
            info["ts_type"] = spl[self.ts_pos]
        except Exception as err:
            raise FileConventionError(
                f"Failed to extract ts_type from file {basename(file)} "
                f"using file convention {self.name}"
            ) from err

        try:
            info["vert_code"] = spl[self.vert_pos]
        except Exception as err:
            raise FileConventionError(
                f"Failed to extract vert_code from file {basename(file)} "
                f"using file convention {self.name}"
            ) from err

        try:
            # data ID is everything between data_id_pos and the variable name
            info["data_id"] = self.file_sep.join(spl[self.data_id_pos : self.var_pos])
        except Exception as err:
            raise FileConventionError(
                f"Failed to extract model name from file {basename(file)} "
                f"using file convention {self.name}"
            ) from err

        # checked against the full input string (not only the base name)
        if "atstations" in file.lower():
            info["is_at_stations"] = True
        return info

    def _info_from_aerocom2(self, file: str) -> dict:
        """Extract info from filename Aerocom 2 convention

        Parameters
        -----------
        file : str
            netcdf file name

        Returns
        -------
        dict
            dictionary containing infos that were extracted from filename

        Raises
        ------
        FileConventionError
            if any of the required pieces of information cannot be extracted
        Exception
            if the input string contains "atstations"
        """
        info = self.info_init
        if self.file_sep == ".":
            # the extension is separated by the delimiter itself, so it simply
            # becomes the last element of the split
            spl = basename(file).split(self.file_sep)
        else:
            spl = splitext(basename(file))[0].split(self.file_sep)

        try:
            info["year"] = int(spl[self.year_pos])
        except Exception as err:
            raise FileConventionError(
                f"Failed to extract year information from file {basename(file)} "
                f"using file convention {self.name}"
            ) from err

        try:
            info["var_name"] = spl[self.var_pos]
        except Exception as err:
            raise FileConventionError(
                f"Failed to extract variable information from file {basename(file)} "
                f"using file convention {self.name}"
            ) from err

        try:
            info["ts_type"] = spl[self.ts_pos]
        except Exception as err:
            raise FileConventionError(
                f"Failed to extract ts_type from file {basename(file)} "
                f"using file convention {self.name}"
            ) from err

        try:
            # data ID is everything between data_id_pos and the ts_type
            info["data_id"] = ".".join(spl[self.data_id_pos : self.ts_pos])
        except Exception as err:
            raise FileConventionError(
                f"Failed to extract name from file {basename(file)} "
                f"using file convention {self.name}"
            ) from err

        # checked against the full input string (not only the base name)
        if "atstations" in file.lower():
            raise Exception(
                "Developers: please debug "
                "(file convention Aerocom 2 should not have atstations encoded in file name)"
            )
        return info

    def get_info_from_file(self, file: str) -> dict:
        """Extract information encoded in a file name

        Dispatches to the parser that belongs to the current convention
        (:attr:`name`), which must be "aerocom3" or "aerocom2".

        Parameters
        ----------
        file : str
            file path or file name

        Returns
        -------
        dict
            dictionary with the keys defined in :attr:`info_init` (`year`,
            `var_name`, `ts_type`, `vert_code`, `is_at_stations`, `data_id`)
            and the corresponding values extracted from the filename

        Raises
        ------
        FileConventionError
            if the convention is unknown or information cannot be extracted

        Example
        -------
        >>> from pyaerocom.io import FileConventionRead
        >>> filename = 'aerocom3_CAM5.3-Oslo_AP3-CTRL2016-PD_od550aer_Column_2010_monthly.nc'
        >>> conv = FileConventionRead("aerocom3")
        >>> info = conv.get_info_from_file(filename)
        >>> info["year"], info["var_name"], info["ts_type"]
        (2010, 'od550aer', 'monthly')
        """
        if self.name == "aerocom3":
            return self._info_from_aerocom3(file)
        if self.name == "aerocom2":
            return self._info_from_aerocom2(file)
        raise FileConventionError(f"Unknown {self.name}")

    def string_mask(self, data_id, var, year, ts_type, vert_which=None):
        """Returns mask that can be used to identify files of this convention

        Parameters
        ----------
        data_id : str
            experiment ID (e.g. GISS-MATRIX.A2.CTRL)
        var : str
            variable string ID (e.g. "od550aer")
        year : int
            desired year of observation (e.g. 2012)
        ts_type : str
            string specifying temporal resolution (e.g. "daily"). If None,
            "*" is inserted instead.
        vert_which : str, optional
            string specifying the vertical coordinate. Only supported for the
            aerocom3 convention, where None is replaced by ".*".

        Returns
        -------
        str
            mask string built from the inputs

        Raises
        ------
        FileConventionError
            if `vert_which` is specified for the aerocom2 convention
        NotImplementedError
            if no mask is defined for the current convention

        Example
        -------
        conf_aero2 = FileConventionRead(name="aerocom2")
        conf_aero3 = FileConventionRead(name="aerocom3")

        data_id = "GISS-MATRIX.A2.CTRL"
        var = "od550aer"
        year = 2012
        ts_type = "daily"

        match_str_aero2 = conf_aero2.string_mask(data_id, var, year, ts_type)
        match_str_aero3 = conf_aero3.string_mask(data_id, var, year, ts_type)
        """
        if ts_type is None:
            ts_type = "*"

        if self.name == "aerocom2":
            if vert_which is not None:
                raise FileConventionError(
                    f"Specification of vert_which ({vert_which}) is not supported for "
                    f"aerocom2 naming convention"
                )
            return ".".join([".*", data_id, ts_type, var, str(year), "nc"])
        elif self.name == "aerocom3":
            if vert_which is None:
                vert_which = ".*"
            return "_".join([".*", data_id, var, vert_which, str(year), ts_type]) + ".nc"
        else:
            raise NotImplementedError(
                f"File matching mask for convention {self.name} not yet defined..."
            )

    def import_default(self, name: str):
        """Checks and load default information from database

        Reads ``file_conventions.ini`` from the ``pyaerocom.data`` resources
        and copies the entries of section `name` into the attributes of this
        object. Only keys that already exist as instance attributes are
        assigned; values are converted to int where possible.

        Parameters
        ----------
        name : str
            name of the convention (section name in the ini file)

        Raises
        ------
        NameError
            if no section `name` exists in the ini file
        """
        conf_reader = ConfigParser()
        with resources.path("pyaerocom.data", "file_conventions.ini") as path:
            conf_reader.read(path)
        if name not in conf_reader:
            raise NameError(f"No default available for {name}")
        self.name = name
        for key, val in conf_reader[name].items():
            if key in self.__dict__:
                # positions are integers, other settings remain strings
                try:
                    val = int(val)
                except (TypeError, ValueError):
                    pass
                self.__dict__[key] = val

    def from_dict(self, new_vals):
        """Load info from dictionary

        Keys that do not correspond to an existing instance attribute are
        ignored.

        Parameters
        ----------
        new_vals : dict
            dictionary containing information

        Returns
        -------
        self
        """
        for k, v in new_vals.items():
            if k in self.__dict__:
                self.__dict__[k] = v
        return self

    def to_dict(self):
        """Convert this object to a dictionary

        Returns
        -------
        dict
            the convention name, delimiter and all position attributes
        """
        return dict(
            name=self.name,
            file_sep=self.file_sep,
            year_pos=self.year_pos,
            var_pos=self.var_pos,
            ts_pos=self.ts_pos,
            vert_pos=self.vert_pos,
            data_id_pos=self.data_id_pos,
        )

    def __repr__(self):
        return f"{self.name} {super().__repr__()}"

    def __str__(self):
        # one "key: value" line per entry of to_dict(), below a header line
        lines = "".join(f"\n{k}: {v}" for k, v in self.to_dict().items())
        return "\npyaeorocom FileConventionRead" + lines