Source code for pyaerocom.io.aerocom_browser

import fnmatch
import logging
import os
import re

from pyaerocom import const
from pyaerocom._lowlevel_helpers import BrowseDict
from pyaerocom.exceptions import DataSearchError

logger = logging.getLogger(__name__)


class AerocomBrowser(BrowseDict):
    """Interface for browsing all Aerocom data directories

    Every dataset that is found by a search is stored in the instance itself,
    using the data ID as key and the corresponding data directory as value
    (see :attr:`ids_found` and :attr:`dirs_found`).

    Note
    ----
    Use :func:`find_data_dir` to retrieve the data directory of a single
    dataset and :func:`find_matches` to retrieve the IDs of all datasets that
    match a search pattern. Both methods delegate to :func:`_browse`; the
    only difference is that :func:`find_data_dir` returns one directory (str)
    while :func:`find_matches` returns a list with all matching IDs.
    """

    def _browse(self, name_or_pattern, ignorecase=True, return_if_match=True):
        """Search all Aerocom data directories that match input name or pattern

        Observation networks registered in ``const.OBSLOCS_UNGRIDDED`` are
        checked first, followed by the sub-directories of each directory in
        ``const.DATA_SEARCH_DIRS``. Each hit is stored in this instance
        (``self[<ID>] = <directory>``).

        Note
        ----
        Please do not use this method directly but either
        :func:`find_data_dir` or :func:`find_matches`.

        Parameters
        ----------
        name_or_pattern : str
            name or pattern of data (can be model or obs data). Wildcards
            follow the :mod:`fnmatch` syntax.
        ignorecase : bool
            if True, upper / lower case is ignored
        return_if_match : bool
            if True, then the data directory is returned as string, if it can
            be found, else, only a list is returned that contains all
            matches. The latter takes longer since the whole database is
            searched.

        Returns
        -------
        :obj:`str` or :obj:`list`
            Data directory (str, if ``return_if_match`` is True) or list
            containing valid Aerocom names (which can then be used to
            retrieve the paths)

        Raises
        ------
        DataSearchError
            if no match can be found or, if ``return_if_match`` is True, if
            no unique match can be found
        """
        # Compile once instead of re-parsing the pattern for every candidate.
        # NOTE: the translated pattern is used with ``search`` (as before),
        # i.e. it is not anchored at the start of the candidate name.
        regex = re.compile(
            fnmatch.translate(name_or_pattern),
            re.IGNORECASE if ignorecase else 0,
        )
        wanted = name_or_pattern.lower() if ignorecase else name_or_pattern

        def is_exact(candidate):
            """Check if candidate equals the search string (not as pattern)"""
            if ignorecase:
                candidate = candidate.lower()
            return candidate == wanted

        candidates = []
        msgs = []
        # Directories registered during *this* search. Only these are used
        # to skip duplicates, so that results cached by earlier searches on
        # the same instance do not hide valid matches of the current one.
        dirs_this_search = set()

        for obs_id, obs_path in const.OBSLOCS_UNGRIDDED.items():
            if is_exact(obs_id):
                logger.info(f"Found match for search pattern in obs network directories {obs_id}")
            elif not regex.search(obs_id):
                continue
            path = os.path.normpath(obs_path)
            if not os.path.exists(path):
                continue
            # register each obs network only once per search
            self[obs_id] = path
            candidates.append(obs_id)
            dirs_this_search.add(path)
            if return_if_match:
                return path

        for search_dir in const.DATA_SEARCH_DIRS:
            if not os.path.isdir(search_dir):
                msgs.append("directory %s does not exist\n" % search_dir)
                continue
            for subdir in os.listdir(search_dir):
                # cheap name check first, file system check only for hits
                if not regex.search(subdir):
                    continue
                if not os.path.isdir(os.path.join(search_dir, subdir)):
                    continue
                data_dir = os.path.normpath(os.path.join(search_dir, subdir))
                renamed_dir = os.path.join(data_dir, "renamed")
                if os.path.isdir(renamed_dir):
                    logger.info(f"{data_dir} has subdir renamed. Using that one")
                    data_dir = renamed_dir
                if data_dir in dirs_this_search:
                    # directory was already found before (compare whole
                    # paths, a substring test would wrongly skip e.g.
                    # ".../ModelA" after ".../ModelA_v2" was found)
                    continue
                dirs_this_search.add(data_dir)
                # append name of candidate ...
                candidates.append(subdir)
                # ... and the corresponding data directory
                self[subdir] = data_dir
                # now check if it is actually an exact match, if applicable
                if return_if_match and is_exact(subdir):
                    logger.info(f"Found match for ID {name_or_pattern}")
                    return data_dir

        for msg in msgs:
            logger.info(msg)

        if not candidates:
            raise DataSearchError(
                f"No matches could be found for search pattern {name_or_pattern}"
            )
        if not return_if_match:
            return candidates
        if len(candidates) == 1:
            logger.info(
                f"Found exactly one match for search pattern "
                f"{name_or_pattern}: {candidates[0]}"
            )
            return self[candidates[0]]
        raise DataSearchError(
            f"Found multiple matches for search pattern {name_or_pattern}. "
            f"Please choose from {candidates}"
        )

    @property
    def dirs_found(self):
        """All directories that were found"""
        return list(self.values())

    @property
    def ids_found(self):
        """All data IDs that were found"""
        return list(self)

    def find_data_dir(self, name_or_pattern, ignorecase=True):
        """Find match of input name or pattern in Aerocom database

        If the input is already stored as key in this instance (from a
        previous search), the stored directory is returned without searching
        again.

        Parameters
        ----------
        name_or_pattern : str
            name or pattern of data (can be model or obs data)
        ignorecase : bool
            if True, upper / lower case is ignored

        Returns
        -------
        str
            data directory of match

        Raises
        ------
        DataSearchError
            if no matches or no unique match can be found
        """
        if name_or_pattern in self:
            logger.info(f"{name_or_pattern} found in instance of AerocomBrowser")
            return self[name_or_pattern]
        logger.info(f"Searching database for {name_or_pattern}")
        # returns str (data directory)
        return self._browse(
            name_or_pattern, ignorecase=ignorecase, return_if_match=True
        )

    def find_matches(self, name_or_pattern, ignorecase=True):
        """Search all Aerocom data directories that match input name or pattern

        Parameters
        ----------
        name_or_pattern : str
            name or pattern of data (can be model or obs data)
        ignorecase : bool
            if True, upper / lower case is ignored

        Returns
        -------
        list
            list of names that match the pattern (corresponding paths can be
            accessed from this class instance)

        Raises
        ------
        DataSearchError
            if no matches can be found
        """
        # returns list
        return self._browse(
            name_or_pattern, ignorecase=ignorecase, return_if_match=False
        )