Source code for pyaerocom.aeroval.utils

import logging
import os

import numpy as np

from pyaerocom.aeroval import EvalSetup, ExperimentProcessor
from pyaerocom.griddeddata import GriddedData
from pyaerocom.helpers import make_dummy_cube_latlon, numpy_to_cube
from pyaerocom.variable_helpers import get_variable

logger = logging.getLogger(__name__)


[docs] def make_config_template(proj_id: str, exp_id: str) -> EvalSetup: """ Make a template for an AeroVal evaluation setup Parameters ---------- proj_id : str ID of project. exp_id : str ID of experiment Returns ------- EvalSetup template evaluation setup (all defaults are set) that can be used to add model and obs entries, and modified to meet the purposes. """ return EvalSetup(proj_id, exp_id)
[docs] def compute_model_average_and_diversity( cfg, var_name, model_names=None, ts_type=None, lat_res_deg=2, lon_res_deg=3, data_id=None, avg_how=None, extract_surface=True, ignore_models=None, comment=None, model_use_vars=None, ): """Compute median or mean model based on input models Note ---- - BETA version that will likely undergo revisions. - Time selection currently not properly handled Parameters ---------- cfg : AerocomEvaluation analysis instance var_name : str name of variable model_names : list, optional list of model names. If None, all entries in input engine are used. ts_type : str, optional output freq. Defaults to monthly. lat_res_deg : int, optional output latitude resolution, defaults to 2 degrees. lon_res_deg : int, optional output longitude resolution, defaults to 3 degrees. data_id : str, optional output data_id of ensemble model. avg_how : str, optional how to compute averages (choose from mean or median), defaults to "median". extract_surface : bool if True (and if data contains model levels), surface level is extracted ignore_models : list, optional list of models to be ignored comment : str, optional comment string added to metadata of output data objects. model_use_vars : dict, optional model variables to be used. Returns ------- GriddedData ensemble model for input variable computed averaged using median or mean (input avg_how). Default is median. GriddedData corresponding diversity field, if avg_how is "mean", then computed using definition from Textor et al., 2006 (ACP) DOI: 10.5194/acp-6-1777-2006. If avg_how is "median" then interquartile range is used (Q3-Q1)/Q2 GriddedData or None Q1 field (only output if avg_how is median) GriddedData or None Q3 field (only output if avg_how is median) GriddedData or None standard deviation field (only output if avg_how is mean) """ if not isinstance(cfg, ExperimentProcessor): raise ValueError("invalid input, need ExperimentProcessor") cfg.cfg._check_time_config() if ts_type is None: ts_type = "monthly" if avg_how is None: avg_how = "median" if model_use_vars is None: model_use_vars = {} if ignore_models is None: ignore_models = [] if avg_how == "mean": avg_fun = np.mean elif avg_how == "median": avg_fun = np.median else: raise ValueError(f"Invalid input for avg_how {avg_how}") if data_id is None: data_id = f"AEROCOM-{avg_how.upper()}" if model_names is None: model_names = list(cfg.cfg.model_cfg) if len(ignore_models) > 0: models = [] for model in model_names: if not model in ignore_models: models.append(model) else: models = model_names if not len(models) > 1: raise ValueError("Need more than one model to compute average...") dummy = make_dummy_cube_latlon(lat_res_deg=lat_res_deg, lon_res_deg=lon_res_deg) loaded = [] from_files = [] from_models = [] from_vars = [] models_failed = [] unit_out = get_variable(var_name).units for mname in models: logger.info(f"Adding {mname} ({var_name})") mid = cfg.cfg.model_cfg.get_entry(mname)["model_id"] if mid == data_id or mname == data_id: continue read_var = var_name col = cfg.get_colocator(model_name=mname) if mname in model_use_vars: muv = model_use_vars[mname] if var_name in muv: read_var = muv[var_name] try: data = col.get_model_data(read_var) if not data.units == unit_out: data.convert_unit(unit_out) elif not data.longitude.circular: if not data.check_lon_circular(): raise Exception(f"Longitude of {mname} is not circular...") data.reorder_dimensions_tseries() data = data.resample_time(ts_type) if data.ndim == 4: if extract_surface: data = data.extract_surface_level() else: raise NotImplementedError("Cannot process ModelLevel fields yet") data = data.regrid(dummy) logger.info("Success!") except Exception as e: models_failed.append(mid) logger.info(f"Failed! Reason: {e}") continue loaded.append(data.cube.data) from_files.extend(data.from_files) from_models.append(data.data_id) from_vars.append(data.var_name) if not len(loaded) > 1: raise ValueError("Can only compute average if more than one model is available") from_files = [os.path.basename(f) for f in from_files] dims = [data.time, dummy.coord("latitude"), dummy.coord("longitude")] # the merged data objects arr = np.asarray(loaded) # average (mean or median) avgarr = avg_fun(arr, axis=0) if avg_how == "mean": stdarr = np.std(arr, axis=0) divarr = np.std(arr / avgarr, axis=0) * 100 else: q1arr = np.quantile(arr, 0.25, axis=0) q3arr = np.quantile(arr, 0.75, axis=0) divarr = (q3arr - q1arr) / avgarr * 100 if comment is None: comment = f"Ensemble {avg_how} for variable {var_name}. " # median or mean avg_out = GriddedData( numpy_to_cube( avgarr, dims=dims, var_name=var_name, units=data.units, ts_type=ts_type, data_id=data_id, from_files=from_files, from_models=from_models, from_vars=from_vars, models_failed=models_failed, comment=comment, ) ) commentdiv = comment + " Diversity field in units of % (IQR for median, std for mean)" # IQR or std based diversity div_out = GriddedData( numpy_to_cube( divarr, dims=dims, var_name=f"{var_name}div", units="%", ts_type=ts_type, data_id=data_id, from_files=from_files, from_models=from_models, from_vars=from_vars, models_failed=models_failed, comment=commentdiv, ) ) std_out, q1_out, q3_out = None, None, None if avg_how == "mean": commentstd = comment + " Standard deviation" std_out = GriddedData( numpy_to_cube( stdarr, dims=dims, var_name=f"{var_name}std", units=data.units, ts_type=ts_type, data_id=data_id, from_files=from_files, from_models=from_models, from_vars=from_vars, models_failed=models_failed, comment=commentstd, ) ) else: commentq1 = comment + " First quantile." q1_out = GriddedData( numpy_to_cube( q1arr, dims=dims, var_name=f"{var_name}q1", units=data.units, ts_type=ts_type, data_id=data_id, from_files=from_files, from_models=from_models, from_vars=from_vars, models_failed=models_failed, comment=commentq1, ) ) commentq3 = comment + " Third quantile." q3_out = GriddedData( numpy_to_cube( q3arr, dims=dims, var_name=f"{var_name}q3", units=data.units, ts_type=ts_type, data_id=data_id, from_files=from_files, from_models=from_models, from_vars=from_vars, models_failed=models_failed, comment=commentq3, ) ) return (avg_out, div_out, q1_out, q3_out, std_out)