diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..cf85da41 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,15 @@ +[run] +parallel = True +branch = True +command_line = -m unittest +source = + cfsites + dr2xml + scripts + xml_writer + logger.py + utilities + +[report] +exclude_lines = + if __name__ == "__main__": \ No newline at end of file diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml new file mode 100644 index 00000000..d52b6522 --- /dev/null +++ b/.github/workflows/test_python.yml @@ -0,0 +1,43 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Test python 3.8 + +on: + - push + - workflow_dispatch + - pull_request + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: true + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install flake8 pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + python -m pip install git+https://github.com/CMIP-Data-Request/CMIP7_DReq_Software.git + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with unittest + run: | + export CMIP7_DR_API_CONFIGFILE=$PWD/dr2xml/dr_interface/CMIP7_config + python3 -m data_request_api.command_line.config offline true + for f in $(ls tests/test_*/__init__.py); do echo $f; python3 -m unittest $f; done + for f in $(ls tests/test_*.py); do echo $f; python3 -m unittest $f; done diff --git a/dr2xml/Xparse.py b/dr2xml/Xparse.py index 8effb170..95e40238 100644 --- a/dr2xml/Xparse.py +++ b/dr2xml/Xparse.py @@ -18,7 +18,7 @@ from .xml_interface import get_root_of_xml_file, is_xml_element_to_parse, find_rank_xml_subelement # Logger -from logger import get_logger +from utilities.logger import get_logger # Define for each object kind those attributes useful for grid inheritance @@ -169,7 +169,7 @@ def attrib_by_ref(elt, attrib, index, level): if rep: return rep except: - if not refid.startswith("dummy_"): + if "dummy" not in refid: raise XparseError("Error : reference '%s' is invalid" % refid) @@ -285,25 +285,6 @@ def id_has_expr_with_at(field_id, index): # raise Xparse_error("field %s is not known"%field_id) -if False: - - nemo = init_context('nemo', "./", False) - # print list(nemo) - grid = id2grid("CMIP6_O18sw", nemo, True) - print(grid.attrib['id']) - print() - - arpsfx = init_context('arpsfx', "./", False) - grid = id2grid("CMIP6_cdnc", arpsfx, True) - # grid=None - if grid is not None: - # print "Grid id is :"+grid.attrib['id'] - print(create_string_from_xml_element(grid)) - grid_string = create_string_from_xml_element(grid) - new_grid_string = re.sub(r'axis_ref= *.([\w_])*.', 'axis_ref="axis_autre"', grid_string) - print(new_grid_string) - - class XparseError(Exception): """ Xparse exceptions class.
diff --git a/dr2xml/Xwrite.py b/dr2xml/Xwrite.py index f00dcf19..08040f36 100644 --- a/dr2xml/Xwrite.py +++ b/dr2xml/Xwrite.py @@ -18,7 +18,7 @@ from .utils import Dr2xmlError # Logger -from logger import get_logger +from utilities.logger import get_logger, change_log_level # Global variables and configuration tools from .config import get_config_variable, set_config_variable, add_value_in_dict_config_variable @@ -156,7 +156,7 @@ def create_xios_aux_elmts_defs(sv, alias, table, context, target_hgrid_id, zgrid # Build a construct for computing a climatology (if applicable) # -------------------------------------------------------------------- if clim: - if sv.frequency in ["1hrCM", ]: + if sv.frequency in ["1hrCM", "1hr"]: last_field_id, last_grid_id = process_diurnal_cycle(last_field_id) else: raise Dr2xmlError("Cannot handle climatology cell_method for frequency %s and variable %s" @@ -462,7 +462,7 @@ def write_xios_file_def(filename, svars_per_table, year, dummies, skipped_vars_p set_config_variable("domain_defs", OrderedDict()) # Add xml_file_definition xml_file_definition = DR2XMLElement(tag="file_definition") - _, hgrid, _, _, _ = internal_dict['grids'][get_settings_values("internal_values", "grid_choice")][context] + _, hgrid, _, _, _ = internal_dict['grids'][internal_dict["select_grid_choice"]][context] files_list = determine_files_list(svars_per_table, enddate, year, debug) for file_dict in files_list: write_xios_file_def_for_svars_list(hgrid=hgrid, xml_file_definition=xml_file_definition, dummies=dummies, @@ -767,8 +767,7 @@ def get_split_info(sv, table, enddate, year, debug): endmonth = "01" endday = "01" split_last_date = "{}-{}-{} 00:00:00".format(endyear, endmonth, endday) - sc = get_dr_object("get_scope") - split_freq = determine_split_freq(sv, grid_choice, sc.mcfg, context) + split_freq = determine_split_freq(sv, grid_choice, context) return split_freq_format, split_last_date, split_start_offset, split_end_offset, split_freq diff --git 
a/dr2xml/__init__.py b/dr2xml/__init__.py index 8aa1196c..8379055b 100644 --- a/dr2xml/__init__.py +++ b/dr2xml/__init__.py @@ -61,7 +61,7 @@ from .utils import print_struct # Logger -from logger import initialize_logger, get_logger, change_log_level +from utilities.logger import initialize_logger, get_logger, change_log_level # Global variables and configuration tools from .config import get_config_variable, set_config_variable, initialize_config_variables @@ -575,7 +575,7 @@ def configuration_init(func): :return: The initial function with initialized environment to use dr2xml. """ def make_configuration(lset, sset, cvs_path=None, printout=False, prefix="", debug=False, force_reset=False, - **kwargs): + select="on_expt_and_year", **kwargs): year = kwargs.get("year", 0) context = kwargs.get("context") dirname = kwargs.get("dirname") @@ -588,14 +588,13 @@ def make_configuration(lset, sset, cvs_path=None, printout=False, prefix="", deb initialize_logger(default=True, level=default_level) initialize_config_variables() initialize_settings(lset=lset, sset=sset, cvspath=cvs_path, context=context, prefix=prefix, - year=year, dirname=dirname, force_reset=force_reset) + year=year, dirname=dirname, force_reset=force_reset, select=select) return func(**kwargs) return make_configuration @configuration_init -def generate_file_defs(year, enddate, context, pingfiles=None, dummies='include', dirname="./", attributes=list(), - select="on_expt_and_year"): +def generate_file_defs(year, enddate, context, pingfiles=None, dummies='include', dirname="./", attributes=list()): """ Using the DR module, a dict of lab settings ``lset``, and a dict of simulation settings ``sset``, generate an XIOS file_defs 'file' for a @@ -664,7 +663,7 @@ def generate_file_defs(year, enddate, context, pingfiles=None, dummies='include' # TBS# from os import path as os_path # TBS# prog_path=os_path.abspath(os_path.split(__file__)[0]) - print("* %29s" % "CMIP6 Data Request version: ", 
get_dr_object("get_data_request").get_version()) + print("* %29s" % "{} Data Request version: ".format(internal_settings["data_request_used"]), get_dr_object("get_data_request").get_version()) print("\n*\n {}".format(50 * "*")) logger = get_logger() @@ -699,7 +698,7 @@ def generate_file_defs(year, enddate, context, pingfiles=None, dummies='include' # -------------------------------------------------------------------- skipped_vars_per_table = OrderedDict() actually_written_vars = list() - svars_per_table = select_variables_to_be_processed(year, context, select) + svars_per_table = select_variables_to_be_processed() # # -------------------------------------------------------------------- # Read ping_file defined variables @@ -753,14 +752,15 @@ def generate_file_defs(year, enddate, context, pingfiles=None, dummies='include' @configuration_init -def create_ping_files(context, path_special, dummy="field_atm", dummy_with_shape=False, exact=False, comments=False, - filename=None, debug=list(), by_realm=False): +def create_ping_files(context, dummy="field_atm", dummy_with_shape=False, exact=False, comments=False, + filename=None, debug=list(), by_realm=False, **kwargs): from .settings_interface import get_settings_values from .vars_interface.selection import select_variables_to_be_processed from .pingfiles_interface import ping_file_for_realms_list considered_realms = get_settings_values("internal", "realms_per_context") - svars = select_variables_to_be_processed(None, context, "no") + path_special = get_settings_values("internal", "path_special_defs") + svars = select_variables_to_be_processed() if by_realm: for realm in considered_realms: ping_file_for_realms_list(context=context, svars=svars, lrealms=[realm, ], path_special=path_special, diff --git a/dr2xml/analyzer.py b/dr2xml/analyzer.py index 7c9e3a86..b9b2bf15 100644 --- a/dr2xml/analyzer.py +++ b/dr2xml/analyzer.py @@ -24,7 +24,7 @@ from .utils import Dr2xmlError # Logger -from logger import get_logger +from 
utilities.logger import get_logger # Global variables and configuration tools from .config import add_value_in_list_config_variable @@ -210,8 +210,12 @@ def analyze_cell_time_method(cm, label, table): operation = "average" detect_missing = True elif "time: mean where sea" in cm: # [amnesi-tmn]: - # Area Mean of Ext. Prop. on Sea Ice : pas utilisee - logger.warning("time: mean where sea is not supposed to be used (%s,%s)" % (label, table)) + add_value_in_list_config_variable("cell_method_warnings", + ('time: mean where sea', label, table)) + logger.info("Note: assuming that 'time: mean where sea' " + " for %15s in table %s is well handled by 'detect_missing'" % (label, table)) + operation = "average" + detect_missing = True # ------------------------------------------------------------------------------------- elif "time: mean where floating_ice_shelf" in cm: # [amnfi-twmn]: Weighted Time Mean on Floating Ice Shelf (presque que des diff --git a/dr2xml/dr_interface/C3S.py b/dr2xml/dr_interface/C3S.py index 13699a0b..c58ef42a 100644 --- a/dr2xml/dr_interface/C3S.py +++ b/dr2xml/dr_interface/C3S.py @@ -12,7 +12,6 @@ from importlib.machinery import SourceFileLoader from .definition import ListWithItems -from .definition import Scope as ScopeBasic from .definition import DataRequest as DataRequestBasic from .definition import SimpleObject from .definition import SimpleCMORVar as SimpleCMORVarBasic @@ -31,13 +30,12 @@ from .C3S_DR import c3s_nc_dims, c3s_nc_coords, c3s_nc_comvars, c3s_nc_vars -scope = None data_request = None class DataRequest(DataRequestBasic): def get_version(self): - return "No Data Request" + return "undef" def get_list_by_id(self, collection, **kwargs): return ListWithItems() @@ -83,27 +81,12 @@ def get_grids_dict(self): def get_dimensions_dict(self): return OrderedDict() - def get_cmorvars_list(self, sizes=None, **kwargs): - if sizes is not None: - sc = get_scope() - sc.update_mcfg(sizes) + def get_cmorvars_list(self, **kwargs): rep = 
defaultdict(set) for id in self.get_element_uid(elt_type="variable"): for grid in self.get_element_uid(id=id, elt_type="variable").grids: rep[id].add(grid) - return rep, list() - - -class Scope(ScopeBasic): - - def __init__(self, scope=None): - super().__init__(scope=scope) - - def get_request_link_by_mip(self, mips_list): - return list() - - def get_vars_by_request_link(self, request_link, pmax): - return list() + return rep def initialize_data_request(): @@ -120,27 +103,6 @@ def get_data_request(): return data_request -def initialize_scope(tier_max): - global scope - dq = get_data_request() - if scope is None: - scope = Scope() - return scope - - -def get_scope(tier_max=None): - if scope is None: - return initialize_scope(tier_max) - else: - return scope - - -def set_scope(sc): - if sc is not None: - global scope - scope = sc - - def normalize_grid(grid): return grid diff --git a/dr2xml/dr_interface/CMIP6.py b/dr2xml/dr_interface/CMIP6.py index db8e7eaa..246cee81 100644 --- a/dr2xml/dr_interface/CMIP6.py +++ b/dr2xml/dr_interface/CMIP6.py @@ -12,22 +12,31 @@ import re import sys from collections import OrderedDict, defaultdict +import os import six -from logger import get_logger -from .definition import Scope as ScopeBasic +from utilities.logger import get_logger from .definition import DataRequest as DataRequestBasic from .definition import SimpleObject from .definition import SimpleDim as SimpleDimBasic from .definition import SimpleCMORVar as SimpleCMORVarBasic +from ..projects.dr2xml import format_sizes from ..utils import Dr2xmlError, print_struct, is_elt_applicable, convert_string_to_year -from dr2xml.settings_interface import get_settings_values +from dr2xml.settings_interface import get_settings_values, get_values_from_internal_settings data_request_path = get_settings_values("internal", "data_request_path") if data_request_path is not None: sys.path.insert(0, data_request_path) +data_request_content_version = get_settings_values("internal", 
"data_request_content_version") +if data_request_content_version not in ["latest_stable", "stable", "latest"]: + reset_manifest = True + os.environ["DRQ_CONFIG_DIR"] = data_request_content_version + os.environ["DRQ_VERSION_DIR"] = data_request_content_version +else: + reset_manifest = False + try: import dreq except ImportError: @@ -38,41 +47,18 @@ except ImportError: from dreqPy.scope import dreqQuery - -class Scope(ScopeBasic): - - def __init__(self, scope=None): - super(Scope, self).__init__(scope=scope) - self.mcfg = self.scope.mcfg - - def get_request_link_by_mip(self, mips_list): - return sorted(list(self.scope.getRequestLinkByMip(set(mips_list))), key=lambda x: x.label) - - def get_filtered_request_links_by_mip_included_excluded(self, mips_list, included_request_links=None, - excluded_request_links=None): - logger = get_logger() - rep = self.get_request_link_by_mip(mips_list) - logger.info("Number of Request Links which apply to MIPS %s is: %d" % - (print_struct(mips_list), len(rep))) - rep = [rl for rl in rep if is_elt_applicable(rl, attribute="label", excluded=excluded_request_links)] - logger.info("Number of Request Links after filtering by excluded_request_links is: %d" % len(rep)) - if included_request_links is not None and len(included_request_links) > 0: - excluded_rls = [rl for rl in rep if not is_elt_applicable(rl, attribute="label", - included=included_request_links)] - for rl in excluded_rls: - logger.critical("RequestLink %s is not included" % rl.label) - rep.remove(rl) - logger.info("Number of Request Links after filtering by included_request_links is: %d" % len(rep)) - - return rep - - def get_vars_by_request_link(self, request_link, pmax): - if not isinstance(request_link, list): - request_link = [request_link, ] - return self.scope.varsByRql(request_link, pmax) +rls = None class DataRequest(DataRequestBasic): + def set_mcfg(self): + self.scope = dreqQuery(dq=self.data_request, tierMax=get_settings_values("internal", "select_tierMax")) + 
self.mcfg = format_sizes(self.scope.mcfg) + + def update_mcfg(self): + mcfg = get_settings_values('internal', 'select_sizes') + if mcfg is not None: + self.mcfg = mcfg def get_version(self): return self.data_request.version @@ -83,6 +69,22 @@ def get_list_by_id(self, collection, elt_type=None, **kwargs): rep = [SimpleCMORVar.get_from_dr(elt, id=elt.uid) for elt in rep.items] return rep + def get_variables_per_label(self, debug=list()): + logger = get_logger() + rep = OrderedDict() + for v in self.get_list_by_id("var").items: + if v.label not in rep: + rep[v.label] = [] + if v.label in debug: + logger.debug("Adding %s" % v.label) + refs = self.get_request_by_id_by_sect(v.uid, 'CMORvar') + for r in refs: + ref = self.get_element_uid(r, elt_type="variable") + rep[v.label].append(ref) + if v.label in debug: + logger.debug("Adding CmorVar %s(%s) for %s" % (v.label, ref.mipTable, ref.label)) + return rep + def get_sectors_list(self): """ Get the sectors list. @@ -103,9 +105,9 @@ def get_experiment_label_start_end_years(self, experiment): else: return super().get_experiment_label_start_end_years(experiment) - def filter_request_link_by_experiment_and_year(self, request_links, experiment_id, year, - filter_on_realization=False, realization_index=1, branching=dict(), - branch_year_in_child=None, endyear=False): + def _filter_request_link_by_experiment_and_year(self, request_links, experiment_id, year, + filter_on_realization=False, realization_index=1, branching=dict(), + branch_year_in_child=None, endyear=False): logger = get_logger() _, starty, endy = self.get_experiment_label_start_end_years(experiment_id) logger.info("Filtering for experiment %s, covering years [ %s , %s ] in DR" % @@ -115,15 +117,15 @@ def filter_request_link_by_experiment_and_year(self, request_links, experiment_i for rl in request_links: # Access all requesItems ids which refer to this RequestLink rl_req_items = self.get_request_by_id_by_sect(rl.uid, 'requestItem') - if 
any([self.check_requestitem_for_exp_and_year(self.get_element_uid(ri_id), experiment_id, year, - filter_on_realization, endyear, realization_index, - branching, branch_year_in_child)[0] + if any([self._check_requestitem_for_exp_and_year(self.get_element_uid(ri_id), experiment_id, year, + filter_on_realization, endyear, realization_index, + branching, branch_year_in_child)[0] for ri_id in rl_req_items]): rep.append(rl) return rep - def check_requestitem_for_exp_and_year(self, ri, experiment, year, filter_on_realization=False, endyear=False, - realization_index=1, branching=dict(), branch_year_in_child=None): + def _check_requestitem_for_exp_and_year(self, ri, experiment, year, filter_on_realization=False, endyear=False, + realization_index=1, branching=dict(), branch_year_in_child=None): """ Returns True if requestItem 'ri' in data request is relevant for a given 'experiment' and 'year'. Toggle 'debug' allow some printouts @@ -181,8 +183,8 @@ def check_requestitem_for_exp_and_year(self, ri, experiment, year, filter_on_rea logger.debug(" ..applies because arg year is None") else: year = int(year) - rep, endyear = self.is_year_in_requestitem(ri, experiment, year, branching, branch_year_in_child, - endyear) + rep, endyear = self._is_year_in_requestitem(ri, experiment, year, branching, branch_year_in_child, + endyear) logger.debug(" ..year in ri returns: %s %s" % (rep, endyear)) # if (ri.label=="AerchemmipAermonthly3d") : # print "reqItem=%s,experiment=%s,year=%d,rep=%s,"%(ri.label,experiment,year,rep) @@ -192,7 +194,7 @@ def check_requestitem_for_exp_and_year(self, ri, experiment, year, filter_on_rea # print return False, None - def is_year_in_requestitem(self, ri, exp, year, branching=dict(), branch_year_in_child=None, endyear=False): + def _is_year_in_requestitem(self, ri, exp, year, branching=dict(), branch_year_in_child=None, endyear=False): """ :param ri: request item :param exp: experiment @@ -207,8 +209,8 @@ def is_year_in_requestitem(self, ri, exp, year, 
branching=dict(), branch_year_in return True, 2018 if 'tslice' in ri.__dict__: logger.debug("calling year_in_ri_tslice") - rep, endyear = self.is_year_in_requestitem_tslice(ri, exp_label, exp_startyear, year, branching, - branch_year_in_child) + rep, endyear = self._is_year_in_requestitem_tslice(ri, exp_label, exp_startyear, year, branching, + branch_year_in_child) return rep, endyear try: ny = int(ri.nymax) @@ -249,8 +251,8 @@ def is_year_in_requestitem(self, ri, exp, year, branching=dict(), branch_year_in logger.debug("year_in_ri: returning %s %s" % (applies, RI_end_year)) return applies, RI_end_year - def is_year_in_requestitem_tslice(self, ri, exp_label, exp_startyear, year, branching=dict(), - branch_year_in_child=None): + def _is_year_in_requestitem_tslice(self, ri, exp_label, exp_startyear, year, branching=dict(), + branch_year_in_child=None): """ Returns a couple : relevant, endyear. RELEVANT is True if requestItem RI applies to @@ -292,6 +294,7 @@ def is_year_in_requestitem_tslice(self, ri, exp_label, exp_startyear, year, bran endyear = year elif tslice.type in ["startRange", ]: # e.g. 
_slice_VolMIP3 # used only for VolMIP : _slice_VolMIP3 + nyear = tslice.nyears start_year = self.find_exp_start_year(exp_label, exp_startyear, branch_year_in_child=branch_year_in_child) relevant = (year >= start_year and year < start_year + nyear) endyear = start_year + nyear - 1 @@ -324,7 +327,7 @@ def is_year_in_requestitem_tslice(self, ri, exp_label, exp_startyear, year, bran (year, exp_label, repr(relevant), ri.title, tslice.type, repr(endyear))) return relevant, endyear - def get_requestitems_for_cmorvar(self, cmorvar_id, pmax, global_rls): + def _get_requestitems_for_cmorvar(self, cmorvar_id, pmax): logger = get_logger() rVarsUid = self.get_request_by_id_by_sect(cmorvar_id, 'requestVar') @@ -336,13 +339,9 @@ def get_requestitems_for_cmorvar(self, cmorvar_id, pmax, global_rls): RequestLinksId = list() for vg in VarGroups: RequestLinksId.extend(self.get_request_by_id_by_sect(vg.uid, 'requestLink')) - FilteredRequestLinks = list() - for rlid in RequestLinksId: - rl = self.get_element_uid(rlid) - if rl in global_rls: - FilteredRequestLinks.append(rl) - logger.debug("les requestlinks: %s" % " ".join([self.get_element_uid(rlid).label - for rlid in RequestLinksId])) + RequestLinksId = sorted([self.get_element_uid(rlid) for rlid in RequestLinksId], key=lambda x: x.label) + FilteredRequestLinks = sorted(list(set(RequestLinksId) & set(rls)), key=lambda x: x.label) + logger.debug("les requestlinks: %s" % " ".join([rl.label for rl in RequestLinksId])) logger.debug("les FilteredRequestlinks: %s" % " ".join([rl.label for rl in FilteredRequestLinks])) RequestItems = list() for rl in FilteredRequestLinks: @@ -351,22 +350,23 @@ def get_requestitems_for_cmorvar(self, cmorvar_id, pmax, global_rls): "les requestItems: %s" % " ".join([self.get_element_uid(riid).label for riid in RequestItems])) return RequestItems - def get_endyear_for_cmorvar(self, cmorvar, experiment, year, internal_dict, global_rls): + def get_endyear_for_cmorvar(self, cmorvar, experiment, year, 
internal_dict): logger = get_logger() logger.debug("In end_year for %s %s" % (cmorvar.label, cmorvar.mipTable)) # 1- Get the RequestItems which apply to CmorVar - request_items = self.get_requestitems_for_cmorvar(cmorvar.id, internal_dict["max_priority"], global_rls) + max_priority = get_values_from_internal_settings("max_priority", "max_priority_lset", merge=False) + request_items = self._get_requestitems_for_cmorvar(cmorvar.id, max_priority) # 2- Select those request links which include expt and year larger = None for riid in request_items: ri = self.get_element_uid(riid) - applies, endyear = self.check_requestitem_for_exp_and_year(ri, experiment, year, - internal_dict["filter_on_realization"], - internal_dict["end_year"], - internal_dict["realization_index"], - internal_dict["branching"], - internal_dict["branch_year_in_child"]) + applies, endyear = self._check_requestitem_for_exp_and_year(ri, experiment, year, + internal_dict["filter_on_realization"], + internal_dict["end_year"], + internal_dict["realization_index"], + internal_dict["branching"], + internal_dict["branch_year_in_child"]) logger.debug("For var and freq selected for debug and year %s, for ri %s, applies=%s, endyear=%s" % (str(year), ri.title, str(applies), str(endyear))) if applies: @@ -381,6 +381,8 @@ def get_endyear_for_cmorvar(self, cmorvar, experiment, year, internal_dict, glob def get_element_uid(self, id=None, error_msg=None, raise_on_error=False, check_print_DR_errors=True, check_print_stdnames_error=False, elt_type=None, **kwargs): logger = get_logger() + if elt_type in ["dim", ] and not id.startswith("dim:"): + id = 'dim:{}'.format(id) if id is None: rep = self.data_request.inx.uid elif id in self.data_request.inx.uid: @@ -433,32 +435,32 @@ def get_single_levels_list(self): rep.append(c.label) return rep - def get_cmorvars_list(self, tierMax, mips_list, included_request_links, excluded_request_links, max_priority, - included_vars, excluded_vars, included_tables, excluded_tables, 
excluded_pairs, - experiment_filter=False, sizes=None): + def get_cmorvars_list(self, select_tierMax, select_mips, select_included_request_links, + select_excluded_request_links, select_max_priority, select_included_vars, + select_excluded_vars, select_included_tables, select_excluded_tables, select_excluded_pairs, + experiment_filter=False, **kwargs): + global rls logger = get_logger() - sc = get_scope(tierMax) - if sizes is not None: - sc.update_mcfg(sizes) + self.update_mcfg() # Get the request links for all experiments filtered by MIPs - rls_for_mips = sc.get_filtered_request_links_by_mip_included_excluded( - mips_list=mips_list, included_request_links=included_request_links, - excluded_request_links=excluded_request_links + rls_for_mips = self._get_filtered_request_links_by_mip_included_excluded( + mips_list=select_mips, included_request_links=select_included_request_links, + excluded_request_links=select_excluded_request_links ) rls_for_mips = sorted(rls_for_mips, key=lambda x: x.label) # Filter by experiment if needed if experiment_filter: - rls = self.filter_request_link_by_experiment_and_year(rls_for_mips, **experiment_filter) + rls = self._filter_request_link_by_experiment_and_year(rls_for_mips, **experiment_filter) logger.info("Number of Request Links which apply to experiment %s member %s and MIPs %s is: %d" % (experiment_filter["experiment_id"], experiment_filter['realization_index'], - print_struct(mips_list), len(rls))) + print_struct(select_mips), len(rls))) else: rls = rls_for_mips # Get variables and grids by mips miprl_vars_grids = set() for rl in rls: logger.debug("processing RequestLink %s" % rl.title) - for v in sc.get_vars_by_request_link(request_link=rl.uid, pmax=max_priority): + for v in self._get_vars_by_request_link(request_link=rl.uid, pmax=select_max_priority): # The requested grid is given by the RequestLink except if spatial shape matches S-* gr = rl.grid cmvar = self.get_element_uid(v, elt_type="variable") @@ -472,9 +474,9 @@ def 
get_cmorvars_list(self, tierMax, mips_list, included_request_links, excluded filtered_vars = list() for (v, g) in miprl_vars_grids: cmvar = self.get_element_uid(v, elt_type="variable") - if is_elt_applicable(cmvar.mipVarLabel, excluded=excluded_vars, included=included_vars) and \ - is_elt_applicable(cmvar.mipTable, excluded=excluded_tables, included=included_tables) and \ - is_elt_applicable((cmvar.mipVarLabel, cmvar.mipTable), excluded=excluded_pairs): + if is_elt_applicable(cmvar.mipVarLabel, excluded=select_excluded_vars, included=select_included_vars) and \ + is_elt_applicable(cmvar.mipTable, excluded=select_excluded_tables, included=select_included_tables) and \ + is_elt_applicable((cmvar.mipVarLabel, cmvar.mipTable), excluded=select_excluded_pairs): filtered_vars.append((v, g)) logger.debug("adding var %s, grid=%s, ttable=%s=" % (cmvar.label, g, cmvar.mipTable)) @@ -484,17 +486,46 @@ def get_cmorvars_list(self, tierMax, mips_list, included_request_links, excluded filtered_vars_with_grids = defaultdict(set) for (v, g) in filtered_vars: filtered_vars_with_grids[v].add(g) - return filtered_vars_with_grids, rls + return filtered_vars_with_grids + + def _get_request_link_by_mip(self, mips_list): + return sorted(list(self.scope.getRequestLinkByMip(set(mips_list))), key=lambda x: x.label) + + def _get_filtered_request_links_by_mip_included_excluded(self, mips_list, included_request_links=None, + excluded_request_links=None): + logger = get_logger() + rep = self._get_request_link_by_mip(mips_list) + logger.info("Number of Request Links which apply to MIPS %s is: %d" % + (print_struct(mips_list), len(rep))) + rep = [rl for rl in rep if is_elt_applicable(rl, attribute="label", excluded=excluded_request_links)] + logger.info("Number of Request Links after filtering by excluded_request_links is: %d" % len(rep)) + if included_request_links is not None and len(included_request_links) > 0: + excluded_rls = [rl for rl in rep if not is_elt_applicable(rl, attribute="label", + 
included=included_request_links)] + for rl in excluded_rls: + logger.critical("RequestLink %s is not included" % rl.label) + rep.remove(rl) + logger.info("Number of Request Links after filtering by included_request_links is: %d" % len(rep)) + + return rep + + def _get_vars_by_request_link(self, request_link, pmax): + if not isinstance(request_link, list): + request_link = [request_link, ] + return self.scope.varsByRql(request_link, pmax) -scope = None data_request = None def initialize_data_request(): global data_request + if reset_manifest: + dict_load = dict(manifest=None) + else: + dict_load = dict() if data_request is None: - data_request = DataRequest(data_request=dreq.loadDreq(), print_DR_errors=True, print_DR_stdname_errors=False) + data_request = DataRequest(data_request=dreq.loadDreq(**dict_load), print_DR_errors=True, print_DR_stdname_errors=False) return data_request @@ -505,27 +536,6 @@ def get_data_request(): return data_request -def initialize_scope(tier_max): - global scope - dq = get_data_request() - if scope is None: - scope = Scope(dreqQuery(dq=dq.data_request, tierMax=tier_max)) - return scope - - -def get_scope(tier_max=None): - if scope is None: - return initialize_scope(tier_max) - else: - return scope - - -def set_scope(sc): - if sc is not None: - global scope - scope = sc - - def normalize_grid(grid): """ in DR 1.0.2, values are : ['', 'model grid', '100km', '50km or smaller', 'cfsites', '1deg', '2deg', '25km or smaller', 'native'] @@ -598,12 +608,14 @@ def correct_data_request(self): self.cell_measures = 'area: areacello' if self.label in ["jpdftaure", ]: self.spatial_shape = "XY-na" - if self.modeling_realm is not None: + if len(self.modeling_realm) > 0: # Because wrong in DR01.00.20 - if self.modeling_realm.startswith("zoo"): - self.modeling_realm = "ocnBgChem" + self.modeling_realm=["ocnBgChem" if elt.startswith("zoo") else elt for elt in self.modeling_realm] + self.set_modeling_realms = set() + for realm in self.modeling_realm: + 
self.set_modeling_realms = self.set_modeling_realms | set(realm.split(" ")) # TBD : this cell_measure choice for seaice variables is specific to Nemo - if "seaIce" in self.modeling_realm and self.cell_measures is not None and \ + if "seaIce" in self.set_modeling_realms and self.cell_measures is not None and \ "areacella" in self.cell_measures: if self.label in ['siconca', ]: self.cell_measures = 'area: areacella' @@ -619,7 +631,7 @@ def correct_data_request(self): self.long_name = "empty in DR %s" % data_request.get_version() if self.units is None: self.units = "empty in DR %s" % data_request.get_version() - if self.modeling_realm in ["seaIce", ] and re.match(".*areacella.*", str(self.cell_measures)) \ + if self.modeling_realm == ["seaIce", ] and re.match(".*areacella.*", str(self.cell_measures)) \ and self.label not in ["siconca", ]: self.comments = ". Due an error in DR01.00.21 and to technical constraints, this variable may have " \ "attribute cell_measures set to area: areacella, while it actually is area: areacello" @@ -732,4 +744,10 @@ def get_from_dr(cls, input_var, sn_issues=None, allow_pseudo=False, mip_list=lis if struct is not None: input_var_dict["flag_meanings"] = struct.flag_meanings input_var_dict["flag_values"] = struct.flag_values + if "modeling_realm" in input_var_dict: + val = input_var_dict["modeling_realm"] + if val is None or len(val) == 0: + input_var_dict["modeling_realm"] = list() + elif not isinstance(val, list): + input_var_dict["modeling_realm"] = [val, ] return cls(from_dr=True, **input_var_dict) diff --git a/dr2xml/dr_interface/CMIP7.py b/dr2xml/dr_interface/CMIP7.py new file mode 100644 index 00000000..ea6d59ac --- /dev/null +++ b/dr2xml/dr_interface/CMIP7.py @@ -0,0 +1,322 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +Interface if no data request should be used. 
+""" + +from __future__ import print_function, division, absolute_import, unicode_literals + +from collections import OrderedDict, defaultdict +import sys +import os + +from utilities.logger import get_logger +from .definition import ListWithItems +from .definition import DataRequest as DataRequestBasic +from .definition import SimpleObject +from .definition import SimpleCMORVar as SimpleCMORVarBasic +from .definition import SimpleDim as SimpleDimBasic +from dr2xml.settings_interface import get_settings_values +from ..utils import Dr2xmlError, is_elt_applicable + +data_request_path = get_settings_values("internal", "data_request_path") +sys.path.append(data_request_path) +os.environ["CMIP7_DR_API_CONFIGFILE"] = get_settings_values("internal", "data_request_config") +from data_request_api.query.vocabulary_server import ConstantValueObj +from data_request_api.query.data_request import DataRequest as CMIP7DataRequest +from data_request_api.content.dump_transformation import get_transformed_content + + +data_request = None + + +def get_value_from_constant(value): + if isinstance(value, ConstantValueObj): + return value.value + elif isinstance(value, list): + return [get_value_from_constant(val) for val in value] + elif isinstance(value, set): + return set([get_value_from_constant(val) for val in value]) + elif isinstance(value, tuple): + return tuple([get_value_from_constant(val) for val in value]) + else: + return value + + +class DataRequest(DataRequestBasic): + def get_version(self): + return self.data_request.version + + def get_list_by_id(self, collection, **kwargs): + logger = get_logger() + if collection in ["CMORvar", "var"]: + rep = self.get_element_uid(id=None, elt_type="variable") + elif collection in ["structure_title", "spatial_shape", "coordinates_and_dimensions"]: + rep = self.get_element_uid(id=None, elt_type=collection) + else: + logger.error(f"Unable to find out collection {collection}") + raise ValueError(f"Unable to find out collection 
{collection}") + if collection not in ["CMORvar", ]: + new_rep, rep = rep, ListWithItems() + rep.extend(new_rep) + return rep + + def get_variables_per_label(self, debug=list()): + logger = get_logger() + rep = OrderedDict() + for v in self.get_list_by_id("var"): + if v.label not in rep: + rep[v.label] = [] + if v.label in debug: + logger.debug("Adding %s" % v.label) + refs = self.get_request_by_id_by_sect(v.id, 'CMORvar') + for r in refs: + ref = self.get_element_uid(r, elt_type="variable") + rep[v.label].append(ref) + if v.label in debug: + logger.debug("Adding CmorVar %s(%s) for %s" % (v.label, ref.mipTable, ref.label)) + return rep + + def get_sectors_list(self): + return ListWithItems() + + def get_experiment_label(self, experiment): + return self.data_request.find_element("experiment", experiment).label + + def get_cmor_var_id_by_label(self, label): + physical_parameter = self.data_request.find_element("physical_parameter", label, default=None) + if physical_parameter is None: + return list() + else: + rep = self.data_request.filter_elements_per_request("variable", + requests=dict(physical_parameter=physical_parameter), + print_warning_bcv=False) + return [elt.id for elt in rep] + + def get_element_uid(self, id=None, elt_type=None, error_msg=None, raise_on_error=False, check_print_DR_errors=True, + check_print_stdnames_error=False, **kwargs): + logger = get_logger() + if elt_type is None: + raise ValueError("Unable to find out uid with elt_type None") + if id is None: + return self.data_request.get_elements_per_kind(elt_type) + else: + if elt_type in ["dim", ]: + elt_type = "dimension" + rep = self.data_request.find_element(element_type=elt_type, value=id, default=None) + if rep is None: + if error_msg is None: + error_msg = "DR Error: issue with %s" % id + if raise_on_error: + raise Dr2xmlError(error_msg) + elif check_print_DR_errors and self.print_DR_errors: + logger.error(error_msg) + elif check_print_stdnames_error and self.print_DR_stdname_errors: + 
logger.error(error_msg) + else: + if elt_type in ["variable", ]: + rep = SimpleCMORVar.get_from_dr(rep, id=id, **kwargs) + elif elt_type in ["dimension", ]: + rep = SimpleDim.get_from_dr(rep, id=id) + return rep + + def get_request_by_id_by_sect(self, id, request): + logger = get_logger() + if request in ["CMORvar", ]: + return [self.get_element_uid(id=id, elt_type="variable").id, ] + else: + logger.error(f"Unable to find out collection {request}") + raise ValueError(f"Unable to find out collection {request}") + + def get_single_levels_list(self): + rep = self.get_list_by_id("coordinates_and_dimensions") + rep = [elt for elt in rep if elt.cf_category in ["coordinate", ] and elt.axis_flag in ["Z", ]] + return sorted(rep) + + def get_grids_dict(self): + rep = OrderedDict() + dims = self.get_list_by_id("coordinates_and_dimensions") + for dim in dims.items: + rep[dim.name] = dim.id + return rep + + def get_dimensions_dict(self): + rep = OrderedDict() + for spshp in self.get_list_by_id("spatial_shape").items: + dims = [elt.name for elt in spshp.dimensions] + new_dims = list() + for key in ["longitude", "latitude"]: + if key in dims: + dims.remove(key) + new_dims.append(key) + new_dims.extend(sorted(dims)) + new_dims = "|".join([str(dim) for dim in new_dims]) + rep[new_dims] = str(spshp.name) + return rep + + def _is_timesubset_applicable(self, year, select_on_year, time_subset): + if year is None or select_on_year is None: + return None, None + else: + return ((time_subset.start is None or (time_subset.start <= int(year))) and + (time_subset.end is None or (time_subset.end >= int(year))), time_subset.end) + + def _get_filtering_elements(self, experiment=None, variable=None): + internal_dict = get_settings_values("internal") + request_dict_all_of_any = dict(opportunities=internal_dict["select_included_opportunities"], + variable_groups=internal_dict["select_included_vargroups"], + max_priority_level=internal_dict["select_max_priority"]) + not_request_dict_any = 
dict(opportunity=internal_dict["select_excluded_opportunities"], + variable_groups=internal_dict["select_excluded_vargroups"]) + select_mips = internal_dict["select_mips"] + if len(select_mips) > 0: + request_dict_all_of_any["mip"] = select_mips + if experiment is not None: + request_dict_all_of_any["experiment"] = experiment + if variable is not None: + request_dict_all_of_any["variable"] = variable + return dict(requests=request_dict_all_of_any, request_operation="all_of_any", not_requests=not_request_dict_any, + not_request_operation="any") + + def get_cmorvars_list(self, select_mips, select_max_priority, select_included_vars, select_excluded_vars, + select_included_tables, select_excluded_tables, select_excluded_pairs, + select_included_opportunities, select_excluded_opportunities, select_included_vargroups, + select_excluded_vargroups, select_on_year, experiment_filter=False, **kwargs): + rep = defaultdict(set) + # Filter var list per priority and experiment + if experiment_filter: + experiment = experiment_filter["experiment_id"] + else: + experiment = None + find_out_requests_dict = self._get_filtering_elements(experiment=experiment) + # Filter var list per filtering dict "any" + var_list = self.data_request.filter_elements_per_request("variables", skip_if_missing=False, + **find_out_requests_dict) + # Apply other filters + for var in var_list: + dr_var = SimpleCMORVar.get_from_dr(var, **kwargs) + if is_elt_applicable(dr_var.mipTable, excluded=select_excluded_tables, included=select_included_tables) and \ + is_elt_applicable(dr_var.mipVarLabel, excluded=select_excluded_vars, included=select_included_vars) \ + and is_elt_applicable((dr_var.mipVarLabel, dr_var.mipTable), excluded=select_excluded_pairs) \ + and self.get_endyear_for_cmorvar(cmorvar=dr_var, experiment=experiment, year=select_on_year, + internal_dict=get_settings_values("internal")) is not False: + rep[dr_var.id] = rep[dr_var.id] | set(dr_var.grids) + return rep + + def 
get_endyear_for_cmorvar(self, cmorvar, experiment, year, internal_dict): + # Find time_subset linked to the variable and experiment for dedicated year + find_out_requests_dict = self._get_filtering_elements(experiment=experiment, variable=cmorvar.cmvar) + time_subsets = self.data_request.filter_elements_per_request("time_subsets", skip_if_missing=False, + **find_out_requests_dict) + time_subsets = [self._is_timesubset_applicable(year, internal_dict["select_on_year"], time_subset) for time_subset in time_subsets] + time_subsets = [end for (apply, end) in time_subsets if apply is not False] + if len(time_subsets) == 0: + return False + elif None in time_subsets: + return None + else: + return max(time_subsets) + + +def initialize_data_request(): + global data_request + if data_request is None: + internal_dict = get_settings_values("internal") + data_request_content_version = internal_dict["data_request_content_version"] + content = get_transformed_content(version=data_request_content_version, + force_retrieve=False) + data_request = DataRequest(CMIP7DataRequest.from_separated_inputs(**content), print_DR_errors=True, + print_DR_stdname_errors=False) + return data_request + + +def get_data_request(): + if data_request is None: + return initialize_data_request() + else: + return data_request + + +def normalize_grid(grid): + return grid + + +class SimpleCMORVar(SimpleCMORVarBasic): + def __init__(self, **kwargs): + for (key, value) in kwargs.items(): + kwargs[key] = get_value_from_constant(value) + super().__init__(**kwargs) + + + @classmethod + def get_from_dr(cls, input_var, **kwargs): + sdims = OrderedDict() + product_of_other_dims = 1 + dimensions = str(input_var.dimensions).split(", ") + dimensions = [dim for dim in dimensions if "time" not in dim] + for sdim in dimensions: + sdim = SimpleDim.get_from_dr(data_request.data_request.find_element("coordinates_and_dimensions", sdim), **kwargs) + sdims[sdim.name] = sdim + product_of_other_dims *= sdim.dimsize + 
cell_measures = input_var.cell_measures + if isinstance(cell_measures, list): + cell_measures = [elt if isinstance(elt, str) else elt.name for elt in cell_measures] + elif isinstance(cell_measures, str): + cell_measures = [cell_measures, ] + else: + cell_measures = [cell_measures.name, ] + cell_methods = input_var.cell_methods.cell_methods + logger = get_logger() + logger.debug(f"Variable considered: {input_var.name}") + return cls(from_dr=True, + type=input_var.type, + modeling_realm=[realm.id for realm in input_var.modelling_realm], + label=input_var.physical_parameter.name, + mipVarLabel=input_var.physical_parameter.name, + label_without_psuffix=input_var.physical_parameter.name, + label_non_ambiguous=input_var.name, + frequency=input_var.cmip7_frequency.name, + mipTable=input_var.cmip6_tables_identifier.name, + description=input_var.description, + stdname=input_var.physical_parameter.cf_standard_name.name, + units=input_var.physical_parameter.units, + long_name=input_var.physical_parameter.title, + sdims=sdims, + other_dims_size=product_of_other_dims, + cell_methods=cell_methods, + cm=input_var.cell_methods.cell_methods, + cell_measures=cell_measures, + spatial_shp=input_var.spatial_shape.name, + temporal_shp=input_var.temporal_shape.name, + id=input_var.id, + cmvar=input_var, + Priority=data_request.data_request.find_priority_per_variable(input_var) + ) + + +class SimpleDim(SimpleDimBasic): + def __init__(self, **kwargs): + for (key, value) in kwargs.items(): + kwargs[key] = get_value_from_constant(value) + super().__init__(**kwargs) + + @classmethod + def get_from_dr(cls, input_dim, **kwargs): + return cls( + from_dr=True, + label=input_dim.name, + stdname=input_dim.cf_standard_name, + long_name=input_dim.title, + positive=input_dim.positive_direction, + requested=input_dim.requested_values, + value=input_dim.value, + out_name=input_dim.output_name, + units=input_dim.units, + boundsRequested=input_dim.requested_bounds, + axis=input_dim.axis_flag, + 
type=input_dim.type, + coords=input_dim, + title=input_dim.title, + name=input_dim.name + ) diff --git a/dr2xml/dr_interface/CMIP7_config b/dr2xml/dr_interface/CMIP7_config new file mode 100644 index 00000000..26bd01e4 --- /dev/null +++ b/dr2xml/dr_interface/CMIP7_config @@ -0,0 +1,7 @@ +check_api_version: false +consolidate: false +export: release +log_file: default +log_level: info +offline: true +variable_name: CMIP7 Compound Name diff --git a/dr2xml/dr_interface/__init__.py b/dr2xml/dr_interface/__init__.py index fdb8bf58..22d73009 100644 --- a/dr2xml/dr_interface/__init__.py +++ b/dr2xml/dr_interface/__init__.py @@ -19,14 +19,10 @@ def __init__(self, *args, **kwargs): defaultfunction() -scope = None data_request = None DataRequest = DefaultClass initialize_data_request = defaultfunction get_data_request = defaultfunction -initialize_scope = defaultfunction -get_scope = defaultfunction -set_scope = defaultfunction normalize_grid = defaultfunction SimpleObject = DefaultClass SimpleCMORVar = DefaultClass @@ -36,8 +32,6 @@ def __init__(self, *args, **kwargs): def get_dr_object(key): if key in ["get_data_request", ]: return get_data_request() - elif key in ["get_scope", ]: - return get_scope() elif key in ["normalize_grid", ]: return normalize_grid elif key in ["SimpleCMORVar", ]: @@ -49,19 +43,22 @@ def get_dr_object(key): def load_correct_dr(): - global scope, data_request, DataRequest, initialize_data_request, get_data_request, initialize_scope, get_scope, \ - set_scope, normalize_grid, SimpleDim, SimpleObject, SimpleCMORVar + global data_request, DataRequest, initialize_data_request, get_data_request, \ + normalize_grid, SimpleDim, SimpleObject, SimpleCMORVar data_request_version = get_settings_values("internal", "data_request_used") if data_request_version in ["CMIP6", ]: - from .CMIP6 import scope, data_request, DataRequest, initialize_data_request, get_data_request, \ - initialize_scope, get_scope, set_scope, normalize_grid, SimpleDim, SimpleObject, 
SimpleCMORVar + from .CMIP6 import data_request, DataRequest, initialize_data_request, get_data_request, \ + normalize_grid, SimpleDim, SimpleObject, SimpleCMORVar + elif data_request_version in ["CMIP7", ]: + from .CMIP7 import data_request, DataRequest, initialize_data_request, get_data_request, \ + normalize_grid, SimpleDim, SimpleObject, SimpleCMORVar elif data_request_version in ["no", "none", "None", None]: - from .no import scope, data_request, DataRequest, initialize_data_request, get_data_request, initialize_scope, \ - get_scope, set_scope, normalize_grid, SimpleDim, SimpleObject, SimpleCMORVar + from .no import data_request, DataRequest, initialize_data_request, get_data_request, \ + normalize_grid, SimpleDim, SimpleObject, SimpleCMORVar elif data_request_version in ["C3S", ]: - from .C3S import scope, data_request, DataRequest, initialize_data_request, get_data_request, initialize_scope,\ - get_scope, set_scope, normalize_grid, SimpleDim, SimpleObject, SimpleCMORVar + from .C3S import data_request, DataRequest, initialize_data_request, get_data_request, \ + normalize_grid, SimpleDim, SimpleObject, SimpleCMORVar else: raise ValueError("The data request specified (%s) is not known." 
% data_request_version) diff --git a/dr2xml/dr_interface/definition.py b/dr2xml/dr_interface/definition.py index 5645f985..1ed049b6 100644 --- a/dr2xml/dr_interface/definition.py +++ b/dr2xml/dr_interface/definition.py @@ -7,43 +7,29 @@ from __future__ import print_function, division, absolute_import, unicode_literals -from collections import namedtuple, OrderedDict +from collections import OrderedDict import six +from dr2xml.settings_interface import get_settings_values from dr2xml.settings_interface.py_settings_interface import is_sset_not_None from dr2xml.utils import convert_string_to_year, Dr2xmlError -class Scope(object): - def __init__(self, scope=None): - self.scope = scope - self.mcfg = self.build_mcfg([None, None, None, None, None, None, None]) - - def build_mcfg(self, value): - mcfg = namedtuple('mcfg', ['nho', 'nlo', 'nha', 'nla', 'nlas', 'nls', 'nh1']) - return mcfg._make(value)._asdict() - - def update_mcfg(self, value): - self.mcfg = self.build_mcfg(value) - - def get_request_link_by_mip(self, mips_list): - return list() - - def get_filtered_request_links_by_mip_included_excluded(self, mips_list, included_request_links=None, - excluded_request_links=None): - return list() - - def get_vars_by_request_link(self, request_link, pmax): - return list() - - class DataRequest(object): def __init__(self, data_request=None, print_DR_errors=False, print_DR_stdname_errors=False): self.data_request = data_request self.print_DR_errors = print_DR_errors self.print_DR_stdname_errors = print_DR_stdname_errors + self.mcfg = None + self.set_mcfg() + + def set_mcfg(self): + self.mcfg = get_settings_values('internal', 'select_sizes') + + def update_mcfg(self): + raise NotImplementedError() def get_version(self): """ @@ -51,6 +37,9 @@ def get_version(self): """ raise NotImplementedError() + def get_variables_per_label(self, debug=list()): + return OrderedDict() + def get_list_by_id(self, collection, **kwargs): """ Get the collection corresponding to the collection id. 
@@ -72,12 +61,6 @@ def get_experiment_label_start_end_years(self, experiment): """ return None, "??", "??" - def filter_request_link_by_experiment_and_year(self, request_links, experiment_id, year, **kwargs): - return list() - - def check_requestitem_for_exp_and_year(self, ri, experiment, year, **kwargs): - return False, None - def get_cmor_var_id_by_label(self, label): """ Get the id of the CMOR var corresponding to label. @@ -114,18 +97,6 @@ def get_single_levels_list(self): """ raise NotImplementedError() - def convert_DR_variable_to_dr2xml_variable(self, input_variable): - """ - Convert a variable from the DR used to the dr2xml template variable. - """ - raise NotImplementedError() - - def convert_DR_dimension_to_dr2xml_dimension(self, input_dimension): - """ - Convert a dimension from the DR used to the dr2xml template dimension. - """ - raise NotImplementedError() - def get_endyear_for_cmorvar(self, **kwargs): return None @@ -163,7 +134,7 @@ def find_exp_end_year(exp_endyear, end_year=False): return convert_string_to_year(exp_endyear) def get_cmorvars_list(self, **kwargs): - return dict(), list() + return dict() class ListWithItems(list): @@ -221,7 +192,7 @@ class SimpleCMORVar(SimpleObject): """ A class for unifying CMOR vars and home variables """ - def __init__(self, type=False, modeling_realm=None, grids=[""], label=None, mipVarLabel=None, + def __init__(self, type=False, modeling_realm=list(), grids=[""], label=None, mipVarLabel=None, label_without_psuffix=None, label_non_ambiguous=None, frequency=None, mipTable=None, positive=None, description=None, stdname=None, units=None, long_name=None, other_dims_size=1, cell_methods=None, cell_measures=None, spatial_shp=None, temporal_shp=None, experiment=None, @@ -230,6 +201,9 @@ def __init__(self, type=False, modeling_realm=None, grids=[""], label=None, mipV **kwargs): self.type = type self.modeling_realm = modeling_realm + self.set_modeling_realms = set() + for realm in self.modeling_realm: + 
self.set_modeling_realms = self.set_modeling_realms | set(realm.split(" ")) self.grids = grids self.label = label # taken equal to the CMORvar label self.mipVarLabel = mipVarLabel # taken equal to MIPvar label @@ -264,12 +238,39 @@ def __init__(self, type=False, modeling_realm=None, grids=[""], label=None, mipV self.flag_meanings = flag_meanings self.flag_values = flag_values super(SimpleCMORVar, self).__init__(**kwargs) + + def set_attributes(self, **kwargs): + if "modeling_realm" in kwargs: + modeling_realms = kwargs["modeling_realm"] + if modeling_realms in ["", None]: + modeling_realms = list() + elif not isinstance(modeling_realms, list): + modeling_realms = [modeling_realms, ] + kwargs["modeling_realm"] = modeling_realms + set_modeling_realms = set() + for realm in modeling_realms: + set_modeling_realms = set_modeling_realms | set(realm.split(" ")) + kwargs["set_modeling_realms"] = set_modeling_realms + super().set_attributes(**kwargs) def __eq__(self, other): return self.label == other.label and self.modeling_realm == other.modeling_realm and \ self.frequency == other.frequency and self.mipTable == other.mipTable and \ self.temporal_shp == other.temporal_shp and self.spatial_shp == other.spatial_shp + def __lt__(self, other): + return self.label < other.label + + def __gt__(self, other): + return self.label > other.label + + def __str__(self): + return (f"SimpleCMORVar {self.label} of priority {self.Priority} " + f"(with standard name {self.stdname} and units {self.units})") + + def __repr__(self): + return str(self) + def correct_data_request(self): pass @@ -282,7 +283,7 @@ def get_from_extra(cls, input_var, mip_era=None, freq=None, table=None, **kwargs input_var_dict = dict(type="extra", mip_era=mip_era, label=input_var["out_name"], mipVarLabel=input_var["out_name"], stdname=input_var.get("standard_name", ""), long_name=input_var["long_name"], units=input_var["units"], - modeling_realm=input_var["modeling_realm"], frequency=freq, mipTable=table, + 
modeling_realm=[input_var["modeling_realm"], ], frequency=freq, mipTable=table, cell_methods=input_var["cell_methods"], cell_measures=input_var["cell_measures"], positive=input_var["positive"], Priority=float(input_var[mip_era.lower() + "_priority"]), label_without_psuffix=input_var["out_name"], diff --git a/dr2xml/dr_interface/no.py b/dr2xml/dr_interface/no.py index 407400b5..c94cbb8e 100644 --- a/dr2xml/dr_interface/no.py +++ b/dr2xml/dr_interface/no.py @@ -9,14 +9,13 @@ from collections import OrderedDict -from .definition import Scope, ListWithItems +from .definition import ListWithItems from .definition import DataRequest as DataRequestBasic from .definition import SimpleObject from .definition import SimpleCMORVar as SimpleCMORVarBasic from .definition import SimpleDim as SimpleDimBasic -scope = None data_request = None @@ -54,12 +53,6 @@ def get_grids_dict(self): def get_dimensions_dict(self): return OrderedDict() - def get_cmorvars_list(self, sizes=None, **kwargs): - if sizes is not None: - sc = get_scope() - sc.update_mcfg(sizes) - return dict(), list() - def initialize_data_request(): global data_request @@ -75,27 +68,6 @@ def get_data_request(): return data_request -def initialize_scope(tier_max): - global scope - dq = get_data_request() - if scope is None: - scope = Scope() - return scope - - -def get_scope(tier_max=None): - if scope is None: - return initialize_scope(tier_max) - else: - return scope - - -def set_scope(sc): - if sc is not None: - global scope - scope = sc - - def normalize_grid(grid): return grid diff --git a/dr2xml/file_splitting.py b/dr2xml/file_splitting.py index 912b9e36..60a85ba1 100644 --- a/dr2xml/file_splitting.py +++ b/dr2xml/file_splitting.py @@ -14,11 +14,12 @@ # Utilities import six +from .dr_interface import get_dr_object from .settings_interface import get_settings_values from .utils import Dr2xmlGridError, Dr2xmlError # Logger -from logger import get_logger +from utilities.logger import get_logger # Interface to 
configuration from .config import get_config_variable, set_config_variable @@ -80,7 +81,7 @@ def read_compression_factors(): return -def split_frequency_for_variable(svar, grid, mcfg, context): +def split_frequency_for_variable(svar, grid, context): """ Compute variable level split_freq and returns it as a string @@ -103,7 +104,7 @@ def split_frequency_for_variable(svar, grid, mcfg, context): max_size = internal_settings["max_file_size_in_floats"] # compression_factor = get_config_variable("compression_factor") - size = field_size(svar, mcfg) * internal_settings["bytes_per_float"] + size = field_size(svar) * internal_settings["bytes_per_float"] if compression_factor is None: read_compression_factors() compression_factor = get_config_variable("compression_factor") @@ -212,11 +213,10 @@ def timesteps_per_freq_and_duration(freq, nbdays, sampling_tstep): spatial_shape_regexp = re.compile(r"(?P\w+)-(?P\w+)(?P(\|\w+)?)") -def field_size(svar, mcfg): +def field_size(svar): """ :param svar: - :param mcfg: :return: """ # COmputing field size is basee on the fact that sptial dimensions @@ -225,21 +225,21 @@ def field_size(svar, mcfg): # of the non-spatial dimensions sizes # ['nho','nlo','nha','nla','nlas','nls','nh1'] / nz = sc.mcfg['nlo'] - - nb_lat = mcfg['nh1'] - nb_lat_ocean = mcfg['nh1'] - atm_grid_size = mcfg['nha'] - atm_nblev = mcfg['nla'] - soil_nblev = mcfg['nls'] - oce_nblev = mcfg['nlo'] - oce_grid_size = mcfg['nho'] + dr = get_dr_object("get_data_request") + nb_lat = dr.mcfg['nh1'] + nb_lat_ocean = dr.mcfg['nh1'] + atm_grid_size = dr.mcfg['nha'] + atm_nblev = dr.mcfg['nla'] + soil_nblev = dr.mcfg['nls'] + oce_nblev = dr.mcfg['nlo'] + oce_grid_size = dr.mcfg['nho'] # TBD : dimension sizes below should be derived from DR query - nb_cosp_sites = 129 - nb_lidar_temp = 40 - nb_parasol_refl = 5 - nb_isccp_tau = 7 - nb_isccp_pc = 7 - nb_curtain_sites = 1000 + nb_cosp_sites = dr.mcfg["nb_cosp_sites"] + nb_lidar_temp = dr.mcfg["nb_lidar_temp"] + nb_parasol_refl = 
dr.mcfg["nb_parasol_refl"] + nb_isccp_tau = dr.mcfg["nb_isccp_tau"] + nb_isccp_pc = dr.mcfg["nb_isccp_pc"] + nb_curtain_sites = dr.mcfg["nb_curtain_sites"] # siz = 0 s = svar.spatial_shp @@ -250,8 +250,8 @@ def field_size(svar, mcfg): s_other = s_match.groupdict()["other"] if s_hdim in ["XY", ]: - if s_vdim in ["O", ] or svar.modeling_realm in ['ocean', 'seaIce', 'ocean seaIce', 'ocnBgchem', - 'seaIce ocean']: + if s_vdim in ["O", ] or len(set(svar.modeling_realm) & + {'ocean', 'seaIce', 'ocean seaIce', 'ocnBgchem', 'seaIce ocean'}) > 0: siz = oce_grid_size else: siz = atm_grid_size @@ -308,8 +308,8 @@ def evaluate_split_freq_value(split_freq): return split_freq_units, split_freq_length -def determine_split_freq(svar, grid_choice, mcfg, context): - split_freq = split_frequency_for_variable(svar, grid_choice, mcfg, context) +def determine_split_freq(svar, grid_choice, context): + split_freq = split_frequency_for_variable(svar, grid_choice, context) max_split_freq = get_settings_values("internal", "max_split_freq") if max_split_freq is not None: split_freq_units, split_freq_length = evaluate_split_freq_value(split_freq) diff --git a/dr2xml/grids.py b/dr2xml/grids.py index 93633369..d11630fb 100644 --- a/dr2xml/grids.py +++ b/dr2xml/grids.py @@ -16,7 +16,7 @@ from .utils import Dr2xmlError # Logger -from logger import get_logger +from utilities.logger import get_logger # Global variables and configuration tools from .config import get_config_variable, add_value_in_dict_config_variable @@ -118,7 +118,7 @@ def create_axis_def(sdim): internal_dict = get_settings_values("internal") prefix = internal_dict["ping_variables_prefix"] # nbre de valeurs de l'axe determine aussi si on est en dim singleton - if sdim.requested: + if sdim.requested not in [False, None, "undef", ""]: glo_list = sdim.requested.strip(" ").split() else: glo_list = sdim.value.strip(" ").split() @@ -286,14 +286,13 @@ def change_axes_in_grid(grid_id): alt_labels = None dr_axis_id = 
dr_axis_id.replace('axis_', '') # For toy_cnrmcm, atmosphere part # - dim_id = 'dim:{}'.format(dr_axis_id) # print "in change_axis for %s %s"%(grid_id,dim_id) # dim_id should be a dimension ! - dim = data_request.get_element_uid(dim_id, elt_type="dim", + dim = data_request.get_element_uid(dr_axis_id, elt_type="dim", raise_on_error=True, error_msg="Value %s in 'non_standard_axes' is not a DR dimension id" % dr_axis_id) # We don't process scalars here - if dim.value in ['', ] or dim.label in ["scatratio", ]: + if dim.value in ["", "undef", False] or dim.label in ["scatratio", ]: axis_id, axis_name = create_axis_from_dim(dim, alt_labels, axis_ref) # cannot use ET library which does not guarantee the ordering of axes changed_done = True diff --git a/dr2xml/infos.py b/dr2xml/infos.py index 67d9b795..6c60d27e 100644 --- a/dr2xml/infos.py +++ b/dr2xml/infos.py @@ -8,7 +8,7 @@ from collections import OrderedDict, defaultdict -from logger import get_logger +from utilities.logger import get_logger # mpmoine_petitplus: nouvelle fonction print_some_stats (plus d'info sur les skipped_vars, nbre de vars / (shape,freq) ) @@ -45,7 +45,8 @@ def print_some_stats(context, svars_per_table, skipped_vars_per_table, actually_ # -------------------------------------------------------------------- if skipped_vars_per_table: logger.info("\nSkipped variables (i.e. whose alias is not present in the pingfile):") - for table, skipvars in skipped_vars_per_table.items(): + for table in sorted(list(skipped_vars_per_table)): + skipvars = sorted(skipped_vars_per_table[table]) logger.info(">>> TABLE: %15s %02d/%02d ----> %s" % (table, len(skipvars), len(svars_per_table[table]), " ".join(skipvars))) # TBS# print "\n\t",table ," ",len(skipvars),"--->", @@ -57,8 +58,8 @@ def print_some_stats(context, svars_per_table, skipped_vars_per_table, actually_ # (i.e. 
not excluded and not skipped) # -------------------------------------------------------------------- stats_out = {} - for table in svars_per_table: - for sv in svars_per_table[table]: + for table in sorted(list(svars_per_table)): + for sv in sorted(list(svars_per_table[table])): dic_freq = {} dic_shp = {} if table not in skipped_vars_per_table or \ @@ -78,25 +79,17 @@ def print_some_stats(context, svars_per_table, skipped_vars_per_table, actually_ logger.info("\n\nSome Statistics on actually written variables per frequency+shape...") # ((sv.label,sv.table,sv.frequency,sv.Priority,sv.spatial_shp)) - dic = OrderedDict() - for label, long_name, stdname, table, frequency, Priority, spatial_shp in actually_written_vars: - if frequency not in dic: - dic[frequency] = OrderedDict() - if spatial_shp not in dic[frequency]: - dic[frequency][spatial_shp] = OrderedDict() - if table not in dic[frequency][spatial_shp]: - dic[frequency][spatial_shp][table] = OrderedDict() - if Priority not in dic[frequency][spatial_shp][table]: - dic[frequency][spatial_shp][table][Priority] = [] + dic = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(list)))) + for (label, long_name, stdname, table, frequency, Priority, spatial_shp) in actually_written_vars: dic[frequency][spatial_shp][table][Priority].append(label) tot_among_freqs = 0 - for frequency in dic: + for frequency in sorted(list(dic)): tot_for_freq_among_shapes = 0 - for spatial_shp in dic[frequency]: + for spatial_shp in sorted(list(dic[frequency])): tot_for_freq_and_shape_among_tables = 0 - for table in dic[frequency][spatial_shp]: - for Priority in dic[frequency][spatial_shp][table]: - list_priority = dic[frequency][spatial_shp][table][Priority] + for table in sorted(list(dic[frequency][spatial_shp])): + for Priority in sorted(list(dic[frequency][spatial_shp][table])): + list_priority = sorted(dic[frequency][spatial_shp][table][Priority]) tot_for_freq_and_shape_among_tables += len(list_priority) 
logger.info("%10s %8s %12s P%1d %3d: %s" % (" ", " ", table, Priority, len(list_priority), " ".join(list_priority))) @@ -125,8 +118,8 @@ def print_some_stats(context, svars_per_table, skipped_vars_per_table, actually_ list_labels = sorted(list(dic)) for label in list_labels: - ln = sorted(list(dic_ln[label])) - sn = sorted(list(dic_sn[label])) + ln = sorted(list(dic_ln[label]), key=lambda x: str(x)) + sn = sorted(list(dic_sn[label]), key=lambda x: str(x)) logger.info((14 + len(label)) * "-") logger.info("--- VARNAME: {}: {}".format(label, ln[0]).strip()) logger.info((14 + len(label)) * "-") diff --git a/dr2xml/laboratories/IPSL.py b/dr2xml/laboratories/IPSL.py new file mode 100644 index 00000000..29ab9c91 --- /dev/null +++ b/dr2xml/laboratories/IPSL.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +Tools specific to a CNRM-CERFACS +""" +from __future__ import print_function, division, absolute_import, unicode_literals + +from dr2xml.dr_interface import get_dr_object +from dr2xml.settings_interface import get_settings_values + + +def lab_grid_policy(cmvarid, grids): # TBD + """ + See doc of lab_adhoc_grid_policy + """ + internal_dict = get_settings_values("internal") + cmvar_uid = get_dr_object("get_data_request").get_element_uid(cmvarid, elt_type="variable") + if cmvar_uid is not None and cmvar_uid.label in ["sos"]: + return [g for g in grids if g in ["", "1deg"]] + elif cmvar_uid is not None and cmvar_uid.label in ["tos"] and (cmvar_uid.mipTable not in ["3hr"] or + internal_dict["allow_tos_3hr_1deg"]): + if internal_dict["adhoc_policy_do_add_1deg_grid_for_tos"]: + list_grids = list() + if "" in grids: + list_grids.append("") + list_grids.append("1deg") + return list_grids + else: + return [g for g in grids if g in ["", "1deg"]] + else: + ngrids = [g for g in grids if g not in ["1deg", "2deg", "100km", "50km"]] + # if "cfsites" in grids : return ["","cfsites"] + if len(ngrids) == 0: + ngrids = [""] # We should at least provide native grid 
+ return ngrids \ No newline at end of file diff --git a/dr2xml/laboratories/__init__.py b/dr2xml/laboratories/__init__.py index 3aebf49d..9098c7ae 100644 --- a/dr2xml/laboratories/__init__.py +++ b/dr2xml/laboratories/__init__.py @@ -23,6 +23,9 @@ def initialize_laboratory_settings(): if institution_id in ["CNRM-CERFACS", "CNRM", "lfpw"]: from . import CNRM_CERFACS laboratory_source = CNRM_CERFACS + elif institution_id in ["IPSL", "ipsl"]: + from . import IPSL + laboratory_source = IPSL else: laboratory_used = internal_dict["laboratory_used"] if laboratory_used is not None: diff --git a/dr2xml/pingfiles_interface.py b/dr2xml/pingfiles_interface.py index df0f30fc..fa3493c0 100644 --- a/dr2xml/pingfiles_interface.py +++ b/dr2xml/pingfiles_interface.py @@ -18,7 +18,7 @@ from .utils import Dr2xmlError # Logger -from logger import get_logger +from utilities.logger import get_logger # Global variables and configuration tools from .config import get_config_variable, add_value_in_dict_config_variable, set_config_variable, \ @@ -98,7 +98,7 @@ def read_xml_elmt_or_attrib(filename, tag='field', attrib=None): logger.debug("") return rep else: - logger.info("No file ") + logger.info("No file") return None @@ -149,7 +149,7 @@ def check_for_file_input(sv, hgrid): logger.debug(remap_grid_def) # Create xml for reading the variable - filename = externs[sv.label][hgrid][get_settings_values("internal_values", "grid_choice")] + filename = externs[sv.label][hgrid][internal_dict["select_grid_choice"]] file_id = "remapped_{}_file".format(sv.label) field_in_file_id = "_".join([sv.label, hgrid]) # field_in_file_id=sv.label @@ -218,16 +218,10 @@ def ping_file_for_realms_list(context, svars, lrealms, path_special, dummy="fiel for table in svars: for v in svars[table]: added = False - if exact: - if any([v.modeling_realm == r for r in lrealms]): - lvars.append(v) - added = True - else: - var_realms = v.modeling_realm.split(" ") - if any([v.modeling_realm == r or r in var_realms - for r in 
lrealms]): - lvars.append(v) - added = True + if len(set(v.modeling_realm) & set(lrealms)) > 0 or \ + (not(exact) and len(set(lrealms) & v.set_modeling_realms) > 0): + lvars.append(v) + added = True if not added and context in internal_values['orphan_variables'] and \ v.label in internal_values['orphan_variables'][context]: lvars.append(v) @@ -242,8 +236,7 @@ def ping_file_for_realms_list(context, svars, lrealms, path_special, dummy="fiel best_prio[v.label_non_ambiguous].append(v) if v.label_without_psuffix is not None: best_prio[v.label_without_psuffix].append(v) - lvars = [sorted(list(best_prio[elt]), key=lambda x: x.Priority, reverse=True)[0] for elt in best_prio] - + lvars = [sorted(best_prio[elt], key=lambda x: x.Priority)[0] for elt in best_prio] # lvars=uniques lvars.sort(key=lambda x: x.label_without_psuffix) # @@ -288,7 +281,7 @@ def ping_file_for_realms_list(context, svars, lrealms, path_special, dummy="fiel if isinstance(comments, six.string_types): xml_fields.append(DR2XMLComment(text=comments)) xml_fields.append(DR2XMLComment(text="P%d (%s) %s : %s" % - (v.Priority, v.units, v.stdname, v.description))) + (v.Priority, v.units, v.stdname, v.description.replace(" \n", os.linesep)))) if 'atmos' in lrealms or 'atmosChem' in lrealms or 'aerosol' in lrealms: for tab in ["ap", "ap_bnds", "b", "b_bnds"]: xml_fields.append(DR2XMLElement(tag="field", id="%s%s" % (prefix, tab), field_ref="dummy_hyb")) diff --git a/dr2xml/plevs_unions.py b/dr2xml/plevs_unions.py index a56756ec..3fabd6f1 100644 --- a/dr2xml/plevs_unions.py +++ b/dr2xml/plevs_unions.py @@ -14,7 +14,7 @@ from .config import get_config_variable # Logger -from logger import get_logger +from utilities.logger import get_logger # Interface to settings dictionaries from .settings_interface import get_settings_values @@ -54,7 +54,7 @@ def create_xios_axis_and_grids_for_plevs_unions(svars, plev_sfxs, dummies): # First, search plev unions for each label_without_psuffix and build dict_plevs dict_plevs = 
OrderedDict() for sv in svars: - if not sv.modeling_realm: + if len(sv.modeling_realm) == 0: logger.warning("Warning: no modeling_realm associated to: %s %s %s" % (sv.label, sv.mipTable, sv.mip_era)) for sd in sv.sdims.values(): # couvre les dimensions verticales de type 'plev7h' ou 'p850' @@ -98,8 +98,6 @@ def create_xios_axis_and_grids_for_plevs_unions(svars, plev_sfxs, dummies): # print "for var %s/%s, dim %s is not related to pressure"%(sv.label,sv.label_without_psuffix,sd.label) # # Second, create xios axis for union of plevs - union_axis_defs = axis_defs - union_grid_defs = grid_defs for lwps in list(dict_plevs): sdim_union = get_dr_object("SimpleDim") plevs_union_xios = "" @@ -132,7 +130,7 @@ def create_xios_axis_and_grids_for_plevs_unions(svars, plev_sfxs, dummies): sdim_union.is_union_for.append(sv.label + "_" + sd.label) else: logger.warning("Warning: No requested nor value found for %s with vertical dimension %s" - % (svar.label, plev)) + % (sv.label, plev)) plevs_union = plevs_union.union(plev_values) logger.debug(" -- on %s : %s" % (plev, plev_values)) logger.debug(" * %s (%s)" % (sv.label, sv.mipTable)) diff --git a/dr2xml/postprocessing.py b/dr2xml/postprocessing.py index befa93f4..10f4617d 100644 --- a/dr2xml/postprocessing.py +++ b/dr2xml/postprocessing.py @@ -12,7 +12,7 @@ from .utils import Dr2xmlError # Logger -from logger import get_logger +from utilities.logger import get_logger # Global variables and configuration tools from .config import get_config_variable, add_value_in_dict_config_variable diff --git a/dr2xml/projects/C3S-SF.py b/dr2xml/projects/C3S-SF.py index 9cf4ecd6..55dfcecd 100644 --- a/dr2xml/projects/C3S-SF.py +++ b/dr2xml/projects/C3S-SF.py @@ -37,10 +37,16 @@ def convert_frequency(freq): def convert_realm(realm): - if realm in ["ocean", "seaIce"]: + if not isinstance(realm, (list, set, tuple)): + realm = [realm, ] + if "ocean" in realm or "seaIce" in realm: realm = "nemo", - if realm in ["land", ]: + elif "land" in realm: realm 
= "atmo" + elif len(realm) == 1: + realm = list(realm)[0] + else: + raise ValueError("Unable to figure out the realm to be used.") return realm diff --git a/dr2xml/projects/CMIP7.py b/dr2xml/projects/CMIP7.py new file mode 100644 index 00000000..e59bbc01 --- /dev/null +++ b/dr2xml/projects/CMIP7.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +CMIP7 python tools +""" + +from __future__ import print_function, division, absolute_import, unicode_literals + +from dr2xml.projects.projects_interface_definitions import ParameterSettings, ValueSettings, FunctionSettings, \ + TagSettings, ConditionSettings + +parent_project_settings = "CMIP6" + +internal_values = dict() + +common_values = dict() + +project_settings = dict() diff --git a/dr2xml/projects/CORDEX-CMIP6.py b/dr2xml/projects/CORDEX-CMIP6.py index 098a6839..434cefb4 100644 --- a/dr2xml/projects/CORDEX-CMIP6.py +++ b/dr2xml/projects/CORDEX-CMIP6.py @@ -12,8 +12,8 @@ def build_filename(frequency, prefix, source_id, expid_in_filename, date_range, var_type, list_perso_dev_file, label, - mipVarLabel, domain_id, driving_source_id, driving_variant_label, rcm_version_id, institution_id, - source_configuration_id, use_cmorvar=False): + mipVarLabel, domain_id, driving_source_id, driving_variant_label, institution_id, + version_realization, use_cmorvar=False): if "fx" in frequency: varname_for_filename = label else: @@ -25,8 +25,8 @@ def build_filename(frequency, prefix, source_id, expid_in_filename, date_range, if label in ["tsland", ]: varname_for_filename = "tsland" filename = "_".join(([elt for elt in [varname_for_filename, domain_id, driving_source_id, expid_in_filename, - driving_variant_label, institution_id, source_id, rcm_version_id, - source_configuration_id, frequency] if + driving_variant_label, institution_id, source_id, + version_realization, frequency] if len(str(elt)) > 0])) if var_type in ["perso", "dev"]: with open(list_perso_dev_file, mode="a", encoding="utf-8") as 
list_perso_and_dev: @@ -62,9 +62,9 @@ def build_filename(frequency, prefix, source_id, expid_in_filename, date_range, conventions_version=ParameterSettings( key="conventions_version", default_values=[ - ValueSettings(key_type="config", keys="CMIP6_conventions_version") - ], - help="Version of the conventions used." + "CF-1.11", + ValueSettings(key_type="laboratory", keys="conventions_version") + ] ), HDL=ParameterSettings( key="HDL", @@ -74,22 +74,6 @@ def build_filename(frequency, prefix, source_id, expid_in_filename, date_range, "21.14103" ] ), - variant_label=ParameterSettings( - key="variant_label", - default_values=[ - ValueSettings( - key_type="combine", - keys=[ - ValueSettings(key_type="internal", keys="realization_index"), - ValueSettings(key_type="common", keys="initialization_index"), - ValueSettings(key_type="common", keys="physics_index"), - ValueSettings(key_type="common", keys="forcing_index") - ], - fmt="r{}i{}p{}f{}" - ) - ], - help="Label of the variant done." - ), domain=ParameterSettings( key="domain", default_values=[ @@ -100,8 +84,7 @@ def build_filename(frequency, prefix, source_id, expid_in_filename, date_range, ValueSettings(key_type="internal", keys="context") ] ) - ], - help="Dictionary which contains, for each context, the associated domain." + ] ), domain_id=ParameterSettings( key="domain_id", @@ -113,68 +96,62 @@ def build_filename(frequency, prefix, source_id, expid_in_filename, date_range, ValueSettings(key_type="internal", keys="context") ] ) - ], - help="Dictionary which contains, for each context, the associated domain id." + ] ), - version_realisation=ParameterSettings( - key="version_realisation", + driving_experiment=ParameterSettings( + key="driving_experiment", default_values=[ ValueSettings( key_type="simulation", - keys="version_realisation" + keys="driving_experiment" ) - ], - help="Version of the realisation done." 
+ ] ), - driving_source_id=ParameterSettings( - key="driving_source_id", + driving_experiment_id=ParameterSettings( + key="driving_experiment_id", default_values=[ ValueSettings( key_type="simulation", - keys="driving_source_id" + keys="driving_experiment_id" ) - ], - help="Id of the driving model." + ] ), - driving_variant_label=ParameterSettings( - key="driving_variant_label", + driving_institution_id=ParameterSettings( + key="driving_institution_id", default_values=[ ValueSettings( key_type="simulation", - keys="driving_variant_label" + keys="driving_institution_id" ) - ], - help="Id of the driving variant." + ] ), - driving_experiment_id=ParameterSettings( - key="driving_experiment_id", + driving_source_id=ParameterSettings( + key="driving_source_id", default_values=[ ValueSettings( key_type="simulation", - keys="driving_experiment_id" + keys="driving_source_id" ) - ], - help="Id of the experiment which drives the current simulation." + ] ), - driving_institution_id=ParameterSettings( - key="driving_institution_id", + driving_variant_label=ParameterSettings( + key="driving_variant_label", default_values=[ ValueSettings( key_type="simulation", - keys="driving_institution_id" + keys="driving_variant_label" ) - ], - help="Id of the institution of the driving model." + ] ), - driving_experiment=ParameterSettings( - key="driving_experiment", + version_realization=ParameterSettings( + key="version_realization", default_values=[ ValueSettings( key_type="simulation", - keys="driving_experiment" - ) - ], - help="Id of the experiment which drives the current simulation." 
+ keys="version_realization" + ), + "v1-r1" + ] ), Lambert_conformal_longitude_of_central_meridian=ParameterSettings( key="Lambert_conformal_longitude_of_central_meridian", @@ -183,8 +160,7 @@ def build_filename(frequency, prefix, source_id, expid_in_filename, date_range, key_type="simulation", keys="Lambert_conformal_longitude_of_central_meridian" ) - ], - help="Longitude of central meridian of the Lambert conformal projection." + ] ), Lambert_conformal_standard_parallel=ParameterSettings( key="Lambert_conformal_standard_parallel", @@ -193,8 +169,7 @@ def build_filename(frequency, prefix, source_id, expid_in_filename, date_range, key_type="simulation", keys="Lambert_conformal_standard_parallel" ) - ], - help="Standard parallel of the Lambert conformal projection." + ] ), Lambert_conformal_latitude_of_projection_origin=ParameterSettings( key="Lambert_conformal_latitude_of_projection_origin", @@ -203,19 +178,8 @@ def build_filename(frequency, prefix, source_id, expid_in_filename, date_range, key_type="simulation", keys="Lambert_conformal_latitude_of_projection_origin" ) - ], - help="Latitude of central meridian of the Lambert conformal projection." + ] ), - rcm_version_id=ParameterSettings( - key="rcm_version_id", - default_values=[ - ValueSettings( - key_type="simulation", - keys="rcm_version_id" - ) - ], - help="Version id of the regional model used." 
- ) ) project_settings = dict( @@ -225,12 +189,10 @@ def build_filename(frequency, prefix, source_id, expid_in_filename, date_range, comments_constraints=dict( CV_version=ParameterSettings( key="CV_version", - help="Controled vocabulary version used.", default_values=["CMIP6-CV version ??", ] ), conventions_version=ParameterSettings( key="conventions_version", - help="Conventions version used.", default_values=[ ValueSettings(key_type="common", keys="conventions_version", fmt="CMIP6_conventions_version {}") ] @@ -258,25 +220,28 @@ def build_filename(frequency, prefix, source_id, expid_in_filename, date_range, domain_id=ValueSettings(key_type="common", keys="domain_id"), driving_source_id=ValueSettings(key_type="common", keys="driving_source_id"), driving_variant_label=ValueSettings(key_type="common", keys="driving_variant_label"), - rcm_version_id=ValueSettings(key_type="common", keys="rcm_version_id"), - institution_id=ValueSettings(key_type="common", keys="institution_id"), - source_configuration_id=ValueSettings(key_type="common", keys="source_configuration_id") + institution_id=ValueSettings(key_type="internal", keys="institution_id"), + version_realization=ValueSettings(key_type="common", keys="version_realization") ) )) ], fatal=True ) ), - vars_list=["activity_id", "contact", "data_specs_version", "dr2xml_version", "expid_in_filename", - "external_variables", "frequency", "grid", "grid_label", "nominal_resolution", "comment", "history", - "institution_id", "domain", "domain_id", "driving_source_id", "driving_variant_label", - "driving_experiment_id", "driving_experiment", "driving_institution_id", + vars_list=["activity_id", "comment", "contact", "conventions_version", "dr2xml_version", + "domain", "domain_id", + "driving_experiment", "driving_experiment_id", "driving_institution_id", + "driving_source_id", "driving_variant_label", + "expid_in_filename", # EXPID + "external_variables", "frequency", "grid", + "history","institution","institution_id", 
"Lambert_conformal_longitude_of_central_meridian", "Lambert_conformal_standard_parallel", - "Lambert_conformal_latitude_of_projection_origin", "institution", "parent_experiment_id", - "parent_mip_era", "parent_activity_id", "parent_source_id", "parent_time_units", - "parent_variant_label", "branch_time_in_parent", "branch_time_in_child", "product", "mip_era", - "realization_index", "realm", "references", "source", "source_id", "source_type", "table_id", - "title", "variable_id", "version_realisation"], + "Lambert_conformal_latitude_of_projection_origin", + "license", "mip_era", "nominal_resolution", + "product", "project_id", + "realm", "references", "source", "source_id", "source_type", + "title", "variable_id", "version_realization"], + # rajoutés par xios : creation_date, tracking_id vars_constraints=dict( variable_id=ParameterSettings( key="variable_id", @@ -286,65 +251,73 @@ def build_filename(frequency, prefix, source_id, expid_in_filename, date_range, ), nominal_resolution=ParameterSettings( key="nominal_resolution", - output_key="native_resolution", - help="Nominal resolution of the model." + output_key="native_resolution" ), - driving_institution_id=ParameterSettings( - key="driving_institution_id", - help="Id of the institution of the driving model." + version_realization=ParameterSettings( + key="version_realization", + default_values=[ + ValueSettings(key_type="common", keys="version_realization") + ] ), - version_realisation=ParameterSettings( - key="version_realisation", + conventions_version=ParameterSettings( + key="conventions_version", default_values=[ - ValueSettings(key_type="common", keys="version_realisation") + ValueSettings(key_type="common", keys="conventions_version") ], - help="Version of the realisation done." + output_key="Conventions" ), domain=ParameterSettings( key="domain", default_values=[ ValueSettings(key_type="common", keys="domain") - ], - help="Dictionary which contains, for each context, the associated domain." 
+ ] ), domain_id=ParameterSettings( key="domain_id", default_values=[ ValueSettings(key_type="common", keys="domain_id") - ], - help="Dictionary which contains, for each context, the associated domain id." + ] ), driving_source_id=ParameterSettings( key="driving_source_id", default_values=[ ValueSettings(key_type="common", keys="driving_source_id") ], - fatal=True, - help="Member of the simulation which drives the simulation." + fatal=True ), driving_variant_label=ParameterSettings( key="driving_variant_label", default_values=[ ValueSettings(key_type="common", keys="driving_variant_label") ], - fatal=True, - help="Id of the driving variant." + fatal=True ), driving_experiment_id=ParameterSettings( key="driving_experiment_id", default_values=[ ValueSettings(key_type="common", keys="driving_experiment_id") ], - fatal=True, - help="Id of the experiment which drives the current simulation." + fatal=True ), driving_experiment=ParameterSettings( key="driving_experiment", default_values=[ ValueSettings(key_type="common", keys="driving_experiment") ], - fatal=True, - help="Id of the experiment which drives the current simulation." + fatal=True + ), + driving_institution_id=ParameterSettings( + key="driving_institution_id", + default_values=[ + ValueSettings(key_type="common", keys="driving_institution_id") + ], + fatal=True + ), + further_info_url=ParameterSettings( + key="further_info_url", + default_values=[ + ValueSettings(key_type="laboratory", keys="info_url"), + ] ), Lambert_conformal_longitude_of_central_meridian=ParameterSettings( key="Lambert_conformal_longitude_of_central_meridian", @@ -357,8 +330,7 @@ def build_filename(frequency, prefix, source_id, expid_in_filename, date_range, check_value=ValueSettings(key_type="internal", keys="context"), check_to_do="eq", reference_values="surfex" ) - ], - help="Longitude of central meridian of the Lambert conformal projection." 
+ ] ), Lambert_conformal_standard_parallel=ParameterSettings( key="Lambert_conformal_standard_parallel", @@ -371,8 +343,7 @@ def build_filename(frequency, prefix, source_id, expid_in_filename, date_range, check_value=ValueSettings(key_type="internal", keys="context"), check_to_do="eq", reference_values="surfex" ) - ], - help="Standard parallel of the Lambert conformal projection." + ] ), Lambert_conformal_latitude_of_projection_origin=ParameterSettings( key="Lambert_conformal_latitude_of_projection_origin", @@ -385,21 +356,59 @@ def build_filename(frequency, prefix, source_id, expid_in_filename, date_range, check_value=ValueSettings(key_type="internal", keys="context"), check_to_do="eq", reference_values="surfex" ) - ], - help="Latitude of central meridian of the Lambert conformal projection." + ] + ), + license=ParameterSettings( + key="license", + default_values=[ + ValueSettings(key_type="laboratory", keys="license"), + ] + ), + mip_era=ParameterSettings( + key="mip_era", + default_values=[ + ValueSettings(key_type="simulation", keys="mip_era"), + ValueSettings(key_type="laboratory", keys="mip_era"), + "CMIP6" + ] ), product=ParameterSettings( key="product", - default_values=["output", ] + default_values=["model-output", ] ), - source=ParameterSettings( - key="source", - fatal=True, - output_key="project_id" + project_id=ParameterSettings( + key="project_id", + default_values=["CORDEX-CMIP6", ] ), source_id=ParameterSettings( key="source_id", - output_key="model_id" + default_values=[ + ValueSettings(key_type="simulation", keys="source_id"), + ] + ), + source=ParameterSettings( + key="source", + default_values=[ + ValueSettings( + key_type="laboratory", + keys=[ + "source_description", + ValueSettings(key_type="internal", keys="source_id") + ] + ) + ] + ), + source_type=ParameterSettings( + key="source_type", + default_values=[ + ValueSettings( + key_type="laboratory", + keys=[ + "source_types", + ValueSettings(key_type="internal", keys="source_id") + ] + ) + ] 
), title=ParameterSettings( key="title", @@ -408,22 +417,12 @@ def build_filename(frequency, prefix, source_id, expid_in_filename, date_range, key_type="combine", keys=[ ValueSettings(key_type="internal", keys="source_id"), - "CMIP6", - ValueSettings(key_type="common", keys="activity_id"), - ValueSettings(key_type="simulation", keys="expid_in_filename") + "CORDEX-CMIP6", + ValueSettings(key_type="common", keys="expid_in_filename"), + ValueSettings(key_type="common", keys="driving_experiment") ], - fmt="{} model output prepared for {} and {} / {} simulation" + fmt="{} model output prepared for {} / {} simulation driven by {}" ), - ValueSettings( - key_type="combine", - keys=[ - ValueSettings(key_type="internal", keys="source_id"), - "CMIP6", - ValueSettings(key_type="common", keys="activity_id"), - ValueSettings(key_type="internal", keys="experiment_id") - ], - fmt="{} model output prepared for {} / {} {}" - ) ] ) ) @@ -442,8 +441,7 @@ def build_filename(frequency, prefix, source_id, expid_in_filename, date_range, vars_constraints=dict( grid_mapping=ParameterSettings( key="grid_mapping", - help="Grid mapping associated with the file.", - default_values=["Lambert_Conformal", ], + default_values=["crs", ], conditions=[ ConditionSettings(check_value=ValueSettings(key_type="internal", keys="context"), check_to_do="eq", reference_values="surfex") diff --git a/dr2xml/projects/basics.py b/dr2xml/projects/basics.py index a2090cc7..97b45e5f 100644 --- a/dr2xml/projects/basics.py +++ b/dr2xml/projects/basics.py @@ -389,7 +389,9 @@ def compute_nb_days(year_ref, year_branch, month_ref=1, month_branch=1, day_ref= key="DR_version", help="Version of the Data Request used", default_values=[ - ValueSettings(key_type="common", keys="data_specs_version", fmt="CMIP6 Data Request version {}") + ValueSettings(key_type="combine", fmt="{} Data Request version {}", + keys=[ValueSettings(key_type="internal", keys="data_request_used"), + ValueSettings(key_type="common", 
keys="data_specs_version")]) ] ), dr2xml_version=ParameterSettings( @@ -417,7 +419,7 @@ def compute_nb_days(year_ref, year_branch, month_ref=1, month_branch=1, day_ref= key="year", help="Year used for the dr2xml's launch", default_values=[ - ValueSettings(key_type="common", keys="year", fmt="Year processed {}") + ValueSettings(key_type="internal", keys="year", fmt="Year processed {}") ] ) ) @@ -956,7 +958,8 @@ def compute_nb_days(year_ref, year_branch, month_ref=1, month_branch=1, day_ref= key="realm", help="Realm associated with the file.", default_values=[ - ValueSettings(key_type="variable", keys="modeling_realm") + ValueSettings(key_type="variable", keys="modeling_realm", + func=lambda x: ",".join(x) if isinstance(x, list) else x) ] ), frequency=ParameterSettings( diff --git a/dr2xml/projects/dr2xml.py b/dr2xml/projects/dr2xml.py index 59dbc4e0..248ea6db 100644 --- a/dr2xml/projects/dr2xml.py +++ b/dr2xml/projects/dr2xml.py @@ -7,11 +7,72 @@ from __future__ import print_function, division, absolute_import, unicode_literals -from dr2xml.projects.projects_interface_definitions import ValueSettings, ParameterSettings, TagSettings +import os.path +from dr2xml.projects.projects_interface_definitions import ValueSettings, ParameterSettings, TagSettings, \ + FunctionSettings, CaseSettings, ConditionSettings +from utilities.logger import get_logger parent_project_settings = None + +def format_sizes(*sizes): + """ + Transform into a dict the sizes values provided as ['nho', 'nlo', 'nha', 'nla', 'nlas', 'nls', 'nh1'], with: + - nho: oce grid size + - nlo : oce nb levels + - nha: atm grid size + - nla: atm nb levels + - nlas: + - nls : soil nb of levels + - nh1 : number of latitude (atmosphere/ocean grids) + Also provide others infor such as: + - nb cosp sites (default 129) + - nb lidar temp (default 40) + - nb_parasol_refl (default 5) + - nb isccp tau (default 7) + - nb isccp pc (default 5) + - nb curtains sites (default 1000) + :param dict or list sizes: dict containing 
the sizes as a list or dict + :return dict: dictionary containing sizes as a dict + """ + logger = get_logger() + rep = dict(nho=None, nlo=None, nha=None, nla=None, nlas=None, nls=None, nh1=None, + nb_cosp_sites=129, nb_lidar_temp=40, nb_parasol_refl=5, nb_isccp_tau=7, nb_isccp_pc=7, + nb_curtain_sites=1000) + if isinstance(sizes, (list, tuple)) and len(sizes) == 1 and isinstance(sizes[0], (dict, list, tuple)): + sizes = sizes[0] + if isinstance(sizes, (list, tuple)): + mcfg = dict() + for (key, val) in zip(['nho', 'nlo', 'nha', 'nla', 'nlas', 'nls', 'nh1'], sizes): + mcfg[key] = val + rep.update(mcfg) + elif isinstance(sizes, dict): + rep.update(sizes) + else: + logger.error("Unable to transform sizes to get relevant information.") + raise ValueError("Unable to transform sizes to get relevant information.") + issues_values = [elt for elt in rep if rep[elt] is None] + if len(issues_values) > 0: + logger.error(f"The values provided by sizes must not be None, issues with {issues_values}.") + raise ValueError(f"The values provided by sizes must not be None, issues with {issues_values}.") + return rep + + +def sort_mips(*mips): + if isinstance(mips, (list, tuple)) and len(mips) == 1 and isinstance(mips[0], (dict, set, list)): + mips = mips[0] + elif len(mips) == 0: + mips = list() + rep = set() + if isinstance(mips, dict): + for grid in mips: + rep = rep | mips[grid] + else: + rep = mips + return sorted(list(rep)) + + internal_values = dict( xios_version=ParameterSettings( key="xios_version", @@ -79,6 +140,383 @@ fatal=True, help="Context associated with the xml file produced." ), + select=ParameterSettings( + key="select", + default_values=[ + ValueSettings(key_type="dict", keys="select") + ], + authorized_values=["on_expt_and_year", "on_expt", "no"], + fatal=True, + help="Selection strategy for variables." 
+ ), + select_on_expt=ParameterSettings( + key="select_on_expt", + cases=[ + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select"), + check_to_do="eq", + reference_values=["on_expt_and_year", "on_expt"]), + value=True + ), + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select"), + check_to_do="eq", + reference_values=["no", ]), + value=False + ) + ], + fatal=True, + help="Should data be selected on experiment?" + ), + select_on_year=ParameterSettings( + key="select_on_year", + cases=[ + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select"), + check_to_do="eq", + reference_values=["on_expt_and_year", ]), + value=ValueSettings(key_type="internal", keys="year") + ), + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select"), + check_to_do="eq", + reference_values=["no", "on_expt"]), + value=None + ) + ], + fatal=True, + help="Should data be selected on year?" + ), + select_grid_choice=ParameterSettings( + key="select_grid_choice", + cases=[ + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=True), + value=ValueSettings(key_type="internal", keys="grid_choice") + ), + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=False), + value="LR" + ) + ], + fatal=True, + help="Grid choice for variable selection." 
+ ), + select_sizes=ParameterSettings( + key="select_sizes", + cases=[ + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=True), + value=ValueSettings(key_type="internal", keys="sizes") + ), + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=False), + value=None + ) + ], + fatal=True, + help="Sizes for variable selection." + ), + select_max_priority=ParameterSettings( + key="select_max_priority", + cases=[ + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=True), + value=ValueSettings(key_type="internal", keys="max_priority") + ), + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=False), + value=ValueSettings(key_type="internal", keys="max_priority_lset") + ) + ], + fatal=True, + help="Max priority for variable selection." + ), + select_tierMax=ParameterSettings( + key="select_tierMax", + cases=[ + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=True), + value=ValueSettings(key_type="internal", keys="tierMax") + ), + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=False), + value=ValueSettings(key_type="internal", keys="tierMax_lset") + ) + ], + fatal=True, + help="tierMax for variable selection." 
+ ), + select_included_vars=ParameterSettings( + key="select_included_vars", + cases=[ + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=True), + value=ValueSettings(key_type="internal", keys="included_vars") + ), + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=False), + value=ValueSettings(key_type="internal", keys="included_vars_lset") + ) + ], + fatal=True, + help="Included variables for variable selection." + ), + select_included_tables=ParameterSettings( + key="select_included_tables", + cases=[ + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=True), + value=ValueSettings(key_type="internal", keys="included_tables") + ), + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=False), + value=ValueSettings(key_type="internal", keys="included_tables_lset") + ) + ], + fatal=True, + help="Included tables for variable selection." + ), + select_included_opportunities=ParameterSettings( + key="select_included_opportunities", + cases=[ + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=True), + value=ValueSettings(key_type="internal", keys="included_opportunities") + ), + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=False), + value=ValueSettings(key_type="internal", keys="included_opportunities_lset") + ) + ], + fatal=True, + help="Included opportunities for variable selection." 
+ ), + select_included_vargroups=ParameterSettings( + key="select_included_vargroups", + cases=[ + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=True), + value=ValueSettings(key_type="internal", keys="included_vargroups") + ), + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=False), + value=ValueSettings(key_type="internal", keys="included_vargroups_lset") + ) + ], + fatal=True, + help="Included variables groups for variable selection." + ), + select_included_request_links=ParameterSettings( + key="select_included_request_links", + cases=[ + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=True), + value=ValueSettings(key_type="internal", keys="included_request_links") + ), + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=False), + value=None + ) + ], + fatal=True, + help="Included request links for variable selection." + ), + select_excluded_request_links=ParameterSettings( + key="select_excluded_request_links", + cases=[ + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=True), + value=ValueSettings(key_type="internal", keys="excluded_request_links") + ), + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=False), + value=None + ) + ], + fatal=True, + help="Excluded request links for variable selection." 
+ ), + select_excluded_vars=ParameterSettings( + key="select_excluded_vars", + cases=[ + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=True), + value=ValueSettings(key_type="merge", + keys=[ + ValueSettings(key_type="internal", keys="excluded_vars_lset"), + ValueSettings(key_type="internal", keys="excluded_vars_sset"), + ValueSettings(key_type="internal", keys="excluded_vars_per_config") + ]) + ), + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=False), + value=ValueSettings(key_type="internal", keys="excluded_vars_lset") + ) + ], + fatal=True, + help="Excluded variables for variable selection." + ), + select_excluded_tables=ParameterSettings( + key="select_excluded_tables", + cases=[ + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=True), + value=ValueSettings(key_type="merge", + keys=[ + ValueSettings(key_type="internal", keys="excluded_tables_lset"), + ValueSettings(key_type="internal", keys="excluded_tables_sset") + ]) + ), + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=False), + value=ValueSettings(key_type="internal", keys="excluded_tables_lset") + ) + ], + fatal=True, + help="Excluded tables for variable selection." 
+ ), + select_excluded_opportunities=ParameterSettings( + key="select_excluded_opportunities", + cases=[ + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=True), + value=ValueSettings(key_type="merge", + keys=[ + ValueSettings(key_type="internal", keys="excluded_opportunities_lset"), + ValueSettings(key_type="internal", keys="excluded_opportunities_sset") + ]) + ), + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=False), + value=ValueSettings(key_type="internal", keys="excluded_opportunities_lset") + ) + ], + fatal=True, + help="Excluded opportunities for variable selection." + ), + select_excluded_vargroups=ParameterSettings( + key="select_excluded_vargroups", + cases=[ + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=True), + value=ValueSettings(key_type="merge", + keys=[ + ValueSettings(key_type="internal", keys="excluded_vargroups_lset"), + ValueSettings(key_type="internal", keys="excluded_vargroups_sset") + ]) + ), + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=False), + value=ValueSettings(key_type="internal", keys="excluded_vargroups_lset") + ) + ], + fatal=True, + help="Excluded variables groups for variable selection." 
+ ), + select_excluded_pairs=ParameterSettings( + key="select_excluded_pairs", + cases=[ + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=True), + value=ValueSettings(key_type="merge", + keys=[ + ValueSettings(key_type="internal", keys="excluded_pairs_lset"), + ValueSettings(key_type="internal", keys="excluded_pairs_sset") + ]) + ), + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=False), + value=ValueSettings(key_type="internal", keys="excluded_pairs_lset") + ) + ], + fatal=True, + help="Excluded pairs for variable selection." + ), + select_mips=ParameterSettings( + key="select_mips", + cases=[ + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=True), + value=ValueSettings(key_type="internal", + keys=[ + "mips", + ValueSettings(key_type="internal", keys="select_grid_choice") + ], + func=sort_mips) + ), + CaseSettings( + conditions=ConditionSettings(check_value=ValueSettings(key_type="internal", keys="select_on_expt"), + check_to_do="eq", + reference_values=False), + value=ValueSettings(key_type="internal", keys="mips", func=sort_mips) + ) + ], + fatal=True, + help="MIPs for variable selection." + ), path_to_parse=ParameterSettings( key="path_to_parse", default_values=[ @@ -238,7 +676,8 @@ keys=[ "sizes", ValueSettings(key_type="internal", keys="grid_choice") - ] + ], + func=FunctionSettings(func=format_sizes) ) ], fatal=True, @@ -260,6 +699,7 @@ ValueSettings(key_type="laboratory", keys="excluded_request_links"), list() ], + target_type="list", help="List of links un data request that should not been followed (those request are not taken into account)." 
), included_request_links=ParameterSettings( @@ -268,6 +708,7 @@ ValueSettings(key_type="laboratory", keys="included_request_links"), list() ], + target_type="list", help="List of the request links that will be processed (all others will not)." ), excluded_tables_lset=ParameterSettings( @@ -276,6 +717,7 @@ ValueSettings(key_type="laboratory", keys="excluded_tables"), list() ], + target_type="list", help="List of the tables that will be excluded from outputs from laboratory settings." ), excluded_tables_sset=ParameterSettings( @@ -284,6 +726,7 @@ ValueSettings(key_type="simulation", keys="excluded_tables"), list() ], + target_type="list", help="List of the tables that will be excluded from outputs from simulation settings." ), excluded_spshapes_lset=ParameterSettings( @@ -292,6 +735,7 @@ ValueSettings(key_type="laboratory", keys="excluded_spshapes"), list() ], + target_type="list", help="The list of shapes that should be excluded (all variables in those shapes will be excluded from outputs)." ), excluded_vars_lset=ParameterSettings( @@ -300,6 +744,7 @@ ValueSettings(key_type="laboratory", keys="excluded_vars"), list() ], + target_type="list", help="List of CMOR variables to exclude from the result based on previous Data Request extraction from " "laboratory settings." ), @@ -309,6 +754,7 @@ ValueSettings(key_type="simulation", keys="excluded_vars"), list() ], + target_type="list", help="List of CMOR variables to exclude from the result based on previous Data Request extraction from " "simulation settings." ), @@ -318,6 +764,7 @@ ValueSettings(key_type="laboratory", keys="excluded_pairs"), list() ], + target_type="list", help="You can exclude some (variable, table) pairs from outputs. " "A list of tuple (variable, table) to be excluded from laboratory settings." ), @@ -327,6 +774,7 @@ ValueSettings(key_type="simulation", keys="excluded_pairs"), list() ], + target_type="list", help="You can exclude some (variable, table) pairs from outputs. 
" "A list of tuple (variable, table) to be excluded from simulation settings." ), @@ -336,6 +784,7 @@ ValueSettings(key_type="laboratory", keys="included_tables"), list() ], + target_type="list", help="List of tables that will be processed (all others will not) from laboratory settings." ), included_tables=ParameterSettings( @@ -344,6 +793,7 @@ ValueSettings(key_type="simulation", keys="included_tables"), ValueSettings(key_type="internal", keys="included_tables_lset") ], + target_type="list", help="List of tables that will be processed (all others will not)." ), included_vars_lset=ParameterSettings( @@ -352,6 +802,7 @@ ValueSettings(key_type="laboratory", keys="included_vars"), list() ], + target_type="list", help="Variables to be considered from the Data Request (all others will not) from laboratory settings." ), included_vars=ParameterSettings( @@ -360,6 +811,7 @@ ValueSettings(key_type="simulation", keys="included_vars"), ValueSettings(key_type="internal", keys="included_vars_lset") ], + target_type="list", help="Variables to be considered from the Data Request (all others will not)" ), excluded_vars_per_config=ParameterSettings( @@ -377,6 +829,78 @@ help="A dictionary which keys are configurations and values the list of variables that must be excluded for " "each configuration." ), + included_opportunities_lset=ParameterSettings( + key="included_opportunities_lset", + default_values=[ + ValueSettings(key_type="laboratory", keys="included_opportunities"), + list() + ], + target_type="list", + help="List of opportunities that will be processed (all others will not) from laboratory settings." + ), + included_opportunities=ParameterSettings( + key="included_opportunities", + default_values=[ + ValueSettings(key_type="simulation", keys="included_opportunities"), + ValueSettings(key_type="internal", keys="included_opportunities_lset") + ], + target_type="list", + help="List of opportunities that will be processed (all others will not)." 
+ ), + included_vargroups_lset=ParameterSettings( + key="included_vargroups_lset", + default_values=[ + ValueSettings(key_type="laboratory", keys="included_vargroups"), + list() + ], + target_type="list", + help="List of variables groups that will be processed (all others will not) from laboratory settings." + ), + included_vargroups=ParameterSettings( + key="included_vargroups", + default_values=[ + ValueSettings(key_type="simulation", keys="included_vargroups"), + ValueSettings(key_type="internal", keys="included_vargroups_lset") + ], + target_type="list", + help="List of variables groups that will be processed (all others will not)." + ), + excluded_opportunities_lset=ParameterSettings( + key="excluded_opportunities_lset", + default_values=[ + ValueSettings(key_type="laboratory", keys="excluded_opportunities"), + list() + ], + target_type="list", + help="List of the opportunities that will be excluded from outputs from laboratory settings." + ), + excluded_opportunities_sset=ParameterSettings( + key="excluded_opportunities_sset", + default_values=[ + ValueSettings(key_type="simulation", keys="excluded_opportunities"), + list() + ], + target_type="list", + help="List of the opportunities that will be excluded from outputs from simulation settings." + ), + excluded_vargroups_lset=ParameterSettings( + key="excluded_vargroups_lset", + default_values=[ + ValueSettings(key_type="laboratory", keys="excluded_vargroups"), + list() + ], + target_type="list", + help="List of the variables groups that will be excluded from outputs from laboratory settings." + ), + excluded_vargroups_sset=ParameterSettings( + key="excluded_vargroups_sset", + default_values=[ + ValueSettings(key_type="simulation", keys="excluded_vargroups"), + list() + ], + target_type="list", + help="List of the variables groups that will be excluded from outputs from simulation settings." 
+ ), experiment_id=ParameterSettings( key="experiment_id", default_values=[ @@ -802,7 +1326,7 @@ ValueSettings(key_type="laboratory", keys="data_request_used"), "CMIP6" ], - help="Version of the data request used." + help="The Data Request infrastructure type which should be used." ), data_request_path=ParameterSettings( key="data_request_path", @@ -810,7 +1334,23 @@ ValueSettings(key_type="laboratory", keys="data_request_path"), None ], - help="Path where the data request used is placed." + help="Path where the data request API used is placed." + ), + data_request_content_version=ParameterSettings( + key="data_request_content_version", + default_values=[ + ValueSettings(key_type="laboratory", keys="data_request_content_version"), + "latest_stable" + ], + help="Version of the data request content to be used" + ), + data_request_config=ParameterSettings( + key="data_request_config", + default_values=[ + ValueSettings(key_type="laboratory", keys="data_request_config"), + os.sep.join([os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "dr_interface", "CMIP7_config"]) + ], + help="Configuration file of the data request content to be used" ), laboratory_used=ParameterSettings( key="laboratory_used", @@ -880,6 +1420,14 @@ "Sometimes it is appropriate to list two (or more) model types here, among AER, AGCM, AOGCM, BGC, CHEM, " "ISM, LAND, OGCM, RAD, SLAB e.g. amip , run with CNRM-CM6-1, should quote \"AGCM AER\". " "Also see note 14 of https://docs.google.com/document/d/1h0r8RZr_f3-8egBMMh7aqLwy3snpD6_MrDz1q8n5XUk/edit" + ), + year=ParameterSettings( + key="year", + default_values=[ + ValueSettings(key_type="dict", keys="year") + ], + fatal=True, + help="Year associated with the launch of dr2xml." ) ) @@ -891,14 +1439,6 @@ ], fatal=True, help="Prefix to be used for each file definition." 
- ), - year=ParameterSettings( - key="year", - default_values=[ - ValueSettings(key_type="dict", keys="year") - ], - fatal=True, - help="Year associated with the launch of dr2xml." ) ) diff --git a/dr2xml/projects/ping.py b/dr2xml/projects/ping.py index fa81fc0c..a83cefa9 100644 --- a/dr2xml/projects/ping.py +++ b/dr2xml/projects/ping.py @@ -57,9 +57,13 @@ source_type=ParameterSettings( key="source_type", default_values=[None, ] + ), + path_special_defs=ParameterSettings( + key="path_special_defs", + default_values=[ValueSettings(key_type="laboratory", keys="path_special_defs")] ) ) -common_values = list() +common_values = dict() project_settings = dict() \ No newline at end of file diff --git a/dr2xml/projects/projects_interface_definitions.py b/dr2xml/projects/projects_interface_definitions.py index ed1ddfb5..9793cb07 100644 --- a/dr2xml/projects/projects_interface_definitions.py +++ b/dr2xml/projects/projects_interface_definitions.py @@ -15,8 +15,9 @@ from dr2xml.config import get_config_variable from dr2xml.settings_interface.py_settings_interface import format_dict_for_printing, is_key_in_lset, \ get_variable_from_lset_without_default, is_key_in_sset, get_variable_from_sset_without_default -from dr2xml.utils import Dr2xmlError, read_json_content -from logger import get_logger +from dr2xml.utils import Dr2xmlError +from utilities.json_tools import read_json_content +from utilities.logger import get_logger def return_value(value, common_dict=dict(), internal_dict=dict(), additional_dict=dict(), @@ -33,7 +34,7 @@ def return_value(value, common_dict=dict(), internal_dict=dict(), additional_dic def determine_value(key_type=None, keys=list(), func=None, fmt=None, src=None, common_dict=dict(), internal_dict=dict(), additional_dict=dict(), allow_additional_keytypes=True): logger = get_logger() - if key_type in ["combine", ] or (key_type is None and func is not None): + if key_type in ["combine", "merge"] or (key_type is None and func is not None): keys = 
[return_value(key, common_dict=common_dict, internal_dict=internal_dict, additional_dict=additional_dict, allow_additional_keytypes=allow_additional_keytypes) for key in keys] @@ -51,6 +52,10 @@ def determine_value(key_type=None, keys=list(), func=None, fmt=None, src=None, c if key_type in ["combine", ]: keys = [",".join(key) if isinstance(key, (list, tuple)) else key for key in keys] value = fmt.format(*keys) + elif key_type in ["merge", ]: + value = list() + for key in keys: + value.extend(key) else: if isinstance(func, FunctionSettings): found, value = func(*keys, additional_dict=additional_dict, internal_dict=internal_dict, @@ -59,8 +64,9 @@ def determine_value(key_type=None, keys=list(), func=None, fmt=None, src=None, c try: value = func(*keys) found = True - except: + except BaseException as e: logger.debug("Issue calling func %s with arguments %s" % (str(func), str(keys))) + logger.debug(str(e)) value = None found = False if found and fmt is not None: @@ -128,11 +134,7 @@ def determine_value(key_type=None, keys=list(), func=None, fmt=None, src=None, c else: value = None elif allow_additional_keytypes: - if key_type in ["scope", ] and allow_additional_keytypes: - from dr2xml.dr_interface import get_dr_object - value = get_dr_object("get_scope") - found = True - elif key_type in ["data_request", ] and allow_additional_keytypes: + if key_type in ["data_request", ] and allow_additional_keytypes: from dr2xml.dr_interface import get_dr_object value = get_dr_object("get_data_request") found = True @@ -184,7 +186,8 @@ def determine_value(key_type=None, keys=list(), func=None, fmt=None, src=None, c try: value = func(*value) found = True - except: + except Exception as e: + logger.debug(str(e)) value = None found = False if found and fmt is not None: @@ -308,6 +311,8 @@ def dump_doc(self, force_void=False): tmp_rep += "[%s]" % key_value elif key_type in ["combine", ]: tmp_rep = ", ".join(self.dump_doc_inner(self.keys, format_struct=False)) + elif key_type in ["merge", 
]: + tmp_rep = str(self.dump_doc_inner(self.keys, format_struct=False)) elif key_type in ["data_request", ]: tmp_rep = "%s" % key_type keys_values = self.dump_doc_inner(self.keys, format_struct=False) @@ -340,7 +345,8 @@ class ParameterSettings(Settings): def init_dict_default(self): return dict(skip_values=list(), forbidden_patterns=list(), conditions=list(), default_values=list(), cases=list(), authorized_values=list(), authorized_types=list(), corrections=dict(), - output_key=None, num_type="string", is_default=False, fatal=False, key=None, help="TODO") + output_key=None, num_type="string", is_default=False, fatal=False, key=None, help="TODO", + target_type=None) def dump_doc(self, force_void=False): rep = list() @@ -387,6 +393,8 @@ def __init__(self, *args, **kwargs): self.updated.add("is_default") if isinstance(self.authorized_types, list) and len(self.authorized_types) == 1: self.authorized_types = self.authorized_types[0] + if not self.target_type in ["list", "set", "str", None]: + raise ValueError("Target type must have a value among 'str', 'set', 'list', None.") def update(self, other): super(ParameterSettings, self).update(other) @@ -483,7 +491,8 @@ def find_value(self, is_value=False, value=None, internal_dict=dict(), common_di allow_additional_keytypes=allow_additional_keytypes) if test: test, value = self.correct_value(value, internal_values=internal_dict, common_values=common_dict, - additional_dict=dict(), allow_additional_keytypes=True) + additional_dict=dict(), + allow_additional_keytypes=allow_additional_keytypes) if test: relevant, test = self.check_value(value, internal_dict=internal_dict, common_dict=common_dict, additional_dict=additional_dict, @@ -507,10 +516,35 @@ def find_value(self, is_value=False, value=None, internal_dict=dict(), common_di test = test and relevant if not test: i += 1 - if not test and self.fatal and raise_on_error: + if test: + value = self.correct_target_type(value) + elif not test and self.fatal and raise_on_error: 
raise ValueError("Could not find a proper value for %s" % self.key) return test, value + def correct_target_type(self, value): + target_type = self.target_type + if target_type in ["list", ]: + if isinstance(value, set): + value = list(value) + elif isinstance(value, six.string_types): + value = [value, ] + elif not isinstance(value, list): + raise ValueError(f"Unable to transform {type(value)} into {target_type}.") + elif target_type in ["set", ]: + if isinstance(value, list): + value = set(value) + elif isinstance(value, six.string_types): + value = set([value, ]) + elif not isinstance(value, set): + raise ValueError(f"Unable to transform {type(value)} into {target_type}.") + elif target_type in ["str", ]: + if isinstance(value, (list, set)) and len(value) == 1: + value = self.correct_target_type(value[0]) + elif not isinstance(value, six.string_types): + value = str(value) + return value + class TagSettings(Settings): @@ -602,6 +636,7 @@ def __call__(self, *args, additional_dict=dict(), internal_dict=dict(), common_d value = self.func(*args, **self.options) except BaseException as e: logger.debug("Issue calling %s with arguments %s and options %s" % (str(self.func), str(args), str(self.options))) + logger.debug(str(e)) value = None test = False return test, value diff --git a/dr2xml/settings_interface/__init__.py b/dr2xml/settings_interface/__init__.py index 78d63194..e4fb471d 100644 --- a/dr2xml/settings_interface/__init__.py +++ b/dr2xml/settings_interface/__init__.py @@ -10,7 +10,8 @@ import copy from collections import OrderedDict -from logger import get_logger +from dr2xml.utils import Dr2xmlError +from utilities.logger import get_logger # Internal settings for dr2xml internal_settings = None @@ -55,13 +56,10 @@ def initialize_internal_values(force_reset=False): set_internal_value(key="initial_selection_configuration", value=dict()) if force_reset or get_settings_values("internal_values", "axis_count", default=None, is_default=True) is None: 
set_internal_value(key="axis_count", value=0) - if force_reset or get_settings_values("internal_values", "global_rls", default=None, is_default=True) is None: - set_internal_value(key="global_rls", value=list()) if force_reset or get_settings_values("internal_values", "cmor_vars", default=None, is_default=True) is None: set_internal_value(key="cmor_vars", value=list()) set_internal_value(key="sn_issues", value=OrderedDict()) set_internal_value(key="print_multiple_grids", value=False) - set_internal_value(key="grid_choice", value=None) def set_internal_value(key, value, action=False): @@ -120,3 +118,39 @@ def get_settings_values(*args, **kwargs): raise ValueError("Could not find a proper value: %s not in %s" % (args[i], settings)) else: return default + + +def get_values_from_internal_settings(*args, **kwargs): + internal_settings = get_settings_values("internal") + merge = kwargs.get("merge", False) + default = kwargs.get("default", list()) + if merge: + rep = list() + for arg in args: + if isinstance(arg, tuple): + is_relevant, key = arg + else: + is_relevant = True + key = arg + if is_relevant and key is not None: + rep.extend(internal_settings.get(key, list())) + elif key is None: + raise Dr2xmlError("Unable to get values from settings with None key") + else: + rep = default + i = 0 + test = False + while not test and i < len(args): + if isinstance(args[i], tuple): + is_relevant, key = args[i] + else: + is_relevant = True + key = args[i] + if is_relevant and key is not None and key in internal_settings: + rep = internal_settings[key] + test = True + elif key is None: + raise Dr2xmlError("Unable to get values from settings with None key") + else: + i += 1 + return rep diff --git a/dr2xml/settings_interface/py_settings_interface.py b/dr2xml/settings_interface/py_settings_interface.py index b6c45918..d446f509 100644 --- a/dr2xml/settings_interface/py_settings_interface.py +++ b/dr2xml/settings_interface/py_settings_interface.py @@ -12,9 +12,10 @@ import six -from 
dr2xml.utils import Dr2xmlError, decode_if_needed, print_struct +from dr2xml.utils import Dr2xmlError, print_struct -from logger import get_logger +from utilities.logger import get_logger +from utilities.encoding_tools import decode_if_needed # Initial simulation (sset) and laboratory (lset) dictionaries diff --git a/dr2xml/utils.py b/dr2xml/utils.py index d2c455dc..5e2e731d 100644 --- a/dr2xml/utils.py +++ b/dr2xml/utils.py @@ -7,16 +7,12 @@ from __future__ import print_function, division, absolute_import, unicode_literals -import copy -import json -import os -import sys from collections import OrderedDict from functools import reduce import six -from logger import get_logger +from utilities.logger import get_logger class Dr2xmlError(Exception): @@ -28,7 +24,7 @@ def __init__(self, valeur): def __str__(self): logger = get_logger() - logger.error(repr(self.valeur)) + logger.error(repr(self.valeur), ) return "\n\n" + repr(self.valeur) + "\n\n" # """ just for test""" @@ -47,40 +43,6 @@ class VarsError(Dr2xmlError): pass -def encode_if_needed(a_string, encoding="utf-8"): - """ - - :param a_string: - :param encoding: - :return: - """ - logger = get_logger() - if sys.version.startswith("2."): - return a_string.encode(encoding) - elif sys.version.startswith("3."): - return a_string - else: - logger.error("Unknown Python version %s" % sys.version.split()[0]) - raise OSError("Unknown Python version %s" % sys.version.split()[0]) - - -def decode_if_needed(a_string, encoding="utf-8"): - """ - - :param a_string: - :param encoding: - :return: - """ - logger = get_logger() - if sys.version.startswith("2."): - return a_string.decode(encoding) - elif sys.version.startswith("3."): - return a_string - else: - logger.error("Unknown Python version %s" % sys.version.split()[0]) - raise OSError("Unknown Python version %s", sys.version.split()[0]) - - def print_struct(struct, skip_sep=False, sort=False, back_line=False): """ @@ -133,34 +95,6 @@ def reduce_and_strip(elt): return elt -def 
read_json_content(filename): - logger = get_logger() - if os.path.isfile(filename): - with open(filename) as fp: - content = json.load(fp) - return content - else: - logger.error("Could not find the json file at %s" % filename) - raise OSError("Could not find the json file at %s" % filename) - - -def format_json_before_writing(settings): - if isinstance(settings, (dict, OrderedDict)): - for key in list(settings): - settings[key] = format_json_before_writing(settings[key]) - elif isinstance(settings, (list, tuple)): - for i in range(len(settings)): - settings[i] = format_json_before_writing(settings[i]) - elif isinstance(settings, type): - settings = str(settings) - return settings - - -def write_json_content(filename, settings): - with open(filename, "w") as fp: - json.dump(format_json_before_writing(copy.deepcopy(settings)), fp) - - def is_elt_applicable(elt, attribute=None, included=None, excluded=None): if attribute is not None: if isinstance(elt, tuple): diff --git a/dr2xml/vars_interface/cmor.py b/dr2xml/vars_interface/cmor.py index eabb2daf..e61d0b85 100644 --- a/dr2xml/vars_interface/cmor.py +++ b/dr2xml/vars_interface/cmor.py @@ -8,7 +8,7 @@ from __future__ import print_function, division, absolute_import, unicode_literals from dr2xml.dr_interface import get_dr_object -from logger import get_logger +from utilities.logger import get_logger from dr2xml.settings_interface import get_settings_values from dr2xml.utils import Dr2xmlError from .generic import read_home_var, fill_homevar, check_homevar, get_correspond_cmor_var, \ diff --git a/dr2xml/vars_interface/dev.py b/dr2xml/vars_interface/dev.py index fd97187e..2bd61a49 100644 --- a/dr2xml/vars_interface/dev.py +++ b/dr2xml/vars_interface/dev.py @@ -8,7 +8,7 @@ from __future__ import print_function, division, absolute_import, unicode_literals from dr2xml.dr_interface import get_dr_object -from logger import get_logger +from utilities.logger import get_logger from dr2xml.utils import VarsError from .generic 
import read_home_var, fill_homevar, check_homevar, tcmName2tcmValue, get_correspond_cmor_var diff --git a/dr2xml/vars_interface/extra.py b/dr2xml/vars_interface/extra.py index d5e2172a..0647d5be 100644 --- a/dr2xml/vars_interface/extra.py +++ b/dr2xml/vars_interface/extra.py @@ -12,9 +12,10 @@ from dr2xml.analyzer import guess_freq_from_table_name from dr2xml.dr_interface import get_dr_object -from logger import get_logger -from dr2xml.settings_interface import get_settings_values -from dr2xml.utils import VarsError, read_json_content +from utilities.logger import get_logger +from dr2xml.settings_interface import get_values_from_internal_settings +from dr2xml.utils import VarsError +from utilities.json_tools import read_json_content from .generic import read_home_var, multi_plev_suffixes, single_plev_suffixes, remove_p_suffix home_attrs = ['type', 'label', 'modeling_realm', 'frequency', 'mipTable', 'temporal_shp', 'spatial_shp', @@ -182,7 +183,7 @@ def read_extra_table(path, table): def check_extra_variable(home_var, hv_info): logger = get_logger() - if home_var.Priority <= get_settings_values("internal", "max_priority"): + if home_var.Priority <= get_values_from_internal_settings("max_priority", "max_priority_lset", merge=False): logger.debug("Info: %s HOMEVar is read in an extra Table with priority %s => Taken into account." 
% (hv_info, home_var.Priority)) return home_var diff --git a/dr2xml/vars_interface/generic.py b/dr2xml/vars_interface/generic.py index 55155e2a..1e5ca800 100644 --- a/dr2xml/vars_interface/generic.py +++ b/dr2xml/vars_interface/generic.py @@ -7,6 +7,7 @@ from __future__ import print_function, division, absolute_import, unicode_literals +import copy import re from collections import OrderedDict @@ -14,11 +15,11 @@ from dr2xml.analyzer import cellmethod2area from dr2xml.dr_interface import get_dr_object -from logger import get_logger +from utilities.logger import get_logger from dr2xml.settings_interface import get_settings_values from dr2xml.utils import VarsError, Dr2xmlError -tcmName2tcmValue = {"time-mean": "time: mean", "time-point": "time: point", "None": None} +tcmName2tcmValue = {"time-mean": "time: mean", "time-intv": "time: mean", "time-point": "time: point", "None": None} # List of multi and single pressure level suffixes for which we want the union/zoom axis mecanism turned on # For not using union/zoom, set 'use_union_zoom' to False in lab settings @@ -112,7 +113,7 @@ def remove_p_suffix(svar, mlev_sfxs, slev_sfxs, realms): # suppression des terminaisons en "Clim" le cas echant label_out = svar.label.split("Clim")[0] # - svar_realms = set(svar.modeling_realm.split()) + svar_realms = svar.set_modeling_realms valid_realms = set(realms) if svar_realms.intersection(valid_realms): mvl = r.match(label_out) @@ -134,8 +135,9 @@ def get_correspond_cmor_var(homevar): empty_table = (homevar.mipTable in ['NONE', ]) or (homevar.mipTable.startswith("None")) allow_pseudo = get_settings_values("internal", 'allow_pseudo_standard_names') global sn_issues_home + sn_issues_home_local = copy.deepcopy(sn_issues_home) for cmvarid in data_request.get_cmor_var_id_by_label(homevar.ref_var): - cmvar = data_request.get_element_uid(cmvarid, elt_type="variable", sn_issues=sn_issues_home, + cmvar = data_request.get_element_uid(cmvarid, elt_type="variable", 
sn_issues=sn_issues_home_local, allow_pseudo=allow_pseudo) logger.debug("get_corresp, checking %s vs %s in %s" % (homevar.label, cmvar.label, cmvar.mipTable)) # @@ -147,9 +149,9 @@ def get_correspond_cmor_var(homevar): ("SoilPools" in homevar.label and homevar.frequency in ["mon", ] and cmvar.frequency in ["monPt", ]) match_table = (cmvar.mipTable == homevar.mipTable) - match_realm = (homevar.modeling_realm in cmvar.modeling_realm.split(' ')) or \ - (homevar.modeling_realm == cmvar.modeling_realm) - empty_realm = (cmvar.modeling_realm in ['', ]) + empty_realm = len(cmvar.modeling_realm) == 0 + match_realm = (not(empty_realm) and set(homevar.modeling_realm).issubset(cmvar.set_modeling_realms)) or \ + homevar.modeling_realm == cmvar.modeling_realm matching = (match_label and (match_freq or empty_table) and (match_table or empty_table) and (match_realm or empty_realm)) @@ -170,6 +172,7 @@ def get_correspond_cmor_var(homevar): logger.debug("doesn't match %s %s %s %s %s %s %s %s" % (match_label, match_freq, cmvar.frequency, homevar.frequency, match_table, match_realm, empty_realm, homevar.mipTable)) + sn_issues_home = sn_issues_home_local if count >= 1: # empty table means that the frequency is changed (but the variable exists in another frequency cmor table @@ -199,24 +202,25 @@ def complement_svar_using_cmorvar(svar, cmvar, debug=[]): # Get information form CMORvar svar.set_attributes(prec=cmvar.prec, frequency=cmvar.frequency, mipTable=cmvar.mipTable, - Priority=cmvar.Priority, positive=cmvar.positive, modeling_realm=cmvar.modeling_realm, + Priority=cmvar.Priority, positive=cmvar.positive, label=cmvar.label, spatial_shp=cmvar.spatial_shp, temporal_shp=cmvar.temporal_shp, cmvar=cmvar, long_name=cmvar.long_name, description=cmvar.description, ref_var=cmvar.label, mipVarLabel=cmvar.mipVarLabel, units=cmvar.units, stdname=cmvar.stdname, cm=cmvar.cm, cell_methods=cmvar.cell_methods, cell_measures=cmvar.cell_measures, sdims=cmvar.sdims, 
other_dims_size=cmvar.other_dims_size, mip_era=cmvar.mip_era, - flag_meanings=cmvar.flag_meanings, flag_values=cmvar.flag_values) + flag_meanings=cmvar.flag_meanings, flag_values=cmvar.flag_values, + modeling_realm=cmvar.modeling_realm, set_modeling_realms=cmvar.set_modeling_realms) area = cellmethod2area(svar.cell_methods) if svar.label in debug: logger.debug("complement_svar ... processing %s, area=%s" % (svar.label, str(area))) if area: - ambiguous = any([svar.label == alabel and svar.modeling_realm == arealm + ambiguous = any([svar.label == alabel and arealm in svar.modeling_realm for (alabel, (arealm, lmethod)) in ambiguous_mipvarnames]) if svar.label in debug: logger.debug("complement_svar ... processing %s, ambiguous=%s" % (svar.label, repr(ambiguous))) if ambiguous: # Special case for a set of land variables - if not (svar.modeling_realm == 'land' and svar.label[0] == 'c'): + if not ('land' in svar.modeling_realm and svar.label[0] == 'c'): svar.label_non_ambiguous = svar.label + "_" + area if svar.label in debug: logger.debug("complement_svar ... 
processing %s, label_non_ambiguous=%s" % @@ -239,19 +243,8 @@ def analyze_ambiguous_mip_varnames(debug=[]): # Compute a dict which keys are MIP varnames and values = list # of CMORvars items for the varname logger = get_logger() - d = OrderedDict() data_request = get_dr_object("get_data_request") - for v in data_request.get_list_by_id('var').items: - if v.label not in d: - d[v.label] = [] - if v.label in debug: - logger.debug("Adding %s" % v.label) - refs = data_request.get_request_by_id_by_sect(v.uid, 'CMORvar') - for r in refs: - ref = data_request.get_element_uid(r, elt_type="variable") - d[v.label].append(ref) - if v.label in debug: - logger.debug("Adding CmorVar %s(%s) for %s" % (v.label, ref.mipTable, ref.label)) + d = data_request.get_variables_per_label(debug=debug) # Replace dic values by dic of area portion of cell_methods for vlabel in d: @@ -262,17 +255,16 @@ def analyze_ambiguous_mip_varnames(debug=[]): cm = cv.cell_methods if cm is not None: area = cellmethod2area(cm) - realm = cv.modeling_realm - if area == 'sea' and realm == 'ocean': + if area == 'sea' and 'ocean' in cv.modeling_realm: area = None - # realm="" - if vlabel in debug: - logger.debug("for %s 's CMORvar %s(%s), area=%s" % (vlabel, cv.label, cv.mipTable, area)) - if realm not in d[vlabel]: - d[vlabel][realm] = OrderedDict() - if area not in d[vlabel][realm]: - d[vlabel][realm][area] = [] - d[vlabel][realm][area].append(cv.mipTable) + for realm in cv.modeling_realm: + if vlabel in debug: + logger.debug("for %s 's CMORvar %s(%s), area=%s" % (vlabel, cv.label, cv.mipTable, area)) + if realm not in d[vlabel]: + d[vlabel][realm] = OrderedDict() + if area not in d[vlabel][realm]: + d[vlabel][realm][area] = [] + d[vlabel][realm][area].append(cv.mipTable) if vlabel in debug: print(vlabel, d[vlabel]) else: diff --git a/dr2xml/vars_interface/generic_data_request.py b/dr2xml/vars_interface/generic_data_request.py index 396683f8..b663f33f 100644 --- a/dr2xml/vars_interface/generic_data_request.py +++ 
b/dr2xml/vars_interface/generic_data_request.py @@ -7,31 +7,21 @@ from __future__ import print_function, division, absolute_import, unicode_literals -from collections import OrderedDict - from dr2xml.dr_interface import get_dr_object -from logger import get_logger -from dr2xml.settings_interface import get_settings_values, set_internal_value +from utilities.logger import get_logger +from dr2xml.settings_interface import get_settings_values, set_internal_value, get_values_from_internal_settings from dr2xml.utils import print_struct, Dr2xmlError, check_objects_equals from .generic import complement_svar_using_cmorvar from dr2xml.laboratories import lab_adhoc_grid_policy -def select_data_request_CMORvars_for_lab(sset=False, year=None): +def select_data_request_CMORvars_for_lab(): """ A function to list CMOR variables relevant for a lab (and also, optionally for an experiment and a year) The variables relative to the laboratory settings are get using the dict_interface module: list of MIPS, max Tier, list of excluded variables names - Args: - sset (boolean): should simulation settings be used - the parameter taken here are: source_type, - max priority (and all for filtering on the simulation) - If sset is False, use union of mips among all grid choices - year (int,optional) : simulation year - used when sset is not None, - to additionally filter on year - Returns: A list of 'simplified CMOR variables' @@ -39,88 +29,26 @@ def select_data_request_CMORvars_for_lab(sset=False, year=None): logger = get_logger() internal_settings = get_settings_values("internal") data_request = get_dr_object("get_data_request") - # Set sizes for lab settings, if available (or use CNRM-CM6-1 defaults) - mip_list_by_grid = internal_settings["mips"] - exctab = internal_settings["excluded_tables_lset"] - if not isinstance(exctab, list): - exctab = [exctab, ] - excvars = internal_settings['excluded_vars_lset'] - if not isinstance(excvars, list): - excvars = [excvars, ] - excpairs = 
internal_settings['excluded_pairs_lset'] - if not isinstance(excpairs, list): - excpairs = [excpairs, ] - if sset: - tierMax = internal_settings['tierMax'] - grid_choice = internal_settings["grid_choice"] - mips_list = set(mip_list_by_grid[grid_choice]) - sizes = internal_settings["sizes"] - inclinks = internal_settings["included_request_links"] - excluded_links = internal_settings["excluded_request_links"] + mips_list = internal_settings["select_mips"] + if internal_settings["select_on_expt"]: experiment_id = internal_settings["experiment_for_requests"] experiment_filter = dict(experiment_id=experiment_id, - year=year, + year=internal_settings["select_on_year"], filter_on_realization=internal_settings["filter_on_realization"], realization_index=internal_settings["realization_index"], branching=internal_settings["branching"], branch_year_in_child=internal_settings["branch_year_in_child"], endyear=internal_settings["end_year"]) - pmax = internal_settings['max_priority'] - inctab = internal_settings["included_tables"] - if not isinstance(inctab, list): - inctab = [inctab, ] - exctab.extend(internal_settings["excluded_tables_sset"]) - incvars = internal_settings["included_vars"] - if not isinstance(incvars, list): - incvars = [incvars, ] - excvars_sset = internal_settings['excluded_vars_sset'] - if not isinstance(excvars_sset, list): - excvars_sset = [excvars_sset, ] - excvars.extend(excvars_sset) - excvars_config = internal_settings['excluded_vars_per_config'] - if not isinstance(excvars_config, list): - excvars_config = [excvars_config, ] - excvars.extend(excvars_config) - excpairs_sset = internal_settings['excluded_pairs_sset'] - if not isinstance(excpairs_sset, list): - excpairs_sset = [excpairs_sset, ] - excpairs.extend(excpairs_sset) else: - tierMax = internal_settings['tierMax_lset'] - if isinstance(mip_list_by_grid, (dict, OrderedDict)): - mips_list = set().union(*[set(mip_list_by_grid[grid]) for grid in mip_list_by_grid]) - else: - mips_list = 
mip_list_by_grid - grid_choice = "LR" - sizes = None - inclinks = None - excluded_links = None experiment_filter = False - pmax = internal_settings['max_priority_lset'] - inctab = internal_settings["included_tables_lset"] - if not isinstance(inctab, list): - inctab = [inctab, ] - incvars = internal_settings['included_vars_lset'] - if not isinstance(incvars, list): - incvars = [incvars, ] - mips_list = sorted(list(mips_list)) - - set_internal_value("grid_choice", grid_choice) last_filter_options = get_settings_values("internal_values", "initial_selection_configuration") - filter_options = dict(tierMax=tierMax, mips_list=mips_list, included_request_links=inclinks, - excluded_request_links=excluded_links, max_priority=pmax, included_vars=incvars, - excluded_vars=excvars, included_tables=inctab, excluded_tables=exctab, - excluded_pairs=excpairs, experiment_filter=experiment_filter, sizes=sizes) + filter_options = {key: internal_settings[key] for key in list(internal_settings) if key.startswith("select")} + filter_options["experiment_filter"] = experiment_filter if check_objects_equals(filter_options, last_filter_options): d = get_settings_values("internal_values", "cmor_vars") else: - d, rls = data_request.get_cmorvars_list(tierMax=tierMax, mips_list=mips_list, included_request_links=inclinks, - excluded_request_links=excluded_links, max_priority=pmax, - included_vars=incvars, excluded_vars=excvars, included_tables=inctab, - excluded_tables=exctab, excluded_pairs=excpairs, - experiment_filter=experiment_filter, sizes=sizes) - set_internal_value("global_rls", rls) + d = data_request.get_cmorvars_list(**filter_options) set_internal_value("cmor_vars", d) set_internal_value("initial_selection_configuration", filter_options, action="update") logger.info('Number of distinct CMOR variables (whatever the grid): %d' % len(d)) @@ -172,8 +100,7 @@ def endyear_for_CMORvar(cv, expt, year): data_request = get_dr_object("get_data_request") # Some debug material - larger = 
data_request.get_endyear_for_cmorvar(cmorvar=cv, experiment=expt, year=year, internal_dict=internal_dict, - global_rls=get_settings_values("internal_values", "global_rls")) + larger = data_request.get_endyear_for_cmorvar(cmorvar=cv, experiment=expt, year=year, internal_dict=internal_dict) return larger diff --git a/dr2xml/vars_interface/home_data_request.py b/dr2xml/vars_interface/home_data_request.py index a63d1ae4..db2d60bd 100644 --- a/dr2xml/vars_interface/home_data_request.py +++ b/dr2xml/vars_interface/home_data_request.py @@ -11,7 +11,7 @@ from collections import defaultdict from dr2xml.config import get_config_variable, set_config_variable -from logger import get_logger +from utilities.logger import get_logger from dr2xml.settings_interface import get_settings_values from dr2xml.utils import VarsError from .cmor import read_home_var_cmor, check_cmor_variable @@ -104,10 +104,12 @@ def process_home_vars(mip_vars_list, mips, expid="False"): path_extra_tables = internal_dict['path_extra_tables'] logger.info("homevars file: %s" % homevars) home_vars_list = read_home_vars_list(homevars, expid, mips, path_extra_tables) - logger.info("homevars list: %s" % " ".join([sv.label for sv in home_vars_list])) + msg = "homevars list: %s" % " ".join([sv.label for sv in home_vars_list]) + logger.info(msg.strip(" ")) # for hv in home_vars_list: - hv_info = {"varname": hv.label, "realm": hv.modeling_realm, "freq": hv.frequency, "table": hv.mipTable} + hv_info = {"varname": hv.label, "realm": ",".join(hv.modeling_realm), "freq": hv.frequency, + "table": hv.mipTable} logger.debug(hv_info) if hv.type in ["cmor", ]: new_hv = check_cmor_variable(hv, mip_vars_list, hv_info) diff --git a/dr2xml/vars_interface/perso.py b/dr2xml/vars_interface/perso.py index 238fdb05..d1cf30a6 100644 --- a/dr2xml/vars_interface/perso.py +++ b/dr2xml/vars_interface/perso.py @@ -8,7 +8,7 @@ from __future__ import print_function, division, absolute_import, unicode_literals from dr2xml.dr_interface import 
get_dr_object -from logger import get_logger +from utilities.logger import get_logger from dr2xml.utils import VarsError from .generic import read_home_var, fill_homevar, check_homevar, tcmName2tcmValue, get_correspond_cmor_var diff --git a/dr2xml/vars_interface/selection.py b/dr2xml/vars_interface/selection.py index 20a888d8..152c21bf 100644 --- a/dr2xml/vars_interface/selection.py +++ b/dr2xml/vars_interface/selection.py @@ -9,7 +9,7 @@ from collections import defaultdict -from logger import get_logger +from utilities.logger import get_logger from dr2xml.settings_interface import get_settings_values from dr2xml.utils import print_struct, Dr2xmlError from .generic_data_request import select_data_request_CMORvars_for_lab @@ -38,32 +38,37 @@ def check_exclusion(var, *exclusions): return tests, reasons -def select_variables_to_be_processed(year, context, select): +def select_variables_to_be_processed(): """ Return the list of variables to be processed. """ internal_dict = get_settings_values("internal") + context = internal_dict["context"] logger = get_logger() # # -------------------------------------------------------------------- # Extract CMOR variables for the experiment and year and lab settings # -------------------------------------------------------------------- - mip_vars_list = gather_AllSimpleVars(year, select) + mip_vars_list = gather_AllSimpleVars() # Group vars per realm svars_per_realm = defaultdict(list) for svar in mip_vars_list: - realm = svar.modeling_realm - if svar not in svars_per_realm[realm]: - add = not any([test_variables_similar(svar, ovar) for ovar in svars_per_realm[realm]]) - # Settings may allow for duplicate var in two tables. 
- # In DR01.00.21, this actually applies to very few fields (ps-Aermon, tas-ImonAnt, areacellg) - if internal_dict['allow_duplicates'] or add: - svars_per_realm[realm].append(svar) + for realm in svar.modeling_realm: + if svar not in svars_per_realm[realm]: + add = not any([test_variables_similar(svar, ovar) for ovar in svars_per_realm[realm]]) + # Settings may allow for duplicate var in two tables. + # In DR01.00.21, this actually applies to very few fields (ps-Aermon, tas-ImonAnt, areacellg) + if internal_dict['allow_duplicates'] or add: + svars_per_realm[realm].append(svar) + else: + logger.warning("Not adding duplicate %s (from %s) for realm %s" % (svar.label, svar.mipTable, realm)) else: - logger.warning("Not adding duplicate %s (from %s) for realm %s" % (svar.label, svar.mipTable, realm)) - else: - logger.warning("Duplicate svar %s %s" % (svar.label, svar.grid)) - logger.info("\nRealms for these CMORvars: %s" % " ".join(sorted(list(svars_per_realm)))) + logger.warning("Duplicate svar %s %s" % (svar.label, svar.grids)) + list_svars_per_realms = set(list(svars_per_realm)) + if None in list_svars_per_realms: + list_svars_per_realms = list_svars_per_realms & {""} - {None} + list_svars_per_realms = sorted(list(list_svars_per_realms)) + logger.info(f"\nRealms for these CMORvars: {' '.join(list_svars_per_realms)}".strip(" ")) # # -------------------------------------------------------------------- # Select on context realms, grouping by table @@ -124,28 +129,18 @@ def select_variables_to_be_processed(year, context, select): return svars_per_table -def gather_AllSimpleVars(year=False, select="on_expt_and_year"): +def gather_AllSimpleVars(): """ List of mip variables asked - :param year: year when the variables are created - :param select: selection criteria :return: list of mip variables """ logger = get_logger() internal_dict = get_settings_values("internal") - if select in ["on_expt_and_year", ""]: - mip_vars_list = select_data_request_CMORvars_for_lab(True, year) 
- elif select in ["on_expt", ]: - mip_vars_list = select_data_request_CMORvars_for_lab(True, None) - elif select in ["no", ]: - mip_vars_list = select_data_request_CMORvars_for_lab(False, None) - else: - logger.error("Choice %s is not allowed for arg 'select'" % select) - raise Dr2xmlError("Choice %s is not allowed for arg 'select'" % select) + mip_vars_list = select_data_request_CMORvars_for_lab() # if internal_dict['listof_home_vars']: exp = internal_dict['experiment_for_requests'] - mip_vars_list = process_home_vars(mip_vars_list, internal_dict["mips"][get_settings_values("internal_values", "grid_choice")], expid=exp) + mip_vars_list = process_home_vars(mip_vars_list, internal_dict["select_mips"], expid=exp) else: logger.info("Info: No HOMEvars list provided.") return mip_vars_list diff --git a/dr2xml/xml_interface.py b/dr2xml/xml_interface.py index af202979..082b0e9e 100644 --- a/dr2xml/xml_interface.py +++ b/dr2xml/xml_interface.py @@ -13,7 +13,8 @@ import xml_writer from .settings_interface import get_settings_values -from .utils import reduce_and_strip, decode_if_needed +from .utils import reduce_and_strip +from utilities.encoding_tools import decode_if_needed class DR2XMLComment(xml_writer.Comment): diff --git a/examples/graphviz/Graphviz.py b/examples/graphviz/Graphviz.py deleted file mode 100755 index e0d87b1b..00000000 --- a/examples/graphviz/Graphviz.py +++ /dev/null @@ -1,422 +0,0 @@ -# -*- coding: ISO-8859-15 -*- -''' -version = "0.0." 
-date = "2007.01.15" -environnement = "Windows XP" -graphic = "�cran 17 - 1152x854" -ide = "Python Scripter V 1.7.2.6 (http://www.mmm-experts.com/)" -author = "lespinx (http://www.pythonfrance.com/)" -''' - -import pyclbr, tokenize, os, sys -from operator import itemgetter - -class Graphviz_dot: - ''' - o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o - But Gen�rer le fichier parametre en entr�e de Graphviz - Param�tres aucun - Appel�e par : - o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o - ''' - def __init__(self): - self.lst_ligne_code = [] #Liste du code �pur� - #[0]Nom_class - #[1]Nom_def (appelante) - #[3]Code - - self.lst_class = [] #Liste des noms de classes - #[0]Nom_class - - self.lst_def = [] #Liste des noms de "def" - #[0]Nom_def - - self.dic_class_def = {("", "n000", "
") : []} - #-Key : Liste [0]Classe - # [1]Node - # [2]Def - #-Data: Liste contenant les "node" des - # fonctions appel�es - - self.dic_node = {"
" : "n000"} - #-Key : Classe + Def - #-Data: Node - - self.node = 0 #Compteur pour incr�ment num�ro de node - self.lst_dot = [] #Liste des parametres pour Graphviz - self.lst_anomalies = [] #Liste des anomalies - - def add_dic(self, nom_class, nom_def): - ''' - o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o - But Controle d'unicit� avant ajout dans le dictionnaire - Param�tres - Appel�e par : - o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o - ''' - if not self.dic_node.has_key(nom_class + nom_def): - self.node += 1 - node = "n" + str(self.node).rjust(3,"0") - self.dic_class_def[nom_class, node, nom_def] = [] - self.dic_node[nom_class + nom_def] = node - - #Stockage des noms de def - if nom_def not in self.lst_def and nom_def != "": - self.lst_def.append(nom_def) - - def anomalies(self, parm, pos, nom_class, nom_def, - lst_token = "", tokval = ""): - ''' - o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o - But Mise en forme des anomalies - Param�tres - Appel�e par : - o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o - ''' - mot1 = str(parm) + "-ligne=" + str(pos[0]).ljust(5) + "col=" + \ - str(pos[1] + 1).ljust(3) - if parm == 1: - l1 = mot1 + "class=" + nom_class.ljust(20) + " def=" + \ - nom_def.ljust(20) - self.lst_anomalies.append(l1) - - if parm == 2: - l1 = mot1 + "class=" + nom_class.ljust(20)+ " def=" \ - + nom_def.ljust(20) - - if len(lst_token) > 0: - l2 = ">>"+ (lst_token[-1] + "." + tokval + "<<").ljust(26) - else: l2 = (">>" + tokval + "<<").ljust(28) - - self.lst_anomalies.append(l1 + l2) - - def pyclbr_class_def(self, fic): - ''' - o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o - But Rechercher les noms de classes, methodes et fonctions - dans le module Python. 
- Param�tres Nom du module � analyser(sans extension) - Appel�e par : - o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o - ''' - #R�cup�ration via "pyclbr" des noms de classes, m�thodes et fonctions - dict = pyclbr.readmodule_ex(fic, [os.getcwd()]) - - objs = dict.values() - objs.sort(lambda a, b: cmp(getattr(a, 'lineno', 0), - getattr(b, 'lineno', 0))) - for obj in objs: - - #S�lection des objets du module(exclusion des import) - if obj.module == fic: - if isinstance(obj, pyclbr.Class): - - #Stockage des noms de classe - if obj.name not in self.lst_class: - self.lst_class.append(obj.name) - - #Extraction et tri des m�thodes de classe - methods = sorted(obj.methods.iteritems(), key=itemgetter(1)) - - #Si la classe n'a pas de m�thode - if len(methods) == 0: self.add_dic(obj.name, "") - else: - #Stockage des m�thodes - for name, _ in methods: - if name != "__path__": - self.add_dic(obj.name, name) - - #Stockage des fonctions - elif isinstance(obj, pyclbr.Function): - self.add_dic("", obj.name) - - def tokenize_class_def(self, mon_module): - ''' - o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o - But Epurer, stucturer et analyser le module source Python - r�sultat dans fichier: <"mon_module"_ligne_code.txt> - Param�tres Nom du module Python � analyser(avec extension) - Appel�e par : - o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o - ''' - #Dictionnaire des mots cl�s utilis�s par "tokenize" - for cle in tokenize.tok_name: - if tokenize.tok_name[cle] == "NAME": NAME = cle - if tokenize.tok_name[cle] == "NEWLINE": NEWLINE = cle - if tokenize.tok_name[cle] == "ENDMARKER": ENDMARKER = cle - - nom_class = "" - nom_def = "
" - flag_class = False #Switch pour lecture ligne suivant "class" - flag_def = False #Switch pour lecture ligne suivant "def" - pos_rupt = 0 #Gestion des fin de class, def ou
- lst_token = [] #Liste des instructions significatives - c_bind = "" #Si appel par bind => [style=dotted] - dic_instance = {} #Stockage des instance de classes - - fic_source = open(mon_module, "r") - ligne_code = tokenize.generate_tokens(fic_source.readline) - - for tokcle, tokval, tokpos, _, tokcode in ligne_code: - - if tokcle == NAME: - if tokval == "class": - flag_class = True - nom_def = "" - continue - - if flag_class: - nom_class = tokval - #Ajout des classes qui n'ont pas �t� vues par pyclbr - if nom_class not in self.lst_class: - self.lst_class.append(nom_class) - flag_class = False - continue - - if tokval == "def": - pos_rupt = tokpos[1] - #Si "def" est en col 0, on est plus dans une classe - if pos_rupt == 0: nom_class = "" - flag_def = True - continue - - if flag_def: - nom_def = tokval - #Ajout des fonctions qui n'ont pas �t� vues par pyclbr - if nom_def not in self.lst_def: - self.add_dic(nom_class, nom_def) - flag_def = False - continue - -#-o-o-o-o D�tection rupture class ou def - if tokpos[1] == 0: - nom_class = "" - nom_def = "
" - - if tokpos[1] <= pos_rupt: - if nom_class == "": nom_def = "
" - else: nom_def = "" - -#-o-o-o-o Recherche instance de classe : Si on trouve dans le code un mot - #contenant un nom de classe, "lst_token[-1]" est l'instance de la classe - if tokval in self.lst_class: - if len(lst_token) > 0 and lst_token[-1] != "raise": - dic_instance.setdefault(lst_token[-1], tokval) - -#-o-o-o-o Recherche appel par "bind" - if tokval in ("bind", "bind_all", "bind_class", "bindtags", - "tag_bind"): - c_bind = " [style=dotted]" - -#-o-o-o-o Recherche appel fonction ou m�thode - if tokval in self.lst_def: - #Y-a t-il une instance de classe? - if len(lst_token) > 0: - if lst_token[-1] == "self": nomclass = nom_class - else: nomclass = dic_instance.get(lst_token[-1], "") - else: nomclass = "" - - #R�cup�ration du node de la fonction appelante - if self.dic_node.has_key(nom_class + nom_def): - node1 = self.dic_node.get(nom_class + nom_def) - else: - if self.dic_node.has_key(tokval): - node1 = self.dic_node.get(tokval) - else: self.anomalies(1, tokpos, nom_class, nom_def) - - #R�cup�ration du node de la fonction appel�e - if self.dic_node.has_key(nomclass + tokval): - node2 = self.dic_node.get(nomclass + tokval) - - #Ajout de la fonction appel�e � la fonction appelante - cle = (nom_class, node1, nom_def) - if self.dic_class_def.has_key(cle): - self.dic_class_def[cle].append(" -> " + node2 + - c_bind + " /*" + tokval + "*/") - c_bind = "" - else: self.anomalies(2, tokpos, nom_class, nom_def, lst_token, tokval) - -#-o-o-o-o Stockage du code - lst_token.append(tokval) - continue - -#-o-o-o-o Nouvelle ligne - if tokcle == NEWLINE and len(lst_token) > 0: - self.lst_ligne_code.append([nom_class, nom_def, " ".join(lst_token)]) - lst_token = [] - -#-o-o-o-o Derni�re ligne - if tokcle == ENDMARKER: - self.lst_ligne_code.append([nom_class, nom_def, " ".join(lst_token)]) - - fic_source.close() - - def genere_DOT(self, mon_module): - ''' - o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o - But Constituer le fichier parametre en entr�e 
de Graphviz : - -Constitution des blocs "Cluster" - -Constitution des relations appelants -> appel�s - Param�tres Nom du module Python � analyser(avec extension) - Appel�e par : - o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o - ''' - parenthese = "" #Parenthese fermante vide la 1�re fois - cluster = 0 #Incr�ment num�ro de Cluster - rupture = "999" #Test de rupture sur nom de classe - - #Extraction dans une liste des couples key/data - lst_items = self.dic_class_def.items() - lst_items.sort() - - self.lst_dot.append("digraph G {node [shape=box, fontsize=10];\n") - self.lst_dot.append("/*Fonctions appelantes*/\n") - - for item in lst_items: - nom_class = item[0][0] - node = item[0][1] - nom_def = item[0][2] - - if nom_class != rupture: - rupture = nom_class - - #Si on traite la proc�dure principale ==> Cluster Module - if nom_class == "": - self.lst_dot.append("subgraph cluster" + str(cluster) + - '{label="Module ' + mon_module + '";') - #Sinon ==> Cluster Class - else: - self.lst_dot.append(parenthese) - parenthese = " " * 17 + "}" - self.lst_dot.append("subgraph cluster" + str(cluster) + - '{label="Class ' + nom_class + ' ";') - cluster += 1 - - #Si nom_def est vide, la classe n'as pas de m�thodes - if nom_def == "": - self.lst_dot.append(" " * 18 + node + - ' [label = "No def", fontcolor = red2];') - continue - - #Traitement des fonctions appelantes - if nom_def == "
": - label = ' [label = "' + nom_def + '", fontcolor = blue2];' - else: - label = ' [label = "' + nom_def + '"];' - self.lst_dot.append(" " * 18 + node + label) - - #Traitement des fonctions appel�es - self.lst_dot.append(parenthese) - self.lst_dot.append("\n/*Fonctions appelees*/\n") - - for item in lst_items: - node = item[0][1] #Node de la fonction appelante - item[1].sort() #Tri liste des nodes/fonctions appel�es - nb_appels = 1 #Nombre d'appels (occurence d'un meme node) - occurence = "" #Si aucune occurence le champ reste vide - x = 0 #Index de poste - nb_postes = len(item[1]) - - for appel in item[1]: - if (x + 1) == nb_postes: #Dernier poste de la liste? - self.lst_dot.append(" " * 18 + node + appel + occurence + ";") - else: - if item[1][x] == item[1][x + 1]: - nb_appels += 1 - occurence = ' [label=" ' + str(nb_appels) + ' appels",fontsize=10]' - else: - self.lst_dot.append(" " * 18 + node + appel + occurence + ";") - nb_appels = 1 - occurence = "" - x += 1 - - self.lst_dot.append(" " * 17 + "}") - self.lst_dot.append(" " * 17 + "}") - - def ecriture_fichiers(self, fic, chemin_Graphviz): - ''' - o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o - But Ecriture des fichiers r�sultat: - -1- Ecriture du fichier lignes code epur� (optionnel) - -2- Ecriture du fichier debug (optionnel) - -3- Ecriture du fichier parametres pour Graphviz - -4- Ecriture du fichier de commande Windows ".cmd" - (ligne de commande pour ex�cution de Graphviz sous Windows)) - - Param�tres Nom du module Python � analyser(sans extension) - Appel�e par : - o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o - ''' -#-o-o-o-o -1- -## fic_code = open(fic + "_ligne_code.txt", "w") -## for ligne in self.lst_ligne_code: -## fic_code.write(ligne[0].ljust(28) + ligne[1].ljust(28) + ligne[2] + "\n") -## fic_code.close() - -#-o-o-o-o -2- -## fic_debug = open(fic + "_debug.txt", "w") -## -## if len(self.lst_anomalies) > 0: -## fic_debug.write("#-o-o-Liste des 
anomalies o-o-o-o-o-o-o-o-" + "\n") -## fic_debug.write('erreur 1 = Ne figure pas dans "dic_node"' + "\n") -## fic_debug.write("erreur 2 = les >>crit�res<< n'ont pu �tre d�tect�s." + "\n") -## fic_debug.write("\n") -## else: -## fic_debug.write("Aucune anomalies d�tect�es\n") -## -## for ligne in self.lst_anomalies: -## fic_debug.write(ligne + "\n") -## -## fic_debug.write("\n#-o-o-Liste des classes detect�es o-o-o-o-o-o" + "\n") -## self.lst_class.sort() -## for ligne in self.lst_class: fic_debug.write(ligne + "\n") -## -## fic_debug.write("\n#-o-o-Liste des fonctions detect�es o-o-o-o-o" + "\n") -## self.lst_def.sort() -## for ligne in self.lst_def: fic_debug.write(ligne + "\n") -## -## fic_debug.write("\n#-o-o-Liste du dictionnaire appelant/appel�s " + "\n") -## lst_dic = self.dic_class_def.items() -## lst_dic.sort() -## for cle, data in lst_dic: -## fic_debug.write(("".join(cle)).ljust(32) + " ".join(data) +"\n") -## -## fic_debug.close() - -#-o-o-o-o -3- - fic_dot = open(fic + "_dot.txt", "w") - for ligne in self.lst_dot: - fic_dot.write(ligne + "\n") - fic_dot.close() - -#-o-o-o-o -4- - fic_cmd = open(fic + "_Graphviz.cmd", "w") - #Ecriture param�tres Graphviz - fic_cmd.write(chemin_Graphviz + " " + fic + "_dot.txt -Tpng -o " - + fic + ".png \n") - if sys.platform == "win32": - fic_cmd.write("pause \n") - fic_cmd.write(fic + ".png \n") #Affichage de l'image/graphe - fic_cmd.close() - - if sys.platform == "win32": - os.system(fic + "_Graphviz.cmd") #Execute fichier commande - -if __name__ == "__main__": - try: - mon_module = sys.argv[1] #Appel via un script - except: - mon_module = "dr2xml_total.py" - - chemin_mon_module = "/Users/moine/Codes/MyDevel_Codes/CMIP6_DATA_SUITE/DR2XML/dr2pub_mpmoine/graphviz" - chemin_Graphviz = "/usr/local/bin/dot" - os.chdir(chemin_mon_module) - - C = Graphviz_dot() - #fic = nom du module Python sans extension - fic = mon_module[0:mon_module.find(".")] - #Appel des fonctions - C.pyclbr_class_def(fic) - 
C.tokenize_class_def(mon_module ) - C.genere_DOT(mon_module) - C.ecriture_fichiers(fic, chemin_Graphviz) diff --git a/examples/graphviz/Lisez_moi.txt b/examples/graphviz/Lisez_moi.txt deleted file mode 100755 index 43cf3ebc..00000000 --- a/examples/graphviz/Lisez_moi.txt +++ /dev/null @@ -1,117 +0,0 @@ -En savoir plus sur Graphivz: -http://graphviz.org/ -http://cyberzoide.developpez.com/graphviz/ -http://fr.wikipedia.org/wiki/Graphviz - -Les tests ayant �t� effectu�s dans l'environnement Windows, tout ce qui suit concerne cet environnement. - - -1) Installation de Graphviz: - - Il n'est pas indispensable d'uliser la proc�dure d'installation "graphviz-2.12.exe", vous pouvez vous contenter de d�compacter - "graphviz-win-2.12.bin.tar.gz" dans le dossier de votre choix (aucune �criture dans la base de registre). - Le module "dot.exe" sera appel� depuis une ligne de commande. - -2) Dans le code source "Graphviz.py", renseigner les chemins d'acc�s et le nom du module � examiner: - - a) Indiquer le nom du module Python � analyser: - - Renseignez la variable "mon_module" - - b) Localiser l'emplacement de votre module Python � analyser - - Renseignez la variable "chemin_mon_module" - - c) Localiser l'emplacement du module "dot.exe" de Graphviz - - Renseignez la variable "chemin_Graphviz" - - -3) Ex�cution: - - a) "Graphviz.py" gen�re le fichier "Mon_module_dot.txt" ==> fichier param�tres pour Graphviz - - b) "Graphviz.py" gen�re le fichier "Mon_module_Graphviz.cmd" ==> fichier de commande pour ex�cuter Graphviz - Si vous �tes dans l'environnement Windows, le fichier de commande est lan�� automatiquement et provoque l'affichage du graphe. - - c) Les fichiers "Mon_module_ligne_code.txt" et Mon_module_debug.txt" sont optionnels. - -4) Informations: - - a) "
" est le mom symbolique de la proc�dure principale: - Il faut un point d'ancrage � Graphviz pour repr�senter un appel de fonction depuis la proc�dure principale. - - b) Les liaisons en pointill�s repr�sentent les appels par "bind" - - c) Les classes marqu�es "NoDef" sont des classes sans m�thodes utilis�es pour la gestion des erreurs et appel�es par "raise" - - -AVERTISSEMENT : Ca ne marche pas � tous les coups !!! -Il y a de multiples fa�ons d'utiliser les Classes et d'appeler les m�thodes de classes ou fonctions, le code propos� se limite � -examiner les cas les plus simples. - -Ce code est donc livr� sans AUCUNE GARANTIE D'EXACTITUDE !!! - -Les cas ou �a ne marche pas: - -Les liens seront in�xacts ou absents - - a) Votre module n'a pas de classe, de m�thodes ni d'appel de fonction. - - b) Le nom de la fonction (appel�e ou appelante) est stock�e dans une variable, liste ou dictionnaire. - - c) Utiliser un mom reserv� pour nommer une fonction, ou une variable a le m�me nom qu'une fonction. - exemple - def open(): - ...... - fichier = open("fic.txt","r") - - d) Fonctions imbriqu�es: - exemple : module Main.py de la distribution standard - - def initial_color(s, colordb): - # function called on every color - def scan_color(s, colordb=colordb): - try: - r, g, b = colordb.find_byname(s) - except ColorDB.BadColor: - try: - ........ 
- La fonction "def scan_color" n'est pas vue par "pyclbr.py" - - e) Cr�ation conditionnelle de classes - exemple - if test > 1: - class Ma_Classe: - def __init__(self): - pass - def fonction_1(self): - print "fonction_1" - else: - class Ma_Classe: - def __init__(self): - pass - def fonction_2(self): - print "fonction_2" - - "pyclbr.py" ne voit que la derni�re cr�ation de "Ma_Classe et "fonction_1()" ne sera pas consider� - comme appartemant � "Ma_classe" - - f) H�ritage de classe - exemple - class Ma_Classe: - def __init__(self): - pass - def fonction_1(self): - print "fonction_1 class=Ma_Classe" - - class Ma_Classe_bis(Ma_Classe): - def __init__(self): - pass - def fonction_2(self): - print "fonction_2" - - c = Ma_Classe_bis() - c.fonction_1() - - L'instance de classe "c" renvoie � "Ma_Classe_bis" et "c.fonction_1()" ne sera pas trouv�. - - - etc..... \ No newline at end of file diff --git a/examples/graphviz/dr2xml_total.png b/examples/graphviz/dr2xml_total.png deleted file mode 100644 index a6a23b15..00000000 Binary files a/examples/graphviz/dr2xml_total.png and /dev/null differ diff --git a/examples/graphviz/dr2xml_total.py b/examples/graphviz/dr2xml_total.py deleted file mode 100644 index 47fcdd15..00000000 --- a/examples/graphviz/dr2xml_total.py +++ /dev/null @@ -1,3476 +0,0 @@ -#!/usr/bin/python -# -*- coding: iso-8859-15 -*- -""" -In the context of Climate Model Intercomparison Projects (CMIP) : - -A few functions for processing - - a CMIP Data request and - - a set of settings related to a laboratory, and a model - - a set of settings related to an experiment (i.e. 
a set of numerical - simulations), -to generate a set of xml-syntax files used by XIOS (see -https://forge.ipsl.jussieu.fr/ioserver/) for outputing geophysical -variable fields - -First version (0.8) : S.S�n�si (CNRM) - sept 2016 - -Changes : - oct 2016 - Marie-Pierre Moine (CERFACS) - handle 'home' Data Request - in addition - dec 2016 - S.S�n�si (CNRM) - improve robustness - jan 2017 - S.S�n�si (CNRM) - handle split_freq; go single-var files; - adapt to new DRS ... - feb 2017 - S.S�n�si (CNRM) - handle grids and remapping; - put some func in separate module - -""" -#################################### -# Pre-requisites -#################################### - -# 1- CMIP6 Data Request package retrieved using -# svn co http://proj.badc.rl.ac.uk/svn/exarch/CMIP6dreq/tags/01.00.01 -# (and must include 01.00.01/dreqPy in PYTHONPATH) -from scope import dreqQuery -import dreq - -# 2- CMIP6 Controled Vocabulary (available from -# https://github.com/WCRP-CMIP/CMIP6_CVs). You will provide its path -# as argument to functions defined here - -# 3- XIOS release must be 1047 or above (to be fed with the outputs) -# see https://forge.ipsl.jussieu.fr/ioserver/wiki - -#################################### -# End of pre-requisites -#################################### -version = "mpmoine-dev3 = mpmoine-dev2 merged with senesi-dev-v0.12 + updates untill 05/05/2017" -print "* dr2xml version:", version - -from datetime import datetime -import re -import json -import collections -import sys, os -import xml.etree.ElementTree as ET - -# mpmoine_merge_dev2_v0.12: posixpath.dirname ne marche pas chez moi -# TBS# from os import path as os_path -# TBS# prog_path=os_path.abspath(os_path.split(__file__)[0]) - -# Local packages -# mpmoine_zoom_modif: import simple_Dim -# GRAPHVIZ#from vars import simple_CMORvar, simple_Dim, process_home_vars, complement_svar_using_cmorvar, \ -# GRAPHVIZ# multi_plev_suffixes, single_plev_suffixes -# GRAPHVIZ#from grids import decide_for_grids, grid2resol, 
grid2desc, field_size,\ -# GRAPHVIZ# split_frequency_for_variable, timesteps_per_freq_and_duration -# GRAPHVIZ#from Xparse import init_context, id2grid - -# A local auxilliary table -# mpmoine_last_modif: dr2xml.py: ajout import de cmipFreq2xiosFreq -# GRAPHVIZ#from table2freq import table2freq, table2splitfreq, cmipFreq2xiosFreq - -# GRAPHVIZ#from dr2cmip6_expname import dr2cmip6_expname - -print_DR_errors = True - -dq = dreq.loadDreq() -print "* CMIP6 Data Request version: ", dq.version - -context_index = None - -""" An example/template of settings for a lab and a model""" -example_lab_and_model_settings = { - 'institution_id': "CNRM-CERFACS", # institution should be read in CMIP6_CV, if up-to-date - 'source_id': "CNRM-CM6-1", - # The description of lab models, in CMIP6 CV wording - 'source_types': {"CNRM-CM6-1": "AOGCM", "CNRM-CM6-1-HR": "AOGCM", - "CNRM-ESM2-1": "ESM", "CNRM-ESM2-1-HR": "ESM"}, - 'source': "CNRM-CM6-1", # Useful only if CMIP6_CV is not up to date - 'references': "A character string containing a list of published or web-based " + \ - "references that describe the data or the methods used to produce it." + \ - "Typically, the user should provide references describing the model" + \ - "formulation here", - 'info_url': "http://www.umr-cnrm.fr/cmip6/", - 'contact': 'contact.cmip@meteo.fr', - - # We account for the list of MIPS in which the lab takes part. 
- # Note : a MIPs set limited to {'C4MIP'} leads to a number of tables and - # variables which is manageable for eye inspection - 'mips_for_test': {'C4MIP', 'SIMIP', 'OMIP', 'CFMIP', 'RFMIP'}, - 'mips': {'AerChemMIP', 'C4MIP', 'CFMIP', 'DAMIP', 'FAFMIP', 'GeoMIP', 'GMMIP', 'ISMIP6', \ - 'LS3MIP', 'LUMIP', 'OMIP', 'PMIP', 'RFMIP', 'ScenarioMIP', 'CORDEX', 'SIMIP'}, - # Max variable priority level to be output - 'max_priority': 1, - 'tierMax': 1, - - # The ping file defines variable names, which are constructed using CMIP6 "MIPvarnames" - # and a prefix which must be set here, and can be the empty string : - "ping_variables_prefix": "CMIP6_", - - # We account for a list of variables which the lab does not want to produce , - # Names must match DR MIPvarnames (and **NOT** CF standard_names) - # excluded_vars_file="../../cnrm/non_published_variables" - "excluded_vars": [], - - # We account for a list of variables which the lab wants to produce in some cases - "listof_home_vars": "../../cnrm/listof_home_vars.txt", - - # Each XIOS context does adress a number of realms - 'realms_per_context': { - 'nemo': ['seaIce', 'ocean', 'ocean seaIce', 'ocnBgchem', 'seaIce ocean'], - 'arpsfx': ['atmos', 'atmos atmosChem', 'aerosol', 'atmos land', 'land', - 'landIce land', 'aerosol land', 'land landIce', 'landIce', ], - }, - # Some variables, while belonging to a realm, may fall in another XIOS context than the - # context which hanldes that realm - 'orphan_variables': {'nemo': ['dummy_variable_for_illustration_purpose'], - ' arpsfx': [], - }, - 'vars_OK': dict(), - # A per-variable dict of comments valid for all simulations - 'comments': { - 'tas': 'nothing special about tas' - }, - # Sizes for atm and oce grids (cf DR doc) - "sizes": [259200, 60, 64800, 40, 20, 5, 100], - # What is the maximum size of generated files, in number of float values - "max_file_size_in_floats": 500. 
* 1.e+6, - # grid_policy among None, DR, native, native+DR, adhoc- see docin grids.py - "grid_policy": "adhoc", - # Grids : CMIP6 name, name_of_target_domain, CMIP6-std resolution, and description - "grids": { - "LR": { - "arpsfx": ["gr", "complete", "250 km", - "data regridded to a T127 gaussian grid (128x256 latlon) from a native atmosphere T127l reduced gaussian grid"], - "nemo": ["gn", "", "100km", "native ocean tri-polar grid with 105 k ocean cells"], }, - "HR": { - "arpsfx": ["gr", "completeHR", "50 km", - "data regridded to a 359 gaussian grid (180x360 latlon) from a native atmosphere T359l reduced gaussian grid"], - "nemo": ["gn", "", "25km", "native ocean tri-polar grid with 1.47 M ocean cells"], }, - }, - 'grid_choice': {"CNRM-CM6-1": "LR", "CNRM-CM6-1-HR": "HR", - "CNRM-ESM2-1": "LR", "CNRM-ESM2-1-HR": "HR"}, - -} - -""" An example/template of settings for a simulation """ - -example_simulation_settings = { - # Dictionnary describing the necessary attributes for a given simulation - - # Warning : some lines are commented out in this example but should be - # un-commented in some cases. See comments - - "experiment_id": "historical", - # "contact" : "", set it only if it is specific to the simualtion - # "project" : "CMIP6", #CMIP6 is the default - - # 'source_type' : "ESM" # If source_type deduced from model name is not relevant for this - # experiment (e.g. : AMIP), you may tell that here - - # MIPs specifying the experiment. For historical, it is CMIP - # itself In a few cases it may be appropriate to include multiple - # activities in the activity_id (separated by single spaces). - # An example of this is 'LUMIP AerChemMIP' for one of the land-use change experiments. - "activity_id": "CMIP", # examples : "PMIP", 'LS3MIP LUMIP'; defaults to "CMIP" - - # It is recommended that some description be included to help - # identify major differences among variants, but care should be - # taken to record correct information. 
Prudence dictates that - # this attribute includes a warning along the following lines: - # 'Information provided by this attribute may in some cases be - # flawed.# Users can find more comprehensive and up-to-date - # documentation via the further_info_url global attribute.' - "variant_info": "Start date chosen so that variant r1i1p1f1 has " + \ - "the better fit with Krakatoa impact on tos", - # - "realization_index": 1, # Value may be omitted if = 1 - "initialization_index": 1, # Value may be omitted if = 1 - "physics_index": 1, # Value may be omitted if = 1 - "forcing_index": 1, # Value may be omitted if = 1 - # - # All about the parent experiment and branching scheme - "parent_experiment_id": "piControl", # omit or set to 'no parent' if not applicable - # (remaining parent attributes will be disregarded) - "branch_method": "standard", # default value='standard' meaning ~ "select a start date" - "branch_time_in_child": "0.0D0", # a double precision value in child time units (days), used if applicable - "branch_time_in_parent": "365.0D0", # a double precision value, in days, used if applicable - 'parent_time_ref_year': 1850, # default=1850. - # 'parent_variant_label' :"" #Default to 'same as child'. Other cases should be exceptional - # "parent_mip_era" : 'CMIP5' # only in special cases (as e.g. PMIP warm - # start from CMIP5/PMIP3 experiment) - # 'parent_activity_id' : 'CMIP' # only in special cases, defaults to CMIP - # 'parent_source_id' : 'CNRM-CM5.1' # only in special cases, where parent model - # is not the same model - # - "sub_experiment_id": "None", # Optional, default is 'none'; example : s1960. - "sub_experiment": "None", # Optional, default in 'none' - "history": "None", # Used when a simulation is re-run, an output file is modified ... - # A per-variable dict of comments which are specific to this simulation. 
It will replace - # the all-simulation comment - 'comments': { - 'tas': 'tas diagnostic uses a special scheme in this simulation : .....', - } -} - - -# def hasCMORVarName(hmvar): -# for cmvar in dq.coll['CMORvar'].items: -# if (cmvar.label==hmvar.label): return True - -def RequestItem_applies_for_exp_and_year(ri, experiment, year, debug=False): - """ - Returns True if requestItem 'ri' in data request 'dq' (global) is relevant - for a given 'experiment' and 'year'. Toggle 'debug' allow some printouts - """ - # Acces experiment or experiment group for the RequestItem - if (debug): print "Checking ", "% 15s" % ri.label, - item_exp = dq.inx.uid[ri.esid] - relevant = False - exps = dq.coll['experiment'].items - # esid can link to an experiment or an experiment group - if item_exp._h.label == 'experiment': - if (debug): print "%20s" % "Simple Expt case", item_exp.label, - if item_exp.label == experiment: - if (debug): print " OK", - relevant = True - elif item_exp._h.label == 'exptgroup': - if (debug): print "%20s" % "Expt Group case ", item_exp.label, - group_id = ri.esid - for e in exps: - if 'egid' in dir(e) and e.egid == group_id and \ - e.label == experiment: - if (debug): print " OK for experiment based on group" + \ - group_id.label, - relevant = True - elif item_exp._h.label == 'mip': - mip_id = ri.esid - if (debug): print "%20s" % "Mip case ", dq.inx.uid[mip_id].label, - for e in exps: - if 'mip' in dir(e) and e.mip == mip_id: - if (debug): print e.label, ",", - if e.label == experiment: - if (debug): print " OK for experiment based on mip" + \ - mip_id.label, - relevant = True - else: - if (debug): - print "%20s" % 'Error %s for %s' % (item_exp._h.label, `ri`) - # raise(dr2xml_error("%20s"%'Other case , label=%s|'%item_exp._h.label)) - if relevant: - if 'tslice' in ri.__dict__: - if ri.tslice == '__unset__': - print "tslice is unset for reqlink %s " % ri.title - relevant = True - else: - timeslice = dq.inx.uid[ri.tslice] - if (debug): print "OK for the year" - 
try: - relevant = year >= timeslice.start and year <= timeslice.end - except: - relevant = True - print "tslice not well set for " + timeslice.label + " " + \ - timeslice.uid + \ - ". Assuming it applies for RequestItem " + ri.title - else: - if (debug): print "tslice not set -> OK for the year" - # print "No tslice for %s"%ri.title - relevant = True - return relevant - - -def select_CMORvars_for_lab(lset, experiment_id=None, year=None, printout=False): - """ - A function to list CMOR variables relevant for a lab (and also, - optionnally for an experiment and a year) - - Args: - lset (dict): laboratory settings; used to provide the list of MIPS, - the max Tier, and a list of excluded variable names - experiment_id (string,optional): if willing to filter on a given - experiment - not used if year is None - year (int,optional) : simulation year - used to filter the request - for an experiment and a year - - Returns: - A list of 'simplified CMOR variables' - - """ - # - # From MIPS set to Request links - global sc - sc = dreqQuery(dq=dq, tierMax=lset['tierMax']) - - # Set sizes for lab settings, if available (or use CNRM-CM6-1 defaults) - mcfg = collections.namedtuple('mcfg', \ - ['nho', 'nlo', 'nha', 'nla', 'nlas', 'nls', 'nh1']) - sizes = lset.get("sizes", [259200, 60, 64800, 40, 20, 5, 100]) - sc.mcfg = mcfg._make(sizes)._asdict() - # - rls_for_mips = sc.getRequestLinkByMip(lset['mips']) - if printout: - print "Number of Request Links which apply to MIPS", - print lset['mips'], " is: ", len(rls_for_mips) - # - if (year): - filtered_rls = [] - for rl in rls_for_mips: - # Access all requesItems ids which refer to this RequestLink - ri_ids = dq.inx.iref_by_sect[rl.uid].a['requestItem'] - for ri_id in ri_ids: - ri = dq.inx.uid[ri_id] - # print "Checking requestItem ",ri.label - if RequestItem_applies_for_exp_and_year(ri, - experiment_id, year, False): - # print "% 25s"%ri.label," applies " - filtered_rls.append(rl) - rls = filtered_rls - if printout: - print "Number of 
Request Links which apply to experiment ", \ - experiment_id, "and MIPs", lset['mips'], " is: ", len(rls) - else: - rls = rls_for_mips - - # From Request links to CMOR vars + grid - # miprl_ids=[ rl.uid for rl in rls ] - # miprl_vars=sc.varsByRql(miprl_ids, pmax=lset['max_priority']) - miprl_vars_grids = [] - for rl in rls: - rl_vars = sc.varsByRql([rl.uid], pmax=lset['max_priority']) - for v in rl_vars: - if (v, rl.grid) not in miprl_vars_grids: - miprl_vars_grids.append((v, rl.grid)) - if printout: - print 'Number of (CMOR variable, grid) pairs for these requestLinks is :%s' % len(miprl_vars_grids) - # - filtered_vars = [] - for (v, g) in miprl_vars_grids: - cmvar = dq.inx.uid[v] - mipvar = dq.inx.uid[cmvar.vid] - if mipvar.label not in lset['excluded_vars']: - filtered_vars.append((v, g)) - if printout: - print 'Number once filtered by excluded vars is : %s' % len(filtered_vars) - - # Filter the list of grids requested for each variable based on lab policy - d = dict() - for (v, g) in filtered_vars: - if v not in d: d[v] = set() - d[v].add(g) - if printout: - print 'Number of distinct CMOR variables (whatever the grid) : %d' % len(d) - for v in d: - d[v] = decide_for_grids(v, d[v], lset, dq) - if False and printout and len(d[v]) > 1: - print "\tVariable %s will be processed with multiple grids : %s" % (dq.inx.uid[v].label, `d[v]`) - # - # Print a count of distinct var labels - if printout: - varlabels = set() - for v in d: varlabels.add(dq.inx.uid[v].label) - print 'Number of distinct var labels is :', len(varlabels) - - # Translate CMORvars to a list of simplified CMORvar objects - simplified_vars = [] - for v in d: - svar = simple_CMORvar() - cmvar = dq.inx.uid[v] - complement_svar_using_cmorvar(svar, cmvar, dq) - svar.Priority = analyze_priority(cmvar, lset['mips']) - svar.grids = d[v] - simplified_vars.append(svar) - print '\nNumber of simplified vars is :', len(simplified_vars) - return simplified_vars - - -def analyze_priority(cmvar, lmips): - """ - 
Returns the max priority of the CMOR variable, for a set of mips - """ - prio = cmvar.defaultPriority - rv_ids = dq.inx.iref_by_sect[cmvar.uid].a['requestVar'] - for rv_id in rv_ids: - rv = dq.inx.uid[rv_id] - vg = dq.inx.uid[rv.vgid] - if vg.mip in lmips: - if rv.priority < prio: prio = rv.priority - return prio - - -# mpmoine_last_modif:wr: ajout de l'argument num_type -# mpmoine_zoom_modif:wr: ajout de l'argument out (car fonction remontee d un niveau) -def wr(out, key, dic_or_val=None, num_type="string", default=None): - """ - Short cut for a repetitive pattern : writing in 'out' - a string variable name and value - If dic_or_val is not None - if dic_or_val is a dict, - if key is in value is dic_or_val[key], - otherwise use default as value , except if default is False - otherwise, use arg dic_or_val as value if not None nor False, - otherwise use value of local variable 'key' - """ - val = None - if type(dic_or_val) == type({}): - if key in dic_or_val: - val = dic_or_val[key] - else: - if default is not None: - if default is not False: val = default - else: - print 'error : %s not in dic and default is None' % key - else: - if dic_or_val is not None: - val = dic_or_val - else: - print 'error in wr, no value provided for %s' % key - if val: - out.write(' %s ' % (key, num_type, val)) - out.write(' \n') - - -# mpmoine_WIP_update: WIP doc v6.2.3 - Apr. 2017: format is frequency-dependant => new function 'freq2datefmt' -def freq2datefmt(freq): - datefmt = False - if freq in ["yr", "decadal"]: - datefmt = "%y" - elif freq in ["mon", "monClim"]: - datefmt = "%y%mo" - elif freq == "day": - datefmt = "%y%mo%d" - # mpmoine_TBD: supprimer "hr" selon reponse de D. Nadeau a l'issue https://github.com/PCMDI/cmip6-cmor-tables/issues/59 - elif freq in ["6hr", "3hr", "3hrClim", "1hr", "hr", "1hrClimMon"]: - datefmt = "%y%mo%d%h%mi" - elif freq == "subhr": - datefmt = "%y%mo%d%h%mi%s" - elif freq == "fx": - pass ## WIP doc v6.2.3 - Apr. 
2017: if frequency="fx", [_] is ommitted - return datefmt - - -def write_xios_file_def(cmv, table, lset, sset, out, cvspath, - field_defs, axis_defs, grid_defs, domain_defs, - dummies, skipped_vars_per_table, - prefix, context, grid, pingvars=None): - """ - Generate an XIOS file_def entry in out for : - - a dict for laboratory settings - - a dict of simulation settings - - a 'simplifed CMORvar' cmv - - which all belong to given table - - a path 'cvs' for Controlled Vocabulary - - Lenghty code, but not longer than the corresponding specification document - - After a prologue, attributes valid for all variables are - written as file-level metadata, in the same order than in - WIP document; last, field-level metadate are written - """ - # - global sc - - # mpmoine_amelioration:write_xios_file_def_for_svar: gestion ici des attributs pour lesquels on a recupere des chaines vides (" " est Faux mais est ecrit " "") - # -------------------------------------------------------------------- - # Set to NOT-SET field attributes that can be empty strings - # -------------------------------------------------------------------- - if not cmv.stdname: cmv.stdname = "DR-ISSUE" - if not cmv.long_name: cmv.long_name = "DR-ISSUE" - if not cmv.cell_methods: cmv.cell_methods = "DR-ISSUE" - if not cmv.cell_measures: cmv.cell_measures = "DR-ISSUE" - if not cmv.stdunits: cmv.stdunits = "DR-ISSUE" - - # -------------------------------------------------------------------- - # Define alias for field_ref in file-def file - # - may be replaced by alias1 later - # - this is not necessarily the alias used in ping file because of - # intermediate field id(s) due to union/zoom - # -------------------------------------------------------------------- - # We use a simple convention for variable names in ping files : - if cmv.type == 'perso': - alias = cmv.label - else: - # mpmoine_correction:write_xios_file_def_for_svar: si on a defini un label non ambigu alors on l'untilise comme alias (i.e. 
le field_ref) - # mpmoine_correction:write_xios_file_def_for_svar: et pour l'alias seulement (le nom de variable dans le nom de fichier restant svar.label) - if cmv.label_non_ambiguous: - alias = lset["ping_variables_prefix"] + cmv.label_non_ambiguous - else: - alias = lset["ping_variables_prefix"] + cmv.label - # mpmoine_correction:write_xios_file_def_for_svar: suppression des terminaisons en "Clim" pour l'alias (i.e. le field_ref) le cas echeant - split_alias = alias.split("Clim") - alias = split_alias[0] - if pingvars is not None: - # mpmoine_zoom_modif:write_xios_file_def_for_svar: dans le pingfile, on attend plus les alias complets des variables (CMIP6_