diff --git a/docs/source/index.rst b/docs/source/index.rst index 8388f5b..c97e595 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -50,6 +50,7 @@ Manual configuring cli netcdf + wrangler changelog examples diff --git a/docs/source/wrangler.rst b/docs/source/wrangler.rst new file mode 100644 index 0000000..d22d208 --- /dev/null +++ b/docs/source/wrangler.rst @@ -0,0 +1,108 @@ +.. _wrangler: + +Wrangler — flat dict ↔ IDS +=========================== + +The :mod:`imas.wrangler` module converts between IMAS IDS objects and a +**flat Python dict** whose keys are dot-separated paths. + +This is useful for machine-learning pipelines, inspection tools, and any +workflow that wants to treat IDS data as a plain mapping of named arrays. + +Key functions +------------- + +.. autofunction:: imas.wrangler.wrangle +.. autofunction:: imas.wrangler.unwrangle +.. autofunction:: imas.wrangler.ids_to_flat +.. autofunction:: imas.wrangler.split_location_across_ids + + +Flat-dict key format +-------------------- + +Every key is ``"<ids_name>.<field_path>"`` where the field path mirrors the +IDS structure hierarchy: + +.. code-block:: text + + "core_profiles.time" + "core_profiles.ids_properties.homogeneous_time" + "core_profiles.profiles_1d.grid.rho_tor_norm" # AoS path + + +Quick example +------------- + +.. 
code-block:: python + + import numpy as np + from imas.wrangler import wrangle, unwrangle + + # --- flat dict → IDS ------------------------------------------------ + flat = { + "core_profiles.ids_properties.homogeneous_time": 1, + "core_profiles.time": np.array([0.0, 1.0, 2.0]), + # AoS: leading dimension = number of time slices + "core_profiles.profiles_1d.grid.rho_tor_norm": np.tile( + np.linspace(0, 1, 50), (3, 1) + ), + "core_profiles.profiles_1d.electrons.temperature": np.ones((3, 50)) * 1e3, + } + + ids_dict = wrangle(flat) + cp = ids_dict["core_profiles"] + + print(cp.time.value) # array([0., 1., 2.]) + print(cp.profiles_1d.size) # 3 + print(cp.profiles_1d[0].grid.rho_tor_norm.value.shape) # (50,) + + # --- IDS → flat dict ------------------------------------------------ + recovered = unwrangle(list(flat.keys()), ids_dict) + + print(recovered["core_profiles.time"]) # array([0., 1., 2.]) + print(recovered["core_profiles.profiles_1d.electrons.temperature"].shape) # (3, 50) + +If you already have an IDS from a :py:class:`~imas.db_entry.DBEntry`, use +:func:`~imas.wrangler.ids_to_flat` — no path list required: + +.. code-block:: python + + import imas + from imas.wrangler import ids_to_flat + + with imas.DBEntry("imas:hdf5?path=./test", "r") as db: + cp = db.get("core_profiles", autoconvert=False) + + flat = ids_to_flat(cp) + print(flat["core_profiles.time"]) + print(flat["core_profiles.profiles_1d.electrons.temperature"].shape) + + +Array of Structures (AoS) +------------------------- + +For paths that pass through an AoS node (e.g. ``profiles_1d``): + +* **wrangle** — the value must have a leading dimension equal to the number of + AoS elements. The AoS is resized automatically on the first path that + touches it; subsequent paths must agree on the same size. + +* **unwrangle** — homogeneous AoS (all elements have the same leaf shape) is + returned as a :class:`numpy.ndarray` with the AoS index as the leading axis. 
+ Ragged AoS (elements differ in length) requires + `awkward-array <https://awkward-array.org/>`__ and is returned as an + :class:`awkward.Array`: + + .. code-block:: bash + + pip install "imas-python[awkward]" + + .. code-block:: python + + import awkward as ak + from imas.wrangler import unwrangle + + result = unwrangle(["thomson_scattering.channel.t_e.data"], ids_dict) + arr = result["thomson_scattering.channel.t_e.data"] + # arr is an ak.Array when channels have different numbers of time points diff --git a/imas/test/test_wrangle.py b/imas/test/test_wrangle.py new file mode 100644 index 0000000..fc81a6e --- /dev/null +++ b/imas/test/test_wrangle.py @@ -0,0 +1,290 @@ +# This file is part of IMAS-Python. +# You should have received the IMAS-Python LICENSE file with this project. +"""Tests for imas.wrangler — wrangle / unwrangle / split_location_across_ids.""" + +import numpy as np +import pytest + +from imas.ids_factory import IDSFactory +from imas.wrangler import ids_to_flat, split_location_across_ids, unwrangle, wrangle + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def make_core_profiles(n_times: int, n_rho: int): + return { + "core_profiles.ids_properties.homogeneous_time": 1, + "core_profiles.time": np.linspace(0.0, 1.0, n_times), + "core_profiles.profiles_1d.grid.rho_tor_norm": np.tile( + np.linspace(0.0, 1.0, n_rho), (n_times, 1) + ), + "core_profiles.profiles_1d.electrons.temperature": np.ones((n_times, n_rho)) + * 1e3, + } + + +# --------------------------------------------------------------------------- +# split_location_across_ids +# --------------------------------------------------------------------------- + + +def test_split_location_across_ids_single_ids(): + locs = [ + "equilibrium.time", + "equilibrium.time_slice.profiles_1d.psi", + ] + result = split_location_across_ids(locs) + assert set(result.keys()) == {"equilibrium"} + assert "time" in 
result["equilibrium"] + assert "time_slice/profiles_1d/psi" in result["equilibrium"] + + +def test_split_location_across_ids_multiple_ids(): + locs = [ + "core_profiles.time", + "equilibrium.time", + "equilibrium.time_slice.profiles_1d.psi", + ] + result = split_location_across_ids(locs) + assert set(result.keys()) == {"core_profiles", "equilibrium"} + assert result["core_profiles"] == ["time"] + assert "time" in result["equilibrium"] + + +# --------------------------------------------------------------------------- +# wrangle +# --------------------------------------------------------------------------- + + +def test_wrangle_returns_ids_objects(): + flat = make_core_profiles(3, 20) + ids_dict = wrangle(flat) + assert "core_profiles" in ids_dict + + +def test_wrangle_scalar(): + flat = {"core_profiles.ids_properties.homogeneous_time": 1} + ids_dict = wrangle(flat) + assert ids_dict["core_profiles"].ids_properties.homogeneous_time.value == 1 + + +def test_wrangle_1d_array(): + time = np.array([0.0, 0.5, 1.0]) + ids_dict = wrangle({"core_profiles.time": time}) + np.testing.assert_array_equal(ids_dict["core_profiles"].time.value, time) + + +def test_wrangle_aos(): + n_times, n_rho = 3, 10 + flat = make_core_profiles(n_times, n_rho) + ids_dict = wrangle(flat) + cp = ids_dict["core_profiles"] + + # AoS was resized + assert cp.profiles_1d.size == n_times + + # Each slot holds the right row + expected = np.linspace(0.0, 1.0, n_rho) + for i in range(n_times): + np.testing.assert_allclose(cp.profiles_1d[i].grid.rho_tor_norm.value, expected) + + +def test_wrangle_version_kwarg(): + flat = {"core_profiles.time": np.array([0.0, 1.0])} + ids_dict = wrangle(flat, version="3.41.0") + assert "core_profiles" in ids_dict + + +def test_wrangle_inconsistent_aos_size_raises(): + flat = { + "core_profiles.profiles_1d.grid.rho_tor_norm": np.ones((3, 10)), + "core_profiles.profiles_1d.electrons.temperature": np.ones((5, 10)), # wrong N + } + with pytest.raises(ValueError, 
def test_unwrangle_missing_path_warns(caplog):
    """Requesting an unfilled path omits the key and logs a warning."""
    import logging

    # A freshly created IDS has no filled leaves, so "time" cannot be found.
    cp = IDSFactory().new("core_profiles")
    with caplog.at_level(logging.WARNING):
        result = unwrangle(["core_profiles.time"], {"core_profiles": cp})

    assert "core_profiles.time" not in result
    # Check the warning for real: this assertion used to be or-ed with
    # ``len(caplog.records) >= 0``, which is always true and therefore made
    # the check pass even when nothing was logged.
    assert "not found" in caplog.text.lower()
+ "core_profiles.profiles_1d.electrons.temperature", + ]: + np.testing.assert_array_almost_equal(recovered[key], flat[key]) + + +# --------------------------------------------------------------------------- +# ids_to_flat +# --------------------------------------------------------------------------- + + +def test_ids_to_flat_returns_all_filled_paths(): + """ids_to_flat discovers every filled leaf without an explicit path list.""" + n_times, n_rho = 3, 10 + flat_in = make_core_profiles(n_times, n_rho) + ids_dict = wrangle(flat_in) + cp = ids_dict["core_profiles"] + + flat_out = ids_to_flat(cp) + + # All paths we put in must come back out + for key in flat_in: + assert key in flat_out, f"Missing key: {key}" + + +def test_ids_to_flat_roundtrip_values(): + """Values recovered by ids_to_flat match what was wrangled in.""" + n_times, n_rho = 2, 8 + flat_in = make_core_profiles(n_times, n_rho) + ids_dict = wrangle(flat_in) + flat_out = ids_to_flat(ids_dict["core_profiles"]) + + np.testing.assert_array_almost_equal( + flat_out["core_profiles.time"], flat_in["core_profiles.time"] + ) + np.testing.assert_array_almost_equal( + flat_out["core_profiles.profiles_1d.electrons.temperature"], + flat_in["core_profiles.profiles_1d.electrons.temperature"], + ) + + +def test_ids_to_flat_empty_ids_returns_empty(): + """An unfilled IDS produces an empty dict.""" + cp = IDSFactory().new("core_profiles") + assert ids_to_flat(cp) == {} + + +def test_wrangle_base_ids_dict_uses_donor_version(): + """base_ids_dict makes wrangle use the donor IDS DD version, not the default.""" + # Build a small IDS and record its version + factory = IDSFactory() + cp = factory.new("core_profiles") + donor_version = cp._dd_version + + flat = {"core_profiles.time": np.array([0.0, 1.0])} + ids_dict = wrangle(flat, base_ids_dict={"core_profiles": cp}) + + result_cp = ids_dict["core_profiles"] + assert result_cp._dd_version == donor_version + np.testing.assert_array_equal(result_cp.time.value, [0.0, 1.0]) + + +def 
test_wrangle_base_ids_dict_roundtrip(): + """ids_to_flat + wrangle(base_ids_dict=...) is a lossless roundtrip.""" + n_times, n_rho = 2, 8 + flat_in = make_core_profiles(n_times, n_rho) + cp = wrangle(flat_in)["core_profiles"] + + # Roundtrip via ids_to_flat → wrangle with base_ids_dict + flat_rt = ids_to_flat(cp) + ids_dict_rt = wrangle(flat_rt, base_ids_dict={"core_profiles": cp}) + cp_rt = ids_dict_rt["core_profiles"] + + np.testing.assert_array_almost_equal( + cp_rt.time.value, flat_in["core_profiles.time"] + ) + np.testing.assert_array_almost_equal( + cp_rt.profiles_1d[0].grid.rho_tor_norm.value, + flat_in["core_profiles.profiles_1d.grid.rho_tor_norm"][0], + ) + + +def test_wrangle_scalar_in_array_field(): + # (2, 1, 2) array ≡ 2 time slices, 1 ion species, 2 states, scalar leaf + flat = { + "core_profiles.profiles_1d.ion.state.density_thermal": np.zeros((2, 1, 2)), + } + # Must not raise: ValueError: Trying to assign a 0D value to FLT_1D + ids_dict = wrangle(flat) + cp = ids_dict["core_profiles"] + assert cp.profiles_1d[0].ion[0].state[0].density_thermal.has_value + + +# --------------------------------------------------------------------------- +# Ragged AoS (requires awkward-array) +# --------------------------------------------------------------------------- + + +def test_unwrangle_aos_ragged(): + ak = pytest.importorskip("awkward", reason="awkward-array not installed") + + factory = IDSFactory() + cp = factory.new("core_profiles") + cp.profiles_1d.resize(3) + cp.profiles_1d[0].grid.rho_tor_norm.value = np.linspace(0, 1, 10) + cp.profiles_1d[1].grid.rho_tor_norm.value = np.linspace(0, 1, 15) # different size + cp.profiles_1d[2].grid.rho_tor_norm.value = np.linspace(0, 1, 8) + + ids_dict = {"core_profiles": cp} + recovered = unwrangle(["core_profiles.profiles_1d.grid.rho_tor_norm"], ids_dict) + + key = "core_profiles.profiles_1d.grid.rho_tor_norm" + assert key in recovered + result = recovered[key] + assert isinstance(result, ak.Array) + assert len(result) 
== 3 + assert len(result[0]) == 10 + assert len(result[1]) == 15 + assert len(result[2]) == 8 diff --git a/imas/wrangler.py b/imas/wrangler.py new file mode 100644 index 0000000..0c92ea9 --- /dev/null +++ b/imas/wrangler.py @@ -0,0 +1,340 @@ +# This file is part of IMAS-Python. +# You should have received the IMAS-Python LICENSE file with this project. +"""Wrangling: convert between flat dot-path dicts and IMAS IDS objects.""" + +from __future__ import annotations + +import logging +from typing import Any, Dict, List, Optional + +import numpy as np + +from . import IDSFactory +from .backends.netcdf.iterators import indexed_tree_iter +from .ids_primitive import IDSPrimitive +from .ids_struct_array import IDSStructArray +from .ids_structure import IDSStructure +from .ids_toplevel import IDSToplevel + +logger = logging.getLogger(__name__) + +try: + import awkward as ak +except ImportError: + ak = None + logger.debug("Could not import awkward-array", exc_info=True) + + +# --------------------------------------------------------------------------- +# wrangle: flat dict → IDS objects +# --------------------------------------------------------------------------- + + +def wrangle( + flat: Dict, + version: Optional[str] = None, + base_ids_dict: Optional[Dict[str, IDSToplevel]] = None, +) -> Dict[str, IDSToplevel]: + """Convert a flat dot-path dict into IDS toplevel objects. + + Args: + flat: Keys are dot-separated paths like + ``"equilibrium.time_slice.profiles_1d.psi"``. Values are + scalars, :class:`numpy.ndarray`, or :class:`awkward.Array`. + version: Data Dictionary version string. When ``None`` (default) + the installed default DD version is used. Ignored for any IDS + name that is already present in *base_ids_dict*. + base_ids_dict: Optional mapping of IDS name → existing + :class:`~imas.ids_toplevel.IDSToplevel` instances. When + provided, new IDS objects for the matching names are created + using the **same DD version** as the supplied IDS. 
def _put_value(slash_path: str, value: Any, node: IDSStructure) -> None:
    """Recursively navigate *node* along *slash_path* and set the leaf.

    Args:
        slash_path: Remaining IDS path using ``"/"`` as separator.
        value: Value to assign.  For AoS nodes this must be sized and
            indexable (numpy array, ak.Array, or list) with a leading
            dimension equal to the number of AoS elements.  ``None`` — at
            the leaf or as an individual AoS element — is skipped.
        node: Current IDS structure node (IDSToplevel or IDSStructure).

    Raises:
        TypeError: If an AoS node is reached but *value* has no length.
        ValueError: If the leading dimension of *value* disagrees with the
            size of an already-resized AoS, or if a primitive node is
            reached before the end of the path.
    """
    if "/" not in slash_path:
        if value is None:
            return
        target = node[slash_path]
        # A 0-D value is either a numpy scalar / 0-d array (exposes ``ndim``)
        # or a plain Python number, e.g. an element of a nested list.
        is_0d = getattr(value, "ndim", None) == 0 or isinstance(
            value, (int, float, complex)
        )
        if is_0d:
            target_ndim = target.metadata.ndim
            if target_ndim > 0:
                # Scalar headed for an array field: wrap it in a one-element
                # array of the target's rank.
                value = np.reshape(value, (1,) * target_ndim)
            elif hasattr(value, "item"):
                # numpy scalar headed for a scalar field: unwrap it.
                value = value.item()
        target.value = value
        return

    part, rest = slash_path.split("/", 1)
    child = node[part]

    if isinstance(child, IDSStructArray):
        # AoS: value has a leading dimension for the array elements
        try:
            n_elements = len(value)
        except TypeError:
            raise TypeError(
                f"Value for '{part}/{rest}' must have a leading dimension "
                f"indexing the elements of AoS '{part}', "
                f"got unsized {type(value).__name__}."
            ) from None
        if child.size == 0:
            # First path touching this AoS decides its size.
            child.resize(n_elements)
        elif child.size != n_elements:
            raise ValueError(
                f"Inconsistent AoS size at '{part}': "
                f"IDS has {child.size} elements, flat value has {n_elements}."
            )
        for idx in range(n_elements):
            element_value = value[idx]  # index once; may be costly for ak.Array
            if element_value is not None:
                _put_value(rest, element_value, child[idx])

    elif isinstance(child, IDSStructure):
        _put_value(rest, value, child)

    else:
        # Primitive reached before end of path — should not happen for valid DD paths
        raise ValueError(
            f"Path component '{part}' resolved to a primitive node "
            f"but the remaining path '{rest}' is non-empty."
        )
def _collect_aos_value(
    nodes_dict: Dict[tuple, IDSPrimitive],
    dot_key: str,
) -> Any:
    """Reconstruct a numpy array or ak.Array from an AoS nodes dict.

    For homogeneous data (every leaf value has the same shape and every slot
    of the AoS index grid is present) the result is a
    :class:`numpy.ndarray` of shape ``(*aos_shape, *leaf_shape)`` whose dtype
    is derived from **all** leaf values, so strings of differing length are
    not truncated to the width of the first one.

    For ragged or sparse data the result is an :class:`awkward.Array` in
    which absent slots are ``None``.

    Args:
        nodes_dict: Maps AoS index tuples ``(i,)``, ``(i, j)``, … to leaf
            nodes; only each node's ``value`` attribute is read.  Must be
            non-empty.
        dot_key: Flat-dict key the values belong to.  Accepted for the
            caller's convenience; not read by this function.

    Returns:
        A :class:`numpy.ndarray` for homogeneous data, otherwise an
        :class:`awkward.Array`.

    Raises:
        ImportError: If the data is ragged or sparse and awkward-array is
            not installed.
    """
    indices = sorted(nodes_dict)
    ndims_aos = len(indices[0])  # number of AoS nesting levels

    # Extent of each AoS dimension: largest index seen along it, plus one.
    aos_shape = tuple(max(idx[d] for idx in indices) + 1 for d in range(ndims_aos))

    # Values in sorted (lexicographic) index order.
    values = [nodes_dict[idx].value for idx in indices]
    leaf_shapes = {np.shape(v) for v in values}

    all_filled = len(indices) == int(np.prod(aos_shape))

    if len(leaf_shapes) == 1 and all_filled:
        # Homogeneous and fully filled.  Every index lies inside the
        # ``aos_shape`` box and the box is full, so lexicographic order is
        # exactly C order and a plain reshape puts every leaf in its slot.
        # ``np.array`` picks a dtype wide enough for all values.
        (leaf_shape,) = leaf_shapes
        return np.array(values).reshape(aos_shape + leaf_shape)

    # Ragged or sparse: build a nested list and wrap it in ak.Array.
    if ak is None:
        raise ImportError(
            "awkward-array is required for ragged IDS data. "
            "Install it with: pip install imas-python[awkward]"
        )
    nested = _build_nested_list(nodes_dict, ndims_aos, aos_shape, depth=0, prefix=())
    return ak.Array(nested)
def ids_to_flat(ids: IDSToplevel) -> Dict[str, Any]:
    """Convert a single :class:`~imas.ids_toplevel.IDSToplevel` to a flat dict.

    All filled primitive leaf nodes are auto-discovered via
    :func:`~imas.backends.netcdf.iterators.indexed_tree_iter`; no path list
    needs to be supplied.

    This is the ergonomic complement to :func:`wrangle` when you already have
    an IDS object (e.g. read from a DBEntry) and want a flat representation::

        with imas.DBEntry(uri, "r") as db:
            cp = db.get("core_profiles")

        flat = ids_to_flat(cp)
        # flat["core_profiles.time"], flat["core_profiles.profiles_1d..."], …

    Args:
        ids: A filled IDSToplevel instance.

    Returns:
        Dict mapping dot-separated paths to values (numpy arrays, scalars, or
        :class:`awkward.Array` for ragged data).
    """
    ids_name = ids.metadata.name
    # ``path_string`` carries no AoS indices, so a leaf below an AoS shows up
    # once per element.  De-duplicate (keeping first-seen order) so that
    # unwrangle assembles each path once instead of once per element.
    filled_paths = dict.fromkeys(
        ids_name + "." + node.metadata.path_string.replace("/", ".")
        for _, node in indexed_tree_iter(ids)
        if isinstance(node, IDSPrimitive) and node.has_value
    )
    return unwrangle(list(filled_paths), {ids_name: ids})