Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions docs/source/netcdf.rst
Original file line number Diff line number Diff line change
Expand Up @@ -183,3 +183,39 @@ specific paths inside the IDS. The latter variant can also be combined with
# profiles_1d.grid.rho_tor
# profiles_1d.grid.rho_tor_norm
# profiles_1d.grid.psi


Store Xarray Datasets in IMAS-compatible netCDF file
''''''''''''''''''''''''''''''''''''''''''''''''''''

.. versionadded:: 2.3.0 :py:meth:`~imas.util.to_xarray` now includes the required
metadata to load the IDS from a netCDF file.

The following snippet shows how to store an IMAS Xarray dataset in an IMAS-compatible
netCDF file. The group name in the netCDF file must correspond to ``<IDS
Name>/<occurrence>`` (``core_profiles/0`` in the snippet).

.. code-block:: python
:caption: Store IMAS Xarray dataset in an IMAS-compatible netCDF file

import imas.training
import netCDF4

with imas.training.get_training_db_entry() as training_entry:
core_profiles = training_entry.get("core_profiles")
xrds = imas.util.to_xarray(core_profiles)

# Store the xarray dataset in an IMAS-compatible netCDF dataset
filename = "data.nc"
xrds.to_netcdf(
filename,
group="core_profiles/0", # Update to the correct IDS name and occurrence
# auto_complex=True, # Uncomment if the dataset contains complex data
)
# Set global DD version metadata
with netCDF4.Dataset(filename, "a") as ds:
ds.data_dictionary_version = imas.util.get_data_dictionary_version(core_profiles)

# Test that we can get the IDS from the netCDF file
with imas.DBEntry(filename, "r") as entry:
ids2 = entry.get("core_profiles")
33 changes: 20 additions & 13 deletions imas/_to_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from imas.ids_data_type import IDSDataType

fillvals = {
IDSDataType.INT: -(2**31) + 1,
IDSDataType.INT: numpy.int32(-(2**31) + 1),
IDSDataType.STR: "",
IDSDataType.FLT: numpy.nan,
IDSDataType.CPX: numpy.nan * (1 + 1j),
Expand Down Expand Up @@ -50,21 +50,28 @@ def to_xarray(ids: IDSToplevel, *paths: str) -> xarray.Dataset:
var_name = path.replace("/", ".")
metadata = ids.metadata[path]
if metadata.data_type in (IDSDataType.STRUCTURE, IDSDataType.STRUCT_ARRAY):
continue # We don't store these in xarray

dimensions = tensorizer.ncmeta.get_dimensions(path, tensorizer.homogeneous_time)
data = tensorizer.tensorize(path, fillvals[metadata.data_type])

attrs = dict(documentation=metadata.documentation)
if metadata.units:
attrs["units"] = metadata.units
coordinates = tensorizer.filter_coordinates(path)
if coordinates:
coordinate_names.update(coordinates.split(" "))
attrs["coordinates"] = coordinates
# Metadata variables for (arrays of) structures
if paths and path not in paths:
continue
dimensions = ()
data = b""
else:
dimensions = tensorizer.get_dimensions(path)
data = tensorizer.tensorize(path, fillvals[metadata.data_type])

attrs = tensorizer.get_attributes(path, fillvals)
if "coordinates" in attrs:
coordinate_names.update(attrs["coordinates"].split(" "))
data_vars[var_name] = (dimensions, data, attrs)

# :shape array for sparse data
if path in tensorizer.shapes and metadata.ndim:
shape_name = f"{var_name}:shape"
dimensions = tensorizer.get_shape_dimensions(path)
data = tensorizer.shapes[path]
attrs = tensorizer.get_shape_attributes(var_name)
data_vars[shape_name] = (dimensions, data, attrs)

# Remove coordinates from data_vars and put in coordinates mapping:
coordinates = {}
for coordinate_name in coordinate_names:
Expand Down
58 changes: 12 additions & 46 deletions imas/backends/netcdf/ids2nc.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ def create_dimensions(self) -> None:

def create_variables(self) -> None:
"""Create netCDF variables."""
get_dimensions = self.ncmeta.get_dimensions
for path in self.filled_data:
metadata = self.ids.metadata[path]
var_name = path.replace("/", ".")
Expand Down Expand Up @@ -75,54 +74,21 @@ def create_variables(self) -> None:
if dtype is not dtypes[IDSDataType.CPX]: # Set fillvalue
kwargs.update(fill_value=default_fillvals[metadata.data_type])
# Create variable
dimensions = get_dimensions(path, self.homogeneous_time)
dimensions = self.get_dimensions(path)
var = self.group.createVariable(var_name, dtype, dimensions, **kwargs)

# Fill metadata attributes
var.documentation = metadata.documentation
if metadata.units:
var.units = metadata.units

ancillary_variables = " ".join(
error_var
for error_var in [f"{var_name}_error_upper", f"{var_name}_error_lower"]
if error_var in self.filled_variables
)
if ancillary_variables:
var.ancillary_variables = ancillary_variables

if metadata.data_type is not IDSDataType.STRUCT_ARRAY:
coordinates = self.filter_coordinates(path)
if coordinates:
var.coordinates = coordinates

# Sparsity and :shape array
if path in self.shapes:
if not metadata.ndim:
# Doesn't need a :shape array:
var.sparse = "Sparse data, missing data is filled with _FillValue"
var.sparse += f" ({default_fillvals[metadata.data_type]})"

else:
shape_name = f"{var_name}:shape"
var.sparse = f"Sparse data, data shapes are stored in {shape_name}"

# Create variable to store data shape
dimensions = get_dimensions(
self.ncmeta.aos.get(path), self.homogeneous_time
) + (f"{metadata.ndim}D",)
shape_var = self.group.createVariable(
shape_name,
SHAPE_DTYPE,
dimensions,
)
doc_indices = ",".join(chr(ord("i") + i) for i in range(3))
shape_var.documentation = (
f"Shape information for {var_name}.\n"
f"{shape_name}[{doc_indices},:] describes the shape of filled "
f"data of {var_name}[{doc_indices},...]. Data outside this "
"shape is unset (i.e. filled with _Fillvalue)."
)
var.setncatts(self.get_attributes(path, default_fillvals))

# :shape array for sparse data
if path in self.shapes and metadata.ndim:
shape_name = f"{var_name}:shape"
# Create variable to store data shape
dimensions = self.get_shape_dimensions(path)
shape_var = self.group.createVariable(
shape_name, SHAPE_DTYPE, dimensions
)
shape_var.setncatts(self.get_shape_attributes(var_name))

def store_data(self) -> None:
"""Store data in the netCDF variables"""
Expand Down
79 changes: 69 additions & 10 deletions imas/backends/netcdf/ids_tensorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""Tensorization logic to convert IDSs to netCDF files and/or xarray Datasets."""

from collections import deque
from typing import List
from typing import List, Tuple, Dict

import numpy

Expand Down Expand Up @@ -47,13 +47,26 @@ def __init__(self, ids: IDSToplevel, paths_to_tensorize: List[str]) -> None:
"""Map of IDS paths to filled data nodes."""
self.filled_variables = set()
"""Set of filled IDS variables"""
self.homogeneous_time = (
self.homogeneous_time = bool(
ids.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS
)
"""True iff the IDS time mode is homogeneous."""
self.shapes = {}
"""Map of IDS paths to data shape arrays."""

def get_dimensions(self, path: str) -> Tuple[str, ...]:
    """Look up the netCDF dimension names of a tensorized variable.

    Args:
        path: Data Dictionary path to the variable,
            e.g. ``ids_properties/comment``.

    Returns:
        Tuple of dimension names for the current IDS time mode.
    """
    time_mode = self.homogeneous_time
    return self.ncmeta.get_dimensions(path, time_mode)

def get_shape_dimensions(self, path: str) -> Tuple[str, ...]:
    """Return the dimension names of the ``:shape`` array for ``path``.

    The shape array is indexed by the dimensions of the ancestor array of
    structures (empty tuple when there is none), plus one trailing
    ``"<ndim>D"`` dimension holding the per-element shapes.
    """
    aos_path = self.ncmeta.aos.get(path, "")
    trailing_dim = f"{self.ids.metadata[path].ndim}D"
    return self.get_dimensions(aos_path) + (trailing_dim,)

def include_coordinate_paths(self) -> None:
"""Append all paths that are coordinates of self.paths_to_tensorize"""
# Use a queue so we can also take coordinates of coordinates into account
Expand All @@ -62,7 +75,7 @@ def include_coordinate_paths(self) -> None:
for path in self.paths_to_tensorize:
while path:
path, _, _ = path.rpartition("/")
if self.ncmeta.get_dimensions(path, self.homogeneous_time):
if self.get_dimensions(path):
queue.append(path)

self.paths_to_tensorize = []
Expand All @@ -82,7 +95,6 @@ def collect_filled_data(self) -> None:
# Initialize dictionary with all paths that could exist in this IDS
filled_data = {path: {} for path in self.ncmeta.paths}
dimension_size = {}
get_dimensions = self.ncmeta.get_dimensions

if self.paths_to_tensorize:
# Restrict tensorization to provided paths
Expand All @@ -102,7 +114,7 @@ def collect_filled_data(self) -> None:
ndim = node.metadata.ndim
if not ndim:
continue
dimensions = get_dimensions(path, self.homogeneous_time)
dimensions = self.get_dimensions(path)
# We're only interested in the non-tensorized dimensions: [-ndim:]
for dim_name, size in zip(dimensions[-ndim:], node.shape):
dimension_size[dim_name] = max(dimension_size.get(dim_name, 0), size)
Expand All @@ -115,15 +127,13 @@ def collect_filled_data(self) -> None:

def determine_data_shapes(self) -> None:
"""Determine tensorized data shapes and sparsity, save in :attr:`shapes`."""
get_dimensions = self.ncmeta.get_dimensions

for path, nodes_dict in self.filled_data.items():
metadata = self.ids.metadata[path]
# Structures don't have a size
if metadata.data_type is IDSDataType.STRUCTURE:
continue
ndim = metadata.ndim
dimensions = get_dimensions(path, self.homogeneous_time)
dimensions = self.get_dimensions(path)

# node shape if it is completely filled
full_shape = tuple(self.dimension_size[dim] for dim in dimensions[-ndim:])
Expand All @@ -137,7 +147,7 @@ def determine_data_shapes(self) -> None:

else:
# Data is tensorized, determine if it is homogeneously shaped
aos_dims = get_dimensions(self.ncmeta.aos[path], self.homogeneous_time)
aos_dims = self.get_dimensions(self.ncmeta.aos[path])
shapes_shape = [self.dimension_size[dim] for dim in aos_dims]
if ndim:
shapes_shape.append(ndim)
Expand Down Expand Up @@ -168,6 +178,55 @@ def filter_coordinates(self, path: str) -> str:
if coordinate in self.filled_variables
)

def get_attributes(self, path: str, fillvals: dict) -> Dict[str, str]:
    """Build the metadata attributes of a tensorized variable.

    Args:
        path: Data Dictionary path of the variable.
        fillvals: Mapping of data type to its fill value; only referenced in
            the ``sparse`` message for 0D variables.

    Returns:
        Attribute dict with ``documentation`` and, when applicable,
        ``units``, ``ancillary_variables``, ``coordinates`` and ``sparse``.
    """
    metadata = self.ids.metadata[path]
    var_name = path.replace("/", ".")

    assert metadata.documentation is not None
    attrs = {"documentation": metadata.documentation}
    if metadata.units:
        attrs["units"] = metadata.units

    # CF-style link to error-bar variables, but only those actually filled
    error_vars = (f"{var_name}_error_upper", f"{var_name}_error_lower")
    filled_errors = [name for name in error_vars if name in self.filled_variables]
    if filled_errors:
        attrs["ancillary_variables"] = " ".join(filled_errors)

    # Arrays of structures carry no coordinates of their own
    if metadata.data_type is not IDSDataType.STRUCT_ARRAY:
        coordinates = self.filter_coordinates(path)
        if coordinates:
            attrs["coordinates"] = coordinates

    # Sparse data: ND variables point at their companion ":shape" array,
    # 0D variables only note the fill value used for missing entries
    if path in self.shapes:
        if metadata.ndim:
            attrs["sparse"] = (
                f"Sparse data, data shapes are stored in {var_name}:shape"
            )
        else:
            attrs["sparse"] = (
                "Sparse data, missing data is filled with _FillValue"
                f" ({fillvals[metadata.data_type]})"
            )

    return attrs

def get_shape_attributes(self, var_name: str) -> Dict[str, str]:
    """Get attributes of the ``:shape`` variable corresponding to var_name.

    Args:
        var_name: Name of the tensorized data variable,
            e.g. ``profiles_1d.electrons.temperature``.

    Returns:
        Attribute dict with a ``documentation`` entry explaining how the
        shape array describes the filled extent of the data variable.
    """
    # NOTE(review): the example indices are hard-coded to three ("i,j,k")
    # regardless of the actual number of AoS dimensions — confirm whether
    # this should depend on the ancestor AoS depth.
    doc_indices = ",".join(chr(ord("i") + i) for i in range(3))
    documentation = (
        f"Shape information for {var_name}.\n"
        f"{var_name}:shape[{doc_indices},:] describes the shape of filled "
        f"data of {var_name}[{doc_indices},...]. Data outside this "
        # Fixed typo: the standard netCDF attribute is spelled "_FillValue"
        # (matches the spelling used by get_attributes and ids2nc.py).
        "shape is unset (i.e. filled with _FillValue)."
    )
    return {"documentation": documentation}

def tensorize(self, path, fillvalue):
"""
Tensorizes the data at the given path with the specified fill value.
Expand All @@ -180,7 +239,7 @@ def tensorize(self, path, fillvalue):
Returns:
A tensor filled with the data from the specified path.
"""
dimensions = self.ncmeta.get_dimensions(path, self.homogeneous_time)
dimensions = self.get_dimensions(path)
shape = tuple(self.dimension_size[dim] for dim in dimensions)

# TODO: depending on the data, tmp_var may be HUGE, we may need a more
Expand Down
27 changes: 13 additions & 14 deletions imas/backends/netcdf/nc2ids.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import os
from typing import Optional
from typing import Optional, Tuple

import netCDF4
import numpy as np
Expand Down Expand Up @@ -80,6 +80,14 @@ def __init__(
)
self.homogeneous_time = var[()] == IDS_TIME_MODE_HOMOGENEOUS

def get_dimensions(self, path: str) -> Tuple[str, ...]:
    """Look up the expected netCDF dimension names for a variable.

    Args:
        path: Data Dictionary path of the variable
            (e.g. ``ids_properties/comment``).

    Returns:
        Tuple of dimension names for this IDS time mode.
    """
    homogeneous = self.homogeneous_time
    return self.ncmeta.get_dimensions(path, homogeneous)

def run(self, lazy: bool) -> None:
"""Load the data from the netCDF group into the IDS."""
self.variables.sort()
Expand Down Expand Up @@ -130,9 +138,7 @@ def run(self, lazy: bool) -> None:

else:
# FIXME: extract dimension name from nc file?
dim = self.ncmeta.get_dimensions(
metadata.path_string, self.homogeneous_time
)[-1]
dim = self.get_dimensions(metadata.path_string)[-1]
size = self.group.dimensions[dim].size
for _, node in indexed_tree_iter(self.ids, target_metadata):
node.resize(size)
Expand Down Expand Up @@ -235,9 +241,7 @@ def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> No
raise variable_error(var, "data type", var.dtype, expected_dtype)

# Dimensions
expected_dims = self.ncmeta.get_dimensions(
metadata.path_string, self.homogeneous_time
)
expected_dims = self.get_dimensions(metadata.path_string)
if var.dimensions != expected_dims:
raise variable_error(var, "dimensions", var.dimensions, expected_dims)

Expand Down Expand Up @@ -298,9 +302,7 @@ def _validate_sparsity(
return # Sparsity is stored with _Fillvalue, nothing to validate

# Dimensions
aos_dimensions = self.ncmeta.get_dimensions(
self.ncmeta.aos.get(metadata.path_string), self.homogeneous_time
)
aos_dimensions = self.get_dimensions(self.ncmeta.aos.get(metadata.path_string))
shape_dimensions = shape_var.dimensions
if (
len(shape_dimensions) != len(aos_dimensions) + 1
Expand Down Expand Up @@ -331,7 +333,6 @@ def get_child(self, child):

Args:
child: The child IDS node which should be lazy loaded.

"""
metadata = child.metadata
path = metadata.path_string
Expand All @@ -347,9 +348,7 @@ def get_child(self, child):
size = nc2ids.group[var.name + ":shape"][self.index][0]
else:
# FIXME: extract dimension name from nc file?
dim = nc2ids.ncmeta.get_dimensions(
metadata.path_string, nc2ids.homogeneous_time
)[-1]
dim = nc2ids.get_dimensions(metadata.path_string)[-1]
size = nc2ids.group.dimensions[dim].size

child._set_lazy_context(LazyArrayStructContext(nc2ids, self.index, size))
Expand Down
Loading
Loading