Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions docs/source/netcdf.rst
Original file line number Diff line number Diff line change
Expand Up @@ -183,3 +183,39 @@ specific paths inside the IDS. The latter variant can also be combined with
# profiles_1d.grid.rho_tor
# profiles_1d.grid.rho_tor_norm
# profiles_1d.grid.psi


Store Xarray Datasets in IMAS-compatible netCDF file
''''''''''''''''''''''''''''''''''''''''''''''''''''

.. versionadded:: 2.3.0 :py:meth:`~imas.util.to_xarray` now includes the required
metadata to load the IDS from a netCDF file.

The following snippet shows how to store an IMAS Xarray dataset in an IMAS-compatible
netCDF file. The group name in the netCDF file must correspond to ``<IDS
Name>/<occurrence>`` (``core_profiles/0`` in the snippet).

.. code-block:: python
:caption: Store IMAS Xarray dataset in an IMAS-compatible netCDF file

import imas.training
import netCDF4

with imas.training.get_training_db_entry() as training_entry:
core_profiles = training_entry.get("core_profiles")
xrds = imas.util.to_xarray(core_profiles)

# Store the xarray dataset in an IMAS-compatible netCDF dataset
filename = "data.nc"
xrds.to_netcdf(
filename,
group="core_profiles/0", # Update to the correct IDS name and occurrence
# auto_complex=True, # Uncomment if the dataset contains complex data
)
# Set global DD version metadata
with netCDF4.Dataset(filename, "a") as ds:
ds.data_dictionary_version = imas.util.get_data_dictionary_version(core_profiles)

# Test that we can get the IDS from the netCDF file
with imas.DBEntry(filename, "r") as entry:
ids2 = entry.get("core_profiles")
33 changes: 20 additions & 13 deletions imas/_to_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from imas.ids_data_type import IDSDataType

fillvals = {
IDSDataType.INT: -(2**31) + 1,
IDSDataType.INT: numpy.int32(-(2**31) + 1),
IDSDataType.STR: "",
IDSDataType.FLT: numpy.nan,
IDSDataType.CPX: numpy.nan * (1 + 1j),
Expand Down Expand Up @@ -50,21 +50,28 @@ def to_xarray(ids: IDSToplevel, *paths: str) -> xarray.Dataset:
var_name = path.replace("/", ".")
metadata = ids.metadata[path]
if metadata.data_type in (IDSDataType.STRUCTURE, IDSDataType.STRUCT_ARRAY):
continue # We don't store these in xarray

dimensions = tensorizer.ncmeta.get_dimensions(path, tensorizer.homogeneous_time)
data = tensorizer.tensorize(path, fillvals[metadata.data_type])

attrs = dict(documentation=metadata.documentation)
if metadata.units:
attrs["units"] = metadata.units
coordinates = tensorizer.filter_coordinates(path)
if coordinates:
coordinate_names.update(coordinates.split(" "))
attrs["coordinates"] = coordinates
# Metadata variables for (arrays of) structures
if paths and path not in paths:
continue
dimensions = ()
data = b""
else:
dimensions = tensorizer.get_dimensions(path)
data = tensorizer.tensorize(path, fillvals[metadata.data_type])

attrs = tensorizer.get_attributes(path, fillvals)
if "coordinates" in attrs:
coordinate_names.update(attrs["coordinates"].split(" "))
data_vars[var_name] = (dimensions, data, attrs)

# :shape array for sparse data
if path in tensorizer.shapes and metadata.ndim:
shape_name = f"{var_name}:shape"
dimensions = tensorizer.get_shape_dimensions(path)
data = tensorizer.shapes[path]
attrs = tensorizer.get_shape_attributes(var_name)
data_vars[shape_name] = (dimensions, data, attrs)

# Remove coordinates from data_vars and put in coordinates mapping:
coordinates = {}
for coordinate_name in coordinate_names:
Expand Down
58 changes: 12 additions & 46 deletions imas/backends/netcdf/ids2nc.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ def create_dimensions(self) -> None:

def create_variables(self) -> None:
"""Create netCDF variables."""
get_dimensions = self.ncmeta.get_dimensions
for path in self.filled_data:
metadata = self.ids.metadata[path]
var_name = path.replace("/", ".")
Expand Down Expand Up @@ -75,54 +74,21 @@ def create_variables(self) -> None:
if dtype is not dtypes[IDSDataType.CPX]: # Set fillvalue
kwargs.update(fill_value=default_fillvals[metadata.data_type])
# Create variable
dimensions = get_dimensions(path, self.homogeneous_time)
dimensions = self.get_dimensions(path)
var = self.group.createVariable(var_name, dtype, dimensions, **kwargs)

# Fill metadata attributes
var.documentation = metadata.documentation
if metadata.units:
var.units = metadata.units

ancillary_variables = " ".join(
error_var
for error_var in [f"{var_name}_error_upper", f"{var_name}_error_lower"]
if error_var in self.filled_variables
)
if ancillary_variables:
var.ancillary_variables = ancillary_variables

if metadata.data_type is not IDSDataType.STRUCT_ARRAY:
coordinates = self.filter_coordinates(path)
if coordinates:
var.coordinates = coordinates

# Sparsity and :shape array
if path in self.shapes:
if not metadata.ndim:
# Doesn't need a :shape array:
var.sparse = "Sparse data, missing data is filled with _FillValue"
var.sparse += f" ({default_fillvals[metadata.data_type]})"

else:
shape_name = f"{var_name}:shape"
var.sparse = f"Sparse data, data shapes are stored in {shape_name}"

# Create variable to store data shape
dimensions = get_dimensions(
self.ncmeta.aos.get(path), self.homogeneous_time
) + (f"{metadata.ndim}D",)
shape_var = self.group.createVariable(
shape_name,
SHAPE_DTYPE,
dimensions,
)
doc_indices = ",".join(chr(ord("i") + i) for i in range(3))
shape_var.documentation = (
f"Shape information for {var_name}.\n"
f"{shape_name}[{doc_indices},:] describes the shape of filled "
f"data of {var_name}[{doc_indices},...]. Data outside this "
"shape is unset (i.e. filled with _Fillvalue)."
)
var.setncatts(self.get_attributes(path, default_fillvals))

# :shape array for sparse data
if path in self.shapes and metadata.ndim:
shape_name = f"{var_name}:shape"
# Create variable to store data shape
dimensions = self.get_shape_dimensions(path)
shape_var = self.group.createVariable(
shape_name, SHAPE_DTYPE, dimensions
)
shape_var.setncatts(self.get_shape_attributes(var_name))

def store_data(self) -> None:
"""Store data in the netCDF variables"""
Expand Down
79 changes: 69 additions & 10 deletions imas/backends/netcdf/ids_tensorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""Tensorization logic to convert IDSs to netCDF files and/or xarray Datasets."""

from collections import deque
from typing import List
from typing import List, Tuple, Dict

import numpy

Expand Down Expand Up @@ -47,13 +47,26 @@ def __init__(self, ids: IDSToplevel, paths_to_tensorize: List[str]) -> None:
"""Map of IDS paths to filled data nodes."""
self.filled_variables = set()
"""Set of filled IDS variables"""
self.homogeneous_time = (
self.homogeneous_time = bool(
ids.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS
)
"""True iff the IDS time mode is homogeneous."""
self.shapes = {}
"""Map of IDS paths to data shape arrays."""

def get_dimensions(self, path: str) -> Tuple[str, ...]:
    """Look up the netCDF dimension names of a tensorized variable.

    Args:
        path: Data Dictionary path to the variable,
            e.g. ``ids_properties/comment``.

    Returns:
        Tuple of dimension names for the current IDS time mode.
    """
    time_mode = self.homogeneous_time
    return self.ncmeta.get_dimensions(path, time_mode)

def get_shape_dimensions(self, path: str) -> Tuple[str, ...]:
    """Return the dimension names of the ``:shape`` array for ``path``.

    The shape array is indexed by the dimensions of the ancestor array of
    structures (empty tuple when there is none), plus one trailing
    ``"<ndim>D"`` dimension holding the per-element shapes.
    """
    aos_path = self.ncmeta.aos.get(path, "")
    trailing_dim = f"{self.ids.metadata[path].ndim}D"
    return self.get_dimensions(aos_path) + (trailing_dim,)

def include_coordinate_paths(self) -> None:
"""Append all paths that are coordinates of self.paths_to_tensorize"""
# Use a queue so we can also take coordinates of coordinates into account
Expand All @@ -62,7 +75,7 @@ def include_coordinate_paths(self) -> None:
for path in self.paths_to_tensorize:
while path:
path, _, _ = path.rpartition("/")
if self.ncmeta.get_dimensions(path, self.homogeneous_time):
if self.get_dimensions(path):
queue.append(path)

self.paths_to_tensorize = []
Expand All @@ -82,7 +95,6 @@ def collect_filled_data(self) -> None:
# Initialize dictionary with all paths that could exist in this IDS
filled_data = {path: {} for path in self.ncmeta.paths}
dimension_size = {}
get_dimensions = self.ncmeta.get_dimensions

if self.paths_to_tensorize:
# Restrict tensorization to provided paths
Expand All @@ -102,7 +114,7 @@ def collect_filled_data(self) -> None:
ndim = node.metadata.ndim
if not ndim:
continue
dimensions = get_dimensions(path, self.homogeneous_time)
dimensions = self.get_dimensions(path)
# We're only interested in the non-tensorized dimensions: [-ndim:]
for dim_name, size in zip(dimensions[-ndim:], node.shape):
dimension_size[dim_name] = max(dimension_size.get(dim_name, 0), size)
Expand All @@ -115,15 +127,13 @@ def collect_filled_data(self) -> None:

def determine_data_shapes(self) -> None:
"""Determine tensorized data shapes and sparsity, save in :attr:`shapes`."""
get_dimensions = self.ncmeta.get_dimensions

for path, nodes_dict in self.filled_data.items():
metadata = self.ids.metadata[path]
# Structures don't have a size
if metadata.data_type is IDSDataType.STRUCTURE:
continue
ndim = metadata.ndim
dimensions = get_dimensions(path, self.homogeneous_time)
dimensions = self.get_dimensions(path)

# node shape if it is completely filled
full_shape = tuple(self.dimension_size[dim] for dim in dimensions[-ndim:])
Expand All @@ -137,7 +147,7 @@ def determine_data_shapes(self) -> None:

else:
# Data is tensorized, determine if it is homogeneously shaped
aos_dims = get_dimensions(self.ncmeta.aos[path], self.homogeneous_time)
aos_dims = self.get_dimensions(self.ncmeta.aos[path])
shapes_shape = [self.dimension_size[dim] for dim in aos_dims]
if ndim:
shapes_shape.append(ndim)
Expand Down Expand Up @@ -168,6 +178,55 @@ def filter_coordinates(self, path: str) -> str:
if coordinate in self.filled_variables
)

def get_attributes(self, path: str, fillvals: dict) -> Dict[str, str]:
    """Build the metadata attributes of a tensorized variable.

    Args:
        path: Data Dictionary path of the variable.
        fillvals: Mapping of data type to its fill value; only referenced in
            the ``sparse`` message for 0D variables.

    Returns:
        Attribute dict with ``documentation`` and, when applicable,
        ``units``, ``ancillary_variables``, ``coordinates`` and ``sparse``.
    """
    metadata = self.ids.metadata[path]
    var_name = path.replace("/", ".")

    assert metadata.documentation is not None
    attrs = {"documentation": metadata.documentation}
    if metadata.units:
        attrs["units"] = metadata.units

    # CF-style link to error-bar variables, but only those actually filled
    error_vars = (f"{var_name}_error_upper", f"{var_name}_error_lower")
    filled_errors = [name for name in error_vars if name in self.filled_variables]
    if filled_errors:
        attrs["ancillary_variables"] = " ".join(filled_errors)

    # Arrays of structures carry no coordinates of their own
    if metadata.data_type is not IDSDataType.STRUCT_ARRAY:
        coordinates = self.filter_coordinates(path)
        if coordinates:
            attrs["coordinates"] = coordinates

    # Sparse data: ND variables point at their companion ":shape" array,
    # 0D variables only note the fill value used for missing entries
    if path in self.shapes:
        if metadata.ndim:
            attrs["sparse"] = (
                f"Sparse data, data shapes are stored in {var_name}:shape"
            )
        else:
            attrs["sparse"] = (
                "Sparse data, missing data is filled with _FillValue"
                f" ({fillvals[metadata.data_type]})"
            )

    return attrs

def get_shape_attributes(self, var_name: str) -> Dict[str, str]:
    """Get attributes of the ``:shape`` variable corresponding to var_name.

    Args:
        var_name: Name of the tensorized data variable,
            e.g. ``profiles_1d.electrons.temperature``.

    Returns:
        Attribute dict with a ``documentation`` entry explaining how the
        shape array describes the filled extent of the data variable.
    """
    # NOTE(review): the example indices are hard-coded to three ("i,j,k")
    # regardless of the actual number of AoS dimensions — confirm whether
    # this should depend on the ancestor AoS depth.
    doc_indices = ",".join(chr(ord("i") + i) for i in range(3))
    documentation = (
        f"Shape information for {var_name}.\n"
        f"{var_name}:shape[{doc_indices},:] describes the shape of filled "
        f"data of {var_name}[{doc_indices},...]. Data outside this "
        # Fixed typo: the standard netCDF attribute is spelled "_FillValue"
        # (matches the spelling used by get_attributes and ids2nc.py).
        "shape is unset (i.e. filled with _FillValue)."
    )
    return {"documentation": documentation}

def tensorize(self, path, fillvalue):
"""
Tensorizes the data at the given path with the specified fill value.
Expand All @@ -180,7 +239,7 @@ def tensorize(self, path, fillvalue):
Returns:
A tensor filled with the data from the specified path.
"""
dimensions = self.ncmeta.get_dimensions(path, self.homogeneous_time)
dimensions = self.get_dimensions(path)
shape = tuple(self.dimension_size[dim] for dim in dimensions)

# TODO: depending on the data, tmp_var may be HUGE, we may need a more
Expand Down
27 changes: 13 additions & 14 deletions imas/backends/netcdf/nc2ids.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import os
from typing import Optional
from typing import Optional, Tuple

import netCDF4
import numpy as np
Expand Down Expand Up @@ -80,6 +80,14 @@ def __init__(
)
self.homogeneous_time = var[()] == IDS_TIME_MODE_HOMOGENEOUS

def get_dimensions(self, path: str) -> Tuple[str, ...]:
    """Look up the expected netCDF dimension names for a variable.

    Args:
        path: Data Dictionary path of the variable
            (e.g. ``ids_properties/comment``).

    Returns:
        Tuple of dimension names for this IDS time mode.
    """
    homogeneous = self.homogeneous_time
    return self.ncmeta.get_dimensions(path, homogeneous)

def run(self, lazy: bool) -> None:
"""Load the data from the netCDF group into the IDS."""
self.variables.sort()
Expand Down Expand Up @@ -130,9 +138,7 @@ def run(self, lazy: bool) -> None:

else:
# FIXME: extract dimension name from nc file?
dim = self.ncmeta.get_dimensions(
metadata.path_string, self.homogeneous_time
)[-1]
dim = self.get_dimensions(metadata.path_string)[-1]
size = self.group.dimensions[dim].size
for _, node in indexed_tree_iter(self.ids, target_metadata):
node.resize(size)
Expand Down Expand Up @@ -235,9 +241,7 @@ def _validate_variable(self, var: netCDF4.Variable, metadata: IDSMetadata) -> No
raise variable_error(var, "data type", var.dtype, expected_dtype)

# Dimensions
expected_dims = self.ncmeta.get_dimensions(
metadata.path_string, self.homogeneous_time
)
expected_dims = self.get_dimensions(metadata.path_string)
if var.dimensions != expected_dims:
raise variable_error(var, "dimensions", var.dimensions, expected_dims)

Expand Down Expand Up @@ -298,9 +302,7 @@ def _validate_sparsity(
return # Sparsity is stored with _Fillvalue, nothing to validate

# Dimensions
aos_dimensions = self.ncmeta.get_dimensions(
self.ncmeta.aos.get(metadata.path_string), self.homogeneous_time
)
aos_dimensions = self.get_dimensions(self.ncmeta.aos.get(metadata.path_string))
shape_dimensions = shape_var.dimensions
if (
len(shape_dimensions) != len(aos_dimensions) + 1
Expand Down Expand Up @@ -331,7 +333,6 @@ def get_child(self, child):

Args:
child: The child IDS node which should be lazy loaded.

"""
metadata = child.metadata
path = metadata.path_string
Expand All @@ -347,9 +348,7 @@ def get_child(self, child):
size = nc2ids.group[var.name + ":shape"][self.index][0]
else:
# FIXME: extract dimension name from nc file?
dim = nc2ids.ncmeta.get_dimensions(
metadata.path_string, nc2ids.homogeneous_time
)[-1]
dim = nc2ids.get_dimensions(metadata.path_string)[-1]
size = nc2ids.group.dimensions[dim].size

child._set_lazy_context(LazyArrayStructContext(nc2ids, self.index, size))
Expand Down
Loading
Loading