Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions examples/external_aerodynamics/data_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
ExternalAerodynamicsNumpyDataInMemory,
ExternalAerodynamicsZarrDataInMemory,
)
from zarr.storage import LocalStore

from physicsnemo_curator.etl.data_sources import DataSource
from physicsnemo_curator.etl.processing_config import ProcessingConfig
Expand Down Expand Up @@ -231,20 +232,21 @@ def _write_zarr(
output_path: Path where the .zarr directory should be written
"""
# Create store
zarr_store = zarr.DirectoryStore(output_path)
root = zarr.group(store=zarr_store)
zarr_store = LocalStore(output_path)
root = zarr.open_group(store=zarr_store, mode="w")

# Write metadata as attributes
data.metadata.zarr_format = zarr.__version__
root.attrs.update(asdict(data.metadata))

# Write required arrays
for field in ["stl_coordinates", "stl_centers", "stl_faces", "stl_areas"]:
array_info = getattr(data, field)
root.create_dataset(
field,
root.create_array(
name=field,
data=array_info.data,
chunks=array_info.chunks,
compressor=array_info.compressor,
compressors=array_info.compressor if array_info.compressor else None,
)

# Write optional arrays if present
Expand All @@ -258,11 +260,13 @@ def _write_zarr(
]:
array_info = getattr(data, field)
if array_info is not None:
root.create_dataset(
field,
root.create_array(
name=field,
data=array_info.data,
chunks=array_info.chunks,
compressor=array_info.compressor,
compressors=(
array_info.compressor if array_info.compressor else None
),
)

def should_skip(self, filename: str) -> bool:
Expand Down
7 changes: 4 additions & 3 deletions examples/external_aerodynamics/data_transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from typing import Callable, Optional

import numpy as np
import zarr
from constants import PhysicsConstants
from external_aero_geometry_data_processors import (
default_geometry_processing_for_external_aerodynamics,
Expand All @@ -30,7 +31,6 @@
from external_aero_volume_data_processors import (
default_volume_processing_for_external_aerodynamics,
)
from numcodecs import Blosc
from schemas import (
ExternalAerodynamicsExtractedDataInMemory,
ExternalAerodynamicsNumpyDataInMemory,
Expand Down Expand Up @@ -231,10 +231,11 @@ def __init__(
chunk_size_mb: float = 1.0, # Default 1MB chunk size
):
super().__init__(cfg)
self.compressor = Blosc(
# Zarr 3 codec configuration
self.compressor = zarr.codecs.BloscCodec(
cname=compression_method,
clevel=compression_level,
shuffle=Blosc.SHUFFLE,
shuffle=zarr.codecs.BloscShuffle.shuffle,
)
self.chunk_size_mb = chunk_size_mb

Expand Down
8 changes: 6 additions & 2 deletions examples/external_aerodynamics/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@
from dataclasses import dataclass
from typing import Optional

import numcodecs
import numpy as np
import pyvista as pv
import vtk
import zarr
from constants import ModelType


Expand Down Expand Up @@ -53,6 +53,9 @@ class ExternalAerodynamicsMetadata:
decimation_reduction: Optional[float] = None
decimation_algo: Optional[str] = None

# Zarr format version
zarr_format: Optional[int] = None


@dataclass
class ExternalAerodynamicsExtractedDataInMemory:
Expand Down Expand Up @@ -93,11 +96,12 @@ class PreparedZarrArrayInfo:

Version history:
- 1.0: Initial version with compression and chunking info
- 2.0: Updated to use Zarr 3 codecs
"""

data: np.ndarray
chunks: tuple[int, ...]
compressor: numcodecs.abc.Codec
compressor: zarr.abc.codec


@dataclass(frozen=True)
Expand Down
31 changes: 15 additions & 16 deletions examples/structural_mechanics/crash/data_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
load_d3plot_data,
parse_k_file,
)
from numcodecs import Blosc
from schemas import CrashExtractedDataInMemory, CrashMetadata
from zarr.storage import LocalStore

from physicsnemo_curator.etl.data_sources import DataSource
from physicsnemo_curator.etl.processing_config import ProcessingConfig
Expand Down Expand Up @@ -307,8 +307,10 @@ def __init__(
self.chunk_size_mb = chunk_size_mb

# Set up compressor
self.compressor = Blosc(
cname=compression_method, clevel=compression_level, shuffle=Blosc.SHUFFLE
self.compressor = zarr.codecs.BloscCodec(
cname=compression_method,
clevel=compression_level,
shuffle=zarr.codecs.BloscShuffle.shuffle,
)

# Warn if chunk size might be problematic
Expand Down Expand Up @@ -423,7 +425,7 @@ def _write_impl_temp_file(
)

# Create Zarr store
zarr_store = zarr.DirectoryStore(output_path)
zarr_store = LocalStore(output_path)
root = zarr.group(store=zarr_store)

# Write metadata as root attributes
Expand All @@ -447,30 +449,27 @@ def _write_impl_temp_file(
edges_chunks = self._calculate_chunks(edges_array)

# Write temporal position data
root.create_dataset(
"mesh_pos",
root.create_array(
name="mesh_pos",
data=mesh_pos_data,
chunks=mesh_pos_chunks,
compressor=self.compressor,
dtype=np.float32,
compressors=(self.compressor,),
)

# Write node thickness (static per node)
root.create_dataset(
"thickness",
root.create_array(
name="thickness",
data=thickness_data,
chunks=thickness_chunks,
compressor=self.compressor,
dtype=np.float32,
compressors=(self.compressor,),
)

# Write edges connectivity
root.create_dataset(
"edges",
root.create_array(
name="edges",
data=edges_array,
chunks=edges_chunks,
compressor=self.compressor,
dtype=np.int64,
compressors=(self.compressor,),
)

# Add some statistics as metadata
Expand Down
12 changes: 6 additions & 6 deletions examples/tutorials/etl_hdf5_to_zarr/h5_to_zarr_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from typing import Any, Dict

import numpy as np
from numcodecs import Blosc
import zarr

from physicsnemo_curator.etl.data_transformations import DataTransformation
from physicsnemo_curator.etl.processing_config import ProcessingConfig
Expand All @@ -40,11 +40,11 @@ def __init__(
self.chunk_size = chunk_size
self.compression_level = compression_level

# Set up compression
self.compressor = Blosc(
cname="zstd", # zstd compression algorithm
clevel=compression_level,
shuffle=Blosc.SHUFFLE,
# Set up Zarr 3 compression codec
self.compressor = zarr.codecs.BloscCodec(
cname="zstd",
clevel=self.compression_level,
shuffle=zarr.codecs.BloscShuffle.shuffle,
)

def transform(self, data: Dict[str, Any]) -> Dict[str, Any]:
Expand Down
Loading