From 6fca4fdb018937ab56bc46ee032c05be31bc2385 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 13 Feb 2026 11:30:55 +0100 Subject: [PATCH 1/3] NetCDF implementation for DBEntry.list_filled_paths --- imas/backends/db_entry_impl.py | 8 +++ imas/backends/imas_core/db_entry_al.py | 3 + imas/backends/netcdf/db_entry_nc.py | 29 ++++++-- imas/db_entry.py | 61 +++++++++++++++- imas/test/test_list_filled_paths.py | 98 ++++++++++++++++++++++++++ 5 files changed, 190 insertions(+), 9 deletions(-) create mode 100644 imas/test/test_list_filled_paths.py diff --git a/imas/backends/db_entry_impl.py b/imas/backends/db_entry_impl.py index 0c1b2cd..f6ed4d6 100644 --- a/imas/backends/db_entry_impl.py +++ b/imas/backends/db_entry_impl.py @@ -120,3 +120,11 @@ def delete_data(self, ids_name: str, occurrence: int) -> None: @abstractmethod def list_all_occurrences(self, ids_name: str) -> List[int]: """Implement DBEntry.list_all_occurrences()""" + + @abstractmethod + def list_filled_paths(self, ids_name: str, occurrence: int) -> List[str]: + """Implement DBEntry.list_filled_paths(). + + N.B. DD conversion is handled in DBEntry.list_filled_paths(), this method + returns the data paths as stored on-disk. + """ diff --git a/imas/backends/imas_core/db_entry_al.py b/imas/backends/imas_core/db_entry_al.py index fc58270..dd4c433 100644 --- a/imas/backends/imas_core/db_entry_al.py +++ b/imas/backends/imas_core/db_entry_al.py @@ -364,6 +364,9 @@ def list_all_occurrences(self, ids_name: str) -> List[int]: ) from None return occurrence_list + def list_filled_paths(self, ids_name: str, occurrence: int) -> List[str]: + raise NotImplementedError() + def _check_uda_warnings(self, lazy: bool) -> None: """Various checks / warnings for the UDA backend.""" cache_mode = self._querydict.get("cache_mode") diff --git a/imas/backends/netcdf/db_entry_nc.py b/imas/backends/netcdf/db_entry_nc.py index 0776c47..e9eae9a 100644 --- a/imas/backends/netcdf/db_entry_nc.py +++ b/imas/backends/netcdf/db_entry_nc.py @@ -92,6 +92,14 @@ def close(self, *, erase: bool = False) -> None: ) self._dataset.close() + def _get_group(self, ids_name: str, occurrence: int) -> "netCDF4.Group": + try: + return self._dataset[f"{ids_name}/{occurrence}"] + except LookupError as exc: + raise DataEntryException( + f"IDS {ids_name!r}, occurrence {occurrence} is not found." + ) from exc + def get( self, ids_name: str, @@ -110,12 +118,7 @@ def get( raise NotImplementedError(f"`{func}` is not available for netCDF files.") # Check if the IDS/occurrence exists, and obtain the group it is stored in - try: - group = self._dataset[f"{ids_name}/{occurrence}"] - except KeyError: - raise DataEntryException( - f"IDS {ids_name!r}, occurrence {occurrence} is not found." 
- ) + group = self._get_group(ids_name, occurrence) # Load data into the destination IDS if self._ds_factory.dd_version == destination._dd_version: @@ -183,3 +186,17 @@ def list_all_occurrences(self, ids_name: str) -> List[int]: occurrence_list.sort() return occurrence_list + + def list_filled_paths(self, ids_name: str, occurrence: int) -> List[str]: + # Check if the IDS/occurrence exists, and obtain the group it is stored in + group = self._get_group(ids_name, occurrence) + + result = [] + for name, variable in group.variables.items(): + if variable.ndim == 0 and variable.dtype == "S1": + continue # (Array of) Structure metadata node, no data + if name.endswith(":shape"): + continue # Shape data, not a DD path + result.append(name.replace(".", "/")) + + return result diff --git a/imas/db_entry.py b/imas/db_entry.py index 5a47064..3eb77c3 100644 --- a/imas/db_entry.py +++ b/imas/db_entry.py @@ -7,7 +7,7 @@ import logging import os import pathlib -from typing import Any, Type, overload +from typing import Any, Type, overload, List import numpy @@ -197,14 +197,14 @@ def _select_implementation(uri: str | None) -> Type[DBEntryImpl]: from imas.backends.imas_core.db_entry_al import ALDBEntryImpl as impl return impl - def __enter__(self): + def __enter__(self) -> "DBEntry": # Context manager protocol if self._dbe_impl is None: # Open if the DBEntry was not already opened or created self.open() return self - def __exit__(self, exc_type, exc_value, traceback): + def __exit__(self, exc_type, exc_value, traceback) -> None: # Context manager protocol self.close() @@ -800,3 +800,58 @@ def list_all_occurrences(self, ids_name, node_path=None): self.get(ids_name, occ, lazy=True)[node_path] for occ in occurrence_list ] return occurrence_list, node_content_list + + def list_filled_paths( + self, ids_name, occurrence: int = 0, *, autoconvert: bool = True + ) -> List[str]: + """Get a list of filled Data Dictionary paths from the backend. + + Note that this is only supported by some backends (HDF5 and netCDF), and will + result in an error on unsupported backends. + + Args: + ids_name: Name of the IDS to request filled data for. + occurrence: Occurrence number of the IDS to request filled data for. + + Returns: + List of paths which have some data filled in the backend. For example, when + ``profiles_1d/ion/temperature`` is in this list, it means that there is at + least on ``ion`` in one ``profiles_1d`` entry for which the temperature is + filled. + + The paths in this list may be ordered arbitrarily. + + Example: + >>> with imas.DBEntry("imas:hdf5?path=./path/to/data", "r") as entry: + >>> print(entry.list_filled_paths("core_profiles")) + ['ids_properties/comment', 'ids_properties/homogeneous_time', + 'profiles_1d/grid/rho_tor_norm', 'profiles_1d/electrons/temperature', + 'profiles_1d/ion/temperature', 'time'] + """ + if self._dbe_impl is None: + raise RuntimeError("Database entry is not open.") + paths = self._dbe_impl.list_filled_paths(ids_name, occurrence) + if not autoconvert: + return paths + + # DD conversion? 
+ dd_version = self._dbe_impl.read_dd_version(ids_name, occurrence) + if dd_version == self._ids_factory.dd_version: + return paths # No conversion required + + # Follow any NBC renames: + ddmap, source_is_older = dd_version_map_from_factories( + ids_name, IDSFactory(version=dd_version), self._ids_factory + ) + nbc_map = ddmap.old_to_new if source_is_older else ddmap.new_to_old + + converted_paths = [] + for path in paths: + if path in nbc_map: + new_name = nbc_map.path[path] + if new_name is not None: + converted_paths.append(new_name) + else: + converted_paths.append(path) + + return converted_paths diff --git a/imas/test/test_list_filled_paths.py b/imas/test/test_list_filled_paths.py new file mode 100644 index 0000000..6bf4bda --- /dev/null +++ b/imas/test/test_list_filled_paths.py @@ -0,0 +1,98 @@ +import pytest + +import imas +from imas.backends.imas_core.imas_interface import ll_interface +from imas.exception import DataEntryException +from imas.ids_defs import IDS_TIME_MODE_HOMOGENEOUS, IDS_TIME_MODE_INDEPENDENT + + +if not hasattr(ll_interface, "list_all_occurrences"): + marker = pytest.mark.xfail(reason="list_all_occurrences not available in imas_core") +else: + marker = [] + + +@pytest.fixture(params=["netcdf", pytest.param("hdf5", marks=marker)]) +def testuri(request, tmp_path): + if request.param == "netcdf": + return str(tmp_path / "list_filled_paths.nc") + return f"imas:{request.param}?path={tmp_path}/list_filled_paths_{request.param}" + + +def test_list_filled_paths(testuri): + with imas.DBEntry(testuri, "w", dd_version="4.0.0") as dbentry: + # No IDSs in the DBEntry yet, expect an exception + with pytest.raises(DataEntryException): + dbentry.list_filled_paths("core_profiles") + + cp = dbentry.factory.core_profiles() + cp.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS + cp.ids_properties.comment = "comment" + cp.time = [0.1, 0.2] + cp.profiles_1d.resize(2) + cp.profiles_1d[0].grid.rho_tor_norm = [1.0, 2.0] + cp.profiles_1d[0].ion.resize(2) + cp.profiles_1d[0].ion[1].temperature = [1.0, 2.0] + cp.profiles_1d[1].grid.psi = [1.0, 2.0] + cp.profiles_1d[1].q = [1.0, 2.0] + cp.profiles_1d[1].e_field.radial = [1.0, 2.0] + cp.profiles_1d[1].neutral.resize(2) + cp.global_quantities.ip = [1.0, 2.0] + + dbentry.put(cp) + + filled_paths = dbentry.list_filled_paths("core_profiles") + assert isinstance(filled_paths, list) + assert set(filled_paths) == { + "ids_properties/version_put/access_layer", + "ids_properties/version_put/access_layer_language", + "ids_properties/version_put/data_dictionary", + "ids_properties/homogeneous_time", + "ids_properties/comment", + "time", + "profiles_1d/grid/rho_tor_norm", + "profiles_1d/ion/temperature", + "profiles_1d/grid/psi", + "profiles_1d/q", + "profiles_1d/e_field/radial", + "profiles_1d/e_field/radial", + "global_quantities/ip", + } + # Other occurrence should still raise an error: + with pytest.raises(DataEntryException): + dbentry.list_filled_paths("core_profiles", 1) + + +def test_list_filled_paths_autoconvert(testuri): + with imas.DBEntry(testuri, "w", dd_version="3.25.0") as entry: + ps = entry.factory.pulse_schedule() + ps.ids_properties.homogeneous_time = IDS_TIME_MODE_INDEPENDENT + ps.ec.antenna.resize(1) + ps.ec.antenna[0].launching_angle_pol.reference_name = "test" + entry.put(ps) + + filled_paths = entry.list_filled_paths("pulse_schedule") + assert set(filled_paths) == { + "ids_properties/version_put/access_layer", + "ids_properties/version_put/access_layer_language", + "ids_properties/version_put/data_dictionary", + 
"ids_properties/homogeneous_time", + "ec/antenna/launching_angle_pol/reference_name", + } + + # Check autoconvert with DD 3.28.0 + with imas.DBEntry(testuri, "r", dd_version="3.28.0") as entry: + assert set(entry.list_filled_paths("pulse_schedule", autoconvert=False)) == { + "ids_properties/version_put/access_layer", + "ids_properties/version_put/access_layer_language", + "ids_properties/version_put/data_dictionary", + "ids_properties/homogeneous_time", + "ec/antenna/launching_angle_pol/reference_name", # original name + } + assert set(entry.list_filled_paths("pulse_schedule")) == { + "ids_properties/version_put/access_layer", + "ids_properties/version_put/access_layer_language", + "ids_properties/version_put/data_dictionary", + "ids_properties/homogeneous_time", + "ec/launcher/steering_angle_pol/reference_name", # autoconverted name + } From 32f355980d9ea359758d80d494dce85b43f0990d Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 13 Feb 2026 15:52:55 +0100 Subject: [PATCH 2/3] Implement list_fill_paths for IMAS-Core (HDF5 backend) --- imas/backends/imas_core/al_context.py | 11 +++++++++++ imas/backends/imas_core/db_entry_al.py | 12 +++++++++++- imas/backends/imas_core/imas_interface.py | 5 +++++ imas/test/test_list_filled_paths.py | 9 ++++++--- 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/imas/backends/imas_core/al_context.py b/imas/backends/imas_core/al_context.py index d3d2f62..21cf1b8 100644 --- a/imas/backends/imas_core/al_context.py +++ b/imas/backends/imas_core/al_context.py @@ -174,6 +174,17 @@ def list_all_occurrences(self, ids_name: str) -> List[int]: return list(occurrences) return [] + def list_filled_paths(self, path: str) -> List[str]: + """List all filled paths in an IDS. + + Args: + path: IDS and occurrence as a string: [/] + """ + status, result = ll_interface.list_filled_paths(self.ctx, path) + if status != 0: + raise LowlevelError(f"list filled paths for {path!r}", status) + return result + def close(self): """Close this ALContext.""" ll_interface.end_action(self.ctx) diff --git a/imas/backends/imas_core/db_entry_al.py b/imas/backends/imas_core/db_entry_al.py index dd4c433..0a24bdb 100644 --- a/imas/backends/imas_core/db_entry_al.py +++ b/imas/backends/imas_core/db_entry_al.py @@ -365,7 +365,17 @@ def list_all_occurrences(self, ids_name: str) -> List[int]: return occurrence_list def list_filled_paths(self, ids_name: str, occurrence: int) -> List[str]: - raise NotImplementedError() + if self._db_ctx is None: + raise RuntimeError("Database entry is not open.") + ll_path = ids_name + if occurrence != 0: + ll_path += f"/{occurrence}" + paths = self._db_ctx.list_filled_paths(ll_path) + if not paths: + raise DataEntryException( + f"IDS {ids_name!r}, occurrence {occurrence} is empty." 
+ ) + return paths def _check_uda_warnings(self, lazy: bool) -> None: """Various checks / warnings for the UDA backend.""" diff --git a/imas/backends/imas_core/imas_interface.py b/imas/backends/imas_core/imas_interface.py index c9d69a0..a9fb66b 100644 --- a/imas/backends/imas_core/imas_interface.py +++ b/imas/backends/imas_core/imas_interface.py @@ -166,6 +166,11 @@ def begin_timerange_action( ): raise self._minimal_version("5.4") + # New method in AL 5.7 + + def list_filled_paths(self, ctx, path): + raise self._minimal_version("5.7") + # Dummy documentation for interface: for funcname in dir(LowlevelInterface): diff --git a/imas/test/test_list_filled_paths.py b/imas/test/test_list_filled_paths.py index 6bf4bda..9df74f4 100644 --- a/imas/test/test_list_filled_paths.py +++ b/imas/test/test_list_filled_paths.py @@ -1,13 +1,13 @@ import pytest import imas -from imas.backends.imas_core.imas_interface import ll_interface +from imas_core import _al_lowlevel from imas.exception import DataEntryException from imas.ids_defs import IDS_TIME_MODE_HOMOGENEOUS, IDS_TIME_MODE_INDEPENDENT -if not hasattr(ll_interface, "list_all_occurrences"): - marker = pytest.mark.xfail(reason="list_all_occurrences not available in imas_core") +if not hasattr(_al_lowlevel, "al_list_filled_paths"): + marker = pytest.mark.xfail(reason="list_filled_paths not available in imas_core") else: marker = [] @@ -61,6 +61,9 @@ def test_list_filled_paths(testuri): # Other occurrence should still raise an error: with pytest.raises(DataEntryException): dbentry.list_filled_paths("core_profiles", 1) + # Until we write data to the occurrence: + dbentry.put(cp, 3) + assert set(filled_paths) == set(dbentry.list_filled_paths("core_profiles", 3)) def test_list_filled_paths_autoconvert(testuri): From 34ec6be61edfd3504d9fde5382655c116ed8830e Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 13 Feb 2026 17:03:20 +0100 Subject: [PATCH 3/3] Update documentation --- docs/source/intro.rst | 40 +++++++++++++++++++++++++--------------- imas/db_entry.py | 6 +++++- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/docs/source/intro.rst b/docs/source/intro.rst index 3027a24..80b183f 100644 --- a/docs/source/intro.rst +++ b/docs/source/intro.rst @@ -86,17 +86,6 @@ get an error message if this is not possible: Load and store an IDS to disk with IMAS-Core '''''''''''''''''''''''''''''''''''''''''''' -.. note:: - - - This functionality requires the IMAS-Core, until this library is openly available - on GitHub you may need to fetch it from `git.iter.org `_ - (requires to have an ITER account). Using IMAS-Core also enable slicing methods - :py:meth:`~imas.db_entry.DBEntry.get_slice`, - :py:meth:`~imas.db_entry.DBEntry.put_slice` and - :py:meth:`~imas.db_entry.DBEntry.get_sample` (with IMAS-Core>=5.4). - - If you can't have access to it, you can save IDS to disk with the built-in - netCDF backend :ref:`Load and store an IDS to disk with netCDF` - To store an IDS to disk, we need to indicate the following URI to the IMAS-Core: ``imas:?path=`` or using the legacy query keys ``imas:?user=;database=;version=;pulse=;run=`` @@ -115,11 +104,9 @@ In IMAS-Python you do this as follows: >>> # now store the core_profiles IDS we just populated >>> dbentry.put(core_profiles) -.. image:: imas_structure.png - To load an IDS from disk, you need to specify the same information as when storing the IDS (see above). Once the data entry is opened, you -can use ``.get()`` to load IDS data from disk: +can use ``dbentry.get()`` to load IDS data from disk: .. 
code-block:: python @@ -146,7 +133,7 @@ In IMAS-Python you do this as follows: To load an IDS from disk, you need to specify the same file information as when storing the IDS. Once the data entry is opened, you -can use ``.get()`` to load IDS data from disk: +can use ``dbentry.get()`` to load IDS data from disk: .. code-block:: python @@ -154,3 +141,26 @@ can use ``.get()`` to load IDS data from disk: >>> dbentry2 = imas.DBEntry("mypulsefile.nc","r") >>> core_profiles2 = dbentry2.get("core_profiles") >>> print(core_profiles2.ids_properties.comment.value) + + +Data Entry API overview +''''''''''''''''''''''' + +See the documentation of :py:class:`imas.DBEntry ` for more +details on reading and writing IDSs to disk. Useful functions include: + +- :py:meth:`~imas.db_entry.DBEntry.put` and :py:meth:`~imas.db_entry.DBEntry.put_slice` + to write a full IDS or write append a time slice to existing data. +- :py:meth:`~imas.db_entry.DBEntry.get`, :py:meth:`~imas.db_entry.DBEntry.get_slice` and + :py:meth:`~imas.db_entry.DBEntry.get_sample` to read all time slices, a single time + slice, or a sample of time slices from disk. ``get_slice()`` and ``get_sample()`` can + also interpolate data to a requested point in time. + + All three ``get()`` methods have a ``lazy`` mode, which will only load data from disk + when you need it. This can greatly speed up data access in some scenarios. See + :ref:`Lazy loading` for more details. +- :py:meth:`~imas.db_entry.DBEntry.list_all_occurrences` to query whether there are any + occurrences of a certain IDS stored on disk. +- :py:meth:`~imas.db_entry.DBEntry.list_filled_paths` to query which Data Dictionary + paths have data filled inside a specific IDS. + diff --git a/imas/db_entry.py b/imas/db_entry.py index 3eb77c3..d6b7053 100644 --- a/imas/db_entry.py +++ b/imas/db_entry.py @@ -813,10 +813,14 @@ def list_filled_paths( ids_name: Name of the IDS to request filled data for. occurrence: Occurrence number of the IDS to request filled data for. + Keyword Args: + autoconvert: If enabled (default), this method will take NBC renames into + account in the returned list of filled paths. + Returns: List of paths which have some data filled in the backend. For example, when ``profiles_1d/ion/temperature`` is in this list, it means that there is at - least on ``ion`` in one ``profiles_1d`` entry for which the temperature is + least one ``ion`` in one ``profiles_1d`` entry for which the temperature is filled. The paths in this list may be ordered arbitrarily.
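
Usage sketch for the new ``DBEntry.list_filled_paths()`` API introduced in this
series. The netCDF file name and the IDS contents below are made up for
illustration; the method names, the ``autoconvert`` keyword and the overall
workflow follow the patches above, but treat this as an untested sketch rather
than a reference example.

.. code-block:: python

    import imas
    from imas.ids_defs import IDS_TIME_MODE_HOMOGENEOUS

    # Write a small core_profiles IDS to a netCDF file (hypothetical path),
    # then ask the backend which Data Dictionary paths actually contain data.
    with imas.DBEntry("./example_core_profiles.nc", "w") as entry:
        cp = entry.factory.core_profiles()
        cp.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS
        cp.ids_properties.comment = "example"
        cp.time = [0.0]
        entry.put(cp)

        # autoconvert=True (the default) maps NBC-renamed paths to the DD
        # version of this DBEntry; autoconvert=False returns the names as
        # stored on disk. With a single DD version involved they coincide.
        print(sorted(entry.list_filled_paths("core_profiles")))
        print(sorted(entry.list_filled_paths("core_profiles", autoconvert=False)))

The same call is expected to work against an IMAS-Core HDF5 entry (patch 2),
provided the installed IMAS-Core exposes the lowlevel ``list_filled_paths``
routine (AL 5.7 or later).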