diff --git a/docs/source/intro.rst b/docs/source/intro.rst
index 3027a24..80b183f 100644
--- a/docs/source/intro.rst
+++ b/docs/source/intro.rst
@@ -86,17 +86,6 @@ get an error message if this is not possible:
Load and store an IDS to disk with IMAS-Core
''''''''''''''''''''''''''''''''''''''''''''
-.. note::
-
- - This functionality requires the IMAS-Core, until this library is openly available
- on GitHub you may need to fetch it from `git.iter.org `_
- (requires to have an ITER account). Using IMAS-Core also enable slicing methods
- :py:meth:`~imas.db_entry.DBEntry.get_slice`,
- :py:meth:`~imas.db_entry.DBEntry.put_slice` and
- :py:meth:`~imas.db_entry.DBEntry.get_sample` (with IMAS-Core>=5.4).
- - If you can't have access to it, you can save IDS to disk with the built-in
- netCDF backend :ref:`Load and store an IDS to disk with netCDF`
-
To store an IDS to disk, we need to indicate the following URI to the
IMAS-Core: ``imas:?path=`` or using the legacy query keys
``imas:?user=;database=;version=;pulse=;run=``
@@ -115,11 +104,9 @@ In IMAS-Python you do this as follows:
>>> # now store the core_profiles IDS we just populated
>>> dbentry.put(core_profiles)
-.. image:: imas_structure.png
-
To load an IDS from disk, you need to specify the same information as
when storing the IDS (see above). Once the data entry is opened, you
-can use ``.get()`` to load IDS data from disk:
+can use ``dbentry.get()`` to load IDS data from disk:
.. code-block:: python
@@ -146,7 +133,7 @@ In IMAS-Python you do this as follows:
To load an IDS from disk, you need to specify the same file information as
when storing the IDS. Once the data entry is opened, you
-can use ``.get()`` to load IDS data from disk:
+can use ``dbentry.get()`` to load IDS data from disk:
.. code-block:: python
@@ -154,3 +141,26 @@ can use ``.get()`` to load IDS data from disk:
>>> dbentry2 = imas.DBEntry("mypulsefile.nc","r")
>>> core_profiles2 = dbentry2.get("core_profiles")
>>> print(core_profiles2.ids_properties.comment.value)
+
+
+Data Entry API overview
+'''''''''''''''''''''''
+
+See the documentation of :py:class:`imas.DBEntry <imas.db_entry.DBEntry>` for more
+details on reading and writing IDSs to disk. Useful functions include:
+
+- :py:meth:`~imas.db_entry.DBEntry.put` and :py:meth:`~imas.db_entry.DBEntry.put_slice`
+  to write a full IDS or append a time slice to existing data.
+- :py:meth:`~imas.db_entry.DBEntry.get`, :py:meth:`~imas.db_entry.DBEntry.get_slice` and
+ :py:meth:`~imas.db_entry.DBEntry.get_sample` to read all time slices, a single time
+ slice, or a sample of time slices from disk. ``get_slice()`` and ``get_sample()`` can
+ also interpolate data to a requested point in time.
+
+ All three ``get()`` methods have a ``lazy`` mode, which will only load data from disk
+ when you need it. This can greatly speed up data access in some scenarios. See
+ :ref:`Lazy loading` for more details.
+- :py:meth:`~imas.db_entry.DBEntry.list_all_occurrences` to query whether there are any
+ occurrences of a certain IDS stored on disk.
+- :py:meth:`~imas.db_entry.DBEntry.list_filled_paths` to query which Data Dictionary
+ paths have data filled inside a specific IDS.
+
diff --git a/imas/backends/db_entry_impl.py b/imas/backends/db_entry_impl.py
index 0c1b2cd..f6ed4d6 100644
--- a/imas/backends/db_entry_impl.py
+++ b/imas/backends/db_entry_impl.py
@@ -120,3 +120,11 @@ def delete_data(self, ids_name: str, occurrence: int) -> None:
@abstractmethod
def list_all_occurrences(self, ids_name: str) -> List[int]:
"""Implement DBEntry.list_all_occurrences()"""
+
+ @abstractmethod
+ def list_filled_paths(self, ids_name: str, occurrence: int) -> List[str]:
+ """Implement DBEntry.list_filled_paths().
+
+ N.B. DD conversion is handled in DBEntry.list_filled_paths(), this method
+ returns the data paths as stored on-disk.
+ """
diff --git a/imas/backends/imas_core/al_context.py b/imas/backends/imas_core/al_context.py
index d3d2f62..21cf1b8 100644
--- a/imas/backends/imas_core/al_context.py
+++ b/imas/backends/imas_core/al_context.py
@@ -174,6 +174,17 @@ def list_all_occurrences(self, ids_name: str) -> List[int]:
return list(occurrences)
return []
+ def list_filled_paths(self, path: str) -> List[str]:
+ """List all filled paths in an IDS.
+
+ Args:
+            path: IDS name and occurrence as a string: ``<ids_name>[/<occurrence>]``
+ """
+ status, result = ll_interface.list_filled_paths(self.ctx, path)
+ if status != 0:
+ raise LowlevelError(f"list filled paths for {path!r}", status)
+ return result
+
def close(self):
"""Close this ALContext."""
ll_interface.end_action(self.ctx)
diff --git a/imas/backends/imas_core/db_entry_al.py b/imas/backends/imas_core/db_entry_al.py
index fc58270..0a24bdb 100644
--- a/imas/backends/imas_core/db_entry_al.py
+++ b/imas/backends/imas_core/db_entry_al.py
@@ -364,6 +364,19 @@ def list_all_occurrences(self, ids_name: str) -> List[int]:
) from None
return occurrence_list
+ def list_filled_paths(self, ids_name: str, occurrence: int) -> List[str]:
+ if self._db_ctx is None:
+ raise RuntimeError("Database entry is not open.")
+ ll_path = ids_name
+ if occurrence != 0:
+ ll_path += f"/{occurrence}"
+ paths = self._db_ctx.list_filled_paths(ll_path)
+ if not paths:
+ raise DataEntryException(
+ f"IDS {ids_name!r}, occurrence {occurrence} is empty."
+ )
+ return paths
+
def _check_uda_warnings(self, lazy: bool) -> None:
"""Various checks / warnings for the UDA backend."""
cache_mode = self._querydict.get("cache_mode")
diff --git a/imas/backends/imas_core/imas_interface.py b/imas/backends/imas_core/imas_interface.py
index c9d69a0..a9fb66b 100644
--- a/imas/backends/imas_core/imas_interface.py
+++ b/imas/backends/imas_core/imas_interface.py
@@ -166,6 +166,11 @@ def begin_timerange_action(
):
raise self._minimal_version("5.4")
+ # New method in AL 5.7
+
+ def list_filled_paths(self, ctx, path):
+ raise self._minimal_version("5.7")
+
# Dummy documentation for interface:
for funcname in dir(LowlevelInterface):
diff --git a/imas/backends/netcdf/db_entry_nc.py b/imas/backends/netcdf/db_entry_nc.py
index 0776c47..e9eae9a 100644
--- a/imas/backends/netcdf/db_entry_nc.py
+++ b/imas/backends/netcdf/db_entry_nc.py
@@ -92,6 +92,14 @@ def close(self, *, erase: bool = False) -> None:
)
self._dataset.close()
+ def _get_group(self, ids_name: str, occurrence: int) -> "netCDF4.Group":
+ try:
+ return self._dataset[f"{ids_name}/{occurrence}"]
+ except LookupError as exc:
+ raise DataEntryException(
+ f"IDS {ids_name!r}, occurrence {occurrence} is not found."
+ ) from exc
+
def get(
self,
ids_name: str,
@@ -110,12 +118,7 @@ def get(
raise NotImplementedError(f"`{func}` is not available for netCDF files.")
# Check if the IDS/occurrence exists, and obtain the group it is stored in
- try:
- group = self._dataset[f"{ids_name}/{occurrence}"]
- except KeyError:
- raise DataEntryException(
- f"IDS {ids_name!r}, occurrence {occurrence} is not found."
- )
+ group = self._get_group(ids_name, occurrence)
# Load data into the destination IDS
if self._ds_factory.dd_version == destination._dd_version:
@@ -183,3 +186,17 @@ def list_all_occurrences(self, ids_name: str) -> List[int]:
occurrence_list.sort()
return occurrence_list
+
+ def list_filled_paths(self, ids_name: str, occurrence: int) -> List[str]:
+ # Check if the IDS/occurrence exists, and obtain the group it is stored in
+ group = self._get_group(ids_name, occurrence)
+
+ result = []
+ for name, variable in group.variables.items():
+ if variable.ndim == 0 and variable.dtype == "S1":
+ continue # (Array of) Structure metadata node, no data
+ if name.endswith(":shape"):
+ continue # Shape data, not a DD path
+ result.append(name.replace(".", "/"))
+
+ return result
diff --git a/imas/db_entry.py b/imas/db_entry.py
index 5a47064..d6b7053 100644
--- a/imas/db_entry.py
+++ b/imas/db_entry.py
@@ -7,7 +7,7 @@
import logging
import os
import pathlib
-from typing import Any, Type, overload
+from typing import Any, Type, overload, List
import numpy
@@ -197,14 +197,14 @@ def _select_implementation(uri: str | None) -> Type[DBEntryImpl]:
from imas.backends.imas_core.db_entry_al import ALDBEntryImpl as impl
return impl
- def __enter__(self):
+ def __enter__(self) -> "DBEntry":
# Context manager protocol
if self._dbe_impl is None:
# Open if the DBEntry was not already opened or created
self.open()
return self
- def __exit__(self, exc_type, exc_value, traceback):
+ def __exit__(self, exc_type, exc_value, traceback) -> None:
# Context manager protocol
self.close()
@@ -800,3 +800,62 @@ def list_all_occurrences(self, ids_name, node_path=None):
self.get(ids_name, occ, lazy=True)[node_path] for occ in occurrence_list
]
return occurrence_list, node_content_list
+
+ def list_filled_paths(
+ self, ids_name, occurrence: int = 0, *, autoconvert: bool = True
+ ) -> List[str]:
+ """Get a list of filled Data Dictionary paths from the backend.
+
+ Note that this is only supported by some backends (HDF5 and netCDF), and will
+ result in an error on unsupported backends.
+
+ Args:
+ ids_name: Name of the IDS to request filled data for.
+ occurrence: Occurrence number of the IDS to request filled data for.
+
+ Keyword Args:
+ autoconvert: If enabled (default), this method will take NBC renames into
+ account in the returned list of filled paths.
+
+ Returns:
+ List of paths which have some data filled in the backend. For example, when
+ ``profiles_1d/ion/temperature`` is in this list, it means that there is at
+ least one ``ion`` in one ``profiles_1d`` entry for which the temperature is
+ filled.
+
+ The paths in this list may be ordered arbitrarily.
+
+ Example:
+        >>> with imas.DBEntry("imas:hdf5?path=./path/to/data", "r") as entry:
+        ...     print(entry.list_filled_paths("core_profiles"))
+ ['ids_properties/comment', 'ids_properties/homogeneous_time',
+ 'profiles_1d/grid/rho_tor_norm', 'profiles_1d/electrons/temperature',
+ 'profiles_1d/ion/temperature', 'time']
+ """
+ if self._dbe_impl is None:
+ raise RuntimeError("Database entry is not open.")
+ paths = self._dbe_impl.list_filled_paths(ids_name, occurrence)
+ if not autoconvert:
+ return paths
+
+ # DD conversion?
+ dd_version = self._dbe_impl.read_dd_version(ids_name, occurrence)
+ if dd_version == self._ids_factory.dd_version:
+ return paths # No conversion required
+
+ # Follow any NBC renames:
+ ddmap, source_is_older = dd_version_map_from_factories(
+ ids_name, IDSFactory(version=dd_version), self._ids_factory
+ )
+ nbc_map = ddmap.old_to_new if source_is_older else ddmap.new_to_old
+
+ converted_paths = []
+ for path in paths:
+ if path in nbc_map:
+ new_name = nbc_map.path[path]
+ if new_name is not None:
+ converted_paths.append(new_name)
+ else:
+ converted_paths.append(path)
+
+ return converted_paths
diff --git a/imas/test/test_list_filled_paths.py b/imas/test/test_list_filled_paths.py
new file mode 100644
index 0000000..9df74f4
--- /dev/null
+++ b/imas/test/test_list_filled_paths.py
@@ -0,0 +1,101 @@
+import pytest
+
+import imas
+from imas_core import _al_lowlevel
+from imas.exception import DataEntryException
+from imas.ids_defs import IDS_TIME_MODE_HOMOGENEOUS, IDS_TIME_MODE_INDEPENDENT
+
+
+if not hasattr(_al_lowlevel, "al_list_filled_paths"):
+ marker = pytest.mark.xfail(reason="list_filled_paths not available in imas_core")
+else:
+ marker = []
+
+
+@pytest.fixture(params=["netcdf", pytest.param("hdf5", marks=marker)])
+def testuri(request, tmp_path):
+ if request.param == "netcdf":
+ return str(tmp_path / "list_filled_paths.nc")
+ return f"imas:{request.param}?path={tmp_path}/list_filled_paths_{request.param}"
+
+
+def test_list_filled_paths(testuri):
+ with imas.DBEntry(testuri, "w", dd_version="4.0.0") as dbentry:
+ # No IDSs in the DBEntry yet, expect an exception
+ with pytest.raises(DataEntryException):
+ dbentry.list_filled_paths("core_profiles")
+
+ cp = dbentry.factory.core_profiles()
+ cp.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS
+ cp.ids_properties.comment = "comment"
+ cp.time = [0.1, 0.2]
+ cp.profiles_1d.resize(2)
+ cp.profiles_1d[0].grid.rho_tor_norm = [1.0, 2.0]
+ cp.profiles_1d[0].ion.resize(2)
+ cp.profiles_1d[0].ion[1].temperature = [1.0, 2.0]
+ cp.profiles_1d[1].grid.psi = [1.0, 2.0]
+ cp.profiles_1d[1].q = [1.0, 2.0]
+ cp.profiles_1d[1].e_field.radial = [1.0, 2.0]
+ cp.profiles_1d[1].neutral.resize(2)
+ cp.global_quantities.ip = [1.0, 2.0]
+
+ dbentry.put(cp)
+
+ filled_paths = dbentry.list_filled_paths("core_profiles")
+ assert isinstance(filled_paths, list)
+ assert set(filled_paths) == {
+ "ids_properties/version_put/access_layer",
+ "ids_properties/version_put/access_layer_language",
+ "ids_properties/version_put/data_dictionary",
+ "ids_properties/homogeneous_time",
+ "ids_properties/comment",
+ "time",
+ "profiles_1d/grid/rho_tor_norm",
+ "profiles_1d/ion/temperature",
+ "profiles_1d/grid/psi",
+ "profiles_1d/q",
+        "profiles_1d/e_field/radial",
+ "global_quantities/ip",
+ }
+ # Other occurrence should still raise an error:
+ with pytest.raises(DataEntryException):
+ dbentry.list_filled_paths("core_profiles", 1)
+ # Until we write data to the occurrence:
+ dbentry.put(cp, 3)
+ assert set(filled_paths) == set(dbentry.list_filled_paths("core_profiles", 3))
+
+
+def test_list_filled_paths_autoconvert(testuri):
+ with imas.DBEntry(testuri, "w", dd_version="3.25.0") as entry:
+ ps = entry.factory.pulse_schedule()
+ ps.ids_properties.homogeneous_time = IDS_TIME_MODE_INDEPENDENT
+ ps.ec.antenna.resize(1)
+ ps.ec.antenna[0].launching_angle_pol.reference_name = "test"
+ entry.put(ps)
+
+ filled_paths = entry.list_filled_paths("pulse_schedule")
+ assert set(filled_paths) == {
+ "ids_properties/version_put/access_layer",
+ "ids_properties/version_put/access_layer_language",
+ "ids_properties/version_put/data_dictionary",
+ "ids_properties/homogeneous_time",
+ "ec/antenna/launching_angle_pol/reference_name",
+ }
+
+ # Check autoconvert with DD 3.28.0
+ with imas.DBEntry(testuri, "r", dd_version="3.28.0") as entry:
+ assert set(entry.list_filled_paths("pulse_schedule", autoconvert=False)) == {
+ "ids_properties/version_put/access_layer",
+ "ids_properties/version_put/access_layer_language",
+ "ids_properties/version_put/data_dictionary",
+ "ids_properties/homogeneous_time",
+ "ec/antenna/launching_angle_pol/reference_name", # original name
+ }
+ assert set(entry.list_filled_paths("pulse_schedule")) == {
+ "ids_properties/version_put/access_layer",
+ "ids_properties/version_put/access_layer_language",
+ "ids_properties/version_put/data_dictionary",
+ "ids_properties/homogeneous_time",
+ "ec/launcher/steering_angle_pol/reference_name", # autoconverted name
+ }