40 changes: 25 additions & 15 deletions docs/source/intro.rst
@@ -86,17 +86,6 @@ get an error message if this is not possible:
Load and store an IDS to disk with IMAS-Core
''''''''''''''''''''''''''''''''''''''''''''

.. note::

- This functionality requires the IMAS-Core. Until this library is openly available
on GitHub, you may need to fetch it from `git.iter.org <https://git.iter.org/>`_
(an ITER account is required). Using IMAS-Core also enables the slicing methods
:py:meth:`~imas.db_entry.DBEntry.get_slice`,
:py:meth:`~imas.db_entry.DBEntry.put_slice` and
:py:meth:`~imas.db_entry.DBEntry.get_sample` (with IMAS-Core >= 5.4).
- If you don't have access to it, you can save IDSs to disk with the built-in
netCDF backend, see :ref:`Load and store an IDS to disk with netCDF`.

To store an IDS to disk, we need to pass a URI of the following form to the
IMAS-Core: ``imas:<backend>?path=<path_to_folder>``, or, using the legacy query keys,
``imas:<backend>?user=<user>;database=<database>;version=<version>;pulse=<pulse>;run=<run>``.
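For example (the backend, path and query values below are hypothetical):

.. code-block:: text

imas:hdf5?path=/home/username/mypulse
imas:hdf5?user=username;database=ITER;version=3;pulse=123456;run=1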
@@ -115,11 +104,9 @@ In IMAS-Python you do this as follows:
>>> # now store the core_profiles IDS we just populated
>>> dbentry.put(core_profiles)
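
Putting these steps together, a minimal sketch of creating a data entry and
storing an IDS (the backend and path in the URI are hypothetical):

.. code-block:: python

>>> import imas
>>> from imas.ids_defs import IDS_TIME_MODE_HOMOGENEOUS
>>> with imas.DBEntry("imas:hdf5?path=./mypulse", "w") as dbentry:
...     core_profiles = dbentry.factory.core_profiles()
...     core_profiles.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS
...     core_profiles.ids_properties.comment = "Example comment"
...     core_profiles.time = [0.1, 0.2]
...     dbentry.put(core_profiles)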

.. image:: imas_structure.png

To load an IDS from disk, you need to specify the same information as
when storing the IDS (see above). Once the data entry is opened, you
can use ``<IDS>.get()`` to load IDS data from disk:
can use ``dbentry.get()`` to load IDS data from disk:

.. code-block:: python

@@ -146,11 +133,34 @@

To load an IDS from disk, you need to specify the same file information as
when storing the IDS. Once the data entry is opened, you
can use ``<IDS>.get()`` to load IDS data from disk:
can use ``dbentry.get()`` to load IDS data from disk:

.. code-block:: python

>>> # Now load the core_profiles IDS back from disk
>>> dbentry2 = imas.DBEntry("mypulsefile.nc", "r")
>>> core_profiles2 = dbentry2.get("core_profiles")
>>> print(core_profiles2.ids_properties.comment.value)
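
``DBEntry`` also implements the context manager protocol, which opens the entry
and closes it automatically afterwards. A sketch of the same read using ``with``:

.. code-block:: python

>>> with imas.DBEntry("mypulsefile.nc", "r") as dbentry2:
...     core_profiles2 = dbentry2.get("core_profiles")
...     print(core_profiles2.ids_properties.comment.value)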


Data Entry API overview
'''''''''''''''''''''''

See the documentation of :py:class:`imas.DBEntry <imas.db_entry.DBEntry>` for more
details on reading and writing IDSs to disk. Useful functions include:

- :py:meth:`~imas.db_entry.DBEntry.put` and :py:meth:`~imas.db_entry.DBEntry.put_slice`
to write a full IDS, or append a time slice to existing data.
- :py:meth:`~imas.db_entry.DBEntry.get`, :py:meth:`~imas.db_entry.DBEntry.get_slice` and
:py:meth:`~imas.db_entry.DBEntry.get_sample` to read all time slices, a single time
slice, or a sample of time slices from disk. ``get_slice()`` and ``get_sample()`` can
also interpolate data to a requested point in time.

All three ``get`` methods have a ``lazy`` mode, which only loads data from disk
when you access it. This can greatly speed up data access in some scenarios. See
:ref:`Lazy loading` for more details, and the sketch after this list.
- :py:meth:`~imas.db_entry.DBEntry.list_all_occurrences` to list all occurrences of a
certain IDS that are stored on disk.
- :py:meth:`~imas.db_entry.DBEntry.list_filled_paths` to query which Data Dictionary
paths have data filled inside a specific IDS.
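
As an example, a minimal sketch of lazy loading (the URI is hypothetical):

.. code-block:: python

>>> import imas
>>> with imas.DBEntry("imas:hdf5?path=./mypulse", "r") as entry:
...     core_profiles = entry.get("core_profiles", lazy=True)
...     # Only the data that is accessed gets read from disk:
...     print(core_profiles.time)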

8 changes: 8 additions & 0 deletions imas/backends/db_entry_impl.py
@@ -120,3 +120,11 @@ def delete_data(self, ids_name: str, occurrence: int) -> None:
@abstractmethod
def list_all_occurrences(self, ids_name: str) -> List[int]:
"""Implement DBEntry.list_all_occurrences()"""

@abstractmethod
def list_filled_paths(self, ids_name: str, occurrence: int) -> List[str]:
"""Implement DBEntry.list_filled_paths().

N.B. DD conversion is handled in DBEntry.list_filled_paths(), this method
returns the data paths as stored on-disk.
"""
11 changes: 11 additions & 0 deletions imas/backends/imas_core/al_context.py
@@ -174,6 +174,17 @@ def list_all_occurrences(self, ids_name: str) -> List[int]:
return list(occurrences)
return []

def list_filled_paths(self, path: str) -> List[str]:
"""List all filled paths in an IDS.

Args:
path: IDS and occurrence as a string: <IDS>[/<occurrence>]
"""
status, result = ll_interface.list_filled_paths(self.ctx, path)
if status != 0:
raise LowlevelError(f"list filled paths for {path!r}", status)
return result

def close(self):
"""Close this ALContext."""
ll_interface.end_action(self.ctx)
13 changes: 13 additions & 0 deletions imas/backends/imas_core/db_entry_al.py
@@ -364,6 +364,19 @@ def list_all_occurrences(self, ids_name: str) -> List[int]:
) from None
return occurrence_list

def list_filled_paths(self, ids_name: str, occurrence: int) -> List[str]:
if self._db_ctx is None:
raise RuntimeError("Database entry is not open.")
ll_path = ids_name
if occurrence != 0:
ll_path += f"/{occurrence}"
paths = self._db_ctx.list_filled_paths(ll_path)
if not paths:
raise DataEntryException(
f"IDS {ids_name!r}, occurrence {occurrence} is empty."
)
return paths

def _check_uda_warnings(self, lazy: bool) -> None:
"""Various checks / warnings for the UDA backend."""
cache_mode = self._querydict.get("cache_mode")
5 changes: 5 additions & 0 deletions imas/backends/imas_core/imas_interface.py
@@ -166,6 +166,11 @@ def begin_timerange_action(
):
raise self._minimal_version("5.4")

# New method in AL 5.7

def list_filled_paths(self, ctx, path):
raise self._minimal_version("5.7")


# Dummy documentation for interface:
for funcname in dir(LowlevelInterface):
29 changes: 23 additions & 6 deletions imas/backends/netcdf/db_entry_nc.py
@@ -92,6 +92,14 @@ def close(self, *, erase: bool = False) -> None:
)
self._dataset.close()

def _get_group(self, ids_name: str, occurrence: int) -> "netCDF4.Group":
try:
return self._dataset[f"{ids_name}/{occurrence}"]
except LookupError as exc:
raise DataEntryException(
f"IDS {ids_name!r}, occurrence {occurrence} is not found."
) from exc

def get(
self,
ids_name: str,
@@ -110,12 +118,7 @@ def get(
raise NotImplementedError(f"`{func}` is not available for netCDF files.")

# Check if the IDS/occurrence exists, and obtain the group it is stored in
try:
group = self._dataset[f"{ids_name}/{occurrence}"]
except KeyError:
raise DataEntryException(
f"IDS {ids_name!r}, occurrence {occurrence} is not found."
)
group = self._get_group(ids_name, occurrence)

# Load data into the destination IDS
if self._ds_factory.dd_version == destination._dd_version:
@@ -183,3 +186,17 @@ def list_all_occurrences(self, ids_name: str) -> List[int]:

occurrence_list.sort()
return occurrence_list

def list_filled_paths(self, ids_name: str, occurrence: int) -> List[str]:
# Check if the IDS/occurrence exists, and obtain the group it is stored in
group = self._get_group(ids_name, occurrence)

result = []
for name, variable in group.variables.items():
if variable.ndim == 0 and variable.dtype == "S1":
continue # (Array of) Structure metadata node, no data
if name.endswith(":shape"):
continue # Shape data, not a DD path
result.append(name.replace(".", "/"))

return result
65 changes: 62 additions & 3 deletions imas/db_entry.py
@@ -7,7 +7,7 @@
import logging
import os
import pathlib
from typing import Any, Type, overload
from typing import Any, Type, overload, List

import numpy

@@ -197,14 +197,14 @@ def _select_implementation(uri: str | None) -> Type[DBEntryImpl]:
from imas.backends.imas_core.db_entry_al import ALDBEntryImpl as impl
return impl

def __enter__(self):
def __enter__(self) -> "DBEntry":
# Context manager protocol
if self._dbe_impl is None:
# Open if the DBEntry was not already opened or created
self.open()
return self

def __exit__(self, exc_type, exc_value, traceback):
def __exit__(self, exc_type, exc_value, traceback) -> None:
# Context manager protocol
self.close()

@@ -800,3 +800,62 @@ def list_all_occurrences(self, ids_name, node_path=None):
self.get(ids_name, occ, lazy=True)[node_path] for occ in occurrence_list
]
return occurrence_list, node_content_list

def list_filled_paths(
self, ids_name, occurrence: int = 0, *, autoconvert: bool = True
) -> List[str]:
"""Get a list of filled Data Dictionary paths from the backend.

Note that this is only supported by some backends (HDF5 and netCDF), and will
result in an error on unsupported backends.

Args:
ids_name: Name of the IDS to request filled data for.
occurrence: Occurrence number of the IDS to request filled data for.

Keyword Args:
autoconvert: If enabled (default), this method will take NBC renames into
account in the returned list of filled paths.

Returns:
List of paths which have some data filled in the backend. For example, when
``profiles_1d/ion/temperature`` is in this list, it means that there is at
least one ``ion`` in one ``profiles_1d`` entry for which the temperature is
filled.

The paths in this list may be ordered arbitrarily.

Example:
>>> with imas.DBEntry("imas:hdf5?path=./path/to/data", "r") as entry:
...     print(entry.list_filled_paths("core_profiles"))
['ids_properties/comment', 'ids_properties/homogeneous_time',
'profiles_1d/grid/rho_tor_norm', 'profiles_1d/electrons/temperature',
'profiles_1d/ion/temperature', 'time']
"""
if self._dbe_impl is None:
raise RuntimeError("Database entry is not open.")
paths = self._dbe_impl.list_filled_paths(ids_name, occurrence)
if not autoconvert:
return paths

# DD conversion?
dd_version = self._dbe_impl.read_dd_version(ids_name, occurrence)
if dd_version == self._ids_factory.dd_version:
return paths # No conversion required

# Follow any NBC renames:
ddmap, source_is_older = dd_version_map_from_factories(
ids_name, IDSFactory(version=dd_version), self._ids_factory
)
nbc_map = ddmap.old_to_new if source_is_older else ddmap.new_to_old

converted_paths = []
for path in paths:
if path in nbc_map:
new_name = nbc_map.path[path]
if new_name is not None:
converted_paths.append(new_name)
else:
converted_paths.append(path)

return converted_paths
101 changes: 101 additions & 0 deletions imas/test/test_list_filled_paths.py
@@ -0,0 +1,101 @@
import pytest

import imas
from imas_core import _al_lowlevel
from imas.exception import DataEntryException
from imas.ids_defs import IDS_TIME_MODE_HOMOGENEOUS, IDS_TIME_MODE_INDEPENDENT


if not hasattr(_al_lowlevel, "al_list_filled_paths"):
marker = pytest.mark.xfail(reason="list_filled_paths not available in imas_core")
else:
marker = []


@pytest.fixture(params=["netcdf", pytest.param("hdf5", marks=marker)])
def testuri(request, tmp_path):
if request.param == "netcdf":
return str(tmp_path / "list_filled_paths.nc")
return f"imas:{request.param}?path={tmp_path}/list_filled_paths_{request.param}"


def test_list_filled_paths(testuri):
with imas.DBEntry(testuri, "w", dd_version="4.0.0") as dbentry:
# No IDSs in the DBEntry yet, expect an exception
with pytest.raises(DataEntryException):
dbentry.list_filled_paths("core_profiles")

cp = dbentry.factory.core_profiles()
cp.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS
cp.ids_properties.comment = "comment"
cp.time = [0.1, 0.2]
cp.profiles_1d.resize(2)
cp.profiles_1d[0].grid.rho_tor_norm = [1.0, 2.0]
cp.profiles_1d[0].ion.resize(2)
cp.profiles_1d[0].ion[1].temperature = [1.0, 2.0]
cp.profiles_1d[1].grid.psi = [1.0, 2.0]
cp.profiles_1d[1].q = [1.0, 2.0]
cp.profiles_1d[1].e_field.radial = [1.0, 2.0]
cp.profiles_1d[1].neutral.resize(2)
cp.global_quantities.ip = [1.0, 2.0]

dbentry.put(cp)

filled_paths = dbentry.list_filled_paths("core_profiles")
assert isinstance(filled_paths, list)
assert set(filled_paths) == {
"ids_properties/version_put/access_layer",
"ids_properties/version_put/access_layer_language",
"ids_properties/version_put/data_dictionary",
"ids_properties/homogeneous_time",
"ids_properties/comment",
"time",
"profiles_1d/grid/rho_tor_norm",
"profiles_1d/ion/temperature",
"profiles_1d/grid/psi",
"profiles_1d/q",
"profiles_1d/e_field/radial",
"profiles_1d/e_field/radial",
"global_quantities/ip",
}
# Other occurrence should still raise an error:
with pytest.raises(DataEntryException):
dbentry.list_filled_paths("core_profiles", 1)
# Until we write data to the occurrence:
dbentry.put(cp, 3)
assert set(filled_paths) == set(dbentry.list_filled_paths("core_profiles", 3))


def test_list_filled_paths_autoconvert(testuri):
with imas.DBEntry(testuri, "w", dd_version="3.25.0") as entry:
ps = entry.factory.pulse_schedule()
ps.ids_properties.homogeneous_time = IDS_TIME_MODE_INDEPENDENT
ps.ec.antenna.resize(1)
ps.ec.antenna[0].launching_angle_pol.reference_name = "test"
entry.put(ps)

filled_paths = entry.list_filled_paths("pulse_schedule")
assert set(filled_paths) == {
"ids_properties/version_put/access_layer",
"ids_properties/version_put/access_layer_language",
"ids_properties/version_put/data_dictionary",
"ids_properties/homogeneous_time",
"ec/antenna/launching_angle_pol/reference_name",
}

# Check autoconvert with DD 3.28.0
with imas.DBEntry(testuri, "r", dd_version="3.28.0") as entry:
assert set(entry.list_filled_paths("pulse_schedule", autoconvert=False)) == {
"ids_properties/version_put/access_layer",
"ids_properties/version_put/access_layer_language",
"ids_properties/version_put/data_dictionary",
"ids_properties/homogeneous_time",
"ec/antenna/launching_angle_pol/reference_name", # original name
}
assert set(entry.list_filled_paths("pulse_schedule")) == {
"ids_properties/version_put/access_layer",
"ids_properties/version_put/access_layer_language",
"ids_properties/version_put/data_dictionary",
"ids_properties/homogeneous_time",
"ec/launcher/steering_angle_pol/reference_name", # autoconverted name
}