2 changes: 1 addition & 1 deletion sasdata/data.py
@@ -69,7 +69,7 @@ def abscissae(self) -> Quantity:
                 # TODO: Won't work when there's errors involved. On reflection, we
                 # probably want to avoid creating a new Quantity but at the moment I
                 # can't see a way around it.
-                return Quantity(data_contents, reference_data_content.units)
+                return Quantity(data_contents, reference_data_content.units, name=self._data_contents["Qx"].name, id_header=self._data_contents["Qx"]._id_header)
             case dataset_types.sesans:
                 return self._data_contents["SpinEchoLength"]
             case _:
8 changes: 8 additions & 0 deletions sasdata/metadata.py
@@ -567,6 +567,14 @@ def from_json(obj):
             raw=MetaNode.from_json(obj["raw"]),
         )
 
+    @property
+    def id_header(self):
+        """Generate a header used in the unique_id for datasets"""
+        title = ""
+        if self.title is not None:
+            title = self.title
+        return f"{title}:{','.join(self.run)}"
+
     def as_h5(self, f: h5py.Group):
         """Export data onto an HDF5 group"""
         for idx, run in enumerate(self.run):
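Note: `id_header` is just the dataset title and the run numbers joined with a colon (runs comma-separated). A minimal standalone sketch of the same logic, outside the `Metadata` class; the example values are taken from the test expectations at the end of this diff:

    def id_header(title, run):
        # Mirrors Metadata.id_header: empty string when there is no title,
        # run numbers joined with commas, the two fields separated by a colon.
        return f"{title if title is not None else ''}:{','.join(run)}"

    assert id_header("Test title", ["1234"]) == "Test title:1234"
    assert id_header(None, []) == ":"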
52 changes: 40 additions & 12 deletions sasdata/quantities/quantity.py
@@ -1082,14 +1082,13 @@ def summary(self):
 
 
 class Quantity[QuantityType]:
-
-
     def __init__(self,
                  value: QuantityType,
                  units: Unit,
                  standard_error: QuantityType | None = None,
-                 hash_seed = ""):
-
+                 hash_seed="",
+                 name="",
+                 id_header=""):
         self.value = value
         """ Numerical value of this data, in the specified units"""
 
@@ -1113,14 +1112,21 @@ def __init__(self,
 
         self.history = QuantityHistory.variable(self)
 
+        self._id_header = id_header
+        self.name = name
+        # print(f"ID Header: {self._id_header}, Quant: {self.value}")
+
     # TODO: Adding this method as a temporary measure but we need a single
     # method that does this.
     def with_standard_error(self, standard_error: "Quantity"):
         if standard_error.units.equivalent(self.units):
             return Quantity(
                 value=self.value,
                 units=self.units,
-                standard_error=standard_error.in_units_of(self.units),)
+                standard_error=standard_error.in_units_of(self.units),
+                name=self.name,
+                id_header=self._id_header,
+            )
         else:
             raise UnitError(f"Standard error units ({standard_error.units}) "
                             f"are not compatible with value units ({self.units})")
@@ -1133,10 +1139,30 @@ def has_variance(self):
     def variance(self) -> "Quantity":
         """ Get the variance of this object"""
         if self._variance is None:
-            return Quantity(np.zeros_like(self.value), self.units**2)
+            return Quantity(np.zeros_like(self.value), self.units**2, name=self.name, id_header=self._id_header)
         else:
             return Quantity(self._variance, self.units**2)
 
+    def _base62_hash(self) -> str:
+        """Encode the hash_value in base62 for better readability"""
+        hashed = ""
+        current_hash = self.hash_value
+        while current_hash:
+            digit = current_hash % 62
+            if digit < 10:
+                hashed = f"{digit}{hashed}"  # digits 0-9
+            elif digit < 36:
+                hashed = f"{chr(55 + digit)}{hashed}"  # A-Z
+            else:
+                hashed = f"{chr(61 + digit)}{hashed}"  # a-z
+            current_hash = (current_hash - digit) // 62
+        return hashed
+
+    @property
+    def unique_id(self) -> str:
+        """Get a human-readable unique id for a data set"""
+        return f"{self._id_header}:{self.name}:{self._base62_hash()}"
+
     def standard_deviation(self) -> "Quantity":
         return self.variance ** 0.5
 
@@ -1152,7 +1178,8 @@ def to_units_of(self, new_units: Unit) -> "Quantity[QuantityType]":
         return Quantity(value=new_value,
                         units=new_units,
                         standard_error=new_error,
-                        hash_seed=self._hash_seed)
+                        hash_seed=self._hash_seed,
+                        id_header=self._id_header)
 
     def variance_in_units_of(self, units: Unit) -> QuantityType:
         """ Get the variance of quantity in other units """
@@ -1411,10 +1438,9 @@ def __init__(self,
                  name: str,
                  value: QuantityType,
                  units: Unit,
-                 standard_error: QuantityType | None = None):
-
-        super().__init__(value, units, standard_error=standard_error, hash_seed=name)
-        self.name = name
+                 standard_error: QuantityType | None = None,
+                 id_header=""):
+        super().__init__(value, units, standard_error=standard_error, hash_seed=name, name=name, id_header=id_header)
 
     def __repr__(self):
         return f"[{self.name}] " + super().__repr__()
@@ -1432,7 +1458,9 @@ def with_standard_error(self, standard_error: Quantity):
                 value=self.value,
                 units=self.units,
                 standard_error=standard_error.in_units_of(self.units),
-                name=self.name)
+                name=self.name,
+                id_header=self._id_header,
+            )
 
         else:
             raise UnitError(f"Standard error units ({standard_error.units}) "
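Note: `unique_id` is three colon-separated fields (metadata header, quantity name, base62-encoded hash), e.g. "Test title:1234:Q:440tNBqdx9jvci6CgjmrmD" in the tests at the end of this diff. A standalone sketch of the `_base62_hash` encoding, using an explicit alphabet in place of the `chr()` arithmetic above:

    def base62(n: int) -> str:
        # Digits are 0-9, then A-Z, then a-z; like _base62_hash,
        # this yields an empty string for n == 0.
        alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        out = ""
        while n:
            n, digit = divmod(n, 62)
            out = alphabet[digit] + out
        return out

    assert base62(61) == "z"
    assert base62(62) == "10"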
8 changes: 4 additions & 4 deletions sasdata/temp_ascii_reader.py
@@ -21,7 +21,7 @@
     guess_starting_position,
 )
 from sasdata.metadata import Metadata, MetaNode
-from sasdata.quantities.quantity import Quantity
+from sasdata.quantities.quantity import NamedQuantity, Quantity
 from sasdata.quantities.units import NamedUnit
 
@@ -121,7 +121,7 @@ def split_line(separator_dict: dict[str, bool], line: str) -> list[str]:
 
 
 # TODO: Implement error handling.
-def load_quantities(params: AsciiReaderParams, filename: str) -> dict[str, Quantity]:
+def load_quantities(params: AsciiReaderParams, filename: str, metadata: Metadata) -> dict[str, Quantity]:
     """Load a list of quantities from the filename based on the params."""
     with open(filename) as ascii_file:
         lines = ascii_file.readlines()
@@ -146,7 +146,7 @@ def load_quantities(params: AsciiReaderParams, filename: str) -> dict[str, Quantity]:
             print(f"Line {i + 1} skipped.")
             continue
     file_quantities = {
-        name: Quantity(arrays[i], unit)
+        name: NamedQuantity(name, arrays[i], unit, id_header=metadata.id_header)
        for i, (name, unit) in enumerate(params.columns_included)
     }
     return file_quantities
@@ -194,7 +194,6 @@ def load_data(params: AsciiReaderParams) -> list[SasData]:
     list contained in the params."""
     loaded_data: list[SasData] = []
     for filename in params.filenames:
-        quantities = load_quantities(params, filename)
         raw_metadata = import_metadata(
             params.metadata.all_file_metadata(path.basename(filename))
         )
@@ -207,6 +206,7 @@ def load_data(params: AsciiReaderParams) -> list[SasData]:
             process=None,
             raw=raw_metadata,
         )
+        quantities = load_quantities(params, filename, metadata)
         data = SasData(
             path.basename(filename),
             merge_uncertainties(quantities),
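Note: `load_quantities` now needs the `Metadata` object, which is why its call in `load_data` moved below the metadata construction. For a plain ASCII file with no title or run numbers the header degenerates to ":", giving ids of the form "::Q:&lt;hash&gt;" (see `ascii_test_1` in the tests at the end of this diff).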
14 changes: 6 additions & 8 deletions sasdata/temp_hdf5_reader.py
@@ -72,7 +72,7 @@ def recurse_hdf5(hdf5_entry):
 GET_UNITS_FROM_ELSEWHERE = units.meters
 
 
-def connected_data(node: SASDataGroup, name_prefix="") -> dict[str, Quantity]:
+def connected_data(node: SASDataGroup, name_prefix="", metadata=None) -> dict[str, Quantity]:
     """In the context of NeXus files, load a group of data entries that are organised
     together, matching up the units and errors with their values"""
     # Gather together data with its error terms
@@ -89,9 +89,7 @@ def connected_data(node: SASDataGroup, name_prefix="", metadata=None) -> dict[str, Quantity]:
         else:
             units = GET_UNITS_FROM_ELSEWHERE
 
-        quantity = NamedQuantity(
-            name=name_prefix + child.name, value=child.data, units=units
-        )
+        quantity = NamedQuantity(name=child.name, value=child.data, units=units, id_header=metadata.id_header if metadata is not None else "")
 
         # Turns out people can't be trusted to use the same keys here
         if "uncertainty" in child.attributes or "uncertainties" in child.attributes:
@@ -368,7 +366,7 @@ def load_raw(node: HDF5Group | HDF5Dataset) -> MetaNode:
     else:
         if "units" in attrib and attrib["units"]:
             data = node[()] if node.shape == () else node[:]
-            contents = Quantity(data, parse(attrib["units"]))
+            contents = Quantity(data, parse(attrib["units"]), id_header=node.name)
         else:
             contents = node[()] if node.shape == () else node[:]
     return MetaNode(name=name, attrs=attrib, contents=contents)
@@ -424,13 +422,13 @@ def load_data(filename: str) -> dict[str, SasData]:
             logger.warning("No sasdata or data key")
             logger.warning(f"Known keys: {[k for k in entry_keys]}")
 
+        metadata = parse_metadata(f[root_key])
+
         for key in entry_keys:
             component = entry[key]
             if get_canSAS_class(entry[key]) == 'SASdata':
                 datum = recurse_hdf5(component)
-                data_contents = connected_data(datum, str(filename))
-
-                metadata = parse_metadata(f[root_key])
+                data_contents = connected_data(datum, str(filename), metadata)
 
                 if "Qz" in data_contents:
                     dataset_type = three_dim
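Note: `connected_data` takes `metadata=None` by default and falls back to an empty id header, and the quantity name is now the bare `child.name` rather than `name_prefix + child.name`. Judging by the test expectations at the end of this diff, that name is the full HDF5 path, which is why the NeXus ids contain names like "/sasentry01/sasdata01/Q"; it also leaves `name_prefix` effectively unused.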
8 changes: 4 additions & 4 deletions sasdata/temp_xml_reader.py
@@ -21,7 +21,7 @@
     Source,
     Vec3,
 )
-from sasdata.quantities.quantity import Quantity
+from sasdata.quantities.quantity import NamedQuantity, Quantity
 from sasdata.quantities.units import Unit
 from sasdata.quantities.units import none as unitless
 
@@ -205,7 +205,7 @@ def parse_sample(node: etree._Element, version: str) -> Sample:
     )
 
 
-def parse_data(node: etree._Element, version: str) -> dict[str, Quantity]:
+def parse_data(node: etree._Element, version: str, metadata: Metadata) -> dict[str, Quantity]:
     """Parse scattering data"""
     aos = []
     keys = set()
@@ -244,7 +244,7 @@
 
     result: dict[str, Quantity] = {}
     for k in keys:
-        result[k] = Quantity(np.array(soa[k]), us[k])
+        result[k] = NamedQuantity(k, np.array(soa[k]), us[k], id_header=metadata.id_header)
         if k + "dev" in uncertainties:
             result[k] = result[k].with_standard_error(
                 Quantity(np.array(soa[k + "dev"]), us[k + "dev"])
@@ -323,7 +323,7 @@ def load_data(filename: str) -> dict[str, SasData]:
         datacount = 0
         for n in entry.findall(f"{version}:SASdata", ns):
             datacount += 1
-            data_set = parse_data(n, version)
+            data_set = parse_data(n, version, metadata)
             data = data_set
             break
 
44 changes: 44 additions & 0 deletions test/sasdataloader/utest_data_names.py
@@ -0,0 +1,44 @@
+"""
+Tests for generation of unique, but reproducible, names for data quantities
+"""
+
+import os
+
+import pytest
+
+from sasdata.data import SasData
+from sasdata.temp_ascii_reader import load_data_default_params
+from sasdata.temp_hdf5_reader import load_data as hdf_load_data
+from sasdata.temp_xml_reader import load_data as xml_load_data
+
+
+def local_load(path: str) -> list[SasData]:
+    """Load the test data at the given base path, whatever its format"""
+    base = os.path.join(os.path.dirname(__file__), path)
+    if os.path.exists(f"{base}.h5"):
+        return list(hdf_load_data(f"{base}.h5").values())
+    if os.path.exists(f"{base}.xml"):
+        return list(xml_load_data(f"{base}.xml").values())
+    if os.path.exists(f"{base}.txt"):
+        return load_data_default_params(f"{base}.txt")
+    pytest.fail(f"No data file found for {base}")
+
+
+test_file_names = [
+    ("ascii_test_1", "::Q:3KrS58TPgclJ1rgyr0VQp3"),
+    ("ISIS_1_1", "TK49 c10_SANS:79680:Q:4TghWEoJi6xxhyeDXhS751"),
+    ("cansas1d", "Test title:1234:Q:440tNBqdx9jvci6CgjmrmD"),
+    ("MAR07232_rest", "MAR07232_rest_out.dat:2:/sasentry01/sasdata01/Qx:2Y0qTTb054KSJnJaJv0rFl"),
+    ("simpleexamplefile", "::/sasentry01/sasdata01/Q:uoHMeB8mukElC1uLCy7Sd"),
+]
+
+
+@pytest.mark.names
+@pytest.mark.parametrize("x", test_file_names)
+def test_quantity_name(x):
+    (f, expected) = x
+    data = local_load(f"data/{f}")[0]
+    print(data.metadata.title)
+    if data.metadata.title is not None:
+        assert data.abscissae.unique_id.startswith(data.metadata.title)
+    assert data.abscissae.unique_id == expected
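Two notes on the tests. First, `names` is a custom pytest marker, so it should be registered in the project's pytest configuration to avoid PytestUnknownMarkWarning. Second, if the hash seed or encoding ever changes, the expected strings can be regenerated rather than edited by hand; a hypothetical helper reusing `local_load` and `test_file_names` from this file:

    if __name__ == "__main__":
        # Print refreshed (name, unique_id) pairs for test_file_names.
        for name, _ in test_file_names:
            data = local_load(f"data/{name}")[0]
            print(f'    ("{name}", "{data.abscissae.unique_id}"),')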