diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index d2e4147a..c72e97fd 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -18,10 +18,11 @@ Fixes
 * In `nc_specs`, set `dim_names_nc` in the order expected by Raven (x, y, t). Previously, we only made sure that `time` was the last dimension, but did not ensure x and y were in the right order. (PR #533)
 * Adjusted the `Perform_a_climate_change_impact_study_on_a_watershed.ipynb` notebook to reduce the number of years in the simulation to speed up execution time. (PR #535)
 * Adjusted a broken test that was overlooked in the previous release (from changes in PR #513). (PR #535)
+* Adapted the code base to the `pandas` v3.0 API. (Issue #570, PR #572)

 Internal changes
 ^^^^^^^^^^^^^^^^
-* Updated the cookiecutter template to the latest version (#548):
+* Updated the cookiecutter template to the latest version (PR #548):
     * Updated the Contributor Covenant Agreement to v3.0.
     * Added a `CITATION.cff` file.
     * Removed `black`, `blackdoc`, and `isort`, as well as their configurations.
@@ -29,8 +30,9 @@ Internal changes
 * Pinned `pydantic` below v2.12 due to breaking changes in their API. (PR #548)
 * Unpinned `pydantic` as newer 2.12 patch releases appear to have addressed regressions. (PR #559).
 * Pinned `pydap` >=3.5.6 and `h5netcdf` >=1.5.0 to ensure modern versions with better `xarray` support are installed by default. (PR #559).
-* Updated the cookiecutter template to the latest version (#569):
+* Updated the cookiecutter template to the latest version (PR #569):
     * Added a workflow for automatically accepting and merging periodic updates from Dependabot affecting CI dependencies.
+* Added a `pytest` fixture to tear down changes made within the installed `ravenpy` source location. (PR #572)

 .. _changes_0.19.1:

diff --git a/environment-dev.yml b/environment-dev.yml
index fd2621e8..ecc18427 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -20,7 +20,7 @@ dependencies:
   - netcdf4 >=1.7.2
   - numpy >=1.25.0
   - owslib >=0.29.1
-  - pandas >=2.2.0,<3.0
+  - pandas >=2.2.0
   - pint >=0.24.4
   - pydantic >=2.11
   - pydap >=3.5.6
diff --git a/environment-docs.yml b/environment-docs.yml
index d3005831..28e70460 100644
--- a/environment-docs.yml
+++ b/environment-docs.yml
@@ -31,7 +31,7 @@ dependencies:
   - netCDF4 >=1.7.2
   - numpy >=1.24.0
   - notebook
-  - pandas >=2.2,<3.0
+  - pandas >=2.2
   - pydantic >=2.11,<2.12
   - pymetalink >=6.5.2
   - s3fs
diff --git a/pyproject.toml b/pyproject.toml
index a16047b5..9268d9bc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,7 +47,7 @@ dependencies = [
     "matplotlib >=3.6.0",
     "numpy >=1.25.0",
     "owslib >=0.29.1",
-    "pandas >=2.2.0,<3.0",
+    "pandas >=2.2.0",
     "pint >=0.24.4",
     "pydantic >=2.11",
     "pydap >=3.5.6",
diff --git a/src/ravenpy/extractors/routing_product.py b/src/ravenpy/extractors/routing_product.py
index 9381c5da..1da02e39 100644
--- a/src/ravenpy/extractors/routing_product.py
+++ b/src/ravenpy/extractors/routing_product.py
@@ -405,7 +405,7 @@ def extract(self) -> dict:

         # Read routing data
         # WGS 84 / North Pole LAEA Canada
-        self._routing_data = self._routing_data.to_crs(epsg=GridWeightExtractor.CRS_CAEA)
+        df = self._routing_data.to_crs(epsg=GridWeightExtractor.CRS_CAEA)

         def keep_only_valid_downsubid_and_obs_nm(g):
             """
@@ -436,13 +436,7 @@ def keep_only_valid_downsubid_and_obs_nm(g):
             return row

         # Remove duplicate HRU_IDs while making sure that we keep relevant DowSubId and Obs_NM values
-        # FIXME: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns.
-        # This behavior is deprecated, and in a future version of pandas the grouping columns will be
-        # excluded from the operation. Either pass `include_groups=False` to exclude the groupings or
-        # explicitly select the grouping columns after groupby to silence this warning.
-        self._routing_data = self._routing_data.groupby(self._routing_id_field, group_keys=False).apply(
-            keep_only_valid_downsubid_and_obs_nm, include_groups=True
-        )
+        self._routing_data = df.groupby(self._routing_id_field, group_keys=False)[df.columns.to_list()].apply(keep_only_valid_downsubid_and_obs_nm)

         # Make sure those are ints
         self._routing_data.SubId = self._routing_data.SubId.astype(int)
diff --git a/src/ravenpy/utilities/regionalization.py b/src/ravenpy/utilities/regionalization.py
index 6dd3d9d2..9d40fc76 100644
--- a/src/ravenpy/utilities/regionalization.py
+++ b/src/ravenpy/utilities/regionalization.py
@@ -29,9 +29,9 @@ def regionalize(
     config: Config,
     method: str,
     nash: pd.Series,
-    params: pd.DataFrame = None,
-    props: pd.DataFrame = None,
-    target_props: Union[pd.Series, dict] = None,
+    params: Optional[pd.DataFrame] = None,
+    props: Optional[pd.DataFrame] = None,
+    target_props: Optional[Union[pd.Series, dict]] = None,
     size: int = 5,
     min_NSE: float = 0.6,  # noqa: N803
     workdir: Optional[Union[str, Path]] = None,
@@ -199,13 +199,13 @@ def read_gauged_properties(properties) -> pd.DataFrame:
     return proptable[properties]


-def read_gauged_params(model):
+def read_gauged_params(model) -> tuple[pd.Series, pd.DataFrame]:
     """
     Return table of NASH-Sutcliffe Efficiency values and model parameters for North American catchments.

     Returns
     -------
-    pd.DataFrame
+    pd.Series
         Nash-Sutcliffe Efficiency keyed by catchment ID.
     pd.DataFrame
         Model parameters keyed by catchment ID.
@@ -264,6 +264,8 @@ def similarity(gauged: pd.DataFrame, ungauged: pd.DataFrame, kind: str = "ptp")
         spread = stats.loc["std"]
     elif kind == "iqr":
         spread = stats.loc["75%"] - stats.loc["25%"]
+    else:
+        raise NotImplementedError("'kind' not in ['ptp', 'std', 'iqr']")

     d = ungauged.values - gauged.values
     n = np.abs(d) / spread.values
@@ -347,10 +349,10 @@ def IDW(qsims: xr.DataArray, dist: pd.Series) -> xr.DataArray:  # noqa: N802
     weights = xr.DataArray(1.0 / dist, dims="members", coords={"members": qsims.members})

     # Make weights sum to one
-    weights /= weights.sum(axis=0)
+    one_sum_weights = weights / weights.sum(axis=0)

     # Calculate weighted average.
-    out = qsims.dot(weights)
+    out = qsims.dot(one_sum_weights)
     out.name = qsims.name
     out.attrs = qsims.attrs
     return out
diff --git a/tests/test_external_dataset_access.py b/tests/test_external_dataset_access.py
index 95b44328..91b4e9aa 100644
--- a/tests/test_external_dataset_access.py
+++ b/tests/test_external_dataset_access.py
@@ -1,4 +1,5 @@
 import datetime as dt
+import importlib.util
 import urllib.error
 from pathlib import Path
@@ -30,6 +31,20 @@ def test_get_ECCC_dataset(self):
         assert n_hours <= 36


+@pytest.fixture(scope="function")
+def remote_access_teardown(request):
+    def _teardown():
+        ravenpy_location = Path(importlib.util.find_spec("ravenpy").origin).parent
+        testing = ravenpy_location / "testing"
+
+        if testing.joinpath("main").is_dir():
+            for f in testing.joinpath("main").iterdir():
+                f.unlink()
+            testing.joinpath("main").rmdir()
+
+    request.addfinalizer(_teardown)
+
+
 @pytest.mark.online
 class TestRemoteFileAccess:
     dap_url = "http://test.opendap.org:80/opendap/data/nc/"
@@ -41,7 +56,7 @@ class TestRemoteFileAccess:
         reason="Get file is API rate limited",
         strict=False,
     )
-    def test_get_file_default_cache(self):
+    def test_get_file_default_cache(self, remote_access_teardown):  # noqa: F841
         file = yangtze(branch=self.branch).fetch(fname="ostrich-hbvec/raven-hbvec-salmon.rvi")

         assert Path(default_testdata_cache).exists()
@@ -50,10 +65,7 @@ def test_get_file_default_cache(self):
             header = f.read()
         assert ":FileType rvi ASCII Raven 2.8.2" in header

-    def test_open_dataset(
-        self,
-        tmp_path,
-    ):
+    def test_open_dataset(self, tmp_path, remote_access_teardown):  # noqa: F841
         cache_dir = tmp_path / "yangtze_cache"
         ds = open_dataset(
             name="raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc",
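
Reviewer note on the `pandas` change behind the `routing_product.py` hunk: pandas 3.0 removes the deprecated behavior of passing grouping columns to `DataFrameGroupBy.apply`, along with the `include_groups=True` escape hatch, so the patch switches to selecting every column explicitly after `groupby`. A minimal sketch of the idiom follows; the frame and the helper are hypothetical stand-ins for the routing data and `keep_only_valid_downsubid_and_obs_nm`, with "SubId" playing the role of `self._routing_id_field`.

import pandas as pd

# Hypothetical stand-in for the routing data.
df = pd.DataFrame(
    {
        "SubId": [1, 1, 2],
        "DowSubId": [-1, 10, 20],
        "Obs_NM": ["-9999", "gauge", "-9999"],
    }
)


def pick_representative(g: pd.DataFrame) -> pd.Series:
    # Stand-in for the real helper: collapse each group of duplicated IDs
    # into a single row. The grouping column "SubId" is still present in
    # g, which is what the extractor relies on.
    return g.iloc[0]


# Selecting all columns after groupby keeps the grouping column in the
# frame passed to the applied function: no DeprecationWarning on pandas
# 2.2, and the same result on pandas 3.0, where include_groups=True is
# no longer accepted.
deduplicated = df.groupby("SubId", group_keys=False)[df.columns.to_list()].apply(
    pick_representative
)
print(deduplicated)

This explicit column selection is the forward-compatible option the removed FIXME comment pointed at, which is why the patch can drop both the comment and the `include_groups=True` argument.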