From bc3ff2e5db9eca26bd64eafffa433b0419f240bb Mon Sep 17 00:00:00 2001 From: Lorin Gaertner Date: Wed, 14 Jan 2026 16:34:32 -0800 Subject: [PATCH 1/2] added data units to rsdynamics meta --- .../layer_definitions.json | 53 +++++++++++++------ 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/pipelines/rsdynamics_to_athena/layer_definitions.json b/pipelines/rsdynamics_to_athena/layer_definitions.json index 0cc73aa..d0f049f 100644 --- a/pipelines/rsdynamics_to_athena/layer_definitions.json +++ b/pipelines/rsdynamics_to_athena/layer_definitions.json @@ -31,37 +31,47 @@ { "name": "low_lying_floodplain_area", "friendly_name": "Low-Lying Floodplain Area", - "dtype": "double" + "dtype": "double", + "data_unit": "m**2" }, { "name": "low_lying_floodplain_prop", "friendly_name": "Low-Lying Floodplain Proportion", - "dtype": "double" + "dtype": "double", + "data_unit": "dimensionless", + "description": "Ratio of low-lying floodplain to DGO area" }, { "name": "active_channel_area", "friendly_name": "Active Channel Area", - "dtype": "double" + "dtype": "double", + "data_unit": "m**2" }, { "name": "active_channel_prop", "friendly_name": "Active Channel Proportion", - "dtype": "double" + "dtype": "double", + "data_unit": "dimensionless", + "description": "Ratio of active channel to DGO area" }, { "name": "elevated_floodplain_area", "friendly_name": "Elevated Floodplain Area", - "dtype": "double" + "dtype": "double", + "data_unit": "m**2" }, { "name": "elevated_floodplain_prop", "friendly_name": "Elevated Floodplain Proportion", - "dtype": "double" + "dtype": "double", + "data_unit": "dimensionless", + "description": "Ratio of elevated floodplain to DGO area" }, { "name": "floodplain_area", "friendly_name": "Floodplain Area", - "dtype": "double" + "dtype": "double", + "data_unit": "m**2" }, { "name": "floodplain_prop", @@ -71,17 +81,20 @@ { "name": "centerline_length", "friendly_name": "Centerline Length", - "dtype": "double" + "dtype": "double", + "data_unit": "m" }, 
{ "name": "segment_area", "friendly_name": "Segment Area", - "dtype": "double" + "dtype": "double", + "data_unit": "m**2" }, { "name": "integrated_width", "friendly_name": "Integrated Width", - "dtype": "double" + "dtype": "double", + "data_unit": "m" }, { "name": "dgo_geom", @@ -96,7 +109,8 @@ { "name": "rd_project_id", "friendly_name": "RSDynamics Project ID", - "dtype": "string" + "dtype": "string", + "data_unit": "NA" }, { "name": "rd_date_created_ts", @@ -123,12 +137,13 @@ { "layer_id": "rsdynamics_metrics", "layer_name": "RSDynamics Metrics", - "description": "Riverscapes Dynamics Project Metrics data for each DGO, landcover, epoch", + "description": "Riverscapes Dynamics Project Metrics data for each DGO, landcover, epoch. Join with rsdynamics on rd_project_id + dgo_id.", "columns": [ { "name": "dgo_id", "friendly_name": "DGO ID", - "dtype": "bigint" + "dtype": "bigint", + "data_unit": "NA" }, { "name": "landcover", @@ -153,22 +168,26 @@ { "name": "area", "friendly_name": "Area", - "dtype": "double" + "dtype": "double", + "data_unit": "m**2" }, { "name": "areapc", "friendly_name": "Area Percent", - "dtype": "double" + "dtype": "double", + "data_unit": "%" }, { "name": "width", "friendly_name": "Width", - "dtype": "double" + "dtype": "double", + "data_unit": "m" }, { "name": "widthpc", "friendly_name": "Width Percent", - "dtype": "double" + "dtype": "double", + "data_unit": "%" }, { "name": "huc", From e10bb4a8680b4389169267a2d47a3e2579333973 Mon Sep 17 00:00:00 2001 From: Lorin Gaertner Date: Thu, 15 Jan 2026 09:16:00 -0800 Subject: [PATCH 2/2] convert from 2193 to 4326 --- .../rsdynamics_to_athena/rsdynamics_to_athena_parquet.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pipelines/rsdynamics_to_athena/rsdynamics_to_athena_parquet.py b/pipelines/rsdynamics_to_athena/rsdynamics_to_athena_parquet.py index 78aba35..14ba203 100644 --- a/pipelines/rsdynamics_to_athena/rsdynamics_to_athena_parquet.py +++ 
b/pipelines/rsdynamics_to_athena/rsdynamics_to_athena_parquet.py @@ -177,6 +177,7 @@ def extract_dgo_metrics_to_dataframe(gpkg_path: str, spatialite_path: str) -> pd t2 = time.time() sql = f'SELECT {", ".join(col_names)} FROM vbet_dgos' + log.debug(f"Select query:\n{sql}") df = pd.read_sql_query(sql, conn) log.debug(f"Loaded data from sql to dataframe in {time.time() - t2:.2f}s. Shape {df.shape}") @@ -208,7 +209,8 @@ def extract_dgo_metrics_to_dataframe(gpkg_path: str, spatialite_path: str) -> pd df_final = df_final.pivot_table( index=['fid', 'landcover', 'epoch_length', 'epoch_name', 'confidence'], columns='metric_name', - values='measurement' + values='measurement', + observed=True ).reset_index() log.debug(f"Pivot data {time.time() - t6:.2f}s") @@ -222,6 +224,7 @@ def extract_dgo_metrics_to_dataframe(gpkg_path: str, spatialite_path: str) -> pd def extract_dgos_to_geodataframe(gpkg_path: str, spatialite_path: str) -> gpd.GeoDataFrame: """ Connect to the GeoPackage, run the SQL, and return a GeoDataFrame. + Assumes the source geometries are in EPSG:2193 (true for at least one of the New Zealand projects) and reprojects them to EPSG:4326. """ conn = apsw.Connection(gpkg_path) conn.enable_load_extension(True) @@ -265,7 +268,8 @@ def extract_dgos_to_geodataframe(gpkg_path: str, spatialite_path: str) -> gpd.Ge df = df.loc[:, [col for col in df.columns if col != 'dgoid']] # convert wkb geometry to shapely objects df['dgo_geom'] = df['dgo_geom'].apply(wkb.loads) # pyright: ignore[reportCallIssue, reportArgumentType] - gdf = gpd.GeoDataFrame(df, geometry='dgo_geom', crs='EPSG:4326') + gdf = gpd.GeoDataFrame(df, geometry='dgo_geom', crs='EPSG:2193') # SOURCE EPSG + gdf = gdf.to_crs('EPSG:4326') # DESTINATION EPSG bbox_df = gdf.geometry.bounds.rename(columns={'minx': 'xmin', 'miny': 'ymin', 'maxx': 'xmax', 'maxy': 'ymax'}) # Combine into a struct-like dict for each row