diff --git a/.vscode/launch.json b/.vscode/launch.json
index c10120a..392ec96 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -56,6 +56,31 @@
       "console": "integratedTerminal",
       "justMyCode": true
     },
+    {
+      "name": "Apply attribution to projects in bulk",
+      "type": "debugpy",
+      "request": "launch",
+      "program": "${workspaceFolder}/scripts/utility/project_attribution/apply_attribution.py",
+      "cwd": "${workspaceFolder}",
+      "console": "integratedTerminal",
+      "envFile": "${workspaceFolder}/.env",
+      "env": {
+        "PYTHONPATH": "${workspaceFolder}"
+      },
+      "args": [
+        "--stage",
+        "production", // staging or production
+        "--mode",
+        "ADD", // ADD, REPLACE or REMOVE
+        "--csv-folder",
+        "{env:CSV_FOLDER}",
+        "--organization",
+        "f1b8e6ae-d103-4ffb-b402-a9ee0eaf7607", // NAR on production
+        "--roles", // OWNER, DESIGNER, CO_FUNDER, etc.
+        "CO_FUNDER",
+        "--verbose"
+      ]
+    },
     {
       "name": "📦 Merge Projects Tool",
       "type": "debugpy",
@@ -179,7 +204,7 @@
         "PYTHONPATH": "${workspaceFolder}"
       },
       "args": [
-        "production",
+        "staging",
         "{env:CSV_FOLDER}",
       ]
     },
diff --git a/README.md b/README.md
index 779685f..54be7c0 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,10 @@
 
 This project is designed to simplify interaction with the Riverscapes GraphQL API. It uses modern Python packaging standards, including a `pyproject.toml` file for configuration and dependency management.
 
+### Data Exchange API (GraphQL definitions)
+
+This project includes a static local copy of the Riverscapes Data Exchange API schema. We keep a local copy because we have had trouble getting linter/VSCode introspection to work with the online version. The trade-off is that if the API changes, this copy needs to be updated.
+
 ## Using UV for Environment Management
 
 This project uses [uv](https://github.com/astral-sh/uv) to manage Python virtual environments and dependencies. `uv` is an alternative to tools like `pipenv` and `poetry`.
diff --git a/pydex/classes/RiverscapesAPI.py b/pydex/classes/RiverscapesAPI.py
index 642a00e..32785ef 100644
--- a/pydex/classes/RiverscapesAPI.py
+++ b/pydex/classes/RiverscapesAPI.py
@@ -1,4 +1,5 @@
 import os
+from pathlib import Path
 from typing import Dict, List, Generator, Tuple
 import webbrowser
 import re
@@ -325,17 +326,21 @@ def load_query(self, query_name: str) -> str:
         with open(os.path.join(os.path.dirname(__file__), '..', 'graphql', 'queries', f'{query_name}.graphql'), 'r', encoding='utf-8') as queryFile:
             return queryFile.read()
 
-    def load_mutation(self, mutation_name: str) -> str:
-        """ Load a mutation file from the file system.
+    def load_mutation(self, mutation_name: str | Path) -> str:
+        """ Load a mutation file from the library's graphql/mutations folder, or from a specific path.
 
         Args:
-            mutationName (str): _description_
+            mutation_name (str | Path): name of a mutation in the library, or a Path to a .graphql file
 
         Returns:
-            str: _description_
+            str: the contents of the file
         """
-        with open(os.path.join(os.path.dirname(__file__), '..', 'graphql', 'mutations', f'{mutation_name}.graphql'), 'r', encoding='utf-8') as queryFile:
-            return queryFile.read()
+        if Path(mutation_name).exists():
+            mutation_file_path = Path(mutation_name)
+        else:
+            mutation_file_path = Path(__file__).parent.parent / 'graphql' / 'mutations' / f'{mutation_name}.graphql'
+
+        return mutation_file_path.read_text(encoding='utf-8')
 
     def search(self, search_params: RiverscapesSearchParams, progress_bar: bool = False, page_size: int = 500, sort: List[str] = None, max_results: int = None, search_query_name: str = None) -> Generator[Tuple[RiverscapesProject, Dict, int], None, None]:
         """ A simple function to make a yielded search on the riverscapes API
@@ -542,18 +547,18 @@ def search_count(self, search_params: RiverscapesSearchParams):
         stats = results['data']['searchProjects']['stats']
         return (total, stats)
 
-    def run_query(self, query, variables):
-        """ A simple function to use requests.post to make the API call. Note the json= section.
+    def run_query(self, query: str, variables: dict) -> dict:
+        """A simple function to use requests.post to make the API call. Note the json= section.
 
         Args:
-            query (_type_): _description_
-            variables (_type_): _description_
+            query (str): GraphQL query string
+            variables (dict): mapping of variable names to values
 
         Raises:
-            Exception: _description_
+            Exception: RiverscapesAPIException
 
         Returns:
-            _type_: _description_
+            dict: parsed JSON response from the API
         """
         headers = {"authorization": "Bearer " + self.access_token} if self.access_token else {}
         request = requests.post(self.uri, json={
diff --git a/pydex/generate_python_classes_from_graphql_api.py b/pydex/generate_python_classes_from_graphql_api.py
new file mode 100644
index 0000000..a0160f6
--- /dev/null
+++ b/pydex/generate_python_classes_from_graphql_api.py
@@ -0,0 +1,143 @@
+"""
+Generate Python TypedDict definitions from a GraphQL schema.
+
+This script parses the project's GraphQL schema file (by default
+'pydex/graphql/riverscapes.schema.graphql', the schema referenced by 'graphql.config.json')
+and generates Python `TypedDict` classes for all InputObjects and Enums.
+This allows for type-safe construction of GraphQL mutation payloads.
+
+Quickly built with copilot/gemini 3 pro (preview) 2026-01-27 by Lorin
+NOTE: If we want to go deeper, there are established libraries for this:
+* ariadne https://github.com/mirumee/ariadne-codegen/
+* https://github.com/sauldom102/gql_schema_codegen
+e.g. could add richer scalar types, or set total=True if all fields are required
+"""
+
+import argparse
+import keyword
+from pathlib import Path
+
+from graphql import (
+    EnumTypeDefinitionNode,
+    InputObjectTypeDefinitionNode,
+    ListTypeNode,
+    NamedTypeNode,
+    NonNullTypeNode,
+    TypeNode,
+    parse,
+)
+
+
+def get_python_type(type_node: TypeNode) -> str:
+    """
+    Recursively resolve GraphQL types to modern Python type strings.
+
+    Args:
+        type_node: The GraphQL AST node representing the type.
+
+    Returns:
+        A string representing the Python type (e.g., 'list[str]', 'int').
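+
+    For example, a GraphQL '[String!]!' resolves to "list[str]", and a named
+    input type such as 'OwnerInput' resolves to the quoted forward reference "'OwnerInput'".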
+    """
+    if isinstance(type_node, NonNullTypeNode):
+        return get_python_type(type_node.type)
+
+    if isinstance(type_node, ListTypeNode):
+        inner_type = get_python_type(type_node.type)
+        return f"list[{inner_type}]"
+
+    if isinstance(type_node, NamedTypeNode):
+        name = type_node.name.value
+        mapping = {
+            'String': 'str',
+            'ID': 'str',
+            'Boolean': 'bool',
+            'Int': 'int',
+            'Float': 'float'
+        }
+        # Use quotes for forward references to other classes
+        return mapping.get(name, f"'{name}'")
+
+    return "Any"
+
+
+def generate_types(schema_path: Path, output_path: Path) -> None:
+    """
+    Parse the schema and write Python TypedDict definitions to a file.
+
+    Args:
+        schema_path: Path to the .graphql schema file.
+        output_path: Path to the output .py file.
+    """
+    if not schema_path.exists():
+        print(f"Error: Schema file not found at {schema_path}")
+        return
+
+    print(f"Reading schema from: {schema_path}")
+    print(f"Writing types to: {output_path}")
+
+    with open(schema_path, 'r', encoding='utf-8') as f:
+        schema_content = f.read()
+
+    doc = parse(schema_content)
+
+    # Ensure output directory exists
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    with open(output_path, 'w', encoding='utf-8') as f:
+        f.write(f'"""\nGenerated from {schema_path.name} using {Path(__file__).name}\n"""\n')
+        f.write("from typing import TypedDict\n")
+        f.write("from enum import Enum\n\n\n")
+
+        enum_count = 0
+        input_count = 0
+
+        # Pass 1: generate Enums
+        for definition in doc.definitions:
+            if isinstance(definition, EnumTypeDefinitionNode):
+                enum_count += 1
+                name = definition.name.value
+                f.write(f"class {name}(str, Enum):\n")
+                if not definition.values:
+                    f.write("    pass\n\n")
+                    continue
+
+                for value_def in definition.values:
+                    val = value_def.name.value
+                    # Handle Python reserved keywords or invalid identifiers if necessary
+                    # For now assume schema enum values are safe, valid Python identifiers
+                    f.write(f"    {val} = '{val}'\n")
+                f.write("\n")
+
+        # Pass 2: generate Input Objects
+        for definition in doc.definitions:
+            # We focus on Input types as they are critical for constructing mutation payloads
+            if isinstance(definition, InputObjectTypeDefinitionNode):
+                input_count += 1
+                name = definition.name.value
+                fields = list(definition.fields or [])
+
+                if not fields:
+                    f.write(f"class {name}(TypedDict, total=False):\n")
+                    f.write("    pass\n\n")
+                    continue
+
+                # Field names that are Python keywords (e.g. 'from') cannot be written
+                # with class syntax, so fall back to the functional TypedDict form
+                if any(keyword.iskeyword(field.name.value) for field in fields):
+                    items = ", ".join(f"'{fld.name.value}': {get_python_type(fld.type)}" for fld in fields)
+                    f.write("# Field name collides with a Python keyword, so the functional TypedDict form is used\n")
+                    f.write(f"{name} = TypedDict('{name}', {{{items}}}, total=False)\n\n")
+                    continue
+
+                f.write(f"class {name}(TypedDict, total=False):\n")
+                for field in fields:
+                    field_name = field.name.value
+                    python_type = get_python_type(field.type)
+                    f.write(f"    {field_name}: {python_type}\n")
+                f.write("\n")
+
+    print(f"Successfully generated {enum_count} Enums and {input_count} Input Types.")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Generate Python TypedDicts from GraphQL Schema")
+
+    default_schema = Path("pydex/graphql/riverscapes.schema.graphql")
+    default_output = Path("pydex/generated_types.py")
+
+    parser.add_argument('--schema', type=Path, default=default_schema,
+                        help='Path to riverscapes.schema.graphql')
+    parser.add_argument('--output', type=Path, default=default_output,
+                        help='Path to output .py file')
+
+    args = parser.parse_args()
+    generate_types(args.schema, args.output)
+    print('DONE.')
diff --git a/pydex/generated_types.py b/pydex/generated_types.py
new file mode 100644
index 0000000..76277d0
--- /dev/null
+++ b/pydex/generated_types.py
@@ -0,0 +1,542 @@
+"""
+Generated from riverscapes.schema.graphql using generate_python_classes_from_graphql_api.py
+"""
+from typing import TypedDict
+from enum import Enum
+
+
+class AttributionRoleEnum(str, Enum):
+    ANALYST = 
'ANALYST' + CONTRIBUTOR = 'CONTRIBUTOR' + CO_FUNDER = 'CO_FUNDER' + DESIGNER = 'DESIGNER' + FUNDER = 'FUNDER' + OWNER = 'OWNER' + QA_QC = 'QA_QC' + SUPPORTER = 'SUPPORTER' + + +class DatasetContainerTypesEnum(str, Enum): + CommonDatasets = 'CommonDatasets' + Configuration = 'Configuration' + Datasets = 'Datasets' + Inputs = 'Inputs' + Intermediates = 'Intermediates' + Logs = 'Logs' + Outputs = 'Outputs' + Products = 'Products' + + +class DatasetTypeEnum(str, Enum): + AuxInstrumentFile = 'AuxInstrumentFile' + CSV = 'CSV' + ConfigFile = 'ConfigFile' + DEM = 'DEM' + DataTable = 'DataTable' + Database = 'Database' + File = 'File' + Geopackage = 'Geopackage' + HTMLFile = 'HTMLFile' + HillShade = 'HillShade' + Image = 'Image' + InstrumentFile = 'InstrumentFile' + LogFile = 'LogFile' + MSAccessDB = 'MSAccessDB' + PDF = 'PDF' + Raster = 'Raster' + SQLiteDB = 'SQLiteDB' + SurveyQualityDB = 'SurveyQualityDB' + TIN = 'TIN' + Vector = 'Vector' + Video = 'Video' + ZipFile = 'ZipFile' + + +class DateWithinEnum(str, Enum): + ONE_DAY = 'ONE_DAY' + ONE_MONTH = 'ONE_MONTH' + ONE_WEEK = 'ONE_WEEK' + SIX_MONTHS = 'SIX_MONTHS' + + +class EntitiesWithImagesEnum(str, Enum): + COLLECTION = 'COLLECTION' + ORGANIZATION = 'ORGANIZATION' + PROJECT = 'PROJECT' + PROJECT_TYPE = 'PROJECT_TYPE' + SAVED_SEARCH = 'SAVED_SEARCH' + USER = 'USER' + + +class EntityDeleteActionsEnum(str, Enum): + DELETE = 'DELETE' + DELETE_COMPLETE = 'DELETE_COMPLETE' + MAKE_PUBLIC = 'MAKE_PUBLIC' + REQUEST_TRANSFER = 'REQUEST_TRANSFER' + + +class ImageTypeEnum(str, Enum): + AVATAR = 'AVATAR' + HERO = 'HERO' + LOGO = 'LOGO' + + +class JobStatusEnum(str, Enum): + FAILED = 'FAILED' + PROCESSING = 'PROCESSING' + READY = 'READY' + SUCCESS = 'SUCCESS' + UNKNOWN = 'UNKNOWN' + + +class MetaDataExtEnum(str, Enum): + DATASET = 'DATASET' + PROJECT = 'PROJECT' + WAREHOUSE = 'WAREHOUSE' + + +class MetaDataTypeEnum(str, Enum): + BOOLEAN = 'BOOLEAN' + FILEPATH = 'FILEPATH' + FLOAT = 'FLOAT' + GUID = 'GUID' + HIDDEN = 'HIDDEN' + IMAGE = 'IMAGE' + INT = 'INT' + ISODATE = 'ISODATE' + JSON = 'JSON' + MARKDOWN = 'MARKDOWN' + RICHTEXT = 'RICHTEXT' + STRING = 'STRING' + TIMESTAMP = 'TIMESTAMP' + URL = 'URL' + VIDEO = 'VIDEO' + + +class NotificationActionsEnum(str, Enum): + CREATED = 'CREATED' + DELETED = 'DELETED' + RENAMED = 'RENAMED' + TRANSFERRED = 'TRANSFERRED' + UPDATED = 'UPDATED' + + +class NotificationOperationEnum(str, Enum): + DELETE = 'DELETE' + MARK_READ = 'MARK_READ' + MARK_UNREAD = 'MARK_UNREAD' + + +class NotificationTypesEnum(str, Enum): + COLLECTION = 'COLLECTION' + ORGANIZATION = 'ORGANIZATION' + PROJECT = 'PROJECT' + SAVED_SEARCH = 'SAVED_SEARCH' + USER = 'USER' + + +class OrganizationInviteRoleEnum(str, Enum): + ADMIN = 'ADMIN' + CONTRIBUTOR = 'CONTRIBUTOR' + VIEWER = 'VIEWER' + + +class OrganizationInviteStateEnum(str, Enum): + ACCEPTED = 'ACCEPTED' + EXPIRED = 'EXPIRED' + INVITED = 'INVITED' + REJECTED = 'REJECTED' + REQUESTED = 'REQUESTED' + + +class OrganizationRoleEnum(str, Enum): + ADMIN = 'ADMIN' + CONTRIBUTOR = 'CONTRIBUTOR' + NONE = 'NONE' + OWNER = 'OWNER' + VIEWER = 'VIEWER' + + +class OwnerInputTypesEnum(str, Enum): + ORGANIZATION = 'ORGANIZATION' + USER = 'USER' + + +class ProjectDeleteChoicesEnum(str, Enum): + DELETE = 'DELETE' + DELETE_COMPLETE = 'DELETE_COMPLETE' + + +class ProjectGroupVisibilityEnum(str, Enum): + PUBLIC = 'PUBLIC' + SECRET = 'SECRET' + + +class ProjectTreeLayerTypeEnum(str, Enum): + FILE = 'FILE' + LINE = 'LINE' + POINT = 'POINT' + POLYGON = 'POLYGON' + RASTER = 'RASTER' + REPORT = 'REPORT' + TIN = 'TIN' + + 
+class ProjectTypeStateEnum(str, Enum): + ACTIVE = 'ACTIVE' + DELETED = 'DELETED' + SUGGESTED = 'SUGGESTED' + + +class ProjectVisibilityEnum(str, Enum): + PRIVATE = 'PRIVATE' + PUBLIC = 'PUBLIC' + SECRET = 'SECRET' + + +class QAQCStateEnum(str, Enum): + FAILED = 'FAILED' + PASSED = 'PASSED' + PROVISIONAL = 'PROVISIONAL' + + +class RampTypeEnum(str, Enum): + DISCRETE = 'DISCRETE' + EXACT = 'EXACT' + INTERPOLATED = 'INTERPOLATED' + + +class SearchSortEnum(str, Enum): + AREA_DESC = 'AREA_DESC' + DATE_CREATED_ASC = 'DATE_CREATED_ASC' + DATE_CREATED_DESC = 'DATE_CREATED_DESC' + DATE_UPDATED_ASC = 'DATE_UPDATED_ASC' + DATE_UPDATED_DESC = 'DATE_UPDATED_DESC' + MINE = 'MINE' + MODEL_VERSION_ASC = 'MODEL_VERSION_ASC' + MODEL_VERSION_DESC = 'MODEL_VERSION_DESC' + NAME_ASC = 'NAME_ASC' + NAME_DESC = 'NAME_DESC' + + +class SearchableTypesEnum(str, Enum): + COLLECTION = 'COLLECTION' + ORGANIZATION = 'ORGANIZATION' + PROJECT = 'PROJECT' + SAVED_SEARCH = 'SAVED_SEARCH' + USER = 'USER' + + +class SeverityEnum(str, Enum): + CRITICAL = 'CRITICAL' + DEBUG = 'DEBUG' + ERROR = 'ERROR' + INFO = 'INFO' + WARNING = 'WARNING' + + +class StarrableTypesEnum(str, Enum): + COLLECTION = 'COLLECTION' + ORGANIZATION = 'ORGANIZATION' + PROJECT = 'PROJECT' + SAVED_SEARCH = 'SAVED_SEARCH' + USER = 'USER' + + +class SymbologyStateEnum(str, Enum): + ERROR = 'ERROR' + FETCHING = 'FETCHING' + FOUND = 'FOUND' + MISSING = 'MISSING' + NOT_APPLICABLE = 'NOT_APPLICABLE' + UNKNOWN = 'UNKNOWN' + + +class TileTypesEnum(str, Enum): + HTML = 'HTML' + RASTER = 'RASTER' + VECTOR_GPKG = 'VECTOR_GPKG' + VECTOR_SHP = 'VECTOR_SHP' + + +class TilingStateEnum(str, Enum): + CREATING = 'CREATING' + FETCHING = 'FETCHING' + FETCH_ERROR = 'FETCH_ERROR' + INDEX_NOT_FOUND = 'INDEX_NOT_FOUND' + LAYER_NOT_FOUND = 'LAYER_NOT_FOUND' + NOT_APPLICABLE = 'NOT_APPLICABLE' + NO_GEOMETRIES = 'NO_GEOMETRIES' + QUEUED = 'QUEUED' + SUCCESS = 'SUCCESS' + TILING_ERROR = 'TILING_ERROR' + TIMEOUT = 'TIMEOUT' + UNKNOWN = 'UNKNOWN' + + +class TransferStateEnum(str, Enum): + ACCEPTED = 'ACCEPTED' + EXPIRED = 'EXPIRED' + IN_PROGRESS = 'IN_PROGRESS' + PROPOSED = 'PROPOSED' + REJECTED = 'REJECTED' + + +class TransferrableTypesEnum(str, Enum): + COLLECTION = 'COLLECTION' + ORGANIZATION = 'ORGANIZATION' + PROJECT = 'PROJECT' + USER = 'USER' + + +class CollectionInput(TypedDict, total=False): + citation: str + clearContact: bool + clearHeroImage: bool + contact: 'OwnerInput' + description: str + heroImageToken: str + meta: list['MetaDataInput'] + name: str + summary: str + tags: list[str] + visibility: 'ProjectGroupVisibilityEnum' + + +class DBObjNotificationsInput(TypedDict, total=False): + createdById: str + createdByName: str + createdOn: 'DateTime' + id: str + name: str + summary: str + updatedById: str + updatedByName: str + updatedOn: 'DateTime' + + +class DatasetInput(TypedDict, total=False): + citation: str + description: str + extRef: str + layers: list['DatasetLayerInput'] + localPath: str + meta: list['MetaDataInput'] + name: str + rsXPath: str + summary: str + + +class DatasetLayerInput(TypedDict, total=False): + citation: str + description: str + extRef: str + lyrName: str + meta: list['MetaDataInput'] + name: str + summary: str + + +class DatasetLayerUpdate(TypedDict, total=False): + citation: str + description: str + meta: list['MetaDataInput'] + name: str + summary: str + + +class DatasetUpdate(TypedDict, total=False): + citation: str + description: str + dsId: str + meta: list['MetaDataInput'] + name: str + summary: str + + +class 
EntityDeletionOptions(TypedDict, total=False):
+    totalDelete: bool
+    transfer: 'TransferEntityItemsInput'
+
+
+class FileDownloadMetaInput(TypedDict, total=False):
+    contentType: str
+    localPath: str
+    md5: str
+    size: 'BigInt'
+
+
+class LinkInput(TypedDict, total=False):
+    alt: str
+    href: 'URL'
+    text: str
+
+
+class MetaDataInput(TypedDict, total=False):
+    ext: 'MetaDataExtEnum'
+    key: str
+    locked: bool
+    type: 'MetaDataTypeEnum'
+    value: str
+
+
+class NotificationInput(TypedDict, total=False):
+    object: 'DBObjNotificationsInput'
+    subject: 'DBObjNotificationsInput'
+    type: 'NotificationTypesEnum'
+    verb: 'NotificationActionsEnum'
+
+
+class OrganizationInput(TypedDict, total=False):
+    clearLogo: bool
+    description: str
+    logoToken: str
+    meta: list['MetaDataInput']
+    name: str
+    preferences: 'JSONObject'
+    social: 'SocialLinksInput'
+    summary: str
+
+
+class OwnerInput(TypedDict, total=False):
+    id: str
+    type: 'OwnerInputTypesEnum'
+
+
+class ProfileInput(TypedDict, total=False):
+    affiliations: list['UserAffiliationInput']
+    avatarToken: str
+    clearAvatar: bool
+    description: str
+    jobTitle: str
+    location: str
+    meta: list['MetaDataInput']
+    name: str
+    preferences: 'JSONObject'
+    socialLinks: 'SocialLinksInput'
+    summary: str
+
+
+class ProjectAttributionInput(TypedDict, total=False):
+    organizationId: str
+    roles: list['AttributionRoleEnum']
+
+
+class ProjectInput(TypedDict, total=False):
+    archived: bool
+    attribution: list['ProjectAttributionInput']
+    boundsToken: str
+    citation: str
+    clearBounds: bool
+    clearHeroImage: bool
+    datasets: list['DatasetInput']
+    deleteDatasets: list[str]
+    description: str
+    heroImageToken: str
+    meta: list['MetaDataInput']
+    name: str
+    qaqc: list['QAQCEventInput']
+    summary: str
+    tags: list[str]
+    totalSize: 'BigInt'
+    visibility: 'ProjectVisibilityEnum'
+
+
+class ProjectSearchParamsInput(TypedDict, total=False):
+    attributedOrgId: str
+    bbox: list[float]
+    boundsId: str
+    collection: str
+    createdOn: 'SearchDateInput'
+    createdWithin: 'DateWithinEnum'
+    editableOnly: bool
+    excludeArchived: bool
+    keywords: str
+    meta: list['MetaDataInput']
+    name: str
+    ownedBy: 'OwnerInput'
+    projectTypeId: str
+    tags: list[str]
+    updatedOn: 'SearchDateInput'
+    visibility: 'ProjectVisibilityEnum'
+
+
+class ProjectTypeInput(TypedDict, total=False):
+    clearLogo: bool
+    description: str
+    logoToken: str
+    meta: list['MetaDataInput']
+    name: str
+    summary: str
+    url: 'URL'
+
+
+class QAQCEventInput(TypedDict, total=False):
+    datePerformed: 'DateTime'
+    description: str
+    meta: list['MetaDataInput']
+    name: str
+    performedBy: str
+    state: 'QAQCStateEnum'
+    summary: str
+    supportingLinks: list['LinkInput']
+
+
+class SavedSearchInput(TypedDict, total=False):
+    citation: str
+    clearHeroImage: bool
+    defaultSort: list['SearchSortEnum']
+    description: str
+    heroImageToken: str
+    meta: list['MetaDataInput']
+    name: str
+    searchParams: 'ProjectSearchParamsInput'
+    summary: str
+    tags: list[str]
+    visibility: 'ProjectGroupVisibilityEnum'
+
+
+# Field name collides with a Python keyword, so the functional TypedDict form is used
+SearchDateInput = TypedDict('SearchDateInput', {'from': 'DateTime', 'to': 'DateTime'}, total=False)
+
+
+class SearchParamsInput(TypedDict, total=False):
+    createdOn: 'SearchDateInput'
+    createdWithin: 'DateWithinEnum'
+    editableOnly: bool
+    keywords: str
+    meta: list['MetaDataInput']
+    name: str
+    ownedBy: 'OwnerInput'
+    tags: list[str]
+    updatedOn: 'SearchDateInput'
+    visibility: 'ProjectGroupVisibilityEnum'
+
+
+class SocialLinksInput(TypedDict, total=False):
+    facebook: str
+    instagram: str
+    linkedIn: str
+    tiktok: str
+    twitter: str
+    website: 'URL'
+
+
+class TransferEntityItemsInput(TypedDict, total=False):
+    note: str
+    transferTo: 'OwnerInput'
+
+
+class TransferInput(TypedDict, total=False):
+    includeProjects: bool
+    note: str
+    objectIds: list[str]
+    transferTo: 'OwnerInput'
+    transferType: 'TransferrableTypesEnum'
+
+
+class UserAffiliationInput(TypedDict, total=False):
+    affiliationRole: str
+    name: str
+    url: 'URL'
diff --git a/pyproject.toml b/pyproject.toml
index a0f50be..bd802a4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,3 +46,22 @@
 
 [tool.setuptools.package-data]
 pydex = ["**/*.graphql", "**/*.json"]
+
+[tool.ruff]
+line-length = 240
+target-version = "py312"
+
+[tool.ruff.format]
+quote-style = "preserve"
+
+[tool.ruff.lint]
+# I = isort (import sorting)
+# E, F = pycodestyle / Pyflakes (the standard flake8 errors)
+# PL = Pylint rules
+# N = pep8-naming checks
+select = ["I", "E", "F", "PL", "N"]
+
+[tool.ruff.lint.isort]
+# Tells Ruff exactly which imports are "local" to this project
+known-first-party = ["pydex"]
+combine-as-imports = true
\ No newline at end of file
diff --git a/scripts/utility/project_attribution/apply_attribution.py b/scripts/utility/project_attribution/apply_attribution.py
new file mode 100644
index 0000000..650edd2
--- /dev/null
+++ b/scripts/utility/project_attribution/apply_attribution.py
@@ -0,0 +1,374 @@
+"""Bulk apply project attribution to projects in the Data Exchange
+
+The workhorse function is `apply_attribution`. The inputs are a list of projects, an organization ID and a list of roles.
+The main function will help the user select a CSV file (from a specified folder) containing the list of projects.
+
+A project's attribution consists of a list of attribution objects,
+each of which is an Organization and a list of Roles from the AttributionRoleEnum.
+
+There are three MODES for attribution change:
+1. ADD (do not change existing, apply new on top of it)
+2. REPLACE (remove any existing attribution and apply new)
+3. REMOVE (remove specific attribution but leave all others in place)
+
+* This currently implements all modes
+* In REMOVE mode, it removes _all_ attribution for that organization (leaving other organizations in place)
+* It only updates a project if there is a change.
+* When a project is updated by the script, the project will show as having been UPDATED BY the user running the script (whoever logged into the Data Exchange)
+
+## Example usage:
+"Add BLM as funder to all the 2025 CONUS projects."
+* Run an Athena query to get the IDs of all projects tagged 2025CONUS: `SELECT project_id FROM conus_projects WHERE contains(tags,'2025CONUS')`
+* Download the results as a CSV, e.g. 
`conusprojects.csv`
+* Look up the BLM organization ID in the Data Exchange: 876d3961-08f2-4db5-aff2-7ccfa391b984
+* Run `apply_attribution --stage production --csv-file conusprojects.csv --organization 876d3961-08f2-4db5-aff2-7ccfa391b984 --roles FUNDER --mode ADD`
+
+Lorin Gaertner
+January 2026
+
+These class definitions originally came from pydex.generated_types (and could be imported from there):
+    AttributionRoleEnum, ProjectAttributionInput, ProjectInput
+
+Possible enhancements:
+* if the organization or roles are not provided on the command line, use an inquirer prompt to get them from the user (with multi-select for roles)
+* a more selective removal option - to remove a specific role for an organization
+"""
+
+import argparse
+import logging
+import uuid
+from datetime import datetime
+from enum import Enum
+from pathlib import Path
+from typing import Any, TypedDict
+
+import inquirer
+from rsxml import Logger, ProgressBar, dotenv
+
+from pydex import RiverscapesAPI
+
+
+class AttributionRoleEnum(str, Enum):
+    ANALYST = "ANALYST"
+    CONTRIBUTOR = "CONTRIBUTOR"
+    CO_FUNDER = "CO_FUNDER"
+    DESIGNER = "DESIGNER"
+    FUNDER = "FUNDER"
+    OWNER = "OWNER"
+    QA_QC = "QA_QC"
+    SUPPORTER = "SUPPORTER"
+
+
+class ProjectAttributionInput(TypedDict, total=True):
+    organizationId: str
+    roles: list["AttributionRoleEnum"]
+
+
+class ProjectInput(TypedDict, total=False):
+    archived: bool
+    attribution: list["ProjectAttributionInput"]
+    description: str
+    name: str
+    summary: str
+    tags: list[str]
+
+
+# ============================================================================================
+
+
+class ProjectAttributionOutput(TypedDict):
+    """Model for what we get back from the API"""
+
+    organization: dict[str, Any]  # e.g. {'id': '...', 'name': '...'}
+    roles: list[str]
+
+
+class UpdateMode(str, Enum):
+    """Allowed options for attribution changes"""
+
+    ADD = "ADD"
+    REPLACE = "REPLACE"
+    REMOVE = "REMOVE"
+
+
+def build_attribution_params() -> tuple[list[str], str, list[str]]:
+    """Assemble:
+    * list of project IDs to apply attribution to
+    * ProjectAttribution Object Organization ID
+    * ProjectAttribution Object list of AttributionRoleEnum
+    Currently returns hardcoded test values.
+    """
+    return (["73cc1ada-c82b-499e-b3b2-5dc70393e340"], "c3addb86-a96d-4831-99eb-3899764924da", ["ANALYST", "DESIGNER"])
+
+
+def normalize_api_data(current_data: list[Any]) -> list[ProjectAttributionInput]:
+    """Helper: Convert raw API Output (Nested Dicts) to Input Format (TypedDict)"""
+    normalized_list: list[ProjectAttributionInput] = []
+
+    if not current_data:
+        return normalized_list
+
+    for item in current_data:
+        # Safety check for malformed data
+        if not item.get("organization") or not item["organization"].get("id"):
+            continue
+
+        normalized_list.append(
+            {
+                "organizationId": item["organization"]["id"],
+                # Convert string roles back to proper Enums
+                "roles": [AttributionRoleEnum(r) for r in item.get("roles", [])],
+            }
+        )
+    return normalized_list
+
+
+def is_attribution_equal(list_a: list[ProjectAttributionInput], list_b: list[ProjectAttributionInput]) -> bool:
+    """Compare two attribution lists.
+    * Checks length
+    * Checks Organization ID Match
+    * Checks Roles (Order agnostic using Sets)
+    """
+    if len(list_a) != len(list_b):
+        return False
+
+    # We assume the order of organizations matters (e.g. Primary first)
+    for a, b in zip(list_a, list_b):
+        if a["organizationId"] != b["organizationId"]:
+            return False
+
+        # Compare roles as sets to ignore order (['A', 'B'] == ['B', 'A'])
+        if set(a["roles"]) != set(b["roles"]):
+            return False
+
+    return True
+
+
+def resolve_attribution_list(current_data: list[ProjectAttributionInput], target_attrib_item: ProjectAttributionInput, mode: UpdateMode) -> list[ProjectAttributionInput]:
+    """
+    Takes the normalized input list, applies the mode logic, and returns a new list:
+    * for ADD - adds the specific attribution in target to existing
+    * for REPLACE - all existing attributions ignored, target returned
+    * for REMOVE - removes all attribution for the organization specified in target
+    # TODO: Allow for more targeted removal of a specific role
+    """
+    if mode == UpdateMode.REPLACE:
+        # Override everything
+        return [target_attrib_item]
+
+    target_org_id = target_attrib_item["organizationId"]
+    if mode == UpdateMode.REMOVE:
+        # Return list without this org
+        return [x for x in current_data if x["organizationId"] != target_org_id]
+
+    working_list = [x.copy() for x in current_data]
+    if mode == UpdateMode.ADD:
+        # check if org exists
+        existing_index = next((i for i, x in enumerate(working_list) if x["organizationId"] == target_org_id), -1)
+        if existing_index > -1:
+            # MERGE: Combine existing roles with new roles (using set to avoid duplicates)
+            existing_roles = set(working_list[existing_index]["roles"])
+            new_roles = set(target_attrib_item["roles"])
+
+            # Convert back to list and cast to Enum to satisfy TypedDict
+            merged_roles = [AttributionRoleEnum(r) for r in existing_roles.union(new_roles)]
+            working_list[existing_index]["roles"] = merged_roles
+        else:
+            # APPEND: Add new entry to list
+            working_list.append(target_attrib_item)
+
+    return working_list
+
+
+def apply_attribution(rs_api: RiverscapesAPI, mode: UpdateMode, project_ids: list[str], org_id: str, roles: list[str]):
+    """Apply an attribution change to a list of projects"""
+    # Project.attribution is an array of [ProjectAttribution!]!
+    # ProjectAttribution is organization: Organization!, roles: [AttributionRoleEnum!]
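+    # For illustration, the variables sent to the update mutation take roughly this shape (placeholder IDs):
+    #   {"projectId": "<project guid>", "project": {"attribution": [{"organizationId": "<org guid>", "roles": ["FUNDER"]}]}}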
+    log = Logger("Apply attribution")
+    log.title("Apply attribution")
+    mutation_file = Path(__file__).parent / "updateProjectAttribution.graphql"
+    mutation = rs_api.load_mutation(mutation_file)
+    get_current_attrib_query_file = Path(__file__).parent / "getProjectAttribution.graphql"
+    get_current_attrib_query = rs_api.load_mutation(get_current_attrib_query_file)
+
+    target_attrib_item: ProjectAttributionInput = {"organizationId": org_id, "roles": [AttributionRoleEnum(role) for role in roles]}
+
+    updated = 0
+    prg = ProgressBar(total=len(project_ids), text="Attributing projects")
+    for i, project_id in enumerate(project_ids):
+        log.debug(f"Processing Project ID {project_id}")
+        # Step 1: Fetch current attribution
+        current_attribution = []
+        try:
+            resp = rs_api.run_query(get_current_attrib_query, {"id": project_id})
+            if resp and "data" in resp:
+                raw_data = resp["data"]["project"].get("attribution", [])
+                current_attribution = normalize_api_data(raw_data)
+                log.debug(f"Current attribution: {current_attribution}")
+        except Exception as e:
+            log.error(f"Failed to fetch current attribution for {project_id}: {e}")
+            prg.update(i + 1)
+            continue
+
+        # Step 2: Calculate desired new attribution state
+        final_list = resolve_attribution_list(current_attribution, target_attrib_item, mode)
+        if is_attribution_equal(current_attribution, final_list):
+            log.debug("No change needed")
+        else:
+            project_update: ProjectInput = {"attribution": final_list}
+            variables = {"projectId": project_id, "project": project_update}
+            try:
+                result = rs_api.run_query(mutation, variables)
+                if result is None:
+                    raise Exception(f"Failed to update project {project_id}. Query returned: {result}")
+                updated += 1
+                log.debug(f"New attribution: {final_list}")
+            except Exception as e:
+                log.error(f"Error executing mutation on {project_id}: {e}")
+        prg.update(i + 1)
+    prg.finish()
+    log.info(f"Process complete. {updated} projects updated.")
+
+
+def get_file_from_folder(folder: Path, ext: str = ".csv") -> Path | None:
+    """Prompt the user for a file (of the given extension) from within the specified folder.
+    returns: path to the chosen file, or None otherwise
+    This could easily be adjusted to select files of other extensions.
+    """
+    log = Logger("get file from folder")
+    if not (folder.exists() and folder.is_dir()):
+        log.error(f"The path {folder} does not exist or is not a directory. Please provide a valid folder with CSV files.")
+        return
+
+    # Get a list of all matching files in the specified folder. Do not walk into subfolders.
+    matching_files = [f for f in folder.iterdir() if f.suffix == ext]
+    if not matching_files:
+        log.error(f"No `{ext}` files found in {folder}. Please provide a valid folder with {ext} files.")
+        return
+
+    answers = inquirer.prompt([inquirer.List("file_path", message=f"Select a {ext} file to use", choices=matching_files)])
+    if not answers:
+        log.error("No file selected.")
+        return
+    # The choices are full paths from iterdir(), so do not re-join them with the folder
+    csv_path = Path(answers["file_path"])
+    return csv_path
+
+
+def load_ids_from_csv(csvfile: Path) -> list[str]:
+    """
+    Load a list of GUIDs from a CSV file.
+    Assumes the file exists and is a valid path.
+    Ignores the first row if it looks like a header (non-GUID).
+    Strips whitespace from each entry.
+    Logs a warning if any non-GUID values are found.
+    Returns a list of GUID strings only.
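+    For example, a row of '"73cc1ada-c82b-499e-b3b2-5dc70393e340",' is cleaned to '73cc1ada-c82b-499e-b3b2-5dc70393e340'.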
+    """
+    log = Logger("load IDs from file")
+    project_ids = []
+    non_guids = []
+    lines = csvfile.read_text().splitlines()
+    for i, line in enumerate(lines):
+        value = line.strip().strip(",")
+        # Remove surrounding single or double quotes if present
+        if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
+            value = value[1:-1]
+        value = value.strip()
+        if not value:
+            continue
+        try:
+            # Try to parse as UUID
+            uuid_obj = uuid.UUID(value)
+            project_ids.append(str(uuid_obj))
+        except (ValueError, AttributeError):
+            # Ignore first row if it looks like a header
+            if i == 0:
+                continue
+            non_guids.append(value)
+    if non_guids:
+        log.warning(f"Found {len(non_guids)} non-GUID values in CSV e.g. {non_guids[0]}. These will not be processed.")
+    return project_ids
+
+
+def get_organization_name(rs_api: RiverscapesAPI, organization_id: str) -> str | None:
+    """Look up an organization by ID and return its name, or None if not found."""
+    get_org_qry = """
+query getOrganization($id: ID!) {
+  organization(id: $id) {
+    name
+  }
+}
+"""
+    log = Logger("Get organization name")
+    try:
+        resp = rs_api.run_query(get_org_qry, {"id": organization_id})
+        if resp and "data" in resp and resp["data"].get("organization"):
+            return resp["data"]["organization"]["name"]
+        return None
+    except Exception as e:
+        log.error(f"No organization found for id {organization_id}: {e}")
+        return None
+
+
+def main():
+    """Main entry point - process arguments"""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--stage", help="Production or staging Data Exchange", type=str, choices=["production", "staging"], default="staging")
+    parser.add_argument("--mode", type=str, choices=[m.value for m in UpdateMode], default="ADD", help="ADD: Append/Merge, REPLACE: Overwrite, REMOVE: Delete specific org")
+    # Because we use dotenv.parse_args_env, we need the parser to produce strings rather than Path objects
+    parser.add_argument("--csv-file", help="Path to a specific CSV file with project IDs to process", type=str)
+    parser.add_argument("--csv-folder", help="Folder containing CSV files with project IDs, from which a file can be chosen interactively", type=str)
+    parser.add_argument("--organization", help="GUID for the organization whose attribution will be added or removed", type=str)
+    parser.add_argument(
+        "--roles",
+        nargs="+",
+        choices=[role.value for role in AttributionRoleEnum],
+        help="one or more roles to add or replace for the supplied organization and projects e.g. FUNDER OWNER. Ignored in REMOVE mode (all roles for the organization are removed)",
+        type=str,
+    )
+    parser.add_argument("--yes", "-y", help="Assume yes to all prompts and run without confirmation.", action="store_true")
+    parser.add_argument("--verbose", "-v", help="Verbose logging output", action="store_true")
+    # Parse arguments and inquire from user, as needed
+    args = dotenv.parse_args_env(parser)
+    log = Logger("Setup")
+
+    datestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    if args.verbose:
+        log_level = logging.DEBUG
+    else:
+        log_level = logging.INFO
+    log_path = Path.cwd() / f"apply_attribution_{datestamp}.log"
+    print(f"Logging to {log_path} with level {log_level}.")
+    log.setup(log_path=log_path, log_level=log_level)
+    mode_enum = UpdateMode(args.mode)
+    # get csv_file of projects
+    csv_file = None
+    if args.csv_file:
+        csv_path = Path(args.csv_file)
+        csv_file = csv_path if csv_path.exists() else None
+    elif args.csv_folder:
+        folder_path = Path(args.csv_folder)
+        csv_file = get_file_from_folder(folder_path)
+    if not csv_file:
+        log.error("No file of projects to process provided. Exiting.")
+        return
+    project_id_list = load_ids_from_csv(csv_file)
+
+    log.info(f"Connecting to {args.stage} environment")
+    with RiverscapesAPI(stage=args.stage) as api:
+        organization_id = args.organization
+        org_name = get_organization_name(api, organization_id)
+        if not org_name:
+            log.error(f"Invalid Organization ID: {organization_id}")
+            return
+        roles = args.roles or []
+        if mode_enum != UpdateMode.REMOVE and not roles:
+            log.error("ADD and REPLACE modes require at least one --roles value. Exiting.")
+            return
+        log.info(f"Ready to alter attribution using {mode_enum} \n for {organization_id} ({org_name}) \n (ROLES {roles}) \n to {len(project_id_list)} projects \n from {csv_file}.")
+        # final review for user
+        if not args.yes:
+            proceed = inquirer.prompt([inquirer.Confirm("proceed", message="Proceed?", default=True)])
+            if not proceed or not proceed.get("proceed", False):
+                return
+        apply_attribution(api, mode_enum, project_id_list, organization_id, roles)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/utility/project_attribution/getProjectAttribution.graphql b/scripts/utility/project_attribution/getProjectAttribution.graphql
new file mode 100644
index 0000000..6612526
--- /dev/null
+++ b/scripts/utility/project_attribution/getProjectAttribution.graphql
@@ -0,0 +1,13 @@
+query getProjectAttribution($id: ID!) {
+  project(id: $id) {
+    id
+    name
+    attribution {
+      organization {
+        id
+        name
+      }
+      roles
+    }
+  }
+}
\ No newline at end of file
diff --git a/scripts/utility/project_attribution/updateProjectAttribution.graphql b/scripts/utility/project_attribution/updateProjectAttribution.graphql
new file mode 100644
index 0000000..4cf7e21
--- /dev/null
+++ b/scripts/utility/project_attribution/updateProjectAttribution.graphql
@@ -0,0 +1,11 @@
+mutation updateProjectAttribution($projectId: ID!, $project: ProjectInput!) {
+  updateProject(project: $project, projectId: $projectId) {
+    id
+    attribution {
+      organization {
+        name
+      }
+      roles
+    }
+  }
+}
\ No newline at end of file