From 6ee92f03749c11ebbbe815164e906dbcafefba12 Mon Sep 17 00:00:00 2001 From: mcarans Date: Thu, 22 Jan 2026 18:03:53 +1300 Subject: [PATCH 1/4] Add no_empty parameter to generate_resource --- requirements.txt | 6 ++++-- src/hdx/data/dataset.py | 19 ++++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 48785d5..7abd2d5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -86,6 +86,8 @@ num2words==0.5.14 # via quantulum3 openpyxl==3.1.5 # via hdx-python-utilities +packaging==26.0 + # via wheel petl==1.7.17 # via frictionless ply==3.11 @@ -144,7 +146,7 @@ rpds-py==0.30.0 # referencing ruamel-yaml==0.19.1 # via hdx-python-utilities -setuptools==80.9.0 +setuptools==80.10.1 # via ckanapi shellingham==1.5.4 # via typer @@ -194,7 +196,7 @@ urllib3==2.6.3 # requests validators==0.35.0 # via frictionless -wheel==0.45.1 +wheel==0.46.2 # via libhxl xlrd==2.0.2 # via hdx-python-utilities diff --git a/src/hdx/data/dataset.py b/src/hdx/data/dataset.py index d861662..70a0d0f 100755 --- a/src/hdx/data/dataset.py +++ b/src/hdx/data/dataset.py @@ -2410,6 +2410,7 @@ def generate_resource( datecol: int | str | None = None, yearcol: int | str | None = None, date_function: Callable[[dict], dict | None] | None = None, + no_empty: bool = True, ) -> tuple[bool, dict]: """Write rows to file and create resource, adding it to the dataset. The headers argument is either a row number (rows start counting at 1), or the actual @@ -2440,9 +2441,10 @@ def generate_resource( columns: Columns to write. Defaults to all. format: Format to write. Defaults to csv. encoding: Encoding to use. Defaults to None (infer encoding). - datecol: Optional[Union[int, str]] = None, - yearcol: Optional[Union[int, str]] = None, - date_function: Optional[Callable[[Dict], Optional[Dict]]] = None, + datecol: Date column for setting time period. Defaults to None (don't set). + yearcol: Year column for setting dataset year range. Defaults to None (don't set). + date_function: Date function to call for each row. Defaults to None. + no_empty: Don't generate resource if there are no data rows. Defaults to True. Returns: (True if resource added, dictionary of results) @@ -2505,7 +2507,7 @@ def process_row(row: Sequence | Mapping) -> Sequence | Mapping | None: encoding=encoding, row_function=process_row, ) - if not rows: + if not rows and no_empty: logger.error(f"No data rows in {filename}!") return False, retdict if yearcol is not None or date_function is not None: @@ -2731,6 +2733,7 @@ def download_generate_resource( datecol: int | str | None = None, yearcol: int | str | None = None, date_function: Callable[[dict], dict | None] | None = None, + no_empty: bool = True, **kwargs: Any, ) -> tuple[bool, dict]: """Download url, write rows to csv and create resource, adding to it @@ -2769,9 +2772,10 @@ def download_generate_resource( columns: Columns to write. Defaults to all. format: Format to write. Defaults to csv. encoding: Encoding to use. Defaults to None (infer encoding). - datecol: Optional[Union[int, str]] = None, - yearcol: Optional[Union[int, str]] = None, - date_function: Optional[Callable[[Dict], Optional[Dict]]] = None, + datecol: Date column for setting time period. Defaults to None (don't set). + yearcol: Year column for setting dataset year range. Defaults to None (don't set). + date_function: Date function to call for each row. Defaults to None. + no_empty: Don't generate resource if there are no data rows. Defaults to True. **kwargs: Any additional args to pass to downloader.get_tabular_rows Returns: @@ -2797,6 +2801,7 @@ def download_generate_resource( datecol=datecol, yearcol=yearcol, date_function=date_function, + no_empty=no_empty, ) def download_and_generate_resource( From cdf3479abb1028520aea919c476ce89528e53b5f Mon Sep 17 00:00:00 2001 From: mcarans Date: Fri, 23 Jan 2026 11:56:27 +1300 Subject: [PATCH 2/4] pass no_empty --- src/hdx/data/dataset.py | 3 ++- .../data/test_dataset_resource_generation.py | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/hdx/data/dataset.py b/src/hdx/data/dataset.py index 70a0d0f..ad02d6f 100755 --- a/src/hdx/data/dataset.py +++ b/src/hdx/data/dataset.py @@ -2506,8 +2506,9 @@ def process_row(row: Sequence | Mapping) -> Sequence | Mapping | None: format=format, encoding=encoding, row_function=process_row, + no_empty=no_empty, ) - if not rows and no_empty: + if not rows: logger.error(f"No data rows in {filename}!") return False, retdict if yearcol is not None or date_function is not None: diff --git a/tests/hdx/data/test_dataset_resource_generation.py b/tests/hdx/data/test_dataset_resource_generation.py index 86041b2..980d4c5 100644 --- a/tests/hdx/data/test_dataset_resource_generation.py +++ b/tests/hdx/data/test_dataset_resource_generation.py @@ -500,6 +500,23 @@ def process_year(row): ) assert success is False url = "https://raw.githubusercontent.com/OCHA-DAP/hdx-python-api/main/tests/fixtures/gen_resource/test_data_no_data.csv" + success, results = dataset.download_generate_resource( + downloader, + url, + folder, + filename, + resourcedata, + ) + assert success is False + success, results = dataset.download_generate_resource( + downloader, + url, + folder, + filename, + resourcedata, + no_empty=False, + ) + assert success is True success, results = dataset.download_generate_resource( downloader, url, From ba9344f3e6910bce820a090b98fd6408f3d827db Mon Sep 17 00:00:00 2001 From: mcarans Date: Fri, 23 Jan 2026 11:57:24 +1300 Subject: [PATCH 3/4] pass no_empty --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7abd2d5..7b5525e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -196,7 +196,7 @@ urllib3==2.6.3 # requests validators==0.35.0 # via frictionless -wheel==0.46.2 +wheel==0.46.3 # via libhxl xlrd==2.0.2 # via hdx-python-utilities From bbfad12290d9ef8910f80b214059775a5079a1c2 Mon Sep 17 00:00:00 2001 From: mcarans Date: Fri, 23 Jan 2026 12:13:26 +1300 Subject: [PATCH 4/4] pass no_empty --- pyproject.toml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 369fe88..f4e7a05 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ dependencies = [ "defopt>=7.0.0", "email_validator", "hdx-python-country>=4.0.1", - "hdx-python-utilities>=4.0.2", + "hdx-python-utilities>=4.0.3", "libhxl>=5.2.2", "makefun", "quantulum3", diff --git a/requirements.txt b/requirements.txt index 7b5525e..ed8eb82 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,7 +36,7 @@ frictionless==5.18.1 # via hdx-python-utilities hdx-python-country==4.0.1 # via hdx-python-api (pyproject.toml) -hdx-python-utilities==4.0.2 +hdx-python-utilities==4.0.3 # via # hdx-python-api (pyproject.toml) # hdx-python-country