From 9a982c082b5a03ea4cf96c9763ac9ec2df15dbeb Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Sun, 3 May 2026 12:56:14 -0500 Subject: [PATCH 1/5] Fix WQP_Metadata.site_info: bind as a real property and forward kwargs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `site_info` was defined inside `WQP_Metadata.__init__` as a nested `@property`-decorated function that was never bound to the class. The decorator returned a property descriptor that was immediately discarded when `__init__` returned, so `md.site_info` always fell through to `BaseMetadata.site_info` and raised `NotImplementedError`. The closure also used `parameters[...]` (the captured `**parameters` kwargs) for the lookup but `self._parameters` for the `in` check — inconsistent. Compounding the issue, every helper called `WQP_Metadata(response)` with no kwargs, so `self._parameters` was always `{}` even after fixing the closure. Move `site_info` to a real property on the class, look up `siteid` first (WQP-native; the previous `sites`/`site`/`site_no` keys reflected an NWIS copy-paste and never matched a WQP query), and thread `**kwargs` through `WQP_Metadata(response, **kwargs)` from all nine call sites so the property has the parameters it needs. Also corrected the docstring: the attribute is `comment` (not `comments`, inherited from `BaseMetadata`), and the `query_time` type is `datetime.timedelta` (not `datetme.timedelta`). Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/wqp.py | 45 ++++++++++++++++++++++---------------------- tests/wqp_test.py | 39 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 22 deletions(-) diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py index 0b53e387..30dc37c9 100644 --- a/dataretrieval/wqp.py +++ b/dataretrieval/wqp.py @@ -155,7 +155,7 @@ def get_results( response = query(url, kwargs, delimiter=";", ssl_check=ssl_check) df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, **kwargs) def what_sites( @@ -210,7 +210,7 @@ def what_sites( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, **kwargs) def what_organizations( @@ -265,7 +265,7 @@ def what_organizations( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, **kwargs) def what_projects(ssl_check=True, legacy=True, **kwargs): @@ -316,7 +316,7 @@ def what_projects(ssl_check=True, legacy=True, **kwargs): df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, **kwargs) def what_activities( @@ -380,7 +380,7 @@ def what_activities( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, **kwargs) def what_detection_limits( @@ -442,7 +442,7 @@ def what_detection_limits( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, **kwargs) def what_habitat_metrics( @@ -497,7 +497,7 @@ def what_habitat_metrics( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, **kwargs) def what_project_weights(ssl_check=True, legacy=True, **kwargs): @@ -553,7 +553,7 @@ def what_project_weights(ssl_check=True, legacy=True, **kwargs): df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, **kwargs) def what_activity_metrics(ssl_check=True, legacy=True, **kwargs): @@ -609,7 +609,7 @@ def what_activity_metrics(ssl_check=True, legacy=True, **kwargs): df = pd.read_csv(StringIO(response.text), delimiter=",") - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, **kwargs) def wqp_url(service): @@ -649,14 +649,15 @@ class WQP_Metadata(BaseMetadata): ---------- url : str Response url - query_time : datetme.timedelta + query_time : datetime.timedelta Response elapsed time header : requests.structures.CaseInsensitiveDict Response headers - comments : None - Metadata comments. WQP does not return comments. - site_info : tuple[pd.DataFrame, NWIS_Metadata] | None - Site information if the query included `sites`, `site` or `site_no`. + comment : None + Metadata comment. WQP does not return comments. + site_info : tuple[pd.DataFrame, WQP_Metadata] | None + Site information if the query included a site filter (`siteid`, + `sites`, `site`, or `site_no`). """ def __init__(self, response, **parameters) -> None: @@ -682,14 +683,14 @@ def __init__(self, response, **parameters) -> None: self._parameters = parameters - @property - def site_info(self): - if "sites" in self._parameters: - return what_sites(sites=parameters["sites"]) - elif "site" in self._parameters: - return what_sites(sites=parameters["site"]) - elif "site_no" in self._parameters: - return what_sites(sites=parameters["site_no"]) + @property + def site_info(self): + if "siteid" in self._parameters: + return what_sites(siteid=self._parameters["siteid"]) + for legacy_key in ("sites", "site", "site_no"): + if legacy_key in self._parameters: + return what_sites(siteid=self._parameters[legacy_key]) + return None def _check_kwargs(kwargs): diff --git a/tests/wqp_test.py b/tests/wqp_test.py index f36558bc..1bba0b89 100644 --- a/tests/wqp_test.py +++ b/tests/wqp_test.py @@ -216,3 +216,42 @@ def test_check_kwargs(): kwargs = {"mimeType": "foo"} with pytest.raises(ValueError): kwargs = _check_kwargs(kwargs) + + +def test_wqp_metadata_site_info_property_resolves(requests_mock): + """`site_info` must be a real property bound to the class. + + Regression: previously `site_info` was defined as a closure inside + `__init__`, so `md.site_info` fell through to `BaseMetadata.site_info` + and raised `NotImplementedError`. Also verify the parameters are + threaded through from `get_results`. + """ + results_url = ( + "https://www.waterqualitydata.us/data/Result/Search?" + "siteid=WIDNR_WQX-10032762&mimeType=csv" + ) + sites_url = ( + "https://www.waterqualitydata.us/data/Station/Search?" + "siteid=WIDNR_WQX-10032762&mimeType=csv" + ) + mock_request(requests_mock, results_url, "tests/data/wqp_results.txt") + mock_request(requests_mock, sites_url, "tests/data/wqp_sites.txt") + + _df, md = get_results(siteid="WIDNR_WQX-10032762") + + # site_info must be a bound property — accessing it should return the + # what_sites() tuple, not raise NotImplementedError. + site_df, site_md = md.site_info + assert isinstance(site_df, DataFrame) + assert site_md.url == sites_url + + +def test_wqp_metadata_site_info_returns_none_without_site_filter(requests_mock): + """`site_info` returns None when no site filter was supplied.""" + results_url = ( + "https://www.waterqualitydata.us/data/Result/Search?" + "characteristicName=Chloride&mimeType=csv" + ) + mock_request(requests_mock, results_url, "tests/data/wqp_results.txt") + _df, md = get_results(characteristicName="Chloride") + assert md.site_info is None From 96950587f6ee492bd67068909f70f751f0501967 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Sun, 3 May 2026 13:02:52 -0500 Subject: [PATCH 2/5] Simplify site_info lookup to a single loop Folds the special-cased `siteid` branch into the loop over candidate keys; semantics are unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/wqp.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py index 30dc37c9..540711e2 100644 --- a/dataretrieval/wqp.py +++ b/dataretrieval/wqp.py @@ -685,11 +685,9 @@ def __init__(self, response, **parameters) -> None: @property def site_info(self): - if "siteid" in self._parameters: - return what_sites(siteid=self._parameters["siteid"]) - for legacy_key in ("sites", "site", "site_no"): - if legacy_key in self._parameters: - return what_sites(siteid=self._parameters[legacy_key]) + for key in ("siteid", "sites", "site", "site_no"): + if key in self._parameters: + return what_sites(siteid=self._parameters[key]) return None From 3f03d3632f475512ba38cecfcdd245d1c655daf7 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 4 May 2026 10:08:37 -0500 Subject: [PATCH 3/5] Forward legacy/ssl_check from query call sites to site_info lookup Per copilot review on PR #249. The site_info property previously called what_sites() with default legacy=True/ssl_check=True, so a WQX3.0 get_results(legacy=False, ...) call produced a legacy Station lookup. WQP_Metadata now persists the originating legacy/ssl_check on the instance and forwards them when site_info is accessed. All nine WQP_Metadata call sites now thread these through. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/wqp.py | 42 +++++++++++++++++++++++++++++------------- tests/wqp_test.py | 24 ++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 13 deletions(-) diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py index 540711e2..11943051 100644 --- a/dataretrieval/wqp.py +++ b/dataretrieval/wqp.py @@ -155,7 +155,7 @@ def get_results( response = query(url, kwargs, delimiter=";", ssl_check=ssl_check) df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response, **kwargs) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_sites( @@ -210,7 +210,7 @@ def what_sites( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response, **kwargs) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_organizations( @@ -265,7 +265,7 @@ def what_organizations( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response, **kwargs) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_projects(ssl_check=True, legacy=True, **kwargs): @@ -316,7 +316,7 @@ def what_projects(ssl_check=True, legacy=True, **kwargs): df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response, **kwargs) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_activities( @@ -380,7 +380,7 @@ def what_activities( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response, **kwargs) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_detection_limits( @@ -442,7 +442,7 @@ def what_detection_limits( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response, **kwargs) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_habitat_metrics( @@ -497,7 +497,7 @@ def what_habitat_metrics( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response, **kwargs) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_project_weights(ssl_check=True, legacy=True, **kwargs): @@ -553,7 +553,7 @@ def what_project_weights(ssl_check=True, legacy=True, **kwargs): df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response, **kwargs) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_activity_metrics(ssl_check=True, legacy=True, **kwargs): @@ -609,7 +609,7 @@ def what_activity_metrics(ssl_check=True, legacy=True, **kwargs): df = pd.read_csv(StringIO(response.text), delimiter=",") - return df, WQP_Metadata(response, **kwargs) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def wqp_url(service): @@ -660,7 +660,9 @@ class WQP_Metadata(BaseMetadata): `sites`, `site`, or `site_no`). """ - def __init__(self, response, **parameters) -> None: + def __init__( + self, response, legacy: bool = True, ssl_check: bool = True, **parameters + ) -> None: """Generates a standard set of metadata informed by the response with specific metadata for WQP data. @@ -668,9 +670,17 @@ def __init__(self, response, **parameters) -> None: ---------- response : Response Response object from requests module - + legacy : bool + Whether the originating request used the legacy WQX endpoint. + Forwarded to ``what_sites`` when ``site_info`` is accessed so the + resolved station metadata uses the same profile as the original + query. + ssl_check : bool + Whether the originating request verified SSL. Forwarded to + ``what_sites`` for consistency. parameters : dict - Unpacked dictionary of the parameters supplied in the request + Unpacked dictionary of the remaining parameters supplied in the + request. Returns ------- @@ -682,12 +692,18 @@ def __init__(self, response, **parameters) -> None: super().__init__(response) self._parameters = parameters + self._legacy = legacy + self._ssl_check = ssl_check @property def site_info(self): for key in ("siteid", "sites", "site", "site_no"): if key in self._parameters: - return what_sites(siteid=self._parameters[key]) + return what_sites( + siteid=self._parameters[key], + legacy=self._legacy, + ssl_check=self._ssl_check, + ) return None diff --git a/tests/wqp_test.py b/tests/wqp_test.py index 1bba0b89..5530b9eb 100644 --- a/tests/wqp_test.py +++ b/tests/wqp_test.py @@ -255,3 +255,27 @@ def test_wqp_metadata_site_info_returns_none_without_site_filter(requests_mock): mock_request(requests_mock, results_url, "tests/data/wqp_results.txt") _df, md = get_results(characteristicName="Chloride") assert md.site_info is None + + +def test_wqp_metadata_site_info_uses_wqx3_when_originating_query_was_wqx3( + requests_mock, +): + """`site_info` must use the same legacy/WQX3.0 profile as the originating query. + + Regression: previously `site_info` always called `what_sites()` with default + `legacy=True`, so a WQX3.0 results query produced a legacy Station lookup. + """ + results_url = ( + "https://www.waterqualitydata.us/wqx3/Result/search?" + "siteid=UTAHDWQ_WQX-4993795&mimeType=csv&dataProfile=fullPhysChem" + ) + sites_wqx3_url = ( + "https://www.waterqualitydata.us/wqx3/Station/search?" + "siteid=UTAHDWQ_WQX-4993795&mimeType=csv" + ) + mock_request(requests_mock, results_url, "tests/data/wqp3_results.txt") + mock_request(requests_mock, sites_wqx3_url, "tests/data/wqp_sites.txt") + + _df, md = get_results(legacy=False, siteid="UTAHDWQ_WQX-4993795") + _site_df, site_md = md.site_info + assert site_md.url == sites_wqx3_url From 7836d120e91c7f0e5fd8054f2c54929c43cd8f95 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 4 May 2026 15:26:43 -0500 Subject: [PATCH 4/5] Tidy WQP_Metadata.site_info per /simplify review Per /simplify review on PR #249: - Switch `site_info` from `@property` to `functools.cached_property`. Repeated access of `md.site_info` (common in notebooks) now reuses the resolved tuple instead of issuing a fresh `what_sites` HTTP call on every read. - Add a one-line comment explaining the legacy-alias -> `siteid` coercion: whichever of `siteid`/`sites`/`site`/`site_no` matched, the value is passed as `siteid` (what_sites' native WQP arg). Also confirmed the docstring's `comments` -> `comment` rename matches `BaseMetadata.comment` (singular) defined at `dataretrieval/utils.py:130`. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/wqp.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py index 3982e394..10edf699 100644 --- a/dataretrieval/wqp.py +++ b/dataretrieval/wqp.py @@ -12,6 +12,7 @@ from __future__ import annotations import warnings +from functools import cached_property from io import StringIO from typing import TYPE_CHECKING @@ -687,8 +688,10 @@ def __init__( self._legacy = legacy self._ssl_check = ssl_check - @property + @cached_property def site_info(self): + # Walk WQP-native key first, then legacy NWIS-style aliases. Whichever + # matched, pass the value as `siteid` -- that's what_sites' native arg. for key in ("siteid", "sites", "site", "site_no"): if key in self._parameters: return what_sites( From 0c0de2ac26cfaafd559b90cc3d9e925dc5d313de Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 4 May 2026 20:31:03 -0500 Subject: [PATCH 5/5] Revert cached_property on WQP_Metadata.site_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WQP_Metadata is not strictly a snapshot — a long-lived `md` reference (paginated workflow, long-running script) would see stale site metadata if `site_info` cached its result. Plain `@property` matches user expectation that an attribute named `site_info` reflects the current state, and the cost of one extra HTTP call on re-read is acceptable. Drop the now-unused `from functools import cached_property`. The coercion comment is retained. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/wqp.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py index 10edf699..5ce8185a 100644 --- a/dataretrieval/wqp.py +++ b/dataretrieval/wqp.py @@ -12,7 +12,6 @@ from __future__ import annotations import warnings -from functools import cached_property from io import StringIO from typing import TYPE_CHECKING @@ -688,7 +687,7 @@ def __init__( self._legacy = legacy self._ssl_check = ssl_check - @cached_property + @property def site_info(self): # Walk WQP-native key first, then legacy NWIS-style aliases. Whichever # matched, pass the value as `siteid` -- that's what_sites' native arg.