From 8fdd2674354eb30bf46fc6a67cdfeec2626028a2 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Sun, 3 May 2026 12:58:13 -0500 Subject: [PATCH 1/4] Fix get_results: preserve user-supplied WQX3.0 dataProfile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The validation block silently overwrote any user-supplied WQX3.0 dataProfile because the `else` clause attached to the *outer* `if "dataProfile" in kwargs and ... not in result_profiles_wqx3` and fired whenever that compound condition was False — including the case where the user passed a *valid* profile like "narrow" or "basicPhysChem". The documented example `dataProfile="narrow"` therefore did not actually request the narrow profile. Restructure the WQX3.0 branch so the default (`fullPhysChem`) is only applied when the user did *not* set `dataProfile` at all. Also raise `ValueError` rather than `TypeError` for invalid profile values (these are bad values, not bad types) and call the constructor with a single formatted message — the previous code passed two separate strings, producing a tuple-args exception. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/wqp.py | 20 +++++++++----------- tests/wqp_test.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py index 0b53e387..5be175c6 100644 --- a/dataretrieval/wqp.py +++ b/dataretrieval/wqp.py @@ -131,22 +131,20 @@ def get_results( "dataProfile" in kwargs and kwargs["dataProfile"] not in result_profiles_legacy ): - raise TypeError( - f"dataProfile {kwargs['dataProfile']} is not a legacy profile.", - f"Valid options are {result_profiles_legacy}.", + raise ValueError( + f"dataProfile {kwargs['dataProfile']!r} is not a legacy profile. " + f"Valid options are {result_profiles_legacy}." ) url = wqp_url("Result") else: - if ( - "dataProfile" in kwargs - and kwargs["dataProfile"] not in result_profiles_wqx3 - ): - raise TypeError( - f"dataProfile {kwargs['dataProfile']} is not a valid WQX3.0" - f"profile. Valid options are {result_profiles_wqx3}.", - ) + if "dataProfile" in kwargs: + if kwargs["dataProfile"] not in result_profiles_wqx3: + raise ValueError( + f"dataProfile {kwargs['dataProfile']!r} is not a valid " + f"WQX3.0 profile. Valid options are {result_profiles_wqx3}." + ) else: kwargs["dataProfile"] = "fullPhysChem" diff --git a/tests/wqp_test.py b/tests/wqp_test.py index f36558bc..2f462b7d 100644 --- a/tests/wqp_test.py +++ b/tests/wqp_test.py @@ -216,3 +216,37 @@ def test_check_kwargs(): kwargs = {"mimeType": "foo"} with pytest.raises(ValueError): kwargs = _check_kwargs(kwargs) + + +def test_get_results_wqx3_preserves_user_dataProfile(requests_mock): + """A valid user-supplied WQX3.0 profile must not be overwritten. + + Regression: previously the `else` branch of the `dataProfile` validation + triggered whenever the value was *not invalid*, including any valid + user-supplied profile, silently overwriting it with 'fullPhysChem'. + """ + request_url = ( + "https://www.waterqualitydata.us/wqx3/Result/search?" + "siteid=UTAHDWQ_WQX-4993795&mimeType=csv&dataProfile=narrow" + ) + response_file_path = "tests/data/wqp3_results.txt" + mock_request(requests_mock, request_url, response_file_path) + + df, _md = get_results( + legacy=False, siteid="UTAHDWQ_WQX-4993795", dataProfile="narrow" + ) + assert isinstance(df, DataFrame) + sent = requests_mock.request_history[-1] + assert sent.qs.get("dataprofile") == ["narrow"] + + +def test_get_results_wqx3_invalid_dataProfile_raises(): + """Invalid WQX3.0 dataProfile raises ValueError, not TypeError.""" + with pytest.raises(ValueError, match="WQX3.0 profile"): + get_results(legacy=False, dataProfile="not_a_real_profile") + + +def test_get_results_legacy_invalid_dataProfile_raises(): + """Invalid legacy dataProfile raises ValueError, not TypeError.""" + with pytest.raises(ValueError, match="legacy profile"): + get_results(legacy=True, dataProfile="not_a_real_profile") From ee67e84e2edb5cbc60dc4a10b2f3424895715e82 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Sun, 3 May 2026 13:03:22 -0500 Subject: [PATCH 2/4] Unify legacy/wqx3 dataProfile validation Selects valid_profiles + url + kind once per branch, then validates and applies the WQX3.0 default in straight-line code. Replaces the 3-deep nesting in the wqx3 branch and the duplicated `dataProfile in kwargs and ... not in profiles` check across both branches. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/wqp.py | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py index 5be175c6..109a0fed 100644 --- a/dataretrieval/wqp.py +++ b/dataretrieval/wqp.py @@ -126,30 +126,24 @@ def get_results( kwargs = _check_kwargs(kwargs) - if legacy is True: - if ( - "dataProfile" in kwargs - and kwargs["dataProfile"] not in result_profiles_legacy - ): - raise ValueError( - f"dataProfile {kwargs['dataProfile']!r} is not a legacy profile. " - f"Valid options are {result_profiles_legacy}." - ) - + if legacy: + valid_profiles = result_profiles_legacy + kind = "legacy" url = wqp_url("Result") - else: - if "dataProfile" in kwargs: - if kwargs["dataProfile"] not in result_profiles_wqx3: - raise ValueError( - f"dataProfile {kwargs['dataProfile']!r} is not a valid " - f"WQX3.0 profile. Valid options are {result_profiles_wqx3}." - ) - else: - kwargs["dataProfile"] = "fullPhysChem" - + valid_profiles = result_profiles_wqx3 + kind = "WQX3.0" url = wqx3_url("Result") + profile = kwargs.get("dataProfile") + if profile is not None and profile not in valid_profiles: + raise ValueError( + f"dataProfile {profile!r} is not a valid {kind} profile. " + f"Valid options are {valid_profiles}." + ) + if not legacy and profile is None: + kwargs["dataProfile"] = "fullPhysChem" + response = query(url, kwargs, delimiter=";", ssl_check=ssl_check) df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) From 9b61575e164d70d3ba4803ba8237abe80521beef Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 4 May 2026 01:58:38 -0500 Subject: [PATCH 3/4] Drop trivial TypeError/ValueError tests for dataProfile validation Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/wqp_test.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/wqp_test.py b/tests/wqp_test.py index 2f462b7d..a337f7ec 100644 --- a/tests/wqp_test.py +++ b/tests/wqp_test.py @@ -238,15 +238,3 @@ def test_get_results_wqx3_preserves_user_dataProfile(requests_mock): assert isinstance(df, DataFrame) sent = requests_mock.request_history[-1] assert sent.qs.get("dataprofile") == ["narrow"] - - -def test_get_results_wqx3_invalid_dataProfile_raises(): - """Invalid WQX3.0 dataProfile raises ValueError, not TypeError.""" - with pytest.raises(ValueError, match="WQX3.0 profile"): - get_results(legacy=False, dataProfile="not_a_real_profile") - - -def test_get_results_legacy_invalid_dataProfile_raises(): - """Invalid legacy dataProfile raises ValueError, not TypeError.""" - with pytest.raises(ValueError, match="legacy profile"): - get_results(legacy=True, dataProfile="not_a_real_profile") From 963342dcbc9dbecb99fde447385b97e611128d10 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 4 May 2026 10:09:06 -0500 Subject: [PATCH 4/4] Use 'is True' check for legacy parity with the rest of wqp.py Per copilot review on PR #250. Other helpers (what_sites, etc.) use 'legacy is True' to gate URL selection, so a non-bool value like 1 or 'false' would otherwise route differently in get_results vs the rest of the module. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/wqp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py index 109a0fed..70d616f9 100644 --- a/dataretrieval/wqp.py +++ b/dataretrieval/wqp.py @@ -126,7 +126,7 @@ def get_results( kwargs = _check_kwargs(kwargs) - if legacy: + if legacy is True: valid_profiles = result_profiles_legacy kind = "legacy" url = wqp_url("Result") @@ -141,7 +141,7 @@ def get_results( f"dataProfile {profile!r} is not a valid {kind} profile. " f"Valid options are {valid_profiles}." ) - if not legacy and profile is None: + if legacy is not True and profile is None: kwargs["dataProfile"] = "fullPhysChem" response = query(url, kwargs, delimiter=";", ssl_check=ssl_check)