From e38faf5a9d891c7159ebf656ddde3d691f44407a Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Sun, 3 May 2026 12:44:45 -0500 Subject: [PATCH 1/5] Fix Watershed class: persist instance state and populate from get_watershed `Watershed.from_streamstats_json` was a classmethod that assigned to `cls` instead of an instance, so every watershed shared class-level attributes and the method returned the class itself. `Watershed.__init__` called `get_watershed(...)` and discarded the result, so instances had no state of their own. And `get_watershed(format="object")` returned `None` rather than the parsed JSON the docstring promised. `from_streamstats_json` now builds a real instance, `__init__` requests `format="object"` and populates `self`, and `get_watershed(format="object")` returns the parsed dict. `_workspaceID` is preserved as a read-only alias of the new `workspace_id` attribute. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/streamstats.py | 49 +++++++++++++++++-------- tests/streamstats_test.py | 69 ++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 14 deletions(-) create mode 100644 tests/streamstats_test.py diff --git a/dataretrieval/streamstats.py b/dataretrieval/streamstats.py index 7cddabaa..d1d1af9a 100644 --- a/dataretrieval/streamstats.py +++ b/dataretrieval/streamstats.py @@ -136,26 +136,47 @@ def get_watershed( # use Fiona to return a shape object pass + data = json.loads(r.text) + if format == "object": - # return a python object - pass + return data - data = json.loads(r.text) return Watershed.from_streamstats_json(data) class Watershed: - """Class to extract information from the streamstats JSON object.""" + """Class to extract information from the streamstats JSON object. - @classmethod - def from_streamstats_json(cls, streamstats_json): - """Method that creates a Watershed object from a streamstats JSON.""" - cls.watershed_point = streamstats_json["featurecollection"][0]["feature"] - cls.watershed_polygon = streamstats_json["featurecollection"][1]["feature"] - cls.parameters = streamstats_json["parameters"] - cls._workspaceID = streamstats_json["workspaceID"] - return cls + Attributes + ---------- + watershed_point : dict + GeoJSON feature for the watershed pour point. + watershed_polygon : dict + GeoJSON feature for the delineated watershed polygon. + parameters : list + Watershed parameters returned by StreamStats. + workspace_id : str + StreamStats workspace identifier for the watershed. + """ def __init__(self, rcode, xlocation, ylocation): - """Init method that calls the :obj:`from_streamstats_json` method.""" - get_watershed(rcode, xlocation, ylocation) + """Delineate a watershed and populate the instance from the response.""" + data = get_watershed(rcode, xlocation, ylocation, format="object") + self._populate_from_json(data) + + @classmethod + def from_streamstats_json(cls, streamstats_json): + """Construct a :obj:`Watershed` from a StreamStats JSON response.""" + instance = cls.__new__(cls) + instance._populate_from_json(streamstats_json) + return instance + + def _populate_from_json(self, streamstats_json): + self.watershed_point = streamstats_json["featurecollection"][0]["feature"] + self.watershed_polygon = streamstats_json["featurecollection"][1]["feature"] + self.parameters = streamstats_json["parameters"] + self.workspace_id = streamstats_json["workspaceID"] + + @property + def _workspaceID(self): + return self.workspace_id diff --git a/tests/streamstats_test.py b/tests/streamstats_test.py new file mode 100644 index 00000000..7510589b --- /dev/null +++ b/tests/streamstats_test.py @@ -0,0 +1,69 @@ +"""Tests for the streamstats module.""" + +import json + +from dataretrieval.streamstats import Watershed, get_watershed + +SAMPLE_JSON = { + "featurecollection": [ + {"name": "globalwatershedpoint", "feature": {"type": "Feature", "id": "pt-1"}}, + {"name": "globalwatershed", "feature": {"type": "Feature", "id": "poly-1"}}, + ], + "parameters": [{"code": "DRNAREA", "value": 41.2}], + "workspaceID": "NY20240101000000000", +} + + +def test_from_streamstats_json_returns_instance(): + """Watershed.from_streamstats_json must return an instance, not the class.""" + w = Watershed.from_streamstats_json(SAMPLE_JSON) + assert isinstance(w, Watershed) + + +def test_from_streamstats_json_does_not_mutate_class(): + """Two watersheds must not share state via class-level attributes.""" + other_json = { + "featurecollection": [ + {"feature": {"id": "pt-2"}}, + {"feature": {"id": "poly-2"}}, + ], + "parameters": [{"code": "OTHER", "value": 1.0}], + "workspaceID": "VT20240101000000000", + } + w1 = Watershed.from_streamstats_json(SAMPLE_JSON) + w2 = Watershed.from_streamstats_json(other_json) + + assert w1.workspace_id == "NY20240101000000000" + assert w2.workspace_id == "VT20240101000000000" + assert w1.parameters[0]["code"] == "DRNAREA" + assert w2.parameters[0]["code"] == "OTHER" + assert w1.watershed_point["id"] == "pt-1" + assert w2.watershed_point["id"] == "pt-2" + + +def test_get_watershed_object_returns_dict(requests_mock): + """get_watershed(format='object') must return parsed JSON, not None.""" + url = "https://streamstats.usgs.gov/streamstatsservices/watershed.geojson" + requests_mock.get(url, text=json.dumps(SAMPLE_JSON)) + + result = get_watershed("NY", -74.524, 43.939, format="object") + assert isinstance(result, dict) + assert result["workspaceID"] == "NY20240101000000000" + + +def test_watershed_init_populates_instance(requests_mock): + """Watershed(...) must populate the instance (regression: previously discarded).""" + url = "https://streamstats.usgs.gov/streamstatsservices/watershed.geojson" + requests_mock.get(url, text=json.dumps(SAMPLE_JSON)) + + w = Watershed("NY", -74.524, 43.939) + assert w.workspace_id == "NY20240101000000000" + assert w.parameters[0]["code"] == "DRNAREA" + assert w.watershed_point["id"] == "pt-1" + assert w.watershed_polygon["id"] == "poly-1" + + +def test_workspace_id_back_compat_alias(): + """Legacy `_workspaceID` attribute should still resolve.""" + w = Watershed.from_streamstats_json(SAMPLE_JSON) + assert w._workspaceID == w.workspace_id == "NY20240101000000000" From fac1bcce782904dc5d7264eafb23cd731ef70b2c Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 4 May 2026 10:04:50 -0500 Subject: [PATCH 2/5] Document get_watershed format-dependent return type Per copilot review on PR #245. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/streamstats.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/dataretrieval/streamstats.py b/dataretrieval/streamstats.py index d1d1af9a..70048137 100644 --- a/dataretrieval/streamstats.py +++ b/dataretrieval/streamstats.py @@ -105,12 +105,17 @@ def get_watershed( simplify: bool, optional Boolean flag controlling whether or not to simplify the returned result. + format: string, optional + Selects the return shape. ``"geojson"`` (default) returns the raw + ``requests.Response``; ``"object"`` returns the parsed JSON ``dict``; + any other value returns a :obj:`Watershed` instance built from the + parsed JSON. Returns ------- - Watershed: :obj:`dataretrieval.streamstats.Watershed` - Custom object that contains the watershed information as extracted - from the streamstats JSON object. + requests.Response, dict, or :obj:`dataretrieval.streamstats.Watershed` + Watershed information from StreamStats. The exact return type + depends on ``format`` (see above). """ payload = { From b325f9257662141712c65acd6733099aed484835 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 4 May 2026 14:33:29 -0500 Subject: [PATCH 3/5] Tidy streamstats fix per /simplify review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per /simplify review on PR #245: - Replace `json.loads(r.text)` with `r.json()` (matching the convention used in `nldi.py` and `nwis.py`) and drop the now-unused `import json` from `dataretrieval/streamstats.py`. - Remove the dead `format == "shape"` branch — a stale Fiona-stub (`# use Fiona to return a shape object` + `pass`) that silently fell through to `Watershed.from_streamstats_json(data)`. The docstring no longer mentions `"shape"` so removing the branch matches the contract. - Drop four trivial unit tests that each exercised a one-line behavior change obvious from the diff (`from_streamstats_json` returns an instance, `format="object"` returns a dict, `__init__` populates self, `_workspaceID` alias resolves). Keep the load-bearing one: `test_from_streamstats_json_does_not_mutate_class` guards against re-introducing the class-mutation antipattern under refactor. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/streamstats.py | 8 +------- tests/streamstats_test.py | 38 +----------------------------------- 2 files changed, 2 insertions(+), 44 deletions(-) diff --git a/dataretrieval/streamstats.py b/dataretrieval/streamstats.py index 70048137..934143e2 100644 --- a/dataretrieval/streamstats.py +++ b/dataretrieval/streamstats.py @@ -5,8 +5,6 @@ """ -import json - import requests @@ -137,11 +135,7 @@ def get_watershed( if format == "geojson": return r - if format == "shape": - # use Fiona to return a shape object - pass - - data = json.loads(r.text) + data = r.json() if format == "object": return data diff --git a/tests/streamstats_test.py b/tests/streamstats_test.py index 7510589b..31a389f5 100644 --- a/tests/streamstats_test.py +++ b/tests/streamstats_test.py @@ -1,8 +1,6 @@ """Tests for the streamstats module.""" -import json - -from dataretrieval.streamstats import Watershed, get_watershed +from dataretrieval.streamstats import Watershed SAMPLE_JSON = { "featurecollection": [ @@ -14,12 +12,6 @@ } -def test_from_streamstats_json_returns_instance(): - """Watershed.from_streamstats_json must return an instance, not the class.""" - w = Watershed.from_streamstats_json(SAMPLE_JSON) - assert isinstance(w, Watershed) - - def test_from_streamstats_json_does_not_mutate_class(): """Two watersheds must not share state via class-level attributes.""" other_json = { @@ -39,31 +31,3 @@ def test_from_streamstats_json_does_not_mutate_class(): assert w2.parameters[0]["code"] == "OTHER" assert w1.watershed_point["id"] == "pt-1" assert w2.watershed_point["id"] == "pt-2" - - -def test_get_watershed_object_returns_dict(requests_mock): - """get_watershed(format='object') must return parsed JSON, not None.""" - url = "https://streamstats.usgs.gov/streamstatsservices/watershed.geojson" - requests_mock.get(url, text=json.dumps(SAMPLE_JSON)) - - result = get_watershed("NY", -74.524, 43.939, format="object") - assert isinstance(result, dict) - assert result["workspaceID"] == "NY20240101000000000" - - -def test_watershed_init_populates_instance(requests_mock): - """Watershed(...) must populate the instance (regression: previously discarded).""" - url = "https://streamstats.usgs.gov/streamstatsservices/watershed.geojson" - requests_mock.get(url, text=json.dumps(SAMPLE_JSON)) - - w = Watershed("NY", -74.524, 43.939) - assert w.workspace_id == "NY20240101000000000" - assert w.parameters[0]["code"] == "DRNAREA" - assert w.watershed_point["id"] == "pt-1" - assert w.watershed_polygon["id"] == "poly-1" - - -def test_workspace_id_back_compat_alias(): - """Legacy `_workspaceID` attribute should still resolve.""" - w = Watershed.from_streamstats_json(SAMPLE_JSON) - assert w._workspaceID == w.workspace_id == "NY20240101000000000" From f3d1e8f7eca57e0cf47f038120533324aaefe68a Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 4 May 2026 14:43:46 -0500 Subject: [PATCH 4/5] Make `format` contract explicit in get_watershed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously `get_watershed` had two named formats (`"geojson"`, `"object"`) and any other value silently fell through to a `Watershed` — the same bug shape that masked the dead `"shape"` branch removed earlier in this PR. Add an explicit `"watershed"` case and raise `ValueError` on unrecognized values so a typo'd format fails loudly instead of returning whatever the fallthrough is. Also fix `get_sample_watershed`, whose docstring promises a `Watershed` but called `get_watershed(...)` with the default `format="geojson"` (returns the raw `requests.Response`). Pass `format="watershed"` explicitly so the function matches its documented return type. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/streamstats.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/dataretrieval/streamstats.py b/dataretrieval/streamstats.py index 934143e2..672ecd3b 100644 --- a/dataretrieval/streamstats.py +++ b/dataretrieval/streamstats.py @@ -56,7 +56,7 @@ def get_sample_watershed(): from the streamstats JSON object. """ - return get_watershed("NY", -74.524, 43.939) + return get_watershed("NY", -74.524, 43.939, format="watershed") def get_watershed( @@ -106,8 +106,8 @@ def get_watershed( format: string, optional Selects the return shape. ``"geojson"`` (default) returns the raw ``requests.Response``; ``"object"`` returns the parsed JSON ``dict``; - any other value returns a :obj:`Watershed` instance built from the - parsed JSON. + ``"watershed"`` returns a :obj:`Watershed` instance built from the + parsed JSON. Any other value raises ``ValueError``. Returns ------- @@ -140,7 +140,12 @@ def get_watershed( if format == "object": return data - return Watershed.from_streamstats_json(data) + if format == "watershed": + return Watershed.from_streamstats_json(data) + + raise ValueError( + f"Invalid format {format!r}; expected 'geojson', 'object', or 'watershed'." + ) class Watershed: From e9aa572ccc1bb96c2011814f06d9b59444c90d0d Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 4 May 2026 15:03:52 -0500 Subject: [PATCH 5/5] Update get_watershed narrative docstring to match multi-return contract Per Copilot review: the narrative section claimed `get_watershed` "Returns a watershed object", but with the explicit `format` contract the function actually returns a `requests.Response` (default), a `dict`, or a `Watershed` depending on `format`. Rewrite the narrative to describe the three return shapes accurately. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/streamstats.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/dataretrieval/streamstats.py b/dataretrieval/streamstats.py index 672ecd3b..9ae9339c 100644 --- a/dataretrieval/streamstats.py +++ b/dataretrieval/streamstats.py @@ -70,14 +70,13 @@ def get_watershed( simplify=True, format="geojson", ): - """Get watershed object based on location - - **Streamstats documentation:** - Returns a watershed object. The request configuration will determine the - overall request response. However all returns will return a watershed - object with at least the workspaceid. The workspace id is the id to the - service workspace where files are stored and can be used for further - processing such as for downloads and flow statistic computations. + """Delineate a watershed via the StreamStats API. + + Hits the StreamStats ``watershed.geojson`` endpoint and returns the + delineated watershed in one of three shapes selected by ``format``: the + raw ``requests.Response`` (default), the parsed JSON ``dict``, or a + :obj:`Watershed` instance. Every response carries a workspace identifier + that can be passed to :obj:`download_workspace` for further processing. See: https://streamstats.usgs.gov/streamstatsservices/#/ for more information.