From e3047631f9249ede41c4d400ae2f87be048e1d04 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 6 May 2026 10:11:35 -0500 Subject: [PATCH 1/7] Add waterdata.get_ratings for USGS stage-discharge ratings via STAC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wraps the new Water Data STAC catalog endpoint (api.waterdata.usgs.gov/stac/v0/search) for stage-discharge rating curves. Lives in its own module (waterdata/ratings.py) because the transport layer — STAC search + per-feature RDB download — differs from the OGC collections used by the rest of the package. The function: - Composes a CQL filter from monitoring_location_id and (single-value) file_type, mirroring R's logic. Multi-type requests fetch all matches and filter URLs client-side. - Optionally downloads each matching .rdb asset to a user-supplied file_path (default: a fresh tempfile.mkdtemp), and parses with the existing nwis._read_rdb helper. - Returns a dict[id -> DataFrame] when download_and_parse=True, or the raw list of STAC features when False (cheap "what's available?" inspection). Surfaces a clear ValueError for invalid file_type values and for ISO 8601 durations in `datetime` (the rating-curve service rejects them). Mirrors R's read_waterdata_ratings in DOI-USGS/dataRetrieval; example sites and idioms come straight from the R doc. Tests: 9 unit tests covering filter composition, the two error paths, and end-to-end search-and-download via requests_mock (single-site, download_and_parse=False, and multi-type URL filtering). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- NEWS.md | 2 + dataretrieval/waterdata/__init__.py | 2 + dataretrieval/waterdata/ratings.py | 260 ++++++++++++++++++++++++++++ tests/waterdata_ratings_test.py | 154 ++++++++++++++++ 4 files changed, 418 insertions(+) create mode 100644 dataretrieval/waterdata/ratings.py create mode 100644 tests/waterdata_ratings_test.py diff --git a/NEWS.md b/NEWS.md index beabe9d8..7413b820 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ +**05/06/2026:** Added `waterdata.get_ratings(...)` — wraps the new Water Data STAC catalog (`api.waterdata.usgs.gov/stac/v0/search`) for USGS stage-discharge rating curves. Returns parsed `exsa` / `base` / `corr` rating tables as a dict of DataFrames keyed by feature ID, or just the list of available STAC features when `download_and_parse=False`. Mirrors R's `read_waterdata_ratings`. + **05/05/2026:** Added `waterdata.get_combined_metadata(...)` — wraps the Water Data API's `combined-metadata` collection, which joins the monitoring-locations catalog with the time-series-metadata catalog and returns one row per (location, parameter, statistic) inventory entry. This is the most flexible "what data is available" endpoint in the API: any location attribute (state, HUC, site type, drainage area, well-construction depth, …) can be combined with any time-series attribute (parameter code, statistic, data type, period of record, …) in a single query. Mirrors R's `read_waterdata_combined_meta`. **05/05/2026:** Added `waterdata.get_samples_summary(monitoringLocationIdentifier=...)` — wraps the Samples database `/summary/{id}` endpoint, returning per-characteristic result and activity counts plus first / most recent activity dates for a single monitoring location. Useful for taking inventory of available discrete-sample data before pulling observations with `get_samples`. 
diff --git a/dataretrieval/waterdata/__init__.py b/dataretrieval/waterdata/__init__.py index 22fb7d38..b7ff474b 100644 --- a/dataretrieval/waterdata/__init__.py +++ b/dataretrieval/waterdata/__init__.py @@ -29,6 +29,7 @@ ) from .filters import FILTER_LANG from .nearest import get_nearest_continuous +from .ratings import get_ratings from .types import ( CODE_SERVICES, PROFILE_LOOKUP, @@ -52,6 +53,7 @@ "get_latest_daily", "get_monitoring_locations", "get_nearest_continuous", + "get_ratings", "get_reference_table", "get_samples", "get_samples_summary", diff --git a/dataretrieval/waterdata/ratings.py b/dataretrieval/waterdata/ratings.py new file mode 100644 index 00000000..813fa592 --- /dev/null +++ b/dataretrieval/waterdata/ratings.py @@ -0,0 +1,260 @@ +"""USGS rating-curve retrieval via the Water Data STAC catalog. + +Wraps ``https://api.waterdata.usgs.gov/stac/v0/search`` and the per-feature +RDB downloads that follow. The STAC endpoint hosts standard NWIS rating +files (``exsa``, ``base``, ``corr``) for active streamgages — see the +service overview at https://api.waterdata.usgs.gov/docs/stac/ and the +WDFN announcement at https://waterdata.usgs.gov/blog/wdfn-rating-curves/. + +This is the discrete analogue to the OGC waterdata getters; it lives in +its own module because the transport layer (STAC search + RDB download) +differs from the OGC collections used by the rest of the package. + +The R analogue is ``read_waterdata_ratings`` in +https://github.com/DOI-USGS/dataRetrieval/. +""" + +from __future__ import annotations + +import logging +import os +import tempfile +from typing import Any + +import pandas as pd +import requests + +from dataretrieval.nwis import _read_rdb + +from .utils import BASE_URL, _default_headers, _format_api_dates + +logger = logging.getLogger(__name__) + +STAC_URL = f"{BASE_URL}/stac/v0" +_VALID_FILE_TYPES: tuple[str, ...] 
= ("exsa", "base", "corr") + + +def _build_filter( + monitoring_location_id: str | list[str] | None, + file_type: str | None, +) -> str | None: + """Compose the CQL filter sent to STAC ``/search``. + + Mirrors R's logic: only pin ``file_type`` when a single value was given, + so a multi-type request returns every matching site and the file-type + filtering happens client-side from the per-feature URLs. + """ + parts: list[str] = [] + if monitoring_location_id is not None: + ids = ( + [monitoring_location_id] + if isinstance(monitoring_location_id, str) + else list(monitoring_location_id) + ) + joined = "', '".join(ids) + parts.append(f"monitoring_location_id IN ('{joined}')") + if file_type is not None: + parts.append(f"file_type = '{file_type}'") + return " AND ".join(parts) if parts else None + + +def _search( + filter_str: str | None, + datetime_str: str | None, + bbox: list[float] | None, + limit: int, + ssl_check: bool, +) -> list[dict[str, Any]]: + """Run a single STAC ``/search`` request and return its features.""" + params: dict[str, Any] = {"limit": limit} + if filter_str is not None: + params["filter"] = filter_str + if datetime_str is not None: + params["datetime"] = datetime_str + if bbox is not None: + params["bbox"] = ",".join(str(b) for b in bbox) + + response = requests.get( + f"{STAC_URL}/search", + params=params, + headers=_default_headers(), + verify=ssl_check, + ) + response.raise_for_status() + return response.json().get("features", []) + + +def _download_and_parse( + feature: dict[str, Any], + file_path: str, + ssl_check: bool, +) -> pd.DataFrame: + """Fetch the feature's data asset, write it to ``file_path``, parse RDB.""" + url = feature["assets"]["data"]["href"] + fid = feature["id"] + target = os.path.join(file_path, fid) + + response = requests.get(url, headers=_default_headers(), verify=ssl_check) + response.raise_for_status() + + with open(target, "w") as f: + f.write(response.text) + + return _read_rdb(response.text) + + +def 
get_ratings( + monitoring_location_id: str | list[str] | None = None, + file_type: str | list[str] = "exsa", + file_path: str | None = None, + datetime: str | list[str] | None = None, + bbox: list[float] | None = None, + limit: int = 10000, + download_and_parse: bool = True, + ssl_check: bool = True, +) -> dict[str, pd.DataFrame] | list[dict[str, Any]]: + """Get USGS stage-discharge rating curves from the Water Data STAC catalog. + + Returns the current rating tables for one or more active USGS streamgages. + The catalog hosts three file types: + + - ``"exsa"`` — expanded shift-adjusted rating (default). Adds a ``SHIFT`` + column to ``"base"`` indicating the current shift for each ``INDEP``. + - ``"base"`` — three columns: ``INDEP`` (typically gage height, ft); + ``DEP`` (typically discharge, ft^3/s); ``STOR`` ("``*``" marks fixed + points of the rating). + - ``"corr"`` — three columns: ``INDEP``; ``CORR`` (correction for that + value); ``CORRINDEP`` (corrected INDEP). + + See https://api.waterdata.usgs.gov/docs/stac/ for the upstream service + docs and https://waterdata.usgs.gov/blog/wdfn-rating-curves/ for the + background announcement. The R analogue is ``read_waterdata_ratings`` + in https://github.com/DOI-USGS/dataRetrieval/. + + Parameters + ---------- + monitoring_location_id : string or list of strings, optional + One or more identifiers in ``AGENCY-ID`` form (e.g. + ``"USGS-01104475"``). If omitted, the spatial / temporal filters + determine the result set. + file_type : string or list of strings, default ``"exsa"`` + Which rating file(s) to request. One or more of ``"exsa"``, + ``"base"``, ``"corr"``. + file_path : string, optional + Directory the downloaded RDB files are written to. Defaults to a + per-call temporary directory created via :func:`tempfile.mkdtemp`. + datetime : string or list of strings, optional + STAC ``datetime`` filter — a single date / datetime, or an + interval (``"start/end"``, optionally half-bounded with ``..``). 
+ ISO 8601 *durations* (``"P1M"``, ``"PT36H"``, …) are **not** + supported by the rating-curve service; passing one raises + ``ValueError``. + bbox : list of numbers, optional + Only features whose geometry intersects the bounding box are + selected. Format: ``[xmin, ymin, xmax, ymax]`` in CRS 4326 + (longitude / latitude, west-south-east-north). + limit : int, default 10000 + Page size for the STAC ``/search`` request (capped at 10000). + download_and_parse : bool, default ``True`` + If ``True``, download every matching RDB file and parse it into a + ``DataFrame``. If ``False``, return the raw list of STAC feature + dicts so the caller can inspect what's available before pulling + bytes. + ssl_check : bool, default ``True`` + Verify the server's SSL certificate. + + Returns + ------- + dict[str, pandas.DataFrame] or list[dict] + When ``download_and_parse=True`` (the default), a dict keyed by + feature ID (e.g. ``"USGS-01104475.exsa.rdb"``) mapping to a parsed + ``DataFrame``. When ``download_and_parse=False``, the raw list of + STAC feature dicts as returned by the search endpoint. + + Raises + ------ + ValueError + For an unrecognized ``file_type`` value or an ISO 8601 duration in + ``datetime``. + + Examples + -------- + .. code:: + + >>> # Default exsa ratings for two sites + >>> ratings = dataretrieval.waterdata.get_ratings( + ... monitoring_location_id=["USGS-01104475", "USGS-01104460"], + ... file_type="exsa", + ... ) + >>> ratings["USGS-01104475.exsa.rdb"].head() + + >>> # Both exsa and corr files for the same two sites + >>> ratings = dataretrieval.waterdata.get_ratings( + ... monitoring_location_id=["USGS-01104475", "USGS-01104460"], + ... file_type=["exsa", "corr"], + ... ) + + >>> # Bounding-box query, listing what's available without downloading + >>> features = dataretrieval.waterdata.get_ratings( + ... bbox=[-95.0, 40.0, -92.0, 42.0], + ... download_and_parse=False, + ... 
) + + >>> # Restrict to features modified since seven days ago (no durations) + >>> features = dataretrieval.waterdata.get_ratings( + ... bbox=[-95.0, 40.0, -92.0, 42.0], + ... datetime=["2026-04-29", ".."], + ... download_and_parse=False, + ... ) + + """ + file_types = [file_type] if isinstance(file_type, str) else list(file_type) + invalid = [ft for ft in file_types if ft not in _VALID_FILE_TYPES] + if invalid: + raise ValueError( + f"Invalid file_type {invalid!r}. Valid options: {list(_VALID_FILE_TYPES)}." + ) + + if datetime is not None: + # The rating-curve STAC service rejects ISO 8601 durations; surface a + # clear error rather than letting the server return a confusing 4xx. + dt_values = datetime if isinstance(datetime, list) else [datetime] + if any(v is not None and "P" in str(v).upper() for v in dt_values): + raise ValueError( + "ISO 8601 durations (e.g. 'P7D') are not supported in " + "`datetime` for the rating-curve service. Provide a date or " + "interval instead." + ) + datetime_str = _format_api_dates(datetime, date=False) + else: + datetime_str = None + + # Mirror R: only pin file_type in the server-side filter when one type + # is requested. With multiple types, fetch all and filter URLs locally. + server_file_type = file_types[0] if len(file_types) == 1 else None + filter_str = _build_filter(monitoring_location_id, server_file_type) + + features = _search(filter_str, datetime_str, bbox, limit, ssl_check) + + if not download_and_parse: + return features + + if file_path is None: + file_path = tempfile.mkdtemp(prefix="dataretrieval-ratings-") + os.makedirs(file_path, exist_ok=True) + + out: dict[str, pd.DataFrame] = {} + for feature in features: + url = feature.get("assets", {}).get("data", {}).get("href", "") + # Skip features whose file type wasn't requested (only relevant when + # `file_type` is a list — single-type requests are already filtered + # server-side). 
+ if not any(ft in url for ft in file_types): + continue + fid = feature["id"] + try: + out[fid] = _download_and_parse(feature, file_path, ssl_check) + except Exception as e: + logger.warning("Failed to download / parse %s: %s", fid, e) + + return out diff --git a/tests/waterdata_ratings_test.py b/tests/waterdata_ratings_test.py new file mode 100644 index 00000000..5962b363 --- /dev/null +++ b/tests/waterdata_ratings_test.py @@ -0,0 +1,154 @@ +import sys +from urllib.parse import parse_qs, urlsplit + +import pandas as pd +import pytest + +if sys.version_info < (3, 10): + pytest.skip("Skip entire module on Python < 3.10", allow_module_level=True) + +from dataretrieval.waterdata import get_ratings +from dataretrieval.waterdata.ratings import _build_filter + + +def test_build_filter_single_site_single_type(): + f = _build_filter("USGS-01104475", "exsa") + assert f == "monitoring_location_id IN ('USGS-01104475') AND file_type = 'exsa'" + + +def test_build_filter_multi_site_no_type(): + f = _build_filter(["USGS-A", "USGS-B"], None) + assert f == "monitoring_location_id IN ('USGS-A', 'USGS-B')" + + +def test_build_filter_no_site_single_type(): + f = _build_filter(None, "corr") + assert f == "file_type = 'corr'" + + +def test_build_filter_empty_returns_none(): + assert _build_filter(None, None) is None + + +def test_get_ratings_rejects_invalid_file_type(): + with pytest.raises(ValueError, match="Invalid file_type"): + get_ratings(monitoring_location_id="USGS-01104475", file_type="bogus") + + +def test_get_ratings_rejects_iso_8601_duration_in_datetime(): + """STAC ratings doesn't accept ISO 8601 durations; surface a clear error.""" + with pytest.raises(ValueError, match="durations.*not supported"): + get_ratings( + monitoring_location_id="USGS-01104475", + datetime="P7D", + ) + + +_SAMPLE_RDB = """\ +# header line one +# header line two +agency_cd\tsite_no\tINDEP\tDEP +5s\t15s\t10n\t10n +USGS\t01104475\t0.10\t0.0 +USGS\t01104475\t0.20\t0.5 +USGS\t01104475\t0.30\t1.2 +""" + 
+ +def _stub_search_response(): + return { + "features": [ + { + "id": "USGS-01104475.exsa.rdb", + "properties": {"file_type": "exsa"}, + "assets": { + "data": { + "href": "https://api.waterdata.usgs.gov/stac-files/ratings/USGS.01104475.exsa.rdb" + } + }, + } + ] + } + + +def test_get_ratings_mocked_search_and_download(requests_mock, tmp_path): + """End-to-end happy path with mocked STAC search + RDB download.""" + requests_mock.get( + "https://api.waterdata.usgs.gov/stac/v0/search", + json=_stub_search_response(), + ) + requests_mock.get( + "https://api.waterdata.usgs.gov/stac-files/ratings/USGS.01104475.exsa.rdb", + text=_SAMPLE_RDB, + ) + + out = get_ratings( + monitoring_location_id="USGS-01104475", + file_type="exsa", + file_path=str(tmp_path), + ) + assert "USGS-01104475.exsa.rdb" in out + df = out["USGS-01104475.exsa.rdb"] + assert isinstance(df, pd.DataFrame) + assert {"INDEP", "DEP"}.issubset(df.columns) + assert len(df) == 3 + + # Server-side filter should pin the single requested file_type. 
+ sent = requests_mock.request_history[0] + qs = parse_qs(urlsplit(sent.url).query) + assert "file_type = 'exsa'" in qs["filter"][0] + assert "monitoring_location_id IN ('USGS-01104475')" in qs["filter"][0] + + +def test_get_ratings_download_and_parse_false_returns_features(requests_mock): + requests_mock.get( + "https://api.waterdata.usgs.gov/stac/v0/search", + json=_stub_search_response(), + ) + features = get_ratings( + monitoring_location_id="USGS-01104475", + download_and_parse=False, + ) + assert isinstance(features, list) + assert features[0]["id"] == "USGS-01104475.exsa.rdb" + + +def test_get_ratings_multi_type_filters_urls_locally(requests_mock, tmp_path): + """File_type list: server filter omits it; URL filtering is local.""" + requests_mock.get( + "https://api.waterdata.usgs.gov/stac/v0/search", + json={ + "features": [ + { + "id": "USGS-X.exsa.rdb", + "properties": {"file_type": "exsa"}, + "assets": {"data": {"href": "https://x.example/X.exsa.rdb"}}, + }, + { + "id": "USGS-X.base.rdb", + "properties": {"file_type": "base"}, + "assets": {"data": {"href": "https://x.example/X.base.rdb"}}, + }, + { + "id": "USGS-X.corr.rdb", + "properties": {"file_type": "corr"}, + "assets": {"data": {"href": "https://x.example/X.corr.rdb"}}, + }, + ] + }, + ) + # Only mock the two URLs we expect to be downloaded. + requests_mock.get("https://x.example/X.exsa.rdb", text=_SAMPLE_RDB) + requests_mock.get("https://x.example/X.corr.rdb", text=_SAMPLE_RDB) + + out = get_ratings( + monitoring_location_id="USGS-X", + file_type=["exsa", "corr"], + file_path=str(tmp_path), + ) + assert set(out) == {"USGS-X.exsa.rdb", "USGS-X.corr.rdb"} + + # Server-side filter must NOT include file_type for multi-type requests. 
+ search_req = requests_mock.request_history[0] + qs = parse_qs(urlsplit(search_req.url).query) + assert "file_type" not in qs["filter"][0] From 630cc8c2d93df69d9bb46c8276bbb4422629506c Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 6 May 2026 10:26:21 -0500 Subject: [PATCH 2/7] get_ratings: surface RDB header as df.attrs and document the nwis dep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two non-functional follow-ups suggested during review of #269: (1) Document the cross-module reach into nwis._read_rdb. Rating files use the same USGS RDB shape as NWIS responses, so the parser is already reusable as-is — no refactor of the legacy nwis module is needed. Added a comment at the import site explaining why the private import is intentional and what to watch for if _read_rdb ever moves. (2) Surface the RDB #-prefixed header block. Each parsed rating frame now carries provenance in df.attrs: - df.attrs["comment"]: the list of "#"-prefixed header lines (rating id, parameter, expansion type, last-shifted timestamp, warnings, etc.). - df.attrs["url"]: the asset URL it was fetched from. R's read_waterdata_ratings exposes the comment block via comment(df); pandas's standard `attrs` dict is the Python equivalent. Done in ratings.py only — does not touch nwis. A live spot-check against api.waterdata.usgs.gov on USGS-01104475 exsa shows the 31-line USGS header survives intact (gauge name, parameter code, rating expansion, etc.). One new unit test pins the behavior. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/waterdata/ratings.py | 30 +++++++++++++++++++++++++++--- tests/waterdata_ratings_test.py | 25 +++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/dataretrieval/waterdata/ratings.py b/dataretrieval/waterdata/ratings.py index 813fa592..6e58f1e9 100644 --- a/dataretrieval/waterdata/ratings.py +++ b/dataretrieval/waterdata/ratings.py @@ -24,6 +24,11 @@ import pandas as pd import requests +# Rating files use the same USGS RDB shape as NWIS responses (comment +# block prefixed with ``#``, header row, format-spec row, then tab-separated +# data), so we reuse the parser already in ``nwis``. ``_read_rdb`` is private; +# if it ever moves or its contract changes we want a loud failure here, hence +# the explicit import rather than a copy. from dataretrieval.nwis import _read_rdb from .utils import BASE_URL, _default_headers, _format_api_dates @@ -84,6 +89,18 @@ def _search( return response.json().get("features", []) +def _extract_rdb_comment(rdb: str) -> list[str]: + """Return the RDB ``#``-prefixed comment block as a list of header lines. + + The comment block carries useful per-rating metadata — rating id, + parameter description, expansion type, last-shifted timestamp, etc. + R's ``read_waterdata_ratings`` exposes this via ``comment(df)``; we + attach it to ``df.attrs["comment"]`` so callers can inspect or log + provenance without re-reading the on-disk RDB. 
+ """ + return [line for line in rdb.splitlines() if line.startswith("#")] + + def _download_and_parse( feature: dict[str, Any], file_path: str, @@ -100,7 +117,10 @@ def _download_and_parse( with open(target, "w") as f: f.write(response.text) - return _read_rdb(response.text) + df = _read_rdb(response.text) + df.attrs["comment"] = _extract_rdb_comment(response.text) + df.attrs["url"] = url + return df def get_ratings( @@ -168,8 +188,12 @@ def get_ratings( dict[str, pandas.DataFrame] or list[dict] When ``download_and_parse=True`` (the default), a dict keyed by feature ID (e.g. ``"USGS-01104475.exsa.rdb"``) mapping to a parsed - ``DataFrame``. When ``download_and_parse=False``, the raw list of - STAC feature dicts as returned by the search endpoint. + ``DataFrame``. Each frame carries provenance in + ``df.attrs["comment"]`` (the RDB ``#``-prefixed header lines, like + rating id, parameter, last-shifted timestamp) and + ``df.attrs["url"]`` (the asset URL it was fetched from). When + ``download_and_parse=False``, the raw list of STAC feature dicts + as returned by the search endpoint. 
Raises ------ diff --git a/tests/waterdata_ratings_test.py b/tests/waterdata_ratings_test.py index 5962b363..ae17c19b 100644 --- a/tests/waterdata_ratings_test.py +++ b/tests/waterdata_ratings_test.py @@ -100,6 +100,31 @@ def test_get_ratings_mocked_search_and_download(requests_mock, tmp_path): assert "monitoring_location_id IN ('USGS-01104475')" in qs["filter"][0] +def test_get_ratings_attaches_rdb_comment_and_url(requests_mock, tmp_path): + """Each parsed frame should carry its RDB header + source URL in df.attrs.""" + requests_mock.get( + "https://api.waterdata.usgs.gov/stac/v0/search", + json=_stub_search_response(), + ) + asset_url = ( + "https://api.waterdata.usgs.gov/stac-files/ratings/USGS.01104475.exsa.rdb" + ) + requests_mock.get(asset_url, text=_SAMPLE_RDB) + + out = get_ratings( + monitoring_location_id="USGS-01104475", + file_type="exsa", + file_path=str(tmp_path), + ) + df = out["USGS-01104475.exsa.rdb"] + # The fixture has two `# ...` lines at the top; both should land in attrs. + assert df.attrs["comment"] == [ + "# header line one", + "# header line two", + ] + assert df.attrs["url"] == asset_url + + def test_get_ratings_download_and_parse_false_returns_features(requests_mock): requests_mock.get( "https://api.waterdata.usgs.gov/stac/v0/search", From 2650f509e483d650671c0aea95d2a47177d32f9a Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 6 May 2026 10:39:40 -0500 Subject: [PATCH 3/7] Address /simplify findings on get_ratings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Six fixes from a multi-agent review pass on PR #269: 1. Narrow the per-feature except handler from a bare Exception to (RequestException, ValueError, OSError). The previous catch-all would swallow programming bugs (KeyError on a malformed feature, AttributeError, ...) and silently drop rows. 2. Escape single quotes in CQL string literals via the standard doubling rule. 
Most monitoring-location IDs cannot contain a quote, but the function takes arbitrary strings — defending against malformed filters and potential injection regardless. New _quote_cql_str helper, applied to monitoring_location_id and file_type. New unit test pins the behaviour. 3. Promote file_type to a Literal["exsa", "base", "corr"] and derive _VALID_FILE_TYPES from it via typing.get_args, so the runtime guard and the type hint can never drift apart. 4. Rename the `datetime` parameter to `time` to match the convention used by every sibling waterdata getter (and to stop shadowing the stdlib `datetime` module). The parameter is still passed through as the STAC `datetime` query string under the hood; that's now documented explicitly. 5. Switch the multi-type local filter from a substring check on the asset URL to feature["properties"]["file_type"]. Substring matching on URLs would false-match if a host or path ever contained one of the literal type names; STAC features carry the typed property already. 6. Skip the on-disk RDB write when file_path is None. tempfile.mkdtemp leaks (no automatic cleanup), and df.attrs["url"] already records the source — so by default we now return only the parsed frame. Users who want a local copy can pass file_path=...; the contract for that path is unchanged. While here, dropped a "discrete analogue to the OGC waterdata getters" sentence from the module docstring (it was internal-architecture WHAT-narrating, not user-relevant guidance). All 11 ratings tests pass (one new test for the CQL quote-escaping). Live verification on USGS-01104475 confirms file_path=None still returns the parsed frame with df.attrs populated, multi-type via property filter returns the expected 4 tables, and the renamed `time=` parameter still drives the STAC datetime filter. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/waterdata/ratings.py | 110 ++++++++++++++++------------- tests/waterdata_ratings_test.py | 12 +++- 2 files changed, 69 insertions(+), 53 deletions(-) diff --git a/dataretrieval/waterdata/ratings.py b/dataretrieval/waterdata/ratings.py index 6e58f1e9..ecb7375f 100644 --- a/dataretrieval/waterdata/ratings.py +++ b/dataretrieval/waterdata/ratings.py @@ -6,10 +6,6 @@ service overview at https://api.waterdata.usgs.gov/docs/stac/ and the WDFN announcement at https://waterdata.usgs.gov/blog/wdfn-rating-curves/. -This is the discrete analogue to the OGC waterdata getters; it lives in -its own module because the transport layer (STAC search + RDB download) -differs from the OGC collections used by the rest of the package. - The R analogue is ``read_waterdata_ratings`` in https://github.com/DOI-USGS/dataRetrieval/. """ @@ -18,8 +14,7 @@ import logging import os -import tempfile -from typing import Any +from typing import Any, Literal, get_args import pandas as pd import requests @@ -36,7 +31,19 @@ logger = logging.getLogger(__name__) STAC_URL = f"{BASE_URL}/stac/v0" -_VALID_FILE_TYPES: tuple[str, ...] = ("exsa", "base", "corr") + +RATING_FILE_TYPE = Literal["exsa", "base", "corr"] +_VALID_FILE_TYPES = get_args(RATING_FILE_TYPE) + + +def _quote_cql_str(value: str) -> str: + """Escape a string for inclusion in a single-quoted CQL literal. + + CQL escapes a single quote by doubling it. Most monitoring-location IDs + can never contain a quote, but the function accepts arbitrary strings, + so we defend against malformed filters / injection regardless. + """ + return value.replace("'", "''") def _build_filter( @@ -47,7 +54,7 @@ def _build_filter( Mirrors R's logic: only pin ``file_type`` when a single value was given, so a multi-type request returns every matching site and the file-type - filtering happens client-side from the per-feature URLs. + filtering happens client-side from the per-feature properties. 
""" parts: list[str] = [] if monitoring_location_id is not None: @@ -56,16 +63,16 @@ def _build_filter( if isinstance(monitoring_location_id, str) else list(monitoring_location_id) ) - joined = "', '".join(ids) + joined = "', '".join(_quote_cql_str(i) for i in ids) parts.append(f"monitoring_location_id IN ('{joined}')") if file_type is not None: - parts.append(f"file_type = '{file_type}'") + parts.append(f"file_type = '{_quote_cql_str(file_type)}'") return " AND ".join(parts) if parts else None def _search( filter_str: str | None, - datetime_str: str | None, + time_str: str | None, bbox: list[float] | None, limit: int, ssl_check: bool, @@ -74,8 +81,8 @@ def _search( params: dict[str, Any] = {"limit": limit} if filter_str is not None: params["filter"] = filter_str - if datetime_str is not None: - params["datetime"] = datetime_str + if time_str is not None: + params["datetime"] = time_str if bbox is not None: params["bbox"] = ",".join(str(b) for b in bbox) @@ -103,19 +110,19 @@ def _extract_rdb_comment(rdb: str) -> list[str]: def _download_and_parse( feature: dict[str, Any], - file_path: str, + file_path: str | None, ssl_check: bool, ) -> pd.DataFrame: - """Fetch the feature's data asset, write it to ``file_path``, parse RDB.""" + """Fetch the feature's data asset, parse RDB, optionally persist to disk.""" url = feature["assets"]["data"]["href"] fid = feature["id"] - target = os.path.join(file_path, fid) response = requests.get(url, headers=_default_headers(), verify=ssl_check) response.raise_for_status() - with open(target, "w") as f: - f.write(response.text) + if file_path is not None: + with open(os.path.join(file_path, fid), "w") as f: + f.write(response.text) df = _read_rdb(response.text) df.attrs["comment"] = _extract_rdb_comment(response.text) @@ -125,9 +132,9 @@ def _download_and_parse( def get_ratings( monitoring_location_id: str | list[str] | None = None, - file_type: str | list[str] = "exsa", + file_type: RATING_FILE_TYPE | list[RATING_FILE_TYPE] = "exsa", 
file_path: str | None = None, - datetime: str | list[str] | None = None, + time: str | list[str] | None = None, bbox: list[float] | None = None, limit: int = 10000, download_and_parse: bool = True, @@ -157,18 +164,19 @@ def get_ratings( One or more identifiers in ``AGENCY-ID`` form (e.g. ``"USGS-01104475"``). If omitted, the spatial / temporal filters determine the result set. - file_type : string or list of strings, default ``"exsa"`` - Which rating file(s) to request. One or more of ``"exsa"``, - ``"base"``, ``"corr"``. + file_type : ``"exsa"``, ``"base"``, ``"corr"``, or a list, default ``"exsa"`` + Which rating file(s) to request. file_path : string, optional - Directory the downloaded RDB files are written to. Defaults to a - per-call temporary directory created via :func:`tempfile.mkdtemp`. - datetime : string or list of strings, optional - STAC ``datetime`` filter — a single date / datetime, or an - interval (``"start/end"``, optionally half-bounded with ``..``). - ISO 8601 *durations* (``"P1M"``, ``"PT36H"``, …) are **not** - supported by the rating-curve service; passing one raises - ``ValueError``. + Directory the downloaded RDB files are written to. If ``None`` + (the default), the parsed ``DataFrame`` is returned without + persisting the bytes to disk; ``df.attrs["url"]`` still records + where each rating came from. + time : string or list of strings, optional + STAC ``datetime`` filter (passed through verbatim under that name) + — a single date / datetime, or an interval (``"start/end"``, + optionally half-bounded with ``..``). ISO 8601 *durations* + (``"P1M"``, ``"PT36H"``, …) are **not** supported by the + rating-curve service; passing one raises ``ValueError``. bbox : list of numbers, optional Only features whose geometry intersects the bounding box are selected. Format: ``[xmin, ymin, xmax, ymax]`` in CRS 4326 @@ -199,7 +207,7 @@ def get_ratings( ------ ValueError For an unrecognized ``file_type`` value or an ISO 8601 duration in - ``datetime``. 
+ ``time``. Examples -------- @@ -224,10 +232,10 @@ def get_ratings( ... download_and_parse=False, ... ) - >>> # Restrict to features modified since seven days ago (no durations) + >>> # Restrict to features in a date range (durations not supported) >>> features = dataretrieval.waterdata.get_ratings( ... bbox=[-95.0, 40.0, -92.0, 42.0], - ... datetime=["2026-04-29", ".."], + ... time=["2026-04-29", ".."], ... download_and_parse=False, ... ) @@ -239,46 +247,46 @@ def get_ratings( f"Invalid file_type {invalid!r}. Valid options: {list(_VALID_FILE_TYPES)}." ) - if datetime is not None: + if time is not None: # The rating-curve STAC service rejects ISO 8601 durations; surface a # clear error rather than letting the server return a confusing 4xx. - dt_values = datetime if isinstance(datetime, list) else [datetime] - if any(v is not None and "P" in str(v).upper() for v in dt_values): + time_values = time if isinstance(time, list) else [time] + if any(v is not None and "P" in str(v).upper() for v in time_values): raise ValueError( "ISO 8601 durations (e.g. 'P7D') are not supported in " - "`datetime` for the rating-curve service. Provide a date or " + "`time` for the rating-curve service. Provide a date or " "interval instead." ) - datetime_str = _format_api_dates(datetime, date=False) + time_str = _format_api_dates(time, date=False) else: - datetime_str = None + time_str = None # Mirror R: only pin file_type in the server-side filter when one type - # is requested. With multiple types, fetch all and filter URLs locally. + # is requested. With multiple types, fetch all and filter locally. 
server_file_type = file_types[0] if len(file_types) == 1 else None filter_str = _build_filter(monitoring_location_id, server_file_type) - features = _search(filter_str, datetime_str, bbox, limit, ssl_check) + features = _search(filter_str, time_str, bbox, limit, ssl_check) if not download_and_parse: return features - if file_path is None: - file_path = tempfile.mkdtemp(prefix="dataretrieval-ratings-") - os.makedirs(file_path, exist_ok=True) + if file_path is not None: + os.makedirs(file_path, exist_ok=True) out: dict[str, pd.DataFrame] = {} + requested = set(file_types) for feature in features: - url = feature.get("assets", {}).get("data", {}).get("href", "") - # Skip features whose file type wasn't requested (only relevant when - # `file_type` is a list — single-type requests are already filtered - # server-side). - if not any(ft in url for ft in file_types): + # Multi-type requests skip the server-side file_type filter, so + # filter here on the per-feature property (more reliable than + # substring-matching the URL). 
+ feat_type = feature.get("properties", {}).get("file_type") + if feat_type not in requested: continue fid = feature["id"] try: out[fid] = _download_and_parse(feature, file_path, ssl_check) - except Exception as e: + except (requests.RequestException, ValueError, OSError) as e: logger.warning("Failed to download / parse %s: %s", fid, e) return out diff --git a/tests/waterdata_ratings_test.py b/tests/waterdata_ratings_test.py index ae17c19b..096e9b61 100644 --- a/tests/waterdata_ratings_test.py +++ b/tests/waterdata_ratings_test.py @@ -35,15 +35,23 @@ def test_get_ratings_rejects_invalid_file_type(): get_ratings(monitoring_location_id="USGS-01104475", file_type="bogus") -def test_get_ratings_rejects_iso_8601_duration_in_datetime(): +def test_get_ratings_rejects_iso_8601_duration_in_time(): """STAC ratings doesn't accept ISO 8601 durations; surface a clear error.""" with pytest.raises(ValueError, match="durations.*not supported"): get_ratings( monitoring_location_id="USGS-01104475", - datetime="P7D", + time="P7D", ) +def test_build_filter_escapes_quotes(): + """Defends against malformed CQL or injection if an ID contains a quote.""" + from dataretrieval.waterdata.ratings import _build_filter + + f = _build_filter("USGS-x'-y", None) + assert f == "monitoring_location_id IN ('USGS-x''-y')" + + _SAMPLE_RDB = """\ # header line one # header line two From ad60f67c9928fee8527a681a10b5655f2e7a5c21 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 6 May 2026 12:56:44 -0500 Subject: [PATCH 4/7] Tighten get_ratings: idioms, abstraction, readability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eight small changes; net -16 lines. No behavior change other than the duration check now using the project's anchored regex instead of an ad-hoc substring search (small correctness improvement). 1. Reorder so the public `get_ratings` is at the top and helpers follow — matches the convention in `nearest.py`. 2. 
Reuse `_DURATION_RE` from `waterdata.utils` instead of the ad-hoc `"P" in str(v).upper()` check. The regex is anchored at start and case-correct; the substring check was both too loose (matched any string containing 'p'/'P') and case-wrong (ISO 8601 P/PT designators are case-sensitive — uppercasing the input turned any lowercase 'p' into a false-positive match). 3. Extract `_as_list(x)` for the `[x] if isinstance(x, str) else list(x)` pattern, which appeared three times (file_type normalization, monitoring_location_id in `_build_filter`, time values in the duration check). 4. Drop redundant `date=False` on the `_format_api_dates` call (it's the default). 5. Collapse the `if time is not None: ... else: time_str = None` four-line block into a single ternary. 6. Pre-filter `features` by file_type before the download loop, so the loop body is just download-and-handle-errors. The "skip features whose file type wasn't requested" comment goes away because the list-comprehension above is self-documenting. 7. Drop two narrating comments whose WHY is already captured by the helpers' docstrings (`_build_filter` and `_extract_rdb_comment`). 8. Rename `test_get_ratings_multi_type_filters_urls_locally` → `..._filters_via_property` — the test name still claimed URL filtering after the property-based switch in the previous /simplify pass. All 11 ratings tests pass; live verification on api.waterdata.usgs.gov confirms file_path=None still attaches df.attrs, multi-type filtering returns the expected four tables, and the duration check still rejects "P7D" while accepting "2026-04-29". 
Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/waterdata/ratings.py | 237 +++++++++++++---------------- tests/waterdata_ratings_test.py | 4 +- 2 files changed, 112 insertions(+), 129 deletions(-) diff --git a/dataretrieval/waterdata/ratings.py b/dataretrieval/waterdata/ratings.py index ecb7375f..d40e7dcb 100644 --- a/dataretrieval/waterdata/ratings.py +++ b/dataretrieval/waterdata/ratings.py @@ -14,19 +14,17 @@ import logging import os -from typing import Any, Literal, get_args +from typing import Any, Iterable, Literal, get_args import pandas as pd import requests -# Rating files use the same USGS RDB shape as NWIS responses (comment -# block prefixed with ``#``, header row, format-spec row, then tab-separated -# data), so we reuse the parser already in ``nwis``. ``_read_rdb`` is private; -# if it ever moves or its contract changes we want a loud failure here, hence -# the explicit import rather than a copy. +# Rating files use the same USGS RDB shape as NWIS responses, so we reuse +# the parser already in ``nwis``. ``_read_rdb`` is private; if it ever +# moves we want a loud failure here, hence the explicit import. from dataretrieval.nwis import _read_rdb -from .utils import BASE_URL, _default_headers, _format_api_dates +from .utils import _DURATION_RE, BASE_URL, _default_headers, _format_api_dates logger = logging.getLogger(__name__) @@ -36,100 +34,6 @@ _VALID_FILE_TYPES = get_args(RATING_FILE_TYPE) -def _quote_cql_str(value: str) -> str: - """Escape a string for inclusion in a single-quoted CQL literal. - - CQL escapes a single quote by doubling it. Most monitoring-location IDs - can never contain a quote, but the function accepts arbitrary strings, - so we defend against malformed filters / injection regardless. - """ - return value.replace("'", "''") - - -def _build_filter( - monitoring_location_id: str | list[str] | None, - file_type: str | None, -) -> str | None: - """Compose the CQL filter sent to STAC ``/search``. 
- - Mirrors R's logic: only pin ``file_type`` when a single value was given, - so a multi-type request returns every matching site and the file-type - filtering happens client-side from the per-feature properties. - """ - parts: list[str] = [] - if monitoring_location_id is not None: - ids = ( - [monitoring_location_id] - if isinstance(monitoring_location_id, str) - else list(monitoring_location_id) - ) - joined = "', '".join(_quote_cql_str(i) for i in ids) - parts.append(f"monitoring_location_id IN ('{joined}')") - if file_type is not None: - parts.append(f"file_type = '{_quote_cql_str(file_type)}'") - return " AND ".join(parts) if parts else None - - -def _search( - filter_str: str | None, - time_str: str | None, - bbox: list[float] | None, - limit: int, - ssl_check: bool, -) -> list[dict[str, Any]]: - """Run a single STAC ``/search`` request and return its features.""" - params: dict[str, Any] = {"limit": limit} - if filter_str is not None: - params["filter"] = filter_str - if time_str is not None: - params["datetime"] = time_str - if bbox is not None: - params["bbox"] = ",".join(str(b) for b in bbox) - - response = requests.get( - f"{STAC_URL}/search", - params=params, - headers=_default_headers(), - verify=ssl_check, - ) - response.raise_for_status() - return response.json().get("features", []) - - -def _extract_rdb_comment(rdb: str) -> list[str]: - """Return the RDB ``#``-prefixed comment block as a list of header lines. - - The comment block carries useful per-rating metadata — rating id, - parameter description, expansion type, last-shifted timestamp, etc. - R's ``read_waterdata_ratings`` exposes this via ``comment(df)``; we - attach it to ``df.attrs["comment"]`` so callers can inspect or log - provenance without re-reading the on-disk RDB. 
- """ - return [line for line in rdb.splitlines() if line.startswith("#")] - - -def _download_and_parse( - feature: dict[str, Any], - file_path: str | None, - ssl_check: bool, -) -> pd.DataFrame: - """Fetch the feature's data asset, parse RDB, optionally persist to disk.""" - url = feature["assets"]["data"]["href"] - fid = feature["id"] - - response = requests.get(url, headers=_default_headers(), verify=ssl_check) - response.raise_for_status() - - if file_path is not None: - with open(os.path.join(file_path, fid), "w") as f: - f.write(response.text) - - df = _read_rdb(response.text) - df.attrs["comment"] = _extract_rdb_comment(response.text) - df.attrs["url"] = url - return df - - def get_ratings( monitoring_location_id: str | list[str] | None = None, file_type: RATING_FILE_TYPE | list[RATING_FILE_TYPE] = "exsa", @@ -240,29 +144,22 @@ def get_ratings( ... ) """ - file_types = [file_type] if isinstance(file_type, str) else list(file_type) + file_types = _as_list(file_type) invalid = [ft for ft in file_types if ft not in _VALID_FILE_TYPES] if invalid: raise ValueError( - f"Invalid file_type {invalid!r}. Valid options: {list(_VALID_FILE_TYPES)}." + f"Invalid file_type {invalid!r}; " + f"valid options are {list(_VALID_FILE_TYPES)}." + ) + + if time is not None and any(_DURATION_RE.match(str(v)) for v in _as_list(time)): + raise ValueError( + "ISO 8601 durations (e.g. 'P7D') are not supported in `time` " + "for the rating-curve service. Provide a date or interval instead." ) + time_str = _format_api_dates(time) if time is not None else None - if time is not None: - # The rating-curve STAC service rejects ISO 8601 durations; surface a - # clear error rather than letting the server return a confusing 4xx. - time_values = time if isinstance(time, list) else [time] - if any(v is not None and "P" in str(v).upper() for v in time_values): - raise ValueError( - "ISO 8601 durations (e.g. 'P7D') are not supported in " - "`time` for the rating-curve service. 
Provide a date or " - "interval instead." - ) - time_str = _format_api_dates(time, date=False) - else: - time_str = None - - # Mirror R: only pin file_type in the server-side filter when one type - # is requested. With multiple types, fetch all and filter locally. + # Mirror R: pin file_type server-side only when one type is requested. server_file_type = file_types[0] if len(file_types) == 1 else None filter_str = _build_filter(monitoring_location_id, server_file_type) @@ -271,18 +168,16 @@ def get_ratings( if not download_and_parse: return features + requested = set(file_types) + matching = [ + f for f in features if f.get("properties", {}).get("file_type") in requested + ] + if file_path is not None: os.makedirs(file_path, exist_ok=True) out: dict[str, pd.DataFrame] = {} - requested = set(file_types) - for feature in features: - # Multi-type requests skip the server-side file_type filter, so - # filter here on the per-feature property (more reliable than - # substring-matching the URL). - feat_type = feature.get("properties", {}).get("file_type") - if feat_type not in requested: - continue + for feature in matching: fid = feature["id"] try: out[fid] = _download_and_parse(feature, file_path, ssl_check) @@ -290,3 +185,91 @@ def get_ratings( logger.warning("Failed to download / parse %s: %s", fid, e) return out + + +def _as_list(x: str | Iterable[str]) -> list[str]: + """Normalize a string or iterable-of-strings to a list.""" + return [x] if isinstance(x, str) else list(x) + + +def _quote_cql_str(value: str) -> str: + """Escape a single-quoted CQL literal by doubling embedded quotes. + + Defends against malformed filters / injection on arbitrary user input, + even though valid USGS monitoring-location IDs cannot contain a quote. + """ + return value.replace("'", "''") + + +def _build_filter( + monitoring_location_id: str | list[str] | None, + file_type: str | None, +) -> str | None: + """Compose the CQL filter sent to STAC ``/search``. 
+ + Returns ``None`` when neither argument constrains the search. + """ + parts: list[str] = [] + if monitoring_location_id is not None: + ids = _as_list(monitoring_location_id) + joined = "', '".join(_quote_cql_str(i) for i in ids) + parts.append(f"monitoring_location_id IN ('{joined}')") + if file_type is not None: + parts.append(f"file_type = '{_quote_cql_str(file_type)}'") + return " AND ".join(parts) if parts else None + + +def _search( + filter_str: str | None, + time_str: str | None, + bbox: list[float] | None, + limit: int, + ssl_check: bool, +) -> list[dict[str, Any]]: + """Run a single STAC ``/search`` request and return its features.""" + params: dict[str, Any] = {"limit": limit} + if filter_str is not None: + params["filter"] = filter_str + if time_str is not None: + params["datetime"] = time_str + if bbox is not None: + params["bbox"] = ",".join(map(str, bbox)) + + response = requests.get( + f"{STAC_URL}/search", + params=params, + headers=_default_headers(), + verify=ssl_check, + ) + response.raise_for_status() + return response.json().get("features", []) + + +def _extract_rdb_comment(rdb: str) -> list[str]: + """Return the RDB ``#``-prefixed comment block as a list of header lines. + + Carries useful per-rating provenance — rating id, parameter description, + expansion type, last-shifted timestamp, etc. R's ``read_waterdata_ratings`` + exposes the equivalent via ``comment(df)``. 
+ """ + return [line for line in rdb.splitlines() if line.startswith("#")] + + +def _download_and_parse( + feature: dict[str, Any], + file_path: str | None, + ssl_check: bool, +) -> pd.DataFrame: + """Fetch the feature's data asset, parse RDB, optionally persist to disk.""" + url = feature["assets"]["data"]["href"] + response = requests.get(url, headers=_default_headers(), verify=ssl_check) + response.raise_for_status() + + if file_path is not None: + with open(os.path.join(file_path, feature["id"]), "w") as f: + f.write(response.text) + + df = _read_rdb(response.text) + df.attrs["comment"] = _extract_rdb_comment(response.text) + df.attrs["url"] = url + return df diff --git a/tests/waterdata_ratings_test.py b/tests/waterdata_ratings_test.py index 096e9b61..88a0c639 100644 --- a/tests/waterdata_ratings_test.py +++ b/tests/waterdata_ratings_test.py @@ -146,8 +146,8 @@ def test_get_ratings_download_and_parse_false_returns_features(requests_mock): assert features[0]["id"] == "USGS-01104475.exsa.rdb" -def test_get_ratings_multi_type_filters_urls_locally(requests_mock, tmp_path): - """File_type list: server filter omits it; URL filtering is local.""" +def test_get_ratings_multi_type_filters_via_property(requests_mock, tmp_path): + """File_type list: server filter omits it; local filter reads the property.""" requests_mock.get( "https://api.waterdata.usgs.gov/stac/v0/search", json={ From 6dc6b64dfe496dd0f6c33938d84aafd3ebb1da70 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 6 May 2026 13:01:44 -0500 Subject: [PATCH 5/7] Address /simplify (third pass) ruff advisory findings on PR #269 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two test-only fixes from a broader-ruff (--select ALL) pass. Both already-passed-CI changes are advisory under the project's narrower rule set (F, E, W, I, UP, B, Q, SIM, TID, C90, E501) but worth applying as cheap correctness/cleanliness wins. 1. RUF043: prefix the pytest.raises(match=...) 
pattern with `r` so the `.*` metacharacters are explicit. Functionally equivalent here (pytest re.searches the message), but the raw string makes the regex intent unambiguous. 2. PLC0415: remove a redundant inline `from dataretrieval.waterdata.ratings import _build_filter` inside test_build_filter_escapes_quotes — `_build_filter` is already imported at module top (left over from before the top-level import landed in an earlier commit on this branch). Other broader-ruff findings reviewed and intentionally skipped as either project-wide patterns or non-actionable: - S113 (`requests.get` without `timeout=`): no other call in the codebase uses one — applying here only would be inconsistent. - PLW1514 (`open(..., "w")` without `encoding=`): same project- wide situation. RDB content is ASCII. - PLR0913 / PLR0917 (8 args on get_ratings): mirrors R's read_waterdata_ratings signature; can't reduce without breaking parity. - FBT001 / FBT002 (boolean positional args download_and_parse / ssl_check): match the rest of waterdata's public surface; reordering would be a breaking API change. - FURB103 / PTH103 / PTH118 / PTH123 (pathlib over os.path): the rest of the codebase uses os.path; consistency wins. - EM101 / EM102 / TRY003 (raise ValueError(msg) with msg pre-assigned): every other `raise ValueError` in waterdata uses the inline f-string; consistency wins. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/waterdata_ratings_test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/waterdata_ratings_test.py b/tests/waterdata_ratings_test.py index 88a0c639..fcead65d 100644 --- a/tests/waterdata_ratings_test.py +++ b/tests/waterdata_ratings_test.py @@ -37,7 +37,7 @@ def test_get_ratings_rejects_invalid_file_type(): def test_get_ratings_rejects_iso_8601_duration_in_time(): """STAC ratings doesn't accept ISO 8601 durations; surface a clear error.""" - with pytest.raises(ValueError, match="durations.*not supported"): + with pytest.raises(ValueError, match=r"durations.*not supported"): get_ratings( monitoring_location_id="USGS-01104475", time="P7D", @@ -46,8 +46,6 @@ def test_get_ratings_rejects_iso_8601_duration_in_time(): def test_build_filter_escapes_quotes(): """Defends against malformed CQL or injection if an ID contains a quote.""" - from dataretrieval.waterdata.ratings import _build_filter - f = _build_filter("USGS-x'-y", None) assert f == "monitoring_location_id IN ('USGS-x''-y')" From ec3b21581788e27b7d5966dda8de4fc415abd69d Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 6 May 2026 13:40:45 -0500 Subject: [PATCH 6/7] Extract RDB parsing into a shared dataretrieval.rdb module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The USGS RDB tab-separated format is used both by NWIS web services and by the new Water Data STAC catalog's rating-curve assets. Until now, only nwis.py had a parser, and waterdata/ratings.py reached across to import the private nwis._read_rdb. That smell is gone: - New top-level module dataretrieval.rdb with two public helpers: - read_rdb(text, dtypes=None): pure parser. Strips the comment block, tab-parses the header row, skips the format-spec row, and returns a DataFrame. 
Forwards optional caller-supplied dtype hints to pandas.read_csv (unknown column names are silently ignored, so dicts are safe to pass on any RDB). Raises ValueError on HTML responses. - extract_rdb_comment(rdb): returns the #-prefixed header block as a list of lines (the equivalent of R's comment(df)). - nwis._read_rdb is now a thin wrapper around rdb.read_rdb that applies the NWIS-specific dtype hints (site_no, dec_long_va, ...) and runs format_response() for datetime indexing. The private symbol's contract is unchanged; existing 30 nwis tests pass. Dropped the now-unused StringIO import. - waterdata.ratings switches its import from nwis._read_rdb to rdb.read_rdb / extract_rdb_comment, dropping the local _extract_rdb_comment helper. The cross-module-private-import comment block goes away — the dependency is now a clean same-package import. 7 new unit tests pin the shared parser's behavior: basic shape parsing, format-spec-row skipping, dtype passthrough, all-comments empty-result, HTML rejection, and the comment extractor. Net stat: +177 / -72. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/nwis.py | 76 +++++++------------------ dataretrieval/rdb.py | 89 ++++++++++++++++++++++++++++++ dataretrieval/waterdata/ratings.py | 19 +------ tests/rdb_test.py | 65 ++++++++++++++++++++++ 4 files changed, 177 insertions(+), 72 deletions(-) create mode 100644 dataretrieval/rdb.py create mode 100644 tests/rdb_test.py diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index 0bcb1d68..b9d7fdd5 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -7,12 +7,12 @@ from __future__ import annotations import warnings -from io import StringIO from json import JSONDecodeError import pandas as pd import requests +from dataretrieval.rdb import read_rdb as _read_rdb_text from dataretrieval.utils import BaseMetadata from .utils import query @@ -1017,65 +1017,29 @@ def _read_json(json): return merged_df -def _read_rdb(rdb): - """ - Convert NWIS rdb table into a ``pandas.dataframe``. +# NWIS-specific column dtype hints; pandas silently ignores unknown +# names, so passing the dict to read_rdb is safe even on responses +# whose columns don't include any of these. +_NWIS_RDB_DTYPES = { + "site_no": str, + "dec_long_va": float, + "dec_lat_va": float, + "parm_cd": str, + "parameter_cd": str, +} - Parameters - ---------- - rdb: string - A string representation of an rdb table - Returns - ------- - df: ``pandas.dataframe`` - A formatted pandas data frame +def _read_rdb(rdb): + """Parse an NWIS RDB response and apply NWIS-specific post-processing. + Thin wrapper around :func:`dataretrieval.rdb.read_rdb` that adds the + NWIS column-dtype hints and runs :func:`format_response` (datetime + index, multi-site MultiIndex, optional GeoDataFrame). """ - if "" in rdb.lower() or "" in rdb.lower(): - raise ValueError( - "Received HTML response instead of RDB. This often indicates " - "that the service has been moved or is currently unavailable." 
- ) - - count = 0 - lines = rdb.splitlines() - - for line in lines: - # ignore comment lines - if line.startswith("#"): - count = count + 1 - - else: - break - - if count >= len(lines): - # All lines are comments — the service returned no data rows (e.g. - # "No sites found matching all criteria"). This is a legitimate empty - # result, so return an empty DataFrame rather than raising. - return pd.DataFrame() - - fields = lines[count].split("\t") - fields = [field.replace(",", "").strip() for field in fields if field.strip()] - dtypes = { - "site_no": str, - "dec_long_va": float, - "dec_lat_va": float, - "parm_cd": str, - "parameter_cd": str, - } - - df = pd.read_csv( - StringIO(rdb), - delimiter="\t", - skiprows=count + 2, - names=fields, - na_values="NaN", - dtype=dtypes, - ) - - df = format_response(df) - return df + df = _read_rdb_text(rdb, dtypes=_NWIS_RDB_DTYPES) + if df.empty: + return df + return format_response(df) def _check_sites_value_types(sites): diff --git a/dataretrieval/rdb.py b/dataretrieval/rdb.py new file mode 100644 index 00000000..a8cbf48d --- /dev/null +++ b/dataretrieval/rdb.py @@ -0,0 +1,89 @@ +"""Format-agnostic parser for the USGS RDB tab-separated text format. + +RDB (Relational DataBase) is a USGS-internal text format used by NWIS web +services and by the Water Data STAC catalog's rating-curve assets. Every +RDB file has the same shape: + +- One or more ``#``-prefixed comment lines carrying provenance metadata + (data source, retrieval timestamp, station name, parameter codes, etc.). +- A tab-separated header row naming each column. +- A second tab-separated row giving column format specs (e.g. ``5s 15s``); + it is informational only and skipped during parsing. +- Tab-separated data rows. + +This module exposes the parsing primitives that both ``dataretrieval.nwis`` +and ``dataretrieval.waterdata.ratings`` use. Callers layer their own +post-processing (NWIS-specific datetime indexing, ratings-specific +``df.attrs`` provenance, etc.) 
on top of the raw frame. +""" + +from __future__ import annotations + +from io import StringIO + +import pandas as pd + + +def read_rdb(rdb: str, dtypes: dict[str, type] | None = None) -> pd.DataFrame: + """Parse an RDB text response into a ``pandas.DataFrame``. + + Parameters + ---------- + rdb : str + The RDB text response from a USGS web service. + dtypes : dict[str, type] or None, optional + Optional column-name to dtype hints, forwarded to + ``pandas.read_csv``. Unknown column names are silently ignored, so + callers may safely pass a dict of all columns they might be + interested in. + + Returns + ------- + pandas.DataFrame + The parsed data. An RDB consisting only of comment lines (e.g. a + "no sites found" response) returns an empty DataFrame rather than + raising. + + Raises + ------ + ValueError + If the response body looks like HTML, which usually means the + service has been moved, is degraded, or returned an error page. + """ + if "" in rdb.lower() or "" in rdb.lower(): + raise ValueError( + "Received HTML response instead of RDB. This often indicates " + "that the service has been moved or is currently unavailable." + ) + + lines = rdb.splitlines() + header_idx = next( + (i for i, line in enumerate(lines) if not line.startswith("#")), + len(lines), + ) + if header_idx >= len(lines): + # All lines are comments — a legitimate empty result. + return pd.DataFrame() + + fields = lines[header_idx].split("\t") + fields = [f.replace(",", "").strip() for f in fields if f.strip()] + + return pd.read_csv( + StringIO(rdb), + delimiter="\t", + skiprows=header_idx + 2, # +1 for header, +1 for the format-spec row + names=fields, + na_values="NaN", + dtype=dtypes, + ) + + +def extract_rdb_comment(rdb: str) -> list[str]: + """Return the RDB ``#``-prefixed comment block as a list of header lines. 
+ + The comment block carries provenance metadata that is otherwise lost + during parsing — data source, retrieval timestamp, parameter codes, + rating id and last-shifted timestamp for ratings, etc. R's + ``dataRetrieval`` exposes the equivalent via ``comment(df)``. + """ + return [line for line in rdb.splitlines() if line.startswith("#")] diff --git a/dataretrieval/waterdata/ratings.py b/dataretrieval/waterdata/ratings.py index d40e7dcb..a1d0a3bb 100644 --- a/dataretrieval/waterdata/ratings.py +++ b/dataretrieval/waterdata/ratings.py @@ -19,10 +19,7 @@ import pandas as pd import requests -# Rating files use the same USGS RDB shape as NWIS responses, so we reuse -# the parser already in ``nwis``. ``_read_rdb`` is private; if it ever -# moves we want a loud failure here, hence the explicit import. -from dataretrieval.nwis import _read_rdb +from dataretrieval.rdb import extract_rdb_comment, read_rdb from .utils import _DURATION_RE, BASE_URL, _default_headers, _format_api_dates @@ -245,16 +242,6 @@ def _search( return response.json().get("features", []) -def _extract_rdb_comment(rdb: str) -> list[str]: - """Return the RDB ``#``-prefixed comment block as a list of header lines. - - Carries useful per-rating provenance — rating id, parameter description, - expansion type, last-shifted timestamp, etc. R's ``read_waterdata_ratings`` - exposes the equivalent via ``comment(df)``. 
- """ - return [line for line in rdb.splitlines() if line.startswith("#")] - - def _download_and_parse( feature: dict[str, Any], file_path: str | None, @@ -269,7 +256,7 @@ def _download_and_parse( with open(os.path.join(file_path, feature["id"]), "w") as f: f.write(response.text) - df = _read_rdb(response.text) - df.attrs["comment"] = _extract_rdb_comment(response.text) + df = read_rdb(response.text) + df.attrs["comment"] = extract_rdb_comment(response.text) df.attrs["url"] = url return df diff --git a/tests/rdb_test.py b/tests/rdb_test.py new file mode 100644 index 00000000..99f46ff5 --- /dev/null +++ b/tests/rdb_test.py @@ -0,0 +1,65 @@ +import pandas as pd +import pytest + +from dataretrieval.rdb import extract_rdb_comment, read_rdb + +# A minimally complete RDB: comment block, header row, format-spec row, +# data rows. Both NWIS responses and ratings RDBs share this shape. +_BASIC_RDB = """\ +# header line one +# header line two +agency_cd\tsite_no\tINDEP\tDEP +5s\t15s\t10n\t10n +USGS\t01104475\t0.10\t0.0 +USGS\t01104475\t0.20\t0.5 +USGS\t01104475\t0.30\t1.2 +""" + + +def test_read_rdb_parses_basic_shape(): + df = read_rdb(_BASIC_RDB) + assert list(df.columns) == ["agency_cd", "site_no", "INDEP", "DEP"] + assert len(df) == 3 + assert df["INDEP"].tolist() == [0.10, 0.20, 0.30] + + +def test_read_rdb_skips_format_spec_row(): + """The "5s 15s 10n 10n" row is metadata, not data.""" + df = read_rdb(_BASIC_RDB) + # If the format-spec row had been treated as data, df would have 4 rows + # and "5s" / "15s" would appear in the parsed values. + assert "5s" not in df["agency_cd"].tolist() + + +def test_read_rdb_dtype_hints_applied(): + """Caller-supplied dtype hints are forwarded to pandas; unknown names ignored.""" + df = read_rdb(_BASIC_RDB, dtypes={"site_no": str, "DEP": float, "unknown": int}) + # Without the str hint, pandas would parse "01104475" as int and drop the + # leading zero. 
Check the values, not the dtype name (which varies across + # pandas versions: object, StringDtype, etc.). + assert df["site_no"].iloc[0] == "01104475" + assert df["DEP"].dtype == float + + +def test_read_rdb_empty_when_only_comments(): + """All-comments input is a legitimate "no data" response, not an error.""" + df = read_rdb("# only a comment\n# and another\n") + assert isinstance(df, pd.DataFrame) + assert df.empty + + +def test_read_rdb_raises_on_html_response(): + """If the service returns an HTML error page, surface it loudly.""" + with pytest.raises(ValueError, match="HTML"): + read_rdb("Service Unavailable") + with pytest.raises(ValueError, match="HTML"): + read_rdb("\n...") + + +def test_extract_rdb_comment_returns_only_hash_lines(): + comments = extract_rdb_comment(_BASIC_RDB) + assert comments == ["# header line one", "# header line two"] + + +def test_extract_rdb_comment_empty_when_no_comments(): + assert extract_rdb_comment("a\tb\n1\t2\n") == [] From ad1486a28fd33ce1ae91ac958e1bcdfb4ac96384 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 6 May 2026 13:50:48 -0500 Subject: [PATCH 7/7] Address /simplify findings on the rdb refactor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aggregated quality-review fixes; no behavior change. Tests: 7 rdb + 11 ratings + 28 nwis pass (was 30 nwis; -2 are pure-duplicate coverage now lived in rdb_test.py). Source: - dataretrieval/rdb.py: rename `rdb` parameter -> `text` on both read_rdb and extract_rdb_comment ("rdb" is the format, not the thing; reads better as `read_rdb(text=...)`). Drop "USGS-internal" from the module docstring (the format is publicly served). Folded the two-pass `[f.replace ... for f in fields if f.strip()]` into one comprehension + a single empty-string filter. Replaced the defensive `>=` on the all-comments guard with `==` (it's exact after `next(..., len(lines))`). 
Spelled out in extract_rdb_comment's docstring that the returned lines retain their leading `#` and whitespace, matching R. - dataretrieval/nwis.py: drop the `read_rdb as _read_rdb_text` import alias — the local name is too close to the wrapper `_read_rdb` and confused readers. Move `_NWIS_RDB_DTYPES` from near the bottom of the file to the top alongside `_CRS`, where module-level constants live. Drop the narrating "pandas silently ignores unknown names..." comment (already in the read_rdb docstring). Drop the `if df.empty: return df` guard in the wrapper — `format_response()` already short-circuits cleanly on an empty frame (no `dec_lat_va` -> skip GeoDataFrame branch; no `datetime` -> early return). Tests: - tests/nwis_test.py: drop the two pure-duplicate cases (`test_valid_rdb_returns_dataframe` and `test_all_comments_returns_empty_dataframe`) whose coverage now lives in tests/rdb_test.py. Keep and refocus the remaining `test_no_sites_flows_through_format_response` to exercise the wrapper-specific contract (now load-bearing after dropping the empty-frame guard): an empty parser result must still flow through format_response without raising. Issue-#171 regression intent preserved in the docstring. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/nwis.py | 27 +++++++++--------------- dataretrieval/rdb.py | 39 +++++++++++++++++----------------- tests/nwis_test.py | 49 +++++++++++++------------------------------ 3 files changed, 45 insertions(+), 70 deletions(-) diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index b9d7fdd5..ec8d2537 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -12,7 +12,7 @@ import pandas as pd import requests -from dataretrieval.rdb import read_rdb as _read_rdb_text +from dataretrieval.rdb import read_rdb from dataretrieval.utils import BaseMetadata from .utils import query @@ -44,6 +44,14 @@ # NAD83 _CRS = "EPSG:4269" +_NWIS_RDB_DTYPES = { + "site_no": str, + "dec_long_va": float, + "dec_lat_va": float, + "parm_cd": str, + "parameter_cd": str, +} + def _parse_json_or_raise(response: requests.Response) -> pd.DataFrame: """Parse a JSON NWIS response, raising a helpful error on HTML responses.""" @@ -1017,18 +1025,6 @@ def _read_json(json): return merged_df -# NWIS-specific column dtype hints; pandas silently ignores unknown -# names, so passing the dict to read_rdb is safe even on responses -# whose columns don't include any of these. -_NWIS_RDB_DTYPES = { - "site_no": str, - "dec_long_va": float, - "dec_lat_va": float, - "parm_cd": str, - "parameter_cd": str, -} - - def _read_rdb(rdb): """Parse an NWIS RDB response and apply NWIS-specific post-processing. @@ -1036,10 +1032,7 @@ def _read_rdb(rdb): NWIS column-dtype hints and runs :func:`format_response` (datetime index, multi-site MultiIndex, optional GeoDataFrame). 
""" - df = _read_rdb_text(rdb, dtypes=_NWIS_RDB_DTYPES) - if df.empty: - return df - return format_response(df) + return format_response(read_rdb(rdb, dtypes=_NWIS_RDB_DTYPES)) def _check_sites_value_types(sites): diff --git a/dataretrieval/rdb.py b/dataretrieval/rdb.py index a8cbf48d..2b52656b 100644 --- a/dataretrieval/rdb.py +++ b/dataretrieval/rdb.py @@ -1,8 +1,8 @@ -"""Format-agnostic parser for the USGS RDB tab-separated text format. +"""Parser for the USGS RDB tab-separated text format. -RDB (Relational DataBase) is a USGS-internal text format used by NWIS web -services and by the Water Data STAC catalog's rating-curve assets. Every -RDB file has the same shape: +RDB (Relational DataBase) is the text format used by NWIS web services +and by the Water Data STAC catalog's rating-curve assets. Every RDB +file has the same shape: - One or more ``#``-prefixed comment lines carrying provenance metadata (data source, retrieval timestamp, station name, parameter codes, etc.). @@ -24,12 +24,12 @@ import pandas as pd -def read_rdb(rdb: str, dtypes: dict[str, type] | None = None) -> pd.DataFrame: +def read_rdb(text: str, dtypes: dict[str, type] | None = None) -> pd.DataFrame: """Parse an RDB text response into a ``pandas.DataFrame``. Parameters ---------- - rdb : str + text : str The RDB text response from a USGS web service. dtypes : dict[str, type] or None, optional Optional column-name to dtype hints, forwarded to @@ -50,26 +50,26 @@ def read_rdb(rdb: str, dtypes: dict[str, type] | None = None) -> pd.DataFrame: If the response body looks like HTML, which usually means the service has been moved, is degraded, or returned an error page. """ - if "" in rdb.lower() or "" in rdb.lower(): + if "" in text.lower() or "" in text.lower(): raise ValueError( "Received HTML response instead of RDB. This often indicates " "that the service has been moved or is currently unavailable." 
) - lines = rdb.splitlines() + lines = text.splitlines() header_idx = next( (i for i, line in enumerate(lines) if not line.startswith("#")), len(lines), ) - if header_idx >= len(lines): + if header_idx == len(lines): # All lines are comments — a legitimate empty result. return pd.DataFrame() - fields = lines[header_idx].split("\t") - fields = [f.replace(",", "").strip() for f in fields if f.strip()] + fields = [f.replace(",", "").strip() for f in lines[header_idx].split("\t")] + fields = [f for f in fields if f] return pd.read_csv( - StringIO(rdb), + StringIO(text), delimiter="\t", skiprows=header_idx + 2, # +1 for header, +1 for the format-spec row names=fields, @@ -78,12 +78,13 @@ def read_rdb(rdb: str, dtypes: dict[str, type] | None = None) -> pd.DataFrame: ) -def extract_rdb_comment(rdb: str) -> list[str]: - """Return the RDB ``#``-prefixed comment block as a list of header lines. +def extract_rdb_comment(text: str) -> list[str]: + """Return the RDB ``#``-prefixed comment block, raw and in original order. - The comment block carries provenance metadata that is otherwise lost - during parsing — data source, retrieval timestamp, parameter codes, - rating id and last-shifted timestamp for ratings, etc. R's - ``dataRetrieval`` exposes the equivalent via ``comment(df)``. + Each entry includes its leading ``#`` and any whitespace, matching what + R's ``dataRetrieval`` returns from ``comment(df)``. The comment block + carries provenance metadata that is otherwise lost during parsing — + data source, retrieval timestamp, parameter codes, rating id and + last-shifted timestamp for ratings, etc. 
""" - return [line for line in rdb.splitlines() if line.startswith("#")] + return [line for line in text.splitlines() if line.startswith("#")] diff --git a/tests/nwis_test.py b/tests/nwis_test.py index c52775a4..a42ba509 100644 --- a/tests/nwis_test.py +++ b/tests/nwis_test.py @@ -325,43 +325,24 @@ def test_variable_info_deprecated(self): class TestReadRdb: - """Tests for the _read_rdb helper. + """Tests for the NWIS-specific _read_rdb wrapper. - Notes - ----- - Related to GitHub Issue #171. + The format-agnostic parser is exercised in tests/rdb_test.py; this + class pins the wrapper-specific contract — that an empty parser + result flows through format_response without crashing (issue #171). """ - # Minimal valid RDB response with one data row - _VALID_RDB = "# comment\nsite_no\tvalue\n5s\t10n\n01491000\t42\n" - - # NWIS response when no sites match the query criteria - _NO_SITES_RDB = ( - "# //Output-Format: RDB\n" - "# //Response-Status: OK\n" - "# //Response-Message: No sites found matching all criteria\n" - ) - - def test_valid_rdb_returns_dataframe(self): - """_read_rdb returns a DataFrame for a well-formed RDB response.""" - df = _read_rdb(self._VALID_RDB) - assert isinstance(df, pd.DataFrame) - assert "site_no" in df.columns - - def test_no_sites_returns_empty_dataframe(self): - """_read_rdb returns an empty DataFrame when NWIS finds no matching sites. - - A "No sites found" response is a legitimate empty result, not an error, - so callers can check ``df.empty`` rather than catching an exception. - Regression test for issue #171 (previously raised IndexError). + def test_no_sites_flows_through_format_response(self): + """A "No sites found" response is a legitimate empty result, not an + error, so callers can check ``df.empty`` rather than catching an + exception. Regression for issue #171 (previously raised IndexError), + which now also covers the empty-frame path through ``format_response``. 
""" - df = _read_rdb(self._NO_SITES_RDB) - assert isinstance(df, pd.DataFrame) - assert df.empty - - def test_all_comments_returns_empty_dataframe(self): - """_read_rdb returns an empty DataFrame when the response has only comments.""" - rdb = "# just a comment\n# another comment\n" - df = _read_rdb(rdb) + no_sites_rdb = ( + "# //Output-Format: RDB\n" + "# //Response-Status: OK\n" + "# //Response-Message: No sites found matching all criteria\n" + ) + df = _read_rdb(no_sites_rdb) assert isinstance(df, pd.DataFrame) assert df.empty