Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

**05/06/2026:** Added `waterdata.get_field_measurements_metadata(...)` — wraps the OGC `field-measurements-metadata` collection. Returns one row per (location, parameter) field-measurement series describing its period of record, units, etc., without the underlying observations. Discrete-measurement analogue to `get_time_series_metadata`. Mirrors R's `read_waterdata_field_meta`.

**05/06/2026:** Added `waterdata.get_peaks(...)` — wraps the new OGC `peaks` collection, returning the annual peak streamflow / stage record for a monitoring location (one row per water year, per parameter). Standard input to flood-frequency analysis. Supports calendar/water-year filters and the usual location/parameter/CQL knobs shared with the other OGC getters.

**05/05/2026:** Added `waterdata.get_combined_metadata(...)` — wraps the Water Data API's `combined-metadata` collection, which joins the monitoring-locations catalog with the time-series-metadata catalog and returns one row per (location, parameter, statistic) inventory entry. This is the most flexible "what data is available" endpoint in the API: any location attribute (state, HUC, site type, drainage area, well-construction depth, …) can be combined with any time-series attribute (parameter code, statistic, data type, period of record, …) in a single query. Mirrors R's `read_waterdata_combined_meta`.

**05/05/2026:** Added `waterdata.get_samples_summary(monitoringLocationIdentifier=...)` — wraps the Samples database `/summary/{id}` endpoint, returning per-characteristic result and activity counts plus first / most recent activity dates for a single monitoring location. Useful for taking inventory of available discrete-sample data before pulling observations with `get_samples`.
Expand Down
2 changes: 2 additions & 0 deletions dataretrieval/waterdata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
get_latest_continuous,
get_latest_daily,
get_monitoring_locations,
get_peaks,
get_reference_table,
get_samples,
get_samples_summary,
Expand Down Expand Up @@ -55,6 +56,7 @@
"get_latest_daily",
"get_monitoring_locations",
"get_nearest_continuous",
"get_peaks",
"get_ratings",
"get_reference_table",
"get_samples",
Expand Down
120 changes: 120 additions & 0 deletions dataretrieval/waterdata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1878,6 +1878,126 @@ def get_field_measurements_metadata(
return get_ogc_data(args, output_id, service)


def get_peaks(
    monitoring_location_id: str | list[str] | None = None,
    parameter_code: str | list[str] | None = None,
    time_series_id: str | list[str] | None = None,
    unit_of_measure: str | list[str] | None = None,
    time: str | list[str] | None = None,
    last_modified: str | list[str] | None = None,
    water_year: int | list[int] | None = None,
    year: int | list[int] | None = None,
    month: int | list[int] | None = None,
    day: int | list[int] | None = None,
    peak_since: int | list[int] | None = None,
    properties: str | list[str] | None = None,
    skip_geometry: bool | None = None,
    bbox: list[float] | None = None,
    limit: int | None = None,
    filter: str | None = None,
    filter_lang: FILTER_LANG | None = None,
    convert_type: bool = True,
) -> tuple[pd.DataFrame, BaseMetadata]:
    """Get the annual peak streamflow / stage record for monitoring locations.

    Peaks are the largest values observed at a site each water year and are
    the standard input to flood-frequency analysis (e.g. log-Pearson Type III
    fits). The endpoint returns one row per (monitoring location, parameter,
    water year), with the peak ``value`` and the ``time`` it occurred.

    The collection covers both stage (parameter ``"00065"``, ``ft``) and
    discharge (parameter ``"00060"``, ``ft^3/s``); a typical streamgage has a
    series for each. Reference docs:
    https://api.waterdata.usgs.gov/ogcapi/v0/openapi?f=html#/peaks

    Every filter argument is optional; arguments left at ``None`` are meant
    to be omitted from the query.

    Parameters
    ----------
    monitoring_location_id : string or list of strings, optional
        One or more monitoring-location identifiers, each in ``AGENCY-ID``
        form (e.g. ``"USGS-02238500"``).
    parameter_code : string or list of strings, optional
        5-digit parameter code. Most peaks records are ``"00060"`` (discharge)
        or ``"00065"`` (stage / gage height). Full list at
        https://help.waterdata.usgs.gov/codes-and-parameters/parameters.
    time_series_id : string or list of strings, optional
        ID of the time series the peak belongs to.
    unit_of_measure : string or list of strings, optional
        Human-readable units (e.g. ``"ft^3/s"``, ``"ft"``).
    time : string or list of strings, optional
        Datetime, interval, or duration filter on the peak's date.
        See :func:`get_time_series_metadata` for the full grammar.
    last_modified : string or list of strings, optional
        Same datetime grammar as ``time``; filters on the database
        last-modified timestamp (useful for incremental ETL polling).
    water_year, year, month, day : int or list of ints, optional
        Calendar / water-year filters on the peak event. The water year ends
        September 30 (e.g. WY2024 = Oct 1, 2023 – Sep 30, 2024).
    peak_since : int or list of ints, optional
        Filter on the year since which the peak value has stood as the
        record (the API serves this field as an integer; many rows are
        ``null``).
    properties : string or list of strings, optional
        Subset of columns to return. Defaults to every available property.
    skip_geometry : boolean, optional
        Skip per-feature geometries; the returned object will be a plain
        ``DataFrame`` with no spatial information.
    bbox : list of numbers, optional
        Only features whose geometry intersects the bounding box are
        selected. Format: ``[xmin, ymin, xmax, ymax]`` in CRS 4326
        (longitude / latitude, west-south-east-north).
    limit : int, optional
        Page size; the maximum allowable value is 50000. Default
        (``None``) requests the maximum allowable limit.
    filter, filter_lang : optional
        Server-side CQL filter passed through as the OGC ``filter`` /
        ``filter-lang`` query parameters. See
        :mod:`dataretrieval.waterdata.filters` for syntax, auto-chunking,
        and the lexicographic-comparison pitfall.
    convert_type : boolean, optional
        If True (the default), converts columns to appropriate types.

    Returns
    -------
    df : ``pandas.DataFrame`` or ``geopandas.GeoDataFrame``
        Formatted data returned from the API query.
    md : ``BaseMetadata``
        A custom metadata object pertaining to the query.

    Examples
    --------
    .. code::

        >>> # Full annual peak record at one site (both stage and discharge)
        >>> df, md = dataretrieval.waterdata.get_peaks(
        ...     monitoring_location_id="USGS-02238500"
        ... )

        >>> # Discharge peaks only
        >>> df, md = dataretrieval.waterdata.get_peaks(
        ...     monitoring_location_id="USGS-02238500",
        ...     parameter_code="00060",
        ... )

        >>> # Multi-site peaks for a parameter, narrowed to a water-year range
        >>> df, md = dataretrieval.waterdata.get_peaks(
        ...     monitoring_location_id=[
        ...         "USGS-07069000",
        ...         "USGS-07064000",
        ...         "USGS-07068000",
        ...     ],
        ...     parameter_code="00060",
        ...     water_year=[2020, 2021, 2022, 2023],
        ... )

    """
    # Collection name and the name used for the record-identifier column.
    service = "peaks"
    output_id = "peak_id"

    # locals() is snapshotted here, so it holds every call argument plus the
    # two names assigned just above. Do not introduce or rename local
    # variables before this line: they would end up in the snapshot too.
    # NOTE(review): presumably _get_args drops `service` / `output_id` and
    # unset (None) arguments before the request is built; confirm against
    # its definition.
    args = _get_args(locals())

    return get_ogc_data(args, output_id, service)


def get_reference_table(
collection: str,
limit: int | None = None,
Expand Down
23 changes: 23 additions & 0 deletions tests/waterdata_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
get_latest_continuous,
get_latest_daily,
get_monitoring_locations,
get_peaks,
get_reference_table,
get_samples,
get_samples_summary,
Expand Down Expand Up @@ -399,6 +400,28 @@ def test_get_field_measurements_metadata_multi_site():
}


def test_get_peaks():
    """Check the basic shape of a single-site ``get_peaks`` result."""
    site = "USGS-02238500"
    df, md = get_peaks(monitoring_location_id=site, skip_geometry=True)

    # Identifier, value and water-year columns must all be present.
    for column in ("peak_id", "value", "water_year"):
        assert column in df.columns

    # Every row belongs to the requested site, and only stage / discharge
    # parameter codes appear.
    assert df["monitoring_location_id"].eq(site).all()
    assert set(df["parameter_code"].unique()) <= {"00060", "00065"}

    # The metadata object exposes the request URL and the query duration.
    for attribute in ("url", "query_time"):
        assert hasattr(md, attribute)


def test_get_peaks_water_year_filter():
    """Check that parameter and water-year filters restrict the rows."""
    discharge_code = "00060"
    requested_years = [2020, 2021, 2022]

    df, _ = get_peaks(
        monitoring_location_id="USGS-02238500",
        parameter_code=discharge_code,
        water_year=requested_years,
        skip_geometry=True,
    )

    # Only the requested parameter, and no water year outside the request.
    assert df["parameter_code"].eq(discharge_code).all()
    assert set(df["water_year"].unique()) <= set(requested_years)


def test_get_reference_table():
df, md = get_reference_table("agency-codes")
assert "agency_code" in df.columns
Expand Down
Loading