diff --git a/NEWS.md b/NEWS.md index 50bd8b9b..2b988d4b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,8 @@ **05/06/2026:** Added `waterdata.get_field_measurements_metadata(...)` — wraps the OGC `field-measurements-metadata` collection. Returns one row per (location, parameter) field-measurement series describing its period of record, units, etc., without the underlying observations. Discrete-measurement analogue to `get_time_series_metadata`. Mirrors R's `read_waterdata_field_meta`. +**05/06/2026:** Added `waterdata.get_peaks(...)` — wraps the new OGC `peaks` collection, returning the annual peak streamflow / stage record for a monitoring location (one row per water year, per parameter). Standard input to flood-frequency analysis. Supports calendar/water-year filters and the usual location/parameter/CQL knobs shared with the other OGC getters. + **05/05/2026:** Added `waterdata.get_combined_metadata(...)` — wraps the Water Data API's `combined-metadata` collection, which joins the monitoring-locations catalog with the time-series-metadata catalog and returns one row per (location, parameter, statistic) inventory entry. This is the most flexible "what data is available" endpoint in the API: any location attribute (state, HUC, site type, drainage area, well-construction depth, …) can be combined with any time-series attribute (parameter code, statistic, data type, period of record, …) in a single query. Mirrors R's `read_waterdata_combined_meta`. **05/05/2026:** Added `waterdata.get_samples_summary(monitoringLocationIdentifier=...)` — wraps the Samples database `/summary/{id}` endpoint, returning per-characteristic result and activity counts plus first / most recent activity dates for a single monitoring location. Useful for taking inventory of available discrete-sample data before pulling observations with `get_samples`. 
diff --git a/dataretrieval/waterdata/__init__.py b/dataretrieval/waterdata/__init__.py index 28510b70..f81966c4 100644 --- a/dataretrieval/waterdata/__init__.py +++ b/dataretrieval/waterdata/__init__.py @@ -21,6 +21,7 @@ get_latest_continuous, get_latest_daily, get_monitoring_locations, + get_peaks, get_reference_table, get_samples, get_samples_summary, @@ -55,6 +56,7 @@ "get_latest_daily", "get_monitoring_locations", "get_nearest_continuous", + "get_peaks", "get_ratings", "get_reference_table", "get_samples", diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index 9ca4ba01..886a989c 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -1878,6 +1878,126 @@ def get_field_measurements_metadata( return get_ogc_data(args, output_id, service) +def get_peaks( + monitoring_location_id: str | list[str] | None = None, + parameter_code: str | list[str] | None = None, + time_series_id: str | list[str] | None = None, + unit_of_measure: str | list[str] | None = None, + time: str | list[str] | None = None, + last_modified: str | list[str] | None = None, + water_year: int | list[int] | None = None, + year: int | list[int] | None = None, + month: int | list[int] | None = None, + day: int | list[int] | None = None, + peak_since: int | list[int] | None = None, + properties: str | list[str] | None = None, + skip_geometry: bool | None = None, + bbox: list[float] | None = None, + limit: int | None = None, + filter: str | None = None, + filter_lang: FILTER_LANG | None = None, + convert_type: bool = True, +) -> tuple[pd.DataFrame, BaseMetadata]: + """Get the annual peak streamflow / stage record for a monitoring location. + + Peaks are the largest values observed at a site each water year and are + the standard input to flood-frequency analysis (e.g. log-Pearson Type III + fits). The endpoint returns one row per (monitoring location, parameter, + water year), with the peak ``value`` and the ``time`` it occurred. 
+ + The collection covers both stage (parameter ``"00065"``, ``ft``) and + discharge (parameter ``"00060"``, ``ft^3/s``); a typical streamgage has a + series for each. Reference docs: + https://api.waterdata.usgs.gov/ogcapi/v0/openapi?f=html#/peaks + + Parameters + ---------- + monitoring_location_id : string or list of strings, optional + A unique identifier representing a single monitoring location, in + ``AGENCY-ID`` form (e.g. ``"USGS-02238500"``). + parameter_code : string or list of strings, optional + 5-digit parameter code. Most peaks records are ``"00060"`` (discharge) + or ``"00065"`` (stage / gage height). Full list at + https://help.waterdata.usgs.gov/codes-and-parameters/parameters. + time_series_id : string or list of strings, optional + ID of the time series the peak belongs to. + unit_of_measure : string or list of strings, optional + Human-readable units (e.g. ``"ft^3/s"``, ``"ft"``). + time : string or list of strings, optional + Datetime, interval, or duration filter on the peak's date. + See :func:`get_time_series_metadata` for the full grammar. + last_modified : string or list of strings, optional + Same datetime grammar as ``time``; filters on the database + last-modified timestamp (useful for incremental ETL polling). + water_year, year, month, day : int or list of ints, optional + Calendar / water-year filters on the peak event. The water year ends + September 30 (e.g. WY2024 = Oct 1, 2023 – Sep 30, 2024). + peak_since : int or list of ints, optional + Filter on the year since which the peak value has stood as the + record (the API serves this field as an integer; many rows are + ``null``). + properties : string or list of strings, optional + Subset of columns to return. Defaults to every available property. + skip_geometry : boolean, optional + Skip per-feature geometries; the returned object will be a plain + ``DataFrame`` with no spatial information. + bbox : list of numbers, optional + Only features whose geometry intersects the bounding box are + selected.
Format: ``[xmin, ymin, xmax, ymax]`` in CRS 4326 + (longitude / latitude, west-south-east-north). + limit : int, optional + Page size; the maximum allowable value is 50000. Default + (``None``) requests the maximum allowable limit. + filter, filter_lang : optional + Server-side CQL filter passed through as the OGC ``filter`` / + ``filter-lang`` query parameters. See + :mod:`dataretrieval.waterdata.filters` for syntax, auto-chunking, + and the lexicographic-comparison pitfall. + convert_type : boolean, optional + If True, converts columns to appropriate types. + + Returns + ------- + df : ``pandas.DataFrame`` or ``geopandas.GeoDataFrame`` + Formatted data returned from the API query. + md : :obj:`dataretrieval.utils.Metadata` + A custom metadata object pertaining to the query. + + Examples + -------- + .. code:: + + >>> # Full annual peak record at one site (both stage and discharge) + >>> df, md = dataretrieval.waterdata.get_peaks( + ... monitoring_location_id="USGS-02238500" + ... ) + + >>> # Discharge peaks only + >>> df, md = dataretrieval.waterdata.get_peaks( + ... monitoring_location_id="USGS-02238500", + ... parameter_code="00060", + ... ) + + >>> # Multi-site peaks for a parameter, narrowed to a water-year range + >>> df, md = dataretrieval.waterdata.get_peaks( + ... monitoring_location_id=[ + ... "USGS-07069000", + ... "USGS-07064000", + ... "USGS-07068000", + ... ], + ... parameter_code="00060", + ... water_year=[2020, 2021, 2022, 2023], + ...
) + + """ + service = "peaks" + output_id = "peak_id" + + args = _get_args(locals()) + + return get_ogc_data(args, output_id, service) + + def get_reference_table( collection: str, limit: int | None = None, diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py index 2ab5ddf0..1edf012e 100644 --- a/tests/waterdata_test.py +++ b/tests/waterdata_test.py @@ -17,6 +17,7 @@ get_latest_continuous, get_latest_daily, get_monitoring_locations, + get_peaks, get_reference_table, get_samples, get_samples_summary, @@ -399,6 +400,28 @@ def test_get_field_measurements_metadata_multi_site(): } +def test_get_peaks(): + df, md = get_peaks(monitoring_location_id="USGS-02238500", skip_geometry=True) + assert "peak_id" in df.columns + assert "value" in df.columns + assert "water_year" in df.columns + assert (df["monitoring_location_id"] == "USGS-02238500").all() + assert set(df["parameter_code"].unique()).issubset({"00060", "00065"}) + assert hasattr(md, "url") + assert hasattr(md, "query_time") + + +def test_get_peaks_water_year_filter(): + df, _ = get_peaks( + monitoring_location_id="USGS-02238500", + parameter_code="00060", + water_year=[2020, 2021, 2022], + skip_geometry=True, + ) + assert (df["parameter_code"] == "00060").all() + assert set(df["water_year"].unique()).issubset({2020, 2021, 2022}) + + def test_get_reference_table(): df, md = get_reference_table("agency-codes") assert "agency_code" in df.columns