diff --git a/src/items/mod.rs b/src/items/mod.rs
index a1529bb..f810cbf 100644
--- a/src/items/mod.rs
+++ b/src/items/mod.rs
@@ -52,9 +52,10 @@ use crate::ParsedDateTime;
 use jiff::Zoned;
 use primitive::space;
 use winnow::{
-    combinator::{alt, eof, preceded, repeat_till, terminated, trace},
+    combinator::{alt, eof, opt, preceded, repeat_till, terminated, trace},
     error::{AddContext, ContextError, ErrMode, StrContext, StrContextValue},
-    stream::Stream,
+    stream::{AsChar, Stream},
+    token::take_while,
     ModalResult, Parser,
 };
 
@@ -249,9 +250,20 @@ fn parse_items(input: &mut &str) -> ModalResult {
     items.try_into().map_err(|e| expect_error(input, e))
 }
 
-/// Parse an item.
+/// Parse a single item.
+///
+/// In addition to the regular item parsers, this also implements GNU `date`'s
+/// behavior of silently ignoring unrecognized alphabetic tokens that trail a
+/// pure number (issue #279). For example, `8j` and `8 j` both produce
+/// 08:00:00, just as `date -d '8j'` does.
+///
+/// Noise consumption is gated on (a) the parsed item being a `Pure` and (b)
+/// the immediately-following input not parsing as any other recognized item.
+/// Gating on `Pure` keeps the hot invalid-input path (e.g. `NotADate`) cheap
+/// — no extra alt-branch is tried — and the lookahead prevents valid tokens
+/// like `BRT` from being eaten when they trail a pure number such as `8 BRT`.
 fn parse_item(input: &mut &str) -> ModalResult<Item> {
-    trace(
+    let item = trace(
         "parse_item",
         alt((
             combined::parse.map(Item::DateTime),
@@ -263,7 +275,36 @@ fn parse_item(input: &mut &str) -> ModalResult<Item> {
             pure::parse.map(Item::Pure),
         )),
     )
-    .parse_next(input)
+    .parse_next(input)?;
+
+    if matches!(item, Item::Pure(_)) {
+        // Probe the remaining input with the same item parsers, using a copy of
+        // the cursor so `input` is not advanced (copying a `&str` is cheap). If
+        // none of them match, any leading alphabetic word is unrecognized noise
+        // and we consume it; otherwise we leave the token alone so the next
+        // iteration can parse it normally.
+        //
+        // NOTE(review): this list appears to mirror the `alt` above — keep the
+        // two in sync when adding an item kind, or valid tokens may be eaten.
+        let mut probe = *input;
+        let next_is_real_item = alt((
+            combined::parse.map(|_| ()),
+            date::parse.map(|_| ()),
+            time::parse.map(|_| ()),
+            relative::parse.map(|_| ()),
+            weekday::parse.map(|_| ()),
+            offset::parse.map(|_| ()),
+            pure::parse.map(|_| ()),
+        ))
+        .parse_next(&mut probe)
+        .is_ok();
+
+        if !next_is_real_item {
+            let _ = opt(primitive::s(take_while(1.., AsChar::is_alpha))).parse_next(input)?;
+        }
+    }
+
+    Ok(item)
 }
 
 /// Create an error with context for unexpected input.
@@ -724,6 +765,37 @@ mod tests {
         assert_eq!(result.second(), 0);
     }
 
+    /// GNU `date` silently ignores unrecognized alphabetic tokens that trail a
+    /// pure number (issue #279). E.g. `8j` and `8 j` both produce 08:00:00.
+    #[test]
+    fn noise_after_pure_number() {
+        let now = Zoned::now().with_time_zone(TimeZone::UTC);
+
+        // Adjacent suffix: "8j" → hour 8
+        let result = at_date(parse(&mut "8j").unwrap(), now.clone());
+        assert_eq!(result.hour(), 8);
+        assert_eq!(result.minute(), 0);
+        assert_eq!(result.second(), 0);
+
+        // Space-separated suffix: "8 j" → hour 8
+        let result = at_date(parse(&mut "8 j").unwrap(), now.clone());
+        assert_eq!(result.hour(), 8);
+        assert_eq!(result.minute(), 0);
+        assert_eq!(result.second(), 0);
+
+        // Noise following a multi-digit pure number: "1230foo" → 12:30
+        let result = at_date(parse(&mut "1230foo").unwrap(), now.clone());
+        assert_eq!(result.hour(), 12);
+        assert_eq!(result.minute(), 30);
+
+        // Noise must NOT be accepted when it precedes a real item (leading garbage).
+        assert!(parse(&mut "bogus +1 day").is_err());
+        // Noise must NOT be accepted after a non-pure item (e.g. after a date).
+        assert!(parse(&mut "2025-01-01 abcdef").is_err());
+        // A standalone unrecognized word is still an error.
+        assert!(parse(&mut "notadate").is_err());
+    }
+
     #[test]
     fn pure() {
         let now = Zoned::now().with_time_zone(TimeZone::UTC);
diff --git a/src/items/offset.rs b/src/items/offset.rs
index 9dc42f1..ddc132f 100644
--- a/src/items/offset.rs
+++ b/src/items/offset.rs
@@ -281,6 +281,7 @@ fn timezone_name_to_offset(input: &str) -> ModalResult {
         "w" => Ok("-10"),
         "v" => Ok("-9"),
         "utc" => Ok("+0"),
+        "ut" => Ok("+0"), // Universal Time = UTC
         "u" => Ok("-8"),
         "t" => Ok("-7"),
         "sst" => Ok("-11"),
@@ -423,6 +424,7 @@ mod tests {
     fn timezone_name_without_offset() {
         for (input, expected) in [
             ("utc", off(false, 0, 0)), // UTC
+            ("ut", off(false, 0, 0)), // Universal Time = UTC (issue #280)
             ("gmt", off(false, 0, 0)), // UTC
             ("z", off(false, 0, 0)), // UTC
             ("west", off(false, 1, 0)), // positive offset
diff --git a/tests/date.rs b/tests/date.rs
index c57f782..555cacf 100644
--- a/tests/date.rs
+++ b/tests/date.rs
@@ -285,3 +285,30 @@ fn test_multiple_month_skip(#[case] base: &str, #[case] input: &str, #[case] exp
 fn test_embedded_timezone(#[case] input: &str, #[case] expected: &str) {
     check_absolute(input, expected);
 }
+
+// Issue #280: bare timezone abbreviation 'ut'/'UT' (Universal Time) should be
+// accepted as UTC. GNU date accepts these; previously parse_datetime rejected them
+// because 'ut' was absent from the named-timezone table.
+#[test]
+fn test_bare_ut_timezone_is_accepted() {
+    use parse_datetime::parse_datetime;
+    for input in ["ut", "UT", "Ut", "uT"] {
+        let result = parse_datetime(input);
+        assert!(
+            result.is_ok(),
+            "expected bare timezone '{}' to be accepted, got: {:?}",
+            input,
+            result,
+        );
+        let offset_secs = result
+            .unwrap()
+            .as_zoned()
+            .map(|z| z.offset().seconds())
+            .expect("parsed result should expose a zoned datetime");
+        assert_eq!(
+            offset_secs, 0,
+            "expected '{}' to resolve to UTC offset 0, got {} seconds",
+            input, offset_secs
+        );
+    }
+}