From 94b5cb12f051bb21e23ef222418d3f2fcb378e4c Mon Sep 17 00:00:00 2001 From: 0xSoftBoi Date: Sun, 5 Apr 2026 21:08:04 -0400 Subject: [PATCH 1/4] fix(offset): add "ut" (Universal Time) to named-timezone table GNU date accepts bare "UT" and "ut" as a synonym for UTC (+0). parse_datetime rejected them because the abbreviation was absent from the named-timezone lookup table in timezone_name_to_offset(). Add "ut" => Ok("+0") immediately after the existing "utc" entry and add a regression test that verifies all four case variants are accepted and resolve to a UTC-offset-0 instant. Fixes #280 Co-Authored-By: Claude Opus 4.6 --- src/items/offset.rs | 2 ++ tests/date.rs | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/src/items/offset.rs b/src/items/offset.rs index 9dc42f1..9a61161 100644 --- a/src/items/offset.rs +++ b/src/items/offset.rs @@ -281,6 +281,7 @@ fn timezone_name_to_offset(input: &str) -> ModalResult { "w" => Ok("-10"), "v" => Ok("-9"), "utc" => Ok("+0"), + "ut" => Ok("+0"), // Universal Time = UTC "u" => Ok("-8"), "t" => Ok("-7"), "sst" => Ok("-11"), @@ -423,6 +424,7 @@ mod tests { fn timezone_name_without_offset() { for (input, expected) in [ ("utc", off(false, 0, 0)), // UTC + ("ut", off(false, 0, 0)), // Universal Time = UTC (issue #280) ("gmt", off(false, 0, 0)), // UTC ("z", off(false, 0, 0)), // UTC ("west", off(false, 1, 0)), // positive offset diff --git a/tests/date.rs b/tests/date.rs index c57f782..1ef547b 100644 --- a/tests/date.rs +++ b/tests/date.rs @@ -285,3 +285,26 @@ fn test_multiple_month_skip(#[case] base: &str, #[case] input: &str, #[case] exp fn test_embedded_timezone(#[case] input: &str, #[case] expected: &str) { check_absolute(input, expected); } + + +// Issue #280: bare timezone abbreviation 'ut'/'UT' (Universal Time) should be +// accepted as UTC. GNU date accepts these; previously parse_datetime rejected them +// because 'ut' was absent from the named-timezone table. 
+#[test] +fn test_bare_ut_timezone_is_accepted() { + use parse_datetime::parse_datetime; + for input in ["ut", "UT", "Ut", "uT"] { + let result = parse_datetime(input); + assert!( + result.is_ok(), + "expected bare timezone '{}' to be accepted, got: {:?}", + input, result, + ); + let offset_secs = result.unwrap() + .as_zoned() + .map(|z| z.offset().seconds()) + .unwrap_or(0); + assert_eq!(offset_secs, 0, + "expected 'ut' to resolve to UTC offset 0, got {} seconds", offset_secs); + } +} From 104b2588302b5f639372b6d3fe9af3e011c26da8 Mon Sep 17 00:00:00 2001 From: 0xSoftBoi Date: Sun, 5 Apr 2026 21:14:03 -0400 Subject: [PATCH 2/4] fix: silently ignore unrecognized trailing alphabetic tokens after pure numbers GNU date accepts inputs like '8j' and '8 j', treating the number as an hour and silently discarding the unrecognized trailing word-token. This commit matches that behaviour. Implementation: - Add Item::Noise variant for unrecognized alphabetic tokens - Add noise_token() as the last alternative in parse_item(), so it only fires after every other parser has failed - In DateTimeBuilder::try_from, accept Noise only when it directly follows a Pure number item (prev_was_pure guard); reject it anywhere else so that leading garbage (e.g. 'bogus +1 day') and post-date garbage (e.g. '2025-01-01 abcdef') still produce errors - Add noise_after_pure_number regression test covering both '8j' and '8 j' Fixes #279 --- src/items/builder.rs | 51 ++++++++++++++++++++++++++++++++++++------- src/items/mod.rs | 52 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 94 insertions(+), 9 deletions(-) diff --git a/src/items/builder.rs b/src/items/builder.rs index 3465178..681a99c 100644 --- a/src/items/builder.rs +++ b/src/items/builder.rs @@ -517,17 +517,52 @@ impl TryFrom> for DateTimeBuilder { fn try_from(items: Vec) -> Result { let mut builder = DateTimeBuilder::new(); + // GNU date silently ignores unrecognized alphabetic tokens that directly + // follow a pure number (e.g. 
`8j` or `8 j` → 08:00:00). A Noise token + // is only valid in that position; anywhere else it is an error. + let mut prev_was_pure = false; for item in items { builder = match item { - Item::DateTime(dt) => builder.set_date(dt.date)?.set_time(dt.time)?, - Item::Date(d) => builder.set_date(d)?, - Item::Time(t) => builder.set_time(t)?, - Item::Weekday(weekday) => builder.set_weekday(weekday)?, - Item::Offset(offset) => builder.set_offset(offset)?, - Item::Relative(rel) => builder.push_relative(rel)?, - Item::TimeZone(tz) => builder.set_timezone(tz)?, - Item::Pure(pure) => builder.set_pure(pure)?, + Item::Noise => { + if !prev_was_pure { + return Err("unrecognized token"); + } + prev_was_pure = false; + builder + } + Item::Pure(pure) => { + prev_was_pure = true; + builder.set_pure(pure)? + } + Item::DateTime(dt) => { + prev_was_pure = false; + builder.set_date(dt.date)?.set_time(dt.time)? + } + Item::Date(d) => { + prev_was_pure = false; + builder.set_date(d)? + } + Item::Time(t) => { + prev_was_pure = false; + builder.set_time(t)? + } + Item::Weekday(weekday) => { + prev_was_pure = false; + builder.set_weekday(weekday)? + } + Item::Offset(offset) => { + prev_was_pure = false; + builder.set_offset(offset)? + } + Item::Relative(rel) => { + prev_was_pure = false; + builder.push_relative(rel)? + } + Item::TimeZone(tz) => { + prev_was_pure = false; + builder.set_timezone(tz)? + } } } diff --git a/src/items/mod.rs b/src/items/mod.rs index a1529bb..a0f4e60 100644 --- a/src/items/mod.rs +++ b/src/items/mod.rs @@ -54,7 +54,8 @@ use primitive::space; use winnow::{ combinator::{alt, eof, preceded, repeat_till, terminated, trace}, error::{AddContext, ContextError, ErrMode, StrContext, StrContextValue}, - stream::Stream, + stream::{AsChar, Stream}, + token::take_while, ModalResult, Parser, }; @@ -71,6 +72,9 @@ enum Item { Offset(offset::Offset), TimeZone(jiff::tz::TimeZone), Pure(String), + /// An unrecognized alphabetic token silently ignored for GNU `date` compatibility. 
+ /// GNU `date` ignores trailing word-tokens it doesn't recognize (e.g. `8j` → 08:00:00). + Noise, } /// Parse a date and time string and resolve it against the given base date and @@ -261,11 +265,26 @@ fn parse_item(input: &mut &str) -> ModalResult { weekday::parse.map(Item::Weekday), offset::parse.map(Item::Offset), pure::parse.map(Item::Pure), + noise_token, )), ) .parse_next(input) } +/// Consume an unrecognized alphabetic word and silently discard it. +/// +/// GNU `date` ignores trailing word-tokens it does not recognize (issue #279). +/// For example, `8j` is accepted and the `j` is silently dropped, yielding +/// 08:00:00, just as GNU `date -d '8j'` does. +/// +/// This parser is the last alternative in `parse_item`, so it only fires after +/// every other item parser has already failed. +fn noise_token(input: &mut &str) -> ModalResult { + primitive::s(take_while(1.., AsChar::is_alpha)) + .map(|_| Item::Noise) + .parse_next(input) +} + /// Create an error with context for unexpected input. fn expect_error(input: &mut &str, reason: &'static str) -> ErrMode { ErrMode::Cut(ContextError::new()).add_context( @@ -724,6 +743,37 @@ mod tests { assert_eq!(result.second(), 0); } + /// GNU `date` silently ignores unrecognized alphabetic tokens that trail a + /// pure number (issue #279). E.g. `8j` and `8 j` both produce 08:00:00. 
+ #[test] + fn noise_after_pure_number() { + let now = Zoned::now().with_time_zone(TimeZone::UTC); + + // Adjacent suffix: "8j" → hour 8 + let result = at_date(parse(&mut "8j").unwrap(), now.clone()); + assert_eq!(result.hour(), 8); + assert_eq!(result.minute(), 0); + assert_eq!(result.second(), 0); + + // Space-separated suffix: "8 j" → hour 8 + let result = at_date(parse(&mut "8 j").unwrap(), now.clone()); + assert_eq!(result.hour(), 8); + assert_eq!(result.minute(), 0); + assert_eq!(result.second(), 0); + + // Noise following a four-digit pure number (HHMM): "1230foo" → 12:30 + let result = at_date(parse(&mut "1230foo").unwrap(), now.clone()); + assert_eq!(result.hour(), 12); + assert_eq!(result.minute(), 30); + + // Noise must NOT be accepted when it precedes a real item (leading garbage). + assert!(parse(&mut "bogus +1 day").is_err()); + // Noise must NOT be accepted after a non-pure item (e.g. after a date). + assert!(parse(&mut "2025-01-01 abcdef").is_err()); + // A standalone unrecognized word is still an error.
+ assert!(parse(&mut "notadate").is_err()); + } + #[test] fn pure() { let now = Zoned::now().with_time_zone(TimeZone::UTC); From 118bb5ae582ccd4cf21932e647645b889ed04cc3 Mon Sep 17 00:00:00 2001 From: 0xSoftBoi Date: Mon, 6 Apr 2026 05:21:51 -0400 Subject: [PATCH 3/4] style: run rustfmt on stacked UT/noise parser fixes --- src/items/offset.rs | 2 +- tests/date.rs | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/items/offset.rs b/src/items/offset.rs index 9a61161..ddc132f 100644 --- a/src/items/offset.rs +++ b/src/items/offset.rs @@ -281,7 +281,7 @@ fn timezone_name_to_offset(input: &str) -> ModalResult { "w" => Ok("-10"), "v" => Ok("-9"), "utc" => Ok("+0"), - "ut" => Ok("+0"), // Universal Time = UTC + "ut" => Ok("+0"), // Universal Time = UTC "u" => Ok("-8"), "t" => Ok("-7"), "sst" => Ok("-11"), diff --git a/tests/date.rs b/tests/date.rs index 1ef547b..555cacf 100644 --- a/tests/date.rs +++ b/tests/date.rs @@ -286,7 +286,6 @@ fn test_embedded_timezone(#[case] input: &str, #[case] expected: &str) { check_absolute(input, expected); } - // Issue #280: bare timezone abbreviation 'ut'/'UT' (Universal Time) should be // accepted as UTC. GNU date accepts these; previously parse_datetime rejected them // because 'ut' was absent from the named-timezone table. 
@@ -298,13 +297,18 @@ fn test_bare_ut_timezone_is_accepted() { assert!( result.is_ok(), "expected bare timezone '{}' to be accepted, got: {:?}", - input, result, + input, + result, ); - let offset_secs = result.unwrap() + let offset_secs = result + .unwrap() .as_zoned() .map(|z| z.offset().seconds()) .unwrap_or(0); - assert_eq!(offset_secs, 0, - "expected 'ut' to resolve to UTC offset 0, got {} seconds", offset_secs); + assert_eq!( + offset_secs, 0, + "expected 'ut' to resolve to UTC offset 0, got {} seconds", + offset_secs + ); } } From 06267b3208bbf8584a1b6c0d59d2897fb8675683 Mon Sep 17 00:00:00 2001 From: 0xSoftBoi Date: Thu, 7 May 2026 10:17:36 -0400 Subject: [PATCH 4/4] perf: avoid CodSpeed regression on parse_invalid_input The original noise-token alt-branch made every invalid alphabetic input take an extra parse-item iteration plus a builder-level rejection, which showed up as a -13.27% regression on the parse_invalid_input bench. Restructure: drop the global Item::Noise variant and instead absorb trailing GNU-compat noise inside parse_item only after a Pure item was matched, gated on a cheap peek that confirms the next token is not a real item (datetime/date/time/relative/weekday/offset/pure). This keeps the hot invalid-input and weekday paths identical to main (no extra alt branch), while still passing all noise_after_pure_number cases: - 8j -> 08:00:00 - 8 j -> 08:00:00 - 1230foo -> 12:30 - bogus +1 day -> error (leading garbage) - 2025-01-01 abcdef -> error (noise after non-pure) - notadate -> error (standalone unrecognized) All 377 tests pass; cargo fmt and clippy clean. 
--- src/items/builder.rs | 51 ++++++------------------------------- src/items/mod.rs | 60 ++++++++++++++++++++++++++++---------------- 2 files changed, 47 insertions(+), 64 deletions(-) diff --git a/src/items/builder.rs b/src/items/builder.rs index 681a99c..3465178 100644 --- a/src/items/builder.rs +++ b/src/items/builder.rs @@ -517,52 +517,17 @@ impl TryFrom> for DateTimeBuilder { fn try_from(items: Vec) -> Result { let mut builder = DateTimeBuilder::new(); - // GNU date silently ignores unrecognized alphabetic tokens that directly - // follow a pure number (e.g. `8j` or `8 j` → 08:00:00). A Noise token - // is only valid in that position; anywhere else it is an error. - let mut prev_was_pure = false; for item in items { builder = match item { - Item::Noise => { - if !prev_was_pure { - return Err("unrecognized token"); - } - prev_was_pure = false; - builder - } - Item::Pure(pure) => { - prev_was_pure = true; - builder.set_pure(pure)? - } - Item::DateTime(dt) => { - prev_was_pure = false; - builder.set_date(dt.date)?.set_time(dt.time)? - } - Item::Date(d) => { - prev_was_pure = false; - builder.set_date(d)? - } - Item::Time(t) => { - prev_was_pure = false; - builder.set_time(t)? - } - Item::Weekday(weekday) => { - prev_was_pure = false; - builder.set_weekday(weekday)? - } - Item::Offset(offset) => { - prev_was_pure = false; - builder.set_offset(offset)? - } - Item::Relative(rel) => { - prev_was_pure = false; - builder.push_relative(rel)? - } - Item::TimeZone(tz) => { - prev_was_pure = false; - builder.set_timezone(tz)? 
- } + Item::DateTime(dt) => builder.set_date(dt.date)?.set_time(dt.time)?, + Item::Date(d) => builder.set_date(d)?, + Item::Time(t) => builder.set_time(t)?, + Item::Weekday(weekday) => builder.set_weekday(weekday)?, + Item::Offset(offset) => builder.set_offset(offset)?, + Item::Relative(rel) => builder.push_relative(rel)?, + Item::TimeZone(tz) => builder.set_timezone(tz)?, + Item::Pure(pure) => builder.set_pure(pure)?, } } diff --git a/src/items/mod.rs b/src/items/mod.rs index a0f4e60..f810cbf 100644 --- a/src/items/mod.rs +++ b/src/items/mod.rs @@ -52,7 +52,7 @@ use crate::ParsedDateTime; use jiff::Zoned; use primitive::space; use winnow::{ - combinator::{alt, eof, preceded, repeat_till, terminated, trace}, + combinator::{alt, eof, opt, preceded, repeat_till, terminated, trace}, error::{AddContext, ContextError, ErrMode, StrContext, StrContextValue}, stream::{AsChar, Stream}, token::take_while, @@ -72,9 +72,6 @@ enum Item { Offset(offset::Offset), TimeZone(jiff::tz::TimeZone), Pure(String), - /// An unrecognized alphabetic token silently ignored for GNU `date` compatibility. - /// GNU `date` ignores trailing word-tokens it doesn't recognize (e.g. `8j` → 08:00:00). - Noise, } /// Parse a date and time string and resolve it against the given base date and @@ -253,9 +250,20 @@ fn parse_items(input: &mut &str) -> ModalResult { items.try_into().map_err(|e| expect_error(input, e)) } -/// Parse an item. +/// Parse a single item. +/// +/// In addition to the regular item parsers, this also implements GNU `date`'s +/// behavior of silently ignoring unrecognized alphabetic tokens that trail a +/// pure number (issue #279). For example, `8j` and `8 j` both produce +/// 08:00:00, just as `date -d '8j'` does. +/// +/// Noise consumption is gated on (a) the parsed item being a `Pure` and (b) +/// the immediately-following input not parsing as any other recognized item. +/// Gating on `Pure` keeps the hot invalid-input path (e.g. 
`NotADate`) cheap +/// — no extra alt-branch is tried — and the lookahead prevents valid tokens +/// like `BRT` from being eaten when they trail a pure number such as `8 BRT`. fn parse_item(input: &mut &str) -> ModalResult { - trace( + let item = trace( "parse_item", alt(( combined::parse.map(Item::DateTime), @@ -265,24 +273,34 @@ fn parse_item(input: &mut &str) -> ModalResult { weekday::parse.map(Item::Weekday), offset::parse.map(Item::Offset), pure::parse.map(Item::Pure), - noise_token, )), ) - .parse_next(input) -} + .parse_next(input)?; -/// Consume an unrecognized alphabetic word and silently discard it. -/// -/// GNU `date` ignores trailing word-tokens it does not recognize (issue #279). -/// For example, `8j` is accepted and the `j` is silently dropped, yielding -/// 08:00:00, just as GNU `date -d '8j'` does. -/// -/// This parser is the last alternative in `parse_item`, so it only fires after -/// every other item parser has already failed. -fn noise_token(input: &mut &str) -> ModalResult { - primitive::s(take_while(1.., AsChar::is_alpha)) - .map(|_| Item::Noise) - .parse_next(input) + if matches!(item, Item::Pure(_)) { + // Peek the remaining input through the same item parsers (cheap: &str + // is a pointer-pair). If none of them match, then any leading alpha + // word is unrecognized noise and we consume it; otherwise we leave the + // token alone so the next iteration can parse it normally. + let mut probe = *input; + let next_is_real_item = alt(( + combined::parse.map(|_| ()), + date::parse.map(|_| ()), + time::parse.map(|_| ()), + relative::parse.map(|_| ()), + weekday::parse.map(|_| ()), + offset::parse.map(|_| ()), + pure::parse.map(|_| ()), + )) + .parse_next(&mut probe) + .is_ok(); + + if !next_is_real_item { + let _ = opt(primitive::s(take_while(1.., AsChar::is_alpha))).parse_next(input)?; + } + } + + Ok(item) } /// Create an error with context for unexpected input.