Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 73 additions & 5 deletions src/items/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,10 @@ use crate::ParsedDateTime;
use jiff::Zoned;
use primitive::space;
use winnow::{
combinator::{alt, eof, preceded, repeat_till, terminated, trace},
combinator::{alt, eof, opt, preceded, repeat_till, terminated, trace},
error::{AddContext, ContextError, ErrMode, StrContext, StrContextValue},
stream::Stream,
stream::{AsChar, Stream},
token::take_while,
ModalResult, Parser,
};

Expand Down Expand Up @@ -249,9 +250,20 @@ fn parse_items(input: &mut &str) -> ModalResult<DateTimeBuilder> {
items.try_into().map_err(|e| expect_error(input, e))
}

/// Parse an item.
/// Parse a single item.
///
/// In addition to the regular item parsers, this also implements GNU `date`'s
/// behavior of silently ignoring unrecognized alphabetic tokens that trail a
/// pure number (issue #279). For example, `8j` and `8 j` both produce
/// 08:00:00, just as `date -d '8j'` does.
///
/// Noise consumption is gated on (a) the parsed item being a `Pure` and (b)
/// the immediately-following input not parsing as any other recognized item.
/// Gating on `Pure` keeps the hot invalid-input path (e.g. `NotADate`) cheap
/// — no extra alt-branch is tried — and the lookahead prevents valid tokens
/// like `BRT` from being eaten when they trail a pure number such as `8 BRT`.
fn parse_item(input: &mut &str) -> ModalResult<Item> {
trace(
let item = trace(
"parse_item",
alt((
combined::parse.map(Item::DateTime),
Expand All @@ -263,7 +275,32 @@ fn parse_item(input: &mut &str) -> ModalResult<Item> {
pure::parse.map(Item::Pure),
)),
)
.parse_next(input)
.parse_next(input)?;

if matches!(item, Item::Pure(_)) {
// Peek the remaining input through the same item parsers (cheap: &str
// is a pointer-pair). If none of them match, then any leading alpha
// word is unrecognized noise and we consume it; otherwise we leave the
// token alone so the next iteration can parse it normally.
let mut probe = *input;
let next_is_real_item = alt((
combined::parse.map(|_| ()),
date::parse.map(|_| ()),
time::parse.map(|_| ()),
relative::parse.map(|_| ()),
weekday::parse.map(|_| ()),
offset::parse.map(|_| ()),
pure::parse.map(|_| ()),
))
.parse_next(&mut probe)
.is_ok();

if !next_is_real_item {
let _ = opt(primitive::s(take_while(1.., AsChar::is_alpha))).parse_next(input)?;
}
}

Ok(item)
}

/// Create an error with context for unexpected input.
Expand Down Expand Up @@ -724,6 +761,37 @@ mod tests {
assert_eq!(result.second(), 0);
}

/// Regression test for issue #279: like GNU `date`, an unrecognized alphabetic
/// word that trails a pure number is silently dropped, so `8j` and `8 j` both
/// resolve to 08:00:00. Noise in any other position must still be rejected.
#[test]
fn noise_after_pure_number() {
    let base = Zoned::now().with_time_zone(TimeZone::UTC);

    // (input, hour, minute, second); a `None` second leaves that field unchecked.
    let accepted = [
        // Adjacent suffix.
        ("8j", 8, 0, Some(0)),
        // Space-separated suffix.
        ("8 j", 8, 0, Some(0)),
        // Noise after a four-digit pure number read as hhmm.
        ("1230foo", 12, 30, None),
    ];
    for (text, hour, minute, second) in accepted {
        let mut rest = text;
        let resolved = at_date(parse(&mut rest).unwrap(), base.clone());
        assert_eq!(resolved.hour(), hour);
        assert_eq!(resolved.minute(), minute);
        if let Some(second) = second {
            assert_eq!(resolved.second(), second);
        }
    }

    let rejected = [
        // Leading garbage in front of a real item.
        "bogus +1 day",
        // Noise after a non-pure item (here, a date).
        "2025-01-01 abcdef",
        // A standalone unrecognized word.
        "notadate",
    ];
    for text in rejected {
        let mut rest = text;
        assert!(parse(&mut rest).is_err(), "expected {text:?} to be rejected");
    }
}

#[test]
fn pure() {
let now = Zoned::now().with_time_zone(TimeZone::UTC);
Expand Down
2 changes: 2 additions & 0 deletions src/items/offset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ fn timezone_name_to_offset(input: &str) -> ModalResult<Offset> {
"w" => Ok("-10"),
"v" => Ok("-9"),
"utc" => Ok("+0"),
"ut" => Ok("+0"), // Universal Time = UTC
"u" => Ok("-8"),
"t" => Ok("-7"),
"sst" => Ok("-11"),
Expand Down Expand Up @@ -423,6 +424,7 @@ mod tests {
fn timezone_name_without_offset() {
for (input, expected) in [
("utc", off(false, 0, 0)), // UTC
("ut", off(false, 0, 0)), // Universal Time = UTC (issue #280)
("gmt", off(false, 0, 0)), // UTC
("z", off(false, 0, 0)), // UTC
("west", off(false, 1, 0)), // positive offset
Expand Down
27 changes: 27 additions & 0 deletions tests/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -285,3 +285,30 @@ fn test_multiple_month_skip(#[case] base: &str, #[case] input: &str, #[case] exp
fn test_embedded_timezone(#[case] input: &str, #[case] expected: &str) {
check_absolute(input, expected);
}

// Issue #280: the bare timezone abbreviation 'ut'/'UT' (Universal Time) must be
// accepted as UTC, in any letter case. GNU date accepts these; previously
// parse_datetime rejected them because 'ut' was absent from the named-timezone
// table.
#[test]
fn test_bare_ut_timezone_is_accepted() {
    use parse_datetime::parse_datetime;
    for input in ["ut", "UT", "Ut", "uT"] {
        let result = parse_datetime(input);
        assert!(
            result.is_ok(),
            "expected bare timezone '{}' to be accepted, got: {:?}",
            input,
            result,
        );
        // Fail loudly when no zoned value is available: falling back to 0 here
        // would let the offset assertion below pass without checking anything.
        let offset_secs = result
            .unwrap()
            .as_zoned()
            .map(|z| z.offset().seconds())
            .expect("a bare 'ut' should resolve to a zoned datetime");
        assert_eq!(
            offset_secs, 0,
            "expected '{}' to resolve to UTC offset 0, got {} seconds",
            input, offset_secs
        );
    }
}
Loading