From 5806127294529c2e2a3bfaa552491a537f44ee58 Mon Sep 17 00:00:00 2001 From: paq <89paku@gmail.com> Date: Mon, 11 May 2026 11:26:50 +0900 Subject: [PATCH 1/3] fix: align %p behavior with native C Lua --- src/Lua/Standard/Internal/MatchState.cs | 3 ++- tests/Lua.Tests/PatternMatchingTests.cs | 33 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/Lua/Standard/Internal/MatchState.cs b/src/Lua/Standard/Internal/MatchState.cs index f9bbf718..b306ddac 100644 --- a/src/Lua/Standard/Internal/MatchState.cs +++ b/src/Lua/Standard/Internal/MatchState.cs @@ -460,7 +460,8 @@ static bool MatchClass(char c, char cl) res = char.IsLower(c); break; case 'p': - res = char.IsPunctuation(c); + // Emulate C ispunct; .NET's char.IsPunctuation does not include symbols like '='. + res = !char.IsLetterOrDigit(c) && !char.IsControl(c) && !char.IsWhiteSpace(c); break; case 's': res = char.IsWhiteSpace(c); diff --git a/tests/Lua.Tests/PatternMatchingTests.cs b/tests/Lua.Tests/PatternMatchingTests.cs index 7ddc6d9f..6411d3ea 100644 --- a/tests/Lua.Tests/PatternMatchingTests.cs +++ b/tests/Lua.Tests/PatternMatchingTests.cs @@ -597,6 +597,39 @@ public async Task Test_StringGSub_PatternReplacements() Assert.That(result[1].Read(), Is.EqualTo(1)); } + [Test] + public async Task Test_StringGSub_PunctuationClassCapturesSymbolCharacters() + { + var state = LuaState.Create(); + state.OpenStringLibrary(); + + var result = await state.DoStringAsync( + """ + local punctuation = [=[!"#$%&'()*+,-./ + :;<=>?@ + [\]^_` + {|}~]=] + local matched = '' + local count = 0 + string.gsub(punctuation, '%p', function(c) + matched = matched .. c + count = count + 1 + end) + return matched, count + """ + ); + + Assert.That(result[0].Read(), Is.EqualTo("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~")); + Assert.That(result[1].Read(), Is.EqualTo(32)); + + result = await state.DoStringAsync( + "return string.gsub('abc=xyz', '(%w*)(%p)(%w+)', '%3%2%1-%0')" + ); + + Assert.That(result[0].Read(), Is.EqualTo("xyz=abc-abc=xyz")); + Assert.That(result[1].Read(), Is.EqualTo(1)); + } + [Test] public async Task Test_StringGSub_FunctionReplacements() { From 9a34b886ec6c90eb95fc62af614303b7ce02f20b Mon Sep 17 00:00:00 2001 From: paq <89paku@gmail.com> Date: Sun, 31 May 2026 09:28:58 +0900 Subject: [PATCH 2/3] fix: align %p behavior with C ispunct exactly --- src/Lua/Standard/Internal/MatchState.cs | 5 ++++- tests/Lua.Tests/PatternMatchingTests.cs | 13 ++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/Lua/Standard/Internal/MatchState.cs b/src/Lua/Standard/Internal/MatchState.cs index b306ddac..0204ab07 100644 --- a/src/Lua/Standard/Internal/MatchState.cs +++ b/src/Lua/Standard/Internal/MatchState.cs @@ -461,7 +461,10 @@ static bool MatchClass(char c, char cl) break; case 'p': // Emulate C ispunct; .NET's char.IsPunctuation does not include symbols like '='. - res = !char.IsLetterOrDigit(c) && !char.IsControl(c) && !char.IsWhiteSpace(c); + res = (c >= '!' && c <= '/') || + (c >= ':' && c <= '@') || + (c >= '[' && c <= '`') || + (c >= '{' && c <= '~'); break; case 's': res = char.IsWhiteSpace(c); diff --git a/tests/Lua.Tests/PatternMatchingTests.cs b/tests/Lua.Tests/PatternMatchingTests.cs index 6411d3ea..146e8fc2 100644 --- a/tests/Lua.Tests/PatternMatchingTests.cs +++ b/tests/Lua.Tests/PatternMatchingTests.cs @@ -598,7 +598,7 @@ public async Task Test_StringGSub_PatternReplacements() } [Test] - public async Task Test_StringGSub_PunctuationClassCapturesSymbolCharacters() + public async Task Test_StringGSub_PunctuationClassCapturesAsciiSymbolCharacters() { var state = LuaState.Create(); state.OpenStringLibrary(); @@ -622,6 +622,17 @@ public async Task Test_StringGSub_PunctuationClassCapturesSymbolCharacters() Assert.That(result[0].Read(), Is.EqualTo("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~")); Assert.That(result[1].Read(), Is.EqualTo(32)); + // Lua %p does not capture non-ASCII punctuation characters + result = await state.DoStringAsync( + """ + local text = '’،。、!?' + return string.gsub(text, '%p', 'X') + """ + ); + + Assert.That(result[0].Read(), Is.EqualTo("’،。、!?")); + Assert.That(result[1].Read(), Is.EqualTo(0)); + result = await state.DoStringAsync( "return string.gsub('abc=xyz', '(%w*)(%p)(%w+)', '%3%2%1-%0')" ); From f9a5f3a432793bf5d1636e7044ad25eef48c4339 Mon Sep 17 00:00:00 2001 From: akeit0 <90429982+Akeit0@users.noreply.github.com> Date: Sun, 31 May 2026 20:02:16 +0900 Subject: [PATCH 3/3] run formatter --- src/Lua/Standard/Internal/MatchState.cs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Lua/Standard/Internal/MatchState.cs b/src/Lua/Standard/Internal/MatchState.cs index 0204ab07..15874841 100644 --- a/src/Lua/Standard/Internal/MatchState.cs +++ b/src/Lua/Standard/Internal/MatchState.cs @@ -461,10 +461,12 @@ static bool MatchClass(char c, char cl) break; case 'p': // Emulate C ispunct; .NET's char.IsPunctuation does not include symbols like '='. - res = (c >= '!' && c <= '/') || - (c >= ':' && c <= '@') || - (c >= '[' && c <= '`') || - (c >= '{' && c <= '~'); + res = + c + is (>= '!' and <= '/') + or (>= ':' and <= '@') + or (>= '[' and <= '`') + or (>= '{' and <= '~'); break; case 's': res = char.IsWhiteSpace(c);