From bf867a2d4e3147ae56a5707e42d280a45b475006 Mon Sep 17 00:00:00 2001 From: Lucian Popescu Date: Fri, 17 Apr 2026 16:35:08 +0100 Subject: [PATCH 1/2] Escape special chars --- cpp2rust/converter/converter.cpp | 9 +++++- tests/unit/out/refcount/string_escape.rs | 37 ++++++++++++++++++++++++ tests/unit/out/unsafe/string_escape.rs | 32 ++++++++++++++++++++ tests/unit/string_escape.cpp | 16 ++++++++++ 4 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 tests/unit/out/refcount/string_escape.rs create mode 100644 tests/unit/out/unsafe/string_escape.rs create mode 100644 tests/unit/string_escape.cpp diff --git a/cpp2rust/converter/converter.cpp b/cpp2rust/converter/converter.cpp index 3f778a93..6ccabf26 100644 --- a/cpp2rust/converter/converter.cpp +++ b/cpp2rust/converter/converter.cpp @@ -1558,6 +1558,12 @@ std::string Converter::GetEscapedCharLiteral(char character) const { case '"': esc = "\\\""; break; + case '\'': + esc = "\\'"; + break; + case '\\': + esc = "\\\\"; + break; case '\n': esc = "\\n"; break; @@ -1593,7 +1599,8 @@ std::string Converter::GetEscapedStringLiteral(clang::Expr *expr, std::string string = str_expr->getString().str(); // escape quotes size_t pos = 0; - while ((pos = string.find_first_of("\"\n\r\t\0", pos)) != std::string::npos) { + while ((pos = string.find_first_of("\"\'\\\n\r\t\0", pos)) != + std::string::npos) { string.replace(pos, 1, GetEscapedCharLiteral(string[pos])); pos += 2; } diff --git a/tests/unit/out/refcount/string_escape.rs b/tests/unit/out/refcount/string_escape.rs new file mode 100644 index 00000000..b4368434 --- /dev/null +++ b/tests/unit/out/refcount/string_escape.rs @@ -0,0 +1,37 @@ +extern crate libcc2rs; +use libcc2rs::*; +use std::cell::RefCell; +use std::collections::BTreeMap; +use std::io::prelude::*; +use std::io::Seek; +use std::io::{Read, Write}; +use std::os::fd::AsFd; +use std::rc::{Rc, Weak}; +pub fn main() { + std::process::exit(main_0()); +} +fn main_0() -> i32 { + let special: Value> = 
Rc::new(RefCell::new(Ptr::from_string_literal( + "\t\n \r !\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~", + ))); + thread_local!( + static expected: Value> = Rc::new(RefCell::new(Box::new([ + 7_u8, 8_u8, 9_u8, 10_u8, 11_u8, 12_u8, 13_u8, 32_u8, 33_u8, 34_u8, 35_u8, 36_u8, 37_u8, + 38_u8, 39_u8, 40_u8, 41_u8, 42_u8, 43_u8, 44_u8, 45_u8, 46_u8, 47_u8, 58_u8, 59_u8, + 60_u8, 61_u8, 62_u8, 63_u8, 64_u8, 91_u8, 92_u8, 93_u8, 94_u8, 95_u8, 96_u8, 123_u8, + 124_u8, 125_u8, 126_u8, + ]))); + ); + let i: Value = Rc::new(RefCell::new(0)); + 'loop_: while ((*i.borrow()) + < ((((::std::mem::size_of::<[u8; 40]>() as u64 as u64) + .wrapping_div(::std::mem::size_of::() as u64 as u64)) as u64) as i32)) + { + assert!({ + let _lhs = (((*special.borrow()).offset((*i.borrow()) as isize).read()) as i32); + _lhs == ((*expected.with(Value::clone).borrow())[(*i.borrow()) as usize] as i32) + }); + (*i.borrow_mut()).postfix_inc(); + } + return 0; +} diff --git a/tests/unit/out/unsafe/string_escape.rs b/tests/unit/out/unsafe/string_escape.rs new file mode 100644 index 00000000..3e0f4166 --- /dev/null +++ b/tests/unit/out/unsafe/string_escape.rs @@ -0,0 +1,32 @@ +extern crate libc; +use libc::*; +extern crate libcc2rs; +use libcc2rs::*; +use std::collections::BTreeMap; +use std::io::Seek; +use std::io::{Read, Write}; +use std::os::fd::{AsFd, FromRawFd, IntoRawFd}; +use std::rc::Rc; +pub fn main() { + unsafe { + std::process::exit(main_0() as i32); + } +} +unsafe fn main_0() -> i32 { + let mut special: *const u8 = b"\t\n \r !\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\0".as_ptr(); + static expected: [u8; 40] = [ + 7_u8, 8_u8, 9_u8, 10_u8, 11_u8, 12_u8, 13_u8, 32_u8, 33_u8, 34_u8, 35_u8, 36_u8, 37_u8, + 38_u8, 39_u8, 40_u8, 41_u8, 42_u8, 43_u8, 44_u8, 45_u8, 46_u8, 47_u8, 58_u8, 59_u8, 60_u8, + 61_u8, 62_u8, 63_u8, 64_u8, 91_u8, 92_u8, 93_u8, 94_u8, 95_u8, 96_u8, 123_u8, 124_u8, + 125_u8, 126_u8, + ];; + let mut i: i32 = 0; + 'loop_: while ((i) + < ((((::std::mem::size_of::<[u8; 40]>() as u64 as u64) + 
.wrapping_div(::std::mem::size_of::() as u64 as u64)) as u64) as i32)) + { + assert!((((*special.offset((i) as isize)) as i32) == (expected[(i) as usize] as i32))); + i.postfix_inc(); + } + return 0; +} diff --git a/tests/unit/string_escape.cpp b/tests/unit/string_escape.cpp new file mode 100644 index 00000000..039139b8 --- /dev/null +++ b/tests/unit/string_escape.cpp @@ -0,0 +1,16 @@ +#include +#include + +int main() { + const char *special = "\a\b\t\n\v\f\r !\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~"; + static const char expected[] = { + 7, 8, 9, 10, 11, 12, 13, 32, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, 47, 58, 59, 60, 61, 62, + 63, 64, 91, 92, 93, 94, 95, 96, 123, 124, 125, 126, + }; + for (int i = 0; i < (int)(sizeof(expected) / sizeof(expected[0])); i++) { + assert(special[i] == expected[i]); + } + + return 0; +} From 03b3f045267e242c90bedd6d4dbffe366a2c0e9c Mon Sep 17 00:00:00 2001 From: Lucian Popescu Date: Fri, 17 Apr 2026 16:41:36 +0100 Subject: [PATCH 2/2] Properly escape non-ASCII chars --- cpp2rust/converter/converter.cpp | 49 +++++++++++------------- tests/unit/out/refcount/string_escape.rs | 2 +- tests/unit/out/unsafe/string_escape.rs | 3 +- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/cpp2rust/converter/converter.cpp b/cpp2rust/converter/converter.cpp index 6ccabf26..0796784e 100644 --- a/cpp2rust/converter/converter.cpp +++ b/cpp2rust/converter/converter.cpp @@ -1552,32 +1552,27 @@ bool Converter::VisitCharacterLiteral(clang::CharacterLiteral *expr) { } std::string Converter::GetEscapedCharLiteral(char character) const { - std::string esc; - esc = character; switch (character) { case '"': - esc = "\\\""; - break; + return "\\\""; case '\'': - esc = "\\'"; - break; + return "\\'"; case '\\': - esc = "\\\\"; - break; + return "\\\\"; case '\n': - esc = "\\n"; - break; + return "\\n"; case '\r': - esc = "\\r"; - break; + return "\\r"; case '\t': - esc = "\\t"; - break; + return "\\t"; case '\0': - esc = "\\0"; - break; + 
return "\\0"; + } + auto uc = static_cast(character); + if (uc < 0x20 || uc == 0x7F) { + return std::format("\\x{:02x}", uc); } - return esc; + return std::string(1, character); } std::string Converter::GetEscapedUTF8CharLiteral(clang::Expr *expr) const { @@ -1596,15 +1591,17 @@ std::string Converter::GetEscapedStringLiteral(clang::Expr *expr, bool add_null_char) const { auto str_expr = clang::dyn_cast(expr->IgnoreCasts()); assert(str_expr); - std::string string = str_expr->getString().str(); - // escape quotes - size_t pos = 0; - while ((pos = string.find_first_of("\"\'\\\n\r\t\0", pos)) != - std::string::npos) { - string.replace(pos, 1, GetEscapedCharLiteral(string[pos])); - pos += 2; - } - return '"' + std::move(string) + (add_null_char ? "\\0\"" : "\""); + auto raw = str_expr->getString(); + std::string out; + out.push_back('"'); + for (unsigned char c : raw) { + out += GetEscapedCharLiteral(static_cast(c)); + } + if (add_null_char) { + out += "\\0"; + } + out.push_back('"'); + return out; } bool Converter::VisitStringLiteral(clang::StringLiteral *expr) { diff --git a/tests/unit/out/refcount/string_escape.rs b/tests/unit/out/refcount/string_escape.rs index b4368434..228e635a 100644 --- a/tests/unit/out/refcount/string_escape.rs +++ b/tests/unit/out/refcount/string_escape.rs @@ -12,7 +12,7 @@ pub fn main() { } fn main_0() -> i32 { let special: Value> = Rc::new(RefCell::new(Ptr::from_string_literal( - "\t\n \r !\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~", + "\x07\x08\t\n\x0b\x0c\r !\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~", ))); thread_local!( static expected: Value> = Rc::new(RefCell::new(Box::new([ diff --git a/tests/unit/out/unsafe/string_escape.rs b/tests/unit/out/unsafe/string_escape.rs index 3e0f4166..6bba3fe4 100644 --- a/tests/unit/out/unsafe/string_escape.rs +++ b/tests/unit/out/unsafe/string_escape.rs @@ -13,7 +13,8 @@ pub fn main() { } } unsafe fn main_0() -> i32 { - let mut special: *const u8 = b"\t\n \r !\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\0".as_ptr(); + let mut 
special: *const u8 = + b"\x07\x08\t\n\x0b\x0c\r !\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\0".as_ptr(); static expected: [u8; 40] = [ 7_u8, 8_u8, 9_u8, 10_u8, 11_u8, 12_u8, 13_u8, 32_u8, 33_u8, 34_u8, 35_u8, 36_u8, 37_u8, 38_u8, 39_u8, 40_u8, 41_u8, 42_u8, 43_u8, 44_u8, 45_u8, 46_u8, 47_u8, 58_u8, 59_u8, 60_u8,