From 14be157edccaace010edcf41555174babb984260 Mon Sep 17 00:00:00 2001
From: Matthias Schiffer
Date: Thu, 25 Apr 2024 19:42:48 +0200
Subject: rebel-parse: tokenize: handle escapes in regular strings

---
Review notes (free text after the separator; not part of the commit message):

  * string() now returns the pieces produced by string_piece()* instead of
    wrapping the whole literal body in a single StringPiece::Chars.
  * string_piece() yields either a run of one or more characters other than
    '"' and '\' (StringPiece::Chars), or a single backslash escape
    (StringPiece::Escape).
  * string_escape() accepts \n, \r, \t, \\, \", \0, \xNN and \u{N...}.
      - \xNN: the first digit is limited to 0-7, so the value is at most 0x7f
        and always fits the u8 that is converted into a char; the unwrap()
        cannot fail on the matched digits.
      - \u{...}: 1 to 6 hex digits always fit a u32, so the unwrap() cannot
        fail; a value that does not convert to char makes the rule fail with
        "Invalid unicode escape".
  * The removed string_char() accepted a backslash followed by any character.
    With this change a backslash followed by anything not listed above no
    longer matches string_piece().
  * NOTE(review): the leading whitespace inside the hunks below was
    reconstructed from a whitespace-collapsed copy of this patch and may not
    match the target file byte for byte - regenerate the patch from the commit
    before applying it.

 crates/rebel-parse/src/grammar/tokenize.rs | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/crates/rebel-parse/src/grammar/tokenize.rs b/crates/rebel-parse/src/grammar/tokenize.rs
index a64c5e1..811da0d 100644
--- a/crates/rebel-parse/src/grammar/tokenize.rs
+++ b/crates/rebel-parse/src/grammar/tokenize.rs
@@ -33,9 +33,9 @@ peg::parser! {
             = $(['0'..='9'] ['0'..='9' | 'a'..='z' | 'A'..='Z' | '_']*)
 
         rule string() -> String<'input>
-            = "\"" chars:$(string_char()*) "\"" {
+            = "\"" pieces:string_piece()* "\"" {
                 String {
-                    pieces: vec![StringPiece::Chars(chars)],
+                    pieces,
                     kind: StringKind::String,
                 }
             }
@@ -52,9 +52,26 @@ peg::parser! {
                 }
             }
 
-        rule string_char()
-            = [^'"' | '\\']
-            / "\\" [_]
+        rule string_piece() -> StringPiece<'input>
+            = chars:$([^'"' | '\\']+) { StringPiece::Chars(chars) }
+            / "\\" escape:string_escape() { StringPiece::Escape(escape) }
+
+        rule string_escape() -> char
+            = "n" { '\n' }
+            / "r" { '\r' }
+            / "t" { '\t' }
+            / "\\" { '\\' }
+            / "\"" { '"' }
+            / "0" { '\0' }
+            / "x" digits:$(['0'..='7'] hex_digit()) {
+                u8::from_str_radix(digits, 16).unwrap().into()
+            }
+            / "u{" digits:$(hex_digit()*<1,6>) "}" { ?
+                u32::from_str_radix(digits, 16).unwrap().try_into().or(Err("Invalid unicode escape"))
+            }
+
+        rule hex_digit()
+            = ['0'..='9' | 'a'..='f' | 'A'..='F']
 
         /// Mandatory whitespace
         rule __
-- 
cgit v1.2.3