diff options
Diffstat (limited to 'crates/rebel-parse/src/grammar/tokenize.rs')
-rw-r--r-- | crates/rebel-parse/src/grammar/tokenize.rs | 64 |
1 files changed, 64 insertions, 0 deletions
diff --git a/crates/rebel-parse/src/grammar/tokenize.rs b/crates/rebel-parse/src/grammar/tokenize.rs new file mode 100644 index 0000000..a30e299 --- /dev/null +++ b/crates/rebel-parse/src/grammar/tokenize.rs @@ -0,0 +1,64 @@ +use crate::token::*; + +pub use rules::*; + +peg::parser! { + pub grammar rules() for str { + pub rule token_stream() -> Vec<Token<'input>> + = _ tokens:(token() ** _) _ { tokens } + + pub rule token() -> Token<'input> + = literal:literal() { Token::Literal(literal) } + / ident:ident() { Token::Ident(ident) } + / punct:punct() { Token::Punct(punct) } + + rule ident() -> Ident<'input> + = name:$( + ['a'..='z' | 'A' ..='Z' | '_' ] + ['a'..='z' | 'A' ..='Z' | '_' | '0'..='9']* + ) { Ident(name) } + + rule punct() -> Punct + = ch:punct_char() spacing:spacing() { Punct(ch, spacing) } + + rule punct_char() -> char + = !literal() !ident() !__ ch:[_] { ch } + + rule spacing() -> Spacing + = &punct_char() { Spacing::Joint } + / { Spacing::Alone } + + rule literal() -> Literal<'input> + = content:$(['0'..='9'] ['0'..='9' | 'a'..='z' | 'A'..='Z' | '_']*) { + Literal { content, kind: LiteralKind::Number } + } + / "\"" content:$(string_char()*) "\"" { + Literal { content, kind: LiteralKind::String } + } + / "r\"" content:$([^'"']*) "\"" { + Literal { content, kind: LiteralKind::RawString } + } + / "```" newline() content:$((!"```" [_])+) "```" { + Literal { content, kind: LiteralKind::ScriptString } + } + + rule string_char() + = [^'"' | '\\'] + / "\\" [_] + + /// Mandatory whitespace + rule __ + = ([' ' | '\t'] / quiet!{newline()} / quiet!{comment()})+ + + /// Optional whitespace + rule _ + = quiet!{__?} + + rule comment() + = "//" (!newline() [_])* (newline() / ![_]) + / "/*" (!"*/" [_])* "*/" + + rule newline() + = ['\n' | '\r'] + } +} |