summary refs log tree commit diff stats
path: root/crates/rebel-parse/src/grammar/tokenize.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/rebel-parse/src/grammar/tokenize.rs')
-rw-r--r-- crates/rebel-parse/src/grammar/tokenize.rs 64
1 files changed, 64 insertions, 0 deletions
diff --git a/crates/rebel-parse/src/grammar/tokenize.rs b/crates/rebel-parse/src/grammar/tokenize.rs
new file mode 100644
index 0000000..a30e299
--- /dev/null
+++ b/crates/rebel-parse/src/grammar/tokenize.rs
@@ -0,0 +1,64 @@
+use crate::token::*;
+
+pub use rules::*;
+
+peg::parser! {
+    /// Tokenizer grammar producing literal, identifier and punctuation tokens.
+    pub grammar rules() for str {
+        /// Tokenizes a complete input into a list of tokens.
+        // Whitespace and comments may appear before, between and after tokens; they are
+        // consumed by `_` and do not show up in the result.
+        pub rule token_stream() -> Vec<Token<'input>>
+            = _ tokens:(token() ** _) _ { tokens }
+
+        /// Parses a single token.
+        // Ordered choice: literals are tried first so that the `r` prefix of a raw string
+        // is not read as an identifier.
+        pub rule token() -> Token<'input>
+            = literal:literal() { Token::Literal(literal) }
+            / ident:ident() { Token::Ident(ident) }
+            / punct:punct() { Token::Punct(punct) }
+
+        /// An ASCII letter or `_` followed by any number of ASCII letters, digits or `_`.
+        rule ident() -> Ident<'input>
+            = name:$(
+                ['a'..='z' | 'A' ..='Z' | '_' ]
+                ['a'..='z' | 'A' ..='Z' | '_' | '0'..='9']*
+            ) { Ident(name) }
+
+        /// A single punctuation character together with its spacing.
+        rule punct() -> Punct
+            = ch:punct_char() spacing:spacing() { Punct(ch, spacing) }
+
+        /// Any one character that starts neither a literal, an identifier nor whitespace.
+        // `__` also covers comments, so the `/` that opens a comment is never a punct.
+        rule punct_char() -> char
+            = !literal() !ident() !__ ch:[_] { ch }
+
+        /// `Joint` if a punctuation character follows immediately, `Alone` otherwise.
+        // `&punct_char()` is a positive lookahead and consumes no input.
+        rule spacing() -> Spacing
+            = &punct_char() { Spacing::Joint }
+            / { Spacing::Alone }
+
+        /// A number, string, raw string or script string literal.
+        // In every case `content` is the text between the delimiters, captured verbatim;
+        // escape sequences are not processed here.
+        rule literal() -> Literal<'input>
+            // Number: a digit followed by any number of ASCII digits, letters or `_`.
+            = content:$(['0'..='9'] ['0'..='9' | 'a'..='z' | 'A'..='Z' | '_']*) {
+                Literal { content, kind: LiteralKind::Number }
+            }
+            // String: double-quoted, with backslash escapes (see `string_char`).
+            / "\"" content:$(string_char()*) "\"" {
+                Literal { content, kind: LiteralKind::String }
+            }
+            // Raw string: `r"` up to the next `"`; no escapes are recognized.
+            / "r\"" content:$([^'"']*) "\"" {
+                Literal { content, kind: LiteralKind::RawString }
+            }
+            // Script string: a ``` fence, exactly one line break, then at least one
+            // character up to the next ``` fence.
+            / "```" newline() content:$((!"```" [_])+) "```" {
+                Literal { content, kind: LiteralKind::ScriptString }
+            }
+
+        /// One element of a string body: a plain character or a backslash escape pair.
+        rule string_char()
+            = [^'"' | '\\']
+            / "\\" [_]
+
+        /// Mandatory whitespace
+        // One or more of: space, tab, line break, comment.
+        rule __
+            = ([' ' | '\t'] / quiet!{newline()} / quiet!{comment()})+
+
+        /// Optional whitespace
+        rule _
+            = quiet!{__?}
+
+        /// A `//` line comment or a `/* ... */` block comment.
+        // A line comment ends at a line break or at end of input. Block comments end at
+        // the first `*/`, so they do not nest.
+        rule comment()
+            = "//" (!newline() [_])* (newline() / ![_])
+            / "/*" (!"*/" [_])* "*/"
+
+        /// A line break: CRLF, LF or a lone CR.
+        // CRLF is matched as one unit. Otherwise the single `newline()` after the opening
+        // fence of a script string would consume only the CR and leave the LF as the first
+        // character of the script content.
+        rule newline()
+            = "\r\n"
+            / ['\n' | '\r']
+    }
+}