summary | refs | log | tree | commit | diff | stats
path: root/crates/rebel-parse/src/grammar/tokenize.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/rebel-parse/src/grammar/tokenize.rs')
-rw-r--r-- crates/rebel-parse/src/grammar/tokenize.rs | 136
1 files changed, 136 insertions, 0 deletions
diff --git a/crates/rebel-parse/src/grammar/tokenize.rs b/crates/rebel-parse/src/grammar/tokenize.rs
new file mode 100644
index 0000000..ed1db39
--- /dev/null
+++ b/crates/rebel-parse/src/grammar/tokenize.rs
@@ -0,0 +1,136 @@
+use crate::token::*;
+
+pub use rules::*;
+
+static KEYWORDS: phf::Map<&'static str, Keyword> = phf::phf_map! { // Reserved words: exact, case-sensitive text mapped to its Keyword variant.
+ "else" => Keyword::Else, // Looked up by ident_or_keyword; any identifier-shaped word not listed here becomes an Ident token.
+ "false" => Keyword::False,
+ "fetch" => Keyword::Fetch,
+ "fn" => Keyword::Fn,
+ "for" => Keyword::For,
+ "if" => Keyword::If,
+ "let" => Keyword::Let,
+ "map" => Keyword::Map,
+ "mut" => Keyword::Mut,
+ "recipe" => Keyword::Recipe,
+ "set" => Keyword::Set,
+ "task" => Keyword::Task,
+ "true" => Keyword::True,
+}; // Adding a keyword means adding an entry here (presumably plus a Keyword variant in crate::token - TODO confirm).
+
+peg::parser! { // Tokenizer grammar: turns source text into a TokenStream. The generated rules module is re-exported at the top of this file.
+ pub grammar rules() for str { // Input is a plain str; tokens borrow from it through the input lifetime.
+ pub rule token_stream() -> TokenStream<'input> // Entry point: a whole run of tokens.
+ = _ tokens:(token() ** _) _ { TokenStream(tokens) } // Zero or more tokens, with optional whitespace and comments around and between them.
+
+ pub rule token() -> Token<'input> // One token. Alternatives are tried in order, so the order below matters.
+ = number:number() { Token::Number(number) } // Tried first: a leading digit always starts a number.
+ / string:string() { Token::Str(string) } // Tried before identifiers so the r prefix of a raw string is not lexed as an ident.
+ / token:ident_or_keyword() { token } // Identifier, or keyword when the text is in KEYWORDS.
+ / punct:punct() { Token::Punct(punct) } // Last resort: a single punctuation character.
+
+ rule ident_or_keyword() -> Token<'input> // Identifier-shaped word, classified by a KEYWORDS lookup.
+ = s:$(
+ ['a'..='z' | 'A' ..='Z' | '_' ] // First char: ASCII letter or underscore.
+ ['a'..='z' | 'A' ..='Z' | '_' | '0'..='9']* // Rest: ASCII letters, digits, underscores.
+ ) {
+ if let Some(kw) = KEYWORDS.get(s) { // Exact match against the reserved-word table.
+ Token::Keyword(*kw)
+ } else {
+ Token::Ident(s) // Borrows the matched slice of the input.
+ }
+ }
+
+ rule punct() -> Punct // One punctuation char, plus whether another one follows it directly.
+ = ch:punct_char() spacing:spacing() { Punct(ch, spacing) }
+
+ rule punct_char() -> char // A single char from the punctuation set below.
+ = !comment_start() ch:[ // Never match at the start of a comment, so a comment opener is not lexed as a slash.
+ | '~' | '!' | '@' | '#' | '$' | '%' | '^' | '&'
+ | '*' | '-' | '=' | '+' | '|' | ';' | ':' | ','
+ | '<' | '.' | '>' | '/' | '\'' | '?' | '(' | ')'
+ | '[' | ']' | '{' | '}'
+ ] { ch }
+
+ rule spacing() -> Spacing // Consumes nothing; only inspects what comes next.
+ = &punct_char() { Spacing::Joint } // Lookahead: the next char is also punctuation (and not a comment start).
+ / { Spacing::Alone } // Anything else follows, including end of input.
+
+ rule number() -> &'input str // Raw text of a numeric literal; no value parsing or validation happens here.
+ = $(['0'..='9'] ['0'..='9' | 'a'..='z' | 'A'..='Z' | '_']*) // A digit, then any ASCII alphanumerics or underscores, e.g. 0x1f or 1_000 (presumably validated later - TODO confirm).
+
+ rule string() -> Str<'input> // Three literal forms: regular, raw, and script.
+ = "\"" pieces:string_piece()* "\"" { // Regular: double-quoted, with escapes and interpolation.
+ Str {
+ pieces,
+ kind: StrKind::Regular,
+ }
+ }
+ / "r\"" chars:$([^'"']*) "\"" { // Raw: r-prefixed and taken verbatim up to the next double quote; no escapes, so it cannot contain one.
+ Str {
+ pieces: vec![StrPiece::Chars(chars)], // Always a single Chars piece, possibly empty.
+ kind: StrKind::Raw,
+ }
+ }
+ / "```" newline() pieces:script_string_piece()* "```" { // Script: fenced by triple backticks; exactly one newline char must follow the opening fence.
+ Str {
+ pieces,
+ kind: StrKind::Script,
+ }
+ }
+
+ rule string_piece() -> StrPiece<'input> // One piece of a regular string body.
+ = chars:$((!"{{" [^'"' | '\\'])+) { StrPiece::Chars(chars) } // Longest run without a quote, a backslash, or the start of an interpolation; a lone brace is ordinary text.
+ / "\\" escape:string_escape() { StrPiece::Escape(escape) } // Backslash escape, decoded to a single char.
+ / string_interp() // Interpolation in double braces.
+
+ rule string_escape() -> char // Decodes what follows the backslash; an unknown escape fails the match.
+ = "n" { '\n' }
+ / "r" { '\r' }
+ / "t" { '\t' }
+ / "\\" { '\\' }
+ / "\"" { '"' }
+ / "{" { '{' } // Escaped brace, so a literal pair of opening braces can be written without starting an interpolation.
+ / "0" { '\0' }
+ / "x" digits:$(['0'..='7'] hex_digit()) { // Two hex digits, the first at most 7, so the value stays within ASCII (0x00 to 0x7f).
+ u8::from_str_radix(digits, 16).unwrap().into() // Cannot fail: the rule matched exactly two hex digits.
+ }
+ / "u{" digits:$(hex_digit()*<1,6>) "}" { ? // One to six hex digits in braces; the question-mark action form lets the code below reject the match.
+ u32::from_str_radix(digits, 16).unwrap().try_into().or(Err("Invalid unicode escape")) // The unwrap cannot fail (six hex digits fit in a u32); try_into rejects surrogates and values above 0x10FFFF.
+ }
+
+ rule script_string_piece() -> StrPiece<'input> // One piece of a script string body; there are no backslash escapes here.
+ = chars:$((!"{{" !"```" [_])+) { StrPiece::Chars(chars) } // Any chars, newlines and quotes included, up to an interpolation or the closing fence.
+ / string_interp()
+
+ rule string_interp() -> StrPiece<'input> // Embedded token stream between double braces.
+ = "{{" _ tokens:(subtoken() ++ _) _ "}}" { // At least one token is required; empty double braces do not match.
+ StrPiece::Interp(TokenStream(tokens))
+ }
+
+ rule subtoken() -> Token<'input> // A token inside an interpolation.
+ = !"}}" token:token() { token } // Stop at the closing double brace instead of lexing it as punctuation.
+
+ rule hex_digit() // One ASCII hex digit, either case.
+ = ['0'..='9' | 'a'..='f' | 'A'..='F']
+
+ /// Mandatory whitespace: one or more spaces, tabs, newlines, or comments
+ rule __
+ = ([' ' | '\t'] / quiet!{newline()} / quiet!{comment()})+
+
+ /// Optional whitespace: zero or more of the above, kept out of error reports (quiet)
+ rule _
+ = quiet!{__?}
+
+ rule comment_start() // The two-character openers of line and block comments.
+ = "//"
+ / "/*"
+
+ rule comment() // A whole comment; the mandatory-whitespace rule treats it as whitespace.
+ = "//" (!newline() [_])* (newline() / ![_]) // Line comment: runs to a newline or to end of input.
+ / "/*" (!"*/" [_])* "*/" // Block comment: ends at the first star-slash, so no nesting; an unterminated one does not match.
+
+ rule newline() // A single LF or CR char.
+ = ['\n' | '\r'] // NOTE(review): CR LF counts as two newlines; after an opening script fence the LF would stay in the string body - confirm this is intended.
+ }
+}