1 files changed, 136 insertions, 0 deletions
diff --git a/crates/rebel-parse/src/grammar/tokenize.rs b/crates/rebel-parse/src/grammar/tokenize.rs
new file mode 100644
index 0000000..ed1db39
--- /dev/null
+++ b/crates/rebel-parse/src/grammar/tokenize.rs
@@ -0,0 +1,136 @@
+use crate::token::*;
+
+pub use rules::*;
+
+static KEYWORDS: phf::Map<&str, Keyword> = phf::phf_map! { // reserved words; consulted by `ident_or_keyword`
+	"else" => Keyword::Else,
+	"false" => Keyword::False,
+	"fetch" => Keyword::Fetch,
+	"fn" => Keyword::Fn,
+	"for" => Keyword::For,
+	"if" => Keyword::If,
+	"let" => Keyword::Let,
+	"map" => Keyword::Map,
+	"mut" => Keyword::Mut,
+	"recipe" => Keyword::Recipe,
+	"set" => Keyword::Set,
+	"task" => Keyword::Task,
+	"true" => Keyword::True,
+};
+
+peg::parser! {
+	pub grammar rules() for str {
+		pub rule token_stream() -> TokenStream<'input> // zero or more tokens; whitespace and comments around them are skipped
+			= _ toks:(token() ** _) _ { TokenStream(toks) }
+
+		pub rule token() -> Token<'input>
+			= n:number() { Token::Number(n) }
+			/ s:string() { Token::Str(s) } // tried before identifiers so that `r"` opens a raw string
+			/ word:ident_or_keyword() { word }
+			/ p:punct() { Token::Punct(p) }
+
+		rule ident_or_keyword() -> Token<'input>
+			= word:$(
+				['a'..='z' | 'A'..='Z' | '_']
+				['a'..='z' | 'A'..='Z' | '_' | '0'..='9']*
+			) {
+				// A word listed in KEYWORDS is a keyword; every other word is an identifier.
+				match KEYWORDS.get(word) {
+					Some(&kw) => Token::Keyword(kw),
+					None => Token::Ident(word),
+				}
+			}
+
+		rule punct() -> Punct // one punctuation char plus its spacing relative to what follows
+			= c:punct_char() next:spacing() { Punct(c, next) }
+
+		rule punct_char() -> char
+			= !comment_start() c:[ // a `/` that begins `//` or `/*` opens a comment and is not punctuation
+				'~' | '!' | '@' | '#' | '$' | '%' | '^' | '&'
+				| '*' | '-' | '=' | '+' | '|' | ';' | ':' | ','
+				| '<' | '.' | '>' | '/' | '\'' | '?' | '(' | ')'
+				| '[' | ']' | '{' | '}'
+			] { c }
+
+		rule spacing() -> Spacing
+			= &punct_char() { Spacing::Joint } // lookahead only: the following punctuation char is not consumed
+			/ { Spacing::Alone } // anything else follows: whitespace, a comment opener, another token kind or end of input
+
+		rule number() -> &'input str // raw text only: an ASCII digit, then any run of ASCII digits, letters or `_`
+			= $(['0'..='9'] ['a'..='z' | 'A'..='Z' | '0'..='9' | '_']*)
+
+		rule string() -> Str<'input>
+			= "\"" parts:string_piece()* "\"" { // regular string: escapes and `{{ }}` interpolation
+				Str {
+					kind: StrKind::Regular,
+					pieces: parts,
+				}
+			}
+			/ "r\"" body:$([^'"']*) "\"" { // raw string: taken verbatim up to the next `"`
+				Str {
+					kind: StrKind::Raw,
+					pieces: vec![StrPiece::Chars(body)],
+				}
+			}
+			/ "```" newline() parts:script_string_piece()* "```" { // script string: fenced, interpolation but no escapes
+				Str {
+					kind: StrKind::Script,
+					pieces: parts,
+				}
+			}
+
+		rule string_piece() -> StrPiece<'input>
+			= text:$((!"{{" [^'"' | '\\'])+) { StrPiece::Chars(text) } // literal run; stops at `"`, `\` or `{{`
+			/ "\\" c:string_escape() { StrPiece::Escape(c) }
+			/ string_interp()
+
+		rule string_escape() -> char // the part of an escape sequence that follows the backslash
+			= "n" { '\n' }
+			/ "r" { '\r' }
+			/ "t" { '\t' }
+			/ "\\" { '\\' }
+			/ "\"" { '"' }
+			/ "{" { '{' }
+			/ "0" { '\0' }
+			/ "x" hex:$(['0'..='7'] hex_digit()) { // first digit at most 7, so the value is at most 0x7F
+				char::from(u8::from_str_radix(hex, 16).unwrap()) // cannot fail: exactly two hex digits were matched
+			}
+			/ "u{" hex:$(hex_digit()*<1,6>) "}" {?
+				char::try_from(u32::from_str_radix(hex, 16).unwrap()).map_err(|_| "Invalid unicode escape")
+			}
+
+		rule script_string_piece() -> StrPiece<'input>
+			= text:$((!"{{" !"```" [_])+) { StrPiece::Chars(text) } // any chars up to `{{` or the closing fence
+			/ string_interp()
+
+		rule string_interp() -> StrPiece<'input>
+			= "{{" _ inner:(subtoken() ++ _) _ "}}" { // `++` demands at least one token between the braces
+				StrPiece::Interp(TokenStream(inner))
+			}
+
+		rule subtoken() -> Token<'input> // a token inside `{{ }}`; never starts at the closing `}}`
+			= !"}}" tok:token() { tok }
+
+		rule hex_digit() // a single hexadecimal digit, upper or lower case
+			= ['0'..='9' | 'A'..='F' | 'a'..='f']
+
+		/// Mandatory whitespace: one or more spaces, tabs, newlines or comments
+		rule __
+			= ([' ' | '\t'] / quiet!{newline()} / quiet!{comment()})+
+
+		/// Optional whitespace: whatever `__` matches, or nothing at all
+		rule _
+			= quiet!{__?}
+
+		rule comment_start() // the two comment openers; used by `punct_char` as a negative lookahead
+			= "//"
+			/ "/*"
+
+		rule comment()
+			= "//" (!newline() [_])* (newline() / ![_]) // line comment: ends at a newline or at end of input
+			/ "/*" (!"*/" [_])* "*/" // block comment: ends at the first `*/`, so block comments do not nest
+
+		rule newline() // a single line break; "\r\n" is tried first so that CRLF counts as one break, not two
+			= "\r\n" / ['\n' | '\r'] // otherwise a script string's opening fence would leave a stray '\n' in its body
+	}
+}