summaryrefslogtreecommitdiffstats
path: root/crates/rebel-parse/src/grammar/tokenize.rs
blob: b497e23fa80c0399236483b80b9ca78ffb8e7690 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
use crate::token::*;

pub use rules::*;

peg::parser! {
	// Tokenizer grammar: splits source text into a flat sequence of `Token`s
	// (literals, identifiers and single-character punctuation). Whitespace and
	// comments between tokens are skipped and do not appear in the output.
	pub grammar rules() for str {
		// A complete input: zero or more tokens separated by optional
		// whitespace/comments, with optional whitespace/comments also allowed
		// before the first token and after the last one.
		pub rule token_stream() -> Vec<Token<'input>>
			= _ tokens:(token() ** _) _ { tokens }

		// A single token. The alternatives are ordered: literals are tried
		// before identifiers, otherwise the `r` of a raw string prefix `r"`
		// would be consumed as an identifier.
		pub rule token() -> Token<'input>
			= literal:literal() { Token::Literal(literal) }
			/ ident:ident() { Token::Ident(ident) }
			/ punct:punct() { Token::Punct(punct) }

		// An identifier: an ASCII letter or underscore followed by any number
		// of ASCII letters, digits or underscores. Yields the matched slice.
		rule ident() -> &'input str
			= $(
				['a'..='z' | 'A' ..='Z' | '_' ]
				['a'..='z' | 'A' ..='Z' | '_' | '0'..='9']*
			)

		// One punctuation character together with its spacing information.
		rule punct() -> Punct
			= ch:punct_char() spacing:spacing() { Punct(ch, spacing) }

		// Any single character at a position where neither a literal, an
		// identifier nor whitespace/comment starts.
		rule punct_char() -> char
			= !literal() !ident() !__ ch:[_] { ch }

		// Lookahead only (consumes nothing): `Joint` when another punctuation
		// character follows immediately, `Alone` otherwise.
		rule spacing() -> Spacing
			= &punct_char() { Spacing::Joint }
			/ { Spacing::Alone }

		// A literal. In every alternative `content` is a slice of the input;
		// nothing is unescaped or otherwise interpreted here.
		rule literal() -> Literal<'input>
			// Number: a digit followed by ASCII letters, digits or underscores.
			// The whole match is the content.
			= content:$(['0'..='9'] ['0'..='9' | 'a'..='z' | 'A'..='Z' | '_']*) {
				Literal { content, kind: LiteralKind::Number }
			}
			// String: the content is the text between the quotes, with
			// backslash escapes kept verbatim.
			/ "\"" content:$(string_char()*) "\"" {
				Literal { content, kind: LiteralKind::String }
			}
			// Raw string: everything up to the next `"`; there is no escape
			// mechanism, so the content cannot contain a double quote.
			/ "r\"" content:$([^'"']*) "\"" {
				Literal { content, kind: LiteralKind::RawString }
			}
			// Script string: opening fence, one line break (not part of the
			// content), then everything up to the closing fence.
			// NOTE(review): `+` requires at least one content character, so an
			// empty script block is not matched as a literal - confirm this is
			// intended.
			/ "```" newline() content:$((!"```" [_])+) "```" {
				Literal { content, kind: LiteralKind::ScriptString }
			}

		// One unit inside a string literal: either any character other than
		// `"` and `\`, or a backslash followed by any single character.
		rule string_char()
			= [^'"' | '\\']
			/ "\\" [_]

		/// Mandatory whitespace
		rule __
			// Spaces, tabs, line breaks and comments all count as whitespace.
			// `quiet!` suppresses the wrapped rules in parse error reporting.
			= ([' ' | '\t'] / quiet!{newline()} / quiet!{comment()})+

		/// Optional whitespace
		rule _
			= quiet!{__?}

		// A comment: either `//` up to and including the line break (or up to
		// the end of input), or `/* ... */` ending at the first `*/`
		// (block comments do not nest).
		rule comment()
			= "//" (!newline() [_])* (newline() / ![_])
			/ "/*" (!"*/" [_])* "*/"

		// A single line break. CRLF is tried first so that it is consumed as
		// one line break; otherwise the `\n` of a CRLF following a script
		// string's opening fence would end up at the start of its content.
		rule newline()
			= "\r\n"
			/ ['\n' | '\r']
	}
}