summaryrefslogtreecommitdiffstats
path: root/crates/rebel-parse/src/grammar/tokenize.rs
blob: a64c5e1b6c9c31e3698fa7f688ad9b7ea6134675 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
use crate::token::*;

pub use rules::*;

// PEG tokenizer: splits source text into numbers, strings, identifiers and
// punctuation. Whitespace and comments only separate tokens; they are never
// part of the produced token stream.
peg::parser! {
	pub grammar rules() for str {
		/// Tokenizes the input: tokens separated and surrounded by optional whitespace.
		pub rule token_stream() -> TokenStream<'input>
			= _ tokens:(token() ** _) _ { TokenStream(tokens) }

		// Alternatives are tried in order. `string()` must come before
		// `ident()`, otherwise the `r` prefix of a raw string `r"..."` would
		// be consumed as an identifier.
		/// Matches a single token: number, string, identifier or punctuation.
		pub rule token() -> Token<'input>
			= number:number() { Token::Number(number) }
			/ string:string() { Token::String(string) }
			/ ident:ident() { Token::Ident(ident) }
			/ punct:punct() { Token::Punct(punct) }

		/// Identifier: an ASCII letter or `_`, then any ASCII letters, digits or `_`.
		rule ident() -> &'input str
			= $(
				['a'..='z' | 'A'..='Z' | '_']
				['a'..='z' | 'A'..='Z' | '_' | '0'..='9']*
			)

		/// One punctuation character together with its spacing information.
		rule punct() -> Punct
			= ch:punct_char() spacing:spacing() { Punct(ch, spacing) }

		// The negative lookaheads make punctuation the fallback category: a
		// character is punctuation only if no number, string, identifier,
		// whitespace or comment starts at this position.
		/// Any single character that does not start another token or whitespace.
		rule punct_char() -> char
			= !number() !string() !ident() !__ ch:[_] { ch }

		// Pure lookahead: neither alternative consumes input.
		/// `Joint` if another punctuation character follows directly, else `Alone`.
		rule spacing() -> Spacing
			= &punct_char() { Spacing::Joint }
			/ { Spacing::Alone }

		// Only the shape is checked here (so e.g. `0x1f` or `1_000` is a single
		// token); the matched slice is returned uninterpreted.
		/// Number literal: an ASCII digit, then any ASCII letters, digits or `_`.
		rule number() -> &'input str
			= $(['0'..='9'] ['0'..='9' | 'a'..='z' | 'A'..='Z' | '_']*)

		// In every form the captured slice is the raw source text between the
		// delimiters; escape sequences are not decoded here.
		/// String literal: escaped `"..."`, raw `r"..."` or script string in triple backticks.
		rule string() -> String<'input>
			// Regular string: a backslash escapes the following character.
			= "\"" chars:$(string_char()*) "\"" {
				String {
					pieces: vec![StringPiece::Chars(chars)],
					kind: StringKind::String,
				}
			}
			// Raw string: no escapes, so it cannot contain a `"`.
			/ "r\"" chars:$([^'"']*) "\"" {
				String {
					pieces: vec![StringPiece::Chars(chars)],
					kind: StringKind::RawString,
				}
			}
			// Script string: the opening backticks must be followed by a line
			// break, and the body must be non-empty. `newline()` consumes a
			// complete CRLF pair, so the body does not start with a stray `\n`
			// in files with Windows line endings.
			/ "```" newline() chars:$((!"```" [_])+) "```" {
				String {
					pieces: vec![StringPiece::Chars(chars)],
					kind: StringKind::ScriptString,
				}
			}

		/// One element of a regular string: a plain character or a backslash escape pair.
		rule string_char()
			= [^'"' | '\\']
			/ "\\" [_]

		/// Mandatory whitespace
		rule __
			= ([' ' | '\t'] / quiet!{newline()} / quiet!{comment()})+

		/// Optional whitespace
		rule _
			= quiet!{__?}

		// A line comment consumes its terminating line break, or ends at end
		// of input. Block comments do not nest: the first `*/` closes them.
		/// Line comment (`// ...`) or block comment (`/* ... */`).
		rule comment()
			= "//" (!newline() [_])* (newline() / ![_])
			/ "/*" (!"*/" [_])* "*/"

		// `"\r\n"` is tried first so that a CRLF pair counts as one line break
		// rather than two.
		/// A single line break: CRLF, LF or a lone CR.
		rule newline()
			= "\r\n"
			/ ['\n' | '\r']
	}
}