From 458afaa9b725b295fc634121350da547cf668f25 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sun, 21 Apr 2024 13:01:28 +0200 Subject: rebel-parse: Initial RecipeLang parser implementation Parse a recipe into an AST. No span support yet. --- Cargo.lock | 1 + crates/rebel-parse/Cargo.toml | 3 + crates/rebel-parse/examples/parse-string.rs | 49 ++++++++ crates/rebel-parse/src/ast.rs | 185 ++++++++++++++++++++++++++++ crates/rebel-parse/src/grammar/mod.rs | 2 + crates/rebel-parse/src/grammar/recipe.rs | 162 ++++++++++++++++++++++++ crates/rebel-parse/src/grammar/task_ref.rs | 65 ++++++++++ crates/rebel-parse/src/lib.rs | 68 +--------- crates/rebel/src/main.rs | 2 +- 9 files changed, 472 insertions(+), 65 deletions(-) create mode 100644 crates/rebel-parse/examples/parse-string.rs create mode 100644 crates/rebel-parse/src/ast.rs create mode 100644 crates/rebel-parse/src/grammar/mod.rs create mode 100644 crates/rebel-parse/src/grammar/recipe.rs create mode 100644 crates/rebel-parse/src/grammar/task_ref.rs diff --git a/Cargo.lock b/Cargo.lock index e2b4cee..3247956 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -508,6 +508,7 @@ dependencies = [ name = "rebel-parse" version = "0.1.0" dependencies = [ + "clap", "peg", "rebel-common", ] diff --git a/crates/rebel-parse/Cargo.toml b/crates/rebel-parse/Cargo.toml index f75e382..3ed7d98 100644 --- a/crates/rebel-parse/Cargo.toml +++ b/crates/rebel-parse/Cargo.toml @@ -11,3 +11,6 @@ edition = "2021" rebel-common = { path = "../rebel-common" } peg = "0.8.2" + +[dev-dependencies] +clap = { version = "4.0.0", features = ["derive"] } diff --git a/crates/rebel-parse/examples/parse-string.rs b/crates/rebel-parse/examples/parse-string.rs new file mode 100644 index 0000000..ba6a378 --- /dev/null +++ b/crates/rebel-parse/examples/parse-string.rs @@ -0,0 +1,49 @@ +use std::{fmt::Debug, time::Instant}; + +use clap::{Parser, ValueEnum}; + +use rebel_parse::recipe; + +#[derive(Clone, Debug, ValueEnum)] +enum Rule { + Recipe, + RecipeStmt, + Body, + BodyStmt, + Expr, +} + +#[derive(Clone, Debug, Parser)] +struct Opts { + rule: Rule, + input: String, +} + +fn main() { + let opts: Opts = Opts::parse(); + let input = opts.input.trim(); + + fn as_debug<'a>(v: impl Debug + 'a) -> Box { + Box::new(v) + } + + let start = Instant::now(); + let result = match opts.rule { + Rule::Recipe => recipe::recipe(input).map(as_debug), + Rule::RecipeStmt => recipe::recipe_stmt(input).map(as_debug), + Rule::Body => recipe::body(input).map(as_debug), + Rule::BodyStmt => recipe::body_stmt(input).map(as_debug), + Rule::Expr => recipe::expr(input).map(as_debug), + }; + let dur = Instant::now().duration_since(start); + + match result { + Ok(value) => { + println!("{value:#?}"); + } + Err(err) => { + println!("{err}"); + } + }; + println!("Took {} us", dur.as_micros()); +} diff --git a/crates/rebel-parse/src/ast.rs b/crates/rebel-parse/src/ast.rs new file mode 100644 index 0000000..d923f2a --- /dev/null +++ b/crates/rebel-parse/src/ast.rs @@ -0,0 +1,185 @@ +pub type Recipe<'a> = Vec>; + +#[derive(Debug, Clone)] +pub enum RecipeStmt<'a> { + BodyStmt(BodyStmt<'a>), + Fetch { + name: Ident<'a>, + body: Body<'a>, + }, + Task { + name: Ident<'a>, + args: Vec>, + body: Body<'a>, + }, +} + +pub type Body<'a> = Vec>; + +#[derive(Debug, Clone)] +pub enum BodyStmt<'a> { + Assign { + left: Box>, + op: Option, + right: Box>, + }, +} + +impl<'a> BodyStmt<'a> { + pub(crate) fn assign(left: TypedExpr<'a>, op: Option, right: Expr<'a>) -> Self { + BodyStmt::Assign { + left: Box::new(left), + op, + right: Box::new(right), + } + } +} + +#[derive(Debug, Clone)] +pub enum Expr<'a> { + Binary { + left: Box>, + op: OpBinary, + right: Box>, + }, + Unary { + op: OpUnary, + expr: Box>, + }, + Apply { + expr: Box>, + args: Vec>, + }, + Index { + expr: Box>, + index: Box>, + }, + Field { + expr: Box>, + field: Ident<'a>, + }, + Paren(Box>), + Path(Path<'a>), + Literal(Literal<'a>), +} + +impl<'a> Expr<'a> { + pub(crate) fn binary(left: Expr<'a>, op: OpBinary, right: Expr<'a>) -> Self { + Expr::Binary { + left: Box::new(left), + op, + right: Box::new(right), + } + } + + pub(crate) fn unary(op: OpUnary, expr: Expr<'a>) -> Self { + Expr::Unary { + op, + expr: Box::new(expr), + } + } + + pub(crate) fn apply(expr: Expr<'a>, args: Args<'a>) -> Self { + Expr::Apply { + expr: Box::new(expr), + args, + } + } + + pub(crate) fn index(expr: Expr<'a>, index: Expr<'a>) -> Self { + Expr::Index { + expr: Box::new(expr), + index: Box::new(index), + } + } + + pub(crate) fn field(expr: Expr<'a>, field: Ident<'a>) -> Self { + Expr::Field { + expr: Box::new(expr), + field, + } + } + + pub(crate) fn paren(expr: Expr<'a>) -> Self { + Expr::Paren(Box::new(expr)) + } +} + +#[derive(Debug, Clone)] +pub struct TypedExpr<'a> { + pub expr: Expr<'a>, + pub typ: Option>, +} + +pub type Args<'a> = Vec>; + +#[derive(Debug, Clone)] +pub struct Arg<'a> { + pub expr: Expr<'a>, +} + +#[derive(Debug, Clone)] +pub struct ArgType<'a> { + pub name: Ident<'a>, + pub typ: Expr<'a>, +} + +#[derive(Debug, Clone)] +pub enum Literal<'a> { + Unit, + Boolean(bool), + Integer(u64), + String(&'a str), + RawString(&'a str), + ScriptString(&'a str), + Tuple(Vec>), + Array(Vec>), + Map(Vec>), +} + +impl<'a> Literal<'a> { + pub(crate) fn integer(s: &'a str, radix: u32) -> Result { + let s = s.replace('_', ""); + let value = u64::from_str_radix(&s, radix).or(Err("Failed to parse number"))?; + Ok(Literal::Integer(value)) + } +} + +#[derive(Debug, Clone)] +pub struct MapEntry<'a> { + pub key: &'a str, + pub value: Expr<'a>, +} + +#[derive(Debug, Clone, Copy)] +pub enum OpUnary { + Not, + Neg, +} + +#[derive(Debug, Clone, Copy)] +pub enum OpBinary { + Add, + Sub, + Mul, + Div, + Rem, + And, + Or, + Eq, + Lt, + Le, + Ne, + Ge, + Gt, +} + +#[derive(Debug, Clone)] +pub struct Path<'a> { + pub components: Vec>, +} + +#[derive(Debug, Clone, Copy)] +pub struct Ident<'a> { + pub name: &'a str, +} diff --git a/crates/rebel-parse/src/grammar/mod.rs b/crates/rebel-parse/src/grammar/mod.rs new file mode 100644 index 0000000..ed180f0 --- /dev/null +++ b/crates/rebel-parse/src/grammar/mod.rs @@ -0,0 +1,2 @@ +pub mod recipe; +pub mod task_ref; diff --git a/crates/rebel-parse/src/grammar/recipe.rs b/crates/rebel-parse/src/grammar/recipe.rs new file mode 100644 index 0000000..5ae6b8b --- /dev/null +++ b/crates/rebel-parse/src/grammar/recipe.rs @@ -0,0 +1,162 @@ +use crate::ast::*; + +pub use rules::*; + +peg::parser! { + pub grammar rules() for str { + use OpBinary::*; + use OpUnary::*; + + pub rule recipe() -> Recipe<'input> + = _ recipe:recipe_stmt()* { recipe } + + pub rule recipe_stmt() -> RecipeStmt<'input> + = stmt:body_stmt() { + RecipeStmt::BodyStmt(stmt) + } + / "fetch" __ name:ident() _ "{" _ body:body() _ "}" _ { + RecipeStmt::Fetch { name, body } + } + / "task" __ name:ident() _ "(" _ args:argtypes() _ ")" _ "{" _ body:body() _ "}" _ { + RecipeStmt::Task { name, args, body } + } + + pub rule body() -> Body<'input> + = recipe:body_stmt()* { recipe } + + pub rule body_stmt() -> BodyStmt<'input> + = left:typed_expr() _ op:assign_op() _ right:expr() _ ";" _ { + BodyStmt::assign(left, op, right) + } + + rule assign_op() -> Option + = "+=" { Some(Add) } + / "-=" { Some(Sub) } + / "*=" { Some(Mul) } + / "/=" { Some(Div) } + / "%=" { Some(Rem) } + / "=" { None } + + rule typed_expr() -> TypedExpr<'input> + = expr:expr() typ:tagged(<_ ":" _>, )? { TypedExpr { expr, typ } } + + + rule typ() -> Path<'input> + = path() + + pub rule expr() -> Expr<'input> = precedence! { + left:(@) _ "||" _ right:@ { Expr::binary(left, Or, right) } + -- + left:(@) _ "&&" _ right:@ { Expr::binary(left, And, right) } + -- + left:(@) _ "==" _ right:@ { Expr::binary(left, Eq, right) } + left:(@) _ "!=" _ right:@ { Expr::binary(left, Ne, right) } + left:(@) _ "<" _ right:@ { Expr::binary(left, Lt, right) } + left:(@) _ ">" _ right:@ { Expr::binary(left, Gt, right) } + left:(@) _ "<=" _ right:@ { Expr::binary(left, Le, right) } + left:(@) _ ">=" _ right:@ { Expr::binary(left, Ge, right) } + -- + left:(@) _ "+" _ right:@ { Expr::binary(left, Add, right) } + left:(@) _ "-" _ right:@ { Expr::binary(left, Sub, right) } + -- + left:(@) _ "*" _ right:@ { Expr::binary(left, Mul, right) } + left:(@) _ "/" _ right:@ { Expr::binary(left, Div, right) } + left:(@) _ "%" _ right:@ { Expr::binary(left, Rem, right) } + -- + "-" _ expr:@ { Expr::unary(Neg, expr) } + "!" _ expr:@ { Expr::unary(Not, expr) } + -- + expr:@ _ "(" _ args:args() _ ")" { Expr::apply(expr, args) } + expr:@ _ "[" _ index:expr() _ "]" { Expr::index(expr, index) } + -- + expr:@ _ "." _ field:field() { Expr::field(expr, field) } + -- + "(" _ e:expr() _ ")" { Expr::paren(e) } + e:atom() { e } + } + + rule atom() -> Expr<'input> + = lit:literal() { Expr::Literal(lit) } + / path:path() { Expr::Path(path) } + + rule args() -> Vec> + = args:delimited(, <_ "," _>) { args } + + rule arg() -> Arg<'input> + = expr:expr() { Arg { expr } } + + rule argtypes() -> Vec> + = args:delimited(, <_ "," _>) { args } + + rule argtype() -> ArgType<'input> + = expr:typed_expr() { ArgType { expr } } + + rule literal() -> Literal<'input> + = "true" { Literal::Boolean(true) } + / "false" { Literal::Boolean(false) } + / "0x" s:$((['0'..='9' | 'a'..='f' | 'A'..='F']+) ++ "_") { ? + Literal::integer(s, 16) + } + / "0o" s:$((['0'..='7']+) ++ "_") { ? + Literal::integer(s, 8) + } + / "0b" s:$((['0'..='1']+) ++ "_") { ? + Literal::integer(s, 2) + } + / s:$((['0'..='9']+) ++ "_") { ? + Literal::integer(s, 10) + } + / "\"" s:$(string_char()*) "\"" { Literal::String(s) } + / "r\"" s:$([^'"']*) "\"" { Literal::RawString(s) } + / "```" newline() s:$((!"```" [_])+) "```" { Literal::ScriptString(s) } + / "(" _ ")" { Literal::Unit } + / "(" _ elements:(expr() ** (_ "," _)) (_ ",")? _ ")" { Literal::Tuple(elements) } + / "[" _ elements:delimited(, <_ "," _>) _ "]" { Literal::Array(elements) } + / "{" _ entries:delimited(, <_ "," _>) _ "}" { Literal::Map(entries) } + + rule map_entry() -> MapEntry<'input> + = left:typed_expr() _ "=" _ right:expr() { + MapEntry { left, right } + } + + rule string_char() + = [^'"' | '\\'] + / "\\" [_] + + rule path() -> Path<'input> + = components:ident() ++ (_ "::" _) { Path { components } } + + rule field() -> Ident<'input> + = name:$( + ['a'..='z' | 'A' ..='Z' | '0'..='9' | '_']* + ) { Ident { name } } + + rule ident() -> Ident<'input> + = name:$( + ['a'..='z' | 'A' ..='Z' | '_' ] + ['a'..='z' | 'A' ..='Z' | '0'..='9' | '_']* + ) { Ident { name } } + + /// Mandatory whitespace + rule __ + = ([' ' | '\t'] / quiet!{newline()} / quiet!{comment()})+ + + /// Optional whitespace + rule _ + = quiet!{__?} + + rule comment() + = "//" (!newline() [_])* (newline() / ![_]) + / "/*" (!"*/" [_])* "*/" + + rule newline() + = ['\n' | '\r'] + + rule delimited(expr: rule, delim: rule<()>) -> Vec + = values:(expr() ++ delim()) delim()? { values } + / { Vec::new() } + + rule tagged(tag: rule<()>, value: rule) -> T + = tag() v:value() { v } + } +} diff --git a/crates/rebel-parse/src/grammar/task_ref.rs b/crates/rebel-parse/src/grammar/task_ref.rs new file mode 100644 index 0000000..77d6c5f --- /dev/null +++ b/crates/rebel-parse/src/grammar/task_ref.rs @@ -0,0 +1,65 @@ +pub use rules::*; + +use rebel_common::types::TaskIDRef; + +#[derive(Debug, Clone, Copy)] +pub struct TaskRef<'a> { + pub id: TaskIDRef<'a>, + pub args: TaskArgs<'a>, +} + +#[derive(Debug, Clone, Copy, Default)] +pub struct TaskArgs<'a> { + pub host: Option<&'a str>, + pub target: Option<&'a str>, +} + +#[derive(Debug, Clone, Copy)] +pub struct TaskFlags { + pub force_run: bool, +} + +peg::parser! { + pub grammar rules() for str { + pub rule task_ref_with_flags() -> (TaskRef<'input>, TaskFlags) + = task:task_ref() flags:task_flags() { (task, flags) } + + pub rule task_ref() -> TaskRef<'input> + = id:task_id() args:task_args() { + TaskRef { + id, + args, + } + } + + rule recipe_id() -> &'input str + = $(name() ("/" name())?) + + rule task_id() -> TaskIDRef<'input> + = recipe:recipe_id() "::" task:name() { + TaskIDRef { recipe, task } + } + + rule task_args() -> TaskArgs<'input> + = "@" host:name()? target:tagged(<":">, )? { + TaskArgs { + host, + target, + } + } + / { Default::default() } + + rule task_flags() -> TaskFlags + = force_run:force_run() { TaskFlags { force_run } } + + rule force_run() -> bool + = "+" { true } + / { false } + + rule name() -> &'input str + = $(['a'..='z' | 'A' ..='Z' | '0'..='9' | '_' | '-']+) + + rule tagged(tag: rule<()>, value: rule) -> T + = tag() v:value() { v } + } +} diff --git a/crates/rebel-parse/src/lib.rs b/crates/rebel-parse/src/lib.rs index d9a59cf..8019d00 100644 --- a/crates/rebel-parse/src/lib.rs +++ b/crates/rebel-parse/src/lib.rs @@ -1,65 +1,5 @@ -use rebel_common::types::TaskIDRef; +pub mod ast; +mod grammar; -pub use rules::*; - -#[derive(Debug, Clone, Copy)] -pub struct TaskRef<'a> { - pub id: TaskIDRef<'a>, - pub args: TaskArgs<'a>, -} - -#[derive(Debug, Clone, Copy, Default)] -pub struct TaskArgs<'a> { - pub host: Option<&'a str>, - pub target: Option<&'a str>, -} - -#[derive(Debug, Clone, Copy)] -pub struct TaskFlags { - pub force_run: bool, -} - -peg::parser! { - grammar rules() for str { - pub rule task_ref_with_flags() -> (TaskRef<'input>, TaskFlags) - = task:task_ref() flags:task_flags() { (task, flags) } - - pub rule task_ref() -> TaskRef<'input> - = id:task_id() args:task_args() { - TaskRef { - id, - args, - } - } - - rule recipe_id() -> &'input str - = $(name() ("/" name())?) - - rule task_id() -> TaskIDRef<'input> - = recipe:recipe_id() "::" task:name() { - TaskIDRef { recipe, task } - } - - rule task_args() -> TaskArgs<'input> - = "@" host:name()? target:tagged(<":">, )? { - TaskArgs { - host, - target, - } - } - / { Default::default() } - - rule task_flags() -> TaskFlags - = force_run:force_run() { TaskFlags { force_run } } - - rule force_run() -> bool - = "+" { true } - / { false } - - rule name() -> &'input str - = $(['a'..='z' | 'A' ..='Z' | '0'..='9' | '_' | '-']+) - - rule tagged(tag: rule<()>, value: rule) -> T - = tag() v:value() { v } - } -} +pub use grammar::recipe; +pub use grammar::task_ref; diff --git a/crates/rebel/src/main.rs b/crates/rebel/src/main.rs index 214e6ef..625b43d 100644 --- a/crates/rebel/src/main.rs +++ b/crates/rebel/src/main.rs @@ -49,7 +49,7 @@ fn main() { let mut force_run = HashSet::new(); for task in opts.tasks { - let Ok((parsed, flags)) = parse::task_ref_with_flags(&task) else { + let Ok((parsed, flags)) = parse::task_ref::task_ref_with_flags(&task) else { eprintln!("Invalid task syntax"); std::process::exit(1); }; -- cgit v1.2.3