diff options
Diffstat (limited to 'crates/rebel-parse')
-rw-r--r-- | crates/rebel-parse/Cargo.toml | 23 | ||||
-rw-r--r-- | crates/rebel-parse/benches/recipe.rs | 21 | ||||
-rw-r--r-- | crates/rebel-parse/examples/parse-string.rs | 70 | ||||
-rw-r--r-- | crates/rebel-parse/src/ast/expr.rs | 333 | ||||
-rw-r--r-- | crates/rebel-parse/src/ast/mod.rs | 187 | ||||
-rw-r--r-- | crates/rebel-parse/src/ast/pat.rs | 57 | ||||
-rw-r--r-- | crates/rebel-parse/src/ast/typ.rs | 28 | ||||
-rw-r--r-- | crates/rebel-parse/src/grammar/mod.rs | 3 | ||||
-rw-r--r-- | crates/rebel-parse/src/grammar/recipe.rs | 277 | ||||
-rw-r--r-- | crates/rebel-parse/src/grammar/task_ref.rs | 65 | ||||
-rw-r--r-- | crates/rebel-parse/src/grammar/tokenize.rs | 137 | ||||
-rw-r--r-- | crates/rebel-parse/src/lib.rs | 8 | ||||
-rw-r--r-- | crates/rebel-parse/src/token.rs | 87 |
13 files changed, 1296 insertions, 0 deletions
diff --git a/crates/rebel-parse/Cargo.toml b/crates/rebel-parse/Cargo.toml new file mode 100644 index 0000000..d116736 --- /dev/null +++ b/crates/rebel-parse/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "rebel-parse" +version = "0.1.0" +authors = ["Matthias Schiffer <mschiffer@universe-factory.net>"] +license = "MIT" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +derive-into-owned = { git = "https://github.com/neocturne/derive-into-owned.git", branch = "more-types" } +peg = "0.8.3" +phf = { version = "0.11.2", features = ["macros"] } +rebel-common = { path = "../rebel-common" } +rustc-hash = "1.1.0" + +[dev-dependencies] +clap = { version = "4.0.0", features = ["derive"] } +divan = "0.1.14" + +[[bench]] +name = "recipe" +harness = false diff --git a/crates/rebel-parse/benches/recipe.rs b/crates/rebel-parse/benches/recipe.rs new file mode 100644 index 0000000..4cff857 --- /dev/null +++ b/crates/rebel-parse/benches/recipe.rs @@ -0,0 +1,21 @@ +use rebel_parse::{ast, token::TokenStream}; + +fn main() { + divan::main(); +} + +const RECIPE: &str = include_str!("../../../examples/recipes/gmp/build.recipe"); + +#[divan::bench] +fn tokenize() -> TokenStream<'static> { + rebel_parse::tokenize::token_stream(divan::black_box(RECIPE)).unwrap() +} + +#[divan::bench] +fn parse(bencher: divan::Bencher) { + let tokens = tokenize(); + + bencher.bench(|| -> ast::Recipe<'static> { + rebel_parse::recipe::recipe(divan::black_box(&tokens)).unwrap() + }); +} diff --git a/crates/rebel-parse/examples/parse-string.rs b/crates/rebel-parse/examples/parse-string.rs new file mode 100644 index 0000000..9750a87 --- /dev/null +++ b/crates/rebel-parse/examples/parse-string.rs @@ -0,0 +1,70 @@ +use std::{fmt::Debug, process, time::Instant}; + +use clap::{Parser, ValueEnum}; + +use rebel_parse::{recipe, tokenize}; + +#[derive(Clone, Debug, PartialEq, Eq, ValueEnum)] +enum Rule { + Tokenize, + Recipe, + RecipeStmt, + Block, + BlockStmt, + Expr, + Type, + Pat, +} + +#[derive(Clone, Debug, Parser)] +struct Opts { + rule: Rule, + input: String, +} + +fn main() { + let opts: Opts = Opts::parse(); + let input = opts.input.trim(); + + fn as_debug<'a>(v: impl Debug + 'a) -> Box<dyn Debug + 'a> { + Box::new(v) + } + + let start = Instant::now(); + let result = tokenize::token_stream(input); + let dur = Instant::now().duration_since(start); + println!("Tokenization took {} µs", dur.as_micros()); + + let tokens = match result { + Ok(value) => value, + Err(err) => { + println!("{err}"); + process::exit(1); + } + }; + + let start = Instant::now(); + let result = match opts.rule { + Rule::Tokenize => Ok(as_debug(tokens)), + Rule::Recipe => recipe::recipe(&tokens).map(as_debug), + Rule::RecipeStmt => recipe::recipe_stmt(&tokens).map(as_debug), + Rule::Block => recipe::block(&tokens).map(as_debug), + Rule::BlockStmt => recipe::block_stmt(&tokens).map(as_debug), + Rule::Expr => recipe::expr(&tokens).map(as_debug), + Rule::Type => recipe::typ(&tokens).map(as_debug), + Rule::Pat => recipe::pat(&tokens).map(as_debug), + }; + if opts.rule != Rule::Tokenize { + let dur = Instant::now().duration_since(start); + println!("Parsing took {} µs", dur.as_micros()); + } + + match result { + Ok(value) => { + println!("{value:#?}"); + } + Err(err) => { + println!("{err}"); + } + }; +} diff --git a/crates/rebel-parse/src/ast/expr.rs b/crates/rebel-parse/src/ast/expr.rs new file mode 100644 index 0000000..a35a9af --- /dev/null +++ b/crates/rebel-parse/src/ast/expr.rs @@ -0,0 +1,333 @@ +use std::borrow::Cow; + +use super::{Block, DestrPat, Ident, Path, PathRoot, ValidationError}; +use crate::token; +use derive_into_owned::{Borrowed, IntoOwned}; +use rustc_hash::FxHashSet; + +pub use token::StrKind; + +#[derive(Debug, Clone, PartialEq, Eq, IntoOwned, Borrowed)] +pub enum Expr<'a> { + Binary { + left: Box<Expr<'a>>, + op: OpBinary, + right: Box<Expr<'a>>, + }, + Unary { + op: OpUnary, + expr: Box<Expr<'a>>, + }, + Apply { + expr: Box<Expr<'a>>, + params: Vec<Expr<'a>>, + }, + Method { + expr: Box<Expr<'a>>, + method: Ident<'a>, + params: Vec<Expr<'a>>, + }, + Index { + base: Box<Expr<'a>>, + index: Box<Expr<'a>>, + }, + Field { + base: Box<Expr<'a>>, + field: Ident<'a>, + }, + Block(Block<'a>), + IfElse { + if_blocks: Vec<(Expr<'a>, Block<'a>)>, + else_block: Option<Box<Block<'a>>>, + }, + Paren(Box<Expr<'a>>), + Path(Path<'a>), + Literal(Literal<'a>), +} + +impl<'a> Expr<'a> { + pub(crate) fn binary(left: Expr<'a>, op: OpBinary, right: Expr<'a>) -> Self { + Expr::Binary { + left: Box::new(left), + op, + right: Box::new(right), + } + } + + pub(crate) fn unary(op: OpUnary, expr: Expr<'a>) -> Self { + Expr::Unary { + op, + expr: Box::new(expr), + } + } + + pub(crate) fn apply(expr: Expr<'a>, params: Vec<Expr<'a>>) -> Self { + Expr::Apply { + expr: Box::new(expr), + params, + } + } + + pub(crate) fn method(expr: Expr<'a>, method: Ident<'a>, params: Vec<Expr<'a>>) -> Self { + Expr::Method { + expr: Box::new(expr), + method, + params, + } + } + + pub(crate) fn index(base: Expr<'a>, index: Expr<'a>) -> Self { + Expr::Index { + base: Box::new(base), + index: Box::new(index), + } + } + + pub(crate) fn field(base: Expr<'a>, field: Ident<'a>) -> Self { + Expr::Field { + base: Box::new(base), + field, + } + } + + pub(crate) fn paren(expr: Expr<'a>) -> Self { + Expr::Paren(Box::new(expr)) + } + + pub fn validate(&self) -> Result<(), ValidationError> { + match self { + Expr::Binary { left, op, right } => { + left.validate()?; + right.validate()?; + + if op.is_comparision() + && (left.is_binary_comparison() || right.is_binary_comparison()) + { + return Err(ValidationError::NeedsParens); + } + Ok(()) + } + Expr::Unary { op: _, expr } => expr.validate(), + Expr::Apply { expr, params } => { + for param in params { + param.validate()?; + } + expr.validate() + } + Expr::Method { + expr, + method: _, + params, + } => { + for param in params { + param.validate()?; + } + expr.validate() + } + Expr::Index { base, index } => { + index.validate()?; + base.validate() + } + Expr::Field { base, field: _ } => base.validate(), + Expr::Block(block) => { + for stmt in &block.0 { + stmt.validate()?; + } + Ok(()) + } + Expr::IfElse { + if_blocks, + else_block, + } => { + for (cond, block) in if_blocks { + cond.validate()?; + block.validate()?; + } + if let Some(block) = else_block { + block.validate()?; + } + Ok(()) + } + Expr::Paren(expr) => expr.validate(), + Expr::Path(_) => Ok(()), + Expr::Literal(lit) => lit.validate(), + } + } + + fn is_binary_comparison(&self) -> bool { + let Expr::Binary { + left: _, + op, + right: _, + } = self + else { + return false; + }; + + op.is_comparision() + } +} + +impl<'a> From<DestrPat<'a>> for Expr<'a> { + fn from(value: DestrPat<'a>) -> Self { + match value { + DestrPat::Index { base, index } => Expr::Index { + base: Box::new((*base).into()), + index: index.clone(), + }, + DestrPat::Field { base, field } => Expr::Field { + base: Box::new((*base).into()), + field: field.clone(), + }, + DestrPat::Paren(pat) => Expr::Paren(Box::new((*pat).into())), + DestrPat::Path(path) => Expr::Path(path.clone()), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, IntoOwned, Borrowed)] +pub enum Literal<'a> { + Unit, + None, + Bool(bool), + Int(i64), + Str { + pieces: Vec<StrPiece<'a>>, + kind: StrKind, + }, + Tuple(Vec<Expr<'a>>), + Array(Vec<Expr<'a>>), + Map(Vec<MapEntry<'a>>), + Struct(Vec<StructField<'a>>), +} + +impl<'a> Literal<'a> { + fn validate(&self) -> Result<(), ValidationError> { + match self { + Literal::Unit => Ok(()), + Literal::None => Ok(()), + Literal::Bool(_) => Ok(()), + Literal::Int(_) => Ok(()), + Literal::Str { pieces, kind: _ } => { + for piece in pieces { + match piece { + StrPiece::Chars(_) => {} + StrPiece::Escape(_) => {} + StrPiece::Interp(expr) => expr.validate()?, + } + } + Ok(()) + } + Literal::Tuple(elems) => { + for elem in elems { + elem.validate()?; + } + Ok(()) + } + Literal::Array(elems) => { + for elem in elems { + elem.validate()?; + } + Ok(()) + } + Literal::Map(entries) => { + for MapEntry { key, value } in entries { + key.validate()?; + value.validate()?; + } + Ok(()) + } + Literal::Struct(entries) => { + let mut fields = FxHashSet::default(); + for StructField { name, value } in entries { + if !fields.insert(name) { + return Err(ValidationError::DuplicateKey); + } + value.validate()?; + } + Ok(()) + } + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, IntoOwned, Borrowed)] +pub enum StrPiece<'a> { + Chars(Cow<'a, str>), + Escape(char), + Interp(Expr<'a>), +} + +impl<'a> TryFrom<&token::StrPiece<'a>> for StrPiece<'a> { + type Error = &'static str; + + fn try_from(value: &token::StrPiece<'a>) -> Result<Self, Self::Error> { + use crate::recipe; + + Ok(match value { + token::StrPiece::Chars(chars) => StrPiece::Chars(Cow::Borrowed(chars)), + token::StrPiece::Escape(c) => StrPiece::Escape(*c), + token::StrPiece::Interp(tokens) => StrPiece::Interp( + recipe::expr(tokens).or(Err("Invalid expression in string interpolation"))?, + ), + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, IntoOwned, Borrowed)] +pub struct MapEntry<'a> { + pub key: Expr<'a>, + pub value: Expr<'a>, +} + +#[derive(Debug, Clone, PartialEq, Eq, IntoOwned, Borrowed)] +pub struct StructField<'a> { + pub name: Cow<'a, str>, + pub value: Expr<'a>, +} + +impl<'a> StructField<'a> { + pub(crate) fn new(field: Ident<'a>, value: Option<Expr<'a>>) -> Self { + let value = value.unwrap_or_else(|| { + Expr::Path(Path { + root: PathRoot::Relative, + components: vec![field.clone()], + }) + }); + StructField { + name: field.name, + value, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum OpUnary { + Not, + Neg, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum OpBinary { + Add, + Sub, + Mul, + Div, + Rem, + And, + Or, + Eq, + Lt, + Le, + Ne, + Ge, + Gt, +} + +impl OpBinary { + fn is_comparision(self) -> bool { + use OpBinary::*; + + matches!(self, Eq | Lt | Le | Ne | Ge | Gt) + } +} diff --git a/crates/rebel-parse/src/ast/mod.rs b/crates/rebel-parse/src/ast/mod.rs new file mode 100644 index 0000000..0cdc808 --- /dev/null +++ b/crates/rebel-parse/src/ast/mod.rs @@ -0,0 +1,187 @@ +use std::borrow::Cow; + +use derive_into_owned::{Borrowed, IntoOwned}; +use rustc_hash::FxHashSet; + +pub mod expr; +pub mod pat; +pub mod typ; + +use expr::{Expr, StructField}; +use pat::{DestrPat, Pat}; +use typ::Type; + +pub type Recipe<'a> = Vec<RecipeStmt<'a>>; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum RecipeStmt<'a> { + BlockStmt(BlockStmt<'a>), + Fetch { + name: Ident<'a>, + entries: Vec<StructField<'a>>, + }, + Task { + name: Ident<'a>, + params: Vec<FuncParam<'a>>, + block: Block<'a>, + }, +} + +impl<'a> RecipeStmt<'a> { + pub fn validate(&self) -> Result<(), ValidationError> { + match self { + RecipeStmt::BlockStmt(stmt) => stmt.validate(), + RecipeStmt::Fetch { name: _, entries } => { + let mut fields = FxHashSet::default(); + for StructField { name, value } in entries { + if !fields.insert(name) { + return Err(ValidationError::DuplicateKey); + } + value.validate()?; + } + Ok(()) + } + RecipeStmt::Task { + name: _, + params: _, + block, + } => { + // TODO: Validate params? + block.validate() + } + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, IntoOwned, Borrowed)] +pub struct Block<'a>(pub Vec<BlockStmt<'a>>); + +impl<'a> Block<'a> { + pub fn validate(&self) -> Result<(), ValidationError> { + for stmt in &self.0 { + stmt.validate()?; + } + Ok(()) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, IntoOwned, Borrowed)] +pub enum BlockStmt<'a> { + Let { + dest: Box<TypedPat<'a>>, + expr: Option<Box<Expr<'a>>>, + }, + Assign { + dest: Box<DestrPat<'a>>, + expr: Box<Expr<'a>>, + }, + Fn { + ident: Ident<'a>, + params: Vec<FuncParam<'a>>, + ret: Option<Box<Type<'a>>>, + block: Block<'a>, + }, + Expr { + expr: Box<Expr<'a>>, + }, + Empty, +} + +impl<'a> BlockStmt<'a> { + pub(crate) fn let_assign(dest: TypedPat<'a>, expr: Option<Expr<'a>>) -> Self { + BlockStmt::Let { + dest: Box::new(dest), + expr: expr.map(Box::new), + } + } + + pub(crate) fn assign( + dest: DestrPat<'a>, + op: Option<expr::OpBinary>, + swapped: bool, + expr: Expr<'a>, + ) -> Self { + let expr = match op { + Some(op) => { + let dest_expr = Expr::from(dest.clone()); + if swapped { + Expr::binary(expr, op, dest_expr) + } else { + Expr::binary(dest_expr, op, expr) + } + } + None => expr, + }; + BlockStmt::Assign { + dest: Box::new(dest), + expr: Box::new(expr), + } + } + + pub fn validate(&self) -> Result<(), ValidationError> { + match self { + BlockStmt::Let { dest, expr } => { + let TypedPat { pat, typ: _ } = dest.as_ref(); + pat.validate()?; + if let Some(expr) = expr { + expr.validate()?; + } + Ok(()) + } + BlockStmt::Assign { dest, expr } => { + dest.validate()?; + expr.validate()?; + Ok(()) + } + BlockStmt::Fn { + ident: _, + params: _, + ret: _, + block, + } => { + // TODO: Validate params? + block.validate() + } + BlockStmt::Expr { expr } => expr.validate(), + BlockStmt::Empty => Ok(()), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, IntoOwned, Borrowed)] +pub struct TypedPat<'a> { + pub pat: Pat<'a>, + pub typ: Option<Type<'a>>, +} + +#[derive(Debug, Clone, PartialEq, Eq, IntoOwned, Borrowed)] +pub struct FuncParam<'a> { + pub name: Ident<'a>, + pub typ: Type<'a>, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PathRoot { + Absolute, + Relative, + Recipe, + Task, +} + +#[derive(Debug, Clone, PartialEq, Eq, IntoOwned, Borrowed)] +pub struct Path<'a> { + pub root: PathRoot, + pub components: Vec<Ident<'a>>, +} + +#[derive(Debug, Clone, PartialEq, Eq, IntoOwned, Borrowed)] +pub struct Ident<'a> { + pub name: Cow<'a, str>, +} + +#[derive(Debug, Clone, Copy)] +pub enum ValidationError { + DuplicateKey, + NeedsParens, + InvalidLet, +} diff --git a/crates/rebel-parse/src/ast/pat.rs b/crates/rebel-parse/src/ast/pat.rs new file mode 100644 index 0000000..c85f625 --- /dev/null +++ b/crates/rebel-parse/src/ast/pat.rs @@ -0,0 +1,57 @@ +use super::*; + +#[derive(Debug, Clone, PartialEq, Eq, IntoOwned, Borrowed)] +pub enum Pat<'a> { + Paren(Box<Pat<'a>>), + Ident(Ident<'a>), +} + +impl<'a> Pat<'a> { + pub fn validate(&self) -> Result<(), ValidationError> { + match self { + Pat::Paren(pat) => pat.validate(), + Pat::Ident(_) => Ok(()), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, IntoOwned, Borrowed)] +pub enum DestrPat<'a> { + Index { + base: Box<DestrPat<'a>>, + index: Box<Expr<'a>>, + }, + Field { + base: Box<DestrPat<'a>>, + field: Ident<'a>, + }, + Paren(Box<DestrPat<'a>>), + Path(Path<'a>), +} + +impl<'a> DestrPat<'a> { + pub fn validate(&self) -> Result<(), ValidationError> { + match self { + DestrPat::Index { base, index } => { + base.validate()?; + index.validate()?; + Ok(()) + } + DestrPat::Field { base, field: _ } => base.validate(), + DestrPat::Paren(pat) => pat.validate(), + DestrPat::Path(_) => Ok(()), + } + } +} + +impl<'a> From<Pat<'a>> for DestrPat<'a> { + fn from(value: Pat<'a>) -> Self { + match value { + Pat::Paren(pat) => DestrPat::Paren(Box::new((*pat).into())), + Pat::Ident(ident) => DestrPat::Path(Path { + root: PathRoot::Relative, + components: vec![ident], + }), + } + } +} diff --git a/crates/rebel-parse/src/ast/typ.rs b/crates/rebel-parse/src/ast/typ.rs new file mode 100644 index 0000000..54ab1b9 --- /dev/null +++ b/crates/rebel-parse/src/ast/typ.rs @@ -0,0 +1,28 @@ +use std::borrow::Cow; + +use derive_into_owned::{Borrowed, IntoOwned}; + +use super::Path; + +#[derive(Debug, Clone, PartialEq, Eq, IntoOwned, Borrowed)] +pub enum Type<'a> { + Paren(Box<Type<'a>>), + Option(Box<Type<'a>>), + Path(Path<'a>), + Literal(Literal<'a>), +} + +#[derive(Debug, Clone, PartialEq, Eq, IntoOwned, Borrowed)] +pub enum Literal<'a> { + Unit, + Tuple(Vec<Type<'a>>), + Array(Box<Type<'a>>), + Map(Box<Type<'a>>, Box<Type<'a>>), + Struct(Vec<StructField<'a>>), +} + +#[derive(Debug, Clone, PartialEq, Eq, IntoOwned, Borrowed)] +pub struct StructField<'a> { + pub name: Cow<'a, str>, + pub typ: Type<'a>, +} diff --git a/crates/rebel-parse/src/grammar/mod.rs b/crates/rebel-parse/src/grammar/mod.rs new file mode 100644 index 0000000..de06991 --- /dev/null +++ b/crates/rebel-parse/src/grammar/mod.rs @@ -0,0 +1,3 @@ +pub mod recipe; +pub mod task_ref; +pub mod tokenize; diff --git a/crates/rebel-parse/src/grammar/recipe.rs b/crates/rebel-parse/src/grammar/recipe.rs new file mode 100644 index 0000000..81b47c9 --- /dev/null +++ b/crates/rebel-parse/src/grammar/recipe.rs @@ -0,0 +1,277 @@ +use std::borrow::Cow; + +use crate::{ + ast::{ + self, + expr::{self, Expr}, + pat::{DestrPat, Pat}, + typ::{self, Type}, + }, + token::*, +}; + +pub use rules::*; + +peg::parser! { + pub grammar rules<'a>() for TokenStream<'a> { + use expr::OpBinary::*; + use expr::OpUnary::*; + + pub rule recipe() -> ast::Recipe<'a> + = recipe:recipe_stmt()* { recipe } + + pub rule recipe_stmt() -> ast::RecipeStmt<'a> + = [Token::Keyword(Keyword::Fetch)] name:ident() p('{') entries:delimited(<struct_field()>, <p(',')>) p('}') { + ast::RecipeStmt::Fetch { name, entries } + } + / [Token::Keyword(Keyword::Task)] name:ident() p('(') params:func_params() p(')') + p('{') block:block() p('}') { + ast::RecipeStmt::Task { name, params, block } + } + / stmt:block_stmt() p(';') { + ast::RecipeStmt::BlockStmt(stmt) + } + + pub rule block() -> ast::Block<'a> + = block:block_stmt() ++ p(';') { ast::Block(block) } + + pub rule block_stmt() -> ast::BlockStmt<'a> + = [Token::Keyword(Keyword::Let)] dest:typed_pat() p('=') expr:expr() { + ast::BlockStmt::let_assign(dest, Some(expr)) + } + / [Token::Keyword(Keyword::Let)] dest:typed_pat() { + ast::BlockStmt::let_assign(dest, None) + } + / [Token::Keyword(Keyword::Fn)] ident:ident() p('(') params:func_params() p(')') + ret:tagged(<p2('-', '>')>, <typ()>)? p('{') block:block() p('}') + { + ast::BlockStmt::Fn { + ident, + params, + ret: ret.map(Box::new), + block, + } + } + / dest:destr_pat() op:assign_op() expr:expr() { + ast::BlockStmt::assign(dest, op, false, expr) + } + / dest:destr_pat() p2('=', '+') expr:expr() { + ast::BlockStmt::assign(dest, Some(Add), true, expr) + } + / expr:expr() { + ast::BlockStmt::Expr { expr: Box::new(expr) } + } + / { ast::BlockStmt::Empty } + + rule assign_op() -> Option<expr::OpBinary> + = p('=') { None } + / p2('+', '=') { Some(Add) } + / p2('-', '=') { Some(Sub) } + / p2('*', '=') { Some(Mul) } + / p2('/', '=') { Some(Div) } + / p2('%', '=') { Some(Rem) } + + rule typed_pat() -> ast::TypedPat<'a> + = pat:pat() typ:tagged(<p(':')>, <typ()>)? { ast::TypedPat { pat, typ } } + + pub rule typ() -> Type<'a> = precedence! { + t:@ p('?') { Type::Option(Box::new(t)) } + -- + t:typ_atom() { t } + } + + rule typ_atom() -> Type<'a> + = p('(') t:typ() p(')') { Type::Paren(Box::new(t)) } + / lit:typ_literal() { Type::Literal(lit) } + / path:path() { Type::Path(path) } + + rule typ_literal() -> typ::Literal<'a> + = p('(') p(')') { typ::Literal::Unit } + / p('(') elements:(typ() ++ p(',')) p(',')? p(')') { + typ::Literal::Tuple(elements) + } + / p('[') typ:typ() p(']') { + typ::Literal::Array(Box::new(typ)) + } + / [Token::Keyword(Keyword::Map)] p('{') key:typ() p2('=', '>') value:typ() p('}') { + typ::Literal::Map(Box::new(key), Box::new(value)) + } + / p('{') entries:delimited(<struct_field_typ()>, <p(',')>) p('}') { + typ::Literal::Struct(entries) + } + + pub rule pat() -> ast::pat::Pat<'a> + = p('(') pat:pat() p(')') { Pat::Paren(Box::new(pat)) } + / ident:ident() { Pat::Ident(ident) } + + pub rule destr_pat() -> DestrPat<'a> = precedence! { + base:@ p('[') index:expr() p(']') { + DestrPat::Index { base: Box::new(base), index: Box::new(index) } + } + -- + base:@ p('.') field:field() { + DestrPat::Field { base: Box::new(base), field } + } + -- + p('(') pat:destr_pat() p(')') { DestrPat::Paren(Box::new(pat)) } + path:path() { DestrPat::Path(path) } + } + + rule struct_field_typ() -> typ::StructField<'a> + = field:field() p(':') typ:typ() { + typ::StructField { name: field.name, typ } + } + + pub rule expr() -> Expr<'a> = precedence! { + left:(@) p2('|', '|') right:@ { Expr::binary(left, Or, right) } + -- + left:(@) p2('&', '&') right:@ { Expr::binary(left, And, right) } + -- + left:(@) p2('=', '=') right:@ { Expr::binary(left, Eq, right) } + left:(@) p2('!', '=') right:@ { Expr::binary(left, Ne, right) } + left:(@) p('<') right:@ { Expr::binary(left, Lt, right) } + left:(@) p('>') right:@ { Expr::binary(left, Gt, right) } + left:(@) p2('<', '=') right:@ { Expr::binary(left, Le, right) } + left:(@) p2('>', '=') right:@ { Expr::binary(left, Ge, right) } + -- + left:(@) p('+') right:@ { Expr::binary(left, Add, right) } + left:(@) p('-') right:@ { Expr::binary(left, Sub, right) } + -- + left:(@) p('*') right:@ { Expr::binary(left, Mul, right) } + left:(@) p('/') right:@ { Expr::binary(left, Div, right) } + left:(@) p('%') right:@ { Expr::binary(left, Rem, right) } + -- + p('-') expr:@ { Expr::unary(Neg, expr) } + p('!') expr:@ { Expr::unary(Not, expr) } + -- + expr:@ p('(') params:call_params() p(')') { + Expr::apply(expr, params) + } + base:@ p('[') index:expr() p(']') { Expr::index(base, index) } + -- + expr:@ p('.') method:field() p('(') params:call_params() p(')') { + Expr::method(expr, method, params) + } + base:@ p('.') field:field() { Expr::field(base, field) } + -- + e:atom() { e } + } + + rule atom() -> Expr<'a> + = p('(') e:expr() p(')') { Expr::paren(e) } + / [Token::Keyword(Keyword::If)] + if_blocks:(cond_block() ++ ([Token::Keyword(Keyword::Else)] [Token::Keyword(Keyword::If)])) + else_block:([Token::Keyword(Keyword::Else)] p('{') block:block() p('}') { Box::new(block) })? + { + Expr::IfElse { if_blocks, else_block } + } + / lit:literal() { Expr::Literal(lit) } + / p('{') block:block() p('}') { Expr::Block(block) } + / path:path() { Expr::Path(path) } + + rule cond_block() -> (Expr<'a>, ast::Block<'a>) + = cond:expr() p('{') block:block() p('}') { (cond, block) } + + rule call_params() -> Vec<expr::Expr<'a>> + = args:delimited(<expr()>, <p(',')>) { args } + + rule func_params() -> Vec<ast::FuncParam<'a>> + = params:delimited(<func_param()>, <p(',')>) { params } + + rule func_param() -> ast::FuncParam<'a> + = name:ident() p(':') typ:typ() { ast::FuncParam { name, typ } } + + rule literal() -> expr::Literal<'a> + = [Token::Keyword(Keyword::True)] { expr::Literal::Bool(true) } + / [Token::Keyword(Keyword::False)] { expr::Literal::Bool(false) } + / [Token::Keyword(Keyword::None)] { expr::Literal::None } + / n:number() { expr::Literal::Int(n) } + / [Token::Str(Str { pieces, kind })] { ? + let pieces = pieces + .iter() + .map(|piece| piece.try_into()) + .collect::<Result<_, _>>()?; + Ok(expr::Literal::Str{ pieces, kind: *kind }) + } + / p('(') p(')') { expr::Literal::Unit } + / p('(') elements:(expr() ++ p(',')) p(',')? p(')') { + expr::Literal::Tuple(elements) + } + / p('[') elements:delimited(<expr()>, <p(',')>) p(']') { + expr::Literal::Array(elements) + } + / [Token::Keyword(Keyword::Map)] p('{') entries:delimited(<map_entry()>, <p(',')>) p('}') { + expr::Literal::Map(entries) + } + / p('{') entries:delimited(<struct_field()>, <p(',')>) p('}') { + expr::Literal::Struct(entries) + } + + rule map_entry() -> expr::MapEntry<'a> + = key:expr() p2('=', '>') value:expr() { + expr::MapEntry { key, value } + } + + rule struct_field() -> expr::StructField<'a> + = field:field() value:tagged(<p(':')>, <expr()>)? { + expr::StructField::new(field, value) + } + + rule path() -> ast::Path<'a> + = components:(ident() ++ p2(':', ':')) { + ast::Path { root: ast::PathRoot::Relative, components } + } + / components:(p2(':', ':') ident:ident() { ident })+ { + ast::Path { root: ast::PathRoot::Absolute, components } + } + / [Token::Keyword(Keyword::Recipe)] components:(p2(':', ':') ident:ident() { ident })* { + ast::Path { root: ast::PathRoot::Recipe, components } + } + / [Token::Keyword(Keyword::Task)] components:(p2(':', ':') ident:ident() { ident })* { + ast::Path { root: ast::PathRoot::Task, components } + } + + rule field() -> ast::Ident<'a> + = ident() + / [Token::Number(content)] { + ast::Ident { name: Cow::Borrowed(content) } + } + + rule number() -> i64 + = neg:p('-')? [Token::Number(s)] { ? + let (radix, rest) = if let Some(rest) = s.strip_prefix("0x") { + (16, rest) + } else if let Some(rest) = s.strip_prefix("0o") { + (8, rest) + } else if let Some(rest) = s.strip_prefix("0b") { + (2, rest) + } else { + (10, *s) + }; + let mut digits = rest.replace('_', ""); + if neg.is_some() { + digits = format!("-{digits}"); + } + i64::from_str_radix(&digits, radix).or(Err("number")) + } + + rule p_(ch: char) + = [Token::Punct(Punct(c, Spacing::Joint)) if *c == ch] {} + + rule p(ch: char) -> () + = [Token::Punct(Punct(c, _)) if *c == ch] {} + + rule p2(ch1: char, ch2: char) -> () + = p_(ch1) p(ch2) + + rule ident() -> ast::Ident<'a> + = [Token::Ident(name)] { ast::Ident { name: Cow::Borrowed(name) } } + + rule delimited<T>(expr: rule<T>, delim: rule<()>) -> Vec<T> + = values:(expr() ++ delim()) delim()? { values } + / { Vec::new() } + + rule tagged<T>(tag: rule<()>, value: rule<T>) -> T + = tag() v:value() { v } + } +} diff --git a/crates/rebel-parse/src/grammar/task_ref.rs b/crates/rebel-parse/src/grammar/task_ref.rs new file mode 100644 index 0000000..77d6c5f --- /dev/null +++ b/crates/rebel-parse/src/grammar/task_ref.rs @@ -0,0 +1,65 @@ +pub use rules::*; + +use rebel_common::types::TaskIDRef; + +#[derive(Debug, Clone, Copy)] +pub struct TaskRef<'a> { + pub id: TaskIDRef<'a>, + pub args: TaskArgs<'a>, +} + +#[derive(Debug, Clone, Copy, Default)] +pub struct TaskArgs<'a> { + pub host: Option<&'a str>, + pub target: Option<&'a str>, +} + +#[derive(Debug, Clone, Copy)] +pub struct TaskFlags { + pub force_run: bool, +} + +peg::parser! { + pub grammar rules() for str { + pub rule task_ref_with_flags() -> (TaskRef<'input>, TaskFlags) + = task:task_ref() flags:task_flags() { (task, flags) } + + pub rule task_ref() -> TaskRef<'input> + = id:task_id() args:task_args() { + TaskRef { + id, + args, + } + } + + rule recipe_id() -> &'input str + = $(name() ("/" name())?) + + rule task_id() -> TaskIDRef<'input> + = recipe:recipe_id() "::" task:name() { + TaskIDRef { recipe, task } + } + + rule task_args() -> TaskArgs<'input> + = "@" host:name()? target:tagged(<":">, <name()>)? { + TaskArgs { + host, + target, + } + } + / { Default::default() } + + rule task_flags() -> TaskFlags + = force_run:force_run() { TaskFlags { force_run } } + + rule force_run() -> bool + = "+" { true } + / { false } + + rule name() -> &'input str + = $(['a'..='z' | 'A' ..='Z' | '0'..='9' | '_' | '-']+) + + rule tagged<T>(tag: rule<()>, value: rule<T>) -> T + = tag() v:value() { v } + } +} diff --git a/crates/rebel-parse/src/grammar/tokenize.rs b/crates/rebel-parse/src/grammar/tokenize.rs new file mode 100644 index 0000000..eb8a900 --- /dev/null +++ b/crates/rebel-parse/src/grammar/tokenize.rs @@ -0,0 +1,137 @@ +use crate::token::*; + +pub use rules::*; + +static KEYWORDS: phf::Map<&'static str, Keyword> = phf::phf_map! { + "else" => Keyword::Else, + "false" => Keyword::False, + "fetch" => Keyword::Fetch, + "fn" => Keyword::Fn, + "for" => Keyword::For, + "if" => Keyword::If, + "let" => Keyword::Let, + "map" => Keyword::Map, + "mut" => Keyword::Mut, + "none" => Keyword::None, + "recipe" => Keyword::Recipe, + "set" => Keyword::Set, + "task" => Keyword::Task, + "true" => Keyword::True, +}; + +peg::parser! { + pub grammar rules() for str { + pub rule token_stream() -> TokenStream<'input> + = _ tokens:(token() ** _) _ { TokenStream(tokens) } + + pub rule token() -> Token<'input> + = number:number() { Token::Number(number) } + / string:string() { Token::Str(string) } + / token:ident_or_keyword() { token } + / punct:punct() { Token::Punct(punct) } + + rule ident_or_keyword() -> Token<'input> + = s:$( + ['a'..='z' | 'A' ..='Z' | '_' ] + ['a'..='z' | 'A' ..='Z' | '_' | '0'..='9']* + ) { + if let Some(kw) = KEYWORDS.get(s) { + Token::Keyword(*kw) + } else { + Token::Ident(s) + } + } + + rule punct() -> Punct + = ch:punct_char() spacing:spacing() { Punct(ch, spacing) } + + rule punct_char() -> char + = !comment_start() ch:[ + | '~' | '!' | '@' | '#' | '$' | '%' | '^' | '&' + | '*' | '-' | '=' | '+' | '|' | ';' | ':' | ',' + | '<' | '.' | '>' | '/' | '\'' | '?' | '(' | ')' + | '[' | ']' | '{' | '}' + ] { ch } + + rule spacing() -> Spacing + = &punct_char() { Spacing::Joint } + / { Spacing::Alone } + + rule number() -> &'input str + = $(['0'..='9'] ['0'..='9' | 'a'..='z' | 'A'..='Z' | '_']*) + + rule string() -> Str<'input> + = "\"" pieces:string_piece()* "\"" { + Str { + pieces, + kind: StrKind::Regular, + } + } + / "r\"" chars:$([^'"']*) "\"" { + Str { + pieces: vec![StrPiece::Chars(chars)], + kind: StrKind::Raw, + } + } + / "```" newline() pieces:script_string_piece()* "```" { + Str { + pieces, + kind: StrKind::Script, + } + } + + rule string_piece() -> StrPiece<'input> + = chars:$((!"{{" [^'"' | '\\'])+) { StrPiece::Chars(chars) } + / "\\" escape:string_escape() { StrPiece::Escape(escape) } + / string_interp() + + rule string_escape() -> char + = "n" { '\n' } + / "r" { '\r' } + / "t" { '\t' } + / "\\" { '\\' } + / "\"" { '"' } + / "{" { '{' } + / "0" { '\0' } + / "x" digits:$(['0'..='7'] hex_digit()) { + u8::from_str_radix(digits, 16).unwrap().into() + } + / "u{" digits:$(hex_digit()*<1,6>) "}" { ? + u32::from_str_radix(digits, 16).unwrap().try_into().or(Err("Invalid unicode escape")) + } + + rule script_string_piece() -> StrPiece<'input> + = chars:$((!"{{" !"```" [_])+) { StrPiece::Chars(chars) } + / string_interp() + + rule string_interp() -> StrPiece<'input> + = "{{" _ tokens:(subtoken() ++ _) _ "}}" { + StrPiece::Interp(TokenStream(tokens)) + } + + rule subtoken() -> Token<'input> + = !"}}" token:token() { token } + + rule hex_digit() + = ['0'..='9' | 'a'..='f' | 'A'..='F'] + + /// Mandatory whitespace + rule __ + = ([' ' | '\t'] / quiet!{newline()} / quiet!{comment()})+ + + /// Optional whitespace + rule _ + = quiet!{__?} + + rule comment_start() + = "//" + / "/*" + + rule comment() + = "//" (!newline() [_])* (newline() / ![_]) + / "/*" (!"*/" [_])* "*/" + + rule newline() + = ['\n' | '\r'] + } +} diff --git a/crates/rebel-parse/src/lib.rs b/crates/rebel-parse/src/lib.rs new file mode 100644 index 0000000..4a8c431 --- /dev/null +++ b/crates/rebel-parse/src/lib.rs @@ -0,0 +1,8 @@ +pub mod ast; +pub mod token; + +mod grammar; + +pub use grammar::recipe; +pub use grammar::task_ref; +pub use grammar::tokenize; diff --git a/crates/rebel-parse/src/token.rs b/crates/rebel-parse/src/token.rs new file mode 100644 index 0000000..444b5a8 --- /dev/null +++ b/crates/rebel-parse/src/token.rs @@ -0,0 +1,87 @@ +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Token<'a> { + Keyword(Keyword), + Ident(&'a str), + Punct(Punct), + Str(Str<'a>), + Number(&'a str), +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Keyword { + Else, + False, + Fetch, + Fn, + For, + If, + Let, + Map, + Mut, + None, + Recipe, + Set, + Task, + True, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Punct(pub char, pub Spacing); + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Spacing { + Alone, + Joint, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Str<'a> { + pub pieces: Vec<StrPiece<'a>>, + pub kind: StrKind, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum StrPiece<'a> { + Chars(&'a str), + Escape(char), + Interp(TokenStream<'a>), +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum StrKind { + Regular, + Raw, + Script, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct TokenStream<'a>(pub Vec<Token<'a>>); + +impl<'a> peg::Parse for TokenStream<'a> { + type PositionRepr = usize; + + fn start(&self) -> usize { + 0 + } + + fn is_eof(&self, pos: usize) -> bool { + pos >= self.0.len() + } + + fn position_repr(&self, pos: usize) -> Self::PositionRepr { + pos + } +} + +impl<'input, 'a: 'input> peg::ParseElem<'input> for TokenStream<'a> { + type Element = &'input Token<'a>; + + fn parse_elem(&'input self, pos: usize) -> peg::RuleResult<Self::Element> { + use peg::RuleResult; + + match self.0[pos..].first() { + Some(c) => RuleResult::Matched(pos + 1, c), + None => RuleResult::Failed, + } + } +} |