diff --git a/src/ast.rs b/src/ast.rs index 5ea5bc7..cf08fe7 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,3 +1,22 @@ +//! AST (Abstract Syntax Tree) module for the TeaLang compiler. +//! +//! This module defines the complete structure of the AST produced by the parser. +//! It is organized into the following sub-modules: +//! +//! - [`decl`]: Declarations and definitions, including variable declarations, +//! variable definitions, struct definitions, and function declarations/definitions. +//! - [`display`]: [`std::fmt::Display`] implementations for pretty-printing AST nodes. +//! - [`expr`]: Expression nodes, including arithmetic expressions, boolean expressions, +//! comparison expressions, function calls, left-values, and right-values. +//! - [`ops`]: Operator enumerations for arithmetic, boolean, and comparison operations. +//! - [`program`]: Top-level program structure, including `use` statements and program elements. +//! - [`stmt`]: Statement nodes, including assignment, control flow (`if`, `while`), +//! function calls, `return`, `break`, `continue`, and null statements. +//! - [`tree`]: AST traversal and visitor utilities. +//! - [`types`]: Type specifiers and built-in type definitions. +//! +//! All major types from sub-modules are re-exported at this level for convenient access. + pub mod decl; pub mod display; pub mod expr; diff --git a/src/ast/decl.rs b/src/ast/decl.rs index f4ac542..439db5b 100644 --- a/src/ast/decl.rs +++ b/src/ast/decl.rs @@ -1,101 +1,161 @@ +//! Declaration and definition AST nodes. +//! +//! This module covers everything related to declaring or defining names in +//! the language: variable declarations and definitions (scalar and array), +//! struct definitions, function declarations, and function definitions. + use super::expr::{RightVal, RightValList}; use super::stmt::CodeBlockStmtList; use super::types::TypeSpecifier; use std::ops::Deref; +/// The fixed-length metadata for an array variable declaration. 
#[derive(Debug, Clone)] pub struct VarDeclArray { + /// The number of elements in the array. pub len: usize, } +/// Whether a variable declaration is a scalar or a fixed-length array. #[derive(Debug, Clone)] pub enum VarDeclInner { + /// A scalar (non-array) variable declaration. Scalar, + /// A fixed-length array variable declaration. Array(Box), } +/// A variable declaration — a name and optional type specifier without an +/// initial value. Used in function parameter lists and as forward +/// declarations. #[derive(Debug, Clone)] pub struct VarDecl { + /// The variable name. pub identifier: String, + /// Optional explicit type annotation; `None` means the type is inferred. pub type_specifier: Option, + /// Whether the declaration is for a scalar or an array. pub inner: VarDeclInner, } +/// A list of variable declarations, used for struct fields and parameter lists. pub type VarDeclList = Vec; +/// The initializer for a scalar variable definition, holding its initial value. #[derive(Debug, Clone)] pub struct VarDefScalar { + /// The initial value expression. pub val: Box, } +/// The initializer for an array variable definition. #[derive(Debug, Clone)] pub enum ArrayInitializer { + /// An explicit element-by-element initializer list, e.g. `[1, 2, 3]`. ExplicitList(RightValList), + /// A fill initializer that repeats a single value `count` times, + /// e.g. `[0; 10]`. Fill { val: Box, count: usize }, } +/// The initializer for a fixed-length array variable definition. #[derive(Debug, Clone)] pub struct VarDefArray { + /// The declared length of the array. pub len: usize, + /// The initializer (explicit list or fill). pub initializer: ArrayInitializer, } +/// Whether a variable definition is for a scalar or an array. #[derive(Debug, Clone)] pub enum VarDefInner { + /// A scalar variable definition with a single initial value. Scalar(Box), + /// An array variable definition with a length and initializer. 
Array(Box), } +/// A variable definition — a name, optional type specifier, and an +/// initial value (scalar or array). #[derive(Debug, Clone)] pub struct VarDef { + /// The variable name. pub identifier: String, + /// Optional explicit type annotation. pub type_specifier: Option, + /// The initial value (scalar or array). pub inner: VarDefInner, } +/// A statement that either declares or defines a variable. #[derive(Debug, Clone)] pub enum VarDeclStmtInner { + /// A declaration without an initial value. Decl(Box), + /// A definition with an initial value. Def(Box), } +/// A top-level or block-scoped variable declaration/definition statement. #[derive(Debug, Clone)] pub struct VarDeclStmt { + /// Whether this statement is a bare declaration or a definition. pub inner: VarDeclStmtInner, } +/// A struct type definition, grouping a set of named fields. #[derive(Debug, Clone)] pub struct StructDef { + /// The struct type name. pub identifier: String, + /// The list of field declarations. pub decls: VarDeclList, } +/// The formal parameter declaration of a function, consisting of one or more +/// named (and optionally typed) variable declarations. #[derive(Debug, Clone)] pub struct ParamDecl { + /// The list of parameter variable declarations. pub decls: VarDeclList, } +/// A function declaration (prototype) — name, optional parameters, and +/// optional return type, without a body. #[derive(Debug, Clone)] pub struct FnDecl { + /// The function name. pub identifier: String, + /// Optional parameter declaration; `None` means no parameters. pub param_decl: Option>, + /// Optional return type; `None` means the function returns nothing (void). pub return_dtype: Option, } +/// A function definition — a declaration together with a body. #[derive(Debug, Clone)] pub struct FnDef { + /// The function's declaration (name, parameters, return type). pub fn_decl: Box, + /// The ordered list of statements forming the function body. 
pub stmts: CodeBlockStmtList, } +/// A function declaration used as a top-level statement (forward declaration). #[derive(Debug, Clone)] pub struct FnDeclStmt { + /// The underlying function declaration. pub fn_decl: Box, } +/// `Deref` implementation so that `FnDeclStmt` can be used directly wherever +/// a `FnDecl` reference is expected, avoiding repeated `.fn_decl` field +/// accesses. impl Deref for FnDeclStmt { type Target = FnDecl; + /// Returns a reference to the inner `FnDecl`. fn deref(&self) -> &Self::Target { &self.fn_decl } diff --git a/src/ast/display.rs b/src/ast/display.rs index afb76bc..f3850fc 100644 --- a/src/ast/display.rs +++ b/src/ast/display.rs @@ -1,3 +1,10 @@ +//! `Display` trait implementations for all AST node types. +//! +//! Each implementation produces a compact, human-readable textual +//! representation of the corresponding node. These representations are used +//! when printing error messages, debug output, and the tree-formatted program +//! dump via [`super::tree::DisplayAsTree`]. + use super::expr::*; use super::ops::*; use super::program::Program; @@ -5,6 +12,7 @@ use super::tree::DisplayAsTree; use super::types::*; use std::fmt::{Display, Error, Formatter}; +/// Formats a built-in type as its source-level keyword (e.g., `int`). impl Display for BuiltIn { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { match self { @@ -13,6 +21,9 @@ impl Display for BuiltIn { } } +/// Formats a type-specifier inner node: +/// built-ins use their keyword, composites use their name, and +/// references are wrapped in `&[…]`. impl Display for TypeSpecifierInner { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { match self { @@ -23,12 +34,16 @@ impl Display for TypeSpecifierInner { } } +/// Formats a full type specifier as `@`, annotating it with its +/// source position for diagnostic purposes. 
impl Display for TypeSpecifier { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { write!(f, "{}@{}", self.inner, self.pos) } } +/// Formats an arithmetic binary operator as its LLVM IR mnemonic +/// (e.g., `add`, `sub`, `mul`, `sdiv`). impl Display for ArithBiOp { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { match self { @@ -40,6 +55,7 @@ impl Display for ArithBiOp { } } +/// Formats a boolean unary operator as its source-level symbol (`!`). impl Display for BoolUOp { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { match self { @@ -48,6 +64,8 @@ impl Display for BoolUOp { } } +/// Formats a boolean binary operator as its source-level symbol +/// (`&&` or `||`). impl Display for BoolBiOp { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { let op = match self { @@ -58,6 +76,8 @@ impl Display for BoolBiOp { } } +/// Formats a comparison operator as its LLVM IR predicate mnemonic +/// (e.g., `eq`, `ne`, `sgt`, …). impl Display for ComOp { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { match self { @@ -71,12 +91,15 @@ impl Display for ComOp { } } +/// Formats a binary arithmetic expression as `( )`. impl Display for ArithBiOpExpr { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { write!(f, "({} {} {})", self.left, self.op, self.right) } } +/// Formats the inner part of an arithmetic expression by delegating to +/// the concrete variant. impl Display for ArithExprInner { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { match self { @@ -86,30 +109,36 @@ impl Display for ArithExprInner { } } +/// Formats an arithmetic expression by delegating to its inner representation. impl Display for ArithExpr { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { write!(f, "{}", self.inner) } } +/// Formats a comparison expression as `( )`. 
impl Display for ComExpr { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { write!(f, "({} {} {})", self.left, self.op, self.right) } } +/// Formats a unary boolean expression as `()`, e.g., `(!x)`. impl Display for BoolUOpExpr { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { write!(f, "({}{})", self.op, self.cond) } } +/// Formats a binary boolean expression as `( )`. impl Display for BoolBiOpExpr { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { write!(f, "({} {} {})", self.left, self.op, self.right) } } +/// Formats the inner part of a boolean expression by delegating to the +/// concrete variant. impl Display for BoolExprInner { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { match self { @@ -119,12 +148,15 @@ impl Display for BoolExprInner { } } +/// Formats a boolean expression by delegating to its inner representation. impl Display for BoolExpr { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { write!(f, "{}", self.inner) } } +/// Formats the inner part of a boolean unit by delegating to the concrete +/// variant (comparison, nested boolean expression, or unary not). impl Display for BoolUnitInner { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { match self { @@ -135,12 +167,14 @@ impl Display for BoolUnitInner { } } +/// Formats a boolean unit by delegating to its inner representation. impl Display for BoolUnit { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { write!(f, "{}", self.inner) } } +/// Formats the inner part of an rvalue by delegating to the concrete variant. impl Display for RightValInner { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { match self { @@ -150,12 +184,14 @@ impl Display for RightValInner { } } +/// Formats an rvalue by delegating to its inner representation. impl Display for RightVal { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { write!(f, "{}", self.inner) } } +/// Formats the inner part of an lvalue. 
impl Display for LeftValInner { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { match self { @@ -166,12 +202,14 @@ impl Display for LeftValInner { } } +/// Formats an lvalue by delegating to its inner representation. impl Display for LeftVal { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { write!(f, "{}", self.inner) } } +/// Formats an index expression as either a numeric literal or an identifier. impl Display for IndexExpr { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { match &self.inner { @@ -181,20 +219,25 @@ impl Display for IndexExpr { } } +/// Formats an array access expression as `[]`. impl Display for ArrayExpr { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { write!(f, "{}[{}]", self.arr, self.idx) } } +/// Formats a struct member access as `.`. impl Display for MemberExpr { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { write!(f, "{}.{}", self.struct_id, self.member_id) } } +/// Formats a function call as `()` or `::()` +/// for qualified calls. impl Display for FnCall { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + // Format all argument values as a comma-separated string. let args: Vec = self.vals.iter().map(|v| format!("{}", v)).collect(); if let Some(module) = &self.module_prefix { write!(f, "{}::{}({})", module, self.name, args.join(", ")) @@ -204,6 +247,7 @@ impl Display for FnCall { } } +/// Formats the inner part of an expression unit. impl Display for ExprUnitInner { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { match self { @@ -218,12 +262,15 @@ impl Display for ExprUnitInner { } } +/// Formats an expression unit by delegating to its inner representation. impl Display for ExprUnit { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { write!(f, "{}", self.inner) } } +/// Formats the entire program using the tree pretty-printer so that +/// `println!("{}", program)` produces a readable AST dump. 
impl Display for Program { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { self.fmt_tree_root(f) diff --git a/src/ast/expr.rs b/src/ast/expr.rs index 3b50e38..b4e91fd 100644 --- a/src/ast/expr.rs +++ b/src/ast/expr.rs @@ -1,117 +1,187 @@ +//! Expression AST nodes. +//! +//! This module contains all node types that represent expressions in the +//! language: left-hand-side values (lvalues), arithmetic expressions, +//! boolean expressions, comparison expressions, function calls, and the +//! general-purpose expression units that glue everything together. + use super::ops::*; use super::types::Pos; +/// An lvalue — a memory location that can appear on the left side of an +/// assignment. #[derive(Debug, Clone)] pub struct LeftVal { + /// Source position of this lvalue. pub pos: Pos, + /// The kind of lvalue (simple identifier, array element, or struct member). pub inner: LeftValInner, } +/// The inner representation of an lvalue. #[derive(Debug, Clone)] pub enum LeftValInner { + /// A simple variable name. Id(String), + /// An array element access, e.g. `a[i]`. ArrayExpr(Box), + /// A struct member access, e.g. `s.field`. MemberExpr(Box), } +/// The inner representation of an array index expression. #[derive(Debug, Clone)] pub enum IndexExprInner { + /// A literal numeric index, e.g. `a[2]`. Num(usize), + /// An identifier used as an index, e.g. `a[i]`. Id(String), } +/// An index expression used inside an array access. #[derive(Debug, Clone)] pub struct IndexExpr { + /// Whether the index is a literal number or a variable name. pub inner: IndexExprInner, } +/// An array element access expression, e.g. `arr[idx]`. #[derive(Debug, Clone)] pub struct ArrayExpr { + /// The array being indexed (itself an lvalue, enabling `a[i][j]`). pub arr: Box, + /// The index expression. pub idx: Box, } +/// A struct member access expression, e.g. `obj.field`. #[derive(Debug, Clone)] pub struct MemberExpr { + /// The struct lvalue being accessed. 
pub struct_id: Box, + /// The name of the member field. pub member_id: String, } +/// A binary arithmetic expression, e.g. `a + b`. #[derive(Debug, Clone)] pub struct ArithBiOpExpr { + /// The arithmetic operator. pub op: ArithBiOp, + /// The left operand. pub left: Box, + /// The right operand. pub right: Box, } +/// The inner representation of an arithmetic expression. #[derive(Debug, Clone)] pub enum ArithExprInner { + /// A binary arithmetic operation such as `a + b`. ArithBiOpExpr(Box), + /// A leaf expression unit (number literal, identifier, function call, …). ExprUnit(Box), } +/// An arithmetic expression, pairing the inner value with a source position. #[derive(Debug, Clone)] pub struct ArithExpr { + /// Source position of this expression. pub pos: Pos, + /// The actual arithmetic expression content. pub inner: ArithExprInner, } +/// A comparison expression that yields a boolean, e.g. `a < b`. #[derive(Debug, Clone)] pub struct ComExpr { + /// The comparison operator. pub op: ComOp, + /// The left operand (must be an expression unit). pub left: Box, + /// The right operand (must be an expression unit). pub right: Box, } +/// A unary boolean expression, e.g. `!cond`. #[derive(Debug, Clone)] pub struct BoolUOpExpr { + /// The boolean unary operator (currently only `Not`). pub op: BoolUOp, + /// The operand boolean unit to negate. pub cond: Box, } +/// A binary boolean expression, e.g. `a && b`. #[derive(Debug, Clone)] pub struct BoolBiOpExpr { + /// The boolean binary operator (`And` or `Or`). pub op: BoolBiOp, + /// The left operand. pub left: Box, + /// The right operand. pub right: Box, } +/// The inner representation of a boolean expression. #[derive(Debug, Clone)] pub enum BoolExprInner { + /// A binary boolean operation such as `a && b`. BoolBiOpExpr(Box), + /// A leaf boolean unit (comparison, nested bool expr, or unary not). BoolUnit(Box), } +/// A boolean expression, pairing the inner value with a source position. 
#[derive(Debug, Clone)] pub struct BoolExpr { + /// Source position of this boolean expression. pub pos: Pos, + /// The actual boolean expression content. pub inner: BoolExprInner, } +/// The inner representation of a boolean unit — the atomic building block +/// from which boolean expressions are composed. #[derive(Debug, Clone)] #[allow(clippy::enum_variant_names)] pub enum BoolUnitInner { + /// A comparison expression, e.g. `a == b`. ComExpr(Box), + /// A parenthesised or nested boolean expression. BoolExpr(Box), + /// A unary boolean expression, e.g. `!cond`. BoolUOpExpr(Box), } +/// A boolean unit with its source position. #[derive(Debug, Clone)] pub struct BoolUnit { + /// Source position of this boolean unit. pub pos: Pos, + /// The actual boolean unit content. pub inner: BoolUnitInner, } +/// A function call expression, e.g. `foo(a, b)` or `mod::foo(a, b)`. #[derive(Debug, Clone)] pub struct FnCall { + /// Optional module prefix for qualified calls such as `io::print`. pub module_prefix: Option, + /// The unqualified function name. pub name: String, + /// The list of argument values passed to the function. pub vals: RightValList, } +/// Implementation of helper methods for function calls. impl FnCall { + /// Returns the fully-qualified function name, including the module prefix + /// if one is present (e.g., `"io::print"`), or just the bare function + /// name otherwise (e.g., `"print"`). pub fn qualified_name(&self) -> String { if let Some(module) = &self.module_prefix { + // Combine module prefix and function name with `::` separator. format!("{module}::{}", self.name) } else { self.name.clone() @@ -119,32 +189,51 @@ impl FnCall { } } +/// The inner representation of a leaf expression unit. #[derive(Debug, Clone)] pub enum ExprUnitInner { + /// An integer literal. Num(i32), + /// A simple variable identifier. Id(String), + /// A parenthesised arithmetic sub-expression. ArithExpr(Box), + /// A function call whose return value is used as a value. 
FnCall(Box), + /// An array element access used as a value. ArrayExpr(Box), + /// A struct member access used as a value. MemberExpr(Box), + /// A reference to a variable, e.g. `&x`. Reference(String), } +/// An expression unit — the leaf node of arithmetic expressions — paired +/// with a source position. #[derive(Debug, Clone)] pub struct ExprUnit { + /// Source position of this expression unit. pub pos: Pos, + /// The actual expression unit content. pub inner: ExprUnitInner, } +/// The inner representation of a right-hand-side value. #[derive(Debug, Clone)] pub enum RightValInner { + /// An arithmetic expression used as an rvalue. ArithExpr(Box), + /// A boolean expression used as an rvalue. BoolExpr(Box), } +/// An rvalue — any value that can appear on the right side of an assignment +/// or as a function argument. #[derive(Debug, Clone)] pub struct RightVal { + /// The actual rvalue content (arithmetic or boolean). pub inner: RightValInner, } +/// A list of right-hand-side values, used for function argument lists. pub type RightValList = Vec; diff --git a/src/ast/ops.rs b/src/ast/ops.rs index b031017..cba6f63 100644 --- a/src/ast/ops.rs +++ b/src/ast/ops.rs @@ -1,28 +1,51 @@ +//! Operator enumerations used in arithmetic, boolean, and comparison expressions. +//! +//! Each operator kind is represented as its own enum so that the type system +//! enforces that, for example, a boolean unary operator can never be used +//! where an arithmetic binary operator is expected. + +/// Arithmetic binary operators. #[derive(Debug, Clone)] pub enum ArithBiOp { + /// Addition (`+`). Add, + /// Subtraction (`-`). Sub, + /// Multiplication (`*`). Mul, + /// Division (`/`), mapped to signed integer division in codegen. Div, } +/// Boolean unary operators. #[derive(Debug, PartialEq, Clone)] pub enum BoolUOp { + /// Logical negation (`!`). Not, } +/// Boolean binary operators. #[derive(Debug, PartialEq, Clone)] pub enum BoolBiOp { + /// Logical conjunction (`&&`). 
And, + /// Logical disjunction (`||`). Or, } +/// Comparison operators used to produce boolean results from two values. #[derive(Debug, Clone)] pub enum ComOp { + /// Strictly less than (`<`). Lt, + /// Less than or equal (`<=`). Le, + /// Strictly greater than (`>`). Gt, + /// Greater than or equal (`>=`). Ge, + /// Equal (`==`). Eq, + /// Not equal (`!=`). Ne, } diff --git a/src/ast/program.rs b/src/ast/program.rs index 64ea726..2f21636 100644 --- a/src/ast/program.rs +++ b/src/ast/program.rs @@ -1,27 +1,48 @@ +//! Top-level program AST nodes. +//! +//! This module defines the root `Program` node and the elements that can +//! appear at the top level of a source file: `use` import statements, +//! variable declarations/definitions, struct definitions, function forward +//! declarations, and function definitions. + use super::decl::{FnDeclStmt, FnDef, StructDef, VarDeclStmt}; +/// A `use` statement that imports an external module into the current scope, +/// e.g. `use io;`. #[derive(Debug, Clone)] pub struct UseStmt { + /// The name of the module being imported. pub module_name: String, } +/// The inner kind of a top-level program element. #[derive(Debug, Clone)] pub enum ProgramElementInner { + /// A global variable declaration or definition. VarDeclStmt(Box), + /// A struct type definition. StructDef(Box), + /// A function forward declaration (prototype). FnDeclStmt(Box), + /// A function definition with a body. FnDef(Box), } +/// A single top-level element in a program, wrapping its specific kind. #[derive(Debug, Clone)] pub struct ProgramElement { + /// The actual top-level element content. pub inner: ProgramElementInner, } +/// An ordered list of top-level program elements. pub type ProgramElementList = Vec; +/// The root node of the AST, representing a complete source file. #[derive(Debug, Clone)] pub struct Program { + /// The `use` import statements at the top of the file. 
pub use_stmts: Vec, + /// The top-level declarations, definitions, and function bodies. pub elements: ProgramElementList, } diff --git a/src/ast/stmt.rs b/src/ast/stmt.rs index 4ad071b..ad5f92e 100644 --- a/src/ast/stmt.rs +++ b/src/ast/stmt.rs @@ -1,60 +1,102 @@ +//! Statement AST nodes. +//! +//! This module defines all statement kinds that can appear inside a function +//! body (code block): assignments, function-call statements, control-flow +//! statements (`if`, `while`, `return`, `continue`, `break`), variable +//! declarations, and the empty (null) statement. + use super::decl::VarDeclStmt; use super::expr::{BoolUnit, FnCall, LeftVal, RightVal}; +/// An assignment statement, e.g. `x = expr;`. #[derive(Debug, Clone)] pub struct AssignmentStmt { + /// The target lvalue (the location being written to). pub left_val: Box, + /// The source rvalue (the value being assigned). pub right_val: Box, } +/// A statement consisting of a bare function call whose return value is +/// discarded, e.g. `print(x);`. #[derive(Debug, Clone)] pub struct CallStmt { + /// The function call expression. pub fn_call: Box, } +/// A `return` statement, optionally carrying a value. #[derive(Debug, Clone)] pub struct ReturnStmt { + /// The value to return, or `None` for a void return. pub val: Option>, } +/// A `continue` statement that jumps to the next iteration of the enclosing +/// loop. #[derive(Debug, Clone)] pub struct ContinueStmt {} +/// A `break` statement that exits the enclosing loop. #[derive(Debug, Clone)] pub struct BreakStmt {} +/// An empty (null) statement — a lone semicolon with no effect. #[derive(Debug, Clone)] pub struct NullStmt {} +/// An `if` statement, with a mandatory then-branch and an optional +/// else-branch. #[derive(Debug, Clone)] pub struct IfStmt { + /// The condition that controls which branch is taken. pub bool_unit: Box, + /// The statements executed when the condition is `true`. 
pub if_stmts: CodeBlockStmtList, + /// The statements executed when the condition is `false`; absent if there + /// is no `else` clause. pub else_stmts: Option, } +/// A `while` loop statement. #[derive(Debug, Clone)] pub struct WhileStmt { + /// The loop condition evaluated before each iteration. pub bool_unit: Box, + /// The statements that form the loop body. pub stmts: CodeBlockStmtList, } +/// The inner kind of a statement that can appear inside a code block. #[derive(Debug, Clone)] pub enum CodeBlockStmtInner { + /// A variable declaration or definition. VarDecl(Box), + /// An assignment statement. Assignment(Box), + /// A function-call statement. Call(Box), + /// An `if` (possibly with `else`) statement. If(Box), + /// A `while` loop statement. While(Box), + /// A `return` statement. Return(Box), + /// A `continue` statement. Continue(Box), + /// A `break` statement. Break(Box), + /// An empty (null) statement. Null(Box), } +/// A single statement inside a code block, wrapping its specific kind. #[derive(Debug, Clone)] pub struct CodeBlockStmt { + /// The actual statement content. pub inner: CodeBlockStmtInner, } +/// An ordered sequence of statements forming a code block (function body, +/// `if`/`else` branch, or loop body). pub type CodeBlockStmtList = Vec; diff --git a/src/ast/tree.rs b/src/ast/tree.rs index 69890eb..a0171cd 100644 --- a/src/ast/tree.rs +++ b/src/ast/tree.rs @@ -1,10 +1,31 @@ +//! Tree pretty-printer for the AST. +//! +//! This module defines the [`DisplayAsTree`] trait and provides implementations +//! for every AST node type. When a node is printed with this trait it +//! produces an indented, Unicode-box-drawing tree that mirrors the logical +//! structure of the AST, making it easy to read program structure at a glance. +//! +//! The indentation state is passed down through the `indent_levels` slice. +//! Each element records whether the corresponding ancestor was the *last* +//! 
child at its level; this drives the choice between `│ ` (more siblings +//! follow) and ` ` (no more siblings) connector strings. + use super::decl::*; use super::expr::*; use super::program::*; use super::stmt::*; use std::fmt::{Error, Formatter}; +/// Trait for formatting an AST node as an indented tree. pub trait DisplayAsTree { + /// Write this node (and all its children) to `f` as an indented tree. + /// + /// * `indent_levels` – a slice whose length equals the current nesting + /// depth; each `bool` records whether the corresponding ancestor was + /// the last child at its level (`true` → last, so print spaces instead + /// of a vertical bar). + /// * `is_last` – whether *this* node is the last sibling among its + /// parent's children. fn fmt_tree( &self, f: &mut Formatter<'_>, @@ -12,28 +33,42 @@ pub trait DisplayAsTree { is_last: bool, ) -> Result<(), Error>; + /// Convenience method that starts a fresh tree with no indentation. + /// Equivalent to calling `fmt_tree(f, &[], true)`. fn fmt_tree_root(&self, f: &mut Formatter<'_>) -> Result<(), Error> { self.fmt_tree(f, &[], true) } } +/// Builds the indentation prefix string for a tree node. +/// +/// For each ancestor level, appends either `" "` (if that ancestor was the +/// last child, so no vertical bar is needed) or `"│ "` (if more siblings +/// follow at that level). Finally appends `"└─"` for the last child or +/// `"├─"` for any other child. fn tree_indent(indent_levels: &[bool], is_last: bool) -> String { let mut s = String::new(); for &last in indent_levels.iter() { if last { + // Ancestor was the last child — no vertical connector needed. s.push_str(" "); } else { + // More siblings exist at this ancestor level — draw vertical bar. s.push_str("│ "); } } if is_last { + // This node is the last child — use a corner connector. s.push_str("└─"); } else { + // More siblings follow — use a tee connector. 
s.push_str("├─"); } s } +/// Formats the root `Program` node, listing every top-level element as a +/// child in the tree. impl DisplayAsTree for Program { fn fmt_tree( &self, @@ -42,6 +77,7 @@ impl DisplayAsTree for Program { is_last: bool, ) -> Result<(), Error> { writeln!(f, "{}Program", tree_indent(indent_levels, is_last))?; + // Build the indentation context for children. let mut new_indent = indent_levels.to_vec(); new_indent.push(!is_last); let last_index = self.elements.len().saturating_sub(1); @@ -52,6 +88,7 @@ impl DisplayAsTree for Program { } } +/// Delegates formatting to the concrete element variant. impl DisplayAsTree for ProgramElement { fn fmt_tree( &self, @@ -68,6 +105,7 @@ impl DisplayAsTree for ProgramElement { } } +/// Prints a `VarDeclStmt` header then delegates to the inner decl/def. impl DisplayAsTree for VarDeclStmt { fn fmt_tree( &self, @@ -76,10 +114,13 @@ impl DisplayAsTree for VarDeclStmt { is_last: bool, ) -> Result<(), Error> { writeln!(f, "{}VarDeclStmt", tree_indent(indent_levels, is_last))?; + // The inner node is always the single (last) child. self.inner.fmt_tree(f, indent_levels, true) } } +/// Delegates to either the `Decl` or `Def` variant of a variable +/// declaration statement. impl DisplayAsTree for VarDeclStmtInner { fn fmt_tree( &self, @@ -94,6 +135,8 @@ impl DisplayAsTree for VarDeclStmtInner { } } +/// Prints a single variable declaration as `: `, using +/// `"unknown"` when no type annotation is present. impl DisplayAsTree for VarDecl { fn fmt_tree( &self, @@ -101,6 +144,7 @@ impl DisplayAsTree for VarDecl { indent_levels: &[bool], is_last: bool, ) -> Result<(), Error> { + // Render the type specifier, falling back to "unknown" if absent. let type_str = self .type_specifier .as_ref() @@ -115,6 +159,8 @@ impl DisplayAsTree for VarDecl { } } +/// Transparent forwarding implementation: delegates directly to the +/// pointed-to value so that `Box` nodes behave identically to `T`. 
impl DisplayAsTree for Box { fn fmt_tree( &self, @@ -126,6 +172,8 @@ impl DisplayAsTree for Box { } } +/// Skips rendering entirely when the `Option` is `None`; otherwise +/// delegates to the inner boxed value. impl DisplayAsTree for Option> { fn fmt_tree( &self, @@ -140,6 +188,9 @@ impl DisplayAsTree for Option> { } } } + +/// Prints the function name and, optionally, an indented `Params:` subtree +/// listing all parameter declarations. impl DisplayAsTree for FnDecl { fn fmt_tree( &self, @@ -154,6 +205,7 @@ impl DisplayAsTree for FnDecl { self.identifier )?; if let Some(params) = &self.param_decl { + // Extend the indentation context for the parameter subtree. let mut new_indent = indent_levels.to_vec(); new_indent.push(!is_last); writeln!(f, "{}Params:", tree_indent(&new_indent, false))?; @@ -163,6 +215,7 @@ impl DisplayAsTree for FnDecl { } } +/// Delegates directly to the inner `FnDecl`. impl DisplayAsTree for FnDeclStmt { fn fmt_tree( &self, @@ -174,6 +227,7 @@ impl DisplayAsTree for FnDeclStmt { } } +/// Prints the function name then lists the body statements as children. impl DisplayAsTree for FnDef { fn fmt_tree( &self, @@ -193,6 +247,8 @@ impl DisplayAsTree for FnDef { } } +/// Prints a variable definition as either a scalar assignment +/// (`name = val`) or an array initializer. impl DisplayAsTree for VarDef { fn fmt_tree( &self, @@ -205,9 +261,11 @@ impl DisplayAsTree for VarDef { VarDefInner::Scalar(s) => writeln!(f, "{}{} = {}", prefix, self.identifier, s.val), VarDefInner::Array(a) => match &a.initializer { ArrayInitializer::ExplicitList(vals) => { + // Print the debug representation of all explicit values. writeln!(f, "{}{} = {:?}", prefix, self.identifier, vals) } ArrayInitializer::Fill { val, count } => { + // Print the fill syntax: `name = [val; count]`. 
writeln!(f, "{}{} = [{}; {}]", prefix, self.identifier, val, count) } }, @@ -215,6 +273,7 @@ impl DisplayAsTree for VarDef { } } +/// Prints a `VarDeclList` header then lists every declaration as a child. impl DisplayAsTree for VarDeclList { fn fmt_tree( &self, @@ -235,6 +294,8 @@ impl DisplayAsTree for VarDeclList { } } +/// Prints an `AssignmentStmt` header then shows the lvalue and rvalue as +/// the two children. impl DisplayAsTree for AssignmentStmt { fn fmt_tree( &self, @@ -247,11 +308,13 @@ impl DisplayAsTree for AssignmentStmt { let mut new_indent = indent_levels.to_vec(); new_indent.push(is_last); + // lvalue is not the last child; rvalue is. self.left_val.fmt_tree(f, &new_indent, false)?; self.right_val.fmt_tree(f, &new_indent, true) } } +/// Prints `CallStmt ` then lists each argument as a child. impl DisplayAsTree for CallStmt { fn fmt_tree( &self, @@ -277,6 +340,7 @@ impl DisplayAsTree for CallStmt { } } +/// Delegates to the concrete statement variant inside a code block. impl DisplayAsTree for CodeBlockStmtInner { fn fmt_tree( &self, @@ -298,6 +362,7 @@ impl DisplayAsTree for CodeBlockStmtInner { } } +/// Delegates to the inner statement kind. impl DisplayAsTree for CodeBlockStmt { fn fmt_tree( &self, @@ -309,6 +374,8 @@ impl DisplayAsTree for CodeBlockStmt { } } +/// Prints every statement in the list at the same indentation level, +/// marking only the final element as `is_last`. impl DisplayAsTree for CodeBlockStmtList { fn fmt_tree( &self, @@ -324,6 +391,8 @@ impl DisplayAsTree for CodeBlockStmtList { } } +/// Prints `IfStmt Cond: ` then shows the `IfBranch` and optional +/// `ElseBranch` subtrees. impl DisplayAsTree for IfStmt { fn fmt_tree( &self, @@ -339,6 +408,8 @@ impl DisplayAsTree for IfStmt { )?; let mut new_indent = indent_levels.to_vec(); new_indent.push(is_last); + // Print the then-branch header; it is not the last child when an + // else-branch also exists. 
writeln!(f, "{}IfBranch:", tree_indent(&new_indent, false))?; self.if_stmts.fmt_tree(f, &new_indent, true)?; if let Some(e) = &self.else_stmts { @@ -349,6 +420,7 @@ impl DisplayAsTree for IfStmt { } } +/// Prints `WhileStmt Cond: ` then shows the loop `Body` subtree. impl DisplayAsTree for WhileStmt { fn fmt_tree( &self, @@ -369,6 +441,8 @@ impl DisplayAsTree for WhileStmt { } } +/// Prints `ReturnStmt ` if a value is returned, or just `ReturnStmt` +/// for a void return. impl DisplayAsTree for ReturnStmt { fn fmt_tree( &self, @@ -384,6 +458,7 @@ impl DisplayAsTree for ReturnStmt { } } +/// Prints a leaf `ContinueStmt` node. impl DisplayAsTree for ContinueStmt { fn fmt_tree( &self, @@ -395,6 +470,7 @@ impl DisplayAsTree for ContinueStmt { } } +/// Prints a leaf `BreakStmt` node. impl DisplayAsTree for BreakStmt { fn fmt_tree( &self, @@ -406,6 +482,7 @@ impl DisplayAsTree for BreakStmt { } } +/// Prints a leaf `NullStmt` node (empty statement). impl DisplayAsTree for NullStmt { fn fmt_tree( &self, @@ -416,6 +493,9 @@ impl DisplayAsTree for NullStmt { writeln!(f, "{}NullStmt", tree_indent(indent_levels, is_last)) } } + +/// Prints a `LeftVal` header then its inner variant (identifier, array +/// access, or member access) as the single child. impl DisplayAsTree for LeftVal { fn fmt_tree( &self, @@ -435,6 +515,8 @@ impl DisplayAsTree for LeftVal { } } +/// Prints a `RightVal` header then its inner variant (arithmetic or boolean +/// expression) as the single child. impl DisplayAsTree for RightVal { fn fmt_tree( &self, @@ -453,6 +535,7 @@ impl DisplayAsTree for RightVal { } } +/// Prints `StructDef ` then lists field declarations as children. impl DisplayAsTree for StructDef { fn fmt_tree( &self, @@ -472,6 +555,8 @@ impl DisplayAsTree for StructDef { } } +/// Prints an `ArrayExpr` header then shows the array lvalue and index +/// expression as two children. 
impl DisplayAsTree for ArrayExpr { fn fmt_tree( &self, @@ -482,11 +567,14 @@ impl DisplayAsTree for ArrayExpr { writeln!(f, "{}ArrayExpr", tree_indent(indent_levels, is_last))?; let mut new_indent = indent_levels.to_vec(); new_indent.push(is_last); + // Array lvalue is not the last child; index expression is. self.arr.fmt_tree(f, &new_indent, false)?; self.idx.fmt_tree(f, &new_indent, true) } } +/// Prints `MemberExpr ` then shows the base struct lvalue as the +/// single child. impl DisplayAsTree for MemberExpr { fn fmt_tree( &self, @@ -506,6 +594,8 @@ impl DisplayAsTree for MemberExpr { } } +/// Prints an `ArithExpr` header then delegates to the concrete inner variant +/// (binary operation or expression unit). impl DisplayAsTree for ArithExpr { fn fmt_tree( &self, @@ -523,6 +613,8 @@ impl DisplayAsTree for ArithExpr { } } +/// Prints a `BoolExpr` header then delegates to the concrete inner variant +/// (binary boolean operation or boolean unit). impl DisplayAsTree for BoolExpr { fn fmt_tree( &self, @@ -540,6 +632,8 @@ impl DisplayAsTree for BoolExpr { } } +/// Prints an index expression as either `IndexExpr Num()` or +/// `IndexExpr Id()`. impl DisplayAsTree for IndexExpr { fn fmt_tree( &self, @@ -564,6 +658,8 @@ impl DisplayAsTree for IndexExpr { } } +/// Prints `ArithBiOpExpr ` then shows the left and right operands as +/// two children. impl DisplayAsTree for ArithBiOpExpr { fn fmt_tree( &self, @@ -579,11 +675,15 @@ impl DisplayAsTree for ArithBiOpExpr { )?; let mut new_indent = indent_levels.to_vec(); new_indent.push(is_last); + // Left operand is not the last child; right operand is. self.left.fmt_tree(f, &new_indent, false)?; self.right.fmt_tree(f, &new_indent, true) } } +/// Prints an `ExprUnit` header then delegates to the concrete inner variant +/// (number, identifier, sub-expression, function call, array access, member +/// access, or reference). 
impl DisplayAsTree for ExprUnit { fn fmt_tree( &self, @@ -608,6 +708,8 @@ impl DisplayAsTree for ExprUnit { } } +/// Prints `BoolBiOpExpr ` then shows the left and right operands as +/// two children. impl DisplayAsTree for BoolBiOpExpr { fn fmt_tree( &self, @@ -623,11 +725,14 @@ impl DisplayAsTree for BoolBiOpExpr { )?; let mut new_indent = indent_levels.to_vec(); new_indent.push(is_last); + // Left operand is not the last child; right operand is. self.left.fmt_tree(f, &new_indent, false)?; self.right.fmt_tree(f, &new_indent, true) } } +/// Prints a `BoolUnit` header then delegates to the concrete inner variant +/// (comparison, nested boolean expression, or unary boolean operation). impl DisplayAsTree for BoolUnit { fn fmt_tree( &self, @@ -646,6 +751,7 @@ impl DisplayAsTree for BoolUnit { } } +/// Prints `FnCall: ` then lists each argument as a child. impl DisplayAsTree for FnCall { fn fmt_tree( &self, @@ -653,6 +759,7 @@ impl DisplayAsTree for FnCall { indent_levels: &[bool], is_last: bool, ) -> Result<(), Error> { + // Use the qualified name so that module-prefixed calls are shown correctly. let fn_name = self.qualified_name(); writeln!( f, @@ -672,6 +779,8 @@ impl DisplayAsTree for FnCall { } } +/// Prints a `ComExpr` header then shows the left and right operands as +/// two children. impl DisplayAsTree for ComExpr { fn fmt_tree( &self, @@ -683,11 +792,14 @@ impl DisplayAsTree for ComExpr { let mut new_indent = indent_levels.to_vec(); new_indent.push(is_last); + // Left operand is not the last child; right operand is. self.left.fmt_tree(f, &new_indent, false)?; self.right.fmt_tree(f, &new_indent, true) } } +/// Prints `BoolUOpExpr ` then shows the operand condition as the single +/// child. impl DisplayAsTree for BoolUOpExpr { fn fmt_tree( &self, diff --git a/src/ast/types.rs b/src/ast/types.rs index 56accd3..a6bad02 100644 --- a/src/ast/types.rs +++ b/src/ast/types.rs @@ -1,19 +1,38 @@ +//! Type definitions used throughout the AST. +//! +//! 
This module defines the source-position alias and all type-specifier +//! nodes that appear in variable declarations, function parameters, and +//! return-type annotations. + +/// Byte offset (or character index) into the source text. +/// Used to track where each AST node originated for error reporting. pub type Pos = usize; +/// Built-in primitive types supported by the language. #[derive(Debug, Clone)] pub enum BuiltIn { + /// The 32-bit signed integer type (`int`). Int, } +/// The inner representation of a type specifier, distinguishing between +/// built-in primitives, user-defined composite types, and reference types. #[derive(Debug, Clone)] pub enum TypeSpecifierInner { + /// A primitive type such as `int`. BuiltIn(BuiltIn), + /// A user-defined struct or composite type, identified by name. Composite(String), + /// A reference to another type specifier (e.g., `&int`). Reference(Box), } +/// A fully-annotated type specifier, pairing the type's inner representation +/// with the source position where it appears. #[derive(Debug, Clone)] pub struct TypeSpecifier { + /// Source position of this type specifier. pub pos: Pos, + /// The actual type information (built-in, composite, or reference). pub inner: TypeSpecifierInner, } diff --git a/src/main.rs b/src/main.rs index ae5c051..cba6797 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,10 @@ +//! `teac` – the TeaLang compiler driver. +//! +//! This binary ties together all compiler stages in order: +//! parsing → IR generation → optimisation → assembly emission. +//! The output stage can be stopped early (via `--emit`) to inspect +//! the AST, IR, or final AArch64 assembly. + mod asm; mod ast; mod common; @@ -14,36 +21,63 @@ use std::{ path::{Path, PathBuf}, }; +/// Controls which intermediate representation the compiler writes to the output. +/// The pipeline always runs up to (and including) the chosen stage, then exits. 
#[derive(Copy, Clone, Debug, PartialEq, ValueEnum)] enum EmitTarget { + /// Stop after parsing and emit the Abstract Syntax Tree. Ast, + /// Stop after IR generation and optimisation and emit the IR. Ir, + /// Run all stages and emit the final AArch64 assembly (default). Asm, } +/// The OS / ABI to target when generating assembly. +/// When omitted, `Target::host()` detects the platform at runtime. #[derive(Copy, Clone, Debug, PartialEq, ValueEnum)] enum TargetPlatform { + /// Generate Linux (ELF) assembly. Linux, + /// Generate macOS (Mach-O) assembly. Macos, } +/// Command-line interface definition parsed by `clap`. #[derive(Parser, Debug)] #[command(name = "teac")] #[command(about = "A compiler written in Rust for TeaLang")] struct Cli { + /// Path to the TeaLang source file to compile. #[clap(value_name = "FILE")] input: String, + /// Which IR stage to emit as output (default: `asm`). #[arg(long, value_enum, ignore_case = true, default_value = "asm")] emit: EmitTarget, + /// Target platform for assembly generation. + /// Defaults to the host platform when not specified. #[arg(long, value_enum, ignore_case = true)] target: Option, + /// Write output to FILE instead of stdout. #[clap(short, long, value_name = "FILE")] output: Option, } +/// Opens a buffered writer for the compiler output. +/// +/// If `output` is `None`, writes to stdout. +/// Otherwise creates the file at the given path, creating any missing +/// parent directories along the way. +/// +/// # Parameters +/// - `output`: optional path to an output file. +/// +/// # Returns +/// A boxed `Write` implementation, either wrapping stdout or a newly +/// created file. fn open_writer(output: &Option) -> Result> { let Some(path) = output else { return Ok(Box::new(BufWriter::new(io::stdout()))); @@ -58,6 +92,20 @@ fn open_writer(output: &Option) -> Result> { Ok(Box::new(BufWriter::new(file))) } +/// Runs the full compiler pipeline. +/// +/// Steps performed: +/// 1. 
Parse CLI arguments and read the source file. +/// 2. Parse the source into an AST; exit here if `--emit ast`. +/// 3. Lower the AST to IR. +/// 4. Run the default optimisation pass pipeline over every function. +/// Exit here if `--emit ir`. +/// 5. Generate AArch64 assembly for the requested target platform +/// and write it to the output. +/// +/// # Returns +/// `Ok(())` on success, or an `anyhow::Error` describing the first +/// failure encountered. fn run() -> Result<()> { let cli = Cli::parse(); let source = fs::read_to_string(&cli.input) @@ -69,6 +117,7 @@ fn run() -> Result<()> { .generate() .with_context(|| format!("failed to parse '{}'", cli.input))?; + // Early exit: the user only wants the AST dump. if cli.emit == EmitTarget::Ast { return parser .output(&mut writer) @@ -90,17 +139,20 @@ fn run() -> Result<()> { let mut ir_gen = ir::IrGenerator::new(ast, source_dir); ir_gen.generate().context("failed to generate IR")?; + // Run the default optimisation passes over every function in the module. let pass_manager = opt::FunctionPassManager::with_default_pipeline(); for func in ir_gen.module.function_list.values_mut() { pass_manager.run(func); } + // Early exit: the user only wants the (optimised) IR dump. if cli.emit == EmitTarget::Ir { return ir_gen .output(&mut writer) .context("failed to write IR output"); } + // Resolve the target platform: use the explicit flag, or auto-detect the host. let target = match cli.target { Some(TargetPlatform::Linux) => Target::Linux, Some(TargetPlatform::Macos) => Target::Macos, @@ -114,6 +166,8 @@ fn run() -> Result<()> { .context("failed to write assembly output") } +/// Entry point: delegates to [`run`] and converts any error into a +/// human-readable message printed to stderr, exiting with code 1. 
fn main() { if let Err(e) = run() { eprintln!("Error: {e:#}"); diff --git a/src/parser/common.rs b/src/parser/common.rs index f7e4193..4b9ff66 100644 --- a/src/parser/common.rs +++ b/src/parser/common.rs @@ -1,10 +1,16 @@ use pest_derive::Parser as DeriveParser; +/// Errors that can be produced during parsing of a TeaLang source file. #[derive(Debug, thiserror::Error)] pub enum Error { + /// A syntax error reported directly by the PEG parser (pest). + /// The inner `String` contains the human-readable pest error message. #[error("{0}")] Syntax(String), + /// An integer literal that could not be parsed as a valid `i32`. + /// Includes the original source text, its position, and the underlying + /// `ParseIntError` as the error source. #[error("invalid integer literal `{literal}` at line {line}, column {column}")] InvalidNumber { literal: String, @@ -14,24 +20,46 @@ pub enum Error { source: std::num::ParseIntError, }, + /// An I/O error encountered while reading source input. #[error("I/O error")] Io(#[from] std::io::Error), + /// The parse tree had an unexpected structure at the given location. + /// The inner `String` names the grammar rule or context where the + /// unexpected structure was found. #[error("unexpected parse tree structure in {0}")] Grammar(String), } +/// The pest-derived parser for the TeaLang grammar. +/// It is generated automatically from `tealang.pest` and implements the +/// [`pest::Parser`] trait for the [`Rule`] enum. #[derive(DeriveParser)] #[grammar = "tealang.pest"] pub(crate) struct TeaLangParser; +/// A specialized `Result` type used throughout the parser. +/// `Ok` carries a successfully parsed value of type `T`; `Err` carries an +/// [`Error`] describing what went wrong. pub(crate) type ParseResult = Result; + +/// A single node in the pest parse tree, parameterised by the input lifetime. +/// This is a type alias for [`pest::iterators::Pair`] bound to the [`Rule`] +/// enum produced by [`TeaLangParser`]. 
pub(crate) type Pair<'a> = pest::iterators::Pair<'a, Rule>; +/// Collapses a raw source snippet into a compact, single-line preview string +/// suitable for use in error messages. +/// +/// All runs of whitespace are collapsed to a single space, and the result is +/// truncated to at most `MAX_CHARS` characters. If the snippet is empty after +/// normalisation, the placeholder `""` is returned instead. pub(crate) fn compact_snippet(snippet: &str) -> String { const MAX_CHARS: usize = 48; + // Collapse all whitespace sequences into a single space. let compact = snippet.split_whitespace().collect::>().join(" "); + // Fall back to trimming if the split produced nothing (e.g., all whitespace). let normalized = if compact.is_empty() { snippet.trim().to_string() } else { @@ -42,6 +70,7 @@ pub(crate) fn compact_snippet(snippet: &str) -> String { return "".to_string(); } + // Take up to MAX_CHARS characters; append "..." if the string is longer. let mut chars = normalized.chars(); let preview: String = chars.by_ref().take(MAX_CHARS).collect(); if chars.next().is_some() { @@ -51,8 +80,14 @@ pub(crate) fn compact_snippet(snippet: &str) -> String { } } +/// Creates a [`Error::Grammar`] variant that includes the source position and +/// a compact snippet taken from `pair`'s span. +/// +/// `context` is a short, human-readable label that identifies the grammar rule +/// or function where the unexpected structure was encountered. pub(crate) fn grammar_error(context: &'static str, pair: &Pair<'_>) -> Error { let span = pair.as_span(); + // Extract line and column numbers from the start of the span. let (line, column) = span.start_pos().line_col(); let near = compact_snippet(span.as_str()); @@ -61,14 +96,26 @@ pub(crate) fn grammar_error(context: &'static str, pair: &Pair<'_>) -> Error { )) } +/// Creates a [`Error::Grammar`] variant from a static string alone, without +/// access to a specific parse-tree node. 
+/// +/// Use this when position information is unavailable (e.g., when validating +/// program state rather than a particular source span). pub(crate) fn grammar_error_static(context: &'static str) -> Error { Error::Grammar(context.to_string()) } +/// Returns the byte offset of the start of `pair`'s span within the source +/// string. This is used to track source positions in AST nodes. pub(crate) fn get_pos(pair: &Pair<'_>) -> usize { pair.as_span().start() } +/// Parses an integer literal from a `num` parse-tree node. +/// +/// Reads the raw text of `pair`, attempts to parse it as an `i32`, and wraps +/// any failure in [`Error::InvalidNumber`] that includes the literal text and +/// its source position. pub(crate) fn parse_num(pair: Pair) -> ParseResult { let literal = pair.as_str().to_string(); let (line, column) = pair.as_span().start_pos().line_col(); diff --git a/src/parser/decl.rs b/src/parser/decl.rs index 5210fdc..959045f 100644 --- a/src/parser/decl.rs +++ b/src/parser/decl.rs @@ -4,7 +4,19 @@ use super::common::{get_pos, grammar_error, parse_num, Pair, ParseResult, Rule}; use super::ParseContext; impl<'a> ParseContext<'a> { + /// Parses a `use_stmt` parse-tree node into an [`ast::UseStmt`]. + /// + /// A `use` statement has the form `use module::path;`. The method collects + /// all `identifier` children and joins them with `"::"` to reconstruct the + /// fully-qualified module path. + /// + /// # Arguments + /// * `pair` – the `use_stmt` parse-tree node. + /// + /// # Returns + /// An [`ast::UseStmt`] containing the module path string. pub(crate) fn parse_use_stmt(&self, pair: Pair) -> ParseResult { + // Collect every identifier segment from the use path. let parts: Vec<&str> = pair .into_inner() .filter(|p| p.as_rule() == Rule::identifier) @@ -15,6 +27,15 @@ impl<'a> ParseContext<'a> { }) } + /// Parses a `program_element` node into an optional boxed [`ast::ProgramElement`]. 
+ /// + /// A program element is one of: a variable declaration statement, a struct + /// definition, a function declaration statement, or a function definition. + /// Returns `None` if the node contains no recognisable inner rule (this + /// should not occur in a well-formed parse tree). + /// + /// # Arguments + /// * `pair` – the `program_element` parse-tree node. pub(crate) fn parse_program_element( &self, pair: Pair, @@ -51,6 +72,14 @@ impl<'a> ParseContext<'a> { Ok(None) } + /// Parses a `struct_def` node into a boxed [`ast::StructDef`]. + /// + /// A struct definition has the form `struct Name { field_list }`. The + /// method extracts the struct name and delegates field parsing to + /// [`Self::parse_typed_var_decl_list`]. + /// + /// # Arguments + /// * `pair` – the `struct_def` parse-tree node. pub(crate) fn parse_struct_def(&self, pair: Pair) -> ParseResult> { let mut identifier = String::new(); let mut decls = Vec::new(); @@ -66,6 +95,12 @@ impl<'a> ParseContext<'a> { Ok(Box::new(ast::StructDef { identifier, decls })) } + /// Parses a `typed_var_decl_list` node into a `Vec` of [`ast::VarDecl`]. + /// + /// Each child `typed_var_decl` node is delegated to [`Self::parse_var_decl`]. + /// + /// # Arguments + /// * `pair` – the `typed_var_decl_list` parse-tree node. pub(crate) fn parse_typed_var_decl_list(&self, pair: Pair) -> ParseResult> { let mut decls = Vec::new(); for inner in pair.into_inner() { @@ -76,7 +111,16 @@ impl<'a> ParseContext<'a> { Ok(decls) } + /// Parses a `typed_var_decl` (or `var_decl`) node into a boxed [`ast::VarDecl`]. + /// + /// Extracts the variable name, an optional type specifier, and—for array + /// declarations—the array length. Returns [`Error::Grammar`] if the + /// identifier is missing. + /// + /// # Arguments + /// * `pair` – the `typed_var_decl` / `var_decl` parse-tree node. pub(crate) fn parse_var_decl(&self, pair: Pair) -> ParseResult> { + // Keep a clone to pass to grammar_error if needed. 
let pair_for_error = pair.clone(); let mut identifier: Option = None; let mut type_specifier: Option = None; @@ -84,12 +128,14 @@ impl<'a> ParseContext<'a> { for inner in pair.into_inner() { match inner.as_rule() { + // Only the first identifier child is the variable name. Rule::identifier if identifier.is_none() => { identifier = Some(inner.as_str().to_string()); } Rule::type_spec => { type_specifier = self.parse_type_spec(inner)?; } + // A numeric literal indicates an array declaration. Rule::num => { array_len = Some(parse_num(inner)? as usize); } @@ -99,6 +145,7 @@ impl<'a> ParseContext<'a> { let identifier = identifier.ok_or_else(|| grammar_error("var_decl.identifier", &pair_for_error))?; + // Build the inner variant based on whether an array length was found. let inner = if let Some(len) = array_len { ast::VarDeclInner::Array(Box::new(ast::VarDeclArray { len })) } else { @@ -112,7 +159,16 @@ impl<'a> ParseContext<'a> { })) } + /// Parses a `type_spec` node into an optional [`ast::TypeSpecifier`]. + /// + /// Recognises reference types (`&T`), the built-in `i32` keyword, and + /// user-defined composite (struct) types by their identifier. Returns + /// `Ok(None)` when the node is empty or contains no recognised type rule. + /// + /// # Arguments + /// * `pair` – the `type_spec` parse-tree node. pub(crate) fn parse_type_spec(&self, pair: Pair) -> ParseResult> { + // Record the start position for use in the returned AST node. let pos = get_pos(&pair); let children: Vec<_> = pair.into_inner().collect(); @@ -120,6 +176,7 @@ impl<'a> ParseContext<'a> { for child in &children { match child.as_rule() { Rule::ref_type => { + // A reference type wraps an inner type_spec: `&`. let ref_children: Vec<_> = child.clone().into_inner().collect(); let inner_type_spec = ref_children .iter() @@ -134,12 +191,14 @@ impl<'a> ParseContext<'a> { })); } Rule::kw_i32 => { + // Built-in integer type. 
return Ok(Some(ast::TypeSpecifier { pos, inner: ast::TypeSpecifierInner::BuiltIn(ast::BuiltIn::Int), })); } Rule::identifier => { + // User-defined composite (struct) type referenced by name. return Ok(Some(ast::TypeSpecifier { pos, inner: ast::TypeSpecifierInner::Composite(child.as_str().to_string()), @@ -152,6 +211,15 @@ impl<'a> ParseContext<'a> { Ok(None) } + /// Parses a `var_decl_stmt` node into a boxed [`ast::VarDeclStmt`]. + /// + /// A variable declaration statement is either a variable definition + /// (`var_def`, e.g. `let x = 1;`) or a plain declaration (`var_decl`, + /// e.g. `let x: i32;`). Returns [`Error::Grammar`] if neither child is + /// present. + /// + /// # Arguments + /// * `pair` – the `var_decl_stmt` parse-tree node. pub(crate) fn parse_var_decl_stmt(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); for inner in pair.into_inner() { @@ -173,18 +241,30 @@ impl<'a> ParseContext<'a> { Err(grammar_error("var_decl_stmt", &pair_for_error)) } + /// Parses a `var_def` node into a boxed [`ast::VarDef`]. + /// + /// Handles both scalar and array variable definitions. An array definition + /// contains an `array_initializer` child; a scalar definition contains a + /// `right_val` child. The type annotation (after `:`) is optional in both + /// forms. + /// + /// # Arguments + /// * `pair` – the `var_def` parse-tree node. pub(crate) fn parse_var_def(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); let inner_pairs: Vec<_> = pair.into_inner().collect(); + // The first child is always the variable name identifier. let identifier = inner_pairs[0].as_str().to_string(); + // Determine the form of the definition by looking for key child rules. let has_initializer = inner_pairs .iter() .any(|p| p.as_rule() == Rule::array_initializer); let has_colon = inner_pairs.iter().any(|p| p.as_rule() == Rule::colon); if has_initializer { + // Array definition: `let arr[N]: T = [...]` or `let arr[N] = [v; N]`. 
let len = parse_num( inner_pairs .iter() @@ -193,6 +273,7 @@ impl<'a> ParseContext<'a> { .clone(), )? as usize; + // Type annotation is optional; only present when a colon was found. let type_specifier = if has_colon { self.parse_type_spec( inner_pairs @@ -219,6 +300,7 @@ impl<'a> ParseContext<'a> { inner: ast::VarDefInner::Array(Box::new(ast::VarDefArray { len, initializer })), })) } else { + // Scalar definition: `let x: T = expr` or `let x = expr`. let type_specifier = if has_colon { self.parse_type_spec( inner_pairs @@ -247,10 +329,19 @@ impl<'a> ParseContext<'a> { } } + /// Parses an `array_initializer` node into an [`ast::ArrayInitializer`]. + /// + /// Two forms are supported: + /// * **Explicit list** – `[v0, v1, v2]`: contains a `right_val_list`. + /// * **Fill** – `[v; N]`: contains a single `right_val` and a `num`. + /// + /// # Arguments + /// * `pair` – the `array_initializer` parse-tree node. fn parse_array_initializer(&self, pair: Pair) -> ParseResult { let pair_for_error = pair.clone(); let children: Vec<_> = pair.into_inner().collect(); + // Check for the explicit-list form first. if let Some(list_pair) = children .iter() .find(|p| p.as_rule() == Rule::right_val_list) @@ -259,6 +350,7 @@ impl<'a> ParseContext<'a> { return Ok(ast::ArrayInitializer::ExplicitList(vals)); } + // Otherwise it must be the fill form `[val; count]`. let val_pair = children .iter() .find(|p| p.as_rule() == Rule::right_val) @@ -274,6 +366,13 @@ impl<'a> ParseContext<'a> { Ok(ast::ArrayInitializer::Fill { val, count }) } + /// Parses a `fn_decl_stmt` node into a boxed [`ast::FnDeclStmt`]. + /// + /// A function declaration statement wraps a single `fn_decl` child. + /// Returns [`Error::Grammar`] if the expected child is absent. + /// + /// # Arguments + /// * `pair` – the `fn_decl_stmt` parse-tree node. 
pub(crate) fn parse_fn_decl_stmt(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); for inner in pair.into_inner() { @@ -287,6 +386,13 @@ impl<'a> ParseContext<'a> { Err(grammar_error("fn_decl_stmt", &pair_for_error)) } + /// Parses a `fn_decl` node into a boxed [`ast::FnDecl`]. + /// + /// Extracts the function name, an optional parameter list, and an optional + /// return type specifier. + /// + /// # Arguments + /// * `pair` – the `fn_decl` parse-tree node. fn parse_fn_decl(&self, pair: Pair) -> ParseResult> { let mut identifier = String::new(); let mut param_decl = None; @@ -296,6 +402,7 @@ impl<'a> ParseContext<'a> { match inner.as_rule() { Rule::identifier => identifier = inner.as_str().to_string(), Rule::param_decl => param_decl = Some(self.parse_param_decl(inner)?), + // The optional return type follows `->`. Rule::type_spec => return_dtype = self.parse_type_spec(inner)?, _ => {} } @@ -308,6 +415,14 @@ impl<'a> ParseContext<'a> { })) } + /// Parses a `param_decl` node into a boxed [`ast::ParamDecl`]. + /// + /// A parameter declaration consists of a `typed_var_decl_list` that lists + /// all formal parameters with their types. Returns [`Error::Grammar`] if + /// the expected child is absent. + /// + /// # Arguments + /// * `pair` – the `param_decl` parse-tree node. fn parse_param_decl(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); for inner in pair.into_inner() { @@ -320,6 +435,14 @@ impl<'a> ParseContext<'a> { Err(grammar_error("param_decl", &pair_for_error)) } + /// Parses a `fn_def` node into a boxed [`ast::FnDef`]. + /// + /// A function definition contains a `fn_decl` header followed by one or + /// more `code_block_stmt` nodes that form the function body. Returns + /// [`Error::Grammar`] if the `fn_decl` child is absent. + /// + /// # Arguments + /// * `pair` – the `fn_def` parse-tree node. 
pub(crate) fn parse_fn_def(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); let mut fn_decl = None; @@ -328,6 +451,7 @@ impl<'a> ParseContext<'a> { for inner in pair.into_inner() { match inner.as_rule() { Rule::fn_decl => fn_decl = Some(self.parse_fn_decl(inner)?), + // Each statement in the body is collected in order. Rule::code_block_stmt => stmts.push(*self.parse_code_block_stmt(inner)?), _ => {} } diff --git a/src/parser/expr.rs b/src/parser/expr.rs index 2598add..1a6d94b 100644 --- a/src/parser/expr.rs +++ b/src/parser/expr.rs @@ -4,6 +4,13 @@ use super::common::{get_pos, grammar_error, parse_num, Pair, ParseResult, Rule}; use super::ParseContext; impl<'a> ParseContext<'a> { + /// Parses a `right_val_list` node into a `Vec` of [`ast::RightVal`]. + /// + /// Iterates over every `right_val` child and delegates to + /// [`Self::parse_right_val`]. + /// + /// # Arguments + /// * `pair` – the `right_val_list` parse-tree node. pub(crate) fn parse_right_val_list(&self, pair: Pair) -> ParseResult> { let mut vals = Vec::new(); for inner in pair.into_inner() { @@ -14,6 +21,14 @@ impl<'a> ParseContext<'a> { Ok(vals) } + /// Parses a `right_val` node into a boxed [`ast::RightVal`]. + /// + /// A right-hand-side value is either a Boolean expression (`bool_expr`) or + /// an arithmetic expression (`arith_expr`). Returns [`Error::Grammar`] if + /// neither is found. + /// + /// # Arguments + /// * `pair` – the `right_val` parse-tree node. pub(crate) fn parse_right_val(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); for inner in pair.into_inner() { @@ -35,6 +50,14 @@ impl<'a> ParseContext<'a> { Err(grammar_error("right_val", &pair_for_error)) } + /// Parses a `bool_expr` node into a boxed [`ast::BoolExpr`]. + /// + /// A Boolean expression is a sequence of `bool_and_term` nodes optionally + /// combined with `||` operators. The method builds a left-associative tree + /// of [`ast::BoolBiOpExpr`] nodes with [`ast::BoolBiOp::Or`]. 
+ /// + /// # Arguments + /// * `pair` – the `bool_expr` parse-tree node. pub(crate) fn parse_bool_expr(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); let inner_pairs: Vec<_> = pair.into_inner().collect(); @@ -43,11 +66,14 @@ impl<'a> ParseContext<'a> { return Err(grammar_error("bool_expr", &pair_for_error)); } + // Seed the accumulator with the first term. let mut expr = self.parse_bool_and_term(inner_pairs[0].clone())?; + // Walk through the remaining pairs looking for `||` operators. let mut i = 1; while i < inner_pairs.len() { if inner_pairs[i].as_rule() == Rule::op_or { + // Consume the operator and the next operand together. let right = self.parse_bool_and_term(inner_pairs[i + 1].clone())?; expr = Box::new(ast::BoolExpr { pos: expr.pos, @@ -66,6 +92,14 @@ impl<'a> ParseContext<'a> { Ok(expr) } + /// Parses a `bool_and_term` node into a boxed [`ast::BoolExpr`]. + /// + /// A Boolean AND term is a sequence of `bool_unit_atom` nodes optionally + /// combined with `&&` operators. The method builds a left-associative tree + /// of [`ast::BoolBiOpExpr`] nodes with [`ast::BoolBiOp::And`]. + /// + /// # Arguments + /// * `pair` – the `bool_and_term` parse-tree node. fn parse_bool_and_term(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); let inner_pairs: Vec<_> = pair.into_inner().collect(); @@ -74,12 +108,14 @@ impl<'a> ParseContext<'a> { return Err(grammar_error("bool_and_term", &pair_for_error)); } + // Seed the accumulator with the first unit wrapped in a BoolUnit variant. let first_unit = self.parse_bool_unit_atom(inner_pairs[0].clone())?; let mut expr = Box::new(ast::BoolExpr { pos: first_unit.pos, inner: ast::BoolExprInner::BoolUnit(first_unit), }); + // Walk through the remaining pairs looking for `&&` operators. 
let mut i = 1; while i < inner_pairs.len() { if inner_pairs[i].as_rule() == Rule::op_and { @@ -106,11 +142,21 @@ impl<'a> ParseContext<'a> { Ok(expr) } + /// Parses a `bool_unit_atom` node into a boxed [`ast::BoolUnit`]. + /// + /// Handles three cases: + /// 1. A prefixed `!` (NOT) operator followed by a nested `bool_unit_atom`. + /// 2. A parenthesised Boolean expression (`bool_unit_paren`). + /// 3. A comparison expression (`bool_comparison`). + /// + /// # Arguments + /// * `pair` – the `bool_unit_atom` parse-tree node. fn parse_bool_unit_atom(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); let pos = get_pos(&pair); let inner_pairs: Vec<_> = pair.into_inner().collect(); + // `!<bool_unit_atom>` — unary NOT operator. if inner_pairs.len() == 2 && inner_pairs[0].as_rule() == Rule::op_not { let cond = self.parse_bool_unit_atom(inner_pairs[1].clone())?; return Ok(Box::new(ast::BoolUnit { @@ -137,11 +183,22 @@ impl<'a> ParseContext<'a> { Err(grammar_error("bool_unit_atom", &pair_for_error)) } + /// Parses a `bool_unit_paren` node into a boxed [`ast::BoolUnit`]. + /// + /// After stripping the surrounding parentheses, the inner content is + /// either: + /// * A single `bool_expr` — wrapped as a `BoolUnit::BoolExpr`. + /// * A comparison triple `(expr op expr)` — delegated to + /// [`Self::parse_comparison_pair_triple`]. + /// + /// # Arguments + /// * `pair` – the `bool_unit_paren` parse-tree node. fn parse_bool_unit_paren(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); let pos = get_pos(&pair); let inner_pairs: Vec<_> = pair.into_inner().collect(); + // Remove parenthesis tokens; keep only meaningful children. let filtered: Vec<_> = inner_pairs .into_iter() .filter(|p| p.as_rule() != Rule::lparen && p.as_rule() != Rule::rparen) @@ -154,9 +211,17 @@ impl<'a> ParseContext<'a> { })); } + // Otherwise treat the filtered children as a comparison triple.
self.parse_comparison_pair_triple(pos, &filtered, "bool_unit_paren", &pair_for_error) } + /// Parses a `bool_comparison` node into a boxed [`ast::BoolUnit`]. + /// + /// A comparison has the form `expr op expr` (exactly three children). + /// Delegates directly to [`Self::parse_comparison_pair_triple`]. + /// + /// # Arguments + /// * `pair` – the `bool_comparison` parse-tree node. fn parse_bool_comparison(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); let pos = get_pos(&pair); @@ -164,6 +229,17 @@ impl<'a> ParseContext<'a> { self.parse_comparison_pair_triple(pos, &inner_pairs, "bool_comparison", &pair_for_error) } + /// Validates that `pairs` contains exactly three elements and builds a + /// comparison [`ast::BoolUnit`] from them. + /// + /// Returns [`Error::Grammar`] (using `context` as the label) when the + /// slice does not have exactly three elements. + /// + /// # Arguments + /// * `pos` – source byte offset for the resulting AST node. + /// * `pairs` – slice expected to contain `[left_expr, comp_op, right_expr]`. + /// * `context` – human-readable context label used in error messages. + /// * `pair_for_error` – original parse-tree node used if an error is raised. fn parse_comparison_pair_triple( &self, pos: usize, @@ -183,6 +259,16 @@ impl<'a> ParseContext<'a> { ) } + /// Builds a [`ast::BoolUnit::ComExpr`] from three parse-tree nodes. + /// + /// Parses the left operand, comparison operator, and right operand in turn + /// and assembles them into a [`ast::ComExpr`]. + /// + /// # Arguments + /// * `pos` – source byte offset for the resulting AST node. + /// * `left_pair` – parse-tree node for the left `expr_unit`. + /// * `op_pair` – parse-tree node for the comparison operator. + /// * `right_pair` – parse-tree node for the right `expr_unit`. fn parse_comparison_to_bool_unit( &self, pos: usize, @@ -200,6 +286,13 @@ impl<'a> ParseContext<'a> { })) } + /// Parses a `comp_op` node into an [`ast::ComOp`] variant. 
+ /// + /// Recognises the six comparison operators: `<`, `>`, `<=`, `>=`, `==`, + /// `!=`. Returns [`Error::Grammar`] if no known operator token is found. + /// + /// # Arguments + /// * `pair` – the `comp_op` parse-tree node. fn parse_comp_op(&self, pair: Pair) -> ParseResult { let pair_for_error = pair.clone(); for inner in pair.into_inner() { @@ -216,6 +309,14 @@ impl<'a> ParseContext<'a> { Err(grammar_error("comp_op", &pair_for_error)) } + /// Parses an `arith_expr` node into a boxed [`ast::ArithExpr`]. + /// + /// An arithmetic expression is a sequence of `arith_term` nodes optionally + /// combined with additive operators (`+`, `-`). The method builds a + /// left-associative tree of [`ast::ArithBiOpExpr`] nodes. + /// + /// # Arguments + /// * `pair` – the `arith_expr` parse-tree node. pub(crate) fn parse_arith_expr(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); let inner_pairs: Vec<_> = pair.into_inner().collect(); @@ -224,8 +325,10 @@ impl<'a> ParseContext<'a> { return Err(grammar_error("arith_expr", &pair_for_error)); } + // Seed the accumulator with the first term. let mut expr = self.parse_arith_term(inner_pairs[0].clone())?; + // Walk through the remaining pairs looking for additive operators. let mut i = 1; while i < inner_pairs.len() { if inner_pairs[i].as_rule() == Rule::arith_add_op { @@ -249,6 +352,14 @@ impl<'a> ParseContext<'a> { Ok(expr) } + /// Parses an `arith_term` node into a boxed [`ast::ArithExpr`]. + /// + /// An arithmetic term is a sequence of `expr_unit` nodes optionally + /// combined with multiplicative operators (`*`, `/`). The method builds a + /// left-associative tree of [`ast::ArithBiOpExpr`] nodes. + /// + /// # Arguments + /// * `pair` – the `arith_term` parse-tree node. 
fn parse_arith_term(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); let inner_pairs: Vec<_> = pair.into_inner().collect(); @@ -257,12 +368,14 @@ impl<'a> ParseContext<'a> { return Err(grammar_error("arith_term", &pair_for_error)); } + // Seed the accumulator with the first expression unit. let first_unit = self.parse_expr_unit(inner_pairs[0].clone())?; let mut expr = Box::new(ast::ArithExpr { pos: first_unit.pos, inner: ast::ArithExprInner::ExprUnit(first_unit), }); + // Walk through the remaining pairs looking for multiplicative operators. let mut i = 1; while i < inner_pairs.len() { if inner_pairs[i].as_rule() == Rule::arith_mul_op { @@ -290,6 +403,13 @@ impl<'a> ParseContext<'a> { Ok(expr) } + /// Parses an `arith_add_op` node into an [`ast::ArithBiOp`] additive variant. + /// + /// Recognises `+` and `-` tokens. Returns [`Error::Grammar`] if neither + /// is found. + /// + /// # Arguments + /// * `pair` – the `arith_add_op` parse-tree node. fn parse_arith_add_op(&self, pair: Pair) -> ParseResult { let pair_for_error = pair.clone(); for inner in pair.into_inner() { @@ -302,6 +422,13 @@ impl<'a> ParseContext<'a> { Err(grammar_error("arith_add_op", &pair_for_error)) } + /// Parses an `arith_mul_op` node into an [`ast::ArithBiOp`] multiplicative variant. + /// + /// Recognises `*` and `/` tokens. Returns [`Error::Grammar`] if neither + /// is found. + /// + /// # Arguments + /// * `pair` – the `arith_mul_op` parse-tree node. fn parse_arith_mul_op(&self, pair: Pair) -> ParseResult { let pair_for_error = pair.clone(); for inner in pair.into_inner() { @@ -314,17 +441,35 @@ impl<'a> ParseContext<'a> { Err(grammar_error("arith_mul_op", &pair_for_error)) } + /// Parses an `expr_unit` node into a boxed [`ast::ExprUnit`]. + /// + /// An expression unit is the atomic building block of arithmetic + /// expressions. The method handles the following forms, in the order + /// they are tried: + /// 1. Negated integer literal: `-<num>`. + /// 2.
Parenthesised arithmetic expression: `(<arith_expr>)`. + /// 3. Function call: `<fn_call>`. + /// 4. Plain integer literal: `<num>`. + /// 5. Reference: `&<identifier>`. + /// 6. Identifier with optional field/index suffixes (left-value chain). + /// + /// Returns [`Error::Grammar`] if none of the forms matches. + /// + /// # Arguments + /// * `pair` – the `expr_unit` parse-tree node. pub(crate) fn parse_expr_unit(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); let pos = get_pos(&pair); let inner_pairs: Vec<_> = pair.into_inner().collect(); + // Strip parentheses to obtain the meaningful children. let filtered: Vec<_> = inner_pairs .iter() .filter(|p| !matches!(p.as_rule(), Rule::lparen | Rule::rparen)) .cloned() .collect(); + // `-<num>` — negated integer literal. if filtered.len() == 2 && filtered[0].as_rule() == Rule::op_sub && filtered[1].as_rule() == Rule::num @@ -336,6 +481,7 @@ impl<'a> ParseContext<'a> { })); } + // `(<arith_expr>)` — parenthesised arithmetic expression. if filtered.len() == 1 && filtered[0].as_rule() == Rule::arith_expr { return Ok(Box::new(ast::ExprUnit { pos, @@ -343,6 +489,7 @@ impl<'a> ParseContext<'a> { })); } + // `<fn_call>` — a function call. if !filtered.is_empty() && filtered[0].as_rule() == Rule::fn_call { return Ok(Box::new(ast::ExprUnit { pos, @@ -350,6 +497,7 @@ impl<'a> ParseContext<'a> { })); } + // `<num>` — plain integer literal. if filtered.len() == 1 && filtered[0].as_rule() == Rule::num { let num = parse_num(filtered[0].clone())?; return Ok(Box::new(ast::ExprUnit { @@ -358,6 +506,7 @@ impl<'a> ParseContext<'a> { })); } + // `&<identifier>` — a reference to a variable. if filtered.len() == 2 && filtered[0].as_rule() == Rule::ampersand && filtered[1].as_rule() == Rule::identifier @@ -369,14 +518,17 @@ impl<'a> ParseContext<'a> { })); } + // `<identifier> (<expr_suffix>)*` — variable or field/index access. if !inner_pairs.is_empty() && inner_pairs[0].as_rule() == Rule::identifier { let id = inner_pairs[0].as_str().to_string(); + // Start with a plain identifier left-value.
let mut base = Box::new(ast::LeftVal { pos, inner: ast::LeftValInner::Id(id), }); + // Apply any chained field/index suffixes. let mut i = 1; while i < inner_pairs.len() { match inner_pairs[i].as_rule() { @@ -394,6 +546,13 @@ impl<'a> ParseContext<'a> { Err(grammar_error("expr_unit", &pair_for_error)) } + /// Parses an `index_expr` node into a boxed [`ast::IndexExpr`]. + /// + /// An index expression is either a numeric literal (`arr[0]`) or an + /// identifier (`arr[i]`). Returns [`Error::Grammar`] if neither is found. + /// + /// # Arguments + /// * `pair` – the `index_expr` parse-tree node. pub(crate) fn parse_index_expr(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); for inner in pair.into_inner() { @@ -415,6 +574,14 @@ impl<'a> ParseContext<'a> { Err(grammar_error("index_expr", &pair_for_error)) } + /// Parses a `fn_call` node into a boxed [`ast::FnCall`]. + /// + /// Dispatches to either [`Self::parse_module_prefixed_call`] (for calls + /// like `module::func(...)`) or [`Self::parse_local_call`] (for calls like + /// `func(...)`). Returns [`Error::Grammar`] if neither child is found. + /// + /// # Arguments + /// * `pair` – the `fn_call` parse-tree node. pub(crate) fn parse_fn_call(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); for inner in pair.into_inner() { @@ -431,6 +598,14 @@ impl<'a> ParseContext<'a> { Err(grammar_error("fn_call", &pair_for_error)) } + /// Parses a `module_prefixed_call` node into a boxed [`ast::FnCall`]. + /// + /// A module-prefixed call has the form `mod1::mod2::func(args)`. All + /// identifier children are collected; the last one becomes the function + /// name and the rest are joined with `"::"` as the module prefix. + /// + /// # Arguments + /// * `pair` – the `module_prefixed_call` parse-tree node. 
fn parse_module_prefixed_call(&self, pair: Pair) -> ParseResult> { let inner_pairs: Vec<_> = pair.into_inner().collect(); let mut idents: Vec = Vec::new(); @@ -444,6 +619,7 @@ impl<'a> ParseContext<'a> { } } + // The last identifier is the function name; the rest form the module path. let name = idents.pop().unwrap_or_default(); let module_prefix = if idents.is_empty() { None @@ -458,6 +634,13 @@ impl<'a> ParseContext<'a> { })) } + /// Parses a `local_call` node into a boxed [`ast::FnCall`]. + /// + /// A local call has the form `func(args)` with no module prefix. The + /// method extracts the function name and the argument list. + /// + /// # Arguments + /// * `pair` – the `local_call` parse-tree node. fn parse_local_call(&self, pair: Pair) -> ParseResult> { let mut name = String::new(); let mut vals = Vec::new(); @@ -477,6 +660,16 @@ impl<'a> ParseContext<'a> { })) } + /// Parses a `left_val` node into a boxed [`ast::LeftVal`]. + /// + /// A left-hand-side value starts with an identifier and may be followed by + /// zero or more `expr_suffix` nodes representing field access (`.field`) or + /// array indexing (`[idx]`). + /// + /// Returns [`Error::Grammar`] if the node contains no children. + /// + /// # Arguments + /// * `pair` – the `left_val` parse-tree node. pub(crate) fn parse_left_val(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); let pos = get_pos(&pair); @@ -486,6 +679,7 @@ impl<'a> ParseContext<'a> { return Err(grammar_error("left_val", &pair_for_error)); } + // The first child is always the root identifier. let id = inner_pairs[0].as_str().to_string(); let mut base = Box::new(ast::LeftVal { @@ -493,6 +687,7 @@ impl<'a> ParseContext<'a> { inner: ast::LeftValInner::Id(id), }); + // Apply any chained field/index suffixes. 
let mut i = 1; while i < inner_pairs.len() { match inner_pairs[i].as_rule() { @@ -507,6 +702,20 @@ impl<'a> ParseContext<'a> { Ok(base) } + /// Applies a single `expr_suffix` to a `base` left-value, producing a new + /// [`ast::LeftVal`]. + /// + /// An `expr_suffix` is either: + /// * An array index: `[<index_expr>]` → [`ast::LeftValInner::ArrayExpr`]. + /// * A field access: `.<field>` → [`ast::LeftValInner::MemberExpr`]. + /// + /// Bracket and dot tokens are skipped; only semantic children are + /// processed. If no recognised suffix token is found the `base` value is + /// returned unchanged. + /// + /// # Arguments + /// * `base` – the left-value accumulated so far. + /// * `suffix` – the `expr_suffix` parse-tree node to apply. pub(crate) fn parse_expr_suffix( &self, base: Box, @@ -516,6 +725,7 @@ impl<'a> ParseContext<'a> { for inner in suffix.into_inner() { match inner.as_rule() { + // Skip syntactic punctuation tokens. Rule::lbracket | Rule::rbracket | Rule::dot => continue, Rule::index_expr => { let idx = self.parse_index_expr(inner)?; @@ -545,18 +755,27 @@ impl<'a> ParseContext<'a> { } } +/// Converts a [`ast::LeftVal`] into the corresponding [`ast::ExprUnit`] variant. +/// +/// This free function is used when an identifier (or field/array access chain) +/// that was initially parsed as a left-value is later determined to appear on +/// the right-hand side of an expression. The conversion is infallible for the +/// three recognised [`ast::LeftValInner`] variants. fn left_val_to_expr_unit(lval: ast::LeftVal) -> ParseResult> { let pos = lval.pos; match &lval.inner { + // Plain identifier `x` → `ExprUnitInner::Id`. ast::LeftValInner::Id(id) => Ok(Box::new(ast::ExprUnit { pos, inner: ast::ExprUnitInner::Id(id.clone()), })), + // Array index access `arr[i]` → `ExprUnitInner::ArrayExpr`. ast::LeftValInner::ArrayExpr(arr_expr) => Ok(Box::new(ast::ExprUnit { pos, inner: ast::ExprUnitInner::ArrayExpr(arr_expr.clone()), })), + // Member field access `s.f` → `ExprUnitInner::MemberExpr`.
ast::LeftValInner::MemberExpr(mem_expr) => Ok(Box::new(ast::ExprUnit { pos, inner: ast::ExprUnitInner::MemberExpr(mem_expr.clone()), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 263ea7b..1e15a9f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,3 +1,22 @@ +//! Parser module for the TeaLang compiler front-end. +//! +//! This module is responsible for transforming a raw TeaLang source string into +//! a typed Abstract Syntax Tree (AST). It uses the [pest] PEG parser generator +//! to tokenise and structurally parse the source according to the grammar +//! defined in `tealang.pest`, and then walks the resulting parse tree to build +//! the AST types defined in [`crate::ast`]. +//! +//! # Main entry points +//! * [`Parser`] – the public façade that implements [`crate::common::Generator`]. +//! * [`ParseContext`] – an internal helper that owns a single parse-and-lower +//! pass over one source string. +//! +//! Sub-modules handle different grammatical categories: +//! * `common` – shared utilities (error types, helper functions) +//! * `decl` – declaration and definition rules +//! * `expr` – expression rules +//! * `stmt` – statement rules + mod common; mod decl; mod expr; @@ -13,12 +32,23 @@ use crate::common::Generator; pub use self::common::Error; use self::common::{grammar_error_static, ParseResult, Rule, TeaLangParser}; +/// Public parser that turns a TeaLang source string into an AST. +/// +/// After construction with [`Parser::new`] you must call +/// [`Generator::generate`] before accessing the [`Parser::program`] field. pub struct Parser<'a> { + /// The raw TeaLang source text to be parsed. input: &'a str, + /// The parsed AST program, populated by [`Generator::generate`]. + /// `None` until `generate` completes successfully. pub program: Option>, } impl<'a> Parser<'a> { + /// Creates a new `Parser` for the given source string. 
+ /// + /// The parser is not yet run; call [`Generator::generate`] to perform + /// parsing and populate [`Parser::program`]. pub fn new(input: &'a str) -> Self { Self { input, @@ -30,33 +60,55 @@ impl<'a> Parser<'a> { impl<'a> Generator for Parser<'a> { type Error = Error; + /// Runs the full parse pipeline: tokenisation → parse-tree → AST. + /// + /// On success the resulting [`ast::Program`] is stored in + /// [`Parser::program`]. Any syntax or structural error is returned as + /// [`Error`]. fn generate(&mut self) -> Result<(), Error> { let ctx = ParseContext::new(self.input); self.program = Some(ctx.parse()?); Ok(()) } + /// Writes a pretty-printed representation of the parsed program to `w`. + /// + /// Returns [`Error::Grammar`] if called before [`Generator::generate`]. fn output(&self, w: &mut W) -> Result<(), Error> { let ast = self .program .as_ref() + // Guard: generate() must be called before output(). .ok_or_else(|| grammar_error_static("output before generate"))?; write!(w, "{ast}")?; Ok(()) } } +/// Internal context that owns a single parse pass over one source string. +/// +/// `ParseContext` is constructed by [`Parser`] and carries the source slice so +/// that all parser helper methods can reference it if needed. pub(crate) struct ParseContext<'a> { #[allow(dead_code)] + /// The original source text being parsed. input: &'a str, } impl<'a> ParseContext<'a> { + /// Creates a new `ParseContext` for the given source string. fn new(input: &'a str) -> Self { Self { input } } + /// Parses the full source string into a boxed [`ast::Program`]. + /// + /// Uses [`TeaLangParser`] to produce a parse tree for the `program` rule, + /// then iterates over top-level nodes to collect `use` statements and + /// program elements (variable declarations, struct definitions, function + /// declarations and definitions). fn parse(&self) -> ParseResult> { + // Run the pest parser; convert any pest::Error into Error::Syntax. 
let pairs = >::parse(Rule::program, self.input) .map_err(|e| Error::Syntax(e.to_string()))?; @@ -65,6 +117,7 @@ impl<'a> ParseContext<'a> { for pair in pairs { if pair.as_rule() == Rule::program { + // Walk the top-level children of the `program` node. for inner in pair.into_inner() { match inner.as_rule() { Rule::use_stmt => { @@ -75,6 +128,7 @@ impl<'a> ParseContext<'a> { elements.push(*elem); } } + // End-of-input marker; nothing to do. Rule::EOI => {} _ => {} } diff --git a/src/parser/stmt.rs b/src/parser/stmt.rs index 4717bde..7c16c63 100644 --- a/src/parser/stmt.rs +++ b/src/parser/stmt.rs @@ -4,6 +4,24 @@ use super::ParseContext; use super::common::{ParseResult, Pair, Rule, get_pos, grammar_error}; impl<'a> ParseContext<'a> { + /// Parses a `code_block_stmt` node into a boxed [`ast::CodeBlockStmt`]. + /// + /// Dispatches to the appropriate statement parser depending on the inner + /// rule: + /// * `var_decl_stmt` → [`Self::parse_var_decl_stmt`] + /// * `assignment_stmt` → [`Self::parse_assignment_stmt`] + /// * `call_stmt` → [`Self::parse_call_stmt`] + /// * `if_stmt` → [`Self::parse_if_stmt`] + /// * `while_stmt` → [`Self::parse_while_stmt`] + /// * `return_stmt` → [`Self::parse_return_stmt`] + /// * `continue_stmt` → wraps a unit [`ast::ContinueStmt`] + /// * `break_stmt` → wraps a unit [`ast::BreakStmt`] + /// * `null_stmt` → wraps a unit [`ast::NullStmt`] + /// + /// Returns [`Error::Grammar`] if no recognisable inner rule is found. + /// + /// # Arguments + /// * `pair` – the `code_block_stmt` parse-tree node. pub(crate) fn parse_code_block_stmt(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); for inner in pair.into_inner() { @@ -40,6 +58,7 @@ impl<'a> ParseContext<'a> { inner: ast::CodeBlockStmtInner::Return(self.parse_return_stmt(inner)?), })); } + // `continue` and `break` carry no additional data. 
Rule::continue_stmt => { return Ok(Box::new(ast::CodeBlockStmt { inner: ast::CodeBlockStmtInner::Continue(Box::new(ast::ContinueStmt {})), @@ -50,6 +69,7 @@ impl<'a> ParseContext<'a> { inner: ast::CodeBlockStmtInner::Break(Box::new(ast::BreakStmt {})), })); } + // A null statement is a bare semicolon; nothing to parse. Rule::null_stmt => { return Ok(Box::new(ast::CodeBlockStmt { inner: ast::CodeBlockStmtInner::Null(Box::new(ast::NullStmt {})), @@ -62,6 +82,13 @@ impl<'a> ParseContext<'a> { Err(grammar_error("code_block_stmt", &pair_for_error)) } + /// Parses an `assignment_stmt` node into a boxed [`ast::AssignmentStmt`]. + /// + /// An assignment has the form `left_val = right_val;`. Both operands are + /// required; [`Error::Grammar`] is returned if either is absent. + /// + /// # Arguments + /// * `pair` – the `assignment_stmt` parse-tree node. fn parse_assignment_stmt(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); let mut left_val = None; @@ -83,6 +110,14 @@ impl<'a> ParseContext<'a> { })) } + /// Parses a `call_stmt` node into a boxed [`ast::CallStmt`]. + /// + /// A call statement is a standalone function call used for its side + /// effects: `func(args);`. Returns [`Error::Grammar`] if the expected + /// `fn_call` child is absent. + /// + /// # Arguments + /// * `pair` – the `call_stmt` parse-tree node. fn parse_call_stmt(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); for inner in pair.into_inner() { @@ -96,6 +131,13 @@ impl<'a> ParseContext<'a> { Err(grammar_error("call_stmt", &pair_for_error)) } + /// Parses a `return_stmt` node into a boxed [`ast::ReturnStmt`]. + /// + /// The return value is optional: `return;` and `return expr;` are both + /// valid. When present, the expression is parsed as a `right_val`. + /// + /// # Arguments + /// * `pair` – the `return_stmt` parse-tree node. 
fn parse_return_stmt(&self, pair: Pair) -> ParseResult> { let mut val = None; @@ -108,11 +150,27 @@ impl<'a> ParseContext<'a> { Ok(Box::new(ast::ReturnStmt { val })) } + /// Parses an `if_stmt` node into a boxed [`ast::IfStmt`]. + /// + /// An `if` statement has the form: + /// ```text + /// if <bool_expr> { <statements> } [else { <statements> }] + /// ``` + /// The condition is parsed as a `bool_expr` wrapped in a `BoolUnit`. + /// Body statements are collected into `if_stmts`; once the `else` keyword + /// token is encountered subsequent `code_block_stmt` nodes are collected + /// into `else_stmts`. + /// + /// Returns [`Error::Grammar`] if no condition is found. + /// + /// # Arguments + /// * `pair` – the `if_stmt` parse-tree node. fn parse_if_stmt(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); let mut bool_unit = None; let mut if_stmts = Vec::new(); let mut else_stmts = None; + // Track whether we have passed the `else` keyword. let mut in_else = false; for inner in pair.into_inner() { @@ -120,6 +178,7 @@ impl<'a> ParseContext<'a> { Rule::bool_expr => { let pos = get_pos(&inner); let bool_expr = self.parse_bool_expr(inner)?; + // Wrap the condition expression in a BoolUnit node. bool_unit = Some(Box::new(ast::BoolUnit { pos, inner: ast::BoolUnitInner::BoolExpr(bool_expr), @@ -127,12 +186,14 @@ impl<'a> ParseContext<'a> { } Rule::code_block_stmt => { if in_else { + // Append to the else branch, creating the Vec on first use. let else_branch = else_stmts.get_or_insert_with(Vec::new); else_branch.push(*self.parse_code_block_stmt(inner)?); } else { if_stmts.push(*self.parse_code_block_stmt(inner)?); } } + // The `else` keyword marks the start of the else branch. Rule::kw_else => { in_else = true; } @@ -147,6 +208,19 @@ impl<'a> ParseContext<'a> { })) } + /// Parses a `while_stmt` node into a boxed [`ast::WhileStmt`].
+ /// + /// A `while` statement has the form: + /// ```text + /// while <bool_expr> { <statements> } + /// ``` + /// The condition is parsed as a `bool_expr` wrapped in a `BoolUnit` and + /// all body statements are collected in order. + /// + /// Returns [`Error::Grammar`] if no condition is found. + /// + /// # Arguments + /// * `pair` – the `while_stmt` parse-tree node. fn parse_while_stmt(&self, pair: Pair) -> ParseResult> { let pair_for_error = pair.clone(); let mut bool_unit = None; @@ -157,6 +231,7 @@ impl<'a> ParseContext<'a> { Rule::bool_expr => { let pos = get_pos(&inner); let bool_expr = self.parse_bool_expr(inner)?; + // Wrap the condition expression in a BoolUnit node. bool_unit = Some(Box::new(ast::BoolUnit { pos, inner: ast::BoolUnitInner::BoolExpr(bool_expr),