diff --git a/src/asm.rs b/src/asm.rs index 8baabcd..314683a 100644 --- a/src/asm.rs +++ b/src/asm.rs @@ -1,3 +1,6 @@ +//! This module provides the assembly code generation backend, +//! translating the compiler's IR into target-specific assembly. + pub mod aarch64; pub mod common; pub mod error; diff --git a/src/common.rs b/src/common.rs index 0c906e3..7860553 100644 --- a/src/common.rs +++ b/src/common.rs @@ -1,14 +1,22 @@ +//! Common utilities and shared abstractions used across the compiler, +//! including target platform detection and a generic code generator trait. + pub mod graph; use std::io::Write; +/// Represents the compilation target operating system. #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum Target { + /// Compile for Linux. Linux, + /// Compile for macOS. Macos, } impl Target { + /// Detects the current host platform at compile time and returns the + /// corresponding `Target` variant. pub fn host() -> Self { if cfg!(target_os = "macos") { Target::Macos @@ -17,6 +25,11 @@ impl Target { } } + /// Applies platform-specific symbol name mangling. + /// + /// On macOS, the Mach-O ABI requires a leading underscore prefix for C + /// symbols, so this method prepends `_` to the given name. On Linux the + /// name is returned unchanged. pub fn mangle_symbol(&self, name: &str) -> String { match self { Target::Macos => format!("_{name}"), @@ -25,8 +38,16 @@ impl Target { } } +/// A generic trait for code generators. +/// +/// Callers first invoke [`generate`](Generator::generate) to perform the +/// code-generation work and then call [`output`](Generator::output) to write +/// the generated result to any [`Write`] sink. pub trait Generator { type Error; + /// Performs the code-generation step, populating the generator's internal + /// state with the result. fn generate(&mut self) -> Result<(), Self::Error>; + /// Writes the previously generated output to `w`. 
fn output(&self, w: &mut W) -> Result<(), Self::Error>; } diff --git a/src/common/graph.rs b/src/common/graph.rs index 4950dec..27dfaa2 100644 --- a/src/common/graph.rs +++ b/src/common/graph.rs @@ -1,7 +1,30 @@ +//! Graph data structures and dataflow analysis utilities for control-flow graphs. +//! +//! This module provides: +//! - [`CfgNode`]: a trait for nodes in a control-flow graph. +//! - [`Graph`]: a directed graph with successor and predecessor adjacency lists. +//! - [`Lattice`]: a trait defining a lattice for dataflow analysis. +//! - [`BackwardLiveness`]: backward liveness analysis using a worklist algorithm. + use std::collections::{HashMap, HashSet, VecDeque}; +/// A node in a control-flow graph (CFG). +/// +/// Implementors describe how each node connects to its successors and +/// optionally expose a label so that branch targets can be resolved by name. pub trait CfgNode { + /// Returns an optional label for this node. + /// + /// When present, the label is used to build a name-to-index map so that + /// other nodes can refer to this node as a branch target by name. fn label(&self) -> Option; + + /// Computes the successor node indices for this node. + /// + /// - `idx`: the index of this node in the owning node slice. + /// - `num_nodes`: total number of nodes in the graph. + /// - `label_map`: a map from label strings to node indices, used to + /// resolve named branch targets. fn successors( &self, idx: usize, @@ -10,12 +33,20 @@ pub trait CfgNode { ) -> Vec; } +/// A directed graph represented as both successor and predecessor adjacency lists. +/// +/// Both lists are indexed by node index and are derived from the same edge set, +/// so they are always consistent with each other. pub struct Graph { succs: Vec>, preds: Vec>, } impl Graph { + /// Constructs a [`Graph`] from a pre-built successor adjacency list. + /// + /// The predecessor adjacency list is derived automatically by inverting + /// the edges of `succs`. 
pub fn new(succs: Vec>) -> Self { let n = succs.len(); let mut preds = vec![Vec::new(); n]; @@ -27,6 +58,11 @@ impl Graph { Self { succs, preds } } + /// Builds a [`Graph`] from a slice of [`CfgNode`] implementors. + /// + /// This method first collects all node labels into a name-to-index map, + /// then calls [`CfgNode::successors`] on each node to compute the full + /// successor adjacency list, and finally delegates to [`Graph::new`]. pub fn from_nodes(nodes: &[N]) -> Self { let n = nodes.len(); let label_map: HashMap = nodes @@ -42,33 +78,60 @@ impl Graph { Self::new(succs) } + /// Returns the total number of nodes in the graph. pub fn num_nodes(&self) -> usize { self.succs.len() } + /// Returns the successor indices of `node`. pub fn successors(&self, node: usize) -> &[usize] { &self.succs[node] } + /// Returns the predecessor indices of `node`. pub fn predecessors(&self, node: usize) -> &[usize] { &self.preds[node] } + /// Returns the full successor adjacency list. pub fn succs_vec(&self) -> &[Vec] { &self.succs } + /// Returns the full predecessor adjacency list. pub fn preds_vec(&self) -> &[Vec] { &self.preds } } +/// A lattice used as the value domain for dataflow analysis. +/// +/// Each implementor defines: +/// - a bottom element (the initial / most-conservative value), +/// - a join (least upper bound) operation for merging values at join points, +/// - a transfer function that computes the inflow from the outflow using +/// gen/kill sets. pub trait Lattice: Clone + PartialEq { + /// Returns the bottom element of the lattice (the initial dataflow value). fn bottom() -> Self; + + /// Computes the least upper bound of `self` and `other` in place (join / merge). fn join(&mut self, other: &Self); + + /// Applies the transfer function: `gen ∪ (out ∖ kill)`. 
+ /// + /// Returns the lattice value that flows into a node given: + /// - `gen`: values generated (used) by the node, + /// - `kill`: values killed (overwritten) by the node, + /// - `out`: values live at the exit of the node. fn transfer(gen: &Self, kill: &Self, out: &Self) -> Self; } +/// Simple single-bit liveness lattice. +/// +/// `false` is the bottom element. `join` is logical OR. The transfer function +/// propagates liveness if the value is generated or passes through (live-out +/// and not killed). impl Lattice for bool { fn bottom() -> Self { false @@ -83,9 +146,15 @@ impl Lattice for bool { } } +/// A set of virtual-register indices, used as the liveness lattice element +/// when tracking the live set of virtual registers. #[derive(Clone, PartialEq, Eq)] pub struct VregSet(pub HashSet); +/// Set-of-virtual-registers liveness lattice. +/// +/// The bottom element is the empty set. `join` is set union. The transfer +/// function is `gen ∪ (out ∖ kill)`. impl Lattice for VregSet { fn bottom() -> Self { VregSet(HashSet::new()) @@ -106,12 +175,24 @@ impl Lattice for VregSet { } } +/// Results of backward liveness (dataflow) analysis over a [`Graph`]. +/// +/// - `live_in[i]` holds the lattice value live at the **entry** of node `i`. +/// - `live_out[i]` holds the lattice value live at the **exit** of node `i`. pub struct BackwardLiveness { + /// Lattice values live at the entry of each node. pub live_in: Vec, + /// Lattice values live at the exit of each node. pub live_out: Vec, } impl BackwardLiveness { + /// Performs backward liveness analysis using a worklist algorithm. + /// + /// The worklist is initially seeded with all nodes in reverse order so + /// that nodes near the end of the CFG are processed first. Whenever + /// `live_in[i]` changes, all predecessors of `i` are added back to the + /// worklist to propagate the change backward until a fixed point is reached. 
pub fn compute(gen: &[L], kill: &[L], graph: &Graph) -> Self { let n = graph.num_nodes(); diff --git a/src/ir.rs b/src/ir.rs index a36b6e5..761b1e0 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -1,3 +1,8 @@ +//! This module defines the compiler's intermediate representation (IR). +//! +//! The IR is the central data structure that bridges the front-end (parsing +//! and type-checking) and the back-end (optimization and code generation). + pub mod error; pub mod function; mod gen; diff --git a/src/opt.rs b/src/opt.rs index 5fc5e29..2b4961f 100644 --- a/src/opt.rs +++ b/src/opt.rs @@ -1,3 +1,8 @@ +//! This module provides optimization passes that transform IR functions. +//! +//! Passes implement the [`FunctionPass`] trait and are composed into a +//! [`FunctionPassManager`] pipeline that runs them in registration order. + use crate::ir::function::Function; pub mod cfg; @@ -6,26 +11,35 @@ mod mem2reg; pub use mem2reg::Mem2RegPass; +/// Interface that every function-level optimization pass must implement. pub trait FunctionPass { fn run(&self, func: &mut Function); } +/// Manages a sequential pipeline of [`FunctionPass`] instances. +/// +/// Passes are stored as boxed trait objects so that heterogeneous pass types +/// can be combined in a single pipeline. #[derive(Default)] pub struct FunctionPassManager { passes: Vec>, } impl FunctionPassManager { + /// Creates an empty pass manager with no registered passes. pub fn new() -> Self { Self::default() } + /// Creates a pass manager pre-loaded with the default optimization + /// pipeline (currently: [`Mem2RegPass`]). pub fn with_default_pipeline() -> Self { let mut pm = Self::new(); pm.add_pass(Mem2RegPass); pm } + /// Appends `pass` to the end of the optimization pipeline. pub fn add_pass

(&mut self, pass: P) where P: FunctionPass + 'static, @@ -33,6 +47,7 @@ impl FunctionPassManager { self.passes.push(Box::new(pass)); } + /// Runs all registered passes sequentially on `func`. pub fn run(&self, func: &mut Function) { for pass in &self.passes { pass.run(func); diff --git a/src/parser/mod.rs b/src/parser.rs similarity index 100% rename from src/parser/mod.rs rename to src/parser.rs