diff --git a/Cargo.lock b/Cargo.lock index 77020a1..c2c0629 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,10 +1,10 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "gregex" -version = "0.7.2" +version = "0.8.0" dependencies = [ "gregex-logic", "gregex-macros", @@ -12,11 +12,11 @@ dependencies = [ [[package]] name = "gregex-logic" -version = "0.1.1" +version = "0.2.0" [[package]] name = "gregex-macros" -version = "0.1.1" +version = "0.2.0" dependencies = [ "gregex-logic", "proc-macro2", diff --git a/Cargo.toml b/Cargo.toml index 14993f2..b224dd8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "gregex" -version = "0.7.2" +version = "0.8.0" edition = "2021" authors = ["Saphereye "] license = "MIT" -description = "Regex solver utilizing NFA" +description = "Regex solver with string parsing support utilizing NFA" keywords = ["regex", "nfa", "automata"] categories = ["text-processing"] documentation = "https://docs.rs/gregex" @@ -26,5 +26,5 @@ members = [ ] [dependencies] -gregex-macros = { path = "gregex-macros", version = "0.1.0" } -gregex-logic = { path = "gregex-logic", version = "0.1.0" } +gregex-macros = { path = "gregex-macros", version = "0.2.0" } +gregex-logic = { path = "gregex-logic", version = "0.2.0" } diff --git a/README.md b/README.md index b86c473..b98a7cc 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,179 @@ -# Gregex ![crates.io](https://img.shields.io/crates/v/gregex.svg) ![Build Passing](https://github.com/Saphereye/gregex/actions/workflows/ci.yml/badge.svg) +# Gregex ![crates.io](https://img.shields.io/crates/v/gregex.svg) ![Build Passing](https://github.com/Saphereye/gregex/actions/workflows/ci.yml/badge.svg) -![](https://github.com/Saphereye/gregex/raw/master/assets/gregex_workflow.excalidraw.svg) +Gregex is a powerful regular expression library that compiles regex patterns to Non-deterministic Finite Automata (NFA) at compile-time using Glushkov's construction algorithm.
Write regex patterns as strings and let Rust's procedural macros do the rest! -Gregex is a regular expression solver which utilizes Non-deterministic Finite Automata (NFA) to simulate the input strings. \ No newline at end of file +## Features + +- **String-based regex parsing**: Write natural regex syntax like `regex!("(a|b)+")` +- **Compile-time construction**: Zero runtime regex parsing overhead +- **Type-safe**: Leverages Rust's procedural macros for safety +- **NFA-based matching**: Uses Glushkov's construction for efficient matching +- **Rich operator support**: `*`, `+`, `?`, `|`, concatenation, and grouping + +## Quick Start + +Add gregex to your `Cargo.toml`: + +```bash +cargo add --git https://github.com/Saphereye/gregex +``` + +### Simple Example + +```rust +use gregex::*; + +fn main() { + // Natural regex syntax - parsed at compile time! + let pattern = regex!("(a|b)+c"); + + // Use standard regex API methods + assert!(pattern.is_match("abc")); // Find pattern anywhere + assert!(pattern.is_match("prefix_abc_suffix")); + assert_eq!(pattern.find("xabcy"), Some((1, 4))); // Get match position +} +``` + +## API Methods + +Gregex provides a standard regex API similar to Rust's `regex` crate: + +| Method | Description | Example | +|--------|-------------|---------| +| `is_match(text)` | Check if pattern exists in text | `pattern.is_match("hello")` | +| `find(text)` | Get first match position | `pattern.find("text")` → `Some((start, end))` | +| `find_iter(text)` | Iterator over all matches | `pattern.find_iter("text").collect()` | + +## Regex Syntax Reference + +When using string-based syntax with `regex!("...")`, the following operators are supported: + +| Syntax | Description | Example | Matches | +|--------|-------------|---------|---------| +| `a`, `b`, `c` | Literal characters | `regex!("abc")` | "abc" | +| `ab` | Concatenation (implicit) | `regex!("hello")` | "hello" | +| `a\|b` | Alternation (OR) | `regex!("a\|b")` | "a" or "b" | +| `a*` | Kleene star 
(zero or more) | `regex!("a*")` | "", "a", "aa", ... | +| `a+` | Plus (one or more) | `regex!("a+")` | "a", "aa", "aaa", ... | +| `a?` | Question (zero or one) | `regex!("a?")` | "" or "a" | +| `(...)` | Grouping for precedence | `regex!("(ab)+")` | "ab", "abab", ... | + +### Wildcard Patterns + +**Note**: The `.` wildcard (match any character) and `.*` patterns are not currently supported in the parser. However: +- Use `(a|b|c)*` to match specific character sets with repetition +- Use alternation `(a|b|c)+` for one-or-more of specific characters +- The `is_match()` method finds patterns anywhere in text, so `pattern.is_match()` behaves similarly to `.*pattern.*` in standard regex + +**Future Enhancement**: Full wildcard support (`.` and `\w`, `\d`, etc.) is planned for a future version. + + +## Usage Examples + +### 1. String-Based Syntax (Recommended) + +The most natural and recommended way to use Gregex: + +```rust +use gregex::*; + +// Simple patterns with new API +let pattern = regex!("a+@b+"); +assert!(pattern.is_match("aaa@bbb")); +assert!(pattern.is_match("prefix_aa@bb_suffix")); + +// Complex patterns with operators +let identifier = regex!("(a|b)(a|b|c)*"); +assert!(identifier.is_match("abc")); +assert!(identifier.is_match("bca")); + +// Find match positions +let pattern = regex!("a+b?c*"); +if let Some((start, end)) = pattern.find("xyzaabccxyz") { + println!("Found match from {} to {}", start, end); +} + +// Nested grouping +let nested = regex!("((a|b)+c)*"); +assert!(nested.is_match("acbc")); +``` + +## Examples + +Run the included examples to see gregex in action: + +### Basic Operator Examples + +These examples demonstrate individual regex operators: + +```bash +# Basic concatenation (matching "abc") +cargo run --example 01_basic_concatenation + +# Alternation/OR operator (a|b|c) +cargo run --example 02_alternation + +# Kleene star - zero or more (a*) +cargo run --example 03_kleene_star + +# Plus operator - one or more (a+) +cargo run --example 
04_plus_operator + +# Question operator - zero or one (a?) +cargo run --example 05_question_operator + +# Grouping and operator precedence +cargo run --example 06_grouping_and_precedence +``` + +### Advanced Examples + +```bash +# Complete API methods demonstration +cargo run --example 07_api_methods + +# Compile-time NFA construction verification +cargo run --example 08_compile_time_construction +``` + +### Use Case Examples + +Real-world applications demonstrating practical pattern matching: + +```bash +# Validate programming identifiers +cargo run --example usecase_identifier_validator + +# Match URL-like paths +cargo run --example usecase_simple_url_matcher + +# Search for patterns in text +cargo run --example usecase_text_search +``` + +## How It Works + +Gregex uses Glushkov's construction algorithm to convert regular expressions into NFAs: + +1. **Linearization**: Each symbol in the regex is assigned a unique index +2. **Set Construction**: Computes prefix, suffix, factors, and nullability sets +3. **NFA Generation**: Constructs the NFA based on these sets +4. **Simulation**: Runs the input string through the NFA to determine if it matches + +This approach generates NFAs with states equal to the number of terminals plus one, making it efficient for pattern matching. + +## Testing + +Run the comprehensive test suite: + +```bash +cargo test --all +``` + +## License + +MIT License - see [LICENSE](LICENSE) for details. + +## Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. 
diff --git a/assets/gregex_workflow.excalidraw b/assets/gregex_workflow.excalidraw deleted file mode 100644 index 745224a..0000000 --- a/assets/gregex_workflow.excalidraw +++ /dev/null @@ -1,1142 +0,0 @@ -{ - "type": "excalidraw", - "version": 2, - "source": "https://excalidraw.com", - "elements": [ - { - "id": "GZj82kT2asUiu0t80ChHM", - "type": "text", - "x": 623.4000244140625, - "y": 180.00003051757812, - "width": 158.56471252441406, - "height": 35, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": null, - "seed": 981382413, - "version": 30, - "versionNonce": 2031676099, - "isDeleted": false, - "boundElements": null, - "updated": 1712484454616, - "link": null, - "locked": false, - "text": "Regex Tree", - "fontSize": 28, - "fontFamily": 1, - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "Regex Tree", - "lineHeight": 1.25 - }, - { - "id": "QESpKNQr9ng6FQ4tNj-h_", - "type": "text", - "x": 644.2000122070312, - "y": 230.00003051757812, - "width": 123.83987426757812, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": null, - "seed": 1138088493, - "version": 38, - "versionNonce": 1901221613, - "isDeleted": false, - "boundElements": [ - { - "id": "_5eg5n2PbPBFx8JbjRhF1", - "type": "arrow" - }, - { - "id": "oqsRwRUUG6zZidc0alp-l", - "type": "arrow" - } - ], - "updated": 1712484454616, - "link": null, - "locked": false, - "text": "Concatenate", - "fontSize": 20, - "fontFamily": 1, - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "Concatenate", - "lineHeight": 1.25 - }, - { - "id": "EieRr8Q7-zomo7wqYBq3e", - 
"type": "text", - "x": 625.8000183105469, - "y": 322.60003662109375, - "width": 13.339981079101562, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": null, - "seed": 1070670285, - "version": 93, - "versionNonce": 1154783843, - "isDeleted": false, - "boundElements": [ - { - "id": "_5eg5n2PbPBFx8JbjRhF1", - "type": "arrow" - } - ], - "updated": 1712484454616, - "link": null, - "locked": false, - "text": "a", - "fontSize": 20, - "fontFamily": 1, - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "a", - "lineHeight": 1.25 - }, - { - "id": "vWEGWD0CjZR02shsD_KHS", - "type": "text", - "x": 725, - "y": 315.4000549316406, - "width": 23.119979858398438, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": null, - "seed": 182647021, - "version": 71, - "versionNonce": 337834317, - "isDeleted": false, - "boundElements": [ - { - "id": "oqsRwRUUG6zZidc0alp-l", - "type": "arrow" - }, - { - "id": "TXL5z0VODDsjJ-J-NGBxU", - "type": "arrow" - }, - { - "id": "FFrHFleeQjkecE5zoBi3l", - "type": "arrow" - } - ], - "updated": 1712484454616, - "link": null, - "locked": false, - "text": "Or", - "fontSize": 20, - "fontFamily": 1, - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "Or", - "lineHeight": 1.25 - }, - { - "id": "72-txAT2ozqNfNf9Lyrye", - "type": "text", - "x": 672.2000732421875, - "y": 392.20001220703125, - "width": 10.159988403320312, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - 
"roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": null, - "seed": 1691504355, - "version": 47, - "versionNonce": 1756976643, - "isDeleted": false, - "boundElements": [ - { - "id": "TXL5z0VODDsjJ-J-NGBxU", - "type": "arrow" - } - ], - "updated": 1712484454616, - "link": null, - "locked": false, - "text": "b", - "fontSize": 20, - "fontFamily": 1, - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "b", - "lineHeight": 1.25 - }, - { - "id": "WjBbC_LufJOhCF465wWzn", - "type": "text", - "x": 775.4000244140625, - "y": 391.4000244140625, - "width": 10.039993286132812, - "height": 25, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": null, - "seed": 274462147, - "version": 110, - "versionNonce": 447823789, - "isDeleted": false, - "boundElements": [ - { - "id": "FFrHFleeQjkecE5zoBi3l", - "type": "arrow" - } - ], - "updated": 1712484454616, - "link": null, - "locked": false, - "text": "c", - "fontSize": 20, - "fontFamily": 1, - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "c", - "lineHeight": 1.25 - }, - { - "id": "_5eg5n2PbPBFx8JbjRhF1", - "type": "arrow", - "x": 690.1806042620947, - "y": 263.6397171850583, - "width": 52.618325499016805, - "height": 50.960319436035434, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 2 - }, - "seed": 1751385869, - "version": 251, - "versionNonce": 331718051, - "isDeleted": false, - "boundElements": null, - "updated": 1712484454616, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - -52.618325499016805, - 50.960319436035434 - ] 
- ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "QESpKNQr9ng6FQ4tNj-h_", - "focus": -0.07869025360065661, - "gap": 8.639686667480191 - }, - "endBinding": { - "elementId": "EieRr8Q7-zomo7wqYBq3e", - "focus": -0.8211147968512266, - "gap": 8 - }, - "startArrowhead": null, - "endArrowhead": "arrow" - }, - { - "id": "oqsRwRUUG6zZidc0alp-l", - "type": "arrow", - "x": 714.5594588506367, - "y": 265.0000305175781, - "width": 20.983490715012977, - "height": 43.20001220703125, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 2 - }, - "seed": 1574430243, - "version": 182, - "versionNonce": 723960333, - "isDeleted": false, - "boundElements": null, - "updated": 1712484454616, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - 20.983490715012977, - 43.20001220703125 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "QESpKNQr9ng6FQ4tNj-h_", - "focus": 0.038439335813854586, - "gap": 10 - }, - "endBinding": { - "elementId": "vWEGWD0CjZR02shsD_KHS", - "focus": 0.48502819696552024, - "gap": 7.20001220703125 - }, - "startArrowhead": null, - "endArrowhead": "arrow" - }, - { - "id": "TXL5z0VODDsjJ-J-NGBxU", - "type": "arrow", - "x": 718.6000366210938, - "y": 335.86928240796584, - "width": 31.859504543316802, - "height": 52.842004336219134, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 2 - }, - "seed": 1920612813, - "version": 212, - "versionNonce": 287830339, - "isDeleted": false, - "boundElements": null, - "updated": 1712484454616, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - -31.859504543316802, - 
52.842004336219134 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "vWEGWD0CjZR02shsD_KHS", - "focus": 0.7011839162345875, - "gap": 6.39996337890625 - }, - "endBinding": { - "elementId": "72-txAT2ozqNfNf9Lyrye", - "focus": -0.014224134164734326, - "gap": 5.5999755859375 - }, - "startArrowhead": null, - "endArrowhead": "arrow" - }, - { - "id": "FFrHFleeQjkecE5zoBi3l", - "type": "arrow", - "x": 751.8940080851319, - "y": 341.9503371348894, - "width": 17.60252110966303, - "height": 46.978036677514126, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 2 - }, - "seed": 1868692611, - "version": 360, - "versionNonce": 1786574957, - "isDeleted": false, - "boundElements": null, - "updated": 1712484454616, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - 17.60252110966303, - 46.978036677514126 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "vWEGWD0CjZR02shsD_KHS", - "focus": -0.610115094157301, - "gap": 4.0800323486328125 - }, - "endBinding": { - "elementId": "WjBbC_LufJOhCF465wWzn", - "focus": -0.5475909338050172, - "gap": 6.4000244140625 - }, - "startArrowhead": null, - "endArrowhead": "arrow" - }, - { - "id": "URsxN157LZbUsdGVUoqAr", - "type": "rectangle", - "x": 588.2000122070312, - "y": 165, - "width": 240.79998779296875, - "height": 262.4000244140625, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 3 - }, - "seed": 1383800323, - "version": 63, - "versionNonce": 1486404739, - "isDeleted": false, - "boundElements": [ - { - "id": "zp2RqKSLAj6Evx6WoH1xq", - "type": "arrow" - }, - { - "id": 
"sol0pfTlgeqFGNOvOihIi", - "type": "arrow" - }, - { - "id": "isTU6idSsTM3JYs1P12s-", - "type": "arrow" - } - ], - "updated": 1712484454617, - "link": null, - "locked": false - }, - { - "id": "YBK8bKHhO72MD9cFd-DAA", - "type": "text", - "x": 944.4000244140625, - "y": 139.4000244140625, - "width": 143.50062561035156, - "height": 35, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": null, - "seed": 720065507, - "version": 54, - "versionNonce": 666992941, - "isDeleted": false, - "boundElements": [ - { - "id": "zp2RqKSLAj6Evx6WoH1xq", - "type": "arrow" - }, - { - "id": "rEswf5PYKCQnx3dsaEOoW", - "type": "arrow" - } - ], - "updated": 1712484454617, - "link": null, - "locked": false, - "text": "Prefix Set", - "fontSize": 28, - "fontFamily": 1, - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "Prefix Set", - "lineHeight": 1.25 - }, - { - "id": "DnskmjEi_Tu2xv-m36K4o", - "type": "text", - "x": 935.6000366210938, - "y": 289, - "width": 144.17263793945312, - "height": 35, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": null, - "seed": 971568397, - "version": 69, - "versionNonce": 1341404195, - "isDeleted": false, - "boundElements": [ - { - "id": "sol0pfTlgeqFGNOvOihIi", - "type": "arrow" - }, - { - "id": "jxqamYYNxtliWgiZ02kju", - "type": "arrow" - } - ], - "updated": 1712484454617, - "link": null, - "locked": false, - "text": "Suffix Set", - "fontSize": 28, - "fontFamily": 1, - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "Suffix Set", - "lineHeight": 1.25 - }, - { - "id": "fVmMN8gRtg-DpshmJ48mO", - "type": "text", - "x": 
944.4000244140625, - "y": 431.3999938964844, - "width": 169.5407257080078, - "height": 35, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": null, - "seed": 1386164973, - "version": 53, - "versionNonce": 675264451, - "isDeleted": false, - "boundElements": [ - { - "id": "isTU6idSsTM3JYs1P12s-", - "type": "arrow" - }, - { - "id": "ZPdPbFI4zbv5cVA4X6nm-", - "type": "arrow" - } - ], - "updated": 1712484454617, - "link": null, - "locked": false, - "text": "Factors Set", - "fontSize": 28, - "fontFamily": 1, - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "Factors Set", - "lineHeight": 1.25 - }, - { - "id": "zp2RqKSLAj6Evx6WoH1xq", - "type": "arrow", - "x": 841.2000122070312, - "y": 224.21751173989082, - "width": 88.79998779296875, - "height": 54.2121923244481, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 2 - }, - "seed": 1536044419, - "version": 124, - "versionNonce": 1755216589, - "isDeleted": false, - "boundElements": null, - "updated": 1712484474095, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - 37.600006103515625, - -34.017499532859574 - ], - [ - 88.79998779296875, - -54.2121923244481 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "URsxN157LZbUsdGVUoqAr", - "focus": 0.19982418422334228, - "gap": 12.20001220703125 - }, - "endBinding": { - "elementId": "YBK8bKHhO72MD9cFd-DAA", - "focus": 0.4557782649209433, - "gap": 14.4000244140625 - }, - "startArrowhead": null, - "endArrowhead": "arrow" - }, - { - "id": "sol0pfTlgeqFGNOvOihIi", - "type": "arrow", - "x": 840.3999938964844, - "y": 
309.728389525415, - "width": 87.20004272460938, - "height": 1.67282164579575, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 2 - }, - "seed": 1084588429, - "version": 133, - "versionNonce": 1603913571, - "isDeleted": false, - "boundElements": null, - "updated": 1712484454617, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - 87.20004272460938, - -1.67282164579575 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "URsxN157LZbUsdGVUoqAr", - "focus": 0.13042899947023762, - "gap": 11.399993896484375 - }, - "endBinding": { - "elementId": "DnskmjEi_Tu2xv-m36K4o", - "focus": -0.0010175328324035834, - "gap": 8 - }, - "startArrowhead": null, - "endArrowhead": "arrow" - }, - { - "id": "isTU6idSsTM3JYs1P12s-", - "type": "arrow", - "x": 842, - "y": 387.34740449809357, - "width": 88.79998779296875, - "height": 55.03184703730051, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 2 - }, - "seed": 720881923, - "version": 121, - "versionNonce": 721163885, - "isDeleted": false, - "boundElements": null, - "updated": 1712484478196, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - 41.600006103515625, - 34.85260770893768 - ], - [ - 88.79998779296875, - 55.03184703730051 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "URsxN157LZbUsdGVUoqAr", - "focus": -0.08883261121053691, - "gap": 13 - }, - "endBinding": { - "elementId": "fVmMN8gRtg-DpshmJ48mO", - "focus": -0.6612237536534992, - "gap": 13.60003662109375 - }, - "startArrowhead": null, - "endArrowhead": "arrow" - }, - { - "id": 
"rEswf5PYKCQnx3dsaEOoW", - "type": "arrow", - "x": 1097.2000122070312, - "y": 154.7938167354612, - "width": 86.3131126625126, - "height": 80.80618936805442, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 2 - }, - "seed": 487868973, - "version": 316, - "versionNonce": 840374147, - "isDeleted": false, - "boundElements": null, - "updated": 1712484468148, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - 49.5999755859375, - 36.206183264538794 - ], - [ - 86.3131126625126, - 80.80618936805442 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "YBK8bKHhO72MD9cFd-DAA", - "focus": -0.8768432246990739, - "gap": 9.299362182617188 - }, - "endBinding": { - "elementId": "qgJaN3l2QBUZQV8CINHND", - "focus": -0.5048316538607037, - "gap": 10.4000244140625 - }, - "startArrowhead": null, - "endArrowhead": "arrow" - }, - { - "id": "jxqamYYNxtliWgiZ02kju", - "type": "arrow", - "x": 1090.800048828125, - "y": 304.5897468782065, - "width": 70.4000244140625, - "height": 11.271910062989718, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 2 - }, - "seed": 651580099, - "version": 358, - "versionNonce": 1697429133, - "isDeleted": false, - "boundElements": null, - "updated": 1712484455383, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - 70.4000244140625, - -11.271910062989718 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "DnskmjEi_Tu2xv-m36K4o", - "focus": 0.3924422700467998, - "gap": 11.027374267578125 - }, - "endBinding": { - "elementId": "qgJaN3l2QBUZQV8CINHND", - "focus": 0.301602152905241, - "gap": 
11.20001220703125 - }, - "startArrowhead": null, - "endArrowhead": "arrow" - }, - { - "id": "qgJaN3l2QBUZQV8CINHND", - "type": "rectangle", - "x": 1172.4000854492188, - "y": 246.00003051757812, - "width": 289.59997558593744, - "height": 84.00000000000001, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 3 - }, - "seed": 1854647469, - "version": 152, - "versionNonce": 48899341, - "isDeleted": false, - "boundElements": [ - { - "id": "ZPdPbFI4zbv5cVA4X6nm-", - "type": "arrow" - }, - { - "id": "rEswf5PYKCQnx3dsaEOoW", - "type": "arrow" - }, - { - "id": "jxqamYYNxtliWgiZ02kju", - "type": "arrow" - }, - { - "id": "zbY-UsYai9od8tA8OF5c2", - "type": "arrow" - } - ], - "updated": 1712484454617, - "link": null, - "locked": false - }, - { - "id": "ZPdPbFI4zbv5cVA4X6nm-", - "type": "arrow", - "x": 1125.8867237211055, - "y": 429.7509874108995, - "width": 62.438479877624786, - "height": 79.34005637115558, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": { - "type": 2 - }, - "seed": 354987341, - "version": 357, - "versionNonce": 1835454509, - "isDeleted": false, - "boundElements": null, - "updated": 1712484463748, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - 36.913264071863296, - -36.350962996836984 - ], - [ - 62.438479877624786, - -79.34005637115558 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "fVmMN8gRtg-DpshmJ48mO", - "focus": 0.7535626882847319, - "gap": 11.945973599035142 - }, - "endBinding": { - "elementId": "qgJaN3l2QBUZQV8CINHND", - "focus": 0.5409383874637134, - "gap": 20.410900522165775 - }, - "startArrowhead": null, - "endArrowhead": 
"arrow" - }, - { - "id": "ElYjuZj2zVXFaYjWp0_kz", - "type": "text", - "x": 1203.5999755859375, - "y": 269.99993896484375, - "width": 232.45700073242188, - "height": 35, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": null, - "seed": 1730880195, - "version": 87, - "versionNonce": 1048935907, - "isDeleted": false, - "boundElements": null, - "updated": 1712484454617, - "link": null, - "locked": false, - "text": "Glushkow Method", - "fontSize": 28, - "fontFamily": 1, - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "Glushkow Method", - "lineHeight": 1.25 - }, - { - "id": "mzOaAa6_ZVbp9tEJcMUz7", - "type": "text", - "x": 1548.4000244140625, - "y": 268.6000061035156, - "width": 52.47221374511719, - "height": 35, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "roundness": null, - "seed": 1021726051, - "version": 56, - "versionNonce": 1963197901, - "isDeleted": false, - "boundElements": [ - { - "id": "zbY-UsYai9od8tA8OF5c2", - "type": "arrow" - } - ], - "updated": 1712484454617, - "link": null, - "locked": false, - "text": "NFA", - "fontSize": 28, - "fontFamily": 1, - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "NFA", - "lineHeight": 1.25 - }, - { - "id": "zbY-UsYai9od8tA8OF5c2", - "type": "arrow", - "x": 1471.5999755859375, - "y": 288.91200303673446, - "width": 66.40008544921898, - "height": 1.0549676496291909, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - 
"roundness": { - "type": 2 - }, - "seed": 840588845, - "version": 197, - "versionNonce": 1367460227, - "isDeleted": false, - "boundElements": null, - "updated": 1712484454617, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - 66.40008544921898, - -1.0549676496291909 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "qgJaN3l2QBUZQV8CINHND", - "focus": 0.06286690559529075, - "gap": 9.59991455078125 - }, - "endBinding": { - "elementId": "mzOaAa6_ZVbp9tEJcMUz7", - "focus": -0.06557817457998676, - "gap": 10.39996337890625 - }, - "startArrowhead": null, - "endArrowhead": "arrow" - } - ], - "appState": { - "gridSize": null, - "viewBackgroundColor": "#ffffff" - }, - "files": {} -} \ No newline at end of file diff --git a/assets/gregex_workflow.excalidraw.svg b/assets/gregex_workflow.excalidraw.svg deleted file mode 100644 index 50386af..0000000 --- a/assets/gregex_workflow.excalidraw.svg +++ /dev/null @@ -1,21 +0,0 @@ - - - eyJ2ZXJzaW9uIjoiMSIsImVuY29kaW5nIjoiYnN0cmluZyIsImNvbXByZXNzZWQiOnRydWUsImVuY29kZWQiOiJ4nOVdW1fbOFx1MDAxN33vr2Axr41H1yNp3rilLW25U1x1MDAwNr41q8skJjGEJMSmpMzqf58jXHUwMDEzYjuOjVx1MDAwMVx1MDAwN0y/dFxyLZZjydLZW/tcXOz5993S0nL4c+gt/7W07I1bbs9vj9yb5ff2+Fx1MDAwZm9cdTAwMTT4gz42sej3YHA9akVndsNwXHUwMDE4/PXnn/E3nNbg8u5bXs+79PphgOf9XHUwMDBmf19a+jf6megn9MZhdG50NO6Fk9mDW4N+1CEjnIJcdTAwMDJizPRcZj9Yx55Cr43NZ24v8OJcdTAwMTZ7aPnDyblmXHUwMDE3XHUwMDA3zFxyXHUwMDBl/WtcdTAwMTJqstb9+DXu9czv9fbDn71oQMFcdTAwMDBvXCJuXHUwMDBiwtHgwjvy22H3/t5cdTAwMTPH8741XHUwMDFhXFx3un0vsDdOp0dcdTAwMDdDt+WHP+0xXHUwMDEy357b70TXiI+M8Tdg3Fx1MDAxMYRcdTAwMTAmXHUwMDA0XHUwMDE1XHUwMDA0mJy2Rt/XxMFGwomkSipNZ1x1MDAwN7Y26FxyRnZgf1DP/omHduq2Ljo4vn57ek44cvvB0Fx1MDAxZOFKxefdTG6ZSu1IXHUwMDEwijI5XHUwMDE5y/SUrud3uqFdrHh0gVx1MDAxN62C0ZRrJiifNtg+h5/akSn8XHUwMDEzT/3IvfQ+2W/0r3u95Pz125P5SzWc2oaNhFXFl7pcdTAwMWW23TtcdTAwMTOgOFihhZBcdTAwMDJoPNqe37+YvVxcb9C6mGM1Z4N+uO/fRtamU0eb7qXf+5la1ciCcVx1MDAxNve8jjdeOlx1MDAxOHmJybZtKz2/Yy16ueedpU099Fx1MDAxMTHT5nAwjFtb2Jfr971Rdm5cdTAwMDYjv+P33d5
BQb94r97H+9WhXHUwMDBlk1HDr/ePxKCePXiPQWpcYmWMQmJ9XHUwMDFmwuDuxv7w89buyPQ70NxcdTAwMTXh1nmj+73mXHUwMDE4XHUwMDE0wmGIMrxVopB14p7t91x1MDAxOX85XGZcIlx1MDAxOWhutFx1MDAxMlxmZrqaYpBlMEhcdTAwMTGCRGthXHUwMDE2XHUwMDBlwklDbFqJdf8uvY7ss53TndXmWG+enu91m3R6iyk7dEejwc3ytOXX+6LrXHUwMDBlroK9m73Dw1x1MDAwZnB74rdbxO1cclx1MDAxYr1y1538a+HUQVJHXHUwMDBiqGNccmGFXHUwMDAz6ON/L8tcdTAwMWRzO66GPFx1MDAxMlY3S1x1MDAxZVRcbqW5XHUwMDE25cljw/f2RnpXNW5cdTAwMDeXXHUwMDAzdXN1vHrFvZqTXHUwMDA3k4625KE5JWhOsVqx3+eMOWDJXHUwMDAzgFFiuJIzXHUwMDAzq5A8uMO5wVxymShDXHSVUI48kPFAXHUwMDExpuOWt0VcdTAwMWW1XHUwMDAzufuy0HarXHUwMDA2tKKzXHUwMDA371x1MDAwMc05wllwqkrj+cfRxoejdbJ2frJHWNBccta/f/64X288q1x1MDAxOVx0zqmMXHUwMDA0usT9lUJSXHUwMDE2V1xyYNz8KTVGXHUwMDE5La1cYlx1MDAxMFx0WVZcdTAwMDRgzVC1XHUwMDEzXHUwMDE23+0r4PdcdTAwMTmbdOHmf/D3XHUwMDE3eUu+ba+vXHUwMDA355uNzcbWh9XxYVx1MDAwNddtNkdcdTAwMWabPc/bPb/wWlx1MDAxYvJ2sOrztyoqtkcvSzjJ/qphXHUwMDFjoWZcdTAwMGZOJYSSYFx1MDAxNMAjJIRijXC8csBcdTAwMDa3V1tnW2fmy8/Rz7pLXGLFXCL/Q3EmXHUwMDE41WqGgFxmS3snXHUwMDBilFx1MDAxMMShXHUwMDEyJYRcdTAwMTaEc5byhIpcdTAwMThcYlxmlURw+apcdTAwMTLiXHUwMDE5VFE7SJ++LKJPq1x1MDAwNjSluVE9gS5cdTAwMDFDXHUwMDFkUT6od3S+err2/cv12eZ2d60pQN5cdTAwMWPd9utccmilZEFQj1x1MDAxYprTulx1MDAwMDxcdTAwMTN0XHRcZmdcdTAwMWEo/iiHZ6aEQG8lwcqvXHUwMDAw599ph269LJxbT4Xz3UTOwTOTXHUwMDA1Plx1MDAwMc6pJolcdTAwMTNcdTAwMWXCc7GvV0s8g8GNUVx1MDAxMyCCXHUwMDAxIyaBjChAXGLcXHUwMDAxbpSdXGJJULsvXGbPkjlANWfoi1x1MDAxOEJcdTAwMDGnPYtnSVx1MDAxY1x1MDAwM7h1XHUwMDFiwfEvKbjI7NdKUo4+RlwiVPFkgMfIvbdcIjY58iu+y1x1MDAxN4nlXHUwMDA3oTtcblf9ftvvd9JcdTAwMDObJKc+lVxiT0dYbl3bUTaQO1x1MDAxNc5cdTAwMTFhXHUwMDEyJ5KAXHUwMDA0oInTOu5cdTAwMTBP0nbhQVx1MDAwM4BcdTAwMTKaUEMzN+/121x1MDAwZlx1MDAwZqo47JVcdTAwMWWUZpRcIjdjr1x1MDAxMlx1MDAxNSFAdkyZMfTcIFxcXHUwMDFiXFxe+iHO9c7A74ezc1x1MDAxYU3eioV/13MzXHUwMDBijveQbJvliaG9YprQ438txVCKfpn++5/3c89u5Nu4/WStO77gu+Tfj+Y49KPzOE4xbjvl5Z2QYn+4llx1MDAxY6eocKQ0QmokMeAwy3EynVx1MDAwNFlcdTAwMTjHMVxcYc2FIVxunVxuyoyKXHUwMDA3MuU4wXM9olx0xUnUMFx1MDAxY1x1MDAxNVZcdTAwMDUpkd+X4qw61IJcdTAwMWLOpU3jSlxc+yydUJK521KcVlx1MDAxY/pLjUKgxaE4NWBASoa
rllx1MDAxOYSaWe83TXH5XHUwMDE2bj9cdTAwMTnbrorgXHUwMDE4zSU4ppXmSHDlnbJib7umXHUwMDA0p1OpmDi6XHUwMDFhOWVcXDq42zPNXHUwMDA0wc1VajEzsOpcdTAwMTiOU0dLI1x0XHUwMDEyXHUwMDAxqmfc4eZ4ZbhcdTAwMGJqgXaAZ+BoXHKdo+JcZiNAXHUwMDExNv+PXHUwMDE091xiclGEopwwXHUwMDE0XHUwMDE4R35TXHQ1MeFcdTAwMTZwrItcZja5Yaw3/jS6K1x1MDAwZTvO6EoqmK2ZXHUwMDExXHUwMDE0hOKCsyzrSkfioJSUaClcXL1tvmvkXHUwMDFivP1kTb0qxkOJmMd4XHUwMDE0pT1qXHUwMDA0I8vnsoojXHUwMDEy9aQ8iTNvXHUwMDA0IZqgt4CqOU15gjq4Kmj4OOdcdTAwMWHPW1x1MDAxOONRhczLJHouxFx1MDAwMOBOM0fSgWOUtuSscNSCskSe7T7RXHUwMDA1lqCBVpDp+n1cdFx1MDAwZulcdTAwMDUoUp4kRqBcbuYk67Rcblx1MDAwN+2BcORDXHJRUPCJlFdcdTAwMWOYTY9JXG4lXHJuulx1MDAxY/05Qlx1MDAxNctcZlxu0iHRN014ueZuP1lDfyTdjbxWeFx1MDAwN/c5lFx1MDAwN/nVOLjaguB+U17kXHUwMDFk7lx1MDAwNeMtNKIvJ6eHQfvDt8PB1cqo3oyH3mtBKVx1MDAxZoXFxdpRNzpcbvdsrZRhXHUwMDA2klm8ONZcdTAwMGUsL/I/4Tiu+Vx1MDAxZDZcdTAwMTfBcfxhjpt+Z05I/nbI9q4+739ZOYeNXHUwMDFmYzhcdTAwMWF8pOOrXG6S8WhcdTAwMDRkeHbQ63hXzVx1MDAwZlvbP7b97ie/guv6wcEh+O394OAr3zxcdTAwMGXoXHUwMDBlZUGj0lx1MDAxNEIsXHUwMDFlXHUwMDFlZvxCTOcl0qTIgzNcdTAwMDBcdTAwMTi0M1E+7n68+lmffv7Y3Vbs67ppNduN9ZWVeqPZXGJRVFx1MDAxY8/NS+XR0CmXxHaBeyuXVJaqjVfMho0ledU82qJAO9pcYm7O5M7x57Xd/pi3XHUwMDAzd2N7cPRq4Irn8ilcdTAwMTX9OyPvzFx1MDAxZi/te4mlf4lE3bx+q0nAJ1I7szKAW7BcYmpic32IONb7wcXl+Yb//eCajX80Ljl8XHUwMDE2g5pcdTAwMTNcdTAwMDeXXHUwMDA1sVx1MDAxZZaoPqieKoSD+lx1MDAxNrgy3FxiObeEZs5jNFxuOUVz86pUsah9+Hx85V5cdTAwMWVcdTAwMWZvjcOef9TxT1xiuzi/fqNUsX999ipUMa/faqhC5npcZqAkXHUwMDAznODyXHUwMDEy4+zb5dct3dlcdTAwMGI7jfVh0L3cXHUwMDE0+nK75kxRKDFcdTAwMDSnUXRcdTAwMTBcdTAwMTnEgLW2xfFcdTAwMDZcdTAwMTh0koliUlnvXFzNqf3N0lx1MDAwNrpcdTAwMGJAQVx1MDAxOPWqT/48Q2dcdTAwMTfyxslOe+e0+Uncnv6QrW8r4m/oX76efn9cdTAwMWVvNN1WOFx1MDAxOFx1MDAwNS9PXHUwMDFjczuuqCyIslxc94QqKVx1MDAxObrbj6jzK1aqtSRcdTAwMGYtaNGDg0w4zFx1MDAxNttQxY02RC/uwUGtXHUwMDFmjD1IO1x1MDAxOEZccuNIdVwimcC/T5lzINhC61hcdTAwMTWkXHUwMDA0SVxi1urDq8WRrqVkPoniNDNBtWCMc/yhXHUwMDEzZ00y5rP12pl7L1x1MDAxNV4t9tdTY1x1MDAxMlIqZZ9cdTAwMDMxtjqN8+yQxG9cdTAwMTRe5SrS9uTeXHUwMDE3Z6lcbqFcdTAwMDZcdTAwMTdcdTAwMGWhSlx1MDAxOCM5s2knJVx1MDAxZbpgLnq
i681cdTAwMDInvtq75N+PZ0+eXHUwMDFmq1x1MDAwNcJccuVSlZdexeK9puxJcsRVlJ1cIsZRTGObZFLQxVx1MDAwNXf0XXGJYIpcdEj5it14i1x1MDAwNFx1MDAxY1xu7mdCKjSoOVx1MDAwZlFqWzclWFx1MDAxZLnzyUJnXHUwMDAx3MlxmjWuuH1gjSvIpoFoWm/PS32XXCLP4pjFUjpcdTAwMWRPKFx1MDAxMoZlXHUwMDBiRDhSiubZXHUwMDAypLddVJlr4vbTmLXuylxijuVWjStGKXCty1x1MDAwN6GKnYya8ltaXHUwMDBmclxcXHUwMDA2bqWMXHUwMDEwRuMqJGpcdTAwMGZeQ1x1MDAwZkpcdTAwMDfFiUYkcsVJsoA/XHUwMDBlX2ttt71cdTAwMWFymrLVgnXgNEtcdTAwMWZaI3FcdTAwMDBFcyeooU024U6zibdSJFZcdTAwMWNOSY9cdTAwMDJcdTAwMDD1JtIlXHUwMDA3yVH7zGFWPvNcIoI3zWjoeFx1MDAxNWhAlIBaMiBKXHUwMDExjbdcbvqhy1x1MDAxNSrAXGZSqlwiSJ4oXHUwMDE5mSFI+1x1MDAwNKZcdTAwMTLJZ7BcdTAwMWVcIsjihEwtXHRcdTAwMTLtUFx1MDAxNWXrUXkr3KwoKG5FzFx1MDAwMt1ncDjliGBcdTAwMDSHTJZcdTAwMWVN2VJcdTAwMTNHo7GhNdknXHUwMDEyRILZJ2wptNKgK4nEVc6WoKmIVW31bFnaU7WPryBcdTAwMWFcdTAwMDVnqEOMIYlilClPXHUwMDE5h6H8Q5JCf9Y+UZVVXqW486qz6W7xXHUwMDFl2109PNn9ptc+bX3cWs8rTlwiQiNcdTAwMTYl12DNUGXGRMlv5D5cdTAwMGKTri1NXHUwMDExJzi2qtjuZ1x1MDAwMqdDmYd951x1MDAxY+hEbbOgqYw4Zf5cdTAwMWLLwCj0w+gjXHUwMDFl1inOT9WVOUn01iGhdVTBN+M6XHUwMDBiR1wiXHUwMDE3IfCVxvVcXJzvrEhO8iR2nanDXHUwMDE0NdRWT1x1MDAxOFx1MDAxY1x1MDAxM41XbsKcgPs33oqppe+MIIhccql65iztptpIXHTOMmNcbtdcdTAwMWNQq8xcdDtShzDF43fHPdF1Ls2cOCZCgTAqmSGSiTnCl/5Oz+3kXHUwMDE5u/00snb+SLIrLOvEKc4jPFu5bfgj6sCK17eedEfVXeGkxl3EoDSYqedcdTAwMTDwUm9oZDq9eyaClrFWXHUwMDE00Wimn2yqXHUwMDA1b1x1MDAwM4RcdTAwMTKLeVx1MDAwMPuZVZ7PyLq+L7ruM1xuxlxur/uM6pLC696eXHUwMDFlN1x1MDAwZYNj1zeDtlx1MDAwZVf0dlO2WD2rR1x1MDAwYnRS/puVNPpU+IeUT9BcdTAwMTZbRk2Jw76dUYNi3IZcdTAwMWOJnCnvYMZROFx1MDAwNVx1MDAxYV1tgjS6OKFkXHUwMDBiv7lcdTAwMTbKhlx1MDAxOIBcdKXn+JjKOFx1MDAxY0lOXHUwMDAyV5RKKTNKXHRXXGa/n+T6+iglXHUwMDFj9EJ9zNKxMIIryiUwsMpYqMRDT0lZYoREX1x1MDAxZJncho9E9t6rVkpS2LA7elR2ouicXHUwMDA0XHUwMDAzQ3FhVT1cbilbN6HeeHxcdTAwMGVcdTAwMWRJ9MHQiySohIAzk3JcZlx1MDAxYtjMXHUwMDExdyiVXGZoXHUwMDBlRj/oZ+ZcdTAwMDMoumBcdTAwMDY7j1RfedVxOp9A0fcyaEGkfIhuo3d8fn1yzm6//d10j8+PhuT7xW3NXHSUXHUwMDExPj9cXDB5LYRx0um6xSkvjlx1MDAxNiBV/Jq8clx1MDAxNXKKXHUwMDEzbd/MsvB309Wn1uxD7zroXlxmbpa+emF3kDCHl6g3y+28omrV3Ig5NYBcXK9cZin
vXHRd3m67Ky58P/l2OjThxmbr6+GtqjlcdTAwMWWl0Fx1MDAwNfWquOul81wiXHUwMDBig6NENCrcqnDbt1x1MDAwNW6JqrFcIjRcdTAwMTJma+OTycbXeCSmVsr+eVjfaq68LL5TXHUwMDFkVlVHavJ3WVx1MDAwZSiZXGJj5XfZ4uWtJ6qFokW7rNaopphccnDgdHBcdTAwMTTbi/NTIFx1MDAxNWcxc4uh7LuwQYGw+o2ahFx1MDAwNzmBuVx1MDAxNkRqrUVcdTAwMDV77purhnqET4DsrVx1MDAwMVxmOqXGRlDnvJrkLtZEhX2I8OlcdTAwMDHd4m1uKV3MXHUwMDAw0lx1MDAwNtCULVx1MDAwNTLoN897XHUwMDFm1EPvS3lLfkquvdtPI2vqeW7Fu0lcdTAwMGbL7nC4XHUwMDFm2v/Nxv16LP/wvZvVLFxm/ziLPnZziabOXHUwMDEyk1x1MDAxN5n8r3e//lx1MDAwM1x1MDAxY0LiVCJ9 - - - - - Regex TreeConcatenateaOrbcPrefix SetSuffix SetFactors SetGlushkow MethodNFA \ No newline at end of file diff --git a/examples/01_basic_concatenation.rs b/examples/01_basic_concatenation.rs new file mode 100644 index 0000000..b217f08 --- /dev/null +++ b/examples/01_basic_concatenation.rs @@ -0,0 +1,36 @@ +extern crate gregex; +use gregex::*; + +fn main() { + println!("=== Basic Concatenation Example ===\n"); + println!("This example demonstrates simple character concatenation."); + + // Create a pattern that matches the exact string "abc" + let pattern = regex!("abc"); + + println!("Pattern: \"abc\"\n"); + + // Test exact matches + println!("Testing exact matches:"); + assert_eq!(pattern.matches_exact("abc"), true); + println!(" \"abc\" matches: true"); + + assert_eq!(pattern.matches_exact("ab"), false); + println!(" \"ab\" matches: false (too short)"); + + assert_eq!(pattern.matches_exact("abcd"), false); + println!(" \"abcd\" matches: false (too long)"); + + // Test substring matching + println!("\nTesting substring matching:"); + assert!(pattern.is_match("abc")); + println!(" is_match(\"abc\"): true"); + + assert!(pattern.is_match("prefix_abc_suffix")); + println!(" is_match(\"prefix_abc_suffix\"): true"); + + assert!(!pattern.is_match("ab")); + println!(" is_match(\"ab\"): false"); + + println!("\nAll tests passed!"); +} diff --git a/examples/02_alternation.rs b/examples/02_alternation.rs new file mode 100644 index 0000000..a872ead --- /dev/null 
+++ b/examples/02_alternation.rs @@ -0,0 +1,31 @@ +extern crate gregex; +use gregex::*; + +fn main() { + println!("=== Alternation (OR) Example ===\n"); + println!("This example demonstrates the alternation operator (|)."); + + // Create a pattern that matches "a" OR "b" OR "c" + let pattern = regex!("a|b|c"); + + println!("Pattern: \"a|b|c\" (matches 'a', 'b', or 'c')\n"); + + // Test each alternative + println!("Testing matches:"); + assert!(pattern.matches_exact("a")); + println!(" \"a\" matches: true"); + + assert!(pattern.matches_exact("b")); + println!(" \"b\" matches: true"); + + assert!(pattern.matches_exact("c")); + println!(" \"c\" matches: true"); + + assert!(!pattern.matches_exact("d")); + println!(" \"d\" matches: false"); + + assert!(!pattern.matches_exact("ab")); + println!(" \"ab\" matches: false (too long)"); + + println!("\nAll tests passed!"); +} diff --git a/examples/03_kleene_star.rs b/examples/03_kleene_star.rs new file mode 100644 index 0000000..4fff948 --- /dev/null +++ b/examples/03_kleene_star.rs @@ -0,0 +1,35 @@ +extern crate gregex; +use gregex::*; + +fn main() { + println!("=== Kleene Star (*) Example ===\n"); + println!("This example demonstrates the Kleene star operator (*)."); + println!("The star operator matches zero or more occurrences.\n"); + + // Create a pattern that matches zero or more 'a's + let pattern = regex!("a*"); + + println!("Pattern: \"a*\" (zero or more 'a's)\n"); + + // Test various repetitions + println!("Testing matches:"); + assert!(pattern.matches_exact("")); + println!(" \"\" (empty) matches: true"); + + assert!(pattern.matches_exact("a")); + println!(" \"a\" matches: true"); + + assert!(pattern.matches_exact("aa")); + println!(" \"aa\" matches: true"); + + assert!(pattern.matches_exact("aaa")); + println!(" \"aaa\" matches: true"); + + assert!(!pattern.matches_exact("b")); + println!(" \"b\" matches: false"); + + assert!(!pattern.matches_exact("ab")); + println!(" \"ab\" matches: false"); + + println!("\nAll 
tests passed!"); +} diff --git a/examples/04_plus_operator.rs b/examples/04_plus_operator.rs new file mode 100644 index 0000000..7831c80 --- /dev/null +++ b/examples/04_plus_operator.rs @@ -0,0 +1,35 @@ +extern crate gregex; +use gregex::*; + +fn main() { + println!("=== Plus (+) Operator Example ===\n"); + println!("This example demonstrates the plus operator (+)."); + println!("The plus operator matches one or more occurrences.\n"); + + // Create a pattern that matches one or more 'a's + let pattern = regex!("a+"); + + println!("Pattern: \"a+\" (one or more 'a's)\n"); + + // Test various repetitions + println!("Testing matches:"); + assert!(!pattern.matches_exact("")); + println!(" \"\" (empty) matches: false (requires at least one)"); + + assert!(pattern.matches_exact("a")); + println!(" \"a\" matches: true"); + + assert!(pattern.matches_exact("aa")); + println!(" \"aa\" matches: true"); + + assert!(pattern.matches_exact("aaa")); + println!(" \"aaa\" matches: true"); + + assert!(!pattern.matches_exact("b")); + println!(" \"b\" matches: false"); + + assert!(!pattern.matches_exact("ab")); + println!(" \"ab\" matches: false"); + + println!("\nAll tests passed!"); +} diff --git a/examples/05_question_operator.rs b/examples/05_question_operator.rs new file mode 100644 index 0000000..bce38cf --- /dev/null +++ b/examples/05_question_operator.rs @@ -0,0 +1,29 @@ +extern crate gregex; +use gregex::*; + +fn main() { + println!("=== Question (?) 
Operator Example ===\n"); + println!("This example demonstrates the question operator (?)."); + println!("The question operator matches zero or one occurrence.\n"); + + // Create a pattern that matches zero or one 'a' + let pattern = regex!("a?"); + + println!("Pattern: \"a?\" (zero or one 'a')\n"); + + // Test various cases + println!("Testing matches:"); + assert!(pattern.matches_exact("")); + println!(" \"\" (empty) matches: true"); + + assert!(pattern.matches_exact("a")); + println!(" \"a\" matches: true"); + + assert!(!pattern.matches_exact("aa")); + println!(" \"aa\" matches: false (too many)"); + + assert!(!pattern.matches_exact("b")); + println!(" \"b\" matches: false"); + + println!("\nAll tests passed!"); +} diff --git a/examples/06_grouping_and_precedence.rs b/examples/06_grouping_and_precedence.rs new file mode 100644 index 0000000..f5c379b --- /dev/null +++ b/examples/06_grouping_and_precedence.rs @@ -0,0 +1,55 @@ +extern crate gregex; +use gregex::*; + +fn main() { + println!("=== Grouping and Precedence Example ===\n"); + println!("This example demonstrates how parentheses control operator precedence.\n"); + + // Pattern: (ab)+ means "ab" repeated one or more times + let pattern1 = regex!("(ab)+"); + println!("Pattern 1: \"(ab)+\" (one or more \"ab\" sequences)\n"); + + println!("Testing pattern 1:"); + assert!(pattern1.matches_exact("ab")); + println!(" \"ab\" matches: true"); + + assert!(pattern1.matches_exact("abab")); + println!(" \"abab\" matches: true"); + + assert!(pattern1.matches_exact("ababab")); + println!(" \"ababab\" matches: true"); + + assert!(!pattern1.matches_exact("aba")); + println!(" \"aba\" matches: false (incomplete sequence)"); + + assert!(!pattern1.matches_exact("")); + println!(" \"\" matches: false (requires at least one)\n"); + + // Pattern: (a|b)* means any combination of 'a' and 'b', zero or more times + let pattern2 = regex!("(a|b)*"); + println!("Pattern 2: \"(a|b)*\" (any combination of 'a' and 'b')\n"); + + 
println!("Testing pattern 2:"); + assert!(pattern2.matches_exact("")); + println!(" \"\" matches: true"); + + assert!(pattern2.matches_exact("a")); + println!(" \"a\" matches: true"); + + assert!(pattern2.matches_exact("b")); + println!(" \"b\" matches: true"); + + assert!(pattern2.matches_exact("ab")); + println!(" \"ab\" matches: true"); + + assert!(pattern2.matches_exact("ba")); + println!(" \"ba\" matches: true"); + + assert!(pattern2.matches_exact("aabbba")); + println!(" \"aabbba\" matches: true"); + + assert!(!pattern2.matches_exact("c")); + println!(" \"c\" matches: false"); + + println!("\nAll tests passed!"); +} diff --git a/examples/07_api_methods.rs b/examples/07_api_methods.rs new file mode 100644 index 0000000..eef9883 --- /dev/null +++ b/examples/07_api_methods.rs @@ -0,0 +1,63 @@ +extern crate gregex; +use gregex::*; + +fn main() { + println!("=== API Methods Example ===\n"); + println!("This example demonstrates all available API methods.\n"); + + let pattern = regex!("ab+"); + println!("Pattern: \"ab+\" (one 'a' followed by one or more 'b's)\n"); + + // 1. matches_exact: Check if entire string matches + println!("1. matches_exact(text) - Check if entire string matches:"); + assert!(pattern.matches_exact("ab")); + println!(" matches_exact(\"ab\"): true"); + + assert!(pattern.matches_exact("abbb")); + println!(" matches_exact(\"abbb\"): true"); + + assert!(!pattern.matches_exact("a")); + println!(" matches_exact(\"a\"): false\n"); + + // 2. is_match: Check if pattern appears anywhere in text + println!("2. is_match(text) - Check if pattern appears anywhere:"); + assert!(pattern.is_match("ab")); + println!(" is_match(\"ab\"): true"); + + assert!(pattern.is_match("prefix_abb_suffix")); + println!(" is_match(\"prefix_abb_suffix\"): true"); + + assert!(!pattern.is_match("xyz")); + println!(" is_match(\"xyz\"): false\n"); + + // 3. find: Get first match position + println!("3. 
find(text) - Get first match position (start, end):"); + let find_text = "xyzabbbxyz"; + match pattern.find(find_text) { + Some((start, end)) => { + let matched = &find_text[start..end]; + println!( + " find(\"{}\"): Some(({}, {})) -> \"{}\"", + find_text, start, end, matched + ); + assert_eq!(start, 3); + // Note: The NFA matches greedily up to the found position + } + None => panic!("Should have found a match"), + } + + match pattern.find("xyz") { + Some(_) => panic!("Should not have found a match"), + None => println!(" find(\"xyz\"): None\n"), + } + + // 4. find_iter: Iterate over all matches + println!("4. find_iter(text) - Iterator over all non-overlapping matches:"); + let text = "xabxabbxabbbx"; + let matches: Vec<(usize, usize)> = pattern.find_iter(text).collect(); + println!(" find_iter(\"{}\"): {:?}", text, matches); + assert_eq!(matches.len(), 3); + println!(" Found {} matches\n", matches.len()); + + println!("All API methods work correctly!"); +} diff --git a/examples/08_compile_time_construction.rs b/examples/08_compile_time_construction.rs new file mode 100644 index 0000000..b146411 --- /dev/null +++ b/examples/08_compile_time_construction.rs @@ -0,0 +1,34 @@ +extern crate gregex; +use gregex::*; + +fn main() { + println!("=== Compile-Time NFA Construction Example ===\n"); + println!("This example verifies that regex patterns are constructed at compile-time."); + println!( + "Use 'cargo expand --example 08_compile_time_construction' to see the expanded code.\n" + ); + + // All of these patterns are compiled to NFA at compile-time, + // resulting in zero runtime overhead for NFA construction + + println!("Testing character literal:"); + let char_pattern = regex!('a'); + assert!(char_pattern.matches_exact("a")); + println!(" regex!('a') works\n"); + + println!("Testing simple string:"); + let simple_pattern = regex!("abc"); + assert!(simple_pattern.matches_exact("abc")); + println!(" regex!(\"abc\") works\n"); + + println!("Testing complex pattern:"); 
+ let complex_pattern = regex!("(a|b)+c?"); + assert!(complex_pattern.matches_exact("abc")); + assert!(complex_pattern.matches_exact("ab")); + assert!(complex_pattern.matches_exact("bac")); + println!(" regex!(\"(a|b)+c?\") works\n"); + + println!("All patterns are constructed at compile-time!"); + println!("\nNote: The NFA is embedded directly in the binary,"); + println!("eliminating all runtime regex parsing overhead."); +} diff --git a/examples/dot.rs b/examples/dot.rs deleted file mode 100644 index 2f7d9b4..0000000 --- a/examples/dot.rs +++ /dev/null @@ -1,9 +0,0 @@ -extern crate gregex; -use gregex::*; - -fn main() { - let runner = regex!(dot!('a', 'b', 'c')); - assert_eq!(runner.run("abc"), true); - assert_eq!(runner.run("ab"), false); - assert_eq!(runner.run("abcd"), false); -} diff --git a/examples/or.rs b/examples/or.rs deleted file mode 100644 index 4353a3f..0000000 --- a/examples/or.rs +++ /dev/null @@ -1,9 +0,0 @@ -extern crate gregex; -use gregex::*; - -fn main() { - let runner = regex!(or!('a', 'b', 'c')); - assert_eq!(runner.run("a"), true); - assert_eq!(runner.run("b"), true); - assert_eq!(runner.run("c"), true); -} diff --git a/examples/star.rs b/examples/star.rs deleted file mode 100644 index e97f2a4..0000000 --- a/examples/star.rs +++ /dev/null @@ -1,9 +0,0 @@ -extern crate gregex; -use gregex::*; - -fn main() { - let runner = regex!(star!('a')); - assert_eq!(runner.run("a"), true); - assert_eq!(runner.run("aa"), true); - assert_eq!(runner.run(""), true); -} diff --git a/examples/usecase_identifier_validator.rs b/examples/usecase_identifier_validator.rs new file mode 100644 index 0000000..0ef9259 --- /dev/null +++ b/examples/usecase_identifier_validator.rs @@ -0,0 +1,42 @@ +extern crate gregex; +use gregex::*; + +fn main() { + println!("=== Use Case: Identifier Validator ===\n"); + println!("This example shows how to validate programming language identifiers."); + println!( + "Valid identifiers: start with a letter, followed by zero or more 
letters or digits\n" + ); + + // Pattern for identifiers: letter followed by zero or more (letter or digit) + // Simplified to (a|b|c) followed by zero or more (a|b|c|d) for demonstration + let identifier_validator = regex!("(a|b|c)(a|b|c|d)*"); + + println!("Pattern: \"(a|b|c)(a|b|c|d)*\""); + println!("Meaning: Starts with a-c, followed by zero or more a-d\n"); + + let test_cases = vec![ + ("a", true, "single letter"), + ("abc", true, "multiple letters"), + ("ad", true, "letter with digit-like char"), + ("abcd", true, "letter with multiple chars"), + ("cba", true, "different starting letter"), + ("", false, "empty string"), + ("d", false, "starts with invalid char"), + ("1a", false, "starts with number-like char"), + ]; + + println!("Testing identifier validation:"); + for (input, expected, description) in test_cases { + let result = identifier_validator.matches_exact(input); + let status = if result == expected { "PASS" } else { "FAIL" }; + println!("[{}] '{}' -> {} ({})", status, input, result, description); + assert_eq!( + result, expected, + "Failed for input '{}': {}", + input, description + ); + } + + println!("\nAll identifier validation tests passed!"); +} diff --git a/examples/usecase_simple_url_matcher.rs b/examples/usecase_simple_url_matcher.rs new file mode 100644 index 0000000..8a73714 --- /dev/null +++ b/examples/usecase_simple_url_matcher.rs @@ -0,0 +1,69 @@ +extern crate gregex; +use gregex::*; + +fn main() { + println!("=== Use Case: Simple URL Path Matcher ===\n"); + println!("This example shows pattern matching for URL-like paths.\n"); + + // Pattern 1: Matching repeated path segments (one or more 'a') + let path_pattern = regex!("a+"); + println!("Pattern 1: \"a+\" (one or more 'a's - like /a, /aa, /aaa)"); + + let path_tests = vec![ + ("a", true, "single segment"), + ("aa", true, "double segment"), + ("aaa", true, "triple segment"), + ("", false, "empty path"), + ("b", false, "wrong character"), + ]; + + println!("\nTesting path pattern:"); + 
for (input, expected, description) in path_tests { + let result = path_pattern.matches_exact(input); + let status = if result == expected { "PASS" } else { "FAIL" }; + println!("[{}] '{}' -> {} ({})", status, input, result, description); + assert_eq!(result, expected); + } + + // Pattern 2: Optional protocol (like http or https) + let protocol_pattern = regex!("h?"); + println!("\nPattern 2: \"h?\" (zero or one 'h' - like optional http prefix)"); + + let protocol_tests = vec![ + ("", true, "no protocol"), + ("h", true, "with protocol"), + ("hh", false, "double protocol"), + ]; + + println!("\nTesting protocol pattern:"); + for (input, expected, description) in protocol_tests { + let result = protocol_pattern.matches_exact(input); + let status = if result == expected { "PASS" } else { "FAIL" }; + println!("[{}] '{}' -> {} ({})", status, input, result, description); + assert_eq!(result, expected); + } + + // Pattern 3: Complex path with alternation + let complex_path = regex!("(a|b)+"); + println!("\nPattern 3: \"(a|b)+\" (one or more 'a' or 'b' - flexible paths)"); + + let complex_tests = vec![ + ("a", true, "single a"), + ("b", true, "single b"), + ("ab", true, "a followed by b"), + ("ba", true, "b followed by a"), + ("aabbba", true, "mixed sequence"), + ("", false, "empty"), + ("c", false, "invalid character"), + ]; + + println!("\nTesting complex path pattern:"); + for (input, expected, description) in complex_tests { + let result = complex_path.matches_exact(input); + let status = if result == expected { "PASS" } else { "FAIL" }; + println!("[{}] '{}' -> {} ({})", status, input, result, description); + assert_eq!(result, expected); + } + + println!("\nAll URL path matching tests passed!"); +} diff --git a/examples/usecase_text_search.rs b/examples/usecase_text_search.rs new file mode 100644 index 0000000..5465c3f --- /dev/null +++ b/examples/usecase_text_search.rs @@ -0,0 +1,56 @@ +extern crate gregex; +use gregex::*; + +fn main() { + println!("=== Use Case: 
Text Search ===\n"); + println!("This example demonstrates finding patterns in text documents.\n"); + + let pattern = regex!("(a|b)+c"); + println!("Pattern: \"(a|b)+c\""); + println!("Meaning: One or more 'a' or 'b', followed by 'c'\n"); + + // Example 1: Finding pattern in text + println!("Example 1: Finding single occurrence"); + let text1 = "The pattern abc appears here"; + println!("Text: \"{}\"", text1); + + if let Some((start, end)) = pattern.find(text1) { + let matched = &text1[start..end]; + println!("Found: \"{}\" at position {}-{}", matched, start, end); + assert_eq!(matched, "abc"); + } else { + println!("No match found"); + } + + // Example 2: Finding multiple occurrences + println!("\nExample 2: Finding multiple occurrences"); + let text2 = "Patterns: abc, bac, aabc, and bbbac appear here"; + println!("Text: \"{}\"", text2); + println!("Matches found:"); + + let matches: Vec<(usize, usize)> = pattern.find_iter(text2).collect(); + for (start, end) in &matches { + let matched = &text2[*start..*end]; + println!(" \"{}\" at position {}-{}", matched, start, end); + } + assert_eq!(matches.len(), 4); + println!("Total matches: {}", matches.len()); + + // Example 3: Checking if pattern exists anywhere + println!("\nExample 3: Quick existence check"); + let test_cases = vec![ + ("This has abc in it", true), + ("This has bbbac too", true), + ("This has abd but not our pattern", false), + ("No match here", false), + ]; + + for (text, expected) in test_cases { + let found = pattern.is_match(text); + let status = if found == expected { "PASS" } else { "FAIL" }; + println!("[{}] \"{}\" -> {}", status, text, found); + assert_eq!(found, expected); + } + + println!("\nAll text search tests passed!"); +} diff --git a/gregex-logic/Cargo.toml b/gregex-logic/Cargo.toml index ef2d4da..09933eb 100644 --- a/gregex-logic/Cargo.toml +++ b/gregex-logic/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "gregex-logic" -version = "0.1.1" +version = "0.2.0" edition = "2021" authors = 
["Saphereye "] license = "MIT" -description = "Logic for the gregex crate" +description = "Logic for the gregex crate with Plus and Question operator support" keywords = ["regex", "nfa", "automata"] categories = ["text-processing"] documentation = "https://docs.rs/gregex-logic" diff --git a/gregex-logic/README.md b/gregex-logic/README.md index 2d29383..d2720c3 100644 --- a/gregex-logic/README.md +++ b/gregex-logic/README.md @@ -1,6 +1,66 @@ # Gregex Logic -Contains the underlying logic of the Gregex crate. This crate is responsible for converting the Node tree to the NFA. The NFA is then used to match the input string. -The crate uses the [Glushkov's Construction Algorithm](https://en.wikipedia.org/wiki/Glushkov%27s_construction_algorithm) to convert the Node tree to the NFA. The advantage over the Thompson's Construction Algorithm is that the NFA generated has states equal to number of terminals + 1. Although, the NFA generated by Thumpson's can be converted to the Glushkov's form, by removing the epsilon transitions. +Core logic library for the Gregex regular expression engine. -The `translation` module contains the code to convert the Node tree to the NFA. The `nfa` module contains the code to match the input string with the NFA. \ No newline at end of file +## Overview + +`gregex-logic` implements the fundamental algorithms and data structures for regular expression matching using Non-deterministic Finite Automata (NFA). This crate provides the runtime engine that powers the `gregex` library's compile-time regex capabilities. + +## Architecture + +### Glushkov's Construction Algorithm + +This library uses [Glushkov's construction algorithm](https://en.wikipedia.org/wiki/Glushkov%27s_construction_algorithm) to convert regular expressions into NFAs. The advantage over Thompson's construction is that the generated NFA has exactly `n+1` states for a regex with `n` terminals, making it more compact and efficient. + +### Key Components + +1. 
**NFA Module** (`nfa.rs`) + - Non-deterministic Finite Automaton implementation + - State transitions and acceptance logic + - Matching algorithms for substring and exact matching + - Iterator support for finding all matches + +2. **Translation Module** (`translation/`) + - **Node** (`node.rs`): Abstract syntax tree representation + - **Operator** (`operator.rs`): Regex operator definitions + - **SetTerminal** (`setterminal.rs`): Terminal symbol handling + - Set computation functions: nullability, prefix, suffix, and factors + +## Supported Operators + +- **Concatenation**: Implicit sequencing of characters +- **Alternation** (`|`): Match either left or right expression +- **Kleene Star** (`*`): Zero or more repetitions +- **Plus** (`+`): One or more repetitions +- **Question** (`?`): Zero or one occurrence + +## API Methods + +The NFA struct provides several matching methods: + +- `matches_exact(text)`: Check if entire text matches the pattern +- `is_match(text)`: Check if pattern appears anywhere in text +- `find(text)`: Find first match position +- `find_iter(text)`: Iterator over all non-overlapping matches + +## Usage + +This crate is designed to be used through the `gregex` main crate, which provides the `regex!` macro for compile-time pattern compilation. 
Direct usage of `gregex-logic` is possible but requires manual NFA construction: + +```rust,ignore +use gregex_logic::nfa::NFA; + +// Manual NFA construction +let mut nfa = NFA::new(); +nfa.add_state(1); +nfa.add_accept_state(1); +nfa.add_transition(0, 'a', 1); + +assert!(nfa.matches_exact("a")); +``` + +## Performance + +- **Compile-time construction**: When used through `gregex`, NFAs are built at compile time +- **Linear matching**: O(n*m) time complexity where n is text length and m is NFA states +- **No backtracking**: NFA-based approach avoids exponential backtracking issues diff --git a/gregex-logic/src/lib.rs b/gregex-logic/src/lib.rs index d41c5f2..ba375fe 100644 --- a/gregex-logic/src/lib.rs +++ b/gregex-logic/src/lib.rs @@ -1,7 +1,17 @@ -#[doc = include_str!("../README.md")] +//! # Gregex Logic +//! +//! Core logic library for the Gregex regular expression engine. +//! +//! This crate implements the fundamental algorithms and data structures for regular expression +//! matching using Non-deterministic Finite Automata (NFA) with Glushkov's construction algorithm. +//! +//! For detailed documentation, see the [README](https://github.com/Saphereye/gregex/blob/master/gregex-logic/README.md). + +#![doc = include_str!("../README.md")] + #[cfg(not(doctest))] pub mod nfa; pub mod translation; use std::sync::atomic::AtomicU32; -pub static TERMINAL_COUNT: AtomicU32 = AtomicU32::new(0); +pub static TERMINAL_COUNT: AtomicU32 = AtomicU32::new(1); diff --git a/gregex-logic/src/nfa.rs b/gregex-logic/src/nfa.rs index 82b38d1..b65601b 100644 --- a/gregex-logic/src/nfa.rs +++ b/gregex-logic/src/nfa.rs @@ -4,20 +4,208 @@ use crate::translation::setterminal::SetTerminal; use core::panic; use std::collections::{HashMap, HashSet}; +/// Iterator over non-overlapping matches in a text. 
+pub struct FindIter<'t> { + nfa: &'t NFA, + text: &'t str, + pos: usize, +} + +impl<'t> Iterator for FindIter<'t> { + type Item = (usize, usize); + + fn next(&mut self) -> Option { + if self.pos > self.text.len() { + return None; + } + + // Try to find a match starting from current position or later + for start in self.pos..=self.text.len() { + // Try different lengths for a match + for end in start..=self.text.len() { + if self.nfa.matches_exact(&self.text[start..end]) { + self.pos = end; // Move past this match to avoid overlaps + if self.pos == start { + // Prevent infinite loop on empty matches + self.pos += 1; + } + return Some((start, end)); + } + } + } + None + } +} + /// The `NFA` struct represents a non-deterministic finite automaton. #[derive(Debug, Default)] pub struct NFA { /// Set of all possible states of the NFA. - states: HashSet, + pub(crate) states: HashSet, /// Set of all accepting states. If the NFA ends at any one if these the simulation is succesful. - accept: HashSet, + pub(crate) accept: HashSet, /// The transition function is a map from a pair of a state and a character to a set of states. - transition_function: HashMap<(u32, char), HashSet>, + pub(crate) transition_function: HashMap<(u32, char), HashSet>, } impl NFA { - /// Simulates the NFA with the given input. 
- pub fn run(&self, input: &str) -> bool { + /// Create a new empty NFA + pub fn new() -> Self { + Self::default() + } + + /// Add a state to the NFA + pub fn add_state(&mut self, state: u32) { + self.states.insert(state); + } + + /// Add an accepting state to the NFA + pub fn add_accept_state(&mut self, state: u32) { + self.accept.insert(state); + } + + /// Add a transition to the NFA + pub fn add_transition(&mut self, from: u32, symbol: char, to: u32) { + self.transition_function + .entry((from, symbol)) + .or_insert_with(HashSet::new) + .insert(to); + } + + /// Construct an NFA from raw data (used by macros for compile-time construction) + pub fn from_raw( + states: Vec, + accept: Vec, + transitions: Vec<((u32, char), Vec)>, + ) -> Self { + Self { + states: states.into_iter().collect(), + accept: accept.into_iter().collect(), + transition_function: transitions + .into_iter() + .map(|(key, vals)| (key, vals.into_iter().collect())) + .collect(), + } + } + + /// Get states (for compile-time serialization) + pub fn get_states(&self) -> Vec { + let mut states: Vec<_> = self.states.iter().copied().collect(); + states.sort(); + states + } + + /// Get accept states (for compile-time serialization) + pub fn get_accept_states(&self) -> Vec { + let mut accept: Vec<_> = self.accept.iter().copied().collect(); + accept.sort(); + accept + } + + /// Get transitions (for compile-time serialization) + pub fn get_transitions(&self) -> Vec<((u32, char), Vec)> { + let mut transitions: Vec<_> = self + .transition_function + .iter() + .map(|(&key, val)| { + let mut vals: Vec<_> = val.iter().copied().collect(); + vals.sort(); + (key, vals) + }) + .collect(); + transitions.sort_by_key(|(k, _)| *k); + transitions + } + + /// Checks if the pattern matches anywhere in the input text. + /// + /// This is the primary matching method, similar to Rust's standard regex `is_match`. + /// It returns `true` if the pattern is found anywhere in the input string. 
+ /// + /// # Examples + /// + /// ```no_run + /// use gregex::*; + /// + /// let pattern = regex!("abc"); + /// assert!(pattern.is_match("abc")); + /// assert!(pattern.is_match("xabcy")); // Matches in the middle + /// assert!(!pattern.is_match("xyz")); + /// ``` + pub fn is_match(&self, text: &str) -> bool { + // Try matching starting from each position in the text + for start in 0..=text.len() { + // Try different lengths from this starting position + for end in start..=text.len() { + if self.matches_exact(&text[start..end]) { + return true; + } + } + } + false + } + + /// Finds the first occurrence of the pattern in the text. + /// + /// Returns `Some((start, end))` with byte indices if a match is found, or `None` otherwise. + /// The returned indices represent the shortest match found. + /// + /// # Examples + /// + /// ```no_run + /// use gregex::*; + /// + /// let pattern = regex!("abc"); + /// assert_eq!(pattern.find("xabcy"), Some((1, 4))); + /// assert_eq!(pattern.find("xyz"), None); + /// ``` + pub fn find(&self, text: &str) -> Option<(usize, usize)> { + // Try each starting position + for start in 0..=text.len() { + // Try to find the shortest match from this position + for end in start..=text.len() { + if self.matches_exact(&text[start..end]) { + return Some((start, end)); + } + } + } + None + } + + /// Returns an iterator over all non-overlapping matches in the text. + /// + /// # Examples + /// + /// ```no_run + /// use gregex::*; + /// + /// let pattern = regex!("ab"); + /// let matches: Vec<_> = pattern.find_iter("abxabxab").collect(); + /// // Returns positions of all "ab" occurrences + /// ``` + pub fn find_iter<'t>(&'t self, text: &'t str) -> FindIter<'t> { + FindIter { + nfa: self, + text, + pos: 0, + } + } + + /// Checks if the pattern matches the entire input string exactly. + /// + /// This is the core matching logic that verifies if the entire input + /// string matches the regex pattern from start to end. 
+ /// + /// For substring matching (finding pattern anywhere in text), use `is_match()` instead. + /// + /// # Arguments + /// + /// * `input` - The string to match against + /// + /// # Returns + /// + /// `true` if the entire input string exactly matches the pattern, `false` otherwise. + pub fn matches_exact(&self, input: &str) -> bool { let mut current_states = HashSet::new(); current_states.insert(0); for c in input.chars() { @@ -37,9 +225,15 @@ impl NFA { prefix_set: &HashSet, suffix_set: &HashSet, factors_set: &HashSet, + nullability_set: &HashSet, ) -> Self { let mut nfa = Self::default(); - + + // If the regex is nullable (accepts empty string), add initial state to accept states + if nullability_set.contains(&SetTerminal::Epsilon) { + nfa.accept.insert(0); + } + for i in prefix_set { match *i { SetTerminal::SingleElement(symbol, index) => { @@ -53,7 +247,7 @@ impl NFA { _ => {} } } - + for i in suffix_set { match *i { SetTerminal::SingleElement(_, index) => { @@ -66,13 +260,16 @@ impl NFA { _ => {} } } - + for i in factors_set { match *i { SetTerminal::DoubleElement(_, index1, symbol2, index2) => { nfa.states.insert(index1); nfa.states.insert(index2); - nfa.transition_function.entry((index1, symbol2)).or_insert_with(HashSet::new).insert(index2); + nfa.transition_function + .entry((index1, symbol2)) + .or_insert_with(HashSet::new) + .insert(index2); } SetTerminal::SingleElement(_, _) => { panic!("SingleElement not supported") @@ -80,7 +277,7 @@ impl NFA { _ => {} } } - + nfa } } @@ -101,15 +298,332 @@ mod tests { .into_iter() .collect(), }; - assert!(nfa.run("ab")); + assert!(nfa.matches_exact("ab")); } #[test] fn set_to_nfa_simple_test() { - let prefix_set = vec![SetTerminal::SingleElement('a', 1)].into_iter().collect(); - let suffix_set = vec![SetTerminal::SingleElement('b', 2)].into_iter().collect(); - let factors_set = vec![SetTerminal::DoubleElement('a', 1, 'b', 2)].into_iter().collect(); - let nfa = NFA::set_to_nfa(&prefix_set, &suffix_set, 
&factors_set); - assert!(nfa.run("ab")); + use crate::translation::setterminal::SetTerminal; + let prefix_set = vec![SetTerminal::SingleElement('a', 1)] + .into_iter() + .collect(); + let suffix_set = vec![SetTerminal::SingleElement('b', 2)] + .into_iter() + .collect(); + let factors_set = vec![SetTerminal::DoubleElement('a', 1, 'b', 2)] + .into_iter() + .collect(); + let nullability_set = vec![SetTerminal::Empty].into_iter().collect(); + let nfa = NFA::set_to_nfa(&prefix_set, &suffix_set, &factors_set, &nullability_set); + assert!(nfa.matches_exact("ab")); + } + + #[test] + fn set_to_nfa_plus_test() { + // Test for a+ (one or more 'a') + use crate::translation::setterminal::SetTerminal; + let prefix_set = vec![SetTerminal::SingleElement('a', 1)] + .into_iter() + .collect(); + let suffix_set = vec![SetTerminal::SingleElement('a', 1)] + .into_iter() + .collect(); + let factors_set = vec![SetTerminal::DoubleElement('a', 1, 'a', 1)] + .into_iter() + .collect(); + let nullability_set = vec![SetTerminal::Empty].into_iter().collect(); + let nfa = NFA::set_to_nfa(&prefix_set, &suffix_set, &factors_set, &nullability_set); + + assert!(nfa.matches_exact("a")); + assert!(nfa.matches_exact("aa")); + assert!(nfa.matches_exact("aaa")); + assert!(!nfa.matches_exact("")); + assert!(!nfa.matches_exact("b")); + } + + #[test] + fn set_to_nfa_question_test() { + // Test for a? 
(zero or one 'a') + // Question operator should match empty string (epsilon in suffix) + use crate::translation::node::{ + factors_set, nullability_set, prefix_set, suffix_set, Node, + }; + use crate::translation::operator::Operator; + + let tree = Node::Operation(Operator::Question, Box::new(Node::Terminal('a', 1)), None); + let prefix = prefix_set(&tree); + let suffix = suffix_set(&tree); + let factors = factors_set(&tree); + let nullability = nullability_set(&tree); + + let nfa = NFA::set_to_nfa(&prefix, &suffix, &factors, &nullability); + + // For a?, we expect to match 'a' and empty string + assert!(nfa.matches_exact("a")); + assert!(nfa.matches_exact("")); + assert!(!nfa.matches_exact("aa")); + } + + #[test] + fn set_to_nfa_plus_complex_test() { + // Test for (ab)+ pattern + use crate::translation::node::{ + factors_set, nullability_set, prefix_set, suffix_set, Node, + }; + use crate::translation::operator::Operator; + + let tree = Node::Operation( + Operator::Plus, + Box::new(Node::Operation( + Operator::Concat, + Box::new(Node::Terminal('a', 1)), + Some(Box::new(Node::Terminal('b', 2))), + )), + None, + ); + + let prefix = prefix_set(&tree); + let suffix = suffix_set(&tree); + let factors = factors_set(&tree); + let nullability = nullability_set(&tree); + + let nfa = NFA::set_to_nfa(&prefix, &suffix, &factors, &nullability); + + assert!(nfa.matches_exact("ab")); + assert!(nfa.matches_exact("abab")); + assert!(nfa.matches_exact("ababab")); + assert!(!nfa.matches_exact("")); + assert!(!nfa.matches_exact("a")); + assert!(!nfa.matches_exact("b")); + assert!(!nfa.matches_exact("ba")); + } + + #[test] + fn test_operator_combinations_plus_question() { + // Test a+b? 
(one or more 'a' followed by zero or one 'b') + use crate::translation::node::{ + factors_set, nullability_set, prefix_set, suffix_set, Node, + }; + use crate::translation::operator::Operator; + + let tree = Node::Operation( + Operator::Concat, + Box::new(Node::Operation( + Operator::Plus, + Box::new(Node::Terminal('a', 1)), + None, + )), + Some(Box::new(Node::Operation( + Operator::Question, + Box::new(Node::Terminal('b', 2)), + None, + ))), + ); + + let prefix = prefix_set(&tree); + let suffix = suffix_set(&tree); + let factors = factors_set(&tree); + let nullability = nullability_set(&tree); + + let nfa = NFA::set_to_nfa(&prefix, &suffix, &factors, &nullability); + + assert!(nfa.matches_exact("a")); + assert!(nfa.matches_exact("ab")); + assert!(nfa.matches_exact("aa")); + assert!(nfa.matches_exact("aab")); + assert!(!nfa.matches_exact("abb")); + assert!(!nfa.matches_exact("")); + assert!(!nfa.matches_exact("b")); + } + + #[test] + fn test_operator_combinations_star_plus() { + // Test a*b+ (zero or more 'a' followed by one or more 'b') + use crate::translation::node::{ + factors_set, nullability_set, prefix_set, suffix_set, Node, + }; + use crate::translation::operator::Operator; + + let tree = Node::Operation( + Operator::Concat, + Box::new(Node::Operation( + Operator::Production, + Box::new(Node::Terminal('a', 1)), + None, + )), + Some(Box::new(Node::Operation( + Operator::Plus, + Box::new(Node::Terminal('b', 2)), + None, + ))), + ); + + let prefix = prefix_set(&tree); + let suffix = suffix_set(&tree); + let factors = factors_set(&tree); + let nullability = nullability_set(&tree); + + let nfa = NFA::set_to_nfa(&prefix, &suffix, &factors, &nullability); + + assert!(nfa.matches_exact("b")); + assert!(nfa.matches_exact("ab")); + assert!(nfa.matches_exact("aab")); + assert!(nfa.matches_exact("bb")); + assert!(nfa.matches_exact("abb")); + assert!(!nfa.matches_exact("")); + assert!(!nfa.matches_exact("a")); + assert!(!nfa.matches_exact("aa")); + } + + #[test] + fn 
test_operator_combinations_question_star() { + // Test a?b* (zero or one 'a' followed by zero or more 'b') + use crate::translation::node::{ + factors_set, nullability_set, prefix_set, suffix_set, Node, + }; + use crate::translation::operator::Operator; + + let tree = Node::Operation( + Operator::Concat, + Box::new(Node::Operation( + Operator::Question, + Box::new(Node::Terminal('a', 1)), + None, + )), + Some(Box::new(Node::Operation( + Operator::Production, + Box::new(Node::Terminal('b', 2)), + None, + ))), + ); + + let prefix = prefix_set(&tree); + let suffix = suffix_set(&tree); + let factors = factors_set(&tree); + let nullability = nullability_set(&tree); + + let nfa = NFA::set_to_nfa(&prefix, &suffix, &factors, &nullability); + + assert!(nfa.matches_exact("")); + assert!(nfa.matches_exact("a")); + assert!(nfa.matches_exact("b")); + assert!(nfa.matches_exact("ab")); + assert!(nfa.matches_exact("abb")); + assert!(nfa.matches_exact("bb")); + assert!(!nfa.matches_exact("aa")); + assert!(!nfa.matches_exact("aab")); + } + + #[test] + fn test_or_with_plus_and_question() { + // Test a+|b? 
(one or more 'a' OR zero or one 'b') + use crate::translation::node::{ + factors_set, nullability_set, prefix_set, suffix_set, Node, + }; + use crate::translation::operator::Operator; + + let tree = Node::Operation( + Operator::Or, + Box::new(Node::Operation( + Operator::Plus, + Box::new(Node::Terminal('a', 1)), + None, + )), + Some(Box::new(Node::Operation( + Operator::Question, + Box::new(Node::Terminal('b', 2)), + None, + ))), + ); + + let prefix = prefix_set(&tree); + let suffix = suffix_set(&tree); + let factors = factors_set(&tree); + let nullability = nullability_set(&tree); + + let nfa = NFA::set_to_nfa(&prefix, &suffix, &factors, &nullability); + + assert!(nfa.matches_exact("")); + assert!(nfa.matches_exact("a")); + assert!(nfa.matches_exact("aa")); + assert!(nfa.matches_exact("b")); + assert!(!nfa.matches_exact("ab")); + assert!(!nfa.matches_exact("bb")); + } + + #[test] + fn test_nested_operators() { + // Test (a+)* (zero or more of one-or-more 'a') + use crate::translation::node::{ + factors_set, nullability_set, prefix_set, suffix_set, Node, + }; + use crate::translation::operator::Operator; + + let tree = Node::Operation( + Operator::Production, + Box::new(Node::Operation( + Operator::Plus, + Box::new(Node::Terminal('a', 1)), + None, + )), + None, + ); + + let prefix = prefix_set(&tree); + let suffix = suffix_set(&tree); + let factors = factors_set(&tree); + let nullability = nullability_set(&tree); + + let nfa = NFA::set_to_nfa(&prefix, &suffix, &factors, &nullability); + + assert!(nfa.matches_exact("")); + assert!(nfa.matches_exact("a")); + assert!(nfa.matches_exact("aa")); + assert!(nfa.matches_exact("aaa")); + assert!(!nfa.matches_exact("b")); + } + + #[test] + fn test_complex_combination() { + // Test (a|b)+c? 
(one or more of 'a' or 'b', followed by zero or one 'c') + use crate::translation::node::{ + factors_set, nullability_set, prefix_set, suffix_set, Node, + }; + use crate::translation::operator::Operator; + + let tree = Node::Operation( + Operator::Concat, + Box::new(Node::Operation( + Operator::Plus, + Box::new(Node::Operation( + Operator::Or, + Box::new(Node::Terminal('a', 1)), + Some(Box::new(Node::Terminal('b', 2))), + )), + None, + )), + Some(Box::new(Node::Operation( + Operator::Question, + Box::new(Node::Terminal('c', 3)), + None, + ))), + ); + + let prefix = prefix_set(&tree); + let suffix = suffix_set(&tree); + let factors = factors_set(&tree); + let nullability = nullability_set(&tree); + + let nfa = NFA::set_to_nfa(&prefix, &suffix, &factors, &nullability); + + assert!(nfa.matches_exact("a")); + assert!(nfa.matches_exact("b")); + assert!(nfa.matches_exact("ac")); + assert!(nfa.matches_exact("bc")); + assert!(nfa.matches_exact("abc")); + assert!(nfa.matches_exact("aac")); + assert!(!nfa.matches_exact("")); + assert!(!nfa.matches_exact("c")); + assert!(!nfa.matches_exact("acc")); } } diff --git a/gregex-logic/src/translation/mod.rs b/gregex-logic/src/translation/mod.rs index 25c54a6..74fbb31 100644 --- a/gregex-logic/src/translation/mod.rs +++ b/gregex-logic/src/translation/mod.rs @@ -1,4 +1,4 @@ //! Contains the translation submodules necessary to translate the raw regex to a NFA. 
+pub mod node; pub mod operator; pub mod setterminal; -pub mod node; \ No newline at end of file diff --git a/gregex-logic/src/translation/node.rs b/gregex-logic/src/translation/node.rs index 47fc22b..09f3011 100644 --- a/gregex-logic/src/translation/node.rs +++ b/gregex-logic/src/translation/node.rs @@ -26,14 +26,26 @@ pub fn nullability_set(regex_tree: &Node) -> HashSet { set.extend(nullability_set(right.as_ref().unwrap())); } Operator::Concat => { - set.extend(nullability_set(left)); + let left_set = nullability_set(left); let right_set = nullability_set(right.as_ref().unwrap()); - set.extend(right_set); + // Concat is nullable only if both left and right are nullable + if left_set.contains(&SetTerminal::Epsilon) + && right_set.contains(&SetTerminal::Epsilon) + { + set.insert(SetTerminal::Epsilon); + } else { + set.insert(SetTerminal::Empty); + } } Operator::Production => { set.insert(SetTerminal::Epsilon); } - _ => todo!(), + Operator::Plus => { + set.insert(SetTerminal::Empty); + } + Operator::Question => { + set.insert(SetTerminal::Epsilon); + } }, } set @@ -68,7 +80,14 @@ pub fn prefix_set(regex_tree: &Node) -> HashSet { let left_set = prefix_set(left); set = left_set; } - _ => todo!(), + Operator::Plus => { + let left_set = prefix_set(left); + set = left_set; + } + Operator::Question => { + let left_set = prefix_set(left); + set = left_set; + } }, } set @@ -103,14 +122,21 @@ pub fn suffix_set(regex_tree: &Node) -> HashSet { let left_set = suffix_set(left); set = left_set; } - _ => todo!(), + Operator::Plus => { + let left_set = suffix_set(left); + set = left_set; + } + Operator::Question => { + let left_set = suffix_set(left); + set = left_set; + } }, } set } /// The `factors_set` function returns the set of [SetTerminal] that are factors of a regular expression tree. -/// +/// /// Factors in this scenario mean the set of terminals that can be produced by the regular expression. 
pub fn factors_set(regex_tree: &Node) -> HashSet { let mut set = HashSet::new(); @@ -150,7 +176,22 @@ pub fn factors_set(regex_tree: &Node) -> HashSet { } } } - _ => todo!(), + Operator::Plus => { + let left_set = factors_set(left); + let suffix_set = suffix_set(left); + let prefix_set = prefix_set(left); + set.extend(left_set); + + for i in suffix_set { + for j in &prefix_set { + set.insert(i.product(j)); + } + } + } + Operator::Question => { + let left_set = factors_set(left); + set.extend(left_set); + } }, } @@ -212,6 +253,26 @@ mod tests { assert_eq!(set, test_set); } + #[test] + fn nullability_set_test_plus() { + let tree = Node::Operation(Operator::Plus, Box::new(Node::Terminal('a', 1)), None); + + let set = nullability_set(&tree); + let mut test_set = HashSet::new(); + test_set.insert(SetTerminal::Empty); + assert_eq!(set, test_set); + } + + #[test] + fn nullability_set_test_question() { + let tree = Node::Operation(Operator::Question, Box::new(Node::Terminal('a', 1)), None); + + let set = nullability_set(&tree); + let mut test_set = HashSet::new(); + test_set.insert(SetTerminal::Epsilon); + assert_eq!(set, test_set); + } + #[test] fn prefix_set_test_or() { let tree = Node::Operation( @@ -261,6 +322,26 @@ mod tests { assert_eq!(set, test_set); } + #[test] + fn prefix_set_test_plus() { + let tree = Node::Operation(Operator::Plus, Box::new(Node::Terminal('a', 1)), None); + + let set = prefix_set(&tree); + let mut test_set = HashSet::new(); + test_set.insert(SetTerminal::SingleElement('a', 1)); + assert_eq!(set, test_set); + } + + #[test] + fn prefix_set_test_question() { + let tree = Node::Operation(Operator::Question, Box::new(Node::Terminal('a', 1)), None); + + let set = prefix_set(&tree); + let mut test_set = HashSet::new(); + test_set.insert(SetTerminal::SingleElement('a', 1)); + assert_eq!(set, test_set); + } + #[test] fn prefix_set_test_complete() { // Linearized regex: (a(ab)*)* + (ba)* @@ -350,6 +431,26 @@ mod tests { assert_eq!(set, test_set); } + 
#[test] + fn suffix_set_test_plus() { + let tree = Node::Operation(Operator::Plus, Box::new(Node::Terminal('a', 1)), None); + + let set = suffix_set(&tree); + let mut test_set = HashSet::new(); + test_set.insert(SetTerminal::SingleElement('a', 1)); + assert_eq!(set, test_set); + } + + #[test] + fn suffix_set_test_question() { + let tree = Node::Operation(Operator::Question, Box::new(Node::Terminal('a', 1)), None); + + let set = suffix_set(&tree); + let mut test_set = HashSet::new(); + test_set.insert(SetTerminal::SingleElement('a', 1)); + assert_eq!(set, test_set); + } + #[test] fn suffix_set_test_complete() { // Linearized regex: (a(ab)*)* + (ba)* @@ -473,4 +574,63 @@ mod tests { test_set.insert(SetTerminal::DoubleElement('a', 5, 'b', 4)); assert_eq!(set, test_set); } + + #[test] + fn factors_set_test_plus() { + let tree = Node::Operation(Operator::Plus, Box::new(Node::Terminal('a', 1)), None); + + let set = factors_set(&tree); + let mut test_set = HashSet::new(); + test_set.insert(SetTerminal::DoubleElement('a', 1, 'a', 1)); + assert_eq!(set, test_set); + } + + #[test] + fn factors_set_test_question() { + let tree = Node::Operation(Operator::Question, Box::new(Node::Terminal('a', 1)), None); + + let set = factors_set(&tree); + let mut test_set = HashSet::new(); + test_set.insert(SetTerminal::Empty); + assert_eq!(set, test_set); + } + + #[test] + fn factors_set_test_plus_complex() { + // Linearized regex: (ab)+ + let tree = Node::Operation( + Operator::Plus, + Box::new(Node::Operation( + Operator::Concat, + Box::new(Node::Terminal('a', 1)), + Some(Box::new(Node::Terminal('b', 2))), + )), + None, + ); + + let set = factors_set(&tree); + let mut test_set = HashSet::new(); + test_set.insert(SetTerminal::DoubleElement('a', 1, 'b', 2)); + test_set.insert(SetTerminal::DoubleElement('b', 2, 'a', 1)); + assert_eq!(set, test_set); + } + + #[test] + fn factors_set_test_question_complex() { + // Linearized regex: (ab)? 
+ let tree = Node::Operation( + Operator::Question, + Box::new(Node::Operation( + Operator::Concat, + Box::new(Node::Terminal('a', 1)), + Some(Box::new(Node::Terminal('b', 2))), + )), + None, + ); + + let set = factors_set(&tree); + let mut test_set = HashSet::new(); + test_set.insert(SetTerminal::DoubleElement('a', 1, 'b', 2)); + assert_eq!(set, test_set); + } } diff --git a/gregex-logic/src/translation/operator.rs b/gregex-logic/src/translation/operator.rs index 2c54ea8..faf501d 100644 --- a/gregex-logic/src/translation/operator.rs +++ b/gregex-logic/src/translation/operator.rs @@ -8,4 +8,4 @@ pub enum Operator { Production, Plus, Question, -} \ No newline at end of file +} diff --git a/gregex-logic/src/translation/setterminal.rs b/gregex-logic/src/translation/setterminal.rs index 3bdd181..d27845f 100644 --- a/gregex-logic/src/translation/setterminal.rs +++ b/gregex-logic/src/translation/setterminal.rs @@ -92,4 +92,4 @@ mod tests { assert_eq!(d.product(&a), SetTerminal::Empty); assert_eq!(b.product(&d), SetTerminal::Empty); } -} \ No newline at end of file +} diff --git a/gregex-macros/Cargo.toml b/gregex-macros/Cargo.toml index 9f6751b..295ce3a 100644 --- a/gregex-macros/Cargo.toml +++ b/gregex-macros/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "gregex-macros" -version = "0.1.1" +version = "0.2.0" edition = "2021" authors = ["Saphereye "] license = "MIT" -description = "Proc-Macros for the gregex crate" +description = "Proc-Macros for the gregex crate with string parsing support" keywords = ["regex", "nfa", "automata"] categories = ["text-processing"] documentation = "https://docs.rs/gregex-macros" @@ -18,7 +18,7 @@ readme = "README.md" repository = "https://github.com/Saphereye/gregex/gregex-macros" [dependencies] -gregex-logic = { path = "../gregex-logic", version = "0.1.0" } +gregex-logic = { path = "../gregex-logic", version = "0.2.0" } syn = { version = "1.0", features = ["full"] } quote = "1.0" proc-macro2 = "1.0" diff --git a/gregex-macros/README.md 
b/gregex-macros/README.md index 3c37ea7..108e1a0 100644 --- a/gregex-macros/README.md +++ b/gregex-macros/README.md @@ -1,19 +1,131 @@ # Gregex Macros -Contains the macro interface for all the gregex function. -Without these, users would have to rely on function that generate the Node tree. To explain this we can first look at an example. +Procedural macros for compile-time regular expression parsing and NFA construction. -Let's take the regex `a*`. +## Overview -The Node tree in our case would be, -```rust -Node::Operation( - Operator::Production, - Box::new(Node::Terminal('a', 0u32)), - None, +`gregex-macros` provides the `regex!` macro that parses regex pattern strings at compile time and generates optimized NFA construction code. This eliminates runtime parsing overhead and enables compile-time validation of regex patterns. + +## The `regex!` Macro + +### Basic Usage + +```rust,ignore +use gregex::regex; + +let pattern = regex!("a+b*"); +``` + +### Compile-Time Construction + +The macro parses the regex string during compilation and directly embeds the resulting NFA data structure. For example, `regex!("abc")` expands to: + +```rust,ignore +NFA::from_raw( + vec![2, 3, 4], // States + vec![4], // Accept states + vec![ // Transitions + ((0, 'a'), vec![2]), + ((2, 'b'), vec![3]), + ((3, 'c'), vec![4]), + ] ) ``` -Although we can wrap this in a function or a `macro_rules!` macro, the generated code is quite bloated. We can do the hard work during compilation, i.e. converting our regex to the end NFA. +This means zero runtime overhead for pattern compilation. + +## Parser Implementation + +The macro uses a recursive-descent parser (one parsing function per operator-precedence level) to handle regex syntax: + +### Supported Syntax + +- **Literals**: `a`, `b`, `c`, etc.
+- **Concatenation**: `ab` (implicit) +- **Alternation**: `a|b` (OR operator) +- **Kleene Star**: `a*` (zero or more) +- **Plus**: `a+` (one or more) +- **Question**: `a?` (zero or one) +- **Grouping**: `(...)` for precedence control + +### Operator Precedence + +From highest to lowest: +1. Postfix operators: `*`, `+`, `?` +2. Concatenation (implicit) +3. Alternation: `|` + +### Examples + +```rust,ignore +regex!("(a|b)+") // One or more of 'a' or 'b' +regex!("a+b?c*") // At least one 'a', optional 'b', zero or more 'c' +regex!("(ab|cd)*") // Zero or more repetitions of "ab" or "cd" +``` + +## Implementation Details + +### Lexical Analysis + +The parser tokenizes the input string into: +- Character literals +- Operators (`*`, `+`, `?`, `|`) +- Parentheses (`(`, `)`) +- End-of-input marker + +### Syntax Tree Generation + +Tokens are parsed into an abstract syntax tree (AST) using the `Node` type from `gregex-logic`: + +```rust,ignore +pub enum Node { + Terminal(char, u32), + Operation(Operator, Box<Node>, Option<Box<Node>>), +} +``` + +### NFA Generation + +The AST is processed using Glushkov's algorithm to compute: +1. **Nullability set**: Whether the pattern matches the empty string +2. **Prefix set**: Initial characters that can start a match +3. **Suffix set**: Final characters that can end a match +4. **Factors set**: Valid character-to-character transitions + +These sets are used to construct the final NFA.
+ +## Error Handling + +The parser provides compile-time error messages for invalid syntax: + +```rust,ignore +regex!("(abc") // Error: Expected closing parenthesis +regex!("a|") // Error: Empty expression +``` + +## Performance Characteristics + +- **Compile time**: O(n) where n is pattern length +- **Generated code**: Direct NFA data structure (no runtime parsing) +- **Type safety**: All errors caught at compile time + +## Limitations + +Current limitations of the parser: + +- No escape sequences (e.g., `\n`, `\t`) +- No character classes (e.g., `[a-z]`, `\d`, `\w`) +- No wildcards (`.`) +- No counted quantifiers (e.g., `{n,m}`) +- No anchors (e.g., `^`, `$`) + +These are planned for future versions. + +## Integration with gregex-logic + +This crate depends on `gregex-logic` for: +- AST node types (`Node`, `Operator`) +- Set computation functions (`nullability_set`, `prefix_set`, etc.) +- NFA construction logic -Currently converting to NFA is not possible, but this crate can convert it to the interstitial form of the Node Tree. \ No newline at end of file +The macro acts as a compile-time bridge, converting string patterns into executable NFA data structures. diff --git a/gregex-macros/src/lib.rs b/gregex-macros/src/lib.rs index 59e8212..dcc7cb2 100644 --- a/gregex-macros/src/lib.rs +++ b/gregex-macros/src/lib.rs @@ -1,173 +1,401 @@ -#[doc = include_str!("../README.md")] +//! # Gregex Macros +//! +//! Procedural macros for compile-time regular expression parsing and NFA construction. +//! +//! This crate provides the `regex!` macro that parses regex pattern strings at compile time +//! and generates optimized NFA construction code, eliminating runtime parsing overhead. +//! +//! For detailed documentation, see the [README](https://github.com/Saphereye/gregex/blob/master/gregex-macros/README.md).
+ +#![doc = include_str!("../README.md")] + #[cfg(not(doctest))] extern crate proc_macro; use proc_macro::TokenStream; use quote::quote; -use syn::{parse_macro_input, Expr, ExprLit, ExprMacro, Lit}; +use syn::{parse_macro_input, Expr, ExprLit, Lit}; -#[proc_macro] -pub fn dot(input: TokenStream) -> TokenStream { - let inputs = parse_macro_input!(input with syn::punctuated::Punctuated::::parse_terminated); - - let nodes = inputs.iter().map(|expr| { - match expr { - Expr::Macro(ExprMacro { mac, .. }) => { - // Handle procedural macro - quote! { #mac } +/// Internal regex parser module using Pratt parsing technique. +/// +/// This module implements a recursive descent parser with operator precedence +/// for parsing regex syntax strings at compile time. It supports: +/// - Literals (a, b, c, ...) +/// - Postfix operators (*, +, ?) +/// - Infix operator (|) +/// - Grouping with parentheses () +/// - Implicit concatenation +mod regex_parser { + use gregex_logic::translation::node::Node; + use gregex_logic::translation::operator::Operator; + use quote::quote; + use std::sync::atomic::Ordering; + + #[derive(Debug, Clone, PartialEq)] + enum Token { + Char(char), + Star, + Plus, + Question, + Pipe, + LParen, + RParen, + Eof, + } + + struct Lexer { + chars: Vec, + pos: usize, + } + + impl Lexer { + fn new(input: &str) -> Self { + Lexer { + chars: input.chars().collect(), + pos: 0, } - Expr::Lit(ExprLit { lit, .. }) => match lit { - Lit::Char(c) => { - let count = gregex_logic::TERMINAL_COUNT - .fetch_add(1, core::sync::atomic::Ordering::SeqCst); - quote! { - gregex_logic::translation::node::Node::Terminal(#c, #count) - } - } - _ => panic!("Unsupported literal type"), - }, - _ => panic!("Unsupported input type"), } - }); - // Generate the code for concatenating nodes - let mut iter = nodes.into_iter(); - let first = iter.next().expect("The input is empty"); - let operations = iter.fold(first, |left, right| { - quote! 
{ - gregex_logic::translation::node::Node::Operation( - gregex_logic::translation::operator::Operator::Concat, - Box::new(#left), - Some(Box::new(#right)) - ) + fn next(&mut self) -> Token { + if self.pos >= self.chars.len() { + return Token::Eof; + } + + let ch = self.chars[self.pos]; + self.pos += 1; + + match ch { + '*' => Token::Star, + '+' => Token::Plus, + '?' => Token::Question, + '|' => Token::Pipe, + '(' => Token::LParen, + ')' => Token::RParen, + c => Token::Char(c), + } + } + + fn peek(&self) -> Token { + if self.pos >= self.chars.len() { + return Token::Eof; + } + + let ch = self.chars[self.pos]; + match ch { + '*' => Token::Star, + '+' => Token::Plus, + '?' => Token::Question, + '|' => Token::Pipe, + '(' => Token::LParen, + ')' => Token::RParen, + c => Token::Char(c), + } } - }); + } - // Generate the final token stream - let gen = quote! { - #operations - }; + #[allow(dead_code)] + pub fn parse(input: &str) -> proc_macro2::TokenStream { + let mut lexer = Lexer::new(input); + parse_or(&mut lexer) + } - gen.into() -} + #[allow(dead_code)] + fn parse_or(lexer: &mut Lexer) -> proc_macro2::TokenStream { + let mut left = parse_concat(lexer); -#[proc_macro] -pub fn or(input: TokenStream) -> TokenStream { - let inputs = parse_macro_input!(input with syn::punctuated::Punctuated::::parse_terminated); - - let nodes = inputs.iter().map(|expr| { - match expr { - Expr::Macro(ExprMacro { mac, .. }) => { - // Handle procedural macro - quote! { #mac } + while lexer.peek() == Token::Pipe { + lexer.next(); // consume '|' + let right = parse_concat(lexer); + left = quote! 
{ + gregex_logic::translation::node::Node::Operation( + gregex_logic::translation::operator::Operator::Or, + Box::new(#left), + Some(Box::new(#right)) + ) + }; + } + + left + } + + #[allow(dead_code)] + fn parse_concat(lexer: &mut Lexer) -> proc_macro2::TokenStream { + let mut nodes = Vec::new(); + + loop { + match lexer.peek() { + Token::Eof | Token::RParen | Token::Pipe => break, + _ => nodes.push(parse_postfix(lexer)), } - Expr::Lit(ExprLit { lit, .. }) => match lit { - Lit::Char(c) => { - let count = gregex_logic::TERMINAL_COUNT - .fetch_add(1, core::sync::atomic::Ordering::SeqCst); - quote! { - gregex_logic::translation::node::Node::Terminal(#c, #count) - } - } - _ => panic!("Unsupported literal type"), - }, - _ => panic!("Unsupported input type"), } - }); - // Generate the code for concatenating nodes - let mut iter = nodes.into_iter(); - let first = iter.next().expect("The input is empty"); - let operations = iter.fold(first, |left, right| { - quote! { - gregex_logic::translation::node::Node::Operation( - gregex_logic::translation::operator::Operator::Or, - Box::new(#left), - Some(Box::new(#right)) - ) + if nodes.is_empty() { + panic!("Empty expression"); } - }); - // Generate the final token stream - let gen = quote! { - #operations - }; + let mut result = nodes[0].clone(); + for node in nodes.iter().skip(1) { + result = quote! { + gregex_logic::translation::node::Node::Operation( + gregex_logic::translation::operator::Operator::Concat, + Box::new(#result), + Some(Box::new(#node)) + ) + }; + } - gen.into() -} + result + } -#[proc_macro] -pub fn star(input: TokenStream) -> TokenStream { - let expr = parse_macro_input!(input as Expr); + #[allow(dead_code)] + fn parse_postfix(lexer: &mut Lexer) -> proc_macro2::TokenStream { + let mut node = parse_atom(lexer); - let node = match expr { - Expr::Macro(ExprMacro { mac, .. }) => { - // Handle procedural macro - quote! { #mac } + loop { + match lexer.peek() { + Token::Star => { + lexer.next(); + node = quote! 
{ + gregex_logic::translation::node::Node::Operation( + gregex_logic::translation::operator::Operator::Production, + Box::new(#node), + None + ) + }; + } + Token::Plus => { + lexer.next(); + node = quote! { + gregex_logic::translation::node::Node::Operation( + gregex_logic::translation::operator::Operator::Plus, + Box::new(#node), + None + ) + }; + } + Token::Question => { + lexer.next(); + node = quote! { + gregex_logic::translation::node::Node::Operation( + gregex_logic::translation::operator::Operator::Question, + Box::new(#node), + None + ) + }; + } + _ => break, + } } - Expr::Lit(ExprLit { lit, .. }) => match lit { - Lit::Char(c) => { + + node + } + + #[allow(dead_code)] + fn parse_atom(lexer: &mut Lexer) -> proc_macro2::TokenStream { + match lexer.next() { + Token::Char(c) => { let count = gregex_logic::TERMINAL_COUNT.fetch_add(1, core::sync::atomic::Ordering::SeqCst); quote! { gregex_logic::translation::node::Node::Terminal(#c, #count) } } - _ => panic!("Unsupported literal type"), - }, - _ => panic!("Unsupported input type"), - }; - - // Generate the code for the star operation - let operation = quote! 
{ - gregex_logic::translation::node::Node::Operation( - gregex_logic::translation::operator::Operator::Production, - Box::new(#node), - None - ) - }; + Token::LParen => { + let node = parse_or(lexer); + if lexer.next() != Token::RParen { + panic!("Expected closing parenthesis"); + } + node + } + _ => panic!("Unexpected token in atom"), + } + } + + /// Parse a regex string directly to a Node (for compile-time NFA construction) + pub fn parse_to_node(pattern: &str) -> Node { + let mut lexer = Lexer::new(pattern); + parse_or_impl(&mut lexer) + } + + fn parse_or_impl(lexer: &mut Lexer) -> Node { + let mut left = parse_concat_impl(lexer); + + while lexer.peek() == Token::Pipe { + lexer.next(); // consume | + let right = parse_concat_impl(lexer); + left = Node::Operation(Operator::Or, Box::new(left), Some(Box::new(right))); + } - // Generate the final token stream - let gen = quote! { - #operation - }; + left + } - gen.into() + fn parse_concat_impl(lexer: &mut Lexer) -> Node { + let mut nodes = Vec::new(); + + loop { + match lexer.peek() { + Token::Char(_) | Token::LParen => { + nodes.push(parse_postfix_impl(lexer)); + } + _ => break, + } + } + + if nodes.is_empty() { + panic!("Empty expression"); + } + + if nodes.len() == 1 { + return nodes.into_iter().next().unwrap(); + } + + let mut iter = nodes.into_iter(); + let mut result = iter.next().unwrap(); + for node in iter { + result = Node::Operation(Operator::Concat, Box::new(result), Some(Box::new(node))); + } + result + } + + fn parse_postfix_impl(lexer: &mut Lexer) -> Node { + let mut node = parse_atom_impl(lexer); + + loop { + match lexer.peek() { + Token::Star => { + lexer.next(); + node = Node::Operation(Operator::Production, Box::new(node), None); + } + Token::Plus => { + lexer.next(); + node = Node::Operation(Operator::Plus, Box::new(node), None); + } + Token::Question => { + lexer.next(); + node = Node::Operation(Operator::Question, Box::new(node), None); + } + _ => break, + } + } + + node + } + + fn 
parse_atom_impl(lexer: &mut Lexer) -> Node { + match lexer.next() { + Token::Char(c) => { + let count = gregex_logic::TERMINAL_COUNT.fetch_add(1, Ordering::SeqCst); + Node::Terminal(c, count) + } + Token::LParen => { + let node = parse_or_impl(lexer); + match lexer.next() { + Token::RParen => node, + _ => panic!("Expected closing parenthesis"), + } + } + _ => panic!("Unexpected token in atom"), + } + } } +/// Main regex macro that builds an NFA from a pattern. +/// +/// Supports two modes: +/// 1. **String parsing (recommended)**: Parse regex syntax strings directly like `regex!("(a|b)+")` +/// 2. **Character literals**: Simple single-character patterns like `regex!('a')` +/// +/// String syntax supports: literals, `ab` (concat), `a|b` (or), `a*` (star), `a+` (plus), `a?` (question), `(...)` (grouping) +/// +/// **Note**: The macro compiles the NFA at compile-time and embeds it directly, resulting in +/// zero runtime NFA construction overhead. +/// +/// # Examples +/// +/// ```ignore +/// use gregex::*; +/// +/// // String syntax (recommended) +/// let pattern = regex!("a+b*"); +/// assert!(pattern.is_match("aaabbb")); +/// +/// // Single character +/// let pattern = regex!('x'); +/// assert!(pattern.matches_exact("x")); +/// ``` #[proc_macro] pub fn regex(input: TokenStream) -> TokenStream { let expr = parse_macro_input!(input as Expr); // Convert the input expression into a Node structure - let node = match expr { - Expr::Macro(ExprMacro { mac, .. }) => { - // Handle procedural macro - quote! { #mac } - } + match expr { Expr::Lit(ExprLit { lit, .. }) => match lit { - Lit::Char(c) => { - let count = - gregex_logic::TERMINAL_COUNT.fetch_add(1, core::sync::atomic::Ordering::SeqCst); - quote! { - gregex_logic::translation::node::Node::Terminal(#c, #count) - } - } - _ => panic!("Unsupported literal type"), + Lit::Char(c) => build_nfa_for_char(c.value()), + Lit::Str(s) => build_nfa_for_string(&s.value()), + _ => panic!("regex! 
only supports string literals and character literals. Use string syntax like regex!(\"a+b*\") instead of macro expressions."), }, - _ => panic!("Unsupported input type"), - }; + _ => panic!("regex! only supports string literals and character literals. Use string syntax like regex!(\"a+b*\") instead of macro expressions."), + } +} - // Generate the code to convert the Node into a Regex - let gen = quote! { - { - let regex_tree = #node; - let prefix_set = gregex_logic::translation::node::prefix_set(®ex_tree); - let suffix_set = gregex_logic::translation::node::suffix_set(®ex_tree); - let factors_set = gregex_logic::translation::node::factors_set(®ex_tree); - gregex_logic::nfa::NFA::set_to_nfa(&prefix_set, &suffix_set, &factors_set) - } - }; +/// Helper function to build NFA at compile time for a single character +fn build_nfa_for_char(c: char) -> TokenStream { + use gregex_logic::translation::node::Node; + use gregex_logic::TERMINAL_COUNT; + + // Build the node tree at compile time + let count = TERMINAL_COUNT.fetch_add(1, std::sync::atomic::Ordering::SeqCst); + let node = Node::Terminal(c, count); + + // Convert to NFA at compile time + build_nfa_from_node(&node) +} + +/// Helper function to build NFA at compile time for a regex string +fn build_nfa_for_string(pattern: &str) -> TokenStream { + // Parse the regex string into a Node tree at compile time + let node = regex_parser::parse_to_node(pattern); - gen.into() + // Convert to NFA at compile time + build_nfa_from_node(&node) +} + +/// Build NFA from a node at compile time and generate code for it +fn build_nfa_from_node(node: &gregex_logic::translation::node::Node) -> TokenStream { + use gregex_logic::nfa::NFA; + use gregex_logic::translation::node::{factors_set, nullability_set, prefix_set, suffix_set}; + + // Compute sets at compile time + let prefix = prefix_set(node); + let suffix = suffix_set(node); + let factors = factors_set(node); + let nullability = nullability_set(node); + + // Build NFA at compile time 
+ let nfa = NFA::set_to_nfa(&prefix, &suffix, &factors, &nullability); + + // Serialize the NFA to code + serialize_nfa(&nfa) +} + +/// Serialize an NFA to Rust code +fn serialize_nfa(nfa: &gregex_logic::nfa::NFA) -> TokenStream { + let states = nfa.get_states(); + let accept = nfa.get_accept_states(); + let transitions = nfa.get_transitions(); + + // Convert transitions to token stream + let transition_items = transitions.iter().map(|((from, c), tos)| { + let to_vals = tos.iter().map(|&t| quote! { #t }); + quote! { ((#from, #c), vec![#(#to_vals),*]) } + }); + + quote! { + gregex_logic::nfa::NFA::from_raw( + vec![#(#states),*], + vec![#(#accept),*], + vec![#(#transition_items),*] + ) + } + .into() }