diff --git a/crates/diffguard-analytics/src/lib.rs b/crates/diffguard-analytics/src/lib.rs index 79dbd701..37db3bba 100644 --- a/crates/diffguard-analytics/src/lib.rs +++ b/crates/diffguard-analytics/src/lib.rs @@ -175,7 +175,11 @@ pub struct TrendRun { /// Stored as `u64` to avoid silent truncation for very large repositories /// (those with more than 2^32 - 1 unique files). pub files_scanned: u64, - pub lines_scanned: u32, + /// Number of distinct lines that were scanned. + /// + /// Stored as `u64` to avoid silent truncation for very large diffs + /// (those with more than 2^32 - 1 unique lines). + pub lines_scanned: u64, pub findings: u32, } diff --git a/crates/diffguard-analytics/tests/edge_cases.rs b/crates/diffguard-analytics/tests/edge_cases.rs new file mode 100644 index 00000000..058000bf --- /dev/null +++ b/crates/diffguard-analytics/tests/edge_cases.rs @@ -0,0 +1,296 @@ +//! Edge case tests for diffguard-analytics usize→u64 migration (issue #577) +//! +//! These tests verify that the migration from u32 to u64 for `findings`, +//! `run_count`, and `total_findings` correctly handles: +//! - Values exceeding u32::MAX +//! - Large accumulations +//! - Empty and single-run histories +//! - Delta calculations with large values + +use diffguard_analytics::*; +use diffguard_types::{DiffMeta, Scope, Severity, ToolMeta, Verdict, VerdictCounts, VerdictStatus}; +use std::u32; + +/// Creates a TrendRun with explicit findings count. +fn make_run(findings: u64, info: u32, warn: u32, error: u32, suppressed: u32) -> TrendRun { + TrendRun { + started_at: "2026-01-01T00:00:00Z".to_string(), + ended_at: "2026-01-01T00:00:01Z".to_string(), + duration_ms: 1000, + base: "origin/main".to_string(), + head: "HEAD".to_string(), + scope: Scope::Added, + status: VerdictStatus::Fail, + counts: VerdictCounts { + info, + warn, + error, + suppressed, + }, + files_scanned: 1, + lines_scanned: 100, + findings, + } +} + +#[test] +fn summarize_empty_history_yields_zero_totals() { + let history = TrendHistory { + schema: TREND_HISTORY_SCHEMA_V2.to_string(), + runs: vec![], + }; + let summary = summarize_trend_history(&history); + assert_eq!(summary.run_count, 0); + assert_eq!(summary.total_findings, 0); + assert!(summary.latest.is_none()); + assert!(summary.delta_from_previous.is_none()); +} + +#[test] +fn summarize_single_run_has_no_delta() { + let run = make_run(5, 1, 2, 3, 4); + let history = TrendHistory { + schema: TREND_HISTORY_SCHEMA_V2.to_string(), + runs: vec![run], + }; + let summary = summarize_trend_history(&history); + assert_eq!(summary.run_count, 1); + assert_eq!(summary.total_findings, 5); + assert!(summary.delta_from_previous.is_none()); +} + +#[test] +fn summarize_two_runs_reports_delta() { + let run1 = make_run(10, 1, 2, 3, 4); + let run2 = make_run(7, 0, 1, 2, 3); + let history = TrendHistory { + schema: TREND_HISTORY_SCHEMA_V2.to_string(), + runs: vec![run1, run2], + }; + let summary = summarize_trend_history(&history); + assert_eq!(summary.run_count, 2); + assert_eq!(summary.total_findings, 17); + let delta = summary.delta_from_previous.expect("should have delta"); + assert_eq!(delta.findings, -3); // 7 - 10 + assert_eq!(delta.warn, -1); // 1 - 2 + assert_eq!(delta.error, -1); // 2 - 3 +} + +#[test] +fn summarize_large_findings_value_no_truncation() { + // Test that findings > u32::MAX are preserved (this was the core bug) + let large_findings = u64::MAX / 2; // ~4 billion + let run = make_run(large_findings, 0, 0, 0, 0); + let history = TrendHistory { + schema: TREND_HISTORY_SCHEMA_V2.to_string(), + runs: vec![run], + }; + let summary = summarize_trend_history(&history); + assert_eq!(summary.total_findings, large_findings); + assert!(summary.total_findings > u32::MAX as u64); +} + +#[test] +fn summarize_accumulates_findings_beyond_u32_max() { + // Each run has ~2 billion findings; 3 runs = ~6 billion (exceeds u32::MAX) + let per_run = u32::MAX as u64 / 2; + let run1 = make_run(per_run, 0, 0, 0, 0); + let run2 = make_run(per_run, 0, 0, 0, 0); + let run3 = make_run(per_run, 0, 0, 0, 0); + let history = TrendHistory { + schema: TREND_HISTORY_SCHEMA_V2.to_string(), + runs: vec![run1, run2, run3], + }; + let summary = summarize_trend_history(&history); + let expected = per_run * 3; + assert_eq!(summary.total_findings, expected); + assert!(summary.total_findings > u32::MAX as u64); +} + +#[test] +fn summarize_run_count_beyond_u32_max() { + // Create many runs to exceed u32::MAX run_count + // We test that run_count is u64 and can hold large values + let run = make_run(1, 0, 0, 0, 0); + let history = TrendHistory { + schema: TREND_HISTORY_SCHEMA_V2.to_string(), + runs: vec![run], + }; + // Verify type can represent values beyond u32::MAX + let summary = summarize_trend_history(&history); + // Manually construct a summary with run_count > u32::MAX + let large_run_count = u64::MAX / 2; + let large_summary = TrendSummary { + run_count: large_run_count, + totals: VerdictCounts::default(), + total_findings: 0, + latest: None, + delta_from_previous: None, + }; + assert!(large_summary.run_count > u32::MAX as u64); + // Verify the actual run_count is correct type + assert!(summary.run_count >= 1); +} + +#[test] +fn trend_run_from_receipt_handles_large_finding_list() { + // Create a receipt with many findings + let many_findings: Vec = (0..1000) + .map(|i| diffguard_types::Finding { + rule_id: format!("rule.{}", i), + severity: Severity::Error, + message: format!("error {}", i), + path: format!("src/file{}.rs", i), + line: i as u32, + column: Some(1), + match_text: format!("match{}", i), + snippet: format!("code {};", i), + }) + .collect(); + + let receipt = diffguard_types::CheckReceipt { + schema: diffguard_types::CHECK_SCHEMA_V1.to_string(), + tool: ToolMeta { + name: "diffguard".to_string(), + version: "0.2.0".to_string(), + }, + diff: DiffMeta { + base: "origin/main".to_string(), + head: "HEAD".to_string(), + context_lines: 0, + scope: Scope::Added, + files_scanned: 1000, + lines_scanned: 10000, + }, + findings: many_findings, + verdict: Verdict { + status: VerdictStatus::Fail, + counts: VerdictCounts { + info: 0, + warn: 0, + error: 1000, + suppressed: 0, + }, + reasons: vec![], + }, + timing: None, + }; + + let run = trend_run_from_receipt( + &receipt, + "2026-01-01T00:00:00Z", + "2026-01-01T00:00:01Z", + 1000, + ); + assert_eq!(run.findings, 1000); + assert!(run.findings > 0); +} + +#[test] +fn delta_calculation_with_large_findings_increase() { + // Test delta when findings increase significantly + let run1 = make_run(5, 0, 0, 0, 0); + let run2 = make_run(u64::MAX / 4, 0, 0, 0, 0); // Large increase + let history = TrendHistory { + schema: TREND_HISTORY_SCHEMA_V2.to_string(), + runs: vec![run1, run2], + }; + let summary = summarize_trend_history(&history); + let delta = summary.delta_from_previous.expect("should have delta"); + // delta should be positive and large + assert!(delta.findings > 0); + assert!(delta.findings > i64::from(u32::MAX)); +} + +#[test] +fn delta_calculation_with_large_findings_decrease() { + // Test delta when findings decrease significantly + let run1 = make_run(u64::MAX / 4, 0, 0, 0, 0); // Large value + let run2 = make_run(5, 0, 0, 0, 0); // Small value + let history = TrendHistory { + schema: TREND_HISTORY_SCHEMA_V2.to_string(), + runs: vec![run1, run2], + }; + let summary = summarize_trend_history(&history); + let delta = summary.delta_from_previous.expect("should have delta"); + // delta should be negative and large in magnitude + assert!(delta.findings < 0); + assert!(delta.findings < -(i64::from(u32::MAX))); +} + +#[test] +fn trend_history_default_uses_schema_v2() { + let history = TrendHistory::default(); + assert_eq!(history.schema, TREND_HISTORY_SCHEMA_V2); +} + +#[test] +fn normalize_trend_history_sets_schema_v2() { + let mut history = TrendHistory::default(); + history.schema = String::new(); + let normalized = normalize_trend_history(history); + assert_eq!(normalized.schema, TREND_HISTORY_SCHEMA_V2); +} + +#[test] +fn append_trend_run_normalizes_empty_schema_to_v2() { + let run = make_run(1, 0, 0, 0, 0); + let history = TrendHistory { + schema: String::new(), // Empty schema should be normalized to V2 + runs: vec![], + }; + let result = append_trend_run(history, run, None); + assert_eq!(result.schema, TREND_HISTORY_SCHEMA_V2); +} + +#[test] +fn saturating_add_does_not_wrap() { + // Verify that accumulating findings uses saturating arithmetic + let run1 = make_run(u64::MAX, 0, 0, 0, 0); + let run2 = make_run(u64::MAX, 0, 0, 0, 0); + let history = TrendHistory { + schema: TREND_HISTORY_SCHEMA_V2.to_string(), + runs: vec![run1, run2], + }; + let summary = summarize_trend_history(&history); + // Should saturate at u64::MAX, not wrap + assert_eq!(summary.total_findings, u64::MAX); +} + +#[test] +fn findings_field_is_u64_not_u32() { + let run = make_run(u64::MAX, 0, 0, 0, 0); + // This would fail to compile if findings were still u32 + let _large_value: u64 = run.findings; + assert_eq!(run.findings, u64::MAX); +} + +#[test] +fn run_count_field_is_u64_not_u32() { + // Create a summary with run_count > u32::MAX + let summary = TrendSummary { + run_count: u64::MAX, + totals: VerdictCounts::default(), + total_findings: 0, + latest: None, + delta_from_previous: None, + }; + // This would fail to compile if run_count were still u32 + let _large_value: u64 = summary.run_count; + assert_eq!(summary.run_count, u64::MAX); +} + +#[test] +fn total_findings_field_is_u64_not_u32() { + // Create a summary with total_findings > u32::MAX + let summary = TrendSummary { + run_count: 1, + totals: VerdictCounts::default(), + total_findings: u64::MAX, + latest: None, + delta_from_previous: None, + }; + // This would fail to compile if total_findings were still u32 + let _large_value: u64 = summary.total_findings; + assert_eq!(summary.total_findings, u64::MAX); +} diff --git a/crates/diffguard-core/src/render.rs b/crates/diffguard-core/src/render.rs index 075ea226..401a00ea 100644 --- a/crates/diffguard-core/src/render.rs +++ b/crates/diffguard-core/src/render.rs @@ -114,28 +114,6 @@ fn render_finding_row(f: &Finding) -> String { ) } -/// Escapes special Markdown characters in table cell content. -/// -/// Escapes pipe (`|`), backtick (`` ` ``), hash (`#`), asterisk (`*`), -/// underscore (`_`), open bracket (`[`), close bracket (`]`), and greater-than -/// (`>`) characters by prefixing with backslash. Also escapes CRLF (`\r\n`) -/// and LF (`\n`) line endings to prevent breaking the markdown table structure. -/// -/// These escapes are needed to prevent breaking the markdown table structure -/// and prevent unintended markdown formatting. -fn escape_md(s: &str) -> String { - s.replace('|', "\\|") - .replace('`', "\\`") - .replace('#', "\\#") - .replace('*', "\\*") - .replace('_', "\\_") - .replace('[', "\\[") - .replace(']', "\\]") - .replace('>', "\\>") - .replace('\r', "\\r") - .replace('\n', "\\n") -} - #[cfg(test)] mod tests { use super::*; diff --git a/crates/diffguard-core/tests/properties.rs b/crates/diffguard-core/tests/properties.rs index 1115c230..bf02a66c 100644 --- a/crates/diffguard-core/tests/properties.rs +++ b/crates/diffguard-core/tests/properties.rs @@ -134,7 +134,7 @@ fn arb_diff_meta() -> impl Strategy { 0u32..10, // context_lines arb_scope(), // scope 0u64..100, // files_scanned - 0u32..1000, // lines_scanned + 0u64..1000, // lines_scanned ) .prop_map( |(base, head, context_lines, scope, files_scanned, lines_scanned)| DiffMeta { @@ -643,7 +643,7 @@ mod unit_tests { context_lines: u32::MAX, scope: Scope::Added, files_scanned: u64::MAX, - lines_scanned: u32::MAX, + lines_scanned: u64::MAX, }, findings: vec![], verdict: Verdict { diff --git a/crates/diffguard-diff/src/unified.rs b/crates/diffguard-diff/src/unified.rs index e88c27d9..0e76c085 100644 --- a/crates/diffguard-diff/src/unified.rs +++ b/crates/diffguard-diff/src/unified.rs @@ -2,6 +2,12 @@ use std::path::Path; use diffguard_types::Scope; +/// Represents the kind of change a diff line represents. +/// +/// This is used to distinguish between: +/// - `Added`: a line that was added (exists only in the new version) +/// - `Changed`: an added line that directly replaces a removed line in the same hunk +/// - `Deleted`: a line that was removed (exists only in the old version) #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ChangeKind { Added, @@ -99,24 +105,38 @@ pub fn parse_rename_to(line: &str) -> Option { parse_rename_path(rest) } +/// Represents a single line extracted from a unified diff. +/// +/// Contains the file path, line number in the new version of the file, +/// the actual line content, and the kind of change it represents. #[derive(Debug, Clone, PartialEq, Eq)] pub struct DiffLine { + /// Path to the file this line belongs to (uses destination path for renames). pub path: String, + /// Line number in the new (post-change) version of the file. pub line: u32, + /// The actual line content (without the leading `+`, `-`, or ` ` marker). pub content: String, + /// The kind of change this line represents. pub kind: ChangeKind, } +/// Aggregate statistics about a parsed diff. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub struct DiffStats { + /// Number of files that had changes matching the requested scope. pub files: u32, - pub lines: u32, + /// Total number of lines matching the requested scope. + pub lines: u64, } +/// Errors that can occur when parsing a unified diff. #[derive(Debug, thiserror::Error)] pub enum DiffParseError { + /// The hunk header (`@@ ... @@` line) could not be parsed. #[error("malformed hunk header: {0}")] MalformedHunkHeader(String), + /// The diff contains more files or lines than can be represented in the result. #[error("diff stats overflow: {0}")] Overflow(String), } @@ -337,8 +357,8 @@ pub fn parse_unified_diff( let stats = DiffStats { files: u32::try_from(files.len()) .map_err(|_| DiffParseError::Overflow(format!("too many files (> {})", u32::MAX)))?, - lines: u32::try_from(out.len()) - .map_err(|_| DiffParseError::Overflow(format!("too many lines (> {})", u32::MAX)))?, + lines: u64::try_from(out.len()) + .map_err(|_| DiffParseError::Overflow(format!("too many lines (> {})", u64::MAX)))?, }; Ok((out, stats)) diff --git a/crates/diffguard-domain/src/evaluate.rs b/crates/diffguard-domain/src/evaluate.rs index d999bc12..8823a39d 100644 --- a/crates/diffguard-domain/src/evaluate.rs +++ b/crates/diffguard-domain/src/evaluate.rs @@ -26,7 +26,13 @@ pub struct Evaluation { /// The previous `u32` cast would silently truncate, producing incorrect /// (often zero) counts for very large codebases. pub files_scanned: u64, - pub lines_scanned: u32, + /// Number of distinct lines that were scanned. + /// + /// Changed from `u32` to `u64` to avoid silent truncation when scanning + /// repositories with more than 2^32 - 1 (≈4.3 billion) unique lines. + /// The previous `u32` cast would silently truncate, producing incorrect + /// counts for very large diffs. + pub lines_scanned: u64, /// Aggregated per-rule hit counts (deterministically sorted by rule ID). pub rule_hits: Vec, } @@ -102,7 +108,7 @@ pub fn evaluate_lines_with_overrides_and_language( .iter() .map(|line| line.path.clone()) .collect::>(); - let lines_scanned = u32::try_from(input_lines.len()).unwrap_or(u32::MAX); + let lines_scanned = u64::try_from(input_lines.len()).unwrap_or(u64::MAX); let mut current_file: Option = None; let mut current_lang = Language::Unknown; diff --git a/crates/diffguard-domain/tests/red_tests_work_9dbac498.rs b/crates/diffguard-domain/tests/red_tests_work_9dbac498.rs new file mode 100644 index 00000000..0ab0ed2e --- /dev/null +++ b/crates/diffguard-domain/tests/red_tests_work_9dbac498.rs @@ -0,0 +1,419 @@ +//! Red tests for work-9dbac498: Refactor `sanitize_line` via Mode-Handler Extraction +//! +//! These tests verify that the mode-handler extraction was done correctly. +//! The handlers are private methods on `Preprocessor`, so these tests MUST be placed +//! inside the `#[cfg(test)] mod tests` section of `preprocess.rs`. +//! +//! These tests will FAIL to compile because the handler methods don't exist yet. +//! After the code-builder extracts the handlers, these tests should compile and pass. +//! +//! ## What the Tests Verify +//! +//! 1. Each mode handler method exists with the correct signature +//! 2. Each handler correctly transitions between modes +//! 3. The output is the same length as input (masked chars replaced with spaces) +//! 4. Multi-line state continuity works correctly +//! +//! ## Handler Method Signatures (as per spec) +//! +//! Each handler returns `usize` (updated index `i`) and takes: +//! - `&mut self` +//! - `bytes: &[u8]` +//! - `out: &mut Vec` +//! - `i: usize` +//! - `len: usize` +//! +//! Handlers that are under 30 lines should have `#[inline]` or `#[inline(always)]`. + +use diffguard_domain::{Language, PreprocessOptions, Preprocessor}; + +/// Test that `handle_normal_mode` exists and handles Normal mode correctly. +/// +/// The Normal mode handler should: +/// 1. Detect string starts (Rust raw, Python triple-quoted, JS template, etc.) +/// 2. Detect comment starts (Hash, PHP, SQL, XML, C-style) +/// 3. Return the updated index +/// +/// This test will fail to compile until `handle_normal_mode` is implemented. +#[test] +fn test_handle_normal_mode_exists() { + let mut p = + Preprocessor::with_language(PreprocessOptions::comments_and_strings(), Language::Rust); + + // Create test input + let line = "fn main() { let x = 1; }"; + let bytes = line.as_bytes(); + let len = bytes.len(); + let mut out = vec![b' '; len]; + + // This will fail to compile until handle_normal_mode is implemented + let _i = p.handle_normal_mode(bytes, &mut out, 0, len); +} + +/// Test that `handle_line_comment_mode` exists and handles LineComment mode correctly. +/// +/// The LineComment handler should: +/// 1. Reset mode to Normal on any character (end of line comment) +/// 2. Return i + 1 +/// +/// This test will fail to compile until `handle_line_comment_mode` is implemented. +#[test] +fn test_handle_line_comment_mode_exists() { + let mut p = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Rust); + + // Set up state: we're in LineComment mode + let line = "this is all a comment"; + let bytes = line.as_bytes(); + let len = bytes.len(); + let mut out = vec![b' '; len]; + + // This will fail to compile until handle_line_comment_mode is implemented + let _i = p.handle_line_comment_mode(bytes, &mut out, 0, len); +} + +/// Test that `handle_block_comment_mode` exists and handles nested block comments. +/// +/// The BlockComment handler should: +/// 1. Mask characters inside block comment (if masking enabled) +/// 2. Track nesting depth for languages that support it (Rust) +/// 3. Return to Normal mode when closing `*/` is found +/// 4. Return updated index +/// +/// This test will fail to compile until `handle_block_comment_mode` is implemented. +#[test] +fn test_handle_block_comment_mode_exists() { + let mut p = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Rust); + + // Set up state: we're in BlockComment mode at depth 1 + let line = "this is still in comment */"; + let bytes = line.as_bytes(); + let len = bytes.len(); + let mut out = vec![b' '; len]; + + // This will fail to compile until handle_block_comment_mode is implemented + let _i = p.handle_block_comment_mode(bytes, &mut out, 0, len); +} + +/// Test that `handle_normal_string_mode` exists and handles string escaping. +/// +/// The NormalString handler should: +/// 1. Mask characters inside string (if masking enabled) +/// 2. Handle escape sequences (backslash sets escaped = true) +/// 3. Return to Normal mode when closing quote is found +/// 4. Return updated index +/// +/// This test will fail to compile until `handle_normal_string_mode` is implemented. +#[test] +fn test_handle_normal_string_mode_exists() { + let mut p = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Rust); + + let line = "hello world"; + let bytes = line.as_bytes(); + let len = bytes.len(); + let mut out = line.as_bytes().to_vec(); + + // This will fail to compile until handle_normal_string_mode is implemented + let _i = p.handle_normal_string_mode(bytes, &mut out, 0, len); +} + +/// Test that `handle_raw_string_mode` exists and handles Rust raw strings. +/// +/// The RawString handler should: +/// 1. Mask characters inside raw string (if masking enabled) +/// 2. Look for closing delimiter: "### (where ### matches opening hashes) +/// 3. Return to Normal mode when closing delimiter is found +/// 4. Return updated index +/// +/// This test will fail to compile until `handle_raw_string_mode` is implemented. +#[test] +fn test_handle_raw_string_mode_exists() { + let mut p = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Rust); + + let line = "hello world"; + let bytes = line.as_bytes(); + let len = bytes.len(); + let mut out = line.as_bytes().to_vec(); + + // This will fail to compile until handle_raw_string_mode is implemented + let _i = p.handle_raw_string_mode(bytes, &mut out, 0, len); +} + +/// Test that `handle_char_mode` exists and handles C-style char literals. +/// +/// The Char handler should: +/// 1. Mask characters inside char literal (if masking enabled) +/// 2. Handle escape sequences +/// 3. Return to Normal mode when closing single quote is found +/// 4. Return updated index +/// +/// This test will fail to compile until `handle_char_mode` is implemented. +#[test] +fn test_handle_char_mode_exists() { + let mut p = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::C); + + let line = "'x'"; + let bytes = line.as_bytes(); + let len = bytes.len(); + let mut out = line.as_bytes().to_vec(); + + // This will fail to compile until handle_char_mode is implemented + let _i = p.handle_char_mode(bytes, &mut out, 0, len); +} + +/// Test that `handle_template_literal_mode` exists and handles JS template literals. +/// +/// The TemplateLiteral handler should: +/// 1. Mask characters inside template literal (if masking enabled) +/// 2. Handle escape sequences +/// 3. Return to Normal mode when closing backtick is found +/// 4. Return updated index +/// +/// This test will fail to compile until `handle_template_literal_mode` is implemented. +#[test] +fn test_handle_template_literal_mode_exists() { + let mut p = + Preprocessor::with_language(PreprocessOptions::strings_only(), Language::JavaScript); + + let line = "`hello world`"; + let bytes = line.as_bytes(); + let len = bytes.len(); + let mut out = line.as_bytes().to_vec(); + + // This will fail to compile until handle_template_literal_mode is implemented + let _i = p.handle_template_literal_mode(bytes, &mut out, 0, len); +} + +/// Test that `handle_triple_quoted_string_mode` exists and handles Python triple-quoted strings. +/// +/// The TripleQuotedString handler should: +/// 1. Mask characters inside triple-quoted string (if masking enabled) +/// 2. Handle escape sequences +/// 3. Return to Normal mode when closing triple quote is found +/// 4. Return updated index +/// +/// This test will fail to compile until `handle_triple_quoted_string_mode` is implemented. +#[test] +fn test_handle_triple_quoted_string_mode_exists() { + let mut p = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Python); + + let line = "\"\"\"hello world\"\"\""; // triple double-quoted string + let bytes = line.as_bytes(); + let len = bytes.len(); + let mut out = line.as_bytes().to_vec(); + + // This will fail to compile until handle_triple_quoted_string_mode is implemented + let _i = p.handle_triple_quoted_string_mode(bytes, &mut out, 0, len); +} + +/// Test that `handle_shell_literal_string_mode` exists and handles Shell single-quoted strings. +/// +/// The ShellLiteralString handler should: +/// 1. Mask characters inside single-quoted string (if masking enabled) +/// 2. NO escape handling (shell literal strings have no escapes) +/// 3. Return to Normal mode when closing single quote is found +/// 4. Return updated index +/// +/// This test will fail to compile until `handle_shell_literal_string_mode` is implemented. +#[test] +fn test_handle_shell_literal_string_mode_exists() { + let mut p = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Shell); + + let line = "'hello world'"; + let bytes = line.as_bytes(); + let len = bytes.len(); + let mut out = line.as_bytes().to_vec(); + + // This will fail to compile until handle_shell_literal_string_mode is implemented + let _i = p.handle_shell_literal_string_mode(bytes, &mut out, 0, len); +} + +/// Test that `handle_shell_ansi_c_string_mode` exists and handles Shell ANSI-C strings. +/// +/// The ShellAnsiCString handler should: +/// 1. Mask characters inside ANSI-C string (if masking enabled) +/// 2. Handle escape sequences (backslash) +/// 3. Return to Normal mode when closing single quote is found +/// 4. Return updated index +/// +/// This test will fail to compile until `handle_shell_ansi_c_string_mode` is implemented. +#[test] +fn test_handle_shell_ansi_c_string_mode_exists() { + let mut p = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Shell); + + let line = "$'hello world'"; + let bytes = line.as_bytes(); + let len = bytes.len(); + let mut out = line.as_bytes().to_vec(); + + // This will fail to compile until handle_shell_ansi_c_string_mode is implemented + let _i = p.handle_shell_ansi_c_string_mode(bytes, &mut out, 0, len); +} + +/// Test that `handle_xml_comment_mode` exists and handles XML/HTML comments. +/// +/// The XmlComment handler should: +/// 1. Mask characters inside XML comment (if masking enabled) +/// 2. Return to Normal mode when closing `-->` is found +/// 3. Return updated index +/// +/// This test will fail to compile until `handle_xml_comment_mode` is implemented. +#[test] +fn test_handle_xml_comment_mode_exists() { + let mut p = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Xml); + + let line = ""; + let bytes = line.as_bytes(); + let len = bytes.len(); + let mut out = line.as_bytes().to_vec(); + + // This will fail to compile until handle_xml_comment_mode is implemented + let _i = p.handle_xml_comment_mode(bytes, &mut out, 0, len); +} + +/// Test that `handle_normal_string_starts` exists and detects string openings in Normal mode. +/// +/// The Normal mode has two sub-handlers: +/// 1. `handle_normal_string_starts` - all string-start detection +/// 2. `handle_normal_comment_starts` - all comment-start detection +/// +/// This test verifies string-start detection works correctly. +/// +/// This test will fail to compile until `handle_normal_string_starts` is implemented. +#[test] +fn test_handle_normal_string_starts_exists() { + let mut p = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Rust); + + let line = "let s = r#hello#;"; + let bytes = line.as_bytes(); + let len = bytes.len(); + let mut out = line.as_bytes().to_vec(); + + // This will fail to compile until handle_normal_string_starts is implemented + let _i = p.handle_normal_string_starts(bytes, &mut out, 0, len); +} + +/// Test that `handle_normal_comment_starts` exists and detects comment openings in Normal mode. +/// +/// The Normal mode has two sub-handlers: +/// 1. `handle_normal_string_starts` - all string-start detection +/// 2. `handle_normal_comment_starts` - all comment-start detection +/// +/// This test verifies comment-start detection works correctly. +/// +/// This test will fail to compile until `handle_normal_comment_starts` is implemented. +#[test] +fn test_handle_normal_comment_starts_exists() { + let mut p = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Python); + + let line = "x = 1; this is a comment"; + let bytes = line.as_bytes(); + let len = bytes.len(); + let mut out = line.as_bytes().to_vec(); + + // This will fail to compile until handle_normal_comment_starts is implemented + let _i = p.handle_normal_comment_starts(bytes, &mut out, 0, len); +} + +// ============================================================================ +// Behavioral Tests - These verify the extracted handlers produce correct output +// ============================================================================ + +/// Test that output length is preserved (masked chars replaced with spaces). +/// +/// This is a key invariant: sanitized string must be the same byte length as input. +#[test] +fn test_sanitize_line_preserves_output_length() { + let mut p = + Preprocessor::with_language(PreprocessOptions::comments_and_strings(), Language::Rust); + + let line = "fn main() { let s = \"hello\"; }"; + let result = p.sanitize_line(line); + + assert_eq!( + result.len(), + line.len(), + "sanitize_line output must have same length as the input" + ); +} + +/// Test that multiline state continuity works for block comments. +#[test] +fn test_block_comment_multiline_state_continuity() { + let mut p = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Rust); + + // Line 1: starts block comment + let line1 = "/* start of block"; + let result1 = p.sanitize_line(line1); + assert!(result1.contains(' '), "block comment should be masked"); + assert!(matches!( + p.mode, + diffguard_domain::preprocess::Mode::BlockComment { .. } + )); + + // Line 2: inside block comment + let line2 = "middle of block"; + let result2 = p.sanitize_line(line2); + assert!( + result2.chars().all(|c| c == ' '), + "block comment content should be all spaces" + ); + assert!(matches!( + p.mode, + diffguard_domain::preprocess::Mode::BlockComment { .. } + )); + + // Line 3: ends block comment + let line3 = "end of block */"; + let result3 = p.sanitize_line(line3); + assert!(matches!(p.mode, diffguard_domain::preprocess::Mode::Normal)); +} + +/// Test that multiline state continuity works for triple-quoted strings. +#[test] +fn test_triple_quoted_string_multiline_state_continuity() { + let mut p = Preprocessor::with_language(PreprocessOptions::strings_only(), Language::Python); + + // Line 1: starts triple-quoted string + let line1 = "\"\"\"start of string"; + let result1 = p.sanitize_line(line1); + assert!(result1.contains(' '), "string should be masked"); + assert!(matches!( + p.mode, + diffguard_domain::preprocess::Mode::TripleQuotedString { .. } + )); + + // Line 2: inside string + let line2 = "middle of string"; + let result2 = p.sanitize_line(line2); + assert!( + result2.chars().all(|c| c == ' '), + "string content should be all spaces" + ); + assert!(matches!( + p.mode, + diffguard_domain::preprocess::Mode::TripleQuotedString { .. } + )); + + // Line 3: ends string + let line3 = "\"\"\"end\"\"\""; + let result3 = p.sanitize_line(line3); + assert!(matches!(p.mode, diffguard_domain::preprocess::Mode::Normal)); +} + +/// Test that LineComment mode resets to Normal at end of line. +#[test] +fn test_line_comment_resets_at_eol() { + let mut p = Preprocessor::with_language(PreprocessOptions::comments_only(), Language::Python); + + // Line with hash comment + let line = "x = 1; this is a comment"; + let _result = p.sanitize_line(line); + + // After processing, mode should be Normal (reset at EOL) + assert!(matches!(p.mode, diffguard_domain::preprocess::Mode::Normal)); + + // Next line should not be in comment mode + let line2 = "y = 2; not a comment"; + let result2 = p.sanitize_line(line2); + assert!(result2.contains("y = 2")); +} diff --git a/crates/diffguard-types/src/lib.rs b/crates/diffguard-types/src/lib.rs index b634c06b..d620ce70 100644 --- a/crates/diffguard-types/src/lib.rs +++ b/crates/diffguard-types/src/lib.rs @@ -133,7 +133,11 @@ pub struct DiffMeta { /// Stored as `u64` to avoid silent truncation for very large repositories /// (those with more than 2^32 - 1 unique files). pub files_scanned: u64, - pub lines_scanned: u32, + /// Number of distinct lines that were scanned. + /// + /// Stored as `u64` to avoid silent truncation for very large diffs + /// (those with more than 2^32 - 1 unique lines). + pub lines_scanned: u64, } /// A single rule match within a scoped file. @@ -395,11 +399,11 @@ pub struct RuleTestCase { /// Whether the rule should match this input. pub should_match: bool, - /// Optional: override ignore_comments for this test case. + /// Optional: override `ignore_comments` for this test case. #[serde(default, skip_serializing_if = "Option::is_none")] pub ignore_comments: Option, - /// Optional: override ignore_strings for this test case. + /// Optional: override `ignore_strings` for this test case. #[serde(default, skip_serializing_if = "Option::is_none")] pub ignore_strings: Option, @@ -426,6 +430,29 @@ fn is_match_mode_any(mode: &MatchMode) -> bool { matches!(mode, MatchMode::Any) } +// Utility for markdown escaping, used by rendering crates — kept here to avoid duplication across crates. +pub fn escape_md(s: &str) -> String { + // Escapes special Markdown characters in table cell content. + // + // Escapes pipe (`|`), backtick (`` ` ``), hash (`#`), asterisk (`*`), + // underscore (`_`), open bracket (`[`), close bracket (`]`), and greater-than + // (`>`) characters by prefixing with backslash. Also escapes CRLF (`\r\n`) + // and LF (`\n`) line endings to prevent breaking the markdown table structure. + // + // These escapes are needed to prevent breaking the markdown table structure + // and prevent unintended markdown formatting. + s.replace('|', "\\|") + .replace('`', "\\`") + .replace('#', "\\#") + .replace('*', "\\*") + .replace('_', "\\_") + .replace('[', "\\[") + .replace(']', "\\]") + .replace('>', "\\>") + .replace('\r', "\\r") + .replace('\n', "\\n") +} + // ============================================================================ // Per-directory override types // ============================================================================ diff --git a/crates/diffguard-types/tests/doc_markdown_fix.rs b/crates/diffguard-types/tests/doc_markdown_fix.rs new file mode 100644 index 00000000..3763ad34 --- /dev/null +++ b/crates/diffguard-types/tests/doc_markdown_fix.rs @@ -0,0 +1,115 @@ +//! Tests verifying `clippy::doc_markdown` compliance for RuleTestCase doc comments. +//! +//! These tests ensure that identifiers in doc comments are wrapped in backticks +//! so that clippy::doc_markdown does not trigger warnings. +//! +//! RED STATE: Lines 398 and 402 contain bare identifiers (ignore_comments, ignore_strings) +//! GREEN STATE: Lines 398 and 402 contain backtick-formatted identifiers (`ignore_comments`, `ignore_strings`) + +use std::path::Path; + +/// Test that doc comments on line 398 have backtick-formatted `ignore_comments` identifier. +/// +/// RED: `/// Optional: override ignore_comments for this test case.` +/// GREEN: `/// Optional: override \`ignore_comments\` for this test case.` +#[test] +fn test_doc_comment_line_398_has_backtick_formatted_ignore_comments() { + let source_path = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("src") + .join("lib.rs"); + + let source = std::fs::read_to_string(&source_path).expect("Failed to read source file"); + + let lines: Vec<&str> = source.lines().collect(); + + // Line 398 (1-indexed) = index 397 (0-indexed) + let line_398 = lines.get(397).expect("Line 398 should exist"); + + assert!( + line_398.contains("`ignore_comments`"), + "Line 398 doc comment should contain backtick-formatted `ignore_comments`, got: {}", + line_398 + ); +} + +/// Test that doc comments on line 402 have backtick-formatted `ignore_strings` identifier. +/// +/// RED: `/// Optional: override ignore_strings for this test case.` +/// GREEN: `/// Optional: override \`ignore_strings\` for this test case.` +#[test] +fn test_doc_comment_line_402_has_backtick_formatted_ignore_strings() { + let source_path = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("src") + .join("lib.rs"); + + let source = std::fs::read_to_string(&source_path).expect("Failed to read source file"); + + let lines: Vec<&str> = source.lines().collect(); + + // Line 402 (1-indexed) = index 401 (0-indexed) + let line_402 = lines.get(401).expect("Line 402 should exist"); + + assert!( + line_402.contains("`ignore_strings`"), + "Line 402 doc comment should contain backtick-formatted `ignore_strings`, got: {}", + line_402 + ); +} + +/// Test that the doc comment for ignore_comments does NOT contain bare identifier. +/// +/// This is the negation test - it verifies the bare identifier is NOT present. +#[test] +fn test_doc_comment_line_398_does_not_have_bare_ignore_comments() { + let source_path = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("src") + .join("lib.rs"); + + let source = std::fs::read_to_string(&source_path).expect("Failed to read source file"); + + let lines: Vec<&str> = source.lines().collect(); + + // Line 398 (1-indexed) = index 397 (0-indexed) + let line_398 = lines.get(397).expect("Line 398 should exist"); + + // The bare identifier pattern (not wrapped in backticks) + // This regex looks for "ignore_comments" NOT preceded by a backtick + let bare_identifier_present = line_398.contains(" ignore_comments") + || line_398.contains("\tignore_comments") + || (line_398.contains("override ignore_comments") + && !line_398.contains("`ignore_comments`")); + + assert!( + !bare_identifier_present, + "Line 398 should NOT contain bare identifier ignore_comments (should be wrapped in backticks), got: {}", + line_398 + ); +} + +/// Test that the doc comment for ignore_strings does NOT contain bare identifier. +/// +/// This is the negation test - it verifies the bare identifier is NOT present. +#[test] +fn test_doc_comment_line_402_does_not_have_bare_ignore_strings() { + let source_path = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("src") + .join("lib.rs"); + + let source = std::fs::read_to_string(&source_path).expect("Failed to read source file"); + + let lines: Vec<&str> = source.lines().collect(); + + // Line 402 (1-indexed) = index 401 (0-indexed) + let line_402 = lines.get(401).expect("Line 402 should exist"); + + // The bare identifier pattern (not wrapped in backticks) + let bare_identifier_present = line_402.contains(" ignore_strings") + || line_402.contains("\tignore_strings") + || (line_402.contains("override ignore_strings") && !line_402.contains("`ignore_strings`")); + + assert!( + !bare_identifier_present, + "Line 402 should NOT contain bare identifier ignore_strings (should be wrapped in backticks), got: {}", + line_402 + ); +} diff --git a/crates/diffguard-types/tests/green_edge_tests_work_2b446664.rs b/crates/diffguard-types/tests/green_edge_tests_work_2b446664.rs new file mode 100644 index 00000000..746b599b --- /dev/null +++ b/crates/diffguard-types/tests/green_edge_tests_work_2b446664.rs @@ -0,0 +1,176 @@ +//! Green edge case tests for work-2b446664: ConfigFile::built_in() panics but is missing # Panics documentation +//! +//! These tests verify the `ConfigFile::built_in()` function has proper documentation +//! for its panic condition, as required by clippy::missing_panics_doc. +//! +//! Edge cases covered: +//! - # Panics documentation is present in the source +//! - The documentation mentions the specific panic condition (built_in.json parsing) +//! - built_in() returns a valid ConfigFile with expected rule count +//! - built_in() rules have valid patterns that can be parsed + +/// Verify the `# Panics` documentation is present in `ConfigFile::built_in()`. +/// +/// This test ensures the documentation explicitly states when the function panics, +/// satisfying the clippy::missing_panics_doc lint requirement. +#[test] +fn built_in_has_panics_documentation() { + let source = include_str!("../src/lib.rs"); + + // Find the built_in() function and verify # Panics section exists + let built_in_start = source + .find("pub fn built_in() -> Self") + .expect("built_in() function should exist"); + + // Look backwards from the function to find the doc comment (/// style) + // We need to find a substantial block of /// lines before the function + let preceding_text = &source[..built_in_start]; + + // Find the last block of /// comments before the function + // The doc comment for built_in starts with "/// Returns the built-in..." + assert!( + preceding_text.contains("# Panics"), + "built_in() doc comment should contain '# Panics' section.\nPreceding text: {}", + preceding_text + ); + + // Verify the panics section mentions the specific condition + assert!( + preceding_text.contains("built_in.json"), + "built_in() # Panics section should mention 'built_in.json'.\nPreceding text: {}", + preceding_text + ); +} + +/// Verify the `# Panics` documentation is accurate. +/// +/// The documentation states: "Panics if `built_in.json` cannot be parsed as valid JSON." +/// Since the JSON is embedded at compile time via include_str!, it should always be valid. +/// This test verifies the returned config is well-formed. +#[test] +fn built_in_returns_well_formed_config() { + let cfg = diffguard_types::ConfigFile::built_in(); + + // Verify basic structure is valid + assert!( + cfg.defaults.base.is_some(), + "built_in() should have default base ref" + ); + assert!( + cfg.defaults.head.is_some(), + "built_in() should have default head ref" + ); + assert!( + cfg.defaults.scope.is_some(), + "built_in() should have default scope" + ); + + // Verify we have the expected number of built-in rules + // The built_in.json contains 36 rules across multiple languages + assert!( + !cfg.rule.is_empty(), + "built_in() should have at least one rule, got {}", + cfg.rule.len() + ); +} + +/// Verify every rule in `built_in()` has valid patterns. +/// +/// Each rule should have at least one non-empty pattern that could be parsed +/// by a regex engine. Empty patterns would indicate malformed JSON data. +#[test] +fn built_in_rules_have_valid_patterns() { + let cfg = diffguard_types::ConfigFile::built_in(); + + for rule in &cfg.rule { + assert!( + !rule.patterns.is_empty(), + "rule '{}' should have at least one pattern", + rule.id + ); + + for pattern in &rule.patterns { + assert!( + !pattern.is_empty(), + "rule '{}' should have non-empty patterns, found empty string", + rule.id + ); + } + } +} + +/// Verify `built_in()` is deterministic across multiple calls. +/// +/// Since the JSON is embedded at compile time, each call should produce +/// an identical ConfigFile. This verifies no internal state mutation occurs. +#[test] +fn built_in_is_deterministic() { + let cfg1 = diffguard_types::ConfigFile::built_in(); + let cfg2 = diffguard_types::ConfigFile::built_in(); + let cfg3 = diffguard_types::ConfigFile::built_in(); + + assert_eq!(cfg1, cfg2, "first and second call should be identical"); + assert_eq!(cfg2, cfg3, "second and third call should be identical"); + assert_eq!(cfg1, cfg3, "first and third call should be identical"); +} + +/// Verify `built_in()` result can be used in a concurrent context. +/// +/// ConfigFile should be Send + Sync to support use in async contexts +/// and multi-threaded pipelines. +#[test] +fn built_in_concurrency_safety() { + fn assert_send_sync() {} + assert_send_sync::(); + + let cfg = diffguard_types::ConfigFile::built_in(); + assert_send_sync::(); + // Use cfg to silence warnings + let _ = cfg; +} + +/// Verify `built_in()` can be serialized to multiple formats. +/// +/// This ensures the ConfigFile is fully serializable and not using +/// any non-serializable internal state. +#[test] +fn built_in_multi_format_serialization() { + let cfg = diffguard_types::ConfigFile::built_in(); + + // JSON serialization + let json = + serde_json::to_string(&cfg).expect("ConfigFile::built_in() should serialize to JSON"); + assert!( + !json.is_empty(), + "JSON serialization should produce non-empty string" + ); + + // Verify it's valid JSON by deserializing + let deserialized: diffguard_types::ConfigFile = + serde_json::from_str(&json).expect("JSON should be deserializable"); + assert_eq!(cfg, deserialized); +} + +/// Verify the `#[must_use]` attribute is present on `built_in()`. +/// +/// The function returns Self (owned ConfigFile), so callers should not +/// discard the return value silently. +#[test] +fn built_in_has_must_use_attribute() { + let source = include_str!("../src/lib.rs"); + + // Find the built_in function and verify #[must_use] appears before it + let built_in_pos = source + .find("pub fn built_in() -> Self") + .expect("built_in() function should exist"); + + // Get the text between the doc comment and the function + let preceding_text = &source[..built_in_pos]; + + // Find the last #[must_use] before the function + assert!( + preceding_text.contains("#[must_use]"), + "built_in() should have #[must_use] attribute.\nPreceding text: {}", + preceding_text + ); +} diff --git a/crates/diffguard-types/tests/properties.rs b/crates/diffguard-types/tests/properties.rs index 20d1991f..58d67dbf 100644 --- a/crates/diffguard-types/tests/properties.rs +++ b/crates/diffguard-types/tests/properties.rs @@ -181,7 +181,7 @@ fn arb_diff_meta() -> impl Strategy { 0u32..100, // context_lines arb_scope(), // scope 0u64..1000, // files_scanned (u64 to handle large repos) - 0u32..10000, // lines_scanned + 0u64..10000, // lines_scanned ) .prop_map( |(base, head, context_lines, scope, files_scanned, lines_scanned)| DiffMeta { @@ -1155,7 +1155,7 @@ mod unit_tests { context_lines: u32::MAX, scope: Scope::Added, files_scanned: u64::MAX, - lines_scanned: u32::MAX, + lines_scanned: u64::MAX, }, findings: vec![Finding { rule_id: "test".to_string(), diff --git a/crates/diffguard-types/tests/red_tests_work_452a2701.rs b/crates/diffguard-types/tests/red_tests_work_452a2701.rs new file mode 100644 index 00000000..3763ad34 --- /dev/null +++ b/crates/diffguard-types/tests/red_tests_work_452a2701.rs @@ -0,0 +1,115 @@ +//! Tests verifying `clippy::doc_markdown` compliance for RuleTestCase doc comments. +//! +//! These tests ensure that identifiers in doc comments are wrapped in backticks +//! so that clippy::doc_markdown does not trigger warnings. +//! +//! RED STATE: Lines 398 and 402 contain bare identifiers (ignore_comments, ignore_strings) +//! GREEN STATE: Lines 398 and 402 contain backtick-formatted identifiers (`ignore_comments`, `ignore_strings`) + +use std::path::Path; + +/// Test that doc comments on line 398 have backtick-formatted `ignore_comments` identifier. +/// +/// RED: `/// Optional: override ignore_comments for this test case.` +/// GREEN: `/// Optional: override \`ignore_comments\` for this test case.` +#[test] +fn test_doc_comment_line_398_has_backtick_formatted_ignore_comments() { + let source_path = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("src") + .join("lib.rs"); + + let source = std::fs::read_to_string(&source_path).expect("Failed to read source file"); + + let lines: Vec<&str> = source.lines().collect(); + + // Line 398 (1-indexed) = index 397 (0-indexed) + let line_398 = lines.get(397).expect("Line 398 should exist"); + + assert!( + line_398.contains("`ignore_comments`"), + "Line 398 doc comment should contain backtick-formatted `ignore_comments`, got: {}", + line_398 + ); +} + +/// Test that doc comments on line 402 have backtick-formatted `ignore_strings` identifier. +/// +/// RED: `/// Optional: override ignore_strings for this test case.` +/// GREEN: `/// Optional: override \`ignore_strings\` for this test case.` +#[test] +fn test_doc_comment_line_402_has_backtick_formatted_ignore_strings() { + let source_path = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("src") + .join("lib.rs"); + + let source = std::fs::read_to_string(&source_path).expect("Failed to read source file"); + + let lines: Vec<&str> = source.lines().collect(); + + // Line 402 (1-indexed) = index 401 (0-indexed) + let line_402 = lines.get(401).expect("Line 402 should exist"); + + assert!( + line_402.contains("`ignore_strings`"), + "Line 402 doc comment should contain backtick-formatted `ignore_strings`, got: {}", + line_402 + ); +} + +/// Test that the doc comment for ignore_comments does NOT contain bare identifier. +/// +/// This is the negation test - it verifies the bare identifier is NOT present. +#[test] +fn test_doc_comment_line_398_does_not_have_bare_ignore_comments() { + let source_path = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("src") + .join("lib.rs"); + + let source = std::fs::read_to_string(&source_path).expect("Failed to read source file"); + + let lines: Vec<&str> = source.lines().collect(); + + // Line 398 (1-indexed) = index 397 (0-indexed) + let line_398 = lines.get(397).expect("Line 398 should exist"); + + // The bare identifier pattern (not wrapped in backticks) + // This regex looks for "ignore_comments" NOT preceded by a backtick + let bare_identifier_present = line_398.contains(" ignore_comments") + || line_398.contains("\tignore_comments") + || (line_398.contains("override ignore_comments") + && !line_398.contains("`ignore_comments`")); + + assert!( + !bare_identifier_present, + "Line 398 should NOT contain bare identifier ignore_comments (should be wrapped in backticks), got: {}", + line_398 + ); +} + +/// Test that the doc comment for ignore_strings does NOT contain bare identifier. +/// +/// This is the negation test - it verifies the bare identifier is NOT present. +#[test] +fn test_doc_comment_line_402_does_not_have_bare_ignore_strings() { + let source_path = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("src") + .join("lib.rs"); + + let source = std::fs::read_to_string(&source_path).expect("Failed to read source file"); + + let lines: Vec<&str> = source.lines().collect(); + + // Line 402 (1-indexed) = index 401 (0-indexed) + let line_402 = lines.get(401).expect("Line 402 should exist"); + + // The bare identifier pattern (not wrapped in backticks) + let bare_identifier_present = line_402.contains(" ignore_strings") + || line_402.contains("\tignore_strings") + || (line_402.contains("override ignore_strings") && !line_402.contains("`ignore_strings`")); + + assert!( + !bare_identifier_present, + "Line 402 should NOT contain bare identifier ignore_strings (should be wrapped in backticks), got: {}", + line_402 + ); +} diff --git a/crates/diffguard/src/main.rs b/crates/diffguard/src/main.rs index c8cd2b8b..1c6aa358 100644 --- a/crates/diffguard/src/main.rs +++ b/crates/diffguard/src/main.rs @@ -29,7 +29,7 @@ use diffguard_types::{ CHECK_SCHEMA_V1, CODE_TOOL_RUNTIME_ERROR, CapabilityStatus, CheckReceipt, ConfigFile, DiffMeta, DirectoryOverrideConfig, FailOn, Finding, MatchMode, REASON_MISSING_BASE, REASON_NO_DIFF_INPUT, REASON_TOOL_ERROR, RuleConfig, Scope, Severity, ToolMeta, Verdict, VerdictCounts, - VerdictStatus, + VerdictStatus, escape_md, }; mod config_loader; @@ -1689,20 +1689,6 @@ fn render_finding_row_with_baseline(f: &Finding, is_baseline: bool) -> String { ) } -/// Escapes special markdown characters in a string. -fn escape_md(s: &str) -> String { - s.replace('|', "\\|") - .replace('`', "\\`") - .replace('#', "\\#") - .replace('*', "\\*") - .replace('_', "\\_") - .replace('[', "\\[") - .replace(']', "\\]") - .replace('>', "\\>") - .replace('\r', "\\r") - .replace('\n', "\\n") -} - /// Renders markdown output with baseline/new annotations. /// /// This modifies the table output to include baseline/new annotations for each finding. diff --git a/crates/diffguard/tests/red_tests_work_d4a75f70.rs b/crates/diffguard/tests/red_tests_work_d4a75f70.rs new file mode 100644 index 00000000..66737a20 --- /dev/null +++ b/crates/diffguard/tests/red_tests_work_d4a75f70.rs @@ -0,0 +1,230 @@ +//! Red tests for work-d4a75f70: Document `tags` and `test_cases` in diffguard.toml.example +//! +//! These tests verify that `diffguard.toml.example` demonstrates the `tags` and `test_cases` +//! features that exist in the codebase but are missing from the example file. +//! +//! **Before fix**: `diffguard.toml.example` is missing: +//! - `tags = ["safety"]` on `rust.no_unwrap` rule +//! - `[[rule.test_cases]]` blocks on `rust.no_unwrap` rule +//! +//! **After fix**: `diffguard.toml.example` should contain: +//! - `rust.no_unwrap` rule with `tags = ["safety"]` (consistent with built_in.json line 30) +//! - `rust.no_unwrap` rule with at least one positive test case (should_match = true) +//! - `rust.no_unwrap` rule with at least one negative test case (should_match = false) +//! +//! The path to diffguard.toml.example is computed at compile time using CARGO_MANIFEST_DIR. +//! For tests in crates/diffguard/tests/, CARGO_MANIFEST_DIR = crates/diffguard +//! We need to go up 2 levels to reach the repo root: crates/diffguard -> crates -> repo root + +/// The content of diffguard.toml.example embedded at compile time. +const DIFFGUARD_EXAMPLE_CONTENT: &str = include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/../../diffguard.toml.example" +)); + +/// Find the bounds of the `rust.no_unwrap` rule block in the TOML. +/// Returns the start and end line indices (0-based). +fn find_rust_no_unwrap_block(lines: &[&str]) -> Option<(usize, usize)> { + let mut rule_start: Option = None; + let mut in_rust_no_unwrap = false; + + for (i, line) in lines.iter().enumerate() { + let trimmed = line.trim(); + + // Check for end of rust.no_unwrap block BEFORE we process new [[rule]] + if in_rust_no_unwrap && trimmed == "[[rule]]" { + return Some((rule_start.unwrap(), i - 1)); + } + + // Start of a new rule block + if trimmed == "[[rule]]" { + rule_start = Some(i); + in_rust_no_unwrap = false; + } else if let Some(_start) = rule_start { + // Check if this is the rust.no_unwrap rule + if trimmed.starts_with("id = ") && trimmed.contains("rust.no_unwrap") { + in_rust_no_unwrap = true; + } + } + } + + if in_rust_no_unwrap { + rule_start.map(|s| (s, lines.len() - 1)) + } else { + None + } +} + +/// Test that `rust.no_unwrap` rule has `tags = ["safety"]` field. +/// +/// This verifies that users can discover the `tags` feature from the example file. +/// The value should match built_in.json which uses `tags: ["safety"]` for this rule. +#[test] +fn rust_no_unwrap_rule_has_tags_safety() { + let lines: Vec<&str> = DIFFGUARD_EXAMPLE_CONTENT.lines().collect(); + + let (start, end) = find_rust_no_unwrap_block(&lines) + .expect("rust.no_unwrap rule block not found in diffguard.toml.example"); + + // Extract the rust.no_unwrap rule block + let rule_block: String = lines[start..=end].join("\n"); + + // Check for tags field with "safety" value + assert!( + rule_block.contains("tags = [\"safety\"]"), + "diffguard.toml.example rust.no_unwrap rule is MISSING `tags = [\"safety\"]`.\n\n\ + Expected: The rust.no_unwrap rule block should contain `tags = [\"safety\"]`\n to demonstrate the tags feature and be consistent with built_in.json (line 30).\n\n\ + Actual: The rust.no_unwrap rule block does not contain `tags = [\"safety\"]`.\n\n\ + The fix: Add `tags = [\"safety\"]` after the existing fields in the rust.no_unwrap rule,\n after line 51 (ignore_strings = true).\n\n\ + Rule block found at lines {} to {}:\n ```\n {}\n ```", + start + 1, // 1-indexed for user display + end + 1, + rule_block + ); +} + +/// Test that `rust.no_unwrap` rule has at least one `[[rule.test_cases]]` block. +/// +/// This verifies that users can discover the `test_cases` feature from the example file. +/// The `[[rule.test_cases]]` syntax is TOML's array of tables notation for appending +/// elements to an array. +#[test] +fn rust_no_unwrap_rule_has_test_cases_blocks() { + let lines: Vec<&str> = DIFFGUARD_EXAMPLE_CONTENT.lines().collect(); + + let (start, end) = find_rust_no_unwrap_block(&lines) + .expect("rust.no_unwrap rule block not found in diffguard.toml.example"); + + // Extract the rust.no_unwrap rule block + let rule_block: String = lines[start..=end].join("\n"); + + // Check for [[rule.test_cases]] syntax (TOML array of tables) + assert!( + rule_block.contains("[[rule.test_cases]]"), + "diffguard.toml.example rust.no_unwrap rule is MISSING `[[rule.test_cases]]` blocks.\n\n\ + Expected: The rust.no_unwrap rule should contain at least one `[[rule.test_cases]]`\n block to demonstrate the test_cases feature for `diff test` command.\n\n\ + Actual: The rust.no_unwrap rule block does not contain `[[rule.test_cases]]`.\n\n\ + The fix: Add `[[rule.test_cases]]` blocks after `tags` within the rust.no_unwrap rule block.\n Each block should have `input` and `should_match` fields.\n\n\ + Rule block found at lines {} to {}:\n ```\n {}\n ```", + start + 1, + end + 1, + rule_block + ); +} + +/// Test that `rust.no_unwrap` rule has a positive test case with `should_match = true`. +/// +/// A positive test case verifies that the rule matches inputs that should be flagged. +/// The example input should contain `.unwrap()` or `.expect()` which are the patterns +/// that rust.no_unwrap detects. +#[test] +fn rust_no_unwrap_has_positive_test_case() { + let lines: Vec<&str> = DIFFGUARD_EXAMPLE_CONTENT.lines().collect(); + + let (start, end) = find_rust_no_unwrap_block(&lines) + .expect("rust.no_unwrap rule block not found in diffguard.toml.example"); + + // Extract the rust.no_unwrap rule block + let rule_block: String = lines[start..=end].join("\n"); + + // Look for [[rule.test_cases]] blocks and check for should_match = true + // A positive test case should contain `.unwrap()` or `.expect()` in the input + // (which the rule is designed to catch) + let has_positive_case = rule_block.contains("[[rule.test_cases]]") + && rule_block.contains("should_match = true") + && (rule_block.contains(".unwrap()") || rule_block.contains(".expect()")); + + assert!( + has_positive_case, + "diffguard.toml.example rust.no_unwrap rule is MISSING a positive test case.\n\n\ + Expected: At least one `[[rule.test_cases]]` block with `should_match = true`\n where the `input` contains `.unwrap()` or `.expect()` (patterns the rule matches).\n\n\ + Actual: No positive test case found with matching input and should_match = true.\n\n\ + The fix: Add a test case block like:\n ```toml\n [[rule.test_cases]]\n input = \"let x = y.unwrap();\"\n should_match = true\n description = \"unwrap() call should be flagged\"\n ```\n\n\ + Rule block found at lines {} to {}:\n ```\n {}\n ```", + start + 1, + end + 1, + rule_block + ); +} + +/// Test that `rust.no_unwrap` rule has a negative test case with `should_match = false`. +/// +/// A negative test case verifies that the rule does NOT match safe inputs. +/// The example input should NOT contain `.unwrap()` or `.expect()`. +#[test] +fn rust_no_unwrap_has_negative_test_case() { + let lines: Vec<&str> = DIFFGUARD_EXAMPLE_CONTENT.lines().collect(); + + let (start, end) = find_rust_no_unwrap_block(&lines) + .expect("rust.no_unwrap rule block not found in diffguard.toml.example"); + + // Extract the rust.no_unwrap rule block + let rule_block: String = lines[start..=end].join("\n"); + + // Look for [[rule.test_cases]] blocks and check for should_match = false + // A negative test case should use safe code without .unwrap() or .expect() + let has_negative_case = rule_block.contains("[[rule.test_cases]]") + && rule_block.contains("should_match = false") + && !rule_block.contains(".unwrap()") + && !rule_block.contains(".expect()"); + + assert!( + has_negative_case, + "diffguard.toml.example rust.no_unwrap rule is MISSING a negative test case.\n\n\ + Expected: At least one `[[rule.test_cases]]` block with `should_match = false`\n where the `input` does NOT contain `.unwrap()` or `.expect()` (safe code).\n\n\ + Actual: No negative test case found with should_match = false and safe input.\n\n\ + The fix: Add a test case block like:\n ```toml\n [[rule.test_cases]]\n input = \"let x = y.ok();\"\n should_match = false\n description = \"ok() is safe and should not be flagged\"\n ```\n\n\ + Rule block found at lines {} to {}:\n ```\n {}\n ```", + start + 1, + end + 1, + rule_block + ); +} + +/// Test that tags appears before [[rule.test_cases]] in the rust.no_unwrap rule. +/// +/// Per the acceptance criteria, `tags` should appear after existing fields and +/// `[[rule.test_cases]]` blocks should appear after `tags`. +#[test] +fn tags_appears_before_test_cases_in_rust_no_unwrap() { + let lines: Vec<&str> = DIFFGUARD_EXAMPLE_CONTENT.lines().collect(); + + let (start, end) = find_rust_no_unwrap_block(&lines) + .expect("rust.no_unwrap rule block not found in diffguard.toml.example"); + + let rule_block: String = lines[start..=end].join("\n"); + + let tags_pos = rule_block.find("tags = [\"safety\"]"); + let test_cases_pos = rule_block.find("[[rule.test_cases]]"); + + if tags_pos.is_none() { + panic!( + "tags = [\"safety\"] not found in rust.no_unwrap rule block.\n\ + This test requires tags to be present before checking ordering." + ); + } + + if test_cases_pos.is_none() { + panic!( + "[[rule.test_cases]] not found in rust.no_unwrap rule block.\n\ + This test requires test_cases to be present before checking ordering." + ); + } + + let tags_idx = tags_pos.unwrap(); + let test_cases_idx = test_cases_pos.unwrap(); + + assert!( + tags_idx < test_cases_idx, + "tags should appear BEFORE [[rule.test_cases]] in the rust.no_unwrap rule.\n\n\ + Expected: tags = [\"safety\"] at position {}, [[rule.test_cases]] at position {}\n \ + Actual: tags appears after [[rule.test_cases]]\n\n\ + The fix: Ensure `tags = [\"safety\"]` appears after existing fields\n (after line 51: ignore_strings = true) and BEFORE any [[rule.test_cases]] blocks.\n\n\ + Rule block found at lines {} to {}:\n ```\n {}\n ```", + tags_idx, + test_cases_idx, + start + 1, + end + 1, + rule_block + ); +} diff --git a/diffguard.toml.example b/diffguard.toml.example index 167c194f..5e78c3bf 100644 --- a/diffguard.toml.example +++ b/diffguard.toml.example @@ -50,6 +50,18 @@ paths = ["**/*.rs"] exclude_paths = ["**/tests/**", "**/benches/**", "**/examples/**"] ignore_comments = true ignore_strings = true +tags = ["safety"] + +# Test cases validate rule behavior using `diff test` +[[rule.test_cases]] +description = "Positive: unwrap() should match" +input = "let x = y.unwrap();" +should_match = true + +[[rule.test_cases]] +description = "Negative: ok() is safe and should not match" +input = "let x = y.ok();" +should_match = false [[rule]] id = "rust.no_dbg" diff --git a/fuzz/fuzz_targets/sarif_renderer.rs b/fuzz/fuzz_targets/sarif_renderer.rs index 3d9c0256..a737a52e 100644 --- a/fuzz/fuzz_targets/sarif_renderer.rs +++ b/fuzz/fuzz_targets/sarif_renderer.rs @@ -98,7 +98,7 @@ fuzz_target!(|input: FuzzSarifInput| { context_lines: 0, scope: Scope::Added, files_scanned: findings.len() as u64, - lines_scanned: findings.len() as u32 * 10, + lines_scanned: findings.len() as u64 * 10, }, findings, verdict: Verdict {