diff --git a/crates/diffguard-analytics/src/lib.rs b/crates/diffguard-analytics/src/lib.rs index 79dbd701..a4c9ed52 100644 --- a/crates/diffguard-analytics/src/lib.rs +++ b/crates/diffguard-analytics/src/lib.rs @@ -175,7 +175,7 @@ pub struct TrendRun { /// Stored as `u64` to avoid silent truncation for very large repositories /// (those with more than 2^32 - 1 unique files). pub files_scanned: u64, - pub lines_scanned: u32, + pub lines_scanned: u64, pub findings: u32, } diff --git a/crates/diffguard-core/src/render.rs b/crates/diffguard-core/src/render.rs index 075ea226..31c0aa1e 100644 --- a/crates/diffguard-core/src/render.rs +++ b/crates/diffguard-core/src/render.rs @@ -1,6 +1,6 @@ use diffguard_types::{ CheckReceipt, Finding, REASON_GIT_UNAVAILABLE, REASON_MISSING_BASE, REASON_NO_DIFF_INPUT, - REASON_TOOL_ERROR, REASON_TRUNCATED, VerdictStatus, + REASON_TOOL_ERROR, REASON_TRUNCATED, VerdictStatus, escape_md, }; /// Reasons that are meaningful to render in markdown output. @@ -114,28 +114,6 @@ fn render_finding_row(f: &Finding) -> String { ) } -/// Escapes special Markdown characters in table cell content. -/// -/// Escapes pipe (`|`), backtick (`` ` ``), hash (`#`), asterisk (`*`), -/// underscore (`_`), open bracket (`[`), close bracket (`]`), and greater-than -/// (`>`) characters by prefixing with backslash. Also escapes CRLF (`\r\n`) -/// and LF (`\n`) line endings to prevent breaking the markdown table structure. -/// -/// These escapes are needed to prevent breaking the markdown table structure -/// and prevent unintended markdown formatting. 
-fn escape_md(s: &str) -> String { - s.replace('|', "\\|") - .replace('`', "\\`") - .replace('#', "\\#") - .replace('*', "\\*") - .replace('_', "\\_") - .replace('[', "\\[") - .replace(']', "\\]") - .replace('>', "\\>") - .replace('\r', "\\r") - .replace('\n', "\\n") -} - #[cfg(test)] mod tests { use super::*; diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_common_escapes.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_common_escapes.snap new file mode 100644 index 00000000..9e6445bd --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_common_escapes.snap @@ -0,0 +1,6 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"dir\\ name\\\"quote\\\"\\\\tab\\tnewline\\ncarriage\\rend\"#)" +--- +dir name"quote"\tab newline +carriage end diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_embedded_octal-2.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_embedded_octal-2.snap new file mode 100644 index 00000000..dc5ca10a --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_embedded_octal-2.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"octal\\141\\040space\"#)" +--- +octala space diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_embedded_octal.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_embedded_octal.snap new file mode 100644 index 00000000..8deade1a --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_embedded_octal.snap @@ -0,0 +1,5 @@ +--- +source: 
crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"hello\\041world\"#)" +--- +hello!world diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_empty.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_empty.snap new file mode 100644 index 00000000..b3ff7320 --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_empty.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(\"\")" +--- + diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_max_octal_values-2.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_max_octal_values-2.snap new file mode 100644 index 00000000..0b978dab --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_max_octal_values-2.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\77\"#)" +--- +? 
diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_max_octal_values.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_max_octal_values.snap new file mode 100644 index 00000000..1e6c3b04 --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_max_octal_values.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\377\"#)" +--- +� diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_multiple_octal.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_multiple_octal.snap new file mode 100644 index 00000000..ea66aec9 --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_multiple_octal.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\001\\002\\003\"#)" +--- + diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_no_escapes.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_no_escapes.snap new file mode 100644 index 00000000..9fc9a316 --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_no_escapes.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(\"plain_string_no_escapes\")" +--- +plain_string_no_escapes diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_non_octal_digits-2.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_non_octal_digits-2.snap new file mode 100644 index 00000000..200f6401 --- /dev/null 
+++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_non_octal_digits-2.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\9\"#)" +--- +\9 diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_non_octal_digits.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_non_octal_digits.snap new file mode 100644 index 00000000..c7b2a0ac --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_non_octal_digits.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\8\"#)" +--- +\8 diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-2.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-2.snap new file mode 100644 index 00000000..e716ed5c --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-2.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\1\"#)" +--- + diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-3.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-3.snap new file mode 100644 index 00000000..d98a7567 --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-3.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\2\"#)" +--- + diff --git 
a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-4.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-4.snap new file mode 100644 index 00000000..442b0033 --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-4.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\3\"#)" +--- + diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-5.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-5.snap new file mode 100644 index 00000000..5ae62c76 --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-5.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\4\"#)" +--- + diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-6.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-6.snap new file mode 100644 index 00000000..fd7db8ec --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-6.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\5\"#)" +--- + diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-7.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-7.snap new file mode 100644 index 00000000..d9e2d5ff --- /dev/null +++ 
b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-7.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\6\"#)" +--- + diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-8.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-8.snap new file mode 100644 index 00000000..56582832 --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal-8.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\7\"#)" +--- + diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal.snap new file mode 100644 index 00000000..a4d987e5 Binary files /dev/null and b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_single_digit_octal.snap differ diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_three_digit_octal-2.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_three_digit_octal-2.snap new file mode 100644 index 00000000..c167f6cf --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_three_digit_octal-2.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\177\"#)" +--- + diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_three_digit_octal.snap 
b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_three_digit_octal.snap new file mode 100644 index 00000000..a8436823 Binary files /dev/null and b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_three_digit_octal.snap differ diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_trailing_backslash.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_trailing_backslash.snap new file mode 100644 index 00000000..5c5346ac --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_trailing_backslash.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"endswith\\\"#)" +--- +endswith\ diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_two_digit_octal-2.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_two_digit_octal-2.snap new file mode 100644 index 00000000..e7b8b0d0 --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_two_digit_octal-2.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\07\"#)" +--- + diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_two_digit_octal-3.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_two_digit_octal-3.snap new file mode 100644 index 00000000..af6652ec --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_two_digit_octal-3.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\10\"#)" 
+--- + diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_two_digit_octal-4.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_two_digit_octal-4.snap new file mode 100644 index 00000000..c7033a06 --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_two_digit_octal-4.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\12\"#)" +--- + diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_two_digit_octal-5.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_two_digit_octal-5.snap new file mode 100644 index 00000000..351fb4aa --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_two_digit_octal-5.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\15\"#)" +--- + diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_two_digit_octal-6.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_two_digit_octal-6.snap new file mode 100644 index 00000000..0b978dab --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_two_digit_octal-6.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\77\"#)" +--- +? 
diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_two_digit_octal.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_two_digit_octal.snap new file mode 100644 index 00000000..0e2be016 Binary files /dev/null and b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_two_digit_octal.snap differ diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_unknown_escape-2.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_unknown_escape-2.snap new file mode 100644 index 00000000..ce8a4a48 --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_unknown_escape-2.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\q1\"#)" +--- +\q1 diff --git a/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_unknown_escape.snap b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_unknown_escape.snap new file mode 100644 index 00000000..dc6a4ec4 --- /dev/null +++ b/crates/diffguard-diff/src/snapshots/diffguard_diff__unified__tests__snapshot_unescape_git_path_unknown_escape.snap @@ -0,0 +1,5 @@ +--- +source: crates/diffguard-diff/src/unified.rs +expression: "unescape_git_path(r#\"\\q\"#)" +--- +\q diff --git a/crates/diffguard-diff/tests/integration_octal_escaped_paths.rs b/crates/diffguard-diff/tests/integration_octal_escaped_paths.rs new file mode 100644 index 00000000..f2b1055a --- /dev/null +++ b/crates/diffguard-diff/tests/integration_octal_escaped_paths.rs @@ -0,0 +1,147 @@ +//! Integration tests for parsing diffs with octal-escaped paths. +//! +//! These tests exercise the full pipeline: raw diff text with quoted paths +//! 
containing octal escape sequences → parse_unified_diff → DiffLine with +//! correctly unescaped paths. +//! +//! The change being tested: replacing `u8 as u32` with `u32::from(u8)` in +//! unescape_git_path's octal parsing branch. This is a lossless widening cast +//! that doesn't affect behavior, but these tests verify the full integration +//! path still works correctly. + +use diffguard_diff::parse_unified_diff; +use diffguard_types::Scope; + +/// Test: Full pipeline with a path containing an octal-escaped space. +/// Git outputs paths with spaces as `\040` (octal for space). +/// +/// Flow: diff text with quoted path → parse_diff_git_line → tokenize_git_paths +/// → unquote_git_token → unescape_git_path → unescaped path in DiffLine +#[test] +fn test_parse_diff_with_octal_escaped_space_in_path() { + // Git quotes paths with special characters and uses octal escapes for spaces + // "\040" is octal for space (ASCII 32) + let diff = r#" +diff --git "a/path\040with spaces/file.rs" "b/path with spaces/file.rs" +--- "a/path\040with spaces/file.rs" ++++ "b/path with spaces/file.rs" +@@ -1 +1,2 @@ + fn existing() {} ++fn added() {} +"#; + + let (lines, stats) = parse_unified_diff(diff, Scope::Added).unwrap(); + + // The path should be unescaped: \040 → ' ' + assert_eq!(lines.len(), 1); + assert_eq!(lines[0].path, "path with spaces/file.rs"); + assert_eq!(lines[0].content, "fn added() {}"); + assert_eq!(stats.files, 1); + assert_eq!(stats.lines, 1); +} + +/// Test: Full pipeline with embedded octal escapes in path. +/// Path contains multiple octal escapes representing different characters. +/// +/// \041 = '!' 
(ASCII 33) +/// \040 = ' ' (ASCII 32) +#[test] +fn test_parse_diff_with_multiple_octal_escapes_in_path() { + let diff = r#" +diff --git "a/file\041name\040here.rs" "b/file!name here.rs" +--- "a/file\041name\040here.rs" ++++ "b/file!name here.rs" +@@ -1 +1,2 @@ + fn existing() {} ++fn added() {} +"#; + + let (lines, _stats) = parse_unified_diff(diff, Scope::Added).unwrap(); + + // All octal escapes should be properly decoded + assert_eq!(lines[0].path, "file!name here.rs"); +} + +/// Test: Octal escape at the very start of the file name. +/// +/// \143 = 'c' (ASCII 99) - octal for lowercase 'c' +#[test] +fn test_parse_diff_with_octal_escape_at_path_boundaries() { + // \143 = 'c' + let diff = r#" +diff --git "a/\143at.rs" "b/cat.rs" +--- "a/\143at.rs" ++++ "b/cat.rs" +@@ -1 +1,2 @@ + fn existing() {} ++fn added() {} +"#; + + let (lines, _stats) = parse_unified_diff(diff, Scope::Added).unwrap(); + assert_eq!(lines[0].path, "cat.rs"); +} + +/// Test: Three-digit octal escapes at the ASCII range boundaries. +/// \177 = DEL (ASCII 127), \000 = NUL (ASCII 0) +#[test] +fn test_parse_diff_with_octal_edge_cases() { + // \177 = 127 (DEL), \000 = 0 (NUL) + // These are boundary cases for the u8→u32 cast + let diff = r#" +diff --git "a/\177\000file.rs" "b/\177\000file.rs" +--- "a/\177\000file.rs" ++++ "b/\177\000file.rs" +@@ -1 +1,2 @@ + fn existing() {} ++fn added() {} +"#; + + let (lines, _stats) = parse_unified_diff(diff, Scope::Added).unwrap(); + // The path should contain the raw bytes (non-printable but valid) + assert_eq!(lines[0].path, "\x7F\x00file.rs"); +} + +/// Test: Renamed file with octal-escaped path. +/// When a file is renamed, the "rename to" path can also have octal escapes.
+#[test] +fn test_parse_diff_rename_with_octal_escaped_path() { + let diff = r#" +diff --git "a/old\040name.rs" "b/new\040name.rs" +rename from old name.rs +rename to new name.rs +--- "a/old\040name.rs" ++++ "b/new\040name.rs" +@@ -1 +1,2 @@ + fn existing() {} ++fn added() {} +"#; + + let (lines, _stats) = parse_unified_diff(diff, Scope::Added).unwrap(); + // The path should be unescaped + assert_eq!(lines[0].path, "new name.rs"); +} + +/// Test: Multiple files with mixed quoted/unquoted paths. +#[test] +fn test_parse_diff_multiple_files_mixed_path_formats() { + let diff = r#" +diff --git "a/quoted\040path.rs" "b/quoted path.rs" +--- "a/quoted\040path.rs" ++++ "b/quoted path.rs" +@@ -1 +1,2 @@ ++added to quoted path +diff --git a/normal_path.rs b/normal_path.rs +--- a/normal_path.rs ++++ b/normal_path.rs +@@ -1 +1,2 @@ ++added to normal path +"#; + + let (lines, stats) = parse_unified_diff(diff, Scope::Added).unwrap(); + + assert_eq!(lines.len(), 2); + assert_eq!(lines[0].path, "quoted path.rs"); + assert_eq!(lines[1].path, "normal_path.rs"); + assert_eq!(stats.files, 2); + assert_eq!(stats.lines, 2); +} diff --git a/crates/diffguard-domain/src/evaluate.rs b/crates/diffguard-domain/src/evaluate.rs index d999bc12..0273f773 100644 --- a/crates/diffguard-domain/src/evaluate.rs +++ b/crates/diffguard-domain/src/evaluate.rs @@ -26,7 +26,7 @@ pub struct Evaluation { /// The previous `u32` cast would silently truncate, producing incorrect /// (often zero) counts for very large codebases. pub files_scanned: u64, - pub lines_scanned: u32, + pub lines_scanned: u64, /// Aggregated per-rule hit counts (deterministically sorted by rule ID). 
pub rule_hits: Vec, } @@ -102,7 +102,7 @@ pub fn evaluate_lines_with_overrides_and_language( .iter() .map(|line| line.path.clone()) .collect::>(); - let lines_scanned = u32::try_from(input_lines.len()).unwrap_or(u32::MAX); + let lines_scanned = u64::try_from(input_lines.len()).unwrap_or(u64::MAX); let mut current_file: Option = None; let mut current_lang = Language::Unknown; diff --git a/crates/diffguard-domain/src/preprocess.rs b/crates/diffguard-domain/src/preprocess.rs index cbf67d5e..1e9b34fb 100644 --- a/crates/diffguard-domain/src/preprocess.rs +++ b/crates/diffguard-domain/src/preprocess.rs @@ -166,6 +166,7 @@ pub struct PreprocessOptions { } impl PreprocessOptions { + #[must_use] pub fn none() -> Self { Self { mask_comments: false, @@ -173,6 +174,7 @@ impl PreprocessOptions { } } + #[must_use] pub fn comments_only() -> Self { Self { mask_comments: true, diff --git a/crates/diffguard-domain/tests/red_tests_work_e8a88475.rs b/crates/diffguard-domain/tests/red_tests_work_e8a88475.rs new file mode 100644 index 00000000..e4dc5f4c --- /dev/null +++ b/crates/diffguard-domain/tests/red_tests_work_e8a88475.rs @@ -0,0 +1,201 @@ +//! Red tests for work-e8a88475: #[must_use] on preprocess.rs factory/constructor methods +//! +//! These tests verify that `#[must_use]` attribute is present on 6 functions in +//! `crates/diffguard-domain/src/preprocess.rs` that return `Self` values representing +//! configuration or state that must not be silently dropped. +//! +//! The target functions are: +//! 1. PreprocessOptions::none() - factory method +//! 2. PreprocessOptions::comments_only() - factory method +//! 3. PreprocessOptions::strings_only() - factory method +//! 4. PreprocessOptions::comments_and_strings() - factory method +//! 5. Preprocessor::new(opts: PreprocessOptions) -> Self - constructor +//! 6. Preprocessor::with_language(opts: PreprocessOptions, lang: Language) -> Self - constructor +//! +//! ## How these tests work +//! 
These tests use `include_str!` to read the source file and verify that +//! `#[must_use]` appears immediately before each target function declaration. +//! The source is embedded at compile time; the checks themselves run when the tests execute. +//! +//! ## Expected behavior +//! - BEFORE fix: Tests FAIL because #[must_use] is not present on those functions +//! - AFTER fix: Tests PASS because #[must_use] is correctly placed + +/// Load the preprocess.rs source file for inspection. +/// We use include_str! to get the raw source at compile time. +const PREPROCESS_SOURCE: &str = include_str!("../src/preprocess.rs"); + +/// Helper to find a function declaration and verify #[must_use] appears immediately before it. +/// Returns `Some((has_must_use, line))` (1-indexed line of the first match), or `None` if the signature is absent. +fn find_must_use_before_function(source: &str, fn_signature: &str) -> Option<(bool, usize)> { + let lines: Vec<&str> = source.lines().collect(); + + for (i, line) in lines.iter().enumerate() { + if line.contains(fn_signature) { + // Check if the previous non-empty line has #[must_use] + // We need to go back and skip empty lines and comments + let mut check_idx = i; + while check_idx > 0 { + check_idx -= 1; + let prev_line = lines[check_idx].trim(); + if prev_line.is_empty() { + continue; + } + if prev_line.starts_with("//") { + continue; + } + if prev_line.starts_with("/*") + || prev_line.starts_with("*/") + || prev_line.starts_with("*") + { + continue; + } + // Found a meaningful previous line + let has_must_use = prev_line == "#[must_use]"; + return Some((has_must_use, i + 1)); // 1-indexed line number + } + // Function is at start of file or only preceded by comments/empty lines + return Some((false, i + 1)); + } + } + None +} + +/// Test that PreprocessOptions::none() has #[must_use] +#[test] +fn preprocess_options_none_has_must_use() { + let result = find_must_use_before_function(PREPROCESS_SOURCE, "pub fn none() -> Self"); + assert!( + result.is_some(), + "Could not find 'pub fn
none() -> Self' in preprocess.rs" + ); + + let (has_must_use, line_num) = result.unwrap(); + assert!( + has_must_use, + "PreprocessOptions::none() at line {} does NOT have #[must_use] attribute. \ + The function returns Self representing configuration that must not be dropped. \ + Expected #[must_use] to appear on the line immediately before the function declaration.", + line_num + ); +} + +/// Test that PreprocessOptions::comments_only() has #[must_use] +#[test] +fn preprocess_options_comments_only_has_must_use() { + let result = find_must_use_before_function(PREPROCESS_SOURCE, "pub fn comments_only() -> Self"); + assert!( + result.is_some(), + "Could not find 'pub fn comments_only() -> Self' in preprocess.rs" + ); + + let (has_must_use, line_num) = result.unwrap(); + assert!( + has_must_use, + "PreprocessOptions::comments_only() at line {} does NOT have #[must_use] attribute. \ + The function returns Self representing configuration that must not be dropped. \ + Expected #[must_use] to appear on the line immediately before the function declaration.", + line_num + ); +} + +/// Test that PreprocessOptions::strings_only() has #[must_use] +#[test] +fn preprocess_options_strings_only_has_must_use() { + let result = find_must_use_before_function(PREPROCESS_SOURCE, "pub fn strings_only() -> Self"); + assert!( + result.is_some(), + "Could not find 'pub fn strings_only() -> Self' in preprocess.rs" + ); + + let (has_must_use, line_num) = result.unwrap(); + assert!( + has_must_use, + "PreprocessOptions::strings_only() at line {} does NOT have #[must_use] attribute. \ + The function returns Self representing configuration that must not be dropped. 
\ + Expected #[must_use] to appear on the line immediately before the function declaration.", + line_num + ); +} + +/// Test that PreprocessOptions::comments_and_strings() has #[must_use] +#[test] +fn preprocess_options_comments_and_strings_has_must_use() { + let result = + find_must_use_before_function(PREPROCESS_SOURCE, "pub fn comments_and_strings() -> Self"); + assert!( + result.is_some(), + "Could not find 'pub fn comments_and_strings() -> Self' in preprocess.rs" + ); + + let (has_must_use, line_num) = result.unwrap(); + assert!( + has_must_use, + "PreprocessOptions::comments_and_strings() at line {} does NOT have #[must_use] attribute. \ + The function returns Self representing configuration that must not be dropped. \ + Expected #[must_use] to appear on the line immediately before the function declaration.", + line_num + ); +} + +/// Test that Preprocessor::new() has #[must_use] +#[test] +fn preprocessor_new_has_must_use() { + let result = find_must_use_before_function( + PREPROCESS_SOURCE, + "pub fn new(opts: PreprocessOptions) -> Self", + ); + assert!( + result.is_some(), + "Could not find 'pub fn new(opts: PreprocessOptions) -> Self' in preprocess.rs" + ); + + let (has_must_use, line_num) = result.unwrap(); + assert!( + has_must_use, + "Preprocessor::new() at line {} does NOT have #[must_use] attribute. \ + The function returns Self representing preprocessor state that must not be dropped. 
\ + Expected #[must_use] to appear on the line immediately before the function declaration.", + line_num + ); +} + +/// Test that Preprocessor::with_language() has #[must_use] +#[test] +fn preprocessor_with_language_has_must_use() { + let result = find_must_use_before_function( + PREPROCESS_SOURCE, + "pub fn with_language(opts: PreprocessOptions, lang: Language) -> Self", + ); + assert!( + result.is_some(), + "Could not find 'pub fn with_language(opts: PreprocessOptions, lang: Language) -> Self' in preprocess.rs" + ); + + let (has_must_use, line_num) = result.unwrap(); + assert!( + has_must_use, + "Preprocessor::with_language() at line {} does NOT have #[must_use] attribute. \ + The function returns Self representing preprocessor state that must not be dropped. \ + Expected #[must_use] to appear on the line immediately before the function declaration.", + line_num + ); +} + +/// Verification test: Ensure there are exactly 6 #[must_use] attributes in preprocess.rs +/// This verifies scope is limited to the 6 specified functions and no more are added. +#[test] +fn exactly_six_must_use_attributes_in_preprocess() { + let must_use_count = PREPROCESS_SOURCE + .lines() + .filter(|line| line.trim() == "#[must_use]") + .count(); + + assert_eq!( + must_use_count, 6, + "Expected exactly 6 #[must_use] attributes in preprocess.rs, but found {}. \ + The scope of this issue is limited to 6 specific functions. \ + If more #[must_use] attributes are needed, that should be a separate issue.", + must_use_count + ); +} diff --git a/crates/diffguard-types/src/lib.rs b/crates/diffguard-types/src/lib.rs index b634c06b..d9995763 100644 --- a/crates/diffguard-types/src/lib.rs +++ b/crates/diffguard-types/src/lib.rs @@ -133,7 +133,11 @@ pub struct DiffMeta { /// Stored as `u64` to avoid silent truncation for very large repositories /// (those with more than 2^32 - 1 unique files). pub files_scanned: u64, - pub lines_scanned: u32, + /// Number of distinct lines that were scanned. 
+ /// +/// Stored as `u64` to avoid silent truncation for very large diffs +/// (those with more than 2^32 - 1 lines). + pub lines_scanned: u64, } /// A single rule match within a scoped file. @@ -426,6 +430,29 @@ fn is_match_mode_any(mode: &MatchMode) -> bool { matches!(mode, MatchMode::Any) } +// Utility for markdown escaping, used by rendering crates — kept here to avoid duplication across crates. +/// Escapes special Markdown characters in table cell content. +/// +/// Escapes pipe (`|`), backtick (`` ` ``), hash (`#`), asterisk (`*`), +/// underscore (`_`), open bracket (`[`), close bracket (`]`), and greater-than +/// (`>`) characters by prefixing with backslash. Also replaces carriage return +/// (`\r`) and line feed (`\n`) characters with the literal text `\r` and `\n`. +/// +/// These escapes are needed to prevent breaking the markdown table structure +/// and prevent unintended markdown formatting. +pub fn escape_md(s: &str) -> String { + s.replace('|', "\\|") + .replace('`', "\\`") + .replace('#', "\\#") + .replace('*', "\\*") + .replace('_', "\\_") + .replace('[', "\\[") + .replace(']', "\\]") + .replace('>', "\\>") + .replace('\r', "\\r") + .replace('\n', "\\n") +} + // ============================================================================ // Per-directory override types // ============================================================================ diff --git a/crates/diffguard/src/main.rs b/crates/diffguard/src/main.rs index c8cd2b8b..1c6aa358 100644 --- a/crates/diffguard/src/main.rs +++ b/crates/diffguard/src/main.rs @@ -29,7 +29,7 @@ use diffguard_types::{ CHECK_SCHEMA_V1, CODE_TOOL_RUNTIME_ERROR, CapabilityStatus, CheckReceipt, ConfigFile, DiffMeta, DirectoryOverrideConfig, FailOn, Finding, MatchMode, REASON_MISSING_BASE, REASON_NO_DIFF_INPUT, REASON_TOOL_ERROR, RuleConfig, Scope, Severity, ToolMeta, Verdict, VerdictCounts, - VerdictStatus, + VerdictStatus, escape_md, }; mod config_loader; @@ -1689,20 +1689,6 @@ fn
render_finding_row_with_baseline(f: &Finding, is_baseline: bool) -> String { ) } -/// Escapes special markdown characters in a string. -fn escape_md(s: &str) -> String { - s.replace('|', "\\|") - .replace('`', "\\`") - .replace('#', "\\#") - .replace('*', "\\*") - .replace('_', "\\_") - .replace('[', "\\[") - .replace(']', "\\]") - .replace('>', "\\>") - .replace('\r', "\\r") - .replace('\n', "\\n") -} - /// Renders markdown output with baseline/new annotations. /// /// This modifies the table output to include baseline/new annotations for each finding.