Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion crates/diffguard-analytics/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,11 @@ pub struct TrendRun {
/// Stored as `u64` to avoid silent truncation for very large repositories
/// (those with more than 2^32 - 1 unique files).
pub files_scanned: u64,
pub lines_scanned: u32,
/// Number of distinct lines that were scanned.
///
/// Stored as `u64` to avoid silent truncation for very large diffs
/// (those with more than 2^32 - 1 unique lines).
pub lines_scanned: u64,
pub findings: u32,
}

Expand Down
296 changes: 296 additions & 0 deletions crates/diffguard-analytics/tests/edge_cases.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,296 @@
//! Edge case tests for diffguard-analytics u32→u64 migration (issue #577)
//!
//! These tests verify that the migration from u32 to u64 for `findings`,
//! `run_count`, and `total_findings` correctly handles:
//! - Values exceeding u32::MAX
//! - Large accumulations
//! - Empty and single-run histories
//! - Delta calculations with large values

use diffguard_analytics::*;
use diffguard_types::{DiffMeta, Scope, Severity, ToolMeta, Verdict, VerdictCounts, VerdictStatus};
use std::u32;

/// Builds a `TrendRun` fixture whose `findings` total and per-severity counts
/// come from the caller; every other field is a fixed placeholder value.
///
/// NOTE(review): this stores a `u64` directly into `TrendRun::findings`, so it
/// assumes that field is declared as `u64` — confirm against the struct
/// definition in `diffguard-analytics/src/lib.rs`.
fn make_run(findings: u64, info: u32, warn: u32, error: u32, suppressed: u32) -> TrendRun {
    // Assemble the severity breakdown first so the run literal stays flat.
    let counts = VerdictCounts {
        info,
        warn,
        error,
        suppressed,
    };

    TrendRun {
        started_at: String::from("2026-01-01T00:00:00Z"),
        ended_at: String::from("2026-01-01T00:00:01Z"),
        duration_ms: 1000,
        base: String::from("origin/main"),
        head: String::from("HEAD"),
        scope: Scope::Added,
        status: VerdictStatus::Fail,
        counts,
        files_scanned: 1,
        lines_scanned: 100,
        findings,
    }
}

/// A history with no runs summarizes to zero runs, zero findings, no latest
/// run, and no delta.
#[test]
fn summarize_empty_history_yields_zero_totals() {
    let empty = TrendHistory {
        schema: TREND_HISTORY_SCHEMA_V2.to_owned(),
        runs: Vec::new(),
    };

    let summary = summarize_trend_history(&empty);

    assert_eq!(summary.run_count, 0);
    assert_eq!(summary.total_findings, 0);
    assert!(summary.latest.is_none());
    assert!(summary.delta_from_previous.is_none());
}

/// With exactly one run there is nothing to compare against, so the summary
/// reports that run's findings and no delta.
#[test]
fn summarize_single_run_has_no_delta() {
    let only_run = TrendHistory {
        schema: TREND_HISTORY_SCHEMA_V2.to_owned(),
        runs: vec![make_run(5, 1, 2, 3, 4)],
    };

    let summary = summarize_trend_history(&only_run);

    assert_eq!(summary.run_count, 1);
    assert_eq!(summary.total_findings, 5);
    assert!(summary.delta_from_previous.is_none());
}

/// Two runs produce a delta equal to the later run minus the earlier run.
#[test]
fn summarize_two_runs_reports_delta() {
    // Earlier run: 10 findings, warn = 2, error = 3.
    // Later run:    7 findings, warn = 1, error = 2.
    let pair = TrendHistory {
        schema: TREND_HISTORY_SCHEMA_V2.to_owned(),
        runs: vec![make_run(10, 1, 2, 3, 4), make_run(7, 0, 1, 2, 3)],
    };

    let summary = summarize_trend_history(&pair);

    assert_eq!(summary.run_count, 2);
    assert_eq!(summary.total_findings, 17);

    let delta = summary.delta_from_previous.expect("should have delta");
    assert_eq!(delta.findings, -3); // 7 - 10
    assert_eq!(delta.warn, -1); // 1 - 2
    assert_eq!(delta.error, -1); // 2 - 3
}

/// A single run whose `findings` is far above `u32::MAX` must be reported by
/// `summarize_trend_history` without truncation (the core bug being guarded).
#[test]
fn summarize_large_findings_value_no_truncation() {
    // u64::MAX / 2 is roughly 9.2 * 10^18, far beyond u32::MAX (~4.29 billion).
    let large_findings = u64::MAX / 2;
    let history = TrendHistory {
        schema: TREND_HISTORY_SCHEMA_V2.to_string(),
        runs: vec![make_run(large_findings, 0, 0, 0, 0)],
    };

    let summary = summarize_trend_history(&history);

    assert_eq!(summary.total_findings, large_findings);
    assert!(summary.total_findings > u32::MAX as u64);
}

/// Summing several runs that individually fit in `u32` must yield a total
/// that exceeds `u32::MAX` without wrapping.
#[test]
fn summarize_accumulates_findings_beyond_u32_max() {
    // Roughly 2 billion findings per run; three runs total roughly 6 billion.
    let per_run = u64::from(u32::MAX) / 2;
    let history = TrendHistory {
        schema: TREND_HISTORY_SCHEMA_V2.to_owned(),
        runs: (0..3).map(|_| make_run(per_run, 0, 0, 0, 0)).collect(),
    };

    let summary = summarize_trend_history(&history);

    assert_eq!(summary.total_findings, per_run * 3);
    assert!(summary.total_findings > u64::from(u32::MAX));
}

/// Checks `run_count` two ways: the value computed for a one-run history is
/// exactly 1, and the field itself can hold a value above `u32::MAX`.
#[test]
fn summarize_run_count_beyond_u32_max() {
    // Building a history with more than u32::MAX runs is impractical, so the
    // computed count is verified on a single-run history instead.
    let history = TrendHistory {
        schema: TREND_HISTORY_SCHEMA_V2.to_string(),
        runs: vec![make_run(1, 0, 0, 0, 0)],
    };
    let summary = summarize_trend_history(&history);
    assert_eq!(summary.run_count, 1);

    // The width of the field is verified by constructing a summary by hand
    // with a run_count that would not fit in a u32.
    let large_run_count = u64::MAX / 2;
    let large_summary = TrendSummary {
        run_count: large_run_count,
        totals: VerdictCounts::default(),
        total_findings: 0,
        latest: None,
        delta_from_previous: None,
    };
    assert_eq!(large_summary.run_count, large_run_count);
    assert!(large_summary.run_count > u32::MAX as u64);
}

/// Converts a receipt carrying 1000 findings into a `TrendRun` and checks
/// that the run's `findings` reports that count.
#[test]
fn trend_run_from_receipt_handles_large_finding_list() {
    // The range is typed as `u32` so `i` can be stored in `line` directly,
    // without an `as` cast.
    let many_findings: Vec<diffguard_types::Finding> = (0u32..1000)
        .map(|i| diffguard_types::Finding {
            rule_id: format!("rule.{}", i),
            severity: Severity::Error,
            message: format!("error {}", i),
            path: format!("src/file{}.rs", i),
            line: i,
            column: Some(1),
            match_text: format!("match{}", i),
            snippet: format!("code {};", i),
        })
        .collect();

    let receipt = diffguard_types::CheckReceipt {
        schema: diffguard_types::CHECK_SCHEMA_V1.to_string(),
        tool: ToolMeta {
            name: "diffguard".to_string(),
            version: "0.2.0".to_string(),
        },
        diff: DiffMeta {
            base: "origin/main".to_string(),
            head: "HEAD".to_string(),
            context_lines: 0,
            scope: Scope::Added,
            files_scanned: 1000,
            lines_scanned: 10000,
        },
        findings: many_findings,
        verdict: Verdict {
            status: VerdictStatus::Fail,
            counts: VerdictCounts {
                info: 0,
                warn: 0,
                error: 1000,
                suppressed: 0,
            },
            reasons: vec![],
        },
        timing: None,
    };

    let run = trend_run_from_receipt(
        &receipt,
        "2026-01-01T00:00:00Z",
        "2026-01-01T00:00:01Z",
        1000,
    );

    // The exact-count check subsumes a separate "greater than zero" check.
    assert_eq!(run.findings, 1000);
}

/// A jump from a handful of findings to a very large count yields a positive
/// delta whose magnitude exceeds `u32::MAX`.
#[test]
fn delta_calculation_with_large_findings_increase() {
    let small = make_run(5, 0, 0, 0, 0);
    let huge = make_run(u64::MAX / 4, 0, 0, 0, 0);
    let history = TrendHistory {
        schema: TREND_HISTORY_SCHEMA_V2.to_owned(),
        runs: vec![small, huge],
    };

    let delta = summarize_trend_history(&history)
        .delta_from_previous
        .expect("should have delta");

    // Positive, and too large to have fit in the old 32-bit range.
    assert!(delta.findings > 0);
    assert!(delta.findings > i64::from(u32::MAX));
}

/// A drop from a very large findings count to a handful yields a negative
/// delta whose magnitude exceeds `u32::MAX`.
#[test]
fn delta_calculation_with_large_findings_decrease() {
    let huge = make_run(u64::MAX / 4, 0, 0, 0, 0);
    let small = make_run(5, 0, 0, 0, 0);
    let history = TrendHistory {
        schema: TREND_HISTORY_SCHEMA_V2.to_owned(),
        runs: vec![huge, small],
    };

    let delta = summarize_trend_history(&history)
        .delta_from_previous
        .expect("should have delta");

    // Negative, and further below zero than the old 32-bit range allowed.
    assert!(delta.findings < 0);
    assert!(delta.findings < -i64::from(u32::MAX));
}

/// `TrendHistory::default()` must carry the V2 schema identifier.
#[test]
fn trend_history_default_uses_schema_v2() {
    assert_eq!(TrendHistory::default().schema, TREND_HISTORY_SCHEMA_V2);
}

/// `normalize_trend_history` must replace an empty schema string with the V2
/// schema identifier.
#[test]
fn normalize_trend_history_sets_schema_v2() {
    // Struct-update syntax builds the blank-schema value in one expression
    // instead of mutating a default afterwards.
    let history = TrendHistory {
        schema: String::new(),
        ..TrendHistory::default()
    };

    let normalized = normalize_trend_history(history);

    assert_eq!(normalized.schema, TREND_HISTORY_SCHEMA_V2);
}

/// Appending a run to a history whose schema is empty must leave the result
/// tagged with the V2 schema identifier.
#[test]
fn append_trend_run_normalizes_empty_schema_to_v2() {
    // Start from a history with a blank schema and no runs.
    let blank = TrendHistory {
        schema: String::new(),
        runs: Vec::new(),
    };

    let appended = append_trend_run(blank, make_run(1, 0, 0, 0, 0), None);

    assert_eq!(appended.schema, TREND_HISTORY_SCHEMA_V2);
}

/// Two runs that each report `u64::MAX` findings must total `u64::MAX`
/// rather than wrapping around.
#[test]
fn saturating_add_does_not_wrap() {
    let history = TrendHistory {
        schema: TREND_HISTORY_SCHEMA_V2.to_owned(),
        runs: vec![
            make_run(u64::MAX, 0, 0, 0, 0),
            make_run(u64::MAX, 0, 0, 0, 0),
        ],
    };

    let summary = summarize_trend_history(&history);

    // The expected total is pinned at the maximum, i.e. no wrap-around.
    assert_eq!(summary.total_findings, u64::MAX);
}

/// Type-level guard: `TrendRun::findings` must be readable as a `u64` and
/// must round-trip `u64::MAX`.
#[test]
fn findings_field_is_u64_not_u32() {
    let fixture = make_run(u64::MAX, 0, 0, 0, 0);

    // The annotated binding only type-checks while `findings` is a `u64`.
    let _as_u64: u64 = fixture.findings;

    assert_eq!(fixture.findings, u64::MAX);
}

/// Type-level guard: `TrendSummary::run_count` must be readable as a `u64`
/// and must hold `u64::MAX`.
#[test]
fn run_count_field_is_u64_not_u32() {
    // Hand-built summary; only `run_count` matters here.
    let fixture = TrendSummary {
        run_count: u64::MAX,
        totals: VerdictCounts::default(),
        total_findings: 0,
        latest: None,
        delta_from_previous: None,
    };

    // The annotated binding only type-checks while `run_count` is a `u64`.
    let _as_u64: u64 = fixture.run_count;

    assert_eq!(fixture.run_count, u64::MAX);
}

/// Type-level guard: `TrendSummary::total_findings` must be readable as a
/// `u64` and must hold `u64::MAX`.
#[test]
fn total_findings_field_is_u64_not_u32() {
    // Hand-built summary; only `total_findings` matters here.
    let fixture = TrendSummary {
        run_count: 1,
        totals: VerdictCounts::default(),
        total_findings: u64::MAX,
        latest: None,
        delta_from_previous: None,
    };

    // The annotated binding only type-checks while `total_findings` is a `u64`.
    let _as_u64: u64 = fixture.total_findings;

    assert_eq!(fixture.total_findings, u64::MAX);
}
4 changes: 2 additions & 2 deletions crates/diffguard-core/tests/properties.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ fn arb_diff_meta() -> impl Strategy<Value = DiffMeta> {
0u32..10, // context_lines
arb_scope(), // scope
0u64..100, // files_scanned
0u32..1000, // lines_scanned
0u64..1000, // lines_scanned
)
.prop_map(
|(base, head, context_lines, scope, files_scanned, lines_scanned)| DiffMeta {
Expand Down Expand Up @@ -643,7 +643,7 @@ mod unit_tests {
context_lines: u32::MAX,
scope: Scope::Added,
files_scanned: u64::MAX,
lines_scanned: u32::MAX,
lines_scanned: u64::MAX,
},
findings: vec![],
verdict: Verdict {
Expand Down
26 changes: 23 additions & 3 deletions crates/diffguard-diff/src/unified.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@ use std::path::Path;

use diffguard_types::Scope;

/// Represents the kind of change a diff line represents.
///
/// This is used to distinguish between:
/// - `Added`: a line that was added (exists only in the new version)
/// - `Changed`: an added line that directly replaces a removed line in the same hunk
/// - `Deleted`: a line that was removed (exists only in the old version)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChangeKind {
Added,
Expand Down Expand Up @@ -99,24 +105,38 @@ pub fn parse_rename_to(line: &str) -> Option<String> {
parse_rename_path(rest)
}

/// Represents a single line extracted from a unified diff.
///
/// Contains the file path, line number in the new version of the file,
/// the actual line content, and the kind of change it represents.
///
/// NOTE(review): `ChangeKind` also has a `Deleted` variant, and a deleted
/// line has no position in the new file — confirm what `line` holds for
/// such entries.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiffLine {
    /// Path to the file this line belongs to (uses destination path for renames).
    pub path: String,
    /// Line number in the new (post-change) version of the file.
    pub line: u32,
    /// The actual line content (without the leading `+`, `-`, or ` ` marker).
    pub content: String,
    /// The kind of change this line represents.
    pub kind: ChangeKind,
}

/// Aggregate statistics about a parsed diff.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct DiffStats {
    /// Number of files that had changes matching the requested scope.
    pub files: u32,
    /// Total number of lines matching the requested scope.
    ///
    /// Stored as `u64` so the count is not capped at `u32::MAX`.
    pub lines: u64,
}

/// Errors that can occur when parsing a unified diff.
///
/// Each variant carries a `String` that is appended to the variant's fixed
/// message prefix when the error is displayed.
#[derive(Debug, thiserror::Error)]
pub enum DiffParseError {
    /// The hunk header (`@@ ... @@` line) could not be parsed.
    #[error("malformed hunk header: {0}")]
    MalformedHunkHeader(String),
    /// The diff contains more files or lines than can be represented in the result.
    ///
    /// Produced by `parse_unified_diff` when converting the collected file or
    /// line count into the `DiffStats` field types fails.
    #[error("diff stats overflow: {0}")]
    Overflow(String),
}
Expand Down Expand Up @@ -337,8 +357,8 @@ pub fn parse_unified_diff(
let stats = DiffStats {
files: u32::try_from(files.len())
.map_err(|_| DiffParseError::Overflow(format!("too many files (> {})", u32::MAX)))?,
lines: u32::try_from(out.len())
.map_err(|_| DiffParseError::Overflow(format!("too many lines (> {})", u32::MAX)))?,
lines: u64::try_from(out.len())
.map_err(|_| DiffParseError::Overflow(format!("too many lines (> {})", u64::MAX)))?,
};

Ok((out, stats))
Expand Down
Loading
Loading