-
Notifications
You must be signed in to change notification settings - Fork 0
WIP: Hoist escape_md to shared diffguard-types crate #609
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"octal\\141\\040space\"#)" | ||
| --- | ||
| octala space |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"hello\\041world\"#)" | ||
| --- | ||
| hello!world |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(\"\")" | ||
| --- | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\77\"#)" | ||
| --- | ||
| ? |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\377\"#)" | ||
| --- | ||
| � |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\001\\002\\003\"#)" | ||
| --- | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(\"plain_string_no_escapes\")" | ||
| --- | ||
| plain_string_no_escapes |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\9\"#)" | ||
| --- | ||
| \9 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\8\"#)" | ||
| --- | ||
| \8 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\1\"#)" | ||
| --- | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\2\"#)" | ||
| --- | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\3\"#)" | ||
| --- | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\4\"#)" | ||
| --- | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\5\"#)" | ||
| --- | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\6\"#)" | ||
| --- | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\7\"#)" | ||
| --- | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\177\"#)" | ||
| --- | ||
| |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"endswith\\\"#)" | ||
| --- | ||
| endswith\ |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\07\"#)" | ||
| --- | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\10\"#)" | ||
| --- | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\12\"#)" | ||
| --- | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\15\"#)" | ||
| --- | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\77\"#)" | ||
| --- | ||
| ? |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\q1\"#)" | ||
| --- | ||
| \q1 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| source: crates/diffguard-diff/src/unified.rs | ||
| expression: "unescape_git_path(r#\"\\q\"#)" | ||
| --- | ||
| \q |
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,147 @@ | ||||||||
| //! Integration tests for parsing diffs with octal-escaped paths. | ||||||||
| //! | ||||||||
| //! These tests exercise the full pipeline: raw diff text with quoted paths | ||||||||
| //! containing octal escape sequences → parse_unified_diff → DiffLine with | ||||||||
| //! correctly unescaped paths. | ||||||||
| //! | ||||||||
| //! The change being tested: replacing `u8 as u32` with `u32::from(u8)` in | ||||||||
| //! unescape_git_path's octal parsing branch. This is a lossless widening cast | ||||||||
| //! that doesn't affect behavior, but these tests verify the full integration | ||||||||
| //! path still works correctly. | ||||||||
|
|
||||||||
| use diffguard_diff::parse_unified_diff; | ||||||||
| use diffguard_types::Scope; | ||||||||
|
|
||||||||
| /// Test: Full pipeline with a path containing an octal-escaped space. | ||||||||
| /// Git outputs paths with spaces as `\040` (octal for space). | ||||||||
| /// | ||||||||
| /// Flow: diff text with quoted path → parse_diff_git_line → tokenize_git_paths | ||||||||
| /// → unquote_git_token → unescape_git_path → unescaped path in DiffLine | ||||||||
| #[test] | ||||||||
| fn test_parse_diff_with_octal_escaped_space_in_path() { | ||||||||
| // Git quotes paths with special characters and uses octal escapes for spaces | ||||||||
| // "\040" is octal for space (ASCII 32) | ||||||||
| let diff = r#" | ||||||||
| diff --git "a/path\040with spaces/file.rs" "b/path with spaces/file.rs" | ||||||||
| --- "a/path\040with spaces/file.rs" | ||||||||
| +++ "b/path with spaces/file.rs" | ||||||||
| @@ -1 +1,2 @@ | ||||||||
| fn existing() {} | ||||||||
| +fn added() {} | ||||||||
| "#; | ||||||||
|
Comment on lines
+24
to
+31
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
# Description: Verify Git's actual escaping behavior for paths with spaces
# Create a temporary test repo
temp_dir=$(mktemp -d)
cd "$temp_dir"
git init -q
# Create a file with spaces in the path
mkdir -p "path with spaces"
echo "original" > "path with spaces/file.txt"
git add .
git commit -q -m "initial"
# Modify the file to generate a diff
echo "modified" >> "path with spaces/file.txt"
# Show how Git formats the diff header
echo "=== Git diff header format for path with spaces ==="
git diff HEAD | grep -A2 "^diff --git"
# Cleanup
cd -
rm -rf "$temp_dir"Repository: EffortlessMetrics/diffguard Length of output: 739 🏁 Script executed: #!/bin/bash
# Description: Verify Git's actual escaping behavior for paths with spaces (corrected)
# Configure git first
git config --global user.email "test@example.com"
git config --global user.name "Test User"
# Create a temporary test repo
temp_dir=$(mktemp -d)
cd "$temp_dir"
git init -q
# Create a file with spaces in the path
mkdir -p "path with spaces"
echo "original" > "path with spaces/file.txt"
git add .
git commit -q -m "initial"
# Modify the file to generate a diff
echo "modified" >> "path with spaces/file.txt"
# Show how Git formats the diff header
echo "=== Git diff header format for path with spaces ==="
git diff HEAD | head -6
# Cleanup
cd -
rm -rf "$temp_dir"Repository: EffortlessMetrics/diffguard Length of output: 407 🏁 Script executed: # Examine the actual test file
head -50 crates/diffguard-diff/tests/integration_octal_escaped_paths.rsRepository: EffortlessMetrics/diffguard Length of output: 1970 🏁 Script executed: # Check the full test file for context
cat crates/diffguard-diff/tests/integration_octal_escaped_paths.rsRepository: EffortlessMetrics/diffguard Length of output: 4645 🏁 Script executed: # Also check if there are other diff-related tests to understand testing patterns
fd -t f "test" crates/diffguard-diff/tests/ | head -10Repository: EffortlessMetrics/diffguard Length of output: 53 Correct the misleading comment about Git's escaping behavior. The inline comment at line 23 incorrectly states "Git outputs paths with spaces as diff --git a/path with spaces/file.txt b/path with spaces/file.txtThe test itself appears designed to verify the parser's robustness in handling mixed escaping formats (a/ path escaped, b/ path literal). If that's the intent, add a comment clarifying this is intentional edge-case testing rather than realistic Git output. Otherwise, update the test data to match actual Git behavior. 🤖 Prompt for AI Agents |
||||||||
|
|
||||||||
| let (lines, stats) = parse_unified_diff(diff, Scope::Added).unwrap(); | ||||||||
|
|
||||||||
| // The path should be unescaped: \040 → ' ' | ||||||||
| assert_eq!(lines.len(), 1); | ||||||||
| assert_eq!(lines[0].path, "path with spaces/file.rs"); | ||||||||
| assert_eq!(lines[0].content, "fn added() {}"); | ||||||||
| assert_eq!(stats.files, 1); | ||||||||
| assert_eq!(stats.lines, 1); | ||||||||
| } | ||||||||
|
|
||||||||
| /// Test: Full pipeline with embedded octal escapes in path. | ||||||||
| /// Path contains multiple octal escapes representing different characters. | ||||||||
| /// | ||||||||
| /// \041 = '!' (ASCII 33) | ||||||||
| /// \040 = ' ' (ASCII 32) | ||||||||
| #[test] | ||||||||
| fn test_parse_diff_with_multiple_octal_escapes_in_path() { | ||||||||
| let diff = r#" | ||||||||
| diff --git "a/file\041name\040here.rs" "b/file!name here.rs" | ||||||||
| --- "a/file\041name\040here.rs" | ||||||||
| +++ "b/file!name here.rs" | ||||||||
| @@ -1 +1,2 @@ | ||||||||
| fn existing() {} | ||||||||
| +fn added() {} | ||||||||
| "#; | ||||||||
|
|
||||||||
| let (lines, _stats) = parse_unified_diff(diff, Scope::Added).unwrap(); | ||||||||
|
|
||||||||
| // All octal escapes should be properly decoded | ||||||||
| assert_eq!(lines[0].path, "file!name here.rs"); | ||||||||
| } | ||||||||
|
|
||||||||
| /// Test: Octal escapes at path boundaries (start, middle, end). | ||||||||
| /// | ||||||||
| /// \143 = 'c' (ASCII 99) - octal for lowercase 'c' | ||||||||
| #[test] | ||||||||
| fn test_parse_diff_with_octal_escape_at_path_boundaries() { | ||||||||
| // \143 = 'c' | ||||||||
| let diff = r#" | ||||||||
| diff --git "a/\143at.rs" "b/cat.rs" | ||||||||
| --- "a/\143at.rs" | ||||||||
| +++ "b/cat.rs" | ||||||||
| @@ -1 +1,2 @@ | ||||||||
| fn existing() {} | ||||||||
| +fn added() {} | ||||||||
| "#; | ||||||||
|
|
||||||||
| let (lines, _stats) = parse_unified_diff(diff, Scope::Added).unwrap(); | ||||||||
| assert_eq!(lines[0].path, "cat.rs"); | ||||||||
| } | ||||||||
|
|
||||||||
| /// Test: Three-digit octal escape at maximum value. | ||||||||
| /// \177 = DEL (ASCII 127), \000 = NUL (ASCII 0) | ||||||||
| #[test] | ||||||||
| fn test_parse_diff_with_octal_edge_cases() { | ||||||||
| // \177 = 127 (DEL), \000 = 0 (NUL) | ||||||||
| // These are boundary cases for the u8→u32 cast | ||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick | 🔵 Trivial Remove implementation-detail reference from test comment. The comment "These are boundary cases for the u8→u32 cast" references internal implementation details. Integration tests should focus on the behavior being tested (edge octal values), not on internal casting operations. ♻️ Suggested revision- // \177 = 127 (DEL), \000 = 0 (NUL)
- // These are boundary cases for the u8→u32 cast
+ // \177 = 127 (DEL), \000 = 0 (NUL)
+ // Edge case: octal values at the boundaries of the valid byte range📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||
| let diff = r#" | ||||||||
| diff --git "a/\177\000file.rs" "b/\177\000file.rs" | ||||||||
| --- "a/\177\000file.rs" | ||||||||
| +++ "b/\177\000file.rs" | ||||||||
| @@ -1 +1,2 @@ | ||||||||
| fn existing() {} | ||||||||
| +fn added() {} | ||||||||
| "#; | ||||||||
|
|
||||||||
| let (lines, _stats) = parse_unified_diff(diff, Scope::Added).unwrap(); | ||||||||
| // The path should contain the raw bytes (non-printable but valid) | ||||||||
| assert_eq!(lines[0].path, "\x7F\x00file.rs"); | ||||||||
| } | ||||||||
|
|
||||||||
| /// Test: Renamed file with octal-escaped path. | ||||||||
| /// When a file is renamed, the "rename to" path can also have octal escapes. | ||||||||
| #[test] | ||||||||
| fn test_parse_diff_rename_with_octal_escaped_path() { | ||||||||
| let diff = r#" | ||||||||
| diff --git "a/old\040name.rs" "b/new\040name.rs" | ||||||||
| rename from old name.rs | ||||||||
| rename to new name.rs | ||||||||
| --- "a/old\040name.rs" | ||||||||
| +++ "b/new\040name.rs" | ||||||||
| @@ -1 +1,2 @@ | ||||||||
| fn existing() {} | ||||||||
| +fn added() {} | ||||||||
| "#; | ||||||||
|
|
||||||||
| let (lines, _stats) = parse_unified_diff(diff, Scope::Added).unwrap(); | ||||||||
| // The path should be unescaped | ||||||||
| assert_eq!(lines[0].path, "new name.rs"); | ||||||||
| } | ||||||||
|
|
||||||||
| /// Test: Multiple files with mixed quoted/unquoted paths. | ||||||||
| #[test] | ||||||||
| fn test_parse_diff_multiple_files_mixed_path_formats() { | ||||||||
| let diff = r#" | ||||||||
| diff --git "a/quoted\040path.rs" "b/quoted path.rs" | ||||||||
| --- "a/quoted\040path.rs" | ||||||||
| +++ "b/quoted path.rs" | ||||||||
| @@ -1 +1,2 @@ | ||||||||
| +added to quoted path | ||||||||
| diff --git a/normal_path.rs b/normal_path.rs | ||||||||
| --- a/normal_path.rs | ||||||||
| +++ b/normal_path.rs | ||||||||
| @@ -1 +1,2 @@ | ||||||||
| +added to normal path | ||||||||
| "#; | ||||||||
|
|
||||||||
| let (lines, stats) = parse_unified_diff(diff, Scope::Added).unwrap(); | ||||||||
|
|
||||||||
| assert_eq!(lines.len(), 2); | ||||||||
| assert_eq!(lines[0].path, "quoted path.rs"); | ||||||||
| assert_eq!(lines[1].path, "normal_path.rs"); | ||||||||
| assert_eq!(stats.files, 2); | ||||||||
| assert_eq!(stats.lines, 2); | ||||||||
| } | ||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧹 Nitpick | 🔵 Trivial
Remove implementation-detail commentary from integration test documentation.
These lines describe an internal casting change (
u8 as u32→u32::from(u8)) inunescape_git_path. Integration tests should document the behavior being tested, not internal implementation details. This commentary creates confusion about the test suite's purpose.♻️ Suggested revision
📝 Committable suggestion
🤖 Prompt for AI Agents