From 3845d98137f86a940c7d1bb830ef7dc17d8d0814 Mon Sep 17 00:00:00 2001 From: Matt Mastracci Date: Thu, 2 Apr 2026 10:21:34 -0600 Subject: [PATCH] feat: strip basedirs from Rust hash key for cross-machine cache hits SCCACHE_BASEDIRS now normalizes cwd, CARGO_MANIFEST_DIR, CARGO_WORKSPACE_DIR, CARGO_TARGET_TMPDIR, CARGO_MANIFEST_PATH, CARGO_BIN_EXE_*, dep-info env var values, and the concatenated argument string in the Rust compiler's hash key computation. This enables cache hits when the same crate is compiled from different absolute paths on different machines (e.g., CI runners with different checkout roots). strip_basedir_prefix now also matches when the value equals the basedir minus its trailing '/', so `cwd == basedir` strips to the empty string rather than passing through. Without this, two machines with different checkout paths produced different hashes even with matching basedirs -- the feature's central claim. --- src/cache/readonly.rs | 9 +- src/compiler/clang.rs | 2 +- src/compiler/compiler.rs | 10 +- src/compiler/diab.rs | 2 +- src/compiler/gcc.rs | 6 +- src/compiler/msvc.rs | 4 +- src/compiler/rust.rs | 459 +++++++++++++++++++++++++++++++++++-- src/compiler/tasking_vx.rs | 4 +- src/test/mock_storage.rs | 32 ++- tests/helpers/mod.rs | 12 +- tests/sccache_cargo.rs | 89 +++++++ 11 files changed, 585 insertions(+), 44 deletions(-) diff --git a/src/cache/readonly.rs b/src/cache/readonly.rs index 40f9873f5f..c82dd7277d 100644 --- a/src/cache/readonly.rs +++ b/src/cache/readonly.rs @@ -110,7 +110,7 @@ mod test { #[test] fn readonly_storage_is_readonly() { - let storage = ReadOnlyStorage(Arc::new(MockStorage::new(None, false))); + let storage = ReadOnlyStorage(Arc::new(MockStorage::default())); assert_eq!( storage.check().now_or_never().unwrap().unwrap(), CacheMode::ReadOnly @@ -119,8 +119,7 @@ mod test { #[test] fn readonly_storage_forwards_preprocessor_cache_mode_config() { - let storage_no_preprocessor_cache = - ReadOnlyStorage(Arc::new(MockStorage::new(None, false))); + let storage_no_preprocessor_cache = ReadOnlyStorage(Arc::new(MockStorage::default())); assert!( !storage_no_preprocessor_cache .preprocessor_cache_mode_config() @@ -128,7 +127,7 @@ mod test { ); let storage_with_preprocessor_cache = - ReadOnlyStorage(Arc::new(MockStorage::new(None, true))); + ReadOnlyStorage(Arc::new(MockStorage::new(None, true, vec![]))); assert!( storage_with_preprocessor_cache .preprocessor_cache_mode_config() @@ -178,7 +177,7 @@ mod test { .build() .unwrap(); - let storage = ReadOnlyStorage(Arc::new(MockStorage::new(None, true))); + let storage = ReadOnlyStorage(Arc::new(MockStorage::new(None, true, vec![]))); runtime.block_on(async move { assert_eq!( storage diff --git a/src/compiler/clang.rs b/src/compiler/clang.rs index e6294d5241..8e5b842a32 100644 --- a/src/compiler/clang.rs +++ b/src/compiler/clang.rs @@ -1359,7 +1359,7 @@ mod test { too_hard_for_preprocessor_cache_mode: None, }; let runtime = single_threaded_runtime(); - let storage = MockStorage::new(None, false); + let storage = MockStorage::default(); let storage: std::sync::Arc = std::sync::Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage, runtime.handle().clone()); let compiler = &f.bins[0]; diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 1d9ccff751..61ca64b95f 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -2369,7 +2369,7 @@ LLVM version: 6.0", false, pool, false, - Arc::new(MockStorage::new(None, preprocessor_cache_mode)), + Arc::new(MockStorage::new(None, preprocessor_cache_mode, vec![])), CacheControl::Default, ) .wait() @@ -2437,7 +2437,7 @@ LLVM version: 6.0", false, pool, false, - Arc::new(MockStorage::new(None, preprocessor_cache_mode)), + Arc::new(MockStorage::new(None, preprocessor_cache_mode, vec![])), CacheControl::Default, ) .wait() @@ -2503,7 +2503,7 @@ LLVM version: 6.0", false, pool, false, - Arc::new(MockStorage::new(None, preprocessor_cache_mode)), + Arc::new(MockStorage::new(None, preprocessor_cache_mode, vec![])), CacheControl::Default, ) .wait() @@ -2805,7 +2805,7 @@ LLVM version: 6.0", let gcc = f.mk_bin("gcc").unwrap(); let runtime = Runtime::new().unwrap(); let pool = runtime.handle().clone(); - let storage = MockStorage::new(None, preprocessor_cache_mode); + let storage = MockStorage::new(None, preprocessor_cache_mode, vec![]); let storage: Arc = Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage.clone(), pool.clone()); @@ -2898,7 +2898,7 @@ LLVM version: 6.0", std::fs::write(f.tempdir.path().join("foo.c"), "whatever").unwrap(); // Make our storage wait 2ms for each get/put operation. let storage_delay = Duration::from_millis(2); - let storage = MockStorage::new(Some(storage_delay), preprocessor_cache_mode); + let storage = MockStorage::new(Some(storage_delay), preprocessor_cache_mode, vec![]); let storage: Arc = Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage.clone(), pool.clone()); // Pretend to be GCC. diff --git a/src/compiler/diab.rs b/src/compiler/diab.rs index a11e578606..53d3ad7823 100644 --- a/src/compiler/diab.rs +++ b/src/compiler/diab.rs @@ -792,7 +792,7 @@ mod test { too_hard_for_preprocessor_cache_mode: None, }; let runtime = single_threaded_runtime(); - let storage = MockStorage::new(None, false); + let storage = MockStorage::default(); let storage: std::sync::Arc = std::sync::Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage, runtime.handle().clone()); let compiler = &f.bins[0]; diff --git a/src/compiler/gcc.rs b/src/compiler/gcc.rs index 5613ac8920..c9127a687f 100644 --- a/src/compiler/gcc.rs +++ b/src/compiler/gcc.rs @@ -2389,7 +2389,7 @@ mod test { too_hard_for_preprocessor_cache_mode: None, }; let runtime = single_threaded_runtime(); - let storage = MockStorage::new(None, false); + let storage = MockStorage::default(); let storage: std::sync::Arc = std::sync::Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage, runtime.handle().clone()); let compiler = &f.bins[0]; @@ -2450,7 +2450,7 @@ mod test { too_hard_for_preprocessor_cache_mode: None, }; let runtime = single_threaded_runtime(); - let storage = MockStorage::new(None, false); + let storage = MockStorage::default(); let storage: std::sync::Arc = std::sync::Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage, runtime.handle().clone()); let compiler = &f.bins[0]; @@ -2509,7 +2509,7 @@ mod test { too_hard_for_preprocessor_cache_mode: None, }; let runtime = single_threaded_runtime(); - let storage = MockStorage::new(None, false); + let storage = MockStorage::default(); let storage: std::sync::Arc = std::sync::Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage, runtime.handle().clone()); let compiler = &f.bins[0]; diff --git a/src/compiler/msvc.rs b/src/compiler/msvc.rs index 900501f6a6..b3fd99ceca 100644 --- a/src/compiler/msvc.rs +++ b/src/compiler/msvc.rs @@ -2852,7 +2852,7 @@ mod test { too_hard_for_preprocessor_cache_mode: None, }; let runtime = single_threaded_runtime(); - let storage = MockStorage::new(None, false); + let storage = MockStorage::default(); let storage: std::sync::Arc = std::sync::Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage, runtime.handle().clone()); let compiler = &f.bins[0]; @@ -2942,7 +2942,7 @@ mod test { too_hard_for_preprocessor_cache_mode: None, }; let runtime = single_threaded_runtime(); - let storage = MockStorage::new(None, false); + let storage = MockStorage::default(); let storage: std::sync::Arc = std::sync::Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage, runtime.handle().clone()); let compiler = &f.bins[0]; diff --git a/src/compiler/rust.rs b/src/compiler/rust.rs index 7a0fdabc3c..8a1748a322 100644 --- a/src/compiler/rust.rs +++ b/src/compiler/rust.rs @@ -62,6 +62,138 @@ use std::time; use crate::errors::*; +/// Strip a basedir prefix from a byte slice, returning the relative portion. +/// +/// Basedirs are pre-normalized with trailing `/` (see config.rs), so the +/// result is a clean relative path. Iteration is in the order basedirs are +/// listed in config; the first match wins. A value that equals a basedir +/// minus the trailing `/` (e.g. `cwd == basedir`) also matches and strips +/// to the empty byte string. +/// +/// On Windows the value is normalized (lowercased with forward slashes) +/// before comparison since basedirs are stored in that form; a match there +/// returns owned bytes because the borrow would point into the normalized +/// buffer rather than `value`. +fn strip_basedir_prefix<'a>(value: &'a [u8], basedirs: &[Vec]) -> Cow<'a, [u8]> { + if basedirs.is_empty() { + return Cow::Borrowed(value); + } + strip_basedir_prefix_impl(value, basedirs) +} + +#[cfg(not(windows))] +fn strip_basedir_prefix_impl<'a>(value: &'a [u8], basedirs: &[Vec]) -> Cow<'a, [u8]> { + for basedir in basedirs { + if value.starts_with(basedir) { + return Cow::Borrowed(&value[basedir.len()..]); + } + if is_basedir_minus_slash(value, basedir) { + return Cow::Borrowed(b""); + } + } + Cow::Borrowed(value) +} + +#[cfg(windows)] +fn strip_basedir_prefix_impl<'a>(value: &'a [u8], basedirs: &[Vec]) -> Cow<'a, [u8]> { + let normalized = crate::util::normalize_win_path(value); + for basedir in basedirs { + if normalized.starts_with(basedir) { + return Cow::Owned(normalized[basedir.len()..].to_vec()); + } + if is_basedir_minus_slash(&normalized, basedir) { + return Cow::Owned(Vec::new()); + } + } + Cow::Borrowed(value) +} + +/// Returns true if `value` is `basedir` with the trailing `/` removed. +/// Handles the `cwd == basedir` case where a subpath `starts_with` check +/// would otherwise miss. +fn is_basedir_minus_slash(value: &[u8], basedir: &[u8]) -> bool { + basedir.last() == Some(&b'/') && value.len() + 1 == basedir.len() && basedir.starts_with(value) +} + +/// Strip every basedir occurrence from a single rustc argument. +/// +/// A match is any basedir that appears at the start of `arg` or immediately +/// after an arg-internal separator (`=`, `,`). Covers patterns like: +/// * `/abs/path/src.rs` (source file path) +/// * `--remap-path-prefix=/abs/path=/new` (rust-lang/cargo#12137) +/// * `-Clinker=/abs/path` +/// * `-Clink-arg=-Wl,-rpath,/abs/path` (sccache#2652 comment) +/// +/// Overlapping matches are resolved longest-first at each position. On +/// Windows the value is first normalized (lowercased with forward slashes) +/// so it can be compared against basedirs stored in that canonical form. +fn strip_basedirs_in_arg<'a>(arg: &'a [u8], basedirs: &[Vec]) -> Cow<'a, [u8]> { + if basedirs.is_empty() { + return Cow::Borrowed(arg); + } + strip_basedirs_in_arg_impl(arg, basedirs) +} + +#[cfg(not(windows))] +fn strip_basedirs_in_arg_impl<'a>(arg: &'a [u8], basedirs: &[Vec]) -> Cow<'a, [u8]> { + find_and_strip_basedirs(arg, basedirs) +} + +#[cfg(windows)] +fn strip_basedirs_in_arg_impl<'a>(arg: &'a [u8], basedirs: &[Vec]) -> Cow<'a, [u8]> { + let normalized = crate::util::normalize_win_path(arg); + match find_and_strip_basedirs(&normalized, basedirs) { + // No match: return the original arg (mirrors strip_basedir_prefix). + Cow::Borrowed(_) => Cow::Borrowed(arg), + // Match: the slice points into the local normalized buffer. + Cow::Owned(v) => Cow::Owned(v), + } +} + +/// Core matcher used by `strip_basedirs_in_arg_impl`: look up every basedir +/// in `haystack` at start-of-string / post-`=` / post-`,` boundaries, then +/// elide the matched ranges. +fn find_and_strip_basedirs<'a>(haystack: &'a [u8], basedirs: &[Vec]) -> Cow<'a, [u8]> { + let mut matches: Vec<(usize, usize)> = Vec::new(); + for basedir in basedirs { + let b = basedir.as_slice(); + if b.is_empty() || b.len() > haystack.len() { + continue; + } + for start in memchr::memmem::find_iter(haystack, b) { + let is_boundary = start == 0 || matches!(haystack[start - 1], b'=' | b','); + if is_boundary { + matches.push((start, start + b.len())); + } + } + } + if matches.is_empty() { + return Cow::Borrowed(haystack); + } + // Sort by start ascending; break ties by length descending so the longest + // match at a given position wins (e.g. `/a/b/` before `/a/` when both are + // in the basedirs list). + matches.sort_by(|x, y| x.0.cmp(&y.0).then(y.1.cmp(&x.1))); + // Coalesce overlapping matches, keeping the first (longest) at each + // position. + let mut filtered: Vec<(usize, usize)> = Vec::new(); + let mut last_end = 0; + for (s, e) in matches { + if s >= last_end { + filtered.push((s, e)); + last_end = e; + } + } + let mut out = Vec::with_capacity(haystack.len()); + let mut pos = 0; + for (s, e) in filtered { + out.extend_from_slice(&haystack[pos..s]); + pos = e; + } + out.extend_from_slice(&haystack[pos..]); + Cow::Owned(out) +} + #[cfg(feature = "dist-client")] const RLIB_PREFIX: &str = "lib"; #[cfg(feature = "dist-client")] @@ -1333,10 +1465,11 @@ where _may_dist: bool, pool: &tokio::runtime::Handle, _rewrite_includes_only: bool, - _storage: Arc, + storage: Arc, _cache_control: CacheControl, ) -> Result> { trace!("[{}]: generate_hash_key", self.parsed_args.crate_name); + let basedirs = storage.basedirs(); // TODO: this doesn't produce correct arguments if they should be concatenated - should use iter_os_strings let os_string_arguments: Vec<(OsString, Option)> = self .parsed_args @@ -1451,7 +1584,13 @@ where // A few argument types are not passed in a deterministic order // by cargo: --extern, -L, --cfg. We'll filter those out, sort them, // and append them to the rest of the arguments. - let args = { + // Strip basedir occurrences per-argument before hashing. Handles both + // the common source-file-path arg (`/abs/path/src.rs`) and patterns + // that embed paths after `=` or `,`: `--remap-path-prefix=/abs/path`, + // `-Clinker=/abs/path`, `-Clink-arg=-Wl,-rpath,/abs/path`. See + // mozilla/sccache#2652. + let mut args_bytes = Vec::new(); + { let (mut sortables, rest): (Vec<_>, Vec<_>) = os_string_arguments .iter() // We exclude a few arguments from the hash: @@ -1475,15 +1614,16 @@ where // out, sort them, and append them to the rest of the arguments. .partition(|&(arg, _)| arg == "--cfg"); sortables.sort(); - rest.into_iter() + for arg in rest + .into_iter() .chain(sortables) .flat_map(|(arg, val)| iter::once(arg).chain(val.as_ref())) - .fold(OsString::new(), |mut a, b| { - a.push(b); - a - }) - }; - args.hash(&mut HashToDigest { digest: &mut m }); + { + args_bytes + .extend_from_slice(&strip_basedirs_in_arg(arg.as_encoded_bytes(), basedirs)); + } + } + args_bytes.hash(&mut HashToDigest { digest: &mut m }); // 4. The digest of all source files (this includes src file from cmdline). // 5. The digest of all files listed on the commandline (self.externs). // 6. The digest of all static libraries listed on the commandline (self.staticlibs). @@ -1503,7 +1643,10 @@ where for (var, val) in env_deps.iter() { var.hash(&mut HashToDigest { digest: &mut m }); m.update(b"="); - val.hash(&mut HashToDigest { digest: &mut m }); + // Strip basedir prefixes from dep-info env var values (e.g. OUT_DIR) + // to enable cross-machine cache hits. + let val_bytes = val.as_encoded_bytes(); + strip_basedir_prefix(val_bytes, basedirs).hash(&mut HashToDigest { digest: &mut m }); } let mut env_vars: Vec<_> = env_vars .iter() @@ -1534,10 +1677,18 @@ where var.hash(&mut HashToDigest { digest: &mut m }); m.update(b"="); - val.hash(&mut HashToDigest { digest: &mut m }); + // Strip any basedir prefix from every CARGO_* env var value. + // Stripping is a no-op for values that don't start with a basedir, + // so this is safe to apply to non-path vars too and avoids the + // whitelist-maintenance bug class (CARGO_TARGET_DIR, CARGO_HOME, + // future additions, etc.). + let val_bytes = val.as_encoded_bytes(); + strip_basedir_prefix(val_bytes, basedirs).hash(&mut HashToDigest { digest: &mut m }); } // 9. The cwd of the compile. This will wind up in the rlib. - cwd.hash(&mut HashToDigest { digest: &mut m }); + // Strip basedir prefix for cross-machine cache portability. + let cwd_bytes = cwd.as_os_str().as_encoded_bytes(); + strip_basedir_prefix(cwd_bytes, basedirs).hash(&mut HashToDigest { digest: &mut m }); // 10. The version of the compiler. self.version.hash(&mut HashToDigest { digest: &mut m }); @@ -3180,7 +3331,7 @@ abc def.rs: #[cfg(not(windows))] #[test] - fn test_parse_dep_info_cwd() { + fn test_parse_dep_info_cwd_unix() { let deps = "foo: baz.rs abc.rs bar.rs baz.rs: @@ -3202,7 +3353,7 @@ bar.rs: #[cfg(not(windows))] #[test] - fn test_parse_dep_info_abs_paths() { + fn test_parse_dep_info_abs_paths_unix() { let deps = "/foo/foo: /foo/baz.rs /foo/abc.rs /foo/bar.rs /foo/baz.rs: @@ -3219,7 +3370,7 @@ bar.rs: #[cfg(windows)] #[test] - fn test_parse_dep_info_cwd() { + fn test_parse_dep_info_cwd_windows() { let deps = "foo: baz.rs abc.rs bar.rs baz.rs: @@ -3245,7 +3396,7 @@ bar.rs: #[cfg(windows)] #[test] - fn test_parse_dep_info_abs_paths() { + fn test_parse_dep_info_abs_paths_windows() { let deps = "c:/foo/foo: c:/foo/baz.rs c:/foo/abc.rs c:/foo/bar.rs c:/foo/baz.rs: c:/foo/bar.rs @@ -3513,7 +3664,7 @@ proc_macro false false, &pool, false, - Arc::new(MockStorage::new(None, preprocessor_cache_mode)), + Arc::new(MockStorage::new(None, preprocessor_cache_mode, vec![])), CacheControl::Default, ) .wait() @@ -3527,7 +3678,10 @@ proc_macro false // sysroot shlibs digests. m.update(FAKE_DIGEST.as_bytes()); // Arguments, with cfgs sorted at the end. - OsStr::new("ab--cfgabc--cfgxyz").hash(&mut HashToDigest { digest: &mut m }); + let args_str = OsStr::new("ab--cfgabc--cfgxyz"); + args_str + .as_encoded_bytes() + .hash(&mut HashToDigest { digest: &mut m }); // bar.rs (source file, from dep-info) m.update(empty_digest.as_bytes()); // foo.rs (source file, from dep-info) @@ -3540,11 +3694,18 @@ proc_macro false // Env vars OsStr::new("CARGO_BLAH").hash(&mut HashToDigest { digest: &mut m }); m.update(b"="); - OsStr::new("abc").hash(&mut HashToDigest { digest: &mut m }); + OsStr::new("abc") + .as_encoded_bytes() + .hash(&mut HashToDigest { digest: &mut m }); OsStr::new("CARGO_PKG_NAME").hash(&mut HashToDigest { digest: &mut m }); m.update(b"="); OsStr::new("foo").hash(&mut HashToDigest { digest: &mut m }); - f.tempdir.path().hash(&mut HashToDigest { digest: &mut m }); + // cwd + f.tempdir + .path() + .as_os_str() + .as_encoded_bytes() + .hash(&mut HashToDigest { digest: &mut m }); TEST_RUSTC_VERSION.hash(&mut HashToDigest { digest: &mut m }); let digest = m.finish(); assert_eq!(res.key, digest); @@ -3559,6 +3720,7 @@ proc_macro false env_vars: &[(OsString, OsString)], pre_func: F, preprocessor_cache_mode: bool, + basedirs: Vec>, ) -> String where F: Fn(&Path) -> Result<()>, @@ -3605,7 +3767,7 @@ proc_macro false false, &pool, false, - Arc::new(MockStorage::new(None, preprocessor_cache_mode)), + Arc::new(MockStorage::new(None, preprocessor_cache_mode, basedirs)), CacheControl::Default, ) .wait() @@ -3650,6 +3812,7 @@ proc_macro false &[], mk_files, preprocessor_cache_mode, + vec![], ), hash_key( &f, @@ -3671,6 +3834,7 @@ proc_macro false &[], mk_files, preprocessor_cache_mode, + vec![], ) ); } @@ -3700,6 +3864,7 @@ proc_macro false &[], nothing, preprocessor_cache_mode, + vec![], ), hash_key( &f, @@ -3721,6 +3886,7 @@ proc_macro false &[], nothing, preprocessor_cache_mode, + vec![], ) ); } @@ -3753,6 +3919,7 @@ proc_macro false &[], nothing, preprocessor_cache_mode, + vec![], ), hash_key( &f, @@ -3776,6 +3943,7 @@ proc_macro false &[], nothing, preprocessor_cache_mode, + vec![], ) ); } @@ -3803,6 +3971,7 @@ proc_macro false &[], nothing, preprocessor_cache_mode, + vec![], ), hash_key( &f, @@ -3824,6 +3993,7 @@ proc_macro false &[], nothing, preprocessor_cache_mode, + vec![], ) ); } @@ -3853,6 +4023,7 @@ proc_macro false &[], nothing, preprocessor_cache_mode, + vec![], ), hash_key( &f, @@ -3874,10 +4045,256 @@ proc_macro false &[], nothing, preprocessor_cache_mode, + vec![], ) ); } + #[test] + fn test_basedirs_strips_cwd_and_cargo_manifest_dir() { + let f = TestFixture::new(); + let cwd = f.tempdir.path().to_string_lossy().into_owned(); + + let args = &[ + "--emit", + "link", + "foo.rs", + "--out-dir", + "out", + "--crate-name", + "foo", + "--crate-type", + "lib", + ]; + + let manifest_dir = format!("{}/some/pkg", cwd); + let env_vars = vec![ + ( + OsString::from("CARGO_MANIFEST_DIR"), + OsString::from(&manifest_dir), + ), + (OsString::from("CARGO_PKG_NAME"), OsString::from("foo")), + ]; + + let key_without = hash_key(&f, args, &env_vars, nothing, false, vec![]); + + // Basedirs are normalized at config time (forward slashes, lowercase + // on Windows, trailing slash); replicate that here. + let basedir = cwd.into_bytes(); + #[cfg(windows)] + let basedir = crate::util::normalize_win_path(&basedir); + let mut basedir = basedir; + basedir.push(b'/'); + let key_with = hash_key(&f, args, &env_vars, nothing, false, vec![basedir]); + + assert_ne!(key_without, key_with, "basedirs should change the hash key"); + } + + /// Build the canonical basedir byte-string for a given tempdir path: + /// normalize on Windows, append a trailing `/`. Matches how + /// `Config` stores basedirs at runtime. + fn basedir_for(path: &Path) -> Vec { + let bytes = path.to_string_lossy().into_owned().into_bytes(); + #[cfg(windows)] + let bytes = crate::util::normalize_win_path(&bytes); + let mut bytes = bytes; + bytes.push(b'/'); + bytes + } + + #[test] + fn test_basedirs_stable_across_absolute_paths() { + // The central guarantee of this feature: when two machines build the + // same crate from different absolute checkout paths, supplying each + // side's checkout root as a basedir produces identical hash keys, so + // one machine's cache entry is a hit on the other. + let f1 = TestFixture::new(); + let f2 = TestFixture::new(); + assert_ne!( + f1.tempdir.path(), + f2.tempdir.path(), + "fixtures must be at different absolute paths" + ); + + let args = &[ + "--emit", + "link", + "foo.rs", + "--out-dir", + "out", + "--crate-name", + "foo", + "--crate-type", + "lib", + ]; + + // Matching CARGO_MANIFEST_DIR under each fixture exercises the env-var + // basedir-stripping path in addition to the cwd-stripping path. + let manifest1 = format!("{}/some/pkg", f1.tempdir.path().display()); + let manifest2 = format!("{}/some/pkg", f2.tempdir.path().display()); + let env1 = vec![ + ( + OsString::from("CARGO_MANIFEST_DIR"), + OsString::from(&manifest1), + ), + (OsString::from("CARGO_PKG_NAME"), OsString::from("foo")), + ]; + let env2 = vec![ + ( + OsString::from("CARGO_MANIFEST_DIR"), + OsString::from(&manifest2), + ), + (OsString::from("CARGO_PKG_NAME"), OsString::from("foo")), + ]; + + let k1 = hash_key( + &f1, + args, + &env1, + nothing, + false, + vec![basedir_for(f1.tempdir.path())], + ); + let k2 = hash_key( + &f2, + args, + &env2, + nothing, + false, + vec![basedir_for(f2.tempdir.path())], + ); + assert_eq!( + k1, k2, + "basedir stripping must produce identical hashes across different checkout paths" + ); + } + + #[test] + + fn test_strip_basedir_prefix_no_match() { + let out = super::strip_basedir_prefix(b"/other/path", &[b"/home/runner/".to_vec()]); + assert_eq!( + &*out, b"/other/path", + "no match should return value unchanged" + ); + } + + #[test] + + fn test_strip_basedir_prefix_first_match_wins() { + // Documents the current contract: iteration is in config order and the + // first matching basedir wins. Listing a more-specific basedir before a + // less-specific one is the caller's responsibility. + let basedirs = vec![b"/home/".to_vec(), b"/home/runner/".to_vec()]; + let out = super::strip_basedir_prefix(b"/home/runner/src/foo.rs", &basedirs); + assert_eq!(&*out, b"runner/src/foo.rs"); + } + + // strip_basedirs_in_arg covers embedded-path arg patterns (mozilla/sccache#2652). + + #[test] + + fn test_strip_basedirs_in_arg_prefix() { + let out = + super::strip_basedirs_in_arg(b"/home/user/src/lib.rs", &[b"/home/user/".to_vec()]); + assert_eq!(&*out, b"src/lib.rs"); + } + + #[test] + + fn test_strip_basedirs_in_arg_after_equals() { + // `--remap-path-prefix=/abs/path=/new` -- basedir appears after `=`. + let out = super::strip_basedirs_in_arg( + b"--remap-path-prefix=/home/user/a=/new", + &[b"/home/user/".to_vec()], + ); + assert_eq!(&*out, b"--remap-path-prefix=a=/new"); + } + + // Preserves the real `-Clink-arg=-Wl,...` rustc flag capitalization; that + // uppercase gets lowercased by `normalize_win_path` on Windows, so the + // assertion only holds on non-Windows. Windows exercises the same + // code path via `test_strip_basedirs_in_arg_after_comma_lowercase`. + #[cfg(not(windows))] + #[test] + fn test_strip_basedirs_in_arg_after_comma() { + let out = super::strip_basedirs_in_arg( + b"-Clink-arg=-Wl,-rpath,/home/user/lib", + &[b"/home/user/".to_vec()], + ); + assert_eq!(&*out, b"-Clink-arg=-Wl,-rpath,lib"); + } + + // Lowercase-only mirror of `test_strip_basedirs_in_arg_after_comma` so the + // after-`,` boundary match is exercised on every platform. + #[test] + fn test_strip_basedirs_in_arg_after_comma_lowercase() { + let out = super::strip_basedirs_in_arg( + b"-clink-arg=-wl,-rpath,/home/user/lib", + &[b"/home/user/".to_vec()], + ); + assert_eq!(&*out, b"-clink-arg=-wl,-rpath,lib"); + } + + #[test] + + fn test_strip_basedirs_in_arg_multiple_in_one() { + let out = + super::strip_basedirs_in_arg(b"/home/user/a,/home/user/b", &[b"/home/user/".to_vec()]); + assert_eq!(&*out, b"a,b"); + } + + #[test] + + fn test_strip_basedirs_in_arg_no_match_inside() { + // Basedir preceded by a non-boundary byte: no strip. + let out = + super::strip_basedirs_in_arg(b"prefix/home/user/suffix", &[b"/home/user/".to_vec()]); + assert_eq!(&*out, b"prefix/home/user/suffix"); + } + + #[test] + + fn test_strip_basedirs_in_arg_longest_at_same_position() { + // When `/a/b/` and `/a/` both match at the same position, the longer + // one wins (sort is by start asc, length desc). + let basedirs = vec![b"/a/".to_vec(), b"/a/b/".to_vec()]; + let out = super::strip_basedirs_in_arg(b"/a/b/x", &basedirs); + assert_eq!(&*out, b"x"); + } + + #[test] + fn test_basedirs_deterministic() { + // Running the same compilation with the same basedirs twice should + // produce the same hash, and it should differ from no-basedirs. + let f = TestFixture::new(); + let cwd = f.tempdir.path().to_string_lossy().into_owned(); + + let args = &[ + "--emit", + "link", + "foo.rs", + "--out-dir", + "out", + "--crate-name", + "foo", + "--crate-type", + "lib", + ]; + let env_vars = vec![(OsString::from("CARGO_PKG_NAME"), OsString::from("foo"))]; + + let basedir = cwd.into_bytes(); + #[cfg(windows)] + let basedir = crate::util::normalize_win_path(&basedir); + let mut basedir = basedir; + basedir.push(b'/'); + + let key1 = hash_key(&f, args, &env_vars, nothing, false, vec![basedir.clone()]); + let key2 = hash_key(&f, args, &env_vars, nothing, false, vec![basedir]); + + assert_eq!(key1, key2, "Same basedir should produce deterministic hash"); + } + #[test] fn test_parse_unstable_profile_flag() { let h = parses!( diff --git a/src/compiler/tasking_vx.rs b/src/compiler/tasking_vx.rs index b3fff8238a..26ac7676f4 100644 --- a/src/compiler/tasking_vx.rs +++ b/src/compiler/tasking_vx.rs @@ -730,7 +730,7 @@ mod test { too_hard_for_preprocessor_cache_mode: None, }; let runtime = single_threaded_runtime(); - let storage = MockStorage::new(None, false); + let storage = MockStorage::default(); let storage: std::sync::Arc = std::sync::Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage, runtime.handle().clone()); let compiler = &f.bins[0]; @@ -784,7 +784,7 @@ mod test { too_hard_for_preprocessor_cache_mode: None, }; let runtime = single_threaded_runtime(); - let storage = MockStorage::new(None, false); + let storage = MockStorage::default(); let storage: std::sync::Arc = std::sync::Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage, runtime.handle().clone()); let compiler = &f.bins[0]; diff --git a/src/test/mock_storage.rs b/src/test/mock_storage.rs index d4260e79e2..4d4faa23bc 100644 --- a/src/test/mock_storage.rs +++ b/src/test/mock_storage.rs @@ -28,17 +28,33 @@ pub struct MockStorage { tx: mpsc::UnboundedSender>, delay: Option, preprocessor_cache_mode: bool, + basedirs: Vec>, } impl MockStorage { - /// Create a new `MockStorage`. if `delay` is `Some`, wait for that amount of time before returning from operations. - pub(crate) fn new(delay: Option, preprocessor_cache_mode: bool) -> MockStorage { + /// Construct a `MockStorage`. + /// + /// # Arguments + /// + /// * `delay` — if `Some`, every `get`/`put` sleeps this long before + /// returning, to simulate slow storage. + /// * `preprocessor_cache_mode` — value returned from + /// [`Storage::preprocessor_cache_mode_config`]. + /// * `basedirs` — the list reported by [`Storage::basedirs`], used when + /// tests need to exercise basedir-prefix stripping in + /// `generate_hash_key`. + pub(crate) fn new( + delay: Option, + preprocessor_cache_mode: bool, + basedirs: Vec>, + ) -> MockStorage { let (tx, rx) = mpsc::unbounded(); Self { tx, rx: Arc::new(Mutex::new(rx)), delay, preprocessor_cache_mode, + basedirs, } } @@ -48,6 +64,15 @@ impl MockStorage { } } +impl Default for MockStorage { + /// Zero-delay mock with preprocessor cache mode disabled and no basedirs + /// configured -- the usual choice for tests that don't care about those + /// knobs. + fn default() -> Self { + Self::new(None, false, vec![]) + } +} + #[async_trait] impl Storage for MockStorage { async fn get(&self, _key: &str) -> Result { @@ -75,6 +100,9 @@ impl Storage for MockStorage { async fn max_size(&self) -> Result> { Ok(None) } + fn basedirs(&self) -> &[Vec] { + &self.basedirs + } fn preprocessor_cache_mode_config(&self) -> PreprocessorCacheModeConfig { PreprocessorCacheModeConfig { use_preprocessor_cache_mode: self.preprocessor_cache_mode, diff --git a/tests/helpers/mod.rs b/tests/helpers/mod.rs index db0e978a5a..e954ff629e 100644 --- a/tests/helpers/mod.rs +++ b/tests/helpers/mod.rs @@ -64,9 +64,17 @@ impl SccacheTest<'_> { trace!("sccache --start-server"); - Command::new(SCCACHE_BIN.as_os_str()) + let mut server_cmd = Command::new(SCCACHE_BIN.as_os_str()); + server_cmd .arg("--start-server") - .env("SCCACHE_DIR", &cache_dir) + .env("SCCACHE_DIR", &cache_dir); + // Forward `additional_envs` to the server too: some config (e.g. + // `SCCACHE_BASEDIRS`) is only read at server startup, so passing it + // here avoids a `restart_sccache` dance in each test. + if let Some(vec) = additional_envs { + server_cmd.envs(vec.iter().cloned()); + } + server_cmd .assert() .try_success() .context("Failed to start sccache server")?; diff --git a/tests/sccache_cargo.rs b/tests/sccache_cargo.rs index 6734a33f5b..8918984211 100644 --- a/tests/sccache_cargo.rs +++ b/tests/sccache_cargo.rs @@ -37,6 +37,95 @@ fn test_rust_cargo_build() -> Result<()> { test_rust_cargo_cmd("build", SccacheTest::new(None)?) } +#[test] +#[serial] +fn test_rust_cargo_basedirs_cross_dir_cache_hit() -> Result<()> { + // Copy tests/test-crate to two different absolute paths, then build in + // each with SCCACHE_BASEDIRS covering both roots. Without basedir + // stripping the two builds would compute different cache keys (cwd + + // CARGO_MANIFEST_DIR differ); with it, keys converge and the second + // build hits the first build's cache entries. + let work = tempfile::Builder::new() + .prefix("sccache_basedirs_xdir") + .tempdir() + .context("tempdir")?; + // On macOS `/var/...` is a symlink to `/private/var/...` and cargo reports + // the resolved target for CARGO_MANIFEST_DIR. Basedirs are compared by + // byte prefix, so the user-supplied path must be in the same canonical + // form. Windows `fs::canonicalize` returns `\\?\`-prefixed UNC paths that + // cargo does not emit, so only canonicalize on Unix. + #[cfg(unix)] + let work_root = fs::canonicalize(work.path())?; + #[cfg(not(unix))] + let work_root = work.path().to_path_buf(); + let root_a = work_root.join("machine_a"); + let root_b = work_root.join("machine_b"); + let crate_a = root_a.join("project"); + let crate_b = root_b.join("project"); + copy_crate(&CRATE_DIR, &crate_a)?; + copy_crate(&CRATE_DIR, &crate_b)?; + + // Basedir separator: `:` on Unix, `;` on Windows (matches config.rs). + let sep = if cfg!(windows) { ';' } else { ':' }; + let basedirs = format!("{}{sep}{}", root_a.display(), root_b.display()); + let test = SccacheTest::new(Some(&[( + "SCCACHE_BASEDIRS", + std::ffi::OsString::from(basedirs), + )]))?; + + run_cargo_build(&test, &crate_a, &crate_a.join("target"))?; + run_cargo_build(&test, &crate_b, &crate_b.join("target"))?; + + // After the second build, sccache must report Rust cache hits. The exact + // count matches the existing `test_rust_cargo_cmd` baseline (2), which + // exercises the same crate in a single directory with `cargo clean` + // between runs; if basedirs works, a cross-directory run should behave + // identically. + test.show_stats()? + .try_stdout(predicates::str::contains(r#""cache_hits":{"counts":{"Rust":2}"#).from_utf8())? + .try_success()?; + Ok(()) +} + +fn copy_crate(src: &Path, dst: &Path) -> Result<()> { + use walkdir::WalkDir; + fs::create_dir_all(dst)?; + for entry in WalkDir::new(src) { + let entry = entry.context("walkdir")?; + let rel = entry.path().strip_prefix(src).unwrap(); + let target = dst.join(rel); + if entry.file_type().is_dir() { + fs::create_dir_all(&target)?; + } else if entry.file_type().is_file() { + fs::copy(entry.path(), &target)?; + } + } + Ok(()) +} + +fn run_cargo_build(test: &SccacheTest, cwd: &Path, target_dir: &Path) -> Result<()> { + // The harness's default CARGO_TARGET_DIR is shared across invocations, + // which would let cargo short-circuit recompiles. Override per-build so + // each `cargo build` actually invokes rustc for every crate. + let env: Vec<_> = test + .env + .iter() + .filter(|(k, _)| *k != "CARGO_TARGET_DIR") + .cloned() + .chain(std::iter::once(( + "CARGO_TARGET_DIR", + target_dir.as_os_str().to_owned(), + ))) + .collect(); + Command::new(CARGO.as_os_str()) + .arg("build") + .envs(env) + .current_dir(cwd) + .assert() + .try_success()?; + Ok(()) +} + #[test] #[serial] fn test_rust_cargo_build_readonly() -> Result<()> {