diff --git a/Cargo.lock b/Cargo.lock index 430a8e8e7..53a39a6e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1420,7 +1420,7 @@ dependencies = [ "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows-core", + "windows-core 0.52.0", ] [[package]] @@ -2781,6 +2781,18 @@ dependencies = [ "thiserror 2.0.17", ] +[[package]] +name = "reflink-copy" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9efd944f26afa2406cbbabff39fac533c9bc24b13d7f1f12e14ae3e7bdc66cdb" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "rustix", + "windows", +] + [[package]] name = "regex" version = "1.12.2" @@ -3346,6 +3358,7 @@ dependencies = [ "openssl", "predicates", "rand 0.8.5", + "reflink-copy", "regex", "reqsign", "reqwest", @@ -4631,6 +4644,28 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddf874e74c7a99773e62b1c671427abf01a425e77c3d3fb9fb1e4883ea934529" +dependencies = [ + "windows-collections", + "windows-core 0.60.1", + "windows-future", + "windows-link 0.1.3", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5467f79cc1ba3f52ebb2ed41dbb459b8e7db636cc3429458d9a852e15bc24dec" +dependencies = [ + "windows-core 0.60.1", +] + [[package]] name = "windows-core" version = "0.52.0" @@ -4640,6 +4675,51 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-core" +version = "0.60.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca21a92a9cae9bf4ccae5cf8368dce0837100ddf6e6d57936749e85f152f6247" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link 0.1.3", + "windows-result", + "windows-strings 0.3.1", +] + 
+[[package]] +name = "windows-future" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a787db4595e7eb80239b74ce8babfb1363d8e343ab072f2ffe901400c03349f0" +dependencies = [ + "windows-core 0.60.1", + "windows-link 0.1.3", +] + +[[package]] +name = "windows-implement" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83577b051e2f49a058c308f17f273b570a6a758386fc291b5f6a934dd84e48c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "windows-link" version = "0.1.3" @@ -4652,6 +4732,16 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" +[[package]] +name = "windows-numerics" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "005dea54e2f6499f2cee279b8f703b3cf3b5734a2d8d21867c8f44003182eeed" +dependencies = [ + "windows-core 0.60.1", + "windows-link 0.1.3", +] + [[package]] name = "windows-registry" version = "0.5.3" @@ -4660,7 +4750,7 @@ checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e" dependencies = [ "windows-link 0.1.3", "windows-result", - "windows-strings", + "windows-strings 0.4.2", ] [[package]] @@ -4672,6 +4762,15 @@ dependencies = [ "windows-link 0.1.3", ] +[[package]] +name = "windows-strings" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319" +dependencies = [ + "windows-link 0.1.3", +] + [[package]] name = "windows-strings" version = "0.4.2" diff --git a/Cargo.toml b/Cargo.toml 
index 9e1a1f52c..8e5406061 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -75,6 +75,7 @@ opendal = { version = "0.55.0", optional = true, default-features = false, featu ] } openssl = { version = "0.10.75", optional = true } rand = "0.8.4" +reflink-copy = "0.1" regex = "1.10.3" reqsign = { version = "0.18.0", optional = true } reqwest = { version = "0.12", features = [ diff --git a/docs/FileClone.md b/docs/FileClone.md new file mode 100644 index 000000000..7dc12f544 --- /dev/null +++ b/docs/FileClone.md @@ -0,0 +1,52 @@ +# FileClone Storage + +## Overview + +The `file_clone` option enables uncompressed cache storage with Copy-on-Write (CoW) filesystem support for faster cache hits. + +## Configuration + +Add to your sccache config file (e.g., `~/.config/sccache/config`): + +```toml +[cache.disk] +file_clone = true +``` + +Or set via environment variable: + +```bash +export SCCACHE_FILE_CLONE=true +``` + +## How it Works + +When `file_clone` is enabled: + +1. **Detection**: sccache checks if the cache directory is on a CoW filesystem (APFS on macOS, Btrfs/XFS on Linux) +2. **Uncompressed Storage**: Cache entries are stored as directories with raw files instead of ZIP+zstd +3. **Reflink Extraction**: On cache hit, files are copied using reflink (near-instant on CoW filesystems) +4. **Fallback**: If CoW is not supported, automatically falls back to traditional compressed storage + +## Performance Benefits + +On CoW filesystems: +- Near-zero copy time for cached files (reflink uses filesystem-level COW) +- Reduced CPU usage (no decompression step) +- Trade-off: Slightly higher disk usage (uncompressed files) + +## Compatibility + +Works on: +- macOS with APFS +- Linux with Btrfs +- Linux with XFS +- Other filesystems with reflink support + +If the filesystem doesn't support reflink, sccache automatically uses compressed storage and logs a warning. 
+ +## Implementation Details + +- Cache entries stored as directories under `cache/a/b/{hash}/` +- Each directory contains: `{object_name}`, `stdout`, `stderr` +- Original ZIP+zstd format still supported for backwards compatibility diff --git a/src/cache/cache.rs b/src/cache/cache.rs index f68442437..bc93ecea8 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -638,6 +638,7 @@ pub fn storage_from_config( preprocessor_cache_mode_config, rw_mode, config.basedirs.clone(), + config.fallback_cache.file_clone, ))) } diff --git a/src/cache/cache_io.rs b/src/cache/cache_io.rs index 9a16c3986..cbd5429bf 100644 --- a/src/cache/cache_io.rs +++ b/src/cache/cache_io.rs @@ -48,8 +48,10 @@ pub struct FileObjectSource { /// Result of a cache lookup. pub enum Cache { - /// Result was found in cache. + /// Result was found in cache (compressed ZIP format). Hit(CacheRead), + /// Result was found in cache (uncompressed directory format). + UncompressedHit(UncompressedCacheEntry), /// Result was not found in cache. Miss, /// Do not cache the results of the compilation. @@ -62,6 +64,7 @@ impl fmt::Debug for Cache { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Cache::Hit(_) => write!(f, "Cache::Hit(...)"), + Cache::UncompressedHit(_) => write!(f, "Cache::UncompressedHit(...)"), Cache::Miss => write!(f, "Cache::Miss"), Cache::None => write!(f, "Cache::None"), Cache::Recache => write!(f, "Cache::Recache"), @@ -283,3 +286,80 @@ impl Default for CacheWrite { Self::new() } } + +/// An uncompressed cache entry stored as a directory. 
+#[derive(Debug)] +pub struct UncompressedCacheEntry { + pub(crate) dir: PathBuf, +} + +impl UncompressedCacheEntry { + pub fn new(dir: PathBuf) -> Self { + Self { dir } + } + + pub async fn extract_objects<T>(self, objects: T, pool: &tokio::runtime::Handle) -> Result<()> + where + T: IntoIterator<Item = FileObjectSource> + Send + Sync + 'static, + { + pool.spawn_blocking(move || { + for FileObjectSource { + key, + path, + optional, + } in objects + { + let src = self.dir.join(&key); + + if !src.exists() { + if optional { + continue; + } + bail!("Required object '{}' not found in cache", key); + } + + let dir = path + .parent() + .context("Output file without a parent directory!")?; + fs::create_dir_all(dir)?; + + // Read permissions from the cached source file directly + let mode = get_file_mode(&fs::File::open(&src)?); + + // Write to a tempfile and then atomically rename to the final path, + // so parallel builds don't see partially-written files. + let tmp_path = NamedTempFile::new_in(dir)?.into_temp_path(); + // Remove the empty temp file so reflink can create the destination + let _ = std::fs::remove_file(&tmp_path); + + if let Err(e) = crate::reflink::reflink_or_copy(&src, &tmp_path) { + if !optional { + bail!("Failed to copy object '{}' to {:?}: {}", key, path, e); + } + continue; + } + + tmp_path.persist(&path).map_err(|e| { + anyhow::anyhow!("Failed to persist {:?} to {:?}: {}", e.path, path, e.error) + })?; + + if let Ok(Some(mode)) = mode { + set_file_mode(&path, mode)?; + } + } + + Ok(()) + }) + .await? 
+ } + + pub fn get_stdout(&self) -> Vec<u8> { + let path = self.dir.join("stdout"); + fs::read(&path).unwrap_or_default() + } + + pub fn get_stderr(&self) -> Vec<u8> { + let path = self.dir.join("stderr"); + fs::read(&path).unwrap_or_default() + } +} diff --git a/src/cache/disk.rs b/src/cache/disk.rs index 52d9384cf..9f71097fa 100644 --- a/src/cache/disk.rs +++ b/src/cache/disk.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::cache::{Cache, CacheMode, CacheRead, CacheWrite, Storage}; +use crate::cache::{Cache, CacheMode, CacheRead, CacheWrite, Storage, UncompressedCacheEntry}; use crate::compiler::PreprocessorCacheEntry; use crate::lru_disk_cache::{Error as LruError, ReadSeek}; use async_trait::async_trait; @@ -39,6 +39,7 @@ pub struct DiskCache { preprocessor_cache: Arc>, rw_mode: CacheMode, basedirs: Vec>, + use_uncompressed: bool, } impl DiskCache { @@ -50,7 +51,33 @@ impl DiskCache { preprocessor_cache_mode_config: PreprocessorCacheModeConfig, rw_mode: CacheMode, basedirs: Vec>, + file_clone: bool, ) -> DiskCache { + let use_uncompressed = if file_clone { + let root_path = Path::new(root.as_ref()); + // Ensure the cache directory exists before testing reflink support, + // since LazyDiskCache hasn't created it yet at this point. + if let Err(e) = std::fs::create_dir_all(root_path) { + log::warn!( + "file_clone: failed to create cache directory {:?}: {}. 
\ + Falling back to compressed mode.", + root_path, + e + ); + false + } else if crate::reflink::is_reflink_supported(root_path) { + log::info!("file_clone enabled: using uncompressed storage"); + true + } else { + log::warn!( + "file_clone enabled but CoW filesystem not detected, using compressed storage" + ); + false + } + } else { + false + }; + DiskCache { lru: Arc::new(Mutex::new(LazyDiskCache::Uninit { root: root.as_ref().to_os_string(), @@ -66,6 +93,7 @@ impl DiskCache { })), rw_mode, basedirs, + use_uncompressed, } } } @@ -75,6 +103,46 @@ fn make_key_path(key: &str) -> PathBuf { Path::new(&key[0..1]).join(&key[1..2]).join(key) } +/// Check if a cache entry is stored as an uncompressed directory. +/// Requires both a directory at the key path and the presence of the marker file, +/// to avoid treating partially-written directories (e.g., crash during write) as valid hits. +fn is_uncompressed_entry(cache_root: &Path, key: &str) -> bool { + let path = cache_root.join(make_key_path(key)); + path.is_dir() && path.join(crate::lru_disk_cache::DIR_ENTRY_MARKER).exists() +} + +fn write_uncompressed_entry(cache_root: &Path, key_dir: &Path, entry: CacheWrite) -> Result<()> { + let entry_dir = cache_root.join(key_dir); + fs_err::create_dir_all(&entry_dir)?; + + // Remove the marker file first so concurrent get() calls won't see a + // partially-written entry as valid during an overwrite (e.g. force-recache). 
+ let _ = std::fs::remove_file(entry_dir.join(crate::lru_disk_cache::DIR_ENTRY_MARKER)); + + let compressed = entry.finish()?; + let cursor = std::io::Cursor::new(&compressed); + let mut zip = zip::ZipArchive::new(cursor).context("Failed to parse cache entry")?; + + for i in 0..zip.len() { + let mut file = zip.by_index(i)?; + let name = file.name().to_string(); + + let dest_path = entry_dir.join(&name); + let mut output = fs_err::File::create(&dest_path)?; + + zstd::stream::copy_decode(&mut file, &mut output) + .context("Failed to decompress cache entry")?; + + if name != "stdout" && name != "stderr" { + if let Some(mode) = file.unix_mode() { + crate::cache::utils::set_file_mode(&dest_path, mode)?; + } + } + } + + Ok(()) +} + #[async_trait] impl Storage for DiskCache { async fn get(&self, key: &str) -> Result { @@ -85,20 +153,36 @@ impl Storage for DiskCache { self.pool .spawn_blocking(move || { - let io = match lru.lock().unwrap().get_or_init()?.get(&path) { - Ok(f) => f, + let mut binding = lru.lock().unwrap(); + let cache = binding.get_or_init()?; + let cache_root = cache.path().to_path_buf(); + + // Check for uncompressed entry first (regardless of current mode) + if is_uncompressed_entry(&cache_root, &key) { + let full_dir = cache_root.join(&path); + // Update LRU recency so directory entries aren't evicted prematurely + let _ = cache.touch(&path); + drop(binding); + let entry = UncompressedCacheEntry::new(full_dir); + return Ok(Cache::UncompressedHit(entry)); + } + + // Try compressed entry + match cache.get(&path) { + Ok(io) => { + let hit = CacheRead::from(io)?; + Ok(Cache::Hit(hit)) + } Err(LruError::FileNotInCache) => { trace!("DiskCache::get({}): FileNotInCache", key); - return Ok(Cache::Miss); + Ok(Cache::Miss) } Err(LruError::Io(e)) => { trace!("DiskCache::get({}): IoError: {:?}", key, e); - return Err(e.into()); + Err(e.into()) } Err(_) => unreachable!(), - }; - let hit = CacheRead::from(io)?; - Ok(Cache::Hit(hit)) + } }) .await? 
} @@ -134,9 +218,41 @@ impl Storage for DiskCache { async fn put(&self, key: &str, entry: CacheWrite) -> Result { trace!("DiskCache::put({})", key); - // Delegate to put_raw after serializing the entry - let data = entry.finish()?; - self.put_raw(key, data.into()).await + + if !self.use_uncompressed { + let data = entry.finish()?; + return self.put_raw(key, data.into()).await; + } + + if self.rw_mode == CacheMode::ReadOnly { + return Err(anyhow!("Cannot write to a read-only cache")); + } + + let lru = self.lru.clone(); + let key = make_key_path(key); + + self.pool + .spawn_blocking(move || { + let start = Instant::now(); + + // Get the cache root path while holding the lock briefly + let cache_root = { + let mut binding = lru.lock().unwrap(); + let cache = binding.get_or_init()?; + cache.path().to_path_buf() + }; + + // Perform I/O without holding the lock + write_uncompressed_entry(&cache_root, &key, entry)?; + + // Re-acquire the lock to register the directory entry + let mut binding = lru.lock().unwrap(); + let cache = binding.get_or_init()?; + cache.add_dir(&key)?; + + Ok(start.elapsed()) + }) + .await? } async fn put_raw(&self, key: &str, data: Bytes) -> Result { @@ -158,7 +274,7 @@ impl Storage for DiskCache { .lock() .unwrap() .get_or_init()? 
- .prepare_add(key, data.len() as u64)?; + .prepare_add(&key, data.len() as u64)?; f.as_file_mut().write_all(&data)?; lru.lock().unwrap().get().unwrap().commit(f)?; drop(_fork_guard); @@ -250,8 +366,176 @@ mod tests { PreprocessorCacheModeConfig::default(), CacheMode::ReadWrite, vec![], + false, ); assert_eq!(disk.cache_type_name(), "disk"); } + + #[test] + fn test_disk_cache_file_clone_detection() { + let tempdir = tempfile::tempdir().unwrap(); + let runtime = tokio::runtime::Builder::new_current_thread() + .build() + .unwrap(); + + let disk_default = DiskCache::new( + tempdir.path(), + 1024 * 1024, + runtime.handle(), + PreprocessorCacheModeConfig::default(), + CacheMode::ReadWrite, + vec![], + false, + ); + assert!(!disk_default.use_uncompressed); + + let tempdir2 = tempfile::tempdir().unwrap(); + let disk_file_clone = DiskCache::new( + tempdir2.path(), + 1024 * 1024, + runtime.handle(), + PreprocessorCacheModeConfig::default(), + CacheMode::ReadWrite, + vec![], + true, + ); + let is_cow = crate::reflink::is_reflink_supported(tempdir2.path()); + assert_eq!( + disk_file_clone.use_uncompressed, is_cow, + "use_uncompressed should match reflink support when file_clone is enabled" + ); + } + + /// Test that writing an uncompressed entry and reading it back returns UncompressedHit, + /// and that extract_objects() restores expected file contents (using regular copy fallback + /// when reflink is not available). + #[tokio::test] + async fn test_uncompressed_put_get_extract_roundtrip() { + use crate::cache::cache_io::FileObjectSource; + + let tempdir = tempfile::tempdir().unwrap(); + let cache_dir = tempdir.path().join("cache"); + std::fs::create_dir_all(&cache_dir).unwrap(); + + let runtime = tokio::runtime::Handle::current(); + + // Create a DiskCache. Force use_uncompressed = true regardless of FS support, + // since we want to test the uncompressed storage path. 
+ let mut disk = DiskCache::new( + &cache_dir, + 10 * 1024 * 1024, + &runtime, + PreprocessorCacheModeConfig::default(), + CacheMode::ReadWrite, + vec![], + false, // We'll override use_uncompressed below + ); + disk.use_uncompressed = true; + + // Build a CacheWrite entry with a test object and stderr + let mut entry = CacheWrite::new(); + let obj_content = b"hello world object content"; + entry + .put_object( + "output.rlib", + &mut std::io::Cursor::new(obj_content), + Some(0o644), + ) + .unwrap(); + entry.put_stderr(b"some stderr output").unwrap(); + + // Write the entry + let key = "abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890"; + disk.put(key, entry).await.unwrap(); + + // Verify the directory structure was created with marker file + let key_path = make_key_path(key); + let entry_dir = cache_dir.join(&key_path); + assert!(entry_dir.is_dir(), "cache entry should be a directory"); + assert!( + entry_dir + .join(crate::lru_disk_cache::DIR_ENTRY_MARKER) + .exists(), + "marker file should exist" + ); + assert!( + entry_dir.join("output.rlib").exists(), + "object file should exist" + ); + assert!( + entry_dir.join("stderr").exists(), + "stderr file should exist" + ); + // Read it back — should be an UncompressedHit + let result = disk.get(key).await.unwrap(); + match result { + Cache::UncompressedHit(ref uncompressed) => { + // Verify stdout/stderr + assert_eq!(uncompressed.get_stderr(), b"some stderr output"); + assert!(uncompressed.get_stdout().is_empty()); + } + other => panic!("Expected UncompressedHit, got {:?}", other), + } + + // Test extract_objects: extract the .rlib to a temp location + if let Cache::UncompressedHit(uncompressed) = result { + let output_dir = tempdir.path().join("output"); + std::fs::create_dir_all(&output_dir).unwrap(); + let output_path = output_dir.join("output.rlib"); + + let objects = vec![FileObjectSource { + key: "output.rlib".to_string(), + path: output_path.clone(), + optional: false, + }]; + + uncompressed + 
.extract_objects(objects, &runtime) + .await + .unwrap(); + + // Verify the extracted file has the correct content + let extracted = std::fs::read(&output_path).unwrap(); + assert_eq!( + extracted, obj_content, + "extracted content should match original" + ); + } + } + + /// Test that a directory without a marker file is NOT treated as an UncompressedHit. + #[tokio::test] + async fn test_orphan_directory_not_returned_as_hit() { + let tempdir = tempfile::tempdir().unwrap(); + let cache_dir = tempdir.path().join("cache"); + std::fs::create_dir_all(&cache_dir).unwrap(); + + let runtime = tokio::runtime::Handle::current(); + + let disk = DiskCache::new( + &cache_dir, + 10 * 1024 * 1024, + &runtime, + PreprocessorCacheModeConfig::default(), + CacheMode::ReadWrite, + vec![], + false, + ); + + // Manually create a directory that looks like a cache entry but has no marker + let key = "abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890"; + let key_path = make_key_path(key); + let orphan_dir = cache_dir.join(&key_path); + std::fs::create_dir_all(&orphan_dir).unwrap(); + std::fs::write(orphan_dir.join("output.rlib"), b"some data").unwrap(); + + // get() should return Miss, not UncompressedHit + let result = disk.get(key).await.unwrap(); + assert!( + matches!(result, Cache::Miss), + "directory without marker should be a cache miss, got {:?}", + result + ); + } } diff --git a/src/cache/multilevel.rs b/src/cache/multilevel.rs index 8284f4643..01e5e90e5 100644 --- a/src/cache/multilevel.rs +++ b/src/cache/multilevel.rs @@ -413,6 +413,7 @@ impl MultiLevelStorage { preprocessor_cache_mode_config, rw_mode, config.basedirs.clone(), + disk_config.file_clone, )); storages.push(disk_storage); trace!("Added disk cache level"); diff --git a/src/cache/multilevel_test.rs b/src/cache/multilevel_test.rs index bfc383bbc..bf020c18f 100644 --- a/src/cache/multilevel_test.rs +++ b/src/cache/multilevel_test.rs @@ -59,6 +59,7 @@ fn test_multi_level_storage_get() { 
PreprocessorCacheModeConfig::default(), CacheMode::ReadWrite, vec![], + false, ); let cache2 = DiskCache::new( &cache_dir2, @@ -67,6 +68,7 @@ fn test_multi_level_storage_get() { PreprocessorCacheModeConfig::default(), CacheMode::ReadWrite, vec![], + false, ); let cache1_storage: Arc = Arc::new(cache1); @@ -131,6 +133,7 @@ fn test_multi_level_storage_backfill_on_hit() { PreprocessorCacheModeConfig::default(), CacheMode::ReadWrite, vec![], + false, ); let cache2 = DiskCache::new( &cache_dir2, @@ -139,6 +142,7 @@ fn test_multi_level_storage_backfill_on_hit() { PreprocessorCacheModeConfig::default(), CacheMode::ReadWrite, vec![], + false, ); let cache1_storage: Arc = Arc::new(cache1); @@ -295,6 +299,7 @@ fn test_disk_plus_remote_to_remote_backfill() { PreprocessorCacheModeConfig::default(), CacheMode::ReadWrite, vec![], + false, )); let remote_l1 = Arc::new(InMemoryStorage::new()); // Memcached-like @@ -398,6 +403,7 @@ fn test_disk_plus_remotes_write_to_all() { PreprocessorCacheModeConfig::default(), CacheMode::ReadWrite, vec![], + false, )); let remote_l1 = Arc::new(InMemoryStorage::new()); @@ -861,6 +867,7 @@ fn test_preprocessor_cache_mode() { preprocessor_config, CacheMode::ReadWrite, vec![], + false, )); let cache_l1 = Arc::new(InMemoryStorage::new()); @@ -912,6 +919,7 @@ fn test_preprocessor_cache_methods() { PreprocessorCacheModeConfig::default(), CacheMode::ReadWrite, vec![], + false, )); let storage = MultiLevelStorage::new(vec![disk_cache as Arc]); @@ -954,6 +962,7 @@ fn test_readonly_level_in_check() { PreprocessorCacheModeConfig::default(), CacheMode::ReadWrite, vec![], + false, ); // Wrap in ReadOnly diff --git a/src/cache/readonly.rs b/src/cache/readonly.rs index 40f9873f5..99e5a2eb9 100644 --- a/src/cache/readonly.rs +++ b/src/cache/readonly.rs @@ -163,6 +163,7 @@ mod test { super::PreprocessorCacheModeConfig::default(), super::CacheMode::ReadWrite, basedirs.clone(), + false, ); let readonly_storage = ReadOnlyStorage(std::sync::Arc::new(disk_cache)); @@ 
-221,6 +222,7 @@ mod test { super::PreprocessorCacheModeConfig::default(), super::CacheMode::ReadWrite, vec![], + false, ); let readonly_storage = ReadOnlyStorage(std::sync::Arc::new(disk_cache)); diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index d9b0b7121..ccb564997 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -615,7 +615,7 @@ where // In this mode, cache entries are exclusively distinguished by their preprocessed // source contents. But two files may differ in their names and / or the names of // included files while still producing the same preprocessed output, so they get the - // same cache entry. That entry will have wrong (file names) dependency informaton in + // same cache entry. That entry will have wrong (file names) dependency information in // the dependency file except for the compilation unit that originally produced it. // Since we did local preprocessing, that should already have produced the dependency // file - just leave that one alone and don't overwrite it from the cache. 
@@ -645,6 +645,40 @@ where } } } + (Ok(Ok(Cache::UncompressedHit(entry))), duration) => { + debug!( + "[{}]: Cache uncompressed hit in {}", + out_pretty, + fmt_duration_as_secs(&duration) + ); + let output = process::Output { + status: exit_status(0), + stdout: entry.get_stdout(), + stderr: entry.get_stderr(), + }; + + let filtered_outputs = if compilation.is_locally_preprocessed() { + outputs + .iter() + .filter(|fobj_source| fobj_source.key != "d") + .cloned() + .collect() + } else { + outputs.clone() + }; + + let hit = CompileResult::CacheHit(duration); + match entry.extract_objects(filtered_outputs, &pool).await { + Ok(()) => Ok(CacheLookupResult::Success(hit, output)), + Err(e) => { + debug!( + "[{}]: Failed to extract uncompressed object: {:?}", + out_pretty, e + ); + Ok(CacheLookupResult::Miss(MissType::CacheReadError)) + } + } + } (Ok(Ok(Cache::Miss)), duration) => { debug!( "[{}]: Cache miss in {}", @@ -2660,6 +2694,7 @@ LLVM version: 6.0", }, CacheMode::ReadWrite, vec![], + false, ); // Write a dummy input file so the preprocessor cache mode can work std::fs::write(f.tempdir.path().join("foo.c"), "whatever").unwrap(); @@ -2791,6 +2826,7 @@ LLVM version: 6.0", }, CacheMode::ReadWrite, vec![], + false, ); // Write a dummy input file so the preprocessor cache mode can work std::fs::write(f.tempdir.path().join("foo.c"), "whatever").unwrap(); @@ -3095,6 +3131,7 @@ LLVM version: 6.0", }, CacheMode::ReadWrite, vec![], + false, ); let storage = Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage.clone(), pool.clone()); @@ -3225,6 +3262,7 @@ LLVM version: 6.0", }, CacheMode::ReadWrite, vec![], + false, ); let storage = Arc::new(storage); let service = server::SccacheService::mock_with_storage(storage.clone(), pool.clone()); @@ -3324,6 +3362,7 @@ LLVM version: 6.0", }, CacheMode::ReadWrite, vec![], + false, ); let storage = Arc::new(storage); // Pretend to be GCC. 
diff --git a/src/config.rs b/src/config.rs index 92f46635d..ed4f1e007 100644 --- a/src/config.rs +++ b/src/config.rs @@ -298,6 +298,7 @@ pub struct DiskCacheConfig { pub size: u64, pub preprocessor_cache_mode: PreprocessorCacheModeConfig, pub rw_mode: CacheModeConfig, + pub file_clone: bool, } impl Default for DiskCacheConfig { @@ -307,6 +308,7 @@ impl Default for DiskCacheConfig { size: default_disk_cache_size(), preprocessor_cache_mode: PreprocessorCacheModeConfig::activated(), rw_mode: CacheModeConfig::ReadWrite, + file_clone: false, } } } @@ -1158,16 +1160,26 @@ fn config_from_env() -> Result { _ => (CacheModeConfig::ReadWrite, false), }; + let mut file_clone = false; + let file_clone_overridden = if let Some(value) = bool_from_env_var("SCCACHE_FILE_CLONE")? { + file_clone = value; + true + } else { + false + }; + let any_overridden = disk_dir.is_some() || disk_sz.is_some() || preprocessor_mode_overridden - || disk_rw_mode_overridden; + || disk_rw_mode_overridden + || file_clone_overridden; let disk = if any_overridden { Some(DiskCacheConfig { dir: disk_dir.unwrap_or_else(default_disk_cache_dir), size: disk_sz.unwrap_or_else(default_disk_cache_size), preprocessor_cache_mode: preprocessor_mode_config, rw_mode: disk_rw_mode, + file_clone, }) } else { None @@ -1609,6 +1621,7 @@ fn config_overrides() { size: 5, preprocessor_cache_mode: Default::default(), rw_mode: CacheModeConfig::ReadWrite, + file_clone: false, }), redis: Some(RedisCacheConfig { endpoint: Some("myotherredisurl".to_owned()), @@ -1631,6 +1644,7 @@ fn config_overrides() { size: 15, preprocessor_cache_mode: Default::default(), rw_mode: CacheModeConfig::ReadWrite, + file_clone: false, }), memcached: Some(MemcachedCacheConfig { url: "memurl".to_owned(), @@ -1674,6 +1688,7 @@ fn config_overrides() { size: 5, preprocessor_cache_mode: Default::default(), rw_mode: CacheModeConfig::ReadWrite, + file_clone: false, }), memcached: Some(MemcachedCacheConfig { url: "memurl".to_owned(), @@ -1697,6 +1712,7 @@ fn 
config_overrides() { size: 5, preprocessor_cache_mode: Default::default(), rw_mode: CacheModeConfig::ReadWrite, + file_clone: false, }, dist: Default::default(), server_startup_timeout: None, @@ -2344,6 +2360,7 @@ key_prefix = "cosprefix" size: 7 * 1024 * 1024 * 1024, preprocessor_cache_mode: PreprocessorCacheModeConfig::activated(), rw_mode: CacheModeConfig::ReadWrite, + file_clone: false, }), gcs: Some(GCSCacheConfig { bucket: "bucket".to_owned(), diff --git a/src/lib.rs b/src/lib.rs index 67ba7947f..3d0f35409 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,6 +45,7 @@ pub mod lru_disk_cache; mod mock_command; mod net; mod protocol; +pub mod reflink; pub mod server; #[doc(hidden)] pub mod util; diff --git a/src/lru_disk_cache/mod.rs b/src/lru_disk_cache/mod.rs index 9894ee22a..d5dc098b9 100644 --- a/src/lru_disk_cache/mod.rs +++ b/src/lru_disk_cache/mod.rs @@ -4,6 +4,7 @@ use fs::File; use fs_err as fs; use std::borrow::Borrow; use std::boxed::Box; +use std::collections::HashSet; use std::collections::hash_map::RandomState; use std::error::Error as StdError; use std::ffi::{OsStr, OsString}; @@ -12,14 +13,13 @@ use std::hash::BuildHasher; use std::io; use std::io::prelude::*; use std::path::{Path, PathBuf}; +use std::time::SystemTime; use filetime::{FileTime, set_file_times}; pub use lru_cache::{LruCache, Meter}; use tempfile::NamedTempFile; use walkdir::WalkDir; -use crate::util::OsStrExt; - const TEMPFILE_PREFIX: &str = ".sccachetmp"; struct FileSize; @@ -35,30 +35,141 @@ impl Meter for FileSize { } } -/// Return an iterator of `(path, size)` of files under `path` sorted by ascending last-modified -/// time, such that the oldest modified file is returned first. -fn get_all_files>(path: P) -> Box> { - let mut files: Vec<_> = WalkDir::new(path.as_ref()) +/// Marker file placed inside directory cache entries so they can be identified during init. 
+pub(crate) const DIR_ENTRY_MARKER: &str = ".sccache_dir_entry"; + +/// Calculate the total size of all files in a directory. +fn dir_content_size(path: &Path) -> u64 { + WalkDir::new(path) .into_iter() - .filter_map(|e| { - e.ok().and_then(|f| { - // Only look at files - if f.file_type().is_file() { - // Get the last-modified time, size, and the full path. - f.metadata().ok().and_then(|m| { - m.modified() - .ok() - .map(|mtime| (mtime, f.path().to_owned(), m.len())) - }) - } else { - None + .filter_map(std::result::Result::ok) + .filter(|e| e.file_type().is_file()) + .filter_map(|e| e.metadata().ok()) + .map(|m| m.len()) + .sum() +} + +/// Return an iterator of `(path, size, is_dir)` of cache entries under `path` sorted by ascending +/// last-modified time, such that the oldest modified entry is returned first. +/// +/// Cache entries can be either regular files or directories containing a marker file. +/// A directory is recognized as a cache entry if it contains a `.sccache_dir_entry` marker file. +fn get_all_entries>(path: P) -> Box> { + let root = path.as_ref().to_path_buf(); + let mut entries: Vec<(SystemTime, PathBuf, u64, bool)> = Vec::new(); + // Track directories we've added as cache entries, so we skip their contained files + let mut dir_entry_paths: HashSet = HashSet::new(); + + // First pass: find directory entries (directories containing the marker file) + // + // The disk cache uses a 2-level directory structure: keys are stored at + // `{char1}/{char2}/{full_key}`. Directories at depth 1 and 2 (e.g., `a/` and + // `a/b/`) are structural and naturally contain files (compressed cache entries) + // or subdirectories. Only directories at depth 3+ can be actual cache entries + // (uncompressed directory entries created by write_uncompressed_entry()). 
+ let root_depth = root.components().count(); + for entry in WalkDir::new(&root) + .min_depth(1) + .into_iter() + .filter_map(|e| e.ok()) + { + if !entry.file_type().is_dir() { + continue; + } + let entry_path = entry.path().to_owned(); + + // Skip temp dirs + if entry_path + .file_name() + .map(|n| n.to_string_lossy().starts_with(TEMPFILE_PREFIX)) + .unwrap_or(false) + { + continue; + } + + // Check for the marker file + if !entry_path.join(DIR_ENTRY_MARKER).exists() { + // Only directories at depth 3+ (i.e., at the cache key level) can be + // orphan uncompressed entries. Directories at depth 1-2 are structural + // path components ({char1}/, {char1}/{char2}/) and must not be removed. + let entry_depth = entry_path.components().count() - root_depth; + if entry_depth >= 3 { + // If this directory contains files (not just subdirectories), it's likely + // an orphan from a crash between write_uncompressed_entry() and add_dir(). + // Clean it up to avoid dead space. + let has_files = WalkDir::new(&entry_path) + .min_depth(1) + .max_depth(1) + .into_iter() + .filter_map(|e| e.ok()) + .any(|e| e.file_type().is_file()); + if has_files { + warn!( + "Removing orphan cache directory without marker: {}", + entry_path.display() + ); + fs::remove_dir_all(&entry_path).unwrap_or_else(|e| { + error!( + "Error removing orphan directory `{}`: {}", + entry_path.display(), + e + ); + }); } - }) - }) - .collect(); - // Sort by last-modified-time, so oldest file first. 
- files.sort_by_key(|k| k.0); - Box::new(files.into_iter().map(|(_mtime, path, size)| (path, size))) + } + continue; + } + + let total_size = dir_content_size(&entry_path); + + if let Ok(meta) = entry.metadata() { + if let Ok(mtime) = meta.modified() { + entries.push((mtime, entry_path.clone(), total_size, true)); + dir_entry_paths.insert(entry_path); + } + } + } + + // Second pass: find regular file entries (not inside directory entries) + for entry in WalkDir::new(&root) + .min_depth(1) + .into_iter() + .filter_map(|e| e.ok()) + { + if !entry.file_type().is_file() { + continue; + } + let entry_path = entry.path().to_owned(); + + // Skip temp files + if entry_path + .file_name() + .map(|n| n.to_string_lossy().starts_with(TEMPFILE_PREFIX)) + .unwrap_or(false) + { + continue; + } + + // Skip files that are inside a directory entry + let is_inside_dir_entry = entry_path.ancestors().any(|a| dir_entry_paths.contains(a)); + if is_inside_dir_entry { + continue; + } + + if let Ok(meta) = entry.metadata() { + if let Ok(mtime) = meta.modified() { + entries.push((mtime, entry_path, meta.len(), false)); + } + } + } + + // Sort by last-modified-time, so oldest entry first. + entries.sort_by_key(|k| k.0); + Box::new( + entries + .into_iter() + .map(|(_mtime, path, size, is_dir)| (path, size, is_dir)), + ) } /// An LRU cache of files on disk. @@ -67,6 +178,8 @@ pub struct LruDiskCache { root: PathBuf, pending: Vec, pending_size: u64, + /// Set of keys that are directory entries (not regular files). + dir_entries: HashSet, } /// Errors returned by this crate. @@ -149,6 +262,7 @@ impl LruDiskCache { root: PathBuf::from(path), pending: vec![], pending_size: 0, + dir_entries: HashSet::new(), } .init() } @@ -182,28 +296,76 @@ impl LruDiskCache { self.root.join(rel_path) } - /// Scan `self.root` for existing files and store them. + /// Scan `self.root` for existing files and directories and store them. 
fn init(mut self) -> Result<Self> { fs::create_dir_all(&self.root)?; - for (file, size) in get_all_files(&self.root) { - if file + + // First, clean up any temporary files/directories left over from incomplete operations. + // These are skipped by get_all_entries(), so we must handle them separately. + for entry in WalkDir::new(&self.root) + .min_depth(1) + .into_iter() + .filter_map(|e| e.ok()) + { + let entry_path = entry.path(); + if entry_path .file_name() - .expect("Bad path?") - .starts_with(TEMPFILE_PREFIX) + .map(|n| n.to_string_lossy().starts_with(TEMPFILE_PREFIX)) + .unwrap_or(false) { - fs::remove_file(&file).unwrap_or_else(|e| { - error!("Error removing temporary file `{}`: {}", file.display(), e); - }); - } else if !self.can_store(size) { - fs::remove_file(file).unwrap_or_else(|e| { - error!( - "Error removing file `{}` which is too large for the cache ({} bytes)", - e, size - ); - }); + if entry.file_type().is_dir() { + fs::remove_dir_all(entry_path).unwrap_or_else(|e| { + error!( + "Error removing temporary directory `{}`: {}", + entry_path.display(), + e + ); + }); + } else { + fs::remove_file(entry_path).unwrap_or_else(|e| { + error!( + "Error removing temporary file `{}`: {}", + entry_path.display(), + e + ); + }); + } + } + } + + // Now scan and register all cache entries (both files and directories).
+ for (entry_path, size, is_dir) in get_all_entries(&self.root) { + if !self.can_store(size) { + if is_dir { + fs::remove_dir_all(&entry_path).unwrap_or_else(|e| { + error!( + "Error removing directory `{}` which is too large for the cache ({} bytes): {}", + entry_path.display(), + size, + e + ); + }); + } else { + fs::remove_file(&entry_path).unwrap_or_else(|e| { + error!( + "Error removing file `{}` which is too large for the cache ({} bytes): {}", + entry_path.display(), + size, + e + ); + }); + } } else { - self.add_file(AddFile::AbsPath(file), size) - .unwrap_or_else(|e| error!("Error adding file: {}", e)); + if is_dir { + let rel_path = entry_path + .strip_prefix(&self.root) + .expect("Bad path?") + .as_os_str() + .to_owned(); + self.dir_entries.insert(rel_path); + } + self.add_file(AddFile::AbsPath(entry_path), size) + .unwrap_or_else(|e| error!("Error adding entry: {}", e)); } } Ok(self) @@ -221,27 +383,46 @@ impl LruDiskCache { //TODO: ideally LRUCache::insert would give us back the entries it had to remove. while self.size() + size > self.capacity() { let (rel_path, _) = self.lru.remove_lru().expect("Unexpectedly empty cache!"); - let remove_path = self.rel_to_abs_path(rel_path); + let remove_path = self.rel_to_abs_path(&rel_path); + let is_dir = self.dir_entries.remove(&rel_path); //TODO: check that files are removable during `init`, so that this is only // due to outside interference. 
- fs::remove_file(&remove_path).unwrap_or_else(|e| { - // Sometimes the file has already been removed - // this seems to happen when the max cache size has been reached - // https://github.com/mozilla/sccache/issues/2092 - if e.kind() == std::io::ErrorKind::NotFound { - debug!( - "Error removing file from cache as it was not found: `{:?}`", - remove_path - ); - } else { - panic!( - "Error removing file from cache: `{:?}`: {}, {:?}", - remove_path, - e, - e.kind() - ) - } - }); + if is_dir { + fs::remove_dir_all(&remove_path).unwrap_or_else(|e| { + if e.kind() == std::io::ErrorKind::NotFound { + debug!( + "Error removing directory from cache as it was not found: `{:?}`", + remove_path + ); + } else { + panic!( + "Error removing directory from cache: `{:?}`: {}, {:?}", + remove_path, + e, + e.kind() + ) + } + }); + } else { + fs::remove_file(&remove_path).unwrap_or_else(|e| { + // Sometimes the file has already been removed + // this seems to happen when the max cache size has been reached + // https://github.com/mozilla/sccache/issues/2092 + if e.kind() == std::io::ErrorKind::NotFound { + debug!( + "Error removing file from cache as it was not found: `{:?}`", + remove_path + ); + } else { + panic!( + "Error removing file from cache: `{:?}`: {}, {:?}", + remove_path, + e, + e.kind() + ) + } + }); + } } Ok(()) } @@ -396,15 +577,60 @@ impl LruDiskCache { self.get_file(key).map(|f| Box::new(f) as Box<dyn ReadSeek>) } + /// Update the LRU recency of an entry without opening it. This is useful for + /// directory entries where `get_file()` doesn't make sense. + /// Returns `Ok(true)` if the entry was found and touched, `Ok(false)` if not found.
+ pub fn touch<K: AsRef<OsStr>>(&mut self, key: K) -> Result<bool> { + let rel_path = key.as_ref(); + match self.lru.get(rel_path) { + Some(_) => { + let path = self.rel_to_abs_path(rel_path); + let t = FileTime::now(); + set_file_times(&path, t, t).unwrap_or_else(|e| { + debug!("Failed to update mtime for {:?}: {}", path, e); + }); + Ok(true) + } + None => Ok(false), + } + } + + /// Register a directory entry in the LRU cache. The directory should already exist + /// on disk at `self.root.join(key)`. The total size of all files within the directory + /// is calculated and used for LRU size tracking. A marker file is written inside the + /// directory so it can be recognized as a cache entry on restart. + pub fn add_dir<K: AsRef<OsStr>>(&mut self, key: K) -> Result<()> { + let rel_path = key.as_ref().to_owned(); + let abs_path = self.rel_to_abs_path(&rel_path); + let size = dir_content_size(&abs_path); + self.make_space(size)?; + // Write the marker file so init() can recognize this as a directory entry + let marker_path = abs_path.join(DIR_ENTRY_MARKER); + if !marker_path.exists() { + fs::write(&marker_path, b"")?; + } + self.dir_entries.insert(rel_path.clone()); + self.lru.insert(rel_path, size); + Ok(()) + } + /// Remove the given key from the cache.
pub fn remove<K: AsRef<OsStr>>(&mut self, key: K) -> Result<()> { match self.lru.remove(key.as_ref()) { Some(_) => { let path = self.rel_to_abs_path(key.as_ref()); - fs::remove_file(&path).map_err(|e| { - error!("Error removing file from cache: `{:?}`: {}", path, e); - Into::into(e) - }) + let is_dir = self.dir_entries.remove(key.as_ref()); + if is_dir { + fs::remove_dir_all(&path).map_err(|e| { + error!("Error removing directory from cache: `{:?}`: {}", path, e); + Into::into(e) + }) + } else { + fs::remove_file(&path).map_err(|e| { + error!("Error removing file from cache: `{:?}`: {}", path, e); + Into::into(e) + }) + } } None => Ok(()), } @@ -414,12 +640,37 @@ impl LruDiskCache { #[cfg(test)] mod tests { use super::fs::{self, File}; - use super::{Error, LruDiskCache, LruDiskCacheAddEntry, get_all_files}; + use super::{Error, LruDiskCache, LruDiskCacheAddEntry}; use filetime::{FileTime, set_file_times}; use std::io::{self, Read, Write}; use std::path::{Path, PathBuf}; use tempfile::TempDir; + use walkdir::WalkDir; + + /// Return an iterator of all physical files under `path`, sorted by ascending + /// last-modified time. Used only in tests to verify disk-level cleanup (e.g. + /// that temp files are actually deleted, not just filtered out by get_all_entries). + fn get_all_files<P: AsRef<Path>>(path: P) -> Box<dyn Iterator<Item = (PathBuf, u64)>> { + let mut files: Vec<_> = WalkDir::new(path.as_ref()) + .into_iter() + .filter_map(|e| { + e.ok().and_then(|f| { + if f.file_type().is_file() { + f.metadata().ok().and_then(|m| { + m.modified() + .ok() + .map(|mtime| (mtime, f.path().to_owned(), m.len())) + }) + } else { + None + } + }) + }) + .collect(); + files.sort_by_key(|k| k.0); + Box::new(files.into_iter().map(|(_mtime, path, size)| (path, size))) + } struct TestFixture { /// Temp directory.
@@ -728,4 +979,52 @@ mod tests { assert!(!f.tmp().join("cache").join("file2").exists()); assert!(!p4.exists()); } + + #[test] + fn test_compressed_entries_survive_reinit() { + // Regression test: compressed entries stored at {char}/{char}/{key} must + // not be deleted by the orphan cleanup logic in get_all_entries() when the + // cache is re-initialized (e.g., server restart). The structural directories + // (depth 1 and 2) contain files but have no .sccache_dir_entry marker, and + // must not be treated as orphan directory entries. + // + // Keys use Path::join() to produce OS-native separators so that + // contains_key() matches after re-init (which stores paths from + // strip_prefix, using native separators). + let f = TestFixture::new(); + let cache_dir = f.tmp().join("cache"); + + let key1 = Path::new("a").join("b").join("abcdef1234"); + let key2 = Path::new("a").join("b").join("abcdef5678"); + let key3 = Path::new("c").join("d").join("cdef1234"); + + { + let mut c = LruDiskCache::new(&cache_dir, 1000).unwrap(); + // Insert entries using the same key path structure as the disk cache + c.insert_bytes(&key1, &[1; 10]).unwrap(); + c.insert_bytes(&key2, &[2; 10]).unwrap(); + c.insert_bytes(&key3, &[3; 10]).unwrap(); + assert_eq!(c.len(), 3); + assert_eq!(c.size(), 30); + } + // Verify the files exist on disk + assert!(cache_dir.join(&key1).exists()); + assert!(cache_dir.join(&key2).exists()); + assert!(cache_dir.join(&key3).exists()); + + // Re-initialize the cache (simulates server restart) + { + let c = LruDiskCache::new(&cache_dir, 1000).unwrap(); + // All entries must still be present after re-init + assert!(c.contains_key(&key1)); + assert!(c.contains_key(&key2)); + assert!(c.contains_key(&key3)); + assert_eq!(c.len(), 3); + assert_eq!(c.size(), 30); + } + // Verify the files still exist on disk + assert!(cache_dir.join(&key1).exists()); + assert!(cache_dir.join(&key2).exists()); + assert!(cache_dir.join(&key3).exists()); + } } diff --git a/src/reflink.rs 
b/src/reflink.rs new file mode 100644 index 000000000..51bfd4726 --- /dev/null +++ b/src/reflink.rs @@ -0,0 +1,96 @@ +use std::fs; +use std::io; +use std::path::Path; + +/// Test if reflink is supported on the given directory's filesystem. +pub fn is_reflink_supported(cache_dir: &Path) -> bool { + let temp_dir = match tempfile::tempdir_in(cache_dir) { + Ok(d) => d, + Err(_) => return false, + }; + + let src = temp_dir.path().join("test_src"); + let dst = temp_dir.path().join("test_dst"); + + if fs::write(&src, b"test").is_err() { + return false; + } + + match reflink_copy::reflink(&src, &dst) { + Ok(_) => { + let _ = fs::remove_file(&dst); + true + } + Err(_) => false, + } +} + +/// Copy file using reflink if supported, otherwise fall back to regular copy. +/// +/// Note: `reflink_copy::reflink` requires the destination not to exist, while the +/// `fs::copy` fallback will overwrite an existing destination. Callers should ensure +/// the destination does not exist before calling this function if consistent behavior +/// is desired. +pub fn reflink_or_copy(src: &Path, dst: &Path) -> io::Result<()> { + match reflink_copy::reflink(src, dst) { + Ok(_) => Ok(()), + Err(_) => { + fs::copy(src, dst)?; + Ok(()) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + #[test] + fn test_reflink_or_copy_fallback() { + let dir = tempdir().unwrap(); + let src = dir.path().join("src"); + let dst = dir.path().join("dst"); + + fs::write(&src, b"hello").unwrap(); + reflink_or_copy(&src, &dst).unwrap(); + + assert!(dst.exists()); + assert_eq!(fs::read(&dst).unwrap(), b"hello"); + } + + #[test] + fn test_is_reflink_supported_runs_without_panic() { + let dir = tempdir().unwrap(); + // Just verify it doesn't panic and returns a value. + // On macOS with APFS this should return true; on other filesystems false. 
+ let _result = is_reflink_supported(dir.path()); + } + + #[test] + fn test_reflink_or_copy_overwrites_destination() { + let dir = tempdir().unwrap(); + let src = dir.path().join("src"); + let dst = dir.path().join("dst"); + + fs::write(&src, b"new content").unwrap(); + fs::write(&dst, b"old content").unwrap(); + + reflink_or_copy(&src, &dst).unwrap(); + + assert_eq!(fs::read(&dst).unwrap(), b"new content"); + } + + #[test] + fn test_reflink_or_copy_preserves_content() { + let dir = tempdir().unwrap(); + let src = dir.path().join("src"); + let dst = dir.path().join("dst"); + + let original_content = b"test data for reflink"; + fs::write(&src, original_content).unwrap(); + reflink_or_copy(&src, &dst).unwrap(); + + assert_eq!(fs::read(&dst).unwrap(), original_content); + } +} diff --git a/src/test/tests.rs b/src/test/tests.rs index 8d283a1b2..891da57b4 100644 --- a/src/test/tests.rs +++ b/src/test/tests.rs @@ -87,6 +87,7 @@ where PreprocessorCacheModeConfig::default(), CacheMode::ReadWrite, vec![], + false, )); let client = Client::new();