diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0f609e86116..e0732d6ef2b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -136,6 +136,18 @@ jobs: if: always() && steps.tests.outputs.has-rust == 'true' run: sudo apt install protobuf-compiler + - name: Install PDFium + if: matrix.package == '@rust/chonky' + env: + GH_TOKEN: ${{ github.token }} + run: | + temp_dir=$(mktemp -d) + gh release download chromium/6721 --repo bblanchon/pdfium-binaries --pattern 'pdfium-linux-x64.tgz' --dir $temp_dir + tar -xzf $temp_dir/pdfium-linux-x64.tgz -C $temp_dir + mv $temp_dir/lib/* "${{ matrix.directory }}/libs/" + rm -rf $temp_dir + echo "PDFIUM_DYNAMIC_LIB_PATH=$(pwd)/${{ matrix.directory }}/libs/" >> $GITHUB_ENV + - name: Install Rust toolchain if: always() && steps.tests.outputs.has-rust == 'true' uses: ./.github/actions/install-rust-toolchain @@ -269,6 +281,18 @@ jobs: - name: Install Protobuf run: sudo apt install protobuf-compiler + - name: Install PDFium + if: matrix.package == '@rust/chonky' + env: + GH_TOKEN: ${{ github.token }} + run: | + temp_dir=$(mktemp -d) + gh release download chromium/6721 --repo bblanchon/pdfium-binaries --pattern 'pdfium-linux-x64.tgz' --dir $temp_dir + tar -xzf $temp_dir/pdfium-linux-x64.tgz -C $temp_dir + mv $temp_dir/lib/* "${{ matrix.directory }}/libs/" + rm -rf $temp_dir + echo "PDFIUM_DYNAMIC_LIB_PATH=$(pwd)/${{ matrix.directory }}/libs/" >> $GITHUB_ENV + - name: Install playwright if: matrix.package == '@tests/hash-playwright' uses: nick-fields/retry@7152eba30c6575329ac0576536151aca5a72780e # v3.0.0 diff --git a/Cargo.lock b/Cargo.lock index 3d82f7082be..fbcaa499fdb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -75,6 +75,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "aligned-vec" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4aa90d7ce82d4be67b64039a3d588d38dbcc6736577de4a847025ce5b0c468d1" + [[package]] name = "allocator-api2" 
version = "0.2.18" @@ -193,6 +199,23 @@ dependencies = [ "num-traits", ] +[[package]] +name = "arbitrary" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" + +[[package]] +name = "arg_enum_proc_macro" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "ariadne" version = "0.5.0" @@ -374,6 +397,29 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +[[package]] +name = "av1-grain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6678909d8c5d46a42abcf571271e15fdbc0a225e3646cf23762cd415046c78bf" +dependencies = [ + "anyhow", + "arrayvec", + "log", + "nom", + "num-rational", + "v_frame", +] + +[[package]] +name = "avif-serialize" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e335041290c43101ca215eed6f43ec437eb5a42125573f600fc3fa42b9bddd62" +dependencies = [ + "arrayvec", +] + [[package]] name = "aws-config" version = "1.5.10" @@ -936,6 +982,12 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" +[[package]] +name = "bit_field" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc827186963e592360843fb5ba4b973e145841266c1357f7180c43526f2e5b61" + [[package]] name = "bitflags" version = "1.3.2" @@ -948,6 +1000,12 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = 
"bitstream-io" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c12d1856e42f0d817a835fe55853957c85c8c8a470114029143d3f12671446e" + [[package]] name = "bitvec" version = "1.0.1" @@ -993,6 +1051,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "built" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c360505aed52b7ec96a3636c3f039d99103c37d1d9b4f7a8c743d3ea9ffcd03b" + [[package]] name = "bumpalo" version = "3.16.0" @@ -1017,6 +1081,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "byteorder-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" + [[package]] name = "bytes" version = "1.8.0" @@ -1080,9 +1150,21 @@ version = "1.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e80e3b6a3ab07840e1cae9b0666a63970dc28e8ed5ffbcdacbfc760c281bfc1" dependencies = [ + "jobserver", + "libc", "shlex", ] +[[package]] +name = "cfg-expr" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d067ad48b8650848b989a59a86c6c36a995d02d2bf778d45c3c5d57bc2718f02" +dependencies = [ + "smallvec", + "target-lexicon", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -1118,19 +1200,25 @@ name = "chonky" version = "0.0.0" dependencies = [ "error-stack", + "image 0.24.9", + "insta", + "pdfium-render", + "thiserror 2.0.3", ] [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" dependencies = [ "android-tzdata", "iana-time-zone", + 
"js-sys", "num-traits", "serde", - "windows-targets 0.52.6", + "wasm-bindgen", + "windows-targets 0.48.5", ] [[package]] @@ -1221,6 +1309,12 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" +[[package]] +name = "color_quant" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" + [[package]] name = "colorchoice" version = "1.0.2" @@ -1258,6 +1352,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "console_log" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be8aed40e4edbf4d3b4431ab260b63fdc40f5780a4766824329ea0f1eefe3c0f" +dependencies = [ + "log", + "web-sys", +] + [[package]] name = "const-oid" version = "0.9.6" @@ -2020,6 +2124,21 @@ dependencies = [ "once_cell", ] +[[package]] +name = "exr" +version = "1.73.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83197f59927b46c04a183a619b7c29df34e63e63c7869320862268c0ef687e0" +dependencies = [ + "bit_field", + "half", + "lebe", + "miniz_oxide", + "rayon-core", + "smallvec", + "zune-inflate", +] + [[package]] name = "eyre" version = "0.6.12" @@ -2053,6 +2172,15 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" +[[package]] +name = "fdeflate" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8090f921a24b04994d9929e204f50b498a33ea6ba559ffaa05e04f7ee7fb5ab" +dependencies = [ + "simd-adler32", +] + [[package]] name = "fiat-crypto" version = "0.2.9" @@ -2077,6 +2205,16 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = 
"flate2" +version = "1.0.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fluent-uri" version = "0.3.2" @@ -2365,6 +2503,16 @@ dependencies = [ "polyval", ] +[[package]] +name = "gif" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb2d69b19215e18bb912fa30f7ce15846e301408695e44e0ef719f1da9e19f2" +dependencies = [ + "color_quant", + "weezl", +] + [[package]] name = "gimli" version = "0.31.1" @@ -3556,6 +3704,58 @@ dependencies = [ "xmltree", ] +[[package]] +name = "image" +version = "0.24.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5690139d2f55868e080017335e4b94cb7414274c74f1669c84fb5feba2c9f69d" +dependencies = [ + "bytemuck", + "byteorder", + "color_quant", + "num-traits", + "png", +] + +[[package]] +name = "image" +version = "0.25.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd6f44aed642f18953a158afeb30206f4d50da59fbc66ecb53c66488de73563b" +dependencies = [ + "bytemuck", + "byteorder-lite", + "color_quant", + "exr", + "gif", + "image-webp", + "num-traits", + "png", + "qoi", + "ravif", + "rayon", + "rgb", + "tiff", + "zune-core", + "zune-jpeg", +] + +[[package]] +name = "image-webp" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e031e8e3d94711a9ccb5d6ea357439ef3dcbed361798bd4071dc4d9793fbe22f" +dependencies = [ + "byteorder-lite", + "quick-error 2.0.1", +] + +[[package]] +name = "imgref" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0263a3d970d5c054ed9312c0057b4f3bde9c0b33836d3637361d4a9e6e7a408" + [[package]] name = "include_dir" version = "0.7.4" @@ -3653,6 +3853,17 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "interpolate_name" +version = "0.2.4" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "inventory" version = "0.3.15" @@ -3718,6 +3929,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -3733,6 +3953,21 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + +[[package]] +name = "jpeg-decoder" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0" + [[package]] name = "js-sys" version = "0.3.72" @@ -3797,6 +4032,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "lebe" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8" + [[package]] name = "lexical" version = "7.0.2" @@ -3876,6 +4117,26 @@ version = "0.2.159" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" +[[package]] +name = "libfuzzer-sys" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9b9569d2f74e257076d8c6bfa73fb505b46b851e51ddaecc825944aa3bed17fa" +dependencies = [ + "arbitrary", + "cc", +] + +[[package]] +name = "libloading" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" +dependencies = [ + "cfg-if", + "windows-targets 0.52.6", +] + [[package]] name = "libm" version = "0.2.8" @@ -4331,6 +4592,15 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "loop9" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fae87c125b03c1d2c0150c90365d7d6bcc53fb73a9acaef207d2d065860f062" +dependencies = [ + "imgref", +] + [[package]] name = "lru" version = "0.12.5" @@ -4370,6 +4640,22 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" +[[package]] +name = "maybe-owned" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4facc753ae494aeb6e3c22f839b158aebd4f9270f55cd3c79906c45476c47ab4" + +[[package]] +name = "maybe-rayon" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519" +dependencies = [ + "cfg-if", + "rayon", +] + [[package]] name = "md-5" version = "0.10.6" @@ -4424,6 +4710,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -4564,6 +4851,12 @@ dependencies = [ "tokio", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "nix" version = "0.24.3" @@ -4591,6 +4884,12 @@ dependencies = [ 
"minimal-lexical", ] +[[package]] +name = "noop_proc_macro" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0676bb32a98c1a483ce53e500a81ad9c3d5b3f7c920c28c24e9cb0980d0b5bc8" + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -4646,6 +4945,17 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +[[package]] +name = "num-derive" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "num-format" version = "0.4.4" @@ -5005,6 +5315,32 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pdfium-render" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc9b3c6a3e32b3745bee220a4d3e77e284db1cf57b0361f94509a8821cfd920f" +dependencies = [ + "bitflags 2.6.0", + "bytemuck", + "bytes", + "chrono", + "console_error_panic_hook", + "console_log", + "image 0.25.5", + "itertools 0.13.0", + "js-sys", + "libloading", + "log", + "maybe-owned", + "once_cell", + "utf16string", + "vecmath", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "pem" version = "3.0.4" @@ -5081,6 +5417,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "piston-float" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad78bf43dcf80e8f950c92b84f938a0fc7590b7f6866fbcbeca781609c115590" + [[package]] name = "pkcs8" version = "0.10.2" @@ -5091,6 +5433,12 @@ dependencies 
= [ "spki", ] +[[package]] +name = "pkg-config" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" + [[package]] name = "plotters" version = "0.3.7" @@ -5119,6 +5467,19 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "png" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f9d46a34a05a6a57566bc2bfae066ef07585a6e3fa30fbbdff5936380623f0" +dependencies = [ + "bitflags 1.3.2", + "crc32fast", + "fdeflate", + "flate2", + "miniz_oxide", +] + [[package]] name = "polling" version = "3.7.3" @@ -5281,6 +5642,25 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "profiling" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afbdc74edc00b6f6a218ca6a5364d6226a259d4b8ea1af4a0ea063f27e179f4d" +dependencies = [ + "profiling-procmacros", +] + +[[package]] +name = "profiling-procmacros" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a65f2e60fbf1063868558d69c6beacf412dc755f9fc020f514b7955fc914fe30" +dependencies = [ + "quote", + "syn 2.0.87", +] + [[package]] name = "prometheus-client" version = "0.22.3" @@ -5443,12 +5823,27 @@ dependencies = [ "serde_json", ] +[[package]] +name = "qoi" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6d64c71eb498fe9eae14ce4ec935c555749aef511cca85b5568910d6e48001" +dependencies = [ + "bytemuck", +] + [[package]] name = "quick-error" version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" +[[package]] +name = "quick-error" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" + [[package]] name = 
"quick-protobuf" version = "0.8.1" @@ -5583,6 +5978,56 @@ dependencies = [ "rand_core", ] +[[package]] +name = "rav1e" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd87ce80a7665b1cce111f8a16c1f3929f6547ce91ade6addf4ec86a8dda5ce9" +dependencies = [ + "arbitrary", + "arg_enum_proc_macro", + "arrayvec", + "av1-grain", + "bitstream-io", + "built", + "cfg-if", + "interpolate_name", + "itertools 0.12.1", + "libc", + "libfuzzer-sys", + "log", + "maybe-rayon", + "new_debug_unreachable", + "noop_proc_macro", + "num-derive", + "num-traits", + "once_cell", + "paste", + "profiling", + "rand", + "rand_chacha", + "simd_helpers", + "system-deps", + "thiserror 1.0.69", + "v_frame", + "wasm-bindgen", +] + +[[package]] +name = "ravif" +version = "0.11.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2413fd96bd0ea5cdeeb37eaf446a22e6ed7b981d792828721e74ded1980a45c6" +dependencies = [ + "avif-serialize", + "imgref", + "loop9", + "quick-error 2.0.1", + "rav1e", + "rayon", + "rgb", +] + [[package]] name = "rayon" version = "1.10.0" @@ -5802,7 +6247,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52e44394d2086d010551b14b53b1f24e31647570cd1deb0379e2c21b329aba00" dependencies = [ "hostname 0.3.1", - "quick-error", + "quick-error 1.2.3", ] [[package]] @@ -6448,6 +6893,21 @@ dependencies = [ "rand_core", ] +[[package]] +name = "simd-adler32" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" + +[[package]] +name = "simd_helpers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95890f873bec569a0362c235787f3aca6e1e887302ba4840839bcc6459c42da6" +dependencies = [ + "quote", +] + [[package]] name = "similar" version = "2.6.0" @@ -6698,6 +7158,19 @@ dependencies = [ "libc", ] +[[package]] +name = "system-deps" +version = 
"6.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e535eb8dded36d55ec13eddacd30dec501792ff23a0b1682c38601b8cf2349" +dependencies = [ + "cfg-expr", + "heck 0.5.0", + "pkg-config", + "toml", + "version-compare", +] + [[package]] name = "tachyonix" version = "0.3.1" @@ -6717,6 +7190,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + [[package]] name = "target-triple" version = "0.1.3" @@ -7002,6 +7481,17 @@ dependencies = [ "once_cell", ] +[[package]] +name = "tiff" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba1310fcea54c6a9a4fd1aad794ecc02c31682f6bfbecdf460bf19533eed1e3e" +dependencies = [ + "flate2", + "jpeg-decoder", + "weezl", +] + [[package]] name = "time" version = "0.3.36" @@ -7777,6 +8267,15 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf16string" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b62a1e85e12d5d712bf47a85f426b73d303e2d00a90de5f3004df3596e9d216" +dependencies = [ + "byteorder", +] + [[package]] name = "utf8_iter" version = "1.0.4" @@ -7837,12 +8336,38 @@ dependencies = [ "vsimd", ] +[[package]] +name = "v_frame" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6f32aaa24bacd11e488aa9ba66369c7cd514885742c9fe08cfe85884db3e92b" +dependencies = [ + "aligned-vec", + "num-traits", + "wasm-bindgen", +] + [[package]] name = "valuable" version = "0.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" +[[package]] +name = "vecmath" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "956ae1e0d85bca567dee1dcf87fb1ca2e792792f66f87dced8381f99cd91156a" +dependencies = [ + "piston-float", +] + +[[package]] +name = "version-compare" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852e951cb7832cb45cb1169900d19760cfa39b82bc0ea9c0e5a14ae88411c98b" + [[package]] name = "version_check" version = "0.9.5" @@ -8035,6 +8560,12 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "weezl" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" + [[package]] name = "which" version = "4.4.2" @@ -8484,3 +9015,27 @@ dependencies = [ "quote", "syn 2.0.87", ] + +[[package]] +name = "zune-core" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a" + +[[package]] +name = "zune-inflate" +version = "0.2.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73ab332fe2f6680068f3582b16a24f90ad7096d5d39b974d1c0aff0125116f02" +dependencies = [ + "simd-adler32", +] + +[[package]] +name = "zune-jpeg" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16099418600b4d8f028622f73ff6e3deaabdff330fb9a2a131dea781ee8b0768" +dependencies = [ + "zune-core", +] diff --git a/Cargo.toml b/Cargo.toml index bff09f7c976..05478f2c399 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -111,6 +111,7 @@ futures-sink = { version = "=0.3.31", default-features = false } futures-util = { version = "=0.3.31", default-features = false } hashbrown = { version = "=0.15.1", default-features = false, 
features = ["inline-more", "nightly"] } http = { version = "=1.1.0", default-features = false } +image = { version = "0.24.9", default-features = false } inferno = { version = "=0.11.21", default-features = false } iso8601-duration = { version = "=0.2.0", default-features = false } json-number = { version = "=0.4.9", default-features = false } @@ -122,6 +123,7 @@ libp2p-swarm = { version = "=0.45.1", default-features = false } libp2p-yamux = { version = "=0.46.0", default-features = false } multiaddr = { version = "=0.18.2", default-features = false } multistream-select = { version = "=0.13.0", default-features = false } +pdfium-render = { version = "0.8.26" } postgres-types = { version = "=0.2.8", default-features = false } prometheus-client = { version = "=0.22.3", default-features = false } regex = { version = "=1.11.1", default-features = false, features = ["perf", "unicode"] } diff --git a/libs/chonky/.gitattributes b/libs/chonky/.gitattributes new file mode 100644 index 00000000000..5a1ce7f276c --- /dev/null +++ b/libs/chonky/.gitattributes @@ -0,0 +1,2 @@ +src/snapshots/*.snap linguist-generated=true +src/snapshots/*.snap.bmp linguist-generated=true diff --git a/libs/chonky/Cargo.toml b/libs/chonky/Cargo.toml index c9080fe04a1..c17dda0554f 100644 --- a/libs/chonky/Cargo.toml +++ b/libs/chonky/Cargo.toml @@ -17,14 +17,19 @@ edition.workspace = true error-stack = { workspace = true, public = true } # Public third-party dependencies +image = { workspace = true, public = true, features = ["png", "bmp"] } +pdfium-render = { workspace = true, public = true } # Private workspace dependencies # Private third-party dependencies +thiserror = { workspace = true } [dev-dependencies] +insta = { workspace = true } [features] +static = ["pdfium-render/static"] [lints] workspace = true diff --git a/libs/chonky/README.md b/libs/chonky/README.md index 04db5666ca1..2d4ec027f9f 100644 --- a/libs/chonky/README.md +++ b/libs/chonky/README.md @@ -23,6 +23,48 @@ The library relies 
on common Rust tools as configured in the repository root. Th - For linting, [`clippy`](https://github.com/rust-lang/rust-clippy) is used: `cargo clippy --package chonky` - [`rustfmt`](https://github.com/rust-lang/rustfmt) serves as the formatter: `cargo fmt` +## Usage + +To run this package, a compiled library of `pdfium` must be provided. The library can either be statically or dynamically linked. The `libs/` folder is reserved to store the libraries. + +### Dynamic linking + +A dynamic library can be downloaded from [`bblanchon/pdfium-binaries`](https://github.com/bblanchon/pdfium-binaries/releases). It's possible to download the library from the command line. For example, to download the library for `mac-arm64` from the release `6721` and store it in `./libs/`: + +```sh +temp_dir=$(mktemp -d) +gh release download chromium/6721 --repo bblanchon/pdfium-binaries --pattern 'pdfium-mac-arm64.tgz' --dir $temp_dir +tar -xzf $temp_dir/pdfium-mac-arm64.tgz -C $temp_dir +mv $temp_dir/lib/* libs/ +rm -rf $temp_dir +``` + +To link the library dynamically, don't enable the `static` feature. The binary will read `PDFIUM_DYNAMIC_LIB_PATH` to search for the library. If the variable is not set it will use `libs/`: + +```sh +export PDFIUM_DYNAMIC_LIB_PATH="$(pwd)/libs/" +cargo build +``` + +### Static linking + +A static library can be downloaded from [`paulocoutinhox/pdfium-lib`](https://github.com/paulocoutinhox/pdfium-lib/releases). It's possible to download the library from the command line. For example, to download the library for `macos` from the release `6694` and store it in `./libs/`: + +```sh +temp_dir=$(mktemp -d) +gh release download 6694 --repo paulocoutinhox/pdfium-lib --pattern 'macos.tgz' --dir $temp_dir +tar -xzf $temp_dir/macos.tgz -C $temp_dir +mv $temp_dir/release/lib/* libs/ +rm -rf $temp_dir +``` + +To link the library statically, enable the `static` feature by passing `--features static` to any `cargo` invocation. 
When building the library it will search for `PDFIUM_STATIC_LIB_PATH`. For example, if the library is located at `libs/libpdfium.a` you can build the library with: + +```sh +export PDFIUM_STATIC_LIB_PATH="$(pwd)/libs/" +cargo build --features static +``` + ### Testing The tests for the package can either be run by using the default test harness: diff --git a/libs/chonky/build.rs b/libs/chonky/build.rs new file mode 100644 index 00000000000..06b6a52a363 --- /dev/null +++ b/libs/chonky/build.rs @@ -0,0 +1,23 @@ +fn main() { + #[cfg(all(feature = "static", target_os = "macos"))] + link_macos(); + + #[cfg(all(feature = "static", target_os = "linux"))] + link_linux(); +} + +#[cfg(all(feature = "static", target_os = "macos"))] +fn link_macos() { + // `pdfium` requires linking to the `CoreGraphics` and `libc++` frameworks on macOS + println!("cargo:rustc-link-lib=framework=CoreGraphics"); + // `pdfium` also has the `libc++` feature, which can be used instead, but this makes it more + // explicit that we need both. + println!("cargo:rustc-link-lib=c++"); +} + +#[cfg(all(feature = "static", target_os = "linux"))] +fn link_linux() { + // `pdfium` also has a `libstdc++` feature, which can be used instead, but this makes it more + // explicit that we need a different one depending on architecture. 
+ println!("cargo:rustc-link-lib=stdc++"); +} diff --git a/libs/chonky/libs/.gitignore b/libs/chonky/libs/.gitignore new file mode 100644 index 00000000000..593bcf0e80e --- /dev/null +++ b/libs/chonky/libs/.gitignore @@ -0,0 +1,2 @@ +!.gitignore +* diff --git a/libs/chonky/package.json b/libs/chonky/package.json index 3b31199fbe4..da851bdce47 100644 --- a/libs/chonky/package.json +++ b/libs/chonky/package.json @@ -5,7 +5,7 @@ "scripts": { "fix:clippy": "just clippy --fix", "lint:clippy": "just clippy", - "test:unit": "cargo hack nextest run --feature-powerset --all-targets && cargo test --all-features --doc" + "test:unit": "cargo hack nextest run --feature-powerset --exclude-features static --all-targets && cargo test --all-features --doc" }, "dependencies": { "@rust/error-stack": "0.5.0" diff --git a/libs/chonky/src/lib.rs b/libs/chonky/src/lib.rs index ed157c258bd..a5638709480 100644 --- a/libs/chonky/src/lib.rs +++ b/libs/chonky/src/lib.rs @@ -1,26 +1,304 @@ #![doc = include_str!("../README.md")] -/// Adds two numbers together +extern crate alloc; + +#[cfg(not(feature = "static"))] +use alloc::borrow::Cow; +#[cfg(not(feature = "static"))] +use std::{env, path::Path}; + +use error_stack::{Report, ResultExt as _}; +use pdfium_render::prelude::Pdfium; +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum ChonkyError { + #[error("parsing error in pdf")] + ReadPdf, + #[error("pdfium error")] + Pdfium, + #[error("write error to system")] + Write, + #[error("Issues with CLI input")] + Arguments, +} + +/// Attempts to link to the `PDFium` library. +/// +/// ## Loading strategy /// -/// # Example +/// - if the `static` feature is enabled, it will attempt to statically link to the `PDFium` +/// library. +/// - Otherwise, it will use the `PDFIUM_DYNAMIC_LIB_PATH` environment variable to load the dynamic +/// library from the specific path. If the environment variable is not set, it will attempt to +/// load the dynamic library from the default path `./libs/`. 
/// -/// ```rust -/// use chonky::add; +/// # Errors /// -/// assert_eq!(add(1, 3), 4); -/// ``` -#[must_use] -pub const fn add(left: u64, right: u64) -> u64 { - left + right +/// Will return a [`ChonkyError::Pdfium`] if the `PDFium` library could not be loaded. +pub fn link_pdfium() -> Result> { + #[cfg(feature = "static")] + return Ok(Pdfium::new( + Pdfium::bind_to_statically_linked_library().change_context(ChonkyError::Pdfium)?, + )); + + #[cfg(not(feature = "static"))] + { + let lib_path = env::var("PDFIUM_DYNAMIC_LIB_PATH") + .map_or_else(|_| Cow::Borrowed("./libs/"), Cow::Owned); + + let lib_path = Path::new(lib_path.as_ref()) + .canonicalize() + .change_context(ChonkyError::Pdfium) + .attach_printable_lazy(|| format!("could not canonicalize path `{lib_path}`"))?; + Ok(Pdfium::new( + Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path(&lib_path)) + .change_context(ChonkyError::Pdfium)?, + )) + } +} + +pub mod pdf_segmentation { + use error_stack::{Report, ResultExt as _}; + use image::{DynamicImage, GrayImage, RgbaImage}; + use pdfium_render::prelude::{ + PdfBitmap, PdfBitmapFormat, PdfDocument, PdfPoints, PdfRenderConfig, Pdfium, + }; + + use crate::ChonkyError; + + /// Function to read the pdf + /// + /// # Errors + /// + /// Will return [`ChonkyError::Pdfium`] if `filename` does not exist or the user does not have + /// permission to read it. 
+ pub fn load_pdf<'a>( + pdfium: &'a Pdfium, + file_path: &str, + ) -> Result, Report> { + pdfium + .load_pdf_from_file(file_path, None) + .map_err(|err| Report::new(err).change_context(ChonkyError::Pdfium)) + } + + // /// TODO: This function returns the extracted text that is segmented in proper reading order + // and /// grouped by boundaries such as newline spacing and other layout information, + // segments can /// contain texts with different formatting (such as a sentence with a + // **bold** inside) /// + // /// #Errors + // /// + // /// TBD + //pub fn extract_text(pdf: &PdfDocument) -> () {} + + // /// TODO: Given a list of segments of a PDF this function reads the segments via the bounding + // /// box order, with the naive approach of top→bottom (and if same top then left→right) and + // /// returns a sorted vector of reading order of segments + // /// + // /// #Errors + // /// + // /// TBD + //fn obtain_reading_order(segments: Vec) -> () {} + + // /// TODO: Function returns a smaller segment vector by grouping segments in similar chunks, + // but /// have different style formatting The new vector stores this information seperately + // /// + // /// #Errors + // /// + // /// TBD + //fn group_similar_segments() -> () {} + + /// Takes in a pdf document and returns a vector list where each page + /// is processed into a raw image that can be later converted to any image format + /// + /// # Errors + /// + /// Return an [`ChonkyError::Pdfium`] if there was a + /// problem with the image processing operation, occurs when image cannot be encoded into + /// specific image format + pub fn pdf_to_images(pdf: &PdfDocument) -> Result, Report> { + let mut images: Vec = Vec::new(); + + //may adjust resolution depending on need + let resolution_width = 1000; + + // information about how to store image, like pixed resolution and aspect ratio + let config = PdfRenderConfig::new().set_target_width(resolution_width); + + let mut bitmap = create_empty_bitmap(pdf, &config)?; 
+ + for page in pdf.pages().iter() { + // Render the entire page to a bitmap + page.render_into_bitmap_with_config(&mut bitmap, &config) + .change_context(ChonkyError::Pdfium)?; + + // Convert PdfBitmap to DynamicImage + let dynamic_image = as_image(&bitmap)?; + images.push(dynamic_image); + } + + Ok(images) + } + + /// Takes in a config with desired resolution of image and a pdf and creates + /// an empty bitmap that can be used by the entire pdf + /// + /// # Errors + /// + /// Return an [`ChonkyError::Pdfium`] if there was a problem + /// with converting the pdfs dimensions to pixels + fn create_empty_bitmap<'a>( + pdf: &'a PdfDocument, + config: &'a PdfRenderConfig, + ) -> Result, Report> { + // read the first page to get page dimesnions + let page_dimensions = pdf + .pages() + .page_sizes() + .change_context(ChonkyError::Pdfium)?[0]; + + let page = pdf.pages().get(0).change_context(ChonkyError::Pdfium)?; + + //converts the boundings boxes to pixels that can be used for creating bitmap with proper + // for correct bitmap dimensions, height point must be set to 0 to get max pixel height + let page_dimensions = pdf + .pages() + .get(0) + .change_context(ChonkyError::Pdfium)? 
+ .points_to_pixels(page_dimensions.width(), PdfPoints::new(0.0), config) + .change_context(ChonkyError::Pdfium)?; + + // some pdfs have dimensions that are larger than the actual "content" resulting in negative + // dimensions we aim to get the "net" dimensions by subtracting the 0.0 point pixel + // conversion from the height and width + let base_page_dimensions = page + .points_to_pixels(PdfPoints::new(0.0), page.height(), config) + .change_context(ChonkyError::Pdfium)?; + + let page_dimensions = ( + page_dimensions.0 - base_page_dimensions.0, + page_dimensions.1 - base_page_dimensions.1, + ); + + //create an empty bitmap that follows dimensions of pdf + // to prevent repeated memory allocations (we assume all pdfs are same dimension) + let bitmap = PdfBitmap::empty( + page_dimensions.0, + page_dimensions.1, + PdfBitmapFormat::BGRA, + pdf.bindings(), + ) + .change_context(ChonkyError::Pdfium)?; + + Ok(bitmap) + } + + ///A vendored function from pdfium-render's `as_image` function that returns a result instead + /// of panicking + /// + /// Errors# + /// + /// [`ChonkyError::Pdfium`] when the image had an error being processed + fn as_image(bitmap: &PdfBitmap) -> Result> { + let bytes = bitmap.as_rgba_bytes(); + + // clippy complains if we directly cast into u32 from i32 because of sign loss + // since we assume dimensions must be positive this is not as issue + let width = u32::try_from(bitmap.width()).change_context(ChonkyError::Pdfium)?; + + let height = u32::try_from(bitmap.height()).change_context(ChonkyError::Pdfium)?; + + Ok(match bitmap.format().map_err(|_foo| ChonkyError::Pdfium)? { + PdfBitmapFormat::BGRA | PdfBitmapFormat::BGRx | PdfBitmapFormat::BGR => { + RgbaImage::from_raw(width, height, bytes) + .map(DynamicImage::ImageRgba8) + .ok_or(ChonkyError::Pdfium)? 
+ } + PdfBitmapFormat::Gray => GrayImage::from_raw(width, height, bytes) + .map(DynamicImage::ImageLuma8) + .ok_or(ChonkyError::Pdfium)?, + _ => return Err(Report::new(ChonkyError::Pdfium)), + }) + } } #[cfg(test)] mod tests { + use error_stack::{Report, ResultExt as _}; + use insta::assert_binary_snapshot; + use super::*; #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); + fn pdf_load_success() -> Result<(), Report> { + let pdfium = link_pdfium()?; + + let test_pdf_string = "tests/docs/test-doc.pdf"; + + let _pdf = pdf_segmentation::load_pdf(&pdfium, test_pdf_string) + .change_context(ChonkyError::Pdfium)?; + + Ok(()) + } + + #[test] + fn pdf_load_failure() -> Result<(), Report> { + let pdfium = link_pdfium()?; + + let test_pdf_string = "tests/docs/invalid.pdf"; + + // Should return an error when loading an invalid PDF + let result = pdf_segmentation::load_pdf(&pdfium, test_pdf_string) + .change_context(ChonkyError::Pdfium); + + if result.is_err() { + // Expected failure, return Ok + Ok(()) + } else { + // Unexpected success, return an error + Err(Report::new(ChonkyError::Pdfium).attach_printable("Expected load_pdf to fail")) + } + } + + #[test] + fn pdf_image_conversion() -> Result<(), Report> { + let pdfium = link_pdfium()?; + + let test_pdf_string = "tests/docs/test-doc.pdf"; + + let pdf = pdf_segmentation::load_pdf(&pdfium, test_pdf_string) + .change_context(ChonkyError::Pdfium)?; + + //number of pages of pdf + let num_pages: usize = pdf.pages().len().into(); + + let preprocessed_pdf = + pdf_segmentation::pdf_to_images(&pdf).change_context(ChonkyError::Pdfium)?; + + //start by checking if proper amount of images are converted + + if preprocessed_pdf.len() != num_pages { + return Err(Report::new(ChonkyError::Pdfium) + .attach_printable("The length of vector should be number of pages")); + } + + // now check if the image contents are the same using insta snapshots + // start by converting images to binary + // let preprocessed_pdf: 
Vec> = preprocessed_pdf + // .into_iter() + // .map(image::DynamicImage::into_bytes) + // .collect(); + + // we only really need to check the first three pages + for (index, page) in preprocessed_pdf.into_iter().enumerate().take(3) { + let mut buffer = Vec::new(); + let encoder = image::codecs::bmp::BmpEncoder::new(&mut buffer); + + page.write_with_encoder(encoder) + .expect("image should be able to be encoded into a bitmap"); + assert_binary_snapshot!(format!("page_{}.bmp", index + 1).as_str(), buffer); + } + + Ok(()) } } diff --git a/libs/chonky/src/main.rs b/libs/chonky/src/main.rs new file mode 100644 index 00000000000..36a4fc90e35 --- /dev/null +++ b/libs/chonky/src/main.rs @@ -0,0 +1,33 @@ +use std::env; + +use chonky::{ChonkyError, pdf_segmentation}; +use error_stack::{Report, ResultExt as _, ensure}; + +fn main() -> Result<(), Report> { + // read file path arguments + // TODO: implement with clap + let args: Vec = env::args().collect(); + + ensure!(args.len() > 1, ChonkyError::Arguments); + + let pdfium = chonky::link_pdfium()?; + + let pdf = pdf_segmentation::load_pdf(&pdfium, &args[1]).change_context(ChonkyError::Pdfium)?; + + let preprocessed_pdf = + pdf_segmentation::pdf_to_images(&pdf).change_context(ChonkyError::Pdfium)?; + //for now we will print all these images to a folder + // this will be a seperate function in the future once knowledge about error-stack increases + + let output_folder = "./out"; + + for (index, image) in preprocessed_pdf.iter().enumerate() { + // Generate a unique filename for each page image + let file_path = format!("{}/page_{}.png", output_folder, index + 1); + + // Save the image as a PNG file + image.save(&file_path).change_context(ChonkyError::Write)?; + } + + Ok(()) +} diff --git a/libs/chonky/src/snapshots/chonky__tests__page_1.snap b/libs/chonky/src/snapshots/chonky__tests__page_1.snap new file mode 100644 index 00000000000..71131a21ad5 --- /dev/null +++ b/libs/chonky/src/snapshots/chonky__tests__page_1.snap @@ -0,0 
+1,6 @@ +--- +source: libs/chonky/src/lib.rs +expression: buffer +extension: bmp +snapshot_kind: binary +--- diff --git a/libs/chonky/src/snapshots/chonky__tests__page_1.snap.bmp b/libs/chonky/src/snapshots/chonky__tests__page_1.snap.bmp new file mode 100644 index 00000000000..800d54de684 Binary files /dev/null and b/libs/chonky/src/snapshots/chonky__tests__page_1.snap.bmp differ diff --git a/libs/chonky/src/snapshots/chonky__tests__page_2.snap b/libs/chonky/src/snapshots/chonky__tests__page_2.snap new file mode 100644 index 00000000000..71131a21ad5 --- /dev/null +++ b/libs/chonky/src/snapshots/chonky__tests__page_2.snap @@ -0,0 +1,6 @@ +--- +source: libs/chonky/src/lib.rs +expression: buffer +extension: bmp +snapshot_kind: binary +--- diff --git a/libs/chonky/src/snapshots/chonky__tests__page_2.snap.bmp b/libs/chonky/src/snapshots/chonky__tests__page_2.snap.bmp new file mode 100644 index 00000000000..46bc92ef731 Binary files /dev/null and b/libs/chonky/src/snapshots/chonky__tests__page_2.snap.bmp differ diff --git a/libs/chonky/src/snapshots/chonky__tests__page_3.snap b/libs/chonky/src/snapshots/chonky__tests__page_3.snap new file mode 100644 index 00000000000..71131a21ad5 --- /dev/null +++ b/libs/chonky/src/snapshots/chonky__tests__page_3.snap @@ -0,0 +1,6 @@ +--- +source: libs/chonky/src/lib.rs +expression: buffer +extension: bmp +snapshot_kind: binary +--- diff --git a/libs/chonky/src/snapshots/chonky__tests__page_3.snap.bmp b/libs/chonky/src/snapshots/chonky__tests__page_3.snap.bmp new file mode 100644 index 00000000000..b69ab120de3 Binary files /dev/null and b/libs/chonky/src/snapshots/chonky__tests__page_3.snap.bmp differ diff --git a/libs/chonky/tests/docs/test-doc.pdf b/libs/chonky/tests/docs/test-doc.pdf new file mode 100644 index 00000000000..a2a20201cde Binary files /dev/null and b/libs/chonky/tests/docs/test-doc.pdf differ