From 71a4f5c3f82e4103c3cbeb39b3251223282a70ef Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sun, 15 Sep 2024 12:45:45 +0200 Subject: [PATCH 1/5] lib: Add convenience function to decode hex strings --- lib/src/git_backend.rs | 2 +- lib/src/hex_util.rs | 4 ++++ lib/src/object_id.rs | 6 ++++-- lib/src/simple_op_heads_store.rs | 3 ++- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/src/git_backend.rs b/lib/src/git_backend.rs index 7d2b267e9..23977b9e8 100644 --- a/lib/src/git_backend.rs +++ b/lib/src/git_backend.rs @@ -1505,7 +1505,7 @@ fn tree_value_from_json(json: &serde_json::Value) -> TreeValue { } fn bytes_vec_from_json(value: &serde_json::Value) -> Vec { - hex::decode(value.as_str().unwrap()).unwrap() + crate::hex_util::decode_hex_string(value.as_str().unwrap()).unwrap() } #[cfg(test)] diff --git a/lib/src/hex_util.rs b/lib/src/hex_util.rs index 48517b38f..722a1799f 100644 --- a/lib/src/hex_util.rs +++ b/lib/src/hex_util.rs @@ -51,6 +51,10 @@ pub fn to_reverse_hex(forward_hex: &str) -> Option { .collect() } +pub fn decode_hex_string(hex: &str) -> Option> { + hex::decode(hex.as_bytes()).ok() +} + /// Calculates common prefix length of two byte sequences. The length /// to be returned is a number of hexadecimal digits. pub fn common_hex_len(bytes_a: &[u8], bytes_b: &[u8]) -> usize { diff --git a/lib/src/object_id.rs b/lib/src/object_id.rs index 061b68bf1..875985c8c 100644 --- a/lib/src/object_id.rs +++ b/lib/src/object_id.rs @@ -104,6 +104,8 @@ macro_rules! impl_id_type { pub(crate) use id_type; pub(crate) use impl_id_type; +use crate::hex_util::decode_hex_string; + /// An identifier prefix (typically from a type implementing the [`ObjectId`] /// trait) with facilities for converting between bytes and a hex string. #[derive(Debug, Clone, PartialEq, Eq)] @@ -120,9 +122,9 @@ impl HexPrefix { pub fn new(prefix: &str) -> Option { let has_odd_byte = prefix.len() & 1 != 0; let min_prefix_bytes = if has_odd_byte { - hex::decode(prefix.to_owned() + "0").ok()? + decode_hex_string(&format!("{prefix}0"))? } else { - hex::decode(prefix).ok()? + decode_hex_string(prefix)? }; Some(HexPrefix { min_prefix_bytes, diff --git a/lib/src/simple_op_heads_store.rs b/lib/src/simple_op_heads_store.rs index e4b05a1c1..d8e59b2f9 100644 --- a/lib/src/simple_op_heads_store.rs +++ b/lib/src/simple_op_heads_store.rs @@ -21,6 +21,7 @@ use std::fs; use std::path::Path; use std::path::PathBuf; +use crate::hex_util::decode_hex_string; use crate::lock::FileLock; use crate::object_id::ObjectId; use crate::op_heads_store::OpHeadsStore; @@ -96,7 +97,7 @@ impl OpHeadsStore for SimpleOpHeadsStore { for op_head_entry in std::fs::read_dir(&self.dir).unwrap() { let op_head_file_name = op_head_entry.unwrap().file_name(); let op_head_file_name = op_head_file_name.to_str().unwrap(); - if let Ok(op_head) = hex::decode(op_head_file_name) { + if let Some(op_head) = decode_hex_string(op_head_file_name) { op_heads.push(OperationId::new(op_head)); } } From ff62e096a2429119d77b5b7c0f63cab1e2398fb2 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sun, 15 Sep 2024 12:23:46 +0200 Subject: [PATCH 2/5] dep: Replace `hex` crate with `faster_hex` `hex` does not seem to be maintained any longer and we already transitively depend on `faster_hex` anyways. --- Cargo.lock | 11 +++-------- Cargo.toml | 2 +- cli/Cargo.toml | 1 + lib/Cargo.toml | 2 +- lib/src/content_hash.rs | 4 ++-- lib/src/default_index/mutable.rs | 2 +- lib/src/git_backend.rs | 3 +-- lib/src/hex_util.rs | 5 ++++- lib/src/object_id.rs | 9 +++++---- lib/src/stacked_table.rs | 2 +- lib/tests/test_git.rs | 4 ++-- lib/tests/test_revset.rs | 8 ++++---- lib/testutils/Cargo.toml | 2 +- lib/testutils/src/test_signing_backend.rs | 3 +-- 14 files changed, 28 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a0072b2f7..77615a300 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1625,12 +1625,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - [[package]] name = "home" version = "0.5.9" @@ -1833,6 +1827,7 @@ dependencies = [ "dirs", "dunce", "esl01-renderdag", + "faster-hex", "futures 0.3.30", "git2", "gix", @@ -1886,12 +1881,12 @@ dependencies = [ "digest", "either", "esl01-renderdag", + "faster-hex", "futures 0.3.30", "git2", "gix", "gix-filter", "glob", - "hex", "ignore", "indexmap", "indoc", @@ -3096,9 +3091,9 @@ version = "0.21.0" dependencies = [ "async-trait", "config", + "faster-hex", "futures 0.3.30", "git2", - "hex", "itertools 0.13.0", "jj-lib", "pollster", diff --git a/Cargo.toml b/Cargo.toml index 9fbebeb5f..36080bfe6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,7 +66,7 @@ gix = { version = "0.66.0", default-features = false, features = [ ] } gix-filter = "0.13.0" glob = "0.3.1" -hex = "0.4.3" +faster-hex = { version = "0.9.0", default-features = false, features = ["std"]} ignore = "0.4.23" indexmap = "2.5.0" indoc = "2.0.4" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index ef261d178..ecafc4e0b 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -64,6 +64,7 @@ esl01-renderdag = { workspace = true } futures = { workspace = true } git2 = { workspace = true } gix = { workspace = true } +faster-hex = { workspace = true } indexmap = { workspace = true } indoc = { workspace = true } itertools = { workspace = true } diff --git a/lib/Cargo.toml b/lib/Cargo.toml index e45e45f88..8621b2ca4 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -48,7 +48,7 @@ git2 = { workspace = true, optional = true } gix = { workspace = true, optional = true } gix-filter = { workspace = true, optional = true } glob = { workspace = true } -hex = { workspace = true } +faster-hex = { workspace = true } ignore = { workspace = true } indexmap = { workspace = true } itertools = { workspace = true } diff --git a/lib/src/content_hash.rs b/lib/src/content_hash.rs index e3918589a..b9f649a7d 100644 --- a/lib/src/content_hash.rs +++ b/lib/src/content_hash.rs @@ -215,7 +215,7 @@ mod tests { x: Vec>, y: i64, } - let foo_hash = hex::encode(hash(&Foo { + let foo_hash = faster_hex::hex_string(&hash(&Foo { x: vec![None, Some(42)], y: 17, })); @@ -231,7 +231,7 @@ mod tests { y: Y, } assert_eq!( - hex::encode(hash(&GenericFoo { + faster_hex::hex_string(&hash(&GenericFoo { x: vec![None, Some(42)], y: 17i64 })), diff --git a/lib/src/default_index/mutable.rs b/lib/src/default_index/mutable.rs index 353550dbe..034f61e07 100644 --- a/lib/src/default_index/mutable.rs +++ b/lib/src/default_index/mutable.rs @@ -359,7 +359,7 @@ impl MutableIndexSegment { self.serialize_local_entries(&mut buf); let mut hasher = Blake2b512::new(); hasher.update(&buf); - let index_file_id_hex = hex::encode(hasher.finalize()); + let index_file_id_hex = faster_hex::hex_string(&hasher.finalize()); let index_file_path = dir.join(&index_file_id_hex); let mut temp_file = NamedTempFile::new_in(dir)?; diff --git a/lib/src/git_backend.rs b/lib/src/git_backend.rs index 23977b9e8..254a2b493 100644 --- a/lib/src/git_backend.rs +++ b/lib/src/git_backend.rs @@ -1512,7 +1512,6 @@ fn bytes_vec_from_json(value: &serde_json::Value) -> Vec { mod tests { use assert_matches::assert_matches; use git2::Oid; - use hex::ToHex; use pollster::FutureExt; use test_case::test_case; @@ -2133,7 +2132,7 @@ mod tests { }; let mut signer = |data: &_| { - let hash: String = blake2b_hash(data).encode_hex(); + let hash: String = faster_hex::hex_string(&blake2b_hash(data)); Ok(format!("test sig\n\n\nhash={hash}").into_bytes()) }; diff --git a/lib/src/hex_util.rs b/lib/src/hex_util.rs index 722a1799f..37208f2f8 100644 --- a/lib/src/hex_util.rs +++ b/lib/src/hex_util.rs @@ -52,7 +52,10 @@ pub fn to_reverse_hex(forward_hex: &str) -> Option { } pub fn decode_hex_string(hex: &str) -> Option> { - hex::decode(hex.as_bytes()).ok() + let mut dst = vec![0; hex.len() / 2]; + faster_hex::hex_decode(hex.as_bytes(), &mut dst) + .ok() + .map(|()| dst) } /// Calculates common prefix length of two byte sequences. The length diff --git a/lib/src/object_id.rs b/lib/src/object_id.rs index 875985c8c..b0136aa40 100644 --- a/lib/src/object_id.rs +++ b/lib/src/object_id.rs @@ -66,8 +66,9 @@ macro_rules! impl_id_type { } /// Parses the given hex string into an ObjectId. - pub fn try_from_hex(hex: &str) -> Result { - hex::decode(hex).map(Self) + pub fn try_from_hex(hex: &str) -> Result { + let mut dst = vec![0; hex.len() / 2]; + faster_hex::hex_decode(hex.as_bytes(), &mut dst).map(|()| Self(dst)) } } @@ -95,7 +96,7 @@ macro_rules! impl_id_type { } fn hex(&self) -> String { - hex::encode(&self.0) + faster_hex::hex_string(&self.0) } } }; @@ -140,7 +141,7 @@ impl HexPrefix { } pub fn hex(&self) -> String { - let mut hex_string = hex::encode(&self.min_prefix_bytes); + let mut hex_string = faster_hex::hex_string(&self.min_prefix_bytes); if self.has_odd_byte { hex_string.pop().unwrap(); } diff --git a/lib/src/stacked_table.rs b/lib/src/stacked_table.rs index d88e05366..ff34eb649 100644 --- a/lib/src/stacked_table.rs +++ b/lib/src/stacked_table.rs @@ -333,7 +333,7 @@ impl MutableTable { let buf = self.maybe_squash_with_ancestors().serialize(); let mut hasher = Blake2b512::new(); hasher.update(&buf); - let file_id_hex = hex::encode(hasher.finalize()); + let file_id_hex = faster_hex::hex_string(&hasher.finalize()); let file_path = store.dir.join(&file_id_hex); let mut temp_file = NamedTempFile::new_in(&store.dir)?; diff --git a/lib/tests/test_git.rs b/lib/tests/test_git.rs index d9106f994..f12949a7e 100644 --- a/lib/tests/test_git.rs +++ b/lib/tests/test_git.rs @@ -1381,8 +1381,8 @@ fn test_import_refs_missing_git_commit() { let commit1 = empty_git_commit(&git_repo, "refs/heads/main", &[]); let commit2 = empty_git_commit(&git_repo, "refs/heads/main", &[&commit1]); - let shard = hex::encode(&commit1.id().as_bytes()[..1]); - let object_basename = hex::encode(&commit1.id().as_bytes()[1..]); + let shard = faster_hex::hex_string(&commit1.id().as_bytes()[..1]); + let object_basename = faster_hex::hex_string(&commit1.id().as_bytes()[1..]); let object_store_path = git_repo.path().join("objects"); let object_file = object_store_path.join(&shard).join(object_basename); let backup_object_file = object_store_path.join(&shard).join("backup"); diff --git a/lib/tests/test_revset.rs b/lib/tests/test_revset.rs index b96550cdd..88629037a 100644 --- a/lib/tests/test_revset.rs +++ b/lib/tests/test_revset.rs @@ -273,22 +273,22 @@ fn test_resolve_symbol_change_id(readonly: bool) { // Test the test setup assert_eq!( - hex::encode(git_commit_ids[0]), + &format!("{}", git_commit_ids[0]), // "04e12a5467bba790efb88a9870894ec208b16bf1" reversed "8fd68d104372910e19511df709e5dde62a548720" ); assert_eq!( - hex::encode(git_commit_ids[1]), + &format!("{}", git_commit_ids[1]), // "040b3ba3a51d8edbc4c5855cbd09de71d4c29cca" reversed "5339432b8e7b90bd3aa1a323db71b8a5c5dcd020" ); assert_eq!( - hex::encode(git_commit_ids[2]), + &format!("{}", git_commit_ids[2]), // "04e1c7082e4e34f3f371d8a1a46770b861b9b547" reversed "e2ad9d861d0ee625851b8ecfcf2c727410e38720" ); assert_eq!( - hex::encode(git_commit_ids[3]), + &format!("{}", git_commit_ids[3]), // "911d7e52fd5ba04b8f289e14c3d30b52d38c0020" reversed "040031cb4ad0cbc3287914f1d205dabf4a7eb889" ); diff --git a/lib/testutils/Cargo.toml b/lib/testutils/Cargo.toml index 62b0ed866..2f065ae23 100644 --- a/lib/testutils/Cargo.toml +++ b/lib/testutils/Cargo.toml @@ -19,7 +19,7 @@ async-trait = { workspace = true } config = { workspace = true } futures = { workspace = true } git2 = { workspace = true } -hex = { workspace = true } +faster-hex = { workspace = true } itertools = { workspace = true } jj-lib = { workspace = true, features = ["testing"] } pollster = { workspace = true } diff --git a/lib/testutils/src/test_signing_backend.rs b/lib/testutils/src/test_signing_backend.rs index 8d1d3369b..5a986ab30 100644 --- a/lib/testutils/src/test_signing_backend.rs +++ b/lib/testutils/src/test_signing_backend.rs @@ -1,4 +1,3 @@ -use hex::ToHex; use jj_lib::content_hash::blake2b_hash; use jj_lib::signing::SigStatus; use jj_lib::signing::SignError; @@ -26,7 +25,7 @@ impl SigningBackend for TestSigningBackend { body.extend_from_slice(key.as_bytes()); body.extend_from_slice(data); - let hash: String = blake2b_hash(&body).encode_hex(); + let hash: String = faster_hex::hex_string(&blake2b_hash(&body)); Ok(format!("{PREFIX}{key}\n{hash}").into_bytes()) } From 436b44944c776205797f23a41267e0c24bef7042 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sun, 15 Sep 2024 12:39:53 +0200 Subject: [PATCH 3/5] lib: Optimize `ChangeId::reverse_hex` --- cli/src/cli_util.rs | 15 +++++++++------ cli/src/commit_templater.rs | 7 +------ lib/src/backend.rs | 13 +++++++++++++ lib/src/hex_util.rs | 18 ++++++++++++++---- 4 files changed, 37 insertions(+), 16 deletions(-) diff --git a/cli/src/cli_util.rs b/cli/src/cli_util.rs index c49af6b77..7a8056907 100644 --- a/cli/src/cli_util.rs +++ b/cli/src/cli_util.rs @@ -64,7 +64,6 @@ use jj_lib::git; use jj_lib::git_backend::GitBackend; use jj_lib::gitignore::GitIgnoreError; use jj_lib::gitignore::GitIgnoreFile; -use jj_lib::hex_util::to_reverse_hex; use jj_lib::id_prefix::IdPrefixContext; use jj_lib::matchers::Matcher; use jj_lib::merge::MergedTreeValue; @@ -2632,17 +2631,21 @@ pub fn edit_temp_file( } pub fn short_commit_hash(commit_id: &CommitId) -> String { - commit_id.hex()[0..12].to_string() + let mut hash = commit_id.hex(); + hash.truncate(12); + hash } pub fn short_change_hash(change_id: &ChangeId) -> String { - // TODO: We could avoid the unwrap() and make this more efficient by converting - // straight from binary. - to_reverse_hex(&change_id.hex()[0..12]).unwrap() + let mut hash = change_id.reverse_hex(); + hash.truncate(12); + hash } pub fn short_operation_hash(operation_id: &OperationId) -> String { - operation_id.hex()[0..12].to_string() + let mut hash = operation_id.hex(); + hash.truncate(12); + hash } /// Wrapper around a `DiffEditor` to conditionally start interactive session. diff --git a/cli/src/commit_templater.rs b/cli/src/commit_templater.rs index d52dbcd0c..95c531df7 100644 --- a/cli/src/commit_templater.rs +++ b/cli/src/commit_templater.rs @@ -31,7 +31,6 @@ use jj_lib::fileset; use jj_lib::fileset::FilesetDiagnostics; use jj_lib::fileset::FilesetExpression; use jj_lib::git; -use jj_lib::hex_util::to_reverse_hex; use jj_lib::id_prefix::IdPrefixContext; use jj_lib::matchers::Matcher; use jj_lib::merged_tree::MergedTree; @@ -1248,11 +1247,7 @@ impl CommitOrChangeId { pub fn hex(&self) -> String { match self { CommitOrChangeId::Commit(id) => id.hex(), - CommitOrChangeId::Change(id) => { - // TODO: We can avoid the unwrap() and make this more efficient by converting - // straight from bytes. - to_reverse_hex(&id.hex()).unwrap() - } + CommitOrChangeId::Change(id) => id.reverse_hex(), } } diff --git a/lib/src/backend.rs b/lib/src/backend.rs index 32b04ae26..5d3ca37f7 100644 --- a/lib/src/backend.rs +++ b/lib/src/backend.rs @@ -25,6 +25,7 @@ use futures::stream::BoxStream; use thiserror::Error; use crate::content_hash::ContentHash; +use crate::hex_util::to_reverse_hex_digit; use crate::index::Index; use crate::merge::Merge; use crate::object_id::id_type; @@ -50,6 +51,18 @@ id_type!(pub FileId); id_type!(pub SymlinkId); id_type!(pub ConflictId); +impl ChangeId { + pub fn reverse_hex(&self) -> String { + let mut buffer = vec![0; self.0.len() * 2]; + faster_hex::hex_encode(&self.0, &mut buffer).unwrap(); + buffer + .iter_mut() + .for_each(|b| *b = to_reverse_hex_digit(*b)); + + String::from_utf8(buffer).unwrap() + } +} + #[derive(ContentHash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)] pub struct MillisSinceEpoch(pub i64); diff --git a/lib/src/hex_util.rs b/lib/src/hex_util.rs index 37208f2f8..b278abb2c 100644 --- a/lib/src/hex_util.rs +++ b/lib/src/hex_util.rs @@ -14,7 +14,17 @@ #![allow(missing_docs)] -fn to_reverse_hex_digit(b: u8) -> Option { +pub fn to_reverse_hex_digit(b: u8) -> u8 { + let offset = match b { + b'0'..=b'9' => b - b'0', + b'A'..=b'F' => b - b'A' + 10, + b'a'..=b'f' => b - b'a' + 10, + b => return b, + }; + b'z' - offset +} + +fn to_reverse_hex_digit_checked(b: u8) -> Option { let value = match b { b'0'..=b'9' => b - b'0', b'A'..=b'F' => b - b'A' + 10, @@ -24,7 +34,7 @@ fn to_reverse_hex_digit(b: u8) -> Option { Some(b'z' - value) } -fn to_forward_hex_digit(b: u8) -> Option { +fn to_forward_hex_digit_checked(b: u8) -> Option { let value = match b { b'k'..=b'z' => b'z' - b, b'K'..=b'Z' => b'Z' - b, @@ -40,14 +50,14 @@ fn to_forward_hex_digit(b: u8) -> Option { pub fn to_forward_hex(reverse_hex: &str) -> Option { reverse_hex .bytes() - .map(|b| to_forward_hex_digit(b).map(char::from)) + .map(|b| to_forward_hex_digit_checked(b).map(char::from)) .collect() } pub fn to_reverse_hex(forward_hex: &str) -> Option { forward_hex .bytes() - .map(|b| to_reverse_hex_digit(b).map(char::from)) + .map(|b| to_reverse_hex_digit_checked(b).map(char::from)) .collect() } From 09b27eb37bd2a8997553045d613d8575e1ef6d0c Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Fri, 20 Sep 2024 09:32:09 +0200 Subject: [PATCH 4/5] lib: Hide faster_hex behind hex_utils --- lib/src/backend.rs | 9 ++------- lib/src/content_hash.rs | 6 ++++-- lib/src/default_index/mutable.rs | 3 ++- lib/src/git_backend.rs | 3 ++- lib/src/hex_util.rs | 14 ++++++++++++++ lib/src/object_id.rs | 4 ++-- lib/src/stacked_table.rs | 3 ++- lib/tests/test_git.rs | 5 +++-- lib/testutils/src/test_signing_backend.rs | 3 ++- 9 files changed, 33 insertions(+), 17 deletions(-) diff --git a/lib/src/backend.rs b/lib/src/backend.rs index 5d3ca37f7..efc1d6b9c 100644 --- a/lib/src/backend.rs +++ b/lib/src/backend.rs @@ -25,6 +25,7 @@ use futures::stream::BoxStream; use thiserror::Error; use crate::content_hash::ContentHash; +use crate::hex_util; use crate::hex_util::to_reverse_hex_digit; use crate::index::Index; use crate::merge::Merge; @@ -53,13 +54,7 @@ id_type!(pub ConflictId); impl ChangeId { pub fn reverse_hex(&self) -> String { - let mut buffer = vec![0; self.0.len() * 2]; - faster_hex::hex_encode(&self.0, &mut buffer).unwrap(); - buffer - .iter_mut() - .for_each(|b| *b = to_reverse_hex_digit(*b)); - - String::from_utf8(buffer).unwrap() + hex_util::encode_hex_string_reverse(&self.0) } } diff --git a/lib/src/content_hash.rs b/lib/src/content_hash.rs index b9f649a7d..2ace85139 100644 --- a/lib/src/content_hash.rs +++ b/lib/src/content_hash.rs @@ -150,6 +150,8 @@ mod tests { use std::collections::BTreeMap; use std::collections::HashMap; + use crate::hex_util; + use super::*; #[test] @@ -215,7 +217,7 @@ mod tests { x: Vec>, y: i64, } - let foo_hash = faster_hex::hex_string(&hash(&Foo { + let foo_hash = hex_util::encode_hex_string(&hash(&Foo { x: vec![None, Some(42)], y: 17, })); @@ -231,7 +233,7 @@ mod tests { y: Y, } assert_eq!( - faster_hex::hex_string(&hash(&GenericFoo { + hex_util::encode_hex_string(&hash(&GenericFoo { x: vec![None, Some(42)], y: 17i64 })), diff --git a/lib/src/default_index/mutable.rs b/lib/src/default_index/mutable.rs index 034f61e07..c6d2998d9 100644 --- a/lib/src/default_index/mutable.rs +++ b/lib/src/default_index/mutable.rs @@ -48,6 +48,7 @@ use crate::backend::ChangeId; use crate::backend::CommitId; use crate::commit::Commit; use crate::file_util::persist_content_addressed_temp_file; +use crate::hex_util; use crate::index::AllHeadsForGcUnsupported; use crate::index::ChangeIdIndex; use crate::index::Index; @@ -359,7 +360,7 @@ impl MutableIndexSegment { self.serialize_local_entries(&mut buf); let mut hasher = Blake2b512::new(); hasher.update(&buf); - let index_file_id_hex = faster_hex::hex_string(&hasher.finalize()); + let index_file_id_hex = hex_util::encode_hex_string(&hasher.finalize()); let index_file_path = dir.join(&index_file_id_hex); let mut temp_file = NamedTempFile::new_in(dir)?; diff --git a/lib/src/git_backend.rs b/lib/src/git_backend.rs index 254a2b493..fc2fb6764 100644 --- a/lib/src/git_backend.rs +++ b/lib/src/git_backend.rs @@ -1517,6 +1517,7 @@ mod tests { use super::*; use crate::content_hash::blake2b_hash; + use crate::hex_util; #[test_case(false; "legacy tree format")] #[test_case(true; "tree-level conflict format")] @@ -2132,7 +2133,7 @@ mod tests { }; let mut signer = |data: &_| { - let hash: String = faster_hex::hex_string(&blake2b_hash(data)); + let hash: String = hex_util::encode_hex_string(&blake2b_hash(data)); Ok(format!("test sig\n\n\nhash={hash}").into_bytes()) }; diff --git a/lib/src/hex_util.rs b/lib/src/hex_util.rs index b278abb2c..c435e352b 100644 --- a/lib/src/hex_util.rs +++ b/lib/src/hex_util.rs @@ -81,6 +81,20 @@ pub fn common_hex_len(bytes_a: &[u8], bytes_b: &[u8]) -> usize { .unwrap_or_else(|| bytes_a.len().min(bytes_b.len()) * 2) } +pub fn encode_hex_string_reverse(src: &[u8]) -> String { + let mut buffer = vec![0; src.len() * 2]; + faster_hex::hex_encode(src, &mut buffer).unwrap(); + buffer + .iter_mut() + .for_each(|b| *b = to_reverse_hex_digit(*b)); + + String::from_utf8(buffer).unwrap() +} + +pub fn encode_hex_string(src: &[u8]) -> String { + faster_hex::hex_string(src) +} + #[cfg(test)] mod tests { use super::*; diff --git a/lib/src/object_id.rs b/lib/src/object_id.rs index b0136aa40..b03aa5430 100644 --- a/lib/src/object_id.rs +++ b/lib/src/object_id.rs @@ -105,7 +105,7 @@ macro_rules! impl_id_type { pub(crate) use id_type; pub(crate) use impl_id_type; -use crate::hex_util::decode_hex_string; +use crate::hex_util::{self, decode_hex_string}; /// An identifier prefix (typically from a type implementing the [`ObjectId`] /// trait) with facilities for converting between bytes and a hex string. @@ -141,7 +141,7 @@ impl HexPrefix { } pub fn hex(&self) -> String { - let mut hex_string = faster_hex::hex_string(&self.min_prefix_bytes); + let mut hex_string = hex_util::encode_hex_string(&self.min_prefix_bytes); if self.has_odd_byte { hex_string.pop().unwrap(); } diff --git a/lib/src/stacked_table.rs b/lib/src/stacked_table.rs index ff34eb649..f2a7ee71e 100644 --- a/lib/src/stacked_table.rs +++ b/lib/src/stacked_table.rs @@ -37,6 +37,7 @@ use tempfile::NamedTempFile; use thiserror::Error; use crate::file_util::persist_content_addressed_temp_file; +use crate::hex_util; use crate::lock::FileLock; pub trait TableSegment { @@ -333,7 +334,7 @@ impl MutableTable { let buf = self.maybe_squash_with_ancestors().serialize(); let mut hasher = Blake2b512::new(); hasher.update(&buf); - let file_id_hex = faster_hex::hex_string(&hasher.finalize()); + let file_id_hex = hex_util::encode_hex_string(&hasher.finalize()); let file_path = store.dir.join(&file_id_hex); let mut temp_file = NamedTempFile::new_in(&store.dir)?; diff --git a/lib/tests/test_git.rs b/lib/tests/test_git.rs index f12949a7e..9ff03c44e 100644 --- a/lib/tests/test_git.rs +++ b/lib/tests/test_git.rs @@ -44,6 +44,7 @@ use jj_lib::git::GitRefUpdate; use jj_lib::git::RefName; use jj_lib::git::SubmoduleConfig; use jj_lib::git_backend::GitBackend; +use jj_lib::hex_util; use jj_lib::object_id::ObjectId; use jj_lib::op_store::BookmarkTarget; use jj_lib::op_store::RefTarget; @@ -1381,8 +1382,8 @@ fn test_import_refs_missing_git_commit() { let commit1 = empty_git_commit(&git_repo, "refs/heads/main", &[]); let commit2 = empty_git_commit(&git_repo, "refs/heads/main", &[&commit1]); - let shard = faster_hex::hex_string(&commit1.id().as_bytes()[..1]); - let object_basename = faster_hex::hex_string(&commit1.id().as_bytes()[1..]); + let shard = hex_util::encode_hex_string(&commit1.id().as_bytes()[..1]); + let object_basename = hex_util::encode_hex_string(&commit1.id().as_bytes()[1..]); let object_store_path = git_repo.path().join("objects"); let object_file = object_store_path.join(&shard).join(object_basename); let backup_object_file = object_store_path.join(&shard).join("backup"); diff --git a/lib/testutils/src/test_signing_backend.rs b/lib/testutils/src/test_signing_backend.rs index 5a986ab30..d687844c4 100644 --- a/lib/testutils/src/test_signing_backend.rs +++ b/lib/testutils/src/test_signing_backend.rs @@ -1,4 +1,5 @@ use jj_lib::content_hash::blake2b_hash; +use jj_lib::hex_util; use jj_lib::signing::SigStatus; use jj_lib::signing::SignError; use jj_lib::signing::SignResult; @@ -25,7 +26,7 @@ impl SigningBackend for TestSigningBackend { body.extend_from_slice(key.as_bytes()); body.extend_from_slice(data); - let hash: String = faster_hex::hex_string(&blake2b_hash(&body)); + let hash: String = hex_util::encode_hex_string(&blake2b_hash(&body)); Ok(format!("{PREFIX}{key}\n{hash}").into_bytes()) } From 0ef4d228ee788432785c5e87811798fa5a86cbb0 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Thu, 26 Sep 2024 10:14:23 +0200 Subject: [PATCH 5/5] dep: Drop faster-hex dependency --- Cargo.lock | 1 - cli/src/commands/debug/tree.rs | 2 +- lib/Cargo.toml | 1 - lib/src/backend.rs | 1 - lib/src/content_hash.rs | 3 +- lib/src/git_backend.rs | 2 +- lib/src/hex_util.rs | 129 +++++++++++++++++++++++++-------- lib/src/object_id.rs | 10 +-- lib/src/simple_op_store.rs | 6 +- 9 files changed, 109 insertions(+), 46 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 77615a300..40326df0a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1881,7 +1881,6 @@ dependencies = [ "digest", "either", "esl01-renderdag", - "faster-hex", "futures 0.3.30", "git2", "gix", diff --git a/cli/src/commands/debug/tree.rs b/cli/src/commands/debug/tree.rs index 7c40d33b5..38e2647e0 100644 --- a/cli/src/commands/debug/tree.rs +++ b/cli/src/commands/debug/tree.rs @@ -47,7 +47,7 @@ pub fn cmd_debug_tree( let workspace_command = command.workspace_helper(ui)?; let tree = if let Some(tree_id_hex) = &args.id { let tree_id = - TreeId::try_from_hex(tree_id_hex).map_err(|_| user_error("Invalid tree id"))?; + TreeId::try_from_hex(tree_id_hex).ok_or_else(|| user_error("Invalid tree id"))?; let dir = if let Some(dir_str) = &args.dir { workspace_command.parse_file_path(dir_str)? } else { diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 8621b2ca4..4834c40e2 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -48,7 +48,6 @@ git2 = { workspace = true, optional = true } gix = { workspace = true, optional = true } gix-filter = { workspace = true, optional = true } glob = { workspace = true } -faster-hex = { workspace = true } ignore = { workspace = true } indexmap = { workspace = true } itertools = { workspace = true } diff --git a/lib/src/backend.rs b/lib/src/backend.rs index efc1d6b9c..a5786e5d5 100644 --- a/lib/src/backend.rs +++ b/lib/src/backend.rs @@ -26,7 +26,6 @@ use thiserror::Error; use crate::content_hash::ContentHash; use crate::hex_util; -use crate::hex_util::to_reverse_hex_digit; use crate::index::Index; use crate::merge::Merge; use crate::object_id::id_type; diff --git a/lib/src/content_hash.rs b/lib/src/content_hash.rs index 2ace85139..bbe923567 100644 --- a/lib/src/content_hash.rs +++ b/lib/src/content_hash.rs @@ -150,9 +150,8 @@ mod tests { use std::collections::BTreeMap; use std::collections::HashMap; - use crate::hex_util; - use super::*; + use crate::hex_util; #[test] fn test_string_sanity() { diff --git a/lib/src/git_backend.rs b/lib/src/git_backend.rs index fc2fb6764..84ba228f2 100644 --- a/lib/src/git_backend.rs +++ b/lib/src/git_backend.rs @@ -500,7 +500,7 @@ fn root_tree_from_header(git_commit: &CommitRef) -> Result, if *key == JJ_TREES_COMMIT_HEADER { let mut tree_ids = SmallVec::new(); for hex in str::from_utf8(value.as_ref()).or(Err(()))?.split(' ') { - let tree_id = TreeId::try_from_hex(hex).or(Err(()))?; + let tree_id = TreeId::try_from_hex(hex).ok_or(())?; if tree_id.as_bytes().len() != HASH_LENGTH { return Err(()); } diff --git a/lib/src/hex_util.rs b/lib/src/hex_util.rs index c435e352b..a7edeb5df 100644 --- a/lib/src/hex_util.rs +++ b/lib/src/hex_util.rs @@ -14,27 +14,23 @@ #![allow(missing_docs)] -pub fn to_reverse_hex_digit(b: u8) -> u8 { - let offset = match b { - b'0'..=b'9' => b - b'0', - b'A'..=b'F' => b - b'A' + 10, - b'a'..=b'f' => b - b'a' + 10, - b => return b, - }; - b'z' - offset +use std::iter; + +/// Converts a hexadecimal ASCII character to a 0-based index. +fn hex_to_relative(b: u8) -> Option { + match b { + b'0'..=b'9' => Some(b - b'0'), + b'A'..=b'F' => Some(b - b'A' + 10), + b'a'..=b'f' => Some(b - b'a' + 10), + _ => None, + } } -fn to_reverse_hex_digit_checked(b: u8) -> Option { - let value = match b { - b'0'..=b'9' => b - b'0', - b'A'..=b'F' => b - b'A' + 10, - b'a'..=b'f' => b - b'a' + 10, - _ => return None, - }; - Some(b'z' - value) +fn to_reverse_hex_digit(b: u8) -> Option { + Some(b'z' - hex_to_relative(b)?) } -fn to_forward_hex_digit_checked(b: u8) -> Option { +fn to_forward_hex_digit(b: u8) -> Option { let value = match b { b'k'..=b'z' => b'z' - b, b'K'..=b'Z' => b'Z' - b, @@ -47,25 +43,39 @@ fn to_forward_hex_digit_checked(b: u8) -> Option { } } +/// Transforms a reverse hex into a forward hex. +/// +/// If the reverse hex string contains non reverse hex characters the function +/// will return None. pub fn to_forward_hex(reverse_hex: &str) -> Option { reverse_hex .bytes() - .map(|b| to_forward_hex_digit_checked(b).map(char::from)) + .map(|b| to_forward_hex_digit(b).map(char::from)) .collect() } +/// Transforms a forward hex into a reverse hex. +/// +/// If the forward hex string contains non forward hex characters the function +/// will return None. pub fn to_reverse_hex(forward_hex: &str) -> Option { forward_hex .bytes() - .map(|b| to_reverse_hex_digit_checked(b).map(char::from)) + .map(|b| to_reverse_hex_digit(b).map(char::from)) .collect() } -pub fn decode_hex_string(hex: &str) -> Option> { - let mut dst = vec![0; hex.len() / 2]; - faster_hex::hex_decode(hex.as_bytes(), &mut dst) - .ok() - .map(|()| dst) +pub fn decode_hex_string(src: &str) -> Option> { + if src.len() % 2 != 0 { + return None; + } + let mut dst = vec![0; src.len() / 2]; + for (slot, bytes) in iter::zip(&mut dst, src.as_bytes().chunks_exact(2)) { + let a = hex_to_relative(bytes[0])? << 4; + let b = hex_to_relative(bytes[1])?; + *slot = a | b; + } + Some(dst) } /// Calculates common prefix length of two byte sequences. The length @@ -82,23 +92,80 @@ pub fn common_hex_len(bytes_a: &[u8], bytes_b: &[u8]) -> usize { } pub fn encode_hex_string_reverse(src: &[u8]) -> String { - let mut buffer = vec![0; src.len() * 2]; - faster_hex::hex_encode(src, &mut buffer).unwrap(); - buffer - .iter_mut() - .for_each(|b| *b = to_reverse_hex_digit(*b)); + let mut dst = vec![0; src.len() * 2]; + for (&src, dst) in src.iter().zip(dst.chunks_exact_mut(2)) { + dst[0] = hex_lower_reverse((src >> 4) & 0xf); + dst[1] = hex_lower_reverse(src & 0xf); + } + String::from_utf8(dst).expect("hex_lower_reverse emits ascii character bytes") +} - String::from_utf8(buffer).unwrap() +fn hex_lower_reverse(byte: u8) -> u8 { + static TABLE: &[u8] = b"zyxwvutsrqponmlk"; + TABLE[byte as usize] } pub fn encode_hex_string(src: &[u8]) -> String { - faster_hex::hex_string(src) + let mut dst = vec![0; src.len() * 2]; + for (&src, dst) in src.iter().zip(dst.chunks_exact_mut(2)) { + dst[0] = hex_lower((src >> 4) & 0xf); + dst[1] = hex_lower(src & 0xf); + } + String::from_utf8(dst).expect("hex_lower emits ascii character bytes") +} + +fn hex_lower(byte: u8) -> u8 { + static TABLE: &[u8] = b"0123456789abcdef"; + TABLE[byte as usize] } #[cfg(test)] mod tests { use super::*; + #[test] + fn test_common_hex_len() { + assert_eq!(common_hex_len(b"", b""), 0); + assert_eq!(common_hex_len(b"abc", b"abc"), 6); + + assert_eq!(common_hex_len(b"aaa", b"bbb"), 1); + assert_eq!(common_hex_len(b"aab", b"aac"), 5); + } + + #[test] + fn test_encode_hex_string() { + assert_eq!(&encode_hex_string(b""), ""); + assert_eq!(&encode_hex_string(b"012"), "303132"); + assert_eq!(&encode_hex_string(b"0123"), "30313233"); + assert_eq!(&encode_hex_string(b"abdz"), "6162647a"); + } + + #[test] + fn test_encode_hex_string_reverse() { + assert_eq!(&encode_hex_string_reverse(b""), ""); + assert_eq!(&encode_hex_string_reverse(b"012"), "wzwywx"); + assert_eq!(&encode_hex_string_reverse(b"0123"), "wzwywxww"); + assert_eq!(&encode_hex_string_reverse(b"abdz"), "tytxtvsp"); + } + + #[test] + fn test_decode_hex_string() { + // Empty string + assert_eq!(decode_hex_string(""), Some(vec![])); + + // Odd number of digits + assert_eq!(decode_hex_string("0"), None); + + // Invalid digit + assert_eq!(decode_hex_string("g0"), None); + assert_eq!(decode_hex_string("0g"), None); + + assert_eq!( + decode_hex_string("0123456789abcdefABCDEF"), + Some(b"\x01\x23\x45\x67\x89\xab\xcd\xef\xAB\xCD\xEF".to_vec()) + ); + } + #[test] fn test_reverse_hex() { // Empty string diff --git a/lib/src/object_id.rs b/lib/src/object_id.rs index b03aa5430..90450272f 100644 --- a/lib/src/object_id.rs +++ b/lib/src/object_id.rs @@ -66,9 +66,8 @@ macro_rules! impl_id_type { } /// Parses the given hex string into an ObjectId. - pub fn try_from_hex(hex: &str) -> Result { - let mut dst = vec![0; hex.len() / 2]; - faster_hex::hex_decode(hex.as_bytes(), &mut dst).map(|()| Self(dst)) + pub fn try_from_hex(hex: &str) -> Option { + $crate::hex_util::decode_hex_string(hex).map(Self) } } @@ -96,7 +95,7 @@ macro_rules! impl_id_type { } fn hex(&self) -> String { - faster_hex::hex_string(&self.0) + $crate::hex_util::encode_hex_string(&self.0) } } }; @@ -105,7 +104,8 @@ macro_rules! impl_id_type { pub(crate) use id_type; pub(crate) use impl_id_type; -use crate::hex_util::{self, decode_hex_string}; +use crate::hex_util::decode_hex_string; +use crate::hex_util::{self}; /// An identifier prefix (typically from a type implementing the [`ObjectId`] /// trait) with facilities for converting between bytes and a hex string. diff --git a/lib/src/simple_op_store.rs b/lib/src/simple_op_store.rs index 1cadb9ce9..c76e49dfd 100644 --- a/lib/src/simple_op_store.rs +++ b/lib/src/simple_op_store.rs @@ -228,7 +228,7 @@ impl OpStore for SimpleOpStore { if !name.starts_with(&hex_prefix) { continue; } - let Ok(id) = OperationId::try_from_hex(&name) else { + let Some(id) = OperationId::try_from_hex(&name) else { continue; // Skip invalid hex }; if matched.is_some() { @@ -251,11 +251,11 @@ impl OpStore for SimpleOpStore { fn gc(&self, head_ids: &[OperationId], keep_newer: SystemTime) -> OpStoreResult<()> { let to_op_id = |entry: &fs::DirEntry| -> Option { let name = entry.file_name().into_string().ok()?; - OperationId::try_from_hex(&name).ok() + OperationId::try_from_hex(&name) }; let to_view_id = |entry: &fs::DirEntry| -> Option { let name = entry.file_name().into_string().ok()?; - ViewId::try_from_hex(&name).ok() + ViewId::try_from_hex(&name) }; let remove_file_if_not_new = |entry: &fs::DirEntry| -> Result<(), PathError> { let path = entry.path();