diff --git a/Cargo.lock b/Cargo.lock index 32b243e84..0fadeb370 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1631,12 +1631,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - [[package]] name = "home" version = "0.5.9" @@ -1839,6 +1833,7 @@ dependencies = [ "dirs", "dunce", "esl01-renderdag", + "faster-hex", "futures 0.3.30", "git2", "gix", @@ -1897,7 +1892,6 @@ dependencies = [ "gix", "gix-filter", "glob", - "hex", "ignore", "indexmap", "indoc", @@ -3109,9 +3103,9 @@ version = "0.22.0" dependencies = [ "async-trait", "config", + "faster-hex", "futures 0.3.30", "git2", - "hex", "itertools 0.13.0", "jj-lib", "pollster", diff --git a/Cargo.toml b/Cargo.toml index 09a4d1cd2..197078fde 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,7 +66,7 @@ gix = { version = "0.66.0", default-features = false, features = [ ] } gix-filter = "0.13.0" glob = "0.3.1" -hex = "0.4.3" +faster-hex = { version = "0.9.0", default-features = false, features = ["std"]} ignore = "0.4.23" indexmap = "2.6.0" indoc = "2.0.4" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index ef261d178..ecafc4e0b 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -64,6 +64,7 @@ esl01-renderdag = { workspace = true } futures = { workspace = true } git2 = { workspace = true } gix = { workspace = true } +faster-hex = { workspace = true } indexmap = { workspace = true } indoc = { workspace = true } itertools = { workspace = true } diff --git a/cli/src/cli_util.rs b/cli/src/cli_util.rs index 273acc4fa..7a3c93da1 100644 --- a/cli/src/cli_util.rs +++ b/cli/src/cli_util.rs @@ -64,7 +64,6 @@ use jj_lib::git; use jj_lib::git_backend::GitBackend; use jj_lib::gitignore::GitIgnoreError; use jj_lib::gitignore::GitIgnoreFile; -use jj_lib::hex_util::to_reverse_hex; use jj_lib::id_prefix::IdPrefixContext; use jj_lib::matchers::Matcher; use jj_lib::merge::MergedTreeValue; @@ -2650,17 +2649,21 @@ pub fn edit_temp_file( } pub fn short_commit_hash(commit_id: &CommitId) -> String { - commit_id.hex()[0..12].to_string() + let mut hash = commit_id.hex(); + hash.truncate(12); + hash } pub fn short_change_hash(change_id: &ChangeId) -> String { - // TODO: We could avoid the unwrap() and make this more efficient by converting - // straight from binary. - to_reverse_hex(&change_id.hex()[0..12]).unwrap() + let mut hash = change_id.reverse_hex(); + hash.truncate(12); + hash } pub fn short_operation_hash(operation_id: &OperationId) -> String { - operation_id.hex()[0..12].to_string() + let mut hash = operation_id.hex(); + hash.truncate(12); + hash } /// Wrapper around a `DiffEditor` to conditionally start interactive session. diff --git a/cli/src/commands/debug/tree.rs b/cli/src/commands/debug/tree.rs index 7c40d33b5..38e2647e0 100644 --- a/cli/src/commands/debug/tree.rs +++ b/cli/src/commands/debug/tree.rs @@ -47,7 +47,7 @@ pub fn cmd_debug_tree( let workspace_command = command.workspace_helper(ui)?; let tree = if let Some(tree_id_hex) = &args.id { let tree_id = - TreeId::try_from_hex(tree_id_hex).map_err(|_| user_error("Invalid tree id"))?; + TreeId::try_from_hex(tree_id_hex).ok_or_else(|| user_error("Invalid tree id"))?; let dir = if let Some(dir_str) = &args.dir { workspace_command.parse_file_path(dir_str)? } else { diff --git a/cli/src/commit_templater.rs b/cli/src/commit_templater.rs index d52dbcd0c..95c531df7 100644 --- a/cli/src/commit_templater.rs +++ b/cli/src/commit_templater.rs @@ -31,7 +31,6 @@ use jj_lib::fileset; use jj_lib::fileset::FilesetDiagnostics; use jj_lib::fileset::FilesetExpression; use jj_lib::git; -use jj_lib::hex_util::to_reverse_hex; use jj_lib::id_prefix::IdPrefixContext; use jj_lib::matchers::Matcher; use jj_lib::merged_tree::MergedTree; @@ -1248,11 +1247,7 @@ impl CommitOrChangeId { pub fn hex(&self) -> String { match self { CommitOrChangeId::Commit(id) => id.hex(), - CommitOrChangeId::Change(id) => { - // TODO: We can avoid the unwrap() and make this more efficient by converting - // straight from bytes. - to_reverse_hex(&id.hex()).unwrap() - } + CommitOrChangeId::Change(id) => id.reverse_hex(), } } diff --git a/lib/Cargo.toml b/lib/Cargo.toml index e45e45f88..4834c40e2 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -48,7 +48,6 @@ git2 = { workspace = true, optional = true } gix = { workspace = true, optional = true } gix-filter = { workspace = true, optional = true } glob = { workspace = true } -hex = { workspace = true } ignore = { workspace = true } indexmap = { workspace = true } itertools = { workspace = true } diff --git a/lib/src/backend.rs b/lib/src/backend.rs index 32b04ae26..a5786e5d5 100644 --- a/lib/src/backend.rs +++ b/lib/src/backend.rs @@ -25,6 +25,7 @@ use futures::stream::BoxStream; use thiserror::Error; use crate::content_hash::ContentHash; +use crate::hex_util; use crate::index::Index; use crate::merge::Merge; use crate::object_id::id_type; @@ -50,6 +51,12 @@ id_type!(pub FileId); id_type!(pub SymlinkId); id_type!(pub ConflictId); +impl ChangeId { + pub fn reverse_hex(&self) -> String { + hex_util::encode_hex_string_reverse(&self.0) + } +} + #[derive(ContentHash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)] pub struct MillisSinceEpoch(pub i64); diff --git a/lib/src/content_hash.rs b/lib/src/content_hash.rs index e3918589a..bbe923567 100644 --- a/lib/src/content_hash.rs +++ b/lib/src/content_hash.rs @@ -151,6 +151,7 @@ mod tests { use std::collections::HashMap; use super::*; + use crate::hex_util; #[test] fn test_string_sanity() { @@ -215,7 +216,7 @@ mod tests { x: Vec>, y: i64, } - let foo_hash = hex::encode(hash(&Foo { + let foo_hash = hex_util::encode_hex_string(&hash(&Foo { x: vec![None, Some(42)], y: 17, })); @@ -231,7 +232,7 @@ mod tests { y: Y, } assert_eq!( - hex::encode(hash(&GenericFoo { + hex_util::encode_hex_string(&hash(&GenericFoo { x: vec![None, Some(42)], y: 17i64 })), diff --git a/lib/src/default_index/mutable.rs b/lib/src/default_index/mutable.rs index 353550dbe..c6d2998d9 100644 --- a/lib/src/default_index/mutable.rs +++ b/lib/src/default_index/mutable.rs @@ -48,6 +48,7 @@ use crate::backend::ChangeId; use crate::backend::CommitId; use crate::commit::Commit; use crate::file_util::persist_content_addressed_temp_file; +use crate::hex_util; use crate::index::AllHeadsForGcUnsupported; use crate::index::ChangeIdIndex; use crate::index::Index; @@ -359,7 +360,7 @@ impl MutableIndexSegment { self.serialize_local_entries(&mut buf); let mut hasher = Blake2b512::new(); hasher.update(&buf); - let index_file_id_hex = hex::encode(hasher.finalize()); + let index_file_id_hex = hex_util::encode_hex_string(&hasher.finalize()); let index_file_path = dir.join(&index_file_id_hex); let mut temp_file = NamedTempFile::new_in(dir)?; diff --git a/lib/src/git_backend.rs b/lib/src/git_backend.rs index 7d2b267e9..84ba228f2 100644 --- a/lib/src/git_backend.rs +++ b/lib/src/git_backend.rs @@ -500,7 +500,7 @@ fn root_tree_from_header(git_commit: &CommitRef) -> Result, if *key == JJ_TREES_COMMIT_HEADER { let mut tree_ids = SmallVec::new(); for hex in str::from_utf8(value.as_ref()).or(Err(()))?.split(' ') { - let tree_id = TreeId::try_from_hex(hex).or(Err(()))?; + let tree_id = TreeId::try_from_hex(hex).ok_or(())?; if tree_id.as_bytes().len() != HASH_LENGTH { return Err(()); } @@ -1505,19 +1505,19 @@ fn tree_value_from_json(json: &serde_json::Value) -> TreeValue { } fn bytes_vec_from_json(value: &serde_json::Value) -> Vec { - hex::decode(value.as_str().unwrap()).unwrap() + crate::hex_util::decode_hex_string(value.as_str().unwrap()).unwrap() } #[cfg(test)] mod tests { use assert_matches::assert_matches; use git2::Oid; - use hex::ToHex; use pollster::FutureExt; use test_case::test_case; use super::*; use crate::content_hash::blake2b_hash; + use crate::hex_util; #[test_case(false; "legacy tree format")] #[test_case(true; "tree-level conflict format")] @@ -2133,7 +2133,7 @@ mod tests { }; let mut signer = |data: &_| { - let hash: String = blake2b_hash(data).encode_hex(); + let hash: String = hex_util::encode_hex_string(&blake2b_hash(data)); Ok(format!("test sig\n\n\nhash={hash}").into_bytes()) }; diff --git a/lib/src/hex_util.rs b/lib/src/hex_util.rs index 48517b38f..a7edeb5df 100644 --- a/lib/src/hex_util.rs +++ b/lib/src/hex_util.rs @@ -14,14 +14,20 @@ #![allow(missing_docs)] +use std::iter; + +/// Converts a hexadecimal ASCII character to a 0-based index. +fn hex_to_relative(b: u8) -> Option { + match b { + b'0'..=b'9' => Some(b - b'0'), + b'A'..=b'F' => Some(b - b'A' + 10), + b'a'..=b'f' => Some(b - b'a' + 10), + _ => None, + } +} + fn to_reverse_hex_digit(b: u8) -> Option { - let value = match b { - b'0'..=b'9' => b - b'0', - b'A'..=b'F' => b - b'A' + 10, - b'a'..=b'f' => b - b'a' + 10, - _ => return None, - }; - Some(b'z' - value) + Some(b'z' - hex_to_relative(b)?) } fn to_forward_hex_digit(b: u8) -> Option { @@ -37,6 +43,10 @@ fn to_forward_hex_digit(b: u8) -> Option { } } +/// Transforms a reverse hex into a forward hex. +/// +/// If the reverse hex string contains non reverse hex characters the function +/// will return None. pub fn to_forward_hex(reverse_hex: &str) -> Option { reverse_hex .bytes() @@ -44,6 +54,10 @@ pub fn to_forward_hex(reverse_hex: &str) -> Option { .collect() } +/// Transforms a forward hex into a reverse hex. +/// +/// If the forward hex string contains non forward hex characters the function +/// will return None. pub fn to_reverse_hex(forward_hex: &str) -> Option { forward_hex .bytes() @@ -51,6 +65,19 @@ pub fn to_reverse_hex(forward_hex: &str) -> Option { .collect() } +pub fn decode_hex_string(src: &str) -> Option> { + if src.len() % 2 != 0 { + return None; + } + let mut dst = vec![0; src.len() / 2]; + for (slot, bytes) in iter::zip(&mut dst, src.as_bytes().chunks_exact(2)) { + let a = hex_to_relative(bytes[0])? << 4; + let b = hex_to_relative(bytes[1])?; + *slot = a | b; + } + Some(dst) +} + /// Calculates common prefix length of two byte sequences. The length /// to be returned is a number of hexadecimal digits. pub fn common_hex_len(bytes_a: &[u8], bytes_b: &[u8]) -> usize { @@ -64,10 +91,81 @@ pub fn common_hex_len(bytes_a: &[u8], bytes_b: &[u8]) -> usize { .unwrap_or_else(|| bytes_a.len().min(bytes_b.len()) * 2) } +pub fn encode_hex_string_reverse(src: &[u8]) -> String { + let mut dst = vec![0; src.len() * 2]; + for (&src, dst) in src.iter().zip(dst.chunks_exact_mut(2)) { + dst[0] = hex_lower_reverse((src >> 4) & 0xf); + dst[1] = hex_lower_reverse(src & 0xf); + } + String::from_utf8(dst).expect("hex_lower_reverse emits ascii character bytes") +} + +fn hex_lower_reverse(byte: u8) -> u8 { + static TABLE: &[u8] = b"zyxwvutsrqponmlk"; + TABLE[byte as usize] +} + +pub fn encode_hex_string(src: &[u8]) -> String { + let mut dst = vec![0; src.len() * 2]; + for (&src, dst) in src.iter().zip(dst.chunks_exact_mut(2)) { + dst[0] = hex_lower((src >> 4) & 0xf); + dst[1] = hex_lower(src & 0xf); + } + String::from_utf8(dst).expect("hex_lower emits ascii character bytes") +} + +fn hex_lower(byte: u8) -> u8 { + static TABLE: &[u8] = b"0123456789abcdef"; + TABLE[byte as usize] +} + #[cfg(test)] mod tests { use super::*; + #[test] + fn test_common_hex_len() { + assert_eq!(common_hex_len(b"", b""), 0); + assert_eq!(common_hex_len(b"abc", b"abc"), 6); + + assert_eq!(common_hex_len(b"aaa", b"bbb"), 1); + assert_eq!(common_hex_len(b"aab", b"aac"), 5); + } + + #[test] + fn test_encode_hex_string() { + assert_eq!(&encode_hex_string(b""), ""); + assert_eq!(&encode_hex_string(b"012"), "303132"); + assert_eq!(&encode_hex_string(b"0123"), "30313233"); + assert_eq!(&encode_hex_string(b"abdz"), "6162647a"); + } + + #[test] + fn test_encode_hex_string_reverse() { + assert_eq!(&encode_hex_string_reverse(b""), ""); + assert_eq!(&encode_hex_string_reverse(b"012"), "wzwywx"); + assert_eq!(&encode_hex_string_reverse(b"0123"), "wzwywxww"); + assert_eq!(&encode_hex_string_reverse(b"abdz"), "tytxtvsp"); + } + + #[test] + fn test_decode_hex_string() { + // Empty string + assert_eq!(decode_hex_string(""), Some(vec![])); + + // Odd number of digits + assert_eq!(decode_hex_string("0"), None); + + // Invalid digit + assert_eq!(decode_hex_string("g0"), None); + assert_eq!(decode_hex_string("0g"), None); + + assert_eq!( + decode_hex_string("0123456789abcdefABCDEF"), + Some(b"\x01\x23\x45\x67\x89\xab\xcd\xef\xAB\xCD\xEF".to_vec()) + ); + } + #[test] fn test_reverse_hex() { // Empty string diff --git a/lib/src/object_id.rs b/lib/src/object_id.rs index 061b68bf1..90450272f 100644 --- a/lib/src/object_id.rs +++ b/lib/src/object_id.rs @@ -66,8 +66,8 @@ macro_rules! impl_id_type { } /// Parses the given hex string into an ObjectId. - pub fn try_from_hex(hex: &str) -> Result { - hex::decode(hex).map(Self) + pub fn try_from_hex(hex: &str) -> Option { + $crate::hex_util::decode_hex_string(hex).map(Self) } } @@ -95,7 +95,7 @@ macro_rules! impl_id_type { } fn hex(&self) -> String { - hex::encode(&self.0) + $crate::hex_util::encode_hex_string(&self.0) } } }; @@ -104,6 +104,9 @@ macro_rules! impl_id_type { pub(crate) use id_type; pub(crate) use impl_id_type; +use crate::hex_util::decode_hex_string; +use crate::hex_util::{self}; + /// An identifier prefix (typically from a type implementing the [`ObjectId`] /// trait) with facilities for converting between bytes and a hex string. #[derive(Debug, Clone, PartialEq, Eq)] @@ -120,9 +123,9 @@ impl HexPrefix { pub fn new(prefix: &str) -> Option { let has_odd_byte = prefix.len() & 1 != 0; let min_prefix_bytes = if has_odd_byte { - hex::decode(prefix.to_owned() + "0").ok()? + decode_hex_string(&format!("{prefix}0"))? } else { - hex::decode(prefix).ok()? + decode_hex_string(prefix)? }; Some(HexPrefix { min_prefix_bytes, @@ -138,7 +141,7 @@ impl HexPrefix { } pub fn hex(&self) -> String { - let mut hex_string = hex::encode(&self.min_prefix_bytes); + let mut hex_string = hex_util::encode_hex_string(&self.min_prefix_bytes); if self.has_odd_byte { hex_string.pop().unwrap(); } diff --git a/lib/src/simple_op_heads_store.rs b/lib/src/simple_op_heads_store.rs index e4b05a1c1..d8e59b2f9 100644 --- a/lib/src/simple_op_heads_store.rs +++ b/lib/src/simple_op_heads_store.rs @@ -21,6 +21,7 @@ use std::fs; use std::path::Path; use std::path::PathBuf; +use crate::hex_util::decode_hex_string; use crate::lock::FileLock; use crate::object_id::ObjectId; use crate::op_heads_store::OpHeadsStore; @@ -96,7 +97,7 @@ impl OpHeadsStore for SimpleOpHeadsStore { for op_head_entry in std::fs::read_dir(&self.dir).unwrap() { let op_head_file_name = op_head_entry.unwrap().file_name(); let op_head_file_name = op_head_file_name.to_str().unwrap(); - if let Ok(op_head) = hex::decode(op_head_file_name) { + if let Some(op_head) = decode_hex_string(op_head_file_name) { op_heads.push(OperationId::new(op_head)); } } diff --git a/lib/src/simple_op_store.rs b/lib/src/simple_op_store.rs index 1cadb9ce9..c76e49dfd 100644 --- a/lib/src/simple_op_store.rs +++ b/lib/src/simple_op_store.rs @@ -228,7 +228,7 @@ impl OpStore for SimpleOpStore { if !name.starts_with(&hex_prefix) { continue; } - let Ok(id) = OperationId::try_from_hex(&name) else { + let Some(id) = OperationId::try_from_hex(&name) else { continue; // Skip invalid hex }; if matched.is_some() { @@ -251,11 +251,11 @@ impl OpStore for SimpleOpStore { fn gc(&self, head_ids: &[OperationId], keep_newer: SystemTime) -> OpStoreResult<()> { let to_op_id = |entry: &fs::DirEntry| -> Option { let name = entry.file_name().into_string().ok()?; - OperationId::try_from_hex(&name).ok() + OperationId::try_from_hex(&name) }; let to_view_id = |entry: &fs::DirEntry| -> Option { let name = entry.file_name().into_string().ok()?; - ViewId::try_from_hex(&name).ok() + ViewId::try_from_hex(&name) }; let remove_file_if_not_new = |entry: &fs::DirEntry| -> Result<(), PathError> { let path = entry.path(); diff --git a/lib/src/stacked_table.rs b/lib/src/stacked_table.rs index d88e05366..f2a7ee71e 100644 --- a/lib/src/stacked_table.rs +++ b/lib/src/stacked_table.rs @@ -37,6 +37,7 @@ use tempfile::NamedTempFile; use thiserror::Error; use crate::file_util::persist_content_addressed_temp_file; +use crate::hex_util; use crate::lock::FileLock; pub trait TableSegment { @@ -333,7 +334,7 @@ impl MutableTable { let buf = self.maybe_squash_with_ancestors().serialize(); let mut hasher = Blake2b512::new(); hasher.update(&buf); - let file_id_hex = hex::encode(hasher.finalize()); + let file_id_hex = hex_util::encode_hex_string(&hasher.finalize()); let file_path = store.dir.join(&file_id_hex); let mut temp_file = NamedTempFile::new_in(&store.dir)?; diff --git a/lib/tests/test_git.rs b/lib/tests/test_git.rs index d9106f994..9ff03c44e 100644 --- a/lib/tests/test_git.rs +++ b/lib/tests/test_git.rs @@ -44,6 +44,7 @@ use jj_lib::git::GitRefUpdate; use jj_lib::git::RefName; use jj_lib::git::SubmoduleConfig; use jj_lib::git_backend::GitBackend; +use jj_lib::hex_util; use jj_lib::object_id::ObjectId; use jj_lib::op_store::BookmarkTarget; use jj_lib::op_store::RefTarget; @@ -1381,8 +1382,8 @@ fn test_import_refs_missing_git_commit() { let commit1 = empty_git_commit(&git_repo, "refs/heads/main", &[]); let commit2 = empty_git_commit(&git_repo, "refs/heads/main", &[&commit1]); - let shard = hex::encode(&commit1.id().as_bytes()[..1]); - let object_basename = hex::encode(&commit1.id().as_bytes()[1..]); + let shard = hex_util::encode_hex_string(&commit1.id().as_bytes()[..1]); + let object_basename = hex_util::encode_hex_string(&commit1.id().as_bytes()[1..]); let object_store_path = git_repo.path().join("objects"); let object_file = object_store_path.join(&shard).join(object_basename); let backup_object_file = object_store_path.join(&shard).join("backup"); diff --git a/lib/tests/test_revset.rs b/lib/tests/test_revset.rs index 3034c1293..6b71f0375 100644 --- a/lib/tests/test_revset.rs +++ b/lib/tests/test_revset.rs @@ -275,22 +275,22 @@ fn test_resolve_symbol_change_id(readonly: bool) { // Test the test setup assert_eq!( - hex::encode(git_commit_ids[0]), + &format!("{}", git_commit_ids[0]), // "04e12a5467bba790efb88a9870894ec208b16bf1" reversed "8fd68d104372910e19511df709e5dde62a548720" ); assert_eq!( - hex::encode(git_commit_ids[1]), + &format!("{}", git_commit_ids[1]), // "040b3ba3a51d8edbc4c5855cbd09de71d4c29cca" reversed "5339432b8e7b90bd3aa1a323db71b8a5c5dcd020" ); assert_eq!( - hex::encode(git_commit_ids[2]), + &format!("{}", git_commit_ids[2]), // "04e1c7082e4e34f3f371d8a1a46770b861b9b547" reversed "e2ad9d861d0ee625851b8ecfcf2c727410e38720" ); assert_eq!( - hex::encode(git_commit_ids[3]), + &format!("{}", git_commit_ids[3]), // "911d7e52fd5ba04b8f289e14c3d30b52d38c0020" reversed "040031cb4ad0cbc3287914f1d205dabf4a7eb889" ); diff --git a/lib/testutils/Cargo.toml b/lib/testutils/Cargo.toml index 62b0ed866..2f065ae23 100644 --- a/lib/testutils/Cargo.toml +++ b/lib/testutils/Cargo.toml @@ -19,7 +19,7 @@ async-trait = { workspace = true } config = { workspace = true } futures = { workspace = true } git2 = { workspace = true } -hex = { workspace = true } +faster-hex = { workspace = true } itertools = { workspace = true } jj-lib = { workspace = true, features = ["testing"] } pollster = { workspace = true } diff --git a/lib/testutils/src/test_signing_backend.rs b/lib/testutils/src/test_signing_backend.rs index 8d1d3369b..d687844c4 100644 --- a/lib/testutils/src/test_signing_backend.rs +++ b/lib/testutils/src/test_signing_backend.rs @@ -1,5 +1,5 @@ -use hex::ToHex; use jj_lib::content_hash::blake2b_hash; +use jj_lib::hex_util; use jj_lib::signing::SigStatus; use jj_lib::signing::SignError; use jj_lib::signing::SignResult; @@ -26,7 +26,7 @@ impl SigningBackend for TestSigningBackend { body.extend_from_slice(key.as_bytes()); body.extend_from_slice(data); - let hash: String = blake2b_hash(&body).encode_hex(); + let hash: String = hex_util::encode_hex_string(&blake2b_hash(&body)); Ok(format!("{PREFIX}{key}\n{hash}").into_bytes()) }