diff --git a/ast/src/lang/core/pattern.rs b/ast/src/lang/core/pattern.rs index 9ae9c5db9b..41eb8b17be 100644 --- a/ast/src/lang/core/pattern.rs +++ b/ast/src/lang/core/pattern.rs @@ -516,7 +516,7 @@ pub fn symbols_from_pattern(pool: &Pool, initial: &Pattern2) -> Vec { pub fn get_identifier_string(pattern: &Pattern2, interns: &Interns) -> ASTResult { match pattern { - Pattern2::Identifier(symbol) => Ok(symbol.ident_str(interns).to_string()), + Pattern2::Identifier(symbol) => Ok(symbol.as_str(interns).to_string()), other => UnexpectedPattern2Variant { required_pattern2: "Identifier".to_string(), encountered_pattern2: format!("{:?}", other), diff --git a/ast/src/lang/scope.rs b/ast/src/lang/scope.rs index 26b82b25d7..90046e4b40 100644 --- a/ast/src/lang/scope.rs +++ b/ast/src/lang/scope.rs @@ -245,7 +245,7 @@ impl Scope { // use that existing IdentId. Otherwise, create a fresh one. let ident_id = match exposed_ident_ids.get_id(&ident) { Some(ident_id) => ident_id, - None => all_ident_ids.add(ident.clone().into()), + None => all_ident_ids.add(&ident), }; let symbol = Symbol::new(self.home, ident_id); @@ -262,7 +262,7 @@ impl Scope { /// /// Used for record guards like { x: Just _ } pub fn ignore(&mut self, ident: Ident, all_ident_ids: &mut IdentIds) -> Symbol { - let ident_id = all_ident_ids.add(ident.into()); + let ident_id = all_ident_ids.add(&ident); Symbol::new(self.home, ident_id) } diff --git a/compiler/can/src/pattern.rs b/compiler/can/src/pattern.rs index 7fb54af2ea..d3ebcf3118 100644 --- a/compiler/can/src/pattern.rs +++ b/compiler/can/src/pattern.rs @@ -501,7 +501,7 @@ pub fn canonicalize_pattern<'a>( RequiredField(label, loc_guard) => { // a guard does not introduce the label into scope! - let symbol = scope.ignore(label.into(), &mut env.ident_ids); + let symbol = scope.ignore(&Ident::from(label), &mut env.ident_ids); let can_guard = canonicalize_pattern( env, var_store, diff --git a/compiler/can/src/scope.rs b/compiler/can/src/scope.rs index b45b7eea6c..2798957571 100644 --- a/compiler/can/src/scope.rs +++ b/compiler/can/src/scope.rs @@ -225,7 +225,7 @@ impl Scope { region, }; - let ident_id = all_ident_ids.add(ident.clone()); + let ident_id = all_ident_ids.add(&ident); let symbol = Symbol::new(self.home, ident_id); self.symbols.insert(symbol, region); @@ -273,7 +273,7 @@ impl Scope { ) -> Result<(Symbol, Option), (Region, Loc, Symbol)> { match self.idents.get(&ident) { Some(&(original_symbol, original_region)) => { - let shadow_ident_id = all_ident_ids.add(ident.clone()); + let shadow_ident_id = all_ident_ids.add(&ident); let shadow_symbol = Symbol::new(self.home, shadow_ident_id); self.symbols.insert(shadow_symbol, region); @@ -315,7 +315,7 @@ impl Scope { // use that existing IdentId. Otherwise, create a fresh one. let ident_id = match exposed_ident_ids.get_id(&ident) { Some(ident_id) => ident_id, - None => all_ident_ids.add(ident.clone()), + None => all_ident_ids.add(&ident), }; let symbol = Symbol::new(self.home, ident_id); @@ -329,7 +329,7 @@ impl Scope { /// Ignore an identifier. /// /// Used for record guards like { x: Just _ } - pub fn ignore(&mut self, ident: Ident, all_ident_ids: &mut IdentIds) -> Symbol { + pub fn ignore(&mut self, ident: &Ident, all_ident_ids: &mut IdentIds) -> Symbol { let ident_id = all_ident_ids.add(ident); Symbol::new(self.home, ident_id) } diff --git a/compiler/gen_wasm/src/backend.rs b/compiler/gen_wasm/src/backend.rs index 0ad99742cb..7b5162c8b1 100644 --- a/compiler/gen_wasm/src/backend.rs +++ b/compiler/gen_wasm/src/backend.rs @@ -183,7 +183,7 @@ impl<'a> WasmBackend<'a> { .get_mut(&self.env.module_id) .unwrap(); - let ident_id = ident_ids.add(Ident::from(debug_name)); + let ident_id = ident_ids.add(&Ident::from(debug_name)); Symbol::new(self.env.module_id, ident_id) } diff --git a/compiler/load_internal/src/file.rs b/compiler/load_internal/src/file.rs index 6fcb0785ae..20584442b3 100644 --- a/compiler/load_internal/src/file.rs +++ b/compiler/load_internal/src/file.rs @@ -446,7 +446,7 @@ impl LoadedModule { pub fn exposed_values_str(&self) -> Vec<&str> { self.exposed_values .iter() - .map(|symbol| symbol.ident_str(&self.interns).as_str()) + .map(|symbol| symbol.as_str(&self.interns)) .collect() } } diff --git a/compiler/module/src/ident.rs b/compiler/module/src/ident.rs index 5335cad281..0af20224ee 100644 --- a/compiler/module/src/ident.rs +++ b/compiler/module/src/ident.rs @@ -10,6 +10,10 @@ impl Ident { pub fn as_inline_str(&self) -> &IdentStr { &self.0 } + + pub fn as_str(&self) -> &str { + self.0.as_str() + } } pub struct QualifiedModuleName<'a> { diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs index 86648dc9b9..1b36f1b2da 100644 --- a/compiler/module/src/symbol.rs +++ b/compiler/module/src/symbol.rs @@ -68,10 +68,6 @@ impl Symbol { } pub fn as_str(self, interns: &Interns) -> &str { - self.ident_str(interns).as_str() - } - - pub fn ident_str(self, interns: &Interns) -> &IdentStr { let ident_ids = interns .all_ident_ids .get(&self.module_id()) @@ -83,16 +79,13 @@ impl Symbol { ) }); - ident_ids - .get_name(self.ident_id()) - .unwrap_or_else(|| { - panic!( - "ident_string's IdentIds did not contain an entry for {} in module {:?}", - self.ident_id().0, - self.module_id() - ) - }) - .into() + ident_ids.get_name(self.ident_id()).unwrap_or_else(|| { + panic!( + "ident_string's IdentIds did not contain an entry for {} in module {:?}", + self.ident_id().0, + self.module_id() + ) + }) } pub fn as_u64(self) -> u64 { @@ -103,13 +96,13 @@ impl Symbol { let module_id = self.module_id(); if module_id == home { - self.ident_str(interns).clone().into() + ModuleName::from(self.as_str(interns)) } else { // TODO do this without format! to avoid allocation for short strings format!( "{}.{}", self.module_string(interns).as_str(), - self.ident_str(interns) + self.as_str(interns) ) .into() } @@ -535,25 +528,50 @@ pub struct IdentId(u32); /// Since these are interned strings, this shouldn't result in many total allocations in practice. #[derive(Clone, Debug, Default, PartialEq, Eq)] pub struct IdentIds { - /// Each IdentId is an index into this Vec - by_id: Vec, + buffer: String, - next_generated_name: u32, + lengths: Vec, + offsets: Vec, } impl IdentIds { - pub fn ident_strs(&self) -> impl Iterator { - self.by_id - .iter() - .enumerate() - .map(|(index, ident)| (IdentId(index as u32), ident.as_inline_str().as_str())) + fn with_capacity(capacity: usize) -> Self { + Self { + // guess: the average symbol length is 3 + buffer: String::with_capacity(3 * capacity), + + lengths: Vec::with_capacity(capacity), + offsets: Vec::with_capacity(capacity), + } } - pub fn add(&mut self, ident_name: Ident) -> IdentId { - let by_id = &mut self.by_id; - let ident_id = IdentId(by_id.len() as u32); + fn strs(&self) -> impl Iterator { + self.offsets + .iter() + .zip(self.lengths.iter()) + .map(move |(offset, length)| &self.buffer[*offset as usize..][..*length as usize]) + } - by_id.push(ident_name); + pub fn ident_strs(&self) -> impl Iterator { + self.strs() + .enumerate() + .map(|(index, ident)| (IdentId(index as u32), ident)) + } + + pub fn add(&mut self, ident_name: &Ident) -> IdentId { + self.add_str(ident_name.as_inline_str().as_str()) + } + + fn add_str(&mut self, string: &str) -> IdentId { + let offset = self.buffer.len() as u32; + let length = string.len() as u16; + + let ident_id = IdentId(self.lengths.len() as u32); + + self.lengths.push(length); + self.offsets.push(offset); + + self.buffer.push_str(string); ident_id } @@ -561,13 +579,7 @@ impl IdentIds { pub fn get_or_insert(&mut self, name: &Ident) -> IdentId { match self.get_id(name) { Some(id) => id, - None => { - let ident_id = IdentId(self.by_id.len() as u32); - - self.by_id.push(name.clone()); - - ident_id - } + None => self.add(name), } } @@ -586,31 +598,26 @@ impl IdentIds { Some(ident_id_ref) => { let ident_id = ident_id_ref; - let by_id = &mut self.by_id; - let key_index_opt = by_id.iter().position(|x| *x == old_ident); + match self.find_index(old_ident_name) { + Some(key_index) => { + let length = new_ident_name.len(); + let offset = self.buffer.len(); + self.buffer.push_str(new_ident_name); - if let Some(key_index) = key_index_opt { - if let Some(vec_elt) = by_id.get_mut(key_index) { - *vec_elt = new_ident_name.into(); - } else { - // we get the index from by_id - unreachable!() + self.lengths[key_index] = length as u16; + self.offsets[key_index] = offset as u32; + + Ok(ident_id) } - - Ok(ident_id) - } else { - Err( - format!( - "Tried to find position of key {:?} in IdentIds.by_id but I could not find the key. IdentIds.by_id: {:?}", - old_ident_name, - self.by_id - ) - ) + None => Err(format!( + r"Tried to find position of key {:?} in IdentIds.by_id but I could not find the key", + old_ident_name + )), } } None => Err(format!( - "Tried to update key in IdentIds ({:?}) but I could not find the key ({}).", - self.by_id, old_ident_name + "Tried to update key in IdentIds, but I could not find the key ({}).", + old_ident_name )), } } @@ -625,44 +632,60 @@ impl IdentIds { pub fn gen_unique(&mut self) -> IdentId { use std::fmt::Write; - let index: u32 = self.next_generated_name; - self.next_generated_name += 1; + let index = self.lengths.len(); - // "4294967296" is 10 characters - let mut buffer: arrayvec::ArrayString<10> = arrayvec::ArrayString::new(); + let offset = self.buffer.len(); + write!(self.buffer, "{}", index).unwrap(); + let length = self.buffer.len() - offset; - write!(buffer, "{}", index).unwrap(); - let ident = Ident(IdentStr::from_str(buffer.as_str())); + self.lengths.push(length as u16); + self.offsets.push(offset as u32); - self.add(ident) + IdentId(index as u32) } #[inline(always)] pub fn get_id(&self, ident_name: &Ident) -> Option { - let ident_name = ident_name.as_inline_str().as_str(); + self.find_index(ident_name.as_inline_str().as_str()) + .map(|i| IdentId(i as u32)) + } - for (id, ident_str) in self.ident_strs() { - if ident_name == ident_str { - return Some(id); + #[inline(always)] + fn find_index(&self, string: &str) -> Option { + let target_length = string.len() as u16; + + for (index, length) in self.lengths.iter().enumerate() { + if *length == target_length { + let offset = self.offsets[index] as usize; + let ident = &self.buffer[offset..][..*length as usize]; + + // skip the length check + if string.bytes().eq(ident.bytes()) { + return Some(index); + } } } None } - pub fn get_name(&self, id: IdentId) -> Option<&Ident> { - self.by_id.get(id.0 as usize) + pub fn get_name(&self, id: IdentId) -> Option<&str> { + let index = id.0 as usize; + + match self.lengths.get(index) { + None => None, + Some(length) => { + let offset = self.offsets[index] as usize; + Some(&self.buffer[offset..][..*length as usize]) + } + } } pub fn get_name_str_res(&self, ident_id: IdentId) -> ModuleResult<&str> { - Ok(self - .get_name(ident_id) - .with_context(|| IdentIdNotFound { - ident_id, - ident_ids_str: format!("{:?}", self), - })? - .as_inline_str() - .as_str()) + Ok(self.get_name(ident_id).with_context(|| IdentIdNotFound { + ident_id, + ident_ids_str: format!("{:?}", self), + })?) } } @@ -686,45 +709,18 @@ macro_rules! define_builtins { $( debug_assert!(!exposed_idents_by_module.contains_key(&ModuleId($module_id)), "Error setting up Builtins: when setting up module {} {:?} - the module ID {} is already present in the map. Check the map for duplicate module IDs!", $module_id, $module_name, $module_id); - let mut by_id : Vec = Vec::new(); - let ident_ids = { - $( - debug_assert!(by_id.len() == $ident_id, "Error setting up Builtins: when inserting {} …: {:?} into module {} …: {:?} - this entry was assigned an ID of {}, but based on insertion order, it should have had an ID of {} instead! To fix this, change it from {} …: {:?} to {} …: {:?} instead.", $ident_id, $ident_name, $module_id, $module_name, $ident_id, by_id.len(), $ident_id, $ident_name, by_id.len(), $ident_name); + let ident_indexes = [ $($ident_id),+ ]; + let ident_names = [ $($ident_name),+ ]; - by_id.push($ident_name.into()); - )+ + let mut ident_ids = IdentIds::with_capacity(ident_names.len()); - #[cfg(debug_assertions)] - { - let mut cloned = by_id.clone(); - let before = cloned.len(); - cloned.sort(); - cloned.dedup(); - let after = cloned.len(); + for (_expected_id, name) in ident_indexes.iter().zip(ident_names.iter()) { + debug_assert!(!ident_ids.strs().any(|s| s == *name), "The Ident {} is already in IdentIds", name); + let _actual_id = ident_ids.add_str(name); - if before != after { - let mut duplicates : Vec<&Ident> = Vec::new(); - let mut temp : Vec<&Ident> = Vec::new(); - - for symbol in cloned.iter() { - if temp.contains(&&symbol) { - duplicates.push(symbol); - } - - temp.push(&symbol); - } - - - panic!("duplicate symbols in IdentIds for module {:?}: {:?}", $module_name, duplicates); - } - } - - IdentIds { - by_id, - next_generated_name: 0, - } - }; + debug_assert_eq!(*_expected_id, _actual_id.0 as usize, "Error setting up Builtins: when inserting {} …: {} into module {} …: {} - this entry was assigned an ID of {}, but based on insertion order, it should have had an ID of {} instead! To fix this, change it from {} …: {} to {} …: {} instead.", _expected_id, name, $module_id, $module_name, _expected_id, _actual_id.0, _expected_id, name, _actual_id.0, name ); + } if cfg!(debug_assertions) { let module_id = ModuleId($module_id); @@ -734,6 +730,7 @@ macro_rules! define_builtins { module_id.register_debug_idents(&ident_ids); } + exposed_idents_by_module.insert( ModuleId($module_id), ident_ids diff --git a/compiler/mono/src/code_gen_help/mod.rs b/compiler/mono/src/code_gen_help/mod.rs index 3244e04fbb..d2d864ff37 100644 --- a/compiler/mono/src/code_gen_help/mod.rs +++ b/compiler/mono/src/code_gen_help/mod.rs @@ -396,7 +396,7 @@ impl<'a> CodeGenHelp<'a> { } fn create_symbol(&self, ident_ids: &mut IdentIds, debug_name: &str) -> Symbol { - let ident_id = ident_ids.add(Ident::from(debug_name)); + let ident_id = ident_ids.add(&Ident::from(debug_name)); Symbol::new(self.home, ident_id) } diff --git a/compiler/mono/src/layout.rs b/compiler/mono/src/layout.rs index ddc5aadfa6..79e193e1e5 100644 --- a/compiler/mono/src/layout.rs +++ b/compiler/mono/src/layout.rs @@ -2774,7 +2774,7 @@ impl LayoutId { // Returns something like "foo#1" when given a symbol that interns to "foo" // and a LayoutId of 1. pub fn to_symbol_string(self, symbol: Symbol, interns: &Interns) -> String { - let ident_string = symbol.ident_str(interns); + let ident_string = symbol.as_str(interns); let module_string = interns.module_ids.get_name(symbol.module_id()).unwrap(); format!("{}_{}_{}", module_string, ident_string, self.0) } diff --git a/compiler/types/src/pretty_print.rs b/compiler/types/src/pretty_print.rs index f9e11471f3..917e4a33e0 100644 --- a/compiler/types/src/pretty_print.rs +++ b/compiler/types/src/pretty_print.rs @@ -994,7 +994,7 @@ fn write_fn<'a>( fn write_symbol(env: &Env, symbol: Symbol, buf: &mut String) { let interns = &env.interns; - let ident = symbol.ident_str(interns); + let ident_str = symbol.as_str(interns); let module_id = symbol.module_id(); // Don't qualify the symbol if it's in our home module, @@ -1004,5 +1004,5 @@ fn write_symbol(env: &Env, symbol: Symbol, buf: &mut String) { buf.push('.'); } - buf.push_str(ident.as_str()); + buf.push_str(ident_str); } diff --git a/compiler/types/src/types.rs b/compiler/types/src/types.rs index b9c3a9a261..2f92a8fcb8 100644 --- a/compiler/types/src/types.rs +++ b/compiler/types/src/types.rs @@ -2011,7 +2011,7 @@ fn write_error_type_help( if write_parens { buf.push('('); } - buf.push_str(symbol.ident_str(interns).as_str()); + buf.push_str(symbol.as_str(interns)); for arg in arguments { buf.push(' '); diff --git a/docs/src/lib.rs b/docs/src/lib.rs index d09134d98d..a2c01973c7 100644 --- a/docs/src/lib.rs +++ b/docs/src/lib.rs @@ -71,7 +71,7 @@ pub fn generate_docs_html(filenames: Vec, build_dir: &Path) { let exposed_values = loaded_module .exposed_values .iter() - .map(|symbol| symbol.ident_str(&loaded_module.interns).to_string()) + .map(|symbol| symbol.as_str(&loaded_module.interns).to_string()) .collect::>(); (exposed_values, d) diff --git a/editor/src/editor/mvc/let_update.rs b/editor/src/editor/mvc/let_update.rs index de59fbf901..585aa6c5dd 100644 --- a/editor/src/editor/mvc/let_update.rs +++ b/editor/src/editor/mvc/let_update.rs @@ -1,6 +1,7 @@ use roc_ast::lang::core::expr::expr2::Expr2; use roc_ast::lang::core::pattern::Pattern2; use roc_ast::lang::core::val_def::ValueDef; +use roc_module::ident::Ident; use roc_module::symbol::Symbol; use crate::editor::ed_error::EdResult; @@ -25,7 +26,8 @@ pub fn start_new_let_value(ed_model: &mut EdModel, new_char: &char) -> EdResult< let val_expr2_node = Expr2::Blank; let val_expr_id = ed_model.module.env.pool.add(val_expr2_node); - let ident_id = ed_model.module.env.ident_ids.add(val_name_string.into()); + let ident = Ident::from(val_name_string); + let ident_id = ed_model.module.env.ident_ids.add(&ident); let var_symbol = Symbol::new(ed_model.module.env.home, ident_id); let body = Expr2::Var(var_symbol); let body_id = ed_model.module.env.pool.add(body); diff --git a/editor/src/editor/mvc/tld_value_update.rs b/editor/src/editor/mvc/tld_value_update.rs index e548dd2e25..27e85ffe31 100644 --- a/editor/src/editor/mvc/tld_value_update.rs +++ b/editor/src/editor/mvc/tld_value_update.rs @@ -1,5 +1,6 @@ use roc_ast::lang::core::{def::def2::Def2, expr::expr2::Expr2}; use roc_code_markup::slow_pool::MarkNodeId; +use roc_module::ident::Ident; use crate::{ editor::ed_error::{EdResult, FailedToUpdateIdentIdName, KeyNotFound}, @@ -26,13 +27,8 @@ pub fn start_new_tld_value(ed_model: &mut EdModel, new_char: &char) -> EdResult< let val_expr_node = Expr2::Blank; let val_expr_id = ed_model.module.env.pool.add(val_expr_node); - let val_name_string = new_char.to_string(); - - let ident_id = ed_model - .module - .env - .ident_ids - .add(val_name_string.clone().into()); + let ident = Ident::from(new_char.to_string().as_str()); + let ident_id = ed_model.module.env.ident_ids.add(&ident); let module_ident_ids_opt = ed_model .loaded_module @@ -42,7 +38,7 @@ pub fn start_new_tld_value(ed_model: &mut EdModel, new_char: &char) -> EdResult< if let Some(module_ident_ids_ref) = module_ident_ids_opt { // this might create different IdentId for interns and env.ident_ids which may be a problem - module_ident_ids_ref.add(val_name_string.into()); + module_ident_ids_ref.add(&ident); } else { KeyNotFound { key_str: format!("{:?}", ed_model.module.env.home), diff --git a/reporting/src/report.rs b/reporting/src/report.rs index 1fa222c813..e86e16b798 100644 --- a/reporting/src/report.rs +++ b/reporting/src/report.rs @@ -391,19 +391,19 @@ impl<'a> RocDocAllocator<'a> { } pub fn symbol_unqualified(&'a self, symbol: Symbol) -> DocBuilder<'a, Self, Annotation> { - self.text(format!("{}", symbol.ident_str(self.interns))) + self.text(symbol.as_str(self.interns)) .annotate(Annotation::Symbol) } pub fn symbol_foreign_qualified(&'a self, symbol: Symbol) -> DocBuilder<'a, Self, Annotation> { if symbol.module_id() == self.home || symbol.module_id().is_builtin() { // Render it unqualified if it's in the current module or a builtin - self.text(format!("{}", symbol.ident_str(self.interns))) + self.text(symbol.as_str(self.interns)) .annotate(Annotation::Symbol) } else { self.text(format!( "{}.{}", symbol.module_string(self.interns), - symbol.ident_str(self.interns), + symbol.as_str(self.interns), )) .annotate(Annotation::Symbol) } @@ -412,7 +412,7 @@ impl<'a> RocDocAllocator<'a> { self.text(format!( "{}.{}", symbol.module_string(self.interns), - symbol.ident_str(self.interns), + symbol.as_str(self.interns), )) .annotate(Annotation::Symbol) } @@ -425,12 +425,12 @@ impl<'a> RocDocAllocator<'a> { pub fn opaque_name(&'a self, opaque: Symbol) -> DocBuilder<'a, Self, Annotation> { let fmt = if opaque.module_id() == self.home { // Render it unqualified if it's in the current module. - format!("{}", opaque.ident_str(self.interns)) + opaque.as_str(self.interns).to_string() } else { format!( "{}.{}", opaque.module_string(self.interns), - opaque.ident_str(self.interns), + opaque.as_str(self.interns), ) }; @@ -440,7 +440,7 @@ impl<'a> RocDocAllocator<'a> { pub fn wrapped_opaque_name(&'a self, opaque: Symbol) -> DocBuilder<'a, Self, Annotation> { debug_assert_eq!(opaque.module_id(), self.home, "Opaque wrappings can only be defined in the same module they're defined in, but this one is defined elsewhere: {:?}", opaque); - self.text(format!("@{}", opaque.ident_str(self.interns))) + self.text(format!("@{}", opaque.as_str(self.interns))) .annotate(Annotation::Opaque) }