Improved IdentIds

This commit is contained in:
Folkert 2022-04-25 23:09:18 +02:00
parent 5a5324f27e
commit 724fdb666d
No known key found for this signature in database
GPG Key ID: 1F17F6FFD112B97C
16 changed files with 142 additions and 143 deletions

View File

@ -516,7 +516,7 @@ pub fn symbols_from_pattern(pool: &Pool, initial: &Pattern2) -> Vec<Symbol> {
pub fn get_identifier_string(pattern: &Pattern2, interns: &Interns) -> ASTResult<String> {
match pattern {
Pattern2::Identifier(symbol) => Ok(symbol.ident_str(interns).to_string()),
Pattern2::Identifier(symbol) => Ok(symbol.as_str(interns).to_string()),
other => UnexpectedPattern2Variant {
required_pattern2: "Identifier".to_string(),
encountered_pattern2: format!("{:?}", other),

View File

@ -245,7 +245,7 @@ impl Scope {
// use that existing IdentId. Otherwise, create a fresh one.
let ident_id = match exposed_ident_ids.get_id(&ident) {
Some(ident_id) => ident_id,
None => all_ident_ids.add(ident.clone().into()),
None => all_ident_ids.add(&ident),
};
let symbol = Symbol::new(self.home, ident_id);
@ -262,7 +262,7 @@ impl Scope {
///
/// Used for record guards like { x: Just _ }
pub fn ignore(&mut self, ident: Ident, all_ident_ids: &mut IdentIds) -> Symbol {
let ident_id = all_ident_ids.add(ident.into());
let ident_id = all_ident_ids.add(&ident);
Symbol::new(self.home, ident_id)
}

View File

@ -501,7 +501,7 @@ pub fn canonicalize_pattern<'a>(
RequiredField(label, loc_guard) => {
// a guard does not introduce the label into scope!
let symbol = scope.ignore(label.into(), &mut env.ident_ids);
let symbol = scope.ignore(&Ident::from(label), &mut env.ident_ids);
let can_guard = canonicalize_pattern(
env,
var_store,

View File

@ -225,7 +225,7 @@ impl Scope {
region,
};
let ident_id = all_ident_ids.add(ident.clone());
let ident_id = all_ident_ids.add(&ident);
let symbol = Symbol::new(self.home, ident_id);
self.symbols.insert(symbol, region);
@ -273,7 +273,7 @@ impl Scope {
) -> Result<(Symbol, Option<Symbol>), (Region, Loc<Ident>, Symbol)> {
match self.idents.get(&ident) {
Some(&(original_symbol, original_region)) => {
let shadow_ident_id = all_ident_ids.add(ident.clone());
let shadow_ident_id = all_ident_ids.add(&ident);
let shadow_symbol = Symbol::new(self.home, shadow_ident_id);
self.symbols.insert(shadow_symbol, region);
@ -315,7 +315,7 @@ impl Scope {
// use that existing IdentId. Otherwise, create a fresh one.
let ident_id = match exposed_ident_ids.get_id(&ident) {
Some(ident_id) => ident_id,
None => all_ident_ids.add(ident.clone()),
None => all_ident_ids.add(&ident),
};
let symbol = Symbol::new(self.home, ident_id);
@ -329,7 +329,7 @@ impl Scope {
/// Ignore an identifier.
///
/// Used for record guards like { x: Just _ }
pub fn ignore(&mut self, ident: Ident, all_ident_ids: &mut IdentIds) -> Symbol {
pub fn ignore(&mut self, ident: &Ident, all_ident_ids: &mut IdentIds) -> Symbol {
let ident_id = all_ident_ids.add(ident);
Symbol::new(self.home, ident_id)
}

View File

@ -183,7 +183,7 @@ impl<'a> WasmBackend<'a> {
.get_mut(&self.env.module_id)
.unwrap();
let ident_id = ident_ids.add(Ident::from(debug_name));
let ident_id = ident_ids.add(&Ident::from(debug_name));
Symbol::new(self.env.module_id, ident_id)
}

View File

@ -446,7 +446,7 @@ impl LoadedModule {
pub fn exposed_values_str(&self) -> Vec<&str> {
self.exposed_values
.iter()
.map(|symbol| symbol.ident_str(&self.interns).as_str())
.map(|symbol| symbol.as_str(&self.interns))
.collect()
}
}

View File

@ -10,6 +10,10 @@ impl Ident {
pub fn as_inline_str(&self) -> &IdentStr {
&self.0
}
pub fn as_str(&self) -> &str {
self.0.as_str()
}
}
pub struct QualifiedModuleName<'a> {

View File

@ -68,10 +68,6 @@ impl Symbol {
}
pub fn as_str(self, interns: &Interns) -> &str {
self.ident_str(interns).as_str()
}
pub fn ident_str(self, interns: &Interns) -> &IdentStr {
let ident_ids = interns
.all_ident_ids
.get(&self.module_id())
@ -83,16 +79,13 @@ impl Symbol {
)
});
ident_ids
.get_name(self.ident_id())
.unwrap_or_else(|| {
panic!(
"ident_string's IdentIds did not contain an entry for {} in module {:?}",
self.ident_id().0,
self.module_id()
)
})
.into()
ident_ids.get_name(self.ident_id()).unwrap_or_else(|| {
panic!(
"ident_string's IdentIds did not contain an entry for {} in module {:?}",
self.ident_id().0,
self.module_id()
)
})
}
pub fn as_u64(self) -> u64 {
@ -103,13 +96,13 @@ impl Symbol {
let module_id = self.module_id();
if module_id == home {
self.ident_str(interns).clone().into()
ModuleName::from(self.as_str(interns))
} else {
// TODO do this without format! to avoid allocation for short strings
format!(
"{}.{}",
self.module_string(interns).as_str(),
self.ident_str(interns)
self.as_str(interns)
)
.into()
}
@ -535,25 +528,50 @@ pub struct IdentId(u32);
/// Since these are interned strings, this shouldn't result in many total allocations in practice.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct IdentIds {
/// Each IdentId is an index into this Vec
by_id: Vec<Ident>,
buffer: String,
next_generated_name: u32,
lengths: Vec<u16>,
offsets: Vec<u32>,
}
impl IdentIds {
pub fn ident_strs(&self) -> impl Iterator<Item = (IdentId, &str)> {
self.by_id
.iter()
.enumerate()
.map(|(index, ident)| (IdentId(index as u32), ident.as_inline_str().as_str()))
fn with_capacity(capacity: usize) -> Self {
Self {
// guess: the average symbol length is 3
buffer: String::with_capacity(3 * capacity),
lengths: Vec::with_capacity(capacity),
offsets: Vec::with_capacity(capacity),
}
}
pub fn add(&mut self, ident_name: Ident) -> IdentId {
let by_id = &mut self.by_id;
let ident_id = IdentId(by_id.len() as u32);
fn strs(&self) -> impl Iterator<Item = &str> {
self.offsets
.iter()
.zip(self.lengths.iter())
.map(move |(offset, length)| &self.buffer[*offset as usize..][..*length as usize])
}
by_id.push(ident_name);
pub fn ident_strs(&self) -> impl Iterator<Item = (IdentId, &str)> {
self.strs()
.enumerate()
.map(|(index, ident)| (IdentId(index as u32), ident))
}
pub fn add(&mut self, ident_name: &Ident) -> IdentId {
self.add_str(ident_name.as_inline_str().as_str())
}
fn add_str(&mut self, string: &str) -> IdentId {
let offset = self.buffer.len() as u32;
let length = string.len() as u16;
let ident_id = IdentId(self.lengths.len() as u32);
self.lengths.push(length);
self.offsets.push(offset);
self.buffer.push_str(string);
ident_id
}
@ -561,13 +579,7 @@ impl IdentIds {
pub fn get_or_insert(&mut self, name: &Ident) -> IdentId {
match self.get_id(name) {
Some(id) => id,
None => {
let ident_id = IdentId(self.by_id.len() as u32);
self.by_id.push(name.clone());
ident_id
}
None => self.add(name),
}
}
@ -586,31 +598,26 @@ impl IdentIds {
Some(ident_id_ref) => {
let ident_id = ident_id_ref;
let by_id = &mut self.by_id;
let key_index_opt = by_id.iter().position(|x| *x == old_ident);
match self.find_index(old_ident_name) {
Some(key_index) => {
let length = new_ident_name.len();
let offset = self.buffer.len();
self.buffer.push_str(new_ident_name);
if let Some(key_index) = key_index_opt {
if let Some(vec_elt) = by_id.get_mut(key_index) {
*vec_elt = new_ident_name.into();
} else {
// we get the index from by_id
unreachable!()
self.lengths[key_index] = length as u16;
self.offsets[key_index] = offset as u32;
Ok(ident_id)
}
Ok(ident_id)
} else {
Err(
format!(
"Tried to find position of key {:?} in IdentIds.by_id but I could not find the key. IdentIds.by_id: {:?}",
old_ident_name,
self.by_id
)
)
None => Err(format!(
r"Tried to find position of key {:?} in IdentIds.by_id but I could not find the key",
old_ident_name
)),
}
}
None => Err(format!(
"Tried to update key in IdentIds ({:?}) but I could not find the key ({}).",
self.by_id, old_ident_name
"Tried to update key in IdentIds, but I could not find the key ({}).",
old_ident_name
)),
}
}
@ -625,44 +632,60 @@ impl IdentIds {
pub fn gen_unique(&mut self) -> IdentId {
use std::fmt::Write;
let index: u32 = self.next_generated_name;
self.next_generated_name += 1;
let index = self.lengths.len();
// "4294967296" is 10 characters
let mut buffer: arrayvec::ArrayString<10> = arrayvec::ArrayString::new();
let offset = self.buffer.len();
write!(self.buffer, "{}", index).unwrap();
let length = self.buffer.len() - offset;
write!(buffer, "{}", index).unwrap();
let ident = Ident(IdentStr::from_str(buffer.as_str()));
self.lengths.push(length as u16);
self.offsets.push(offset as u32);
self.add(ident)
IdentId(index as u32)
}
#[inline(always)]
pub fn get_id(&self, ident_name: &Ident) -> Option<IdentId> {
let ident_name = ident_name.as_inline_str().as_str();
self.find_index(ident_name.as_inline_str().as_str())
.map(|i| IdentId(i as u32))
}
for (id, ident_str) in self.ident_strs() {
if ident_name == ident_str {
return Some(id);
#[inline(always)]
fn find_index(&self, string: &str) -> Option<usize> {
let target_length = string.len() as u16;
for (index, length) in self.lengths.iter().enumerate() {
if *length == target_length {
let offset = self.offsets[index] as usize;
let ident = &self.buffer[offset..][..*length as usize];
// skip the length check
if string.bytes().eq(ident.bytes()) {
return Some(index);
}
}
}
None
}
pub fn get_name(&self, id: IdentId) -> Option<&Ident> {
self.by_id.get(id.0 as usize)
pub fn get_name(&self, id: IdentId) -> Option<&str> {
let index = id.0 as usize;
match self.lengths.get(index) {
None => None,
Some(length) => {
let offset = self.offsets[index] as usize;
Some(&self.buffer[offset..][..*length as usize])
}
}
}
pub fn get_name_str_res(&self, ident_id: IdentId) -> ModuleResult<&str> {
Ok(self
.get_name(ident_id)
.with_context(|| IdentIdNotFound {
ident_id,
ident_ids_str: format!("{:?}", self),
})?
.as_inline_str()
.as_str())
Ok(self.get_name(ident_id).with_context(|| IdentIdNotFound {
ident_id,
ident_ids_str: format!("{:?}", self),
})?)
}
}
@ -686,45 +709,18 @@ macro_rules! define_builtins {
$(
debug_assert!(!exposed_idents_by_module.contains_key(&ModuleId($module_id)), "Error setting up Builtins: when setting up module {} {:?} - the module ID {} is already present in the map. Check the map for duplicate module IDs!", $module_id, $module_name, $module_id);
let mut by_id : Vec<Ident> = Vec::new();
let ident_ids = {
$(
debug_assert!(by_id.len() == $ident_id, "Error setting up Builtins: when inserting {} …: {:?} into module {} …: {:?} - this entry was assigned an ID of {}, but based on insertion order, it should have had an ID of {} instead! To fix this, change it from {} …: {:?} to {} …: {:?} instead.", $ident_id, $ident_name, $module_id, $module_name, $ident_id, by_id.len(), $ident_id, $ident_name, by_id.len(), $ident_name);
let ident_indexes = [ $($ident_id),+ ];
let ident_names = [ $($ident_name),+ ];
by_id.push($ident_name.into());
)+
let mut ident_ids = IdentIds::with_capacity(ident_names.len());
#[cfg(debug_assertions)]
{
let mut cloned = by_id.clone();
let before = cloned.len();
cloned.sort();
cloned.dedup();
let after = cloned.len();
for (_expected_id, name) in ident_indexes.iter().zip(ident_names.iter()) {
debug_assert!(!ident_ids.strs().any(|s| s == *name), "The Ident {} is already in IdentIds", name);
let _actual_id = ident_ids.add_str(name);
if before != after {
let mut duplicates : Vec<&Ident> = Vec::new();
let mut temp : Vec<&Ident> = Vec::new();
for symbol in cloned.iter() {
if temp.contains(&&symbol) {
duplicates.push(symbol);
}
temp.push(&symbol);
}
panic!("duplicate symbols in IdentIds for module {:?}: {:?}", $module_name, duplicates);
}
}
IdentIds {
by_id,
next_generated_name: 0,
}
};
debug_assert_eq!(*_expected_id, _actual_id.0 as usize, "Error setting up Builtins: when inserting {} …: {} into module {} …: {} - this entry was assigned an ID of {}, but based on insertion order, it should have had an ID of {} instead! To fix this, change it from {} …: {} to {} …: {} instead.", _expected_id, name, $module_id, $module_name, _expected_id, _actual_id.0, _expected_id, name, _actual_id.0, name );
}
if cfg!(debug_assertions) {
let module_id = ModuleId($module_id);
@ -734,6 +730,7 @@ macro_rules! define_builtins {
module_id.register_debug_idents(&ident_ids);
}
exposed_idents_by_module.insert(
ModuleId($module_id),
ident_ids

View File

@ -396,7 +396,7 @@ impl<'a> CodeGenHelp<'a> {
}
fn create_symbol(&self, ident_ids: &mut IdentIds, debug_name: &str) -> Symbol {
let ident_id = ident_ids.add(Ident::from(debug_name));
let ident_id = ident_ids.add(&Ident::from(debug_name));
Symbol::new(self.home, ident_id)
}

View File

@ -2774,7 +2774,7 @@ impl LayoutId {
// Returns something like "foo#1" when given a symbol that interns to "foo"
// and a LayoutId of 1.
pub fn to_symbol_string(self, symbol: Symbol, interns: &Interns) -> String {
let ident_string = symbol.ident_str(interns);
let ident_string = symbol.as_str(interns);
let module_string = interns.module_ids.get_name(symbol.module_id()).unwrap();
format!("{}_{}_{}", module_string, ident_string, self.0)
}

View File

@ -994,7 +994,7 @@ fn write_fn<'a>(
fn write_symbol(env: &Env, symbol: Symbol, buf: &mut String) {
let interns = &env.interns;
let ident = symbol.ident_str(interns);
let ident_str = symbol.as_str(interns);
let module_id = symbol.module_id();
// Don't qualify the symbol if it's in our home module,
@ -1004,5 +1004,5 @@ fn write_symbol(env: &Env, symbol: Symbol, buf: &mut String) {
buf.push('.');
}
buf.push_str(ident.as_str());
buf.push_str(ident_str);
}

View File

@ -2011,7 +2011,7 @@ fn write_error_type_help(
if write_parens {
buf.push('(');
}
buf.push_str(symbol.ident_str(interns).as_str());
buf.push_str(symbol.as_str(interns));
for arg in arguments {
buf.push(' ');

View File

@ -71,7 +71,7 @@ pub fn generate_docs_html(filenames: Vec<PathBuf>, build_dir: &Path) {
let exposed_values = loaded_module
.exposed_values
.iter()
.map(|symbol| symbol.ident_str(&loaded_module.interns).to_string())
.map(|symbol| symbol.as_str(&loaded_module.interns).to_string())
.collect::<Vec<String>>();
(exposed_values, d)

View File

@ -1,6 +1,7 @@
use roc_ast::lang::core::expr::expr2::Expr2;
use roc_ast::lang::core::pattern::Pattern2;
use roc_ast::lang::core::val_def::ValueDef;
use roc_module::ident::Ident;
use roc_module::symbol::Symbol;
use crate::editor::ed_error::EdResult;
@ -25,7 +26,8 @@ pub fn start_new_let_value(ed_model: &mut EdModel, new_char: &char) -> EdResult<
let val_expr2_node = Expr2::Blank;
let val_expr_id = ed_model.module.env.pool.add(val_expr2_node);
let ident_id = ed_model.module.env.ident_ids.add(val_name_string.into());
let ident = Ident::from(val_name_string);
let ident_id = ed_model.module.env.ident_ids.add(&ident);
let var_symbol = Symbol::new(ed_model.module.env.home, ident_id);
let body = Expr2::Var(var_symbol);
let body_id = ed_model.module.env.pool.add(body);

View File

@ -1,5 +1,6 @@
use roc_ast::lang::core::{def::def2::Def2, expr::expr2::Expr2};
use roc_code_markup::slow_pool::MarkNodeId;
use roc_module::ident::Ident;
use crate::{
editor::ed_error::{EdResult, FailedToUpdateIdentIdName, KeyNotFound},
@ -26,13 +27,8 @@ pub fn start_new_tld_value(ed_model: &mut EdModel, new_char: &char) -> EdResult<
let val_expr_node = Expr2::Blank;
let val_expr_id = ed_model.module.env.pool.add(val_expr_node);
let val_name_string = new_char.to_string();
let ident_id = ed_model
.module
.env
.ident_ids
.add(val_name_string.clone().into());
let ident = Ident::from(new_char.to_string().as_str());
let ident_id = ed_model.module.env.ident_ids.add(&ident);
let module_ident_ids_opt = ed_model
.loaded_module
@ -42,7 +38,7 @@ pub fn start_new_tld_value(ed_model: &mut EdModel, new_char: &char) -> EdResult<
if let Some(module_ident_ids_ref) = module_ident_ids_opt {
// this might create different IdentId for interns and env.ident_ids which may be a problem
module_ident_ids_ref.add(val_name_string.into());
module_ident_ids_ref.add(&ident);
} else {
KeyNotFound {
key_str: format!("{:?}", ed_model.module.env.home),

View File

@ -391,19 +391,19 @@ impl<'a> RocDocAllocator<'a> {
}
pub fn symbol_unqualified(&'a self, symbol: Symbol) -> DocBuilder<'a, Self, Annotation> {
self.text(format!("{}", symbol.ident_str(self.interns)))
self.text(symbol.as_str(self.interns))
.annotate(Annotation::Symbol)
}
pub fn symbol_foreign_qualified(&'a self, symbol: Symbol) -> DocBuilder<'a, Self, Annotation> {
if symbol.module_id() == self.home || symbol.module_id().is_builtin() {
// Render it unqualified if it's in the current module or a builtin
self.text(format!("{}", symbol.ident_str(self.interns)))
self.text(symbol.as_str(self.interns))
.annotate(Annotation::Symbol)
} else {
self.text(format!(
"{}.{}",
symbol.module_string(self.interns),
symbol.ident_str(self.interns),
symbol.as_str(self.interns),
))
.annotate(Annotation::Symbol)
}
@ -412,7 +412,7 @@ impl<'a> RocDocAllocator<'a> {
self.text(format!(
"{}.{}",
symbol.module_string(self.interns),
symbol.ident_str(self.interns),
symbol.as_str(self.interns),
))
.annotate(Annotation::Symbol)
}
@ -425,12 +425,12 @@ impl<'a> RocDocAllocator<'a> {
pub fn opaque_name(&'a self, opaque: Symbol) -> DocBuilder<'a, Self, Annotation> {
let fmt = if opaque.module_id() == self.home {
// Render it unqualified if it's in the current module.
format!("{}", opaque.ident_str(self.interns))
opaque.as_str(self.interns).to_string()
} else {
format!(
"{}.{}",
opaque.module_string(self.interns),
opaque.ident_str(self.interns),
opaque.as_str(self.interns),
)
};
@ -440,7 +440,7 @@ impl<'a> RocDocAllocator<'a> {
pub fn wrapped_opaque_name(&'a self, opaque: Symbol) -> DocBuilder<'a, Self, Annotation> {
debug_assert_eq!(opaque.module_id(), self.home, "Opaque wrappings can only be defined in the same module they're defined in, but this one is defined elsewhere: {:?}", opaque);
self.text(format!("@{}", opaque.ident_str(self.interns)))
self.text(format!("@{}", opaque.as_str(self.interns)))
.annotate(Annotation::Opaque)
}