Merge pull request #3174 from rtfeldman/module-id-improvements

This commit is contained in:
Richard Feldman 2022-06-11 01:42:37 -04:00 committed by GitHub
commit 85d61f99d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 122 additions and 129 deletions

View File

@ -1,6 +1,5 @@
use bumpalo::collections::Vec as BumpVec;
use bumpalo::Bump;
use roc_module::ident::{Ident, IdentStr};
use roc_parse::{ast::CommentOrNewline, parser::SyntaxError};
use roc_region::all::Region;
@ -41,8 +40,7 @@ pub fn toplevel_defs_to_defs2<'a>(
match loc_pattern.value {
Identifier(id_str) => {
let identifier_id =
env.ident_ids.get_or_insert(&Ident(IdentStr::from(id_str)));
let identifier_id = env.ident_ids.get_or_insert(id_str);
// TODO support with annotation
Def2::ValueDef {
@ -164,7 +162,7 @@ pub fn def_to_def2<'a>(
match loc_pattern.value {
Identifier(id_str) => {
let identifier_id = env.ident_ids.get_or_insert(&Ident(IdentStr::from(id_str)));
let identifier_id = env.ident_ids.get_or_insert(id_str);
// TODO support with annotation
Def2::ValueDef {

View File

@ -109,13 +109,11 @@ impl<'a> Env<'a> {
let module_name: ModuleName = module_name.into();
match self.module_ids.get_id(&module_name) {
Some(&module_id) => {
let ident: Ident = ident.into();
Some(module_id) => {
// You can do qualified lookups on your own module, e.g.
// if I'm in the Foo module, I can do a `Foo.bar` lookup.
if module_id == self.home {
match self.ident_ids.get_id(&ident) {
match self.ident_ids.get_id(ident) {
Some(ident_id) => {
let symbol = Symbol::new(module_id, ident_id);
@ -125,7 +123,7 @@ impl<'a> Env<'a> {
}
None => Err(RuntimeError::LookupNotInScope(
Loc {
value: ident,
value: Ident::from(ident),
region,
},
self.ident_ids
@ -136,7 +134,7 @@ impl<'a> Env<'a> {
}
} else {
match self.dep_idents.get(&module_id) {
Some(exposed_ids) => match exposed_ids.get_id(&ident) {
Some(exposed_ids) => match exposed_ids.get_id(ident) {
Some(ident_id) => {
let symbol = Symbol::new(module_id, ident_id);
@ -154,7 +152,7 @@ impl<'a> Env<'a> {
.collect();
Err(RuntimeError::ValueNotExposed {
module_name,
ident,
ident: Ident::from(ident),
region,
exposed_values,
})

View File

@ -244,7 +244,7 @@ impl Scope {
// If this IdentId was already added previously
// when the value was exposed in the module header,
// use that existing IdentId. Otherwise, create a fresh one.
let ident_id = match exposed_ident_ids.get_id(&ident) {
let ident_id = match exposed_ident_ids.get_id(ident.as_str()) {
Some(ident_id) => ident_id,
None => all_ident_ids.add_str(ident.as_str()),
};

View File

@ -70,14 +70,13 @@ impl<'a> Env<'a> {
let is_type_name = ident.starts_with(|c: char| c.is_uppercase());
let module_name = ModuleName::from(module_name_str);
let ident = Ident::from(ident);
match self.module_ids.get_id(&module_name) {
Some(&module_id) => {
Some(module_id) => {
// You can do qualified lookups on your own module, e.g.
// if I'm in the Foo module, I can do a `Foo.bar` lookup.
if module_id == self.home {
match scope.locals.ident_ids.get_id(&ident) {
match scope.locals.ident_ids.get_id(ident) {
Some(ident_id) => {
let symbol = Symbol::new(module_id, ident_id);
@ -92,7 +91,7 @@ impl<'a> Env<'a> {
None => {
let error = RuntimeError::LookupNotInScope(
Loc {
value: ident,
value: Ident::from(ident),
region,
},
scope
@ -107,7 +106,7 @@ impl<'a> Env<'a> {
}
} else {
match self.dep_idents.get(&module_id) {
Some(exposed_ids) => match exposed_ids.get_id(&ident) {
Some(exposed_ids) => match exposed_ids.get_id(ident) {
Some(ident_id) => {
let symbol = Symbol::new(module_id, ident_id);
@ -129,7 +128,7 @@ impl<'a> Env<'a> {
.collect();
Err(RuntimeError::ValueNotExposed {
module_name,
ident,
ident: Ident::from(ident),
region,
exposed_values,
})

View File

@ -647,15 +647,15 @@ mod test {
assert_eq!(
&idents,
&[
Ident::from("Box"),
Ident::from("Set"),
Ident::from("Dict"),
Ident::from("Str"),
Ident::from("Ok"),
Ident::from("False"),
Ident::from("List"),
Ident::from("True"),
Ident::from("Str"),
Ident::from("List"),
Ident::from("Ok"),
Ident::from("Err"),
Ident::from("Dict"),
Ident::from("Set"),
Ident::from("Box"),
]
);
}
@ -674,15 +674,15 @@ mod test {
assert_eq!(
&idents,
&[
Ident::from("Box"),
Ident::from("Set"),
Ident::from("Dict"),
Ident::from("Str"),
Ident::from("Ok"),
Ident::from("False"),
Ident::from("List"),
Ident::from("True"),
Ident::from("Str"),
Ident::from("List"),
Ident::from("Ok"),
Ident::from("Err"),
Ident::from("Dict"),
Ident::from("Set"),
Ident::from("Box"),
]
);

View File

@ -272,7 +272,15 @@ impl std::hash::Hash for IdentStr {
impl Clone for IdentStr {
fn clone(&self) -> Self {
Self::from_str(self.as_str())
if self.is_empty() || self.is_small_str() {
// we can just copy the bytes
Self {
elements: self.elements,
length: self.length,
}
} else {
Self::from_str(self.as_str())
}
}
}

View File

@ -158,7 +158,7 @@ fn generate_entry_docs<'a>(
ValueDef::Annotation(loc_pattern, loc_ann) => {
if let Pattern::Identifier(identifier) = loc_pattern.value {
// Check if the definition is exposed
if ident_ids.get_id(&identifier.into()).is_some() {
if ident_ids.get_id(identifier).is_some() {
let name = identifier.to_string();
let doc_def = DocDef {
name,
@ -179,7 +179,7 @@ fn generate_entry_docs<'a>(
} => {
if let Pattern::Identifier(identifier) = ann_pattern.value {
// Check if the definition is exposed
if ident_ids.get_id(&identifier.into()).is_some() {
if ident_ids.get_id(identifier).is_some() {
let doc_def = DocDef {
name: identifier.to_string(),
type_annotation: type_to_docs(false, ann_type.value),

View File

@ -3152,11 +3152,11 @@ fn send_header<'a>(
let ident_ids = ident_ids_by_module.get_or_insert(module_id);
for ident in exposed_idents {
let ident_id = ident_ids.get_or_insert(&ident);
let ident_id = ident_ids.get_or_insert(ident.as_str());
let symbol = Symbol::new(module_id, ident_id);
// Since this value is exposed, add it to our module's default scope.
debug_assert!(!scope.contains_key(&ident.clone()));
debug_assert!(!scope.contains_key(&ident));
scope.insert(ident, (symbol, region));
}
@ -3178,7 +3178,7 @@ fn send_header<'a>(
// For example, if module A has [B.{ foo }], then
// when we get here for B, `foo` will already have
// an IdentId. We must reuse that!
let ident_id = ident_ids.get_or_insert(&loc_exposed.value.as_str().into());
let ident_id = ident_ids.get_or_insert(loc_exposed.value.as_str());
let symbol = Symbol::new(home, ident_id);
exposed.push(symbol);
@ -3361,7 +3361,7 @@ fn send_header_two<'a>(
let ident_ids = ident_ids_by_module.get_or_insert(module_id);
for ident in exposed_idents {
let ident_id = ident_ids.get_or_insert(&ident);
let ident_id = ident_ids.get_or_insert(ident.as_str());
let symbol = Symbol::new(module_id, ident_id);
// Since this value is exposed, add it to our module's default scope.
@ -3385,7 +3385,7 @@ fn send_header_two<'a>(
for entry in requires {
let entry = entry.value;
let ident: Ident = entry.ident.value.into();
let ident_id = ident_ids.get_or_insert(&ident);
let ident_id = ident_ids.get_or_insert(entry.ident.value);
let symbol = Symbol::new(module_id, ident_id);
// Since this value is exposed, add it to our module's default scope.
@ -3398,11 +3398,11 @@ fn send_header_two<'a>(
for entry in requires_types {
let string: &str = entry.value.into();
let ident: Ident = string.into();
let ident_id = ident_ids.get_or_insert(&ident);
let ident_id = ident_ids.get_or_insert(string);
let symbol = Symbol::new(module_id, ident_id);
// Since this value is exposed, add it to our module's default scope.
debug_assert!(!scope.contains_key(&ident.clone()));
debug_assert!(!scope.contains_key(&ident));
scope.insert(ident, (symbol, entry.region));
}
}
@ -3423,7 +3423,7 @@ fn send_header_two<'a>(
// For example, if module A has [B.{ foo }], then
// when we get here for B, `foo` will already have
// an IdentId. We must reuse that!
let ident_id = ident_ids.get_or_insert(&loc_exposed.value.as_str().into());
let ident_id = ident_ids.get_or_insert(loc_exposed.value.as_str());
let symbol = Symbol::new(home, ident_id);
exposed.push(symbol);
@ -3450,8 +3450,7 @@ fn send_header_two<'a>(
let module_name = ModuleNameEnum::PkgConfig;
let main_for_host = {
let ident_str: Ident = provides[0].value.as_str().into();
let ident_id = ident_ids.get_or_insert(&ident_str);
let ident_id = ident_ids.get_or_insert(provides[0].value.as_str());
Symbol::new(home, ident_id)
};

View File

@ -1,10 +1,9 @@
use crate::ident::{Ident, ModuleName};
use crate::module_err::{IdentIdNotFound, ModuleIdNotFound, ModuleResult};
use roc_collections::{default_hasher, MutMap, SendMap, SmallStringInterner, VecMap};
use roc_collections::{SmallStringInterner, VecMap};
use roc_ident::IdentStr;
use roc_region::all::Region;
use snafu::OptionExt;
use std::collections::HashMap;
use std::num::NonZeroU32;
use std::{fmt, u32};
@ -238,12 +237,12 @@ lazy_static! {
/// which displays not only the Module ID, but also the Module Name which
/// corresponds to that ID.
///
static ref DEBUG_MODULE_ID_NAMES: std::sync::Mutex<roc_collections::all::MutMap<u32, Box<str>>> =
static ref DEBUG_MODULE_ID_NAMES: std::sync::Mutex<roc_collections::SmallStringInterner> =
// This stores a u32 key instead of a ModuleId key so that if there's
// a problem with ModuleId's Debug implementation, logging this for diagnostic
// purposes won't recursively trigger ModuleId's Debug instance in the course of printing
// this out.
std::sync::Mutex::new(roc_collections::all::MutMap::default());
std::sync::Mutex::new(roc_collections::SmallStringInterner::with_capacity(10));
}
#[derive(Debug, Default)]
@ -270,7 +269,7 @@ impl Interns {
let ident: Ident = ident.into();
match self.all_ident_ids.get(&module_id) {
Some(ident_ids) => match ident_ids.get_id(&ident) {
Some(ident_ids) => match ident_ids.get_id(ident.as_str()) {
Some(ident_id) => Symbol::new(module_id, ident_id),
None => {
panic!("Interns::symbol could not find ident entry for {:?} for module {:?} in Interns {:?}", ident, module_id, self);
@ -319,12 +318,12 @@ lazy_static! {
/// This is used in Debug builds only, to let us have a Debug instance
/// which displays not only the Module ID, but also the Module Name which
/// corresponds to that ID.
static ref DEBUG_IDENT_IDS_BY_MODULE_ID: std::sync::Mutex<roc_collections::all::MutMap<u32, IdentIds>> =
static ref DEBUG_IDENT_IDS_BY_MODULE_ID: std::sync::Mutex<roc_collections::VecMap<u32, IdentIds>> =
// This stores a u32 key instead of a ModuleId key so that if there's
// a problem with ModuleId's Debug implementation, logging this for diagnostic
// purposes won't recursively trigger ModuleId's Debug instance in the course of printing
// this out.
std::sync::Mutex::new(roc_collections::all::MutMap::default());
std::sync::Mutex::new(roc_collections::VecMap::default());
}
/// A globally unique ID that gets assigned to each module as it is loaded.
@ -386,8 +385,8 @@ impl fmt::Debug for ModuleId {
.expect("Failed to acquire lock for Debug reading from DEBUG_MODULE_ID_NAMES, presumably because a thread panicked.");
if PRETTY_PRINT_DEBUG_SYMBOLS {
match names.get(&(self.to_zero_indexed() as u32)) {
Some(str_ref) => write!(f, "{}", str_ref.clone()),
match names.try_get(self.to_zero_indexed()) {
Some(str_ref) => write!(f, "{}", str_ref),
None => {
panic!(
"Could not find a Debug name for module ID {} in {:?}",
@ -434,60 +433,49 @@ impl<'a, T> PackageQualified<'a, T> {
#[derive(Debug, Clone)]
pub struct PackageModuleIds<'a> {
by_name: MutMap<PQModuleName<'a>, ModuleId>,
by_id: Vec<PQModuleName<'a>>,
}
impl<'a> PackageModuleIds<'a> {
pub fn get_or_insert(&mut self, module_name: &PQModuleName<'a>) -> ModuleId {
match self.by_name.get(module_name) {
Some(id) => *id,
None => {
let by_id = &mut self.by_id;
let module_id = ModuleId::from_zero_indexed(by_id.len());
by_id.push(module_name.clone());
self.by_name.insert(module_name.clone(), module_id);
if cfg!(debug_assertions) {
Self::insert_debug_name(module_id, module_name);
}
module_id
}
if let Some(module_id) = self.get_id(module_name) {
return module_id;
}
// didn't find it, so we'll add it
let module_id = ModuleId::from_zero_indexed(self.by_id.len());
self.by_id.push(module_name.clone());
if cfg!(debug_assertions) {
Self::insert_debug_name(module_id, module_name);
}
module_id
}
pub fn into_module_ids(self) -> ModuleIds {
let by_name: MutMap<ModuleName, ModuleId> = self
.by_name
.into_iter()
.map(|(pqname, module_id)| (pqname.as_inner().clone(), module_id))
.collect();
let by_id: Vec<ModuleName> = self
.by_id
.into_iter()
.map(|pqname| pqname.as_inner().clone())
.collect();
ModuleIds { by_name, by_id }
ModuleIds { by_id }
}
#[cfg(debug_assertions)]
fn insert_debug_name(module_id: ModuleId, module_name: &PQModuleName) {
let mut names = DEBUG_MODULE_ID_NAMES.lock().expect("Failed to acquire lock for Debug interning into DEBUG_MODULE_ID_NAMES, presumably because a thread panicked.");
names
.entry(module_id.to_zero_indexed() as u32)
.or_insert_with(|| match module_name {
PQModuleName::Unqualified(module) => module.as_str().into(),
PQModuleName::Qualified(package, module) => {
let name = format!("{}.{}", package, module.as_str()).into();
name
if names.try_get(module_id.to_zero_indexed()).is_none() {
match module_name {
PQModuleName::Unqualified(module) => {
names.insert(module.as_str());
}
});
PQModuleName::Qualified(package, module) => {
names.insert(&format!("{}.{}", package, module.as_str()));
}
}
}
}
#[cfg(not(debug_assertions))]
@ -495,8 +483,14 @@ impl<'a> PackageModuleIds<'a> {
// By design, this is a no-op in release builds!
}
pub fn get_id(&self, module_name: &PQModuleName<'a>) -> Option<&ModuleId> {
self.by_name.get(module_name)
pub fn get_id(&self, module_name: &PQModuleName<'a>) -> Option<ModuleId> {
for (index, name) in self.by_id.iter().enumerate() {
if name == module_name {
return Some(ModuleId::from_zero_indexed(index));
}
}
None
}
pub fn get_name(&self, id: ModuleId) -> Option<&PQModuleName> {
@ -509,35 +503,26 @@ impl<'a> PackageModuleIds<'a> {
}
/// Stores a mapping between ModuleId and InlinableString.
///
/// Each module name is stored twice, for faster lookups.
/// Since these are interned strings, this shouldn't result in many total allocations in practice.
#[derive(Debug, Clone)]
pub struct ModuleIds {
by_name: MutMap<ModuleName, ModuleId>,
/// Each ModuleId is an index into this Vec
by_id: Vec<ModuleName>,
}
impl ModuleIds {
pub fn get_or_insert(&mut self, module_name: &ModuleName) -> ModuleId {
match self.by_name.get(module_name) {
Some(id) => *id,
None => {
let by_id = &mut self.by_id;
let module_id = ModuleId::from_zero_indexed(by_id.len());
by_id.push(module_name.clone());
self.by_name.insert(module_name.clone(), module_id);
if cfg!(debug_assertions) {
Self::insert_debug_name(module_id, module_name);
}
module_id
}
if let Some(module_id) = self.get_id(module_name) {
return module_id;
}
// didn't find it, so we'll add it
let module_id = ModuleId::from_zero_indexed(self.by_id.len());
self.by_id.push(module_name.clone());
if cfg!(debug_assertions) {
Self::insert_debug_name(module_id, module_name);
}
module_id
}
#[cfg(debug_assertions)]
@ -545,9 +530,9 @@ impl ModuleIds {
let mut names = DEBUG_MODULE_ID_NAMES.lock().expect("Failed to acquire lock for Debug interning into DEBUG_MODULE_ID_NAMES, presumably because a thread panicked.");
// TODO make sure modules are never added more than once!
names
.entry(module_id.to_zero_indexed() as u32)
.or_insert_with(|| module_name.as_str().to_string().into());
if names.try_get(module_id.to_zero_indexed()).is_none() {
names.insert(module_name.as_str());
}
}
#[cfg(not(debug_assertions))]
@ -555,8 +540,15 @@ impl ModuleIds {
// By design, this is a no-op in release builds!
}
pub fn get_id(&self, module_name: &ModuleName) -> Option<&ModuleId> {
self.by_name.get(module_name)
#[inline]
pub fn get_id(&self, module_name: &ModuleName) -> Option<ModuleId> {
for (index, name) in self.by_id.iter().enumerate() {
if name == module_name {
return Some(ModuleId::from_zero_indexed(index));
}
}
None
}
pub fn get_name(&self, id: ModuleId) -> Option<&ModuleName> {
@ -597,10 +589,6 @@ impl IdentIds {
.map(|(index, ident)| (IdentId(index as u32), ident))
}
pub fn add_ident(&mut self, ident_name: &Ident) -> IdentId {
self.add_str(ident_name.as_str())
}
pub fn add_str(&mut self, ident_name: &str) -> IdentId {
IdentId(self.interner.insert(ident_name) as u32)
}
@ -609,10 +597,10 @@ impl IdentIds {
IdentId(self.interner.duplicate(ident_id.0 as usize) as u32)
}
pub fn get_or_insert(&mut self, name: &Ident) -> IdentId {
pub fn get_or_insert(&mut self, name: &str) -> IdentId {
match self.get_id(name) {
Some(id) => id,
None => self.add_str(name.as_str()),
None => self.add_str(name),
}
}
@ -637,9 +625,9 @@ impl IdentIds {
}
#[inline(always)]
pub fn get_id(&self, ident_name: &Ident) -> Option<IdentId> {
pub fn get_id(&self, ident_name: &str) -> Option<IdentId> {
self.interner
.find_index(ident_name.as_str())
.find_index(ident_name)
.map(|i| IdentId(i as u32))
}
@ -867,7 +855,6 @@ macro_rules! define_builtins {
// +1 because the user will be compiling at least 1 non-builtin module!
let capacity = $total + 1;
let mut by_name = HashMap::with_capacity_and_hasher(capacity, default_hasher());
let mut by_id = Vec::with_capacity(capacity);
let mut insert_both = |id: ModuleId, name_str: &'static str| {
@ -877,7 +864,6 @@ macro_rules! define_builtins {
Self::insert_debug_name(id, &name);
}
by_name.insert(name.clone(), id);
by_id.push(name);
};
@ -885,7 +871,7 @@ macro_rules! define_builtins {
insert_both(ModuleId::$module_const, $module_name);
)+
ModuleIds { by_name, by_id }
ModuleIds { by_id }
}
}
@ -894,7 +880,6 @@ macro_rules! define_builtins {
// +1 because the user will be compiling at least 1 non-builtin module!
let capacity = $total + 1;
let mut by_name = HashMap::with_capacity_and_hasher(capacity, default_hasher());
let mut by_id = Vec::with_capacity(capacity);
let mut insert_both = |id: ModuleId, name_str: &'static str| {
@ -905,7 +890,6 @@ macro_rules! define_builtins {
Self::insert_debug_name(id, &name);
}
by_name.insert(name.clone(), id);
by_id.push(name);
};
@ -913,7 +897,7 @@ macro_rules! define_builtins {
insert_both(ModuleId::$module_const, $module_name);
)+
PackageModuleIds { by_name, by_id }
PackageModuleIds { by_id }
}
}
@ -928,8 +912,8 @@ macro_rules! define_builtins {
/// and what symbols they should resolve to.
///
/// This is for type aliases like `Int` and `Str` and such.
pub fn default_in_scope() -> SendMap<Ident, (Symbol, Region)> {
let mut scope = SendMap::default();
pub fn default_in_scope() -> VecMap<Ident, (Symbol, Region)> {
let mut scope = VecMap::default();
$(
$(

View File

@ -756,7 +756,7 @@ fn doc_url<'a>(
}
} else {
match interns.module_ids.get_id(&module_name.into()) {
Some(&module_id) => {
Some(module_id) => {
// You can do qualified lookups on your own module, e.g.
// if I'm in the Foo module, I can do a `Foo.bar` lookup.
if module_id == home {
@ -772,7 +772,7 @@ fn doc_url<'a>(
// This is not the home module
match dep_idents
.get(&module_id)
.and_then(|exposed_ids| exposed_ids.get_id(&ident.into()))
.and_then(|exposed_ids| exposed_ids.get_id(ident))
{
Some(_) => {
// This is a valid symbol for this dependency,

7
roc_std/Cargo.lock generated
View File

@ -198,8 +198,15 @@ dependencies = [
"pretty_assertions",
"quickcheck",
"quickcheck_macros",
"static_assertions",
]
[[package]]
name = "static_assertions"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f406d6ee68db6796e11ffd7b4d171864c58b7451e79ef9460ea33c287a1f89a7"
[[package]]
name = "syn"
version = "1.0.86"