Split metadata by format

This does not change the metadata to keep old packages working.
Instead, for now, just appended `_` to unused fields.
This commit is contained in:
Brendan Hansknecht 2023-04-27 07:38:12 -07:00
parent ae04a73d2f
commit 506c1251f4
No known key found for this signature in database
GPG Key ID: 0EA784685083E75B
4 changed files with 172 additions and 113 deletions

View File

@ -1,3 +1,4 @@
use bincode::{deserialize_from, serialize_into};
use iced_x86::{Decoder, DecoderOptions, Instruction, OpCodeOperandKind, OpKind};
use memmap2::MmapMut;
use object::{elf, endian};
@ -8,14 +9,14 @@ use object::{
};
use roc_collections::all::MutMap;
use roc_error_macros::{internal_error, user_error};
use std::convert::TryFrom;
use std::ffi::CStr;
use std::mem;
use std::os::raw::c_char;
use std::path::Path;
use std::time::{Duration, Instant};
use crate::metadata::{self, Metadata, VirtualOffset};
use serde::{Deserialize, Serialize};
use std::{
ffi::{c_char, CStr},
io::{BufReader, BufWriter},
mem,
path::Path,
time::{Duration, Instant},
};
use crate::{
align_by_constraint, align_to_offset_by_constraint, load_struct_inplace,
@ -35,6 +36,73 @@ struct ElfDynamicDeps {
shared_lib_index: usize,
}
#[derive(Serialize, Deserialize, PartialEq, Eq, Debug)]
enum VirtualOffset {
Absolute,
Relative(u64),
}
#[derive(Serialize, Deserialize, PartialEq, Eq, Debug)]
struct SurgeryEntry {
file_offset: u64,
virtual_offset: VirtualOffset,
size: u8,
}
// TODO: Reanalyze each piece of data in this struct.
// I think a number of them can be combined to reduce string duplication.
// Also I think a few of them aren't need.
// For example, I think preprocessing can deal with all shifting and remove the need for added_byte_count.
// TODO: we probably should be storing numbers in an endian neutral way.
#[derive(Default, Serialize, Deserialize, PartialEq, Eq, Debug)]
struct Metadata {
app_functions: Vec<String>,
// offset followed by address.
plt_addresses: MutMap<String, (u64, u64)>,
surgeries: MutMap<String, Vec<SurgeryEntry>>,
dynamic_symbol_indices: MutMap<String, u64>,
static_symbol_indices: MutMap<String, u64>,
roc_symbol_vaddresses: MutMap<String, u64>,
exec_len: u64,
load_align_constraint: u64,
added_byte_count: u64,
last_vaddr: u64,
dynamic_section_offset: u64,
dynamic_symbol_table_section_offset: u64,
symbol_table_section_offset: u64,
symbol_table_size: u64,
_macho_cmd_loc: u64,
}
impl Metadata {
fn write_to_file(&self, metadata_filename: &Path) {
let metadata_file =
std::fs::File::create(metadata_filename).unwrap_or_else(|e| internal_error!("{}", e));
serialize_into(BufWriter::new(metadata_file), self)
.unwrap_or_else(|err| internal_error!("Failed to serialize metadata: {err}"));
}
fn read_from_file(metadata_filename: &Path) -> Self {
let input = std::fs::File::open(metadata_filename).unwrap_or_else(|e| {
internal_error!(
r#"
Error:
{}\n"#,
e
)
});
match deserialize_from(BufReader::new(input)) {
Ok(data) => data,
Err(err) => {
internal_error!("Failed to deserialize metadata: {}", err);
}
}
}
}
fn report_timing(label: &str, duration: Duration) {
println!("\t{:9.3} ms {}", duration.as_secs_f64() * 1000.0, label,);
}
@ -95,7 +163,7 @@ fn collect_roc_definitions<'a>(object: &object::File<'a, &'a [u8]>) -> MutMap<St
}
struct Surgeries<'a> {
surgeries: MutMap<String, Vec<metadata::SurgeryEntry>>,
surgeries: MutMap<String, Vec<SurgeryEntry>>,
app_func_addresses: MutMap<u64, &'a str>,
indirect_warning_given: bool,
}
@ -229,7 +297,7 @@ impl<'a> Surgeries<'a> {
self.surgeries
.get_mut(*func_name)
.unwrap()
.push(metadata::SurgeryEntry {
.push(SurgeryEntry {
file_offset: offset,
virtual_offset: VirtualOffset::Relative(inst.next_ip()),
size: op_size,
@ -265,7 +333,7 @@ impl<'a> Surgeries<'a> {
}
}
/// Constructs a `metadata::Metadata` from a host executable binary, and writes it to disk
/// Constructs a `Metadata` from a host executable binary, and writes it to disk
pub(crate) fn preprocess_elf(
endianness: target_lexicon::Endianness,
host_exe_path: &Path,
@ -285,7 +353,7 @@ pub(crate) fn preprocess_elf(
}
};
let mut md = metadata::Metadata {
let mut md = Metadata {
roc_symbol_vaddresses: collect_roc_definitions(&exec_obj),
..Default::default()
};
@ -507,7 +575,7 @@ pub(crate) fn preprocess_elf(
#[allow(clippy::too_many_arguments)]
fn gen_elf_le(
exec_data: &[u8],
md: &mut metadata::Metadata,
md: &mut Metadata,
preprocessed_path: &Path,
got_app_syms: &[(String, usize)],
got_sections: &[(usize, usize)],
@ -695,14 +763,11 @@ fn gen_elf_le(
+ i * mem::size_of::<elf::Rela64<LE>>()
// This 16 skips the first 2 fields and gets to the addend field.
+ 16;
md.surgeries
.get_mut(name)
.unwrap()
.push(metadata::SurgeryEntry {
file_offset: addend_addr as u64,
virtual_offset: VirtualOffset::Absolute,
size: 8,
});
md.surgeries.get_mut(name).unwrap().push(SurgeryEntry {
file_offset: addend_addr as u64,
virtual_offset: VirtualOffset::Absolute,
size: 8,
});
}
}
}
@ -900,7 +965,7 @@ fn gen_elf_le(
fn scan_elf_dynamic_deps(
exec_obj: &object::File,
md: &mut metadata::Metadata,
md: &mut Metadata,
app_syms: &[Symbol],
shared_lib: &Path,
exec_data: &[u8],
@ -1175,7 +1240,7 @@ pub(crate) fn surgery_elf(
fn surgery_elf_help(
verbose: bool,
md: &metadata::Metadata,
md: &Metadata,
exec_mmap: &mut MmapMut,
offset_ref: &mut usize, // TODO return this instead of taking a mutable reference to it
app_obj: object::File,

View File

@ -21,7 +21,6 @@ mod macho;
mod pe;
mod generate_dylib;
mod metadata;
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum LinkType {
@ -409,7 +408,7 @@ pub fn preprocess_host(
)
}
/// Constructs a `metadata::Metadata` from a host executable binary, and writes it to disk
/// Constructs a `Metadata` from a host executable binary, and writes it to disk
#[allow(clippy::too_many_arguments)]
fn preprocess(
target: &Triple,

View File

@ -1,4 +1,4 @@
use crate::metadata::{self, Metadata, VirtualOffset};
use bincode::{deserialize_from, serialize_into};
use iced_x86::{Decoder, DecoderOptions, Instruction, OpCodeOperandKind, OpKind};
use memmap2::MmapMut;
use object::macho;
@ -9,10 +9,14 @@ use object::{
};
use roc_collections::all::MutMap;
use roc_error_macros::internal_error;
use std::ffi::CStr;
use std::mem;
use std::path::Path;
use std::time::{Duration, Instant};
use serde::{Deserialize, Serialize};
use std::{
ffi::CStr,
io::{BufReader, BufWriter},
mem,
path::Path,
time::{Duration, Instant},
};
use target_lexicon::Triple;
use crate::{
@ -34,6 +38,73 @@ const STUB_ADDRESS_OFFSET: u64 = 0x06;
// shared_lib_index: usize,
// }
#[derive(Serialize, Deserialize, PartialEq, Eq, Debug)]
enum VirtualOffset {
Absolute,
Relative(u64),
}
#[derive(Serialize, Deserialize, PartialEq, Eq, Debug)]
struct SurgeryEntry {
file_offset: u64,
virtual_offset: VirtualOffset,
size: u8,
}
// TODO: Reanalyze each piece of data in this struct.
// I think a number of them can be combined to reduce string duplication.
// Also I think a few of them aren't need.
// For example, I think preprocessing can deal with all shifting and remove the need for added_byte_count.
// TODO: we probably should be storing numbers in an endian neutral way.
#[derive(Default, Serialize, Deserialize, PartialEq, Eq, Debug)]
struct Metadata {
app_functions: Vec<String>,
// offset followed by address.
plt_addresses: MutMap<String, (u64, u64)>,
surgeries: MutMap<String, Vec<SurgeryEntry>>,
dynamic_symbol_indices: MutMap<String, u64>,
_static_symbol_indices: MutMap<String, u64>,
roc_symbol_vaddresses: MutMap<String, u64>,
exec_len: u64,
load_align_constraint: u64,
added_byte_count: u64,
last_vaddr: u64,
_dynamic_section_offset: u64,
_dynamic_symbol_table_section_offset: u64,
_symbol_table_section_offset: u64,
_symbol_table_size: u64,
macho_cmd_loc: u64,
}
impl Metadata {
fn write_to_file(&self, metadata_filename: &Path) {
let metadata_file =
std::fs::File::create(metadata_filename).unwrap_or_else(|e| internal_error!("{}", e));
serialize_into(BufWriter::new(metadata_file), self)
.unwrap_or_else(|err| internal_error!("Failed to serialize metadata: {err}"));
}
fn read_from_file(metadata_filename: &Path) -> Self {
let input = std::fs::File::open(metadata_filename).unwrap_or_else(|e| {
internal_error!(
r#"
Error:
{}\n"#,
e
)
});
match deserialize_from(BufReader::new(input)) {
Ok(data) => data,
Err(err) => {
internal_error!("Failed to deserialize metadata: {}", err);
}
}
}
}
fn report_timing(label: &str, duration: Duration) {
println!("\t{:9.3} ms {}", duration.as_secs_f64() * 1000.0, label,);
}
@ -83,7 +154,7 @@ fn collect_roc_definitions<'a>(object: &object::File<'a, &'a [u8]>) -> MutMap<St
}
struct Surgeries<'a> {
surgeries: MutMap<String, Vec<metadata::SurgeryEntry>>,
surgeries: MutMap<String, Vec<SurgeryEntry>>,
app_func_addresses: MutMap<u64, &'a str>,
indirect_warning_given: bool,
}
@ -217,7 +288,7 @@ impl<'a> Surgeries<'a> {
self.surgeries
.get_mut(*func_name)
.unwrap()
.push(metadata::SurgeryEntry {
.push(SurgeryEntry {
file_offset: offset,
virtual_offset: VirtualOffset::Relative(inst.next_ip()),
size: op_size,
@ -253,7 +324,7 @@ impl<'a> Surgeries<'a> {
}
}
/// Constructs a `metadata::Metadata` from a host executable binary, and writes it to disk
/// Constructs a `Metadata` from a host executable binary, and writes it to disk
pub(crate) fn preprocess_macho(
target: &Triple,
host_exe_path: &Path,
@ -273,7 +344,7 @@ pub(crate) fn preprocess_macho(
}
};
let mut md = metadata::Metadata {
let mut md = Metadata {
roc_symbol_vaddresses: collect_roc_definitions(&exec_obj),
..Default::default()
};
@ -581,7 +652,7 @@ pub(crate) fn preprocess_macho(
fn gen_macho_le(
exec_data: &[u8],
md: &mut metadata::Metadata,
md: &mut Metadata,
out_filename: &Path,
macho_load_so_offset: usize,
_target: &Triple,
@ -1053,7 +1124,7 @@ fn gen_macho_le(
// fn scan_macho_dynamic_deps(
// _exec_obj: &object::File,
// _md: &mut metadata::Metadata,
// _md: &mut Metadata,
// _app_syms: &[Symbol],
// _shared_lib: &str,
// _exec_data: &[u8],
@ -1151,7 +1222,7 @@ fn surgery_macho_help(
_out_filename: &Path,
verbose: bool,
_time: bool,
md: &metadata::Metadata,
md: &Metadata,
exec_mmap: &mut MmapMut,
offset_ref: &mut usize, // TODO return this instead of taking a mutable reference to it
app_obj: object::File,

View File

@ -1,76 +0,0 @@
use std::{
io::{BufReader, BufWriter},
path::Path,
};
use bincode::{deserialize_from, serialize_into};
use roc_collections::all::MutMap;
use roc_error_macros::internal_error;
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, PartialEq, Eq, Debug)]
pub enum VirtualOffset {
Absolute,
Relative(u64),
}
#[derive(Serialize, Deserialize, PartialEq, Eq, Debug)]
pub struct SurgeryEntry {
pub file_offset: u64,
pub virtual_offset: VirtualOffset,
pub size: u8,
}
// TODO: Reanalyze each piece of data in this struct.
// I think a number of them can be combined to reduce string duplication.
// Also I think a few of them aren't need.
// For example, I think preprocessing can deal with all shifting and remove the need for added_byte_count.
// TODO: we probably should be storing numbers in an endian neutral way.
#[derive(Default, Serialize, Deserialize, PartialEq, Eq, Debug)]
pub struct Metadata {
pub app_functions: Vec<String>,
// offset followed by address.
pub plt_addresses: MutMap<String, (u64, u64)>,
pub surgeries: MutMap<String, Vec<SurgeryEntry>>,
pub dynamic_symbol_indices: MutMap<String, u64>,
pub static_symbol_indices: MutMap<String, u64>,
pub roc_symbol_vaddresses: MutMap<String, u64>,
pub exec_len: u64,
pub load_align_constraint: u64,
pub added_byte_count: u64,
pub last_vaddr: u64,
pub dynamic_section_offset: u64,
pub dynamic_symbol_table_section_offset: u64,
pub symbol_table_section_offset: u64,
pub symbol_table_size: u64,
pub macho_cmd_loc: u64,
}
impl Metadata {
pub fn write_to_file(&self, metadata_filename: &Path) {
let metadata_file =
std::fs::File::create(metadata_filename).unwrap_or_else(|e| internal_error!("{}", e));
serialize_into(BufWriter::new(metadata_file), self)
.unwrap_or_else(|err| internal_error!("Failed to serialize metadata: {err}"));
}
pub fn read_from_file(metadata_filename: &Path) -> Self {
let input = std::fs::File::open(metadata_filename).unwrap_or_else(|e| {
internal_error!(
r#"
Error:
{}\n"#,
e
)
});
match deserialize_from(BufReader::new(input)) {
Ok(data) => data,
Err(err) => {
internal_error!("Failed to deserialize metadata: {}", err);
}
}
}
}