mirror of
https://github.com/rui314/mold.git
synced 2024-10-26 13:10:46 +03:00
Compare commits
5 Commits
2dd7d9bdb9
...
cb9d4e4058
Author | SHA1 | Date | |
---|---|---|---|
|
cb9d4e4058 | ||
|
3f593c62ae | ||
|
d6d0c9178f | ||
|
93096442f3 | ||
|
e63e3a6cf4 |
@ -13,7 +13,7 @@ you need to understand to read mold code.
|
||||
A .so file. Short for Dynamic Shared Object. Often called a
|
||||
shared library, a dynamic library or a shared object as well.
|
||||
|
||||
An DSO contains common functions and data that are used by multiple
|
||||
A DSO contains common functions and data that are used by multiple
|
||||
executables and/or other DSOs. At runtime, a DSO is loaded to a
|
||||
contiguous region in the virtual address space.
|
||||
|
||||
@ -24,25 +24,25 @@ cannot be executed because it's not self-contained. For example,
|
||||
if you compile a C source file containing a call of `printf`,
|
||||
the actual function code of `printf` is not included in the resulting
|
||||
object file. You include `stdio.h`, but that teaches the compiler
|
||||
only about `printf`'s type, and the compiler still don't know what
|
||||
only about `printf`'s type, and the compiler still doesn't know what
|
||||
`printf` actually does. Therefore, it cannot emit code for `printf`.
|
||||
|
||||
You need to link an object file with other object files or a shared
|
||||
library to make it exectuable.
|
||||
library to make it executable.
|
||||
|
||||
## Virtual address space
|
||||
|
||||
A pointer has a value like 0x803020 which is an address of the
|
||||
pointee. But it doesn't mean that the pointee resides at the
|
||||
physical memory address 0x803020 on the computer. Modern CPUs
|
||||
contains so-called Mmeory Management Unit (MMU), and all access to
|
||||
contain a so-called Memory Management Unit (MMU), and all accesses to
|
||||
the memory are first translated by MMU to the physical address.
|
||||
The address before translation is called the "virtual address".
|
||||
Unless you are doing kernel programming, all addresses you
|
||||
handle are virtual addresses.
|
||||
|
||||
The OS kernel controls the MMU so that each process owns the entire
|
||||
virtual address space. So, even if two process uses the same virtual
|
||||
virtual address space. So, even if two processes use the same virtual
|
||||
address, they don't conflict. They are mapped to different physical
|
||||
addresses.
|
||||
|
||||
@ -70,16 +70,16 @@ example, if you compile a function which calls a non-local function
|
||||
```
|
||||
|
||||
The above `callq` is the instruction to call a function at the
|
||||
machine code level. It's opcode is `0xe8` in x86-64, so the
|
||||
machine code level. Its opcode is `0xe8` in x86-64, so the
|
||||
instruction begins with `0xe8`. The following four bytes are
|
||||
displacement; that is, the address of the branch target relative to
|
||||
the end of this `callq` instruction. Notice that the displacement is
|
||||
0. The compiler couldn't fill the displacement because it has no
|
||||
idea as to where `foo` will be at runtime. So, the compiler write 0
|
||||
as a placeholder and instead write a relocation `R_X86_64_PLT32`
|
||||
idea as to where `foo` will be at runtime. So, the compiler writes 0
|
||||
as a placeholder and instead writes a relocation `R_X86_64_PLT32`
|
||||
with `foo` as its associated symbol. The linker reads this
|
||||
relocation, computes the offsets between this call instruction and
|
||||
function `foo` and overwrite the placeholder value 0 with an actual
|
||||
function `foo` and overwrites the placeholder value 0 with an actual
|
||||
displacement.
|
||||
|
||||
There are many different types of relocations. For example, if you
|
||||
@ -139,7 +139,7 @@ identify a function or a data in C++, because for example `foo` may
|
||||
be in a namespace or defined as a static member in some class. If
|
||||
`foo` is an overloaded function, we need to distinguish different
|
||||
`foo`s by their types. Therefore, the C++ compiler mangles an identifier by
|
||||
appending nmaepsace names, type information and such so that
|
||||
appending namespace names, type information and such so that
|
||||
different things get different names.
|
||||
|
||||
For example, a function `int foo(int)` in a namespace `bar` is
|
||||
|
@ -10,14 +10,8 @@ enum class FileType {
|
||||
EMPTY,
|
||||
ELF_OBJ,
|
||||
ELF_DSO,
|
||||
MACH_OBJ,
|
||||
MACH_EXE,
|
||||
MACH_DYLIB,
|
||||
MACH_BUNDLE,
|
||||
MACH_UNIVERSAL,
|
||||
AR,
|
||||
THIN_AR,
|
||||
TAPI,
|
||||
TEXT,
|
||||
GCC_LTO_OBJ,
|
||||
LLVM_BITCODE,
|
||||
@ -133,28 +127,10 @@ FileType get_file_type(Context &ctx, MappedFile *mf) {
|
||||
return FileType::UNKNOWN;
|
||||
}
|
||||
|
||||
if (data.starts_with("\xcf\xfa\xed\xfe")) {
|
||||
switch (*(ul32 *)(data.data() + 12)) {
|
||||
case 1: // MH_OBJECT
|
||||
return FileType::MACH_OBJ;
|
||||
case 2: // MH_EXECUTE
|
||||
return FileType::MACH_EXE;
|
||||
case 6: // MH_DYLIB
|
||||
return FileType::MACH_DYLIB;
|
||||
case 8: // MH_BUNDLE
|
||||
return FileType::MACH_BUNDLE;
|
||||
}
|
||||
return FileType::UNKNOWN;
|
||||
}
|
||||
|
||||
if (data.starts_with("!<arch>\n"))
|
||||
return FileType::AR;
|
||||
if (data.starts_with("!<thin>\n"))
|
||||
return FileType::THIN_AR;
|
||||
if (data.starts_with("--- !tapi-tbd"))
|
||||
return FileType::TAPI;
|
||||
if (data.starts_with("\xca\xfe\xba\xbe"))
|
||||
return FileType::MACH_UNIVERSAL;
|
||||
if (is_text_file(mf))
|
||||
return FileType::TEXT;
|
||||
if (data.starts_with("\xde\xc0\x17\x0b"))
|
||||
@ -170,14 +146,8 @@ inline std::string filetype_to_string(FileType type) {
|
||||
case FileType::EMPTY: return "EMPTY";
|
||||
case FileType::ELF_OBJ: return "ELF_OBJ";
|
||||
case FileType::ELF_DSO: return "ELF_DSO";
|
||||
case FileType::MACH_EXE: return "MACH_EXE";
|
||||
case FileType::MACH_OBJ: return "MACH_OBJ";
|
||||
case FileType::MACH_DYLIB: return "MACH_DYLIB";
|
||||
case FileType::MACH_BUNDLE: return "MACH_BUNDLE";
|
||||
case FileType::MACH_UNIVERSAL: return "MACH_UNIVERSAL";
|
||||
case FileType::AR: return "AR";
|
||||
case FileType::THIN_AR: return "THIN_AR";
|
||||
case FileType::TAPI: return "TAPI";
|
||||
case FileType::TEXT: return "TEXT";
|
||||
case FileType::GCC_LTO_OBJ: return "GCC_LTO_OBJ";
|
||||
case FileType::LLVM_BITCODE: return "LLVM_BITCODE";
|
||||
|
Loading…
Reference in New Issue
Block a user