Merge pull request #6932 from roc-lang/blitsort

Fast Builtin Sorting
This commit is contained in:
Brendan Hansknecht 2024-07-29 09:22:35 -07:00 committed by GitHub
commit 5a933b0582
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 4087 additions and 63 deletions

1
.gitignore vendored
View File

@ -30,6 +30,7 @@ zig-cache
.envrc
*.rs.bk
*.o
*.a
*.so
*.so.*
*.obj

View File

@ -0,0 +1 @@
.fuzz_data

View File

@ -0,0 +1,42 @@
#!/usr/bin/env bash
# Builds the sort fuzz target three ways (plain, CMPLOG, LAF) and launches
# seven afl-fuzz instances plus monitoring panes inside a tmux session named "fuzz".
set -euxo pipefail

# Run from this directory. Quoted so a checkout path containing spaces still works.
SCRIPT_RELATIVE_DIR="$(dirname "${BASH_SOURCE[0]}")"
cd "$SCRIPT_RELATIVE_DIR"

# First compile the fuzz target.
zig build-lib -static -fcompiler-rt -flto -fPIC src/fuzz_sort.zig
afl-clang-lto -o fuzz libfuzz_sort.a
AFL_LLVM_CMPLOG=1 afl-clang-lto -o fuzz-cmplog libfuzz_sort.a
AFL_LLVM_LAF_ALL=1 afl-clang-lto -o fuzz-cmpcov libfuzz_sort.a

# Setup fuzz directory with dummy input.
INPUT_DIR='.fuzz_data/input'
OUTPUT_DIR='.fuzz_data/output'
if [ ! -d .fuzz_data ]; then
    mkdir -p "$INPUT_DIR"
    echo '1234567887654321' > "$INPUT_DIR/dummy_input"
else
    # Resuming from existing run: afl-fuzz is handed '-' as its input directory.
    INPUT_DIR='-'
fi

# Just hardcoding to 7 fuzzers (this avoids overwhelming 8 core machines).
# The command is kept as a single string because tmux runs each pane's command through a shell.
BASE_CMD="AFL_TESTCACHE_SIZE=250 AFL_IMPORT_FIRST=1 afl-fuzz -i $INPUT_DIR -o $OUTPUT_DIR"

# I'm trying to follow the guide around secondary fuzzers, but I don't quite follow the wording.
# So I feel this may be correct, but it may also be more random than they expect.
# Overkill anyway...so this is fine.

# Window 0: the main fuzzer plus three secondaries.
tmux new-session -d -s "fuzz" "AFL_FINAL_SYNC=1 $BASE_CMD -M fuzzer01 ./fuzz"
tmux split-window -h "$BASE_CMD -S fuzzer02 -c ./fuzz-cmplog -m none -l 2AT -p explore ./fuzz"
tmux split-window -v -t 0.0 "$BASE_CMD -S fuzzer03 -c ./fuzz-cmplog -m none -L 0 -p exploit ./fuzz"
tmux split-window -v -t 0.2 "$BASE_CMD -S fuzzer04 -p explore ./fuzz-cmpcov"

# Window 1: three more secondaries and htop.
tmux new-window "$BASE_CMD -S fuzzer05 -Z -p coe ./fuzz-cmpcov"
tmux split-window -h "$BASE_CMD -S fuzzer06 -P exploit ./fuzz"
tmux split-window -v -t 1.0 "AFL_DISABLE_TRIM=1 $BASE_CMD -S fuzzer07 -p explore ./fuzz"
tmux split-window -v -t 1.2 "htop"

# Window 2: periodic summary of all fuzzers.
tmux new-window "watch -c -n 30 afl-whatsup -s $OUTPUT_DIR"

# Visit window 1, then window 0, so window 0 is current when attaching.
tmux select-window -t 1
tmux select-window -t 0
tmux -2 a -t "fuzz"

View File

@ -0,0 +1,101 @@
const std = @import("std");
const sort = @import("sort.zig");
extern fn malloc(size: usize) callconv(.C) ?*anyopaque;
extern fn free(c_ptr: *anyopaque) callconv(.C) void;
/// C-ABI entry point of the fuzz binary; exported below under the symbol name `main`.
///
/// Runs `fuzz_main` and returns 0 when it succeeds. `fuzz_main` reads stdin and
/// allocates, so it can genuinely fail; a failure is reported with a panic that
/// names the error rather than `unreachable`, which would be undefined behaviour
/// in release build modes.
fn cMain() callconv(.C) i32 {
    fuzz_main() catch |err| {
        std.debug.panic("fuzz_main failed: {s}", .{@errorName(err)});
    };
    return 0;
}

// Expose `cMain` as the program's `main` symbol.
comptime {
    @export(cMain, .{ .name = "main", .linkage = .Strong });
}
// When true, `fuzz_main` prints the input array, the output array, whether it is
// sorted, and the final value of the comparison counter.
const DEBUG = false;
// Allocator shared by `fuzz_main` and the `testing_roc_*` hooks in this file.
// It is `undefined` until `fuzz_main` assigns it, so nothing may allocate before then.
var allocator: std.mem.Allocator = undefined;
/// Body of the fuzz harness.
///
/// Reads all of stdin, reinterprets the bytes as an array of `i64` (trailing bytes
/// that do not fill a whole `i64` are ignored), sorts the array in place with
/// `sort.fluxsort`, and asserts that the result is ascending and that the
/// comparison counter is back at zero.
///
/// Returns an error if reading stdin or allocating the input buffer fails.
pub fn fuzz_main() !void {
    // Setup an allocator that will detect leaks/use-after-free/etc
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    // this will check for leaks and crash the program if it finds any
    defer std.debug.assert(gpa.deinit() == .ok);
    allocator = gpa.allocator();

    // Read the data from stdin. The buffer is requested with `i64` alignment so that
    // viewing it as `[*]i64` is guaranteed by the type instead of depending on where
    // the allocator happened to place a byte-aligned buffer.
    const stdin = std.io.getStdIn();
    const data = try stdin.readToEndAllocOptions(
        allocator,
        std.math.maxInt(usize),
        null,
        @alignOf(i64),
        null,
    );
    defer allocator.free(data);

    const len = data.len / @sizeOf(i64);
    const arr_ptr: [*]i64 = @ptrCast(data.ptr);

    if (DEBUG) {
        std.debug.print("Input: [{d}]{d}\n", .{ len, arr_ptr[0..len] });
    }

    // The callbacks dereference this counter as `*isize`, so declare it as `isize`.
    var test_count: isize = 0;
    sort.fluxsort(
        @ptrCast(arr_ptr),
        len,
        &test_i64_compare_refcounted,
        @ptrCast(&test_count),
        true,
        &test_inc_n_data,
        @sizeOf(i64),
        @alignOf(i64),
        &test_i64_copy,
    );

    const sorted = std.sort.isSorted(i64, arr_ptr[0..len], {}, std.sort.asc(i64));

    if (DEBUG) {
        std.debug.print("Output: [{d}]{d}\nSorted: {}\nFinal RC: {}\n", .{ len, arr_ptr[0..len], sorted, test_count });
    }

    std.debug.assert(sorted);
    // Every unit added through `test_inc_n_data` must have been consumed by a comparison.
    std.debug.assert(test_count == 0);
}
// Type-erased, nullable byte pointer used for every callback argument in this file.
const Opaque = ?[*]u8;

// Compares the two `i64` values behind `a_ptr` and `b_ptr`.
//
// Each call also asserts that the `isize` counter behind `count_ptr` is positive
// and then decrements it by one.
//
// Returns 0 when the values are equal, 1 when a > b, and 2 when a < b.
fn test_i64_compare_refcounted(count_ptr: Opaque, a_ptr: Opaque, b_ptr: Opaque) callconv(.C) u8 {
    const lhs: *i64 = @alignCast(@ptrCast(a_ptr));
    const rhs: *i64 = @alignCast(@ptrCast(b_ptr));
    const left = lhs.*;
    const right = rhs.*;

    const remaining: *isize = @ptrCast(@alignCast(count_ptr));
    std.debug.assert(remaining.* > 0);
    remaining.* -= 1;

    // Eq = 0
    // GT = 1
    // LT = 2
    if (left < right) return 2;
    if (left > right) return 1;
    return 0;
}
// Copies a single `i64` from the location behind `src_ptr` to the location behind `dst_ptr`.
fn test_i64_copy(dst_ptr: Opaque, src_ptr: Opaque) callconv(.C) void {
    const destination: *i64 = @alignCast(@ptrCast(dst_ptr));
    const source: *i64 = @alignCast(@ptrCast(src_ptr));
    destination.* = source.*;
}
// Adds `n` to the `isize` counter behind `count_ptr`.
fn test_inc_n_data(count_ptr: Opaque, n: usize) callconv(.C) void {
    const counter: *isize = @ptrCast(@alignCast(count_ptr));
    const delta: isize = @intCast(n);
    counter.* += delta;
}
// Export the test hooks defined below under the symbol names `roc_alloc`,
// `roc_dealloc`, and `roc_panic`.
// NOTE(review): presumably these satisfy extern declarations used by the code under
// test (sort.zig and what it imports) - confirm.
comptime {
@export(testing_roc_alloc, .{ .name = "roc_alloc", .linkage = .Strong });
@export(testing_roc_dealloc, .{ .name = "roc_dealloc", .linkage = .Strong });
@export(testing_roc_panic, .{ .name = "roc_panic", .linkage = .Strong });
}
// Test implementation of `roc_alloc`, backed by the file-level `allocator`.
//
// Layout of each allocation: a `usize` header holding the full allocation size,
// followed by `size` payload bytes. The returned pointer points at the payload.
//
// The backing memory is requested with `usize` alignment so that writing the header
// is guaranteed to be aligned. The caller's alignment argument is still ignored, so
// alignments stricter than `@alignOf(usize)` are not honored.
//
// Panics if the allocator is out of memory.
fn testing_roc_alloc(size: usize, _: u32) callconv(.C) ?*anyopaque {
    // We store an extra usize which is the size of the full allocation.
    const full_size = size + @sizeOf(usize);
    const block = allocator.alignedAlloc(u8, @alignOf(usize), full_size) catch
        @panic("testing_roc_alloc: out of memory");
    @as(*usize, @ptrCast(block.ptr)).* = full_size;
    return @as(?*anyopaque, @ptrCast(block.ptr + @sizeOf(usize)));
}

// Test implementation of `roc_dealloc`; the inverse of `testing_roc_alloc`.
//
// Steps back over the `usize` header in front of `c_ptr`, reads the full allocation
// size from it, and frees the whole block with the same alignment it was allocated with.
fn testing_roc_dealloc(c_ptr: *anyopaque, _: u32) callconv(.C) void {
    const payload: [*]u8 = @ptrCast(c_ptr);
    const base: [*]align(@alignOf(usize)) u8 = @alignCast(payload - @sizeOf(usize));
    const full_size = @as(*const usize, @ptrCast(base)).*;
    allocator.free(base[0..full_size]);
}
// Test implementation of `roc_panic`: ignores both arguments and panics with a fixed message.
fn testing_roc_panic(c_ptr: *anyopaque, tag_id: u32) callconv(.C) void {
    _ = .{ c_ptr, tag_id };
    @panic("Roc panicked");
}

View File

@ -1,6 +1,7 @@
const std = @import("std");
const utils = @import("utils.zig");
const str = @import("str.zig");
const sort = @import("sort.zig");
const UpdateMode = utils.UpdateMode;
const mem = std.mem;
const math = std.math;
@ -690,60 +691,10 @@ pub fn listDropAt(
}
}
/// Partitions the elements at indices `low..=high` around the element at `high`.
///
/// Every element for which `wrapper(transform, element, pivot)` reports `LT` is
/// swapped into the front of the range; the pivot is then swapped into the slot
/// immediately after them. Elements are `element_width` bytes wide and are moved
/// with `swapElements` using `copy`.
///
/// Returns the index at which the pivot ends up.
fn partition(
    source_ptr: [*]u8,
    transform: Opaque,
    wrapper: CompareFn,
    element_width: usize,
    low: isize,
    high: isize,
    copy: CopyFn,
) isize {
    const pivot_ptr = source_ptr + (@as(usize, @intCast(high)) * element_width);

    // Slot that receives the next element found to be smaller than the pivot;
    // when the scan finishes it is the pivot's final position.
    var boundary = low;
    var cursor = low;
    while (cursor < high) : (cursor += 1) {
        const candidate = source_ptr + (@as(usize, @intCast(cursor)) * element_width);
        const order: utils.Ordering = @enumFromInt(wrapper(transform, candidate, pivot_ptr));

        if (order == utils.Ordering.LT) {
            // the current element is smaller than the pivot; swap it
            swapElements(source_ptr, element_width, @as(usize, @intCast(boundary)), @as(usize, @intCast(cursor)), copy);
            boundary += 1;
        }
    }

    swapElements(source_ptr, element_width, @as(usize, @intCast(boundary)), @as(usize, @intCast(high)), copy);
    return boundary;
}
/// Sorts the elements at indices `low..=high` of `source_ptr` in place.
///
/// Elements are `element_width` bytes wide, compared with `wrapper(transform, a, b)`,
/// and moved with `copy`. Does nothing when `low >= high`.
///
/// After each partition only the smaller side is sorted recursively; the larger side
/// is handled by the loop. This keeps the recursion depth logarithmic in the range
/// size even when the input is already sorted, instead of one stack frame per element.
fn quicksort(
    source_ptr: [*]u8,
    transform: Opaque,
    wrapper: CompareFn,
    element_width: usize,
    low: isize,
    high: isize,
    copy: CopyFn,
) void {
    var lo = low;
    var hi = high;
    while (lo < hi) {
        // partition index
        const pi = partition(source_ptr, transform, wrapper, element_width, lo, hi, copy);

        if (pi - lo < hi - pi) {
            // The part before pi is smaller: recurse into it, continue with the part after pi.
            quicksort(source_ptr, transform, wrapper, element_width, lo, pi - 1, copy);
            lo = pi + 1;
        } else {
            // The part after pi is smaller (or equal): recurse into it, continue with the part before pi.
            quicksort(source_ptr, transform, wrapper, element_width, pi + 1, hi, copy);
            hi = pi - 1;
        }
    }
}
pub fn listSortWith(
input: RocList,
caller: CompareFn,
data: Opaque,
cmp: CompareFn,
cmp_data: Opaque,
inc_n_data: IncN,
data_is_owned: bool,
alignment: u32,
@ -753,16 +704,13 @@ pub fn listSortWith(
dec: Dec,
copy: CopyFn,
) callconv(.C) RocList {
if (input.len() < 2) {
return input;
}
var list = input.makeUnique(alignment, element_width, elements_refcounted, inc, dec);
if (data_is_owned) {
inc_n_data(data, list.len());
}
if (list.bytes) |source_ptr| {
const low = 0;
const high: isize = @as(isize, @intCast(list.len())) - 1;
quicksort(source_ptr, data, caller, element_width, low, high, copy);
sort.fluxsort(source_ptr, list.len(), cmp, cmp_data, data_is_owned, inc_n_data, element_width, alignment, copy);
}
return list;

View File

@ -394,12 +394,14 @@ fn exportUtilsFn(comptime func: anytype, comptime func_name: []const u8) void {
// Custom panic function, as builtin Zig version errors during LLVM verification
//
// In test builds the message and stack trace are printed first. On every target
// except wasm32 a diagnostic naming this as a Roc standard library bug is then
// printed and the process is aborted. On wasm32 nothing can be printed and abort
// is unavailable, so the function ends in `unreachable`.
pub fn panic(message: []const u8, stacktrace: ?*std.builtin.StackTrace, _: ?usize) noreturn {
    if (builtin.is_test) {
        std.debug.print("{s}: {?}", .{ message, stacktrace });
    }
    if (builtin.target.cpu.arch != .wasm32) {
        std.debug.print("\nSomehow in unreachable zig panic!\nThis is a roc standard library bug\n{s}: {?}", .{ message, stacktrace });
        std.process.abort();
    } else {
        // Can't call abort or print from wasm. Just leave it as unreachable.
        unreachable;
    }
}
// Run all tests in imported modules
// https://github.com/ziglang/zig/blob/master/lib/std/std.zig#L94

File diff suppressed because it is too large Load Diff