mirror of
https://github.com/ziglang/zig.git
synced 2024-12-14 23:30:15 +00:00
97643c1ecc
When a unique run is encountered, track it in a bit set memory-mapped into the fuzz directory so it can be observed by other processes, even while the fuzzer is running.
473 lines
17 KiB
Zig
473 lines
17 KiB
Zig
const builtin = @import("builtin");
|
|
const std = @import("std");
|
|
const Allocator = std.mem.Allocator;
|
|
const assert = std.debug.assert;
|
|
const fatal = std.process.fatal;
|
|
|
|
pub const std_options = .{
|
|
.logFn = logOverride,
|
|
};
|
|
|
|
var log_file: ?std.fs.File = null;
|
|
|
|
fn logOverride(
|
|
comptime level: std.log.Level,
|
|
comptime scope: @TypeOf(.EnumLiteral),
|
|
comptime format: []const u8,
|
|
args: anytype,
|
|
) void {
|
|
if (builtin.mode != .Debug) return;
|
|
const f = if (log_file) |f| f else f: {
|
|
const f = fuzzer.dir.createFile("libfuzzer.log", .{}) catch @panic("failed to open fuzzer log file");
|
|
log_file = f;
|
|
break :f f;
|
|
};
|
|
const prefix1 = comptime level.asText();
|
|
const prefix2 = if (scope == .default) ": " else "(" ++ @tagName(scope) ++ "): ";
|
|
f.writer().print(prefix1 ++ prefix2 ++ format ++ "\n", args) catch @panic("failed to write to fuzzer log");
|
|
}
|
|
|
|
export threadlocal var __sancov_lowest_stack: usize = 0;
|
|
|
|
var module_count_8bc: usize = 0;
|
|
var module_count_pcs: usize = 0;
|
|
|
|
export fn __sanitizer_cov_8bit_counters_init(start: [*]u8, end: [*]u8) void {
|
|
assert(@atomicRmw(usize, &module_count_8bc, .Add, 1, .monotonic) == 0);
|
|
fuzzer.pc_counters = start[0 .. end - start];
|
|
}
|
|
|
|
export fn __sanitizer_cov_pcs_init(start: [*]const Fuzzer.FlaggedPc, end: [*]const Fuzzer.FlaggedPc) void {
|
|
assert(@atomicRmw(usize, &module_count_pcs, .Add, 1, .monotonic) == 0);
|
|
fuzzer.flagged_pcs = start[0 .. end - start];
|
|
}
|
|
|
|
export fn __sanitizer_cov_trace_const_cmp1(arg1: u8, arg2: u8) void {
|
|
handleCmp(@returnAddress(), arg1, arg2);
|
|
}
|
|
|
|
export fn __sanitizer_cov_trace_cmp1(arg1: u8, arg2: u8) void {
|
|
handleCmp(@returnAddress(), arg1, arg2);
|
|
}
|
|
|
|
export fn __sanitizer_cov_trace_const_cmp2(arg1: u16, arg2: u16) void {
|
|
handleCmp(@returnAddress(), arg1, arg2);
|
|
}
|
|
|
|
export fn __sanitizer_cov_trace_cmp2(arg1: u16, arg2: u16) void {
|
|
handleCmp(@returnAddress(), arg1, arg2);
|
|
}
|
|
|
|
export fn __sanitizer_cov_trace_const_cmp4(arg1: u32, arg2: u32) void {
|
|
handleCmp(@returnAddress(), arg1, arg2);
|
|
}
|
|
|
|
export fn __sanitizer_cov_trace_cmp4(arg1: u32, arg2: u32) void {
|
|
handleCmp(@returnAddress(), arg1, arg2);
|
|
}
|
|
|
|
export fn __sanitizer_cov_trace_const_cmp8(arg1: u64, arg2: u64) void {
|
|
handleCmp(@returnAddress(), arg1, arg2);
|
|
}
|
|
|
|
export fn __sanitizer_cov_trace_cmp8(arg1: u64, arg2: u64) void {
|
|
handleCmp(@returnAddress(), arg1, arg2);
|
|
}
|
|
|
|
export fn __sanitizer_cov_trace_switch(val: u64, cases_ptr: [*]u64) void {
|
|
const pc = @returnAddress();
|
|
const len = cases_ptr[0];
|
|
const val_size_in_bits = cases_ptr[1];
|
|
const cases = cases_ptr[2..][0..len];
|
|
_ = val;
|
|
fuzzer.visitPc(pc);
|
|
_ = val_size_in_bits;
|
|
_ = cases;
|
|
//std.log.debug("0x{x}: switch on value {d} ({d} bits) with {d} cases", .{
|
|
// pc, val, val_size_in_bits, cases.len,
|
|
//});
|
|
}
|
|
|
|
export fn __sanitizer_cov_trace_pc_indir(callee: usize) void {
|
|
const pc = @returnAddress();
|
|
_ = callee;
|
|
fuzzer.visitPc(pc);
|
|
//std.log.debug("0x{x}: indirect call to 0x{x}", .{ pc, callee });
|
|
}
|
|
|
|
fn handleCmp(pc: usize, arg1: u64, arg2: u64) void {
|
|
fuzzer.visitPc(pc ^ arg1 ^ arg2);
|
|
//std.log.debug("0x{x}: comparison of {d} and {d}", .{ pc, arg1, arg2 });
|
|
}
|
|
|
|
const Fuzzer = struct {
|
|
gpa: Allocator,
|
|
rng: std.Random.DefaultPrng,
|
|
input: std.ArrayListUnmanaged(u8),
|
|
flagged_pcs: []const FlaggedPc,
|
|
pc_counters: []u8,
|
|
n_runs: usize,
|
|
recent_cases: RunMap,
|
|
deduplicated_runs: usize,
|
|
/// Data collected from code coverage instrumentation from one execution of
|
|
/// the test function.
|
|
coverage: Coverage,
|
|
/// Tracks which PCs have been seen across all runs that do not crash the fuzzer process.
|
|
/// Stored in a memory-mapped file so that it can be shared with other
|
|
/// processes and viewed while the fuzzer is running.
|
|
seen_pcs: MemoryMappedList,
|
|
dir: std.fs.Dir,
|
|
|
|
const SeenPcsHeader = extern struct {
|
|
n_runs: usize,
|
|
pcs_len: usize,
|
|
lowest_stack: usize,
|
|
};
|
|
|
|
const RunMap = std.ArrayHashMapUnmanaged(Run, void, Run.HashContext, false);
|
|
|
|
const Coverage = struct {
|
|
pc_table: std.AutoArrayHashMapUnmanaged(usize, void),
|
|
run_id_hasher: std.hash.Wyhash,
|
|
|
|
fn reset(cov: *Coverage) void {
|
|
cov.pc_table.clearRetainingCapacity();
|
|
cov.run_id_hasher = std.hash.Wyhash.init(0);
|
|
}
|
|
};
|
|
|
|
const Run = struct {
|
|
id: Id,
|
|
input: []const u8,
|
|
score: usize,
|
|
|
|
const Id = u64;
|
|
|
|
const HashContext = struct {
|
|
pub fn eql(ctx: HashContext, a: Run, b: Run, b_index: usize) bool {
|
|
_ = b_index;
|
|
_ = ctx;
|
|
return a.id == b.id;
|
|
}
|
|
pub fn hash(ctx: HashContext, a: Run) u32 {
|
|
_ = ctx;
|
|
return @truncate(a.id);
|
|
}
|
|
};
|
|
|
|
fn deinit(run: *Run, gpa: Allocator) void {
|
|
gpa.free(run.input);
|
|
run.* = undefined;
|
|
}
|
|
};
|
|
|
|
const Slice = extern struct {
|
|
ptr: [*]const u8,
|
|
len: usize,
|
|
|
|
fn toZig(s: Slice) []const u8 {
|
|
return s.ptr[0..s.len];
|
|
}
|
|
|
|
fn fromZig(s: []const u8) Slice {
|
|
return .{
|
|
.ptr = s.ptr,
|
|
.len = s.len,
|
|
};
|
|
}
|
|
};
|
|
|
|
const FlaggedPc = extern struct {
|
|
addr: usize,
|
|
flags: packed struct(usize) {
|
|
entry: bool,
|
|
_: @Type(.{ .Int = .{ .signedness = .unsigned, .bits = @bitSizeOf(usize) - 1 } }),
|
|
},
|
|
};
|
|
|
|
const Analysis = struct {
|
|
score: usize,
|
|
id: Run.Id,
|
|
};
|
|
|
|
fn init(f: *Fuzzer, dir: std.fs.Dir) !void {
|
|
f.dir = dir;
|
|
|
|
// Layout of this file:
|
|
// - Header
|
|
// - list of PC addresses (usize elements)
|
|
// - list of hit flag, 1 bit per address (stored in u8 elements)
|
|
const coverage_file = dir.createFile("coverage", .{
|
|
.read = true,
|
|
.truncate = false,
|
|
}) catch |err| fatal("unable to create coverage file: {s}", .{@errorName(err)});
|
|
const flagged_pcs = f.flagged_pcs;
|
|
const n_bitset_elems = (flagged_pcs.len + 7) / 8;
|
|
const bytes_len = @sizeOf(SeenPcsHeader) + flagged_pcs.len * @sizeOf(usize) + n_bitset_elems;
|
|
const existing_len = coverage_file.getEndPos() catch |err| {
|
|
fatal("unable to check len of coverage file: {s}", .{@errorName(err)});
|
|
};
|
|
if (existing_len == 0) {
|
|
coverage_file.setEndPos(bytes_len) catch |err| {
|
|
fatal("unable to set len of coverage file: {s}", .{@errorName(err)});
|
|
};
|
|
} else if (existing_len != bytes_len) {
|
|
fatal("incompatible existing coverage file (differing lengths)", .{});
|
|
}
|
|
f.seen_pcs = MemoryMappedList.init(coverage_file, existing_len, bytes_len) catch |err| {
|
|
fatal("unable to init coverage memory map: {s}", .{@errorName(err)});
|
|
};
|
|
if (existing_len != 0) {
|
|
const existing_pcs = std.mem.bytesAsSlice(usize, f.seen_pcs.items[@sizeOf(SeenPcsHeader)..][0 .. flagged_pcs.len * @sizeOf(usize)]);
|
|
for (existing_pcs, flagged_pcs, 0..) |old, new, i| {
|
|
if (old != new.addr) {
|
|
fatal("incompatible existing coverage file (differing PC at index {d}: {x} != {x})", .{
|
|
i, old, new.addr,
|
|
});
|
|
}
|
|
}
|
|
} else {
|
|
const header: SeenPcsHeader = .{
|
|
.n_runs = 0,
|
|
.pcs_len = flagged_pcs.len,
|
|
.lowest_stack = std.math.maxInt(usize),
|
|
};
|
|
f.seen_pcs.appendSliceAssumeCapacity(std.mem.asBytes(&header));
|
|
for (flagged_pcs) |flagged_pc| {
|
|
f.seen_pcs.appendSliceAssumeCapacity(std.mem.asBytes(&flagged_pc.addr));
|
|
}
|
|
f.seen_pcs.appendNTimesAssumeCapacity(0, n_bitset_elems);
|
|
}
|
|
}
|
|
|
|
fn analyzeLastRun(f: *Fuzzer) Analysis {
|
|
return .{
|
|
.id = f.coverage.run_id_hasher.final(),
|
|
.score = f.coverage.pc_table.count(),
|
|
};
|
|
}
|
|
|
|
fn next(f: *Fuzzer) ![]const u8 {
|
|
const gpa = f.gpa;
|
|
const rng = fuzzer.rng.random();
|
|
|
|
if (f.recent_cases.entries.len == 0) {
|
|
// Prepare initial input.
|
|
try f.recent_cases.ensureUnusedCapacity(gpa, 100);
|
|
const len = rng.uintLessThanBiased(usize, 80);
|
|
try f.input.resize(gpa, len);
|
|
rng.bytes(f.input.items);
|
|
f.recent_cases.putAssumeCapacity(.{
|
|
.id = 0,
|
|
.input = try gpa.dupe(u8, f.input.items),
|
|
.score = 0,
|
|
}, {});
|
|
} else {
|
|
if (f.n_runs % 1000 == 0) f.dumpStats();
|
|
|
|
const analysis = f.analyzeLastRun();
|
|
const gop = f.recent_cases.getOrPutAssumeCapacity(.{
|
|
.id = analysis.id,
|
|
.input = undefined,
|
|
.score = undefined,
|
|
});
|
|
if (gop.found_existing) {
|
|
//std.log.info("duplicate analysis: score={d} id={d}", .{ analysis.score, analysis.id });
|
|
f.deduplicated_runs += 1;
|
|
if (f.input.items.len < gop.key_ptr.input.len or gop.key_ptr.score == 0) {
|
|
gpa.free(gop.key_ptr.input);
|
|
gop.key_ptr.input = try gpa.dupe(u8, f.input.items);
|
|
gop.key_ptr.score = analysis.score;
|
|
}
|
|
} else {
|
|
std.log.info("unique analysis: score={d} id={d}", .{ analysis.score, analysis.id });
|
|
gop.key_ptr.* = .{
|
|
.id = analysis.id,
|
|
.input = try gpa.dupe(u8, f.input.items),
|
|
.score = analysis.score,
|
|
};
|
|
|
|
// Track code coverage from all runs.
|
|
{
|
|
const seen_pcs = f.seen_pcs.items[@sizeOf(SeenPcsHeader) + f.flagged_pcs.len * @sizeOf(usize) ..];
|
|
for (seen_pcs, 0..) |*elem, i| {
|
|
const byte_i = i / 8;
|
|
const mask: u8 =
|
|
(@as(u8, @intFromBool(f.pc_counters[byte_i + 0] != 0)) << 0) |
|
|
(@as(u8, @intFromBool(f.pc_counters[byte_i + 1] != 0)) << 1) |
|
|
(@as(u8, @intFromBool(f.pc_counters[byte_i + 2] != 0)) << 2) |
|
|
(@as(u8, @intFromBool(f.pc_counters[byte_i + 3] != 0)) << 3) |
|
|
(@as(u8, @intFromBool(f.pc_counters[byte_i + 4] != 0)) << 4) |
|
|
(@as(u8, @intFromBool(f.pc_counters[byte_i + 5] != 0)) << 5) |
|
|
(@as(u8, @intFromBool(f.pc_counters[byte_i + 6] != 0)) << 6) |
|
|
(@as(u8, @intFromBool(f.pc_counters[byte_i + 7] != 0)) << 7);
|
|
|
|
_ = @atomicRmw(u8, elem, .Or, mask, .monotonic);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (f.recent_cases.entries.len >= 100) {
|
|
const Context = struct {
|
|
values: []const Run,
|
|
pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool {
|
|
return ctx.values[b_index].score < ctx.values[a_index].score;
|
|
}
|
|
};
|
|
f.recent_cases.sortUnstable(Context{ .values = f.recent_cases.keys() });
|
|
const cap = 50;
|
|
// This has to be done before deinitializing the deleted items.
|
|
const doomed_runs = f.recent_cases.keys()[cap..];
|
|
f.recent_cases.shrinkRetainingCapacity(cap);
|
|
for (doomed_runs) |*run| {
|
|
std.log.info("culling score={d} id={d}", .{ run.score, run.id });
|
|
run.deinit(gpa);
|
|
}
|
|
}
|
|
}
|
|
|
|
const chosen_index = rng.uintLessThanBiased(usize, f.recent_cases.entries.len);
|
|
const run = &f.recent_cases.keys()[chosen_index];
|
|
f.input.clearRetainingCapacity();
|
|
f.input.appendSliceAssumeCapacity(run.input);
|
|
try f.mutate();
|
|
|
|
f.n_runs += 1;
|
|
const header: *volatile SeenPcsHeader = @ptrCast(f.seen_pcs.items[0..@sizeOf(SeenPcsHeader)]);
|
|
_ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic);
|
|
_ = @atomicRmw(usize, &header.lowest_stack, .Min, __sancov_lowest_stack, .monotonic);
|
|
@memset(f.pc_counters, 0);
|
|
f.coverage.reset();
|
|
return f.input.items;
|
|
}
|
|
|
|
fn visitPc(f: *Fuzzer, pc: usize) void {
|
|
errdefer |err| oom(err);
|
|
try f.coverage.pc_table.put(f.gpa, pc, {});
|
|
f.coverage.run_id_hasher.update(std.mem.asBytes(&pc));
|
|
}
|
|
|
|
fn dumpStats(f: *Fuzzer) void {
|
|
std.log.info("stats: runs={d} deduplicated={d}", .{
|
|
f.n_runs, f.deduplicated_runs,
|
|
});
|
|
for (f.recent_cases.keys()[0..@min(f.recent_cases.entries.len, 5)], 0..) |run, i| {
|
|
std.log.info("best[{d}] id={x} score={d} input: '{}'", .{
|
|
i, run.id, run.score, std.zig.fmtEscapes(run.input),
|
|
});
|
|
}
|
|
}
|
|
|
|
fn mutate(f: *Fuzzer) !void {
|
|
const gpa = f.gpa;
|
|
const rng = fuzzer.rng.random();
|
|
|
|
if (f.input.items.len == 0) {
|
|
const len = rng.uintLessThanBiased(usize, 80);
|
|
try f.input.resize(gpa, len);
|
|
rng.bytes(f.input.items);
|
|
return;
|
|
}
|
|
|
|
const index = rng.uintLessThanBiased(usize, f.input.items.len * 3);
|
|
if (index < f.input.items.len) {
|
|
f.input.items[index] = rng.int(u8);
|
|
} else if (index < f.input.items.len * 2) {
|
|
_ = f.input.orderedRemove(index - f.input.items.len);
|
|
} else if (index < f.input.items.len * 3) {
|
|
try f.input.insert(gpa, index - f.input.items.len * 2, rng.int(u8));
|
|
} else {
|
|
unreachable;
|
|
}
|
|
}
|
|
};
|
|
|
|
fn oom(err: anytype) noreturn {
|
|
switch (err) {
|
|
error.OutOfMemory => @panic("out of memory"),
|
|
}
|
|
}
|
|
|
|
var general_purpose_allocator: std.heap.GeneralPurposeAllocator(.{}) = .{};
|
|
|
|
var fuzzer: Fuzzer = .{
|
|
.gpa = general_purpose_allocator.allocator(),
|
|
.rng = std.Random.DefaultPrng.init(0),
|
|
.input = .{},
|
|
.flagged_pcs = undefined,
|
|
.pc_counters = undefined,
|
|
.n_runs = 0,
|
|
.deduplicated_runs = 0,
|
|
.recent_cases = .{},
|
|
.coverage = undefined,
|
|
.dir = undefined,
|
|
.seen_pcs = undefined,
|
|
};
|
|
|
|
export fn fuzzer_next() Fuzzer.Slice {
|
|
return Fuzzer.Slice.fromZig(fuzzer.next() catch |err| switch (err) {
|
|
error.OutOfMemory => @panic("out of memory"),
|
|
});
|
|
}
|
|
|
|
export fn fuzzer_init() void {
|
|
if (module_count_8bc == 0) fatal("__sanitizer_cov_8bit_counters_init was never called", .{});
|
|
if (module_count_pcs == 0) fatal("__sanitizer_cov_pcs_init was never called", .{});
|
|
|
|
// TODO: move this to .zig-cache/f
|
|
const fuzz_dir = std.fs.cwd().makeOpenPath("f", .{ .iterate = true }) catch |err| {
|
|
fatal("unable to open fuzz directory 'f': {s}", .{@errorName(err)});
|
|
};
|
|
fuzzer.init(fuzz_dir) catch |err| fatal("unable to init fuzzer: {s}", .{@errorName(err)});
|
|
}
|
|
|
|
/// Like `std.ArrayListUnmanaged(u8)` but backed by memory mapping.
|
|
pub const MemoryMappedList = struct {
|
|
/// Contents of the list.
|
|
///
|
|
/// Pointers to elements in this slice are invalidated by various functions
|
|
/// of this ArrayList in accordance with the respective documentation. In
|
|
/// all cases, "invalidated" means that the memory has been passed to this
|
|
/// allocator's resize or free function.
|
|
items: []align(std.mem.page_size) volatile u8,
|
|
/// How many bytes this list can hold without allocating additional memory.
|
|
capacity: usize,
|
|
|
|
pub fn init(file: std.fs.File, length: usize, capacity: usize) !MemoryMappedList {
|
|
const ptr = try std.posix.mmap(
|
|
null,
|
|
capacity,
|
|
std.posix.PROT.READ | std.posix.PROT.WRITE,
|
|
.{ .TYPE = .SHARED },
|
|
file.handle,
|
|
0,
|
|
);
|
|
return .{
|
|
.items = ptr[0..length],
|
|
.capacity = capacity,
|
|
};
|
|
}
|
|
|
|
/// Append the slice of items to the list.
|
|
/// Asserts that the list can hold the additional items.
|
|
pub fn appendSliceAssumeCapacity(l: *MemoryMappedList, items: []const u8) void {
|
|
const old_len = l.items.len;
|
|
const new_len = old_len + items.len;
|
|
assert(new_len <= l.capacity);
|
|
l.items.len = new_len;
|
|
@memcpy(l.items[old_len..][0..items.len], items);
|
|
}
|
|
|
|
/// Append a value to the list `n` times.
|
|
/// Never invalidates element pointers.
|
|
/// The function is inline so that a comptime-known `value` parameter will
|
|
/// have better memset codegen in case it has a repeated byte pattern.
|
|
/// Asserts that the list can hold the additional items.
|
|
pub inline fn appendNTimesAssumeCapacity(l: *MemoryMappedList, value: u8, n: usize) void {
|
|
const new_len = l.items.len + n;
|
|
assert(new_len <= l.capacity);
|
|
@memset(l.items.ptr[l.items.len..new_len], value);
|
|
l.items.len = new_len;
|
|
}
|
|
};
|