zig/lib/fuzzer.zig
Andrew Kelley 97643c1ecc fuzzer: track code coverage from all runs
When a unique run is encountered, track it in a bit set memory-mapped
into the fuzz directory so it can be observed by other processes, even
while the fuzzer is running.
2024-08-07 00:48:32 -07:00

const builtin = @import("builtin");
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const fatal = std.process.fatal;

pub const std_options = .{
    .logFn = logOverride,
};

var log_file: ?std.fs.File = null;

/// In Debug builds, redirects `std.log` output to a `libfuzzer.log` file
/// inside the fuzz directory; a no-op in all other build modes.
fn logOverride(
    comptime level: std.log.Level,
    comptime scope: @TypeOf(.EnumLiteral),
    comptime format: []const u8,
    args: anytype,
) void {
    if (builtin.mode != .Debug) return;
    const f = if (log_file) |f| f else f: {
        const f = fuzzer.dir.createFile("libfuzzer.log", .{}) catch @panic("failed to open fuzzer log file");
        log_file = f;
        break :f f;
    };
    const prefix1 = comptime level.asText();
    const prefix2 = if (scope == .default) ": " else "(" ++ @tagName(scope) ++ "): ";
    f.writer().print(prefix1 ++ prefix2 ++ format ++ "\n", args) catch @panic("failed to write to fuzzer log");
}
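
// The declarations below implement the LLVM SanitizerCoverage ABI. When the
// unit tests are compiled with coverage instrumentation, the compiler emits
// calls to these entry points: the two `*_init` functions run at startup and
// describe one module's inline 8-bit counters and PC table, the `trace_cmp*`,
// `trace_switch`, and `trace_pc_indir` hooks fire on comparisons, switches,
// and indirect calls, and `__sancov_lowest_stack` is maintained by the
// stack-depth instrumentation. Only a single instrumented module is
// supported, hence the assertions on the module counters.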
export threadlocal var __sancov_lowest_stack: usize = 0;

var module_count_8bc: usize = 0;
var module_count_pcs: usize = 0;

export fn __sanitizer_cov_8bit_counters_init(start: [*]u8, end: [*]u8) void {
    assert(@atomicRmw(usize, &module_count_8bc, .Add, 1, .monotonic) == 0);
    fuzzer.pc_counters = start[0 .. end - start];
}

export fn __sanitizer_cov_pcs_init(start: [*]const Fuzzer.FlaggedPc, end: [*]const Fuzzer.FlaggedPc) void {
    assert(@atomicRmw(usize, &module_count_pcs, .Add, 1, .monotonic) == 0);
    fuzzer.flagged_pcs = start[0 .. end - start];
}

export fn __sanitizer_cov_trace_const_cmp1(arg1: u8, arg2: u8) void {
    handleCmp(@returnAddress(), arg1, arg2);
}

export fn __sanitizer_cov_trace_cmp1(arg1: u8, arg2: u8) void {
    handleCmp(@returnAddress(), arg1, arg2);
}

export fn __sanitizer_cov_trace_const_cmp2(arg1: u16, arg2: u16) void {
    handleCmp(@returnAddress(), arg1, arg2);
}

export fn __sanitizer_cov_trace_cmp2(arg1: u16, arg2: u16) void {
    handleCmp(@returnAddress(), arg1, arg2);
}

export fn __sanitizer_cov_trace_const_cmp4(arg1: u32, arg2: u32) void {
    handleCmp(@returnAddress(), arg1, arg2);
}

export fn __sanitizer_cov_trace_cmp4(arg1: u32, arg2: u32) void {
    handleCmp(@returnAddress(), arg1, arg2);
}

export fn __sanitizer_cov_trace_const_cmp8(arg1: u64, arg2: u64) void {
    handleCmp(@returnAddress(), arg1, arg2);
}

export fn __sanitizer_cov_trace_cmp8(arg1: u64, arg2: u64) void {
    handleCmp(@returnAddress(), arg1, arg2);
}

export fn __sanitizer_cov_trace_switch(val: u64, cases_ptr: [*]u64) void {
    const pc = @returnAddress();
    const len = cases_ptr[0];
    const val_size_in_bits = cases_ptr[1];
    const cases = cases_ptr[2..][0..len];
    _ = val;
    fuzzer.visitPc(pc);
    _ = val_size_in_bits;
    _ = cases;
    //std.log.debug("0x{x}: switch on value {d} ({d} bits) with {d} cases", .{
    //    pc, val, val_size_in_bits, cases.len,
    //});
}

export fn __sanitizer_cov_trace_pc_indir(callee: usize) void {
    const pc = @returnAddress();
    _ = callee;
    fuzzer.visitPc(pc);
    //std.log.debug("0x{x}: indirect call to 0x{x}", .{ pc, callee });
}
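
// Folding the operands into the PC before recording it means that the same
// comparison site reached with different values registers as a distinct
// coverage point, a cheap approximation of libFuzzer-style value profiling.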
fn handleCmp(pc: usize, arg1: u64, arg2: u64) void {
    fuzzer.visitPc(pc ^ arg1 ^ arg2);
    //std.log.debug("0x{x}: comparison of {d} and {d}", .{ pc, arg1, arg2 });
}
const Fuzzer = struct {
    gpa: Allocator,
    rng: std.Random.DefaultPrng,
    input: std.ArrayListUnmanaged(u8),
    flagged_pcs: []const FlaggedPc,
    pc_counters: []u8,
    /// Number of fuzzing runs performed so far.
    n_runs: usize,
    /// Corpus of the most interesting recent runs, keyed by coverage id and
    /// periodically culled to the 50 highest-scoring entries.
    recent_cases: RunMap,
    /// Number of runs whose coverage id matched an already-known run.
    deduplicated_runs: usize,
    /// Data collected from code coverage instrumentation from one execution of
    /// the test function.
    coverage: Coverage,
    /// Tracks which PCs have been seen across all runs that do not crash the
    /// fuzzer process. Stored in a memory-mapped file so that it can be shared
    /// with other processes and viewed while the fuzzer is running.
    seen_pcs: MemoryMappedList,
    dir: std.fs.Dir,

    const SeenPcsHeader = extern struct {
        n_runs: usize,
        pcs_len: usize,
        lowest_stack: usize,
    };

    const RunMap = std.ArrayHashMapUnmanaged(Run, void, Run.HashContext, false);

    const Coverage = struct {
        pc_table: std.AutoArrayHashMapUnmanaged(usize, void),
        run_id_hasher: std.hash.Wyhash,

        fn reset(cov: *Coverage) void {
            cov.pc_table.clearRetainingCapacity();
            cov.run_id_hasher = std.hash.Wyhash.init(0);
        }
    };

    const Run = struct {
        id: Id,
        input: []const u8,
        score: usize,

        const Id = u64;

        const HashContext = struct {
            pub fn eql(ctx: HashContext, a: Run, b: Run, b_index: usize) bool {
                _ = b_index;
                _ = ctx;
                return a.id == b.id;
            }
            pub fn hash(ctx: HashContext, a: Run) u32 {
                _ = ctx;
                return @truncate(a.id);
            }
        };

        fn deinit(run: *Run, gpa: Allocator) void {
            gpa.free(run.input);
            run.* = undefined;
        }
    };

    /// ABI-stable representation of a byte slice, used to pass the next input
    /// across the `fuzzer_next` export boundary.
    const Slice = extern struct {
        ptr: [*]const u8,
        len: usize,

        fn toZig(s: Slice) []const u8 {
            return s.ptr[0..s.len];
        }

        fn fromZig(s: []const u8) Slice {
            return .{
                .ptr = s.ptr,
                .len = s.len,
            };
        }
    };

    const FlaggedPc = extern struct {
        addr: usize,
        flags: packed struct(usize) {
            entry: bool,
            _: @Type(.{ .Int = .{ .signedness = .unsigned, .bits = @bitSizeOf(usize) - 1 } }),
        },
    };

    const Analysis = struct {
        score: usize,
        id: Run.Id,
    };

    fn init(f: *Fuzzer, dir: std.fs.Dir) !void {
        f.dir = dir;

        // Layout of this file:
        // - Header
        // - list of PC addresses (usize elements)
        // - list of hit flags, 1 bit per address (stored in u8 elements)
        const coverage_file = dir.createFile("coverage", .{
            .read = true,
            .truncate = false,
        }) catch |err| fatal("unable to create coverage file: {s}", .{@errorName(err)});
        const flagged_pcs = f.flagged_pcs;
        const n_bitset_elems = (flagged_pcs.len + 7) / 8;
        const bytes_len = @sizeOf(SeenPcsHeader) + flagged_pcs.len * @sizeOf(usize) + n_bitset_elems;
        const existing_len = coverage_file.getEndPos() catch |err| {
            fatal("unable to check len of coverage file: {s}", .{@errorName(err)});
        };
        if (existing_len == 0) {
            coverage_file.setEndPos(bytes_len) catch |err| {
                fatal("unable to set len of coverage file: {s}", .{@errorName(err)});
            };
        } else if (existing_len != bytes_len) {
            fatal("incompatible existing coverage file (differing lengths)", .{});
        }
        f.seen_pcs = MemoryMappedList.init(coverage_file, existing_len, bytes_len) catch |err| {
            fatal("unable to init coverage memory map: {s}", .{@errorName(err)});
        };
        if (existing_len != 0) {
            // Reusing an existing file: its PC table must match this binary.
            const existing_pcs = std.mem.bytesAsSlice(usize, f.seen_pcs.items[@sizeOf(SeenPcsHeader)..][0 .. flagged_pcs.len * @sizeOf(usize)]);
            for (existing_pcs, flagged_pcs, 0..) |old, new, i| {
                if (old != new.addr) {
                    fatal("incompatible existing coverage file (differing PC at index {d}: {x} != {x})", .{
                        i, old, new.addr,
                    });
                }
            }
        } else {
            const header: SeenPcsHeader = .{
                .n_runs = 0,
                .pcs_len = flagged_pcs.len,
                .lowest_stack = std.math.maxInt(usize),
            };
            f.seen_pcs.appendSliceAssumeCapacity(std.mem.asBytes(&header));
            for (flagged_pcs) |flagged_pc| {
                f.seen_pcs.appendSliceAssumeCapacity(std.mem.asBytes(&flagged_pc.addr));
            }
            f.seen_pcs.appendNTimesAssumeCapacity(0, n_bitset_elems);
        }
    }
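
    // Illustrative sketch, not part of the upstream file: a viewer process
    // that maps the same "coverage" file could decode it like this (the
    // `bytes` variable is hypothetical):
    //
    //     const header = std.mem.bytesToValue(SeenPcsHeader, bytes[0..@sizeOf(SeenPcsHeader)]);
    //     const pcs_bytes = bytes[@sizeOf(SeenPcsHeader)..][0 .. header.pcs_len * @sizeOf(usize)];
    //     const pcs = std.mem.bytesAsSlice(usize, pcs_bytes);
    //     const seen_bits = bytes[@sizeOf(SeenPcsHeader) + pcs_bytes.len ..];
    //     // PC pcs[i] has been covered iff (seen_bits[i / 8] >> @intCast(i % 8)) & 1 != 0.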
    fn analyzeLastRun(f: *Fuzzer) Analysis {
        return .{
            .id = f.coverage.run_id_hasher.final(),
            .score = f.coverage.pc_table.count(),
        };
    }

    fn next(f: *Fuzzer) ![]const u8 {
        const gpa = f.gpa;
        const rng = fuzzer.rng.random();

        if (f.recent_cases.entries.len == 0) {
            // Prepare initial input.
            try f.recent_cases.ensureUnusedCapacity(gpa, 100);
            const len = rng.uintLessThanBiased(usize, 80);
            try f.input.resize(gpa, len);
            rng.bytes(f.input.items);
            f.recent_cases.putAssumeCapacity(.{
                .id = 0,
                .input = try gpa.dupe(u8, f.input.items),
                .score = 0,
            }, {});
        } else {
            if (f.n_runs % 1000 == 0) f.dumpStats();

            const analysis = f.analyzeLastRun();
            const gop = f.recent_cases.getOrPutAssumeCapacity(.{
                .id = analysis.id,
                .input = undefined,
                .score = undefined,
            });
            if (gop.found_existing) {
                //std.log.info("duplicate analysis: score={d} id={d}", .{ analysis.score, analysis.id });
                f.deduplicated_runs += 1;
                // Prefer the shorter input for an already-known coverage id.
                if (f.input.items.len < gop.key_ptr.input.len or gop.key_ptr.score == 0) {
                    gpa.free(gop.key_ptr.input);
                    gop.key_ptr.input = try gpa.dupe(u8, f.input.items);
                    gop.key_ptr.score = analysis.score;
                }
            } else {
                std.log.info("unique analysis: score={d} id={d}", .{ analysis.score, analysis.id });
                gop.key_ptr.* = .{
                    .id = analysis.id,
                    .input = try gpa.dupe(u8, f.input.items),
                    .score = analysis.score,
                };

                // Track code coverage from all runs: fold this run's 8-bit
                // counters into the shared bit set. Bit `j` of bitset byte `i`
                // records whether counter `i * 8 + j` was ever nonzero; the
                // final byte may cover fewer than 8 counters.
                {
                    const seen_pcs = f.seen_pcs.items[@sizeOf(SeenPcsHeader) + f.flagged_pcs.len * @sizeOf(usize) ..];
                    for (seen_pcs, 0..) |*elem, i| {
                        const counters = f.pc_counters[i * 8 .. @min(i * 8 + 8, f.pc_counters.len)];
                        var mask: u8 = 0;
                        for (counters, 0..) |counter, j| {
                            mask |= @as(u8, @intFromBool(counter != 0)) << @intCast(j);
                        }
                        _ = @atomicRmw(u8, elem, .Or, mask, .monotonic);
                    }
                }
            }

            if (f.recent_cases.entries.len >= 100) {
                const Context = struct {
                    values: []const Run,
                    pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool {
                        return ctx.values[b_index].score < ctx.values[a_index].score;
                    }
                };
                f.recent_cases.sortUnstable(Context{ .values = f.recent_cases.keys() });
                const cap = 50;
                // This has to be done before deinitializing the deleted items.
                const doomed_runs = f.recent_cases.keys()[cap..];
                f.recent_cases.shrinkRetainingCapacity(cap);
                for (doomed_runs) |*run| {
                    std.log.info("culling score={d} id={d}", .{ run.score, run.id });
                    run.deinit(gpa);
                }
            }
        }

        // Pick a random corpus entry, mutate it, and hand it to the test runner.
        const chosen_index = rng.uintLessThanBiased(usize, f.recent_cases.entries.len);
        const run = &f.recent_cases.keys()[chosen_index];
        f.input.clearRetainingCapacity();
        f.input.appendSliceAssumeCapacity(run.input);
        try f.mutate();

        f.n_runs += 1;
        const header: *volatile SeenPcsHeader = @ptrCast(f.seen_pcs.items[0..@sizeOf(SeenPcsHeader)]);
        _ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic);
        _ = @atomicRmw(usize, &header.lowest_stack, .Min, __sancov_lowest_stack, .monotonic);
        @memset(f.pc_counters, 0);
        f.coverage.reset();
        return f.input.items;
    }
    fn visitPc(f: *Fuzzer, pc: usize) void {
        errdefer |err| oom(err);
        try f.coverage.pc_table.put(f.gpa, pc, {});
        f.coverage.run_id_hasher.update(std.mem.asBytes(&pc));
    }

    fn dumpStats(f: *Fuzzer) void {
        std.log.info("stats: runs={d} deduplicated={d}", .{
            f.n_runs, f.deduplicated_runs,
        });
        for (f.recent_cases.keys()[0..@min(f.recent_cases.entries.len, 5)], 0..) |run, i| {
            std.log.info("best[{d}] id={x} score={d} input: '{}'", .{
                i, run.id, run.score, std.zig.fmtEscapes(run.input),
            });
        }
    }

    fn mutate(f: *Fuzzer) !void {
        const gpa = f.gpa;
        const rng = fuzzer.rng.random();

        if (f.input.items.len == 0) {
            const len = rng.uintLessThanBiased(usize, 80);
            try f.input.resize(gpa, len);
            rng.bytes(f.input.items);
            return;
        }

        // One index in [0, 3n) selects both the mutation kind (each with
        // probability 1/3) and the byte position it applies to.
        const index = rng.uintLessThanBiased(usize, f.input.items.len * 3);
        if (index < f.input.items.len) {
            f.input.items[index] = rng.int(u8);
        } else if (index < f.input.items.len * 2) {
            _ = f.input.orderedRemove(index - f.input.items.len);
        } else if (index < f.input.items.len * 3) {
            try f.input.insert(gpa, index - f.input.items.len * 2, rng.int(u8));
        } else {
            unreachable;
        }
    }
};
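
// A minimal sketch of the mutation step's invariant; the upstream file ships
// no tests, so this block is illustrative only. It drives `mutate` through
// the global `fuzzer` state, whose rng and input list are statically
// initialized below.
test "mutate changes input length by at most one" {
    const gpa = fuzzer.gpa;
    try fuzzer.input.appendSlice(gpa, "hello fuzzer");
    defer fuzzer.input.clearAndFree(gpa);
    const before = fuzzer.input.items.len;
    // One mutation either overwrites, removes, or inserts a single byte.
    try fuzzer.mutate();
    const after = fuzzer.input.items.len;
    try std.testing.expect(after >= before - 1 and after <= before + 1);
}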
fn oom(err: anytype) noreturn {
    switch (err) {
        error.OutOfMemory => @panic("out of memory"),
    }
}

var general_purpose_allocator: std.heap.GeneralPurposeAllocator(.{}) = .{};

var fuzzer: Fuzzer = .{
    .gpa = general_purpose_allocator.allocator(),
    .rng = std.Random.DefaultPrng.init(0),
    .input = .{},
    .flagged_pcs = undefined,
    .pc_counters = undefined,
    .n_runs = 0,
    .deduplicated_runs = 0,
    .recent_cases = .{},
    .coverage = undefined,
    .dir = undefined,
    .seen_pcs = undefined,
};

export fn fuzzer_next() Fuzzer.Slice {
    return Fuzzer.Slice.fromZig(fuzzer.next() catch |err| switch (err) {
        error.OutOfMemory => @panic("out of memory"),
    });
}

export fn fuzzer_init() void {
    if (module_count_8bc == 0) fatal("__sanitizer_cov_8bit_counters_init was never called", .{});
    if (module_count_pcs == 0) fatal("__sanitizer_cov_pcs_init was never called", .{});

    // TODO: move this to .zig-cache/f
    const fuzz_dir = std.fs.cwd().makeOpenPath("f", .{ .iterate = true }) catch |err| {
        fatal("unable to open fuzz directory 'f': {s}", .{@errorName(err)});
    };

    fuzzer.init(fuzz_dir) catch |err| fatal("unable to init fuzzer: {s}", .{@errorName(err)});
}
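
// Illustrative sketch, not quoted from the actual test runner: the
// instrumented test binary is expected to drive the exports above roughly
// like this, where `runOneTest` stands in for the real test dispatch.
//
//     extern fn fuzzer_init() void;
//     extern fn fuzzer_next() Fuzzer.Slice;
//
//     fuzzer_init();
//     while (true) {
//         const slice = fuzzer_next();
//         runOneTest(slice.ptr[0..slice.len]);
//     }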
/// Like `std.ArrayListUnmanaged(u8)` but backed by a memory mapping.
pub const MemoryMappedList = struct {
    /// Contents of the list.
    ///
    /// The capacity of the mapping is fixed at `init` time and the list never
    /// reallocates, so element pointers remain valid for the lifetime of the
    /// mapping. The memory is shared with any other process that maps the
    /// same file, hence `volatile`.
    items: []align(std.mem.page_size) volatile u8,
    /// How many bytes this list can hold; fixed at `init` time.
    capacity: usize,

    pub fn init(file: std.fs.File, length: usize, capacity: usize) !MemoryMappedList {
        const ptr = try std.posix.mmap(
            null,
            capacity,
            std.posix.PROT.READ | std.posix.PROT.WRITE,
            .{ .TYPE = .SHARED },
            file.handle,
            0,
        );
        return .{
            .items = ptr[0..length],
            .capacity = capacity,
        };
    }

    /// Append the slice of items to the list.
    /// Asserts that the list can hold the additional items.
    pub fn appendSliceAssumeCapacity(l: *MemoryMappedList, items: []const u8) void {
        const old_len = l.items.len;
        const new_len = old_len + items.len;
        assert(new_len <= l.capacity);
        l.items.len = new_len;
        @memcpy(l.items[old_len..][0..items.len], items);
    }

    /// Append a value to the list `n` times.
    /// Never invalidates element pointers.
    /// The function is inline so that a comptime-known `value` parameter will
    /// have better memset codegen in case it has a repeated byte pattern.
    /// Asserts that the list can hold the additional items.
    pub inline fn appendNTimesAssumeCapacity(l: *MemoryMappedList, value: u8, n: usize) void {
        const new_len = l.items.len + n;
        assert(new_len <= l.capacity);
        @memset(l.items.ptr[l.items.len..new_len], value);
        l.items.len = new_len;
    }
};
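
// A minimal usage sketch for MemoryMappedList, assuming a POSIX target (init
// calls std.posix.mmap) and that adding a test block here is acceptable; the
// upstream file ships without tests. The mapping is intentionally left to be
// reclaimed at process exit.
test "MemoryMappedList appends through a shared file mapping" {
    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();

    // The file must be at least `capacity` bytes long before mapping it.
    const file = try tmp.dir.createFile("coverage-example", .{ .read = true, .truncate = false });
    defer file.close();
    try file.setEndPos(std.mem.page_size);

    var list = try MemoryMappedList.init(file, 0, std.mem.page_size);
    list.appendSliceAssumeCapacity("hello");
    list.appendNTimesAssumeCapacity(0, 3);
    try std.testing.expectEqual(@as(usize, 8), list.items.len);

    // Copy out of the volatile mapping before comparing.
    var buf: [5]u8 = undefined;
    for (&buf, list.items[0..5]) |*d, s| d.* = s;
    try std.testing.expectEqualSlices(u8, "hello", &buf);
}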