2020-03-05 07:07:17 +00:00
|
|
|
const Blake3 = @import("crypto.zig").Blake3;
|
|
|
|
const fs = @import("fs.zig");
|
|
|
|
const base64 = @import("base64.zig");
|
|
|
|
const ArrayList = @import("array_list.zig").ArrayList;
|
|
|
|
const debug = @import("debug.zig");
|
|
|
|
const testing = @import("testing.zig");
|
|
|
|
const mem = @import("mem.zig");
|
|
|
|
const fmt = @import("fmt.zig");
|
|
|
|
const Allocator = mem.Allocator;
|
|
|
|
const os = @import("os.zig");
|
2020-04-11 22:01:17 +00:00
|
|
|
const time = @import("time.zig");
|
2020-03-05 07:07:17 +00:00
|
|
|
|
2020-03-07 03:18:34 +00:00
|
|
|
const base64_encoder = fs.base64_encoder;
|
|
|
|
const base64_decoder = fs.base64_decoder;
|
2020-03-07 05:27:20 +00:00
|
|
|
const BIN_DIGEST_LEN = 48;
|
2020-03-07 05:19:43 +00:00
|
|
|
const BASE64_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN);
|
2020-03-05 07:07:17 +00:00
|
|
|
|
2020-05-01 02:00:26 +00:00
|
|
|
const MANIFEST_FILE_SIZE_MAX = 50 * 1024 * 1024;
|
|
|
|
|
2020-03-07 03:23:15 +00:00
|
|
|
/// One file input of a cached operation, together with the metadata used to
/// decide whether the cache entry is still valid.
pub const File = struct {
    /// Resolved path of the input file. Owned by the allocator passed to
    /// `deinit` once set; null until populated.
    path: ?[]const u8,
    /// Upper bound on how many bytes of the file may be kept in `contents`.
    /// Null means the contents are never loaded into memory.
    max_file_size: ?usize,
    /// Stat info (mtime/inode) recorded when the file was last hashed.
    stat: fs.File.Stat,
    /// Raw Blake3 digest of the file contents.
    bin_digest: [BIN_DIGEST_LEN]u8,
    /// File contents, if they were loaded (see `max_file_size`).
    contents: ?[]const u8 = null,

    /// Free any owned memory and reset the owning fields to null so a double
    /// `deinit` is harmless.
    pub fn deinit(self: *@This(), alloc: *Allocator) void {
        if (self.contents) |bytes| {
            alloc.free(bytes);
            self.contents = null;
        }
        if (self.path) |p| {
            alloc.free(p);
            self.path = null;
        }
    }
};
|
|
|
|
|
|
|
|
/// Manages one cache-manifest transaction: inputs are folded into a Blake3
/// hash, compared against a manifest file stored in `manifest_dir`, and the
/// manifest is rewritten on `release` if anything changed.
pub const CacheHash = struct {
    alloc: *Allocator,
    blake3: Blake3,
    manifest_dir: fs.Dir,
    /// Held open (and exclusively locked) from `hit` until `release`.
    manifest_file: ?fs.File,
    /// Set when the on-disk manifest no longer matches `files`.
    manifest_dirty: bool,
    /// Input files, in the order they were added. Entries past the initial
    /// input count are ones discovered in the manifest or added post-hit.
    files: ArrayList(File),
    /// Base64 digest of the initial inputs; filled in by `hit`.
    b64_digest: [BASE64_DIGEST_LEN]u8,

    /// Create the manifest directory if needed and return an empty CacheHash.
    /// No manifest file is opened until `hit` is called.
    pub fn init(alloc: *Allocator, manifest_dir_path: []const u8) !@This() {
        try fs.cwd().makePath(manifest_dir_path);
        const manifest_dir = try fs.cwd().openDir(manifest_dir_path, .{});

        return CacheHash{
            .alloc = alloc,
            .blake3 = Blake3.init(),
            .manifest_dir = manifest_dir,
            .manifest_file = null,
            .manifest_dirty = false,
            .files = ArrayList(File).init(alloc),
            .b64_digest = undefined,
        };
    }

    /// Record a slice of bytes as a dependency of the process being cached.
    pub fn addSlice(self: *@This(), val: []const u8) void {
        debug.assert(self.manifest_file == null);

        self.blake3.update(val);
        // Zero-byte separator so adjacent inputs cannot run together,
        // e.g. ("ab","c") hashes differently from ("a","bc").
        self.blake3.update(&[_]u8{0});
    }

    /// Convert the input value into bytes and record it as a dependency of the
    /// process being cached.
    pub fn add(self: *@This(), val: var) void {
        debug.assert(self.manifest_file == null);

        // Pointers are hashed through their pointee; every other value is
        // hashed via its in-memory byte representation.
        const valPtr = switch (@typeInfo(@TypeOf(val))) {
            .Int => &val,
            .Pointer => val,
            else => &val,
        };

        self.addSlice(mem.asBytes(valPtr));
    }

    /// Add a file as a dependency of process being cached. When `CacheHash.hit` is
    /// called, the file's contents will be checked to ensure that it matches
    /// the contents from previous times.
    ///
    /// Max file size will be used to determine the amount of space the file contents
    /// are allowed to take up in memory. If max_file_size is null, then the contents
    /// will not be loaded into memory.
    ///
    /// Returns the index of the entry in the `CacheHash.files` ArrayList. You can use it
    /// to access the contents of the file after calling `CacheHash.hit()` like so:
    ///
    /// ```
    /// var file_contents = cache_hash.files.items[file_index].contents.?;
    /// ```
    pub fn addFile(self: *@This(), file_path: []const u8, max_file_size: ?usize) !usize {
        debug.assert(self.manifest_file == null);

        const idx = self.files.items.len;
        var cache_hash_file = try self.files.addOne();
        cache_hash_file.path = try fs.path.resolve(self.alloc, &[_][]const u8{file_path});
        cache_hash_file.max_file_size = max_file_size;
        cache_hash_file.contents = null;

        // Only the resolved path participates in the initial hash; the file
        // contents are checked later, in `hit`.
        self.addSlice(cache_hash_file.path.?);

        return idx;
    }

    /// Check the cache to see if the input exists in it. If it exists, a base64 encoding
    /// of its hash will be returned; otherwise, null will be returned.
    ///
    /// This function will also acquire an exclusive lock to the manifest file. This means
    /// that a process holding a CacheHash will block any other process attempting to
    /// acquire the lock.
    ///
    /// The lock on the manifest file is released when `CacheHash.release` is called.
    pub fn hit(self: *@This()) !?[BASE64_DIGEST_LEN]u8 {
        debug.assert(self.manifest_file == null);

        var bin_digest: [BIN_DIGEST_LEN]u8 = undefined;
        self.blake3.final(&bin_digest);

        base64_encoder.encode(self.b64_digest[0..], &bin_digest);

        // Restart the hasher seeded with the initial-input digest; the file
        // digests are folded in below.
        self.blake3 = Blake3.init();
        self.blake3.update(&bin_digest);

        const manifest_file_path = try fmt.allocPrint(self.alloc, "{}.txt", .{self.b64_digest});
        defer self.alloc.free(manifest_file_path);

        if (self.files.items.len != 0) {
            self.manifest_file = try self.manifest_dir.createFile(manifest_file_path, .{
                .read = true,
                .truncate = false,
                .lock = .Exclusive,
            });
        } else {
            // If there are no file inputs, we check if the manifest file exists instead of
            // comparing the hashes on the files used for the cached item
            self.manifest_file = self.manifest_dir.openFile(manifest_file_path, .{
                .read = true,
                .write = true,
                .lock = .Exclusive,
            }) catch |err| switch (err) {
                error.FileNotFound => {
                    self.manifest_dirty = true;
                    self.manifest_file = try self.manifest_dir.createFile(manifest_file_path, .{
                        .read = true,
                        .truncate = false,
                        .lock = .Exclusive,
                    });
                    return null;
                },
                else => |e| return e,
            };
        }

        const file_contents = try self.manifest_file.?.inStream().readAllAlloc(self.alloc, MANIFEST_FILE_SIZE_MAX);
        defer self.alloc.free(file_contents);

        const input_file_count = self.files.items.len;
        var any_file_changed = false;
        var line_iter = mem.tokenize(file_contents, "\n");
        var idx: usize = 0;
        while (line_iter.next()) |line| {
            defer idx += 1;

            // Reuse the caller-added entry when one exists; otherwise this is
            // a file discovered in the manifest (added via addFilePost).
            var cache_hash_file: *File = undefined;
            if (idx < input_file_count) {
                cache_hash_file = &self.files.items[idx];
            } else {
                cache_hash_file = try self.files.addOne();
                cache_hash_file.path = null;
                cache_hash_file.max_file_size = null;
                cache_hash_file.contents = null;
            }

            // Manifest line format (see write_manifest): inode mtime digest path
            var iter = mem.tokenize(line, " ");
            const inode = iter.next() orelse return error.InvalidFormat;
            const mtime_nsec_str = iter.next() orelse return error.InvalidFormat;
            const digest_str = iter.next() orelse return error.InvalidFormat;
            const file_path = iter.rest();

            // BUG FIX: the inode column was previously parsed from
            // `mtime_nsec_str`, so the stored inode was always the mtime value
            // and the inode comparison below could never work as intended.
            cache_hash_file.stat.inode = fmt.parseInt(os.ino_t, inode, 10) catch return error.InvalidFormat;
            cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat;
            base64_decoder.decode(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat;

            if (file_path.len == 0) {
                return error.InvalidFormat;
            }
            if (cache_hash_file.path) |p| {
                if (!mem.eql(u8, file_path, p)) {
                    return error.InvalidFormat;
                }
            }

            if (cache_hash_file.path == null) {
                cache_hash_file.path = try mem.dupe(self.alloc, u8, file_path);
            }

            const this_file = fs.cwd().openFile(cache_hash_file.path.?, .{ .read = true }) catch {
                return error.CacheUnavailable;
            };
            defer this_file.close();

            const actual_stat = try this_file.stat();
            const mtime_match = actual_stat.mtime == cache_hash_file.stat.mtime;
            const inode_match = actual_stat.inode == cache_hash_file.stat.inode;

            if (!mtime_match or !inode_match) {
                self.manifest_dirty = true;

                cache_hash_file.stat = actual_stat;

                if (is_problematic_timestamp(cache_hash_file.stat.mtime)) {
                    cache_hash_file.stat.mtime = 0;
                    cache_hash_file.stat.inode = 0;
                }

                var actual_digest: [BIN_DIGEST_LEN]u8 = undefined;
                cache_hash_file.contents = try hash_file(self.alloc, &actual_digest, &this_file, cache_hash_file.max_file_size);

                if (!mem.eql(u8, &cache_hash_file.bin_digest, &actual_digest)) {
                    mem.copy(u8, &cache_hash_file.bin_digest, &actual_digest);
                    // keep going until we have the input file digests
                    any_file_changed = true;
                }
            }

            if (!any_file_changed) {
                self.blake3.update(&cache_hash_file.bin_digest);
            }
        }

        if (any_file_changed) {
            // cache miss
            // keep the manifest file open
            // reset the hash
            self.blake3 = Blake3.init();
            self.blake3.update(&bin_digest);

            // Remove files not in the initial hash
            for (self.files.items[input_file_count..]) |*file| {
                file.deinit(self.alloc);
            }
            try self.files.resize(input_file_count);

            for (self.files.items) |file| {
                self.blake3.update(&file.bin_digest);
            }
            return null;
        }

        if (idx < input_file_count) {
            // The manifest had fewer entries than we have inputs: treat as a
            // miss and hash the remaining inputs ourselves.
            self.manifest_dirty = true;
            while (idx < input_file_count) : (idx += 1) {
                var cache_hash_file = &self.files.items[idx];
                // BUG FIX: `populate_file_hash` returns `!void`; the previous
                // code bound its void result to an unused `contents` constant
                // (with an unused error capture).
                self.populate_file_hash(cache_hash_file) catch return error.CacheUnavailable;
            }
            return null;
        }

        return self.final();
    }

    /// Open, stat, and hash the file at `cache_hash_file.path`, folding the
    /// digest into the running hash. Returns the file contents (allocated
    /// with `otherAlloc`) when the entry has a max_file_size, null otherwise.
    fn populate_file_hash_fetch(self: *@This(), otherAlloc: *mem.Allocator, cache_hash_file: *File) !?[]u8 {
        debug.assert(cache_hash_file.path != null);

        const this_file = try fs.cwd().openFile(cache_hash_file.path.?, .{});
        defer this_file.close();

        cache_hash_file.stat = try this_file.stat();

        // An mtime equal to "now" (at mtime precision) cannot be trusted for
        // change detection; force an unconditional re-hash next time.
        if (is_problematic_timestamp(cache_hash_file.stat.mtime)) {
            cache_hash_file.stat.mtime = 0;
            cache_hash_file.stat.inode = 0;
        }

        const contents = try hash_file(otherAlloc, &cache_hash_file.bin_digest, &this_file, cache_hash_file.max_file_size);
        self.blake3.update(&cache_hash_file.bin_digest);

        return contents;
    }

    /// Like `populate_file_hash_fetch`, but stores any fetched contents on the
    /// file entry itself (allocated with `self.alloc`).
    fn populate_file_hash(self: *@This(), cache_hash_file: *File) !void {
        cache_hash_file.contents = try self.populate_file_hash_fetch(self.alloc, cache_hash_file);
    }

    /// Add a file as a dependency of process being cached, after the initial hash has been
    /// calculated. This is useful for processes that don't know all the files that
    /// are depended on ahead of time. For example, a source file that can import other files
    /// will need to be recompiled if the imported file is changed.
    ///
    /// Returns the contents of the file, allocated with the given allocator.
    pub fn addFilePostFetch(self: *@This(), otherAlloc: *mem.Allocator, file_path: []const u8, max_file_size_opt: ?usize) !?[]u8 {
        debug.assert(self.manifest_file != null);

        var cache_hash_file = try self.files.addOne();
        cache_hash_file.path = try fs.path.resolve(self.alloc, &[_][]const u8{file_path});
        cache_hash_file.max_file_size = max_file_size_opt;
        cache_hash_file.contents = null;

        const contents = try self.populate_file_hash_fetch(otherAlloc, cache_hash_file);

        return contents;
    }

    /// Add a file as a dependency of process being cached, after the initial hash has been
    /// calculated. This is useful for processes that don't know all the files that
    /// are depended on ahead of time. For example, a source file that can import other files
    /// will need to be recompiled if the imported file is changed.
    pub fn addFilePost(self: *@This(), file_path: []const u8) !void {
        // max_file_size null => populate_file_hash_fetch returns null, so
        // there is nothing to free here.
        _ = try self.addFilePostFetch(self.alloc, file_path, null);
    }

    /// Returns a base64 encoded hash of the inputs.
    pub fn final(self: *@This()) [BASE64_DIGEST_LEN]u8 {
        debug.assert(self.manifest_file != null);

        // We don't close the manifest file yet, because we want to
        // keep it locked until the API user is done using it.
        // We also don't write out the manifest yet, because until
        // cache_release is called we still might be working on creating
        // the artifacts to cache.

        var bin_digest: [BIN_DIGEST_LEN]u8 = undefined;
        self.blake3.final(&bin_digest);

        var out_digest: [BASE64_DIGEST_LEN]u8 = undefined;
        base64_encoder.encode(&out_digest, &bin_digest);

        return out_digest;
    }

    /// Serialize `files` (one "inode mtime digest path" line per entry) and
    /// overwrite the manifest file from offset 0.
    pub fn write_manifest(self: *@This()) !void {
        debug.assert(self.manifest_file != null);

        var encoded_digest: [BASE64_DIGEST_LEN]u8 = undefined;
        var contents = ArrayList(u8).init(self.alloc);
        var outStream = contents.outStream();
        defer contents.deinit();

        for (self.files.items) |file| {
            base64_encoder.encode(encoded_digest[0..], &file.bin_digest);
            try outStream.print("{} {} {} {}\n", .{ file.stat.inode, file.stat.mtime, encoded_digest[0..], file.path });
        }

        // NOTE(review): the file is opened with truncate=false and is not
        // truncated here, so a new manifest shorter than the old one leaves
        // stale trailing lines behind — confirm whether the file length can
        // shrink in practice, and truncate after writing if so.
        try self.manifest_file.?.seekTo(0);
        try self.manifest_file.?.writeAll(contents.items);
    }

    /// Releases the manifest file and frees any memory the CacheHash was using.
    /// `CacheHash.hit` must be called first.
    ///
    /// Will also attempt to write to the manifest file if the manifest is dirty.
    /// Writing to the manifest file is the only way that this function can return an
    /// error.
    pub fn release(self: *@This()) !void {
        if (self.manifest_file) |file| {
            if (self.manifest_dirty) {
                try self.write_manifest();
            }

            file.close();
        }

        for (self.files.items) |*file| {
            file.deinit(self.alloc);
        }
        self.files.deinit();
        self.manifest_dir.close();
    }
};
|
|
|
|
|
2020-04-15 01:33:02 +00:00
|
|
|
/// Compute the Blake3 digest of `handle`'s contents into `bin_digest`.
/// When `max_file_size_opt` is non-null, the whole file (at most that many
/// bytes) is read into a buffer that is also returned — the caller owns it.
/// When it is null, the file is hashed in fixed-size chunks and null is
/// returned.
fn hash_file(alloc: *Allocator, bin_digest: []u8, handle: *const fs.File, max_file_size_opt: ?usize) !?[]u8 {
    var hasher = Blake3.init();
    var stream = handle.inStream();

    const max_file_size = max_file_size_opt orelse {
        // No size limit given: stream the file through the hasher without
        // retaining its contents.
        var chunk: [1024]u8 = undefined;
        while (true) {
            const amt = try stream.read(chunk[0..]);
            if (amt == 0) break;
            hasher.update(chunk[0..amt]);
        }
        hasher.final(bin_digest);
        return null;
    };

    const contents = try stream.readAllAlloc(alloc, max_file_size);
    hasher.update(contents);
    hasher.final(bin_digest);
    return contents;
}
|
2020-03-06 05:59:19 +00:00
|
|
|
|
2020-04-11 22:01:17 +00:00
|
|
|
/// If the wall clock time, rounded to the same precision as the
/// mtime, is equal to the mtime, then we cannot rely on this mtime
/// yet. We will instead save an mtime value that indicates the hash
/// must be unconditionally computed.
fn is_problematic_timestamp(file_mtime_ns: i64) bool {
    // Compare at millisecond precision: an mtime in the current wall-clock
    // millisecond may still be overwritten without the mtime changing.
    return time.milliTimestamp() == @divFloor(file_mtime_ns, time.millisecond);
}
|
|
|
|
|
2020-04-08 05:57:59 +00:00
|
|
|
test "cache file and then recall it" {
    const cwd = fs.cwd();

    const temp_file = "test.txt";
    const temp_manifest_dir = "temp_manifest_dir";

    try cwd.writeFile(temp_file, "Hello, world!\n");

    var digest1: [BASE64_DIGEST_LEN]u8 = undefined;
    var digest2: [BASE64_DIGEST_LEN]u8 = undefined;

    {
        var ch = try CacheHash.init(testing.allocator, temp_manifest_dir);
        defer ch.release() catch unreachable;

        ch.add(true);
        ch.add(@as(u16, 1234));
        ch.add("1234");
        _ = try ch.addFile(temp_file, null);

        // There should be nothing in the cache.
        // (Use BASE64_DIGEST_LEN instead of a hard-coded 64 so the test
        // tracks the module constant.)
        testing.expectEqual(@as(?[BASE64_DIGEST_LEN]u8, null), try ch.hit());

        digest1 = ch.final();
    }
    {
        var ch = try CacheHash.init(testing.allocator, temp_manifest_dir);
        defer ch.release() catch unreachable;

        ch.add(true);
        ch.add(@as(u16, 1234));
        ch.add("1234");
        _ = try ch.addFile(temp_file, null);

        // Cache hit! We just "built" the same file
        digest2 = (try ch.hit()).?;
    }

    testing.expectEqual(digest1, digest2);

    try cwd.deleteTree(temp_manifest_dir);
    try cwd.deleteFile(temp_file);
}
|
2020-04-11 22:01:17 +00:00
|
|
|
|
|
|
|
test "give problematic timestamp" {
    // An mtime equal to the current wall-clock time (at millisecond
    // precision) must be flagged as unreliable.
    const file_mtime_ns = @intCast(i64, time.milliTimestamp() * time.millisecond);
    testing.expect(is_problematic_timestamp(file_mtime_ns));
}
|
|
|
|
|
|
|
|
test "give nonproblematic timestamp" {
    // 1000ns in the past: flooring to milliseconds yields a value strictly
    // less than the current millisecond, so it is not problematic.
    const file_mtime_ns = @intCast(i64, time.milliTimestamp() * time.millisecond) - 1000;
    testing.expect(!is_problematic_timestamp(file_mtime_ns));
}
|
2020-04-15 03:37:35 +00:00
|
|
|
|
|
|
|
test "check that changing a file makes cache fail" {
    const cwd = fs.cwd();

    const temp_file = "cache_hash_change_file_test.txt";
    const temp_manifest_dir = "cache_hash_change_file_manifest_dir";
    const original_temp_file_contents = "Hello, world!\n";
    const updated_temp_file_contents = "Hello, world; but updated!\n";

    try cwd.writeFile(temp_file, original_temp_file_contents);

    var digest1: [BASE64_DIGEST_LEN]u8 = undefined;
    var digest2: [BASE64_DIGEST_LEN]u8 = undefined;

    {
        var ch = try CacheHash.init(testing.allocator, temp_manifest_dir);
        defer ch.release() catch unreachable;

        ch.add("1234");
        const temp_file_idx = try ch.addFile(temp_file, 100);

        // There should be nothing in the cache.
        // (BASE64_DIGEST_LEN replaces the previously hard-coded 64.)
        testing.expectEqual(@as(?[BASE64_DIGEST_LEN]u8, null), try ch.hit());

        testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.items[temp_file_idx].contents.?));

        digest1 = ch.final();
    }

    try cwd.writeFile(temp_file, updated_temp_file_contents);

    {
        var ch = try CacheHash.init(testing.allocator, temp_manifest_dir);
        defer ch.release() catch unreachable;

        ch.add("1234");
        const temp_file_idx = try ch.addFile(temp_file, 100);

        // A file that we depend on has been updated, so the cache should not contain an entry for it
        testing.expectEqual(@as(?[BASE64_DIGEST_LEN]u8, null), try ch.hit());

        testing.expect(mem.eql(u8, updated_temp_file_contents, ch.files.items[temp_file_idx].contents.?));

        digest2 = ch.final();
    }

    testing.expect(!mem.eql(u8, digest1[0..], digest2[0..]));

    try cwd.deleteTree(temp_manifest_dir);
    try cwd.deleteFile(temp_file);
}
|
2020-04-15 03:39:34 +00:00
|
|
|
|
|
|
|
test "no file inputs" {
    const cwd = fs.cwd();
    const temp_manifest_dir = "no_file_inputs_manifest_dir";
    defer cwd.deleteTree(temp_manifest_dir) catch unreachable;

    var digest1: [BASE64_DIGEST_LEN]u8 = undefined;
    var digest2: [BASE64_DIGEST_LEN]u8 = undefined;

    {
        var ch = try CacheHash.init(testing.allocator, temp_manifest_dir);
        defer ch.release() catch unreachable;

        ch.add("1234");

        // There should be nothing in the cache.
        // (BASE64_DIGEST_LEN replaces the previously hard-coded 64.)
        testing.expectEqual(@as(?[BASE64_DIGEST_LEN]u8, null), try ch.hit());

        digest1 = ch.final();
    }
    {
        var ch = try CacheHash.init(testing.allocator, temp_manifest_dir);
        defer ch.release() catch unreachable;

        ch.add("1234");

        // With no file inputs, the mere existence of the manifest file is the hit.
        digest2 = (try ch.hit()).?;
    }

    testing.expectEqual(digest1, digest2);
}
|
2020-04-30 23:06:03 +00:00
|
|
|
|
2020-05-01 01:47:04 +00:00
|
|
|
test "CacheHashes with files added after initial hash work" {
    const cwd = fs.cwd();

    const temp_file1 = "cache_hash_post_file_test1.txt";
    const temp_file2 = "cache_hash_post_file_test2.txt";
    const temp_manifest_dir = "cache_hash_post_file_manifest_dir";

    try cwd.writeFile(temp_file1, "Hello, world!\n");
    try cwd.writeFile(temp_file2, "Hello world the second!\n");

    var digest1: [BASE64_DIGEST_LEN]u8 = undefined;
    var digest2: [BASE64_DIGEST_LEN]u8 = undefined;
    var digest3: [BASE64_DIGEST_LEN]u8 = undefined;

    {
        var ch = try CacheHash.init(testing.allocator, temp_manifest_dir);
        defer ch.release() catch unreachable;

        ch.add("1234");
        _ = try ch.addFile(temp_file1, null);

        // There should be nothing in the cache.
        // (BASE64_DIGEST_LEN replaces the previously hard-coded 64.)
        testing.expectEqual(@as(?[BASE64_DIGEST_LEN]u8, null), try ch.hit());

        // addFilePost returns !void, so there is no result to discard.
        try ch.addFilePost(temp_file2);

        digest1 = ch.final();
    }
    {
        var ch = try CacheHash.init(testing.allocator, temp_manifest_dir);
        defer ch.release() catch unreachable;

        ch.add("1234");
        _ = try ch.addFile(temp_file1, null);

        // Nothing changed since the first run, so this must be a cache hit
        // (the previous comment here wrongly claimed a miss).
        digest2 = (try ch.hit()).?;
    }

    // Modify the file added after initial hash
    try cwd.writeFile(temp_file2, "Hello world the second, updated\n");

    {
        var ch = try CacheHash.init(testing.allocator, temp_manifest_dir);
        defer ch.release() catch unreachable;

        ch.add("1234");
        _ = try ch.addFile(temp_file1, null);

        // A file that we depend on has been updated, so the cache should not contain an entry for it
        testing.expectEqual(@as(?[BASE64_DIGEST_LEN]u8, null), try ch.hit());

        try ch.addFilePost(temp_file2);

        digest3 = ch.final();
    }

    testing.expect(mem.eql(u8, digest1[0..], digest2[0..]));
    testing.expect(!mem.eql(u8, digest1[0..], digest3[0..]));

    try cwd.deleteTree(temp_manifest_dir);
    try cwd.deleteFile(temp_file1);
    try cwd.deleteFile(temp_file2);
}
|