mirror of
https://github.com/ziglang/zig.git
synced 2025-02-15 00:50:17 +00:00
compress: add a deflate compressor
Replaces the inflate API from `inflateStream(reader: anytype, window_slice: []u8)` to `decompressor(allocator: mem.Allocator, reader: anytype, dictionary: ?[]const u8)` and `compressor(allocator: mem.Allocator, writer: anytype, options: CompressorOptions)`
This commit is contained in:
parent
dba04a272a
commit
490f067de8
17
build.zig
17
build.zig
@ -93,12 +93,25 @@ pub fn build(b: *Builder) !void {
|
|||||||
.install_dir = .lib,
|
.install_dir = .lib,
|
||||||
.install_subdir = "zig",
|
.install_subdir = "zig",
|
||||||
.exclude_extensions = &[_][]const u8{
|
.exclude_extensions = &[_][]const u8{
|
||||||
"README.md",
|
// exclude files from lib/std/compress/
|
||||||
|
".gz",
|
||||||
".z.0",
|
".z.0",
|
||||||
".z.9",
|
".z.9",
|
||||||
".gz",
|
|
||||||
"rfc1951.txt",
|
"rfc1951.txt",
|
||||||
|
"rfc1952.txt",
|
||||||
|
// exclude files from lib/std/compress/deflate/testdata
|
||||||
|
".expect",
|
||||||
|
".expect-noinput",
|
||||||
|
".golden",
|
||||||
|
".input",
|
||||||
|
"compress-e.txt",
|
||||||
|
"compress-gettysburg.txt",
|
||||||
|
"compress-pi.txt",
|
||||||
|
"rfc1951.txt",
|
||||||
|
// exclude files from lib/std/tz/
|
||||||
".tzif",
|
".tzif",
|
||||||
|
// others
|
||||||
|
"README.md",
|
||||||
},
|
},
|
||||||
.blank_extensions = &[_][]const u8{
|
.blank_extensions = &[_][]const u8{
|
||||||
"test.zig",
|
"test.zig",
|
||||||
|
@ -5,6 +5,7 @@ pub const gzip = @import("compress/gzip.zig");
|
|||||||
pub const zlib = @import("compress/zlib.zig");
|
pub const zlib = @import("compress/zlib.zig");
|
||||||
|
|
||||||
test {
|
test {
|
||||||
|
_ = deflate;
|
||||||
_ = gzip;
|
_ = gzip;
|
||||||
_ = zlib;
|
_ = zlib;
|
||||||
}
|
}
|
||||||
|
@ -1,738 +1,29 @@
|
|||||||
//
|
//! The deflate package is a translation of the Go code of the compress/flate package from
|
||||||
// Decompressor for DEFLATE data streams (RFC1951)
|
//! https://go.googlesource.com/go/+/refs/tags/go1.17/src/compress/flate/
|
||||||
//
|
|
||||||
// Heavily inspired by the simple decompressor puff.c by Mark Adler
|
|
||||||
|
|
||||||
const std = @import("std");
|
const deflate = @import("deflate/compressor.zig");
|
||||||
const io = std.io;
|
const inflate = @import("deflate/decompressor.zig");
|
||||||
const math = std.math;
|
|
||||||
const mem = std.mem;
|
|
||||||
|
|
||||||
const assert = std.debug.assert;
|
pub const Compression = deflate.Compression;
|
||||||
|
pub const Compressor = deflate.Compressor;
|
||||||
|
pub const Decompressor = inflate.Decompressor;
|
||||||
|
|
||||||
const MAXBITS = 15;
|
pub const compressor = deflate.compressor;
|
||||||
const MAXLCODES = 286;
|
pub const decompressor = inflate.decompressor;
|
||||||
const MAXDCODES = 30;
|
|
||||||
const MAXCODES = MAXLCODES + MAXDCODES;
|
|
||||||
const FIXLCODES = 288;
|
|
||||||
|
|
||||||
// The maximum length of a Huffman code's prefix we can decode using the fast
|
test {
|
||||||
// path. The factor 9 is inherited from Zlib, tweaking the value showed little
|
_ = @import("deflate/token.zig");
|
||||||
// or no changes in the profiler output.
|
_ = @import("deflate/bits_utils.zig");
|
||||||
const PREFIX_LUT_BITS = 9;
|
_ = @import("deflate/dict_decoder.zig");
|
||||||
|
|
||||||
const Huffman = struct {
|
_ = @import("deflate/huffman_code.zig");
|
||||||
const LUTEntry = packed struct { symbol: u16 align(4), len: u16 };
|
_ = @import("deflate/huffman_bit_writer.zig");
|
||||||
|
|
||||||
// Number of codes for each possible length
|
_ = @import("deflate/compressor.zig");
|
||||||
count: [MAXBITS + 1]u16,
|
_ = @import("deflate/compressor_test.zig");
|
||||||
// Mapping between codes and symbols
|
|
||||||
symbol: [MAXCODES]u16,
|
|
||||||
|
|
||||||
// The decoding process uses a trick explained by Mark Adler in [1].
|
_ = @import("deflate/deflate_fast.zig");
|
||||||
// We basically precompute for a fixed number of codes (0 <= x <= 2^N-1)
|
_ = @import("deflate/deflate_fast_test.zig");
|
||||||
// the symbol and the effective code length we'd get if the decoder was run
|
|
||||||
// on the given N-bit sequence.
|
|
||||||
// A code with length 0 means the sequence is not a valid prefix for this
|
|
||||||
// canonical Huffman code and we have to decode it using a slower method.
|
|
||||||
//
|
|
||||||
// [1] https://github.com/madler/zlib/blob/v1.2.11/doc/algorithm.txt#L58
|
|
||||||
prefix_lut: [1 << PREFIX_LUT_BITS]LUTEntry,
|
|
||||||
// The following info refer to the codes of length PREFIX_LUT_BITS+1 and are
|
|
||||||
// used to bootstrap the bit-by-bit reading method if the fast-path fails.
|
|
||||||
last_code: u16,
|
|
||||||
last_index: u16,
|
|
||||||
|
|
||||||
min_code_len: u16,
|
_ = @import("deflate/decompressor.zig");
|
||||||
|
|
||||||
const ConstructError = error{ Oversubscribed, IncompleteSet };
|
|
||||||
|
|
||||||
fn construct(self: *Huffman, code_length: []const u16) ConstructError!void {
|
|
||||||
for (self.count) |*val| {
|
|
||||||
val.* = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
self.min_code_len = math.maxInt(u16);
|
|
||||||
for (code_length) |len| {
|
|
||||||
if (len != 0 and len < self.min_code_len)
|
|
||||||
self.min_code_len = len;
|
|
||||||
self.count[len] += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// All zero.
|
|
||||||
if (self.count[0] == code_length.len) {
|
|
||||||
self.min_code_len = 0;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
var left: isize = 1;
|
|
||||||
for (self.count[1..]) |val| {
|
|
||||||
// Each added bit doubles the amount of codes.
|
|
||||||
left *= 2;
|
|
||||||
// Make sure the number of codes with this length isn't too high.
|
|
||||||
left -= @as(isize, @bitCast(i16, val));
|
|
||||||
if (left < 0)
|
|
||||||
return error.Oversubscribed;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compute the offset of the first symbol represented by a code of a
|
|
||||||
// given length in the symbol table, together with the first canonical
|
|
||||||
// Huffman code for that length.
|
|
||||||
var offset: [MAXBITS + 1]u16 = undefined;
|
|
||||||
var codes: [MAXBITS + 1]u16 = undefined;
|
|
||||||
{
|
|
||||||
offset[1] = 0;
|
|
||||||
codes[1] = 0;
|
|
||||||
var len: usize = 1;
|
|
||||||
while (len < MAXBITS) : (len += 1) {
|
|
||||||
offset[len + 1] = offset[len] + self.count[len];
|
|
||||||
codes[len + 1] = (codes[len] + self.count[len]) << 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
self.prefix_lut = mem.zeroes(@TypeOf(self.prefix_lut));
|
|
||||||
|
|
||||||
for (code_length) |len, symbol| {
|
|
||||||
if (len != 0) {
|
|
||||||
// Fill the symbol table.
|
|
||||||
// The symbols are assigned sequentially for each length.
|
|
||||||
self.symbol[offset[len]] = @truncate(u16, symbol);
|
|
||||||
// Track the last assigned offset.
|
|
||||||
offset[len] += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (len == 0 or len > PREFIX_LUT_BITS)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// Given a Huffman code of length N we transform it into an index
|
|
||||||
// into the lookup table by reversing its bits and filling the
|
|
||||||
// remaining bits (PREFIX_LUT_BITS - N) with every possible
|
|
||||||
// combination of bits to act as a wildcard.
|
|
||||||
const bits_to_fill = @intCast(u5, PREFIX_LUT_BITS - len);
|
|
||||||
const rev_code = bitReverse(u16, codes[len], len);
|
|
||||||
|
|
||||||
// Track the last used code, but only for lengths < PREFIX_LUT_BITS.
|
|
||||||
codes[len] += 1;
|
|
||||||
|
|
||||||
var j: usize = 0;
|
|
||||||
while (j < @as(usize, 1) << bits_to_fill) : (j += 1) {
|
|
||||||
const index = rev_code | (j << @intCast(u5, len));
|
|
||||||
assert(self.prefix_lut[index].len == 0);
|
|
||||||
self.prefix_lut[index] = .{
|
|
||||||
.symbol = @truncate(u16, symbol),
|
|
||||||
.len = @truncate(u16, len),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
self.last_code = codes[PREFIX_LUT_BITS + 1];
|
|
||||||
self.last_index = offset[PREFIX_LUT_BITS + 1] - self.count[PREFIX_LUT_BITS + 1];
|
|
||||||
|
|
||||||
if (left > 0)
|
|
||||||
return error.IncompleteSet;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Reverse bit-by-bit a N-bit code.
|
|
||||||
fn bitReverse(comptime T: type, value: T, N: usize) T {
|
|
||||||
const r = @bitReverse(T, value);
|
|
||||||
return r >> @intCast(math.Log2Int(T), @typeInfo(T).Int.bits - N);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn InflateStream(comptime ReaderType: type) type {
|
|
||||||
return struct {
|
|
||||||
const Self = @This();
|
|
||||||
|
|
||||||
pub const Error = ReaderType.Error || error{
|
|
||||||
EndOfStream,
|
|
||||||
BadCounts,
|
|
||||||
InvalidBlockType,
|
|
||||||
InvalidDistance,
|
|
||||||
InvalidFixedCode,
|
|
||||||
InvalidLength,
|
|
||||||
InvalidStoredSize,
|
|
||||||
InvalidSymbol,
|
|
||||||
InvalidTree,
|
|
||||||
MissingEOBCode,
|
|
||||||
NoLastLength,
|
|
||||||
OutOfCodes,
|
|
||||||
};
|
|
||||||
pub const Reader = io.Reader(*Self, Error, read);
|
|
||||||
|
|
||||||
inner_reader: ReaderType,
|
|
||||||
|
|
||||||
// True if the decoder met the end of the compressed stream, no further
|
|
||||||
// data can be decompressed
|
|
||||||
seen_eos: bool,
|
|
||||||
|
|
||||||
state: union(enum) {
|
|
||||||
// Parse a compressed block header and set up the internal state for
|
|
||||||
// decompressing its contents.
|
|
||||||
DecodeBlockHeader: void,
|
|
||||||
// Decode all the symbols in a compressed block.
|
|
||||||
DecodeBlockData: void,
|
|
||||||
// Copy N bytes of uncompressed data from the underlying stream into
|
|
||||||
// the window.
|
|
||||||
Copy: usize,
|
|
||||||
// Copy 1 byte into the window.
|
|
||||||
CopyLit: u8,
|
|
||||||
// Copy L bytes from the window itself, starting from D bytes
|
|
||||||
// behind.
|
|
||||||
CopyFrom: struct { distance: u16, length: u16 },
|
|
||||||
},
|
|
||||||
|
|
||||||
// Sliding window for the LZ77 algorithm
|
|
||||||
window: struct {
|
|
||||||
const WSelf = @This();
|
|
||||||
|
|
||||||
// invariant: buffer length is always a power of 2
|
|
||||||
buf: []u8,
|
|
||||||
// invariant: ri <= wi
|
|
||||||
wi: usize = 0, // Write index
|
|
||||||
ri: usize = 0, // Read index
|
|
||||||
el: usize = 0, // Number of readable elements
|
|
||||||
total_written: usize = 0,
|
|
||||||
|
|
||||||
fn readable(self: *WSelf) usize {
|
|
||||||
return self.el;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn writable(self: *WSelf) usize {
|
|
||||||
return self.buf.len - self.el;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Insert a single byte into the window.
|
|
||||||
// Returns 1 if there's enough space for the new byte and 0
|
|
||||||
// otherwise.
|
|
||||||
fn append(self: *WSelf, value: u8) usize {
|
|
||||||
if (self.writable() < 1) return 0;
|
|
||||||
self.appendUnsafe(value);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Insert a single byte into the window.
|
|
||||||
// Assumes there's enough space.
|
|
||||||
inline fn appendUnsafe(self: *WSelf, value: u8) void {
|
|
||||||
self.buf[self.wi] = value;
|
|
||||||
self.wi = (self.wi + 1) & (self.buf.len - 1);
|
|
||||||
self.el += 1;
|
|
||||||
self.total_written += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fill dest[] with data from the window, starting from the read
|
|
||||||
// position. This updates the read pointer.
|
|
||||||
// Returns the number of read bytes or 0 if there's nothing to read
|
|
||||||
// yet.
|
|
||||||
fn read(self: *WSelf, dest: []u8) usize {
|
|
||||||
const N = math.min(dest.len, self.readable());
|
|
||||||
|
|
||||||
if (N == 0) return 0;
|
|
||||||
|
|
||||||
if (self.ri + N < self.buf.len) {
|
|
||||||
// The data doesn't wrap around
|
|
||||||
mem.copy(u8, dest, self.buf[self.ri .. self.ri + N]);
|
|
||||||
} else {
|
|
||||||
// The data wraps around the buffer, split the copy
|
|
||||||
std.mem.copy(u8, dest, self.buf[self.ri..]);
|
|
||||||
// How much data we've copied from `ri` to the end
|
|
||||||
const r = self.buf.len - self.ri;
|
|
||||||
std.mem.copy(u8, dest[r..], self.buf[0 .. N - r]);
|
|
||||||
}
|
|
||||||
|
|
||||||
self.ri = (self.ri + N) & (self.buf.len - 1);
|
|
||||||
self.el -= N;
|
|
||||||
|
|
||||||
return N;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy `length` bytes starting from `distance` bytes behind the
|
|
||||||
// write pointer.
|
|
||||||
// Be careful as the length may be greater than the distance, that's
|
|
||||||
// how the compressor encodes run-length encoded sequences.
|
|
||||||
fn copyFrom(self: *WSelf, distance: usize, length: usize) usize {
|
|
||||||
const N = math.min(length, self.writable());
|
|
||||||
|
|
||||||
if (N == 0) return 0;
|
|
||||||
|
|
||||||
// TODO: Profile and, if needed, replace with smarter juggling
|
|
||||||
// of the window memory for the non-overlapping case.
|
|
||||||
var i: usize = 0;
|
|
||||||
while (i < N) : (i += 1) {
|
|
||||||
const index = (self.wi -% distance) & (self.buf.len - 1);
|
|
||||||
self.appendUnsafe(self.buf[index]);
|
|
||||||
}
|
|
||||||
|
|
||||||
return N;
|
|
||||||
}
|
|
||||||
},
|
|
||||||
|
|
||||||
// Compressor-local Huffman tables used to decompress blocks with
|
|
||||||
// dynamic codes.
|
|
||||||
huffman_tables: [2]Huffman = undefined,
|
|
||||||
|
|
||||||
// Huffman tables used for decoding length/distance pairs.
|
|
||||||
hdist: *Huffman,
|
|
||||||
hlen: *Huffman,
|
|
||||||
|
|
||||||
// Temporary buffer for the bitstream.
|
|
||||||
// Bits 0..`bits_left` are filled with data, the remaining ones are zeros.
|
|
||||||
bits: u32,
|
|
||||||
bits_left: usize,
|
|
||||||
|
|
||||||
fn peekBits(self: *Self, bits: usize) !u32 {
|
|
||||||
while (self.bits_left < bits) {
|
|
||||||
const byte = try self.inner_reader.readByte();
|
|
||||||
self.bits |= @as(u32, byte) << @intCast(u5, self.bits_left);
|
|
||||||
self.bits_left += 8;
|
|
||||||
}
|
|
||||||
const mask = (@as(u32, 1) << @intCast(u5, bits)) - 1;
|
|
||||||
return self.bits & mask;
|
|
||||||
}
|
|
||||||
fn readBits(self: *Self, bits: usize) !u32 {
|
|
||||||
const val = try self.peekBits(bits);
|
|
||||||
self.discardBits(bits);
|
|
||||||
return val;
|
|
||||||
}
|
|
||||||
fn discardBits(self: *Self, bits: usize) void {
|
|
||||||
self.bits >>= @intCast(u5, bits);
|
|
||||||
self.bits_left -= bits;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn stored(self: *Self) !void {
|
|
||||||
// Discard the remaining bits, the length field is always
|
|
||||||
// byte-aligned (and so is the data).
|
|
||||||
self.discardBits(self.bits_left);
|
|
||||||
|
|
||||||
const length = try self.inner_reader.readIntLittle(u16);
|
|
||||||
const length_cpl = try self.inner_reader.readIntLittle(u16);
|
|
||||||
|
|
||||||
if (length != ~length_cpl)
|
|
||||||
return error.InvalidStoredSize;
|
|
||||||
|
|
||||||
self.state = .{ .Copy = length };
|
|
||||||
}
|
|
||||||
|
|
||||||
fn fixed(self: *Self) !void {
|
|
||||||
comptime var lencode: Huffman = undefined;
|
|
||||||
comptime var distcode: Huffman = undefined;
|
|
||||||
|
|
||||||
// The Huffman codes are specified in the RFC1951, section 3.2.6
|
|
||||||
comptime {
|
|
||||||
@setEvalBranchQuota(100000);
|
|
||||||
|
|
||||||
const len_lengths =
|
|
||||||
[_]u16{8} ** 144 ++
|
|
||||||
[_]u16{9} ** 112 ++
|
|
||||||
[_]u16{7} ** 24 ++
|
|
||||||
[_]u16{8} ** 8;
|
|
||||||
assert(len_lengths.len == FIXLCODES);
|
|
||||||
try lencode.construct(len_lengths[0..]);
|
|
||||||
|
|
||||||
const dist_lengths = [_]u16{5} ** MAXDCODES;
|
|
||||||
distcode.construct(dist_lengths[0..]) catch |err| switch (err) {
|
|
||||||
// This error is expected because we only compute distance codes
|
|
||||||
// 0-29, which is fine since "distance codes 30-31 will never actually
|
|
||||||
// occur in the compressed data" (from section 3.2.6 of RFC1951).
|
|
||||||
error.IncompleteSet => {},
|
|
||||||
else => return err,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
self.hlen = &lencode;
|
|
||||||
self.hdist = &distcode;
|
|
||||||
self.state = .DecodeBlockData;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn dynamic(self: *Self) !void {
|
|
||||||
// Number of length codes
|
|
||||||
const nlen = (try self.readBits(5)) + 257;
|
|
||||||
// Number of distance codes
|
|
||||||
const ndist = (try self.readBits(5)) + 1;
|
|
||||||
// Number of code length codes
|
|
||||||
const ncode = (try self.readBits(4)) + 4;
|
|
||||||
|
|
||||||
if (nlen > MAXLCODES or ndist > MAXDCODES)
|
|
||||||
return error.BadCounts;
|
|
||||||
|
|
||||||
// Permutation of code length codes
|
|
||||||
const ORDER = [19]u16{
|
|
||||||
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4,
|
|
||||||
12, 3, 13, 2, 14, 1, 15,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Build the Huffman table to decode the code length codes
|
|
||||||
var lencode: Huffman = undefined;
|
|
||||||
{
|
|
||||||
var lengths = std.mem.zeroes([19]u16);
|
|
||||||
|
|
||||||
// Read the code lengths, missing ones are left as zero
|
|
||||||
for (ORDER[0..ncode]) |val| {
|
|
||||||
lengths[val] = @intCast(u16, try self.readBits(3));
|
|
||||||
}
|
|
||||||
|
|
||||||
lencode.construct(lengths[0..]) catch return error.InvalidTree;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read the length/literal and distance code length tables.
|
|
||||||
// Zero the table by default so we can avoid explicitly writing out
|
|
||||||
// zeros for codes 17 and 18
|
|
||||||
var lengths = std.mem.zeroes([MAXCODES]u16);
|
|
||||||
|
|
||||||
var i: usize = 0;
|
|
||||||
while (i < nlen + ndist) {
|
|
||||||
const symbol = try self.decode(&lencode);
|
|
||||||
|
|
||||||
switch (symbol) {
|
|
||||||
0...15 => {
|
|
||||||
lengths[i] = symbol;
|
|
||||||
i += 1;
|
|
||||||
},
|
|
||||||
16 => {
|
|
||||||
// repeat last length 3..6 times
|
|
||||||
if (i == 0) return error.NoLastLength;
|
|
||||||
|
|
||||||
const last_length = lengths[i - 1];
|
|
||||||
const repeat = 3 + (try self.readBits(2));
|
|
||||||
const last_index = i + repeat;
|
|
||||||
if (last_index > lengths.len)
|
|
||||||
return error.InvalidLength;
|
|
||||||
while (i < last_index) : (i += 1) {
|
|
||||||
lengths[i] = last_length;
|
|
||||||
}
|
|
||||||
},
|
|
||||||
17 => {
|
|
||||||
// repeat zero 3..10 times
|
|
||||||
i += 3 + (try self.readBits(3));
|
|
||||||
},
|
|
||||||
18 => {
|
|
||||||
// repeat zero 11..138 times
|
|
||||||
i += 11 + (try self.readBits(7));
|
|
||||||
},
|
|
||||||
else => return error.InvalidSymbol,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (i > nlen + ndist)
|
|
||||||
return error.InvalidLength;
|
|
||||||
|
|
||||||
// Check if the end of block code is present
|
|
||||||
if (lengths[256] == 0)
|
|
||||||
return error.MissingEOBCode;
|
|
||||||
|
|
||||||
self.huffman_tables[0].construct(lengths[0..nlen]) catch |err| switch (err) {
|
|
||||||
error.Oversubscribed => return error.InvalidTree,
|
|
||||||
error.IncompleteSet => {
|
|
||||||
// incomplete code ok only for single length 1 code
|
|
||||||
if (nlen != self.huffman_tables[0].count[0] + self.huffman_tables[0].count[1]) {
|
|
||||||
return error.InvalidTree;
|
|
||||||
}
|
|
||||||
},
|
|
||||||
};
|
|
||||||
self.huffman_tables[1].construct(lengths[nlen .. nlen + ndist]) catch |err| switch (err) {
|
|
||||||
error.Oversubscribed => return error.InvalidTree,
|
|
||||||
error.IncompleteSet => {
|
|
||||||
// incomplete code ok only for single length 1 code
|
|
||||||
if (ndist != self.huffman_tables[1].count[0] + self.huffman_tables[1].count[1]) {
|
|
||||||
return error.InvalidTree;
|
|
||||||
}
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
self.hlen = &self.huffman_tables[0];
|
|
||||||
self.hdist = &self.huffman_tables[1];
|
|
||||||
self.state = .DecodeBlockData;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn codes(self: *Self, lencode: *Huffman, distcode: *Huffman) !bool {
|
|
||||||
// Size base for length codes 257..285
|
|
||||||
const LENS = [29]u16{
|
|
||||||
3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
|
|
||||||
35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258,
|
|
||||||
};
|
|
||||||
// Extra bits for length codes 257..285
|
|
||||||
const LEXT = [29]u16{
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
|
|
||||||
3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0,
|
|
||||||
};
|
|
||||||
// Offset base for distance codes 0..29
|
|
||||||
const DISTS = [30]u16{
|
|
||||||
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
|
|
||||||
257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577,
|
|
||||||
};
|
|
||||||
// Extra bits for distance codes 0..29
|
|
||||||
const DEXT = [30]u16{
|
|
||||||
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
|
|
||||||
7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
|
|
||||||
};
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
const symbol = try self.decode(lencode);
|
|
||||||
|
|
||||||
switch (symbol) {
|
|
||||||
0...255 => {
|
|
||||||
// Literal value
|
|
||||||
const c = @truncate(u8, symbol);
|
|
||||||
if (self.window.append(c) == 0) {
|
|
||||||
self.state = .{ .CopyLit = c };
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
},
|
|
||||||
256 => {
|
|
||||||
// End of block symbol
|
|
||||||
return true;
|
|
||||||
},
|
|
||||||
257...285 => {
|
|
||||||
// Length/distance pair
|
|
||||||
const length_symbol = symbol - 257;
|
|
||||||
const length = LENS[length_symbol] +
|
|
||||||
@intCast(u16, try self.readBits(LEXT[length_symbol]));
|
|
||||||
|
|
||||||
const distance_symbol = try self.decode(distcode);
|
|
||||||
const distance = DISTS[distance_symbol] +
|
|
||||||
@intCast(u16, try self.readBits(DEXT[distance_symbol]));
|
|
||||||
|
|
||||||
if (distance > self.window.buf.len or distance > self.window.total_written)
|
|
||||||
return error.InvalidDistance;
|
|
||||||
|
|
||||||
const written = self.window.copyFrom(distance, length);
|
|
||||||
if (written != length) {
|
|
||||||
self.state = .{
|
|
||||||
.CopyFrom = .{
|
|
||||||
.distance = distance,
|
|
||||||
.length = length - @truncate(u16, written),
|
|
||||||
},
|
|
||||||
};
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
},
|
|
||||||
else => return error.InvalidFixedCode,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn decode(self: *Self, h: *Huffman) !u16 {
|
|
||||||
// Using u32 instead of u16 to reduce the number of casts needed.
|
|
||||||
var prefix: u32 = 0;
|
|
||||||
|
|
||||||
// Fast path, read some bits and hope they're the prefix of some code.
|
|
||||||
// We can't read PREFIX_LUT_BITS as we don't want to read past the
|
|
||||||
// deflate stream end, use an incremental approach instead.
|
|
||||||
var code_len = h.min_code_len;
|
|
||||||
if (code_len == 0)
|
|
||||||
return error.OutOfCodes;
|
|
||||||
while (true) {
|
|
||||||
_ = try self.peekBits(code_len);
|
|
||||||
// Small optimization win, use as many bits as possible in the
|
|
||||||
// table lookup.
|
|
||||||
prefix = self.bits & ((1 << PREFIX_LUT_BITS) - 1);
|
|
||||||
|
|
||||||
const lut_entry = &h.prefix_lut[prefix];
|
|
||||||
// The code is longer than PREFIX_LUT_BITS!
|
|
||||||
if (lut_entry.len == 0)
|
|
||||||
break;
|
|
||||||
// If the code lenght doesn't increase we found a match.
|
|
||||||
if (lut_entry.len <= code_len) {
|
|
||||||
self.discardBits(code_len);
|
|
||||||
return lut_entry.symbol;
|
|
||||||
}
|
|
||||||
|
|
||||||
code_len = lut_entry.len;
|
|
||||||
}
|
|
||||||
|
|
||||||
// The sequence we've read is not a prefix of any code of length <=
|
|
||||||
// PREFIX_LUT_BITS, keep decoding it using a slower method.
|
|
||||||
prefix = try self.readBits(PREFIX_LUT_BITS);
|
|
||||||
|
|
||||||
// Speed up the decoding by starting from the first code length
|
|
||||||
// that's not covered by the table.
|
|
||||||
var len: usize = PREFIX_LUT_BITS + 1;
|
|
||||||
var first: usize = h.last_code;
|
|
||||||
var index: usize = h.last_index;
|
|
||||||
|
|
||||||
// Reverse the prefix so that the LSB becomes the MSB and make space
|
|
||||||
// for the next bit.
|
|
||||||
var code = bitReverse(u32, prefix, PREFIX_LUT_BITS + 1);
|
|
||||||
|
|
||||||
while (len <= MAXBITS) : (len += 1) {
|
|
||||||
code |= try self.readBits(1);
|
|
||||||
const count = h.count[len];
|
|
||||||
if (code < first + count) {
|
|
||||||
return h.symbol[index + (code - first)];
|
|
||||||
}
|
|
||||||
index += count;
|
|
||||||
first += count;
|
|
||||||
first <<= 1;
|
|
||||||
code <<= 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return error.OutOfCodes;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn step(self: *Self) !void {
|
|
||||||
while (true) {
|
|
||||||
switch (self.state) {
|
|
||||||
.DecodeBlockHeader => {
|
|
||||||
// The compressed stream is done.
|
|
||||||
if (self.seen_eos) return;
|
|
||||||
|
|
||||||
const last = @intCast(u1, try self.readBits(1));
|
|
||||||
const kind = @intCast(u2, try self.readBits(2));
|
|
||||||
|
|
||||||
self.seen_eos = last != 0;
|
|
||||||
|
|
||||||
// The next state depends on the block type.
|
|
||||||
switch (kind) {
|
|
||||||
0 => try self.stored(),
|
|
||||||
1 => try self.fixed(),
|
|
||||||
2 => try self.dynamic(),
|
|
||||||
3 => return error.InvalidBlockType,
|
|
||||||
}
|
|
||||||
},
|
|
||||||
.DecodeBlockData => {
|
|
||||||
if (!try self.codes(self.hlen, self.hdist)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
self.state = .DecodeBlockHeader;
|
|
||||||
},
|
|
||||||
.Copy => |*length| {
|
|
||||||
const N = math.min(self.window.writable(), length.*);
|
|
||||||
|
|
||||||
// TODO: This loop can be more efficient. On the other
|
|
||||||
// hand uncompressed blocks are not that common so...
|
|
||||||
var i: usize = 0;
|
|
||||||
while (i < N) : (i += 1) {
|
|
||||||
var tmp: [1]u8 = undefined;
|
|
||||||
if ((try self.inner_reader.read(&tmp)) != 1) {
|
|
||||||
// Unexpected end of stream, keep this error
|
|
||||||
// consistent with the use of readBitsNoEof.
|
|
||||||
return error.EndOfStream;
|
|
||||||
}
|
|
||||||
self.window.appendUnsafe(tmp[0]);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (N != length.*) {
|
|
||||||
length.* -= N;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
self.state = .DecodeBlockHeader;
|
|
||||||
},
|
|
||||||
.CopyLit => |c| {
|
|
||||||
if (self.window.append(c) == 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
self.state = .DecodeBlockData;
|
|
||||||
},
|
|
||||||
.CopyFrom => |*info| {
|
|
||||||
const written = self.window.copyFrom(info.distance, info.length);
|
|
||||||
if (written != info.length) {
|
|
||||||
info.length -= @truncate(u16, written);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
self.state = .DecodeBlockData;
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn init(source: ReaderType, window_slice: []u8) Self {
|
|
||||||
assert(math.isPowerOfTwo(window_slice.len));
|
|
||||||
|
|
||||||
return Self{
|
|
||||||
.inner_reader = source,
|
|
||||||
.window = .{ .buf = window_slice },
|
|
||||||
.seen_eos = false,
|
|
||||||
.state = .DecodeBlockHeader,
|
|
||||||
.hdist = undefined,
|
|
||||||
.hlen = undefined,
|
|
||||||
.bits = 0,
|
|
||||||
.bits_left = 0,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Implements the io.Reader interface
|
|
||||||
pub fn read(self: *Self, buffer: []u8) Error!usize {
|
|
||||||
if (buffer.len == 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
// Try reading as much as possible from the window
|
|
||||||
var read_amt: usize = self.window.read(buffer);
|
|
||||||
while (read_amt < buffer.len) {
|
|
||||||
// Run the state machine, we can detect the "effective" end of
|
|
||||||
// stream condition by checking if any progress was made.
|
|
||||||
// Why "effective"? Because even though `seen_eos` is true we
|
|
||||||
// may still have to finish processing other decoding steps.
|
|
||||||
try self.step();
|
|
||||||
// No progress was made
|
|
||||||
if (self.window.readable() == 0)
|
|
||||||
break;
|
|
||||||
|
|
||||||
read_amt += self.window.read(buffer[read_amt..]);
|
|
||||||
}
|
|
||||||
|
|
||||||
return read_amt;
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn reader(self: *Self) Reader {
|
|
||||||
return .{ .context = self };
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn inflateStream(reader: anytype, window_slice: []u8) InflateStream(@TypeOf(reader)) {
|
|
||||||
return InflateStream(@TypeOf(reader)).init(reader, window_slice);
|
|
||||||
}
|
|
||||||
|
|
||||||
test "lengths overflow" {
|
|
||||||
// malformed final dynamic block, tries to write 321 code lengths (MAXCODES is 316)
|
|
||||||
// f dy hlit hdist hclen 16 17 18 0 (18) x138 (18) x138 (18) x39 (16) x6
|
|
||||||
// 1 10 11101 11101 0000 010 010 010 010 (11) 1111111 (11) 1111111 (11) 0011100 (01) 11
|
|
||||||
const stream = [_]u8{ 0b11101101, 0b00011101, 0b00100100, 0b11101001, 0b11111111, 0b11111111, 0b00111001, 0b00001110 };
|
|
||||||
try std.testing.expectError(error.InvalidLength, testInflate(stream[0..]));
|
|
||||||
}
|
|
||||||
|
|
||||||
test "empty distance alphabet" {
|
|
||||||
// dynamic block with empty distance alphabet is valid if only literals and end of data symbol are used
|
|
||||||
// f dy hlit hdist hclen 16 17 18 0 8 7 9 6 10 5 11 4 12 3 13 2 14 1 15 (18) x128 (18) x128 (1) ( 0) (256)
|
|
||||||
// 1 10 00000 00000 1111 000 000 010 010 000 000 000 000 000 000 000 000 000 000 000 000 000 001 000 (11) 1110101 (11) 1110101 (0) (10) (0)
|
|
||||||
const stream = [_]u8{ 0b00000101, 0b11100000, 0b00000001, 0b00001001, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00010000, 0b01011100, 0b10111111, 0b00101110 };
|
|
||||||
try testInflate(stream[0..]);
|
|
||||||
}
|
|
||||||
|
|
||||||
test "distance past beginning of output stream" {
|
|
||||||
// f fx ('A') ('B') ('C') <len=4, dist=4> (end)
|
|
||||||
// 1 01 (01110001) (01110010) (01110011) (0000010) (00011) (0000000)
|
|
||||||
const stream = [_]u8{ 0b01110011, 0b01110100, 0b01110010, 0b00000110, 0b01100001, 0b00000000 };
|
|
||||||
try std.testing.expectError(error.InvalidDistance, testInflate(stream[0..]));
|
|
||||||
}
|
|
||||||
|
|
||||||
test "inflateStream fuzzing" {
|
|
||||||
// see https://github.com/ziglang/zig/issues/9842
|
|
||||||
try std.testing.expectError(error.EndOfStream, testInflate("\x95\x90=o\xc20\x10\x86\xf30"));
|
|
||||||
try std.testing.expectError(error.OutOfCodes, testInflate("\x950\x00\x0000000"));
|
|
||||||
|
|
||||||
// Huffman.construct errors
|
|
||||||
// lencode
|
|
||||||
try std.testing.expectError(error.InvalidTree, testInflate("\x950000"));
|
|
||||||
try std.testing.expectError(error.InvalidTree, testInflate("\x05000"));
|
|
||||||
// hlen
|
|
||||||
try std.testing.expectError(error.InvalidTree, testInflate("\x05\xea\x01\t\x00\x00\x00\x01\x00\\\xbf.\t\x00"));
|
|
||||||
// hdist
|
|
||||||
try std.testing.expectError(error.InvalidTree, testInflate("\x05\xe0\x01A\x00\x00\x00\x00\x10\\\xbf."));
|
|
||||||
|
|
||||||
// Huffman.construct -> error.IncompleteSet returns that shouldn't give error.InvalidTree
|
|
||||||
// (like the "empty distance alphabet" test but for ndist instead of nlen)
|
|
||||||
try std.testing.expectError(error.EndOfStream, testInflate("\x05\xe0\x01\t\x00\x00\x00\x00\x10\\\xbf\xce"));
|
|
||||||
try testInflate("\x15\xe0\x01\t\x00\x00\x00\x00\x10\\\xbf.0");
|
|
||||||
}
|
|
||||||
|
|
||||||
fn testInflate(data: []const u8) !void {
|
|
||||||
var window: [0x8000]u8 = undefined;
|
|
||||||
const reader = std.io.fixedBufferStream(data).reader();
|
|
||||||
var inflate = inflateStream(reader, &window);
|
|
||||||
var inflated = try inflate.reader().readAllAlloc(std.testing.allocator, std.math.maxInt(usize));
|
|
||||||
defer std.testing.allocator.free(inflated);
|
|
||||||
}
|
}
|
||||||
|
34
lib/std/compress/deflate/bits_utils.zig
Normal file
34
lib/std/compress/deflate/bits_utils.zig
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
const math = @import("std").math;
|
||||||
|
|
||||||
|
// Reverse bit-by-bit a N-bit code.
|
||||||
|
pub fn bitReverse(comptime T: type, value: T, N: usize) T {
|
||||||
|
const r = @bitReverse(T, value);
|
||||||
|
return r >> @intCast(math.Log2Int(T), @typeInfo(T).Int.bits - N);
|
||||||
|
}
|
||||||
|
|
||||||
|
test "bitReverse" {
|
||||||
|
const std = @import("std");
|
||||||
|
const expect = std.testing.expect;
|
||||||
|
|
||||||
|
const ReverseBitsTest = struct {
|
||||||
|
in: u16,
|
||||||
|
bit_count: u5,
|
||||||
|
out: u16,
|
||||||
|
};
|
||||||
|
|
||||||
|
var reverse_bits_tests = [_]ReverseBitsTest{
|
||||||
|
.{ .in = 1, .bit_count = 1, .out = 1 },
|
||||||
|
.{ .in = 1, .bit_count = 2, .out = 2 },
|
||||||
|
.{ .in = 1, .bit_count = 3, .out = 4 },
|
||||||
|
.{ .in = 1, .bit_count = 4, .out = 8 },
|
||||||
|
.{ .in = 1, .bit_count = 5, .out = 16 },
|
||||||
|
.{ .in = 17, .bit_count = 5, .out = 17 },
|
||||||
|
.{ .in = 257, .bit_count = 9, .out = 257 },
|
||||||
|
.{ .in = 29, .bit_count = 5, .out = 23 },
|
||||||
|
};
|
||||||
|
|
||||||
|
for (reverse_bits_tests) |h| {
|
||||||
|
var v = bitReverse(u16, h.in, h.bit_count);
|
||||||
|
try expect(v == h.out);
|
||||||
|
}
|
||||||
|
}
|
1111
lib/std/compress/deflate/compressor.zig
Normal file
1111
lib/std/compress/deflate/compressor.zig
Normal file
File diff suppressed because it is too large
Load Diff
560
lib/std/compress/deflate/compressor_test.zig
Normal file
560
lib/std/compress/deflate/compressor_test.zig
Normal file
@ -0,0 +1,560 @@
|
|||||||
|
const std = @import("std");
|
||||||
|
const builtin = @import("builtin");
|
||||||
|
const expect = std.testing.expect;
|
||||||
|
const fifo = std.fifo;
|
||||||
|
const io = std.io;
|
||||||
|
const math = std.math;
|
||||||
|
const mem = std.mem;
|
||||||
|
const testing = std.testing;
|
||||||
|
|
||||||
|
const ArrayList = std.ArrayList;
|
||||||
|
|
||||||
|
const deflate = @import("compressor.zig");
|
||||||
|
const inflate = @import("decompressor.zig");
|
||||||
|
|
||||||
|
const compressor = deflate.compressor;
|
||||||
|
const decompressor = inflate.decompressor;
|
||||||
|
const huffman_only = deflate.huffman_only;
|
||||||
|
|
||||||
|
fn testSync(level: deflate.Compression, input: []const u8) !void {
|
||||||
|
if (input.len == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
var divided_buf = fifo
|
||||||
|
.LinearFifo(u8, fifo.LinearFifoBufferType.Dynamic)
|
||||||
|
.init(testing.allocator);
|
||||||
|
defer divided_buf.deinit();
|
||||||
|
var whole_buf = std.ArrayList(u8).init(testing.allocator);
|
||||||
|
defer whole_buf.deinit();
|
||||||
|
|
||||||
|
var multi_writer = io.multiWriter(.{
|
||||||
|
divided_buf.writer(),
|
||||||
|
whole_buf.writer(),
|
||||||
|
}).writer();
|
||||||
|
|
||||||
|
var comp = try compressor(
|
||||||
|
testing.allocator,
|
||||||
|
multi_writer,
|
||||||
|
.{ .level = level },
|
||||||
|
);
|
||||||
|
defer comp.deinit();
|
||||||
|
|
||||||
|
{
|
||||||
|
var decomp = try decompressor(
|
||||||
|
testing.allocator,
|
||||||
|
divided_buf.reader(),
|
||||||
|
null,
|
||||||
|
);
|
||||||
|
defer decomp.deinit();
|
||||||
|
|
||||||
|
// Write first half of the input and flush()
|
||||||
|
var half: usize = (input.len + 1) / 2;
|
||||||
|
var half_len: usize = half - 0;
|
||||||
|
{
|
||||||
|
_ = try comp.writer().writeAll(input[0..half]);
|
||||||
|
|
||||||
|
// Flush
|
||||||
|
try comp.flush();
|
||||||
|
|
||||||
|
// Read back
|
||||||
|
var decompressed = try testing.allocator.alloc(u8, half_len);
|
||||||
|
defer testing.allocator.free(decompressed);
|
||||||
|
|
||||||
|
var read = try decomp.reader().readAll(decompressed); // read at least half
|
||||||
|
try expect(read == half_len);
|
||||||
|
try expect(mem.eql(u8, input[0..half], decompressed));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write last half of the input and close()
|
||||||
|
half_len = input.len - half;
|
||||||
|
{
|
||||||
|
_ = try comp.writer().writeAll(input[half..]);
|
||||||
|
|
||||||
|
// Close
|
||||||
|
try comp.close();
|
||||||
|
|
||||||
|
// Read back
|
||||||
|
var decompressed = try testing.allocator.alloc(u8, half_len);
|
||||||
|
defer testing.allocator.free(decompressed);
|
||||||
|
|
||||||
|
var read = try decomp.reader().readAll(decompressed);
|
||||||
|
try expect(read == half_len);
|
||||||
|
try expect(mem.eql(u8, input[half..], decompressed));
|
||||||
|
|
||||||
|
// Extra read
|
||||||
|
var final: [10]u8 = undefined;
|
||||||
|
read = try decomp.reader().readAll(&final);
|
||||||
|
try expect(read == 0); // expect ended stream to return 0 bytes
|
||||||
|
|
||||||
|
_ = decomp.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_ = try comp.writer().writeAll(input);
|
||||||
|
try comp.close();
|
||||||
|
|
||||||
|
// stream should work for ordinary reader too (reading whole_buf in one go)
|
||||||
|
var whole_buf_reader = io.fixedBufferStream(whole_buf.items).reader();
|
||||||
|
var decomp = try decompressor(testing.allocator, whole_buf_reader, null);
|
||||||
|
defer decomp.deinit();
|
||||||
|
|
||||||
|
var decompressed = try testing.allocator.alloc(u8, input.len);
|
||||||
|
defer testing.allocator.free(decompressed);
|
||||||
|
|
||||||
|
_ = try decomp.reader().readAll(decompressed);
|
||||||
|
_ = decomp.close();
|
||||||
|
|
||||||
|
try expect(mem.eql(u8, input, decompressed));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn testToFromWithLevelAndLimit(level: deflate.Compression, input: []const u8, limit: u32) !void {
|
||||||
|
var compressed = std.ArrayList(u8).init(testing.allocator);
|
||||||
|
defer compressed.deinit();
|
||||||
|
|
||||||
|
var comp = try compressor(testing.allocator, compressed.writer(), .{ .level = level });
|
||||||
|
defer comp.deinit();
|
||||||
|
|
||||||
|
try comp.writer().writeAll(input);
|
||||||
|
try comp.close();
|
||||||
|
|
||||||
|
if (limit > 0) {
|
||||||
|
try expect(compressed.items.len <= limit);
|
||||||
|
}
|
||||||
|
|
||||||
|
var decomp = try decompressor(
|
||||||
|
testing.allocator,
|
||||||
|
io.fixedBufferStream(compressed.items).reader(),
|
||||||
|
null,
|
||||||
|
);
|
||||||
|
defer decomp.deinit();
|
||||||
|
|
||||||
|
var decompressed = try testing.allocator.alloc(u8, input.len);
|
||||||
|
defer testing.allocator.free(decompressed);
|
||||||
|
|
||||||
|
var read: usize = try decomp.reader().readAll(decompressed);
|
||||||
|
try expect(read == input.len);
|
||||||
|
try expect(mem.eql(u8, input, decompressed));
|
||||||
|
|
||||||
|
try testSync(level, input);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn testToFromWithLimit(input: []const u8, limit: [11]u32) !void {
|
||||||
|
try testToFromWithLevelAndLimit(.no_compression, input, limit[0]);
|
||||||
|
try testToFromWithLevelAndLimit(.best_speed, input, limit[1]);
|
||||||
|
try testToFromWithLevelAndLimit(.level_2, input, limit[2]);
|
||||||
|
try testToFromWithLevelAndLimit(.level_3, input, limit[3]);
|
||||||
|
try testToFromWithLevelAndLimit(.level_4, input, limit[4]);
|
||||||
|
try testToFromWithLevelAndLimit(.level_5, input, limit[5]);
|
||||||
|
try testToFromWithLevelAndLimit(.level_6, input, limit[6]);
|
||||||
|
try testToFromWithLevelAndLimit(.level_7, input, limit[7]);
|
||||||
|
try testToFromWithLevelAndLimit(.level_8, input, limit[8]);
|
||||||
|
try testToFromWithLevelAndLimit(.best_compression, input, limit[9]);
|
||||||
|
try testToFromWithLevelAndLimit(.huffman_only, input, limit[10]);
|
||||||
|
}
|
||||||
|
|
||||||
|
test "deflate/inflate" {
|
||||||
|
var limits = [_]u32{0} ** 11;
|
||||||
|
|
||||||
|
var test0 = [_]u8{};
|
||||||
|
var test1 = [_]u8{0x11};
|
||||||
|
var test2 = [_]u8{ 0x11, 0x12 };
|
||||||
|
var test3 = [_]u8{ 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 };
|
||||||
|
var test4 = [_]u8{ 0x11, 0x10, 0x13, 0x41, 0x21, 0x21, 0x41, 0x13, 0x87, 0x78, 0x13 };
|
||||||
|
|
||||||
|
try testToFromWithLimit(&test0, limits);
|
||||||
|
try testToFromWithLimit(&test1, limits);
|
||||||
|
try testToFromWithLimit(&test2, limits);
|
||||||
|
try testToFromWithLimit(&test3, limits);
|
||||||
|
try testToFromWithLimit(&test4, limits);
|
||||||
|
|
||||||
|
var large_data_chunk = try testing.allocator.alloc(u8, 100_000);
|
||||||
|
defer testing.allocator.free(large_data_chunk);
|
||||||
|
// fill with random data
|
||||||
|
for (large_data_chunk) |_, i| {
|
||||||
|
var mul: u8 = @truncate(u8, i);
|
||||||
|
_ = @mulWithOverflow(u8, mul, mul, &mul);
|
||||||
|
large_data_chunk[i] = mul;
|
||||||
|
}
|
||||||
|
try testToFromWithLimit(large_data_chunk, limits);
|
||||||
|
}
|
||||||
|
|
||||||
|
test "very long sparse chunk" {
|
||||||
|
// A SparseReader returns a stream consisting of 0s ending with 65,536 (1<<16) 1s.
|
||||||
|
// This tests missing hash references in a very large input.
|
||||||
|
const SparseReader = struct {
|
||||||
|
l: usize, // length
|
||||||
|
cur: usize, // current position
|
||||||
|
|
||||||
|
const Self = @This();
|
||||||
|
const Error = error{};
|
||||||
|
|
||||||
|
pub const Reader = io.Reader(*Self, Error, read);
|
||||||
|
|
||||||
|
pub fn reader(self: *Self) Reader {
|
||||||
|
return .{ .context = self };
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read(s: *Self, b: []u8) Error!usize {
|
||||||
|
var n: usize = 0; // amount read
|
||||||
|
|
||||||
|
if (s.cur >= s.l) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
n = b.len;
|
||||||
|
var cur = s.cur + n;
|
||||||
|
if (cur > s.l) {
|
||||||
|
n -= cur - s.l;
|
||||||
|
cur = s.l;
|
||||||
|
}
|
||||||
|
for (b[0..n]) |_, i| {
|
||||||
|
if (s.cur + i >= s.l -| (1 << 16)) {
|
||||||
|
b[i] = 1;
|
||||||
|
} else {
|
||||||
|
b[i] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s.cur = cur;
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
var comp = try compressor(
|
||||||
|
testing.allocator,
|
||||||
|
io.null_writer,
|
||||||
|
.{ .level = .best_speed },
|
||||||
|
);
|
||||||
|
defer comp.deinit();
|
||||||
|
var writer = comp.writer();
|
||||||
|
|
||||||
|
var sparse = SparseReader{ .l = 0x23e8, .cur = 0 };
|
||||||
|
var reader = sparse.reader();
|
||||||
|
|
||||||
|
var read: usize = 1;
|
||||||
|
var written: usize = 0;
|
||||||
|
while (read > 0) {
|
||||||
|
var buf: [1 << 15]u8 = undefined; // 32,768 bytes buffer
|
||||||
|
read = try reader.read(&buf);
|
||||||
|
written += try writer.write(buf[0..read]);
|
||||||
|
}
|
||||||
|
try expect(written == 0x23e8);
|
||||||
|
}
|
||||||
|
|
||||||
|
test "compressor reset" {
|
||||||
|
for (std.enums.values(deflate.Compression)) |c| {
|
||||||
|
try testWriterReset(c, null);
|
||||||
|
try testWriterReset(c, "dict");
|
||||||
|
try testWriterReset(c, "hello");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn testWriterReset(level: deflate.Compression, dict: ?[]const u8) !void {
|
||||||
|
const filler = struct {
|
||||||
|
fn writeData(c: anytype) !void {
|
||||||
|
const msg = "all your base are belong to us";
|
||||||
|
try c.writer().writeAll(msg);
|
||||||
|
try c.flush();
|
||||||
|
|
||||||
|
const hello = "hello world";
|
||||||
|
var i: usize = 0;
|
||||||
|
while (i < 1024) : (i += 1) {
|
||||||
|
try c.writer().writeAll(hello);
|
||||||
|
}
|
||||||
|
|
||||||
|
i = 0;
|
||||||
|
while (i < 65000) : (i += 1) {
|
||||||
|
try c.writer().writeAll("x");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
var buf1 = ArrayList(u8).init(testing.allocator);
|
||||||
|
defer buf1.deinit();
|
||||||
|
var buf2 = ArrayList(u8).init(testing.allocator);
|
||||||
|
defer buf2.deinit();
|
||||||
|
|
||||||
|
var comp = try compressor(
|
||||||
|
testing.allocator,
|
||||||
|
buf1.writer(),
|
||||||
|
.{ .level = level, .dictionary = dict },
|
||||||
|
);
|
||||||
|
defer comp.deinit();
|
||||||
|
|
||||||
|
try filler.writeData(&comp);
|
||||||
|
try comp.close();
|
||||||
|
|
||||||
|
comp.reset(buf2.writer());
|
||||||
|
try filler.writeData(&comp);
|
||||||
|
try comp.close();
|
||||||
|
|
||||||
|
try expect(mem.eql(u8, buf1.items, buf2.items));
|
||||||
|
}
|
||||||
|
|
||||||
|
test "decompressor dictionary" {
|
||||||
|
const dict = "hello world"; // dictionary
|
||||||
|
const text = "hello again world";
|
||||||
|
|
||||||
|
var compressed = fifo
|
||||||
|
.LinearFifo(u8, fifo.LinearFifoBufferType.Dynamic)
|
||||||
|
.init(testing.allocator);
|
||||||
|
defer compressed.deinit();
|
||||||
|
|
||||||
|
var comp = try compressor(
|
||||||
|
testing.allocator,
|
||||||
|
compressed.writer(),
|
||||||
|
.{
|
||||||
|
.level = .level_5,
|
||||||
|
.dictionary = null, // no dictionary
|
||||||
|
},
|
||||||
|
);
|
||||||
|
defer comp.deinit();
|
||||||
|
|
||||||
|
// imitate a compressor with a dictionary
|
||||||
|
try comp.writer().writeAll(dict);
|
||||||
|
try comp.flush();
|
||||||
|
compressed.discard(compressed.readableLength()); // empty the output
|
||||||
|
try comp.writer().writeAll(text);
|
||||||
|
try comp.close();
|
||||||
|
|
||||||
|
var decompressed = try testing.allocator.alloc(u8, text.len);
|
||||||
|
defer testing.allocator.free(decompressed);
|
||||||
|
|
||||||
|
var decomp = try decompressor(
|
||||||
|
testing.allocator,
|
||||||
|
compressed.reader(),
|
||||||
|
dict,
|
||||||
|
);
|
||||||
|
defer decomp.deinit();
|
||||||
|
|
||||||
|
_ = try decomp.reader().readAll(decompressed);
|
||||||
|
try expect(mem.eql(u8, decompressed, "hello again world"));
|
||||||
|
}
|
||||||
|
|
||||||
|
test "compressor dictionary" {
|
||||||
|
const dict = "hello world";
|
||||||
|
const text = "hello again world";
|
||||||
|
|
||||||
|
var compressed_nd = fifo
|
||||||
|
.LinearFifo(u8, fifo.LinearFifoBufferType.Dynamic)
|
||||||
|
.init(testing.allocator); // compressed with no dictionary
|
||||||
|
defer compressed_nd.deinit();
|
||||||
|
|
||||||
|
var compressed_d = ArrayList(u8).init(testing.allocator); // compressed with a dictionary
|
||||||
|
defer compressed_d.deinit();
|
||||||
|
|
||||||
|
// imitate a compressor with a dictionary
|
||||||
|
var comp_nd = try compressor(
|
||||||
|
testing.allocator,
|
||||||
|
compressed_nd.writer(),
|
||||||
|
.{
|
||||||
|
.level = .level_5,
|
||||||
|
.dictionary = null, // no dictionary
|
||||||
|
},
|
||||||
|
);
|
||||||
|
defer comp_nd.deinit();
|
||||||
|
try comp_nd.writer().writeAll(dict);
|
||||||
|
try comp_nd.flush();
|
||||||
|
compressed_nd.discard(compressed_nd.readableLength()); // empty the output
|
||||||
|
try comp_nd.writer().writeAll(text);
|
||||||
|
try comp_nd.close();
|
||||||
|
|
||||||
|
// use a compressor with a dictionary
|
||||||
|
var comp_d = try compressor(
|
||||||
|
testing.allocator,
|
||||||
|
compressed_d.writer(),
|
||||||
|
.{
|
||||||
|
.level = .level_5,
|
||||||
|
.dictionary = dict, // with a dictionary
|
||||||
|
},
|
||||||
|
);
|
||||||
|
defer comp_d.deinit();
|
||||||
|
try comp_d.writer().writeAll(text);
|
||||||
|
try comp_d.close();
|
||||||
|
|
||||||
|
try expect(mem.eql(u8, compressed_nd.readableSlice(0), compressed_d.items));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the hash for best_speed only if d.index < d.maxInsertIndex
|
||||||
|
// See https://golang.org/issue/2508
|
||||||
|
test "Go non-regression test for 2508" {
|
||||||
|
var comp = try compressor(
|
||||||
|
testing.allocator,
|
||||||
|
io.null_writer,
|
||||||
|
.{ .level = .best_speed },
|
||||||
|
);
|
||||||
|
defer comp.deinit();
|
||||||
|
|
||||||
|
var buf = [_]u8{0} ** 1024;
|
||||||
|
|
||||||
|
var i: usize = 0;
|
||||||
|
while (i < 131_072) : (i += 1) {
|
||||||
|
try comp.writer().writeAll(&buf);
|
||||||
|
try comp.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
test "deflate/inflate string" {
|
||||||
|
// Skip wasi because it does not support std.fs.openDirAbsolute()
|
||||||
|
if (builtin.os.tag == .wasi) return error.SkipZigTest;
|
||||||
|
|
||||||
|
const current_dir = try std.fs.openDirAbsolute(std.fs.path.dirname(@src().file).?, .{});
|
||||||
|
const testdata_dir = try current_dir.openDir("testdata", .{});
|
||||||
|
|
||||||
|
const StringTest = struct {
|
||||||
|
filename: []const u8,
|
||||||
|
limit: [11]u32,
|
||||||
|
};
|
||||||
|
|
||||||
|
var deflate_inflate_string_tests = [_]StringTest{
|
||||||
|
.{
|
||||||
|
.filename = "compress-e.txt",
|
||||||
|
.limit = [11]u32{
|
||||||
|
100_018, // no_compression
|
||||||
|
50_650, // best_speed
|
||||||
|
50_960, // 2
|
||||||
|
51_150, // 3
|
||||||
|
50_930, // 4
|
||||||
|
50_790, // 5
|
||||||
|
50_790, // 6
|
||||||
|
50_790, // 7
|
||||||
|
50_790, // 8
|
||||||
|
50_790, // best_compression
|
||||||
|
43_683, // huffman_only
|
||||||
|
},
|
||||||
|
},
|
||||||
|
.{
|
||||||
|
.filename = "rfc1951.txt",
|
||||||
|
.limit = [11]u32{
|
||||||
|
36_954, // no_compression
|
||||||
|
12_952, // best_speed
|
||||||
|
12_228, // 2
|
||||||
|
12_016, // 3
|
||||||
|
11_466, // 4
|
||||||
|
11_191, // 5
|
||||||
|
11_129, // 6
|
||||||
|
11_120, // 7
|
||||||
|
11_112, // 8
|
||||||
|
11_109, // best_compression
|
||||||
|
20_273, // huffman_only
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
for (deflate_inflate_string_tests) |t| {
|
||||||
|
const golden_file = try testdata_dir.openFile(t.filename, .{ .read = true });
|
||||||
|
defer golden_file.close();
|
||||||
|
var golden = try golden_file.reader().readAllAlloc(testing.allocator, math.maxInt(usize));
|
||||||
|
defer testing.allocator.free(golden);
|
||||||
|
|
||||||
|
try testToFromWithLimit(golden, t.limit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
test "inflate reset" {
|
||||||
|
const strings = [_][]const u8{
|
||||||
|
"lorem ipsum izzle fo rizzle",
|
||||||
|
"the quick brown fox jumped over",
|
||||||
|
};
|
||||||
|
|
||||||
|
var compressed_strings = [_]ArrayList(u8){
|
||||||
|
ArrayList(u8).init(testing.allocator),
|
||||||
|
ArrayList(u8).init(testing.allocator),
|
||||||
|
};
|
||||||
|
defer compressed_strings[0].deinit();
|
||||||
|
defer compressed_strings[1].deinit();
|
||||||
|
|
||||||
|
for (strings) |s, i| {
|
||||||
|
var comp = try compressor(
|
||||||
|
testing.allocator,
|
||||||
|
compressed_strings[i].writer(),
|
||||||
|
.{ .level = .level_6 },
|
||||||
|
);
|
||||||
|
defer comp.deinit();
|
||||||
|
|
||||||
|
try comp.writer().writeAll(s);
|
||||||
|
try comp.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
var decomp = try decompressor(
|
||||||
|
testing.allocator,
|
||||||
|
io.fixedBufferStream(compressed_strings[0].items).reader(),
|
||||||
|
null,
|
||||||
|
);
|
||||||
|
defer decomp.deinit();
|
||||||
|
|
||||||
|
var decompressed_0: []u8 = try decomp.reader()
|
||||||
|
.readAllAlloc(testing.allocator, math.maxInt(usize));
|
||||||
|
defer testing.allocator.free(decompressed_0);
|
||||||
|
|
||||||
|
try decomp.reset(
|
||||||
|
io.fixedBufferStream(compressed_strings[1].items).reader(),
|
||||||
|
null,
|
||||||
|
);
|
||||||
|
|
||||||
|
var decompressed_1: []u8 = try decomp.reader()
|
||||||
|
.readAllAlloc(testing.allocator, math.maxInt(usize));
|
||||||
|
defer testing.allocator.free(decompressed_1);
|
||||||
|
|
||||||
|
_ = decomp.close();
|
||||||
|
|
||||||
|
try expect(strings[0].len == decompressed_0.len);
|
||||||
|
try expect(strings[1].len == decompressed_1.len);
|
||||||
|
|
||||||
|
try expect(mem.eql(u8, strings[0], decompressed_0));
|
||||||
|
try expect(mem.eql(u8, strings[1], decompressed_1));
|
||||||
|
}
|
||||||
|
|
||||||
|
test "inflate reset dictionary" {
|
||||||
|
const dict = "the lorem fox";
|
||||||
|
const strings = [_][]const u8{
|
||||||
|
"lorem ipsum izzle fo rizzle",
|
||||||
|
"the quick brown fox jumped over",
|
||||||
|
};
|
||||||
|
|
||||||
|
var compressed_strings = [_]ArrayList(u8){
|
||||||
|
ArrayList(u8).init(testing.allocator),
|
||||||
|
ArrayList(u8).init(testing.allocator),
|
||||||
|
};
|
||||||
|
defer compressed_strings[0].deinit();
|
||||||
|
defer compressed_strings[1].deinit();
|
||||||
|
|
||||||
|
for (strings) |s, i| {
|
||||||
|
var comp = try compressor(
|
||||||
|
testing.allocator,
|
||||||
|
compressed_strings[i].writer(),
|
||||||
|
.{ .level = .level_6 },
|
||||||
|
);
|
||||||
|
defer comp.deinit();
|
||||||
|
|
||||||
|
try comp.writer().writeAll(s);
|
||||||
|
try comp.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
var decomp = try decompressor(
|
||||||
|
testing.allocator,
|
||||||
|
io.fixedBufferStream(compressed_strings[0].items).reader(),
|
||||||
|
dict,
|
||||||
|
);
|
||||||
|
defer decomp.deinit();
|
||||||
|
|
||||||
|
var decompressed_0: []u8 = try decomp.reader()
|
||||||
|
.readAllAlloc(testing.allocator, math.maxInt(usize));
|
||||||
|
defer testing.allocator.free(decompressed_0);
|
||||||
|
|
||||||
|
try decomp.reset(
|
||||||
|
io.fixedBufferStream(compressed_strings[1].items).reader(),
|
||||||
|
dict,
|
||||||
|
);
|
||||||
|
|
||||||
|
var decompressed_1: []u8 = try decomp.reader()
|
||||||
|
.readAllAlloc(testing.allocator, math.maxInt(usize));
|
||||||
|
defer testing.allocator.free(decompressed_1);
|
||||||
|
|
||||||
|
_ = decomp.close();
|
||||||
|
|
||||||
|
try expect(strings[0].len == decompressed_0.len);
|
||||||
|
try expect(strings[1].len == decompressed_1.len);
|
||||||
|
|
||||||
|
try expect(mem.eql(u8, strings[0], decompressed_0));
|
||||||
|
try expect(mem.eql(u8, strings[1], decompressed_1));
|
||||||
|
}
|
1090
lib/std/compress/deflate/decompressor.zig
Normal file
1090
lib/std/compress/deflate/decompressor.zig
Normal file
File diff suppressed because it is too large
Load Diff
28
lib/std/compress/deflate/deflate_const.zig
Normal file
28
lib/std/compress/deflate/deflate_const.zig
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
// Deflate
|
||||||
|
|
||||||
|
// Biggest block size for uncompressed block.
|
||||||
|
pub const max_store_block_size = 65535;
|
||||||
|
// The special code used to mark the end of a block.
|
||||||
|
pub const end_block_marker = 256;
|
||||||
|
|
||||||
|
// LZ77
|
||||||
|
|
||||||
|
// The smallest match length per the RFC section 3.2.5
|
||||||
|
pub const base_match_length = 3;
|
||||||
|
// The smallest match offset.
|
||||||
|
pub const base_match_offset = 1;
|
||||||
|
// The largest match length.
|
||||||
|
pub const max_match_length = 258;
|
||||||
|
// The largest match offset.
|
||||||
|
pub const max_match_offset = 1 << 15;
|
||||||
|
|
||||||
|
// Huffman Codes
|
||||||
|
|
||||||
|
// The largest offset code.
|
||||||
|
pub const offset_code_count = 30;
|
||||||
|
// Max number of frequencies used for a Huffman Code
|
||||||
|
// Possible lengths are codegenCodeCount (19), offset_code_count (30) and max_num_lit (286).
|
||||||
|
// The largest of these is max_num_lit.
|
||||||
|
pub const max_num_frequencies = max_num_lit;
|
||||||
|
// Maximum number of literals.
|
||||||
|
pub const max_num_lit = 286;
|
721
lib/std/compress/deflate/deflate_fast.zig
Normal file
721
lib/std/compress/deflate/deflate_fast.zig
Normal file
@ -0,0 +1,721 @@
|
|||||||
|
// This encoding algorithm, which prioritizes speed over output size, is
|
||||||
|
// based on Snappy's LZ77-style encoder: github.com/golang/snappy
|
||||||
|
|
||||||
|
const std = @import("std");
|
||||||
|
const math = std.math;
|
||||||
|
const mem = std.mem;
|
||||||
|
|
||||||
|
const Allocator = std.mem.Allocator;
|
||||||
|
|
||||||
|
const deflate_const = @import("deflate_const.zig");
|
||||||
|
const deflate = @import("compressor.zig");
|
||||||
|
const token = @import("token.zig");
|
||||||
|
|
||||||
|
const base_match_length = deflate_const.base_match_length;
|
||||||
|
const base_match_offset = deflate_const.base_match_offset;
|
||||||
|
const max_match_length = deflate_const.max_match_length;
|
||||||
|
const max_match_offset = deflate_const.max_match_offset;
|
||||||
|
const max_store_block_size = deflate_const.max_store_block_size;
|
||||||
|
|
||||||
|
const table_bits = 14; // Bits used in the table.
|
||||||
|
const table_mask = table_size - 1; // Mask for table indices. Redundant, but can eliminate bounds checks.
|
||||||
|
const table_shift = 32 - table_bits; // Right-shift to get the table_bits most significant bits of a uint32.
|
||||||
|
const table_size = 1 << table_bits; // Size of the table.
|
||||||
|
|
||||||
|
// Reset the buffer offset when reaching this.
|
||||||
|
// Offsets are stored between blocks as i32 values.
|
||||||
|
// Since the offset we are checking against is at the beginning
|
||||||
|
// of the buffer, we need to subtract the current and input
|
||||||
|
// buffer to not risk overflowing the i32.
|
||||||
|
const buffer_reset = math.maxInt(i32) - max_store_block_size * 2;
|
||||||
|
|
||||||
|
fn load32(b: []u8, i: i32) u32 {
|
||||||
|
var s = b[@intCast(usize, i) .. @intCast(usize, i) + 4];
|
||||||
|
return @intCast(u32, s[0]) |
|
||||||
|
@intCast(u32, s[1]) << 8 |
|
||||||
|
@intCast(u32, s[2]) << 16 |
|
||||||
|
@intCast(u32, s[3]) << 24;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn load64(b: []u8, i: i32) u64 {
|
||||||
|
var s = b[@intCast(usize, i)..@intCast(usize, i + 8)];
|
||||||
|
return @intCast(u64, s[0]) |
|
||||||
|
@intCast(u64, s[1]) << 8 |
|
||||||
|
@intCast(u64, s[2]) << 16 |
|
||||||
|
@intCast(u64, s[3]) << 24 |
|
||||||
|
@intCast(u64, s[4]) << 32 |
|
||||||
|
@intCast(u64, s[5]) << 40 |
|
||||||
|
@intCast(u64, s[6]) << 48 |
|
||||||
|
@intCast(u64, s[7]) << 56;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn hash(u: u32) u32 {
|
||||||
|
return (u *% 0x1e35a7bd) >> table_shift;
|
||||||
|
}
|
||||||
|
|
||||||
|
// These constants are defined by the Snappy implementation so that its
|
||||||
|
// assembly implementation can fast-path some 16-bytes-at-a-time copies.
|
||||||
|
// They aren't necessary in the pure Go implementation, and may not be
|
||||||
|
// necessary in Zig, but using the same thresholds doesn't really hurt.
|
||||||
|
const input_margin = 16 - 1;
|
||||||
|
const min_non_literal_block_size = 1 + 1 + input_margin;
|
||||||
|
|
||||||
|
const TableEntry = struct {
|
||||||
|
val: u32, // Value at destination
|
||||||
|
offset: i32,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub fn deflateFast() DeflateFast {
|
||||||
|
return DeflateFast{
|
||||||
|
.table = [_]TableEntry{.{ .val = 0, .offset = 0 }} ** table_size,
|
||||||
|
.prev = undefined,
|
||||||
|
.prev_len = 0,
|
||||||
|
.cur = max_store_block_size,
|
||||||
|
.allocator = undefined,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeflateFast maintains the table for matches,
|
||||||
|
// and the previous byte block for cross block matching.
|
||||||
|
pub const DeflateFast = struct {
|
||||||
|
table: [table_size]TableEntry,
|
||||||
|
prev: []u8, // Previous block, zero length if unknown.
|
||||||
|
prev_len: u32, // Previous block length
|
||||||
|
cur: i32, // Current match offset.
|
||||||
|
allocator: Allocator,
|
||||||
|
|
||||||
|
const Self = @This();
|
||||||
|
|
||||||
|
pub fn init(self: *Self, allocator: Allocator) !void {
|
||||||
|
self.allocator = allocator;
|
||||||
|
self.prev = try allocator.alloc(u8, max_store_block_size);
|
||||||
|
self.prev_len = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn deinit(self: *Self) void {
|
||||||
|
self.allocator.free(self.prev);
|
||||||
|
self.prev_len = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Encodes a block given in `src` and appends tokens to `dst` and returns the result.
|
||||||
|
pub fn encode(self: *Self, dst: []token.Token, tokens_count: *u16, src: []u8) void {
|
||||||
|
|
||||||
|
// Ensure that self.cur doesn't wrap.
|
||||||
|
if (self.cur >= buffer_reset) {
|
||||||
|
self.shiftOffsets();
|
||||||
|
}
|
||||||
|
|
||||||
|
// This check isn't in the Snappy implementation, but there, the caller
|
||||||
|
// instead of the callee handles this case.
|
||||||
|
if (src.len < min_non_literal_block_size) {
|
||||||
|
self.cur += max_store_block_size;
|
||||||
|
self.prev_len = 0;
|
||||||
|
emitLiteral(dst, tokens_count, src);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// s_limit is when to stop looking for offset/length copies. The input_margin
|
||||||
|
// lets us use a fast path for emitLiteral in the main loop, while we are
|
||||||
|
// looking for copies.
|
||||||
|
var s_limit = @intCast(i32, src.len - input_margin);
|
||||||
|
|
||||||
|
// next_emit is where in src the next emitLiteral should start from.
|
||||||
|
var next_emit: i32 = 0;
|
||||||
|
var s: i32 = 0;
|
||||||
|
var cv: u32 = load32(src, s);
|
||||||
|
var next_hash: u32 = hash(cv);
|
||||||
|
|
||||||
|
outer: while (true) {
|
||||||
|
// Copied from the C++ snappy implementation:
|
||||||
|
//
|
||||||
|
// Heuristic match skipping: If 32 bytes are scanned with no matches
|
||||||
|
// found, start looking only at every other byte. If 32 more bytes are
|
||||||
|
// scanned (or skipped), look at every third byte, etc.. When a match
|
||||||
|
// is found, immediately go back to looking at every byte. This is a
|
||||||
|
// small loss (~5% performance, ~0.1% density) for compressible data
|
||||||
|
// due to more bookkeeping, but for non-compressible data (such as
|
||||||
|
// JPEG) it's a huge win since the compressor quickly "realizes" the
|
||||||
|
// data is incompressible and doesn't bother looking for matches
|
||||||
|
// everywhere.
|
||||||
|
//
|
||||||
|
// The "skip" variable keeps track of how many bytes there are since
|
||||||
|
// the last match; dividing it by 32 (ie. right-shifting by five) gives
|
||||||
|
// the number of bytes to move ahead for each iteration.
|
||||||
|
var skip: i32 = 32;
|
||||||
|
|
||||||
|
var next_s: i32 = s;
|
||||||
|
var candidate: TableEntry = undefined;
|
||||||
|
while (true) {
|
||||||
|
s = next_s;
|
||||||
|
var bytes_between_hash_lookups = skip >> 5;
|
||||||
|
next_s = s + bytes_between_hash_lookups;
|
||||||
|
skip += bytes_between_hash_lookups;
|
||||||
|
if (next_s > s_limit) {
|
||||||
|
break :outer;
|
||||||
|
}
|
||||||
|
candidate = self.table[next_hash & table_mask];
|
||||||
|
var now = load32(src, next_s);
|
||||||
|
self.table[next_hash & table_mask] = .{ .offset = s + self.cur, .val = cv };
|
||||||
|
next_hash = hash(now);
|
||||||
|
|
||||||
|
var offset = s - (candidate.offset - self.cur);
|
||||||
|
if (offset > max_match_offset or cv != candidate.val) {
|
||||||
|
// Out of range or not matched.
|
||||||
|
cv = now;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// A 4-byte match has been found. We'll later see if more than 4 bytes
|
||||||
|
// match. But, prior to the match, src[next_emit..s] are unmatched. Emit
|
||||||
|
// them as literal bytes.
|
||||||
|
emitLiteral(dst, tokens_count, src[@intCast(usize, next_emit)..@intCast(usize, s)]);
|
||||||
|
|
||||||
|
// Call emitCopy, and then see if another emitCopy could be our next
|
||||||
|
// move. Repeat until we find no match for the input immediately after
|
||||||
|
// what was consumed by the last emitCopy call.
|
||||||
|
//
|
||||||
|
// If we exit this loop normally then we need to call emitLiteral next,
|
||||||
|
// though we don't yet know how big the literal will be. We handle that
|
||||||
|
// by proceeding to the next iteration of the main loop. We also can
|
||||||
|
// exit this loop via goto if we get close to exhausting the input.
|
||||||
|
while (true) {
|
||||||
|
// Invariant: we have a 4-byte match at s, and no need to emit any
|
||||||
|
// literal bytes prior to s.
|
||||||
|
|
||||||
|
// Extend the 4-byte match as long as possible.
|
||||||
|
//
|
||||||
|
s += 4;
|
||||||
|
var t = candidate.offset - self.cur + 4;
|
||||||
|
var l = self.matchLen(s, t, src);
|
||||||
|
|
||||||
|
// matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
|
||||||
|
dst[tokens_count.*] = token.matchToken(
|
||||||
|
@intCast(u32, l + 4 - base_match_length),
|
||||||
|
@intCast(u32, s - t - base_match_offset),
|
||||||
|
);
|
||||||
|
tokens_count.* += 1;
|
||||||
|
s += l;
|
||||||
|
next_emit = s;
|
||||||
|
if (s >= s_limit) {
|
||||||
|
break :outer;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We could immediately start working at s now, but to improve
|
||||||
|
// compression we first update the hash table at s-1 and at s. If
|
||||||
|
// another emitCopy is not our next move, also calculate next_hash
|
||||||
|
// at s+1. At least on amd64 architecture, these three hash calculations
|
||||||
|
// are faster as one load64 call (with some shifts) instead of
|
||||||
|
// three load32 calls.
|
||||||
|
var x = load64(src, s - 1);
|
||||||
|
var prev_hash = hash(@truncate(u32, x));
|
||||||
|
self.table[prev_hash & table_mask] = TableEntry{
|
||||||
|
.offset = self.cur + s - 1,
|
||||||
|
.val = @truncate(u32, x),
|
||||||
|
};
|
||||||
|
x >>= 8;
|
||||||
|
var curr_hash = hash(@truncate(u32, x));
|
||||||
|
candidate = self.table[curr_hash & table_mask];
|
||||||
|
self.table[curr_hash & table_mask] = TableEntry{
|
||||||
|
.offset = self.cur + s,
|
||||||
|
.val = @truncate(u32, x),
|
||||||
|
};
|
||||||
|
|
||||||
|
var offset = s - (candidate.offset - self.cur);
|
||||||
|
if (offset > max_match_offset or @truncate(u32, x) != candidate.val) {
|
||||||
|
cv = @truncate(u32, x >> 8);
|
||||||
|
next_hash = hash(cv);
|
||||||
|
s += 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (@intCast(u32, next_emit) < src.len) {
|
||||||
|
emitLiteral(dst, tokens_count, src[@intCast(usize, next_emit)..]);
|
||||||
|
}
|
||||||
|
self.cur += @intCast(i32, src.len);
|
||||||
|
self.prev_len = @intCast(u32, src.len);
|
||||||
|
mem.copy(u8, self.prev[0..self.prev_len], src);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn emitLiteral(dst: []token.Token, tokens_count: *u16, lit: []u8) void {
|
||||||
|
for (lit) |v| {
|
||||||
|
dst[tokens_count.*] = token.literalToken(@intCast(u32, v));
|
||||||
|
tokens_count.* += 1;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// matchLen returns the match length between src[s..] and src[t..].
|
||||||
|
// t can be negative to indicate the match is starting in self.prev.
|
||||||
|
// We assume that src[s-4 .. s] and src[t-4 .. t] already match.
|
||||||
|
fn matchLen(self: *Self, s: i32, t: i32, src: []u8) i32 {
|
||||||
|
var s1 = @intCast(u32, s) + max_match_length - 4;
|
||||||
|
if (s1 > src.len) {
|
||||||
|
s1 = @intCast(u32, src.len);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we are inside the current block
|
||||||
|
if (t >= 0) {
|
||||||
|
var b = src[@intCast(usize, t)..];
|
||||||
|
var a = src[@intCast(usize, s)..@intCast(usize, s1)];
|
||||||
|
b = b[0..a.len];
|
||||||
|
// Extend the match to be as long as possible.
|
||||||
|
for (a) |_, i| {
|
||||||
|
if (a[i] != b[i]) {
|
||||||
|
return @intCast(i32, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return @intCast(i32, a.len);
|
||||||
|
}
|
||||||
|
|
||||||
|
// We found a match in the previous block.
|
||||||
|
var tp = @intCast(i32, self.prev_len) + t;
|
||||||
|
if (tp < 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extend the match to be as long as possible.
|
||||||
|
var a = src[@intCast(usize, s)..@intCast(usize, s1)];
|
||||||
|
var b = self.prev[@intCast(usize, tp)..@intCast(usize, self.prev_len)];
|
||||||
|
if (b.len > a.len) {
|
||||||
|
b = b[0..a.len];
|
||||||
|
}
|
||||||
|
a = a[0..b.len];
|
||||||
|
for (b) |_, i| {
|
||||||
|
if (a[i] != b[i]) {
|
||||||
|
return @intCast(i32, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we reached our limit, we matched everything we are
|
||||||
|
// allowed to in the previous block and we return.
|
||||||
|
var n = @intCast(i32, b.len);
|
||||||
|
if (@intCast(u32, s + n) == s1) {
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Continue looking for more matches in the current block.
|
||||||
|
a = src[@intCast(usize, s + n)..@intCast(usize, s1)];
|
||||||
|
b = src[0..a.len];
|
||||||
|
for (a) |_, i| {
|
||||||
|
if (a[i] != b[i]) {
|
||||||
|
return @intCast(i32, i) + n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return @intCast(i32, a.len) + n;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset resets the encoding history.
|
||||||
|
// This ensures that no matches are made to the previous block.
|
||||||
|
pub fn reset(self: *Self) void {
|
||||||
|
self.prev_len = 0;
|
||||||
|
// Bump the offset, so all matches will fail distance check.
|
||||||
|
// Nothing should be >= self.cur in the table.
|
||||||
|
self.cur += max_match_offset;
|
||||||
|
|
||||||
|
// Protect against self.cur wraparound.
|
||||||
|
if (self.cur >= buffer_reset) {
|
||||||
|
self.shiftOffsets();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// shiftOffsets will shift down all match offset.
|
||||||
|
// This is only called in rare situations to prevent integer overflow.
|
||||||
|
//
|
||||||
|
// See https://golang.org/issue/18636 and https://golang.org/issues/34121.
|
||||||
|
fn shiftOffsets(self: *Self) void {
|
||||||
|
if (self.prev_len == 0) {
|
||||||
|
// We have no history; just clear the table.
|
||||||
|
for (self.table) |_, i| {
|
||||||
|
self.table[i] = TableEntry{ .val = 0, .offset = 0 };
|
||||||
|
}
|
||||||
|
self.cur = max_match_offset + 1;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shift down everything in the table that isn't already too far away.
|
||||||
|
for (self.table) |_, i| {
|
||||||
|
var v = self.table[i].offset - self.cur + max_match_offset + 1;
|
||||||
|
if (v < 0) {
|
||||||
|
// We want to reset self.cur to max_match_offset + 1, so we need to shift
|
||||||
|
// all table entries down by (self.cur - (max_match_offset + 1)).
|
||||||
|
// Because we ignore matches > max_match_offset, we can cap
|
||||||
|
// any negative offsets at 0.
|
||||||
|
v = 0;
|
||||||
|
}
|
||||||
|
self.table[i].offset = v;
|
||||||
|
}
|
||||||
|
self.cur = max_match_offset + 1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
test "best speed match 1/3" {
|
||||||
|
const expect = std.testing.expect;
|
||||||
|
|
||||||
|
{
|
||||||
|
var previous = [_]u8{ 0, 0, 0, 1, 2 };
|
||||||
|
var e = DeflateFast{
|
||||||
|
.prev = &previous,
|
||||||
|
.prev_len = previous.len,
|
||||||
|
.table = undefined,
|
||||||
|
.allocator = undefined,
|
||||||
|
.cur = 0,
|
||||||
|
};
|
||||||
|
var current = [_]u8{ 3, 4, 5, 0, 1, 2, 3, 4, 5 };
|
||||||
|
var got: i32 = e.matchLen(3, -3, ¤t);
|
||||||
|
try expect(got == 6);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
var previous = [_]u8{ 0, 0, 0, 1, 2 };
|
||||||
|
var e = DeflateFast{
|
||||||
|
.prev = &previous,
|
||||||
|
.prev_len = previous.len,
|
||||||
|
.table = undefined,
|
||||||
|
.allocator = undefined,
|
||||||
|
.cur = 0,
|
||||||
|
};
|
||||||
|
var current = [_]u8{ 2, 4, 5, 0, 1, 2, 3, 4, 5 };
|
||||||
|
var got: i32 = e.matchLen(3, -3, ¤t);
|
||||||
|
try expect(got == 3);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
var previous = [_]u8{ 0, 0, 0, 1, 1 };
|
||||||
|
var e = DeflateFast{
|
||||||
|
.prev = &previous,
|
||||||
|
.prev_len = previous.len,
|
||||||
|
.table = undefined,
|
||||||
|
.allocator = undefined,
|
||||||
|
.cur = 0,
|
||||||
|
};
|
||||||
|
var current = [_]u8{ 3, 4, 5, 0, 1, 2, 3, 4, 5 };
|
||||||
|
var got: i32 = e.matchLen(3, -3, ¤t);
|
||||||
|
try expect(got == 2);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
var previous = [_]u8{ 0, 0, 0, 1, 2 };
|
||||||
|
var e = DeflateFast{
|
||||||
|
.prev = &previous,
|
||||||
|
.prev_len = previous.len,
|
||||||
|
.table = undefined,
|
||||||
|
.allocator = undefined,
|
||||||
|
.cur = 0,
|
||||||
|
};
|
||||||
|
var current = [_]u8{ 2, 2, 2, 2, 1, 2, 3, 4, 5 };
|
||||||
|
var got: i32 = e.matchLen(0, -1, ¤t);
|
||||||
|
try expect(got == 4);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
var previous = [_]u8{ 0, 0, 0, 1, 2, 3, 4, 5, 2, 2 };
|
||||||
|
var e = DeflateFast{
|
||||||
|
.prev = &previous,
|
||||||
|
.prev_len = previous.len,
|
||||||
|
.table = undefined,
|
||||||
|
.allocator = undefined,
|
||||||
|
.cur = 0,
|
||||||
|
};
|
||||||
|
var current = [_]u8{ 2, 2, 2, 2, 1, 2, 3, 4, 5 };
|
||||||
|
var got: i32 = e.matchLen(4, -7, ¤t);
|
||||||
|
try expect(got == 5);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
var previous = [_]u8{ 9, 9, 9, 9, 9 };
|
||||||
|
var e = DeflateFast{
|
||||||
|
.prev = &previous,
|
||||||
|
.prev_len = previous.len,
|
||||||
|
.table = undefined,
|
||||||
|
.allocator = undefined,
|
||||||
|
.cur = 0,
|
||||||
|
};
|
||||||
|
var current = [_]u8{ 2, 2, 2, 2, 1, 2, 3, 4, 5 };
|
||||||
|
var got: i32 = e.matchLen(0, -1, ¤t);
|
||||||
|
try expect(got == 0);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
var previous = [_]u8{ 9, 9, 9, 9, 9 };
|
||||||
|
var e = DeflateFast{
|
||||||
|
.prev = &previous,
|
||||||
|
.prev_len = previous.len,
|
||||||
|
.table = undefined,
|
||||||
|
.allocator = undefined,
|
||||||
|
.cur = 0,
|
||||||
|
};
|
||||||
|
var current = [_]u8{ 9, 2, 2, 2, 1, 2, 3, 4, 5 };
|
||||||
|
var got: i32 = e.matchLen(1, 0, ¤t);
|
||||||
|
try expect(got == 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
test "best speed match 2/3" {
|
||||||
|
const expect = std.testing.expect;
|
||||||
|
|
||||||
|
{
|
||||||
|
var previous = [_]u8{};
|
||||||
|
var e = DeflateFast{
|
||||||
|
.prev = &previous,
|
||||||
|
.prev_len = previous.len,
|
||||||
|
.table = undefined,
|
||||||
|
.allocator = undefined,
|
||||||
|
.cur = 0,
|
||||||
|
};
|
||||||
|
var current = [_]u8{ 9, 2, 2, 2, 1, 2, 3, 4, 5 };
|
||||||
|
var got: i32 = e.matchLen(1, -5, ¤t);
|
||||||
|
try expect(got == 0);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
var previous = [_]u8{};
|
||||||
|
var e = DeflateFast{
|
||||||
|
.prev = &previous,
|
||||||
|
.prev_len = previous.len,
|
||||||
|
.table = undefined,
|
||||||
|
.allocator = undefined,
|
||||||
|
.cur = 0,
|
||||||
|
};
|
||||||
|
var current = [_]u8{ 9, 2, 2, 2, 1, 2, 3, 4, 5 };
|
||||||
|
var got: i32 = e.matchLen(1, -1, ¤t);
|
||||||
|
try expect(got == 0);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
var previous = [_]u8{};
|
||||||
|
var e = DeflateFast{
|
||||||
|
.prev = &previous,
|
||||||
|
.prev_len = previous.len,
|
||||||
|
.table = undefined,
|
||||||
|
.allocator = undefined,
|
||||||
|
.cur = 0,
|
||||||
|
};
|
||||||
|
var current = [_]u8{ 2, 2, 2, 2, 1, 2, 3, 4, 5 };
|
||||||
|
var got: i32 = e.matchLen(1, 0, ¤t);
|
||||||
|
try expect(got == 3);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
var previous = [_]u8{ 3, 4, 5 };
|
||||||
|
var e = DeflateFast{
|
||||||
|
.prev = &previous,
|
||||||
|
.prev_len = previous.len,
|
||||||
|
.table = undefined,
|
||||||
|
.allocator = undefined,
|
||||||
|
.cur = 0,
|
||||||
|
};
|
||||||
|
var current = [_]u8{ 3, 4, 5 };
|
||||||
|
var got: i32 = e.matchLen(0, -3, ¤t);
|
||||||
|
try expect(got == 3);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
test "best speed match 2/2" {
|
||||||
|
const testing = std.testing;
|
||||||
|
const expect = testing.expect;
|
||||||
|
|
||||||
|
const Case = struct {
|
||||||
|
previous: u32,
|
||||||
|
current: u32,
|
||||||
|
s: i32,
|
||||||
|
t: i32,
|
||||||
|
expected: i32,
|
||||||
|
};
|
||||||
|
|
||||||
|
const cases = [_]Case{
|
||||||
|
.{
|
||||||
|
.previous = 1000,
|
||||||
|
.current = 1000,
|
||||||
|
.s = 0,
|
||||||
|
.t = -1000,
|
||||||
|
.expected = max_match_length - 4,
|
||||||
|
},
|
||||||
|
.{
|
||||||
|
.previous = 200,
|
||||||
|
.s = 0,
|
||||||
|
.t = -200,
|
||||||
|
.current = 500,
|
||||||
|
.expected = max_match_length - 4,
|
||||||
|
},
|
||||||
|
.{
|
||||||
|
.previous = 200,
|
||||||
|
.s = 1,
|
||||||
|
.t = 0,
|
||||||
|
.current = 500,
|
||||||
|
.expected = max_match_length - 4,
|
||||||
|
},
|
||||||
|
.{
|
||||||
|
.previous = max_match_length - 4,
|
||||||
|
.s = 0,
|
||||||
|
.t = -(max_match_length - 4),
|
||||||
|
.current = 500,
|
||||||
|
.expected = max_match_length - 4,
|
||||||
|
},
|
||||||
|
.{
|
||||||
|
.previous = 200,
|
||||||
|
.s = 400,
|
||||||
|
.t = -200,
|
||||||
|
.current = 500,
|
||||||
|
.expected = 100,
|
||||||
|
},
|
||||||
|
.{
|
||||||
|
.previous = 10,
|
||||||
|
.s = 400,
|
||||||
|
.t = 200,
|
||||||
|
.current = 500,
|
||||||
|
.expected = 100,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
for (cases) |c| {
|
||||||
|
var previous = try testing.allocator.alloc(u8, c.previous);
|
||||||
|
defer testing.allocator.free(previous);
|
||||||
|
mem.set(u8, previous, 0);
|
||||||
|
|
||||||
|
var current = try testing.allocator.alloc(u8, c.current);
|
||||||
|
defer testing.allocator.free(current);
|
||||||
|
mem.set(u8, current, 0);
|
||||||
|
|
||||||
|
var e = DeflateFast{
|
||||||
|
.prev = previous,
|
||||||
|
.prev_len = @intCast(u32, previous.len),
|
||||||
|
.table = undefined,
|
||||||
|
.allocator = undefined,
|
||||||
|
.cur = 0,
|
||||||
|
};
|
||||||
|
var got: i32 = e.matchLen(c.s, c.t, current);
|
||||||
|
try expect(got == c.expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
test "best speed shift offsets" {
|
||||||
|
const testing = std.testing;
|
||||||
|
const expect = std.testing.expect;
|
||||||
|
|
||||||
|
// Test if shiftoffsets properly preserves matches and resets out-of-range matches
|
||||||
|
// seen in https://github.com/golang/go/issues/4142
|
||||||
|
var enc = deflateFast();
|
||||||
|
try enc.init(testing.allocator);
|
||||||
|
defer enc.deinit();
|
||||||
|
|
||||||
|
// test_data may not generate internal matches.
|
||||||
|
var test_data = [32]u8{
|
||||||
|
0xf5, 0x25, 0xf2, 0x55, 0xf6, 0xc1, 0x1f, 0x0b, 0x10, 0xa1,
|
||||||
|
0xd0, 0x77, 0x56, 0x38, 0xf1, 0x9c, 0x7f, 0x85, 0xc5, 0xbd,
|
||||||
|
0x16, 0x28, 0xd4, 0xf9, 0x03, 0xd4, 0xc0, 0xa1, 0x1e, 0x58,
|
||||||
|
0x5b, 0xc9,
|
||||||
|
};
|
||||||
|
|
||||||
|
var tokens = [_]token.Token{0} ** 32;
|
||||||
|
var tokens_count: u16 = 0;
|
||||||
|
|
||||||
|
// Encode the testdata with clean state.
|
||||||
|
// Second part should pick up matches from the first block.
|
||||||
|
tokens_count = 0;
|
||||||
|
enc.encode(&tokens, &tokens_count, &test_data);
|
||||||
|
var want_first_tokens = tokens_count;
|
||||||
|
tokens_count = 0;
|
||||||
|
enc.encode(&tokens, &tokens_count, &test_data);
|
||||||
|
var want_second_tokens = tokens_count;
|
||||||
|
|
||||||
|
try expect(want_first_tokens > want_second_tokens);
|
||||||
|
|
||||||
|
// Forward the current indicator to before wraparound.
|
||||||
|
enc.cur = buffer_reset - @intCast(i32, test_data.len);
|
||||||
|
|
||||||
|
// Part 1 before wrap, should match clean state.
|
||||||
|
tokens_count = 0;
|
||||||
|
enc.encode(&tokens, &tokens_count, &test_data);
|
||||||
|
var got = tokens_count;
|
||||||
|
try expect(want_first_tokens == got);
|
||||||
|
|
||||||
|
// Verify we are about to wrap.
|
||||||
|
try expect(enc.cur == buffer_reset);
|
||||||
|
|
||||||
|
// Part 2 should match clean state as well even if wrapped.
|
||||||
|
tokens_count = 0;
|
||||||
|
enc.encode(&tokens, &tokens_count, &test_data);
|
||||||
|
got = tokens_count;
|
||||||
|
try expect(want_second_tokens == got);
|
||||||
|
|
||||||
|
// Verify that we wrapped.
|
||||||
|
try expect(enc.cur < buffer_reset);
|
||||||
|
|
||||||
|
// Forward the current buffer, leaving the matches at the bottom.
|
||||||
|
enc.cur = buffer_reset;
|
||||||
|
enc.shiftOffsets();
|
||||||
|
|
||||||
|
// Ensure that no matches were picked up.
|
||||||
|
tokens_count = 0;
|
||||||
|
enc.encode(&tokens, &tokens_count, &test_data);
|
||||||
|
got = tokens_count;
|
||||||
|
try expect(want_first_tokens == got);
|
||||||
|
}
|
||||||
|
|
||||||
|
test "best speed reset" {
|
||||||
|
// test that encoding is consistent across a warparound of the table offset.
|
||||||
|
// See https://github.com/golang/go/issues/34121
|
||||||
|
const expect = std.testing.expect;
|
||||||
|
const fmt = std.fmt;
|
||||||
|
const testing = std.testing;
|
||||||
|
|
||||||
|
const ArrayList = std.ArrayList;
|
||||||
|
|
||||||
|
const input_size = 65536;
|
||||||
|
var input = try testing.allocator.alloc(u8, input_size);
|
||||||
|
defer testing.allocator.free(input);
|
||||||
|
|
||||||
|
var i: usize = 0;
|
||||||
|
while (i < input_size) : (i += 1) {
|
||||||
|
_ = try fmt.bufPrint(input, "asdfasdfasdfasdf{d}{d}fghfgujyut{d}yutyu\n", .{ i, i, i });
|
||||||
|
}
|
||||||
|
// This is specific to level 1 (best_speed).
|
||||||
|
const level = .best_speed;
|
||||||
|
const offset: usize = 1;
|
||||||
|
|
||||||
|
// We do an encode with a clean buffer to compare.
|
||||||
|
var want = ArrayList(u8).init(testing.allocator);
|
||||||
|
defer want.deinit();
|
||||||
|
var clean_comp = try deflate.compressor(
|
||||||
|
testing.allocator,
|
||||||
|
want.writer(),
|
||||||
|
.{ .level = level },
|
||||||
|
);
|
||||||
|
defer clean_comp.deinit();
|
||||||
|
|
||||||
|
// Write 3 times, close.
|
||||||
|
try clean_comp.writer().writeAll(input);
|
||||||
|
try clean_comp.writer().writeAll(input);
|
||||||
|
try clean_comp.writer().writeAll(input);
|
||||||
|
try clean_comp.close();
|
||||||
|
|
||||||
|
var o = offset;
|
||||||
|
while (o <= 256) : (o *= 2) {
|
||||||
|
var discard = ArrayList(u8).init(testing.allocator);
|
||||||
|
defer discard.deinit();
|
||||||
|
|
||||||
|
var comp = try deflate.compressor(
|
||||||
|
testing.allocator,
|
||||||
|
discard.writer(),
|
||||||
|
.{ .level = level },
|
||||||
|
);
|
||||||
|
defer comp.deinit();
|
||||||
|
|
||||||
|
// Reset until we are right before the wraparound.
|
||||||
|
// Each reset adds max_match_offset to the offset.
|
||||||
|
i = 0;
|
||||||
|
var limit = (buffer_reset - input.len - o - max_match_offset) / max_match_offset;
|
||||||
|
while (i < limit) : (i += 1) {
|
||||||
|
// skip ahead to where we are close to wrap around...
|
||||||
|
comp.reset(discard.writer());
|
||||||
|
}
|
||||||
|
var got = ArrayList(u8).init(testing.allocator);
|
||||||
|
defer got.deinit();
|
||||||
|
comp.reset(got.writer());
|
||||||
|
|
||||||
|
// Write 3 times, close.
|
||||||
|
try comp.writer().writeAll(input);
|
||||||
|
try comp.writer().writeAll(input);
|
||||||
|
try comp.writer().writeAll(input);
|
||||||
|
try comp.close();
|
||||||
|
|
||||||
|
// output must match at wraparound
|
||||||
|
try expect(mem.eql(u8, got.items, want.items));
|
||||||
|
}
|
||||||
|
}
|
166
lib/std/compress/deflate/deflate_fast_test.zig
Normal file
166
lib/std/compress/deflate/deflate_fast_test.zig
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
const std = @import("std");
|
||||||
|
const expect = std.testing.expect;
|
||||||
|
const io = std.io;
|
||||||
|
const mem = std.mem;
|
||||||
|
const testing = std.testing;
|
||||||
|
|
||||||
|
const ArrayList = std.ArrayList;
|
||||||
|
|
||||||
|
const deflate = @import("compressor.zig");
|
||||||
|
const inflate = @import("decompressor.zig");
|
||||||
|
const deflate_const = @import("deflate_const.zig");
|
||||||
|
|
||||||
|
test "best speed" {
|
||||||
|
// Tests that round-tripping through deflate and then inflate recovers the original input.
|
||||||
|
// The Write sizes are near the thresholds in the compressor.encSpeed method (0, 16, 128), as well
|
||||||
|
// as near `deflate_const.max_store_block_size` (65535).
|
||||||
|
|
||||||
|
var abcabc = try testing.allocator.alloc(u8, 131_072);
|
||||||
|
defer testing.allocator.free(abcabc);
|
||||||
|
|
||||||
|
for (abcabc) |_, i| {
|
||||||
|
abcabc[i] = @intCast(u8, i % 128);
|
||||||
|
}
|
||||||
|
|
||||||
|
var tc_01 = [_]u32{ 65536, 0 };
|
||||||
|
var tc_02 = [_]u32{ 65536, 1 };
|
||||||
|
var tc_03 = [_]u32{ 65536, 1, 256 };
|
||||||
|
var tc_04 = [_]u32{ 65536, 1, 65536 };
|
||||||
|
var tc_05 = [_]u32{ 65536, 14 };
|
||||||
|
var tc_06 = [_]u32{ 65536, 15 };
|
||||||
|
var tc_07 = [_]u32{ 65536, 16 };
|
||||||
|
var tc_08 = [_]u32{ 65536, 16, 256 };
|
||||||
|
var tc_09 = [_]u32{ 65536, 16, 65536 };
|
||||||
|
var tc_10 = [_]u32{ 65536, 127 };
|
||||||
|
var tc_11 = [_]u32{ 65536, 127 };
|
||||||
|
var tc_12 = [_]u32{ 65536, 128 };
|
||||||
|
var tc_13 = [_]u32{ 65536, 128, 256 };
|
||||||
|
var tc_14 = [_]u32{ 65536, 128, 65536 };
|
||||||
|
var tc_15 = [_]u32{ 65536, 129 };
|
||||||
|
var tc_16 = [_]u32{ 65536, 65536, 256 };
|
||||||
|
var tc_17 = [_]u32{ 65536, 65536, 65536 };
|
||||||
|
var test_cases = [_][]u32{
|
||||||
|
&tc_01, &tc_02, &tc_03, &tc_04, &tc_05, &tc_06, &tc_07, &tc_08, &tc_09, &tc_10,
|
||||||
|
&tc_11, &tc_12, &tc_13, &tc_14, &tc_15, &tc_16, &tc_17,
|
||||||
|
};
|
||||||
|
|
||||||
|
for (test_cases) |tc| {
|
||||||
|
var firsts = [_]u32{ 1, 65534, 65535, 65536, 65537, 131072 };
|
||||||
|
|
||||||
|
for (firsts) |first_n| {
|
||||||
|
tc[0] = first_n;
|
||||||
|
|
||||||
|
var to_flush = [_]bool{ false, true };
|
||||||
|
for (to_flush) |flush| {
|
||||||
|
var compressed = ArrayList(u8).init(testing.allocator);
|
||||||
|
defer compressed.deinit();
|
||||||
|
|
||||||
|
var want = ArrayList(u8).init(testing.allocator);
|
||||||
|
defer want.deinit();
|
||||||
|
|
||||||
|
var comp = try deflate.compressor(
|
||||||
|
testing.allocator,
|
||||||
|
compressed.writer(),
|
||||||
|
.{ .level = .best_speed },
|
||||||
|
);
|
||||||
|
defer comp.deinit();
|
||||||
|
|
||||||
|
for (tc) |n| {
|
||||||
|
try want.appendSlice(abcabc[0..n]);
|
||||||
|
try comp.writer().writeAll(abcabc[0..n]);
|
||||||
|
if (flush) {
|
||||||
|
try comp.flush();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
try comp.close();
|
||||||
|
|
||||||
|
var decompressed = try testing.allocator.alloc(u8, want.items.len);
|
||||||
|
defer testing.allocator.free(decompressed);
|
||||||
|
|
||||||
|
var decomp = try inflate.decompressor(
|
||||||
|
testing.allocator,
|
||||||
|
io.fixedBufferStream(compressed.items).reader(),
|
||||||
|
null,
|
||||||
|
);
|
||||||
|
defer decomp.deinit();
|
||||||
|
|
||||||
|
var read = try decomp.reader().readAll(decompressed);
|
||||||
|
_ = decomp.close();
|
||||||
|
|
||||||
|
try expect(read == want.items.len);
|
||||||
|
try expect(mem.eql(u8, want.items, decompressed));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
test "best speed max match offset" {
|
||||||
|
const abc = "abcdefgh";
|
||||||
|
const xyz = "stuvwxyz";
|
||||||
|
const input_margin = 16 - 1;
|
||||||
|
|
||||||
|
const match_before = [_]bool{ false, true };
|
||||||
|
for (match_before) |do_match_before| {
|
||||||
|
const extras = [_]u32{
|
||||||
|
0,
|
||||||
|
input_margin - 1,
|
||||||
|
input_margin,
|
||||||
|
input_margin + 1,
|
||||||
|
2 * input_margin,
|
||||||
|
};
|
||||||
|
for (extras) |extra| {
|
||||||
|
var offset_adj: i32 = -5;
|
||||||
|
while (offset_adj <= 5) : (offset_adj += 1) {
|
||||||
|
var offset = deflate_const.max_match_offset + offset_adj;
|
||||||
|
|
||||||
|
// Make src to be a []u8 of the form
|
||||||
|
// fmt("{s}{s}{s}{s}{s}", .{abc, zeros0, xyzMaybe, abc, zeros1})
|
||||||
|
// where:
|
||||||
|
// zeros0 is approximately max_match_offset zeros.
|
||||||
|
// xyzMaybe is either xyz or the empty string.
|
||||||
|
// zeros1 is between 0 and 30 zeros.
|
||||||
|
// The difference between the two abc's will be offset, which
|
||||||
|
// is max_match_offset plus or minus a small adjustment.
|
||||||
|
var src_len: usize = @intCast(usize, offset + abc.len + @intCast(i32, extra));
|
||||||
|
var src = try testing.allocator.alloc(u8, src_len);
|
||||||
|
defer testing.allocator.free(src);
|
||||||
|
|
||||||
|
mem.copy(u8, src, abc);
|
||||||
|
if (!do_match_before) {
|
||||||
|
var src_offset: usize = @intCast(usize, offset - xyz.len);
|
||||||
|
mem.copy(u8, src[src_offset..], xyz);
|
||||||
|
}
|
||||||
|
var src_offset: usize = @intCast(usize, offset);
|
||||||
|
mem.copy(u8, src[src_offset..], abc);
|
||||||
|
|
||||||
|
var compressed = ArrayList(u8).init(testing.allocator);
|
||||||
|
defer compressed.deinit();
|
||||||
|
|
||||||
|
var comp = try deflate.compressor(
|
||||||
|
testing.allocator,
|
||||||
|
compressed.writer(),
|
||||||
|
.{ .level = .best_speed },
|
||||||
|
);
|
||||||
|
defer comp.deinit();
|
||||||
|
try comp.writer().writeAll(src);
|
||||||
|
_ = try comp.close();
|
||||||
|
|
||||||
|
var decompressed = try testing.allocator.alloc(u8, src.len);
|
||||||
|
defer testing.allocator.free(decompressed);
|
||||||
|
|
||||||
|
var decomp = try inflate.decompressor(
|
||||||
|
testing.allocator,
|
||||||
|
io.fixedBufferStream(compressed.items).reader(),
|
||||||
|
null,
|
||||||
|
);
|
||||||
|
defer decomp.deinit();
|
||||||
|
var read = try decomp.reader().readAll(decompressed);
|
||||||
|
_ = decomp.close();
|
||||||
|
|
||||||
|
try expect(read == src.len);
|
||||||
|
try expect(mem.eql(u8, decompressed, src));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
420
lib/std/compress/deflate/dict_decoder.zig
Normal file
420
lib/std/compress/deflate/dict_decoder.zig
Normal file
@ -0,0 +1,420 @@
|
|||||||
|
const std = @import("std");
|
||||||
|
const assert = std.debug.assert;
|
||||||
|
const mem = std.mem;
|
||||||
|
|
||||||
|
const Allocator = std.mem.Allocator;
|
||||||
|
|
||||||
|
// Implements the LZ77 sliding dictionary as used in decompression.
|
||||||
|
// LZ77 decompresses data through sequences of two forms of commands:
|
||||||
|
//
|
||||||
|
// * Literal insertions: Runs of one or more symbols are inserted into the data
|
||||||
|
// stream as is. This is accomplished through the writeByte method for a
|
||||||
|
// single symbol, or combinations of writeSlice/writeMark for multiple symbols.
|
||||||
|
// Any valid stream must start with a literal insertion if no preset dictionary
|
||||||
|
// is used.
|
||||||
|
//
|
||||||
|
// * Backward copies: Runs of one or more symbols are copied from previously
|
||||||
|
// emitted data. Backward copies come as the tuple (dist, length) where dist
|
||||||
|
// determines how far back in the stream to copy from and length determines how
|
||||||
|
// many bytes to copy. Note that it is valid for the length to be greater than
|
||||||
|
// the distance. Since LZ77 uses forward copies, that situation is used to
|
||||||
|
// perform a form of run-length encoding on repeated runs of symbols.
|
||||||
|
// The writeCopy and tryWriteCopy are used to implement this command.
|
||||||
|
//
|
||||||
|
// For performance reasons, this implementation performs little to no sanity
|
||||||
|
// checks about the arguments. As such, the invariants documented for each
|
||||||
|
// method call must be respected.
|
||||||
|
pub const DictDecoder = struct {
|
||||||
|
const Self = @This();
|
||||||
|
|
||||||
|
allocator: Allocator = undefined,
|
||||||
|
|
||||||
|
hist: []u8 = undefined, // Sliding window history
|
||||||
|
|
||||||
|
// Invariant: 0 <= rd_pos <= wr_pos <= hist.len
|
||||||
|
wr_pos: u32 = 0, // Current output position in buffer
|
||||||
|
rd_pos: u32 = 0, // Have emitted hist[0..rd_pos] already
|
||||||
|
full: bool = false, // Has a full window length been written yet?
|
||||||
|
|
||||||
|
// init initializes DictDecoder to have a sliding window dictionary of the given
|
||||||
|
// size. If a preset dict is provided, it will initialize the dictionary with
|
||||||
|
// the contents of dict.
|
||||||
|
pub fn init(self: *Self, allocator: Allocator, size: u32, dict: ?[]const u8) !void {
|
||||||
|
self.allocator = allocator;
|
||||||
|
|
||||||
|
self.hist = try allocator.alloc(u8, size);
|
||||||
|
|
||||||
|
self.wr_pos = 0;
|
||||||
|
|
||||||
|
if (dict != null) {
|
||||||
|
mem.copy(u8, self.hist, dict.?[dict.?.len -| self.hist.len..]);
|
||||||
|
self.wr_pos = @intCast(u32, dict.?.len);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (self.wr_pos == self.hist.len) {
|
||||||
|
self.wr_pos = 0;
|
||||||
|
self.full = true;
|
||||||
|
}
|
||||||
|
self.rd_pos = self.wr_pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn deinit(self: *Self) void {
|
||||||
|
self.allocator.free(self.hist);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reports the total amount of historical data in the dictionary.
|
||||||
|
pub fn histSize(self: *Self) u32 {
|
||||||
|
if (self.full) {
|
||||||
|
return @intCast(u32, self.hist.len);
|
||||||
|
}
|
||||||
|
return self.wr_pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reports the number of bytes that can be flushed by readFlush.
|
||||||
|
pub fn availRead(self: *Self) u32 {
|
||||||
|
return self.wr_pos - self.rd_pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reports the available amount of output buffer space.
|
||||||
|
pub fn availWrite(self: *Self) u32 {
|
||||||
|
return @intCast(u32, self.hist.len - self.wr_pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a slice of the available buffer to write data to.
|
||||||
|
//
|
||||||
|
// This invariant will be kept: s.len <= availWrite()
|
||||||
|
pub fn writeSlice(self: *Self) []u8 {
|
||||||
|
return self.hist[self.wr_pos..];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Advances the writer pointer by `count`.
|
||||||
|
//
|
||||||
|
// This invariant must be kept: 0 <= count <= availWrite()
|
||||||
|
pub fn writeMark(self: *Self, count: u32) void {
|
||||||
|
assert(0 <= count and count <= self.availWrite());
|
||||||
|
self.wr_pos += count;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Writes a single byte to the dictionary.
|
||||||
|
//
|
||||||
|
// This invariant must be kept: 0 < availWrite()
|
||||||
|
pub fn writeByte(self: *Self, byte: u8) void {
|
||||||
|
self.hist[self.wr_pos] = byte;
|
||||||
|
self.wr_pos += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn copy(dst: []u8, src: []const u8) u32 {
|
||||||
|
if (src.len > dst.len) {
|
||||||
|
mem.copy(u8, dst, src[0..dst.len]);
|
||||||
|
return @intCast(u32, dst.len);
|
||||||
|
}
|
||||||
|
mem.copy(u8, dst, src);
|
||||||
|
return @intCast(u32, src.len);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copies a string at a given (dist, length) to the output.
|
||||||
|
// This returns the number of bytes copied and may be less than the requested
|
||||||
|
// length if the available space in the output buffer is too small.
|
||||||
|
//
|
||||||
|
// This invariant must be kept: 0 < dist <= histSize()
|
||||||
|
pub fn writeCopy(self: *Self, dist: u32, length: u32) u32 {
|
||||||
|
assert(0 < dist and dist <= self.histSize());
|
||||||
|
var dst_base = self.wr_pos;
|
||||||
|
var dst_pos = dst_base;
|
||||||
|
var src_pos: i32 = @intCast(i32, dst_pos) - @intCast(i32, dist);
|
||||||
|
var end_pos = dst_pos + length;
|
||||||
|
if (end_pos > self.hist.len) {
|
||||||
|
end_pos = @intCast(u32, self.hist.len);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy non-overlapping section after destination position.
|
||||||
|
//
|
||||||
|
// This section is non-overlapping in that the copy length for this section
|
||||||
|
// is always less than or equal to the backwards distance. This can occur
|
||||||
|
// if a distance refers to data that wraps-around in the buffer.
|
||||||
|
// Thus, a backwards copy is performed here; that is, the exact bytes in
|
||||||
|
// the source prior to the copy is placed in the destination.
|
||||||
|
if (src_pos < 0) {
|
||||||
|
src_pos += @intCast(i32, self.hist.len);
|
||||||
|
dst_pos += copy(self.hist[dst_pos..end_pos], self.hist[@intCast(usize, src_pos)..]);
|
||||||
|
src_pos = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy possibly overlapping section before destination position.
|
||||||
|
//
|
||||||
|
// This section can overlap if the copy length for this section is larger
|
||||||
|
// than the backwards distance. This is allowed by LZ77 so that repeated
|
||||||
|
// strings can be succinctly represented using (dist, length) pairs.
|
||||||
|
// Thus, a forwards copy is performed here; that is, the bytes copied is
|
||||||
|
// possibly dependent on the resulting bytes in the destination as the copy
|
||||||
|
// progresses along. This is functionally equivalent to the following:
|
||||||
|
//
|
||||||
|
// var i = 0;
|
||||||
|
// while(i < end_pos - dst_pos) : (i+=1) {
|
||||||
|
// self.hist[dst_pos+i] = self.hist[src_pos+i];
|
||||||
|
// }
|
||||||
|
// dst_pos = end_pos;
|
||||||
|
//
|
||||||
|
while (dst_pos < end_pos) {
|
||||||
|
dst_pos += copy(self.hist[dst_pos..end_pos], self.hist[@intCast(usize, src_pos)..dst_pos]);
|
||||||
|
}
|
||||||
|
|
||||||
|
self.wr_pos = dst_pos;
|
||||||
|
return dst_pos - dst_base;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tries to copy a string at a given (distance, length) to the
// output. This specialized version is optimized for short distances.
//
// This method is designed to be inlined for performance reasons.
//
// This invariant must be kept: 0 < dist <= histSize()
pub fn tryWriteCopy(self: *Self, dist: u32, length: u32) u32 {
    var write_pos = self.wr_pos;
    const copy_end = write_pos + length;

    // Bail out (returning 0 so the caller falls back to the general
    // writeCopy) when the copy would reach before the start of the
    // window or run past its end.
    if (write_pos < dist or copy_end > self.hist.len) {
        return 0;
    }

    const start_pos = write_pos;
    const read_pos = write_pos - dist;

    // Copy the possibly self-overlapping section in front of the
    // destination position; every pass widens the readable window,
    // so repeated strings shorter than `dist` converge quickly.
    while (write_pos < copy_end) {
        write_pos += copy(self.hist[write_pos..copy_end], self.hist[read_pos..write_pos]);
    }

    self.wr_pos = write_pos;
    return write_pos - start_pos;
}
|
||||||
|
|
||||||
|
// Returns a slice of the historical buffer that is ready to be
// emitted to the user. The data returned by readFlush must be fully consumed
// before calling any other DictDecoder methods.
pub fn readFlush(self: *Self) []u8 {
    const pending = self.hist[self.rd_pos..self.wr_pos];
    self.rd_pos = self.wr_pos;

    // Once the write cursor reaches the end of the window, wrap both
    // cursors around; from then on the entire window is valid history.
    if (self.wr_pos == self.hist.len) {
        self.wr_pos = 0;
        self.rd_pos = 0;
        self.full = true;
    }
    return pending;
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// tests
|
||||||
|
|
||||||
|
test "dictionary decoder" {
    const ArrayList = std.ArrayList;
    const expect = std.testing.expect;
    const testing = std.testing;

    const abc = "ABC\n";
    const fox = "The quick brown fox jumped over the lazy dog!\n";
    const poem: []const u8 =
        \\The Road Not Taken
        \\Robert Frost
        \\
        \\Two roads diverged in a yellow wood,
        \\And sorry I could not travel both
        \\And be one traveler, long I stood
        \\And looked down one as far as I could
        \\To where it bent in the undergrowth;
        \\
        \\Then took the other, as just as fair,
        \\And having perhaps the better claim,
        \\Because it was grassy and wanted wear;
        \\Though as for that the passing there
        \\Had worn them really about the same,
        \\
        \\And both that morning equally lay
        \\In leaves no step had trodden black.
        \\Oh, I kept the first for another day!
        \\Yet knowing how way leads on to way,
        \\I doubted if I should ever come back.
        \\
        \\I shall be telling this with a sigh
        \\Somewhere ages and ages hence:
        \\Two roads diverged in a wood, and I-
        \\I took the one less traveled by,
        \\And that has made all the difference.
        \\
    ;

    const uppercase: []const u8 =
        \\THE ROAD NOT TAKEN
        \\ROBERT FROST
        \\
        \\TWO ROADS DIVERGED IN A YELLOW WOOD,
        \\AND SORRY I COULD NOT TRAVEL BOTH
        \\AND BE ONE TRAVELER, LONG I STOOD
        \\AND LOOKED DOWN ONE AS FAR AS I COULD
        \\TO WHERE IT BENT IN THE UNDERGROWTH;
        \\
        \\THEN TOOK THE OTHER, AS JUST AS FAIR,
        \\AND HAVING PERHAPS THE BETTER CLAIM,
        \\BECAUSE IT WAS GRASSY AND WANTED WEAR;
        \\THOUGH AS FOR THAT THE PASSING THERE
        \\HAD WORN THEM REALLY ABOUT THE SAME,
        \\
        \\AND BOTH THAT MORNING EQUALLY LAY
        \\IN LEAVES NO STEP HAD TRODDEN BLACK.
        \\OH, I KEPT THE FIRST FOR ANOTHER DAY!
        \\YET KNOWING HOW WAY LEADS ON TO WAY,
        \\I DOUBTED IF I SHOULD EVER COME BACK.
        \\
        \\I SHALL BE TELLING THIS WITH A SIGH
        \\SOMEWHERE AGES AND AGES HENCE:
        \\TWO ROADS DIVERGED IN A WOOD, AND I-
        \\I TOOK THE ONE LESS TRAVELED BY,
        \\AND THAT HAS MADE ALL THE DIFFERENCE.
        \\
    ;

    // Each entry is either a literal insertion (dist == 0) or an LZ77
    // back-reference (dist, length) into the bytes emitted so far.
    const PoemRefs = struct {
        dist: u32, // Backward distance (0 if this is an insertion)
        length: u32, // Length of copy or insertion
    };

    const poem_refs = [_]PoemRefs{
        .{ .dist = 0, .length = 38 },  .{ .dist = 33, .length = 3 },  .{ .dist = 0, .length = 48 },
        .{ .dist = 79, .length = 3 },  .{ .dist = 0, .length = 11 },  .{ .dist = 34, .length = 5 },
        .{ .dist = 0, .length = 6 },   .{ .dist = 23, .length = 7 },  .{ .dist = 0, .length = 8 },
        .{ .dist = 50, .length = 3 },  .{ .dist = 0, .length = 2 },   .{ .dist = 69, .length = 3 },
        .{ .dist = 34, .length = 5 },  .{ .dist = 0, .length = 4 },   .{ .dist = 97, .length = 3 },
        .{ .dist = 0, .length = 4 },   .{ .dist = 43, .length = 5 },  .{ .dist = 0, .length = 6 },
        .{ .dist = 7, .length = 4 },   .{ .dist = 88, .length = 7 },  .{ .dist = 0, .length = 12 },
        .{ .dist = 80, .length = 3 },  .{ .dist = 0, .length = 2 },   .{ .dist = 141, .length = 4 },
        .{ .dist = 0, .length = 1 },   .{ .dist = 196, .length = 3 }, .{ .dist = 0, .length = 3 },
        .{ .dist = 157, .length = 3 }, .{ .dist = 0, .length = 6 },   .{ .dist = 181, .length = 3 },
        .{ .dist = 0, .length = 2 },   .{ .dist = 23, .length = 3 },  .{ .dist = 77, .length = 3 },
        .{ .dist = 28, .length = 5 },  .{ .dist = 128, .length = 3 }, .{ .dist = 110, .length = 4 },
        .{ .dist = 70, .length = 3 },  .{ .dist = 0, .length = 4 },   .{ .dist = 85, .length = 6 },
        .{ .dist = 0, .length = 2 },   .{ .dist = 182, .length = 6 }, .{ .dist = 0, .length = 4 },
        .{ .dist = 133, .length = 3 }, .{ .dist = 0, .length = 7 },   .{ .dist = 47, .length = 5 },
        .{ .dist = 0, .length = 20 },  .{ .dist = 112, .length = 5 }, .{ .dist = 0, .length = 1 },
        .{ .dist = 58, .length = 3 },  .{ .dist = 0, .length = 8 },   .{ .dist = 59, .length = 3 },
        .{ .dist = 0, .length = 4 },   .{ .dist = 173, .length = 3 }, .{ .dist = 0, .length = 5 },
        .{ .dist = 114, .length = 3 }, .{ .dist = 0, .length = 4 },   .{ .dist = 92, .length = 5 },
        .{ .dist = 0, .length = 2 },   .{ .dist = 71, .length = 3 },  .{ .dist = 0, .length = 2 },
        .{ .dist = 76, .length = 5 },  .{ .dist = 0, .length = 1 },   .{ .dist = 46, .length = 3 },
        .{ .dist = 96, .length = 4 },  .{ .dist = 130, .length = 4 }, .{ .dist = 0, .length = 3 },
        .{ .dist = 360, .length = 3 }, .{ .dist = 0, .length = 3 },   .{ .dist = 178, .length = 5 },
        .{ .dist = 0, .length = 7 },   .{ .dist = 75, .length = 3 },  .{ .dist = 0, .length = 3 },
        .{ .dist = 45, .length = 6 },  .{ .dist = 0, .length = 6 },   .{ .dist = 299, .length = 6 },
        .{ .dist = 180, .length = 3 }, .{ .dist = 70, .length = 6 },  .{ .dist = 0, .length = 1 },
        .{ .dist = 48, .length = 3 },  .{ .dist = 66, .length = 4 },  .{ .dist = 0, .length = 3 },
        .{ .dist = 47, .length = 5 },  .{ .dist = 0, .length = 9 },   .{ .dist = 325, .length = 3 },
        .{ .dist = 0, .length = 1 },   .{ .dist = 359, .length = 3 }, .{ .dist = 318, .length = 3 },
        .{ .dist = 0, .length = 2 },   .{ .dist = 199, .length = 3 }, .{ .dist = 0, .length = 1 },
        .{ .dist = 344, .length = 3 }, .{ .dist = 0, .length = 3 },   .{ .dist = 248, .length = 3 },
        .{ .dist = 0, .length = 10 },  .{ .dist = 310, .length = 3 }, .{ .dist = 0, .length = 3 },
        .{ .dist = 93, .length = 6 },  .{ .dist = 0, .length = 3 },   .{ .dist = 252, .length = 3 },
        .{ .dist = 157, .length = 4 }, .{ .dist = 0, .length = 2 },   .{ .dist = 273, .length = 5 },
        .{ .dist = 0, .length = 14 },  .{ .dist = 99, .length = 4 },  .{ .dist = 0, .length = 1 },
        .{ .dist = 464, .length = 4 }, .{ .dist = 0, .length = 2 },   .{ .dist = 92, .length = 4 },
        .{ .dist = 495, .length = 3 }, .{ .dist = 0, .length = 1 },   .{ .dist = 322, .length = 4 },
        .{ .dist = 16, .length = 4 },  .{ .dist = 0, .length = 3 },   .{ .dist = 402, .length = 3 },
        .{ .dist = 0, .length = 2 },   .{ .dist = 237, .length = 4 }, .{ .dist = 0, .length = 2 },
        .{ .dist = 432, .length = 4 }, .{ .dist = 0, .length = 1 },   .{ .dist = 483, .length = 5 },
        .{ .dist = 0, .length = 2 },   .{ .dist = 294, .length = 4 }, .{ .dist = 0, .length = 2 },
        .{ .dist = 306, .length = 3 }, .{ .dist = 113, .length = 5 }, .{ .dist = 0, .length = 1 },
        .{ .dist = 26, .length = 4 },  .{ .dist = 164, .length = 3 }, .{ .dist = 488, .length = 4 },
        .{ .dist = 0, .length = 1 },   .{ .dist = 542, .length = 3 }, .{ .dist = 248, .length = 6 },
        .{ .dist = 0, .length = 5 },   .{ .dist = 205, .length = 3 }, .{ .dist = 0, .length = 8 },
        .{ .dist = 48, .length = 3 },  .{ .dist = 449, .length = 6 }, .{ .dist = 0, .length = 2 },
        .{ .dist = 192, .length = 3 }, .{ .dist = 328, .length = 4 }, .{ .dist = 9, .length = 5 },
        .{ .dist = 433, .length = 3 }, .{ .dist = 0, .length = 3 },   .{ .dist = 622, .length = 25 },
        .{ .dist = 615, .length = 5 }, .{ .dist = 46, .length = 5 },  .{ .dist = 0, .length = 2 },
        .{ .dist = 104, .length = 3 }, .{ .dist = 475, .length = 10 }, .{ .dist = 549, .length = 3 },
        .{ .dist = 0, .length = 4 },   .{ .dist = 597, .length = 8 }, .{ .dist = 314, .length = 3 },
        .{ .dist = 0, .length = 1 },   .{ .dist = 473, .length = 6 }, .{ .dist = 317, .length = 5 },
        .{ .dist = 0, .length = 1 },   .{ .dist = 400, .length = 3 }, .{ .dist = 0, .length = 3 },
        .{ .dist = 109, .length = 3 }, .{ .dist = 151, .length = 3 }, .{ .dist = 48, .length = 4 },
        .{ .dist = 0, .length = 4 },   .{ .dist = 125, .length = 3 }, .{ .dist = 108, .length = 3 },
        .{ .dist = 0, .length = 2 },
    };

    var got_list = ArrayList(u8).init(testing.allocator);
    defer got_list.deinit();
    var got = got_list.writer();

    var want_list = ArrayList(u8).init(testing.allocator);
    defer want_list.deinit();
    var want = want_list.writer();

    var dd = DictDecoder{};
    try dd.init(testing.allocator, 1 << 11, null);
    defer dd.deinit();

    const util = struct {
        // Drive tryWriteCopy/writeCopy until `length` bytes have been
        // produced, flushing the window to `dst` whenever it fills up.
        fn writeCopy(dst_dd: *DictDecoder, dst: anytype, dist: u32, length: u32) !void {
            var remaining = length;
            while (remaining > 0) {
                var n = dst_dd.tryWriteCopy(dist, remaining);
                if (n == 0) {
                    n = dst_dd.writeCopy(dist, remaining);
                }

                remaining -= n;
                if (dst_dd.availWrite() == 0) {
                    _ = try dst.write(dst_dd.readFlush());
                }
            }
        }
        // Insert literal bytes, flushing the window to `dst` whenever it fills up.
        fn writeString(dst_dd: *DictDecoder, dst: anytype, str: []const u8) !void {
            var remaining = str;
            while (remaining.len > 0) {
                const n = DictDecoder.copy(dst_dd.writeSlice(), remaining);
                dst_dd.writeMark(n);
                remaining = remaining[n..];
                if (dst_dd.availWrite() == 0) {
                    _ = try dst.write(dst_dd.readFlush());
                }
            }
        }
    };

    try util.writeString(&dd, got, ".");
    _ = try want.write(".");

    // Replay the poem from its (insertion, back-reference) token stream.
    var str = poem;
    for (poem_refs) |ref| {
        if (ref.dist == 0) {
            try util.writeString(&dd, got, str[0..ref.length]);
        } else {
            try util.writeCopy(&dd, got, ref.dist, ref.length);
        }
        str = str[ref.length..];
    }
    _ = try want.write(poem);

    // Copy of unwritten data from the farthest possible distance.
    try util.writeCopy(&dd, got, dd.histSize(), 33);
    _ = try want.write(want_list.items[0..33]);

    // Overlapping forward copies: length is a multiple of the distance.
    try util.writeString(&dd, got, abc);
    try util.writeCopy(&dd, got, abc.len, 59 * abc.len);
    _ = try want.write(abc ** 60);

    try util.writeString(&dd, got, fox);
    try util.writeCopy(&dd, got, fox.len, 9 * fox.len);
    _ = try want.write(fox ** 10);

    // Run-length style copy with distance 1.
    try util.writeString(&dd, got, ".");
    try util.writeCopy(&dd, got, 1, 9);
    _ = try want.write("." ** 10);

    try util.writeString(&dd, got, uppercase);
    try util.writeCopy(&dd, got, uppercase.len, 7 * uppercase.len);
    var i: u8 = 0;
    while (i < 8) : (i += 1) {
        _ = try want.write(uppercase);
    }

    // One more max-distance copy after the window has wrapped.
    try util.writeCopy(&dd, got, dd.histSize(), 10);
    _ = try want.write(want_list.items[want_list.items.len - dd.histSize() ..][0..10]);

    _ = try got.write(dd.readFlush());
    try expect(mem.eql(u8, got_list.items, want_list.items));
}
|
1722
lib/std/compress/deflate/huffman_bit_writer.zig
Normal file
1722
lib/std/compress/deflate/huffman_bit_writer.zig
Normal file
File diff suppressed because it is too large
Load Diff
432
lib/std/compress/deflate/huffman_code.zig
Normal file
432
lib/std/compress/deflate/huffman_code.zig
Normal file
@ -0,0 +1,432 @@
|
|||||||
|
const std = @import("std");
|
||||||
|
const assert = std.debug.assert;
|
||||||
|
const math = std.math;
|
||||||
|
const mem = std.mem;
|
||||||
|
const sort = std.sort;
|
||||||
|
const testing = std.testing;
|
||||||
|
|
||||||
|
const Allocator = std.mem.Allocator;
|
||||||
|
|
||||||
|
const bu = @import("bits_utils.zig");
|
||||||
|
const deflate_const = @import("deflate_const.zig");
|
||||||
|
|
||||||
|
const max_bits_limit = 16;
|
||||||
|
|
||||||
|
// A (literal value, frequency) pair used while constructing a Huffman tree.
const LiteralNode = struct {
    literal: u16,
    freq: u16,
};
|
||||||
|
|
||||||
|
// Describes the state of the constructed tree for a given depth.
const LevelInfo = struct {
    // Our level, kept for better printing.
    level: u32,

    // The frequency of the last node at this level.
    last_freq: u32,

    // The frequency of the next character to add to this level.
    next_char_freq: u32,

    // The frequency of the next pair (from the level below) to add to this
    // level. Only valid if the "needed" value of the next lower level is 0.
    next_pair_freq: u32,

    // The number of chains remaining to generate for this level before
    // moving up to the next level.
    needed: u32,
};
|
||||||
|
|
||||||
|
// hcode is a huffman code with a bit code and bit length.
pub const HuffCode = struct {
    code: u16 = 0,
    len: u16 = 0,

    // set sets the code and length of an hcode.
    fn set(self: *HuffCode, code: u16, length: u16) void {
        self.code = code;
        self.len = length;
    }
};
|
||||||
|
|
||||||
|
pub const HuffmanEncoder = struct {
    codes: []HuffCode,
    freq_cache: []LiteralNode = undefined,
    bit_count: [17]u32 = undefined,
    lns: []LiteralNode = undefined, // sorted by literal, stored to avoid repeated allocation in generate
    lfs: []LiteralNode = undefined, // sorted by frequency, stored to avoid repeated allocation in generate
    allocator: Allocator,

    // Frees the slices owned by this encoder.
    pub fn deinit(self: *HuffmanEncoder) void {
        self.allocator.free(self.codes);
        self.allocator.free(self.freq_cache);
    }

    // Update this Huffman Code object to be the minimum code for the specified frequency count.
    //
    // freq: An array of frequencies, in which frequency[i] gives the frequency of literal i.
    // max_bits: The maximum number of bits to use for any literal.
    pub fn generate(self: *HuffmanEncoder, freq: []u16, max_bits: u32) void {
        var list = self.freq_cache[0 .. freq.len + 1];
        // Number of non-zero literals.
        var count: u32 = 0;
        // Gather all non-zero literals and their frequencies at the front of
        // `list`; literals with zero frequency get their code length cleared.
        for (freq) |f, i| {
            if (f != 0) {
                list[count] = LiteralNode{ .literal = @intCast(u16, i), .freq = f };
                count += 1;
            } else {
                list[count] = LiteralNode{ .literal = 0x00, .freq = 0 };
                self.codes[i].len = 0;
            }
        }
        list[freq.len] = LiteralNode{ .literal = 0x00, .freq = 0 };

        list = list[0..count];
        if (count <= 2) {
            // Handle the small cases here, because they are awkward for the
            // general case code. With two or fewer literals, everything has
            // bit length 1. "list" is in order of increasing literal value.
            for (list) |node, i| {
                self.codes[node.literal].set(@intCast(u16, i), 1);
            }
            return;
        }
        self.lfs = list;
        sort.sort(LiteralNode, self.lfs, {}, byFreq);

        // Get the number of literals for each bit count, then do the assignment.
        var bit_count = self.bitCounts(list, max_bits);
        self.assignEncodingAndSize(bit_count, list);
    }

    // Returns the total number of bits needed to encode `freq` with the
    // currently assigned code lengths.
    pub fn bitLength(self: *HuffmanEncoder, freq: []u16) u32 {
        var total: u32 = 0;
        for (freq) |f, i| {
            if (f != 0) {
                total += @intCast(u32, f) * @intCast(u32, self.codes[i].len);
            }
        }
        return total;
    }

    // Return the number of literals assigned to each bit size in the Huffman encoding
    //
    // This method is only called when list.len >= 3
    // The cases of 0, 1, and 2 literals are handled by special case code.
    //
    // list: An array of the literals with non-zero frequencies
    // and their associated frequencies. The array is in order of increasing
    // frequency, and has as its last element a special element with frequency
    // std.math.maxInt(i32)
    //
    // max_bits: The maximum number of bits that should be used to encode any literal.
    // Must be less than 16.
    //
    // Returns an integer array in which array[i] indicates the number of literals
    // that should be encoded in i bits.
    fn bitCounts(self: *HuffmanEncoder, list: []LiteralNode, max_bits_to_use: usize) []u32 {
        var max_bits = max_bits_to_use;
        var n = list.len;

        assert(max_bits < max_bits_limit);

        // The tree can't have greater depth than n - 1, no matter what.
        // This saves a little bit of work in some small cases.
        max_bits = @minimum(max_bits, n - 1);

        // Create information about each of the levels.
        // A bogus "Level 0" whose sole purpose is so that
        // level1.prev.needed == 0. This makes level1.next_pair_freq
        // be a legitimate value that never gets chosen.
        var levels: [max_bits_limit]LevelInfo = mem.zeroes([max_bits_limit]LevelInfo);
        // leaf_counts[i] counts the number of literals at the left
        // of ancestors of the rightmost node at level i.
        // leaf_counts[i][j] is the number of literals at the left
        // of the level j ancestor.
        var leaf_counts: [max_bits_limit][max_bits_limit]u32 = mem.zeroes([max_bits_limit][max_bits_limit]u32);

        {
            var level = @as(u32, 1);
            while (level <= max_bits) : (level += 1) {
                // For every level, the first two items are the first two
                // characters. We initialize the levels as if we had already
                // figured this out.
                levels[level] = LevelInfo{
                    .level = level,
                    .last_freq = list[1].freq,
                    .next_char_freq = list[2].freq,
                    .next_pair_freq = list[0].freq + list[1].freq,
                    .needed = 0,
                };
                leaf_counts[level][level] = 2;
                if (level == 1) {
                    levels[level].next_pair_freq = math.maxInt(i32);
                }
            }
        }

        // We need a total of 2*n - 2 items at top level and have already generated 2.
        levels[max_bits].needed = 2 * @intCast(u32, n) - 4;

        {
            var level = max_bits;
            while (true) {
                var l = &levels[level];
                if (l.next_pair_freq == math.maxInt(i32) and l.next_char_freq == math.maxInt(i32)) {
                    // We've run out of both leaves and pairs. End all
                    // calculations for this level. To make sure we never come
                    // back to this level or any lower level, set
                    // next_pair_freq impossibly large.
                    l.needed = 0;
                    levels[level + 1].next_pair_freq = math.maxInt(i32);
                    level += 1;
                    continue;
                }

                var prev_freq = l.last_freq;
                if (l.next_char_freq < l.next_pair_freq) {
                    // The next item on this row is a leaf node.
                    var next = leaf_counts[level][level] + 1;
                    l.last_freq = l.next_char_freq;
                    // Lower leaf_counts are the same as the previous node's.
                    leaf_counts[level][level] = next;
                    if (next >= list.len) {
                        l.next_char_freq = maxNode().freq;
                    } else {
                        l.next_char_freq = list[next].freq;
                    }
                } else {
                    // The next item on this row is a pair from the previous row.
                    // next_pair_freq isn't valid until we generate two
                    // more values in the level below.
                    l.last_freq = l.next_pair_freq;
                    // Take leaf counts from the lower level, except counts[level] remains the same.
                    mem.copy(u32, leaf_counts[level][0..level], leaf_counts[level - 1][0..level]);
                    levels[l.level - 1].needed = 2;
                }

                l.needed -= 1;
                if (l.needed == 0) {
                    // We've done everything we need to do for this level.
                    // Continue calculating one level up. Fill in next_pair_freq
                    // of that level with the sum of the two nodes we've just
                    // calculated on this level.
                    if (l.level == max_bits) {
                        // All done!
                        break;
                    }
                    levels[l.level + 1].next_pair_freq = prev_freq + l.last_freq;
                    level += 1;
                } else {
                    // If we stole from below, move down temporarily to replenish it.
                    while (levels[level - 1].needed > 0) {
                        level -= 1;
                        if (level == 0) {
                            break;
                        }
                    }
                }
            }
        }

        // Something is wrong if at the end the top level hasn't used all of the leaves.
        assert(leaf_counts[max_bits][max_bits] == n);

        var bit_count = self.bit_count[0 .. max_bits + 1];
        var bits: u32 = 1;
        var counts = &leaf_counts[max_bits];
        {
            var level = max_bits;
            while (level > 0) : (level -= 1) {
                // counts[level] gives the number of literals requiring at
                // least "bits" bits to encode.
                bit_count[bits] = counts[level] - counts[level - 1];
                bits += 1;
                if (level == 0) {
                    break;
                }
            }
        }
        return bit_count;
    }

    // Look at the leaves and assign them a bit count and an encoding as
    // specified in RFC 1951 3.2.2
    fn assignEncodingAndSize(self: *HuffmanEncoder, bit_count: []u32, list_arg: []LiteralNode) void {
        var code = @as(u16, 0);
        var list = list_arg;

        for (bit_count) |bits, n| {
            code <<= 1;
            if (n == 0 or bits == 0) {
                continue;
            }
            // The literals list[list.len - bits] .. list[list.len - 1] are
            // encoded using "bits" bits, and get the values code, code + 1, ...
            // The code values are assigned in literal order (not frequency order).
            var chunk = list[list.len - @intCast(u32, bits) ..];

            self.lns = chunk;
            sort.sort(LiteralNode, self.lns, {}, byLiteral);

            for (chunk) |node| {
                self.codes[node.literal] = HuffCode{
                    .code = bu.bitReverse(u16, code, @intCast(u5, n)),
                    .len = @intCast(u16, n),
                };
                code += 1;
            }
            list = list[0 .. list.len - @intCast(u32, bits)];
        }
    }
};
|
||||||
|
|
||||||
|
// Sentinel node whose frequency compares at least as large as any real
// literal's, used to terminate the leaf scan in bitCounts.
fn maxNode() LiteralNode {
    return LiteralNode{
        .literal = math.maxInt(u16),
        .freq = math.maxInt(u16),
    };
}
|
||||||
|
|
||||||
|
// Allocates a HuffmanEncoder able to hold `size` codes.
// Caller owns the result and must call deinit().
pub fn newHuffmanEncoder(allocator: Allocator, size: u32) !HuffmanEncoder {
    return HuffmanEncoder{
        .codes = try allocator.alloc(HuffCode, size),
        // Allocate a reusable buffer with the longest possible frequency table
        // (deflate_const.max_num_frequencies).
        .freq_cache = try allocator.alloc(LiteralNode, deflate_const.max_num_frequencies + 1),
        .allocator = allocator,
    };
}
|
||||||
|
|
||||||
|
// Generates a HuffmanCode corresponding to the fixed literal table
// (RFC 1951, section 3.2.6).
pub fn generateFixedLiteralEncoding(allocator: Allocator) !HuffmanEncoder {
    var h = try newHuffmanEncoder(allocator, deflate_const.max_num_frequencies);
    var codes = h.codes;
    var ch: u16 = 0;

    while (ch < deflate_const.max_num_frequencies) : (ch += 1) {
        var bits: u16 = undefined;
        var size: u16 = undefined;
        switch (ch) {
            0...143 => {
                // size 8, 00110000 .. 10111111
                bits = ch + 48;
                size = 8;
            },
            144...255 => {
                // size 9, 110010000 .. 111111111
                bits = ch + 400 - 144;
                size = 9;
            },
            256...279 => {
                // size 7, 0000000 .. 0010111
                bits = ch - 256;
                size = 7;
            },
            else => {
                // size 8, 11000000 .. 11000111
                bits = ch + 192 - 280;
                size = 8;
            },
        }
        codes[ch] = HuffCode{ .code = bu.bitReverse(u16, bits, @intCast(u5, size)), .len = size };
    }
    return h;
}
|
||||||
|
|
||||||
|
// Generates the encoder for the 30 fixed offset (distance) codes:
// each offset code is simply its 5-bit value, bit-reversed.
pub fn generateFixedOffsetEncoding(allocator: Allocator) !HuffmanEncoder {
    var h = try newHuffmanEncoder(allocator, 30);
    var codes = h.codes;
    for (codes) |_, ch| {
        codes[ch] = HuffCode{ .code = bu.bitReverse(u16, @intCast(u16, ch), 5), .len = 5 };
    }
    return h;
}
|
||||||
|
|
||||||
|
// Sort comparator: ascending by literal value.
fn byLiteral(context: void, a: LiteralNode, b: LiteralNode) bool {
    _ = context;
    return a.literal < b.literal;
}
|
||||||
|
|
||||||
|
// Sort comparator: ascending by frequency, ties broken by literal value.
fn byFreq(context: void, a: LiteralNode, b: LiteralNode) bool {
    _ = context;
    if (a.freq == b.freq) return a.literal < b.literal;
    return a.freq < b.freq;
}
|
||||||
|
|
||||||
|
test "generate a Huffman code from an array of frequencies" {
    var freqs: [19]u16 = [_]u16{
        8, // 0
        1, // 1
        1, // 2
        2, // 3
        5, // 4
        10, // 5
        9, // 6
        1, // 7
        0, // 8
        0, // 9
        0, // 10
        0, // 11
        0, // 12
        0, // 13
        0, // 14
        0, // 15
        1, // 16
        3, // 17
        5, // 18
    };

    var enc = try newHuffmanEncoder(testing.allocator, freqs.len);
    defer enc.deinit();
    enc.generate(freqs[0..], 7);

    try testing.expect(enc.bitLength(freqs[0..]) == 141);

    // Expected code length per literal (0 means unused).
    const expected_lens = [_]u16{ 3, 6, 6, 5, 3, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 6, 5, 3 };
    for (expected_lens) |want_len, i| {
        try testing.expect(enc.codes[i].len == want_len);
    }

    try testing.expect(enc.codes[5].code == 0x0);
    try testing.expect(enc.codes[6].code == 0x2);
    try testing.expect(enc.codes[0].code == 0x1);
    try testing.expect(enc.codes[4].code == 0x5);
    try testing.expect(enc.codes[18].code == 0x3);
    try testing.expect(enc.codes[3].code == 0x7);
    try testing.expect(enc.codes[17].code == 0x17);
    try testing.expect(enc.codes[1].code == 0x0f);
    try testing.expect(enc.codes[2].code == 0x2f);
    try testing.expect(enc.codes[7].code == 0x1f);
    try testing.expect(enc.codes[16].code == 0x3f);
}
|
||||||
|
|
||||||
|
// Smoke test: building the fixed literal table must succeed and not leak.
// (Fixes the misspelled test name: "litteral" -> "literal".)
test "generate a Huffman code for the fixed literal table specific to Deflate" {
    var enc = try generateFixedLiteralEncoding(testing.allocator);
    defer enc.deinit();
}
|
||||||
|
|
||||||
|
// Smoke test: building the fixed offset table must succeed and not leak.
test "generate a Huffman code for the 30 possible relative offsets (LZ77 distances) of Deflate" {
    var enc = try generateFixedOffsetEncoding(testing.allocator);
    defer enc.deinit();
}
|
15
lib/std/compress/deflate/mem_utils.zig
Normal file
15
lib/std/compress/deflate/mem_utils.zig
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
const std = @import("std");
|
||||||
|
const math = std.math;
|
||||||
|
const mem = std.mem;
|
||||||
|
|
||||||
|
// Copies elements from a source `src` slice into a destination `dst` slice.
// The copy never returns an error but might not be complete if the destination is too small.
// Returns the number of elements copied, which will be the minimum of `src.len` and `dst.len`.
pub fn copy(dst: []u8, src: []const u8) usize {
    const n = math.min(dst.len, src.len);
    mem.copy(u8, dst[0..n], src[0..n]);
    return n;
}
|
1
lib/std/compress/deflate/testdata/compress-e.txt
vendored
Normal file
1
lib/std/compress/deflate/testdata/compress-e.txt
vendored
Normal file
File diff suppressed because one or more lines are too long
29
lib/std/compress/deflate/testdata/compress-gettysburg.txt
vendored
Normal file
29
lib/std/compress/deflate/testdata/compress-gettysburg.txt
vendored
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
Four score and seven years ago our fathers brought forth on
|
||||||
|
this continent, a new nation, conceived in Liberty, and dedicated
|
||||||
|
to the proposition that all men are created equal.
|
||||||
|
Now we are engaged in a great Civil War, testing whether that
|
||||||
|
nation, or any nation so conceived and so dedicated, can long
|
||||||
|
endure.
|
||||||
|
We are met on a great battle-field of that war.
|
||||||
|
We have come to dedicate a portion of that field, as a final
|
||||||
|
resting place for those who here gave their lives that that
|
||||||
|
nation might live. It is altogether fitting and proper that
|
||||||
|
we should do this.
|
||||||
|
But, in a larger sense, we can not dedicate - we can not
|
||||||
|
consecrate - we can not hallow - this ground.
|
||||||
|
The brave men, living and dead, who struggled here, have
|
||||||
|
consecrated it, far above our poor power to add or detract.
|
||||||
|
The world will little note, nor long remember what we say here,
|
||||||
|
but it can never forget what they did here.
|
||||||
|
It is for us the living, rather, to be dedicated here to the
|
||||||
|
unfinished work which they who fought here have thus far so
|
||||||
|
nobly advanced. It is rather for us to be here dedicated to
|
||||||
|
the great task remaining before us - that from these honored
|
||||||
|
dead we take increased devotion to that cause for which they
|
||||||
|
gave the last full measure of devotion -
|
||||||
|
that we here highly resolve that these dead shall not have
|
||||||
|
died in vain - that this nation, under God, shall have a new
|
||||||
|
birth of freedom - and that government of the people, by the
|
||||||
|
people, for the people, shall not perish from this earth.
|
||||||
|
|
||||||
|
Abraham Lincoln, November 19, 1863, Gettysburg, Pennsylvania
|
1
lib/std/compress/deflate/testdata/compress-pi.txt
vendored
Normal file
1
lib/std/compress/deflate/testdata/compress-pi.txt
vendored
Normal file
File diff suppressed because one or more lines are too long
BIN
lib/std/compress/deflate/testdata/huffman-null-max.dyn.expect
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-null-max.dyn.expect
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-null-max.dyn.expect-noinput
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-null-max.dyn.expect-noinput
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-null-max.golden
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-null-max.golden
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-null-max.input
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-null-max.input
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-null-max.wb.expect
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-null-max.wb.expect
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-null-max.wb.expect-noinput
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-null-max.wb.expect-noinput
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-pi.dyn.expect
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-pi.dyn.expect
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-pi.dyn.expect-noinput
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-pi.dyn.expect-noinput
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-pi.golden
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-pi.golden
vendored
Normal file
Binary file not shown.
1
lib/std/compress/deflate/testdata/huffman-pi.input
vendored
Normal file
1
lib/std/compress/deflate/testdata/huffman-pi.input
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920962829254091715364367892590360011330530548820466521384146951941511609433057270365759591953092186117381932611793105118548074462379962749567351885752724891227938183011949129833673362440656643086021394946395224737190702179860943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901224953430146549585371050792279689258923542019956112129021960864034418159813629774771309960518707211349999998372978049951059731732816096318595024459455346908302642522308253344685035261931188171010003137838752886587533208381420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909216420198938095257201065485863278865936153381827968230301952035301852968995773622599413891249721775283479131515574857242454150695950829533116861727855889075098381754637464939319255060400927701671139009848824012858361603563707660104710181942955596198946767837449448255379774726847104047534646208046684259069491293313677028989152104752162056966024058038150193511253382430035587640247496473263914199272604269922796782354781636009341721641219924586315030286182974555706749838505494588586926995690927210797509302955321165344987202755960236480665499119881834797753566369807426542527862551818417574672890977772793800081647060016145249192173217214772350141441973568548161361157352552133475741849468438523323907394143334547762416862518983569485562099219222184272550254256887671790494601653466804988627232791786085784383827967976681454100953883786360950680064225125205117392984896084128488626945604241965285022210661186306744278622039194945047123713786960956364371917287467764657573962413890865832645995813390478027590
099465764078951269468398352595709825822620522489407726719478268482601476990902640136394437455305068203496252451749399651431429809190659250937221696461515709858387410597885959772975498930161753928468138268683868942774155991855925245953959431049972524680845987273644695848653836736222626099124608051243884390451244136549762780797715691435997700129616089441694868555848406353422072225828488648158456028506016842739452267467678895252138522549954666727823986456596116354886230577456498035593634568174324112515076069479451096596094025228879710893145669136867228748940560101503308617928680920874760917824938589009714909675985261365549781893129784821682998948722658804857564014270477555132379641451523746234364542858444795265867821051141354735739523113427166102135969536231442952484937187110145765403590279934403742007310578539062198387447808478489683321445713868751943506430218453191048481005370614680674919278191197939952061419663428754440643745123718192179998391015919561814675142691239748940907186494231961567945208095146550225231603881930142093762137855956638937787083039069792077346722182562599661501421503068038447734549202605414665925201497442850732518666002132434088190710486331734649651453905796268561005508106658796998163574736384052571459102897064140110971206280439039759515677157700420337869936007230558763176359421873125147120532928191826186125867321579198414848829164470609575270695722091756711672291098169091528017350671274858322287183520935396572512108357915136988209144421006751033467110314126711136990865851639831501970165151168517143765761835155650884909989859982387345528331635507647918535893226185489632132933089857064204675259070915481416549859461637180
|
BIN
lib/std/compress/deflate/testdata/huffman-pi.wb.expect
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-pi.wb.expect
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-pi.wb.expect-noinput
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-pi.wb.expect-noinput
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-rand-1k.dyn.expect
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-rand-1k.dyn.expect
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-rand-1k.dyn.expect-noinput
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-rand-1k.dyn.expect-noinput
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-rand-1k.golden
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-rand-1k.golden
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-rand-1k.input
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-rand-1k.input
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-rand-1k.wb.expect
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-rand-1k.wb.expect
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-rand-1k.wb.expect-noinput
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-rand-1k.wb.expect-noinput
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-rand-limit.dyn.expect
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-rand-limit.dyn.expect
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-rand-limit.dyn.expect-noinput
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-rand-limit.dyn.expect-noinput
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-rand-limit.golden
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-rand-limit.golden
vendored
Normal file
Binary file not shown.
4
lib/std/compress/deflate/testdata/huffman-rand-limit.input
vendored
Normal file
4
lib/std/compress/deflate/testdata/huffman-rand-limit.input
vendored
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||||
|
ř‹–vH
|
||||||
|
…”%€ŻÂţŤč ë†É·ĹŢę}‹ç>Úß˙lsŢĚçmŤIGH°čžň1YŢ4´[ĺŕ 0Â<30>[|]o#©
|
||||||
|
Ľ-#ľŮíul™ßýpfćîٱžn<C5BE>YŐÔ€Y<E282AC>w‰C8ÉŻ02š F=gn×ržN!OĆŕÔ{ŤĄö›kÜ*“w(ý´bÚ ç«kQC9/ ’lu>ô5ýC.÷¤uÚę›
|
BIN
lib/std/compress/deflate/testdata/huffman-rand-limit.wb.expect
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-rand-limit.wb.expect
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-rand-limit.wb.expect-noinput
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-rand-limit.wb.expect-noinput
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-rand-max.golden
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-rand-max.golden
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-rand-max.input
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-rand-max.input
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-shifts.dyn.expect
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-shifts.dyn.expect
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-shifts.dyn.expect-noinput
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-shifts.dyn.expect-noinput
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-shifts.golden
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-shifts.golden
vendored
Normal file
Binary file not shown.
2
lib/std/compress/deflate/testdata/huffman-shifts.input
vendored
Normal file
2
lib/std/compress/deflate/testdata/huffman-shifts.input
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
10101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010
1010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010
|
||||||
|
23232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323
2323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323
|
BIN
lib/std/compress/deflate/testdata/huffman-shifts.wb.expect
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-shifts.wb.expect
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-shifts.wb.expect-noinput
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-shifts.wb.expect-noinput
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-text-shift.dyn.expect
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-text-shift.dyn.expect
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-text-shift.dyn.expect-noinput
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-text-shift.dyn.expect-noinput
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-text-shift.golden
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-text-shift.golden
vendored
Normal file
Binary file not shown.
14
lib/std/compress/deflate/testdata/huffman-text-shift.input
vendored
Normal file
14
lib/std/compress/deflate/testdata/huffman-text-shift.input
vendored
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
//Copyright2009ThGoAuthor.Allrightrrvd.
|
||||||
|
//UofthiourccodigovrndbyBSD-tyl
|
||||||
|
//licnthtcnbfoundinthLICENSEfil.
|
||||||
|
|
||||||
|
pckgmin
|
||||||
|
|
||||||
|
import"o"
|
||||||
|
|
||||||
|
funcmin(){
|
||||||
|
vrb=mk([]byt,65535)
|
||||||
|
f,_:=o.Crt("huffmn-null-mx.in")
|
||||||
|
f.Writ(b)
|
||||||
|
}
|
||||||
|
ABCDEFGHIJKLMNOPQRSTUVXxyz!"#¤%&/?"
|
BIN
lib/std/compress/deflate/testdata/huffman-text-shift.wb.expect
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-text-shift.wb.expect
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-text-shift.wb.expect-noinput
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-text-shift.wb.expect-noinput
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-text.dyn.expect
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-text.dyn.expect
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-text.dyn.expect-noinput
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-text.dyn.expect-noinput
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-text.golden
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-text.golden
vendored
Normal file
Binary file not shown.
14
lib/std/compress/deflate/testdata/huffman-text.input
vendored
Normal file
14
lib/std/compress/deflate/testdata/huffman-text.input
vendored
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
// zig v0.10.0
|
||||||
|
// create a file filled with 0x00
|
||||||
|
const std = @import("std");
|
||||||
|
|
||||||
|
pub fn main() !void {
|
||||||
|
var b = [1]u8{0} ** 65535;
|
||||||
|
const f = try std.fs.cwd().createFile(
|
||||||
|
"huffman-null-max.in",
|
||||||
|
.{ .read = true },
|
||||||
|
);
|
||||||
|
defer f.close();
|
||||||
|
|
||||||
|
_ = try f.writeAll(b[0..]);
|
||||||
|
}
|
BIN
lib/std/compress/deflate/testdata/huffman-text.wb.expect
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-text.wb.expect
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-text.wb.expect-noinput
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-text.wb.expect-noinput
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-zero.dyn.expect
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-zero.dyn.expect
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-zero.dyn.expect-noinput
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-zero.dyn.expect-noinput
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-zero.golden
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-zero.golden
vendored
Normal file
Binary file not shown.
1
lib/std/compress/deflate/testdata/huffman-zero.input
vendored
Normal file
1
lib/std/compress/deflate/testdata/huffman-zero.input
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
|
BIN
lib/std/compress/deflate/testdata/huffman-zero.wb.expect
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-zero.wb.expect
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/huffman-zero.wb.expect-noinput
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/huffman-zero.wb.expect-noinput
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/null-long-match.dyn.expect-noinput
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/null-long-match.dyn.expect-noinput
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/deflate/testdata/null-long-match.wb.expect-noinput
vendored
Normal file
BIN
lib/std/compress/deflate/testdata/null-long-match.wb.expect-noinput
vendored
Normal file
Binary file not shown.
955
lib/std/compress/deflate/testdata/rfc1951.txt
vendored
Normal file
955
lib/std/compress/deflate/testdata/rfc1951.txt
vendored
Normal file
@ -0,0 +1,955 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Network Working Group P. Deutsch
|
||||||
|
Request for Comments: 1951 Aladdin Enterprises
|
||||||
|
Category: Informational May 1996
|
||||||
|
|
||||||
|
|
||||||
|
DEFLATE Compressed Data Format Specification version 1.3
|
||||||
|
|
||||||
|
Status of This Memo
|
||||||
|
|
||||||
|
This memo provides information for the Internet community. This memo
|
||||||
|
does not specify an Internet standard of any kind. Distribution of
|
||||||
|
this memo is unlimited.
|
||||||
|
|
||||||
|
IESG Note:
|
||||||
|
|
||||||
|
The IESG takes no position on the validity of any Intellectual
|
||||||
|
Property Rights statements contained in this document.
|
||||||
|
|
||||||
|
Notices
|
||||||
|
|
||||||
|
Copyright (c) 1996 L. Peter Deutsch
|
||||||
|
|
||||||
|
Permission is granted to copy and distribute this document for any
|
||||||
|
purpose and without charge, including translations into other
|
||||||
|
languages and incorporation into compilations, provided that the
|
||||||
|
copyright notice and this notice are preserved, and that any
|
||||||
|
substantive changes or deletions from the original are clearly
|
||||||
|
marked.
|
||||||
|
|
||||||
|
A pointer to the latest version of this and related documentation in
|
||||||
|
HTML format can be found at the URL
|
||||||
|
<ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html>.
|
||||||
|
|
||||||
|
Abstract
|
||||||
|
|
||||||
|
This specification defines a lossless compressed data format that
|
||||||
|
compresses data using a combination of the LZ77 algorithm and Huffman
|
||||||
|
coding, with efficiency comparable to the best currently available
|
||||||
|
general-purpose compression methods. The data can be produced or
|
||||||
|
consumed, even for an arbitrarily long sequentially presented input
|
||||||
|
data stream, using only an a priori bounded amount of intermediate
|
||||||
|
storage. The format can be implemented readily in a manner not
|
||||||
|
covered by patents.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Deutsch Informational [Page 1]
|
||||||
|
|
||||||
|
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
|
||||||
|
|
||||||
|
|
||||||
|
Table of Contents
|
||||||
|
|
||||||
|
1. Introduction ................................................... 2
|
||||||
|
1.1. Purpose ................................................... 2
|
||||||
|
1.2. Intended audience ......................................... 3
|
||||||
|
1.3. Scope ..................................................... 3
|
||||||
|
1.4. Compliance ................................................ 3
|
||||||
|
1.5. Definitions of terms and conventions used ................ 3
|
||||||
|
1.6. Changes from previous versions ............................ 4
|
||||||
|
2. Compressed representation overview ............................. 4
|
||||||
|
3. Detailed specification ......................................... 5
|
||||||
|
3.1. Overall conventions ....................................... 5
|
||||||
|
3.1.1. Packing into bytes .................................. 5
|
||||||
|
3.2. Compressed block format ................................... 6
|
||||||
|
3.2.1. Synopsis of prefix and Huffman coding ............... 6
|
||||||
|
3.2.2. Use of Huffman coding in the "deflate" format ....... 7
|
||||||
|
3.2.3. Details of block format ............................. 9
|
||||||
|
3.2.4. Non-compressed blocks (BTYPE=00) ................... 11
|
||||||
|
3.2.5. Compressed blocks (length and distance codes) ...... 11
|
||||||
|
3.2.6. Compression with fixed Huffman codes (BTYPE=01) .... 12
|
||||||
|
3.2.7. Compression with dynamic Huffman codes (BTYPE=10) .. 13
|
||||||
|
3.3. Compliance ............................................... 14
|
||||||
|
4. Compression algorithm details ................................. 14
|
||||||
|
5. References .................................................... 16
|
||||||
|
6. Security Considerations ....................................... 16
|
||||||
|
7. Source code ................................................... 16
|
||||||
|
8. Acknowledgements .............................................. 16
|
||||||
|
9. Author's Address .............................................. 17
|
||||||
|
|
||||||
|
1. Introduction
|
||||||
|
|
||||||
|
1.1. Purpose
|
||||||
|
|
||||||
|
The purpose of this specification is to define a lossless
|
||||||
|
compressed data format that:
|
||||||
|
* Is independent of CPU type, operating system, file system,
|
||||||
|
and character set, and hence can be used for interchange;
|
||||||
|
* Can be produced or consumed, even for an arbitrarily long
|
||||||
|
sequentially presented input data stream, using only an a
|
||||||
|
priori bounded amount of intermediate storage, and hence
|
||||||
|
can be used in data communications or similar structures
|
||||||
|
such as Unix filters;
|
||||||
|
* Compresses data with efficiency comparable to the best
|
||||||
|
currently available general-purpose compression methods,
|
||||||
|
and in particular considerably better than the "compress"
|
||||||
|
program;
|
||||||
|
* Can be implemented readily in a manner not covered by
|
||||||
|
patents, and hence can be practiced freely;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Deutsch Informational [Page 2]
|
||||||
|
|
||||||
|
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
|
||||||
|
|
||||||
|
|
||||||
|
* Is compatible with the file format produced by the current
|
||||||
|
widely used gzip utility, in that conforming decompressors
|
||||||
|
will be able to read data produced by the existing gzip
|
||||||
|
compressor.
|
||||||
|
|
||||||
|
The data format defined by this specification does not attempt to:
|
||||||
|
|
||||||
|
* Allow random access to compressed data;
|
||||||
|
* Compress specialized data (e.g., raster graphics) as well
|
||||||
|
as the best currently available specialized algorithms.
|
||||||
|
|
||||||
|
A simple counting argument shows that no lossless compression
|
||||||
|
algorithm can compress every possible input data set. For the
|
||||||
|
format defined here, the worst case expansion is 5 bytes per 32K-
|
||||||
|
byte block, i.e., a size increase of 0.015% for large data sets.
|
||||||
|
English text usually compresses by a factor of 2.5 to 3;
|
||||||
|
executable files usually compress somewhat less; graphical data
|
||||||
|
such as raster images may compress much more.
|
||||||
|
|
||||||
|
1.2. Intended audience
|
||||||
|
|
||||||
|
This specification is intended for use by implementors of software
|
||||||
|
to compress data into "deflate" format and/or decompress data from
|
||||||
|
"deflate" format.
|
||||||
|
|
||||||
|
The text of the specification assumes a basic background in
|
||||||
|
programming at the level of bits and other primitive data
|
||||||
|
representations. Familiarity with the technique of Huffman coding
|
||||||
|
is helpful but not required.
|
||||||
|
|
||||||
|
1.3. Scope
|
||||||
|
|
||||||
|
The specification specifies a method for representing a sequence
|
||||||
|
of bytes as a (usually shorter) sequence of bits, and a method for
|
||||||
|
packing the latter bit sequence into bytes.
|
||||||
|
|
||||||
|
1.4. Compliance
|
||||||
|
|
||||||
|
Unless otherwise indicated below, a compliant decompressor must be
|
||||||
|
able to accept and decompress any data set that conforms to all
|
||||||
|
the specifications presented here; a compliant compressor must
|
||||||
|
produce data sets that conform to all the specifications presented
|
||||||
|
here.
|
||||||
|
|
||||||
|
1.5. Definitions of terms and conventions used
|
||||||
|
|
||||||
|
Byte: 8 bits stored or transmitted as a unit (same as an octet).
|
||||||
|
For this specification, a byte is exactly 8 bits, even on machines
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Deutsch Informational [Page 3]
|
||||||
|
|
||||||
|
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
|
||||||
|
|
||||||
|
|
||||||
|
which store a character on a number of bits different from eight.
|
||||||
|
See below, for the numbering of bits within a byte.
|
||||||
|
|
||||||
|
String: a sequence of arbitrary bytes.
|
||||||
|
|
||||||
|
1.6. Changes from previous versions
|
||||||
|
|
||||||
|
There have been no technical changes to the deflate format since
|
||||||
|
version 1.1 of this specification. In version 1.2, some
|
||||||
|
terminology was changed. Version 1.3 is a conversion of the
|
||||||
|
specification to RFC style.
|
||||||
|
|
||||||
|
2. Compressed representation overview
|
||||||
|
|
||||||
|
A compressed data set consists of a series of blocks, corresponding
|
||||||
|
to successive blocks of input data. The block sizes are arbitrary,
|
||||||
|
except that non-compressible blocks are limited to 65,535 bytes.
|
||||||
|
|
||||||
|
Each block is compressed using a combination of the LZ77 algorithm
|
||||||
|
and Huffman coding. The Huffman trees for each block are independent
|
||||||
|
of those for previous or subsequent blocks; the LZ77 algorithm may
|
||||||
|
use a reference to a duplicated string occurring in a previous block,
|
||||||
|
up to 32K input bytes before.
|
||||||
|
|
||||||
|
Each block consists of two parts: a pair of Huffman code trees that
|
||||||
|
describe the representation of the compressed data part, and a
|
||||||
|
compressed data part. (The Huffman trees themselves are compressed
|
||||||
|
using Huffman encoding.) The compressed data consists of a series of
|
||||||
|
elements of two types: literal bytes (of strings that have not been
|
||||||
|
detected as duplicated within the previous 32K input bytes), and
|
||||||
|
pointers to duplicated strings, where a pointer is represented as a
|
||||||
|
pair <length, backward distance>. The representation used in the
|
||||||
|
"deflate" format limits distances to 32K bytes and lengths to 258
|
||||||
|
bytes, but does not limit the size of a block, except for
|
||||||
|
uncompressible blocks, which are limited as noted above.
|
||||||
|
|
||||||
|
Each type of value (literals, distances, and lengths) in the
|
||||||
|
compressed data is represented using a Huffman code, using one code
|
||||||
|
tree for literals and lengths and a separate code tree for distances.
|
||||||
|
The code trees for each block appear in a compact form just before
|
||||||
|
the compressed data for that block.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Deutsch Informational [Page 4]
|
||||||
|
|
||||||
|
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
|
||||||
|
|
||||||
|
|
||||||
|
3. Detailed specification
|
||||||
|
|
||||||
|
3.1. Overall conventions In the diagrams below, a box like this:
|
||||||
|
|
||||||
|
+---+
|
||||||
|
| | <-- the vertical bars might be missing
|
||||||
|
+---+
|
||||||
|
|
||||||
|
represents one byte; a box like this:
|
||||||
|
|
||||||
|
+==============+
|
||||||
|
| |
|
||||||
|
+==============+
|
||||||
|
|
||||||
|
represents a variable number of bytes.
|
||||||
|
|
||||||
|
Bytes stored within a computer do not have a "bit order", since
|
||||||
|
they are always treated as a unit. However, a byte considered as
|
||||||
|
an integer between 0 and 255 does have a most- and least-
|
||||||
|
significant bit, and since we write numbers with the most-
|
||||||
|
significant digit on the left, we also write bytes with the most-
|
||||||
|
significant bit on the left. In the diagrams below, we number the
|
||||||
|
bits of a byte so that bit 0 is the least-significant bit, i.e.,
|
||||||
|
the bits are numbered:
|
||||||
|
|
||||||
|
+--------+
|
||||||
|
|76543210|
|
||||||
|
+--------+
|
||||||
|
|
||||||
|
Within a computer, a number may occupy multiple bytes. All
|
||||||
|
multi-byte numbers in the format described here are stored with
|
||||||
|
the least-significant byte first (at the lower memory address).
|
||||||
|
For example, the decimal number 520 is stored as:
|
||||||
|
|
||||||
|
0 1
|
||||||
|
+--------+--------+
|
||||||
|
|00001000|00000010|
|
||||||
|
+--------+--------+
|
||||||
|
^ ^
|
||||||
|
| |
|
||||||
|
| + more significant byte = 2 x 256
|
||||||
|
+ less significant byte = 8
|
||||||
|
|
||||||
|
3.1.1. Packing into bytes
|
||||||
|
|
||||||
|
This document does not address the issue of the order in which
|
||||||
|
bits of a byte are transmitted on a bit-sequential medium,
|
||||||
|
since the final data format described here is byte- rather than
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Deutsch Informational [Page 5]
|
||||||
|
|
||||||
|
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
|
||||||
|
|
||||||
|
|
||||||
|
bit-oriented. However, we describe the compressed block format
|
||||||
|
in below, as a sequence of data elements of various bit
|
||||||
|
lengths, not a sequence of bytes. We must therefore specify
|
||||||
|
how to pack these data elements into bytes to form the final
|
||||||
|
compressed byte sequence:
|
||||||
|
|
||||||
|
* Data elements are packed into bytes in order of
|
||||||
|
increasing bit number within the byte, i.e., starting
|
||||||
|
with the least-significant bit of the byte.
|
||||||
|
* Data elements other than Huffman codes are packed
|
||||||
|
starting with the least-significant bit of the data
|
||||||
|
element.
|
||||||
|
* Huffman codes are packed starting with the most-
|
||||||
|
significant bit of the code.
|
||||||
|
|
||||||
|
In other words, if one were to print out the compressed data as
|
||||||
|
a sequence of bytes, starting with the first byte at the
|
||||||
|
*right* margin and proceeding to the *left*, with the most-
|
||||||
|
significant bit of each byte on the left as usual, one would be
|
||||||
|
able to parse the result from right to left, with fixed-width
|
||||||
|
elements in the correct MSB-to-LSB order and Huffman codes in
|
||||||
|
bit-reversed order (i.e., with the first bit of the code in the
|
||||||
|
relative LSB position).
|
||||||
|
|
||||||
|
3.2. Compressed block format
|
||||||
|
|
||||||
|
3.2.1. Synopsis of prefix and Huffman coding
|
||||||
|
|
||||||
|
Prefix coding represents symbols from an a priori known
|
||||||
|
alphabet by bit sequences (codes), one code for each symbol, in
|
||||||
|
a manner such that different symbols may be represented by bit
|
||||||
|
sequences of different lengths, but a parser can always parse
|
||||||
|
an encoded string unambiguously symbol-by-symbol.
|
||||||
|
|
||||||
|
We define a prefix code in terms of a binary tree in which the
|
||||||
|
two edges descending from each non-leaf node are labeled 0 and
|
||||||
|
1 and in which the leaf nodes correspond one-for-one with (are
|
||||||
|
labeled with) the symbols of the alphabet; then the code for a
|
||||||
|
symbol is the sequence of 0's and 1's on the edges leading from
|
||||||
|
the root to the leaf labeled with that symbol. For example:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Deutsch Informational [Page 6]
|
||||||
|
|
||||||
|
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
|
||||||
|
|
||||||
|
|
||||||
|
/\ Symbol Code
|
||||||
|
0 1 ------ ----
|
||||||
|
/ \ A 00
|
||||||
|
/\ B B 1
|
||||||
|
0 1 C 011
|
||||||
|
/ \ D 010
|
||||||
|
A /\
|
||||||
|
0 1
|
||||||
|
/ \
|
||||||
|
D C
|
||||||
|
|
||||||
|
A parser can decode the next symbol from an encoded input
|
||||||
|
stream by walking down the tree from the root, at each step
|
||||||
|
choosing the edge corresponding to the next input bit.
|
||||||
|
|
||||||
|
Given an alphabet with known symbol frequencies, the Huffman
|
||||||
|
algorithm allows the construction of an optimal prefix code
|
||||||
|
(one which represents strings with those symbol frequencies
|
||||||
|
using the fewest bits of any possible prefix codes for that
|
||||||
|
alphabet). Such a code is called a Huffman code. (See
|
||||||
|
reference [1] in Chapter 5, references for additional
|
||||||
|
information on Huffman codes.)
|
||||||
|
|
||||||
|
Note that in the "deflate" format, the Huffman codes for the
|
||||||
|
various alphabets must not exceed certain maximum code lengths.
|
||||||
|
This constraint complicates the algorithm for computing code
|
||||||
|
lengths from symbol frequencies. Again, see Chapter 5,
|
||||||
|
references for details.
|
||||||
|
|
||||||
|
3.2.2. Use of Huffman coding in the "deflate" format
|
||||||
|
|
||||||
|
The Huffman codes used for each alphabet in the "deflate"
|
||||||
|
format have two additional rules:
|
||||||
|
|
||||||
|
* All codes of a given bit length have lexicographically
|
||||||
|
consecutive values, in the same order as the symbols
|
||||||
|
they represent;
|
||||||
|
|
||||||
|
* Shorter codes lexicographically precede longer codes.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Deutsch Informational [Page 7]
|
||||||
|
|
||||||
|
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
|
||||||
|
|
||||||
|
|
||||||
|
We could recode the example above to follow this rule as
|
||||||
|
follows, assuming that the order of the alphabet is ABCD:
|
||||||
|
|
||||||
|
Symbol Code
|
||||||
|
------ ----
|
||||||
|
A 10
|
||||||
|
B 0
|
||||||
|
C 110
|
||||||
|
D 111
|
||||||
|
|
||||||
|
I.e., 0 precedes 10 which precedes 11x, and 110 and 111 are
|
||||||
|
lexicographically consecutive.
|
||||||
|
|
||||||
|
Given this rule, we can define the Huffman code for an alphabet
|
||||||
|
just by giving the bit lengths of the codes for each symbol of
|
||||||
|
the alphabet in order; this is sufficient to determine the
|
||||||
|
actual codes. In our example, the code is completely defined
|
||||||
|
by the sequence of bit lengths (2, 1, 3, 3). The following
|
||||||
|
algorithm generates the codes as integers, intended to be read
|
||||||
|
from most- to least-significant bit. The code lengths are
|
||||||
|
initially in tree[I].Len; the codes are produced in
|
||||||
|
tree[I].Code.
|
||||||
|
|
||||||
|
1) Count the number of codes for each code length. Let
|
||||||
|
bl_count[N] be the number of codes of length N, N >= 1.
|
||||||
|
|
||||||
|
2) Find the numerical value of the smallest code for each
|
||||||
|
code length:
|
||||||
|
|
||||||
|
code = 0;
|
||||||
|
bl_count[0] = 0;
|
||||||
|
for (bits = 1; bits <= MAX_BITS; bits++) {
|
||||||
|
code = (code + bl_count[bits-1]) << 1;
|
||||||
|
next_code[bits] = code;
|
||||||
|
}
|
||||||
|
|
||||||
|
3) Assign numerical values to all codes, using consecutive
|
||||||
|
values for all codes of the same length with the base
|
||||||
|
values determined at step 2. Codes that are never used
|
||||||
|
(which have a bit length of zero) must not be assigned a
|
||||||
|
value.
|
||||||
|
|
||||||
|
for (n = 0; n <= max_code; n++) {
|
||||||
|
len = tree[n].Len;
|
||||||
|
if (len != 0) {
|
||||||
|
tree[n].Code = next_code[len];
|
||||||
|
next_code[len]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Deutsch Informational [Page 8]
|
||||||
|
|
||||||
|
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
Consider the alphabet ABCDEFGH, with bit lengths (3, 3, 3, 3,
|
||||||
|
3, 2, 4, 4). After step 1, we have:
|
||||||
|
|
||||||
|
N bl_count[N]
|
||||||
|
- -----------
|
||||||
|
2 1
|
||||||
|
3 5
|
||||||
|
4 2
|
||||||
|
|
||||||
|
Step 2 computes the following next_code values:
|
||||||
|
|
||||||
|
N next_code[N]
|
||||||
|
- ------------
|
||||||
|
1 0
|
||||||
|
2 0
|
||||||
|
3 2
|
||||||
|
4 14
|
||||||
|
|
||||||
|
Step 3 produces the following code values:
|
||||||
|
|
||||||
|
Symbol Length Code
|
||||||
|
------ ------ ----
|
||||||
|
A 3 010
|
||||||
|
B 3 011
|
||||||
|
C 3 100
|
||||||
|
D 3 101
|
||||||
|
E 3 110
|
||||||
|
F 2 00
|
||||||
|
G 4 1110
|
||||||
|
H 4 1111
|
||||||
|
|
||||||
|
3.2.3. Details of block format
|
||||||
|
|
||||||
|
Each block of compressed data begins with 3 header bits
|
||||||
|
containing the following data:
|
||||||
|
|
||||||
|
first bit BFINAL
|
||||||
|
next 2 bits BTYPE
|
||||||
|
|
||||||
|
Note that the header bits do not necessarily begin on a byte
|
||||||
|
boundary, since a block does not necessarily occupy an integral
|
||||||
|
number of bytes.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Deutsch Informational [Page 9]
|
||||||
|
|
||||||
|
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
|
||||||
|
|
||||||
|
|
||||||
|
BFINAL is set if and only if this is the last block of the data
|
||||||
|
set.
|
||||||
|
|
||||||
|
BTYPE specifies how the data are compressed, as follows:
|
||||||
|
|
||||||
|
00 - no compression
|
||||||
|
01 - compressed with fixed Huffman codes
|
||||||
|
10 - compressed with dynamic Huffman codes
|
||||||
|
11 - reserved (error)
|
||||||
|
|
||||||
|
The only difference between the two compressed cases is how the
|
||||||
|
Huffman codes for the literal/length and distance alphabets are
|
||||||
|
defined.
|
||||||
|
|
||||||
|
In all cases, the decoding algorithm for the actual data is as
|
||||||
|
follows:
|
||||||
|
|
||||||
|
do
|
||||||
|
read block header from input stream.
|
||||||
|
if stored with no compression
|
||||||
|
skip any remaining bits in current partially
|
||||||
|
processed byte
|
||||||
|
read LEN and NLEN (see next section)
|
||||||
|
copy LEN bytes of data to output
|
||||||
|
otherwise
|
||||||
|
if compressed with dynamic Huffman codes
|
||||||
|
read representation of code trees (see
|
||||||
|
subsection below)
|
||||||
|
loop (until end of block code recognized)
|
||||||
|
decode literal/length value from input stream
|
||||||
|
if value < 256
|
||||||
|
copy value (literal byte) to output stream
|
||||||
|
otherwise
|
||||||
|
if value = end of block (256)
|
||||||
|
break from loop
|
||||||
|
otherwise (value = 257..285)
|
||||||
|
decode distance from input stream
|
||||||
|
|
||||||
|
move backwards distance bytes in the output
|
||||||
|
stream, and copy length bytes from this
|
||||||
|
position to the output stream.
|
||||||
|
end loop
|
||||||
|
while not last block
|
||||||
|
|
||||||
|
Note that a duplicated string reference may refer to a string
|
||||||
|
in a previous block; i.e., the backward distance may cross one
|
||||||
|
or more block boundaries. However a distance cannot refer past
|
||||||
|
the beginning of the output stream. (An application using a
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Deutsch Informational [Page 10]
|
||||||
|
|
||||||
|
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
|
||||||
|
|
||||||
|
|
||||||
|
preset dictionary might discard part of the output stream; a
|
||||||
|
distance can refer to that part of the output stream anyway)
|
||||||
|
Note also that the referenced string may overlap the current
|
||||||
|
position; for example, if the last 2 bytes decoded have values
|
||||||
|
X and Y, a string reference with <length = 5, distance = 2>
|
||||||
|
adds X,Y,X,Y,X to the output stream.
|
||||||
|
|
||||||
|
We now specify each compression method in turn.
|
||||||
|
|
||||||
|
3.2.4. Non-compressed blocks (BTYPE=00)
|
||||||
|
|
||||||
|
Any bits of input up to the next byte boundary are ignored.
|
||||||
|
The rest of the block consists of the following information:
|
||||||
|
|
||||||
|
0 1 2 3 4...
|
||||||
|
+---+---+---+---+================================+
|
||||||
|
| LEN | NLEN |... LEN bytes of literal data...|
|
||||||
|
+---+---+---+---+================================+
|
||||||
|
|
||||||
|
LEN is the number of data bytes in the block. NLEN is the
|
||||||
|
one's complement of LEN.
|
||||||
|
|
||||||
|
3.2.5. Compressed blocks (length and distance codes)
|
||||||
|
|
||||||
|
As noted above, encoded data blocks in the "deflate" format
|
||||||
|
consist of sequences of symbols drawn from three conceptually
|
||||||
|
distinct alphabets: either literal bytes, from the alphabet of
|
||||||
|
byte values (0..255), or <length, backward distance> pairs,
|
||||||
|
where the length is drawn from (3..258) and the distance is
|
||||||
|
drawn from (1..32,768). In fact, the literal and length
|
||||||
|
alphabets are merged into a single alphabet (0..285), where
|
||||||
|
values 0..255 represent literal bytes, the value 256 indicates
|
||||||
|
end-of-block, and values 257..285 represent length codes
|
||||||
|
(possibly in conjunction with extra bits following the symbol
|
||||||
|
code) as follows:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Deutsch Informational [Page 11]
|
||||||
|
|
||||||
|
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
|
||||||
|
|
||||||
|
|
||||||
|
Extra Extra Extra
|
||||||
|
Code Bits Length(s) Code Bits Lengths Code Bits Length(s)
|
||||||
|
---- ---- ------ ---- ---- ------- ---- ---- -------
|
||||||
|
257 0 3 267 1 15,16 277 4 67-82
|
||||||
|
258 0 4 268 1 17,18 278 4 83-98
|
||||||
|
259 0 5 269 2 19-22 279 4 99-114
|
||||||
|
260 0 6 270 2 23-26 280 4 115-130
|
||||||
|
261 0 7 271 2 27-30 281 5 131-162
|
||||||
|
262 0 8 272 2 31-34 282 5 163-194
|
||||||
|
263 0 9 273 3 35-42 283 5 195-226
|
||||||
|
264 0 10 274 3 43-50 284 5 227-257
|
||||||
|
265 1 11,12 275 3 51-58 285 0 258
|
||||||
|
266 1 13,14 276 3 59-66
|
||||||
|
|
||||||
|
The extra bits should be interpreted as a machine integer
|
||||||
|
stored with the most-significant bit first, e.g., bits 1110
|
||||||
|
represent the value 14.
|
||||||
|
|
||||||
|
Extra Extra Extra
|
||||||
|
Code Bits Dist Code Bits Dist Code Bits Distance
|
||||||
|
---- ---- ---- ---- ---- ------ ---- ---- --------
|
||||||
|
0 0 1 10 4 33-48 20 9 1025-1536
|
||||||
|
1 0 2 11 4 49-64 21 9 1537-2048
|
||||||
|
2 0 3 12 5 65-96 22 10 2049-3072
|
||||||
|
3 0 4 13 5 97-128 23 10 3073-4096
|
||||||
|
4 1 5,6 14 6 129-192 24 11 4097-6144
|
||||||
|
5 1 7,8 15 6 193-256 25 11 6145-8192
|
||||||
|
6 2 9-12 16 7 257-384 26 12 8193-12288
|
||||||
|
7 2 13-16 17 7 385-512 27 12 12289-16384
|
||||||
|
8 3 17-24 18 8 513-768 28 13 16385-24576
|
||||||
|
9 3 25-32 19 8 769-1024 29 13 24577-32768
|
||||||
|
|
||||||
|
3.2.6. Compression with fixed Huffman codes (BTYPE=01)
|
||||||
|
|
||||||
|
The Huffman codes for the two alphabets are fixed, and are not
|
||||||
|
represented explicitly in the data. The Huffman code lengths
|
||||||
|
for the literal/length alphabet are:
|
||||||
|
|
||||||
|
Lit Value Bits Codes
|
||||||
|
--------- ---- -----
|
||||||
|
0 - 143 8 00110000 through
|
||||||
|
10111111
|
||||||
|
144 - 255 9 110010000 through
|
||||||
|
111111111
|
||||||
|
256 - 279 7 0000000 through
|
||||||
|
0010111
|
||||||
|
280 - 287 8 11000000 through
|
||||||
|
11000111
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Deutsch Informational [Page 12]
|
||||||
|
|
||||||
|
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
|
||||||
|
|
||||||
|
|
||||||
|
The code lengths are sufficient to generate the actual codes,
|
||||||
|
as described above; we show the codes in the table for added
|
||||||
|
clarity. Literal/length values 286-287 will never actually
|
||||||
|
occur in the compressed data, but participate in the code
|
||||||
|
construction.
|
||||||
|
|
||||||
|
Distance codes 0-31 are represented by (fixed-length) 5-bit
|
||||||
|
codes, with possible additional bits as shown in the table
|
||||||
|
shown in Paragraph 3.2.5, above. Note that distance codes 30-
|
||||||
|
31 will never actually occur in the compressed data.
|
||||||
|
|
||||||
|
3.2.7. Compression with dynamic Huffman codes (BTYPE=10)
|
||||||
|
|
||||||
|
The Huffman codes for the two alphabets appear in the block
|
||||||
|
immediately after the header bits and before the actual
|
||||||
|
compressed data, first the literal/length code and then the
|
||||||
|
distance code. Each code is defined by a sequence of code
|
||||||
|
lengths, as discussed in Paragraph 3.2.2, above. For even
|
||||||
|
greater compactness, the code length sequences themselves are
|
||||||
|
compressed using a Huffman code. The alphabet for code lengths
|
||||||
|
is as follows:
|
||||||
|
|
||||||
|
0 - 15: Represent code lengths of 0 - 15
|
||||||
|
16: Copy the previous code length 3 - 6 times.
|
||||||
|
The next 2 bits indicate repeat length
|
||||||
|
(0 = 3, ... , 3 = 6)
|
||||||
|
Example: Codes 8, 16 (+2 bits 11),
|
||||||
|
16 (+2 bits 10) will expand to
|
||||||
|
12 code lengths of 8 (1 + 6 + 5)
|
||||||
|
17: Repeat a code length of 0 for 3 - 10 times.
|
||||||
|
(3 bits of length)
|
||||||
|
18: Repeat a code length of 0 for 11 - 138 times
|
||||||
|
(7 bits of length)
|
||||||
|
|
||||||
|
A code length of 0 indicates that the corresponding symbol in
|
||||||
|
the literal/length or distance alphabet will not occur in the
|
||||||
|
block, and should not participate in the Huffman code
|
||||||
|
construction algorithm given earlier. If only one distance
|
||||||
|
code is used, it is encoded using one bit, not zero bits; in
|
||||||
|
this case there is a single code length of one, with one unused
|
||||||
|
code. One distance code of zero bits means that there are no
|
||||||
|
distance codes used at all (the data is all literals).
|
||||||
|
|
||||||
|
We can now define the format of the block:
|
||||||
|
|
||||||
|
5 Bits: HLIT, # of Literal/Length codes - 257 (257 - 286)
|
||||||
|
5 Bits: HDIST, # of Distance codes - 1 (1 - 32)
|
||||||
|
4 Bits: HCLEN, # of Code Length codes - 4 (4 - 19)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Deutsch Informational [Page 13]
|
||||||
|
|
||||||
|
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
|
||||||
|
|
||||||
|
|
||||||
|
(HCLEN + 4) x 3 bits: code lengths for the code length
|
||||||
|
alphabet given just above, in the order: 16, 17, 18,
|
||||||
|
0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
|
||||||
|
|
||||||
|
These code lengths are interpreted as 3-bit integers
|
||||||
|
(0-7); as above, a code length of 0 means the
|
||||||
|
corresponding symbol (literal/length or distance code
|
||||||
|
length) is not used.
|
||||||
|
|
||||||
|
HLIT + 257 code lengths for the literal/length alphabet,
|
||||||
|
encoded using the code length Huffman code
|
||||||
|
|
||||||
|
HDIST + 1 code lengths for the distance alphabet,
|
||||||
|
encoded using the code length Huffman code
|
||||||
|
|
||||||
|
The actual compressed data of the block,
|
||||||
|
encoded using the literal/length and distance Huffman
|
||||||
|
codes
|
||||||
|
|
||||||
|
The literal/length symbol 256 (end of data),
|
||||||
|
encoded using the literal/length Huffman code
|
||||||
|
|
||||||
|
The code length repeat codes can cross from HLIT + 257 to the
|
||||||
|
HDIST + 1 code lengths. In other words, all code lengths form
|
||||||
|
a single sequence of HLIT + HDIST + 258 values.
|
||||||
|
|
||||||
|
3.3. Compliance
|
||||||
|
|
||||||
|
A compressor may limit further the ranges of values specified in
|
||||||
|
the previous section and still be compliant; for example, it may
|
||||||
|
limit the range of backward pointers to some value smaller than
|
||||||
|
32K. Similarly, a compressor may limit the size of blocks so that
|
||||||
|
a compressible block fits in memory.
|
||||||
|
|
||||||
|
A compliant decompressor must accept the full range of possible
|
||||||
|
values defined in the previous section, and must accept blocks of
|
||||||
|
arbitrary size.
|
||||||
|
|
||||||
|
4. Compression algorithm details
|
||||||
|
|
||||||
|
While it is the intent of this document to define the "deflate"
|
||||||
|
compressed data format without reference to any particular
|
||||||
|
compression algorithm, the format is related to the compressed
|
||||||
|
formats produced by LZ77 (Lempel-Ziv 1977, see reference [2] below);
|
||||||
|
since many variations of LZ77 are patented, it is strongly
|
||||||
|
recommended that the implementor of a compressor follow the general
|
||||||
|
algorithm presented here, which is known not to be patented per se.
|
||||||
|
The material in this section is not part of the definition of the
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Deutsch Informational [Page 14]
|
||||||
|
|
||||||
|
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
|
||||||
|
|
||||||
|
|
||||||
|
specification per se, and a compressor need not follow it in order to
|
||||||
|
be compliant.
|
||||||
|
|
||||||
|
The compressor terminates a block when it determines that starting a
|
||||||
|
new block with fresh trees would be useful, or when the block size
|
||||||
|
fills up the compressor's block buffer.
|
||||||
|
|
||||||
|
The compressor uses a chained hash table to find duplicated strings,
|
||||||
|
using a hash function that operates on 3-byte sequences. At any
|
||||||
|
given point during compression, let XYZ be the next 3 input bytes to
|
||||||
|
be examined (not necessarily all different, of course). First, the
|
||||||
|
compressor examines the hash chain for XYZ. If the chain is empty,
|
||||||
|
the compressor simply writes out X as a literal byte and advances one
|
||||||
|
byte in the input. If the hash chain is not empty, indicating that
|
||||||
|
the sequence XYZ (or, if we are unlucky, some other 3 bytes with the
|
||||||
|
same hash function value) has occurred recently, the compressor
|
||||||
|
compares all strings on the XYZ hash chain with the actual input data
|
||||||
|
sequence starting at the current point, and selects the longest
|
||||||
|
match.
|
||||||
|
|
||||||
|
The compressor searches the hash chains starting with the most recent
|
||||||
|
strings, to favor small distances and thus take advantage of the
|
||||||
|
Huffman encoding. The hash chains are singly linked. There are no
|
||||||
|
deletions from the hash chains; the algorithm simply discards matches
|
||||||
|
that are too old. To avoid a worst-case situation, very long hash
|
||||||
|
chains are arbitrarily truncated at a certain length, determined by a
|
||||||
|
run-time parameter.
|
||||||
|
|
||||||
|
To improve overall compression, the compressor optionally defers the
|
||||||
|
selection of matches ("lazy matching"): after a match of length N has
|
||||||
|
been found, the compressor searches for a longer match starting at
|
||||||
|
the next input byte. If it finds a longer match, it truncates the
|
||||||
|
previous match to a length of one (thus producing a single literal
|
||||||
|
byte) and then emits the longer match. Otherwise, it emits the
|
||||||
|
original match, and, as described above, advances N bytes before
|
||||||
|
continuing.
|
||||||
|
|
||||||
|
Run-time parameters also control this "lazy match" procedure. If
|
||||||
|
compression ratio is most important, the compressor attempts a
|
||||||
|
complete second search regardless of the length of the first match.
|
||||||
|
In the normal case, if the current match is "long enough", the
|
||||||
|
compressor reduces the search for a longer match, thus speeding up
|
||||||
|
the process. If speed is most important, the compressor inserts new
|
||||||
|
strings in the hash table only when no match was found, or when the
|
||||||
|
match is not "too long". This degrades the compression ratio but
|
||||||
|
saves time since there are both fewer insertions and fewer searches.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Deutsch Informational [Page 15]
|
||||||
|
|
||||||
|
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
|
||||||
|
|
||||||
|
|
||||||
|
5. References
|
||||||
|
|
||||||
|
[1] Huffman, D. A., "A Method for the Construction of Minimum
|
||||||
|
Redundancy Codes", Proceedings of the Institute of Radio
|
||||||
|
Engineers, September 1952, Volume 40, Number 9, pp. 1098-1101.
|
||||||
|
|
||||||
|
[2] Ziv J., Lempel A., "A Universal Algorithm for Sequential Data
|
||||||
|
Compression", IEEE Transactions on Information Theory, Vol. 23,
|
||||||
|
No. 3, pp. 337-343.
|
||||||
|
|
||||||
|
[3] Gailly, J.-L., and Adler, M., ZLIB documentation and sources,
|
||||||
|
available in ftp://ftp.uu.net/pub/archiving/zip/doc/
|
||||||
|
|
||||||
|
[4] Gailly, J.-L., and Adler, M., GZIP documentation and sources,
|
||||||
|
available as gzip-*.tar in ftp://prep.ai.mit.edu/pub/gnu/
|
||||||
|
|
||||||
|
[5] Schwartz, E. S., and Kallick, B. "Generating a canonical prefix
|
||||||
|
encoding." Comm. ACM, 7,3 (Mar. 1964), pp. 166-169.
|
||||||
|
|
||||||
|
[6] Hirschberg and Lelewer, "Efficient decoding of prefix codes,"
|
||||||
|
Comm. ACM, 33,4, April 1990, pp. 449-459.
|
||||||
|
|
||||||
|
6. Security Considerations
|
||||||
|
|
||||||
|
Any data compression method involves the reduction of redundancy in
|
||||||
|
the data. Consequently, any corruption of the data is likely to have
|
||||||
|
severe effects and be difficult to correct. Uncompressed text, on
|
||||||
|
the other hand, will probably still be readable despite the presence
|
||||||
|
of some corrupted bytes.
|
||||||
|
|
||||||
|
It is recommended that systems using this data format provide some
|
||||||
|
means of validating the integrity of the compressed data. See
|
||||||
|
reference [3], for example.
|
||||||
|
|
||||||
|
7. Source code
|
||||||
|
|
||||||
|
Source code for a C language implementation of a "deflate" compliant
|
||||||
|
compressor and decompressor is available within the zlib package at
|
||||||
|
ftp://ftp.uu.net/pub/archiving/zip/zlib/.
|
||||||
|
|
||||||
|
8. Acknowledgements
|
||||||
|
|
||||||
|
Trademarks cited in this document are the property of their
|
||||||
|
respective owners.
|
||||||
|
|
||||||
|
Phil Katz designed the deflate format. Jean-Loup Gailly and Mark
|
||||||
|
Adler wrote the related software described in this specification.
|
||||||
|
Glenn Randers-Pehrson converted this document to RFC and HTML format.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Deutsch Informational [Page 16]
|
||||||
|
|
||||||
|
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
|
||||||
|
|
||||||
|
|
||||||
|
9. Author's Address
|
||||||
|
|
||||||
|
L. Peter Deutsch
|
||||||
|
Aladdin Enterprises
|
||||||
|
203 Santa Margarita Ave.
|
||||||
|
Menlo Park, CA 94025
|
||||||
|
|
||||||
|
Phone: (415) 322-0103 (AM only)
|
||||||
|
FAX: (415) 322-1734
|
||||||
|
EMail: <ghost@aladdin.com>
|
||||||
|
|
||||||
|
Questions about the technical content of this specification can be
|
||||||
|
sent by email to:
|
||||||
|
|
||||||
|
Jean-Loup Gailly <gzip@prep.ai.mit.edu> and
|
||||||
|
Mark Adler <madler@alumni.caltech.edu>
|
||||||
|
|
||||||
|
Editorial comments on this specification can be sent by email to:
|
||||||
|
|
||||||
|
L. Peter Deutsch <ghost@aladdin.com> and
|
||||||
|
Glenn Randers-Pehrson <randeg@alumni.rpi.edu>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Deutsch Informational [Page 17]
|
||||||
|
|
104
lib/std/compress/deflate/token.zig
Normal file
104
lib/std/compress/deflate/token.zig
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
// Bit layout of a Token (a packed u32, most-significant bits first):
// 2 bits: type, can be 0 (literal), 1 (EOF), 2 (Match) or 3 (Unused).
// 8 bits: xlength (length - MIN_MATCH_LENGTH).
// 22 bits: xoffset (offset - MIN_OFFSET_SIZE), or literal.
// Number of bits the xlength field is shifted left past the xoffset field.
const length_shift = 22;
// Mask selecting the low 22 xoffset/literal bits.
const offset_mask = (1 << length_shift) - 1; // 4_194_303
// Type tag for literal tokens; zero, so a literal token equals its literal value.
const literal_type = 0 << 30; // 0
// Type tag for match tokens, placed in the top two bits.
pub const match_type = 1 << 30; // 1_073_741_824
|
||||||
|
|
||||||
|
// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
|
||||||
|
// is length_codes[length - MIN_MATCH_LENGTH]
|
||||||
|
// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
// is length_codes[length - MIN_MATCH_LENGTH].
// Declared const: the table is fixed data (RFC 1951, Paragraph 3.2.5) and is
// never mutated; a mutable file-scope `var` would invite accidental writes.
const length_codes = [_]u32{
    0,  1,  2,  3,  4,  5,  6,  7,  8,  8,
    9,  9,  10, 10, 11, 11, 12, 12, 12, 12,
    13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
    15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
    17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
    18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
    19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
    21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
    22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
    22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
    23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
    25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
    25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
    25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
    26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
    26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
    26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
    27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
    27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
    27, 27, 27, 27, 27, 28,
};
|
||||||
|
|
||||||
|
// offset_codes[o] is the offset code for a (possibly scaled-down) offset o.
// Declared `const`: this is a read-only lookup table (only read by
// offsetCode), so it must not be a mutable global.
const offset_codes = [_]u32{
    0,  1,  2,  3,  4,  4,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,
    8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
};
|
||||||
|
|
||||||
|
/// A token is a u32 packing either a literal byte or a <xlength, xoffset>
/// match, tagged in the top two bits (see the packing layout comment above).
pub const Token = u32;
|
||||||
|
|
||||||
|
/// Wraps a literal byte value into a literal token.
pub fn literalToken(lit: u32) Token {
    return lit + literal_type;
}
|
||||||
|
|
||||||
|
/// Packs an < xlength, xoffset > pair into a match token.
pub fn matchToken(xlength: u32, xoffset: u32) Token {
    const shifted_length = xlength << length_shift;
    return xoffset + shifted_length + match_type;
}
|
||||||
|
|
||||||
|
/// Extracts the literal byte stored in a literal token.
pub fn literal(t: Token) u32 {
    const value = t - literal_type;
    return @intCast(u32, value);
}
|
||||||
|
|
||||||
|
/// Extracts the xoffset (or literal) field from a token, i.e. its
/// low 22 bits.
pub fn offset(t: Token) u32 {
    const bits = @intCast(u32, t);
    return bits & offset_mask;
}
|
||||||
|
|
||||||
|
/// Extracts the xlength field from a match token.
pub fn length(t: Token) u32 {
    const unpacked = (t - match_type) >> length_shift;
    return @intCast(u32, unpacked);
}
|
||||||
|
|
||||||
|
/// Returns the length code for the given xlength
/// (length - MIN_MATCH_LENGTH); indexes the 256-entry table directly.
pub fn lengthCode(len: u32) u32 {
    return length_codes[len];
}
|
||||||
|
|
||||||
|
/// Returns the offset code corresponding to a specific offset.
/// Small offsets index the table directly; larger offsets are scaled
/// down by 2^7 or 2^14, with the code base bumped by 14 or 28.
pub fn offsetCode(off: u32) u32 {
    const table_len = @intCast(u32, offset_codes.len);
    if (off < table_len) return offset_codes[off];
    if ((off >> 7) < table_len) return offset_codes[off >> 7] + 14;
    return offset_codes[off >> 14] + 28;
}
|
||||||
|
|
||||||
|
test {
    const std = @import("std");
    const expect = std.testing.expect;

    // Packing combines the match tag, shifted xlength and xoffset:
    // (1 << 30) + (555 << 22) + 555 == 3_401_581_099.
    try expect(matchToken(555, 555) == 3_401_581_099);

    // Field extraction must round-trip through the packing helpers.
    try expect(length(matchToken(555, 555)) == 555);
    try expect(offset(matchToken(555, 555)) == 555);
    try expect(literal(literalToken(97)) == 97);
}
|
@ -20,15 +20,14 @@ pub fn GzipStream(comptime ReaderType: type) type {
|
|||||||
const Self = @This();
|
const Self = @This();
|
||||||
|
|
||||||
pub const Error = ReaderType.Error ||
|
pub const Error = ReaderType.Error ||
|
||||||
deflate.InflateStream(ReaderType).Error ||
|
deflate.Decompressor(ReaderType).Error ||
|
||||||
error{ CorruptedData, WrongChecksum };
|
error{ CorruptedData, WrongChecksum };
|
||||||
pub const Reader = io.Reader(*Self, Error, read);
|
pub const Reader = io.Reader(*Self, Error, read);
|
||||||
|
|
||||||
allocator: mem.Allocator,
|
allocator: mem.Allocator,
|
||||||
inflater: deflate.InflateStream(ReaderType),
|
inflater: deflate.Decompressor(ReaderType),
|
||||||
in_reader: ReaderType,
|
in_reader: ReaderType,
|
||||||
hasher: std.hash.Crc32,
|
hasher: std.hash.Crc32,
|
||||||
window_slice: []u8,
|
|
||||||
read_amt: usize,
|
read_amt: usize,
|
||||||
|
|
||||||
info: struct {
|
info: struct {
|
||||||
@ -93,16 +92,11 @@ pub fn GzipStream(comptime ReaderType: type) type {
|
|||||||
_ = try source.readIntLittle(u16);
|
_ = try source.readIntLittle(u16);
|
||||||
}
|
}
|
||||||
|
|
||||||
// The RFC doesn't say anything about the DEFLATE window size to be
|
|
||||||
// used, default to 32K.
|
|
||||||
var window_slice = try allocator.alloc(u8, 32 * 1024);
|
|
||||||
|
|
||||||
return Self{
|
return Self{
|
||||||
.allocator = allocator,
|
.allocator = allocator,
|
||||||
.inflater = deflate.inflateStream(source, window_slice),
|
.inflater = try deflate.decompressor(allocator, source, null),
|
||||||
.in_reader = source,
|
.in_reader = source,
|
||||||
.hasher = std.hash.Crc32.init(),
|
.hasher = std.hash.Crc32.init(),
|
||||||
.window_slice = window_slice,
|
|
||||||
.info = .{
|
.info = .{
|
||||||
.filename = filename,
|
.filename = filename,
|
||||||
.comment = comment,
|
.comment = comment,
|
||||||
@ -113,7 +107,7 @@ pub fn GzipStream(comptime ReaderType: type) type {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn deinit(self: *Self) void {
|
pub fn deinit(self: *Self) void {
|
||||||
self.allocator.free(self.window_slice);
|
self.inflater.deinit();
|
||||||
if (self.info.filename) |filename|
|
if (self.info.filename) |filename|
|
||||||
self.allocator.free(filename);
|
self.allocator.free(filename);
|
||||||
if (self.info.comment) |comment|
|
if (self.info.comment) |comment|
|
||||||
|
@ -13,15 +13,14 @@ pub fn ZlibStream(comptime ReaderType: type) type {
|
|||||||
const Self = @This();
|
const Self = @This();
|
||||||
|
|
||||||
pub const Error = ReaderType.Error ||
|
pub const Error = ReaderType.Error ||
|
||||||
deflate.InflateStream(ReaderType).Error ||
|
deflate.Decompressor(ReaderType).Error ||
|
||||||
error{ WrongChecksum, Unsupported };
|
error{ WrongChecksum, Unsupported };
|
||||||
pub const Reader = io.Reader(*Self, Error, read);
|
pub const Reader = io.Reader(*Self, Error, read);
|
||||||
|
|
||||||
allocator: mem.Allocator,
|
allocator: mem.Allocator,
|
||||||
inflater: deflate.InflateStream(ReaderType),
|
inflater: deflate.Decompressor(ReaderType),
|
||||||
in_reader: ReaderType,
|
in_reader: ReaderType,
|
||||||
hasher: std.hash.Adler32,
|
hasher: std.hash.Adler32,
|
||||||
window_slice: []u8,
|
|
||||||
|
|
||||||
fn init(allocator: mem.Allocator, source: ReaderType) !Self {
|
fn init(allocator: mem.Allocator, source: ReaderType) !Self {
|
||||||
// Zlib header format is specified in RFC1950
|
// Zlib header format is specified in RFC1950
|
||||||
@ -38,28 +37,25 @@ pub fn ZlibStream(comptime ReaderType: type) type {
|
|||||||
|
|
||||||
// The CM field must be 8 to indicate the use of DEFLATE
|
// The CM field must be 8 to indicate the use of DEFLATE
|
||||||
if (CM != 8) return error.InvalidCompression;
|
if (CM != 8) return error.InvalidCompression;
|
||||||
// CINFO is the base-2 logarithm of the window size, minus 8.
|
// CINFO is the base-2 logarithm of the LZ77 window size, minus 8.
|
||||||
// Values above 7 are unspecified and therefore rejected.
|
// Values above 7 are unspecified and therefore rejected.
|
||||||
if (CINFO > 7) return error.InvalidWindowSize;
|
if (CINFO > 7) return error.InvalidWindowSize;
|
||||||
const window_size: u16 = @as(u16, 1) << (CINFO + 8);
|
|
||||||
|
|
||||||
|
const dictionary = null;
|
||||||
// TODO: Support this case
|
// TODO: Support this case
|
||||||
if (FDICT != 0)
|
if (FDICT != 0)
|
||||||
return error.Unsupported;
|
return error.Unsupported;
|
||||||
|
|
||||||
var window_slice = try allocator.alloc(u8, window_size);
|
|
||||||
|
|
||||||
return Self{
|
return Self{
|
||||||
.allocator = allocator,
|
.allocator = allocator,
|
||||||
.inflater = deflate.inflateStream(source, window_slice),
|
.inflater = try deflate.decompressor(allocator, source, dictionary),
|
||||||
.in_reader = source,
|
.in_reader = source,
|
||||||
.hasher = std.hash.Adler32.init(),
|
.hasher = std.hash.Adler32.init(),
|
||||||
.window_slice = window_slice,
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn deinit(self: *Self) void {
|
pub fn deinit(self: *Self) void {
|
||||||
self.allocator.free(self.window_slice);
|
self.inflater.deinit();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Implements the io.Reader interface
|
// Implements the io.Reader interface
|
||||||
|
Loading…
Reference in New Issue
Block a user