mirror of
https://github.com/ziglang/zig.git
synced 2025-02-02 20:21:35 +00:00
fix(perf): remove LUT
This makes it so that we no longer use a LUT (Look-Up Table):
* The code is much simpler and easier to understand now.
* Using a LUT means we rely on a warm cache. Relying on the cache like this results in inconsistent performance, and in many cases codegen will be worse. Also, as @topolarity once pointed out, in some cases where it seems like the code may branch, it actually doesn't: https://github.com/ziglang/zig/pull/11629#issuecomment-1213641429
* Other languages' standard libraries don't do this either.

Just for fun, I wanted to see how other languages' codegen compares to ours now: https://rust.godbolt.org/z/Te4ax9Edf, https://zig.godbolt.org/z/nTbYedWKv. So we are pretty much on par with or better than other languages now.
This commit is contained in:
parent
626e02a429
commit
19dbc5805c
@ -12,7 +12,7 @@ const std = @import("std");
|
||||
|
||||
/// The C0 control codes of the ASCII encoding.
|
||||
///
|
||||
/// See also: https://en.wikipedia.org/wiki/C0_and_C1_control_codes and `isControl`.
|
||||
/// See also: https://en.wikipedia.org/wiki/C0_and_C1_control_codes and `isControl`
|
||||
pub const control_code = struct {
|
||||
/// Null.
|
||||
pub const nul = 0x00;
|
||||
@ -88,188 +88,63 @@ pub const control_code = struct {
|
||||
pub const xoff = dc3;
|
||||
};
|
||||
|
||||
const tIndex = enum(u3) {
|
||||
Alpha,
|
||||
Hex,
|
||||
Space,
|
||||
Digit,
|
||||
Lower,
|
||||
Upper,
|
||||
// Ctrl, < 0x20 || == DEL
|
||||
// Print, = Graph || == ' '. NOT '\t' et cetera
|
||||
Punct,
|
||||
Graph,
|
||||
//ASCII, | ~0b01111111
|
||||
//isBlank, == ' ' || == '\x09'
|
||||
};
|
||||
|
||||
const combinedTable = init: {
|
||||
comptime var table: [256]u8 = undefined;
|
||||
|
||||
const mem = std.mem;
|
||||
|
||||
const alpha = [_]u1{
|
||||
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
||||
};
|
||||
const lower = [_]u1{
|
||||
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
||||
};
|
||||
const upper = [_]u1{
|
||||
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
const digit = [_]u1{
|
||||
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
const hex = [_]u1{
|
||||
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
const space = [_]u1{
|
||||
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
const punct = [_]u1{
|
||||
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
|
||||
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
|
||||
};
|
||||
const graph = [_]u1{
|
||||
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
|
||||
};
|
||||
|
||||
comptime var i = 0;
|
||||
inline while (i < 128) : (i += 1) {
|
||||
table[i] =
|
||||
@as(u8, alpha[i]) << @enumToInt(tIndex.Alpha) |
|
||||
@as(u8, hex[i]) << @enumToInt(tIndex.Hex) |
|
||||
@as(u8, space[i]) << @enumToInt(tIndex.Space) |
|
||||
@as(u8, digit[i]) << @enumToInt(tIndex.Digit) |
|
||||
@as(u8, lower[i]) << @enumToInt(tIndex.Lower) |
|
||||
@as(u8, upper[i]) << @enumToInt(tIndex.Upper) |
|
||||
@as(u8, punct[i]) << @enumToInt(tIndex.Punct) |
|
||||
@as(u8, graph[i]) << @enumToInt(tIndex.Graph);
|
||||
}
|
||||
mem.set(u8, table[128..256], 0);
|
||||
break :init table;
|
||||
};
|
||||
|
||||
fn inTable(c: u8, t: tIndex) bool {
|
||||
return (combinedTable[c] & (@as(u8, 1) << @enumToInt(t))) != 0;
|
||||
}
|
||||
|
||||
/// Returns whether the character is alphanumeric.
|
||||
/// Returns whether the character is alphanumeric: A-Z, a-z, or 0-9.
|
||||
pub fn isAlphanumeric(c: u8) bool {
|
||||
return (combinedTable[c] & ((@as(u8, 1) << @enumToInt(tIndex.Alpha)) |
|
||||
@as(u8, 1) << @enumToInt(tIndex.Digit))) != 0;
|
||||
return switch (c) {
|
||||
'A'...'Z', 'a'...'z', '0'...'9' => true,
|
||||
else => false,
|
||||
};
|
||||
}
|
||||
|
||||
/// Returns whether the character is alphabetic.
|
||||
/// Returns whether the character is alphabetic: A-Z or a-z.
|
||||
pub fn isAlphabetic(c: u8) bool {
|
||||
return inTable(c, tIndex.Alpha);
|
||||
return switch (c) {
|
||||
'A'...'Z', 'a'...'z' => true,
|
||||
else => false,
|
||||
};
|
||||
}
|
||||
|
||||
/// Returns whether the character is a control character.
|
||||
/// This is the same as `!isPrint(c)`.
|
||||
///
|
||||
/// See also: `control_code`.
|
||||
/// See also: `control_code`
|
||||
pub fn isControl(c: u8) bool {
|
||||
return c <= control_code.us or c == control_code.del;
|
||||
}
|
||||
|
||||
/// Returns whether the character is a digit.
|
||||
pub fn isDigit(c: u8) bool {
|
||||
return inTable(c, tIndex.Digit);
|
||||
return switch (c) {
|
||||
'0'...'9' => true,
|
||||
else => false,
|
||||
};
|
||||
}
|
||||
|
||||
/// Returns whether the character is a lowercased letter.
|
||||
/// Returns whether the character is a lowercase letter.
|
||||
pub fn isLower(c: u8) bool {
|
||||
return inTable(c, tIndex.Lower);
|
||||
return switch (c) {
|
||||
'a'...'z' => true,
|
||||
else => false,
|
||||
};
|
||||
}
|
||||
|
||||
/// Returns whether the character is printable and has some graphical representation.
|
||||
/// This also returns `true` for the space character.
|
||||
/// This is the same as `!isControl(c)`.
|
||||
/// Returns whether the character is printable and has some graphical representation,
|
||||
/// including the space character.
|
||||
pub fn isPrint(c: u8) bool {
|
||||
return inTable(c, tIndex.Graph) or c == ' ';
|
||||
return isASCII(c) and !isControl(c);
|
||||
}
|
||||
|
||||
/// Returns whether this character is included in `whitespace`.
|
||||
pub fn isWhitespace(c: u8) bool {
|
||||
return inTable(c, tIndex.Space);
|
||||
return for (whitespace) |other| {
|
||||
if (c == other)
|
||||
break true;
|
||||
} else false;
|
||||
}
|
||||
|
||||
/// Whitespace for general use.
|
||||
/// This may be used with e.g. `std.mem.trim` to trim whitespace.
|
||||
///
|
||||
/// See also: `isWhitespace`.
|
||||
/// See also: `isWhitespace`
|
||||
pub const whitespace = [_]u8{ ' ', '\t', '\n', '\r', control_code.vt, control_code.ff };
|
||||
|
||||
test "whitespace" {
|
||||
@ -281,14 +156,20 @@ test "whitespace" {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns whether the character is an uppercased letter.
|
||||
/// Returns whether the character is an uppercase letter.
|
||||
pub fn isUpper(c: u8) bool {
|
||||
return inTable(c, tIndex.Upper);
|
||||
return switch (c) {
|
||||
'A'...'Z' => true,
|
||||
else => false,
|
||||
};
|
||||
}
|
||||
|
||||
/// Returns whether the character is a hexadecimal digit. Case-insensitive.
|
||||
/// Returns whether the character is a hexadecimal digit: A-F, a-f, or 0-9.
|
||||
pub fn isHex(c: u8) bool {
|
||||
return inTable(c, tIndex.Hex);
|
||||
return switch (c) {
|
||||
'A'...'F', 'a'...'f', '0'...'9' => true,
|
||||
else => false,
|
||||
};
|
||||
}
|
||||
|
||||
/// Returns whether the character is a 7-bit ASCII character.
|
||||
@ -322,6 +203,8 @@ test "ASCII character classes" {
|
||||
try testing.expect(isControl(control_code.nul));
|
||||
try testing.expect(isControl(control_code.ff));
|
||||
try testing.expect(isControl(control_code.us));
|
||||
try testing.expect(!isControl(0x80));
|
||||
try testing.expect(!isControl(0xff));
|
||||
|
||||
try testing.expect('C' == toUpper('c'));
|
||||
try testing.expect(':' == toUpper(':'));
|
||||
@ -351,6 +234,7 @@ test "ASCII character classes" {
|
||||
|
||||
try testing.expect(!isHex('g'));
|
||||
try testing.expect(isHex('b'));
|
||||
try testing.expect(isHex('F'));
|
||||
try testing.expect(isHex('9'));
|
||||
|
||||
try testing.expect(!isDigit('~'));
|
||||
@ -361,6 +245,8 @@ test "ASCII character classes" {
|
||||
try testing.expect(isPrint('@'));
|
||||
try testing.expect(isPrint('~'));
|
||||
try testing.expect(!isPrint(control_code.esc));
|
||||
try testing.expect(!isPrint(0x80));
|
||||
try testing.expect(!isPrint(0xff));
|
||||
}
|
||||
|
||||
/// Writes a lower case copy of `ascii_string` to `output`.
|
||||
|
Loading…
Reference in New Issue
Block a user