mirror of
https://github.com/ziglang/zig.git
synced 2025-01-09 19:50:29 +00:00
std.unicode.utf8ToUtf16Le: improve performance
On a simple test input: original utf8ToUtf16Le: elapsed: 111384483 ns (111 ms); new utf8ToUtf16Le: elapsed: 138570 ns (0 ms). The new implementation is about 800x faster in debug mode and ~4500x faster in release-fast mode. The old implementation was slowing down installation of files on Windows in build scripts.
This commit is contained in:
parent
851a7288a9
commit
c3d20373ee
@ -560,18 +560,34 @@ pub fn utf8ToUtf16LeWithNull(allocator: *mem.Allocator, utf8: []const u8) ![]u16
|
||||
}
|
||||
|
||||
/// Converts the UTF-8 text in `utf8` into UTF-16LE code units stored in `utf16le`.
/// Returns the number of u16 code units written.
/// Assumes there is enough space for the output: each code point below 0x10000
/// produces one code unit, and every other code point produces two (a surrogate pair).
/// Returns `error.Utf8InvalidStartByte` when a sequence begins with a byte that
/// cannot start a UTF-8 sequence, `error.Utf8ExpectedContinuation` when the input
/// ends in the middle of a multi-byte sequence, and otherwise propagates any error
/// from `utf8Decode`.
pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) !usize {
    var dest_i: usize = 0;
    var src_i: usize = 0;
    while (src_i < utf8.len) {
        const byte = utf8[src_i];
        // The count of leading one bits in the first byte selects the sequence length.
        const n = @clz(u8, ~byte);
        switch (n) {
            0 => {
                // Single-byte (ASCII) fast path. The unit still has to be put in
                // little-endian order so big-endian targets produce UTF-16LE too.
                utf16le[dest_i] = utf16NativeToLittle(byte);
                dest_i += 1;
                src_i += 1;
            },
            2, 3, 4 => {
                const next_src_i = src_i + n;
                // A truncated trailing sequence must not slice past the end of the input.
                if (next_src_i > utf8.len) return error.Utf8ExpectedContinuation;
                const codepoint = try utf8Decode(utf8[src_i..next_src_i]);
                if (codepoint < 0x10000) {
                    utf16le[dest_i] = utf16NativeToLittle(@intCast(u16, codepoint));
                    dest_i += 1;
                } else {
                    // Code points that do not fit in 16 bits are split into a
                    // high/low surrogate pair carrying 10 bits each.
                    const offset = codepoint - 0x10000;
                    const high = 0xD800 + @intCast(u16, offset >> 10);
                    const low = 0xDC00 + @intCast(u16, offset & 0x3FF);
                    utf16le[dest_i] = utf16NativeToLittle(high);
                    utf16le[dest_i + 1] = utf16NativeToLittle(low);
                    dest_i += 2;
                }
                src_i = next_src_i;
            },
            else => return error.Utf8InvalidStartByte,
        }
    }
    return dest_i;
}

/// Reorders a native-endian UTF-16 code unit so that it is laid out in memory as little-endian.
fn utf16NativeToLittle(unit: u16) u16 {
    return switch (builtin.endian) {
        .Little => unit,
        .Big => @byteSwap(u16, unit),
    };
}
|
||||
|
37
std/unicode/throughput_test.zig
Normal file
37
std/unicode/throughput_test.zig
Normal file
@ -0,0 +1,37 @@
|
||||
const builtin = @import("builtin");
|
||||
const std = @import("std");
|
||||
|
||||
/// Throughput benchmark: converts the first command-line argument from UTF-8 to
/// UTF-16LE once with `std.unicode.utf8ToUtf16Le` and once with
/// `std.unicode.utf8ToUtf16Le_better`, then prints the elapsed time of each call.
/// Returns `error.MissingArgument` when no input text is given on the command line.
pub fn main() !void {
    const args = try std.process.argsAlloc(std.heap.direct_allocator);
    if (args.len < 2) {
        std.debug.warn("usage: throughput_test <utf8-text>\n");
        return error.MissingArgument;
    }
    const input = args[1];

    // Both output buffers are declared before the timer starts so that setting
    // them up is not part of either timed region.
    var buffer1: [32767]u16 = undefined;
    var buffer2: [32767]u16 = undefined;

    // The fences bracket each timer call so the timed work is not reordered across it.
    @fence(.SeqCst);
    var timer = try std.time.Timer.start();
    @fence(.SeqCst);

    _ = try std.unicode.utf8ToUtf16Le(&buffer1, input);

    @fence(.SeqCst);
    const elapsed_ns_orig = timer.lap();
    @fence(.SeqCst);

    // NOTE(review): `utf8ToUtf16Le_better` is not defined in the visible part of
    // std/unicode.zig - confirm it exists before building this benchmark.
    _ = try std.unicode.utf8ToUtf16Le_better(&buffer2, input);

    @fence(.SeqCst);
    const elapsed_ns_better = timer.lap();
    @fence(.SeqCst);

    std.debug.warn("original utf8ToUtf16Le: elapsed: {} ns ({} ms)\n", elapsed_ns_orig, elapsed_ns_orig / 1000000);
    std.debug.warn("new utf8ToUtf16Le: elapsed: {} ns ({} ms)\n", elapsed_ns_better, elapsed_ns_better / 1000000);

    // Hand both buffers to an opaque asm statement so the conversions are
    // treated as having observable results and are not optimized away.
    asm volatile ("nop"
        :
        : [a] "r" (&buffer1),
          [b] "r" (&buffer2)
        : "memory"
    );
}
|
Loading…
Reference in New Issue
Block a user