adjustments to std.mem split / separate

* rename std.mem.split to std.mem.tokenize
 * add future deprecation notice to docs
 * (unrelated) add note to std.os.path.resolve docs
 * std.mem.separate - assert delimiter.len not zero
 * fix implementation of std.mem.separate to respect the delimiter
 * separate the two iterators to different structs
This commit is contained in:
Andrew Kelley 2019-02-04 15:24:06 -05:00
parent f44ce7836a
commit 67bd45f0cf
No known key found for this signature in database
GPG Key ID: 7C5F548F728501A9
8 changed files with 112 additions and 97 deletions

View File

@ -189,14 +189,14 @@ fn findLLVM(b: *Builder, llvm_config_exe: []const u8) !LibraryDep {
const prefix_output = try b.exec([][]const u8{ llvm_config_exe, "--prefix" });
var result = LibraryDep{
.prefix = mem.split(prefix_output, " \r\n").next().?,
.prefix = mem.tokenize(prefix_output, " \r\n").next().?,
.libs = ArrayList([]const u8).init(b.allocator),
.system_libs = ArrayList([]const u8).init(b.allocator),
.includes = ArrayList([]const u8).init(b.allocator),
.libdirs = ArrayList([]const u8).init(b.allocator),
};
{
var it = mem.split(libs_output, " \r\n");
var it = mem.tokenize(libs_output, " \r\n");
while (it.next()) |lib_arg| {
if (mem.startsWith(u8, lib_arg, "-l")) {
try result.system_libs.append(lib_arg[2..]);
@ -210,7 +210,7 @@ fn findLLVM(b: *Builder, llvm_config_exe: []const u8) !LibraryDep {
}
}
{
var it = mem.split(includes_output, " \r\n");
var it = mem.tokenize(includes_output, " \r\n");
while (it.next()) |include_arg| {
if (mem.startsWith(u8, include_arg, "-I")) {
try result.includes.append(include_arg[2..]);
@ -220,7 +220,7 @@ fn findLLVM(b: *Builder, llvm_config_exe: []const u8) !LibraryDep {
}
}
{
var it = mem.split(libdir_output, " \r\n");
var it = mem.tokenize(libdir_output, " \r\n");
while (it.next()) |libdir| {
if (mem.startsWith(u8, libdir, "-L")) {
try result.libdirs.append(libdir[2..]);
@ -233,7 +233,7 @@ fn findLLVM(b: *Builder, llvm_config_exe: []const u8) !LibraryDep {
}
pub fn installStdLib(b: *Builder, stdlib_files: []const u8) void {
var it = mem.split(stdlib_files, ";");
var it = mem.tokenize(stdlib_files, ";");
while (it.next()) |stdlib_file| {
const src_path = os.path.join(b.allocator, "std", stdlib_file) catch unreachable;
const dest_path = os.path.join(b.allocator, "lib", "zig", "std", stdlib_file) catch unreachable;
@ -242,7 +242,7 @@ pub fn installStdLib(b: *Builder, stdlib_files: []const u8) void {
}
pub fn installCHeaders(b: *Builder, c_header_files: []const u8) void {
var it = mem.split(c_header_files, ";");
var it = mem.tokenize(c_header_files, ";");
while (it.next()) |c_header_file| {
const src_path = os.path.join(b.allocator, "c_headers", c_header_file) catch unreachable;
const dest_path = os.path.join(b.allocator, "lib", "zig", "include", c_header_file) catch unreachable;
@ -277,7 +277,7 @@ fn configureStage2(b: *Builder, exe: var, ctx: Context) !void {
addCppLib(b, exe, ctx.cmake_binary_dir, "zig_cpp");
if (ctx.lld_include_dir.len != 0) {
exe.addIncludeDir(ctx.lld_include_dir);
var it = mem.split(ctx.lld_libraries, ";");
var it = mem.tokenize(ctx.lld_libraries, ";");
while (it.next()) |lib| {
exe.addObjectFile(lib);
}
@ -334,7 +334,7 @@ fn addCxxKnownPath(
ctx.cxx_compiler,
b.fmt("-print-file-name={}", objname),
});
const path_unpadded = mem.split(path_padded, "\r\n").next().?;
const path_unpadded = mem.tokenize(path_padded, "\r\n").next().?;
if (mem.eql(u8, path_unpadded, objname)) {
if (errtxt) |msg| {
warn("{}", msg);

View File

@ -57,10 +57,10 @@ pub const LibCInstallation = struct {
const contents = try std.io.readFileAlloc(allocator, libc_file);
defer allocator.free(contents);
var it = std.mem.split(contents, "\n");
var it = std.mem.tokenize(contents, "\n");
while (it.next()) |line| {
if (line.len == 0 or line[0] == '#') continue;
var line_it = std.mem.split(line, "=");
var line_it = std.mem.separate(line, "=");
const name = line_it.next() orelse {
try stderr.print("missing equal sign after field name\n");
return error.ParseError;
@ -213,7 +213,7 @@ pub const LibCInstallation = struct {
},
}
var it = std.mem.split(exec_result.stderr, "\n\r");
var it = std.mem.tokenize(exec_result.stderr, "\n\r");
var search_paths = std.ArrayList([]const u8).init(loop.allocator);
defer search_paths.deinit();
while (it.next()) |line| {
@ -410,7 +410,7 @@ async fn ccPrintFileName(loop: *event.Loop, o_file: []const u8, want_dirname: bo
return error.CCompilerCrashed;
},
}
var it = std.mem.split(exec_result.stdout, "\n\r");
var it = std.mem.tokenize(exec_result.stdout, "\n\r");
const line = it.next() orelse return error.LibCRuntimeNotFound;
const dirname = std.os.path.dirname(line) orelse return error.LibCRuntimeNotFound;

View File

@ -351,7 +351,7 @@ fn buildOutputType(allocator: *Allocator, args: []const []const u8, out_type: Co
const root_name = if (provided_name) |n| n else blk: {
if (root_source_file) |file| {
const basename = os.path.basename(file);
var it = mem.split(basename, ".");
var it = mem.separate(basename, ".");
break :blk it.next() orelse basename;
} else {
try stderr.write("--name [name] not provided and unable to infer\n");

View File

@ -324,7 +324,7 @@ pub const Builder = struct {
fn processNixOSEnvVars(self: *Builder) void {
if (os.getEnvVarOwned(self.allocator, "NIX_CFLAGS_COMPILE")) |nix_cflags_compile| {
var it = mem.split(nix_cflags_compile, " ");
var it = mem.tokenize(nix_cflags_compile, " ");
while (true) {
const word = it.next() orelse break;
if (mem.eql(u8, word, "-isystem")) {
@ -342,7 +342,7 @@ pub const Builder = struct {
assert(err == error.EnvironmentVariableNotFound);
}
if (os.getEnvVarOwned(self.allocator, "NIX_LDFLAGS")) |nix_ldflags| {
var it = mem.split(nix_ldflags, " ");
var it = mem.tokenize(nix_ldflags, " ");
while (true) {
const word = it.next() orelse break;
if (mem.eql(u8, word, "-rpath")) {
@ -689,7 +689,7 @@ pub const Builder = struct {
if (os.path.isAbsolute(name)) {
return name;
}
var it = mem.split(PATH, []u8{os.path.delimiter});
var it = mem.tokenize(PATH, []u8{os.path.delimiter});
while (it.next()) |path| {
const full_path = try os.path.join(self.allocator, path, self.fmt("{}{}", name, exe_extension));
if (os.path.real(self.allocator, full_path)) |real_path| {

View File

@ -689,58 +689,57 @@ pub fn eql_slice_u8(a: []const u8, b: []const u8) bool {
}
/// Returns an iterator that iterates over the slices of `buffer` that are not
/// any of the bytes in `split_bytes`.
/// split(" abc def ghi ", " ")
/// any of the bytes in `delimiter_bytes`.
/// tokenize(" abc def ghi ", " ")
/// Will return slices for "abc", "def", "ghi", null, in that order.
/// If `split_bytes` does not exist in buffer,
/// If `buffer` is empty, the iterator will return null.
/// If `delimiter_bytes` does not exist in buffer,
/// the iterator will return `buffer`, null, in that order.
pub fn split(buffer: []const u8, split_bytes: []const u8) SplitIterator {
return SplitIterator{
/// See also the related function `separate`.
pub fn tokenize(buffer: []const u8, delimiter_bytes: []const u8) TokenIterator {
return TokenIterator{
.index = 0,
.buffer = buffer,
.split_bytes = split_bytes,
.glob = true,
.spun = false,
.delimiter_bytes = delimiter_bytes,
};
}
test "mem.split" {
var it = split(" abc def ghi ", " ");
test "mem.tokenize" {
var it = tokenize(" abc def ghi ", " ");
assert(eql(u8, it.next().?, "abc"));
assert(eql(u8, it.next().?, "def"));
assert(eql(u8, it.next().?, "ghi"));
assert(it.next() == null);
it = split("..\\bob", "\\");
it = tokenize("..\\bob", "\\");
assert(eql(u8, it.next().?, ".."));
assert(eql(u8, "..", "..\\bob"[0..it.index]));
assert(eql(u8, it.next().?, "bob"));
assert(it.next() == null);
it = split("//a/b", "/");
it = tokenize("//a/b", "/");
assert(eql(u8, it.next().?, "a"));
assert(eql(u8, it.next().?, "b"));
assert(eql(u8, "//a/b", "//a/b"[0..it.index]));
assert(it.next() == null);
it = split("|", "|");
it = tokenize("|", "|");
assert(it.next() == null);
it = split("", "|");
assert(eql(u8, it.next().?, ""));
it = tokenize("", "|");
assert(it.next() == null);
it = split("hello", "");
it = tokenize("hello", "");
assert(eql(u8, it.next().?, "hello"));
assert(it.next() == null);
it = split("hello", " ");
it = tokenize("hello", " ");
assert(eql(u8, it.next().?, "hello"));
assert(it.next() == null);
}
test "mem.split (multibyte)" {
var it = split("a|b,c/d e", " /,|");
test "mem.tokenize (multibyte)" {
var it = tokenize("a|b,c/d e", " /,|");
assert(eql(u8, it.next().?, "a"));
assert(eql(u8, it.next().?, "b"));
assert(eql(u8, it.next().?, "c"));
@ -750,18 +749,21 @@ test "mem.split (multibyte)" {
}
/// Returns an iterator that iterates over the slices of `buffer` that
/// seperates by bytes in `delimiter`.
/// are separated by the whole `delimiter` byte sequence (not by any single byte of it).
/// separate("abc|def||ghi", "|")
/// Will return slices for "abc", "def", "", "ghi", null, in that order.
/// will return slices for "abc", "def", "", "ghi", null, in that order.
/// If `delimiter` does not exist in buffer,
/// the iterator will return `buffer`, null, in that order.
/// The delimiter length must not be zero.
/// See also the related function `tokenize`.
/// It is planned to rename this function to `split` before 1.0.0, like this:
/// pub fn split(buffer: []const u8, delimiter: []const u8) SplitIterator {
pub fn separate(buffer: []const u8, delimiter: []const u8) SplitIterator {
assert(delimiter.len != 0);
return SplitIterator{
.index = 0,
.buffer = buffer,
.split_bytes = delimiter,
.glob = false,
.spun = false,
.delimiter = delimiter,
};
}
@ -782,19 +784,15 @@ test "mem.separate" {
assert(eql(u8, it.next().?, ""));
assert(it.next() == null);
it = separate("hello", "");
assert(eql(u8, it.next().?, "hello"));
assert(it.next() == null);
it = separate("hello", " ");
assert(eql(u8, it.next().?, "hello"));
assert(it.next() == null);
}
test "mem.separate (multibyte)" {
var it = separate("a|b,c/d e", " /,|");
var it = separate("a, b ,, c, d, e", ", ");
assert(eql(u8, it.next().?, "a"));
assert(eql(u8, it.next().?, "b"));
assert(eql(u8, it.next().?, "b ,"));
assert(eql(u8, it.next().?, "c"));
assert(eql(u8, it.next().?, "d"));
assert(eql(u8, it.next().?, "e"));
@ -819,49 +817,38 @@ test "mem.endsWith" {
assert(!endsWith(u8, "Bob", "Bo"));
}
pub const SplitIterator = struct {
pub const TokenIterator = struct {
buffer: []const u8,
split_bytes: []const u8,
delimiter_bytes: []const u8,
index: usize,
glob: bool,
spun: bool,
/// Iterates and returns null or optionally a slice the next split segment
pub fn next(self: *SplitIterator) ?[]const u8 {
if (self.spun) {
if (self.index + 1 > self.buffer.len) return null;
self.index += 1;
/// Returns a slice of the next token, or null if tokenization is complete.
pub fn next(self: *TokenIterator) ?[]const u8 {
// move to beginning of token
while (self.index < self.buffer.len and self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {}
const start = self.index;
if (start == self.buffer.len) {
return null;
}
self.spun = true;
// move to end of token
while (self.index < self.buffer.len and !self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {}
const end = self.index;
if (self.glob) {
while (self.index < self.buffer.len and self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {}
}
var cursor = self.index;
while (cursor < self.buffer.len and !self.isSplitByte(self.buffer[cursor])) : (cursor += 1) {}
defer self.index = cursor;
if (cursor == self.buffer.len) {
return if (self.glob and self.index == cursor and self.index > 0) null else self.buffer[self.index..];
}
return self.buffer[self.index..cursor];
return self.buffer[start..end];
}
/// Returns a slice of the remaining bytes. Does not affect iterator state.
pub fn rest(self: *const SplitIterator) []const u8 {
pub fn rest(self: TokenIterator) []const u8 {
// move to beginning of token
var index: usize = self.index;
while (index < self.buffer.len and self.isSplitByte(self.buffer[index])) : (index += 1) {}
return self.buffer[index..];
}
fn isSplitByte(self: *const SplitIterator, byte: u8) bool {
for (self.split_bytes) |split_byte| {
if (byte == split_byte) {
fn isSplitByte(self: TokenIterator, byte: u8) bool {
for (self.delimiter_bytes) |delimiter_byte| {
if (byte == delimiter_byte) {
return true;
}
}
@ -869,6 +856,32 @@ pub const SplitIterator = struct {
}
};
pub const SplitIterator = struct {
    // The bytes being split.
    buffer: []const u8,
    // Offset in `buffer` where the next field starts; set to null by `next`
    // once the final field has been handed out.
    index: ?usize,
    // The byte sequence that is searched for, as a whole, between fields.
    delimiter: []const u8,

    /// Yields the next field of `buffer`, or null once every field has been yielded.
    pub fn next(self: *SplitIterator) ?[]const u8 {
        const field_start = self.index orelse return null;
        if (indexOfPos(u8, self.buffer, field_start, self.delimiter)) |delim_start| {
            // Resume just past this delimiter on the following call.
            self.index = delim_start + self.delimiter.len;
            return self.buffer[field_start..delim_start];
        }
        // No further delimiter: the tail is the last field and iteration ends.
        self.index = null;
        return self.buffer[field_start..];
    }

    /// Returns the bytes that have not been consumed yet. The iterator state is left untouched.
    pub fn rest(self: SplitIterator) []const u8 {
        if (self.index) |remaining_start| {
            return self.buffer[remaining_start..];
        }
        // Iteration is finished: produce an empty slice positioned at the end of the buffer.
        return self.buffer[self.buffer.len..];
    }
};
/// Naively combines a series of strings with a separator.
/// Allocates memory for the result, which must be freed by the caller.
pub fn join(allocator: *Allocator, sep: u8, strings: ...) ![]u8 {

View File

@ -595,7 +595,7 @@ pub const ChildProcess = struct {
const PATH = try os.getEnvVarOwned(self.allocator, "PATH");
defer self.allocator.free(PATH);
var it = mem.split(PATH, ";");
var it = mem.tokenize(PATH, ";");
while (it.next()) |search_path| {
const joined_path = try os.path.join(self.allocator, search_path, app_name);
defer self.allocator.free(joined_path);

View File

@ -608,7 +608,7 @@ pub fn posixExecve(argv: []const []const u8, env_map: *const BufMap, allocator:
// +1 for the null terminating byte
const path_buf = try allocator.alloc(u8, PATH.len + exe_path.len + 2);
defer allocator.free(path_buf);
var it = mem.split(PATH, ":");
var it = mem.tokenize(PATH, ":");
var seen_eacces = false;
var err: usize = undefined;
while (it.next()) |search_path| {

View File

@ -184,7 +184,7 @@ pub fn windowsParsePath(path: []const u8) WindowsPath {
return relative_path;
}
var it = mem.split(path, []u8{this_sep});
var it = mem.tokenize(path, []u8{this_sep});
_ = (it.next() orelse return relative_path);
_ = (it.next() orelse return relative_path);
return WindowsPath{
@ -202,7 +202,7 @@ pub fn windowsParsePath(path: []const u8) WindowsPath {
return relative_path;
}
var it = mem.split(path, []u8{this_sep});
var it = mem.tokenize(path, []u8{this_sep});
_ = (it.next() orelse return relative_path);
_ = (it.next() orelse return relative_path);
return WindowsPath{
@ -264,8 +264,8 @@ fn networkShareServersEql(ns1: []const u8, ns2: []const u8) bool {
const sep1 = ns1[0];
const sep2 = ns2[0];
var it1 = mem.split(ns1, []u8{sep1});
var it2 = mem.split(ns2, []u8{sep2});
var it1 = mem.tokenize(ns1, []u8{sep1});
var it2 = mem.tokenize(ns2, []u8{sep2});
// TODO ASCII is wrong, we actually need full unicode support to compare paths.
return asciiEqlIgnoreCase(it1.next().?, it2.next().?);
@ -285,8 +285,8 @@ fn compareDiskDesignators(kind: WindowsPath.Kind, p1: []const u8, p2: []const u8
const sep1 = p1[0];
const sep2 = p2[0];
var it1 = mem.split(p1, []u8{sep1});
var it2 = mem.split(p2, []u8{sep2});
var it1 = mem.tokenize(p1, []u8{sep1});
var it2 = mem.tokenize(p2, []u8{sep2});
// TODO ASCII is wrong, we actually need full unicode support to compare paths.
return asciiEqlIgnoreCase(it1.next().?, it2.next().?) and asciiEqlIgnoreCase(it1.next().?, it2.next().?);
@ -337,6 +337,8 @@ pub fn resolveSlice(allocator: *Allocator, paths: []const []const u8) ![]u8 {
/// If all paths are relative it uses the current working directory as a starting point.
/// Each drive has its own current working directory.
/// Path separators are canonicalized to '\\' and drives are canonicalized to capital letters.
/// Note: all usage of this function should be audited due to the existence of symlinks.
/// Without performing actual syscalls, resolving `..` could be incorrect.
pub fn resolveWindows(allocator: *Allocator, paths: []const []const u8) ![]u8 {
if (paths.len == 0) {
assert(is_windows); // resolveWindows called on non windows can't use getCwd
@ -416,7 +418,7 @@ pub fn resolveWindows(allocator: *Allocator, paths: []const []const u8) ![]u8 {
},
WindowsPath.Kind.NetworkShare => {
result = try allocator.alloc(u8, max_size);
var it = mem.split(paths[first_index], "/\\");
var it = mem.tokenize(paths[first_index], "/\\");
const server_name = it.next().?;
const other_name = it.next().?;
@ -483,7 +485,7 @@ pub fn resolveWindows(allocator: *Allocator, paths: []const []const u8) ![]u8 {
if (!correct_disk_designator) {
continue;
}
var it = mem.split(p[parsed.disk_designator.len..], "/\\");
var it = mem.tokenize(p[parsed.disk_designator.len..], "/\\");
while (it.next()) |component| {
if (mem.eql(u8, component, ".")) {
continue;
@ -516,6 +518,8 @@ pub fn resolveWindows(allocator: *Allocator, paths: []const []const u8) ![]u8 {
/// It resolves "." and "..".
/// The result does not have a trailing path separator.
/// If all paths are relative it uses the current working directory as a starting point.
/// Note: all usage of this function should be audited due to the existence of symlinks.
/// Without performing actual syscalls, resolving `..` could be incorrect.
pub fn resolvePosix(allocator: *Allocator, paths: []const []const u8) ![]u8 {
if (paths.len == 0) {
assert(!is_windows); // resolvePosix called on windows can't use getCwd
@ -550,7 +554,7 @@ pub fn resolvePosix(allocator: *Allocator, paths: []const []const u8) ![]u8 {
errdefer allocator.free(result);
for (paths[first_index..]) |p, i| {
var it = mem.split(p, "/");
var it = mem.tokenize(p, "/");
while (it.next()) |component| {
if (mem.eql(u8, component, ".")) {
continue;
@ -937,8 +941,8 @@ pub fn relativeWindows(allocator: *Allocator, from: []const u8, to: []const u8)
return resolved_to;
}
var from_it = mem.split(resolved_from, "/\\");
var to_it = mem.split(resolved_to, "/\\");
var from_it = mem.tokenize(resolved_from, "/\\");
var to_it = mem.tokenize(resolved_to, "/\\");
while (true) {
const from_component = from_it.next() orelse return mem.dupe(allocator, u8, to_it.rest());
const to_rest = to_it.rest();
@ -967,14 +971,12 @@ pub fn relativeWindows(allocator: *Allocator, from: []const u8, to: []const u8)
// shave off the trailing slash
result_index -= 1;
if (to_rest.len > 0) {
var rest_it = mem.split(to_rest, "/\\");
while (rest_it.next()) |to_component| {
result[result_index] = '\\';
result_index += 1;
mem.copy(u8, result[result_index..], to_component);
result_index += to_component.len;
}
var rest_it = mem.tokenize(to_rest, "/\\");
while (rest_it.next()) |to_component| {
result[result_index] = '\\';
result_index += 1;
mem.copy(u8, result[result_index..], to_component);
result_index += to_component.len;
}
return result[0..result_index];
@ -990,8 +992,8 @@ pub fn relativePosix(allocator: *Allocator, from: []const u8, to: []const u8) ![
const resolved_to = try resolvePosix(allocator, [][]const u8{to});
defer allocator.free(resolved_to);
var from_it = mem.split(resolved_from, "/");
var to_it = mem.split(resolved_to, "/");
var from_it = mem.tokenize(resolved_from, "/");
var to_it = mem.tokenize(resolved_to, "/");
while (true) {
const from_component = from_it.next() orelse return mem.dupe(allocator, u8, to_it.rest());
const to_rest = to_it.rest();