second attempt

This commit is contained in:
Josh Wolfe 2024-05-04 07:58:22 -04:00
parent e60cd80580
commit 925e17879b
2 changed files with 84 additions and 38 deletions

View File

@ -32,6 +32,7 @@ const std = @import("std");
const Allocator = std.mem.Allocator;
const ArrayList = std.ArrayList;
const ArrayListUnmanaged = std.ArrayListUnmanaged;
const assert = std.debug.assert;
const BitStack = std.BitStack;
@ -193,15 +194,17 @@ pub const TokenType = enum {
/// At any time, notably just after an error, call `getLine()`, `getColumn()`, and/or `getByteOffset()`
/// to get meaningful information from this.
pub const Diagnostics = struct {
// continually updated by Scanner:
line_number: u64 = 1,
line_start_cursor: usize = @as(usize, @bitCast(@as(isize, -1))), // Start just "before" the input buffer to get a 1-based column for line 1.
line_start_cursor: usize = @bitCast(@as(isize, -1)), // Start just "before" the input buffer to get a 1-based column for line 1.
total_bytes_before_current_input: u64 = 0,
/// While the source is operational, this is a pointer into it.
/// If the source is destroyed, this becomes a literal value.
cursor: union(enum) {
pointer: *const usize,
value: usize,
} = undefined,
// updated by Scanner.saveDiagnostics:
cursor_in_current_input: usize = undefined,
current_input: []const u8 = undefined,
// updated by recordContext().
context_stack: ArrayListUnmanaged([]const u8) = .{},
/// Starts at 1.
pub fn getLine(self: *const @This()) u64 {
@ -209,25 +212,68 @@ pub const Diagnostics = struct {
}
/// Starts at 1.
pub fn getColumn(self: *const @This()) u64 {
return self.getCursor() -% self.line_start_cursor;
return self.cursor_in_current_input -% self.line_start_cursor;
}
/// Starts at 0. Measures the byte offset since the start of the input.
pub fn getByteOffset(self: *const @This()) u64 {
return self.total_bytes_before_current_input + self.getCursor();
return self.total_bytes_before_current_input + self.cursor_in_current_input;
}
fn getCursor(self: *const @This()) usize {
return switch (self.cursor) {
.pointer => |p| p.*,
.value => |v| v,
};
/// Push `context` onto `context_stack`.
/// Only the slice is stored; the bytes it points at are not copied here.
/// Returns an allocation error if the stack cannot grow.
pub fn recordContext(self: *@This(), allocator: Allocator, context: []const u8) Allocator.Error!void {
    try self.context_stack.append(allocator, context);
}
fn saveCursor(self: *@This()) void {
const value = self.getCursor();
self.cursor = .{ .value = value };
/// Pretty-print diagnostic information to the given writer, such as `std.io.getStdErr().writer()`.
/// The output is a `file:line:column: ErrorName` header, an excerpt of the offending line,
/// a `^` marker under the cursor position, and one ` in ...` line per entry of `context_stack`.
/// `file_name`, if non-null, is printed in the header line; "<json>" is printed otherwise.
/// It is purely aesthetic and is not touched on any actual file system.
/// Reads `cursor_in_current_input` and `current_input` (both default to `undefined`),
/// so those must have been populated before calling this.
pub fn dump(self: *const @This(), writer: anytype, err: anyerror, file_name: ?[]const u8) !void {
    try writer.print("{s}:{}:{}: {s}\n", .{ file_name orelse "<json>", self.getLine(), self.getColumn(), @errorName(err) });

    const input = self.current_input;
    const cursor = self.cursor_in_current_input;

    // Show a "line" of context, or in case of very long lines, just an excerpt of the line.
    // (Very long lines are common in minified JSON such as in an HTTP API or other machine-to-machine contexts.)

    // Walk backwards from the cursor to the start of the line, giving up after 40 bytes.
    var start = cursor;
    var start_ellipsis: []const u8 = "";
    while (start > 0 and input[start - 1] != '\n') {
        if (start + 40 <= cursor) {
            // Too far into the line. Show part of the line.
            start_ellipsis = "...";
            break;
        }
        start -= 1;
    }

    // Walk forwards to the end of the line. `end` is exclusive, so the byte to test is
    // `input[end]` itself: stopping on `input[end + 1]` would drop the last byte of the line.
    var end = start;
    var end_ellipsis: []const u8 = "";
    while (end < input.len and input[end] != '\n') {
        // TODO: put ellipsis at the end of input when not is_end_of_input.
        if (end >= start + 70) {
            // Line is too long. Show part of it.
            end_ellipsis = "...";
            break;
        }
        end += 1;
    }

    try writer.print("{s}{s}{s}\n", .{ start_ellipsis, input[start..end], end_ellipsis });
    // Indent the marker so that it lines up with the cursor inside the excerpt.
    try writer.writeByteNTimes(' ', start_ellipsis.len + cursor - start);
    try writer.writeAll("^\n");

    for (self.context_stack.items) |item| {
        try writer.print(" in {s}\n", .{item});
    }
}
};
/// Record `context` on the diagnostics object if one was supplied.
/// Does nothing when `maybe_diagnostics` is null; otherwise forwards to `Diagnostics.recordContext`
/// and propagates its allocation error.
pub inline fn maybeRecordDiagnosticContext(allocator: Allocator, maybe_diagnostics: ?*Diagnostics, context: []const u8) Allocator.Error!void {
    const diagnostics = maybe_diagnostics orelse return;
    return diagnostics.recordContext(allocator, context);
}
/// See the documentation for `std.json.Token`.
pub const AllocWhen = enum { alloc_if_needed, alloc_always };
@ -260,10 +306,6 @@ pub fn Reader(comptime buffer_size: usize, comptime ReaderType: type) type {
pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void {
self.scanner.enableDiagnostics(diagnostics);
}
/// Calls `std.json.Scanner.saveDiagnostics`.
pub fn saveDiagnostics(self: *const @This()) void {
self.scanner.saveDiagnostics();
}
pub const NextError = ReaderType.Error || Error || Allocator.Error;
pub const SkipError = NextError;
@ -466,18 +508,18 @@ pub const Scanner = struct {
self.* = undefined;
}
/// See also `saveDiagnostics()`.
pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void {
diagnostics.cursor = .{ .pointer = &self.cursor };
std.log.warn("cursor(enableDiagnostics): {}", .{diagnostics.getCursor()});
self.diagnostics = diagnostics;
}
/// Call this just before `deinit()` to make the diagnostics available after the `deinit()`.
/// For performance reasons, the diagnostics (see `enableDiagnostics`) are not kept up to date continually.
/// Call this method to update the diagnostics with the latest information.
/// Because diagnostics are usually consulted in case of an error, it is common to call this in an errdefer.
/// It is safe to call this regardless of whether diagnostics have been enabled.
/// This is already called in an errdefer block in every relevant public method of this class.
pub fn saveDiagnostics(self: *const @This()) void {
if (self.diagnostics) |diag| {
std.log.warn("cursor(deinit presave): {}", .{diag.getCursor()});
diag.saveCursor();
std.log.warn("cursor(deinit postsave): {}", .{diag.getCursor()});
diag.cursor_in_current_input = self.cursor;
diag.current_input = self.input;
}
}
@ -520,6 +562,7 @@ pub const Scanner = struct {
/// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) AllocError!Token {
assert(self.is_end_of_input); // This function is not available in streaming mode.
errdefer self.saveDiagnostics();
const token_type = self.peekNextTokenType() catch |e| switch (e) {
error.BufferUnderrun => unreachable,
else => |err| return err,
@ -577,6 +620,7 @@ pub const Scanner = struct {
/// This method does not indicate whether the token content being returned is for a `.number` or `.string` token type;
/// the caller of this method is expected to know which type of token is being processed.
pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) AllocIntoArrayListError!?[]const u8 {
errdefer self.saveDiagnostics();
while (true) {
const token = try self.next();
switch (token) {
@ -642,6 +686,7 @@ pub const Scanner = struct {
/// see `peekNextTokenType()`.
pub fn skipValue(self: *@This()) SkipError!void {
assert(self.is_end_of_input); // This function is not available in streaming mode.
errdefer self.saveDiagnostics();
switch (self.peekNextTokenType() catch |e| switch (e) {
error.BufferUnderrun => unreachable,
else => |err| return err,
@ -686,6 +731,7 @@ pub const Scanner = struct {
/// Skip tokens until an `.object_end` or `.array_end` token results in a `stackHeight()` equal to the given stack height.
/// Unlike `skipValue()`, this function is available in streaming mode.
pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: usize) NextError!void {
errdefer self.saveDiagnostics();
while (true) {
switch (try self.next()) {
.object_end, .array_end => {
@ -705,11 +751,13 @@ pub const Scanner = struct {
/// Reserve memory for the given number of nesting levels up front.
/// A `stackHeight()` up to `height` will then not cause allocations.
/// If the reservation fails, the diagnostics are saved before the error is returned.
pub fn ensureTotalStackCapacity(self: *@This(), height: usize) Allocator.Error!void {
    self.stack.ensureTotalCapacity(height) catch |err| {
        self.saveDiagnostics();
        return err;
    };
}
/// See `std.json.Token` for documentation of this function.
pub fn next(self: *@This()) NextError!Token {
errdefer self.saveDiagnostics();
state_loop: while (true) {
switch (self.state) {
.value => {
@ -1463,6 +1511,7 @@ pub const Scanner = struct {
/// determines which type of token will be returned from the next `next*()` call.
/// This function is idempotent, only advancing past commas, colons, and inter-token whitespace.
pub fn peekNextTokenType(self: *@This()) PeekError!TokenType {
errdefer self.saveDiagnostics();
state_loop: while (true) {
switch (self.state) {
.value => {

View File

@ -10,6 +10,7 @@ const AllocWhen = @import("./scanner.zig").AllocWhen;
const Diagnostics = @import("./scanner.zig").Diagnostics;
const default_max_value_len = @import("./scanner.zig").default_max_value_len;
const isNumberFormattedLikeAnInteger = @import("./scanner.zig").isNumberFormattedLikeAnInteger;
const maybeRecordDiagnosticContext = @import("./scanner.zig").maybeRecordDiagnosticContext;
const Value = @import("./dynamic.zig").Value;
const Array = @import("./dynamic.zig").Array;
@ -144,11 +145,6 @@ pub fn parseFromTokenSourceLeaky(
if (resolved_options.diagnostics) |diag| {
scanner_or_reader.enableDiagnostics(diag);
}
defer {
if (resolved_options.diagnostics) |_| {
scanner_or_reader.saveDiagnostics();
}
}
const value = try innerParse(T, allocator, scanner_or_reader, resolved_options);
@ -222,6 +218,8 @@ pub fn innerParse(
source: anytype,
options: ParseOptions,
) ParseError(@TypeOf(source.*))!T {
errdefer source.saveDiagnostics();
errdefer maybeRecordDiagnosticContext(allocator, options.diagnostics, @typeName(T)) catch {};
switch (@typeInfo(T)) {
.Bool => {
return switch (try source.next()) {
@ -299,7 +297,7 @@ pub fn innerParse(
if (u_field.type == void) {
// void isn't really a json type, but we can support void payload union tags with {} as a value.
if (.object_begin != try source.next()) return error.UnexpectedToken;
if (.object_end != try source.next()) return error.UnexpectedToken;
if (.object_end != try source.next()) return error.UnknownField;
result = @unionInit(T, u_field.name, {});
} else {
// Recurse.
@ -347,9 +345,7 @@ pub fn innerParse(
.object_end => { // No more fields.
break;
},
else => {
return error.UnexpectedToken;
},
else => unreachable, // Not possible while in an object.
};
inline for (structInfo.fields, 0..) |field, i| {
@ -358,6 +354,7 @@ pub fn innerParse(
// Free the name token now in case we're using an allocator that optimizes freeing the last allocated object.
// (Recursing into innerParse() might trigger more allocations.)
freeAllocated(allocator, name_token.?);
errdefer maybeRecordDiagnosticContext(allocator, options.diagnostics, @typeName(T) ++ "." ++ field.name) catch {};
name_token = null;
if (fields_seen[i]) {
switch (options.duplicate_field_behavior) {
@ -624,7 +621,7 @@ pub fn innerParseFromValue(
if (u_field.type == void) {
// void isn't really a json type, but we can support void payload union tags with {} as a value.
if (kv.value_ptr.* != .object) return error.UnexpectedToken;
if (kv.value_ptr.*.object.count() != 0) return error.UnexpectedToken;
if (kv.value_ptr.*.object.count() != 0) return error.UnknownField;
return @unionInit(T, u_field.name, {});
}
// Recurse.