Merge pull request #17771 from ehaas/mingw-aro

mingw: Use aro instead of clang for preprocessing import libs
Andrew Kelley, 2023-11-07 14:47:05 -05:00, committed by GitHub
commit 77bc8e7b67
31 changed files with 19241 additions and 905 deletions

CMakeLists.txt

@ -645,6 +645,8 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/src/value.zig"
"${CMAKE_SOURCE_DIR}/src/wasi_libc.zig"
"${CMAKE_SOURCE_DIR}/src/windows_sdk.zig"
"${CMAKE_SOURCE_DIR}/src/stubs/aro_builtins.zig"
"${CMAKE_SOURCE_DIR}/src/stubs/aro_names.zig"
)
if(MSVC)
@ -815,7 +817,9 @@ set(BUILD_ZIG2_ARGS
-OReleaseSmall
--name zig2 -femit-bin="${ZIG2_C_SOURCE}"
--mod "build_options::${ZIG_CONFIG_ZIG_OUT}"
--mod "aro::deps/aro/lib.zig"
--mod "Builtins/Builtin.def::src/stubs/aro_builtins.zig"
--mod "Attribute/names.def::src/stubs/aro_names.zig"
--mod "aro:Builtins/Builtin.def,Attribute/names.def:deps/aro/lib.zig"
--deps build_options,aro
-target "${ZIG_HOST_TARGET_TRIPLE}"
)
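
The bootstrap build cannot run aro's GenerateDef build step, so it maps each generated import name to a pre-generated stub (the src/stubs/aro_builtins.zig and src/stubs/aro_names.zig files added to ZIG_STAGE2_SOURCES above), then declares the aro module with those two names as dependencies using the zig2 command line's `--mod name:deps:path` form. Inside the aro sources the names are consumed as ordinary imports; both lines below appear later in this diff, in Attribute.zig and Builtins.zig:

const attribute_names = @import("Attribute/names.def").with(Properties);
pub const Builtin = @import("Builtins/Builtin.def").with(Properties);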

build.zig

@ -8,6 +8,7 @@ const io = std.io;
const fs = std.fs;
const InstallDirectoryOptions = std.Build.InstallDirectoryOptions;
const assert = std.debug.assert;
const GenerateDef = @import("deps/aro/build/GenerateDef.zig");
const zig_version = std.SemanticVersion{ .major = 0, .minor = 12, .patch = 0 };
const stack_size = 32 * 1024 * 1024;
@ -589,9 +590,13 @@ fn addCompilerStep(
.max_rss = 7_000_000_000,
});
exe.stack_size = stack_size;
exe.addAnonymousModule("aro", .{
const aro_module = b.createModule(.{
.source_file = .{ .path = "deps/aro/lib.zig" },
});
GenerateDef.add(b, "deps/aro/Builtins/Builtin.def", "Builtins/Builtin.def", exe, aro_module);
GenerateDef.add(b, "deps/aro/Attribute/names.def", "Attribute/names.def", exe, aro_module);
exe.addModule("aro", aro_module);
return exe;
}

deps/aro/Attribute.zig (738 changed lines)

@ -60,7 +60,7 @@ pub const ArgumentType = enum {
fn fromType(comptime T: type) ArgumentType {
return switch (T) {
[]const u8 => .string,
Value.ByteRange => .string,
Identifier => .identifier,
u32 => .int,
Alignment => .alignment,
@ -83,17 +83,13 @@ pub const ArgumentType = enum {
}
};
fn getArguments(comptime descriptor: type) []const ZigType.StructField {
return if (@hasDecl(descriptor, "Args")) std.meta.fields(descriptor.Args) else &.{};
}
/// number of required arguments
pub fn requiredArgCount(attr: Tag) u32 {
switch (attr) {
inline else => |tag| {
comptime var needed = 0;
comptime {
const fields = getArguments(@field(attributes, @tagName(tag)));
const fields = std.meta.fields(@field(attributes, @tagName(tag)));
for (fields) |arg_field| {
if (!mem.eql(u8, arg_field.name, "__name_tok") and @typeInfo(arg_field.type) != .Optional) needed += 1;
}
@ -109,7 +105,7 @@ pub fn maxArgCount(attr: Tag) u32 {
inline else => |tag| {
comptime var max = 0;
comptime {
const fields = getArguments(@field(attributes, @tagName(tag)));
const fields = std.meta.fields(@field(attributes, @tagName(tag)));
for (fields) |arg_field| {
if (!mem.eql(u8, arg_field.name, "__name_tok")) max += 1;
}
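
Both argument counters now reflect directly over the flattened attribute structs with `std.meta.fields`, instead of first looking for a nested `Args` declaration. A minimal standalone sketch of the counting logic (hypothetical `ExampleArgs`; the real code additionally skips the `__name_tok` bookkeeping field):

const std = @import("std");

// Hypothetical flattened argument struct: one required field, one optional.
const ExampleArgs = struct {
    alignment: u32,
    offset: ?u32 = null,
};

fn requiredCount(comptime T: type) u32 {
    comptime var needed = 0;
    comptime {
        for (std.meta.fields(T)) |field| {
            // Optional fields have defaults and are not required.
            if (@typeInfo(field.type) != .Optional) needed += 1;
        }
    }
    return needed;
}

test "optional fields are not counted as required" {
    try std.testing.expectEqual(@as(u32, 1), requiredCount(ExampleArgs));
}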
@ -128,13 +124,13 @@ fn UnwrapOptional(comptime T: type) type {
pub const Formatting = struct {
/// The quote char (single or double) to use when printing identifiers/strings corresponding
/// to the enum in the first field of the Args of `attr`. Identifier enums use single quotes, string enums
/// to the enum in the first field of the `attr`. Identifier enums use single quotes, string enums
/// use double quotes
fn quoteChar(attr: Tag) []const u8 {
switch (attr) {
.calling_convention => unreachable,
inline else => |tag| {
const fields = getArguments(@field(attributes, @tagName(tag)));
const fields = std.meta.fields(@field(attributes, @tagName(tag)));
if (fields.len == 0) unreachable;
const Unwrapped = UnwrapOptional(fields[0].type);
@ -146,12 +142,12 @@ pub const Formatting = struct {
}
/// returns a comma-separated string of quoted enum values, representing the valid
/// choices for the string or identifier enum of the first field of the Args of `attr`.
/// choices for the string or identifier enum of the first field of the `attr`.
pub fn choices(attr: Tag) []const u8 {
switch (attr) {
.calling_convention => unreachable,
inline else => |tag| {
const fields = getArguments(@field(attributes, @tagName(tag)));
const fields = std.meta.fields(@field(attributes, @tagName(tag)));
if (fields.len == 0) unreachable;
const Unwrapped = UnwrapOptional(fields[0].type);
@ -176,7 +172,7 @@ pub fn wantsIdentEnum(attr: Tag) bool {
switch (attr) {
.calling_convention => return false,
inline else => |tag| {
const fields = getArguments(@field(attributes, @tagName(tag)));
const fields = std.meta.fields(@field(attributes, @tagName(tag)));
if (fields.len == 0) return false;
const Unwrapped = UnwrapOptional(fields[0].type);
@ -190,7 +186,7 @@ pub fn wantsIdentEnum(attr: Tag) bool {
pub fn diagnoseIdent(attr: Tag, arguments: *Arguments, ident: []const u8) ?Diagnostics.Message {
switch (attr) {
inline else => |tag| {
const fields = getArguments(@field(attributes, @tagName(tag)));
const fields = std.meta.fields(@field(attributes, @tagName(tag)));
if (fields.len == 0) unreachable;
const Unwrapped = UnwrapOptional(fields[0].type);
if (@typeInfo(Unwrapped) != .Enum) unreachable;
@ -209,7 +205,7 @@ pub fn diagnoseIdent(attr: Tag, arguments: *Arguments, ident: []const u8) ?Diagn
pub fn wantsAlignment(attr: Tag, idx: usize) bool {
switch (attr) {
inline else => |tag| {
const fields = getArguments(@field(attributes, @tagName(tag)));
const fields = std.meta.fields(@field(attributes, @tagName(tag)));
if (fields.len == 0) return false;
return switch (idx) {
@ -223,7 +219,7 @@ pub fn wantsAlignment(attr: Tag, idx: usize) bool {
pub fn diagnoseAlignment(attr: Tag, arguments: *Arguments, arg_idx: u32, val: Value, ty: Type, comp: *Compilation) ?Diagnostics.Message {
switch (attr) {
inline else => |tag| {
const arg_fields = getArguments(@field(attributes, @tagName(tag)));
const arg_fields = std.meta.fields(@field(attributes, @tagName(tag)));
if (arg_fields.len == 0) unreachable;
switch (arg_idx) {
@ -267,10 +263,17 @@ fn diagnoseField(
.bytes => {
const bytes = val.data.bytes.trim(1); // remove null terminator
if (wanted == Value.ByteRange) {
std.debug.assert(node.tag == .string_literal_expr);
if (!node.ty.elemType().is(.char) and !node.ty.elemType().is(.uchar)) {
return Diagnostics.Message{
.tag = .attribute_requires_string,
.extra = .{ .str = decl.name },
};
}
@field(@field(arguments, decl.name), field.name) = bytes;
return null;
} else if (@typeInfo(wanted) == .Enum and @hasDecl(wanted, "opts") and wanted.opts.enum_kind == .string) {
const str = bytes.slice(strings);
const str = bytes.slice(strings, .@"1");
if (std.meta.stringToEnum(wanted, str)) |enum_val| {
@field(@field(arguments, decl.name), field.name) = enum_val;
return null;
@ -305,7 +308,7 @@ pub fn diagnose(attr: Tag, arguments: *Arguments, arg_idx: u32, val: Value, node
.tag = .attribute_too_many_args,
.extra = .{ .attr_arg_count = .{ .attribute = attr, .expected = max_arg_count } },
};
const arg_fields = getArguments(@field(attributes, decl.name));
const arg_fields = std.meta.fields(@field(attributes, decl.name));
switch (arg_idx) {
inline 0...arg_fields.len - 1 => |arg_i| {
return diagnoseField(decl, arg_fields[arg_i], UnwrapOptional(arg_fields[arg_i].type), arguments, val, node, strings);
@ -330,212 +333,107 @@ pub const Identifier = struct {
const attributes = struct {
pub const access = struct {
const gnu = "access";
access_mode: enum {
read_only,
read_write,
write_only,
none,
const Args = struct {
access_mode: enum {
read_only,
read_write,
write_only,
none,
const opts = struct {
const enum_kind = .identifier;
};
},
ref_index: u32,
size_index: ?u32 = null,
};
const opts = struct {
const enum_kind = .identifier;
};
},
ref_index: u32,
size_index: ?u32 = null,
};
pub const alias = struct {
const gnu = "alias";
const Args = struct {
alias: Value.ByteRange,
};
alias: Value.ByteRange,
};
pub const aligned = struct {
const gnu = "aligned";
const declspec = "align";
const Args = struct {
alignment: ?Alignment = null,
__name_tok: TokenIndex,
};
alignment: ?Alignment = null,
__name_tok: TokenIndex,
};
pub const alloc_align = struct {
const gnu = "alloc_align";
const Args = struct {
position: u32,
};
position: u32,
};
pub const alloc_size = struct {
const gnu = "alloc_size";
const Args = struct {
position_1: u32,
position_2: ?u32 = null,
};
position_1: u32,
position_2: ?u32 = null,
};
pub const allocate = struct {
const declspec = "allocate";
const Args = struct {
segname: Value.ByteRange,
};
};
pub const allocator = struct {
const declspec = "allocator";
};
pub const always_inline = struct {
const gnu = "always_inline";
};
pub const appdomain = struct {
const declspec = "appdomain";
};
pub const artificial = struct {
const gnu = "artificial";
segname: Value.ByteRange,
};
pub const allocator = struct {};
pub const always_inline = struct {};
pub const appdomain = struct {};
pub const artificial = struct {};
pub const assume_aligned = struct {
const gnu = "assume_aligned";
const Args = struct {
alignment: Alignment,
offset: ?u32 = null,
};
alignment: Alignment,
offset: ?u32 = null,
};
pub const cleanup = struct {
const gnu = "cleanup";
const Args = struct {
function: Identifier,
};
function: Identifier,
};
pub const code_seg = struct {
const declspec = "code_seg";
const Args = struct {
segname: Value.ByteRange,
};
};
pub const cold = struct {
const gnu = "cold";
};
pub const common = struct {
const gnu = "common";
};
pub const @"const" = struct {
const gnu = "const";
segname: Value.ByteRange,
};
pub const cold = struct {};
pub const common = struct {};
pub const @"const" = struct {};
pub const constructor = struct {
const gnu = "constructor";
const Args = struct {
priority: ?u32 = null,
};
priority: ?u32 = null,
};
pub const copy = struct {
const gnu = "copy";
const Args = struct {
function: Identifier,
};
function: Identifier,
};
pub const deprecated = struct {
const gnu = "deprecated";
const declspec = "deprecated";
const c2x = "deprecated";
const Args = struct {
msg: ?Value.ByteRange = null,
__name_tok: TokenIndex,
};
};
pub const designated_init = struct {
const gnu = "designated_init";
msg: ?Value.ByteRange = null,
__name_tok: TokenIndex,
};
pub const designated_init = struct {};
pub const destructor = struct {
const gnu = "destructor";
const Args = struct {
priority: ?u32 = null,
};
};
pub const dllexport = struct {
const declspec = "dllexport";
};
pub const dllimport = struct {
const declspec = "dllimport";
priority: ?u32 = null,
};
pub const dllexport = struct {};
pub const dllimport = struct {};
pub const @"error" = struct {
const gnu = "error";
const Args = struct {
msg: Value.ByteRange,
__name_tok: TokenIndex,
};
};
pub const externally_visible = struct {
const gnu = "externally_visible";
};
pub const fallthrough = struct {
const gnu = "fallthrough";
const c2x = "fallthrough";
};
pub const flatten = struct {
const gnu = "flatten";
msg: Value.ByteRange,
__name_tok: TokenIndex,
};
pub const externally_visible = struct {};
pub const fallthrough = struct {};
pub const flatten = struct {};
pub const format = struct {
const gnu = "format";
const Args = struct {
archetype: enum {
printf,
scanf,
strftime,
strfmon,
archetype: enum {
printf,
scanf,
strftime,
strfmon,
const opts = struct {
const enum_kind = .identifier;
};
},
string_index: u32,
first_to_check: u32,
};
const opts = struct {
const enum_kind = .identifier;
};
},
string_index: u32,
first_to_check: u32,
};
pub const format_arg = struct {
const gnu = "format_arg";
const Args = struct {
string_index: u32,
};
};
pub const gnu_inline = struct {
const gnu = "gnu_inline";
};
pub const hot = struct {
const gnu = "hot";
string_index: u32,
};
pub const gnu_inline = struct {};
pub const hot = struct {};
pub const ifunc = struct {
const gnu = "ifunc";
const Args = struct {
resolver: Value.ByteRange,
};
};
pub const interrupt = struct {
const gnu = "interrupt";
};
pub const interrupt_handler = struct {
const gnu = "interrupt_handler";
};
pub const jitintrinsic = struct {
const declspec = "jitintrinsic";
};
pub const leaf = struct {
const gnu = "leaf";
};
pub const malloc = struct {
const gnu = "malloc";
};
pub const may_alias = struct {
const gnu = "may_alias";
resolver: Value.ByteRange,
};
pub const interrupt = struct {};
pub const interrupt_handler = struct {};
pub const jitintrinsic = struct {};
pub const leaf = struct {};
pub const malloc = struct {};
pub const may_alias = struct {};
pub const mode = struct {
const gnu = "mode";
const Args = struct {
mode: enum {
// zig fmt: off
mode: enum {
// zig fmt: off
byte, word, pointer,
BI, QI, HI,
PSI, SI, PDI,
@ -558,336 +456,184 @@ const attributes = struct {
BND32, BND64,
// zig fmt: on
const opts = struct {
const enum_kind = .identifier;
};
},
};
};
pub const naked = struct {
const declspec = "naked";
};
pub const no_address_safety_analysis = struct {
const gnu = "no_address_safety_analysise";
};
pub const no_icf = struct {
const gnu = "no_icf";
};
pub const no_instrument_function = struct {
const gnu = "no_instrument_function";
};
pub const no_profile_instrument_function = struct {
const gnu = "no_profile_instrument_function";
};
pub const no_reorder = struct {
const gnu = "no_reorder";
const opts = struct {
const enum_kind = .identifier;
};
},
};
pub const naked = struct {};
pub const no_address_safety_analysis = struct {};
pub const no_icf = struct {};
pub const no_instrument_function = struct {};
pub const no_profile_instrument_function = struct {};
pub const no_reorder = struct {};
pub const no_sanitize = struct {
const gnu = "no_sanitize";
/// Todo: represent args as union?
const Args = struct {
alignment: Value.ByteRange,
object_size: ?Value.ByteRange = null,
};
};
pub const no_sanitize_address = struct {
const gnu = "no_sanitize_address";
const declspec = "no_sanitize_address";
};
pub const no_sanitize_coverage = struct {
const gnu = "no_sanitize_coverage";
};
pub const no_sanitize_thread = struct {
const gnu = "no_sanitize_thread";
};
pub const no_sanitize_undefined = struct {
const gnu = "no_sanitize_undefined";
};
pub const no_split_stack = struct {
const gnu = "no_split_stack";
};
pub const no_stack_limit = struct {
const gnu = "no_stack_limit";
};
pub const no_stack_protector = struct {
const gnu = "no_stack_protector";
};
pub const @"noalias" = struct {
const declspec = "noalias";
};
pub const noclone = struct {
const gnu = "noclone";
};
pub const nocommon = struct {
const gnu = "nocommon";
};
pub const nodiscard = struct {
const c2x = "nodiscard";
};
pub const noinit = struct {
const gnu = "noinit";
};
pub const @"noinline" = struct {
const gnu = "noinline";
const declspec = "noinline";
};
pub const noipa = struct {
const gnu = "noipa";
alignment: Value.ByteRange,
object_size: ?Value.ByteRange = null,
};
pub const no_sanitize_address = struct {};
pub const no_sanitize_coverage = struct {};
pub const no_sanitize_thread = struct {};
pub const no_sanitize_undefined = struct {};
pub const no_split_stack = struct {};
pub const no_stack_limit = struct {};
pub const no_stack_protector = struct {};
pub const @"noalias" = struct {};
pub const noclone = struct {};
pub const nocommon = struct {};
pub const nodiscard = struct {};
pub const noinit = struct {};
pub const @"noinline" = struct {};
pub const noipa = struct {};
// TODO: arbitrary number of arguments
// const nonnull = struct {
// const gnu = "nonnull";
// const Args = struct {
// arg_index: []const u32,
// // arg_index: []const u32,
// };
// };
pub const nonstring = struct {
const gnu = "nonstring";
};
pub const noplt = struct {
const gnu = "noplt";
};
pub const @"noreturn" = struct {
const gnu = "noreturn";
const c2x = "noreturn";
const declspec = "noreturn";
};
pub const nonstring = struct {};
pub const noplt = struct {};
pub const @"noreturn" = struct {};
// TODO: union args ?
// const optimize = struct {
// const gnu = "optimize";
// const Args = struct {
// optimize, // u32 | []const u8 -- optimize?
// // optimize, // u32 | []const u8 -- optimize?
// };
// };
pub const @"packed" = struct {
const gnu = "packed";
};
pub const patchable_function_entry = struct {
const gnu = "patchable_function_entry";
};
pub const persistent = struct {
const gnu = "persistent";
};
pub const process = struct {
const declspec = "process";
};
pub const pure = struct {
const gnu = "pure";
};
pub const reproducible = struct {
const c2x = "reproducible";
};
pub const restrict = struct {
const declspec = "restrict";
};
pub const retain = struct {
const gnu = "retain";
};
pub const returns_nonnull = struct {
const gnu = "returns_nonnull";
};
pub const returns_twice = struct {
const gnu = "returns_twice";
};
pub const safebuffers = struct {
const declspec = "safebuffers";
};
pub const @"packed" = struct {};
pub const patchable_function_entry = struct {};
pub const persistent = struct {};
pub const process = struct {};
pub const pure = struct {};
pub const reproducible = struct {};
pub const restrict = struct {};
pub const retain = struct {};
pub const returns_nonnull = struct {};
pub const returns_twice = struct {};
pub const safebuffers = struct {};
pub const scalar_storage_order = struct {
const gnu = "scalar_storage_order";
const Args = struct {
order: enum {
@"little-endian",
@"big-endian",
order: enum {
@"little-endian",
@"big-endian",
const opts = struct {
const enum_kind = .string;
};
},
};
const opts = struct {
const enum_kind = .string;
};
},
};
pub const section = struct {
const gnu = "section";
const Args = struct {
name: Value.ByteRange,
};
};
pub const selectany = struct {
const declspec = "selectany";
name: Value.ByteRange,
};
pub const selectany = struct {};
pub const sentinel = struct {
const gnu = "sentinel";
const Args = struct {
position: ?u32 = null,
};
position: ?u32 = null,
};
pub const simd = struct {
const gnu = "simd";
const Args = struct {
mask: ?enum {
notinbranch,
inbranch,
mask: ?enum {
notinbranch,
inbranch,
const opts = struct {
const enum_kind = .string;
};
} = null,
};
const opts = struct {
const enum_kind = .string;
};
} = null,
};
pub const spectre = struct {
const declspec = "spectre";
const Args = struct {
arg: enum {
nomitigation,
arg: enum {
nomitigation,
const opts = struct {
const enum_kind = .identifier;
};
},
};
};
pub const stack_protect = struct {
const gnu = "stack_protect";
const opts = struct {
const enum_kind = .identifier;
};
},
};
pub const stack_protect = struct {};
pub const symver = struct {
const gnu = "symver";
const Args = struct {
version: Value.ByteRange, // TODO: validate format "name2@nodename"
};
version: Value.ByteRange, // TODO: validate format "name2@nodename"
};
pub const target = struct {
const gnu = "target";
const Args = struct {
options: Value.ByteRange, // TODO: multiple arguments
};
options: Value.ByteRange, // TODO: multiple arguments
};
pub const target_clones = struct {
const gnu = "target_clones";
const Args = struct {
options: Value.ByteRange, // TODO: multiple arguments
};
};
pub const thread = struct {
const declspec = "thread";
};
pub const tls_model = struct {
const gnu = "tls_model";
const Args = struct {
model: enum {
@"global-dynamic",
@"local-dynamic",
@"initial-exec",
@"local-exec",
options: Value.ByteRange, // TODO: multiple arguments
const opts = struct {
const enum_kind = .string;
};
},
};
};
pub const transparent_union = struct {
const gnu = "transparent_union";
pub const thread = struct {};
pub const tls_model = struct {
model: enum {
@"global-dynamic",
@"local-dynamic",
@"initial-exec",
@"local-exec",
const opts = struct {
const enum_kind = .string;
};
},
};
pub const transparent_union = struct {};
pub const unavailable = struct {
const gnu = "unavailable";
const Args = struct {
msg: ?Value.ByteRange = null,
__name_tok: TokenIndex,
};
};
pub const uninitialized = struct {
const gnu = "uninitialized";
};
pub const unsequenced = struct {
const c2x = "unsequenced";
};
pub const unused = struct {
const gnu = "unused";
const c2x = "maybe_unused";
};
pub const used = struct {
const gnu = "used";
msg: ?Value.ByteRange = null,
__name_tok: TokenIndex,
};
pub const uninitialized = struct {};
pub const unsequenced = struct {};
pub const unused = struct {};
pub const used = struct {};
pub const uuid = struct {
const declspec = "uuid";
const Args = struct {
uuid: Value.ByteRange,
};
uuid: Value.ByteRange,
};
pub const vector_size = struct {
const gnu = "vector_size";
const Args = struct {
bytes: u32, // TODO: validate "The bytes argument must be a positive power-of-two multiple of the base type size"
};
bytes: u32, // TODO: validate "The bytes argument must be a positive power-of-two multiple of the base type size"
};
pub const visibility = struct {
const gnu = "visibility";
const Args = struct {
visibility_type: enum {
default,
hidden,
internal,
protected,
visibility_type: enum {
default,
hidden,
internal,
protected,
const opts = struct {
const enum_kind = .string;
};
},
};
const opts = struct {
const enum_kind = .string;
};
},
};
pub const warn_if_not_aligned = struct {
const gnu = "warn_if_not_aligned";
const Args = struct {
alignment: Alignment,
};
};
pub const warn_unused_result = struct {
const gnu = "warn_unused_result";
alignment: Alignment,
};
pub const warn_unused_result = struct {};
pub const warning = struct {
const gnu = "warning";
const Args = struct {
msg: Value.ByteRange,
__name_tok: TokenIndex,
};
};
pub const weak = struct {
const gnu = "weak";
msg: Value.ByteRange,
__name_tok: TokenIndex,
};
pub const weak = struct {};
pub const weakref = struct {
const gnu = "weakref";
const Args = struct {
target: ?Value.ByteRange = null,
};
target: ?Value.ByteRange = null,
};
pub const zero_call_used_regs = struct {
const gnu = "zero_call_used_regs";
const Args = struct {
choice: enum {
skip,
used,
@"used-gpr",
@"used-arg",
@"used-gpr-arg",
all,
@"all-gpr",
@"all-arg",
@"all-gpr-arg",
choice: enum {
skip,
used,
@"used-gpr",
@"used-arg",
@"used-gpr-arg",
all,
@"all-gpr",
@"all-arg",
@"all-gpr-arg",
const opts = struct {
const enum_kind = .string;
};
},
};
const opts = struct {
const enum_kind = .string;
};
},
};
pub const asm_label = struct {
const Args = struct {
name: Value.ByteRange,
};
name: Value.ByteRange,
};
pub const calling_convention = struct {
const Args = struct {
cc: CallingConvention,
};
cc: CallingConvention,
};
};
@ -899,7 +645,7 @@ pub const Arguments = blk: {
inline for (decls, &union_fields) |decl, *field| {
field.* = .{
.name = decl.name,
.type = if (@hasDecl(@field(attributes, decl.name), "Args")) @field(attributes, decl.name).Args else void,
.type = @field(attributes, decl.name),
.alignment = 0,
};
}
@ -916,17 +662,16 @@ pub const Arguments = blk: {
pub fn ArgumentsForTag(comptime tag: Tag) type {
const decl = @typeInfo(attributes).Struct.decls[@intFromEnum(tag)];
return if (@hasDecl(@field(attributes, decl.name), "Args")) @field(attributes, decl.name).Args else void;
return @field(attributes, decl.name);
}
pub fn initArguments(tag: Tag, name_tok: TokenIndex) Arguments {
switch (tag) {
inline else => |arg_tag| {
const union_element = @field(attributes, @tagName(arg_tag));
const has_args = @hasDecl(union_element, "Args");
const init = if (has_args) std.mem.zeroInit(union_element.Args, .{}) else {};
const init = std.mem.zeroInit(union_element, .{});
var args = @unionInit(Arguments, @tagName(arg_tag), init);
if (has_args and @hasField(@field(attributes, @tagName(arg_tag)).Args, "__name_tok")) {
if (@hasField(@field(attributes, @tagName(arg_tag)), "__name_tok")) {
@field(args, @tagName(arg_tag)).__name_tok = name_tok;
}
return args;
@ -935,56 +680,29 @@ pub fn initArguments(tag: Tag, name_tok: TokenIndex) Arguments {
}
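
With the `Args` indirection gone, `initArguments` can unconditionally `zeroInit` the payload type and wrap it with `@unionInit`. A standalone sketch of that pattern, using a hypothetical two-attribute union rather than aro's real `Arguments`:

const std = @import("std");

const Arguments = union(enum) {
    aligned: struct { alignment: ?u32 = null, __name_tok: u32 = 0 },
    cold: struct {},
};

test "zeroInit the payload, then wrap it in the union" {
    const Payload = std.meta.TagPayload(Arguments, .aligned);
    const init = std.mem.zeroInit(Payload, .{});
    var args = @unionInit(Arguments, "aligned", init);
    args.aligned.__name_tok = 42; // mirrors the __name_tok fixup above
    try std.testing.expectEqual(@as(?u32, null), args.aligned.alignment);
    try std.testing.expectEqual(@as(u32, 42), args.aligned.__name_tok);
}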
pub fn fromString(kind: Kind, namespace: ?[]const u8, name: []const u8) ?Tag {
return switch (kind) {
.c2x => fromStringC2X(namespace, name),
.declspec => fromStringDeclspec(name),
.gnu => fromStringGnu(name),
const Properties = struct {
tag: Tag,
gnu: bool = false,
declspec: bool = false,
c2x: bool = false,
};
}
const attribute_names = @import("Attribute/names.def").with(Properties);
fn fromStringGnu(name: []const u8) ?Tag {
const normalized = normalize(name);
const decls = @typeInfo(attributes).Struct.decls;
@setEvalBranchQuota(3000);
inline for (decls, 0..) |decl, i| {
if (@hasDecl(@field(attributes, decl.name), "gnu")) {
if (mem.eql(u8, @field(attributes, decl.name).gnu, normalized)) {
return @enumFromInt(i);
}
}
}
return null;
}
fn fromStringC2X(namespace: ?[]const u8, name: []const u8) ?Tag {
const normalized = normalize(name);
if (namespace) |ns| {
const actual_kind: Kind = if (namespace) |ns| blk: {
const normalized_ns = normalize(ns);
if (mem.eql(u8, normalized_ns, "gnu")) {
return fromStringGnu(normalized);
break :blk .gnu;
}
return null;
}
const decls = @typeInfo(attributes).Struct.decls;
inline for (decls, 0..) |decl, i| {
if (@hasDecl(@field(attributes, decl.name), "c2x")) {
if (mem.eql(u8, @field(attributes, decl.name).c2x, normalized)) {
return @enumFromInt(i);
}
}
}
return null;
}
} else kind;
fn fromStringDeclspec(name: []const u8) ?Tag {
const normalized = normalize(name);
const decls = @typeInfo(attributes).Struct.decls;
inline for (decls, 0..) |decl, i| {
if (@hasDecl(@field(attributes, decl.name), "declspec")) {
if (mem.eql(u8, @field(attributes, decl.name).declspec, normalized)) {
return @enumFromInt(i);
}
}
const tag_and_opts = attribute_names.fromName(normalized) orelse return null;
switch (actual_kind) {
inline else => |tag| {
if (@field(tag_and_opts.properties, @tagName(tag)))
return tag_and_opts.properties.tag;
},
}
return null;
}
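
The three per-kind lookup loops collapse into a single generated-table lookup: `fromName` returns the normalized name's `Properties`, and an inline switch over the (possibly gnu-overridden) kind picks the matching boolean flag by field name. A standalone sketch of that dispatch, with a hypothetical `Properties` mirroring the shape defined above:

const std = @import("std");

const Kind = enum { c2x, declspec, gnu };

const Properties = struct {
    tag: u32, // stand-in for the real attribute Tag enum
    gnu: bool = false,
    declspec: bool = false,
    c2x: bool = false,
};

fn allowedFor(props: Properties, kind: Kind) bool {
    switch (kind) {
        // Each Kind name matches a Properties field name, so @field
        // selects the right flag at comptime for every branch.
        inline else => |k| return @field(props, @tagName(k)),
    }
}

test "one table entry can serve several attribute syntaxes" {
    const deprecated = Properties{ .tag = 0, .gnu = true, .declspec = true, .c2x = true };
    try std.testing.expect(allowedFor(deprecated, .gnu));

    const nodiscard = Properties{ .tag = 1, .c2x = true };
    try std.testing.expect(!allowedFor(nodiscard, .declspec));
}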

deps/aro/Attribute/names.def (new file, 431 lines)

@ -0,0 +1,431 @@
# multiple
deprecated
.tag = .deprecated
.c2x = true
.gnu = true
.declspec = true
fallthrough
.tag = .fallthrough
.c2x = true
.gnu = true
noreturn
.tag = .@"noreturn"
.c2x = true
.gnu = true
.declspec = true
no_sanitize_address
.tag = .no_sanitize_address
.gnu = true
.declspec = true
noinline
.tag = .@"noinline"
.gnu = true
.declspec = true
# c2x only
nodiscard
.tag = .nodiscard
.c2x = true
reproducible
.tag = .reproducible
.c2x = true
unsequenced
.tag = .unsequenced
.c2x = true
maybe_unused
.tag = .unused
.c2x = true
# gnu only
access
.tag = .access
.gnu = true
alias
.tag = .alias
.gnu = true
aligned
.tag = .aligned
.gnu = true
alloc_align
.tag = .alloc_align
.gnu = true
alloc_size
.tag = .alloc_size
.gnu = true
always_inline
.tag = .always_inline
.gnu = true
artificial
.tag = .artificial
.gnu = true
assume_aligned
.tag = .assume_aligned
.gnu = true
cleanup
.tag = .cleanup
.gnu = true
cold
.tag = .cold
.gnu = true
common
.tag = .common
.gnu = true
const
.tag = .@"const"
.gnu = true
constructor
.tag = .constructor
.gnu = true
copy
.tag = .copy
.gnu = true
designated_init
.tag = .designated_init
.gnu = true
destructor
.tag = .destructor
.gnu = true
error
.tag = .@"error"
.gnu = true
externally_visible
.tag = .externally_visible
.gnu = true
flatten
.tag = .flatten
.gnu = true
format
.tag = .format
.gnu = true
format_arg
.tag = .format_arg
.gnu = true
gnu_inline
.tag = .gnu_inline
.gnu = true
hot
.tag = .hot
.gnu = true
ifunc
.tag = .ifunc
.gnu = true
interrupt
.tag = .interrupt
.gnu = true
interrupt_handler
.tag = .interrupt_handler
.gnu = true
leaf
.tag = .leaf
.gnu = true
malloc
.tag = .malloc
.gnu = true
may_alias
.tag = .may_alias
.gnu = true
mode
.tag = .mode
.gnu = true
no_address_safety_analysis
.tag = .no_address_safety_analysis
.gnu = true
no_icf
.tag = .no_icf
.gnu = true
no_instrument_function
.tag = .no_instrument_function
.gnu = true
no_profile_instrument_function
.tag = .no_profile_instrument_function
.gnu = true
no_reorder
.tag = .no_reorder
.gnu = true
no_sanitize
.tag = .no_sanitize
.gnu = true
no_sanitize_coverage
.tag = .no_sanitize_coverage
.gnu = true
no_sanitize_thread
.tag = .no_sanitize_thread
.gnu = true
no_sanitize_undefined
.tag = .no_sanitize_undefined
.gnu = true
no_split_stack
.tag = .no_split_stack
.gnu = true
no_stack_limit
.tag = .no_stack_limit
.gnu = true
no_stack_protector
.tag = .no_stack_protector
.gnu = true
noclone
.tag = .noclone
.gnu = true
nocommon
.tag = .nocommon
.gnu = true
noinit
.tag = .noinit
.gnu = true
noipa
.tag = .noipa
.gnu = true
# nonnull
# .tag = .nonnull
# .gnu = true
nonstring
.tag = .nonstring
.gnu = true
noplt
.tag = .noplt
.gnu = true
# optimize
# .tag = .optimize
# .gnu = true
packed
.tag = .@"packed"
.gnu = true
patchable_function_entry
.tag = .patchable_function_entry
.gnu = true
persistent
.tag = .persistent
.gnu = true
pure
.tag = .pure
.gnu = true
retain
.tag = .retain
.gnu = true
returns_nonnull
.tag = .returns_nonnull
.gnu = true
returns_twice
.tag = .returns_twice
.gnu = true
scalar_storage_order
.tag = .scalar_storage_order
.gnu = true
section
.tag = .section
.gnu = true
sentinel
.tag = .sentinel
.gnu = true
simd
.tag = .simd
.gnu = true
stack_protect
.tag = .stack_protect
.gnu = true
symver
.tag = .symver
.gnu = true
target
.tag = .target
.gnu = true
target_clones
.tag = .target_clones
.gnu = true
tls_model
.tag = .tls_model
.gnu = true
transparent_union
.tag = .transparent_union
.gnu = true
unavailable
.tag = .unavailable
.gnu = true
uninitialized
.tag = .uninitialized
.gnu = true
unused
.tag = .unused
.gnu = true
used
.tag = .used
.gnu = true
vector_size
.tag = .vector_size
.gnu = true
visibility
.tag = .visibility
.gnu = true
warn_if_not_aligned
.tag = .warn_if_not_aligned
.gnu = true
warn_unused_result
.tag = .warn_unused_result
.gnu = true
warning
.tag = .warning
.gnu = true
weak
.tag = .weak
.gnu = true
weakref
.tag = .weakref
.gnu = true
zero_call_used_regs
.tag = .zero_call_used_regs
.gnu = true
# declspec only
align
.tag = .aligned
.declspec = true
allocate
.tag = .allocate
.declspec = true
allocator
.tag = .allocator
.declspec = true
appdomain
.tag = .appdomain
.declspec = true
code_seg
.tag = .code_seg
.declspec = true
dllexport
.tag = .dllexport
.declspec = true
dllimport
.tag = .dllimport
.declspec = true
jitintrinsic
.tag = .jitintrinsic
.declspec = true
naked
.tag = .naked
.declspec = true
noalias
.tag = .@"noalias"
.declspec = true
process
.tag = .process
.declspec = true
restrict
.tag = .restrict
.declspec = true
safebuffers
.tag = .safebuffers
.declspec = true
selectany
.tag = .selectany
.declspec = true
spectre
.tag = .spectre
.declspec = true
thread
.tag = .thread
.declspec = true
uuid
.tag = .uuid
.declspec = true

deps/aro/Builtins.zig (75 changed lines)

@ -1,18 +1,20 @@
const std = @import("std");
const Compilation = @import("Compilation.zig");
const Type = @import("Type.zig");
const BuiltinFunction = @import("builtins/BuiltinFunction.zig");
const TypeDescription = @import("builtins/TypeDescription.zig");
const TypeDescription = @import("Builtins/TypeDescription.zig");
const target_util = @import("target.zig");
const StringId = @import("StringInterner.zig").StringId;
const LangOpts = @import("LangOpts.zig");
const Parser = @import("Parser.zig");
const Properties = @import("Builtins/Properties.zig");
pub const Builtin = @import("Builtins/Builtin.def").with(Properties);
const Builtins = @This();
const Expanded = struct {
ty: Type,
builtin: BuiltinFunction,
builtin: Builtin,
};
const NameToTypeMap = std.StringHashMapUnmanaged(Type);
@ -243,8 +245,8 @@ fn createType(desc: TypeDescription, it: *TypeDescription.TypeIterator, comp: *c
return builder.finish(undefined) catch unreachable;
}
fn createBuiltin(comp: *const Compilation, builtin: BuiltinFunction, type_arena: std.mem.Allocator) !Type {
var it = TypeDescription.TypeIterator.init(builtin.param_str);
fn createBuiltin(comp: *const Compilation, builtin: Builtin, type_arena: std.mem.Allocator) !Type {
var it = TypeDescription.TypeIterator.init(builtin.properties.param_str);
const ret_ty_desc = it.next().?;
if (ret_ty_desc.spec == .@"!") {
@ -252,7 +254,7 @@ fn createBuiltin(comp: *const Compilation, builtin: BuiltinFunction, type_arena:
}
const ret_ty = try createType(ret_ty_desc, &it, comp, type_arena);
var param_count: usize = 0;
var params: [BuiltinFunction.MaxParamCount]Type.Func.Param = undefined;
var params: [Builtin.max_param_count]Type.Func.Param = undefined;
while (it.next()) |desc| : (param_count += 1) {
params[param_count] = .{ .name_tok = 0, .ty = try createType(desc, &it, comp, type_arena), .name = .empty };
}
@ -265,14 +267,14 @@ fn createBuiltin(comp: *const Compilation, builtin: BuiltinFunction, type_arena:
.params = duped_params,
};
return .{
.specifier = if (builtin.isVarArgs()) .var_args_func else .func,
.specifier = if (builtin.properties.isVarArgs()) .var_args_func else .func,
.data = .{ .func = func },
};
}
/// Asserts that the builtin has already been created
pub fn lookup(b: *const Builtins, name: []const u8) Expanded {
const builtin = BuiltinFunction.fromName(name).?;
const builtin = Builtin.fromName(name).?;
const ty = b._name_to_type_map.get(name).?;
return .{
.builtin = builtin,
@ -282,7 +284,7 @@ pub fn lookup(b: *const Builtins, name: []const u8) Expanded {
pub fn getOrCreate(b: *Builtins, comp: *Compilation, name: []const u8, type_arena: std.mem.Allocator) !?Expanded {
const ty = b._name_to_type_map.get(name) orelse {
const builtin = BuiltinFunction.fromName(name) orelse return null;
const builtin = Builtin.fromName(name) orelse return null;
if (!comp.hasBuiltinFunction(builtin)) return null;
try b._name_to_type_map.ensureUnusedCapacity(comp.gpa, 1);
@ -294,13 +296,62 @@ pub fn getOrCreate(b: *Builtins, comp: *Compilation, name: []const u8, type_aren
.ty = ty,
};
};
const builtin = BuiltinFunction.fromName(name).?;
const builtin = Builtin.fromName(name).?;
return .{
.builtin = builtin,
.ty = ty,
};
}
pub const Iterator = struct {
index: u16 = 1,
name_buf: [Builtin.longest_name]u8 = undefined,
pub const Entry = struct {
/// Memory of this slice is overwritten on every call to `next`
name: []const u8,
builtin: Builtin,
};
pub fn next(self: *Iterator) ?Entry {
if (self.index > Builtin.data.len) return null;
const index = self.index;
const data_index = index - 1;
self.index += 1;
return .{
.name = Builtin.nameFromUniqueIndex(index, &self.name_buf),
.builtin = Builtin.data[data_index],
};
}
};
test Iterator {
var it = Iterator{};
var seen = std.StringHashMap(Builtin).init(std.testing.allocator);
defer seen.deinit();
var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
defer arena_state.deinit();
const arena = arena_state.allocator();
while (it.next()) |entry| {
const index = Builtin.uniqueIndex(entry.name).?;
var buf: [Builtin.longest_name]u8 = undefined;
const name_from_index = Builtin.nameFromUniqueIndex(index, &buf);
try std.testing.expectEqualStrings(entry.name, name_from_index);
if (seen.contains(entry.name)) {
std.debug.print("iterated over {s} twice\n", .{entry.name});
std.debug.print("current data: {}\n", .{entry.builtin});
std.debug.print("previous data: {}\n", .{seen.get(entry.name).?});
return error.TestExpectedUniqueEntries;
}
try seen.put(try arena.dupe(u8, entry.name), entry.builtin);
}
try std.testing.expectEqual(@as(usize, Builtin.data.len), seen.count());
}
test "All builtins" {
var comp = Compilation.init(std.testing.allocator);
defer comp.deinit();
@ -310,7 +361,7 @@ test "All builtins" {
const type_arena = arena.allocator();
var builtin_it = BuiltinFunction.BuiltinsIterator{};
var builtin_it = Iterator{};
while (builtin_it.next()) |entry| {
const name = try type_arena.dupe(u8, entry.name);
if (try comp.builtins.getOrCreate(&comp, name, type_arena)) |func_ty| {
@ -334,7 +385,7 @@ test "Allocation failures" {
const type_arena = arena.allocator();
const num_builtins = 40;
var builtin_it = BuiltinFunction.BuiltinsIterator{};
var builtin_it = Iterator{};
for (0..num_builtins) |_| {
const entry = builtin_it.next().?;
_ = try comp.builtins.getOrCreate(&comp, entry.name, type_arena);

deps/aro/Builtins/Builtin.def (new file, 17010 lines; diff suppressed because it is too large)

deps/aro/Builtins/Properties.zig

@ -2,6 +2,7 @@ const std = @import("std");
const Properties = @This();
param_str: []const u8,
language: Language = .all_languages,
attributes: Attributes = Attributes{},
header: Header = .none,
@ -136,3 +137,7 @@ pub const Target = enum {
/// Targets for which a builtin is enabled
pub const TargetSet = std.enums.EnumSet(Target);
pub fn isVarArgs(properties: Properties) bool {
return properties.param_str[properties.param_str.len - 1] == '.';
}

deps/aro/CodeGen.zig

@ -1,7 +1,8 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const BuiltinFunction = @import("builtins/BuiltinFunction.zig");
const Builtins = @import("Builtins.zig");
const Builtin = Builtins.Builtin;
const Compilation = @import("Compilation.zig");
const Interner = @import("Interner.zig");
const Ir = @import("Ir.zig");
@ -1159,10 +1160,10 @@ fn genBoolExpr(c: *CodeGen, base: NodeIndex, true_label: Ir.Ref, false_label: Ir
try c.addBranch(cmp, true_label, false_label);
}
fn genBuiltinCall(c: *CodeGen, builtin: BuiltinFunction, arg_nodes: []const NodeIndex, ty: Type) Error!Ir.Ref {
fn genBuiltinCall(c: *CodeGen, builtin: Builtin, arg_nodes: []const NodeIndex, ty: Type) Error!Ir.Ref {
_ = arg_nodes;
_ = ty;
return c.comp.diag.fatalNoSrc("TODO CodeGen.genBuiltinCall {s}\n", .{BuiltinFunction.nameFromTag(builtin.tag).span()});
return c.comp.diag.fatalNoSrc("TODO CodeGen.genBuiltinCall {s}\n", .{Builtin.nameFromTag(builtin.tag).span()});
}
fn genCall(c: *CodeGen, fn_node: NodeIndex, arg_nodes: []const NodeIndex, ty: Type) Error!Ir.Ref {

deps/aro/Compilation.zig

@ -4,6 +4,7 @@ const mem = std.mem;
const Allocator = mem.Allocator;
const EpochSeconds = std.time.epoch.EpochSeconds;
const Builtins = @import("Builtins.zig");
const Builtin = Builtins.Builtin;
const Diagnostics = @import("Diagnostics.zig");
const LangOpts = @import("LangOpts.zig");
const Source = @import("Source.zig");
@ -14,7 +15,6 @@ const Pragma = @import("Pragma.zig");
const StringInterner = @import("StringInterner.zig");
const record_layout = @import("record_layout.zig");
const target_util = @import("target.zig");
const BuiltinFunction = @import("builtins/BuiltinFunction.zig");
const Compilation = @This();
@ -238,6 +238,8 @@ pub fn generateBuiltinMacros(comp: *Compilation) !Source {
\\#define __STDC_NO_COMPLEX__ 1
\\#define __STDC_NO_THREADS__ 1
\\#define __STDC_NO_VLA__ 1
\\#define __STDC_UTF_16__ 1
\\#define __STDC_UTF_32__ 1
\\
);
if (comp.langopts.standard.StdCVersionMacro()) |stdc_version| {
@ -1193,9 +1195,7 @@ pub const IncludeDirIterator = struct {
while (self.next()) |found| {
const path = try std.fs.path.join(allocator, &.{ found.path, filename });
if (self.comp.langopts.ms_extensions) {
for (path) |*c| {
if (c.* == '\\') c.* = '/';
}
std.mem.replaceScalar(u8, path, '\\', '/');
}
return .{ .path = path, .kind = found.kind };
}
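
The hand-rolled byte loop becomes a call to the standard-library helper, which performs the same in-place scalar substitution. A runnable check of the behavior:

const std = @import("std");

test "replaceScalar flips MS-style path separators in place" {
    var path = "include\\sys\\types.h".*;
    std.mem.replaceScalar(u8, &path, '\\', '/');
    try std.testing.expectEqualStrings("include/sys/types.h", &path);
}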
@ -1416,11 +1416,11 @@ pub fn hasBuiltin(comp: *const Compilation, name: []const u8) bool {
std.mem.eql(u8, name, "__builtin_offsetof") or
std.mem.eql(u8, name, "__builtin_types_compatible_p")) return true;
const builtin = BuiltinFunction.fromName(name) orelse return false;
const builtin = Builtin.fromName(name) orelse return false;
return comp.hasBuiltinFunction(builtin);
}
pub fn hasBuiltinFunction(comp: *const Compilation, builtin: BuiltinFunction) bool {
pub fn hasBuiltinFunction(comp: *const Compilation, builtin: Builtin) bool {
if (!target_util.builtinEnabled(comp.target, builtin.properties.target_set)) return false;
switch (builtin.properties.language) {
@ -1430,6 +1430,20 @@ pub fn hasBuiltinFunction(comp: *const Compilation, builtin: BuiltinFunction) bo
}
}
pub const CharUnitSize = enum(u32) {
@"1" = 1,
@"2" = 2,
@"4" = 4,
pub fn Type(comptime self: CharUnitSize) type {
return switch (self) {
.@"1" => u8,
.@"2" => u16,
.@"4" => u32,
};
}
};
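
`CharUnitSize` couples the byte width of one character unit (used at runtime via `@intFromEnum`) with the matching integer type (used at comptime via `Type()`); the rewritten `stringLiteral` in Parser.zig below relies on both. A quick standalone check, copying the enum as defined above:

const std = @import("std");

const CharUnitSize = enum(u32) {
    @"1" = 1,
    @"2" = 2,
    @"4" = 4,

    pub fn Type(comptime self: CharUnitSize) type {
        return switch (self) {
            .@"1" => u8,
            .@"2" => u16,
            .@"4" => u32,
        };
    }
};

test "unit size and element type stay in sync" {
    try std.testing.expect(CharUnitSize.Type(.@"2") == u16);
    try std.testing.expectEqual(@as(u32, 2), @intFromEnum(CharUnitSize.@"2"));
}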
pub const renderErrors = Diagnostics.render;
test "addSourceFromReader" {

deps/aro/Diagnostics.zig

@ -4,8 +4,9 @@ const Allocator = mem.Allocator;
const Source = @import("Source.zig");
const Compilation = @import("Compilation.zig");
const Attribute = @import("Attribute.zig");
const BuiltinFunction = @import("builtins/BuiltinFunction.zig");
const Header = @import("builtins/Properties.zig").Header;
const Builtins = @import("Builtins.zig");
const Builtin = Builtins.Builtin;
const Header = @import("Builtins/Properties.zig").Header;
const Tree = @import("Tree.zig");
const util = @import("util.zig");
const is_windows = @import("builtin").os.tag == .windows;
@ -51,7 +52,7 @@ pub const Message = struct {
specifier: enum { @"struct", @"union", @"enum" },
},
builtin_with_header: struct {
builtin: BuiltinFunction.Tag,
builtin: Builtin.Tag,
header: Header,
},
invalid_escape: struct {
@ -69,10 +70,9 @@ pub const Message = struct {
pub const Tag = std.meta.DeclEnum(messages);
// u4 to avoid any possible packed struct issues
pub const Kind = enum(u4) { @"fatal error", @"error", note, warning, off, default };
pub const Kind = enum { @"fatal error", @"error", note, warning, off, default };
pub const Options = packed struct {
pub const Options = struct {
// do not directly use these, instead add `const NAME = true;`
all: Kind = .default,
extra: Kind = .default,
@ -178,6 +178,7 @@ pub const Options = packed struct {
@"invalid-source-encoding": Kind = .default,
@"four-char-constants": Kind = .default,
@"unknown-escape-sequence": Kind = .default,
@"invalid-pp-token": Kind = .default,
};
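
Dropping the explicit u4 tag and the `packed` qualifier is possible because `Options` no longer needs a guaranteed bit layout; a plain enum simply gets the smallest tag type that fits its variants. A standalone check (six variants fit in three bits):

const std = @import("std");

const Kind = enum { @"fatal error", @"error", note, warning, off, default };

test "a plain enum picks its natural tag size" {
    // Values 0..5 need three bits, so the inferred tag type is u3; the
    // old `enum(u4)` existed only to keep the packed struct well-defined.
    try std.testing.expect(@typeInfo(Kind).Enum.tag_type == u3);
}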
const messages = struct {
@ -2509,6 +2510,42 @@ const messages = struct {
const opt = "unknown-escape-sequence";
const extra = .invalid_escape;
};
pub const attribute_requires_string = struct {
const msg = "attribute '{s}' requires an ordinary string";
const kind = .@"error";
const extra = .str;
};
pub const unterminated_string_literal_warning = struct {
const msg = "missing terminating '\"' character";
const kind = .warning;
const opt = "invalid-pp-token";
};
pub const unterminated_string_literal_error = struct {
const msg = "missing terminating '\"' character";
const kind = .@"error";
};
pub const empty_char_literal_warning = struct {
const msg = "empty character constant";
const kind = .warning;
const opt = "invalid-pp-token";
};
pub const empty_char_literal_error = struct {
const msg = "empty character constant";
const kind = .@"error";
};
pub const unterminated_char_literal_warning = struct {
const msg = "missing terminating ' character";
const kind = .warning;
const opt = "invalid-pp-token";
};
pub const unterminated_char_literal_error = struct {
const msg = "missing terminating ' character";
const kind = .@"error";
};
pub const unterminated_comment = struct {
const msg = "unterminated comment";
const kind = .@"error";
};
};
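
The new diagnostics follow the existing table pattern: each pub declaration of `messages` is one diagnostic, and `std.meta.DeclEnum` (see the `Tag` definition above) derives the tag enum from the declaration names. A standalone sketch of that pattern with two hypothetical messages:

const std = @import("std");

const messages = struct {
    pub const unterminated_comment = struct {
        const msg = "unterminated comment";
        const kind = .@"error";
    };
    pub const invalid_pp_token = struct {
        const msg = "invalid preprocessing token";
        const kind = .warning;
    };
};

const Tag = std.meta.DeclEnum(messages);

test "decl names become diagnostic tags" {
    try std.testing.expect(@hasField(Tag, "unterminated_comment"));
    try std.testing.expectEqual(@as(usize, 2), @typeInfo(Tag).Enum.fields.len);
}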
list: std.ArrayListUnmanaged(Message) = .{},
@ -2750,7 +2787,7 @@ pub fn renderMessage(comp: *Compilation, m: anytype, msg: Message) void {
}),
.builtin_with_header => m.print(info.msg, .{
@tagName(msg.extra.builtin_with_header.header),
BuiltinFunction.nameFromTag(msg.extra.builtin_with_header.builtin).span(),
Builtin.nameFromTag(msg.extra.builtin_with_header.builtin).span(),
}),
.invalid_escape => {
if (std.ascii.isPrint(msg.extra.invalid_escape.char)) {

deps/aro/Ir.zig (2 changed lines)

@ -552,7 +552,7 @@ fn writeValue(ir: Ir, val_ref: Interner.Ref, color: bool, w: anytype) !void {
switch (v.tag) {
.unavailable => try w.writeAll(" unavailable"),
.int => try w.print("{d}", .{v.data.int}),
.bytes => try w.print("\"{s}\"", .{v.data.bytes.slice(ir.strings)}),
.bytes => try w.print("\"{s}\"", .{v.data.bytes.slice(ir.strings, .@"1")}),
// std.fmt does @as instead of @floatCast
.float => try w.print("{d}", .{@as(f64, @floatCast(v.data.float))}),
else => try w.print("({s})", .{@tagName(v.tag)}),

deps/aro/Parser.zig (291 changed lines)

@ -17,7 +17,7 @@ const NodeList = std.ArrayList(NodeIndex);
const InitList = @import("InitList.zig");
const Attribute = @import("Attribute.zig");
const CharInfo = @import("CharInfo.zig");
const CharLiteral = @import("CharLiteral.zig");
const TextLiteral = @import("TextLiteral.zig");
const Value = @import("Value.zig");
const SymbolStack = @import("SymbolStack.zig");
const Symbol = SymbolStack.Symbol;
@ -26,7 +26,8 @@ const StringId = @import("StringInterner.zig").StringId;
const number_affixes = @import("number_affixes.zig");
const NumberPrefix = number_affixes.Prefix;
const NumberSuffix = number_affixes.Suffix;
const BuiltinFunction = @import("builtins/BuiltinFunction.zig");
const Builtins = @import("Builtins.zig");
const Builtin = Builtins.Builtin;
const target_util = @import("target.zig");
const Parser = @This();
@ -467,7 +468,7 @@ fn checkDeprecatedUnavailable(p: *Parser, ty: Type, usage_tok: TokenIndex, decl_
defer p.strings.items.len = strings_top;
const w = p.strings.writer();
const msg_str = p.retainedString(@"error".msg);
const msg_str = p.attributeMessageString(@"error".msg);
try w.print("call to '{s}' declared with attribute error: {s}", .{ p.tokSlice(@"error".__name_tok), msg_str });
const str = try p.comp.diag.arena.allocator().dupe(u8, p.strings.items[strings_top..]);
try p.errStr(.error_attribute, usage_tok, str);
@ -477,7 +478,7 @@ fn checkDeprecatedUnavailable(p: *Parser, ty: Type, usage_tok: TokenIndex, decl_
defer p.strings.items.len = strings_top;
const w = p.strings.writer();
const msg_str = p.retainedString(warning.msg);
const msg_str = p.attributeMessageString(warning.msg);
try w.print("call to '{s}' declared with attribute warning: {s}", .{ p.tokSlice(warning.__name_tok), msg_str });
const str = try p.comp.diag.arena.allocator().dupe(u8, p.strings.items[strings_top..]);
try p.errStr(.warning_attribute, usage_tok, str);
@ -492,9 +493,10 @@ fn checkDeprecatedUnavailable(p: *Parser, ty: Type, usage_tok: TokenIndex, decl_
}
}
/// Assumes that the specified range was created by an ordinary or `u8` string literal
/// Returned slice is invalidated if additional strings are added to p.retained_strings
fn retainedString(p: *Parser, range: Value.ByteRange) []const u8 {
return range.slice(p.retained_strings.items);
fn attributeMessageString(p: *Parser, range: Value.ByteRange) []const u8 {
return range.slice(p.retained_strings.items, .@"1");
}
fn errDeprecated(p: *Parser, tag: Diagnostics.Tag, tok_i: TokenIndex, msg: ?Value.ByteRange) Compilation.Error!void {
@ -510,7 +512,7 @@ fn errDeprecated(p: *Parser, tag: Diagnostics.Tag, tok_i: TokenIndex, msg: ?Valu
};
try w.writeAll(reason);
if (msg) |m| {
const str = p.retainedString(m);
const str = p.attributeMessageString(m);
try w.print(": {s}", .{str});
}
const str = try p.comp.diag.arena.allocator().dupe(u8, p.strings.items[strings_top..]);
@ -901,7 +903,7 @@ fn decl(p: *Parser) Error!bool {
break :blk DeclSpec{ .ty = try spec.finish(p) };
};
if (decl_spec.noreturn) |tok| {
const attr = Attribute{ .tag = .noreturn, .args = .{ .noreturn = {} }, .syntax = .keyword };
const attr = Attribute{ .tag = .noreturn, .args = .{ .noreturn = .{} }, .syntax = .keyword };
try p.attr_buf.append(p.gpa, .{ .attr = attr, .tok = tok });
}
var init_d = (try p.initDeclarator(&decl_spec, attr_buf_top)) orelse {
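
Marker attributes' payloads changed from `void` to empty structs in the flattened `attributes` table, which is why the keyword-attribute initializer above changes from `.{ .noreturn = {} }` to `.{ .noreturn = .{} }`. A minimal sketch of the same shape with hypothetical marker attributes:

const std = @import("std");

const Args = union(enum) {
    cold: struct {},
    hot: struct {},
};

test "an empty struct payload initializes with .{}" {
    const a: Args = .{ .cold = .{} }; // formerly `.{ .cold = {} }` with void payloads
    try std.testing.expect(a == .cold);
}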
@ -985,7 +987,7 @@ fn decl(p: *Parser) Error!bool {
const attr_buf_top_declarator = p.attr_buf.len;
defer p.attr_buf.len = attr_buf_top_declarator;
var d = (try p.declarator(param_decl_spec.ty, .normal)) orelse {
var d = (try p.declarator(param_decl_spec.ty, .param)) orelse {
try p.errTok(.missing_declaration, first_tok);
_ = try p.expectToken(.semicolon);
continue :param_loop;
@ -1152,17 +1154,13 @@ fn staticAssertMessage(p: *Parser, cond_node: NodeIndex, message: Result) !?[]co
try buf.appendSlice(")'");
}
if (message.node != .none) {
assert(p.nodes.items(.tag)[@intFromEnum(message.node)] == .string_literal_expr);
if (buf.items.len > 0) {
try buf.append(' ');
}
const data = message.val.data.bytes;
try buf.ensureUnusedCapacity(data.len());
try Tree.dumpStr(
p.retained_strings.items,
data,
p.nodes.items(.tag)[@intFromEnum(message.node)],
buf.writer(),
);
const byte_range = message.val.data.bytes;
try buf.ensureUnusedCapacity(byte_range.len());
try byte_range.dumpString(message.ty, p.comp, p.retained_strings.items, buf.writer());
}
return try p.comp.diag.arena.allocator().dupe(u8, buf.items);
}
@ -1183,6 +1181,7 @@ fn staticAssert(p: *Parser) Error!bool {
.string_literal_utf_8,
.string_literal_utf_32,
.string_literal_wide,
.unterminated_string_literal,
=> try p.stringLiteral(),
else => {
try p.err(.expected_str_literal);
@ -2366,7 +2365,9 @@ fn enumSpec(p: *Parser) Error!Type {
// check if this is a reference to a previous type
const interned_name = try p.comp.intern(p.tokSlice(ident));
if (try p.syms.findTag(p, interned_name, .keyword_enum, ident, p.tok_ids[p.tok_i])) |prev| {
try p.checkEnumFixedTy(fixed_ty, ident, prev);
// only check fixed underlying type in forward declarations and not in references.
if (p.tok_ids[p.tok_i] == .semicolon)
try p.checkEnumFixedTy(fixed_ty, ident, prev);
return prev.ty;
} else {
// this is a forward declaration, create a new enum Type.
@ -3952,7 +3953,7 @@ fn assembly(p: *Parser, kind: enum { global, decl_label, stmt }) Error!?NodeInde
fn asmStr(p: *Parser) Error!Result {
var i = p.tok_i;
while (true) : (i += 1) switch (p.tok_ids[i]) {
.string_literal => {},
.string_literal, .unterminated_string_literal => {},
.string_literal_utf_16, .string_literal_utf_8, .string_literal_utf_32 => {
try p.errStr(.invalid_asm_str, p.tok_i, "unicode");
return error.ParsingFailed;
@ -4607,7 +4608,7 @@ const CallExpr = union(enum) {
standard: NodeIndex,
builtin: struct {
node: NodeIndex,
tag: BuiltinFunction.Tag,
tag: Builtin.Tag,
},
fn init(p: *Parser, call_node: NodeIndex, func_node: NodeIndex) CallExpr {
@ -4624,9 +4625,9 @@ const CallExpr = union(enum) {
return switch (self) {
.standard => true,
.builtin => |builtin| switch (builtin.tag) {
BuiltinFunction.tagFromName("__builtin_va_start").?,
BuiltinFunction.tagFromName("__va_start").?,
BuiltinFunction.tagFromName("va_start").?,
Builtin.tagFromName("__builtin_va_start").?,
Builtin.tagFromName("__va_start").?,
Builtin.tagFromName("va_start").?,
=> arg_idx != 1,
else => true,
},
@ -4637,11 +4638,11 @@ const CallExpr = union(enum) {
return switch (self) {
.standard => true,
.builtin => |builtin| switch (builtin.tag) {
BuiltinFunction.tagFromName("__builtin_va_start").?,
BuiltinFunction.tagFromName("__va_start").?,
BuiltinFunction.tagFromName("va_start").?,
Builtin.tagFromName("__builtin_va_start").?,
Builtin.tagFromName("__va_start").?,
Builtin.tagFromName("va_start").?,
=> arg_idx != 1,
BuiltinFunction.tagFromName("__builtin_complex").? => false,
Builtin.tagFromName("__builtin_complex").? => false,
else => true,
},
};
@ -4658,11 +4659,11 @@ const CallExpr = union(enum) {
const builtin_tok = p.nodes.items(.data)[@intFromEnum(self.builtin.node)].decl.name;
switch (self.builtin.tag) {
BuiltinFunction.tagFromName("__builtin_va_start").?,
BuiltinFunction.tagFromName("__va_start").?,
BuiltinFunction.tagFromName("va_start").?,
Builtin.tagFromName("__builtin_va_start").?,
Builtin.tagFromName("__va_start").?,
Builtin.tagFromName("va_start").?,
=> return p.checkVaStartArg(builtin_tok, first_after, param_tok, arg, arg_idx),
BuiltinFunction.tagFromName("__builtin_complex").? => return p.checkComplexArg(builtin_tok, first_after, param_tok, arg, arg_idx),
Builtin.tagFromName("__builtin_complex").? => return p.checkComplexArg(builtin_tok, first_after, param_tok, arg, arg_idx),
else => {},
}
}
@ -4676,7 +4677,7 @@ const CallExpr = union(enum) {
return switch (self) {
.standard => null,
.builtin => |builtin| switch (builtin.tag) {
BuiltinFunction.tagFromName("__builtin_complex").? => 2,
Builtin.tagFromName("__builtin_complex").? => 2,
else => null,
},
};
@ -4686,7 +4687,7 @@ const CallExpr = union(enum) {
return switch (self) {
.standard => callable_ty.returnType(),
.builtin => |builtin| switch (builtin.tag) {
BuiltinFunction.tagFromName("__builtin_complex").? => {
Builtin.tagFromName("__builtin_complex").? => {
const last_param = p.list_buf.items[p.list_buf.items.len - 1];
return p.nodes.items(.ty)[@intFromEnum(last_param)].makeComplex();
},
@ -6343,7 +6344,14 @@ fn typesCompatible(p: *Parser) Error!Result {
try p.expectClosing(l_paren, .r_paren);
const compatible = first.compatible(second, p.comp);
var first_unqual = first.canonicalize(.standard);
first_unqual.qual.@"const" = false;
first_unqual.qual.@"volatile" = false;
var second_unqual = second.canonicalize(.standard);
second_unqual.qual.@"const" = false;
second_unqual.qual.@"volatile" = false;
const compatible = first_unqual.eql(second_unqual, p.comp, true);
var res = Result{
.val = Value.int(@intFromBool(compatible)),
@ -7122,7 +7130,7 @@ fn checkComplexArg(p: *Parser, builtin_tok: TokenIndex, first_after: TokenIndex,
}
}
fn checkVariableBuiltinArgument(p: *Parser, builtin_tok: TokenIndex, first_after: TokenIndex, param_tok: TokenIndex, arg: *Result, arg_idx: u32, tag: BuiltinFunction.Tag) !void {
fn checkVariableBuiltinArgument(p: *Parser, builtin_tok: TokenIndex, first_after: TokenIndex, param_tok: TokenIndex, arg: *Result, arg_idx: u32, tag: Builtin.Tag) !void {
switch (tag) {
.__builtin_va_start, .__va_start, .va_start => return p.checkVaStartArg(builtin_tok, first_after, param_tok, arg, arg_idx),
else => {},
@ -7460,12 +7468,15 @@ fn primaryExpr(p: *Parser) Error!Result {
.string_literal_utf_8,
.string_literal_utf_32,
.string_literal_wide,
.unterminated_string_literal,
=> return p.stringLiteral(),
.char_literal,
.char_literal_utf_8,
.char_literal_utf_16,
.char_literal_utf_32,
.char_literal_wide,
.empty_char_literal,
.unterminated_char_literal,
=> return p.charLiteral(),
.zero => {
p.tok_i += 1;
@ -7522,131 +7533,123 @@ fn makePredefinedIdentifier(p: *Parser, start: u32) !Result {
}
fn stringLiteral(p: *Parser) Error!Result {
var start = p.tok_i;
// use 1 for wchar_t
var width: ?u8 = null;
var is_u8_literal = false;
while (true) {
switch (p.tok_ids[p.tok_i]) {
.string_literal => {},
.string_literal_utf_16 => if (width) |some| {
if (some != 16) try p.err(.unsupported_str_cat);
} else {
width = 16;
},
.string_literal_utf_8 => {
is_u8_literal = true;
if (width) |some| {
if (some != 8) try p.err(.unsupported_str_cat);
} else {
width = 8;
var string_end = p.tok_i;
var string_kind: TextLiteral.Kind = .char;
while (TextLiteral.Kind.classify(p.tok_ids[string_end], .string_literal)) |next| : (string_end += 1) {
string_kind = string_kind.concat(next) catch {
try p.errTok(.unsupported_str_cat, string_end);
while (p.tok_ids[p.tok_i].isStringLiteral()) : (p.tok_i += 1) {}
return error.ParsingFailed;
};
if (string_kind == .unterminated) {
try p.errTok(.unterminated_string_literal_error, string_end);
p.tok_i = string_end + 1;
return error.ParsingFailed;
}
}
assert(string_end > p.tok_i);
const char_width = string_kind.charUnitSize(p.comp);
const retain_start = mem.alignForward(usize, p.retained_strings.items.len, string_kind.internalStorageAlignment(p.comp));
try p.retained_strings.resize(retain_start);
while (p.tok_i < string_end) : (p.tok_i += 1) {
const this_kind = TextLiteral.Kind.classify(p.tok_ids[p.tok_i], .string_literal).?;
const slice = this_kind.contentSlice(p.tokSlice(p.tok_i));
var char_literal_parser = TextLiteral.Parser.init(slice, this_kind, 0x10ffff, p.comp);
try p.retained_strings.ensureUnusedCapacity((slice.len + 1) * @intFromEnum(char_width)); // +1 for null terminator
while (char_literal_parser.next()) |item| switch (item) {
.value => |v| {
switch (char_width) {
.@"1" => p.retained_strings.appendAssumeCapacity(@intCast(v)),
.@"2" => {
const word: u16 = @intCast(v);
p.retained_strings.appendSliceAssumeCapacity(mem.asBytes(&word));
},
.@"4" => p.retained_strings.appendSliceAssumeCapacity(mem.asBytes(&v)),
}
},
.string_literal_utf_32 => if (width) |some| {
if (some != 32) try p.err(.unsupported_str_cat);
} else {
width = 32;
.codepoint => |c| {
switch (char_width) {
.@"1" => {
var buf: [4]u8 = undefined;
const written = std.unicode.utf8Encode(c, &buf) catch unreachable;
const encoded = buf[0..written];
p.retained_strings.appendSliceAssumeCapacity(encoded);
},
.@"2" => {
var utf16_buf: [2]u16 = undefined;
var utf8_buf: [4]u8 = undefined;
const utf8_written = std.unicode.utf8Encode(c, &utf8_buf) catch unreachable;
const utf16_written = std.unicode.utf8ToUtf16Le(&utf16_buf, utf8_buf[0..utf8_written]) catch unreachable;
const bytes = std.mem.sliceAsBytes(utf16_buf[0..utf16_written]);
p.retained_strings.appendSliceAssumeCapacity(bytes);
},
.@"4" => {
const val: u32 = c;
p.retained_strings.appendSliceAssumeCapacity(mem.asBytes(&val));
},
}
},
.string_literal_wide => if (width) |some| {
if (some != 1) try p.err(.unsupported_str_cat);
} else {
width = 1;
.improperly_encoded => |bytes| p.retained_strings.appendSliceAssumeCapacity(bytes),
.utf8_text => |view| {
switch (char_width) {
.@"1" => p.retained_strings.appendSliceAssumeCapacity(view.bytes),
.@"2" => {
var capacity_slice: []align(@alignOf(u16)) u8 = @alignCast(p.retained_strings.unusedCapacitySlice());
const dest_len = std.mem.alignBackward(usize, capacity_slice.len, 2);
var dest = std.mem.bytesAsSlice(u16, capacity_slice[0..dest_len]);
const words_written = std.unicode.utf8ToUtf16Le(dest, view.bytes) catch unreachable;
p.retained_strings.resize(p.retained_strings.items.len + words_written * 2) catch unreachable;
},
.@"4" => {
var it = view.iterator();
while (it.nextCodepoint()) |codepoint| {
const val: u32 = codepoint;
p.retained_strings.appendSliceAssumeCapacity(mem.asBytes(&val));
}
},
}
},
else => break,
}
p.tok_i += 1;
}
if (width == null) width = 8;
if (width.? != 8) return p.todo("unicode string literals");
const string_start = p.retained_strings.items.len;
while (start < p.tok_i) : (start += 1) {
var slice = p.tokSlice(start);
slice = slice[0 .. slice.len - 1];
var i = mem.indexOf(u8, slice, "\"").? + 1;
try p.retained_strings.ensureUnusedCapacity(slice.len);
while (i < slice.len) : (i += 1) {
switch (slice[i]) {
'\\' => {
i += 1;
switch (slice[i]) {
'\n' => i += 1,
'\r' => i += 2,
'\'', '\"', '\\', '?' => |c| p.retained_strings.appendAssumeCapacity(c),
'n' => p.retained_strings.appendAssumeCapacity('\n'),
'r' => p.retained_strings.appendAssumeCapacity('\r'),
't' => p.retained_strings.appendAssumeCapacity('\t'),
'a' => p.retained_strings.appendAssumeCapacity(0x07),
'b' => p.retained_strings.appendAssumeCapacity(0x08),
'e' => {
try p.errExtra(.non_standard_escape_char, start, .{ .invalid_escape = .{ .char = 'e', .offset = @intCast(i) } });
p.retained_strings.appendAssumeCapacity(0x1B);
},
'f' => p.retained_strings.appendAssumeCapacity(0x0C),
'v' => p.retained_strings.appendAssumeCapacity(0x0B),
'x' => p.retained_strings.appendAssumeCapacity(try p.parseNumberEscape(start, 16, slice, &i)),
'0'...'7' => p.retained_strings.appendAssumeCapacity(try p.parseNumberEscape(start, 8, slice, &i)),
'u' => try p.parseUnicodeEscape(start, 4, slice, &i),
'U' => try p.parseUnicodeEscape(start, 8, slice, &i),
else => unreachable,
}
},
else => |c| p.retained_strings.appendAssumeCapacity(c),
}
};
for (char_literal_parser.errors.constSlice()) |item| {
try p.errExtra(item.tag, p.tok_i, item.extra);
}
}
try p.retained_strings.append(0);
const slice = p.retained_strings.items[string_start..];
p.retained_strings.appendNTimesAssumeCapacity(0, @intFromEnum(char_width));
const slice = p.retained_strings.items[retain_start..];
const arr_ty = try p.arena.create(Type.Array);
const specifier: Type.Specifier = if (is_u8_literal and p.comp.langopts.hasChar8_T()) .uchar else .char;
arr_ty.* = .{ .elem = .{ .specifier = specifier }, .len = slice.len };
arr_ty.* = .{ .elem = string_kind.elementType(p.comp), .len = @divExact(slice.len, @intFromEnum(char_width)) };
var res: Result = .{
.ty = .{
.specifier = .array,
.data = .{ .array = arr_ty },
},
.val = Value.bytes(@intCast(string_start), @intCast(p.retained_strings.items.len)),
.val = Value.bytes(@intCast(retain_start), @intCast(p.retained_strings.items.len)),
};
res.node = try p.addNode(.{ .tag = .string_literal_expr, .ty = res.ty, .data = undefined });
if (!p.in_macro) try p.value_map.put(res.node, res.val);
return res;
}
fn parseNumberEscape(p: *Parser, tok: TokenIndex, base: u8, slice: []const u8, i: *usize) !u8 {
if (base == 16) i.* += 1; // skip x
var char: u8 = 0;
var reported = false;
while (i.* < slice.len) : (i.* += 1) {
const val = std.fmt.charToDigit(slice[i.*], base) catch break; // validated by Tokenizer
const product, const overflowed = @mulWithOverflow(char, base);
if (overflowed != 0 and !reported) {
try p.errExtra(.escape_sequence_overflow, tok, .{ .unsigned = i.* });
reported = true;
}
char = product + val;
}
i.* -= 1;
return char;
}
fn parseUnicodeEscape(p: *Parser, tok: TokenIndex, count: u8, slice: []const u8, i: *usize) !void {
const c = std.fmt.parseInt(u21, slice[i.* + 1 ..][0..count], 16) catch 0x110000; // count validated by tokenizer
i.* += count + 1;
if (!std.unicode.utf8ValidCodepoint(c) or (c < 0xa0 and c != '$' and c != '@' and c != '`')) {
try p.errExtra(.invalid_universal_character, tok, .{ .unsigned = i.* - count - 2 });
return;
}
var buf: [4]u8 = undefined;
const to_write = std.unicode.utf8Encode(c, &buf) catch unreachable; // validated above
p.retained_strings.appendSliceAssumeCapacity(buf[0..to_write]);
}
fn charLiteral(p: *Parser) Error!Result {
defer p.tok_i += 1;
const tok_id = p.tok_ids[p.tok_i];
const char_kind = CharLiteral.Kind.classify(tok_id);
const char_kind = TextLiteral.Kind.classify(tok_id, .char_literal) orelse {
if (tok_id == .empty_char_literal) {
try p.err(.empty_char_literal_error);
} else if (tok_id == .unterminated_char_literal) {
try p.err(.unterminated_char_literal_error);
} else unreachable;
return .{
.ty = Type.int,
.val = Value.int(0),
.node = try p.addNode(.{ .tag = .char_literal, .ty = Type.int, .data = undefined }),
};
};
var val: u32 = 0;
const slice = char_kind.contentSlice(p.tokSlice(p.tok_i));
@ -7655,7 +7658,8 @@ fn charLiteral(p: *Parser) Error!Result {
// fast path: single unescaped ASCII char
val = slice[0];
} else {
var char_literal_parser = CharLiteral.Parser.init(slice, char_kind, p.comp);
const max_codepoint = char_kind.maxCodepoint(p.comp);
var char_literal_parser = TextLiteral.Parser.init(slice, char_kind, max_codepoint, p.comp);
const max_chars_expected = 4;
var stack_fallback = std.heap.stackFallback(max_chars_expected * @sizeOf(u32), p.comp.gpa);
@ -7663,20 +7667,21 @@ fn charLiteral(p: *Parser) Error!Result {
defer chars.deinit();
while (char_literal_parser.next()) |item| switch (item) {
.value => |c| try chars.append(c),
.value => |v| try chars.append(v),
.codepoint => |c| try chars.append(c),
.improperly_encoded => |s| {
try chars.ensureUnusedCapacity(s.len);
for (s) |c| chars.appendAssumeCapacity(c);
},
.utf8_text => |view| {
var it = view.iterator();
var max_codepoint: u21 = 0;
var max_codepoint_seen: u21 = 0;
try chars.ensureUnusedCapacity(view.bytes.len);
while (it.nextCodepoint()) |c| {
max_codepoint = @max(max_codepoint, c);
max_codepoint_seen = @max(max_codepoint_seen, c);
chars.appendAssumeCapacity(c);
}
if (max_codepoint > char_kind.maxCodepoint(p.comp)) {
if (max_codepoint_seen > max_codepoint) {
char_literal_parser.err(.char_too_large, .{ .none = {} });
}
},

View File

@ -266,6 +266,15 @@ pub fn addIncludeResume(pp: *Preprocessor, source: Source.Id, offset: u32, line:
} });
}
fn invalidTokenDiagnostic(tok_id: Token.Id) Diagnostics.Tag {
return switch (tok_id) {
.unterminated_string_literal => .unterminated_string_literal_warning,
.empty_char_literal => .empty_char_literal_warning,
.unterminated_char_literal => .unterminated_char_literal_warning,
else => unreachable,
};
}
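// An illustration of the phase split encoded above (hypothetical input):
// given the line
//     #define BAD 'x
// the tokenizer emits .unterminated_char_literal, the preprocessor downgrades
// it through invalidTokenDiagnostic to .unterminated_char_literal_warning, and
// the parser reports .unterminated_char_literal_error if the token is ever
// parsed as an expression.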
/// Return the name of the #ifndef guard macro that starts a source, if any.
fn findIncludeGuard(pp: *Preprocessor, source: Source) ?[]const u8 {
var tokenizer = Tokenizer{
@ -631,6 +640,12 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token {
}
return tokFromRaw(tok);
},
.unterminated_string_literal, .unterminated_char_literal, .empty_char_literal => |tag| {
start_of_line = false;
try pp.err(tok, invalidTokenDiagnostic(tag));
try pp.expandMacro(&tokenizer, tok);
},
.unterminated_comment => try pp.err(tok, .unterminated_comment),
else => {
if (tok.id.isMacroIdentifier() and pp.poisoned_identifiers.get(pp.tokSlice(tok)) != null) {
try pp.err(tok, .poisoned_identifier);
@ -1239,7 +1254,7 @@ fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const Token) !?[]co
}
for (params) |tok| {
const str = pp.expandedSliceExtra(tok, .preserve_macro_ws, false);
const str = pp.expandedSliceExtra(tok, .preserve_macro_ws);
try pp.char_buf.appendSlice(str);
}
@ -1985,12 +2000,7 @@ fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, raw: RawToken) MacroErr
}
}
fn expandedSliceExtra(
pp: *const Preprocessor,
tok: Token,
macro_ws_handling: enum { single_macro_ws, preserve_macro_ws },
path_escapes: bool,
) []const u8 {
fn expandedSliceExtra(pp: *const Preprocessor, tok: Token, macro_ws_handling: enum { single_macro_ws, preserve_macro_ws }) []const u8 {
if (tok.id.lexeme()) |some| {
if (!tok.id.allowsDigraphs(pp.comp) and !(tok.id == .macro_ws and macro_ws_handling == .preserve_macro_ws)) return some;
}
@ -1999,7 +2009,6 @@ fn expandedSliceExtra(
.comp = pp.comp,
.index = tok.loc.byte_offset,
.source = .generated,
.path_escapes = path_escapes,
};
if (tok.id == .macro_string) {
while (true) : (tmp_tokenizer.index += 1) {
@ -2013,7 +2022,7 @@ fn expandedSliceExtra(
/// Get expanded token source string.
pub fn expandedSlice(pp: *Preprocessor, tok: Token) []const u8 {
return pp.expandedSliceExtra(tok, .single_macro_ws, false);
return pp.expandedSliceExtra(tok, .single_macro_ws);
}
/// Concat two tokens and add the result to pp.generated
@ -2182,6 +2191,11 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
try pp.token_buf.append(tok);
},
.whitespace => need_ws = true,
.unterminated_string_literal, .unterminated_char_literal, .empty_char_literal => |tag| {
try pp.err(tok, invalidTokenDiagnostic(tag));
try pp.token_buf.append(tok);
},
.unterminated_comment => try pp.err(tok, .unterminated_comment),
else => {
if (tok.id != .whitespace and need_ws) {
need_ws = false;
@ -2323,6 +2337,11 @@ fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_pa
}
try pp.token_buf.append(tok);
},
.unterminated_string_literal, .unterminated_char_literal, .empty_char_literal => |tag| {
try pp.err(tok, invalidTokenDiagnostic(tag));
try pp.token_buf.append(tok);
},
.unterminated_comment => try pp.err(tok, .unterminated_comment),
else => {
if (tok.id != .whitespace and need_ws) {
need_ws = false;
@ -2368,8 +2387,6 @@ fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_pa
/// Handle an #embed directive
fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void {
tokenizer.path_escapes = true;
defer tokenizer.path_escapes = false;
const first = tokenizer.nextNoWS();
const filename_tok = pp.findIncludeFilenameToken(first, tokenizer, .expect_nl_eof) catch |er| switch (er) {
error.InvalidInclude => return,
@ -2377,7 +2394,7 @@ fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void {
};
// Check for empty filename.
const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws, true);
const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws);
if (tok_slice.len < 3) {
try pp.err(first, .empty_filename);
return;
@ -2419,8 +2436,6 @@ fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void {
// Handle a #include directive.
fn include(pp: *Preprocessor, tokenizer: *Tokenizer, which: Compilation.WhichInclude) MacroError!void {
tokenizer.path_escapes = true;
defer tokenizer.path_escapes = false;
const first = tokenizer.nextNoWS();
const new_source = findIncludeSource(pp, tokenizer, first, which) catch |er| switch (er) {
error.InvalidInclude => return,
@ -2586,7 +2601,7 @@ fn findIncludeSource(pp: *Preprocessor, tokenizer: *Tokenizer, first: RawToken,
const filename_tok = try pp.findIncludeFilenameToken(first, tokenizer, .expect_nl_eof);
// Check for empty filename.
const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws, true);
const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws);
if (tok_slice.len < 3) {
try pp.err(first, .empty_filename);
return error.InvalidInclude;

5
deps/aro/Source.zig vendored
View File

@ -74,7 +74,10 @@ pub fn lineCol(source: Source, loc: Location) LineCol {
i += 1;
continue;
};
const cp = std.unicode.utf8Decode(source.buf[i..][0..len]) catch unreachable;
const cp = std.unicode.utf8Decode(source.buf[i..][0..len]) catch {
i += 1;
continue;
};
width += codepointWidth(cp);
i += len;
}

View File

@ -197,7 +197,7 @@ pub fn defineSymbol(
},
.decl => if (names[i] == name) {
const prev_ty = s.syms.items(.ty)[i];
if (!ty.eql(prev_ty, p.comp, true)) { // TODO adjusted equality check
if (!ty.eql(prev_ty, p.comp, true)) {
try p.errStr(.redefinition_incompatible, tok, p.tokSlice(tok));
try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
}
@ -243,7 +243,7 @@ pub fn declareSymbol(
},
.decl => if (names[i] == name) {
const prev_ty = s.syms.items(.ty)[i];
if (!ty.eql(prev_ty, p.comp, true)) { // TODO adjusted equality check
if (!ty.eql(prev_ty, p.comp, true)) {
try p.errStr(.redefinition_incompatible, tok, p.tokSlice(tok));
try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
}
@ -251,7 +251,7 @@ pub fn declareSymbol(
},
.def, .constexpr => if (names[i] == name) {
const prev_ty = s.syms.items(.ty)[i];
if (!ty.eql(prev_ty, p.comp, true)) { // TODO adjusted equality check
if (!ty.eql(prev_ty, p.comp, true)) {
try p.errStr(.redefinition_incompatible, tok, p.tokSlice(tok));
try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
break;

371
deps/aro/TextLiteral.zig vendored Normal file
View File

@ -0,0 +1,371 @@
//! Parsing and classification of string and character literals
const std = @import("std");
const Compilation = @import("Compilation.zig");
const Type = @import("Type.zig");
const Diagnostics = @import("Diagnostics.zig");
const Tokenizer = @import("Tokenizer.zig");
const mem = std.mem;
pub const Item = union(enum) {
/// decoded hex or character escape
value: u32,
/// validated unicode codepoint
codepoint: u21,
/// Char literal in the source text is not utf8 encoded
improperly_encoded: []const u8,
/// 1 or more unescaped bytes
utf8_text: std.unicode.Utf8View,
};
const CharDiagnostic = struct {
tag: Diagnostics.Tag,
extra: Diagnostics.Message.Extra,
};
pub const Kind = enum {
char,
wide,
utf_8,
utf_16,
utf_32,
/// Error kind that halts parsing
unterminated,
pub fn classify(id: Tokenizer.Token.Id, context: enum { string_literal, char_literal }) ?Kind {
return switch (context) {
.string_literal => switch (id) {
.string_literal => .char,
.string_literal_utf_8 => .utf_8,
.string_literal_wide => .wide,
.string_literal_utf_16 => .utf_16,
.string_literal_utf_32 => .utf_32,
.unterminated_string_literal => .unterminated,
else => null,
},
.char_literal => switch (id) {
.char_literal => .char,
.char_literal_utf_8 => .utf_8,
.char_literal_wide => .wide,
.char_literal_utf_16 => .utf_16,
.char_literal_utf_32 => .utf_32,
else => null,
},
};
}
/// Should only be called for string literals. Determines the result kind of two adjacent string
/// literals
pub fn concat(self: Kind, other: Kind) !Kind {
if (self == .unterminated or other == .unterminated) return .unterminated;
if (self == other) return self; // can always concat with own kind
if (self == .char) return other; // char + X -> X
if (other == .char) return self; // X + char -> X
return error.CannotConcat;
}
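// A minimal illustrative test of the rules above (a sketch mirroring the C
// concatenation behavior, e.g. "a" u"b" -> u"ab"):
test "concat of adjacent string literal kinds" {
    try std.testing.expectEqual(Kind.utf_16, try Kind.char.concat(.utf_16)); // "a" u"b"
    try std.testing.expectEqual(Kind.wide, try Kind.wide.concat(.char)); // L"a" "b"
    try std.testing.expectError(error.CannotConcat, Kind.utf_16.concat(.utf_32)); // u"a" U"b"
    try std.testing.expectEqual(Kind.unterminated, try Kind.unterminated.concat(.utf_32));
}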
/// Largest unicode codepoint that can be represented by this character kind;
/// may be smaller than the largest integer value the kind can hold.
/// For example, u8 char literals may only specify 0-127 via literals or
/// character escapes, but may specify up to \xFF via hex escapes.
pub fn maxCodepoint(kind: Kind, comp: *const Compilation) u21 {
return @intCast(switch (kind) {
.char => std.math.maxInt(u7),
.wide => @min(0x10FFFF, comp.types.wchar.maxInt(comp)),
.utf_8 => std.math.maxInt(u7),
.utf_16 => std.math.maxInt(u16),
.utf_32 => 0x10FFFF,
.unterminated => unreachable,
});
}
/// Largest integer that can be represented by this character kind
pub fn maxInt(kind: Kind, comp: *const Compilation) u32 {
return @intCast(switch (kind) {
.char, .utf_8 => std.math.maxInt(u8),
.wide => comp.types.wchar.maxInt(comp),
.utf_16 => std.math.maxInt(u16),
.utf_32 => std.math.maxInt(u32),
.unterminated => unreachable,
});
}
/// The C type of a character literal of this kind
pub fn charLiteralType(kind: Kind, comp: *const Compilation) Type {
return switch (kind) {
.char => Type.int,
.wide => comp.types.wchar,
.utf_8 => .{ .specifier = .uchar },
.utf_16 => comp.types.uint_least16_t,
.utf_32 => comp.types.uint_least32_t,
.unterminated => unreachable,
};
}
/// Return the actual contents of the literal with leading / trailing quotes and
/// specifiers removed
pub fn contentSlice(kind: Kind, delimited: []const u8) []const u8 {
const end = delimited.len - 1; // remove trailing quote
return switch (kind) {
.char => delimited[1..end],
.wide => delimited[2..end],
.utf_8 => delimited[3..end],
.utf_16 => delimited[2..end],
.utf_32 => delimited[2..end],
.unterminated => unreachable,
};
}
/// The size of a character unit for a string literal of this kind
pub fn charUnitSize(kind: Kind, comp: *const Compilation) Compilation.CharUnitSize {
return switch (kind) {
.char => .@"1",
.wide => switch (comp.types.wchar.sizeof(comp).?) {
2 => .@"2",
4 => .@"4",
else => unreachable,
},
.utf_8 => .@"1",
.utf_16 => .@"2",
.utf_32 => .@"4",
.unterminated => unreachable,
};
}
/// Required alignment within aro (on the compiler host) for writing to retained_strings
pub fn internalStorageAlignment(kind: Kind, comp: *const Compilation) usize {
return switch (kind.charUnitSize(comp)) {
inline else => |size| @alignOf(size.Type()),
};
}
/// The C type of an element of a string literal of this kind
pub fn elementType(kind: Kind, comp: *const Compilation) Type {
return switch (kind) {
.unterminated => unreachable,
.char => .{ .specifier = .char },
.utf_8 => if (comp.langopts.hasChar8_T()) .{ .specifier = .uchar } else .{ .specifier = .char },
else => kind.charLiteralType(comp),
};
}
};
pub const Parser = struct {
literal: []const u8,
i: usize = 0,
kind: Kind,
max_codepoint: u21,
/// We only want to issue a max of 1 error per char literal
errored: bool = false,
errors: std.BoundedArray(CharDiagnostic, 4) = .{},
comp: *const Compilation,
pub fn init(literal: []const u8, kind: Kind, max_codepoint: u21, comp: *const Compilation) Parser {
return .{
.literal = literal,
.comp = comp,
.kind = kind,
.max_codepoint = max_codepoint,
};
}
fn prefixLen(self: *const Parser) usize {
return switch (self.kind) {
.unterminated => unreachable,
.char => 0,
.utf_8 => 2,
.wide, .utf_16, .utf_32 => 1,
};
}
pub fn err(self: *Parser, tag: Diagnostics.Tag, extra: Diagnostics.Message.Extra) void {
if (self.errored) return;
self.errored = true;
const diagnostic = .{ .tag = tag, .extra = extra };
self.errors.append(diagnostic) catch {
_ = self.errors.pop();
self.errors.append(diagnostic) catch unreachable;
};
}
pub fn warn(self: *Parser, tag: Diagnostics.Tag, extra: Diagnostics.Message.Extra) void {
if (self.errored) return;
self.errors.append(.{ .tag = tag, .extra = extra }) catch {};
}
pub fn next(self: *Parser) ?Item {
if (self.i >= self.literal.len) return null;
const start = self.i;
if (self.literal[start] != '\\') {
self.i = mem.indexOfScalarPos(u8, self.literal, start + 1, '\\') orelse self.literal.len;
const unescaped_slice = self.literal[start..self.i];
const view = std.unicode.Utf8View.init(unescaped_slice) catch {
if (self.kind != .char) {
self.err(.illegal_char_encoding_error, .{ .none = {} });
return null;
}
self.warn(.illegal_char_encoding_warning, .{ .none = {} });
return .{ .improperly_encoded = self.literal[start..self.i] };
};
return .{ .utf8_text = view };
}
switch (self.literal[start + 1]) {
'u', 'U' => return self.parseUnicodeEscape(),
else => return self.parseEscapedChar(),
}
}
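// A minimal sketch of consuming Parser items (illustrative test; the input is
// the contents of the char literal 'A\n' with the quotes stripped):
test "iterate text literal items" {
    var comp = Compilation.init(std.testing.allocator);
    defer comp.deinit();
    var p = Parser.init("A\\n", .char, 0x7F, &comp);
    try std.testing.expect(p.next().? == .utf8_text); // unescaped "A"
    try std.testing.expectEqual(@as(u32, '\n'), p.next().?.value); // decoded \n escape
    try std.testing.expect(p.next() == null);
}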
fn parseUnicodeEscape(self: *Parser) ?Item {
const start = self.i;
std.debug.assert(self.literal[self.i] == '\\');
const kind = self.literal[self.i + 1];
std.debug.assert(kind == 'u' or kind == 'U');
self.i += 2;
if (self.i >= self.literal.len or !std.ascii.isHex(self.literal[self.i])) {
self.err(.missing_hex_escape, .{ .ascii = @intCast(kind) });
return null;
}
const expected_len: usize = if (kind == 'u') 4 else 8;
var overflowed = false;
var count: usize = 0;
var val: u32 = 0;
for (self.literal[self.i..], 0..) |c, i| {
if (i == expected_len) break;
const char = std.fmt.charToDigit(c, 16) catch {
break;
};
val, const overflow = @shlWithOverflow(val, 4);
overflowed = overflowed or overflow != 0;
val |= char;
count += 1;
}
self.i += expected_len;
if (overflowed) {
self.err(.escape_sequence_overflow, .{ .unsigned = start + self.prefixLen() });
return null;
}
if (count != expected_len) {
self.err(.incomplete_universal_character, .{ .none = {} });
return null;
}
if (val > std.math.maxInt(u21) or !std.unicode.utf8ValidCodepoint(@intCast(val))) {
self.err(.invalid_universal_character, .{ .unsigned = start + self.prefixLen() });
return null;
}
if (val > self.max_codepoint) {
self.err(.char_too_large, .{ .none = {} });
return null;
}
if (val < 0xA0 and (val != '$' and val != '@' and val != '`')) {
const is_error = !self.comp.langopts.standard.atLeast(.c2x);
if (val >= 0x20 and val <= 0x7F) {
if (is_error) {
self.err(.ucn_basic_char_error, .{ .ascii = @intCast(val) });
} else {
self.warn(.ucn_basic_char_warning, .{ .ascii = @intCast(val) });
}
} else {
if (is_error) {
self.err(.ucn_control_char_error, .{ .none = {} });
} else {
self.warn(.ucn_control_char_warning, .{ .none = {} });
}
}
}
self.warn(.c89_ucn_in_literal, .{ .none = {} });
return .{ .codepoint = @intCast(val) };
}
fn parseEscapedChar(self: *Parser) Item {
self.i += 1;
const c = self.literal[self.i];
defer if (c != 'x' and (c < '0' or c > '7')) {
self.i += 1;
};
switch (c) {
'\n' => unreachable, // removed by line splicing
'\r' => unreachable, // removed by line splicing
'\'', '\"', '\\', '?' => return .{ .value = c },
'n' => return .{ .value = '\n' },
'r' => return .{ .value = '\r' },
't' => return .{ .value = '\t' },
'a' => return .{ .value = 0x07 },
'b' => return .{ .value = 0x08 },
'e', 'E' => {
self.warn(.non_standard_escape_char, .{ .invalid_escape = .{ .char = c, .offset = @intCast(self.i) } });
return .{ .value = 0x1B };
},
'(', '{', '[', '%' => {
self.warn(.non_standard_escape_char, .{ .invalid_escape = .{ .char = c, .offset = @intCast(self.i) } });
return .{ .value = c };
},
'f' => return .{ .value = 0x0C },
'v' => return .{ .value = 0x0B },
'x' => return .{ .value = self.parseNumberEscape(.hex) },
'0'...'7' => return .{ .value = self.parseNumberEscape(.octal) },
'u', 'U' => unreachable, // handled by parseUnicodeEscape
else => {
self.warn(.unknown_escape_sequence, .{ .invalid_escape = .{ .char = c, .offset = @intCast(self.i) } });
return .{ .value = c };
},
}
}
fn parseNumberEscape(self: *Parser, base: EscapeBase) u32 {
var val: u32 = 0;
var count: usize = 0;
var overflowed = false;
const start = self.i;
defer self.i += count;
const slice = switch (base) {
.octal => self.literal[self.i..@min(self.literal.len, self.i + 3)], // max 3 chars
.hex => blk: {
self.i += 1;
break :blk self.literal[self.i..]; // skip over 'x'; could have an arbitrary number of chars
},
};
for (slice) |c| {
const char = std.fmt.charToDigit(c, @intFromEnum(base)) catch break;
val, const overflow = @shlWithOverflow(val, base.log2());
if (overflow != 0) overflowed = true;
val += char;
count += 1;
}
if (overflowed or val > self.kind.maxInt(self.comp)) {
self.err(.escape_sequence_overflow, .{ .unsigned = start + self.prefixLen() });
return 0;
}
if (count == 0) {
std.debug.assert(base == .hex);
self.err(.missing_hex_escape, .{ .ascii = 'x' });
}
return val;
}
};
const EscapeBase = enum(u8) {
octal = 8,
hex = 16,
fn log2(base: EscapeBase) u4 {
return switch (base) {
.octal => 3,
.hex => 4,
};
}
};
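// Worked examples for the escape decoding above (a sketch; values follow
// from parseNumberEscape with a .char kind, so maxInt is 0xFF):
//     \x41   hex digits "41"                  -> value 0x41 ('A')
//     \101   octal, at most 3 digits consumed -> value 65 ('A')
//     \x     no hex digit after 'x'           -> missing_hex_escape, value 0
//     \x1FF  0x1FF > 0xFF                     -> escape_sequence_overflow, value 0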

121
deps/aro/Tokenizer.zig vendored
View File

@ -30,6 +30,10 @@ pub const Token = struct {
string_literal_utf_32,
string_literal_wide,
/// Any string literal with an embedded newline or EOF
/// Always a parser error; by default just a warning from preprocessor
unterminated_string_literal,
// <foobar> only generated by preprocessor
macro_string,
@ -40,6 +44,17 @@ pub const Token = struct {
char_literal_utf_32,
char_literal_wide,
/// Any character literal with nothing inside the quotes
/// Always a parser error; by default just a warning from preprocessor
empty_char_literal,
/// Any character literal with an embedded newline or EOF
/// Always a parser error; by default just a warning from preprocessor
unterminated_char_literal,
/// `/* */` style comment without a closing `*/` before EOF
unterminated_comment,
/// Integer literal tokens generated by preprocessor.
one,
zero,
@ -470,6 +485,7 @@ pub const Token = struct {
return switch (id) {
.include_start,
.include_resume,
.unterminated_comment, // Fatal error; parsing should not be attempted
=> unreachable,
.invalid,
@ -480,6 +496,9 @@ pub const Token = struct {
.string_literal_utf_8,
.string_literal_utf_32,
.string_literal_wide,
.unterminated_string_literal,
.unterminated_char_literal,
.empty_char_literal,
.char_literal,
.char_literal_utf_8,
.char_literal_utf_16,
@ -984,8 +1003,6 @@ index: u32 = 0,
source: Source.Id,
comp: *const Compilation,
line: u32 = 1,
/// Used to parse include strings with Windows style paths.
path_escapes: bool = false,
pub fn next(self: *Tokenizer) Token {
var state: enum {
@ -996,14 +1013,10 @@ pub fn next(self: *Tokenizer) Token {
U,
L,
string_literal,
path_escape,
char_literal_start,
char_literal,
char_escape_sequence,
escape_sequence,
octal_escape,
hex_escape,
unicode_escape,
string_escape_sequence,
identifier,
extended_identifier,
equal,
@ -1038,8 +1051,6 @@ pub fn next(self: *Tokenizer) Token {
var start = self.index;
var id: Token.Id = .eof;
var return_state = state;
var counter: u32 = 0;
while (self.index < self.buf.len) : (self.index += 1) {
const c = self.buf[self.index];
switch (state) {
@ -1219,29 +1230,30 @@ pub fn next(self: *Tokenizer) Token {
},
.string_literal => switch (c) {
'\\' => {
return_state = .string_literal;
state = if (self.path_escapes) .path_escape else .escape_sequence;
state = .string_escape_sequence;
},
'"' => {
self.index += 1;
break;
},
'\n' => {
id = .invalid;
id = .unterminated_string_literal;
break;
},
'\r' => unreachable,
else => {},
},
.path_escape => {
state = .string_literal;
},
.char_literal_start => switch (c) {
'\\' => {
state = .char_escape_sequence;
},
'\'', '\n' => {
id = .invalid;
'\'' => {
id = .empty_char_literal;
self.index += 1;
break;
},
'\n' => {
id = .unterminated_char_literal;
break;
},
else => {
@ -1257,7 +1269,7 @@ pub fn next(self: *Tokenizer) Token {
break;
},
'\n' => {
id = .invalid;
id = .unterminated_char_literal;
break;
},
else => {},
@ -1266,55 +1278,9 @@ pub fn next(self: *Tokenizer) Token {
'\r', '\n' => unreachable, // removed by line splicing
else => state = .char_literal,
},
.escape_sequence => switch (c) {
'\'', '"', '?', '\\', 'a', 'b', 'e', 'f', 'n', 'r', 't', 'v' => {
state = return_state;
},
.string_escape_sequence => switch (c) {
'\r', '\n' => unreachable, // removed by line splicing
'0'...'7' => {
counter = 1;
state = .octal_escape;
},
'x' => state = .hex_escape,
'u' => {
counter = 4;
state = .unicode_escape;
},
'U' => {
counter = 8;
state = .unicode_escape;
},
else => {
id = .invalid;
break;
},
},
.octal_escape => switch (c) {
'0'...'7' => {
counter += 1;
if (counter == 3) state = return_state;
},
else => {
self.index -= 1;
state = return_state;
},
},
.hex_escape => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {},
else => {
self.index -= 1;
state = return_state;
},
},
.unicode_escape => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {
counter -= 1;
if (counter == 0) state = return_state;
},
else => {
id = .invalid;
break;
},
else => state = .string_literal,
},
.identifier, .extended_identifier => switch (c) {
'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
@ -1732,19 +1698,18 @@ pub fn next(self: *Tokenizer) Token {
.start, .line_comment => {},
.u, .u8, .U, .L, .identifier => id = Token.getTokenId(self.comp, self.buf[start..self.index]),
.extended_identifier => id = .extended_identifier,
.period2,
.string_literal,
.path_escape,
.char_literal_start,
.char_literal,
.escape_sequence,
.char_escape_sequence,
.octal_escape,
.hex_escape,
.unicode_escape,
.period2 => {
self.index -= 1;
id = .period;
},
.multi_line_comment,
.multi_line_comment_asterisk,
=> id = .invalid,
=> id = .unterminated_comment,
.char_escape_sequence, .char_literal, .char_literal_start => id = .unterminated_char_literal,
.string_escape_sequence, .string_literal => id = .unterminated_string_literal,
.whitespace => id = .whitespace,
.multi_line_comment_done => id = .whitespace,
@ -2114,7 +2079,7 @@ test "extended identifiers" {
try expectTokens("0x0\u{E0000}", &.{ .pp_num, .extended_identifier });
try expectTokens("\"\\0\u{E0000}\"", &.{.string_literal});
try expectTokens("\"\\x\u{E0000}\"", &.{.string_literal});
try expectTokens("\"\\u\u{E0000}\"", &.{ .invalid, .extended_identifier, .invalid });
try expectTokens("\"\\u\u{E0000}\"", &.{.string_literal});
try expectTokens("1e\u{E0000}", &.{ .pp_num, .extended_identifier });
try expectTokens("1e1\u{E0000}", &.{ .pp_num, .extended_identifier });
}
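// A sketch of the new end-of-input behavior, in the style of the tests above
// (expected ids follow from the eof handling of the tokenizer states):
test "unterminated literals" {
    try expectTokens("\"abc", &.{.unterminated_string_literal});
    try expectTokens("'a", &.{.unterminated_char_literal});
    try expectTokens("''", &.{.empty_char_literal});
}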

21
deps/aro/Tree.zig vendored
View File

@ -6,7 +6,6 @@ const Source = @import("Source.zig");
const Attribute = @import("Attribute.zig");
const Value = @import("Value.zig");
const StringInterner = @import("StringInterner.zig");
const BuiltinFunction = @import("builtins/BuiltinFunction.zig");
const Tree = @This();
@ -657,17 +656,6 @@ pub fn isLvalExtra(nodes: Node.List.Slice, extra: []const NodeIndex, value_map:
}
}
pub fn dumpStr(retained_strings: []const u8, range: Value.ByteRange, tag: Tag, writer: anytype) !void {
switch (tag) {
.string_literal_expr => {
const lit_range = range.trim(1); // remove null-terminator
const str = lit_range.slice(retained_strings);
try writer.print("\"{}\"", .{std.zig.fmtEscapes(str)});
},
else => unreachable,
}
}
pub fn tokSlice(tree: Tree, tok_i: TokenIndex) []const u8 {
if (tree.tokens.items(.id)[tok_i].lexeme()) |some| return some;
const loc = tree.tokens.items(.loc)[tok_i];
@ -703,12 +691,13 @@ fn dumpAttribute(attr: Attribute, strings: []const u8, writer: anytype) !void {
switch (attr.tag) {
inline else => |tag| {
const args = @field(attr.args, @tagName(tag));
if (@TypeOf(args) == void) {
const fields = @typeInfo(@TypeOf(args)).Struct.fields;
if (fields.len == 0) {
try writer.writeByte('\n');
return;
}
try writer.writeByte(' ');
inline for (@typeInfo(@TypeOf(args)).Struct.fields, 0..) |f, i| {
inline for (fields, 0..) |f, i| {
if (comptime std.mem.eql(u8, f.name, "__name_tok")) continue;
if (i != 0) {
try writer.writeAll(", ");
@ -716,8 +705,8 @@ fn dumpAttribute(attr: Attribute, strings: []const u8, writer: anytype) !void {
try writer.writeAll(f.name);
try writer.writeAll(": ");
switch (f.type) {
Value.ByteRange => try writer.print("\"{s}\"", .{@field(args, f.name).slice(strings)}),
?Value.ByteRange => try writer.print("\"{?s}\"", .{if (@field(args, f.name)) |range| range.slice(strings) else null}),
Value.ByteRange => try writer.print("\"{s}\"", .{@field(args, f.name).slice(strings, .@"1")}),
?Value.ByteRange => try writer.print("\"{?s}\"", .{if (@field(args, f.name)) |range| range.slice(strings, .@"1") else null}),
else => switch (@typeInfo(f.type)) {
.Enum => try writer.writeAll(@tagName(@field(args, f.name))),
else => try writer.print("{any}", .{@field(args, f.name)}),

97
deps/aro/Type.zig vendored
View File

@ -9,7 +9,6 @@ const StringInterner = @import("StringInterner.zig");
const StringId = StringInterner.StringId;
const target_util = @import("target.zig");
const LangOpts = @import("LangOpts.zig");
const BuiltinFunction = @import("builtins/BuiltinFunction.zig");
const Type = @This();
@ -104,6 +103,35 @@ pub const Func = struct {
name: StringId,
name_tok: TokenIndex,
};
fn eql(a: *const Func, b: *const Func, a_var_args: bool, b_var_args: bool, comp: *const Compilation) bool {
// return type cannot have qualifiers
if (!a.return_type.eql(b.return_type, comp, false)) return false;
if (a.params.len != b.params.len) {
const a_no_proto = a_var_args and a.params.len == 0 and !comp.langopts.standard.atLeast(.c2x);
const b_no_proto = b_var_args and b.params.len == 0 and !comp.langopts.standard.atLeast(.c2x);
if (a_no_proto or b_no_proto) {
const maybe_has_params = if (a_no_proto) b else a;
for (maybe_has_params.params) |param| {
if (param.ty.undergoesDefaultArgPromotion(comp)) return false;
}
return true;
}
}
if (a_var_args != b_var_args) return false;
// TODO validate this
for (a.params, b.params) |param, b_qual| {
var a_unqual = param.ty;
a_unqual.qual.@"const" = false;
a_unqual.qual.@"volatile" = false;
var b_unqual = b_qual.ty;
b_unqual.qual.@"const" = false;
b_unqual.qual.@"volatile" = false;
if (!a_unqual.eql(b_unqual, comp, true)) return false;
}
return true;
}
};
pub const Array = struct {
@ -448,6 +476,22 @@ pub fn isArray(ty: Type) bool {
};
}
/// Whether the type is promoted if used as a variadic argument or as an argument to a function with no prototype
fn undergoesDefaultArgPromotion(ty: Type, comp: *const Compilation) bool {
return switch (ty.specifier) {
.bool => true,
.char, .uchar, .schar => true,
.short, .ushort => true,
.@"enum" => if (comp.langopts.emulate == .clang) ty.data.@"enum".isIncomplete() else false,
.float => true,
.typeof_type => ty.data.sub_type.undergoesDefaultArgPromotion(comp),
.typeof_expr => ty.data.expr.ty.undergoesDefaultArgPromotion(comp),
.attributed => ty.data.attributed.base.undergoesDefaultArgPromotion(comp),
else => false,
};
}
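// An illustration of how Func.eql applies this (C declarations, hypothetical):
//     int f();        // no prototype
//     int f(int x);   // compatible: int is unchanged by default argument promotion
//
//     int g();        // no prototype
//     int g(short x); // incompatible: short promotes to int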
pub fn isScalar(ty: Type) bool {
return ty.isInt() or ty.isScalarNonInt();
}
@ -1195,31 +1239,6 @@ pub fn annotationAlignment(comp: *const Compilation, attrs: ?[]const Attribute)
return max_requested;
}
/// Checks type compatibility for __builtin_types_compatible_p
/// Returns true if the unqualified version of `a_param` and `b_param` are the same
/// Ignores top-level qualifiers (e.g. `int` and `const int` are compatible) but `int *` and `const int *` are not
/// Two types that are typedefed are considered compatible if their underlying types are compatible.
/// An enum type is not considered to be compatible with another enum type even if both are compatible with the same integer type;
/// `A[]` and `A[N]` for a type `A` and integer `N` are compatible
pub fn compatible(a_param: Type, b_param: Type, comp: *const Compilation) bool {
var a_unqual = a_param.canonicalize(.standard);
a_unqual.qual.@"const" = false;
a_unqual.qual.@"volatile" = false;
var b_unqual = b_param.canonicalize(.standard);
b_unqual.qual.@"const" = false;
b_unqual.qual.@"volatile" = false;
if (a_unqual.eql(b_unqual, comp, true)) return true;
if (!a_unqual.isArray() or !b_unqual.isArray()) return false;
if (a_unqual.arrayLen() == null or b_unqual.arrayLen() == null) {
// incomplete arrays are compatible with arrays of the same element type
// GCC and clang ignore cv-qualifiers on arrays
return a_unqual.elemType().compatible(b_unqual.elemType(), comp);
}
return false;
}
pub fn eql(a_param: Type, b_param: Type, comp: *const Compilation, check_qualifiers: bool) bool {
const a = a_param.canonicalize(.standard);
const b = b_param.canonicalize(.standard);
@ -1252,29 +1271,21 @@ pub fn eql(a_param: Type, b_param: Type, comp: *const Compilation, check_qualifi
.func,
.var_args_func,
.old_style_func,
=> {
// TODO validate this
if (a.data.func.params.len != b.data.func.params.len) return false;
// return type cannot have qualifiers
if (!a.returnType().eql(b.returnType(), comp, false)) return false;
for (a.data.func.params, b.data.func.params) |param, b_qual| {
var a_unqual = param.ty;
a_unqual.qual.@"const" = false;
a_unqual.qual.@"volatile" = false;
var b_unqual = b_qual.ty;
b_unqual.qual.@"const" = false;
b_unqual.qual.@"volatile" = false;
if (!a_unqual.eql(b_unqual, comp, check_qualifiers)) return false;
}
},
=> if (!a.data.func.eql(b.data.func, a.specifier == .var_args_func, b.specifier == .var_args_func, comp)) return false,
.array,
.static_array,
.incomplete_array,
.vector,
=> {
if (!std.meta.eql(a.arrayLen(), b.arrayLen())) return false;
if (!a.elemType().eql(b.elemType(), comp, check_qualifiers)) return false;
const a_len = a.arrayLen();
const b_len = b.arrayLen();
if (a_len == null or b_len == null) {
// At least one array is incomplete; only check child type for equality
} else if (a_len.? != b_len.?) {
return false;
}
if (!a.elemType().eql(b.elemType(), comp, false)) return false;
},
.variable_len_array => if (!a.elemType().eql(b.elemType(), comp, check_qualifiers)) return false,

38
deps/aro/Value.zig vendored
View File

@ -18,8 +18,40 @@ pub const ByteRange = struct {
return .{ .start = self.start, .end = self.end - amount };
}
pub fn slice(self: ByteRange, all_bytes: []const u8) []const u8 {
return all_bytes[self.start..self.end];
pub fn slice(self: ByteRange, all_bytes: []const u8, comptime size: Compilation.CharUnitSize) []const size.Type() {
switch (size) {
inline else => |sz| {
const aligned: []align(@alignOf(sz.Type())) const u8 = @alignCast(all_bytes[self.start..self.end]);
return std.mem.bytesAsSlice(sz.Type(), aligned);
},
}
}
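// A small sketch of the reinterpretation above (hypothetical data): the
// UTF-16 string "hi" retained as 4 little-endian bytes.
//     const bytes: [4]u8 align(2) = .{ 'h', 0, 'i', 0 };
//     const range: ByteRange = .{ .start = 0, .end = 4 };
//     const units = range.slice(&bytes, .@"2"); // []const u16 with len 2
//     // units[0] == 'h' and units[1] == 'i' on a little-endian host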
pub fn dumpString(range: ByteRange, ty: Type, comp: *const Compilation, strings: []const u8, w: anytype) !void {
const size: Compilation.CharUnitSize = @enumFromInt(ty.elemType().sizeof(comp).?);
const without_null = range.trim(@intFromEnum(size));
switch (size) {
inline .@"1", .@"2" => |sz| {
const data_slice = without_null.slice(strings, sz);
const formatter = if (sz == .@"1") std.zig.fmtEscapes(data_slice) else std.unicode.fmtUtf16le(data_slice);
try w.print("\"{}\"", .{formatter});
},
.@"4" => {
try w.writeByte('"');
const data_slice = without_null.slice(strings, .@"4");
var buf: [4]u8 = undefined;
for (data_slice) |item| {
if (item <= std.math.maxInt(u21) and std.unicode.utf8ValidCodepoint(@intCast(item))) {
const codepoint: u21 = @intCast(item);
const written = std.unicode.utf8Encode(codepoint, &buf) catch unreachable;
try w.print("{s}", .{buf[0..written]});
} else {
try w.print("\\x{x}", .{item});
}
}
try w.writeByte('"');
},
}
}
};
@ -593,7 +625,7 @@ pub fn dump(v: Value, ty: Type, comp: *Compilation, strings: []const u8, w: anyt
} else {
try w.print("{d}", .{v.signExtend(ty, comp)});
},
.bytes => try w.print("\"{s}\"", .{v.data.bytes.slice(strings)}),
.bytes => try v.data.bytes.dumpString(ty, comp, strings, w),
// std.fmt does @as instead of @floatCast
.float => try w.print("{d}", .{@as(f64, @floatCast(v.data.float))}),
else => try w.print("({s})", .{@tagName(v.tag)}),

641
deps/aro/build/GenerateDef.zig vendored Normal file
View File

@ -0,0 +1,641 @@
const std = @import("std");
const GenerateDef = @This();
const Step = std.Build.Step;
const Allocator = std.mem.Allocator;
const GeneratedFile = std.Build.GeneratedFile;
step: Step,
path: []const u8,
generated_file: GeneratedFile,
pub const base_id: Step.Id = .custom;
pub fn add(
owner: *std.Build,
def_file_path: []const u8,
import_path: []const u8,
compile_step: *Step.Compile,
aro_module: *std.Build.Module,
) void {
const self = owner.allocator.create(GenerateDef) catch @panic("OOM");
const name = owner.fmt("GenerateDef {s}", .{def_file_path});
self.* = .{
.step = Step.init(.{
.id = base_id,
.name = name,
.owner = owner,
.makeFn = make,
}),
.path = def_file_path,
.generated_file = .{ .step = &self.step },
};
const module = owner.createModule(.{
.source_file = .{ .generated = &self.generated_file },
});
compile_step.addModule(import_path, module);
compile_step.step.dependOn(&self.step);
aro_module.dependencies.put(import_path, module) catch @panic("OOM");
}
fn make(step: *Step, prog_node: *std.Progress.Node) !void {
_ = prog_node;
const b = step.owner;
const self = @fieldParentPtr(GenerateDef, "step", step);
const arena = b.allocator;
var man = b.cache.obtain();
defer man.deinit();
// Random bytes to make GenerateDef unique. Refresh this with new
// random bytes when GenerateDef implementation is modified in a
// non-backwards-compatible way.
man.hash.add(@as(u32, 0xDCC14144));
const contents = try b.build_root.handle.readFileAlloc(arena, self.path, std.math.maxInt(u32));
man.hash.addBytes(contents);
const out_name = b.fmt("{s}.zig", .{std.fs.path.stem(self.path)});
if (try step.cacheHit(&man)) {
const digest = man.final();
self.generated_file.path = try b.cache_root.join(arena, &.{
"o", &digest, out_name,
});
return;
}
const digest = man.final();
const sub_path = try std.fs.path.join(arena, &.{ "o", &digest, out_name });
const sub_path_dirname = std.fs.path.dirname(sub_path).?;
b.cache_root.handle.makePath(sub_path_dirname) catch |err| {
return step.fail("unable to make path '{}{s}': {s}", .{
b.cache_root, sub_path_dirname, @errorName(err),
});
};
const output = try self.generate(contents);
b.cache_root.handle.writeFile(sub_path, output) catch |err| {
return step.fail("unable to write file '{}{s}': {s}", .{
b.cache_root, sub_path, @errorName(err),
});
};
self.generated_file.path = try b.cache_root.join(arena, &.{sub_path});
try man.writeManifest();
}
const Value = struct {
name: []const u8,
properties: []const []const u8,
};
fn generate(self: *GenerateDef, input: []const u8) ![]const u8 {
const arena = self.step.owner.allocator;
var values = std.StringArrayHashMap([]const []const u8).init(arena);
defer values.deinit();
var properties = std.ArrayList([]const u8).init(arena);
defer properties.deinit();
var headers = std.ArrayList([]const u8).init(arena);
defer headers.deinit();
var value_name: ?[]const u8 = null;
var it = std.mem.tokenizeAny(u8, input, "\r\n");
while (it.next()) |line_untrimmed| {
const line = std.mem.trim(u8, line_untrimmed, " \t");
if (line.len == 0 or line[0] == '#') continue;
if (std.mem.startsWith(u8, line, "const ") or std.mem.startsWith(u8, line, "pub const ")) {
try headers.append(line);
continue;
}
if (line[0] == '.') {
if (value_name == null) {
return self.step.fail("property not attached to a value:\n\"{s}\"", .{line});
}
try properties.append(line);
continue;
}
if (value_name) |name| {
const old = try values.fetchPut(name, try properties.toOwnedSlice());
if (old != null) return self.step.fail("duplicate value \"{s}\"", .{name});
}
value_name = line;
}
if (value_name) |name| {
const old = try values.fetchPut(name, try properties.toOwnedSlice());
if (old != null) return self.step.fail("duplicate value \"{s}\"", .{name});
}
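// For reference, a hypothetical input in the .def format accepted by the loop
// above: '#' lines are comments, 'const'/'pub const' lines are copied verbatim
// into the generated file, a bare line starts a new value, and lines beginning
// with '.' attach properties to the most recent value.
//
//     # sample.def (illustrative; the property field name is hypothetical)
//     const TargetSet = @import("Properties.zig").TargetSet;
//     __builtin_example
//         .target_set = TargetSet.initOne(.basic)
//     __builtin_other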
{
var sorted_list = try arena.dupe([]const u8, values.keys());
defer arena.free(sorted_list);
std.mem.sort([]const u8, sorted_list, {}, struct {
pub fn lessThan(_: void, a: []const u8, b: []const u8) bool {
return std.mem.lessThan(u8, a, b);
}
}.lessThan);
var longest_name: usize = 0;
var shortest_name: usize = std.math.maxInt(usize);
var builder = try DafsaBuilder.init(arena);
defer builder.deinit();
for (sorted_list) |name| {
try builder.insert(name);
longest_name = @max(name.len, longest_name);
shortest_name = @min(name.len, shortest_name);
}
try builder.finish();
builder.calcNumbers();
// As a sanity check, confirm that the minimal perfect hashing doesn't
// have any collisions
{
var index_set = std.AutoHashMap(usize, void).init(arena);
defer index_set.deinit();
for (values.keys()) |name| {
const index = builder.getUniqueIndex(name).?;
const result = try index_set.getOrPut(index);
if (result.found_existing) {
return self.step.fail("clobbered {}, name={s}\n", .{ index, name });
}
}
}
var values_array = try arena.alloc(Value, values.count());
defer arena.free(values_array);
for (values.keys(), values.values()) |name, props| {
const unique_index = builder.getUniqueIndex(name).?;
const data_index = unique_index - 1;
values_array[data_index] = .{ .name = name, .properties = props };
}
var out_buf = std.ArrayList(u8).init(arena);
defer out_buf.deinit();
const writer = out_buf.writer();
try writer.print(
\\//! Autogenerated by GenerateDef from {s}, do not edit
\\
\\const std = @import("std");
\\
\\pub fn with(comptime Properties: type) type {{
\\return struct {{
\\
, .{self.path});
for (headers.items) |line| {
try writer.print("{s}\n", .{line});
}
try writer.writeAll(
\\
\\tag: Tag,
\\properties: Properties,
\\
\\/// Integer starting at 0 derived from the unique index,
\\/// corresponding to the data array index.
\\pub const Tag = enum(u16) { _ };
\\
\\const Self = @This();
\\
\\pub fn fromName(name: []const u8) ?@This() {
\\ const data_index = tagFromName(name) orelse return null;
\\ return data[@intFromEnum(data_index)];
\\}
\\
\\pub fn tagFromName(name: []const u8) ?Tag {
\\ const unique_index = uniqueIndex(name) orelse return null;
\\ return @enumFromInt(unique_index - 1);
\\}
\\
\\pub fn fromTag(tag: Tag) @This() {
\\ return data[@intFromEnum(tag)];
\\}
\\
\\pub fn nameFromTagIntoBuf(tag: Tag, name_buf: []u8) []u8 {
\\ std.debug.assert(name_buf.len >= longest_name);
\\ const unique_index = @intFromEnum(tag) + 1;
\\ return nameFromUniqueIndex(unique_index, name_buf);
\\}
\\
\\pub fn nameFromTag(tag: Tag) NameBuf {
\\ var name_buf: NameBuf = undefined;
\\ const unique_index = @intFromEnum(tag) + 1;
\\ const name = nameFromUniqueIndex(unique_index, &name_buf.buf);
\\ name_buf.len = @intCast(name.len);
\\ return name_buf;
\\}
\\
\\pub const NameBuf = struct {
\\ buf: [longest_name]u8 = undefined,
\\ len: std.math.IntFittingRange(0, longest_name),
\\
\\ pub fn span(self: *const NameBuf) []const u8 {
\\ return self.buf[0..self.len];
\\ }
\\};
\\
\\pub fn exists(name: []const u8) bool {
\\ if (name.len < shortest_name or name.len > longest_name) return false;
\\
\\ var index: u16 = 0;
\\ for (name) |c| {
\\ index = findInList(dafsa[index].child_index, c) orelse return false;
\\ }
\\ return dafsa[index].end_of_word;
\\}
\\
\\
);
try writer.print("pub const shortest_name = {};\n", .{shortest_name});
try writer.print("pub const longest_name = {};\n\n", .{longest_name});
try writer.writeAll(
\\/// Search siblings of `first_child_index` for the `char`
\\/// If found, returns the index of the node within the `dafsa` array.
\\/// Otherwise, returns `null`.
\\pub fn findInList(first_child_index: u16, char: u8) ?u16 {
\\ var index = first_child_index;
\\ while (true) {
\\ if (dafsa[index].char == char) return index;
\\ if (dafsa[index].end_of_list) return null;
\\ index += 1;
\\ }
\\ unreachable;
\\}
\\
\\/// Returns a unique (minimal perfect hash) index (starting at 1) for the `name`,
\\/// or null if the name was not found.
\\pub fn uniqueIndex(name: []const u8) ?u16 {
\\ if (name.len < shortest_name or name.len > longest_name) return null;
\\
\\ var index: u16 = 0;
\\ var node_index: u16 = 0;
\\
\\ for (name) |c| {
\\ const child_index = findInList(dafsa[node_index].child_index, c) orelse return null;
\\ var sibling_index = dafsa[node_index].child_index;
\\ while (true) {
\\ const sibling_c = dafsa[sibling_index].char;
\\ std.debug.assert(sibling_c != 0);
\\ if (sibling_c < c) {
\\ index += dafsa[sibling_index].number;
\\ }
\\ if (dafsa[sibling_index].end_of_list) break;
\\ sibling_index += 1;
\\ }
\\ node_index = child_index;
\\ if (dafsa[node_index].end_of_word) index += 1;
\\ }
\\
\\ if (!dafsa[node_index].end_of_word) return null;
\\
\\ return index;
\\}
\\
\\/// Returns a slice of `buf` with the name associated with the given `index`.
\\/// This function should only be called with an `index` that
\\/// is already known to exist within the `dafsa`, e.g. an index
\\/// returned from `uniqueIndex`.
\\pub fn nameFromUniqueIndex(index: u16, buf: []u8) []u8 {
\\ std.debug.assert(index >= 1 and index <= data.len);
\\
\\ var node_index: u16 = 0;
\\ var count: u16 = index;
\\ var fbs = std.io.fixedBufferStream(buf);
\\ const w = fbs.writer();
\\
\\ while (true) {
\\ var sibling_index = dafsa[node_index].child_index;
\\ while (true) {
\\ if (dafsa[sibling_index].number > 0 and dafsa[sibling_index].number < count) {
\\ count -= dafsa[sibling_index].number;
\\ } else {
\\ w.writeByte(dafsa[sibling_index].char) catch unreachable;
\\ node_index = sibling_index;
\\ if (dafsa[node_index].end_of_word) {
\\ count -= 1;
\\ }
\\ break;
\\ }
\\
\\ if (dafsa[sibling_index].end_of_list) break;
\\ sibling_index += 1;
\\ }
\\ if (count == 0) break;
\\ }
\\
\\ return fbs.getWritten();
\\}
\\
\\
);
try writer.writeAll(
\\/// We're 1 bit shy of being able to fit this in a u32:
\\/// - char only contains 0-9, a-z, A-Z, and _, so it could use an enum(u6) with a way to convert <-> u8
\\/// (note: this would have a performance cost that may make the u32 not worth it)
\\/// - number has a max value greater than 2047 and less than 4095 (the first _ node has the largest number),
\\/// so it could fit into a u12
\\/// - child_index currently has a max greater than 4095 and less than 8191, so it could fit into a u13
\\///
\\/// with the end_of_word/end_of_list 2 bools, that makes 33 bits total
\\const Node = packed struct(u64) {
\\ char: u8,
\\ /// Nodes are numbered with "an integer which gives the number of words that
\\ /// would be accepted by the automaton starting from that state." This numbering
\\ /// allows calculating "a one-to-one correspondence between the integers 1 to L
\\ /// (L is the number of words accepted by the automaton) and the words themselves."
\\ ///
\\ /// Essentially, this allows us to have a minimal perfect hashing scheme such that
\\ /// it's possible to store & lookup the properties of each builtin using a separate array.
\\ number: u16,
\\ /// If true, this node is the end of a valid builtin.
\\ /// Note: This does not necessarily mean that this node does not have child nodes.
\\ end_of_word: bool,
\\ /// If true, this node is the end of a sibling list.
\\ /// If false, then (index + 1) will contain the next sibling.
\\ end_of_list: bool,
\\ /// Padding bits to get to u64, unsure if there's some way to use these to improve something.
\\ _extra: u22 = 0,
\\ /// Index of the first child of this node.
\\ child_index: u16,
\\};
\\
\\
);
try builder.writeDafsa(writer);
try writeData(writer, values_array);
try writer.writeAll(
\\};
\\}
\\
);
return out_buf.toOwnedSlice();
}
}
fn writeData(writer: anytype, values: []const Value) !void {
try writer.writeAll("pub const data = blk: {\n");
try writer.print(" @setEvalBranchQuota({});\n", .{values.len});
try writer.writeAll(" break :blk [_]@This(){\n");
for (values, 0..) |value, i| {
try writer.print(" // {s}\n", .{value.name});
try writer.print(" .{{ .tag = @enumFromInt({}), .properties = .{{", .{i});
for (value.properties, 0..) |property, j| {
if (j != 0) try writer.writeByte(',');
try writer.writeByte(' ');
try writer.writeAll(property);
}
if (value.properties.len != 0) try writer.writeByte(' ');
try writer.writeAll("} },\n");
}
try writer.writeAll(" };\n");
try writer.writeAll("};\n");
}
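// With the hypothetical "__builtin_example" value from above, writeData emits
// an entry of the form:
//     // __builtin_example
//     .{ .tag = @enumFromInt(0), .properties = .{ .target_set = TargetSet.initOne(.basic) } },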
const DafsaBuilder = struct {
root: *Node,
arena: std.heap.ArenaAllocator.State,
allocator: Allocator,
unchecked_nodes: std.ArrayListUnmanaged(UncheckedNode),
minimized_nodes: std.HashMapUnmanaged(*Node, *Node, Node.DuplicateContext, std.hash_map.default_max_load_percentage),
previous_word_buf: [128]u8 = undefined,
previous_word: []u8 = &[_]u8{},
const UncheckedNode = struct {
parent: *Node,
char: u8,
child: *Node,
};
pub fn init(allocator: Allocator) !DafsaBuilder {
var arena = std.heap.ArenaAllocator.init(allocator);
errdefer arena.deinit();
var root = try arena.allocator().create(Node);
root.* = .{};
return DafsaBuilder{
.root = root,
.allocator = allocator,
.arena = arena.state,
.unchecked_nodes = .{},
.minimized_nodes = .{},
};
}
pub fn deinit(self: *DafsaBuilder) void {
self.arena.promote(self.allocator).deinit();
self.unchecked_nodes.deinit(self.allocator);
self.minimized_nodes.deinit(self.allocator);
self.* = undefined;
}
const Node = struct {
children: [256]?*Node = [_]?*Node{null} ** 256,
is_terminal: bool = false,
number: usize = 0,
const DuplicateContext = struct {
pub fn hash(ctx: @This(), key: *Node) u64 {
_ = ctx;
var hasher = std.hash.Wyhash.init(0);
std.hash.autoHash(&hasher, key.children);
std.hash.autoHash(&hasher, key.is_terminal);
return hasher.final();
}
pub fn eql(ctx: @This(), a: *Node, b: *Node) bool {
_ = ctx;
return a.is_terminal == b.is_terminal and std.mem.eql(?*Node, &a.children, &b.children);
}
};
pub fn calcNumbers(self: *Node) void {
self.number = @intFromBool(self.is_terminal);
for (self.children) |maybe_child| {
const child = maybe_child orelse continue;
// A node's number is the sum of the
// numbers of its immediate child nodes.
child.calcNumbers();
self.number += child.number;
}
}
pub fn numDirectChildren(self: *const Node) u8 {
var num: u8 = 0;
for (self.children) |child| {
if (child != null) num += 1;
}
return num;
}
};
pub fn insert(self: *DafsaBuilder, str: []const u8) !void {
if (std.mem.order(u8, str, self.previous_word) == .lt) {
@panic("insertion order must be sorted");
}
var common_prefix_len: usize = 0;
for (0..@min(str.len, self.previous_word.len)) |i| {
if (str[i] != self.previous_word[i]) break;
common_prefix_len += 1;
}
try self.minimize(common_prefix_len);
var node = if (self.unchecked_nodes.items.len == 0)
self.root
else
self.unchecked_nodes.getLast().child;
for (str[common_prefix_len..]) |c| {
std.debug.assert(node.children[c] == null);
var arena = self.arena.promote(self.allocator);
var child = try arena.allocator().create(Node);
self.arena = arena.state;
child.* = .{};
node.children[c] = child;
try self.unchecked_nodes.append(self.allocator, .{
.parent = node,
.char = c,
.child = child,
});
node = node.children[c].?;
}
node.is_terminal = true;
self.previous_word = self.previous_word_buf[0..str.len];
@memcpy(self.previous_word, str);
}
pub fn minimize(self: *DafsaBuilder, down_to: usize) !void {
if (self.unchecked_nodes.items.len == 0) return;
while (self.unchecked_nodes.items.len > down_to) {
const unchecked_node = self.unchecked_nodes.pop();
if (self.minimized_nodes.getPtr(unchecked_node.child)) |child| {
unchecked_node.parent.children[unchecked_node.char] = child.*;
} else {
try self.minimized_nodes.put(self.allocator, unchecked_node.child, unchecked_node.child);
}
}
}
pub fn finish(self: *DafsaBuilder) !void {
try self.minimize(0);
}
fn nodeCount(self: *const DafsaBuilder) usize {
return self.minimized_nodes.count();
}
fn edgeCount(self: *const DafsaBuilder) usize {
var count: usize = 0;
var it = self.minimized_nodes.iterator();
while (it.next()) |entry| {
for (entry.key_ptr.*.children) |child| {
if (child != null) count += 1;
}
}
return count;
}
fn contains(self: *const DafsaBuilder, str: []const u8) bool {
var node = self.root;
for (str) |c| {
node = node.children[c] orelse return false;
}
return node.is_terminal;
}
fn calcNumbers(self: *const DafsaBuilder) void {
self.root.calcNumbers();
}
fn getUniqueIndex(self: *const DafsaBuilder, str: []const u8) ?usize {
var index: usize = 0;
var node = self.root;
for (str) |c| {
const child = node.children[c] orelse return null;
for (node.children, 0..) |sibling, sibling_c| {
if (sibling == null) continue;
if (sibling_c < c) {
index += sibling.?.number;
}
}
node = child;
if (node.is_terminal) index += 1;
}
return index;
}
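// A worked example of the numbering scheme (hypothetical word set, inserted
// in sorted order: "ab", "ac", "b"):
//     root children: 'a' (number 2: reaches "ab" and "ac"), 'b' (terminal, number 1)
//     getUniqueIndex("ab"): no smaller siblings, 'b' is terminal   -> 1
//     getUniqueIndex("ac"): sibling 'b' < 'c' adds 1, 'c' terminal -> 2
//     getUniqueIndex("b"):  sibling 'a' < 'b' adds 2, 'b' terminal -> 3
// The indices 1..N enumerate the accepted words in sorted order, which is the
// minimal perfect hash the generated uniqueIndex relies on.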
fn writeDafsa(self: *const DafsaBuilder, writer: anytype) !void {
try writer.writeAll("const dafsa = [_]Node{\n");
// write root
try writer.writeAll(" .{ .char = 0, .end_of_word = false, .end_of_list = true, .number = 0, .child_index = 1 },\n");
var queue = std.ArrayList(*Node).init(self.allocator);
defer queue.deinit();
var child_indexes = std.AutoHashMap(*Node, usize).init(self.allocator);
defer child_indexes.deinit();
try child_indexes.ensureTotalCapacity(@intCast(self.edgeCount()));
var first_available_index: usize = self.root.numDirectChildren() + 1;
first_available_index = try writeDafsaChildren(self.root, writer, &queue, &child_indexes, first_available_index);
while (queue.items.len > 0) {
// TODO: something with better time complexity
const node = queue.orderedRemove(0);
first_available_index = try writeDafsaChildren(node, writer, &queue, &child_indexes, first_available_index);
}
try writer.writeAll("};\n");
}
fn writeDafsaChildren(
node: *Node,
writer: anytype,
queue: *std.ArrayList(*Node),
child_indexes: *std.AutoHashMap(*Node, usize),
first_available_index: usize,
) !usize {
var cur_available_index = first_available_index;
const num_children = node.numDirectChildren();
var child_i: usize = 0;
for (node.children, 0..) |maybe_child, c_usize| {
const child = maybe_child orelse continue;
const c: u8 = @intCast(c_usize);
const is_last_child = child_i == num_children - 1;
if (!child_indexes.contains(child)) {
const child_num_children = child.numDirectChildren();
if (child_num_children > 0) {
child_indexes.putAssumeCapacityNoClobber(child, cur_available_index);
cur_available_index += child_num_children;
}
try queue.append(child);
}
try writer.print(
" .{{ .char = '{c}', .end_of_word = {}, .end_of_list = {}, .number = {}, .child_index = {} }},\n",
.{ c, child.is_terminal, is_last_child, child.number, child_indexes.get(child) orelse 0 },
);
child_i += 1;
}
return cur_available_index;
}
};

View File

@ -177,7 +177,7 @@ fn genNode(func: *Fn, node: NodeIndex) Codegen.Error!Value {
.int_literal => return Value{ .immediate = @bitCast(data.int) },
.string_literal_expr => {
const range = func.c.tree.value_map.get(node).?.data.bytes;
const str_bytes = range.slice(func.c.tree.strings);
const str_bytes = range.slice(func.c.tree.strings, .@"1");
const section = try func.c.obj.getSection(.strings);
const start = section.items.len;
try section.appendSlice(str_bytes);

View File

@ -20,7 +20,7 @@ const Symbol = struct {
info: u8,
};
const Relocation = packed struct {
const Relocation = struct {
symbol: *Symbol,
addend: i64,
offset: u48,

5
deps/aro/target.zig vendored
View File

@ -2,7 +2,7 @@ const std = @import("std");
const LangOpts = @import("LangOpts.zig");
const Type = @import("Type.zig");
const llvm = @import("zig").codegen.llvm;
const TargetSet = @import("builtins/Properties.zig").TargetSet;
const TargetSet = @import("Builtins/Properties.zig").TargetSet;
/// intmax_t for this target
pub fn intMaxType(target: std.Target) Type {
@ -349,8 +349,7 @@ pub fn isCygwinMinGW(target: std.Target) bool {
}
pub fn builtinEnabled(target: std.Target, enabled_for: TargetSet) bool {
var copy = enabled_for;
var it = copy.iterator();
var it = enabled_for.iterator();
while (it.next()) |val| {
switch (val) {
.basic => return true,

View File

@ -4049,7 +4049,6 @@ pub fn cImport(comp: *Compilation, c_src: []const u8) !CImportResult {
}
var tree = switch (comp.c_frontend) {
.aro => tree: {
if (builtin.zig_backend == .stage2_c) @panic("the CBE cannot compile Aro yet!");
const translate_c = @import("aro_translate_c.zig");
_ = translate_c;
if (true) @panic("TODO");

View File

@ -4335,7 +4335,6 @@ fn cmdTranslateC(comp: *Compilation, arena: Allocator, fancy_output: ?*Compilati
var tree = switch (comp.c_frontend) {
.aro => tree: {
if (builtin.zig_backend == .stage2_c) @panic("the CBE cannot compile Aro yet!");
const translate_c = @import("aro_translate_c.zig");
var aro_comp = translate_c.Compilation.init(comp.gpa);
defer aro_comp.deinit();

View File

@ -274,6 +274,7 @@ fn add_cc_args(
}
pub fn buildImportLib(comp: *Compilation, lib_name: []const u8) !void {
if (build_options.only_c) @panic("building import libs not included in core functionality");
var arena_allocator = std.heap.ArenaAllocator.init(comp.gpa);
defer arena_allocator.deinit();
const arena = arena_allocator.allocator();
@ -288,10 +289,6 @@ pub fn buildImportLib(comp: *Compilation, lib_name: []const u8) !void {
else => |e| return e,
};
// We need to invoke `zig clang` to use the preprocessor.
if (!build_options.have_llvm) return error.ZigCompilerNotBuiltWithLLVMExtensions;
const self_exe_path = comp.self_exe_path orelse return error.PreprocessorDisabled;
const target = comp.getTarget();
var cache: Cache = .{
@ -337,67 +334,57 @@ pub fn buildImportLib(comp: *Compilation, lib_name: []const u8) !void {
"o", &digest, final_def_basename,
});
const target_def_arg = switch (target.cpu.arch) {
.x86 => "-DDEF_I386",
.x86_64 => "-DDEF_X64",
.arm, .armeb, .thumb, .thumbeb, .aarch64_32 => "-DDEF_ARM32",
.aarch64, .aarch64_be => "-DDEF_ARM64",
const target_defines = switch (target.cpu.arch) {
.x86 => "#define DEF_I386\n",
.x86_64 => "#define DEF_X64\n",
.arm, .armeb, .thumb, .thumbeb, .aarch64_32 => "#define DEF_ARM32\n",
.aarch64, .aarch64_be => "#define DEF_ARM64\n",
else => unreachable,
};
const args = [_][]const u8{
self_exe_path,
"clang",
"-x",
"c",
def_file_path,
"-Wp,-w",
"-undef",
"-P",
"-I",
try comp.zig_lib_directory.join(arena, &[_][]const u8{ "libc", "mingw", "def-include" }),
target_def_arg,
"-E",
"-o",
def_final_path,
};
const aro = @import("aro");
var aro_comp = aro.Compilation.init(comp.gpa);
defer aro_comp.deinit();
if (comp.verbose_cc) {
Compilation.dump_argv(&args);
const include_dir = try comp.zig_lib_directory.join(arena, &[_][]const u8{ "libc", "mingw", "def-include" });
if (comp.verbose_cc) print: {
std.debug.getStderrMutex().lock();
defer std.debug.getStderrMutex().unlock();
const stderr = std.io.getStdErr().writer();
nosuspend stderr.print("def file: {s}\n", .{def_file_path}) catch break :print;
nosuspend stderr.print("include dir: {s}\n", .{include_dir}) catch break :print;
nosuspend stderr.print("output path: {s}\n", .{def_final_path}) catch break :print;
}
if (std.process.can_spawn) {
var child = std.ChildProcess.init(&args, arena);
child.stdin_behavior = .Ignore;
child.stdout_behavior = .Pipe;
child.stderr_behavior = .Pipe;
try aro_comp.include_dirs.append(include_dir);
try child.spawn();
const builtin_macros = try aro_comp.generateBuiltinMacros();
const user_macros = try aro_comp.addSourceFromBuffer("<command line>", target_defines);
const def_file_source = try aro_comp.addSourceFromPath(def_file_path);
const stderr = try child.stderr.?.reader().readAllAlloc(arena, std.math.maxInt(usize));
var pp = aro.Preprocessor.init(&aro_comp);
defer pp.deinit();
pp.linemarkers = .none;
pp.preserve_whitespace = true;
const term = child.wait() catch |err| {
// TODO surface a proper error here
log.err("unable to spawn {s}: {s}", .{ args[0], @errorName(err) });
return error.ClangPreprocessorFailed;
};
switch (term) {
.Exited => |code| {
if (code != 0) {
// TODO surface a proper error here
log.err("clang exited with code {d} and stderr: {s}", .{ code, stderr });
return error.ClangPreprocessorFailed;
}
},
else => {
// TODO surface a proper error here
log.err("clang terminated unexpectedly with stderr: {s}", .{stderr});
return error.ClangPreprocessorFailed;
},
_ = try pp.preprocess(builtin_macros);
_ = try pp.preprocess(user_macros);
const eof = try pp.preprocess(def_file_source);
try pp.tokens.append(pp.comp.gpa, eof);
for (aro_comp.diag.list.items) |diagnostic| {
if (diagnostic.kind == .@"fatal error" or diagnostic.kind == .@"error") {
aro_comp.renderErrors();
return error.AroPreprocessorFailed;
}
} else {
log.err("unable to spawn {s}: spawning child process not supported on {s}", .{ args[0], @tagName(builtin.os.tag) });
return error.ClangPreprocessorFailed;
}
{
// New scope to ensure the definition file is fully written and closed before its path is passed to WriteImportLibrary.
const def_final_file = try comp.global_cache_directory.handle.createFile(def_final_path, .{ .truncate = true });
defer def_final_file.close();
try pp.prettyPrintTokens(def_final_file.writer());
}
const lib_final_path = try comp.global_cache_directory.join(comp.gpa, &[_][]const u8{
@ -405,6 +392,7 @@ pub fn buildImportLib(comp: *Compilation, lib_name: []const u8) !void {
});
errdefer comp.gpa.free(lib_final_path);
if (!build_options.have_llvm) return error.ZigCompilerNotBuiltWithLLVMExtensions;
const llvm_bindings = @import("codegen/llvm/bindings.zig");
const llvm = @import("codegen/llvm.zig");
const arch_tag = llvm.targetArch(target.cpu.arch);
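
The net effect of the hunk above: instead of spawning a child process to run the clang preprocessor, the .def file is now preprocessed in-process with aro. Condensed to its essentials, the new pipeline looks like this (a sketch that omits the include dir, verbose logging, and diagnostic rendering; out is any writer):

const std = @import("std");
const aro = @import("aro");

fn preprocessToWriter(gpa: std.mem.Allocator, def_path: []const u8, defines: []const u8, out: anytype) !void {
    var comp = aro.Compilation.init(gpa);
    defer comp.deinit();

    const builtin_macros = try comp.generateBuiltinMacros();
    const user_macros = try comp.addSourceFromBuffer("<command line>", defines);
    const def_source = try comp.addSourceFromPath(def_path);

    var pp = aro.Preprocessor.init(&comp);
    defer pp.deinit();
    pp.linemarkers = .none; // no #line directives, like clang's -P
    pp.preserve_whitespace = true;

    _ = try pp.preprocess(builtin_macros);
    _ = try pp.preprocess(user_macros);
    const eof = try pp.preprocess(def_source);
    try pp.tokens.append(pp.comp.gpa, eof);

    try pp.prettyPrintTokens(out);
}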

View File

@ -0,0 +1,33 @@
//! Stub implementation only used when bootstrapping stage2
//! Keep in sync with deps/aro/build/GenerateDef.zig
pub fn with(comptime Properties: type) type {
return struct {
tag: Tag = @enumFromInt(0),
properties: Properties = undefined,
pub const max_param_count = 1;
pub const longest_name = 0;
pub const data = [_]@This(){.{}};
pub inline fn fromName(_: []const u8) ?@This() {
return .{};
}
pub fn nameFromUniqueIndex(_: u16, _: []u8) []u8 {
return "";
}
pub fn uniqueIndex(_: []const u8) ?u16 {
return null;
}
pub const Tag = enum(u16) { _ };
pub fn nameFromTag(_: Tag) NameBuf {
return .{};
}
pub fn tagFromName(name: []const u8) ?Tag {
return @enumFromInt(name.len);
}
pub const NameBuf = struct {
pub fn span(_: *const NameBuf) []const u8 {
return "";
}
};
};
}

10
src/stubs/aro_names.zig Normal file
View File

@ -0,0 +1,10 @@
//! Stub implementation only used when bootstrapping stage2
//! Keep in sync with deps/aro/build/GenerateDef.zig
pub fn with(comptime _: type) type {
return struct {
pub inline fn fromName(_: []const u8) ?@This() {
return null;
}
};
}
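
Both stub files implement just enough surface area for the bootstrap build to type-check against the interface the DAFSA generator normally produces. A hypothetical consumer sketch (the Properties type, the module name, and the lookup are illustrative assumptions, not aro's actual call sites):

const Properties = struct {}; // placeholder for aro's real Properties type
const Builtin = @import("Builtins/Builtin.def").with(Properties);

fn isKnownBuiltin(name: []const u8) bool {
    // With the stub, fromName always succeeds with dummy data, so callers
    // compile during bootstrap but get no real builtin information.
    return Builtin.fromName(name) != null;
}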