From 1ed8c54cd349497adb264b0502783a6422e4f2d1 Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Tue, 26 Jan 2021 09:08:05 -0800 Subject: [PATCH] translate-c: add wide string literal support Adds support for wide, UTF-16, and UTF-32 string literals. If used to initialize an incomplete array, the same logic as narrow strings is used. Otherwise they are translated as global "anonymous" arrays of the relevant underlying char type. A dot is used in the name to ensure the generated names do not conflict with any other names in the translated program. For example: ```c void my_fn() { const uint32_t *foo = U"foo"; } ``` becomes: ```zig const @"zig.UTF32_string_2" = [4]c_uint{ '\u{66}', '\u{6f}', '\u{6f}', 0, }; pub export fn my_fn() void { var foo: [*c]const u32 = &@"zig.UTF32_string_2"; } ``` --- src/translate_c.zig | 56 ++++++++++++++++++++++++++++----------- test/run_translated_c.zig | 24 +++++++++++++++++ 2 files changed, 65 insertions(+), 15 deletions(-) diff --git a/src/translate_c.zig b/src/translate_c.zig index bca9ff3a20..8efac4922f 100644 --- a/src/translate_c.zig +++ b/src/translate_c.zig @@ -780,7 +780,7 @@ fn visitVarDecl(c: *Context, var_decl: *const clang.VarDecl, mangled_name: ?[]co eq_tok = try appendToken(c, .Equal, "="); if (decl_init) |expr| { const node_or_error = if (expr.getStmtClass() == .StringLiteralClass) - transStringLiteralAsArray(rp, &c.global_scope.base, @ptrCast(*const clang.StringLiteral, expr), type_node) + transStringLiteralAsArray(rp, &c.global_scope.base, @ptrCast(*const clang.StringLiteral, expr), zigArraySize(rp.c, type_node) catch 0) else transExprCoercing(rp, scope, expr, .used, .r_value); init_node = node_or_error catch |err| switch (err) { @@ -1662,7 +1662,7 @@ fn transDeclStmtOne( const eq_token = try appendToken(c, .Equal, "="); var init_node = if (decl_init) |expr| if (expr.getStmtClass() == .StringLiteralClass) - try transStringLiteralAsArray(rp, scope, @ptrCast(*const clang.StringLiteral, expr), type_node) + try 
transStringLiteralAsArray(rp, scope, @ptrCast(*const clang.StringLiteral, expr), try zigArraySize(rp.c, type_node)) else try transExprCoercing(rp, scope, expr, .used, .r_value) else @@ -2059,16 +2059,41 @@ fn transStringLiteral( }; return maybeSuppressResult(rp, scope, result_used, &node.base); }, - .UTF16, .UTF32, .Wide => return revertAndWarn( - rp, - error.UnsupportedTranslation, - @ptrCast(*const clang.Stmt, stmt).getBeginLoc(), - "TODO: support string literal kind {s}", - .{kind}, - ), + .UTF16, .UTF32, .Wide => { + const node = try transWideStringLiteral(rp, scope, stmt); + return maybeSuppressResult(rp, scope, result_used, node); + }, } } +/// Translates a wide string literal as a global "anonymous" array of the relevant-sized +/// integer type + null terminator, and returns an identifier node for it +fn transWideStringLiteral(rp: RestorePoint, scope: *Scope, stmt: *const clang.StringLiteral) TransError!*ast.Node { + const str_type = @tagName(stmt.getKind()); + const mangle = rp.c.getMangle(); + const name = try std.fmt.allocPrint(rp.c.arena, "zig.{s}_string_{d}", .{ str_type, mangle }); + + const const_tok = try appendToken(rp.c, .Keyword_const, "const"); + const name_tok = try appendIdentifier(rp.c, name); + const eq_tok = try appendToken(rp.c, .Equal, "="); + var semi_tok: ast.TokenIndex = undefined; + + const lit_array = try transStringLiteralAsArray(rp, scope, stmt, stmt.getLength() + 1); + + semi_tok = try appendToken(rp.c, .Semicolon, ";"); + const var_decl_node = try ast.Node.VarDecl.create(rp.c.arena, .{ + .name_token = name_tok, + .mut_token = const_tok, + .semicolon_token = semi_tok, + }, .{ + .visib_token = null, + .eq_token = eq_tok, + .init_node = lit_array, + }); + try addTopLevelDecl(rp.c, name, &var_decl_node.base); + return transCreateNodeIdentifier(rp.c, name); +} + /// Parse the size of an array back out from an ast Node. 
fn zigArraySize(c: *Context, node: *ast.Node) TransError!usize { if (node.castTag(.ArrayType)) |array| { @@ -2081,17 +2106,18 @@ fn zigArraySize(c: *Context, node: *ast.Node) TransError!usize { } /// Translate a string literal to an array of integers. Used when an -/// array is initialized from a string literal. `target_node` is the -/// array being initialized. If the string literal is larger than the -/// array, truncate the string. If the array is larger than the string -/// literal, pad the array with 0's +/// array is initialized from a string literal. `array_size` is the +/// size of the array being initialized. If the string literal is larger +/// than the array, truncate the string. If the array is larger than the +/// string literal, pad the array with 0's fn transStringLiteralAsArray( rp: RestorePoint, scope: *Scope, stmt: *const clang.StringLiteral, - target_node: *ast.Node, + array_size: usize, ) TransError!*ast.Node { - const array_size = try zigArraySize(rp.c, target_node); + if (array_size == 0) return error.UnsupportedType; + const str_length = stmt.getLength(); const expr_base = @ptrCast(*const clang.Expr, stmt); diff --git a/test/run_translated_c.zig b/test/run_translated_c.zig index e3f9d6b132..a99271eb41 100644 --- a/test/run_translated_c.zig +++ b/test/run_translated_c.zig @@ -794,4 +794,28 @@ pub fn addCases(cases: *tests.RunTranslatedCContext) void { \\ return 0; \\} , ""); + + cases.add("Wide, UTF-16, and UTF-32 string literals", + \\#include <stdlib.h> + \\#include <stdint.h> + \\#include <wchar.h> + \\int main(void) { + \\ const wchar_t *wide_str = L"wide"; + \\ const wchar_t wide_hello[] = L"hello"; + \\ if (wcslen(wide_str) != 4) abort(); + \\ if (wcslen(L"literal") != 7) abort(); + \\ if (wcscmp(wide_hello, L"hello") != 0) abort(); + \\ + \\ const uint16_t *u16_str = u"wide"; + \\ const uint16_t u16_hello[] = u"hello"; + \\ if (u16_str[3] != u'e' || u16_str[4] != 0) abort(); + \\ if (u16_hello[4] != u'o' || u16_hello[5] != 0) abort(); + \\ + \\ const uint32_t *u32_str
= U"wide"; + \\ const uint32_t u32_hello[] = U"hello"; + \\ if (u32_str[3] != U'e' || u32_str[4] != 0) abort(); + \\ if (u32_hello[4] != U'o' || u32_hello[5] != 0) abort(); + \\ return 0; + \\} + , ""); }