translate-c: add wide string literal support

Adds support for wide, UTF-16, and UTF-32 string literals. When one is used to
initialize an incomplete array, it is translated with the same logic as a narrow
string literal. Otherwise it is translated as a global "anonymous" array of the
relevant underlying char type. The generated name contains a dot, which cannot
appear in a C identifier, to ensure it does not conflict with any other name in
the translated program.

For example:

```c
void my_fn() {
    const uint32_t *foo = U"foo";
}
```

becomes:
```zig
const @"zig.UTF32_string_2" = [4]c_uint{
    '\u{66}',
    '\u{6f}',
    '\u{6f}',
    0,
};
pub export fn my_fn() void {
    var foo: [*c]const u32 = &@"zig.UTF32_string_2";
}
```
This commit is contained in:
Evan Haas 2021-01-26 09:08:05 -08:00 committed by Andrew Kelley
parent 79730e6f5c
commit 1ed8c54cd3
2 changed files with 65 additions and 15 deletions

View File

@ -780,7 +780,7 @@ fn visitVarDecl(c: *Context, var_decl: *const clang.VarDecl, mangled_name: ?[]co
eq_tok = try appendToken(c, .Equal, "=");
if (decl_init) |expr| {
const node_or_error = if (expr.getStmtClass() == .StringLiteralClass)
transStringLiteralAsArray(rp, &c.global_scope.base, @ptrCast(*const clang.StringLiteral, expr), type_node)
transStringLiteralAsArray(rp, &c.global_scope.base, @ptrCast(*const clang.StringLiteral, expr), zigArraySize(rp.c, type_node) catch 0)
else
transExprCoercing(rp, scope, expr, .used, .r_value);
init_node = node_or_error catch |err| switch (err) {
@ -1662,7 +1662,7 @@ fn transDeclStmtOne(
const eq_token = try appendToken(c, .Equal, "=");
var init_node = if (decl_init) |expr|
if (expr.getStmtClass() == .StringLiteralClass)
try transStringLiteralAsArray(rp, scope, @ptrCast(*const clang.StringLiteral, expr), type_node)
try transStringLiteralAsArray(rp, scope, @ptrCast(*const clang.StringLiteral, expr), try zigArraySize(rp.c, type_node))
else
try transExprCoercing(rp, scope, expr, .used, .r_value)
else
@ -2059,16 +2059,41 @@ fn transStringLiteral(
};
return maybeSuppressResult(rp, scope, result_used, &node.base);
},
.UTF16, .UTF32, .Wide => return revertAndWarn(
rp,
error.UnsupportedTranslation,
@ptrCast(*const clang.Stmt, stmt).getBeginLoc(),
"TODO: support string literal kind {s}",
.{kind},
),
.UTF16, .UTF32, .Wide => {
const node = try transWideStringLiteral(rp, scope, stmt);
return maybeSuppressResult(rp, scope, result_used, node);
},
}
}
/// Translates a wide, UTF-16, or UTF-32 string literal as a global "anonymous"
/// `const` array and returns an identifier node that refers to that array.
///
/// The array is requested with `stmt.getLength() + 1` elements, i.e. one more
/// than the literal's own length, which leaves room for the null terminator.
/// The declaration is named `zig.<kind>_string_<mangle>`, where `<kind>` is the
/// tag name of `stmt.getKind()` and `<mangle>` comes from `rp.c.getMangle()`.
///
/// Any error from allocation, token/node creation, or
/// `transStringLiteralAsArray` is propagated to the caller.
fn transWideStringLiteral(rp: RestorePoint, scope: *Scope, stmt: *const clang.StringLiteral) TransError!*ast.Node {
    const c = rp.c;

    const kind_name = @tagName(stmt.getKind());
    const mangle = c.getMangle();
    const name = try std.fmt.allocPrint(c.arena, "zig.{s}_string_{d}", .{ kind_name, mangle });

    // Keep the append order identical to the textual order of the declaration:
    // `const <name> = <initializer> ;`. The initializer is therefore translated
    // after `=` has been appended and before `;` is.
    const const_tok = try appendToken(c, .Keyword_const, "const");
    const name_tok = try appendIdentifier(c, name);
    const eq_tok = try appendToken(c, .Equal, "=");
    const lit_array = try transStringLiteralAsArray(rp, scope, stmt, stmt.getLength() + 1);
    const semi_tok = try appendToken(c, .Semicolon, ";");

    const var_decl_node = try ast.Node.VarDecl.create(c.arena, .{
        .name_token = name_tok,
        .mut_token = const_tok,
        .semicolon_token = semi_tok,
    }, .{
        .visib_token = null,
        .eq_token = eq_tok,
        .init_node = lit_array,
    });
    try addTopLevelDecl(c, name, &var_decl_node.base);

    return transCreateNodeIdentifier(c, name);
}
/// Parse the size of an array back out from an ast Node.
fn zigArraySize(c: *Context, node: *ast.Node) TransError!usize {
if (node.castTag(.ArrayType)) |array| {
@ -2081,17 +2106,18 @@ fn zigArraySize(c: *Context, node: *ast.Node) TransError!usize {
}
/// Translate a string literal to an array of integers. Used when an
/// array is initialized from a string literal. `target_node` is the
/// array being initialized. If the string literal is larger than the
/// array, truncate the string. If the array is larger than the string
/// literal, pad the array with 0's
/// array is initialized from a string literal. `array_size` is the
/// size of the array being initialized. If the string literal is larger
/// than the array, truncate the string. If the array is larger than the
/// string literal, pad the array with 0's
fn transStringLiteralAsArray(
rp: RestorePoint,
scope: *Scope,
stmt: *const clang.StringLiteral,
target_node: *ast.Node,
array_size: usize,
) TransError!*ast.Node {
const array_size = try zigArraySize(rp.c, target_node);
if (array_size == 0) return error.UnsupportedType;
const str_length = stmt.getLength();
const expr_base = @ptrCast(*const clang.Expr, stmt);

View File

@ -794,4 +794,28 @@ pub fn addCases(cases: *tests.RunTranslatedCContext) void {
\\ return 0;
\\}
, "");
cases.add("Wide, UTF-16, and UTF-32 string literals",
\\#include <stdlib.h>
\\#include <stdint.h>
\\#include <wchar.h>
\\int main(void) {
\\ const wchar_t *wide_str = L"wide";
\\ const wchar_t wide_hello[] = L"hello";
\\ if (wcslen(wide_str) != 4) abort();
\\ if (wcslen(L"literal") != 7) abort();
\\ if (wcscmp(wide_hello, L"hello") != 0) abort();
\\
\\ const uint16_t *u16_str = u"wide";
\\ const uint16_t u16_hello[] = u"hello";
\\ if (u16_str[3] != u'e' || u16_str[4] != 0) abort();
\\ if (u16_hello[4] != u'o' || u16_hello[5] != 0) abort();
\\
\\ const uint32_t *u32_str = U"wide";
\\ const uint32_t u32_hello[] = U"hello";
\\ if (u32_str[3] != U'e' || u32_str[4] != 0) abort();
\\ if (u32_hello[4] != U'o' || u32_hello[5] != 0) abort();
\\ return 0;
\\}
, "");
}