std.Target: Update CPU models/features for LLVM 19.

This commit is contained in:
Alex Rønne Petersen 2024-08-23 02:04:59 +02:00 committed by Andrew Kelley
parent 662683cafd
commit da8b7fb0c0
11 changed files with 2200 additions and 628 deletions

File diff suppressed because it is too large Load Diff

View File

@ -8,16 +8,23 @@ pub const Feature = enum {
@"16_bit_insts",
a16,
add_no_carry_insts,
agent_scope_fine_grained_remote_memory_atomics,
allocate1_5xvgprs,
aperture_regs,
architected_flat_scratch,
architected_sgprs,
atomic_buffer_global_pk_add_f16_insts,
atomic_buffer_global_pk_add_f16_no_rtn_insts,
atomic_buffer_pk_add_bf16_inst,
atomic_csub_no_rtn_insts,
atomic_ds_pk_add_16_insts,
atomic_fadd_no_rtn_insts,
atomic_fadd_rtn_insts,
atomic_flat_pk_add_16_insts,
atomic_fmin_fmax_flat_f32,
atomic_fmin_fmax_flat_f64,
atomic_fmin_fmax_global_f32,
atomic_fmin_fmax_global_f64,
atomic_global_pk_add_bf16_inst,
auto_waitcnt_before_barrier,
back_off_barrier,
@ -27,6 +34,7 @@ pub const Feature = enum {
default_component_zero,
dl_insts,
dot10_insts,
dot11_insts,
dot1_insts,
dot2_insts,
dot3_insts,
@ -47,6 +55,7 @@ pub const Feature = enum {
fast_fmaf,
flat_address_space,
flat_atomic_fadd_f32_inst,
flat_buffer_global_fadd_f64_inst,
flat_for_global,
flat_global_insts,
flat_inst_offsets,
@ -71,7 +80,6 @@ pub const Feature = enum {
gfx10_b_encoding,
gfx10_insts,
gfx11,
gfx11_full_vgprs,
gfx11_insts,
gfx12,
gfx12_insts,
@ -101,9 +109,12 @@ pub const Feature = enum {
mad_mac_f32_insts,
mad_mix_insts,
mai_insts,
max_hard_clause_length_32,
max_hard_clause_length_63,
max_private_element_size_16,
max_private_element_size_4,
max_private_element_size_8,
memory_atomic_fadd_f32_denormal_support,
mfma_inline_literal_bug,
mimg_r128,
movrel,
@ -120,11 +131,15 @@ pub const Feature = enum {
packed_tid,
partial_nsa_encoding,
pk_fmac_f16_inst,
precise_memory,
priv_enabled_trap2_nop_bug,
promote_alloca,
prt_strict_null,
pseudo_scalar_trans,
r128_a16,
real_true16,
required_export_priority,
requires_cov6,
restricted_soffset,
s_memrealtime,
s_memtime_inst,
@ -201,6 +216,16 @@ pub const all_features = blk: {
.description = "Have VALU add/sub instructions without carry out",
.dependencies = featureSet(&[_]Feature{}),
};
// Table slot for `agent_scope_fine_grained_remote_memory_atomics`; it lists no dependencies.
result[@intFromEnum(Feature.agent_scope_fine_grained_remote_memory_atomics)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Agent (device) scoped atomic operations, excluding those directly supported by PCIe (i.e. integer atomic add, exchange, and compare-and-swap), are functional for allocations in host or peer device memory.",
    .llvm_name = "agent-scope-fine-grained-remote-memory-atomics",
};
// Table slot for `allocate1_5xvgprs`; it lists no dependencies.
result[@intFromEnum(Feature.allocate1_5xvgprs)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Has 50% more physical VGPRs and 50% larger allocation granule",
    .llvm_name = "allocate1_5xvgprs",
};
result[@intFromEnum(Feature.aperture_regs)] = .{
.llvm_name = "aperture-regs",
.description = "Has Memory Aperture Base and Size Registers",
@ -230,6 +255,11 @@ pub const all_features = blk: {
.flat_global_insts,
}),
};
// Table slot for `atomic_buffer_pk_add_bf16_inst`; it lists no dependencies.
result[@intFromEnum(Feature.atomic_buffer_pk_add_bf16_inst)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Has buffer_atomic_pk_add_bf16 instruction",
    .llvm_name = "atomic-buffer-pk-add-bf16-inst",
};
result[@intFromEnum(Feature.atomic_csub_no_rtn_insts)] = .{
.llvm_name = "atomic-csub-no-rtn-insts",
.description = "Has buffer_atomic_csub and global_atomic_csub instructions that don't return original value",
@ -259,6 +289,26 @@ pub const all_features = blk: {
.description = "Has flat_atomic_pk_add_f16 and flat_atomic_pk_add_bf16 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
// Table slots for the atomicrmw fmin/fmax features (flat f32, flat f64, global f32).
// None of them lists a dependency.
result[@intFromEnum(Feature.atomic_fmin_fmax_flat_f32)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Has flat memory instructions for atomicrmw fmin/fmax for float",
    .llvm_name = "atomic-fmin-fmax-flat-f32",
};
result[@intFromEnum(Feature.atomic_fmin_fmax_flat_f64)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Has flat memory instructions for atomicrmw fmin/fmax for double",
    .llvm_name = "atomic-fmin-fmax-flat-f64",
};
result[@intFromEnum(Feature.atomic_fmin_fmax_global_f32)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Has global/buffer instructions for atomicrmw fmin/fmax for float",
    .llvm_name = "atomic-fmin-fmax-global-f32",
};
// Table slot for `atomic_fmin_fmax_global_f64`; it lists no dependencies.
// The description names "double" because this is the f64 variant, matching the wording
// used by the `atomic_fmin_fmax_flat_f64` entry. The previous text said "float", which
// duplicated the f32 entry's description.
// NOTE(review): if descriptions are regenerated from LLVM's tables, the same wording
// may need fixing at the source — confirm.
result[@intFromEnum(Feature.atomic_fmin_fmax_global_f64)] = .{
    .llvm_name = "atomic-fmin-fmax-global-f64",
    .description = "Has global/buffer instructions for atomicrmw fmin/fmax for double",
    .dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.atomic_global_pk_add_bf16_inst)] = .{
.llvm_name = "atomic-global-pk-add-bf16-inst",
.description = "Has global_atomic_pk_add_bf16 instruction",
@ -306,6 +356,11 @@ pub const all_features = blk: {
.description = "Has v_dot2_f32_f16 instruction",
.dependencies = featureSet(&[_]Feature{}),
};
// Table slot for `dot11_insts`; it lists no dependencies.
result[@intFromEnum(Feature.dot11_insts)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Has v_dot4_f32_fp8_fp8, v_dot4_f32_fp8_bf8, v_dot4_f32_bf8_fp8, v_dot4_f32_bf8_bf8 instructions",
    .llvm_name = "dot11-insts",
};
result[@intFromEnum(Feature.dot1_insts)] = .{
.llvm_name = "dot1-insts",
.description = "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions",
@ -406,6 +461,11 @@ pub const all_features = blk: {
.description = "Has flat_atomic_add_f32 instruction",
.dependencies = featureSet(&[_]Feature{}),
};
// Table slot for `flat_buffer_global_fadd_f64_inst`; it lists no dependencies.
result[@intFromEnum(Feature.flat_buffer_global_fadd_f64_inst)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Has flat, buffer, and global instructions for f64 atomic fadd",
    .llvm_name = "flat-buffer-global-fadd-f64-inst",
};
result[@intFromEnum(Feature.flat_for_global)] = .{
.llvm_name = "flat-for-global",
.description = "Force to generate flat instruction for global",
@ -504,6 +564,10 @@ pub const all_features = blk: {
.a16,
.add_no_carry_insts,
.aperture_regs,
.atomic_fmin_fmax_flat_f32,
.atomic_fmin_fmax_flat_f64,
.atomic_fmin_fmax_global_f32,
.atomic_fmin_fmax_global_f64,
.ci_insts,
.default_component_zero,
.dpp,
@ -527,6 +591,7 @@ pub const all_features = blk: {
.int_clamp_insts,
.inv_2pi_inline_imm,
.localmemorysize65536,
.max_hard_clause_length_63,
.mimg_r128,
.movrel,
.no_data_dep_hazard,
@ -573,6 +638,8 @@ pub const all_features = blk: {
.a16,
.add_no_carry_insts,
.aperture_regs,
.atomic_fmin_fmax_flat_f32,
.atomic_fmin_fmax_global_f32,
.ci_insts,
.default_component_zero,
.dpp,
@ -599,6 +666,7 @@ pub const all_features = blk: {
.int_clamp_insts,
.inv_2pi_inline_imm,
.localmemorysize65536,
.max_hard_clause_length_32,
.mimg_r128,
.movrel,
.no_data_dep_hazard,
@ -613,11 +681,6 @@ pub const all_features = blk: {
.vscnt,
}),
};
// Table slot for `gfx11_full_vgprs`; it lists no dependencies.
result[@intFromEnum(Feature.gfx11_full_vgprs)] = .{
    .dependencies = featureSet(&.{}),
    .description = "GFX11 with 50% more physical VGPRs and 50% larger allocation granule than GFX10",
    .llvm_name = "gfx11-full-vgprs",
};
result[@intFromEnum(Feature.gfx11_insts)] = .{
.llvm_name = "gfx11-insts",
.description = "Additional instructions for GFX11+",
@ -630,7 +693,10 @@ pub const all_features = blk: {
.@"16_bit_insts",
.a16,
.add_no_carry_insts,
.agent_scope_fine_grained_remote_memory_atomics,
.aperture_regs,
.atomic_fmin_fmax_flat_f32,
.atomic_fmin_fmax_global_f32,
.ci_insts,
.default_component_broadcast,
.dpp,
@ -655,6 +721,7 @@ pub const all_features = blk: {
.int_clamp_insts,
.inv_2pi_inline_imm,
.localmemorysize65536,
.max_hard_clause_length_32,
.mimg_r128,
.movrel,
.no_data_dep_hazard,
@ -844,6 +911,16 @@ pub const all_features = blk: {
.description = "Has mAI instructions",
.dependencies = featureSet(&[_]Feature{}),
};
// Table slots for the two S_CLAUSE length limits (32 and 63); neither lists a dependency.
result[@intFromEnum(Feature.max_hard_clause_length_32)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Maximum number of instructions in an explicit S_CLAUSE is 32",
    .llvm_name = "max-hard-clause-length-32",
};
result[@intFromEnum(Feature.max_hard_clause_length_63)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Maximum number of instructions in an explicit S_CLAUSE is 63",
    .llvm_name = "max-hard-clause-length-63",
};
result[@intFromEnum(Feature.max_private_element_size_16)] = .{
.llvm_name = "max-private-element-size-16",
.description = "Maximum private access size may be 16",
@ -859,6 +936,11 @@ pub const all_features = blk: {
.description = "Maximum private access size may be 8",
.dependencies = featureSet(&[_]Feature{}),
};
// Table slot for `memory_atomic_fadd_f32_denormal_support`; it lists no dependencies.
result[@intFromEnum(Feature.memory_atomic_fadd_f32_denormal_support)] = .{
    .dependencies = featureSet(&.{}),
    .description = "global/flat/buffer atomic fadd for float supports denormal handling",
    .llvm_name = "memory-atomic-fadd-f32-denormal-support",
};
result[@intFromEnum(Feature.mfma_inline_literal_bug)] = .{
.llvm_name = "mfma-inline-literal-bug",
.description = "MFMA cannot use inline literal as SrcC",
@ -939,6 +1021,16 @@ pub const all_features = blk: {
.description = "Has v_pk_fmac_f16 instruction",
.dependencies = featureSet(&[_]Feature{}),
};
// Table slot for `precise_memory`; it lists no dependencies.
result[@intFromEnum(Feature.precise_memory)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Enable precise memory mode",
    .llvm_name = "precise-memory",
};
// Table slot for `priv_enabled_trap2_nop_bug`; it lists no dependencies.
result[@intFromEnum(Feature.priv_enabled_trap2_nop_bug)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Hardware that runs with PRIV=1 interpreting 's_trap 2' as a nop bug",
    .llvm_name = "priv-enabled-trap2-nop-bug",
};
result[@intFromEnum(Feature.promote_alloca)] = .{
.llvm_name = "promote-alloca",
.description = "Enable promote alloca pass",
@ -964,6 +1056,16 @@ pub const all_features = blk: {
.description = "Use true 16-bit registers",
.dependencies = featureSet(&[_]Feature{}),
};
// Table slot for `required_export_priority`; it lists no dependencies.
result[@intFromEnum(Feature.required_export_priority)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Export priority must be explicitly manipulated on GFX11.5",
    .llvm_name = "required-export-priority",
};
// Table slot for `requires_cov6`; it lists no dependencies.
result[@intFromEnum(Feature.requires_cov6)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Target Requires Code Object V6",
    .llvm_name = "requires-cov6",
};
result[@intFromEnum(Feature.restricted_soffset)] = .{
.llvm_name = "restricted-soffset",
.description = "Has restricted SOffset (immediate not supported).",
@ -1038,6 +1140,10 @@ pub const all_features = blk: {
.llvm_name = "sea-islands",
.description = "SEA_ISLANDS GPU generation",
.dependencies = featureSet(&[_]Feature{
.atomic_fmin_fmax_flat_f32,
.atomic_fmin_fmax_flat_f64,
.atomic_fmin_fmax_global_f32,
.atomic_fmin_fmax_global_f64,
.ci_insts,
.default_component_zero,
.ds_src2_insts,
@ -1087,6 +1193,8 @@ pub const all_features = blk: {
.llvm_name = "southern-islands",
.description = "SOUTHERN_ISLANDS GPU generation",
.dependencies = featureSet(&[_]Feature{
.atomic_fmin_fmax_global_f32,
.atomic_fmin_fmax_global_f64,
.default_component_zero,
.ds_src2_insts,
.extended_image_insts,
@ -1365,7 +1473,6 @@ pub const cpu = struct {
.vcmpx_exec_war_hazard,
.vcmpx_permlane_hazard,
.vmem_to_scalar_write_hazard,
.wavefrontsize32,
.xnack_support,
}),
};
@ -1402,7 +1509,6 @@ pub const cpu = struct {
.vcmpx_exec_war_hazard,
.vcmpx_permlane_hazard,
.vmem_to_scalar_write_hazard,
.wavefrontsize32,
.xnack_support,
}),
};
@ -1439,7 +1545,6 @@ pub const cpu = struct {
.vcmpx_exec_war_hazard,
.vcmpx_permlane_hazard,
.vmem_to_scalar_write_hazard,
.wavefrontsize32,
.xnack_support,
}),
};
@ -1471,7 +1576,6 @@ pub const cpu = struct {
.vcmpx_exec_war_hazard,
.vcmpx_permlane_hazard,
.vmem_to_scalar_write_hazard,
.wavefrontsize32,
.xnack_support,
}),
};
@ -1494,7 +1598,6 @@ pub const cpu = struct {
.ldsbankcount32,
.nsa_encoding,
.shader_cycles_register,
.wavefrontsize32,
}),
};
pub const gfx1031 = CpuModel{
@ -1516,7 +1619,6 @@ pub const cpu = struct {
.ldsbankcount32,
.nsa_encoding,
.shader_cycles_register,
.wavefrontsize32,
}),
};
pub const gfx1032 = CpuModel{
@ -1538,7 +1640,6 @@ pub const cpu = struct {
.ldsbankcount32,
.nsa_encoding,
.shader_cycles_register,
.wavefrontsize32,
}),
};
pub const gfx1033 = CpuModel{
@ -1560,7 +1661,6 @@ pub const cpu = struct {
.ldsbankcount32,
.nsa_encoding,
.shader_cycles_register,
.wavefrontsize32,
}),
};
pub const gfx1034 = CpuModel{
@ -1582,7 +1682,6 @@ pub const cpu = struct {
.ldsbankcount32,
.nsa_encoding,
.shader_cycles_register,
.wavefrontsize32,
}),
};
pub const gfx1035 = CpuModel{
@ -1604,7 +1703,6 @@ pub const cpu = struct {
.ldsbankcount32,
.nsa_encoding,
.shader_cycles_register,
.wavefrontsize32,
}),
};
pub const gfx1036 = CpuModel{
@ -1626,13 +1724,66 @@ pub const cpu = struct {
.ldsbankcount32,
.nsa_encoding,
.shader_cycles_register,
.wavefrontsize32,
}),
};
/// CPU model `gfx10_1_generic` (LLVM name `gfx10-1-generic`) and its feature set.
pub const gfx10_1_generic: CpuModel = .{
    .llvm_name = "gfx10-1-generic",
    .name = "gfx10_1_generic",
    .features = featureSet(&.{
        .back_off_barrier,
        .dl_insts,
        .ds_src2_insts,
        .flat_segment_offset_bug,
        .get_wave_id_inst,
        .gfx10,
        .inst_fwd_prefetch_bug,
        .lds_branch_vmem_war_hazard,
        .lds_misaligned_bug,
        .ldsbankcount32,
        .mad_mac_f32_insts,
        .negative_unaligned_scratch_offset_bug,
        .nsa_clause_bug,
        .nsa_encoding,
        .nsa_to_vmem_bug,
        .offset_3f_bug,
        .requires_cov6,
        .scalar_atomics,
        .scalar_flat_scratch_insts,
        .scalar_stores,
        .smem_to_vector_write_hazard,
        .vcmpx_exec_war_hazard,
        .vcmpx_permlane_hazard,
        .vmem_to_scalar_write_hazard,
        .xnack_support,
    }),
};
/// CPU model `gfx10_3_generic` (LLVM name `gfx10-3-generic`) and its feature set.
pub const gfx10_3_generic: CpuModel = .{
    .llvm_name = "gfx10-3-generic",
    .name = "gfx10_3_generic",
    .features = featureSet(&.{
        .back_off_barrier,
        .dl_insts,
        .dot10_insts,
        .dot1_insts,
        .dot2_insts,
        .dot5_insts,
        .dot6_insts,
        .dot7_insts,
        .gfx10,
        .gfx10_3_insts,
        .gfx10_a_encoding,
        .gfx10_b_encoding,
        .ldsbankcount32,
        .nsa_encoding,
        .requires_cov6,
        .shader_cycles_register,
    }),
};
pub const gfx1100 = CpuModel{
.name = "gfx1100",
.llvm_name = "gfx1100",
.features = featureSet(&[_]Feature{
.allocate1_5xvgprs,
.architected_flat_scratch,
.atomic_fadd_no_rtn_insts,
.atomic_fadd_rtn_insts,
@ -1644,25 +1795,26 @@ pub const cpu = struct {
.dot9_insts,
.flat_atomic_fadd_f32_inst,
.gfx11,
.gfx11_full_vgprs,
.image_insts,
.ldsbankcount32,
.mad_intra_fwd_bug,
.memory_atomic_fadd_f32_denormal_support,
.msaa_load_dst_sel_bug,
.nsa_encoding,
.packed_tid,
.partial_nsa_encoding,
.priv_enabled_trap2_nop_bug,
.shader_cycles_register,
.user_sgpr_init16_bug,
.valu_trans_use_hazard,
.vcmpx_permlane_hazard,
.wavefrontsize32,
}),
};
pub const gfx1101 = CpuModel{
.name = "gfx1101",
.llvm_name = "gfx1101",
.features = featureSet(&[_]Feature{
.allocate1_5xvgprs,
.architected_flat_scratch,
.atomic_fadd_no_rtn_insts,
.atomic_fadd_rtn_insts,
@ -1674,18 +1826,18 @@ pub const cpu = struct {
.dot9_insts,
.flat_atomic_fadd_f32_inst,
.gfx11,
.gfx11_full_vgprs,
.image_insts,
.ldsbankcount32,
.mad_intra_fwd_bug,
.memory_atomic_fadd_f32_denormal_support,
.msaa_load_dst_sel_bug,
.nsa_encoding,
.packed_tid,
.partial_nsa_encoding,
.priv_enabled_trap2_nop_bug,
.shader_cycles_register,
.valu_trans_use_hazard,
.vcmpx_permlane_hazard,
.wavefrontsize32,
}),
};
pub const gfx1102 = CpuModel{
@ -1706,15 +1858,16 @@ pub const cpu = struct {
.image_insts,
.ldsbankcount32,
.mad_intra_fwd_bug,
.memory_atomic_fadd_f32_denormal_support,
.msaa_load_dst_sel_bug,
.nsa_encoding,
.packed_tid,
.partial_nsa_encoding,
.priv_enabled_trap2_nop_bug,
.shader_cycles_register,
.user_sgpr_init16_bug,
.valu_trans_use_hazard,
.vcmpx_permlane_hazard,
.wavefrontsize32,
}),
};
pub const gfx1103 = CpuModel{
@ -1735,14 +1888,15 @@ pub const cpu = struct {
.image_insts,
.ldsbankcount32,
.mad_intra_fwd_bug,
.memory_atomic_fadd_f32_denormal_support,
.msaa_load_dst_sel_bug,
.nsa_encoding,
.packed_tid,
.partial_nsa_encoding,
.priv_enabled_trap2_nop_bug,
.shader_cycles_register,
.valu_trans_use_hazard,
.vcmpx_permlane_hazard,
.wavefrontsize32,
}),
};
pub const gfx1150 = CpuModel{
@ -1763,20 +1917,50 @@ pub const cpu = struct {
.gfx11,
.image_insts,
.ldsbankcount32,
.mad_intra_fwd_bug,
.memory_atomic_fadd_f32_denormal_support,
.nsa_encoding,
.packed_tid,
.partial_nsa_encoding,
.required_export_priority,
.salu_float,
.shader_cycles_register,
.vcmpx_permlane_hazard,
.vgpr_singleuse_hint,
.wavefrontsize32,
}),
};
/// CPU model `gfx1151` (same LLVM name) and its feature set.
pub const gfx1151: CpuModel = .{
    .llvm_name = "gfx1151",
    .name = "gfx1151",
    .features = featureSet(&.{
        .allocate1_5xvgprs,
        .architected_flat_scratch,
        .atomic_fadd_no_rtn_insts,
        .atomic_fadd_rtn_insts,
        .dl_insts,
        .dot10_insts,
        .dot5_insts,
        .dot7_insts,
        .dot8_insts,
        .dot9_insts,
        .dpp_src1_sgpr,
        .flat_atomic_fadd_f32_inst,
        .gfx11,
        .image_insts,
        .ldsbankcount32,
        .memory_atomic_fadd_f32_denormal_support,
        .nsa_encoding,
        .packed_tid,
        .partial_nsa_encoding,
        .required_export_priority,
        .salu_float,
        .shader_cycles_register,
        .vcmpx_permlane_hazard,
        .vgpr_singleuse_hint,
    }),
};
pub const gfx1152 = CpuModel{
.name = "gfx1152",
.llvm_name = "gfx1152",
.features = featureSet(&[_]Feature{
.architected_flat_scratch,
.atomic_fadd_no_rtn_insts,
@ -1790,27 +1974,60 @@ pub const cpu = struct {
.dpp_src1_sgpr,
.flat_atomic_fadd_f32_inst,
.gfx11,
.gfx11_full_vgprs,
.image_insts,
.ldsbankcount32,
.mad_intra_fwd_bug,
.memory_atomic_fadd_f32_denormal_support,
.nsa_encoding,
.packed_tid,
.partial_nsa_encoding,
.required_export_priority,
.salu_float,
.shader_cycles_register,
.vcmpx_permlane_hazard,
.vgpr_singleuse_hint,
.wavefrontsize32,
}),
};
/// CPU model `gfx11_generic` (LLVM name `gfx11-generic`) and its feature set.
pub const gfx11_generic: CpuModel = .{
    .llvm_name = "gfx11-generic",
    .name = "gfx11_generic",
    .features = featureSet(&.{
        .architected_flat_scratch,
        .atomic_fadd_no_rtn_insts,
        .atomic_fadd_rtn_insts,
        .dl_insts,
        .dot10_insts,
        .dot5_insts,
        .dot7_insts,
        .dot8_insts,
        .dot9_insts,
        .flat_atomic_fadd_f32_inst,
        .gfx11,
        .image_insts,
        .ldsbankcount32,
        .mad_intra_fwd_bug,
        .memory_atomic_fadd_f32_denormal_support,
        .msaa_load_dst_sel_bug,
        .nsa_encoding,
        .packed_tid,
        .partial_nsa_encoding,
        .priv_enabled_trap2_nop_bug,
        .required_export_priority,
        .requires_cov6,
        .shader_cycles_register,
        .user_sgpr_init16_bug,
        .valu_trans_use_hazard,
        .vcmpx_permlane_hazard,
    }),
};
pub const gfx1200 = CpuModel{
.name = "gfx1200",
.llvm_name = "gfx1200",
.features = featureSet(&[_]Feature{
.allocate1_5xvgprs,
.architected_flat_scratch,
.architected_sgprs,
.atomic_buffer_global_pk_add_f16_insts,
.atomic_buffer_pk_add_bf16_inst,
.atomic_ds_pk_add_16_insts,
.atomic_fadd_no_rtn_insts,
.atomic_fadd_rtn_insts,
@ -1818,6 +2035,7 @@ pub const cpu = struct {
.atomic_global_pk_add_bf16_inst,
.dl_insts,
.dot10_insts,
.dot11_insts,
.dot7_insts,
.dot8_insts,
.dot9_insts,
@ -1828,6 +2046,7 @@ pub const cpu = struct {
.gfx12,
.image_insts,
.ldsbankcount32,
.memory_atomic_fadd_f32_denormal_support,
.nsa_encoding,
.packed_tid,
.partial_nsa_encoding,
@ -1838,16 +2057,17 @@ pub const cpu = struct {
.shader_cycles_hi_lo_registers,
.vcmpx_permlane_hazard,
.vgpr_singleuse_hint,
.wavefrontsize32,
}),
};
pub const gfx1201 = CpuModel{
.name = "gfx1201",
.llvm_name = "gfx1201",
.features = featureSet(&[_]Feature{
.allocate1_5xvgprs,
.architected_flat_scratch,
.architected_sgprs,
.atomic_buffer_global_pk_add_f16_insts,
.atomic_buffer_pk_add_bf16_inst,
.atomic_ds_pk_add_16_insts,
.atomic_fadd_no_rtn_insts,
.atomic_fadd_rtn_insts,
@ -1855,6 +2075,7 @@ pub const cpu = struct {
.atomic_global_pk_add_bf16_inst,
.dl_insts,
.dot10_insts,
.dot11_insts,
.dot7_insts,
.dot8_insts,
.dot9_insts,
@ -1865,6 +2086,7 @@ pub const cpu = struct {
.gfx12,
.image_insts,
.ldsbankcount32,
.memory_atomic_fadd_f32_denormal_support,
.nsa_encoding,
.packed_tid,
.partial_nsa_encoding,
@ -1875,7 +2097,47 @@ pub const cpu = struct {
.shader_cycles_hi_lo_registers,
.vcmpx_permlane_hazard,
.vgpr_singleuse_hint,
.wavefrontsize32,
}),
};
/// CPU model `gfx12_generic` (LLVM name `gfx12-generic`) and its feature set.
pub const gfx12_generic: CpuModel = .{
    .llvm_name = "gfx12-generic",
    .name = "gfx12_generic",
    .features = featureSet(&.{
        .allocate1_5xvgprs,
        .architected_flat_scratch,
        .architected_sgprs,
        .atomic_buffer_global_pk_add_f16_insts,
        .atomic_buffer_pk_add_bf16_inst,
        .atomic_ds_pk_add_16_insts,
        .atomic_fadd_no_rtn_insts,
        .atomic_fadd_rtn_insts,
        .atomic_flat_pk_add_16_insts,
        .atomic_global_pk_add_bf16_inst,
        .dl_insts,
        .dot10_insts,
        .dot11_insts,
        .dot7_insts,
        .dot8_insts,
        .dot9_insts,
        .dpp_src1_sgpr,
        .extended_image_insts,
        .flat_atomic_fadd_f32_inst,
        .fp8_conversion_insts,
        .gfx12,
        .image_insts,
        .ldsbankcount32,
        .memory_atomic_fadd_f32_denormal_support,
        .nsa_encoding,
        .packed_tid,
        .partial_nsa_encoding,
        .pseudo_scalar_trans,
        .requires_cov6,
        .restricted_soffset,
        .salu_float,
        .scalar_dwordx3_loads,
        .shader_cycles_hi_lo_registers,
        .vcmpx_permlane_hazard,
        .vgpr_singleuse_hint,
    }),
};
pub const gfx600 = CpuModel{
@ -2124,6 +2386,8 @@ pub const cpu = struct {
.atomic_buffer_global_pk_add_f16_insts,
.atomic_fadd_no_rtn_insts,
.atomic_fadd_rtn_insts,
.atomic_fmin_fmax_flat_f64,
.atomic_fmin_fmax_global_f64,
.back_off_barrier,
.dl_insts,
.dot10_insts,
@ -2135,6 +2399,7 @@ pub const cpu = struct {
.dot6_insts,
.dot7_insts,
.dpp_64bit,
.flat_buffer_global_fadd_f64_inst,
.fma_mix_insts,
.fmacf64_inst,
.full_rate_64_ops,
@ -2170,12 +2435,15 @@ pub const cpu = struct {
.name = "gfx940",
.llvm_name = "gfx940",
.features = featureSet(&[_]Feature{
.agent_scope_fine_grained_remote_memory_atomics,
.architected_flat_scratch,
.atomic_buffer_global_pk_add_f16_insts,
.atomic_ds_pk_add_16_insts,
.atomic_fadd_no_rtn_insts,
.atomic_fadd_rtn_insts,
.atomic_flat_pk_add_16_insts,
.atomic_fmin_fmax_flat_f64,
.atomic_fmin_fmax_global_f64,
.atomic_global_pk_add_bf16_inst,
.back_off_barrier,
.dl_insts,
@ -2189,6 +2457,7 @@ pub const cpu = struct {
.dot7_insts,
.dpp_64bit,
.flat_atomic_fadd_f32_inst,
.flat_buffer_global_fadd_f64_inst,
.fma_mix_insts,
.fmacf64_inst,
.force_store_sc0_sc1,
@ -2201,6 +2470,7 @@ pub const cpu = struct {
.kernarg_preload,
.ldsbankcount32,
.mai_insts,
.memory_atomic_fadd_f32_denormal_support,
.packed_fp32_ops,
.packed_tid,
.pk_fmac_f16_inst,
@ -2211,12 +2481,15 @@ pub const cpu = struct {
.name = "gfx941",
.llvm_name = "gfx941",
.features = featureSet(&[_]Feature{
.agent_scope_fine_grained_remote_memory_atomics,
.architected_flat_scratch,
.atomic_buffer_global_pk_add_f16_insts,
.atomic_ds_pk_add_16_insts,
.atomic_fadd_no_rtn_insts,
.atomic_fadd_rtn_insts,
.atomic_flat_pk_add_16_insts,
.atomic_fmin_fmax_flat_f64,
.atomic_fmin_fmax_global_f64,
.atomic_global_pk_add_bf16_inst,
.back_off_barrier,
.dl_insts,
@ -2230,6 +2503,7 @@ pub const cpu = struct {
.dot7_insts,
.dpp_64bit,
.flat_atomic_fadd_f32_inst,
.flat_buffer_global_fadd_f64_inst,
.fma_mix_insts,
.fmacf64_inst,
.force_store_sc0_sc1,
@ -2242,6 +2516,7 @@ pub const cpu = struct {
.kernarg_preload,
.ldsbankcount32,
.mai_insts,
.memory_atomic_fadd_f32_denormal_support,
.packed_fp32_ops,
.packed_tid,
.pk_fmac_f16_inst,
@ -2252,12 +2527,15 @@ pub const cpu = struct {
.name = "gfx942",
.llvm_name = "gfx942",
.features = featureSet(&[_]Feature{
.agent_scope_fine_grained_remote_memory_atomics,
.architected_flat_scratch,
.atomic_buffer_global_pk_add_f16_insts,
.atomic_ds_pk_add_16_insts,
.atomic_fadd_no_rtn_insts,
.atomic_fadd_rtn_insts,
.atomic_flat_pk_add_16_insts,
.atomic_fmin_fmax_flat_f64,
.atomic_fmin_fmax_global_f64,
.atomic_global_pk_add_bf16_inst,
.back_off_barrier,
.dl_insts,
@ -2271,6 +2549,7 @@ pub const cpu = struct {
.dot7_insts,
.dpp_64bit,
.flat_atomic_fadd_f32_inst,
.flat_buffer_global_fadd_f64_inst,
.fma_mix_insts,
.fmacf64_inst,
.fp8_conversion_insts,
@ -2282,12 +2561,28 @@ pub const cpu = struct {
.kernarg_preload,
.ldsbankcount32,
.mai_insts,
.memory_atomic_fadd_f32_denormal_support,
.packed_fp32_ops,
.packed_tid,
.pk_fmac_f16_inst,
.sramecc_support,
}),
};
/// CPU model `gfx9_generic` (LLVM name `gfx9-generic`) and its feature set.
pub const gfx9_generic: CpuModel = .{
    .llvm_name = "gfx9-generic",
    .name = "gfx9_generic",
    .features = featureSet(&.{
        .ds_src2_insts,
        .extended_image_insts,
        .gds,
        .gfx9,
        .image_gather4_d16_bug,
        .image_insts,
        .ldsbankcount32,
        .mad_mac_f32_insts,
        .requires_cov6,
    }),
};
pub const hainan = CpuModel{
.name = "hainan",
.llvm_name = "hainan",

View File

@ -7,9 +7,7 @@ const CpuModel = std.Target.Cpu.Model;
pub const Feature = enum {
@"32bit",
@"8msecext",
a76,
aapcs_frame_chain,
aapcs_frame_chain_leaf,
aclass,
acquire_release,
aes,
@ -40,7 +38,6 @@ pub const Feature = enum {
dsp,
execute_only,
expand_fp_mlx,
exynos,
fix_cmse_cve_2021_35465,
fix_cortex_a57_aes_1742098,
fp16,
@ -96,7 +93,6 @@ pub const Feature = enum {
lob,
long_calls,
loop_align,
m3,
mclass,
mp,
muxed_units,
@ -120,7 +116,6 @@ pub const Feature = enum {
prefer_ishst,
prefer_vmovsr,
prof_unpr,
r4,
ras,
rclass,
read_tp_tpidrprw,
@ -140,7 +135,6 @@ pub const Feature = enum {
soft_float,
splat_vfp_neon,
strict_align,
swift,
thumb2,
thumb_mode,
trustzone,
@ -228,23 +222,11 @@ pub const all_features = blk: {
.description = "Enable support for ARMv8-M Security Extensions",
.dependencies = featureSet(&[_]Feature{}),
};
// Table slot for `a76`; it lists no dependencies.
result[@intFromEnum(Feature.a76)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Cortex-A76 ARM processors",
    .llvm_name = "a76",
};
// Table slot for `aapcs_frame_chain`; it lists no dependencies.
result[@intFromEnum(Feature.aapcs_frame_chain)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Create an AAPCS compliant frame chain",
    .llvm_name = "aapcs-frame-chain",
};
// Table slot for `aapcs_frame_chain_leaf`; it depends on `aapcs_frame_chain`.
result[@intFromEnum(Feature.aapcs_frame_chain_leaf)] = .{
    .dependencies = featureSet(&.{
        .aapcs_frame_chain,
    }),
    .description = "Create an AAPCS compliant frame chain for leaf functions",
    .llvm_name = "aapcs-frame-chain-leaf",
};
result[@intFromEnum(Feature.aclass)] = .{
.llvm_name = "aclass",
.description = "Is application profile ('A' series)",
@ -422,32 +404,9 @@ pub const all_features = blk: {
.description = "Expand VFP/NEON MLA/MLS instructions",
.dependencies = featureSet(&[_]Feature{}),
};
// Table slot for `exynos`: an umbrella feature whose dependency set enumerates the
// individual features listed below.
result[@intFromEnum(Feature.exynos)] = .{
    .dependencies = featureSet(&.{
        .crc,
        .crypto,
        .expand_fp_mlx,
        .fuse_aes,
        .fuse_literals,
        .hwdiv,
        .hwdiv_arm,
        .prof_unpr,
        .ret_addr_stack,
        .slow_fp_brcc,
        .slow_vdup32,
        .slow_vgetlni32,
        .slowfpvfmx,
        .slowfpvmlx,
        .splat_vfp_neon,
        .wide_stride_vfp,
        .zcz,
    }),
    .description = "Samsung Exynos processors",
    .llvm_name = "exynos",
};
// Table slot for `fix_cmse_cve_2021_35465`; it lists no dependencies.
// `.description` is initialized exactly once: naming the same field twice in a struct
// literal is rejected by the compiler. The correctly spelled text ("vulnerability") is kept.
result[@intFromEnum(Feature.fix_cmse_cve_2021_35465)] = .{
    .llvm_name = "fix-cmse-cve-2021-35465",
    .description = "Mitigate against the cve-2021-35465 security vulnerability",
    .dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.fix_cortex_a57_aes_1742098)] = .{
@ -815,11 +774,6 @@ pub const all_features = blk: {
.description = "Prefer 32-bit alignment for loops",
.dependencies = featureSet(&[_]Feature{}),
};
// Table slot for `m3`; it lists no dependencies.
result[@intFromEnum(Feature.m3)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Cortex-M3 ARM processors",
    .llvm_name = "m3",
};
result[@intFromEnum(Feature.mclass)] = .{
.llvm_name = "mclass",
.description = "Is microcontroller profile ('M' series)",
@ -945,11 +899,6 @@ pub const all_features = blk: {
.description = "Is profitable to unpredicate",
.dependencies = featureSet(&[_]Feature{}),
};
// Table slot for `r4`; it lists no dependencies.
result[@intFromEnum(Feature.r4)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Cortex-R4 ARM processors",
    .llvm_name = "r4",
};
result[@intFromEnum(Feature.ras)] = .{
.llvm_name = "ras",
.description = "Enable Reliability, Availability and Serviceability extensions",
@ -1049,11 +998,6 @@ pub const all_features = blk: {
.description = "Disallow all unaligned memory access",
.dependencies = featureSet(&[_]Feature{}),
};
// Table slot for `swift`; it lists no dependencies.
result[@intFromEnum(Feature.swift)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Swift ARM processors",
    .llvm_name = "swift",
};
result[@intFromEnum(Feature.thumb2)] = .{
.llvm_name = "thumb2",
.description = "Enable Thumb2 instructions",
@ -1508,10 +1452,9 @@ pub const all_features = blk: {
.db,
.dfb,
.dsp,
.fp_armv8,
.fp_armv8d16sp,
.has_v8,
.mp,
.neon,
.rclass,
.virtualization,
}),
@ -2104,7 +2047,6 @@ pub const cpu = struct {
.name = "cortex_a76",
.llvm_name = "cortex-a76",
.features = featureSet(&[_]Feature{
.a76,
.dotprod,
.fullfp16,
.v8_2a,
@ -2114,7 +2056,6 @@ pub const cpu = struct {
.name = "cortex_a76ae",
.llvm_name = "cortex-a76ae",
.features = featureSet(&[_]Feature{
.a76,
.dotprod,
.fullfp16,
.v8_2a,
@ -2138,6 +2079,15 @@ pub const cpu = struct {
.v8_2a,
}),
};
/// CPU model `cortex_a78ae` (LLVM name `cortex-a78ae`) and its feature set.
pub const cortex_a78ae: CpuModel = .{
    .llvm_name = "cortex-a78ae",
    .name = "cortex_a78ae",
    .features = featureSet(&.{
        .dotprod,
        .fullfp16,
        .v8_2a,
    }),
};
pub const cortex_a78c = CpuModel{
.name = "cortex_a78c",
.llvm_name = "cortex-a78c",
@ -2219,7 +2169,6 @@ pub const cpu = struct {
.llvm_name = "cortex-m3",
.features = featureSet(&[_]Feature{
.loop_align,
.m3,
.no_branch_predictor,
.use_misched,
.v7m,
@ -2315,7 +2264,6 @@ pub const cpu = struct {
.llvm_name = "cortex-r4",
.features = featureSet(&[_]Feature{
.avoid_partial_cpsr,
.r4,
.ret_addr_stack,
.v7r,
}),
@ -2325,7 +2273,6 @@ pub const cpu = struct {
.llvm_name = "cortex-r4f",
.features = featureSet(&[_]Feature{
.avoid_partial_cpsr,
.r4,
.ret_addr_stack,
.slow_fp_brcc,
.slowfpvfmx,
@ -2352,7 +2299,20 @@ pub const cpu = struct {
.name = "cortex_r52",
.llvm_name = "cortex-r52",
.features = featureSet(&[_]Feature{
.fp_armv8,
.fpao,
.neon,
.use_misched,
.v8r,
}),
};
pub const cortex_r52plus = CpuModel{
.name = "cortex_r52plus",
.llvm_name = "cortex-r52plus",
.features = featureSet(&[_]Feature{
.fp_armv8,
.fpao,
.neon,
.use_misched,
.v8r,
}),
@ -2418,7 +2378,6 @@ pub const cpu = struct {
.ret_addr_stack,
.slowfpvfmx,
.slowfpvmlx,
.swift,
.use_misched,
.v8a,
.zcz,
@ -2435,24 +2394,60 @@ pub const cpu = struct {
.name = "exynos_m1",
.llvm_name = null,
.features = featureSet(&[_]Feature{
.exynos,
.expand_fp_mlx,
.fuse_aes,
.fuse_literals,
.prof_unpr,
.ret_addr_stack,
.slow_fp_brcc,
.slow_vdup32,
.slow_vgetlni32,
.slowfpvfmx,
.slowfpvmlx,
.splat_vfp_neon,
.v8a,
.wide_stride_vfp,
.zcz,
}),
};
/// CPU model `exynos_m2`; it has no LLVM name (`llvm_name` is null).
/// NOTE(review): the list names `.exynos` and also several features that the `exynos`
/// feature entry already lists as its own dependencies — confirm both are intended.
pub const exynos_m2: CpuModel = .{
    .llvm_name = null,
    .name = "exynos_m2",
    .features = featureSet(&.{
        .exynos,
        .expand_fp_mlx,
        .fuse_aes,
        .fuse_literals,
        .prof_unpr,
        .ret_addr_stack,
        .slow_fp_brcc,
        .slow_vdup32,
        .slow_vgetlni32,
        .slowfpvfmx,
        .slowfpvmlx,
        .splat_vfp_neon,
        .v8a,
        .wide_stride_vfp,
        .zcz,
    }),
};
/// CPU model `exynos_m3` (LLVM name `exynos-m3`) and its feature set.
/// NOTE(review): the list names `.exynos` and also several features that the `exynos`
/// feature entry already lists as its own dependencies — confirm both are intended.
pub const exynos_m3: CpuModel = .{
    .llvm_name = "exynos-m3",
    .name = "exynos_m3",
    .features = featureSet(&.{
        .exynos,
        .expand_fp_mlx,
        .fuse_aes,
        .fuse_literals,
        .prof_unpr,
        .ret_addr_stack,
        .slow_fp_brcc,
        .slow_vdup32,
        .slow_vgetlni32,
        .slowfpvfmx,
        .slowfpvmlx,
        .splat_vfp_neon,
        .v8a,
        .wide_stride_vfp,
        .zcz,
    }),
};
pub const exynos_m4 = CpuModel{
@ -2460,9 +2455,21 @@ pub const cpu = struct {
.llvm_name = "exynos-m4",
.features = featureSet(&[_]Feature{
.dotprod,
.exynos,
.expand_fp_mlx,
.fullfp16,
.fuse_aes,
.fuse_literals,
.prof_unpr,
.ret_addr_stack,
.slow_fp_brcc,
.slow_vdup32,
.slow_vgetlni32,
.slowfpvfmx,
.slowfpvmlx,
.splat_vfp_neon,
.v8_2a,
.wide_stride_vfp,
.zcz,
}),
};
pub const exynos_m5 = CpuModel{
@ -2470,9 +2477,21 @@ pub const cpu = struct {
.llvm_name = "exynos-m5",
.features = featureSet(&[_]Feature{
.dotprod,
.exynos,
.expand_fp_mlx,
.fullfp16,
.fuse_aes,
.fuse_literals,
.prof_unpr,
.ret_addr_stack,
.slow_fp_brcc,
.slow_vdup32,
.slow_vgetlni32,
.slowfpvfmx,
.slowfpvmlx,
.splat_vfp_neon,
.v8_2a,
.wide_stride_vfp,
.zcz,
}),
};
pub const generic = CpuModel{
@ -2538,6 +2557,7 @@ pub const cpu = struct {
.llvm_name = "neoverse-n2",
.features = featureSet(&[_]Feature{
.bf16,
.fp16fml,
.i8mm,
.v9a,
}),
@ -2564,7 +2584,6 @@ pub const cpu = struct {
.name = "sc300",
.llvm_name = "sc300",
.features = featureSet(&[_]Feature{
.m3,
.no_branch_predictor,
.use_misched,
.v7m,
@ -2618,7 +2637,6 @@ pub const cpu = struct {
.slow_vgetlni32,
.slowfpvfmx,
.slowfpvmlx,
.swift,
.use_misched,
.v7a,
.vfp4,

View File

@ -7,7 +7,6 @@ const CpuModel = std.Target.Cpu.Model;
pub const Feature = enum {
@"32bit",
@"64bit",
auto_vec,
d,
f,
frecipe,
@ -18,6 +17,7 @@ pub const Feature = enum {
lbt,
lsx,
lvz,
prefer_w_inst,
relax,
ual,
};
@ -41,11 +41,6 @@ pub const all_features = blk: {
.description = "LA64 Basic Integer and Privilege Instruction Set",
.dependencies = featureSet(&[_]Feature{}),
};
// Table slot for `auto_vec`; it lists no dependencies.
result[@intFromEnum(Feature.auto_vec)] = .{
    .dependencies = featureSet(&.{}),
    .description = "Experimental auto vectorization",
    .llvm_name = "auto-vec",
};
result[@intFromEnum(Feature.d)] = .{
.llvm_name = "d",
.description = "'D' (Double-Precision Floating-Point)",
@ -102,6 +97,11 @@ pub const all_features = blk: {
.description = "'LVZ' (Loongson Virtualization Extension)",
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `prefer_w_inst`, written to the slot selected by the
// enum tag's integer value. `llvm_name` is the hyphenated spelling.
result[@intFromEnum(Feature.prefer_w_inst)] = .{
.llvm_name = "prefer-w-inst",
.description = "Prefer instructions with W suffix",
// Empty list: no other feature is named as a dependency.
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.relax)] = .{
.llvm_name = "relax",
.description = "Enable Linker relaxation",
@ -152,6 +152,18 @@ pub const cpu = struct {
.ual,
}),
};
/// CPU model entry for `la664`; `name` and `llvm_name` are the same string.
/// The feature list names `64bit`, `frecipe`, `lasx`, `lbt`, `lvz` and `ual`.
pub const la664 = CpuModel{
.name = "la664",
.llvm_name = "la664",
// NOTE(review): `lsx`, `d` and `f` are not listed here; presumably they are
// reached through the dependencies of the listed features — confirm against
// the `all_features` table.
.features = featureSet(&[_]Feature{
.@"64bit",
.frecipe,
.lasx,
.lbt,
.lvz,
.ual,
}),
};
pub const loongarch64 = CpuModel{
.name = "loongarch64",
.llvm_name = "loongarch64",

View File

@ -51,6 +51,7 @@ pub const Feature = enum {
ptr64,
single_float,
soft_float,
strict_align,
sym32,
use_indirect_jump_hazard,
use_tcc_in_div,
@ -356,6 +357,11 @@ pub const all_features = blk: {
.description = "Does not support floating point instructions",
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `strict_align`, written to the slot selected by the
// enum tag's integer value. `llvm_name` is the hyphenated spelling.
result[@intFromEnum(Feature.strict_align)] = .{
.llvm_name = "strict-align",
.description = "Disable unaligned load store for r6",
// Empty list: no other feature is named as a dependency.
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.sym32)] = .{
.llvm_name = "sym32",
.description = "Symbols are 32 bit on Mips64",

View File

@ -13,6 +13,7 @@ pub const Feature = enum {
ptx50,
ptx60,
ptx61,
ptx62,
ptx63,
ptx64,
ptx65,
@ -29,6 +30,8 @@ pub const Feature = enum {
ptx81,
ptx82,
ptx83,
ptx84,
ptx85,
sm_20,
sm_21,
sm_30,
@ -101,6 +104,11 @@ pub const all_features = blk: {
.description = "Use PTX version 61",
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `ptx62`; `llvm_name` matches the enum tag's spelling.
result[@intFromEnum(Feature.ptx62)] = .{
.llvm_name = "ptx62",
.description = "Use PTX version 62",
// Empty list: no other feature is named as a dependency.
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.ptx63)] = .{
.llvm_name = "ptx63",
.description = "Use PTX version 63",
@ -181,6 +189,16 @@ pub const all_features = blk: {
.description = "Use PTX version 83",
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `ptx84`; `llvm_name` matches the enum tag's spelling.
result[@intFromEnum(Feature.ptx84)] = .{
.llvm_name = "ptx84",
.description = "Use PTX version 84",
// Empty list: no other feature is named as a dependency.
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `ptx85`; `llvm_name` matches the enum tag's spelling.
result[@intFromEnum(Feature.ptx85)] = .{
.llvm_name = "ptx85",
.description = "Use PTX version 85",
// Empty list: no other feature is named as a dependency.
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.sm_20)] = .{
.llvm_name = "sm_20",
.description = "Target SM 20",

View File

@ -8,6 +8,8 @@ pub const Feature = enum {
@"64bit",
@"64bitregs",
aix,
aix_shared_lib_tls_model_opt,
aix_small_local_dynamic_tls,
aix_small_local_exec_tls,
allow_unaligned_fp_access,
altivec,
@ -113,6 +115,16 @@ pub const all_features = blk: {
.description = "AIX OS",
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `aix_shared_lib_tls_model_opt`, written to the slot
// selected by the enum tag's integer value. `llvm_name` is the hyphenated
// spelling of the tag.
result[@intFromEnum(Feature.aix_shared_lib_tls_model_opt)] = .{
.llvm_name = "aix-shared-lib-tls-model-opt",
.description = "Tune TLS model at function level in shared library loaded with the main program (for 64-bit AIX only)",
// Empty list: no other feature is named as a dependency.
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `aix_small_local_dynamic_tls`, written to the slot
// selected by the enum tag's integer value. `llvm_name` is the hyphenated
// spelling of the tag.
result[@intFromEnum(Feature.aix_small_local_dynamic_tls)] = .{
.llvm_name = "aix-small-local-dynamic-tls",
.description = "Produce a faster local-dynamic TLS sequence for this function for 64-bit AIX",
// Empty list: no other feature is named as a dependency.
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.aix_small_local_exec_tls)] = .{
.llvm_name = "aix-small-local-exec-tls",
.description = "Produce a TOC-free local-exec TLS sequence for this function for 64-bit AIX",
@ -538,8 +550,7 @@ pub const all_features = blk: {
.llvm_name = "prefix-instrs",
.description = "Enable prefixed instructions",
.dependencies = featureSet(&[_]Feature{
.power8_vector,
.power9_altivec,
.isa_v31_instructions,
}),
};
result[@intFromEnum(Feature.privileged)] = .{
@ -990,6 +1001,54 @@ pub const cpu = struct {
.two_const_nr,
}),
};
/// CPU model entry for `pwr11`; `name` and `llvm_name` are the same string.
pub const pwr11 = CpuModel{
.name = "pwr11",
.llvm_name = "pwr11",
// NOTE(review): the list names `isa_v206_instructions` and `power10_vector`
// but no later `isa_v*` entry and no `prefix_instrs`; presumably those are
// reached through the dependencies of the listed features — confirm against
// the `all_features` table.
.features = featureSet(&[_]Feature{
.@"64bit",
.allow_unaligned_fp_access,
.bpermd,
.cmpb,
.crbits,
.crypto,
.direct_move,
.extdiv,
.fast_MFLR,
.fcpsgn,
.fpcvt,
.fprnd,
.fre,
.fres,
.frsqrte,
.frsqrtes,
.fsqrt,
.fuse_add_logical,
.fuse_arith_add,
.fuse_logical,
.fuse_logical_add,
.fuse_sha3,
.fuse_store,
.htm,
.icbt,
.isa_v206_instructions,
.isel,
.ldbrx,
.lfiwax,
.mfocrf,
.mma,
.partword_atomics,
.pcrelative_memops,
.popcntd,
.power10_vector,
.ppc_postra_sched,
.ppc_prera_sched,
.predictable_select_expensive,
.quadword_atomics,
.recipprec,
.stfiwx,
.two_const_nr,
}),
};
pub const pwr3 = CpuModel{
.name = "pwr3",
.llvm_name = "pwr3",

File diff suppressed because it is too large Load Diff

View File

@ -40,7 +40,9 @@ pub const Feature = enum {
reset_dat_protection,
reset_reference_bits_multiple,
soft_float,
test_pending_external_interruption,
transactional_execution,
unaligned_symbols,
vector,
vector_enhancements_1,
vector_enhancements_2,
@ -233,11 +235,21 @@ pub const all_features = blk: {
.description = "Use software emulation for floating point",
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `test_pending_external_interruption`, written to the
// slot selected by the enum tag's integer value. `llvm_name` is the hyphenated
// spelling of the tag.
result[@intFromEnum(Feature.test_pending_external_interruption)] = .{
.llvm_name = "test-pending-external-interruption",
.description = "Assume that the test-pending-external-interruption facility is installed",
// Empty list: no other feature is named as a dependency.
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `transactional_execution`, written to the slot
// selected by the enum tag's integer value. `llvm_name` is the hyphenated
// spelling of the tag.
result[@intFromEnum(Feature.transactional_execution)] = .{
.llvm_name = "transactional-execution",
.description = "Assume that the transactional-execution facility is installed",
// Empty list: no other feature is named as a dependency.
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `unaligned_symbols`, written to the slot selected by
// the enum tag's integer value. `llvm_name` is the hyphenated spelling.
result[@intFromEnum(Feature.unaligned_symbols)] = .{
.llvm_name = "unaligned-symbols",
.description = "Don't apply the ABI minimum alignment to external symbols.",
// Empty list: no other feature is named as a dependency.
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.vector)] = .{
.llvm_name = "vector",
.description = "Assume that the vectory facility is installed",
@ -357,6 +369,7 @@ pub const cpu = struct {
.population_count,
.processor_assist,
.reset_reference_bits_multiple,
.test_pending_external_interruption,
.transactional_execution,
.vector,
.vector_enhancements_1,
@ -396,6 +409,7 @@ pub const cpu = struct {
.population_count,
.processor_assist,
.reset_reference_bits_multiple,
.test_pending_external_interruption,
.transactional_execution,
.vector,
.vector_enhancements_1,
@ -441,6 +455,7 @@ pub const cpu = struct {
.processor_assist,
.reset_dat_protection,
.reset_reference_bits_multiple,
.test_pending_external_interruption,
.transactional_execution,
.vector,
.vector_enhancements_1,
@ -538,6 +553,7 @@ pub const cpu = struct {
.population_count,
.processor_assist,
.reset_reference_bits_multiple,
.test_pending_external_interruption,
.transactional_execution,
.vector,
.vector_enhancements_1,
@ -577,6 +593,7 @@ pub const cpu = struct {
.population_count,
.processor_assist,
.reset_reference_bits_multiple,
.test_pending_external_interruption,
.transactional_execution,
.vector,
.vector_enhancements_1,
@ -622,6 +639,7 @@ pub const cpu = struct {
.processor_assist,
.reset_dat_protection,
.reset_reference_bits_multiple,
.test_pending_external_interruption,
.transactional_execution,
.vector,
.vector_enhancements_1,

View File

@ -9,6 +9,7 @@ pub const Feature = enum {
bulk_memory,
exception_handling,
extended_const,
half_precision,
multimemory,
multivalue,
mutable_globals,
@ -49,6 +50,11 @@ pub const all_features = blk: {
.description = "Enable extended const expressions",
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `half_precision`, written to the slot selected by
// the enum tag's integer value. `llvm_name` is the hyphenated spelling.
result[@intFromEnum(Feature.half_precision)] = .{
.llvm_name = "half-precision",
.description = "Enable half precision instructions",
// Empty list: no other feature is named as a dependency.
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.multimemory)] = .{
.llvm_name = "multimemory",
.description = "Enable multiple memories",
@ -109,8 +115,15 @@ pub const cpu = struct {
.features = featureSet(&[_]Feature{
.atomics,
.bulk_memory,
.exception_handling,
.extended_const,
.half_precision,
.multimemory,
.multivalue,
.mutable_globals,
.nontrapping_fptoint,
.reference_types,
.relaxed_simd,
.sign_ext,
.simd128,
.tail_call,
@ -120,7 +133,9 @@ pub const cpu = struct {
.name = "generic",
.llvm_name = "generic",
.features = featureSet(&[_]Feature{
.multivalue,
.mutable_globals,
.reference_types,
.sign_ext,
}),
};

View File

@ -45,6 +45,7 @@ pub const Feature = enum {
avxvnniint8,
bmi,
bmi2,
branch_hint,
branchfusion,
ccmp,
cf,
@ -73,8 +74,10 @@ pub const Feature = enum {
fast_15bytenop,
fast_7bytenop,
fast_bextr,
fast_dpwssd,
fast_gather,
fast_hops,
fast_imm16,
fast_lzcnt,
fast_movbe,
fast_scalar_fsqrt,
@ -96,6 +99,7 @@ pub const Feature = enum {
hreset,
idivl_to_divb,
idivq_to_divl,
inline_asm_use_gpr32,
invpcid,
kl,
lea_sp,
@ -111,6 +115,7 @@ pub const Feature = enum {
movdiri,
mwaitx,
ndd,
nf,
no_bypass_delay,
no_bypass_delay_blend,
no_bypass_delay_mov,
@ -191,6 +196,7 @@ pub const Feature = enum {
xsavec,
xsaveopt,
xsaves,
zu,
};
/// Shorthand for the `featureSet` function of `CpuFeature.FeatureSetFns(Feature)`.
/// Call sites in this file pass it a pointer to a `Feature` array literal, e.g.
/// `featureSet(&[_]Feature{})`.
pub const featureSet = CpuFeature.FeatureSetFns(Feature).featureSet;
@ -213,14 +219,14 @@ pub const all_features = blk: {
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `3dnow`, written to the slot selected by the enum
// tag's integer value. The entry keeps its description and its `mmx`
// dependency but carries no LLVM-side name.
result[@intFromEnum(Feature.@"3dnow")] = .{
// A struct literal may initialize each field only once, so exactly one
// `.llvm_name` initializer is kept.
// NOTE(review): the text carried both "3dnow" and null; null (the later
// initializer) is the one retained — confirm this is the intended value.
.llvm_name = null,
.description = "Enable 3DNow! instructions",
.dependencies = featureSet(&[_]Feature{
.mmx,
}),
};
result[@intFromEnum(Feature.@"3dnowa")] = .{
.llvm_name = "3dnowa",
.llvm_name = null,
.description = "Enable 3DNow! Athlon instructions",
.dependencies = featureSet(&[_]Feature{
.@"3dnow",
@ -356,7 +362,7 @@ pub const all_features = blk: {
}),
};
result[@intFromEnum(Feature.avx512er)] = .{
.llvm_name = "avx512er",
.llvm_name = null,
.description = "Enable AVX-512 Exponential and Reciprocal Instructions",
.dependencies = featureSet(&[_]Feature{
.avx512f,
@ -388,7 +394,7 @@ pub const all_features = blk: {
}),
};
result[@intFromEnum(Feature.avx512pf)] = .{
.llvm_name = "avx512pf",
.llvm_name = null,
.description = "Enable AVX-512 PreFetch Instructions",
.dependencies = featureSet(&[_]Feature{
.avx512f,
@ -481,6 +487,11 @@ pub const all_features = blk: {
.description = "Support BMI2 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `branch_hint`, written to the slot selected by the
// enum tag's integer value. `llvm_name` is the hyphenated spelling.
result[@intFromEnum(Feature.branch_hint)] = .{
.llvm_name = "branch-hint",
.description = "Target has branch hint feature",
// Empty list: no other feature is named as a dependency.
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.branchfusion)] = .{
.llvm_name = "branchfusion",
.description = "CMP/TEST can be fused with conditional branches",
@ -625,6 +636,11 @@ pub const all_features = blk: {
.description = "Indicates that the BEXTR instruction is implemented as a single uop with good throughput",
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `fast_dpwssd`, written to the slot selected by the
// enum tag's integer value. `llvm_name` is the hyphenated spelling.
result[@intFromEnum(Feature.fast_dpwssd)] = .{
.llvm_name = "fast-dpwssd",
.description = "Prefer vpdpwssd instruction over vpmaddwd+vpaddd instruction sequence",
// Empty list: no other feature is named as a dependency.
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.fast_gather)] = .{
.llvm_name = "fast-gather",
.description = "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)",
@ -635,6 +651,11 @@ pub const all_features = blk: {
.description = "Prefer horizontal vector math instructions (haddp, phsub, etc.) over normal vector instructions with shuffles",
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `fast_imm16`, written to the slot selected by the
// enum tag's integer value. `llvm_name` is the hyphenated spelling.
result[@intFromEnum(Feature.fast_imm16)] = .{
.llvm_name = "fast-imm16",
.description = "Prefer a i16 instruction with i16 immediate over extension to i32",
// Empty list: no other feature is named as a dependency.
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.fast_lzcnt)] = .{
.llvm_name = "fast-lzcnt",
.description = "LZCNT instructions are as fast as most simple integer ops",
@ -747,6 +768,11 @@ pub const all_features = blk: {
.description = "Use 32-bit divide for positive values less than 2^32",
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `inline_asm_use_gpr32`, written to the slot selected
// by the enum tag's integer value. `llvm_name` is the hyphenated spelling.
result[@intFromEnum(Feature.inline_asm_use_gpr32)] = .{
.llvm_name = "inline-asm-use-gpr32",
.description = "Enable use of GPR32 in inline assembly for APX",
// Empty list: no other feature is named as a dependency.
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.invpcid)] = .{
.llvm_name = "invpcid",
.description = "Invalidate Process-Context Identifier",
@ -824,6 +850,11 @@ pub const all_features = blk: {
.description = "Support non-destructive destination",
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `nf`; `llvm_name` matches the enum tag's spelling.
result[@intFromEnum(Feature.nf)] = .{
.llvm_name = "nf",
.description = "Support status flags update suppression",
// Empty list: no other feature is named as a dependency.
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.no_bypass_delay)] = .{
.llvm_name = "no-bypass-delay",
.description = "Has no bypass delay when using the 'wrong' domain",
@ -917,7 +948,7 @@ pub const all_features = blk: {
.dependencies = featureSet(&[_]Feature{}),
};
// Feature-table entry for `prefetchwt1`, written to the slot selected by the
// enum tag's integer value. The entry keeps its description but carries no
// LLVM-side name.
result[@intFromEnum(Feature.prefetchwt1)] = .{
// A struct literal may initialize each field only once, so exactly one
// `.llvm_name` initializer is kept.
// NOTE(review): the text carried both "prefetchwt1" and null; null (the later
// initializer) is the one retained — confirm this is the intended value.
.llvm_name = null,
.description = "Prefetch with Intent to Write and T1 Hint",
// Empty list: no other feature is named as a dependency.
.dependencies = featureSet(&[_]Feature{}),
};
@ -1269,6 +1300,11 @@ pub const all_features = blk: {
.xsave,
}),
};
// Feature-table entry for `zu`; `llvm_name` matches the enum tag's spelling.
result[@intFromEnum(Feature.zu)] = .{
.llvm_name = "zu",
.description = "Support zero-upper SETcc/IMUL",
// Empty list: no other feature is named as a dependency.
.dependencies = featureSet(&[_]Feature{}),
};
const ti = @typeInfo(Feature);
for (&result, 0..) |*elem, i| {
elem.index = i;
@ -1357,6 +1393,7 @@ pub const cpu = struct {
.cx16,
.fast_scalar_shift_masks,
.fxsr,
.idivq_to_divl,
.lzcnt,
.nopl,
.popcnt,
@ -1531,6 +1568,7 @@ pub const cpu = struct {
.cmov,
.cx8,
.nopl,
.prfchw,
.slow_shld,
.slow_unaligned_mem_16,
.vzeroupper,
@ -1548,6 +1586,7 @@ pub const cpu = struct {
.fast_scalar_shift_masks,
.fxsr,
.nopl,
.prfchw,
.sbb_dep_breaking,
.slow_shld,
.slow_unaligned_mem_16,
@ -1567,6 +1606,7 @@ pub const cpu = struct {
.fast_scalar_shift_masks,
.fxsr,
.nopl,
.prfchw,
.sbb_dep_breaking,
.slow_shld,
.slow_unaligned_mem_16,
@ -1584,6 +1624,7 @@ pub const cpu = struct {
.cx8,
.fxsr,
.nopl,
.prfchw,
.slow_shld,
.slow_unaligned_mem_16,
.sse,
@ -1602,6 +1643,7 @@ pub const cpu = struct {
.fast_scalar_shift_masks,
.fxsr,
.nopl,
.prfchw,
.sbb_dep_breaking,
.slow_shld,
.slow_unaligned_mem_16,
@ -1619,6 +1661,7 @@ pub const cpu = struct {
.cx8,
.fxsr,
.nopl,
.prfchw,
.slow_shld,
.slow_unaligned_mem_16,
.sse,
@ -1634,6 +1677,7 @@ pub const cpu = struct {
.cmov,
.cx8,
.nopl,
.prfchw,
.slow_shld,
.slow_unaligned_mem_16,
.vzeroupper,
@ -1649,6 +1693,7 @@ pub const cpu = struct {
.cx8,
.fxsr,
.nopl,
.prfchw,
.slow_shld,
.slow_unaligned_mem_16,
.sse,
@ -1663,6 +1708,7 @@ pub const cpu = struct {
.@"64bit",
.cmov,
.cx16,
.fast_imm16,
.fxsr,
.idivl_to_divb,
.idivq_to_divl,
@ -1693,6 +1739,7 @@ pub const cpu = struct {
.cx16,
.false_deps_popcnt,
.fast_7bytenop,
.fast_imm16,
.fast_movbe,
.fsgsbase,
.fxsr,
@ -1731,6 +1778,7 @@ pub const cpu = struct {
.cx16,
.fast_scalar_shift_masks,
.fxsr,
.idivq_to_divl,
.lzcnt,
.nopl,
.popcnt,
@ -1756,6 +1804,7 @@ pub const cpu = struct {
.fast_11bytenop,
.fast_scalar_shift_masks,
.fxsr,
.idivq_to_divl,
.lwp,
.lzcnt,
.mmx,
@ -1790,6 +1839,7 @@ pub const cpu = struct {
.fast_scalar_shift_masks,
.fma,
.fxsr,
.idivq_to_divl,
.lwp,
.lzcnt,
.mmx,
@ -1826,6 +1876,7 @@ pub const cpu = struct {
.fma,
.fsgsbase,
.fxsr,
.idivq_to_divl,
.lwp,
.lzcnt,
.mmx,
@ -1864,6 +1915,7 @@ pub const cpu = struct {
.fma,
.fsgsbase,
.fxsr,
.idivq_to_divl,
.lwp,
.lzcnt,
.mmx,
@ -1891,6 +1943,7 @@ pub const cpu = struct {
.@"64bit",
.cmov,
.cx16,
.fast_imm16,
.fxsr,
.idivl_to_divb,
.idivq_to_divl,
@ -1963,9 +2016,11 @@ pub const cpu = struct {
.cmov,
.cx16,
.fast_15bytenop,
.fast_imm16,
.fast_scalar_shift_masks,
.fast_vector_shift_masks,
.fxsr,
.idivq_to_divl,
.lzcnt,
.mmx,
.nopl,
@ -1994,11 +2049,13 @@ pub const cpu = struct {
.fast_15bytenop,
.fast_bextr,
.fast_hops,
.fast_imm16,
.fast_lzcnt,
.fast_movbe,
.fast_scalar_shift_masks,
.fast_vector_shift_masks,
.fxsr,
.idivq_to_divl,
.lzcnt,
.mmx,
.movbe,
@ -2019,6 +2076,7 @@ pub const cpu = struct {
.llvm_name = "c3",
.features = featureSet(&[_]Feature{
.@"3dnow",
.prfchw,
.slow_unaligned_mem_16,
.vzeroupper,
.x87,
@ -2324,6 +2382,7 @@ pub const cpu = struct {
.crc32,
.cx16,
.fxsr,
.idivq_to_divl,
.macrofusion,
.mmx,
.no_bypass_delay_mov,
@ -2441,6 +2500,7 @@ pub const cpu = struct {
.features = featureSet(&[_]Feature{
.@"3dnowa",
.cx8,
.prfchw,
.slow_unaligned_mem_16,
.vzeroupper,
.x87,
@ -2457,6 +2517,7 @@ pub const cpu = struct {
.crc32,
.cx16,
.false_deps_popcnt,
.fast_imm16,
.fast_movbe,
.fsgsbase,
.fxsr,
@ -2493,6 +2554,7 @@ pub const cpu = struct {
.cmov,
.crc32,
.cx16,
.fast_imm16,
.fast_movbe,
.fsgsbase,
.fxsr,
@ -2547,8 +2609,6 @@ pub const cpu = struct {
.fxsr,
.gfni,
.hreset,
.idivl_to_divb,
.idivq_to_divl,
.invpcid,
.lzcnt,
.macrofusion,
@ -2602,7 +2662,11 @@ pub const cpu = struct {
.cx16,
.enqcmd,
.f16c,
.fast_movbe,
.false_deps_popcnt,
.fast_15bytenop,
.fast_scalar_fsqrt,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fma,
.fsgsbase,
.fxsr,
@ -2610,11 +2674,11 @@ pub const cpu = struct {
.hreset,
.invpcid,
.lzcnt,
.macrofusion,
.mmx,
.movbe,
.movdir64b,
.movdiri,
.no_bypass_delay,
.nopl,
.pconfig,
.pku,
@ -2628,11 +2692,8 @@ pub const cpu = struct {
.serialize,
.sha,
.shstk,
.slow_incdec,
.slow_lea,
.slow_two_mem_ops,
.slow_3ops_lea,
.uintr,
.use_glm_div_sqrt_costs,
.vaes,
.vpclmulqdq,
.vzeroupper,
@ -2666,6 +2727,7 @@ pub const cpu = struct {
.avxvnni,
.bmi,
.bmi2,
.branch_hint,
.cldemote,
.clflushopt,
.clwb,
@ -2754,6 +2816,7 @@ pub const cpu = struct {
.avxvnni,
.bmi,
.bmi2,
.branch_hint,
.cldemote,
.clflushopt,
.clwb,
@ -3080,6 +3143,7 @@ pub const cpu = struct {
.features = featureSet(&[_]Feature{
.@"3dnow",
.cx8,
.prfchw,
.slow_unaligned_mem_16,
.vzeroupper,
.x87,
@ -3091,6 +3155,7 @@ pub const cpu = struct {
.features = featureSet(&[_]Feature{
.@"3dnow",
.cx8,
.prfchw,
.slow_unaligned_mem_16,
.vzeroupper,
.x87,
@ -3107,6 +3172,7 @@ pub const cpu = struct {
.fast_scalar_shift_masks,
.fxsr,
.nopl,
.prfchw,
.sbb_dep_breaking,
.slow_shld,
.slow_unaligned_mem_16,
@ -3126,6 +3192,7 @@ pub const cpu = struct {
.fast_scalar_shift_masks,
.fxsr,
.nopl,
.prfchw,
.sbb_dep_breaking,
.slow_shld,
.slow_unaligned_mem_16,
@ -3151,6 +3218,7 @@ pub const cpu = struct {
.cx16,
.evex512,
.fast_gather,
.fast_imm16,
.fast_movbe,
.fsgsbase,
.fxsr,
@ -3193,6 +3261,7 @@ pub const cpu = struct {
.cx16,
.evex512,
.fast_gather,
.fast_imm16,
.fast_movbe,
.fsgsbase,
.fxsr,
@ -3384,6 +3453,7 @@ pub const cpu = struct {
.crc32,
.cx16,
.fxsr,
.idivq_to_divl,
.macrofusion,
.mmx,
.no_bypass_delay_mov,
@ -3422,6 +3492,7 @@ pub const cpu = struct {
.fast_scalar_shift_masks,
.fxsr,
.nopl,
.prfchw,
.sbb_dep_breaking,
.slow_shld,
.slow_unaligned_mem_16,
@ -3441,6 +3512,7 @@ pub const cpu = struct {
.fast_scalar_shift_masks,
.fxsr,
.nopl,
.prfchw,
.sbb_dep_breaking,
.slow_shld,
.slow_unaligned_mem_16,
@ -3937,7 +4009,11 @@ pub const cpu = struct {
.cx16,
.enqcmd,
.f16c,
.fast_movbe,
.false_deps_popcnt,
.fast_15bytenop,
.fast_scalar_fsqrt,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fma,
.fsgsbase,
.fxsr,
@ -3945,11 +4021,11 @@ pub const cpu = struct {
.hreset,
.invpcid,
.lzcnt,
.macrofusion,
.mmx,
.movbe,
.movdir64b,
.movdiri,
.no_bypass_delay,
.nopl,
.pconfig,
.pku,
@ -3963,11 +4039,8 @@ pub const cpu = struct {
.serialize,
.sha,
.shstk,
.slow_incdec,
.slow_lea,
.slow_two_mem_ops,
.slow_3ops_lea,
.uintr,
.use_glm_div_sqrt_costs,
.vaes,
.vpclmulqdq,
.vzeroupper,
@ -3989,6 +4062,7 @@ pub const cpu = struct {
.cx16,
.false_deps_popcnt,
.fast_7bytenop,
.fast_imm16,
.fast_movbe,
.fxsr,
.idivq_to_divl,
@ -4191,6 +4265,7 @@ pub const cpu = struct {
.cx16,
.false_deps_popcnt,
.fast_7bytenop,
.fast_imm16,
.fast_movbe,
.fxsr,
.idivq_to_divl,
@ -4293,6 +4368,7 @@ pub const cpu = struct {
.cmov,
.crc32,
.cx16,
.fast_imm16,
.fast_movbe,
.fsgsbase,
.fxsr,
@ -4331,6 +4407,7 @@ pub const cpu = struct {
.crc32,
.cx16,
.fxsr,
.idivq_to_divl,
.macrofusion,
.mmx,
.no_bypass_delay_mov,
@ -4348,6 +4425,7 @@ pub const cpu = struct {
.llvm_name = "winchip2",
.features = featureSet(&[_]Feature{
.@"3dnow",
.prfchw,
.slow_unaligned_mem_16,
.vzeroupper,
.x87,
@ -4519,6 +4597,7 @@ pub const cpu = struct {
.f16c,
.fast_15bytenop,
.fast_bextr,
.fast_imm16,
.fast_lzcnt,
.fast_movbe,
.fast_scalar_fsqrt,
@ -4528,6 +4607,7 @@ pub const cpu = struct {
.fma,
.fsgsbase,
.fxsr,
.idivq_to_divl,
.lzcnt,
.mmx,
.movbe,
@ -4571,6 +4651,7 @@ pub const cpu = struct {
.f16c,
.fast_15bytenop,
.fast_bextr,
.fast_imm16,
.fast_lzcnt,
.fast_movbe,
.fast_scalar_fsqrt,
@ -4580,6 +4661,7 @@ pub const cpu = struct {
.fma,
.fsgsbase,
.fxsr,
.idivq_to_divl,
.lzcnt,
.mmx,
.movbe,
@ -4624,6 +4706,7 @@ pub const cpu = struct {
.f16c,
.fast_15bytenop,
.fast_bextr,
.fast_imm16,
.fast_lzcnt,
.fast_movbe,
.fast_scalar_fsqrt,
@ -4634,6 +4717,7 @@ pub const cpu = struct {
.fsgsbase,
.fsrm,
.fxsr,
.idivq_to_divl,
.invpcid,
.lzcnt,
.macrofusion,
@ -4692,6 +4776,8 @@ pub const cpu = struct {
.evex512,
.fast_15bytenop,
.fast_bextr,
.fast_dpwssd,
.fast_imm16,
.fast_lzcnt,
.fast_movbe,
.fast_scalar_fsqrt,
@ -4702,6 +4788,7 @@ pub const cpu = struct {
.fsrm,
.fxsr,
.gfni,
.idivq_to_divl,
.invpcid,
.lzcnt,
.macrofusion,