linux/arch/arm64/crypto/aes-ce.S
Ard Biesheuvel 571e557cba crypto: arm64/aes-ce - Simplify round key load sequence
Tweak the round key logic so that they can be loaded using a single
branchless sequence using overlapping loads. This is shorter and
simpler, and puts the conditional branches based on the key size further
apart, which might benefit microarchitectures that cannot record taken
branches at every instruction. For these branches, use test-bit-branch
instructions that don't clobber the condition flags.

Note that none of this has any impact on performance, positive or
otherwise (and the branch prediction benefit would only benefit AES-192
which nobody uses). It does make for nicer code, though.

While at it, use \@ to generate the labels inside the macros, which is
more robust than using fixed numbers, which could clash inadvertently.
Also, bring aes-neon.S in line with these changes, including the switch
to test-and-branch instructions, to avoid surprises in the future when
we might start relying on the condition flags being preserved in the
chaining mode wrappers in aes-modes.S

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2024-04-26 17:26:09 +08:00

147 lines
3.6 KiB
ArmAsm

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
* Crypto Extensions
*
* Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#define AES_FUNC_START(func) SYM_FUNC_START(ce_ ## func)
#define AES_FUNC_END(func) SYM_FUNC_END(ce_ ## func)
.arch armv8-a+crypto
xtsmask .req v16
cbciv .req v16
vctr .req v16
.macro xts_reload_mask, tmp
.endm
.macro xts_cts_skip_tw, reg, lbl
.endm
/* preload all round keys */
.macro load_round_keys, rk, nr, tmp
add \tmp, \rk, \nr, sxtw #4
sub \tmp, \tmp, #160
ld1 {v17.4s-v20.4s}, [\rk]
ld1 {v21.4s-v24.4s}, [\tmp], #64
ld1 {v25.4s-v28.4s}, [\tmp], #64
ld1 {v29.4s-v31.4s}, [\tmp]
.endm
/* prepare for encryption with key in rk[] */
.macro enc_prepare, rounds, rk, temp
load_round_keys \rk, \rounds, \temp
.endm
/* prepare for encryption (again) but with new key in rk[] */
.macro enc_switch_key, rounds, rk, temp
load_round_keys \rk, \rounds, \temp
.endm
/* prepare for decryption with key in rk[] */
.macro dec_prepare, rounds, rk, temp
load_round_keys \rk, \rounds, \temp
.endm
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4
aes\de \i0\().16b, \k\().16b
aes\mc \i0\().16b, \i0\().16b
.ifnb \i1
aes\de \i1\().16b, \k\().16b
aes\mc \i1\().16b, \i1\().16b
.ifnb \i3
aes\de \i2\().16b, \k\().16b
aes\mc \i2\().16b, \i2\().16b
aes\de \i3\().16b, \k\().16b
aes\mc \i3\().16b, \i3\().16b
.ifnb \i4
aes\de \i4\().16b, \k\().16b
aes\mc \i4\().16b, \i4\().16b
.endif
.endif
.endif
.endm
/* up to 5 interleaved encryption rounds with the same round key */
.macro round_Nx, enc, k, i0, i1, i2, i3, i4
.ifc \enc, e
do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3, \i4
.else
do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3, \i4
.endif
.endm
/* up to 5 interleaved final rounds */
.macro fin_round_Nx, de, k, k2, i0, i1, i2, i3, i4
aes\de \i0\().16b, \k\().16b
.ifnb \i1
aes\de \i1\().16b, \k\().16b
.ifnb \i3
aes\de \i2\().16b, \k\().16b
aes\de \i3\().16b, \k\().16b
.ifnb \i4
aes\de \i4\().16b, \k\().16b
.endif
.endif
.endif
eor \i0\().16b, \i0\().16b, \k2\().16b
.ifnb \i1
eor \i1\().16b, \i1\().16b, \k2\().16b
.ifnb \i3
eor \i2\().16b, \i2\().16b, \k2\().16b
eor \i3\().16b, \i3\().16b, \k2\().16b
.ifnb \i4
eor \i4\().16b, \i4\().16b, \k2\().16b
.endif
.endif
.endif
.endm
/* up to 5 interleaved blocks */
.macro do_block_Nx, enc, rounds, i0, i1, i2, i3, i4
tbz \rounds, #2, .L\@ /* 128 bits */
round_Nx \enc, v17, \i0, \i1, \i2, \i3, \i4
round_Nx \enc, v18, \i0, \i1, \i2, \i3, \i4
tbz \rounds, #1, .L\@ /* 192 bits */
round_Nx \enc, v19, \i0, \i1, \i2, \i3, \i4
round_Nx \enc, v20, \i0, \i1, \i2, \i3, \i4
.L\@: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
round_Nx \enc, \key, \i0, \i1, \i2, \i3, \i4
.endr
fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3, \i4
.endm
.macro encrypt_block, in, rounds, t0, t1, t2
do_block_Nx e, \rounds, \in
.endm
.macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
do_block_Nx e, \rounds, \i0, \i1, \i2, \i3
.endm
.macro encrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2
do_block_Nx e, \rounds, \i0, \i1, \i2, \i3, \i4
.endm
.macro decrypt_block, in, rounds, t0, t1, t2
do_block_Nx d, \rounds, \in
.endm
.macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
do_block_Nx d, \rounds, \i0, \i1, \i2, \i3
.endm
.macro decrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2
do_block_Nx d, \rounds, \i0, \i1, \i2, \i3, \i4
.endm
#define MAX_STRIDE 5
#include "aes-modes.S"