crypto: arm64/aes-ce - Simplify round key load sequence

Tweak the round key logic so that the round keys can be loaded using a
single branchless sequence of overlapping loads. This is shorter and
simpler, and puts the conditional branches based on the key size further
apart, which might benefit microarchitectures that cannot record taken
branches at every instruction. For these branches, use test-bit-branch
instructions that don't clobber the condition flags.

Note that none of this has any impact on performance, positive or
otherwise (and the branch prediction benefit would only apply to AES-192,
which nobody uses). It does make for nicer code, though.

While at it, use \@ to generate the labels inside the macros, which is
more robust than using fixed numbers, which could clash inadvertently.
Also, bring aes-neon.S in line with these changes, including the switch
to test-and-branch instructions, to avoid surprises in the future when
we might start relying on the condition flags being preserved in the
chaining mode wrappers in aes-modes.S.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Ard Biesheuvel 2024-04-15 15:04:26 +02:00 committed by Herbert Xu
parent 3f4d1482da
commit 571e557cba
2 changed files with 24 additions and 30 deletions

View File

@ -25,33 +25,28 @@
.endm .endm
/* preload all round keys */ /* preload all round keys */
.macro load_round_keys, rounds, rk .macro load_round_keys, rk, nr, tmp
cmp \rounds, #12 add \tmp, \rk, \nr, sxtw #4
blo 2222f /* 128 bits */ sub \tmp, \tmp, #160
beq 1111f /* 192 bits */ ld1 {v17.4s-v20.4s}, [\rk]
ld1 {v17.4s-v18.4s}, [\rk], #32 ld1 {v21.4s-v24.4s}, [\tmp], #64
1111: ld1 {v19.4s-v20.4s}, [\rk], #32 ld1 {v25.4s-v28.4s}, [\tmp], #64
2222: ld1 {v21.4s-v24.4s}, [\rk], #64 ld1 {v29.4s-v31.4s}, [\tmp]
ld1 {v25.4s-v28.4s}, [\rk], #64
ld1 {v29.4s-v31.4s}, [\rk]
.endm .endm
/* prepare for encryption with key in rk[] */ /* prepare for encryption with key in rk[] */
.macro enc_prepare, rounds, rk, temp .macro enc_prepare, rounds, rk, temp
mov \temp, \rk load_round_keys \rk, \rounds, \temp
load_round_keys \rounds, \temp
.endm .endm
/* prepare for encryption (again) but with new key in rk[] */ /* prepare for encryption (again) but with new key in rk[] */
.macro enc_switch_key, rounds, rk, temp .macro enc_switch_key, rounds, rk, temp
mov \temp, \rk load_round_keys \rk, \rounds, \temp
load_round_keys \rounds, \temp
.endm .endm
/* prepare for decryption with key in rk[] */ /* prepare for decryption with key in rk[] */
.macro dec_prepare, rounds, rk, temp .macro dec_prepare, rounds, rk, temp
mov \temp, \rk load_round_keys \rk, \rounds, \temp
load_round_keys \rounds, \temp
.endm .endm
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4 .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4
@ -110,14 +105,13 @@
/* up to 5 interleaved blocks */ /* up to 5 interleaved blocks */
.macro do_block_Nx, enc, rounds, i0, i1, i2, i3, i4 .macro do_block_Nx, enc, rounds, i0, i1, i2, i3, i4
cmp \rounds, #12 tbz \rounds, #2, .L\@ /* 128 bits */
blo 2222f /* 128 bits */
beq 1111f /* 192 bits */
round_Nx \enc, v17, \i0, \i1, \i2, \i3, \i4 round_Nx \enc, v17, \i0, \i1, \i2, \i3, \i4
round_Nx \enc, v18, \i0, \i1, \i2, \i3, \i4 round_Nx \enc, v18, \i0, \i1, \i2, \i3, \i4
1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3, \i4 tbz \rounds, #1, .L\@ /* 192 bits */
round_Nx \enc, v19, \i0, \i1, \i2, \i3, \i4
round_Nx \enc, v20, \i0, \i1, \i2, \i3, \i4 round_Nx \enc, v20, \i0, \i1, \i2, \i3, \i4
2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 .L\@: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
round_Nx \enc, \key, \i0, \i1, \i2, \i3, \i4 round_Nx \enc, \key, \i0, \i1, \i2, \i3, \i4
.endr .endr
fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3, \i4 fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3, \i4

View File

@ -99,16 +99,16 @@
ld1 {v15.4s}, [\rk] ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16 add \rkp, \rk, #16
mov \i, \rounds mov \i, \rounds
1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ .La\@: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
movi v15.16b, #0x40 movi v15.16b, #0x40
tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */ tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
sub_bytes \in sub_bytes \in
subs \i, \i, #1 sub \i, \i, #1
ld1 {v15.4s}, [\rkp], #16 ld1 {v15.4s}, [\rkp], #16
beq 2222f cbz \i, .Lb\@
mix_columns \in, \enc mix_columns \in, \enc
b 1111b b .La\@
2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ .Lb\@: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
.endm .endm
.macro encrypt_block, in, rounds, rk, rkp, i .macro encrypt_block, in, rounds, rk, rkp, i
@ -206,7 +206,7 @@
ld1 {v15.4s}, [\rk] ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16 add \rkp, \rk, #16
mov \i, \rounds mov \i, \rounds
1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ .La\@: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
@ -216,13 +216,13 @@
tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */ tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */ tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
sub_bytes_4x \in0, \in1, \in2, \in3 sub_bytes_4x \in0, \in1, \in2, \in3
subs \i, \i, #1 sub \i, \i, #1
ld1 {v15.4s}, [\rkp], #16 ld1 {v15.4s}, [\rkp], #16
beq 2222f cbz \i, .Lb\@
mix_columns_2x \in0, \in1, \enc mix_columns_2x \in0, \in1, \enc
mix_columns_2x \in2, \in3, \enc mix_columns_2x \in2, \in3, \enc
b 1111b b .La\@
2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ .Lb\@: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */