crypto: x86/aes-ni-xts - rewrite and drop indirections via glue helper

The AES-NI driver implements XTS via the glue helper, which consumes
a struct with sets of function pointers that are invoked on chunks
of input data of the appropriate size, as annotated in the struct.

Let's get rid of this indirection, so that we can perform direct calls
to the assembler helpers. Instead, let's adopt the arm64 strategy, i.e.,
provide a helper which can consume inputs of any size, provided that the
penultimate, full block is passed via the last call if ciphertext stealing
needs to be applied.

This also allows us to enable the XTS mode for i386.
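
For illustration, the bulk/tail split this convention implies looks
roughly like this (a simplified sketch based on the xts_crypt() glue
code in this patch, not a verbatim excerpt; blocks, tail and off are
illustrative variables):

    void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
                           const u8 *src, unsigned int len, u8 *iv);

    /* Bulk pass: any whole number of blocks; the tweak is chained via *iv. */
    aesni_xts_encrypt(ctx, dst, src, blocks * AES_BLOCK_SIZE, iv);

    /* Final pass for a misaligned message: pass the penultimate, full
     * block together with the partial tail so the helper can apply
     * ciphertext stealing. */
    aesni_xts_encrypt(ctx, dst + off, src + off, AES_BLOCK_SIZE + tail, iv);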

Tested-by: Eric Biggers <ebiggers@google.com> # x86_64
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Ard Biesheuvel, 2020-12-31 17:41:55 +01:00 (committed by Herbert Xu)
parent 86ad60a65f
commit 2481104fe9
3 changed files with 356 additions and 145 deletions

arch/x86/crypto/aesni-intel_asm.S

@@ -43,10 +43,6 @@
#ifdef __x86_64__
# constants in mergeable sections, linker can reorder and merge
.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16
.align 16
.Lgf128mul_x_ble_mask:
.octa 0x00000000000000010000000000000087
.section .rodata.cst16.POLY, "aM", @progbits, 16
.align 16
POLY: .octa 0xC2000000000000000000000000000001
@@ -146,7 +142,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
#define CTR %xmm11
#define INC %xmm12
#define GF128MUL_MASK %xmm10
#define GF128MUL_MASK %xmm7
#ifdef __x86_64__
#define AREG %rax
@@ -2823,6 +2819,14 @@ SYM_FUNC_START(aesni_ctr_enc)
ret
SYM_FUNC_END(aesni_ctr_enc)
#endif
.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16
.align 16
.Lgf128mul_x_ble_mask:
.octa 0x00000000000000010000000000000087
.previous
/*
* _aesni_gf128mul_x_ble: internal ABI
* Multiply in GF(2^128) for XTS IVs
@@ -2835,11 +2839,11 @@ SYM_FUNC_END(aesni_ctr_enc)
* CTR: == temporary value
*/
#define _aesni_gf128mul_x_ble() \
pshufd $0x13, IV, CTR; \
pshufd $0x13, IV, KEY; \
paddq IV, IV; \
psrad $31, CTR; \
pand GF128MUL_MASK, CTR; \
pxor CTR, IV;
psrad $31, KEY; \
pand GF128MUL_MASK, KEY; \
pxor KEY, IV;
/*
* void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
@@ -2847,65 +2851,153 @@ SYM_FUNC_END(aesni_ctr_enc)
*/
SYM_FUNC_START(aesni_xts_encrypt)
FRAME_BEGIN
#ifndef __x86_64__
pushl IVP
pushl LEN
pushl KEYP
pushl KLEN
movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
movl (FRAME_OFFSET+24)(%esp), OUTP # dst
movl (FRAME_OFFSET+28)(%esp), INP # src
movl (FRAME_OFFSET+32)(%esp), LEN # len
movl (FRAME_OFFSET+36)(%esp), IVP # iv
movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
#else
movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
#endif
movups (IVP), IV
mov 480(KEYP), KLEN
.Lxts_enc_loop4:
sub $64, LEN
jl .Lxts_enc_1x
movdqa IV, STATE1
movdqu 0x00(INP), INC
pxor INC, STATE1
movdqu 0x00(INP), IN
pxor IN, STATE1
movdqu IV, 0x00(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE2
movdqu 0x10(INP), INC
pxor INC, STATE2
movdqu 0x10(INP), IN
pxor IN, STATE2
movdqu IV, 0x10(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE3
movdqu 0x20(INP), INC
pxor INC, STATE3
movdqu 0x20(INP), IN
pxor IN, STATE3
movdqu IV, 0x20(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE4
movdqu 0x30(INP), INC
pxor INC, STATE4
movdqu 0x30(INP), IN
pxor IN, STATE4
movdqu IV, 0x30(OUTP)
call _aesni_enc4
movdqu 0x00(OUTP), INC
pxor INC, STATE1
movdqu 0x00(OUTP), IN
pxor IN, STATE1
movdqu STATE1, 0x00(OUTP)
movdqu 0x10(OUTP), INC
pxor INC, STATE2
movdqu 0x10(OUTP), IN
pxor IN, STATE2
movdqu STATE2, 0x10(OUTP)
movdqu 0x20(OUTP), INC
pxor INC, STATE3
movdqu 0x20(OUTP), IN
pxor IN, STATE3
movdqu STATE3, 0x20(OUTP)
movdqu 0x30(OUTP), INC
pxor INC, STATE4
movdqu 0x30(OUTP), IN
pxor IN, STATE4
movdqu STATE4, 0x30(OUTP)
_aesni_gf128mul_x_ble()
add $64, INP
add $64, OUTP
sub $64, LEN
ja .Lxts_enc_loop4
test LEN, LEN
jnz .Lxts_enc_loop4
.Lxts_enc_ret_iv:
movups IV, (IVP)
.Lxts_enc_ret:
#ifndef __x86_64__
popl KLEN
popl KEYP
popl LEN
popl IVP
#endif
FRAME_END
ret
.Lxts_enc_1x:
add $64, LEN
jz .Lxts_enc_ret_iv
sub $16, LEN
jl .Lxts_enc_cts4
.Lxts_enc_loop1:
movdqu (INP), STATE
pxor IV, STATE
call _aesni_enc1
pxor IV, STATE
_aesni_gf128mul_x_ble()
test LEN, LEN
jz .Lxts_enc_out
add $16, INP
sub $16, LEN
jl .Lxts_enc_cts1
movdqu STATE, (OUTP)
add $16, OUTP
jmp .Lxts_enc_loop1
.Lxts_enc_out:
movdqu STATE, (OUTP)
jmp .Lxts_enc_ret_iv
.Lxts_enc_cts4:
movdqa STATE4, STATE
sub $16, OUTP
.Lxts_enc_cts1:
#ifndef __x86_64__
lea .Lcts_permute_table, T1
#else
lea .Lcts_permute_table(%rip), T1
#endif
add LEN, INP /* rewind input pointer */
add $16, LEN /* # bytes in final block */
movups (INP), IN1
mov T1, IVP
add $32, IVP
add LEN, T1
sub LEN, IVP
add OUTP, LEN
movups (T1), %xmm4
movaps STATE, IN2
pshufb %xmm4, STATE
movups STATE, (LEN)
movups (IVP), %xmm0
pshufb %xmm0, IN1
pblendvb IN2, IN1
movaps IN1, STATE
pxor IV, STATE
call _aesni_enc1
pxor IV, STATE
movups STATE, (OUTP)
jmp .Lxts_enc_ret
SYM_FUNC_END(aesni_xts_encrypt)
/*
@@ -2914,66 +3006,158 @@ SYM_FUNC_END(aesni_xts_encrypt)
*/
SYM_FUNC_START(aesni_xts_decrypt)
FRAME_BEGIN
#ifndef __x86_64__
pushl IVP
pushl LEN
pushl KEYP
pushl KLEN
movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
movl (FRAME_OFFSET+24)(%esp), OUTP # dst
movl (FRAME_OFFSET+28)(%esp), INP # src
movl (FRAME_OFFSET+32)(%esp), LEN # len
movl (FRAME_OFFSET+36)(%esp), IVP # iv
movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
#else
movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
#endif
movups (IVP), IV
mov 480(KEYP), KLEN
add $240, KEYP
test $15, LEN
jz .Lxts_dec_loop4
sub $16, LEN
.Lxts_dec_loop4:
sub $64, LEN
jl .Lxts_dec_1x
movdqa IV, STATE1
movdqu 0x00(INP), INC
pxor INC, STATE1
movdqu 0x00(INP), IN
pxor IN, STATE1
movdqu IV, 0x00(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE2
movdqu 0x10(INP), INC
pxor INC, STATE2
movdqu 0x10(INP), IN
pxor IN, STATE2
movdqu IV, 0x10(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE3
movdqu 0x20(INP), INC
pxor INC, STATE3
movdqu 0x20(INP), IN
pxor IN, STATE3
movdqu IV, 0x20(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE4
movdqu 0x30(INP), INC
pxor INC, STATE4
movdqu 0x30(INP), IN
pxor IN, STATE4
movdqu IV, 0x30(OUTP)
call _aesni_dec4
movdqu 0x00(OUTP), INC
pxor INC, STATE1
movdqu 0x00(OUTP), IN
pxor IN, STATE1
movdqu STATE1, 0x00(OUTP)
movdqu 0x10(OUTP), INC
pxor INC, STATE2
movdqu 0x10(OUTP), IN
pxor IN, STATE2
movdqu STATE2, 0x10(OUTP)
movdqu 0x20(OUTP), INC
pxor INC, STATE3
movdqu 0x20(OUTP), IN
pxor IN, STATE3
movdqu STATE3, 0x20(OUTP)
movdqu 0x30(OUTP), INC
pxor INC, STATE4
movdqu 0x30(OUTP), IN
pxor IN, STATE4
movdqu STATE4, 0x30(OUTP)
_aesni_gf128mul_x_ble()
add $64, INP
add $64, OUTP
sub $64, LEN
ja .Lxts_dec_loop4
test LEN, LEN
jnz .Lxts_dec_loop4
.Lxts_dec_ret_iv:
movups IV, (IVP)
.Lxts_dec_ret:
#ifndef __x86_64__
popl KLEN
popl KEYP
popl LEN
popl IVP
#endif
FRAME_END
ret
SYM_FUNC_END(aesni_xts_decrypt)
.Lxts_dec_1x:
add $64, LEN
jz .Lxts_dec_ret_iv
.Lxts_dec_loop1:
movdqu (INP), STATE
add $16, INP
sub $16, LEN
jl .Lxts_dec_cts1
pxor IV, STATE
call _aesni_dec1
pxor IV, STATE
_aesni_gf128mul_x_ble()
test LEN, LEN
jz .Lxts_dec_out
movdqu STATE, (OUTP)
add $16, OUTP
jmp .Lxts_dec_loop1
.Lxts_dec_out:
movdqu STATE, (OUTP)
jmp .Lxts_dec_ret_iv
.Lxts_dec_cts1:
movdqa IV, STATE4
_aesni_gf128mul_x_ble()
pxor IV, STATE
call _aesni_dec1
pxor IV, STATE
#ifndef __x86_64__
lea .Lcts_permute_table, T1
#else
lea .Lcts_permute_table(%rip), T1
#endif
add LEN, INP /* rewind input pointer */
add $16, LEN /* # bytes in final block */
movups (INP), IN1
mov T1, IVP
add $32, IVP
add LEN, T1
sub LEN, IVP
add OUTP, LEN
movups (T1), %xmm4
movaps STATE, IN2
pshufb %xmm4, STATE
movups STATE, (LEN)
movups (IVP), %xmm0
pshufb %xmm0, IN1
pblendvb IN2, IN1
movaps IN1, STATE
pxor STATE4, STATE
call _aesni_dec1
pxor STATE4, STATE
movups STATE, (OUTP)
jmp .Lxts_dec_ret
SYM_FUNC_END(aesni_xts_decrypt)
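
As an aside, the _aesni_gf128mul_x_ble() macro above computes the usual
XTS tweak update: multiplication by x in GF(2^128) in the little-endian
block ("ble") convention, reducing modulo x^128 + x^7 + x^2 + x + 1.
A plain C sketch of the same operation (illustrative only, not taken
from the patch):

    #include <stdint.h>

    /* t[0] holds the low 64 bits of the tweak, t[1] the high 64 bits. */
    static void gf128mul_x_ble_sketch(uint64_t t[2])
    {
            uint64_t carry = t[1] >> 63;           /* bit shifted out at the top */

            t[1] = (t[1] << 1) | (t[0] >> 63);     /* 128-bit left shift by one */
            t[0] = (t[0] << 1) ^ (carry * 0x87);   /* fold the carry back in */
    }

The SSE macro gets the same effect with pshufd/psrad/pand/pxor and the
.Lgf128mul_x_ble_mask constant loaded into GF128MUL_MASK.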

arch/x86/crypto/aesni-intel_glue.c

@@ -33,9 +33,6 @@
#include <crypto/internal/skcipher.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#ifdef CONFIG_X86_64
#include <asm/crypto/glue_helper.h>
#endif
#define AESNI_ALIGN 16
@@ -632,98 +629,6 @@ static int ctr_crypt(struct skcipher_request *req)
return err;
}
static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
{
struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err;
err = xts_verify_key(tfm, key, keylen);
if (err)
return err;
keylen /= 2;
/* first half of xts-key is for crypt */
err = aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_crypt_ctx,
key, keylen);
if (err)
return err;
/* second half of xts-key is for tweak */
return aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_tweak_ctx,
key + keylen, keylen);
}
static void aesni_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_enc);
}
static void aesni_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_dec);
}
static void aesni_xts_enc32(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
{
aesni_xts_encrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv);
}
static void aesni_xts_dec32(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
{
aesni_xts_decrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv);
}
static const struct common_glue_ctx aesni_enc_xts = {
.num_funcs = 2,
.fpu_blocks_limit = 1,
.funcs = { {
.num_blocks = 32,
.fn_u = { .xts = aesni_xts_enc32 }
}, {
.num_blocks = 1,
.fn_u = { .xts = aesni_xts_enc }
} }
};
static const struct common_glue_ctx aesni_dec_xts = {
.num_funcs = 2,
.fpu_blocks_limit = 1,
.funcs = { {
.num_blocks = 32,
.fn_u = { .xts = aesni_xts_dec32 }
}, {
.num_blocks = 1,
.fn_u = { .xts = aesni_xts_dec }
} }
};
static int xts_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
return glue_xts_req_128bit(&aesni_enc_xts, req, aesni_enc,
aes_ctx(ctx->raw_tweak_ctx),
aes_ctx(ctx->raw_crypt_ctx),
false);
}
static int xts_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
return glue_xts_req_128bit(&aesni_dec_xts, req, aesni_enc,
aes_ctx(ctx->raw_tweak_ctx),
aes_ctx(ctx->raw_crypt_ctx),
true);
}
static int
rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
{
@@ -996,6 +901,128 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
}
#endif
static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
{
struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err;
err = xts_verify_key(tfm, key, keylen);
if (err)
return err;
keylen /= 2;
/* first half of xts-key is for crypt */
err = aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_crypt_ctx,
key, keylen);
if (err)
return err;
/* second half of xts-key is for tweak */
return aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_tweak_ctx,
key + keylen, keylen);
}
static int xts_crypt(struct skcipher_request *req, bool encrypt)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int tail = req->cryptlen % AES_BLOCK_SIZE;
struct skcipher_request subreq;
struct skcipher_walk walk;
int err;
if (req->cryptlen < AES_BLOCK_SIZE)
return -EINVAL;
err = skcipher_walk_virt(&walk, req, false);
if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
skcipher_walk_abort(&walk);
skcipher_request_set_tfm(&subreq, tfm);
skcipher_request_set_callback(&subreq,
skcipher_request_flags(req),
NULL, NULL);
skcipher_request_set_crypt(&subreq, req->src, req->dst,
blocks * AES_BLOCK_SIZE, req->iv);
req = &subreq;
err = skcipher_walk_virt(&walk, req, false);
} else {
tail = 0;
}
kernel_fpu_begin();
/* calculate first value of T */
aesni_enc(aes_ctx(ctx->raw_tweak_ctx), walk.iv, walk.iv);
while (walk.nbytes > 0) {
int nbytes = walk.nbytes;
if (nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
if (encrypt)
aesni_xts_encrypt(aes_ctx(ctx->raw_crypt_ctx),
walk.dst.virt.addr, walk.src.virt.addr,
nbytes, walk.iv);
else
aesni_xts_decrypt(aes_ctx(ctx->raw_crypt_ctx),
walk.dst.virt.addr, walk.src.virt.addr,
nbytes, walk.iv);
kernel_fpu_end();
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
if (walk.nbytes > 0)
kernel_fpu_begin();
}
if (unlikely(tail > 0 && !err)) {
struct scatterlist sg_src[2], sg_dst[2];
struct scatterlist *src, *dst;
dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
if (req->dst != req->src)
dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
req->iv);
err = skcipher_walk_virt(&walk, &subreq, false);
if (err)
return err;
kernel_fpu_begin();
if (encrypt)
aesni_xts_encrypt(aes_ctx(ctx->raw_crypt_ctx),
walk.dst.virt.addr, walk.src.virt.addr,
walk.nbytes, walk.iv);
else
aesni_xts_decrypt(aes_ctx(ctx->raw_crypt_ctx),
walk.dst.virt.addr, walk.src.virt.addr,
walk.nbytes, walk.iv);
kernel_fpu_end();
err = skcipher_walk_done(&walk, 0);
}
return err;
}
static int xts_encrypt(struct skcipher_request *req)
{
return xts_crypt(req, true);
}
static int xts_decrypt(struct skcipher_request *req)
{
return xts_crypt(req, false);
}
static struct crypto_alg aesni_cipher_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-aesni",
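
To make the tail handling in xts_crypt() above concrete, consider a
hypothetical 70-byte request (the length is invented for illustration;
the arithmetic mirrors the code above):

    int cryptlen = 70;
    int tail   = cryptlen % AES_BLOCK_SIZE;                  /* 6 */
    int blocks = DIV_ROUND_UP(cryptlen, AES_BLOCK_SIZE) - 2; /* 3 */

    /*
     * The subrequest covers blocks * AES_BLOCK_SIZE = 48 bytes, which
     * the asm helper processes as whole blocks.  The remaining
     * AES_BLOCK_SIZE + tail = 22 bytes (the penultimate, full block
     * plus the 6-byte tail) go into one final call, which performs
     * the ciphertext stealing.
     */
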
@@ -1082,6 +1109,7 @@ static struct skcipher_alg aesni_skciphers[] = {
.setkey = aesni_skcipher_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
#endif
}, {
.base = {
.cra_name = "__xts(aes)",
@@ -1095,10 +1123,10 @@ static struct skcipher_alg aesni_skciphers[] = {
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.walksize = 2 * AES_BLOCK_SIZE,
.setkey = xts_aesni_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
#endif
}
};

crypto/Kconfig

@@ -1133,7 +1133,6 @@ config CRYPTO_AES_NI_INTEL
select CRYPTO_LIB_AES
select CRYPTO_ALGAPI
select CRYPTO_SKCIPHER
select CRYPTO_GLUE_HELPER_X86 if 64BIT
select CRYPTO_SIMD
help
Use Intel AES-NI instructions for AES algorithm.