crypto: arm64/chacha20 - add XChaCha20 support
Add an XChaCha20 implementation that is hooked up to the ARM64 NEON implementation of ChaCha20. This can be used by Adiantum.

A NEON implementation of single-block HChaCha20 is also added so that XChaCha20 can use it rather than the generic implementation. This required refactoring the ChaCha20 permutation into its own function.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
parent
a00fa0c887
commit
cc7cf991e9
@ -101,7 +101,7 @@ config CRYPTO_AES_ARM64_NEON_BLK
|
|||||||
select CRYPTO_SIMD
|
select CRYPTO_SIMD
|
||||||
|
|
||||||
config CRYPTO_CHACHA20_NEON
|
config CRYPTO_CHACHA20_NEON
|
||||||
tristate "NEON accelerated ChaCha20 symmetric cipher"
|
tristate "ChaCha20 and XChaCha20 stream ciphers using NEON instructions"
|
||||||
depends on KERNEL_MODE_NEON
|
depends on KERNEL_MODE_NEON
|
||||||
select CRYPTO_BLKCIPHER
|
select CRYPTO_BLKCIPHER
|
||||||
select CRYPTO_CHACHA20
|
select CRYPTO_CHACHA20
|
||||||
|
@ -23,25 +23,20 @@
|
|||||||
.text
|
.text
|
||||||
.align 6
|
.align 6
|
||||||
|
|
||||||
ENTRY(chacha20_block_xor_neon)
|
/*
|
||||||
// x0: Input state matrix, s
|
* chacha20_permute - permute one block
|
||||||
// x1: 1 data block output, o
|
*
|
||||||
// x2: 1 data block input, i
|
* Permute one 64-byte block where the state matrix is stored in the four NEON
|
||||||
|
* registers v0-v3. It performs matrix operations on four words in parallel,
|
||||||
//
|
* but requires shuffling to rearrange the words after each round.
|
||||||
// This function encrypts one ChaCha20 block by loading the state matrix
|
*
|
||||||
// in four NEON registers. It performs matrix operation on four words in
|
* Clobbers: x3, x10, v4, v12
|
||||||
// parallel, but requires shuffling to rearrange the words after each
|
*/
|
||||||
// round.
|
chacha20_permute:
|
||||||
//
|
|
||||||
|
|
||||||
// x0..3 = s0..3
|
|
||||||
adr x3, ROT8
|
|
||||||
ld1 {v0.4s-v3.4s}, [x0]
|
|
||||||
ld1 {v8.4s-v11.4s}, [x0]
|
|
||||||
ld1 {v12.4s}, [x3]
|
|
||||||
|
|
||||||
mov x3, #10
|
mov x3, #10
|
||||||
|
adr x10, ROT8
|
||||||
|
ld1 {v12.4s}, [x10]
|
||||||
|
|
||||||
.Ldoubleround:
|
.Ldoubleround:
|
||||||
// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
|
// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
|
||||||
@ -105,6 +100,23 @@ ENTRY(chacha20_block_xor_neon)
|
|||||||
subs x3, x3, #1
|
subs x3, x3, #1
|
||||||
b.ne .Ldoubleround
|
b.ne .Ldoubleround
|
||||||
|
|
||||||
|
ret
|
||||||
|
ENDPROC(chacha20_permute)
|
||||||
|
|
||||||
|
ENTRY(chacha20_block_xor_neon)
|
||||||
|
// x0: Input state matrix, s
|
||||||
|
// x1: 1 data block output, o
|
||||||
|
// x2: 1 data block input, i
|
||||||
|
|
||||||
|
stp x29, x30, [sp, #-16]!
|
||||||
|
mov x29, sp
|
||||||
|
|
||||||
|
// x0..3 = s0..3
|
||||||
|
ld1 {v0.4s-v3.4s}, [x0]
|
||||||
|
ld1 {v8.4s-v11.4s}, [x0]
|
||||||
|
|
||||||
|
bl chacha20_permute
|
||||||
|
|
||||||
ld1 {v4.16b-v7.16b}, [x2]
|
ld1 {v4.16b-v7.16b}, [x2]
|
||||||
|
|
||||||
// o0 = i0 ^ (x0 + s0)
|
// o0 = i0 ^ (x0 + s0)
|
||||||
@ -125,9 +137,28 @@ ENTRY(chacha20_block_xor_neon)
|
|||||||
|
|
||||||
st1 {v0.16b-v3.16b}, [x1]
|
st1 {v0.16b-v3.16b}, [x1]
|
||||||
|
|
||||||
|
ldp x29, x30, [sp], #16
|
||||||
ret
|
ret
|
||||||
ENDPROC(chacha20_block_xor_neon)
|
ENDPROC(chacha20_block_xor_neon)
|
||||||
|
|
||||||
|
// hchacha20_block_neon - permute one state matrix and emit 8 of its words
//
// Loads the 16-word state at x0 into v0-v3, runs chacha20_permute on it and
// stores v0 followed by v3 (32 bytes in total) to x1. Unlike
// chacha20_block_xor_neon, the input state is not added back in and no data
// is XORed. Registers listed as clobbered by chacha20_permute (x3, x10, v4,
// v12) are not preserved.
ENTRY(hchacha20_block_neon)
|
||||||
|
// x0: Input state matrix, s
|
||||||
|
// x1: output (8 32-bit words)
|
||||||
|
|
||||||
|
// The bl below overwrites the link register (x30), so save x29/x30 and
// set up a frame record before calling the helper.
stp x29, x30, [sp, #-16]!
|
||||||
|
mov x29, sp
|
||||||
|
|
||||||
|
// v0 = s[0..3], v1 = s[4..7], v2 = s[8..11], v3 = s[12..15]
ld1 {v0.4s-v3.4s}, [x0]
|
||||||
|
|
||||||
|
// Permutes the state held in v0-v3 in place.
bl chacha20_permute
|
||||||
|
|
||||||
|
// Write v0 to out[0..3]; the post-index advances x1 by 16 bytes so that
// v3 lands in out[4..7]. v1 and v2 are discarded.
st1 {v0.16b}, [x1], #16
|
||||||
|
st1 {v3.16b}, [x1]
|
||||||
|
|
||||||
|
// Restore the caller's frame pointer and return address.
ldp x29, x30, [sp], #16
|
||||||
|
ret
|
||||||
|
ENDPROC(hchacha20_block_neon)
|
||||||
|
|
||||||
.align 6
|
.align 6
|
||||||
ENTRY(chacha20_4block_xor_neon)
|
ENTRY(chacha20_4block_xor_neon)
|
||||||
// x0: Input state matrix, s
|
// x0: Input state matrix, s
|
||||||
|
@ -30,6 +30,7 @@
|
|||||||
|
|
||||||
asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
|
asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
|
||||||
asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
|
asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
|
||||||
|
asmlinkage void hchacha20_block_neon(const u32 *state, u32 *out);
|
||||||
|
|
||||||
static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
|
static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
|
||||||
unsigned int bytes)
|
unsigned int bytes)
|
||||||
@ -65,20 +66,16 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
|
|||||||
kernel_neon_end();
|
kernel_neon_end();
|
||||||
}
|
}
|
||||||
|
|
||||||
static int chacha20_neon(struct skcipher_request *req)
|
static int chacha20_neon_stream_xor(struct skcipher_request *req,
|
||||||
|
struct chacha_ctx *ctx, u8 *iv)
|
||||||
{
|
{
|
||||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
|
||||||
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
|
||||||
struct skcipher_walk walk;
|
struct skcipher_walk walk;
|
||||||
u32 state[16];
|
u32 state[16];
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
if (!may_use_simd() || req->cryptlen <= CHACHA_BLOCK_SIZE)
|
|
||||||
return crypto_chacha_crypt(req);
|
|
||||||
|
|
||||||
err = skcipher_walk_virt(&walk, req, false);
|
err = skcipher_walk_virt(&walk, req, false);
|
||||||
|
|
||||||
crypto_chacha_init(state, ctx, walk.iv);
|
crypto_chacha_init(state, ctx, iv);
|
||||||
|
|
||||||
while (walk.nbytes > 0) {
|
while (walk.nbytes > 0) {
|
||||||
unsigned int nbytes = walk.nbytes;
|
unsigned int nbytes = walk.nbytes;
|
||||||
@ -94,7 +91,41 @@ static int chacha20_neon(struct skcipher_request *req)
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct skcipher_alg alg = {
|
/*
 * chacha20_neon - skcipher request handler for the NEON ChaCha20 path.
 *
 * @req: the request to process; its cryptlen and iv fields are read here.
 *
 * Requests of at most CHACHA_BLOCK_SIZE bytes, or requests issued while
 * may_use_simd() is false, are handed to crypto_chacha_crypt() instead.
 * All other requests go to chacha20_neon_stream_xor() with the transform's
 * own context and the IV taken directly from the request.
 *
 * Returns the value returned by whichever of those two functions ran.
 */
static int chacha20_neon(struct skcipher_request *req)
|
||||||
|
{
|
||||||
|
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||||
|
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||||
|
|
||||||
|
/* The length test comes first, so may_use_simd() is only evaluated for requests longer than one block. */
if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
|
||||||
|
return crypto_chacha_crypt(req);
|
||||||
|
|
||||||
|
return chacha20_neon_stream_xor(req, ctx, req->iv);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * xchacha20_neon - skcipher request handler for the NEON XChaCha20 path.
 *
 * @req: the request to process; its cryptlen and iv fields are read here.
 *
 * Requests of at most CHACHA_BLOCK_SIZE bytes, or requests issued while
 * may_use_simd() is false, are handed to crypto_xchacha_crypt() instead.
 * Otherwise a subkey is derived with hchacha20_block_neon() from a state
 * built out of the transform's context and req->iv, a 16-byte IV is
 * assembled from bytes 16..31 of req->iv, and the data is processed by
 * chacha20_neon_stream_xor() using that subkey and IV.
 *
 * Returns the value returned by whichever of those two functions ran.
 */
static int xchacha20_neon(struct skcipher_request *req)
|
||||||
|
{
|
||||||
|
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||||
|
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||||
|
/*
 * NOTE(review): only subctx.key is written below. If struct chacha_ctx has
 * other members that chacha20_neon_stream_xor() reads, they are
 * uninitialized here - confirm against the struct definition.
 */
struct chacha_ctx subctx;
|
||||||
|
u32 state[16];
|
||||||
|
u8 real_iv[16];
|
||||||
|
|
||||||
|
if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
|
||||||
|
return crypto_xchacha_crypt(req);
|
||||||
|
|
||||||
|
/* Build the HChaCha20 input state from the original key context and req->iv. */
crypto_chacha_init(state, ctx, req->iv);
|
||||||
|
|
||||||
|
/* NEON registers may only be touched between kernel_neon_begin() and kernel_neon_end(). */
kernel_neon_begin();
|
||||||
|
/* Writes 8 32-bit words (the derived subkey) into subctx.key. */
hchacha20_block_neon(state, subctx.key);
|
||||||
|
kernel_neon_end();
|
||||||
|
|
||||||
|
/*
 * real_iv[0..7] = req->iv[24..31] and real_iv[8..15] = req->iv[16..23],
 * i.e. the two trailing 8-byte fields of the request IV are swapped.
 * Presumably this places the stream position ahead of the remaining nonce
 * bytes as crypto_chacha_init() expects - confirm against its IV layout.
 */
memcpy(&real_iv[0], req->iv + 24, 8);
|
||||||
|
memcpy(&real_iv[8], req->iv + 16, 8);
|
||||||
|
return chacha20_neon_stream_xor(req, &subctx, real_iv);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct skcipher_alg algs[] = {
|
||||||
|
{
|
||||||
.base.cra_name = "chacha20",
|
.base.cra_name = "chacha20",
|
||||||
.base.cra_driver_name = "chacha20-neon",
|
.base.cra_driver_name = "chacha20-neon",
|
||||||
.base.cra_priority = 300,
|
.base.cra_priority = 300,
|
||||||
@ -110,6 +141,23 @@ static struct skcipher_alg alg = {
|
|||||||
.setkey = crypto_chacha20_setkey,
|
.setkey = crypto_chacha20_setkey,
|
||||||
.encrypt = chacha20_neon,
|
.encrypt = chacha20_neon,
|
||||||
.decrypt = chacha20_neon,
|
.decrypt = chacha20_neon,
|
||||||
|
}, {
|
||||||
|
.base.cra_name = "xchacha20",
|
||||||
|
.base.cra_driver_name = "xchacha20-neon",
|
||||||
|
.base.cra_priority = 300,
|
||||||
|
.base.cra_blocksize = 1,
|
||||||
|
.base.cra_ctxsize = sizeof(struct chacha_ctx),
|
||||||
|
.base.cra_module = THIS_MODULE,
|
||||||
|
|
||||||
|
.min_keysize = CHACHA_KEY_SIZE,
|
||||||
|
.max_keysize = CHACHA_KEY_SIZE,
|
||||||
|
.ivsize = XCHACHA_IV_SIZE,
|
||||||
|
.chunksize = CHACHA_BLOCK_SIZE,
|
||||||
|
.walksize = 4 * CHACHA_BLOCK_SIZE,
|
||||||
|
.setkey = crypto_chacha20_setkey,
|
||||||
|
.encrypt = xchacha20_neon,
|
||||||
|
.decrypt = xchacha20_neon,
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static int __init chacha20_simd_mod_init(void)
|
static int __init chacha20_simd_mod_init(void)
|
||||||
@ -117,12 +165,12 @@ static int __init chacha20_simd_mod_init(void)
|
|||||||
if (!(elf_hwcap & HWCAP_ASIMD))
|
if (!(elf_hwcap & HWCAP_ASIMD))
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
|
|
||||||
return crypto_register_skcipher(&alg);
|
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __exit chacha20_simd_mod_fini(void)
|
static void __exit chacha20_simd_mod_fini(void)
|
||||||
{
|
{
|
||||||
crypto_unregister_skcipher(&alg);
|
crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
|
||||||
}
|
}
|
||||||
|
|
||||||
module_init(chacha20_simd_mod_init);
|
module_init(chacha20_simd_mod_init);
|
||||||
@ -131,3 +179,6 @@ module_exit(chacha20_simd_mod_fini);
|
|||||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||||
MODULE_LICENSE("GPL v2");
|
MODULE_LICENSE("GPL v2");
|
||||||
MODULE_ALIAS_CRYPTO("chacha20");
|
MODULE_ALIAS_CRYPTO("chacha20");
|
||||||
|
MODULE_ALIAS_CRYPTO("chacha20-neon");
|
||||||
|
MODULE_ALIAS_CRYPTO("xchacha20");
|
||||||
|
MODULE_ALIAS_CRYPTO("xchacha20-neon");
|
||||||
|
Loading…
Reference in New Issue
Block a user