Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu: "API: - Decryption test vectors are now automatically generated from encryption test vectors. Algorithms: - Fix unaligned access issues in crc32/crc32c. - Add zstd compression algorithm. - Add AEGIS. - Add MORUS. Drivers: - Add accelerated AEGIS/MORUS on x86. - Add accelerated SM4 on arm64. - Removed x86 assembly salsa implementation as it is slower than C. - Add authenc(hmac(sha*), cbc(aes)) support in inside-secure. - Add ctr(aes) support in crypto4xx. - Add hardware key support in ccree. - Add support for new Centaur CPU in via-rng" * 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (112 commits) crypto: chtls - free beyond end rspq_skb_cache crypto: chtls - kbuild warnings crypto: chtls - dereference null variable crypto: chtls - wait for memory sendmsg, sendpage crypto: chtls - key len correction crypto: salsa20 - Revert "crypto: salsa20 - export generic helpers" crypto: x86/salsa20 - remove x86 salsa20 implementations crypto: ccp - Add GET_ID SEV command crypto: ccp - Add DOWNLOAD_FIRMWARE SEV command crypto: qat - Add MODULE_FIRMWARE for all qat drivers crypto: ccree - silence debug prints crypto: ccree - better clock handling crypto: ccree - correct host regs offset crypto: chelsio - Remove separate buffer used for DMA map B0 block in CCM crypt: chelsio - Send IV as Immediate for cipher algo crypto: chelsio - Return -ENOSPC for transient busy indication. crypto: caam/qi - fix warning in init_cgr() crypto: caam - fix rfc4543 descriptors crypto: caam - fix MC firmware detection crypto: clarify licensing of OpenSSL asm code ...
This commit is contained in:
commit
3e1a29b3bf
@ -1,4 +1,14 @@
|
||||
#define __ARM_ARCH__ __LINUX_ARM_ARCH__
|
||||
@ SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
|
||||
@ has relicensed it under the GPLv2. Therefore this program is free software;
|
||||
@ you can redistribute it and/or modify it under the terms of the GNU General
|
||||
@ Public License version 2 as published by the Free Software Foundation.
|
||||
@
|
||||
@ The original headers, including the original license headers, are
|
||||
@ included below for completeness.
|
||||
|
||||
@ ====================================================================
|
||||
@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
||||
@ project. The module is, however, dual licensed under OpenSSL and
|
||||
|
@ -1,12 +1,19 @@
|
||||
#!/usr/bin/env perl
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
# This code is taken from the OpenSSL project but the author (Andy Polyakov)
|
||||
# has relicensed it under the GPLv2. Therefore this program is free software;
|
||||
# you can redistribute it and/or modify it under the terms of the GNU General
|
||||
# Public License version 2 as published by the Free Software Foundation.
|
||||
#
|
||||
# The original headers, including the original license headers, are
|
||||
# included below for completeness.
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
#
|
||||
# Permission to use under GPL terms is granted.
|
||||
# ====================================================================
|
||||
|
||||
# SHA256 block procedure for ARMv4. May 2007.
|
||||
|
@ -1,11 +1,18 @@
|
||||
@ SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
|
||||
@ has relicensed it under the GPLv2. Therefore this program is free software;
|
||||
@ you can redistribute it and/or modify it under the terms of the GNU General
|
||||
@ Public License version 2 as published by the Free Software Foundation.
|
||||
@
|
||||
@ The original headers, including the original license headers, are
|
||||
@ included below for completeness.
|
||||
|
||||
@ ====================================================================
|
||||
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@ project. The module is, however, dual licensed under OpenSSL and
|
||||
@ CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
@ details see http://www.openssl.org/~appro/cryptogams/.
|
||||
@
|
||||
@ Permission to use under GPL terms is granted.
|
||||
@ ====================================================================
|
||||
|
||||
@ SHA256 block procedure for ARMv4. May 2007.
|
||||
|
@ -1,12 +1,19 @@
|
||||
#!/usr/bin/env perl
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
# This code is taken from the OpenSSL project but the author (Andy Polyakov)
|
||||
# has relicensed it under the GPLv2. Therefore this program is free software;
|
||||
# you can redistribute it and/or modify it under the terms of the GNU General
|
||||
# Public License version 2 as published by the Free Software Foundation.
|
||||
#
|
||||
# The original headers, including the original license headers, are
|
||||
# included below for completeness.
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
#
|
||||
# Permission to use under GPL terms is granted.
|
||||
# ====================================================================
|
||||
|
||||
# SHA512 block procedure for ARMv4. September 2007.
|
||||
|
@ -1,11 +1,18 @@
|
||||
@ SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
|
||||
@ has relicensed it under the GPLv2. Therefore this program is free software;
|
||||
@ you can redistribute it and/or modify it under the terms of the GNU General
|
||||
@ Public License version 2 as published by the Free Software Foundation.
|
||||
@
|
||||
@ The original headers, including the original license headers, are
|
||||
@ included below for completeness.
|
||||
|
||||
@ ====================================================================
|
||||
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@ project. The module is, however, dual licensed under OpenSSL and
|
||||
@ CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
@ details see http://www.openssl.org/~appro/cryptogams/.
|
||||
@
|
||||
@ Permission to use under GPL terms is granted.
|
||||
@ ====================================================================
|
||||
|
||||
@ SHA512 block procedure for ARMv4. September 2007.
|
||||
|
@ -47,6 +47,12 @@ config CRYPTO_SM3_ARM64_CE
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_SM3
|
||||
|
||||
config CRYPTO_SM4_ARM64_CE
|
||||
tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
|
||||
depends on KERNEL_MODE_NEON
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_SM4
|
||||
|
||||
config CRYPTO_GHASH_ARM64_CE
|
||||
tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
|
||||
depends on KERNEL_MODE_NEON
|
||||
|
@ -23,6 +23,9 @@ sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
|
||||
obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o
|
||||
sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce.o
|
||||
sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
|
||||
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
|
||||
|
||||
|
@ -19,24 +19,33 @@
|
||||
* u32 *macp, u8 const rk[], u32 rounds);
|
||||
*/
|
||||
ENTRY(ce_aes_ccm_auth_data)
|
||||
ldr w8, [x3] /* leftover from prev round? */
|
||||
frame_push 7
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
mov x24, x5
|
||||
|
||||
ldr w25, [x22] /* leftover from prev round? */
|
||||
ld1 {v0.16b}, [x0] /* load mac */
|
||||
cbz w8, 1f
|
||||
sub w8, w8, #16
|
||||
cbz w25, 1f
|
||||
sub w25, w25, #16
|
||||
eor v1.16b, v1.16b, v1.16b
|
||||
0: ldrb w7, [x1], #1 /* get 1 byte of input */
|
||||
subs w2, w2, #1
|
||||
add w8, w8, #1
|
||||
0: ldrb w7, [x20], #1 /* get 1 byte of input */
|
||||
subs w21, w21, #1
|
||||
add w25, w25, #1
|
||||
ins v1.b[0], w7
|
||||
ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */
|
||||
beq 8f /* out of input? */
|
||||
cbnz w8, 0b
|
||||
cbnz w25, 0b
|
||||
eor v0.16b, v0.16b, v1.16b
|
||||
1: ld1 {v3.4s}, [x4] /* load first round key */
|
||||
prfm pldl1strm, [x1]
|
||||
cmp w5, #12 /* which key size? */
|
||||
add x6, x4, #16
|
||||
sub w7, w5, #2 /* modified # of rounds */
|
||||
1: ld1 {v3.4s}, [x23] /* load first round key */
|
||||
prfm pldl1strm, [x20]
|
||||
cmp w24, #12 /* which key size? */
|
||||
add x6, x23, #16
|
||||
sub w7, w24, #2 /* modified # of rounds */
|
||||
bmi 2f
|
||||
bne 5f
|
||||
mov v5.16b, v3.16b
|
||||
@ -55,33 +64,43 @@ ENTRY(ce_aes_ccm_auth_data)
|
||||
ld1 {v5.4s}, [x6], #16 /* load next round key */
|
||||
bpl 3b
|
||||
aese v0.16b, v4.16b
|
||||
subs w2, w2, #16 /* last data? */
|
||||
subs w21, w21, #16 /* last data? */
|
||||
eor v0.16b, v0.16b, v5.16b /* final round */
|
||||
bmi 6f
|
||||
ld1 {v1.16b}, [x1], #16 /* load next input block */
|
||||
ld1 {v1.16b}, [x20], #16 /* load next input block */
|
||||
eor v0.16b, v0.16b, v1.16b /* xor with mac */
|
||||
bne 1b
|
||||
6: st1 {v0.16b}, [x0] /* store mac */
|
||||
beq 6f
|
||||
|
||||
if_will_cond_yield_neon
|
||||
st1 {v0.16b}, [x19] /* store mac */
|
||||
do_cond_yield_neon
|
||||
ld1 {v0.16b}, [x19] /* reload mac */
|
||||
endif_yield_neon
|
||||
|
||||
b 1b
|
||||
6: st1 {v0.16b}, [x19] /* store mac */
|
||||
beq 10f
|
||||
adds w2, w2, #16
|
||||
adds w21, w21, #16
|
||||
beq 10f
|
||||
mov w8, w2
|
||||
7: ldrb w7, [x1], #1
|
||||
mov w25, w21
|
||||
7: ldrb w7, [x20], #1
|
||||
umov w6, v0.b[0]
|
||||
eor w6, w6, w7
|
||||
strb w6, [x0], #1
|
||||
subs w2, w2, #1
|
||||
strb w6, [x19], #1
|
||||
subs w21, w21, #1
|
||||
beq 10f
|
||||
ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */
|
||||
b 7b
|
||||
8: mov w7, w8
|
||||
add w8, w8, #16
|
||||
8: mov w7, w25
|
||||
add w25, w25, #16
|
||||
9: ext v1.16b, v1.16b, v1.16b, #1
|
||||
adds w7, w7, #1
|
||||
bne 9b
|
||||
eor v0.16b, v0.16b, v1.16b
|
||||
st1 {v0.16b}, [x0]
|
||||
10: str w8, [x3]
|
||||
st1 {v0.16b}, [x19]
|
||||
10: str w25, [x22]
|
||||
|
||||
frame_pop
|
||||
ret
|
||||
ENDPROC(ce_aes_ccm_auth_data)
|
||||
|
||||
@ -126,19 +145,29 @@ ENTRY(ce_aes_ccm_final)
|
||||
ENDPROC(ce_aes_ccm_final)
|
||||
|
||||
.macro aes_ccm_do_crypt,enc
|
||||
ldr x8, [x6, #8] /* load lower ctr */
|
||||
ld1 {v0.16b}, [x5] /* load mac */
|
||||
CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
|
||||
frame_push 8
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
mov x24, x5
|
||||
mov x25, x6
|
||||
|
||||
ldr x26, [x25, #8] /* load lower ctr */
|
||||
ld1 {v0.16b}, [x24] /* load mac */
|
||||
CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */
|
||||
0: /* outer loop */
|
||||
ld1 {v1.8b}, [x6] /* load upper ctr */
|
||||
prfm pldl1strm, [x1]
|
||||
add x8, x8, #1
|
||||
rev x9, x8
|
||||
cmp w4, #12 /* which key size? */
|
||||
sub w7, w4, #2 /* get modified # of rounds */
|
||||
ld1 {v1.8b}, [x25] /* load upper ctr */
|
||||
prfm pldl1strm, [x20]
|
||||
add x26, x26, #1
|
||||
rev x9, x26
|
||||
cmp w23, #12 /* which key size? */
|
||||
sub w7, w23, #2 /* get modified # of rounds */
|
||||
ins v1.d[1], x9 /* no carry in lower ctr */
|
||||
ld1 {v3.4s}, [x3] /* load first round key */
|
||||
add x10, x3, #16
|
||||
ld1 {v3.4s}, [x22] /* load first round key */
|
||||
add x10, x22, #16
|
||||
bmi 1f
|
||||
bne 4f
|
||||
mov v5.16b, v3.16b
|
||||
@ -165,9 +194,9 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
|
||||
bpl 2b
|
||||
aese v0.16b, v4.16b
|
||||
aese v1.16b, v4.16b
|
||||
subs w2, w2, #16
|
||||
bmi 6f /* partial block? */
|
||||
ld1 {v2.16b}, [x1], #16 /* load next input block */
|
||||
subs w21, w21, #16
|
||||
bmi 7f /* partial block? */
|
||||
ld1 {v2.16b}, [x20], #16 /* load next input block */
|
||||
.if \enc == 1
|
||||
eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
|
||||
eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */
|
||||
@ -176,18 +205,29 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
|
||||
eor v1.16b, v2.16b, v5.16b /* final round enc */
|
||||
.endif
|
||||
eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
|
||||
st1 {v1.16b}, [x0], #16 /* write output block */
|
||||
bne 0b
|
||||
CPU_LE( rev x8, x8 )
|
||||
st1 {v0.16b}, [x5] /* store mac */
|
||||
str x8, [x6, #8] /* store lsb end of ctr (BE) */
|
||||
5: ret
|
||||
st1 {v1.16b}, [x19], #16 /* write output block */
|
||||
beq 5f
|
||||
|
||||
6: eor v0.16b, v0.16b, v5.16b /* final round mac */
|
||||
if_will_cond_yield_neon
|
||||
st1 {v0.16b}, [x24] /* store mac */
|
||||
do_cond_yield_neon
|
||||
ld1 {v0.16b}, [x24] /* reload mac */
|
||||
endif_yield_neon
|
||||
|
||||
b 0b
|
||||
5:
|
||||
CPU_LE( rev x26, x26 )
|
||||
st1 {v0.16b}, [x24] /* store mac */
|
||||
str x26, [x25, #8] /* store lsb end of ctr (BE) */
|
||||
|
||||
6: frame_pop
|
||||
ret
|
||||
|
||||
7: eor v0.16b, v0.16b, v5.16b /* final round mac */
|
||||
eor v1.16b, v1.16b, v5.16b /* final round enc */
|
||||
st1 {v0.16b}, [x5] /* store mac */
|
||||
add w2, w2, #16 /* process partial tail block */
|
||||
7: ldrb w9, [x1], #1 /* get 1 byte of input */
|
||||
st1 {v0.16b}, [x24] /* store mac */
|
||||
add w21, w21, #16 /* process partial tail block */
|
||||
8: ldrb w9, [x20], #1 /* get 1 byte of input */
|
||||
umov w6, v1.b[0] /* get top crypted ctr byte */
|
||||
umov w7, v0.b[0] /* get top mac byte */
|
||||
.if \enc == 1
|
||||
@ -197,13 +237,13 @@ CPU_LE( rev x8, x8 )
|
||||
eor w9, w9, w6
|
||||
eor w7, w7, w9
|
||||
.endif
|
||||
strb w9, [x0], #1 /* store out byte */
|
||||
strb w7, [x5], #1 /* store mac byte */
|
||||
subs w2, w2, #1
|
||||
beq 5b
|
||||
strb w9, [x19], #1 /* store out byte */
|
||||
strb w7, [x24], #1 /* store mac byte */
|
||||
subs w21, w21, #1
|
||||
beq 6b
|
||||
ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */
|
||||
ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */
|
||||
b 7b
|
||||
b 8b
|
||||
.endm
|
||||
|
||||
/*
|
||||
|
@ -30,18 +30,21 @@
|
||||
.endm
|
||||
|
||||
/* prepare for encryption with key in rk[] */
|
||||
.macro enc_prepare, rounds, rk, ignore
|
||||
load_round_keys \rounds, \rk
|
||||
.macro enc_prepare, rounds, rk, temp
|
||||
mov \temp, \rk
|
||||
load_round_keys \rounds, \temp
|
||||
.endm
|
||||
|
||||
/* prepare for encryption (again) but with new key in rk[] */
|
||||
.macro enc_switch_key, rounds, rk, ignore
|
||||
load_round_keys \rounds, \rk
|
||||
.macro enc_switch_key, rounds, rk, temp
|
||||
mov \temp, \rk
|
||||
load_round_keys \rounds, \temp
|
||||
.endm
|
||||
|
||||
/* prepare for decryption with key in rk[] */
|
||||
.macro dec_prepare, rounds, rk, ignore
|
||||
load_round_keys \rounds, \rk
|
||||
.macro dec_prepare, rounds, rk, temp
|
||||
mov \temp, \rk
|
||||
load_round_keys \rounds, \temp
|
||||
.endm
|
||||
|
||||
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
|
||||
|
@ -14,12 +14,12 @@
|
||||
.align 4
|
||||
|
||||
aes_encrypt_block4x:
|
||||
encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
|
||||
encrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
|
||||
ret
|
||||
ENDPROC(aes_encrypt_block4x)
|
||||
|
||||
aes_decrypt_block4x:
|
||||
decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
|
||||
decrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
|
||||
ret
|
||||
ENDPROC(aes_decrypt_block4x)
|
||||
|
||||
@ -31,57 +31,71 @@ ENDPROC(aes_decrypt_block4x)
|
||||
*/
|
||||
|
||||
AES_ENTRY(aes_ecb_encrypt)
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
frame_push 5
|
||||
|
||||
enc_prepare w3, x2, x5
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
|
||||
.Lecbencrestart:
|
||||
enc_prepare w22, x21, x5
|
||||
|
||||
.LecbencloopNx:
|
||||
subs w4, w4, #4
|
||||
subs w23, w23, #4
|
||||
bmi .Lecbenc1x
|
||||
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
|
||||
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
|
||||
bl aes_encrypt_block4x
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
st1 {v0.16b-v3.16b}, [x19], #64
|
||||
cond_yield_neon .Lecbencrestart
|
||||
b .LecbencloopNx
|
||||
.Lecbenc1x:
|
||||
adds w4, w4, #4
|
||||
adds w23, w23, #4
|
||||
beq .Lecbencout
|
||||
.Lecbencloop:
|
||||
ld1 {v0.16b}, [x1], #16 /* get next pt block */
|
||||
encrypt_block v0, w3, x2, x5, w6
|
||||
st1 {v0.16b}, [x0], #16
|
||||
subs w4, w4, #1
|
||||
ld1 {v0.16b}, [x20], #16 /* get next pt block */
|
||||
encrypt_block v0, w22, x21, x5, w6
|
||||
st1 {v0.16b}, [x19], #16
|
||||
subs w23, w23, #1
|
||||
bne .Lecbencloop
|
||||
.Lecbencout:
|
||||
ldp x29, x30, [sp], #16
|
||||
frame_pop
|
||||
ret
|
||||
AES_ENDPROC(aes_ecb_encrypt)
|
||||
|
||||
|
||||
AES_ENTRY(aes_ecb_decrypt)
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
frame_push 5
|
||||
|
||||
dec_prepare w3, x2, x5
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
|
||||
.Lecbdecrestart:
|
||||
dec_prepare w22, x21, x5
|
||||
|
||||
.LecbdecloopNx:
|
||||
subs w4, w4, #4
|
||||
subs w23, w23, #4
|
||||
bmi .Lecbdec1x
|
||||
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
|
||||
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
|
||||
bl aes_decrypt_block4x
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
st1 {v0.16b-v3.16b}, [x19], #64
|
||||
cond_yield_neon .Lecbdecrestart
|
||||
b .LecbdecloopNx
|
||||
.Lecbdec1x:
|
||||
adds w4, w4, #4
|
||||
adds w23, w23, #4
|
||||
beq .Lecbdecout
|
||||
.Lecbdecloop:
|
||||
ld1 {v0.16b}, [x1], #16 /* get next ct block */
|
||||
decrypt_block v0, w3, x2, x5, w6
|
||||
st1 {v0.16b}, [x0], #16
|
||||
subs w4, w4, #1
|
||||
ld1 {v0.16b}, [x20], #16 /* get next ct block */
|
||||
decrypt_block v0, w22, x21, x5, w6
|
||||
st1 {v0.16b}, [x19], #16
|
||||
subs w23, w23, #1
|
||||
bne .Lecbdecloop
|
||||
.Lecbdecout:
|
||||
ldp x29, x30, [sp], #16
|
||||
frame_pop
|
||||
ret
|
||||
AES_ENDPROC(aes_ecb_decrypt)
|
||||
|
||||
@ -94,78 +108,100 @@ AES_ENDPROC(aes_ecb_decrypt)
|
||||
*/
|
||||
|
||||
AES_ENTRY(aes_cbc_encrypt)
|
||||
ld1 {v4.16b}, [x5] /* get iv */
|
||||
enc_prepare w3, x2, x6
|
||||
frame_push 6
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
mov x24, x5
|
||||
|
||||
.Lcbcencrestart:
|
||||
ld1 {v4.16b}, [x24] /* get iv */
|
||||
enc_prepare w22, x21, x6
|
||||
|
||||
.Lcbcencloop4x:
|
||||
subs w4, w4, #4
|
||||
subs w23, w23, #4
|
||||
bmi .Lcbcenc1x
|
||||
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
|
||||
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
|
||||
eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
|
||||
encrypt_block v0, w3, x2, x6, w7
|
||||
encrypt_block v0, w22, x21, x6, w7
|
||||
eor v1.16b, v1.16b, v0.16b
|
||||
encrypt_block v1, w3, x2, x6, w7
|
||||
encrypt_block v1, w22, x21, x6, w7
|
||||
eor v2.16b, v2.16b, v1.16b
|
||||
encrypt_block v2, w3, x2, x6, w7
|
||||
encrypt_block v2, w22, x21, x6, w7
|
||||
eor v3.16b, v3.16b, v2.16b
|
||||
encrypt_block v3, w3, x2, x6, w7
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
encrypt_block v3, w22, x21, x6, w7
|
||||
st1 {v0.16b-v3.16b}, [x19], #64
|
||||
mov v4.16b, v3.16b
|
||||
st1 {v4.16b}, [x24] /* return iv */
|
||||
cond_yield_neon .Lcbcencrestart
|
||||
b .Lcbcencloop4x
|
||||
.Lcbcenc1x:
|
||||
adds w4, w4, #4
|
||||
adds w23, w23, #4
|
||||
beq .Lcbcencout
|
||||
.Lcbcencloop:
|
||||
ld1 {v0.16b}, [x1], #16 /* get next pt block */
|
||||
ld1 {v0.16b}, [x20], #16 /* get next pt block */
|
||||
eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
|
||||
encrypt_block v4, w3, x2, x6, w7
|
||||
st1 {v4.16b}, [x0], #16
|
||||
subs w4, w4, #1
|
||||
encrypt_block v4, w22, x21, x6, w7
|
||||
st1 {v4.16b}, [x19], #16
|
||||
subs w23, w23, #1
|
||||
bne .Lcbcencloop
|
||||
.Lcbcencout:
|
||||
st1 {v4.16b}, [x5] /* return iv */
|
||||
st1 {v4.16b}, [x24] /* return iv */
|
||||
frame_pop
|
||||
ret
|
||||
AES_ENDPROC(aes_cbc_encrypt)
|
||||
|
||||
|
||||
AES_ENTRY(aes_cbc_decrypt)
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
frame_push 6
|
||||
|
||||
ld1 {v7.16b}, [x5] /* get iv */
|
||||
dec_prepare w3, x2, x6
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
mov x24, x5
|
||||
|
||||
.Lcbcdecrestart:
|
||||
ld1 {v7.16b}, [x24] /* get iv */
|
||||
dec_prepare w22, x21, x6
|
||||
|
||||
.LcbcdecloopNx:
|
||||
subs w4, w4, #4
|
||||
subs w23, w23, #4
|
||||
bmi .Lcbcdec1x
|
||||
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
|
||||
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
|
||||
mov v4.16b, v0.16b
|
||||
mov v5.16b, v1.16b
|
||||
mov v6.16b, v2.16b
|
||||
bl aes_decrypt_block4x
|
||||
sub x1, x1, #16
|
||||
sub x20, x20, #16
|
||||
eor v0.16b, v0.16b, v7.16b
|
||||
eor v1.16b, v1.16b, v4.16b
|
||||
ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
|
||||
ld1 {v7.16b}, [x20], #16 /* reload 1 ct block */
|
||||
eor v2.16b, v2.16b, v5.16b
|
||||
eor v3.16b, v3.16b, v6.16b
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
st1 {v0.16b-v3.16b}, [x19], #64
|
||||
st1 {v7.16b}, [x24] /* return iv */
|
||||
cond_yield_neon .Lcbcdecrestart
|
||||
b .LcbcdecloopNx
|
||||
.Lcbcdec1x:
|
||||
adds w4, w4, #4
|
||||
adds w23, w23, #4
|
||||
beq .Lcbcdecout
|
||||
.Lcbcdecloop:
|
||||
ld1 {v1.16b}, [x1], #16 /* get next ct block */
|
||||
ld1 {v1.16b}, [x20], #16 /* get next ct block */
|
||||
mov v0.16b, v1.16b /* ...and copy to v0 */
|
||||
decrypt_block v0, w3, x2, x6, w7
|
||||
decrypt_block v0, w22, x21, x6, w7
|
||||
eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
|
||||
mov v7.16b, v1.16b /* ct is next iv */
|
||||
st1 {v0.16b}, [x0], #16
|
||||
subs w4, w4, #1
|
||||
st1 {v0.16b}, [x19], #16
|
||||
subs w23, w23, #1
|
||||
bne .Lcbcdecloop
|
||||
.Lcbcdecout:
|
||||
st1 {v7.16b}, [x5] /* return iv */
|
||||
ldp x29, x30, [sp], #16
|
||||
st1 {v7.16b}, [x24] /* return iv */
|
||||
frame_pop
|
||||
ret
|
||||
AES_ENDPROC(aes_cbc_decrypt)
|
||||
|
||||
@ -176,19 +212,26 @@ AES_ENDPROC(aes_cbc_decrypt)
|
||||
*/
|
||||
|
||||
AES_ENTRY(aes_ctr_encrypt)
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
frame_push 6
|
||||
|
||||
enc_prepare w3, x2, x6
|
||||
ld1 {v4.16b}, [x5]
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
mov x24, x5
|
||||
|
||||
.Lctrrestart:
|
||||
enc_prepare w22, x21, x6
|
||||
ld1 {v4.16b}, [x24]
|
||||
|
||||
umov x6, v4.d[1] /* keep swabbed ctr in reg */
|
||||
rev x6, x6
|
||||
cmn w6, w4 /* 32 bit overflow? */
|
||||
bcs .Lctrloop
|
||||
.LctrloopNx:
|
||||
subs w4, w4, #4
|
||||
subs w23, w23, #4
|
||||
bmi .Lctr1x
|
||||
cmn w6, #4 /* 32 bit overflow? */
|
||||
bcs .Lctr1x
|
||||
ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
|
||||
dup v7.4s, w6
|
||||
mov v0.16b, v4.16b
|
||||
@ -200,25 +243,27 @@ AES_ENTRY(aes_ctr_encrypt)
|
||||
mov v1.s[3], v8.s[0]
|
||||
mov v2.s[3], v8.s[1]
|
||||
mov v3.s[3], v8.s[2]
|
||||
ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
|
||||
ld1 {v5.16b-v7.16b}, [x20], #48 /* get 3 input blocks */
|
||||
bl aes_encrypt_block4x
|
||||
eor v0.16b, v5.16b, v0.16b
|
||||
ld1 {v5.16b}, [x1], #16 /* get 1 input block */
|
||||
ld1 {v5.16b}, [x20], #16 /* get 1 input block */
|
||||
eor v1.16b, v6.16b, v1.16b
|
||||
eor v2.16b, v7.16b, v2.16b
|
||||
eor v3.16b, v5.16b, v3.16b
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
st1 {v0.16b-v3.16b}, [x19], #64
|
||||
add x6, x6, #4
|
||||
rev x7, x6
|
||||
ins v4.d[1], x7
|
||||
cbz w4, .Lctrout
|
||||
cbz w23, .Lctrout
|
||||
st1 {v4.16b}, [x24] /* return next CTR value */
|
||||
cond_yield_neon .Lctrrestart
|
||||
b .LctrloopNx
|
||||
.Lctr1x:
|
||||
adds w4, w4, #4
|
||||
adds w23, w23, #4
|
||||
beq .Lctrout
|
||||
.Lctrloop:
|
||||
mov v0.16b, v4.16b
|
||||
encrypt_block v0, w3, x2, x8, w7
|
||||
encrypt_block v0, w22, x21, x8, w7
|
||||
|
||||
adds x6, x6, #1 /* increment BE ctr */
|
||||
rev x7, x6
|
||||
@ -226,22 +271,22 @@ AES_ENTRY(aes_ctr_encrypt)
|
||||
bcs .Lctrcarry /* overflow? */
|
||||
|
||||
.Lctrcarrydone:
|
||||
subs w4, w4, #1
|
||||
subs w23, w23, #1
|
||||
bmi .Lctrtailblock /* blocks <0 means tail block */
|
||||
ld1 {v3.16b}, [x1], #16
|
||||
ld1 {v3.16b}, [x20], #16
|
||||
eor v3.16b, v0.16b, v3.16b
|
||||
st1 {v3.16b}, [x0], #16
|
||||
st1 {v3.16b}, [x19], #16
|
||||
bne .Lctrloop
|
||||
|
||||
.Lctrout:
|
||||
st1 {v4.16b}, [x5] /* return next CTR value */
|
||||
ldp x29, x30, [sp], #16
|
||||
st1 {v4.16b}, [x24] /* return next CTR value */
|
||||
.Lctrret:
|
||||
frame_pop
|
||||
ret
|
||||
|
||||
.Lctrtailblock:
|
||||
st1 {v0.16b}, [x0]
|
||||
ldp x29, x30, [sp], #16
|
||||
ret
|
||||
st1 {v0.16b}, [x19]
|
||||
b .Lctrret
|
||||
|
||||
.Lctrcarry:
|
||||
umov x7, v4.d[0] /* load upper word of ctr */
|
||||
@ -274,10 +319,16 @@ CPU_LE( .quad 1, 0x87 )
|
||||
CPU_BE( .quad 0x87, 1 )
|
||||
|
||||
AES_ENTRY(aes_xts_encrypt)
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
frame_push 6
|
||||
|
||||
ld1 {v4.16b}, [x6]
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
mov x24, x6
|
||||
|
||||
ld1 {v4.16b}, [x24]
|
||||
cbz w7, .Lxtsencnotfirst
|
||||
|
||||
enc_prepare w3, x5, x8
|
||||
@ -286,15 +337,17 @@ AES_ENTRY(aes_xts_encrypt)
|
||||
ldr q7, .Lxts_mul_x
|
||||
b .LxtsencNx
|
||||
|
||||
.Lxtsencrestart:
|
||||
ld1 {v4.16b}, [x24]
|
||||
.Lxtsencnotfirst:
|
||||
enc_prepare w3, x2, x8
|
||||
enc_prepare w22, x21, x8
|
||||
.LxtsencloopNx:
|
||||
ldr q7, .Lxts_mul_x
|
||||
next_tweak v4, v4, v7, v8
|
||||
.LxtsencNx:
|
||||
subs w4, w4, #4
|
||||
subs w23, w23, #4
|
||||
bmi .Lxtsenc1x
|
||||
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
|
||||
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
|
||||
next_tweak v5, v4, v7, v8
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
next_tweak v6, v5, v7, v8
|
||||
@ -307,35 +360,43 @@ AES_ENTRY(aes_xts_encrypt)
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
eor v2.16b, v2.16b, v6.16b
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
st1 {v0.16b-v3.16b}, [x19], #64
|
||||
mov v4.16b, v7.16b
|
||||
cbz w4, .Lxtsencout
|
||||
cbz w23, .Lxtsencout
|
||||
st1 {v4.16b}, [x24]
|
||||
cond_yield_neon .Lxtsencrestart
|
||||
b .LxtsencloopNx
|
||||
.Lxtsenc1x:
|
||||
adds w4, w4, #4
|
||||
adds w23, w23, #4
|
||||
beq .Lxtsencout
|
||||
.Lxtsencloop:
|
||||
ld1 {v1.16b}, [x1], #16
|
||||
ld1 {v1.16b}, [x20], #16
|
||||
eor v0.16b, v1.16b, v4.16b
|
||||
encrypt_block v0, w3, x2, x8, w7
|
||||
encrypt_block v0, w22, x21, x8, w7
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
st1 {v0.16b}, [x0], #16
|
||||
subs w4, w4, #1
|
||||
st1 {v0.16b}, [x19], #16
|
||||
subs w23, w23, #1
|
||||
beq .Lxtsencout
|
||||
next_tweak v4, v4, v7, v8
|
||||
b .Lxtsencloop
|
||||
.Lxtsencout:
|
||||
st1 {v4.16b}, [x6]
|
||||
ldp x29, x30, [sp], #16
|
||||
st1 {v4.16b}, [x24]
|
||||
frame_pop
|
||||
ret
|
||||
AES_ENDPROC(aes_xts_encrypt)
|
||||
|
||||
|
||||
AES_ENTRY(aes_xts_decrypt)
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
frame_push 6
|
||||
|
||||
ld1 {v4.16b}, [x6]
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
mov x24, x6
|
||||
|
||||
ld1 {v4.16b}, [x24]
|
||||
cbz w7, .Lxtsdecnotfirst
|
||||
|
||||
enc_prepare w3, x5, x8
|
||||
@ -344,15 +405,17 @@ AES_ENTRY(aes_xts_decrypt)
|
||||
ldr q7, .Lxts_mul_x
|
||||
b .LxtsdecNx
|
||||
|
||||
.Lxtsdecrestart:
|
||||
ld1 {v4.16b}, [x24]
|
||||
.Lxtsdecnotfirst:
|
||||
dec_prepare w3, x2, x8
|
||||
dec_prepare w22, x21, x8
|
||||
.LxtsdecloopNx:
|
||||
ldr q7, .Lxts_mul_x
|
||||
next_tweak v4, v4, v7, v8
|
||||
.LxtsdecNx:
|
||||
subs w4, w4, #4
|
||||
subs w23, w23, #4
|
||||
bmi .Lxtsdec1x
|
||||
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
|
||||
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
|
||||
next_tweak v5, v4, v7, v8
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
next_tweak v6, v5, v7, v8
|
||||
@ -365,26 +428,28 @@ AES_ENTRY(aes_xts_decrypt)
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
eor v2.16b, v2.16b, v6.16b
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
st1 {v0.16b-v3.16b}, [x19], #64
|
||||
mov v4.16b, v7.16b
|
||||
cbz w4, .Lxtsdecout
|
||||
cbz w23, .Lxtsdecout
|
||||
st1 {v4.16b}, [x24]
|
||||
cond_yield_neon .Lxtsdecrestart
|
||||
b .LxtsdecloopNx
|
||||
.Lxtsdec1x:
|
||||
adds w4, w4, #4
|
||||
adds w23, w23, #4
|
||||
beq .Lxtsdecout
|
||||
.Lxtsdecloop:
|
||||
ld1 {v1.16b}, [x1], #16
|
||||
ld1 {v1.16b}, [x20], #16
|
||||
eor v0.16b, v1.16b, v4.16b
|
||||
decrypt_block v0, w3, x2, x8, w7
|
||||
decrypt_block v0, w22, x21, x8, w7
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
st1 {v0.16b}, [x0], #16
|
||||
subs w4, w4, #1
|
||||
st1 {v0.16b}, [x19], #16
|
||||
subs w23, w23, #1
|
||||
beq .Lxtsdecout
|
||||
next_tweak v4, v4, v7, v8
|
||||
b .Lxtsdecloop
|
||||
.Lxtsdecout:
|
||||
st1 {v4.16b}, [x6]
|
||||
ldp x29, x30, [sp], #16
|
||||
st1 {v4.16b}, [x24]
|
||||
frame_pop
|
||||
ret
|
||||
AES_ENDPROC(aes_xts_decrypt)
|
||||
|
||||
@ -393,43 +458,61 @@ AES_ENDPROC(aes_xts_decrypt)
|
||||
* int blocks, u8 dg[], int enc_before, int enc_after)
|
||||
*/
|
||||
AES_ENTRY(aes_mac_update)
|
||||
ld1 {v0.16b}, [x4] /* get dg */
|
||||
frame_push 6
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
mov x24, x6
|
||||
|
||||
ld1 {v0.16b}, [x23] /* get dg */
|
||||
enc_prepare w2, x1, x7
|
||||
cbz w5, .Lmacloop4x
|
||||
|
||||
encrypt_block v0, w2, x1, x7, w8
|
||||
|
||||
.Lmacloop4x:
|
||||
subs w3, w3, #4
|
||||
subs w22, w22, #4
|
||||
bmi .Lmac1x
|
||||
ld1 {v1.16b-v4.16b}, [x0], #64 /* get next pt block */
|
||||
ld1 {v1.16b-v4.16b}, [x19], #64 /* get next pt block */
|
||||
eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
|
||||
encrypt_block v0, w2, x1, x7, w8
|
||||
encrypt_block v0, w21, x20, x7, w8
|
||||
eor v0.16b, v0.16b, v2.16b
|
||||
encrypt_block v0, w2, x1, x7, w8
|
||||
encrypt_block v0, w21, x20, x7, w8
|
||||
eor v0.16b, v0.16b, v3.16b
|
||||
encrypt_block v0, w2, x1, x7, w8
|
||||
encrypt_block v0, w21, x20, x7, w8
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
cmp w3, wzr
|
||||
csinv x5, x6, xzr, eq
|
||||
cmp w22, wzr
|
||||
csinv x5, x24, xzr, eq
|
||||
cbz w5, .Lmacout
|
||||
encrypt_block v0, w2, x1, x7, w8
|
||||
encrypt_block v0, w21, x20, x7, w8
|
||||
st1 {v0.16b}, [x23] /* return dg */
|
||||
cond_yield_neon .Lmacrestart
|
||||
b .Lmacloop4x
|
||||
.Lmac1x:
|
||||
add w3, w3, #4
|
||||
add w22, w22, #4
|
||||
.Lmacloop:
|
||||
cbz w3, .Lmacout
|
||||
ld1 {v1.16b}, [x0], #16 /* get next pt block */
|
||||
cbz w22, .Lmacout
|
||||
ld1 {v1.16b}, [x19], #16 /* get next pt block */
|
||||
eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
|
||||
|
||||
subs w3, w3, #1
|
||||
csinv x5, x6, xzr, eq
|
||||
subs w22, w22, #1
|
||||
csinv x5, x24, xzr, eq
|
||||
cbz w5, .Lmacout
|
||||
|
||||
encrypt_block v0, w2, x1, x7, w8
|
||||
.Lmacenc:
|
||||
encrypt_block v0, w21, x20, x7, w8
|
||||
b .Lmacloop
|
||||
|
||||
.Lmacout:
|
||||
st1 {v0.16b}, [x4] /* return dg */
|
||||
st1 {v0.16b}, [x23] /* return dg */
|
||||
frame_pop
|
||||
ret
|
||||
|
||||
.Lmacrestart:
|
||||
ld1 {v0.16b}, [x23] /* get dg */
|
||||
enc_prepare w21, x20, x0
|
||||
b .Lmacloop4x
|
||||
AES_ENDPROC(aes_mac_update)
|
||||
|
@ -565,54 +565,61 @@ ENDPROC(aesbs_decrypt8)
|
||||
* int blocks)
|
||||
*/
|
||||
.macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
frame_push 5
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
|
||||
99: mov x5, #1
|
||||
lsl x5, x5, x4
|
||||
subs w4, w4, #8
|
||||
csel x4, x4, xzr, pl
|
||||
lsl x5, x5, x23
|
||||
subs w23, w23, #8
|
||||
csel x23, x23, xzr, pl
|
||||
csel x5, x5, xzr, mi
|
||||
|
||||
ld1 {v0.16b}, [x1], #16
|
||||
ld1 {v0.16b}, [x20], #16
|
||||
tbnz x5, #1, 0f
|
||||
ld1 {v1.16b}, [x1], #16
|
||||
ld1 {v1.16b}, [x20], #16
|
||||
tbnz x5, #2, 0f
|
||||
ld1 {v2.16b}, [x1], #16
|
||||
ld1 {v2.16b}, [x20], #16
|
||||
tbnz x5, #3, 0f
|
||||
ld1 {v3.16b}, [x1], #16
|
||||
ld1 {v3.16b}, [x20], #16
|
||||
tbnz x5, #4, 0f
|
||||
ld1 {v4.16b}, [x1], #16
|
||||
ld1 {v4.16b}, [x20], #16
|
||||
tbnz x5, #5, 0f
|
||||
ld1 {v5.16b}, [x1], #16
|
||||
ld1 {v5.16b}, [x20], #16
|
||||
tbnz x5, #6, 0f
|
||||
ld1 {v6.16b}, [x1], #16
|
||||
ld1 {v6.16b}, [x20], #16
|
||||
tbnz x5, #7, 0f
|
||||
ld1 {v7.16b}, [x1], #16
|
||||
ld1 {v7.16b}, [x20], #16
|
||||
|
||||
0: mov bskey, x2
|
||||
mov rounds, x3
|
||||
0: mov bskey, x21
|
||||
mov rounds, x22
|
||||
bl \do8
|
||||
|
||||
st1 {\o0\().16b}, [x0], #16
|
||||
st1 {\o0\().16b}, [x19], #16
|
||||
tbnz x5, #1, 1f
|
||||
st1 {\o1\().16b}, [x0], #16
|
||||
st1 {\o1\().16b}, [x19], #16
|
||||
tbnz x5, #2, 1f
|
||||
st1 {\o2\().16b}, [x0], #16
|
||||
st1 {\o2\().16b}, [x19], #16
|
||||
tbnz x5, #3, 1f
|
||||
st1 {\o3\().16b}, [x0], #16
|
||||
st1 {\o3\().16b}, [x19], #16
|
||||
tbnz x5, #4, 1f
|
||||
st1 {\o4\().16b}, [x0], #16
|
||||
st1 {\o4\().16b}, [x19], #16
|
||||
tbnz x5, #5, 1f
|
||||
st1 {\o5\().16b}, [x0], #16
|
||||
st1 {\o5\().16b}, [x19], #16
|
||||
tbnz x5, #6, 1f
|
||||
st1 {\o6\().16b}, [x0], #16
|
||||
st1 {\o6\().16b}, [x19], #16
|
||||
tbnz x5, #7, 1f
|
||||
st1 {\o7\().16b}, [x0], #16
|
||||
st1 {\o7\().16b}, [x19], #16
|
||||
|
||||
cbnz x4, 99b
|
||||
cbz x23, 1f
|
||||
cond_yield_neon
|
||||
b 99b
|
||||
|
||||
1: ldp x29, x30, [sp], #16
|
||||
1: frame_pop
|
||||
ret
|
||||
.endm
|
||||
|
||||
@ -632,43 +639,49 @@ ENDPROC(aesbs_ecb_decrypt)
|
||||
*/
|
||||
.align 4
|
||||
ENTRY(aesbs_cbc_decrypt)
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
frame_push 6
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
mov x24, x5
|
||||
|
||||
99: mov x6, #1
|
||||
lsl x6, x6, x4
|
||||
subs w4, w4, #8
|
||||
csel x4, x4, xzr, pl
|
||||
lsl x6, x6, x23
|
||||
subs w23, w23, #8
|
||||
csel x23, x23, xzr, pl
|
||||
csel x6, x6, xzr, mi
|
||||
|
||||
ld1 {v0.16b}, [x1], #16
|
||||
ld1 {v0.16b}, [x20], #16
|
||||
mov v25.16b, v0.16b
|
||||
tbnz x6, #1, 0f
|
||||
ld1 {v1.16b}, [x1], #16
|
||||
ld1 {v1.16b}, [x20], #16
|
||||
mov v26.16b, v1.16b
|
||||
tbnz x6, #2, 0f
|
||||
ld1 {v2.16b}, [x1], #16
|
||||
ld1 {v2.16b}, [x20], #16
|
||||
mov v27.16b, v2.16b
|
||||
tbnz x6, #3, 0f
|
||||
ld1 {v3.16b}, [x1], #16
|
||||
ld1 {v3.16b}, [x20], #16
|
||||
mov v28.16b, v3.16b
|
||||
tbnz x6, #4, 0f
|
||||
ld1 {v4.16b}, [x1], #16
|
||||
ld1 {v4.16b}, [x20], #16
|
||||
mov v29.16b, v4.16b
|
||||
tbnz x6, #5, 0f
|
||||
ld1 {v5.16b}, [x1], #16
|
||||
ld1 {v5.16b}, [x20], #16
|
||||
mov v30.16b, v5.16b
|
||||
tbnz x6, #6, 0f
|
||||
ld1 {v6.16b}, [x1], #16
|
||||
ld1 {v6.16b}, [x20], #16
|
||||
mov v31.16b, v6.16b
|
||||
tbnz x6, #7, 0f
|
||||
ld1 {v7.16b}, [x1]
|
||||
ld1 {v7.16b}, [x20]
|
||||
|
||||
0: mov bskey, x2
|
||||
mov rounds, x3
|
||||
0: mov bskey, x21
|
||||
mov rounds, x22
|
||||
bl aesbs_decrypt8
|
||||
|
||||
ld1 {v24.16b}, [x5] // load IV
|
||||
ld1 {v24.16b}, [x24] // load IV
|
||||
|
||||
eor v1.16b, v1.16b, v25.16b
|
||||
eor v6.16b, v6.16b, v26.16b
|
||||
@ -679,34 +692,36 @@ ENTRY(aesbs_cbc_decrypt)
|
||||
eor v3.16b, v3.16b, v30.16b
|
||||
eor v5.16b, v5.16b, v31.16b
|
||||
|
||||
st1 {v0.16b}, [x0], #16
|
||||
st1 {v0.16b}, [x19], #16
|
||||
mov v24.16b, v25.16b
|
||||
tbnz x6, #1, 1f
|
||||
st1 {v1.16b}, [x0], #16
|
||||
st1 {v1.16b}, [x19], #16
|
||||
mov v24.16b, v26.16b
|
||||
tbnz x6, #2, 1f
|
||||
st1 {v6.16b}, [x0], #16
|
||||
st1 {v6.16b}, [x19], #16
|
||||
mov v24.16b, v27.16b
|
||||
tbnz x6, #3, 1f
|
||||
st1 {v4.16b}, [x0], #16
|
||||
st1 {v4.16b}, [x19], #16
|
||||
mov v24.16b, v28.16b
|
||||
tbnz x6, #4, 1f
|
||||
st1 {v2.16b}, [x0], #16
|
||||
st1 {v2.16b}, [x19], #16
|
||||
mov v24.16b, v29.16b
|
||||
tbnz x6, #5, 1f
|
||||
st1 {v7.16b}, [x0], #16
|
||||
st1 {v7.16b}, [x19], #16
|
||||
mov v24.16b, v30.16b
|
||||
tbnz x6, #6, 1f
|
||||
st1 {v3.16b}, [x0], #16
|
||||
st1 {v3.16b}, [x19], #16
|
||||
mov v24.16b, v31.16b
|
||||
tbnz x6, #7, 1f
|
||||
ld1 {v24.16b}, [x1], #16
|
||||
st1 {v5.16b}, [x0], #16
|
||||
1: st1 {v24.16b}, [x5] // store IV
|
||||
ld1 {v24.16b}, [x20], #16
|
||||
st1 {v5.16b}, [x19], #16
|
||||
1: st1 {v24.16b}, [x24] // store IV
|
||||
|
||||
cbnz x4, 99b
|
||||
cbz x23, 2f
|
||||
cond_yield_neon
|
||||
b 99b
|
||||
|
||||
ldp x29, x30, [sp], #16
|
||||
2: frame_pop
|
||||
ret
|
||||
ENDPROC(aesbs_cbc_decrypt)
|
||||
|
||||
@ -731,87 +746,93 @@ CPU_BE( .quad 0x87, 1 )
|
||||
*/
|
||||
__xts_crypt8:
|
||||
mov x6, #1
|
||||
lsl x6, x6, x4
|
||||
subs w4, w4, #8
|
||||
csel x4, x4, xzr, pl
|
||||
lsl x6, x6, x23
|
||||
subs w23, w23, #8
|
||||
csel x23, x23, xzr, pl
|
||||
csel x6, x6, xzr, mi
|
||||
|
||||
ld1 {v0.16b}, [x1], #16
|
||||
ld1 {v0.16b}, [x20], #16
|
||||
next_tweak v26, v25, v30, v31
|
||||
eor v0.16b, v0.16b, v25.16b
|
||||
tbnz x6, #1, 0f
|
||||
|
||||
ld1 {v1.16b}, [x1], #16
|
||||
ld1 {v1.16b}, [x20], #16
|
||||
next_tweak v27, v26, v30, v31
|
||||
eor v1.16b, v1.16b, v26.16b
|
||||
tbnz x6, #2, 0f
|
||||
|
||||
ld1 {v2.16b}, [x1], #16
|
||||
ld1 {v2.16b}, [x20], #16
|
||||
next_tweak v28, v27, v30, v31
|
||||
eor v2.16b, v2.16b, v27.16b
|
||||
tbnz x6, #3, 0f
|
||||
|
||||
ld1 {v3.16b}, [x1], #16
|
||||
ld1 {v3.16b}, [x20], #16
|
||||
next_tweak v29, v28, v30, v31
|
||||
eor v3.16b, v3.16b, v28.16b
|
||||
tbnz x6, #4, 0f
|
||||
|
||||
ld1 {v4.16b}, [x1], #16
|
||||
str q29, [sp, #16]
|
||||
ld1 {v4.16b}, [x20], #16
|
||||
str q29, [sp, #.Lframe_local_offset]
|
||||
eor v4.16b, v4.16b, v29.16b
|
||||
next_tweak v29, v29, v30, v31
|
||||
tbnz x6, #5, 0f
|
||||
|
||||
ld1 {v5.16b}, [x1], #16
|
||||
str q29, [sp, #32]
|
||||
ld1 {v5.16b}, [x20], #16
|
||||
str q29, [sp, #.Lframe_local_offset + 16]
|
||||
eor v5.16b, v5.16b, v29.16b
|
||||
next_tweak v29, v29, v30, v31
|
||||
tbnz x6, #6, 0f
|
||||
|
||||
ld1 {v6.16b}, [x1], #16
|
||||
str q29, [sp, #48]
|
||||
ld1 {v6.16b}, [x20], #16
|
||||
str q29, [sp, #.Lframe_local_offset + 32]
|
||||
eor v6.16b, v6.16b, v29.16b
|
||||
next_tweak v29, v29, v30, v31
|
||||
tbnz x6, #7, 0f
|
||||
|
||||
ld1 {v7.16b}, [x1], #16
|
||||
str q29, [sp, #64]
|
||||
ld1 {v7.16b}, [x20], #16
|
||||
str q29, [sp, #.Lframe_local_offset + 48]
|
||||
eor v7.16b, v7.16b, v29.16b
|
||||
next_tweak v29, v29, v30, v31
|
||||
|
||||
0: mov bskey, x2
|
||||
mov rounds, x3
|
||||
0: mov bskey, x21
|
||||
mov rounds, x22
|
||||
br x7
|
||||
ENDPROC(__xts_crypt8)
|
||||
|
||||
.macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
|
||||
stp x29, x30, [sp, #-80]!
|
||||
mov x29, sp
|
||||
frame_push 6, 64
|
||||
|
||||
ldr q30, .Lxts_mul_x
|
||||
ld1 {v25.16b}, [x5]
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
mov x24, x5
|
||||
|
||||
0: ldr q30, .Lxts_mul_x
|
||||
ld1 {v25.16b}, [x24]
|
||||
|
||||
99: adr x7, \do8
|
||||
bl __xts_crypt8
|
||||
|
||||
ldp q16, q17, [sp, #16]
|
||||
ldp q18, q19, [sp, #48]
|
||||
ldp q16, q17, [sp, #.Lframe_local_offset]
|
||||
ldp q18, q19, [sp, #.Lframe_local_offset + 32]
|
||||
|
||||
eor \o0\().16b, \o0\().16b, v25.16b
|
||||
eor \o1\().16b, \o1\().16b, v26.16b
|
||||
eor \o2\().16b, \o2\().16b, v27.16b
|
||||
eor \o3\().16b, \o3\().16b, v28.16b
|
||||
|
||||
st1 {\o0\().16b}, [x0], #16
|
||||
st1 {\o0\().16b}, [x19], #16
|
||||
mov v25.16b, v26.16b
|
||||
tbnz x6, #1, 1f
|
||||
st1 {\o1\().16b}, [x0], #16
|
||||
st1 {\o1\().16b}, [x19], #16
|
||||
mov v25.16b, v27.16b
|
||||
tbnz x6, #2, 1f
|
||||
st1 {\o2\().16b}, [x0], #16
|
||||
st1 {\o2\().16b}, [x19], #16
|
||||
mov v25.16b, v28.16b
|
||||
tbnz x6, #3, 1f
|
||||
st1 {\o3\().16b}, [x0], #16
|
||||
st1 {\o3\().16b}, [x19], #16
|
||||
mov v25.16b, v29.16b
|
||||
tbnz x6, #4, 1f
|
||||
|
||||
@ -820,18 +841,22 @@ ENDPROC(__xts_crypt8)
|
||||
eor \o6\().16b, \o6\().16b, v18.16b
|
||||
eor \o7\().16b, \o7\().16b, v19.16b
|
||||
|
||||
st1 {\o4\().16b}, [x0], #16
|
||||
st1 {\o4\().16b}, [x19], #16
|
||||
tbnz x6, #5, 1f
|
||||
st1 {\o5\().16b}, [x0], #16
|
||||
st1 {\o5\().16b}, [x19], #16
|
||||
tbnz x6, #6, 1f
|
||||
st1 {\o6\().16b}, [x0], #16
|
||||
st1 {\o6\().16b}, [x19], #16
|
||||
tbnz x6, #7, 1f
|
||||
st1 {\o7\().16b}, [x0], #16
|
||||
st1 {\o7\().16b}, [x19], #16
|
||||
|
||||
cbnz x4, 99b
|
||||
cbz x23, 1f
|
||||
st1 {v25.16b}, [x24]
|
||||
|
||||
1: st1 {v25.16b}, [x5]
|
||||
ldp x29, x30, [sp], #80
|
||||
cond_yield_neon 0b
|
||||
b 99b
|
||||
|
||||
1: st1 {v25.16b}, [x24]
|
||||
frame_pop
|
||||
ret
|
||||
.endm
|
||||
|
||||
@ -856,24 +881,31 @@ ENDPROC(aesbs_xts_decrypt)
|
||||
* int rounds, int blocks, u8 iv[], u8 final[])
|
||||
*/
|
||||
ENTRY(aesbs_ctr_encrypt)
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
frame_push 8
|
||||
|
||||
cmp x6, #0
|
||||
cset x10, ne
|
||||
add x4, x4, x10 // do one extra block if final
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
mov x24, x5
|
||||
mov x25, x6
|
||||
|
||||
ldp x7, x8, [x5]
|
||||
ld1 {v0.16b}, [x5]
|
||||
cmp x25, #0
|
||||
cset x26, ne
|
||||
add x23, x23, x26 // do one extra block if final
|
||||
|
||||
98: ldp x7, x8, [x24]
|
||||
ld1 {v0.16b}, [x24]
|
||||
CPU_LE( rev x7, x7 )
|
||||
CPU_LE( rev x8, x8 )
|
||||
adds x8, x8, #1
|
||||
adc x7, x7, xzr
|
||||
|
||||
99: mov x9, #1
|
||||
lsl x9, x9, x4
|
||||
subs w4, w4, #8
|
||||
csel x4, x4, xzr, pl
|
||||
lsl x9, x9, x23
|
||||
subs w23, w23, #8
|
||||
csel x23, x23, xzr, pl
|
||||
csel x9, x9, xzr, le
|
||||
|
||||
tbnz x9, #1, 0f
|
||||
@ -891,82 +923,85 @@ CPU_LE( rev x8, x8 )
|
||||
tbnz x9, #7, 0f
|
||||
next_ctr v7
|
||||
|
||||
0: mov bskey, x2
|
||||
mov rounds, x3
|
||||
0: mov bskey, x21
|
||||
mov rounds, x22
|
||||
bl aesbs_encrypt8
|
||||
|
||||
lsr x9, x9, x10 // disregard the extra block
|
||||
lsr x9, x9, x26 // disregard the extra block
|
||||
tbnz x9, #0, 0f
|
||||
|
||||
ld1 {v8.16b}, [x1], #16
|
||||
ld1 {v8.16b}, [x20], #16
|
||||
eor v0.16b, v0.16b, v8.16b
|
||||
st1 {v0.16b}, [x0], #16
|
||||
st1 {v0.16b}, [x19], #16
|
||||
tbnz x9, #1, 1f
|
||||
|
||||
ld1 {v9.16b}, [x1], #16
|
||||
ld1 {v9.16b}, [x20], #16
|
||||
eor v1.16b, v1.16b, v9.16b
|
||||
st1 {v1.16b}, [x0], #16
|
||||
st1 {v1.16b}, [x19], #16
|
||||
tbnz x9, #2, 2f
|
||||
|
||||
ld1 {v10.16b}, [x1], #16
|
||||
ld1 {v10.16b}, [x20], #16
|
||||
eor v4.16b, v4.16b, v10.16b
|
||||
st1 {v4.16b}, [x0], #16
|
||||
st1 {v4.16b}, [x19], #16
|
||||
tbnz x9, #3, 3f
|
||||
|
||||
ld1 {v11.16b}, [x1], #16
|
||||
ld1 {v11.16b}, [x20], #16
|
||||
eor v6.16b, v6.16b, v11.16b
|
||||
st1 {v6.16b}, [x0], #16
|
||||
st1 {v6.16b}, [x19], #16
|
||||
tbnz x9, #4, 4f
|
||||
|
||||
ld1 {v12.16b}, [x1], #16
|
||||
ld1 {v12.16b}, [x20], #16
|
||||
eor v3.16b, v3.16b, v12.16b
|
||||
st1 {v3.16b}, [x0], #16
|
||||
st1 {v3.16b}, [x19], #16
|
||||
tbnz x9, #5, 5f
|
||||
|
||||
ld1 {v13.16b}, [x1], #16
|
||||
ld1 {v13.16b}, [x20], #16
|
||||
eor v7.16b, v7.16b, v13.16b
|
||||
st1 {v7.16b}, [x0], #16
|
||||
st1 {v7.16b}, [x19], #16
|
||||
tbnz x9, #6, 6f
|
||||
|
||||
ld1 {v14.16b}, [x1], #16
|
||||
ld1 {v14.16b}, [x20], #16
|
||||
eor v2.16b, v2.16b, v14.16b
|
||||
st1 {v2.16b}, [x0], #16
|
||||
st1 {v2.16b}, [x19], #16
|
||||
tbnz x9, #7, 7f
|
||||
|
||||
ld1 {v15.16b}, [x1], #16
|
||||
ld1 {v15.16b}, [x20], #16
|
||||
eor v5.16b, v5.16b, v15.16b
|
||||
st1 {v5.16b}, [x0], #16
|
||||
st1 {v5.16b}, [x19], #16
|
||||
|
||||
8: next_ctr v0
|
||||
cbnz x4, 99b
|
||||
st1 {v0.16b}, [x24]
|
||||
cbz x23, 0f
|
||||
|
||||
0: st1 {v0.16b}, [x5]
|
||||
ldp x29, x30, [sp], #16
|
||||
cond_yield_neon 98b
|
||||
b 99b
|
||||
|
||||
0: frame_pop
|
||||
ret
|
||||
|
||||
/*
|
||||
* If we are handling the tail of the input (x6 != NULL), return the
|
||||
* final keystream block back to the caller.
|
||||
*/
|
||||
1: cbz x6, 8b
|
||||
st1 {v1.16b}, [x6]
|
||||
1: cbz x25, 8b
|
||||
st1 {v1.16b}, [x25]
|
||||
b 8b
|
||||
2: cbz x6, 8b
|
||||
st1 {v4.16b}, [x6]
|
||||
2: cbz x25, 8b
|
||||
st1 {v4.16b}, [x25]
|
||||
b 8b
|
||||
3: cbz x6, 8b
|
||||
st1 {v6.16b}, [x6]
|
||||
3: cbz x25, 8b
|
||||
st1 {v6.16b}, [x25]
|
||||
b 8b
|
||||
4: cbz x6, 8b
|
||||
st1 {v3.16b}, [x6]
|
||||
4: cbz x25, 8b
|
||||
st1 {v3.16b}, [x25]
|
||||
b 8b
|
||||
5: cbz x6, 8b
|
||||
st1 {v7.16b}, [x6]
|
||||
5: cbz x25, 8b
|
||||
st1 {v7.16b}, [x25]
|
||||
b 8b
|
||||
6: cbz x6, 8b
|
||||
st1 {v2.16b}, [x6]
|
||||
6: cbz x25, 8b
|
||||
st1 {v2.16b}, [x25]
|
||||
b 8b
|
||||
7: cbz x6, 8b
|
||||
st1 {v5.16b}, [x6]
|
||||
7: cbz x25, 8b
|
||||
st1 {v5.16b}, [x25]
|
||||
b 8b
|
||||
ENDPROC(aesbs_ctr_encrypt)
|
||||
|
@ -100,9 +100,10 @@
|
||||
dCONSTANT .req d0
|
||||
qCONSTANT .req q0
|
||||
|
||||
BUF .req x0
|
||||
LEN .req x1
|
||||
CRC .req x2
|
||||
BUF .req x19
|
||||
LEN .req x20
|
||||
CRC .req x21
|
||||
CONST .req x22
|
||||
|
||||
vzr .req v9
|
||||
|
||||
@ -123,7 +124,14 @@ ENTRY(crc32_pmull_le)
|
||||
ENTRY(crc32c_pmull_le)
|
||||
adr_l x3, .Lcrc32c_constants
|
||||
|
||||
0: bic LEN, LEN, #15
|
||||
0: frame_push 4, 64
|
||||
|
||||
mov BUF, x0
|
||||
mov LEN, x1
|
||||
mov CRC, x2
|
||||
mov CONST, x3
|
||||
|
||||
bic LEN, LEN, #15
|
||||
ld1 {v1.16b-v4.16b}, [BUF], #0x40
|
||||
movi vzr.16b, #0
|
||||
fmov dCONSTANT, CRC
|
||||
@ -132,7 +140,7 @@ ENTRY(crc32c_pmull_le)
|
||||
cmp LEN, #0x40
|
||||
b.lt less_64
|
||||
|
||||
ldr qCONSTANT, [x3]
|
||||
ldr qCONSTANT, [CONST]
|
||||
|
||||
loop_64: /* 64 bytes Full cache line folding */
|
||||
sub LEN, LEN, #0x40
|
||||
@ -162,10 +170,21 @@ loop_64: /* 64 bytes Full cache line folding */
|
||||
eor v4.16b, v4.16b, v8.16b
|
||||
|
||||
cmp LEN, #0x40
|
||||
b.ge loop_64
|
||||
b.lt less_64
|
||||
|
||||
if_will_cond_yield_neon
|
||||
stp q1, q2, [sp, #.Lframe_local_offset]
|
||||
stp q3, q4, [sp, #.Lframe_local_offset + 32]
|
||||
do_cond_yield_neon
|
||||
ldp q1, q2, [sp, #.Lframe_local_offset]
|
||||
ldp q3, q4, [sp, #.Lframe_local_offset + 32]
|
||||
ldr qCONSTANT, [CONST]
|
||||
movi vzr.16b, #0
|
||||
endif_yield_neon
|
||||
b loop_64
|
||||
|
||||
less_64: /* Folding cache line into 128bit */
|
||||
ldr qCONSTANT, [x3, #16]
|
||||
ldr qCONSTANT, [CONST, #16]
|
||||
|
||||
pmull2 v5.1q, v1.2d, vCONSTANT.2d
|
||||
pmull v1.1q, v1.1d, vCONSTANT.1d
|
||||
@ -204,8 +223,8 @@ fold_64:
|
||||
eor v1.16b, v1.16b, v2.16b
|
||||
|
||||
/* final 32-bit fold */
|
||||
ldr dCONSTANT, [x3, #32]
|
||||
ldr d3, [x3, #40]
|
||||
ldr dCONSTANT, [CONST, #32]
|
||||
ldr d3, [CONST, #40]
|
||||
|
||||
ext v2.16b, v1.16b, vzr.16b, #4
|
||||
and v1.16b, v1.16b, v3.16b
|
||||
@ -213,7 +232,7 @@ fold_64:
|
||||
eor v1.16b, v1.16b, v2.16b
|
||||
|
||||
/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
|
||||
ldr qCONSTANT, [x3, #48]
|
||||
ldr qCONSTANT, [CONST, #48]
|
||||
|
||||
and v2.16b, v1.16b, v3.16b
|
||||
ext v2.16b, vzr.16b, v2.16b, #8
|
||||
@ -223,6 +242,7 @@ fold_64:
|
||||
eor v1.16b, v1.16b, v2.16b
|
||||
mov w0, v1.s[1]
|
||||
|
||||
frame_pop
|
||||
ret
|
||||
ENDPROC(crc32_pmull_le)
|
||||
ENDPROC(crc32c_pmull_le)
|
||||
|
@ -74,13 +74,19 @@
|
||||
.text
|
||||
.cpu generic+crypto
|
||||
|
||||
arg1_low32 .req w0
|
||||
arg2 .req x1
|
||||
arg3 .req x2
|
||||
arg1_low32 .req w19
|
||||
arg2 .req x20
|
||||
arg3 .req x21
|
||||
|
||||
vzr .req v13
|
||||
|
||||
ENTRY(crc_t10dif_pmull)
|
||||
frame_push 3, 128
|
||||
|
||||
mov arg1_low32, w0
|
||||
mov arg2, x1
|
||||
mov arg3, x2
|
||||
|
||||
movi vzr.16b, #0 // init zero register
|
||||
|
||||
// adjust the 16-bit initial_crc value, scale it to 32 bits
|
||||
@ -175,8 +181,25 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
|
||||
subs arg3, arg3, #128
|
||||
|
||||
// check if there is another 64B in the buffer to be able to fold
|
||||
b.ge _fold_64_B_loop
|
||||
b.lt _fold_64_B_end
|
||||
|
||||
if_will_cond_yield_neon
|
||||
stp q0, q1, [sp, #.Lframe_local_offset]
|
||||
stp q2, q3, [sp, #.Lframe_local_offset + 32]
|
||||
stp q4, q5, [sp, #.Lframe_local_offset + 64]
|
||||
stp q6, q7, [sp, #.Lframe_local_offset + 96]
|
||||
do_cond_yield_neon
|
||||
ldp q0, q1, [sp, #.Lframe_local_offset]
|
||||
ldp q2, q3, [sp, #.Lframe_local_offset + 32]
|
||||
ldp q4, q5, [sp, #.Lframe_local_offset + 64]
|
||||
ldp q6, q7, [sp, #.Lframe_local_offset + 96]
|
||||
ldr_l q10, rk3, x8
|
||||
movi vzr.16b, #0 // init zero register
|
||||
endif_yield_neon
|
||||
|
||||
b _fold_64_B_loop
|
||||
|
||||
_fold_64_B_end:
|
||||
// at this point, the buffer pointer is pointing at the last y Bytes
|
||||
// of the buffer the 64B of folded data is in 4 of the vector
|
||||
// registers: v0, v1, v2, v3
|
||||
@ -304,6 +327,7 @@ _barrett:
|
||||
_cleanup:
|
||||
// scale the result back to 16 bits
|
||||
lsr x0, x0, #16
|
||||
frame_pop
|
||||
ret
|
||||
|
||||
_less_than_128:
|
||||
|
@ -213,22 +213,31 @@
|
||||
.endm
|
||||
|
||||
.macro __pmull_ghash, pn
|
||||
ld1 {SHASH.2d}, [x3]
|
||||
ld1 {XL.2d}, [x1]
|
||||
frame_push 5
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
|
||||
0: ld1 {SHASH.2d}, [x22]
|
||||
ld1 {XL.2d}, [x20]
|
||||
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
|
||||
eor SHASH2.16b, SHASH2.16b, SHASH.16b
|
||||
|
||||
__pmull_pre_\pn
|
||||
|
||||
/* do the head block first, if supplied */
|
||||
cbz x4, 0f
|
||||
ld1 {T1.2d}, [x4]
|
||||
b 1f
|
||||
cbz x23, 1f
|
||||
ld1 {T1.2d}, [x23]
|
||||
mov x23, xzr
|
||||
b 2f
|
||||
|
||||
0: ld1 {T1.2d}, [x2], #16
|
||||
sub w0, w0, #1
|
||||
1: ld1 {T1.2d}, [x21], #16
|
||||
sub w19, w19, #1
|
||||
|
||||
1: /* multiply XL by SHASH in GF(2^128) */
|
||||
2: /* multiply XL by SHASH in GF(2^128) */
|
||||
CPU_LE( rev64 T1.16b, T1.16b )
|
||||
|
||||
ext T2.16b, XL.16b, XL.16b, #8
|
||||
@ -250,9 +259,18 @@ CPU_LE( rev64 T1.16b, T1.16b )
|
||||
eor T2.16b, T2.16b, XH.16b
|
||||
eor XL.16b, XL.16b, T2.16b
|
||||
|
||||
cbnz w0, 0b
|
||||
cbz w19, 3f
|
||||
|
||||
st1 {XL.2d}, [x1]
|
||||
if_will_cond_yield_neon
|
||||
st1 {XL.2d}, [x20]
|
||||
do_cond_yield_neon
|
||||
b 0b
|
||||
endif_yield_neon
|
||||
|
||||
b 1b
|
||||
|
||||
3: st1 {XL.2d}, [x20]
|
||||
frame_pop
|
||||
ret
|
||||
.endm
|
||||
|
||||
@ -304,38 +322,55 @@ ENDPROC(pmull_ghash_update_p8)
|
||||
.endm
|
||||
|
||||
.macro pmull_gcm_do_crypt, enc
|
||||
ld1 {SHASH.2d}, [x4]
|
||||
ld1 {XL.2d}, [x1]
|
||||
ldr x8, [x5, #8] // load lower counter
|
||||
frame_push 10
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
mov x24, x5
|
||||
mov x25, x6
|
||||
mov x26, x7
|
||||
.if \enc == 1
|
||||
ldr x27, [sp, #96] // first stacked arg
|
||||
.endif
|
||||
|
||||
ldr x28, [x24, #8] // load lower counter
|
||||
CPU_LE( rev x28, x28 )
|
||||
|
||||
0: mov x0, x25
|
||||
load_round_keys w26, x0
|
||||
ld1 {SHASH.2d}, [x23]
|
||||
ld1 {XL.2d}, [x20]
|
||||
|
||||
movi MASK.16b, #0xe1
|
||||
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
|
||||
CPU_LE( rev x8, x8 )
|
||||
shl MASK.2d, MASK.2d, #57
|
||||
eor SHASH2.16b, SHASH2.16b, SHASH.16b
|
||||
|
||||
.if \enc == 1
|
||||
ld1 {KS.16b}, [x7]
|
||||
ld1 {KS.16b}, [x27]
|
||||
.endif
|
||||
|
||||
0: ld1 {CTR.8b}, [x5] // load upper counter
|
||||
ld1 {INP.16b}, [x3], #16
|
||||
rev x9, x8
|
||||
add x8, x8, #1
|
||||
sub w0, w0, #1
|
||||
1: ld1 {CTR.8b}, [x24] // load upper counter
|
||||
ld1 {INP.16b}, [x22], #16
|
||||
rev x9, x28
|
||||
add x28, x28, #1
|
||||
sub w19, w19, #1
|
||||
ins CTR.d[1], x9 // set lower counter
|
||||
|
||||
.if \enc == 1
|
||||
eor INP.16b, INP.16b, KS.16b // encrypt input
|
||||
st1 {INP.16b}, [x2], #16
|
||||
st1 {INP.16b}, [x21], #16
|
||||
.endif
|
||||
|
||||
rev64 T1.16b, INP.16b
|
||||
|
||||
cmp w6, #12
|
||||
b.ge 2f // AES-192/256?
|
||||
cmp w26, #12
|
||||
b.ge 4f // AES-192/256?
|
||||
|
||||
1: enc_round CTR, v21
|
||||
2: enc_round CTR, v21
|
||||
|
||||
ext T2.16b, XL.16b, XL.16b, #8
|
||||
ext IN1.16b, T1.16b, T1.16b, #8
|
||||
@ -390,27 +425,39 @@ CPU_LE( rev x8, x8 )
|
||||
|
||||
.if \enc == 0
|
||||
eor INP.16b, INP.16b, KS.16b
|
||||
st1 {INP.16b}, [x2], #16
|
||||
st1 {INP.16b}, [x21], #16
|
||||
.endif
|
||||
|
||||
cbnz w0, 0b
|
||||
|
||||
CPU_LE( rev x8, x8 )
|
||||
st1 {XL.2d}, [x1]
|
||||
str x8, [x5, #8] // store lower counter
|
||||
cbz w19, 3f
|
||||
|
||||
if_will_cond_yield_neon
|
||||
st1 {XL.2d}, [x20]
|
||||
.if \enc == 1
|
||||
st1 {KS.16b}, [x7]
|
||||
st1 {KS.16b}, [x27]
|
||||
.endif
|
||||
do_cond_yield_neon
|
||||
b 0b
|
||||
endif_yield_neon
|
||||
|
||||
b 1b
|
||||
|
||||
3: st1 {XL.2d}, [x20]
|
||||
.if \enc == 1
|
||||
st1 {KS.16b}, [x27]
|
||||
.endif
|
||||
|
||||
CPU_LE( rev x28, x28 )
|
||||
str x28, [x24, #8] // store lower counter
|
||||
|
||||
frame_pop
|
||||
ret
|
||||
|
||||
2: b.eq 3f // AES-192?
|
||||
4: b.eq 5f // AES-192?
|
||||
enc_round CTR, v17
|
||||
enc_round CTR, v18
|
||||
3: enc_round CTR, v19
|
||||
5: enc_round CTR, v19
|
||||
enc_round CTR, v20
|
||||
b 1b
|
||||
b 2b
|
||||
.endm
|
||||
|
||||
/*
|
||||
|
@ -63,11 +63,12 @@ static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
|
||||
|
||||
asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
|
||||
const u8 src[], struct ghash_key const *k,
|
||||
u8 ctr[], int rounds, u8 ks[]);
|
||||
u8 ctr[], u32 const rk[], int rounds,
|
||||
u8 ks[]);
|
||||
|
||||
asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
|
||||
const u8 src[], struct ghash_key const *k,
|
||||
u8 ctr[], int rounds);
|
||||
u8 ctr[], u32 const rk[], int rounds);
|
||||
|
||||
asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
|
||||
u32 const rk[], int rounds);
|
||||
@ -368,26 +369,29 @@ static int gcm_encrypt(struct aead_request *req)
|
||||
pmull_gcm_encrypt_block(ks, iv, NULL,
|
||||
num_rounds(&ctx->aes_key));
|
||||
put_unaligned_be32(3, iv + GCM_IV_SIZE);
|
||||
kernel_neon_end();
|
||||
|
||||
err = skcipher_walk_aead_encrypt(&walk, req, true);
|
||||
err = skcipher_walk_aead_encrypt(&walk, req, false);
|
||||
|
||||
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
||||
int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
|
||||
kernel_neon_begin();
|
||||
pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
|
||||
walk.src.virt.addr, &ctx->ghash_key,
|
||||
iv, num_rounds(&ctx->aes_key), ks);
|
||||
iv, ctx->aes_key.key_enc,
|
||||
num_rounds(&ctx->aes_key), ks);
|
||||
kernel_neon_end();
|
||||
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
kernel_neon_end();
|
||||
} else {
|
||||
__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
|
||||
num_rounds(&ctx->aes_key));
|
||||
put_unaligned_be32(2, iv + GCM_IV_SIZE);
|
||||
|
||||
err = skcipher_walk_aead_encrypt(&walk, req, true);
|
||||
err = skcipher_walk_aead_encrypt(&walk, req, false);
|
||||
|
||||
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
||||
int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
@ -467,15 +471,19 @@ static int gcm_decrypt(struct aead_request *req)
|
||||
pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
|
||||
num_rounds(&ctx->aes_key));
|
||||
put_unaligned_be32(2, iv + GCM_IV_SIZE);
|
||||
kernel_neon_end();
|
||||
|
||||
err = skcipher_walk_aead_decrypt(&walk, req, true);
|
||||
err = skcipher_walk_aead_decrypt(&walk, req, false);
|
||||
|
||||
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
||||
int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
|
||||
kernel_neon_begin();
|
||||
pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
|
||||
walk.src.virt.addr, &ctx->ghash_key,
|
||||
iv, num_rounds(&ctx->aes_key));
|
||||
iv, ctx->aes_key.key_enc,
|
||||
num_rounds(&ctx->aes_key));
|
||||
kernel_neon_end();
|
||||
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes % AES_BLOCK_SIZE);
|
||||
@ -483,14 +491,12 @@ static int gcm_decrypt(struct aead_request *req)
|
||||
if (walk.nbytes)
|
||||
pmull_gcm_encrypt_block(iv, iv, NULL,
|
||||
num_rounds(&ctx->aes_key));
|
||||
|
||||
kernel_neon_end();
|
||||
} else {
|
||||
__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
|
||||
num_rounds(&ctx->aes_key));
|
||||
put_unaligned_be32(2, iv + GCM_IV_SIZE);
|
||||
|
||||
err = skcipher_walk_aead_decrypt(&walk, req, true);
|
||||
err = skcipher_walk_aead_decrypt(&walk, req, false);
|
||||
|
||||
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
||||
int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
|
@ -69,30 +69,36 @@
|
||||
* int blocks)
|
||||
*/
|
||||
ENTRY(sha1_ce_transform)
|
||||
frame_push 3
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
|
||||
/* load round constants */
|
||||
loadrc k0.4s, 0x5a827999, w6
|
||||
0: loadrc k0.4s, 0x5a827999, w6
|
||||
loadrc k1.4s, 0x6ed9eba1, w6
|
||||
loadrc k2.4s, 0x8f1bbcdc, w6
|
||||
loadrc k3.4s, 0xca62c1d6, w6
|
||||
|
||||
/* load state */
|
||||
ld1 {dgav.4s}, [x0]
|
||||
ldr dgb, [x0, #16]
|
||||
ld1 {dgav.4s}, [x19]
|
||||
ldr dgb, [x19, #16]
|
||||
|
||||
/* load sha1_ce_state::finalize */
|
||||
ldr_l w4, sha1_ce_offsetof_finalize, x4
|
||||
ldr w4, [x0, x4]
|
||||
ldr w4, [x19, x4]
|
||||
|
||||
/* load input */
|
||||
0: ld1 {v8.4s-v11.4s}, [x1], #64
|
||||
sub w2, w2, #1
|
||||
1: ld1 {v8.4s-v11.4s}, [x20], #64
|
||||
sub w21, w21, #1
|
||||
|
||||
CPU_LE( rev32 v8.16b, v8.16b )
|
||||
CPU_LE( rev32 v9.16b, v9.16b )
|
||||
CPU_LE( rev32 v10.16b, v10.16b )
|
||||
CPU_LE( rev32 v11.16b, v11.16b )
|
||||
|
||||
1: add t0.4s, v8.4s, k0.4s
|
||||
2: add t0.4s, v8.4s, k0.4s
|
||||
mov dg0v.16b, dgav.16b
|
||||
|
||||
add_update c, ev, k0, 8, 9, 10, 11, dgb
|
||||
@ -123,16 +129,25 @@ CPU_LE( rev32 v11.16b, v11.16b )
|
||||
add dgbv.2s, dgbv.2s, dg1v.2s
|
||||
add dgav.4s, dgav.4s, dg0v.4s
|
||||
|
||||
cbnz w2, 0b
|
||||
cbz w21, 3f
|
||||
|
||||
if_will_cond_yield_neon
|
||||
st1 {dgav.4s}, [x19]
|
||||
str dgb, [x19, #16]
|
||||
do_cond_yield_neon
|
||||
b 0b
|
||||
endif_yield_neon
|
||||
|
||||
b 1b
|
||||
|
||||
/*
|
||||
* Final block: add padding and total bit count.
|
||||
* Skip if the input size was not a round multiple of the block size,
|
||||
* the padding is handled by the C code in that case.
|
||||
*/
|
||||
cbz x4, 3f
|
||||
3: cbz x4, 4f
|
||||
ldr_l w4, sha1_ce_offsetof_count, x4
|
||||
ldr x4, [x0, x4]
|
||||
ldr x4, [x19, x4]
|
||||
movi v9.2d, #0
|
||||
mov x8, #0x80000000
|
||||
movi v10.2d, #0
|
||||
@ -141,10 +156,11 @@ CPU_LE( rev32 v11.16b, v11.16b )
|
||||
mov x4, #0
|
||||
mov v11.d[0], xzr
|
||||
mov v11.d[1], x7
|
||||
b 1b
|
||||
b 2b
|
||||
|
||||
/* store new state */
|
||||
3: st1 {dgav.4s}, [x0]
|
||||
str dgb, [x0, #16]
|
||||
4: st1 {dgav.4s}, [x19]
|
||||
str dgb, [x19, #16]
|
||||
frame_pop
|
||||
ret
|
||||
ENDPROC(sha1_ce_transform)
|
||||
|
@ -79,30 +79,36 @@
|
||||
*/
|
||||
.text
|
||||
ENTRY(sha2_ce_transform)
|
||||
frame_push 3
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
|
||||
/* load round constants */
|
||||
adr_l x8, .Lsha2_rcon
|
||||
0: adr_l x8, .Lsha2_rcon
|
||||
ld1 { v0.4s- v3.4s}, [x8], #64
|
||||
ld1 { v4.4s- v7.4s}, [x8], #64
|
||||
ld1 { v8.4s-v11.4s}, [x8], #64
|
||||
ld1 {v12.4s-v15.4s}, [x8]
|
||||
|
||||
/* load state */
|
||||
ld1 {dgav.4s, dgbv.4s}, [x0]
|
||||
ld1 {dgav.4s, dgbv.4s}, [x19]
|
||||
|
||||
/* load sha256_ce_state::finalize */
|
||||
ldr_l w4, sha256_ce_offsetof_finalize, x4
|
||||
ldr w4, [x0, x4]
|
||||
ldr w4, [x19, x4]
|
||||
|
||||
/* load input */
|
||||
0: ld1 {v16.4s-v19.4s}, [x1], #64
|
||||
sub w2, w2, #1
|
||||
1: ld1 {v16.4s-v19.4s}, [x20], #64
|
||||
sub w21, w21, #1
|
||||
|
||||
CPU_LE( rev32 v16.16b, v16.16b )
|
||||
CPU_LE( rev32 v17.16b, v17.16b )
|
||||
CPU_LE( rev32 v18.16b, v18.16b )
|
||||
CPU_LE( rev32 v19.16b, v19.16b )
|
||||
|
||||
1: add t0.4s, v16.4s, v0.4s
|
||||
2: add t0.4s, v16.4s, v0.4s
|
||||
mov dg0v.16b, dgav.16b
|
||||
mov dg1v.16b, dgbv.16b
|
||||
|
||||
@ -131,16 +137,24 @@ CPU_LE( rev32 v19.16b, v19.16b )
|
||||
add dgbv.4s, dgbv.4s, dg1v.4s
|
||||
|
||||
/* handled all input blocks? */
|
||||
cbnz w2, 0b
|
||||
cbz w21, 3f
|
||||
|
||||
if_will_cond_yield_neon
|
||||
st1 {dgav.4s, dgbv.4s}, [x19]
|
||||
do_cond_yield_neon
|
||||
b 0b
|
||||
endif_yield_neon
|
||||
|
||||
b 1b
|
||||
|
||||
/*
|
||||
* Final block: add padding and total bit count.
|
||||
* Skip if the input size was not a round multiple of the block size,
|
||||
* the padding is handled by the C code in that case.
|
||||
*/
|
||||
cbz x4, 3f
|
||||
3: cbz x4, 4f
|
||||
ldr_l w4, sha256_ce_offsetof_count, x4
|
||||
ldr x4, [x0, x4]
|
||||
ldr x4, [x19, x4]
|
||||
movi v17.2d, #0
|
||||
mov x8, #0x80000000
|
||||
movi v18.2d, #0
|
||||
@ -149,9 +163,10 @@ CPU_LE( rev32 v19.16b, v19.16b )
|
||||
mov x4, #0
|
||||
mov v19.d[0], xzr
|
||||
mov v19.d[1], x7
|
||||
b 1b
|
||||
b 2b
|
||||
|
||||
/* store new state */
|
||||
3: st1 {dgav.4s, dgbv.4s}, [x0]
|
||||
4: st1 {dgav.4s, dgbv.4s}, [x19]
|
||||
frame_pop
|
||||
ret
|
||||
ENDPROC(sha2_ce_transform)
|
||||
|
@ -1,3 +1,13 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
// This code is taken from the OpenSSL project but the author (Andy Polyakov)
|
||||
// has relicensed it under the GPLv2. Therefore this program is free software;
|
||||
// you can redistribute it and/or modify it under the terms of the GNU General
|
||||
// Public License version 2 as published by the Free Software Foundation.
|
||||
//
|
||||
// The original headers, including the original license headers, are
|
||||
// included below for completeness.
|
||||
|
||||
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the OpenSSL license (the "License"). You may not use
|
||||
@ -10,8 +20,6 @@
|
||||
// project. The module is, however, dual licensed under OpenSSL and
|
||||
// CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
// details see http://www.openssl.org/~appro/cryptogams/.
|
||||
//
|
||||
// Permission to use under GPLv2 terms is granted.
|
||||
// ====================================================================
|
||||
//
|
||||
// SHA256/512 for ARMv8.
|
||||
|
@ -41,9 +41,16 @@
|
||||
*/
|
||||
.text
|
||||
ENTRY(sha3_ce_transform)
|
||||
/* load state */
|
||||
add x8, x0, #32
|
||||
ld1 { v0.1d- v3.1d}, [x0]
|
||||
frame_push 4
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
|
||||
0: /* load state */
|
||||
add x8, x19, #32
|
||||
ld1 { v0.1d- v3.1d}, [x19]
|
||||
ld1 { v4.1d- v7.1d}, [x8], #32
|
||||
ld1 { v8.1d-v11.1d}, [x8], #32
|
||||
ld1 {v12.1d-v15.1d}, [x8], #32
|
||||
@ -51,13 +58,13 @@ ENTRY(sha3_ce_transform)
|
||||
ld1 {v20.1d-v23.1d}, [x8], #32
|
||||
ld1 {v24.1d}, [x8]
|
||||
|
||||
0: sub w2, w2, #1
|
||||
1: sub w21, w21, #1
|
||||
mov w8, #24
|
||||
adr_l x9, .Lsha3_rcon
|
||||
|
||||
/* load input */
|
||||
ld1 {v25.8b-v28.8b}, [x1], #32
|
||||
ld1 {v29.8b-v31.8b}, [x1], #24
|
||||
ld1 {v25.8b-v28.8b}, [x20], #32
|
||||
ld1 {v29.8b-v31.8b}, [x20], #24
|
||||
eor v0.8b, v0.8b, v25.8b
|
||||
eor v1.8b, v1.8b, v26.8b
|
||||
eor v2.8b, v2.8b, v27.8b
|
||||
@ -66,10 +73,10 @@ ENTRY(sha3_ce_transform)
|
||||
eor v5.8b, v5.8b, v30.8b
|
||||
eor v6.8b, v6.8b, v31.8b
|
||||
|
||||
tbnz x3, #6, 2f // SHA3-512
|
||||
tbnz x22, #6, 3f // SHA3-512
|
||||
|
||||
ld1 {v25.8b-v28.8b}, [x1], #32
|
||||
ld1 {v29.8b-v30.8b}, [x1], #16
|
||||
ld1 {v25.8b-v28.8b}, [x20], #32
|
||||
ld1 {v29.8b-v30.8b}, [x20], #16
|
||||
eor v7.8b, v7.8b, v25.8b
|
||||
eor v8.8b, v8.8b, v26.8b
|
||||
eor v9.8b, v9.8b, v27.8b
|
||||
@ -77,34 +84,34 @@ ENTRY(sha3_ce_transform)
|
||||
eor v11.8b, v11.8b, v29.8b
|
||||
eor v12.8b, v12.8b, v30.8b
|
||||
|
||||
tbnz x3, #4, 1f // SHA3-384 or SHA3-224
|
||||
tbnz x22, #4, 2f // SHA3-384 or SHA3-224
|
||||
|
||||
// SHA3-256
|
||||
ld1 {v25.8b-v28.8b}, [x1], #32
|
||||
ld1 {v25.8b-v28.8b}, [x20], #32
|
||||
eor v13.8b, v13.8b, v25.8b
|
||||
eor v14.8b, v14.8b, v26.8b
|
||||
eor v15.8b, v15.8b, v27.8b
|
||||
eor v16.8b, v16.8b, v28.8b
|
||||
b 3f
|
||||
b 4f
|
||||
|
||||
1: tbz x3, #2, 3f // bit 2 cleared? SHA-384
|
||||
2: tbz x22, #2, 4f // bit 2 cleared? SHA-384
|
||||
|
||||
// SHA3-224
|
||||
ld1 {v25.8b-v28.8b}, [x1], #32
|
||||
ld1 {v29.8b}, [x1], #8
|
||||
ld1 {v25.8b-v28.8b}, [x20], #32
|
||||
ld1 {v29.8b}, [x20], #8
|
||||
eor v13.8b, v13.8b, v25.8b
|
||||
eor v14.8b, v14.8b, v26.8b
|
||||
eor v15.8b, v15.8b, v27.8b
|
||||
eor v16.8b, v16.8b, v28.8b
|
||||
eor v17.8b, v17.8b, v29.8b
|
||||
b 3f
|
||||
b 4f
|
||||
|
||||
// SHA3-512
|
||||
2: ld1 {v25.8b-v26.8b}, [x1], #16
|
||||
3: ld1 {v25.8b-v26.8b}, [x20], #16
|
||||
eor v7.8b, v7.8b, v25.8b
|
||||
eor v8.8b, v8.8b, v26.8b
|
||||
|
||||
3: sub w8, w8, #1
|
||||
4: sub w8, w8, #1
|
||||
|
||||
eor3 v29.16b, v4.16b, v9.16b, v14.16b
|
||||
eor3 v26.16b, v1.16b, v6.16b, v11.16b
|
||||
@ -183,17 +190,33 @@ ENTRY(sha3_ce_transform)
|
||||
|
||||
eor v0.16b, v0.16b, v31.16b
|
||||
|
||||
cbnz w8, 3b
|
||||
cbnz w2, 0b
|
||||
cbnz w8, 4b
|
||||
cbz w21, 5f
|
||||
|
||||
if_will_cond_yield_neon
|
||||
add x8, x19, #32
|
||||
st1 { v0.1d- v3.1d}, [x19]
|
||||
st1 { v4.1d- v7.1d}, [x8], #32
|
||||
st1 { v8.1d-v11.1d}, [x8], #32
|
||||
st1 {v12.1d-v15.1d}, [x8], #32
|
||||
st1 {v16.1d-v19.1d}, [x8], #32
|
||||
st1 {v20.1d-v23.1d}, [x8], #32
|
||||
st1 {v24.1d}, [x8]
|
||||
do_cond_yield_neon
|
||||
b 0b
|
||||
endif_yield_neon
|
||||
|
||||
b 1b
|
||||
|
||||
/* save state */
|
||||
st1 { v0.1d- v3.1d}, [x0], #32
|
||||
st1 { v4.1d- v7.1d}, [x0], #32
|
||||
st1 { v8.1d-v11.1d}, [x0], #32
|
||||
st1 {v12.1d-v15.1d}, [x0], #32
|
||||
st1 {v16.1d-v19.1d}, [x0], #32
|
||||
st1 {v20.1d-v23.1d}, [x0], #32
|
||||
st1 {v24.1d}, [x0]
|
||||
5: st1 { v0.1d- v3.1d}, [x19], #32
|
||||
st1 { v4.1d- v7.1d}, [x19], #32
|
||||
st1 { v8.1d-v11.1d}, [x19], #32
|
||||
st1 {v12.1d-v15.1d}, [x19], #32
|
||||
st1 {v16.1d-v19.1d}, [x19], #32
|
||||
st1 {v20.1d-v23.1d}, [x19], #32
|
||||
st1 {v24.1d}, [x19]
|
||||
frame_pop
|
||||
ret
|
||||
ENDPROC(sha3_ce_transform)
|
||||
|
||||
|
@ -1,4 +1,14 @@
|
||||
#! /usr/bin/env perl
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
# This code is taken from the OpenSSL project but the author (Andy Polyakov)
|
||||
# has relicensed it under the GPLv2. Therefore this program is free software;
|
||||
# you can redistribute it and/or modify it under the terms of the GNU General
|
||||
# Public License version 2 as published by the Free Software Foundation.
|
||||
#
|
||||
# The original headers, including the original license headers, are
|
||||
# included below for completeness.
|
||||
|
||||
# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
@ -11,8 +21,6 @@
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
#
|
||||
# Permission to use under GPLv2 terms is granted.
|
||||
# ====================================================================
|
||||
#
|
||||
# SHA256/512 for ARMv8.
|
||||
|
@ -107,17 +107,23 @@
|
||||
*/
|
||||
.text
|
||||
ENTRY(sha512_ce_transform)
|
||||
frame_push 3
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
|
||||
/* load state */
|
||||
ld1 {v8.2d-v11.2d}, [x0]
|
||||
0: ld1 {v8.2d-v11.2d}, [x19]
|
||||
|
||||
/* load first 4 round constants */
|
||||
adr_l x3, .Lsha512_rcon
|
||||
ld1 {v20.2d-v23.2d}, [x3], #64
|
||||
|
||||
/* load input */
|
||||
0: ld1 {v12.2d-v15.2d}, [x1], #64
|
||||
ld1 {v16.2d-v19.2d}, [x1], #64
|
||||
sub w2, w2, #1
|
||||
1: ld1 {v12.2d-v15.2d}, [x20], #64
|
||||
ld1 {v16.2d-v19.2d}, [x20], #64
|
||||
sub w21, w21, #1
|
||||
|
||||
CPU_LE( rev64 v12.16b, v12.16b )
|
||||
CPU_LE( rev64 v13.16b, v13.16b )
|
||||
@ -196,9 +202,18 @@ CPU_LE( rev64 v19.16b, v19.16b )
|
||||
add v11.2d, v11.2d, v3.2d
|
||||
|
||||
/* handled all input blocks? */
|
||||
cbnz w2, 0b
|
||||
cbz w21, 3f
|
||||
|
||||
if_will_cond_yield_neon
|
||||
st1 {v8.2d-v11.2d}, [x19]
|
||||
do_cond_yield_neon
|
||||
b 0b
|
||||
endif_yield_neon
|
||||
|
||||
b 1b
|
||||
|
||||
/* store new state */
|
||||
3: st1 {v8.2d-v11.2d}, [x0]
|
||||
3: st1 {v8.2d-v11.2d}, [x19]
|
||||
frame_pop
|
||||
ret
|
||||
ENDPROC(sha512_ce_transform)
|
||||
|
@ -1,3 +1,13 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
// This code is taken from the OpenSSL project but the author (Andy Polyakov)
|
||||
// has relicensed it under the GPLv2. Therefore this program is free software;
|
||||
// you can redistribute it and/or modify it under the terms of the GNU General
|
||||
// Public License version 2 as published by the Free Software Foundation.
|
||||
//
|
||||
// The original headers, including the original license headers, are
|
||||
// included below for completeness.
|
||||
|
||||
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the OpenSSL license (the "License"). You may not use
|
||||
@ -10,8 +20,6 @@
|
||||
// project. The module is, however, dual licensed under OpenSSL and
|
||||
// CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
// details see http://www.openssl.org/~appro/cryptogams/.
|
||||
//
|
||||
// Permission to use under GPLv2 terms is granted.
|
||||
// ====================================================================
|
||||
//
|
||||
// SHA256/512 for ARMv8.
|
||||
|
36
arch/arm64/crypto/sm4-ce-core.S
Normal file
36
arch/arm64/crypto/sm4-ce-core.S
Normal file
@ -0,0 +1,36 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
/*
 * Define one assembler symbol per vector operand spelling: .LvN.4s = N for
 * N = 0..8, so that the sm4e macro can turn an operand written as "vN.4s"
 * into its register number.
 */
.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8
|
||||
.set .Lv\b\().4s, \b
|
||||
.endr
|
||||
|
||||
/*
 * sm4e rd, rn: emit the instruction as a raw word - base encoding 0xcec08400
 * with the number of \rd in bits [4:0] and the number of \rn in bits [9:5].
 * Presumably hand-encoded so that assemblers lacking the SM4 mnemonics can
 * still build this file - TODO confirm.
 */
.macro sm4e, rd, rn
|
||||
.inst 0xcec08400 | .L\rd | (.L\rn << 5)
|
||||
.endm
|
||||
|
||||
/*
|
||||
* void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in);
|
||||
*/
|
||||
.text
|
||||
ENTRY(sm4_ce_do_crypt)
	/* x0 = rk, x1 = out, x2 = in (see the prototype above) */
	ld1		{v8.4s}, [x2]			// 16-byte input block
	ld1		{v0.4s-v3.4s}, [x0], #64	// round-key words 0..15
CPU_LE(	rev32		v8.16b, v8.16b		)
	ld1		{v4.4s-v7.4s}, [x0]		// round-key words 16..31

	/* one sm4e per vector of four round-key words, v0 through v7 */
	.irp		rk, 0, 1, 2, 3, 4, 5, 6, 7
	sm4e		v8.4s, v\rk\().4s
	.endr

	/* reverse the order of the four 32-bit words of the result */
	rev64		v8.4s, v8.4s
	ext		v8.16b, v8.16b, v8.16b, #8
CPU_LE(	rev32		v8.16b, v8.16b		)
	st1		{v8.4s}, [x1]
	ret
ENDPROC(sm4_ce_do_crypt)
|
73
arch/arm64/crypto/sm4-ce-glue.c
Normal file
73
arch/arm64/crypto/sm4-ce-glue.c
Normal file
@ -0,0 +1,73 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <asm/neon.h>
|
||||
#include <asm/simd.h>
|
||||
#include <crypto/sm4.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/cpufeature.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
MODULE_ALIAS_CRYPTO("sm4");
|
||||
MODULE_ALIAS_CRYPTO("sm4-ce");
|
||||
MODULE_DESCRIPTION("SM4 symmetric cipher using ARMv8 Crypto Extensions");
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
||||
asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in);
|
||||
|
||||
/*
 * Encrypt one block from @in into @out with the key schedule held in the
 * transform context.  Uses the Crypto Extensions routine when SIMD is usable
 * and crypto_sm4_encrypt() otherwise.
 */
static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);

	/* SIMD registers may not be touched here: take the fallback path. */
	if (!may_use_simd()) {
		crypto_sm4_encrypt(tfm, out, in);
		return;
	}

	kernel_neon_begin();
	sm4_ce_do_crypt(ctx->rkey_enc, out, in);
	kernel_neon_end();
}
|
||||
|
||||
/*
 * Decrypt one block from @in into @out with the decryption key schedule held
 * in the transform context.  Uses the Crypto Extensions routine when SIMD is
 * usable and crypto_sm4_decrypt() otherwise.
 */
static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);

	/* SIMD registers may not be touched here: take the fallback path. */
	if (!may_use_simd()) {
		crypto_sm4_decrypt(tfm, out, in);
		return;
	}

	kernel_neon_begin();
	sm4_ce_do_crypt(ctx->rkey_dec, out, in);
	kernel_neon_end();
}
|
||||
|
||||
static struct crypto_alg sm4_ce_alg = {
|
||||
.cra_name = "sm4",
|
||||
.cra_driver_name = "sm4-ce",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = SM4_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u.cipher = {
|
||||
.cia_min_keysize = SM4_KEY_SIZE,
|
||||
.cia_max_keysize = SM4_KEY_SIZE,
|
||||
.cia_setkey = crypto_sm4_set_key,
|
||||
.cia_encrypt = sm4_ce_encrypt,
|
||||
.cia_decrypt = sm4_ce_decrypt
|
||||
}
|
||||
};
|
||||
|
||||
/* Module init: register sm4_ce_alg and return crypto_register_alg()'s result. */
static int __init sm4_ce_mod_init(void)
|
||||
{
|
||||
return crypto_register_alg(&sm4_ce_alg);
|
||||
}
|
||||
|
||||
/* Module exit: unregister sm4_ce_alg again. */
static void __exit sm4_ce_mod_fini(void)
|
||||
{
|
||||
crypto_unregister_alg(&sm4_ce_alg);
|
||||
}
|
||||
|
||||
/*
 * Bind only on CPUs that advertise the SM4 instructions.  The SM3 feature
 * bit describes the SM3 hash instructions; this driver executes SM4E, so
 * matching on SM3 could load it on a core that lacks SM4.
 */
module_cpu_feature_match(SM4, sm4_ce_mod_init);
|
||||
module_exit(sm4_ce_mod_fini);
|
@ -15,7 +15,6 @@ obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
|
||||
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
|
||||
obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
|
||||
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
|
||||
@ -24,7 +23,6 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
|
||||
obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
|
||||
@ -38,6 +36,16 @@ obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
|
||||
obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
|
||||
obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
|
||||
obj-$(CONFIG_CRYPTO_AEGIS128L_AESNI_SSE2) += aegis128l-aesni.o
|
||||
obj-$(CONFIG_CRYPTO_AEGIS256_AESNI_SSE2) += aegis256-aesni.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_MORUS640_GLUE) += morus640_glue.o
|
||||
obj-$(CONFIG_CRYPTO_MORUS1280_GLUE) += morus1280_glue.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_MORUS640_SSE2) += morus640-sse2.o
|
||||
obj-$(CONFIG_CRYPTO_MORUS1280_SSE2) += morus1280-sse2.o
|
||||
|
||||
# These modules require assembler to support AVX.
|
||||
ifeq ($(avx_supported),yes)
|
||||
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += \
|
||||
@ -55,11 +63,12 @@ ifeq ($(avx2_supported),yes)
|
||||
obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/
|
||||
obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/
|
||||
obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/
|
||||
|
||||
obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o
|
||||
endif
|
||||
|
||||
aes-i586-y := aes-i586-asm_32.o aes_glue.o
|
||||
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
|
||||
salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
|
||||
serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
|
||||
|
||||
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
|
||||
@ -68,10 +77,16 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
|
||||
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
|
||||
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
|
||||
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
|
||||
salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
|
||||
chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o
|
||||
serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
|
||||
|
||||
aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
|
||||
aegis128l-aesni-y := aegis128l-aesni-asm.o aegis128l-aesni-glue.o
|
||||
aegis256-aesni-y := aegis256-aesni-asm.o aegis256-aesni-glue.o
|
||||
|
||||
morus640-sse2-y := morus640-sse2-asm.o morus640-sse2-glue.o
|
||||
morus1280-sse2-y := morus1280-sse2-asm.o morus1280-sse2-glue.o
|
||||
|
||||
ifeq ($(avx_supported),yes)
|
||||
camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
|
||||
camellia_aesni_avx_glue.o
|
||||
@ -87,6 +102,8 @@ ifeq ($(avx2_supported),yes)
|
||||
camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
|
||||
chacha20-x86_64-y += chacha20-avx2-x86_64.o
|
||||
serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
|
||||
|
||||
morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
|
||||
endif
|
||||
|
||||
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
|
||||
|
749
arch/x86/crypto/aegis128-aesni-asm.S
Normal file
749
arch/x86/crypto/aegis128-aesni-asm.S
Normal file
@ -0,0 +1,749 @@
|
||||
/*
|
||||
* AES-NI + SSE2 implementation of AEGIS-128
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
/* The five 128-bit AEGIS-128 state words. */
#define STATE0 %xmm0
|
||||
#define STATE1 %xmm1
|
||||
#define STATE2 %xmm2
|
||||
#define STATE3 %xmm3
|
||||
#define STATE4 %xmm4
|
||||
/*
 * KEY and MSG deliberately share %xmm5: KEY is used only by
 * crypto_aegis128_aesni_init, MSG by every other routine.
 */
#define KEY %xmm5
|
||||
#define MSG %xmm5
|
||||
/* Scratch vector registers; aegis128_update overwrites T0. */
#define T0 %xmm6
|
||||
#define T1 %xmm7
|
||||
|
||||
/*
 * General-purpose register roles for the ad/enc/dec routines.  These are the
 * first four System V integer argument registers; init and final read
 * %rsi/%rdx/%rcx directly under different meanings.
 */
#define STATEP %rdi
|
||||
#define LEN %rsi
|
||||
#define SRC %rdx
|
||||
#define DST %rcx
|
||||
|
||||
/*
 * Initialization constants: 32 consecutive Fibonacci numbers reduced
 * modulo 256, split into two 16-byte vectors.  They are loaded with movdqa,
 * hence the 16-byte alignment.
 */
.section .rodata.cst16.aegis128_const, "aM", @progbits, 32
|
||||
.align 16
|
||||
.Laegis128_const_0:
|
||||
.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
|
||||
.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
|
||||
.Laegis128_const_1:
|
||||
.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
|
||||
.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
|
||||
|
||||
/*
 * Byte indices 0..15.  crypto_aegis128_aesni_dec_tail compares them against
 * the byte count to build a mask covering only the valid bytes of a block.
 */
.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
|
||||
.align 16
|
||||
.Laegis128_counter:
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
|
||||
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
|
||||
|
||||
.text
|
||||
|
||||
/*
|
||||
* aegis128_update
|
||||
* input:
|
||||
* STATE[0-4] - input state
|
||||
* output:
|
||||
* STATE[0-4] - output state (shifted positions)
|
||||
* changed:
|
||||
* T0
|
||||
*/
|
||||
/*
 * One state-update round.  "aesenc src, dst" replaces dst with one AES round
 * of dst XORed with src, so every word must be consumed as a round key
 * before it is overwritten - the order of the lines below is essential.
 * The new state ends up rotated by one register: the register named STATE4
 * holds the new word 0, STATE0 the new word 1, and so on.
 */
.macro aegis128_update
|
||||
/* keep the old STATE4: it is needed as the round key for STATE3 */
movdqa STATE4, T0
|
||||
aesenc STATE0, STATE4
|
||||
aesenc STATE1, STATE0
|
||||
aesenc STATE2, STATE1
|
||||
aesenc STATE3, STATE2
|
||||
aesenc T0, STATE3
|
||||
.endm
|
||||
|
||||
/*
 * __load_partial: internal ABI
 * input:
 *   LEN - bytes
 *   SRC - src
 * output:
 *   MSG  - message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 *
 * The block is assembled back to front from the 1-, 2-, 4- and 8-byte
 * pieces selected by bits 0..3 of LEN.  Bytes that are not loaded stay zero.
 */
__load_partial:
	xor	%r9, %r9
	pxor	MSG, MSG

	/* bit 0 set: fetch the single byte at offset (LEN & 0x1E) */
	mov	LEN, %r8
	and	$0x1, %r8
	jz	.Lld_partial_no_byte

	mov	LEN, %r8
	and	$0x1E, %r8
	add	SRC, %r8
	mov	(%r8), %r9b

.Lld_partial_no_byte:
	/* bit 1 set: make room, then fetch 2 bytes at offset (LEN & 0x1C) */
	mov	LEN, %r8
	and	$0x2, %r8
	jz	.Lld_partial_no_word

	mov	LEN, %r8
	and	$0x1C, %r8
	add	SRC, %r8
	shl	$0x10, %r9
	mov	(%r8), %r9w

.Lld_partial_no_word:
	/* bit 2 set: make room, then merge 4 bytes from offset (LEN & 0x18) */
	mov	LEN, %r8
	and	$0x4, %r8
	jz	.Lld_partial_no_dword

	mov	LEN, %r8
	and	$0x18, %r8
	add	SRC, %r8
	shl	$32, %r9
	mov	(%r8), %r8d
	xor	%r8, %r9

.Lld_partial_no_dword:
	/* up to 7 bytes collected so far: move them into the vector register */
	movq	%r9, MSG

	/* bit 3 set: shift up by 8 bytes and merge 8 bytes from (LEN & 0x10) */
	mov	LEN, %r8
	and	$0x8, %r8
	jz	.Lld_partial_done

	mov	LEN, %r8
	and	$0x10, %r8
	add	SRC, %r8
	pslldq	$8, MSG
	movq	(%r8), T0
	pxor	T0, MSG

.Lld_partial_done:
	ret
ENDPROC(__load_partial)
|
||||
|
||||
/*
 * __store_partial: internal ABI
 * input:
 *   LEN - bytes
 *   DST - dst
 *   T0   - message block to store
 * changed:
 *   T0   (shifted right by 8 bytes when LEN >= 8)
 *   %r8
 *   %r9
 *   %r10
 *
 * Writes the low LEN bytes of T0 to DST in chunks of 8, 4, 2 and 1 bytes.
 * %r8 counts the bytes still to write, %r9 is the write cursor and %r10
 * holds the not-yet-written low 64 bits.
 */
__store_partial:
	mov	LEN, %r8
	mov	DST, %r9

	movq	T0, %r10

	/* 8 or more bytes left: write the low qword, continue with the high */
	cmp	$8, %r8
	jl	.Lst_partial_lt8

	mov	%r10, (%r9)
	psrldq	$8, T0
	movq	T0, %r10

	sub	$8, %r8
	add	$8, %r9

.Lst_partial_lt8:
	/* 4 or more bytes left */
	cmp	$4, %r8
	jl	.Lst_partial_lt4

	mov	%r10d, (%r9)
	shr	$32, %r10

	sub	$4, %r8
	add	$4, %r9

.Lst_partial_lt4:
	/* 2 or more bytes left */
	cmp	$2, %r8
	jl	.Lst_partial_lt2

	mov	%r10w, (%r9)
	shr	$0x10, %r10

	sub	$2, %r8
	add	$2, %r9

.Lst_partial_lt2:
	/* one last byte */
	cmp	$1, %r8
	jl	.Lst_partial_done

	mov	%r10b, (%r9)

.Lst_partial_done:
	ret
ENDPROC(__store_partial)
|
||||
|
||||
/*
 * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
 *
 * %rdi = state (written), %rsi = key, %rdx = iv.  The key is read with
 * movdqa and must therefore be 16-byte aligned; the IV and the state buffer
 * are accessed with unaligned moves.
 */
ENTRY(crypto_aegis128_aesni_init)
	FRAME_BEGIN

	/* KEY = key, T1 = key ^ iv */
	movdqa	(%rsi), KEY
	movdqu	(%rdx), T1
	pxor	KEY, T1

	/* initial state: key ^ iv, const_1, const_0, key ^ const_0, key ^ const_1 */
	movdqa	T1, STATE0
	movdqa	.Laegis128_const_1, STATE1
	movdqa	.Laegis128_const_0, STATE2
	movdqa	KEY, STATE3
	pxor	STATE2, STATE3
	movdqa	KEY, STATE4
	pxor	STATE1, STATE4

	/*
	 * Ten update rounds, alternately mixing in KEY and KEY ^ IV.  Every
	 * round rotates the state by one register, so the register that
	 * receives the XOR steps backwards: 4, 3, 2, 1, 0, 4, ...
	 */
	aegis128_update
	pxor	KEY, STATE4
	aegis128_update
	pxor	T1, STATE3
	aegis128_update
	pxor	KEY, STATE2
	aegis128_update
	pxor	T1, STATE1
	aegis128_update
	pxor	KEY, STATE0
	aegis128_update
	pxor	T1, STATE4
	aegis128_update
	pxor	KEY, STATE3
	aegis128_update
	pxor	T1, STATE2
	aegis128_update
	pxor	KEY, STATE1
	aegis128_update
	pxor	T1, STATE0

	/* ten rounds are two full rotations: registers are back in place */
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_init)
|
||||
|
||||
/*
 * One step of the unrolled associated-data loops: update the state, absorb
 * the 16-byte block at (\i * 0x10)(SRC) into \s4, and leave through \out as
 * soon as fewer than 16 bytes remain.  \a selects movdqa ('a') or movdqu
 * ('u') for the load.
 */
.macro aegis128_ad_step a, s4, i, out
	movdq\a	(\i * 0x10)(SRC), MSG
	aegis128_update
	pxor	MSG, \s4
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jl	\out
.endm

/*
 * Write the five state registers to STATEP in the given order and return.
 */
.macro aegis128_ad_store s0, s1, s2, s3, s4
	movdqu	\s0, 0x00(STATEP)
	movdqu	\s1, 0x10(STATEP)
	movdqu	\s2, 0x20(STATEP)
	movdqu	\s3, 0x30(STATEP)
	movdqu	\s4, 0x40(STATEP)
	FRAME_END
	ret
.endm

/*
 * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
 *                               const void *data);
 *
 * Absorbs whole 16-byte blocks only; a call with fewer than 16 bytes leaves
 * the state untouched.  The loops are unrolled five times because every
 * update rotates the state by one register; each exit label stores the
 * registers in the rotation that is current at that point.
 */
ENTRY(crypto_aegis128_aesni_ad)
	FRAME_BEGIN

	cmp	$0x10, LEN
	jb	.Lad_out

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* pick the aligned loop only when SRC is 16-byte aligned */
	mov	SRC, %r8
	and	$0xF, %r8
	jnz	.Lad_u_loop

.align 8
.Lad_a_loop:
	aegis128_ad_step a, STATE4, 0, .Lad_out_1
	aegis128_ad_step a, STATE3, 1, .Lad_out_2
	aegis128_ad_step a, STATE2, 2, .Lad_out_3
	aegis128_ad_step a, STATE1, 3, .Lad_out_4
	aegis128_ad_step a, STATE0, 4, .Lad_out_0

	add	$0x50, SRC
	jmp	.Lad_a_loop

.align 8
.Lad_u_loop:
	aegis128_ad_step u, STATE4, 0, .Lad_out_1
	aegis128_ad_step u, STATE3, 1, .Lad_out_2
	aegis128_ad_step u, STATE2, 2, .Lad_out_3
	aegis128_ad_step u, STATE1, 3, .Lad_out_4
	aegis128_ad_step u, STATE0, 4, .Lad_out_0

	add	$0x50, SRC
	jmp	.Lad_u_loop

	/* store the state: */
.Lad_out_0:
	aegis128_ad_store STATE0, STATE1, STATE2, STATE3, STATE4

.Lad_out_1:
	aegis128_ad_store STATE4, STATE0, STATE1, STATE2, STATE3

.Lad_out_2:
	aegis128_ad_store STATE3, STATE4, STATE0, STATE1, STATE2

.Lad_out_3:
	aegis128_ad_store STATE2, STATE3, STATE4, STATE0, STATE1

.Lad_out_4:
	aegis128_ad_store STATE1, STATE2, STATE3, STATE4, STATE0

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_ad)
|
||||
|
||||
/*
 * Encrypt block number \i of the current group of five:
 *   ciphertext = plaintext ^ \s1 ^ \s4 ^ (\s2 & \s3)
 * then update the state, absorb the plaintext into \s4 and leave through
 * .Lenc_out_\i once fewer than 16 bytes remain.  \a selects movdqa ('a') or
 * movdqu ('u'); \s0 is accepted for symmetry and not used.
 * Clobbers MSG, T0, T1.
 */
.macro encrypt_block a s0 s1 s2 s3 s4 i
	movdq\a	(\i * 0x10)(SRC), MSG

	/* T1 = \s2 & \s3 */
	movdqa	\s2, T1
	pand	\s3, T1

	/* T0 = MSG ^ T1 ^ \s1 ^ \s4 */
	movdqa	MSG, T0
	pxor	T1, T0
	pxor	\s1, T0
	pxor	\s4, T0
	movdq\a	T0, (\i * 0x10)(DST)

	aegis128_update
	pxor	MSG, \s4

	sub	$0x10, LEN
	cmp	$0x10, LEN
	jl	.Lenc_out_\i
.endm
|
||||
|
||||
/*
 * Write the five state registers to STATEP in the given order and return.
 */
.macro aegis128_enc_store s0, s1, s2, s3, s4
	movdqu	\s0, 0x00(STATEP)
	movdqu	\s1, 0x10(STATEP)
	movdqu	\s2, 0x20(STATEP)
	movdqu	\s3, 0x30(STATEP)
	movdqu	\s4, 0x40(STATEP)
	FRAME_END
	ret
.endm

/*
 * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Encrypts whole 16-byte blocks only; a call with fewer than 16 bytes does
 * nothing.  The aligned loop is taken only when both SRC and DST are 16-byte
 * aligned.  Exit label .Lenc_out_N is reached after block N of a group of
 * five and stores the registers in the rotation current at that point.
 */
ENTRY(crypto_aegis128_aesni_enc)
	FRAME_BEGIN

	cmp	$0x10, LEN
	jb	.Lenc_out

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	mov	SRC, %r8
	or	DST, %r8
	and	$0xF, %r8
	jnz	.Lenc_u_loop

.align 8
.Lenc_a_loop:
	encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Lenc_a_loop

.align 8
.Lenc_u_loop:
	encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Lenc_u_loop

	/* store the state: */
.Lenc_out_0:
	aegis128_enc_store STATE4, STATE0, STATE1, STATE2, STATE3

.Lenc_out_1:
	aegis128_enc_store STATE3, STATE4, STATE0, STATE1, STATE2

.Lenc_out_2:
	aegis128_enc_store STATE2, STATE3, STATE4, STATE0, STATE1

.Lenc_out_3:
	aegis128_enc_store STATE1, STATE2, STATE3, STATE4, STATE0

.Lenc_out_4:
	aegis128_enc_store STATE0, STATE1, STATE2, STATE3, STATE4

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_enc)
|
||||
|
||||
/*
 * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Encrypts a trailing partial block: LEN bytes are gathered into a
 * zero-padded block by __load_partial, LEN bytes of ciphertext are written
 * by __store_partial, and the zero-padded plaintext is absorbed into the
 * state.  Clobbers MSG, T0, T1, %r8, %r9, %r10.
 */
ENTRY(crypto_aegis128_aesni_enc_tail)
	FRAME_BEGIN

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* encrypt message: */
	call	__load_partial

	/* T0 = MSG ^ STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	movdqa	MSG, T0
	pxor	STATE1, T0
	pxor	STATE4, T0
	movdqa	STATE2, T1
	pand	STATE3, T1
	pxor	T1, T0

	call	__store_partial

	/* MSG survives __store_partial; T0 does not and is free for the update */
	aegis128_update
	pxor	MSG, STATE4

	/* store the state (rotated by the single update above): */
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)

	FRAME_END
	/*
	 * Return explicitly.  Without this, execution runs straight on into
	 * crypto_aegis128_aesni_dec, the next function in this file.
	 */
	ret
ENDPROC(crypto_aegis128_aesni_enc_tail)
|
||||
|
||||
/*
 * Decrypt block number \i of the current group of five:
 *   plaintext = ciphertext ^ \s1 ^ \s4 ^ (\s2 & \s3)
 * then update the state, absorb the plaintext into \s4 and leave through
 * .Ldec_out_\i once fewer than 16 bytes remain.  \a selects movdqa ('a') or
 * movdqu ('u'); \s0 is accepted for symmetry and not used.
 * Clobbers MSG, T0, T1.
 */
.macro decrypt_block a s0 s1 s2 s3 s4 i
	movdq\a	(\i * 0x10)(SRC), MSG

	/* T1 = \s2 & \s3 */
	movdqa	\s2, T1
	pand	\s3, T1

	/* MSG = MSG ^ T1 ^ \s1 ^ \s4 */
	pxor	T1, MSG
	pxor	\s1, MSG
	pxor	\s4, MSG
	movdq\a	MSG, (\i * 0x10)(DST)

	aegis128_update
	pxor	MSG, \s4

	sub	$0x10, LEN
	cmp	$0x10, LEN
	jl	.Ldec_out_\i
.endm
|
||||
|
||||
/*
 * Write the five state registers to STATEP in the given order and return.
 */
.macro aegis128_dec_store s0, s1, s2, s3, s4
	movdqu	\s0, 0x00(STATEP)
	movdqu	\s1, 0x10(STATEP)
	movdqu	\s2, 0x20(STATEP)
	movdqu	\s3, 0x30(STATEP)
	movdqu	\s4, 0x40(STATEP)
	FRAME_END
	ret
.endm

/*
 * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Decrypts whole 16-byte blocks only; a call with fewer than 16 bytes does
 * nothing.  The aligned loop is taken only when both SRC and DST are 16-byte
 * aligned.  Exit label .Ldec_out_N is reached after block N of a group of
 * five and stores the registers in the rotation current at that point.
 */
ENTRY(crypto_aegis128_aesni_dec)
	FRAME_BEGIN

	cmp	$0x10, LEN
	jb	.Ldec_out

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	mov	SRC, %r8
	or	DST, %r8
	and	$0xF, %r8
	jnz	.Ldec_u_loop

.align 8
.Ldec_a_loop:
	decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Ldec_a_loop

.align 8
.Ldec_u_loop:
	decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Ldec_u_loop

	/* store the state: */
.Ldec_out_0:
	aegis128_dec_store STATE4, STATE0, STATE1, STATE2, STATE3

.Ldec_out_1:
	aegis128_dec_store STATE3, STATE4, STATE0, STATE1, STATE2

.Ldec_out_2:
	aegis128_dec_store STATE2, STATE3, STATE4, STATE0, STATE1

.Ldec_out_3:
	aegis128_dec_store STATE1, STATE2, STATE3, STATE4, STATE0

.Ldec_out_4:
	aegis128_dec_store STATE0, STATE1, STATE2, STATE3, STATE4

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_dec)
|
||||
|
||||
/*
 * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Decrypts a trailing partial block of LEN bytes.  The decrypted block is
 * masked down to its first LEN bytes before it is absorbed, because the
 * bytes past LEN are keystream rather than plaintext.
 * Clobbers MSG, T0, T1, %r8, %r9, %r10.
 */
ENTRY(crypto_aegis128_aesni_dec_tail)
	FRAME_BEGIN

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* decrypt message: MSG ^= STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	call	__load_partial

	pxor	STATE1, MSG
	pxor	STATE4, MSG
	movdqa	STATE2, T1
	pand	STATE3, T1
	pxor	T1, MSG

	/* __store_partial takes its data in T0 and leaves MSG alone */
	movdqa	MSG, T0
	call	__store_partial

	/*
	 * mask with byte count: broadcast the low byte of LEN to all 16 byte
	 * lanes (four rounds of punpcklbw), then keep lane i only where
	 * LEN > i.
	 */
	movq	LEN, T0
	punpcklbw	T0, T0
	punpcklbw	T0, T0
	punpcklbw	T0, T0
	punpcklbw	T0, T0
	movdqa	.Laegis128_counter, T1
	pcmpgtb	T1, T0
	pand	T0, MSG

	aegis128_update
	pxor	MSG, STATE4

	/* store the state (rotated by the single update above): */
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_dec_tail)
|
||||
|
||||
/*
 * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
 *                                  u64 assoclen, u64 cryptlen);
 *
 * %rdi = state (read only here), %rsi = tag_xor (16 bytes, updated in
 * place), %rdx / %rcx = the two byte lengths, read as full 64-bit values.
 *
 * NOTE(review): the C declaration in the glue file lists these two
 * parameters as "unsigned int cryptlen, unsigned int assoclen" - confirm the
 * argument order at the call site and that the upper halves of %rdx/%rcx are
 * zero there.
 */
ENTRY(crypto_aegis128_aesni_final)
	FRAME_BEGIN

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* prepare length block: low qword from %rdx, high qword from %rcx */
	movq	%rcx, T0
	pslldq	$8, T0
	movq	%rdx, MSG
	pxor	T0, MSG
	psllq	$3, MSG /* multiply by 8 (to get bit count) */

	pxor	STATE3, MSG

	/*
	 * update state: seven rounds with the same block; the register that
	 * receives the XOR follows the rotation of the state.
	 */
	aegis128_update
	pxor	MSG, STATE4
	aegis128_update
	pxor	MSG, STATE3
	aegis128_update
	pxor	MSG, STATE2
	aegis128_update
	pxor	MSG, STATE1
	aegis128_update
	pxor	MSG, STATE0
	aegis128_update
	pxor	MSG, STATE4
	aegis128_update
	pxor	MSG, STATE3

	/* xor tag: *tag_xor ^= STATE0 ^ STATE1 ^ STATE2 ^ STATE3 ^ STATE4 */
	movdqu	(%rsi), MSG

	pxor	STATE0, MSG
	pxor	STATE1, MSG
	pxor	STATE2, MSG
	pxor	STATE3, MSG
	pxor	STATE4, MSG

	movdqu	MSG, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_final)
|
407
arch/x86/crypto/aegis128-aesni-glue.c
Normal file
407
arch/x86/crypto/aegis128-aesni-glue.c
Normal file
@ -0,0 +1,407 @@
|
||||
/*
|
||||
* The AEGIS-128 Authenticated-Encryption Algorithm
|
||||
* Glue for AES-NI + SSE2 implementation
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*/
|
||||
|
||||
#include <crypto/cryptd.h>
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
|
||||
#define AEGIS128_BLOCK_ALIGN 16
|
||||
#define AEGIS128_BLOCK_SIZE 16
|
||||
#define AEGIS128_NONCE_SIZE 16
|
||||
#define AEGIS128_STATE_BLOCKS 5
|
||||
#define AEGIS128_KEY_SIZE 16
|
||||
#define AEGIS128_MIN_AUTH_SIZE 8
|
||||
#define AEGIS128_MAX_AUTH_SIZE 16
|
||||
|
||||
asmlinkage void crypto_aegis128_aesni_init(void *state, void *key, void *iv);
|
||||
|
||||
asmlinkage void crypto_aegis128_aesni_ad(
|
||||
void *state, unsigned int length, const void *data);
|
||||
|
||||
asmlinkage void crypto_aegis128_aesni_enc(
|
||||
void *state, unsigned int length, const void *src, void *dst);
|
||||
|
||||
asmlinkage void crypto_aegis128_aesni_dec(
|
||||
void *state, unsigned int length, const void *src, void *dst);
|
||||
|
||||
asmlinkage void crypto_aegis128_aesni_enc_tail(
|
||||
void *state, unsigned int length, const void *src, void *dst);
|
||||
|
||||
asmlinkage void crypto_aegis128_aesni_dec_tail(
|
||||
void *state, unsigned int length, const void *src, void *dst);
|
||||
|
||||
/*
 * Finalization routine implemented in assembly.  The third argument is the
 * associated-data length and the fourth is the message length, matching both
 * the assembly implementation and the call in crypto_aegis128_aesni_crypt().
 */
asmlinkage void crypto_aegis128_aesni_final(
		void *state, void *tag_xor, unsigned int assoclen,
		unsigned int cryptlen);
|
||||
|
||||
struct aegis_block {
|
||||
u8 bytes[AEGIS128_BLOCK_SIZE] __aligned(AEGIS128_BLOCK_ALIGN);
|
||||
};
|
||||
|
||||
struct aegis_state {
|
||||
struct aegis_block blocks[AEGIS128_STATE_BLOCKS];
|
||||
};
|
||||
|
||||
struct aegis_ctx {
|
||||
struct aegis_block key;
|
||||
};
|
||||
|
||||
struct aegis_crypt_ops {
|
||||
int (*skcipher_walk_init)(struct skcipher_walk *walk,
|
||||
struct aead_request *req, bool atomic);
|
||||
|
||||
void (*crypt_blocks)(void *state, unsigned int length, const void *src,
|
||||
void *dst);
|
||||
void (*crypt_tail)(void *state, unsigned int length, const void *src,
|
||||
void *dst);
|
||||
};
|
||||
|
||||
/*
 * Feed @assoclen bytes of associated data from the scatterlist @sg_src into
 * @state.
 *
 * The data is walked segment by segment.  Bytes that do not fill a whole
 * AEGIS128_BLOCK_SIZE block at the end of a segment are kept in a local
 * buffer and completed from the next segment.  Whatever is left in the buffer
 * at the very end is zero-padded to a full block and absorbed as well.
 */
static void crypto_aegis128_aesni_process_ad(
		struct aegis_state *state, struct scatterlist *sg_src,
		unsigned int assoclen)
{
	struct scatter_walk walk;
	struct aegis_block buf;		/* partial block carried across segments */
	unsigned int pos = 0;		/* number of valid bytes in buf */

	scatterwalk_start(&walk, sg_src);
	while (assoclen != 0) {
		unsigned int size = scatterwalk_clamp(&walk, assoclen);
		unsigned int left = size;
		void *mapped = scatterwalk_map(&walk);
		const u8 *src = (const u8 *)mapped;

		/* Enough data (buffered + new) for at least one full block? */
		if (pos + size >= AEGIS128_BLOCK_SIZE) {
			if (pos > 0) {
				/* Complete the buffered block first. */
				unsigned int fill = AEGIS128_BLOCK_SIZE - pos;
				memcpy(buf.bytes + pos, src, fill);
				crypto_aegis128_aesni_ad(state,
							 AEGIS128_BLOCK_SIZE,
							 buf.bytes);
				pos = 0;
				left -= fill;
				src += fill;
			}

			/*
			 * NOTE(review): the full remaining length is passed;
			 * presumably the assembly routine only consumes whole
			 * blocks, since the sub-block remainder is buffered
			 * below -- confirm against the asm implementation.
			 */
			crypto_aegis128_aesni_ad(state, left, src);

			/* Skip the whole blocks, keep only the remainder. */
			src += left & ~(AEGIS128_BLOCK_SIZE - 1);
			left &= AEGIS128_BLOCK_SIZE - 1;
		}

		/* Stash the remainder (less than one block) for later. */
		memcpy(buf.bytes + pos, src, left);
		pos += left;
		assoclen -= size;

		scatterwalk_unmap(mapped);
		scatterwalk_advance(&walk, size);
		scatterwalk_done(&walk, 0, assoclen);
	}

	/* Absorb the final partial block, padded with zeroes. */
	if (pos > 0) {
		memset(buf.bytes + pos, 0, AEGIS128_BLOCK_SIZE - pos);
		crypto_aegis128_aesni_ad(state, AEGIS128_BLOCK_SIZE, buf.bytes);
	}
}
|
||||
|
||||
static void crypto_aegis128_aesni_process_crypt(
|
||||
struct aegis_state *state, struct aead_request *req,
|
||||
const struct aegis_crypt_ops *ops)
|
||||
{
|
||||
struct skcipher_walk walk;
|
||||
u8 *src, *dst;
|
||||
unsigned int chunksize, base;
|
||||
|
||||
ops->skcipher_walk_init(&walk, req, false);
|
||||
|
||||
while (walk.nbytes) {
|
||||
src = walk.src.virt.addr;
|
||||
dst = walk.dst.virt.addr;
|
||||
chunksize = walk.nbytes;
|
||||
|
||||
ops->crypt_blocks(state, chunksize, src, dst);
|
||||
|
||||
base = chunksize & ~(AEGIS128_BLOCK_SIZE - 1);
|
||||
src += base;
|
||||
dst += base;
|
||||
chunksize &= AEGIS128_BLOCK_SIZE - 1;
|
||||
|
||||
if (chunksize > 0)
|
||||
ops->crypt_tail(state, chunksize, src, dst);
|
||||
|
||||
skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static struct aegis_ctx *crypto_aegis128_aesni_ctx(struct crypto_aead *aead)
|
||||
{
|
||||
u8 *ctx = crypto_aead_ctx(aead);
|
||||
ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
|
||||
return (void *)ctx;
|
||||
}
|
||||
|
||||
static int crypto_aegis128_aesni_setkey(struct crypto_aead *aead, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(aead);
|
||||
|
||||
if (keylen != AEGIS128_KEY_SIZE) {
|
||||
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memcpy(ctx->key.bytes, key, AEGIS128_KEY_SIZE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_aegis128_aesni_setauthsize(struct crypto_aead *tfm,
|
||||
unsigned int authsize)
|
||||
{
|
||||
if (authsize > AEGIS128_MAX_AUTH_SIZE)
|
||||
return -EINVAL;
|
||||
if (authsize < AEGIS128_MIN_AUTH_SIZE)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Common body of encryption and decryption.
 *
 * Initializes a fresh state from the key and req->iv, absorbs the associated
 * data, processes @cryptlen bytes of message data through @ops, and lets the
 * finalization routine XOR the computed tag into @tag_xor.  All of this runs
 * inside one kernel_fpu_begin()/kernel_fpu_end() section.
 */
static void crypto_aegis128_aesni_crypt(struct aead_request *req,
					struct aegis_block *tag_xor,
					unsigned int cryptlen,
					const struct aegis_crypt_ops *ops)
{
	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
	struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(tfm);
	struct aegis_state state;

	kernel_fpu_begin();

	crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv);
	crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen);
	crypto_aegis128_aesni_process_crypt(&state, req, ops);
	/* Argument order: associated-data length first, then message length. */
	crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen);

	kernel_fpu_end();
}
|
||||
|
||||
/*
 * AEAD encrypt entry point: encrypts req->cryptlen bytes and writes the
 * first authsize bytes of the tag into req->dst right after the associated
 * data and the ciphertext.  Always returns 0.
 */
static int crypto_aegis128_aesni_encrypt(struct aead_request *req)
{
	static const struct aegis_crypt_ops OPS = {
		.skcipher_walk_init = skcipher_walk_aead_encrypt,
		.crypt_blocks = crypto_aegis128_aesni_enc,
		.crypt_tail = crypto_aegis128_aesni_enc_tail,
	};

	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
	/* Starts as all zeroes, so XORing the tag in yields the tag itself. */
	struct aegis_block tag = {};
	unsigned int authsize = crypto_aead_authsize(tfm);
	unsigned int cryptlen = req->cryptlen;

	crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS);

	/* Last argument 1: copy from the buffer into the scatterlist. */
	scatterwalk_map_and_copy(tag.bytes, req->dst,
				 req->assoclen + cryptlen, authsize, 1);
	return 0;
}
|
||||
|
||||
/*
 * AEAD decrypt entry point: decrypts req->cryptlen - authsize bytes and
 * verifies the tag that follows the ciphertext in req->src.
 *
 * Returns 0 when the tag matches and -EBADMSG otherwise.
 */
static int crypto_aegis128_aesni_decrypt(struct aead_request *req)
{
	static const struct aegis_block zeros = {};

	static const struct aegis_crypt_ops OPS = {
		.skcipher_walk_init = skcipher_walk_aead_decrypt,
		.crypt_blocks = crypto_aegis128_aesni_dec,
		.crypt_tail = crypto_aegis128_aesni_dec_tail,
	};

	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
	struct aegis_block tag;
	unsigned int authsize = crypto_aead_authsize(tfm);
	/* req->cryptlen covers ciphertext plus tag on the decrypt side. */
	unsigned int cryptlen = req->cryptlen - authsize;

	/* Last argument 0: copy the received tag out of the scatterlist. */
	scatterwalk_map_and_copy(tag.bytes, req->src,
				 req->assoclen + cryptlen, authsize, 0);

	/* The computed tag is XORed into the received one. */
	crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS);

	/* Equal tags cancel to zero; only the first authsize bytes count. */
	return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
}
|
||||
|
||||
static int crypto_aegis128_aesni_init_tfm(struct crypto_aead *aead)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void crypto_aegis128_aesni_exit_tfm(struct crypto_aead *aead)
|
||||
{
|
||||
}
|
||||
|
||||
static int cryptd_aegis128_aesni_setkey(struct crypto_aead *aead,
|
||||
const u8 *key, unsigned int keylen)
|
||||
{
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
|
||||
}
|
||||
|
||||
static int cryptd_aegis128_aesni_setauthsize(struct crypto_aead *aead,
|
||||
unsigned int authsize)
|
||||
{
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
|
||||
}
|
||||
|
||||
static int cryptd_aegis128_aesni_encrypt(struct aead_request *req)
|
||||
{
|
||||
struct crypto_aead *aead = crypto_aead_reqtfm(req);
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
aead = &cryptd_tfm->base;
|
||||
if (irq_fpu_usable() && (!in_atomic() ||
|
||||
!cryptd_aead_queued(cryptd_tfm)))
|
||||
aead = cryptd_aead_child(cryptd_tfm);
|
||||
|
||||
aead_request_set_tfm(req, aead);
|
||||
|
||||
return crypto_aead_encrypt(req);
|
||||
}
|
||||
|
||||
static int cryptd_aegis128_aesni_decrypt(struct aead_request *req)
|
||||
{
|
||||
struct crypto_aead *aead = crypto_aead_reqtfm(req);
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
aead = &cryptd_tfm->base;
|
||||
if (irq_fpu_usable() && (!in_atomic() ||
|
||||
!cryptd_aead_queued(cryptd_tfm)))
|
||||
aead = cryptd_aead_child(cryptd_tfm);
|
||||
|
||||
aead_request_set_tfm(req, aead);
|
||||
|
||||
return crypto_aead_decrypt(req);
|
||||
}
|
||||
|
||||
static int cryptd_aegis128_aesni_init_tfm(struct crypto_aead *aead)
|
||||
{
|
||||
struct cryptd_aead *cryptd_tfm;
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
|
||||
cryptd_tfm = cryptd_alloc_aead("__aegis128-aesni", CRYPTO_ALG_INTERNAL,
|
||||
CRYPTO_ALG_INTERNAL);
|
||||
if (IS_ERR(cryptd_tfm))
|
||||
return PTR_ERR(cryptd_tfm);
|
||||
|
||||
*ctx = cryptd_tfm;
|
||||
crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Release the cryptd transform stored in the context of @aead. */
static void cryptd_aegis128_aesni_exit_tfm(struct crypto_aead *aead)
{
	struct cryptd_aead **ctx = crypto_aead_ctx(aead);

	cryptd_free_aead(*ctx);
}
|
||||
|
||||
static struct aead_alg crypto_aegis128_aesni_alg[] = {
|
||||
{
|
||||
.setkey = crypto_aegis128_aesni_setkey,
|
||||
.setauthsize = crypto_aegis128_aesni_setauthsize,
|
||||
.encrypt = crypto_aegis128_aesni_encrypt,
|
||||
.decrypt = crypto_aegis128_aesni_decrypt,
|
||||
.init = crypto_aegis128_aesni_init_tfm,
|
||||
.exit = crypto_aegis128_aesni_exit_tfm,
|
||||
|
||||
.ivsize = AEGIS128_NONCE_SIZE,
|
||||
.maxauthsize = AEGIS128_MAX_AUTH_SIZE,
|
||||
.chunksize = AEGIS128_BLOCK_SIZE,
|
||||
|
||||
.base = {
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct aegis_ctx) +
|
||||
__alignof__(struct aegis_ctx),
|
||||
.cra_alignmask = 0,
|
||||
|
||||
.cra_name = "__aegis128",
|
||||
.cra_driver_name = "__aegis128-aesni",
|
||||
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
}, {
|
||||
.setkey = cryptd_aegis128_aesni_setkey,
|
||||
.setauthsize = cryptd_aegis128_aesni_setauthsize,
|
||||
.encrypt = cryptd_aegis128_aesni_encrypt,
|
||||
.decrypt = cryptd_aegis128_aesni_decrypt,
|
||||
.init = cryptd_aegis128_aesni_init_tfm,
|
||||
.exit = cryptd_aegis128_aesni_exit_tfm,
|
||||
|
||||
.ivsize = AEGIS128_NONCE_SIZE,
|
||||
.maxauthsize = AEGIS128_MAX_AUTH_SIZE,
|
||||
.chunksize = AEGIS128_BLOCK_SIZE,
|
||||
|
||||
.base = {
|
||||
.cra_flags = CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct cryptd_aead *),
|
||||
.cra_alignmask = 0,
|
||||
|
||||
.cra_priority = 400,
|
||||
|
||||
.cra_name = "aegis128",
|
||||
.cra_driver_name = "aegis128-aesni",
|
||||
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static const struct x86_cpu_id aesni_cpu_id[] = {
|
||||
X86_FEATURE_MATCH(X86_FEATURE_AES),
|
||||
X86_FEATURE_MATCH(X86_FEATURE_XMM2),
|
||||
{}
|
||||
};
|
||||
MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
|
||||
|
||||
/*
 * Module entry point: register both AEAD algorithms, but only on CPUs that
 * provide AES-NI *and* SSE2.
 *
 * x86_match_cpu() succeeds as soon as any single entry of aesni_cpu_id
 * matches, so on its own it would also accept a CPU that has SSE2 but lacks
 * AES-NI.  Both features are therefore checked explicitly as well.
 *
 * Returns -ENODEV when a required feature is missing, otherwise the result
 * of crypto_register_aeads().
 */
static int __init crypto_aegis128_aesni_module_init(void)
{
	if (!x86_match_cpu(aesni_cpu_id) ||
	    !boot_cpu_has(X86_FEATURE_AES) ||
	    !boot_cpu_has(X86_FEATURE_XMM2))
		return -ENODEV;

	return crypto_register_aeads(crypto_aegis128_aesni_alg,
				     ARRAY_SIZE(crypto_aegis128_aesni_alg));
}
|
||||
|
||||
/* Module exit point: unregister every algorithm in crypto_aegis128_aesni_alg. */
static void __exit crypto_aegis128_aesni_module_exit(void)
{
	crypto_unregister_aeads(crypto_aegis128_aesni_alg,
				ARRAY_SIZE(crypto_aegis128_aesni_alg));
}
|
||||
|
||||
module_init(crypto_aegis128_aesni_module_init);
|
||||
module_exit(crypto_aegis128_aesni_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm -- AESNI+SSE2 implementation");
|
||||
MODULE_ALIAS_CRYPTO("aegis128");
|
||||
MODULE_ALIAS_CRYPTO("aegis128-aesni");
|
825
arch/x86/crypto/aegis128l-aesni-asm.S
Normal file
825
arch/x86/crypto/aegis128l-aesni-asm.S
Normal file
@ -0,0 +1,825 @@
|
||||
/*
|
||||
* AES-NI + SSE2 implementation of AEGIS-128L
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
#define STATE0 %xmm0
|
||||
#define STATE1 %xmm1
|
||||
#define STATE2 %xmm2
|
||||
#define STATE3 %xmm3
|
||||
#define STATE4 %xmm4
|
||||
#define STATE5 %xmm5
|
||||
#define STATE6 %xmm6
|
||||
#define STATE7 %xmm7
|
||||
#define MSG0 %xmm8
|
||||
#define MSG1 %xmm9
|
||||
#define T0 %xmm10
|
||||
#define T1 %xmm11
|
||||
#define T2 %xmm12
|
||||
#define T3 %xmm13
|
||||
|
||||
#define STATEP %rdi
|
||||
#define LEN %rsi
|
||||
#define SRC %rdx
|
||||
#define DST %rcx
|
||||
|
||||
.section .rodata.cst16.aegis128l_const, "aM", @progbits, 32
|
||||
.align 16
|
||||
.Laegis128l_const_0:
|
||||
.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
|
||||
.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
|
||||
.Laegis128l_const_1:
|
||||
.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
|
||||
.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
|
||||
|
||||
.section .rodata.cst16.aegis128l_counter, "aM", @progbits, 16
|
||||
.align 16
|
||||
.Laegis128l_counter0:
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
|
||||
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
|
||||
.Laegis128l_counter1:
|
||||
.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
|
||||
.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
|
||||
|
||||
.text
|
||||
|
||||
/*
 * __load_partial: internal ABI
 * input:
 *   LEN - bytes
 *   SRC - src
 * output:
 *   MSG0 - first message block
 *   MSG1 - second message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 *
 * Loads the LEN bytes at SRC into MSG0/MSG1; all bytes beyond LEN are left
 * zero.  The data is assembled from its end towards its start: each set bit
 * of LEN (1, 2, 4, 8, 16) selects one load of that size, located at the
 * offset obtained by keeping only the higher bits of LEN.  Only bits 0-4 of
 * LEN are examined.
 */
__load_partial:
	/* start with an all-zero accumulator and all-zero blocks */
	xor %r9, %r9
	pxor MSG0, MSG0
	pxor MSG1, MSG1

	/* bit 0 set: fetch the single trailing byte at offset (LEN & 0x1E) */
	mov LEN, %r8
	and $0x1, %r8
	jz .Lld_partial_1

	mov LEN, %r8
	and $0x1E, %r8
	add SRC, %r8
	mov (%r8), %r9b

.Lld_partial_1:
	/* bit 1 set: shift up and put the 2 bytes at offset (LEN & 0x1C) below */
	mov LEN, %r8
	and $0x2, %r8
	jz .Lld_partial_2

	mov LEN, %r8
	and $0x1C, %r8
	add SRC, %r8
	shl $0x10, %r9
	mov (%r8), %r9w

.Lld_partial_2:
	/* bit 2 set: shift up and put the 4 bytes at offset (LEN & 0x18) below */
	mov LEN, %r8
	and $0x4, %r8
	jz .Lld_partial_4

	mov LEN, %r8
	and $0x18, %r8
	add SRC, %r8
	shl $32, %r9
	mov (%r8), %r8d		/* 32-bit load clears the upper half of %r8 */
	xor %r8, %r9

.Lld_partial_4:
	/* up to 7 gathered bytes become the low qword of MSG0 */
	movq %r9, MSG0

	/* bit 3 set: move them to the high qword, load 8 bytes below them */
	mov LEN, %r8
	and $0x8, %r8
	jz .Lld_partial_8

	mov LEN, %r8
	and $0x10, %r8
	add SRC, %r8
	pslldq $8, MSG0
	movq (%r8), T0
	pxor T0, MSG0

.Lld_partial_8:
	/*
	 * bit 4 set: what has been gathered so far belongs to the second
	 * block; the first block is the full 16 bytes at SRC
	 */
	mov LEN, %r8
	and $0x10, %r8
	jz .Lld_partial_16

	movdqa MSG0, MSG1
	movdqu (SRC), MSG0

.Lld_partial_16:
	ret
ENDPROC(__load_partial)
|
||||
|
||||
/*
|
||||
* __store_partial: internal ABI
|
||||
* input:
|
||||
* LEN - bytes
|
||||
* DST - dst
|
||||
 * input (data to store; T0 is modified):
|
||||
* T0 - first message block
|
||||
* T1 - second message block
|
||||
* changed:
|
||||
* %r8
|
||||
* %r9
|
||||
* %r10
|
||||
*/
|
||||
__store_partial:
|
||||
mov LEN, %r8
|
||||
mov DST, %r9
|
||||
|
||||
cmp $16, %r8
|
||||
jl .Lst_partial_16
|
||||
|
||||
movdqu T0, (%r9)
|
||||
movdqa T1, T0
|
||||
|
||||
sub $16, %r8
|
||||
add $16, %r9
|
||||
|
||||
.Lst_partial_16:
|
||||
movq T0, %r10
|
||||
|
||||
cmp $8, %r8
|
||||
jl .Lst_partial_8
|
||||
|
||||
mov %r10, (%r9)
|
||||
psrldq $8, T0
|
||||
movq T0, %r10
|
||||
|
||||
sub $8, %r8
|
||||
add $8, %r9
|
||||
|
||||
.Lst_partial_8:
|
||||
cmp $4, %r8
|
||||
jl .Lst_partial_4
|
||||
|
||||
mov %r10d, (%r9)
|
||||
shr $32, %r10
|
||||
|
||||
sub $4, %r8
|
||||
add $4, %r9
|
||||
|
||||
.Lst_partial_4:
|
||||
cmp $2, %r8
|
||||
jl .Lst_partial_2
|
||||
|
||||
mov %r10w, (%r9)
|
||||
shr $0x10, %r10
|
||||
|
||||
sub $2, %r8
|
||||
add $2, %r9
|
||||
|
||||
.Lst_partial_2:
|
||||
cmp $1, %r8
|
||||
jl .Lst_partial_1
|
||||
|
||||
mov %r10b, (%r9)
|
||||
|
||||
.Lst_partial_1:
|
||||
ret
|
||||
ENDPROC(__store_partial)
|
||||
|
||||
.macro update
|
||||
movdqa STATE7, T0
|
||||
aesenc STATE0, STATE7
|
||||
aesenc STATE1, STATE0
|
||||
aesenc STATE2, STATE1
|
||||
aesenc STATE3, STATE2
|
||||
aesenc STATE4, STATE3
|
||||
aesenc STATE5, STATE4
|
||||
aesenc STATE6, STATE5
|
||||
aesenc T0, STATE6
|
||||
.endm
|
||||
|
||||
.macro update0
|
||||
update
|
||||
pxor MSG0, STATE7
|
||||
pxor MSG1, STATE3
|
||||
.endm
|
||||
|
||||
.macro update1
|
||||
update
|
||||
pxor MSG0, STATE6
|
||||
pxor MSG1, STATE2
|
||||
.endm
|
||||
|
||||
.macro update2
|
||||
update
|
||||
pxor MSG0, STATE5
|
||||
pxor MSG1, STATE1
|
||||
.endm
|
||||
|
||||
.macro update3
|
||||
update
|
||||
pxor MSG0, STATE4
|
||||
pxor MSG1, STATE0
|
||||
.endm
|
||||
|
||||
.macro update4
|
||||
update
|
||||
pxor MSG0, STATE3
|
||||
pxor MSG1, STATE7
|
||||
.endm
|
||||
|
||||
.macro update5
|
||||
update
|
||||
pxor MSG0, STATE2
|
||||
pxor MSG1, STATE6
|
||||
.endm
|
||||
|
||||
.macro update6
|
||||
update
|
||||
pxor MSG0, STATE1
|
||||
pxor MSG1, STATE5
|
||||
.endm
|
||||
|
||||
.macro update7
|
||||
update
|
||||
pxor MSG0, STATE0
|
||||
pxor MSG1, STATE4
|
||||
.endm
|
||||
|
||||
.macro state_load
|
||||
movdqu 0x00(STATEP), STATE0
|
||||
movdqu 0x10(STATEP), STATE1
|
||||
movdqu 0x20(STATEP), STATE2
|
||||
movdqu 0x30(STATEP), STATE3
|
||||
movdqu 0x40(STATEP), STATE4
|
||||
movdqu 0x50(STATEP), STATE5
|
||||
movdqu 0x60(STATEP), STATE6
|
||||
movdqu 0x70(STATEP), STATE7
|
||||
.endm
|
||||
|
||||
.macro state_store s0 s1 s2 s3 s4 s5 s6 s7
|
||||
movdqu \s7, 0x00(STATEP)
|
||||
movdqu \s0, 0x10(STATEP)
|
||||
movdqu \s1, 0x20(STATEP)
|
||||
movdqu \s2, 0x30(STATEP)
|
||||
movdqu \s3, 0x40(STATEP)
|
||||
movdqu \s4, 0x50(STATEP)
|
||||
movdqu \s5, 0x60(STATEP)
|
||||
movdqu \s6, 0x70(STATEP)
|
||||
.endm
|
||||
|
||||
.macro state_store0
|
||||
state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7
|
||||
.endm
|
||||
|
||||
.macro state_store1
|
||||
state_store STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6
|
||||
.endm
|
||||
|
||||
.macro state_store2
|
||||
state_store STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
|
||||
.endm
|
||||
|
||||
.macro state_store3
|
||||
state_store STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4
|
||||
.endm
|
||||
|
||||
.macro state_store4
|
||||
state_store STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3
|
||||
.endm
|
||||
|
||||
.macro state_store5
|
||||
state_store STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2
|
||||
.endm
|
||||
|
||||
.macro state_store6
|
||||
state_store STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1
|
||||
.endm
|
||||
|
||||
.macro state_store7
|
||||
state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0
|
||||
.endm
|
||||
|
||||
/*
|
||||
* void crypto_aegis128l_aesni_init(void *state, const void *key, const void *iv);
|
||||
*/
|
||||
ENTRY(crypto_aegis128l_aesni_init)
|
||||
FRAME_BEGIN
|
||||
|
||||
/* load key: */
|
||||
movdqa (%rsi), MSG1
|
||||
movdqa MSG1, STATE0
|
||||
movdqa MSG1, STATE4
|
||||
movdqa MSG1, STATE5
|
||||
movdqa MSG1, STATE6
|
||||
movdqa MSG1, STATE7
|
||||
|
||||
/* load IV: */
|
||||
movdqu (%rdx), MSG0
|
||||
pxor MSG0, STATE0
|
||||
pxor MSG0, STATE4
|
||||
|
||||
/* load the constants: */
|
||||
movdqa .Laegis128l_const_0, STATE2
|
||||
movdqa .Laegis128l_const_1, STATE1
|
||||
movdqa STATE1, STATE3
|
||||
pxor STATE2, STATE5
|
||||
pxor STATE1, STATE6
|
||||
pxor STATE2, STATE7
|
||||
|
||||
/* update 10 times with IV and KEY: */
|
||||
update0
|
||||
update1
|
||||
update2
|
||||
update3
|
||||
update4
|
||||
update5
|
||||
update6
|
||||
update7
|
||||
update0
|
||||
update1
|
||||
|
||||
state_store1
|
||||
|
||||
FRAME_END
|
||||
ret
|
||||
ENDPROC(crypto_aegis128l_aesni_init)
|
||||
|
||||
.macro ad_block a i
|
||||
movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
|
||||
movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
|
||||
update\i
|
||||
sub $0x20, LEN
|
||||
cmp $0x20, LEN
|
||||
jl .Lad_out_\i
|
||||
.endm
|
||||
|
||||
/*
|
||||
* void crypto_aegis128l_aesni_ad(void *state, unsigned int length,
|
||||
* const void *data);
|
||||
*/
|
||||
ENTRY(crypto_aegis128l_aesni_ad)
|
||||
FRAME_BEGIN
|
||||
|
||||
cmp $0x20, LEN
|
||||
jb .Lad_out
|
||||
|
||||
state_load
|
||||
|
||||
mov SRC, %r8
|
||||
and $0xf, %r8
|
||||
jnz .Lad_u_loop
|
||||
|
||||
.align 8
|
||||
.Lad_a_loop:
|
||||
ad_block a 0
|
||||
ad_block a 1
|
||||
ad_block a 2
|
||||
ad_block a 3
|
||||
ad_block a 4
|
||||
ad_block a 5
|
||||
ad_block a 6
|
||||
ad_block a 7
|
||||
|
||||
add $0x100, SRC
|
||||
jmp .Lad_a_loop
|
||||
|
||||
.align 8
|
||||
.Lad_u_loop:
|
||||
ad_block u 0
|
||||
ad_block u 1
|
||||
ad_block u 2
|
||||
ad_block u 3
|
||||
ad_block u 4
|
||||
ad_block u 5
|
||||
ad_block u 6
|
||||
ad_block u 7
|
||||
|
||||
add $0x100, SRC
|
||||
jmp .Lad_u_loop
|
||||
|
||||
.Lad_out_0:
|
||||
state_store0
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_1:
|
||||
state_store1
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_2:
|
||||
state_store2
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_3:
|
||||
state_store3
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_4:
|
||||
state_store4
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_5:
|
||||
state_store5
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_6:
|
||||
state_store6
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_7:
|
||||
state_store7
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out:
|
||||
FRAME_END
|
||||
ret
|
||||
ENDPROC(crypto_aegis128l_aesni_ad)
|
||||
|
||||
.macro crypt m0 m1 s0 s1 s2 s3 s4 s5 s6 s7
|
||||
pxor \s1, \m0
|
||||
pxor \s6, \m0
|
||||
movdqa \s2, T3
|
||||
pand \s3, T3
|
||||
pxor T3, \m0
|
||||
|
||||
pxor \s2, \m1
|
||||
pxor \s5, \m1
|
||||
movdqa \s6, T3
|
||||
pand \s7, T3
|
||||
pxor T3, \m1
|
||||
.endm
|
||||
|
||||
.macro crypt0 m0 m1
|
||||
crypt \m0 \m1 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7
|
||||
.endm
|
||||
|
||||
.macro crypt1 m0 m1
|
||||
crypt \m0 \m1 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6
|
||||
.endm
|
||||
|
||||
.macro crypt2 m0 m1
|
||||
crypt \m0 \m1 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
|
||||
.endm
|
||||
|
||||
.macro crypt3 m0 m1
|
||||
crypt \m0 \m1 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4
|
||||
.endm
|
||||
|
||||
.macro crypt4 m0 m1
|
||||
crypt \m0 \m1 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3
|
||||
.endm
|
||||
|
||||
.macro crypt5 m0 m1
|
||||
crypt \m0 \m1 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2
|
||||
.endm
|
||||
|
||||
.macro crypt6 m0 m1
|
||||
crypt \m0 \m1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1
|
||||
.endm
|
||||
|
||||
.macro crypt7 m0 m1
|
||||
crypt \m0 \m1 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0
|
||||
.endm
|
||||
|
||||
.macro encrypt_block a i
|
||||
movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
|
||||
movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
|
||||
movdqa MSG0, T0
|
||||
movdqa MSG1, T1
|
||||
crypt\i T0, T1
|
||||
movdq\a T0, (\i * 0x20 + 0x00)(DST)
|
||||
movdq\a T1, (\i * 0x20 + 0x10)(DST)
|
||||
|
||||
update\i
|
||||
|
||||
sub $0x20, LEN
|
||||
cmp $0x20, LEN
|
||||
jl .Lenc_out_\i
|
||||
.endm
|
||||
|
||||
.macro decrypt_block a i
|
||||
movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
|
||||
movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
|
||||
crypt\i MSG0, MSG1
|
||||
movdq\a MSG0, (\i * 0x20 + 0x00)(DST)
|
||||
movdq\a MSG1, (\i * 0x20 + 0x10)(DST)
|
||||
|
||||
update\i
|
||||
|
||||
sub $0x20, LEN
|
||||
cmp $0x20, LEN
|
||||
jl .Ldec_out_\i
|
||||
.endm
|
||||
|
||||
/*
|
||||
* void crypto_aegis128l_aesni_enc(void *state, unsigned int length,
|
||||
* const void *src, void *dst);
|
||||
*/
|
||||
ENTRY(crypto_aegis128l_aesni_enc)
|
||||
FRAME_BEGIN
|
||||
|
||||
cmp $0x20, LEN
|
||||
jb .Lenc_out
|
||||
|
||||
state_load
|
||||
|
||||
mov SRC, %r8
|
||||
or DST, %r8
|
||||
and $0xf, %r8
|
||||
jnz .Lenc_u_loop
|
||||
|
||||
.align 8
|
||||
.Lenc_a_loop:
|
||||
encrypt_block a 0
|
||||
encrypt_block a 1
|
||||
encrypt_block a 2
|
||||
encrypt_block a 3
|
||||
encrypt_block a 4
|
||||
encrypt_block a 5
|
||||
encrypt_block a 6
|
||||
encrypt_block a 7
|
||||
|
||||
add $0x100, SRC
|
||||
add $0x100, DST
|
||||
jmp .Lenc_a_loop
|
||||
|
||||
.align 8
|
||||
.Lenc_u_loop:
|
||||
encrypt_block u 0
|
||||
encrypt_block u 1
|
||||
encrypt_block u 2
|
||||
encrypt_block u 3
|
||||
encrypt_block u 4
|
||||
encrypt_block u 5
|
||||
encrypt_block u 6
|
||||
encrypt_block u 7
|
||||
|
||||
add $0x100, SRC
|
||||
add $0x100, DST
|
||||
jmp .Lenc_u_loop
|
||||
|
||||
.Lenc_out_0:
|
||||
state_store0
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_1:
|
||||
state_store1
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_2:
|
||||
state_store2
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_3:
|
||||
state_store3
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_4:
|
||||
state_store4
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_5:
|
||||
state_store5
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_6:
|
||||
state_store6
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_7:
|
||||
state_store7
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out:
|
||||
FRAME_END
|
||||
ret
|
||||
ENDPROC(crypto_aegis128l_aesni_enc)
|
||||
|
||||
/*
 * void crypto_aegis128l_aesni_enc_tail(void *state, unsigned int length,
 *                                      const void *src, void *dst);
 *
 * Encrypts the final, partial block pair of a message: loads LEN bytes from
 * SRC (zero-padded by __load_partial), encrypts them with the current state,
 * stores LEN bytes of ciphertext to DST, absorbs the padded plaintext into
 * the state and writes the state back to STATEP.
 */
ENTRY(crypto_aegis128l_aesni_enc_tail)
	FRAME_BEGIN

	state_load

	/* encrypt message: */
	call __load_partial

	/* work on copies: MSG0/MSG1 must keep the plaintext for update0 */
	movdqa MSG0, T0
	movdqa MSG1, T1
	crypt0 T0, T1

	call __store_partial

	update0

	state_store0

	FRAME_END
	/* return to the caller instead of running into the next function */
	ret
ENDPROC(crypto_aegis128l_aesni_enc_tail)
|
||||
|
||||
/*
|
||||
* void crypto_aegis128l_aesni_dec(void *state, unsigned int length,
|
||||
* const void *src, void *dst);
|
||||
*/
|
||||
ENTRY(crypto_aegis128l_aesni_dec)
|
||||
FRAME_BEGIN
|
||||
|
||||
cmp $0x20, LEN
|
||||
jb .Ldec_out
|
||||
|
||||
state_load
|
||||
|
||||
mov SRC, %r8
|
||||
or DST, %r8
|
||||
and $0xF, %r8
|
||||
jnz .Ldec_u_loop
|
||||
|
||||
.align 8
|
||||
.Ldec_a_loop:
|
||||
decrypt_block a 0
|
||||
decrypt_block a 1
|
||||
decrypt_block a 2
|
||||
decrypt_block a 3
|
||||
decrypt_block a 4
|
||||
decrypt_block a 5
|
||||
decrypt_block a 6
|
||||
decrypt_block a 7
|
||||
|
||||
add $0x100, SRC
|
||||
add $0x100, DST
|
||||
jmp .Ldec_a_loop
|
||||
|
||||
.align 8
|
||||
.Ldec_u_loop:
|
||||
decrypt_block u 0
|
||||
decrypt_block u 1
|
||||
decrypt_block u 2
|
||||
decrypt_block u 3
|
||||
decrypt_block u 4
|
||||
decrypt_block u 5
|
||||
decrypt_block u 6
|
||||
decrypt_block u 7
|
||||
|
||||
add $0x100, SRC
|
||||
add $0x100, DST
|
||||
jmp .Ldec_u_loop
|
||||
|
||||
.Ldec_out_0:
|
||||
state_store0
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_1:
|
||||
state_store1
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_2:
|
||||
state_store2
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_3:
|
||||
state_store3
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_4:
|
||||
state_store4
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_5:
|
||||
state_store5
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_6:
|
||||
state_store6
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_7:
|
||||
state_store7
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out:
|
||||
FRAME_END
|
||||
ret
|
||||
ENDPROC(crypto_aegis128l_aesni_dec)
|
||||
|
||||
/*
 * void crypto_aegis128l_aesni_dec_tail(void *state, unsigned int length,
 *                                      const void *src, void *dst);
 *
 * Decrypts the final, partial block pair of a message: loads LEN bytes from
 * SRC (zero-padded by __load_partial), decrypts them in place, stores LEN
 * bytes of plaintext to DST, clears everything past LEN in the decrypted
 * blocks, absorbs them into the state and writes the state back to STATEP.
 */
ENTRY(crypto_aegis128l_aesni_dec_tail)
	FRAME_BEGIN

	state_load

	/* decrypt message: */
	call __load_partial

	crypt0 MSG0, MSG1

	/* __store_partial takes its data in T0/T1 */
	movdqa MSG0, T0
	movdqa MSG1, T1
	call __store_partial

	/*
	 * mask with byte count: the padding bytes of MSG0/MSG1 were XORed
	 * with state by crypt0 and have to be zero again before the update.
	 * The four self-unpacks replicate the low byte of LEN into all 16
	 * bytes; comparing that against the byte indices 0..15 and 16..31
	 * gives 0xff exactly where index < LEN.
	 */
	movq LEN, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	movdqa T0, T1
	movdqa .Laegis128l_counter0, T2
	movdqa .Laegis128l_counter1, T3
	pcmpgtb T2, T0
	pcmpgtb T3, T1
	pand T0, MSG0
	pand T1, MSG1

	update0

	state_store0

	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_dec_tail)
|
||||
|
||||
/*
 * void crypto_aegis128l_aesni_final(void *state, void *tag_xor,
 *                                   unsigned int assoclen,
 *                                   unsigned int cryptlen);
 *
 * Loads the eight state blocks from STATEP, builds a length block from the
 * bit counts of the associated data (low qword) and of the message (high
 * qword), runs seven update rounds with it in both message slots, and
 * finally XORs seven of the state registers into the 16 bytes at tag_xor
 * (%rsi).
 *
 * Both lengths are 'unsigned int' on the C side, so only the low 32 bits of
 * %rdx and %rcx are defined on entry.  They are therefore read as %edx/%ecx;
 * movd zero-extends them into the XMM register.
 */
ENTRY(crypto_aegis128l_aesni_final)
	FRAME_BEGIN

	state_load

	/* prepare length block (32-bit reads, see header comment): */
	movd %edx, MSG0
	movd %ecx, T0
	pslldq $8, T0
	pxor T0, MSG0
	psllq $3, MSG0 /* multiply by 8 (to get bit count) */

	pxor STATE2, MSG0
	movdqa MSG0, MSG1

	/* update state: */
	update0
	update1
	update2
	update3
	update4
	update5
	update6

	/* xor tag: *tag_xor ^= STATE1 ^ STATE2 ^ ... ^ STATE7 */
	movdqu (%rsi), T0

	pxor STATE1, T0
	pxor STATE2, T0
	pxor STATE3, T0
	pxor STATE4, T0
	pxor STATE5, T0
	pxor STATE6, T0
	pxor STATE7, T0

	movdqu T0, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_final)
|
407
arch/x86/crypto/aegis128l-aesni-glue.c
Normal file
407
arch/x86/crypto/aegis128l-aesni-glue.c
Normal file
@ -0,0 +1,407 @@
|
||||
/*
|
||||
* The AEGIS-128L Authenticated-Encryption Algorithm
|
||||
* Glue for AES-NI + SSE2 implementation
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*/
|
||||
|
||||
#include <crypto/cryptd.h>
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
|
||||
#define AEGIS128L_BLOCK_ALIGN 16
|
||||
#define AEGIS128L_BLOCK_SIZE 32
|
||||
#define AEGIS128L_NONCE_SIZE 16
|
||||
#define AEGIS128L_STATE_BLOCKS 8
|
||||
#define AEGIS128L_KEY_SIZE 16
|
||||
#define AEGIS128L_MIN_AUTH_SIZE 8
|
||||
#define AEGIS128L_MAX_AUTH_SIZE 16
|
||||
|
||||
asmlinkage void crypto_aegis128l_aesni_init(void *state, void *key, void *iv);
|
||||
|
||||
asmlinkage void crypto_aegis128l_aesni_ad(
|
||||
void *state, unsigned int length, const void *data);
|
||||
|
||||
asmlinkage void crypto_aegis128l_aesni_enc(
|
||||
void *state, unsigned int length, const void *src, void *dst);
|
||||
|
||||
asmlinkage void crypto_aegis128l_aesni_dec(
|
||||
void *state, unsigned int length, const void *src, void *dst);
|
||||
|
||||
asmlinkage void crypto_aegis128l_aesni_enc_tail(
|
||||
void *state, unsigned int length, const void *src, void *dst);
|
||||
|
||||
asmlinkage void crypto_aegis128l_aesni_dec_tail(
|
||||
void *state, unsigned int length, const void *src, void *dst);
|
||||
|
||||
/*
 * Finalize the state and XOR the resulting tag into *tag_xor.
 *
 * The parameter names follow the order the assembly consumes them in:
 * the third argument (%rdx) is the associated-data length and the fourth
 * (%rcx) is the message length. The previous prototype had the two names
 * swapped, although the call site already passed (assoclen, cryptlen).
 */
asmlinkage void crypto_aegis128l_aesni_final(
		void *state, void *tag_xor, unsigned int assoclen,
		unsigned int cryptlen);
|
||||
|
||||
struct aegis_block {
|
||||
u8 bytes[AEGIS128L_BLOCK_SIZE] __aligned(AEGIS128L_BLOCK_ALIGN);
|
||||
};
|
||||
|
||||
struct aegis_state {
|
||||
struct aegis_block blocks[AEGIS128L_STATE_BLOCKS];
|
||||
};
|
||||
|
||||
struct aegis_ctx {
|
||||
struct aegis_block key;
|
||||
};
|
||||
|
||||
struct aegis_crypt_ops {
|
||||
int (*skcipher_walk_init)(struct skcipher_walk *walk,
|
||||
struct aead_request *req, bool atomic);
|
||||
|
||||
void (*crypt_blocks)(void *state, unsigned int length, const void *src,
|
||||
void *dst);
|
||||
void (*crypt_tail)(void *state, unsigned int length, const void *src,
|
||||
void *dst);
|
||||
};
|
||||
|
||||
static void crypto_aegis128l_aesni_process_ad(
|
||||
struct aegis_state *state, struct scatterlist *sg_src,
|
||||
unsigned int assoclen)
|
||||
{
|
||||
struct scatter_walk walk;
|
||||
struct aegis_block buf;
|
||||
unsigned int pos = 0;
|
||||
|
||||
scatterwalk_start(&walk, sg_src);
|
||||
while (assoclen != 0) {
|
||||
unsigned int size = scatterwalk_clamp(&walk, assoclen);
|
||||
unsigned int left = size;
|
||||
void *mapped = scatterwalk_map(&walk);
|
||||
const u8 *src = (const u8 *)mapped;
|
||||
|
||||
if (pos + size >= AEGIS128L_BLOCK_SIZE) {
|
||||
if (pos > 0) {
|
||||
unsigned int fill = AEGIS128L_BLOCK_SIZE - pos;
|
||||
memcpy(buf.bytes + pos, src, fill);
|
||||
crypto_aegis128l_aesni_ad(state,
|
||||
AEGIS128L_BLOCK_SIZE,
|
||||
buf.bytes);
|
||||
pos = 0;
|
||||
left -= fill;
|
||||
src += fill;
|
||||
}
|
||||
|
||||
crypto_aegis128l_aesni_ad(state, left, src);
|
||||
|
||||
src += left & ~(AEGIS128L_BLOCK_SIZE - 1);
|
||||
left &= AEGIS128L_BLOCK_SIZE - 1;
|
||||
}
|
||||
|
||||
memcpy(buf.bytes + pos, src, left);
|
||||
pos += left;
|
||||
assoclen -= size;
|
||||
|
||||
scatterwalk_unmap(mapped);
|
||||
scatterwalk_advance(&walk, size);
|
||||
scatterwalk_done(&walk, 0, assoclen);
|
||||
}
|
||||
|
||||
if (pos > 0) {
|
||||
memset(buf.bytes + pos, 0, AEGIS128L_BLOCK_SIZE - pos);
|
||||
crypto_aegis128l_aesni_ad(state, AEGIS128L_BLOCK_SIZE, buf.bytes);
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_aegis128l_aesni_process_crypt(
|
||||
struct aegis_state *state, struct aead_request *req,
|
||||
const struct aegis_crypt_ops *ops)
|
||||
{
|
||||
struct skcipher_walk walk;
|
||||
u8 *src, *dst;
|
||||
unsigned int chunksize, base;
|
||||
|
||||
ops->skcipher_walk_init(&walk, req, false);
|
||||
|
||||
while (walk.nbytes) {
|
||||
src = walk.src.virt.addr;
|
||||
dst = walk.dst.virt.addr;
|
||||
chunksize = walk.nbytes;
|
||||
|
||||
ops->crypt_blocks(state, chunksize, src, dst);
|
||||
|
||||
base = chunksize & ~(AEGIS128L_BLOCK_SIZE - 1);
|
||||
src += base;
|
||||
dst += base;
|
||||
chunksize &= AEGIS128L_BLOCK_SIZE - 1;
|
||||
|
||||
if (chunksize > 0)
|
||||
ops->crypt_tail(state, chunksize, src, dst);
|
||||
|
||||
skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static struct aegis_ctx *crypto_aegis128l_aesni_ctx(struct crypto_aead *aead)
|
||||
{
|
||||
u8 *ctx = crypto_aead_ctx(aead);
|
||||
ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
|
||||
return (void *)ctx;
|
||||
}
|
||||
|
||||
static int crypto_aegis128l_aesni_setkey(struct crypto_aead *aead,
|
||||
const u8 *key, unsigned int keylen)
|
||||
{
|
||||
struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(aead);
|
||||
|
||||
if (keylen != AEGIS128L_KEY_SIZE) {
|
||||
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memcpy(ctx->key.bytes, key, AEGIS128L_KEY_SIZE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_aegis128l_aesni_setauthsize(struct crypto_aead *tfm,
|
||||
unsigned int authsize)
|
||||
{
|
||||
if (authsize > AEGIS128L_MAX_AUTH_SIZE)
|
||||
return -EINVAL;
|
||||
if (authsize < AEGIS128L_MIN_AUTH_SIZE)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void crypto_aegis128l_aesni_crypt(struct aead_request *req,
|
||||
struct aegis_block *tag_xor,
|
||||
unsigned int cryptlen,
|
||||
const struct aegis_crypt_ops *ops)
|
||||
{
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(tfm);
|
||||
struct aegis_state state;
|
||||
|
||||
kernel_fpu_begin();
|
||||
|
||||
crypto_aegis128l_aesni_init(&state, ctx->key.bytes, req->iv);
|
||||
crypto_aegis128l_aesni_process_ad(&state, req->src, req->assoclen);
|
||||
crypto_aegis128l_aesni_process_crypt(&state, req, ops);
|
||||
crypto_aegis128l_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
|
||||
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
static int crypto_aegis128l_aesni_encrypt(struct aead_request *req)
|
||||
{
|
||||
static const struct aegis_crypt_ops OPS = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_encrypt,
|
||||
.crypt_blocks = crypto_aegis128l_aesni_enc,
|
||||
.crypt_tail = crypto_aegis128l_aesni_enc_tail,
|
||||
};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct aegis_block tag = {};
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen;
|
||||
|
||||
crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS);
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->dst,
|
||||
req->assoclen + cryptlen, authsize, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_aegis128l_aesni_decrypt(struct aead_request *req)
|
||||
{
|
||||
static const struct aegis_block zeros = {};
|
||||
|
||||
static const struct aegis_crypt_ops OPS = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_decrypt,
|
||||
.crypt_blocks = crypto_aegis128l_aesni_dec,
|
||||
.crypt_tail = crypto_aegis128l_aesni_dec_tail,
|
||||
};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct aegis_block tag;
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen - authsize;
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->src,
|
||||
req->assoclen + cryptlen, authsize, 0);
|
||||
|
||||
crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS);
|
||||
|
||||
return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
|
||||
}
|
||||
|
||||
static int crypto_aegis128l_aesni_init_tfm(struct crypto_aead *aead)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void crypto_aegis128l_aesni_exit_tfm(struct crypto_aead *aead)
|
||||
{
|
||||
}
|
||||
|
||||
static int cryptd_aegis128l_aesni_setkey(struct crypto_aead *aead,
|
||||
const u8 *key, unsigned int keylen)
|
||||
{
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
|
||||
}
|
||||
|
||||
static int cryptd_aegis128l_aesni_setauthsize(struct crypto_aead *aead,
|
||||
unsigned int authsize)
|
||||
{
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
|
||||
}
|
||||
|
||||
static int cryptd_aegis128l_aesni_encrypt(struct aead_request *req)
|
||||
{
|
||||
struct crypto_aead *aead = crypto_aead_reqtfm(req);
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
aead = &cryptd_tfm->base;
|
||||
if (irq_fpu_usable() && (!in_atomic() ||
|
||||
!cryptd_aead_queued(cryptd_tfm)))
|
||||
aead = cryptd_aead_child(cryptd_tfm);
|
||||
|
||||
aead_request_set_tfm(req, aead);
|
||||
|
||||
return crypto_aead_encrypt(req);
|
||||
}
|
||||
|
||||
static int cryptd_aegis128l_aesni_decrypt(struct aead_request *req)
|
||||
{
|
||||
struct crypto_aead *aead = crypto_aead_reqtfm(req);
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
aead = &cryptd_tfm->base;
|
||||
if (irq_fpu_usable() && (!in_atomic() ||
|
||||
!cryptd_aead_queued(cryptd_tfm)))
|
||||
aead = cryptd_aead_child(cryptd_tfm);
|
||||
|
||||
aead_request_set_tfm(req, aead);
|
||||
|
||||
return crypto_aead_decrypt(req);
|
||||
}
|
||||
|
||||
static int cryptd_aegis128l_aesni_init_tfm(struct crypto_aead *aead)
|
||||
{
|
||||
struct cryptd_aead *cryptd_tfm;
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
|
||||
cryptd_tfm = cryptd_alloc_aead("__aegis128l-aesni", CRYPTO_ALG_INTERNAL,
|
||||
CRYPTO_ALG_INTERNAL);
|
||||
if (IS_ERR(cryptd_tfm))
|
||||
return PTR_ERR(cryptd_tfm);
|
||||
|
||||
*ctx = cryptd_tfm;
|
||||
crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cryptd_aegis128l_aesni_exit_tfm(struct crypto_aead *aead)
|
||||
{
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
|
||||
cryptd_free_aead(*ctx);
|
||||
}
|
||||
|
||||
static struct aead_alg crypto_aegis128l_aesni_alg[] = {
|
||||
{
|
||||
.setkey = crypto_aegis128l_aesni_setkey,
|
||||
.setauthsize = crypto_aegis128l_aesni_setauthsize,
|
||||
.encrypt = crypto_aegis128l_aesni_encrypt,
|
||||
.decrypt = crypto_aegis128l_aesni_decrypt,
|
||||
.init = crypto_aegis128l_aesni_init_tfm,
|
||||
.exit = crypto_aegis128l_aesni_exit_tfm,
|
||||
|
||||
.ivsize = AEGIS128L_NONCE_SIZE,
|
||||
.maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
|
||||
.chunksize = AEGIS128L_BLOCK_SIZE,
|
||||
|
||||
.base = {
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct aegis_ctx) +
|
||||
__alignof__(struct aegis_ctx),
|
||||
.cra_alignmask = 0,
|
||||
|
||||
.cra_name = "__aegis128l",
|
||||
.cra_driver_name = "__aegis128l-aesni",
|
||||
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
}, {
|
||||
.setkey = cryptd_aegis128l_aesni_setkey,
|
||||
.setauthsize = cryptd_aegis128l_aesni_setauthsize,
|
||||
.encrypt = cryptd_aegis128l_aesni_encrypt,
|
||||
.decrypt = cryptd_aegis128l_aesni_decrypt,
|
||||
.init = cryptd_aegis128l_aesni_init_tfm,
|
||||
.exit = cryptd_aegis128l_aesni_exit_tfm,
|
||||
|
||||
.ivsize = AEGIS128L_NONCE_SIZE,
|
||||
.maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
|
||||
.chunksize = AEGIS128L_BLOCK_SIZE,
|
||||
|
||||
.base = {
|
||||
.cra_flags = CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct cryptd_aead *),
|
||||
.cra_alignmask = 0,
|
||||
|
||||
.cra_priority = 400,
|
||||
|
||||
.cra_name = "aegis128l",
|
||||
.cra_driver_name = "aegis128l-aesni",
|
||||
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static const struct x86_cpu_id aesni_cpu_id[] = {
|
||||
X86_FEATURE_MATCH(X86_FEATURE_AES),
|
||||
X86_FEATURE_MATCH(X86_FEATURE_XMM2),
|
||||
{}
|
||||
};
|
||||
MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
|
||||
|
||||
/*
 * Module entry point: register both AEAD algorithms, but only on CPUs that
 * can execute the AES-NI + SSE2 assembly.
 *
 * Returns -ENODEV when a required CPU feature is missing, otherwise the
 * result of crypto_register_aeads().
 */
static int __init crypto_aegis128l_aesni_module_init(void)
{
	/*
	 * x86_match_cpu() reports success as soon as ANY entry of the table
	 * matches, so on its own it would accept a CPU that has SSE2 but no
	 * AES-NI (or vice versa). The table is kept for module autoloading;
	 * the explicit checks below enforce that BOTH features are present.
	 */
	if (!x86_match_cpu(aesni_cpu_id) ||
	    !boot_cpu_has(X86_FEATURE_AES) ||
	    !boot_cpu_has(X86_FEATURE_XMM2))
		return -ENODEV;

	return crypto_register_aeads(crypto_aegis128l_aesni_alg,
				     ARRAY_SIZE(crypto_aegis128l_aesni_alg));
}
|
||||
|
||||
static void __exit crypto_aegis128l_aesni_module_exit(void)
|
||||
{
|
||||
crypto_unregister_aeads(crypto_aegis128l_aesni_alg,
|
||||
ARRAY_SIZE(crypto_aegis128l_aesni_alg));
|
||||
}
|
||||
|
||||
module_init(crypto_aegis128l_aesni_module_init);
|
||||
module_exit(crypto_aegis128l_aesni_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("AEGIS-128L AEAD algorithm -- AESNI+SSE2 implementation");
|
||||
MODULE_ALIAS_CRYPTO("aegis128l");
|
||||
MODULE_ALIAS_CRYPTO("aegis128l-aesni");
|
702
arch/x86/crypto/aegis256-aesni-asm.S
Normal file
702
arch/x86/crypto/aegis256-aesni-asm.S
Normal file
@ -0,0 +1,702 @@
|
||||
/*
|
||||
* AES-NI + SSE2 implementation of AEGIS-256
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
#define STATE0 %xmm0
|
||||
#define STATE1 %xmm1
|
||||
#define STATE2 %xmm2
|
||||
#define STATE3 %xmm3
|
||||
#define STATE4 %xmm4
|
||||
#define STATE5 %xmm5
|
||||
#define MSG %xmm6
|
||||
#define T0 %xmm7
|
||||
#define T1 %xmm8
|
||||
#define T2 %xmm9
|
||||
#define T3 %xmm10
|
||||
|
||||
#define STATEP %rdi
|
||||
#define LEN %rsi
|
||||
#define SRC %rdx
|
||||
#define DST %rcx
|
||||
|
||||
.section .rodata.cst16.aegis256_const, "aM", @progbits, 32
|
||||
.align 16
|
||||
.Laegis256_const_0:
|
||||
.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
|
||||
.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
|
||||
.Laegis256_const_1:
|
||||
.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
|
||||
.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
|
||||
|
||||
.section .rodata.cst16.aegis256_counter, "aM", @progbits, 16
|
||||
.align 16
|
||||
.Laegis256_counter:
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
|
||||
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
|
||||
|
||||
.text
|
||||
|
||||
/*
|
||||
* __load_partial: internal ABI
|
||||
* input:
|
||||
* LEN - bytes
|
||||
* SRC - src
|
||||
* output:
|
||||
* MSG - message block
|
||||
* changed:
|
||||
* T0
|
||||
* %r8
|
||||
* %r9
|
||||
*/
|
||||
__load_partial:
|
||||
xor %r9, %r9
|
||||
pxor MSG, MSG
|
||||
|
||||
mov LEN, %r8
|
||||
and $0x1, %r8
|
||||
jz .Lld_partial_1
|
||||
|
||||
mov LEN, %r8
|
||||
and $0x1E, %r8
|
||||
add SRC, %r8
|
||||
mov (%r8), %r9b
|
||||
|
||||
.Lld_partial_1:
|
||||
mov LEN, %r8
|
||||
and $0x2, %r8
|
||||
jz .Lld_partial_2
|
||||
|
||||
mov LEN, %r8
|
||||
and $0x1C, %r8
|
||||
add SRC, %r8
|
||||
shl $0x10, %r9
|
||||
mov (%r8), %r9w
|
||||
|
||||
.Lld_partial_2:
|
||||
mov LEN, %r8
|
||||
and $0x4, %r8
|
||||
jz .Lld_partial_4
|
||||
|
||||
mov LEN, %r8
|
||||
and $0x18, %r8
|
||||
add SRC, %r8
|
||||
shl $32, %r9
|
||||
mov (%r8), %r8d
|
||||
xor %r8, %r9
|
||||
|
||||
.Lld_partial_4:
|
||||
movq %r9, MSG
|
||||
|
||||
mov LEN, %r8
|
||||
and $0x8, %r8
|
||||
jz .Lld_partial_8
|
||||
|
||||
mov LEN, %r8
|
||||
and $0x10, %r8
|
||||
add SRC, %r8
|
||||
pslldq $8, MSG
|
||||
movq (%r8), T0
|
||||
pxor T0, MSG
|
||||
|
||||
.Lld_partial_8:
|
||||
ret
|
||||
ENDPROC(__load_partial)
|
||||
|
||||
/*
|
||||
* __store_partial: internal ABI
|
||||
* input:
|
||||
* LEN - bytes
|
||||
* DST - dst
|
||||
* T0 - message block (data to be stored)
|
||||
* changed:
|
||||
* T0
|
||||
* %r8
|
||||
* %r9
|
||||
* %r10
|
||||
*/
|
||||
__store_partial:
|
||||
mov LEN, %r8
|
||||
mov DST, %r9
|
||||
|
||||
movq T0, %r10
|
||||
|
||||
cmp $8, %r8
|
||||
jl .Lst_partial_8
|
||||
|
||||
mov %r10, (%r9)
|
||||
psrldq $8, T0
|
||||
movq T0, %r10
|
||||
|
||||
sub $8, %r8
|
||||
add $8, %r9
|
||||
|
||||
.Lst_partial_8:
|
||||
cmp $4, %r8
|
||||
jl .Lst_partial_4
|
||||
|
||||
mov %r10d, (%r9)
|
||||
shr $32, %r10
|
||||
|
||||
sub $4, %r8
|
||||
add $4, %r9
|
||||
|
||||
.Lst_partial_4:
|
||||
cmp $2, %r8
|
||||
jl .Lst_partial_2
|
||||
|
||||
mov %r10w, (%r9)
|
||||
shr $0x10, %r10
|
||||
|
||||
sub $2, %r8
|
||||
add $2, %r9
|
||||
|
||||
.Lst_partial_2:
|
||||
cmp $1, %r8
|
||||
jl .Lst_partial_1
|
||||
|
||||
mov %r10b, (%r9)
|
||||
|
||||
.Lst_partial_1:
|
||||
ret
|
||||
ENDPROC(__store_partial)
|
||||
|
||||
.macro update
|
||||
movdqa STATE5, T0
|
||||
aesenc STATE0, STATE5
|
||||
aesenc STATE1, STATE0
|
||||
aesenc STATE2, STATE1
|
||||
aesenc STATE3, STATE2
|
||||
aesenc STATE4, STATE3
|
||||
aesenc T0, STATE4
|
||||
.endm
|
||||
|
||||
.macro update0 m
|
||||
update
|
||||
pxor \m, STATE5
|
||||
.endm
|
||||
|
||||
.macro update1 m
|
||||
update
|
||||
pxor \m, STATE4
|
||||
.endm
|
||||
|
||||
.macro update2 m
|
||||
update
|
||||
pxor \m, STATE3
|
||||
.endm
|
||||
|
||||
.macro update3 m
|
||||
update
|
||||
pxor \m, STATE2
|
||||
.endm
|
||||
|
||||
.macro update4 m
|
||||
update
|
||||
pxor \m, STATE1
|
||||
.endm
|
||||
|
||||
.macro update5 m
|
||||
update
|
||||
pxor \m, STATE0
|
||||
.endm
|
||||
|
||||
.macro state_load
|
||||
movdqu 0x00(STATEP), STATE0
|
||||
movdqu 0x10(STATEP), STATE1
|
||||
movdqu 0x20(STATEP), STATE2
|
||||
movdqu 0x30(STATEP), STATE3
|
||||
movdqu 0x40(STATEP), STATE4
|
||||
movdqu 0x50(STATEP), STATE5
|
||||
.endm
|
||||
|
||||
.macro state_store s0 s1 s2 s3 s4 s5
|
||||
movdqu \s5, 0x00(STATEP)
|
||||
movdqu \s0, 0x10(STATEP)
|
||||
movdqu \s1, 0x20(STATEP)
|
||||
movdqu \s2, 0x30(STATEP)
|
||||
movdqu \s3, 0x40(STATEP)
|
||||
movdqu \s4, 0x50(STATEP)
|
||||
.endm
|
||||
|
||||
.macro state_store0
|
||||
state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
|
||||
.endm
|
||||
|
||||
.macro state_store1
|
||||
state_store STATE5 STATE0 STATE1 STATE2 STATE3 STATE4
|
||||
.endm
|
||||
|
||||
.macro state_store2
|
||||
state_store STATE4 STATE5 STATE0 STATE1 STATE2 STATE3
|
||||
.endm
|
||||
|
||||
.macro state_store3
|
||||
state_store STATE3 STATE4 STATE5 STATE0 STATE1 STATE2
|
||||
.endm
|
||||
|
||||
.macro state_store4
|
||||
state_store STATE2 STATE3 STATE4 STATE5 STATE0 STATE1
|
||||
.endm
|
||||
|
||||
.macro state_store5
|
||||
state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE0
|
||||
.endm
|
||||
|
||||
/*
|
||||
* void crypto_aegis256_aesni_init(void *state, const void *key, const void *iv);
|
||||
*/
|
||||
ENTRY(crypto_aegis256_aesni_init)
|
||||
FRAME_BEGIN
|
||||
|
||||
/* load key: */
|
||||
movdqa 0x00(%rsi), MSG
|
||||
movdqa 0x10(%rsi), T1
|
||||
movdqa MSG, STATE4
|
||||
movdqa T1, STATE5
|
||||
|
||||
/* load IV: */
|
||||
movdqu 0x00(%rdx), T2
|
||||
movdqu 0x10(%rdx), T3
|
||||
pxor MSG, T2
|
||||
pxor T1, T3
|
||||
movdqa T2, STATE0
|
||||
movdqa T3, STATE1
|
||||
|
||||
/* load the constants: */
|
||||
movdqa .Laegis256_const_0, STATE3
|
||||
movdqa .Laegis256_const_1, STATE2
|
||||
pxor STATE3, STATE4
|
||||
pxor STATE2, STATE5
|
||||
|
||||
/* update 10 times with IV and KEY: */
|
||||
update0 MSG
|
||||
update1 T1
|
||||
update2 T2
|
||||
update3 T3
|
||||
update4 MSG
|
||||
update5 T1
|
||||
update0 T2
|
||||
update1 T3
|
||||
update2 MSG
|
||||
update3 T1
|
||||
update4 T2
|
||||
update5 T3
|
||||
update0 MSG
|
||||
update1 T1
|
||||
update2 T2
|
||||
update3 T3
|
||||
|
||||
state_store3
|
||||
|
||||
FRAME_END
|
||||
ret
|
||||
ENDPROC(crypto_aegis256_aesni_init)
|
||||
|
||||
.macro ad_block a i
|
||||
movdq\a (\i * 0x10)(SRC), MSG
|
||||
update\i MSG
|
||||
sub $0x10, LEN
|
||||
cmp $0x10, LEN
|
||||
jl .Lad_out_\i
|
||||
.endm
|
||||
|
||||
/*
|
||||
* void crypto_aegis256_aesni_ad(void *state, unsigned int length,
|
||||
* const void *data);
|
||||
*/
|
||||
ENTRY(crypto_aegis256_aesni_ad)
|
||||
FRAME_BEGIN
|
||||
|
||||
cmp $0x10, LEN
|
||||
jb .Lad_out
|
||||
|
||||
state_load
|
||||
|
||||
mov SRC, %r8
|
||||
and $0xf, %r8
|
||||
jnz .Lad_u_loop
|
||||
|
||||
.align 8
|
||||
.Lad_a_loop:
|
||||
ad_block a 0
|
||||
ad_block a 1
|
||||
ad_block a 2
|
||||
ad_block a 3
|
||||
ad_block a 4
|
||||
ad_block a 5
|
||||
|
||||
add $0x60, SRC
|
||||
jmp .Lad_a_loop
|
||||
|
||||
.align 8
|
||||
.Lad_u_loop:
|
||||
ad_block u 0
|
||||
ad_block u 1
|
||||
ad_block u 2
|
||||
ad_block u 3
|
||||
ad_block u 4
|
||||
ad_block u 5
|
||||
|
||||
add $0x60, SRC
|
||||
jmp .Lad_u_loop
|
||||
|
||||
.Lad_out_0:
|
||||
state_store0
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_1:
|
||||
state_store1
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_2:
|
||||
state_store2
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_3:
|
||||
state_store3
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_4:
|
||||
state_store4
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_5:
|
||||
state_store5
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out:
|
||||
FRAME_END
|
||||
ret
|
||||
ENDPROC(crypto_aegis256_aesni_ad)
|
||||
|
||||
.macro crypt m s0 s1 s2 s3 s4 s5
|
||||
pxor \s1, \m
|
||||
pxor \s4, \m
|
||||
pxor \s5, \m
|
||||
movdqa \s2, T3
|
||||
pand \s3, T3
|
||||
pxor T3, \m
|
||||
.endm
|
||||
|
||||
.macro crypt0 m
|
||||
crypt \m STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
|
||||
.endm
|
||||
|
||||
.macro crypt1 m
|
||||
crypt \m STATE5 STATE0 STATE1 STATE2 STATE3 STATE4
|
||||
.endm
|
||||
|
||||
.macro crypt2 m
|
||||
crypt \m STATE4 STATE5 STATE0 STATE1 STATE2 STATE3
|
||||
.endm
|
||||
|
||||
.macro crypt3 m
|
||||
crypt \m STATE3 STATE4 STATE5 STATE0 STATE1 STATE2
|
||||
.endm
|
||||
|
||||
.macro crypt4 m
|
||||
crypt \m STATE2 STATE3 STATE4 STATE5 STATE0 STATE1
|
||||
.endm
|
||||
|
||||
.macro crypt5 m
|
||||
crypt \m STATE1 STATE2 STATE3 STATE4 STATE5 STATE0
|
||||
.endm
|
||||
|
||||
.macro encrypt_block a i
|
||||
movdq\a (\i * 0x10)(SRC), MSG
|
||||
movdqa MSG, T0
|
||||
crypt\i T0
|
||||
movdq\a T0, (\i * 0x10)(DST)
|
||||
|
||||
update\i MSG
|
||||
|
||||
sub $0x10, LEN
|
||||
cmp $0x10, LEN
|
||||
jl .Lenc_out_\i
|
||||
.endm
|
||||
|
||||
.macro decrypt_block a i
|
||||
movdq\a (\i * 0x10)(SRC), MSG
|
||||
crypt\i MSG
|
||||
movdq\a MSG, (\i * 0x10)(DST)
|
||||
|
||||
update\i MSG
|
||||
|
||||
sub $0x10, LEN
|
||||
cmp $0x10, LEN
|
||||
jl .Ldec_out_\i
|
||||
.endm
|
||||
|
||||
/*
|
||||
* void crypto_aegis256_aesni_enc(void *state, unsigned int length,
|
||||
* const void *src, void *dst);
|
||||
*/
|
||||
ENTRY(crypto_aegis256_aesni_enc)
|
||||
FRAME_BEGIN
|
||||
|
||||
cmp $0x10, LEN
|
||||
jb .Lenc_out
|
||||
|
||||
state_load
|
||||
|
||||
mov SRC, %r8
|
||||
or DST, %r8
|
||||
and $0xf, %r8
|
||||
jnz .Lenc_u_loop
|
||||
|
||||
.align 8
|
||||
.Lenc_a_loop:
|
||||
encrypt_block a 0
|
||||
encrypt_block a 1
|
||||
encrypt_block a 2
|
||||
encrypt_block a 3
|
||||
encrypt_block a 4
|
||||
encrypt_block a 5
|
||||
|
||||
add $0x60, SRC
|
||||
add $0x60, DST
|
||||
jmp .Lenc_a_loop
|
||||
|
||||
.align 8
|
||||
.Lenc_u_loop:
|
||||
encrypt_block u 0
|
||||
encrypt_block u 1
|
||||
encrypt_block u 2
|
||||
encrypt_block u 3
|
||||
encrypt_block u 4
|
||||
encrypt_block u 5
|
||||
|
||||
add $0x60, SRC
|
||||
add $0x60, DST
|
||||
jmp .Lenc_u_loop
|
||||
|
||||
.Lenc_out_0:
|
||||
state_store0
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_1:
|
||||
state_store1
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_2:
|
||||
state_store2
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_3:
|
||||
state_store3
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_4:
|
||||
state_store4
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_5:
|
||||
state_store5
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out:
|
||||
FRAME_END
|
||||
ret
|
||||
ENDPROC(crypto_aegis256_aesni_enc)
|
||||
|
||||
/*
|
||||
* void crypto_aegis256_aesni_enc_tail(void *state, unsigned int length,
|
||||
* const void *src, void *dst);
|
||||
*/
|
||||
ENTRY(crypto_aegis256_aesni_enc_tail)
	FRAME_BEGIN

	state_load

	/* encrypt message: load the partial block (MSG is zeroed first) */
	call __load_partial

	/* keep the plaintext in MSG for the state update; encrypt a copy */
	movdqa MSG, T0
	crypt0 T0

	/* write only LEN bytes of the ciphertext to DST */
	call __store_partial

	update0 MSG

	state_store0

	FRAME_END
	/*
	 * The return was missing here, so execution fell through into
	 * crypto_aegis256_aesni_dec, which follows this function.
	 */
	ret
ENDPROC(crypto_aegis256_aesni_enc_tail)
|
||||
|
||||
/*
|
||||
* void crypto_aegis256_aesni_dec(void *state, unsigned int length,
|
||||
* const void *src, void *dst);
|
||||
*/
|
||||
ENTRY(crypto_aegis256_aesni_dec)
|
||||
FRAME_BEGIN
|
||||
|
||||
cmp $0x10, LEN
|
||||
jb .Ldec_out
|
||||
|
||||
state_load
|
||||
|
||||
mov SRC, %r8
|
||||
or DST, %r8
|
||||
and $0xF, %r8
|
||||
jnz .Ldec_u_loop
|
||||
|
||||
.align 8
|
||||
.Ldec_a_loop:
|
||||
decrypt_block a 0
|
||||
decrypt_block a 1
|
||||
decrypt_block a 2
|
||||
decrypt_block a 3
|
||||
decrypt_block a 4
|
||||
decrypt_block a 5
|
||||
|
||||
add $0x60, SRC
|
||||
add $0x60, DST
|
||||
jmp .Ldec_a_loop
|
||||
|
||||
.align 8
|
||||
.Ldec_u_loop:
|
||||
decrypt_block u 0
|
||||
decrypt_block u 1
|
||||
decrypt_block u 2
|
||||
decrypt_block u 3
|
||||
decrypt_block u 4
|
||||
decrypt_block u 5
|
||||
|
||||
add $0x60, SRC
|
||||
add $0x60, DST
|
||||
jmp .Ldec_u_loop
|
||||
|
||||
.Ldec_out_0:
|
||||
state_store0
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_1:
|
||||
state_store1
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_2:
|
||||
state_store2
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_3:
|
||||
state_store3
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_4:
|
||||
state_store4
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_5:
|
||||
state_store5
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out:
|
||||
FRAME_END
|
||||
ret
|
||||
ENDPROC(crypto_aegis256_aesni_dec)
|
||||
|
||||
/*
|
||||
* void crypto_aegis256_aesni_dec_tail(void *state, unsigned int length,
|
||||
* const void *src, void *dst);
|
||||
*/
|
||||
ENTRY(crypto_aegis256_aesni_dec_tail)
|
||||
FRAME_BEGIN
|
||||
|
||||
state_load
|
||||
|
||||
/* decrypt message: */
|
||||
call __load_partial
|
||||
|
||||
crypt0 MSG
|
||||
|
||||
movdqa MSG, T0
|
||||
call __store_partial
|
||||
|
||||
/* mask with byte count: */
|
||||
movq LEN, T0
|
||||
punpcklbw T0, T0
|
||||
punpcklbw T0, T0
|
||||
punpcklbw T0, T0
|
||||
punpcklbw T0, T0
|
||||
movdqa .Laegis256_counter, T1
|
||||
pcmpgtb T1, T0
|
||||
pand T0, MSG
|
||||
|
||||
update0 MSG
|
||||
|
||||
state_store0
|
||||
|
||||
FRAME_END
|
||||
ret
|
||||
ENDPROC(crypto_aegis256_aesni_dec_tail)
|
||||
|
||||
/*
|
||||
* void crypto_aegis256_aesni_final(void *state, void *tag_xor,
|
||||
* u64 assoclen, u64 cryptlen);
|
||||
*/
|
||||
ENTRY(crypto_aegis256_aesni_final)
|
||||
FRAME_BEGIN
|
||||
|
||||
state_load
|
||||
|
||||
/* prepare length block: */
|
||||
movq %rdx, MSG
|
||||
movq %rcx, T0
|
||||
pslldq $8, T0
|
||||
pxor T0, MSG
|
||||
psllq $3, MSG /* multiply by 8 (to get bit count) */
|
||||
|
||||
pxor STATE3, MSG
|
||||
|
||||
/* update state: */
|
||||
update0 MSG
|
||||
update1 MSG
|
||||
update2 MSG
|
||||
update3 MSG
|
||||
update4 MSG
|
||||
update5 MSG
|
||||
update0 MSG
|
||||
|
||||
/* xor tag: */
|
||||
movdqu (%rsi), MSG
|
||||
|
||||
pxor STATE0, MSG
|
||||
pxor STATE1, MSG
|
||||
pxor STATE2, MSG
|
||||
pxor STATE3, MSG
|
||||
pxor STATE4, MSG
|
||||
pxor STATE5, MSG
|
||||
|
||||
movdqu MSG, (%rsi)
|
||||
|
||||
FRAME_END
|
||||
ret
|
||||
ENDPROC(crypto_aegis256_aesni_final)
|
407
arch/x86/crypto/aegis256-aesni-glue.c
Normal file
407
arch/x86/crypto/aegis256-aesni-glue.c
Normal file
@ -0,0 +1,407 @@
|
||||
/*
|
||||
* The AEGIS-256 Authenticated-Encryption Algorithm
|
||||
* Glue for AES-NI + SSE2 implementation
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*/
|
||||
|
||||
#include <crypto/cryptd.h>
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
|
||||
#define AEGIS256_BLOCK_ALIGN 16
|
||||
#define AEGIS256_BLOCK_SIZE 16
|
||||
#define AEGIS256_NONCE_SIZE 32
|
||||
#define AEGIS256_STATE_BLOCKS 6
|
||||
#define AEGIS256_KEY_SIZE 32
|
||||
#define AEGIS256_MIN_AUTH_SIZE 8
|
||||
#define AEGIS256_MAX_AUTH_SIZE 16
|
||||
|
||||
asmlinkage void crypto_aegis256_aesni_init(void *state, void *key, void *iv);
|
||||
|
||||
asmlinkage void crypto_aegis256_aesni_ad(
|
||||
void *state, unsigned int length, const void *data);
|
||||
|
||||
asmlinkage void crypto_aegis256_aesni_enc(
|
||||
void *state, unsigned int length, const void *src, void *dst);
|
||||
|
||||
asmlinkage void crypto_aegis256_aesni_dec(
|
||||
void *state, unsigned int length, const void *src, void *dst);
|
||||
|
||||
asmlinkage void crypto_aegis256_aesni_enc_tail(
|
||||
void *state, unsigned int length, const void *src, void *dst);
|
||||
|
||||
asmlinkage void crypto_aegis256_aesni_dec_tail(
|
||||
void *state, unsigned int length, const void *src, void *dst);
|
||||
|
||||
/*
 * Finalize the state and XOR the resulting tag into *tag_xor.
 *
 * The parameter names follow the order documented and consumed by the
 * assembly implementation: the third argument (%rdx) is the
 * associated-data length and the fourth (%rcx) is the message length.
 * The previous prototype had the two names swapped.
 */
asmlinkage void crypto_aegis256_aesni_final(
		void *state, void *tag_xor, unsigned int assoclen,
		unsigned int cryptlen);
|
||||
|
||||
struct aegis_block {
|
||||
u8 bytes[AEGIS256_BLOCK_SIZE] __aligned(AEGIS256_BLOCK_ALIGN);
|
||||
};
|
||||
|
||||
struct aegis_state {
|
||||
struct aegis_block blocks[AEGIS256_STATE_BLOCKS];
|
||||
};
|
||||
|
||||
struct aegis_ctx {
|
||||
struct aegis_block key[AEGIS256_KEY_SIZE / AEGIS256_BLOCK_SIZE];
|
||||
};
|
||||
|
||||
struct aegis_crypt_ops {
|
||||
int (*skcipher_walk_init)(struct skcipher_walk *walk,
|
||||
struct aead_request *req, bool atomic);
|
||||
|
||||
void (*crypt_blocks)(void *state, unsigned int length, const void *src,
|
||||
void *dst);
|
||||
void (*crypt_tail)(void *state, unsigned int length, const void *src,
|
||||
void *dst);
|
||||
};
|
||||
|
||||
static void crypto_aegis256_aesni_process_ad(
|
||||
struct aegis_state *state, struct scatterlist *sg_src,
|
||||
unsigned int assoclen)
|
||||
{
|
||||
struct scatter_walk walk;
|
||||
struct aegis_block buf;
|
||||
unsigned int pos = 0;
|
||||
|
||||
scatterwalk_start(&walk, sg_src);
|
||||
while (assoclen != 0) {
|
||||
unsigned int size = scatterwalk_clamp(&walk, assoclen);
|
||||
unsigned int left = size;
|
||||
void *mapped = scatterwalk_map(&walk);
|
||||
const u8 *src = (const u8 *)mapped;
|
||||
|
||||
if (pos + size >= AEGIS256_BLOCK_SIZE) {
|
||||
if (pos > 0) {
|
||||
unsigned int fill = AEGIS256_BLOCK_SIZE - pos;
|
||||
memcpy(buf.bytes + pos, src, fill);
|
||||
crypto_aegis256_aesni_ad(state,
|
||||
AEGIS256_BLOCK_SIZE,
|
||||
buf.bytes);
|
||||
pos = 0;
|
||||
left -= fill;
|
||||
src += fill;
|
||||
}
|
||||
|
||||
crypto_aegis256_aesni_ad(state, left, src);
|
||||
|
||||
src += left & ~(AEGIS256_BLOCK_SIZE - 1);
|
||||
left &= AEGIS256_BLOCK_SIZE - 1;
|
||||
}
|
||||
|
||||
memcpy(buf.bytes + pos, src, left);
|
||||
pos += left;
|
||||
assoclen -= size;
|
||||
|
||||
scatterwalk_unmap(mapped);
|
||||
scatterwalk_advance(&walk, size);
|
||||
scatterwalk_done(&walk, 0, assoclen);
|
||||
}
|
||||
|
||||
if (pos > 0) {
|
||||
memset(buf.bytes + pos, 0, AEGIS256_BLOCK_SIZE - pos);
|
||||
crypto_aegis256_aesni_ad(state, AEGIS256_BLOCK_SIZE, buf.bytes);
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_aegis256_aesni_process_crypt(
|
||||
struct aegis_state *state, struct aead_request *req,
|
||||
const struct aegis_crypt_ops *ops)
|
||||
{
|
||||
struct skcipher_walk walk;
|
||||
u8 *src, *dst;
|
||||
unsigned int chunksize, base;
|
||||
|
||||
ops->skcipher_walk_init(&walk, req, false);
|
||||
|
||||
while (walk.nbytes) {
|
||||
src = walk.src.virt.addr;
|
||||
dst = walk.dst.virt.addr;
|
||||
chunksize = walk.nbytes;
|
||||
|
||||
ops->crypt_blocks(state, chunksize, src, dst);
|
||||
|
||||
base = chunksize & ~(AEGIS256_BLOCK_SIZE - 1);
|
||||
src += base;
|
||||
dst += base;
|
||||
chunksize &= AEGIS256_BLOCK_SIZE - 1;
|
||||
|
||||
if (chunksize > 0)
|
||||
ops->crypt_tail(state, chunksize, src, dst);
|
||||
|
||||
skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static struct aegis_ctx *crypto_aegis256_aesni_ctx(struct crypto_aead *aead)
|
||||
{
|
||||
u8 *ctx = crypto_aead_ctx(aead);
|
||||
ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
|
||||
return (void *)ctx;
|
||||
}
|
||||
|
||||
static int crypto_aegis256_aesni_setkey(struct crypto_aead *aead, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(aead);
|
||||
|
||||
if (keylen != AEGIS256_KEY_SIZE) {
|
||||
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memcpy(ctx->key, key, AEGIS256_KEY_SIZE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_aegis256_aesni_setauthsize(struct crypto_aead *tfm,
|
||||
unsigned int authsize)
|
||||
{
|
||||
if (authsize > AEGIS256_MAX_AUTH_SIZE)
|
||||
return -EINVAL;
|
||||
if (authsize < AEGIS256_MIN_AUTH_SIZE)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void crypto_aegis256_aesni_crypt(struct aead_request *req,
|
||||
struct aegis_block *tag_xor,
|
||||
unsigned int cryptlen,
|
||||
const struct aegis_crypt_ops *ops)
|
||||
{
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(tfm);
|
||||
struct aegis_state state;
|
||||
|
||||
kernel_fpu_begin();
|
||||
|
||||
crypto_aegis256_aesni_init(&state, ctx->key, req->iv);
|
||||
crypto_aegis256_aesni_process_ad(&state, req->src, req->assoclen);
|
||||
crypto_aegis256_aesni_process_crypt(&state, req, ops);
|
||||
crypto_aegis256_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
|
||||
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
static int crypto_aegis256_aesni_encrypt(struct aead_request *req)
|
||||
{
|
||||
static const struct aegis_crypt_ops OPS = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_encrypt,
|
||||
.crypt_blocks = crypto_aegis256_aesni_enc,
|
||||
.crypt_tail = crypto_aegis256_aesni_enc_tail,
|
||||
};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct aegis_block tag = {};
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen;
|
||||
|
||||
crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS);
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->dst,
|
||||
req->assoclen + cryptlen, authsize, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_aegis256_aesni_decrypt(struct aead_request *req)
|
||||
{
|
||||
static const struct aegis_block zeros = {};
|
||||
|
||||
static const struct aegis_crypt_ops OPS = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_decrypt,
|
||||
.crypt_blocks = crypto_aegis256_aesni_dec,
|
||||
.crypt_tail = crypto_aegis256_aesni_dec_tail,
|
||||
};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct aegis_block tag;
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen - authsize;
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->src,
|
||||
req->assoclen + cryptlen, authsize, 0);
|
||||
|
||||
crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS);
|
||||
|
||||
return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
|
||||
}
|
||||
|
||||
/* Transform constructor of the internal algorithm: nothing to set up. */
static int crypto_aegis256_aesni_init_tfm(struct crypto_aead *aead)
{
	return 0;
}
|
||||
|
||||
/* Transform destructor of the internal algorithm: nothing to release. */
static void crypto_aegis256_aesni_exit_tfm(struct crypto_aead *aead)
{
}
|
||||
|
||||
static int cryptd_aegis256_aesni_setkey(struct crypto_aead *aead,
|
||||
const u8 *key, unsigned int keylen)
|
||||
{
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
|
||||
}
|
||||
|
||||
static int cryptd_aegis256_aesni_setauthsize(struct crypto_aead *aead,
|
||||
unsigned int authsize)
|
||||
{
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
|
||||
}
|
||||
|
||||
static int cryptd_aegis256_aesni_encrypt(struct aead_request *req)
|
||||
{
|
||||
struct crypto_aead *aead = crypto_aead_reqtfm(req);
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
aead = &cryptd_tfm->base;
|
||||
if (irq_fpu_usable() && (!in_atomic() ||
|
||||
!cryptd_aead_queued(cryptd_tfm)))
|
||||
aead = cryptd_aead_child(cryptd_tfm);
|
||||
|
||||
aead_request_set_tfm(req, aead);
|
||||
|
||||
return crypto_aead_encrypt(req);
|
||||
}
|
||||
|
||||
static int cryptd_aegis256_aesni_decrypt(struct aead_request *req)
|
||||
{
|
||||
struct crypto_aead *aead = crypto_aead_reqtfm(req);
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
aead = &cryptd_tfm->base;
|
||||
if (irq_fpu_usable() && (!in_atomic() ||
|
||||
!cryptd_aead_queued(cryptd_tfm)))
|
||||
aead = cryptd_aead_child(cryptd_tfm);
|
||||
|
||||
aead_request_set_tfm(req, aead);
|
||||
|
||||
return crypto_aead_decrypt(req);
|
||||
}
|
||||
|
||||
static int cryptd_aegis256_aesni_init_tfm(struct crypto_aead *aead)
|
||||
{
|
||||
struct cryptd_aead *cryptd_tfm;
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
|
||||
cryptd_tfm = cryptd_alloc_aead("__aegis256-aesni", CRYPTO_ALG_INTERNAL,
|
||||
CRYPTO_ALG_INTERNAL);
|
||||
if (IS_ERR(cryptd_tfm))
|
||||
return PTR_ERR(cryptd_tfm);
|
||||
|
||||
*ctx = cryptd_tfm;
|
||||
crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Destructor of the wrapper transform: frees the cryptd AEAD it holds. */
static void cryptd_aegis256_aesni_exit_tfm(struct crypto_aead *aead)
{
	struct cryptd_aead **slot = crypto_aead_ctx(aead);
	struct cryptd_aead *backend = *slot;

	cryptd_free_aead(backend);
}
|
||||
|
||||
static struct aead_alg crypto_aegis256_aesni_alg[] = {
|
||||
{
|
||||
.setkey = crypto_aegis256_aesni_setkey,
|
||||
.setauthsize = crypto_aegis256_aesni_setauthsize,
|
||||
.encrypt = crypto_aegis256_aesni_encrypt,
|
||||
.decrypt = crypto_aegis256_aesni_decrypt,
|
||||
.init = crypto_aegis256_aesni_init_tfm,
|
||||
.exit = crypto_aegis256_aesni_exit_tfm,
|
||||
|
||||
.ivsize = AEGIS256_NONCE_SIZE,
|
||||
.maxauthsize = AEGIS256_MAX_AUTH_SIZE,
|
||||
.chunksize = AEGIS256_BLOCK_SIZE,
|
||||
|
||||
.base = {
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct aegis_ctx) +
|
||||
__alignof__(struct aegis_ctx),
|
||||
.cra_alignmask = 0,
|
||||
|
||||
.cra_name = "__aegis256",
|
||||
.cra_driver_name = "__aegis256-aesni",
|
||||
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
}, {
|
||||
.setkey = cryptd_aegis256_aesni_setkey,
|
||||
.setauthsize = cryptd_aegis256_aesni_setauthsize,
|
||||
.encrypt = cryptd_aegis256_aesni_encrypt,
|
||||
.decrypt = cryptd_aegis256_aesni_decrypt,
|
||||
.init = cryptd_aegis256_aesni_init_tfm,
|
||||
.exit = cryptd_aegis256_aesni_exit_tfm,
|
||||
|
||||
.ivsize = AEGIS256_NONCE_SIZE,
|
||||
.maxauthsize = AEGIS256_MAX_AUTH_SIZE,
|
||||
.chunksize = AEGIS256_BLOCK_SIZE,
|
||||
|
||||
.base = {
|
||||
.cra_flags = CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct cryptd_aead *),
|
||||
.cra_alignmask = 0,
|
||||
|
||||
.cra_priority = 400,
|
||||
|
||||
.cra_name = "aegis256",
|
||||
.cra_driver_name = "aegis256-aesni",
|
||||
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static const struct x86_cpu_id aesni_cpu_id[] = {
|
||||
X86_FEATURE_MATCH(X86_FEATURE_AES),
|
||||
X86_FEATURE_MATCH(X86_FEATURE_XMM2),
|
||||
{}
|
||||
};
|
||||
MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
|
||||
|
||||
/*
 * Module init: register both AEAD algorithms, but only on CPUs that can
 * actually run the assembly.
 *
 * x86_match_cpu() is satisfied as soon as one entry of aesni_cpu_id
 * matches, so on its own it would accept a CPU that has SSE2 but lacks
 * AES-NI.  Both features are therefore tested explicitly, together with
 * OS support for the SSE register state.
 *
 * Returns -ENODEV when a requirement is missing, otherwise the result of
 * crypto_register_aeads().
 */
static int __init crypto_aegis256_aesni_module_init(void)
{
	if (!x86_match_cpu(aesni_cpu_id))
		return -ENODEV;

	if (!boot_cpu_has(X86_FEATURE_XMM2) ||
	    !boot_cpu_has(X86_FEATURE_AES) ||
	    !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
		return -ENODEV;

	return crypto_register_aeads(crypto_aegis256_aesni_alg,
				     ARRAY_SIZE(crypto_aegis256_aesni_alg));
}
|
||||
|
||||
/* Module exit: unregister every entry of crypto_aegis256_aesni_alg. */
static void __exit crypto_aegis256_aesni_module_exit(void)
{
	crypto_unregister_aeads(crypto_aegis256_aesni_alg,
				ARRAY_SIZE(crypto_aegis256_aesni_alg));
}
|
||||
|
||||
module_init(crypto_aegis256_aesni_module_init);
|
||||
module_exit(crypto_aegis256_aesni_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("AEGIS-256 AEAD algorithm -- AESNI+SSE2 implementation");
|
||||
MODULE_ALIAS_CRYPTO("aegis256");
|
||||
MODULE_ALIAS_CRYPTO("aegis256-aesni");
|
@ -364,5 +364,5 @@ module_exit(ghash_pclmulqdqni_mod_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("GHASH Message Digest Algorithm, "
|
||||
"acclerated by PCLMULQDQ-NI");
|
||||
"accelerated by PCLMULQDQ-NI");
|
||||
MODULE_ALIAS_CRYPTO("ghash");
|
||||
|
621
arch/x86/crypto/morus1280-avx2-asm.S
Normal file
621
arch/x86/crypto/morus1280-avx2-asm.S
Normal file
@ -0,0 +1,621 @@
|
||||
/*
|
||||
* AVX2 implementation of MORUS-1280
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
#define SHUFFLE_MASK(i0, i1, i2, i3) \
|
||||
(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
|
||||
|
||||
#define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
|
||||
#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
|
||||
#define MASK3 SHUFFLE_MASK(1, 2, 3, 0)
|
||||
|
||||
#define STATE0 %ymm0
|
||||
#define STATE0_LOW %xmm0
|
||||
#define STATE1 %ymm1
|
||||
#define STATE2 %ymm2
|
||||
#define STATE3 %ymm3
|
||||
#define STATE4 %ymm4
|
||||
#define KEY %ymm5
|
||||
#define MSG %ymm5
|
||||
#define MSG_LOW %xmm5
|
||||
#define T0 %ymm6
|
||||
#define T0_LOW %xmm6
|
||||
#define T1 %ymm7
|
||||
|
||||
.section .rodata.cst32.morus1280_const, "aM", @progbits, 32
|
||||
.align 32
|
||||
.Lmorus1280_const:
|
||||
.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
|
||||
.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
|
||||
.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
|
||||
.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
|
||||
|
||||
.section .rodata.cst32.morus1280_counter, "aM", @progbits, 32
|
||||
.align 32
|
||||
.Lmorus1280_counter:
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
|
||||
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
|
||||
.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
|
||||
.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
|
||||
|
||||
.text
|
||||
|
||||
/*
 * morus1280_round: one round on whole 256-bit registers
 *
 *   s0 ^= s1 & s2
 *   s0 ^= s3
 *   each 64-bit lane of s0 is rotated left by b bits
 *   the 64-bit lanes of s3 are permuted with vpermq selector w
 *
 * s4 is accepted for symmetry of the call sites but never referenced.
 * Clobbers T0.
 */
.macro morus1280_round s0, s1, s2, s3, s4, b, w
	vpand	\s1, \s2, T0
	vpxor	T0, \s0, \s0
	vpxor	\s3, \s0, \s0
	/* rotate left by b: (s0 << b) ^ (s0 >> (64 - b)) */
	vpsllq	$\b, \s0, T0
	vpsrlq	$(64 - \b), \s0, \s0
	vpxor	T0, \s0, \s0
	vpermq	$\w, \s3, \s3
.endm
|
||||
|
||||
/*
 * __morus1280_update: internal ABI
 *
 * Runs the five rounds of one state update.  The message block is xor-ed
 * into STATE1..STATE4 between the rounds; the first round takes no message.
 *
 * input:
 *   STATE[0-4] - input state
 *   MSG        - message block
 * output:
 *   STATE[0-4] - output state
 * changed:
 *   T0
 */
__morus1280_update:
	morus1280_round	STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1

	vpxor		MSG, STATE1, STATE1
	morus1280_round	STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2

	vpxor		MSG, STATE2, STATE2
	morus1280_round	STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3

	vpxor		MSG, STATE3, STATE3
	morus1280_round	STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2

	vpxor		MSG, STATE4, STATE4
	morus1280_round	STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1
	ret
ENDPROC(__morus1280_update)
|
||||
|
||||
/*
 * __morus1280_update_zero: internal ABI
 *
 * Same five rounds as __morus1280_update, but without xor-ing any message
 * block into the state (i.e. an update with an all-zero message).
 *
 * input:
 *   STATE[0-4] - input state
 * output:
 *   STATE[0-4] - output state
 * changed:
 *   T0
 */
__morus1280_update_zero:
	morus1280_round	STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
	morus1280_round	STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
	morus1280_round	STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
	morus1280_round	STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2
	morus1280_round	STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1
	ret
ENDPROC(__morus1280_update_zero)
|
||||
|
||||
/*
 * __load_partial: internal ABI
 *
 * Builds a zero-padded message block from fewer than 32 source bytes.
 * The byte count is decomposed by its bits: the 1-, 2- and 4-byte pieces
 * are gathered in %r9, then the 8- and 16-byte pieces are placed in front
 * of what has been collected so far.
 *
 * input:
 *   %rsi - src
 *   %rcx - bytes
 * output:
 *   MSG  - message block
 * changed:
 *   %r8
 *   %r9
 */
__load_partial:
	xor	%r9, %r9
	vpxor	MSG, MSG, MSG

	/* bit 0 set: a single byte at offset (bytes & 0x1E) */
	mov	%rcx, %r8
	and	$0x1, %r8
	jz	.Lld_no_byte

	mov	%rcx, %r8
	and	$0x1E, %r8
	add	%rsi, %r8
	mov	(%r8), %r9b

.Lld_no_byte:
	/* bit 1 set: a 16-bit word at offset (bytes & 0x1C) */
	mov	%rcx, %r8
	and	$0x2, %r8
	jz	.Lld_no_word

	mov	%rcx, %r8
	and	$0x1C, %r8
	add	%rsi, %r8
	shl	$16, %r9
	mov	(%r8), %r9w

.Lld_no_word:
	/* bit 2 set: a 32-bit dword at offset (bytes & 0x18) */
	mov	%rcx, %r8
	and	$0x4, %r8
	jz	.Lld_no_dword

	mov	%rcx, %r8
	and	$0x18, %r8
	add	%rsi, %r8
	shl	$32, %r9
	mov	(%r8), %r8d
	xor	%r8, %r9

.Lld_no_dword:
	/* what has been gathered so far becomes the low quadword of MSG */
	movq	%r9, MSG_LOW

	/* bit 3 set: a quadword at offset (bytes & 0x10) goes in front */
	mov	%rcx, %r8
	and	$0x8, %r8
	jz	.Lld_no_qword

	mov	%rcx, %r8
	and	$0x10, %r8
	add	%rsi, %r8
	pshufd	$MASK2, MSG_LOW, MSG_LOW
	pinsrq	$0, (%r8), MSG_LOW

.Lld_no_qword:
	/* bit 4 set: the first 16 source bytes go in front */
	mov	%rcx, %r8
	and	$0x10, %r8
	jz	.Lld_done

	vpermq	$MASK2, MSG, MSG
	movdqu	(%rsi), MSG_LOW

.Lld_done:
	ret
ENDPROC(__load_partial)
|
||||
|
||||
/*
 * __store_partial: internal ABI
 *
 * Writes the leading bytes of T0 to dst in pieces of 16, 8, 4, 2 and
 * 1 byte(s), so at most 31 bytes are stored.
 *
 * input:
 *   %rdx - dst
 *   %ecx - bytes (only the low 32 bits of %rcx are read)
 *   T0   - message block to store
 * changed:
 *   %r8
 *   %r9
 *   %r10
 *   T0   (halves are swapped when 16 or more bytes are stored)
 */
__store_partial:
	/*
	 * The C callers pass the count as an unsigned int, so the upper half
	 * of %rcx is not guaranteed to be zero; the 32-bit move zero-extends
	 * the count into %r8.
	 */
	mov %ecx, %r8d
	mov %rdx, %r9

	cmp $16, %r8
	jb .Lst_partial_16

	movdqu T0_LOW, (%r9)
	/* bring the upper 16 bytes of T0 down into T0_LOW */
	vpermq $MASK2, T0, T0

	sub $16, %r8
	add $16, %r9

.Lst_partial_16:
	movq T0_LOW, %r10

	cmp $8, %r8
	jb .Lst_partial_8

	mov %r10, (%r9)
	pextrq $1, T0_LOW, %r10

	sub $8, %r8
	add $8, %r9

.Lst_partial_8:
	cmp $4, %r8
	jb .Lst_partial_4

	mov %r10d, (%r9)
	shr $32, %r10

	sub $4, %r8
	add $4, %r9

.Lst_partial_4:
	cmp $2, %r8
	jb .Lst_partial_2

	mov %r10w, (%r9)
	shr $16, %r10

	sub $2, %r8
	add $2, %r9

.Lst_partial_2:
	cmp $1, %r8
	jb .Lst_partial_1

	mov %r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
|
||||
|
||||
/*
 * void crypto_morus1280_avx2_init(void *state, const void *key,
 *                                 const void *iv);
 *
 * %rdi = state, %rsi = key, %rdx = iv
 *
 * Initial state: STATE0 = 16 IV bytes in the low half, zero above;
 * STATE1 = key; STATE2 = all ones; STATE3 = zero; STATE4 = constant.
 * The state is then updated 16 times without a message, the key is xor-ed
 * into STATE1 once more, and the result is written to *state.
 */
ENTRY(crypto_morus1280_avx2_init)
	FRAME_BEGIN

	/* IV into the low 128 bits of an otherwise zero STATE0 */
	vpxor		STATE0, STATE0, STATE0
	movdqu		(%rdx), STATE0_LOW
	/* key, kept in KEY for the final xor */
	vmovdqu		(%rsi), KEY
	vmovdqa		KEY, STATE1
	/* all ones */
	vpcmpeqd	STATE2, STATE2, STATE2
	/* all zeros */
	vpxor		STATE3, STATE3, STATE3
	/* the constant */
	vmovdqa		.Lmorus1280_const, STATE4

	/* update 16 times with zero: */
	.rept 16
	call		__morus1280_update_zero
	.endr

	/* xor-in the key again after updates: */
	vpxor		KEY, STATE1, STATE1

	/* write the state back */
	vmovdqu		STATE0, (0 * 32)(%rdi)
	vmovdqu		STATE1, (1 * 32)(%rdi)
	vmovdqu		STATE2, (2 * 32)(%rdi)
	vmovdqu		STATE3, (3 * 32)(%rdi)
	vmovdqu		STATE4, (4 * 32)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_init)
|
||||
|
||||
/*
 * void crypto_morus1280_avx2_ad(void *state, const void *data,
 *                               unsigned int length);
 *
 * %rdi = state, %rsi = data, %edx = length
 *
 * Absorbs every whole 32-byte block of data into the state; a trailing
 * remainder shorter than 32 bytes is ignored.  Nothing is touched when
 * length is below 32.
 */
ENTRY(crypto_morus1280_avx2_ad)
	FRAME_BEGIN

	/*
	 * length is a 32-bit argument, so the upper half of %rdx is not
	 * guaranteed to be zero on entry: zero-extend it before it is used
	 * as a 64-bit counter.
	 */
	mov %edx, %edx

	cmp $32, %rdx
	jb .Lad_out

	/* load the state: */
	vmovdqu (0 * 32)(%rdi), STATE0
	vmovdqu (1 * 32)(%rdi), STATE1
	vmovdqu (2 * 32)(%rdi), STATE2
	vmovdqu (3 * 32)(%rdi), STATE3
	vmovdqu (4 * 32)(%rdi), STATE4

	/* pick the aligned or the unaligned load loop */
	mov %rsi, %r8
	and $0x1F, %r8
	jnz .Lad_u_loop

.align 4
.Lad_a_loop:
	vmovdqa (%rsi), MSG
	call __morus1280_update
	sub $32, %rdx
	add $32, %rsi
	cmp $32, %rdx
	jae .Lad_a_loop		/* unsigned: %rdx is a byte count */

	jmp .Lad_cont
.align 4
.Lad_u_loop:
	vmovdqu (%rsi), MSG
	call __morus1280_update
	sub $32, %rdx
	add $32, %rsi
	cmp $32, %rdx
	jae .Lad_u_loop		/* unsigned: %rdx is a byte count */

.Lad_cont:
	/* store the state: */
	vmovdqu STATE0, (0 * 32)(%rdi)
	vmovdqu STATE1, (1 * 32)(%rdi)
	vmovdqu STATE2, (2 * 32)(%rdi)
	vmovdqu STATE3, (3 * 32)(%rdi)
	vmovdqu STATE4, (4 * 32)(%rdi)

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_ad)
|
||||
|
||||
/*
 * void crypto_morus1280_avx2_enc(void *state, const void *src, void *dst,
 *                                unsigned int length);
 *
 * %rdi = state, %rsi = src, %rdx = dst, %ecx = length
 *
 * Encrypts every whole 32-byte block: the output block is
 * src ^ STATE0 ^ vpermq(MASK3, STATE1) ^ (STATE2 & STATE3), after which
 * the state is updated with the plaintext block.  A trailing remainder
 * shorter than 32 bytes is left untouched.
 */
ENTRY(crypto_morus1280_avx2_enc)
	FRAME_BEGIN

	/*
	 * length is a 32-bit argument, so the upper half of %rcx is not
	 * guaranteed to be zero on entry: zero-extend it before it is used
	 * as a 64-bit counter.
	 */
	mov %ecx, %ecx

	cmp $32, %rcx
	jb .Lenc_out

	/* load the state: */
	vmovdqu (0 * 32)(%rdi), STATE0
	vmovdqu (1 * 32)(%rdi), STATE1
	vmovdqu (2 * 32)(%rdi), STATE2
	vmovdqu (3 * 32)(%rdi), STATE3
	vmovdqu (4 * 32)(%rdi), STATE4

	/* aligned loop only when both src and dst are 32-byte aligned */
	mov %rsi, %r8
	or %rdx, %r8
	and $0x1F, %r8
	jnz .Lenc_u_loop

.align 4
.Lenc_a_loop:
	vmovdqa (%rsi), MSG
	vmovdqa MSG, T0
	vpxor STATE0, T0, T0
	vpermq $MASK3, STATE1, T1
	vpxor T1, T0, T0
	vpand STATE2, STATE3, T1
	vpxor T1, T0, T0
	vmovdqa T0, (%rdx)

	call __morus1280_update
	sub $32, %rcx
	add $32, %rsi
	add $32, %rdx
	cmp $32, %rcx
	jae .Lenc_a_loop	/* unsigned: %rcx is a byte count */

	jmp .Lenc_cont
.align 4
.Lenc_u_loop:
	vmovdqu (%rsi), MSG
	vmovdqa MSG, T0
	vpxor STATE0, T0, T0
	vpermq $MASK3, STATE1, T1
	vpxor T1, T0, T0
	vpand STATE2, STATE3, T1
	vpxor T1, T0, T0
	vmovdqu T0, (%rdx)

	call __morus1280_update
	sub $32, %rcx
	add $32, %rsi
	add $32, %rdx
	cmp $32, %rcx
	jae .Lenc_u_loop	/* unsigned: %rcx is a byte count */

.Lenc_cont:
	/* store the state: */
	vmovdqu STATE0, (0 * 32)(%rdi)
	vmovdqu STATE1, (1 * 32)(%rdi)
	vmovdqu STATE2, (2 * 32)(%rdi)
	vmovdqu STATE3, (3 * 32)(%rdi)
	vmovdqu STATE4, (4 * 32)(%rdi)

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_enc)
|
||||
|
||||
/*
 * void crypto_morus1280_avx2_enc_tail(void *state, const void *src, void *dst,
 *                                     unsigned int length);
 *
 * %rdi = state, %rsi = src, %rdx = dst, %rcx = length
 *
 * Encrypts a final chunk shorter than one block: the source bytes are
 * loaded zero-padded by __load_partial, encrypted like a full block, and
 * only the first length bytes are written out by __store_partial.  The
 * state is then updated with the zero-padded plaintext block.
 */
ENTRY(crypto_morus1280_avx2_enc_tail)
	FRAME_BEGIN

	/* load the state: */
	vmovdqu (0 * 32)(%rdi), STATE0
	vmovdqu (1 * 32)(%rdi), STATE1
	vmovdqu (2 * 32)(%rdi), STATE2
	vmovdqu (3 * 32)(%rdi), STATE3
	vmovdqu (4 * 32)(%rdi), STATE4

	/* encrypt message: */
	call __load_partial

	vmovdqa MSG, T0
	vpxor STATE0, T0, T0
	vpermq $MASK3, STATE1, T1
	vpxor T1, T0, T0
	vpand STATE2, STATE3, T1
	vpxor T1, T0, T0

	call __store_partial

	call __morus1280_update

	/* store the state: */
	vmovdqu STATE0, (0 * 32)(%rdi)
	vmovdqu STATE1, (1 * 32)(%rdi)
	vmovdqu STATE2, (2 * 32)(%rdi)
	vmovdqu STATE3, (3 * 32)(%rdi)
	vmovdqu STATE4, (4 * 32)(%rdi)

	FRAME_END
	/* return explicitly instead of falling through into the next function */
	ret
ENDPROC(crypto_morus1280_avx2_enc_tail)
|
||||
|
||||
/*
 * void crypto_morus1280_avx2_dec(void *state, const void *src, void *dst,
 *                                unsigned int length);
 *
 * %rdi = state, %rsi = src, %rdx = dst, %ecx = length
 *
 * Decrypts every whole 32-byte block: the output block is
 * src ^ STATE0 ^ vpermq(MASK3, STATE1) ^ (STATE2 & STATE3), after which
 * the state is updated with the recovered plaintext block.  A trailing
 * remainder shorter than 32 bytes is left untouched.
 */
ENTRY(crypto_morus1280_avx2_dec)
	FRAME_BEGIN

	/*
	 * length is a 32-bit argument, so the upper half of %rcx is not
	 * guaranteed to be zero on entry: zero-extend it before it is used
	 * as a 64-bit counter.
	 */
	mov %ecx, %ecx

	cmp $32, %rcx
	jb .Ldec_out

	/* load the state: */
	vmovdqu (0 * 32)(%rdi), STATE0
	vmovdqu (1 * 32)(%rdi), STATE1
	vmovdqu (2 * 32)(%rdi), STATE2
	vmovdqu (3 * 32)(%rdi), STATE3
	vmovdqu (4 * 32)(%rdi), STATE4

	/* aligned loop only when both src and dst are 32-byte aligned */
	mov %rsi, %r8
	or %rdx, %r8
	and $0x1F, %r8
	jnz .Ldec_u_loop

.align 4
.Ldec_a_loop:
	vmovdqa (%rsi), MSG
	vpxor STATE0, MSG, MSG
	vpermq $MASK3, STATE1, T0
	vpxor T0, MSG, MSG
	vpand STATE2, STATE3, T0
	vpxor T0, MSG, MSG
	vmovdqa MSG, (%rdx)

	call __morus1280_update
	sub $32, %rcx
	add $32, %rsi
	add $32, %rdx
	cmp $32, %rcx
	jae .Ldec_a_loop	/* unsigned: %rcx is a byte count */

	jmp .Ldec_cont
.align 4
.Ldec_u_loop:
	vmovdqu (%rsi), MSG
	vpxor STATE0, MSG, MSG
	vpermq $MASK3, STATE1, T0
	vpxor T0, MSG, MSG
	vpand STATE2, STATE3, T0
	vpxor T0, MSG, MSG
	vmovdqu MSG, (%rdx)

	call __morus1280_update
	sub $32, %rcx
	add $32, %rsi
	add $32, %rdx
	cmp $32, %rcx
	jae .Ldec_u_loop	/* unsigned: %rcx is a byte count */

.Ldec_cont:
	/* store the state: */
	vmovdqu STATE0, (0 * 32)(%rdi)
	vmovdqu STATE1, (1 * 32)(%rdi)
	vmovdqu STATE2, (2 * 32)(%rdi)
	vmovdqu STATE3, (3 * 32)(%rdi)
	vmovdqu STATE4, (4 * 32)(%rdi)

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_dec)
|
||||
|
||||
/*
 * void crypto_morus1280_avx2_dec_tail(void *state, const void *src, void *dst,
 *                                     unsigned int length);
 *
 * %rdi = state, %rsi = src, %rdx = dst, %rcx = length
 *
 * Decrypts a final chunk shorter than one block.  The zero-padded input is
 * decrypted like a full block and the first length bytes are written out.
 * Before the state update, all bytes of the block at index >= length are
 * cleared again by comparing the broadcast length with the byte-index
 * table .Lmorus1280_counter.
 */
ENTRY(crypto_morus1280_avx2_dec_tail)
	FRAME_BEGIN

	/* bring the state into registers */
	vmovdqu		(0 * 32)(%rdi), STATE0
	vmovdqu		(1 * 32)(%rdi), STATE1
	vmovdqu		(2 * 32)(%rdi), STATE2
	vmovdqu		(3 * 32)(%rdi), STATE3
	vmovdqu		(4 * 32)(%rdi), STATE4

	/* decrypt the zero-padded block */
	call		__load_partial

	vpxor		STATE0, MSG, MSG
	vpermq		$MASK3, STATE1, T0
	vpxor		T0, MSG, MSG
	vpand		STATE2, STATE3, T0
	vpxor		T0, MSG, MSG
	vmovdqa		MSG, T0

	call		__store_partial

	/* keep only the bytes whose index is below the byte count */
	movq		%rcx, T0_LOW
	vpbroadcastb	T0_LOW, T0
	vmovdqa		.Lmorus1280_counter, T1
	vpcmpgtb	T1, T0, T0
	vpand		T0, MSG, MSG

	call		__morus1280_update

	/* write the state back */
	vmovdqu		STATE0, (0 * 32)(%rdi)
	vmovdqu		STATE1, (1 * 32)(%rdi)
	vmovdqu		STATE2, (2 * 32)(%rdi)
	vmovdqu		STATE3, (3 * 32)(%rdi)
	vmovdqu		STATE4, (4 * 32)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_dec_tail)
|
||||
|
||||
/*
 * void crypto_morus1280_avx2_final(void *state, void *tag_xor,
 *                                  u64 assoclen, u64 cryptlen);
 *
 * %rdi = state, %rsi = tag_xor, %rdx = assoclen, %rcx = cryptlen
 *
 * Xors STATE0 into STATE4, runs ten state updates with the length block
 * (assoclen and cryptlen, each multiplied by 8, in the two low quadwords)
 * and xors the resulting 32-byte tag into *tag_xor.  The state in memory
 * is not written back.
 */
ENTRY(crypto_morus1280_avx2_final)
	FRAME_BEGIN

	/* bring the state into registers */
	vmovdqu		(0 * 32)(%rdi), STATE0
	vmovdqu		(1 * 32)(%rdi), STATE1
	vmovdqu		(2 * 32)(%rdi), STATE2
	vmovdqu		(3 * 32)(%rdi), STATE3
	vmovdqu		(4 * 32)(%rdi), STATE4

	/* state[4] ^= state[0] */
	vpxor		STATE0, STATE4, STATE4

	/* length block: { assoclen * 8, cryptlen * 8, 0, 0 } */
	vpxor		MSG, MSG, MSG
	vpinsrq		$0, %rdx, MSG_LOW, MSG_LOW
	vpinsrq		$1, %rcx, MSG_LOW, MSG_LOW
	vpsllq		$3, MSG, MSG /* multiply by 8 (to get bit count) */

	/* ten updates with the length block */
	.rept 10
	call		__morus1280_update
	.endr

	/* tag_xor ^= STATE0 ^ vpermq(MASK3, STATE1) ^ (STATE2 & STATE3) */
	vmovdqu		(%rsi), MSG

	vpxor		STATE0, MSG, MSG
	vpermq		$MASK3, STATE1, T0
	vpxor		T0, MSG, MSG
	vpand		STATE2, STATE3, T0
	vpxor		T0, MSG, MSG
	vmovdqu		MSG, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_final)
|
68
arch/x86/crypto/morus1280-avx2-glue.c
Normal file
68
arch/x86/crypto/morus1280-avx2-glue.c
Normal file
@ -0,0 +1,68 @@
|
||||
/*
|
||||
* The MORUS-1280 Authenticated-Encryption Algorithm
|
||||
* Glue for AVX2 implementation
|
||||
*
|
||||
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*/
|
||||
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/morus1280_glue.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
|
||||
asmlinkage void crypto_morus1280_avx2_init(void *state, const void *key,
|
||||
const void *iv);
|
||||
asmlinkage void crypto_morus1280_avx2_ad(void *state, const void *data,
|
||||
unsigned int length);
|
||||
|
||||
asmlinkage void crypto_morus1280_avx2_enc(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
asmlinkage void crypto_morus1280_avx2_dec(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
|
||||
asmlinkage void crypto_morus1280_avx2_enc_tail(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
asmlinkage void crypto_morus1280_avx2_dec_tail(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
|
||||
asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor,
|
||||
u64 assoclen, u64 cryptlen);
|
||||
|
||||
MORUS1280_DECLARE_ALGS(avx2, "morus1280-avx2", 400);
|
||||
|
||||
static const struct x86_cpu_id avx2_cpu_id[] = {
|
||||
X86_FEATURE_MATCH(X86_FEATURE_AVX2),
|
||||
{}
|
||||
};
|
||||
MODULE_DEVICE_TABLE(x86cpu, avx2_cpu_id);
|
||||
|
||||
/*
 * Module init: register the MORUS-1280 AVX2 algorithms, but only where the
 * assembly can actually run.
 *
 * The AVX2 CPUID bit alone is not sufficient: the implementation uses the
 * YMM registers, so the OS must also have enabled XSAVE and manage the SSE
 * and YMM register state.
 *
 * Returns -ENODEV when a requirement is missing, otherwise the result of
 * crypto_register_aeads().
 */
static int __init crypto_morus1280_avx2_module_init(void)
{
	if (!x86_match_cpu(avx2_cpu_id))
		return -ENODEV;

	if (!boot_cpu_has(X86_FEATURE_OSXSAVE) ||
	    !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
		return -ENODEV;

	return crypto_register_aeads(crypto_morus1280_avx2_algs,
				     ARRAY_SIZE(crypto_morus1280_avx2_algs));
}
|
||||
|
||||
/* Module exit: unregister every entry of crypto_morus1280_avx2_algs. */
static void __exit crypto_morus1280_avx2_module_exit(void)
{
	crypto_unregister_aeads(crypto_morus1280_avx2_algs,
				ARRAY_SIZE(crypto_morus1280_avx2_algs));
}
|
||||
|
||||
module_init(crypto_morus1280_avx2_module_init);
|
||||
module_exit(crypto_morus1280_avx2_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- AVX2 implementation");
|
||||
MODULE_ALIAS_CRYPTO("morus1280");
|
||||
MODULE_ALIAS_CRYPTO("morus1280-avx2");
|
895
arch/x86/crypto/morus1280-sse2-asm.S
Normal file
895
arch/x86/crypto/morus1280-sse2-asm.S
Normal file
@ -0,0 +1,895 @@
|
||||
/*
|
||||
* SSE2 implementation of MORUS-1280
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
#define SHUFFLE_MASK(i0, i1, i2, i3) \
|
||||
(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
|
||||
|
||||
#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
|
||||
|
||||
#define STATE0_LO %xmm0
|
||||
#define STATE0_HI %xmm1
|
||||
#define STATE1_LO %xmm2
|
||||
#define STATE1_HI %xmm3
|
||||
#define STATE2_LO %xmm4
|
||||
#define STATE2_HI %xmm5
|
||||
#define STATE3_LO %xmm6
|
||||
#define STATE3_HI %xmm7
|
||||
#define STATE4_LO %xmm8
|
||||
#define STATE4_HI %xmm9
|
||||
#define KEY_LO %xmm10
|
||||
#define KEY_HI %xmm11
|
||||
#define MSG_LO %xmm10
|
||||
#define MSG_HI %xmm11
|
||||
#define T0_LO %xmm12
|
||||
#define T0_HI %xmm13
|
||||
#define T1_LO %xmm14
|
||||
#define T1_HI %xmm15
|
||||
|
||||
.section .rodata.cst16.morus640_const, "aM", @progbits, 16
|
||||
.align 16
|
||||
.Lmorus640_const_0:
|
||||
.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
|
||||
.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
|
||||
.Lmorus640_const_1:
|
||||
.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
|
||||
.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
|
||||
|
||||
.section .rodata.cst16.morus640_counter, "aM", @progbits, 16
|
||||
.align 16
|
||||
.Lmorus640_counter_0:
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
|
||||
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
|
||||
.Lmorus640_counter_1:
|
||||
.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
|
||||
.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
|
||||
|
||||
.text
|
||||
|
||||
.macro rol1 hi, lo
	/*
	 * Rotate the four 64-bit words held in \hi:\lo by one word position
	 * (each word moves one slot towards the high end, the top word wraps
	 * around to the bottom).  Clobbers T0_LO, so neither operand may be
	 * T0_LO.
	 *
	 * HI_1 | HI_0 || LO_1 | LO_0
	 *  ==>
	 * HI_0 | HI_1 || LO_1 | LO_0
	 *  ==>
	 * HI_0 | LO_1 || LO_0 | HI_1
	 */
	pshufd		$MASK2, \hi, \hi	/* swap the two qwords of \hi */
	movdqa		\hi, T0_LO
	punpcklqdq	\lo, T0_LO		/* T0_LO = LO_0 | HI_1 */
	punpckhqdq	\hi, \lo		/* \lo   = HI_0 | LO_1 */
	movdqa		\lo, \hi		/* new high half */
	movdqa		T0_LO, \lo		/* new low half */
.endm
|
||||
|
||||
.macro rol2 hi, lo
	/*
	 * Rotate \hi:\lo by two 64-bit words, i.e. exchange the two 128-bit
	 * halves.  T0_LO is used as the swap temporary and is clobbered.
	 */
	movdqa		\lo, T0_LO
	movdqa		\hi, \lo
	movdqa		T0_LO, \hi
.endm
|
||||
|
||||
.macro rol3 hi, lo
	/*
	 * Rotate the four 64-bit words held in \hi:\lo by three word
	 * positions.  Clobbers T0_LO; \lo is read again after T0_LO has been
	 * overwritten, so \lo must not be T0_LO itself.
	 *
	 * HI_1 | HI_0 || LO_1 | LO_0
	 *  ==>
	 * HI_0 | HI_1 || LO_1 | LO_0
	 *  ==>
	 * LO_0 | HI_1 || HI_0 | LO_1
	 */
	pshufd		$MASK2, \hi, \hi	/* swap the two qwords of \hi */
	movdqa		\lo, T0_LO
	punpckhqdq	\hi, T0_LO		/* T0_LO = HI_0 | LO_1 */
	punpcklqdq	\lo, \hi		/* \hi   = LO_0 | HI_1 */
	movdqa		T0_LO, \lo		/* new low half */
.endm
|
||||
|
||||
.macro morus1280_round s0_l, s0_h, s1_l, s1_h, s2_l, s2_h, s3_l, s3_h, s4_l, s4_h, b, w
	/*
	 * One MORUS-1280 round on a 256-bit state word split into two XMM
	 * halves:
	 *
	 *   s0 = rotl64_per_lane(s0 ^ (s1 & s2) ^ s3, \b)
	 *   s3 = \w(s3)          (\w is one of rol1 / rol2 / rol3)
	 *
	 * s4_l/s4_h are accepted only to keep the argument list uniform; the
	 * macro does not reference them.  T0_LO is the only scratch register.
	 */

	/* low half */
	movdqa		\s1_l, T0_LO
	pand		\s2_l, T0_LO
	pxor		T0_LO, \s0_l
	pxor		\s3_l, \s0_l
	movdqa		\s0_l, T0_LO
	psllq		$\b, T0_LO
	psrlq		$(64 - \b), \s0_l
	pxor		T0_LO, \s0_l		/* combine the two shifted parts */

	/* high half */
	movdqa		\s1_h, T0_LO
	pand		\s2_h, T0_LO
	pxor		T0_LO, \s0_h
	pxor		\s3_h, \s0_h
	movdqa		\s0_h, T0_LO
	psllq		$\b, T0_LO
	psrlq		$(64 - \b), \s0_h
	pxor		T0_LO, \s0_h

	/* word-rotate s3 */
	\w		\s3_h, \s3_l
.endm
|
||||
|
||||
/*
 * __morus1280_update: internal ABI
 *
 * Runs the five MORUS-1280 rounds over the state; the message block is
 * XORed into STATE1..STATE4 between rounds (before rounds 2..5).
 *
 * input:
 *   STATE[0-4] - input state
 *   MSG        - message block (left unmodified)
 * output:
 *   STATE[0-4] - output state
 * changed:
 *   T0 (T0_LO is the scratch register of the round/rotate macros)
 */
__morus1280_update:
	/* round 1 */
	morus1280_round \
		STATE0_LO, STATE0_HI, STATE1_LO, STATE1_HI, STATE2_LO, STATE2_HI, \
		STATE3_LO, STATE3_HI, STATE4_LO, STATE4_HI, 13, rol1

	/* round 2 */
	pxor MSG_LO, STATE1_LO
	pxor MSG_HI, STATE1_HI
	morus1280_round \
		STATE1_LO, STATE1_HI, STATE2_LO, STATE2_HI, STATE3_LO, STATE3_HI, \
		STATE4_LO, STATE4_HI, STATE0_LO, STATE0_HI, 46, rol2

	/* round 3 */
	pxor MSG_LO, STATE2_LO
	pxor MSG_HI, STATE2_HI
	morus1280_round \
		STATE2_LO, STATE2_HI, STATE3_LO, STATE3_HI, STATE4_LO, STATE4_HI, \
		STATE0_LO, STATE0_HI, STATE1_LO, STATE1_HI, 38, rol3

	/* round 4 */
	pxor MSG_LO, STATE3_LO
	pxor MSG_HI, STATE3_HI
	morus1280_round \
		STATE3_LO, STATE3_HI, STATE4_LO, STATE4_HI, STATE0_LO, STATE0_HI, \
		STATE1_LO, STATE1_HI, STATE2_LO, STATE2_HI, 7, rol2

	/* round 5 */
	pxor MSG_LO, STATE4_LO
	pxor MSG_HI, STATE4_HI
	morus1280_round \
		STATE4_LO, STATE4_HI, STATE0_LO, STATE0_HI, STATE1_LO, STATE1_HI, \
		STATE2_LO, STATE2_HI, STATE3_LO, STATE3_HI, 4, rol1
	ret
ENDPROC(__morus1280_update)
|
||||
|
||||
/*
 * __morus1280_update_zero: internal ABI
 *
 * Same five rounds as __morus1280_update, but with an implicit all-zero
 * message block, so the message XORs are omitted and MSG/KEY (xmm10/xmm11)
 * are neither read nor written.
 *
 * input:
 *   STATE[0-4] - input state
 * output:
 *   STATE[0-4] - output state
 * changed:
 *   T0 (T0_LO is the scratch register of the round/rotate macros)
 */
__morus1280_update_zero:
	morus1280_round \
		STATE0_LO, STATE0_HI, STATE1_LO, STATE1_HI, STATE2_LO, STATE2_HI, \
		STATE3_LO, STATE3_HI, STATE4_LO, STATE4_HI, 13, rol1
	morus1280_round \
		STATE1_LO, STATE1_HI, STATE2_LO, STATE2_HI, STATE3_LO, STATE3_HI, \
		STATE4_LO, STATE4_HI, STATE0_LO, STATE0_HI, 46, rol2
	morus1280_round \
		STATE2_LO, STATE2_HI, STATE3_LO, STATE3_HI, STATE4_LO, STATE4_HI, \
		STATE0_LO, STATE0_HI, STATE1_LO, STATE1_HI, 38, rol3
	morus1280_round \
		STATE3_LO, STATE3_HI, STATE4_LO, STATE4_HI, STATE0_LO, STATE0_HI, \
		STATE1_LO, STATE1_HI, STATE2_LO, STATE2_HI, 7, rol2
	morus1280_round \
		STATE4_LO, STATE4_HI, STATE0_LO, STATE0_HI, STATE1_LO, STATE1_HI, \
		STATE2_LO, STATE2_HI, STATE3_LO, STATE3_HI, 4, rol1
	ret
ENDPROC(__morus1280_update_zero)
|
||||
|
||||
/*
 * __load_partial: internal ABI
 *
 * Loads a partial (shorter than 32 bytes) block into MSG, zero-padding the
 * unused high bytes.  The length is decomposed by its bits (1, 2, 4, 8, 16)
 * and the pieces are read from the tail towards the head, so no byte beyond
 * src + bytes is ever touched.
 *
 * input:
 *   %rsi - src
 *   %rcx - bytes (callers in this file's glue pass 1..31)
 * output:
 *   MSG  - message block
 * changed:
 *   %r8
 *   %r9
 *   T0_LO (only when bit 3 of bytes is set)
 *   flags
 */
__load_partial:
	xor %r9d, %r9d		/* 32-bit xor zeroes all of %r9 */
	pxor MSG_LO, MSG_LO
	pxor MSG_HI, MSG_HI

	/* trailing single byte, located after all larger pieces */
	mov %rcx, %r8
	and $0x1, %r8
	jz .Lld_partial_1

	mov %rcx, %r8
	and $0x1E, %r8
	add %rsi, %r8
	mov (%r8), %r9b

.Lld_partial_1:
	/* 2-byte piece: make room in %r9, then drop it into the low word */
	mov %rcx, %r8
	and $0x2, %r8
	jz .Lld_partial_2

	mov %rcx, %r8
	and $0x1C, %r8
	add %rsi, %r8
	shl $16, %r9
	mov (%r8), %r9w

.Lld_partial_2:
	/* 4-byte piece; the 32-bit load zero-extends %r8 before the xor */
	mov %rcx, %r8
	and $0x4, %r8
	jz .Lld_partial_4

	mov %rcx, %r8
	and $0x18, %r8
	add %rsi, %r8
	shl $32, %r9
	mov (%r8), %r8d
	xor %r8, %r9

.Lld_partial_4:
	/* up to 7 bytes collected so far go into the low qword of MSG_LO */
	movq %r9, MSG_LO

	/* 8-byte piece: shift what we have up by a qword, merge below it */
	mov %rcx, %r8
	and $0x8, %r8
	jz .Lld_partial_8

	mov %rcx, %r8
	and $0x10, %r8
	add %rsi, %r8
	pslldq $8, MSG_LO
	movq (%r8), T0_LO
	pxor T0_LO, MSG_LO

.Lld_partial_8:
	/* 16-byte piece: collected bytes become the high half */
	mov %rcx, %r8
	and $0x10, %r8
	jz .Lld_partial_16

	movdqa MSG_LO, MSG_HI
	movdqu (%rsi), MSG_LO

.Lld_partial_16:
	ret
ENDPROC(__load_partial)
|
||||
|
||||
/*
 * __store_partial: internal ABI
 *
 * Writes the first %rcx bytes of the 32-byte block held in T0 to dst, using
 * progressively smaller stores (16, 8, 4, 2, 1) so that no byte beyond
 * dst + bytes is written.
 *
 * input:
 *   %rdx - dst
 *   %rcx - bytes (callers in this file's glue pass 1..31)
 *   T0   - block to store
 * changed:
 *   %r8  - remaining byte count
 *   %r9  - running destination pointer
 *   %r10 - low qword of the data still to be stored
 *   T0_LO
 *   flags
 */
__store_partial:
	mov %rcx, %r8
	mov %rdx, %r9

	/* byte counts are unsigned: use jb, not jl */
	cmp $16, %r8
	jb .Lst_partial_16

	movdqu T0_LO, (%r9)
	movdqa T0_HI, T0_LO	/* continue with the high half */

	sub $16, %r8
	add $16, %r9

.Lst_partial_16:
	movq T0_LO, %r10

	cmp $8, %r8
	jb .Lst_partial_8

	mov %r10, (%r9)
	psrldq $8, T0_LO
	movq T0_LO, %r10	/* next qword of the current half */

	sub $8, %r8
	add $8, %r9

.Lst_partial_8:
	cmp $4, %r8
	jb .Lst_partial_4

	mov %r10d, (%r9)
	shr $32, %r10

	sub $4, %r8
	add $4, %r9

.Lst_partial_4:
	cmp $2, %r8
	jb .Lst_partial_2

	mov %r10w, (%r9)
	shr $16, %r10

	sub $2, %r8
	add $2, %r9

.Lst_partial_2:
	cmp $1, %r8
	jb .Lst_partial_1

	mov %r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
|
||||
|
||||
/*
 * void crypto_morus1280_sse2_init(void *state, const void *key,
 *                                 const void *iv);
 *
 * Builds the initial MORUS-1280 state and writes it to *state:
 *   STATE0 = 16-byte IV in the low half, zero in the high half
 *   STATE1 = 32-byte key
 *   STATE2 = all ones
 *   STATE3 = all zeros
 *   STATE4 = the two 16-byte constants from .Lmorus640_const_{0,1}
 * followed by 16 zero-message updates and a final XOR of the key into
 * STATE1.
 *
 * %rdi - state (10 * 16 bytes, written with unaligned stores)
 * %rsi - key   (32 bytes, unaligned load)
 * %rdx - iv    (16 bytes, unaligned load)
 */
ENTRY(crypto_morus1280_sse2_init)
	FRAME_BEGIN

	/* load IV: */
	pxor STATE0_HI, STATE0_HI
	movdqu (%rdx), STATE0_LO
	/* load key: */
	movdqu  0(%rsi), KEY_LO
	movdqu 16(%rsi), KEY_HI
	movdqa KEY_LO, STATE1_LO
	movdqa KEY_HI, STATE1_HI
	/* load all ones: */
	pcmpeqd STATE2_LO, STATE2_LO
	pcmpeqd STATE2_HI, STATE2_HI
	/* load all zeros: */
	pxor STATE3_LO, STATE3_LO
	pxor STATE3_HI, STATE3_HI
	/* load the constant (RIP-relative, so no absolute relocation): */
	movdqa .Lmorus640_const_0(%rip), STATE4_LO
	movdqa .Lmorus640_const_1(%rip), STATE4_HI

	/*
	 * update 16 times with zero; KEY_LO/KEY_HI survive because
	 * __morus1280_update_zero does not touch xmm10/xmm11:
	 */
.rept 16
	call __morus1280_update_zero
.endr

	/* xor-in the key again after updates: */
	pxor KEY_LO, STATE1_LO
	pxor KEY_HI, STATE1_HI

	/* store the state: */
	movdqu STATE0_LO, (0 * 16)(%rdi)
	movdqu STATE0_HI, (1 * 16)(%rdi)
	movdqu STATE1_LO, (2 * 16)(%rdi)
	movdqu STATE1_HI, (3 * 16)(%rdi)
	movdqu STATE2_LO, (4 * 16)(%rdi)
	movdqu STATE2_HI, (5 * 16)(%rdi)
	movdqu STATE3_LO, (6 * 16)(%rdi)
	movdqu STATE3_HI, (7 * 16)(%rdi)
	movdqu STATE4_LO, (8 * 16)(%rdi)
	movdqu STATE4_HI, (9 * 16)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_sse2_init)
|
||||
|
||||
/*
 * void crypto_morus1280_sse2_ad(void *state, const void *data,
 *                               unsigned int length);
 *
 * Absorbs associated data into the state, one whole 32-byte block at a
 * time.  Trailing bytes (length % 32) are ignored, and a length below 32
 * returns without touching *state.
 *
 * %rdi - state
 * %rsi - data (aligned loads are used when it is 16-byte aligned)
 * %rdx - length in bytes
 */
ENTRY(crypto_morus1280_sse2_ad)
	FRAME_BEGIN

	cmp $32, %rdx
	jb .Lad_out

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0_LO
	movdqu (1 * 16)(%rdi), STATE0_HI
	movdqu (2 * 16)(%rdi), STATE1_LO
	movdqu (3 * 16)(%rdi), STATE1_HI
	movdqu (4 * 16)(%rdi), STATE2_LO
	movdqu (5 * 16)(%rdi), STATE2_HI
	movdqu (6 * 16)(%rdi), STATE3_LO
	movdqu (7 * 16)(%rdi), STATE3_HI
	movdqu (8 * 16)(%rdi), STATE4_LO
	movdqu (9 * 16)(%rdi), STATE4_HI

	/* pick the aligned or unaligned loop based on the source pointer */
	mov %rsi, %r8
	and $0xF, %r8
	jnz .Lad_u_loop

.align 4
.Lad_a_loop:
	movdqa  0(%rsi), MSG_LO
	movdqa 16(%rsi), MSG_HI
	call __morus1280_update
	sub $32, %rdx
	add $32, %rsi
	cmp $32, %rdx
	jae .Lad_a_loop		/* unsigned, matching the entry check */

	jmp .Lad_cont
.align 4
.Lad_u_loop:
	movdqu  0(%rsi), MSG_LO
	movdqu 16(%rsi), MSG_HI
	call __morus1280_update
	sub $32, %rdx
	add $32, %rsi
	cmp $32, %rdx
	jae .Lad_u_loop		/* unsigned, matching the entry check */

.Lad_cont:
	/* store the state: */
	movdqu STATE0_LO, (0 * 16)(%rdi)
	movdqu STATE0_HI, (1 * 16)(%rdi)
	movdqu STATE1_LO, (2 * 16)(%rdi)
	movdqu STATE1_HI, (3 * 16)(%rdi)
	movdqu STATE2_LO, (4 * 16)(%rdi)
	movdqu STATE2_HI, (5 * 16)(%rdi)
	movdqu STATE3_LO, (6 * 16)(%rdi)
	movdqu STATE3_HI, (7 * 16)(%rdi)
	movdqu STATE4_LO, (8 * 16)(%rdi)
	movdqu STATE4_HI, (9 * 16)(%rdi)

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_morus1280_sse2_ad)
|
||||
|
||||
/*
 * void crypto_morus1280_sse2_enc(void *state, const void *src, void *dst,
 *                                unsigned int length);
 *
 * Encrypts whole 32-byte blocks.  For each block:
 *   dst = src ^ STATE0 ^ rol3(STATE1) ^ (STATE2 & STATE3)
 * and the state is then updated with the plaintext block.  Trailing bytes
 * (length % 32) are left for crypto_morus1280_sse2_enc_tail, and a length
 * below 32 returns without touching *state.
 *
 * %rdi - state
 * %rsi - src
 * %rdx - dst
 * %rcx - length in bytes
 *
 * The aligned loop is used only when both src and dst are 16-byte aligned.
 */
ENTRY(crypto_morus1280_sse2_enc)
	FRAME_BEGIN

	cmp $32, %rcx
	jb .Lenc_out

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0_LO
	movdqu (1 * 16)(%rdi), STATE0_HI
	movdqu (2 * 16)(%rdi), STATE1_LO
	movdqu (3 * 16)(%rdi), STATE1_HI
	movdqu (4 * 16)(%rdi), STATE2_LO
	movdqu (5 * 16)(%rdi), STATE2_HI
	movdqu (6 * 16)(%rdi), STATE3_LO
	movdqu (7 * 16)(%rdi), STATE3_HI
	movdqu (8 * 16)(%rdi), STATE4_LO
	movdqu (9 * 16)(%rdi), STATE4_HI

	mov %rsi, %r8
	or  %rdx, %r8
	and $0xF, %r8
	jnz .Lenc_u_loop

.align 4
.Lenc_a_loop:
	movdqa  0(%rsi), MSG_LO
	movdqa 16(%rsi), MSG_HI
	/* T1 = rol3(STATE1); rol3 clobbers T0_LO, so T0 is loaded afterwards */
	movdqa STATE1_LO, T1_LO
	movdqa STATE1_HI, T1_HI
	rol3 T1_HI, T1_LO
	movdqa MSG_LO, T0_LO
	movdqa MSG_HI, T0_HI
	pxor T1_LO, T0_LO
	pxor T1_HI, T0_HI
	pxor STATE0_LO, T0_LO
	pxor STATE0_HI, T0_HI
	movdqa STATE2_LO, T1_LO
	movdqa STATE2_HI, T1_HI
	pand STATE3_LO, T1_LO
	pand STATE3_HI, T1_HI
	pxor T1_LO, T0_LO
	pxor T1_HI, T0_HI
	movdqa T0_LO,  0(%rdx)
	movdqa T0_HI, 16(%rdx)

	call __morus1280_update
	sub $32, %rcx
	add $32, %rsi
	add $32, %rdx
	cmp $32, %rcx
	jae .Lenc_a_loop	/* unsigned, matching the entry check */

	jmp .Lenc_cont
.align 4
.Lenc_u_loop:
	movdqu  0(%rsi), MSG_LO
	movdqu 16(%rsi), MSG_HI
	movdqa STATE1_LO, T1_LO
	movdqa STATE1_HI, T1_HI
	rol3 T1_HI, T1_LO
	movdqa MSG_LO, T0_LO
	movdqa MSG_HI, T0_HI
	pxor T1_LO, T0_LO
	pxor T1_HI, T0_HI
	pxor STATE0_LO, T0_LO
	pxor STATE0_HI, T0_HI
	movdqa STATE2_LO, T1_LO
	movdqa STATE2_HI, T1_HI
	pand STATE3_LO, T1_LO
	pand STATE3_HI, T1_HI
	pxor T1_LO, T0_LO
	pxor T1_HI, T0_HI
	movdqu T0_LO,  0(%rdx)
	movdqu T0_HI, 16(%rdx)

	call __morus1280_update
	sub $32, %rcx
	add $32, %rsi
	add $32, %rdx
	cmp $32, %rcx
	jae .Lenc_u_loop	/* unsigned, matching the entry check */

.Lenc_cont:
	/* store the state: */
	movdqu STATE0_LO, (0 * 16)(%rdi)
	movdqu STATE0_HI, (1 * 16)(%rdi)
	movdqu STATE1_LO, (2 * 16)(%rdi)
	movdqu STATE1_HI, (3 * 16)(%rdi)
	movdqu STATE2_LO, (4 * 16)(%rdi)
	movdqu STATE2_HI, (5 * 16)(%rdi)
	movdqu STATE3_LO, (6 * 16)(%rdi)
	movdqu STATE3_HI, (7 * 16)(%rdi)
	movdqu STATE4_LO, (8 * 16)(%rdi)
	movdqu STATE4_HI, (9 * 16)(%rdi)

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_morus1280_sse2_enc)
|
||||
|
||||
/*
 * void crypto_morus1280_sse2_enc_tail(void *state, const void *src, void *dst,
 *                                     unsigned int length);
 *
 * Encrypts the final partial block.  The plaintext is loaded zero-padded
 * to 32 bytes by __load_partial, the keystream
 *   STATE0 ^ rol3(STATE1) ^ (STATE2 & STATE3)
 * is XORed in, the first `length` bytes of the result are written by
 * __store_partial, and the state is updated with the padded plaintext.
 *
 * %rdi - state
 * %rsi - src
 * %rdx - dst
 * %rcx - length in bytes (the glue code in this file passes 1..31)
 */
ENTRY(crypto_morus1280_sse2_enc_tail)
	FRAME_BEGIN

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0_LO
	movdqu (1 * 16)(%rdi), STATE0_HI
	movdqu (2 * 16)(%rdi), STATE1_LO
	movdqu (3 * 16)(%rdi), STATE1_HI
	movdqu (4 * 16)(%rdi), STATE2_LO
	movdqu (5 * 16)(%rdi), STATE2_HI
	movdqu (6 * 16)(%rdi), STATE3_LO
	movdqu (7 * 16)(%rdi), STATE3_HI
	movdqu (8 * 16)(%rdi), STATE4_LO
	movdqu (9 * 16)(%rdi), STATE4_HI

	/* encrypt message: */
	call __load_partial

	movdqa STATE1_LO, T1_LO
	movdqa STATE1_HI, T1_HI
	rol3 T1_HI, T1_LO
	movdqa MSG_LO, T0_LO
	movdqa MSG_HI, T0_HI
	pxor T1_LO, T0_LO
	pxor T1_HI, T0_HI
	pxor STATE0_LO, T0_LO
	pxor STATE0_HI, T0_HI
	movdqa STATE2_LO, T1_LO
	movdqa STATE2_HI, T1_HI
	pand STATE3_LO, T1_LO
	pand STATE3_HI, T1_HI
	pxor T1_LO, T0_LO
	pxor T1_HI, T0_HI

	/* T0 = ciphertext; MSG still holds the zero-padded plaintext */
	call __store_partial

	call __morus1280_update

	/* store the state: */
	movdqu STATE0_LO, (0 * 16)(%rdi)
	movdqu STATE0_HI, (1 * 16)(%rdi)
	movdqu STATE1_LO, (2 * 16)(%rdi)
	movdqu STATE1_HI, (3 * 16)(%rdi)
	movdqu STATE2_LO, (4 * 16)(%rdi)
	movdqu STATE2_HI, (5 * 16)(%rdi)
	movdqu STATE3_LO, (6 * 16)(%rdi)
	movdqu STATE3_HI, (7 * 16)(%rdi)
	movdqu STATE4_LO, (8 * 16)(%rdi)
	movdqu STATE4_HI, (9 * 16)(%rdi)

	FRAME_END
	/* without this ret, execution would fall through into the next function */
	ret
ENDPROC(crypto_morus1280_sse2_enc_tail)
|
||||
|
||||
/*
 * void crypto_morus1280_sse2_dec(void *state, const void *src, void *dst,
 *                                unsigned int length);
 *
 * Decrypts whole 32-byte blocks.  For each block:
 *   dst = src ^ STATE0 ^ rol3(STATE1) ^ (STATE2 & STATE3)
 * and the state is then updated with the recovered plaintext (held in
 * MSG).  Trailing bytes (length % 32) are left for
 * crypto_morus1280_sse2_dec_tail, and a length below 32 returns without
 * touching *state.
 *
 * %rdi - state
 * %rsi - src
 * %rdx - dst
 * %rcx - length in bytes
 *
 * The aligned loop is used only when both src and dst are 16-byte aligned.
 */
ENTRY(crypto_morus1280_sse2_dec)
	FRAME_BEGIN

	cmp $32, %rcx
	jb .Ldec_out

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0_LO
	movdqu (1 * 16)(%rdi), STATE0_HI
	movdqu (2 * 16)(%rdi), STATE1_LO
	movdqu (3 * 16)(%rdi), STATE1_HI
	movdqu (4 * 16)(%rdi), STATE2_LO
	movdqu (5 * 16)(%rdi), STATE2_HI
	movdqu (6 * 16)(%rdi), STATE3_LO
	movdqu (7 * 16)(%rdi), STATE3_HI
	movdqu (8 * 16)(%rdi), STATE4_LO
	movdqu (9 * 16)(%rdi), STATE4_HI

	mov %rsi, %r8
	or  %rdx, %r8
	and $0xF, %r8
	jnz .Ldec_u_loop

.align 4
.Ldec_a_loop:
	movdqa  0(%rsi), MSG_LO
	movdqa 16(%rsi), MSG_HI
	pxor STATE0_LO, MSG_LO
	pxor STATE0_HI, MSG_HI
	movdqa STATE1_LO, T1_LO
	movdqa STATE1_HI, T1_HI
	rol3 T1_HI, T1_LO
	pxor T1_LO, MSG_LO
	pxor T1_HI, MSG_HI
	movdqa STATE2_LO, T1_LO
	movdqa STATE2_HI, T1_HI
	pand STATE3_LO, T1_LO
	pand STATE3_HI, T1_HI
	pxor T1_LO, MSG_LO
	pxor T1_HI, MSG_HI
	movdqa MSG_LO,  0(%rdx)
	movdqa MSG_HI, 16(%rdx)

	call __morus1280_update
	sub $32, %rcx
	add $32, %rsi
	add $32, %rdx
	cmp $32, %rcx
	jae .Ldec_a_loop	/* unsigned, matching the entry check */

	jmp .Ldec_cont
.align 4
.Ldec_u_loop:
	movdqu  0(%rsi), MSG_LO
	movdqu 16(%rsi), MSG_HI
	pxor STATE0_LO, MSG_LO
	pxor STATE0_HI, MSG_HI
	movdqa STATE1_LO, T1_LO
	movdqa STATE1_HI, T1_HI
	rol3 T1_HI, T1_LO
	pxor T1_LO, MSG_LO
	pxor T1_HI, MSG_HI
	movdqa STATE2_LO, T1_LO
	movdqa STATE2_HI, T1_HI
	pand STATE3_LO, T1_LO
	pand STATE3_HI, T1_HI
	pxor T1_LO, MSG_LO
	pxor T1_HI, MSG_HI
	movdqu MSG_LO,  0(%rdx)
	movdqu MSG_HI, 16(%rdx)

	call __morus1280_update
	sub $32, %rcx
	add $32, %rsi
	add $32, %rdx
	cmp $32, %rcx
	jae .Ldec_u_loop	/* unsigned, matching the entry check */

.Ldec_cont:
	/* store the state: */
	movdqu STATE0_LO, (0 * 16)(%rdi)
	movdqu STATE0_HI, (1 * 16)(%rdi)
	movdqu STATE1_LO, (2 * 16)(%rdi)
	movdqu STATE1_HI, (3 * 16)(%rdi)
	movdqu STATE2_LO, (4 * 16)(%rdi)
	movdqu STATE2_HI, (5 * 16)(%rdi)
	movdqu STATE3_LO, (6 * 16)(%rdi)
	movdqu STATE3_HI, (7 * 16)(%rdi)
	movdqu STATE4_LO, (8 * 16)(%rdi)
	movdqu STATE4_HI, (9 * 16)(%rdi)

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_morus1280_sse2_dec)
|
||||
|
||||
/*
 * void crypto_morus1280_sse2_dec_tail(void *state, const void *src, void *dst,
 *                                     unsigned int length);
 *
 * Decrypts the final partial block.  The ciphertext is loaded zero-padded
 * by __load_partial and XORed with the keystream
 *   STATE0 ^ rol3(STATE1) ^ (STATE2 & STATE3),
 * the first `length` bytes are written by __store_partial, and the bytes
 * of MSG at index >= length (which hold raw keystream, not plaintext) are
 * cleared before the state update.
 *
 * %rdi - state
 * %rsi - src
 * %rdx - dst
 * %rcx - length in bytes (the glue code in this file passes 1..31)
 */
ENTRY(crypto_morus1280_sse2_dec_tail)
	FRAME_BEGIN

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0_LO
	movdqu (1 * 16)(%rdi), STATE0_HI
	movdqu (2 * 16)(%rdi), STATE1_LO
	movdqu (3 * 16)(%rdi), STATE1_HI
	movdqu (4 * 16)(%rdi), STATE2_LO
	movdqu (5 * 16)(%rdi), STATE2_HI
	movdqu (6 * 16)(%rdi), STATE3_LO
	movdqu (7 * 16)(%rdi), STATE3_HI
	movdqu (8 * 16)(%rdi), STATE4_LO
	movdqu (9 * 16)(%rdi), STATE4_HI

	/* decrypt message: */
	call __load_partial

	pxor STATE0_LO, MSG_LO
	pxor STATE0_HI, MSG_HI
	movdqa STATE1_LO, T1_LO
	movdqa STATE1_HI, T1_HI
	rol3 T1_HI, T1_LO
	pxor T1_LO, MSG_LO
	pxor T1_HI, MSG_HI
	movdqa STATE2_LO, T1_LO
	movdqa STATE2_HI, T1_HI
	pand STATE3_LO, T1_LO
	pand STATE3_HI, T1_HI
	pxor T1_LO, MSG_LO
	pxor T1_HI, MSG_HI
	movdqa MSG_LO, T0_LO
	movdqa MSG_HI, T0_HI

	call __store_partial

	/*
	 * mask with byte count: broadcast the low byte of %rcx to all 16
	 * lanes, then compare against the byte indices 0..31 so that lanes
	 * with index < length become 0xff and the rest 0x00.
	 */
	movq %rcx, T0_LO
	punpcklbw T0_LO, T0_LO
	punpcklbw T0_LO, T0_LO
	punpcklbw T0_LO, T0_LO
	punpcklbw T0_LO, T0_LO
	movdqa T0_LO, T0_HI
	/* RIP-relative, so no absolute relocation is needed */
	movdqa .Lmorus640_counter_0(%rip), T1_LO
	movdqa .Lmorus640_counter_1(%rip), T1_HI
	pcmpgtb T1_LO, T0_LO
	pcmpgtb T1_HI, T0_HI
	pand T0_LO, MSG_LO
	pand T0_HI, MSG_HI

	call __morus1280_update

	/* store the state: */
	movdqu STATE0_LO, (0 * 16)(%rdi)
	movdqu STATE0_HI, (1 * 16)(%rdi)
	movdqu STATE1_LO, (2 * 16)(%rdi)
	movdqu STATE1_HI, (3 * 16)(%rdi)
	movdqu STATE2_LO, (4 * 16)(%rdi)
	movdqu STATE2_HI, (5 * 16)(%rdi)
	movdqu STATE3_LO, (6 * 16)(%rdi)
	movdqu STATE3_HI, (7 * 16)(%rdi)
	movdqu STATE4_LO, (8 * 16)(%rdi)
	movdqu STATE4_HI, (9 * 16)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_sse2_dec_tail)
|
||||
|
||||
/*
 * void crypto_morus1280_sse2_final(void *state, void *tag_xor,
 *                                  u64 assoclen, u64 cryptlen);
 *
 * Finalisation: XORs STATE0 into STATE4, runs 10 updates with the length
 * block (assoclen * 8 in the low qword, cryptlen * 8 in the next qword,
 * upper 16 bytes zero) and XORs
 *   STATE0 ^ rol3(STATE1) ^ (STATE2 & STATE3)
 * into the 32 bytes at tag_xor.  The state is not written back.
 *
 * %rdi - state
 * %rsi - tag_xor (32 bytes, read and written with unaligned accesses)
 * %rdx - assoclen in bytes
 * %rcx - cryptlen in bytes
 */
ENTRY(crypto_morus1280_sse2_final)
	FRAME_BEGIN

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0_LO
	movdqu (1 * 16)(%rdi), STATE0_HI
	movdqu (2 * 16)(%rdi), STATE1_LO
	movdqu (3 * 16)(%rdi), STATE1_HI
	movdqu (4 * 16)(%rdi), STATE2_LO
	movdqu (5 * 16)(%rdi), STATE2_HI
	movdqu (6 * 16)(%rdi), STATE3_LO
	movdqu (7 * 16)(%rdi), STATE3_HI
	movdqu (8 * 16)(%rdi), STATE4_LO
	movdqu (9 * 16)(%rdi), STATE4_HI

	/* xor state[0] into state[4]: */
	pxor STATE0_LO, STATE4_LO
	pxor STATE0_HI, STATE4_HI

	/* prepare length block: */
	movq %rdx, MSG_LO
	movq %rcx, T0_LO
	pslldq $8, T0_LO
	pxor T0_LO, MSG_LO
	psllq $3, MSG_LO /* multiply by 8 (to get bit count) */
	pxor MSG_HI, MSG_HI

	/* update state 10 times with the length block: */
.rept 10
	call __morus1280_update
.endr

	/* xor tag: */
	movdqu  0(%rsi), MSG_LO
	movdqu 16(%rsi), MSG_HI

	pxor STATE0_LO, MSG_LO
	pxor STATE0_HI, MSG_HI
	/*
	 * rol3 uses T0_LO as its scratch register, so the value being
	 * rotated must live elsewhere: with T0_LO as the \lo operand the
	 * macro overwrites \lo before its last read and the top qword of
	 * the result comes out wrong.  Use T1, as the enc/dec paths do.
	 */
	movdqa STATE1_LO, T1_LO
	movdqa STATE1_HI, T1_HI
	rol3 T1_HI, T1_LO
	pxor T1_LO, MSG_LO
	pxor T1_HI, MSG_HI
	movdqa STATE2_LO, T0_LO
	movdqa STATE2_HI, T0_HI
	pand STATE3_LO, T0_LO
	pand STATE3_HI, T0_HI
	pxor T0_LO, MSG_LO
	pxor T0_HI, MSG_HI

	movdqu MSG_LO,  0(%rsi)
	movdqu MSG_HI, 16(%rsi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_sse2_final)
|
68
arch/x86/crypto/morus1280-sse2-glue.c
Normal file
68
arch/x86/crypto/morus1280-sse2-glue.c
Normal file
@ -0,0 +1,68 @@
|
||||
/*
|
||||
* The MORUS-1280 Authenticated-Encryption Algorithm
|
||||
* Glue for SSE2 implementation
|
||||
*
|
||||
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*/
|
||||
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/morus1280_glue.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
|
||||
asmlinkage void crypto_morus1280_sse2_init(void *state, const void *key,
|
||||
const void *iv);
|
||||
asmlinkage void crypto_morus1280_sse2_ad(void *state, const void *data,
|
||||
unsigned int length);
|
||||
|
||||
asmlinkage void crypto_morus1280_sse2_enc(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
asmlinkage void crypto_morus1280_sse2_dec(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
|
||||
asmlinkage void crypto_morus1280_sse2_enc_tail(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
asmlinkage void crypto_morus1280_sse2_dec_tail(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
|
||||
asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor,
|
||||
u64 assoclen, u64 cryptlen);
|
||||
|
||||
MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350);
|
||||
|
||||
static const struct x86_cpu_id sse2_cpu_id[] = {
|
||||
X86_FEATURE_MATCH(X86_FEATURE_XMM2),
|
||||
{}
|
||||
};
|
||||
MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id);
|
||||
|
||||
/*
 * Module load hook: register the SSE2 MORUS-1280 AEAD algorithms when the
 * running CPU matches sse2_cpu_id, otherwise fail with -ENODEV.
 */
static int __init crypto_morus1280_sse2_module_init(void)
{
	if (x86_match_cpu(sse2_cpu_id))
		return crypto_register_aeads(crypto_morus1280_sse2_algs,
				ARRAY_SIZE(crypto_morus1280_sse2_algs));

	return -ENODEV;
}
|
||||
|
||||
/* Module unload hook: drop every AEAD algorithm this module registered. */
static void __exit crypto_morus1280_sse2_module_exit(void)
{
	crypto_unregister_aeads(crypto_morus1280_sse2_algs,
				ARRAY_SIZE(crypto_morus1280_sse2_algs));
}
|
||||
|
||||
module_init(crypto_morus1280_sse2_module_init);
|
||||
module_exit(crypto_morus1280_sse2_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- SSE2 implementation");
|
||||
MODULE_ALIAS_CRYPTO("morus1280");
|
||||
MODULE_ALIAS_CRYPTO("morus1280-sse2");
|
302
arch/x86/crypto/morus1280_glue.c
Normal file
302
arch/x86/crypto/morus1280_glue.c
Normal file
@ -0,0 +1,302 @@
|
||||
/*
|
||||
* The MORUS-1280 Authenticated-Encryption Algorithm
|
||||
* Common x86 SIMD glue skeleton
|
||||
*
|
||||
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*/
|
||||
|
||||
#include <crypto/cryptd.h>
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/morus1280_glue.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <asm/fpu/api.h>
|
||||
|
||||
struct morus1280_state {
|
||||
struct morus1280_block s[MORUS_STATE_BLOCKS];
|
||||
};
|
||||
|
||||
struct morus1280_ops {
|
||||
int (*skcipher_walk_init)(struct skcipher_walk *walk,
|
||||
struct aead_request *req, bool atomic);
|
||||
|
||||
void (*crypt_blocks)(void *state, const void *src, void *dst,
|
||||
unsigned int length);
|
||||
void (*crypt_tail)(void *state, const void *src, void *dst,
|
||||
unsigned int length);
|
||||
};
|
||||
|
||||
static void crypto_morus1280_glue_process_ad(
|
||||
struct morus1280_state *state,
|
||||
const struct morus1280_glue_ops *ops,
|
||||
struct scatterlist *sg_src, unsigned int assoclen)
|
||||
{
|
||||
struct scatter_walk walk;
|
||||
struct morus1280_block buf;
|
||||
unsigned int pos = 0;
|
||||
|
||||
scatterwalk_start(&walk, sg_src);
|
||||
while (assoclen != 0) {
|
||||
unsigned int size = scatterwalk_clamp(&walk, assoclen);
|
||||
unsigned int left = size;
|
||||
void *mapped = scatterwalk_map(&walk);
|
||||
const u8 *src = (const u8 *)mapped;
|
||||
|
||||
if (pos + size >= MORUS1280_BLOCK_SIZE) {
|
||||
if (pos > 0) {
|
||||
unsigned int fill = MORUS1280_BLOCK_SIZE - pos;
|
||||
memcpy(buf.bytes + pos, src, fill);
|
||||
ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE);
|
||||
pos = 0;
|
||||
left -= fill;
|
||||
src += fill;
|
||||
}
|
||||
|
||||
ops->ad(state, src, left);
|
||||
src += left & ~(MORUS1280_BLOCK_SIZE - 1);
|
||||
left &= MORUS1280_BLOCK_SIZE - 1;
|
||||
}
|
||||
|
||||
memcpy(buf.bytes + pos, src, left);
|
||||
|
||||
pos += left;
|
||||
assoclen -= size;
|
||||
scatterwalk_unmap(mapped);
|
||||
scatterwalk_advance(&walk, size);
|
||||
scatterwalk_done(&walk, 0, assoclen);
|
||||
}
|
||||
|
||||
if (pos > 0) {
|
||||
memset(buf.bytes + pos, 0, MORUS1280_BLOCK_SIZE - pos);
|
||||
ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_morus1280_glue_process_crypt(struct morus1280_state *state,
|
||||
struct morus1280_ops ops,
|
||||
struct aead_request *req)
|
||||
{
|
||||
struct skcipher_walk walk;
|
||||
u8 *cursor_src, *cursor_dst;
|
||||
unsigned int chunksize, base;
|
||||
|
||||
ops.skcipher_walk_init(&walk, req, false);
|
||||
|
||||
while (walk.nbytes) {
|
||||
cursor_src = walk.src.virt.addr;
|
||||
cursor_dst = walk.dst.virt.addr;
|
||||
chunksize = walk.nbytes;
|
||||
|
||||
ops.crypt_blocks(state, cursor_src, cursor_dst, chunksize);
|
||||
|
||||
base = chunksize & ~(MORUS1280_BLOCK_SIZE - 1);
|
||||
cursor_src += base;
|
||||
cursor_dst += base;
|
||||
chunksize &= MORUS1280_BLOCK_SIZE - 1;
|
||||
|
||||
if (chunksize > 0)
|
||||
ops.crypt_tail(state, cursor_src, cursor_dst,
|
||||
chunksize);
|
||||
|
||||
skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
}
|
||||
|
||||
int crypto_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
|
||||
|
||||
if (keylen == MORUS1280_BLOCK_SIZE) {
|
||||
memcpy(ctx->key.bytes, key, MORUS1280_BLOCK_SIZE);
|
||||
} else if (keylen == MORUS1280_BLOCK_SIZE / 2) {
|
||||
memcpy(ctx->key.bytes, key, keylen);
|
||||
memcpy(ctx->key.bytes + keylen, key, keylen);
|
||||
} else {
|
||||
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setkey);
|
||||
|
||||
int crypto_morus1280_glue_setauthsize(struct crypto_aead *tfm,
|
||||
unsigned int authsize)
|
||||
{
|
||||
return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setauthsize);
|
||||
|
||||
static void crypto_morus1280_glue_crypt(struct aead_request *req,
|
||||
struct morus1280_ops ops,
|
||||
unsigned int cryptlen,
|
||||
struct morus1280_block *tag_xor)
|
||||
{
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct morus1280_state state;
|
||||
|
||||
kernel_fpu_begin();
|
||||
|
||||
ctx->ops->init(&state, &ctx->key, req->iv);
|
||||
crypto_morus1280_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
|
||||
crypto_morus1280_glue_process_crypt(&state, ops, req);
|
||||
ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
|
||||
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
int crypto_morus1280_glue_encrypt(struct aead_request *req)
|
||||
{
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct morus1280_ops OPS = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_encrypt,
|
||||
.crypt_blocks = ctx->ops->enc,
|
||||
.crypt_tail = ctx->ops->enc_tail,
|
||||
};
|
||||
|
||||
struct morus1280_block tag = {};
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen;
|
||||
|
||||
crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag);
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->dst,
|
||||
req->assoclen + cryptlen, authsize, 1);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_encrypt);
|
||||
|
||||
int crypto_morus1280_glue_decrypt(struct aead_request *req)
|
||||
{
|
||||
static const u8 zeros[MORUS1280_BLOCK_SIZE] = {};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct morus1280_ops OPS = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_decrypt,
|
||||
.crypt_blocks = ctx->ops->dec,
|
||||
.crypt_tail = ctx->ops->dec_tail,
|
||||
};
|
||||
|
||||
struct morus1280_block tag;
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen - authsize;
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->src,
|
||||
req->assoclen + cryptlen, authsize, 0);
|
||||
|
||||
crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag);
|
||||
|
||||
return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_decrypt);
|
||||
|
||||
void crypto_morus1280_glue_init_ops(struct crypto_aead *aead,
|
||||
const struct morus1280_glue_ops *ops)
|
||||
{
|
||||
struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
|
||||
ctx->ops = ops;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_init_ops);
|
||||
|
||||
int cryptd_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setkey);
|
||||
|
||||
int cryptd_morus1280_glue_setauthsize(struct crypto_aead *aead,
|
||||
unsigned int authsize)
|
||||
{
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setauthsize);
|
||||
|
||||
int cryptd_morus1280_glue_encrypt(struct aead_request *req)
|
||||
{
|
||||
struct crypto_aead *aead = crypto_aead_reqtfm(req);
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
aead = &cryptd_tfm->base;
|
||||
if (irq_fpu_usable() && (!in_atomic() ||
|
||||
!cryptd_aead_queued(cryptd_tfm)))
|
||||
aead = cryptd_aead_child(cryptd_tfm);
|
||||
|
||||
aead_request_set_tfm(req, aead);
|
||||
|
||||
return crypto_aead_encrypt(req);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_encrypt);
|
||||
|
||||
int cryptd_morus1280_glue_decrypt(struct aead_request *req)
|
||||
{
|
||||
struct crypto_aead *aead = crypto_aead_reqtfm(req);
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
aead = &cryptd_tfm->base;
|
||||
if (irq_fpu_usable() && (!in_atomic() ||
|
||||
!cryptd_aead_queued(cryptd_tfm)))
|
||||
aead = cryptd_aead_child(cryptd_tfm);
|
||||
|
||||
aead_request_set_tfm(req, aead);
|
||||
|
||||
return crypto_aead_decrypt(req);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_decrypt);
|
||||
|
||||
int cryptd_morus1280_glue_init_tfm(struct crypto_aead *aead)
|
||||
{
|
||||
struct cryptd_aead *cryptd_tfm;
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
const char *name = crypto_aead_alg(aead)->base.cra_driver_name;
|
||||
char internal_name[CRYPTO_MAX_ALG_NAME];
|
||||
|
||||
if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name)
|
||||
>= CRYPTO_MAX_ALG_NAME)
|
||||
return -ENAMETOOLONG;
|
||||
|
||||
cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL,
|
||||
CRYPTO_ALG_INTERNAL);
|
||||
if (IS_ERR(cryptd_tfm))
|
||||
return PTR_ERR(cryptd_tfm);
|
||||
|
||||
*ctx = cryptd_tfm;
|
||||
crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_init_tfm);
|
||||
|
||||
/*
 * Transform destructor: release the cryptd AEAD whose pointer is stored in
 * the context of @aead.
 */
void cryptd_morus1280_glue_exit_tfm(struct crypto_aead *aead)
{
	struct cryptd_aead **slot = crypto_aead_ctx(aead);
	struct cryptd_aead *cryptd_tfm = *slot;

	cryptd_free_aead(cryptd_tfm);
}
|
||||
EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_exit_tfm);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("MORUS-1280 AEAD mode -- glue for x86 optimizations");
|
614
arch/x86/crypto/morus640-sse2-asm.S
Normal file
614
arch/x86/crypto/morus640-sse2-asm.S
Normal file
@ -0,0 +1,614 @@
|
||||
/*
|
||||
* SSE2 implementation of MORUS-640
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
#define SHUFFLE_MASK(i0, i1, i2, i3) \
|
||||
(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
|
||||
|
||||
#define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
|
||||
#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
|
||||
#define MASK3 SHUFFLE_MASK(1, 2, 3, 0)
|
||||
|
||||
#define STATE0 %xmm0
|
||||
#define STATE1 %xmm1
|
||||
#define STATE2 %xmm2
|
||||
#define STATE3 %xmm3
|
||||
#define STATE4 %xmm4
|
||||
#define KEY %xmm5
|
||||
#define MSG %xmm5
|
||||
#define T0 %xmm6
|
||||
#define T1 %xmm7
|
||||
|
||||
.section .rodata.cst16.morus640_const, "aM", @progbits, 32
|
||||
.align 16
|
||||
.Lmorus640_const_0:
|
||||
.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
|
||||
.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
|
||||
.Lmorus640_const_1:
|
||||
.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
|
||||
.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
|
||||
|
||||
.section .rodata.cst16.morus640_counter, "aM", @progbits, 16
|
||||
.align 16
|
||||
.Lmorus640_counter:
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
|
||||
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
|
||||
|
||||
.text
|
||||
|
||||
/*
 * One MORUS-640 round:
 *   s0 ^= (s1 & s2) ^ s3
 *   s0  = each 32-bit lane of s0 rotated left by b bits
 *   s3  = 32-bit lanes of s3 permuted by shuffle mask w
 *
 * s0..s3 must be distinct registers.  s4 is accepted so that call sites can
 * name all five state registers, but the macro does not touch it.
 * Clobbers T0.
 */
.macro morus640_round s0, s1, s2, s3, s4, b, w
	/* s0 ^= s3 ^ (s1 & s2) */
	pxor \s3, \s0
	movdqa \s1, T0
	pand \s2, T0
	pxor T0, \s0
	/* rotate left by b: the two shifted halves never overlap */
	movdqa \s0, T0
	psrld $(32 - \b), \s0
	pslld $\b, T0
	pxor T0, \s0
	/* permute the lanes of s3 in place */
	pshufd $\w, \s3, \s3
.endm
|
||||
|
||||
/*
 * __morus640_update: internal ABI
 *
 * Runs the five rounds of one state update.  MSG is xored into STATE1,
 * STATE2, STATE3 and STATE4 in turn, each just before the round that
 * rewrites that register.
 *
 * input:
 *   STATE[0-4] - input state
 *   MSG        - message block
 * output:
 *   STATE[0-4] - output state
 * changed:
 *   T0
 */
__morus640_update:
	morus640_round	STATE0, STATE1, STATE2, STATE3, STATE4,  5, MASK1
	pxor		MSG, STATE1
	morus640_round	STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
	pxor		MSG, STATE2
	morus640_round	STATE2, STATE3, STATE4, STATE0, STATE1,  7, MASK3
	pxor		MSG, STATE3
	morus640_round	STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
	pxor		MSG, STATE4
	morus640_round	STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1
	ret
ENDPROC(__morus640_update)
|
||||
|
||||
|
||||
/*
 * __morus640_update_zero: internal ABI
 *
 * Same five rounds as __morus640_update, but with no message block xored
 * in; the MSG/KEY register is neither read nor written.
 *
 * input:
 *   STATE[0-4] - input state
 * output:
 *   STATE[0-4] - output state
 * changed:
 *   T0
 */
__morus640_update_zero:
	morus640_round	STATE0, STATE1, STATE2, STATE3, STATE4,  5, MASK1
	morus640_round	STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
	morus640_round	STATE2, STATE3, STATE4, STATE0, STATE1,  7, MASK3
	morus640_round	STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
	morus640_round	STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1
	ret
ENDPROC(__morus640_update_zero)
|
||||
|
||||
/*
 * __load_partial: internal ABI
 *
 * Loads the first %rcx bytes at %rsi into MSG in memory order and zeroes
 * the remaining bytes of MSG.  The block is assembled from the end of the
 * data towards the front, using one 1-, 2-, 4- and 8-byte load per set bit
 * of the count, so no byte outside the given range is read.
 * The offsets are derived by masking the count and are only meaningful for
 * counts below 16.
 *
 * input:
 *   %rsi - src
 *   %rcx - bytes
 * output:
 *   MSG  - message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 */
__load_partial:
	xor %r9d, %r9d			/* 32-bit write clears all of %r9 */
	pxor MSG, MSG

	/* bit 0 set: fetch the single trailing byte */
	mov %rcx, %r8
	and $0x1, %r8
	jz .Lld_partial_1

	mov %rcx, %r8
	and $0x1E, %r8			/* offset = count with bit 0 cleared */
	add %rsi, %r8
	mov (%r8), %r9b

.Lld_partial_1:
	/* bit 1 set: make room and fetch the 2 bytes in front of it */
	mov %rcx, %r8
	and $0x2, %r8
	jz .Lld_partial_2

	mov %rcx, %r8
	and $0x1C, %r8
	add %rsi, %r8
	shl $16, %r9
	mov (%r8), %r9w

.Lld_partial_2:
	/* bit 2 set: make room and fetch the 4 bytes in front of that */
	mov %rcx, %r8
	and $0x4, %r8
	jz .Lld_partial_4

	mov %rcx, %r8
	and $0x18, %r8
	add %rsi, %r8
	shl $32, %r9
	mov (%r8), %r8d			/* zero-extends into %r8 */
	xor %r8, %r9

.Lld_partial_4:
	/* up to 7 bytes collected so far; move them into the vector */
	movq %r9, MSG

	/* bit 3 set: shift them to the high half and fetch 8 leading bytes */
	mov %rcx, %r8
	and $0x8, %r8
	jz .Lld_partial_8

	mov %rcx, %r8
	and $0x10, %r8
	add %rsi, %r8
	pslldq $8, MSG
	movq (%r8), T0
	pxor T0, MSG

.Lld_partial_8:
	ret
ENDPROC(__load_partial)
|
||||
|
||||
/*
 * __store_partial: internal ABI
 *
 * Writes the first %rcx bytes of T0 to %rdx using at most one 8-, 4-, 2-
 * and 1-byte store, so nothing beyond dst + bytes is written.
 *
 * input:
 *   %rdx - dst
 *   %rcx - bytes
 *   T0   - message block
 * changed:
 *   T0   (shifted right by 8 bytes when bytes >= 8)
 *   %r8
 *   %r9
 *   %r10
 */
__store_partial:
	mov %rcx, %r8			/* %r8 = bytes still to store */
	mov %rdx, %r9			/* %r9 = write cursor */

	movq T0, %r10			/* %r10 = next (up to) 8 bytes */

	/* The count is unsigned, hence jb rather than jl throughout. */
	cmp $8, %r8
	jb .Lst_partial_8

	mov %r10, (%r9)
	psrldq $8, T0
	movq T0, %r10

	sub $8, %r8
	add $8, %r9

.Lst_partial_8:
	cmp $4, %r8
	jb .Lst_partial_4

	mov %r10d, (%r9)
	shr $32, %r10

	sub $4, %r8
	add $4, %r9

.Lst_partial_4:
	cmp $2, %r8
	jb .Lst_partial_2

	mov %r10w, (%r9)
	shr $16, %r10

	sub $2, %r8
	add $2, %r9

.Lst_partial_2:
	cmp $1, %r8
	jb .Lst_partial_1

	mov %r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
|
||||
|
||||
/*
 * void crypto_morus640_sse2_init(void *state, const void *key, const void *iv);
 *
 * Builds the initial cipher state:
 *   STATE0 = IV, STATE1 = key, STATE2 = all ones,
 *   STATE3 / STATE4 = the two MORUS constants,
 * runs 16 message-less updates, xors the key into STATE1 once more and
 * stores the five state blocks to *state.
 *
 * %rdi - state (written, no alignment required)
 * %rsi - key   (16 bytes, no alignment required)
 * %rdx - iv    (16 bytes, no alignment required)
 */
ENTRY(crypto_morus640_sse2_init)
	FRAME_BEGIN

	/* load IV: */
	movdqu (%rdx), STATE0
	/* load key: */
	movdqu (%rsi), KEY
	movdqa KEY, STATE1
	/* load all ones: */
	pcmpeqd STATE2, STATE2
	/* load the constants (RIP-relative, no absolute relocation): */
	movdqa .Lmorus640_const_0(%rip), STATE3
	movdqa .Lmorus640_const_1(%rip), STATE4

	/*
	 * update 16 times with zero; KEY survives because
	 * __morus640_update_zero only changes the state and T0
	 */
	.rept 16
	call __morus640_update_zero
	.endr

	/* xor-in the key again after updates: */
	pxor KEY, STATE1

	/* store the state: */
	movdqu STATE0, (0 * 16)(%rdi)
	movdqu STATE1, (1 * 16)(%rdi)
	movdqu STATE2, (2 * 16)(%rdi)
	movdqu STATE3, (3 * 16)(%rdi)
	movdqu STATE4, (4 * 16)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_init)
|
||||
|
||||
/*
 * void crypto_morus640_sse2_ad(void *state, const void *data,
 *                              unsigned int length);
 *
 * Absorbs every whole 16-byte block of associated data into the state.
 * Bytes after the last whole block are ignored, and a length below 16
 * returns without touching *state.
 *
 * %rdi - state
 * %rsi - data (any alignment; an aligned pointer takes the movdqa loop)
 * %rdx - length in bytes
 */
ENTRY(crypto_morus640_sse2_ad)
	FRAME_BEGIN

	cmp $16, %rdx
	jb .Lad_out

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0
	movdqu (1 * 16)(%rdi), STATE1
	movdqu (2 * 16)(%rdi), STATE2
	movdqu (3 * 16)(%rdi), STATE3
	movdqu (4 * 16)(%rdi), STATE4

	/* pick the loop according to the alignment of data */
	mov %rsi, %r8
	and $0xF, %r8
	jnz .Lad_u_loop

.align 4
.Lad_a_loop:
	movdqa (%rsi), MSG
	call __morus640_update
	sub $16, %rdx
	add $16, %rsi
	/* unsigned compare, matching the jb at function entry */
	cmp $16, %rdx
	jae .Lad_a_loop

	jmp .Lad_cont
.align 4
.Lad_u_loop:
	movdqu (%rsi), MSG
	call __morus640_update
	sub $16, %rdx
	add $16, %rsi
	cmp $16, %rdx
	jae .Lad_u_loop

.Lad_cont:
	/* store the state: */
	movdqu STATE0, (0 * 16)(%rdi)
	movdqu STATE1, (1 * 16)(%rdi)
	movdqu STATE2, (2 * 16)(%rdi)
	movdqu STATE3, (3 * 16)(%rdi)
	movdqu STATE4, (4 * 16)(%rdi)

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_ad)
|
||||
|
||||
/*
 * void crypto_morus640_sse2_enc(void *state, const void *src, void *dst,
 *                               unsigned int length);
 *
 * Encrypts every whole 16-byte block of src into dst and absorbs the
 * plaintext into the state.  For each block:
 *   ciphertext = plaintext ^ STATE0 ^ shuffle(STATE1, MASK3)
 *                          ^ (STATE2 & STATE3)
 * Bytes after the last whole block are left alone, and a length below 16
 * returns without touching *state or dst.
 *
 * %rdi - state
 * %rsi - src
 * %rdx - dst
 * %rcx - length in bytes
 */
ENTRY(crypto_morus640_sse2_enc)
	FRAME_BEGIN

	cmp $16, %rcx
	jb .Lenc_out

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0
	movdqu (1 * 16)(%rdi), STATE1
	movdqu (2 * 16)(%rdi), STATE2
	movdqu (3 * 16)(%rdi), STATE3
	movdqu (4 * 16)(%rdi), STATE4

	/* the movdqa loop needs both src and dst 16-byte aligned */
	mov %rsi, %r8
	or  %rdx, %r8
	and $0xF, %r8
	jnz .Lenc_u_loop

.align 4
.Lenc_a_loop:
	movdqa (%rsi), MSG
	movdqa MSG, T0
	pxor STATE0, T0
	pshufd $MASK3, STATE1, T1
	pxor T1, T0
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, T0
	movdqa T0, (%rdx)

	/* MSG still holds the plaintext here */
	call __morus640_update
	sub $16, %rcx
	add $16, %rsi
	add $16, %rdx
	/* unsigned compare, matching the jb at function entry */
	cmp $16, %rcx
	jae .Lenc_a_loop

	jmp .Lenc_cont
.align 4
.Lenc_u_loop:
	movdqu (%rsi), MSG
	movdqa MSG, T0
	pxor STATE0, T0
	pshufd $MASK3, STATE1, T1
	pxor T1, T0
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, T0
	movdqu T0, (%rdx)

	call __morus640_update
	sub $16, %rcx
	add $16, %rsi
	add $16, %rdx
	cmp $16, %rcx
	jae .Lenc_u_loop

.Lenc_cont:
	/* store the state: */
	movdqu STATE0, (0 * 16)(%rdi)
	movdqu STATE1, (1 * 16)(%rdi)
	movdqu STATE2, (2 * 16)(%rdi)
	movdqu STATE3, (3 * 16)(%rdi)
	movdqu STATE4, (4 * 16)(%rdi)

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_enc)
|
||||
|
||||
/*
 * void crypto_morus640_sse2_enc_tail(void *state, const void *src, void *dst,
 *                                    unsigned int length);
 *
 * Encrypts one trailing partial block.  The length bytes at src are loaded
 * zero-padded into MSG, xored with the keystream, and only the first length
 * bytes of the result are written to dst.  The zero-padded plaintext is
 * then absorbed into the state.
 *
 * %rdi - state
 * %rsi - src
 * %rdx - dst
 * %rcx - length in bytes (consumed by __load_partial / __store_partial)
 */
ENTRY(crypto_morus640_sse2_enc_tail)
	FRAME_BEGIN

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0
	movdqu (1 * 16)(%rdi), STATE1
	movdqu (2 * 16)(%rdi), STATE2
	movdqu (3 * 16)(%rdi), STATE3
	movdqu (4 * 16)(%rdi), STATE4

	/* encrypt message: */
	call __load_partial

	movdqa MSG, T0
	pxor STATE0, T0
	pshufd $MASK3, STATE1, T1
	pxor T1, T0
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, T0

	call __store_partial

	call __morus640_update

	/* store the state: */
	movdqu STATE0, (0 * 16)(%rdi)
	movdqu STATE1, (1 * 16)(%rdi)
	movdqu STATE2, (2 * 16)(%rdi)
	movdqu STATE3, (3 * 16)(%rdi)
	movdqu STATE4, (4 * 16)(%rdi)

	FRAME_END
	/* return here; without it execution runs on into the next function */
	ret
ENDPROC(crypto_morus640_sse2_enc_tail)
|
||||
|
||||
/*
 * void crypto_morus640_sse2_dec(void *state, const void *src, void *dst,
 *                               unsigned int length);
 *
 * Decrypts every whole 16-byte block of src into dst and absorbs the
 * recovered plaintext into the state.  For each block:
 *   plaintext = ciphertext ^ STATE0 ^ shuffle(STATE1, MASK3)
 *                          ^ (STATE2 & STATE3)
 * Bytes after the last whole block are left alone, and a length below 16
 * returns without touching *state or dst.
 *
 * %rdi - state
 * %rsi - src
 * %rdx - dst
 * %rcx - length in bytes
 */
ENTRY(crypto_morus640_sse2_dec)
	FRAME_BEGIN

	cmp $16, %rcx
	jb .Ldec_out

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0
	movdqu (1 * 16)(%rdi), STATE1
	movdqu (2 * 16)(%rdi), STATE2
	movdqu (3 * 16)(%rdi), STATE3
	movdqu (4 * 16)(%rdi), STATE4

	/* the movdqa loop needs both src and dst 16-byte aligned */
	mov %rsi, %r8
	or  %rdx, %r8
	and $0xF, %r8
	jnz .Ldec_u_loop

.align 4
.Ldec_a_loop:
	movdqa (%rsi), MSG
	pxor STATE0, MSG
	pshufd $MASK3, STATE1, T0
	pxor T0, MSG
	movdqa STATE2, T0
	pand STATE3, T0
	pxor T0, MSG
	movdqa MSG, (%rdx)

	/* MSG now holds the plaintext, which is what the update absorbs */
	call __morus640_update
	sub $16, %rcx
	add $16, %rsi
	add $16, %rdx
	/* unsigned compare, matching the jb at function entry */
	cmp $16, %rcx
	jae .Ldec_a_loop

	jmp .Ldec_cont
.align 4
.Ldec_u_loop:
	movdqu (%rsi), MSG
	pxor STATE0, MSG
	pshufd $MASK3, STATE1, T0
	pxor T0, MSG
	movdqa STATE2, T0
	pand STATE3, T0
	pxor T0, MSG
	movdqu MSG, (%rdx)

	call __morus640_update
	sub $16, %rcx
	add $16, %rsi
	add $16, %rdx
	cmp $16, %rcx
	jae .Ldec_u_loop

.Ldec_cont:
	/* store the state: */
	movdqu STATE0, (0 * 16)(%rdi)
	movdqu STATE1, (1 * 16)(%rdi)
	movdqu STATE2, (2 * 16)(%rdi)
	movdqu STATE3, (3 * 16)(%rdi)
	movdqu STATE4, (4 * 16)(%rdi)

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_dec)
|
||||
|
||||
/*
 * void crypto_morus640_sse2_dec_tail(void *state, const void *src, void *dst,
 *                                    unsigned int length);
 *
 * Decrypts one trailing partial block.  The length bytes at src are loaded
 * zero-padded, xored with the keystream, and the first length bytes are
 * written to dst.  Bytes of MSG from index length upwards then hold
 * keystream rather than plaintext, so they are masked to zero before the
 * block is absorbed into the state.
 *
 * %rdi - state
 * %rsi - src
 * %rdx - dst
 * %rcx - length in bytes
 */
ENTRY(crypto_morus640_sse2_dec_tail)
	FRAME_BEGIN

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0
	movdqu (1 * 16)(%rdi), STATE1
	movdqu (2 * 16)(%rdi), STATE2
	movdqu (3 * 16)(%rdi), STATE3
	movdqu (4 * 16)(%rdi), STATE4

	/* decrypt message: */
	call __load_partial

	pxor STATE0, MSG
	pshufd $MASK3, STATE1, T0
	pxor T0, MSG
	movdqa STATE2, T0
	pand STATE3, T0
	pxor T0, MSG
	movdqa MSG, T0

	call __store_partial

	/*
	 * mask with byte count: broadcast the low byte of the count to all
	 * 16 lanes, then compare against the lane indices 0..15 so that
	 * lane i becomes 0xff exactly when i < count
	 */
	movq %rcx, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	movdqa .Lmorus640_counter(%rip), T1
	pcmpgtb T1, T0
	pand T0, MSG

	call __morus640_update

	/* store the state: */
	movdqu STATE0, (0 * 16)(%rdi)
	movdqu STATE1, (1 * 16)(%rdi)
	movdqu STATE2, (2 * 16)(%rdi)
	movdqu STATE3, (3 * 16)(%rdi)
	movdqu STATE4, (4 * 16)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_dec_tail)
|
||||
|
||||
/*
 * void crypto_morus640_sse2_final(void *state, void *tag_xor,
 *                                 u64 assoclen, u64 cryptlen);
 *
 * Finalisation.  STATE0 is xored into STATE4, a length block holding
 * assoclen * 8 in its low qword and cryptlen * 8 in its high qword is
 * absorbed ten times, and the resulting tag
 *   STATE0 ^ shuffle(STATE1, MASK3) ^ (STATE2 & STATE3)
 * is xored into the 16 bytes at tag_xor.  *state is read, never written.
 *
 * %rdi - state
 * %rsi - tag_xor (16 bytes, read and written, no alignment required)
 * %rdx - assoclen in bytes
 * %rcx - cryptlen in bytes
 */
ENTRY(crypto_morus640_sse2_final)
	FRAME_BEGIN

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0
	movdqu (1 * 16)(%rdi), STATE1
	movdqu (2 * 16)(%rdi), STATE2
	movdqu (3 * 16)(%rdi), STATE3
	movdqu (4 * 16)(%rdi), STATE4

	/* xor state[0] into state[4]: */
	pxor STATE0, STATE4

	/* prepare length block: */
	movq %rdx, MSG
	movq %rcx, T0
	pslldq $8, T0
	pxor T0, MSG
	psllq $3, MSG /* multiply by 8 (to get bit count) */

	/* update state ten times with the length block: */
	.rept 10
	call __morus640_update
	.endr

	/* xor tag: */
	movdqu (%rsi), MSG

	pxor STATE0, MSG
	pshufd $MASK3, STATE1, T0
	pxor T0, MSG
	movdqa STATE2, T0
	pand STATE3, T0
	pxor T0, MSG

	movdqu MSG, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_final)
|
68
arch/x86/crypto/morus640-sse2-glue.c
Normal file
68
arch/x86/crypto/morus640-sse2-glue.c
Normal file
@ -0,0 +1,68 @@
|
||||
/*
|
||||
* The MORUS-640 Authenticated-Encryption Algorithm
|
||||
* Glue for SSE2 implementation
|
||||
*
|
||||
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*/
|
||||
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/morus640_glue.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
|
||||
asmlinkage void crypto_morus640_sse2_init(void *state, const void *key,
|
||||
const void *iv);
|
||||
asmlinkage void crypto_morus640_sse2_ad(void *state, const void *data,
|
||||
unsigned int length);
|
||||
|
||||
asmlinkage void crypto_morus640_sse2_enc(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
asmlinkage void crypto_morus640_sse2_dec(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
|
||||
asmlinkage void crypto_morus640_sse2_enc_tail(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
asmlinkage void crypto_morus640_sse2_dec_tail(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
|
||||
asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor,
|
||||
u64 assoclen, u64 cryptlen);
|
||||
|
||||
MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400);
|
||||
|
||||
static const struct x86_cpu_id sse2_cpu_id[] = {
|
||||
X86_FEATURE_MATCH(X86_FEATURE_XMM2),
|
||||
{}
|
||||
};
|
||||
MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id);
|
||||
|
||||
/*
 * Module entry point: register the MORUS-640 SSE2 AEAD algorithms, but
 * only when the CPU matches sse2_cpu_id.  Returns -ENODEV otherwise.
 */
static int __init crypto_morus640_sse2_module_init(void)
{
	if (x86_match_cpu(sse2_cpu_id))
		return crypto_register_aeads(crypto_morus640_sse2_algs,
				ARRAY_SIZE(crypto_morus640_sse2_algs));

	return -ENODEV;
}
|
||||
|
||||
/* Module exit point: unregister everything module_init registered. */
static void __exit crypto_morus640_sse2_module_exit(void)
{
	crypto_unregister_aeads(crypto_morus640_sse2_algs,
				ARRAY_SIZE(crypto_morus640_sse2_algs));
}
|
||||
|
||||
module_init(crypto_morus640_sse2_module_init);
|
||||
module_exit(crypto_morus640_sse2_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("MORUS-640 AEAD algorithm -- SSE2 implementation");
|
||||
MODULE_ALIAS_CRYPTO("morus640");
|
||||
MODULE_ALIAS_CRYPTO("morus640-sse2");
|
298
arch/x86/crypto/morus640_glue.c
Normal file
298
arch/x86/crypto/morus640_glue.c
Normal file
@ -0,0 +1,298 @@
|
||||
/*
|
||||
* The MORUS-640 Authenticated-Encryption Algorithm
|
||||
* Common x86 SIMD glue skeleton
|
||||
*
|
||||
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*/
|
||||
|
||||
#include <crypto/cryptd.h>
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/morus640_glue.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <asm/fpu/api.h>
|
||||
|
||||
struct morus640_state {
|
||||
struct morus640_block s[MORUS_STATE_BLOCKS];
|
||||
};
|
||||
|
||||
struct morus640_ops {
|
||||
int (*skcipher_walk_init)(struct skcipher_walk *walk,
|
||||
struct aead_request *req, bool atomic);
|
||||
|
||||
void (*crypt_blocks)(void *state, const void *src, void *dst,
|
||||
unsigned int length);
|
||||
void (*crypt_tail)(void *state, const void *src, void *dst,
|
||||
unsigned int length);
|
||||
};
|
||||
|
||||
static void crypto_morus640_glue_process_ad(
|
||||
struct morus640_state *state,
|
||||
const struct morus640_glue_ops *ops,
|
||||
struct scatterlist *sg_src, unsigned int assoclen)
|
||||
{
|
||||
struct scatter_walk walk;
|
||||
struct morus640_block buf;
|
||||
unsigned int pos = 0;
|
||||
|
||||
scatterwalk_start(&walk, sg_src);
|
||||
while (assoclen != 0) {
|
||||
unsigned int size = scatterwalk_clamp(&walk, assoclen);
|
||||
unsigned int left = size;
|
||||
void *mapped = scatterwalk_map(&walk);
|
||||
const u8 *src = (const u8 *)mapped;
|
||||
|
||||
if (pos + size >= MORUS640_BLOCK_SIZE) {
|
||||
if (pos > 0) {
|
||||
unsigned int fill = MORUS640_BLOCK_SIZE - pos;
|
||||
memcpy(buf.bytes + pos, src, fill);
|
||||
ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE);
|
||||
pos = 0;
|
||||
left -= fill;
|
||||
src += fill;
|
||||
}
|
||||
|
||||
ops->ad(state, src, left);
|
||||
src += left & ~(MORUS640_BLOCK_SIZE - 1);
|
||||
left &= MORUS640_BLOCK_SIZE - 1;
|
||||
}
|
||||
|
||||
memcpy(buf.bytes + pos, src, left);
|
||||
|
||||
pos += left;
|
||||
assoclen -= size;
|
||||
scatterwalk_unmap(mapped);
|
||||
scatterwalk_advance(&walk, size);
|
||||
scatterwalk_done(&walk, 0, assoclen);
|
||||
}
|
||||
|
||||
if (pos > 0) {
|
||||
memset(buf.bytes + pos, 0, MORUS640_BLOCK_SIZE - pos);
|
||||
ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_morus640_glue_process_crypt(struct morus640_state *state,
|
||||
struct morus640_ops ops,
|
||||
struct aead_request *req)
|
||||
{
|
||||
struct skcipher_walk walk;
|
||||
u8 *cursor_src, *cursor_dst;
|
||||
unsigned int chunksize, base;
|
||||
|
||||
ops.skcipher_walk_init(&walk, req, false);
|
||||
|
||||
while (walk.nbytes) {
|
||||
cursor_src = walk.src.virt.addr;
|
||||
cursor_dst = walk.dst.virt.addr;
|
||||
chunksize = walk.nbytes;
|
||||
|
||||
ops.crypt_blocks(state, cursor_src, cursor_dst, chunksize);
|
||||
|
||||
base = chunksize & ~(MORUS640_BLOCK_SIZE - 1);
|
||||
cursor_src += base;
|
||||
cursor_dst += base;
|
||||
chunksize &= MORUS640_BLOCK_SIZE - 1;
|
||||
|
||||
if (chunksize > 0)
|
||||
ops.crypt_tail(state, cursor_src, cursor_dst,
|
||||
chunksize);
|
||||
|
||||
skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
}
|
||||
|
||||
int crypto_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct morus640_ctx *ctx = crypto_aead_ctx(aead);
|
||||
|
||||
if (keylen != MORUS640_BLOCK_SIZE) {
|
||||
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memcpy(ctx->key.bytes, key, MORUS640_BLOCK_SIZE);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus640_glue_setkey);
|
||||
|
||||
int crypto_morus640_glue_setauthsize(struct crypto_aead *tfm,
|
||||
unsigned int authsize)
|
||||
{
|
||||
return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus640_glue_setauthsize);
|
||||
|
||||
static void crypto_morus640_glue_crypt(struct aead_request *req,
|
||||
struct morus640_ops ops,
|
||||
unsigned int cryptlen,
|
||||
struct morus640_block *tag_xor)
|
||||
{
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct morus640_state state;
|
||||
|
||||
kernel_fpu_begin();
|
||||
|
||||
ctx->ops->init(&state, &ctx->key, req->iv);
|
||||
crypto_morus640_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
|
||||
crypto_morus640_glue_process_crypt(&state, ops, req);
|
||||
ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
|
||||
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
int crypto_morus640_glue_encrypt(struct aead_request *req)
|
||||
{
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct morus640_ops OPS = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_encrypt,
|
||||
.crypt_blocks = ctx->ops->enc,
|
||||
.crypt_tail = ctx->ops->enc_tail,
|
||||
};
|
||||
|
||||
struct morus640_block tag = {};
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen;
|
||||
|
||||
crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag);
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->dst,
|
||||
req->assoclen + cryptlen, authsize, 1);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus640_glue_encrypt);
|
||||
|
||||
int crypto_morus640_glue_decrypt(struct aead_request *req)
|
||||
{
|
||||
static const u8 zeros[MORUS640_BLOCK_SIZE] = {};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct morus640_ops OPS = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_decrypt,
|
||||
.crypt_blocks = ctx->ops->dec,
|
||||
.crypt_tail = ctx->ops->dec_tail,
|
||||
};
|
||||
|
||||
struct morus640_block tag;
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen - authsize;
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->src,
|
||||
req->assoclen + cryptlen, authsize, 0);
|
||||
|
||||
crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag);
|
||||
|
||||
return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus640_glue_decrypt);
|
||||
|
||||
void crypto_morus640_glue_init_ops(struct crypto_aead *aead,
|
||||
const struct morus640_glue_ops *ops)
|
||||
{
|
||||
struct morus640_ctx *ctx = crypto_aead_ctx(aead);
|
||||
ctx->ops = ops;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus640_glue_init_ops);
|
||||
|
||||
int cryptd_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setkey);
|
||||
|
||||
int cryptd_morus640_glue_setauthsize(struct crypto_aead *aead,
|
||||
unsigned int authsize)
|
||||
{
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setauthsize);
|
||||
|
||||
int cryptd_morus640_glue_encrypt(struct aead_request *req)
|
||||
{
|
||||
struct crypto_aead *aead = crypto_aead_reqtfm(req);
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
aead = &cryptd_tfm->base;
|
||||
if (irq_fpu_usable() && (!in_atomic() ||
|
||||
!cryptd_aead_queued(cryptd_tfm)))
|
||||
aead = cryptd_aead_child(cryptd_tfm);
|
||||
|
||||
aead_request_set_tfm(req, aead);
|
||||
|
||||
return crypto_aead_encrypt(req);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cryptd_morus640_glue_encrypt);
|
||||
|
||||
int cryptd_morus640_glue_decrypt(struct aead_request *req)
|
||||
{
|
||||
struct crypto_aead *aead = crypto_aead_reqtfm(req);
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
struct cryptd_aead *cryptd_tfm = *ctx;
|
||||
|
||||
aead = &cryptd_tfm->base;
|
||||
if (irq_fpu_usable() && (!in_atomic() ||
|
||||
!cryptd_aead_queued(cryptd_tfm)))
|
||||
aead = cryptd_aead_child(cryptd_tfm);
|
||||
|
||||
aead_request_set_tfm(req, aead);
|
||||
|
||||
return crypto_aead_decrypt(req);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cryptd_morus640_glue_decrypt);
|
||||
|
||||
int cryptd_morus640_glue_init_tfm(struct crypto_aead *aead)
|
||||
{
|
||||
struct cryptd_aead *cryptd_tfm;
|
||||
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
|
||||
const char *name = crypto_aead_alg(aead)->base.cra_driver_name;
|
||||
char internal_name[CRYPTO_MAX_ALG_NAME];
|
||||
|
||||
if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name)
|
||||
>= CRYPTO_MAX_ALG_NAME)
|
||||
return -ENAMETOOLONG;
|
||||
|
||||
cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL,
|
||||
CRYPTO_ALG_INTERNAL);
|
||||
if (IS_ERR(cryptd_tfm))
|
||||
return PTR_ERR(cryptd_tfm);
|
||||
|
||||
*ctx = cryptd_tfm;
|
||||
crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cryptd_morus640_glue_init_tfm);
|
||||
|
||||
/*
 * Tear down the glue transform: release the cryptd wrapper whose pointer is
 * stored in the tfm context.
 */
void cryptd_morus640_glue_exit_tfm(struct crypto_aead *aead)
{
	struct cryptd_aead **slot = crypto_aead_ctx(aead);
	struct cryptd_aead *wrapper = *slot;

	cryptd_free_aead(wrapper);
}
EXPORT_SYMBOL_GPL(cryptd_morus640_glue_exit_tfm);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("MORUS-640 AEAD mode -- glue for x86 optimizations");
|
@ -1,938 +0,0 @@
|
||||
# Derived from:
|
||||
# salsa20_pm.s version 20051229
|
||||
# D. J. Bernstein
|
||||
# Public domain.
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.text
|
||||
|
||||
# enter salsa20_encrypt_bytes
|
||||
# salsa20_encrypt_bytes(x, m, out, bytes) -- 32-bit x86, AT&T syntax
#
# Arguments are read from the caller stack (arg1 .. arg4):
#   x      pointer to sixteen 32-bit state words; words 8 and 9 are
#          incremented as a 64-bit value once per 64-byte block and the
#          result is stored back to x before returning
#   m      input bytes
#   out    output bytes, each 32-bit word being m XOR (rounds(x) + x)
#   bytes  byte count; zero returns without touching x, m or out
#
# Registers ebx, esi, edi and ebp are saved on entry and restored on exit.
#
# Frame layout, relative to %esp once the prologue has run:
#   tmp            0 .. 63     bounce buffer used for a final short block
#   x_backup       64
#   m_backup       68
#   out_backup     72
#   bytes_backup   76
#   frame size     80          value subtracted from %esp, added back on exit
#   saved regs     84 .. 96    ebx, esi, edi, ebp
#   x0 .. x15      100 + 4*k   working state for the rounds
#   j0 .. j15      164 + 4*k   input state, j8/j9 hold the running counter
#   ctarget        228         real destination while out points at tmp
#
# Register roles inside the rounds:
#   eax = p, ecx = s, edx = t, ebx = w, esi = r, edi = v, ebp = round count

# One double round (column round followed by row round) of the working
# state.  p, s, t and w are live in registers on entry and on exit; their
# home slots x0, x5, x10 and x15 are refreshed at the start of each half.
.macro salsa20_i586_double_round
	# Column round, rotate by 7.
	movl	%eax, 100(%esp)		# x0 = p
	movl	%edx, 140(%esp)		# x10 = t
	addl	148(%esp), %eax		# p += x12
	movl	%ecx, 120(%esp)		# x5 = s
	addl	124(%esp), %edx		# t += x6
	movl	%ebx, 160(%esp)		# x15 = w
	movl	104(%esp), %esi		# r = x1
	add	%ecx, %esi		# r += s
	movl	144(%esp), %edi		# v = x11
	add	%ebx, %edi		# v += w
	rol	$7, %eax		# p <<<= 7
	xorl	116(%esp), %eax		# p ^= x4
	rol	$7, %edx		# t <<<= 7
	xorl	156(%esp), %edx		# t ^= x14
	rol	$7, %esi		# r <<<= 7
	xorl	136(%esp), %esi		# r ^= x9
	rol	$7, %edi		# v <<<= 7
	xorl	112(%esp), %edi		# v ^= x3
	# Column round, rotate by 9.
	movl	%eax, 116(%esp)		# x4 = p
	movl	%edx, 156(%esp)		# x14 = t
	addl	100(%esp), %eax		# p += x0
	movl	%esi, 136(%esp)		# x9 = r
	addl	140(%esp), %edx		# t += x10
	movl	%edi, 112(%esp)		# x3 = v
	rol	$9, %eax		# p <<<= 9
	xorl	132(%esp), %eax		# p ^= x8
	rol	$9, %edx		# t <<<= 9
	xorl	108(%esp), %edx		# t ^= x2
	add	%esi, %ecx		# s += r
	rol	$9, %ecx		# s <<<= 9
	xorl	152(%esp), %ecx		# s ^= x13
	add	%edi, %ebx		# w += v
	rol	$9, %ebx		# w <<<= 9
	xorl	128(%esp), %ebx		# w ^= x7
	# Column round, rotate by 13.
	movl	%eax, 132(%esp)		# x8 = p
	movl	%edx, 108(%esp)		# x2 = t
	addl	116(%esp), %eax		# p += x4
	movl	%ecx, 152(%esp)		# x13 = s
	addl	156(%esp), %edx		# t += x14
	movl	%ebx, 128(%esp)		# x7 = w
	rol	$13, %eax		# p <<<= 13
	xorl	148(%esp), %eax		# p ^= x12
	rol	$13, %edx		# t <<<= 13
	xorl	124(%esp), %edx		# t ^= x6
	add	%ecx, %esi		# r += s
	rol	$13, %esi		# r <<<= 13
	xorl	104(%esp), %esi		# r ^= x1
	add	%ebx, %edi		# v += w
	rol	$13, %edi		# v <<<= 13
	xorl	144(%esp), %edi		# v ^= x11
	# Column round, rotate by 18.
	movl	%eax, 148(%esp)		# x12 = p
	movl	%edx, 124(%esp)		# x6 = t
	addl	132(%esp), %eax		# p += x8
	movl	%esi, 104(%esp)		# x1 = r
	addl	108(%esp), %edx		# t += x2
	movl	%edi, 144(%esp)		# x11 = v
	rol	$18, %eax		# p <<<= 18
	xorl	100(%esp), %eax		# p ^= x0
	rol	$18, %edx		# t <<<= 18
	xorl	140(%esp), %edx		# t ^= x10
	add	%esi, %ecx		# s += r
	rol	$18, %ecx		# s <<<= 18
	xorl	120(%esp), %ecx		# s ^= x5
	add	%edi, %ebx		# w += v
	rol	$18, %ebx		# w <<<= 18
	xorl	160(%esp), %ebx		# w ^= x15
	# Row round, rotate by 7.
	movl	%eax, 100(%esp)		# x0 = p
	movl	%edx, 140(%esp)		# x10 = t
	addl	112(%esp), %eax		# p += x3
	rol	$7, %eax		# p <<<= 7
	movl	%ecx, 120(%esp)		# x5 = s
	addl	136(%esp), %edx		# t += x9
	movl	%ebx, 160(%esp)		# x15 = w
	movl	116(%esp), %esi		# r = x4
	add	%ecx, %esi		# r += s
	movl	156(%esp), %edi		# v = x14
	add	%ebx, %edi		# v += w
	xorl	104(%esp), %eax		# p ^= x1
	rol	$7, %edx		# t <<<= 7
	xorl	144(%esp), %edx		# t ^= x11
	rol	$7, %esi		# r <<<= 7
	xorl	124(%esp), %esi		# r ^= x6
	rol	$7, %edi		# v <<<= 7
	xorl	148(%esp), %edi		# v ^= x12
	# Row round, rotate by 9.
	movl	%eax, 104(%esp)		# x1 = p
	movl	%edx, 144(%esp)		# x11 = t
	addl	100(%esp), %eax		# p += x0
	movl	%esi, 124(%esp)		# x6 = r
	addl	140(%esp), %edx		# t += x10
	movl	%edi, 148(%esp)		# x12 = v
	rol	$9, %eax		# p <<<= 9
	xorl	108(%esp), %eax		# p ^= x2
	rol	$9, %edx		# t <<<= 9
	xorl	132(%esp), %edx		# t ^= x8
	add	%esi, %ecx		# s += r
	rol	$9, %ecx		# s <<<= 9
	xorl	128(%esp), %ecx		# s ^= x7
	add	%edi, %ebx		# w += v
	rol	$9, %ebx		# w <<<= 9
	xorl	152(%esp), %ebx		# w ^= x13
	# Row round, rotate by 13.
	movl	%eax, 108(%esp)		# x2 = p
	movl	%edx, 132(%esp)		# x8 = t
	addl	104(%esp), %eax		# p += x1
	movl	%ecx, 128(%esp)		# x7 = s
	addl	144(%esp), %edx		# t += x11
	movl	%ebx, 152(%esp)		# x13 = w
	rol	$13, %eax		# p <<<= 13
	xorl	112(%esp), %eax		# p ^= x3
	rol	$13, %edx		# t <<<= 13
	xorl	136(%esp), %edx		# t ^= x9
	add	%ecx, %esi		# r += s
	rol	$13, %esi		# r <<<= 13
	xorl	116(%esp), %esi		# r ^= x4
	add	%ebx, %edi		# v += w
	rol	$13, %edi		# v <<<= 13
	xorl	156(%esp), %edi		# v ^= x14
	# Row round, rotate by 18.
	movl	%eax, 112(%esp)		# x3 = p
	movl	%edx, 136(%esp)		# x9 = t
	addl	108(%esp), %eax		# p += x2
	movl	%esi, 116(%esp)		# x4 = r
	addl	132(%esp), %edx		# t += x8
	movl	%edi, 156(%esp)		# x14 = v
	rol	$18, %eax		# p <<<= 18
	xorl	100(%esp), %eax		# p ^= x0
	rol	$18, %edx		# t <<<= 18
	xorl	140(%esp), %edx		# t ^= x10
	add	%esi, %ecx		# s += r
	rol	$18, %ecx		# s <<<= 18
	xorl	120(%esp), %ecx		# s ^= x5
	add	%edi, %ebx		# w += v
	rol	$18, %ebx		# w <<<= 18
	xorl	160(%esp), %ebx		# w ^= x15
.endm

ENTRY(salsa20_encrypt_bytes)
	# Carve out the frame: 256 bytes plus (esp mod 32), which leaves
	# %esp 32-byte aligned.  eax keeps the frame size.
	mov	%esp, %eax
	and	$31, %eax
	add	$256, %eax
	sub	%eax, %esp

	# Remember the frame size and the registers that must be restored.
	movl	%eax, 80(%esp)		# frame size
	movl	%ebx, 84(%esp)
	movl	%esi, 88(%esp)
	movl	%edi, 92(%esp)
	movl	%ebp, 96(%esp)

	# Fetch the four arguments from above the frame.
	movl	4(%esp,%eax), %edx	# x
	movl	8(%esp,%eax), %esi	# m
	movl	12(%esp,%eax), %edi	# out
	movl	16(%esp,%eax), %ebx	# bytes

	# Nothing to do for a zero length.
	sub	$0, %ebx
	jbe	._done

._start:
	# Copy the caller state x[0..15] into j0 .. j15, interleaving loads
	# and stores across eax, ecx and ebp.
	movl	0(%edx), %eax
	movl	4(%edx), %ecx
	movl	8(%edx), %ebp
	movl	%eax, 164(%esp)		# j0
	movl	12(%edx), %eax
	movl	%ecx, 168(%esp)		# j1
	movl	16(%edx), %ecx
	movl	%ebp, 172(%esp)		# j2
	movl	20(%edx), %ebp
	movl	%eax, 176(%esp)		# j3
	movl	24(%edx), %eax
	movl	%ecx, 180(%esp)		# j4
	movl	28(%edx), %ecx
	movl	%ebp, 184(%esp)		# j5
	movl	32(%edx), %ebp
	movl	%eax, 188(%esp)		# j6
	movl	36(%edx), %eax
	movl	%ecx, 192(%esp)		# j7
	movl	40(%edx), %ecx
	movl	%ebp, 196(%esp)		# j8
	movl	44(%edx), %ebp
	movl	%eax, 200(%esp)		# j9
	movl	48(%edx), %eax
	movl	%ecx, 204(%esp)		# j10
	movl	52(%edx), %ecx
	movl	%ebp, 208(%esp)		# j11
	movl	56(%edx), %ebp
	movl	%eax, 212(%esp)		# j12
	movl	60(%edx), %eax
	movl	%ecx, 216(%esp)		# j13
	movl	%ebp, 220(%esp)		# j14
	movl	%eax, 224(%esp)		# j15
	movl	%edx, 64(%esp)		# x_backup = x

._bytesatleast1:
	# A full block is processed in place.  A short final block is first
	# copied into tmp so that whole 32-bit words can be read and written.
	cmp	$64, %ebx
	jae	._nocopy
	movl	%edi, 228(%esp)		# ctarget = out
	leal	0(%esp), %edi		# out = tmp
	mov	%ebx, %ecx		# copy count = bytes
	rep movsb			# tmp[0..bytes) = m[0..bytes)
	leal	0(%esp), %edi		# out = tmp
	leal	0(%esp), %esi		# m = tmp

._nocopy:
	# The rounds use every register, so park the pointers and the count.
	movl	%edi, 72(%esp)		# out_backup
	movl	%esi, 68(%esp)		# m_backup
	movl	%ebx, 76(%esp)		# bytes_backup

	# Working state x0 .. x15 = j0 .. j15, four words at a time.
	movl	164(%esp), %eax
	movl	168(%esp), %ecx
	movl	172(%esp), %edx
	movl	176(%esp), %ebx
	movl	%eax, 100(%esp)		# x0
	movl	%ecx, 104(%esp)		# x1
	movl	%edx, 108(%esp)		# x2
	movl	%ebx, 112(%esp)		# x3
	movl	180(%esp), %eax
	movl	184(%esp), %ecx
	movl	188(%esp), %edx
	movl	192(%esp), %ebx
	movl	%eax, 116(%esp)		# x4
	movl	%ecx, 120(%esp)		# x5
	movl	%edx, 124(%esp)		# x6
	movl	%ebx, 128(%esp)		# x7
	movl	196(%esp), %eax
	movl	200(%esp), %ecx
	movl	204(%esp), %edx
	movl	208(%esp), %ebx
	movl	%eax, 132(%esp)		# x8
	movl	%ecx, 136(%esp)		# x9
	movl	%edx, 140(%esp)		# x10
	movl	%ebx, 144(%esp)		# x11
	movl	212(%esp), %eax
	movl	216(%esp), %ecx
	movl	220(%esp), %edx
	movl	224(%esp), %ebx
	movl	%eax, 148(%esp)		# x12
	movl	%ecx, 152(%esp)		# x13
	movl	%edx, 156(%esp)		# x14
	movl	%ebx, 160(%esp)		# x15

	# Twenty rounds, four per loop pass.
	mov	$20, %ebp
	movl	100(%esp), %eax		# p = x0
	movl	120(%esp), %ecx		# s = x5
	movl	140(%esp), %edx		# t = x10
	movl	160(%esp), %ebx		# w = x15

._mainloop:
	salsa20_i586_double_round
	salsa20_i586_double_round
	sub	$4, %ebp
	ja	._mainloop

	# Flush the register-resident words back to their slots.
	movl	%eax, 100(%esp)		# x0 = p
	movl	%ecx, 120(%esp)		# x5 = s
	movl	%edx, 140(%esp)		# x10 = t
	movl	%ebx, 160(%esp)		# x15 = w

	movl	72(%esp), %edi		# out = out_backup
	movl	68(%esp), %esi		# m = m_backup

	# Produce the block two words at a time:
	#   out[k] = (x[k] + j[k]) ^ m[k]
	# Words 0 and 1.
	movl	100(%esp), %eax
	movl	104(%esp), %ecx
	addl	164(%esp), %eax
	addl	168(%esp), %ecx
	xorl	0(%esi), %eax
	xorl	4(%esi), %ecx
	movl	%eax, 0(%edi)
	movl	%ecx, 4(%edi)
	# Words 2 and 3.
	movl	108(%esp), %eax
	movl	112(%esp), %ecx
	addl	172(%esp), %eax
	addl	176(%esp), %ecx
	xorl	8(%esi), %eax
	xorl	12(%esi), %ecx
	movl	%eax, 8(%edi)
	movl	%ecx, 12(%edi)
	# Words 4 and 5.
	movl	116(%esp), %eax
	movl	120(%esp), %ecx
	addl	180(%esp), %eax
	addl	184(%esp), %ecx
	xorl	16(%esi), %eax
	xorl	20(%esi), %ecx
	movl	%eax, 16(%edi)
	movl	%ecx, 20(%edi)
	# Words 6 and 7.
	movl	124(%esp), %eax
	movl	128(%esp), %ecx
	addl	188(%esp), %eax
	addl	192(%esp), %ecx
	xorl	24(%esi), %eax
	xorl	28(%esi), %ecx
	movl	%eax, 24(%edi)
	movl	%ecx, 28(%edi)
	# Words 8 and 9.
	movl	132(%esp), %eax
	movl	136(%esp), %ecx
	addl	196(%esp), %eax
	addl	200(%esp), %ecx
	xorl	32(%esi), %eax
	xorl	36(%esi), %ecx
	movl	%eax, 32(%edi)
	movl	%ecx, 36(%edi)
	# Words 10 and 11.
	movl	140(%esp), %eax
	movl	144(%esp), %ecx
	addl	204(%esp), %eax
	addl	208(%esp), %ecx
	xorl	40(%esi), %eax
	xorl	44(%esi), %ecx
	movl	%eax, 40(%edi)
	movl	%ecx, 44(%edi)
	# Words 12 and 13.
	movl	148(%esp), %eax
	movl	152(%esp), %ecx
	addl	212(%esp), %eax
	addl	216(%esp), %ecx
	xorl	48(%esi), %eax
	xorl	52(%esi), %ecx
	movl	%eax, 48(%edi)
	movl	%ecx, 52(%edi)
	# Words 14 and 15.
	movl	156(%esp), %eax
	movl	160(%esp), %ecx
	addl	220(%esp), %eax
	addl	224(%esp), %ecx
	xorl	56(%esi), %eax
	xorl	60(%esi), %ecx
	movl	%eax, 56(%edi)
	movl	%ecx, 60(%edi)

	movl	76(%esp), %ebx		# bytes = bytes_backup

	# Advance the 64-bit value held in j8 (low) and j9 (high) by one.
	movl	196(%esp), %eax
	movl	200(%esp), %ecx
	add	$1, %eax
	adc	$0, %ecx		# propagate the carry out of j8
	movl	%eax, 196(%esp)
	movl	%ecx, 200(%esp)

	# More than one block left: loop.  Exactly one block: finished.
	# Fewer than 64 bytes: the result sits in tmp and must be copied out.
	cmp	$64, %ebx
	ja	._bytesatleast65
	jae	._bytesatleast64
	mov	%edi, %esi		# source = tmp (current out)
	movl	228(%esp), %edi		# destination = ctarget
	mov	%ebx, %ecx		# copy count = bytes
	rep movsb

._bytesatleast64:
	# Store the advanced counter words back into the caller state.
	movl	64(%esp), %eax		# x = x_backup
	movl	196(%esp), %ecx		# j8
	movl	200(%esp), %edx		# j9
	movl	%ecx, 32(%eax)
	movl	%edx, 36(%eax)

._done:
	# Restore the saved registers and release the frame.
	movl	80(%esp), %eax		# frame size
	movl	84(%esp), %ebx
	movl	88(%esp), %esi
	movl	92(%esp), %edi
	movl	96(%esp), %ebp
	add	%eax, %esp
	ret

._bytesatleast65:
	# Step past the block just written and go around again.
	sub	$64, %ebx
	add	$64, %edi
	add	$64, %esi
	jmp	._bytesatleast1
ENDPROC(salsa20_encrypt_bytes)
|
@ -1,805 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#include <linux/linkage.h>
|
||||
|
||||
# enter salsa20_encrypt_bytes
|
||||
ENTRY(salsa20_encrypt_bytes)
|
||||
mov %rsp,%r11
|
||||
and $31,%r11
|
||||
add $256,%r11
|
||||
sub %r11,%rsp
|
||||
# x = arg1
|
||||
mov %rdi,%r8
|
||||
# m = arg2
|
||||
mov %rsi,%rsi
|
||||
# out = arg3
|
||||
mov %rdx,%rdi
|
||||
# bytes = arg4
|
||||
mov %rcx,%rdx
|
||||
# unsigned>? bytes - 0
|
||||
cmp $0,%rdx
|
||||
# comment:fp stack unchanged by jump
|
||||
# goto done if !unsigned>
|
||||
jbe ._done
|
||||
# comment:fp stack unchanged by fallthrough
|
||||
# start:
|
||||
._start:
|
||||
# r11_stack = r11
|
||||
movq %r11,0(%rsp)
|
||||
# r12_stack = r12
|
||||
movq %r12,8(%rsp)
|
||||
# r13_stack = r13
|
||||
movq %r13,16(%rsp)
|
||||
# r14_stack = r14
|
||||
movq %r14,24(%rsp)
|
||||
# r15_stack = r15
|
||||
movq %r15,32(%rsp)
|
||||
# rbx_stack = rbx
|
||||
movq %rbx,40(%rsp)
|
||||
# rbp_stack = rbp
|
||||
movq %rbp,48(%rsp)
|
||||
# in0 = *(uint64 *) (x + 0)
|
||||
movq 0(%r8),%rcx
|
||||
# in2 = *(uint64 *) (x + 8)
|
||||
movq 8(%r8),%r9
|
||||
# in4 = *(uint64 *) (x + 16)
|
||||
movq 16(%r8),%rax
|
||||
# in6 = *(uint64 *) (x + 24)
|
||||
movq 24(%r8),%r10
|
||||
# in8 = *(uint64 *) (x + 32)
|
||||
movq 32(%r8),%r11
|
||||
# in10 = *(uint64 *) (x + 40)
|
||||
movq 40(%r8),%r12
|
||||
# in12 = *(uint64 *) (x + 48)
|
||||
movq 48(%r8),%r13
|
||||
# in14 = *(uint64 *) (x + 56)
|
||||
movq 56(%r8),%r14
|
||||
# j0 = in0
|
||||
movq %rcx,56(%rsp)
|
||||
# j2 = in2
|
||||
movq %r9,64(%rsp)
|
||||
# j4 = in4
|
||||
movq %rax,72(%rsp)
|
||||
# j6 = in6
|
||||
movq %r10,80(%rsp)
|
||||
# j8 = in8
|
||||
movq %r11,88(%rsp)
|
||||
# j10 = in10
|
||||
movq %r12,96(%rsp)
|
||||
# j12 = in12
|
||||
movq %r13,104(%rsp)
|
||||
# j14 = in14
|
||||
movq %r14,112(%rsp)
|
||||
# x_backup = x
|
||||
movq %r8,120(%rsp)
|
||||
# bytesatleast1:
|
||||
._bytesatleast1:
|
||||
# unsigned<? bytes - 64
|
||||
cmp $64,%rdx
|
||||
# comment:fp stack unchanged by jump
|
||||
# goto nocopy if !unsigned<
|
||||
jae ._nocopy
|
||||
# ctarget = out
|
||||
movq %rdi,128(%rsp)
|
||||
# out = &tmp
|
||||
leaq 192(%rsp),%rdi
|
||||
# i = bytes
|
||||
mov %rdx,%rcx
|
||||
# while (i) { *out++ = *m++; --i }
|
||||
rep movsb
|
||||
# out = &tmp
|
||||
leaq 192(%rsp),%rdi
|
||||
# m = &tmp
|
||||
leaq 192(%rsp),%rsi
|
||||
# comment:fp stack unchanged by fallthrough
|
||||
# nocopy:
|
||||
._nocopy:
|
||||
# out_backup = out
|
||||
movq %rdi,136(%rsp)
|
||||
# m_backup = m
|
||||
movq %rsi,144(%rsp)
|
||||
# bytes_backup = bytes
|
||||
movq %rdx,152(%rsp)
|
||||
# x1 = j0
|
||||
movq 56(%rsp),%rdi
|
||||
# x0 = x1
|
||||
mov %rdi,%rdx
|
||||
# (uint64) x1 >>= 32
|
||||
shr $32,%rdi
|
||||
# x3 = j2
|
||||
movq 64(%rsp),%rsi
|
||||
# x2 = x3
|
||||
mov %rsi,%rcx
|
||||
# (uint64) x3 >>= 32
|
||||
shr $32,%rsi
|
||||
# x5 = j4
|
||||
movq 72(%rsp),%r8
|
||||
# x4 = x5
|
||||
mov %r8,%r9
|
||||
# (uint64) x5 >>= 32
|
||||
shr $32,%r8
|
||||
# x5_stack = x5
|
||||
movq %r8,160(%rsp)
|
||||
# x7 = j6
|
||||
movq 80(%rsp),%r8
|
||||
# x6 = x7
|
||||
mov %r8,%rax
|
||||
# (uint64) x7 >>= 32
|
||||
shr $32,%r8
|
||||
# x9 = j8
|
||||
movq 88(%rsp),%r10
|
||||
# x8 = x9
|
||||
mov %r10,%r11
|
||||
# (uint64) x9 >>= 32
|
||||
shr $32,%r10
|
||||
# x11 = j10
|
||||
movq 96(%rsp),%r12
|
||||
# x10 = x11
|
||||
mov %r12,%r13
|
||||
# x10_stack = x10
|
||||
movq %r13,168(%rsp)
|
||||
# (uint64) x11 >>= 32
|
||||
shr $32,%r12
|
||||
# x13 = j12
|
||||
movq 104(%rsp),%r13
|
||||
# x12 = x13
|
||||
mov %r13,%r14
|
||||
# (uint64) x13 >>= 32
|
||||
shr $32,%r13
|
||||
# x15 = j14
|
||||
movq 112(%rsp),%r15
|
||||
# x14 = x15
|
||||
mov %r15,%rbx
|
||||
# (uint64) x15 >>= 32
|
||||
shr $32,%r15
|
||||
# x15_stack = x15
|
||||
movq %r15,176(%rsp)
|
||||
# i = 20
|
||||
mov $20,%r15
|
||||
# mainloop:
|
||||
._mainloop:
|
||||
# i_backup = i
|
||||
movq %r15,184(%rsp)
|
||||
# x5 = x5_stack
|
||||
movq 160(%rsp),%r15
|
||||
# a = x12 + x0
|
||||
lea (%r14,%rdx),%rbp
|
||||
# (uint32) a <<<= 7
|
||||
rol $7,%ebp
|
||||
# x4 ^= a
|
||||
xor %rbp,%r9
|
||||
# b = x1 + x5
|
||||
lea (%rdi,%r15),%rbp
|
||||
# (uint32) b <<<= 7
|
||||
rol $7,%ebp
|
||||
# x9 ^= b
|
||||
xor %rbp,%r10
|
||||
# a = x0 + x4
|
||||
lea (%rdx,%r9),%rbp
|
||||
# (uint32) a <<<= 9
|
||||
rol $9,%ebp
|
||||
# x8 ^= a
|
||||
xor %rbp,%r11
|
||||
# b = x5 + x9
|
||||
lea (%r15,%r10),%rbp
|
||||
# (uint32) b <<<= 9
|
||||
rol $9,%ebp
|
||||
# x13 ^= b
|
||||
xor %rbp,%r13
|
||||
# a = x4 + x8
|
||||
lea (%r9,%r11),%rbp
|
||||
# (uint32) a <<<= 13
|
||||
rol $13,%ebp
|
||||
# x12 ^= a
|
||||
xor %rbp,%r14
|
||||
# b = x9 + x13
|
||||
lea (%r10,%r13),%rbp
|
||||
# (uint32) b <<<= 13
|
||||
rol $13,%ebp
|
||||
# x1 ^= b
|
||||
xor %rbp,%rdi
|
||||
# a = x8 + x12
|
||||
lea (%r11,%r14),%rbp
|
||||
# (uint32) a <<<= 18
|
||||
rol $18,%ebp
|
||||
# x0 ^= a
|
||||
xor %rbp,%rdx
|
||||
# b = x13 + x1
|
||||
lea (%r13,%rdi),%rbp
|
||||
# (uint32) b <<<= 18
|
||||
rol $18,%ebp
|
||||
# x5 ^= b
|
||||
xor %rbp,%r15
|
||||
# x10 = x10_stack
|
||||
movq 168(%rsp),%rbp
|
||||
# x5_stack = x5
|
||||
movq %r15,160(%rsp)
|
||||
# c = x6 + x10
|
||||
lea (%rax,%rbp),%r15
|
||||
# (uint32) c <<<= 7
|
||||
rol $7,%r15d
|
||||
# x14 ^= c
|
||||
xor %r15,%rbx
|
||||
# c = x10 + x14
|
||||
lea (%rbp,%rbx),%r15
|
||||
# (uint32) c <<<= 9
|
||||
rol $9,%r15d
|
||||
# x2 ^= c
|
||||
xor %r15,%rcx
|
||||
# c = x14 + x2
|
||||
lea (%rbx,%rcx),%r15
|
||||
# (uint32) c <<<= 13
|
||||
rol $13,%r15d
|
||||
# x6 ^= c
|
||||
xor %r15,%rax
|
||||
# c = x2 + x6
|
||||
lea (%rcx,%rax),%r15
|
||||
# (uint32) c <<<= 18
|
||||
rol $18,%r15d
|
||||
# x10 ^= c
|
||||
xor %r15,%rbp
|
||||
# x15 = x15_stack
|
||||
movq 176(%rsp),%r15
|
||||
# x10_stack = x10
|
||||
movq %rbp,168(%rsp)
|
||||
# d = x11 + x15
|
||||
lea (%r12,%r15),%rbp
|
||||
# (uint32) d <<<= 7
|
||||
rol $7,%ebp
|
||||
# x3 ^= d
|
||||
xor %rbp,%rsi
|
||||
# d = x15 + x3
|
||||
lea (%r15,%rsi),%rbp
|
||||
# (uint32) d <<<= 9
|
||||
rol $9,%ebp
|
||||
# x7 ^= d
|
||||
xor %rbp,%r8
|
||||
# d = x3 + x7
|
||||
lea (%rsi,%r8),%rbp
|
||||
# (uint32) d <<<= 13
|
||||
rol $13,%ebp
|
||||
# x11 ^= d
|
||||
xor %rbp,%r12
|
||||
# d = x7 + x11
|
||||
lea (%r8,%r12),%rbp
|
||||
# (uint32) d <<<= 18
|
||||
rol $18,%ebp
|
||||
# x15 ^= d
|
||||
xor %rbp,%r15
|
||||
# x15_stack = x15
|
||||
movq %r15,176(%rsp)
|
||||
# x5 = x5_stack
|
||||
movq 160(%rsp),%r15
|
||||
# a = x3 + x0
|
||||
lea (%rsi,%rdx),%rbp
|
||||
# (uint32) a <<<= 7
|
||||
rol $7,%ebp
|
||||
# x1 ^= a
|
||||
xor %rbp,%rdi
|
||||
# b = x4 + x5
|
||||
lea (%r9,%r15),%rbp
|
||||
# (uint32) b <<<= 7
|
||||
rol $7,%ebp
|
||||
# x6 ^= b
|
||||
xor %rbp,%rax
|
||||
# a = x0 + x1
|
||||
lea (%rdx,%rdi),%rbp
|
||||
# (uint32) a <<<= 9
|
||||
rol $9,%ebp
|
||||
# x2 ^= a
|
||||
xor %rbp,%rcx
|
||||
# b = x5 + x6
|
||||
lea (%r15,%rax),%rbp
|
||||
# (uint32) b <<<= 9
|
||||
rol $9,%ebp
|
||||
# x7 ^= b
|
||||
xor %rbp,%r8
|
||||
# a = x1 + x2
|
||||
lea (%rdi,%rcx),%rbp
|
||||
# (uint32) a <<<= 13
|
||||
rol $13,%ebp
|
||||
# x3 ^= a
|
||||
xor %rbp,%rsi
|
||||
# b = x6 + x7
|
||||
lea (%rax,%r8),%rbp
|
||||
# (uint32) b <<<= 13
|
||||
rol $13,%ebp
|
||||
# x4 ^= b
|
||||
xor %rbp,%r9
|
||||
# a = x2 + x3
|
||||
lea (%rcx,%rsi),%rbp
|
||||
# (uint32) a <<<= 18
|
||||
rol $18,%ebp
|
||||
# x0 ^= a
|
||||
xor %rbp,%rdx
|
||||
# b = x7 + x4
|
||||
lea (%r8,%r9),%rbp
|
||||
# (uint32) b <<<= 18
|
||||
rol $18,%ebp
|
||||
# x5 ^= b
|
||||
xor %rbp,%r15
|
||||
# x10 = x10_stack
|
||||
movq 168(%rsp),%rbp
|
||||
# x5_stack = x5
|
||||
movq %r15,160(%rsp)
|
||||
# c = x9 + x10
|
||||
lea (%r10,%rbp),%r15
|
||||
# (uint32) c <<<= 7
|
||||
rol $7,%r15d
|
||||
# x11 ^= c
|
||||
xor %r15,%r12
|
||||
# c = x10 + x11
|
||||
lea (%rbp,%r12),%r15
|
||||
# (uint32) c <<<= 9
|
||||
rol $9,%r15d
|
||||
# x8 ^= c
|
||||
xor %r15,%r11
|
||||
# c = x11 + x8
|
||||
lea (%r12,%r11),%r15
|
||||
# (uint32) c <<<= 13
|
||||
rol $13,%r15d
|
||||
# x9 ^= c
|
||||
xor %r15,%r10
|
||||
# c = x8 + x9
|
||||
lea (%r11,%r10),%r15
|
||||
# (uint32) c <<<= 18
|
||||
rol $18,%r15d
|
||||
# x10 ^= c
|
||||
xor %r15,%rbp
|
||||
# x15 = x15_stack
|
||||
movq 176(%rsp),%r15
|
||||
# x10_stack = x10
|
||||
movq %rbp,168(%rsp)
|
||||
# d = x14 + x15
|
||||
lea (%rbx,%r15),%rbp
|
||||
# (uint32) d <<<= 7
|
||||
rol $7,%ebp
|
||||
# x12 ^= d
|
||||
xor %rbp,%r14
|
||||
# d = x15 + x12
|
||||
lea (%r15,%r14),%rbp
|
||||
# (uint32) d <<<= 9
|
||||
rol $9,%ebp
|
||||
# x13 ^= d
|
||||
xor %rbp,%r13
|
||||
# d = x12 + x13
|
||||
lea (%r14,%r13),%rbp
|
||||
# (uint32) d <<<= 13
|
||||
rol $13,%ebp
|
||||
# x14 ^= d
|
||||
xor %rbp,%rbx
|
||||
# d = x13 + x14
|
||||
lea (%r13,%rbx),%rbp
|
||||
# (uint32) d <<<= 18
|
||||
rol $18,%ebp
|
||||
# x15 ^= d
|
||||
xor %rbp,%r15
|
||||
# x15_stack = x15
|
||||
movq %r15,176(%rsp)
|
||||
# x5 = x5_stack
|
||||
movq 160(%rsp),%r15
|
||||
# a = x12 + x0
|
||||
lea (%r14,%rdx),%rbp
|
||||
# (uint32) a <<<= 7
|
||||
rol $7,%ebp
|
||||
# x4 ^= a
|
||||
xor %rbp,%r9
|
||||
# b = x1 + x5
|
||||
lea (%rdi,%r15),%rbp
|
||||
# (uint32) b <<<= 7
|
||||
rol $7,%ebp
|
||||
# x9 ^= b
|
||||
xor %rbp,%r10
|
||||
# a = x0 + x4
|
||||
lea (%rdx,%r9),%rbp
|
||||
# (uint32) a <<<= 9
|
||||
rol $9,%ebp
|
||||
# x8 ^= a
|
||||
xor %rbp,%r11
|
||||
# b = x5 + x9
|
||||
lea (%r15,%r10),%rbp
|
||||
# (uint32) b <<<= 9
|
||||
rol $9,%ebp
|
||||
# x13 ^= b
|
||||
xor %rbp,%r13
|
||||
# a = x4 + x8
|
||||
lea (%r9,%r11),%rbp
|
||||
# (uint32) a <<<= 13
|
||||
rol $13,%ebp
|
||||
# x12 ^= a
|
||||
xor %rbp,%r14
|
||||
# b = x9 + x13
|
||||
lea (%r10,%r13),%rbp
|
||||
# (uint32) b <<<= 13
|
||||
rol $13,%ebp
|
||||
# x1 ^= b
|
||||
xor %rbp,%rdi
|
||||
# a = x8 + x12
|
||||
lea (%r11,%r14),%rbp
|
||||
# (uint32) a <<<= 18
|
||||
rol $18,%ebp
|
||||
# x0 ^= a
|
||||
xor %rbp,%rdx
|
||||
# b = x13 + x1
|
||||
lea (%r13,%rdi),%rbp
|
||||
# (uint32) b <<<= 18
|
||||
rol $18,%ebp
|
||||
# x5 ^= b
|
||||
xor %rbp,%r15
|
||||
# x10 = x10_stack
|
||||
movq 168(%rsp),%rbp
|
||||
# x5_stack = x5
|
||||
movq %r15,160(%rsp)
|
||||
# c = x6 + x10
|
||||
lea (%rax,%rbp),%r15
|
||||
# (uint32) c <<<= 7
|
||||
rol $7,%r15d
|
||||
# x14 ^= c
|
||||
xor %r15,%rbx
|
||||
# c = x10 + x14
|
||||
lea (%rbp,%rbx),%r15
|
||||
# (uint32) c <<<= 9
|
||||
rol $9,%r15d
|
||||
# x2 ^= c
|
||||
xor %r15,%rcx
|
||||
# c = x14 + x2
|
||||
lea (%rbx,%rcx),%r15
|
||||
# (uint32) c <<<= 13
|
||||
rol $13,%r15d
|
||||
# x6 ^= c
|
||||
xor %r15,%rax
|
||||
# c = x2 + x6
|
||||
lea (%rcx,%rax),%r15
|
||||
# (uint32) c <<<= 18
|
||||
rol $18,%r15d
|
||||
# x10 ^= c
|
||||
xor %r15,%rbp
|
||||
# x15 = x15_stack
|
||||
movq 176(%rsp),%r15
|
||||
# x10_stack = x10
|
||||
movq %rbp,168(%rsp)
|
||||
# d = x11 + x15
|
||||
lea (%r12,%r15),%rbp
|
||||
# (uint32) d <<<= 7
|
||||
rol $7,%ebp
|
||||
# x3 ^= d
|
||||
xor %rbp,%rsi
|
||||
# d = x15 + x3
|
||||
lea (%r15,%rsi),%rbp
|
||||
# (uint32) d <<<= 9
|
||||
rol $9,%ebp
|
||||
# x7 ^= d
|
||||
xor %rbp,%r8
|
||||
# d = x3 + x7
|
||||
lea (%rsi,%r8),%rbp
|
||||
# (uint32) d <<<= 13
|
||||
rol $13,%ebp
|
||||
# x11 ^= d
|
||||
xor %rbp,%r12
|
||||
# d = x7 + x11
|
||||
lea (%r8,%r12),%rbp
|
||||
# (uint32) d <<<= 18
|
||||
rol $18,%ebp
|
||||
# x15 ^= d
|
||||
xor %rbp,%r15
|
||||
# x15_stack = x15
|
||||
movq %r15,176(%rsp)
|
||||
# x5 = x5_stack
|
||||
movq 160(%rsp),%r15
|
||||
# a = x3 + x0
|
||||
lea (%rsi,%rdx),%rbp
|
||||
# (uint32) a <<<= 7
|
||||
rol $7,%ebp
|
||||
# x1 ^= a
|
||||
xor %rbp,%rdi
|
||||
# b = x4 + x5
|
||||
lea (%r9,%r15),%rbp
|
||||
# (uint32) b <<<= 7
|
||||
rol $7,%ebp
|
||||
# x6 ^= b
|
||||
xor %rbp,%rax
|
||||
# a = x0 + x1
|
||||
lea (%rdx,%rdi),%rbp
|
||||
# (uint32) a <<<= 9
|
||||
rol $9,%ebp
|
||||
# x2 ^= a
|
||||
xor %rbp,%rcx
|
||||
# b = x5 + x6
|
||||
lea (%r15,%rax),%rbp
|
||||
# (uint32) b <<<= 9
|
||||
rol $9,%ebp
|
||||
# x7 ^= b
|
||||
xor %rbp,%r8
|
||||
# a = x1 + x2
|
||||
lea (%rdi,%rcx),%rbp
|
||||
# (uint32) a <<<= 13
|
||||
rol $13,%ebp
|
||||
# x3 ^= a
|
||||
xor %rbp,%rsi
|
||||
# b = x6 + x7
|
||||
lea (%rax,%r8),%rbp
|
||||
# (uint32) b <<<= 13
|
||||
rol $13,%ebp
|
||||
# x4 ^= b
|
||||
xor %rbp,%r9
|
||||
# a = x2 + x3
|
||||
lea (%rcx,%rsi),%rbp
|
||||
# (uint32) a <<<= 18
|
||||
rol $18,%ebp
|
||||
# x0 ^= a
|
||||
xor %rbp,%rdx
|
||||
# b = x7 + x4
|
||||
lea (%r8,%r9),%rbp
|
||||
# (uint32) b <<<= 18
|
||||
rol $18,%ebp
|
||||
# x5 ^= b
|
||||
xor %rbp,%r15
|
||||
# x10 = x10_stack
|
||||
movq 168(%rsp),%rbp
|
||||
# x5_stack = x5
|
||||
movq %r15,160(%rsp)
|
||||
# c = x9 + x10
|
||||
lea (%r10,%rbp),%r15
|
||||
# (uint32) c <<<= 7
|
||||
rol $7,%r15d
|
||||
# x11 ^= c
|
||||
xor %r15,%r12
|
||||
# c = x10 + x11
|
||||
lea (%rbp,%r12),%r15
|
||||
# (uint32) c <<<= 9
|
||||
rol $9,%r15d
|
||||
# x8 ^= c
|
||||
xor %r15,%r11
|
||||
# c = x11 + x8
|
||||
lea (%r12,%r11),%r15
|
||||
# (uint32) c <<<= 13
|
||||
rol $13,%r15d
|
||||
# x9 ^= c
|
||||
xor %r15,%r10
|
||||
# c = x8 + x9
|
||||
lea (%r11,%r10),%r15
|
||||
# (uint32) c <<<= 18
|
||||
rol $18,%r15d
|
||||
# x10 ^= c
|
||||
xor %r15,%rbp
|
||||
# x15 = x15_stack
|
||||
movq 176(%rsp),%r15
|
||||
# x10_stack = x10
|
||||
movq %rbp,168(%rsp)
|
||||
# d = x14 + x15
|
||||
lea (%rbx,%r15),%rbp
|
||||
# (uint32) d <<<= 7
|
||||
rol $7,%ebp
|
||||
# x12 ^= d
|
||||
xor %rbp,%r14
|
||||
# d = x15 + x12
|
||||
lea (%r15,%r14),%rbp
|
||||
# (uint32) d <<<= 9
|
||||
rol $9,%ebp
|
||||
# x13 ^= d
|
||||
xor %rbp,%r13
|
||||
# d = x12 + x13
|
||||
lea (%r14,%r13),%rbp
|
||||
# (uint32) d <<<= 13
|
||||
rol $13,%ebp
|
||||
# x14 ^= d
|
||||
xor %rbp,%rbx
|
||||
# d = x13 + x14
|
||||
lea (%r13,%rbx),%rbp
|
||||
# (uint32) d <<<= 18
|
||||
rol $18,%ebp
|
||||
# x15 ^= d
|
||||
xor %rbp,%r15
|
||||
# x15_stack = x15
|
||||
movq %r15,176(%rsp)
|
||||
# i = i_backup
|
||||
movq 184(%rsp),%r15
|
||||
# unsigned>? i -= 4
|
||||
sub $4,%r15
|
||||
# comment:fp stack unchanged by jump
|
||||
# goto mainloop if unsigned>
|
||||
ja ._mainloop
|
||||
# (uint32) x2 += j2
|
||||
addl 64(%rsp),%ecx
|
||||
# x3 <<= 32
|
||||
shl $32,%rsi
|
||||
# x3 += j2
|
||||
addq 64(%rsp),%rsi
|
||||
# (uint64) x3 >>= 32
|
||||
shr $32,%rsi
|
||||
# x3 <<= 32
|
||||
shl $32,%rsi
|
||||
# x2 += x3
|
||||
add %rsi,%rcx
|
||||
# (uint32) x6 += j6
|
||||
addl 80(%rsp),%eax
|
||||
# x7 <<= 32
|
||||
shl $32,%r8
|
||||
# x7 += j6
|
||||
addq 80(%rsp),%r8
|
||||
# (uint64) x7 >>= 32
|
||||
shr $32,%r8
|
||||
# x7 <<= 32
|
||||
shl $32,%r8
|
||||
# x6 += x7
|
||||
add %r8,%rax
|
||||
# (uint32) x8 += j8
|
||||
addl 88(%rsp),%r11d
|
||||
# x9 <<= 32
|
||||
shl $32,%r10
|
||||
# x9 += j8
|
||||
addq 88(%rsp),%r10
|
||||
# (uint64) x9 >>= 32
|
||||
shr $32,%r10
|
||||
# x9 <<= 32
|
||||
shl $32,%r10
|
||||
# x8 += x9
|
||||
add %r10,%r11
|
||||
# (uint32) x12 += j12
|
||||
addl 104(%rsp),%r14d
|
||||
# x13 <<= 32
|
||||
shl $32,%r13
|
||||
# x13 += j12
|
||||
addq 104(%rsp),%r13
|
||||
# (uint64) x13 >>= 32
|
||||
shr $32,%r13
|
||||
# x13 <<= 32
|
||||
shl $32,%r13
|
||||
# x12 += x13
|
||||
add %r13,%r14
|
||||
# (uint32) x0 += j0
|
||||
addl 56(%rsp),%edx
|
||||
# x1 <<= 32
|
||||
shl $32,%rdi
|
||||
# x1 += j0
|
||||
addq 56(%rsp),%rdi
|
||||
# (uint64) x1 >>= 32
|
||||
shr $32,%rdi
|
||||
# x1 <<= 32
|
||||
shl $32,%rdi
|
||||
# x0 += x1
|
||||
add %rdi,%rdx
|
||||
# x5 = x5_stack
|
||||
movq 160(%rsp),%rdi
|
||||
# (uint32) x4 += j4
|
||||
addl 72(%rsp),%r9d
|
||||
# x5 <<= 32
|
||||
shl $32,%rdi
|
||||
# x5 += j4
|
||||
addq 72(%rsp),%rdi
|
||||
# (uint64) x5 >>= 32
|
||||
shr $32,%rdi
|
||||
# x5 <<= 32
|
||||
shl $32,%rdi
|
||||
# x4 += x5
|
||||
add %rdi,%r9
|
||||
# x10 = x10_stack
|
||||
movq 168(%rsp),%r8
|
||||
# (uint32) x10 += j10
|
||||
addl 96(%rsp),%r8d
|
||||
# x11 <<= 32
|
||||
shl $32,%r12
|
||||
# x11 += j10
|
||||
addq 96(%rsp),%r12
|
||||
# (uint64) x11 >>= 32
|
||||
shr $32,%r12
|
||||
# x11 <<= 32
|
||||
shl $32,%r12
|
||||
# x10 += x11
|
||||
add %r12,%r8
|
||||
# x15 = x15_stack
|
||||
movq 176(%rsp),%rdi
|
||||
# (uint32) x14 += j14
|
||||
addl 112(%rsp),%ebx
|
||||
# x15 <<= 32
|
||||
shl $32,%rdi
|
||||
# x15 += j14
|
||||
addq 112(%rsp),%rdi
|
||||
# (uint64) x15 >>= 32
|
||||
shr $32,%rdi
|
||||
# x15 <<= 32
|
||||
shl $32,%rdi
|
||||
# x14 += x15
|
||||
add %rdi,%rbx
|
||||
# out = out_backup
|
||||
movq 136(%rsp),%rdi
|
||||
# m = m_backup
|
||||
movq 144(%rsp),%rsi
|
||||
# x0 ^= *(uint64 *) (m + 0)
|
||||
xorq 0(%rsi),%rdx
|
||||
# *(uint64 *) (out + 0) = x0
|
||||
movq %rdx,0(%rdi)
|
||||
# x2 ^= *(uint64 *) (m + 8)
|
||||
xorq 8(%rsi),%rcx
|
||||
# *(uint64 *) (out + 8) = x2
|
||||
movq %rcx,8(%rdi)
|
||||
# x4 ^= *(uint64 *) (m + 16)
|
||||
xorq 16(%rsi),%r9
|
||||
# *(uint64 *) (out + 16) = x4
|
||||
movq %r9,16(%rdi)
|
||||
# x6 ^= *(uint64 *) (m + 24)
|
||||
xorq 24(%rsi),%rax
|
||||
# *(uint64 *) (out + 24) = x6
|
||||
movq %rax,24(%rdi)
|
||||
# x8 ^= *(uint64 *) (m + 32)
|
||||
xorq 32(%rsi),%r11
|
||||
# *(uint64 *) (out + 32) = x8
|
||||
movq %r11,32(%rdi)
|
||||
# x10 ^= *(uint64 *) (m + 40)
|
||||
xorq 40(%rsi),%r8
|
||||
# *(uint64 *) (out + 40) = x10
|
||||
movq %r8,40(%rdi)
|
||||
# x12 ^= *(uint64 *) (m + 48)
|
||||
xorq 48(%rsi),%r14
|
||||
# *(uint64 *) (out + 48) = x12
|
||||
movq %r14,48(%rdi)
|
||||
# x14 ^= *(uint64 *) (m + 56)
|
||||
xorq 56(%rsi),%rbx
|
||||
# *(uint64 *) (out + 56) = x14
|
||||
movq %rbx,56(%rdi)
|
||||
# bytes = bytes_backup
|
||||
movq 152(%rsp),%rdx
|
||||
# in8 = j8
|
||||
movq 88(%rsp),%rcx
|
||||
# in8 += 1
|
||||
add $1,%rcx
|
||||
# j8 = in8
|
||||
movq %rcx,88(%rsp)
|
||||
# unsigned>? unsigned<? bytes - 64
|
||||
cmp $64,%rdx
|
||||
# comment:fp stack unchanged by jump
|
||||
# goto bytesatleast65 if unsigned>
|
||||
ja ._bytesatleast65
|
||||
# comment:fp stack unchanged by jump
|
||||
# goto bytesatleast64 if !unsigned<
|
||||
jae ._bytesatleast64
|
||||
# m = out
|
||||
mov %rdi,%rsi
|
||||
# out = ctarget
|
||||
movq 128(%rsp),%rdi
|
||||
# i = bytes
|
||||
mov %rdx,%rcx
|
||||
# while (i) { *out++ = *m++; --i }
|
||||
rep movsb
|
||||
# comment:fp stack unchanged by fallthrough
|
||||
# bytesatleast64:
|
||||
._bytesatleast64:
|
||||
# x = x_backup
|
||||
movq 120(%rsp),%rdi
|
||||
# in8 = j8
|
||||
movq 88(%rsp),%rsi
|
||||
# *(uint64 *) (x + 32) = in8
|
||||
movq %rsi,32(%rdi)
|
||||
# r11 = r11_stack
|
||||
movq 0(%rsp),%r11
|
||||
# r12 = r12_stack
|
||||
movq 8(%rsp),%r12
|
||||
# r13 = r13_stack
|
||||
movq 16(%rsp),%r13
|
||||
# r14 = r14_stack
|
||||
movq 24(%rsp),%r14
|
||||
# r15 = r15_stack
|
||||
movq 32(%rsp),%r15
|
||||
# rbx = rbx_stack
|
||||
movq 40(%rsp),%rbx
|
||||
# rbp = rbp_stack
|
||||
movq 48(%rsp),%rbp
|
||||
# comment:fp stack unchanged by fallthrough
|
||||
# done:
|
||||
._done:
|
||||
# leave
|
||||
add %r11,%rsp
|
||||
mov %rdi,%rax
|
||||
mov %rsi,%rdx
|
||||
ret
|
||||
# bytesatleast65:
|
||||
._bytesatleast65:
|
||||
# bytes -= 64
|
||||
sub $64,%rdx
|
||||
# out += 64
|
||||
add $64,%rdi
|
||||
# m += 64
|
||||
add $64,%rsi
|
||||
# comment:fp stack unchanged by jump
|
||||
# goto bytesatleast1
|
||||
jmp ._bytesatleast1
|
||||
ENDPROC(salsa20_encrypt_bytes)
|
@ -1,91 +0,0 @@
|
||||
/*
|
||||
* Glue code for optimized assembly version of Salsa20.
|
||||
*
|
||||
* Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com>
|
||||
*
|
||||
* The assembly codes are public domain assembly codes written by Daniel J.
|
||||
* Bernstein <djb@cr.yp.to>. The codes are modified to include indentation
|
||||
* and to remove extraneous comments and functions that are not needed.
|
||||
* - i586 version, renamed as salsa20-i586-asm_32.S
|
||||
* available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s>
|
||||
* - x86-64 version, renamed as salsa20-x86_64-asm_64.S
|
||||
* available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s>
|
||||
*
|
||||
* Also modified to set up the initial state using the generic C code rather
|
||||
* than in assembly.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <asm/unaligned.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/salsa20.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
asmlinkage void salsa20_encrypt_bytes(u32 state[16], const u8 *src, u8 *dst,
|
||||
u32 bytes);
|
||||
|
||||
static int salsa20_asm_crypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
const struct salsa20_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
u32 state[16];
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
crypto_salsa20_init(state, ctx, walk.iv);
|
||||
|
||||
while (walk.nbytes > 0) {
|
||||
unsigned int nbytes = walk.nbytes;
|
||||
|
||||
if (nbytes < walk.total)
|
||||
nbytes = round_down(nbytes, walk.stride);
|
||||
|
||||
salsa20_encrypt_bytes(state, walk.src.virt.addr,
|
||||
walk.dst.virt.addr, nbytes);
|
||||
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static struct skcipher_alg alg = {
|
||||
.base.cra_name = "salsa20",
|
||||
.base.cra_driver_name = "salsa20-asm",
|
||||
.base.cra_priority = 200,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct salsa20_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = SALSA20_MIN_KEY_SIZE,
|
||||
.max_keysize = SALSA20_MAX_KEY_SIZE,
|
||||
.ivsize = SALSA20_IV_SIZE,
|
||||
.chunksize = SALSA20_BLOCK_SIZE,
|
||||
.setkey = crypto_salsa20_setkey,
|
||||
.encrypt = salsa20_asm_crypt,
|
||||
.decrypt = salsa20_asm_crypt,
|
||||
};
|
||||
|
||||
/* Module load hook: register the Salsa20 skcipher and pass on the result. */
static int __init init(void)
{
	int ret = crypto_register_skcipher(&alg);

	return ret;
}
|
||||
|
||||
/* Module unload hook: unregister the skcipher that init() registered. */
static void __exit fini(void)
|
||||
{
|
||||
crypto_unregister_skcipher(&alg);
|
||||
}
|
||||
|
||||
module_init(init);
|
||||
module_exit(fini);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)");
|
||||
MODULE_ALIAS_CRYPTO("salsa20");
|
||||
MODULE_ALIAS_CRYPTO("salsa20-asm");
|
138
crypto/Kconfig
138
crypto/Kconfig
@ -289,6 +289,107 @@ config CRYPTO_CHACHA20POLY1305
|
||||
with the Poly1305 authenticator. It is defined in RFC7539 for use in
|
||||
IETF protocols.
|
||||
|
||||
config CRYPTO_AEGIS128
|
||||
tristate "AEGIS-128 AEAD algorithm"
|
||||
select CRYPTO_AEAD
|
||||
select CRYPTO_AES # for AES S-box tables
|
||||
help
|
||||
Support for the AEGIS-128 dedicated AEAD algorithm.
|
||||
|
||||
config CRYPTO_AEGIS128L
|
||||
tristate "AEGIS-128L AEAD algorithm"
|
||||
select CRYPTO_AEAD
|
||||
select CRYPTO_AES # for AES S-box tables
|
||||
help
|
||||
Support for the AEGIS-128L dedicated AEAD algorithm.
|
||||
|
||||
config CRYPTO_AEGIS256
|
||||
tristate "AEGIS-256 AEAD algorithm"
|
||||
select CRYPTO_AEAD
|
||||
select CRYPTO_AES # for AES S-box tables
|
||||
help
|
||||
Support for the AEGIS-256 dedicated AEAD algorithm.
|
||||
|
||||
config CRYPTO_AEGIS128_AESNI_SSE2
|
||||
tristate "AEGIS-128 AEAD algorithm (x86_64 AESNI+SSE2 implementation)"
|
||||
depends on X86 && 64BIT
|
||||
select CRYPTO_AEAD
|
||||
select CRYPTO_CRYPTD
|
||||
help
|
||||
AESNI+SSE2 implementation of the AEGIS-128 dedicated AEAD algorithm.
|
||||
|
||||
config CRYPTO_AEGIS128L_AESNI_SSE2
|
||||
tristate "AEGIS-128L AEAD algorithm (x86_64 AESNI+SSE2 implementation)"
|
||||
depends on X86 && 64BIT
|
||||
select CRYPTO_AEAD
|
||||
select CRYPTO_CRYPTD
|
||||
help
|
||||
AESNI+SSE2 implementation of the AEGIS-128L dedicated AEAD algorithm.
|
||||
|
||||
config CRYPTO_AEGIS256_AESNI_SSE2
|
||||
tristate "AEGIS-256 AEAD algorithm (x86_64 AESNI+SSE2 implementation)"
|
||||
depends on X86 && 64BIT
|
||||
select CRYPTO_AEAD
|
||||
select CRYPTO_CRYPTD
|
||||
help
|
||||
AESNI+SSE2 implementation of the AEGIS-256 dedicated AEAD algorithm.
|
||||
|
||||
config CRYPTO_MORUS640
|
||||
tristate "MORUS-640 AEAD algorithm"
|
||||
select CRYPTO_AEAD
|
||||
help
|
||||
Support for the MORUS-640 dedicated AEAD algorithm.
|
||||
|
||||
config CRYPTO_MORUS640_GLUE
|
||||
tristate
|
||||
depends on X86
|
||||
select CRYPTO_AEAD
|
||||
select CRYPTO_CRYPTD
|
||||
help
|
||||
Common glue for SIMD optimizations of the MORUS-640 dedicated AEAD
|
||||
algorithm.
|
||||
|
||||
config CRYPTO_MORUS640_SSE2
|
||||
tristate "MORUS-640 AEAD algorithm (x86_64 SSE2 implementation)"
|
||||
depends on X86 && 64BIT
|
||||
select CRYPTO_AEAD
|
||||
select CRYPTO_MORUS640_GLUE
|
||||
help
|
||||
SSE2 implementation of the MORUS-640 dedicated AEAD algorithm.
|
||||
|
||||
config CRYPTO_MORUS1280
|
||||
tristate "MORUS-1280 AEAD algorithm"
|
||||
select CRYPTO_AEAD
|
||||
help
|
||||
Support for the MORUS-1280 dedicated AEAD algorithm.
|
||||
|
||||
config CRYPTO_MORUS1280_GLUE
|
||||
tristate
|
||||
depends on X86
|
||||
select CRYPTO_AEAD
|
||||
select CRYPTO_CRYPTD
|
||||
help
|
||||
Common glue for SIMD optimizations of the MORUS-1280 dedicated AEAD
|
||||
algorithm.
|
||||
|
||||
config CRYPTO_MORUS1280_SSE2
|
||||
tristate "MORUS-1280 AEAD algorithm (x86_64 SSE2 implementation)"
|
||||
depends on X86 && 64BIT
|
||||
select CRYPTO_AEAD
|
||||
select CRYPTO_MORUS1280_GLUE
|
||||
help
|
||||
SSE2 optimized implementation of the MORUS-1280 dedicated AEAD
|
||||
algorithm.
|
||||
|
||||
config CRYPTO_MORUS1280_AVX2
|
||||
tristate "MORUS-1280 AEAD algorithm (x86_64 AVX2 implementation)"
|
||||
depends on X86 && 64BIT
|
||||
select CRYPTO_AEAD
|
||||
select CRYPTO_MORUS1280_GLUE
|
||||
help
|
||||
AVX2 optimized implementation of the MORUS-1280 dedicated AEAD
|
||||
algorithm.
|
||||
|
||||
config CRYPTO_SEQIV
|
||||
tristate "Sequence Number IV Generator"
|
||||
select CRYPTO_AEAD
|
||||
@ -1335,34 +1436,6 @@ config CRYPTO_SALSA20
|
||||
The Salsa20 stream cipher algorithm is designed by Daniel J.
|
||||
Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
|
||||
|
||||
config CRYPTO_SALSA20_586
|
||||
tristate "Salsa20 stream cipher algorithm (i586)"
|
||||
depends on (X86 || UML_X86) && !64BIT
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_SALSA20
|
||||
help
|
||||
Salsa20 stream cipher algorithm.
|
||||
|
||||
Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
|
||||
Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>
|
||||
|
||||
The Salsa20 stream cipher algorithm is designed by Daniel J.
|
||||
Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
|
||||
|
||||
config CRYPTO_SALSA20_X86_64
|
||||
tristate "Salsa20 stream cipher algorithm (x86_64)"
|
||||
depends on (X86 || UML_X86) && 64BIT
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_SALSA20
|
||||
help
|
||||
Salsa20 stream cipher algorithm.
|
||||
|
||||
Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
|
||||
Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>
|
||||
|
||||
The Salsa20 stream cipher algorithm is designed by Daniel J.
|
||||
Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
|
||||
|
||||
config CRYPTO_CHACHA20
|
||||
tristate "ChaCha20 cipher algorithm"
|
||||
select CRYPTO_BLKCIPHER
|
||||
@ -1695,6 +1768,15 @@ config CRYPTO_LZ4HC
|
||||
help
|
||||
This is the LZ4 high compression mode algorithm.
|
||||
|
||||
config CRYPTO_ZSTD
|
||||
tristate "Zstd compression algorithm"
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_ACOMP2
|
||||
select ZSTD_COMPRESS
|
||||
select ZSTD_DECOMPRESS
|
||||
help
|
||||
This is the zstd algorithm.
|
||||
|
||||
comment "Random Number Generation"
|
||||
|
||||
config CRYPTO_ANSI_CPRNG
|
||||
|
@ -86,6 +86,11 @@ obj-$(CONFIG_CRYPTO_KEYWRAP) += keywrap.o
|
||||
obj-$(CONFIG_CRYPTO_GCM) += gcm.o
|
||||
obj-$(CONFIG_CRYPTO_CCM) += ccm.o
|
||||
obj-$(CONFIG_CRYPTO_CHACHA20POLY1305) += chacha20poly1305.o
|
||||
obj-$(CONFIG_CRYPTO_AEGIS128) += aegis128.o
|
||||
obj-$(CONFIG_CRYPTO_AEGIS128L) += aegis128l.o
|
||||
obj-$(CONFIG_CRYPTO_AEGIS256) += aegis256.o
|
||||
obj-$(CONFIG_CRYPTO_MORUS640) += morus640.o
|
||||
obj-$(CONFIG_CRYPTO_MORUS1280) += morus1280.o
|
||||
obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o
|
||||
obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
|
||||
obj-$(CONFIG_CRYPTO_MCRYPTD) += mcryptd.o
|
||||
@ -137,6 +142,7 @@ obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o
|
||||
obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
|
||||
obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o
|
||||
obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o
|
||||
obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o
|
||||
|
||||
ecdh_generic-y := ecc.o
|
||||
ecdh_generic-y += ecdh.o
|
||||
|
80
crypto/aegis.h
Normal file
80
crypto/aegis.h
Normal file
@ -0,0 +1,80 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* AEGIS common definitions
|
||||
*
|
||||
* Copyright (c) 2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (c) 2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*/
|
||||
|
||||
#ifndef _CRYPTO_AEGIS_H
|
||||
#define _CRYPTO_AEGIS_H
|
||||
|
||||
#include <crypto/aes.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#define AEGIS_BLOCK_SIZE 16
|
||||
|
||||
/*
 * One AEGIS block of AEGIS_BLOCK_SIZE (16) bytes, accessible as 64-bit
 * little-endian words, as 32-bit words, or as individual bytes.
 */
union aegis_block {
|
||||
__le64 words64[AEGIS_BLOCK_SIZE / sizeof(__le64)];
|
||||
u32 words32[AEGIS_BLOCK_SIZE / sizeof(u32)];
|
||||
u8 bytes[AEGIS_BLOCK_SIZE];
|
||||
};
|
||||
|
||||
#define AEGIS_BLOCK_ALIGN (__alignof__(union aegis_block))
|
||||
#define AEGIS_ALIGNED(p) IS_ALIGNED((uintptr_t)p, AEGIS_BLOCK_ALIGN)
|
||||
|
||||
/*
 * The two constant blocks used when initialising the cipher state.
 * Read as bytes in memory order (the words are stored little-endian),
 * the 32 bytes are the Fibonacci sequence 0, 1, 1, 2, 3, 5, 8, 13, ...
 * reduced modulo 256.
 */
static const union aegis_block crypto_aegis_const[2] = {
|
||||
{ .words64 = {
|
||||
cpu_to_le64(U64_C(0x0d08050302010100)),
|
||||
cpu_to_le64(U64_C(0x6279e99059372215)),
|
||||
} },
|
||||
{ .words64 = {
|
||||
cpu_to_le64(U64_C(0xf12fc26d55183ddb)),
|
||||
cpu_to_le64(U64_C(0xdd28b57342311120)),
|
||||
} },
|
||||
};
|
||||
|
||||
static void crypto_aegis_block_xor(union aegis_block *dst,
|
||||
const union aegis_block *src)
|
||||
{
|
||||
dst->words64[0] ^= src->words64[0];
|
||||
dst->words64[1] ^= src->words64[1];
|
||||
}
|
||||
|
||||
static void crypto_aegis_block_and(union aegis_block *dst,
|
||||
const union aegis_block *src)
|
||||
{
|
||||
dst->words64[0] &= src->words64[0];
|
||||
dst->words64[1] &= src->words64[1];
|
||||
}
|
||||
|
||||
static void crypto_aegis_aesenc(union aegis_block *dst,
|
||||
const union aegis_block *src,
|
||||
const union aegis_block *key)
|
||||
{
|
||||
u32 *d = dst->words32;
|
||||
const u8 *s = src->bytes;
|
||||
const u32 *k = key->words32;
|
||||
const u32 *t0 = crypto_ft_tab[0];
|
||||
const u32 *t1 = crypto_ft_tab[1];
|
||||
const u32 *t2 = crypto_ft_tab[2];
|
||||
const u32 *t3 = crypto_ft_tab[3];
|
||||
u32 d0, d1, d2, d3;
|
||||
|
||||
d0 = t0[s[ 0]] ^ t1[s[ 5]] ^ t2[s[10]] ^ t3[s[15]] ^ k[0];
|
||||
d1 = t0[s[ 4]] ^ t1[s[ 9]] ^ t2[s[14]] ^ t3[s[ 3]] ^ k[1];
|
||||
d2 = t0[s[ 8]] ^ t1[s[13]] ^ t2[s[ 2]] ^ t3[s[ 7]] ^ k[2];
|
||||
d3 = t0[s[12]] ^ t1[s[ 1]] ^ t2[s[ 6]] ^ t3[s[11]] ^ k[3];
|
||||
|
||||
d[0] = d0;
|
||||
d[1] = d1;
|
||||
d[2] = d2;
|
||||
d[3] = d3;
|
||||
}
|
||||
|
||||
#endif /* _CRYPTO_AEGIS_H */
|
463
crypto/aegis128.c
Normal file
463
crypto/aegis128.c
Normal file
@ -0,0 +1,463 @@
|
||||
/*
|
||||
* The AEGIS-128 Authenticated-Encryption Algorithm
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/scatterlist.h>
|
||||
|
||||
#include "aegis.h"
|
||||
|
||||
#define AEGIS128_NONCE_SIZE 16
|
||||
#define AEGIS128_STATE_BLOCKS 5
|
||||
#define AEGIS128_KEY_SIZE 16
|
||||
#define AEGIS128_MIN_AUTH_SIZE 8
|
||||
#define AEGIS128_MAX_AUTH_SIZE 16
|
||||
|
||||
/* Running cipher state: AEGIS128_STATE_BLOCKS (5) blocks of 16 bytes. */
struct aegis_state {
|
||||
union aegis_block blocks[AEGIS128_STATE_BLOCKS];
|
||||
};
|
||||
|
||||
/* Per-transform context: the key, held as a single block. */
struct aegis_ctx {
|
||||
union aegis_block key;
|
||||
};
|
||||
|
||||
/*
 * Direction-specific callbacks: one to start the skcipher walk over an AEAD
 * request and one to transform a chunk of data while updating the state.
 */
struct aegis128_ops {
|
||||
int (*skcipher_walk_init)(struct skcipher_walk *walk,
|
||||
struct aead_request *req, bool atomic);
|
||||
|
||||
void (*crypt_chunk)(struct aegis_state *state, u8 *dst,
|
||||
const u8 *src, unsigned int size);
|
||||
};
|
||||
|
||||
static void crypto_aegis128_update(struct aegis_state *state)
|
||||
{
|
||||
union aegis_block tmp;
|
||||
unsigned int i;
|
||||
|
||||
tmp = state->blocks[AEGIS128_STATE_BLOCKS - 1];
|
||||
for (i = AEGIS128_STATE_BLOCKS - 1; i > 0; i--)
|
||||
crypto_aegis_aesenc(&state->blocks[i], &state->blocks[i - 1],
|
||||
&state->blocks[i]);
|
||||
crypto_aegis_aesenc(&state->blocks[0], &tmp, &state->blocks[0]);
|
||||
}
|
||||
|
||||
static void crypto_aegis128_update_a(struct aegis_state *state,
|
||||
const union aegis_block *msg)
|
||||
{
|
||||
crypto_aegis128_update(state);
|
||||
crypto_aegis_block_xor(&state->blocks[0], msg);
|
||||
}
|
||||
|
||||
static void crypto_aegis128_update_u(struct aegis_state *state, const void *msg)
|
||||
{
|
||||
crypto_aegis128_update(state);
|
||||
crypto_xor(state->blocks[0].bytes, msg, AEGIS_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
static void crypto_aegis128_init(struct aegis_state *state,
|
||||
const union aegis_block *key,
|
||||
const u8 *iv)
|
||||
{
|
||||
union aegis_block key_iv;
|
||||
unsigned int i;
|
||||
|
||||
key_iv = *key;
|
||||
crypto_xor(key_iv.bytes, iv, AEGIS_BLOCK_SIZE);
|
||||
|
||||
state->blocks[0] = key_iv;
|
||||
state->blocks[1] = crypto_aegis_const[1];
|
||||
state->blocks[2] = crypto_aegis_const[0];
|
||||
state->blocks[3] = *key;
|
||||
state->blocks[4] = *key;
|
||||
|
||||
crypto_aegis_block_xor(&state->blocks[3], &crypto_aegis_const[0]);
|
||||
crypto_aegis_block_xor(&state->blocks[4], &crypto_aegis_const[1]);
|
||||
|
||||
for (i = 0; i < 5; i++) {
|
||||
crypto_aegis128_update_a(state, key);
|
||||
crypto_aegis128_update_a(state, &key_iv);
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_aegis128_ad(struct aegis_state *state,
|
||||
const u8 *src, unsigned int size)
|
||||
{
|
||||
if (AEGIS_ALIGNED(src)) {
|
||||
const union aegis_block *src_blk =
|
||||
(const union aegis_block *)src;
|
||||
|
||||
while (size >= AEGIS_BLOCK_SIZE) {
|
||||
crypto_aegis128_update_a(state, src_blk);
|
||||
|
||||
size -= AEGIS_BLOCK_SIZE;
|
||||
src_blk++;
|
||||
}
|
||||
} else {
|
||||
while (size >= AEGIS_BLOCK_SIZE) {
|
||||
crypto_aegis128_update_u(state, src);
|
||||
|
||||
size -= AEGIS_BLOCK_SIZE;
|
||||
src += AEGIS_BLOCK_SIZE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Encrypt size bytes from src into dst, absorbing the plaintext into the
 * state.
 *
 * For every block the keystream is (blocks[2] & blocks[3]) ^ blocks[4] ^
 * blocks[1], taken before the state is stepped.  Whole blocks use direct
 * block accesses when both pointers are suitably aligned and byte-wise
 * helpers otherwise.  A trailing partial block is zero-padded before it is
 * absorbed, and only size bytes of ciphertext are written.
 *
 * The source block is always consumed before the destination is written.
 */
static void crypto_aegis128_encrypt_chunk(struct aegis_state *state, u8 *dst,
					  const u8 *src, unsigned int size)
{
	const bool aligned = AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst);
	union aegis_block ks;

	for (; size >= AEGIS_BLOCK_SIZE; size -= AEGIS_BLOCK_SIZE,
	     src += AEGIS_BLOCK_SIZE, dst += AEGIS_BLOCK_SIZE) {
		ks = state->blocks[2];
		crypto_aegis_block_and(&ks, &state->blocks[3]);
		crypto_aegis_block_xor(&ks, &state->blocks[4]);
		crypto_aegis_block_xor(&ks, &state->blocks[1]);

		if (aligned) {
			const union aegis_block *in =
				(const union aegis_block *)src;

			crypto_aegis_block_xor(&ks, in);
			crypto_aegis128_update_a(state, in);
			*(union aegis_block *)dst = ks;
		} else {
			crypto_xor(ks.bytes, src, AEGIS_BLOCK_SIZE);
			crypto_aegis128_update_u(state, src);
			memcpy(dst, ks.bytes, AEGIS_BLOCK_SIZE);
		}
	}

	if (size > 0) {
		/* Zero-initialised so the padding bytes are absorbed as 0. */
		union aegis_block msg = {};

		memcpy(msg.bytes, src, size);

		ks = state->blocks[2];
		crypto_aegis_block_and(&ks, &state->blocks[3]);
		crypto_aegis_block_xor(&ks, &state->blocks[4]);
		crypto_aegis_block_xor(&ks, &state->blocks[1]);

		crypto_aegis128_update_a(state, &msg);

		crypto_aegis_block_xor(&msg, &ks);

		memcpy(dst, msg.bytes, size);
	}
}
|
||||
|
||||
/*
 * Decrypt size bytes from src into dst, absorbing the recovered plaintext
 * into the state.
 *
 * For every block the keystream is (blocks[2] & blocks[3]) ^ blocks[4] ^
 * blocks[1], taken before the state is stepped.  Whole blocks use direct
 * block accesses when both pointers are suitably aligned and byte-wise
 * helpers otherwise.  For a trailing partial block the bytes beyond size
 * are cleared after the keystream XOR, so the state absorbs zero padding
 * rather than keystream.
 */
static void crypto_aegis128_decrypt_chunk(struct aegis_state *state, u8 *dst,
					  const u8 *src, unsigned int size)
{
	const bool aligned = AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst);
	union aegis_block pt;

	for (; size >= AEGIS_BLOCK_SIZE; size -= AEGIS_BLOCK_SIZE,
	     src += AEGIS_BLOCK_SIZE, dst += AEGIS_BLOCK_SIZE) {
		pt = state->blocks[2];
		crypto_aegis_block_and(&pt, &state->blocks[3]);
		crypto_aegis_block_xor(&pt, &state->blocks[4]);
		crypto_aegis_block_xor(&pt, &state->blocks[1]);

		if (aligned)
			crypto_aegis_block_xor(&pt,
					(const union aegis_block *)src);
		else
			crypto_xor(pt.bytes, src, AEGIS_BLOCK_SIZE);

		crypto_aegis128_update_a(state, &pt);

		if (aligned)
			*(union aegis_block *)dst = pt;
		else
			memcpy(dst, pt.bytes, AEGIS_BLOCK_SIZE);
	}

	if (size > 0) {
		union aegis_block msg = {};

		memcpy(msg.bytes, src, size);

		pt = state->blocks[2];
		crypto_aegis_block_and(&pt, &state->blocks[3]);
		crypto_aegis_block_xor(&pt, &state->blocks[4]);
		crypto_aegis_block_xor(&pt, &state->blocks[1]);
		crypto_aegis_block_xor(&msg, &pt);

		/* Drop the keystream that leaked into the padding bytes. */
		memset(msg.bytes + size, 0, AEGIS_BLOCK_SIZE - size);

		crypto_aegis128_update_a(state, &msg);

		memcpy(dst, msg.bytes, size);
	}
}
|
||||
|
||||
static void crypto_aegis128_process_ad(struct aegis_state *state,
|
||||
struct scatterlist *sg_src,
|
||||
unsigned int assoclen)
|
||||
{
|
||||
struct scatter_walk walk;
|
||||
union aegis_block buf;
|
||||
unsigned int pos = 0;
|
||||
|
||||
scatterwalk_start(&walk, sg_src);
|
||||
while (assoclen != 0) {
|
||||
unsigned int size = scatterwalk_clamp(&walk, assoclen);
|
||||
unsigned int left = size;
|
||||
void *mapped = scatterwalk_map(&walk);
|
||||
const u8 *src = (const u8 *)mapped;
|
||||
|
||||
if (pos + size >= AEGIS_BLOCK_SIZE) {
|
||||
if (pos > 0) {
|
||||
unsigned int fill = AEGIS_BLOCK_SIZE - pos;
|
||||
memcpy(buf.bytes + pos, src, fill);
|
||||
crypto_aegis128_update_a(state, &buf);
|
||||
pos = 0;
|
||||
left -= fill;
|
||||
src += fill;
|
||||
}
|
||||
|
||||
crypto_aegis128_ad(state, src, left);
|
||||
src += left & ~(AEGIS_BLOCK_SIZE - 1);
|
||||
left &= AEGIS_BLOCK_SIZE - 1;
|
||||
}
|
||||
|
||||
memcpy(buf.bytes + pos, src, left);
|
||||
|
||||
pos += left;
|
||||
assoclen -= size;
|
||||
scatterwalk_unmap(mapped);
|
||||
scatterwalk_advance(&walk, size);
|
||||
scatterwalk_done(&walk, 0, assoclen);
|
||||
}
|
||||
|
||||
if (pos > 0) {
|
||||
memset(buf.bytes + pos, 0, AEGIS_BLOCK_SIZE - pos);
|
||||
crypto_aegis128_update_a(state, &buf);
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_aegis128_process_crypt(struct aegis_state *state,
|
||||
struct aead_request *req,
|
||||
const struct aegis128_ops *ops)
|
||||
{
|
||||
struct skcipher_walk walk;
|
||||
u8 *src, *dst;
|
||||
unsigned int chunksize;
|
||||
|
||||
ops->skcipher_walk_init(&walk, req, false);
|
||||
|
||||
while (walk.nbytes) {
|
||||
src = walk.src.virt.addr;
|
||||
dst = walk.dst.virt.addr;
|
||||
chunksize = walk.nbytes;
|
||||
|
||||
ops->crypt_chunk(state, dst, src, chunksize);
|
||||
|
||||
skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Finish the computation and fold the tag into tag_xor.
 *
 * A block holding the associated-data and payload lengths in bits (two
 * little-endian 64-bit words) is XORed with state block 3 and absorbed
 * seven times; afterwards every state block is XORed into *tag_xor.
 * tag_xor is only XORed into, so its previous contents are part of the
 * result.
 */
static void crypto_aegis128_final(struct aegis_state *state,
				  union aegis_block *tag_xor,
				  u64 assoclen, u64 cryptlen)
{
	union aegis_block lens;
	unsigned int n;

	lens.words64[0] = cpu_to_le64(assoclen << 3);
	lens.words64[1] = cpu_to_le64(cryptlen << 3);

	crypto_aegis_block_xor(&lens, &state->blocks[3]);

	for (n = 7; n > 0; n--)
		crypto_aegis128_update_a(state, &lens);

	for (n = 0; n < AEGIS128_STATE_BLOCKS; n++)
		crypto_aegis_block_xor(tag_xor, &state->blocks[n]);
}
|
||||
|
||||
static int crypto_aegis128_setkey(struct crypto_aead *aead, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct aegis_ctx *ctx = crypto_aead_ctx(aead);
|
||||
|
||||
if (keylen != AEGIS128_KEY_SIZE) {
|
||||
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memcpy(ctx->key.bytes, key, AEGIS128_KEY_SIZE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_aegis128_setauthsize(struct crypto_aead *tfm,
|
||||
unsigned int authsize)
|
||||
{
|
||||
if (authsize > AEGIS128_MAX_AUTH_SIZE)
|
||||
return -EINVAL;
|
||||
if (authsize < AEGIS128_MIN_AUTH_SIZE)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void crypto_aegis128_crypt(struct aead_request *req,
|
||||
union aegis_block *tag_xor,
|
||||
unsigned int cryptlen,
|
||||
const struct aegis128_ops *ops)
|
||||
{
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct aegis_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct aegis_state state;
|
||||
|
||||
crypto_aegis128_init(&state, &ctx->key, req->iv);
|
||||
crypto_aegis128_process_ad(&state, req->src, req->assoclen);
|
||||
crypto_aegis128_process_crypt(&state, req, ops);
|
||||
crypto_aegis128_final(&state, tag_xor, req->assoclen, cryptlen);
|
||||
}
|
||||
|
||||
static int crypto_aegis128_encrypt(struct aead_request *req)
|
||||
{
|
||||
static const struct aegis128_ops ops = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_encrypt,
|
||||
.crypt_chunk = crypto_aegis128_encrypt_chunk,
|
||||
};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
union aegis_block tag = {};
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen;
|
||||
|
||||
crypto_aegis128_crypt(req, &tag, cryptlen, &ops);
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
|
||||
authsize, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_aegis128_decrypt(struct aead_request *req)
|
||||
{
|
||||
static const struct aegis128_ops ops = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_decrypt,
|
||||
.crypt_chunk = crypto_aegis128_decrypt_chunk,
|
||||
};
|
||||
static const u8 zeros[AEGIS128_MAX_AUTH_SIZE] = {};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
union aegis_block tag;
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen - authsize;
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
|
||||
authsize, 0);
|
||||
|
||||
crypto_aegis128_crypt(req, &tag, cryptlen, &ops);
|
||||
|
||||
return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
|
||||
}
|
||||
|
||||
/* Transform constructor: nothing to set up, always succeeds. */
static int crypto_aegis128_init_tfm(struct crypto_aead *tfm)
{
	return 0;
}
|
||||
|
||||
/* Transform destructor: intentionally empty, there is nothing to release. */
static void crypto_aegis128_exit_tfm(struct crypto_aead *tfm)
{
}
|
||||
|
||||
static struct aead_alg crypto_aegis128_alg = {
|
||||
.setkey = crypto_aegis128_setkey,
|
||||
.setauthsize = crypto_aegis128_setauthsize,
|
||||
.encrypt = crypto_aegis128_encrypt,
|
||||
.decrypt = crypto_aegis128_decrypt,
|
||||
.init = crypto_aegis128_init_tfm,
|
||||
.exit = crypto_aegis128_exit_tfm,
|
||||
|
||||
.ivsize = AEGIS128_NONCE_SIZE,
|
||||
.maxauthsize = AEGIS128_MAX_AUTH_SIZE,
|
||||
.chunksize = AEGIS_BLOCK_SIZE,
|
||||
|
||||
.base = {
|
||||
.cra_flags = CRYPTO_ALG_TYPE_AEAD,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct aegis_ctx),
|
||||
.cra_alignmask = 0,
|
||||
|
||||
.cra_priority = 100,
|
||||
|
||||
.cra_name = "aegis128",
|
||||
.cra_driver_name = "aegis128-generic",
|
||||
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
||||
/* Module entry point: register the AEGIS-128 AEAD and return the result. */
static int __init crypto_aegis128_module_init(void)
{
	return crypto_register_aead(&crypto_aegis128_alg);
}
|
||||
|
||||
/* Module exit point: unregister the AEGIS-128 AEAD. */
static void __exit crypto_aegis128_module_exit(void)
{
	crypto_unregister_aead(&crypto_aegis128_alg);
}
|
||||
|
||||
module_init(crypto_aegis128_module_init);
|
||||
module_exit(crypto_aegis128_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm");
|
||||
MODULE_ALIAS_CRYPTO("aegis128");
|
||||
MODULE_ALIAS_CRYPTO("aegis128-generic");
|
527
crypto/aegis128l.c
Normal file
527
crypto/aegis128l.c
Normal file
@ -0,0 +1,527 @@
|
||||
/*
|
||||
* The AEGIS-128L Authenticated-Encryption Algorithm
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/scatterlist.h>
|
||||
|
||||
#include "aegis.h"
|
||||
|
||||
/* AEGIS-128L processes the message two blocks (one "chunk") at a time. */
#define AEGIS128L_CHUNK_BLOCKS		2
#define AEGIS128L_CHUNK_SIZE		(AEGIS128L_CHUNK_BLOCKS * AEGIS_BLOCK_SIZE)

/* Sizes in bytes, except STATE_BLOCKS which counts blocks in the state. */
#define AEGIS128L_NONCE_SIZE		16
#define AEGIS128L_STATE_BLOCKS		8
#define AEGIS128L_KEY_SIZE		16
#define AEGIS128L_MIN_AUTH_SIZE		8
#define AEGIS128L_MAX_AUTH_SIZE		16
|
||||
|
||||
union aegis_chunk {
|
||||
union aegis_block blocks[AEGIS128L_CHUNK_BLOCKS];
|
||||
u8 bytes[AEGIS128L_CHUNK_SIZE];
|
||||
};
|
||||
|
||||
struct aegis_state {
|
||||
union aegis_block blocks[AEGIS128L_STATE_BLOCKS];
|
||||
};
|
||||
|
||||
struct aegis_ctx {
|
||||
union aegis_block key;
|
||||
};
|
||||
|
||||
struct aegis128l_ops {
|
||||
int (*skcipher_walk_init)(struct skcipher_walk *walk,
|
||||
struct aead_request *req, bool atomic);
|
||||
|
||||
void (*crypt_chunk)(struct aegis_state *state, u8 *dst,
|
||||
const u8 *src, unsigned int size);
|
||||
};
|
||||
|
||||
static void crypto_aegis128l_update(struct aegis_state *state)
|
||||
{
|
||||
union aegis_block tmp;
|
||||
unsigned int i;
|
||||
|
||||
tmp = state->blocks[AEGIS128L_STATE_BLOCKS - 1];
|
||||
for (i = AEGIS128L_STATE_BLOCKS - 1; i > 0; i--)
|
||||
crypto_aegis_aesenc(&state->blocks[i], &state->blocks[i - 1],
|
||||
&state->blocks[i]);
|
||||
crypto_aegis_aesenc(&state->blocks[0], &tmp, &state->blocks[0]);
|
||||
}
|
||||
|
||||
static void crypto_aegis128l_update_a(struct aegis_state *state,
|
||||
const union aegis_chunk *msg)
|
||||
{
|
||||
crypto_aegis128l_update(state);
|
||||
crypto_aegis_block_xor(&state->blocks[0], &msg->blocks[0]);
|
||||
crypto_aegis_block_xor(&state->blocks[4], &msg->blocks[1]);
|
||||
}
|
||||
|
||||
static void crypto_aegis128l_update_u(struct aegis_state *state,
|
||||
const void *msg)
|
||||
{
|
||||
crypto_aegis128l_update(state);
|
||||
crypto_xor(state->blocks[0].bytes, msg + 0 * AEGIS_BLOCK_SIZE,
|
||||
AEGIS_BLOCK_SIZE);
|
||||
crypto_xor(state->blocks[4].bytes, msg + 1 * AEGIS_BLOCK_SIZE,
|
||||
AEGIS_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
static void crypto_aegis128l_init(struct aegis_state *state,
|
||||
const union aegis_block *key,
|
||||
const u8 *iv)
|
||||
{
|
||||
union aegis_block key_iv;
|
||||
union aegis_chunk chunk;
|
||||
unsigned int i;
|
||||
|
||||
memcpy(chunk.blocks[0].bytes, iv, AEGIS_BLOCK_SIZE);
|
||||
chunk.blocks[1] = *key;
|
||||
|
||||
key_iv = *key;
|
||||
crypto_aegis_block_xor(&key_iv, &chunk.blocks[0]);
|
||||
|
||||
state->blocks[0] = key_iv;
|
||||
state->blocks[1] = crypto_aegis_const[1];
|
||||
state->blocks[2] = crypto_aegis_const[0];
|
||||
state->blocks[3] = crypto_aegis_const[1];
|
||||
state->blocks[4] = key_iv;
|
||||
state->blocks[5] = *key;
|
||||
state->blocks[6] = *key;
|
||||
state->blocks[7] = *key;
|
||||
|
||||
crypto_aegis_block_xor(&state->blocks[5], &crypto_aegis_const[0]);
|
||||
crypto_aegis_block_xor(&state->blocks[6], &crypto_aegis_const[1]);
|
||||
crypto_aegis_block_xor(&state->blocks[7], &crypto_aegis_const[0]);
|
||||
|
||||
for (i = 0; i < 10; i++) {
|
||||
crypto_aegis128l_update_a(state, &chunk);
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_aegis128l_ad(struct aegis_state *state,
|
||||
const u8 *src, unsigned int size)
|
||||
{
|
||||
if (AEGIS_ALIGNED(src)) {
|
||||
const union aegis_chunk *src_chunk =
|
||||
(const union aegis_chunk *)src;
|
||||
|
||||
while (size >= AEGIS128L_CHUNK_SIZE) {
|
||||
crypto_aegis128l_update_a(state, src_chunk);
|
||||
|
||||
size -= AEGIS128L_CHUNK_SIZE;
|
||||
src_chunk += 1;
|
||||
}
|
||||
} else {
|
||||
while (size >= AEGIS128L_CHUNK_SIZE) {
|
||||
crypto_aegis128l_update_u(state, src);
|
||||
|
||||
size -= AEGIS128L_CHUNK_SIZE;
|
||||
src += AEGIS128L_CHUNK_SIZE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Encrypt @size bytes from @src into @dst and absorb the plaintext.
 *
 * For every chunk the two keystream blocks are
 *   (S2 & S3) ^ S6 ^ S1   and   (S6 & S7) ^ S5 ^ S2
 * taken from the state before it is updated.  Complete chunks use block
 * accesses when both buffers pass AEGIS_ALIGNED(), byte-wise helpers
 * otherwise.  A trailing partial chunk is zero-padded before being absorbed
 * and only @size bytes of it are written out.
 *
 * The state is always updated from the source before the destination is
 * written, so the order of those two steps must be kept.
 */
static void crypto_aegis128l_encrypt_chunk(struct aegis_state *state, u8 *dst,
					   const u8 *src, unsigned int size)
{
	union aegis_chunk out;
	union aegis_block *out0 = &out.blocks[0];
	union aegis_block *out1 = &out.blocks[1];

	if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
		for (; size >= AEGIS128L_CHUNK_SIZE;
		     size -= AEGIS128L_CHUNK_SIZE,
		     src += AEGIS128L_CHUNK_SIZE,
		     dst += AEGIS128L_CHUNK_SIZE) {
			const union aegis_chunk *in =
				(const union aegis_chunk *)src;

			*out0 = state->blocks[2];
			crypto_aegis_block_and(out0, &state->blocks[3]);
			crypto_aegis_block_xor(out0, &state->blocks[6]);
			crypto_aegis_block_xor(out0, &state->blocks[1]);
			crypto_aegis_block_xor(out0, &in->blocks[0]);

			*out1 = state->blocks[6];
			crypto_aegis_block_and(out1, &state->blocks[7]);
			crypto_aegis_block_xor(out1, &state->blocks[5]);
			crypto_aegis_block_xor(out1, &state->blocks[2]);
			crypto_aegis_block_xor(out1, &in->blocks[1]);

			crypto_aegis128l_update_a(state, in);

			*(union aegis_chunk *)dst = out;
		}
	} else {
		for (; size >= AEGIS128L_CHUNK_SIZE;
		     size -= AEGIS128L_CHUNK_SIZE,
		     src += AEGIS128L_CHUNK_SIZE,
		     dst += AEGIS128L_CHUNK_SIZE) {
			*out0 = state->blocks[2];
			crypto_aegis_block_and(out0, &state->blocks[3]);
			crypto_aegis_block_xor(out0, &state->blocks[6]);
			crypto_aegis_block_xor(out0, &state->blocks[1]);
			crypto_xor(out0->bytes, src, AEGIS_BLOCK_SIZE);

			*out1 = state->blocks[6];
			crypto_aegis_block_and(out1, &state->blocks[7]);
			crypto_aegis_block_xor(out1, &state->blocks[5]);
			crypto_aegis_block_xor(out1, &state->blocks[2]);
			crypto_xor(out1->bytes, src + AEGIS_BLOCK_SIZE,
				   AEGIS_BLOCK_SIZE);

			crypto_aegis128l_update_u(state, src);

			memcpy(dst, out.bytes, AEGIS128L_CHUNK_SIZE);
		}
	}

	if (size > 0) {
		/* Zero-padded copy of the final partial chunk. */
		union aegis_chunk last = {};

		memcpy(last.bytes, src, size);

		/* Keystream only; the plaintext is XORed in after the update. */
		*out0 = state->blocks[2];
		crypto_aegis_block_and(out0, &state->blocks[3]);
		crypto_aegis_block_xor(out0, &state->blocks[6]);
		crypto_aegis_block_xor(out0, &state->blocks[1]);

		*out1 = state->blocks[6];
		crypto_aegis_block_and(out1, &state->blocks[7]);
		crypto_aegis_block_xor(out1, &state->blocks[5]);
		crypto_aegis_block_xor(out1, &state->blocks[2]);

		crypto_aegis128l_update_a(state, &last);

		crypto_aegis_block_xor(&last.blocks[0], out0);
		crypto_aegis_block_xor(&last.blocks[1], out1);

		memcpy(dst, last.bytes, size);
	}
}
|
||||
|
||||
/*
 * Decrypt @size bytes from @src into @dst and absorb the recovered plaintext.
 *
 * The keystream blocks are the same as for encryption:
 *   (S2 & S3) ^ S6 ^ S1   and   (S6 & S7) ^ S5 ^ S2.
 * Unlike encryption, the state absorbs the *output* (plaintext), so the
 * update always uses the local plaintext copy.  For a trailing partial chunk
 * the bytes past @size are cleared again after the keystream XOR, so that
 * the absorbed chunk is the zero-padded plaintext.
 */
static void crypto_aegis128l_decrypt_chunk(struct aegis_state *state, u8 *dst,
					   const u8 *src, unsigned int size)
{
	union aegis_chunk plain;
	union aegis_block *plain0 = &plain.blocks[0];
	union aegis_block *plain1 = &plain.blocks[1];

	if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
		for (; size >= AEGIS128L_CHUNK_SIZE;
		     size -= AEGIS128L_CHUNK_SIZE,
		     src += AEGIS128L_CHUNK_SIZE,
		     dst += AEGIS128L_CHUNK_SIZE) {
			const union aegis_chunk *in =
				(const union aegis_chunk *)src;

			*plain0 = state->blocks[2];
			crypto_aegis_block_and(plain0, &state->blocks[3]);
			crypto_aegis_block_xor(plain0, &state->blocks[6]);
			crypto_aegis_block_xor(plain0, &state->blocks[1]);
			crypto_aegis_block_xor(plain0, &in->blocks[0]);

			*plain1 = state->blocks[6];
			crypto_aegis_block_and(plain1, &state->blocks[7]);
			crypto_aegis_block_xor(plain1, &state->blocks[5]);
			crypto_aegis_block_xor(plain1, &state->blocks[2]);
			crypto_aegis_block_xor(plain1, &in->blocks[1]);

			crypto_aegis128l_update_a(state, &plain);

			*(union aegis_chunk *)dst = plain;
		}
	} else {
		for (; size >= AEGIS128L_CHUNK_SIZE;
		     size -= AEGIS128L_CHUNK_SIZE,
		     src += AEGIS128L_CHUNK_SIZE,
		     dst += AEGIS128L_CHUNK_SIZE) {
			*plain0 = state->blocks[2];
			crypto_aegis_block_and(plain0, &state->blocks[3]);
			crypto_aegis_block_xor(plain0, &state->blocks[6]);
			crypto_aegis_block_xor(plain0, &state->blocks[1]);
			crypto_xor(plain0->bytes, src, AEGIS_BLOCK_SIZE);

			*plain1 = state->blocks[6];
			crypto_aegis_block_and(plain1, &state->blocks[7]);
			crypto_aegis_block_xor(plain1, &state->blocks[5]);
			crypto_aegis_block_xor(plain1, &state->blocks[2]);
			crypto_xor(plain1->bytes, src + AEGIS_BLOCK_SIZE,
				   AEGIS_BLOCK_SIZE);

			crypto_aegis128l_update_a(state, &plain);

			memcpy(dst, plain.bytes, AEGIS128L_CHUNK_SIZE);
		}
	}

	if (size > 0) {
		/* Zero-padded copy of the final partial ciphertext chunk. */
		union aegis_chunk last = {};

		memcpy(last.bytes, src, size);

		*plain0 = state->blocks[2];
		crypto_aegis_block_and(plain0, &state->blocks[3]);
		crypto_aegis_block_xor(plain0, &state->blocks[6]);
		crypto_aegis_block_xor(plain0, &state->blocks[1]);
		crypto_aegis_block_xor(&last.blocks[0], plain0);

		*plain1 = state->blocks[6];
		crypto_aegis_block_and(plain1, &state->blocks[7]);
		crypto_aegis_block_xor(plain1, &state->blocks[5]);
		crypto_aegis_block_xor(plain1, &state->blocks[2]);
		crypto_aegis_block_xor(&last.blocks[1], plain1);

		/* The padding picked up keystream above; clear it again. */
		memset(last.bytes + size, 0, AEGIS128L_CHUNK_SIZE - size);

		crypto_aegis128l_update_a(state, &last);

		memcpy(dst, last.bytes, size);
	}
}
|
||||
|
||||
static void crypto_aegis128l_process_ad(struct aegis_state *state,
|
||||
struct scatterlist *sg_src,
|
||||
unsigned int assoclen)
|
||||
{
|
||||
struct scatter_walk walk;
|
||||
union aegis_chunk buf;
|
||||
unsigned int pos = 0;
|
||||
|
||||
scatterwalk_start(&walk, sg_src);
|
||||
while (assoclen != 0) {
|
||||
unsigned int size = scatterwalk_clamp(&walk, assoclen);
|
||||
unsigned int left = size;
|
||||
void *mapped = scatterwalk_map(&walk);
|
||||
const u8 *src = (const u8 *)mapped;
|
||||
|
||||
if (pos + size >= AEGIS128L_CHUNK_SIZE) {
|
||||
if (pos > 0) {
|
||||
unsigned int fill = AEGIS128L_CHUNK_SIZE - pos;
|
||||
memcpy(buf.bytes + pos, src, fill);
|
||||
crypto_aegis128l_update_a(state, &buf);
|
||||
pos = 0;
|
||||
left -= fill;
|
||||
src += fill;
|
||||
}
|
||||
|
||||
crypto_aegis128l_ad(state, src, left);
|
||||
src += left & ~(AEGIS128L_CHUNK_SIZE - 1);
|
||||
left &= AEGIS128L_CHUNK_SIZE - 1;
|
||||
}
|
||||
|
||||
memcpy(buf.bytes + pos, src, left);
|
||||
|
||||
pos += left;
|
||||
assoclen -= size;
|
||||
scatterwalk_unmap(mapped);
|
||||
scatterwalk_advance(&walk, size);
|
||||
scatterwalk_done(&walk, 0, assoclen);
|
||||
}
|
||||
|
||||
if (pos > 0) {
|
||||
memset(buf.bytes + pos, 0, AEGIS128L_CHUNK_SIZE - pos);
|
||||
crypto_aegis128l_update_a(state, &buf);
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_aegis128l_process_crypt(struct aegis_state *state,
|
||||
struct aead_request *req,
|
||||
const struct aegis128l_ops *ops)
|
||||
{
|
||||
struct skcipher_walk walk;
|
||||
u8 *src, *dst;
|
||||
unsigned int chunksize;
|
||||
|
||||
ops->skcipher_walk_init(&walk, req, false);
|
||||
|
||||
while (walk.nbytes) {
|
||||
src = walk.src.virt.addr;
|
||||
dst = walk.dst.virt.addr;
|
||||
chunksize = walk.nbytes;
|
||||
|
||||
ops->crypt_chunk(state, dst, src, chunksize);
|
||||
|
||||
skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Finalise the state and fold the tag into @tag_xor.
 *
 * A block holding the little-endian bit lengths of the associated data and of
 * the payload is XORed with state block 2, duplicated into both halves of a
 * chunk and absorbed seven times.  Then state blocks 0..6 (seven of the
 * eight) are XORed into @tag_xor.
 */
static void crypto_aegis128l_final(struct aegis_state *state,
				   union aegis_block *tag_xor,
				   u64 assoclen, u64 cryptlen)
{
	union aegis_chunk lens;
	unsigned int n;

	lens.blocks[0].words64[0] = cpu_to_le64(assoclen * 8);
	lens.blocks[0].words64[1] = cpu_to_le64(cryptlen * 8);
	crypto_aegis_block_xor(&lens.blocks[0], &state->blocks[2]);
	lens.blocks[1] = lens.blocks[0];

	for (n = 7; n > 0; n--)
		crypto_aegis128l_update_a(state, &lens);

	for (n = 0; n < 7; n++)
		crypto_aegis_block_xor(tag_xor, &state->blocks[n]);
}
|
||||
|
||||
static int crypto_aegis128l_setkey(struct crypto_aead *aead, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct aegis_ctx *ctx = crypto_aead_ctx(aead);
|
||||
|
||||
if (keylen != AEGIS128L_KEY_SIZE) {
|
||||
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memcpy(ctx->key.bytes, key, AEGIS128L_KEY_SIZE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_aegis128l_setauthsize(struct crypto_aead *tfm,
|
||||
unsigned int authsize)
|
||||
{
|
||||
if (authsize > AEGIS128L_MAX_AUTH_SIZE)
|
||||
return -EINVAL;
|
||||
if (authsize < AEGIS128L_MIN_AUTH_SIZE)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void crypto_aegis128l_crypt(struct aead_request *req,
|
||||
union aegis_block *tag_xor,
|
||||
unsigned int cryptlen,
|
||||
const struct aegis128l_ops *ops)
|
||||
{
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct aegis_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct aegis_state state;
|
||||
|
||||
crypto_aegis128l_init(&state, &ctx->key, req->iv);
|
||||
crypto_aegis128l_process_ad(&state, req->src, req->assoclen);
|
||||
crypto_aegis128l_process_crypt(&state, req, ops);
|
||||
crypto_aegis128l_final(&state, tag_xor, req->assoclen, cryptlen);
|
||||
}
|
||||
|
||||
static int crypto_aegis128l_encrypt(struct aead_request *req)
|
||||
{
|
||||
static const struct aegis128l_ops ops = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_encrypt,
|
||||
.crypt_chunk = crypto_aegis128l_encrypt_chunk,
|
||||
};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
union aegis_block tag = {};
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen;
|
||||
|
||||
crypto_aegis128l_crypt(req, &tag, cryptlen, &ops);
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
|
||||
authsize, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_aegis128l_decrypt(struct aead_request *req)
|
||||
{
|
||||
static const struct aegis128l_ops ops = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_decrypt,
|
||||
.crypt_chunk = crypto_aegis128l_decrypt_chunk,
|
||||
};
|
||||
static const u8 zeros[AEGIS128L_MAX_AUTH_SIZE] = {};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
union aegis_block tag;
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen - authsize;
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
|
||||
authsize, 0);
|
||||
|
||||
crypto_aegis128l_crypt(req, &tag, cryptlen, &ops);
|
||||
|
||||
return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
|
||||
}
|
||||
|
||||
/* Transform constructor: nothing to set up, always succeeds. */
static int crypto_aegis128l_init_tfm(struct crypto_aead *tfm)
{
	return 0;
}
|
||||
|
||||
/* Transform destructor: intentionally empty, there is nothing to release. */
static void crypto_aegis128l_exit_tfm(struct crypto_aead *tfm)
{
}
|
||||
|
||||
static struct aead_alg crypto_aegis128l_alg = {
|
||||
.setkey = crypto_aegis128l_setkey,
|
||||
.setauthsize = crypto_aegis128l_setauthsize,
|
||||
.encrypt = crypto_aegis128l_encrypt,
|
||||
.decrypt = crypto_aegis128l_decrypt,
|
||||
.init = crypto_aegis128l_init_tfm,
|
||||
.exit = crypto_aegis128l_exit_tfm,
|
||||
|
||||
.ivsize = AEGIS128L_NONCE_SIZE,
|
||||
.maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
|
||||
.chunksize = AEGIS128L_CHUNK_SIZE,
|
||||
|
||||
.base = {
|
||||
.cra_flags = CRYPTO_ALG_TYPE_AEAD,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct aegis_ctx),
|
||||
.cra_alignmask = 0,
|
||||
|
||||
.cra_priority = 100,
|
||||
|
||||
.cra_name = "aegis128l",
|
||||
.cra_driver_name = "aegis128l-generic",
|
||||
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
||||
/* Module entry point: register the AEGIS-128L AEAD and return the result. */
static int __init crypto_aegis128l_module_init(void)
{
	return crypto_register_aead(&crypto_aegis128l_alg);
}
|
||||
|
||||
/* Module exit point: unregister the AEGIS-128L AEAD. */
static void __exit crypto_aegis128l_module_exit(void)
{
	crypto_unregister_aead(&crypto_aegis128l_alg);
}
|
||||
|
||||
module_init(crypto_aegis128l_module_init);
|
||||
module_exit(crypto_aegis128l_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("AEGIS-128L AEAD algorithm");
|
||||
MODULE_ALIAS_CRYPTO("aegis128l");
|
||||
MODULE_ALIAS_CRYPTO("aegis128l-generic");
|
478
crypto/aegis256.c
Normal file
478
crypto/aegis256.c
Normal file
@ -0,0 +1,478 @@
|
||||
/*
|
||||
* The AEGIS-256 Authenticated-Encryption Algorithm
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/scatterlist.h>
|
||||
|
||||
#include "aegis.h"
|
||||
|
||||
/* Sizes in bytes, except STATE_BLOCKS which counts blocks in the state. */
#define AEGIS256_NONCE_SIZE		32
#define AEGIS256_STATE_BLOCKS		6
#define AEGIS256_KEY_SIZE		32
#define AEGIS256_MIN_AUTH_SIZE		8
#define AEGIS256_MAX_AUTH_SIZE		16
|
||||
|
||||
struct aegis_state {
|
||||
union aegis_block blocks[AEGIS256_STATE_BLOCKS];
|
||||
};
|
||||
|
||||
struct aegis_ctx {
|
||||
union aegis_block key[AEGIS256_KEY_SIZE / AEGIS_BLOCK_SIZE];
|
||||
};
|
||||
|
||||
struct aegis256_ops {
|
||||
int (*skcipher_walk_init)(struct skcipher_walk *walk,
|
||||
struct aead_request *req, bool atomic);
|
||||
|
||||
void (*crypt_chunk)(struct aegis_state *state, u8 *dst,
|
||||
const u8 *src, unsigned int size);
|
||||
};
|
||||
|
||||
static void crypto_aegis256_update(struct aegis_state *state)
|
||||
{
|
||||
union aegis_block tmp;
|
||||
unsigned int i;
|
||||
|
||||
tmp = state->blocks[AEGIS256_STATE_BLOCKS - 1];
|
||||
for (i = AEGIS256_STATE_BLOCKS - 1; i > 0; i--)
|
||||
crypto_aegis_aesenc(&state->blocks[i], &state->blocks[i - 1],
|
||||
&state->blocks[i]);
|
||||
crypto_aegis_aesenc(&state->blocks[0], &tmp, &state->blocks[0]);
|
||||
}
|
||||
|
||||
static void crypto_aegis256_update_a(struct aegis_state *state,
|
||||
const union aegis_block *msg)
|
||||
{
|
||||
crypto_aegis256_update(state);
|
||||
crypto_aegis_block_xor(&state->blocks[0], msg);
|
||||
}
|
||||
|
||||
static void crypto_aegis256_update_u(struct aegis_state *state, const void *msg)
|
||||
{
|
||||
crypto_aegis256_update(state);
|
||||
crypto_xor(state->blocks[0].bytes, msg, AEGIS_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
static void crypto_aegis256_init(struct aegis_state *state,
|
||||
const union aegis_block *key,
|
||||
const u8 *iv)
|
||||
{
|
||||
union aegis_block key_iv[2];
|
||||
unsigned int i;
|
||||
|
||||
key_iv[0] = key[0];
|
||||
key_iv[1] = key[1];
|
||||
crypto_xor(key_iv[0].bytes, iv + 0 * AEGIS_BLOCK_SIZE,
|
||||
AEGIS_BLOCK_SIZE);
|
||||
crypto_xor(key_iv[1].bytes, iv + 1 * AEGIS_BLOCK_SIZE,
|
||||
AEGIS_BLOCK_SIZE);
|
||||
|
||||
state->blocks[0] = key_iv[0];
|
||||
state->blocks[1] = key_iv[1];
|
||||
state->blocks[2] = crypto_aegis_const[1];
|
||||
state->blocks[3] = crypto_aegis_const[0];
|
||||
state->blocks[4] = key[0];
|
||||
state->blocks[5] = key[1];
|
||||
|
||||
crypto_aegis_block_xor(&state->blocks[4], &crypto_aegis_const[0]);
|
||||
crypto_aegis_block_xor(&state->blocks[5], &crypto_aegis_const[1]);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
crypto_aegis256_update_a(state, &key[0]);
|
||||
crypto_aegis256_update_a(state, &key[1]);
|
||||
crypto_aegis256_update_a(state, &key_iv[0]);
|
||||
crypto_aegis256_update_a(state, &key_iv[1]);
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_aegis256_ad(struct aegis_state *state,
|
||||
const u8 *src, unsigned int size)
|
||||
{
|
||||
if (AEGIS_ALIGNED(src)) {
|
||||
const union aegis_block *src_blk =
|
||||
(const union aegis_block *)src;
|
||||
|
||||
while (size >= AEGIS_BLOCK_SIZE) {
|
||||
crypto_aegis256_update_a(state, src_blk);
|
||||
|
||||
size -= AEGIS_BLOCK_SIZE;
|
||||
src_blk++;
|
||||
}
|
||||
} else {
|
||||
while (size >= AEGIS_BLOCK_SIZE) {
|
||||
crypto_aegis256_update_u(state, src);
|
||||
|
||||
size -= AEGIS_BLOCK_SIZE;
|
||||
src += AEGIS_BLOCK_SIZE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Encrypt @size bytes from @src into @dst and absorb the plaintext.
 *
 * For every block the keystream is (S2 & S3) ^ S5 ^ S4 ^ S1, taken from the
 * state before it is updated.  Complete blocks use block accesses when both
 * buffers pass AEGIS_ALIGNED(), byte-wise helpers otherwise.  A trailing
 * partial block is zero-padded before being absorbed and only @size bytes of
 * it are written out.
 *
 * The state is always updated from the source before the destination is
 * written, so the order of those two steps must be kept.
 */
static void crypto_aegis256_encrypt_chunk(struct aegis_state *state, u8 *dst,
					  const u8 *src, unsigned int size)
{
	union aegis_block out;

	if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
		for (; size >= AEGIS_BLOCK_SIZE;
		     size -= AEGIS_BLOCK_SIZE,
		     src += AEGIS_BLOCK_SIZE,
		     dst += AEGIS_BLOCK_SIZE) {
			const union aegis_block *in =
				(const union aegis_block *)src;

			out = state->blocks[2];
			crypto_aegis_block_and(&out, &state->blocks[3]);
			crypto_aegis_block_xor(&out, &state->blocks[5]);
			crypto_aegis_block_xor(&out, &state->blocks[4]);
			crypto_aegis_block_xor(&out, &state->blocks[1]);
			crypto_aegis_block_xor(&out, in);

			crypto_aegis256_update_a(state, in);

			*(union aegis_block *)dst = out;
		}
	} else {
		for (; size >= AEGIS_BLOCK_SIZE;
		     size -= AEGIS_BLOCK_SIZE,
		     src += AEGIS_BLOCK_SIZE,
		     dst += AEGIS_BLOCK_SIZE) {
			out = state->blocks[2];
			crypto_aegis_block_and(&out, &state->blocks[3]);
			crypto_aegis_block_xor(&out, &state->blocks[5]);
			crypto_aegis_block_xor(&out, &state->blocks[4]);
			crypto_aegis_block_xor(&out, &state->blocks[1]);
			crypto_xor(out.bytes, src, AEGIS_BLOCK_SIZE);

			crypto_aegis256_update_u(state, src);

			memcpy(dst, out.bytes, AEGIS_BLOCK_SIZE);
		}
	}

	if (size > 0) {
		/* Zero-padded copy of the final partial block. */
		union aegis_block last = {};

		memcpy(last.bytes, src, size);

		/* Keystream only; the plaintext is XORed in after the update. */
		out = state->blocks[2];
		crypto_aegis_block_and(&out, &state->blocks[3]);
		crypto_aegis_block_xor(&out, &state->blocks[5]);
		crypto_aegis_block_xor(&out, &state->blocks[4]);
		crypto_aegis_block_xor(&out, &state->blocks[1]);

		crypto_aegis256_update_a(state, &last);

		crypto_aegis_block_xor(&last, &out);

		memcpy(dst, last.bytes, size);
	}
}
|
||||
|
||||
/*
 * Decrypt @size bytes from @src into @dst and absorb the recovered plaintext.
 *
 * The keystream is the same as for encryption: (S2 & S3) ^ S5 ^ S4 ^ S1.
 * Unlike encryption, the state absorbs the *output* (plaintext), so the
 * update always uses the local plaintext copy.  For a trailing partial block
 * the bytes past @size are cleared again after the keystream XOR, so that
 * the absorbed block is the zero-padded plaintext.
 */
static void crypto_aegis256_decrypt_chunk(struct aegis_state *state, u8 *dst,
					  const u8 *src, unsigned int size)
{
	union aegis_block plain;

	if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
		for (; size >= AEGIS_BLOCK_SIZE;
		     size -= AEGIS_BLOCK_SIZE,
		     src += AEGIS_BLOCK_SIZE,
		     dst += AEGIS_BLOCK_SIZE) {
			const union aegis_block *in =
				(const union aegis_block *)src;

			plain = state->blocks[2];
			crypto_aegis_block_and(&plain, &state->blocks[3]);
			crypto_aegis_block_xor(&plain, &state->blocks[5]);
			crypto_aegis_block_xor(&plain, &state->blocks[4]);
			crypto_aegis_block_xor(&plain, &state->blocks[1]);
			crypto_aegis_block_xor(&plain, in);

			crypto_aegis256_update_a(state, &plain);

			*(union aegis_block *)dst = plain;
		}
	} else {
		for (; size >= AEGIS_BLOCK_SIZE;
		     size -= AEGIS_BLOCK_SIZE,
		     src += AEGIS_BLOCK_SIZE,
		     dst += AEGIS_BLOCK_SIZE) {
			plain = state->blocks[2];
			crypto_aegis_block_and(&plain, &state->blocks[3]);
			crypto_aegis_block_xor(&plain, &state->blocks[5]);
			crypto_aegis_block_xor(&plain, &state->blocks[4]);
			crypto_aegis_block_xor(&plain, &state->blocks[1]);
			crypto_xor(plain.bytes, src, AEGIS_BLOCK_SIZE);

			crypto_aegis256_update_a(state, &plain);

			memcpy(dst, plain.bytes, AEGIS_BLOCK_SIZE);
		}
	}

	if (size > 0) {
		/* Zero-padded copy of the final partial ciphertext block. */
		union aegis_block last = {};

		memcpy(last.bytes, src, size);

		plain = state->blocks[2];
		crypto_aegis_block_and(&plain, &state->blocks[3]);
		crypto_aegis_block_xor(&plain, &state->blocks[5]);
		crypto_aegis_block_xor(&plain, &state->blocks[4]);
		crypto_aegis_block_xor(&plain, &state->blocks[1]);
		crypto_aegis_block_xor(&last, &plain);

		/* The padding picked up keystream above; clear it again. */
		memset(last.bytes + size, 0, AEGIS_BLOCK_SIZE - size);

		crypto_aegis256_update_a(state, &last);

		memcpy(dst, last.bytes, size);
	}
}
|
||||
|
||||
static void crypto_aegis256_process_ad(struct aegis_state *state,
|
||||
struct scatterlist *sg_src,
|
||||
unsigned int assoclen)
|
||||
{
|
||||
struct scatter_walk walk;
|
||||
union aegis_block buf;
|
||||
unsigned int pos = 0;
|
||||
|
||||
scatterwalk_start(&walk, sg_src);
|
||||
while (assoclen != 0) {
|
||||
unsigned int size = scatterwalk_clamp(&walk, assoclen);
|
||||
unsigned int left = size;
|
||||
void *mapped = scatterwalk_map(&walk);
|
||||
const u8 *src = (const u8 *)mapped;
|
||||
|
||||
if (pos + size >= AEGIS_BLOCK_SIZE) {
|
||||
if (pos > 0) {
|
||||
unsigned int fill = AEGIS_BLOCK_SIZE - pos;
|
||||
memcpy(buf.bytes + pos, src, fill);
|
||||
crypto_aegis256_update_a(state, &buf);
|
||||
pos = 0;
|
||||
left -= fill;
|
||||
src += fill;
|
||||
}
|
||||
|
||||
crypto_aegis256_ad(state, src, left);
|
||||
src += left & ~(AEGIS_BLOCK_SIZE - 1);
|
||||
left &= AEGIS_BLOCK_SIZE - 1;
|
||||
}
|
||||
|
||||
memcpy(buf.bytes + pos, src, left);
|
||||
|
||||
pos += left;
|
||||
assoclen -= size;
|
||||
scatterwalk_unmap(mapped);
|
||||
scatterwalk_advance(&walk, size);
|
||||
scatterwalk_done(&walk, 0, assoclen);
|
||||
}
|
||||
|
||||
if (pos > 0) {
|
||||
memset(buf.bytes + pos, 0, AEGIS_BLOCK_SIZE - pos);
|
||||
crypto_aegis256_update_a(state, &buf);
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_aegis256_process_crypt(struct aegis_state *state,
|
||||
struct aead_request *req,
|
||||
const struct aegis256_ops *ops)
|
||||
{
|
||||
struct skcipher_walk walk;
|
||||
u8 *src, *dst;
|
||||
unsigned int chunksize;
|
||||
|
||||
ops->skcipher_walk_init(&walk, req, false);
|
||||
|
||||
while (walk.nbytes) {
|
||||
src = walk.src.virt.addr;
|
||||
dst = walk.dst.virt.addr;
|
||||
chunksize = walk.nbytes;
|
||||
|
||||
ops->crypt_chunk(state, dst, src, chunksize);
|
||||
|
||||
skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Finalise the state and fold the tag into @tag_xor.
 *
 * A block holding the little-endian bit lengths of the associated data and of
 * the payload is XORed with state block 3 and absorbed seven times; afterwards
 * every state block is XORed into @tag_xor (which the caller pre-loads with
 * either zeroes or the received tag).
 */
static void crypto_aegis256_final(struct aegis_state *state,
				  union aegis_block *tag_xor,
				  u64 assoclen, u64 cryptlen)
{
	union aegis_block lens;
	unsigned int n;

	lens.words64[0] = cpu_to_le64(assoclen * 8);
	lens.words64[1] = cpu_to_le64(cryptlen * 8);
	crypto_aegis_block_xor(&lens, &state->blocks[3]);

	for (n = 7; n > 0; n--)
		crypto_aegis256_update_a(state, &lens);

	for (n = 0; n < AEGIS256_STATE_BLOCKS; n++)
		crypto_aegis_block_xor(tag_xor, &state->blocks[n]);
}
|
||||
|
||||
static int crypto_aegis256_setkey(struct crypto_aead *aead, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct aegis_ctx *ctx = crypto_aead_ctx(aead);
|
||||
|
||||
if (keylen != AEGIS256_KEY_SIZE) {
|
||||
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memcpy(ctx->key[0].bytes, key, AEGIS_BLOCK_SIZE);
|
||||
memcpy(ctx->key[1].bytes, key + AEGIS_BLOCK_SIZE,
|
||||
AEGIS_BLOCK_SIZE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_aegis256_setauthsize(struct crypto_aead *tfm,
|
||||
unsigned int authsize)
|
||||
{
|
||||
if (authsize > AEGIS256_MAX_AUTH_SIZE)
|
||||
return -EINVAL;
|
||||
if (authsize < AEGIS256_MIN_AUTH_SIZE)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void crypto_aegis256_crypt(struct aead_request *req,
|
||||
union aegis_block *tag_xor,
|
||||
unsigned int cryptlen,
|
||||
const struct aegis256_ops *ops)
|
||||
{
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct aegis_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct aegis_state state;
|
||||
|
||||
crypto_aegis256_init(&state, ctx->key, req->iv);
|
||||
crypto_aegis256_process_ad(&state, req->src, req->assoclen);
|
||||
crypto_aegis256_process_crypt(&state, req, ops);
|
||||
crypto_aegis256_final(&state, tag_xor, req->assoclen, cryptlen);
|
||||
}
|
||||
|
||||
static int crypto_aegis256_encrypt(struct aead_request *req)
|
||||
{
|
||||
static const struct aegis256_ops ops = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_encrypt,
|
||||
.crypt_chunk = crypto_aegis256_encrypt_chunk,
|
||||
};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
union aegis_block tag = {};
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen;
|
||||
|
||||
crypto_aegis256_crypt(req, &tag, cryptlen, &ops);
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
|
||||
authsize, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_aegis256_decrypt(struct aead_request *req)
|
||||
{
|
||||
static const struct aegis256_ops ops = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_decrypt,
|
||||
.crypt_chunk = crypto_aegis256_decrypt_chunk,
|
||||
};
|
||||
static const u8 zeros[AEGIS256_MAX_AUTH_SIZE] = {};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
union aegis_block tag;
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen - authsize;
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
|
||||
authsize, 0);
|
||||
|
||||
crypto_aegis256_crypt(req, &tag, cryptlen, &ops);
|
||||
|
||||
return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
|
||||
}
|
||||
|
||||
/* Transform constructor: performs no work and always returns 0. */
static int crypto_aegis256_init_tfm(struct crypto_aead *tfm)
{
	return 0;
}
|
||||
|
||||
/* Transform destructor: intentionally empty. */
static void crypto_aegis256_exit_tfm(struct crypto_aead *tfm)
{
}
|
||||
|
||||
static struct aead_alg crypto_aegis256_alg = {
|
||||
.setkey = crypto_aegis256_setkey,
|
||||
.setauthsize = crypto_aegis256_setauthsize,
|
||||
.encrypt = crypto_aegis256_encrypt,
|
||||
.decrypt = crypto_aegis256_decrypt,
|
||||
.init = crypto_aegis256_init_tfm,
|
||||
.exit = crypto_aegis256_exit_tfm,
|
||||
|
||||
.ivsize = AEGIS256_NONCE_SIZE,
|
||||
.maxauthsize = AEGIS256_MAX_AUTH_SIZE,
|
||||
.chunksize = AEGIS_BLOCK_SIZE,
|
||||
|
||||
.base = {
|
||||
.cra_flags = CRYPTO_ALG_TYPE_AEAD,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct aegis_ctx),
|
||||
.cra_alignmask = 0,
|
||||
|
||||
.cra_priority = 100,
|
||||
|
||||
.cra_name = "aegis256",
|
||||
.cra_driver_name = "aegis256-generic",
|
||||
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
||||
/* Module entry point: registers the AEAD algorithm and returns the result. */
static int __init crypto_aegis256_module_init(void)
{
	int err = crypto_register_aead(&crypto_aegis256_alg);

	return err;
}
|
||||
|
||||
/* Module exit point: unregisters the AEAD algorithm. */
static void __exit crypto_aegis256_module_exit(void)
{
	crypto_unregister_aead(&crypto_aegis256_alg);
}
|
||||
|
||||
module_init(crypto_aegis256_module_init);
|
||||
module_exit(crypto_aegis256_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("AEGIS-256 AEAD algorithm");
|
||||
MODULE_ALIAS_CRYPTO("aegis256");
|
||||
MODULE_ALIAS_CRYPTO("aegis256-generic");
|
@ -10,6 +10,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/fips.h>
|
||||
@ -59,6 +60,15 @@ static int crypto_check_alg(struct crypto_alg *alg)
|
||||
if (alg->cra_blocksize > PAGE_SIZE / 8)
|
||||
return -EINVAL;
|
||||
|
||||
if (!alg->cra_type && (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
|
||||
CRYPTO_ALG_TYPE_CIPHER) {
|
||||
if (alg->cra_alignmask > MAX_CIPHER_ALIGNMASK)
|
||||
return -EINVAL;
|
||||
|
||||
if (alg->cra_blocksize > MAX_CIPHER_BLOCKSIZE)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (alg->cra_priority < 0)
|
||||
return -EINVAL;
|
||||
|
||||
|
@ -108,6 +108,7 @@ static int crypto_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
|
||||
CRYPTO_TFM_RES_MASK);
|
||||
|
||||
out:
|
||||
memzero_explicit(&keys, sizeof(keys));
|
||||
return err;
|
||||
|
||||
badkey:
|
||||
|
@ -90,6 +90,7 @@ static int crypto_authenc_esn_setkey(struct crypto_aead *authenc_esn, const u8 *
|
||||
CRYPTO_TFM_RES_MASK);
|
||||
|
||||
out:
|
||||
memzero_explicit(&keys, sizeof(keys));
|
||||
return err;
|
||||
|
||||
badkey:
|
||||
|
@ -53,9 +53,8 @@ static void crypto_cfb_encrypt_one(struct crypto_skcipher *tfm,
|
||||
static void crypto_cfb_final(struct skcipher_walk *walk,
|
||||
struct crypto_skcipher *tfm)
|
||||
{
|
||||
const unsigned int bsize = crypto_cfb_bsize(tfm);
|
||||
const unsigned long alignmask = crypto_skcipher_alignmask(tfm);
|
||||
u8 tmp[bsize + alignmask];
|
||||
u8 tmp[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
|
||||
u8 *stream = PTR_ALIGN(tmp + 0, alignmask + 1);
|
||||
u8 *src = walk->src.virt.addr;
|
||||
u8 *dst = walk->dst.virt.addr;
|
||||
@ -94,7 +93,7 @@ static int crypto_cfb_encrypt_inplace(struct skcipher_walk *walk,
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u8 *src = walk->src.virt.addr;
|
||||
u8 *iv = walk->iv;
|
||||
u8 tmp[bsize];
|
||||
u8 tmp[MAX_CIPHER_BLOCKSIZE];
|
||||
|
||||
do {
|
||||
crypto_cfb_encrypt_one(tfm, iv, tmp);
|
||||
@ -164,7 +163,7 @@ static int crypto_cfb_decrypt_inplace(struct skcipher_walk *walk,
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u8 *src = walk->src.virt.addr;
|
||||
u8 *iv = walk->iv;
|
||||
u8 tmp[bsize];
|
||||
u8 tmp[MAX_CIPHER_BLOCKSIZE];
|
||||
|
||||
do {
|
||||
crypto_cfb_encrypt_one(tfm, iv, tmp);
|
||||
|
@ -13,6 +13,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/errno.h>
|
||||
@ -67,7 +68,7 @@ static void cipher_crypt_unaligned(void (*fn)(struct crypto_tfm *, u8 *,
|
||||
{
|
||||
unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
|
||||
unsigned int size = crypto_tfm_alg_blocksize(tfm);
|
||||
u8 buffer[size + alignmask];
|
||||
u8 buffer[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
|
||||
u8 *tmp = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
|
||||
|
||||
memcpy(tmp, src, size);
|
||||
|
@ -29,6 +29,7 @@
|
||||
* This is crypto api shash wrappers to crc32_le.
|
||||
*/
|
||||
|
||||
#include <asm/unaligned.h>
|
||||
#include <linux/crc32.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <linux/init.h>
|
||||
@ -39,11 +40,6 @@
|
||||
#define CHKSUM_BLOCK_SIZE 1
|
||||
#define CHKSUM_DIGEST_SIZE 4
|
||||
|
||||
static u32 __crc32_le(u32 crc, unsigned char const *p, size_t len)
|
||||
{
|
||||
return crc32_le(crc, p, len);
|
||||
}
|
||||
|
||||
/** No default init with ~0 */
|
||||
static int crc32_cra_init(struct crypto_tfm *tfm)
|
||||
{
|
||||
@ -54,7 +50,6 @@ static int crc32_cra_init(struct crypto_tfm *tfm)
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Setting the seed allows arbitrary accumulators and flexible XOR policy
|
||||
* If your algorithm starts with ~0, then XOR with ~0 before you set
|
||||
@ -69,7 +64,7 @@ static int crc32_setkey(struct crypto_shash *hash, const u8 *key,
|
||||
crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
*mctx = le32_to_cpup((__le32 *)key);
|
||||
*mctx = get_unaligned_le32(key);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -88,7 +83,7 @@ static int crc32_update(struct shash_desc *desc, const u8 *data,
|
||||
{
|
||||
u32 *crcp = shash_desc_ctx(desc);
|
||||
|
||||
*crcp = __crc32_le(*crcp, data, len);
|
||||
*crcp = crc32_le(*crcp, data, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -96,7 +91,7 @@ static int crc32_update(struct shash_desc *desc, const u8 *data,
|
||||
static int __crc32_finup(u32 *crcp, const u8 *data, unsigned int len,
|
||||
u8 *out)
|
||||
{
|
||||
*(__le32 *)out = cpu_to_le32(__crc32_le(*crcp, data, len));
|
||||
put_unaligned_le32(crc32_le(*crcp, data, len), out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -110,7 +105,7 @@ static int crc32_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
u32 *crcp = shash_desc_ctx(desc);
|
||||
|
||||
*(__le32 *)out = cpu_to_le32p(crcp);
|
||||
put_unaligned_le32(*crcp, out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -35,6 +35,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <asm/unaligned.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
@ -82,7 +83,7 @@ static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
|
||||
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
mctx->key = le32_to_cpu(*(__le32 *)key);
|
||||
mctx->key = get_unaligned_le32(key);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -99,13 +100,13 @@ static int chksum_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
*(__le32 *)out = ~cpu_to_le32p(&ctx->crc);
|
||||
put_unaligned_le32(~ctx->crc, out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __chksum_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out)
|
||||
{
|
||||
*(__le32 *)out = ~cpu_to_le32(__crc32c_le(*crcp, data, len));
|
||||
put_unaligned_le32(~__crc32c_le(*crcp, data, len), out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -148,7 +149,6 @@ static struct shash_alg alg = {
|
||||
.cra_priority = 100,
|
||||
.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
|
||||
.cra_blocksize = CHKSUM_BLOCK_SIZE,
|
||||
.cra_alignmask = 3,
|
||||
.cra_ctxsize = sizeof(struct chksum_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = crc32c_cra_init,
|
||||
|
@ -58,7 +58,7 @@ static void crypto_ctr_crypt_final(struct blkcipher_walk *walk,
|
||||
unsigned int bsize = crypto_cipher_blocksize(tfm);
|
||||
unsigned long alignmask = crypto_cipher_alignmask(tfm);
|
||||
u8 *ctrblk = walk->iv;
|
||||
u8 tmp[bsize + alignmask];
|
||||
u8 tmp[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
|
||||
u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1);
|
||||
u8 *src = walk->src.virt.addr;
|
||||
u8 *dst = walk->dst.virt.addr;
|
||||
@ -106,7 +106,7 @@ static int crypto_ctr_crypt_inplace(struct blkcipher_walk *walk,
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u8 *ctrblk = walk->iv;
|
||||
u8 *src = walk->src.virt.addr;
|
||||
u8 tmp[bsize + alignmask];
|
||||
u8 tmp[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
|
||||
u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1);
|
||||
|
||||
do {
|
||||
|
@ -40,6 +40,7 @@
|
||||
* rfc3962 includes errata information in its Appendix A.
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
@ -104,7 +105,7 @@ static int cts_cbc_encrypt(struct skcipher_request *req)
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct skcipher_request *subreq = &rctx->subreq;
|
||||
int bsize = crypto_skcipher_blocksize(tfm);
|
||||
u8 d[bsize * 2] __aligned(__alignof__(u32));
|
||||
u8 d[MAX_CIPHER_BLOCKSIZE * 2] __aligned(__alignof__(u32));
|
||||
struct scatterlist *sg;
|
||||
unsigned int offset;
|
||||
int lastn;
|
||||
@ -183,7 +184,7 @@ static int cts_cbc_decrypt(struct skcipher_request *req)
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct skcipher_request *subreq = &rctx->subreq;
|
||||
int bsize = crypto_skcipher_blocksize(tfm);
|
||||
u8 d[bsize * 2] __aligned(__alignof__(u32));
|
||||
u8 d[MAX_CIPHER_BLOCKSIZE * 2] __aligned(__alignof__(u32));
|
||||
struct scatterlist *sg;
|
||||
unsigned int offset;
|
||||
u8 *space;
|
||||
|
66
crypto/ecc.c
66
crypto/ecc.c
@ -515,7 +515,7 @@ static void vli_mmod_fast_256(u64 *result, const u64 *product,
|
||||
static bool vli_mmod_fast(u64 *result, u64 *product,
|
||||
const u64 *curve_prime, unsigned int ndigits)
|
||||
{
|
||||
u64 tmp[2 * ndigits];
|
||||
u64 tmp[2 * ECC_MAX_DIGITS];
|
||||
|
||||
switch (ndigits) {
|
||||
case 3:
|
||||
@ -536,7 +536,7 @@ static bool vli_mmod_fast(u64 *result, u64 *product,
|
||||
static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right,
|
||||
const u64 *curve_prime, unsigned int ndigits)
|
||||
{
|
||||
u64 product[2 * ndigits];
|
||||
u64 product[2 * ECC_MAX_DIGITS];
|
||||
|
||||
vli_mult(product, left, right, ndigits);
|
||||
vli_mmod_fast(result, product, curve_prime, ndigits);
|
||||
@ -546,7 +546,7 @@ static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right,
|
||||
static void vli_mod_square_fast(u64 *result, const u64 *left,
|
||||
const u64 *curve_prime, unsigned int ndigits)
|
||||
{
|
||||
u64 product[2 * ndigits];
|
||||
u64 product[2 * ECC_MAX_DIGITS];
|
||||
|
||||
vli_square(product, left, ndigits);
|
||||
vli_mmod_fast(result, product, curve_prime, ndigits);
|
||||
@ -560,8 +560,8 @@ static void vli_mod_square_fast(u64 *result, const u64 *left,
|
||||
static void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod,
|
||||
unsigned int ndigits)
|
||||
{
|
||||
u64 a[ndigits], b[ndigits];
|
||||
u64 u[ndigits], v[ndigits];
|
||||
u64 a[ECC_MAX_DIGITS], b[ECC_MAX_DIGITS];
|
||||
u64 u[ECC_MAX_DIGITS], v[ECC_MAX_DIGITS];
|
||||
u64 carry;
|
||||
int cmp_result;
|
||||
|
||||
@ -649,8 +649,8 @@ static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1,
|
||||
u64 *curve_prime, unsigned int ndigits)
|
||||
{
|
||||
/* t1 = x, t2 = y, t3 = z */
|
||||
u64 t4[ndigits];
|
||||
u64 t5[ndigits];
|
||||
u64 t4[ECC_MAX_DIGITS];
|
||||
u64 t5[ECC_MAX_DIGITS];
|
||||
|
||||
if (vli_is_zero(z1, ndigits))
|
||||
return;
|
||||
@ -711,7 +711,7 @@ static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1,
|
||||
static void apply_z(u64 *x1, u64 *y1, u64 *z, u64 *curve_prime,
|
||||
unsigned int ndigits)
|
||||
{
|
||||
u64 t1[ndigits];
|
||||
u64 t1[ECC_MAX_DIGITS];
|
||||
|
||||
vli_mod_square_fast(t1, z, curve_prime, ndigits); /* z^2 */
|
||||
vli_mod_mult_fast(x1, x1, t1, curve_prime, ndigits); /* x1 * z^2 */
|
||||
@ -724,7 +724,7 @@ static void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, u64 *y2,
|
||||
u64 *p_initial_z, u64 *curve_prime,
|
||||
unsigned int ndigits)
|
||||
{
|
||||
u64 z[ndigits];
|
||||
u64 z[ECC_MAX_DIGITS];
|
||||
|
||||
vli_set(x2, x1, ndigits);
|
||||
vli_set(y2, y1, ndigits);
|
||||
@ -750,7 +750,7 @@ static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime,
|
||||
unsigned int ndigits)
|
||||
{
|
||||
/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
|
||||
u64 t5[ndigits];
|
||||
u64 t5[ECC_MAX_DIGITS];
|
||||
|
||||
/* t5 = x2 - x1 */
|
||||
vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
|
||||
@ -791,9 +791,9 @@ static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime,
|
||||
unsigned int ndigits)
|
||||
{
|
||||
/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
|
||||
u64 t5[ndigits];
|
||||
u64 t6[ndigits];
|
||||
u64 t7[ndigits];
|
||||
u64 t5[ECC_MAX_DIGITS];
|
||||
u64 t6[ECC_MAX_DIGITS];
|
||||
u64 t7[ECC_MAX_DIGITS];
|
||||
|
||||
/* t5 = x2 - x1 */
|
||||
vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
|
||||
@ -846,9 +846,9 @@ static void ecc_point_mult(struct ecc_point *result,
|
||||
unsigned int ndigits)
|
||||
{
|
||||
/* R0 and R1 */
|
||||
u64 rx[2][ndigits];
|
||||
u64 ry[2][ndigits];
|
||||
u64 z[ndigits];
|
||||
u64 rx[2][ECC_MAX_DIGITS];
|
||||
u64 ry[2][ECC_MAX_DIGITS];
|
||||
u64 z[ECC_MAX_DIGITS];
|
||||
int i, nb;
|
||||
int num_bits = vli_num_bits(scalar, ndigits);
|
||||
|
||||
@ -943,13 +943,13 @@ int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
|
||||
int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey)
|
||||
{
|
||||
const struct ecc_curve *curve = ecc_get_curve(curve_id);
|
||||
u64 priv[ndigits];
|
||||
u64 priv[ECC_MAX_DIGITS];
|
||||
unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
|
||||
unsigned int nbits = vli_num_bits(curve->n, ndigits);
|
||||
int err;
|
||||
|
||||
/* Check that N is included in Table 1 of FIPS 186-4, section 6.1.1 */
|
||||
if (nbits < 160)
|
||||
if (nbits < 160 || ndigits > ARRAY_SIZE(priv))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
@ -988,10 +988,10 @@ int ecc_make_pub_key(unsigned int curve_id, unsigned int ndigits,
|
||||
{
|
||||
int ret = 0;
|
||||
struct ecc_point *pk;
|
||||
u64 priv[ndigits];
|
||||
u64 priv[ECC_MAX_DIGITS];
|
||||
const struct ecc_curve *curve = ecc_get_curve(curve_id);
|
||||
|
||||
if (!private_key || !curve) {
|
||||
if (!private_key || !curve || ndigits > ARRAY_SIZE(priv)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
@ -1025,30 +1025,25 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
|
||||
{
|
||||
int ret = 0;
|
||||
struct ecc_point *product, *pk;
|
||||
u64 *priv, *rand_z;
|
||||
u64 priv[ECC_MAX_DIGITS];
|
||||
u64 rand_z[ECC_MAX_DIGITS];
|
||||
unsigned int nbytes;
|
||||
const struct ecc_curve *curve = ecc_get_curve(curve_id);
|
||||
|
||||
if (!private_key || !public_key || !curve) {
|
||||
if (!private_key || !public_key || !curve ||
|
||||
ndigits > ARRAY_SIZE(priv) || ndigits > ARRAY_SIZE(rand_z)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
priv = kmalloc_array(ndigits, sizeof(*priv), GFP_KERNEL);
|
||||
if (!priv) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
|
||||
|
||||
rand_z = kmalloc_array(ndigits, sizeof(*rand_z), GFP_KERNEL);
|
||||
if (!rand_z) {
|
||||
ret = -ENOMEM;
|
||||
goto kfree_out;
|
||||
}
|
||||
get_random_bytes(rand_z, nbytes);
|
||||
|
||||
pk = ecc_alloc_point(ndigits);
|
||||
if (!pk) {
|
||||
ret = -ENOMEM;
|
||||
goto kfree_out;
|
||||
goto out;
|
||||
}
|
||||
|
||||
product = ecc_alloc_point(ndigits);
|
||||
@ -1057,8 +1052,6 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
|
||||
goto err_alloc_product;
|
||||
}
|
||||
|
||||
get_random_bytes(rand_z, ndigits << ECC_DIGITS_TO_BYTES_SHIFT);
|
||||
|
||||
ecc_swap_digits(public_key, pk->x, ndigits);
|
||||
ecc_swap_digits(&public_key[ndigits], pk->y, ndigits);
|
||||
ecc_swap_digits(private_key, priv, ndigits);
|
||||
@ -1073,9 +1066,6 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
|
||||
ecc_free_point(product);
|
||||
err_alloc_product:
|
||||
ecc_free_point(pk);
|
||||
kfree_out:
|
||||
kzfree(priv);
|
||||
kzfree(rand_z);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
@ -26,7 +26,9 @@
|
||||
#ifndef _CRYPTO_ECC_H
|
||||
#define _CRYPTO_ECC_H
|
||||
|
||||
#define ECC_MAX_DIGITS 4 /* 256 */
|
||||
#define ECC_CURVE_NIST_P192_DIGITS 3
|
||||
#define ECC_CURVE_NIST_P256_DIGITS 4
|
||||
#define ECC_MAX_DIGITS ECC_CURVE_NIST_P256_DIGITS
|
||||
|
||||
#define ECC_DIGITS_TO_BYTES_SHIFT 3
|
||||
|
||||
|
@ -30,8 +30,8 @@ static inline struct ecdh_ctx *ecdh_get_ctx(struct crypto_kpp *tfm)
|
||||
static unsigned int ecdh_supported_curve(unsigned int curve_id)
|
||||
{
|
||||
switch (curve_id) {
|
||||
case ECC_CURVE_NIST_P192: return 3;
|
||||
case ECC_CURVE_NIST_P256: return 4;
|
||||
case ECC_CURVE_NIST_P192: return ECC_CURVE_NIST_P192_DIGITS;
|
||||
case ECC_CURVE_NIST_P256: return ECC_CURVE_NIST_P256_DIGITS;
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
549
crypto/morus1280.c
Normal file
549
crypto/morus1280.c
Normal file
@ -0,0 +1,549 @@
|
||||
/*
|
||||
* The MORUS-1280 Authenticated-Encryption Algorithm
|
||||
*
|
||||
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*/
|
||||
|
||||
#include <asm/unaligned.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/morus_common.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/scatterlist.h>
|
||||
|
||||
#define MORUS1280_WORD_SIZE 8
|
||||
#define MORUS1280_BLOCK_SIZE (MORUS_BLOCK_WORDS * MORUS1280_WORD_SIZE)
|
||||
#define MORUS1280_BLOCK_ALIGN (__alignof__(__le64))
|
||||
#define MORUS1280_ALIGNED(p) IS_ALIGNED((uintptr_t)p, MORUS1280_BLOCK_ALIGN)
|
||||
|
||||
struct morus1280_block {
|
||||
u64 words[MORUS_BLOCK_WORDS];
|
||||
};
|
||||
|
||||
union morus1280_block_in {
|
||||
__le64 words[MORUS_BLOCK_WORDS];
|
||||
u8 bytes[MORUS1280_BLOCK_SIZE];
|
||||
};
|
||||
|
||||
struct morus1280_state {
|
||||
struct morus1280_block s[MORUS_STATE_BLOCKS];
|
||||
};
|
||||
|
||||
struct morus1280_ctx {
|
||||
struct morus1280_block key;
|
||||
};
|
||||
|
||||
struct morus1280_ops {
|
||||
int (*skcipher_walk_init)(struct skcipher_walk *walk,
|
||||
struct aead_request *req, bool atomic);
|
||||
|
||||
void (*crypt_chunk)(struct morus1280_state *state,
|
||||
u8 *dst, const u8 *src, unsigned int size);
|
||||
};
|
||||
|
||||
/* Constant block loaded into state block s[4] during initialisation. */
static const struct morus1280_block crypto_morus1280_const[1] = {
	[0] = {
		.words = {
			U64_C(0x0d08050302010100),
			U64_C(0x6279e99059372215),
			U64_C(0xf12fc26d55183ddb),
			U64_C(0xdd28b57342311120),
		},
	},
};
|
||||
|
||||
static void crypto_morus1280_round(struct morus1280_block *b0,
|
||||
struct morus1280_block *b1,
|
||||
struct morus1280_block *b2,
|
||||
struct morus1280_block *b3,
|
||||
struct morus1280_block *b4,
|
||||
const struct morus1280_block *m,
|
||||
unsigned int b, unsigned int w)
|
||||
{
|
||||
unsigned int i;
|
||||
struct morus1280_block tmp;
|
||||
|
||||
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
|
||||
b0->words[i] ^= b1->words[i] & b2->words[i];
|
||||
b0->words[i] ^= b3->words[i];
|
||||
b0->words[i] ^= m->words[i];
|
||||
b0->words[i] = rol64(b0->words[i], b);
|
||||
}
|
||||
|
||||
tmp = *b3;
|
||||
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
|
||||
b3->words[(i + w) % MORUS_BLOCK_WORDS] = tmp.words[i];
|
||||
}
|
||||
|
||||
static void crypto_morus1280_update(struct morus1280_state *state,
|
||||
const struct morus1280_block *m)
|
||||
{
|
||||
static const struct morus1280_block z = {};
|
||||
|
||||
struct morus1280_block *s = state->s;
|
||||
|
||||
crypto_morus1280_round(&s[0], &s[1], &s[2], &s[3], &s[4], &z, 13, 1);
|
||||
crypto_morus1280_round(&s[1], &s[2], &s[3], &s[4], &s[0], m, 46, 2);
|
||||
crypto_morus1280_round(&s[2], &s[3], &s[4], &s[0], &s[1], m, 38, 3);
|
||||
crypto_morus1280_round(&s[3], &s[4], &s[0], &s[1], &s[2], m, 7, 2);
|
||||
crypto_morus1280_round(&s[4], &s[0], &s[1], &s[2], &s[3], m, 4, 1);
|
||||
}
|
||||
|
||||
static void crypto_morus1280_load_a(struct morus1280_block *dst, const u8 *src)
|
||||
{
|
||||
unsigned int i;
|
||||
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
|
||||
dst->words[i] = le64_to_cpu(*(const __le64 *)src);
|
||||
src += MORUS1280_WORD_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_morus1280_load_u(struct morus1280_block *dst, const u8 *src)
|
||||
{
|
||||
unsigned int i;
|
||||
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
|
||||
dst->words[i] = get_unaligned_le64(src);
|
||||
src += MORUS1280_WORD_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Load one block from @src, picking the aligned or unaligned variant. */
static void crypto_morus1280_load(struct morus1280_block *dst, const u8 *src)
{
	if (!MORUS1280_ALIGNED(src)) {
		crypto_morus1280_load_u(dst, src);
		return;
	}

	crypto_morus1280_load_a(dst, src);
}
|
||||
|
||||
/*
 * Store one block to @dst as little-endian words.  @dst is written through
 * a __le64 pointer and therefore must be suitably aligned.
 */
static void crypto_morus1280_store_a(u8 *dst, const struct morus1280_block *src)
{
	__le64 *out = (__le64 *)dst;
	unsigned int k;

	for (k = 0; k < MORUS_BLOCK_WORDS; k++)
		out[k] = cpu_to_le64(src->words[k]);
}
|
||||
|
||||
/*
 * Store one block to @dst as little-endian words using unaligned
 * accessors, so @dst may have any alignment.
 */
static void crypto_morus1280_store_u(u8 *dst, const struct morus1280_block *src)
{
	unsigned int k;

	for (k = 0; k < MORUS_BLOCK_WORDS; k++)
		put_unaligned_le64(src->words[k],
				   dst + k * MORUS1280_WORD_SIZE);
}
|
||||
|
||||
/* Store one block to @dst, picking the aligned or unaligned variant. */
static void crypto_morus1280_store(u8 *dst, const struct morus1280_block *src)
{
	if (!MORUS1280_ALIGNED(dst)) {
		crypto_morus1280_store_u(dst, src);
		return;
	}

	crypto_morus1280_store_a(dst, src);
}
|
||||
|
||||
static void crypto_morus1280_ad(struct morus1280_state *state, const u8 *src,
|
||||
unsigned int size)
|
||||
{
|
||||
struct morus1280_block m;
|
||||
|
||||
if (MORUS1280_ALIGNED(src)) {
|
||||
while (size >= MORUS1280_BLOCK_SIZE) {
|
||||
crypto_morus1280_load_a(&m, src);
|
||||
crypto_morus1280_update(state, &m);
|
||||
|
||||
size -= MORUS1280_BLOCK_SIZE;
|
||||
src += MORUS1280_BLOCK_SIZE;
|
||||
}
|
||||
} else {
|
||||
while (size >= MORUS1280_BLOCK_SIZE) {
|
||||
crypto_morus1280_load_u(&m, src);
|
||||
crypto_morus1280_update(state, &m);
|
||||
|
||||
size -= MORUS1280_BLOCK_SIZE;
|
||||
src += MORUS1280_BLOCK_SIZE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_morus1280_core(const struct morus1280_state *state,
|
||||
struct morus1280_block *blk)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
|
||||
blk->words[(i + 3) % MORUS_BLOCK_WORDS] ^= state->s[1].words[i];
|
||||
|
||||
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
|
||||
blk->words[i] ^= state->s[0].words[i];
|
||||
blk->words[i] ^= state->s[2].words[i] & state->s[3].words[i];
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Encrypt @size bytes from @src into @dst, updating @state.
 *
 * Whole blocks are processed first: the keystream is applied to a copy of
 * the plaintext block, the result is stored, and the state is updated with
 * the plaintext.  A trailing partial block is zero-padded in a local
 * buffer, processed the same way, and only its first @size remaining bytes
 * are copied out.
 */
static void crypto_morus1280_encrypt_chunk(struct morus1280_state *state,
					   u8 *dst, const u8 *src,
					   unsigned int size)
{
	/* Aligned accessors are used only when both buffers are aligned. */
	const bool aligned = MORUS1280_ALIGNED(src) && MORUS1280_ALIGNED(dst);
	void (*load)(struct morus1280_block *, const u8 *) =
		aligned ? crypto_morus1280_load_a : crypto_morus1280_load_u;
	void (*store)(u8 *, const struct morus1280_block *) =
		aligned ? crypto_morus1280_store_a : crypto_morus1280_store_u;
	struct morus1280_block cipher, plain;
	union morus1280_block_in tail;

	for (; size >= MORUS1280_BLOCK_SIZE; size -= MORUS1280_BLOCK_SIZE) {
		load(&plain, src);
		cipher = plain;
		crypto_morus1280_core(state, &cipher);
		store(dst, &cipher);
		crypto_morus1280_update(state, &plain);

		src += MORUS1280_BLOCK_SIZE;
		dst += MORUS1280_BLOCK_SIZE;
	}

	if (size == 0)
		return;

	/* Partial final block: pad with zeros up to a full block. */
	memcpy(tail.bytes, src, size);
	memset(tail.bytes + size, 0, MORUS1280_BLOCK_SIZE - size);

	crypto_morus1280_load_a(&plain, tail.bytes);
	cipher = plain;
	crypto_morus1280_core(state, &cipher);
	crypto_morus1280_store_a(tail.bytes, &cipher);
	crypto_morus1280_update(state, &plain);

	memcpy(dst, tail.bytes, size);
}
|
||||
|
||||
/*
 * Decrypt @size bytes from @src into @dst, updating @state.
 *
 * Whole blocks: the keystream is applied in place to the loaded block,
 * the result is stored and also used for the state update.  A trailing
 * partial block is zero-padded, decrypted in a local buffer, and the bytes
 * beyond @size are cleared again before the state update so that only the
 * real plaintext bytes (plus zero padding) enter the state.
 */
static void crypto_morus1280_decrypt_chunk(struct morus1280_state *state,
					   u8 *dst, const u8 *src,
					   unsigned int size)
{
	/* Aligned accessors are used only when both buffers are aligned. */
	const bool aligned = MORUS1280_ALIGNED(src) && MORUS1280_ALIGNED(dst);
	void (*load)(struct morus1280_block *, const u8 *) =
		aligned ? crypto_morus1280_load_a : crypto_morus1280_load_u;
	void (*store)(u8 *, const struct morus1280_block *) =
		aligned ? crypto_morus1280_store_a : crypto_morus1280_store_u;
	struct morus1280_block blk;
	union morus1280_block_in tail;
	unsigned int pad;

	for (; size >= MORUS1280_BLOCK_SIZE; size -= MORUS1280_BLOCK_SIZE) {
		load(&blk, src);
		crypto_morus1280_core(state, &blk);
		store(dst, &blk);
		crypto_morus1280_update(state, &blk);

		src += MORUS1280_BLOCK_SIZE;
		dst += MORUS1280_BLOCK_SIZE;
	}

	if (size == 0)
		return;

	pad = MORUS1280_BLOCK_SIZE - size;

	memcpy(tail.bytes, src, size);
	memset(tail.bytes + size, 0, pad);

	crypto_morus1280_load_a(&blk, tail.bytes);
	crypto_morus1280_core(state, &blk);
	crypto_morus1280_store_a(tail.bytes, &blk);

	/* Wipe the keystream bytes that landed in the padding area. */
	memset(tail.bytes + size, 0, pad);
	crypto_morus1280_load_a(&blk, tail.bytes);
	crypto_morus1280_update(state, &blk);

	memcpy(dst, tail.bytes, size);
}
|
||||
|
||||
static void crypto_morus1280_init(struct morus1280_state *state,
|
||||
const struct morus1280_block *key,
|
||||
const u8 *iv)
|
||||
{
|
||||
static const struct morus1280_block z = {};
|
||||
|
||||
union morus1280_block_in tmp;
|
||||
unsigned int i;
|
||||
|
||||
memcpy(tmp.bytes, iv, MORUS_NONCE_SIZE);
|
||||
memset(tmp.bytes + MORUS_NONCE_SIZE, 0,
|
||||
MORUS1280_BLOCK_SIZE - MORUS_NONCE_SIZE);
|
||||
|
||||
crypto_morus1280_load(&state->s[0], tmp.bytes);
|
||||
state->s[1] = *key;
|
||||
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
|
||||
state->s[2].words[i] = U64_C(0xFFFFFFFFFFFFFFFF);
|
||||
state->s[3] = z;
|
||||
state->s[4] = crypto_morus1280_const[0];
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
crypto_morus1280_update(state, &z);
|
||||
|
||||
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
|
||||
state->s[1].words[i] ^= key->words[i];
|
||||
}
|
||||
|
||||
static void crypto_morus1280_process_ad(struct morus1280_state *state,
|
||||
struct scatterlist *sg_src,
|
||||
unsigned int assoclen)
|
||||
{
|
||||
struct scatter_walk walk;
|
||||
struct morus1280_block m;
|
||||
union morus1280_block_in buf;
|
||||
unsigned int pos = 0;
|
||||
|
||||
scatterwalk_start(&walk, sg_src);
|
||||
while (assoclen != 0) {
|
||||
unsigned int size = scatterwalk_clamp(&walk, assoclen);
|
||||
unsigned int left = size;
|
||||
void *mapped = scatterwalk_map(&walk);
|
||||
const u8 *src = (const u8 *)mapped;
|
||||
|
||||
if (pos + size >= MORUS1280_BLOCK_SIZE) {
|
||||
if (pos > 0) {
|
||||
unsigned int fill = MORUS1280_BLOCK_SIZE - pos;
|
||||
memcpy(buf.bytes + pos, src, fill);
|
||||
|
||||
crypto_morus1280_load_a(&m, buf.bytes);
|
||||
crypto_morus1280_update(state, &m);
|
||||
|
||||
pos = 0;
|
||||
left -= fill;
|
||||
src += fill;
|
||||
}
|
||||
|
||||
crypto_morus1280_ad(state, src, left);
|
||||
src += left & ~(MORUS1280_BLOCK_SIZE - 1);
|
||||
left &= MORUS1280_BLOCK_SIZE - 1;
|
||||
}
|
||||
|
||||
memcpy(buf.bytes + pos, src, left);
|
||||
|
||||
pos += left;
|
||||
assoclen -= size;
|
||||
scatterwalk_unmap(mapped);
|
||||
scatterwalk_advance(&walk, size);
|
||||
scatterwalk_done(&walk, 0, assoclen);
|
||||
}
|
||||
|
||||
if (pos > 0) {
|
||||
memset(buf.bytes + pos, 0, MORUS1280_BLOCK_SIZE - pos);
|
||||
|
||||
crypto_morus1280_load_a(&m, buf.bytes);
|
||||
crypto_morus1280_update(state, &m);
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_morus1280_process_crypt(struct morus1280_state *state,
|
||||
struct aead_request *req,
|
||||
const struct morus1280_ops *ops)
|
||||
{
|
||||
struct skcipher_walk walk;
|
||||
u8 *dst;
|
||||
const u8 *src;
|
||||
|
||||
ops->skcipher_walk_init(&walk, req, false);
|
||||
|
||||
while (walk.nbytes) {
|
||||
src = walk.src.virt.addr;
|
||||
dst = walk.dst.virt.addr;
|
||||
|
||||
ops->crypt_chunk(state, dst, src, walk.nbytes);
|
||||
|
||||
skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Finalise the state and fold the authentication tag into @tag_xor.
 *
 * @state:    cipher state after all AD and payload have been processed
 * @tag_xor:  block the keystream-style tag output is XORed into (zero for
 *            encryption, the received tag for decryption)
 * @assoclen: length of the associated data in bytes
 * @cryptlen: length of the payload in bytes
 *
 * The bit lengths of AD and payload form the finalisation message block.
 * They are stored as plain native integers: the state words are operated
 * on as CPU-order values, with byte-order conversion done only by the
 * load/store helpers, so swapping the lengths here (as an earlier
 * cpu_to_le64() did) would yield a wrong tag on big-endian machines.
 * NOTE(review): relies on struct morus1280_block holding native u64 words,
 * as its users in this file imply - confirm against the struct definition.
 */
static void crypto_morus1280_final(struct morus1280_state *state,
				   struct morus1280_block *tag_xor,
				   u64 assoclen, u64 cryptlen)
{
	struct morus1280_block tmp;
	unsigned int i;

	tmp.words[0] = assoclen * 8;	/* AD length in bits */
	tmp.words[1] = cryptlen * 8;	/* payload length in bits */
	tmp.words[2] = 0;
	tmp.words[3] = 0;

	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
		state->s[4].words[i] ^= state->s[0].words[i];

	for (i = 0; i < 10; i++)
		crypto_morus1280_update(state, &tmp);

	crypto_morus1280_core(state, tag_xor);
}
|
||||
|
||||
static int crypto_morus1280_setkey(struct crypto_aead *aead, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
|
||||
union morus1280_block_in tmp;
|
||||
|
||||
if (keylen == MORUS1280_BLOCK_SIZE)
|
||||
crypto_morus1280_load(&ctx->key, key);
|
||||
else if (keylen == MORUS1280_BLOCK_SIZE / 2) {
|
||||
memcpy(tmp.bytes, key, keylen);
|
||||
memcpy(tmp.bytes + keylen, key, keylen);
|
||||
|
||||
crypto_morus1280_load(&ctx->key, tmp.bytes);
|
||||
} else {
|
||||
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_morus1280_setauthsize(struct crypto_aead *tfm,
|
||||
unsigned int authsize)
|
||||
{
|
||||
return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
|
||||
}
|
||||
|
||||
static void crypto_morus1280_crypt(struct aead_request *req,
|
||||
struct morus1280_block *tag_xor,
|
||||
unsigned int cryptlen,
|
||||
const struct morus1280_ops *ops)
|
||||
{
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct morus1280_state state;
|
||||
|
||||
crypto_morus1280_init(&state, &ctx->key, req->iv);
|
||||
crypto_morus1280_process_ad(&state, req->src, req->assoclen);
|
||||
crypto_morus1280_process_crypt(&state, req, ops);
|
||||
crypto_morus1280_final(&state, tag_xor, req->assoclen, cryptlen);
|
||||
}
|
||||
|
||||
static int crypto_morus1280_encrypt(struct aead_request *req)
|
||||
{
|
||||
static const struct morus1280_ops ops = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_encrypt,
|
||||
.crypt_chunk = crypto_morus1280_encrypt_chunk,
|
||||
};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct morus1280_block tag = {};
|
||||
union morus1280_block_in tag_out;
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen;
|
||||
|
||||
crypto_morus1280_crypt(req, &tag, cryptlen, &ops);
|
||||
crypto_morus1280_store(tag_out.bytes, &tag);
|
||||
|
||||
scatterwalk_map_and_copy(tag_out.bytes, req->dst,
|
||||
req->assoclen + cryptlen, authsize, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_morus1280_decrypt(struct aead_request *req)
|
||||
{
|
||||
static const struct morus1280_ops ops = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_decrypt,
|
||||
.crypt_chunk = crypto_morus1280_decrypt_chunk,
|
||||
};
|
||||
static const u8 zeros[MORUS1280_BLOCK_SIZE] = {};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
union morus1280_block_in tag_in;
|
||||
struct morus1280_block tag;
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen - authsize;
|
||||
|
||||
scatterwalk_map_and_copy(tag_in.bytes, req->src,
|
||||
req->assoclen + cryptlen, authsize, 0);
|
||||
|
||||
crypto_morus1280_load(&tag, tag_in.bytes);
|
||||
crypto_morus1280_crypt(req, &tag, cryptlen, &ops);
|
||||
crypto_morus1280_store(tag_in.bytes, &tag);
|
||||
|
||||
return crypto_memneq(tag_in.bytes, zeros, authsize) ? -EBADMSG : 0;
|
||||
}
|
||||
|
||||
/* Transform constructor: nothing to set up, always returns 0. */
static int crypto_morus1280_init_tfm(struct crypto_aead *tfm)
{
	return 0;
}
|
||||
|
||||
/* Transform destructor: intentionally empty, nothing to release. */
static void crypto_morus1280_exit_tfm(struct crypto_aead *tfm)
{
}
|
||||
|
||||
static struct aead_alg crypto_morus1280_alg = {
|
||||
.setkey = crypto_morus1280_setkey,
|
||||
.setauthsize = crypto_morus1280_setauthsize,
|
||||
.encrypt = crypto_morus1280_encrypt,
|
||||
.decrypt = crypto_morus1280_decrypt,
|
||||
.init = crypto_morus1280_init_tfm,
|
||||
.exit = crypto_morus1280_exit_tfm,
|
||||
|
||||
.ivsize = MORUS_NONCE_SIZE,
|
||||
.maxauthsize = MORUS_MAX_AUTH_SIZE,
|
||||
.chunksize = MORUS1280_BLOCK_SIZE,
|
||||
|
||||
.base = {
|
||||
.cra_flags = CRYPTO_ALG_TYPE_AEAD,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct morus1280_ctx),
|
||||
.cra_alignmask = 0,
|
||||
|
||||
.cra_priority = 100,
|
||||
|
||||
.cra_name = "morus1280",
|
||||
.cra_driver_name = "morus1280-generic",
|
||||
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/* Module entry point: register the AEAD and pass its result through. */
static int __init crypto_morus1280_module_init(void)
{
	return crypto_register_aead(&crypto_morus1280_alg);
}
|
||||
|
||||
/* Module exit point: unregister the AEAD registered at init time. */
static void __exit crypto_morus1280_module_exit(void)
{
	crypto_unregister_aead(&crypto_morus1280_alg);
}
|
||||
|
||||
module_init(crypto_morus1280_module_init);
|
||||
module_exit(crypto_morus1280_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm");
|
||||
MODULE_ALIAS_CRYPTO("morus1280");
|
||||
MODULE_ALIAS_CRYPTO("morus1280-generic");
|
544
crypto/morus640.c
Normal file
544
crypto/morus640.c
Normal file
@ -0,0 +1,544 @@
|
||||
/*
|
||||
* The MORUS-640 Authenticated-Encryption Algorithm
|
||||
*
|
||||
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*/
|
||||
|
||||
#include <asm/unaligned.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/morus_common.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/scatterlist.h>
|
||||
|
||||
#define MORUS640_WORD_SIZE 4
|
||||
#define MORUS640_BLOCK_SIZE (MORUS_BLOCK_WORDS * MORUS640_WORD_SIZE)
|
||||
#define MORUS640_BLOCK_ALIGN (__alignof__(__le32))
|
||||
#define MORUS640_ALIGNED(p) IS_ALIGNED((uintptr_t)p, MORUS640_BLOCK_ALIGN)
|
||||
|
||||
struct morus640_block {
|
||||
u32 words[MORUS_BLOCK_WORDS];
|
||||
};
|
||||
|
||||
union morus640_block_in {
|
||||
__le32 words[MORUS_BLOCK_WORDS];
|
||||
u8 bytes[MORUS640_BLOCK_SIZE];
|
||||
};
|
||||
|
||||
struct morus640_state {
|
||||
struct morus640_block s[MORUS_STATE_BLOCKS];
|
||||
};
|
||||
|
||||
struct morus640_ctx {
|
||||
struct morus640_block key;
|
||||
};
|
||||
|
||||
struct morus640_ops {
|
||||
int (*skcipher_walk_init)(struct skcipher_walk *walk,
|
||||
struct aead_request *req, bool atomic);
|
||||
|
||||
void (*crypt_chunk)(struct morus640_state *state,
|
||||
u8 *dst, const u8 *src, unsigned int size);
|
||||
};
|
||||
|
||||
/*
 * Initialisation constants.  Read as little-endian bytes they spell out
 * the Fibonacci sequence reduced modulo 256 (00 01 01 02 03 05 08 0d ...),
 * 16 bytes per block.
 */
static const struct morus640_block crypto_morus640_const[2] = {
	{ .words = {
		U32_C(0x02010100),
		U32_C(0x0d080503),
		U32_C(0x59372215),
		U32_C(0x6279e990),
	} },
	{ .words = {
		U32_C(0x55183ddb),
		U32_C(0xf12fc26d),
		U32_C(0x42311120),
		U32_C(0xdd28b573),
	} },
};
|
||||
|
||||
static void crypto_morus640_round(struct morus640_block *b0,
|
||||
struct morus640_block *b1,
|
||||
struct morus640_block *b2,
|
||||
struct morus640_block *b3,
|
||||
struct morus640_block *b4,
|
||||
const struct morus640_block *m,
|
||||
unsigned int b, unsigned int w)
|
||||
{
|
||||
unsigned int i;
|
||||
struct morus640_block tmp;
|
||||
|
||||
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
|
||||
b0->words[i] ^= b1->words[i] & b2->words[i];
|
||||
b0->words[i] ^= b3->words[i];
|
||||
b0->words[i] ^= m->words[i];
|
||||
b0->words[i] = rol32(b0->words[i], b);
|
||||
}
|
||||
|
||||
tmp = *b3;
|
||||
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
|
||||
b3->words[(i + w) % MORUS_BLOCK_WORDS] = tmp.words[i];
|
||||
}
|
||||
|
||||
static void crypto_morus640_update(struct morus640_state *state,
|
||||
const struct morus640_block *m)
|
||||
{
|
||||
static const struct morus640_block z = {};
|
||||
|
||||
struct morus640_block *s = state->s;
|
||||
|
||||
crypto_morus640_round(&s[0], &s[1], &s[2], &s[3], &s[4], &z, 5, 1);
|
||||
crypto_morus640_round(&s[1], &s[2], &s[3], &s[4], &s[0], m, 31, 2);
|
||||
crypto_morus640_round(&s[2], &s[3], &s[4], &s[0], &s[1], m, 7, 3);
|
||||
crypto_morus640_round(&s[3], &s[4], &s[0], &s[1], &s[2], m, 22, 2);
|
||||
crypto_morus640_round(&s[4], &s[0], &s[1], &s[2], &s[3], m, 13, 1);
|
||||
}
|
||||
|
||||
static void crypto_morus640_load_a(struct morus640_block *dst, const u8 *src)
|
||||
{
|
||||
unsigned int i;
|
||||
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
|
||||
dst->words[i] = le32_to_cpu(*(const __le32 *)src);
|
||||
src += MORUS640_WORD_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_morus640_load_u(struct morus640_block *dst, const u8 *src)
|
||||
{
|
||||
unsigned int i;
|
||||
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
|
||||
dst->words[i] = get_unaligned_le32(src);
|
||||
src += MORUS640_WORD_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Load one block from @src, choosing the accessor by @src's alignment. */
static void crypto_morus640_load(struct morus640_block *dst, const u8 *src)
{
	if (!MORUS640_ALIGNED(src)) {
		crypto_morus640_load_u(dst, src);
		return;
	}

	crypto_morus640_load_a(dst, src);
}
|
||||
|
||||
/*
 * Store the CPU-order words of @src as little-endian bytes at @dst.
 * @dst is written through a __le32 pointer, so it must be suitably
 * aligned.
 */
static void crypto_morus640_store_a(u8 *dst, const struct morus640_block *src)
{
	__le32 *out = (__le32 *)dst;
	unsigned int k;

	for (k = 0; k < MORUS_BLOCK_WORDS; k++)
		out[k] = cpu_to_le32(src->words[k]);
}
|
||||
|
||||
/*
 * Store the CPU-order words of @src as little-endian bytes at @dst,
 * using unaligned accessors so @dst may have any alignment.
 */
static void crypto_morus640_store_u(u8 *dst, const struct morus640_block *src)
{
	unsigned int k;

	for (k = 0; k < MORUS_BLOCK_WORDS; k++)
		put_unaligned_le32(src->words[k],
				   dst + k * MORUS640_WORD_SIZE);
}
|
||||
|
||||
/* Store one block to @dst, choosing the accessor by @dst's alignment. */
static void crypto_morus640_store(u8 *dst, const struct morus640_block *src)
{
	if (!MORUS640_ALIGNED(dst)) {
		crypto_morus640_store_u(dst, src);
		return;
	}

	crypto_morus640_store_a(dst, src);
}
|
||||
|
||||
static void crypto_morus640_ad(struct morus640_state *state, const u8 *src,
|
||||
unsigned int size)
|
||||
{
|
||||
struct morus640_block m;
|
||||
|
||||
if (MORUS640_ALIGNED(src)) {
|
||||
while (size >= MORUS640_BLOCK_SIZE) {
|
||||
crypto_morus640_load_a(&m, src);
|
||||
crypto_morus640_update(state, &m);
|
||||
|
||||
size -= MORUS640_BLOCK_SIZE;
|
||||
src += MORUS640_BLOCK_SIZE;
|
||||
}
|
||||
} else {
|
||||
while (size >= MORUS640_BLOCK_SIZE) {
|
||||
crypto_morus640_load_u(&m, src);
|
||||
crypto_morus640_update(state, &m);
|
||||
|
||||
size -= MORUS640_BLOCK_SIZE;
|
||||
src += MORUS640_BLOCK_SIZE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_morus640_core(const struct morus640_state *state,
|
||||
struct morus640_block *blk)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
|
||||
blk->words[(i + 3) % MORUS_BLOCK_WORDS] ^= state->s[1].words[i];
|
||||
|
||||
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
|
||||
blk->words[i] ^= state->s[0].words[i];
|
||||
blk->words[i] ^= state->s[2].words[i] & state->s[3].words[i];
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Encrypt @size bytes from @src into @dst and absorb the plaintext.
 *
 * Whole blocks are processed in place: the plaintext block is XORed with
 * the keystream to give the ciphertext, then the plaintext is fed into the
 * state update.  A trailing partial block is zero-padded in a local
 * buffer, processed the same way, and only its first @size bytes are
 * written out.
 */
static void crypto_morus640_encrypt_chunk(struct morus640_state *state, u8 *dst,
					  const u8 *src, unsigned int size)
{
	/* Block-sized strides keep alignment, so one test is enough. */
	const bool aligned = MORUS640_ALIGNED(src) && MORUS640_ALIGNED(dst);
	struct morus640_block pt, ct;

	for (; size >= MORUS640_BLOCK_SIZE; size -= MORUS640_BLOCK_SIZE) {
		if (aligned)
			crypto_morus640_load_a(&pt, src);
		else
			crypto_morus640_load_u(&pt, src);

		ct = pt;
		crypto_morus640_core(state, &ct);

		if (aligned)
			crypto_morus640_store_a(dst, &ct);
		else
			crypto_morus640_store_u(dst, &ct);

		crypto_morus640_update(state, &pt);

		src += MORUS640_BLOCK_SIZE;
		dst += MORUS640_BLOCK_SIZE;
	}

	if (size > 0) {
		union morus640_block_in tail;

		memcpy(tail.bytes, src, size);
		memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size);

		crypto_morus640_load_a(&pt, tail.bytes);
		ct = pt;
		crypto_morus640_core(state, &ct);
		crypto_morus640_store_a(tail.bytes, &ct);
		crypto_morus640_update(state, &pt);

		memcpy(dst, tail.bytes, size);
	}
}
|
||||
|
||||
/*
 * Decrypt @size bytes from @src into @dst and absorb the plaintext.
 *
 * @state: cipher state (updated)
 * @dst:   output buffer, at least @size bytes
 * @src:   input ciphertext, exactly @size valid bytes
 * @size:  number of bytes to process
 *
 * Whole blocks: the ciphertext is XORed with the keystream to recover the
 * plaintext, which is written out and fed into the state update.
 *
 * Trailing partial block: the ciphertext is first copied into a
 * zero-padded, aligned local buffer and loaded from there.  Loading it
 * directly from @src would read past the end of the input and would use
 * the aligned accessor on a pointer of unknown alignment.  After
 * decryption the bytes beyond @size are cleared again, because the state
 * must absorb the zero-padded plaintext, not the keystream residue.
 */
static void crypto_morus640_decrypt_chunk(struct morus640_state *state, u8 *dst,
					  const u8 *src, unsigned int size)
{
	struct morus640_block m;

	if (MORUS640_ALIGNED(src) && MORUS640_ALIGNED(dst)) {
		while (size >= MORUS640_BLOCK_SIZE) {
			crypto_morus640_load_a(&m, src);
			crypto_morus640_core(state, &m);
			crypto_morus640_store_a(dst, &m);
			crypto_morus640_update(state, &m);

			src += MORUS640_BLOCK_SIZE;
			dst += MORUS640_BLOCK_SIZE;
			size -= MORUS640_BLOCK_SIZE;
		}
	} else {
		while (size >= MORUS640_BLOCK_SIZE) {
			crypto_morus640_load_u(&m, src);
			crypto_morus640_core(state, &m);
			crypto_morus640_store_u(dst, &m);
			crypto_morus640_update(state, &m);

			src += MORUS640_BLOCK_SIZE;
			dst += MORUS640_BLOCK_SIZE;
			size -= MORUS640_BLOCK_SIZE;
		}
	}

	if (size > 0) {
		union morus640_block_in tail;

		/* Stage the partial ciphertext block, zero-padded. */
		memcpy(tail.bytes, src, size);
		memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size);

		crypto_morus640_load_a(&m, tail.bytes);
		crypto_morus640_core(state, &m);
		crypto_morus640_store_a(tail.bytes, &m);

		/* Drop keystream residue before absorbing the plaintext. */
		memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size);
		crypto_morus640_load_a(&m, tail.bytes);
		crypto_morus640_update(state, &m);

		memcpy(dst, tail.bytes, size);
	}
}
|
||||
|
||||
static void crypto_morus640_init(struct morus640_state *state,
|
||||
const struct morus640_block *key,
|
||||
const u8 *iv)
|
||||
{
|
||||
static const struct morus640_block z = {};
|
||||
|
||||
unsigned int i;
|
||||
|
||||
crypto_morus640_load(&state->s[0], iv);
|
||||
state->s[1] = *key;
|
||||
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
|
||||
state->s[2].words[i] = U32_C(0xFFFFFFFF);
|
||||
state->s[3] = crypto_morus640_const[0];
|
||||
state->s[4] = crypto_morus640_const[1];
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
crypto_morus640_update(state, &z);
|
||||
|
||||
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
|
||||
state->s[1].words[i] ^= key->words[i];
|
||||
}
|
||||
|
||||
static void crypto_morus640_process_ad(struct morus640_state *state,
|
||||
struct scatterlist *sg_src,
|
||||
unsigned int assoclen)
|
||||
{
|
||||
struct scatter_walk walk;
|
||||
struct morus640_block m;
|
||||
union morus640_block_in buf;
|
||||
unsigned int pos = 0;
|
||||
|
||||
scatterwalk_start(&walk, sg_src);
|
||||
while (assoclen != 0) {
|
||||
unsigned int size = scatterwalk_clamp(&walk, assoclen);
|
||||
unsigned int left = size;
|
||||
void *mapped = scatterwalk_map(&walk);
|
||||
const u8 *src = (const u8 *)mapped;
|
||||
|
||||
if (pos + size >= MORUS640_BLOCK_SIZE) {
|
||||
if (pos > 0) {
|
||||
unsigned int fill = MORUS640_BLOCK_SIZE - pos;
|
||||
memcpy(buf.bytes + pos, src, fill);
|
||||
|
||||
crypto_morus640_load_a(&m, buf.bytes);
|
||||
crypto_morus640_update(state, &m);
|
||||
|
||||
pos = 0;
|
||||
left -= fill;
|
||||
src += fill;
|
||||
}
|
||||
|
||||
crypto_morus640_ad(state, src, left);
|
||||
src += left & ~(MORUS640_BLOCK_SIZE - 1);
|
||||
left &= MORUS640_BLOCK_SIZE - 1;
|
||||
}
|
||||
|
||||
memcpy(buf.bytes + pos, src, left);
|
||||
|
||||
pos += left;
|
||||
assoclen -= size;
|
||||
scatterwalk_unmap(mapped);
|
||||
scatterwalk_advance(&walk, size);
|
||||
scatterwalk_done(&walk, 0, assoclen);
|
||||
}
|
||||
|
||||
if (pos > 0) {
|
||||
memset(buf.bytes + pos, 0, MORUS640_BLOCK_SIZE - pos);
|
||||
|
||||
crypto_morus640_load_a(&m, buf.bytes);
|
||||
crypto_morus640_update(state, &m);
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_morus640_process_crypt(struct morus640_state *state,
|
||||
struct aead_request *req,
|
||||
const struct morus640_ops *ops)
|
||||
{
|
||||
struct skcipher_walk walk;
|
||||
u8 *dst;
|
||||
const u8 *src;
|
||||
|
||||
ops->skcipher_walk_init(&walk, req, false);
|
||||
|
||||
while (walk.nbytes) {
|
||||
src = walk.src.virt.addr;
|
||||
dst = walk.dst.virt.addr;
|
||||
|
||||
ops->crypt_chunk(state, dst, src, walk.nbytes);
|
||||
|
||||
skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Finalise the state and fold the authentication tag into @tag_xor.
 *
 * @state:    cipher state after all AD and payload have been processed
 * @tag_xor:  block the tag output is XORed into (zero for encryption, the
 *            received tag for decryption)
 * @assoclen: length of the associated data in bytes
 * @cryptlen: length of the payload in bytes
 *
 * The 64-bit bit lengths of AD and payload, each split into a low and a
 * high 32-bit word, form the finalisation message block.  Blocks hold
 * CPU-order u32 words (byte-order conversion happens only in the
 * load/store helpers), so the lengths are stored as plain native values;
 * passing them through cpu_to_le32() would byte-swap them on big-endian
 * machines and produce a wrong tag there.
 */
static void crypto_morus640_final(struct morus640_state *state,
				  struct morus640_block *tag_xor,
				  u64 assoclen, u64 cryptlen)
{
	u64 assocbits = assoclen * 8;
	u64 cryptbits = cryptlen * 8;

	struct morus640_block tmp;
	unsigned int i;

	tmp.words[0] = (u32)assocbits;
	tmp.words[1] = (u32)(assocbits >> 32);
	tmp.words[2] = (u32)cryptbits;
	tmp.words[3] = (u32)(cryptbits >> 32);

	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
		state->s[4].words[i] ^= state->s[0].words[i];

	for (i = 0; i < 10; i++)
		crypto_morus640_update(state, &tmp);

	crypto_morus640_core(state, tag_xor);
}
|
||||
|
||||
static int crypto_morus640_setkey(struct crypto_aead *aead, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct morus640_ctx *ctx = crypto_aead_ctx(aead);
|
||||
|
||||
if (keylen != MORUS640_BLOCK_SIZE) {
|
||||
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
crypto_morus640_load(&ctx->key, key);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_morus640_setauthsize(struct crypto_aead *tfm,
|
||||
unsigned int authsize)
|
||||
{
|
||||
return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
|
||||
}
|
||||
|
||||
static void crypto_morus640_crypt(struct aead_request *req,
|
||||
struct morus640_block *tag_xor,
|
||||
unsigned int cryptlen,
|
||||
const struct morus640_ops *ops)
|
||||
{
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct morus640_state state;
|
||||
|
||||
crypto_morus640_init(&state, &ctx->key, req->iv);
|
||||
crypto_morus640_process_ad(&state, req->src, req->assoclen);
|
||||
crypto_morus640_process_crypt(&state, req, ops);
|
||||
crypto_morus640_final(&state, tag_xor, req->assoclen, cryptlen);
|
||||
}
|
||||
|
||||
static int crypto_morus640_encrypt(struct aead_request *req)
|
||||
{
|
||||
static const struct morus640_ops ops = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_encrypt,
|
||||
.crypt_chunk = crypto_morus640_encrypt_chunk,
|
||||
};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct morus640_block tag = {};
|
||||
union morus640_block_in tag_out;
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen;
|
||||
|
||||
crypto_morus640_crypt(req, &tag, cryptlen, &ops);
|
||||
crypto_morus640_store(tag_out.bytes, &tag);
|
||||
|
||||
scatterwalk_map_and_copy(tag_out.bytes, req->dst,
|
||||
req->assoclen + cryptlen, authsize, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_morus640_decrypt(struct aead_request *req)
|
||||
{
|
||||
static const struct morus640_ops ops = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_decrypt,
|
||||
.crypt_chunk = crypto_morus640_decrypt_chunk,
|
||||
};
|
||||
static const u8 zeros[MORUS640_BLOCK_SIZE] = {};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
union morus640_block_in tag_in;
|
||||
struct morus640_block tag;
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen - authsize;
|
||||
|
||||
scatterwalk_map_and_copy(tag_in.bytes, req->src,
|
||||
req->assoclen + cryptlen, authsize, 0);
|
||||
|
||||
crypto_morus640_load(&tag, tag_in.bytes);
|
||||
crypto_morus640_crypt(req, &tag, cryptlen, &ops);
|
||||
crypto_morus640_store(tag_in.bytes, &tag);
|
||||
|
||||
return crypto_memneq(tag_in.bytes, zeros, authsize) ? -EBADMSG : 0;
|
||||
}
|
||||
|
||||
/* Transform constructor: nothing to set up, always returns 0. */
static int crypto_morus640_init_tfm(struct crypto_aead *tfm)
{
	return 0;
}
|
||||
|
||||
/* Transform destructor: intentionally empty, nothing to release. */
static void crypto_morus640_exit_tfm(struct crypto_aead *tfm)
{
}
|
||||
|
||||
static struct aead_alg crypto_morus640_alg = {
|
||||
.setkey = crypto_morus640_setkey,
|
||||
.setauthsize = crypto_morus640_setauthsize,
|
||||
.encrypt = crypto_morus640_encrypt,
|
||||
.decrypt = crypto_morus640_decrypt,
|
||||
.init = crypto_morus640_init_tfm,
|
||||
.exit = crypto_morus640_exit_tfm,
|
||||
|
||||
.ivsize = MORUS_NONCE_SIZE,
|
||||
.maxauthsize = MORUS_MAX_AUTH_SIZE,
|
||||
.chunksize = MORUS640_BLOCK_SIZE,
|
||||
|
||||
.base = {
|
||||
.cra_flags = CRYPTO_ALG_TYPE_AEAD,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct morus640_ctx),
|
||||
.cra_alignmask = 0,
|
||||
|
||||
.cra_priority = 100,
|
||||
|
||||
.cra_name = "morus640",
|
||||
.cra_driver_name = "morus640-generic",
|
||||
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
||||
/* Module entry point: register the AEAD and pass its result through. */
static int __init crypto_morus640_module_init(void)
{
	return crypto_register_aead(&crypto_morus640_alg);
}
|
||||
|
||||
/* Module exit point: unregister the AEAD registered at init time. */
static void __exit crypto_morus640_module_exit(void)
{
	crypto_unregister_aead(&crypto_morus640_alg);
}
|
||||
|
||||
module_init(crypto_morus640_module_init);
|
||||
module_exit(crypto_morus640_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("MORUS-640 AEAD algorithm");
|
||||
MODULE_ALIAS_CRYPTO("morus640");
|
||||
MODULE_ALIAS_CRYPTO("morus640-generic");
|
@ -14,6 +14,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
@ -72,7 +73,7 @@ static int crypto_pcbc_encrypt_inplace(struct skcipher_request *req,
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u8 *src = walk->src.virt.addr;
|
||||
u8 *iv = walk->iv;
|
||||
u8 tmpbuf[bsize];
|
||||
u8 tmpbuf[MAX_CIPHER_BLOCKSIZE];
|
||||
|
||||
do {
|
||||
memcpy(tmpbuf, src, bsize);
|
||||
@ -144,7 +145,7 @@ static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req,
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u8 *src = walk->src.virt.addr;
|
||||
u8 *iv = walk->iv;
|
||||
u8 tmpbuf[bsize] __aligned(__alignof__(u32));
|
||||
u8 tmpbuf[MAX_CIPHER_BLOCKSIZE] __aligned(__alignof__(u32));
|
||||
|
||||
do {
|
||||
memcpy(tmpbuf, src, bsize);
|
||||
|
@ -215,7 +215,6 @@ static int rsa_verify(struct akcipher_request *req)
|
||||
goto err_free_m;
|
||||
}
|
||||
|
||||
ret = -ENOMEM;
|
||||
s = mpi_read_raw_from_sgl(req->src, req->src_len);
|
||||
if (!s) {
|
||||
ret = -ENOMEM;
|
||||
|
@ -21,9 +21,17 @@
|
||||
|
||||
#include <asm/unaligned.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/salsa20.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#define SALSA20_IV_SIZE 8
|
||||
#define SALSA20_MIN_KEY_SIZE 16
|
||||
#define SALSA20_MAX_KEY_SIZE 32
|
||||
#define SALSA20_BLOCK_SIZE 64
|
||||
|
||||
struct salsa20_ctx {
|
||||
u32 initial_state[16];
|
||||
};
|
||||
|
||||
static void salsa20_block(u32 *state, __le32 *stream)
|
||||
{
|
||||
u32 x[16];
|
||||
@ -93,16 +101,15 @@ static void salsa20_docrypt(u32 *state, u8 *dst, const u8 *src,
|
||||
}
|
||||
}
|
||||
|
||||
void crypto_salsa20_init(u32 *state, const struct salsa20_ctx *ctx,
|
||||
static void salsa20_init(u32 *state, const struct salsa20_ctx *ctx,
|
||||
const u8 *iv)
|
||||
{
|
||||
memcpy(state, ctx->initial_state, sizeof(ctx->initial_state));
|
||||
state[6] = get_unaligned_le32(iv + 0);
|
||||
state[7] = get_unaligned_le32(iv + 4);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_salsa20_init);
|
||||
|
||||
int crypto_salsa20_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
static int salsa20_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
unsigned int keysize)
|
||||
{
|
||||
static const char sigma[16] = "expand 32-byte k";
|
||||
@ -143,7 +150,6 @@ int crypto_salsa20_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_salsa20_setkey);
|
||||
|
||||
static int salsa20_crypt(struct skcipher_request *req)
|
||||
{
|
||||
@ -155,7 +161,7 @@ static int salsa20_crypt(struct skcipher_request *req)
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
crypto_salsa20_init(state, ctx, walk.iv);
|
||||
salsa20_init(state, ctx, walk.iv);
|
||||
|
||||
while (walk.nbytes > 0) {
|
||||
unsigned int nbytes = walk.nbytes;
|
||||
@ -183,7 +189,7 @@ static struct skcipher_alg alg = {
|
||||
.max_keysize = SALSA20_MAX_KEY_SIZE,
|
||||
.ivsize = SALSA20_IV_SIZE,
|
||||
.chunksize = SALSA20_BLOCK_SIZE,
|
||||
.setkey = crypto_salsa20_setkey,
|
||||
.setkey = salsa20_setkey,
|
||||
.encrypt = salsa20_crypt,
|
||||
.decrypt = salsa20_crypt,
|
||||
};
|
||||
|
@ -190,21 +190,23 @@ static void sm4_do_crypt(const u32 *rk, u32 *out, const u32 *in)
|
||||
|
||||
/* encrypt a block of text */
|
||||
|
||||
static void sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
sm4_do_crypt(ctx->rkey_enc, (u32 *)out, (u32 *)in);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_sm4_encrypt);
|
||||
|
||||
/* decrypt a block of text */
|
||||
|
||||
static void sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
sm4_do_crypt(ctx->rkey_dec, (u32 *)out, (u32 *)in);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_sm4_decrypt);
|
||||
|
||||
static struct crypto_alg sm4_alg = {
|
||||
.cra_name = "sm4",
|
||||
@ -219,8 +221,8 @@ static struct crypto_alg sm4_alg = {
|
||||
.cia_min_keysize = SM4_KEY_SIZE,
|
||||
.cia_max_keysize = SM4_KEY_SIZE,
|
||||
.cia_setkey = crypto_sm4_set_key,
|
||||
.cia_encrypt = sm4_encrypt,
|
||||
.cia_decrypt = sm4_decrypt
|
||||
.cia_encrypt = crypto_sm4_encrypt,
|
||||
.cia_decrypt = crypto_sm4_decrypt
|
||||
}
|
||||
}
|
||||
};
|
||||
|
122
crypto/tcrypt.c
122
crypto/tcrypt.c
@ -158,9 +158,9 @@ struct test_mb_aead_data {
|
||||
};
|
||||
|
||||
static int do_mult_aead_op(struct test_mb_aead_data *data, int enc,
|
||||
u32 num_mb)
|
||||
u32 num_mb, int *rc)
|
||||
{
|
||||
int i, rc[num_mb], err = 0;
|
||||
int i, err = 0;
|
||||
|
||||
/* Fire up a bunch of concurrent requests */
|
||||
for (i = 0; i < num_mb; i++) {
|
||||
@ -188,18 +188,26 @@ static int test_mb_aead_jiffies(struct test_mb_aead_data *data, int enc,
|
||||
{
|
||||
unsigned long start, end;
|
||||
int bcount;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
int *rc;
|
||||
|
||||
rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
|
||||
if (!rc)
|
||||
return -ENOMEM;
|
||||
|
||||
for (start = jiffies, end = start + secs * HZ, bcount = 0;
|
||||
time_before(jiffies, end); bcount++) {
|
||||
ret = do_mult_aead_op(data, enc, num_mb);
|
||||
ret = do_mult_aead_op(data, enc, num_mb, rc);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pr_cont("%d operations in %d seconds (%ld bytes)\n",
|
||||
bcount * num_mb, secs, (long)bcount * blen * num_mb);
|
||||
return 0;
|
||||
|
||||
out:
|
||||
kfree(rc);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int test_mb_aead_cycles(struct test_mb_aead_data *data, int enc,
|
||||
@ -208,10 +216,15 @@ static int test_mb_aead_cycles(struct test_mb_aead_data *data, int enc,
|
||||
unsigned long cycles = 0;
|
||||
int ret = 0;
|
||||
int i;
|
||||
int *rc;
|
||||
|
||||
rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
|
||||
if (!rc)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Warm-up run. */
|
||||
for (i = 0; i < 4; i++) {
|
||||
ret = do_mult_aead_op(data, enc, num_mb);
|
||||
ret = do_mult_aead_op(data, enc, num_mb, rc);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
@ -221,7 +234,7 @@ static int test_mb_aead_cycles(struct test_mb_aead_data *data, int enc,
|
||||
cycles_t start, end;
|
||||
|
||||
start = get_cycles();
|
||||
ret = do_mult_aead_op(data, enc, num_mb);
|
||||
ret = do_mult_aead_op(data, enc, num_mb, rc);
|
||||
end = get_cycles();
|
||||
|
||||
if (ret)
|
||||
@ -230,11 +243,11 @@ static int test_mb_aead_cycles(struct test_mb_aead_data *data, int enc,
|
||||
cycles += end - start;
|
||||
}
|
||||
|
||||
out:
|
||||
if (ret == 0)
|
||||
pr_cont("1 operation in %lu cycles (%d bytes)\n",
|
||||
(cycles + 4) / (8 * num_mb), blen);
|
||||
pr_cont("1 operation in %lu cycles (%d bytes)\n",
|
||||
(cycles + 4) / (8 * num_mb), blen);
|
||||
|
||||
out:
|
||||
kfree(rc);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -705,9 +718,10 @@ struct test_mb_ahash_data {
|
||||
char *xbuf[XBUFSIZE];
|
||||
};
|
||||
|
||||
static inline int do_mult_ahash_op(struct test_mb_ahash_data *data, u32 num_mb)
|
||||
static inline int do_mult_ahash_op(struct test_mb_ahash_data *data, u32 num_mb,
|
||||
int *rc)
|
||||
{
|
||||
int i, rc[num_mb], err = 0;
|
||||
int i, err = 0;
|
||||
|
||||
/* Fire up a bunch of concurrent requests */
|
||||
for (i = 0; i < num_mb; i++)
|
||||
@ -731,18 +745,26 @@ static int test_mb_ahash_jiffies(struct test_mb_ahash_data *data, int blen,
|
||||
{
|
||||
unsigned long start, end;
|
||||
int bcount;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
int *rc;
|
||||
|
||||
rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
|
||||
if (!rc)
|
||||
return -ENOMEM;
|
||||
|
||||
for (start = jiffies, end = start + secs * HZ, bcount = 0;
|
||||
time_before(jiffies, end); bcount++) {
|
||||
ret = do_mult_ahash_op(data, num_mb);
|
||||
ret = do_mult_ahash_op(data, num_mb, rc);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pr_cont("%d operations in %d seconds (%ld bytes)\n",
|
||||
bcount * num_mb, secs, (long)bcount * blen * num_mb);
|
||||
return 0;
|
||||
|
||||
out:
|
||||
kfree(rc);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int test_mb_ahash_cycles(struct test_mb_ahash_data *data, int blen,
|
||||
@ -751,10 +773,15 @@ static int test_mb_ahash_cycles(struct test_mb_ahash_data *data, int blen,
|
||||
unsigned long cycles = 0;
|
||||
int ret = 0;
|
||||
int i;
|
||||
int *rc;
|
||||
|
||||
rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
|
||||
if (!rc)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Warm-up run. */
|
||||
for (i = 0; i < 4; i++) {
|
||||
ret = do_mult_ahash_op(data, num_mb);
|
||||
ret = do_mult_ahash_op(data, num_mb, rc);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
@ -764,7 +791,7 @@ static int test_mb_ahash_cycles(struct test_mb_ahash_data *data, int blen,
|
||||
cycles_t start, end;
|
||||
|
||||
start = get_cycles();
|
||||
ret = do_mult_ahash_op(data, num_mb);
|
||||
ret = do_mult_ahash_op(data, num_mb, rc);
|
||||
end = get_cycles();
|
||||
|
||||
if (ret)
|
||||
@ -773,11 +800,11 @@ static int test_mb_ahash_cycles(struct test_mb_ahash_data *data, int blen,
|
||||
cycles += end - start;
|
||||
}
|
||||
|
||||
out:
|
||||
if (ret == 0)
|
||||
pr_cont("1 operation in %lu cycles (%d bytes)\n",
|
||||
(cycles + 4) / (8 * num_mb), blen);
|
||||
pr_cont("1 operation in %lu cycles (%d bytes)\n",
|
||||
(cycles + 4) / (8 * num_mb), blen);
|
||||
|
||||
out:
|
||||
kfree(rc);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1118,9 +1145,9 @@ struct test_mb_skcipher_data {
|
||||
};
|
||||
|
||||
static int do_mult_acipher_op(struct test_mb_skcipher_data *data, int enc,
|
||||
u32 num_mb)
|
||||
u32 num_mb, int *rc)
|
||||
{
|
||||
int i, rc[num_mb], err = 0;
|
||||
int i, err = 0;
|
||||
|
||||
/* Fire up a bunch of concurrent requests */
|
||||
for (i = 0; i < num_mb; i++) {
|
||||
@ -1148,18 +1175,26 @@ static int test_mb_acipher_jiffies(struct test_mb_skcipher_data *data, int enc,
|
||||
{
|
||||
unsigned long start, end;
|
||||
int bcount;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
int *rc;
|
||||
|
||||
rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
|
||||
if (!rc)
|
||||
return -ENOMEM;
|
||||
|
||||
for (start = jiffies, end = start + secs * HZ, bcount = 0;
|
||||
time_before(jiffies, end); bcount++) {
|
||||
ret = do_mult_acipher_op(data, enc, num_mb);
|
||||
ret = do_mult_acipher_op(data, enc, num_mb, rc);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pr_cont("%d operations in %d seconds (%ld bytes)\n",
|
||||
bcount * num_mb, secs, (long)bcount * blen * num_mb);
|
||||
return 0;
|
||||
|
||||
out:
|
||||
kfree(rc);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int test_mb_acipher_cycles(struct test_mb_skcipher_data *data, int enc,
|
||||
@ -1168,10 +1203,15 @@ static int test_mb_acipher_cycles(struct test_mb_skcipher_data *data, int enc,
|
||||
unsigned long cycles = 0;
|
||||
int ret = 0;
|
||||
int i;
|
||||
int *rc;
|
||||
|
||||
rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
|
||||
if (!rc)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Warm-up run. */
|
||||
for (i = 0; i < 4; i++) {
|
||||
ret = do_mult_acipher_op(data, enc, num_mb);
|
||||
ret = do_mult_acipher_op(data, enc, num_mb, rc);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
@ -1181,7 +1221,7 @@ static int test_mb_acipher_cycles(struct test_mb_skcipher_data *data, int enc,
|
||||
cycles_t start, end;
|
||||
|
||||
start = get_cycles();
|
||||
ret = do_mult_acipher_op(data, enc, num_mb);
|
||||
ret = do_mult_acipher_op(data, enc, num_mb, rc);
|
||||
end = get_cycles();
|
||||
|
||||
if (ret)
|
||||
@ -1190,11 +1230,11 @@ static int test_mb_acipher_cycles(struct test_mb_skcipher_data *data, int enc,
|
||||
cycles += end - start;
|
||||
}
|
||||
|
||||
out:
|
||||
if (ret == 0)
|
||||
pr_cont("1 operation in %lu cycles (%d bytes)\n",
|
||||
(cycles + 4) / (8 * num_mb), blen);
|
||||
pr_cont("1 operation in %lu cycles (%d bytes)\n",
|
||||
(cycles + 4) / (8 * num_mb), blen);
|
||||
|
||||
out:
|
||||
kfree(rc);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1606,7 +1646,7 @@ static inline int tcrypt_test(const char *alg)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int do_test(const char *alg, u32 type, u32 mask, int m)
|
||||
static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
{
|
||||
int i;
|
||||
int ret = 0;
|
||||
@ -1621,7 +1661,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
|
||||
}
|
||||
|
||||
for (i = 1; i < 200; i++)
|
||||
ret += do_test(NULL, 0, 0, i);
|
||||
ret += do_test(NULL, 0, 0, i, num_mb);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
@ -1902,10 +1942,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
|
||||
ret += tcrypt_test("vmac(aes)");
|
||||
break;
|
||||
|
||||
case 110:
|
||||
ret += tcrypt_test("hmac(crc32)");
|
||||
break;
|
||||
|
||||
case 111:
|
||||
ret += tcrypt_test("hmac(sha3-224)");
|
||||
break;
|
||||
@ -2903,7 +2939,7 @@ static int __init tcrypt_mod_init(void)
|
||||
goto err_free_tv;
|
||||
}
|
||||
|
||||
err = do_test(alg, type, mask, mode);
|
||||
err = do_test(alg, type, mask, mode, num_mb);
|
||||
|
||||
if (err) {
|
||||
printk(KERN_ERR "tcrypt: one or more tests failed!\n");
|
||||
|
613
crypto/testmgr.c
613
crypto/testmgr.c
File diff suppressed because it is too large
Load Diff
18635
crypto/testmgr.h
18635
crypto/testmgr.h
File diff suppressed because it is too large
Load Diff
265
crypto/zstd.c
Normal file
265
crypto/zstd.c
Normal file
@ -0,0 +1,265 @@
|
||||
/*
|
||||
* Cryptographic API.
|
||||
*
|
||||
* Copyright (c) 2017-present, Facebook, Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*/
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/net.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/zstd.h>
|
||||
#include <crypto/internal/scompress.h>
|
||||
|
||||
|
||||
#define ZSTD_DEF_LEVEL 3
|
||||
|
||||
struct zstd_ctx {
|
||||
ZSTD_CCtx *cctx;
|
||||
ZSTD_DCtx *dctx;
|
||||
void *cwksp;
|
||||
void *dwksp;
|
||||
};
|
||||
|
||||
static ZSTD_parameters zstd_params(void)
|
||||
{
|
||||
return ZSTD_getParams(ZSTD_DEF_LEVEL, 0, 0);
|
||||
}
|
||||
|
||||
static int zstd_comp_init(struct zstd_ctx *ctx)
|
||||
{
|
||||
int ret = 0;
|
||||
const ZSTD_parameters params = zstd_params();
|
||||
const size_t wksp_size = ZSTD_CCtxWorkspaceBound(params.cParams);
|
||||
|
||||
ctx->cwksp = vzalloc(wksp_size);
|
||||
if (!ctx->cwksp) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ctx->cctx = ZSTD_initCCtx(ctx->cwksp, wksp_size);
|
||||
if (!ctx->cctx) {
|
||||
ret = -EINVAL;
|
||||
goto out_free;
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
out_free:
|
||||
vfree(ctx->cwksp);
|
||||
goto out;
|
||||
}
|
||||
|
||||
static int zstd_decomp_init(struct zstd_ctx *ctx)
|
||||
{
|
||||
int ret = 0;
|
||||
const size_t wksp_size = ZSTD_DCtxWorkspaceBound();
|
||||
|
||||
ctx->dwksp = vzalloc(wksp_size);
|
||||
if (!ctx->dwksp) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ctx->dctx = ZSTD_initDCtx(ctx->dwksp, wksp_size);
|
||||
if (!ctx->dctx) {
|
||||
ret = -EINVAL;
|
||||
goto out_free;
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
out_free:
|
||||
vfree(ctx->dwksp);
|
||||
goto out;
|
||||
}
|
||||
|
||||
static void zstd_comp_exit(struct zstd_ctx *ctx)
|
||||
{
|
||||
vfree(ctx->cwksp);
|
||||
ctx->cwksp = NULL;
|
||||
ctx->cctx = NULL;
|
||||
}
|
||||
|
||||
static void zstd_decomp_exit(struct zstd_ctx *ctx)
|
||||
{
|
||||
vfree(ctx->dwksp);
|
||||
ctx->dwksp = NULL;
|
||||
ctx->dctx = NULL;
|
||||
}
|
||||
|
||||
static int __zstd_init(void *ctx)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = zstd_comp_init(ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = zstd_decomp_init(ctx);
|
||||
if (ret)
|
||||
zstd_comp_exit(ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void *zstd_alloc_ctx(struct crypto_scomp *tfm)
|
||||
{
|
||||
int ret;
|
||||
struct zstd_ctx *ctx;
|
||||
|
||||
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
|
||||
if (!ctx)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
ret = __zstd_init(ctx);
|
||||
if (ret) {
|
||||
kfree(ctx);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static int zstd_init(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
return __zstd_init(ctx);
|
||||
}
|
||||
|
||||
static void __zstd_exit(void *ctx)
|
||||
{
|
||||
zstd_comp_exit(ctx);
|
||||
zstd_decomp_exit(ctx);
|
||||
}
|
||||
|
||||
static void zstd_free_ctx(struct crypto_scomp *tfm, void *ctx)
|
||||
{
|
||||
__zstd_exit(ctx);
|
||||
kzfree(ctx);
|
||||
}
|
||||
|
||||
static void zstd_exit(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
__zstd_exit(ctx);
|
||||
}
|
||||
|
||||
static int __zstd_compress(const u8 *src, unsigned int slen,
|
||||
u8 *dst, unsigned int *dlen, void *ctx)
|
||||
{
|
||||
size_t out_len;
|
||||
struct zstd_ctx *zctx = ctx;
|
||||
const ZSTD_parameters params = zstd_params();
|
||||
|
||||
out_len = ZSTD_compressCCtx(zctx->cctx, dst, *dlen, src, slen, params);
|
||||
if (ZSTD_isError(out_len))
|
||||
return -EINVAL;
|
||||
*dlen = out_len;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int zstd_compress(struct crypto_tfm *tfm, const u8 *src,
|
||||
unsigned int slen, u8 *dst, unsigned int *dlen)
|
||||
{
|
||||
struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
return __zstd_compress(src, slen, dst, dlen, ctx);
|
||||
}
|
||||
|
||||
static int zstd_scompress(struct crypto_scomp *tfm, const u8 *src,
|
||||
unsigned int slen, u8 *dst, unsigned int *dlen,
|
||||
void *ctx)
|
||||
{
|
||||
return __zstd_compress(src, slen, dst, dlen, ctx);
|
||||
}
|
||||
|
||||
static int __zstd_decompress(const u8 *src, unsigned int slen,
|
||||
u8 *dst, unsigned int *dlen, void *ctx)
|
||||
{
|
||||
size_t out_len;
|
||||
struct zstd_ctx *zctx = ctx;
|
||||
|
||||
out_len = ZSTD_decompressDCtx(zctx->dctx, dst, *dlen, src, slen);
|
||||
if (ZSTD_isError(out_len))
|
||||
return -EINVAL;
|
||||
*dlen = out_len;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int zstd_decompress(struct crypto_tfm *tfm, const u8 *src,
|
||||
unsigned int slen, u8 *dst, unsigned int *dlen)
|
||||
{
|
||||
struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
return __zstd_decompress(src, slen, dst, dlen, ctx);
|
||||
}
|
||||
|
||||
static int zstd_sdecompress(struct crypto_scomp *tfm, const u8 *src,
|
||||
unsigned int slen, u8 *dst, unsigned int *dlen,
|
||||
void *ctx)
|
||||
{
|
||||
return __zstd_decompress(src, slen, dst, dlen, ctx);
|
||||
}
|
||||
|
||||
static struct crypto_alg alg = {
|
||||
.cra_name = "zstd",
|
||||
.cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
|
||||
.cra_ctxsize = sizeof(struct zstd_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = zstd_init,
|
||||
.cra_exit = zstd_exit,
|
||||
.cra_u = { .compress = {
|
||||
.coa_compress = zstd_compress,
|
||||
.coa_decompress = zstd_decompress } }
|
||||
};
|
||||
|
||||
static struct scomp_alg scomp = {
|
||||
.alloc_ctx = zstd_alloc_ctx,
|
||||
.free_ctx = zstd_free_ctx,
|
||||
.compress = zstd_scompress,
|
||||
.decompress = zstd_sdecompress,
|
||||
.base = {
|
||||
.cra_name = "zstd",
|
||||
.cra_driver_name = "zstd-scomp",
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
||||
static int __init zstd_mod_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = crypto_register_alg(&alg);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = crypto_register_scomp(&scomp);
|
||||
if (ret)
|
||||
crypto_unregister_alg(&alg);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __exit zstd_mod_fini(void)
|
||||
{
|
||||
crypto_unregister_alg(&alg);
|
||||
crypto_unregister_scomp(&scomp);
|
||||
}
|
||||
|
||||
module_init(zstd_mod_init);
|
||||
module_exit(zstd_mod_fini);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("Zstd Compression Algorithm");
|
||||
MODULE_ALIAS_CRYPTO("zstd");
|
@ -347,6 +347,7 @@ config HW_RANDOM_STM32
|
||||
tristate "STMicroelectronics STM32 random number generator"
|
||||
depends on HW_RANDOM && (ARCH_STM32 || COMPILE_TEST)
|
||||
depends on HAS_IOMEM
|
||||
default HW_RANDOM
|
||||
help
|
||||
This driver provides kernel-side support for the Random Number
|
||||
Generator hardware found on STM32 microcontrollers.
|
||||
|
@ -435,7 +435,7 @@ static int n2rng_data_read(struct hwrng *rng, u32 *data)
|
||||
*data = np->test_data & 0xffffffff;
|
||||
len = 4;
|
||||
} else {
|
||||
dev_err(&np->op->dev, "RNG error, restesting\n");
|
||||
dev_err(&np->op->dev, "RNG error, retesting\n");
|
||||
np->flags &= ~N2RNG_FLAG_READY;
|
||||
if (!(np->flags & N2RNG_FLAG_SHUTDOWN))
|
||||
schedule_delayed_work(&np->work, 0);
|
||||
|
@ -187,8 +187,13 @@ static int stm32_rng_runtime_resume(struct device *dev)
|
||||
}
|
||||
#endif
|
||||
|
||||
static UNIVERSAL_DEV_PM_OPS(stm32_rng_pm_ops, stm32_rng_runtime_suspend,
|
||||
stm32_rng_runtime_resume, NULL);
|
||||
static const struct dev_pm_ops stm32_rng_pm_ops = {
|
||||
SET_RUNTIME_PM_OPS(stm32_rng_runtime_suspend,
|
||||
stm32_rng_runtime_resume, NULL)
|
||||
SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
|
||||
pm_runtime_force_resume)
|
||||
};
|
||||
|
||||
|
||||
static const struct of_device_id stm32_rng_match[] = {
|
||||
{
|
||||
|
@ -135,7 +135,7 @@ static int via_rng_init(struct hwrng *rng)
|
||||
* is always enabled if CPUID rng_en is set. There is no
|
||||
* RNG configuration like it used to be the case in this
|
||||
* register */
|
||||
if ((c->x86 == 6) && (c->x86_model >= 0x0f)) {
|
||||
if (((c->x86 == 6) && (c->x86_model >= 0x0f)) || (c->x86 > 6)){
|
||||
if (!boot_cpu_has(X86_FEATURE_XSTORE_EN)) {
|
||||
pr_err(PFX "can't enable hardware RNG "
|
||||
"if XSTORE is not enabled\n");
|
||||
|
@ -302,6 +302,7 @@ config CRYPTO_DEV_PPC4XX
|
||||
select CRYPTO_AEAD
|
||||
select CRYPTO_AES
|
||||
select CRYPTO_CCM
|
||||
select CRYPTO_CTR
|
||||
select CRYPTO_GCM
|
||||
select CRYPTO_BLKCIPHER
|
||||
help
|
||||
@ -419,7 +420,7 @@ config CRYPTO_DEV_EXYNOS_RNG
|
||||
config CRYPTO_DEV_S5P
|
||||
tristate "Support for Samsung S5PV210/Exynos crypto accelerator"
|
||||
depends on ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST
|
||||
depends on HAS_IOMEM && HAS_DMA
|
||||
depends on HAS_IOMEM
|
||||
select CRYPTO_AES
|
||||
select CRYPTO_BLKCIPHER
|
||||
help
|
||||
@ -466,7 +467,6 @@ endif # if CRYPTO_DEV_UX500
|
||||
|
||||
config CRYPTO_DEV_ATMEL_AUTHENC
|
||||
tristate "Support for Atmel IPSEC/SSL hw accelerator"
|
||||
depends on HAS_DMA
|
||||
depends on ARCH_AT91 || COMPILE_TEST
|
||||
select CRYPTO_AUTHENC
|
||||
select CRYPTO_DEV_ATMEL_AES
|
||||
@ -479,7 +479,6 @@ config CRYPTO_DEV_ATMEL_AUTHENC
|
||||
|
||||
config CRYPTO_DEV_ATMEL_AES
|
||||
tristate "Support for Atmel AES hw accelerator"
|
||||
depends on HAS_DMA
|
||||
depends on ARCH_AT91 || COMPILE_TEST
|
||||
select CRYPTO_AES
|
||||
select CRYPTO_AEAD
|
||||
@ -494,7 +493,6 @@ config CRYPTO_DEV_ATMEL_AES
|
||||
|
||||
config CRYPTO_DEV_ATMEL_TDES
|
||||
tristate "Support for Atmel DES/TDES hw accelerator"
|
||||
depends on HAS_DMA
|
||||
depends on ARCH_AT91 || COMPILE_TEST
|
||||
select CRYPTO_DES
|
||||
select CRYPTO_BLKCIPHER
|
||||
@ -508,7 +506,6 @@ config CRYPTO_DEV_ATMEL_TDES
|
||||
|
||||
config CRYPTO_DEV_ATMEL_SHA
|
||||
tristate "Support for Atmel SHA hw accelerator"
|
||||
depends on HAS_DMA
|
||||
depends on ARCH_AT91 || COMPILE_TEST
|
||||
select CRYPTO_HASH
|
||||
help
|
||||
@ -574,7 +571,8 @@ config CRYPTO_DEV_CAVIUM_ZIP
|
||||
|
||||
config CRYPTO_DEV_QCE
|
||||
tristate "Qualcomm crypto engine accelerator"
|
||||
depends on (ARCH_QCOM || COMPILE_TEST) && HAS_DMA && HAS_IOMEM
|
||||
depends on ARCH_QCOM || COMPILE_TEST
|
||||
depends on HAS_IOMEM
|
||||
select CRYPTO_AES
|
||||
select CRYPTO_DES
|
||||
select CRYPTO_ECB
|
||||
@ -598,7 +596,6 @@ source "drivers/crypto/vmx/Kconfig"
|
||||
config CRYPTO_DEV_IMGTEC_HASH
|
||||
tristate "Imagination Technologies hardware hash accelerator"
|
||||
depends on MIPS || COMPILE_TEST
|
||||
depends on HAS_DMA
|
||||
select CRYPTO_MD5
|
||||
select CRYPTO_SHA1
|
||||
select CRYPTO_SHA256
|
||||
@ -650,7 +647,6 @@ config CRYPTO_DEV_ROCKCHIP
|
||||
|
||||
config CRYPTO_DEV_MEDIATEK
|
||||
tristate "MediaTek's EIP97 Cryptographic Engine driver"
|
||||
depends on HAS_DMA
|
||||
depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST
|
||||
select CRYPTO_AES
|
||||
select CRYPTO_AEAD
|
||||
@ -688,9 +684,10 @@ source "drivers/crypto/stm32/Kconfig"
|
||||
|
||||
config CRYPTO_DEV_SAFEXCEL
|
||||
tristate "Inside Secure's SafeXcel cryptographic engine driver"
|
||||
depends on HAS_DMA && OF
|
||||
depends on OF
|
||||
depends on (ARM64 && ARCH_MVEBU) || (COMPILE_TEST && 64BIT)
|
||||
select CRYPTO_AES
|
||||
select CRYPTO_AUTHENC
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_HMAC
|
||||
@ -706,7 +703,6 @@ config CRYPTO_DEV_SAFEXCEL
|
||||
config CRYPTO_DEV_ARTPEC6
|
||||
tristate "Support for Axis ARTPEC-6/7 hardware crypto acceleration."
|
||||
depends on ARM && (ARCH_ARTPEC || COMPILE_TEST)
|
||||
depends on HAS_DMA
|
||||
depends on OF
|
||||
select CRYPTO_AEAD
|
||||
select CRYPTO_AES
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include <crypto/gcm.h>
|
||||
#include <crypto/sha.h>
|
||||
#include <crypto/ctr.h>
|
||||
#include <crypto/skcipher.h>
|
||||
#include "crypto4xx_reg_def.h"
|
||||
#include "crypto4xx_core.h"
|
||||
#include "crypto4xx_sa.h"
|
||||
@ -74,51 +75,57 @@ static void set_dynamic_sa_command_1(struct dynamic_sa_ctl *sa, u32 cm,
|
||||
sa->sa_command_1.bf.copy_hdr = cp_hdr;
|
||||
}
|
||||
|
||||
int crypto4xx_encrypt(struct ablkcipher_request *req)
|
||||
static inline int crypto4xx_crypt(struct skcipher_request *req,
|
||||
const unsigned int ivlen, bool decrypt)
|
||||
{
|
||||
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
|
||||
unsigned int ivlen = crypto_ablkcipher_ivsize(
|
||||
crypto_ablkcipher_reqtfm(req));
|
||||
__le32 iv[ivlen];
|
||||
struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
|
||||
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
|
||||
__le32 iv[AES_IV_SIZE];
|
||||
|
||||
if (ivlen)
|
||||
crypto4xx_memcpy_to_le32(iv, req->info, ivlen);
|
||||
crypto4xx_memcpy_to_le32(iv, req->iv, ivlen);
|
||||
|
||||
return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst,
|
||||
req->nbytes, iv, ivlen, ctx->sa_out, ctx->sa_len, 0);
|
||||
req->cryptlen, iv, ivlen, decrypt ? ctx->sa_in : ctx->sa_out,
|
||||
ctx->sa_len, 0, NULL);
|
||||
}
|
||||
|
||||
int crypto4xx_decrypt(struct ablkcipher_request *req)
|
||||
int crypto4xx_encrypt_noiv(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
|
||||
unsigned int ivlen = crypto_ablkcipher_ivsize(
|
||||
crypto_ablkcipher_reqtfm(req));
|
||||
__le32 iv[ivlen];
|
||||
return crypto4xx_crypt(req, 0, false);
|
||||
}
|
||||
|
||||
if (ivlen)
|
||||
crypto4xx_memcpy_to_le32(iv, req->info, ivlen);
|
||||
int crypto4xx_encrypt_iv(struct skcipher_request *req)
|
||||
{
|
||||
return crypto4xx_crypt(req, AES_IV_SIZE, false);
|
||||
}
|
||||
|
||||
return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst,
|
||||
req->nbytes, iv, ivlen, ctx->sa_in, ctx->sa_len, 0);
|
||||
int crypto4xx_decrypt_noiv(struct skcipher_request *req)
|
||||
{
|
||||
return crypto4xx_crypt(req, 0, true);
|
||||
}
|
||||
|
||||
int crypto4xx_decrypt_iv(struct skcipher_request *req)
|
||||
{
|
||||
return crypto4xx_crypt(req, AES_IV_SIZE, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* AES Functions
|
||||
*/
|
||||
static int crypto4xx_setkey_aes(struct crypto_ablkcipher *cipher,
|
||||
static int crypto4xx_setkey_aes(struct crypto_skcipher *cipher,
|
||||
const u8 *key,
|
||||
unsigned int keylen,
|
||||
unsigned char cm,
|
||||
u8 fb)
|
||||
{
|
||||
struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
|
||||
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
|
||||
struct dynamic_sa_ctl *sa;
|
||||
int rc;
|
||||
|
||||
if (keylen != AES_KEYSIZE_256 &&
|
||||
keylen != AES_KEYSIZE_192 && keylen != AES_KEYSIZE_128) {
|
||||
crypto_ablkcipher_set_flags(cipher,
|
||||
crypto_skcipher_set_flags(cipher,
|
||||
CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -134,7 +141,8 @@ static int crypto4xx_setkey_aes(struct crypto_ablkcipher *cipher,
|
||||
/* Setup SA */
|
||||
sa = ctx->sa_in;
|
||||
|
||||
set_dynamic_sa_command_0(sa, SA_NOT_SAVE_HASH, SA_NOT_SAVE_IV,
|
||||
set_dynamic_sa_command_0(sa, SA_NOT_SAVE_HASH, (cm == CRYPTO_MODE_CBC ?
|
||||
SA_SAVE_IV : SA_NOT_SAVE_IV),
|
||||
SA_LOAD_HASH_FROM_SA, SA_LOAD_IV_FROM_STATE,
|
||||
SA_NO_HEADER_PROC, SA_HASH_ALG_NULL,
|
||||
SA_CIPHER_ALG_AES, SA_PAD_TYPE_ZERO,
|
||||
@ -158,39 +166,38 @@ static int crypto4xx_setkey_aes(struct crypto_ablkcipher *cipher,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int crypto4xx_setkey_aes_cbc(struct crypto_ablkcipher *cipher,
|
||||
int crypto4xx_setkey_aes_cbc(struct crypto_skcipher *cipher,
|
||||
const u8 *key, unsigned int keylen)
|
||||
{
|
||||
return crypto4xx_setkey_aes(cipher, key, keylen, CRYPTO_MODE_CBC,
|
||||
CRYPTO_FEEDBACK_MODE_NO_FB);
|
||||
}
|
||||
|
||||
int crypto4xx_setkey_aes_cfb(struct crypto_ablkcipher *cipher,
|
||||
int crypto4xx_setkey_aes_cfb(struct crypto_skcipher *cipher,
|
||||
const u8 *key, unsigned int keylen)
|
||||
{
|
||||
return crypto4xx_setkey_aes(cipher, key, keylen, CRYPTO_MODE_CFB,
|
||||
CRYPTO_FEEDBACK_MODE_128BIT_CFB);
|
||||
}
|
||||
|
||||
int crypto4xx_setkey_aes_ecb(struct crypto_ablkcipher *cipher,
|
||||
int crypto4xx_setkey_aes_ecb(struct crypto_skcipher *cipher,
|
||||
const u8 *key, unsigned int keylen)
|
||||
{
|
||||
return crypto4xx_setkey_aes(cipher, key, keylen, CRYPTO_MODE_ECB,
|
||||
CRYPTO_FEEDBACK_MODE_NO_FB);
|
||||
}
|
||||
|
||||
int crypto4xx_setkey_aes_ofb(struct crypto_ablkcipher *cipher,
|
||||
int crypto4xx_setkey_aes_ofb(struct crypto_skcipher *cipher,
|
||||
const u8 *key, unsigned int keylen)
|
||||
{
|
||||
return crypto4xx_setkey_aes(cipher, key, keylen, CRYPTO_MODE_OFB,
|
||||
CRYPTO_FEEDBACK_MODE_64BIT_OFB);
|
||||
}
|
||||
|
||||
int crypto4xx_setkey_rfc3686(struct crypto_ablkcipher *cipher,
|
||||
int crypto4xx_setkey_rfc3686(struct crypto_skcipher *cipher,
|
||||
const u8 *key, unsigned int keylen)
|
||||
{
|
||||
struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
|
||||
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
|
||||
int rc;
|
||||
|
||||
rc = crypto4xx_setkey_aes(cipher, key, keylen - CTR_RFC3686_NONCE_SIZE,
|
||||
@ -204,35 +211,117 @@ int crypto4xx_setkey_rfc3686(struct crypto_ablkcipher *cipher,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int crypto4xx_rfc3686_encrypt(struct ablkcipher_request *req)
|
||||
int crypto4xx_rfc3686_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
|
||||
struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
|
||||
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
|
||||
__le32 iv[AES_IV_SIZE / 4] = {
|
||||
ctx->iv_nonce,
|
||||
cpu_to_le32p((u32 *) req->info),
|
||||
cpu_to_le32p((u32 *) (req->info + 4)),
|
||||
cpu_to_le32p((u32 *) req->iv),
|
||||
cpu_to_le32p((u32 *) (req->iv + 4)),
|
||||
cpu_to_le32(1) };
|
||||
|
||||
return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst,
|
||||
req->nbytes, iv, AES_IV_SIZE,
|
||||
ctx->sa_out, ctx->sa_len, 0);
|
||||
req->cryptlen, iv, AES_IV_SIZE,
|
||||
ctx->sa_out, ctx->sa_len, 0, NULL);
|
||||
}
|
||||
|
||||
int crypto4xx_rfc3686_decrypt(struct ablkcipher_request *req)
|
||||
int crypto4xx_rfc3686_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
|
||||
struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
|
||||
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
|
||||
__le32 iv[AES_IV_SIZE / 4] = {
|
||||
ctx->iv_nonce,
|
||||
cpu_to_le32p((u32 *) req->info),
|
||||
cpu_to_le32p((u32 *) (req->info + 4)),
|
||||
cpu_to_le32p((u32 *) req->iv),
|
||||
cpu_to_le32p((u32 *) (req->iv + 4)),
|
||||
cpu_to_le32(1) };
|
||||
|
||||
return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst,
|
||||
req->nbytes, iv, AES_IV_SIZE,
|
||||
ctx->sa_out, ctx->sa_len, 0);
|
||||
req->cryptlen, iv, AES_IV_SIZE,
|
||||
ctx->sa_out, ctx->sa_len, 0, NULL);
|
||||
}
|
||||
|
||||
static int
|
||||
crypto4xx_ctr_crypt(struct skcipher_request *req, bool encrypt)
|
||||
{
|
||||
struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
|
||||
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
|
||||
size_t iv_len = crypto_skcipher_ivsize(cipher);
|
||||
unsigned int counter = be32_to_cpup((__be32 *)(req->iv + iv_len - 4));
|
||||
unsigned int nblks = ALIGN(req->cryptlen, AES_BLOCK_SIZE) /
|
||||
AES_BLOCK_SIZE;
|
||||
|
||||
/*
|
||||
* The hardware uses only the last 32-bits as the counter while the
|
||||
* kernel tests (aes_ctr_enc_tv_template[4] for example) expect that
|
||||
* the whole IV is a counter. So fallback if the counter is going to
|
||||
* overflow.
|
||||
*/
|
||||
if (counter + nblks < counter) {
|
||||
struct skcipher_request *subreq = skcipher_request_ctx(req);
|
||||
int ret;
|
||||
|
||||
skcipher_request_set_tfm(subreq, ctx->sw_cipher.cipher);
|
||||
skcipher_request_set_callback(subreq, req->base.flags,
|
||||
NULL, NULL);
|
||||
skcipher_request_set_crypt(subreq, req->src, req->dst,
|
||||
req->cryptlen, req->iv);
|
||||
ret = encrypt ? crypto_skcipher_encrypt(subreq)
|
||||
: crypto_skcipher_decrypt(subreq);
|
||||
skcipher_request_zero(subreq);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return encrypt ? crypto4xx_encrypt_iv(req)
|
||||
: crypto4xx_decrypt_iv(req);
|
||||
}
|
||||
|
||||
static int crypto4xx_sk_setup_fallback(struct crypto4xx_ctx *ctx,
|
||||
struct crypto_skcipher *cipher,
|
||||
const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
int rc;
|
||||
|
||||
crypto_skcipher_clear_flags(ctx->sw_cipher.cipher,
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
crypto_skcipher_set_flags(ctx->sw_cipher.cipher,
|
||||
crypto_skcipher_get_flags(cipher) & CRYPTO_TFM_REQ_MASK);
|
||||
rc = crypto_skcipher_setkey(ctx->sw_cipher.cipher, key, keylen);
|
||||
crypto_skcipher_clear_flags(cipher, CRYPTO_TFM_RES_MASK);
|
||||
crypto_skcipher_set_flags(cipher,
|
||||
crypto_skcipher_get_flags(ctx->sw_cipher.cipher) &
|
||||
CRYPTO_TFM_RES_MASK);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int crypto4xx_setkey_aes_ctr(struct crypto_skcipher *cipher,
|
||||
const u8 *key, unsigned int keylen)
|
||||
{
|
||||
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
|
||||
int rc;
|
||||
|
||||
rc = crypto4xx_sk_setup_fallback(ctx, cipher, key, keylen);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
return crypto4xx_setkey_aes(cipher, key, keylen,
|
||||
CRYPTO_MODE_CTR, CRYPTO_FEEDBACK_MODE_NO_FB);
|
||||
}
|
||||
|
||||
int crypto4xx_encrypt_ctr(struct skcipher_request *req)
|
||||
{
|
||||
return crypto4xx_ctr_crypt(req, true);
|
||||
}
|
||||
|
||||
int crypto4xx_decrypt_ctr(struct skcipher_request *req)
|
||||
{
|
||||
return crypto4xx_ctr_crypt(req, false);
|
||||
}
|
||||
|
||||
static inline bool crypto4xx_aead_need_fallback(struct aead_request *req,
|
||||
unsigned int len,
|
||||
bool is_ccm, bool decrypt)
|
||||
{
|
||||
struct crypto_aead *aead = crypto_aead_reqtfm(req);
|
||||
@ -242,14 +331,14 @@ static inline bool crypto4xx_aead_need_fallback(struct aead_request *req,
|
||||
return true;
|
||||
|
||||
/*
|
||||
* hardware does not handle cases where cryptlen
|
||||
* is less than a block
|
||||
* hardware does not handle cases where plaintext
|
||||
* is less than a block.
|
||||
*/
|
||||
if (req->cryptlen < AES_BLOCK_SIZE)
|
||||
if (len < AES_BLOCK_SIZE)
|
||||
return true;
|
||||
|
||||
/* assoc len needs to be a multiple of 4 */
|
||||
if (req->assoclen & 0x3)
|
||||
/* assoc len needs to be a multiple of 4 and <= 1020 */
|
||||
if (req->assoclen & 0x3 || req->assoclen > 1020)
|
||||
return true;
|
||||
|
||||
/* CCM supports only counter field length of 2 and 4 bytes */
|
||||
@ -262,13 +351,7 @@ static inline bool crypto4xx_aead_need_fallback(struct aead_request *req,
|
||||
static int crypto4xx_aead_fallback(struct aead_request *req,
|
||||
struct crypto4xx_ctx *ctx, bool do_decrypt)
|
||||
{
|
||||
char aead_req_data[sizeof(struct aead_request) +
|
||||
crypto_aead_reqsize(ctx->sw_cipher.aead)]
|
||||
__aligned(__alignof__(struct aead_request));
|
||||
|
||||
struct aead_request *subreq = (void *) aead_req_data;
|
||||
|
||||
memset(subreq, 0, sizeof(aead_req_data));
|
||||
struct aead_request *subreq = aead_request_ctx(req);
|
||||
|
||||
aead_request_set_tfm(subreq, ctx->sw_cipher.aead);
|
||||
aead_request_set_callback(subreq, req->base.flags,
|
||||
@ -280,10 +363,10 @@ static int crypto4xx_aead_fallback(struct aead_request *req,
|
||||
crypto_aead_encrypt(subreq);
|
||||
}
|
||||
|
||||
static int crypto4xx_setup_fallback(struct crypto4xx_ctx *ctx,
|
||||
struct crypto_aead *cipher,
|
||||
const u8 *key,
|
||||
unsigned int keylen)
|
||||
static int crypto4xx_aead_setup_fallback(struct crypto4xx_ctx *ctx,
|
||||
struct crypto_aead *cipher,
|
||||
const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
int rc;
|
||||
|
||||
@ -311,7 +394,7 @@ int crypto4xx_setkey_aes_ccm(struct crypto_aead *cipher, const u8 *key,
|
||||
struct dynamic_sa_ctl *sa;
|
||||
int rc = 0;
|
||||
|
||||
rc = crypto4xx_setup_fallback(ctx, cipher, key, keylen);
|
||||
rc = crypto4xx_aead_setup_fallback(ctx, cipher, key, keylen);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
@ -366,19 +449,20 @@ int crypto4xx_setkey_aes_ccm(struct crypto_aead *cipher, const u8 *key,
|
||||
static int crypto4xx_crypt_aes_ccm(struct aead_request *req, bool decrypt)
|
||||
{
|
||||
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
|
||||
struct crypto4xx_aead_reqctx *rctx = aead_request_ctx(req);
|
||||
struct crypto_aead *aead = crypto_aead_reqtfm(req);
|
||||
unsigned int len = req->cryptlen;
|
||||
__le32 iv[16];
|
||||
u32 tmp_sa[ctx->sa_len * 4];
|
||||
u32 tmp_sa[SA_AES128_CCM_LEN + 4];
|
||||
struct dynamic_sa_ctl *sa = (struct dynamic_sa_ctl *)tmp_sa;
|
||||
|
||||
if (crypto4xx_aead_need_fallback(req, true, decrypt))
|
||||
return crypto4xx_aead_fallback(req, ctx, decrypt);
|
||||
unsigned int len = req->cryptlen;
|
||||
|
||||
if (decrypt)
|
||||
len -= crypto_aead_authsize(aead);
|
||||
|
||||
memcpy(tmp_sa, decrypt ? ctx->sa_in : ctx->sa_out, sizeof(tmp_sa));
|
||||
if (crypto4xx_aead_need_fallback(req, len, true, decrypt))
|
||||
return crypto4xx_aead_fallback(req, ctx, decrypt);
|
||||
|
||||
memcpy(tmp_sa, decrypt ? ctx->sa_in : ctx->sa_out, ctx->sa_len * 4);
|
||||
sa->sa_command_0.bf.digest_len = crypto_aead_authsize(aead) >> 2;
|
||||
|
||||
if (req->iv[0] == 1) {
|
||||
@ -391,7 +475,7 @@ static int crypto4xx_crypt_aes_ccm(struct aead_request *req, bool decrypt)
|
||||
|
||||
return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst,
|
||||
len, iv, sizeof(iv),
|
||||
sa, ctx->sa_len, req->assoclen);
|
||||
sa, ctx->sa_len, req->assoclen, rctx->dst);
|
||||
}
|
||||
|
||||
int crypto4xx_encrypt_aes_ccm(struct aead_request *req)
|
||||
@ -470,7 +554,7 @@ int crypto4xx_setkey_aes_gcm(struct crypto_aead *cipher,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
rc = crypto4xx_setup_fallback(ctx, cipher, key, keylen);
|
||||
rc = crypto4xx_aead_setup_fallback(ctx, cipher, key, keylen);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
@ -523,22 +607,23 @@ static inline int crypto4xx_crypt_aes_gcm(struct aead_request *req,
|
||||
bool decrypt)
|
||||
{
|
||||
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
|
||||
unsigned int len = req->cryptlen;
|
||||
struct crypto4xx_aead_reqctx *rctx = aead_request_ctx(req);
|
||||
__le32 iv[4];
|
||||
unsigned int len = req->cryptlen;
|
||||
|
||||
if (crypto4xx_aead_need_fallback(req, false, decrypt))
|
||||
if (decrypt)
|
||||
len -= crypto_aead_authsize(crypto_aead_reqtfm(req));
|
||||
|
||||
if (crypto4xx_aead_need_fallback(req, len, false, decrypt))
|
||||
return crypto4xx_aead_fallback(req, ctx, decrypt);
|
||||
|
||||
crypto4xx_memcpy_to_le32(iv, req->iv, GCM_AES_IV_SIZE);
|
||||
iv[3] = cpu_to_le32(1);
|
||||
|
||||
if (decrypt)
|
||||
len -= crypto_aead_authsize(crypto_aead_reqtfm(req));
|
||||
|
||||
return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst,
|
||||
len, iv, sizeof(iv),
|
||||
decrypt ? ctx->sa_in : ctx->sa_out,
|
||||
ctx->sa_len, req->assoclen);
|
||||
ctx->sa_len, req->assoclen, rctx->dst);
|
||||
}
|
||||
|
||||
int crypto4xx_encrypt_aes_gcm(struct aead_request *req)
|
||||
@ -623,7 +708,7 @@ int crypto4xx_hash_update(struct ahash_request *req)
|
||||
|
||||
return crypto4xx_build_pd(&req->base, ctx, req->src, &dst,
|
||||
req->nbytes, NULL, 0, ctx->sa_in,
|
||||
ctx->sa_len, 0);
|
||||
ctx->sa_len, 0, NULL);
|
||||
}
|
||||
|
||||
int crypto4xx_hash_final(struct ahash_request *req)
|
||||
@ -642,7 +727,7 @@ int crypto4xx_hash_digest(struct ahash_request *req)
|
||||
|
||||
return crypto4xx_build_pd(&req->base, ctx, req->src, &dst,
|
||||
req->nbytes, NULL, 0, ctx->sa_in,
|
||||
ctx->sa_len, 0);
|
||||
ctx->sa_len, 0, NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include <crypto/gcm.h>
|
||||
#include <crypto/sha.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <crypto/skcipher.h>
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include "crypto4xx_reg_def.h"
|
||||
@ -526,31 +527,38 @@ static void crypto4xx_ret_sg_desc(struct crypto4xx_device *dev,
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto4xx_ablkcipher_done(struct crypto4xx_device *dev,
|
||||
static void crypto4xx_cipher_done(struct crypto4xx_device *dev,
|
||||
struct pd_uinfo *pd_uinfo,
|
||||
struct ce_pd *pd)
|
||||
{
|
||||
struct crypto4xx_ctx *ctx;
|
||||
struct ablkcipher_request *ablk_req;
|
||||
struct skcipher_request *req;
|
||||
struct scatterlist *dst;
|
||||
dma_addr_t addr;
|
||||
|
||||
ablk_req = ablkcipher_request_cast(pd_uinfo->async_req);
|
||||
ctx = crypto_tfm_ctx(ablk_req->base.tfm);
|
||||
req = skcipher_request_cast(pd_uinfo->async_req);
|
||||
|
||||
if (pd_uinfo->using_sd) {
|
||||
crypto4xx_copy_pkt_to_dst(dev, pd, pd_uinfo, ablk_req->nbytes,
|
||||
ablk_req->dst);
|
||||
crypto4xx_copy_pkt_to_dst(dev, pd, pd_uinfo,
|
||||
req->cryptlen, req->dst);
|
||||
} else {
|
||||
dst = pd_uinfo->dest_va;
|
||||
addr = dma_map_page(dev->core_dev->device, sg_page(dst),
|
||||
dst->offset, dst->length, DMA_FROM_DEVICE);
|
||||
}
|
||||
|
||||
if (pd_uinfo->sa_va->sa_command_0.bf.save_iv == SA_SAVE_IV) {
|
||||
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
|
||||
|
||||
crypto4xx_memcpy_from_le32((u32 *)req->iv,
|
||||
pd_uinfo->sr_va->save_iv,
|
||||
crypto_skcipher_ivsize(skcipher));
|
||||
}
|
||||
|
||||
crypto4xx_ret_sg_desc(dev, pd_uinfo);
|
||||
|
||||
if (pd_uinfo->state & PD_ENTRY_BUSY)
|
||||
ablkcipher_request_complete(ablk_req, -EINPROGRESS);
|
||||
ablkcipher_request_complete(ablk_req, 0);
|
||||
skcipher_request_complete(req, -EINPROGRESS);
|
||||
skcipher_request_complete(req, 0);
|
||||
}
|
||||
|
||||
static void crypto4xx_ahash_done(struct crypto4xx_device *dev,
|
||||
@ -580,7 +588,7 @@ static void crypto4xx_aead_done(struct crypto4xx_device *dev,
|
||||
struct scatterlist *dst = pd_uinfo->dest_va;
|
||||
size_t cp_len = crypto_aead_authsize(
|
||||
crypto_aead_reqtfm(aead_req));
|
||||
u32 icv[cp_len];
|
||||
u32 icv[AES_BLOCK_SIZE];
|
||||
int err = 0;
|
||||
|
||||
if (pd_uinfo->using_sd) {
|
||||
@ -595,7 +603,7 @@ static void crypto4xx_aead_done(struct crypto4xx_device *dev,
|
||||
if (pd_uinfo->sa_va->sa_command_0.bf.dir == DIR_OUTBOUND) {
|
||||
/* append icv at the end */
|
||||
crypto4xx_memcpy_from_le32(icv, pd_uinfo->sr_va->save_digest,
|
||||
cp_len);
|
||||
sizeof(icv));
|
||||
|
||||
scatterwalk_map_and_copy(icv, dst, aead_req->cryptlen,
|
||||
cp_len, 1);
|
||||
@ -605,7 +613,7 @@ static void crypto4xx_aead_done(struct crypto4xx_device *dev,
|
||||
aead_req->assoclen + aead_req->cryptlen -
|
||||
cp_len, cp_len, 0);
|
||||
|
||||
crypto4xx_memcpy_from_le32(icv, icv, cp_len);
|
||||
crypto4xx_memcpy_from_le32(icv, icv, sizeof(icv));
|
||||
|
||||
if (crypto_memneq(icv, pd_uinfo->sr_va->save_digest, cp_len))
|
||||
err = -EBADMSG;
|
||||
@ -641,8 +649,8 @@ static void crypto4xx_pd_done(struct crypto4xx_device *dev, u32 idx)
|
||||
struct pd_uinfo *pd_uinfo = &dev->pdr_uinfo[idx];
|
||||
|
||||
switch (crypto_tfm_alg_type(pd_uinfo->async_req->tfm)) {
|
||||
case CRYPTO_ALG_TYPE_ABLKCIPHER:
|
||||
crypto4xx_ablkcipher_done(dev, pd_uinfo, pd);
|
||||
case CRYPTO_ALG_TYPE_SKCIPHER:
|
||||
crypto4xx_cipher_done(dev, pd_uinfo, pd);
|
||||
break;
|
||||
case CRYPTO_ALG_TYPE_AEAD:
|
||||
crypto4xx_aead_done(dev, pd_uinfo, pd);
|
||||
@ -687,9 +695,9 @@ int crypto4xx_build_pd(struct crypto_async_request *req,
|
||||
const __le32 *iv, const u32 iv_len,
|
||||
const struct dynamic_sa_ctl *req_sa,
|
||||
const unsigned int sa_len,
|
||||
const unsigned int assoclen)
|
||||
const unsigned int assoclen,
|
||||
struct scatterlist *_dst)
|
||||
{
|
||||
struct scatterlist _dst[2];
|
||||
struct crypto4xx_device *dev = ctx->dev;
|
||||
struct dynamic_sa_ctl *sa;
|
||||
struct ce_gd *gd;
|
||||
@ -936,15 +944,27 @@ static void crypto4xx_ctx_init(struct crypto4xx_alg *amcc_alg,
|
||||
ctx->sa_len = 0;
|
||||
}
|
||||
|
||||
static int crypto4xx_ablk_init(struct crypto_tfm *tfm)
|
||||
static int crypto4xx_sk_init(struct crypto_skcipher *sk)
|
||||
{
|
||||
struct crypto_alg *alg = tfm->__crt_alg;
|
||||
struct skcipher_alg *alg = crypto_skcipher_alg(sk);
|
||||
struct crypto4xx_alg *amcc_alg;
|
||||
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(sk);
|
||||
|
||||
if (alg->base.cra_flags & CRYPTO_ALG_NEED_FALLBACK) {
|
||||
ctx->sw_cipher.cipher =
|
||||
crypto_alloc_skcipher(alg->base.cra_name, 0,
|
||||
CRYPTO_ALG_NEED_FALLBACK |
|
||||
CRYPTO_ALG_ASYNC);
|
||||
if (IS_ERR(ctx->sw_cipher.cipher))
|
||||
return PTR_ERR(ctx->sw_cipher.cipher);
|
||||
|
||||
crypto_skcipher_set_reqsize(sk,
|
||||
sizeof(struct skcipher_request) + 32 +
|
||||
crypto_skcipher_reqsize(ctx->sw_cipher.cipher));
|
||||
}
|
||||
|
||||
amcc_alg = container_of(alg, struct crypto4xx_alg, alg.u.cipher);
|
||||
crypto4xx_ctx_init(amcc_alg, ctx);
|
||||
tfm->crt_ablkcipher.reqsize = sizeof(struct crypto4xx_ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -953,9 +973,13 @@ static void crypto4xx_common_exit(struct crypto4xx_ctx *ctx)
|
||||
crypto4xx_free_sa(ctx);
|
||||
}
|
||||
|
||||
static void crypto4xx_ablk_exit(struct crypto_tfm *tfm)
|
||||
static void crypto4xx_sk_exit(struct crypto_skcipher *sk)
|
||||
{
|
||||
crypto4xx_common_exit(crypto_tfm_ctx(tfm));
|
||||
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(sk);
|
||||
|
||||
crypto4xx_common_exit(ctx);
|
||||
if (ctx->sw_cipher.cipher)
|
||||
crypto_free_skcipher(ctx->sw_cipher.cipher);
|
||||
}
|
||||
|
||||
static int crypto4xx_aead_init(struct crypto_aead *tfm)
|
||||
@ -972,9 +996,9 @@ static int crypto4xx_aead_init(struct crypto_aead *tfm)
|
||||
|
||||
amcc_alg = container_of(alg, struct crypto4xx_alg, alg.u.aead);
|
||||
crypto4xx_ctx_init(amcc_alg, ctx);
|
||||
crypto_aead_set_reqsize(tfm, sizeof(struct aead_request) +
|
||||
max(sizeof(struct crypto4xx_ctx), 32 +
|
||||
crypto_aead_reqsize(ctx->sw_cipher.aead)));
|
||||
crypto_aead_set_reqsize(tfm, max(sizeof(struct aead_request) + 32 +
|
||||
crypto_aead_reqsize(ctx->sw_cipher.aead),
|
||||
sizeof(struct crypto4xx_aead_reqctx)));
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1012,7 +1036,7 @@ static int crypto4xx_register_alg(struct crypto4xx_device *sec_dev,
|
||||
break;
|
||||
|
||||
default:
|
||||
rc = crypto_register_alg(&alg->alg.u.cipher);
|
||||
rc = crypto_register_skcipher(&alg->alg.u.cipher);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1041,7 +1065,7 @@ static void crypto4xx_unregister_alg(struct crypto4xx_device *sec_dev)
|
||||
break;
|
||||
|
||||
default:
|
||||
crypto_unregister_alg(&alg->alg.u.cipher);
|
||||
crypto_unregister_skcipher(&alg->alg.u.cipher);
|
||||
}
|
||||
kfree(alg);
|
||||
}
|
||||
@ -1103,126 +1127,131 @@ static irqreturn_t crypto4xx_ce_interrupt_handler_revb(int irq, void *data)
|
||||
*/
|
||||
static struct crypto4xx_alg_common crypto4xx_alg[] = {
|
||||
/* Crypto AES modes */
|
||||
{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .u.cipher = {
|
||||
.cra_name = "cbc(aes)",
|
||||
.cra_driver_name = "cbc-aes-ppc4xx",
|
||||
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
|
||||
CRYPTO_ALG_ASYNC |
|
||||
CRYPTO_ALG_KERN_DRIVER_ONLY,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_init = crypto4xx_ablk_init,
|
||||
.cra_exit = crypto4xx_ablk_exit,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_IV_SIZE,
|
||||
.setkey = crypto4xx_setkey_aes_cbc,
|
||||
.encrypt = crypto4xx_encrypt,
|
||||
.decrypt = crypto4xx_decrypt,
|
||||
}
|
||||
}
|
||||
}},
|
||||
{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .u.cipher = {
|
||||
.cra_name = "cfb(aes)",
|
||||
.cra_driver_name = "cfb-aes-ppc4xx",
|
||||
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
|
||||
CRYPTO_ALG_ASYNC |
|
||||
CRYPTO_ALG_KERN_DRIVER_ONLY,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_init = crypto4xx_ablk_init,
|
||||
.cra_exit = crypto4xx_ablk_exit,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_IV_SIZE,
|
||||
.setkey = crypto4xx_setkey_aes_cfb,
|
||||
.encrypt = crypto4xx_encrypt,
|
||||
.decrypt = crypto4xx_decrypt,
|
||||
}
|
||||
}
|
||||
{ .type = CRYPTO_ALG_TYPE_SKCIPHER, .u.cipher = {
|
||||
.base = {
|
||||
.cra_name = "cbc(aes)",
|
||||
.cra_driver_name = "cbc-aes-ppc4xx",
|
||||
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
|
||||
CRYPTO_ALG_ASYNC |
|
||||
CRYPTO_ALG_KERN_DRIVER_ONLY,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_IV_SIZE,
|
||||
.setkey = crypto4xx_setkey_aes_cbc,
|
||||
.encrypt = crypto4xx_encrypt_iv,
|
||||
.decrypt = crypto4xx_decrypt_iv,
|
||||
.init = crypto4xx_sk_init,
|
||||
.exit = crypto4xx_sk_exit,
|
||||
} },
|
||||
{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .u.cipher = {
|
||||
.cra_name = "rfc3686(ctr(aes))",
|
||||
.cra_driver_name = "rfc3686-ctr-aes-ppc4xx",
|
||||
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
|
||||
CRYPTO_ALG_ASYNC |
|
||||
CRYPTO_ALG_KERN_DRIVER_ONLY,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_init = crypto4xx_ablk_init,
|
||||
.cra_exit = crypto4xx_ablk_exit,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = AES_MIN_KEY_SIZE +
|
||||
CTR_RFC3686_NONCE_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE +
|
||||
CTR_RFC3686_NONCE_SIZE,
|
||||
.ivsize = CTR_RFC3686_IV_SIZE,
|
||||
.setkey = crypto4xx_setkey_rfc3686,
|
||||
.encrypt = crypto4xx_rfc3686_encrypt,
|
||||
.decrypt = crypto4xx_rfc3686_decrypt,
|
||||
}
|
||||
}
|
||||
{ .type = CRYPTO_ALG_TYPE_SKCIPHER, .u.cipher = {
|
||||
.base = {
|
||||
.cra_name = "cfb(aes)",
|
||||
.cra_driver_name = "cfb-aes-ppc4xx",
|
||||
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
|
||||
CRYPTO_ALG_ASYNC |
|
||||
CRYPTO_ALG_KERN_DRIVER_ONLY,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_IV_SIZE,
|
||||
.setkey = crypto4xx_setkey_aes_cfb,
|
||||
.encrypt = crypto4xx_encrypt_iv,
|
||||
.decrypt = crypto4xx_decrypt_iv,
|
||||
.init = crypto4xx_sk_init,
|
||||
.exit = crypto4xx_sk_exit,
|
||||
} },
|
||||
{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .u.cipher = {
|
||||
.cra_name = "ecb(aes)",
|
||||
.cra_driver_name = "ecb-aes-ppc4xx",
|
||||
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
|
||||
CRYPTO_ALG_ASYNC |
|
||||
CRYPTO_ALG_KERN_DRIVER_ONLY,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_init = crypto4xx_ablk_init,
|
||||
.cra_exit = crypto4xx_ablk_exit,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.setkey = crypto4xx_setkey_aes_ecb,
|
||||
.encrypt = crypto4xx_encrypt,
|
||||
.decrypt = crypto4xx_decrypt,
|
||||
}
|
||||
}
|
||||
{ .type = CRYPTO_ALG_TYPE_SKCIPHER, .u.cipher = {
|
||||
.base = {
|
||||
.cra_name = "ctr(aes)",
|
||||
.cra_driver_name = "ctr-aes-ppc4xx",
|
||||
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
|
||||
CRYPTO_ALG_NEED_FALLBACK |
|
||||
CRYPTO_ALG_ASYNC |
|
||||
CRYPTO_ALG_KERN_DRIVER_ONLY,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_IV_SIZE,
|
||||
.setkey = crypto4xx_setkey_aes_ctr,
|
||||
.encrypt = crypto4xx_encrypt_ctr,
|
||||
.decrypt = crypto4xx_decrypt_ctr,
|
||||
.init = crypto4xx_sk_init,
|
||||
.exit = crypto4xx_sk_exit,
|
||||
} },
|
||||
{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .u.cipher = {
|
||||
.cra_name = "ofb(aes)",
|
||||
.cra_driver_name = "ofb-aes-ppc4xx",
|
||||
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
|
||||
CRYPTO_ALG_ASYNC |
|
||||
CRYPTO_ALG_KERN_DRIVER_ONLY,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_init = crypto4xx_ablk_init,
|
||||
.cra_exit = crypto4xx_ablk_exit,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.ablkcipher = {
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_IV_SIZE,
|
||||
.setkey = crypto4xx_setkey_aes_ofb,
|
||||
.encrypt = crypto4xx_encrypt,
|
||||
.decrypt = crypto4xx_decrypt,
|
||||
}
|
||||
}
|
||||
{ .type = CRYPTO_ALG_TYPE_SKCIPHER, .u.cipher = {
|
||||
.base = {
|
||||
.cra_name = "rfc3686(ctr(aes))",
|
||||
.cra_driver_name = "rfc3686-ctr-aes-ppc4xx",
|
||||
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
|
||||
CRYPTO_ALG_ASYNC |
|
||||
CRYPTO_ALG_KERN_DRIVER_ONLY,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
|
||||
.ivsize = CTR_RFC3686_IV_SIZE,
|
||||
.setkey = crypto4xx_setkey_rfc3686,
|
||||
.encrypt = crypto4xx_rfc3686_encrypt,
|
||||
.decrypt = crypto4xx_rfc3686_decrypt,
|
||||
.init = crypto4xx_sk_init,
|
||||
.exit = crypto4xx_sk_exit,
|
||||
} },
|
||||
{ .type = CRYPTO_ALG_TYPE_SKCIPHER, .u.cipher = {
|
||||
.base = {
|
||||
.cra_name = "ecb(aes)",
|
||||
.cra_driver_name = "ecb-aes-ppc4xx",
|
||||
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
|
||||
CRYPTO_ALG_ASYNC |
|
||||
CRYPTO_ALG_KERN_DRIVER_ONLY,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.setkey = crypto4xx_setkey_aes_ecb,
|
||||
.encrypt = crypto4xx_encrypt_noiv,
|
||||
.decrypt = crypto4xx_decrypt_noiv,
|
||||
.init = crypto4xx_sk_init,
|
||||
.exit = crypto4xx_sk_exit,
|
||||
} },
|
||||
{ .type = CRYPTO_ALG_TYPE_SKCIPHER, .u.cipher = {
|
||||
.base = {
|
||||
.cra_name = "ofb(aes)",
|
||||
.cra_driver_name = "ofb-aes-ppc4xx",
|
||||
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
|
||||
CRYPTO_ALG_ASYNC |
|
||||
CRYPTO_ALG_KERN_DRIVER_ONLY,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_IV_SIZE,
|
||||
.setkey = crypto4xx_setkey_aes_ofb,
|
||||
.encrypt = crypto4xx_encrypt_iv,
|
||||
.decrypt = crypto4xx_decrypt_iv,
|
||||
.init = crypto4xx_sk_init,
|
||||
.exit = crypto4xx_sk_exit,
|
||||
} },
|
||||
|
||||
/* AEAD */
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include <linux/ratelimit.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include "crypto4xx_reg_def.h"
|
||||
#include "crypto4xx_sa.h"
|
||||
|
||||
@ -127,14 +128,19 @@ struct crypto4xx_ctx {
|
||||
__le32 iv_nonce;
|
||||
u32 sa_len;
|
||||
union {
|
||||
struct crypto_skcipher *cipher;
|
||||
struct crypto_aead *aead;
|
||||
} sw_cipher;
|
||||
};
|
||||
|
||||
struct crypto4xx_aead_reqctx {
|
||||
struct scatterlist dst[2];
|
||||
};
|
||||
|
||||
struct crypto4xx_alg_common {
|
||||
u32 type;
|
||||
union {
|
||||
struct crypto_alg cipher;
|
||||
struct skcipher_alg cipher;
|
||||
struct ahash_alg hash;
|
||||
struct aead_alg aead;
|
||||
} u;
|
||||
@ -157,21 +163,28 @@ int crypto4xx_build_pd(struct crypto_async_request *req,
|
||||
const __le32 *iv, const u32 iv_len,
|
||||
const struct dynamic_sa_ctl *sa,
|
||||
const unsigned int sa_len,
|
||||
const unsigned int assoclen);
|
||||
int crypto4xx_setkey_aes_cbc(struct crypto_ablkcipher *cipher,
|
||||
const unsigned int assoclen,
|
||||
struct scatterlist *dst_tmp);
|
||||
int crypto4xx_setkey_aes_cbc(struct crypto_skcipher *cipher,
|
||||
const u8 *key, unsigned int keylen);
|
||||
int crypto4xx_setkey_aes_cfb(struct crypto_ablkcipher *cipher,
|
||||
int crypto4xx_setkey_aes_cfb(struct crypto_skcipher *cipher,
|
||||
const u8 *key, unsigned int keylen);
|
||||
int crypto4xx_setkey_aes_ecb(struct crypto_ablkcipher *cipher,
|
||||
int crypto4xx_setkey_aes_ctr(struct crypto_skcipher *cipher,
|
||||
const u8 *key, unsigned int keylen);
|
||||
int crypto4xx_setkey_aes_ofb(struct crypto_ablkcipher *cipher,
|
||||
int crypto4xx_setkey_aes_ecb(struct crypto_skcipher *cipher,
|
||||
const u8 *key, unsigned int keylen);
|
||||
int crypto4xx_setkey_rfc3686(struct crypto_ablkcipher *cipher,
|
||||
int crypto4xx_setkey_aes_ofb(struct crypto_skcipher *cipher,
|
||||
const u8 *key, unsigned int keylen);
|
||||
int crypto4xx_encrypt(struct ablkcipher_request *req);
|
||||
int crypto4xx_decrypt(struct ablkcipher_request *req);
|
||||
int crypto4xx_rfc3686_encrypt(struct ablkcipher_request *req);
|
||||
int crypto4xx_rfc3686_decrypt(struct ablkcipher_request *req);
|
||||
int crypto4xx_setkey_rfc3686(struct crypto_skcipher *cipher,
|
||||
const u8 *key, unsigned int keylen);
|
||||
int crypto4xx_encrypt_ctr(struct skcipher_request *req);
|
||||
int crypto4xx_decrypt_ctr(struct skcipher_request *req);
|
||||
int crypto4xx_encrypt_iv(struct skcipher_request *req);
|
||||
int crypto4xx_decrypt_iv(struct skcipher_request *req);
|
||||
int crypto4xx_encrypt_noiv(struct skcipher_request *req);
|
||||
int crypto4xx_decrypt_noiv(struct skcipher_request *req);
|
||||
int crypto4xx_rfc3686_encrypt(struct skcipher_request *req);
|
||||
int crypto4xx_rfc3686_decrypt(struct skcipher_request *req);
|
||||
int crypto4xx_sha1_alg_init(struct crypto_tfm *tfm);
|
||||
int crypto4xx_hash_digest(struct ahash_request *req);
|
||||
int crypto4xx_hash_final(struct ahash_request *req);
|
||||
|
@ -769,15 +769,18 @@ struct aead_edesc {
|
||||
* @src_nents: number of segments in input s/w scatterlist
|
||||
* @dst_nents: number of segments in output s/w scatterlist
|
||||
* @iv_dma: dma address of iv for checking continuity and link table
|
||||
* @iv_dir: DMA mapping direction for IV
|
||||
* @sec4_sg_bytes: length of dma mapped sec4_sg space
|
||||
* @sec4_sg_dma: bus physical mapped address of h/w link table
|
||||
* @sec4_sg: pointer to h/w link table
|
||||
* @hw_desc: the h/w job descriptor followed by any referenced link tables
|
||||
* and IV
|
||||
*/
|
||||
struct ablkcipher_edesc {
|
||||
int src_nents;
|
||||
int dst_nents;
|
||||
dma_addr_t iv_dma;
|
||||
enum dma_data_direction iv_dir;
|
||||
int sec4_sg_bytes;
|
||||
dma_addr_t sec4_sg_dma;
|
||||
struct sec4_sg_entry *sec4_sg;
|
||||
@ -787,7 +790,8 @@ struct ablkcipher_edesc {
|
||||
static void caam_unmap(struct device *dev, struct scatterlist *src,
|
||||
struct scatterlist *dst, int src_nents,
|
||||
int dst_nents,
|
||||
dma_addr_t iv_dma, int ivsize, dma_addr_t sec4_sg_dma,
|
||||
dma_addr_t iv_dma, int ivsize,
|
||||
enum dma_data_direction iv_dir, dma_addr_t sec4_sg_dma,
|
||||
int sec4_sg_bytes)
|
||||
{
|
||||
if (dst != src) {
|
||||
@ -799,7 +803,7 @@ static void caam_unmap(struct device *dev, struct scatterlist *src,
|
||||
}
|
||||
|
||||
if (iv_dma)
|
||||
dma_unmap_single(dev, iv_dma, ivsize, DMA_TO_DEVICE);
|
||||
dma_unmap_single(dev, iv_dma, ivsize, iv_dir);
|
||||
if (sec4_sg_bytes)
|
||||
dma_unmap_single(dev, sec4_sg_dma, sec4_sg_bytes,
|
||||
DMA_TO_DEVICE);
|
||||
@ -810,7 +814,7 @@ static void aead_unmap(struct device *dev,
|
||||
struct aead_request *req)
|
||||
{
|
||||
caam_unmap(dev, req->src, req->dst,
|
||||
edesc->src_nents, edesc->dst_nents, 0, 0,
|
||||
edesc->src_nents, edesc->dst_nents, 0, 0, DMA_NONE,
|
||||
edesc->sec4_sg_dma, edesc->sec4_sg_bytes);
|
||||
}
|
||||
|
||||
@ -823,7 +827,7 @@ static void ablkcipher_unmap(struct device *dev,
|
||||
|
||||
caam_unmap(dev, req->src, req->dst,
|
||||
edesc->src_nents, edesc->dst_nents,
|
||||
edesc->iv_dma, ivsize,
|
||||
edesc->iv_dma, ivsize, edesc->iv_dir,
|
||||
edesc->sec4_sg_dma, edesc->sec4_sg_bytes);
|
||||
}
|
||||
|
||||
@ -912,6 +916,18 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
|
||||
scatterwalk_map_and_copy(req->info, req->dst, req->nbytes - ivsize,
|
||||
ivsize, 0);
|
||||
|
||||
/* In case initial IV was generated, copy it in GIVCIPHER request */
|
||||
if (edesc->iv_dir == DMA_FROM_DEVICE) {
|
||||
u8 *iv;
|
||||
struct skcipher_givcrypt_request *greq;
|
||||
|
||||
greq = container_of(req, struct skcipher_givcrypt_request,
|
||||
creq);
|
||||
iv = (u8 *)edesc->hw_desc + desc_bytes(edesc->hw_desc) +
|
||||
edesc->sec4_sg_bytes;
|
||||
memcpy(greq->giv, iv, ivsize);
|
||||
}
|
||||
|
||||
kfree(edesc);
|
||||
|
||||
ablkcipher_request_complete(req, err);
|
||||
@ -922,10 +938,10 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
|
||||
{
|
||||
struct ablkcipher_request *req = context;
|
||||
struct ablkcipher_edesc *edesc;
|
||||
#ifdef DEBUG
|
||||
struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
|
||||
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
|
||||
|
||||
#ifdef DEBUG
|
||||
dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
|
||||
#endif
|
||||
|
||||
@ -943,14 +959,6 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
|
||||
edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
|
||||
|
||||
ablkcipher_unmap(jrdev, edesc, req);
|
||||
|
||||
/*
|
||||
* The crypto API expects us to set the IV (req->info) to the last
|
||||
* ciphertext block.
|
||||
*/
|
||||
scatterwalk_map_and_copy(req->info, req->src, req->nbytes - ivsize,
|
||||
ivsize, 0);
|
||||
|
||||
kfree(edesc);
|
||||
|
||||
ablkcipher_request_complete(req, err);
|
||||
@ -1099,15 +1107,14 @@ static void init_authenc_job(struct aead_request *req,
|
||||
*/
|
||||
static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
|
||||
struct ablkcipher_edesc *edesc,
|
||||
struct ablkcipher_request *req,
|
||||
bool iv_contig)
|
||||
struct ablkcipher_request *req)
|
||||
{
|
||||
struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
|
||||
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
|
||||
u32 *desc = edesc->hw_desc;
|
||||
u32 out_options = 0, in_options;
|
||||
dma_addr_t dst_dma, src_dma;
|
||||
int len, sec4_sg_index = 0;
|
||||
u32 out_options = 0;
|
||||
dma_addr_t dst_dma;
|
||||
int len;
|
||||
|
||||
#ifdef DEBUG
|
||||
print_hex_dump(KERN_ERR, "presciv@"__stringify(__LINE__)": ",
|
||||
@ -1123,30 +1130,18 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
|
||||
len = desc_len(sh_desc);
|
||||
init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE);
|
||||
|
||||
if (iv_contig) {
|
||||
src_dma = edesc->iv_dma;
|
||||
in_options = 0;
|
||||
} else {
|
||||
src_dma = edesc->sec4_sg_dma;
|
||||
sec4_sg_index += edesc->src_nents + 1;
|
||||
in_options = LDST_SGF;
|
||||
}
|
||||
append_seq_in_ptr(desc, src_dma, req->nbytes + ivsize, in_options);
|
||||
append_seq_in_ptr(desc, edesc->sec4_sg_dma, req->nbytes + ivsize,
|
||||
LDST_SGF);
|
||||
|
||||
if (likely(req->src == req->dst)) {
|
||||
if (edesc->src_nents == 1 && iv_contig) {
|
||||
dst_dma = sg_dma_address(req->src);
|
||||
} else {
|
||||
dst_dma = edesc->sec4_sg_dma +
|
||||
sizeof(struct sec4_sg_entry);
|
||||
out_options = LDST_SGF;
|
||||
}
|
||||
dst_dma = edesc->sec4_sg_dma + sizeof(struct sec4_sg_entry);
|
||||
out_options = LDST_SGF;
|
||||
} else {
|
||||
if (edesc->dst_nents == 1) {
|
||||
dst_dma = sg_dma_address(req->dst);
|
||||
} else {
|
||||
dst_dma = edesc->sec4_sg_dma +
|
||||
sec4_sg_index * sizeof(struct sec4_sg_entry);
|
||||
dst_dma = edesc->sec4_sg_dma + (edesc->src_nents + 1) *
|
||||
sizeof(struct sec4_sg_entry);
|
||||
out_options = LDST_SGF;
|
||||
}
|
||||
}
|
||||
@ -1158,13 +1153,12 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
|
||||
*/
|
||||
static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr,
|
||||
struct ablkcipher_edesc *edesc,
|
||||
struct ablkcipher_request *req,
|
||||
bool iv_contig)
|
||||
struct ablkcipher_request *req)
|
||||
{
|
||||
struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
|
||||
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
|
||||
u32 *desc = edesc->hw_desc;
|
||||
u32 out_options, in_options;
|
||||
u32 in_options;
|
||||
dma_addr_t dst_dma, src_dma;
|
||||
int len, sec4_sg_index = 0;
|
||||
|
||||
@ -1190,15 +1184,9 @@ static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr,
|
||||
}
|
||||
append_seq_in_ptr(desc, src_dma, req->nbytes, in_options);
|
||||
|
||||
if (iv_contig) {
|
||||
dst_dma = edesc->iv_dma;
|
||||
out_options = 0;
|
||||
} else {
|
||||
dst_dma = edesc->sec4_sg_dma +
|
||||
sec4_sg_index * sizeof(struct sec4_sg_entry);
|
||||
out_options = LDST_SGF;
|
||||
}
|
||||
append_seq_out_ptr(desc, dst_dma, req->nbytes + ivsize, out_options);
|
||||
dst_dma = edesc->sec4_sg_dma + sec4_sg_index *
|
||||
sizeof(struct sec4_sg_entry);
|
||||
append_seq_out_ptr(desc, dst_dma, req->nbytes + ivsize, LDST_SGF);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1287,7 +1275,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
|
||||
GFP_DMA | flags);
|
||||
if (!edesc) {
|
||||
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
|
||||
0, 0, 0);
|
||||
0, DMA_NONE, 0, 0);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
@ -1491,8 +1479,7 @@ static int aead_decrypt(struct aead_request *req)
|
||||
* allocate and map the ablkcipher extended descriptor for ablkcipher
|
||||
*/
|
||||
static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
|
||||
*req, int desc_bytes,
|
||||
bool *iv_contig_out)
|
||||
*req, int desc_bytes)
|
||||
{
|
||||
struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
|
||||
struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
|
||||
@ -1501,8 +1488,8 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
|
||||
GFP_KERNEL : GFP_ATOMIC;
|
||||
int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
|
||||
struct ablkcipher_edesc *edesc;
|
||||
dma_addr_t iv_dma = 0;
|
||||
bool in_contig;
|
||||
dma_addr_t iv_dma;
|
||||
u8 *iv;
|
||||
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
|
||||
int dst_sg_idx, sec4_sg_ents, sec4_sg_bytes;
|
||||
|
||||
@ -1546,33 +1533,20 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
|
||||
}
|
||||
}
|
||||
|
||||
iv_dma = dma_map_single(jrdev, req->info, ivsize, DMA_TO_DEVICE);
|
||||
if (dma_mapping_error(jrdev, iv_dma)) {
|
||||
dev_err(jrdev, "unable to map IV\n");
|
||||
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
|
||||
0, 0, 0);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
if (mapped_src_nents == 1 &&
|
||||
iv_dma + ivsize == sg_dma_address(req->src)) {
|
||||
in_contig = true;
|
||||
sec4_sg_ents = 0;
|
||||
} else {
|
||||
in_contig = false;
|
||||
sec4_sg_ents = 1 + mapped_src_nents;
|
||||
}
|
||||
sec4_sg_ents = 1 + mapped_src_nents;
|
||||
dst_sg_idx = sec4_sg_ents;
|
||||
sec4_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
|
||||
sec4_sg_bytes = sec4_sg_ents * sizeof(struct sec4_sg_entry);
|
||||
|
||||
/* allocate space for base edesc and hw desc commands, link tables */
|
||||
edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes,
|
||||
/*
|
||||
* allocate space for base edesc and hw desc commands, link tables, IV
|
||||
*/
|
||||
edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes + ivsize,
|
||||
GFP_DMA | flags);
|
||||
if (!edesc) {
|
||||
dev_err(jrdev, "could not allocate extended descriptor\n");
|
||||
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
|
||||
iv_dma, ivsize, 0, 0);
|
||||
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
|
||||
0, DMA_NONE, 0, 0);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
@ -1581,13 +1555,24 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
|
||||
edesc->sec4_sg_bytes = sec4_sg_bytes;
|
||||
edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) +
|
||||
desc_bytes;
|
||||
edesc->iv_dir = DMA_TO_DEVICE;
|
||||
|
||||
if (!in_contig) {
|
||||
dma_to_sec4_sg_one(edesc->sec4_sg, iv_dma, ivsize, 0);
|
||||
sg_to_sec4_sg_last(req->src, mapped_src_nents,
|
||||
edesc->sec4_sg + 1, 0);
|
||||
/* Make sure IV is located in a DMAable area */
|
||||
iv = (u8 *)edesc->hw_desc + desc_bytes + sec4_sg_bytes;
|
||||
memcpy(iv, req->info, ivsize);
|
||||
|
||||
iv_dma = dma_map_single(jrdev, iv, ivsize, DMA_TO_DEVICE);
|
||||
if (dma_mapping_error(jrdev, iv_dma)) {
|
||||
dev_err(jrdev, "unable to map IV\n");
|
||||
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
|
||||
0, DMA_NONE, 0, 0);
|
||||
kfree(edesc);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
dma_to_sec4_sg_one(edesc->sec4_sg, iv_dma, ivsize, 0);
|
||||
sg_to_sec4_sg_last(req->src, mapped_src_nents, edesc->sec4_sg + 1, 0);
|
||||
|
||||
if (mapped_dst_nents > 1) {
|
||||
sg_to_sec4_sg_last(req->dst, mapped_dst_nents,
|
||||
edesc->sec4_sg + dst_sg_idx, 0);
|
||||
@ -1598,7 +1583,7 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
|
||||
if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
|
||||
dev_err(jrdev, "unable to map S/G table\n");
|
||||
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
|
||||
iv_dma, ivsize, 0, 0);
|
||||
iv_dma, ivsize, DMA_TO_DEVICE, 0, 0);
|
||||
kfree(edesc);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
@ -1611,7 +1596,6 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
|
||||
sec4_sg_bytes, 1);
|
||||
#endif
|
||||
|
||||
*iv_contig_out = in_contig;
|
||||
return edesc;
|
||||
}
|
||||
|
||||
@ -1621,19 +1605,16 @@ static int ablkcipher_encrypt(struct ablkcipher_request *req)
|
||||
struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
|
||||
struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
|
||||
struct device *jrdev = ctx->jrdev;
|
||||
bool iv_contig;
|
||||
u32 *desc;
|
||||
int ret = 0;
|
||||
|
||||
/* allocate extended descriptor */
|
||||
edesc = ablkcipher_edesc_alloc(req, DESC_JOB_IO_LEN *
|
||||
CAAM_CMD_SZ, &iv_contig);
|
||||
edesc = ablkcipher_edesc_alloc(req, DESC_JOB_IO_LEN * CAAM_CMD_SZ);
|
||||
if (IS_ERR(edesc))
|
||||
return PTR_ERR(edesc);
|
||||
|
||||
/* Create and submit job descriptor*/
|
||||
init_ablkcipher_job(ctx->sh_desc_enc,
|
||||
ctx->sh_desc_enc_dma, edesc, req, iv_contig);
|
||||
init_ablkcipher_job(ctx->sh_desc_enc, ctx->sh_desc_enc_dma, edesc, req);
|
||||
#ifdef DEBUG
|
||||
print_hex_dump(KERN_ERR, "ablkcipher jobdesc@"__stringify(__LINE__)": ",
|
||||
DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
|
||||
@ -1657,20 +1638,25 @@ static int ablkcipher_decrypt(struct ablkcipher_request *req)
|
||||
struct ablkcipher_edesc *edesc;
|
||||
struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
|
||||
struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
|
||||
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
|
||||
struct device *jrdev = ctx->jrdev;
|
||||
bool iv_contig;
|
||||
u32 *desc;
|
||||
int ret = 0;
|
||||
|
||||
/* allocate extended descriptor */
|
||||
edesc = ablkcipher_edesc_alloc(req, DESC_JOB_IO_LEN *
|
||||
CAAM_CMD_SZ, &iv_contig);
|
||||
edesc = ablkcipher_edesc_alloc(req, DESC_JOB_IO_LEN * CAAM_CMD_SZ);
|
||||
if (IS_ERR(edesc))
|
||||
return PTR_ERR(edesc);
|
||||
|
||||
/*
|
||||
* The crypto API expects us to set the IV (req->info) to the last
|
||||
* ciphertext block.
|
||||
*/
|
||||
scatterwalk_map_and_copy(req->info, req->src, req->nbytes - ivsize,
|
||||
ivsize, 0);
|
||||
|
||||
/* Create and submit job descriptor*/
|
||||
init_ablkcipher_job(ctx->sh_desc_dec,
|
||||
ctx->sh_desc_dec_dma, edesc, req, iv_contig);
|
||||
init_ablkcipher_job(ctx->sh_desc_dec, ctx->sh_desc_dec_dma, edesc, req);
|
||||
desc = edesc->hw_desc;
|
||||
#ifdef DEBUG
|
||||
print_hex_dump(KERN_ERR, "ablkcipher jobdesc@"__stringify(__LINE__)": ",
|
||||
@ -1695,8 +1681,7 @@ static int ablkcipher_decrypt(struct ablkcipher_request *req)
|
||||
*/
|
||||
static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
|
||||
struct skcipher_givcrypt_request *greq,
|
||||
int desc_bytes,
|
||||
bool *iv_contig_out)
|
||||
int desc_bytes)
|
||||
{
|
||||
struct ablkcipher_request *req = &greq->creq;
|
||||
struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
|
||||
@ -1706,8 +1691,8 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
|
||||
GFP_KERNEL : GFP_ATOMIC;
|
||||
int src_nents, mapped_src_nents, dst_nents, mapped_dst_nents;
|
||||
struct ablkcipher_edesc *edesc;
|
||||
dma_addr_t iv_dma = 0;
|
||||
bool out_contig;
|
||||
dma_addr_t iv_dma;
|
||||
u8 *iv;
|
||||
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
|
||||
int dst_sg_idx, sec4_sg_ents, sec4_sg_bytes;
|
||||
|
||||
@ -1752,36 +1737,20 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if iv can be contiguous with source and destination.
|
||||
* If so, include it. If not, create scatterlist.
|
||||
*/
|
||||
iv_dma = dma_map_single(jrdev, greq->giv, ivsize, DMA_TO_DEVICE);
|
||||
if (dma_mapping_error(jrdev, iv_dma)) {
|
||||
dev_err(jrdev, "unable to map IV\n");
|
||||
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
|
||||
0, 0, 0);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
sec4_sg_ents = mapped_src_nents > 1 ? mapped_src_nents : 0;
|
||||
dst_sg_idx = sec4_sg_ents;
|
||||
if (mapped_dst_nents == 1 &&
|
||||
iv_dma + ivsize == sg_dma_address(req->dst)) {
|
||||
out_contig = true;
|
||||
} else {
|
||||
out_contig = false;
|
||||
sec4_sg_ents += 1 + mapped_dst_nents;
|
||||
}
|
||||
sec4_sg_ents += 1 + mapped_dst_nents;
|
||||
|
||||
/* allocate space for base edesc and hw desc commands, link tables */
|
||||
/*
|
||||
* allocate space for base edesc and hw desc commands, link tables, IV
|
||||
*/
|
||||
sec4_sg_bytes = sec4_sg_ents * sizeof(struct sec4_sg_entry);
|
||||
edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes,
|
||||
edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes + ivsize,
|
||||
GFP_DMA | flags);
|
||||
if (!edesc) {
|
||||
dev_err(jrdev, "could not allocate extended descriptor\n");
|
||||
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
|
||||
iv_dma, ivsize, 0, 0);
|
||||
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
|
||||
0, DMA_NONE, 0, 0);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
@ -1790,24 +1759,33 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
|
||||
edesc->sec4_sg_bytes = sec4_sg_bytes;
|
||||
edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) +
|
||||
desc_bytes;
|
||||
edesc->iv_dir = DMA_FROM_DEVICE;
|
||||
|
||||
/* Make sure IV is located in a DMAable area */
|
||||
iv = (u8 *)edesc->hw_desc + desc_bytes + sec4_sg_bytes;
|
||||
iv_dma = dma_map_single(jrdev, iv, ivsize, DMA_FROM_DEVICE);
|
||||
if (dma_mapping_error(jrdev, iv_dma)) {
|
||||
dev_err(jrdev, "unable to map IV\n");
|
||||
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
|
||||
0, DMA_NONE, 0, 0);
|
||||
kfree(edesc);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
if (mapped_src_nents > 1)
|
||||
sg_to_sec4_sg_last(req->src, mapped_src_nents, edesc->sec4_sg,
|
||||
0);
|
||||
|
||||
if (!out_contig) {
|
||||
dma_to_sec4_sg_one(edesc->sec4_sg + dst_sg_idx,
|
||||
iv_dma, ivsize, 0);
|
||||
sg_to_sec4_sg_last(req->dst, mapped_dst_nents,
|
||||
edesc->sec4_sg + dst_sg_idx + 1, 0);
|
||||
}
|
||||
dma_to_sec4_sg_one(edesc->sec4_sg + dst_sg_idx, iv_dma, ivsize, 0);
|
||||
sg_to_sec4_sg_last(req->dst, mapped_dst_nents, edesc->sec4_sg +
|
||||
dst_sg_idx + 1, 0);
|
||||
|
||||
edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
|
||||
sec4_sg_bytes, DMA_TO_DEVICE);
|
||||
if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
|
||||
dev_err(jrdev, "unable to map S/G table\n");
|
||||
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
|
||||
iv_dma, ivsize, 0, 0);
|
||||
iv_dma, ivsize, DMA_FROM_DEVICE, 0, 0);
|
||||
kfree(edesc);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
@ -1820,7 +1798,6 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
|
||||
sec4_sg_bytes, 1);
|
||||
#endif
|
||||
|
||||
*iv_contig_out = out_contig;
|
||||
return edesc;
|
||||
}
|
||||
|
||||
@ -1831,19 +1808,17 @@ static int ablkcipher_givencrypt(struct skcipher_givcrypt_request *creq)
|
||||
struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
|
||||
struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
|
||||
struct device *jrdev = ctx->jrdev;
|
||||
bool iv_contig = false;
|
||||
u32 *desc;
|
||||
int ret = 0;
|
||||
|
||||
/* allocate extended descriptor */
|
||||
edesc = ablkcipher_giv_edesc_alloc(creq, DESC_JOB_IO_LEN *
|
||||
CAAM_CMD_SZ, &iv_contig);
|
||||
edesc = ablkcipher_giv_edesc_alloc(creq, DESC_JOB_IO_LEN * CAAM_CMD_SZ);
|
||||
if (IS_ERR(edesc))
|
||||
return PTR_ERR(edesc);
|
||||
|
||||
/* Create and submit job descriptor*/
|
||||
init_ablkcipher_giv_job(ctx->sh_desc_givenc, ctx->sh_desc_givenc_dma,
|
||||
edesc, req, iv_contig);
|
||||
edesc, req);
|
||||
#ifdef DEBUG
|
||||
print_hex_dump(KERN_ERR,
|
||||
"ablkcipher jobdesc@" __stringify(__LINE__) ": ",
|
||||
|
@ -1093,7 +1093,7 @@ void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata,
|
||||
read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 |
|
||||
(0x6 << MOVE_LEN_SHIFT));
|
||||
write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF |
|
||||
(0x8 << MOVE_LEN_SHIFT));
|
||||
(0x8 << MOVE_LEN_SHIFT) | MOVE_WAITCOMP);
|
||||
|
||||
/* Will read assoclen + cryptlen bytes */
|
||||
append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
|
||||
@ -1178,7 +1178,7 @@ void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata,
|
||||
read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 |
|
||||
(0x6 << MOVE_LEN_SHIFT));
|
||||
write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF |
|
||||
(0x8 << MOVE_LEN_SHIFT));
|
||||
(0x8 << MOVE_LEN_SHIFT) | MOVE_WAITCOMP);
|
||||
|
||||
/* Will read assoclen + cryptlen bytes */
|
||||
append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
|
||||
|
@ -728,7 +728,7 @@ badkey:
|
||||
* @assoclen: associated data length, in CAAM endianness
|
||||
* @assoclen_dma: bus physical mapped address of req->assoclen
|
||||
* @drv_req: driver-specific request structure
|
||||
* @sgt: the h/w link table
|
||||
* @sgt: the h/w link table, followed by IV
|
||||
*/
|
||||
struct aead_edesc {
|
||||
int src_nents;
|
||||
@ -739,9 +739,6 @@ struct aead_edesc {
|
||||
unsigned int assoclen;
|
||||
dma_addr_t assoclen_dma;
|
||||
struct caam_drv_req drv_req;
|
||||
#define CAAM_QI_MAX_AEAD_SG \
|
||||
((CAAM_QI_MEMCACHE_SIZE - offsetof(struct aead_edesc, sgt)) / \
|
||||
sizeof(struct qm_sg_entry))
|
||||
struct qm_sg_entry sgt[0];
|
||||
};
|
||||
|
||||
@ -753,7 +750,7 @@ struct aead_edesc {
|
||||
* @qm_sg_bytes: length of dma mapped h/w link table
|
||||
* @qm_sg_dma: bus physical mapped address of h/w link table
|
||||
* @drv_req: driver-specific request structure
|
||||
* @sgt: the h/w link table
|
||||
* @sgt: the h/w link table, followed by IV
|
||||
*/
|
||||
struct ablkcipher_edesc {
|
||||
int src_nents;
|
||||
@ -762,9 +759,6 @@ struct ablkcipher_edesc {
|
||||
int qm_sg_bytes;
|
||||
dma_addr_t qm_sg_dma;
|
||||
struct caam_drv_req drv_req;
|
||||
#define CAAM_QI_MAX_ABLKCIPHER_SG \
|
||||
((CAAM_QI_MEMCACHE_SIZE - offsetof(struct ablkcipher_edesc, sgt)) / \
|
||||
sizeof(struct qm_sg_entry))
|
||||
struct qm_sg_entry sgt[0];
|
||||
};
|
||||
|
||||
@ -986,17 +980,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
|
||||
}
|
||||
}
|
||||
|
||||
if ((alg->caam.rfc3686 && encrypt) || !alg->caam.geniv) {
|
||||
if ((alg->caam.rfc3686 && encrypt) || !alg->caam.geniv)
|
||||
ivsize = crypto_aead_ivsize(aead);
|
||||
iv_dma = dma_map_single(qidev, req->iv, ivsize, DMA_TO_DEVICE);
|
||||
if (dma_mapping_error(qidev, iv_dma)) {
|
||||
dev_err(qidev, "unable to map IV\n");
|
||||
caam_unmap(qidev, req->src, req->dst, src_nents,
|
||||
dst_nents, 0, 0, op_type, 0, 0);
|
||||
qi_cache_free(edesc);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Create S/G table: req->assoclen, [IV,] req->src [, req->dst].
|
||||
@ -1004,16 +989,33 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
|
||||
*/
|
||||
qm_sg_ents = 1 + !!ivsize + mapped_src_nents +
|
||||
(mapped_dst_nents > 1 ? mapped_dst_nents : 0);
|
||||
if (unlikely(qm_sg_ents > CAAM_QI_MAX_AEAD_SG)) {
|
||||
dev_err(qidev, "Insufficient S/G entries: %d > %zu\n",
|
||||
qm_sg_ents, CAAM_QI_MAX_AEAD_SG);
|
||||
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
|
||||
iv_dma, ivsize, op_type, 0, 0);
|
||||
sg_table = &edesc->sgt[0];
|
||||
qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
|
||||
if (unlikely(offsetof(struct aead_edesc, sgt) + qm_sg_bytes + ivsize >
|
||||
CAAM_QI_MEMCACHE_SIZE)) {
|
||||
dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n",
|
||||
qm_sg_ents, ivsize);
|
||||
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
|
||||
0, 0, 0, 0);
|
||||
qi_cache_free(edesc);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
sg_table = &edesc->sgt[0];
|
||||
qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
|
||||
|
||||
if (ivsize) {
|
||||
u8 *iv = (u8 *)(sg_table + qm_sg_ents);
|
||||
|
||||
/* Make sure IV is located in a DMAable area */
|
||||
memcpy(iv, req->iv, ivsize);
|
||||
|
||||
iv_dma = dma_map_single(qidev, iv, ivsize, DMA_TO_DEVICE);
|
||||
if (dma_mapping_error(qidev, iv_dma)) {
|
||||
dev_err(qidev, "unable to map IV\n");
|
||||
caam_unmap(qidev, req->src, req->dst, src_nents,
|
||||
dst_nents, 0, 0, 0, 0, 0);
|
||||
qi_cache_free(edesc);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
}
|
||||
|
||||
edesc->src_nents = src_nents;
|
||||
edesc->dst_nents = dst_nents;
|
||||
@ -1166,15 +1168,27 @@ static void ablkcipher_done(struct caam_drv_req *drv_req, u32 status)
|
||||
#endif
|
||||
|
||||
ablkcipher_unmap(qidev, edesc, req);
|
||||
qi_cache_free(edesc);
|
||||
|
||||
/* In case initial IV was generated, copy it in GIVCIPHER request */
|
||||
if (edesc->drv_req.drv_ctx->op_type == GIVENCRYPT) {
|
||||
u8 *iv;
|
||||
struct skcipher_givcrypt_request *greq;
|
||||
|
||||
greq = container_of(req, struct skcipher_givcrypt_request,
|
||||
creq);
|
||||
iv = (u8 *)edesc->sgt + edesc->qm_sg_bytes;
|
||||
memcpy(greq->giv, iv, ivsize);
|
||||
}
|
||||
|
||||
/*
|
||||
* The crypto API expects us to set the IV (req->info) to the last
|
||||
* ciphertext block. This is used e.g. by the CTS mode.
|
||||
*/
|
||||
scatterwalk_map_and_copy(req->info, req->dst, req->nbytes - ivsize,
|
||||
ivsize, 0);
|
||||
if (edesc->drv_req.drv_ctx->op_type != DECRYPT)
|
||||
scatterwalk_map_and_copy(req->info, req->dst, req->nbytes -
|
||||
ivsize, ivsize, 0);
|
||||
|
||||
qi_cache_free(edesc);
|
||||
ablkcipher_request_complete(req, status);
|
||||
}
|
||||
|
||||
@ -1189,9 +1203,9 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
|
||||
int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
|
||||
struct ablkcipher_edesc *edesc;
|
||||
dma_addr_t iv_dma;
|
||||
bool in_contig;
|
||||
u8 *iv;
|
||||
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
|
||||
int dst_sg_idx, qm_sg_ents;
|
||||
int dst_sg_idx, qm_sg_ents, qm_sg_bytes;
|
||||
struct qm_sg_entry *sg_table, *fd_sgt;
|
||||
struct caam_drv_ctx *drv_ctx;
|
||||
enum optype op_type = encrypt ? ENCRYPT : DECRYPT;
|
||||
@ -1238,55 +1252,53 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
|
||||
}
|
||||
}
|
||||
|
||||
iv_dma = dma_map_single(qidev, req->info, ivsize, DMA_TO_DEVICE);
|
||||
if (dma_mapping_error(qidev, iv_dma)) {
|
||||
dev_err(qidev, "unable to map IV\n");
|
||||
qm_sg_ents = 1 + mapped_src_nents;
|
||||
dst_sg_idx = qm_sg_ents;
|
||||
|
||||
qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
|
||||
qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry);
|
||||
if (unlikely(offsetof(struct ablkcipher_edesc, sgt) + qm_sg_bytes +
|
||||
ivsize > CAAM_QI_MEMCACHE_SIZE)) {
|
||||
dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n",
|
||||
qm_sg_ents, ivsize);
|
||||
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
|
||||
0, 0, 0, 0);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
if (mapped_src_nents == 1 &&
|
||||
iv_dma + ivsize == sg_dma_address(req->src)) {
|
||||
in_contig = true;
|
||||
qm_sg_ents = 0;
|
||||
} else {
|
||||
in_contig = false;
|
||||
qm_sg_ents = 1 + mapped_src_nents;
|
||||
}
|
||||
dst_sg_idx = qm_sg_ents;
|
||||
|
||||
qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
|
||||
if (unlikely(qm_sg_ents > CAAM_QI_MAX_ABLKCIPHER_SG)) {
|
||||
dev_err(qidev, "Insufficient S/G entries: %d > %zu\n",
|
||||
qm_sg_ents, CAAM_QI_MAX_ABLKCIPHER_SG);
|
||||
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
|
||||
iv_dma, ivsize, op_type, 0, 0);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
/* allocate space for base edesc and link tables */
|
||||
/* allocate space for base edesc, link tables and IV */
|
||||
edesc = qi_cache_alloc(GFP_DMA | flags);
|
||||
if (unlikely(!edesc)) {
|
||||
dev_err(qidev, "could not allocate extended descriptor\n");
|
||||
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
|
||||
iv_dma, ivsize, op_type, 0, 0);
|
||||
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
|
||||
0, 0, 0, 0);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
/* Make sure IV is located in a DMAable area */
|
||||
sg_table = &edesc->sgt[0];
|
||||
iv = (u8 *)(sg_table + qm_sg_ents);
|
||||
memcpy(iv, req->info, ivsize);
|
||||
|
||||
iv_dma = dma_map_single(qidev, iv, ivsize, DMA_TO_DEVICE);
|
||||
if (dma_mapping_error(qidev, iv_dma)) {
|
||||
dev_err(qidev, "unable to map IV\n");
|
||||
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
|
||||
0, 0, 0, 0);
|
||||
qi_cache_free(edesc);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
edesc->src_nents = src_nents;
|
||||
edesc->dst_nents = dst_nents;
|
||||
edesc->iv_dma = iv_dma;
|
||||
sg_table = &edesc->sgt[0];
|
||||
edesc->qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
|
||||
edesc->qm_sg_bytes = qm_sg_bytes;
|
||||
edesc->drv_req.app_ctx = req;
|
||||
edesc->drv_req.cbk = ablkcipher_done;
|
||||
edesc->drv_req.drv_ctx = drv_ctx;
|
||||
|
||||
if (!in_contig) {
|
||||
dma_to_qm_sg_one(sg_table, iv_dma, ivsize, 0);
|
||||
sg_to_qm_sg_last(req->src, mapped_src_nents, sg_table + 1, 0);
|
||||
}
|
||||
dma_to_qm_sg_one(sg_table, iv_dma, ivsize, 0);
|
||||
sg_to_qm_sg_last(req->src, mapped_src_nents, sg_table + 1, 0);
|
||||
|
||||
if (mapped_dst_nents > 1)
|
||||
sg_to_qm_sg_last(req->dst, mapped_dst_nents, sg_table +
|
||||
@ -1304,20 +1316,12 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
|
||||
|
||||
fd_sgt = &edesc->drv_req.fd_sgt[0];
|
||||
|
||||
if (!in_contig)
|
||||
dma_to_qm_sg_one_last_ext(&fd_sgt[1], edesc->qm_sg_dma,
|
||||
ivsize + req->nbytes, 0);
|
||||
else
|
||||
dma_to_qm_sg_one_last(&fd_sgt[1], iv_dma, ivsize + req->nbytes,
|
||||
0);
|
||||
dma_to_qm_sg_one_last_ext(&fd_sgt[1], edesc->qm_sg_dma,
|
||||
ivsize + req->nbytes, 0);
|
||||
|
||||
if (req->src == req->dst) {
|
||||
if (!in_contig)
|
||||
dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma +
|
||||
sizeof(*sg_table), req->nbytes, 0);
|
||||
else
|
||||
dma_to_qm_sg_one(&fd_sgt[0], sg_dma_address(req->src),
|
||||
req->nbytes, 0);
|
||||
dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma +
|
||||
sizeof(*sg_table), req->nbytes, 0);
|
||||
} else if (mapped_dst_nents > 1) {
|
||||
dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma + dst_sg_idx *
|
||||
sizeof(*sg_table), req->nbytes, 0);
|
||||
@ -1341,10 +1345,10 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
|
||||
int src_nents, mapped_src_nents, dst_nents, mapped_dst_nents;
|
||||
struct ablkcipher_edesc *edesc;
|
||||
dma_addr_t iv_dma;
|
||||
bool out_contig;
|
||||
u8 *iv;
|
||||
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
|
||||
struct qm_sg_entry *sg_table, *fd_sgt;
|
||||
int dst_sg_idx, qm_sg_ents;
|
||||
int dst_sg_idx, qm_sg_ents, qm_sg_bytes;
|
||||
struct caam_drv_ctx *drv_ctx;
|
||||
|
||||
drv_ctx = get_drv_ctx(ctx, GIVENCRYPT);
|
||||
@ -1392,46 +1396,45 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
|
||||
mapped_dst_nents = src_nents;
|
||||
}
|
||||
|
||||
iv_dma = dma_map_single(qidev, creq->giv, ivsize, DMA_FROM_DEVICE);
|
||||
if (dma_mapping_error(qidev, iv_dma)) {
|
||||
dev_err(qidev, "unable to map IV\n");
|
||||
qm_sg_ents = mapped_src_nents > 1 ? mapped_src_nents : 0;
|
||||
dst_sg_idx = qm_sg_ents;
|
||||
|
||||
qm_sg_ents += 1 + mapped_dst_nents;
|
||||
qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry);
|
||||
if (unlikely(offsetof(struct ablkcipher_edesc, sgt) + qm_sg_bytes +
|
||||
ivsize > CAAM_QI_MEMCACHE_SIZE)) {
|
||||
dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n",
|
||||
qm_sg_ents, ivsize);
|
||||
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
|
||||
0, 0, 0, 0);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
qm_sg_ents = mapped_src_nents > 1 ? mapped_src_nents : 0;
|
||||
dst_sg_idx = qm_sg_ents;
|
||||
if (mapped_dst_nents == 1 &&
|
||||
iv_dma + ivsize == sg_dma_address(req->dst)) {
|
||||
out_contig = true;
|
||||
} else {
|
||||
out_contig = false;
|
||||
qm_sg_ents += 1 + mapped_dst_nents;
|
||||
}
|
||||
|
||||
if (unlikely(qm_sg_ents > CAAM_QI_MAX_ABLKCIPHER_SG)) {
|
||||
dev_err(qidev, "Insufficient S/G entries: %d > %zu\n",
|
||||
qm_sg_ents, CAAM_QI_MAX_ABLKCIPHER_SG);
|
||||
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
|
||||
iv_dma, ivsize, GIVENCRYPT, 0, 0);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
/* allocate space for base edesc and link tables */
|
||||
/* allocate space for base edesc, link tables and IV */
|
||||
edesc = qi_cache_alloc(GFP_DMA | flags);
|
||||
if (!edesc) {
|
||||
dev_err(qidev, "could not allocate extended descriptor\n");
|
||||
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
|
||||
iv_dma, ivsize, GIVENCRYPT, 0, 0);
|
||||
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
|
||||
0, 0, 0, 0);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
/* Make sure IV is located in a DMAable area */
|
||||
sg_table = &edesc->sgt[0];
|
||||
iv = (u8 *)(sg_table + qm_sg_ents);
|
||||
iv_dma = dma_map_single(qidev, iv, ivsize, DMA_FROM_DEVICE);
|
||||
if (dma_mapping_error(qidev, iv_dma)) {
|
||||
dev_err(qidev, "unable to map IV\n");
|
||||
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
|
||||
0, 0, 0, 0);
|
||||
qi_cache_free(edesc);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
edesc->src_nents = src_nents;
|
||||
edesc->dst_nents = dst_nents;
|
||||
edesc->iv_dma = iv_dma;
|
||||
sg_table = &edesc->sgt[0];
|
||||
edesc->qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
|
||||
edesc->qm_sg_bytes = qm_sg_bytes;
|
||||
edesc->drv_req.app_ctx = req;
|
||||
edesc->drv_req.cbk = ablkcipher_done;
|
||||
edesc->drv_req.drv_ctx = drv_ctx;
|
||||
@ -1439,11 +1442,9 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
|
||||
if (mapped_src_nents > 1)
|
||||
sg_to_qm_sg_last(req->src, mapped_src_nents, sg_table, 0);
|
||||
|
||||
if (!out_contig) {
|
||||
dma_to_qm_sg_one(sg_table + dst_sg_idx, iv_dma, ivsize, 0);
|
||||
sg_to_qm_sg_last(req->dst, mapped_dst_nents, sg_table +
|
||||
dst_sg_idx + 1, 0);
|
||||
}
|
||||
dma_to_qm_sg_one(sg_table + dst_sg_idx, iv_dma, ivsize, 0);
|
||||
sg_to_qm_sg_last(req->dst, mapped_dst_nents, sg_table + dst_sg_idx + 1,
|
||||
0);
|
||||
|
||||
edesc->qm_sg_dma = dma_map_single(qidev, sg_table, edesc->qm_sg_bytes,
|
||||
DMA_TO_DEVICE);
|
||||
@ -1464,13 +1465,8 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
|
||||
dma_to_qm_sg_one(&fd_sgt[1], sg_dma_address(req->src),
|
||||
req->nbytes, 0);
|
||||
|
||||
if (!out_contig)
|
||||
dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma + dst_sg_idx *
|
||||
sizeof(*sg_table), ivsize + req->nbytes,
|
||||
0);
|
||||
else
|
||||
dma_to_qm_sg_one(&fd_sgt[0], sg_dma_address(req->dst),
|
||||
ivsize + req->nbytes, 0);
|
||||
dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma + dst_sg_idx *
|
||||
sizeof(*sg_table), ivsize + req->nbytes, 0);
|
||||
|
||||
return edesc;
|
||||
}
|
||||
@ -1480,6 +1476,7 @@ static inline int ablkcipher_crypt(struct ablkcipher_request *req, bool encrypt)
|
||||
struct ablkcipher_edesc *edesc;
|
||||
struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
|
||||
struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
|
||||
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
|
||||
int ret;
|
||||
|
||||
if (unlikely(caam_congested))
|
||||
@ -1490,6 +1487,14 @@ static inline int ablkcipher_crypt(struct ablkcipher_request *req, bool encrypt)
|
||||
if (IS_ERR(edesc))
|
||||
return PTR_ERR(edesc);
|
||||
|
||||
/*
|
||||
* The crypto API expects us to set the IV (req->info) to the last
|
||||
* ciphertext block.
|
||||
*/
|
||||
if (!encrypt)
|
||||
scatterwalk_map_and_copy(req->info, req->src, req->nbytes -
|
||||
ivsize, ivsize, 0);
|
||||
|
||||
ret = caam_qi_enqueue(ctx->qidev, &edesc->drv_req);
|
||||
if (!ret) {
|
||||
ret = -EINPROGRESS;
|
||||
|
@ -66,7 +66,7 @@ static void rsa_priv_f2_unmap(struct device *dev, struct rsa_edesc *edesc,
|
||||
struct caam_rsa_key *key = &ctx->key;
|
||||
struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2;
|
||||
size_t p_sz = key->p_sz;
|
||||
size_t q_sz = key->p_sz;
|
||||
size_t q_sz = key->q_sz;
|
||||
|
||||
dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE);
|
||||
dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
|
||||
@ -83,7 +83,7 @@ static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc,
|
||||
struct caam_rsa_key *key = &ctx->key;
|
||||
struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3;
|
||||
size_t p_sz = key->p_sz;
|
||||
size_t q_sz = key->p_sz;
|
||||
size_t q_sz = key->q_sz;
|
||||
|
||||
dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
|
||||
dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE);
|
||||
@ -166,18 +166,71 @@ static void rsa_priv_f3_done(struct device *dev, u32 *desc, u32 err,
|
||||
akcipher_request_complete(req, err);
|
||||
}
|
||||
|
||||
static int caam_rsa_count_leading_zeros(struct scatterlist *sgl,
|
||||
unsigned int nbytes,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct sg_mapping_iter miter;
|
||||
int lzeros, ents;
|
||||
unsigned int len;
|
||||
unsigned int tbytes = nbytes;
|
||||
const u8 *buff;
|
||||
|
||||
ents = sg_nents_for_len(sgl, nbytes);
|
||||
if (ents < 0)
|
||||
return ents;
|
||||
|
||||
sg_miter_start(&miter, sgl, ents, SG_MITER_FROM_SG | flags);
|
||||
|
||||
lzeros = 0;
|
||||
len = 0;
|
||||
while (nbytes > 0) {
|
||||
while (len && !*buff) {
|
||||
lzeros++;
|
||||
len--;
|
||||
buff++;
|
||||
}
|
||||
|
||||
if (len && *buff)
|
||||
break;
|
||||
|
||||
sg_miter_next(&miter);
|
||||
buff = miter.addr;
|
||||
len = miter.length;
|
||||
|
||||
nbytes -= lzeros;
|
||||
lzeros = 0;
|
||||
}
|
||||
|
||||
miter.consumed = lzeros;
|
||||
sg_miter_stop(&miter);
|
||||
nbytes -= lzeros;
|
||||
|
||||
return tbytes - nbytes;
|
||||
}
|
||||
|
||||
static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
|
||||
size_t desclen)
|
||||
{
|
||||
struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
|
||||
struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
|
||||
struct device *dev = ctx->dev;
|
||||
struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
|
||||
struct rsa_edesc *edesc;
|
||||
gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
|
||||
GFP_KERNEL : GFP_ATOMIC;
|
||||
int sg_flags = (flags == GFP_ATOMIC) ? SG_MITER_ATOMIC : 0;
|
||||
int sgc;
|
||||
int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes;
|
||||
int src_nents, dst_nents;
|
||||
int lzeros;
|
||||
|
||||
lzeros = caam_rsa_count_leading_zeros(req->src, req->src_len, sg_flags);
|
||||
if (lzeros < 0)
|
||||
return ERR_PTR(lzeros);
|
||||
|
||||
req->src_len -= lzeros;
|
||||
req->src = scatterwalk_ffwd(req_ctx->src, req->src, lzeros);
|
||||
|
||||
src_nents = sg_nents_for_len(req->src, req->src_len);
|
||||
dst_nents = sg_nents_for_len(req->dst, req->dst_len);
|
||||
@ -344,7 +397,7 @@ static int set_rsa_priv_f2_pdb(struct akcipher_request *req,
|
||||
struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2;
|
||||
int sec4_sg_index = 0;
|
||||
size_t p_sz = key->p_sz;
|
||||
size_t q_sz = key->p_sz;
|
||||
size_t q_sz = key->q_sz;
|
||||
|
||||
pdb->d_dma = dma_map_single(dev, key->d, key->d_sz, DMA_TO_DEVICE);
|
||||
if (dma_mapping_error(dev, pdb->d_dma)) {
|
||||
@ -419,7 +472,7 @@ static int set_rsa_priv_f3_pdb(struct akcipher_request *req,
|
||||
struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3;
|
||||
int sec4_sg_index = 0;
|
||||
size_t p_sz = key->p_sz;
|
||||
size_t q_sz = key->p_sz;
|
||||
size_t q_sz = key->q_sz;
|
||||
|
||||
pdb->p_dma = dma_map_single(dev, key->p, p_sz, DMA_TO_DEVICE);
|
||||
if (dma_mapping_error(dev, pdb->p_dma)) {
|
||||
@ -730,19 +783,12 @@ static u8 *caam_read_rsa_crt(const u8 *ptr, size_t nbytes, size_t dstlen)
|
||||
*/
|
||||
static inline u8 *caam_read_raw_data(const u8 *buf, size_t *nbytes)
{
	/*
	 * NOTE(review): presumably advances buf past leading zero bytes and
	 * shrinks *nbytes to match; the helper is defined elsewhere - confirm.
	 */
	caam_rsa_drop_leading_zeros(&buf, nbytes);
	if (!*nbytes)
		return NULL;	/* nothing left to copy */

	/*
	 * Single allocate-and-copy step; replaces the open-coded
	 * kzalloc() + memcpy() pair and the unreachable second return.
	 */
	return kmemdup(buf, *nbytes, GFP_DMA | GFP_KERNEL);
}
|
||||
|
||||
static int caam_rsa_check_key_length(unsigned int len)
|
||||
@ -953,6 +999,7 @@ static struct akcipher_alg caam_rsa = {
|
||||
.max_size = caam_rsa_max_size,
|
||||
.init = caam_rsa_init_tfm,
|
||||
.exit = caam_rsa_exit_tfm,
|
||||
.reqsize = sizeof(struct caam_rsa_req_ctx),
|
||||
.base = {
|
||||
.cra_name = "rsa",
|
||||
.cra_driver_name = "rsa-caam",
|
||||
|
@ -95,6 +95,14 @@ struct caam_rsa_ctx {
|
||||
struct device *dev;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct caam_rsa_req_ctx - per request context.
|
||||
* @src: input scatterlist (stripped of leading zeros); two-entry array that
|
||||
*       rsa_edesc_alloc() hands to scatterwalk_ffwd() to skip the zeros
|
||||
*/
|
||||
struct caam_rsa_req_ctx {
|
||||
struct scatterlist src[2];
|
||||
};
|
||||
|
||||
/**
|
||||
* rsa_edesc - s/w-extended rsa descriptor
|
||||
* @src_nents : number of segments in input scatterlist
|
||||
|
@ -322,9 +322,9 @@ static int caam_remove(struct platform_device *pdev)
|
||||
|
||||
/*
|
||||
* De-initialize RNG state handles initialized by this driver.
|
||||
* In case of DPAA 2.x, RNG is managed by MC firmware.
|
||||
* In case of SoCs with Management Complex, RNG is managed by MC f/w.
|
||||
*/
|
||||
if (!caam_dpaa2 && ctrlpriv->rng4_sh_init)
|
||||
if (!ctrlpriv->mc_en && ctrlpriv->rng4_sh_init)
|
||||
deinstantiate_rng(ctrldev, ctrlpriv->rng4_sh_init);
|
||||
|
||||
/* Shut down debug views */
|
||||
@ -396,11 +396,56 @@ start_rng:
|
||||
clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, RTMCTL_SAMP_MODE_RAW_ES_SC);
|
||||
}
|
||||
|
||||
/*
 * caam_get_era_from_hw() - work out the CAAM era from perfmon registers
 * @ctrl: mapped controller register block
 *
 * Prefers the era field of the CCB_ID register; when that field reads zero,
 * falls back to matching the (IP id, major revision) pair from CAAM_ID_MS
 * against a fixed table.
 *
 * Returns the era, or -ENOTSUPP when the pair is not in the table.
 */
static int caam_get_era_from_hw(struct caam_ctrl __iomem *ctrl)
{
	/* (IP id, major revision) -> era; scanned in order, first match wins */
	static const struct {
		u16 ip_id;
		u8 maj_rev;
		u8 era;
	} era_map[] = {
		{0x0A10, 1, 1},
		{0x0A10, 2, 2},
		{0x0A12, 1, 3},
		{0x0A14, 1, 3},
		{0x0A14, 2, 4},
		{0x0A16, 1, 4},
		{0x0A10, 3, 4},
		{0x0A11, 1, 4},
		{0x0A18, 1, 4},
		{0x0A11, 2, 5},
		{0x0A12, 2, 5},
		{0x0A13, 1, 5},
		{0x0A1C, 1, 5}
	};
	u32 reg;
	u16 ipid;
	u8 hw_era, rev;
	size_t n = 0;

	/* The era field is '0' prior to CAAM ERA-6 */
	reg = rd_reg32(&ctrl->perfmon.ccb_id);
	hw_era = (reg & CCBVID_ERA_MASK) >> CCBVID_ERA_SHIFT;
	if (hw_era != 0)
		return hw_era;

	/* No era reported: identify the block by IP id and major revision */
	reg = rd_reg32(&ctrl->perfmon.caam_id_ms);
	ipid = (reg & SECVID_MS_IPID_MASK) >> SECVID_MS_IPID_SHIFT;
	rev = (reg & SECVID_MS_MAJ_REV_MASK) >> SECVID_MS_MAJ_REV_SHIFT;

	while (n < ARRAY_SIZE(era_map)) {
		if (era_map[n].ip_id == ipid && era_map[n].maj_rev == rev)
			return era_map[n].era;
		n++;
	}

	return -ENOTSUPP;
}
|
||||
|
||||
/**
|
||||
* caam_get_era() - Return the ERA of the SEC on SoC, based
|
||||
* on "sec-era" propery in the DTS. This property is updated by u-boot.
|
||||
* on "sec-era" optional property in the DTS. This property is updated
|
||||
* by u-boot.
|
||||
* In case this property is not passed an attempt to retrieve the CAAM
|
||||
* era via register reads will be made.
|
||||
**/
|
||||
int caam_get_era(void)
|
||||
static int caam_get_era(struct caam_ctrl __iomem *ctrl)
|
||||
{
|
||||
struct device_node *caam_node;
|
||||
int ret;
|
||||
@ -410,9 +455,11 @@ int caam_get_era(void)
|
||||
ret = of_property_read_u32(caam_node, "fsl,sec-era", &prop);
|
||||
of_node_put(caam_node);
|
||||
|
||||
return ret ? -ENOTSUPP : prop;
|
||||
if (!ret)
|
||||
return prop;
|
||||
else
|
||||
return caam_get_era_from_hw(ctrl);
|
||||
}
|
||||
EXPORT_SYMBOL(caam_get_era);
|
||||
|
||||
static const struct of_device_id caam_match[] = {
|
||||
{
|
||||
@ -571,11 +618,15 @@ static int caam_probe(struct platform_device *pdev)
|
||||
/*
|
||||
* Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel,
|
||||
* long pointers in master configuration register.
|
||||
* In case of DPAA 2.x, Management Complex firmware performs
|
||||
* In case of SoCs with Management Complex, MC f/w performs
|
||||
* the configuration.
|
||||
*/
|
||||
caam_dpaa2 = !!(comp_params & CTPR_MS_DPAA2);
|
||||
if (!caam_dpaa2)
|
||||
np = of_find_compatible_node(NULL, NULL, "fsl,qoriq-mc");
|
||||
ctrlpriv->mc_en = !!np;
|
||||
of_node_put(np);
|
||||
|
||||
if (!ctrlpriv->mc_en)
|
||||
clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK | MCFGR_LONG_PTR,
|
||||
MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF |
|
||||
MCFGR_WDENABLE | MCFGR_LARGE_BURST |
|
||||
@ -623,7 +674,7 @@ static int caam_probe(struct platform_device *pdev)
|
||||
goto iounmap_ctrl;
|
||||
}
|
||||
|
||||
ctrlpriv->era = caam_get_era();
|
||||
ctrlpriv->era = caam_get_era(ctrl);
|
||||
|
||||
ret = of_platform_populate(nprop, caam_match, NULL, dev);
|
||||
if (ret) {
|
||||
@ -686,9 +737,9 @@ static int caam_probe(struct platform_device *pdev)
|
||||
/*
|
||||
* If SEC has RNG version >= 4 and RNG state handle has not been
|
||||
* already instantiated, do RNG instantiation
|
||||
* In case of DPAA 2.x, RNG is managed by MC firmware.
|
||||
* In case of SoCs with Management Complex, RNG is managed by MC f/w.
|
||||
*/
|
||||
if (!caam_dpaa2 &&
|
||||
if (!ctrlpriv->mc_en &&
|
||||
(cha_vid_ls & CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT >= 4) {
|
||||
ctrlpriv->rng4_sh_init =
|
||||
rd_reg32(&ctrl->r4tst[0].rdsta);
|
||||
@ -757,9 +808,8 @@ static int caam_probe(struct platform_device *pdev)
|
||||
/* Report "alive" for developer to see */
|
||||
dev_info(dev, "device ID = 0x%016llx (Era %d)\n", caam_id,
|
||||
ctrlpriv->era);
|
||||
dev_info(dev, "job rings = %d, qi = %d, dpaa2 = %s\n",
|
||||
ctrlpriv->total_jobrs, ctrlpriv->qi_present,
|
||||
caam_dpaa2 ? "yes" : "no");
|
||||
dev_info(dev, "job rings = %d, qi = %d\n",
|
||||
ctrlpriv->total_jobrs, ctrlpriv->qi_present);
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
debugfs_create_file("rq_dequeued", S_IRUSR | S_IRGRP | S_IROTH,
|
||||
|
@ -9,8 +9,6 @@
|
||||
#define CTRL_H
|
||||
|
||||
/* Prototypes for backend-level services exposed to APIs */
|
||||
int caam_get_era(void);
|
||||
|
||||
extern bool caam_dpaa2;
|
||||
|
||||
#endif /* CTRL_H */
|
||||
|
@ -82,6 +82,7 @@ struct caam_drv_private {
|
||||
*/
|
||||
u8 total_jobrs; /* Total Job Rings in device */
|
||||
u8 qi_present; /* Nonzero if QI present in device */
|
||||
u8 mc_en; /* Nonzero if MC f/w is active */
|
||||
int secvio_irq; /* Security violation interrupt number */
|
||||
int virt_en; /* Virtualization enabled in CAAM */
|
||||
int era; /* CAAM Era (internal HW revision) */
|
||||
|
@ -657,9 +657,8 @@ static int init_cgr(struct device *qidev)
|
||||
{
|
||||
int ret;
|
||||
struct qm_mcc_initcgr opts;
|
||||
const u64 cpus = *(u64 *)qman_affine_cpus();
|
||||
const int num_cpus = hweight64(cpus);
|
||||
const u64 val = num_cpus * MAX_RSP_FQ_BACKLOG_PER_CPU;
|
||||
const u64 val = (u64)cpumask_weight(qman_affine_cpus()) *
|
||||
MAX_RSP_FQ_BACKLOG_PER_CPU;
|
||||
|
||||
ret = qman_alloc_cgrid(&qipriv.cgr.cgrid);
|
||||
if (ret) {
|
||||
|
@ -312,11 +312,17 @@ struct caam_perfmon {
|
||||
|
||||
/* Component Instantiation Parameters fe0-fff */
|
||||
u32 rtic_id; /* RVID - RTIC Version ID */
|
||||
#define CCBVID_ERA_MASK 0xff000000
|
||||
#define CCBVID_ERA_SHIFT 24
|
||||
u32 ccb_id; /* CCBVID - CCB Version ID */
|
||||
u32 cha_id_ms; /* CHAVID - CHA Version ID Most Significant*/
|
||||
u32 cha_id_ls; /* CHAVID - CHA Version ID Least Significant*/
|
||||
u32 cha_num_ms; /* CHANUM - CHA Number Most Significant */
|
||||
u32 cha_num_ls; /* CHANUM - CHA Number Least Significant*/
|
||||
#define SECVID_MS_IPID_MASK 0xffff0000
|
||||
#define SECVID_MS_IPID_SHIFT 16
|
||||
#define SECVID_MS_MAJ_REV_MASK 0x0000ff00
|
||||
#define SECVID_MS_MAJ_REV_SHIFT 8
|
||||
u32 caam_id_ms; /* CAAMVID - CAAM Version ID MS */
|
||||
u32 caam_id_ls; /* CAAMVID - CAAM Version ID LS */
|
||||
};
|
||||
|
@ -46,8 +46,10 @@
|
||||
#ifndef __COMMON_H__
|
||||
#define __COMMON_H__
|
||||
|
||||
#include <linux/delay.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/pci.h>
|
||||
@ -149,6 +151,25 @@ struct zip_operation {
|
||||
u32 sizeofzops;
|
||||
};
|
||||
|
||||
static inline int zip_poll_result(union zip_zres_s *result)
|
||||
{
|
||||
int retries = 1000;
|
||||
|
||||
while (!result->s.compcode) {
|
||||
if (!--retries) {
|
||||
pr_err("ZIP ERR: request timed out");
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
udelay(10);
|
||||
/*
|
||||
* Force re-reading of compcode which is updated
|
||||
* by the ZIP coprocessor.
|
||||
*/
|
||||
rmb();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* error messages */
|
||||
#define zip_err(fmt, args...) pr_err("ZIP ERR:%s():%d: " \
|
||||
fmt "\n", __func__, __LINE__, ## args)
|
||||
|
@ -124,7 +124,7 @@ int zip_compress(const u8 *src, unsigned int slen,
|
||||
struct zip_kernel_ctx *zip_ctx)
|
||||
{
|
||||
struct zip_operation *zip_ops = NULL;
|
||||
struct zip_state zip_state;
|
||||
struct zip_state *zip_state;
|
||||
struct zip_device *zip = NULL;
|
||||
int ret;
|
||||
|
||||
@ -135,20 +135,23 @@ int zip_compress(const u8 *src, unsigned int slen,
|
||||
if (!zip)
|
||||
return -ENODEV;
|
||||
|
||||
memset(&zip_state, 0, sizeof(struct zip_state));
|
||||
zip_state = kzalloc(sizeof(*zip_state), GFP_ATOMIC);
|
||||
if (!zip_state)
|
||||
return -ENOMEM;
|
||||
|
||||
zip_ops = &zip_ctx->zip_comp;
|
||||
|
||||
zip_ops->input_len = slen;
|
||||
zip_ops->output_len = *dlen;
|
||||
memcpy(zip_ops->input, src, slen);
|
||||
|
||||
ret = zip_deflate(zip_ops, &zip_state, zip);
|
||||
ret = zip_deflate(zip_ops, zip_state, zip);
|
||||
|
||||
if (!ret) {
|
||||
*dlen = zip_ops->output_len;
|
||||
memcpy(dst, zip_ops->output, *dlen);
|
||||
}
|
||||
|
||||
kfree(zip_state);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -157,7 +160,7 @@ int zip_decompress(const u8 *src, unsigned int slen,
|
||||
struct zip_kernel_ctx *zip_ctx)
|
||||
{
|
||||
struct zip_operation *zip_ops = NULL;
|
||||
struct zip_state zip_state;
|
||||
struct zip_state *zip_state;
|
||||
struct zip_device *zip = NULL;
|
||||
int ret;
|
||||
|
||||
@ -168,7 +171,10 @@ int zip_decompress(const u8 *src, unsigned int slen,
|
||||
if (!zip)
|
||||
return -ENODEV;
|
||||
|
||||
memset(&zip_state, 0, sizeof(struct zip_state));
|
||||
zip_state = kzalloc(sizeof(*zip_state), GFP_ATOMIC);
|
||||
if (!zip_state)
|
||||
return -ENOMEM;
|
||||
|
||||
zip_ops = &zip_ctx->zip_decomp;
|
||||
memcpy(zip_ops->input, src, slen);
|
||||
|
||||
@ -179,13 +185,13 @@ int zip_decompress(const u8 *src, unsigned int slen,
|
||||
zip_ops->input_len = slen;
|
||||
zip_ops->output_len = *dlen;
|
||||
|
||||
ret = zip_inflate(zip_ops, &zip_state, zip);
|
||||
ret = zip_inflate(zip_ops, zip_state, zip);
|
||||
|
||||
if (!ret) {
|
||||
*dlen = zip_ops->output_len;
|
||||
memcpy(dst, zip_ops->output, *dlen);
|
||||
}
|
||||
|
||||
kfree(zip_state);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -129,8 +129,8 @@ int zip_deflate(struct zip_operation *zip_ops, struct zip_state *s,
|
||||
/* Stats update for compression requests submitted */
|
||||
atomic64_inc(&zip_dev->stats.comp_req_submit);
|
||||
|
||||
while (!result_ptr->s.compcode)
|
||||
continue;
|
||||
/* Wait for completion or error */
|
||||
zip_poll_result(result_ptr);
|
||||
|
||||
/* Stats update for compression requests completed */
|
||||
atomic64_inc(&zip_dev->stats.comp_req_complete);
|
||||
|
@ -87,12 +87,12 @@ u32 zip_load_instr(union zip_inst_s *instr,
|
||||
* Distribute the instructions between the enabled queues based on
|
||||
* the CPU id.
|
||||
*/
|
||||
if (smp_processor_id() % 2 == 0)
|
||||
if (raw_smp_processor_id() % 2 == 0)
|
||||
queue = 0;
|
||||
else
|
||||
queue = 1;
|
||||
|
||||
zip_dbg("CPU Core: %d Queue number:%d", smp_processor_id(), queue);
|
||||
zip_dbg("CPU Core: %d Queue number:%d", raw_smp_processor_id(), queue);
|
||||
|
||||
/* Take cmd buffer lock */
|
||||
spin_lock(&zip_dev->iq[queue].lock);
|
||||
|
@ -143,8 +143,8 @@ int zip_inflate(struct zip_operation *zip_ops, struct zip_state *s,
|
||||
/* Decompression requests submitted stats update */
|
||||
atomic64_inc(&zip_dev->stats.decomp_req_submit);
|
||||
|
||||
while (!result_ptr->s.compcode)
|
||||
continue;
|
||||
/* Wait for completion or error */
|
||||
zip_poll_result(result_ptr);
|
||||
|
||||
/* Decompression requests completed stats update */
|
||||
atomic64_inc(&zip_dev->stats.decomp_req_complete);
|
||||
|
@ -113,7 +113,7 @@ struct zip_device *zip_get_device(int node)
|
||||
*/
|
||||
int zip_get_node_id(void)
|
||||
{
|
||||
return cpu_to_node(smp_processor_id());
|
||||
return cpu_to_node(raw_smp_processor_id());
|
||||
}
|
||||
|
||||
/* Initializes the ZIP h/w sub-system */
|
||||
@ -469,6 +469,8 @@ static int zip_show_stats(struct seq_file *s, void *unused)
|
||||
struct zip_stats *st;
|
||||
|
||||
for (index = 0; index < MAX_ZIP_DEVICES; index++) {
|
||||
u64 pending = 0;
|
||||
|
||||
if (zip_dev[index]) {
|
||||
zip = zip_dev[index];
|
||||
st = &zip->stats;
|
||||
@ -476,16 +478,15 @@ static int zip_show_stats(struct seq_file *s, void *unused)
|
||||
/* Get all the pending requests */
|
||||
for (q = 0; q < ZIP_NUM_QUEUES; q++) {
|
||||
val = zip_reg_read((zip->reg_base +
|
||||
ZIP_DBG_COREX_STA(q)));
|
||||
val = (val >> 32);
|
||||
val = val & 0xffffff;
|
||||
atomic64_add(val, &st->pending_req);
|
||||
ZIP_DBG_QUEX_STA(q)));
|
||||
pending += val >> 32 & 0xffffff;
|
||||
}
|
||||
|
||||
avg_chunk = (atomic64_read(&st->comp_in_bytes) /
|
||||
atomic64_read(&st->comp_req_complete));
|
||||
avg_cr = (atomic64_read(&st->comp_in_bytes) /
|
||||
atomic64_read(&st->comp_out_bytes));
|
||||
val = atomic64_read(&st->comp_req_complete);
|
||||
avg_chunk = (val) ? atomic64_read(&st->comp_in_bytes) / val : 0;
|
||||
|
||||
val = atomic64_read(&st->comp_out_bytes);
|
||||
avg_cr = (val) ? atomic64_read(&st->comp_in_bytes) / val : 0;
|
||||
seq_printf(s, " ZIP Device %d Stats\n"
|
||||
"-----------------------------------\n"
|
||||
"Comp Req Submitted : \t%lld\n"
|
||||
@ -513,10 +514,7 @@ static int zip_show_stats(struct seq_file *s, void *unused)
|
||||
(u64)atomic64_read(&st->decomp_in_bytes),
|
||||
(u64)atomic64_read(&st->decomp_out_bytes),
|
||||
(u64)atomic64_read(&st->decomp_bad_reqs),
|
||||
(u64)atomic64_read(&st->pending_req));
|
||||
|
||||
/* Reset pending requests count */
|
||||
atomic64_set(&st->pending_req, 0);
|
||||
pending);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
@ -74,7 +74,6 @@ struct zip_stats {
|
||||
atomic64_t comp_req_complete;
|
||||
atomic64_t decomp_req_submit;
|
||||
atomic64_t decomp_req_complete;
|
||||
atomic64_t pending_req;
|
||||
atomic64_t comp_in_bytes;
|
||||
atomic64_t comp_out_bytes;
|
||||
atomic64_t decomp_in_bytes;
|
||||
|
@ -443,7 +443,7 @@ union zip_corex_bist_status {
|
||||
|
||||
static inline u64 ZIP_COREX_BIST_STATUS(u64 param1)
|
||||
{
|
||||
if (((param1 <= 1)))
|
||||
if (param1 <= 1)
|
||||
return 0x0520ull + (param1 & 1) * 0x8ull;
|
||||
pr_err("ZIP_COREX_BIST_STATUS: %llu\n", param1);
|
||||
return 0;
|
||||
@ -537,7 +537,7 @@ union zip_dbg_corex_inst {
|
||||
|
||||
static inline u64 ZIP_DBG_COREX_INST(u64 param1)
|
||||
{
|
||||
if (((param1 <= 1)))
|
||||
if (param1 <= 1)
|
||||
return 0x0640ull + (param1 & 1) * 0x8ull;
|
||||
pr_err("ZIP_DBG_COREX_INST: %llu\n", param1);
|
||||
return 0;
|
||||
@ -568,7 +568,7 @@ union zip_dbg_corex_sta {
|
||||
|
||||
static inline u64 ZIP_DBG_COREX_STA(u64 param1)
|
||||
{
|
||||
if (((param1 <= 1)))
|
||||
if (param1 <= 1)
|
||||
return 0x0680ull + (param1 & 1) * 0x8ull;
|
||||
pr_err("ZIP_DBG_COREX_STA: %llu\n", param1);
|
||||
return 0;
|
||||
@ -599,7 +599,7 @@ union zip_dbg_quex_sta {
|
||||
|
||||
static inline u64 ZIP_DBG_QUEX_STA(u64 param1)
|
||||
{
|
||||
if (((param1 <= 7)))
|
||||
if (param1 <= 7)
|
||||
return 0x1800ull + (param1 & 7) * 0x8ull;
|
||||
pr_err("ZIP_DBG_QUEX_STA: %llu\n", param1);
|
||||
return 0;
|
||||
@ -817,7 +817,7 @@ union zip_msix_pbax {
|
||||
|
||||
static inline u64 ZIP_MSIX_PBAX(u64 param1)
|
||||
{
|
||||
if (((param1 == 0)))
|
||||
if (param1 == 0)
|
||||
return 0x0000838000FF0000ull;
|
||||
pr_err("ZIP_MSIX_PBAX: %llu\n", param1);
|
||||
return 0;
|
||||
@ -846,7 +846,7 @@ union zip_msix_vecx_addr {
|
||||
|
||||
static inline u64 ZIP_MSIX_VECX_ADDR(u64 param1)
|
||||
{
|
||||
if (((param1 <= 17)))
|
||||
if (param1 <= 17)
|
||||
return 0x0000838000F00000ull + (param1 & 31) * 0x10ull;
|
||||
pr_err("ZIP_MSIX_VECX_ADDR: %llu\n", param1);
|
||||
return 0;
|
||||
@ -875,7 +875,7 @@ union zip_msix_vecx_ctl {
|
||||
|
||||
static inline u64 ZIP_MSIX_VECX_CTL(u64 param1)
|
||||
{
|
||||
if (((param1 <= 17)))
|
||||
if (param1 <= 17)
|
||||
return 0x0000838000F00008ull + (param1 & 31) * 0x10ull;
|
||||
pr_err("ZIP_MSIX_VECX_CTL: %llu\n", param1);
|
||||
return 0;
|
||||
@ -900,7 +900,7 @@ union zip_quex_done {
|
||||
|
||||
static inline u64 ZIP_QUEX_DONE(u64 param1)
|
||||
{
|
||||
if (((param1 <= 7)))
|
||||
if (param1 <= 7)
|
||||
return 0x2000ull + (param1 & 7) * 0x8ull;
|
||||
pr_err("ZIP_QUEX_DONE: %llu\n", param1);
|
||||
return 0;
|
||||
@ -925,7 +925,7 @@ union zip_quex_done_ack {
|
||||
|
||||
static inline u64 ZIP_QUEX_DONE_ACK(u64 param1)
|
||||
{
|
||||
if (((param1 <= 7)))
|
||||
if (param1 <= 7)
|
||||
return 0x2200ull + (param1 & 7) * 0x8ull;
|
||||
pr_err("ZIP_QUEX_DONE_ACK: %llu\n", param1);
|
||||
return 0;
|
||||
@ -950,7 +950,7 @@ union zip_quex_done_ena_w1c {
|
||||
|
||||
static inline u64 ZIP_QUEX_DONE_ENA_W1C(u64 param1)
|
||||
{
|
||||
if (((param1 <= 7)))
|
||||
if (param1 <= 7)
|
||||
return 0x2600ull + (param1 & 7) * 0x8ull;
|
||||
pr_err("ZIP_QUEX_DONE_ENA_W1C: %llu\n", param1);
|
||||
return 0;
|
||||
@ -975,7 +975,7 @@ union zip_quex_done_ena_w1s {
|
||||
|
||||
static inline u64 ZIP_QUEX_DONE_ENA_W1S(u64 param1)
|
||||
{
|
||||
if (((param1 <= 7)))
|
||||
if (param1 <= 7)
|
||||
return 0x2400ull + (param1 & 7) * 0x8ull;
|
||||
pr_err("ZIP_QUEX_DONE_ENA_W1S: %llu\n", param1);
|
||||
return 0;
|
||||
@ -1004,7 +1004,7 @@ union zip_quex_done_wait {
|
||||
|
||||
static inline u64 ZIP_QUEX_DONE_WAIT(u64 param1)
|
||||
{
|
||||
if (((param1 <= 7)))
|
||||
if (param1 <= 7)
|
||||
return 0x2800ull + (param1 & 7) * 0x8ull;
|
||||
pr_err("ZIP_QUEX_DONE_WAIT: %llu\n", param1);
|
||||
return 0;
|
||||
@ -1029,7 +1029,7 @@ union zip_quex_doorbell {
|
||||
|
||||
static inline u64 ZIP_QUEX_DOORBELL(u64 param1)
|
||||
{
|
||||
if (((param1 <= 7)))
|
||||
if (param1 <= 7)
|
||||
return 0x4000ull + (param1 & 7) * 0x8ull;
|
||||
pr_err("ZIP_QUEX_DOORBELL: %llu\n", param1);
|
||||
return 0;
|
||||
@ -1058,7 +1058,7 @@ union zip_quex_err_ena_w1c {
|
||||
|
||||
static inline u64 ZIP_QUEX_ERR_ENA_W1C(u64 param1)
|
||||
{
|
||||
if (((param1 <= 7)))
|
||||
if (param1 <= 7)
|
||||
return 0x3600ull + (param1 & 7) * 0x8ull;
|
||||
pr_err("ZIP_QUEX_ERR_ENA_W1C: %llu\n", param1);
|
||||
return 0;
|
||||
@ -1087,7 +1087,7 @@ union zip_quex_err_ena_w1s {
|
||||
|
||||
static inline u64 ZIP_QUEX_ERR_ENA_W1S(u64 param1)
|
||||
{
|
||||
if (((param1 <= 7)))
|
||||
if (param1 <= 7)
|
||||
return 0x3400ull + (param1 & 7) * 0x8ull;
|
||||
pr_err("ZIP_QUEX_ERR_ENA_W1S: %llu\n", param1);
|
||||
return 0;
|
||||
@ -1120,7 +1120,7 @@ union zip_quex_err_int {
|
||||
|
||||
static inline u64 ZIP_QUEX_ERR_INT(u64 param1)
|
||||
{
|
||||
if (((param1 <= 7)))
|
||||
if (param1 <= 7)
|
||||
return 0x3000ull + (param1 & 7) * 0x8ull;
|
||||
pr_err("ZIP_QUEX_ERR_INT: %llu\n", param1);
|
||||
return 0;
|
||||
@ -1150,7 +1150,7 @@ union zip_quex_err_int_w1s {
|
||||
|
||||
static inline u64 ZIP_QUEX_ERR_INT_W1S(u64 param1)
|
||||
{
|
||||
if (((param1 <= 7)))
|
||||
if (param1 <= 7)
|
||||
return 0x3200ull + (param1 & 7) * 0x8ull;
|
||||
pr_err("ZIP_QUEX_ERR_INT_W1S: %llu\n", param1);
|
||||
return 0;
|
||||
@ -1179,7 +1179,7 @@ union zip_quex_gcfg {
|
||||
|
||||
static inline u64 ZIP_QUEX_GCFG(u64 param1)
|
||||
{
|
||||
if (((param1 <= 7)))
|
||||
if (param1 <= 7)
|
||||
return 0x1A00ull + (param1 & 7) * 0x8ull;
|
||||
pr_err("ZIP_QUEX_GCFG: %llu\n", param1);
|
||||
return 0;
|
||||
@ -1204,7 +1204,7 @@ union zip_quex_map {
|
||||
|
||||
static inline u64 ZIP_QUEX_MAP(u64 param1)
|
||||
{
|
||||
if (((param1 <= 7)))
|
||||
if (param1 <= 7)
|
||||
return 0x1400ull + (param1 & 7) * 0x8ull;
|
||||
pr_err("ZIP_QUEX_MAP: %llu\n", param1);
|
||||
return 0;
|
||||
@ -1236,7 +1236,7 @@ union zip_quex_sbuf_addr {
|
||||
|
||||
static inline u64 ZIP_QUEX_SBUF_ADDR(u64 param1)
|
||||
{
|
||||
if (((param1 <= 7)))
|
||||
if (param1 <= 7)
|
||||
return 0x1000ull + (param1 & 7) * 0x8ull;
|
||||
pr_err("ZIP_QUEX_SBUF_ADDR: %llu\n", param1);
|
||||
return 0;
|
||||
@ -1276,7 +1276,7 @@ union zip_quex_sbuf_ctl {
|
||||
|
||||
static inline u64 ZIP_QUEX_SBUF_CTL(u64 param1)
|
||||
{
|
||||
if (((param1 <= 7)))
|
||||
if (param1 <= 7)
|
||||
return 0x1200ull + (param1 & 7) * 0x8ull;
|
||||
pr_err("ZIP_QUEX_SBUF_CTL: %llu\n", param1);
|
||||
return 0;
|
||||
|
@ -22,11 +22,17 @@
|
||||
#include <linux/delay.h>
|
||||
#include <linux/hw_random.h>
|
||||
#include <linux/ccp.h>
|
||||
#include <linux/firmware.h>
|
||||
|
||||
#include "sp-dev.h"
|
||||
#include "psp-dev.h"
|
||||
|
||||
/*
 * True when the SEV API version cached in psp_master is at least
 * _maj._min. A newer major version satisfies the check regardless of its
 * minor number; the minor number only matters when the majors are equal.
 */
#define SEV_VERSION_GREATER_OR_EQUAL(_maj, _min)	\
		((psp_master->api_major) > (_maj) ||	\
		 ((psp_master->api_major) == (_maj) &&	\
		  (psp_master->api_minor) >= (_min)))
|
||||
|
||||
#define DEVICE_NAME "sev"
|
||||
#define SEV_FW_FILE "amd/sev.fw"
|
||||
|
||||
static DEFINE_MUTEX(sev_cmd_mutex);
|
||||
static struct sev_misc_dev *misc_dev;
|
||||
@ -112,6 +118,8 @@ static int sev_cmd_buffer_len(int cmd)
|
||||
case SEV_CMD_RECEIVE_UPDATE_DATA: return sizeof(struct sev_data_receive_update_data);
|
||||
case SEV_CMD_RECEIVE_UPDATE_VMSA: return sizeof(struct sev_data_receive_update_vmsa);
|
||||
case SEV_CMD_LAUNCH_UPDATE_SECRET: return sizeof(struct sev_data_launch_secret);
|
||||
case SEV_CMD_DOWNLOAD_FIRMWARE: return sizeof(struct sev_data_download_firmware);
|
||||
case SEV_CMD_GET_ID: return sizeof(struct sev_data_get_id);
|
||||
default: return 0;
|
||||
}
|
||||
|
||||
@ -378,6 +386,79 @@ void *psp_copy_user_blob(u64 __user uaddr, u32 len)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(psp_copy_user_blob);
|
||||
|
||||
static int sev_get_api_version(void)
|
||||
{
|
||||
struct sev_user_data_status *status;
|
||||
int error, ret;
|
||||
|
||||
status = &psp_master->status_cmd_buf;
|
||||
ret = sev_platform_status(status, &error);
|
||||
if (ret) {
|
||||
dev_err(psp_master->dev,
|
||||
"SEV: failed to get status. Error: %#x\n", error);
|
||||
return 1;
|
||||
}
|
||||
|
||||
psp_master->api_major = status->api_major;
|
||||
psp_master->api_minor = status->api_minor;
|
||||
psp_master->build = status->build;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Don't fail if SEV FW couldn't be updated. Continue with existing SEV FW */
|
||||
static int sev_update_firmware(struct device *dev)
|
||||
{
|
||||
struct sev_data_download_firmware *data;
|
||||
const struct firmware *firmware;
|
||||
int ret, error, order;
|
||||
struct page *p;
|
||||
u64 data_size;
|
||||
|
||||
ret = request_firmware(&firmware, SEV_FW_FILE, dev);
|
||||
if (ret < 0)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* SEV FW expects the physical address given to it to be 32
|
||||
* byte aligned. Memory allocated has structure placed at the
|
||||
* beginning followed by the firmware being passed to the SEV
|
||||
* FW. Allocate enough memory for data structure + alignment
|
||||
* padding + SEV FW.
|
||||
*/
|
||||
data_size = ALIGN(sizeof(struct sev_data_download_firmware), 32);
|
||||
|
||||
order = get_order(firmware->size + data_size);
|
||||
p = alloc_pages(GFP_KERNEL, order);
|
||||
if (!p) {
|
||||
ret = -1;
|
||||
goto fw_err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy firmware data to a kernel allocated contiguous
|
||||
* memory region.
|
||||
*/
|
||||
data = page_address(p);
|
||||
memcpy(page_address(p) + data_size, firmware->data, firmware->size);
|
||||
|
||||
data->address = __psp_pa(page_address(p) + data_size);
|
||||
data->len = firmware->size;
|
||||
|
||||
ret = sev_do_cmd(SEV_CMD_DOWNLOAD_FIRMWARE, data, &error);
|
||||
if (ret)
|
||||
dev_dbg(dev, "Failed to update SEV firmware: %#x\n", error);
|
||||
else
|
||||
dev_info(dev, "SEV firmware update successful\n");
|
||||
|
||||
__free_pages(p, order);
|
||||
|
||||
fw_err:
|
||||
release_firmware(firmware);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp)
|
||||
{
|
||||
struct sev_user_data_pek_cert_import input;
|
||||
@ -430,6 +511,46 @@ e_free:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int sev_ioctl_do_get_id(struct sev_issue_cmd *argp)
|
||||
{
|
||||
struct sev_data_get_id *data;
|
||||
u64 data_size, user_size;
|
||||
void *id_blob, *mem;
|
||||
int ret;
|
||||
|
||||
/* SEV GET_ID available from SEV API v0.16 and up */
|
||||
if (!SEV_VERSION_GREATER_OR_EQUAL(0, 16))
|
||||
return -ENOTSUPP;
|
||||
|
||||
/* SEV FW expects the buffer it fills with the ID to be
|
||||
* 8-byte aligned. Memory allocated should be enough to
|
||||
* hold data structure + alignment padding + memory
|
||||
* where SEV FW writes the ID.
|
||||
*/
|
||||
data_size = ALIGN(sizeof(struct sev_data_get_id), 8);
|
||||
user_size = sizeof(struct sev_user_data_get_id);
|
||||
|
||||
mem = kzalloc(data_size + user_size, GFP_KERNEL);
|
||||
if (!mem)
|
||||
return -ENOMEM;
|
||||
|
||||
data = mem;
|
||||
id_blob = mem + data_size;
|
||||
|
||||
data->address = __psp_pa(id_blob);
|
||||
data->len = user_size;
|
||||
|
||||
ret = __sev_do_cmd_locked(SEV_CMD_GET_ID, data, &argp->error);
|
||||
if (!ret) {
|
||||
if (copy_to_user((void __user *)argp->data, id_blob, data->len))
|
||||
ret = -EFAULT;
|
||||
}
|
||||
|
||||
kfree(mem);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp)
|
||||
{
|
||||
struct sev_user_data_pdh_cert_export input;
|
||||
@ -567,6 +688,9 @@ static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg)
|
||||
case SEV_PDH_CERT_EXPORT:
|
||||
ret = sev_ioctl_do_pdh_export(&input);
|
||||
break;
|
||||
case SEV_GET_ID:
|
||||
ret = sev_ioctl_do_get_id(&input);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
@ -750,7 +874,6 @@ EXPORT_SYMBOL_GPL(sev_issue_cmd_external_user);
|
||||
|
||||
void psp_pci_init(void)
|
||||
{
|
||||
struct sev_user_data_status *status;
|
||||
struct sp_device *sp;
|
||||
int error, rc;
|
||||
|
||||
@ -760,6 +883,13 @@ void psp_pci_init(void)
|
||||
|
||||
psp_master = sp->psp_data;
|
||||
|
||||
if (sev_get_api_version())
|
||||
goto err;
|
||||
|
||||
if (SEV_VERSION_GREATER_OR_EQUAL(0, 15) &&
|
||||
sev_update_firmware(psp_master->dev) == 0)
|
||||
sev_get_api_version();
|
||||
|
||||
/* Initialize the platform */
|
||||
rc = sev_platform_init(&error);
|
||||
if (rc) {
|
||||
@ -767,16 +897,9 @@ void psp_pci_init(void)
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Display SEV firmware version */
|
||||
status = &psp_master->status_cmd_buf;
|
||||
rc = sev_platform_status(status, &error);
|
||||
if (rc) {
|
||||
dev_err(sp->dev, "SEV: failed to get status error %#x\n", error);
|
||||
goto err;
|
||||
}
|
||||
dev_info(sp->dev, "SEV API:%d.%d build:%d\n", psp_master->api_major,
|
||||
psp_master->api_minor, psp_master->build);
|
||||
|
||||
dev_info(sp->dev, "SEV API:%d.%d build:%d\n", status->api_major,
|
||||
status->api_minor, status->build);
|
||||
return;
|
||||
|
||||
err:
|
||||
|
@ -78,6 +78,10 @@ struct psp_device {
|
||||
struct sev_misc_dev *sev_misc;
|
||||
struct sev_user_data_status status_cmd_buf;
|
||||
struct sev_data_init init_cmd_buf;
|
||||
|
||||
u8 api_major;
|
||||
u8 api_minor;
|
||||
u8 build;
|
||||
};
|
||||
|
||||
#endif /* __PSP_DEV_H */
|
||||
|
@ -42,6 +42,7 @@ struct cc_cipher_ctx {
|
||||
int cipher_mode;
|
||||
int flow_mode;
|
||||
unsigned int flags;
|
||||
bool hw_key;
|
||||
struct cc_user_key_info user;
|
||||
struct cc_hw_key_info hw;
|
||||
struct crypto_shash *shash_tfm;
|
||||
@ -49,6 +50,13 @@ struct cc_cipher_ctx {
|
||||
|
||||
static void cc_cipher_complete(struct device *dev, void *cc_req, int err);
|
||||
|
||||
static inline bool cc_is_hw_key(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm);
|
||||
|
||||
return ctx_p->hw_key;
|
||||
}
|
||||
|
||||
static int validate_keys_sizes(struct cc_cipher_ctx *ctx_p, u32 size)
|
||||
{
|
||||
switch (ctx_p->flow_mode) {
|
||||
@ -211,7 +219,7 @@ struct tdes_keys {
|
||||
u8 key3[DES_KEY_SIZE];
|
||||
};
|
||||
|
||||
static enum cc_hw_crypto_key hw_key_to_cc_hw_key(int slot_num)
|
||||
static enum cc_hw_crypto_key cc_slot_to_hw_key(int slot_num)
|
||||
{
|
||||
switch (slot_num) {
|
||||
case 0:
|
||||
@ -226,6 +234,74 @@ static enum cc_hw_crypto_key hw_key_to_cc_hw_key(int slot_num)
|
||||
return END_OF_KEYS;
|
||||
}
|
||||
|
||||
static int cc_cipher_sethkey(struct crypto_skcipher *sktfm, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct crypto_tfm *tfm = crypto_skcipher_tfm(sktfm);
|
||||
struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm);
|
||||
struct device *dev = drvdata_to_dev(ctx_p->drvdata);
|
||||
struct cc_hkey_info hki;
|
||||
|
||||
dev_dbg(dev, "Setting HW key in context @%p for %s. keylen=%u\n",
|
||||
ctx_p, crypto_tfm_alg_name(tfm), keylen);
|
||||
dump_byte_array("key", (u8 *)key, keylen);
|
||||
|
||||
/* STAT_PHASE_0: Init and sanity checks */
|
||||
|
||||
/* This check the size of the hardware key token */
|
||||
if (keylen != sizeof(hki)) {
|
||||
dev_err(dev, "Unsupported HW key size %d.\n", keylen);
|
||||
crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (ctx_p->flow_mode != S_DIN_to_AES) {
|
||||
dev_err(dev, "HW key not supported for non-AES flows\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memcpy(&hki, key, keylen);
|
||||
|
||||
/* The real key len for crypto op is the size of the HW key
|
||||
* referenced by the HW key slot, not the hardware key token
|
||||
*/
|
||||
keylen = hki.keylen;
|
||||
|
||||
if (validate_keys_sizes(ctx_p, keylen)) {
|
||||
dev_err(dev, "Unsupported key size %d.\n", keylen);
|
||||
crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ctx_p->hw.key1_slot = cc_slot_to_hw_key(hki.hw_key1);
|
||||
if (ctx_p->hw.key1_slot == END_OF_KEYS) {
|
||||
dev_err(dev, "Unsupported hw key1 number (%d)\n", hki.hw_key1);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (ctx_p->cipher_mode == DRV_CIPHER_XTS ||
|
||||
ctx_p->cipher_mode == DRV_CIPHER_ESSIV ||
|
||||
ctx_p->cipher_mode == DRV_CIPHER_BITLOCKER) {
|
||||
if (hki.hw_key1 == hki.hw_key2) {
|
||||
dev_err(dev, "Illegal hw key numbers (%d,%d)\n",
|
||||
hki.hw_key1, hki.hw_key2);
|
||||
return -EINVAL;
|
||||
}
|
||||
ctx_p->hw.key2_slot = cc_slot_to_hw_key(hki.hw_key2);
|
||||
if (ctx_p->hw.key2_slot == END_OF_KEYS) {
|
||||
dev_err(dev, "Unsupported hw key2 number (%d)\n",
|
||||
hki.hw_key2);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
ctx_p->keylen = keylen;
|
||||
ctx_p->hw_key = true;
|
||||
dev_dbg(dev, "cc_is_hw_key ret 0");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cc_cipher_setkey(struct crypto_skcipher *sktfm, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
@ -250,44 +326,7 @@ static int cc_cipher_setkey(struct crypto_skcipher *sktfm, const u8 *key,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (cc_is_hw_key(tfm)) {
|
||||
/* setting HW key slots */
|
||||
struct arm_hw_key_info *hki = (struct arm_hw_key_info *)key;
|
||||
|
||||
if (ctx_p->flow_mode != S_DIN_to_AES) {
|
||||
dev_err(dev, "HW key not supported for non-AES flows\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ctx_p->hw.key1_slot = hw_key_to_cc_hw_key(hki->hw_key1);
|
||||
if (ctx_p->hw.key1_slot == END_OF_KEYS) {
|
||||
dev_err(dev, "Unsupported hw key1 number (%d)\n",
|
||||
hki->hw_key1);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (ctx_p->cipher_mode == DRV_CIPHER_XTS ||
|
||||
ctx_p->cipher_mode == DRV_CIPHER_ESSIV ||
|
||||
ctx_p->cipher_mode == DRV_CIPHER_BITLOCKER) {
|
||||
if (hki->hw_key1 == hki->hw_key2) {
|
||||
dev_err(dev, "Illegal hw key numbers (%d,%d)\n",
|
||||
hki->hw_key1, hki->hw_key2);
|
||||
return -EINVAL;
|
||||
}
|
||||
ctx_p->hw.key2_slot =
|
||||
hw_key_to_cc_hw_key(hki->hw_key2);
|
||||
if (ctx_p->hw.key2_slot == END_OF_KEYS) {
|
||||
dev_err(dev, "Unsupported hw key2 number (%d)\n",
|
||||
hki->hw_key2);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
ctx_p->keylen = keylen;
|
||||
dev_dbg(dev, "cc_is_hw_key ret 0");
|
||||
|
||||
return 0;
|
||||
}
|
||||
ctx_p->hw_key = false;
|
||||
|
||||
/*
|
||||
* Verify DES weak keys
|
||||
@ -734,6 +773,241 @@ static int cc_cipher_decrypt(struct skcipher_request *req)
|
||||
|
||||
/* Block cipher alg */
|
||||
static const struct cc_alg_template skcipher_algs[] = {
|
||||
{
|
||||
.name = "xts(paes)",
|
||||
.driver_name = "xts-paes-ccree",
|
||||
.blocksize = AES_BLOCK_SIZE,
|
||||
.template_skcipher = {
|
||||
.setkey = cc_cipher_sethkey,
|
||||
.encrypt = cc_cipher_encrypt,
|
||||
.decrypt = cc_cipher_decrypt,
|
||||
.min_keysize = CC_HW_KEY_SIZE,
|
||||
.max_keysize = CC_HW_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
},
|
||||
.cipher_mode = DRV_CIPHER_XTS,
|
||||
.flow_mode = S_DIN_to_AES,
|
||||
.min_hw_rev = CC_HW_REV_630,
|
||||
},
|
||||
{
|
||||
.name = "xts512(paes)",
|
||||
.driver_name = "xts-paes-du512-ccree",
|
||||
.blocksize = AES_BLOCK_SIZE,
|
||||
.template_skcipher = {
|
||||
.setkey = cc_cipher_sethkey,
|
||||
.encrypt = cc_cipher_encrypt,
|
||||
.decrypt = cc_cipher_decrypt,
|
||||
.min_keysize = CC_HW_KEY_SIZE,
|
||||
.max_keysize = CC_HW_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
},
|
||||
.cipher_mode = DRV_CIPHER_XTS,
|
||||
.flow_mode = S_DIN_to_AES,
|
||||
.data_unit = 512,
|
||||
.min_hw_rev = CC_HW_REV_712,
|
||||
},
|
||||
{
|
||||
.name = "xts4096(paes)",
|
||||
.driver_name = "xts-paes-du4096-ccree",
|
||||
.blocksize = AES_BLOCK_SIZE,
|
||||
.template_skcipher = {
|
||||
.setkey = cc_cipher_sethkey,
|
||||
.encrypt = cc_cipher_encrypt,
|
||||
.decrypt = cc_cipher_decrypt,
|
||||
.min_keysize = CC_HW_KEY_SIZE,
|
||||
.max_keysize = CC_HW_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
},
|
||||
.cipher_mode = DRV_CIPHER_XTS,
|
||||
.flow_mode = S_DIN_to_AES,
|
||||
.data_unit = 4096,
|
||||
.min_hw_rev = CC_HW_REV_712,
|
||||
},
|
||||
{
|
||||
.name = "essiv(paes)",
|
||||
.driver_name = "essiv-paes-ccree",
|
||||
.blocksize = AES_BLOCK_SIZE,
|
||||
.template_skcipher = {
|
||||
.setkey = cc_cipher_sethkey,
|
||||
.encrypt = cc_cipher_encrypt,
|
||||
.decrypt = cc_cipher_decrypt,
|
||||
.min_keysize = CC_HW_KEY_SIZE,
|
||||
.max_keysize = CC_HW_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
},
|
||||
.cipher_mode = DRV_CIPHER_ESSIV,
|
||||
.flow_mode = S_DIN_to_AES,
|
||||
.min_hw_rev = CC_HW_REV_712,
|
||||
},
|
||||
{
|
||||
.name = "essiv512(paes)",
|
||||
.driver_name = "essiv-paes-du512-ccree",
|
||||
.blocksize = AES_BLOCK_SIZE,
|
||||
.template_skcipher = {
|
||||
.setkey = cc_cipher_sethkey,
|
||||
.encrypt = cc_cipher_encrypt,
|
||||
.decrypt = cc_cipher_decrypt,
|
||||
.min_keysize = CC_HW_KEY_SIZE,
|
||||
.max_keysize = CC_HW_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
},
|
||||
.cipher_mode = DRV_CIPHER_ESSIV,
|
||||
.flow_mode = S_DIN_to_AES,
|
||||
.data_unit = 512,
|
||||
.min_hw_rev = CC_HW_REV_712,
|
||||
},
|
||||
{
|
||||
.name = "essiv4096(paes)",
|
||||
.driver_name = "essiv-paes-du4096-ccree",
|
||||
.blocksize = AES_BLOCK_SIZE,
|
||||
.template_skcipher = {
|
||||
.setkey = cc_cipher_sethkey,
|
||||
.encrypt = cc_cipher_encrypt,
|
||||
.decrypt = cc_cipher_decrypt,
|
||||
.min_keysize = CC_HW_KEY_SIZE,
|
||||
.max_keysize = CC_HW_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
},
|
||||
.cipher_mode = DRV_CIPHER_ESSIV,
|
||||
.flow_mode = S_DIN_to_AES,
|
||||
.data_unit = 4096,
|
||||
.min_hw_rev = CC_HW_REV_712,
|
||||
},
|
||||
{
|
||||
.name = "bitlocker(paes)",
|
||||
.driver_name = "bitlocker-paes-ccree",
|
||||
.blocksize = AES_BLOCK_SIZE,
|
||||
.template_skcipher = {
|
||||
.setkey = cc_cipher_sethkey,
|
||||
.encrypt = cc_cipher_encrypt,
|
||||
.decrypt = cc_cipher_decrypt,
|
||||
.min_keysize = CC_HW_KEY_SIZE,
|
||||
.max_keysize = CC_HW_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
},
|
||||
.cipher_mode = DRV_CIPHER_BITLOCKER,
|
||||
.flow_mode = S_DIN_to_AES,
|
||||
.min_hw_rev = CC_HW_REV_712,
|
||||
},
|
||||
{
|
||||
.name = "bitlocker512(paes)",
|
||||
.driver_name = "bitlocker-paes-du512-ccree",
|
||||
.blocksize = AES_BLOCK_SIZE,
|
||||
.template_skcipher = {
|
||||
.setkey = cc_cipher_sethkey,
|
||||
.encrypt = cc_cipher_encrypt,
|
||||
.decrypt = cc_cipher_decrypt,
|
||||
.min_keysize = CC_HW_KEY_SIZE,
|
||||
.max_keysize = CC_HW_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
},
|
||||
.cipher_mode = DRV_CIPHER_BITLOCKER,
|
||||
.flow_mode = S_DIN_to_AES,
|
||||
.data_unit = 512,
|
||||
.min_hw_rev = CC_HW_REV_712,
|
||||
},
|
||||
{
|
||||
.name = "bitlocker4096(paes)",
|
||||
.driver_name = "bitlocker-paes-du4096-ccree",
|
||||
.blocksize = AES_BLOCK_SIZE,
|
||||
.template_skcipher = {
|
||||
.setkey = cc_cipher_sethkey,
|
||||
.encrypt = cc_cipher_encrypt,
|
||||
.decrypt = cc_cipher_decrypt,
|
||||
.min_keysize = CC_HW_KEY_SIZE,
|
||||
.max_keysize = CC_HW_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
},
|
||||
.cipher_mode = DRV_CIPHER_BITLOCKER,
|
||||
.flow_mode = S_DIN_to_AES,
|
||||
.data_unit = 4096,
|
||||
.min_hw_rev = CC_HW_REV_712,
|
||||
},
|
||||
{
|
||||
.name = "ecb(paes)",
|
||||
.driver_name = "ecb-paes-ccree",
|
||||
.blocksize = AES_BLOCK_SIZE,
|
||||
.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
|
||||
.template_skcipher = {
|
||||
.setkey = cc_cipher_sethkey,
|
||||
.encrypt = cc_cipher_encrypt,
|
||||
.decrypt = cc_cipher_decrypt,
|
||||
.min_keysize = CC_HW_KEY_SIZE,
|
||||
.max_keysize = CC_HW_KEY_SIZE,
|
||||
.ivsize = 0,
|
||||
},
|
||||
.cipher_mode = DRV_CIPHER_ECB,
|
||||
.flow_mode = S_DIN_to_AES,
|
||||
.min_hw_rev = CC_HW_REV_712,
|
||||
},
|
||||
{
|
||||
.name = "cbc(paes)",
|
||||
.driver_name = "cbc-paes-ccree",
|
||||
.blocksize = AES_BLOCK_SIZE,
|
||||
.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
|
||||
.template_skcipher = {
|
||||
.setkey = cc_cipher_sethkey,
|
||||
.encrypt = cc_cipher_encrypt,
|
||||
.decrypt = cc_cipher_decrypt,
|
||||
.min_keysize = CC_HW_KEY_SIZE,
|
||||
.max_keysize = CC_HW_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
},
|
||||
.cipher_mode = DRV_CIPHER_CBC,
|
||||
.flow_mode = S_DIN_to_AES,
|
||||
.min_hw_rev = CC_HW_REV_712,
|
||||
},
|
||||
{
|
||||
.name = "ofb(paes)",
|
||||
.driver_name = "ofb-paes-ccree",
|
||||
.blocksize = AES_BLOCK_SIZE,
|
||||
.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
|
||||
.template_skcipher = {
|
||||
.setkey = cc_cipher_sethkey,
|
||||
.encrypt = cc_cipher_encrypt,
|
||||
.decrypt = cc_cipher_decrypt,
|
||||
.min_keysize = CC_HW_KEY_SIZE,
|
||||
.max_keysize = CC_HW_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
},
|
||||
.cipher_mode = DRV_CIPHER_OFB,
|
||||
.flow_mode = S_DIN_to_AES,
|
||||
.min_hw_rev = CC_HW_REV_712,
|
||||
},
|
||||
{
|
||||
.name = "cts1(cbc(paes))",
|
||||
.driver_name = "cts1-cbc-paes-ccree",
|
||||
.blocksize = AES_BLOCK_SIZE,
|
||||
.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
|
||||
.template_skcipher = {
|
||||
.setkey = cc_cipher_sethkey,
|
||||
.encrypt = cc_cipher_encrypt,
|
||||
.decrypt = cc_cipher_decrypt,
|
||||
.min_keysize = CC_HW_KEY_SIZE,
|
||||
.max_keysize = CC_HW_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
},
|
||||
.cipher_mode = DRV_CIPHER_CBC_CTS,
|
||||
.flow_mode = S_DIN_to_AES,
|
||||
.min_hw_rev = CC_HW_REV_712,
|
||||
},
|
||||
{
|
||||
.name = "ctr(paes)",
|
||||
.driver_name = "ctr-paes-ccree",
|
||||
.blocksize = 1,
|
||||
.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
|
||||
.template_skcipher = {
|
||||
.setkey = cc_cipher_sethkey,
|
||||
.encrypt = cc_cipher_encrypt,
|
||||
.decrypt = cc_cipher_decrypt,
|
||||
.min_keysize = CC_HW_KEY_SIZE,
|
||||
.max_keysize = CC_HW_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
},
|
||||
.cipher_mode = DRV_CIPHER_CTR,
|
||||
.flow_mode = S_DIN_to_AES,
|
||||
.min_hw_rev = CC_HW_REV_712,
|
||||
},
|
||||
{
|
||||
.name = "xts(aes)",
|
||||
.driver_name = "xts-aes-ccree",
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user