Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "API:

   - Decryption test vectors are now automatically generated from
     encryption test vectors.

  Algorithms:

   - Fix unaligned access issues in crc32/crc32c.

   - Add zstd compression algorithm.

   - Add AEGIS.

   - Add MORUS.

  Drivers:

   - Add accelerated AEGIS/MORUS on x86.

   - Add accelerated SM4 on arm64.

   - Removed x86 assembly salsa implementation as it is slower than C.

   - Add authenc(hmac(sha*), cbc(aes)) support in inside-secure.

   - Add ctr(aes) support in crypto4xx.

   - Add hardware key support in ccree.

   - Add support for new Centaur CPU in via-rng"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (112 commits)
  crypto: chtls - free beyond end rspq_skb_cache
  crypto: chtls - kbuild warnings
  crypto: chtls - dereference null variable
  crypto: chtls - wait for memory sendmsg, sendpage
  crypto: chtls - key len correction
  crypto: salsa20 - Revert "crypto: salsa20 - export generic helpers"
  crypto: x86/salsa20 - remove x86 salsa20 implementations
  crypto: ccp - Add GET_ID SEV command
  crypto: ccp - Add DOWNLOAD_FIRMWARE SEV command
  crypto: qat - Add MODULE_FIRMWARE for all qat drivers
  crypto: ccree - silence debug prints
  crypto: ccree - better clock handling
  crypto: ccree - correct host regs offset
  crypto: chelsio - Remove separate buffer used for DMA map B0 block in CCM
  crypt: chelsio - Send IV as Immediate for cipher algo
  crypto: chelsio - Return -ENOSPC for transient busy indication.
  crypto: caam/qi - fix warning in init_cgr()
  crypto: caam - fix rfc4543 descriptors
  crypto: caam - fix MC firmware detection
  crypto: clarify licensing of OpenSSL asm code
  ...
This commit is contained in:
Linus Torvalds 2018-06-05 15:51:21 -07:00
commit 3e1a29b3bf
141 changed files with 20660 additions and 15353 deletions

View File

@ -1,4 +1,14 @@
#define __ARM_ARCH__ __LINUX_ARM_ARCH__
@ SPDX-License-Identifier: GPL-2.0
@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
@ has relicensed it under the GPLv2. Therefore this program is free software;
@ you can redistribute it and/or modify it under the terms of the GNU General
@ Public License version 2 as published by the Free Software Foundation.
@
@ The original headers, including the original license headers, are
@ included below for completeness.
@ ====================================================================
@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and

View File

@ -1,12 +1,19 @@
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
# This code is taken from the OpenSSL project but the author (Andy Polyakov)
# has relicensed it under the GPLv2. Therefore this program is free software;
# you can redistribute it and/or modify it under the terms of the GNU General
# Public License version 2 as published by the Free Software Foundation.
#
# The original headers, including the original license headers, are
# included below for completeness.
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
#
# Permission to use under GPL terms is granted.
# ====================================================================
# SHA256 block procedure for ARMv4. May 2007.

View File

@ -1,11 +1,18 @@
@ SPDX-License-Identifier: GPL-2.0
@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
@ has relicensed it under the GPLv2. Therefore this program is free software;
@ you can redistribute it and/or modify it under the terms of the GNU General
@ Public License version 2 as published by the Free Software Foundation.
@
@ The original headers, including the original license headers, are
@ included below for completeness.
@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================
@ SHA256 block procedure for ARMv4. May 2007.

View File

@ -1,12 +1,19 @@
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
# This code is taken from the OpenSSL project but the author (Andy Polyakov)
# has relicensed it under the GPLv2. Therefore this program is free software;
# you can redistribute it and/or modify it under the terms of the GNU General
# Public License version 2 as published by the Free Software Foundation.
#
# The original headers, including the original license headers, are
# included below for completeness.
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
#
# Permission to use under GPL terms is granted.
# ====================================================================
# SHA512 block procedure for ARMv4. September 2007.

View File

@ -1,11 +1,18 @@
@ SPDX-License-Identifier: GPL-2.0
@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
@ has relicensed it under the GPLv2. Therefore this program is free software;
@ you can redistribute it and/or modify it under the terms of the GNU General
@ Public License version 2 as published by the Free Software Foundation.
@
@ The original headers, including the original license headers, are
@ included below for completeness.
@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================
@ SHA512 block procedure for ARMv4. September 2007.

View File

@ -47,6 +47,12 @@ config CRYPTO_SM3_ARM64_CE
select CRYPTO_HASH
select CRYPTO_SM3
config CRYPTO_SM4_ARM64_CE
tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_SM4
config CRYPTO_GHASH_ARM64_CE
tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
depends on KERNEL_MODE_NEON

View File

@ -23,6 +23,9 @@ sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o
sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o
obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce.o
sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o
obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o

View File

@ -19,24 +19,33 @@
* u32 *macp, u8 const rk[], u32 rounds);
*/
ENTRY(ce_aes_ccm_auth_data)
ldr w8, [x3] /* leftover from prev round? */
frame_push 7
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
ldr w25, [x22] /* leftover from prev round? */
ld1 {v0.16b}, [x0] /* load mac */
cbz w8, 1f
sub w8, w8, #16
cbz w25, 1f
sub w25, w25, #16
eor v1.16b, v1.16b, v1.16b
0: ldrb w7, [x1], #1 /* get 1 byte of input */
subs w2, w2, #1
add w8, w8, #1
0: ldrb w7, [x20], #1 /* get 1 byte of input */
subs w21, w21, #1
add w25, w25, #1
ins v1.b[0], w7
ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */
beq 8f /* out of input? */
cbnz w8, 0b
cbnz w25, 0b
eor v0.16b, v0.16b, v1.16b
1: ld1 {v3.4s}, [x4] /* load first round key */
prfm pldl1strm, [x1]
cmp w5, #12 /* which key size? */
add x6, x4, #16
sub w7, w5, #2 /* modified # of rounds */
1: ld1 {v3.4s}, [x23] /* load first round key */
prfm pldl1strm, [x20]
cmp w24, #12 /* which key size? */
add x6, x23, #16
sub w7, w24, #2 /* modified # of rounds */
bmi 2f
bne 5f
mov v5.16b, v3.16b
@ -55,33 +64,43 @@ ENTRY(ce_aes_ccm_auth_data)
ld1 {v5.4s}, [x6], #16 /* load next round key */
bpl 3b
aese v0.16b, v4.16b
subs w2, w2, #16 /* last data? */
subs w21, w21, #16 /* last data? */
eor v0.16b, v0.16b, v5.16b /* final round */
bmi 6f
ld1 {v1.16b}, [x1], #16 /* load next input block */
ld1 {v1.16b}, [x20], #16 /* load next input block */
eor v0.16b, v0.16b, v1.16b /* xor with mac */
bne 1b
6: st1 {v0.16b}, [x0] /* store mac */
beq 6f
if_will_cond_yield_neon
st1 {v0.16b}, [x19] /* store mac */
do_cond_yield_neon
ld1 {v0.16b}, [x19] /* reload mac */
endif_yield_neon
b 1b
6: st1 {v0.16b}, [x19] /* store mac */
beq 10f
adds w2, w2, #16
adds w21, w21, #16
beq 10f
mov w8, w2
7: ldrb w7, [x1], #1
mov w25, w21
7: ldrb w7, [x20], #1
umov w6, v0.b[0]
eor w6, w6, w7
strb w6, [x0], #1
subs w2, w2, #1
strb w6, [x19], #1
subs w21, w21, #1
beq 10f
ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */
b 7b
8: mov w7, w8
add w8, w8, #16
8: mov w7, w25
add w25, w25, #16
9: ext v1.16b, v1.16b, v1.16b, #1
adds w7, w7, #1
bne 9b
eor v0.16b, v0.16b, v1.16b
st1 {v0.16b}, [x0]
10: str w8, [x3]
st1 {v0.16b}, [x19]
10: str w25, [x22]
frame_pop
ret
ENDPROC(ce_aes_ccm_auth_data)
@ -126,19 +145,29 @@ ENTRY(ce_aes_ccm_final)
ENDPROC(ce_aes_ccm_final)
.macro aes_ccm_do_crypt,enc
ldr x8, [x6, #8] /* load lower ctr */
ld1 {v0.16b}, [x5] /* load mac */
CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
frame_push 8
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
mov x25, x6
ldr x26, [x25, #8] /* load lower ctr */
ld1 {v0.16b}, [x24] /* load mac */
CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */
0: /* outer loop */
ld1 {v1.8b}, [x6] /* load upper ctr */
prfm pldl1strm, [x1]
add x8, x8, #1
rev x9, x8
cmp w4, #12 /* which key size? */
sub w7, w4, #2 /* get modified # of rounds */
ld1 {v1.8b}, [x25] /* load upper ctr */
prfm pldl1strm, [x20]
add x26, x26, #1
rev x9, x26
cmp w23, #12 /* which key size? */
sub w7, w23, #2 /* get modified # of rounds */
ins v1.d[1], x9 /* no carry in lower ctr */
ld1 {v3.4s}, [x3] /* load first round key */
add x10, x3, #16
ld1 {v3.4s}, [x22] /* load first round key */
add x10, x22, #16
bmi 1f
bne 4f
mov v5.16b, v3.16b
@ -165,9 +194,9 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
bpl 2b
aese v0.16b, v4.16b
aese v1.16b, v4.16b
subs w2, w2, #16
bmi 6f /* partial block? */
ld1 {v2.16b}, [x1], #16 /* load next input block */
subs w21, w21, #16
bmi 7f /* partial block? */
ld1 {v2.16b}, [x20], #16 /* load next input block */
.if \enc == 1
eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */
@ -176,18 +205,29 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
eor v1.16b, v2.16b, v5.16b /* final round enc */
.endif
eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
st1 {v1.16b}, [x0], #16 /* write output block */
bne 0b
CPU_LE( rev x8, x8 )
st1 {v0.16b}, [x5] /* store mac */
str x8, [x6, #8] /* store lsb end of ctr (BE) */
5: ret
st1 {v1.16b}, [x19], #16 /* write output block */
beq 5f
6: eor v0.16b, v0.16b, v5.16b /* final round mac */
if_will_cond_yield_neon
st1 {v0.16b}, [x24] /* store mac */
do_cond_yield_neon
ld1 {v0.16b}, [x24] /* reload mac */
endif_yield_neon
b 0b
5:
CPU_LE( rev x26, x26 )
st1 {v0.16b}, [x24] /* store mac */
str x26, [x25, #8] /* store lsb end of ctr (BE) */
6: frame_pop
ret
7: eor v0.16b, v0.16b, v5.16b /* final round mac */
eor v1.16b, v1.16b, v5.16b /* final round enc */
st1 {v0.16b}, [x5] /* store mac */
add w2, w2, #16 /* process partial tail block */
7: ldrb w9, [x1], #1 /* get 1 byte of input */
st1 {v0.16b}, [x24] /* store mac */
add w21, w21, #16 /* process partial tail block */
8: ldrb w9, [x20], #1 /* get 1 byte of input */
umov w6, v1.b[0] /* get top crypted ctr byte */
umov w7, v0.b[0] /* get top mac byte */
.if \enc == 1
@ -197,13 +237,13 @@ CPU_LE( rev x8, x8 )
eor w9, w9, w6
eor w7, w7, w9
.endif
strb w9, [x0], #1 /* store out byte */
strb w7, [x5], #1 /* store mac byte */
subs w2, w2, #1
beq 5b
strb w9, [x19], #1 /* store out byte */
strb w7, [x24], #1 /* store mac byte */
subs w21, w21, #1
beq 6b
ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */
ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */
b 7b
b 8b
.endm
/*

View File

@ -30,18 +30,21 @@
.endm
/* prepare for encryption with key in rk[] */
.macro enc_prepare, rounds, rk, ignore
load_round_keys \rounds, \rk
.macro enc_prepare, rounds, rk, temp
mov \temp, \rk
load_round_keys \rounds, \temp
.endm
/* prepare for encryption (again) but with new key in rk[] */
.macro enc_switch_key, rounds, rk, ignore
load_round_keys \rounds, \rk
.macro enc_switch_key, rounds, rk, temp
mov \temp, \rk
load_round_keys \rounds, \temp
.endm
/* prepare for decryption with key in rk[] */
.macro dec_prepare, rounds, rk, ignore
load_round_keys \rounds, \rk
.macro dec_prepare, rounds, rk, temp
mov \temp, \rk
load_round_keys \rounds, \temp
.endm
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3

View File

@ -14,12 +14,12 @@
.align 4
aes_encrypt_block4x:
encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
encrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
ret
ENDPROC(aes_encrypt_block4x)
aes_decrypt_block4x:
decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
decrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
ret
ENDPROC(aes_decrypt_block4x)
@ -31,57 +31,71 @@ ENDPROC(aes_decrypt_block4x)
*/
AES_ENTRY(aes_ecb_encrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
frame_push 5
enc_prepare w3, x2, x5
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
.Lecbencrestart:
enc_prepare w22, x21, x5
.LecbencloopNx:
subs w4, w4, #4
subs w23, w23, #4
bmi .Lecbenc1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
bl aes_encrypt_block4x
st1 {v0.16b-v3.16b}, [x0], #64
st1 {v0.16b-v3.16b}, [x19], #64
cond_yield_neon .Lecbencrestart
b .LecbencloopNx
.Lecbenc1x:
adds w4, w4, #4
adds w23, w23, #4
beq .Lecbencout
.Lecbencloop:
ld1 {v0.16b}, [x1], #16 /* get next pt block */
encrypt_block v0, w3, x2, x5, w6
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
ld1 {v0.16b}, [x20], #16 /* get next pt block */
encrypt_block v0, w22, x21, x5, w6
st1 {v0.16b}, [x19], #16
subs w23, w23, #1
bne .Lecbencloop
.Lecbencout:
ldp x29, x30, [sp], #16
frame_pop
ret
AES_ENDPROC(aes_ecb_encrypt)
AES_ENTRY(aes_ecb_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
frame_push 5
dec_prepare w3, x2, x5
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
.Lecbdecrestart:
dec_prepare w22, x21, x5
.LecbdecloopNx:
subs w4, w4, #4
subs w23, w23, #4
bmi .Lecbdec1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
bl aes_decrypt_block4x
st1 {v0.16b-v3.16b}, [x0], #64
st1 {v0.16b-v3.16b}, [x19], #64
cond_yield_neon .Lecbdecrestart
b .LecbdecloopNx
.Lecbdec1x:
adds w4, w4, #4
adds w23, w23, #4
beq .Lecbdecout
.Lecbdecloop:
ld1 {v0.16b}, [x1], #16 /* get next ct block */
decrypt_block v0, w3, x2, x5, w6
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
ld1 {v0.16b}, [x20], #16 /* get next ct block */
decrypt_block v0, w22, x21, x5, w6
st1 {v0.16b}, [x19], #16
subs w23, w23, #1
bne .Lecbdecloop
.Lecbdecout:
ldp x29, x30, [sp], #16
frame_pop
ret
AES_ENDPROC(aes_ecb_decrypt)
@ -94,78 +108,100 @@ AES_ENDPROC(aes_ecb_decrypt)
*/
AES_ENTRY(aes_cbc_encrypt)
ld1 {v4.16b}, [x5] /* get iv */
enc_prepare w3, x2, x6
frame_push 6
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
.Lcbcencrestart:
ld1 {v4.16b}, [x24] /* get iv */
enc_prepare w22, x21, x6
.Lcbcencloop4x:
subs w4, w4, #4
subs w23, w23, #4
bmi .Lcbcenc1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
encrypt_block v0, w3, x2, x6, w7
encrypt_block v0, w22, x21, x6, w7
eor v1.16b, v1.16b, v0.16b
encrypt_block v1, w3, x2, x6, w7
encrypt_block v1, w22, x21, x6, w7
eor v2.16b, v2.16b, v1.16b
encrypt_block v2, w3, x2, x6, w7
encrypt_block v2, w22, x21, x6, w7
eor v3.16b, v3.16b, v2.16b
encrypt_block v3, w3, x2, x6, w7
st1 {v0.16b-v3.16b}, [x0], #64
encrypt_block v3, w22, x21, x6, w7
st1 {v0.16b-v3.16b}, [x19], #64
mov v4.16b, v3.16b
st1 {v4.16b}, [x24] /* return iv */
cond_yield_neon .Lcbcencrestart
b .Lcbcencloop4x
.Lcbcenc1x:
adds w4, w4, #4
adds w23, w23, #4
beq .Lcbcencout
.Lcbcencloop:
ld1 {v0.16b}, [x1], #16 /* get next pt block */
ld1 {v0.16b}, [x20], #16 /* get next pt block */
eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
encrypt_block v4, w3, x2, x6, w7
st1 {v4.16b}, [x0], #16
subs w4, w4, #1
encrypt_block v4, w22, x21, x6, w7
st1 {v4.16b}, [x19], #16
subs w23, w23, #1
bne .Lcbcencloop
.Lcbcencout:
st1 {v4.16b}, [x5] /* return iv */
st1 {v4.16b}, [x24] /* return iv */
frame_pop
ret
AES_ENDPROC(aes_cbc_encrypt)
AES_ENTRY(aes_cbc_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
frame_push 6
ld1 {v7.16b}, [x5] /* get iv */
dec_prepare w3, x2, x6
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
.Lcbcdecrestart:
ld1 {v7.16b}, [x24] /* get iv */
dec_prepare w22, x21, x6
.LcbcdecloopNx:
subs w4, w4, #4
subs w23, w23, #4
bmi .Lcbcdec1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
mov v4.16b, v0.16b
mov v5.16b, v1.16b
mov v6.16b, v2.16b
bl aes_decrypt_block4x
sub x1, x1, #16
sub x20, x20, #16
eor v0.16b, v0.16b, v7.16b
eor v1.16b, v1.16b, v4.16b
ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
ld1 {v7.16b}, [x20], #16 /* reload 1 ct block */
eor v2.16b, v2.16b, v5.16b
eor v3.16b, v3.16b, v6.16b
st1 {v0.16b-v3.16b}, [x0], #64
st1 {v0.16b-v3.16b}, [x19], #64
st1 {v7.16b}, [x24] /* return iv */
cond_yield_neon .Lcbcdecrestart
b .LcbcdecloopNx
.Lcbcdec1x:
adds w4, w4, #4
adds w23, w23, #4
beq .Lcbcdecout
.Lcbcdecloop:
ld1 {v1.16b}, [x1], #16 /* get next ct block */
ld1 {v1.16b}, [x20], #16 /* get next ct block */
mov v0.16b, v1.16b /* ...and copy to v0 */
decrypt_block v0, w3, x2, x6, w7
decrypt_block v0, w22, x21, x6, w7
eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
mov v7.16b, v1.16b /* ct is next iv */
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
st1 {v0.16b}, [x19], #16
subs w23, w23, #1
bne .Lcbcdecloop
.Lcbcdecout:
st1 {v7.16b}, [x5] /* return iv */
ldp x29, x30, [sp], #16
st1 {v7.16b}, [x24] /* return iv */
frame_pop
ret
AES_ENDPROC(aes_cbc_decrypt)
@ -176,19 +212,26 @@ AES_ENDPROC(aes_cbc_decrypt)
*/
AES_ENTRY(aes_ctr_encrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
frame_push 6
enc_prepare w3, x2, x6
ld1 {v4.16b}, [x5]
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
.Lctrrestart:
enc_prepare w22, x21, x6
ld1 {v4.16b}, [x24]
umov x6, v4.d[1] /* keep swabbed ctr in reg */
rev x6, x6
cmn w6, w4 /* 32 bit overflow? */
bcs .Lctrloop
.LctrloopNx:
subs w4, w4, #4
subs w23, w23, #4
bmi .Lctr1x
cmn w6, #4 /* 32 bit overflow? */
bcs .Lctr1x
ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
dup v7.4s, w6
mov v0.16b, v4.16b
@ -200,25 +243,27 @@ AES_ENTRY(aes_ctr_encrypt)
mov v1.s[3], v8.s[0]
mov v2.s[3], v8.s[1]
mov v3.s[3], v8.s[2]
ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
ld1 {v5.16b-v7.16b}, [x20], #48 /* get 3 input blocks */
bl aes_encrypt_block4x
eor v0.16b, v5.16b, v0.16b
ld1 {v5.16b}, [x1], #16 /* get 1 input block */
ld1 {v5.16b}, [x20], #16 /* get 1 input block */
eor v1.16b, v6.16b, v1.16b
eor v2.16b, v7.16b, v2.16b
eor v3.16b, v5.16b, v3.16b
st1 {v0.16b-v3.16b}, [x0], #64
st1 {v0.16b-v3.16b}, [x19], #64
add x6, x6, #4
rev x7, x6
ins v4.d[1], x7
cbz w4, .Lctrout
cbz w23, .Lctrout
st1 {v4.16b}, [x24] /* return next CTR value */
cond_yield_neon .Lctrrestart
b .LctrloopNx
.Lctr1x:
adds w4, w4, #4
adds w23, w23, #4
beq .Lctrout
.Lctrloop:
mov v0.16b, v4.16b
encrypt_block v0, w3, x2, x8, w7
encrypt_block v0, w22, x21, x8, w7
adds x6, x6, #1 /* increment BE ctr */
rev x7, x6
@ -226,22 +271,22 @@ AES_ENTRY(aes_ctr_encrypt)
bcs .Lctrcarry /* overflow? */
.Lctrcarrydone:
subs w4, w4, #1
subs w23, w23, #1
bmi .Lctrtailblock /* blocks <0 means tail block */
ld1 {v3.16b}, [x1], #16
ld1 {v3.16b}, [x20], #16
eor v3.16b, v0.16b, v3.16b
st1 {v3.16b}, [x0], #16
st1 {v3.16b}, [x19], #16
bne .Lctrloop
.Lctrout:
st1 {v4.16b}, [x5] /* return next CTR value */
ldp x29, x30, [sp], #16
st1 {v4.16b}, [x24] /* return next CTR value */
.Lctrret:
frame_pop
ret
.Lctrtailblock:
st1 {v0.16b}, [x0]
ldp x29, x30, [sp], #16
ret
st1 {v0.16b}, [x19]
b .Lctrret
.Lctrcarry:
umov x7, v4.d[0] /* load upper word of ctr */
@ -274,10 +319,16 @@ CPU_LE( .quad 1, 0x87 )
CPU_BE( .quad 0x87, 1 )
AES_ENTRY(aes_xts_encrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
frame_push 6
ld1 {v4.16b}, [x6]
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x6
ld1 {v4.16b}, [x24]
cbz w7, .Lxtsencnotfirst
enc_prepare w3, x5, x8
@ -286,15 +337,17 @@ AES_ENTRY(aes_xts_encrypt)
ldr q7, .Lxts_mul_x
b .LxtsencNx
.Lxtsencrestart:
ld1 {v4.16b}, [x24]
.Lxtsencnotfirst:
enc_prepare w3, x2, x8
enc_prepare w22, x21, x8
.LxtsencloopNx:
ldr q7, .Lxts_mul_x
next_tweak v4, v4, v7, v8
.LxtsencNx:
subs w4, w4, #4
subs w23, w23, #4
bmi .Lxtsenc1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
next_tweak v5, v4, v7, v8
eor v0.16b, v0.16b, v4.16b
next_tweak v6, v5, v7, v8
@ -307,35 +360,43 @@ AES_ENTRY(aes_xts_encrypt)
eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
st1 {v0.16b-v3.16b}, [x0], #64
st1 {v0.16b-v3.16b}, [x19], #64
mov v4.16b, v7.16b
cbz w4, .Lxtsencout
cbz w23, .Lxtsencout
st1 {v4.16b}, [x24]
cond_yield_neon .Lxtsencrestart
b .LxtsencloopNx
.Lxtsenc1x:
adds w4, w4, #4
adds w23, w23, #4
beq .Lxtsencout
.Lxtsencloop:
ld1 {v1.16b}, [x1], #16
ld1 {v1.16b}, [x20], #16
eor v0.16b, v1.16b, v4.16b
encrypt_block v0, w3, x2, x8, w7
encrypt_block v0, w22, x21, x8, w7
eor v0.16b, v0.16b, v4.16b
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
st1 {v0.16b}, [x19], #16
subs w23, w23, #1
beq .Lxtsencout
next_tweak v4, v4, v7, v8
b .Lxtsencloop
.Lxtsencout:
st1 {v4.16b}, [x6]
ldp x29, x30, [sp], #16
st1 {v4.16b}, [x24]
frame_pop
ret
AES_ENDPROC(aes_xts_encrypt)
AES_ENTRY(aes_xts_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
frame_push 6
ld1 {v4.16b}, [x6]
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x6
ld1 {v4.16b}, [x24]
cbz w7, .Lxtsdecnotfirst
enc_prepare w3, x5, x8
@ -344,15 +405,17 @@ AES_ENTRY(aes_xts_decrypt)
ldr q7, .Lxts_mul_x
b .LxtsdecNx
.Lxtsdecrestart:
ld1 {v4.16b}, [x24]
.Lxtsdecnotfirst:
dec_prepare w3, x2, x8
dec_prepare w22, x21, x8
.LxtsdecloopNx:
ldr q7, .Lxts_mul_x
next_tweak v4, v4, v7, v8
.LxtsdecNx:
subs w4, w4, #4
subs w23, w23, #4
bmi .Lxtsdec1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
next_tweak v5, v4, v7, v8
eor v0.16b, v0.16b, v4.16b
next_tweak v6, v5, v7, v8
@ -365,26 +428,28 @@ AES_ENTRY(aes_xts_decrypt)
eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
st1 {v0.16b-v3.16b}, [x0], #64
st1 {v0.16b-v3.16b}, [x19], #64
mov v4.16b, v7.16b
cbz w4, .Lxtsdecout
cbz w23, .Lxtsdecout
st1 {v4.16b}, [x24]
cond_yield_neon .Lxtsdecrestart
b .LxtsdecloopNx
.Lxtsdec1x:
adds w4, w4, #4
adds w23, w23, #4
beq .Lxtsdecout
.Lxtsdecloop:
ld1 {v1.16b}, [x1], #16
ld1 {v1.16b}, [x20], #16
eor v0.16b, v1.16b, v4.16b
decrypt_block v0, w3, x2, x8, w7
decrypt_block v0, w22, x21, x8, w7
eor v0.16b, v0.16b, v4.16b
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
st1 {v0.16b}, [x19], #16
subs w23, w23, #1
beq .Lxtsdecout
next_tweak v4, v4, v7, v8
b .Lxtsdecloop
.Lxtsdecout:
st1 {v4.16b}, [x6]
ldp x29, x30, [sp], #16
st1 {v4.16b}, [x24]
frame_pop
ret
AES_ENDPROC(aes_xts_decrypt)
@ -393,43 +458,61 @@ AES_ENDPROC(aes_xts_decrypt)
* int blocks, u8 dg[], int enc_before, int enc_after)
*/
AES_ENTRY(aes_mac_update)
ld1 {v0.16b}, [x4] /* get dg */
frame_push 6
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x6
ld1 {v0.16b}, [x23] /* get dg */
enc_prepare w2, x1, x7
cbz w5, .Lmacloop4x
encrypt_block v0, w2, x1, x7, w8
.Lmacloop4x:
subs w3, w3, #4
subs w22, w22, #4
bmi .Lmac1x
ld1 {v1.16b-v4.16b}, [x0], #64 /* get next pt block */
ld1 {v1.16b-v4.16b}, [x19], #64 /* get next pt block */
eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
encrypt_block v0, w2, x1, x7, w8
encrypt_block v0, w21, x20, x7, w8
eor v0.16b, v0.16b, v2.16b
encrypt_block v0, w2, x1, x7, w8
encrypt_block v0, w21, x20, x7, w8
eor v0.16b, v0.16b, v3.16b
encrypt_block v0, w2, x1, x7, w8
encrypt_block v0, w21, x20, x7, w8
eor v0.16b, v0.16b, v4.16b
cmp w3, wzr
csinv x5, x6, xzr, eq
cmp w22, wzr
csinv x5, x24, xzr, eq
cbz w5, .Lmacout
encrypt_block v0, w2, x1, x7, w8
encrypt_block v0, w21, x20, x7, w8
st1 {v0.16b}, [x23] /* return dg */
cond_yield_neon .Lmacrestart
b .Lmacloop4x
.Lmac1x:
add w3, w3, #4
add w22, w22, #4
.Lmacloop:
cbz w3, .Lmacout
ld1 {v1.16b}, [x0], #16 /* get next pt block */
cbz w22, .Lmacout
ld1 {v1.16b}, [x19], #16 /* get next pt block */
eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
subs w3, w3, #1
csinv x5, x6, xzr, eq
subs w22, w22, #1
csinv x5, x24, xzr, eq
cbz w5, .Lmacout
encrypt_block v0, w2, x1, x7, w8
.Lmacenc:
encrypt_block v0, w21, x20, x7, w8
b .Lmacloop
.Lmacout:
st1 {v0.16b}, [x4] /* return dg */
st1 {v0.16b}, [x23] /* return dg */
frame_pop
ret
.Lmacrestart:
ld1 {v0.16b}, [x23] /* get dg */
enc_prepare w21, x20, x0
b .Lmacloop4x
AES_ENDPROC(aes_mac_update)

View File

@ -565,54 +565,61 @@ ENDPROC(aesbs_decrypt8)
* int blocks)
*/
.macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
stp x29, x30, [sp, #-16]!
mov x29, sp
frame_push 5
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
99: mov x5, #1
lsl x5, x5, x4
subs w4, w4, #8
csel x4, x4, xzr, pl
lsl x5, x5, x23
subs w23, w23, #8
csel x23, x23, xzr, pl
csel x5, x5, xzr, mi
ld1 {v0.16b}, [x1], #16
ld1 {v0.16b}, [x20], #16
tbnz x5, #1, 0f
ld1 {v1.16b}, [x1], #16
ld1 {v1.16b}, [x20], #16
tbnz x5, #2, 0f
ld1 {v2.16b}, [x1], #16
ld1 {v2.16b}, [x20], #16
tbnz x5, #3, 0f
ld1 {v3.16b}, [x1], #16
ld1 {v3.16b}, [x20], #16
tbnz x5, #4, 0f
ld1 {v4.16b}, [x1], #16
ld1 {v4.16b}, [x20], #16
tbnz x5, #5, 0f
ld1 {v5.16b}, [x1], #16
ld1 {v5.16b}, [x20], #16
tbnz x5, #6, 0f
ld1 {v6.16b}, [x1], #16
ld1 {v6.16b}, [x20], #16
tbnz x5, #7, 0f
ld1 {v7.16b}, [x1], #16
ld1 {v7.16b}, [x20], #16
0: mov bskey, x2
mov rounds, x3
0: mov bskey, x21
mov rounds, x22
bl \do8
st1 {\o0\().16b}, [x0], #16
st1 {\o0\().16b}, [x19], #16
tbnz x5, #1, 1f
st1 {\o1\().16b}, [x0], #16
st1 {\o1\().16b}, [x19], #16
tbnz x5, #2, 1f
st1 {\o2\().16b}, [x0], #16
st1 {\o2\().16b}, [x19], #16
tbnz x5, #3, 1f
st1 {\o3\().16b}, [x0], #16
st1 {\o3\().16b}, [x19], #16
tbnz x5, #4, 1f
st1 {\o4\().16b}, [x0], #16
st1 {\o4\().16b}, [x19], #16
tbnz x5, #5, 1f
st1 {\o5\().16b}, [x0], #16
st1 {\o5\().16b}, [x19], #16
tbnz x5, #6, 1f
st1 {\o6\().16b}, [x0], #16
st1 {\o6\().16b}, [x19], #16
tbnz x5, #7, 1f
st1 {\o7\().16b}, [x0], #16
st1 {\o7\().16b}, [x19], #16
cbnz x4, 99b
cbz x23, 1f
cond_yield_neon
b 99b
1: ldp x29, x30, [sp], #16
1: frame_pop
ret
.endm
@ -632,43 +639,49 @@ ENDPROC(aesbs_ecb_decrypt)
*/
.align 4
ENTRY(aesbs_cbc_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
frame_push 6
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
99: mov x6, #1
lsl x6, x6, x4
subs w4, w4, #8
csel x4, x4, xzr, pl
lsl x6, x6, x23
subs w23, w23, #8
csel x23, x23, xzr, pl
csel x6, x6, xzr, mi
ld1 {v0.16b}, [x1], #16
ld1 {v0.16b}, [x20], #16
mov v25.16b, v0.16b
tbnz x6, #1, 0f
ld1 {v1.16b}, [x1], #16
ld1 {v1.16b}, [x20], #16
mov v26.16b, v1.16b
tbnz x6, #2, 0f
ld1 {v2.16b}, [x1], #16
ld1 {v2.16b}, [x20], #16
mov v27.16b, v2.16b
tbnz x6, #3, 0f
ld1 {v3.16b}, [x1], #16
ld1 {v3.16b}, [x20], #16
mov v28.16b, v3.16b
tbnz x6, #4, 0f
ld1 {v4.16b}, [x1], #16
ld1 {v4.16b}, [x20], #16
mov v29.16b, v4.16b
tbnz x6, #5, 0f
ld1 {v5.16b}, [x1], #16
ld1 {v5.16b}, [x20], #16
mov v30.16b, v5.16b
tbnz x6, #6, 0f
ld1 {v6.16b}, [x1], #16
ld1 {v6.16b}, [x20], #16
mov v31.16b, v6.16b
tbnz x6, #7, 0f
ld1 {v7.16b}, [x1]
ld1 {v7.16b}, [x20]
0: mov bskey, x2
mov rounds, x3
0: mov bskey, x21
mov rounds, x22
bl aesbs_decrypt8
ld1 {v24.16b}, [x5] // load IV
ld1 {v24.16b}, [x24] // load IV
eor v1.16b, v1.16b, v25.16b
eor v6.16b, v6.16b, v26.16b
@ -679,34 +692,36 @@ ENTRY(aesbs_cbc_decrypt)
eor v3.16b, v3.16b, v30.16b
eor v5.16b, v5.16b, v31.16b
st1 {v0.16b}, [x0], #16
st1 {v0.16b}, [x19], #16
mov v24.16b, v25.16b
tbnz x6, #1, 1f
st1 {v1.16b}, [x0], #16
st1 {v1.16b}, [x19], #16
mov v24.16b, v26.16b
tbnz x6, #2, 1f
st1 {v6.16b}, [x0], #16
st1 {v6.16b}, [x19], #16
mov v24.16b, v27.16b
tbnz x6, #3, 1f
st1 {v4.16b}, [x0], #16
st1 {v4.16b}, [x19], #16
mov v24.16b, v28.16b
tbnz x6, #4, 1f
st1 {v2.16b}, [x0], #16
st1 {v2.16b}, [x19], #16
mov v24.16b, v29.16b
tbnz x6, #5, 1f
st1 {v7.16b}, [x0], #16
st1 {v7.16b}, [x19], #16
mov v24.16b, v30.16b
tbnz x6, #6, 1f
st1 {v3.16b}, [x0], #16
st1 {v3.16b}, [x19], #16
mov v24.16b, v31.16b
tbnz x6, #7, 1f
ld1 {v24.16b}, [x1], #16
st1 {v5.16b}, [x0], #16
1: st1 {v24.16b}, [x5] // store IV
ld1 {v24.16b}, [x20], #16
st1 {v5.16b}, [x19], #16
1: st1 {v24.16b}, [x24] // store IV
cbnz x4, 99b
cbz x23, 2f
cond_yield_neon
b 99b
ldp x29, x30, [sp], #16
2: frame_pop
ret
ENDPROC(aesbs_cbc_decrypt)
@ -731,87 +746,93 @@ CPU_BE( .quad 0x87, 1 )
*/
__xts_crypt8:
mov x6, #1
lsl x6, x6, x4
subs w4, w4, #8
csel x4, x4, xzr, pl
lsl x6, x6, x23
subs w23, w23, #8
csel x23, x23, xzr, pl
csel x6, x6, xzr, mi
ld1 {v0.16b}, [x1], #16
ld1 {v0.16b}, [x20], #16
next_tweak v26, v25, v30, v31
eor v0.16b, v0.16b, v25.16b
tbnz x6, #1, 0f
ld1 {v1.16b}, [x1], #16
ld1 {v1.16b}, [x20], #16
next_tweak v27, v26, v30, v31
eor v1.16b, v1.16b, v26.16b
tbnz x6, #2, 0f
ld1 {v2.16b}, [x1], #16
ld1 {v2.16b}, [x20], #16
next_tweak v28, v27, v30, v31
eor v2.16b, v2.16b, v27.16b
tbnz x6, #3, 0f
ld1 {v3.16b}, [x1], #16
ld1 {v3.16b}, [x20], #16
next_tweak v29, v28, v30, v31
eor v3.16b, v3.16b, v28.16b
tbnz x6, #4, 0f
ld1 {v4.16b}, [x1], #16
str q29, [sp, #16]
ld1 {v4.16b}, [x20], #16
str q29, [sp, #.Lframe_local_offset]
eor v4.16b, v4.16b, v29.16b
next_tweak v29, v29, v30, v31
tbnz x6, #5, 0f
ld1 {v5.16b}, [x1], #16
str q29, [sp, #32]
ld1 {v5.16b}, [x20], #16
str q29, [sp, #.Lframe_local_offset + 16]
eor v5.16b, v5.16b, v29.16b
next_tweak v29, v29, v30, v31
tbnz x6, #6, 0f
ld1 {v6.16b}, [x1], #16
str q29, [sp, #48]
ld1 {v6.16b}, [x20], #16
str q29, [sp, #.Lframe_local_offset + 32]
eor v6.16b, v6.16b, v29.16b
next_tweak v29, v29, v30, v31
tbnz x6, #7, 0f
ld1 {v7.16b}, [x1], #16
str q29, [sp, #64]
ld1 {v7.16b}, [x20], #16
str q29, [sp, #.Lframe_local_offset + 48]
eor v7.16b, v7.16b, v29.16b
next_tweak v29, v29, v30, v31
0: mov bskey, x2
mov rounds, x3
0: mov bskey, x21
mov rounds, x22
br x7
ENDPROC(__xts_crypt8)
.macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
stp x29, x30, [sp, #-80]!
mov x29, sp
frame_push 6, 64
ldr q30, .Lxts_mul_x
ld1 {v25.16b}, [x5]
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
0: ldr q30, .Lxts_mul_x
ld1 {v25.16b}, [x24]
99: adr x7, \do8
bl __xts_crypt8
ldp q16, q17, [sp, #16]
ldp q18, q19, [sp, #48]
ldp q16, q17, [sp, #.Lframe_local_offset]
ldp q18, q19, [sp, #.Lframe_local_offset + 32]
eor \o0\().16b, \o0\().16b, v25.16b
eor \o1\().16b, \o1\().16b, v26.16b
eor \o2\().16b, \o2\().16b, v27.16b
eor \o3\().16b, \o3\().16b, v28.16b
st1 {\o0\().16b}, [x0], #16
st1 {\o0\().16b}, [x19], #16
mov v25.16b, v26.16b
tbnz x6, #1, 1f
st1 {\o1\().16b}, [x0], #16
st1 {\o1\().16b}, [x19], #16
mov v25.16b, v27.16b
tbnz x6, #2, 1f
st1 {\o2\().16b}, [x0], #16
st1 {\o2\().16b}, [x19], #16
mov v25.16b, v28.16b
tbnz x6, #3, 1f
st1 {\o3\().16b}, [x0], #16
st1 {\o3\().16b}, [x19], #16
mov v25.16b, v29.16b
tbnz x6, #4, 1f
@ -820,18 +841,22 @@ ENDPROC(__xts_crypt8)
eor \o6\().16b, \o6\().16b, v18.16b
eor \o7\().16b, \o7\().16b, v19.16b
st1 {\o4\().16b}, [x0], #16
st1 {\o4\().16b}, [x19], #16
tbnz x6, #5, 1f
st1 {\o5\().16b}, [x0], #16
st1 {\o5\().16b}, [x19], #16
tbnz x6, #6, 1f
st1 {\o6\().16b}, [x0], #16
st1 {\o6\().16b}, [x19], #16
tbnz x6, #7, 1f
st1 {\o7\().16b}, [x0], #16
st1 {\o7\().16b}, [x19], #16
cbnz x4, 99b
cbz x23, 1f
st1 {v25.16b}, [x24]
1: st1 {v25.16b}, [x5]
ldp x29, x30, [sp], #80
cond_yield_neon 0b
b 99b
1: st1 {v25.16b}, [x24]
frame_pop
ret
.endm
@ -856,24 +881,31 @@ ENDPROC(aesbs_xts_decrypt)
* int rounds, int blocks, u8 iv[], u8 final[])
*/
ENTRY(aesbs_ctr_encrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
frame_push 8
cmp x6, #0
cset x10, ne
add x4, x4, x10 // do one extra block if final
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
mov x25, x6
ldp x7, x8, [x5]
ld1 {v0.16b}, [x5]
cmp x25, #0
cset x26, ne
add x23, x23, x26 // do one extra block if final
98: ldp x7, x8, [x24]
ld1 {v0.16b}, [x24]
CPU_LE( rev x7, x7 )
CPU_LE( rev x8, x8 )
adds x8, x8, #1
adc x7, x7, xzr
99: mov x9, #1
lsl x9, x9, x4
subs w4, w4, #8
csel x4, x4, xzr, pl
lsl x9, x9, x23
subs w23, w23, #8
csel x23, x23, xzr, pl
csel x9, x9, xzr, le
tbnz x9, #1, 0f
@ -891,82 +923,85 @@ CPU_LE( rev x8, x8 )
tbnz x9, #7, 0f
next_ctr v7
0: mov bskey, x2
mov rounds, x3
0: mov bskey, x21
mov rounds, x22
bl aesbs_encrypt8
lsr x9, x9, x10 // disregard the extra block
lsr x9, x9, x26 // disregard the extra block
tbnz x9, #0, 0f
ld1 {v8.16b}, [x1], #16
ld1 {v8.16b}, [x20], #16
eor v0.16b, v0.16b, v8.16b
st1 {v0.16b}, [x0], #16
st1 {v0.16b}, [x19], #16
tbnz x9, #1, 1f
ld1 {v9.16b}, [x1], #16
ld1 {v9.16b}, [x20], #16
eor v1.16b, v1.16b, v9.16b
st1 {v1.16b}, [x0], #16
st1 {v1.16b}, [x19], #16
tbnz x9, #2, 2f
ld1 {v10.16b}, [x1], #16
ld1 {v10.16b}, [x20], #16
eor v4.16b, v4.16b, v10.16b
st1 {v4.16b}, [x0], #16
st1 {v4.16b}, [x19], #16
tbnz x9, #3, 3f
ld1 {v11.16b}, [x1], #16
ld1 {v11.16b}, [x20], #16
eor v6.16b, v6.16b, v11.16b
st1 {v6.16b}, [x0], #16
st1 {v6.16b}, [x19], #16
tbnz x9, #4, 4f
ld1 {v12.16b}, [x1], #16
ld1 {v12.16b}, [x20], #16
eor v3.16b, v3.16b, v12.16b
st1 {v3.16b}, [x0], #16
st1 {v3.16b}, [x19], #16
tbnz x9, #5, 5f
ld1 {v13.16b}, [x1], #16
ld1 {v13.16b}, [x20], #16
eor v7.16b, v7.16b, v13.16b
st1 {v7.16b}, [x0], #16
st1 {v7.16b}, [x19], #16
tbnz x9, #6, 6f
ld1 {v14.16b}, [x1], #16
ld1 {v14.16b}, [x20], #16
eor v2.16b, v2.16b, v14.16b
st1 {v2.16b}, [x0], #16
st1 {v2.16b}, [x19], #16
tbnz x9, #7, 7f
ld1 {v15.16b}, [x1], #16
ld1 {v15.16b}, [x20], #16
eor v5.16b, v5.16b, v15.16b
st1 {v5.16b}, [x0], #16
st1 {v5.16b}, [x19], #16
8: next_ctr v0
cbnz x4, 99b
st1 {v0.16b}, [x24]
cbz x23, 0f
0: st1 {v0.16b}, [x5]
ldp x29, x30, [sp], #16
cond_yield_neon 98b
b 99b
0: frame_pop
ret
/*
* If we are handling the tail of the input (x6 != NULL), return the
* final keystream block back to the caller.
*/
1: cbz x6, 8b
st1 {v1.16b}, [x6]
1: cbz x25, 8b
st1 {v1.16b}, [x25]
b 8b
2: cbz x6, 8b
st1 {v4.16b}, [x6]
2: cbz x25, 8b
st1 {v4.16b}, [x25]
b 8b
3: cbz x6, 8b
st1 {v6.16b}, [x6]
3: cbz x25, 8b
st1 {v6.16b}, [x25]
b 8b
4: cbz x6, 8b
st1 {v3.16b}, [x6]
4: cbz x25, 8b
st1 {v3.16b}, [x25]
b 8b
5: cbz x6, 8b
st1 {v7.16b}, [x6]
5: cbz x25, 8b
st1 {v7.16b}, [x25]
b 8b
6: cbz x6, 8b
st1 {v2.16b}, [x6]
6: cbz x25, 8b
st1 {v2.16b}, [x25]
b 8b
7: cbz x6, 8b
st1 {v5.16b}, [x6]
7: cbz x25, 8b
st1 {v5.16b}, [x25]
b 8b
ENDPROC(aesbs_ctr_encrypt)

View File

@ -100,9 +100,10 @@
dCONSTANT .req d0
qCONSTANT .req q0
BUF .req x0
LEN .req x1
CRC .req x2
BUF .req x19
LEN .req x20
CRC .req x21
CONST .req x22
vzr .req v9
@ -123,7 +124,14 @@ ENTRY(crc32_pmull_le)
ENTRY(crc32c_pmull_le)
adr_l x3, .Lcrc32c_constants
0: bic LEN, LEN, #15
0: frame_push 4, 64
mov BUF, x0
mov LEN, x1
mov CRC, x2
mov CONST, x3
bic LEN, LEN, #15
ld1 {v1.16b-v4.16b}, [BUF], #0x40
movi vzr.16b, #0
fmov dCONSTANT, CRC
@ -132,7 +140,7 @@ ENTRY(crc32c_pmull_le)
cmp LEN, #0x40
b.lt less_64
ldr qCONSTANT, [x3]
ldr qCONSTANT, [CONST]
loop_64: /* 64 bytes Full cache line folding */
sub LEN, LEN, #0x40
@ -162,10 +170,21 @@ loop_64: /* 64 bytes Full cache line folding */
eor v4.16b, v4.16b, v8.16b
cmp LEN, #0x40
b.ge loop_64
b.lt less_64
if_will_cond_yield_neon
stp q1, q2, [sp, #.Lframe_local_offset]
stp q3, q4, [sp, #.Lframe_local_offset + 32]
do_cond_yield_neon
ldp q1, q2, [sp, #.Lframe_local_offset]
ldp q3, q4, [sp, #.Lframe_local_offset + 32]
ldr qCONSTANT, [CONST]
movi vzr.16b, #0
endif_yield_neon
b loop_64
less_64: /* Folding cache line into 128bit */
ldr qCONSTANT, [x3, #16]
ldr qCONSTANT, [CONST, #16]
pmull2 v5.1q, v1.2d, vCONSTANT.2d
pmull v1.1q, v1.1d, vCONSTANT.1d
@ -204,8 +223,8 @@ fold_64:
eor v1.16b, v1.16b, v2.16b
/* final 32-bit fold */
ldr dCONSTANT, [x3, #32]
ldr d3, [x3, #40]
ldr dCONSTANT, [CONST, #32]
ldr d3, [CONST, #40]
ext v2.16b, v1.16b, vzr.16b, #4
and v1.16b, v1.16b, v3.16b
@ -213,7 +232,7 @@ fold_64:
eor v1.16b, v1.16b, v2.16b
/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
ldr qCONSTANT, [x3, #48]
ldr qCONSTANT, [CONST, #48]
and v2.16b, v1.16b, v3.16b
ext v2.16b, vzr.16b, v2.16b, #8
@ -223,6 +242,7 @@ fold_64:
eor v1.16b, v1.16b, v2.16b
mov w0, v1.s[1]
frame_pop
ret
ENDPROC(crc32_pmull_le)
ENDPROC(crc32c_pmull_le)

View File

@ -74,13 +74,19 @@
.text
.cpu generic+crypto
arg1_low32 .req w0
arg2 .req x1
arg3 .req x2
arg1_low32 .req w19
arg2 .req x20
arg3 .req x21
vzr .req v13
ENTRY(crc_t10dif_pmull)
frame_push 3, 128
mov arg1_low32, w0
mov arg2, x1
mov arg3, x2
movi vzr.16b, #0 // init zero register
// adjust the 16-bit initial_crc value, scale it to 32 bits
@ -175,8 +181,25 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
subs arg3, arg3, #128
// check if there is another 64B in the buffer to be able to fold
b.ge _fold_64_B_loop
b.lt _fold_64_B_end
if_will_cond_yield_neon
stp q0, q1, [sp, #.Lframe_local_offset]
stp q2, q3, [sp, #.Lframe_local_offset + 32]
stp q4, q5, [sp, #.Lframe_local_offset + 64]
stp q6, q7, [sp, #.Lframe_local_offset + 96]
do_cond_yield_neon
ldp q0, q1, [sp, #.Lframe_local_offset]
ldp q2, q3, [sp, #.Lframe_local_offset + 32]
ldp q4, q5, [sp, #.Lframe_local_offset + 64]
ldp q6, q7, [sp, #.Lframe_local_offset + 96]
ldr_l q10, rk3, x8
movi vzr.16b, #0 // init zero register
endif_yield_neon
b _fold_64_B_loop
_fold_64_B_end:
// at this point, the buffer pointer is pointing at the last y Bytes
// of the buffer the 64B of folded data is in 4 of the vector
// registers: v0, v1, v2, v3
@ -304,6 +327,7 @@ _barrett:
_cleanup:
// scale the result back to 16 bits
lsr x0, x0, #16
frame_pop
ret
_less_than_128:

View File

@ -213,22 +213,31 @@
.endm
.macro __pmull_ghash, pn
ld1 {SHASH.2d}, [x3]
ld1 {XL.2d}, [x1]
frame_push 5
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
0: ld1 {SHASH.2d}, [x22]
ld1 {XL.2d}, [x20]
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
eor SHASH2.16b, SHASH2.16b, SHASH.16b
__pmull_pre_\pn
/* do the head block first, if supplied */
cbz x4, 0f
ld1 {T1.2d}, [x4]
b 1f
cbz x23, 1f
ld1 {T1.2d}, [x23]
mov x23, xzr
b 2f
0: ld1 {T1.2d}, [x2], #16
sub w0, w0, #1
1: ld1 {T1.2d}, [x21], #16
sub w19, w19, #1
1: /* multiply XL by SHASH in GF(2^128) */
2: /* multiply XL by SHASH in GF(2^128) */
CPU_LE( rev64 T1.16b, T1.16b )
ext T2.16b, XL.16b, XL.16b, #8
@ -250,9 +259,18 @@ CPU_LE( rev64 T1.16b, T1.16b )
eor T2.16b, T2.16b, XH.16b
eor XL.16b, XL.16b, T2.16b
cbnz w0, 0b
cbz w19, 3f
st1 {XL.2d}, [x1]
if_will_cond_yield_neon
st1 {XL.2d}, [x20]
do_cond_yield_neon
b 0b
endif_yield_neon
b 1b
3: st1 {XL.2d}, [x20]
frame_pop
ret
.endm
@ -304,38 +322,55 @@ ENDPROC(pmull_ghash_update_p8)
.endm
.macro pmull_gcm_do_crypt, enc
ld1 {SHASH.2d}, [x4]
ld1 {XL.2d}, [x1]
ldr x8, [x5, #8] // load lower counter
frame_push 10
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
mov x25, x6
mov x26, x7
.if \enc == 1
ldr x27, [sp, #96] // first stacked arg
.endif
ldr x28, [x24, #8] // load lower counter
CPU_LE( rev x28, x28 )
0: mov x0, x25
load_round_keys w26, x0
ld1 {SHASH.2d}, [x23]
ld1 {XL.2d}, [x20]
movi MASK.16b, #0xe1
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
CPU_LE( rev x8, x8 )
shl MASK.2d, MASK.2d, #57
eor SHASH2.16b, SHASH2.16b, SHASH.16b
.if \enc == 1
ld1 {KS.16b}, [x7]
ld1 {KS.16b}, [x27]
.endif
0: ld1 {CTR.8b}, [x5] // load upper counter
ld1 {INP.16b}, [x3], #16
rev x9, x8
add x8, x8, #1
sub w0, w0, #1
1: ld1 {CTR.8b}, [x24] // load upper counter
ld1 {INP.16b}, [x22], #16
rev x9, x28
add x28, x28, #1
sub w19, w19, #1
ins CTR.d[1], x9 // set lower counter
.if \enc == 1
eor INP.16b, INP.16b, KS.16b // encrypt input
st1 {INP.16b}, [x2], #16
st1 {INP.16b}, [x21], #16
.endif
rev64 T1.16b, INP.16b
cmp w6, #12
b.ge 2f // AES-192/256?
cmp w26, #12
b.ge 4f // AES-192/256?
1: enc_round CTR, v21
2: enc_round CTR, v21
ext T2.16b, XL.16b, XL.16b, #8
ext IN1.16b, T1.16b, T1.16b, #8
@ -390,27 +425,39 @@ CPU_LE( rev x8, x8 )
.if \enc == 0
eor INP.16b, INP.16b, KS.16b
st1 {INP.16b}, [x2], #16
st1 {INP.16b}, [x21], #16
.endif
cbnz w0, 0b
CPU_LE( rev x8, x8 )
st1 {XL.2d}, [x1]
str x8, [x5, #8] // store lower counter
cbz w19, 3f
if_will_cond_yield_neon
st1 {XL.2d}, [x20]
.if \enc == 1
st1 {KS.16b}, [x7]
st1 {KS.16b}, [x27]
.endif
do_cond_yield_neon
b 0b
endif_yield_neon
b 1b
3: st1 {XL.2d}, [x20]
.if \enc == 1
st1 {KS.16b}, [x27]
.endif
CPU_LE( rev x28, x28 )
str x28, [x24, #8] // store lower counter
frame_pop
ret
2: b.eq 3f // AES-192?
4: b.eq 5f // AES-192?
enc_round CTR, v17
enc_round CTR, v18
3: enc_round CTR, v19
5: enc_round CTR, v19
enc_round CTR, v20
b 1b
b 2b
.endm
/*

View File

@ -63,11 +63,12 @@ static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
const u8 src[], struct ghash_key const *k,
u8 ctr[], int rounds, u8 ks[]);
u8 ctr[], u32 const rk[], int rounds,
u8 ks[]);
asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
const u8 src[], struct ghash_key const *k,
u8 ctr[], int rounds);
u8 ctr[], u32 const rk[], int rounds);
asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
u32 const rk[], int rounds);
@ -368,26 +369,29 @@ static int gcm_encrypt(struct aead_request *req)
pmull_gcm_encrypt_block(ks, iv, NULL,
num_rounds(&ctx->aes_key));
put_unaligned_be32(3, iv + GCM_IV_SIZE);
kernel_neon_end();
err = skcipher_walk_aead_encrypt(&walk, req, true);
err = skcipher_walk_aead_encrypt(&walk, req, false);
while (walk.nbytes >= AES_BLOCK_SIZE) {
int blocks = walk.nbytes / AES_BLOCK_SIZE;
kernel_neon_begin();
pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
walk.src.virt.addr, &ctx->ghash_key,
iv, num_rounds(&ctx->aes_key), ks);
iv, ctx->aes_key.key_enc,
num_rounds(&ctx->aes_key), ks);
kernel_neon_end();
err = skcipher_walk_done(&walk,
walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
} else {
__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
num_rounds(&ctx->aes_key));
put_unaligned_be32(2, iv + GCM_IV_SIZE);
err = skcipher_walk_aead_encrypt(&walk, req, true);
err = skcipher_walk_aead_encrypt(&walk, req, false);
while (walk.nbytes >= AES_BLOCK_SIZE) {
int blocks = walk.nbytes / AES_BLOCK_SIZE;
@ -467,15 +471,19 @@ static int gcm_decrypt(struct aead_request *req)
pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
num_rounds(&ctx->aes_key));
put_unaligned_be32(2, iv + GCM_IV_SIZE);
kernel_neon_end();
err = skcipher_walk_aead_decrypt(&walk, req, true);
err = skcipher_walk_aead_decrypt(&walk, req, false);
while (walk.nbytes >= AES_BLOCK_SIZE) {
int blocks = walk.nbytes / AES_BLOCK_SIZE;
kernel_neon_begin();
pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
walk.src.virt.addr, &ctx->ghash_key,
iv, num_rounds(&ctx->aes_key));
iv, ctx->aes_key.key_enc,
num_rounds(&ctx->aes_key));
kernel_neon_end();
err = skcipher_walk_done(&walk,
walk.nbytes % AES_BLOCK_SIZE);
@ -483,14 +491,12 @@ static int gcm_decrypt(struct aead_request *req)
if (walk.nbytes)
pmull_gcm_encrypt_block(iv, iv, NULL,
num_rounds(&ctx->aes_key));
kernel_neon_end();
} else {
__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
num_rounds(&ctx->aes_key));
put_unaligned_be32(2, iv + GCM_IV_SIZE);
err = skcipher_walk_aead_decrypt(&walk, req, true);
err = skcipher_walk_aead_decrypt(&walk, req, false);
while (walk.nbytes >= AES_BLOCK_SIZE) {
int blocks = walk.nbytes / AES_BLOCK_SIZE;

View File

@ -69,30 +69,36 @@
* int blocks)
*/
ENTRY(sha1_ce_transform)
frame_push 3
mov x19, x0
mov x20, x1
mov x21, x2
/* load round constants */
loadrc k0.4s, 0x5a827999, w6
0: loadrc k0.4s, 0x5a827999, w6
loadrc k1.4s, 0x6ed9eba1, w6
loadrc k2.4s, 0x8f1bbcdc, w6
loadrc k3.4s, 0xca62c1d6, w6
/* load state */
ld1 {dgav.4s}, [x0]
ldr dgb, [x0, #16]
ld1 {dgav.4s}, [x19]
ldr dgb, [x19, #16]
/* load sha1_ce_state::finalize */
ldr_l w4, sha1_ce_offsetof_finalize, x4
ldr w4, [x0, x4]
ldr w4, [x19, x4]
/* load input */
0: ld1 {v8.4s-v11.4s}, [x1], #64
sub w2, w2, #1
1: ld1 {v8.4s-v11.4s}, [x20], #64
sub w21, w21, #1
CPU_LE( rev32 v8.16b, v8.16b )
CPU_LE( rev32 v9.16b, v9.16b )
CPU_LE( rev32 v10.16b, v10.16b )
CPU_LE( rev32 v11.16b, v11.16b )
1: add t0.4s, v8.4s, k0.4s
2: add t0.4s, v8.4s, k0.4s
mov dg0v.16b, dgav.16b
add_update c, ev, k0, 8, 9, 10, 11, dgb
@ -123,16 +129,25 @@ CPU_LE( rev32 v11.16b, v11.16b )
add dgbv.2s, dgbv.2s, dg1v.2s
add dgav.4s, dgav.4s, dg0v.4s
cbnz w2, 0b
cbz w21, 3f
if_will_cond_yield_neon
st1 {dgav.4s}, [x19]
str dgb, [x19, #16]
do_cond_yield_neon
b 0b
endif_yield_neon
b 1b
/*
* Final block: add padding and total bit count.
* Skip if the input size was not a round multiple of the block size,
* the padding is handled by the C code in that case.
*/
cbz x4, 3f
3: cbz x4, 4f
ldr_l w4, sha1_ce_offsetof_count, x4
ldr x4, [x0, x4]
ldr x4, [x19, x4]
movi v9.2d, #0
mov x8, #0x80000000
movi v10.2d, #0
@ -141,10 +156,11 @@ CPU_LE( rev32 v11.16b, v11.16b )
mov x4, #0
mov v11.d[0], xzr
mov v11.d[1], x7
b 1b
b 2b
/* store new state */
3: st1 {dgav.4s}, [x0]
str dgb, [x0, #16]
4: st1 {dgav.4s}, [x19]
str dgb, [x19, #16]
frame_pop
ret
ENDPROC(sha1_ce_transform)

View File

@ -79,30 +79,36 @@
*/
.text
ENTRY(sha2_ce_transform)
frame_push 3
mov x19, x0
mov x20, x1
mov x21, x2
/* load round constants */
adr_l x8, .Lsha2_rcon
0: adr_l x8, .Lsha2_rcon
ld1 { v0.4s- v3.4s}, [x8], #64
ld1 { v4.4s- v7.4s}, [x8], #64
ld1 { v8.4s-v11.4s}, [x8], #64
ld1 {v12.4s-v15.4s}, [x8]
/* load state */
ld1 {dgav.4s, dgbv.4s}, [x0]
ld1 {dgav.4s, dgbv.4s}, [x19]
/* load sha256_ce_state::finalize */
ldr_l w4, sha256_ce_offsetof_finalize, x4
ldr w4, [x0, x4]
ldr w4, [x19, x4]
/* load input */
0: ld1 {v16.4s-v19.4s}, [x1], #64
sub w2, w2, #1
1: ld1 {v16.4s-v19.4s}, [x20], #64
sub w21, w21, #1
CPU_LE( rev32 v16.16b, v16.16b )
CPU_LE( rev32 v17.16b, v17.16b )
CPU_LE( rev32 v18.16b, v18.16b )
CPU_LE( rev32 v19.16b, v19.16b )
1: add t0.4s, v16.4s, v0.4s
2: add t0.4s, v16.4s, v0.4s
mov dg0v.16b, dgav.16b
mov dg1v.16b, dgbv.16b
@ -131,16 +137,24 @@ CPU_LE( rev32 v19.16b, v19.16b )
add dgbv.4s, dgbv.4s, dg1v.4s
/* handled all input blocks? */
cbnz w2, 0b
cbz w21, 3f
if_will_cond_yield_neon
st1 {dgav.4s, dgbv.4s}, [x19]
do_cond_yield_neon
b 0b
endif_yield_neon
b 1b
/*
* Final block: add padding and total bit count.
* Skip if the input size was not a round multiple of the block size,
* the padding is handled by the C code in that case.
*/
cbz x4, 3f
3: cbz x4, 4f
ldr_l w4, sha256_ce_offsetof_count, x4
ldr x4, [x0, x4]
ldr x4, [x19, x4]
movi v17.2d, #0
mov x8, #0x80000000
movi v18.2d, #0
@ -149,9 +163,10 @@ CPU_LE( rev32 v19.16b, v19.16b )
mov x4, #0
mov v19.d[0], xzr
mov v19.d[1], x7
b 1b
b 2b
/* store new state */
3: st1 {dgav.4s, dgbv.4s}, [x0]
4: st1 {dgav.4s, dgbv.4s}, [x19]
frame_pop
ret
ENDPROC(sha2_ce_transform)

View File

@ -1,3 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
// This code is taken from the OpenSSL project but the author (Andy Polyakov)
// has relicensed it under the GPLv2. Therefore this program is free software;
// you can redistribute it and/or modify it under the terms of the GNU General
// Public License version 2 as published by the Free Software Foundation.
//
// The original headers, including the original license headers, are
// included below for completeness.
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
@ -10,8 +20,6 @@
// project. The module is, however, dual licensed under OpenSSL and
// CRYPTOGAMS licenses depending on where you obtain it. For further
// details see http://www.openssl.org/~appro/cryptogams/.
//
// Permission to use under GPLv2 terms is granted.
// ====================================================================
//
// SHA256/512 for ARMv8.

View File

@ -41,9 +41,16 @@
*/
.text
ENTRY(sha3_ce_transform)
/* load state */
add x8, x0, #32
ld1 { v0.1d- v3.1d}, [x0]
frame_push 4
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
0: /* load state */
add x8, x19, #32
ld1 { v0.1d- v3.1d}, [x19]
ld1 { v4.1d- v7.1d}, [x8], #32
ld1 { v8.1d-v11.1d}, [x8], #32
ld1 {v12.1d-v15.1d}, [x8], #32
@ -51,13 +58,13 @@ ENTRY(sha3_ce_transform)
ld1 {v20.1d-v23.1d}, [x8], #32
ld1 {v24.1d}, [x8]
0: sub w2, w2, #1
1: sub w21, w21, #1
mov w8, #24
adr_l x9, .Lsha3_rcon
/* load input */
ld1 {v25.8b-v28.8b}, [x1], #32
ld1 {v29.8b-v31.8b}, [x1], #24
ld1 {v25.8b-v28.8b}, [x20], #32
ld1 {v29.8b-v31.8b}, [x20], #24
eor v0.8b, v0.8b, v25.8b
eor v1.8b, v1.8b, v26.8b
eor v2.8b, v2.8b, v27.8b
@ -66,10 +73,10 @@ ENTRY(sha3_ce_transform)
eor v5.8b, v5.8b, v30.8b
eor v6.8b, v6.8b, v31.8b
tbnz x3, #6, 2f // SHA3-512
tbnz x22, #6, 3f // SHA3-512
ld1 {v25.8b-v28.8b}, [x1], #32
ld1 {v29.8b-v30.8b}, [x1], #16
ld1 {v25.8b-v28.8b}, [x20], #32
ld1 {v29.8b-v30.8b}, [x20], #16
eor v7.8b, v7.8b, v25.8b
eor v8.8b, v8.8b, v26.8b
eor v9.8b, v9.8b, v27.8b
@ -77,34 +84,34 @@ ENTRY(sha3_ce_transform)
eor v11.8b, v11.8b, v29.8b
eor v12.8b, v12.8b, v30.8b
tbnz x3, #4, 1f // SHA3-384 or SHA3-224
tbnz x22, #4, 2f // SHA3-384 or SHA3-224
// SHA3-256
ld1 {v25.8b-v28.8b}, [x1], #32
ld1 {v25.8b-v28.8b}, [x20], #32
eor v13.8b, v13.8b, v25.8b
eor v14.8b, v14.8b, v26.8b
eor v15.8b, v15.8b, v27.8b
eor v16.8b, v16.8b, v28.8b
b 3f
b 4f
1: tbz x3, #2, 3f // bit 2 cleared? SHA-384
2: tbz x22, #2, 4f // bit 2 cleared? SHA-384
// SHA3-224
ld1 {v25.8b-v28.8b}, [x1], #32
ld1 {v29.8b}, [x1], #8
ld1 {v25.8b-v28.8b}, [x20], #32
ld1 {v29.8b}, [x20], #8
eor v13.8b, v13.8b, v25.8b
eor v14.8b, v14.8b, v26.8b
eor v15.8b, v15.8b, v27.8b
eor v16.8b, v16.8b, v28.8b
eor v17.8b, v17.8b, v29.8b
b 3f
b 4f
// SHA3-512
2: ld1 {v25.8b-v26.8b}, [x1], #16
3: ld1 {v25.8b-v26.8b}, [x20], #16
eor v7.8b, v7.8b, v25.8b
eor v8.8b, v8.8b, v26.8b
3: sub w8, w8, #1
4: sub w8, w8, #1
eor3 v29.16b, v4.16b, v9.16b, v14.16b
eor3 v26.16b, v1.16b, v6.16b, v11.16b
@ -183,17 +190,33 @@ ENTRY(sha3_ce_transform)
eor v0.16b, v0.16b, v31.16b
cbnz w8, 3b
cbnz w2, 0b
cbnz w8, 4b
cbz w21, 5f
if_will_cond_yield_neon
add x8, x19, #32
st1 { v0.1d- v3.1d}, [x19]
st1 { v4.1d- v7.1d}, [x8], #32
st1 { v8.1d-v11.1d}, [x8], #32
st1 {v12.1d-v15.1d}, [x8], #32
st1 {v16.1d-v19.1d}, [x8], #32
st1 {v20.1d-v23.1d}, [x8], #32
st1 {v24.1d}, [x8]
do_cond_yield_neon
b 0b
endif_yield_neon
b 1b
/* save state */
st1 { v0.1d- v3.1d}, [x0], #32
st1 { v4.1d- v7.1d}, [x0], #32
st1 { v8.1d-v11.1d}, [x0], #32
st1 {v12.1d-v15.1d}, [x0], #32
st1 {v16.1d-v19.1d}, [x0], #32
st1 {v20.1d-v23.1d}, [x0], #32
st1 {v24.1d}, [x0]
5: st1 { v0.1d- v3.1d}, [x19], #32
st1 { v4.1d- v7.1d}, [x19], #32
st1 { v8.1d-v11.1d}, [x19], #32
st1 {v12.1d-v15.1d}, [x19], #32
st1 {v16.1d-v19.1d}, [x19], #32
st1 {v20.1d-v23.1d}, [x19], #32
st1 {v24.1d}, [x19]
frame_pop
ret
ENDPROC(sha3_ce_transform)

View File

@ -1,4 +1,14 @@
#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
# This code is taken from the OpenSSL project but the author (Andy Polyakov)
# has relicensed it under the GPLv2. Therefore this program is free software;
# you can redistribute it and/or modify it under the terms of the GNU General
# Public License version 2 as published by the Free Software Foundation.
#
# The original headers, including the original license headers, are
# included below for completeness.
# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
@ -11,8 +21,6 @@
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
#
# Permission to use under GPLv2 terms is granted.
# ====================================================================
#
# SHA256/512 for ARMv8.

View File

@ -107,17 +107,23 @@
*/
.text
ENTRY(sha512_ce_transform)
frame_push 3
mov x19, x0
mov x20, x1
mov x21, x2
/* load state */
ld1 {v8.2d-v11.2d}, [x0]
0: ld1 {v8.2d-v11.2d}, [x19]
/* load first 4 round constants */
adr_l x3, .Lsha512_rcon
ld1 {v20.2d-v23.2d}, [x3], #64
/* load input */
0: ld1 {v12.2d-v15.2d}, [x1], #64
ld1 {v16.2d-v19.2d}, [x1], #64
sub w2, w2, #1
1: ld1 {v12.2d-v15.2d}, [x20], #64
ld1 {v16.2d-v19.2d}, [x20], #64
sub w21, w21, #1
CPU_LE( rev64 v12.16b, v12.16b )
CPU_LE( rev64 v13.16b, v13.16b )
@ -196,9 +202,18 @@ CPU_LE( rev64 v19.16b, v19.16b )
add v11.2d, v11.2d, v3.2d
/* handled all input blocks? */
cbnz w2, 0b
cbz w21, 3f
if_will_cond_yield_neon
st1 {v8.2d-v11.2d}, [x19]
do_cond_yield_neon
b 0b
endif_yield_neon
b 1b
/* store new state */
3: st1 {v8.2d-v11.2d}, [x0]
3: st1 {v8.2d-v11.2d}, [x19]
frame_pop
ret
ENDPROC(sha512_ce_transform)

View File

@ -1,3 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
// This code is taken from the OpenSSL project but the author (Andy Polyakov)
// has relicensed it under the GPLv2. Therefore this program is free software;
// you can redistribute it and/or modify it under the terms of the GNU General
// Public License version 2 as published by the Free Software Foundation.
//
// The original headers, including the original license headers, are
// included below for completeness.
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
@ -10,8 +20,6 @@
// project. The module is, however, dual licensed under OpenSSL and
// CRYPTOGAMS licenses depending on where you obtain it. For further
// details see http://www.openssl.org/~appro/cryptogams/.
//
// Permission to use under GPLv2 terms is granted.
// ====================================================================
//
// SHA256/512 for ARMv8.

View File

@ -0,0 +1,36 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/linkage.h>
#include <asm/assembler.h>
.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8
.set .Lv\b\().4s, \b
.endr
.macro sm4e, rd, rn
.inst 0xcec08400 | .L\rd | (.L\rn << 5)
.endm
/*
* void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in);
*/
.text
ENTRY(sm4_ce_do_crypt)
ld1 {v8.4s}, [x2]
ld1 {v0.4s-v3.4s}, [x0], #64
CPU_LE( rev32 v8.16b, v8.16b )
ld1 {v4.4s-v7.4s}, [x0]
sm4e v8.4s, v0.4s
sm4e v8.4s, v1.4s
sm4e v8.4s, v2.4s
sm4e v8.4s, v3.4s
sm4e v8.4s, v4.4s
sm4e v8.4s, v5.4s
sm4e v8.4s, v6.4s
sm4e v8.4s, v7.4s
rev64 v8.4s, v8.4s
ext v8.16b, v8.16b, v8.16b, #8
CPU_LE( rev32 v8.16b, v8.16b )
st1 {v8.4s}, [x1]
ret
ENDPROC(sm4_ce_do_crypt)

View File

@ -0,0 +1,73 @@
// SPDX-License-Identifier: GPL-2.0
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/sm4.h>
#include <linux/module.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/types.h>
MODULE_ALIAS_CRYPTO("sm4");
MODULE_ALIAS_CRYPTO("sm4-ce");
MODULE_DESCRIPTION("SM4 symmetric cipher using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in);
static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
if (!may_use_simd()) {
crypto_sm4_encrypt(tfm, out, in);
} else {
kernel_neon_begin();
sm4_ce_do_crypt(ctx->rkey_enc, out, in);
kernel_neon_end();
}
}
static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
if (!may_use_simd()) {
crypto_sm4_decrypt(tfm, out, in);
} else {
kernel_neon_begin();
sm4_ce_do_crypt(ctx->rkey_dec, out, in);
kernel_neon_end();
}
}
static struct crypto_alg sm4_ce_alg = {
.cra_name = "sm4",
.cra_driver_name = "sm4-ce",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_sm4_ctx),
.cra_module = THIS_MODULE,
.cra_u.cipher = {
.cia_min_keysize = SM4_KEY_SIZE,
.cia_max_keysize = SM4_KEY_SIZE,
.cia_setkey = crypto_sm4_set_key,
.cia_encrypt = sm4_ce_encrypt,
.cia_decrypt = sm4_ce_decrypt
}
};
static int __init sm4_ce_mod_init(void)
{
return crypto_register_alg(&sm4_ce_alg);
}
static void __exit sm4_ce_mod_fini(void)
{
crypto_unregister_alg(&sm4_ce_alg);
}
module_cpu_feature_match(SM3, sm4_ce_mod_init);
module_exit(sm4_ce_mod_fini);

View File

@ -15,7 +15,6 @@ obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
@ -24,7 +23,6 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
@ -38,6 +36,16 @@ obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o
obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
obj-$(CONFIG_CRYPTO_AEGIS128L_AESNI_SSE2) += aegis128l-aesni.o
obj-$(CONFIG_CRYPTO_AEGIS256_AESNI_SSE2) += aegis256-aesni.o
obj-$(CONFIG_CRYPTO_MORUS640_GLUE) += morus640_glue.o
obj-$(CONFIG_CRYPTO_MORUS1280_GLUE) += morus1280_glue.o
obj-$(CONFIG_CRYPTO_MORUS640_SSE2) += morus640-sse2.o
obj-$(CONFIG_CRYPTO_MORUS1280_SSE2) += morus1280-sse2.o
# These modules require assembler to support AVX.
ifeq ($(avx_supported),yes)
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += \
@ -55,11 +63,12 @@ ifeq ($(avx2_supported),yes)
obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/
obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/
obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/
obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o
endif
aes-i586-y := aes-i586-asm_32.o aes_glue.o
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
@ -68,10 +77,16 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o
serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
aegis128l-aesni-y := aegis128l-aesni-asm.o aegis128l-aesni-glue.o
aegis256-aesni-y := aegis256-aesni-asm.o aegis256-aesni-glue.o
morus640-sse2-y := morus640-sse2-asm.o morus640-sse2-glue.o
morus1280-sse2-y := morus1280-sse2-asm.o morus1280-sse2-glue.o
ifeq ($(avx_supported),yes)
camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
camellia_aesni_avx_glue.o
@ -87,6 +102,8 @@ ifeq ($(avx2_supported),yes)
camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
chacha20-x86_64-y += chacha20-avx2-x86_64.o
serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
endif
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o

View File

@ -0,0 +1,749 @@
/*
* AES-NI + SSE2 implementation of AEGIS-128
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/frame.h>
#define STATE0 %xmm0
#define STATE1 %xmm1
#define STATE2 %xmm2
#define STATE3 %xmm3
#define STATE4 %xmm4
#define KEY %xmm5
#define MSG %xmm5
#define T0 %xmm6
#define T1 %xmm7
#define STATEP %rdi
#define LEN %rsi
#define SRC %rdx
#define DST %rcx
.section .rodata.cst16.aegis128_const, "aM", @progbits, 32
.align 16
.Laegis128_const_0:
.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Laegis128_const_1:
.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
.align 16
.Laegis128_counter:
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
.text
/*
* aegis128_update
* input:
* STATE[0-4] - input state
* output:
* STATE[0-4] - output state (shifted positions)
* changed:
* T0
*/
.macro aegis128_update
movdqa STATE4, T0
aesenc STATE0, STATE4
aesenc STATE1, STATE0
aesenc STATE2, STATE1
aesenc STATE3, STATE2
aesenc T0, STATE3
.endm
/*
* __load_partial: internal ABI
* input:
* LEN - bytes
* SRC - src
* output:
* MSG - message block
* changed:
* T0
* %r8
* %r9
*/
__load_partial:
xor %r9, %r9
pxor MSG, MSG
mov LEN, %r8
and $0x1, %r8
jz .Lld_partial_1
mov LEN, %r8
and $0x1E, %r8
add SRC, %r8
mov (%r8), %r9b
.Lld_partial_1:
mov LEN, %r8
and $0x2, %r8
jz .Lld_partial_2
mov LEN, %r8
and $0x1C, %r8
add SRC, %r8
shl $0x10, %r9
mov (%r8), %r9w
.Lld_partial_2:
mov LEN, %r8
and $0x4, %r8
jz .Lld_partial_4
mov LEN, %r8
and $0x18, %r8
add SRC, %r8
shl $32, %r9
mov (%r8), %r8d
xor %r8, %r9
.Lld_partial_4:
movq %r9, MSG
mov LEN, %r8
and $0x8, %r8
jz .Lld_partial_8
mov LEN, %r8
and $0x10, %r8
add SRC, %r8
pslldq $8, MSG
movq (%r8), T0
pxor T0, MSG
.Lld_partial_8:
ret
ENDPROC(__load_partial)
/*
* __store_partial: internal ABI
* input:
* LEN - bytes
* DST - dst
* output:
* T0 - message block
* changed:
* %r8
* %r9
* %r10
*/
__store_partial:
mov LEN, %r8
mov DST, %r9
movq T0, %r10
cmp $8, %r8
jl .Lst_partial_8
mov %r10, (%r9)
psrldq $8, T0
movq T0, %r10
sub $8, %r8
add $8, %r9
.Lst_partial_8:
cmp $4, %r8
jl .Lst_partial_4
mov %r10d, (%r9)
shr $32, %r10
sub $4, %r8
add $4, %r9
.Lst_partial_4:
cmp $2, %r8
jl .Lst_partial_2
mov %r10w, (%r9)
shr $0x10, %r10
sub $2, %r8
add $2, %r9
.Lst_partial_2:
cmp $1, %r8
jl .Lst_partial_1
mov %r10b, (%r9)
.Lst_partial_1:
ret
ENDPROC(__store_partial)
/*
* void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
*/
ENTRY(crypto_aegis128_aesni_init)
FRAME_BEGIN
/* load IV: */
movdqu (%rdx), T1
/* load key: */
movdqa (%rsi), KEY
pxor KEY, T1
movdqa T1, STATE0
movdqa KEY, STATE3
movdqa KEY, STATE4
/* load the constants: */
movdqa .Laegis128_const_0, STATE2
movdqa .Laegis128_const_1, STATE1
pxor STATE2, STATE3
pxor STATE1, STATE4
/* update 10 times with KEY / KEY xor IV: */
aegis128_update; pxor KEY, STATE4
aegis128_update; pxor T1, STATE3
aegis128_update; pxor KEY, STATE2
aegis128_update; pxor T1, STATE1
aegis128_update; pxor KEY, STATE0
aegis128_update; pxor T1, STATE4
aegis128_update; pxor KEY, STATE3
aegis128_update; pxor T1, STATE2
aegis128_update; pxor KEY, STATE1
aegis128_update; pxor T1, STATE0
/* store the state: */
movdqu STATE0, 0x00(STATEP)
movdqu STATE1, 0x10(STATEP)
movdqu STATE2, 0x20(STATEP)
movdqu STATE3, 0x30(STATEP)
movdqu STATE4, 0x40(STATEP)
FRAME_END
ret
ENDPROC(crypto_aegis128_aesni_init)
/*
* void crypto_aegis128_aesni_ad(void *state, unsigned int length,
* const void *data);
*/
ENTRY(crypto_aegis128_aesni_ad)
FRAME_BEGIN
cmp $0x10, LEN
jb .Lad_out
/* load the state: */
movdqu 0x00(STATEP), STATE0
movdqu 0x10(STATEP), STATE1
movdqu 0x20(STATEP), STATE2
movdqu 0x30(STATEP), STATE3
movdqu 0x40(STATEP), STATE4
mov SRC, %r8
and $0xF, %r8
jnz .Lad_u_loop
.align 8
.Lad_a_loop:
movdqa 0x00(SRC), MSG
aegis128_update
pxor MSG, STATE4
sub $0x10, LEN
cmp $0x10, LEN
jl .Lad_out_1
movdqa 0x10(SRC), MSG
aegis128_update
pxor MSG, STATE3
sub $0x10, LEN
cmp $0x10, LEN
jl .Lad_out_2
movdqa 0x20(SRC), MSG
aegis128_update
pxor MSG, STATE2
sub $0x10, LEN
cmp $0x10, LEN
jl .Lad_out_3
movdqa 0x30(SRC), MSG
aegis128_update
pxor MSG, STATE1
sub $0x10, LEN
cmp $0x10, LEN
jl .Lad_out_4
movdqa 0x40(SRC), MSG
aegis128_update
pxor MSG, STATE0
sub $0x10, LEN
cmp $0x10, LEN
jl .Lad_out_0
add $0x50, SRC
jmp .Lad_a_loop
.align 8
.Lad_u_loop:
movdqu 0x00(SRC), MSG
aegis128_update
pxor MSG, STATE4
sub $0x10, LEN
cmp $0x10, LEN
jl .Lad_out_1
movdqu 0x10(SRC), MSG
aegis128_update
pxor MSG, STATE3
sub $0x10, LEN
cmp $0x10, LEN
jl .Lad_out_2
movdqu 0x20(SRC), MSG
aegis128_update
pxor MSG, STATE2
sub $0x10, LEN
cmp $0x10, LEN
jl .Lad_out_3
movdqu 0x30(SRC), MSG
aegis128_update
pxor MSG, STATE1
sub $0x10, LEN
cmp $0x10, LEN
jl .Lad_out_4
movdqu 0x40(SRC), MSG
aegis128_update
pxor MSG, STATE0
sub $0x10, LEN
cmp $0x10, LEN
jl .Lad_out_0
add $0x50, SRC
jmp .Lad_u_loop
/* store the state: */
.Lad_out_0:
movdqu STATE0, 0x00(STATEP)
movdqu STATE1, 0x10(STATEP)
movdqu STATE2, 0x20(STATEP)
movdqu STATE3, 0x30(STATEP)
movdqu STATE4, 0x40(STATEP)
FRAME_END
ret
.Lad_out_1:
movdqu STATE4, 0x00(STATEP)
movdqu STATE0, 0x10(STATEP)
movdqu STATE1, 0x20(STATEP)
movdqu STATE2, 0x30(STATEP)
movdqu STATE3, 0x40(STATEP)
FRAME_END
ret
.Lad_out_2:
movdqu STATE3, 0x00(STATEP)
movdqu STATE4, 0x10(STATEP)
movdqu STATE0, 0x20(STATEP)
movdqu STATE1, 0x30(STATEP)
movdqu STATE2, 0x40(STATEP)
FRAME_END
ret
.Lad_out_3:
movdqu STATE2, 0x00(STATEP)
movdqu STATE3, 0x10(STATEP)
movdqu STATE4, 0x20(STATEP)
movdqu STATE0, 0x30(STATEP)
movdqu STATE1, 0x40(STATEP)
FRAME_END
ret
.Lad_out_4:
movdqu STATE1, 0x00(STATEP)
movdqu STATE2, 0x10(STATEP)
movdqu STATE3, 0x20(STATEP)
movdqu STATE4, 0x30(STATEP)
movdqu STATE0, 0x40(STATEP)
FRAME_END
ret
.Lad_out:
FRAME_END
ret
ENDPROC(crypto_aegis128_aesni_ad)
.macro encrypt_block a s0 s1 s2 s3 s4 i
movdq\a (\i * 0x10)(SRC), MSG
movdqa MSG, T0
pxor \s1, T0
pxor \s4, T0
movdqa \s2, T1
pand \s3, T1
pxor T1, T0
movdq\a T0, (\i * 0x10)(DST)
aegis128_update
pxor MSG, \s4
sub $0x10, LEN
cmp $0x10, LEN
jl .Lenc_out_\i
.endm
/*
* void crypto_aegis128_aesni_enc(void *state, unsigned int length,
* const void *src, void *dst);
*/
ENTRY(crypto_aegis128_aesni_enc)
FRAME_BEGIN
cmp $0x10, LEN
jb .Lenc_out
/* load the state: */
movdqu 0x00(STATEP), STATE0
movdqu 0x10(STATEP), STATE1
movdqu 0x20(STATEP), STATE2
movdqu 0x30(STATEP), STATE3
movdqu 0x40(STATEP), STATE4
mov SRC, %r8
or DST, %r8
and $0xF, %r8
jnz .Lenc_u_loop
.align 8
.Lenc_a_loop:
encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
add $0x50, SRC
add $0x50, DST
jmp .Lenc_a_loop
.align 8
.Lenc_u_loop:
encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
add $0x50, SRC
add $0x50, DST
jmp .Lenc_u_loop
/* store the state: */
.Lenc_out_0:
movdqu STATE4, 0x00(STATEP)
movdqu STATE0, 0x10(STATEP)
movdqu STATE1, 0x20(STATEP)
movdqu STATE2, 0x30(STATEP)
movdqu STATE3, 0x40(STATEP)
FRAME_END
ret
.Lenc_out_1:
movdqu STATE3, 0x00(STATEP)
movdqu STATE4, 0x10(STATEP)
movdqu STATE0, 0x20(STATEP)
movdqu STATE1, 0x30(STATEP)
movdqu STATE2, 0x40(STATEP)
FRAME_END
ret
.Lenc_out_2:
movdqu STATE2, 0x00(STATEP)
movdqu STATE3, 0x10(STATEP)
movdqu STATE4, 0x20(STATEP)
movdqu STATE0, 0x30(STATEP)
movdqu STATE1, 0x40(STATEP)
FRAME_END
ret
.Lenc_out_3:
movdqu STATE1, 0x00(STATEP)
movdqu STATE2, 0x10(STATEP)
movdqu STATE3, 0x20(STATEP)
movdqu STATE4, 0x30(STATEP)
movdqu STATE0, 0x40(STATEP)
FRAME_END
ret
.Lenc_out_4:
movdqu STATE0, 0x00(STATEP)
movdqu STATE1, 0x10(STATEP)
movdqu STATE2, 0x20(STATEP)
movdqu STATE3, 0x30(STATEP)
movdqu STATE4, 0x40(STATEP)
FRAME_END
ret
.Lenc_out:
FRAME_END
ret
ENDPROC(crypto_aegis128_aesni_enc)
/*
* void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
* const void *src, void *dst);
*/
ENTRY(crypto_aegis128_aesni_enc_tail)
FRAME_BEGIN
/* load the state: */
movdqu 0x00(STATEP), STATE0
movdqu 0x10(STATEP), STATE1
movdqu 0x20(STATEP), STATE2
movdqu 0x30(STATEP), STATE3
movdqu 0x40(STATEP), STATE4
/* encrypt message: */
call __load_partial
movdqa MSG, T0
pxor STATE1, T0
pxor STATE4, T0
movdqa STATE2, T1
pand STATE3, T1
pxor T1, T0
call __store_partial
aegis128_update
pxor MSG, STATE4
/* store the state: */
movdqu STATE4, 0x00(STATEP)
movdqu STATE0, 0x10(STATEP)
movdqu STATE1, 0x20(STATEP)
movdqu STATE2, 0x30(STATEP)
movdqu STATE3, 0x40(STATEP)
FRAME_END
ENDPROC(crypto_aegis128_aesni_enc_tail)
.macro decrypt_block a s0 s1 s2 s3 s4 i
movdq\a (\i * 0x10)(SRC), MSG
pxor \s1, MSG
pxor \s4, MSG
movdqa \s2, T1
pand \s3, T1
pxor T1, MSG
movdq\a MSG, (\i * 0x10)(DST)
aegis128_update
pxor MSG, \s4
sub $0x10, LEN
cmp $0x10, LEN
jl .Ldec_out_\i
.endm
/*
* void crypto_aegis128_aesni_dec(void *state, unsigned int length,
* const void *src, void *dst);
*/
ENTRY(crypto_aegis128_aesni_dec)
FRAME_BEGIN
cmp $0x10, LEN
jb .Ldec_out
/* load the state: */
movdqu 0x00(STATEP), STATE0
movdqu 0x10(STATEP), STATE1
movdqu 0x20(STATEP), STATE2
movdqu 0x30(STATEP), STATE3
movdqu 0x40(STATEP), STATE4
mov SRC, %r8
or DST, %r8
and $0xF, %r8
jnz .Ldec_u_loop
.align 8
.Ldec_a_loop:
decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
add $0x50, SRC
add $0x50, DST
jmp .Ldec_a_loop
.align 8
.Ldec_u_loop:
decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
add $0x50, SRC
add $0x50, DST
jmp .Ldec_u_loop
/* store the state: */
.Ldec_out_0:
movdqu STATE4, 0x00(STATEP)
movdqu STATE0, 0x10(STATEP)
movdqu STATE1, 0x20(STATEP)
movdqu STATE2, 0x30(STATEP)
movdqu STATE3, 0x40(STATEP)
FRAME_END
ret
.Ldec_out_1:
movdqu STATE3, 0x00(STATEP)
movdqu STATE4, 0x10(STATEP)
movdqu STATE0, 0x20(STATEP)
movdqu STATE1, 0x30(STATEP)
movdqu STATE2, 0x40(STATEP)
FRAME_END
ret
.Ldec_out_2:
movdqu STATE2, 0x00(STATEP)
movdqu STATE3, 0x10(STATEP)
movdqu STATE4, 0x20(STATEP)
movdqu STATE0, 0x30(STATEP)
movdqu STATE1, 0x40(STATEP)
FRAME_END
ret
.Ldec_out_3:
movdqu STATE1, 0x00(STATEP)
movdqu STATE2, 0x10(STATEP)
movdqu STATE3, 0x20(STATEP)
movdqu STATE4, 0x30(STATEP)
movdqu STATE0, 0x40(STATEP)
FRAME_END
ret
.Ldec_out_4:
movdqu STATE0, 0x00(STATEP)
movdqu STATE1, 0x10(STATEP)
movdqu STATE2, 0x20(STATEP)
movdqu STATE3, 0x30(STATEP)
movdqu STATE4, 0x40(STATEP)
FRAME_END
ret
.Ldec_out:
FRAME_END
ret
ENDPROC(crypto_aegis128_aesni_dec)
/*
* void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
* const void *src, void *dst);
*/
ENTRY(crypto_aegis128_aesni_dec_tail)
FRAME_BEGIN
/* load the state: */
movdqu 0x00(STATEP), STATE0
movdqu 0x10(STATEP), STATE1
movdqu 0x20(STATEP), STATE2
movdqu 0x30(STATEP), STATE3
movdqu 0x40(STATEP), STATE4
/* decrypt message: */
call __load_partial
pxor STATE1, MSG
pxor STATE4, MSG
movdqa STATE2, T1
pand STATE3, T1
pxor T1, MSG
movdqa MSG, T0
call __store_partial
/* mask with byte count: */
movq LEN, T0
punpcklbw T0, T0
punpcklbw T0, T0
punpcklbw T0, T0
punpcklbw T0, T0
movdqa .Laegis128_counter, T1
pcmpgtb T1, T0
pand T0, MSG
aegis128_update
pxor MSG, STATE4
/* store the state: */
movdqu STATE4, 0x00(STATEP)
movdqu STATE0, 0x10(STATEP)
movdqu STATE1, 0x20(STATEP)
movdqu STATE2, 0x30(STATEP)
movdqu STATE3, 0x40(STATEP)
FRAME_END
ret
ENDPROC(crypto_aegis128_aesni_dec_tail)
/*
* void crypto_aegis128_aesni_final(void *state, void *tag_xor,
* u64 assoclen, u64 cryptlen);
*/
ENTRY(crypto_aegis128_aesni_final)
FRAME_BEGIN
/* load the state: */
movdqu 0x00(STATEP), STATE0
movdqu 0x10(STATEP), STATE1
movdqu 0x20(STATEP), STATE2
movdqu 0x30(STATEP), STATE3
movdqu 0x40(STATEP), STATE4
/* prepare length block: */
movq %rdx, MSG
movq %rcx, T0
pslldq $8, T0
pxor T0, MSG
psllq $3, MSG /* multiply by 8 (to get bit count) */
pxor STATE3, MSG
/* update state: */
aegis128_update; pxor MSG, STATE4
aegis128_update; pxor MSG, STATE3
aegis128_update; pxor MSG, STATE2
aegis128_update; pxor MSG, STATE1
aegis128_update; pxor MSG, STATE0
aegis128_update; pxor MSG, STATE4
aegis128_update; pxor MSG, STATE3
/* xor tag: */
movdqu (%rsi), MSG
pxor STATE0, MSG
pxor STATE1, MSG
pxor STATE2, MSG
pxor STATE3, MSG
pxor STATE4, MSG
movdqu MSG, (%rsi)
FRAME_END
ret
ENDPROC(crypto_aegis128_aesni_final)

View File

@ -0,0 +1,407 @@
/*
* The AEGIS-128 Authenticated-Encryption Algorithm
* Glue for AES-NI + SSE2 implementation
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/cryptd.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/cpu_device_id.h>
#define AEGIS128_BLOCK_ALIGN 16
#define AEGIS128_BLOCK_SIZE 16
#define AEGIS128_NONCE_SIZE 16
#define AEGIS128_STATE_BLOCKS 5
#define AEGIS128_KEY_SIZE 16
#define AEGIS128_MIN_AUTH_SIZE 8
#define AEGIS128_MAX_AUTH_SIZE 16
asmlinkage void crypto_aegis128_aesni_init(void *state, void *key, void *iv);
asmlinkage void crypto_aegis128_aesni_ad(
void *state, unsigned int length, const void *data);
asmlinkage void crypto_aegis128_aesni_enc(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis128_aesni_dec(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis128_aesni_enc_tail(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis128_aesni_dec_tail(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis128_aesni_final(
void *state, void *tag_xor, unsigned int cryptlen,
unsigned int assoclen);
struct aegis_block {
u8 bytes[AEGIS128_BLOCK_SIZE] __aligned(AEGIS128_BLOCK_ALIGN);
};
struct aegis_state {
struct aegis_block blocks[AEGIS128_STATE_BLOCKS];
};
struct aegis_ctx {
struct aegis_block key;
};
struct aegis_crypt_ops {
int (*skcipher_walk_init)(struct skcipher_walk *walk,
struct aead_request *req, bool atomic);
void (*crypt_blocks)(void *state, unsigned int length, const void *src,
void *dst);
void (*crypt_tail)(void *state, unsigned int length, const void *src,
void *dst);
};
static void crypto_aegis128_aesni_process_ad(
struct aegis_state *state, struct scatterlist *sg_src,
unsigned int assoclen)
{
struct scatter_walk walk;
struct aegis_block buf;
unsigned int pos = 0;
scatterwalk_start(&walk, sg_src);
while (assoclen != 0) {
unsigned int size = scatterwalk_clamp(&walk, assoclen);
unsigned int left = size;
void *mapped = scatterwalk_map(&walk);
const u8 *src = (const u8 *)mapped;
if (pos + size >= AEGIS128_BLOCK_SIZE) {
if (pos > 0) {
unsigned int fill = AEGIS128_BLOCK_SIZE - pos;
memcpy(buf.bytes + pos, src, fill);
crypto_aegis128_aesni_ad(state,
AEGIS128_BLOCK_SIZE,
buf.bytes);
pos = 0;
left -= fill;
src += fill;
}
crypto_aegis128_aesni_ad(state, left, src);
src += left & ~(AEGIS128_BLOCK_SIZE - 1);
left &= AEGIS128_BLOCK_SIZE - 1;
}
memcpy(buf.bytes + pos, src, left);
pos += left;
assoclen -= size;
scatterwalk_unmap(mapped);
scatterwalk_advance(&walk, size);
scatterwalk_done(&walk, 0, assoclen);
}
if (pos > 0) {
memset(buf.bytes + pos, 0, AEGIS128_BLOCK_SIZE - pos);
crypto_aegis128_aesni_ad(state, AEGIS128_BLOCK_SIZE, buf.bytes);
}
}
static void crypto_aegis128_aesni_process_crypt(
struct aegis_state *state, struct aead_request *req,
const struct aegis_crypt_ops *ops)
{
struct skcipher_walk walk;
u8 *src, *dst;
unsigned int chunksize, base;
ops->skcipher_walk_init(&walk, req, false);
while (walk.nbytes) {
src = walk.src.virt.addr;
dst = walk.dst.virt.addr;
chunksize = walk.nbytes;
ops->crypt_blocks(state, chunksize, src, dst);
base = chunksize & ~(AEGIS128_BLOCK_SIZE - 1);
src += base;
dst += base;
chunksize &= AEGIS128_BLOCK_SIZE - 1;
if (chunksize > 0)
ops->crypt_tail(state, chunksize, src, dst);
skcipher_walk_done(&walk, 0);
}
}
static struct aegis_ctx *crypto_aegis128_aesni_ctx(struct crypto_aead *aead)
{
u8 *ctx = crypto_aead_ctx(aead);
ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
return (void *)ctx;
}
static int crypto_aegis128_aesni_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(aead);
if (keylen != AEGIS128_KEY_SIZE) {
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
memcpy(ctx->key.bytes, key, AEGIS128_KEY_SIZE);
return 0;
}
static int crypto_aegis128_aesni_setauthsize(struct crypto_aead *tfm,
unsigned int authsize)
{
if (authsize > AEGIS128_MAX_AUTH_SIZE)
return -EINVAL;
if (authsize < AEGIS128_MIN_AUTH_SIZE)
return -EINVAL;
return 0;
}
static void crypto_aegis128_aesni_crypt(struct aead_request *req,
struct aegis_block *tag_xor,
unsigned int cryptlen,
const struct aegis_crypt_ops *ops)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(tfm);
struct aegis_state state;
kernel_fpu_begin();
crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv);
crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen);
crypto_aegis128_aesni_process_crypt(&state, req, ops);
crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
kernel_fpu_end();
}
static int crypto_aegis128_aesni_encrypt(struct aead_request *req)
{
static const struct aegis_crypt_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_encrypt,
.crypt_blocks = crypto_aegis128_aesni_enc,
.crypt_tail = crypto_aegis128_aesni_enc_tail,
};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_block tag = {};
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen;
crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS);
scatterwalk_map_and_copy(tag.bytes, req->dst,
req->assoclen + cryptlen, authsize, 1);
return 0;
}
static int crypto_aegis128_aesni_decrypt(struct aead_request *req)
{
static const struct aegis_block zeros = {};
static const struct aegis_crypt_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_decrypt,
.crypt_blocks = crypto_aegis128_aesni_dec,
.crypt_tail = crypto_aegis128_aesni_dec_tail,
};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_block tag;
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen - authsize;
scatterwalk_map_and_copy(tag.bytes, req->src,
req->assoclen + cryptlen, authsize, 0);
crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS);
return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
}
static int crypto_aegis128_aesni_init_tfm(struct crypto_aead *aead)
{
return 0;
}
static void crypto_aegis128_aesni_exit_tfm(struct crypto_aead *aead)
{
}
static int cryptd_aegis128_aesni_setkey(struct crypto_aead *aead,
const u8 *key, unsigned int keylen)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
}
static int cryptd_aegis128_aesni_setauthsize(struct crypto_aead *aead,
unsigned int authsize)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
}
static int cryptd_aegis128_aesni_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() ||
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm);
aead_request_set_tfm(req, aead);
return crypto_aead_encrypt(req);
}
static int cryptd_aegis128_aesni_decrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() ||
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm);
aead_request_set_tfm(req, aead);
return crypto_aead_decrypt(req);
}
static int cryptd_aegis128_aesni_init_tfm(struct crypto_aead *aead)
{
struct cryptd_aead *cryptd_tfm;
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_tfm = cryptd_alloc_aead("__aegis128-aesni", CRYPTO_ALG_INTERNAL,
CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
*ctx = cryptd_tfm;
crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
return 0;
}
static void cryptd_aegis128_aesni_exit_tfm(struct crypto_aead *aead)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_free_aead(*ctx);
}
static struct aead_alg crypto_aegis128_aesni_alg[] = {
{
.setkey = crypto_aegis128_aesni_setkey,
.setauthsize = crypto_aegis128_aesni_setauthsize,
.encrypt = crypto_aegis128_aesni_encrypt,
.decrypt = crypto_aegis128_aesni_decrypt,
.init = crypto_aegis128_aesni_init_tfm,
.exit = crypto_aegis128_aesni_exit_tfm,
.ivsize = AEGIS128_NONCE_SIZE,
.maxauthsize = AEGIS128_MAX_AUTH_SIZE,
.chunksize = AEGIS128_BLOCK_SIZE,
.base = {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aegis_ctx) +
__alignof__(struct aegis_ctx),
.cra_alignmask = 0,
.cra_name = "__aegis128",
.cra_driver_name = "__aegis128-aesni",
.cra_module = THIS_MODULE,
}
}, {
.setkey = cryptd_aegis128_aesni_setkey,
.setauthsize = cryptd_aegis128_aesni_setauthsize,
.encrypt = cryptd_aegis128_aesni_encrypt,
.decrypt = cryptd_aegis128_aesni_decrypt,
.init = cryptd_aegis128_aesni_init_tfm,
.exit = cryptd_aegis128_aesni_exit_tfm,
.ivsize = AEGIS128_NONCE_SIZE,
.maxauthsize = AEGIS128_MAX_AUTH_SIZE,
.chunksize = AEGIS128_BLOCK_SIZE,
.base = {
.cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct cryptd_aead *),
.cra_alignmask = 0,
.cra_priority = 400,
.cra_name = "aegis128",
.cra_driver_name = "aegis128-aesni",
.cra_module = THIS_MODULE,
}
}
};
static const struct x86_cpu_id aesni_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_AES),
X86_FEATURE_MATCH(X86_FEATURE_XMM2),
{}
};
MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
static int __init crypto_aegis128_aesni_module_init(void)
{
if (!x86_match_cpu(aesni_cpu_id))
return -ENODEV;
return crypto_register_aeads(crypto_aegis128_aesni_alg,
ARRAY_SIZE(crypto_aegis128_aesni_alg));
}
static void __exit crypto_aegis128_aesni_module_exit(void)
{
crypto_unregister_aeads(crypto_aegis128_aesni_alg,
ARRAY_SIZE(crypto_aegis128_aesni_alg));
}
module_init(crypto_aegis128_aesni_module_init);
module_exit(crypto_aegis128_aesni_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm -- AESNI+SSE2 implementation");
MODULE_ALIAS_CRYPTO("aegis128");
MODULE_ALIAS_CRYPTO("aegis128-aesni");

View File

@ -0,0 +1,825 @@
/*
* AES-NI + SSE2 implementation of AEGIS-128L
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/frame.h>
#define STATE0 %xmm0
#define STATE1 %xmm1
#define STATE2 %xmm2
#define STATE3 %xmm3
#define STATE4 %xmm4
#define STATE5 %xmm5
#define STATE6 %xmm6
#define STATE7 %xmm7
#define MSG0 %xmm8
#define MSG1 %xmm9
#define T0 %xmm10
#define T1 %xmm11
#define T2 %xmm12
#define T3 %xmm13
#define STATEP %rdi
#define LEN %rsi
#define SRC %rdx
#define DST %rcx
.section .rodata.cst16.aegis128l_const, "aM", @progbits, 32
.align 16
.Laegis128l_const_0:
.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Laegis128l_const_1:
.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
.section .rodata.cst16.aegis128l_counter, "aM", @progbits, 16
.align 16
.Laegis128l_counter0:
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
.Laegis128l_counter1:
.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
.text
/*
* __load_partial: internal ABI
* input:
* LEN - bytes
* SRC - src
* output:
* MSG0 - first message block
* MSG1 - second message block
* changed:
* T0
* %r8
* %r9
*/
__load_partial:
xor %r9, %r9
pxor MSG0, MSG0
pxor MSG1, MSG1
mov LEN, %r8
and $0x1, %r8
jz .Lld_partial_1
mov LEN, %r8
and $0x1E, %r8
add SRC, %r8
mov (%r8), %r9b
.Lld_partial_1:
mov LEN, %r8
and $0x2, %r8
jz .Lld_partial_2
mov LEN, %r8
and $0x1C, %r8
add SRC, %r8
shl $0x10, %r9
mov (%r8), %r9w
.Lld_partial_2:
mov LEN, %r8
and $0x4, %r8
jz .Lld_partial_4
mov LEN, %r8
and $0x18, %r8
add SRC, %r8
shl $32, %r9
mov (%r8), %r8d
xor %r8, %r9
.Lld_partial_4:
movq %r9, MSG0
mov LEN, %r8
and $0x8, %r8
jz .Lld_partial_8
mov LEN, %r8
and $0x10, %r8
add SRC, %r8
pslldq $8, MSG0
movq (%r8), T0
pxor T0, MSG0
.Lld_partial_8:
mov LEN, %r8
and $0x10, %r8
jz .Lld_partial_16
movdqa MSG0, MSG1
movdqu (SRC), MSG0
.Lld_partial_16:
ret
ENDPROC(__load_partial)
/*
* __store_partial: internal ABI
* input:
* LEN - bytes
* DST - dst
* output:
* T0 - first message block
* T1 - second message block
* changed:
* %r8
* %r9
* %r10
*/
__store_partial:
mov LEN, %r8
mov DST, %r9
cmp $16, %r8
jl .Lst_partial_16
movdqu T0, (%r9)
movdqa T1, T0
sub $16, %r8
add $16, %r9
.Lst_partial_16:
movq T0, %r10
cmp $8, %r8
jl .Lst_partial_8
mov %r10, (%r9)
psrldq $8, T0
movq T0, %r10
sub $8, %r8
add $8, %r9
.Lst_partial_8:
cmp $4, %r8
jl .Lst_partial_4
mov %r10d, (%r9)
shr $32, %r10
sub $4, %r8
add $4, %r9
.Lst_partial_4:
cmp $2, %r8
jl .Lst_partial_2
mov %r10w, (%r9)
shr $0x10, %r10
sub $2, %r8
add $2, %r9
.Lst_partial_2:
cmp $1, %r8
jl .Lst_partial_1
mov %r10b, (%r9)
.Lst_partial_1:
ret
ENDPROC(__store_partial)
.macro update
movdqa STATE7, T0
aesenc STATE0, STATE7
aesenc STATE1, STATE0
aesenc STATE2, STATE1
aesenc STATE3, STATE2
aesenc STATE4, STATE3
aesenc STATE5, STATE4
aesenc STATE6, STATE5
aesenc T0, STATE6
.endm
.macro update0
update
pxor MSG0, STATE7
pxor MSG1, STATE3
.endm
.macro update1
update
pxor MSG0, STATE6
pxor MSG1, STATE2
.endm
.macro update2
update
pxor MSG0, STATE5
pxor MSG1, STATE1
.endm
.macro update3
update
pxor MSG0, STATE4
pxor MSG1, STATE0
.endm
.macro update4
update
pxor MSG0, STATE3
pxor MSG1, STATE7
.endm
.macro update5
update
pxor MSG0, STATE2
pxor MSG1, STATE6
.endm
.macro update6
update
pxor MSG0, STATE1
pxor MSG1, STATE5
.endm
.macro update7
update
pxor MSG0, STATE0
pxor MSG1, STATE4
.endm
.macro state_load
movdqu 0x00(STATEP), STATE0
movdqu 0x10(STATEP), STATE1
movdqu 0x20(STATEP), STATE2
movdqu 0x30(STATEP), STATE3
movdqu 0x40(STATEP), STATE4
movdqu 0x50(STATEP), STATE5
movdqu 0x60(STATEP), STATE6
movdqu 0x70(STATEP), STATE7
.endm
.macro state_store s0 s1 s2 s3 s4 s5 s6 s7
movdqu \s7, 0x00(STATEP)
movdqu \s0, 0x10(STATEP)
movdqu \s1, 0x20(STATEP)
movdqu \s2, 0x30(STATEP)
movdqu \s3, 0x40(STATEP)
movdqu \s4, 0x50(STATEP)
movdqu \s5, 0x60(STATEP)
movdqu \s6, 0x70(STATEP)
.endm
.macro state_store0
state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7
.endm
.macro state_store1
state_store STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6
.endm
.macro state_store2
state_store STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
.endm
.macro state_store3
state_store STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4
.endm
.macro state_store4
state_store STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3
.endm
.macro state_store5
state_store STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2
.endm
.macro state_store6
state_store STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1
.endm
.macro state_store7
state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0
.endm
/*
* void crypto_aegis128l_aesni_init(void *state, const void *key, const void *iv);
*/
ENTRY(crypto_aegis128l_aesni_init)
FRAME_BEGIN
/* load key: */
movdqa (%rsi), MSG1
movdqa MSG1, STATE0
movdqa MSG1, STATE4
movdqa MSG1, STATE5
movdqa MSG1, STATE6
movdqa MSG1, STATE7
/* load IV: */
movdqu (%rdx), MSG0
pxor MSG0, STATE0
pxor MSG0, STATE4
/* load the constants: */
movdqa .Laegis128l_const_0, STATE2
movdqa .Laegis128l_const_1, STATE1
movdqa STATE1, STATE3
pxor STATE2, STATE5
pxor STATE1, STATE6
pxor STATE2, STATE7
/* update 10 times with IV and KEY: */
update0
update1
update2
update3
update4
update5
update6
update7
update0
update1
state_store1
FRAME_END
ret
ENDPROC(crypto_aegis128l_aesni_init)
.macro ad_block a i
movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
update\i
sub $0x20, LEN
cmp $0x20, LEN
jl .Lad_out_\i
.endm
/*
* void crypto_aegis128l_aesni_ad(void *state, unsigned int length,
* const void *data);
*/
ENTRY(crypto_aegis128l_aesni_ad)
FRAME_BEGIN
cmp $0x20, LEN
jb .Lad_out
state_load
mov SRC, %r8
and $0xf, %r8
jnz .Lad_u_loop
.align 8
.Lad_a_loop:
ad_block a 0
ad_block a 1
ad_block a 2
ad_block a 3
ad_block a 4
ad_block a 5
ad_block a 6
ad_block a 7
add $0x100, SRC
jmp .Lad_a_loop
.align 8
.Lad_u_loop:
ad_block u 0
ad_block u 1
ad_block u 2
ad_block u 3
ad_block u 4
ad_block u 5
ad_block u 6
ad_block u 7
add $0x100, SRC
jmp .Lad_u_loop
.Lad_out_0:
state_store0
FRAME_END
ret
.Lad_out_1:
state_store1
FRAME_END
ret
.Lad_out_2:
state_store2
FRAME_END
ret
.Lad_out_3:
state_store3
FRAME_END
ret
.Lad_out_4:
state_store4
FRAME_END
ret
.Lad_out_5:
state_store5
FRAME_END
ret
.Lad_out_6:
state_store6
FRAME_END
ret
.Lad_out_7:
state_store7
FRAME_END
ret
.Lad_out:
FRAME_END
ret
ENDPROC(crypto_aegis128l_aesni_ad)
.macro crypt m0 m1 s0 s1 s2 s3 s4 s5 s6 s7
pxor \s1, \m0
pxor \s6, \m0
movdqa \s2, T3
pand \s3, T3
pxor T3, \m0
pxor \s2, \m1
pxor \s5, \m1
movdqa \s6, T3
pand \s7, T3
pxor T3, \m1
.endm
.macro crypt0 m0 m1
crypt \m0 \m1 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7
.endm
.macro crypt1 m0 m1
crypt \m0 \m1 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6
.endm
.macro crypt2 m0 m1
crypt \m0 \m1 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
.endm
.macro crypt3 m0 m1
crypt \m0 \m1 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4
.endm
.macro crypt4 m0 m1
crypt \m0 \m1 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3
.endm
.macro crypt5 m0 m1
crypt \m0 \m1 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2
.endm
.macro crypt6 m0 m1
crypt \m0 \m1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1
.endm
.macro crypt7 m0 m1
crypt \m0 \m1 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0
.endm
.macro encrypt_block a i
movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
movdqa MSG0, T0
movdqa MSG1, T1
crypt\i T0, T1
movdq\a T0, (\i * 0x20 + 0x00)(DST)
movdq\a T1, (\i * 0x20 + 0x10)(DST)
update\i
sub $0x20, LEN
cmp $0x20, LEN
jl .Lenc_out_\i
.endm
.macro decrypt_block a i
movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
crypt\i MSG0, MSG1
movdq\a MSG0, (\i * 0x20 + 0x00)(DST)
movdq\a MSG1, (\i * 0x20 + 0x10)(DST)
update\i
sub $0x20, LEN
cmp $0x20, LEN
jl .Ldec_out_\i
.endm
/*
* void crypto_aegis128l_aesni_enc(void *state, unsigned int length,
* const void *src, void *dst);
*/
ENTRY(crypto_aegis128l_aesni_enc)
FRAME_BEGIN
cmp $0x20, LEN
jb .Lenc_out
state_load
mov SRC, %r8
or DST, %r8
and $0xf, %r8
jnz .Lenc_u_loop
.align 8
.Lenc_a_loop:
encrypt_block a 0
encrypt_block a 1
encrypt_block a 2
encrypt_block a 3
encrypt_block a 4
encrypt_block a 5
encrypt_block a 6
encrypt_block a 7
add $0x100, SRC
add $0x100, DST
jmp .Lenc_a_loop
.align 8
.Lenc_u_loop:
encrypt_block u 0
encrypt_block u 1
encrypt_block u 2
encrypt_block u 3
encrypt_block u 4
encrypt_block u 5
encrypt_block u 6
encrypt_block u 7
add $0x100, SRC
add $0x100, DST
jmp .Lenc_u_loop
.Lenc_out_0:
state_store0
FRAME_END
ret
.Lenc_out_1:
state_store1
FRAME_END
ret
.Lenc_out_2:
state_store2
FRAME_END
ret
.Lenc_out_3:
state_store3
FRAME_END
ret
.Lenc_out_4:
state_store4
FRAME_END
ret
.Lenc_out_5:
state_store5
FRAME_END
ret
.Lenc_out_6:
state_store6
FRAME_END
ret
.Lenc_out_7:
state_store7
FRAME_END
ret
.Lenc_out:
FRAME_END
ret
ENDPROC(crypto_aegis128l_aesni_enc)
/*
* void crypto_aegis128l_aesni_enc_tail(void *state, unsigned int length,
* const void *src, void *dst);
*/
ENTRY(crypto_aegis128l_aesni_enc_tail)
FRAME_BEGIN
state_load
/* encrypt message: */
call __load_partial
movdqa MSG0, T0
movdqa MSG1, T1
crypt0 T0, T1
call __store_partial
update0
state_store0
FRAME_END
ENDPROC(crypto_aegis128l_aesni_enc_tail)
/*
* void crypto_aegis128l_aesni_dec(void *state, unsigned int length,
* const void *src, void *dst);
*/
ENTRY(crypto_aegis128l_aesni_dec)
FRAME_BEGIN
cmp $0x20, LEN
jb .Ldec_out
state_load
mov SRC, %r8
or DST, %r8
and $0xF, %r8
jnz .Ldec_u_loop
.align 8
.Ldec_a_loop:
decrypt_block a 0
decrypt_block a 1
decrypt_block a 2
decrypt_block a 3
decrypt_block a 4
decrypt_block a 5
decrypt_block a 6
decrypt_block a 7
add $0x100, SRC
add $0x100, DST
jmp .Ldec_a_loop
.align 8
.Ldec_u_loop:
decrypt_block u 0
decrypt_block u 1
decrypt_block u 2
decrypt_block u 3
decrypt_block u 4
decrypt_block u 5
decrypt_block u 6
decrypt_block u 7
add $0x100, SRC
add $0x100, DST
jmp .Ldec_u_loop
.Ldec_out_0:
state_store0
FRAME_END
ret
.Ldec_out_1:
state_store1
FRAME_END
ret
.Ldec_out_2:
state_store2
FRAME_END
ret
.Ldec_out_3:
state_store3
FRAME_END
ret
.Ldec_out_4:
state_store4
FRAME_END
ret
.Ldec_out_5:
state_store5
FRAME_END
ret
.Ldec_out_6:
state_store6
FRAME_END
ret
.Ldec_out_7:
state_store7
FRAME_END
ret
.Ldec_out:
FRAME_END
ret
ENDPROC(crypto_aegis128l_aesni_dec)
/*
* void crypto_aegis128l_aesni_dec_tail(void *state, unsigned int length,
* const void *src, void *dst);
*/
ENTRY(crypto_aegis128l_aesni_dec_tail)
FRAME_BEGIN
state_load
/* decrypt message: */
call __load_partial
crypt0 MSG0, MSG1
movdqa MSG0, T0
movdqa MSG1, T1
call __store_partial
/* mask with byte count: */
movq LEN, T0
punpcklbw T0, T0
punpcklbw T0, T0
punpcklbw T0, T0
punpcklbw T0, T0
movdqa T0, T1
movdqa .Laegis128l_counter0, T2
movdqa .Laegis128l_counter1, T3
pcmpgtb T2, T0
pcmpgtb T3, T1
pand T0, MSG0
pand T1, MSG1
update0
state_store0
FRAME_END
ret
ENDPROC(crypto_aegis128l_aesni_dec_tail)
/*
* void crypto_aegis128l_aesni_final(void *state, void *tag_xor,
* u64 assoclen, u64 cryptlen);
*/
ENTRY(crypto_aegis128l_aesni_final)
FRAME_BEGIN
state_load
/* prepare length block: */
movq %rdx, MSG0
movq %rcx, T0
pslldq $8, T0
pxor T0, MSG0
psllq $3, MSG0 /* multiply by 8 (to get bit count) */
pxor STATE2, MSG0
movdqa MSG0, MSG1
/* update state: */
update0
update1
update2
update3
update4
update5
update6
/* xor tag: */
movdqu (%rsi), T0
pxor STATE1, T0
pxor STATE2, T0
pxor STATE3, T0
pxor STATE4, T0
pxor STATE5, T0
pxor STATE6, T0
pxor STATE7, T0
movdqu T0, (%rsi)
FRAME_END
ret
ENDPROC(crypto_aegis128l_aesni_final)

View File

@ -0,0 +1,407 @@
/*
* The AEGIS-128L Authenticated-Encryption Algorithm
* Glue for AES-NI + SSE2 implementation
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/cryptd.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/cpu_device_id.h>
#define AEGIS128L_BLOCK_ALIGN 16
#define AEGIS128L_BLOCK_SIZE 32
#define AEGIS128L_NONCE_SIZE 16
#define AEGIS128L_STATE_BLOCKS 8
#define AEGIS128L_KEY_SIZE 16
#define AEGIS128L_MIN_AUTH_SIZE 8
#define AEGIS128L_MAX_AUTH_SIZE 16
asmlinkage void crypto_aegis128l_aesni_init(void *state, void *key, void *iv);
asmlinkage void crypto_aegis128l_aesni_ad(
void *state, unsigned int length, const void *data);
asmlinkage void crypto_aegis128l_aesni_enc(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis128l_aesni_dec(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis128l_aesni_enc_tail(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis128l_aesni_dec_tail(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis128l_aesni_final(
void *state, void *tag_xor, unsigned int cryptlen,
unsigned int assoclen);
struct aegis_block {
u8 bytes[AEGIS128L_BLOCK_SIZE] __aligned(AEGIS128L_BLOCK_ALIGN);
};
struct aegis_state {
struct aegis_block blocks[AEGIS128L_STATE_BLOCKS];
};
struct aegis_ctx {
struct aegis_block key;
};
struct aegis_crypt_ops {
int (*skcipher_walk_init)(struct skcipher_walk *walk,
struct aead_request *req, bool atomic);
void (*crypt_blocks)(void *state, unsigned int length, const void *src,
void *dst);
void (*crypt_tail)(void *state, unsigned int length, const void *src,
void *dst);
};
static void crypto_aegis128l_aesni_process_ad(
struct aegis_state *state, struct scatterlist *sg_src,
unsigned int assoclen)
{
struct scatter_walk walk;
struct aegis_block buf;
unsigned int pos = 0;
scatterwalk_start(&walk, sg_src);
while (assoclen != 0) {
unsigned int size = scatterwalk_clamp(&walk, assoclen);
unsigned int left = size;
void *mapped = scatterwalk_map(&walk);
const u8 *src = (const u8 *)mapped;
if (pos + size >= AEGIS128L_BLOCK_SIZE) {
if (pos > 0) {
unsigned int fill = AEGIS128L_BLOCK_SIZE - pos;
memcpy(buf.bytes + pos, src, fill);
crypto_aegis128l_aesni_ad(state,
AEGIS128L_BLOCK_SIZE,
buf.bytes);
pos = 0;
left -= fill;
src += fill;
}
crypto_aegis128l_aesni_ad(state, left, src);
src += left & ~(AEGIS128L_BLOCK_SIZE - 1);
left &= AEGIS128L_BLOCK_SIZE - 1;
}
memcpy(buf.bytes + pos, src, left);
pos += left;
assoclen -= size;
scatterwalk_unmap(mapped);
scatterwalk_advance(&walk, size);
scatterwalk_done(&walk, 0, assoclen);
}
if (pos > 0) {
memset(buf.bytes + pos, 0, AEGIS128L_BLOCK_SIZE - pos);
crypto_aegis128l_aesni_ad(state, AEGIS128L_BLOCK_SIZE, buf.bytes);
}
}
static void crypto_aegis128l_aesni_process_crypt(
struct aegis_state *state, struct aead_request *req,
const struct aegis_crypt_ops *ops)
{
struct skcipher_walk walk;
u8 *src, *dst;
unsigned int chunksize, base;
ops->skcipher_walk_init(&walk, req, false);
while (walk.nbytes) {
src = walk.src.virt.addr;
dst = walk.dst.virt.addr;
chunksize = walk.nbytes;
ops->crypt_blocks(state, chunksize, src, dst);
base = chunksize & ~(AEGIS128L_BLOCK_SIZE - 1);
src += base;
dst += base;
chunksize &= AEGIS128L_BLOCK_SIZE - 1;
if (chunksize > 0)
ops->crypt_tail(state, chunksize, src, dst);
skcipher_walk_done(&walk, 0);
}
}
static struct aegis_ctx *crypto_aegis128l_aesni_ctx(struct crypto_aead *aead)
{
u8 *ctx = crypto_aead_ctx(aead);
ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
return (void *)ctx;
}
static int crypto_aegis128l_aesni_setkey(struct crypto_aead *aead,
const u8 *key, unsigned int keylen)
{
struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(aead);
if (keylen != AEGIS128L_KEY_SIZE) {
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
memcpy(ctx->key.bytes, key, AEGIS128L_KEY_SIZE);
return 0;
}
static int crypto_aegis128l_aesni_setauthsize(struct crypto_aead *tfm,
unsigned int authsize)
{
if (authsize > AEGIS128L_MAX_AUTH_SIZE)
return -EINVAL;
if (authsize < AEGIS128L_MIN_AUTH_SIZE)
return -EINVAL;
return 0;
}
static void crypto_aegis128l_aesni_crypt(struct aead_request *req,
struct aegis_block *tag_xor,
unsigned int cryptlen,
const struct aegis_crypt_ops *ops)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(tfm);
struct aegis_state state;
kernel_fpu_begin();
crypto_aegis128l_aesni_init(&state, ctx->key.bytes, req->iv);
crypto_aegis128l_aesni_process_ad(&state, req->src, req->assoclen);
crypto_aegis128l_aesni_process_crypt(&state, req, ops);
crypto_aegis128l_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
kernel_fpu_end();
}
static int crypto_aegis128l_aesni_encrypt(struct aead_request *req)
{
static const struct aegis_crypt_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_encrypt,
.crypt_blocks = crypto_aegis128l_aesni_enc,
.crypt_tail = crypto_aegis128l_aesni_enc_tail,
};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_block tag = {};
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen;
crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS);
scatterwalk_map_and_copy(tag.bytes, req->dst,
req->assoclen + cryptlen, authsize, 1);
return 0;
}
static int crypto_aegis128l_aesni_decrypt(struct aead_request *req)
{
static const struct aegis_block zeros = {};
static const struct aegis_crypt_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_decrypt,
.crypt_blocks = crypto_aegis128l_aesni_dec,
.crypt_tail = crypto_aegis128l_aesni_dec_tail,
};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_block tag;
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen - authsize;
scatterwalk_map_and_copy(tag.bytes, req->src,
req->assoclen + cryptlen, authsize, 0);
crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS);
return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
}
static int crypto_aegis128l_aesni_init_tfm(struct crypto_aead *aead)
{
return 0;
}
static void crypto_aegis128l_aesni_exit_tfm(struct crypto_aead *aead)
{
}
static int cryptd_aegis128l_aesni_setkey(struct crypto_aead *aead,
const u8 *key, unsigned int keylen)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
}
static int cryptd_aegis128l_aesni_setauthsize(struct crypto_aead *aead,
unsigned int authsize)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
}
static int cryptd_aegis128l_aesni_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() ||
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm);
aead_request_set_tfm(req, aead);
return crypto_aead_encrypt(req);
}
static int cryptd_aegis128l_aesni_decrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() ||
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm);
aead_request_set_tfm(req, aead);
return crypto_aead_decrypt(req);
}
static int cryptd_aegis128l_aesni_init_tfm(struct crypto_aead *aead)
{
struct cryptd_aead *cryptd_tfm;
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_tfm = cryptd_alloc_aead("__aegis128l-aesni", CRYPTO_ALG_INTERNAL,
CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
*ctx = cryptd_tfm;
crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
return 0;
}
static void cryptd_aegis128l_aesni_exit_tfm(struct crypto_aead *aead)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_free_aead(*ctx);
}
static struct aead_alg crypto_aegis128l_aesni_alg[] = {
{
.setkey = crypto_aegis128l_aesni_setkey,
.setauthsize = crypto_aegis128l_aesni_setauthsize,
.encrypt = crypto_aegis128l_aesni_encrypt,
.decrypt = crypto_aegis128l_aesni_decrypt,
.init = crypto_aegis128l_aesni_init_tfm,
.exit = crypto_aegis128l_aesni_exit_tfm,
.ivsize = AEGIS128L_NONCE_SIZE,
.maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
.chunksize = AEGIS128L_BLOCK_SIZE,
.base = {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aegis_ctx) +
__alignof__(struct aegis_ctx),
.cra_alignmask = 0,
.cra_name = "__aegis128l",
.cra_driver_name = "__aegis128l-aesni",
.cra_module = THIS_MODULE,
}
}, {
.setkey = cryptd_aegis128l_aesni_setkey,
.setauthsize = cryptd_aegis128l_aesni_setauthsize,
.encrypt = cryptd_aegis128l_aesni_encrypt,
.decrypt = cryptd_aegis128l_aesni_decrypt,
.init = cryptd_aegis128l_aesni_init_tfm,
.exit = cryptd_aegis128l_aesni_exit_tfm,
.ivsize = AEGIS128L_NONCE_SIZE,
.maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
.chunksize = AEGIS128L_BLOCK_SIZE,
.base = {
.cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct cryptd_aead *),
.cra_alignmask = 0,
.cra_priority = 400,
.cra_name = "aegis128l",
.cra_driver_name = "aegis128l-aesni",
.cra_module = THIS_MODULE,
}
}
};
static const struct x86_cpu_id aesni_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_AES),
X86_FEATURE_MATCH(X86_FEATURE_XMM2),
{}
};
MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
static int __init crypto_aegis128l_aesni_module_init(void)
{
if (!x86_match_cpu(aesni_cpu_id))
return -ENODEV;
return crypto_register_aeads(crypto_aegis128l_aesni_alg,
ARRAY_SIZE(crypto_aegis128l_aesni_alg));
}
static void __exit crypto_aegis128l_aesni_module_exit(void)
{
crypto_unregister_aeads(crypto_aegis128l_aesni_alg,
ARRAY_SIZE(crypto_aegis128l_aesni_alg));
}
module_init(crypto_aegis128l_aesni_module_init);
module_exit(crypto_aegis128l_aesni_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("AEGIS-128L AEAD algorithm -- AESNI+SSE2 implementation");
MODULE_ALIAS_CRYPTO("aegis128l");
MODULE_ALIAS_CRYPTO("aegis128l-aesni");

View File

@ -0,0 +1,702 @@
/*
* AES-NI + SSE2 implementation of AEGIS-128L
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/frame.h>
#define STATE0 %xmm0
#define STATE1 %xmm1
#define STATE2 %xmm2
#define STATE3 %xmm3
#define STATE4 %xmm4
#define STATE5 %xmm5
#define MSG %xmm6
#define T0 %xmm7
#define T1 %xmm8
#define T2 %xmm9
#define T3 %xmm10
#define STATEP %rdi
#define LEN %rsi
#define SRC %rdx
#define DST %rcx
.section .rodata.cst16.aegis256_const, "aM", @progbits, 32
.align 16
.Laegis256_const_0:
.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Laegis256_const_1:
.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
.section .rodata.cst16.aegis256_counter, "aM", @progbits, 16
.align 16
.Laegis256_counter:
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
.text
/*
* __load_partial: internal ABI
* input:
* LEN - bytes
* SRC - src
* output:
* MSG - message block
* changed:
* T0
* %r8
* %r9
*/
__load_partial:
xor %r9, %r9
pxor MSG, MSG
mov LEN, %r8
and $0x1, %r8
jz .Lld_partial_1
mov LEN, %r8
and $0x1E, %r8
add SRC, %r8
mov (%r8), %r9b
.Lld_partial_1:
mov LEN, %r8
and $0x2, %r8
jz .Lld_partial_2
mov LEN, %r8
and $0x1C, %r8
add SRC, %r8
shl $0x10, %r9
mov (%r8), %r9w
.Lld_partial_2:
mov LEN, %r8
and $0x4, %r8
jz .Lld_partial_4
mov LEN, %r8
and $0x18, %r8
add SRC, %r8
shl $32, %r9
mov (%r8), %r8d
xor %r8, %r9
.Lld_partial_4:
movq %r9, MSG
mov LEN, %r8
and $0x8, %r8
jz .Lld_partial_8
mov LEN, %r8
and $0x10, %r8
add SRC, %r8
pslldq $8, MSG
movq (%r8), T0
pxor T0, MSG
.Lld_partial_8:
ret
ENDPROC(__load_partial)
/*
* __store_partial: internal ABI
* input:
* LEN - bytes
* DST - dst
* output:
* T0 - message block
* changed:
* %r8
* %r9
* %r10
*/
__store_partial:
mov LEN, %r8
mov DST, %r9
movq T0, %r10
cmp $8, %r8
jl .Lst_partial_8
mov %r10, (%r9)
psrldq $8, T0
movq T0, %r10
sub $8, %r8
add $8, %r9
.Lst_partial_8:
cmp $4, %r8
jl .Lst_partial_4
mov %r10d, (%r9)
shr $32, %r10
sub $4, %r8
add $4, %r9
.Lst_partial_4:
cmp $2, %r8
jl .Lst_partial_2
mov %r10w, (%r9)
shr $0x10, %r10
sub $2, %r8
add $2, %r9
.Lst_partial_2:
cmp $1, %r8
jl .Lst_partial_1
mov %r10b, (%r9)
.Lst_partial_1:
ret
ENDPROC(__store_partial)
.macro update
movdqa STATE5, T0
aesenc STATE0, STATE5
aesenc STATE1, STATE0
aesenc STATE2, STATE1
aesenc STATE3, STATE2
aesenc STATE4, STATE3
aesenc T0, STATE4
.endm
.macro update0 m
update
pxor \m, STATE5
.endm
.macro update1 m
update
pxor \m, STATE4
.endm
.macro update2 m
update
pxor \m, STATE3
.endm
.macro update3 m
update
pxor \m, STATE2
.endm
.macro update4 m
update
pxor \m, STATE1
.endm
.macro update5 m
update
pxor \m, STATE0
.endm
.macro state_load
movdqu 0x00(STATEP), STATE0
movdqu 0x10(STATEP), STATE1
movdqu 0x20(STATEP), STATE2
movdqu 0x30(STATEP), STATE3
movdqu 0x40(STATEP), STATE4
movdqu 0x50(STATEP), STATE5
.endm
.macro state_store s0 s1 s2 s3 s4 s5
movdqu \s5, 0x00(STATEP)
movdqu \s0, 0x10(STATEP)
movdqu \s1, 0x20(STATEP)
movdqu \s2, 0x30(STATEP)
movdqu \s3, 0x40(STATEP)
movdqu \s4, 0x50(STATEP)
.endm
.macro state_store0
state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
.endm
.macro state_store1
state_store STATE5 STATE0 STATE1 STATE2 STATE3 STATE4
.endm
.macro state_store2
state_store STATE4 STATE5 STATE0 STATE1 STATE2 STATE3
.endm
.macro state_store3
state_store STATE3 STATE4 STATE5 STATE0 STATE1 STATE2
.endm
.macro state_store4
state_store STATE2 STATE3 STATE4 STATE5 STATE0 STATE1
.endm
.macro state_store5
state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE0
.endm
/*
* void crypto_aegis256_aesni_init(void *state, const void *key, const void *iv);
*/
ENTRY(crypto_aegis256_aesni_init)
FRAME_BEGIN
/* load key: */
movdqa 0x00(%rsi), MSG
movdqa 0x10(%rsi), T1
movdqa MSG, STATE4
movdqa T1, STATE5
/* load IV: */
movdqu 0x00(%rdx), T2
movdqu 0x10(%rdx), T3
pxor MSG, T2
pxor T1, T3
movdqa T2, STATE0
movdqa T3, STATE1
/* load the constants: */
movdqa .Laegis256_const_0, STATE3
movdqa .Laegis256_const_1, STATE2
pxor STATE3, STATE4
pxor STATE2, STATE5
/* update 10 times with IV and KEY: */
update0 MSG
update1 T1
update2 T2
update3 T3
update4 MSG
update5 T1
update0 T2
update1 T3
update2 MSG
update3 T1
update4 T2
update5 T3
update0 MSG
update1 T1
update2 T2
update3 T3
state_store3
FRAME_END
ret
ENDPROC(crypto_aegis256_aesni_init)
.macro ad_block a i
movdq\a (\i * 0x10)(SRC), MSG
update\i MSG
sub $0x10, LEN
cmp $0x10, LEN
jl .Lad_out_\i
.endm
/*
* void crypto_aegis256_aesni_ad(void *state, unsigned int length,
* const void *data);
*/
ENTRY(crypto_aegis256_aesni_ad)
FRAME_BEGIN
cmp $0x10, LEN
jb .Lad_out
state_load
mov SRC, %r8
and $0xf, %r8
jnz .Lad_u_loop
.align 8
.Lad_a_loop:
ad_block a 0
ad_block a 1
ad_block a 2
ad_block a 3
ad_block a 4
ad_block a 5
add $0x60, SRC
jmp .Lad_a_loop
.align 8
.Lad_u_loop:
ad_block u 0
ad_block u 1
ad_block u 2
ad_block u 3
ad_block u 4
ad_block u 5
add $0x60, SRC
jmp .Lad_u_loop
.Lad_out_0:
state_store0
FRAME_END
ret
.Lad_out_1:
state_store1
FRAME_END
ret
.Lad_out_2:
state_store2
FRAME_END
ret
.Lad_out_3:
state_store3
FRAME_END
ret
.Lad_out_4:
state_store4
FRAME_END
ret
.Lad_out_5:
state_store5
FRAME_END
ret
.Lad_out:
FRAME_END
ret
ENDPROC(crypto_aegis256_aesni_ad)
.macro crypt m s0 s1 s2 s3 s4 s5
pxor \s1, \m
pxor \s4, \m
pxor \s5, \m
movdqa \s2, T3
pand \s3, T3
pxor T3, \m
.endm
.macro crypt0 m
crypt \m STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
.endm
.macro crypt1 m
crypt \m STATE5 STATE0 STATE1 STATE2 STATE3 STATE4
.endm
.macro crypt2 m
crypt \m STATE4 STATE5 STATE0 STATE1 STATE2 STATE3
.endm
.macro crypt3 m
crypt \m STATE3 STATE4 STATE5 STATE0 STATE1 STATE2
.endm
.macro crypt4 m
crypt \m STATE2 STATE3 STATE4 STATE5 STATE0 STATE1
.endm
.macro crypt5 m
crypt \m STATE1 STATE2 STATE3 STATE4 STATE5 STATE0
.endm
.macro encrypt_block a i
movdq\a (\i * 0x10)(SRC), MSG
movdqa MSG, T0
crypt\i T0
movdq\a T0, (\i * 0x10)(DST)
update\i MSG
sub $0x10, LEN
cmp $0x10, LEN
jl .Lenc_out_\i
.endm
.macro decrypt_block a i
movdq\a (\i * 0x10)(SRC), MSG
crypt\i MSG
movdq\a MSG, (\i * 0x10)(DST)
update\i MSG
sub $0x10, LEN
cmp $0x10, LEN
jl .Ldec_out_\i
.endm
/*
* void crypto_aegis256_aesni_enc(void *state, unsigned int length,
* const void *src, void *dst);
*/
ENTRY(crypto_aegis256_aesni_enc)
FRAME_BEGIN
cmp $0x10, LEN
jb .Lenc_out
state_load
mov SRC, %r8
or DST, %r8
and $0xf, %r8
jnz .Lenc_u_loop
.align 8
.Lenc_a_loop:
encrypt_block a 0
encrypt_block a 1
encrypt_block a 2
encrypt_block a 3
encrypt_block a 4
encrypt_block a 5
add $0x60, SRC
add $0x60, DST
jmp .Lenc_a_loop
.align 8
.Lenc_u_loop:
encrypt_block u 0
encrypt_block u 1
encrypt_block u 2
encrypt_block u 3
encrypt_block u 4
encrypt_block u 5
add $0x60, SRC
add $0x60, DST
jmp .Lenc_u_loop
.Lenc_out_0:
state_store0
FRAME_END
ret
.Lenc_out_1:
state_store1
FRAME_END
ret
.Lenc_out_2:
state_store2
FRAME_END
ret
.Lenc_out_3:
state_store3
FRAME_END
ret
.Lenc_out_4:
state_store4
FRAME_END
ret
.Lenc_out_5:
state_store5
FRAME_END
ret
.Lenc_out:
FRAME_END
ret
ENDPROC(crypto_aegis256_aesni_enc)
/*
* void crypto_aegis256_aesni_enc_tail(void *state, unsigned int length,
* const void *src, void *dst);
*/
ENTRY(crypto_aegis256_aesni_enc_tail)
FRAME_BEGIN
state_load
/* encrypt message: */
call __load_partial
movdqa MSG, T0
crypt0 T0
call __store_partial
update0 MSG
state_store0
FRAME_END
ENDPROC(crypto_aegis256_aesni_enc_tail)
/*
* void crypto_aegis256_aesni_dec(void *state, unsigned int length,
* const void *src, void *dst);
*/
ENTRY(crypto_aegis256_aesni_dec)
FRAME_BEGIN
cmp $0x10, LEN
jb .Ldec_out
state_load
mov SRC, %r8
or DST, %r8
and $0xF, %r8
jnz .Ldec_u_loop
.align 8
.Ldec_a_loop:
decrypt_block a 0
decrypt_block a 1
decrypt_block a 2
decrypt_block a 3
decrypt_block a 4
decrypt_block a 5
add $0x60, SRC
add $0x60, DST
jmp .Ldec_a_loop
.align 8
.Ldec_u_loop:
decrypt_block u 0
decrypt_block u 1
decrypt_block u 2
decrypt_block u 3
decrypt_block u 4
decrypt_block u 5
add $0x60, SRC
add $0x60, DST
jmp .Ldec_u_loop
.Ldec_out_0:
state_store0
FRAME_END
ret
.Ldec_out_1:
state_store1
FRAME_END
ret
.Ldec_out_2:
state_store2
FRAME_END
ret
.Ldec_out_3:
state_store3
FRAME_END
ret
.Ldec_out_4:
state_store4
FRAME_END
ret
.Ldec_out_5:
state_store5
FRAME_END
ret
.Ldec_out:
FRAME_END
ret
ENDPROC(crypto_aegis256_aesni_dec)
/*
* void crypto_aegis256_aesni_dec_tail(void *state, unsigned int length,
* const void *src, void *dst);
*/
ENTRY(crypto_aegis256_aesni_dec_tail)
FRAME_BEGIN
state_load
/* decrypt message: */
call __load_partial
crypt0 MSG
movdqa MSG, T0
call __store_partial
/* mask with byte count: */
movq LEN, T0
punpcklbw T0, T0
punpcklbw T0, T0
punpcklbw T0, T0
punpcklbw T0, T0
movdqa .Laegis256_counter, T1
pcmpgtb T1, T0
pand T0, MSG
update0 MSG
state_store0
FRAME_END
ret
ENDPROC(crypto_aegis256_aesni_dec_tail)
/*
* void crypto_aegis256_aesni_final(void *state, void *tag_xor,
* u64 assoclen, u64 cryptlen);
*/
ENTRY(crypto_aegis256_aesni_final)
FRAME_BEGIN
state_load
/* prepare length block: */
movq %rdx, MSG
movq %rcx, T0
pslldq $8, T0
pxor T0, MSG
psllq $3, MSG /* multiply by 8 (to get bit count) */
pxor STATE3, MSG
/* update state: */
update0 MSG
update1 MSG
update2 MSG
update3 MSG
update4 MSG
update5 MSG
update0 MSG
/* xor tag: */
movdqu (%rsi), MSG
pxor STATE0, MSG
pxor STATE1, MSG
pxor STATE2, MSG
pxor STATE3, MSG
pxor STATE4, MSG
pxor STATE5, MSG
movdqu MSG, (%rsi)
FRAME_END
ret
ENDPROC(crypto_aegis256_aesni_final)

View File

@ -0,0 +1,407 @@
/*
* The AEGIS-256 Authenticated-Encryption Algorithm
* Glue for AES-NI + SSE2 implementation
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/cryptd.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/cpu_device_id.h>
#define AEGIS256_BLOCK_ALIGN 16
#define AEGIS256_BLOCK_SIZE 16
#define AEGIS256_NONCE_SIZE 32
#define AEGIS256_STATE_BLOCKS 6
#define AEGIS256_KEY_SIZE 32
#define AEGIS256_MIN_AUTH_SIZE 8
#define AEGIS256_MAX_AUTH_SIZE 16
asmlinkage void crypto_aegis256_aesni_init(void *state, void *key, void *iv);
asmlinkage void crypto_aegis256_aesni_ad(
void *state, unsigned int length, const void *data);
asmlinkage void crypto_aegis256_aesni_enc(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis256_aesni_dec(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis256_aesni_enc_tail(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis256_aesni_dec_tail(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis256_aesni_final(
void *state, void *tag_xor, unsigned int cryptlen,
unsigned int assoclen);
struct aegis_block {
u8 bytes[AEGIS256_BLOCK_SIZE] __aligned(AEGIS256_BLOCK_ALIGN);
};
struct aegis_state {
struct aegis_block blocks[AEGIS256_STATE_BLOCKS];
};
struct aegis_ctx {
struct aegis_block key[AEGIS256_KEY_SIZE / AEGIS256_BLOCK_SIZE];
};
struct aegis_crypt_ops {
int (*skcipher_walk_init)(struct skcipher_walk *walk,
struct aead_request *req, bool atomic);
void (*crypt_blocks)(void *state, unsigned int length, const void *src,
void *dst);
void (*crypt_tail)(void *state, unsigned int length, const void *src,
void *dst);
};
static void crypto_aegis256_aesni_process_ad(
struct aegis_state *state, struct scatterlist *sg_src,
unsigned int assoclen)
{
struct scatter_walk walk;
struct aegis_block buf;
unsigned int pos = 0;
scatterwalk_start(&walk, sg_src);
while (assoclen != 0) {
unsigned int size = scatterwalk_clamp(&walk, assoclen);
unsigned int left = size;
void *mapped = scatterwalk_map(&walk);
const u8 *src = (const u8 *)mapped;
if (pos + size >= AEGIS256_BLOCK_SIZE) {
if (pos > 0) {
unsigned int fill = AEGIS256_BLOCK_SIZE - pos;
memcpy(buf.bytes + pos, src, fill);
crypto_aegis256_aesni_ad(state,
AEGIS256_BLOCK_SIZE,
buf.bytes);
pos = 0;
left -= fill;
src += fill;
}
crypto_aegis256_aesni_ad(state, left, src);
src += left & ~(AEGIS256_BLOCK_SIZE - 1);
left &= AEGIS256_BLOCK_SIZE - 1;
}
memcpy(buf.bytes + pos, src, left);
pos += left;
assoclen -= size;
scatterwalk_unmap(mapped);
scatterwalk_advance(&walk, size);
scatterwalk_done(&walk, 0, assoclen);
}
if (pos > 0) {
memset(buf.bytes + pos, 0, AEGIS256_BLOCK_SIZE - pos);
crypto_aegis256_aesni_ad(state, AEGIS256_BLOCK_SIZE, buf.bytes);
}
}
static void crypto_aegis256_aesni_process_crypt(
struct aegis_state *state, struct aead_request *req,
const struct aegis_crypt_ops *ops)
{
struct skcipher_walk walk;
u8 *src, *dst;
unsigned int chunksize, base;
ops->skcipher_walk_init(&walk, req, false);
while (walk.nbytes) {
src = walk.src.virt.addr;
dst = walk.dst.virt.addr;
chunksize = walk.nbytes;
ops->crypt_blocks(state, chunksize, src, dst);
base = chunksize & ~(AEGIS256_BLOCK_SIZE - 1);
src += base;
dst += base;
chunksize &= AEGIS256_BLOCK_SIZE - 1;
if (chunksize > 0)
ops->crypt_tail(state, chunksize, src, dst);
skcipher_walk_done(&walk, 0);
}
}
static struct aegis_ctx *crypto_aegis256_aesni_ctx(struct crypto_aead *aead)
{
u8 *ctx = crypto_aead_ctx(aead);
ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
return (void *)ctx;
}
static int crypto_aegis256_aesni_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(aead);
if (keylen != AEGIS256_KEY_SIZE) {
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
memcpy(ctx->key, key, AEGIS256_KEY_SIZE);
return 0;
}
static int crypto_aegis256_aesni_setauthsize(struct crypto_aead *tfm,
unsigned int authsize)
{
if (authsize > AEGIS256_MAX_AUTH_SIZE)
return -EINVAL;
if (authsize < AEGIS256_MIN_AUTH_SIZE)
return -EINVAL;
return 0;
}
static void crypto_aegis256_aesni_crypt(struct aead_request *req,
struct aegis_block *tag_xor,
unsigned int cryptlen,
const struct aegis_crypt_ops *ops)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(tfm);
struct aegis_state state;
kernel_fpu_begin();
crypto_aegis256_aesni_init(&state, ctx->key, req->iv);
crypto_aegis256_aesni_process_ad(&state, req->src, req->assoclen);
crypto_aegis256_aesni_process_crypt(&state, req, ops);
crypto_aegis256_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
kernel_fpu_end();
}
static int crypto_aegis256_aesni_encrypt(struct aead_request *req)
{
static const struct aegis_crypt_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_encrypt,
.crypt_blocks = crypto_aegis256_aesni_enc,
.crypt_tail = crypto_aegis256_aesni_enc_tail,
};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_block tag = {};
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen;
crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS);
scatterwalk_map_and_copy(tag.bytes, req->dst,
req->assoclen + cryptlen, authsize, 1);
return 0;
}
static int crypto_aegis256_aesni_decrypt(struct aead_request *req)
{
static const struct aegis_block zeros = {};
static const struct aegis_crypt_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_decrypt,
.crypt_blocks = crypto_aegis256_aesni_dec,
.crypt_tail = crypto_aegis256_aesni_dec_tail,
};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_block tag;
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen - authsize;
scatterwalk_map_and_copy(tag.bytes, req->src,
req->assoclen + cryptlen, authsize, 0);
crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS);
return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
}
static int crypto_aegis256_aesni_init_tfm(struct crypto_aead *aead)
{
return 0;
}
static void crypto_aegis256_aesni_exit_tfm(struct crypto_aead *aead)
{
}
static int cryptd_aegis256_aesni_setkey(struct crypto_aead *aead,
const u8 *key, unsigned int keylen)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
}
static int cryptd_aegis256_aesni_setauthsize(struct crypto_aead *aead,
unsigned int authsize)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
}
static int cryptd_aegis256_aesni_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() ||
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm);
aead_request_set_tfm(req, aead);
return crypto_aead_encrypt(req);
}
static int cryptd_aegis256_aesni_decrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() ||
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm);
aead_request_set_tfm(req, aead);
return crypto_aead_decrypt(req);
}
static int cryptd_aegis256_aesni_init_tfm(struct crypto_aead *aead)
{
struct cryptd_aead *cryptd_tfm;
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_tfm = cryptd_alloc_aead("__aegis256-aesni", CRYPTO_ALG_INTERNAL,
CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
*ctx = cryptd_tfm;
crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
return 0;
}
static void cryptd_aegis256_aesni_exit_tfm(struct crypto_aead *aead)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_free_aead(*ctx);
}
static struct aead_alg crypto_aegis256_aesni_alg[] = {
{
.setkey = crypto_aegis256_aesni_setkey,
.setauthsize = crypto_aegis256_aesni_setauthsize,
.encrypt = crypto_aegis256_aesni_encrypt,
.decrypt = crypto_aegis256_aesni_decrypt,
.init = crypto_aegis256_aesni_init_tfm,
.exit = crypto_aegis256_aesni_exit_tfm,
.ivsize = AEGIS256_NONCE_SIZE,
.maxauthsize = AEGIS256_MAX_AUTH_SIZE,
.chunksize = AEGIS256_BLOCK_SIZE,
.base = {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aegis_ctx) +
__alignof__(struct aegis_ctx),
.cra_alignmask = 0,
.cra_name = "__aegis256",
.cra_driver_name = "__aegis256-aesni",
.cra_module = THIS_MODULE,
}
}, {
.setkey = cryptd_aegis256_aesni_setkey,
.setauthsize = cryptd_aegis256_aesni_setauthsize,
.encrypt = cryptd_aegis256_aesni_encrypt,
.decrypt = cryptd_aegis256_aesni_decrypt,
.init = cryptd_aegis256_aesni_init_tfm,
.exit = cryptd_aegis256_aesni_exit_tfm,
.ivsize = AEGIS256_NONCE_SIZE,
.maxauthsize = AEGIS256_MAX_AUTH_SIZE,
.chunksize = AEGIS256_BLOCK_SIZE,
.base = {
.cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct cryptd_aead *),
.cra_alignmask = 0,
.cra_priority = 400,
.cra_name = "aegis256",
.cra_driver_name = "aegis256-aesni",
.cra_module = THIS_MODULE,
}
}
};
static const struct x86_cpu_id aesni_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_AES),
X86_FEATURE_MATCH(X86_FEATURE_XMM2),
{}
};
MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
static int __init crypto_aegis256_aesni_module_init(void)
{
if (!x86_match_cpu(aesni_cpu_id))
return -ENODEV;
return crypto_register_aeads(crypto_aegis256_aesni_alg,
ARRAY_SIZE(crypto_aegis256_aesni_alg));
}
static void __exit crypto_aegis256_aesni_module_exit(void)
{
crypto_unregister_aeads(crypto_aegis256_aesni_alg,
ARRAY_SIZE(crypto_aegis256_aesni_alg));
}
module_init(crypto_aegis256_aesni_module_init);
module_exit(crypto_aegis256_aesni_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("AEGIS-256 AEAD algorithm -- AESNI+SSE2 implementation");
MODULE_ALIAS_CRYPTO("aegis256");
MODULE_ALIAS_CRYPTO("aegis256-aesni");

View File

@ -364,5 +364,5 @@ module_exit(ghash_pclmulqdqni_mod_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("GHASH Message Digest Algorithm, "
"acclerated by PCLMULQDQ-NI");
"accelerated by PCLMULQDQ-NI");
MODULE_ALIAS_CRYPTO("ghash");

View File

@ -0,0 +1,621 @@
/*
* AVX2 implementation of MORUS-1280
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/frame.h>
#define SHUFFLE_MASK(i0, i1, i2, i3) \
(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
#define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
#define MASK3 SHUFFLE_MASK(1, 2, 3, 0)
#define STATE0 %ymm0
#define STATE0_LOW %xmm0
#define STATE1 %ymm1
#define STATE2 %ymm2
#define STATE3 %ymm3
#define STATE4 %ymm4
#define KEY %ymm5
#define MSG %ymm5
#define MSG_LOW %xmm5
#define T0 %ymm6
#define T0_LOW %xmm6
#define T1 %ymm7
.section .rodata.cst32.morus1280_const, "aM", @progbits, 32
.align 32
.Lmorus1280_const:
.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
.section .rodata.cst32.morus1280_counter, "aM", @progbits, 32
.align 32
.Lmorus1280_counter:
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
.text
.macro morus1280_round s0, s1, s2, s3, s4, b, w
vpand \s1, \s2, T0
vpxor T0, \s0, \s0
vpxor \s3, \s0, \s0
vpsllq $\b, \s0, T0
vpsrlq $(64 - \b), \s0, \s0
vpxor T0, \s0, \s0
vpermq $\w, \s3, \s3
.endm
/*
* __morus1280_update: internal ABI
* input:
* STATE[0-4] - input state
* MSG - message block
* output:
* STATE[0-4] - output state
* changed:
* T0
*/
__morus1280_update:
morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
vpxor MSG, STATE1, STATE1
morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
vpxor MSG, STATE2, STATE2
morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
vpxor MSG, STATE3, STATE3
morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2
vpxor MSG, STATE4, STATE4
morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1
ret
ENDPROC(__morus1280_update)
/*
* __morus1280_update_zero: internal ABI
* input:
* STATE[0-4] - input state
* output:
* STATE[0-4] - output state
* changed:
* T0
*/
__morus1280_update_zero:
morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2
morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1
ret
ENDPROC(__morus1280_update_zero)
/*
* __load_partial: internal ABI
* input:
* %rsi - src
* %rcx - bytes
* output:
* MSG - message block
* changed:
* %r8
* %r9
*/
__load_partial:
xor %r9, %r9
vpxor MSG, MSG, MSG
mov %rcx, %r8
and $0x1, %r8
jz .Lld_partial_1
mov %rcx, %r8
and $0x1E, %r8
add %rsi, %r8
mov (%r8), %r9b
.Lld_partial_1:
mov %rcx, %r8
and $0x2, %r8
jz .Lld_partial_2
mov %rcx, %r8
and $0x1C, %r8
add %rsi, %r8
shl $16, %r9
mov (%r8), %r9w
.Lld_partial_2:
mov %rcx, %r8
and $0x4, %r8
jz .Lld_partial_4
mov %rcx, %r8
and $0x18, %r8
add %rsi, %r8
shl $32, %r9
mov (%r8), %r8d
xor %r8, %r9
.Lld_partial_4:
movq %r9, MSG_LOW
mov %rcx, %r8
and $0x8, %r8
jz .Lld_partial_8
mov %rcx, %r8
and $0x10, %r8
add %rsi, %r8
pshufd $MASK2, MSG_LOW, MSG_LOW
pinsrq $0, (%r8), MSG_LOW
.Lld_partial_8:
mov %rcx, %r8
and $0x10, %r8
jz .Lld_partial_16
vpermq $MASK2, MSG, MSG
movdqu (%rsi), MSG_LOW
.Lld_partial_16:
ret
ENDPROC(__load_partial)
/*
* __store_partial: internal ABI
* input:
* %rdx - dst
* %rcx - bytes
* output:
* T0 - message block
* changed:
* %r8
* %r9
* %r10
*/
__store_partial:
mov %rcx, %r8
mov %rdx, %r9
cmp $16, %r8
jl .Lst_partial_16
movdqu T0_LOW, (%r9)
vpermq $MASK2, T0, T0
sub $16, %r8
add $16, %r9
.Lst_partial_16:
movq T0_LOW, %r10
cmp $8, %r8
jl .Lst_partial_8
mov %r10, (%r9)
pextrq $1, T0_LOW, %r10
sub $8, %r8
add $8, %r9
.Lst_partial_8:
cmp $4, %r8
jl .Lst_partial_4
mov %r10d, (%r9)
shr $32, %r10
sub $4, %r8
add $4, %r9
.Lst_partial_4:
cmp $2, %r8
jl .Lst_partial_2
mov %r10w, (%r9)
shr $16, %r10
sub $2, %r8
add $2, %r9
.Lst_partial_2:
cmp $1, %r8
jl .Lst_partial_1
mov %r10b, (%r9)
.Lst_partial_1:
ret
ENDPROC(__store_partial)
/*
* void crypto_morus1280_avx2_init(void *state, const void *key,
* const void *iv);
*/
ENTRY(crypto_morus1280_avx2_init)
FRAME_BEGIN
/* load IV: */
vpxor STATE0, STATE0, STATE0
movdqu (%rdx), STATE0_LOW
/* load key: */
vmovdqu (%rsi), KEY
vmovdqa KEY, STATE1
/* load all ones: */
vpcmpeqd STATE2, STATE2, STATE2
/* load all zeros: */
vpxor STATE3, STATE3, STATE3
/* load the constant: */
vmovdqa .Lmorus1280_const, STATE4
/* update 16 times with zero: */
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
/* xor-in the key again after updates: */
vpxor KEY, STATE1, STATE1
/* store the state: */
vmovdqu STATE0, (0 * 32)(%rdi)
vmovdqu STATE1, (1 * 32)(%rdi)
vmovdqu STATE2, (2 * 32)(%rdi)
vmovdqu STATE3, (3 * 32)(%rdi)
vmovdqu STATE4, (4 * 32)(%rdi)
FRAME_END
ret
ENDPROC(crypto_morus1280_avx2_init)
/*
* void crypto_morus1280_avx2_ad(void *state, const void *data,
* unsigned int length);
*/
ENTRY(crypto_morus1280_avx2_ad)
FRAME_BEGIN
cmp $32, %rdx
jb .Lad_out
/* load the state: */
vmovdqu (0 * 32)(%rdi), STATE0
vmovdqu (1 * 32)(%rdi), STATE1
vmovdqu (2 * 32)(%rdi), STATE2
vmovdqu (3 * 32)(%rdi), STATE3
vmovdqu (4 * 32)(%rdi), STATE4
mov %rsi, %r8
and $0x1F, %r8
jnz .Lad_u_loop
.align 4
.Lad_a_loop:
vmovdqa (%rsi), MSG
call __morus1280_update
sub $32, %rdx
add $32, %rsi
cmp $32, %rdx
jge .Lad_a_loop
jmp .Lad_cont
.align 4
.Lad_u_loop:
vmovdqu (%rsi), MSG
call __morus1280_update
sub $32, %rdx
add $32, %rsi
cmp $32, %rdx
jge .Lad_u_loop
.Lad_cont:
/* store the state: */
vmovdqu STATE0, (0 * 32)(%rdi)
vmovdqu STATE1, (1 * 32)(%rdi)
vmovdqu STATE2, (2 * 32)(%rdi)
vmovdqu STATE3, (3 * 32)(%rdi)
vmovdqu STATE4, (4 * 32)(%rdi)
.Lad_out:
FRAME_END
ret
ENDPROC(crypto_morus1280_avx2_ad)
/*
* void crypto_morus1280_avx2_enc(void *state, const void *src, void *dst,
* unsigned int length);
*/
ENTRY(crypto_morus1280_avx2_enc)
FRAME_BEGIN
cmp $32, %rcx
jb .Lenc_out
/* load the state: */
vmovdqu (0 * 32)(%rdi), STATE0
vmovdqu (1 * 32)(%rdi), STATE1
vmovdqu (2 * 32)(%rdi), STATE2
vmovdqu (3 * 32)(%rdi), STATE3
vmovdqu (4 * 32)(%rdi), STATE4
mov %rsi, %r8
or %rdx, %r8
and $0x1F, %r8
jnz .Lenc_u_loop
.align 4
.Lenc_a_loop:
vmovdqa (%rsi), MSG
vmovdqa MSG, T0
vpxor STATE0, T0, T0
vpermq $MASK3, STATE1, T1
vpxor T1, T0, T0
vpand STATE2, STATE3, T1
vpxor T1, T0, T0
vmovdqa T0, (%rdx)
call __morus1280_update
sub $32, %rcx
add $32, %rsi
add $32, %rdx
cmp $32, %rcx
jge .Lenc_a_loop
jmp .Lenc_cont
.align 4
.Lenc_u_loop:
vmovdqu (%rsi), MSG
vmovdqa MSG, T0
vpxor STATE0, T0, T0
vpermq $MASK3, STATE1, T1
vpxor T1, T0, T0
vpand STATE2, STATE3, T1
vpxor T1, T0, T0
vmovdqu T0, (%rdx)
call __morus1280_update
sub $32, %rcx
add $32, %rsi
add $32, %rdx
cmp $32, %rcx
jge .Lenc_u_loop
.Lenc_cont:
/* store the state: */
vmovdqu STATE0, (0 * 32)(%rdi)
vmovdqu STATE1, (1 * 32)(%rdi)
vmovdqu STATE2, (2 * 32)(%rdi)
vmovdqu STATE3, (3 * 32)(%rdi)
vmovdqu STATE4, (4 * 32)(%rdi)
.Lenc_out:
FRAME_END
ret
ENDPROC(crypto_morus1280_avx2_enc)
/*
* void crypto_morus1280_avx2_enc_tail(void *state, const void *src, void *dst,
* unsigned int length);
*/
ENTRY(crypto_morus1280_avx2_enc_tail)
FRAME_BEGIN
/* load the state: */
vmovdqu (0 * 32)(%rdi), STATE0
vmovdqu (1 * 32)(%rdi), STATE1
vmovdqu (2 * 32)(%rdi), STATE2
vmovdqu (3 * 32)(%rdi), STATE3
vmovdqu (4 * 32)(%rdi), STATE4
/* encrypt message: */
call __load_partial
vmovdqa MSG, T0
vpxor STATE0, T0, T0
vpermq $MASK3, STATE1, T1
vpxor T1, T0, T0
vpand STATE2, STATE3, T1
vpxor T1, T0, T0
call __store_partial
call __morus1280_update
/* store the state: */
vmovdqu STATE0, (0 * 32)(%rdi)
vmovdqu STATE1, (1 * 32)(%rdi)
vmovdqu STATE2, (2 * 32)(%rdi)
vmovdqu STATE3, (3 * 32)(%rdi)
vmovdqu STATE4, (4 * 32)(%rdi)
FRAME_END
ENDPROC(crypto_morus1280_avx2_enc_tail)
/*
* void crypto_morus1280_avx2_dec(void *state, const void *src, void *dst,
* unsigned int length);
*/
ENTRY(crypto_morus1280_avx2_dec)
FRAME_BEGIN
cmp $32, %rcx
jb .Ldec_out
/* load the state: */
vmovdqu (0 * 32)(%rdi), STATE0
vmovdqu (1 * 32)(%rdi), STATE1
vmovdqu (2 * 32)(%rdi), STATE2
vmovdqu (3 * 32)(%rdi), STATE3
vmovdqu (4 * 32)(%rdi), STATE4
mov %rsi, %r8
or %rdx, %r8
and $0x1F, %r8
jnz .Ldec_u_loop
.align 4
.Ldec_a_loop:
vmovdqa (%rsi), MSG
vpxor STATE0, MSG, MSG
vpermq $MASK3, STATE1, T0
vpxor T0, MSG, MSG
vpand STATE2, STATE3, T0
vpxor T0, MSG, MSG
vmovdqa MSG, (%rdx)
call __morus1280_update
sub $32, %rcx
add $32, %rsi
add $32, %rdx
cmp $32, %rcx
jge .Ldec_a_loop
jmp .Ldec_cont
.align 4
.Ldec_u_loop:
vmovdqu (%rsi), MSG
vpxor STATE0, MSG, MSG
vpermq $MASK3, STATE1, T0
vpxor T0, MSG, MSG
vpand STATE2, STATE3, T0
vpxor T0, MSG, MSG
vmovdqu MSG, (%rdx)
call __morus1280_update
sub $32, %rcx
add $32, %rsi
add $32, %rdx
cmp $32, %rcx
jge .Ldec_u_loop
.Ldec_cont:
/* store the state: */
vmovdqu STATE0, (0 * 32)(%rdi)
vmovdqu STATE1, (1 * 32)(%rdi)
vmovdqu STATE2, (2 * 32)(%rdi)
vmovdqu STATE3, (3 * 32)(%rdi)
vmovdqu STATE4, (4 * 32)(%rdi)
.Ldec_out:
FRAME_END
ret
ENDPROC(crypto_morus1280_avx2_dec)
/*
* void crypto_morus1280_avx2_dec_tail(void *state, const void *src, void *dst,
* unsigned int length);
*/
ENTRY(crypto_morus1280_avx2_dec_tail)
FRAME_BEGIN
/* load the state: */
vmovdqu (0 * 32)(%rdi), STATE0
vmovdqu (1 * 32)(%rdi), STATE1
vmovdqu (2 * 32)(%rdi), STATE2
vmovdqu (3 * 32)(%rdi), STATE3
vmovdqu (4 * 32)(%rdi), STATE4
/* decrypt message: */
call __load_partial
vpxor STATE0, MSG, MSG
vpermq $MASK3, STATE1, T0
vpxor T0, MSG, MSG
vpand STATE2, STATE3, T0
vpxor T0, MSG, MSG
vmovdqa MSG, T0
call __store_partial
/* mask with byte count: */
movq %rcx, T0_LOW
vpbroadcastb T0_LOW, T0
vmovdqa .Lmorus1280_counter, T1
vpcmpgtb T1, T0, T0
vpand T0, MSG, MSG
call __morus1280_update
/* store the state: */
vmovdqu STATE0, (0 * 32)(%rdi)
vmovdqu STATE1, (1 * 32)(%rdi)
vmovdqu STATE2, (2 * 32)(%rdi)
vmovdqu STATE3, (3 * 32)(%rdi)
vmovdqu STATE4, (4 * 32)(%rdi)
FRAME_END
ret
ENDPROC(crypto_morus1280_avx2_dec_tail)
/*
* void crypto_morus1280_avx2_final(void *state, void *tag_xor,
* u64 assoclen, u64 cryptlen);
*/
ENTRY(crypto_morus1280_avx2_final)
FRAME_BEGIN
/* load the state: */
vmovdqu (0 * 32)(%rdi), STATE0
vmovdqu (1 * 32)(%rdi), STATE1
vmovdqu (2 * 32)(%rdi), STATE2
vmovdqu (3 * 32)(%rdi), STATE3
vmovdqu (4 * 32)(%rdi), STATE4
/* xor state[0] into state[4]: */
vpxor STATE0, STATE4, STATE4
/* prepare length block: */
vpxor MSG, MSG, MSG
vpinsrq $0, %rdx, MSG_LOW, MSG_LOW
vpinsrq $1, %rcx, MSG_LOW, MSG_LOW
vpsllq $3, MSG, MSG /* multiply by 8 (to get bit count) */
/* update state: */
call __morus1280_update
call __morus1280_update
call __morus1280_update
call __morus1280_update
call __morus1280_update
call __morus1280_update
call __morus1280_update
call __morus1280_update
call __morus1280_update
call __morus1280_update
/* xor tag: */
vmovdqu (%rsi), MSG
vpxor STATE0, MSG, MSG
vpermq $MASK3, STATE1, T0
vpxor T0, MSG, MSG
vpand STATE2, STATE3, T0
vpxor T0, MSG, MSG
vmovdqu MSG, (%rsi)
FRAME_END
ret
ENDPROC(crypto_morus1280_avx2_final)

View File

@ -0,0 +1,68 @@
/*
* The MORUS-1280 Authenticated-Encryption Algorithm
* Glue for AVX2 implementation
*
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/internal/aead.h>
#include <crypto/morus1280_glue.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/cpu_device_id.h>
asmlinkage void crypto_morus1280_avx2_init(void *state, const void *key,
const void *iv);
asmlinkage void crypto_morus1280_avx2_ad(void *state, const void *data,
unsigned int length);
asmlinkage void crypto_morus1280_avx2_enc(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus1280_avx2_dec(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus1280_avx2_enc_tail(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus1280_avx2_dec_tail(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor,
u64 assoclen, u64 cryptlen);
MORUS1280_DECLARE_ALGS(avx2, "morus1280-avx2", 400);
static const struct x86_cpu_id avx2_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_AVX2),
{}
};
MODULE_DEVICE_TABLE(x86cpu, avx2_cpu_id);
static int __init crypto_morus1280_avx2_module_init(void)
{
if (!x86_match_cpu(avx2_cpu_id))
return -ENODEV;
return crypto_register_aeads(crypto_morus1280_avx2_algs,
ARRAY_SIZE(crypto_morus1280_avx2_algs));
}
static void __exit crypto_morus1280_avx2_module_exit(void)
{
crypto_unregister_aeads(crypto_morus1280_avx2_algs,
ARRAY_SIZE(crypto_morus1280_avx2_algs));
}
module_init(crypto_morus1280_avx2_module_init);
module_exit(crypto_morus1280_avx2_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- AVX2 implementation");
MODULE_ALIAS_CRYPTO("morus1280");
MODULE_ALIAS_CRYPTO("morus1280-avx2");

View File

@ -0,0 +1,895 @@
/*
* SSE2 implementation of MORUS-1280
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/frame.h>
#define SHUFFLE_MASK(i0, i1, i2, i3) \
(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
#define STATE0_LO %xmm0
#define STATE0_HI %xmm1
#define STATE1_LO %xmm2
#define STATE1_HI %xmm3
#define STATE2_LO %xmm4
#define STATE2_HI %xmm5
#define STATE3_LO %xmm6
#define STATE3_HI %xmm7
#define STATE4_LO %xmm8
#define STATE4_HI %xmm9
#define KEY_LO %xmm10
#define KEY_HI %xmm11
#define MSG_LO %xmm10
#define MSG_HI %xmm11
#define T0_LO %xmm12
#define T0_HI %xmm13
#define T1_LO %xmm14
#define T1_HI %xmm15
.section .rodata.cst16.morus640_const, "aM", @progbits, 16
.align 16
.Lmorus640_const_0:
.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Lmorus640_const_1:
.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
.section .rodata.cst16.morus640_counter, "aM", @progbits, 16
.align 16
.Lmorus640_counter_0:
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
.Lmorus640_counter_1:
.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
.text
.macro rol1 hi, lo
/*
* HI_1 | HI_0 || LO_1 | LO_0
* ==>
* HI_0 | HI_1 || LO_1 | LO_0
* ==>
* HI_0 | LO_1 || LO_0 | HI_1
*/
pshufd $MASK2, \hi, \hi
movdqa \hi, T0_LO
punpcklqdq \lo, T0_LO
punpckhqdq \hi, \lo
movdqa \lo, \hi
movdqa T0_LO, \lo
.endm
.macro rol2 hi, lo
movdqa \lo, T0_LO
movdqa \hi, \lo
movdqa T0_LO, \hi
.endm
.macro rol3 hi, lo
/*
* HI_1 | HI_0 || LO_1 | LO_0
* ==>
* HI_0 | HI_1 || LO_1 | LO_0
* ==>
* LO_0 | HI_1 || HI_0 | LO_1
*/
pshufd $MASK2, \hi, \hi
movdqa \lo, T0_LO
punpckhqdq \hi, T0_LO
punpcklqdq \lo, \hi
movdqa T0_LO, \lo
.endm
.macro morus1280_round s0_l, s0_h, s1_l, s1_h, s2_l, s2_h, s3_l, s3_h, s4_l, s4_h, b, w
movdqa \s1_l, T0_LO
pand \s2_l, T0_LO
pxor T0_LO, \s0_l
movdqa \s1_h, T0_LO
pand \s2_h, T0_LO
pxor T0_LO, \s0_h
pxor \s3_l, \s0_l
pxor \s3_h, \s0_h
movdqa \s0_l, T0_LO
psllq $\b, T0_LO
psrlq $(64 - \b), \s0_l
pxor T0_LO, \s0_l
movdqa \s0_h, T0_LO
psllq $\b, T0_LO
psrlq $(64 - \b), \s0_h
pxor T0_LO, \s0_h
\w \s3_h, \s3_l
.endm
/*
* __morus1280_update: internal ABI
* input:
* STATE[0-4] - input state
* MSG - message block
* output:
* STATE[0-4] - output state
* changed:
* T0
*/
__morus1280_update:
morus1280_round \
STATE0_LO, STATE0_HI, \
STATE1_LO, STATE1_HI, \
STATE2_LO, STATE2_HI, \
STATE3_LO, STATE3_HI, \
STATE4_LO, STATE4_HI, \
13, rol1
pxor MSG_LO, STATE1_LO
pxor MSG_HI, STATE1_HI
morus1280_round \
STATE1_LO, STATE1_HI, \
STATE2_LO, STATE2_HI, \
STATE3_LO, STATE3_HI, \
STATE4_LO, STATE4_HI, \
STATE0_LO, STATE0_HI, \
46, rol2
pxor MSG_LO, STATE2_LO
pxor MSG_HI, STATE2_HI
morus1280_round \
STATE2_LO, STATE2_HI, \
STATE3_LO, STATE3_HI, \
STATE4_LO, STATE4_HI, \
STATE0_LO, STATE0_HI, \
STATE1_LO, STATE1_HI, \
38, rol3
pxor MSG_LO, STATE3_LO
pxor MSG_HI, STATE3_HI
morus1280_round \
STATE3_LO, STATE3_HI, \
STATE4_LO, STATE4_HI, \
STATE0_LO, STATE0_HI, \
STATE1_LO, STATE1_HI, \
STATE2_LO, STATE2_HI, \
7, rol2
pxor MSG_LO, STATE4_LO
pxor MSG_HI, STATE4_HI
morus1280_round \
STATE4_LO, STATE4_HI, \
STATE0_LO, STATE0_HI, \
STATE1_LO, STATE1_HI, \
STATE2_LO, STATE2_HI, \
STATE3_LO, STATE3_HI, \
4, rol1
ret
ENDPROC(__morus1280_update)
/*
* __morus1280_update_zero: internal ABI
* input:
* STATE[0-4] - input state
* output:
* STATE[0-4] - output state
* changed:
* T0
*/
__morus1280_update_zero:
morus1280_round \
STATE0_LO, STATE0_HI, \
STATE1_LO, STATE1_HI, \
STATE2_LO, STATE2_HI, \
STATE3_LO, STATE3_HI, \
STATE4_LO, STATE4_HI, \
13, rol1
morus1280_round \
STATE1_LO, STATE1_HI, \
STATE2_LO, STATE2_HI, \
STATE3_LO, STATE3_HI, \
STATE4_LO, STATE4_HI, \
STATE0_LO, STATE0_HI, \
46, rol2
morus1280_round \
STATE2_LO, STATE2_HI, \
STATE3_LO, STATE3_HI, \
STATE4_LO, STATE4_HI, \
STATE0_LO, STATE0_HI, \
STATE1_LO, STATE1_HI, \
38, rol3
morus1280_round \
STATE3_LO, STATE3_HI, \
STATE4_LO, STATE4_HI, \
STATE0_LO, STATE0_HI, \
STATE1_LO, STATE1_HI, \
STATE2_LO, STATE2_HI, \
7, rol2
morus1280_round \
STATE4_LO, STATE4_HI, \
STATE0_LO, STATE0_HI, \
STATE1_LO, STATE1_HI, \
STATE2_LO, STATE2_HI, \
STATE3_LO, STATE3_HI, \
4, rol1
ret
ENDPROC(__morus1280_update_zero)
/*
* __load_partial: internal ABI
* input:
* %rsi - src
* %rcx - bytes
* output:
* MSG - message block
* changed:
* %r8
* %r9
*/
__load_partial:
xor %r9, %r9
pxor MSG_LO, MSG_LO
pxor MSG_HI, MSG_HI
mov %rcx, %r8
and $0x1, %r8
jz .Lld_partial_1
mov %rcx, %r8
and $0x1E, %r8
add %rsi, %r8
mov (%r8), %r9b
.Lld_partial_1:
mov %rcx, %r8
and $0x2, %r8
jz .Lld_partial_2
mov %rcx, %r8
and $0x1C, %r8
add %rsi, %r8
shl $16, %r9
mov (%r8), %r9w
.Lld_partial_2:
mov %rcx, %r8
and $0x4, %r8
jz .Lld_partial_4
mov %rcx, %r8
and $0x18, %r8
add %rsi, %r8
shl $32, %r9
mov (%r8), %r8d
xor %r8, %r9
.Lld_partial_4:
movq %r9, MSG_LO
mov %rcx, %r8
and $0x8, %r8
jz .Lld_partial_8
mov %rcx, %r8
and $0x10, %r8
add %rsi, %r8
pslldq $8, MSG_LO
movq (%r8), T0_LO
pxor T0_LO, MSG_LO
.Lld_partial_8:
mov %rcx, %r8
and $0x10, %r8
jz .Lld_partial_16
movdqa MSG_LO, MSG_HI
movdqu (%rsi), MSG_LO
.Lld_partial_16:
ret
ENDPROC(__load_partial)
/*
* __store_partial: internal ABI
* input:
* %rdx - dst
* %rcx - bytes
* output:
* T0 - message block
* changed:
* %r8
* %r9
* %r10
*/
__store_partial:
mov %rcx, %r8
mov %rdx, %r9
cmp $16, %r8
jl .Lst_partial_16
movdqu T0_LO, (%r9)
movdqa T0_HI, T0_LO
sub $16, %r8
add $16, %r9
.Lst_partial_16:
movq T0_LO, %r10
cmp $8, %r8
jl .Lst_partial_8
mov %r10, (%r9)
psrldq $8, T0_LO
movq T0_LO, %r10
sub $8, %r8
add $8, %r9
.Lst_partial_8:
cmp $4, %r8
jl .Lst_partial_4
mov %r10d, (%r9)
shr $32, %r10
sub $4, %r8
add $4, %r9
.Lst_partial_4:
cmp $2, %r8
jl .Lst_partial_2
mov %r10w, (%r9)
shr $16, %r10
sub $2, %r8
add $2, %r9
.Lst_partial_2:
cmp $1, %r8
jl .Lst_partial_1
mov %r10b, (%r9)
.Lst_partial_1:
ret
ENDPROC(__store_partial)
/*
* void crypto_morus1280_sse2_init(void *state, const void *key,
* const void *iv);
*/
ENTRY(crypto_morus1280_sse2_init)
FRAME_BEGIN
/* load IV: */
pxor STATE0_HI, STATE0_HI
movdqu (%rdx), STATE0_LO
/* load key: */
movdqu 0(%rsi), KEY_LO
movdqu 16(%rsi), KEY_HI
movdqa KEY_LO, STATE1_LO
movdqa KEY_HI, STATE1_HI
/* load all ones: */
pcmpeqd STATE2_LO, STATE2_LO
pcmpeqd STATE2_HI, STATE2_HI
/* load all zeros: */
pxor STATE3_LO, STATE3_LO
pxor STATE3_HI, STATE3_HI
/* load the constant: */
movdqa .Lmorus640_const_0, STATE4_LO
movdqa .Lmorus640_const_1, STATE4_HI
/* update 16 times with zero: */
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
call __morus1280_update_zero
/* xor-in the key again after updates: */
pxor KEY_LO, STATE1_LO
pxor KEY_HI, STATE1_HI
/* store the state: */
movdqu STATE0_LO, (0 * 16)(%rdi)
movdqu STATE0_HI, (1 * 16)(%rdi)
movdqu STATE1_LO, (2 * 16)(%rdi)
movdqu STATE1_HI, (3 * 16)(%rdi)
movdqu STATE2_LO, (4 * 16)(%rdi)
movdqu STATE2_HI, (5 * 16)(%rdi)
movdqu STATE3_LO, (6 * 16)(%rdi)
movdqu STATE3_HI, (7 * 16)(%rdi)
movdqu STATE4_LO, (8 * 16)(%rdi)
movdqu STATE4_HI, (9 * 16)(%rdi)
FRAME_END
ret
ENDPROC(crypto_morus1280_sse2_init)
/*
* void crypto_morus1280_sse2_ad(void *state, const void *data,
* unsigned int length);
*/
ENTRY(crypto_morus1280_sse2_ad)
FRAME_BEGIN
cmp $32, %rdx
jb .Lad_out
/* load the state: */
movdqu (0 * 16)(%rdi), STATE0_LO
movdqu (1 * 16)(%rdi), STATE0_HI
movdqu (2 * 16)(%rdi), STATE1_LO
movdqu (3 * 16)(%rdi), STATE1_HI
movdqu (4 * 16)(%rdi), STATE2_LO
movdqu (5 * 16)(%rdi), STATE2_HI
movdqu (6 * 16)(%rdi), STATE3_LO
movdqu (7 * 16)(%rdi), STATE3_HI
movdqu (8 * 16)(%rdi), STATE4_LO
movdqu (9 * 16)(%rdi), STATE4_HI
mov %rsi, %r8
and $0xF, %r8
jnz .Lad_u_loop
.align 4
.Lad_a_loop:
movdqa 0(%rsi), MSG_LO
movdqa 16(%rsi), MSG_HI
call __morus1280_update
sub $32, %rdx
add $32, %rsi
cmp $32, %rdx
jge .Lad_a_loop
jmp .Lad_cont
.align 4
.Lad_u_loop:
movdqu 0(%rsi), MSG_LO
movdqu 16(%rsi), MSG_HI
call __morus1280_update
sub $32, %rdx
add $32, %rsi
cmp $32, %rdx
jge .Lad_u_loop
.Lad_cont:
/* store the state: */
movdqu STATE0_LO, (0 * 16)(%rdi)
movdqu STATE0_HI, (1 * 16)(%rdi)
movdqu STATE1_LO, (2 * 16)(%rdi)
movdqu STATE1_HI, (3 * 16)(%rdi)
movdqu STATE2_LO, (4 * 16)(%rdi)
movdqu STATE2_HI, (5 * 16)(%rdi)
movdqu STATE3_LO, (6 * 16)(%rdi)
movdqu STATE3_HI, (7 * 16)(%rdi)
movdqu STATE4_LO, (8 * 16)(%rdi)
movdqu STATE4_HI, (9 * 16)(%rdi)
.Lad_out:
FRAME_END
ret
ENDPROC(crypto_morus1280_sse2_ad)
/*
* void crypto_morus1280_sse2_enc(void *state, const void *src, void *dst,
* unsigned int length);
*/
ENTRY(crypto_morus1280_sse2_enc)
FRAME_BEGIN
cmp $32, %rcx
jb .Lenc_out
/* load the state: */
movdqu (0 * 16)(%rdi), STATE0_LO
movdqu (1 * 16)(%rdi), STATE0_HI
movdqu (2 * 16)(%rdi), STATE1_LO
movdqu (3 * 16)(%rdi), STATE1_HI
movdqu (4 * 16)(%rdi), STATE2_LO
movdqu (5 * 16)(%rdi), STATE2_HI
movdqu (6 * 16)(%rdi), STATE3_LO
movdqu (7 * 16)(%rdi), STATE3_HI
movdqu (8 * 16)(%rdi), STATE4_LO
movdqu (9 * 16)(%rdi), STATE4_HI
mov %rsi, %r8
or %rdx, %r8
and $0xF, %r8
jnz .Lenc_u_loop
.align 4
.Lenc_a_loop:
movdqa 0(%rsi), MSG_LO
movdqa 16(%rsi), MSG_HI
movdqa STATE1_LO, T1_LO
movdqa STATE1_HI, T1_HI
rol3 T1_HI, T1_LO
movdqa MSG_LO, T0_LO
movdqa MSG_HI, T0_HI
pxor T1_LO, T0_LO
pxor T1_HI, T0_HI
pxor STATE0_LO, T0_LO
pxor STATE0_HI, T0_HI
movdqa STATE2_LO, T1_LO
movdqa STATE2_HI, T1_HI
pand STATE3_LO, T1_LO
pand STATE3_HI, T1_HI
pxor T1_LO, T0_LO
pxor T1_HI, T0_HI
movdqa T0_LO, 0(%rdx)
movdqa T0_HI, 16(%rdx)
call __morus1280_update
sub $32, %rcx
add $32, %rsi
add $32, %rdx
cmp $32, %rcx
jge .Lenc_a_loop
jmp .Lenc_cont
.align 4
.Lenc_u_loop:
movdqu 0(%rsi), MSG_LO
movdqu 16(%rsi), MSG_HI
movdqa STATE1_LO, T1_LO
movdqa STATE1_HI, T1_HI
rol3 T1_HI, T1_LO
movdqa MSG_LO, T0_LO
movdqa MSG_HI, T0_HI
pxor T1_LO, T0_LO
pxor T1_HI, T0_HI
pxor STATE0_LO, T0_LO
pxor STATE0_HI, T0_HI
movdqa STATE2_LO, T1_LO
movdqa STATE2_HI, T1_HI
pand STATE3_LO, T1_LO
pand STATE3_HI, T1_HI
pxor T1_LO, T0_LO
pxor T1_HI, T0_HI
movdqu T0_LO, 0(%rdx)
movdqu T0_HI, 16(%rdx)
call __morus1280_update
sub $32, %rcx
add $32, %rsi
add $32, %rdx
cmp $32, %rcx
jge .Lenc_u_loop
.Lenc_cont:
/* store the state: */
movdqu STATE0_LO, (0 * 16)(%rdi)
movdqu STATE0_HI, (1 * 16)(%rdi)
movdqu STATE1_LO, (2 * 16)(%rdi)
movdqu STATE1_HI, (3 * 16)(%rdi)
movdqu STATE2_LO, (4 * 16)(%rdi)
movdqu STATE2_HI, (5 * 16)(%rdi)
movdqu STATE3_LO, (6 * 16)(%rdi)
movdqu STATE3_HI, (7 * 16)(%rdi)
movdqu STATE4_LO, (8 * 16)(%rdi)
movdqu STATE4_HI, (9 * 16)(%rdi)
.Lenc_out:
FRAME_END
ret
ENDPROC(crypto_morus1280_sse2_enc)
/*
* void crypto_morus1280_sse2_enc_tail(void *state, const void *src, void *dst,
* unsigned int length);
*/
ENTRY(crypto_morus1280_sse2_enc_tail)
FRAME_BEGIN
/* load the state: */
movdqu (0 * 16)(%rdi), STATE0_LO
movdqu (1 * 16)(%rdi), STATE0_HI
movdqu (2 * 16)(%rdi), STATE1_LO
movdqu (3 * 16)(%rdi), STATE1_HI
movdqu (4 * 16)(%rdi), STATE2_LO
movdqu (5 * 16)(%rdi), STATE2_HI
movdqu (6 * 16)(%rdi), STATE3_LO
movdqu (7 * 16)(%rdi), STATE3_HI
movdqu (8 * 16)(%rdi), STATE4_LO
movdqu (9 * 16)(%rdi), STATE4_HI
/* encrypt message: */
call __load_partial
movdqa STATE1_LO, T1_LO
movdqa STATE1_HI, T1_HI
rol3 T1_HI, T1_LO
movdqa MSG_LO, T0_LO
movdqa MSG_HI, T0_HI
pxor T1_LO, T0_LO
pxor T1_HI, T0_HI
pxor STATE0_LO, T0_LO
pxor STATE0_HI, T0_HI
movdqa STATE2_LO, T1_LO
movdqa STATE2_HI, T1_HI
pand STATE3_LO, T1_LO
pand STATE3_HI, T1_HI
pxor T1_LO, T0_LO
pxor T1_HI, T0_HI
call __store_partial
call __morus1280_update
/* store the state: */
movdqu STATE0_LO, (0 * 16)(%rdi)
movdqu STATE0_HI, (1 * 16)(%rdi)
movdqu STATE1_LO, (2 * 16)(%rdi)
movdqu STATE1_HI, (3 * 16)(%rdi)
movdqu STATE2_LO, (4 * 16)(%rdi)
movdqu STATE2_HI, (5 * 16)(%rdi)
movdqu STATE3_LO, (6 * 16)(%rdi)
movdqu STATE3_HI, (7 * 16)(%rdi)
movdqu STATE4_LO, (8 * 16)(%rdi)
movdqu STATE4_HI, (9 * 16)(%rdi)
FRAME_END
ENDPROC(crypto_morus1280_sse2_enc_tail)
/*
* void crypto_morus1280_sse2_dec(void *state, const void *src, void *dst,
* unsigned int length);
*/
ENTRY(crypto_morus1280_sse2_dec)
FRAME_BEGIN
cmp $32, %rcx
jb .Ldec_out
/* load the state: */
movdqu (0 * 16)(%rdi), STATE0_LO
movdqu (1 * 16)(%rdi), STATE0_HI
movdqu (2 * 16)(%rdi), STATE1_LO
movdqu (3 * 16)(%rdi), STATE1_HI
movdqu (4 * 16)(%rdi), STATE2_LO
movdqu (5 * 16)(%rdi), STATE2_HI
movdqu (6 * 16)(%rdi), STATE3_LO
movdqu (7 * 16)(%rdi), STATE3_HI
movdqu (8 * 16)(%rdi), STATE4_LO
movdqu (9 * 16)(%rdi), STATE4_HI
mov %rsi, %r8
or %rdx, %r8
and $0xF, %r8
jnz .Ldec_u_loop
.align 4
.Ldec_a_loop:
movdqa 0(%rsi), MSG_LO
movdqa 16(%rsi), MSG_HI
pxor STATE0_LO, MSG_LO
pxor STATE0_HI, MSG_HI
movdqa STATE1_LO, T1_LO
movdqa STATE1_HI, T1_HI
rol3 T1_HI, T1_LO
pxor T1_LO, MSG_LO
pxor T1_HI, MSG_HI
movdqa STATE2_LO, T1_LO
movdqa STATE2_HI, T1_HI
pand STATE3_LO, T1_LO
pand STATE3_HI, T1_HI
pxor T1_LO, MSG_LO
pxor T1_HI, MSG_HI
movdqa MSG_LO, 0(%rdx)
movdqa MSG_HI, 16(%rdx)
call __morus1280_update
sub $32, %rcx
add $32, %rsi
add $32, %rdx
cmp $32, %rcx
jge .Ldec_a_loop
jmp .Ldec_cont
.align 4
.Ldec_u_loop:
movdqu 0(%rsi), MSG_LO
movdqu 16(%rsi), MSG_HI
pxor STATE0_LO, MSG_LO
pxor STATE0_HI, MSG_HI
movdqa STATE1_LO, T1_LO
movdqa STATE1_HI, T1_HI
rol3 T1_HI, T1_LO
pxor T1_LO, MSG_LO
pxor T1_HI, MSG_HI
movdqa STATE2_LO, T1_LO
movdqa STATE2_HI, T1_HI
pand STATE3_LO, T1_LO
pand STATE3_HI, T1_HI
pxor T1_LO, MSG_LO
pxor T1_HI, MSG_HI
movdqu MSG_LO, 0(%rdx)
movdqu MSG_HI, 16(%rdx)
call __morus1280_update
sub $32, %rcx
add $32, %rsi
add $32, %rdx
cmp $32, %rcx
jge .Ldec_u_loop
.Ldec_cont:
/* store the state: */
movdqu STATE0_LO, (0 * 16)(%rdi)
movdqu STATE0_HI, (1 * 16)(%rdi)
movdqu STATE1_LO, (2 * 16)(%rdi)
movdqu STATE1_HI, (3 * 16)(%rdi)
movdqu STATE2_LO, (4 * 16)(%rdi)
movdqu STATE2_HI, (5 * 16)(%rdi)
movdqu STATE3_LO, (6 * 16)(%rdi)
movdqu STATE3_HI, (7 * 16)(%rdi)
movdqu STATE4_LO, (8 * 16)(%rdi)
movdqu STATE4_HI, (9 * 16)(%rdi)
.Ldec_out:
FRAME_END
ret
ENDPROC(crypto_morus1280_sse2_dec)
/*
* void crypto_morus1280_sse2_dec_tail(void *state, const void *src, void *dst,
* unsigned int length);
*/
ENTRY(crypto_morus1280_sse2_dec_tail)
FRAME_BEGIN
/* load the state: */
movdqu (0 * 16)(%rdi), STATE0_LO
movdqu (1 * 16)(%rdi), STATE0_HI
movdqu (2 * 16)(%rdi), STATE1_LO
movdqu (3 * 16)(%rdi), STATE1_HI
movdqu (4 * 16)(%rdi), STATE2_LO
movdqu (5 * 16)(%rdi), STATE2_HI
movdqu (6 * 16)(%rdi), STATE3_LO
movdqu (7 * 16)(%rdi), STATE3_HI
movdqu (8 * 16)(%rdi), STATE4_LO
movdqu (9 * 16)(%rdi), STATE4_HI
/* decrypt message: */
call __load_partial
pxor STATE0_LO, MSG_LO
pxor STATE0_HI, MSG_HI
movdqa STATE1_LO, T1_LO
movdqa STATE1_HI, T1_HI
rol3 T1_HI, T1_LO
pxor T1_LO, MSG_LO
pxor T1_HI, MSG_HI
movdqa STATE2_LO, T1_LO
movdqa STATE2_HI, T1_HI
pand STATE3_LO, T1_LO
pand STATE3_HI, T1_HI
pxor T1_LO, MSG_LO
pxor T1_HI, MSG_HI
movdqa MSG_LO, T0_LO
movdqa MSG_HI, T0_HI
call __store_partial
/* mask with byte count: */
movq %rcx, T0_LO
punpcklbw T0_LO, T0_LO
punpcklbw T0_LO, T0_LO
punpcklbw T0_LO, T0_LO
punpcklbw T0_LO, T0_LO
movdqa T0_LO, T0_HI
movdqa .Lmorus640_counter_0, T1_LO
movdqa .Lmorus640_counter_1, T1_HI
pcmpgtb T1_LO, T0_LO
pcmpgtb T1_HI, T0_HI
pand T0_LO, MSG_LO
pand T0_HI, MSG_HI
call __morus1280_update
/* store the state: */
movdqu STATE0_LO, (0 * 16)(%rdi)
movdqu STATE0_HI, (1 * 16)(%rdi)
movdqu STATE1_LO, (2 * 16)(%rdi)
movdqu STATE1_HI, (3 * 16)(%rdi)
movdqu STATE2_LO, (4 * 16)(%rdi)
movdqu STATE2_HI, (5 * 16)(%rdi)
movdqu STATE3_LO, (6 * 16)(%rdi)
movdqu STATE3_HI, (7 * 16)(%rdi)
movdqu STATE4_LO, (8 * 16)(%rdi)
movdqu STATE4_HI, (9 * 16)(%rdi)
FRAME_END
ret
ENDPROC(crypto_morus1280_sse2_dec_tail)
/*
* void crypto_morus1280_sse2_final(void *state, void *tag_xor,
* u64 assoclen, u64 cryptlen);
*/
ENTRY(crypto_morus1280_sse2_final)
FRAME_BEGIN
/* load the state: */
movdqu (0 * 16)(%rdi), STATE0_LO
movdqu (1 * 16)(%rdi), STATE0_HI
movdqu (2 * 16)(%rdi), STATE1_LO
movdqu (3 * 16)(%rdi), STATE1_HI
movdqu (4 * 16)(%rdi), STATE2_LO
movdqu (5 * 16)(%rdi), STATE2_HI
movdqu (6 * 16)(%rdi), STATE3_LO
movdqu (7 * 16)(%rdi), STATE3_HI
movdqu (8 * 16)(%rdi), STATE4_LO
movdqu (9 * 16)(%rdi), STATE4_HI
/* xor state[0] into state[4]: */
pxor STATE0_LO, STATE4_LO
pxor STATE0_HI, STATE4_HI
/* prepare length block: */
movq %rdx, MSG_LO
movq %rcx, T0_LO
pslldq $8, T0_LO
pxor T0_LO, MSG_LO
psllq $3, MSG_LO /* multiply by 8 (to get bit count) */
pxor MSG_HI, MSG_HI
/* update state: */
call __morus1280_update
call __morus1280_update
call __morus1280_update
call __morus1280_update
call __morus1280_update
call __morus1280_update
call __morus1280_update
call __morus1280_update
call __morus1280_update
call __morus1280_update
/* xor tag: */
movdqu 0(%rsi), MSG_LO
movdqu 16(%rsi), MSG_HI
pxor STATE0_LO, MSG_LO
pxor STATE0_HI, MSG_HI
movdqa STATE1_LO, T0_LO
movdqa STATE1_HI, T0_HI
rol3 T0_HI, T0_LO
pxor T0_LO, MSG_LO
pxor T0_HI, MSG_HI
movdqa STATE2_LO, T0_LO
movdqa STATE2_HI, T0_HI
pand STATE3_LO, T0_LO
pand STATE3_HI, T0_HI
pxor T0_LO, MSG_LO
pxor T0_HI, MSG_HI
movdqu MSG_LO, 0(%rsi)
movdqu MSG_HI, 16(%rsi)
FRAME_END
ret
ENDPROC(crypto_morus1280_sse2_final)

View File

@ -0,0 +1,68 @@
/*
* The MORUS-1280 Authenticated-Encryption Algorithm
* Glue for SSE2 implementation
*
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/internal/aead.h>
#include <crypto/morus1280_glue.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/cpu_device_id.h>
asmlinkage void crypto_morus1280_sse2_init(void *state, const void *key,
const void *iv);
asmlinkage void crypto_morus1280_sse2_ad(void *state, const void *data,
unsigned int length);
asmlinkage void crypto_morus1280_sse2_enc(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus1280_sse2_dec(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus1280_sse2_enc_tail(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus1280_sse2_dec_tail(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor,
u64 assoclen, u64 cryptlen);
MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350);
static const struct x86_cpu_id sse2_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_XMM2),
{}
};
MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id);
static int __init crypto_morus1280_sse2_module_init(void)
{
if (!x86_match_cpu(sse2_cpu_id))
return -ENODEV;
return crypto_register_aeads(crypto_morus1280_sse2_algs,
ARRAY_SIZE(crypto_morus1280_sse2_algs));
}
static void __exit crypto_morus1280_sse2_module_exit(void)
{
crypto_unregister_aeads(crypto_morus1280_sse2_algs,
ARRAY_SIZE(crypto_morus1280_sse2_algs));
}
module_init(crypto_morus1280_sse2_module_init);
module_exit(crypto_morus1280_sse2_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- SSE2 implementation");
MODULE_ALIAS_CRYPTO("morus1280");
MODULE_ALIAS_CRYPTO("morus1280-sse2");

View File

@ -0,0 +1,302 @@
/*
* The MORUS-1280 Authenticated-Encryption Algorithm
* Common x86 SIMD glue skeleton
*
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/cryptd.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/morus1280_glue.h>
#include <crypto/scatterwalk.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <asm/fpu/api.h>
struct morus1280_state {
struct morus1280_block s[MORUS_STATE_BLOCKS];
};
struct morus1280_ops {
int (*skcipher_walk_init)(struct skcipher_walk *walk,
struct aead_request *req, bool atomic);
void (*crypt_blocks)(void *state, const void *src, void *dst,
unsigned int length);
void (*crypt_tail)(void *state, const void *src, void *dst,
unsigned int length);
};
static void crypto_morus1280_glue_process_ad(
struct morus1280_state *state,
const struct morus1280_glue_ops *ops,
struct scatterlist *sg_src, unsigned int assoclen)
{
struct scatter_walk walk;
struct morus1280_block buf;
unsigned int pos = 0;
scatterwalk_start(&walk, sg_src);
while (assoclen != 0) {
unsigned int size = scatterwalk_clamp(&walk, assoclen);
unsigned int left = size;
void *mapped = scatterwalk_map(&walk);
const u8 *src = (const u8 *)mapped;
if (pos + size >= MORUS1280_BLOCK_SIZE) {
if (pos > 0) {
unsigned int fill = MORUS1280_BLOCK_SIZE - pos;
memcpy(buf.bytes + pos, src, fill);
ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE);
pos = 0;
left -= fill;
src += fill;
}
ops->ad(state, src, left);
src += left & ~(MORUS1280_BLOCK_SIZE - 1);
left &= MORUS1280_BLOCK_SIZE - 1;
}
memcpy(buf.bytes + pos, src, left);
pos += left;
assoclen -= size;
scatterwalk_unmap(mapped);
scatterwalk_advance(&walk, size);
scatterwalk_done(&walk, 0, assoclen);
}
if (pos > 0) {
memset(buf.bytes + pos, 0, MORUS1280_BLOCK_SIZE - pos);
ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE);
}
}
static void crypto_morus1280_glue_process_crypt(struct morus1280_state *state,
struct morus1280_ops ops,
struct aead_request *req)
{
struct skcipher_walk walk;
u8 *cursor_src, *cursor_dst;
unsigned int chunksize, base;
ops.skcipher_walk_init(&walk, req, false);
while (walk.nbytes) {
cursor_src = walk.src.virt.addr;
cursor_dst = walk.dst.virt.addr;
chunksize = walk.nbytes;
ops.crypt_blocks(state, cursor_src, cursor_dst, chunksize);
base = chunksize & ~(MORUS1280_BLOCK_SIZE - 1);
cursor_src += base;
cursor_dst += base;
chunksize &= MORUS1280_BLOCK_SIZE - 1;
if (chunksize > 0)
ops.crypt_tail(state, cursor_src, cursor_dst,
chunksize);
skcipher_walk_done(&walk, 0);
}
}
int crypto_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
if (keylen == MORUS1280_BLOCK_SIZE) {
memcpy(ctx->key.bytes, key, MORUS1280_BLOCK_SIZE);
} else if (keylen == MORUS1280_BLOCK_SIZE / 2) {
memcpy(ctx->key.bytes, key, keylen);
memcpy(ctx->key.bytes + keylen, key, keylen);
} else {
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
return 0;
}
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setkey);
int crypto_morus1280_glue_setauthsize(struct crypto_aead *tfm,
unsigned int authsize)
{
return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setauthsize);
static void crypto_morus1280_glue_crypt(struct aead_request *req,
struct morus1280_ops ops,
unsigned int cryptlen,
struct morus1280_block *tag_xor)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
struct morus1280_state state;
kernel_fpu_begin();
ctx->ops->init(&state, &ctx->key, req->iv);
crypto_morus1280_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
crypto_morus1280_glue_process_crypt(&state, ops, req);
ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
kernel_fpu_end();
}
int crypto_morus1280_glue_encrypt(struct aead_request *req)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
struct morus1280_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_encrypt,
.crypt_blocks = ctx->ops->enc,
.crypt_tail = ctx->ops->enc_tail,
};
struct morus1280_block tag = {};
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen;
crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag);
scatterwalk_map_and_copy(tag.bytes, req->dst,
req->assoclen + cryptlen, authsize, 1);
return 0;
}
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_encrypt);
int crypto_morus1280_glue_decrypt(struct aead_request *req)
{
static const u8 zeros[MORUS1280_BLOCK_SIZE] = {};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
struct morus1280_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_decrypt,
.crypt_blocks = ctx->ops->dec,
.crypt_tail = ctx->ops->dec_tail,
};
struct morus1280_block tag;
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen - authsize;
scatterwalk_map_and_copy(tag.bytes, req->src,
req->assoclen + cryptlen, authsize, 0);
crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag);
return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
}
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_decrypt);
void crypto_morus1280_glue_init_ops(struct crypto_aead *aead,
const struct morus1280_glue_ops *ops)
{
struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
ctx->ops = ops;
}
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_init_ops);
int cryptd_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
}
EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setkey);
int cryptd_morus1280_glue_setauthsize(struct crypto_aead *aead,
unsigned int authsize)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
}
EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setauthsize);
int cryptd_morus1280_glue_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() ||
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm);
aead_request_set_tfm(req, aead);
return crypto_aead_encrypt(req);
}
EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_encrypt);
int cryptd_morus1280_glue_decrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() ||
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm);
aead_request_set_tfm(req, aead);
return crypto_aead_decrypt(req);
}
EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_decrypt);
int cryptd_morus1280_glue_init_tfm(struct crypto_aead *aead)
{
struct cryptd_aead *cryptd_tfm;
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
const char *name = crypto_aead_alg(aead)->base.cra_driver_name;
char internal_name[CRYPTO_MAX_ALG_NAME];
if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name)
>= CRYPTO_MAX_ALG_NAME)
return -ENAMETOOLONG;
cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL,
CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
*ctx = cryptd_tfm;
crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
return 0;
}
EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_init_tfm);
void cryptd_morus1280_glue_exit_tfm(struct crypto_aead *aead)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_free_aead(*ctx);
}
EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_exit_tfm);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("MORUS-1280 AEAD mode -- glue for x86 optimizations");

View File

@ -0,0 +1,614 @@
/*
* SSE2 implementation of MORUS-640
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/frame.h>
#define SHUFFLE_MASK(i0, i1, i2, i3) \
(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
#define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
#define MASK3 SHUFFLE_MASK(1, 2, 3, 0)
#define STATE0 %xmm0
#define STATE1 %xmm1
#define STATE2 %xmm2
#define STATE3 %xmm3
#define STATE4 %xmm4
#define KEY %xmm5
#define MSG %xmm5
#define T0 %xmm6
#define T1 %xmm7
.section .rodata.cst16.morus640_const, "aM", @progbits, 32
.align 16
.Lmorus640_const_0:
.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Lmorus640_const_1:
.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
.section .rodata.cst16.morus640_counter, "aM", @progbits, 16
.align 16
.Lmorus640_counter:
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
.text
.macro morus640_round s0, s1, s2, s3, s4, b, w
movdqa \s1, T0
pand \s2, T0
pxor T0, \s0
pxor \s3, \s0
movdqa \s0, T0
pslld $\b, T0
psrld $(32 - \b), \s0
pxor T0, \s0
pshufd $\w, \s3, \s3
.endm
/*
* __morus640_update: internal ABI
* input:
* STATE[0-4] - input state
* MSG - message block
* output:
* STATE[0-4] - output state
* changed:
* T0
*/
__morus640_update:
morus640_round STATE0, STATE1, STATE2, STATE3, STATE4, 5, MASK1
pxor MSG, STATE1
morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
pxor MSG, STATE2
morus640_round STATE2, STATE3, STATE4, STATE0, STATE1, 7, MASK3
pxor MSG, STATE3
morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
pxor MSG, STATE4
morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1
ret
ENDPROC(__morus640_update)
/*
* __morus640_update_zero: internal ABI
* input:
* STATE[0-4] - input state
* output:
* STATE[0-4] - output state
* changed:
* T0
*/
__morus640_update_zero:
morus640_round STATE0, STATE1, STATE2, STATE3, STATE4, 5, MASK1
morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
morus640_round STATE2, STATE3, STATE4, STATE0, STATE1, 7, MASK3
morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1
ret
ENDPROC(__morus640_update_zero)
/*
* __load_partial: internal ABI
* input:
* %rsi - src
* %rcx - bytes
* output:
* MSG - message block
* changed:
* T0
* %r8
* %r9
*/
__load_partial:
xor %r9, %r9
pxor MSG, MSG
mov %rcx, %r8
and $0x1, %r8
jz .Lld_partial_1
mov %rcx, %r8
and $0x1E, %r8
add %rsi, %r8
mov (%r8), %r9b
.Lld_partial_1:
mov %rcx, %r8
and $0x2, %r8
jz .Lld_partial_2
mov %rcx, %r8
and $0x1C, %r8
add %rsi, %r8
shl $16, %r9
mov (%r8), %r9w
.Lld_partial_2:
mov %rcx, %r8
and $0x4, %r8
jz .Lld_partial_4
mov %rcx, %r8
and $0x18, %r8
add %rsi, %r8
shl $32, %r9
mov (%r8), %r8d
xor %r8, %r9
.Lld_partial_4:
movq %r9, MSG
mov %rcx, %r8
and $0x8, %r8
jz .Lld_partial_8
mov %rcx, %r8
and $0x10, %r8
add %rsi, %r8
pslldq $8, MSG
movq (%r8), T0
pxor T0, MSG
.Lld_partial_8:
ret
ENDPROC(__load_partial)
/*
* __store_partial: internal ABI
* input:
* %rdx - dst
* %rcx - bytes
* output:
* T0 - message block
* changed:
* %r8
* %r9
* %r10
*/
__store_partial:
mov %rcx, %r8
mov %rdx, %r9
movq T0, %r10
cmp $8, %r8
jl .Lst_partial_8
mov %r10, (%r9)
psrldq $8, T0
movq T0, %r10
sub $8, %r8
add $8, %r9
.Lst_partial_8:
cmp $4, %r8
jl .Lst_partial_4
mov %r10d, (%r9)
shr $32, %r10
sub $4, %r8
add $4, %r9
.Lst_partial_4:
cmp $2, %r8
jl .Lst_partial_2
mov %r10w, (%r9)
shr $16, %r10
sub $2, %r8
add $2, %r9
.Lst_partial_2:
cmp $1, %r8
jl .Lst_partial_1
mov %r10b, (%r9)
.Lst_partial_1:
ret
ENDPROC(__store_partial)
/*
* void crypto_morus640_sse2_init(void *state, const void *key, const void *iv);
*/
ENTRY(crypto_morus640_sse2_init)
FRAME_BEGIN
/* load IV: */
movdqu (%rdx), STATE0
/* load key: */
movdqu (%rsi), KEY
movdqa KEY, STATE1
/* load all ones: */
pcmpeqd STATE2, STATE2
/* load the constants: */
movdqa .Lmorus640_const_0, STATE3
movdqa .Lmorus640_const_1, STATE4
/* update 16 times with zero: */
call __morus640_update_zero
call __morus640_update_zero
call __morus640_update_zero
call __morus640_update_zero
call __morus640_update_zero
call __morus640_update_zero
call __morus640_update_zero
call __morus640_update_zero
call __morus640_update_zero
call __morus640_update_zero
call __morus640_update_zero
call __morus640_update_zero
call __morus640_update_zero
call __morus640_update_zero
call __morus640_update_zero
call __morus640_update_zero
/* xor-in the key again after updates: */
pxor KEY, STATE1
/* store the state: */
movdqu STATE0, (0 * 16)(%rdi)
movdqu STATE1, (1 * 16)(%rdi)
movdqu STATE2, (2 * 16)(%rdi)
movdqu STATE3, (3 * 16)(%rdi)
movdqu STATE4, (4 * 16)(%rdi)
FRAME_END
ret
ENDPROC(crypto_morus640_sse2_init)
/*
* void crypto_morus640_sse2_ad(void *state, const void *data,
* unsigned int length);
*/
ENTRY(crypto_morus640_sse2_ad)
FRAME_BEGIN
cmp $16, %rdx
jb .Lad_out
/* load the state: */
movdqu (0 * 16)(%rdi), STATE0
movdqu (1 * 16)(%rdi), STATE1
movdqu (2 * 16)(%rdi), STATE2
movdqu (3 * 16)(%rdi), STATE3
movdqu (4 * 16)(%rdi), STATE4
mov %rsi, %r8
and $0xF, %r8
jnz .Lad_u_loop
.align 4
.Lad_a_loop:
movdqa (%rsi), MSG
call __morus640_update
sub $16, %rdx
add $16, %rsi
cmp $16, %rdx
jge .Lad_a_loop
jmp .Lad_cont
.align 4
.Lad_u_loop:
movdqu (%rsi), MSG
call __morus640_update
sub $16, %rdx
add $16, %rsi
cmp $16, %rdx
jge .Lad_u_loop
.Lad_cont:
/* store the state: */
movdqu STATE0, (0 * 16)(%rdi)
movdqu STATE1, (1 * 16)(%rdi)
movdqu STATE2, (2 * 16)(%rdi)
movdqu STATE3, (3 * 16)(%rdi)
movdqu STATE4, (4 * 16)(%rdi)
.Lad_out:
FRAME_END
ret
ENDPROC(crypto_morus640_sse2_ad)
/*
* void crypto_morus640_sse2_enc(void *state, const void *src, void *dst,
* unsigned int length);
*/
ENTRY(crypto_morus640_sse2_enc)
FRAME_BEGIN
cmp $16, %rcx
jb .Lenc_out
/* load the state: */
movdqu (0 * 16)(%rdi), STATE0
movdqu (1 * 16)(%rdi), STATE1
movdqu (2 * 16)(%rdi), STATE2
movdqu (3 * 16)(%rdi), STATE3
movdqu (4 * 16)(%rdi), STATE4
mov %rsi, %r8
or %rdx, %r8
and $0xF, %r8
jnz .Lenc_u_loop
.align 4
.Lenc_a_loop:
movdqa (%rsi), MSG
movdqa MSG, T0
pxor STATE0, T0
pshufd $MASK3, STATE1, T1
pxor T1, T0
movdqa STATE2, T1
pand STATE3, T1
pxor T1, T0
movdqa T0, (%rdx)
call __morus640_update
sub $16, %rcx
add $16, %rsi
add $16, %rdx
cmp $16, %rcx
jge .Lenc_a_loop
jmp .Lenc_cont
.align 4
.Lenc_u_loop:
movdqu (%rsi), MSG
movdqa MSG, T0
pxor STATE0, T0
pshufd $MASK3, STATE1, T1
pxor T1, T0
movdqa STATE2, T1
pand STATE3, T1
pxor T1, T0
movdqu T0, (%rdx)
call __morus640_update
sub $16, %rcx
add $16, %rsi
add $16, %rdx
cmp $16, %rcx
jge .Lenc_u_loop
.Lenc_cont:
/* store the state: */
movdqu STATE0, (0 * 16)(%rdi)
movdqu STATE1, (1 * 16)(%rdi)
movdqu STATE2, (2 * 16)(%rdi)
movdqu STATE3, (3 * 16)(%rdi)
movdqu STATE4, (4 * 16)(%rdi)
.Lenc_out:
FRAME_END
ret
ENDPROC(crypto_morus640_sse2_enc)
/*
* void crypto_morus640_sse2_enc_tail(void *state, const void *src, void *dst,
* unsigned int length);
*/
ENTRY(crypto_morus640_sse2_enc_tail)
FRAME_BEGIN
/* load the state: */
movdqu (0 * 16)(%rdi), STATE0
movdqu (1 * 16)(%rdi), STATE1
movdqu (2 * 16)(%rdi), STATE2
movdqu (3 * 16)(%rdi), STATE3
movdqu (4 * 16)(%rdi), STATE4
/* encrypt message: */
call __load_partial
movdqa MSG, T0
pxor STATE0, T0
pshufd $MASK3, STATE1, T1
pxor T1, T0
movdqa STATE2, T1
pand STATE3, T1
pxor T1, T0
call __store_partial
call __morus640_update
/* store the state: */
movdqu STATE0, (0 * 16)(%rdi)
movdqu STATE1, (1 * 16)(%rdi)
movdqu STATE2, (2 * 16)(%rdi)
movdqu STATE3, (3 * 16)(%rdi)
movdqu STATE4, (4 * 16)(%rdi)
FRAME_END
ENDPROC(crypto_morus640_sse2_enc_tail)
/*
* void crypto_morus640_sse2_dec(void *state, const void *src, void *dst,
* unsigned int length);
*/
ENTRY(crypto_morus640_sse2_dec)
FRAME_BEGIN
cmp $16, %rcx
jb .Ldec_out
/* load the state: */
movdqu (0 * 16)(%rdi), STATE0
movdqu (1 * 16)(%rdi), STATE1
movdqu (2 * 16)(%rdi), STATE2
movdqu (3 * 16)(%rdi), STATE3
movdqu (4 * 16)(%rdi), STATE4
mov %rsi, %r8
or %rdx, %r8
and $0xF, %r8
jnz .Ldec_u_loop
.align 4
.Ldec_a_loop:
movdqa (%rsi), MSG
pxor STATE0, MSG
pshufd $MASK3, STATE1, T0
pxor T0, MSG
movdqa STATE2, T0
pand STATE3, T0
pxor T0, MSG
movdqa MSG, (%rdx)
call __morus640_update
sub $16, %rcx
add $16, %rsi
add $16, %rdx
cmp $16, %rcx
jge .Ldec_a_loop
jmp .Ldec_cont
.align 4
.Ldec_u_loop:
movdqu (%rsi), MSG
pxor STATE0, MSG
pshufd $MASK3, STATE1, T0
pxor T0, MSG
movdqa STATE2, T0
pand STATE3, T0
pxor T0, MSG
movdqu MSG, (%rdx)
call __morus640_update
sub $16, %rcx
add $16, %rsi
add $16, %rdx
cmp $16, %rcx
jge .Ldec_u_loop
.Ldec_cont:
/* store the state: */
movdqu STATE0, (0 * 16)(%rdi)
movdqu STATE1, (1 * 16)(%rdi)
movdqu STATE2, (2 * 16)(%rdi)
movdqu STATE3, (3 * 16)(%rdi)
movdqu STATE4, (4 * 16)(%rdi)
.Ldec_out:
FRAME_END
ret
ENDPROC(crypto_morus640_sse2_dec)
/*
* void crypto_morus640_sse2_dec_tail(void *state, const void *src, void *dst,
* unsigned int length);
*/
ENTRY(crypto_morus640_sse2_dec_tail)
FRAME_BEGIN
/* load the state: */
movdqu (0 * 16)(%rdi), STATE0
movdqu (1 * 16)(%rdi), STATE1
movdqu (2 * 16)(%rdi), STATE2
movdqu (3 * 16)(%rdi), STATE3
movdqu (4 * 16)(%rdi), STATE4
/* decrypt message: */
call __load_partial
pxor STATE0, MSG
pshufd $MASK3, STATE1, T0
pxor T0, MSG
movdqa STATE2, T0
pand STATE3, T0
pxor T0, MSG
movdqa MSG, T0
call __store_partial
/* mask with byte count: */
movq %rcx, T0
punpcklbw T0, T0
punpcklbw T0, T0
punpcklbw T0, T0
punpcklbw T0, T0
movdqa .Lmorus640_counter, T1
pcmpgtb T1, T0
pand T0, MSG
call __morus640_update
/* store the state: */
movdqu STATE0, (0 * 16)(%rdi)
movdqu STATE1, (1 * 16)(%rdi)
movdqu STATE2, (2 * 16)(%rdi)
movdqu STATE3, (3 * 16)(%rdi)
movdqu STATE4, (4 * 16)(%rdi)
FRAME_END
ret
ENDPROC(crypto_morus640_sse2_dec_tail)
/*
* void crypto_morus640_sse2_final(void *state, void *tag_xor,
* u64 assoclen, u64 cryptlen);
*/
ENTRY(crypto_morus640_sse2_final)
FRAME_BEGIN
/* load the state: */
movdqu (0 * 16)(%rdi), STATE0
movdqu (1 * 16)(%rdi), STATE1
movdqu (2 * 16)(%rdi), STATE2
movdqu (3 * 16)(%rdi), STATE3
movdqu (4 * 16)(%rdi), STATE4
/* xor state[0] into state[4]: */
pxor STATE0, STATE4
/* prepare length block: */
movq %rdx, MSG
movq %rcx, T0
pslldq $8, T0
pxor T0, MSG
psllq $3, MSG /* multiply by 8 (to get bit count) */
/* update state: */
call __morus640_update
call __morus640_update
call __morus640_update
call __morus640_update
call __morus640_update
call __morus640_update
call __morus640_update
call __morus640_update
call __morus640_update
call __morus640_update
/* xor tag: */
movdqu (%rsi), MSG
pxor STATE0, MSG
pshufd $MASK3, STATE1, T0
pxor T0, MSG
movdqa STATE2, T0
pand STATE3, T0
pxor T0, MSG
movdqu MSG, (%rsi)
FRAME_END
ret
ENDPROC(crypto_morus640_sse2_final)

View File

@ -0,0 +1,68 @@
/*
* The MORUS-640 Authenticated-Encryption Algorithm
* Glue for SSE2 implementation
*
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/internal/aead.h>
#include <crypto/morus640_glue.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/cpu_device_id.h>
asmlinkage void crypto_morus640_sse2_init(void *state, const void *key,
const void *iv);
asmlinkage void crypto_morus640_sse2_ad(void *state, const void *data,
unsigned int length);
asmlinkage void crypto_morus640_sse2_enc(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus640_sse2_dec(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus640_sse2_enc_tail(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus640_sse2_dec_tail(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor,
u64 assoclen, u64 cryptlen);
MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400);
static const struct x86_cpu_id sse2_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_XMM2),
{}
};
MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id);
static int __init crypto_morus640_sse2_module_init(void)
{
if (!x86_match_cpu(sse2_cpu_id))
return -ENODEV;
return crypto_register_aeads(crypto_morus640_sse2_algs,
ARRAY_SIZE(crypto_morus640_sse2_algs));
}
static void __exit crypto_morus640_sse2_module_exit(void)
{
crypto_unregister_aeads(crypto_morus640_sse2_algs,
ARRAY_SIZE(crypto_morus640_sse2_algs));
}
module_init(crypto_morus640_sse2_module_init);
module_exit(crypto_morus640_sse2_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("MORUS-640 AEAD algorithm -- SSE2 implementation");
MODULE_ALIAS_CRYPTO("morus640");
MODULE_ALIAS_CRYPTO("morus640-sse2");

View File

@ -0,0 +1,298 @@
/*
* The MORUS-640 Authenticated-Encryption Algorithm
* Common x86 SIMD glue skeleton
*
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/cryptd.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/morus640_glue.h>
#include <crypto/scatterwalk.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <asm/fpu/api.h>
struct morus640_state {
struct morus640_block s[MORUS_STATE_BLOCKS];
};
struct morus640_ops {
int (*skcipher_walk_init)(struct skcipher_walk *walk,
struct aead_request *req, bool atomic);
void (*crypt_blocks)(void *state, const void *src, void *dst,
unsigned int length);
void (*crypt_tail)(void *state, const void *src, void *dst,
unsigned int length);
};
static void crypto_morus640_glue_process_ad(
struct morus640_state *state,
const struct morus640_glue_ops *ops,
struct scatterlist *sg_src, unsigned int assoclen)
{
struct scatter_walk walk;
struct morus640_block buf;
unsigned int pos = 0;
scatterwalk_start(&walk, sg_src);
while (assoclen != 0) {
unsigned int size = scatterwalk_clamp(&walk, assoclen);
unsigned int left = size;
void *mapped = scatterwalk_map(&walk);
const u8 *src = (const u8 *)mapped;
if (pos + size >= MORUS640_BLOCK_SIZE) {
if (pos > 0) {
unsigned int fill = MORUS640_BLOCK_SIZE - pos;
memcpy(buf.bytes + pos, src, fill);
ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE);
pos = 0;
left -= fill;
src += fill;
}
ops->ad(state, src, left);
src += left & ~(MORUS640_BLOCK_SIZE - 1);
left &= MORUS640_BLOCK_SIZE - 1;
}
memcpy(buf.bytes + pos, src, left);
pos += left;
assoclen -= size;
scatterwalk_unmap(mapped);
scatterwalk_advance(&walk, size);
scatterwalk_done(&walk, 0, assoclen);
}
if (pos > 0) {
memset(buf.bytes + pos, 0, MORUS640_BLOCK_SIZE - pos);
ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE);
}
}
static void crypto_morus640_glue_process_crypt(struct morus640_state *state,
struct morus640_ops ops,
struct aead_request *req)
{
struct skcipher_walk walk;
u8 *cursor_src, *cursor_dst;
unsigned int chunksize, base;
ops.skcipher_walk_init(&walk, req, false);
while (walk.nbytes) {
cursor_src = walk.src.virt.addr;
cursor_dst = walk.dst.virt.addr;
chunksize = walk.nbytes;
ops.crypt_blocks(state, cursor_src, cursor_dst, chunksize);
base = chunksize & ~(MORUS640_BLOCK_SIZE - 1);
cursor_src += base;
cursor_dst += base;
chunksize &= MORUS640_BLOCK_SIZE - 1;
if (chunksize > 0)
ops.crypt_tail(state, cursor_src, cursor_dst,
chunksize);
skcipher_walk_done(&walk, 0);
}
}
int crypto_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct morus640_ctx *ctx = crypto_aead_ctx(aead);
if (keylen != MORUS640_BLOCK_SIZE) {
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
memcpy(ctx->key.bytes, key, MORUS640_BLOCK_SIZE);
return 0;
}
EXPORT_SYMBOL_GPL(crypto_morus640_glue_setkey);
int crypto_morus640_glue_setauthsize(struct crypto_aead *tfm,
unsigned int authsize)
{
return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(crypto_morus640_glue_setauthsize);
static void crypto_morus640_glue_crypt(struct aead_request *req,
struct morus640_ops ops,
unsigned int cryptlen,
struct morus640_block *tag_xor)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
struct morus640_state state;
kernel_fpu_begin();
ctx->ops->init(&state, &ctx->key, req->iv);
crypto_morus640_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
crypto_morus640_glue_process_crypt(&state, ops, req);
ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
kernel_fpu_end();
}
int crypto_morus640_glue_encrypt(struct aead_request *req)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
struct morus640_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_encrypt,
.crypt_blocks = ctx->ops->enc,
.crypt_tail = ctx->ops->enc_tail,
};
struct morus640_block tag = {};
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen;
crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag);
scatterwalk_map_and_copy(tag.bytes, req->dst,
req->assoclen + cryptlen, authsize, 1);
return 0;
}
EXPORT_SYMBOL_GPL(crypto_morus640_glue_encrypt);
int crypto_morus640_glue_decrypt(struct aead_request *req)
{
static const u8 zeros[MORUS640_BLOCK_SIZE] = {};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
struct morus640_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_decrypt,
.crypt_blocks = ctx->ops->dec,
.crypt_tail = ctx->ops->dec_tail,
};
struct morus640_block tag;
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen - authsize;
scatterwalk_map_and_copy(tag.bytes, req->src,
req->assoclen + cryptlen, authsize, 0);
crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag);
return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
}
EXPORT_SYMBOL_GPL(crypto_morus640_glue_decrypt);
void crypto_morus640_glue_init_ops(struct crypto_aead *aead,
const struct morus640_glue_ops *ops)
{
struct morus640_ctx *ctx = crypto_aead_ctx(aead);
ctx->ops = ops;
}
EXPORT_SYMBOL_GPL(crypto_morus640_glue_init_ops);
int cryptd_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
}
EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setkey);
int cryptd_morus640_glue_setauthsize(struct crypto_aead *aead,
unsigned int authsize)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
}
EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setauthsize);
int cryptd_morus640_glue_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() ||
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm);
aead_request_set_tfm(req, aead);
return crypto_aead_encrypt(req);
}
EXPORT_SYMBOL_GPL(cryptd_morus640_glue_encrypt);
int cryptd_morus640_glue_decrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() ||
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm);
aead_request_set_tfm(req, aead);
return crypto_aead_decrypt(req);
}
EXPORT_SYMBOL_GPL(cryptd_morus640_glue_decrypt);
int cryptd_morus640_glue_init_tfm(struct crypto_aead *aead)
{
struct cryptd_aead *cryptd_tfm;
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
const char *name = crypto_aead_alg(aead)->base.cra_driver_name;
char internal_name[CRYPTO_MAX_ALG_NAME];
if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name)
>= CRYPTO_MAX_ALG_NAME)
return -ENAMETOOLONG;
cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL,
CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
*ctx = cryptd_tfm;
crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
return 0;
}
EXPORT_SYMBOL_GPL(cryptd_morus640_glue_init_tfm);
void cryptd_morus640_glue_exit_tfm(struct crypto_aead *aead)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_free_aead(*ctx);
}
EXPORT_SYMBOL_GPL(cryptd_morus640_glue_exit_tfm);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("MORUS-640 AEAD mode -- glue for x86 optimizations");

View File

@ -1,938 +0,0 @@
# Derived from:
# salsa20_pm.s version 20051229
# D. J. Bernstein
# Public domain.
#include <linux/linkage.h>
.text
# enter salsa20_encrypt_bytes
ENTRY(salsa20_encrypt_bytes)
mov %esp,%eax
and $31,%eax
add $256,%eax
sub %eax,%esp
# eax_stack = eax
movl %eax,80(%esp)
# ebx_stack = ebx
movl %ebx,84(%esp)
# esi_stack = esi
movl %esi,88(%esp)
# edi_stack = edi
movl %edi,92(%esp)
# ebp_stack = ebp
movl %ebp,96(%esp)
# x = arg1
movl 4(%esp,%eax),%edx
# m = arg2
movl 8(%esp,%eax),%esi
# out = arg3
movl 12(%esp,%eax),%edi
# bytes = arg4
movl 16(%esp,%eax),%ebx
# bytes -= 0
sub $0,%ebx
# goto done if unsigned<=
jbe ._done
._start:
# in0 = *(uint32 *) (x + 0)
movl 0(%edx),%eax
# in1 = *(uint32 *) (x + 4)
movl 4(%edx),%ecx
# in2 = *(uint32 *) (x + 8)
movl 8(%edx),%ebp
# j0 = in0
movl %eax,164(%esp)
# in3 = *(uint32 *) (x + 12)
movl 12(%edx),%eax
# j1 = in1
movl %ecx,168(%esp)
# in4 = *(uint32 *) (x + 16)
movl 16(%edx),%ecx
# j2 = in2
movl %ebp,172(%esp)
# in5 = *(uint32 *) (x + 20)
movl 20(%edx),%ebp
# j3 = in3
movl %eax,176(%esp)
# in6 = *(uint32 *) (x + 24)
movl 24(%edx),%eax
# j4 = in4
movl %ecx,180(%esp)
# in7 = *(uint32 *) (x + 28)
movl 28(%edx),%ecx
# j5 = in5
movl %ebp,184(%esp)
# in8 = *(uint32 *) (x + 32)
movl 32(%edx),%ebp
# j6 = in6
movl %eax,188(%esp)
# in9 = *(uint32 *) (x + 36)
movl 36(%edx),%eax
# j7 = in7
movl %ecx,192(%esp)
# in10 = *(uint32 *) (x + 40)
movl 40(%edx),%ecx
# j8 = in8
movl %ebp,196(%esp)
# in11 = *(uint32 *) (x + 44)
movl 44(%edx),%ebp
# j9 = in9
movl %eax,200(%esp)
# in12 = *(uint32 *) (x + 48)
movl 48(%edx),%eax
# j10 = in10
movl %ecx,204(%esp)
# in13 = *(uint32 *) (x + 52)
movl 52(%edx),%ecx
# j11 = in11
movl %ebp,208(%esp)
# in14 = *(uint32 *) (x + 56)
movl 56(%edx),%ebp
# j12 = in12
movl %eax,212(%esp)
# in15 = *(uint32 *) (x + 60)
movl 60(%edx),%eax
# j13 = in13
movl %ecx,216(%esp)
# j14 = in14
movl %ebp,220(%esp)
# j15 = in15
movl %eax,224(%esp)
# x_backup = x
movl %edx,64(%esp)
._bytesatleast1:
# bytes - 64
cmp $64,%ebx
# goto nocopy if unsigned>=
jae ._nocopy
# ctarget = out
movl %edi,228(%esp)
# out = &tmp
leal 0(%esp),%edi
# i = bytes
mov %ebx,%ecx
# while (i) { *out++ = *m++; --i }
rep movsb
# out = &tmp
leal 0(%esp),%edi
# m = &tmp
leal 0(%esp),%esi
._nocopy:
# out_backup = out
movl %edi,72(%esp)
# m_backup = m
movl %esi,68(%esp)
# bytes_backup = bytes
movl %ebx,76(%esp)
# in0 = j0
movl 164(%esp),%eax
# in1 = j1
movl 168(%esp),%ecx
# in2 = j2
movl 172(%esp),%edx
# in3 = j3
movl 176(%esp),%ebx
# x0 = in0
movl %eax,100(%esp)
# x1 = in1
movl %ecx,104(%esp)
# x2 = in2
movl %edx,108(%esp)
# x3 = in3
movl %ebx,112(%esp)
# in4 = j4
movl 180(%esp),%eax
# in5 = j5
movl 184(%esp),%ecx
# in6 = j6
movl 188(%esp),%edx
# in7 = j7
movl 192(%esp),%ebx
# x4 = in4
movl %eax,116(%esp)
# x5 = in5
movl %ecx,120(%esp)
# x6 = in6
movl %edx,124(%esp)
# x7 = in7
movl %ebx,128(%esp)
# in8 = j8
movl 196(%esp),%eax
# in9 = j9
movl 200(%esp),%ecx
# in10 = j10
movl 204(%esp),%edx
# in11 = j11
movl 208(%esp),%ebx
# x8 = in8
movl %eax,132(%esp)
# x9 = in9
movl %ecx,136(%esp)
# x10 = in10
movl %edx,140(%esp)
# x11 = in11
movl %ebx,144(%esp)
# in12 = j12
movl 212(%esp),%eax
# in13 = j13
movl 216(%esp),%ecx
# in14 = j14
movl 220(%esp),%edx
# in15 = j15
movl 224(%esp),%ebx
# x12 = in12
movl %eax,148(%esp)
# x13 = in13
movl %ecx,152(%esp)
# x14 = in14
movl %edx,156(%esp)
# x15 = in15
movl %ebx,160(%esp)
# i = 20
mov $20,%ebp
# p = x0
movl 100(%esp),%eax
# s = x5
movl 120(%esp),%ecx
# t = x10
movl 140(%esp),%edx
# w = x15
movl 160(%esp),%ebx
._mainloop:
# x0 = p
movl %eax,100(%esp)
# x10 = t
movl %edx,140(%esp)
# p += x12
addl 148(%esp),%eax
# x5 = s
movl %ecx,120(%esp)
# t += x6
addl 124(%esp),%edx
# x15 = w
movl %ebx,160(%esp)
# r = x1
movl 104(%esp),%esi
# r += s
add %ecx,%esi
# v = x11
movl 144(%esp),%edi
# v += w
add %ebx,%edi
# p <<<= 7
rol $7,%eax
# p ^= x4
xorl 116(%esp),%eax
# t <<<= 7
rol $7,%edx
# t ^= x14
xorl 156(%esp),%edx
# r <<<= 7
rol $7,%esi
# r ^= x9
xorl 136(%esp),%esi
# v <<<= 7
rol $7,%edi
# v ^= x3
xorl 112(%esp),%edi
# x4 = p
movl %eax,116(%esp)
# x14 = t
movl %edx,156(%esp)
# p += x0
addl 100(%esp),%eax
# x9 = r
movl %esi,136(%esp)
# t += x10
addl 140(%esp),%edx
# x3 = v
movl %edi,112(%esp)
# p <<<= 9
rol $9,%eax
# p ^= x8
xorl 132(%esp),%eax
# t <<<= 9
rol $9,%edx
# t ^= x2
xorl 108(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 9
rol $9,%ecx
# s ^= x13
xorl 152(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 9
rol $9,%ebx
# w ^= x7
xorl 128(%esp),%ebx
# x8 = p
movl %eax,132(%esp)
# x2 = t
movl %edx,108(%esp)
# p += x4
addl 116(%esp),%eax
# x13 = s
movl %ecx,152(%esp)
# t += x14
addl 156(%esp),%edx
# x7 = w
movl %ebx,128(%esp)
# p <<<= 13
rol $13,%eax
# p ^= x12
xorl 148(%esp),%eax
# t <<<= 13
rol $13,%edx
# t ^= x6
xorl 124(%esp),%edx
# r += s
add %ecx,%esi
# r <<<= 13
rol $13,%esi
# r ^= x1
xorl 104(%esp),%esi
# v += w
add %ebx,%edi
# v <<<= 13
rol $13,%edi
# v ^= x11
xorl 144(%esp),%edi
# x12 = p
movl %eax,148(%esp)
# x6 = t
movl %edx,124(%esp)
# p += x8
addl 132(%esp),%eax
# x1 = r
movl %esi,104(%esp)
# t += x2
addl 108(%esp),%edx
# x11 = v
movl %edi,144(%esp)
# p <<<= 18
rol $18,%eax
# p ^= x0
xorl 100(%esp),%eax
# t <<<= 18
rol $18,%edx
# t ^= x10
xorl 140(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 18
rol $18,%ecx
# s ^= x5
xorl 120(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 18
rol $18,%ebx
# w ^= x15
xorl 160(%esp),%ebx
# x0 = p
movl %eax,100(%esp)
# x10 = t
movl %edx,140(%esp)
# p += x3
addl 112(%esp),%eax
# p <<<= 7
rol $7,%eax
# x5 = s
movl %ecx,120(%esp)
# t += x9
addl 136(%esp),%edx
# x15 = w
movl %ebx,160(%esp)
# r = x4
movl 116(%esp),%esi
# r += s
add %ecx,%esi
# v = x14
movl 156(%esp),%edi
# v += w
add %ebx,%edi
# p ^= x1
xorl 104(%esp),%eax
# t <<<= 7
rol $7,%edx
# t ^= x11
xorl 144(%esp),%edx
# r <<<= 7
rol $7,%esi
# r ^= x6
xorl 124(%esp),%esi
# v <<<= 7
rol $7,%edi
# v ^= x12
xorl 148(%esp),%edi
# x1 = p
movl %eax,104(%esp)
# x11 = t
movl %edx,144(%esp)
# p += x0
addl 100(%esp),%eax
# x6 = r
movl %esi,124(%esp)
# t += x10
addl 140(%esp),%edx
# x12 = v
movl %edi,148(%esp)
# p <<<= 9
rol $9,%eax
# p ^= x2
xorl 108(%esp),%eax
# t <<<= 9
rol $9,%edx
# t ^= x8
xorl 132(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 9
rol $9,%ecx
# s ^= x7
xorl 128(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 9
rol $9,%ebx
# w ^= x13
xorl 152(%esp),%ebx
# x2 = p
movl %eax,108(%esp)
# x8 = t
movl %edx,132(%esp)
# p += x1
addl 104(%esp),%eax
# x7 = s
movl %ecx,128(%esp)
# t += x11
addl 144(%esp),%edx
# x13 = w
movl %ebx,152(%esp)
# p <<<= 13
rol $13,%eax
# p ^= x3
xorl 112(%esp),%eax
# t <<<= 13
rol $13,%edx
# t ^= x9
xorl 136(%esp),%edx
# r += s
add %ecx,%esi
# r <<<= 13
rol $13,%esi
# r ^= x4
xorl 116(%esp),%esi
# v += w
add %ebx,%edi
# v <<<= 13
rol $13,%edi
# v ^= x14
xorl 156(%esp),%edi
# x3 = p
movl %eax,112(%esp)
# x9 = t
movl %edx,136(%esp)
# p += x2
addl 108(%esp),%eax
# x4 = r
movl %esi,116(%esp)
# t += x8
addl 132(%esp),%edx
# x14 = v
movl %edi,156(%esp)
# p <<<= 18
rol $18,%eax
# p ^= x0
xorl 100(%esp),%eax
# t <<<= 18
rol $18,%edx
# t ^= x10
xorl 140(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 18
rol $18,%ecx
# s ^= x5
xorl 120(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 18
rol $18,%ebx
# w ^= x15
xorl 160(%esp),%ebx
# x0 = p
movl %eax,100(%esp)
# x10 = t
movl %edx,140(%esp)
# p += x12
addl 148(%esp),%eax
# x5 = s
movl %ecx,120(%esp)
# t += x6
addl 124(%esp),%edx
# x15 = w
movl %ebx,160(%esp)
# r = x1
movl 104(%esp),%esi
# r += s
add %ecx,%esi
# v = x11
movl 144(%esp),%edi
# v += w
add %ebx,%edi
# p <<<= 7
rol $7,%eax
# p ^= x4
xorl 116(%esp),%eax
# t <<<= 7
rol $7,%edx
# t ^= x14
xorl 156(%esp),%edx
# r <<<= 7
rol $7,%esi
# r ^= x9
xorl 136(%esp),%esi
# v <<<= 7
rol $7,%edi
# v ^= x3
xorl 112(%esp),%edi
# x4 = p
movl %eax,116(%esp)
# x14 = t
movl %edx,156(%esp)
# p += x0
addl 100(%esp),%eax
# x9 = r
movl %esi,136(%esp)
# t += x10
addl 140(%esp),%edx
# x3 = v
movl %edi,112(%esp)
# p <<<= 9
rol $9,%eax
# p ^= x8
xorl 132(%esp),%eax
# t <<<= 9
rol $9,%edx
# t ^= x2
xorl 108(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 9
rol $9,%ecx
# s ^= x13
xorl 152(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 9
rol $9,%ebx
# w ^= x7
xorl 128(%esp),%ebx
# x8 = p
movl %eax,132(%esp)
# x2 = t
movl %edx,108(%esp)
# p += x4
addl 116(%esp),%eax
# x13 = s
movl %ecx,152(%esp)
# t += x14
addl 156(%esp),%edx
# x7 = w
movl %ebx,128(%esp)
# p <<<= 13
rol $13,%eax
# p ^= x12
xorl 148(%esp),%eax
# t <<<= 13
rol $13,%edx
# t ^= x6
xorl 124(%esp),%edx
# r += s
add %ecx,%esi
# r <<<= 13
rol $13,%esi
# r ^= x1
xorl 104(%esp),%esi
# v += w
add %ebx,%edi
# v <<<= 13
rol $13,%edi
# v ^= x11
xorl 144(%esp),%edi
# x12 = p
movl %eax,148(%esp)
# x6 = t
movl %edx,124(%esp)
# p += x8
addl 132(%esp),%eax
# x1 = r
movl %esi,104(%esp)
# t += x2
addl 108(%esp),%edx
# x11 = v
movl %edi,144(%esp)
# p <<<= 18
rol $18,%eax
# p ^= x0
xorl 100(%esp),%eax
# t <<<= 18
rol $18,%edx
# t ^= x10
xorl 140(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 18
rol $18,%ecx
# s ^= x5
xorl 120(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 18
rol $18,%ebx
# w ^= x15
xorl 160(%esp),%ebx
# x0 = p
movl %eax,100(%esp)
# x10 = t
movl %edx,140(%esp)
# p += x3
addl 112(%esp),%eax
# p <<<= 7
rol $7,%eax
# x5 = s
movl %ecx,120(%esp)
# t += x9
addl 136(%esp),%edx
# x15 = w
movl %ebx,160(%esp)
# r = x4
movl 116(%esp),%esi
# r += s
add %ecx,%esi
# v = x14
movl 156(%esp),%edi
# v += w
add %ebx,%edi
# p ^= x1
xorl 104(%esp),%eax
# t <<<= 7
rol $7,%edx
# t ^= x11
xorl 144(%esp),%edx
# r <<<= 7
rol $7,%esi
# r ^= x6
xorl 124(%esp),%esi
# v <<<= 7
rol $7,%edi
# v ^= x12
xorl 148(%esp),%edi
# x1 = p
movl %eax,104(%esp)
# x11 = t
movl %edx,144(%esp)
# p += x0
addl 100(%esp),%eax
# x6 = r
movl %esi,124(%esp)
# t += x10
addl 140(%esp),%edx
# x12 = v
movl %edi,148(%esp)
# p <<<= 9
rol $9,%eax
# p ^= x2
xorl 108(%esp),%eax
# t <<<= 9
rol $9,%edx
# t ^= x8
xorl 132(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 9
rol $9,%ecx
# s ^= x7
xorl 128(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 9
rol $9,%ebx
# w ^= x13
xorl 152(%esp),%ebx
# x2 = p
movl %eax,108(%esp)
# x8 = t
movl %edx,132(%esp)
# p += x1
addl 104(%esp),%eax
# x7 = s
movl %ecx,128(%esp)
# t += x11
addl 144(%esp),%edx
# x13 = w
movl %ebx,152(%esp)
# p <<<= 13
rol $13,%eax
# p ^= x3
xorl 112(%esp),%eax
# t <<<= 13
rol $13,%edx
# t ^= x9
xorl 136(%esp),%edx
# r += s
add %ecx,%esi
# r <<<= 13
rol $13,%esi
# r ^= x4
xorl 116(%esp),%esi
# v += w
add %ebx,%edi
# v <<<= 13
rol $13,%edi
# v ^= x14
xorl 156(%esp),%edi
# x3 = p
movl %eax,112(%esp)
# x9 = t
movl %edx,136(%esp)
# p += x2
addl 108(%esp),%eax
# x4 = r
movl %esi,116(%esp)
# t += x8
addl 132(%esp),%edx
# x14 = v
movl %edi,156(%esp)
# p <<<= 18
rol $18,%eax
# p ^= x0
xorl 100(%esp),%eax
# t <<<= 18
rol $18,%edx
# t ^= x10
xorl 140(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 18
rol $18,%ecx
# s ^= x5
xorl 120(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 18
rol $18,%ebx
# w ^= x15
xorl 160(%esp),%ebx
# i -= 4
sub $4,%ebp
# goto mainloop if unsigned >
ja ._mainloop
# x0 = p
movl %eax,100(%esp)
# x5 = s
movl %ecx,120(%esp)
# x10 = t
movl %edx,140(%esp)
# x15 = w
movl %ebx,160(%esp)
# out = out_backup
movl 72(%esp),%edi
# m = m_backup
movl 68(%esp),%esi
# in0 = x0
movl 100(%esp),%eax
# in1 = x1
movl 104(%esp),%ecx
# in0 += j0
addl 164(%esp),%eax
# in1 += j1
addl 168(%esp),%ecx
# in0 ^= *(uint32 *) (m + 0)
xorl 0(%esi),%eax
# in1 ^= *(uint32 *) (m + 4)
xorl 4(%esi),%ecx
# *(uint32 *) (out + 0) = in0
movl %eax,0(%edi)
# *(uint32 *) (out + 4) = in1
movl %ecx,4(%edi)
# in2 = x2
movl 108(%esp),%eax
# in3 = x3
movl 112(%esp),%ecx
# in2 += j2
addl 172(%esp),%eax
# in3 += j3
addl 176(%esp),%ecx
# in2 ^= *(uint32 *) (m + 8)
xorl 8(%esi),%eax
# in3 ^= *(uint32 *) (m + 12)
xorl 12(%esi),%ecx
# *(uint32 *) (out + 8) = in2
movl %eax,8(%edi)
# *(uint32 *) (out + 12) = in3
movl %ecx,12(%edi)
# in4 = x4
movl 116(%esp),%eax
# in5 = x5
movl 120(%esp),%ecx
# in4 += j4
addl 180(%esp),%eax
# in5 += j5
addl 184(%esp),%ecx
# in4 ^= *(uint32 *) (m + 16)
xorl 16(%esi),%eax
# in5 ^= *(uint32 *) (m + 20)
xorl 20(%esi),%ecx
# *(uint32 *) (out + 16) = in4
movl %eax,16(%edi)
# *(uint32 *) (out + 20) = in5
movl %ecx,20(%edi)
# in6 = x6
movl 124(%esp),%eax
# in7 = x7
movl 128(%esp),%ecx
# in6 += j6
addl 188(%esp),%eax
# in7 += j7
addl 192(%esp),%ecx
# in6 ^= *(uint32 *) (m + 24)
xorl 24(%esi),%eax
# in7 ^= *(uint32 *) (m + 28)
xorl 28(%esi),%ecx
# *(uint32 *) (out + 24) = in6
movl %eax,24(%edi)
# *(uint32 *) (out + 28) = in7
movl %ecx,28(%edi)
# in8 = x8
movl 132(%esp),%eax
# in9 = x9
movl 136(%esp),%ecx
# in8 += j8
addl 196(%esp),%eax
# in9 += j9
addl 200(%esp),%ecx
# in8 ^= *(uint32 *) (m + 32)
xorl 32(%esi),%eax
# in9 ^= *(uint32 *) (m + 36)
xorl 36(%esi),%ecx
# *(uint32 *) (out + 32) = in8
movl %eax,32(%edi)
# *(uint32 *) (out + 36) = in9
movl %ecx,36(%edi)
# in10 = x10
movl 140(%esp),%eax
# in11 = x11
movl 144(%esp),%ecx
# in10 += j10
addl 204(%esp),%eax
# in11 += j11
addl 208(%esp),%ecx
# in10 ^= *(uint32 *) (m + 40)
xorl 40(%esi),%eax
# in11 ^= *(uint32 *) (m + 44)
xorl 44(%esi),%ecx
# *(uint32 *) (out + 40) = in10
movl %eax,40(%edi)
# *(uint32 *) (out + 44) = in11
movl %ecx,44(%edi)
# in12 = x12
movl 148(%esp),%eax
# in13 = x13
movl 152(%esp),%ecx
# in12 += j12
addl 212(%esp),%eax
# in13 += j13
addl 216(%esp),%ecx
# in12 ^= *(uint32 *) (m + 48)
xorl 48(%esi),%eax
# in13 ^= *(uint32 *) (m + 52)
xorl 52(%esi),%ecx
# *(uint32 *) (out + 48) = in12
movl %eax,48(%edi)
# *(uint32 *) (out + 52) = in13
movl %ecx,52(%edi)
# in14 = x14
movl 156(%esp),%eax
# in15 = x15
movl 160(%esp),%ecx
# in14 += j14
addl 220(%esp),%eax
# in15 += j15
addl 224(%esp),%ecx
# in14 ^= *(uint32 *) (m + 56)
xorl 56(%esi),%eax
# in15 ^= *(uint32 *) (m + 60)
xorl 60(%esi),%ecx
# *(uint32 *) (out + 56) = in14
movl %eax,56(%edi)
# *(uint32 *) (out + 60) = in15
movl %ecx,60(%edi)
# bytes = bytes_backup
movl 76(%esp),%ebx
# in8 = j8
movl 196(%esp),%eax
# in9 = j9
movl 200(%esp),%ecx
# in8 += 1
add $1,%eax
# in9 += 0 + carry
adc $0,%ecx
# j8 = in8
movl %eax,196(%esp)
# j9 = in9
movl %ecx,200(%esp)
# bytes - 64
cmp $64,%ebx
# goto bytesatleast65 if unsigned>
ja ._bytesatleast65
# goto bytesatleast64 if unsigned>=
jae ._bytesatleast64
# m = out
mov %edi,%esi
# out = ctarget
movl 228(%esp),%edi
# i = bytes
mov %ebx,%ecx
# while (i) { *out++ = *m++; --i }
rep movsb
._bytesatleast64:
# x = x_backup
movl 64(%esp),%eax
# in8 = j8
movl 196(%esp),%ecx
# in9 = j9
movl 200(%esp),%edx
# *(uint32 *) (x + 32) = in8
movl %ecx,32(%eax)
# *(uint32 *) (x + 36) = in9
movl %edx,36(%eax)
._done:
# eax = eax_stack
movl 80(%esp),%eax
# ebx = ebx_stack
movl 84(%esp),%ebx
# esi = esi_stack
movl 88(%esp),%esi
# edi = edi_stack
movl 92(%esp),%edi
# ebp = ebp_stack
movl 96(%esp),%ebp
# leave
add %eax,%esp
ret
._bytesatleast65:
# bytes -= 64
sub $64,%ebx
# out += 64
add $64,%edi
# m += 64
add $64,%esi
# goto bytesatleast1
jmp ._bytesatleast1
ENDPROC(salsa20_encrypt_bytes)

View File

@ -1,805 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
# enter salsa20_encrypt_bytes
ENTRY(salsa20_encrypt_bytes)
mov %rsp,%r11
and $31,%r11
add $256,%r11
sub %r11,%rsp
# x = arg1
mov %rdi,%r8
# m = arg2
mov %rsi,%rsi
# out = arg3
mov %rdx,%rdi
# bytes = arg4
mov %rcx,%rdx
# unsigned>? bytes - 0
cmp $0,%rdx
# comment:fp stack unchanged by jump
# goto done if !unsigned>
jbe ._done
# comment:fp stack unchanged by fallthrough
# start:
._start:
# r11_stack = r11
movq %r11,0(%rsp)
# r12_stack = r12
movq %r12,8(%rsp)
# r13_stack = r13
movq %r13,16(%rsp)
# r14_stack = r14
movq %r14,24(%rsp)
# r15_stack = r15
movq %r15,32(%rsp)
# rbx_stack = rbx
movq %rbx,40(%rsp)
# rbp_stack = rbp
movq %rbp,48(%rsp)
# in0 = *(uint64 *) (x + 0)
movq 0(%r8),%rcx
# in2 = *(uint64 *) (x + 8)
movq 8(%r8),%r9
# in4 = *(uint64 *) (x + 16)
movq 16(%r8),%rax
# in6 = *(uint64 *) (x + 24)
movq 24(%r8),%r10
# in8 = *(uint64 *) (x + 32)
movq 32(%r8),%r11
# in10 = *(uint64 *) (x + 40)
movq 40(%r8),%r12
# in12 = *(uint64 *) (x + 48)
movq 48(%r8),%r13
# in14 = *(uint64 *) (x + 56)
movq 56(%r8),%r14
# j0 = in0
movq %rcx,56(%rsp)
# j2 = in2
movq %r9,64(%rsp)
# j4 = in4
movq %rax,72(%rsp)
# j6 = in6
movq %r10,80(%rsp)
# j8 = in8
movq %r11,88(%rsp)
# j10 = in10
movq %r12,96(%rsp)
# j12 = in12
movq %r13,104(%rsp)
# j14 = in14
movq %r14,112(%rsp)
# x_backup = x
movq %r8,120(%rsp)
# bytesatleast1:
._bytesatleast1:
# unsigned<? bytes - 64
cmp $64,%rdx
# comment:fp stack unchanged by jump
# goto nocopy if !unsigned<
jae ._nocopy
# ctarget = out
movq %rdi,128(%rsp)
# out = &tmp
leaq 192(%rsp),%rdi
# i = bytes
mov %rdx,%rcx
# while (i) { *out++ = *m++; --i }
rep movsb
# out = &tmp
leaq 192(%rsp),%rdi
# m = &tmp
leaq 192(%rsp),%rsi
# comment:fp stack unchanged by fallthrough
# nocopy:
._nocopy:
# out_backup = out
movq %rdi,136(%rsp)
# m_backup = m
movq %rsi,144(%rsp)
# bytes_backup = bytes
movq %rdx,152(%rsp)
# x1 = j0
movq 56(%rsp),%rdi
# x0 = x1
mov %rdi,%rdx
# (uint64) x1 >>= 32
shr $32,%rdi
# x3 = j2
movq 64(%rsp),%rsi
# x2 = x3
mov %rsi,%rcx
# (uint64) x3 >>= 32
shr $32,%rsi
# x5 = j4
movq 72(%rsp),%r8
# x4 = x5
mov %r8,%r9
# (uint64) x5 >>= 32
shr $32,%r8
# x5_stack = x5
movq %r8,160(%rsp)
# x7 = j6
movq 80(%rsp),%r8
# x6 = x7
mov %r8,%rax
# (uint64) x7 >>= 32
shr $32,%r8
# x9 = j8
movq 88(%rsp),%r10
# x8 = x9
mov %r10,%r11
# (uint64) x9 >>= 32
shr $32,%r10
# x11 = j10
movq 96(%rsp),%r12
# x10 = x11
mov %r12,%r13
# x10_stack = x10
movq %r13,168(%rsp)
# (uint64) x11 >>= 32
shr $32,%r12
# x13 = j12
movq 104(%rsp),%r13
# x12 = x13
mov %r13,%r14
# (uint64) x13 >>= 32
shr $32,%r13
# x15 = j14
movq 112(%rsp),%r15
# x14 = x15
mov %r15,%rbx
# (uint64) x15 >>= 32
shr $32,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# i = 20
mov $20,%r15
# mainloop:
._mainloop:
# i_backup = i
movq %r15,184(%rsp)
# x5 = x5_stack
movq 160(%rsp),%r15
# a = x12 + x0
lea (%r14,%rdx),%rbp
# (uint32) a <<<= 7
rol $7,%ebp
# x4 ^= a
xor %rbp,%r9
# b = x1 + x5
lea (%rdi,%r15),%rbp
# (uint32) b <<<= 7
rol $7,%ebp
# x9 ^= b
xor %rbp,%r10
# a = x0 + x4
lea (%rdx,%r9),%rbp
# (uint32) a <<<= 9
rol $9,%ebp
# x8 ^= a
xor %rbp,%r11
# b = x5 + x9
lea (%r15,%r10),%rbp
# (uint32) b <<<= 9
rol $9,%ebp
# x13 ^= b
xor %rbp,%r13
# a = x4 + x8
lea (%r9,%r11),%rbp
# (uint32) a <<<= 13
rol $13,%ebp
# x12 ^= a
xor %rbp,%r14
# b = x9 + x13
lea (%r10,%r13),%rbp
# (uint32) b <<<= 13
rol $13,%ebp
# x1 ^= b
xor %rbp,%rdi
# a = x8 + x12
lea (%r11,%r14),%rbp
# (uint32) a <<<= 18
rol $18,%ebp
# x0 ^= a
xor %rbp,%rdx
# b = x13 + x1
lea (%r13,%rdi),%rbp
# (uint32) b <<<= 18
rol $18,%ebp
# x5 ^= b
xor %rbp,%r15
# x10 = x10_stack
movq 168(%rsp),%rbp
# x5_stack = x5
movq %r15,160(%rsp)
# c = x6 + x10
lea (%rax,%rbp),%r15
# (uint32) c <<<= 7
rol $7,%r15d
# x14 ^= c
xor %r15,%rbx
# c = x10 + x14
lea (%rbp,%rbx),%r15
# (uint32) c <<<= 9
rol $9,%r15d
# x2 ^= c
xor %r15,%rcx
# c = x14 + x2
lea (%rbx,%rcx),%r15
# (uint32) c <<<= 13
rol $13,%r15d
# x6 ^= c
xor %r15,%rax
# c = x2 + x6
lea (%rcx,%rax),%r15
# (uint32) c <<<= 18
rol $18,%r15d
# x10 ^= c
xor %r15,%rbp
# x15 = x15_stack
movq 176(%rsp),%r15
# x10_stack = x10
movq %rbp,168(%rsp)
# d = x11 + x15
lea (%r12,%r15),%rbp
# (uint32) d <<<= 7
rol $7,%ebp
# x3 ^= d
xor %rbp,%rsi
# d = x15 + x3
lea (%r15,%rsi),%rbp
# (uint32) d <<<= 9
rol $9,%ebp
# x7 ^= d
xor %rbp,%r8
# d = x3 + x7
lea (%rsi,%r8),%rbp
# (uint32) d <<<= 13
rol $13,%ebp
# x11 ^= d
xor %rbp,%r12
# d = x7 + x11
lea (%r8,%r12),%rbp
# (uint32) d <<<= 18
rol $18,%ebp
# x15 ^= d
xor %rbp,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# x5 = x5_stack
movq 160(%rsp),%r15
# a = x3 + x0
lea (%rsi,%rdx),%rbp
# (uint32) a <<<= 7
rol $7,%ebp
# x1 ^= a
xor %rbp,%rdi
# b = x4 + x5
lea (%r9,%r15),%rbp
# (uint32) b <<<= 7
rol $7,%ebp
# x6 ^= b
xor %rbp,%rax
# a = x0 + x1
lea (%rdx,%rdi),%rbp
# (uint32) a <<<= 9
rol $9,%ebp
# x2 ^= a
xor %rbp,%rcx
# b = x5 + x6
lea (%r15,%rax),%rbp
# (uint32) b <<<= 9
rol $9,%ebp
# x7 ^= b
xor %rbp,%r8
# a = x1 + x2
lea (%rdi,%rcx),%rbp
# (uint32) a <<<= 13
rol $13,%ebp
# x3 ^= a
xor %rbp,%rsi
# b = x6 + x7
lea (%rax,%r8),%rbp
# (uint32) b <<<= 13
rol $13,%ebp
# x4 ^= b
xor %rbp,%r9
# a = x2 + x3
lea (%rcx,%rsi),%rbp
# (uint32) a <<<= 18
rol $18,%ebp
# x0 ^= a
xor %rbp,%rdx
# b = x7 + x4
lea (%r8,%r9),%rbp
# (uint32) b <<<= 18
rol $18,%ebp
# x5 ^= b
xor %rbp,%r15
# x10 = x10_stack
movq 168(%rsp),%rbp
# x5_stack = x5
movq %r15,160(%rsp)
# c = x9 + x10
lea (%r10,%rbp),%r15
# (uint32) c <<<= 7
rol $7,%r15d
# x11 ^= c
xor %r15,%r12
# c = x10 + x11
lea (%rbp,%r12),%r15
# (uint32) c <<<= 9
rol $9,%r15d
# x8 ^= c
xor %r15,%r11
# c = x11 + x8
lea (%r12,%r11),%r15
# (uint32) c <<<= 13
rol $13,%r15d
# x9 ^= c
xor %r15,%r10
# c = x8 + x9
lea (%r11,%r10),%r15
# (uint32) c <<<= 18
rol $18,%r15d
# x10 ^= c
xor %r15,%rbp
# x15 = x15_stack
movq 176(%rsp),%r15
# x10_stack = x10
movq %rbp,168(%rsp)
# d = x14 + x15
lea (%rbx,%r15),%rbp
# (uint32) d <<<= 7
rol $7,%ebp
# x12 ^= d
xor %rbp,%r14
# d = x15 + x12
lea (%r15,%r14),%rbp
# (uint32) d <<<= 9
rol $9,%ebp
# x13 ^= d
xor %rbp,%r13
# d = x12 + x13
lea (%r14,%r13),%rbp
# (uint32) d <<<= 13
rol $13,%ebp
# x14 ^= d
xor %rbp,%rbx
# d = x13 + x14
lea (%r13,%rbx),%rbp
# (uint32) d <<<= 18
rol $18,%ebp
# x15 ^= d
xor %rbp,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# x5 = x5_stack
movq 160(%rsp),%r15
# a = x12 + x0
lea (%r14,%rdx),%rbp
# (uint32) a <<<= 7
rol $7,%ebp
# x4 ^= a
xor %rbp,%r9
# b = x1 + x5
lea (%rdi,%r15),%rbp
# (uint32) b <<<= 7
rol $7,%ebp
# x9 ^= b
xor %rbp,%r10
# a = x0 + x4
lea (%rdx,%r9),%rbp
# (uint32) a <<<= 9
rol $9,%ebp
# x8 ^= a
xor %rbp,%r11
# b = x5 + x9
lea (%r15,%r10),%rbp
# (uint32) b <<<= 9
rol $9,%ebp
# x13 ^= b
xor %rbp,%r13
# a = x4 + x8
lea (%r9,%r11),%rbp
# (uint32) a <<<= 13
rol $13,%ebp
# x12 ^= a
xor %rbp,%r14
# b = x9 + x13
lea (%r10,%r13),%rbp
# (uint32) b <<<= 13
rol $13,%ebp
# x1 ^= b
xor %rbp,%rdi
# a = x8 + x12
lea (%r11,%r14),%rbp
# (uint32) a <<<= 18
rol $18,%ebp
# x0 ^= a
xor %rbp,%rdx
# b = x13 + x1
lea (%r13,%rdi),%rbp
# (uint32) b <<<= 18
rol $18,%ebp
# x5 ^= b
xor %rbp,%r15
# x10 = x10_stack
movq 168(%rsp),%rbp
# x5_stack = x5
movq %r15,160(%rsp)
# c = x6 + x10
lea (%rax,%rbp),%r15
# (uint32) c <<<= 7
rol $7,%r15d
# x14 ^= c
xor %r15,%rbx
# c = x10 + x14
lea (%rbp,%rbx),%r15
# (uint32) c <<<= 9
rol $9,%r15d
# x2 ^= c
xor %r15,%rcx
# c = x14 + x2
lea (%rbx,%rcx),%r15
# (uint32) c <<<= 13
rol $13,%r15d
# x6 ^= c
xor %r15,%rax
# c = x2 + x6
lea (%rcx,%rax),%r15
# (uint32) c <<<= 18
rol $18,%r15d
# x10 ^= c
xor %r15,%rbp
# x15 = x15_stack
movq 176(%rsp),%r15
# x10_stack = x10
movq %rbp,168(%rsp)
# d = x11 + x15
lea (%r12,%r15),%rbp
# (uint32) d <<<= 7
rol $7,%ebp
# x3 ^= d
xor %rbp,%rsi
# d = x15 + x3
lea (%r15,%rsi),%rbp
# (uint32) d <<<= 9
rol $9,%ebp
# x7 ^= d
xor %rbp,%r8
# d = x3 + x7
lea (%rsi,%r8),%rbp
# (uint32) d <<<= 13
rol $13,%ebp
# x11 ^= d
xor %rbp,%r12
# d = x7 + x11
lea (%r8,%r12),%rbp
# (uint32) d <<<= 18
rol $18,%ebp
# x15 ^= d
xor %rbp,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# x5 = x5_stack
movq 160(%rsp),%r15
# a = x3 + x0
lea (%rsi,%rdx),%rbp
# (uint32) a <<<= 7
rol $7,%ebp
# x1 ^= a
xor %rbp,%rdi
# b = x4 + x5
lea (%r9,%r15),%rbp
# (uint32) b <<<= 7
rol $7,%ebp
# x6 ^= b
xor %rbp,%rax
# a = x0 + x1
lea (%rdx,%rdi),%rbp
# (uint32) a <<<= 9
rol $9,%ebp
# x2 ^= a
xor %rbp,%rcx
# b = x5 + x6
lea (%r15,%rax),%rbp
# (uint32) b <<<= 9
rol $9,%ebp
# x7 ^= b
xor %rbp,%r8
# a = x1 + x2
lea (%rdi,%rcx),%rbp
# (uint32) a <<<= 13
rol $13,%ebp
# x3 ^= a
xor %rbp,%rsi
# b = x6 + x7
lea (%rax,%r8),%rbp
# (uint32) b <<<= 13
rol $13,%ebp
# x4 ^= b
xor %rbp,%r9
# a = x2 + x3
lea (%rcx,%rsi),%rbp
# (uint32) a <<<= 18
rol $18,%ebp
# x0 ^= a
xor %rbp,%rdx
# b = x7 + x4
lea (%r8,%r9),%rbp
# (uint32) b <<<= 18
rol $18,%ebp
# x5 ^= b
xor %rbp,%r15
# x10 = x10_stack
movq 168(%rsp),%rbp
# x5_stack = x5
movq %r15,160(%rsp)
# c = x9 + x10
lea (%r10,%rbp),%r15
# (uint32) c <<<= 7
rol $7,%r15d
# x11 ^= c
xor %r15,%r12
# c = x10 + x11
lea (%rbp,%r12),%r15
# (uint32) c <<<= 9
rol $9,%r15d
# x8 ^= c
xor %r15,%r11
# c = x11 + x8
lea (%r12,%r11),%r15
# (uint32) c <<<= 13
rol $13,%r15d
# x9 ^= c
xor %r15,%r10
# c = x8 + x9
lea (%r11,%r10),%r15
# (uint32) c <<<= 18
rol $18,%r15d
# x10 ^= c
xor %r15,%rbp
# x15 = x15_stack
movq 176(%rsp),%r15
# x10_stack = x10
movq %rbp,168(%rsp)
# d = x14 + x15
lea (%rbx,%r15),%rbp
# (uint32) d <<<= 7
rol $7,%ebp
# x12 ^= d
xor %rbp,%r14
# d = x15 + x12
lea (%r15,%r14),%rbp
# (uint32) d <<<= 9
rol $9,%ebp
# x13 ^= d
xor %rbp,%r13
# d = x12 + x13
lea (%r14,%r13),%rbp
# (uint32) d <<<= 13
rol $13,%ebp
# x14 ^= d
xor %rbp,%rbx
# d = x13 + x14
lea (%r13,%rbx),%rbp
# (uint32) d <<<= 18
rol $18,%ebp
# x15 ^= d
xor %rbp,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# i = i_backup
movq 184(%rsp),%r15
# unsigned>? i -= 4
sub $4,%r15
# comment:fp stack unchanged by jump
# goto mainloop if unsigned>
ja ._mainloop
# (uint32) x2 += j2
addl 64(%rsp),%ecx
# x3 <<= 32
shl $32,%rsi
# x3 += j2
addq 64(%rsp),%rsi
# (uint64) x3 >>= 32
shr $32,%rsi
# x3 <<= 32
shl $32,%rsi
# x2 += x3
add %rsi,%rcx
# (uint32) x6 += j6
addl 80(%rsp),%eax
# x7 <<= 32
shl $32,%r8
# x7 += j6
addq 80(%rsp),%r8
# (uint64) x7 >>= 32
shr $32,%r8
# x7 <<= 32
shl $32,%r8
# x6 += x7
add %r8,%rax
# (uint32) x8 += j8
addl 88(%rsp),%r11d
# x9 <<= 32
shl $32,%r10
# x9 += j8
addq 88(%rsp),%r10
# (uint64) x9 >>= 32
shr $32,%r10
# x9 <<= 32
shl $32,%r10
# x8 += x9
add %r10,%r11
# (uint32) x12 += j12
addl 104(%rsp),%r14d
# x13 <<= 32
shl $32,%r13
# x13 += j12
addq 104(%rsp),%r13
# (uint64) x13 >>= 32
shr $32,%r13
# x13 <<= 32
shl $32,%r13
# x12 += x13
add %r13,%r14
# (uint32) x0 += j0
addl 56(%rsp),%edx
# x1 <<= 32
shl $32,%rdi
# x1 += j0
addq 56(%rsp),%rdi
# (uint64) x1 >>= 32
shr $32,%rdi
# x1 <<= 32
shl $32,%rdi
# x0 += x1
add %rdi,%rdx
# x5 = x5_stack
movq 160(%rsp),%rdi
# (uint32) x4 += j4
addl 72(%rsp),%r9d
# x5 <<= 32
shl $32,%rdi
# x5 += j4
addq 72(%rsp),%rdi
# (uint64) x5 >>= 32
shr $32,%rdi
# x5 <<= 32
shl $32,%rdi
# x4 += x5
add %rdi,%r9
# x10 = x10_stack
movq 168(%rsp),%r8
# (uint32) x10 += j10
addl 96(%rsp),%r8d
# x11 <<= 32
shl $32,%r12
# x11 += j10
addq 96(%rsp),%r12
# (uint64) x11 >>= 32
shr $32,%r12
# x11 <<= 32
shl $32,%r12
# x10 += x11
add %r12,%r8
# x15 = x15_stack
movq 176(%rsp),%rdi
# (uint32) x14 += j14
addl 112(%rsp),%ebx
# x15 <<= 32
shl $32,%rdi
# x15 += j14
addq 112(%rsp),%rdi
# (uint64) x15 >>= 32
shr $32,%rdi
# x15 <<= 32
shl $32,%rdi
# x14 += x15
add %rdi,%rbx
# out = out_backup
movq 136(%rsp),%rdi
# m = m_backup
movq 144(%rsp),%rsi
# x0 ^= *(uint64 *) (m + 0)
xorq 0(%rsi),%rdx
# *(uint64 *) (out + 0) = x0
movq %rdx,0(%rdi)
# x2 ^= *(uint64 *) (m + 8)
xorq 8(%rsi),%rcx
# *(uint64 *) (out + 8) = x2
movq %rcx,8(%rdi)
# x4 ^= *(uint64 *) (m + 16)
xorq 16(%rsi),%r9
# *(uint64 *) (out + 16) = x4
movq %r9,16(%rdi)
# x6 ^= *(uint64 *) (m + 24)
xorq 24(%rsi),%rax
# *(uint64 *) (out + 24) = x6
movq %rax,24(%rdi)
# x8 ^= *(uint64 *) (m + 32)
xorq 32(%rsi),%r11
# *(uint64 *) (out + 32) = x8
movq %r11,32(%rdi)
# x10 ^= *(uint64 *) (m + 40)
xorq 40(%rsi),%r8
# *(uint64 *) (out + 40) = x10
movq %r8,40(%rdi)
# x12 ^= *(uint64 *) (m + 48)
xorq 48(%rsi),%r14
# *(uint64 *) (out + 48) = x12
movq %r14,48(%rdi)
# x14 ^= *(uint64 *) (m + 56)
xorq 56(%rsi),%rbx
# *(uint64 *) (out + 56) = x14
movq %rbx,56(%rdi)
# bytes = bytes_backup
movq 152(%rsp),%rdx
# in8 = j8
movq 88(%rsp),%rcx
# in8 += 1
add $1,%rcx
# j8 = in8
movq %rcx,88(%rsp)
# unsigned>? unsigned<? bytes - 64
cmp $64,%rdx
# comment:fp stack unchanged by jump
# goto bytesatleast65 if unsigned>
ja ._bytesatleast65
# comment:fp stack unchanged by jump
# goto bytesatleast64 if !unsigned<
jae ._bytesatleast64
# m = out
mov %rdi,%rsi
# out = ctarget
movq 128(%rsp),%rdi
# i = bytes
mov %rdx,%rcx
# while (i) { *out++ = *m++; --i }
rep movsb
# comment:fp stack unchanged by fallthrough
# bytesatleast64:
._bytesatleast64:
# x = x_backup
movq 120(%rsp),%rdi
# in8 = j8
movq 88(%rsp),%rsi
# *(uint64 *) (x + 32) = in8
movq %rsi,32(%rdi)
# r11 = r11_stack
movq 0(%rsp),%r11
# r12 = r12_stack
movq 8(%rsp),%r12
# r13 = r13_stack
movq 16(%rsp),%r13
# r14 = r14_stack
movq 24(%rsp),%r14
# r15 = r15_stack
movq 32(%rsp),%r15
# rbx = rbx_stack
movq 40(%rsp),%rbx
# rbp = rbp_stack
movq 48(%rsp),%rbp
# comment:fp stack unchanged by fallthrough
# done:
._done:
# leave
add %r11,%rsp
mov %rdi,%rax
mov %rsi,%rdx
ret
# bytesatleast65:
._bytesatleast65:
# bytes -= 64
sub $64,%rdx
# out += 64
add $64,%rdi
# m += 64
add $64,%rsi
# comment:fp stack unchanged by jump
# goto bytesatleast1
jmp ._bytesatleast1
ENDPROC(salsa20_encrypt_bytes)

View File

@ -1,91 +0,0 @@
/*
* Glue code for optimized assembly version of Salsa20.
*
* Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com>
*
* The assembly codes are public domain assembly codes written by Daniel. J.
* Bernstein <djb@cr.yp.to>. The codes are modified to include indentation
* and to remove extraneous comments and functions that are not needed.
* - i586 version, renamed as salsa20-i586-asm_32.S
* available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s>
* - x86-64 version, renamed as salsa20-x86_64-asm_64.S
* available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s>
*
* Also modified to set up the initial state using the generic C code rather
* than in assembly.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <asm/unaligned.h>
#include <crypto/internal/skcipher.h>
#include <crypto/salsa20.h>
#include <linux/module.h>
asmlinkage void salsa20_encrypt_bytes(u32 state[16], const u8 *src, u8 *dst,
u32 bytes);
static int salsa20_asm_crypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
const struct salsa20_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
u32 state[16];
int err;
err = skcipher_walk_virt(&walk, req, true);
crypto_salsa20_init(state, ctx, walk.iv);
while (walk.nbytes > 0) {
unsigned int nbytes = walk.nbytes;
if (nbytes < walk.total)
nbytes = round_down(nbytes, walk.stride);
salsa20_encrypt_bytes(state, walk.src.virt.addr,
walk.dst.virt.addr, nbytes);
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
}
static struct skcipher_alg alg = {
.base.cra_name = "salsa20",
.base.cra_driver_name = "salsa20-asm",
.base.cra_priority = 200,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct salsa20_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = SALSA20_MIN_KEY_SIZE,
.max_keysize = SALSA20_MAX_KEY_SIZE,
.ivsize = SALSA20_IV_SIZE,
.chunksize = SALSA20_BLOCK_SIZE,
.setkey = crypto_salsa20_setkey,
.encrypt = salsa20_asm_crypt,
.decrypt = salsa20_asm_crypt,
};
static int __init init(void)
{
return crypto_register_skcipher(&alg);
}
static void __exit fini(void)
{
crypto_unregister_skcipher(&alg);
}
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)");
MODULE_ALIAS_CRYPTO("salsa20");
MODULE_ALIAS_CRYPTO("salsa20-asm");

View File

@ -289,6 +289,107 @@ config CRYPTO_CHACHA20POLY1305
with the Poly1305 authenticator. It is defined in RFC7539 for use in
IETF protocols.
config CRYPTO_AEGIS128
tristate "AEGIS-128 AEAD algorithm"
select CRYPTO_AEAD
select CRYPTO_AES # for AES S-box tables
help
Support for the AEGIS-128 dedicated AEAD algorithm.
config CRYPTO_AEGIS128L
tristate "AEGIS-128L AEAD algorithm"
select CRYPTO_AEAD
select CRYPTO_AES # for AES S-box tables
help
Support for the AEGIS-128L dedicated AEAD algorithm.
config CRYPTO_AEGIS256
tristate "AEGIS-256 AEAD algorithm"
select CRYPTO_AEAD
select CRYPTO_AES # for AES S-box tables
help
Support for the AEGIS-256 dedicated AEAD algorithm.
config CRYPTO_AEGIS128_AESNI_SSE2
tristate "AEGIS-128 AEAD algorithm (x86_64 AESNI+SSE2 implementation)"
depends on X86 && 64BIT
select CRYPTO_AEAD
select CRYPTO_CRYPTD
help
AESNI+SSE2 implementation of the AEGSI-128 dedicated AEAD algorithm.
config CRYPTO_AEGIS128L_AESNI_SSE2
tristate "AEGIS-128L AEAD algorithm (x86_64 AESNI+SSE2 implementation)"
depends on X86 && 64BIT
select CRYPTO_AEAD
select CRYPTO_CRYPTD
help
AESNI+SSE2 implementation of the AEGSI-128L dedicated AEAD algorithm.
config CRYPTO_AEGIS256_AESNI_SSE2
tristate "AEGIS-256 AEAD algorithm (x86_64 AESNI+SSE2 implementation)"
depends on X86 && 64BIT
select CRYPTO_AEAD
select CRYPTO_CRYPTD
help
AESNI+SSE2 implementation of the AEGSI-256 dedicated AEAD algorithm.
config CRYPTO_MORUS640
tristate "MORUS-640 AEAD algorithm"
select CRYPTO_AEAD
help
Support for the MORUS-640 dedicated AEAD algorithm.
config CRYPTO_MORUS640_GLUE
tristate
depends on X86
select CRYPTO_AEAD
select CRYPTO_CRYPTD
help
Common glue for SIMD optimizations of the MORUS-640 dedicated AEAD
algorithm.
config CRYPTO_MORUS640_SSE2
tristate "MORUS-640 AEAD algorithm (x86_64 SSE2 implementation)"
depends on X86 && 64BIT
select CRYPTO_AEAD
select CRYPTO_MORUS640_GLUE
help
SSE2 implementation of the MORUS-640 dedicated AEAD algorithm.
config CRYPTO_MORUS1280
tristate "MORUS-1280 AEAD algorithm"
select CRYPTO_AEAD
help
Support for the MORUS-1280 dedicated AEAD algorithm.
config CRYPTO_MORUS1280_GLUE
tristate
depends on X86
select CRYPTO_AEAD
select CRYPTO_CRYPTD
help
Common glue for SIMD optimizations of the MORUS-1280 dedicated AEAD
algorithm.
config CRYPTO_MORUS1280_SSE2
tristate "MORUS-1280 AEAD algorithm (x86_64 SSE2 implementation)"
depends on X86 && 64BIT
select CRYPTO_AEAD
select CRYPTO_MORUS1280_GLUE
help
SSE2 optimizedimplementation of the MORUS-1280 dedicated AEAD
algorithm.
config CRYPTO_MORUS1280_AVX2
tristate "MORUS-1280 AEAD algorithm (x86_64 AVX2 implementation)"
depends on X86 && 64BIT
select CRYPTO_AEAD
select CRYPTO_MORUS1280_GLUE
help
AVX2 optimized implementation of the MORUS-1280 dedicated AEAD
algorithm.
config CRYPTO_SEQIV
tristate "Sequence Number IV Generator"
select CRYPTO_AEAD
@ -1335,34 +1436,6 @@ config CRYPTO_SALSA20
The Salsa20 stream cipher algorithm is designed by Daniel J.
Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
config CRYPTO_SALSA20_586
tristate "Salsa20 stream cipher algorithm (i586)"
depends on (X86 || UML_X86) && !64BIT
select CRYPTO_BLKCIPHER
select CRYPTO_SALSA20
help
Salsa20 stream cipher algorithm.
Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>
The Salsa20 stream cipher algorithm is designed by Daniel J.
Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
config CRYPTO_SALSA20_X86_64
tristate "Salsa20 stream cipher algorithm (x86_64)"
depends on (X86 || UML_X86) && 64BIT
select CRYPTO_BLKCIPHER
select CRYPTO_SALSA20
help
Salsa20 stream cipher algorithm.
Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>
The Salsa20 stream cipher algorithm is designed by Daniel J.
Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
config CRYPTO_CHACHA20
tristate "ChaCha20 cipher algorithm"
select CRYPTO_BLKCIPHER
@ -1695,6 +1768,15 @@ config CRYPTO_LZ4HC
help
This is the LZ4 high compression mode algorithm.
config CRYPTO_ZSTD
tristate "Zstd compression algorithm"
select CRYPTO_ALGAPI
select CRYPTO_ACOMP2
select ZSTD_COMPRESS
select ZSTD_DECOMPRESS
help
This is the zstd algorithm.
comment "Random Number Generation"
config CRYPTO_ANSI_CPRNG

View File

@ -86,6 +86,11 @@ obj-$(CONFIG_CRYPTO_KEYWRAP) += keywrap.o
obj-$(CONFIG_CRYPTO_GCM) += gcm.o
obj-$(CONFIG_CRYPTO_CCM) += ccm.o
obj-$(CONFIG_CRYPTO_CHACHA20POLY1305) += chacha20poly1305.o
obj-$(CONFIG_CRYPTO_AEGIS128) += aegis128.o
obj-$(CONFIG_CRYPTO_AEGIS128L) += aegis128l.o
obj-$(CONFIG_CRYPTO_AEGIS256) += aegis256.o
obj-$(CONFIG_CRYPTO_MORUS640) += morus640.o
obj-$(CONFIG_CRYPTO_MORUS1280) += morus1280.o
obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o
obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
obj-$(CONFIG_CRYPTO_MCRYPTD) += mcryptd.o
@ -137,6 +142,7 @@ obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o
obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o
obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o
obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o
ecdh_generic-y := ecc.o
ecdh_generic-y += ecdh.o

80
crypto/aegis.h Normal file
View File

@ -0,0 +1,80 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* AEGIS common definitions
*
* Copyright (c) 2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (c) 2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#ifndef _CRYPTO_AEGIS_H
#define _CRYPTO_AEGIS_H
#include <crypto/aes.h>
#include <linux/types.h>
#define AEGIS_BLOCK_SIZE 16
union aegis_block {
__le64 words64[AEGIS_BLOCK_SIZE / sizeof(__le64)];
u32 words32[AEGIS_BLOCK_SIZE / sizeof(u32)];
u8 bytes[AEGIS_BLOCK_SIZE];
};
#define AEGIS_BLOCK_ALIGN (__alignof__(union aegis_block))
#define AEGIS_ALIGNED(p) IS_ALIGNED((uintptr_t)p, AEGIS_BLOCK_ALIGN)
static const union aegis_block crypto_aegis_const[2] = {
{ .words64 = {
cpu_to_le64(U64_C(0x0d08050302010100)),
cpu_to_le64(U64_C(0x6279e99059372215)),
} },
{ .words64 = {
cpu_to_le64(U64_C(0xf12fc26d55183ddb)),
cpu_to_le64(U64_C(0xdd28b57342311120)),
} },
};
static void crypto_aegis_block_xor(union aegis_block *dst,
const union aegis_block *src)
{
dst->words64[0] ^= src->words64[0];
dst->words64[1] ^= src->words64[1];
}
static void crypto_aegis_block_and(union aegis_block *dst,
const union aegis_block *src)
{
dst->words64[0] &= src->words64[0];
dst->words64[1] &= src->words64[1];
}
static void crypto_aegis_aesenc(union aegis_block *dst,
const union aegis_block *src,
const union aegis_block *key)
{
u32 *d = dst->words32;
const u8 *s = src->bytes;
const u32 *k = key->words32;
const u32 *t0 = crypto_ft_tab[0];
const u32 *t1 = crypto_ft_tab[1];
const u32 *t2 = crypto_ft_tab[2];
const u32 *t3 = crypto_ft_tab[3];
u32 d0, d1, d2, d3;
d0 = t0[s[ 0]] ^ t1[s[ 5]] ^ t2[s[10]] ^ t3[s[15]] ^ k[0];
d1 = t0[s[ 4]] ^ t1[s[ 9]] ^ t2[s[14]] ^ t3[s[ 3]] ^ k[1];
d2 = t0[s[ 8]] ^ t1[s[13]] ^ t2[s[ 2]] ^ t3[s[ 7]] ^ k[2];
d3 = t0[s[12]] ^ t1[s[ 1]] ^ t2[s[ 6]] ^ t3[s[11]] ^ k[3];
d[0] = d0;
d[1] = d1;
d[2] = d2;
d[3] = d3;
}
#endif /* _CRYPTO_AEGIS_H */

463
crypto/aegis128.c Normal file
View File

@ -0,0 +1,463 @@
/*
* The AEGIS-128 Authenticated-Encryption Algorithm
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/algapi.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include "aegis.h"
#define AEGIS128_NONCE_SIZE 16
#define AEGIS128_STATE_BLOCKS 5
#define AEGIS128_KEY_SIZE 16
#define AEGIS128_MIN_AUTH_SIZE 8
#define AEGIS128_MAX_AUTH_SIZE 16
struct aegis_state {
union aegis_block blocks[AEGIS128_STATE_BLOCKS];
};
struct aegis_ctx {
union aegis_block key;
};
struct aegis128_ops {
int (*skcipher_walk_init)(struct skcipher_walk *walk,
struct aead_request *req, bool atomic);
void (*crypt_chunk)(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size);
};
static void crypto_aegis128_update(struct aegis_state *state)
{
union aegis_block tmp;
unsigned int i;
tmp = state->blocks[AEGIS128_STATE_BLOCKS - 1];
for (i = AEGIS128_STATE_BLOCKS - 1; i > 0; i--)
crypto_aegis_aesenc(&state->blocks[i], &state->blocks[i - 1],
&state->blocks[i]);
crypto_aegis_aesenc(&state->blocks[0], &tmp, &state->blocks[0]);
}
static void crypto_aegis128_update_a(struct aegis_state *state,
const union aegis_block *msg)
{
crypto_aegis128_update(state);
crypto_aegis_block_xor(&state->blocks[0], msg);
}
static void crypto_aegis128_update_u(struct aegis_state *state, const void *msg)
{
crypto_aegis128_update(state);
crypto_xor(state->blocks[0].bytes, msg, AEGIS_BLOCK_SIZE);
}
static void crypto_aegis128_init(struct aegis_state *state,
const union aegis_block *key,
const u8 *iv)
{
union aegis_block key_iv;
unsigned int i;
key_iv = *key;
crypto_xor(key_iv.bytes, iv, AEGIS_BLOCK_SIZE);
state->blocks[0] = key_iv;
state->blocks[1] = crypto_aegis_const[1];
state->blocks[2] = crypto_aegis_const[0];
state->blocks[3] = *key;
state->blocks[4] = *key;
crypto_aegis_block_xor(&state->blocks[3], &crypto_aegis_const[0]);
crypto_aegis_block_xor(&state->blocks[4], &crypto_aegis_const[1]);
for (i = 0; i < 5; i++) {
crypto_aegis128_update_a(state, key);
crypto_aegis128_update_a(state, &key_iv);
}
}
static void crypto_aegis128_ad(struct aegis_state *state,
const u8 *src, unsigned int size)
{
if (AEGIS_ALIGNED(src)) {
const union aegis_block *src_blk =
(const union aegis_block *)src;
while (size >= AEGIS_BLOCK_SIZE) {
crypto_aegis128_update_a(state, src_blk);
size -= AEGIS_BLOCK_SIZE;
src_blk++;
}
} else {
while (size >= AEGIS_BLOCK_SIZE) {
crypto_aegis128_update_u(state, src);
size -= AEGIS_BLOCK_SIZE;
src += AEGIS_BLOCK_SIZE;
}
}
}
static void crypto_aegis128_encrypt_chunk(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size)
{
union aegis_block tmp;
if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
while (size >= AEGIS_BLOCK_SIZE) {
union aegis_block *dst_blk =
(union aegis_block *)dst;
const union aegis_block *src_blk =
(const union aegis_block *)src;
tmp = state->blocks[2];
crypto_aegis_block_and(&tmp, &state->blocks[3]);
crypto_aegis_block_xor(&tmp, &state->blocks[4]);
crypto_aegis_block_xor(&tmp, &state->blocks[1]);
crypto_aegis_block_xor(&tmp, src_blk);
crypto_aegis128_update_a(state, src_blk);
*dst_blk = tmp;
size -= AEGIS_BLOCK_SIZE;
src += AEGIS_BLOCK_SIZE;
dst += AEGIS_BLOCK_SIZE;
}
} else {
while (size >= AEGIS_BLOCK_SIZE) {
tmp = state->blocks[2];
crypto_aegis_block_and(&tmp, &state->blocks[3]);
crypto_aegis_block_xor(&tmp, &state->blocks[4]);
crypto_aegis_block_xor(&tmp, &state->blocks[1]);
crypto_xor(tmp.bytes, src, AEGIS_BLOCK_SIZE);
crypto_aegis128_update_u(state, src);
memcpy(dst, tmp.bytes, AEGIS_BLOCK_SIZE);
size -= AEGIS_BLOCK_SIZE;
src += AEGIS_BLOCK_SIZE;
dst += AEGIS_BLOCK_SIZE;
}
}
if (size > 0) {
union aegis_block msg = {};
memcpy(msg.bytes, src, size);
tmp = state->blocks[2];
crypto_aegis_block_and(&tmp, &state->blocks[3]);
crypto_aegis_block_xor(&tmp, &state->blocks[4]);
crypto_aegis_block_xor(&tmp, &state->blocks[1]);
crypto_aegis128_update_a(state, &msg);
crypto_aegis_block_xor(&msg, &tmp);
memcpy(dst, msg.bytes, size);
}
}
static void crypto_aegis128_decrypt_chunk(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size)
{
union aegis_block tmp;
if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
while (size >= AEGIS_BLOCK_SIZE) {
union aegis_block *dst_blk =
(union aegis_block *)dst;
const union aegis_block *src_blk =
(const union aegis_block *)src;
tmp = state->blocks[2];
crypto_aegis_block_and(&tmp, &state->blocks[3]);
crypto_aegis_block_xor(&tmp, &state->blocks[4]);
crypto_aegis_block_xor(&tmp, &state->blocks[1]);
crypto_aegis_block_xor(&tmp, src_blk);
crypto_aegis128_update_a(state, &tmp);
*dst_blk = tmp;
size -= AEGIS_BLOCK_SIZE;
src += AEGIS_BLOCK_SIZE;
dst += AEGIS_BLOCK_SIZE;
}
} else {
while (size >= AEGIS_BLOCK_SIZE) {
tmp = state->blocks[2];
crypto_aegis_block_and(&tmp, &state->blocks[3]);
crypto_aegis_block_xor(&tmp, &state->blocks[4]);
crypto_aegis_block_xor(&tmp, &state->blocks[1]);
crypto_xor(tmp.bytes, src, AEGIS_BLOCK_SIZE);
crypto_aegis128_update_a(state, &tmp);
memcpy(dst, tmp.bytes, AEGIS_BLOCK_SIZE);
size -= AEGIS_BLOCK_SIZE;
src += AEGIS_BLOCK_SIZE;
dst += AEGIS_BLOCK_SIZE;
}
}
if (size > 0) {
union aegis_block msg = {};
memcpy(msg.bytes, src, size);
tmp = state->blocks[2];
crypto_aegis_block_and(&tmp, &state->blocks[3]);
crypto_aegis_block_xor(&tmp, &state->blocks[4]);
crypto_aegis_block_xor(&tmp, &state->blocks[1]);
crypto_aegis_block_xor(&msg, &tmp);
memset(msg.bytes + size, 0, AEGIS_BLOCK_SIZE - size);
crypto_aegis128_update_a(state, &msg);
memcpy(dst, msg.bytes, size);
}
}
static void crypto_aegis128_process_ad(struct aegis_state *state,
struct scatterlist *sg_src,
unsigned int assoclen)
{
struct scatter_walk walk;
union aegis_block buf;
unsigned int pos = 0;
scatterwalk_start(&walk, sg_src);
while (assoclen != 0) {
unsigned int size = scatterwalk_clamp(&walk, assoclen);
unsigned int left = size;
void *mapped = scatterwalk_map(&walk);
const u8 *src = (const u8 *)mapped;
if (pos + size >= AEGIS_BLOCK_SIZE) {
if (pos > 0) {
unsigned int fill = AEGIS_BLOCK_SIZE - pos;
memcpy(buf.bytes + pos, src, fill);
crypto_aegis128_update_a(state, &buf);
pos = 0;
left -= fill;
src += fill;
}
crypto_aegis128_ad(state, src, left);
src += left & ~(AEGIS_BLOCK_SIZE - 1);
left &= AEGIS_BLOCK_SIZE - 1;
}
memcpy(buf.bytes + pos, src, left);
pos += left;
assoclen -= size;
scatterwalk_unmap(mapped);
scatterwalk_advance(&walk, size);
scatterwalk_done(&walk, 0, assoclen);
}
if (pos > 0) {
memset(buf.bytes + pos, 0, AEGIS_BLOCK_SIZE - pos);
crypto_aegis128_update_a(state, &buf);
}
}
static void crypto_aegis128_process_crypt(struct aegis_state *state,
struct aead_request *req,
const struct aegis128_ops *ops)
{
struct skcipher_walk walk;
u8 *src, *dst;
unsigned int chunksize;
ops->skcipher_walk_init(&walk, req, false);
while (walk.nbytes) {
src = walk.src.virt.addr;
dst = walk.dst.virt.addr;
chunksize = walk.nbytes;
ops->crypt_chunk(state, dst, src, chunksize);
skcipher_walk_done(&walk, 0);
}
}
static void crypto_aegis128_final(struct aegis_state *state,
union aegis_block *tag_xor,
u64 assoclen, u64 cryptlen)
{
u64 assocbits = assoclen * 8;
u64 cryptbits = cryptlen * 8;
union aegis_block tmp;
unsigned int i;
tmp.words64[0] = cpu_to_le64(assocbits);
tmp.words64[1] = cpu_to_le64(cryptbits);
crypto_aegis_block_xor(&tmp, &state->blocks[3]);
for (i = 0; i < 7; i++)
crypto_aegis128_update_a(state, &tmp);
for (i = 0; i < AEGIS128_STATE_BLOCKS; i++)
crypto_aegis_block_xor(tag_xor, &state->blocks[i]);
}
static int crypto_aegis128_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct aegis_ctx *ctx = crypto_aead_ctx(aead);
if (keylen != AEGIS128_KEY_SIZE) {
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
memcpy(ctx->key.bytes, key, AEGIS128_KEY_SIZE);
return 0;
}
static int crypto_aegis128_setauthsize(struct crypto_aead *tfm,
unsigned int authsize)
{
if (authsize > AEGIS128_MAX_AUTH_SIZE)
return -EINVAL;
if (authsize < AEGIS128_MIN_AUTH_SIZE)
return -EINVAL;
return 0;
}
static void crypto_aegis128_crypt(struct aead_request *req,
union aegis_block *tag_xor,
unsigned int cryptlen,
const struct aegis128_ops *ops)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_ctx *ctx = crypto_aead_ctx(tfm);
struct aegis_state state;
crypto_aegis128_init(&state, &ctx->key, req->iv);
crypto_aegis128_process_ad(&state, req->src, req->assoclen);
crypto_aegis128_process_crypt(&state, req, ops);
crypto_aegis128_final(&state, tag_xor, req->assoclen, cryptlen);
}
static int crypto_aegis128_encrypt(struct aead_request *req)
{
static const struct aegis128_ops ops = {
.skcipher_walk_init = skcipher_walk_aead_encrypt,
.crypt_chunk = crypto_aegis128_encrypt_chunk,
};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
union aegis_block tag = {};
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen;
crypto_aegis128_crypt(req, &tag, cryptlen, &ops);
scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
authsize, 1);
return 0;
}
static int crypto_aegis128_decrypt(struct aead_request *req)
{
static const struct aegis128_ops ops = {
.skcipher_walk_init = skcipher_walk_aead_decrypt,
.crypt_chunk = crypto_aegis128_decrypt_chunk,
};
static const u8 zeros[AEGIS128_MAX_AUTH_SIZE] = {};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
union aegis_block tag;
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen - authsize;
scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
authsize, 0);
crypto_aegis128_crypt(req, &tag, cryptlen, &ops);
return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
}
static int crypto_aegis128_init_tfm(struct crypto_aead *tfm)
{
return 0;
}
static void crypto_aegis128_exit_tfm(struct crypto_aead *tfm)
{
}
static struct aead_alg crypto_aegis128_alg = {
.setkey = crypto_aegis128_setkey,
.setauthsize = crypto_aegis128_setauthsize,
.encrypt = crypto_aegis128_encrypt,
.decrypt = crypto_aegis128_decrypt,
.init = crypto_aegis128_init_tfm,
.exit = crypto_aegis128_exit_tfm,
.ivsize = AEGIS128_NONCE_SIZE,
.maxauthsize = AEGIS128_MAX_AUTH_SIZE,
.chunksize = AEGIS_BLOCK_SIZE,
.base = {
.cra_flags = CRYPTO_ALG_TYPE_AEAD,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aegis_ctx),
.cra_alignmask = 0,
.cra_priority = 100,
.cra_name = "aegis128",
.cra_driver_name = "aegis128-generic",
.cra_module = THIS_MODULE,
}
};
static int __init crypto_aegis128_module_init(void)
{
return crypto_register_aead(&crypto_aegis128_alg);
}
static void __exit crypto_aegis128_module_exit(void)
{
crypto_unregister_aead(&crypto_aegis128_alg);
}
module_init(crypto_aegis128_module_init);
module_exit(crypto_aegis128_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm");
MODULE_ALIAS_CRYPTO("aegis128");
MODULE_ALIAS_CRYPTO("aegis128-generic");

527
crypto/aegis128l.c Normal file
View File

@ -0,0 +1,527 @@
/*
* The AEGIS-128L Authenticated-Encryption Algorithm
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/algapi.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include "aegis.h"
#define AEGIS128L_CHUNK_BLOCKS 2
#define AEGIS128L_CHUNK_SIZE (AEGIS128L_CHUNK_BLOCKS * AEGIS_BLOCK_SIZE)
#define AEGIS128L_NONCE_SIZE 16
#define AEGIS128L_STATE_BLOCKS 8
#define AEGIS128L_KEY_SIZE 16
#define AEGIS128L_MIN_AUTH_SIZE 8
#define AEGIS128L_MAX_AUTH_SIZE 16
union aegis_chunk {
union aegis_block blocks[AEGIS128L_CHUNK_BLOCKS];
u8 bytes[AEGIS128L_CHUNK_SIZE];
};
struct aegis_state {
union aegis_block blocks[AEGIS128L_STATE_BLOCKS];
};
struct aegis_ctx {
union aegis_block key;
};
struct aegis128l_ops {
int (*skcipher_walk_init)(struct skcipher_walk *walk,
struct aead_request *req, bool atomic);
void (*crypt_chunk)(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size);
};
static void crypto_aegis128l_update(struct aegis_state *state)
{
union aegis_block tmp;
unsigned int i;
tmp = state->blocks[AEGIS128L_STATE_BLOCKS - 1];
for (i = AEGIS128L_STATE_BLOCKS - 1; i > 0; i--)
crypto_aegis_aesenc(&state->blocks[i], &state->blocks[i - 1],
&state->blocks[i]);
crypto_aegis_aesenc(&state->blocks[0], &tmp, &state->blocks[0]);
}
static void crypto_aegis128l_update_a(struct aegis_state *state,
const union aegis_chunk *msg)
{
crypto_aegis128l_update(state);
crypto_aegis_block_xor(&state->blocks[0], &msg->blocks[0]);
crypto_aegis_block_xor(&state->blocks[4], &msg->blocks[1]);
}
static void crypto_aegis128l_update_u(struct aegis_state *state,
const void *msg)
{
crypto_aegis128l_update(state);
crypto_xor(state->blocks[0].bytes, msg + 0 * AEGIS_BLOCK_SIZE,
AEGIS_BLOCK_SIZE);
crypto_xor(state->blocks[4].bytes, msg + 1 * AEGIS_BLOCK_SIZE,
AEGIS_BLOCK_SIZE);
}
static void crypto_aegis128l_init(struct aegis_state *state,
const union aegis_block *key,
const u8 *iv)
{
union aegis_block key_iv;
union aegis_chunk chunk;
unsigned int i;
memcpy(chunk.blocks[0].bytes, iv, AEGIS_BLOCK_SIZE);
chunk.blocks[1] = *key;
key_iv = *key;
crypto_aegis_block_xor(&key_iv, &chunk.blocks[0]);
state->blocks[0] = key_iv;
state->blocks[1] = crypto_aegis_const[1];
state->blocks[2] = crypto_aegis_const[0];
state->blocks[3] = crypto_aegis_const[1];
state->blocks[4] = key_iv;
state->blocks[5] = *key;
state->blocks[6] = *key;
state->blocks[7] = *key;
crypto_aegis_block_xor(&state->blocks[5], &crypto_aegis_const[0]);
crypto_aegis_block_xor(&state->blocks[6], &crypto_aegis_const[1]);
crypto_aegis_block_xor(&state->blocks[7], &crypto_aegis_const[0]);
for (i = 0; i < 10; i++) {
crypto_aegis128l_update_a(state, &chunk);
}
}
static void crypto_aegis128l_ad(struct aegis_state *state,
const u8 *src, unsigned int size)
{
if (AEGIS_ALIGNED(src)) {
const union aegis_chunk *src_chunk =
(const union aegis_chunk *)src;
while (size >= AEGIS128L_CHUNK_SIZE) {
crypto_aegis128l_update_a(state, src_chunk);
size -= AEGIS128L_CHUNK_SIZE;
src_chunk += 1;
}
} else {
while (size >= AEGIS128L_CHUNK_SIZE) {
crypto_aegis128l_update_u(state, src);
size -= AEGIS128L_CHUNK_SIZE;
src += AEGIS128L_CHUNK_SIZE;
}
}
}
static void crypto_aegis128l_encrypt_chunk(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size)
{
union aegis_chunk tmp;
union aegis_block *tmp0 = &tmp.blocks[0];
union aegis_block *tmp1 = &tmp.blocks[1];
if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
while (size >= AEGIS128L_CHUNK_SIZE) {
union aegis_chunk *dst_blk =
(union aegis_chunk *)dst;
const union aegis_chunk *src_blk =
(const union aegis_chunk *)src;
*tmp0 = state->blocks[2];
crypto_aegis_block_and(tmp0, &state->blocks[3]);
crypto_aegis_block_xor(tmp0, &state->blocks[6]);
crypto_aegis_block_xor(tmp0, &state->blocks[1]);
crypto_aegis_block_xor(tmp0, &src_blk->blocks[0]);
*tmp1 = state->blocks[6];
crypto_aegis_block_and(tmp1, &state->blocks[7]);
crypto_aegis_block_xor(tmp1, &state->blocks[5]);
crypto_aegis_block_xor(tmp1, &state->blocks[2]);
crypto_aegis_block_xor(tmp1, &src_blk->blocks[1]);
crypto_aegis128l_update_a(state, src_blk);
*dst_blk = tmp;
size -= AEGIS128L_CHUNK_SIZE;
src += AEGIS128L_CHUNK_SIZE;
dst += AEGIS128L_CHUNK_SIZE;
}
} else {
while (size >= AEGIS128L_CHUNK_SIZE) {
*tmp0 = state->blocks[2];
crypto_aegis_block_and(tmp0, &state->blocks[3]);
crypto_aegis_block_xor(tmp0, &state->blocks[6]);
crypto_aegis_block_xor(tmp0, &state->blocks[1]);
crypto_xor(tmp0->bytes, src + 0 * AEGIS_BLOCK_SIZE,
AEGIS_BLOCK_SIZE);
*tmp1 = state->blocks[6];
crypto_aegis_block_and(tmp1, &state->blocks[7]);
crypto_aegis_block_xor(tmp1, &state->blocks[5]);
crypto_aegis_block_xor(tmp1, &state->blocks[2]);
crypto_xor(tmp1->bytes, src + 1 * AEGIS_BLOCK_SIZE,
AEGIS_BLOCK_SIZE);
crypto_aegis128l_update_u(state, src);
memcpy(dst, tmp.bytes, AEGIS128L_CHUNK_SIZE);
size -= AEGIS128L_CHUNK_SIZE;
src += AEGIS128L_CHUNK_SIZE;
dst += AEGIS128L_CHUNK_SIZE;
}
}
if (size > 0) {
union aegis_chunk msg = {};
memcpy(msg.bytes, src, size);
*tmp0 = state->blocks[2];
crypto_aegis_block_and(tmp0, &state->blocks[3]);
crypto_aegis_block_xor(tmp0, &state->blocks[6]);
crypto_aegis_block_xor(tmp0, &state->blocks[1]);
*tmp1 = state->blocks[6];
crypto_aegis_block_and(tmp1, &state->blocks[7]);
crypto_aegis_block_xor(tmp1, &state->blocks[5]);
crypto_aegis_block_xor(tmp1, &state->blocks[2]);
crypto_aegis128l_update_a(state, &msg);
crypto_aegis_block_xor(&msg.blocks[0], tmp0);
crypto_aegis_block_xor(&msg.blocks[1], tmp1);
memcpy(dst, msg.bytes, size);
}
}
static void crypto_aegis128l_decrypt_chunk(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size)
{
union aegis_chunk tmp;
union aegis_block *tmp0 = &tmp.blocks[0];
union aegis_block *tmp1 = &tmp.blocks[1];
if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
while (size >= AEGIS128L_CHUNK_SIZE) {
union aegis_chunk *dst_blk =
(union aegis_chunk *)dst;
const union aegis_chunk *src_blk =
(const union aegis_chunk *)src;
*tmp0 = state->blocks[2];
crypto_aegis_block_and(tmp0, &state->blocks[3]);
crypto_aegis_block_xor(tmp0, &state->blocks[6]);
crypto_aegis_block_xor(tmp0, &state->blocks[1]);
crypto_aegis_block_xor(tmp0, &src_blk->blocks[0]);
*tmp1 = state->blocks[6];
crypto_aegis_block_and(tmp1, &state->blocks[7]);
crypto_aegis_block_xor(tmp1, &state->blocks[5]);
crypto_aegis_block_xor(tmp1, &state->blocks[2]);
crypto_aegis_block_xor(tmp1, &src_blk->blocks[1]);
crypto_aegis128l_update_a(state, &tmp);
*dst_blk = tmp;
size -= AEGIS128L_CHUNK_SIZE;
src += AEGIS128L_CHUNK_SIZE;
dst += AEGIS128L_CHUNK_SIZE;
}
} else {
while (size >= AEGIS128L_CHUNK_SIZE) {
*tmp0 = state->blocks[2];
crypto_aegis_block_and(tmp0, &state->blocks[3]);
crypto_aegis_block_xor(tmp0, &state->blocks[6]);
crypto_aegis_block_xor(tmp0, &state->blocks[1]);
crypto_xor(tmp0->bytes, src + 0 * AEGIS_BLOCK_SIZE,
AEGIS_BLOCK_SIZE);
*tmp1 = state->blocks[6];
crypto_aegis_block_and(tmp1, &state->blocks[7]);
crypto_aegis_block_xor(tmp1, &state->blocks[5]);
crypto_aegis_block_xor(tmp1, &state->blocks[2]);
crypto_xor(tmp1->bytes, src + 1 * AEGIS_BLOCK_SIZE,
AEGIS_BLOCK_SIZE);
crypto_aegis128l_update_a(state, &tmp);
memcpy(dst, tmp.bytes, AEGIS128L_CHUNK_SIZE);
size -= AEGIS128L_CHUNK_SIZE;
src += AEGIS128L_CHUNK_SIZE;
dst += AEGIS128L_CHUNK_SIZE;
}
}
if (size > 0) {
union aegis_chunk msg = {};
memcpy(msg.bytes, src, size);
*tmp0 = state->blocks[2];
crypto_aegis_block_and(tmp0, &state->blocks[3]);
crypto_aegis_block_xor(tmp0, &state->blocks[6]);
crypto_aegis_block_xor(tmp0, &state->blocks[1]);
crypto_aegis_block_xor(&msg.blocks[0], tmp0);
*tmp1 = state->blocks[6];
crypto_aegis_block_and(tmp1, &state->blocks[7]);
crypto_aegis_block_xor(tmp1, &state->blocks[5]);
crypto_aegis_block_xor(tmp1, &state->blocks[2]);
crypto_aegis_block_xor(&msg.blocks[1], tmp1);
memset(msg.bytes + size, 0, AEGIS128L_CHUNK_SIZE - size);
crypto_aegis128l_update_a(state, &msg);
memcpy(dst, msg.bytes, size);
}
}
static void crypto_aegis128l_process_ad(struct aegis_state *state,
struct scatterlist *sg_src,
unsigned int assoclen)
{
struct scatter_walk walk;
union aegis_chunk buf;
unsigned int pos = 0;
scatterwalk_start(&walk, sg_src);
while (assoclen != 0) {
unsigned int size = scatterwalk_clamp(&walk, assoclen);
unsigned int left = size;
void *mapped = scatterwalk_map(&walk);
const u8 *src = (const u8 *)mapped;
if (pos + size >= AEGIS128L_CHUNK_SIZE) {
if (pos > 0) {
unsigned int fill = AEGIS128L_CHUNK_SIZE - pos;
memcpy(buf.bytes + pos, src, fill);
crypto_aegis128l_update_a(state, &buf);
pos = 0;
left -= fill;
src += fill;
}
crypto_aegis128l_ad(state, src, left);
src += left & ~(AEGIS128L_CHUNK_SIZE - 1);
left &= AEGIS128L_CHUNK_SIZE - 1;
}
memcpy(buf.bytes + pos, src, left);
pos += left;
assoclen -= size;
scatterwalk_unmap(mapped);
scatterwalk_advance(&walk, size);
scatterwalk_done(&walk, 0, assoclen);
}
if (pos > 0) {
memset(buf.bytes + pos, 0, AEGIS128L_CHUNK_SIZE - pos);
crypto_aegis128l_update_a(state, &buf);
}
}
static void crypto_aegis128l_process_crypt(struct aegis_state *state,
struct aead_request *req,
const struct aegis128l_ops *ops)
{
struct skcipher_walk walk;
u8 *src, *dst;
unsigned int chunksize;
ops->skcipher_walk_init(&walk, req, false);
while (walk.nbytes) {
src = walk.src.virt.addr;
dst = walk.dst.virt.addr;
chunksize = walk.nbytes;
ops->crypt_chunk(state, dst, src, chunksize);
skcipher_walk_done(&walk, 0);
}
}
static void crypto_aegis128l_final(struct aegis_state *state,
union aegis_block *tag_xor,
u64 assoclen, u64 cryptlen)
{
u64 assocbits = assoclen * 8;
u64 cryptbits = cryptlen * 8;
union aegis_chunk tmp;
unsigned int i;
tmp.blocks[0].words64[0] = cpu_to_le64(assocbits);
tmp.blocks[0].words64[1] = cpu_to_le64(cryptbits);
crypto_aegis_block_xor(&tmp.blocks[0], &state->blocks[2]);
tmp.blocks[1] = tmp.blocks[0];
for (i = 0; i < 7; i++)
crypto_aegis128l_update_a(state, &tmp);
for (i = 0; i < 7; i++)
crypto_aegis_block_xor(tag_xor, &state->blocks[i]);
}
static int crypto_aegis128l_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct aegis_ctx *ctx = crypto_aead_ctx(aead);
if (keylen != AEGIS128L_KEY_SIZE) {
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
memcpy(ctx->key.bytes, key, AEGIS128L_KEY_SIZE);
return 0;
}
static int crypto_aegis128l_setauthsize(struct crypto_aead *tfm,
unsigned int authsize)
{
if (authsize > AEGIS128L_MAX_AUTH_SIZE)
return -EINVAL;
if (authsize < AEGIS128L_MIN_AUTH_SIZE)
return -EINVAL;
return 0;
}
static void crypto_aegis128l_crypt(struct aead_request *req,
union aegis_block *tag_xor,
unsigned int cryptlen,
const struct aegis128l_ops *ops)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_ctx *ctx = crypto_aead_ctx(tfm);
struct aegis_state state;
crypto_aegis128l_init(&state, &ctx->key, req->iv);
crypto_aegis128l_process_ad(&state, req->src, req->assoclen);
crypto_aegis128l_process_crypt(&state, req, ops);
crypto_aegis128l_final(&state, tag_xor, req->assoclen, cryptlen);
}
static int crypto_aegis128l_encrypt(struct aead_request *req)
{
static const struct aegis128l_ops ops = {
.skcipher_walk_init = skcipher_walk_aead_encrypt,
.crypt_chunk = crypto_aegis128l_encrypt_chunk,
};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
union aegis_block tag = {};
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen;
crypto_aegis128l_crypt(req, &tag, cryptlen, &ops);
scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
authsize, 1);
return 0;
}
static int crypto_aegis128l_decrypt(struct aead_request *req)
{
static const struct aegis128l_ops ops = {
.skcipher_walk_init = skcipher_walk_aead_decrypt,
.crypt_chunk = crypto_aegis128l_decrypt_chunk,
};
static const u8 zeros[AEGIS128L_MAX_AUTH_SIZE] = {};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
union aegis_block tag;
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen - authsize;
scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
authsize, 0);
crypto_aegis128l_crypt(req, &tag, cryptlen, &ops);
return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
}
static int crypto_aegis128l_init_tfm(struct crypto_aead *tfm)
{
return 0;
}
static void crypto_aegis128l_exit_tfm(struct crypto_aead *tfm)
{
}
static struct aead_alg crypto_aegis128l_alg = {
.setkey = crypto_aegis128l_setkey,
.setauthsize = crypto_aegis128l_setauthsize,
.encrypt = crypto_aegis128l_encrypt,
.decrypt = crypto_aegis128l_decrypt,
.init = crypto_aegis128l_init_tfm,
.exit = crypto_aegis128l_exit_tfm,
.ivsize = AEGIS128L_NONCE_SIZE,
.maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
.chunksize = AEGIS128L_CHUNK_SIZE,
.base = {
.cra_flags = CRYPTO_ALG_TYPE_AEAD,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aegis_ctx),
.cra_alignmask = 0,
.cra_priority = 100,
.cra_name = "aegis128l",
.cra_driver_name = "aegis128l-generic",
.cra_module = THIS_MODULE,
}
};
static int __init crypto_aegis128l_module_init(void)
{
return crypto_register_aead(&crypto_aegis128l_alg);
}
static void __exit crypto_aegis128l_module_exit(void)
{
crypto_unregister_aead(&crypto_aegis128l_alg);
}
module_init(crypto_aegis128l_module_init);
module_exit(crypto_aegis128l_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("AEGIS-128L AEAD algorithm");
MODULE_ALIAS_CRYPTO("aegis128l");
MODULE_ALIAS_CRYPTO("aegis128l-generic");

478
crypto/aegis256.c Normal file
View File

@ -0,0 +1,478 @@
/*
* The AEGIS-256 Authenticated-Encryption Algorithm
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/algapi.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include "aegis.h"
#define AEGIS256_NONCE_SIZE 32
#define AEGIS256_STATE_BLOCKS 6
#define AEGIS256_KEY_SIZE 32
#define AEGIS256_MIN_AUTH_SIZE 8
#define AEGIS256_MAX_AUTH_SIZE 16
struct aegis_state {
union aegis_block blocks[AEGIS256_STATE_BLOCKS];
};
struct aegis_ctx {
union aegis_block key[AEGIS256_KEY_SIZE / AEGIS_BLOCK_SIZE];
};
struct aegis256_ops {
int (*skcipher_walk_init)(struct skcipher_walk *walk,
struct aead_request *req, bool atomic);
void (*crypt_chunk)(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size);
};
static void crypto_aegis256_update(struct aegis_state *state)
{
union aegis_block tmp;
unsigned int i;
tmp = state->blocks[AEGIS256_STATE_BLOCKS - 1];
for (i = AEGIS256_STATE_BLOCKS - 1; i > 0; i--)
crypto_aegis_aesenc(&state->blocks[i], &state->blocks[i - 1],
&state->blocks[i]);
crypto_aegis_aesenc(&state->blocks[0], &tmp, &state->blocks[0]);
}
static void crypto_aegis256_update_a(struct aegis_state *state,
const union aegis_block *msg)
{
crypto_aegis256_update(state);
crypto_aegis_block_xor(&state->blocks[0], msg);
}
static void crypto_aegis256_update_u(struct aegis_state *state, const void *msg)
{
crypto_aegis256_update(state);
crypto_xor(state->blocks[0].bytes, msg, AEGIS_BLOCK_SIZE);
}
static void crypto_aegis256_init(struct aegis_state *state,
const union aegis_block *key,
const u8 *iv)
{
union aegis_block key_iv[2];
unsigned int i;
key_iv[0] = key[0];
key_iv[1] = key[1];
crypto_xor(key_iv[0].bytes, iv + 0 * AEGIS_BLOCK_SIZE,
AEGIS_BLOCK_SIZE);
crypto_xor(key_iv[1].bytes, iv + 1 * AEGIS_BLOCK_SIZE,
AEGIS_BLOCK_SIZE);
state->blocks[0] = key_iv[0];
state->blocks[1] = key_iv[1];
state->blocks[2] = crypto_aegis_const[1];
state->blocks[3] = crypto_aegis_const[0];
state->blocks[4] = key[0];
state->blocks[5] = key[1];
crypto_aegis_block_xor(&state->blocks[4], &crypto_aegis_const[0]);
crypto_aegis_block_xor(&state->blocks[5], &crypto_aegis_const[1]);
for (i = 0; i < 4; i++) {
crypto_aegis256_update_a(state, &key[0]);
crypto_aegis256_update_a(state, &key[1]);
crypto_aegis256_update_a(state, &key_iv[0]);
crypto_aegis256_update_a(state, &key_iv[1]);
}
}
static void crypto_aegis256_ad(struct aegis_state *state,
const u8 *src, unsigned int size)
{
if (AEGIS_ALIGNED(src)) {
const union aegis_block *src_blk =
(const union aegis_block *)src;
while (size >= AEGIS_BLOCK_SIZE) {
crypto_aegis256_update_a(state, src_blk);
size -= AEGIS_BLOCK_SIZE;
src_blk++;
}
} else {
while (size >= AEGIS_BLOCK_SIZE) {
crypto_aegis256_update_u(state, src);
size -= AEGIS_BLOCK_SIZE;
src += AEGIS_BLOCK_SIZE;
}
}
}
static void crypto_aegis256_encrypt_chunk(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size)
{
union aegis_block tmp;
if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
while (size >= AEGIS_BLOCK_SIZE) {
union aegis_block *dst_blk =
(union aegis_block *)dst;
const union aegis_block *src_blk =
(const union aegis_block *)src;
tmp = state->blocks[2];
crypto_aegis_block_and(&tmp, &state->blocks[3]);
crypto_aegis_block_xor(&tmp, &state->blocks[5]);
crypto_aegis_block_xor(&tmp, &state->blocks[4]);
crypto_aegis_block_xor(&tmp, &state->blocks[1]);
crypto_aegis_block_xor(&tmp, src_blk);
crypto_aegis256_update_a(state, src_blk);
*dst_blk = tmp;
size -= AEGIS_BLOCK_SIZE;
src += AEGIS_BLOCK_SIZE;
dst += AEGIS_BLOCK_SIZE;
}
} else {
while (size >= AEGIS_BLOCK_SIZE) {
tmp = state->blocks[2];
crypto_aegis_block_and(&tmp, &state->blocks[3]);
crypto_aegis_block_xor(&tmp, &state->blocks[5]);
crypto_aegis_block_xor(&tmp, &state->blocks[4]);
crypto_aegis_block_xor(&tmp, &state->blocks[1]);
crypto_xor(tmp.bytes, src, AEGIS_BLOCK_SIZE);
crypto_aegis256_update_u(state, src);
memcpy(dst, tmp.bytes, AEGIS_BLOCK_SIZE);
size -= AEGIS_BLOCK_SIZE;
src += AEGIS_BLOCK_SIZE;
dst += AEGIS_BLOCK_SIZE;
}
}
if (size > 0) {
union aegis_block msg = {};
memcpy(msg.bytes, src, size);
tmp = state->blocks[2];
crypto_aegis_block_and(&tmp, &state->blocks[3]);
crypto_aegis_block_xor(&tmp, &state->blocks[5]);
crypto_aegis_block_xor(&tmp, &state->blocks[4]);
crypto_aegis_block_xor(&tmp, &state->blocks[1]);
crypto_aegis256_update_a(state, &msg);
crypto_aegis_block_xor(&msg, &tmp);
memcpy(dst, msg.bytes, size);
}
}
static void crypto_aegis256_decrypt_chunk(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size)
{
union aegis_block tmp;
if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
while (size >= AEGIS_BLOCK_SIZE) {
union aegis_block *dst_blk =
(union aegis_block *)dst;
const union aegis_block *src_blk =
(const union aegis_block *)src;
tmp = state->blocks[2];
crypto_aegis_block_and(&tmp, &state->blocks[3]);
crypto_aegis_block_xor(&tmp, &state->blocks[5]);
crypto_aegis_block_xor(&tmp, &state->blocks[4]);
crypto_aegis_block_xor(&tmp, &state->blocks[1]);
crypto_aegis_block_xor(&tmp, src_blk);
crypto_aegis256_update_a(state, &tmp);
*dst_blk = tmp;
size -= AEGIS_BLOCK_SIZE;
src += AEGIS_BLOCK_SIZE;
dst += AEGIS_BLOCK_SIZE;
}
} else {
while (size >= AEGIS_BLOCK_SIZE) {
tmp = state->blocks[2];
crypto_aegis_block_and(&tmp, &state->blocks[3]);
crypto_aegis_block_xor(&tmp, &state->blocks[5]);
crypto_aegis_block_xor(&tmp, &state->blocks[4]);
crypto_aegis_block_xor(&tmp, &state->blocks[1]);
crypto_xor(tmp.bytes, src, AEGIS_BLOCK_SIZE);
crypto_aegis256_update_a(state, &tmp);
memcpy(dst, tmp.bytes, AEGIS_BLOCK_SIZE);
size -= AEGIS_BLOCK_SIZE;
src += AEGIS_BLOCK_SIZE;
dst += AEGIS_BLOCK_SIZE;
}
}
if (size > 0) {
union aegis_block msg = {};
memcpy(msg.bytes, src, size);
tmp = state->blocks[2];
crypto_aegis_block_and(&tmp, &state->blocks[3]);
crypto_aegis_block_xor(&tmp, &state->blocks[5]);
crypto_aegis_block_xor(&tmp, &state->blocks[4]);
crypto_aegis_block_xor(&tmp, &state->blocks[1]);
crypto_aegis_block_xor(&msg, &tmp);
memset(msg.bytes + size, 0, AEGIS_BLOCK_SIZE - size);
crypto_aegis256_update_a(state, &msg);
memcpy(dst, msg.bytes, size);
}
}
static void crypto_aegis256_process_ad(struct aegis_state *state,
struct scatterlist *sg_src,
unsigned int assoclen)
{
struct scatter_walk walk;
union aegis_block buf;
unsigned int pos = 0;
scatterwalk_start(&walk, sg_src);
while (assoclen != 0) {
unsigned int size = scatterwalk_clamp(&walk, assoclen);
unsigned int left = size;
void *mapped = scatterwalk_map(&walk);
const u8 *src = (const u8 *)mapped;
if (pos + size >= AEGIS_BLOCK_SIZE) {
if (pos > 0) {
unsigned int fill = AEGIS_BLOCK_SIZE - pos;
memcpy(buf.bytes + pos, src, fill);
crypto_aegis256_update_a(state, &buf);
pos = 0;
left -= fill;
src += fill;
}
crypto_aegis256_ad(state, src, left);
src += left & ~(AEGIS_BLOCK_SIZE - 1);
left &= AEGIS_BLOCK_SIZE - 1;
}
memcpy(buf.bytes + pos, src, left);
pos += left;
assoclen -= size;
scatterwalk_unmap(mapped);
scatterwalk_advance(&walk, size);
scatterwalk_done(&walk, 0, assoclen);
}
if (pos > 0) {
memset(buf.bytes + pos, 0, AEGIS_BLOCK_SIZE - pos);
crypto_aegis256_update_a(state, &buf);
}
}
static void crypto_aegis256_process_crypt(struct aegis_state *state,
struct aead_request *req,
const struct aegis256_ops *ops)
{
struct skcipher_walk walk;
u8 *src, *dst;
unsigned int chunksize;
ops->skcipher_walk_init(&walk, req, false);
while (walk.nbytes) {
src = walk.src.virt.addr;
dst = walk.dst.virt.addr;
chunksize = walk.nbytes;
ops->crypt_chunk(state, dst, src, chunksize);
skcipher_walk_done(&walk, 0);
}
}
static void crypto_aegis256_final(struct aegis_state *state,
union aegis_block *tag_xor,
u64 assoclen, u64 cryptlen)
{
u64 assocbits = assoclen * 8;
u64 cryptbits = cryptlen * 8;
union aegis_block tmp;
unsigned int i;
tmp.words64[0] = cpu_to_le64(assocbits);
tmp.words64[1] = cpu_to_le64(cryptbits);
crypto_aegis_block_xor(&tmp, &state->blocks[3]);
for (i = 0; i < 7; i++)
crypto_aegis256_update_a(state, &tmp);
for (i = 0; i < AEGIS256_STATE_BLOCKS; i++)
crypto_aegis_block_xor(tag_xor, &state->blocks[i]);
}
static int crypto_aegis256_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct aegis_ctx *ctx = crypto_aead_ctx(aead);
if (keylen != AEGIS256_KEY_SIZE) {
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
memcpy(ctx->key[0].bytes, key, AEGIS_BLOCK_SIZE);
memcpy(ctx->key[1].bytes, key + AEGIS_BLOCK_SIZE,
AEGIS_BLOCK_SIZE);
return 0;
}
static int crypto_aegis256_setauthsize(struct crypto_aead *tfm,
unsigned int authsize)
{
if (authsize > AEGIS256_MAX_AUTH_SIZE)
return -EINVAL;
if (authsize < AEGIS256_MIN_AUTH_SIZE)
return -EINVAL;
return 0;
}
static void crypto_aegis256_crypt(struct aead_request *req,
union aegis_block *tag_xor,
unsigned int cryptlen,
const struct aegis256_ops *ops)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_ctx *ctx = crypto_aead_ctx(tfm);
struct aegis_state state;
crypto_aegis256_init(&state, ctx->key, req->iv);
crypto_aegis256_process_ad(&state, req->src, req->assoclen);
crypto_aegis256_process_crypt(&state, req, ops);
crypto_aegis256_final(&state, tag_xor, req->assoclen, cryptlen);
}
static int crypto_aegis256_encrypt(struct aead_request *req)
{
static const struct aegis256_ops ops = {
.skcipher_walk_init = skcipher_walk_aead_encrypt,
.crypt_chunk = crypto_aegis256_encrypt_chunk,
};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
union aegis_block tag = {};
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen;
crypto_aegis256_crypt(req, &tag, cryptlen, &ops);
scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
authsize, 1);
return 0;
}
static int crypto_aegis256_decrypt(struct aead_request *req)
{
static const struct aegis256_ops ops = {
.skcipher_walk_init = skcipher_walk_aead_decrypt,
.crypt_chunk = crypto_aegis256_decrypt_chunk,
};
static const u8 zeros[AEGIS256_MAX_AUTH_SIZE] = {};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
union aegis_block tag;
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen - authsize;
scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
authsize, 0);
crypto_aegis256_crypt(req, &tag, cryptlen, &ops);
return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
}
static int crypto_aegis256_init_tfm(struct crypto_aead *tfm)
{
return 0;
}
static void crypto_aegis256_exit_tfm(struct crypto_aead *tfm)
{
}
static struct aead_alg crypto_aegis256_alg = {
.setkey = crypto_aegis256_setkey,
.setauthsize = crypto_aegis256_setauthsize,
.encrypt = crypto_aegis256_encrypt,
.decrypt = crypto_aegis256_decrypt,
.init = crypto_aegis256_init_tfm,
.exit = crypto_aegis256_exit_tfm,
.ivsize = AEGIS256_NONCE_SIZE,
.maxauthsize = AEGIS256_MAX_AUTH_SIZE,
.chunksize = AEGIS_BLOCK_SIZE,
.base = {
.cra_flags = CRYPTO_ALG_TYPE_AEAD,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aegis_ctx),
.cra_alignmask = 0,
.cra_priority = 100,
.cra_name = "aegis256",
.cra_driver_name = "aegis256-generic",
.cra_module = THIS_MODULE,
}
};
static int __init crypto_aegis256_module_init(void)
{
return crypto_register_aead(&crypto_aegis256_alg);
}
static void __exit crypto_aegis256_module_exit(void)
{
crypto_unregister_aead(&crypto_aegis256_alg);
}
module_init(crypto_aegis256_module_init);
module_exit(crypto_aegis256_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("AEGIS-256 AEAD algorithm");
MODULE_ALIAS_CRYPTO("aegis256");
MODULE_ALIAS_CRYPTO("aegis256-generic");

View File

@ -10,6 +10,7 @@
*
*/
#include <crypto/algapi.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/fips.h>
@ -59,6 +60,15 @@ static int crypto_check_alg(struct crypto_alg *alg)
if (alg->cra_blocksize > PAGE_SIZE / 8)
return -EINVAL;
if (!alg->cra_type && (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
CRYPTO_ALG_TYPE_CIPHER) {
if (alg->cra_alignmask > MAX_CIPHER_ALIGNMASK)
return -EINVAL;
if (alg->cra_blocksize > MAX_CIPHER_BLOCKSIZE)
return -EINVAL;
}
if (alg->cra_priority < 0)
return -EINVAL;

View File

@ -108,6 +108,7 @@ static int crypto_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
CRYPTO_TFM_RES_MASK);
out:
memzero_explicit(&keys, sizeof(keys));
return err;
badkey:

View File

@ -90,6 +90,7 @@ static int crypto_authenc_esn_setkey(struct crypto_aead *authenc_esn, const u8 *
CRYPTO_TFM_RES_MASK);
out:
memzero_explicit(&keys, sizeof(keys));
return err;
badkey:

View File

@ -53,9 +53,8 @@ static void crypto_cfb_encrypt_one(struct crypto_skcipher *tfm,
static void crypto_cfb_final(struct skcipher_walk *walk,
struct crypto_skcipher *tfm)
{
const unsigned int bsize = crypto_cfb_bsize(tfm);
const unsigned long alignmask = crypto_skcipher_alignmask(tfm);
u8 tmp[bsize + alignmask];
u8 tmp[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
u8 *stream = PTR_ALIGN(tmp + 0, alignmask + 1);
u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
@ -94,7 +93,7 @@ static int crypto_cfb_encrypt_inplace(struct skcipher_walk *walk,
unsigned int nbytes = walk->nbytes;
u8 *src = walk->src.virt.addr;
u8 *iv = walk->iv;
u8 tmp[bsize];
u8 tmp[MAX_CIPHER_BLOCKSIZE];
do {
crypto_cfb_encrypt_one(tfm, iv, tmp);
@ -164,7 +163,7 @@ static int crypto_cfb_decrypt_inplace(struct skcipher_walk *walk,
unsigned int nbytes = walk->nbytes;
u8 *src = walk->src.virt.addr;
u8 *iv = walk->iv;
u8 tmp[bsize];
u8 tmp[MAX_CIPHER_BLOCKSIZE];
do {
crypto_cfb_encrypt_one(tfm, iv, tmp);

View File

@ -13,6 +13,7 @@
*
*/
#include <crypto/algapi.h>
#include <linux/kernel.h>
#include <linux/crypto.h>
#include <linux/errno.h>
@ -67,7 +68,7 @@ static void cipher_crypt_unaligned(void (*fn)(struct crypto_tfm *, u8 *,
{
unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
unsigned int size = crypto_tfm_alg_blocksize(tfm);
u8 buffer[size + alignmask];
u8 buffer[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
u8 *tmp = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
memcpy(tmp, src, size);

View File

@ -29,6 +29,7 @@
* This is crypto api shash wrappers to crc32_le.
*/
#include <asm/unaligned.h>
#include <linux/crc32.h>
#include <crypto/internal/hash.h>
#include <linux/init.h>
@ -39,11 +40,6 @@
#define CHKSUM_BLOCK_SIZE 1
#define CHKSUM_DIGEST_SIZE 4
static u32 __crc32_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le(crc, p, len);
}
/** No default init with ~0 */
static int crc32_cra_init(struct crypto_tfm *tfm)
{
@ -54,7 +50,6 @@ static int crc32_cra_init(struct crypto_tfm *tfm)
return 0;
}
/*
* Setting the seed allows arbitrary accumulators and flexible XOR policy
* If your algorithm starts with ~0, then XOR with ~0 before you set
@ -69,7 +64,7 @@ static int crc32_setkey(struct crypto_shash *hash, const u8 *key,
crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
*mctx = le32_to_cpup((__le32 *)key);
*mctx = get_unaligned_le32(key);
return 0;
}
@ -88,7 +83,7 @@ static int crc32_update(struct shash_desc *desc, const u8 *data,
{
u32 *crcp = shash_desc_ctx(desc);
*crcp = __crc32_le(*crcp, data, len);
*crcp = crc32_le(*crcp, data, len);
return 0;
}
@ -96,7 +91,7 @@ static int crc32_update(struct shash_desc *desc, const u8 *data,
static int __crc32_finup(u32 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
*(__le32 *)out = cpu_to_le32(__crc32_le(*crcp, data, len));
put_unaligned_le32(crc32_le(*crcp, data, len), out);
return 0;
}
@ -110,7 +105,7 @@ static int crc32_final(struct shash_desc *desc, u8 *out)
{
u32 *crcp = shash_desc_ctx(desc);
*(__le32 *)out = cpu_to_le32p(crcp);
put_unaligned_le32(*crcp, out);
return 0;
}

View File

@ -35,6 +35,7 @@
*
*/
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
@ -82,7 +83,7 @@ static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
mctx->key = le32_to_cpu(*(__le32 *)key);
mctx->key = get_unaligned_le32(key);
return 0;
}
@ -99,13 +100,13 @@ static int chksum_final(struct shash_desc *desc, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
*(__le32 *)out = ~cpu_to_le32p(&ctx->crc);
put_unaligned_le32(~ctx->crc, out);
return 0;
}
static int __chksum_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out)
{
*(__le32 *)out = ~cpu_to_le32(__crc32c_le(*crcp, data, len));
put_unaligned_le32(~__crc32c_le(*crcp, data, len), out);
return 0;
}
@ -148,7 +149,6 @@ static struct shash_alg alg = {
.cra_priority = 100,
.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
.cra_alignmask = 3,
.cra_ctxsize = sizeof(struct chksum_ctx),
.cra_module = THIS_MODULE,
.cra_init = crc32c_cra_init,

View File

@ -58,7 +58,7 @@ static void crypto_ctr_crypt_final(struct blkcipher_walk *walk,
unsigned int bsize = crypto_cipher_blocksize(tfm);
unsigned long alignmask = crypto_cipher_alignmask(tfm);
u8 *ctrblk = walk->iv;
u8 tmp[bsize + alignmask];
u8 tmp[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1);
u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
@ -106,7 +106,7 @@ static int crypto_ctr_crypt_inplace(struct blkcipher_walk *walk,
unsigned int nbytes = walk->nbytes;
u8 *ctrblk = walk->iv;
u8 *src = walk->src.virt.addr;
u8 tmp[bsize + alignmask];
u8 tmp[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1);
do {

View File

@ -40,6 +40,7 @@
* rfc3962 includes errata information in its Appendix A.
*/
#include <crypto/algapi.h>
#include <crypto/internal/skcipher.h>
#include <linux/err.h>
#include <linux/init.h>
@ -104,7 +105,7 @@ static int cts_cbc_encrypt(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct skcipher_request *subreq = &rctx->subreq;
int bsize = crypto_skcipher_blocksize(tfm);
u8 d[bsize * 2] __aligned(__alignof__(u32));
u8 d[MAX_CIPHER_BLOCKSIZE * 2] __aligned(__alignof__(u32));
struct scatterlist *sg;
unsigned int offset;
int lastn;
@ -183,7 +184,7 @@ static int cts_cbc_decrypt(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct skcipher_request *subreq = &rctx->subreq;
int bsize = crypto_skcipher_blocksize(tfm);
u8 d[bsize * 2] __aligned(__alignof__(u32));
u8 d[MAX_CIPHER_BLOCKSIZE * 2] __aligned(__alignof__(u32));
struct scatterlist *sg;
unsigned int offset;
u8 *space;

View File

@ -515,7 +515,7 @@ static void vli_mmod_fast_256(u64 *result, const u64 *product,
static bool vli_mmod_fast(u64 *result, u64 *product,
const u64 *curve_prime, unsigned int ndigits)
{
u64 tmp[2 * ndigits];
u64 tmp[2 * ECC_MAX_DIGITS];
switch (ndigits) {
case 3:
@ -536,7 +536,7 @@ static bool vli_mmod_fast(u64 *result, u64 *product,
static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right,
const u64 *curve_prime, unsigned int ndigits)
{
u64 product[2 * ndigits];
u64 product[2 * ECC_MAX_DIGITS];
vli_mult(product, left, right, ndigits);
vli_mmod_fast(result, product, curve_prime, ndigits);
@ -546,7 +546,7 @@ static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right,
static void vli_mod_square_fast(u64 *result, const u64 *left,
const u64 *curve_prime, unsigned int ndigits)
{
u64 product[2 * ndigits];
u64 product[2 * ECC_MAX_DIGITS];
vli_square(product, left, ndigits);
vli_mmod_fast(result, product, curve_prime, ndigits);
@ -560,8 +560,8 @@ static void vli_mod_square_fast(u64 *result, const u64 *left,
static void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod,
unsigned int ndigits)
{
u64 a[ndigits], b[ndigits];
u64 u[ndigits], v[ndigits];
u64 a[ECC_MAX_DIGITS], b[ECC_MAX_DIGITS];
u64 u[ECC_MAX_DIGITS], v[ECC_MAX_DIGITS];
u64 carry;
int cmp_result;
@ -649,8 +649,8 @@ static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1,
u64 *curve_prime, unsigned int ndigits)
{
/* t1 = x, t2 = y, t3 = z */
u64 t4[ndigits];
u64 t5[ndigits];
u64 t4[ECC_MAX_DIGITS];
u64 t5[ECC_MAX_DIGITS];
if (vli_is_zero(z1, ndigits))
return;
@ -711,7 +711,7 @@ static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1,
static void apply_z(u64 *x1, u64 *y1, u64 *z, u64 *curve_prime,
unsigned int ndigits)
{
u64 t1[ndigits];
u64 t1[ECC_MAX_DIGITS];
vli_mod_square_fast(t1, z, curve_prime, ndigits); /* z^2 */
vli_mod_mult_fast(x1, x1, t1, curve_prime, ndigits); /* x1 * z^2 */
@ -724,7 +724,7 @@ static void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, u64 *y2,
u64 *p_initial_z, u64 *curve_prime,
unsigned int ndigits)
{
u64 z[ndigits];
u64 z[ECC_MAX_DIGITS];
vli_set(x2, x1, ndigits);
vli_set(y2, y1, ndigits);
@ -750,7 +750,7 @@ static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime,
unsigned int ndigits)
{
/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
u64 t5[ndigits];
u64 t5[ECC_MAX_DIGITS];
/* t5 = x2 - x1 */
vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
@ -791,9 +791,9 @@ static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime,
unsigned int ndigits)
{
/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
u64 t5[ndigits];
u64 t6[ndigits];
u64 t7[ndigits];
u64 t5[ECC_MAX_DIGITS];
u64 t6[ECC_MAX_DIGITS];
u64 t7[ECC_MAX_DIGITS];
/* t5 = x2 - x1 */
vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
@ -846,9 +846,9 @@ static void ecc_point_mult(struct ecc_point *result,
unsigned int ndigits)
{
/* R0 and R1 */
u64 rx[2][ndigits];
u64 ry[2][ndigits];
u64 z[ndigits];
u64 rx[2][ECC_MAX_DIGITS];
u64 ry[2][ECC_MAX_DIGITS];
u64 z[ECC_MAX_DIGITS];
int i, nb;
int num_bits = vli_num_bits(scalar, ndigits);
@ -943,13 +943,13 @@ int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey)
{
const struct ecc_curve *curve = ecc_get_curve(curve_id);
u64 priv[ndigits];
u64 priv[ECC_MAX_DIGITS];
unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
unsigned int nbits = vli_num_bits(curve->n, ndigits);
int err;
/* Check that N is included in Table 1 of FIPS 186-4, section 6.1.1 */
if (nbits < 160)
if (nbits < 160 || ndigits > ARRAY_SIZE(priv))
return -EINVAL;
/*
@ -988,10 +988,10 @@ int ecc_make_pub_key(unsigned int curve_id, unsigned int ndigits,
{
int ret = 0;
struct ecc_point *pk;
u64 priv[ndigits];
u64 priv[ECC_MAX_DIGITS];
const struct ecc_curve *curve = ecc_get_curve(curve_id);
if (!private_key || !curve) {
if (!private_key || !curve || ndigits > ARRAY_SIZE(priv)) {
ret = -EINVAL;
goto out;
}
@ -1025,30 +1025,25 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
{
int ret = 0;
struct ecc_point *product, *pk;
u64 *priv, *rand_z;
u64 priv[ECC_MAX_DIGITS];
u64 rand_z[ECC_MAX_DIGITS];
unsigned int nbytes;
const struct ecc_curve *curve = ecc_get_curve(curve_id);
if (!private_key || !public_key || !curve) {
if (!private_key || !public_key || !curve ||
ndigits > ARRAY_SIZE(priv) || ndigits > ARRAY_SIZE(rand_z)) {
ret = -EINVAL;
goto out;
}
priv = kmalloc_array(ndigits, sizeof(*priv), GFP_KERNEL);
if (!priv) {
ret = -ENOMEM;
goto out;
}
nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
rand_z = kmalloc_array(ndigits, sizeof(*rand_z), GFP_KERNEL);
if (!rand_z) {
ret = -ENOMEM;
goto kfree_out;
}
get_random_bytes(rand_z, nbytes);
pk = ecc_alloc_point(ndigits);
if (!pk) {
ret = -ENOMEM;
goto kfree_out;
goto out;
}
product = ecc_alloc_point(ndigits);
@ -1057,8 +1052,6 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
goto err_alloc_product;
}
get_random_bytes(rand_z, ndigits << ECC_DIGITS_TO_BYTES_SHIFT);
ecc_swap_digits(public_key, pk->x, ndigits);
ecc_swap_digits(&public_key[ndigits], pk->y, ndigits);
ecc_swap_digits(private_key, priv, ndigits);
@ -1073,9 +1066,6 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
ecc_free_point(product);
err_alloc_product:
ecc_free_point(pk);
kfree_out:
kzfree(priv);
kzfree(rand_z);
out:
return ret;
}

View File

@ -26,7 +26,9 @@
#ifndef _CRYPTO_ECC_H
#define _CRYPTO_ECC_H
#define ECC_MAX_DIGITS 4 /* 256 */
#define ECC_CURVE_NIST_P192_DIGITS 3
#define ECC_CURVE_NIST_P256_DIGITS 4
#define ECC_MAX_DIGITS ECC_CURVE_NIST_P256_DIGITS
#define ECC_DIGITS_TO_BYTES_SHIFT 3

View File

@ -30,8 +30,8 @@ static inline struct ecdh_ctx *ecdh_get_ctx(struct crypto_kpp *tfm)
static unsigned int ecdh_supported_curve(unsigned int curve_id)
{
switch (curve_id) {
case ECC_CURVE_NIST_P192: return 3;
case ECC_CURVE_NIST_P256: return 4;
case ECC_CURVE_NIST_P192: return ECC_CURVE_NIST_P192_DIGITS;
case ECC_CURVE_NIST_P256: return ECC_CURVE_NIST_P256_DIGITS;
default: return 0;
}
}

549
crypto/morus1280.c Normal file
View File

@ -0,0 +1,549 @@
/*
* The MORUS-1280 Authenticated-Encryption Algorithm
*
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/morus_common.h>
#include <crypto/scatterwalk.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#define MORUS1280_WORD_SIZE 8
#define MORUS1280_BLOCK_SIZE (MORUS_BLOCK_WORDS * MORUS1280_WORD_SIZE)
#define MORUS1280_BLOCK_ALIGN (__alignof__(__le64))
#define MORUS1280_ALIGNED(p) IS_ALIGNED((uintptr_t)p, MORUS1280_BLOCK_ALIGN)
struct morus1280_block {
u64 words[MORUS_BLOCK_WORDS];
};
union morus1280_block_in {
__le64 words[MORUS_BLOCK_WORDS];
u8 bytes[MORUS1280_BLOCK_SIZE];
};
struct morus1280_state {
struct morus1280_block s[MORUS_STATE_BLOCKS];
};
struct morus1280_ctx {
struct morus1280_block key;
};
struct morus1280_ops {
int (*skcipher_walk_init)(struct skcipher_walk *walk,
struct aead_request *req, bool atomic);
void (*crypt_chunk)(struct morus1280_state *state,
u8 *dst, const u8 *src, unsigned int size);
};
static const struct morus1280_block crypto_morus1280_const[1] = {
{ .words = {
U64_C(0x0d08050302010100),
U64_C(0x6279e99059372215),
U64_C(0xf12fc26d55183ddb),
U64_C(0xdd28b57342311120),
} },
};
static void crypto_morus1280_round(struct morus1280_block *b0,
struct morus1280_block *b1,
struct morus1280_block *b2,
struct morus1280_block *b3,
struct morus1280_block *b4,
const struct morus1280_block *m,
unsigned int b, unsigned int w)
{
unsigned int i;
struct morus1280_block tmp;
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
b0->words[i] ^= b1->words[i] & b2->words[i];
b0->words[i] ^= b3->words[i];
b0->words[i] ^= m->words[i];
b0->words[i] = rol64(b0->words[i], b);
}
tmp = *b3;
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
b3->words[(i + w) % MORUS_BLOCK_WORDS] = tmp.words[i];
}
static void crypto_morus1280_update(struct morus1280_state *state,
const struct morus1280_block *m)
{
static const struct morus1280_block z = {};
struct morus1280_block *s = state->s;
crypto_morus1280_round(&s[0], &s[1], &s[2], &s[3], &s[4], &z, 13, 1);
crypto_morus1280_round(&s[1], &s[2], &s[3], &s[4], &s[0], m, 46, 2);
crypto_morus1280_round(&s[2], &s[3], &s[4], &s[0], &s[1], m, 38, 3);
crypto_morus1280_round(&s[3], &s[4], &s[0], &s[1], &s[2], m, 7, 2);
crypto_morus1280_round(&s[4], &s[0], &s[1], &s[2], &s[3], m, 4, 1);
}
static void crypto_morus1280_load_a(struct morus1280_block *dst, const u8 *src)
{
unsigned int i;
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
dst->words[i] = le64_to_cpu(*(const __le64 *)src);
src += MORUS1280_WORD_SIZE;
}
}
static void crypto_morus1280_load_u(struct morus1280_block *dst, const u8 *src)
{
unsigned int i;
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
dst->words[i] = get_unaligned_le64(src);
src += MORUS1280_WORD_SIZE;
}
}
static void crypto_morus1280_load(struct morus1280_block *dst, const u8 *src)
{
if (MORUS1280_ALIGNED(src))
crypto_morus1280_load_a(dst, src);
else
crypto_morus1280_load_u(dst, src);
}
static void crypto_morus1280_store_a(u8 *dst, const struct morus1280_block *src)
{
unsigned int i;
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
*(__le64 *)dst = cpu_to_le64(src->words[i]);
dst += MORUS1280_WORD_SIZE;
}
}
static void crypto_morus1280_store_u(u8 *dst, const struct morus1280_block *src)
{
unsigned int i;
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
put_unaligned_le64(src->words[i], dst);
dst += MORUS1280_WORD_SIZE;
}
}
static void crypto_morus1280_store(u8 *dst, const struct morus1280_block *src)
{
if (MORUS1280_ALIGNED(dst))
crypto_morus1280_store_a(dst, src);
else
crypto_morus1280_store_u(dst, src);
}
static void crypto_morus1280_ad(struct morus1280_state *state, const u8 *src,
unsigned int size)
{
struct morus1280_block m;
if (MORUS1280_ALIGNED(src)) {
while (size >= MORUS1280_BLOCK_SIZE) {
crypto_morus1280_load_a(&m, src);
crypto_morus1280_update(state, &m);
size -= MORUS1280_BLOCK_SIZE;
src += MORUS1280_BLOCK_SIZE;
}
} else {
while (size >= MORUS1280_BLOCK_SIZE) {
crypto_morus1280_load_u(&m, src);
crypto_morus1280_update(state, &m);
size -= MORUS1280_BLOCK_SIZE;
src += MORUS1280_BLOCK_SIZE;
}
}
}
static void crypto_morus1280_core(const struct morus1280_state *state,
struct morus1280_block *blk)
{
unsigned int i;
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
blk->words[(i + 3) % MORUS_BLOCK_WORDS] ^= state->s[1].words[i];
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
blk->words[i] ^= state->s[0].words[i];
blk->words[i] ^= state->s[2].words[i] & state->s[3].words[i];
}
}
static void crypto_morus1280_encrypt_chunk(struct morus1280_state *state,
u8 *dst, const u8 *src,
unsigned int size)
{
struct morus1280_block c, m;
if (MORUS1280_ALIGNED(src) && MORUS1280_ALIGNED(dst)) {
while (size >= MORUS1280_BLOCK_SIZE) {
crypto_morus1280_load_a(&m, src);
c = m;
crypto_morus1280_core(state, &c);
crypto_morus1280_store_a(dst, &c);
crypto_morus1280_update(state, &m);
src += MORUS1280_BLOCK_SIZE;
dst += MORUS1280_BLOCK_SIZE;
size -= MORUS1280_BLOCK_SIZE;
}
} else {
while (size >= MORUS1280_BLOCK_SIZE) {
crypto_morus1280_load_u(&m, src);
c = m;
crypto_morus1280_core(state, &c);
crypto_morus1280_store_u(dst, &c);
crypto_morus1280_update(state, &m);
src += MORUS1280_BLOCK_SIZE;
dst += MORUS1280_BLOCK_SIZE;
size -= MORUS1280_BLOCK_SIZE;
}
}
if (size > 0) {
union morus1280_block_in tail;
memcpy(tail.bytes, src, size);
memset(tail.bytes + size, 0, MORUS1280_BLOCK_SIZE - size);
crypto_morus1280_load_a(&m, tail.bytes);
c = m;
crypto_morus1280_core(state, &c);
crypto_morus1280_store_a(tail.bytes, &c);
crypto_morus1280_update(state, &m);
memcpy(dst, tail.bytes, size);
}
}
static void crypto_morus1280_decrypt_chunk(struct morus1280_state *state,
u8 *dst, const u8 *src,
unsigned int size)
{
struct morus1280_block m;
if (MORUS1280_ALIGNED(src) && MORUS1280_ALIGNED(dst)) {
while (size >= MORUS1280_BLOCK_SIZE) {
crypto_morus1280_load_a(&m, src);
crypto_morus1280_core(state, &m);
crypto_morus1280_store_a(dst, &m);
crypto_morus1280_update(state, &m);
src += MORUS1280_BLOCK_SIZE;
dst += MORUS1280_BLOCK_SIZE;
size -= MORUS1280_BLOCK_SIZE;
}
} else {
while (size >= MORUS1280_BLOCK_SIZE) {
crypto_morus1280_load_u(&m, src);
crypto_morus1280_core(state, &m);
crypto_morus1280_store_u(dst, &m);
crypto_morus1280_update(state, &m);
src += MORUS1280_BLOCK_SIZE;
dst += MORUS1280_BLOCK_SIZE;
size -= MORUS1280_BLOCK_SIZE;
}
}
if (size > 0) {
union morus1280_block_in tail;
memcpy(tail.bytes, src, size);
memset(tail.bytes + size, 0, MORUS1280_BLOCK_SIZE - size);
crypto_morus1280_load_a(&m, tail.bytes);
crypto_morus1280_core(state, &m);
crypto_morus1280_store_a(tail.bytes, &m);
memset(tail.bytes + size, 0, MORUS1280_BLOCK_SIZE - size);
crypto_morus1280_load_a(&m, tail.bytes);
crypto_morus1280_update(state, &m);
memcpy(dst, tail.bytes, size);
}
}
static void crypto_morus1280_init(struct morus1280_state *state,
const struct morus1280_block *key,
const u8 *iv)
{
static const struct morus1280_block z = {};
union morus1280_block_in tmp;
unsigned int i;
memcpy(tmp.bytes, iv, MORUS_NONCE_SIZE);
memset(tmp.bytes + MORUS_NONCE_SIZE, 0,
MORUS1280_BLOCK_SIZE - MORUS_NONCE_SIZE);
crypto_morus1280_load(&state->s[0], tmp.bytes);
state->s[1] = *key;
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
state->s[2].words[i] = U64_C(0xFFFFFFFFFFFFFFFF);
state->s[3] = z;
state->s[4] = crypto_morus1280_const[0];
for (i = 0; i < 16; i++)
crypto_morus1280_update(state, &z);
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
state->s[1].words[i] ^= key->words[i];
}
static void crypto_morus1280_process_ad(struct morus1280_state *state,
struct scatterlist *sg_src,
unsigned int assoclen)
{
struct scatter_walk walk;
struct morus1280_block m;
union morus1280_block_in buf;
unsigned int pos = 0;
scatterwalk_start(&walk, sg_src);
while (assoclen != 0) {
unsigned int size = scatterwalk_clamp(&walk, assoclen);
unsigned int left = size;
void *mapped = scatterwalk_map(&walk);
const u8 *src = (const u8 *)mapped;
if (pos + size >= MORUS1280_BLOCK_SIZE) {
if (pos > 0) {
unsigned int fill = MORUS1280_BLOCK_SIZE - pos;
memcpy(buf.bytes + pos, src, fill);
crypto_morus1280_load_a(&m, buf.bytes);
crypto_morus1280_update(state, &m);
pos = 0;
left -= fill;
src += fill;
}
crypto_morus1280_ad(state, src, left);
src += left & ~(MORUS1280_BLOCK_SIZE - 1);
left &= MORUS1280_BLOCK_SIZE - 1;
}
memcpy(buf.bytes + pos, src, left);
pos += left;
assoclen -= size;
scatterwalk_unmap(mapped);
scatterwalk_advance(&walk, size);
scatterwalk_done(&walk, 0, assoclen);
}
if (pos > 0) {
memset(buf.bytes + pos, 0, MORUS1280_BLOCK_SIZE - pos);
crypto_morus1280_load_a(&m, buf.bytes);
crypto_morus1280_update(state, &m);
}
}
static void crypto_morus1280_process_crypt(struct morus1280_state *state,
struct aead_request *req,
const struct morus1280_ops *ops)
{
struct skcipher_walk walk;
u8 *dst;
const u8 *src;
ops->skcipher_walk_init(&walk, req, false);
while (walk.nbytes) {
src = walk.src.virt.addr;
dst = walk.dst.virt.addr;
ops->crypt_chunk(state, dst, src, walk.nbytes);
skcipher_walk_done(&walk, 0);
}
}
static void crypto_morus1280_final(struct morus1280_state *state,
struct morus1280_block *tag_xor,
u64 assoclen, u64 cryptlen)
{
u64 assocbits = assoclen * 8;
u64 cryptbits = cryptlen * 8;
struct morus1280_block tmp;
unsigned int i;
tmp.words[0] = cpu_to_le64(assocbits);
tmp.words[1] = cpu_to_le64(cryptbits);
tmp.words[2] = 0;
tmp.words[3] = 0;
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
state->s[4].words[i] ^= state->s[0].words[i];
for (i = 0; i < 10; i++)
crypto_morus1280_update(state, &tmp);
crypto_morus1280_core(state, tag_xor);
}
static int crypto_morus1280_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
union morus1280_block_in tmp;
if (keylen == MORUS1280_BLOCK_SIZE)
crypto_morus1280_load(&ctx->key, key);
else if (keylen == MORUS1280_BLOCK_SIZE / 2) {
memcpy(tmp.bytes, key, keylen);
memcpy(tmp.bytes + keylen, key, keylen);
crypto_morus1280_load(&ctx->key, tmp.bytes);
} else {
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
return 0;
}
static int crypto_morus1280_setauthsize(struct crypto_aead *tfm,
unsigned int authsize)
{
return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
}
static void crypto_morus1280_crypt(struct aead_request *req,
struct morus1280_block *tag_xor,
unsigned int cryptlen,
const struct morus1280_ops *ops)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
struct morus1280_state state;
crypto_morus1280_init(&state, &ctx->key, req->iv);
crypto_morus1280_process_ad(&state, req->src, req->assoclen);
crypto_morus1280_process_crypt(&state, req, ops);
crypto_morus1280_final(&state, tag_xor, req->assoclen, cryptlen);
}
static int crypto_morus1280_encrypt(struct aead_request *req)
{
static const struct morus1280_ops ops = {
.skcipher_walk_init = skcipher_walk_aead_encrypt,
.crypt_chunk = crypto_morus1280_encrypt_chunk,
};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct morus1280_block tag = {};
union morus1280_block_in tag_out;
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen;
crypto_morus1280_crypt(req, &tag, cryptlen, &ops);
crypto_morus1280_store(tag_out.bytes, &tag);
scatterwalk_map_and_copy(tag_out.bytes, req->dst,
req->assoclen + cryptlen, authsize, 1);
return 0;
}
static int crypto_morus1280_decrypt(struct aead_request *req)
{
static const struct morus1280_ops ops = {
.skcipher_walk_init = skcipher_walk_aead_decrypt,
.crypt_chunk = crypto_morus1280_decrypt_chunk,
};
static const u8 zeros[MORUS1280_BLOCK_SIZE] = {};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
union morus1280_block_in tag_in;
struct morus1280_block tag;
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen - authsize;
scatterwalk_map_and_copy(tag_in.bytes, req->src,
req->assoclen + cryptlen, authsize, 0);
crypto_morus1280_load(&tag, tag_in.bytes);
crypto_morus1280_crypt(req, &tag, cryptlen, &ops);
crypto_morus1280_store(tag_in.bytes, &tag);
return crypto_memneq(tag_in.bytes, zeros, authsize) ? -EBADMSG : 0;
}
static int crypto_morus1280_init_tfm(struct crypto_aead *tfm)
{
return 0;
}
static void crypto_morus1280_exit_tfm(struct crypto_aead *tfm)
{
}
static struct aead_alg crypto_morus1280_alg = {
.setkey = crypto_morus1280_setkey,
.setauthsize = crypto_morus1280_setauthsize,
.encrypt = crypto_morus1280_encrypt,
.decrypt = crypto_morus1280_decrypt,
.init = crypto_morus1280_init_tfm,
.exit = crypto_morus1280_exit_tfm,
.ivsize = MORUS_NONCE_SIZE,
.maxauthsize = MORUS_MAX_AUTH_SIZE,
.chunksize = MORUS1280_BLOCK_SIZE,
.base = {
.cra_flags = CRYPTO_ALG_TYPE_AEAD,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct morus1280_ctx),
.cra_alignmask = 0,
.cra_priority = 100,
.cra_name = "morus1280",
.cra_driver_name = "morus1280-generic",
.cra_module = THIS_MODULE,
}
};
static int __init crypto_morus1280_module_init(void)
{
return crypto_register_aead(&crypto_morus1280_alg);
}
static void __exit crypto_morus1280_module_exit(void)
{
crypto_unregister_aead(&crypto_morus1280_alg);
}
module_init(crypto_morus1280_module_init);
module_exit(crypto_morus1280_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm");
MODULE_ALIAS_CRYPTO("morus1280");
MODULE_ALIAS_CRYPTO("morus1280-generic");

544
crypto/morus640.c Normal file
View File

@ -0,0 +1,544 @@
/*
* The MORUS-640 Authenticated-Encryption Algorithm
*
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/morus_common.h>
#include <crypto/scatterwalk.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#define MORUS640_WORD_SIZE 4
#define MORUS640_BLOCK_SIZE (MORUS_BLOCK_WORDS * MORUS640_WORD_SIZE)
#define MORUS640_BLOCK_ALIGN (__alignof__(__le32))
#define MORUS640_ALIGNED(p) IS_ALIGNED((uintptr_t)p, MORUS640_BLOCK_ALIGN)
struct morus640_block {
u32 words[MORUS_BLOCK_WORDS];
};
union morus640_block_in {
__le32 words[MORUS_BLOCK_WORDS];
u8 bytes[MORUS640_BLOCK_SIZE];
};
struct morus640_state {
struct morus640_block s[MORUS_STATE_BLOCKS];
};
struct morus640_ctx {
struct morus640_block key;
};
struct morus640_ops {
int (*skcipher_walk_init)(struct skcipher_walk *walk,
struct aead_request *req, bool atomic);
void (*crypt_chunk)(struct morus640_state *state,
u8 *dst, const u8 *src, unsigned int size);
};
static const struct morus640_block crypto_morus640_const[2] = {
{ .words = {
U32_C(0x02010100),
U32_C(0x0d080503),
U32_C(0x59372215),
U32_C(0x6279e990),
} },
{ .words = {
U32_C(0x55183ddb),
U32_C(0xf12fc26d),
U32_C(0x42311120),
U32_C(0xdd28b573),
} },
};
static void crypto_morus640_round(struct morus640_block *b0,
struct morus640_block *b1,
struct morus640_block *b2,
struct morus640_block *b3,
struct morus640_block *b4,
const struct morus640_block *m,
unsigned int b, unsigned int w)
{
unsigned int i;
struct morus640_block tmp;
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
b0->words[i] ^= b1->words[i] & b2->words[i];
b0->words[i] ^= b3->words[i];
b0->words[i] ^= m->words[i];
b0->words[i] = rol32(b0->words[i], b);
}
tmp = *b3;
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
b3->words[(i + w) % MORUS_BLOCK_WORDS] = tmp.words[i];
}
static void crypto_morus640_update(struct morus640_state *state,
const struct morus640_block *m)
{
static const struct morus640_block z = {};
struct morus640_block *s = state->s;
crypto_morus640_round(&s[0], &s[1], &s[2], &s[3], &s[4], &z, 5, 1);
crypto_morus640_round(&s[1], &s[2], &s[3], &s[4], &s[0], m, 31, 2);
crypto_morus640_round(&s[2], &s[3], &s[4], &s[0], &s[1], m, 7, 3);
crypto_morus640_round(&s[3], &s[4], &s[0], &s[1], &s[2], m, 22, 2);
crypto_morus640_round(&s[4], &s[0], &s[1], &s[2], &s[3], m, 13, 1);
}
static void crypto_morus640_load_a(struct morus640_block *dst, const u8 *src)
{
unsigned int i;
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
dst->words[i] = le32_to_cpu(*(const __le32 *)src);
src += MORUS640_WORD_SIZE;
}
}
static void crypto_morus640_load_u(struct morus640_block *dst, const u8 *src)
{
unsigned int i;
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
dst->words[i] = get_unaligned_le32(src);
src += MORUS640_WORD_SIZE;
}
}
static void crypto_morus640_load(struct morus640_block *dst, const u8 *src)
{
if (MORUS640_ALIGNED(src))
crypto_morus640_load_a(dst, src);
else
crypto_morus640_load_u(dst, src);
}
static void crypto_morus640_store_a(u8 *dst, const struct morus640_block *src)
{
unsigned int i;
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
*(__le32 *)dst = cpu_to_le32(src->words[i]);
dst += MORUS640_WORD_SIZE;
}
}
static void crypto_morus640_store_u(u8 *dst, const struct morus640_block *src)
{
unsigned int i;
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
put_unaligned_le32(src->words[i], dst);
dst += MORUS640_WORD_SIZE;
}
}
static void crypto_morus640_store(u8 *dst, const struct morus640_block *src)
{
if (MORUS640_ALIGNED(dst))
crypto_morus640_store_a(dst, src);
else
crypto_morus640_store_u(dst, src);
}
static void crypto_morus640_ad(struct morus640_state *state, const u8 *src,
unsigned int size)
{
struct morus640_block m;
if (MORUS640_ALIGNED(src)) {
while (size >= MORUS640_BLOCK_SIZE) {
crypto_morus640_load_a(&m, src);
crypto_morus640_update(state, &m);
size -= MORUS640_BLOCK_SIZE;
src += MORUS640_BLOCK_SIZE;
}
} else {
while (size >= MORUS640_BLOCK_SIZE) {
crypto_morus640_load_u(&m, src);
crypto_morus640_update(state, &m);
size -= MORUS640_BLOCK_SIZE;
src += MORUS640_BLOCK_SIZE;
}
}
}
static void crypto_morus640_core(const struct morus640_state *state,
struct morus640_block *blk)
{
unsigned int i;
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
blk->words[(i + 3) % MORUS_BLOCK_WORDS] ^= state->s[1].words[i];
for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
blk->words[i] ^= state->s[0].words[i];
blk->words[i] ^= state->s[2].words[i] & state->s[3].words[i];
}
}
static void crypto_morus640_encrypt_chunk(struct morus640_state *state, u8 *dst,
const u8 *src, unsigned int size)
{
struct morus640_block c, m;
if (MORUS640_ALIGNED(src) && MORUS640_ALIGNED(dst)) {
while (size >= MORUS640_BLOCK_SIZE) {
crypto_morus640_load_a(&m, src);
c = m;
crypto_morus640_core(state, &c);
crypto_morus640_store_a(dst, &c);
crypto_morus640_update(state, &m);
src += MORUS640_BLOCK_SIZE;
dst += MORUS640_BLOCK_SIZE;
size -= MORUS640_BLOCK_SIZE;
}
} else {
while (size >= MORUS640_BLOCK_SIZE) {
crypto_morus640_load_u(&m, src);
c = m;
crypto_morus640_core(state, &c);
crypto_morus640_store_u(dst, &c);
crypto_morus640_update(state, &m);
src += MORUS640_BLOCK_SIZE;
dst += MORUS640_BLOCK_SIZE;
size -= MORUS640_BLOCK_SIZE;
}
}
if (size > 0) {
union morus640_block_in tail;
memcpy(tail.bytes, src, size);
memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size);
crypto_morus640_load_a(&m, tail.bytes);
c = m;
crypto_morus640_core(state, &c);
crypto_morus640_store_a(tail.bytes, &c);
crypto_morus640_update(state, &m);
memcpy(dst, tail.bytes, size);
}
}
static void crypto_morus640_decrypt_chunk(struct morus640_state *state, u8 *dst,
const u8 *src, unsigned int size)
{
struct morus640_block m;
if (MORUS640_ALIGNED(src) && MORUS640_ALIGNED(dst)) {
while (size >= MORUS640_BLOCK_SIZE) {
crypto_morus640_load_a(&m, src);
crypto_morus640_core(state, &m);
crypto_morus640_store_a(dst, &m);
crypto_morus640_update(state, &m);
src += MORUS640_BLOCK_SIZE;
dst += MORUS640_BLOCK_SIZE;
size -= MORUS640_BLOCK_SIZE;
}
} else {
while (size >= MORUS640_BLOCK_SIZE) {
crypto_morus640_load_u(&m, src);
crypto_morus640_core(state, &m);
crypto_morus640_store_u(dst, &m);
crypto_morus640_update(state, &m);
src += MORUS640_BLOCK_SIZE;
dst += MORUS640_BLOCK_SIZE;
size -= MORUS640_BLOCK_SIZE;
}
}
if (size > 0) {
union morus640_block_in tail;
memcpy(tail.bytes, src, size);
crypto_morus640_load_a(&m, src);
crypto_morus640_core(state, &m);
crypto_morus640_store_a(tail.bytes, &m);
memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size);
crypto_morus640_load_a(&m, tail.bytes);
crypto_morus640_update(state, &m);
memcpy(dst, tail.bytes, size);
}
}
static void crypto_morus640_init(struct morus640_state *state,
const struct morus640_block *key,
const u8 *iv)
{
static const struct morus640_block z = {};
unsigned int i;
crypto_morus640_load(&state->s[0], iv);
state->s[1] = *key;
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
state->s[2].words[i] = U32_C(0xFFFFFFFF);
state->s[3] = crypto_morus640_const[0];
state->s[4] = crypto_morus640_const[1];
for (i = 0; i < 16; i++)
crypto_morus640_update(state, &z);
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
state->s[1].words[i] ^= key->words[i];
}
static void crypto_morus640_process_ad(struct morus640_state *state,
struct scatterlist *sg_src,
unsigned int assoclen)
{
struct scatter_walk walk;
struct morus640_block m;
union morus640_block_in buf;
unsigned int pos = 0;
scatterwalk_start(&walk, sg_src);
while (assoclen != 0) {
unsigned int size = scatterwalk_clamp(&walk, assoclen);
unsigned int left = size;
void *mapped = scatterwalk_map(&walk);
const u8 *src = (const u8 *)mapped;
if (pos + size >= MORUS640_BLOCK_SIZE) {
if (pos > 0) {
unsigned int fill = MORUS640_BLOCK_SIZE - pos;
memcpy(buf.bytes + pos, src, fill);
crypto_morus640_load_a(&m, buf.bytes);
crypto_morus640_update(state, &m);
pos = 0;
left -= fill;
src += fill;
}
crypto_morus640_ad(state, src, left);
src += left & ~(MORUS640_BLOCK_SIZE - 1);
left &= MORUS640_BLOCK_SIZE - 1;
}
memcpy(buf.bytes + pos, src, left);
pos += left;
assoclen -= size;
scatterwalk_unmap(mapped);
scatterwalk_advance(&walk, size);
scatterwalk_done(&walk, 0, assoclen);
}
if (pos > 0) {
memset(buf.bytes + pos, 0, MORUS640_BLOCK_SIZE - pos);
crypto_morus640_load_a(&m, buf.bytes);
crypto_morus640_update(state, &m);
}
}
static void crypto_morus640_process_crypt(struct morus640_state *state,
struct aead_request *req,
const struct morus640_ops *ops)
{
struct skcipher_walk walk;
u8 *dst;
const u8 *src;
ops->skcipher_walk_init(&walk, req, false);
while (walk.nbytes) {
src = walk.src.virt.addr;
dst = walk.dst.virt.addr;
ops->crypt_chunk(state, dst, src, walk.nbytes);
skcipher_walk_done(&walk, 0);
}
}
static void crypto_morus640_final(struct morus640_state *state,
struct morus640_block *tag_xor,
u64 assoclen, u64 cryptlen)
{
u64 assocbits = assoclen * 8;
u64 cryptbits = cryptlen * 8;
u32 assocbits_lo = (u32)assocbits;
u32 assocbits_hi = (u32)(assocbits >> 32);
u32 cryptbits_lo = (u32)cryptbits;
u32 cryptbits_hi = (u32)(cryptbits >> 32);
struct morus640_block tmp;
unsigned int i;
tmp.words[0] = cpu_to_le32(assocbits_lo);
tmp.words[1] = cpu_to_le32(assocbits_hi);
tmp.words[2] = cpu_to_le32(cryptbits_lo);
tmp.words[3] = cpu_to_le32(cryptbits_hi);
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
state->s[4].words[i] ^= state->s[0].words[i];
for (i = 0; i < 10; i++)
crypto_morus640_update(state, &tmp);
crypto_morus640_core(state, tag_xor);
}
static int crypto_morus640_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct morus640_ctx *ctx = crypto_aead_ctx(aead);
if (keylen != MORUS640_BLOCK_SIZE) {
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
crypto_morus640_load(&ctx->key, key);
return 0;
}
static int crypto_morus640_setauthsize(struct crypto_aead *tfm,
unsigned int authsize)
{
return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
}
static void crypto_morus640_crypt(struct aead_request *req,
struct morus640_block *tag_xor,
unsigned int cryptlen,
const struct morus640_ops *ops)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
struct morus640_state state;
crypto_morus640_init(&state, &ctx->key, req->iv);
crypto_morus640_process_ad(&state, req->src, req->assoclen);
crypto_morus640_process_crypt(&state, req, ops);
crypto_morus640_final(&state, tag_xor, req->assoclen, cryptlen);
}
static int crypto_morus640_encrypt(struct aead_request *req)
{
static const struct morus640_ops ops = {
.skcipher_walk_init = skcipher_walk_aead_encrypt,
.crypt_chunk = crypto_morus640_encrypt_chunk,
};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct morus640_block tag = {};
union morus640_block_in tag_out;
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen;
crypto_morus640_crypt(req, &tag, cryptlen, &ops);
crypto_morus640_store(tag_out.bytes, &tag);
scatterwalk_map_and_copy(tag_out.bytes, req->dst,
req->assoclen + cryptlen, authsize, 1);
return 0;
}
static int crypto_morus640_decrypt(struct aead_request *req)
{
static const struct morus640_ops ops = {
.skcipher_walk_init = skcipher_walk_aead_decrypt,
.crypt_chunk = crypto_morus640_decrypt_chunk,
};
static const u8 zeros[MORUS640_BLOCK_SIZE] = {};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
union morus640_block_in tag_in;
struct morus640_block tag;
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen - authsize;
scatterwalk_map_and_copy(tag_in.bytes, req->src,
req->assoclen + cryptlen, authsize, 0);
crypto_morus640_load(&tag, tag_in.bytes);
crypto_morus640_crypt(req, &tag, cryptlen, &ops);
crypto_morus640_store(tag_in.bytes, &tag);
return crypto_memneq(tag_in.bytes, zeros, authsize) ? -EBADMSG : 0;
}
static int crypto_morus640_init_tfm(struct crypto_aead *tfm)
{
return 0;
}
static void crypto_morus640_exit_tfm(struct crypto_aead *tfm)
{
}
static struct aead_alg crypto_morus640_alg = {
.setkey = crypto_morus640_setkey,
.setauthsize = crypto_morus640_setauthsize,
.encrypt = crypto_morus640_encrypt,
.decrypt = crypto_morus640_decrypt,
.init = crypto_morus640_init_tfm,
.exit = crypto_morus640_exit_tfm,
.ivsize = MORUS_NONCE_SIZE,
.maxauthsize = MORUS_MAX_AUTH_SIZE,
.chunksize = MORUS640_BLOCK_SIZE,
.base = {
.cra_flags = CRYPTO_ALG_TYPE_AEAD,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct morus640_ctx),
.cra_alignmask = 0,
.cra_priority = 100,
.cra_name = "morus640",
.cra_driver_name = "morus640-generic",
.cra_module = THIS_MODULE,
}
};
static int __init crypto_morus640_module_init(void)
{
return crypto_register_aead(&crypto_morus640_alg);
}
static void __exit crypto_morus640_module_exit(void)
{
crypto_unregister_aead(&crypto_morus640_alg);
}
module_init(crypto_morus640_module_init);
module_exit(crypto_morus640_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("MORUS-640 AEAD algorithm");
MODULE_ALIAS_CRYPTO("morus640");
MODULE_ALIAS_CRYPTO("morus640-generic");

View File

@ -14,6 +14,7 @@
*
*/
#include <crypto/algapi.h>
#include <crypto/internal/skcipher.h>
#include <linux/err.h>
#include <linux/init.h>
@ -72,7 +73,7 @@ static int crypto_pcbc_encrypt_inplace(struct skcipher_request *req,
unsigned int nbytes = walk->nbytes;
u8 *src = walk->src.virt.addr;
u8 *iv = walk->iv;
u8 tmpbuf[bsize];
u8 tmpbuf[MAX_CIPHER_BLOCKSIZE];
do {
memcpy(tmpbuf, src, bsize);
@ -144,7 +145,7 @@ static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req,
unsigned int nbytes = walk->nbytes;
u8 *src = walk->src.virt.addr;
u8 *iv = walk->iv;
u8 tmpbuf[bsize] __aligned(__alignof__(u32));
u8 tmpbuf[MAX_CIPHER_BLOCKSIZE] __aligned(__alignof__(u32));
do {
memcpy(tmpbuf, src, bsize);

View File

@ -215,7 +215,6 @@ static int rsa_verify(struct akcipher_request *req)
goto err_free_m;
}
ret = -ENOMEM;
s = mpi_read_raw_from_sgl(req->src, req->src_len);
if (!s) {
ret = -ENOMEM;

View File

@ -21,9 +21,17 @@
#include <asm/unaligned.h>
#include <crypto/internal/skcipher.h>
#include <crypto/salsa20.h>
#include <linux/module.h>
#define SALSA20_IV_SIZE 8
#define SALSA20_MIN_KEY_SIZE 16
#define SALSA20_MAX_KEY_SIZE 32
#define SALSA20_BLOCK_SIZE 64
struct salsa20_ctx {
u32 initial_state[16];
};
static void salsa20_block(u32 *state, __le32 *stream)
{
u32 x[16];
@ -93,16 +101,15 @@ static void salsa20_docrypt(u32 *state, u8 *dst, const u8 *src,
}
}
void crypto_salsa20_init(u32 *state, const struct salsa20_ctx *ctx,
static void salsa20_init(u32 *state, const struct salsa20_ctx *ctx,
const u8 *iv)
{
memcpy(state, ctx->initial_state, sizeof(ctx->initial_state));
state[6] = get_unaligned_le32(iv + 0);
state[7] = get_unaligned_le32(iv + 4);
}
EXPORT_SYMBOL_GPL(crypto_salsa20_init);
int crypto_salsa20_setkey(struct crypto_skcipher *tfm, const u8 *key,
static int salsa20_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keysize)
{
static const char sigma[16] = "expand 32-byte k";
@ -143,7 +150,6 @@ int crypto_salsa20_setkey(struct crypto_skcipher *tfm, const u8 *key,
return 0;
}
EXPORT_SYMBOL_GPL(crypto_salsa20_setkey);
static int salsa20_crypt(struct skcipher_request *req)
{
@ -155,7 +161,7 @@ static int salsa20_crypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, true);
crypto_salsa20_init(state, ctx, walk.iv);
salsa20_init(state, ctx, walk.iv);
while (walk.nbytes > 0) {
unsigned int nbytes = walk.nbytes;
@ -183,7 +189,7 @@ static struct skcipher_alg alg = {
.max_keysize = SALSA20_MAX_KEY_SIZE,
.ivsize = SALSA20_IV_SIZE,
.chunksize = SALSA20_BLOCK_SIZE,
.setkey = crypto_salsa20_setkey,
.setkey = salsa20_setkey,
.encrypt = salsa20_crypt,
.decrypt = salsa20_crypt,
};

View File

@ -190,21 +190,23 @@ static void sm4_do_crypt(const u32 *rk, u32 *out, const u32 *in)
/* encrypt a block of text */
static void sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
sm4_do_crypt(ctx->rkey_enc, (u32 *)out, (u32 *)in);
}
EXPORT_SYMBOL_GPL(crypto_sm4_encrypt);
/* decrypt a block of text */
static void sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
sm4_do_crypt(ctx->rkey_dec, (u32 *)out, (u32 *)in);
}
EXPORT_SYMBOL_GPL(crypto_sm4_decrypt);
static struct crypto_alg sm4_alg = {
.cra_name = "sm4",
@ -219,8 +221,8 @@ static struct crypto_alg sm4_alg = {
.cia_min_keysize = SM4_KEY_SIZE,
.cia_max_keysize = SM4_KEY_SIZE,
.cia_setkey = crypto_sm4_set_key,
.cia_encrypt = sm4_encrypt,
.cia_decrypt = sm4_decrypt
.cia_encrypt = crypto_sm4_encrypt,
.cia_decrypt = crypto_sm4_decrypt
}
}
};

View File

@ -158,9 +158,9 @@ struct test_mb_aead_data {
};
static int do_mult_aead_op(struct test_mb_aead_data *data, int enc,
u32 num_mb)
u32 num_mb, int *rc)
{
int i, rc[num_mb], err = 0;
int i, err = 0;
/* Fire up a bunch of concurrent requests */
for (i = 0; i < num_mb; i++) {
@ -188,18 +188,26 @@ static int test_mb_aead_jiffies(struct test_mb_aead_data *data, int enc,
{
unsigned long start, end;
int bcount;
int ret;
int ret = 0;
int *rc;
rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
if (!rc)
return -ENOMEM;
for (start = jiffies, end = start + secs * HZ, bcount = 0;
time_before(jiffies, end); bcount++) {
ret = do_mult_aead_op(data, enc, num_mb);
ret = do_mult_aead_op(data, enc, num_mb, rc);
if (ret)
return ret;
goto out;
}
pr_cont("%d operations in %d seconds (%ld bytes)\n",
bcount * num_mb, secs, (long)bcount * blen * num_mb);
return 0;
out:
kfree(rc);
return ret;
}
static int test_mb_aead_cycles(struct test_mb_aead_data *data, int enc,
@ -208,10 +216,15 @@ static int test_mb_aead_cycles(struct test_mb_aead_data *data, int enc,
unsigned long cycles = 0;
int ret = 0;
int i;
int *rc;
rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
if (!rc)
return -ENOMEM;
/* Warm-up run. */
for (i = 0; i < 4; i++) {
ret = do_mult_aead_op(data, enc, num_mb);
ret = do_mult_aead_op(data, enc, num_mb, rc);
if (ret)
goto out;
}
@ -221,7 +234,7 @@ static int test_mb_aead_cycles(struct test_mb_aead_data *data, int enc,
cycles_t start, end;
start = get_cycles();
ret = do_mult_aead_op(data, enc, num_mb);
ret = do_mult_aead_op(data, enc, num_mb, rc);
end = get_cycles();
if (ret)
@ -230,11 +243,11 @@ static int test_mb_aead_cycles(struct test_mb_aead_data *data, int enc,
cycles += end - start;
}
out:
if (ret == 0)
pr_cont("1 operation in %lu cycles (%d bytes)\n",
(cycles + 4) / (8 * num_mb), blen);
pr_cont("1 operation in %lu cycles (%d bytes)\n",
(cycles + 4) / (8 * num_mb), blen);
out:
kfree(rc);
return ret;
}
@ -705,9 +718,10 @@ struct test_mb_ahash_data {
char *xbuf[XBUFSIZE];
};
static inline int do_mult_ahash_op(struct test_mb_ahash_data *data, u32 num_mb)
static inline int do_mult_ahash_op(struct test_mb_ahash_data *data, u32 num_mb,
int *rc)
{
int i, rc[num_mb], err = 0;
int i, err = 0;
/* Fire up a bunch of concurrent requests */
for (i = 0; i < num_mb; i++)
@ -731,18 +745,26 @@ static int test_mb_ahash_jiffies(struct test_mb_ahash_data *data, int blen,
{
unsigned long start, end;
int bcount;
int ret;
int ret = 0;
int *rc;
rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
if (!rc)
return -ENOMEM;
for (start = jiffies, end = start + secs * HZ, bcount = 0;
time_before(jiffies, end); bcount++) {
ret = do_mult_ahash_op(data, num_mb);
ret = do_mult_ahash_op(data, num_mb, rc);
if (ret)
return ret;
goto out;
}
pr_cont("%d operations in %d seconds (%ld bytes)\n",
bcount * num_mb, secs, (long)bcount * blen * num_mb);
return 0;
out:
kfree(rc);
return ret;
}
static int test_mb_ahash_cycles(struct test_mb_ahash_data *data, int blen,
@ -751,10 +773,15 @@ static int test_mb_ahash_cycles(struct test_mb_ahash_data *data, int blen,
unsigned long cycles = 0;
int ret = 0;
int i;
int *rc;
rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
if (!rc)
return -ENOMEM;
/* Warm-up run. */
for (i = 0; i < 4; i++) {
ret = do_mult_ahash_op(data, num_mb);
ret = do_mult_ahash_op(data, num_mb, rc);
if (ret)
goto out;
}
@ -764,7 +791,7 @@ static int test_mb_ahash_cycles(struct test_mb_ahash_data *data, int blen,
cycles_t start, end;
start = get_cycles();
ret = do_mult_ahash_op(data, num_mb);
ret = do_mult_ahash_op(data, num_mb, rc);
end = get_cycles();
if (ret)
@ -773,11 +800,11 @@ static int test_mb_ahash_cycles(struct test_mb_ahash_data *data, int blen,
cycles += end - start;
}
out:
if (ret == 0)
pr_cont("1 operation in %lu cycles (%d bytes)\n",
(cycles + 4) / (8 * num_mb), blen);
pr_cont("1 operation in %lu cycles (%d bytes)\n",
(cycles + 4) / (8 * num_mb), blen);
out:
kfree(rc);
return ret;
}
@ -1118,9 +1145,9 @@ struct test_mb_skcipher_data {
};
static int do_mult_acipher_op(struct test_mb_skcipher_data *data, int enc,
u32 num_mb)
u32 num_mb, int *rc)
{
int i, rc[num_mb], err = 0;
int i, err = 0;
/* Fire up a bunch of concurrent requests */
for (i = 0; i < num_mb; i++) {
@ -1148,18 +1175,26 @@ static int test_mb_acipher_jiffies(struct test_mb_skcipher_data *data, int enc,
{
unsigned long start, end;
int bcount;
int ret;
int ret = 0;
int *rc;
rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
if (!rc)
return -ENOMEM;
for (start = jiffies, end = start + secs * HZ, bcount = 0;
time_before(jiffies, end); bcount++) {
ret = do_mult_acipher_op(data, enc, num_mb);
ret = do_mult_acipher_op(data, enc, num_mb, rc);
if (ret)
return ret;
goto out;
}
pr_cont("%d operations in %d seconds (%ld bytes)\n",
bcount * num_mb, secs, (long)bcount * blen * num_mb);
return 0;
out:
kfree(rc);
return ret;
}
static int test_mb_acipher_cycles(struct test_mb_skcipher_data *data, int enc,
@ -1168,10 +1203,15 @@ static int test_mb_acipher_cycles(struct test_mb_skcipher_data *data, int enc,
unsigned long cycles = 0;
int ret = 0;
int i;
int *rc;
rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
if (!rc)
return -ENOMEM;
/* Warm-up run. */
for (i = 0; i < 4; i++) {
ret = do_mult_acipher_op(data, enc, num_mb);
ret = do_mult_acipher_op(data, enc, num_mb, rc);
if (ret)
goto out;
}
@ -1181,7 +1221,7 @@ static int test_mb_acipher_cycles(struct test_mb_skcipher_data *data, int enc,
cycles_t start, end;
start = get_cycles();
ret = do_mult_acipher_op(data, enc, num_mb);
ret = do_mult_acipher_op(data, enc, num_mb, rc);
end = get_cycles();
if (ret)
@ -1190,11 +1230,11 @@ static int test_mb_acipher_cycles(struct test_mb_skcipher_data *data, int enc,
cycles += end - start;
}
out:
if (ret == 0)
pr_cont("1 operation in %lu cycles (%d bytes)\n",
(cycles + 4) / (8 * num_mb), blen);
pr_cont("1 operation in %lu cycles (%d bytes)\n",
(cycles + 4) / (8 * num_mb), blen);
out:
kfree(rc);
return ret;
}
@ -1606,7 +1646,7 @@ static inline int tcrypt_test(const char *alg)
return ret;
}
static int do_test(const char *alg, u32 type, u32 mask, int m)
static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
{
int i;
int ret = 0;
@ -1621,7 +1661,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
}
for (i = 1; i < 200; i++)
ret += do_test(NULL, 0, 0, i);
ret += do_test(NULL, 0, 0, i, num_mb);
break;
case 1:
@ -1902,10 +1942,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
ret += tcrypt_test("vmac(aes)");
break;
case 110:
ret += tcrypt_test("hmac(crc32)");
break;
case 111:
ret += tcrypt_test("hmac(sha3-224)");
break;
@ -2903,7 +2939,7 @@ static int __init tcrypt_mod_init(void)
goto err_free_tv;
}
err = do_test(alg, type, mask, mode);
err = do_test(alg, type, mask, mode, num_mb);
if (err) {
printk(KERN_ERR "tcrypt: one or more tests failed!\n");

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

265
crypto/zstd.c Normal file
View File

@ -0,0 +1,265 @@
/*
* Cryptographic API.
*
* Copyright (c) 2017-present, Facebook, Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/net.h>
#include <linux/vmalloc.h>
#include <linux/zstd.h>
#include <crypto/internal/scompress.h>
#define ZSTD_DEF_LEVEL 3
struct zstd_ctx {
ZSTD_CCtx *cctx;
ZSTD_DCtx *dctx;
void *cwksp;
void *dwksp;
};
static ZSTD_parameters zstd_params(void)
{
return ZSTD_getParams(ZSTD_DEF_LEVEL, 0, 0);
}
static int zstd_comp_init(struct zstd_ctx *ctx)
{
int ret = 0;
const ZSTD_parameters params = zstd_params();
const size_t wksp_size = ZSTD_CCtxWorkspaceBound(params.cParams);
ctx->cwksp = vzalloc(wksp_size);
if (!ctx->cwksp) {
ret = -ENOMEM;
goto out;
}
ctx->cctx = ZSTD_initCCtx(ctx->cwksp, wksp_size);
if (!ctx->cctx) {
ret = -EINVAL;
goto out_free;
}
out:
return ret;
out_free:
vfree(ctx->cwksp);
goto out;
}
static int zstd_decomp_init(struct zstd_ctx *ctx)
{
int ret = 0;
const size_t wksp_size = ZSTD_DCtxWorkspaceBound();
ctx->dwksp = vzalloc(wksp_size);
if (!ctx->dwksp) {
ret = -ENOMEM;
goto out;
}
ctx->dctx = ZSTD_initDCtx(ctx->dwksp, wksp_size);
if (!ctx->dctx) {
ret = -EINVAL;
goto out_free;
}
out:
return ret;
out_free:
vfree(ctx->dwksp);
goto out;
}
static void zstd_comp_exit(struct zstd_ctx *ctx)
{
vfree(ctx->cwksp);
ctx->cwksp = NULL;
ctx->cctx = NULL;
}
static void zstd_decomp_exit(struct zstd_ctx *ctx)
{
vfree(ctx->dwksp);
ctx->dwksp = NULL;
ctx->dctx = NULL;
}
static int __zstd_init(void *ctx)
{
int ret;
ret = zstd_comp_init(ctx);
if (ret)
return ret;
ret = zstd_decomp_init(ctx);
if (ret)
zstd_comp_exit(ctx);
return ret;
}
static void *zstd_alloc_ctx(struct crypto_scomp *tfm)
{
int ret;
struct zstd_ctx *ctx;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return ERR_PTR(-ENOMEM);
ret = __zstd_init(ctx);
if (ret) {
kfree(ctx);
return ERR_PTR(ret);
}
return ctx;
}
static int zstd_init(struct crypto_tfm *tfm)
{
struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
return __zstd_init(ctx);
}
static void __zstd_exit(void *ctx)
{
zstd_comp_exit(ctx);
zstd_decomp_exit(ctx);
}
static void zstd_free_ctx(struct crypto_scomp *tfm, void *ctx)
{
__zstd_exit(ctx);
kzfree(ctx);
}
static void zstd_exit(struct crypto_tfm *tfm)
{
struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
__zstd_exit(ctx);
}
static int __zstd_compress(const u8 *src, unsigned int slen,
u8 *dst, unsigned int *dlen, void *ctx)
{
size_t out_len;
struct zstd_ctx *zctx = ctx;
const ZSTD_parameters params = zstd_params();
out_len = ZSTD_compressCCtx(zctx->cctx, dst, *dlen, src, slen, params);
if (ZSTD_isError(out_len))
return -EINVAL;
*dlen = out_len;
return 0;
}
static int zstd_compress(struct crypto_tfm *tfm, const u8 *src,
unsigned int slen, u8 *dst, unsigned int *dlen)
{
struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
return __zstd_compress(src, slen, dst, dlen, ctx);
}
static int zstd_scompress(struct crypto_scomp *tfm, const u8 *src,
unsigned int slen, u8 *dst, unsigned int *dlen,
void *ctx)
{
return __zstd_compress(src, slen, dst, dlen, ctx);
}
static int __zstd_decompress(const u8 *src, unsigned int slen,
u8 *dst, unsigned int *dlen, void *ctx)
{
size_t out_len;
struct zstd_ctx *zctx = ctx;
out_len = ZSTD_decompressDCtx(zctx->dctx, dst, *dlen, src, slen);
if (ZSTD_isError(out_len))
return -EINVAL;
*dlen = out_len;
return 0;
}
static int zstd_decompress(struct crypto_tfm *tfm, const u8 *src,
unsigned int slen, u8 *dst, unsigned int *dlen)
{
struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
return __zstd_decompress(src, slen, dst, dlen, ctx);
}
static int zstd_sdecompress(struct crypto_scomp *tfm, const u8 *src,
unsigned int slen, u8 *dst, unsigned int *dlen,
void *ctx)
{
return __zstd_decompress(src, slen, dst, dlen, ctx);
}
static struct crypto_alg alg = {
.cra_name = "zstd",
.cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
.cra_ctxsize = sizeof(struct zstd_ctx),
.cra_module = THIS_MODULE,
.cra_init = zstd_init,
.cra_exit = zstd_exit,
.cra_u = { .compress = {
.coa_compress = zstd_compress,
.coa_decompress = zstd_decompress } }
};
static struct scomp_alg scomp = {
.alloc_ctx = zstd_alloc_ctx,
.free_ctx = zstd_free_ctx,
.compress = zstd_scompress,
.decompress = zstd_sdecompress,
.base = {
.cra_name = "zstd",
.cra_driver_name = "zstd-scomp",
.cra_module = THIS_MODULE,
}
};
static int __init zstd_mod_init(void)
{
int ret;
ret = crypto_register_alg(&alg);
if (ret)
return ret;
ret = crypto_register_scomp(&scomp);
if (ret)
crypto_unregister_alg(&alg);
return ret;
}
static void __exit zstd_mod_fini(void)
{
crypto_unregister_alg(&alg);
crypto_unregister_scomp(&scomp);
}
module_init(zstd_mod_init);
module_exit(zstd_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Zstd Compression Algorithm");
MODULE_ALIAS_CRYPTO("zstd");

View File

@ -347,6 +347,7 @@ config HW_RANDOM_STM32
tristate "STMicroelectronics STM32 random number generator"
depends on HW_RANDOM && (ARCH_STM32 || COMPILE_TEST)
depends on HAS_IOMEM
default HW_RANDOM
help
This driver provides kernel-side support for the Random Number
Generator hardware found on STM32 microcontrollers.

View File

@ -435,7 +435,7 @@ static int n2rng_data_read(struct hwrng *rng, u32 *data)
*data = np->test_data & 0xffffffff;
len = 4;
} else {
dev_err(&np->op->dev, "RNG error, restesting\n");
dev_err(&np->op->dev, "RNG error, retesting\n");
np->flags &= ~N2RNG_FLAG_READY;
if (!(np->flags & N2RNG_FLAG_SHUTDOWN))
schedule_delayed_work(&np->work, 0);

View File

@ -187,8 +187,13 @@ static int stm32_rng_runtime_resume(struct device *dev)
}
#endif
static UNIVERSAL_DEV_PM_OPS(stm32_rng_pm_ops, stm32_rng_runtime_suspend,
stm32_rng_runtime_resume, NULL);
static const struct dev_pm_ops stm32_rng_pm_ops = {
SET_RUNTIME_PM_OPS(stm32_rng_runtime_suspend,
stm32_rng_runtime_resume, NULL)
SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
pm_runtime_force_resume)
};
static const struct of_device_id stm32_rng_match[] = {
{

View File

@ -135,7 +135,7 @@ static int via_rng_init(struct hwrng *rng)
* is always enabled if CPUID rng_en is set. There is no
* RNG configuration like it used to be the case in this
* register */
if ((c->x86 == 6) && (c->x86_model >= 0x0f)) {
if (((c->x86 == 6) && (c->x86_model >= 0x0f)) || (c->x86 > 6)){
if (!boot_cpu_has(X86_FEATURE_XSTORE_EN)) {
pr_err(PFX "can't enable hardware RNG "
"if XSTORE is not enabled\n");

View File

@ -302,6 +302,7 @@ config CRYPTO_DEV_PPC4XX
select CRYPTO_AEAD
select CRYPTO_AES
select CRYPTO_CCM
select CRYPTO_CTR
select CRYPTO_GCM
select CRYPTO_BLKCIPHER
help
@ -419,7 +420,7 @@ config CRYPTO_DEV_EXYNOS_RNG
config CRYPTO_DEV_S5P
tristate "Support for Samsung S5PV210/Exynos crypto accelerator"
depends on ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST
depends on HAS_IOMEM && HAS_DMA
depends on HAS_IOMEM
select CRYPTO_AES
select CRYPTO_BLKCIPHER
help
@ -466,7 +467,6 @@ endif # if CRYPTO_DEV_UX500
config CRYPTO_DEV_ATMEL_AUTHENC
tristate "Support for Atmel IPSEC/SSL hw accelerator"
depends on HAS_DMA
depends on ARCH_AT91 || COMPILE_TEST
select CRYPTO_AUTHENC
select CRYPTO_DEV_ATMEL_AES
@ -479,7 +479,6 @@ config CRYPTO_DEV_ATMEL_AUTHENC
config CRYPTO_DEV_ATMEL_AES
tristate "Support for Atmel AES hw accelerator"
depends on HAS_DMA
depends on ARCH_AT91 || COMPILE_TEST
select CRYPTO_AES
select CRYPTO_AEAD
@ -494,7 +493,6 @@ config CRYPTO_DEV_ATMEL_AES
config CRYPTO_DEV_ATMEL_TDES
tristate "Support for Atmel DES/TDES hw accelerator"
depends on HAS_DMA
depends on ARCH_AT91 || COMPILE_TEST
select CRYPTO_DES
select CRYPTO_BLKCIPHER
@ -508,7 +506,6 @@ config CRYPTO_DEV_ATMEL_TDES
config CRYPTO_DEV_ATMEL_SHA
tristate "Support for Atmel SHA hw accelerator"
depends on HAS_DMA
depends on ARCH_AT91 || COMPILE_TEST
select CRYPTO_HASH
help
@ -574,7 +571,8 @@ config CRYPTO_DEV_CAVIUM_ZIP
config CRYPTO_DEV_QCE
tristate "Qualcomm crypto engine accelerator"
depends on (ARCH_QCOM || COMPILE_TEST) && HAS_DMA && HAS_IOMEM
depends on ARCH_QCOM || COMPILE_TEST
depends on HAS_IOMEM
select CRYPTO_AES
select CRYPTO_DES
select CRYPTO_ECB
@ -598,7 +596,6 @@ source "drivers/crypto/vmx/Kconfig"
config CRYPTO_DEV_IMGTEC_HASH
tristate "Imagination Technologies hardware hash accelerator"
depends on MIPS || COMPILE_TEST
depends on HAS_DMA
select CRYPTO_MD5
select CRYPTO_SHA1
select CRYPTO_SHA256
@ -650,7 +647,6 @@ config CRYPTO_DEV_ROCKCHIP
config CRYPTO_DEV_MEDIATEK
tristate "MediaTek's EIP97 Cryptographic Engine driver"
depends on HAS_DMA
depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST
select CRYPTO_AES
select CRYPTO_AEAD
@ -688,9 +684,10 @@ source "drivers/crypto/stm32/Kconfig"
config CRYPTO_DEV_SAFEXCEL
tristate "Inside Secure's SafeXcel cryptographic engine driver"
depends on HAS_DMA && OF
depends on OF
depends on (ARM64 && ARCH_MVEBU) || (COMPILE_TEST && 64BIT)
select CRYPTO_AES
select CRYPTO_AUTHENC
select CRYPTO_BLKCIPHER
select CRYPTO_HASH
select CRYPTO_HMAC
@ -706,7 +703,6 @@ config CRYPTO_DEV_SAFEXCEL
config CRYPTO_DEV_ARTPEC6
tristate "Support for Axis ARTPEC-6/7 hardware crypto acceleration."
depends on ARM && (ARCH_ARTPEC || COMPILE_TEST)
depends on HAS_DMA
depends on OF
select CRYPTO_AEAD
select CRYPTO_AES

View File

@ -31,6 +31,7 @@
#include <crypto/gcm.h>
#include <crypto/sha.h>
#include <crypto/ctr.h>
#include <crypto/skcipher.h>
#include "crypto4xx_reg_def.h"
#include "crypto4xx_core.h"
#include "crypto4xx_sa.h"
@ -74,51 +75,57 @@ static void set_dynamic_sa_command_1(struct dynamic_sa_ctl *sa, u32 cm,
sa->sa_command_1.bf.copy_hdr = cp_hdr;
}
int crypto4xx_encrypt(struct ablkcipher_request *req)
static inline int crypto4xx_crypt(struct skcipher_request *req,
const unsigned int ivlen, bool decrypt)
{
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
unsigned int ivlen = crypto_ablkcipher_ivsize(
crypto_ablkcipher_reqtfm(req));
__le32 iv[ivlen];
struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
__le32 iv[AES_IV_SIZE];
if (ivlen)
crypto4xx_memcpy_to_le32(iv, req->info, ivlen);
crypto4xx_memcpy_to_le32(iv, req->iv, ivlen);
return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst,
req->nbytes, iv, ivlen, ctx->sa_out, ctx->sa_len, 0);
req->cryptlen, iv, ivlen, decrypt ? ctx->sa_in : ctx->sa_out,
ctx->sa_len, 0, NULL);
}
int crypto4xx_decrypt(struct ablkcipher_request *req)
int crypto4xx_encrypt_noiv(struct skcipher_request *req)
{
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
unsigned int ivlen = crypto_ablkcipher_ivsize(
crypto_ablkcipher_reqtfm(req));
__le32 iv[ivlen];
return crypto4xx_crypt(req, 0, false);
}
if (ivlen)
crypto4xx_memcpy_to_le32(iv, req->info, ivlen);
int crypto4xx_encrypt_iv(struct skcipher_request *req)
{
return crypto4xx_crypt(req, AES_IV_SIZE, false);
}
return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst,
req->nbytes, iv, ivlen, ctx->sa_in, ctx->sa_len, 0);
int crypto4xx_decrypt_noiv(struct skcipher_request *req)
{
return crypto4xx_crypt(req, 0, true);
}
int crypto4xx_decrypt_iv(struct skcipher_request *req)
{
return crypto4xx_crypt(req, AES_IV_SIZE, true);
}
/**
* AES Functions
*/
static int crypto4xx_setkey_aes(struct crypto_ablkcipher *cipher,
static int crypto4xx_setkey_aes(struct crypto_skcipher *cipher,
const u8 *key,
unsigned int keylen,
unsigned char cm,
u8 fb)
{
struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(tfm);
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
struct dynamic_sa_ctl *sa;
int rc;
if (keylen != AES_KEYSIZE_256 &&
keylen != AES_KEYSIZE_192 && keylen != AES_KEYSIZE_128) {
crypto_ablkcipher_set_flags(cipher,
crypto_skcipher_set_flags(cipher,
CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
@ -134,7 +141,8 @@ static int crypto4xx_setkey_aes(struct crypto_ablkcipher *cipher,
/* Setup SA */
sa = ctx->sa_in;
set_dynamic_sa_command_0(sa, SA_NOT_SAVE_HASH, SA_NOT_SAVE_IV,
set_dynamic_sa_command_0(sa, SA_NOT_SAVE_HASH, (cm == CRYPTO_MODE_CBC ?
SA_SAVE_IV : SA_NOT_SAVE_IV),
SA_LOAD_HASH_FROM_SA, SA_LOAD_IV_FROM_STATE,
SA_NO_HEADER_PROC, SA_HASH_ALG_NULL,
SA_CIPHER_ALG_AES, SA_PAD_TYPE_ZERO,
@ -158,39 +166,38 @@ static int crypto4xx_setkey_aes(struct crypto_ablkcipher *cipher,
return 0;
}
int crypto4xx_setkey_aes_cbc(struct crypto_ablkcipher *cipher,
int crypto4xx_setkey_aes_cbc(struct crypto_skcipher *cipher,
const u8 *key, unsigned int keylen)
{
return crypto4xx_setkey_aes(cipher, key, keylen, CRYPTO_MODE_CBC,
CRYPTO_FEEDBACK_MODE_NO_FB);
}
int crypto4xx_setkey_aes_cfb(struct crypto_ablkcipher *cipher,
int crypto4xx_setkey_aes_cfb(struct crypto_skcipher *cipher,
const u8 *key, unsigned int keylen)
{
return crypto4xx_setkey_aes(cipher, key, keylen, CRYPTO_MODE_CFB,
CRYPTO_FEEDBACK_MODE_128BIT_CFB);
}
int crypto4xx_setkey_aes_ecb(struct crypto_ablkcipher *cipher,
int crypto4xx_setkey_aes_ecb(struct crypto_skcipher *cipher,
const u8 *key, unsigned int keylen)
{
return crypto4xx_setkey_aes(cipher, key, keylen, CRYPTO_MODE_ECB,
CRYPTO_FEEDBACK_MODE_NO_FB);
}
int crypto4xx_setkey_aes_ofb(struct crypto_ablkcipher *cipher,
int crypto4xx_setkey_aes_ofb(struct crypto_skcipher *cipher,
const u8 *key, unsigned int keylen)
{
return crypto4xx_setkey_aes(cipher, key, keylen, CRYPTO_MODE_OFB,
CRYPTO_FEEDBACK_MODE_64BIT_OFB);
}
int crypto4xx_setkey_rfc3686(struct crypto_ablkcipher *cipher,
int crypto4xx_setkey_rfc3686(struct crypto_skcipher *cipher,
const u8 *key, unsigned int keylen)
{
struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(tfm);
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
int rc;
rc = crypto4xx_setkey_aes(cipher, key, keylen - CTR_RFC3686_NONCE_SIZE,
@ -204,35 +211,117 @@ int crypto4xx_setkey_rfc3686(struct crypto_ablkcipher *cipher,
return 0;
}
int crypto4xx_rfc3686_encrypt(struct ablkcipher_request *req)
int crypto4xx_rfc3686_encrypt(struct skcipher_request *req)
{
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
__le32 iv[AES_IV_SIZE / 4] = {
ctx->iv_nonce,
cpu_to_le32p((u32 *) req->info),
cpu_to_le32p((u32 *) (req->info + 4)),
cpu_to_le32p((u32 *) req->iv),
cpu_to_le32p((u32 *) (req->iv + 4)),
cpu_to_le32(1) };
return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst,
req->nbytes, iv, AES_IV_SIZE,
ctx->sa_out, ctx->sa_len, 0);
req->cryptlen, iv, AES_IV_SIZE,
ctx->sa_out, ctx->sa_len, 0, NULL);
}
int crypto4xx_rfc3686_decrypt(struct ablkcipher_request *req)
int crypto4xx_rfc3686_decrypt(struct skcipher_request *req)
{
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
__le32 iv[AES_IV_SIZE / 4] = {
ctx->iv_nonce,
cpu_to_le32p((u32 *) req->info),
cpu_to_le32p((u32 *) (req->info + 4)),
cpu_to_le32p((u32 *) req->iv),
cpu_to_le32p((u32 *) (req->iv + 4)),
cpu_to_le32(1) };
return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst,
req->nbytes, iv, AES_IV_SIZE,
ctx->sa_out, ctx->sa_len, 0);
req->cryptlen, iv, AES_IV_SIZE,
ctx->sa_out, ctx->sa_len, 0, NULL);
}
static int
crypto4xx_ctr_crypt(struct skcipher_request *req, bool encrypt)
{
struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
size_t iv_len = crypto_skcipher_ivsize(cipher);
unsigned int counter = be32_to_cpup((__be32 *)(req->iv + iv_len - 4));
unsigned int nblks = ALIGN(req->cryptlen, AES_BLOCK_SIZE) /
AES_BLOCK_SIZE;
/*
* The hardware uses only the last 32-bits as the counter while the
* kernel tests (aes_ctr_enc_tv_template[4] for example) expect that
* the whole IV is a counter. So fallback if the counter is going to
* overlow.
*/
if (counter + nblks < counter) {
struct skcipher_request *subreq = skcipher_request_ctx(req);
int ret;
skcipher_request_set_tfm(subreq, ctx->sw_cipher.cipher);
skcipher_request_set_callback(subreq, req->base.flags,
NULL, NULL);
skcipher_request_set_crypt(subreq, req->src, req->dst,
req->cryptlen, req->iv);
ret = encrypt ? crypto_skcipher_encrypt(subreq)
: crypto_skcipher_decrypt(subreq);
skcipher_request_zero(subreq);
return ret;
}
return encrypt ? crypto4xx_encrypt_iv(req)
: crypto4xx_decrypt_iv(req);
}
static int crypto4xx_sk_setup_fallback(struct crypto4xx_ctx *ctx,
struct crypto_skcipher *cipher,
const u8 *key,
unsigned int keylen)
{
int rc;
crypto_skcipher_clear_flags(ctx->sw_cipher.cipher,
CRYPTO_TFM_REQ_MASK);
crypto_skcipher_set_flags(ctx->sw_cipher.cipher,
crypto_skcipher_get_flags(cipher) & CRYPTO_TFM_REQ_MASK);
rc = crypto_skcipher_setkey(ctx->sw_cipher.cipher, key, keylen);
crypto_skcipher_clear_flags(cipher, CRYPTO_TFM_RES_MASK);
crypto_skcipher_set_flags(cipher,
crypto_skcipher_get_flags(ctx->sw_cipher.cipher) &
CRYPTO_TFM_RES_MASK);
return rc;
}
int crypto4xx_setkey_aes_ctr(struct crypto_skcipher *cipher,
const u8 *key, unsigned int keylen)
{
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
int rc;
rc = crypto4xx_sk_setup_fallback(ctx, cipher, key, keylen);
if (rc)
return rc;
return crypto4xx_setkey_aes(cipher, key, keylen,
CRYPTO_MODE_CTR, CRYPTO_FEEDBACK_MODE_NO_FB);
}
int crypto4xx_encrypt_ctr(struct skcipher_request *req)
{
return crypto4xx_ctr_crypt(req, true);
}
int crypto4xx_decrypt_ctr(struct skcipher_request *req)
{
return crypto4xx_ctr_crypt(req, false);
}
static inline bool crypto4xx_aead_need_fallback(struct aead_request *req,
unsigned int len,
bool is_ccm, bool decrypt)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
@ -242,14 +331,14 @@ static inline bool crypto4xx_aead_need_fallback(struct aead_request *req,
return true;
/*
* hardware does not handle cases where cryptlen
* is less than a block
* hardware does not handle cases where plaintext
* is less than a block.
*/
if (req->cryptlen < AES_BLOCK_SIZE)
if (len < AES_BLOCK_SIZE)
return true;
/* assoc len needs to be a multiple of 4 */
if (req->assoclen & 0x3)
/* assoc len needs to be a multiple of 4 and <= 1020 */
if (req->assoclen & 0x3 || req->assoclen > 1020)
return true;
/* CCM supports only counter field length of 2 and 4 bytes */
@ -262,13 +351,7 @@ static inline bool crypto4xx_aead_need_fallback(struct aead_request *req,
static int crypto4xx_aead_fallback(struct aead_request *req,
struct crypto4xx_ctx *ctx, bool do_decrypt)
{
char aead_req_data[sizeof(struct aead_request) +
crypto_aead_reqsize(ctx->sw_cipher.aead)]
__aligned(__alignof__(struct aead_request));
struct aead_request *subreq = (void *) aead_req_data;
memset(subreq, 0, sizeof(aead_req_data));
struct aead_request *subreq = aead_request_ctx(req);
aead_request_set_tfm(subreq, ctx->sw_cipher.aead);
aead_request_set_callback(subreq, req->base.flags,
@ -280,10 +363,10 @@ static int crypto4xx_aead_fallback(struct aead_request *req,
crypto_aead_encrypt(subreq);
}
static int crypto4xx_setup_fallback(struct crypto4xx_ctx *ctx,
struct crypto_aead *cipher,
const u8 *key,
unsigned int keylen)
static int crypto4xx_aead_setup_fallback(struct crypto4xx_ctx *ctx,
struct crypto_aead *cipher,
const u8 *key,
unsigned int keylen)
{
int rc;
@ -311,7 +394,7 @@ int crypto4xx_setkey_aes_ccm(struct crypto_aead *cipher, const u8 *key,
struct dynamic_sa_ctl *sa;
int rc = 0;
rc = crypto4xx_setup_fallback(ctx, cipher, key, keylen);
rc = crypto4xx_aead_setup_fallback(ctx, cipher, key, keylen);
if (rc)
return rc;
@ -366,19 +449,20 @@ int crypto4xx_setkey_aes_ccm(struct crypto_aead *cipher, const u8 *key,
static int crypto4xx_crypt_aes_ccm(struct aead_request *req, bool decrypt)
{
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
struct crypto4xx_aead_reqctx *rctx = aead_request_ctx(req);
struct crypto_aead *aead = crypto_aead_reqtfm(req);
unsigned int len = req->cryptlen;
__le32 iv[16];
u32 tmp_sa[ctx->sa_len * 4];
u32 tmp_sa[SA_AES128_CCM_LEN + 4];
struct dynamic_sa_ctl *sa = (struct dynamic_sa_ctl *)tmp_sa;
if (crypto4xx_aead_need_fallback(req, true, decrypt))
return crypto4xx_aead_fallback(req, ctx, decrypt);
unsigned int len = req->cryptlen;
if (decrypt)
len -= crypto_aead_authsize(aead);
memcpy(tmp_sa, decrypt ? ctx->sa_in : ctx->sa_out, sizeof(tmp_sa));
if (crypto4xx_aead_need_fallback(req, len, true, decrypt))
return crypto4xx_aead_fallback(req, ctx, decrypt);
memcpy(tmp_sa, decrypt ? ctx->sa_in : ctx->sa_out, ctx->sa_len * 4);
sa->sa_command_0.bf.digest_len = crypto_aead_authsize(aead) >> 2;
if (req->iv[0] == 1) {
@ -391,7 +475,7 @@ static int crypto4xx_crypt_aes_ccm(struct aead_request *req, bool decrypt)
return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst,
len, iv, sizeof(iv),
sa, ctx->sa_len, req->assoclen);
sa, ctx->sa_len, req->assoclen, rctx->dst);
}
int crypto4xx_encrypt_aes_ccm(struct aead_request *req)
@ -470,7 +554,7 @@ int crypto4xx_setkey_aes_gcm(struct crypto_aead *cipher,
return -EINVAL;
}
rc = crypto4xx_setup_fallback(ctx, cipher, key, keylen);
rc = crypto4xx_aead_setup_fallback(ctx, cipher, key, keylen);
if (rc)
return rc;
@ -523,22 +607,23 @@ static inline int crypto4xx_crypt_aes_gcm(struct aead_request *req,
bool decrypt)
{
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
unsigned int len = req->cryptlen;
struct crypto4xx_aead_reqctx *rctx = aead_request_ctx(req);
__le32 iv[4];
unsigned int len = req->cryptlen;
if (crypto4xx_aead_need_fallback(req, false, decrypt))
if (decrypt)
len -= crypto_aead_authsize(crypto_aead_reqtfm(req));
if (crypto4xx_aead_need_fallback(req, len, false, decrypt))
return crypto4xx_aead_fallback(req, ctx, decrypt);
crypto4xx_memcpy_to_le32(iv, req->iv, GCM_AES_IV_SIZE);
iv[3] = cpu_to_le32(1);
if (decrypt)
len -= crypto_aead_authsize(crypto_aead_reqtfm(req));
return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst,
len, iv, sizeof(iv),
decrypt ? ctx->sa_in : ctx->sa_out,
ctx->sa_len, req->assoclen);
ctx->sa_len, req->assoclen, rctx->dst);
}
int crypto4xx_encrypt_aes_gcm(struct aead_request *req)
@ -623,7 +708,7 @@ int crypto4xx_hash_update(struct ahash_request *req)
return crypto4xx_build_pd(&req->base, ctx, req->src, &dst,
req->nbytes, NULL, 0, ctx->sa_in,
ctx->sa_len, 0);
ctx->sa_len, 0, NULL);
}
int crypto4xx_hash_final(struct ahash_request *req)
@ -642,7 +727,7 @@ int crypto4xx_hash_digest(struct ahash_request *req)
return crypto4xx_build_pd(&req->base, ctx, req->src, &dst,
req->nbytes, NULL, 0, ctx->sa_in,
ctx->sa_len, 0);
ctx->sa_len, 0, NULL);
}
/**

View File

@ -41,6 +41,7 @@
#include <crypto/gcm.h>
#include <crypto/sha.h>
#include <crypto/scatterwalk.h>
#include <crypto/skcipher.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include "crypto4xx_reg_def.h"
@ -526,31 +527,38 @@ static void crypto4xx_ret_sg_desc(struct crypto4xx_device *dev,
}
}
static void crypto4xx_ablkcipher_done(struct crypto4xx_device *dev,
static void crypto4xx_cipher_done(struct crypto4xx_device *dev,
struct pd_uinfo *pd_uinfo,
struct ce_pd *pd)
{
struct crypto4xx_ctx *ctx;
struct ablkcipher_request *ablk_req;
struct skcipher_request *req;
struct scatterlist *dst;
dma_addr_t addr;
ablk_req = ablkcipher_request_cast(pd_uinfo->async_req);
ctx = crypto_tfm_ctx(ablk_req->base.tfm);
req = skcipher_request_cast(pd_uinfo->async_req);
if (pd_uinfo->using_sd) {
crypto4xx_copy_pkt_to_dst(dev, pd, pd_uinfo, ablk_req->nbytes,
ablk_req->dst);
crypto4xx_copy_pkt_to_dst(dev, pd, pd_uinfo,
req->cryptlen, req->dst);
} else {
dst = pd_uinfo->dest_va;
addr = dma_map_page(dev->core_dev->device, sg_page(dst),
dst->offset, dst->length, DMA_FROM_DEVICE);
}
if (pd_uinfo->sa_va->sa_command_0.bf.save_iv == SA_SAVE_IV) {
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
crypto4xx_memcpy_from_le32((u32 *)req->iv,
pd_uinfo->sr_va->save_iv,
crypto_skcipher_ivsize(skcipher));
}
crypto4xx_ret_sg_desc(dev, pd_uinfo);
if (pd_uinfo->state & PD_ENTRY_BUSY)
ablkcipher_request_complete(ablk_req, -EINPROGRESS);
ablkcipher_request_complete(ablk_req, 0);
skcipher_request_complete(req, -EINPROGRESS);
skcipher_request_complete(req, 0);
}
static void crypto4xx_ahash_done(struct crypto4xx_device *dev,
@ -580,7 +588,7 @@ static void crypto4xx_aead_done(struct crypto4xx_device *dev,
struct scatterlist *dst = pd_uinfo->dest_va;
size_t cp_len = crypto_aead_authsize(
crypto_aead_reqtfm(aead_req));
u32 icv[cp_len];
u32 icv[AES_BLOCK_SIZE];
int err = 0;
if (pd_uinfo->using_sd) {
@ -595,7 +603,7 @@ static void crypto4xx_aead_done(struct crypto4xx_device *dev,
if (pd_uinfo->sa_va->sa_command_0.bf.dir == DIR_OUTBOUND) {
/* append icv at the end */
crypto4xx_memcpy_from_le32(icv, pd_uinfo->sr_va->save_digest,
cp_len);
sizeof(icv));
scatterwalk_map_and_copy(icv, dst, aead_req->cryptlen,
cp_len, 1);
@ -605,7 +613,7 @@ static void crypto4xx_aead_done(struct crypto4xx_device *dev,
aead_req->assoclen + aead_req->cryptlen -
cp_len, cp_len, 0);
crypto4xx_memcpy_from_le32(icv, icv, cp_len);
crypto4xx_memcpy_from_le32(icv, icv, sizeof(icv));
if (crypto_memneq(icv, pd_uinfo->sr_va->save_digest, cp_len))
err = -EBADMSG;
@ -641,8 +649,8 @@ static void crypto4xx_pd_done(struct crypto4xx_device *dev, u32 idx)
struct pd_uinfo *pd_uinfo = &dev->pdr_uinfo[idx];
switch (crypto_tfm_alg_type(pd_uinfo->async_req->tfm)) {
case CRYPTO_ALG_TYPE_ABLKCIPHER:
crypto4xx_ablkcipher_done(dev, pd_uinfo, pd);
case CRYPTO_ALG_TYPE_SKCIPHER:
crypto4xx_cipher_done(dev, pd_uinfo, pd);
break;
case CRYPTO_ALG_TYPE_AEAD:
crypto4xx_aead_done(dev, pd_uinfo, pd);
@ -687,9 +695,9 @@ int crypto4xx_build_pd(struct crypto_async_request *req,
const __le32 *iv, const u32 iv_len,
const struct dynamic_sa_ctl *req_sa,
const unsigned int sa_len,
const unsigned int assoclen)
const unsigned int assoclen,
struct scatterlist *_dst)
{
struct scatterlist _dst[2];
struct crypto4xx_device *dev = ctx->dev;
struct dynamic_sa_ctl *sa;
struct ce_gd *gd;
@ -936,15 +944,27 @@ static void crypto4xx_ctx_init(struct crypto4xx_alg *amcc_alg,
ctx->sa_len = 0;
}
static int crypto4xx_ablk_init(struct crypto_tfm *tfm)
static int crypto4xx_sk_init(struct crypto_skcipher *sk)
{
struct crypto_alg *alg = tfm->__crt_alg;
struct skcipher_alg *alg = crypto_skcipher_alg(sk);
struct crypto4xx_alg *amcc_alg;
struct crypto4xx_ctx *ctx = crypto_tfm_ctx(tfm);
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(sk);
if (alg->base.cra_flags & CRYPTO_ALG_NEED_FALLBACK) {
ctx->sw_cipher.cipher =
crypto_alloc_skcipher(alg->base.cra_name, 0,
CRYPTO_ALG_NEED_FALLBACK |
CRYPTO_ALG_ASYNC);
if (IS_ERR(ctx->sw_cipher.cipher))
return PTR_ERR(ctx->sw_cipher.cipher);
crypto_skcipher_set_reqsize(sk,
sizeof(struct skcipher_request) + 32 +
crypto_skcipher_reqsize(ctx->sw_cipher.cipher));
}
amcc_alg = container_of(alg, struct crypto4xx_alg, alg.u.cipher);
crypto4xx_ctx_init(amcc_alg, ctx);
tfm->crt_ablkcipher.reqsize = sizeof(struct crypto4xx_ctx);
return 0;
}
@ -953,9 +973,13 @@ static void crypto4xx_common_exit(struct crypto4xx_ctx *ctx)
crypto4xx_free_sa(ctx);
}
static void crypto4xx_ablk_exit(struct crypto_tfm *tfm)
static void crypto4xx_sk_exit(struct crypto_skcipher *sk)
{
crypto4xx_common_exit(crypto_tfm_ctx(tfm));
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(sk);
crypto4xx_common_exit(ctx);
if (ctx->sw_cipher.cipher)
crypto_free_skcipher(ctx->sw_cipher.cipher);
}
static int crypto4xx_aead_init(struct crypto_aead *tfm)
@ -972,9 +996,9 @@ static int crypto4xx_aead_init(struct crypto_aead *tfm)
amcc_alg = container_of(alg, struct crypto4xx_alg, alg.u.aead);
crypto4xx_ctx_init(amcc_alg, ctx);
crypto_aead_set_reqsize(tfm, sizeof(struct aead_request) +
max(sizeof(struct crypto4xx_ctx), 32 +
crypto_aead_reqsize(ctx->sw_cipher.aead)));
crypto_aead_set_reqsize(tfm, max(sizeof(struct aead_request) + 32 +
crypto_aead_reqsize(ctx->sw_cipher.aead),
sizeof(struct crypto4xx_aead_reqctx)));
return 0;
}
@ -1012,7 +1036,7 @@ static int crypto4xx_register_alg(struct crypto4xx_device *sec_dev,
break;
default:
rc = crypto_register_alg(&alg->alg.u.cipher);
rc = crypto_register_skcipher(&alg->alg.u.cipher);
break;
}
@ -1041,7 +1065,7 @@ static void crypto4xx_unregister_alg(struct crypto4xx_device *sec_dev)
break;
default:
crypto_unregister_alg(&alg->alg.u.cipher);
crypto_unregister_skcipher(&alg->alg.u.cipher);
}
kfree(alg);
}
@ -1103,126 +1127,131 @@ static irqreturn_t crypto4xx_ce_interrupt_handler_revb(int irq, void *data)
*/
static struct crypto4xx_alg_common crypto4xx_alg[] = {
/* Crypto AES modes */
{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .u.cipher = {
.cra_name = "cbc(aes)",
.cra_driver_name = "cbc-aes-ppc4xx",
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_KERN_DRIVER_ONLY,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
.cra_type = &crypto_ablkcipher_type,
.cra_init = crypto4xx_ablk_init,
.cra_exit = crypto4xx_ablk_exit,
.cra_module = THIS_MODULE,
.cra_u = {
.ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_IV_SIZE,
.setkey = crypto4xx_setkey_aes_cbc,
.encrypt = crypto4xx_encrypt,
.decrypt = crypto4xx_decrypt,
}
}
}},
{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .u.cipher = {
.cra_name = "cfb(aes)",
.cra_driver_name = "cfb-aes-ppc4xx",
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_KERN_DRIVER_ONLY,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
.cra_type = &crypto_ablkcipher_type,
.cra_init = crypto4xx_ablk_init,
.cra_exit = crypto4xx_ablk_exit,
.cra_module = THIS_MODULE,
.cra_u = {
.ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_IV_SIZE,
.setkey = crypto4xx_setkey_aes_cfb,
.encrypt = crypto4xx_encrypt,
.decrypt = crypto4xx_decrypt,
}
}
{ .type = CRYPTO_ALG_TYPE_SKCIPHER, .u.cipher = {
.base = {
.cra_name = "cbc(aes)",
.cra_driver_name = "cbc-aes-ppc4xx",
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_KERN_DRIVER_ONLY,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_IV_SIZE,
.setkey = crypto4xx_setkey_aes_cbc,
.encrypt = crypto4xx_encrypt_iv,
.decrypt = crypto4xx_decrypt_iv,
.init = crypto4xx_sk_init,
.exit = crypto4xx_sk_exit,
} },
{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .u.cipher = {
.cra_name = "rfc3686(ctr(aes))",
.cra_driver_name = "rfc3686-ctr-aes-ppc4xx",
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_KERN_DRIVER_ONLY,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
.cra_type = &crypto_ablkcipher_type,
.cra_init = crypto4xx_ablk_init,
.cra_exit = crypto4xx_ablk_exit,
.cra_module = THIS_MODULE,
.cra_u = {
.ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE +
CTR_RFC3686_NONCE_SIZE,
.max_keysize = AES_MAX_KEY_SIZE +
CTR_RFC3686_NONCE_SIZE,
.ivsize = CTR_RFC3686_IV_SIZE,
.setkey = crypto4xx_setkey_rfc3686,
.encrypt = crypto4xx_rfc3686_encrypt,
.decrypt = crypto4xx_rfc3686_decrypt,
}
}
{ .type = CRYPTO_ALG_TYPE_SKCIPHER, .u.cipher = {
.base = {
.cra_name = "cfb(aes)",
.cra_driver_name = "cfb-aes-ppc4xx",
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_KERN_DRIVER_ONLY,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_IV_SIZE,
.setkey = crypto4xx_setkey_aes_cfb,
.encrypt = crypto4xx_encrypt_iv,
.decrypt = crypto4xx_decrypt_iv,
.init = crypto4xx_sk_init,
.exit = crypto4xx_sk_exit,
} },
{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .u.cipher = {
.cra_name = "ecb(aes)",
.cra_driver_name = "ecb-aes-ppc4xx",
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_KERN_DRIVER_ONLY,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
.cra_type = &crypto_ablkcipher_type,
.cra_init = crypto4xx_ablk_init,
.cra_exit = crypto4xx_ablk_exit,
.cra_module = THIS_MODULE,
.cra_u = {
.ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.setkey = crypto4xx_setkey_aes_ecb,
.encrypt = crypto4xx_encrypt,
.decrypt = crypto4xx_decrypt,
}
}
{ .type = CRYPTO_ALG_TYPE_SKCIPHER, .u.cipher = {
.base = {
.cra_name = "ctr(aes)",
.cra_driver_name = "ctr-aes-ppc4xx",
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
CRYPTO_ALG_NEED_FALLBACK |
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_KERN_DRIVER_ONLY,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_IV_SIZE,
.setkey = crypto4xx_setkey_aes_ctr,
.encrypt = crypto4xx_encrypt_ctr,
.decrypt = crypto4xx_decrypt_ctr,
.init = crypto4xx_sk_init,
.exit = crypto4xx_sk_exit,
} },
{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .u.cipher = {
.cra_name = "ofb(aes)",
.cra_driver_name = "ofb-aes-ppc4xx",
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_KERN_DRIVER_ONLY,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
.cra_type = &crypto_ablkcipher_type,
.cra_init = crypto4xx_ablk_init,
.cra_exit = crypto4xx_ablk_exit,
.cra_module = THIS_MODULE,
.cra_u = {
.ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_IV_SIZE,
.setkey = crypto4xx_setkey_aes_ofb,
.encrypt = crypto4xx_encrypt,
.decrypt = crypto4xx_decrypt,
}
}
{ .type = CRYPTO_ALG_TYPE_SKCIPHER, .u.cipher = {
.base = {
.cra_name = "rfc3686(ctr(aes))",
.cra_driver_name = "rfc3686-ctr-aes-ppc4xx",
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_KERN_DRIVER_ONLY,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
.max_keysize = AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
.ivsize = CTR_RFC3686_IV_SIZE,
.setkey = crypto4xx_setkey_rfc3686,
.encrypt = crypto4xx_rfc3686_encrypt,
.decrypt = crypto4xx_rfc3686_decrypt,
.init = crypto4xx_sk_init,
.exit = crypto4xx_sk_exit,
} },
{ .type = CRYPTO_ALG_TYPE_SKCIPHER, .u.cipher = {
.base = {
.cra_name = "ecb(aes)",
.cra_driver_name = "ecb-aes-ppc4xx",
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_KERN_DRIVER_ONLY,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.setkey = crypto4xx_setkey_aes_ecb,
.encrypt = crypto4xx_encrypt_noiv,
.decrypt = crypto4xx_decrypt_noiv,
.init = crypto4xx_sk_init,
.exit = crypto4xx_sk_exit,
} },
{ .type = CRYPTO_ALG_TYPE_SKCIPHER, .u.cipher = {
.base = {
.cra_name = "ofb(aes)",
.cra_driver_name = "ofb-aes-ppc4xx",
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_KERN_DRIVER_ONLY,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_IV_SIZE,
.setkey = crypto4xx_setkey_aes_ofb,
.encrypt = crypto4xx_encrypt_iv,
.decrypt = crypto4xx_decrypt_iv,
.init = crypto4xx_sk_init,
.exit = crypto4xx_sk_exit,
} },
/* AEAD */

View File

@ -25,6 +25,7 @@
#include <linux/ratelimit.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include "crypto4xx_reg_def.h"
#include "crypto4xx_sa.h"
@ -127,14 +128,19 @@ struct crypto4xx_ctx {
__le32 iv_nonce;
u32 sa_len;
union {
struct crypto_skcipher *cipher;
struct crypto_aead *aead;
} sw_cipher;
};
struct crypto4xx_aead_reqctx {
struct scatterlist dst[2];
};
struct crypto4xx_alg_common {
u32 type;
union {
struct crypto_alg cipher;
struct skcipher_alg cipher;
struct ahash_alg hash;
struct aead_alg aead;
} u;
@ -157,21 +163,28 @@ int crypto4xx_build_pd(struct crypto_async_request *req,
const __le32 *iv, const u32 iv_len,
const struct dynamic_sa_ctl *sa,
const unsigned int sa_len,
const unsigned int assoclen);
int crypto4xx_setkey_aes_cbc(struct crypto_ablkcipher *cipher,
const unsigned int assoclen,
struct scatterlist *dst_tmp);
int crypto4xx_setkey_aes_cbc(struct crypto_skcipher *cipher,
const u8 *key, unsigned int keylen);
int crypto4xx_setkey_aes_cfb(struct crypto_ablkcipher *cipher,
int crypto4xx_setkey_aes_cfb(struct crypto_skcipher *cipher,
const u8 *key, unsigned int keylen);
int crypto4xx_setkey_aes_ecb(struct crypto_ablkcipher *cipher,
int crypto4xx_setkey_aes_ctr(struct crypto_skcipher *cipher,
const u8 *key, unsigned int keylen);
int crypto4xx_setkey_aes_ofb(struct crypto_ablkcipher *cipher,
int crypto4xx_setkey_aes_ecb(struct crypto_skcipher *cipher,
const u8 *key, unsigned int keylen);
int crypto4xx_setkey_rfc3686(struct crypto_ablkcipher *cipher,
int crypto4xx_setkey_aes_ofb(struct crypto_skcipher *cipher,
const u8 *key, unsigned int keylen);
int crypto4xx_encrypt(struct ablkcipher_request *req);
int crypto4xx_decrypt(struct ablkcipher_request *req);
int crypto4xx_rfc3686_encrypt(struct ablkcipher_request *req);
int crypto4xx_rfc3686_decrypt(struct ablkcipher_request *req);
int crypto4xx_setkey_rfc3686(struct crypto_skcipher *cipher,
const u8 *key, unsigned int keylen);
int crypto4xx_encrypt_ctr(struct skcipher_request *req);
int crypto4xx_decrypt_ctr(struct skcipher_request *req);
int crypto4xx_encrypt_iv(struct skcipher_request *req);
int crypto4xx_decrypt_iv(struct skcipher_request *req);
int crypto4xx_encrypt_noiv(struct skcipher_request *req);
int crypto4xx_decrypt_noiv(struct skcipher_request *req);
int crypto4xx_rfc3686_encrypt(struct skcipher_request *req);
int crypto4xx_rfc3686_decrypt(struct skcipher_request *req);
int crypto4xx_sha1_alg_init(struct crypto_tfm *tfm);
int crypto4xx_hash_digest(struct ahash_request *req);
int crypto4xx_hash_final(struct ahash_request *req);

View File

@ -769,15 +769,18 @@ struct aead_edesc {
* @src_nents: number of segments in input s/w scatterlist
* @dst_nents: number of segments in output s/w scatterlist
* @iv_dma: dma address of iv for checking continuity and link table
* @iv_dir: DMA mapping direction for IV
* @sec4_sg_bytes: length of dma mapped sec4_sg space
* @sec4_sg_dma: bus physical mapped address of h/w link table
* @sec4_sg: pointer to h/w link table
* @hw_desc: the h/w job descriptor followed by any referenced link tables
* and IV
*/
struct ablkcipher_edesc {
int src_nents;
int dst_nents;
dma_addr_t iv_dma;
enum dma_data_direction iv_dir;
int sec4_sg_bytes;
dma_addr_t sec4_sg_dma;
struct sec4_sg_entry *sec4_sg;
@ -787,7 +790,8 @@ struct ablkcipher_edesc {
static void caam_unmap(struct device *dev, struct scatterlist *src,
struct scatterlist *dst, int src_nents,
int dst_nents,
dma_addr_t iv_dma, int ivsize, dma_addr_t sec4_sg_dma,
dma_addr_t iv_dma, int ivsize,
enum dma_data_direction iv_dir, dma_addr_t sec4_sg_dma,
int sec4_sg_bytes)
{
if (dst != src) {
@ -799,7 +803,7 @@ static void caam_unmap(struct device *dev, struct scatterlist *src,
}
if (iv_dma)
dma_unmap_single(dev, iv_dma, ivsize, DMA_TO_DEVICE);
dma_unmap_single(dev, iv_dma, ivsize, iv_dir);
if (sec4_sg_bytes)
dma_unmap_single(dev, sec4_sg_dma, sec4_sg_bytes,
DMA_TO_DEVICE);
@ -810,7 +814,7 @@ static void aead_unmap(struct device *dev,
struct aead_request *req)
{
caam_unmap(dev, req->src, req->dst,
edesc->src_nents, edesc->dst_nents, 0, 0,
edesc->src_nents, edesc->dst_nents, 0, 0, DMA_NONE,
edesc->sec4_sg_dma, edesc->sec4_sg_bytes);
}
@ -823,7 +827,7 @@ static void ablkcipher_unmap(struct device *dev,
caam_unmap(dev, req->src, req->dst,
edesc->src_nents, edesc->dst_nents,
edesc->iv_dma, ivsize,
edesc->iv_dma, ivsize, edesc->iv_dir,
edesc->sec4_sg_dma, edesc->sec4_sg_bytes);
}
@ -912,6 +916,18 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
scatterwalk_map_and_copy(req->info, req->dst, req->nbytes - ivsize,
ivsize, 0);
/* In case initial IV was generated, copy it in GIVCIPHER request */
if (edesc->iv_dir == DMA_FROM_DEVICE) {
u8 *iv;
struct skcipher_givcrypt_request *greq;
greq = container_of(req, struct skcipher_givcrypt_request,
creq);
iv = (u8 *)edesc->hw_desc + desc_bytes(edesc->hw_desc) +
edesc->sec4_sg_bytes;
memcpy(greq->giv, iv, ivsize);
}
kfree(edesc);
ablkcipher_request_complete(req, err);
@ -922,10 +938,10 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
{
struct ablkcipher_request *req = context;
struct ablkcipher_edesc *edesc;
#ifdef DEBUG
struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
#ifdef DEBUG
dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
#endif
@ -943,14 +959,6 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
ablkcipher_unmap(jrdev, edesc, req);
/*
* The crypto API expects us to set the IV (req->info) to the last
* ciphertext block.
*/
scatterwalk_map_and_copy(req->info, req->src, req->nbytes - ivsize,
ivsize, 0);
kfree(edesc);
ablkcipher_request_complete(req, err);
@ -1099,15 +1107,14 @@ static void init_authenc_job(struct aead_request *req,
*/
static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
struct ablkcipher_edesc *edesc,
struct ablkcipher_request *req,
bool iv_contig)
struct ablkcipher_request *req)
{
struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
u32 *desc = edesc->hw_desc;
u32 out_options = 0, in_options;
dma_addr_t dst_dma, src_dma;
int len, sec4_sg_index = 0;
u32 out_options = 0;
dma_addr_t dst_dma;
int len;
#ifdef DEBUG
print_hex_dump(KERN_ERR, "presciv@"__stringify(__LINE__)": ",
@ -1123,30 +1130,18 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
len = desc_len(sh_desc);
init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE);
if (iv_contig) {
src_dma = edesc->iv_dma;
in_options = 0;
} else {
src_dma = edesc->sec4_sg_dma;
sec4_sg_index += edesc->src_nents + 1;
in_options = LDST_SGF;
}
append_seq_in_ptr(desc, src_dma, req->nbytes + ivsize, in_options);
append_seq_in_ptr(desc, edesc->sec4_sg_dma, req->nbytes + ivsize,
LDST_SGF);
if (likely(req->src == req->dst)) {
if (edesc->src_nents == 1 && iv_contig) {
dst_dma = sg_dma_address(req->src);
} else {
dst_dma = edesc->sec4_sg_dma +
sizeof(struct sec4_sg_entry);
out_options = LDST_SGF;
}
dst_dma = edesc->sec4_sg_dma + sizeof(struct sec4_sg_entry);
out_options = LDST_SGF;
} else {
if (edesc->dst_nents == 1) {
dst_dma = sg_dma_address(req->dst);
} else {
dst_dma = edesc->sec4_sg_dma +
sec4_sg_index * sizeof(struct sec4_sg_entry);
dst_dma = edesc->sec4_sg_dma + (edesc->src_nents + 1) *
sizeof(struct sec4_sg_entry);
out_options = LDST_SGF;
}
}
@ -1158,13 +1153,12 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
*/
static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr,
struct ablkcipher_edesc *edesc,
struct ablkcipher_request *req,
bool iv_contig)
struct ablkcipher_request *req)
{
struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
u32 *desc = edesc->hw_desc;
u32 out_options, in_options;
u32 in_options;
dma_addr_t dst_dma, src_dma;
int len, sec4_sg_index = 0;
@ -1190,15 +1184,9 @@ static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr,
}
append_seq_in_ptr(desc, src_dma, req->nbytes, in_options);
if (iv_contig) {
dst_dma = edesc->iv_dma;
out_options = 0;
} else {
dst_dma = edesc->sec4_sg_dma +
sec4_sg_index * sizeof(struct sec4_sg_entry);
out_options = LDST_SGF;
}
append_seq_out_ptr(desc, dst_dma, req->nbytes + ivsize, out_options);
dst_dma = edesc->sec4_sg_dma + sec4_sg_index *
sizeof(struct sec4_sg_entry);
append_seq_out_ptr(desc, dst_dma, req->nbytes + ivsize, LDST_SGF);
}
/*
@ -1287,7 +1275,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
GFP_DMA | flags);
if (!edesc) {
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
0, 0, 0);
0, DMA_NONE, 0, 0);
return ERR_PTR(-ENOMEM);
}
@ -1491,8 +1479,7 @@ static int aead_decrypt(struct aead_request *req)
* allocate and map the ablkcipher extended descriptor for ablkcipher
*/
static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
*req, int desc_bytes,
bool *iv_contig_out)
*req, int desc_bytes)
{
struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
@ -1501,8 +1488,8 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
GFP_KERNEL : GFP_ATOMIC;
int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
struct ablkcipher_edesc *edesc;
dma_addr_t iv_dma = 0;
bool in_contig;
dma_addr_t iv_dma;
u8 *iv;
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
int dst_sg_idx, sec4_sg_ents, sec4_sg_bytes;
@ -1546,33 +1533,20 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
}
}
iv_dma = dma_map_single(jrdev, req->info, ivsize, DMA_TO_DEVICE);
if (dma_mapping_error(jrdev, iv_dma)) {
dev_err(jrdev, "unable to map IV\n");
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
0, 0, 0);
return ERR_PTR(-ENOMEM);
}
if (mapped_src_nents == 1 &&
iv_dma + ivsize == sg_dma_address(req->src)) {
in_contig = true;
sec4_sg_ents = 0;
} else {
in_contig = false;
sec4_sg_ents = 1 + mapped_src_nents;
}
sec4_sg_ents = 1 + mapped_src_nents;
dst_sg_idx = sec4_sg_ents;
sec4_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
sec4_sg_bytes = sec4_sg_ents * sizeof(struct sec4_sg_entry);
/* allocate space for base edesc and hw desc commands, link tables */
edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes,
/*
* allocate space for base edesc and hw desc commands, link tables, IV
*/
edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes + ivsize,
GFP_DMA | flags);
if (!edesc) {
dev_err(jrdev, "could not allocate extended descriptor\n");
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
iv_dma, ivsize, 0, 0);
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
0, DMA_NONE, 0, 0);
return ERR_PTR(-ENOMEM);
}
@ -1581,13 +1555,24 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
edesc->sec4_sg_bytes = sec4_sg_bytes;
edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) +
desc_bytes;
edesc->iv_dir = DMA_TO_DEVICE;
if (!in_contig) {
dma_to_sec4_sg_one(edesc->sec4_sg, iv_dma, ivsize, 0);
sg_to_sec4_sg_last(req->src, mapped_src_nents,
edesc->sec4_sg + 1, 0);
/* Make sure IV is located in a DMAable area */
iv = (u8 *)edesc->hw_desc + desc_bytes + sec4_sg_bytes;
memcpy(iv, req->info, ivsize);
iv_dma = dma_map_single(jrdev, iv, ivsize, DMA_TO_DEVICE);
if (dma_mapping_error(jrdev, iv_dma)) {
dev_err(jrdev, "unable to map IV\n");
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
0, DMA_NONE, 0, 0);
kfree(edesc);
return ERR_PTR(-ENOMEM);
}
dma_to_sec4_sg_one(edesc->sec4_sg, iv_dma, ivsize, 0);
sg_to_sec4_sg_last(req->src, mapped_src_nents, edesc->sec4_sg + 1, 0);
if (mapped_dst_nents > 1) {
sg_to_sec4_sg_last(req->dst, mapped_dst_nents,
edesc->sec4_sg + dst_sg_idx, 0);
@ -1598,7 +1583,7 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
dev_err(jrdev, "unable to map S/G table\n");
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
iv_dma, ivsize, 0, 0);
iv_dma, ivsize, DMA_TO_DEVICE, 0, 0);
kfree(edesc);
return ERR_PTR(-ENOMEM);
}
@ -1611,7 +1596,6 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
sec4_sg_bytes, 1);
#endif
*iv_contig_out = in_contig;
return edesc;
}
@ -1621,19 +1605,16 @@ static int ablkcipher_encrypt(struct ablkcipher_request *req)
struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
struct device *jrdev = ctx->jrdev;
bool iv_contig;
u32 *desc;
int ret = 0;
/* allocate extended descriptor */
edesc = ablkcipher_edesc_alloc(req, DESC_JOB_IO_LEN *
CAAM_CMD_SZ, &iv_contig);
edesc = ablkcipher_edesc_alloc(req, DESC_JOB_IO_LEN * CAAM_CMD_SZ);
if (IS_ERR(edesc))
return PTR_ERR(edesc);
/* Create and submit job descriptor*/
init_ablkcipher_job(ctx->sh_desc_enc,
ctx->sh_desc_enc_dma, edesc, req, iv_contig);
init_ablkcipher_job(ctx->sh_desc_enc, ctx->sh_desc_enc_dma, edesc, req);
#ifdef DEBUG
print_hex_dump(KERN_ERR, "ablkcipher jobdesc@"__stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
@ -1657,20 +1638,25 @@ static int ablkcipher_decrypt(struct ablkcipher_request *req)
struct ablkcipher_edesc *edesc;
struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
struct device *jrdev = ctx->jrdev;
bool iv_contig;
u32 *desc;
int ret = 0;
/* allocate extended descriptor */
edesc = ablkcipher_edesc_alloc(req, DESC_JOB_IO_LEN *
CAAM_CMD_SZ, &iv_contig);
edesc = ablkcipher_edesc_alloc(req, DESC_JOB_IO_LEN * CAAM_CMD_SZ);
if (IS_ERR(edesc))
return PTR_ERR(edesc);
/*
* The crypto API expects us to set the IV (req->info) to the last
* ciphertext block.
*/
scatterwalk_map_and_copy(req->info, req->src, req->nbytes - ivsize,
ivsize, 0);
/* Create and submit job descriptor*/
init_ablkcipher_job(ctx->sh_desc_dec,
ctx->sh_desc_dec_dma, edesc, req, iv_contig);
init_ablkcipher_job(ctx->sh_desc_dec, ctx->sh_desc_dec_dma, edesc, req);
desc = edesc->hw_desc;
#ifdef DEBUG
print_hex_dump(KERN_ERR, "ablkcipher jobdesc@"__stringify(__LINE__)": ",
@ -1695,8 +1681,7 @@ static int ablkcipher_decrypt(struct ablkcipher_request *req)
*/
static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
struct skcipher_givcrypt_request *greq,
int desc_bytes,
bool *iv_contig_out)
int desc_bytes)
{
struct ablkcipher_request *req = &greq->creq;
struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
@ -1706,8 +1691,8 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
GFP_KERNEL : GFP_ATOMIC;
int src_nents, mapped_src_nents, dst_nents, mapped_dst_nents;
struct ablkcipher_edesc *edesc;
dma_addr_t iv_dma = 0;
bool out_contig;
dma_addr_t iv_dma;
u8 *iv;
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
int dst_sg_idx, sec4_sg_ents, sec4_sg_bytes;
@ -1752,36 +1737,20 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
}
}
/*
* Check if iv can be contiguous with source and destination.
* If so, include it. If not, create scatterlist.
*/
iv_dma = dma_map_single(jrdev, greq->giv, ivsize, DMA_TO_DEVICE);
if (dma_mapping_error(jrdev, iv_dma)) {
dev_err(jrdev, "unable to map IV\n");
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
0, 0, 0);
return ERR_PTR(-ENOMEM);
}
sec4_sg_ents = mapped_src_nents > 1 ? mapped_src_nents : 0;
dst_sg_idx = sec4_sg_ents;
if (mapped_dst_nents == 1 &&
iv_dma + ivsize == sg_dma_address(req->dst)) {
out_contig = true;
} else {
out_contig = false;
sec4_sg_ents += 1 + mapped_dst_nents;
}
sec4_sg_ents += 1 + mapped_dst_nents;
/* allocate space for base edesc and hw desc commands, link tables */
/*
* allocate space for base edesc and hw desc commands, link tables, IV
*/
sec4_sg_bytes = sec4_sg_ents * sizeof(struct sec4_sg_entry);
edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes,
edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes + ivsize,
GFP_DMA | flags);
if (!edesc) {
dev_err(jrdev, "could not allocate extended descriptor\n");
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
iv_dma, ivsize, 0, 0);
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
0, DMA_NONE, 0, 0);
return ERR_PTR(-ENOMEM);
}
@ -1790,24 +1759,33 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
edesc->sec4_sg_bytes = sec4_sg_bytes;
edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) +
desc_bytes;
edesc->iv_dir = DMA_FROM_DEVICE;
/* Make sure IV is located in a DMAable area */
iv = (u8 *)edesc->hw_desc + desc_bytes + sec4_sg_bytes;
iv_dma = dma_map_single(jrdev, iv, ivsize, DMA_FROM_DEVICE);
if (dma_mapping_error(jrdev, iv_dma)) {
dev_err(jrdev, "unable to map IV\n");
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
0, DMA_NONE, 0, 0);
kfree(edesc);
return ERR_PTR(-ENOMEM);
}
if (mapped_src_nents > 1)
sg_to_sec4_sg_last(req->src, mapped_src_nents, edesc->sec4_sg,
0);
if (!out_contig) {
dma_to_sec4_sg_one(edesc->sec4_sg + dst_sg_idx,
iv_dma, ivsize, 0);
sg_to_sec4_sg_last(req->dst, mapped_dst_nents,
edesc->sec4_sg + dst_sg_idx + 1, 0);
}
dma_to_sec4_sg_one(edesc->sec4_sg + dst_sg_idx, iv_dma, ivsize, 0);
sg_to_sec4_sg_last(req->dst, mapped_dst_nents, edesc->sec4_sg +
dst_sg_idx + 1, 0);
edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
sec4_sg_bytes, DMA_TO_DEVICE);
if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
dev_err(jrdev, "unable to map S/G table\n");
caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
iv_dma, ivsize, 0, 0);
iv_dma, ivsize, DMA_FROM_DEVICE, 0, 0);
kfree(edesc);
return ERR_PTR(-ENOMEM);
}
@ -1820,7 +1798,6 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
sec4_sg_bytes, 1);
#endif
*iv_contig_out = out_contig;
return edesc;
}
@ -1831,19 +1808,17 @@ static int ablkcipher_givencrypt(struct skcipher_givcrypt_request *creq)
struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
struct device *jrdev = ctx->jrdev;
bool iv_contig = false;
u32 *desc;
int ret = 0;
/* allocate extended descriptor */
edesc = ablkcipher_giv_edesc_alloc(creq, DESC_JOB_IO_LEN *
CAAM_CMD_SZ, &iv_contig);
edesc = ablkcipher_giv_edesc_alloc(creq, DESC_JOB_IO_LEN * CAAM_CMD_SZ);
if (IS_ERR(edesc))
return PTR_ERR(edesc);
/* Create and submit job descriptor*/
init_ablkcipher_giv_job(ctx->sh_desc_givenc, ctx->sh_desc_givenc_dma,
edesc, req, iv_contig);
edesc, req);
#ifdef DEBUG
print_hex_dump(KERN_ERR,
"ablkcipher jobdesc@" __stringify(__LINE__) ": ",

View File

@ -1093,7 +1093,7 @@ void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata,
read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 |
(0x6 << MOVE_LEN_SHIFT));
write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF |
(0x8 << MOVE_LEN_SHIFT));
(0x8 << MOVE_LEN_SHIFT) | MOVE_WAITCOMP);
/* Will read assoclen + cryptlen bytes */
append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
@ -1178,7 +1178,7 @@ void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata,
read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 |
(0x6 << MOVE_LEN_SHIFT));
write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF |
(0x8 << MOVE_LEN_SHIFT));
(0x8 << MOVE_LEN_SHIFT) | MOVE_WAITCOMP);
/* Will read assoclen + cryptlen bytes */
append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);

View File

@ -728,7 +728,7 @@ badkey:
* @assoclen: associated data length, in CAAM endianness
* @assoclen_dma: bus physical mapped address of req->assoclen
* @drv_req: driver-specific request structure
* @sgt: the h/w link table
* @sgt: the h/w link table, followed by IV
*/
struct aead_edesc {
int src_nents;
@ -739,9 +739,6 @@ struct aead_edesc {
unsigned int assoclen;
dma_addr_t assoclen_dma;
struct caam_drv_req drv_req;
#define CAAM_QI_MAX_AEAD_SG \
((CAAM_QI_MEMCACHE_SIZE - offsetof(struct aead_edesc, sgt)) / \
sizeof(struct qm_sg_entry))
struct qm_sg_entry sgt[0];
};
@ -753,7 +750,7 @@ struct aead_edesc {
* @qm_sg_bytes: length of dma mapped h/w link table
* @qm_sg_dma: bus physical mapped address of h/w link table
* @drv_req: driver-specific request structure
* @sgt: the h/w link table
* @sgt: the h/w link table, followed by IV
*/
struct ablkcipher_edesc {
int src_nents;
@ -762,9 +759,6 @@ struct ablkcipher_edesc {
int qm_sg_bytes;
dma_addr_t qm_sg_dma;
struct caam_drv_req drv_req;
#define CAAM_QI_MAX_ABLKCIPHER_SG \
((CAAM_QI_MEMCACHE_SIZE - offsetof(struct ablkcipher_edesc, sgt)) / \
sizeof(struct qm_sg_entry))
struct qm_sg_entry sgt[0];
};
@ -986,17 +980,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
}
}
if ((alg->caam.rfc3686 && encrypt) || !alg->caam.geniv) {
if ((alg->caam.rfc3686 && encrypt) || !alg->caam.geniv)
ivsize = crypto_aead_ivsize(aead);
iv_dma = dma_map_single(qidev, req->iv, ivsize, DMA_TO_DEVICE);
if (dma_mapping_error(qidev, iv_dma)) {
dev_err(qidev, "unable to map IV\n");
caam_unmap(qidev, req->src, req->dst, src_nents,
dst_nents, 0, 0, op_type, 0, 0);
qi_cache_free(edesc);
return ERR_PTR(-ENOMEM);
}
}
/*
* Create S/G table: req->assoclen, [IV,] req->src [, req->dst].
@ -1004,16 +989,33 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
*/
qm_sg_ents = 1 + !!ivsize + mapped_src_nents +
(mapped_dst_nents > 1 ? mapped_dst_nents : 0);
if (unlikely(qm_sg_ents > CAAM_QI_MAX_AEAD_SG)) {
dev_err(qidev, "Insufficient S/G entries: %d > %zu\n",
qm_sg_ents, CAAM_QI_MAX_AEAD_SG);
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
iv_dma, ivsize, op_type, 0, 0);
sg_table = &edesc->sgt[0];
qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
if (unlikely(offsetof(struct aead_edesc, sgt) + qm_sg_bytes + ivsize >
CAAM_QI_MEMCACHE_SIZE)) {
dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n",
qm_sg_ents, ivsize);
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
0, 0, 0, 0);
qi_cache_free(edesc);
return ERR_PTR(-ENOMEM);
}
sg_table = &edesc->sgt[0];
qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
if (ivsize) {
u8 *iv = (u8 *)(sg_table + qm_sg_ents);
/* Make sure IV is located in a DMAable area */
memcpy(iv, req->iv, ivsize);
iv_dma = dma_map_single(qidev, iv, ivsize, DMA_TO_DEVICE);
if (dma_mapping_error(qidev, iv_dma)) {
dev_err(qidev, "unable to map IV\n");
caam_unmap(qidev, req->src, req->dst, src_nents,
dst_nents, 0, 0, 0, 0, 0);
qi_cache_free(edesc);
return ERR_PTR(-ENOMEM);
}
}
edesc->src_nents = src_nents;
edesc->dst_nents = dst_nents;
@ -1166,15 +1168,27 @@ static void ablkcipher_done(struct caam_drv_req *drv_req, u32 status)
#endif
ablkcipher_unmap(qidev, edesc, req);
qi_cache_free(edesc);
/* In case initial IV was generated, copy it in GIVCIPHER request */
if (edesc->drv_req.drv_ctx->op_type == GIVENCRYPT) {
u8 *iv;
struct skcipher_givcrypt_request *greq;
greq = container_of(req, struct skcipher_givcrypt_request,
creq);
iv = (u8 *)edesc->sgt + edesc->qm_sg_bytes;
memcpy(greq->giv, iv, ivsize);
}
/*
* The crypto API expects us to set the IV (req->info) to the last
* ciphertext block. This is used e.g. by the CTS mode.
*/
scatterwalk_map_and_copy(req->info, req->dst, req->nbytes - ivsize,
ivsize, 0);
if (edesc->drv_req.drv_ctx->op_type != DECRYPT)
scatterwalk_map_and_copy(req->info, req->dst, req->nbytes -
ivsize, ivsize, 0);
qi_cache_free(edesc);
ablkcipher_request_complete(req, status);
}
@ -1189,9 +1203,9 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
struct ablkcipher_edesc *edesc;
dma_addr_t iv_dma;
bool in_contig;
u8 *iv;
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
int dst_sg_idx, qm_sg_ents;
int dst_sg_idx, qm_sg_ents, qm_sg_bytes;
struct qm_sg_entry *sg_table, *fd_sgt;
struct caam_drv_ctx *drv_ctx;
enum optype op_type = encrypt ? ENCRYPT : DECRYPT;
@ -1238,55 +1252,53 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
}
}
iv_dma = dma_map_single(qidev, req->info, ivsize, DMA_TO_DEVICE);
if (dma_mapping_error(qidev, iv_dma)) {
dev_err(qidev, "unable to map IV\n");
qm_sg_ents = 1 + mapped_src_nents;
dst_sg_idx = qm_sg_ents;
qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry);
if (unlikely(offsetof(struct ablkcipher_edesc, sgt) + qm_sg_bytes +
ivsize > CAAM_QI_MEMCACHE_SIZE)) {
dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n",
qm_sg_ents, ivsize);
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
0, 0, 0, 0);
return ERR_PTR(-ENOMEM);
}
if (mapped_src_nents == 1 &&
iv_dma + ivsize == sg_dma_address(req->src)) {
in_contig = true;
qm_sg_ents = 0;
} else {
in_contig = false;
qm_sg_ents = 1 + mapped_src_nents;
}
dst_sg_idx = qm_sg_ents;
qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
if (unlikely(qm_sg_ents > CAAM_QI_MAX_ABLKCIPHER_SG)) {
dev_err(qidev, "Insufficient S/G entries: %d > %zu\n",
qm_sg_ents, CAAM_QI_MAX_ABLKCIPHER_SG);
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
iv_dma, ivsize, op_type, 0, 0);
return ERR_PTR(-ENOMEM);
}
/* allocate space for base edesc and link tables */
/* allocate space for base edesc, link tables and IV */
edesc = qi_cache_alloc(GFP_DMA | flags);
if (unlikely(!edesc)) {
dev_err(qidev, "could not allocate extended descriptor\n");
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
iv_dma, ivsize, op_type, 0, 0);
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
0, 0, 0, 0);
return ERR_PTR(-ENOMEM);
}
/* Make sure IV is located in a DMAable area */
sg_table = &edesc->sgt[0];
iv = (u8 *)(sg_table + qm_sg_ents);
memcpy(iv, req->info, ivsize);
iv_dma = dma_map_single(qidev, iv, ivsize, DMA_TO_DEVICE);
if (dma_mapping_error(qidev, iv_dma)) {
dev_err(qidev, "unable to map IV\n");
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
0, 0, 0, 0);
qi_cache_free(edesc);
return ERR_PTR(-ENOMEM);
}
edesc->src_nents = src_nents;
edesc->dst_nents = dst_nents;
edesc->iv_dma = iv_dma;
sg_table = &edesc->sgt[0];
edesc->qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
edesc->qm_sg_bytes = qm_sg_bytes;
edesc->drv_req.app_ctx = req;
edesc->drv_req.cbk = ablkcipher_done;
edesc->drv_req.drv_ctx = drv_ctx;
if (!in_contig) {
dma_to_qm_sg_one(sg_table, iv_dma, ivsize, 0);
sg_to_qm_sg_last(req->src, mapped_src_nents, sg_table + 1, 0);
}
dma_to_qm_sg_one(sg_table, iv_dma, ivsize, 0);
sg_to_qm_sg_last(req->src, mapped_src_nents, sg_table + 1, 0);
if (mapped_dst_nents > 1)
sg_to_qm_sg_last(req->dst, mapped_dst_nents, sg_table +
@ -1304,20 +1316,12 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
fd_sgt = &edesc->drv_req.fd_sgt[0];
if (!in_contig)
dma_to_qm_sg_one_last_ext(&fd_sgt[1], edesc->qm_sg_dma,
ivsize + req->nbytes, 0);
else
dma_to_qm_sg_one_last(&fd_sgt[1], iv_dma, ivsize + req->nbytes,
0);
dma_to_qm_sg_one_last_ext(&fd_sgt[1], edesc->qm_sg_dma,
ivsize + req->nbytes, 0);
if (req->src == req->dst) {
if (!in_contig)
dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma +
sizeof(*sg_table), req->nbytes, 0);
else
dma_to_qm_sg_one(&fd_sgt[0], sg_dma_address(req->src),
req->nbytes, 0);
dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma +
sizeof(*sg_table), req->nbytes, 0);
} else if (mapped_dst_nents > 1) {
dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma + dst_sg_idx *
sizeof(*sg_table), req->nbytes, 0);
@ -1341,10 +1345,10 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
int src_nents, mapped_src_nents, dst_nents, mapped_dst_nents;
struct ablkcipher_edesc *edesc;
dma_addr_t iv_dma;
bool out_contig;
u8 *iv;
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
struct qm_sg_entry *sg_table, *fd_sgt;
int dst_sg_idx, qm_sg_ents;
int dst_sg_idx, qm_sg_ents, qm_sg_bytes;
struct caam_drv_ctx *drv_ctx;
drv_ctx = get_drv_ctx(ctx, GIVENCRYPT);
@ -1392,46 +1396,45 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
mapped_dst_nents = src_nents;
}
iv_dma = dma_map_single(qidev, creq->giv, ivsize, DMA_FROM_DEVICE);
if (dma_mapping_error(qidev, iv_dma)) {
dev_err(qidev, "unable to map IV\n");
qm_sg_ents = mapped_src_nents > 1 ? mapped_src_nents : 0;
dst_sg_idx = qm_sg_ents;
qm_sg_ents += 1 + mapped_dst_nents;
qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry);
if (unlikely(offsetof(struct ablkcipher_edesc, sgt) + qm_sg_bytes +
ivsize > CAAM_QI_MEMCACHE_SIZE)) {
dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n",
qm_sg_ents, ivsize);
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
0, 0, 0, 0);
return ERR_PTR(-ENOMEM);
}
qm_sg_ents = mapped_src_nents > 1 ? mapped_src_nents : 0;
dst_sg_idx = qm_sg_ents;
if (mapped_dst_nents == 1 &&
iv_dma + ivsize == sg_dma_address(req->dst)) {
out_contig = true;
} else {
out_contig = false;
qm_sg_ents += 1 + mapped_dst_nents;
}
if (unlikely(qm_sg_ents > CAAM_QI_MAX_ABLKCIPHER_SG)) {
dev_err(qidev, "Insufficient S/G entries: %d > %zu\n",
qm_sg_ents, CAAM_QI_MAX_ABLKCIPHER_SG);
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
iv_dma, ivsize, GIVENCRYPT, 0, 0);
return ERR_PTR(-ENOMEM);
}
/* allocate space for base edesc and link tables */
/* allocate space for base edesc, link tables and IV */
edesc = qi_cache_alloc(GFP_DMA | flags);
if (!edesc) {
dev_err(qidev, "could not allocate extended descriptor\n");
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
iv_dma, ivsize, GIVENCRYPT, 0, 0);
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
0, 0, 0, 0);
return ERR_PTR(-ENOMEM);
}
/* Make sure IV is located in a DMAable area */
sg_table = &edesc->sgt[0];
iv = (u8 *)(sg_table + qm_sg_ents);
iv_dma = dma_map_single(qidev, iv, ivsize, DMA_FROM_DEVICE);
if (dma_mapping_error(qidev, iv_dma)) {
dev_err(qidev, "unable to map IV\n");
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
0, 0, 0, 0);
qi_cache_free(edesc);
return ERR_PTR(-ENOMEM);
}
edesc->src_nents = src_nents;
edesc->dst_nents = dst_nents;
edesc->iv_dma = iv_dma;
sg_table = &edesc->sgt[0];
edesc->qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
edesc->qm_sg_bytes = qm_sg_bytes;
edesc->drv_req.app_ctx = req;
edesc->drv_req.cbk = ablkcipher_done;
edesc->drv_req.drv_ctx = drv_ctx;
@ -1439,11 +1442,9 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
if (mapped_src_nents > 1)
sg_to_qm_sg_last(req->src, mapped_src_nents, sg_table, 0);
if (!out_contig) {
dma_to_qm_sg_one(sg_table + dst_sg_idx, iv_dma, ivsize, 0);
sg_to_qm_sg_last(req->dst, mapped_dst_nents, sg_table +
dst_sg_idx + 1, 0);
}
dma_to_qm_sg_one(sg_table + dst_sg_idx, iv_dma, ivsize, 0);
sg_to_qm_sg_last(req->dst, mapped_dst_nents, sg_table + dst_sg_idx + 1,
0);
edesc->qm_sg_dma = dma_map_single(qidev, sg_table, edesc->qm_sg_bytes,
DMA_TO_DEVICE);
@ -1464,13 +1465,8 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
dma_to_qm_sg_one(&fd_sgt[1], sg_dma_address(req->src),
req->nbytes, 0);
if (!out_contig)
dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma + dst_sg_idx *
sizeof(*sg_table), ivsize + req->nbytes,
0);
else
dma_to_qm_sg_one(&fd_sgt[0], sg_dma_address(req->dst),
ivsize + req->nbytes, 0);
dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma + dst_sg_idx *
sizeof(*sg_table), ivsize + req->nbytes, 0);
return edesc;
}
@ -1480,6 +1476,7 @@ static inline int ablkcipher_crypt(struct ablkcipher_request *req, bool encrypt)
struct ablkcipher_edesc *edesc;
struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
int ret;
if (unlikely(caam_congested))
@ -1490,6 +1487,14 @@ static inline int ablkcipher_crypt(struct ablkcipher_request *req, bool encrypt)
if (IS_ERR(edesc))
return PTR_ERR(edesc);
/*
* The crypto API expects us to set the IV (req->info) to the last
* ciphertext block.
*/
if (!encrypt)
scatterwalk_map_and_copy(req->info, req->src, req->nbytes -
ivsize, ivsize, 0);
ret = caam_qi_enqueue(ctx->qidev, &edesc->drv_req);
if (!ret) {
ret = -EINPROGRESS;

View File

@ -66,7 +66,7 @@ static void rsa_priv_f2_unmap(struct device *dev, struct rsa_edesc *edesc,
struct caam_rsa_key *key = &ctx->key;
struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2;
size_t p_sz = key->p_sz;
size_t q_sz = key->p_sz;
size_t q_sz = key->q_sz;
dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE);
dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
@ -83,7 +83,7 @@ static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc,
struct caam_rsa_key *key = &ctx->key;
struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3;
size_t p_sz = key->p_sz;
size_t q_sz = key->p_sz;
size_t q_sz = key->q_sz;
dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE);
@ -166,18 +166,71 @@ static void rsa_priv_f3_done(struct device *dev, u32 *desc, u32 err,
akcipher_request_complete(req, err);
}
static int caam_rsa_count_leading_zeros(struct scatterlist *sgl,
unsigned int nbytes,
unsigned int flags)
{
struct sg_mapping_iter miter;
int lzeros, ents;
unsigned int len;
unsigned int tbytes = nbytes;
const u8 *buff;
ents = sg_nents_for_len(sgl, nbytes);
if (ents < 0)
return ents;
sg_miter_start(&miter, sgl, ents, SG_MITER_FROM_SG | flags);
lzeros = 0;
len = 0;
while (nbytes > 0) {
while (len && !*buff) {
lzeros++;
len--;
buff++;
}
if (len && *buff)
break;
sg_miter_next(&miter);
buff = miter.addr;
len = miter.length;
nbytes -= lzeros;
lzeros = 0;
}
miter.consumed = lzeros;
sg_miter_stop(&miter);
nbytes -= lzeros;
return tbytes - nbytes;
}
static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
size_t desclen)
{
struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
struct device *dev = ctx->dev;
struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
struct rsa_edesc *edesc;
gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
GFP_KERNEL : GFP_ATOMIC;
int sg_flags = (flags == GFP_ATOMIC) ? SG_MITER_ATOMIC : 0;
int sgc;
int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes;
int src_nents, dst_nents;
int lzeros;
lzeros = caam_rsa_count_leading_zeros(req->src, req->src_len, sg_flags);
if (lzeros < 0)
return ERR_PTR(lzeros);
req->src_len -= lzeros;
req->src = scatterwalk_ffwd(req_ctx->src, req->src, lzeros);
src_nents = sg_nents_for_len(req->src, req->src_len);
dst_nents = sg_nents_for_len(req->dst, req->dst_len);
@ -344,7 +397,7 @@ static int set_rsa_priv_f2_pdb(struct akcipher_request *req,
struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2;
int sec4_sg_index = 0;
size_t p_sz = key->p_sz;
size_t q_sz = key->p_sz;
size_t q_sz = key->q_sz;
pdb->d_dma = dma_map_single(dev, key->d, key->d_sz, DMA_TO_DEVICE);
if (dma_mapping_error(dev, pdb->d_dma)) {
@ -419,7 +472,7 @@ static int set_rsa_priv_f3_pdb(struct akcipher_request *req,
struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3;
int sec4_sg_index = 0;
size_t p_sz = key->p_sz;
size_t q_sz = key->p_sz;
size_t q_sz = key->q_sz;
pdb->p_dma = dma_map_single(dev, key->p, p_sz, DMA_TO_DEVICE);
if (dma_mapping_error(dev, pdb->p_dma)) {
@ -730,19 +783,12 @@ static u8 *caam_read_rsa_crt(const u8 *ptr, size_t nbytes, size_t dstlen)
*/
static inline u8 *caam_read_raw_data(const u8 *buf, size_t *nbytes)
{
u8 *val;
caam_rsa_drop_leading_zeros(&buf, nbytes);
if (!*nbytes)
return NULL;
val = kzalloc(*nbytes, GFP_DMA | GFP_KERNEL);
if (!val)
return NULL;
memcpy(val, buf, *nbytes);
return val;
return kmemdup(buf, *nbytes, GFP_DMA | GFP_KERNEL);
}
static int caam_rsa_check_key_length(unsigned int len)
@ -953,6 +999,7 @@ static struct akcipher_alg caam_rsa = {
.max_size = caam_rsa_max_size,
.init = caam_rsa_init_tfm,
.exit = caam_rsa_exit_tfm,
.reqsize = sizeof(struct caam_rsa_req_ctx),
.base = {
.cra_name = "rsa",
.cra_driver_name = "rsa-caam",

View File

@ -95,6 +95,14 @@ struct caam_rsa_ctx {
struct device *dev;
};
/**
* caam_rsa_req_ctx - per request context.
* @src: input scatterlist (stripped of leading zeros)
*/
struct caam_rsa_req_ctx {
struct scatterlist src[2];
};
/**
* rsa_edesc - s/w-extended rsa descriptor
* @src_nents : number of segments in input scatterlist

View File

@ -322,9 +322,9 @@ static int caam_remove(struct platform_device *pdev)
/*
* De-initialize RNG state handles initialized by this driver.
* In case of DPAA 2.x, RNG is managed by MC firmware.
* In case of SoCs with Management Complex, RNG is managed by MC f/w.
*/
if (!caam_dpaa2 && ctrlpriv->rng4_sh_init)
if (!ctrlpriv->mc_en && ctrlpriv->rng4_sh_init)
deinstantiate_rng(ctrldev, ctrlpriv->rng4_sh_init);
/* Shut down debug views */
@ -396,11 +396,56 @@ start_rng:
clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, RTMCTL_SAMP_MODE_RAW_ES_SC);
}
static int caam_get_era_from_hw(struct caam_ctrl __iomem *ctrl)
{
static const struct {
u16 ip_id;
u8 maj_rev;
u8 era;
} id[] = {
{0x0A10, 1, 1},
{0x0A10, 2, 2},
{0x0A12, 1, 3},
{0x0A14, 1, 3},
{0x0A14, 2, 4},
{0x0A16, 1, 4},
{0x0A10, 3, 4},
{0x0A11, 1, 4},
{0x0A18, 1, 4},
{0x0A11, 2, 5},
{0x0A12, 2, 5},
{0x0A13, 1, 5},
{0x0A1C, 1, 5}
};
u32 ccbvid, id_ms;
u8 maj_rev, era;
u16 ip_id;
int i;
ccbvid = rd_reg32(&ctrl->perfmon.ccb_id);
era = (ccbvid & CCBVID_ERA_MASK) >> CCBVID_ERA_SHIFT;
if (era) /* This is '0' prior to CAAM ERA-6 */
return era;
id_ms = rd_reg32(&ctrl->perfmon.caam_id_ms);
ip_id = (id_ms & SECVID_MS_IPID_MASK) >> SECVID_MS_IPID_SHIFT;
maj_rev = (id_ms & SECVID_MS_MAJ_REV_MASK) >> SECVID_MS_MAJ_REV_SHIFT;
for (i = 0; i < ARRAY_SIZE(id); i++)
if (id[i].ip_id == ip_id && id[i].maj_rev == maj_rev)
return id[i].era;
return -ENOTSUPP;
}
/**
* caam_get_era() - Return the ERA of the SEC on SoC, based
* on "sec-era" propery in the DTS. This property is updated by u-boot.
* on "sec-era" optional property in the DTS. This property is updated
* by u-boot.
* In case this property is not passed an attempt to retrieve the CAAM
* era via register reads will be made.
**/
int caam_get_era(void)
static int caam_get_era(struct caam_ctrl __iomem *ctrl)
{
struct device_node *caam_node;
int ret;
@ -410,9 +455,11 @@ int caam_get_era(void)
ret = of_property_read_u32(caam_node, "fsl,sec-era", &prop);
of_node_put(caam_node);
return ret ? -ENOTSUPP : prop;
if (!ret)
return prop;
else
return caam_get_era_from_hw(ctrl);
}
EXPORT_SYMBOL(caam_get_era);
static const struct of_device_id caam_match[] = {
{
@ -571,11 +618,15 @@ static int caam_probe(struct platform_device *pdev)
/*
* Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel,
* long pointers in master configuration register.
* In case of DPAA 2.x, Management Complex firmware performs
* In case of SoCs with Management Complex, MC f/w performs
* the configuration.
*/
caam_dpaa2 = !!(comp_params & CTPR_MS_DPAA2);
if (!caam_dpaa2)
np = of_find_compatible_node(NULL, NULL, "fsl,qoriq-mc");
ctrlpriv->mc_en = !!np;
of_node_put(np);
if (!ctrlpriv->mc_en)
clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK | MCFGR_LONG_PTR,
MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF |
MCFGR_WDENABLE | MCFGR_LARGE_BURST |
@ -623,7 +674,7 @@ static int caam_probe(struct platform_device *pdev)
goto iounmap_ctrl;
}
ctrlpriv->era = caam_get_era();
ctrlpriv->era = caam_get_era(ctrl);
ret = of_platform_populate(nprop, caam_match, NULL, dev);
if (ret) {
@ -686,9 +737,9 @@ static int caam_probe(struct platform_device *pdev)
/*
* If SEC has RNG version >= 4 and RNG state handle has not been
* already instantiated, do RNG instantiation
* In case of DPAA 2.x, RNG is managed by MC firmware.
* In case of SoCs with Management Complex, RNG is managed by MC f/w.
*/
if (!caam_dpaa2 &&
if (!ctrlpriv->mc_en &&
(cha_vid_ls & CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT >= 4) {
ctrlpriv->rng4_sh_init =
rd_reg32(&ctrl->r4tst[0].rdsta);
@ -757,9 +808,8 @@ static int caam_probe(struct platform_device *pdev)
/* Report "alive" for developer to see */
dev_info(dev, "device ID = 0x%016llx (Era %d)\n", caam_id,
ctrlpriv->era);
dev_info(dev, "job rings = %d, qi = %d, dpaa2 = %s\n",
ctrlpriv->total_jobrs, ctrlpriv->qi_present,
caam_dpaa2 ? "yes" : "no");
dev_info(dev, "job rings = %d, qi = %d\n",
ctrlpriv->total_jobrs, ctrlpriv->qi_present);
#ifdef CONFIG_DEBUG_FS
debugfs_create_file("rq_dequeued", S_IRUSR | S_IRGRP | S_IROTH,

View File

@ -9,8 +9,6 @@
#define CTRL_H
/* Prototypes for backend-level services exposed to APIs */
int caam_get_era(void);
extern bool caam_dpaa2;
#endif /* CTRL_H */

View File

@ -82,6 +82,7 @@ struct caam_drv_private {
*/
u8 total_jobrs; /* Total Job Rings in device */
u8 qi_present; /* Nonzero if QI present in device */
u8 mc_en; /* Nonzero if MC f/w is active */
int secvio_irq; /* Security violation interrupt number */
int virt_en; /* Virtualization enabled in CAAM */
int era; /* CAAM Era (internal HW revision) */

View File

@ -657,9 +657,8 @@ static int init_cgr(struct device *qidev)
{
int ret;
struct qm_mcc_initcgr opts;
const u64 cpus = *(u64 *)qman_affine_cpus();
const int num_cpus = hweight64(cpus);
const u64 val = num_cpus * MAX_RSP_FQ_BACKLOG_PER_CPU;
const u64 val = (u64)cpumask_weight(qman_affine_cpus()) *
MAX_RSP_FQ_BACKLOG_PER_CPU;
ret = qman_alloc_cgrid(&qipriv.cgr.cgrid);
if (ret) {

View File

@ -312,11 +312,17 @@ struct caam_perfmon {
/* Component Instantiation Parameters fe0-fff */
u32 rtic_id; /* RVID - RTIC Version ID */
#define CCBVID_ERA_MASK 0xff000000
#define CCBVID_ERA_SHIFT 24
u32 ccb_id; /* CCBVID - CCB Version ID */
u32 cha_id_ms; /* CHAVID - CHA Version ID Most Significant*/
u32 cha_id_ls; /* CHAVID - CHA Version ID Least Significant*/
u32 cha_num_ms; /* CHANUM - CHA Number Most Significant */
u32 cha_num_ls; /* CHANUM - CHA Number Least Significant*/
#define SECVID_MS_IPID_MASK 0xffff0000
#define SECVID_MS_IPID_SHIFT 16
#define SECVID_MS_MAJ_REV_MASK 0x0000ff00
#define SECVID_MS_MAJ_REV_SHIFT 8
u32 caam_id_ms; /* CAAMVID - CAAM Version ID MS */
u32 caam_id_ls; /* CAAMVID - CAAM Version ID LS */
};

View File

@ -46,8 +46,10 @@
#ifndef __COMMON_H__
#define __COMMON_H__
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
@ -149,6 +151,25 @@ struct zip_operation {
u32 sizeofzops;
};
static inline int zip_poll_result(union zip_zres_s *result)
{
int retries = 1000;
while (!result->s.compcode) {
if (!--retries) {
pr_err("ZIP ERR: request timed out");
return -ETIMEDOUT;
}
udelay(10);
/*
* Force re-reading of compcode which is updated
* by the ZIP coprocessor.
*/
rmb();
}
return 0;
}
/* error messages */
#define zip_err(fmt, args...) pr_err("ZIP ERR:%s():%d: " \
fmt "\n", __func__, __LINE__, ## args)

View File

@ -124,7 +124,7 @@ int zip_compress(const u8 *src, unsigned int slen,
struct zip_kernel_ctx *zip_ctx)
{
struct zip_operation *zip_ops = NULL;
struct zip_state zip_state;
struct zip_state *zip_state;
struct zip_device *zip = NULL;
int ret;
@ -135,20 +135,23 @@ int zip_compress(const u8 *src, unsigned int slen,
if (!zip)
return -ENODEV;
memset(&zip_state, 0, sizeof(struct zip_state));
zip_state = kzalloc(sizeof(*zip_state), GFP_ATOMIC);
if (!zip_state)
return -ENOMEM;
zip_ops = &zip_ctx->zip_comp;
zip_ops->input_len = slen;
zip_ops->output_len = *dlen;
memcpy(zip_ops->input, src, slen);
ret = zip_deflate(zip_ops, &zip_state, zip);
ret = zip_deflate(zip_ops, zip_state, zip);
if (!ret) {
*dlen = zip_ops->output_len;
memcpy(dst, zip_ops->output, *dlen);
}
kfree(zip_state);
return ret;
}
@ -157,7 +160,7 @@ int zip_decompress(const u8 *src, unsigned int slen,
struct zip_kernel_ctx *zip_ctx)
{
struct zip_operation *zip_ops = NULL;
struct zip_state zip_state;
struct zip_state *zip_state;
struct zip_device *zip = NULL;
int ret;
@ -168,7 +171,10 @@ int zip_decompress(const u8 *src, unsigned int slen,
if (!zip)
return -ENODEV;
memset(&zip_state, 0, sizeof(struct zip_state));
zip_state = kzalloc(sizeof(*zip_state), GFP_ATOMIC);
if (!zip_state)
return -ENOMEM;
zip_ops = &zip_ctx->zip_decomp;
memcpy(zip_ops->input, src, slen);
@ -179,13 +185,13 @@ int zip_decompress(const u8 *src, unsigned int slen,
zip_ops->input_len = slen;
zip_ops->output_len = *dlen;
ret = zip_inflate(zip_ops, &zip_state, zip);
ret = zip_inflate(zip_ops, zip_state, zip);
if (!ret) {
*dlen = zip_ops->output_len;
memcpy(dst, zip_ops->output, *dlen);
}
kfree(zip_state);
return ret;
}

View File

@ -129,8 +129,8 @@ int zip_deflate(struct zip_operation *zip_ops, struct zip_state *s,
/* Stats update for compression requests submitted */
atomic64_inc(&zip_dev->stats.comp_req_submit);
while (!result_ptr->s.compcode)
continue;
/* Wait for completion or error */
zip_poll_result(result_ptr);
/* Stats update for compression requests completed */
atomic64_inc(&zip_dev->stats.comp_req_complete);

View File

@ -87,12 +87,12 @@ u32 zip_load_instr(union zip_inst_s *instr,
* Distribute the instructions between the enabled queues based on
* the CPU id.
*/
if (smp_processor_id() % 2 == 0)
if (raw_smp_processor_id() % 2 == 0)
queue = 0;
else
queue = 1;
zip_dbg("CPU Core: %d Queue number:%d", smp_processor_id(), queue);
zip_dbg("CPU Core: %d Queue number:%d", raw_smp_processor_id(), queue);
/* Take cmd buffer lock */
spin_lock(&zip_dev->iq[queue].lock);

View File

@ -143,8 +143,8 @@ int zip_inflate(struct zip_operation *zip_ops, struct zip_state *s,
/* Decompression requests submitted stats update */
atomic64_inc(&zip_dev->stats.decomp_req_submit);
while (!result_ptr->s.compcode)
continue;
/* Wait for completion or error */
zip_poll_result(result_ptr);
/* Decompression requests completed stats update */
atomic64_inc(&zip_dev->stats.decomp_req_complete);

View File

@ -113,7 +113,7 @@ struct zip_device *zip_get_device(int node)
*/
int zip_get_node_id(void)
{
return cpu_to_node(smp_processor_id());
return cpu_to_node(raw_smp_processor_id());
}
/* Initializes the ZIP h/w sub-system */
@ -469,6 +469,8 @@ static int zip_show_stats(struct seq_file *s, void *unused)
struct zip_stats *st;
for (index = 0; index < MAX_ZIP_DEVICES; index++) {
u64 pending = 0;
if (zip_dev[index]) {
zip = zip_dev[index];
st = &zip->stats;
@ -476,16 +478,15 @@ static int zip_show_stats(struct seq_file *s, void *unused)
/* Get all the pending requests */
for (q = 0; q < ZIP_NUM_QUEUES; q++) {
val = zip_reg_read((zip->reg_base +
ZIP_DBG_COREX_STA(q)));
val = (val >> 32);
val = val & 0xffffff;
atomic64_add(val, &st->pending_req);
ZIP_DBG_QUEX_STA(q)));
pending += val >> 32 & 0xffffff;
}
avg_chunk = (atomic64_read(&st->comp_in_bytes) /
atomic64_read(&st->comp_req_complete));
avg_cr = (atomic64_read(&st->comp_in_bytes) /
atomic64_read(&st->comp_out_bytes));
val = atomic64_read(&st->comp_req_complete);
avg_chunk = (val) ? atomic64_read(&st->comp_in_bytes) / val : 0;
val = atomic64_read(&st->comp_out_bytes);
avg_cr = (val) ? atomic64_read(&st->comp_in_bytes) / val : 0;
seq_printf(s, " ZIP Device %d Stats\n"
"-----------------------------------\n"
"Comp Req Submitted : \t%lld\n"
@ -513,10 +514,7 @@ static int zip_show_stats(struct seq_file *s, void *unused)
(u64)atomic64_read(&st->decomp_in_bytes),
(u64)atomic64_read(&st->decomp_out_bytes),
(u64)atomic64_read(&st->decomp_bad_reqs),
(u64)atomic64_read(&st->pending_req));
/* Reset pending requests count */
atomic64_set(&st->pending_req, 0);
pending);
}
}
return 0;

View File

@ -74,7 +74,6 @@ struct zip_stats {
atomic64_t comp_req_complete;
atomic64_t decomp_req_submit;
atomic64_t decomp_req_complete;
atomic64_t pending_req;
atomic64_t comp_in_bytes;
atomic64_t comp_out_bytes;
atomic64_t decomp_in_bytes;

View File

@ -443,7 +443,7 @@ union zip_corex_bist_status {
static inline u64 ZIP_COREX_BIST_STATUS(u64 param1)
{
if (((param1 <= 1)))
if (param1 <= 1)
return 0x0520ull + (param1 & 1) * 0x8ull;
pr_err("ZIP_COREX_BIST_STATUS: %llu\n", param1);
return 0;
@ -537,7 +537,7 @@ union zip_dbg_corex_inst {
static inline u64 ZIP_DBG_COREX_INST(u64 param1)
{
if (((param1 <= 1)))
if (param1 <= 1)
return 0x0640ull + (param1 & 1) * 0x8ull;
pr_err("ZIP_DBG_COREX_INST: %llu\n", param1);
return 0;
@ -568,7 +568,7 @@ union zip_dbg_corex_sta {
static inline u64 ZIP_DBG_COREX_STA(u64 param1)
{
if (((param1 <= 1)))
if (param1 <= 1)
return 0x0680ull + (param1 & 1) * 0x8ull;
pr_err("ZIP_DBG_COREX_STA: %llu\n", param1);
return 0;
@ -599,7 +599,7 @@ union zip_dbg_quex_sta {
static inline u64 ZIP_DBG_QUEX_STA(u64 param1)
{
if (((param1 <= 7)))
if (param1 <= 7)
return 0x1800ull + (param1 & 7) * 0x8ull;
pr_err("ZIP_DBG_QUEX_STA: %llu\n", param1);
return 0;
@ -817,7 +817,7 @@ union zip_msix_pbax {
static inline u64 ZIP_MSIX_PBAX(u64 param1)
{
if (((param1 == 0)))
if (param1 == 0)
return 0x0000838000FF0000ull;
pr_err("ZIP_MSIX_PBAX: %llu\n", param1);
return 0;
@ -846,7 +846,7 @@ union zip_msix_vecx_addr {
static inline u64 ZIP_MSIX_VECX_ADDR(u64 param1)
{
if (((param1 <= 17)))
if (param1 <= 17)
return 0x0000838000F00000ull + (param1 & 31) * 0x10ull;
pr_err("ZIP_MSIX_VECX_ADDR: %llu\n", param1);
return 0;
@ -875,7 +875,7 @@ union zip_msix_vecx_ctl {
static inline u64 ZIP_MSIX_VECX_CTL(u64 param1)
{
if (((param1 <= 17)))
if (param1 <= 17)
return 0x0000838000F00008ull + (param1 & 31) * 0x10ull;
pr_err("ZIP_MSIX_VECX_CTL: %llu\n", param1);
return 0;
@ -900,7 +900,7 @@ union zip_quex_done {
static inline u64 ZIP_QUEX_DONE(u64 param1)
{
if (((param1 <= 7)))
if (param1 <= 7)
return 0x2000ull + (param1 & 7) * 0x8ull;
pr_err("ZIP_QUEX_DONE: %llu\n", param1);
return 0;
@ -925,7 +925,7 @@ union zip_quex_done_ack {
static inline u64 ZIP_QUEX_DONE_ACK(u64 param1)
{
if (((param1 <= 7)))
if (param1 <= 7)
return 0x2200ull + (param1 & 7) * 0x8ull;
pr_err("ZIP_QUEX_DONE_ACK: %llu\n", param1);
return 0;
@ -950,7 +950,7 @@ union zip_quex_done_ena_w1c {
static inline u64 ZIP_QUEX_DONE_ENA_W1C(u64 param1)
{
if (((param1 <= 7)))
if (param1 <= 7)
return 0x2600ull + (param1 & 7) * 0x8ull;
pr_err("ZIP_QUEX_DONE_ENA_W1C: %llu\n", param1);
return 0;
@ -975,7 +975,7 @@ union zip_quex_done_ena_w1s {
static inline u64 ZIP_QUEX_DONE_ENA_W1S(u64 param1)
{
if (((param1 <= 7)))
if (param1 <= 7)
return 0x2400ull + (param1 & 7) * 0x8ull;
pr_err("ZIP_QUEX_DONE_ENA_W1S: %llu\n", param1);
return 0;
@ -1004,7 +1004,7 @@ union zip_quex_done_wait {
static inline u64 ZIP_QUEX_DONE_WAIT(u64 param1)
{
if (((param1 <= 7)))
if (param1 <= 7)
return 0x2800ull + (param1 & 7) * 0x8ull;
pr_err("ZIP_QUEX_DONE_WAIT: %llu\n", param1);
return 0;
@ -1029,7 +1029,7 @@ union zip_quex_doorbell {
static inline u64 ZIP_QUEX_DOORBELL(u64 param1)
{
if (((param1 <= 7)))
if (param1 <= 7)
return 0x4000ull + (param1 & 7) * 0x8ull;
pr_err("ZIP_QUEX_DOORBELL: %llu\n", param1);
return 0;
@ -1058,7 +1058,7 @@ union zip_quex_err_ena_w1c {
static inline u64 ZIP_QUEX_ERR_ENA_W1C(u64 param1)
{
if (((param1 <= 7)))
if (param1 <= 7)
return 0x3600ull + (param1 & 7) * 0x8ull;
pr_err("ZIP_QUEX_ERR_ENA_W1C: %llu\n", param1);
return 0;
@ -1087,7 +1087,7 @@ union zip_quex_err_ena_w1s {
static inline u64 ZIP_QUEX_ERR_ENA_W1S(u64 param1)
{
if (((param1 <= 7)))
if (param1 <= 7)
return 0x3400ull + (param1 & 7) * 0x8ull;
pr_err("ZIP_QUEX_ERR_ENA_W1S: %llu\n", param1);
return 0;
@ -1120,7 +1120,7 @@ union zip_quex_err_int {
static inline u64 ZIP_QUEX_ERR_INT(u64 param1)
{
if (((param1 <= 7)))
if (param1 <= 7)
return 0x3000ull + (param1 & 7) * 0x8ull;
pr_err("ZIP_QUEX_ERR_INT: %llu\n", param1);
return 0;
@ -1150,7 +1150,7 @@ union zip_quex_err_int_w1s {
static inline u64 ZIP_QUEX_ERR_INT_W1S(u64 param1)
{
if (((param1 <= 7)))
if (param1 <= 7)
return 0x3200ull + (param1 & 7) * 0x8ull;
pr_err("ZIP_QUEX_ERR_INT_W1S: %llu\n", param1);
return 0;
@ -1179,7 +1179,7 @@ union zip_quex_gcfg {
static inline u64 ZIP_QUEX_GCFG(u64 param1)
{
if (((param1 <= 7)))
if (param1 <= 7)
return 0x1A00ull + (param1 & 7) * 0x8ull;
pr_err("ZIP_QUEX_GCFG: %llu\n", param1);
return 0;
@ -1204,7 +1204,7 @@ union zip_quex_map {
static inline u64 ZIP_QUEX_MAP(u64 param1)
{
if (((param1 <= 7)))
if (param1 <= 7)
return 0x1400ull + (param1 & 7) * 0x8ull;
pr_err("ZIP_QUEX_MAP: %llu\n", param1);
return 0;
@ -1236,7 +1236,7 @@ union zip_quex_sbuf_addr {
static inline u64 ZIP_QUEX_SBUF_ADDR(u64 param1)
{
if (((param1 <= 7)))
if (param1 <= 7)
return 0x1000ull + (param1 & 7) * 0x8ull;
pr_err("ZIP_QUEX_SBUF_ADDR: %llu\n", param1);
return 0;
@ -1276,7 +1276,7 @@ union zip_quex_sbuf_ctl {
static inline u64 ZIP_QUEX_SBUF_CTL(u64 param1)
{
if (((param1 <= 7)))
if (param1 <= 7)
return 0x1200ull + (param1 & 7) * 0x8ull;
pr_err("ZIP_QUEX_SBUF_CTL: %llu\n", param1);
return 0;

View File

@ -22,11 +22,17 @@
#include <linux/delay.h>
#include <linux/hw_random.h>
#include <linux/ccp.h>
#include <linux/firmware.h>
#include "sp-dev.h"
#include "psp-dev.h"
#define SEV_VERSION_GREATER_OR_EQUAL(_maj, _min) \
((psp_master->api_major) >= _maj && \
(psp_master->api_minor) >= _min)
#define DEVICE_NAME "sev"
#define SEV_FW_FILE "amd/sev.fw"
static DEFINE_MUTEX(sev_cmd_mutex);
static struct sev_misc_dev *misc_dev;
@ -112,6 +118,8 @@ static int sev_cmd_buffer_len(int cmd)
case SEV_CMD_RECEIVE_UPDATE_DATA: return sizeof(struct sev_data_receive_update_data);
case SEV_CMD_RECEIVE_UPDATE_VMSA: return sizeof(struct sev_data_receive_update_vmsa);
case SEV_CMD_LAUNCH_UPDATE_SECRET: return sizeof(struct sev_data_launch_secret);
case SEV_CMD_DOWNLOAD_FIRMWARE: return sizeof(struct sev_data_download_firmware);
case SEV_CMD_GET_ID: return sizeof(struct sev_data_get_id);
default: return 0;
}
@ -378,6 +386,79 @@ void *psp_copy_user_blob(u64 __user uaddr, u32 len)
}
EXPORT_SYMBOL_GPL(psp_copy_user_blob);
static int sev_get_api_version(void)
{
struct sev_user_data_status *status;
int error, ret;
status = &psp_master->status_cmd_buf;
ret = sev_platform_status(status, &error);
if (ret) {
dev_err(psp_master->dev,
"SEV: failed to get status. Error: %#x\n", error);
return 1;
}
psp_master->api_major = status->api_major;
psp_master->api_minor = status->api_minor;
psp_master->build = status->build;
return 0;
}
/* Don't fail if SEV FW couldn't be updated. Continue with existing SEV FW */
static int sev_update_firmware(struct device *dev)
{
struct sev_data_download_firmware *data;
const struct firmware *firmware;
int ret, error, order;
struct page *p;
u64 data_size;
ret = request_firmware(&firmware, SEV_FW_FILE, dev);
if (ret < 0)
return -1;
/*
* SEV FW expects the physical address given to it to be 32
* byte aligned. Memory allocated has structure placed at the
* beginning followed by the firmware being passed to the SEV
* FW. Allocate enough memory for data structure + alignment
* padding + SEV FW.
*/
data_size = ALIGN(sizeof(struct sev_data_download_firmware), 32);
order = get_order(firmware->size + data_size);
p = alloc_pages(GFP_KERNEL, order);
if (!p) {
ret = -1;
goto fw_err;
}
/*
* Copy firmware data to a kernel allocated contiguous
* memory region.
*/
data = page_address(p);
memcpy(page_address(p) + data_size, firmware->data, firmware->size);
data->address = __psp_pa(page_address(p) + data_size);
data->len = firmware->size;
ret = sev_do_cmd(SEV_CMD_DOWNLOAD_FIRMWARE, data, &error);
if (ret)
dev_dbg(dev, "Failed to update SEV firmware: %#x\n", error);
else
dev_info(dev, "SEV firmware update successful\n");
__free_pages(p, order);
fw_err:
release_firmware(firmware);
return ret;
}
static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp)
{
struct sev_user_data_pek_cert_import input;
@ -430,6 +511,46 @@ e_free:
return ret;
}
static int sev_ioctl_do_get_id(struct sev_issue_cmd *argp)
{
struct sev_data_get_id *data;
u64 data_size, user_size;
void *id_blob, *mem;
int ret;
/* SEV GET_ID available from SEV API v0.16 and up */
if (!SEV_VERSION_GREATER_OR_EQUAL(0, 16))
return -ENOTSUPP;
/* SEV FW expects the buffer it fills with the ID to be
* 8-byte aligned. Memory allocated should be enough to
* hold data structure + alignment padding + memory
* where SEV FW writes the ID.
*/
data_size = ALIGN(sizeof(struct sev_data_get_id), 8);
user_size = sizeof(struct sev_user_data_get_id);
mem = kzalloc(data_size + user_size, GFP_KERNEL);
if (!mem)
return -ENOMEM;
data = mem;
id_blob = mem + data_size;
data->address = __psp_pa(id_blob);
data->len = user_size;
ret = __sev_do_cmd_locked(SEV_CMD_GET_ID, data, &argp->error);
if (!ret) {
if (copy_to_user((void __user *)argp->data, id_blob, data->len))
ret = -EFAULT;
}
kfree(mem);
return ret;
}
static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp)
{
struct sev_user_data_pdh_cert_export input;
@ -567,6 +688,9 @@ static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg)
case SEV_PDH_CERT_EXPORT:
ret = sev_ioctl_do_pdh_export(&input);
break;
case SEV_GET_ID:
ret = sev_ioctl_do_get_id(&input);
break;
default:
ret = -EINVAL;
goto out;
@ -750,7 +874,6 @@ EXPORT_SYMBOL_GPL(sev_issue_cmd_external_user);
void psp_pci_init(void)
{
struct sev_user_data_status *status;
struct sp_device *sp;
int error, rc;
@ -760,6 +883,13 @@ void psp_pci_init(void)
psp_master = sp->psp_data;
if (sev_get_api_version())
goto err;
if (SEV_VERSION_GREATER_OR_EQUAL(0, 15) &&
sev_update_firmware(psp_master->dev) == 0)
sev_get_api_version();
/* Initialize the platform */
rc = sev_platform_init(&error);
if (rc) {
@ -767,16 +897,9 @@ void psp_pci_init(void)
goto err;
}
/* Display SEV firmware version */
status = &psp_master->status_cmd_buf;
rc = sev_platform_status(status, &error);
if (rc) {
dev_err(sp->dev, "SEV: failed to get status error %#x\n", error);
goto err;
}
dev_info(sp->dev, "SEV API:%d.%d build:%d\n", psp_master->api_major,
psp_master->api_minor, psp_master->build);
dev_info(sp->dev, "SEV API:%d.%d build:%d\n", status->api_major,
status->api_minor, status->build);
return;
err:

View File

@ -78,6 +78,10 @@ struct psp_device {
struct sev_misc_dev *sev_misc;
struct sev_user_data_status status_cmd_buf;
struct sev_data_init init_cmd_buf;
u8 api_major;
u8 api_minor;
u8 build;
};
#endif /* __PSP_DEV_H */

View File

@ -42,6 +42,7 @@ struct cc_cipher_ctx {
int cipher_mode;
int flow_mode;
unsigned int flags;
bool hw_key;
struct cc_user_key_info user;
struct cc_hw_key_info hw;
struct crypto_shash *shash_tfm;
@ -49,6 +50,13 @@ struct cc_cipher_ctx {
static void cc_cipher_complete(struct device *dev, void *cc_req, int err);
static inline bool cc_is_hw_key(struct crypto_tfm *tfm)
{
struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm);
return ctx_p->hw_key;
}
static int validate_keys_sizes(struct cc_cipher_ctx *ctx_p, u32 size)
{
switch (ctx_p->flow_mode) {
@ -211,7 +219,7 @@ struct tdes_keys {
u8 key3[DES_KEY_SIZE];
};
static enum cc_hw_crypto_key hw_key_to_cc_hw_key(int slot_num)
static enum cc_hw_crypto_key cc_slot_to_hw_key(int slot_num)
{
switch (slot_num) {
case 0:
@ -226,6 +234,74 @@ static enum cc_hw_crypto_key hw_key_to_cc_hw_key(int slot_num)
return END_OF_KEYS;
}
static int cc_cipher_sethkey(struct crypto_skcipher *sktfm, const u8 *key,
unsigned int keylen)
{
struct crypto_tfm *tfm = crypto_skcipher_tfm(sktfm);
struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm);
struct device *dev = drvdata_to_dev(ctx_p->drvdata);
struct cc_hkey_info hki;
dev_dbg(dev, "Setting HW key in context @%p for %s. keylen=%u\n",
ctx_p, crypto_tfm_alg_name(tfm), keylen);
dump_byte_array("key", (u8 *)key, keylen);
/* STAT_PHASE_0: Init and sanity checks */
/* This check the size of the hardware key token */
if (keylen != sizeof(hki)) {
dev_err(dev, "Unsupported HW key size %d.\n", keylen);
crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
if (ctx_p->flow_mode != S_DIN_to_AES) {
dev_err(dev, "HW key not supported for non-AES flows\n");
return -EINVAL;
}
memcpy(&hki, key, keylen);
/* The real key len for crypto op is the size of the HW key
* referenced by the HW key slot, not the hardware key token
*/
keylen = hki.keylen;
if (validate_keys_sizes(ctx_p, keylen)) {
dev_err(dev, "Unsupported key size %d.\n", keylen);
crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
ctx_p->hw.key1_slot = cc_slot_to_hw_key(hki.hw_key1);
if (ctx_p->hw.key1_slot == END_OF_KEYS) {
dev_err(dev, "Unsupported hw key1 number (%d)\n", hki.hw_key1);
return -EINVAL;
}
if (ctx_p->cipher_mode == DRV_CIPHER_XTS ||
ctx_p->cipher_mode == DRV_CIPHER_ESSIV ||
ctx_p->cipher_mode == DRV_CIPHER_BITLOCKER) {
if (hki.hw_key1 == hki.hw_key2) {
dev_err(dev, "Illegal hw key numbers (%d,%d)\n",
hki.hw_key1, hki.hw_key2);
return -EINVAL;
}
ctx_p->hw.key2_slot = cc_slot_to_hw_key(hki.hw_key2);
if (ctx_p->hw.key2_slot == END_OF_KEYS) {
dev_err(dev, "Unsupported hw key2 number (%d)\n",
hki.hw_key2);
return -EINVAL;
}
}
ctx_p->keylen = keylen;
ctx_p->hw_key = true;
dev_dbg(dev, "cc_is_hw_key ret 0");
return 0;
}
static int cc_cipher_setkey(struct crypto_skcipher *sktfm, const u8 *key,
unsigned int keylen)
{
@ -250,44 +326,7 @@ static int cc_cipher_setkey(struct crypto_skcipher *sktfm, const u8 *key,
return -EINVAL;
}
if (cc_is_hw_key(tfm)) {
/* setting HW key slots */
struct arm_hw_key_info *hki = (struct arm_hw_key_info *)key;
if (ctx_p->flow_mode != S_DIN_to_AES) {
dev_err(dev, "HW key not supported for non-AES flows\n");
return -EINVAL;
}
ctx_p->hw.key1_slot = hw_key_to_cc_hw_key(hki->hw_key1);
if (ctx_p->hw.key1_slot == END_OF_KEYS) {
dev_err(dev, "Unsupported hw key1 number (%d)\n",
hki->hw_key1);
return -EINVAL;
}
if (ctx_p->cipher_mode == DRV_CIPHER_XTS ||
ctx_p->cipher_mode == DRV_CIPHER_ESSIV ||
ctx_p->cipher_mode == DRV_CIPHER_BITLOCKER) {
if (hki->hw_key1 == hki->hw_key2) {
dev_err(dev, "Illegal hw key numbers (%d,%d)\n",
hki->hw_key1, hki->hw_key2);
return -EINVAL;
}
ctx_p->hw.key2_slot =
hw_key_to_cc_hw_key(hki->hw_key2);
if (ctx_p->hw.key2_slot == END_OF_KEYS) {
dev_err(dev, "Unsupported hw key2 number (%d)\n",
hki->hw_key2);
return -EINVAL;
}
}
ctx_p->keylen = keylen;
dev_dbg(dev, "cc_is_hw_key ret 0");
return 0;
}
ctx_p->hw_key = false;
/*
* Verify DES weak keys
@ -734,6 +773,241 @@ static int cc_cipher_decrypt(struct skcipher_request *req)
/* Block cipher alg */
static const struct cc_alg_template skcipher_algs[] = {
{
.name = "xts(paes)",
.driver_name = "xts-paes-ccree",
.blocksize = AES_BLOCK_SIZE,
.template_skcipher = {
.setkey = cc_cipher_sethkey,
.encrypt = cc_cipher_encrypt,
.decrypt = cc_cipher_decrypt,
.min_keysize = CC_HW_KEY_SIZE,
.max_keysize = CC_HW_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
},
.cipher_mode = DRV_CIPHER_XTS,
.flow_mode = S_DIN_to_AES,
.min_hw_rev = CC_HW_REV_630,
},
{
.name = "xts512(paes)",
.driver_name = "xts-paes-du512-ccree",
.blocksize = AES_BLOCK_SIZE,
.template_skcipher = {
.setkey = cc_cipher_sethkey,
.encrypt = cc_cipher_encrypt,
.decrypt = cc_cipher_decrypt,
.min_keysize = CC_HW_KEY_SIZE,
.max_keysize = CC_HW_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
},
.cipher_mode = DRV_CIPHER_XTS,
.flow_mode = S_DIN_to_AES,
.data_unit = 512,
.min_hw_rev = CC_HW_REV_712,
},
{
.name = "xts4096(paes)",
.driver_name = "xts-paes-du4096-ccree",
.blocksize = AES_BLOCK_SIZE,
.template_skcipher = {
.setkey = cc_cipher_sethkey,
.encrypt = cc_cipher_encrypt,
.decrypt = cc_cipher_decrypt,
.min_keysize = CC_HW_KEY_SIZE,
.max_keysize = CC_HW_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
},
.cipher_mode = DRV_CIPHER_XTS,
.flow_mode = S_DIN_to_AES,
.data_unit = 4096,
.min_hw_rev = CC_HW_REV_712,
},
{
.name = "essiv(paes)",
.driver_name = "essiv-paes-ccree",
.blocksize = AES_BLOCK_SIZE,
.template_skcipher = {
.setkey = cc_cipher_sethkey,
.encrypt = cc_cipher_encrypt,
.decrypt = cc_cipher_decrypt,
.min_keysize = CC_HW_KEY_SIZE,
.max_keysize = CC_HW_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
},
.cipher_mode = DRV_CIPHER_ESSIV,
.flow_mode = S_DIN_to_AES,
.min_hw_rev = CC_HW_REV_712,
},
{
.name = "essiv512(paes)",
.driver_name = "essiv-paes-du512-ccree",
.blocksize = AES_BLOCK_SIZE,
.template_skcipher = {
.setkey = cc_cipher_sethkey,
.encrypt = cc_cipher_encrypt,
.decrypt = cc_cipher_decrypt,
.min_keysize = CC_HW_KEY_SIZE,
.max_keysize = CC_HW_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
},
.cipher_mode = DRV_CIPHER_ESSIV,
.flow_mode = S_DIN_to_AES,
.data_unit = 512,
.min_hw_rev = CC_HW_REV_712,
},
{
.name = "essiv4096(paes)",
.driver_name = "essiv-paes-du4096-ccree",
.blocksize = AES_BLOCK_SIZE,
.template_skcipher = {
.setkey = cc_cipher_sethkey,
.encrypt = cc_cipher_encrypt,
.decrypt = cc_cipher_decrypt,
.min_keysize = CC_HW_KEY_SIZE,
.max_keysize = CC_HW_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
},
.cipher_mode = DRV_CIPHER_ESSIV,
.flow_mode = S_DIN_to_AES,
.data_unit = 4096,
.min_hw_rev = CC_HW_REV_712,
},
{
.name = "bitlocker(paes)",
.driver_name = "bitlocker-paes-ccree",
.blocksize = AES_BLOCK_SIZE,
.template_skcipher = {
.setkey = cc_cipher_sethkey,
.encrypt = cc_cipher_encrypt,
.decrypt = cc_cipher_decrypt,
.min_keysize = CC_HW_KEY_SIZE,
.max_keysize = CC_HW_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
},
.cipher_mode = DRV_CIPHER_BITLOCKER,
.flow_mode = S_DIN_to_AES,
.min_hw_rev = CC_HW_REV_712,
},
{
.name = "bitlocker512(paes)",
.driver_name = "bitlocker-paes-du512-ccree",
.blocksize = AES_BLOCK_SIZE,
.template_skcipher = {
.setkey = cc_cipher_sethkey,
.encrypt = cc_cipher_encrypt,
.decrypt = cc_cipher_decrypt,
.min_keysize = CC_HW_KEY_SIZE,
.max_keysize = CC_HW_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
},
.cipher_mode = DRV_CIPHER_BITLOCKER,
.flow_mode = S_DIN_to_AES,
.data_unit = 512,
.min_hw_rev = CC_HW_REV_712,
},
{
.name = "bitlocker4096(paes)",
.driver_name = "bitlocker-paes-du4096-ccree",
.blocksize = AES_BLOCK_SIZE,
.template_skcipher = {
.setkey = cc_cipher_sethkey,
.encrypt = cc_cipher_encrypt,
.decrypt = cc_cipher_decrypt,
.min_keysize = CC_HW_KEY_SIZE,
.max_keysize = CC_HW_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
},
.cipher_mode = DRV_CIPHER_BITLOCKER,
.flow_mode = S_DIN_to_AES,
.data_unit = 4096,
.min_hw_rev = CC_HW_REV_712,
},
{
.name = "ecb(paes)",
.driver_name = "ecb-paes-ccree",
.blocksize = AES_BLOCK_SIZE,
.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
.template_skcipher = {
.setkey = cc_cipher_sethkey,
.encrypt = cc_cipher_encrypt,
.decrypt = cc_cipher_decrypt,
.min_keysize = CC_HW_KEY_SIZE,
.max_keysize = CC_HW_KEY_SIZE,
.ivsize = 0,
},
.cipher_mode = DRV_CIPHER_ECB,
.flow_mode = S_DIN_to_AES,
.min_hw_rev = CC_HW_REV_712,
},
{
.name = "cbc(paes)",
.driver_name = "cbc-paes-ccree",
.blocksize = AES_BLOCK_SIZE,
.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
.template_skcipher = {
.setkey = cc_cipher_sethkey,
.encrypt = cc_cipher_encrypt,
.decrypt = cc_cipher_decrypt,
.min_keysize = CC_HW_KEY_SIZE,
.max_keysize = CC_HW_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
},
.cipher_mode = DRV_CIPHER_CBC,
.flow_mode = S_DIN_to_AES,
.min_hw_rev = CC_HW_REV_712,
},
{
.name = "ofb(paes)",
.driver_name = "ofb-paes-ccree",
.blocksize = AES_BLOCK_SIZE,
.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
.template_skcipher = {
.setkey = cc_cipher_sethkey,
.encrypt = cc_cipher_encrypt,
.decrypt = cc_cipher_decrypt,
.min_keysize = CC_HW_KEY_SIZE,
.max_keysize = CC_HW_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
},
.cipher_mode = DRV_CIPHER_OFB,
.flow_mode = S_DIN_to_AES,
.min_hw_rev = CC_HW_REV_712,
},
{
.name = "cts1(cbc(paes))",
.driver_name = "cts1-cbc-paes-ccree",
.blocksize = AES_BLOCK_SIZE,
.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
.template_skcipher = {
.setkey = cc_cipher_sethkey,
.encrypt = cc_cipher_encrypt,
.decrypt = cc_cipher_decrypt,
.min_keysize = CC_HW_KEY_SIZE,
.max_keysize = CC_HW_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
},
.cipher_mode = DRV_CIPHER_CBC_CTS,
.flow_mode = S_DIN_to_AES,
.min_hw_rev = CC_HW_REV_712,
},
{
.name = "ctr(paes)",
.driver_name = "ctr-paes-ccree",
.blocksize = 1,
.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
.template_skcipher = {
.setkey = cc_cipher_sethkey,
.encrypt = cc_cipher_encrypt,
.decrypt = cc_cipher_decrypt,
.min_keysize = CC_HW_KEY_SIZE,
.max_keysize = CC_HW_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
},
.cipher_mode = DRV_CIPHER_CTR,
.flow_mode = S_DIN_to_AES,
.min_hw_rev = CC_HW_REV_712,
},
{
.name = "xts(aes)",
.driver_name = "xts-aes-ccree",

Some files were not shown because too many files have changed in this diff Show More