mirror of
https://github.com/torvalds/linux.git
synced 2024-12-12 22:23:55 +00:00
1d373d4e8e
This patch adds optimized implementations of AEGIS-128, AEGIS-128L, and AEGIS-256, utilizing the AES-NI and SSE2 x86 extensions. Signed-off-by: Ondrej Mosnacek <omosnacek@gmail.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
826 lines
13 KiB
ArmAsm
826 lines
13 KiB
ArmAsm
/*
 * AES-NI + SSE2 implementation of AEGIS-128L
 *
 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 */
|
|
|
|
#include <linux/linkage.h>
|
|
#include <asm/frame.h>
|
|
|
|
/* The eight 128-bit state blocks held in registers while processing. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4
#define STATE5	%xmm5
#define STATE6	%xmm6
#define STATE7	%xmm7

/* The two 16-byte halves of the 32-byte message block being processed. */
#define MSG0	%xmm8
#define MSG1	%xmm9

/* Scratch vector registers. */
#define T0	%xmm10
#define T1	%xmm11
#define T2	%xmm12
#define T3	%xmm13

/* Integer registers carrying the first four arguments of the entry points. */
#define STATEP	%rdi
#define LEN	%rsi
#define SRC	%rdx
#define DST	%rcx
|
|
|
|
/*
 * Two 16-byte constants used to seed the state in
 * crypto_aegis128l_aesni_init.
 */
.section .rodata.cst16.aegis128l_const, "aM", @progbits, 32
.align 16
.Laegis128l_const_0:
	.byte	0x00, 0x01, 0x01, 0x02
	.byte	0x03, 0x05, 0x08, 0x0d
	.byte	0x15, 0x22, 0x37, 0x59
	.byte	0x90, 0xe9, 0x79, 0x62
.Laegis128l_const_1:
	.byte	0xdb, 0x3d, 0x18, 0x55
	.byte	0x6d, 0xc2, 0x2f, 0xf1
	.byte	0x20, 0x11, 0x31, 0x42
	.byte	0x73, 0xb5, 0x28, 0xdd
|
|
|
|
/*
 * Byte indices 0..31, one per lane.  crypto_aegis128l_aesni_dec_tail compares
 * these against the tail length to build a per-byte mask.
 */
.section .rodata.cst16.aegis128l_counter, "aM", @progbits, 16
.align 16
.Laegis128l_counter0:
	.byte	0x00, 0x01, 0x02, 0x03
	.byte	0x04, 0x05, 0x06, 0x07
	.byte	0x08, 0x09, 0x0a, 0x0b
	.byte	0x0c, 0x0d, 0x0e, 0x0f
.Laegis128l_counter1:
	.byte	0x10, 0x11, 0x12, 0x13
	.byte	0x14, 0x15, 0x16, 0x17
	.byte	0x18, 0x19, 0x1a, 0x1b
	.byte	0x1c, 0x1d, 0x1e, 0x1f
|
|
|
|
.text
|
|
|
|
/*
 * __load_partial: internal ABI
 *
 * Gathers a message tail into MSG0:MSG1 and leaves every byte past the end
 * of the tail zero.  Only bits 0-4 of LEN are examined, so at most 31 bytes
 * are read.  The tail is assembled starting from its last byte and working
 * towards SRC: each set bit of LEN contributes one 1/2/4/8/16-byte piece,
 * read at the offset formed by the higher bits of LEN.
 *
 * input:
 *   LEN  - bytes
 *   SRC  - src
 * output:
 *   MSG0 - first message block
 *   MSG1 - second message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 */
__load_partial:
	xor	%r9d, %r9d		/* %r9 accumulates the last < 8 bytes */
	pxor	MSG0, MSG0
	pxor	MSG1, MSG1

	/* bit 0 of LEN: the final odd byte */
	mov	LEN, %r8
	and	$0x1, %r8
	jz	.Lld_partial_1

	mov	LEN, %r8
	and	$0x1E, %r8		/* offset = LEN with bit 0 cleared */
	add	SRC, %r8
	mov	(%r8), %r9b

.Lld_partial_1:
	/* bit 1 of LEN: the two bytes in front of what is already held */
	mov	LEN, %r8
	and	$0x2, %r8
	jz	.Lld_partial_2

	mov	LEN, %r8
	and	$0x1C, %r8
	add	SRC, %r8
	shl	$0x10, %r9		/* make room below the held bytes */
	mov	(%r8), %r9w

.Lld_partial_2:
	/* bit 2 of LEN: the four bytes in front of what is already held */
	mov	LEN, %r8
	and	$0x4, %r8
	jz	.Lld_partial_4

	mov	LEN, %r8
	and	$0x18, %r8
	add	SRC, %r8
	shl	$32, %r9
	mov	(%r8), %r8d		/* 32-bit load clears the top of %r8 */
	xor	%r8, %r9

.Lld_partial_4:
	movq	%r9, MSG0		/* low qword = gathered bytes, high = 0 */

	/* bit 3 of LEN: the eight bytes in front of what is already held */
	mov	LEN, %r8
	and	$0x8, %r8
	jz	.Lld_partial_8

	mov	LEN, %r8
	and	$0x10, %r8
	add	SRC, %r8
	pslldq	$8, MSG0		/* move gathered bytes to the high qword */
	movq	(%r8), T0
	pxor	T0, MSG0

.Lld_partial_8:
	/* bit 4 of LEN: a full first block precedes the gathered bytes */
	mov	LEN, %r8
	and	$0x10, %r8
	jz	.Lld_partial_16

	movdqa	MSG0, MSG1
	movdqu	(SRC), MSG0

.Lld_partial_16:
	ret
ENDPROC(__load_partial)
|
|
|
|
/*
 * __store_partial: internal ABI
 *
 * Writes the first LEN bytes of T0:T1 to DST using progressively smaller
 * stores (16, 8, 4, 2, 1 bytes).  LEN and DST themselves are left untouched;
 * the routine works on copies in %r8 and %r9.
 *
 * input:
 *   LEN - bytes
 *   DST - dst
 *   T0  - first message block
 *   T1  - second message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 *   %r10
 */
__store_partial:
	mov	LEN, %r8		/* %r8 = bytes still to write */
	mov	DST, %r9		/* %r9 = write cursor */

	cmp	$16, %r8
	jl	.Lst_partial_16

	/* a whole first block: store it and continue with the second */
	movdqu	T0, (%r9)
	movdqa	T1, T0

	sub	$16, %r8
	add	$16, %r9

.Lst_partial_16:
	movq	T0, %r10		/* %r10 = next 8 bytes to write */

	cmp	$8, %r8
	jl	.Lst_partial_8

	mov	%r10, (%r9)
	psrldq	$8, T0			/* bring the high qword down */
	movq	T0, %r10

	sub	$8, %r8
	add	$8, %r9

.Lst_partial_8:
	cmp	$4, %r8
	jl	.Lst_partial_4

	mov	%r10d, (%r9)
	shr	$32, %r10

	sub	$4, %r8
	add	$4, %r9

.Lst_partial_4:
	cmp	$2, %r8
	jl	.Lst_partial_2

	mov	%r10w, (%r9)
	shr	$0x10, %r10

	sub	$2, %r8
	add	$2, %r9

.Lst_partial_2:
	cmp	$1, %r8
	jl	.Lst_partial_1

	mov	%r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
|
|
|
|
/*
 * update: one state step without message injection.
 *
 * Every STATEn becomes one AES round of itself with the *old* value of the
 * next register (STATE0 for STATE7) used as round key.  The instruction
 * order matters: each register is consumed as a key before it is rewritten,
 * and T0 preserves the old STATE7 for the final step.
 *
 * changed: T0, STATE0-STATE7
 */
.macro update
	movdqa	STATE7, T0
	aesenc	STATE0, STATE7
	aesenc	STATE1, STATE0
	aesenc	STATE2, STATE1
	aesenc	STATE3, STATE2
	aesenc	STATE4, STATE3
	aesenc	STATE5, STATE4
	aesenc	STATE6, STATE5
	aesenc	T0,     STATE6
.endm
|
|
|
|
/*
 * update0 .. update7: a state step followed by message injection.
 *
 * Each variant runs 'update' and then XORs MSG0/MSG1 into two state
 * registers.  The pair moves down by one register from one variant to the
 * next, and is always the pair that the matching state_storeN macro writes
 * to offsets 0x00 and 0x40 of the state buffer.
 */
.macro update0
	update
	pxor	MSG0, STATE7
	pxor	MSG1, STATE3
.endm

.macro update1
	update
	pxor	MSG0, STATE6
	pxor	MSG1, STATE2
.endm

.macro update2
	update
	pxor	MSG0, STATE5
	pxor	MSG1, STATE1
.endm

.macro update3
	update
	pxor	MSG0, STATE4
	pxor	MSG1, STATE0
.endm

.macro update4
	update
	pxor	MSG0, STATE3
	pxor	MSG1, STATE7
.endm

.macro update5
	update
	pxor	MSG0, STATE2
	pxor	MSG1, STATE6
.endm

.macro update6
	update
	pxor	MSG0, STATE1
	pxor	MSG1, STATE5
.endm

.macro update7
	update
	pxor	MSG0, STATE0
	pxor	MSG1, STATE4
.endm
|
|
|
|
/*
 * state_load: read the eight 16-byte state blocks at STATEP into
 * STATE0-STATE7, in order.  Unaligned loads are used, so no alignment of
 * the buffer is required.
 */
.macro state_load
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4
	movdqu	0x50(STATEP), STATE5
	movdqu	0x60(STATEP), STATE6
	movdqu	0x70(STATEP), STATE7
.endm
|
|
|
|
/*
 * state_store: write eight registers back to the state buffer at STATEP.
 *
 * The arguments are stored rotated by one position: \s7 goes to offset 0x00
 * and \s0 .. \s6 go to offsets 0x10 .. 0x70.
 */
.macro state_store s0 s1 s2 s3 s4 s5 s6 s7
	movdqu	\s7, 0x00(STATEP)
	movdqu	\s0, 0x10(STATEP)
	movdqu	\s1, 0x20(STATEP)
	movdqu	\s2, 0x30(STATEP)
	movdqu	\s3, 0x40(STATEP)
	movdqu	\s4, 0x50(STATEP)
	movdqu	\s5, 0x60(STATEP)
	movdqu	\s6, 0x70(STATEP)
.endm
|
|
|
|
/*
 * state_store0 .. state_store7: store the state after the updateN step with
 * the same number.  Each variant passes the registers to state_store rotated
 * one position further than the previous one.
 */
.macro state_store0
	state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7
.endm

.macro state_store1
	state_store STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6
.endm

.macro state_store2
	state_store STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
.endm

.macro state_store3
	state_store STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4
.endm

.macro state_store4
	state_store STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3
.endm

.macro state_store5
	state_store STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2
.endm

.macro state_store6
	state_store STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1
.endm

.macro state_store7
	state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0
.endm
|
|
|
|
/*
 * void crypto_aegis128l_aesni_init(void *state, const void *key, const void *iv);
 *
 * Builds the initial state from the 16-byte key and the 16-byte IV, runs ten
 * update steps that inject IV (MSG0) and key (MSG1), and writes the result
 * to 'state'.
 *
 * In:    %rdi = state, %rsi = key (read with an aligned load, so it must be
 *        16-byte aligned), %rdx = iv (no alignment required)
 * Clobb: STATE0-STATE7, MSG0, MSG1, T0
 */
ENTRY(crypto_aegis128l_aesni_init)
	FRAME_BEGIN

	/* load key: */
	movdqa	(%rsi), MSG1
	movdqa	MSG1, STATE0
	movdqa	MSG1, STATE4
	movdqa	MSG1, STATE5
	movdqa	MSG1, STATE6
	movdqa	MSG1, STATE7

	/* load IV: */
	movdqu	(%rdx), MSG0
	pxor	MSG0, STATE0
	pxor	MSG0, STATE4

	/*
	 * load the constants (RIP-relative, so that no absolute address is
	 * encoded in the instruction):
	 */
	movdqa	.Laegis128l_const_0(%rip), STATE2
	movdqa	.Laegis128l_const_1(%rip), STATE1
	movdqa	STATE1, STATE3
	pxor	STATE2, STATE5
	pxor	STATE1, STATE6
	pxor	STATE2, STATE7

	/* update 10 times with IV and KEY: */
	update0
	update1
	update2
	update3
	update4
	update5
	update6
	update7
	update0
	update1

	/* the last step was update1, so store with the matching rotation */
	state_store1

	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_init)
|
|
|
|
/*
 * ad_block: absorb one 32-byte block of associated data.
 *
 *   a - 'a' for aligned or 'u' for unaligned loads (suffix of movdq)
 *   i - position of the block in the unrolled loop (0-7); selects the source
 *       offset, the update variant and the exit label
 *
 * After the block is absorbed LEN is reduced by 0x20, and the macro leaves
 * through .Lad_out_\i once fewer than 0x20 bytes remain.
 */
.macro ad_block a i
	movdq\a	(\i * 0x20 + 0x00)(SRC), MSG0
	movdq\a	(\i * 0x20 + 0x10)(SRC), MSG1
	update\i
	sub	$0x20, LEN
	cmp	$0x20, LEN
	jl	.Lad_out_\i
.endm
|
|
|
|
/*
 * void crypto_aegis128l_aesni_ad(void *state, unsigned int length,
 *                                const void *data);
 *
 * Absorbs the whole 32-byte blocks of 'data' into the state.  A trailing
 * partial block is not consumed, and when length < 32 the function returns
 * without reading or writing the state.
 *
 * In:    %rdi = state, %esi = length, %rdx = data
 * Clobb: LEN, SRC, %r8, STATE0-STATE7, MSG0, MSG1, T0
 */
ENTRY(crypto_aegis128l_aesni_ad)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so only %esi is defined on entry.
	 * Zero-extend it, because LEN (%rsi) is used below in 64-bit
	 * compares and subtractions.
	 */
	mov	%esi, %esi

	cmp	$0x20, LEN
	jb	.Lad_out

	state_load

	/* aligned loads may only be used for a 16-byte aligned source */
	mov	SRC, %r8
	and	$0xf, %r8
	jnz	.Lad_u_loop

.align 8
.Lad_a_loop:
	ad_block a 0
	ad_block a 1
	ad_block a 2
	ad_block a 3
	ad_block a 4
	ad_block a 5
	ad_block a 6
	ad_block a 7

	add	$0x100, SRC
	jmp	.Lad_a_loop

.align 8
.Lad_u_loop:
	ad_block u 0
	ad_block u 1
	ad_block u 2
	ad_block u 3
	ad_block u 4
	ad_block u 5
	ad_block u 6
	ad_block u 7

	add	$0x100, SRC
	jmp	.Lad_u_loop

	/*
	 * Exit stubs, one per position in the unrolled loop: each stores the
	 * state with the rotation that matches the last update performed.
	 */
.Lad_out_0:
	state_store0
	FRAME_END
	ret

.Lad_out_1:
	state_store1
	FRAME_END
	ret

.Lad_out_2:
	state_store2
	FRAME_END
	ret

.Lad_out_3:
	state_store3
	FRAME_END
	ret

.Lad_out_4:
	state_store4
	FRAME_END
	ret

.Lad_out_5:
	state_store5
	FRAME_END
	ret

.Lad_out_6:
	state_store6
	FRAME_END
	ret

.Lad_out_7:
	state_store7
	FRAME_END
	ret

	/* nothing was processed: the state was never loaded, so do not store */
.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_ad)
|
|
|
|
/*
 * crypt: XOR the keystream derived from the state into two blocks, in place.
 *
 *   \m0 ^= \s1 ^ \s6 ^ (\s2 & \s3)
 *   \m1 ^= \s2 ^ \s5 ^ (\s6 & \s7)
 *
 * \s0 and \s4 are accepted for a uniform argument list but are not used.
 *
 * changed: \m0, \m1, T3
 */
.macro crypt m0 m1 s0 s1 s2 s3 s4 s5 s6 s7
	pxor	\s1, \m0
	pxor	\s6, \m0
	movdqa	\s2, T3
	pand	\s3, T3
	pxor	T3,  \m0

	pxor	\s2, \m1
	pxor	\s5, \m1
	movdqa	\s6, T3
	pand	\s7, T3
	pxor	T3,  \m1
.endm
|
|
|
|
/*
 * crypt0 .. crypt7: 'crypt' for each of the eight register rotations.
 * cryptN passes the state registers rotated by N positions, matching the
 * register assignment in effect before updateN is executed.
 */
.macro crypt0 m0 m1
	crypt \m0 \m1 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7
.endm

.macro crypt1 m0 m1
	crypt \m0 \m1 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6
.endm

.macro crypt2 m0 m1
	crypt \m0 \m1 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
.endm

.macro crypt3 m0 m1
	crypt \m0 \m1 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4
.endm

.macro crypt4 m0 m1
	crypt \m0 \m1 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3
.endm

.macro crypt5 m0 m1
	crypt \m0 \m1 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2
.endm

.macro crypt6 m0 m1
	crypt \m0 \m1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1
.endm

.macro crypt7 m0 m1
	crypt \m0 \m1 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0
.endm
|
|
|
|
/*
 * encrypt_block: encrypt one 32-byte block from SRC to DST.
 *
 *   a - 'a' for aligned or 'u' for unaligned loads/stores (suffix of movdq)
 *   i - position of the block in the unrolled loop (0-7)
 *
 * The plaintext stays in MSG0/MSG1 because that is what the state update
 * absorbs; the ciphertext is produced in the copies T0/T1.  LEN is then
 * reduced by 0x20 and the macro leaves through .Lenc_out_\i once fewer than
 * 0x20 bytes remain.
 */
.macro encrypt_block a i
	movdq\a	(\i * 0x20 + 0x00)(SRC), MSG0
	movdq\a	(\i * 0x20 + 0x10)(SRC), MSG1
	movdqa	MSG0, T0
	movdqa	MSG1, T1
	crypt\i	T0, T1
	movdq\a	T0, (\i * 0x20 + 0x00)(DST)
	movdq\a	T1, (\i * 0x20 + 0x10)(DST)

	update\i

	sub	$0x20, LEN
	cmp	$0x20, LEN
	jl	.Lenc_out_\i
.endm
|
|
|
|
/*
 * decrypt_block: decrypt one 32-byte block from SRC to DST.
 *
 *   a - 'a' for aligned or 'u' for unaligned loads/stores (suffix of movdq)
 *   i - position of the block in the unrolled loop (0-7)
 *
 * The block is decrypted in place in MSG0/MSG1, so the state update that
 * follows absorbs the recovered plaintext.  LEN is then reduced by 0x20 and
 * the macro leaves through .Ldec_out_\i once fewer than 0x20 bytes remain.
 */
.macro decrypt_block a i
	movdq\a	(\i * 0x20 + 0x00)(SRC), MSG0
	movdq\a	(\i * 0x20 + 0x10)(SRC), MSG1
	crypt\i	MSG0, MSG1
	movdq\a	MSG0, (\i * 0x20 + 0x00)(DST)
	movdq\a	MSG1, (\i * 0x20 + 0x10)(DST)

	update\i

	sub	$0x20, LEN
	cmp	$0x20, LEN
	jl	.Ldec_out_\i
.endm
|
|
|
|
/*
 * void crypto_aegis128l_aesni_enc(void *state, unsigned int length,
 *                                 const void *src, void *dst);
 *
 * Encrypts the whole 32-byte blocks of 'src' into 'dst' and advances the
 * state.  A trailing partial block is not processed, and when length < 32
 * the function returns without reading or writing the state.
 *
 * In:    %rdi = state, %esi = length, %rdx = src, %rcx = dst
 * Clobb: LEN, SRC, DST, %r8, STATE0-STATE7, MSG0, MSG1, T0, T1, T3
 */
ENTRY(crypto_aegis128l_aesni_enc)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so only %esi is defined on entry.
	 * Zero-extend it, because LEN (%rsi) is used below in 64-bit
	 * compares and subtractions.
	 */
	mov	%esi, %esi

	cmp	$0x20, LEN
	jb	.Lenc_out

	state_load

	/* the aligned loop needs both buffers to be 16-byte aligned */
	mov	SRC, %r8
	or	DST, %r8
	and	$0xf, %r8
	jnz	.Lenc_u_loop

.align 8
.Lenc_a_loop:
	encrypt_block a 0
	encrypt_block a 1
	encrypt_block a 2
	encrypt_block a 3
	encrypt_block a 4
	encrypt_block a 5
	encrypt_block a 6
	encrypt_block a 7

	add	$0x100, SRC
	add	$0x100, DST
	jmp	.Lenc_a_loop

.align 8
.Lenc_u_loop:
	encrypt_block u 0
	encrypt_block u 1
	encrypt_block u 2
	encrypt_block u 3
	encrypt_block u 4
	encrypt_block u 5
	encrypt_block u 6
	encrypt_block u 7

	add	$0x100, SRC
	add	$0x100, DST
	jmp	.Lenc_u_loop

	/*
	 * Exit stubs, one per position in the unrolled loop: each stores the
	 * state with the rotation that matches the last update performed.
	 */
.Lenc_out_0:
	state_store0
	FRAME_END
	ret

.Lenc_out_1:
	state_store1
	FRAME_END
	ret

.Lenc_out_2:
	state_store2
	FRAME_END
	ret

.Lenc_out_3:
	state_store3
	FRAME_END
	ret

.Lenc_out_4:
	state_store4
	FRAME_END
	ret

.Lenc_out_5:
	state_store5
	FRAME_END
	ret

.Lenc_out_6:
	state_store6
	FRAME_END
	ret

.Lenc_out_7:
	state_store7
	FRAME_END
	ret

	/* nothing was processed: the state was never loaded, so do not store */
.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_enc)
|
|
|
|
/*
 * void crypto_aegis128l_aesni_enc_tail(void *state, unsigned int length,
 *                                      const void *src, void *dst);
 *
 * Encrypts a final partial block: 'length' bytes are read from 'src',
 * zero-padded to 32 bytes, encrypted, and the first 'length' bytes of the
 * result are written to 'dst'.  The state then absorbs the zero-padded
 * plaintext and is written back.
 *
 * In:    %rdi = state, %esi = length, %rdx = src, %rcx = dst
 * Clobb: LEN, %r8-%r10, STATE0-STATE7, MSG0, MSG1, T0, T1, T3
 */
ENTRY(crypto_aegis128l_aesni_enc_tail)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so only %esi is defined on entry.
	 * Zero-extend it, because __store_partial compares LEN (%rsi) as a
	 * 64-bit value.
	 */
	mov	%esi, %esi

	state_load

	/* encrypt message: */
	call	__load_partial

	/* keep the plaintext in MSG0/MSG1 for the update below */
	movdqa	MSG0, T0
	movdqa	MSG1, T1
	crypt0	T0, T1

	call	__store_partial

	update0

	state_store0

	FRAME_END
	/* return to the caller instead of running into the next function */
	ret
ENDPROC(crypto_aegis128l_aesni_enc_tail)
|
|
|
|
/*
 * void crypto_aegis128l_aesni_dec(void *state, unsigned int length,
 *                                 const void *src, void *dst);
 *
 * Decrypts the whole 32-byte blocks of 'src' into 'dst' and advances the
 * state.  A trailing partial block is not processed, and when length < 32
 * the function returns without reading or writing the state.
 *
 * In:    %rdi = state, %esi = length, %rdx = src, %rcx = dst
 * Clobb: LEN, SRC, DST, %r8, STATE0-STATE7, MSG0, MSG1, T0, T3
 */
ENTRY(crypto_aegis128l_aesni_dec)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so only %esi is defined on entry.
	 * Zero-extend it, because LEN (%rsi) is used below in 64-bit
	 * compares and subtractions.
	 */
	mov	%esi, %esi

	cmp	$0x20, LEN
	jb	.Ldec_out

	state_load

	/* the aligned loop needs both buffers to be 16-byte aligned */
	mov	SRC, %r8
	or	DST, %r8
	and	$0xF, %r8
	jnz	.Ldec_u_loop

.align 8
.Ldec_a_loop:
	decrypt_block a 0
	decrypt_block a 1
	decrypt_block a 2
	decrypt_block a 3
	decrypt_block a 4
	decrypt_block a 5
	decrypt_block a 6
	decrypt_block a 7

	add	$0x100, SRC
	add	$0x100, DST
	jmp	.Ldec_a_loop

.align 8
.Ldec_u_loop:
	decrypt_block u 0
	decrypt_block u 1
	decrypt_block u 2
	decrypt_block u 3
	decrypt_block u 4
	decrypt_block u 5
	decrypt_block u 6
	decrypt_block u 7

	add	$0x100, SRC
	add	$0x100, DST
	jmp	.Ldec_u_loop

	/*
	 * Exit stubs, one per position in the unrolled loop: each stores the
	 * state with the rotation that matches the last update performed.
	 */
.Ldec_out_0:
	state_store0
	FRAME_END
	ret

.Ldec_out_1:
	state_store1
	FRAME_END
	ret

.Ldec_out_2:
	state_store2
	FRAME_END
	ret

.Ldec_out_3:
	state_store3
	FRAME_END
	ret

.Ldec_out_4:
	state_store4
	FRAME_END
	ret

.Ldec_out_5:
	state_store5
	FRAME_END
	ret

.Ldec_out_6:
	state_store6
	FRAME_END
	ret

.Ldec_out_7:
	state_store7
	FRAME_END
	ret

	/* nothing was processed: the state was never loaded, so do not store */
.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_dec)
|
|
|
|
/*
 * void crypto_aegis128l_aesni_dec_tail(void *state, unsigned int length,
 *                                      const void *src, void *dst);
 *
 * Decrypts a final partial block: 'length' bytes are read from 'src',
 * zero-padded to 32 bytes, decrypted, and the first 'length' bytes of the
 * result are written to 'dst'.  Before the state absorbs the block, every
 * byte at index >= length is cleared again, because decrypting the zero
 * padding leaves keystream there rather than zeros.
 *
 * In:    %rdi = state, %esi = length, %rdx = src, %rcx = dst
 * Clobb: LEN, %r8-%r10, STATE0-STATE7, MSG0, MSG1, T0-T3
 */
ENTRY(crypto_aegis128l_aesni_dec_tail)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so only %esi is defined on entry.
	 * Zero-extend it, because __store_partial compares LEN (%rsi) as a
	 * 64-bit value.
	 */
	mov	%esi, %esi

	state_load

	/* decrypt message: */
	call	__load_partial

	crypt0	MSG0, MSG1

	movdqa	MSG0, T0
	movdqa	MSG1, T1
	call	__store_partial

	/* mask with byte count: */
	movq	LEN, T0
	/* four interleaves copy the low byte of LEN into all 16 lanes */
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	movdqa	T0, T1
	/* RIP-relative, so that no absolute address is encoded */
	movdqa	.Laegis128l_counter0(%rip), T2
	movdqa	.Laegis128l_counter1(%rip), T3
	/* lane i becomes 0xff where length > i (T0) or length > 16 + i (T1) */
	pcmpgtb	T2, T0
	pcmpgtb	T3, T1
	pand	T0, MSG0
	pand	T1, MSG1

	update0

	state_store0

	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_dec_tail)
|
|
|
|
/*
 * void crypto_aegis128l_aesni_final(void *state, void *tag_xor,
 *                                   u64 assoclen, u64 cryptlen);
 *
 * Finalization: builds a block from the two lengths (in bits), mixes it
 * into the state with seven update steps and XORs the resulting tag into
 * the 16 bytes at 'tag_xor'.  The state buffer is only read; the updated
 * state is not written back.
 *
 * In:    %rdi = state, %rsi = tag_xor, %rdx = assoclen, %rcx = cryptlen
 * Clobb: STATE0-STATE7, MSG0, MSG1, T0
 */
ENTRY(crypto_aegis128l_aesni_final)
	FRAME_BEGIN

	state_load

	/* prepare length block: low qword = assoclen, high qword = cryptlen */
	movq	%rdx, MSG0
	movq	%rcx, T0
	pslldq	$8, T0
	pxor	T0, MSG0
	psllq	$3, MSG0	/* multiply by 8 (to get bit count) */

	/* the same block, XORed with STATE2, is injected as both halves */
	pxor	STATE2, MSG0
	movdqa	MSG0, MSG1

	/* update state: */
	update0
	update1
	update2
	update3
	update4
	update5
	update6

	/*
	 * xor tag: fold STATE1 .. STATE7 into the caller's buffer.  After
	 * update6 these are the registers that state_store6 would write to
	 * offsets 0x00 .. 0x60.
	 */
	movdqu	(%rsi), T0

	pxor	STATE1, T0
	pxor	STATE2, T0
	pxor	STATE3, T0
	pxor	STATE4, T0
	pxor	STATE5, T0
	pxor	STATE6, T0
	pxor	STATE7, T0

	movdqu	T0, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_final)
|