crypto: aesni - Introduce READ_PARTIAL_BLOCK macro
Introduce READ_PARTIAL_BLOCK macro, and use it in the two existing partial block cases: AAD and the end of ENC_DEC. In particular, the ENC_DEC case should be faster, since we read by 8/4 bytes if possible. This macro will also be used to read partial blocks between enc_update and dec_update calls. Signed-off-by: Dave Watson <davejwatson@fb.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
parent
517a448e09
commit
ec8c02d9a3
@ -415,38 +415,6 @@ _zero_cipher_left\@:
|
||||
vmovdqu %xmm14, AadHash(arg2)
|
||||
vmovdqu %xmm9, CurCount(arg2)
|
||||
|
||||
cmp $16, arg5
|
||||
jl _only_less_than_16\@
|
||||
|
||||
mov arg5, %r13
|
||||
and $15, %r13 # r13 = (arg5 mod 16)
|
||||
|
||||
je _multiple_of_16_bytes\@
|
||||
|
||||
# handle the last <16 Byte block separately
|
||||
|
||||
mov %r13, PBlockLen(arg2)
|
||||
|
||||
vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
|
||||
vmovdqu %xmm9, CurCount(arg2)
|
||||
vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
|
||||
|
||||
ENCRYPT_SINGLE_BLOCK \REP, %xmm9 # E(K, Yn)
|
||||
vmovdqu %xmm9, PBlockEncKey(arg2)
|
||||
|
||||
sub $16, %r11
|
||||
add %r13, %r11
|
||||
vmovdqu (arg4, %r11), %xmm1 # receive the last <16 Byte block
|
||||
|
||||
lea SHIFT_MASK+16(%rip), %r12
|
||||
sub %r13, %r12 # adjust the shuffle mask pointer to be
|
||||
# able to shift 16-r13 bytes (r13 is the
|
||||
# number of bytes in plaintext mod 16)
|
||||
vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask
|
||||
vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes
|
||||
jmp _final_ghash_mul\@
|
||||
|
||||
_only_less_than_16\@:
|
||||
# check for 0 length
|
||||
mov arg5, %r13
|
||||
and $15, %r13 # r13 = (arg5 mod 16)
|
||||
@ -455,28 +423,48 @@ _only_less_than_16\@:
|
||||
|
||||
# handle the last <16 Byte block separately
|
||||
|
||||
mov %r13, PBlockLen(arg2)
|
||||
|
||||
vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
|
||||
vmovdqu %xmm9, CurCount(arg2)
|
||||
vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
|
||||
ENCRYPT_SINGLE_BLOCK \REP, %xmm9 # E(K, Yn)
|
||||
|
||||
ENCRYPT_SINGLE_BLOCK \REP, %xmm9 # E(K, Yn)
|
||||
vmovdqu %xmm9, PBlockEncKey(arg2)
|
||||
|
||||
cmp $16, arg5
|
||||
jge _large_enough_update\@
|
||||
|
||||
lea (arg4,%r11,1), %r10
|
||||
mov %r13, %r12
|
||||
|
||||
READ_PARTIAL_BLOCK %r10 %r12 %xmm1
|
||||
|
||||
lea SHIFT_MASK+16(%rip), %r12
|
||||
sub %r13, %r12 # adjust the shuffle mask pointer to be
|
||||
# able to shift 16-r13 bytes (r13 is the
|
||||
# number of bytes in plaintext mod 16)
|
||||
# number of bytes in plaintext mod 16)
|
||||
|
||||
_get_last_16_byte_loop\@:
|
||||
movb (arg4, %r11), %al
|
||||
movb %al, TMP1 (%rsp , %r11)
|
||||
add $1, %r11
|
||||
cmp %r13, %r11
|
||||
jne _get_last_16_byte_loop\@
|
||||
jmp _final_ghash_mul\@
|
||||
|
||||
vmovdqu TMP1(%rsp), %xmm1
|
||||
_large_enough_update\@:
|
||||
sub $16, %r11
|
||||
add %r13, %r11
|
||||
|
||||
sub $16, %r11
|
||||
# receive the last <16 Byte block
|
||||
vmovdqu (arg4, %r11, 1), %xmm1
|
||||
|
||||
sub %r13, %r11
|
||||
add $16, %r11
|
||||
|
||||
lea SHIFT_MASK+16(%rip), %r12
|
||||
# adjust the shuffle mask pointer to be able to shift 16-r13 bytes
|
||||
# (r13 is the number of bytes in plaintext mod 16)
|
||||
sub %r13, %r12
|
||||
# get the appropriate shuffle mask
|
||||
vmovdqu (%r12), %xmm2
|
||||
# shift right 16-r13 bytes
|
||||
vpshufb %xmm2, %xmm1, %xmm1
|
||||
|
||||
_final_ghash_mul\@:
|
||||
.if \ENC_DEC == DEC
|
||||
@ -490,8 +478,6 @@ _final_ghash_mul\@:
|
||||
vpxor %xmm2, %xmm14, %xmm14
|
||||
|
||||
vmovdqu %xmm14, AadHash(arg2)
|
||||
sub %r13, %r11
|
||||
add $16, %r11
|
||||
.else
|
||||
vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
|
||||
vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to
|
||||
@ -501,8 +487,6 @@ _final_ghash_mul\@:
|
||||
vpxor %xmm9, %xmm14, %xmm14
|
||||
|
||||
vmovdqu %xmm14, AadHash(arg2)
|
||||
sub %r13, %r11
|
||||
add $16, %r11
|
||||
vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext
|
||||
.endif
|
||||
|
||||
@ -721,6 +705,38 @@ _get_AAD_done\@:
|
||||
\PRECOMPUTE %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5
|
||||
.endm
|
||||
|
||||
|
||||
###############################################################################
# READ_PARTIAL_BLOCK: load DLEN bytes (where 0 < DLEN < 16) from DPTR into
# XMMDst without reading any memory past DPTR+DLEN-1.
# The bytes occupy the low DLEN positions of XMMDst; all higher bytes are zero.
# Clobbers: %rax and DLEN (DLEN is counted down to 0).
###############################################################################
.macro READ_PARTIAL_BLOCK DPTR DLEN XMMDst
        vpxor   \XMMDst, \XMMDst, \XMMDst       # start from an all-zero block
        cmp     $8, \DLEN
        jl      _partial_low_only_\@            # fewer than 8 bytes in total
        # at least 8 bytes: the low qword is safe to fetch with one 8-byte load
        mov     (\DPTR), %rax
        vpinsrq $0, %rax, \XMMDst, \XMMDst
        sub     $8, \DLEN
        jz      _partial_done_\@                # exactly 8 bytes: finished
        # gather the remaining 1..7 bytes into %rax, highest address first,
        # so the byte order in the register stays little-endian
        xor     %eax, %eax
_partial_high_loop_\@:
        shl     $8, %rax
        mov     7(\DPTR, \DLEN, 1), %al         # byte at offset 8 + DLEN - 1
        dec     \DLEN
        jnz     _partial_high_loop_\@
        vpinsrq $1, %rax, \XMMDst, \XMMDst
        jmp     _partial_done_\@
_partial_low_only_\@:
        # 1..7 bytes in total: accumulate them in %rax the same way
        xor     %eax, %eax
_partial_low_loop_\@:
        shl     $8, %rax
        mov     -1(\DPTR, \DLEN, 1), %al        # byte at offset DLEN - 1
        dec     \DLEN
        jnz     _partial_low_loop_\@
        vpinsrq $0, %rax, \XMMDst, \XMMDst
_partial_done_\@:
.endm
|
||||
|
||||
#ifdef CONFIG_AS_AVX
|
||||
###############################################################################
|
||||
# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
|
||||
|
Loading…
Reference in New Issue
Block a user