/* SPDX-License-Identifier: GPL-2.0-or-later */
#
# Accelerated AES-GCM stitched implementation for ppc64le.
#
# Copyright 2022- IBM Inc. All rights reserved
#
#===================================================================================
# Written by Danny Tsen <dtsen@linux.ibm.com>
#
# GHASH is based on the Karatsuba multiplication method.
#
#    Xi xor X1
#
#    X1 * H^4 + X2 * H^3 + X3 * H^2 + X4 * H =
#      (X1.h * H4.h + X1.l * H4.l + X1 * H4) +
#      (X2.h * H3.h + X2.l * H3.l + X2 * H3) +
#      (X3.h * H2.h + X3.l * H2.l + X3 * H2) +
#      (X4.h * H.h  + X4.l * H.l  + X4 * H)
#
# Xi = v0
# H Poly = v2
# Hash keys = v3 - v14
#     ( H.l, H, H.h)
#     ( H^2.l, H^2, H^2.h)
#     ( H^3.l, H^3, H^3.h)
#     ( H^4.l, H^4, H^4.h)
#
# v30 is IV
# v31 - counter 1
#
# AES used,
#     vs0 - vs14 for round keys
#     v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
#
# This implementation uses a stitched AES-GCM approach to improve overall
# performance: AES is computed on 8x blocks while GHASH works on 2 4x blocks.
#
# ===================================================================================
#
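#
# As a rough illustration of the 4-block folding above, a minimal C sketch
# (block128, gf128_mul() and xor128() are hypothetical helpers used for
# illustration only; gf128_mul() stands for a carry-less multiply reduced
# by the GCM polynomial, i.e. the vpmsumd + H Poly reduction done below):
#
#   block128 ghash4(block128 Xi, const block128 X[4], const block128 H[4])
#   {
#       /* H[0] = H^4, H[1] = H^3, H[2] = H^2, H[3] = H */
#       block128 acc = gf128_mul(xor128(Xi, X[0]), H[0]);
#       acc = xor128(acc, gf128_mul(X[1], H[1]));
#       acc = xor128(acc, gf128_mul(X[2], H[2]));
#       acc = xor128(acc, gf128_mul(X[3], H[3]));
#       return acc;   /* new Xi */
#   }
#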
#include <asm/ppc_asm.h>
#include <linux/linkage.h>

.machine "any"
.text

# 4x loops
# v15 - v18 - input states
# vs1 - vs9 - round keys
#
.macro Loop_aes_middle4x
|
|
xxlor 19+32, 1, 1
|
|
xxlor 20+32, 2, 2
|
|
xxlor 21+32, 3, 3
|
|
xxlor 22+32, 4, 4
|
|
|
|
vcipher 15, 15, 19
|
|
vcipher 16, 16, 19
|
|
vcipher 17, 17, 19
|
|
vcipher 18, 18, 19
|
|
|
|
vcipher 15, 15, 20
|
|
vcipher 16, 16, 20
|
|
vcipher 17, 17, 20
|
|
vcipher 18, 18, 20
|
|
|
|
vcipher 15, 15, 21
|
|
vcipher 16, 16, 21
|
|
vcipher 17, 17, 21
|
|
vcipher 18, 18, 21
|
|
|
|
vcipher 15, 15, 22
|
|
vcipher 16, 16, 22
|
|
vcipher 17, 17, 22
|
|
vcipher 18, 18, 22
|
|
|
|
xxlor 19+32, 5, 5
|
|
xxlor 20+32, 6, 6
|
|
xxlor 21+32, 7, 7
|
|
xxlor 22+32, 8, 8
|
|
|
|
vcipher 15, 15, 19
|
|
vcipher 16, 16, 19
|
|
vcipher 17, 17, 19
|
|
vcipher 18, 18, 19
|
|
|
|
vcipher 15, 15, 20
|
|
vcipher 16, 16, 20
|
|
vcipher 17, 17, 20
|
|
vcipher 18, 18, 20
|
|
|
|
vcipher 15, 15, 21
|
|
vcipher 16, 16, 21
|
|
vcipher 17, 17, 21
|
|
vcipher 18, 18, 21
|
|
|
|
vcipher 15, 15, 22
|
|
vcipher 16, 16, 22
|
|
vcipher 17, 17, 22
|
|
vcipher 18, 18, 22
|
|
|
|
xxlor 23+32, 9, 9
|
|
vcipher 15, 15, 23
|
|
vcipher 16, 16, 23
|
|
vcipher 17, 17, 23
|
|
vcipher 18, 18, 23
|
|
.endm
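
#
# Roughly, Loop_aes_middle4x/8x apply AES rounds 1-9 (round keys vs1 - vs9)
# to all counter blocks in parallel; the callers issue the remaining rounds
# for AES-192/256 and the final vcipherlast. As a sketch (aes_round() is a
# hypothetical helper for one vcipher step):
#
#   for (r = 1; r <= 9; r++)
#       for (b = 0; b < nblocks; b++)      /* 4 or 8 blocks */
#           state[b] = aes_round(state[b], roundkey[r]);
#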
|
|
|
|
# 8x loops
# v15 - v22 - input states
# vs1 - vs9 - round keys
#
.macro Loop_aes_middle8x
|
|
xxlor 23+32, 1, 1
|
|
xxlor 24+32, 2, 2
|
|
xxlor 25+32, 3, 3
|
|
xxlor 26+32, 4, 4
|
|
|
|
vcipher 15, 15, 23
|
|
vcipher 16, 16, 23
|
|
vcipher 17, 17, 23
|
|
vcipher 18, 18, 23
|
|
vcipher 19, 19, 23
|
|
vcipher 20, 20, 23
|
|
vcipher 21, 21, 23
|
|
vcipher 22, 22, 23
|
|
|
|
vcipher 15, 15, 24
|
|
vcipher 16, 16, 24
|
|
vcipher 17, 17, 24
|
|
vcipher 18, 18, 24
|
|
vcipher 19, 19, 24
|
|
vcipher 20, 20, 24
|
|
vcipher 21, 21, 24
|
|
vcipher 22, 22, 24
|
|
|
|
vcipher 15, 15, 25
|
|
vcipher 16, 16, 25
|
|
vcipher 17, 17, 25
|
|
vcipher 18, 18, 25
|
|
vcipher 19, 19, 25
|
|
vcipher 20, 20, 25
|
|
vcipher 21, 21, 25
|
|
vcipher 22, 22, 25
|
|
|
|
vcipher 15, 15, 26
|
|
vcipher 16, 16, 26
|
|
vcipher 17, 17, 26
|
|
vcipher 18, 18, 26
|
|
vcipher 19, 19, 26
|
|
vcipher 20, 20, 26
|
|
vcipher 21, 21, 26
|
|
vcipher 22, 22, 26
|
|
|
|
xxlor 23+32, 5, 5
|
|
xxlor 24+32, 6, 6
|
|
xxlor 25+32, 7, 7
|
|
xxlor 26+32, 8, 8
|
|
|
|
vcipher 15, 15, 23
|
|
vcipher 16, 16, 23
|
|
vcipher 17, 17, 23
|
|
vcipher 18, 18, 23
|
|
vcipher 19, 19, 23
|
|
vcipher 20, 20, 23
|
|
vcipher 21, 21, 23
|
|
vcipher 22, 22, 23
|
|
|
|
vcipher 15, 15, 24
|
|
vcipher 16, 16, 24
|
|
vcipher 17, 17, 24
|
|
vcipher 18, 18, 24
|
|
vcipher 19, 19, 24
|
|
vcipher 20, 20, 24
|
|
vcipher 21, 21, 24
|
|
vcipher 22, 22, 24
|
|
|
|
vcipher 15, 15, 25
|
|
vcipher 16, 16, 25
|
|
vcipher 17, 17, 25
|
|
vcipher 18, 18, 25
|
|
vcipher 19, 19, 25
|
|
vcipher 20, 20, 25
|
|
vcipher 21, 21, 25
|
|
vcipher 22, 22, 25
|
|
|
|
vcipher 15, 15, 26
|
|
vcipher 16, 16, 26
|
|
vcipher 17, 17, 26
|
|
vcipher 18, 18, 26
|
|
vcipher 19, 19, 26
|
|
vcipher 20, 20, 26
|
|
vcipher 21, 21, 26
|
|
vcipher 22, 22, 26
|
|
|
|
xxlor 23+32, 9, 9
|
|
vcipher 15, 15, 23
|
|
vcipher 16, 16, 23
|
|
vcipher 17, 17, 23
|
|
vcipher 18, 18, 23
|
|
vcipher 19, 19, 23
|
|
vcipher 20, 20, 23
|
|
vcipher 21, 21, 23
|
|
vcipher 22, 22, 23
|
|
.endm
|
|
|
|
.macro Loop_aes_middle_1x
|
|
xxlor 19+32, 1, 1
|
|
xxlor 20+32, 2, 2
|
|
xxlor 21+32, 3, 3
|
|
xxlor 22+32, 4, 4
|
|
|
|
vcipher 15, 15, 19
|
|
vcipher 15, 15, 20
|
|
vcipher 15, 15, 21
|
|
vcipher 15, 15, 22
|
|
|
|
xxlor 19+32, 5, 5
|
|
xxlor 20+32, 6, 6
|
|
xxlor 21+32, 7, 7
|
|
xxlor 22+32, 8, 8
|
|
|
|
vcipher 15, 15, 19
|
|
vcipher 15, 15, 20
|
|
vcipher 15, 15, 21
|
|
vcipher 15, 15, 22
|
|
|
|
xxlor 19+32, 9, 9
|
|
vcipher 15, 15, 19
|
|
.endm
|
|
|
|
#
# Compute 4x hash values based on Karatsuba method.
#
.macro ppc_aes_gcm_ghash
|
|
vxor 15, 15, 0
|
|
|
|
vpmsumd 23, 12, 15 # H4.L * X.L
|
|
vpmsumd 24, 9, 16
|
|
vpmsumd 25, 6, 17
|
|
vpmsumd 26, 3, 18
|
|
|
|
vxor 23, 23, 24
|
|
vxor 23, 23, 25
|
|
vxor 23, 23, 26 # L
|
|
|
|
vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L
|
|
vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L
|
|
vpmsumd 26, 7, 17
|
|
vpmsumd 27, 4, 18
|
|
|
|
vxor 24, 24, 25
|
|
vxor 24, 24, 26
|
|
vxor 24, 24, 27 # M
|
|
|
|
# sum hash and reduction with H Poly
|
|
vpmsumd 28, 23, 2 # reduction
|
|
|
|
vxor 29, 29, 29
|
|
vsldoi 26, 24, 29, 8 # mL
|
|
vsldoi 29, 29, 24, 8 # mH
|
|
vxor 23, 23, 26 # mL + L
|
|
|
|
vsldoi 23, 23, 23, 8 # swap
|
|
vxor 23, 23, 28
|
|
|
|
vpmsumd 24, 14, 15 # H4.H * X.H
|
|
vpmsumd 25, 11, 16
|
|
vpmsumd 26, 8, 17
|
|
vpmsumd 27, 5, 18
|
|
|
|
vxor 24, 24, 25
|
|
vxor 24, 24, 26
|
|
vxor 24, 24, 27
|
|
|
|
vxor 24, 24, 29
|
|
|
|
# sum hash and reduction with H Poly
|
|
vsldoi 27, 23, 23, 8 # swap
|
|
vpmsumd 23, 23, 2
|
|
vxor 27, 27, 24
|
|
vxor 23, 23, 27
|
|
|
|
xxlor 32, 23+32, 23+32 # update hash
|
|
|
|
.endm
|
|
|
|
#
# Combine two 4x GHASH computations
# v15 - v22 - input blocks
#
.macro ppc_aes_gcm_ghash2_4x
|
|
# first 4x hash
|
|
vxor 15, 15, 0 # Xi + X
|
|
|
|
vpmsumd 23, 12, 15 # H4.L * X.L
|
|
vpmsumd 24, 9, 16
|
|
vpmsumd 25, 6, 17
|
|
vpmsumd 26, 3, 18
|
|
|
|
vxor 23, 23, 24
|
|
vxor 23, 23, 25
|
|
vxor 23, 23, 26 # L
|
|
|
|
vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L
|
|
vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L
|
|
vpmsumd 26, 7, 17
|
|
vpmsumd 27, 4, 18
|
|
|
|
vxor 24, 24, 25
|
|
vxor 24, 24, 26
|
|
|
|
# sum hash and reduction with H Poly
|
|
vpmsumd 28, 23, 2 # reduction
|
|
|
|
vxor 29, 29, 29
|
|
|
|
vxor 24, 24, 27 # M
|
|
vsldoi 26, 24, 29, 8 # mL
|
|
vsldoi 29, 29, 24, 8 # mH
|
|
vxor 23, 23, 26 # mL + L
|
|
|
|
vsldoi 23, 23, 23, 8 # swap
|
|
vxor 23, 23, 28
|
|
|
|
vpmsumd 24, 14, 15 # H4.H * X.H
|
|
vpmsumd 25, 11, 16
|
|
vpmsumd 26, 8, 17
|
|
vpmsumd 27, 5, 18
|
|
|
|
vxor 24, 24, 25
|
|
vxor 24, 24, 26
|
|
vxor 24, 24, 27 # H
|
|
|
|
vxor 24, 24, 29 # H + mH
|
|
|
|
# sum hash and reduction with H Poly
|
|
vsldoi 27, 23, 23, 8 # swap
|
|
vpmsumd 23, 23, 2
|
|
vxor 27, 27, 24
|
|
vxor 27, 23, 27 # 1st Xi
|
|
|
|
# 2nd 4x hash
|
|
vpmsumd 24, 9, 20
|
|
vpmsumd 25, 6, 21
|
|
vpmsumd 26, 3, 22
|
|
vxor 19, 19, 27 # Xi + X
|
|
vpmsumd 23, 12, 19 # H4.L * X.L
|
|
|
|
vxor 23, 23, 24
|
|
vxor 23, 23, 25
|
|
vxor 23, 23, 26 # L
|
|
|
|
vpmsumd 24, 13, 19 # H4.L * X.H + H4.H * X.L
|
|
vpmsumd 25, 10, 20 # H3.L * X1.H + H3.H * X1.L
|
|
vpmsumd 26, 7, 21
|
|
vpmsumd 27, 4, 22
|
|
|
|
vxor 24, 24, 25
|
|
vxor 24, 24, 26
|
|
|
|
# sum hash and reduction with H Poly
|
|
vpmsumd 28, 23, 2 # reduction
|
|
|
|
vxor 29, 29, 29
|
|
|
|
vxor 24, 24, 27 # M
|
|
vsldoi 26, 24, 29, 8 # mL
|
|
vsldoi 29, 29, 24, 8 # mH
|
|
vxor 23, 23, 26 # mL + L
|
|
|
|
vsldoi 23, 23, 23, 8 # swap
|
|
vxor 23, 23, 28
|
|
|
|
vpmsumd 24, 14, 19 # H4.H * X.H
|
|
vpmsumd 25, 11, 20
|
|
vpmsumd 26, 8, 21
|
|
vpmsumd 27, 5, 22
|
|
|
|
vxor 24, 24, 25
|
|
vxor 24, 24, 26
|
|
vxor 24, 24, 27 # H
|
|
|
|
vxor 24, 24, 29 # H + mH
|
|
|
|
# sum hash and reduction with H Poly
|
|
vsldoi 27, 23, 23, 8 # swap
|
|
vpmsumd 23, 23, 2
|
|
vxor 27, 27, 24
|
|
vxor 23, 23, 27
|
|
|
|
xxlor 32, 23+32, 23+32 # update hash
|
|
|
|
.endm
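
#
# In terms of the ghash4() sketch in the file header, this macro performs
# two 4-block folds back to back:
#
#   Xi = ghash4(Xi, &block[0], H);   /* v15 - v18 */
#   Xi = ghash4(Xi, &block[4], H);   /* v19 - v22 */
#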
|
|
|
|
#
# Compute and update a single hash value
#
.macro ppc_update_hash_1x
|
|
vxor 28, 28, 0
|
|
|
|
vxor 19, 19, 19
|
|
|
|
vpmsumd 22, 3, 28 # L
|
|
vpmsumd 23, 4, 28 # M
|
|
vpmsumd 24, 5, 28 # H
|
|
|
|
vpmsumd 27, 22, 2 # reduction
|
|
|
|
vsldoi 25, 23, 19, 8 # mL
|
|
vsldoi 26, 19, 23, 8 # mH
|
|
vxor 22, 22, 25 # L + mL
vxor 24, 24, 26 # H + mH
|
|
|
|
vsldoi 22, 22, 22, 8 # swap
|
|
vxor 22, 22, 27
|
|
|
|
vsldoi 20, 22, 22, 8 # swap
|
|
vpmsumd 22, 22, 2 # reduction
|
|
vxor 20, 20, 24
|
|
vxor 22, 22, 20
|
|
|
|
vmr 0, 22 # update hash
|
|
|
|
.endm
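
#
# ppc_update_hash_1x computes a single GHASH step, roughly (gf128_mul() and
# xor128() are the hypothetical helpers from the header sketch):
#
#   Xi = gf128_mul(xor128(Xi, block), H);   /* v0 = (v0 ^ v28) * H mod poly */
#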
|
|
|
|
.macro SAVE_REGS
|
|
stdu 1,-640(1)
|
|
mflr 0
|
|
|
|
std 14,112(1)
|
|
std 15,120(1)
|
|
std 16,128(1)
|
|
std 17,136(1)
|
|
std 18,144(1)
|
|
std 19,152(1)
|
|
std 20,160(1)
|
|
std 21,168(1)
|
|
li 9, 256
|
|
stvx 20, 9, 1
|
|
addi 9, 9, 16
|
|
stvx 21, 9, 1
|
|
addi 9, 9, 16
|
|
stvx 22, 9, 1
|
|
addi 9, 9, 16
|
|
stvx 23, 9, 1
|
|
addi 9, 9, 16
|
|
stvx 24, 9, 1
|
|
addi 9, 9, 16
|
|
stvx 25, 9, 1
|
|
addi 9, 9, 16
|
|
stvx 26, 9, 1
|
|
addi 9, 9, 16
|
|
stvx 27, 9, 1
|
|
addi 9, 9, 16
|
|
stvx 28, 9, 1
|
|
addi 9, 9, 16
|
|
stvx 29, 9, 1
|
|
addi 9, 9, 16
|
|
stvx 30, 9, 1
|
|
addi 9, 9, 16
|
|
stvx 31, 9, 1
|
|
stxv 14, 464(1)
|
|
stxv 15, 480(1)
|
|
stxv 16, 496(1)
|
|
stxv 17, 512(1)
|
|
stxv 18, 528(1)
|
|
stxv 19, 544(1)
|
|
stxv 20, 560(1)
|
|
stxv 21, 576(1)
|
|
stxv 22, 592(1)
|
|
std 0, 656(1)
|
|
.endm
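
#
# Stack frame layout used by SAVE_REGS above and RESTORE_REGS below
# (640-byte frame):
#   112 - 168: non-volatile GPRs r14 - r21
#   256 - 432: non-volatile VRs  v20 - v31
#   464 - 592: non-volatile VSRs vs14 - vs22
#   656:       saved LR (this lands in the caller's frame, above the
#              640-byte frame created by stdu)
#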
|
|
|
|
.macro RESTORE_REGS
|
|
lxv 14, 464(1)
|
|
lxv 15, 480(1)
|
|
lxv 16, 496(1)
|
|
lxv 17, 512(1)
|
|
lxv 18, 528(1)
|
|
lxv 19, 544(1)
|
|
lxv 20, 560(1)
|
|
lxv 21, 576(1)
|
|
lxv 22, 592(1)
|
|
li 9, 256
|
|
lvx 20, 9, 1
|
|
addi 9, 9, 16
|
|
lvx 21, 9, 1
|
|
addi 9, 9, 16
|
|
lvx 22, 9, 1
|
|
addi 9, 9, 16
|
|
lvx 23, 9, 1
|
|
addi 9, 9, 16
|
|
lvx 24, 9, 1
|
|
addi 9, 9, 16
|
|
lvx 25, 9, 1
|
|
addi 9, 9, 16
|
|
lvx 26, 9, 1
|
|
addi 9, 9, 16
|
|
lvx 27, 9, 1
|
|
addi 9, 9, 16
|
|
lvx 28, 9, 1
|
|
addi 9, 9, 16
|
|
lvx 29, 9, 1
|
|
addi 9, 9, 16
|
|
lvx 30, 9, 1
|
|
addi 9, 9, 16
|
|
lvx 31, 9, 1
|
|
|
|
ld 0, 656(1)
|
|
ld 14,112(1)
|
|
ld 15,120(1)
|
|
ld 16,128(1)
|
|
ld 17,136(1)
|
|
ld 18,144(1)
|
|
ld 19,152(1)
|
|
ld 20,160(1)
|
|
ld 21,168(1)
|
|
|
|
mtlr 0
|
|
addi 1, 1, 640
|
|
.endm
|
|
|
|
.macro LOAD_HASH_TABLE
|
|
# Load Xi
lxvb16x 32, 0, 8 # load Xi

# load Hash - h^4, h^3, h^2, h
li 10, 32
lxvd2x 2+32, 10, 8 # H Poly
li 10, 48
lxvd2x 3+32, 10, 8 # Hl
li 10, 64
lxvd2x 4+32, 10, 8 # H
li 10, 80
lxvd2x 5+32, 10, 8 # Hh

li 10, 96
lxvd2x 6+32, 10, 8 # H^2l
li 10, 112
lxvd2x 7+32, 10, 8 # H^2
li 10, 128
lxvd2x 8+32, 10, 8 # H^2h

li 10, 144
lxvd2x 9+32, 10, 8 # H^3l
li 10, 160
lxvd2x 10+32, 10, 8 # H^3
li 10, 176
lxvd2x 11+32, 10, 8 # H^3h

li 10, 192
lxvd2x 12+32, 10, 8 # H^4l
li 10, 208
lxvd2x 13+32, 10, 8 # H^4
li 10, 224
lxvd2x 14+32, 10, 8 # H^4h
.endm
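
#
# A hypothetical C view of the table at Xip consumed by LOAD_HASH_TABLE
# (field names are illustrative only; the offsets match the loads above):
#
#   struct gcm_table_layout {
#       unsigned char Xi[16];       /*   0: current hash value              */
#       unsigned char Xi2[16];      /*  16: second Xi copy, partial-block path */
#       unsigned char Hpoly[16];    /*  32: reduction polynomial constant   */
#       unsigned char Hl[16], H[16], Hh[16];      /*  48 -  95: H           */
#       unsigned char H2l[16], H2[16], H2h[16];   /*  96 - 143: H^2         */
#       unsigned char H3l[16], H3[16], H3h[16];   /* 144 - 191: H^3         */
#       unsigned char H4l[16], H4[16], H4h[16];   /* 192 - 239: H^4         */
#   };
#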
|
|
|
|
#
# aes_p10_gcm_encrypt (const void *inp, void *out, size_t len,
#               const char *rk, unsigned char iv[16], void *Xip);
#
#    r3 - inp
#    r4 - out
#    r5 - len
#    r6 - AES round keys
#    r7 - iv and other data
#    r8 - Xi, H Poly, hash keys
#
#    rounds is at offset 240 in rk
#    Xi is at offset 0 in gcm_table (Xip).
#
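#
# A minimal caller sketch, assuming rk already holds the expanded AES round
# keys (round count at offset 240) and Xip points to a hash table laid out
# as in LOAD_HASH_TABLE (buffer names are illustrative only):
#
#   unsigned char iv[16];   /* ICB; updated in place as blocks are consumed */
#
#   aes_p10_gcm_encrypt(src, dst, len, rk, iv, Xip);
#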
_GLOBAL(aes_p10_gcm_encrypt)
|
|
.align 5
|
|
|
|
SAVE_REGS
|
|
|
|
LOAD_HASH_TABLE
|
|
|
|
# initialize ICB: GHASH( IV ), IV - r7
|
|
lxvb16x 30+32, 0, 7 # load IV - v30
|
|
|
|
mr 12, 5 # length
|
|
li 11, 0 # block index
|
|
|
|
# counter 1
|
|
vxor 31, 31, 31
|
|
vspltisb 22, 1
|
|
vsldoi 31, 31, 22,1 # counter 1
|
|
|
|
# load round key to VSR
|
|
lxv 0, 0(6)
|
|
lxv 1, 0x10(6)
|
|
lxv 2, 0x20(6)
|
|
lxv 3, 0x30(6)
|
|
lxv 4, 0x40(6)
|
|
lxv 5, 0x50(6)
|
|
lxv 6, 0x60(6)
|
|
lxv 7, 0x70(6)
|
|
lxv 8, 0x80(6)
|
|
lxv 9, 0x90(6)
|
|
lxv 10, 0xa0(6)
|
|
|
|
# load rounds - 10 (128), 12 (192), 14 (256)
|
|
lwz 9,240(6)
|
|
|
|
#
|
|
# vxor state, state, w # addroundkey
|
|
xxlor 32+29, 0, 0
|
|
vxor 15, 30, 29 # IV + round key - add round key 0
|
|
|
|
cmpdi 9, 10
|
|
beq Loop_aes_gcm_8x
|
|
|
|
# load 2 more round keys (v11, v12)
|
|
lxv 11, 0xb0(6)
|
|
lxv 12, 0xc0(6)
|
|
|
|
cmpdi 9, 12
|
|
beq Loop_aes_gcm_8x
|
|
|
|
# load 2 more round keys (v13, v14)
|
|
lxv 13, 0xd0(6)
|
|
lxv 14, 0xe0(6)
|
|
cmpdi 9, 14
|
|
beq Loop_aes_gcm_8x
|
|
|
|
b aes_gcm_out
|
|
|
|
.align 5
|
|
Loop_aes_gcm_8x:
|
|
mr 14, 3
|
|
mr 9, 4
|
|
|
|
#
|
|
# check partial block
|
|
#
|
|
Continue_partial_check:
|
|
ld 15, 56(7)
|
|
cmpdi 15, 0
|
|
beq Continue
|
|
bgt Final_block
|
|
cmpdi 15, 16
|
|
blt Final_block
|
|
|
|
Continue:
|
|
# n blocks
li 10, 128
divdu 10, 12, 10 # n 128-byte blocks
|
|
cmpdi 10, 0
|
|
beq Loop_last_block
|
|
|
|
vaddudm 30, 30, 31 # IV + counter
|
|
vxor 16, 30, 29
|
|
vaddudm 30, 30, 31
|
|
vxor 17, 30, 29
|
|
vaddudm 30, 30, 31
|
|
vxor 18, 30, 29
|
|
vaddudm 30, 30, 31
|
|
vxor 19, 30, 29
|
|
vaddudm 30, 30, 31
|
|
vxor 20, 30, 29
|
|
vaddudm 30, 30, 31
|
|
vxor 21, 30, 29
|
|
vaddudm 30, 30, 31
|
|
vxor 22, 30, 29
|
|
|
|
mtctr 10
|
|
|
|
li 15, 16
|
|
li 16, 32
|
|
li 17, 48
|
|
li 18, 64
|
|
li 19, 80
|
|
li 20, 96
|
|
li 21, 112
|
|
|
|
lwz 10, 240(6)
|
|
|
|
Loop_8x_block:
|
|
|
|
lxvb16x 15, 0, 14 # load block
|
|
lxvb16x 16, 15, 14 # load block
|
|
lxvb16x 17, 16, 14 # load block
|
|
lxvb16x 18, 17, 14 # load block
|
|
lxvb16x 19, 18, 14 # load block
|
|
lxvb16x 20, 19, 14 # load block
|
|
lxvb16x 21, 20, 14 # load block
|
|
lxvb16x 22, 21, 14 # load block
|
|
addi 14, 14, 128
|
|
|
|
Loop_aes_middle8x
|
|
|
|
xxlor 23+32, 10, 10
|
|
|
|
cmpdi 10, 10
|
|
beq Do_next_ghash
|
|
|
|
# 192 bits
|
|
xxlor 24+32, 11, 11
|
|
|
|
vcipher 15, 15, 23
|
|
vcipher 16, 16, 23
|
|
vcipher 17, 17, 23
|
|
vcipher 18, 18, 23
|
|
vcipher 19, 19, 23
|
|
vcipher 20, 20, 23
|
|
vcipher 21, 21, 23
|
|
vcipher 22, 22, 23
|
|
|
|
vcipher 15, 15, 24
|
|
vcipher 16, 16, 24
|
|
vcipher 17, 17, 24
|
|
vcipher 18, 18, 24
|
|
vcipher 19, 19, 24
|
|
vcipher 20, 20, 24
|
|
vcipher 21, 21, 24
|
|
vcipher 22, 22, 24
|
|
|
|
xxlor 23+32, 12, 12
|
|
|
|
cmpdi 10, 12
|
|
beq Do_next_ghash
|
|
|
|
# 256 bits
|
|
xxlor 24+32, 13, 13
|
|
|
|
vcipher 15, 15, 23
|
|
vcipher 16, 16, 23
|
|
vcipher 17, 17, 23
|
|
vcipher 18, 18, 23
|
|
vcipher 19, 19, 23
|
|
vcipher 20, 20, 23
|
|
vcipher 21, 21, 23
|
|
vcipher 22, 22, 23
|
|
|
|
vcipher 15, 15, 24
|
|
vcipher 16, 16, 24
|
|
vcipher 17, 17, 24
|
|
vcipher 18, 18, 24
|
|
vcipher 19, 19, 24
|
|
vcipher 20, 20, 24
|
|
vcipher 21, 21, 24
|
|
vcipher 22, 22, 24
|
|
|
|
xxlor 23+32, 14, 14
|
|
|
|
cmpdi 10, 14
|
|
beq Do_next_ghash
|
|
b aes_gcm_out
|
|
|
|
Do_next_ghash:
|
|
|
|
#
|
|
# last round
|
|
vcipherlast 15, 15, 23
|
|
vcipherlast 16, 16, 23
|
|
|
|
xxlxor 47, 47, 15
|
|
stxvb16x 47, 0, 9 # store output
|
|
xxlxor 48, 48, 16
|
|
stxvb16x 48, 15, 9 # store output
|
|
|
|
vcipherlast 17, 17, 23
|
|
vcipherlast 18, 18, 23
|
|
|
|
xxlxor 49, 49, 17
|
|
stxvb16x 49, 16, 9 # store output
|
|
xxlxor 50, 50, 18
|
|
stxvb16x 50, 17, 9 # store output
|
|
|
|
vcipherlast 19, 19, 23
|
|
vcipherlast 20, 20, 23
|
|
|
|
xxlxor 51, 51, 19
|
|
stxvb16x 51, 18, 9 # store output
|
|
xxlxor 52, 52, 20
|
|
stxvb16x 52, 19, 9 # store output
|
|
|
|
vcipherlast 21, 21, 23
|
|
vcipherlast 22, 22, 23
|
|
|
|
xxlxor 53, 53, 21
|
|
stxvb16x 53, 20, 9 # store output
|
|
xxlxor 54, 54, 22
|
|
stxvb16x 54, 21, 9 # store output
|
|
|
|
addi 9, 9, 128
|
|
|
|
# ghash here
|
|
ppc_aes_gcm_ghash2_4x
|
|
|
|
xxlor 27+32, 0, 0
|
|
vaddudm 30, 30, 31 # IV + counter
|
|
vmr 29, 30
|
|
vxor 15, 30, 27 # add round key
|
|
vaddudm 30, 30, 31
|
|
vxor 16, 30, 27
|
|
vaddudm 30, 30, 31
|
|
vxor 17, 30, 27
|
|
vaddudm 30, 30, 31
|
|
vxor 18, 30, 27
|
|
vaddudm 30, 30, 31
|
|
vxor 19, 30, 27
|
|
vaddudm 30, 30, 31
|
|
vxor 20, 30, 27
|
|
vaddudm 30, 30, 31
|
|
vxor 21, 30, 27
|
|
vaddudm 30, 30, 31
|
|
vxor 22, 30, 27
|
|
|
|
addi 12, 12, -128
|
|
addi 11, 11, 128
|
|
|
|
bdnz Loop_8x_block
|
|
|
|
vmr 30, 29
|
|
stxvb16x 30+32, 0, 7 # update IV
|
|
|
|
Loop_last_block:
|
|
cmpdi 12, 0
|
|
beq aes_gcm_out
|
|
|
|
# loop last few blocks
|
|
li 10, 16
|
|
divdu 10, 12, 10
|
|
|
|
mtctr 10
|
|
|
|
lwz 10, 240(6)
|
|
|
|
cmpdi 12, 16
|
|
blt Final_block
|
|
|
|
Next_rem_block:
|
|
lxvb16x 15, 0, 14 # load block
|
|
|
|
Loop_aes_middle_1x
|
|
|
|
xxlor 23+32, 10, 10
|
|
|
|
cmpdi 10, 10
|
|
beq Do_next_1x
|
|
|
|
# 192 bits
|
|
xxlor 24+32, 11, 11
|
|
|
|
vcipher 15, 15, 23
|
|
vcipher 15, 15, 24
|
|
|
|
xxlor 23+32, 12, 12
|
|
|
|
cmpdi 10, 12
|
|
beq Do_next_1x
|
|
|
|
# 256 bits
|
|
xxlor 24+32, 13, 13
|
|
|
|
vcipher 15, 15, 23
|
|
vcipher 15, 15, 24
|
|
|
|
xxlor 23+32, 14, 14
|
|
|
|
cmpdi 10, 14
|
|
beq Do_next_1x
|
|
|
|
Do_next_1x:
|
|
vcipherlast 15, 15, 23
|
|
|
|
xxlxor 47, 47, 15
|
|
stxvb16x 47, 0, 9 # store output
|
|
addi 14, 14, 16
|
|
addi 9, 9, 16
|
|
|
|
vmr 28, 15
|
|
ppc_update_hash_1x
|
|
|
|
addi 12, 12, -16
|
|
addi 11, 11, 16
|
|
xxlor 19+32, 0, 0
|
|
vaddudm 30, 30, 31 # IV + counter
|
|
vxor 15, 30, 19 # add round key
|
|
|
|
bdnz Next_rem_block
|
|
|
|
li 15, 0
|
|
std 15, 56(7) # clear partial?
|
|
stxvb16x 30+32, 0, 7 # update IV
|
|
cmpdi 12, 0
|
|
beq aes_gcm_out
|
|
|
|
Final_block:
|
|
lwz 10, 240(6)
|
|
Loop_aes_middle_1x
|
|
|
|
xxlor 23+32, 10, 10
|
|
|
|
cmpdi 10, 10
|
|
beq Do_final_1x
|
|
|
|
# 192 bits
|
|
xxlor 24+32, 11, 11
|
|
|
|
vcipher 15, 15, 23
|
|
vcipher 15, 15, 24
|
|
|
|
xxlor 23+32, 12, 12
|
|
|
|
cmpdi 10, 12
|
|
beq Do_final_1x
|
|
|
|
# 256 bits
|
|
xxlor 24+32, 13, 13
|
|
|
|
vcipher 15, 15, 23
|
|
vcipher 15, 15, 24
|
|
|
|
xxlor 23+32, 14, 14
|
|
|
|
cmpdi 10, 14
|
|
beq Do_final_1x
|
|
|
|
Do_final_1x:
|
|
vcipherlast 15, 15, 23
|
|
|
|
# check partial block
|
|
li 21, 0 # encrypt
|
|
ld 15, 56(7) # partial?
|
|
cmpdi 15, 0
|
|
beq Normal_block
|
|
bl Do_partial_block
|
|
|
|
cmpdi 12, 0
|
|
ble aes_gcm_out
|
|
|
|
b Continue_partial_check
|
|
|
|
Normal_block:
|
|
lxvb16x 15, 0, 14 # load last block
|
|
xxlxor 47, 47, 15
|
|
|
|
# create partial block mask
|
|
li 15, 16
|
|
sub 15, 15, 12 # index to the mask
|
|
|
|
vspltisb 16, -1 # first 16 bytes - 0xffff...ff
|
|
vspltisb 17, 0 # second 16 bytes - 0x0000...00
|
|
li 10, 192
|
|
stvx 16, 10, 1
|
|
addi 10, 10, 16
|
|
stvx 17, 10, 1
|
|
|
|
addi 10, 1, 192
|
|
lxvb16x 16, 15, 10 # load partial block mask
|
|
xxland 47, 47, 16
|
|
|
|
vmr 28, 15
|
|
ppc_update_hash_1x
|
|
|
|
# * should store only the remaining bytes.
|
|
bl Write_partial_block
|
|
|
|
stxvb16x 30+32, 0, 7 # update IV
|
|
std 12, 56(7) # update partial?
|
|
li 16, 16
|
|
|
|
stxvb16x 32, 0, 8 # write out Xi
|
|
stxvb16x 32, 16, 8 # write out Xi
|
|
b aes_gcm_out
|
|
|
|
#
# Compute data mask
#
.macro GEN_MASK _mask _start _end
|
|
vspltisb 16, -1 # first 16 bytes - 0xffff...ff
|
|
vspltisb 17, 0 # second 16 bytes - 0x0000...00
|
|
li 10, 192
|
|
stxvb16x 17+32, 10, 1
|
|
add 10, 10, \_start
|
|
stxvb16x 16+32, 10, 1
|
|
add 10, 10, \_end
|
|
stxvb16x 17+32, 10, 1
|
|
|
|
addi 10, 1, 192
|
|
lxvb16x \_mask, 0, 10 # load partial block mask
|
|
.endm
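
#
# GEN_MASK builds a byte mask in the scratch area at 192(r1), roughly:
#
#   memset(scratch, 0x00, 16);
#   memset(scratch + start, 0xff, 16);
#   memset(scratch + start + end, 0x00, 16);
#   mask = load_be16bytes(scratch);   /* 0xff for bytes [start, start+end) */
#
# (a sketch only; load_be16bytes() stands in for the lxvb16x above)
#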
|
|
|
|
#
# Handle multiple partial blocks for encrypt and decrypt
# operations.
#
SYM_FUNC_START_LOCAL(Do_partial_block)
|
|
add 17, 15, 5
|
|
cmpdi 17, 16
|
|
bgt Big_block
|
|
GEN_MASK 18, 15, 5
|
|
b _Partial
|
|
SYM_FUNC_END(Do_partial_block)
|
|
Big_block:
|
|
li 16, 16
|
|
GEN_MASK 18, 15, 16
|
|
|
|
_Partial:
|
|
lxvb16x 17+32, 0, 14 # load last block
|
|
sldi 16, 15, 3
|
|
mtvsrdd 32+16, 0, 16
|
|
vsro 17, 17, 16
|
|
xxlxor 47, 47, 17+32
|
|
xxland 47, 47, 18
|
|
|
|
vxor 0, 0, 0 # clear Xi
|
|
vmr 28, 15
|
|
|
|
cmpdi 21, 0 # encrypt/decrypt ops?
|
|
beq Skip_decrypt
|
|
xxland 32+28, 32+17, 18
|
|
|
|
Skip_decrypt:
|
|
|
|
ppc_update_hash_1x
|
|
|
|
li 16, 16
|
|
lxvb16x 32+29, 16, 8
|
|
vxor 0, 0, 29
|
|
stxvb16x 32, 0, 8 # save Xi
|
|
stxvb16x 32, 16, 8 # save Xi
|
|
|
|
# store partial block
|
|
# loop the rest of the stream if any
|
|
sldi 16, 15, 3
|
|
mtvsrdd 32+16, 0, 16
|
|
vslo 15, 15, 16
|
|
#stxvb16x 15+32, 0, 9 # last block
|
|
|
|
li 16, 16
|
|
sub 17, 16, 15 # 16 - partial
|
|
|
|
add 16, 15, 5
|
|
cmpdi 16, 16
|
|
bgt Larger_16
|
|
mr 17, 5
|
|
Larger_16:
|
|
|
|
# write partial
|
|
li 10, 192
|
|
stxvb16x 15+32, 10, 1 # save current block
|
|
|
|
addi 10, 9, -1
|
|
addi 16, 1, 191
|
|
mtctr 17 # move partial byte count
|
|
|
|
Write_last_partial:
|
|
lbzu 18, 1(16)
|
|
stbu 18, 1(10)
|
|
bdnz Write_last_partial
|
|
# Complete loop partial
|
|
|
|
add 14, 14, 17
|
|
add 9, 9, 17
|
|
sub 12, 12, 17
|
|
add 11, 11, 17
|
|
|
|
add 15, 15, 5
|
|
cmpdi 15, 16
|
|
blt Save_partial
|
|
|
|
vaddudm 30, 30, 31
|
|
stxvb16x 30+32, 0, 7 # update IV
|
|
xxlor 32+29, 0, 0
|
|
vxor 15, 30, 29 # IV + round key - add round key 0
|
|
li 15, 0
|
|
std 15, 56(7) # partial done - clear
|
|
b Partial_done
|
|
Save_partial:
|
|
std 15, 56(7) # partial
|
|
|
|
Partial_done:
|
|
blr
|
|
|
|
#
# Write partial block
# r9 - output
# r12 - remaining bytes
# v15 - partial input data
#
SYM_FUNC_START_LOCAL(Write_partial_block)
|
|
li 10, 192
|
|
stxvb16x 15+32, 10, 1 # last block
|
|
|
|
addi 10, 9, -1
|
|
addi 16, 1, 191
|
|
|
|
mtctr 12 # remaining bytes
|
|
li 15, 0
|
|
|
|
Write_last_byte:
|
|
lbzu 14, 1(16)
|
|
stbu 14, 1(10)
|
|
bdnz Write_last_byte
|
|
blr
|
|
SYM_FUNC_END(Write_partial_block)
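
#
# Write_partial_block, roughly: spill the 16-byte block to the scratch area
# at 192(r1), then copy only the valid bytes to the output. As a sketch
# (out is r9, remaining is r12):
#
#   memcpy(scratch, block, 16);
#   memcpy(out, scratch, remaining);
#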
|
|
|
|
aes_gcm_out:
|
|
# out = state
|
|
stxvb16x 32, 0, 8 # write out Xi
|
|
add 3, 11, 12 # return count
|
|
|
|
RESTORE_REGS
|
|
blr
|
|
|
|
#
# 8x Decrypt
#
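# The decrypt path mirrors the encrypt path; the main difference is that
# GHASH must be computed over the ciphertext, so the loaded input blocks are
# restored into v15 - v22 (xxlor 15+32, 15, 15 ...) before
# ppc_aes_gcm_ghash2_4x instead of hashing the freshly produced output.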
_GLOBAL(aes_p10_gcm_decrypt)
|
|
.align 5
|
|
|
|
SAVE_REGS
|
|
|
|
LOAD_HASH_TABLE
|
|
|
|
# initialize ICB: GHASH( IV ), IV - r7
|
|
lxvb16x 30+32, 0, 7 # load IV - v30
|
|
|
|
mr 12, 5 # length
|
|
li 11, 0 # block index
|
|
|
|
# counter 1
|
|
vxor 31, 31, 31
|
|
vspltisb 22, 1
|
|
vsldoi 31, 31, 22,1 # counter 1
|
|
|
|
# load round key to VSR
|
|
lxv 0, 0(6)
|
|
lxv 1, 0x10(6)
|
|
lxv 2, 0x20(6)
|
|
lxv 3, 0x30(6)
|
|
lxv 4, 0x40(6)
|
|
lxv 5, 0x50(6)
|
|
lxv 6, 0x60(6)
|
|
lxv 7, 0x70(6)
|
|
lxv 8, 0x80(6)
|
|
lxv 9, 0x90(6)
|
|
lxv 10, 0xa0(6)
|
|
|
|
# load rounds - 10 (128), 12 (192), 14 (256)
|
|
lwz 9,240(6)
|
|
|
|
#
|
|
# vxor state, state, w # addroundkey
|
|
xxlor 32+29, 0, 0
|
|
vxor 15, 30, 29 # IV + round key - add round key 0
|
|
|
|
cmpdi 9, 10
|
|
beq Loop_aes_gcm_8x_dec
|
|
|
|
# load 2 more round keys (v11, v12)
|
|
lxv 11, 0xb0(6)
|
|
lxv 12, 0xc0(6)
|
|
|
|
cmpdi 9, 12
|
|
beq Loop_aes_gcm_8x_dec
|
|
|
|
# load 2 more round keys (v13, v14)
|
|
lxv 13, 0xd0(6)
|
|
lxv 14, 0xe0(6)
|
|
cmpdi 9, 14
|
|
beq Loop_aes_gcm_8x_dec
|
|
|
|
b aes_gcm_out
|
|
|
|
.align 5
|
|
Loop_aes_gcm_8x_dec:
|
|
mr 14, 3
|
|
mr 9, 4
|
|
|
|
#
|
|
# check partial block
|
|
#
|
|
Continue_partial_check_dec:
|
|
ld 15, 56(7)
|
|
cmpdi 15, 0
|
|
beq Continue_dec
|
|
bgt Final_block_dec
|
|
cmpdi 15, 16
|
|
blt Final_block_dec
|
|
|
|
Continue_dec:
|
|
# n blocks
li 10, 128
divdu 10, 12, 10 # n 128-byte blocks
|
|
cmpdi 10, 0
|
|
beq Loop_last_block_dec
|
|
|
|
vaddudm 30, 30, 31 # IV + counter
|
|
vxor 16, 30, 29
|
|
vaddudm 30, 30, 31
|
|
vxor 17, 30, 29
|
|
vaddudm 30, 30, 31
|
|
vxor 18, 30, 29
|
|
vaddudm 30, 30, 31
|
|
vxor 19, 30, 29
|
|
vaddudm 30, 30, 31
|
|
vxor 20, 30, 29
|
|
vaddudm 30, 30, 31
|
|
vxor 21, 30, 29
|
|
vaddudm 30, 30, 31
|
|
vxor 22, 30, 29
|
|
|
|
mtctr 10
|
|
|
|
li 15, 16
|
|
li 16, 32
|
|
li 17, 48
|
|
li 18, 64
|
|
li 19, 80
|
|
li 20, 96
|
|
li 21, 112
|
|
|
|
lwz 10, 240(6)
|
|
|
|
Loop_8x_block_dec:
|
|
|
|
lxvb16x 15, 0, 14 # load block
|
|
lxvb16x 16, 15, 14 # load block
|
|
lxvb16x 17, 16, 14 # load block
|
|
lxvb16x 18, 17, 14 # load block
|
|
lxvb16x 19, 18, 14 # load block
|
|
lxvb16x 20, 19, 14 # load block
|
|
lxvb16x 21, 20, 14 # load block
|
|
lxvb16x 22, 21, 14 # load block
|
|
addi 14, 14, 128
|
|
|
|
Loop_aes_middle8x
|
|
|
|
xxlor 23+32, 10, 10
|
|
|
|
cmpdi 10, 10
|
|
beq Do_next_ghash_dec
|
|
|
|
# 192 bits
|
|
xxlor 24+32, 11, 11
|
|
|
|
vcipher 15, 15, 23
|
|
vcipher 16, 16, 23
|
|
vcipher 17, 17, 23
|
|
vcipher 18, 18, 23
|
|
vcipher 19, 19, 23
|
|
vcipher 20, 20, 23
|
|
vcipher 21, 21, 23
|
|
vcipher 22, 22, 23
|
|
|
|
vcipher 15, 15, 24
|
|
vcipher 16, 16, 24
|
|
vcipher 17, 17, 24
|
|
vcipher 18, 18, 24
|
|
vcipher 19, 19, 24
|
|
vcipher 20, 20, 24
|
|
vcipher 21, 21, 24
|
|
vcipher 22, 22, 24
|
|
|
|
xxlor 23+32, 12, 12
|
|
|
|
cmpdi 10, 12
|
|
beq Do_next_ghash_dec
|
|
|
|
# 256 bits
|
|
xxlor 24+32, 13, 13
|
|
|
|
vcipher 15, 15, 23
|
|
vcipher 16, 16, 23
|
|
vcipher 17, 17, 23
|
|
vcipher 18, 18, 23
|
|
vcipher 19, 19, 23
|
|
vcipher 20, 20, 23
|
|
vcipher 21, 21, 23
|
|
vcipher 22, 22, 23
|
|
|
|
vcipher 15, 15, 24
|
|
vcipher 16, 16, 24
|
|
vcipher 17, 17, 24
|
|
vcipher 18, 18, 24
|
|
vcipher 19, 19, 24
|
|
vcipher 20, 20, 24
|
|
vcipher 21, 21, 24
|
|
vcipher 22, 22, 24
|
|
|
|
xxlor 23+32, 14, 14
|
|
|
|
cmpdi 10, 14
|
|
beq Do_next_ghash_dec
|
|
b aes_gcm_out
|
|
|
|
Do_next_ghash_dec:
|
|
|
|
#
|
|
# last round
|
|
vcipherlast 15, 15, 23
|
|
vcipherlast 16, 16, 23
|
|
|
|
xxlxor 47, 47, 15
|
|
stxvb16x 47, 0, 9 # store output
|
|
xxlxor 48, 48, 16
|
|
stxvb16x 48, 15, 9 # store output
|
|
|
|
vcipherlast 17, 17, 23
|
|
vcipherlast 18, 18, 23
|
|
|
|
xxlxor 49, 49, 17
|
|
stxvb16x 49, 16, 9 # store output
|
|
xxlxor 50, 50, 18
|
|
stxvb16x 50, 17, 9 # store output
|
|
|
|
vcipherlast 19, 19, 23
|
|
vcipherlast 20, 20, 23
|
|
|
|
xxlxor 51, 51, 19
|
|
stxvb16x 51, 18, 9 # store output
|
|
xxlxor 52, 52, 20
|
|
stxvb16x 52, 19, 9 # store output
|
|
|
|
vcipherlast 21, 21, 23
|
|
vcipherlast 22, 22, 23
|
|
|
|
xxlxor 53, 53, 21
|
|
stxvb16x 53, 20, 9 # store output
|
|
xxlxor 54, 54, 22
|
|
stxvb16x 54, 21, 9 # store output
|
|
|
|
addi 9, 9, 128
|
|
|
|
xxlor 15+32, 15, 15
|
|
xxlor 16+32, 16, 16
|
|
xxlor 17+32, 17, 17
|
|
xxlor 18+32, 18, 18
|
|
xxlor 19+32, 19, 19
|
|
xxlor 20+32, 20, 20
|
|
xxlor 21+32, 21, 21
|
|
xxlor 22+32, 22, 22
|
|
|
|
# ghash here
|
|
ppc_aes_gcm_ghash2_4x
|
|
|
|
xxlor 27+32, 0, 0
|
|
vaddudm 30, 30, 31 # IV + counter
|
|
vmr 29, 30
|
|
vxor 15, 30, 27 # add round key
|
|
vaddudm 30, 30, 31
|
|
vxor 16, 30, 27
|
|
vaddudm 30, 30, 31
|
|
vxor 17, 30, 27
|
|
vaddudm 30, 30, 31
|
|
vxor 18, 30, 27
|
|
vaddudm 30, 30, 31
|
|
vxor 19, 30, 27
|
|
vaddudm 30, 30, 31
|
|
vxor 20, 30, 27
|
|
vaddudm 30, 30, 31
|
|
vxor 21, 30, 27
|
|
vaddudm 30, 30, 31
|
|
vxor 22, 30, 27
|
|
|
|
addi 12, 12, -128
|
|
addi 11, 11, 128
|
|
|
|
bdnz Loop_8x_block_dec
|
|
|
|
vmr 30, 29
|
|
stxvb16x 30+32, 0, 7 # update IV
|
|
|
|
Loop_last_block_dec:
|
|
cmpdi 12, 0
|
|
beq aes_gcm_out
|
|
|
|
# loop last few blocks
|
|
li 10, 16
|
|
divdu 10, 12, 10
|
|
|
|
mtctr 10
|
|
|
|
lwz 10, 240(6)
|
|
|
|
cmpdi 12, 16
|
|
blt Final_block_dec
|
|
|
|
Next_rem_block_dec:
|
|
lxvb16x 15, 0, 14 # load block
|
|
|
|
Loop_aes_middle_1x
|
|
|
|
xxlor 23+32, 10, 10
|
|
|
|
cmpdi 10, 10
|
|
beq Do_next_1x_dec
|
|
|
|
# 192 bits
|
|
xxlor 24+32, 11, 11
|
|
|
|
vcipher 15, 15, 23
|
|
vcipher 15, 15, 24
|
|
|
|
xxlor 23+32, 12, 12
|
|
|
|
cmpdi 10, 12
|
|
beq Do_next_1x_dec
|
|
|
|
# 256 bits
|
|
xxlor 24+32, 13, 13
|
|
|
|
vcipher 15, 15, 23
|
|
vcipher 15, 15, 24
|
|
|
|
xxlor 23+32, 14, 14
|
|
|
|
cmpdi 10, 14
|
|
beq Do_next_1x_dec
|
|
|
|
Do_next_1x_dec:
|
|
vcipherlast 15, 15, 23
|
|
|
|
xxlxor 47, 47, 15
|
|
stxvb16x 47, 0, 9 # store output
|
|
addi 14, 14, 16
|
|
addi 9, 9, 16
|
|
|
|
xxlor 28+32, 15, 15
|
|
#vmr 28, 15
|
|
ppc_update_hash_1x
|
|
|
|
addi 12, 12, -16
|
|
addi 11, 11, 16
|
|
xxlor 19+32, 0, 0
|
|
vaddudm 30, 30, 31 # IV + counter
|
|
vxor 15, 30, 19 # add round key
|
|
|
|
bdnz Next_rem_block_dec
|
|
|
|
li 15, 0
|
|
std 15, 56(7) # clear partial?
|
|
stxvb16x 30+32, 0, 7 # update IV
|
|
cmpdi 12, 0
|
|
beq aes_gcm_out
|
|
|
|
Final_block_dec:
|
|
lwz 10, 240(6)
|
|
Loop_aes_middle_1x
|
|
|
|
xxlor 23+32, 10, 10
|
|
|
|
cmpdi 10, 10
|
|
beq Do_final_1x_dec
|
|
|
|
# 192 bits
|
|
xxlor 24+32, 11, 11
|
|
|
|
vcipher 15, 15, 23
|
|
vcipher 15, 15, 24
|
|
|
|
xxlor 23+32, 12, 12
|
|
|
|
cmpdi 10, 12
|
|
beq Do_final_1x_dec
|
|
|
|
# 256 bits
|
|
xxlor 24+32, 13, 13
|
|
|
|
vcipher 15, 15, 23
|
|
vcipher 15, 15, 24
|
|
|
|
xxlor 23+32, 14, 14
|
|
|
|
cmpdi 10, 14
|
|
beq Do_final_1x_dec
|
|
|
|
Do_final_1x_dec:
|
|
vcipherlast 15, 15, 23
|
|
|
|
# check partial block
|
|
li 21, 1 # decrypt
|
|
ld 15, 56(7) # partial?
|
|
cmpdi 15, 0
|
|
beq Normal_block_dec
|
|
bl Do_partial_block
|
|
cmpdi 12, 0
|
|
ble aes_gcm_out
|
|
|
|
b Continue_partial_check_dec
|
|
|
|
Normal_block_dec:
|
|
lxvb16x 15, 0, 14 # load last block
|
|
xxlxor 47, 47, 15
|
|
|
|
# create partial block mask
|
|
li 15, 16
|
|
sub 15, 15, 12 # index to the mask
|
|
|
|
vspltisb 16, -1 # first 16 bytes - 0xffff...ff
|
|
vspltisb 17, 0 # second 16 bytes - 0x0000...00
|
|
li 10, 192
|
|
stvx 16, 10, 1
|
|
addi 10, 10, 16
|
|
stvx 17, 10, 1
|
|
|
|
addi 10, 1, 192
|
|
lxvb16x 16, 15, 10 # load partial block mask
|
|
xxland 47, 47, 16
|
|
|
|
xxland 32+28, 15, 16
|
|
#vmr 28, 15
|
|
ppc_update_hash_1x
|
|
|
|
# * should store only the remaining bytes.
|
|
bl Write_partial_block
|
|
|
|
stxvb16x 30+32, 0, 7 # update IV
|
|
std 12, 56(7) # update partial?
|
|
li 16, 16
|
|
|
|
stxvb16x 32, 0, 8 # write out Xi
|
|
stxvb16x 32, 16, 8 # write out Xi
|
|
b aes_gcm_out
|