Merge tag 'v5.20-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:

 "API:
   - Make proc files report fips module name and version

  Algorithms:
   - Move generic SHA1 code into lib/crypto
   - Implement Chinese Remainder Theorem for RSA
   - Remove blake2s
   - Add XCTR with x86/arm64 acceleration
   - Add POLYVAL with x86/arm64 acceleration
   - Add HCTR2
   - Add ARIA

  Drivers:
   - Add support for new CCP/PSP device ID in ccp"

* tag 'v5.20-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (89 commits)
  crypto: tcrypt - Remove the static variable initialisations to NULL
  crypto: arm64/poly1305 - fix a read out-of-bound
  crypto: hisilicon/zip - Use the bitmap API to allocate bitmaps
  crypto: hisilicon/sec - fix auth key size error
  crypto: ccree - Remove a useless dma_supported() call
  crypto: ccp - Add support for new CCP/PSP device ID
  crypto: inside-secure - Add missing MODULE_DEVICE_TABLE for of
  crypto: hisilicon/hpre - don't use GFP_KERNEL to alloc mem during softirq
  crypto: testmgr - some more fixes to RSA test vectors
  crypto: powerpc/aes - delete the redundant word "block" in comments
  hwrng: via - Fix comment typo
  crypto: twofish - Fix comment typo
  crypto: rmd160 - fix Kconfig "its" grammar
  crypto: keembay-ocs-ecc - Drop if with an always false condition
  Documentation: qat: rewrite description
  Documentation: qat: Use code block for qat sysfs example
  crypto: lib - add module license to libsha1
  crypto: lib - make the sha1 library optional
  crypto: lib - move lib/sha1.c into lib/crypto/
  crypto: fips - make proc files report fips module name and version
  ...
commit c2a24a7a03

diff --git a/Documentation/ABI/testing/sysfs-driver-qat b/Documentation/ABI/testing/sysfs-driver-qat
new file (49 lines)
@@ -0,0 +1,49 @@

What:		/sys/bus/pci/devices/<BDF>/qat/state
Date:		June 2022
KernelVersion:	5.20
Contact:	qat-linux@intel.com
Description:	(RW) Reports the current state of the QAT device. Write to
		the file to start or stop the device.

		The values are:

		* up: the device is up and running
		* down: the device is down

		It is possible to transition the device from up to down only
		if the device is up and vice versa.

		This attribute is only available for qat_4xxx devices.

What:		/sys/bus/pci/devices/<BDF>/qat/cfg_services
Date:		June 2022
KernelVersion:	5.20
Contact:	qat-linux@intel.com
Description:	(RW) Reports the current configuration of the QAT device.
		Write to the file to change the configured services.

		The values are:

		* sym;asym: the device is configured for running crypto
		  services
		* dc: the device is configured for running compression services

		It is possible to set the configuration only if the device
		is in the `down` state (see /sys/bus/pci/devices/<BDF>/qat/state)

		The following example shows how to change the configuration of
		a device configured for running crypto services in order to
		run data compression::

			# cat /sys/bus/pci/devices/<BDF>/qat/state
			up
			# cat /sys/bus/pci/devices/<BDF>/qat/cfg_services
			sym;asym
			# echo down > /sys/bus/pci/devices/<BDF>/qat/state
			# echo dc > /sys/bus/pci/devices/<BDF>/qat/cfg_services
			# echo up > /sys/bus/pci/devices/<BDF>/qat/state
			# cat /sys/bus/pci/devices/<BDF>/qat/cfg_services
			dc

		This attribute is only available for qat_4xxx devices.
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
@@ -337,6 +337,7 @@ Currently, the following pairs of encryption modes are supported:
 
 - AES-256-XTS for contents and AES-256-CTS-CBC for filenames
 - AES-128-CBC for contents and AES-128-CTS-CBC for filenames
 - Adiantum for both contents and filenames
+- AES-256-XTS for contents and AES-256-HCTR2 for filenames (v2 policies only)
 
 If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair.
 
@@ -357,6 +358,17 @@ To use Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled.  Also, fast
 implementations of ChaCha and NHPoly1305 should be enabled, e.g.
 CONFIG_CRYPTO_CHACHA20_NEON and CONFIG_CRYPTO_NHPOLY1305_NEON for ARM.
 
+AES-256-HCTR2 is another true wide-block encryption mode that is intended for
+use on CPUs with dedicated crypto instructions.  AES-256-HCTR2 has the property
+that a bitflip in the plaintext changes the entire ciphertext.  This property
+makes it desirable for filename encryption since initialization vectors are
+reused within a directory.  For more details on AES-256-HCTR2, see the paper
+"Length-preserving encryption with HCTR2"
+(https://eprint.iacr.org/2021/1441.pdf).  To use AES-256-HCTR2,
+CONFIG_CRYPTO_HCTR2 must be enabled.  Also, fast implementations of XCTR and
+POLYVAL should be enabled, e.g. CRYPTO_POLYVAL_ARM64_CE and
+CRYPTO_AES_ARM64_CE_BLK for ARM64.
+
 New encryption modes can be added relatively easily, without changes
 to individual filesystems.  However, authenticated encryption (AE)
 modes are not currently supported because of the difficulty of dealing
@@ -404,11 +416,11 @@ alternatively has the file's nonce (for `DIRECT_KEY policies`_) or
 inode number (for `IV_INO_LBLK_64 policies`_) included in the IVs.
 Thus, IV reuse is limited to within a single directory.
 
-With CTS-CBC, the IV reuse means that when the plaintext filenames
-share a common prefix at least as long as the cipher block size (16
-bytes for AES), the corresponding encrypted filenames will also share
-a common prefix.  This is undesirable.  Adiantum does not have this
-weakness, as it is a wide-block encryption mode.
+With CTS-CBC, the IV reuse means that when the plaintext filenames share a
+common prefix at least as long as the cipher block size (16 bytes for AES), the
+corresponding encrypted filenames will also share a common prefix.  This is
+undesirable.  Adiantum and HCTR2 do not have this weakness, as they are
+wide-block encryption modes.
 
 All supported filenames encryption modes accept any plaintext length
 >= 16 bytes; cipher block alignment is not required.  However,
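[Illustration, not part of the patch.] The new (AES-256-XTS, AES-256-HCTR2) pair documented above is selected from userspace through an fscrypt v2 policy. Below is a minimal C sketch of one way to do that. It assumes <linux/fscrypt.h> from a kernel with HCTR2 support (which provides FSCRYPT_MODE_AES_256_HCTR2), an empty target directory, and a master key already installed with FS_IOC_ADD_ENCRYPTION_KEY; error handling is elided::

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/fscrypt.h>

	/*
	 * Sketch: apply a v2 fscrypt policy pairing AES-256-XTS (contents)
	 * with AES-256-HCTR2 (filenames).  The key named by 'key_id' must
	 * already have been added via FS_IOC_ADD_ENCRYPTION_KEY, and the
	 * directory referred to by 'dirfd' must be empty.
	 */
	static int set_hctr2_policy(int dirfd,
				    const __u8 key_id[FSCRYPT_KEY_IDENTIFIER_SIZE])
	{
		struct fscrypt_policy_v2 policy = {
			.version = FSCRYPT_POLICY_V2,
			.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS,
			.filenames_encryption_mode = FSCRYPT_MODE_AES_256_HCTR2,
			.flags = FSCRYPT_POLICY_FLAGS_PAD_32,
		};

		memcpy(policy.master_key_identifier, key_id,
		       FSCRYPT_KEY_IDENTIFIER_SIZE);

		return ioctl(dirfd, FS_IOC_SET_ENCRYPTION_POLICY, &policy);
	}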
diff --git a/MAINTAINERS b/MAINTAINERS
@@ -9079,15 +9079,24 @@ S:	Supported
 F:	Documentation/admin-guide/perf/hns3-pmu.rst
 F:	drivers/perf/hisilicon/hns3_pmu.c
 
-HISILICON QM AND ZIP Controller DRIVER
+HISILICON QM DRIVER
 M:	Weili Qian <qianweili@huawei.com>
 M:	Zhou Wang <wangzhou1@hisilicon.com>
 L:	linux-crypto@vger.kernel.org
 S:	Maintained
+F:	drivers/crypto/hisilicon/Kconfig
+F:	drivers/crypto/hisilicon/Makefile
 F:	drivers/crypto/hisilicon/qm.c
 F:	drivers/crypto/hisilicon/sgl.c
 F:	include/linux/hisi_acc_qm.h
 
+HISILICON ZIP Controller DRIVER
+M:	Yang Shen <shenyang39@huawei.com>
+M:	Zhou Wang <wangzhou1@hisilicon.com>
+L:	linux-crypto@vger.kernel.org
+S:	Maintained
+F:	Documentation/ABI/testing/debugfs-hisi-zip
+F:	drivers/crypto/hisilicon/qm.c
+F:	drivers/crypto/hisilicon/sgl.c
+F:	drivers/crypto/hisilicon/zip/
+F:	include/linux/hisi_acc_qm.h
+
 HISILICON ROCE DRIVER
 M:	Wenpeng Liang <liangwenpeng@huawei.com>
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
@@ -63,7 +63,7 @@ config CRYPTO_SHA512_ARM
 	  using optimized ARM assembler and NEON, when available.
 
 config CRYPTO_BLAKE2S_ARM
-	tristate "BLAKE2s digest algorithm (ARM)"
+	bool "BLAKE2s digest algorithm (ARM)"
 	select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
 	help
 	  BLAKE2s digest algorithm optimized with ARM scalar instructions. This
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
@@ -9,8 +9,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
 obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
-obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += blake2s-arm.o
-obj-$(if $(CONFIG_CRYPTO_BLAKE2S_ARM),y) += libblake2s-arm.o
+obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += libblake2s-arm.o
 obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o
 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
 obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o
@@ -32,7 +31,6 @@ sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
 sha256-arm-y	:= sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
 sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o
 sha512-arm-y	:= sha512-core.o sha512-glue.o $(sha512-arm-neon-y)
-blake2s-arm-y   := blake2s-shash.o
 libblake2s-arm-y:= blake2s-core.o blake2s-glue.o
 blake2b-neon-y  := blake2b-neon-core.o blake2b-neon-glue.o
 sha1-arm-ce-y	:= sha1-ce-core.o sha1-ce-glue.o
diff --git a/arch/arm/crypto/blake2s-shash.c b/arch/arm/crypto/blake2s-shash.c
deleted file (75 lines)
@@ -1,75 +0,0 @@

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * BLAKE2s digest algorithm, ARM scalar implementation
 *
 * Copyright 2020 Google LLC
 */

#include <crypto/internal/blake2s.h>
#include <crypto/internal/hash.h>

#include <linux/module.h>

static int crypto_blake2s_update_arm(struct shash_desc *desc,
				     const u8 *in, unsigned int inlen)
{
	return crypto_blake2s_update(desc, in, inlen, false);
}

static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out)
{
	return crypto_blake2s_final(desc, out, false);
}

#define BLAKE2S_ALG(name, driver_name, digest_size)			\
	{								\
		.base.cra_name		= name,				\
		.base.cra_driver_name	= driver_name,			\
		.base.cra_priority	= 200,				\
		.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,	\
		.base.cra_blocksize	= BLAKE2S_BLOCK_SIZE,		\
		.base.cra_ctxsize	= sizeof(struct blake2s_tfm_ctx), \
		.base.cra_module	= THIS_MODULE,			\
		.digestsize		= digest_size,			\
		.setkey			= crypto_blake2s_setkey,	\
		.init			= crypto_blake2s_init,		\
		.update			= crypto_blake2s_update_arm,	\
		.final			= crypto_blake2s_final_arm,	\
		.descsize		= sizeof(struct blake2s_state),	\
	}

static struct shash_alg blake2s_arm_algs[] = {
	BLAKE2S_ALG("blake2s-128", "blake2s-128-arm", BLAKE2S_128_HASH_SIZE),
	BLAKE2S_ALG("blake2s-160", "blake2s-160-arm", BLAKE2S_160_HASH_SIZE),
	BLAKE2S_ALG("blake2s-224", "blake2s-224-arm", BLAKE2S_224_HASH_SIZE),
	BLAKE2S_ALG("blake2s-256", "blake2s-256-arm", BLAKE2S_256_HASH_SIZE),
};

static int __init blake2s_arm_mod_init(void)
{
	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
		crypto_register_shashes(blake2s_arm_algs,
					ARRAY_SIZE(blake2s_arm_algs)) : 0;
}

static void __exit blake2s_arm_mod_exit(void)
{
	if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
		crypto_unregister_shashes(blake2s_arm_algs,
					  ARRAY_SIZE(blake2s_arm_algs));
}

module_init(blake2s_arm_mod_init);
module_exit(blake2s_arm_mod_exit);

MODULE_DESCRIPTION("BLAKE2s digest algorithm, ARM scalar implementation");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
MODULE_ALIAS_CRYPTO("blake2s-128");
MODULE_ALIAS_CRYPTO("blake2s-128-arm");
MODULE_ALIAS_CRYPTO("blake2s-160");
MODULE_ALIAS_CRYPTO("blake2s-160-arm");
MODULE_ALIAS_CRYPTO("blake2s-224");
MODULE_ALIAS_CRYPTO("blake2s-224-arm");
MODULE_ALIAS_CRYPTO("blake2s-256");
MODULE_ALIAS_CRYPTO("blake2s-256-arm");
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
@@ -71,6 +71,12 @@ config CRYPTO_GHASH_ARM64_CE
 	select CRYPTO_HASH
 	select CRYPTO_GF128MUL
 	select CRYPTO_LIB_AES
 	select CRYPTO_AEAD
 
+config CRYPTO_POLYVAL_ARM64_CE
+	tristate "POLYVAL using ARMv8 Crypto Extensions (for HCTR2)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_POLYVAL
+
 config CRYPTO_CRCT10DIF_ARM64_CE
 	tristate "CRCT10DIF digest algorithm using PMULL instructions"
@@ -96,13 +102,13 @@ config CRYPTO_AES_ARM64_CE_CCM
 	select CRYPTO_LIB_AES
 
 config CRYPTO_AES_ARM64_CE_BLK
-	tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
+	tristate "AES in ECB/CBC/CTR/XTS/XCTR modes using ARMv8 Crypto Extensions"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_SKCIPHER
 	select CRYPTO_AES_ARM64_CE
 
 config CRYPTO_AES_ARM64_NEON_BLK
-	tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
+	tristate "AES in ECB/CBC/CTR/XTS/XCTR modes using NEON instructions"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_AES
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
@@ -32,6 +32,9 @@ sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o
 obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
 ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
 
+obj-$(CONFIG_CRYPTO_POLYVAL_ARM64_CE) += polyval-ce.o
+polyval-ce-y := polyval-ce-glue.o polyval-ce-core.o
+
 obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM64_CE) += crct10dif-ce.o
 crct10dif-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
@@ -34,10 +34,11 @@
 #define aes_essiv_cbc_encrypt	ce_aes_essiv_cbc_encrypt
 #define aes_essiv_cbc_decrypt	ce_aes_essiv_cbc_decrypt
 #define aes_ctr_encrypt		ce_aes_ctr_encrypt
+#define aes_xctr_encrypt	ce_aes_xctr_encrypt
 #define aes_xts_encrypt		ce_aes_xts_encrypt
 #define aes_xts_decrypt		ce_aes_xts_decrypt
 #define aes_mac_update		ce_aes_mac_update
-MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS/XCTR using ARMv8 Crypto Extensions");
 #else
 #define MODE			"neon"
 #define PRIO			200
@@ -50,16 +51,18 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
 #define aes_essiv_cbc_encrypt	neon_aes_essiv_cbc_encrypt
 #define aes_essiv_cbc_decrypt	neon_aes_essiv_cbc_decrypt
 #define aes_ctr_encrypt		neon_aes_ctr_encrypt
+#define aes_xctr_encrypt	neon_aes_xctr_encrypt
 #define aes_xts_encrypt		neon_aes_xts_encrypt
 #define aes_xts_decrypt		neon_aes_xts_decrypt
 #define aes_mac_update		neon_aes_mac_update
-MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS/XCTR using ARMv8 NEON");
 #endif
 #if defined(USE_V8_CRYPTO_EXTENSIONS) || !IS_ENABLED(CONFIG_CRYPTO_AES_ARM64_BS)
 MODULE_ALIAS_CRYPTO("ecb(aes)");
 MODULE_ALIAS_CRYPTO("cbc(aes)");
 MODULE_ALIAS_CRYPTO("ctr(aes)");
 MODULE_ALIAS_CRYPTO("xts(aes)");
+MODULE_ALIAS_CRYPTO("xctr(aes)");
 #endif
 MODULE_ALIAS_CRYPTO("cts(cbc(aes))");
 MODULE_ALIAS_CRYPTO("essiv(cbc(aes),sha256)");
@@ -89,6 +92,9 @@ asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
 asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
 				int rounds, int bytes, u8 ctr[]);
 
+asmlinkage void aes_xctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
+				 int rounds, int bytes, u8 ctr[], int byte_ctr);
+
 asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
 				int rounds, int bytes, u32 const rk2[], u8 iv[],
 				int first);
@@ -442,6 +448,52 @@ static int __maybe_unused essiv_cbc_decrypt(struct skcipher_request *req)
 	return err ?: cbc_decrypt_walk(req, &walk);
 }
 
+static int __maybe_unused xctr_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err, rounds = 6 + ctx->key_length / 4;
+	struct skcipher_walk walk;
+	unsigned int byte_ctr = 0;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	while (walk.nbytes > 0) {
+		const u8 *src = walk.src.virt.addr;
+		unsigned int nbytes = walk.nbytes;
+		u8 *dst = walk.dst.virt.addr;
+		u8 buf[AES_BLOCK_SIZE];
+
+		/*
+		 * If given less than 16 bytes, we must copy the partial block
+		 * into a temporary buffer of 16 bytes to avoid out of bounds
+		 * reads and writes.  Furthermore, this code is somewhat unusual
+		 * in that it expects the end of the data to be at the end of
+		 * the temporary buffer, rather than the start of the data at
+		 * the start of the temporary buffer.
+		 */
+		if (unlikely(nbytes < AES_BLOCK_SIZE))
+			src = dst = memcpy(buf + sizeof(buf) - nbytes,
+					   src, nbytes);
+		else if (nbytes < walk.total)
+			nbytes &= ~(AES_BLOCK_SIZE - 1);
+
+		kernel_neon_begin();
+		aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
+				 walk.iv, byte_ctr);
+		kernel_neon_end();
+
+		if (unlikely(nbytes < AES_BLOCK_SIZE))
+			memcpy(walk.dst.virt.addr,
+			       buf + sizeof(buf) - nbytes, nbytes);
+		byte_ctr += nbytes;
+
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+	}
+
+	return err;
+}
+
 static int __maybe_unused ctr_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -457,6 +509,14 @@ static int __maybe_unused ctr_encrypt(struct skcipher_request *req)
 		u8 *dst = walk.dst.virt.addr;
 		u8 buf[AES_BLOCK_SIZE];
 
+		/*
+		 * If given less than 16 bytes, we must copy the partial block
+		 * into a temporary buffer of 16 bytes to avoid out of bounds
+		 * reads and writes.  Furthermore, this code is somewhat unusual
+		 * in that it expects the end of the data to be at the end of
+		 * the temporary buffer, rather than the start of the data at
+		 * the start of the temporary buffer.
+		 */
 		if (unlikely(nbytes < AES_BLOCK_SIZE))
 			src = dst = memcpy(buf + sizeof(buf) - nbytes,
 					   src, nbytes);
@@ -669,6 +729,22 @@ static struct skcipher_alg aes_algs[] = { {
 	.setkey		= skcipher_aes_setkey,
 	.encrypt	= ctr_encrypt,
 	.decrypt	= ctr_encrypt,
+}, {
+	.base = {
+		.cra_name		= "xctr(aes)",
+		.cra_driver_name	= "xctr-aes-" MODE,
+		.cra_priority		= PRIO,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_module		= THIS_MODULE,
+	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.chunksize	= AES_BLOCK_SIZE,
+	.setkey		= skcipher_aes_setkey,
+	.encrypt	= xctr_encrypt,
+	.decrypt	= xctr_encrypt,
 }, {
 	.base = {
 		.cra_name	= "xts(aes)",
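[Illustration, not part of the patch.] The partial-block handling shared by ctr_encrypt() and xctr_encrypt() above stages a short tail at the *end* of a 16-byte scratch buffer, because the assembly loads and stores relative to the end of the data. A standalone C sketch of just that staging trick follows; the names (xctr_tail_copy, process_block16) are hypothetical, and process_block16 is a dummy stand-in for the real keystream XOR::

	#include <stdint.h>
	#include <string.h>

	#define BLOCK_SIZE 16

	/*
	 * Dummy stand-in for a routine that always reads and writes a full
	 * 16 bytes (like the CTR/XCTR assembly tail path).
	 */
	static void process_block16(uint8_t *dst, const uint8_t *src)
	{
		for (int i = 0; i < BLOCK_SIZE; i++)
			dst[i] = src[i] ^ 0xAA;	/* always touches all 16 bytes */
	}

	/*
	 * Sketch of the staging used by the arm64 glue code: a tail of
	 * nbytes < 16 is copied to the END of a 16-byte buffer, so a routine
	 * that unconditionally touches 16 bytes stays in bounds, and the real
	 * bytes sit exactly where the end-relative loads expect them.  The
	 * bytes ahead of the tail are scratch; their output is discarded.
	 */
	static void xctr_tail_copy(uint8_t *dst, const uint8_t *src,
				   unsigned int nbytes)
	{
		uint8_t buf[BLOCK_SIZE];

		if (nbytes < BLOCK_SIZE) {
			memcpy(buf + sizeof(buf) - nbytes, src, nbytes);
			process_block16(buf, buf);
			/* Copy only the valid tail back out. */
			memcpy(dst, buf + sizeof(buf) - nbytes, nbytes);
		} else {
			process_block16(dst, src);
		}
	}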
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
@@ -318,127 +318,211 @@ AES_FUNC_END(aes_cbc_cts_decrypt)
 	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 	.previous
 
 	/*
-	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int bytes, u8 ctr[])
+	 * This macro generates the code for CTR and XCTR mode.
 	 */
+.macro ctr_encrypt xctr
+	// Arguments
+	OUT		.req x0
+	IN		.req x1
+	KEY		.req x2
+	ROUNDS_W	.req w3
+	BYTES_W		.req w4
+	IV		.req x5
+	BYTE_CTR_W	.req w6		// XCTR only
+	// Intermediate values
+	CTR_W		.req w11	// XCTR only
+	CTR		.req x11	// XCTR only
+	IV_PART		.req x12
+	BLOCKS		.req x13
+	BLOCKS_W	.req w13
 
-AES_FUNC_START(aes_ctr_encrypt)
 	stp		x29, x30, [sp, #-16]!
 	mov		x29, sp
 
-	enc_prepare	w3, x2, x12
-	ld1		{vctr.16b}, [x5]
+	enc_prepare	ROUNDS_W, KEY, IV_PART
+	ld1		{vctr.16b}, [IV]
 
-	umov		x12, vctr.d[1] /* keep swabbed ctr in reg */
-	rev		x12, x12
+	/*
+	 * Keep 64 bits of the IV in a register.  For CTR mode this lets us
+	 * easily increment the IV.  For XCTR mode this lets us efficiently XOR
+	 * the 64-bit counter with the IV.
+	 */
+	.if \xctr
+		umov		IV_PART, vctr.d[0]
+		lsr		CTR_W, BYTE_CTR_W, #4
+	.else
+		umov		IV_PART, vctr.d[1]
+		rev		IV_PART, IV_PART
+	.endif
 
-.LctrloopNx:
-	add		w7, w4, #15
-	sub		w4, w4, #MAX_STRIDE << 4
-	lsr		w7, w7, #4
+.LctrloopNx\xctr:
+	add		BLOCKS_W, BYTES_W, #15
+	sub		BYTES_W, BYTES_W, #MAX_STRIDE << 4
+	lsr		BLOCKS_W, BLOCKS_W, #4
 	mov		w8, #MAX_STRIDE
-	cmp		w7, w8
-	csel		w7, w7, w8, lt
-	adds		x12, x12, x7
+	cmp		BLOCKS_W, w8
+	csel		BLOCKS_W, BLOCKS_W, w8, lt
 
+	/*
+	 * Set up the counter values in v0-v{MAX_STRIDE-1}.
+	 *
+	 * If we are encrypting less than MAX_STRIDE blocks, the tail block
+	 * handling code expects the last keystream block to be in
+	 * v{MAX_STRIDE-1}.  For example: if encrypting two blocks with
+	 * MAX_STRIDE=5, then v3 and v4 should have the next two counter blocks.
+	 */
+	.if \xctr
+		add		CTR, CTR, BLOCKS
+	.else
+		adds		IV_PART, IV_PART, BLOCKS
+	.endif
 	mov		v0.16b, vctr.16b
 	mov		v1.16b, vctr.16b
 	mov		v2.16b, vctr.16b
 	mov		v3.16b, vctr.16b
ST5(	mov		v4.16b, vctr.16b	)
-	bcs		0f
+	.if \xctr
+		sub		x6, CTR, #MAX_STRIDE - 1
+		sub		x7, CTR, #MAX_STRIDE - 2
+		sub		x8, CTR, #MAX_STRIDE - 3
+		sub		x9, CTR, #MAX_STRIDE - 4
+ST5(		sub		x10, CTR, #MAX_STRIDE - 5	)
+		eor		x6, x6, IV_PART
+		eor		x7, x7, IV_PART
+		eor		x8, x8, IV_PART
+		eor		x9, x9, IV_PART
+ST5(		eor		x10, x10, IV_PART	)
+		mov		v0.d[0], x6
+		mov		v1.d[0], x7
+		mov		v2.d[0], x8
+		mov		v3.d[0], x9
+ST5(		mov		v4.d[0], x10	)
+	.else
+		bcs		0f
+		.subsection	1
+		/*
+		 * This subsection handles carries.
+		 *
+		 * Conditional branching here is allowed with respect to time
+		 * invariance since the branches are dependent on the IV instead
+		 * of the plaintext or key.  This code is rarely executed in
+		 * practice anyway.
+		 */
 
-	.subsection	1
-	/* apply carry to outgoing counter */
-0:	umov		x8, vctr.d[0]
-	rev		x8, x8
-	add		x8, x8, #1
-	rev		x8, x8
-	ins		vctr.d[0], x8
+		/* Apply carry to outgoing counter. */
+0:		umov		x8, vctr.d[0]
+		rev		x8, x8
+		add		x8, x8, #1
+		rev		x8, x8
+		ins		vctr.d[0], x8
 
-	/* apply carry to N counter blocks for N := x12 */
-	cbz		x12, 2f
-	adr		x16, 1f
-	sub		x16, x16, x12, lsl #3
-	br		x16
-	bti		c
-	mov		v0.d[0], vctr.d[0]
-	bti		c
-	mov		v1.d[0], vctr.d[0]
-	bti		c
-	mov		v2.d[0], vctr.d[0]
-	bti		c
-	mov		v3.d[0], vctr.d[0]
-ST5(	bti		c		)
-ST5(	mov		v4.d[0], vctr.d[0]	)
-1:	b		2f
-	.previous
+		/*
+		 * Apply carry to counter blocks if needed.
+		 *
+		 * Since the carry flag was set, we know 0 <= IV_PART <
+		 * MAX_STRIDE.  Using the value of IV_PART we can determine how
+		 * many counter blocks need to be updated.
+		 */
+		cbz		IV_PART, 2f
+		adr		x16, 1f
+		sub		x16, x16, IV_PART, lsl #3
+		br		x16
+		bti		c
+		mov		v0.d[0], vctr.d[0]
+		bti		c
+		mov		v1.d[0], vctr.d[0]
+		bti		c
+		mov		v2.d[0], vctr.d[0]
+		bti		c
+		mov		v3.d[0], vctr.d[0]
+ST5(		bti		c	)
+ST5(		mov		v4.d[0], vctr.d[0]	)
+1:		b		2f
+		.previous
 
-2:	rev		x7, x12
-	ins		vctr.d[1], x7
-	sub		x7, x12, #MAX_STRIDE - 1
-	sub		x8, x12, #MAX_STRIDE - 2
-	sub		x9, x12, #MAX_STRIDE - 3
-	rev		x7, x7
-	rev		x8, x8
-	mov		v1.d[1], x7
-	rev		x9, x9
-ST5(	sub		x10, x12, #MAX_STRIDE - 4	)
-	mov		v2.d[1], x8
-ST5(	rev		x10, x10	)
-	mov		v3.d[1], x9
-ST5(	mov		v4.d[1], x10	)
-	tbnz		w4, #31, .Lctrtail
-	ld1		{v5.16b-v7.16b}, [x1], #48
+2:		rev		x7, IV_PART
+		ins		vctr.d[1], x7
+		sub		x7, IV_PART, #MAX_STRIDE - 1
+		sub		x8, IV_PART, #MAX_STRIDE - 2
+		sub		x9, IV_PART, #MAX_STRIDE - 3
+		rev		x7, x7
+		rev		x8, x8
+		mov		v1.d[1], x7
+		rev		x9, x9
+ST5(		sub		x10, IV_PART, #MAX_STRIDE - 4	)
+		mov		v2.d[1], x8
+ST5(		rev		x10, x10	)
+		mov		v3.d[1], x9
+ST5(		mov		v4.d[1], x10	)
+	.endif
+
+	/*
+	 * If there are at least MAX_STRIDE blocks left, XOR the data with
+	 * keystream and store.  Otherwise jump to tail handling.
+	 */
+	tbnz		BYTES_W, #31, .Lctrtail\xctr
+	ld1		{v5.16b-v7.16b}, [IN], #48
ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)
 	eor		v0.16b, v5.16b, v0.16b
-ST4(	ld1		{v5.16b}, [x1], #16		)
+ST4(	ld1		{v5.16b}, [IN], #16		)
 	eor		v1.16b, v6.16b, v1.16b
-ST5(	ld1		{v5.16b-v6.16b}, [x1], #32	)
+ST5(	ld1		{v5.16b-v6.16b}, [IN], #32	)
 	eor		v2.16b, v7.16b, v2.16b
 	eor		v3.16b, v5.16b, v3.16b
ST5(	eor		v4.16b, v6.16b, v4.16b		)
-	st1		{v0.16b-v3.16b}, [x0], #64
-ST5(	st1		{v4.16b}, [x0], #16	)
-	cbz		w4, .Lctrout
-	b		.LctrloopNx
+	st1		{v0.16b-v3.16b}, [OUT], #64
+ST5(	st1		{v4.16b}, [OUT], #16	)
+	cbz		BYTES_W, .Lctrout\xctr
+	b		.LctrloopNx\xctr
 
-.Lctrout:
-	st1		{vctr.16b}, [x5] /* return next CTR value */
+.Lctrout\xctr:
+	.if !\xctr
+		st1		{vctr.16b}, [IV] /* return next CTR value */
+	.endif
 	ldp		x29, x30, [sp], #16
 	ret
 
-.Lctrtail:
-	/* XOR up to MAX_STRIDE * 16 - 1 bytes of in/output with v0 ... v3/v4 */
+.Lctrtail\xctr:
+	/*
+	 * Handle up to MAX_STRIDE * 16 - 1 bytes of plaintext
+	 *
+	 * This code expects the last keystream block to be in v{MAX_STRIDE-1}.
+	 * For example: if encrypting two blocks with MAX_STRIDE=5, then v3 and
+	 * v4 should have the next two counter blocks.
+	 *
+	 * This allows us to store the ciphertext by writing to overlapping
+	 * regions of memory.  Any invalid ciphertext blocks get overwritten by
+	 * correctly computed blocks.  This approach greatly simplifies the
+	 * logic for storing the ciphertext.
+	 */
 	mov		x16, #16
-	ands		x6, x4, #0xf
-	csel		x13, x6, x16, ne
+	ands		w7, BYTES_W, #0xf
+	csel		x13, x7, x16, ne
 
-ST5(	cmp		w4, #64 - (MAX_STRIDE << 4)	)
+ST5(	cmp		BYTES_W, #64 - (MAX_STRIDE << 4))
ST5(	csel		x14, x16, xzr, gt	)
-	cmp		w4, #48 - (MAX_STRIDE << 4)
+	cmp		BYTES_W, #48 - (MAX_STRIDE << 4)
 	csel		x15, x16, xzr, gt
-	cmp		w4, #32 - (MAX_STRIDE << 4)
+	cmp		BYTES_W, #32 - (MAX_STRIDE << 4)
 	csel		x16, x16, xzr, gt
-	cmp		w4, #16 - (MAX_STRIDE << 4)
+	cmp		BYTES_W, #16 - (MAX_STRIDE << 4)
 
-	adr_l		x12, .Lcts_permute_table
-	add		x12, x12, x13
-	ble		.Lctrtail1x
+	adr_l		x9, .Lcts_permute_table
+	add		x9, x9, x13
+	ble		.Lctrtail1x\xctr
 
-ST5(	ld1		{v5.16b}, [x1], x14	)
-	ld1		{v6.16b}, [x1], x15
-	ld1		{v7.16b}, [x1], x16
+ST5(	ld1		{v5.16b}, [IN], x14	)
+	ld1		{v6.16b}, [IN], x15
+	ld1		{v7.16b}, [IN], x16
 
ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)
 
-	ld1		{v8.16b}, [x1], x13
-	ld1		{v9.16b}, [x1]
-	ld1		{v10.16b}, [x12]
+	ld1		{v8.16b}, [IN], x13
+	ld1		{v9.16b}, [IN]
+	ld1		{v10.16b}, [x9]
 
ST4(	eor		v6.16b, v6.16b, v0.16b	)
ST4(	eor		v7.16b, v7.16b, v1.16b	)
@@ -453,32 +537,91 @@ ST5(	eor		v7.16b, v7.16b, v2.16b	)
ST5(	eor		v8.16b, v8.16b, v3.16b	)
ST5(	eor		v9.16b, v9.16b, v4.16b	)
 
-ST5(	st1		{v5.16b}, [x0], x14	)
-	st1		{v6.16b}, [x0], x15
-	st1		{v7.16b}, [x0], x16
-	add		x13, x13, x0
+ST5(	st1		{v5.16b}, [OUT], x14	)
+	st1		{v6.16b}, [OUT], x15
+	st1		{v7.16b}, [OUT], x16
+	add		x13, x13, OUT
 	st1		{v9.16b}, [x13]		// overlapping stores
-	st1		{v8.16b}, [x0]
-	b		.Lctrout
+	st1		{v8.16b}, [OUT]
+	b		.Lctrout\xctr
 
-.Lctrtail1x:
-	sub		x7, x6, #16
-	csel		x6, x6, x7, eq
-	add		x1, x1, x6
-	add		x0, x0, x6
-	ld1		{v5.16b}, [x1]
-	ld1		{v6.16b}, [x0]
+.Lctrtail1x\xctr:
+	/*
+	 * Handle <= 16 bytes of plaintext
+	 *
+	 * This code always reads and writes 16 bytes.  To avoid out of bounds
+	 * accesses, XCTR and CTR modes must use a temporary buffer when
+	 * encrypting/decrypting less than 16 bytes.
+	 *
+	 * This code is unusual in that it loads the input and stores the output
+	 * relative to the end of the buffers rather than relative to the start.
+	 * This causes unusual behaviour when encrypting/decrypting less than 16
+	 * bytes; the end of the data is expected to be at the end of the
+	 * temporary buffer rather than the start of the data being at the start
+	 * of the temporary buffer.
+	 */
+	sub		x8, x7, #16
+	csel		x7, x7, x8, eq
+	add		IN, IN, x7
+	add		OUT, OUT, x7
+	ld1		{v5.16b}, [IN]
+	ld1		{v6.16b}, [OUT]
ST5(	mov		v3.16b, v4.16b	)
-	encrypt_block	v3, w3, x2, x8, w7
-	ld1		{v10.16b-v11.16b}, [x12]
+	encrypt_block	v3, ROUNDS_W, KEY, x8, w7
+	ld1		{v10.16b-v11.16b}, [x9]
 	tbl		v3.16b, {v3.16b}, v10.16b
 	sshr		v11.16b, v11.16b, #7
 	eor		v5.16b, v5.16b, v3.16b
 	bif		v5.16b, v6.16b, v11.16b
-	st1		{v5.16b}, [x0]
-	b		.Lctrout
+	st1		{v5.16b}, [OUT]
+	b		.Lctrout\xctr
+
+	// Arguments
+	.unreq OUT
+	.unreq IN
+	.unreq KEY
+	.unreq ROUNDS_W
+	.unreq BYTES_W
+	.unreq IV
+	.unreq BYTE_CTR_W	// XCTR only
+	// Intermediate values
+	.unreq CTR_W		// XCTR only
+	.unreq CTR		// XCTR only
+	.unreq IV_PART
+	.unreq BLOCKS
+	.unreq BLOCKS_W
+.endm
+
+	/*
+	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int bytes, u8 ctr[])
+	 *
+	 * The input and output buffers must always be at least 16 bytes even if
+	 * encrypting/decrypting less than 16 bytes.  Otherwise out of bounds
+	 * accesses will occur.  The data to be encrypted/decrypted is expected
+	 * to be at the end of this 16-byte temporary buffer rather than the
+	 * start.
+	 */
+
+AES_FUNC_START(aes_ctr_encrypt)
+	ctr_encrypt 0
+AES_FUNC_END(aes_ctr_encrypt)
+
+	/*
+	 * aes_xctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int bytes, u8 const iv[], int byte_ctr)
+	 *
+	 * The input and output buffers must always be at least 16 bytes even if
+	 * encrypting/decrypting less than 16 bytes.  Otherwise out of bounds
+	 * accesses will occur.  The data to be encrypted/decrypted is expected
+	 * to be at the end of this 16-byte temporary buffer rather than the
+	 * start.
+	 */
+
+AES_FUNC_START(aes_xctr_encrypt)
+	ctr_encrypt 1
+AES_FUNC_END(aes_xctr_encrypt)
 
 	/*
 	 * aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
@@ -66,7 +66,7 @@
 	prepare	crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
 	.endm
 
-	/* apply SubBytes transformation using the the preloaded Sbox */
+	/* apply SubBytes transformation using the preloaded Sbox */
 	.macro	sub_bytes, in
 	sub		v9.16b, \in\().16b, v15.16b
 	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
@@ -52,7 +52,7 @@ static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
 {
 	if (unlikely(!dctx->sset)) {
 		if (!dctx->rset) {
-			poly1305_init_arch(dctx, src);
+			poly1305_init_arm64(&dctx->h, src);
 			src += POLY1305_BLOCK_SIZE;
 			len -= POLY1305_BLOCK_SIZE;
 			dctx->rset = 1;
diff --git a/arch/arm64/crypto/polyval-ce-core.S b/arch/arm64/crypto/polyval-ce-core.S
new file (361 lines)

/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Implementation of POLYVAL using ARMv8 Crypto Extensions.
 *
 * Copyright 2021 Google LLC
 */
/*
 * This is an efficient implementation of POLYVAL using ARMv8 Crypto Extensions
 * It works on 8 blocks at a time, by precomputing the first 8 keys powers h^8,
 * ..., h^1 in the POLYVAL finite field. This precomputation allows us to split
 * finite field multiplication into two steps.
 *
 * In the first step, we consider h^i, m_i as normal polynomials of degree less
 * than 128. We then compute p(x) = h^8m_0 + ... + h^1m_7 where multiplication
 * is simply polynomial multiplication.
 *
 * In the second step, we compute the reduction of p(x) modulo the finite field
 * modulus g(x) = x^128 + x^127 + x^126 + x^121 + 1.
 *
 * This two step process is equivalent to computing h^8m_0 + ... + h^1m_7 where
 * multiplication is finite field multiplication. The advantage is that the
 * two-step process only requires 1 finite field reduction for every 8
 * polynomial multiplications. Further parallelism is gained by interleaving the
 * multiplications and polynomial reductions.
 */

#include <linux/linkage.h>
#define STRIDE_BLOCKS 8

KEY_POWERS	.req	x0
MSG		.req	x1
BLOCKS_LEFT	.req	x2
ACCUMULATOR	.req	x3
KEY_START	.req	x10
EXTRA_BYTES	.req	x11
TMP		.req	x13

M0	.req	v0
M1	.req	v1
M2	.req	v2
M3	.req	v3
M4	.req	v4
M5	.req	v5
M6	.req	v6
M7	.req	v7
KEY8	.req	v8
KEY7	.req	v9
KEY6	.req	v10
KEY5	.req	v11
KEY4	.req	v12
KEY3	.req	v13
KEY2	.req	v14
KEY1	.req	v15
PL	.req	v16
PH	.req	v17
TMP_V	.req	v18
LO	.req	v20
MI	.req	v21
HI	.req	v22
SUM	.req	v23
GSTAR	.req	v24

	.text

	.arch	armv8-a+crypto
	.align	4

.Lgstar:
	.quad	0xc200000000000000, 0xc200000000000000

/*
 * Computes the product of two 128-bit polynomials in X and Y and XORs the
 * components of the 256-bit product into LO, MI, HI.
 *
 * Given:
 *	X = [X_1 : X_0]
 *	Y = [Y_1 : Y_0]
 *
 * We compute:
 *	LO += X_0 * Y_0
 *	MI += (X_0 + X_1) * (Y_0 + Y_1)
 *	HI += X_1 * Y_1
 *
 * Later, the 256-bit result can be extracted as:
 *	[HI_1 : HI_0 + HI_1 + MI_1 + LO_1 : LO_1 + HI_0 + MI_0 + LO_0 : LO_0]
 * This step is done when computing the polynomial reduction for efficiency
 * reasons.
 *
 * Karatsuba multiplication is used instead of Schoolbook multiplication because
 * it was found to be slightly faster on ARM64 CPUs.
 *
 */
.macro karatsuba1 X Y
	X .req \X
	Y .req \Y
	ext	v25.16b, X.16b, X.16b, #8
	ext	v26.16b, Y.16b, Y.16b, #8
	eor	v25.16b, v25.16b, X.16b
	eor	v26.16b, v26.16b, Y.16b
	pmull2	v28.1q, X.2d, Y.2d
	pmull	v29.1q, X.1d, Y.1d
	pmull	v27.1q, v25.1d, v26.1d
	eor	HI.16b, HI.16b, v28.16b
	eor	LO.16b, LO.16b, v29.16b
	eor	MI.16b, MI.16b, v27.16b
	.unreq X
	.unreq Y
.endm

/*
 * Same as karatsuba1, except overwrites HI, LO, MI rather than XORing into
 * them.
 */
.macro karatsuba1_store X Y
	X .req \X
	Y .req \Y
	ext	v25.16b, X.16b, X.16b, #8
	ext	v26.16b, Y.16b, Y.16b, #8
	eor	v25.16b, v25.16b, X.16b
	eor	v26.16b, v26.16b, Y.16b
	pmull2	HI.1q, X.2d, Y.2d
	pmull	LO.1q, X.1d, Y.1d
	pmull	MI.1q, v25.1d, v26.1d
	.unreq X
	.unreq Y
.endm

/*
 * Computes the 256-bit polynomial represented by LO, HI, MI. Stores
 * the result in PL, PH.
 * [PH : PL] =
 *	[HI_1 : HI_1 + HI_0 + MI_1 + LO_1 : HI_0 + MI_0 + LO_1 + LO_0 : LO_0]
 */
.macro karatsuba2
	// v4 = [HI_1 + MI_1 : HI_0 + MI_0]
	eor	v4.16b, HI.16b, MI.16b
	// v4 = [HI_1 + MI_1 + LO_1 : HI_0 + MI_0 + LO_0]
	eor	v4.16b, v4.16b, LO.16b
	// v5 = [HI_0 : LO_1]
	ext	v5.16b, LO.16b, HI.16b, #8
	// v4 = [HI_1 + HI_0 + MI_1 + LO_1 : HI_0 + MI_0 + LO_1 + LO_0]
	eor	v4.16b, v4.16b, v5.16b
	// HI = [HI_0 : HI_1]
	ext	HI.16b, HI.16b, HI.16b, #8
	// LO = [LO_0 : LO_1]
	ext	LO.16b, LO.16b, LO.16b, #8
	// PH = [HI_1 : HI_1 + HI_0 + MI_1 + LO_1]
	ext	PH.16b, v4.16b, HI.16b, #8
	// PL = [HI_0 + MI_0 + LO_1 + LO_0 : LO_0]
	ext	PL.16b, LO.16b, v4.16b, #8
.endm

/*
 * Computes the 128-bit reduction of PH : PL. Stores the result in dest.
 *
 * This macro computes p(x) mod g(x) where p(x) is in montgomery form and g(x) =
 * x^128 + x^127 + x^126 + x^121 + 1.
 *
 * We have a 256-bit polynomial PH : PL = P_3 : P_2 : P_1 : P_0 that is the
 * product of two 128-bit polynomials in Montgomery form. We need to reduce it
 * mod g(x). Also, since polynomials in Montgomery form have an "extra" factor
 * of x^128, this product has two extra factors of x^128. To get it back into
 * Montgomery form, we need to remove one of these factors by dividing by x^128.
 *
 * To accomplish both of these goals, we add multiples of g(x) that cancel out
 * the low 128 bits P_1 : P_0, leaving just the high 128 bits. Since the low
 * bits are zero, the polynomial division by x^128 can be done by right
 * shifting.
 *
 * Since the only nonzero term in the low 64 bits of g(x) is the constant term,
 * the multiple of g(x) needed to cancel out P_0 is P_0 * g(x). The CPU can
 * only do 64x64 bit multiplications, so split P_0 * g(x) into x^128 * P_0 +
 * x^64 * g*(x) * P_0 + P_0, where g*(x) is bits 64-127 of g(x). Adding this to
 * the original polynomial gives P_3 : P_2 + P_0 + T_1 : P_1 + T_0 : 0, where T
 * = T_1 : T_0 = g*(x) * P_0. Thus, bits 0-63 got "folded" into bits 64-191.
 *
 * Repeating this same process on the next 64 bits "folds" bits 64-127 into bits
 * 128-255, giving the answer in bits 128-255. This time, we need to cancel P_1
 * + T_0 in bits 64-127. The multiple of g(x) required is (P_1 + T_0) * g(x) *
 * x^64. Adding this to our previous computation gives P_3 + P_1 + T_0 + V_1 :
 * P_2 + P_0 + T_1 + V_0 : 0 : 0, where V = V_1 : V_0 = g*(x) * (P_1 + T_0).
 *
 * So our final computation is:
 *	T = T_1 : T_0 = g*(x) * P_0
 *	V = V_1 : V_0 = g*(x) * (P_1 + T_0)
 *	p(x) / x^{128} mod g(x) = P_3 + P_1 + T_0 + V_1 : P_2 + P_0 + T_1 + V_0
 *
 * The implementation below saves a XOR instruction by computing P_1 + T_0 : P_0
 * + T_1 and XORing into dest, rather than separately XORing P_1 : P_0 and T_0 :
 * T_1 into dest. This allows us to reuse P_1 + T_0 when computing V.
 */
.macro montgomery_reduction dest
	DEST .req \dest
	// TMP_V = T_1 : T_0 = P_0 * g*(x)
	pmull	TMP_V.1q, PL.1d, GSTAR.1d
	// TMP_V = T_0 : T_1
	ext	TMP_V.16b, TMP_V.16b, TMP_V.16b, #8
	// TMP_V = P_1 + T_0 : P_0 + T_1
	eor	TMP_V.16b, PL.16b, TMP_V.16b
	// PH = P_3 + P_1 + T_0 : P_2 + P_0 + T_1
	eor	PH.16b, PH.16b, TMP_V.16b
	// TMP_V = V_1 : V_0 = (P_1 + T_0) * g*(x)
	pmull2	TMP_V.1q, TMP_V.2d, GSTAR.2d
	eor	DEST.16b, PH.16b, TMP_V.16b
	.unreq DEST
.endm

/*
 * Compute Polyval on 8 blocks.
 *
 * If reduce is set, also computes the montgomery reduction of the
 * previous full_stride call and XORs with the first message block.
 * (m_0 + REDUCE(PL, PH))h^8 + ... + m_7h^1.
 * I.e., the first multiplication uses m_0 + REDUCE(PL, PH) instead of m_0.
 *
 * Sets PL, PH.
 */
.macro full_stride reduce
	eor	LO.16b, LO.16b, LO.16b
	eor	MI.16b, MI.16b, MI.16b
	eor	HI.16b, HI.16b, HI.16b

	ld1	{M0.16b, M1.16b, M2.16b, M3.16b}, [MSG], #64
	ld1	{M4.16b, M5.16b, M6.16b, M7.16b}, [MSG], #64

	karatsuba1 M7 KEY1
	.if \reduce
	pmull	TMP_V.1q, PL.1d, GSTAR.1d
	.endif

	karatsuba1 M6 KEY2
	.if \reduce
	ext	TMP_V.16b, TMP_V.16b, TMP_V.16b, #8
	.endif

	karatsuba1 M5 KEY3
	.if \reduce
	eor	TMP_V.16b, PL.16b, TMP_V.16b
	.endif

	karatsuba1 M4 KEY4
	.if \reduce
	eor	PH.16b, PH.16b, TMP_V.16b
	.endif

	karatsuba1 M3 KEY5
	.if \reduce
	pmull2	TMP_V.1q, TMP_V.2d, GSTAR.2d
	.endif

	karatsuba1 M2 KEY6
	.if \reduce
	eor	SUM.16b, PH.16b, TMP_V.16b
	.endif

	karatsuba1 M1 KEY7
	eor	M0.16b, M0.16b, SUM.16b

	karatsuba1 M0 KEY8
	karatsuba2
.endm

/*
 * Handle any extra blocks after full_stride loop.
 */
.macro partial_stride
	add	KEY_POWERS, KEY_START, #(STRIDE_BLOCKS << 4)
	sub	KEY_POWERS, KEY_POWERS, BLOCKS_LEFT, lsl #4
	ld1	{KEY1.16b}, [KEY_POWERS], #16

	ld1	{TMP_V.16b}, [MSG], #16
	eor	SUM.16b, SUM.16b, TMP_V.16b
	karatsuba1_store KEY1 SUM
	sub	BLOCKS_LEFT, BLOCKS_LEFT, #1

	tst	BLOCKS_LEFT, #4
	beq	.Lpartial4BlocksDone
	ld1	{M0.16b, M1.16b, M2.16b, M3.16b}, [MSG], #64
	ld1	{KEY8.16b, KEY7.16b, KEY6.16b, KEY5.16b}, [KEY_POWERS], #64
	karatsuba1 M0 KEY8
	karatsuba1 M1 KEY7
	karatsuba1 M2 KEY6
	karatsuba1 M3 KEY5
.Lpartial4BlocksDone:
	tst	BLOCKS_LEFT, #2
	beq	.Lpartial2BlocksDone
	ld1	{M0.16b, M1.16b}, [MSG], #32
	ld1	{KEY8.16b, KEY7.16b}, [KEY_POWERS], #32
	karatsuba1 M0 KEY8
	karatsuba1 M1 KEY7
.Lpartial2BlocksDone:
	tst	BLOCKS_LEFT, #1
	beq	.LpartialDone
	ld1	{M0.16b}, [MSG], #16
	ld1	{KEY8.16b}, [KEY_POWERS], #16
	karatsuba1 M0 KEY8
.LpartialDone:
	karatsuba2
	montgomery_reduction SUM
.endm

/*
 * Perform montgomery multiplication in GF(2^128) and store result in op1.
 *
 * Computes op1*op2*x^{-128} mod x^128 + x^127 + x^126 + x^121 + 1
 * If op1, op2 are in montgomery form, this computes the montgomery
 * form of op1*op2.
 *
 * void pmull_polyval_mul(u8 *op1, const u8 *op2);
 */
SYM_FUNC_START(pmull_polyval_mul)
	adr	TMP, .Lgstar
	ld1	{GSTAR.2d}, [TMP]
	ld1	{v0.16b}, [x0]
	ld1	{v1.16b}, [x1]
	karatsuba1_store v0 v1
	karatsuba2
	montgomery_reduction SUM
	st1	{SUM.16b}, [x0]
	ret
SYM_FUNC_END(pmull_polyval_mul)

/*
 * Perform polynomial evaluation as specified by POLYVAL. This computes:
 *	h^n * accumulator + h^n * m_0 + ... + h^1 * m_{n-1}
 * where n=nblocks, h is the hash key, and m_i are the message blocks.
 *
 * x0 - pointer to precomputed key powers h^8 ... h^1
 * x1 - pointer to message blocks
 * x2 - number of blocks to hash
 * x3 - pointer to accumulator
 *
 * void pmull_polyval_update(const struct polyval_ctx *ctx, const u8 *in,
 *			     size_t nblocks, u8 *accumulator);
 */
SYM_FUNC_START(pmull_polyval_update)
	adr	TMP, .Lgstar
	mov	KEY_START, KEY_POWERS
	ld1	{GSTAR.2d}, [TMP]
	ld1	{SUM.16b}, [ACCUMULATOR]
	subs	BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
	blt	.LstrideLoopExit
	ld1	{KEY8.16b, KEY7.16b, KEY6.16b, KEY5.16b}, [KEY_POWERS], #64
	ld1	{KEY4.16b, KEY3.16b, KEY2.16b, KEY1.16b}, [KEY_POWERS], #64
	full_stride 0
	subs	BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
	blt	.LstrideLoopExitReduce
.LstrideLoop:
	full_stride 1
	subs	BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
	bge	.LstrideLoop
.LstrideLoopExitReduce:
	montgomery_reduction SUM
.LstrideLoopExit:
	adds	BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
	beq	.LskipPartial
	partial_stride
.LskipPartial:
	st1	{SUM.16b}, [ACCUMULATOR]
	ret
SYM_FUNC_END(pmull_polyval_update)
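Restated in equation form, the reduction implemented by the montgomery_reduction macro above (a transcription of the derivation in the file's own comment, with all coefficient arithmetic in GF(2) and g*(x) denoting bits 64-127 of g(x)):

	T = T_1 : T_0 = g*(x) * P_0
	V = V_1 : V_0 = g*(x) * (P_1 + T_0)
	p(x) * x^{-128} mod g(x) = (P_3 + P_1 + T_0 + V_1) : (P_2 + P_0 + T_1 + V_0)

This is not an independent result; it only collects the final formulas already derived in the comment for quick reference.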
diff --git a/arch/arm64/crypto/polyval-ce-glue.c b/arch/arm64/crypto/polyval-ce-glue.c
new file (191 lines)

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Glue code for POLYVAL using ARMv8 Crypto Extensions
 *
 * Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi>
 * Copyright (c) 2009 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 * Copyright 2021 Google LLC
 */

/*
 * Glue code based on ghash-clmulni-intel_glue.c.
 *
 * This implementation of POLYVAL uses montgomery multiplication accelerated by
 * ARMv8 Crypto Extensions instructions to implement the finite field operations.
 */

#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/polyval.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cpufeature.h>
#include <asm/neon.h>
#include <asm/simd.h>

#define NUM_KEY_POWERS	8

struct polyval_tfm_ctx {
	/*
	 * These powers must be in the order h^8, ..., h^1.
	 */
	u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE];
};

struct polyval_desc_ctx {
	u8 buffer[POLYVAL_BLOCK_SIZE];
	u32 bytes;
};

asmlinkage void pmull_polyval_update(const struct polyval_tfm_ctx *keys,
	const u8 *in, size_t nblocks, u8 *accumulator);
asmlinkage void pmull_polyval_mul(u8 *op1, const u8 *op2);

static void internal_polyval_update(const struct polyval_tfm_ctx *keys,
	const u8 *in, size_t nblocks, u8 *accumulator)
{
	if (likely(crypto_simd_usable())) {
		kernel_neon_begin();
		pmull_polyval_update(keys, in, nblocks, accumulator);
		kernel_neon_end();
	} else {
		polyval_update_non4k(keys->key_powers[NUM_KEY_POWERS-1], in,
			nblocks, accumulator);
	}
}

static void internal_polyval_mul(u8 *op1, const u8 *op2)
{
	if (likely(crypto_simd_usable())) {
		kernel_neon_begin();
		pmull_polyval_mul(op1, op2);
		kernel_neon_end();
	} else {
		polyval_mul_non4k(op1, op2);
	}
}

static int polyval_arm64_setkey(struct crypto_shash *tfm,
			const u8 *key, unsigned int keylen)
{
	struct polyval_tfm_ctx *tctx = crypto_shash_ctx(tfm);
	int i;

	if (keylen != POLYVAL_BLOCK_SIZE)
		return -EINVAL;

	memcpy(tctx->key_powers[NUM_KEY_POWERS-1], key, POLYVAL_BLOCK_SIZE);

	for (i = NUM_KEY_POWERS-2; i >= 0; i--) {
		memcpy(tctx->key_powers[i], key, POLYVAL_BLOCK_SIZE);
		internal_polyval_mul(tctx->key_powers[i],
				     tctx->key_powers[i+1]);
	}

	return 0;
}

static int polyval_arm64_init(struct shash_desc *desc)
{
	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);

	memset(dctx, 0, sizeof(*dctx));

	return 0;
}

static int polyval_arm64_update(struct shash_desc *desc,
			 const u8 *src, unsigned int srclen)
{
	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
	const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
	u8 *pos;
	unsigned int nblocks;
	unsigned int n;

	if (dctx->bytes) {
		n = min(srclen, dctx->bytes);
		pos = dctx->buffer + POLYVAL_BLOCK_SIZE - dctx->bytes;

		dctx->bytes -= n;
		srclen -= n;

		while (n--)
			*pos++ ^= *src++;

		if (!dctx->bytes)
			internal_polyval_mul(dctx->buffer,
					    tctx->key_powers[NUM_KEY_POWERS-1]);
	}

	while (srclen >= POLYVAL_BLOCK_SIZE) {
		/* allow rescheduling every 4K bytes */
		nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE;
		internal_polyval_update(tctx, src, nblocks, dctx->buffer);
		srclen -= nblocks * POLYVAL_BLOCK_SIZE;
		src += nblocks * POLYVAL_BLOCK_SIZE;
	}

	if (srclen) {
		dctx->bytes = POLYVAL_BLOCK_SIZE - srclen;
		pos = dctx->buffer;
		while (srclen--)
			*pos++ ^= *src++;
	}

	return 0;
}

static int polyval_arm64_final(struct shash_desc *desc, u8 *dst)
{
	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
	const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);

	if (dctx->bytes) {
		internal_polyval_mul(dctx->buffer,
				     tctx->key_powers[NUM_KEY_POWERS-1]);
	}

	memcpy(dst, dctx->buffer, POLYVAL_BLOCK_SIZE);

	return 0;
}

static struct shash_alg polyval_alg = {
	.digestsize	= POLYVAL_DIGEST_SIZE,
	.init		= polyval_arm64_init,
	.update		= polyval_arm64_update,
	.final		= polyval_arm64_final,
	.setkey		= polyval_arm64_setkey,
	.descsize	= sizeof(struct polyval_desc_ctx),
	.base		= {
		.cra_name		= "polyval",
		.cra_driver_name	= "polyval-ce",
		.cra_priority		= 200,
		.cra_blocksize		= POLYVAL_BLOCK_SIZE,
		.cra_ctxsize		= sizeof(struct polyval_tfm_ctx),
		.cra_module		= THIS_MODULE,
	},
};

static int __init polyval_ce_mod_init(void)
{
	return crypto_register_shash(&polyval_alg);
}

static void __exit polyval_ce_mod_exit(void)
{
	crypto_unregister_shash(&polyval_alg);
}

module_cpu_feature_match(PMULL, polyval_ce_mod_init)
module_exit(polyval_ce_mod_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("POLYVAL hash function accelerated by ARMv8 Crypto Extensions");
MODULE_ALIAS_CRYPTO("polyval");
MODULE_ALIAS_CRYPTO("polyval-ce");
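[Illustration, not part of the patch.] To make the role of the precomputed key powers in polyval_arm64_setkey() concrete, here is a schematic C sketch of the evaluation strategy. It is hedged: gf128_polyval_mul is a hypothetical stand-in for a full GF(2^128) POLYVAL multiply (such as internal_polyval_mul above), declared but deliberately not implemented here, and the single deferred reduction per 8 blocks lives only in the assembly and is not shown::

	#include <string.h>

	#define BLOCK	16
	#define NPOWERS	8

	/* Hypothetical stand-in for one GF(2^128) POLYVAL multiply (a *= b). */
	void gf128_polyval_mul(unsigned char a[BLOCK], const unsigned char b[BLOCK]);

	static void xor_block(unsigned char *d, const unsigned char *s)
	{
		for (int i = 0; i < BLOCK; i++)
			d[i] ^= s[i];
	}

	/*
	 * With only h, Horner's rule forces one multiply (and one reduction)
	 * per block:  acc = (acc ^ m_i) * h.  With the precomputed powers
	 * h^8..h^1 (powers[0] = h^8, ..., powers[7] = h^1), eight blocks are
	 * combined independently as
	 *     acc = (m_0 ^ acc)*h^8 ^ m_1*h^7 ^ ... ^ m_7*h^1
	 * which is the algebraic regrouping the assembly exploits.
	 */
	static void polyval_8blocks(unsigned char acc[BLOCK],
				    const unsigned char powers[NPOWERS][BLOCK],
				    const unsigned char msg[NPOWERS][BLOCK])
	{
		unsigned char term[BLOCK];

		xor_block(acc, msg[0]);		/* fold accumulator into m_0 */
		gf128_polyval_mul(acc, powers[0]);	/* (m_0 ^ acc) * h^8 */

		for (int i = 1; i < NPOWERS; i++) {
			memcpy(term, msg[i], BLOCK);
			gf128_polyval_mul(term, powers[i]);	/* m_i * h^(8-i) */
			xor_block(acc, term);
		}
	}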
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
@@ -28,7 +28,7 @@
 * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32
 * bit unit (SU2). One of these can be a memory access that is executed via
 * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per
- * 16 byte block block or 25 cycles per byte. Thus 768 bytes of input data
+ * 16 byte block or 25 cycles per byte. Thus 768 bytes of input data
 * will need an estimated maximum of 20,000 cycles. Headroom for cache misses
 * included. Even with the low end model clocked at 667 MHz this equals to a
 * critical time window of less than 30us. The value has been chosen to
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
@@ -61,14 +61,15 @@ sha256-ssse3-$(CONFIG_AS_SHA256_NI) += sha256_ni_asm.o
 obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
 sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
 
-obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o
-blake2s-x86_64-y := blake2s-shash.o
-obj-$(if $(CONFIG_CRYPTO_BLAKE2S_X86),y) += libblake2s-x86_64.o
+obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += libblake2s-x86_64.o
 libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o
 
 obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 
+obj-$(CONFIG_CRYPTO_POLYVAL_CLMUL_NI) += polyval-clmulni.o
+polyval-clmulni-y := polyval-clmulni_asm.o polyval-clmulni_glue.o
+
 obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
 crc32c-intel-y := crc32c-intel_glue.o
 crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
@@ -23,6 +23,11 @@
 
 #define VMOVDQ		vmovdqu
 
+/*
+ * Note: the "x" prefix in these aliases means "this is an xmm register". The
+ * alias prefixes have no relation to XCTR where the "X" prefix means "XOR
+ * counter".
+ */
 #define xdata0		%xmm0
 #define xdata1		%xmm1
 #define xdata2		%xmm2
@@ -31,8 +36,10 @@
 #define xdata5		%xmm5
 #define xdata6		%xmm6
 #define xdata7		%xmm7
-#define xcounter	%xmm8
-#define xbyteswap	%xmm9
+#define xcounter	%xmm8	// CTR mode only
+#define xiv		%xmm8	// XCTR mode only
+#define xbyteswap	%xmm9	// CTR mode only
+#define xtmp		%xmm9	// XCTR mode only
 #define xkey0		%xmm10
 #define xkey4		%xmm11
 #define xkey8		%xmm12
@@ -45,7 +52,7 @@
 #define p_keys		%rdx
 #define p_out		%rcx
 #define num_bytes	%r8
-
+#define counter		%r9	// XCTR mode only
 #define tmp		%r10
 #define	DDQ_DATA	0
 #define	XDATA		1
@@ -102,7 +109,7 @@ ddq_add_8:
  * do_aes num_in_par load_keys key_len
  * This increments p_in, but not p_out
  */
-.macro do_aes b, k, key_len
+.macro do_aes b, k, key_len, xctr
 	.set by, \b
 	.set load_keys, \k
 	.set klen, \key_len
@@ -111,29 +118,48 @@ ddq_add_8:
 		vmovdqa	0*16(p_keys), xkey0
 	.endif
 
-	vpshufb	xbyteswap, xcounter, xdata0
-
-	.set i, 1
-	.rept (by - 1)
-		club XDATA, i
-		vpaddq	(ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata
-		vptest	ddq_low_msk(%rip), var_xdata
-		jnz 1f
-		vpaddq	ddq_high_add_1(%rip), var_xdata, var_xdata
-		vpaddq	ddq_high_add_1(%rip), xcounter, xcounter
-		1:
-		vpshufb	xbyteswap, var_xdata, var_xdata
-		.set i, (i +1)
-	.endr
+	.if \xctr
+		movq counter, xtmp
+		.set i, 0
+		.rept (by)
+			club XDATA, i
+			vpaddq	(ddq_add_1 + 16 * i)(%rip), xtmp, var_xdata
+			.set i, (i +1)
+		.endr
+		.set i, 0
+		.rept (by)
+			club XDATA, i
+			vpxor	xiv, var_xdata, var_xdata
+			.set i, (i +1)
+		.endr
+	.else
+		vpshufb	xbyteswap, xcounter, xdata0
+		.set i, 1
+		.rept (by - 1)
+			club XDATA, i
+			vpaddq	(ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata
+			vptest	ddq_low_msk(%rip), var_xdata
+			jnz 1f
+			vpaddq	ddq_high_add_1(%rip), var_xdata, var_xdata
+			vpaddq	ddq_high_add_1(%rip), xcounter, xcounter
+			1:
+			vpshufb	xbyteswap, var_xdata, var_xdata
+			.set i, (i +1)
+		.endr
+	.endif
 
 	vmovdqa	1*16(p_keys), xkeyA
 
 	vpxor	xkey0, xdata0, xdata0
-	vpaddq	(ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter
-	vptest	ddq_low_msk(%rip), xcounter
-	jnz	1f
-	vpaddq	ddq_high_add_1(%rip), xcounter, xcounter
-	1:
+	.if \xctr
+		add	$by, counter
+	.else
+		vpaddq	(ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter
+		vptest	ddq_low_msk(%rip), xcounter
+		jnz	1f
+		vpaddq	ddq_high_add_1(%rip), xcounter, xcounter
+		1:
+	.endif
 
 	.set i, 1
 	.rept (by - 1)
@@ -371,94 +397,99 @@ ddq_add_8:
 	.endr
 .endm
 
-.macro do_aes_load val, key_len
-	do_aes \val, 1, \key_len
+.macro do_aes_load val, key_len, xctr
+	do_aes \val, 1, \key_len, \xctr
 .endm
 
-.macro do_aes_noload val, key_len
-	do_aes \val, 0, \key_len
+.macro do_aes_noload val, key_len, xctr
+	do_aes \val, 0, \key_len, \xctr
 .endm
 
 /* main body of aes ctr load */
 
-.macro do_aes_ctrmain key_len
+.macro do_aes_ctrmain key_len, xctr
 	cmp	$16, num_bytes
-	jb	.Ldo_return2\key_len
+	jb	.Ldo_return2\xctr\key_len
 
-	vmovdqa	byteswap_const(%rip), xbyteswap
-	vmovdqu	(p_iv), xcounter
-	vpshufb	xbyteswap, xcounter, xcounter
+	.if \xctr
+		shr	$4, counter
+		vmovdqu	(p_iv), xiv
+	.else
+		vmovdqa	byteswap_const(%rip), xbyteswap
+		vmovdqu	(p_iv), xcounter
+		vpshufb	xbyteswap, xcounter, xcounter
+	.endif
 
 	mov	num_bytes, tmp
 	and	$(7*16), tmp
-	jz	.Lmult_of_8_blks\key_len
+	jz	.Lmult_of_8_blks\xctr\key_len
 
 	/* 1 <= tmp <= 7 */
 	cmp	$(4*16), tmp
-	jg	.Lgt4\key_len
-	je	.Leq4\key_len
+	jg	.Lgt4\xctr\key_len
+	je	.Leq4\xctr\key_len
 
-.Llt4\key_len:
+.Llt4\xctr\key_len:
 	cmp	$(2*16), tmp
-	jg	.Leq3\key_len
-	je	.Leq2\key_len
+	jg	.Leq3\xctr\key_len
+	je	.Leq2\xctr\key_len
 
-.Leq1\key_len:
-	do_aes_load	1, \key_len
+.Leq1\xctr\key_len:
+	do_aes_load	1, \key_len, \xctr
 	add	$(1*16), p_out
 	and	$(~7*16), num_bytes
-	jz	.Ldo_return2\key_len
-	jmp	.Lmain_loop2\key_len
+	jz	.Ldo_return2\xctr\key_len
+	jmp	.Lmain_loop2\xctr\key_len
 
-.Leq2\key_len:
-	do_aes_load	2, \key_len
+.Leq2\xctr\key_len:
+	do_aes_load	2, \key_len, \xctr
 	add	$(2*16), p_out
 	and	$(~7*16), num_bytes
-	jz	.Ldo_return2\key_len
-	jmp	.Lmain_loop2\key_len
+	jz	.Ldo_return2\xctr\key_len
+	jmp	.Lmain_loop2\xctr\key_len
 
-.Leq3\key_len:
-	do_aes_load	3, \key_len
+.Leq3\xctr\key_len:
+	do_aes_load	3, \key_len, \xctr
 	add	$(3*16), p_out
 	and	$(~7*16), num_bytes
-	jz	.Ldo_return2\key_len
-	jmp	.Lmain_loop2\key_len
+	jz	.Ldo_return2\xctr\key_len
+	jmp	.Lmain_loop2\xctr\key_len
 
-.Leq4\key_len:
-	do_aes_load	4, \key_len
+.Leq4\xctr\key_len:
+	do_aes_load	4, \key_len, \xctr
 	add	$(4*16), p_out
 	and	$(~7*16), num_bytes
-	jz	.Ldo_return2\key_len
-	jmp	.Lmain_loop2\key_len
+	jz	.Ldo_return2\xctr\key_len
+	jmp	.Lmain_loop2\xctr\key_len
 
-.Lgt4\key_len:
+.Lgt4\xctr\key_len:
 	cmp	$(6*16), tmp
-	jg	.Leq7\key_len
-	je	.Leq6\key_len
+	jg	.Leq7\xctr\key_len
+	je	.Leq6\xctr\key_len
 
-.Leq5\key_len:
-	do_aes_load	5, \key_len
+.Leq5\xctr\key_len:
+	do_aes_load	5, \key_len, \xctr
 	add	$(5*16), p_out
 	and	$(~7*16), num_bytes
-	jz	.Ldo_return2\key_len
-	jmp	.Lmain_loop2\key_len
+	jz	.Ldo_return2\xctr\key_len
+	jmp	.Lmain_loop2\xctr\key_len
 
-.Leq6\key_len:
-	do_aes_load	6, \key_len
+.Leq6\xctr\key_len:
+	do_aes_load	6, \key_len, \xctr
 	add	$(6*16), p_out
 	and	$(~7*16), num_bytes
-	jz	.Ldo_return2\key_len
-	jmp	.Lmain_loop2\key_len
+	jz	.Ldo_return2\xctr\key_len
+	jmp	.Lmain_loop2\xctr\key_len
 
-.Leq7\key_len:
-	do_aes_load	7, \key_len
+.Leq7\xctr\key_len:
+	do_aes_load	7, \key_len, \xctr
 	add	$(7*16), p_out
 	and	$(~7*16), num_bytes
|
||||
jz .Ldo_return2\key_len
|
||||
jmp .Lmain_loop2\key_len
|
||||
jz .Ldo_return2\xctr\key_len
|
||||
jmp .Lmain_loop2\xctr\key_len
|
||||
|
||||
.Lmult_of_8_blks\key_len:
|
||||
.Lmult_of_8_blks\xctr\key_len:
|
||||
.if (\key_len != KEY_128)
|
||||
vmovdqa 0*16(p_keys), xkey0
|
||||
vmovdqa 4*16(p_keys), xkey4
|
||||
@ -471,17 +502,19 @@ ddq_add_8:
|
||||
vmovdqa 9*16(p_keys), xkey12
|
||||
.endif
|
||||
.align 16
|
||||
.Lmain_loop2\key_len:
|
||||
.Lmain_loop2\xctr\key_len:
|
||||
/* num_bytes is a multiple of 8 and >0 */
|
||||
do_aes_noload 8, \key_len
|
||||
do_aes_noload 8, \key_len, \xctr
|
||||
add $(8*16), p_out
|
||||
sub $(8*16), num_bytes
|
||||
jne .Lmain_loop2\key_len
|
||||
jne .Lmain_loop2\xctr\key_len
|
||||
|
||||
.Ldo_return2\key_len:
|
||||
/* return updated IV */
|
||||
vpshufb xbyteswap, xcounter, xcounter
|
||||
vmovdqu xcounter, (p_iv)
|
||||
.Ldo_return2\xctr\key_len:
|
||||
.if !\xctr
|
||||
/* return updated IV */
|
||||
vpshufb xbyteswap, xcounter, xcounter
|
||||
vmovdqu xcounter, (p_iv)
|
||||
.endif
|
||||
RET
|
||||
.endm
|
||||
|
||||
@ -494,7 +527,7 @@ ddq_add_8:
|
||||
*/
|
||||
SYM_FUNC_START(aes_ctr_enc_128_avx_by8)
|
||||
/* call the aes main loop */
|
||||
do_aes_ctrmain KEY_128
|
||||
do_aes_ctrmain KEY_128 0
|
||||
|
||||
SYM_FUNC_END(aes_ctr_enc_128_avx_by8)
|
||||
|
||||
@ -507,7 +540,7 @@ SYM_FUNC_END(aes_ctr_enc_128_avx_by8)
|
||||
*/
|
||||
SYM_FUNC_START(aes_ctr_enc_192_avx_by8)
|
||||
/* call the aes main loop */
|
||||
do_aes_ctrmain KEY_192
|
||||
do_aes_ctrmain KEY_192 0
|
||||
|
||||
SYM_FUNC_END(aes_ctr_enc_192_avx_by8)
|
||||
|
||||
@ -520,6 +553,45 @@ SYM_FUNC_END(aes_ctr_enc_192_avx_by8)
|
||||
*/
|
||||
SYM_FUNC_START(aes_ctr_enc_256_avx_by8)
|
||||
/* call the aes main loop */
|
||||
do_aes_ctrmain KEY_256
|
||||
do_aes_ctrmain KEY_256 0
|
||||
|
||||
SYM_FUNC_END(aes_ctr_enc_256_avx_by8)
|
||||
|
||||
/*
|
||||
* routine to do AES128 XCTR enc/decrypt "by8"
|
||||
* XMM registers are clobbered.
|
||||
* Saving/restoring must be done at a higher level
|
||||
* aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv, const void *keys,
|
||||
* u8* out, unsigned int num_bytes, unsigned int byte_ctr)
|
||||
*/
|
||||
SYM_FUNC_START(aes_xctr_enc_128_avx_by8)
|
||||
/* call the aes main loop */
|
||||
do_aes_ctrmain KEY_128 1
|
||||
|
||||
SYM_FUNC_END(aes_xctr_enc_128_avx_by8)
|
||||
|
||||
/*
|
||||
* routine to do AES192 XCTR enc/decrypt "by8"
|
||||
* XMM registers are clobbered.
|
||||
* Saving/restoring must be done at a higher level
|
||||
* aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv, const void *keys,
|
||||
* u8* out, unsigned int num_bytes, unsigned int byte_ctr)
|
||||
*/
|
||||
SYM_FUNC_START(aes_xctr_enc_192_avx_by8)
|
||||
/* call the aes main loop */
|
||||
do_aes_ctrmain KEY_192 1
|
||||
|
||||
SYM_FUNC_END(aes_xctr_enc_192_avx_by8)
|
||||
|
||||
/*
|
||||
* routine to do AES256 XCTR enc/decrypt "by8"
|
||||
* XMM registers are clobbered.
|
||||
* Saving/restoring must be done at a higher level
|
||||
* aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv, const void *keys,
|
||||
* u8* out, unsigned int num_bytes, unsigned int byte_ctr)
|
||||
*/
|
||||
SYM_FUNC_START(aes_xctr_enc_256_avx_by8)
|
||||
/* call the aes main loop */
|
||||
do_aes_ctrmain KEY_256 1
|
||||
|
||||
SYM_FUNC_END(aes_xctr_enc_256_avx_by8)
|
||||
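A plain-C model may make the two code paths above easier to follow: the .if \xctr branches build each keystream block as the IV XORed with a little-endian block counter, with no byte swap, whereas the CTR path byte-swaps and adds. This is only a sketch, not the kernel code; the block-cipher callback and the function name are invented for illustration, and it starts at counter value 1 (i.e. zero bytes already processed).

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Caller supplies the single-block cipher; a stand-in for AES here. */
typedef void (*block_fn)(const void *key, const uint8_t in[16], uint8_t out[16]);

static void xctr_sketch(block_fn enc, const void *key, const uint8_t iv[16],
                        const uint8_t *src, uint8_t *dst, size_t nblocks)
{
        uint8_t ctrblk[16], ks[16];

        for (size_t i = 0; i < nblocks; i++) {
                uint64_t ctr = (uint64_t)i + 1;     /* XCTR counts from 1 */

                memcpy(ctrblk, iv, 16);
                for (int b = 0; b < 8; b++)          /* little-endian XOR */
                        ctrblk[b] ^= (uint8_t)(ctr >> (8 * b));
                enc(key, ctrblk, ks);                /* keystream block   */
                for (int b = 0; b < 16; b++)
                        dst[16 * i + b] = src[16 * i + b] ^ ks[b];
        }
}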
@@ -135,6 +135,20 @@ asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv,
void *keys, u8 *out, unsigned int num_bytes);
asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv,
void *keys, u8 *out, unsigned int num_bytes);

asmlinkage void aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv,
const void *keys, u8 *out, unsigned int num_bytes,
unsigned int byte_ctr);

asmlinkage void aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv,
const void *keys, u8 *out, unsigned int num_bytes,
unsigned int byte_ctr);

asmlinkage void aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv,
const void *keys, u8 *out, unsigned int num_bytes,
unsigned int byte_ctr);

/*
* asmlinkage void aesni_gcm_init_avx_gen2()
* gcm_data *my_ctx_data, context data
@@ -527,6 +541,59 @@ static int ctr_crypt(struct skcipher_request *req)
return err;
}

static void aesni_xctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv,
unsigned int byte_ctr)
{
if (ctx->key_length == AES_KEYSIZE_128)
aes_xctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len,
byte_ctr);
else if (ctx->key_length == AES_KEYSIZE_192)
aes_xctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len,
byte_ctr);
else
aes_xctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len,
byte_ctr);
}

static int xctr_crypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
u8 keystream[AES_BLOCK_SIZE];
struct skcipher_walk walk;
unsigned int nbytes;
unsigned int byte_ctr = 0;
int err;
__le32 block[AES_BLOCK_SIZE / sizeof(__le32)];

err = skcipher_walk_virt(&walk, req, false);

while ((nbytes = walk.nbytes) > 0) {
kernel_fpu_begin();
if (nbytes & AES_BLOCK_MASK)
aesni_xctr_enc_avx_tfm(ctx, walk.dst.virt.addr,
walk.src.virt.addr, nbytes & AES_BLOCK_MASK,
walk.iv, byte_ctr);
nbytes &= ~AES_BLOCK_MASK;
byte_ctr += walk.nbytes - nbytes;

if (walk.nbytes == walk.total && nbytes > 0) {
memcpy(block, walk.iv, AES_BLOCK_SIZE);
block[0] ^= cpu_to_le32(1 + byte_ctr / AES_BLOCK_SIZE);
aesni_enc(ctx, keystream, (u8 *)block);
crypto_xor_cpy(walk.dst.virt.addr + walk.nbytes -
nbytes, walk.src.virt.addr + walk.nbytes
- nbytes, keystream, nbytes);
byte_ctr += nbytes;
nbytes = 0;
}
kernel_fpu_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
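The tail of xctr_crypt handles a final partial block by encrypting one more counter block by hand. A worked example, as a small self-checking sketch (the numbers are assumptions chosen for illustration): for a 100-byte request, the full-block pass covers 96 bytes, so the 4 leftover bytes use keystream block 1 + 96/16 = 7, counted from 1.

#include <assert.h>

int main(void)
{
        unsigned int total = 100;
        unsigned int byte_ctr = total & ~15u;   /* full blocks: 96 bytes */

        assert(byte_ctr == 96);
        assert(total - byte_ctr == 4);          /* partial tail          */
        assert(1 + byte_ctr / 16 == 7);         /* its counter value     */
        return 0;
}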
static int
rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
{
@@ -1050,6 +1117,33 @@ static struct skcipher_alg aesni_skciphers[] = {
static
struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];

#ifdef CONFIG_X86_64
/*
* XCTR does not have a non-AVX implementation, so it must be enabled
* conditionally.
*/
static struct skcipher_alg aesni_xctr = {
.base = {
.cra_name = "__xctr(aes)",
.cra_driver_name = "__xctr-aes-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = CRYPTO_AES_CTX_SIZE,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.chunksize = AES_BLOCK_SIZE,
.setkey = aesni_skcipher_setkey,
.encrypt = xctr_crypt,
.decrypt = xctr_crypt,
};

static struct simd_skcipher_alg *aesni_simd_xctr;
#endif /* CONFIG_X86_64 */

#ifdef CONFIG_X86_64
static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
unsigned int key_len)
@@ -1163,7 +1257,7 @@ static int __init aesni_init(void)
static_call_update(aesni_ctr_enc_tfm, aesni_ctr_enc_avx_tfm);
pr_info("AES CTR mode by8 optimization enabled\n");
}
#endif
#endif /* CONFIG_X86_64 */

err = crypto_register_alg(&aesni_cipher_alg);
if (err)
@@ -1180,8 +1274,22 @@ static int __init aesni_init(void)
if (err)
goto unregister_skciphers;

#ifdef CONFIG_X86_64
if (boot_cpu_has(X86_FEATURE_AVX))
err = simd_register_skciphers_compat(&aesni_xctr, 1,
&aesni_simd_xctr);
if (err)
goto unregister_aeads;
#endif /* CONFIG_X86_64 */

return 0;

#ifdef CONFIG_X86_64
unregister_aeads:
simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads),
aesni_simd_aeads);
#endif /* CONFIG_X86_64 */

unregister_skciphers:
simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
aesni_simd_skciphers);
@@ -1197,6 +1305,10 @@ static void __exit aesni_exit(void)
simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
aesni_simd_skciphers);
crypto_unregister_alg(&aesni_cipher_alg);
#ifdef CONFIG_X86_64
if (boot_cpu_has(X86_FEATURE_AVX))
simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr);
#endif /* CONFIG_X86_64 */
}

late_initcall(aesni_init);

@@ -4,7 +4,6 @@
*/

#include <crypto/internal/blake2s.h>
#include <crypto/internal/simd.h>

#include <linux/types.h>
#include <linux/jump_label.h>
@@ -33,7 +32,7 @@ void blake2s_compress(struct blake2s_state *state, const u8 *block,
/* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);

if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) {
blake2s_compress_generic(state, block, nblocks, inc);
return;
}

@@ -1,77 +0,0 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/

#include <crypto/internal/blake2s.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/hash.h>

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sizes.h>

#include <asm/cpufeature.h>
#include <asm/processor.h>

static int crypto_blake2s_update_x86(struct shash_desc *desc,
const u8 *in, unsigned int inlen)
{
return crypto_blake2s_update(desc, in, inlen, false);
}

static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out)
{
return crypto_blake2s_final(desc, out, false);
}

#define BLAKE2S_ALG(name, driver_name, digest_size) \
{ \
.base.cra_name = name, \
.base.cra_driver_name = driver_name, \
.base.cra_priority = 200, \
.base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \
.base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \
.base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \
.base.cra_module = THIS_MODULE, \
.digestsize = digest_size, \
.setkey = crypto_blake2s_setkey, \
.init = crypto_blake2s_init, \
.update = crypto_blake2s_update_x86, \
.final = crypto_blake2s_final_x86, \
.descsize = sizeof(struct blake2s_state), \
}

static struct shash_alg blake2s_algs[] = {
BLAKE2S_ALG("blake2s-128", "blake2s-128-x86", BLAKE2S_128_HASH_SIZE),
BLAKE2S_ALG("blake2s-160", "blake2s-160-x86", BLAKE2S_160_HASH_SIZE),
BLAKE2S_ALG("blake2s-224", "blake2s-224-x86", BLAKE2S_224_HASH_SIZE),
BLAKE2S_ALG("blake2s-256", "blake2s-256-x86", BLAKE2S_256_HASH_SIZE),
};

static int __init blake2s_mod_init(void)
{
if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
return 0;
}

static void __exit blake2s_mod_exit(void)
{
if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
}

module_init(blake2s_mod_init);
module_exit(blake2s_mod_exit);

MODULE_ALIAS_CRYPTO("blake2s-128");
MODULE_ALIAS_CRYPTO("blake2s-128-x86");
MODULE_ALIAS_CRYPTO("blake2s-160");
MODULE_ALIAS_CRYPTO("blake2s-160-x86");
MODULE_ALIAS_CRYPTO("blake2s-224");
MODULE_ALIAS_CRYPTO("blake2s-224-x86");
MODULE_ALIAS_CRYPTO("blake2s-256");
MODULE_ALIAS_CRYPTO("blake2s-256-x86");
MODULE_LICENSE("GPL v2");

@@ -144,7 +144,7 @@ static int cbc_encrypt(struct skcipher_request *req)

err = skcipher_walk_virt(&walk, req, false);

while ((nbytes = walk.nbytes)) {
while (walk.nbytes) {
nbytes = __cbc_encrypt(ctx, &walk);
err = skcipher_walk_done(&walk, nbytes);
}
@@ -225,7 +225,7 @@ static int cbc_decrypt(struct skcipher_request *req)

err = skcipher_walk_virt(&walk, req, false);

while ((nbytes = walk.nbytes)) {
while (walk.nbytes) {
nbytes = __cbc_decrypt(ctx, &walk);
err = skcipher_walk_done(&walk, nbytes);
}
321 arch/x86/crypto/polyval-clmulni_asm.S Normal file
@@ -0,0 +1,321 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright 2021 Google LLC
*/
/*
* This is an efficient implementation of POLYVAL using intel PCLMULQDQ-NI
* instructions. It works on 8 blocks at a time, by precomputing the first 8
* key powers h^8, ..., h^1 in the POLYVAL finite field. This precomputation
* allows us to split finite field multiplication into two steps.
*
* In the first step, we consider h^i, m_i as normal polynomials of degree less
* than 128. We then compute p(x) = h^8m_0 + ... + h^1m_7 where multiplication
* is simply polynomial multiplication.
*
* In the second step, we compute the reduction of p(x) modulo the finite field
* modulus g(x) = x^128 + x^127 + x^126 + x^121 + 1.
*
* This two-step process is equivalent to computing h^8m_0 + ... + h^1m_7 where
* multiplication is finite field multiplication. The advantage is that the
* two-step process only requires 1 finite field reduction for every 8
* polynomial multiplications. Further parallelism is gained by interleaving the
* multiplications and polynomial reductions.
*/
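The schoolbook step that the comment above describes can be modelled in C on two 64-bit limbs. The sketch below is illustrative only: clmul64 is a bitwise stand-in for one vpclmulqdq lane, and the names are invented. It accumulates the four cross products into LO/MI/HI and then combines them the way schoolbook2 does.

#include <stdint.h>

/* Bitwise 64x64 carryless multiply; stand-in for one PCLMULQDQ lane. */
static void clmul64(uint64_t a, uint64_t b, uint64_t *lo, uint64_t *hi)
{
        *lo = *hi = 0;
        for (int i = 0; i < 64; i++)
                if ((b >> i) & 1) {
                        *lo ^= a << i;
                        *hi ^= i ? a >> (64 - i) : 0;
                }
}

/* Product of X = [X_1 : X_0] and Y = [Y_1 : Y_0] as [P_3 : P_2 : P_1 : P_0]. */
static void schoolbook_sketch(const uint64_t x[2], const uint64_t y[2],
                              uint64_t p[4])
{
        uint64_t lo[2], mi[2], hi[2], t0, t1;

        clmul64(x[0], y[0], &lo[0], &lo[1]);    /* LO = X_0 * Y_0          */
        clmul64(x[1], y[1], &hi[0], &hi[1]);    /* HI = X_1 * Y_1          */
        clmul64(x[0], y[1], &mi[0], &mi[1]);    /* MI = X_0*Y_1 + X_1*Y_0  */
        clmul64(x[1], y[0], &t0, &t1);
        mi[0] ^= t0;
        mi[1] ^= t1;

        /* schoolbook2: [HI_1 : HI_0 + MI_1 : LO_1 + MI_0 : LO_0] */
        p[0] = lo[0];
        p[1] = lo[1] ^ mi[0];
        p[2] = hi[0] ^ mi[1];
        p[3] = hi[1];
}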
#include <linux/linkage.h>
#include <asm/frame.h>

#define STRIDE_BLOCKS 8

#define GSTAR %xmm7
#define PL %xmm8
#define PH %xmm9
#define TMP_XMM %xmm11
#define LO %xmm12
#define HI %xmm13
#define MI %xmm14
#define SUM %xmm15

#define KEY_POWERS %rdi
#define MSG %rsi
#define BLOCKS_LEFT %rdx
#define ACCUMULATOR %rcx
#define TMP %rax

.section .rodata.cst16.gstar, "aM", @progbits, 16
.align 16

.Lgstar:
.quad 0xc200000000000000, 0xc200000000000000

.text

/*
* Performs schoolbook1_iteration on two lists of 128-bit polynomials of length
* count pointed to by MSG and KEY_POWERS.
*/
.macro schoolbook1 count
.set i, 0
.rept (\count)
schoolbook1_iteration i 0
.set i, (i +1)
.endr
.endm

/*
* Computes the product of two 128-bit polynomials at the memory locations
* specified by (MSG + 16*i) and (KEY_POWERS + 16*i) and XORs the components of
* the 256-bit product into LO, MI, HI.
*
* Given:
* X = [X_1 : X_0]
* Y = [Y_1 : Y_0]
*
* We compute:
* LO += X_0 * Y_0
* MI += X_0 * Y_1 + X_1 * Y_0
* HI += X_1 * Y_1
*
* Later, the 256-bit result can be extracted as:
* [HI_1 : HI_0 + MI_1 : LO_1 + MI_0 : LO_0]
* This step is done when computing the polynomial reduction for efficiency
* reasons.
*
* If xor_sum == 1, then also XOR the value of SUM into m_0. This avoids an
* extra multiplication of SUM and h^8.
*/
.macro schoolbook1_iteration i xor_sum
movups (16*\i)(MSG), %xmm0
.if (\i == 0 && \xor_sum == 1)
pxor SUM, %xmm0
.endif
vpclmulqdq $0x01, (16*\i)(KEY_POWERS), %xmm0, %xmm2
vpclmulqdq $0x00, (16*\i)(KEY_POWERS), %xmm0, %xmm1
vpclmulqdq $0x10, (16*\i)(KEY_POWERS), %xmm0, %xmm3
vpclmulqdq $0x11, (16*\i)(KEY_POWERS), %xmm0, %xmm4
vpxor %xmm2, MI, MI
vpxor %xmm1, LO, LO
vpxor %xmm4, HI, HI
vpxor %xmm3, MI, MI
.endm

/*
* Performs the same computation as schoolbook1_iteration, except we expect the
* arguments to already be loaded into xmm0 and xmm1 and we set the result
* registers LO, MI, and HI directly rather than XOR'ing into them.
*/
.macro schoolbook1_noload
vpclmulqdq $0x01, %xmm0, %xmm1, MI
vpclmulqdq $0x10, %xmm0, %xmm1, %xmm2
vpclmulqdq $0x00, %xmm0, %xmm1, LO
vpclmulqdq $0x11, %xmm0, %xmm1, HI
vpxor %xmm2, MI, MI
.endm

/*
* Computes the 256-bit polynomial represented by LO, HI, MI. Stores
* the result in PL, PH.
* [PH : PL] = [HI_1 : HI_0 + MI_1 : LO_1 + MI_0 : LO_0]
*/
.macro schoolbook2
vpslldq $8, MI, PL
vpsrldq $8, MI, PH
pxor LO, PL
pxor HI, PH
.endm

/*
* Computes the 128-bit reduction of PH : PL. Stores the result in dest.
*
* This macro computes p(x) mod g(x) where p(x) is in montgomery form and g(x) =
* x^128 + x^127 + x^126 + x^121 + 1.
*
* We have a 256-bit polynomial PH : PL = P_3 : P_2 : P_1 : P_0 that is the
* product of two 128-bit polynomials in Montgomery form. We need to reduce it
* mod g(x). Also, since polynomials in Montgomery form have an "extra" factor
* of x^128, this product has two extra factors of x^128. To get it back into
* Montgomery form, we need to remove one of these factors by dividing by x^128.
*
* To accomplish both of these goals, we add multiples of g(x) that cancel out
* the low 128 bits P_1 : P_0, leaving just the high 128 bits. Since the low
* bits are zero, the polynomial division by x^128 can be done by right shifting.
*
* Since the only nonzero term in the low 64 bits of g(x) is the constant term,
* the multiple of g(x) needed to cancel out P_0 is P_0 * g(x). The CPU can
* only do 64x64 bit multiplications, so split P_0 * g(x) into x^128 * P_0 +
* x^64 * g*(x) * P_0 + P_0, where g*(x) is bits 64-127 of g(x). Adding this to
* the original polynomial gives P_3 : P_2 + P_0 + T_1 : P_1 + T_0 : 0, where T
* = T_1 : T_0 = g*(x) * P_0. Thus, bits 0-63 got "folded" into bits 64-191.
*
* Repeating this same process on the next 64 bits "folds" bits 64-127 into bits
* 128-255, giving the answer in bits 128-255. This time, we need to cancel P_1
* + T_0 in bits 64-127. The multiple of g(x) required is (P_1 + T_0) * g(x) *
* x^64. Adding this to our previous computation gives P_3 + P_1 + T_0 + V_1 :
* P_2 + P_0 + T_1 + V_0 : 0 : 0, where V = V_1 : V_0 = g*(x) * (P_1 + T_0).
*
* So our final computation is:
* T = T_1 : T_0 = g*(x) * P_0
* V = V_1 : V_0 = g*(x) * (P_1 + T_0)
* p(x) / x^{128} mod g(x) = P_3 + P_1 + T_0 + V_1 : P_2 + P_0 + T_1 + V_0
*
* The implementation below saves a XOR instruction by computing P_1 + T_0 : P_0
* + T_1 and XORing into dest, rather than separately XORing P_1 : P_0 and T_0 :
* T_1 into dest. This allows us to reuse P_1 + T_0 when computing V.
*/
.macro montgomery_reduction dest
vpclmulqdq $0x00, PL, GSTAR, TMP_XMM # TMP_XMM = T_1 : T_0 = P_0 * g*(x)
pshufd $0b01001110, TMP_XMM, TMP_XMM # TMP_XMM = T_0 : T_1
pxor PL, TMP_XMM # TMP_XMM = P_1 + T_0 : P_0 + T_1
pxor TMP_XMM, PH # PH = P_3 + P_1 + T_0 : P_2 + P_0 + T_1
pclmulqdq $0x11, GSTAR, TMP_XMM # TMP_XMM = V_1 : V_0 = V = [(P_1 + T_0) * g*(x)]
vpxor TMP_XMM, PH, \dest
.endm
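The fold the comment derives can be written out in C and checked limb by limb. A minimal sketch, assuming the same bitwise clmul64 helper as in the earlier sketch; GSTAR is the 0xc2... constant from .Lgstar, i.e. g*(x) = bits 64-127 of g(x). The two output limbs follow the comment's final formula exactly.

#include <stdint.h>

static void clmul64(uint64_t a, uint64_t b, uint64_t *lo, uint64_t *hi)
{
        *lo = *hi = 0;
        for (int i = 0; i < 64; i++)
                if ((b >> i) & 1) {
                        *lo ^= a << i;
                        *hi ^= i ? a >> (64 - i) : 0;
                }
}

/* p[4] = P_3 : P_2 : P_1 : P_0; result is p(x) / x^128 mod g(x). */
static void montgomery_reduce_sketch(const uint64_t p[4], uint64_t out[2])
{
        const uint64_t GSTAR = 0xc200000000000000ULL;
        uint64_t t0, t1, v0, v1;

        clmul64(GSTAR, p[0], &t0, &t1);         /* T = g*(x) * P_0         */
        clmul64(GSTAR, p[1] ^ t0, &v0, &v1);    /* V = g*(x) * (P_1 + T_0) */
        out[0] = p[2] ^ p[0] ^ t1 ^ v0;         /* P_2 + P_0 + T_1 + V_0   */
        out[1] = p[3] ^ p[1] ^ t0 ^ v1;         /* P_3 + P_1 + T_0 + V_1   */
}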
/*
* Compute schoolbook multiplication for 8 blocks
* m_0h^8 + ... + m_7h^1
*
* If reduce is set, also computes the montgomery reduction of the
* previous full_stride call and XORs with the first message block.
* (m_0 + REDUCE(PL, PH))h^8 + ... + m_7h^1.
* I.e., the first multiplication uses m_0 + REDUCE(PL, PH) instead of m_0.
*/
.macro full_stride reduce
pxor LO, LO
pxor HI, HI
pxor MI, MI

schoolbook1_iteration 7 0
.if \reduce
vpclmulqdq $0x00, PL, GSTAR, TMP_XMM
.endif

schoolbook1_iteration 6 0
.if \reduce
pshufd $0b01001110, TMP_XMM, TMP_XMM
.endif

schoolbook1_iteration 5 0
.if \reduce
pxor PL, TMP_XMM
.endif

schoolbook1_iteration 4 0
.if \reduce
pxor TMP_XMM, PH
.endif

schoolbook1_iteration 3 0
.if \reduce
pclmulqdq $0x11, GSTAR, TMP_XMM
.endif

schoolbook1_iteration 2 0
.if \reduce
vpxor TMP_XMM, PH, SUM
.endif

schoolbook1_iteration 1 0

schoolbook1_iteration 0 1

addq $(8*16), MSG
schoolbook2
.endm

/*
* Process BLOCKS_LEFT blocks, where 0 < BLOCKS_LEFT < STRIDE_BLOCKS
*/
.macro partial_stride
mov BLOCKS_LEFT, TMP
shlq $4, TMP
addq $(16*STRIDE_BLOCKS), KEY_POWERS
subq TMP, KEY_POWERS

movups (MSG), %xmm0
pxor SUM, %xmm0
movaps (KEY_POWERS), %xmm1
schoolbook1_noload
dec BLOCKS_LEFT
addq $16, MSG
addq $16, KEY_POWERS

test $4, BLOCKS_LEFT
jz .Lpartial4BlocksDone
schoolbook1 4
addq $(4*16), MSG
addq $(4*16), KEY_POWERS
.Lpartial4BlocksDone:
test $2, BLOCKS_LEFT
jz .Lpartial2BlocksDone
schoolbook1 2
addq $(2*16), MSG
addq $(2*16), KEY_POWERS
.Lpartial2BlocksDone:
test $1, BLOCKS_LEFT
jz .LpartialDone
schoolbook1 1
.LpartialDone:
schoolbook2
montgomery_reduction SUM
.endm

/*
* Perform montgomery multiplication in GF(2^128) and store result in op1.
*
* Computes op1*op2*x^{-128} mod x^128 + x^127 + x^126 + x^121 + 1
* If op1, op2 are in montgomery form, this computes the montgomery
* form of op1*op2.
*
* void clmul_polyval_mul(u8 *op1, const u8 *op2);
*/
SYM_FUNC_START(clmul_polyval_mul)
FRAME_BEGIN
vmovdqa .Lgstar(%rip), GSTAR
movups (%rdi), %xmm0
movups (%rsi), %xmm1
schoolbook1_noload
schoolbook2
montgomery_reduction SUM
movups SUM, (%rdi)
FRAME_END
RET
SYM_FUNC_END(clmul_polyval_mul)

/*
* Perform polynomial evaluation as specified by POLYVAL. This computes:
* h^n * accumulator + h^n * m_0 + ... + h^1 * m_{n-1}
* where n=nblocks, h is the hash key, and m_i are the message blocks.
*
* rdi - pointer to precomputed key powers h^8 ... h^1
* rsi - pointer to message blocks
* rdx - number of blocks to hash
* rcx - pointer to the accumulator
*
* void clmul_polyval_update(const struct polyval_tfm_ctx *keys,
* const u8 *in, size_t nblocks, u8 *accumulator);
*/
SYM_FUNC_START(clmul_polyval_update)
FRAME_BEGIN
vmovdqa .Lgstar(%rip), GSTAR
movups (ACCUMULATOR), SUM
subq $STRIDE_BLOCKS, BLOCKS_LEFT
js .LstrideLoopExit
full_stride 0
subq $STRIDE_BLOCKS, BLOCKS_LEFT
js .LstrideLoopExitReduce
.LstrideLoop:
full_stride 1
subq $STRIDE_BLOCKS, BLOCKS_LEFT
jns .LstrideLoop
.LstrideLoopExitReduce:
montgomery_reduction SUM
.LstrideLoopExit:
add $STRIDE_BLOCKS, BLOCKS_LEFT
jz .LskipPartial
partial_stride
.LskipPartial:
movups SUM, (ACCUMULATOR)
FRAME_END
RET
SYM_FUNC_END(clmul_polyval_update)
203 arch/x86/crypto/polyval-clmulni_glue.c Normal file
@@ -0,0 +1,203 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Glue code for POLYVAL using PCMULQDQ-NI
*
* Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi>
* Copyright (c) 2009 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
* Copyright 2021 Google LLC
*/

/*
* Glue code based on ghash-clmulni-intel_glue.c.
*
* This implementation of POLYVAL uses montgomery multiplication
* accelerated by PCLMULQDQ-NI to implement the finite field
* operations.
*/

#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/polyval.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/cpu_device_id.h>
#include <asm/simd.h>

#define NUM_KEY_POWERS 8

struct polyval_tfm_ctx {
/*
* These powers must be in the order h^8, ..., h^1.
*/
u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE];
};

struct polyval_desc_ctx {
u8 buffer[POLYVAL_BLOCK_SIZE];
u32 bytes;
};

asmlinkage void clmul_polyval_update(const struct polyval_tfm_ctx *keys,
const u8 *in, size_t nblocks, u8 *accumulator);
asmlinkage void clmul_polyval_mul(u8 *op1, const u8 *op2);

static void internal_polyval_update(const struct polyval_tfm_ctx *keys,
const u8 *in, size_t nblocks, u8 *accumulator)
{
if (likely(crypto_simd_usable())) {
kernel_fpu_begin();
clmul_polyval_update(keys, in, nblocks, accumulator);
kernel_fpu_end();
} else {
polyval_update_non4k(keys->key_powers[NUM_KEY_POWERS-1], in,
nblocks, accumulator);
}
}

static void internal_polyval_mul(u8 *op1, const u8 *op2)
{
if (likely(crypto_simd_usable())) {
kernel_fpu_begin();
clmul_polyval_mul(op1, op2);
kernel_fpu_end();
} else {
polyval_mul_non4k(op1, op2);
}
}

static int polyval_x86_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
struct polyval_tfm_ctx *tctx = crypto_shash_ctx(tfm);
int i;

if (keylen != POLYVAL_BLOCK_SIZE)
return -EINVAL;

memcpy(tctx->key_powers[NUM_KEY_POWERS-1], key, POLYVAL_BLOCK_SIZE);

for (i = NUM_KEY_POWERS-2; i >= 0; i--) {
memcpy(tctx->key_powers[i], key, POLYVAL_BLOCK_SIZE);
internal_polyval_mul(tctx->key_powers[i],
tctx->key_powers[i+1]);
}

return 0;
}
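The setkey loop fills key_powers back to front, multiplying each slot by the next one. A toy bookkeeping check of the resulting ordering, tracking only exponents rather than field elements (an illustration, not kernel code):

#include <assert.h>

int main(void)
{
        int exp[8];
        int i;

        exp[8 - 1] = 1;                         /* key_powers[7] = h^1 */
        for (i = 8 - 2; i >= 0; i--)
                exp[i] = 1 + exp[i + 1];        /* multiply by h       */
        for (i = 0; i < 8; i++)
                assert(exp[i] == 8 - i);        /* h^8 first, h^1 last */
        return 0;
}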
static int polyval_x86_init(struct shash_desc *desc)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);

memset(dctx, 0, sizeof(*dctx));

return 0;
}

static int polyval_x86_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
u8 *pos;
unsigned int nblocks;
unsigned int n;

if (dctx->bytes) {
n = min(srclen, dctx->bytes);
pos = dctx->buffer + POLYVAL_BLOCK_SIZE - dctx->bytes;

dctx->bytes -= n;
srclen -= n;

while (n--)
*pos++ ^= *src++;

if (!dctx->bytes)
internal_polyval_mul(dctx->buffer,
tctx->key_powers[NUM_KEY_POWERS-1]);
}

while (srclen >= POLYVAL_BLOCK_SIZE) {
/* Allow rescheduling every 4K bytes. */
nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE;
internal_polyval_update(tctx, src, nblocks, dctx->buffer);
srclen -= nblocks * POLYVAL_BLOCK_SIZE;
src += nblocks * POLYVAL_BLOCK_SIZE;
}

if (srclen) {
dctx->bytes = POLYVAL_BLOCK_SIZE - srclen;
pos = dctx->buffer;
while (srclen--)
*pos++ ^= *src++;
}

return 0;
}
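One detail of the update path worth spelling out: dctx->bytes counts how many bytes are still needed to complete the 16-byte block, not how many are buffered. A small self-checking walk-through with assumed input sizes of 5 then 20 bytes:

#include <assert.h>

int main(void)
{
        unsigned int bytes = 0, srclen, n;

        srclen = 5;                             /* update #1: all buffered */
        bytes = 16 - srclen;
        assert(bytes == 11);                    /* 11 more bytes needed    */

        srclen = 20;                            /* update #2               */
        n = srclen < bytes ? srclen : bytes;    /* min(srclen, dctx->bytes)*/
        bytes -= n;                             /* block completed...      */
        srclen -= n;                            /* ...9 bytes remain       */
        assert(bytes == 0 && srclen == 9);
        bytes = 16 - srclen;                    /* partial tail buffered   */
        assert(bytes == 7);
        return 0;
}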
static int polyval_x86_final(struct shash_desc *desc, u8 *dst)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);

if (dctx->bytes) {
internal_polyval_mul(dctx->buffer,
tctx->key_powers[NUM_KEY_POWERS-1]);
}

memcpy(dst, dctx->buffer, POLYVAL_BLOCK_SIZE);

return 0;
}

static struct shash_alg polyval_alg = {
.digestsize = POLYVAL_DIGEST_SIZE,
.init = polyval_x86_init,
.update = polyval_x86_update,
.final = polyval_x86_final,
.setkey = polyval_x86_setkey,
.descsize = sizeof(struct polyval_desc_ctx),
.base = {
.cra_name = "polyval",
.cra_driver_name = "polyval-clmulni",
.cra_priority = 200,
.cra_blocksize = POLYVAL_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct polyval_tfm_ctx),
.cra_module = THIS_MODULE,
},
};

__maybe_unused static const struct x86_cpu_id pcmul_cpu_id[] = {
X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL),
{}
};
MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);

static int __init polyval_clmulni_mod_init(void)
{
if (!x86_match_cpu(pcmul_cpu_id))
return -ENODEV;

if (!boot_cpu_has(X86_FEATURE_AVX))
return -ENODEV;

return crypto_register_shash(&polyval_alg);
}

static void __exit polyval_clmulni_mod_exit(void)
{
crypto_unregister_shash(&polyval_alg);
}

module_init(polyval_clmulni_mod_init);
module_exit(polyval_clmulni_mod_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("POLYVAL hash function accelerated by PCLMULQDQ-NI");
MODULE_ALIAS_CRYPTO("polyval");
MODULE_ALIAS_CRYPTO("polyval-clmulni");

@@ -33,6 +33,27 @@ config CRYPTO_FIPS
certification. You should say no unless you know what
this is.

config CRYPTO_FIPS_NAME
string "FIPS Module Name"
default "Linux Kernel Cryptographic API"
depends on CRYPTO_FIPS
help
This option sets the FIPS Module name reported by the Crypto API via
the /proc/sys/crypto/fips_name file.

config CRYPTO_FIPS_CUSTOM_VERSION
bool "Use Custom FIPS Module Version"
depends on CRYPTO_FIPS
default n

config CRYPTO_FIPS_VERSION
string "FIPS Module Version"
default "(none)"
depends on CRYPTO_FIPS_CUSTOM_VERSION
help
This option provides the ability to override the FIPS Module Version.
By default the KERNELRELEASE value is used.

config CRYPTO_ALGAPI
tristate
select CRYPTO_ALGAPI2
@@ -461,6 +482,15 @@ config CRYPTO_PCBC
PCBC: Propagating Cipher Block Chaining mode
This block cipher algorithm is required for RxRPC.

config CRYPTO_XCTR
tristate
select CRYPTO_SKCIPHER
select CRYPTO_MANAGER
help
XCTR: XOR Counter mode. This blockcipher mode is a variant of CTR mode
using XORs and little-endian addition rather than big-endian arithmetic.
XCTR mode is used to implement HCTR2.

config CRYPTO_XTS
tristate "XTS support"
select CRYPTO_SKCIPHER
@@ -524,6 +554,17 @@ config CRYPTO_ADIANTUM

If unsure, say N.

config CRYPTO_HCTR2
tristate "HCTR2 support"
select CRYPTO_XCTR
select CRYPTO_POLYVAL
select CRYPTO_MANAGER
help
HCTR2 is a length-preserving encryption mode for storage encryption that
is efficient on processors with instructions to accelerate AES and
carryless multiplication, e.g. x86 processors with AES-NI and CLMUL, and
ARM processors with the ARMv8 crypto extensions.

config CRYPTO_ESSIV
tristate "ESSIV support for block encryption"
select CRYPTO_AUTHENC
@@ -704,26 +745,8 @@ config CRYPTO_BLAKE2B

See https://blake2.net for further information.

config CRYPTO_BLAKE2S
tristate "BLAKE2s digest algorithm"
select CRYPTO_LIB_BLAKE2S_GENERIC
select CRYPTO_HASH
help
Implementation of cryptographic hash function BLAKE2s
optimized for 8-32bit platforms and can produce digests of any size
between 1 to 32. The keyed hash is also implemented.

This module provides the following algorithms:

- blake2s-128
- blake2s-160
- blake2s-224
- blake2s-256

See https://blake2.net for further information.

config CRYPTO_BLAKE2S_X86
tristate "BLAKE2s digest algorithm (x86 accelerated version)"
bool "BLAKE2s digest algorithm (x86 accelerated version)"
depends on X86 && 64BIT
select CRYPTO_LIB_BLAKE2S_GENERIC
select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
@@ -777,6 +800,23 @@ config CRYPTO_GHASH
GHASH is the hash function used in GCM (Galois/Counter Mode).
It is not a general-purpose cryptographic hash function.

config CRYPTO_POLYVAL
tristate
select CRYPTO_GF128MUL
select CRYPTO_HASH
help
POLYVAL is the hash function used in HCTR2. It is not a general-purpose
cryptographic hash function.

config CRYPTO_POLYVAL_CLMUL_NI
tristate "POLYVAL hash function (CLMUL-NI accelerated)"
depends on X86 && 64BIT
select CRYPTO_POLYVAL
help
This is the x86_64 CLMUL-NI accelerated implementation of POLYVAL. It is
used to efficiently implement HCTR2 on x86-64 processors that support
carry-less multiplication instructions.

config CRYPTO_POLY1305
tristate "Poly1305 authenticator algorithm"
select CRYPTO_HASH
@@ -861,7 +901,7 @@ config CRYPTO_RMD160

RIPEMD-160 is a 160-bit cryptographic hash function. It is intended
to be used as a secure replacement for the 128-bit hash functions
MD4, MD5 and it's predecessor RIPEMD
MD4, MD5 and its predecessor RIPEMD
(not to be confused with RIPEMD-128).

It's speed is comparable to SHA1 and there are no known attacks
@@ -873,6 +913,7 @@ config CRYPTO_RMD160
config CRYPTO_SHA1
tristate "SHA1 digest algorithm"
select CRYPTO_HASH
select CRYPTO_LIB_SHA1
help
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).

@@ -1214,7 +1255,7 @@ config CRYPTO_AES_NI_INTEL
In addition to AES cipher algorithm support, the acceleration
for some popular block cipher mode is supported too, including
ECB, CBC, LRW, XTS. The 64 bit version has additional
acceleration for CTR.
acceleration for CTR and XCTR.

config CRYPTO_AES_SPARC64
tristate "AES cipher algorithms (SPARC64)"
@@ -1603,6 +1644,21 @@ config CRYPTO_SEED
See also:
<http://www.kisa.or.kr/kisa/seed/jsp/seed_eng.jsp>

config CRYPTO_ARIA
tristate "ARIA cipher algorithm"
select CRYPTO_ALGAPI
help
ARIA cipher algorithm (RFC5794).

ARIA is a standard encryption algorithm of the Republic of Korea.
The ARIA specifies three key sizes and rounds.
128-bit: 12 rounds.
192-bit: 14 rounds.
256-bit: 16 rounds.

See also:
<https://seed.kisa.or.kr/kisa/algorithm/EgovAriaInfo.do>

config CRYPTO_SERPENT
tristate "Serpent cipher algorithm"
select CRYPTO_ALGAPI

@@ -84,7 +84,6 @@ obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o
obj-$(CONFIG_CRYPTO_WP512) += wp512.o
CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
obj-$(CONFIG_CRYPTO_BLAKE2B) += blake2b_generic.o
obj-$(CONFIG_CRYPTO_BLAKE2S) += blake2s_generic.o
obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
obj-$(CONFIG_CRYPTO_ECB) += ecb.o
obj-$(CONFIG_CRYPTO_CBC) += cbc.o
@@ -94,6 +93,8 @@ obj-$(CONFIG_CRYPTO_CTS) += cts.o
obj-$(CONFIG_CRYPTO_LRW) += lrw.o
obj-$(CONFIG_CRYPTO_XTS) += xts.o
obj-$(CONFIG_CRYPTO_CTR) += ctr.o
obj-$(CONFIG_CRYPTO_XCTR) += xctr.o
obj-$(CONFIG_CRYPTO_HCTR2) += hctr2.o
obj-$(CONFIG_CRYPTO_KEYWRAP) += keywrap.o
obj-$(CONFIG_CRYPTO_ADIANTUM) += adiantum.o
obj-$(CONFIG_CRYPTO_NHPOLY1305) += nhpoly1305.o
@@ -147,6 +148,7 @@ obj-$(CONFIG_CRYPTO_TEA) += tea.o
obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o
obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
obj-$(CONFIG_CRYPTO_SEED) += seed.o
obj-$(CONFIG_CRYPTO_ARIA) += aria.o
obj-$(CONFIG_CRYPTO_CHACHA20) += chacha_generic.o
obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o
obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
@@ -171,6 +173,7 @@ UBSAN_SANITIZE_jitterentropy.o = n
jitterentropy_rng-y := jitterentropy.o jitterentropy-kcapi.o
obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
obj-$(CONFIG_CRYPTO_GHASH) += ghash-generic.o
obj-$(CONFIG_CRYPTO_POLYVAL) += polyval-generic.o
obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o
obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o
obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
288 crypto/aria.c Normal file
@@ -0,0 +1,288 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Cryptographic API.
*
* ARIA Cipher Algorithm.
*
* Documentation of ARIA can be found in RFC 5794.
* Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com>
*
* Information for ARIA
* http://210.104.33.10/ARIA/index-e.html (English)
* http://seed.kisa.or.kr/ (Korean)
*
* Public domain version is distributed above.
*/

#include <crypto/aria.h>

static void aria_set_encrypt_key(struct aria_ctx *ctx, const u8 *in_key,
unsigned int key_len)
{
const __be32 *key = (const __be32 *)in_key;
u32 w0[4], w1[4], w2[4], w3[4];
u32 reg0, reg1, reg2, reg3;
const u32 *ck;
int rkidx = 0;

ck = &key_rc[(key_len - 16) / 8][0];

w0[0] = be32_to_cpu(key[0]);
w0[1] = be32_to_cpu(key[1]);
w0[2] = be32_to_cpu(key[2]);
w0[3] = be32_to_cpu(key[3]);

reg0 = w0[0] ^ ck[0];
reg1 = w0[1] ^ ck[1];
reg2 = w0[2] ^ ck[2];
reg3 = w0[3] ^ ck[3];

aria_subst_diff_odd(&reg0, &reg1, &reg2, &reg3);

if (key_len > 16) {
w1[0] = be32_to_cpu(key[4]);
w1[1] = be32_to_cpu(key[5]);
if (key_len > 24) {
w1[2] = be32_to_cpu(key[6]);
w1[3] = be32_to_cpu(key[7]);
} else {
w1[2] = 0;
w1[3] = 0;
}
} else {
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
}

w1[0] ^= reg0;
w1[1] ^= reg1;
w1[2] ^= reg2;
w1[3] ^= reg3;

reg0 = w1[0];
reg1 = w1[1];
reg2 = w1[2];
reg3 = w1[3];

reg0 ^= ck[4];
reg1 ^= ck[5];
reg2 ^= ck[6];
reg3 ^= ck[7];

aria_subst_diff_even(&reg0, &reg1, &reg2, &reg3);

reg0 ^= w0[0];
reg1 ^= w0[1];
reg2 ^= w0[2];
reg3 ^= w0[3];

w2[0] = reg0;
w2[1] = reg1;
w2[2] = reg2;
w2[3] = reg3;

reg0 ^= ck[8];
reg1 ^= ck[9];
reg2 ^= ck[10];
reg3 ^= ck[11];

aria_subst_diff_odd(&reg0, &reg1, &reg2, &reg3);

w3[0] = reg0 ^ w1[0];
w3[1] = reg1 ^ w1[1];
w3[2] = reg2 ^ w1[2];
w3[3] = reg3 ^ w1[3];

aria_gsrk(ctx->enc_key[rkidx], w0, w1, 19);
rkidx++;
aria_gsrk(ctx->enc_key[rkidx], w1, w2, 19);
rkidx++;
aria_gsrk(ctx->enc_key[rkidx], w2, w3, 19);
rkidx++;
aria_gsrk(ctx->enc_key[rkidx], w3, w0, 19);

rkidx++;
aria_gsrk(ctx->enc_key[rkidx], w0, w1, 31);
rkidx++;
aria_gsrk(ctx->enc_key[rkidx], w1, w2, 31);
rkidx++;
aria_gsrk(ctx->enc_key[rkidx], w2, w3, 31);
rkidx++;
aria_gsrk(ctx->enc_key[rkidx], w3, w0, 31);

rkidx++;
aria_gsrk(ctx->enc_key[rkidx], w0, w1, 67);
rkidx++;
aria_gsrk(ctx->enc_key[rkidx], w1, w2, 67);
rkidx++;
aria_gsrk(ctx->enc_key[rkidx], w2, w3, 67);
rkidx++;
aria_gsrk(ctx->enc_key[rkidx], w3, w0, 67);

rkidx++;
aria_gsrk(ctx->enc_key[rkidx], w0, w1, 97);
if (key_len > 16) {
rkidx++;
aria_gsrk(ctx->enc_key[rkidx], w1, w2, 97);
rkidx++;
aria_gsrk(ctx->enc_key[rkidx], w2, w3, 97);

if (key_len > 24) {
rkidx++;
aria_gsrk(ctx->enc_key[rkidx], w3, w0, 97);

rkidx++;
aria_gsrk(ctx->enc_key[rkidx], w0, w1, 109);
}
}
}

static void aria_set_decrypt_key(struct aria_ctx *ctx)
{
int i;

for (i = 0; i < 4; i++) {
ctx->dec_key[0][i] = ctx->enc_key[ctx->rounds][i];
ctx->dec_key[ctx->rounds][i] = ctx->enc_key[0][i];
}

for (i = 1; i < ctx->rounds; i++) {
ctx->dec_key[i][0] = aria_m(ctx->enc_key[ctx->rounds - i][0]);
ctx->dec_key[i][1] = aria_m(ctx->enc_key[ctx->rounds - i][1]);
ctx->dec_key[i][2] = aria_m(ctx->enc_key[ctx->rounds - i][2]);
ctx->dec_key[i][3] = aria_m(ctx->enc_key[ctx->rounds - i][3]);

aria_diff_word(&ctx->dec_key[i][0], &ctx->dec_key[i][1],
&ctx->dec_key[i][2], &ctx->dec_key[i][3]);
aria_diff_byte(&ctx->dec_key[i][1],
&ctx->dec_key[i][2], &ctx->dec_key[i][3]);
aria_diff_word(&ctx->dec_key[i][0], &ctx->dec_key[i][1],
&ctx->dec_key[i][2], &ctx->dec_key[i][3]);
}
}

static int aria_set_key(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
struct aria_ctx *ctx = crypto_tfm_ctx(tfm);

if (key_len != 16 && key_len != 24 && key_len != 32)
return -EINVAL;

ctx->key_length = key_len;
ctx->rounds = (key_len + 32) / 4;

aria_set_encrypt_key(ctx, in_key, key_len);
aria_set_decrypt_key(ctx);

return 0;
}
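The round-count expression in aria_set_key is compact enough to check by hand: with key_len in bytes, (key_len + 32) / 4 yields the RFC 5794 round counts of 12, 14, and 16 for 128-, 192-, and 256-bit keys. A one-file sanity check:

#include <assert.h>

int main(void)
{
        assert((16 + 32) / 4 == 12);    /* 128-bit key */
        assert((24 + 32) / 4 == 14);    /* 192-bit key */
        assert((32 + 32) / 4 == 16);    /* 256-bit key */
        return 0;
}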
static void __aria_crypt(struct aria_ctx *ctx, u8 *out, const u8 *in,
u32 key[][ARIA_RD_KEY_WORDS])
{
const __be32 *src = (const __be32 *)in;
__be32 *dst = (__be32 *)out;
u32 reg0, reg1, reg2, reg3;
int rounds, rkidx = 0;

rounds = ctx->rounds;

reg0 = be32_to_cpu(src[0]);
reg1 = be32_to_cpu(src[1]);
reg2 = be32_to_cpu(src[2]);
reg3 = be32_to_cpu(src[3]);

aria_add_round_key(key[rkidx], &reg0, &reg1, &reg2, &reg3);
rkidx++;

aria_subst_diff_odd(&reg0, &reg1, &reg2, &reg3);
aria_add_round_key(key[rkidx], &reg0, &reg1, &reg2, &reg3);
rkidx++;

while ((rounds -= 2) > 0) {
aria_subst_diff_even(&reg0, &reg1, &reg2, &reg3);
aria_add_round_key(key[rkidx], &reg0, &reg1, &reg2, &reg3);
rkidx++;

aria_subst_diff_odd(&reg0, &reg1, &reg2, &reg3);
aria_add_round_key(key[rkidx], &reg0, &reg1, &reg2, &reg3);
rkidx++;
}

reg0 = key[rkidx][0] ^ make_u32((u8)(x1[get_u8(reg0, 0)]),
(u8)(x2[get_u8(reg0, 1)] >> 8),
(u8)(s1[get_u8(reg0, 2)]),
(u8)(s2[get_u8(reg0, 3)]));
reg1 = key[rkidx][1] ^ make_u32((u8)(x1[get_u8(reg1, 0)]),
(u8)(x2[get_u8(reg1, 1)] >> 8),
(u8)(s1[get_u8(reg1, 2)]),
(u8)(s2[get_u8(reg1, 3)]));
reg2 = key[rkidx][2] ^ make_u32((u8)(x1[get_u8(reg2, 0)]),
(u8)(x2[get_u8(reg2, 1)] >> 8),
(u8)(s1[get_u8(reg2, 2)]),
(u8)(s2[get_u8(reg2, 3)]));
reg3 = key[rkidx][3] ^ make_u32((u8)(x1[get_u8(reg3, 0)]),
(u8)(x2[get_u8(reg3, 1)] >> 8),
(u8)(s1[get_u8(reg3, 2)]),
(u8)(s2[get_u8(reg3, 3)]));

dst[0] = cpu_to_be32(reg0);
dst[1] = cpu_to_be32(reg1);
dst[2] = cpu_to_be32(reg2);
dst[3] = cpu_to_be32(reg3);
}

static void aria_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
struct aria_ctx *ctx = crypto_tfm_ctx(tfm);

__aria_crypt(ctx, out, in, ctx->enc_key);
}

static void aria_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
struct aria_ctx *ctx = crypto_tfm_ctx(tfm);

__aria_crypt(ctx, out, in, ctx->dec_key);
}

static struct crypto_alg aria_alg = {
.cra_name = "aria",
.cra_driver_name = "aria-generic",
.cra_priority = 100,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = ARIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct aria_ctx),
.cra_alignmask = 3,
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
.cia_min_keysize = ARIA_MIN_KEY_SIZE,
.cia_max_keysize = ARIA_MAX_KEY_SIZE,
.cia_setkey = aria_set_key,
.cia_encrypt = aria_encrypt,
.cia_decrypt = aria_decrypt
}
}
};

static int __init aria_init(void)
{
return crypto_register_alg(&aria_alg);
}

static void __exit aria_fini(void)
{
crypto_unregister_alg(&aria_alg);
}

subsys_initcall(aria_init);
module_exit(aria_fini);

MODULE_DESCRIPTION("ARIA Cipher Algorithm");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Taehee Yoo <ap420073@gmail.com>");
MODULE_ALIAS_CRYPTO("aria");

@@ -1,75 +0,0 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* shash interface to the generic implementation of BLAKE2s
*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/

#include <crypto/internal/blake2s.h>
#include <crypto/internal/hash.h>

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>

static int crypto_blake2s_update_generic(struct shash_desc *desc,
const u8 *in, unsigned int inlen)
{
return crypto_blake2s_update(desc, in, inlen, true);
}

static int crypto_blake2s_final_generic(struct shash_desc *desc, u8 *out)
{
return crypto_blake2s_final(desc, out, true);
}

#define BLAKE2S_ALG(name, driver_name, digest_size) \
{ \
.base.cra_name = name, \
.base.cra_driver_name = driver_name, \
.base.cra_priority = 100, \
.base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \
.base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \
.base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \
.base.cra_module = THIS_MODULE, \
.digestsize = digest_size, \
.setkey = crypto_blake2s_setkey, \
.init = crypto_blake2s_init, \
.update = crypto_blake2s_update_generic, \
.final = crypto_blake2s_final_generic, \
.descsize = sizeof(struct blake2s_state), \
}

static struct shash_alg blake2s_algs[] = {
BLAKE2S_ALG("blake2s-128", "blake2s-128-generic",
BLAKE2S_128_HASH_SIZE),
BLAKE2S_ALG("blake2s-160", "blake2s-160-generic",
BLAKE2S_160_HASH_SIZE),
BLAKE2S_ALG("blake2s-224", "blake2s-224-generic",
BLAKE2S_224_HASH_SIZE),
BLAKE2S_ALG("blake2s-256", "blake2s-256-generic",
BLAKE2S_256_HASH_SIZE),
};

static int __init blake2s_mod_init(void)
{
return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
}

static void __exit blake2s_mod_exit(void)
{
crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
}

subsys_initcall(blake2s_mod_init);
module_exit(blake2s_mod_exit);

MODULE_ALIAS_CRYPTO("blake2s-128");
MODULE_ALIAS_CRYPTO("blake2s-128-generic");
MODULE_ALIAS_CRYPTO("blake2s-160");
MODULE_ALIAS_CRYPTO("blake2s-160-generic");
MODULE_ALIAS_CRYPTO("blake2s-224");
MODULE_ALIAS_CRYPTO("blake2s-224-generic");
MODULE_ALIAS_CRYPTO("blake2s-256");
MODULE_ALIAS_CRYPTO("blake2s-256-generic");
MODULE_LICENSE("GPL v2");

@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <linux/sysctl.h>
#include <linux/notifier.h>
#include <generated/utsrelease.h>

int fips_enabled;
EXPORT_SYMBOL_GPL(fips_enabled);
@@ -30,13 +31,37 @@ static int fips_enable(char *str)

__setup("fips=", fips_enable);

#define FIPS_MODULE_NAME CONFIG_CRYPTO_FIPS_NAME
#ifdef CONFIG_CRYPTO_FIPS_CUSTOM_VERSION
#define FIPS_MODULE_VERSION CONFIG_CRYPTO_FIPS_VERSION
#else
#define FIPS_MODULE_VERSION UTS_RELEASE
#endif

static char fips_name[] = FIPS_MODULE_NAME;
static char fips_version[] = FIPS_MODULE_VERSION;

static struct ctl_table crypto_sysctl_table[] = {
{
.procname = "fips_enabled",
.data = &fips_enabled,
.maxlen = sizeof(int),
.mode = 0444,
.proc_handler = proc_dointvec
.procname = "fips_enabled",
.data = &fips_enabled,
.maxlen = sizeof(int),
.mode = 0444,
.proc_handler = proc_dointvec
},
{
.procname = "fips_name",
.data = &fips_name,
.maxlen = 64,
.mode = 0444,
.proc_handler = proc_dostring
},
{
.procname = "fips_version",
.data = &fips_version,
.maxlen = 64,
.mode = 0444,
.proc_handler = proc_dostring
},
{}
};
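The sysctl table above is what makes the FIPS module name and version visible to userspace. A minimal userspace check, as a sketch (the files exist only on kernels built with CONFIG_CRYPTO_FIPS):

#include <stdio.h>

int main(void)
{
        char buf[64];
        FILE *f = fopen("/proc/sys/crypto/fips_name", "r");

        if (f) {
                if (fgets(buf, sizeof(buf), f))
                        printf("FIPS module name: %s", buf);
                fclose(f);
        }
        return 0;
}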
581 crypto/hctr2.c Normal file
@@ -0,0 +1,581 @@
// SPDX-License-Identifier: GPL-2.0
/*
* HCTR2 length-preserving encryption mode
*
* Copyright 2021 Google LLC
*/

/*
* HCTR2 is a length-preserving encryption mode that is efficient on
* processors with instructions to accelerate AES and carryless
* multiplication, e.g. x86 processors with AES-NI and CLMUL, and ARM
* processors with the ARMv8 crypto extensions.
*
* For more details, see the paper: "Length-preserving encryption with HCTR2"
* (https://eprint.iacr.org/2021/1441.pdf)
*/

#include <crypto/internal/cipher.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
#include <crypto/polyval.h>
#include <crypto/scatterwalk.h>
#include <linux/module.h>

#define BLOCKCIPHER_BLOCK_SIZE 16

/*
* The specification allows variable-length tweaks, but Linux's crypto API
* currently only allows algorithms to support a single length. The "natural"
* tweak length for HCTR2 is 16, since that fits into one POLYVAL block for
* the best performance. But longer tweaks are useful for fscrypt, to avoid
* needing to derive per-file keys. So instead we use two blocks, or 32 bytes.
*/
#define TWEAK_SIZE 32

struct hctr2_instance_ctx {
struct crypto_cipher_spawn blockcipher_spawn;
struct crypto_skcipher_spawn xctr_spawn;
struct crypto_shash_spawn polyval_spawn;
};

struct hctr2_tfm_ctx {
struct crypto_cipher *blockcipher;
struct crypto_skcipher *xctr;
struct crypto_shash *polyval;
u8 L[BLOCKCIPHER_BLOCK_SIZE];
int hashed_tweak_offset;
/*
* This struct is allocated with extra space for two exported hash
* states. Since the hash state size is not known at compile-time, we
* can't add these to the struct directly.
*
* hashed_tweaklen_divisible;
* hashed_tweaklen_remainder;
*/
};

struct hctr2_request_ctx {
u8 first_block[BLOCKCIPHER_BLOCK_SIZE];
u8 xctr_iv[BLOCKCIPHER_BLOCK_SIZE];
struct scatterlist *bulk_part_dst;
struct scatterlist *bulk_part_src;
struct scatterlist sg_src[2];
struct scatterlist sg_dst[2];
/*
* Sub-request sizes are unknown at compile-time, so they need to go
* after the members with known sizes.
*/
union {
struct shash_desc hash_desc;
struct skcipher_request xctr_req;
} u;
/*
* This struct is allocated with extra space for one exported hash
* state. Since the hash state size is not known at compile-time, we
* can't add it to the struct directly.
*
* hashed_tweak;
*/
};

static inline u8 *hctr2_hashed_tweaklen(const struct hctr2_tfm_ctx *tctx,
bool has_remainder)
{
u8 *p = (u8 *)tctx + sizeof(*tctx);

if (has_remainder) /* For messages not a multiple of block length */
p += crypto_shash_statesize(tctx->polyval);
return p;
}

static inline u8 *hctr2_hashed_tweak(const struct hctr2_tfm_ctx *tctx,
struct hctr2_request_ctx *rctx)
{
return (u8 *)rctx + tctx->hashed_tweak_offset;
}

/*
* The input data for each HCTR2 hash step begins with a 16-byte block that
* contains the tweak length and a flag that indicates whether the input is evenly
* divisible into blocks. Since this implementation only supports one tweak
* length, we precompute the two hash states resulting from hashing the two
* possible values of this initial block. This reduces by one block the amount of
* data that needs to be hashed for each encryption/decryption.
*
* These precomputed hashes are stored in hctr2_tfm_ctx.
*/
static int hctr2_hash_tweaklen(struct hctr2_tfm_ctx *tctx, bool has_remainder)
{
SHASH_DESC_ON_STACK(shash, tfm->polyval);
__le64 tweak_length_block[2];
int err;

shash->tfm = tctx->polyval;
memset(tweak_length_block, 0, sizeof(tweak_length_block));

tweak_length_block[0] = cpu_to_le64(TWEAK_SIZE * 8 * 2 + 2 + has_remainder);
err = crypto_shash_init(shash);
if (err)
return err;
err = crypto_shash_update(shash, (u8 *)tweak_length_block,
POLYVAL_BLOCK_SIZE);
if (err)
return err;
return crypto_shash_export(shash, hctr2_hashed_tweaklen(tctx, has_remainder));
}
|
||||
static int hctr2_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
|
||||
u8 hbar[BLOCKCIPHER_BLOCK_SIZE];
|
||||
int err;
|
||||
|
||||
crypto_cipher_clear_flags(tctx->blockcipher, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_cipher_set_flags(tctx->blockcipher,
|
||||
crypto_skcipher_get_flags(tfm) &
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
err = crypto_cipher_setkey(tctx->blockcipher, key, keylen);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
crypto_skcipher_clear_flags(tctx->xctr, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_skcipher_set_flags(tctx->xctr,
|
||||
crypto_skcipher_get_flags(tfm) &
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
err = crypto_skcipher_setkey(tctx->xctr, key, keylen);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
memset(hbar, 0, sizeof(hbar));
|
||||
crypto_cipher_encrypt_one(tctx->blockcipher, hbar, hbar);
|
||||
|
||||
memset(tctx->L, 0, sizeof(tctx->L));
|
||||
tctx->L[0] = 0x01;
|
||||
crypto_cipher_encrypt_one(tctx->blockcipher, tctx->L, tctx->L);
|
||||
|
||||
crypto_shash_clear_flags(tctx->polyval, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_shash_set_flags(tctx->polyval, crypto_skcipher_get_flags(tfm) &
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
err = crypto_shash_setkey(tctx->polyval, hbar, BLOCKCIPHER_BLOCK_SIZE);
|
||||
if (err)
|
||||
return err;
|
||||
memzero_explicit(hbar, sizeof(hbar));
|
||||
|
||||
return hctr2_hash_tweaklen(tctx, true) ?: hctr2_hash_tweaklen(tctx, false);
|
||||
}
|
||||
|
||||
static int hctr2_hash_tweak(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
const struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
|
||||
struct hctr2_request_ctx *rctx = skcipher_request_ctx(req);
|
||||
struct shash_desc *hash_desc = &rctx->u.hash_desc;
|
||||
int err;
|
||||
bool has_remainder = req->cryptlen % POLYVAL_BLOCK_SIZE;
|
||||
|
||||
hash_desc->tfm = tctx->polyval;
|
||||
err = crypto_shash_import(hash_desc, hctr2_hashed_tweaklen(tctx, has_remainder));
|
||||
if (err)
|
||||
return err;
|
||||
err = crypto_shash_update(hash_desc, req->iv, TWEAK_SIZE);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
// Store the hashed tweak, since we need it when computing both
|
||||
// H(T || N) and H(T || V).
|
||||
return crypto_shash_export(hash_desc, hctr2_hashed_tweak(tctx, rctx));
|
||||
}
|
||||
|
||||
static int hctr2_hash_message(struct skcipher_request *req,
|
||||
struct scatterlist *sgl,
|
||||
u8 digest[POLYVAL_DIGEST_SIZE])
|
||||
{
|
||||
static const u8 padding[BLOCKCIPHER_BLOCK_SIZE] = { 0x1 };
|
||||
struct hctr2_request_ctx *rctx = skcipher_request_ctx(req);
|
||||
struct shash_desc *hash_desc = &rctx->u.hash_desc;
|
||||
const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
|
||||
struct sg_mapping_iter miter;
|
||||
unsigned int remainder = bulk_len % BLOCKCIPHER_BLOCK_SIZE;
|
||||
int i;
|
||||
int err = 0;
|
||||
int n = 0;
|
||||
|
||||
sg_miter_start(&miter, sgl, sg_nents(sgl),
|
||||
SG_MITER_FROM_SG | SG_MITER_ATOMIC);
|
||||
for (i = 0; i < bulk_len; i += n) {
|
||||
sg_miter_next(&miter);
|
||||
n = min_t(unsigned int, miter.length, bulk_len - i);
|
||||
err = crypto_shash_update(hash_desc, miter.addr, n);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
sg_miter_stop(&miter);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (remainder) {
|
||||
err = crypto_shash_update(hash_desc, padding,
|
||||
BLOCKCIPHER_BLOCK_SIZE - remainder);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
return crypto_shash_final(hash_desc, digest);
|
||||
}
|
||||
|
||||
static int hctr2_finish(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
const struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
|
||||
struct hctr2_request_ctx *rctx = skcipher_request_ctx(req);
|
||||
u8 digest[POLYVAL_DIGEST_SIZE];
|
||||
struct shash_desc *hash_desc = &rctx->u.hash_desc;
|
||||
int err;
|
||||
|
||||
// U = UU ^ H(T || V)
|
||||
// or M = MM ^ H(T || N)
|
||||
hash_desc->tfm = tctx->polyval;
|
||||
err = crypto_shash_import(hash_desc, hctr2_hashed_tweak(tctx, rctx));
|
||||
if (err)
|
||||
return err;
|
||||
err = hctr2_hash_message(req, rctx->bulk_part_dst, digest);
|
||||
if (err)
|
||||
return err;
|
||||
crypto_xor(rctx->first_block, digest, BLOCKCIPHER_BLOCK_SIZE);
|
||||
|
||||
// Copy U (or M) into dst scatterlist
|
||||
scatterwalk_map_and_copy(rctx->first_block, req->dst,
|
||||
0, BLOCKCIPHER_BLOCK_SIZE, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void hctr2_xctr_done(struct crypto_async_request *areq,
|
||||
int err)
|
||||
{
|
||||
struct skcipher_request *req = areq->data;
|
||||
|
||||
if (!err)
|
||||
err = hctr2_finish(req);
|
||||
|
||||
skcipher_request_complete(req, err);
|
||||
}
|
||||
|
||||
static int hctr2_crypt(struct skcipher_request *req, bool enc)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
const struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
|
||||
struct hctr2_request_ctx *rctx = skcipher_request_ctx(req);
|
||||
u8 digest[POLYVAL_DIGEST_SIZE];
|
||||
int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
|
||||
int err;
|
||||
|
||||
// Requests must be at least one block
|
||||
if (req->cryptlen < BLOCKCIPHER_BLOCK_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
// Copy M (or U) into a temporary buffer
|
||||
scatterwalk_map_and_copy(rctx->first_block, req->src,
|
||||
0, BLOCKCIPHER_BLOCK_SIZE, 0);
|
||||
|
||||
// Create scatterlists for N and V
|
||||
rctx->bulk_part_src = scatterwalk_ffwd(rctx->sg_src, req->src,
|
||||
BLOCKCIPHER_BLOCK_SIZE);
|
||||
rctx->bulk_part_dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
|
||||
BLOCKCIPHER_BLOCK_SIZE);
|
||||
|
||||
// MM = M ^ H(T || N)
|
||||
// or UU = U ^ H(T || V)
|
||||
err = hctr2_hash_tweak(req);
|
||||
if (err)
|
||||
return err;
|
||||
err = hctr2_hash_message(req, rctx->bulk_part_src, digest);
|
||||
if (err)
|
||||
return err;
|
||||
crypto_xor(digest, rctx->first_block, BLOCKCIPHER_BLOCK_SIZE);
|
||||
|
||||
// UU = E(MM)
|
||||
// or MM = D(UU)
|
||||
if (enc)
|
||||
crypto_cipher_encrypt_one(tctx->blockcipher, rctx->first_block,
|
||||
digest);
|
||||
else
|
||||
crypto_cipher_decrypt_one(tctx->blockcipher, rctx->first_block,
|
||||
digest);
|
||||
|
||||
// S = MM ^ UU ^ L
|
||||
crypto_xor(digest, rctx->first_block, BLOCKCIPHER_BLOCK_SIZE);
|
||||
crypto_xor_cpy(rctx->xctr_iv, digest, tctx->L, BLOCKCIPHER_BLOCK_SIZE);
|
||||
|
||||
// V = XCTR(S, N)
|
||||
// or N = XCTR(S, V)
|
||||
skcipher_request_set_tfm(&rctx->u.xctr_req, tctx->xctr);
|
||||
skcipher_request_set_crypt(&rctx->u.xctr_req, rctx->bulk_part_src,
|
||||
rctx->bulk_part_dst, bulk_len,
|
||||
rctx->xctr_iv);
|
||||
skcipher_request_set_callback(&rctx->u.xctr_req,
|
||||
req->base.flags,
|
||||
hctr2_xctr_done, req);
|
||||
return crypto_skcipher_encrypt(&rctx->u.xctr_req) ?:
|
||||
hctr2_finish(req);
|
||||
}
|
||||
|
||||
static int hctr2_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
return hctr2_crypt(req, true);
|
||||
}
|
||||
|
||||
static int hctr2_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return hctr2_crypt(req, false);
|
||||
}
|
||||
|
||||
static int hctr2_init_tfm(struct crypto_skcipher *tfm)
|
||||
{
|
||||
struct skcipher_instance *inst = skcipher_alg_instance(tfm);
|
||||
struct hctr2_instance_ctx *ictx = skcipher_instance_ctx(inst);
|
||||
struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
|
||||
struct crypto_skcipher *xctr;
|
||||
struct crypto_cipher *blockcipher;
|
||||
struct crypto_shash *polyval;
|
||||
unsigned int subreq_size;
|
||||
int err;
|
||||
|
||||
xctr = crypto_spawn_skcipher(&ictx->xctr_spawn);
|
||||
if (IS_ERR(xctr))
|
||||
return PTR_ERR(xctr);
|
||||
|
||||
blockcipher = crypto_spawn_cipher(&ictx->blockcipher_spawn);
|
||||
if (IS_ERR(blockcipher)) {
|
||||
err = PTR_ERR(blockcipher);
|
||||
goto err_free_xctr;
|
||||
}
|
||||
|
||||
polyval = crypto_spawn_shash(&ictx->polyval_spawn);
|
||||
if (IS_ERR(polyval)) {
|
||||
err = PTR_ERR(polyval);
|
||||
goto err_free_blockcipher;
|
||||
}
|
||||
|
||||
tctx->xctr = xctr;
|
||||
tctx->blockcipher = blockcipher;
|
||||
tctx->polyval = polyval;
|
||||
|
||||
BUILD_BUG_ON(offsetofend(struct hctr2_request_ctx, u) !=
|
||||
sizeof(struct hctr2_request_ctx));
|
||||
subreq_size = max(sizeof_field(struct hctr2_request_ctx, u.hash_desc) +
|
||||
crypto_shash_descsize(polyval),
|
||||
sizeof_field(struct hctr2_request_ctx, u.xctr_req) +
|
||||
crypto_skcipher_reqsize(xctr));
|
||||
|
||||
tctx->hashed_tweak_offset = offsetof(struct hctr2_request_ctx, u) +
|
||||
subreq_size;
|
||||
crypto_skcipher_set_reqsize(tfm, tctx->hashed_tweak_offset +
|
||||
crypto_shash_statesize(polyval));
|
||||
return 0;
|
||||
|
||||
err_free_blockcipher:
|
||||
crypto_free_cipher(blockcipher);
|
||||
err_free_xctr:
|
||||
crypto_free_skcipher(xctr);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void hctr2_exit_tfm(struct crypto_skcipher *tfm)
|
||||
{
|
||||
struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
crypto_free_cipher(tctx->blockcipher);
|
||||
crypto_free_skcipher(tctx->xctr);
|
||||
crypto_free_shash(tctx->polyval);
|
||||
}
|
||||
|
||||
static void hctr2_free_instance(struct skcipher_instance *inst)
|
||||
{
|
||||
struct hctr2_instance_ctx *ictx = skcipher_instance_ctx(inst);
|
||||
|
||||
crypto_drop_cipher(&ictx->blockcipher_spawn);
|
||||
crypto_drop_skcipher(&ictx->xctr_spawn);
|
||||
crypto_drop_shash(&ictx->polyval_spawn);
|
||||
kfree(inst);
|
||||
}
|
||||
|
||||
static int hctr2_create_common(struct crypto_template *tmpl,
|
||||
struct rtattr **tb,
|
||||
const char *xctr_name,
|
||||
const char *polyval_name)
|
||||
{
|
||||
u32 mask;
|
||||
struct skcipher_instance *inst;
|
||||
struct hctr2_instance_ctx *ictx;
|
||||
struct skcipher_alg *xctr_alg;
|
||||
struct crypto_alg *blockcipher_alg;
|
||||
struct shash_alg *polyval_alg;
|
||||
char blockcipher_name[CRYPTO_MAX_ALG_NAME];
|
||||
int len;
|
||||
int err;
|
||||
|
||||
err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL);
|
||||
if (!inst)
|
||||
return -ENOMEM;
|
||||
ictx = skcipher_instance_ctx(inst);
|
||||
|
||||
/* Stream cipher, xctr(block_cipher) */
|
||||
err = crypto_grab_skcipher(&ictx->xctr_spawn,
|
||||
skcipher_crypto_instance(inst),
|
||||
xctr_name, 0, mask);
|
||||
if (err)
|
||||
goto err_free_inst;
|
||||
xctr_alg = crypto_spawn_skcipher_alg(&ictx->xctr_spawn);
|
||||
|
||||
err = -EINVAL;
|
||||
if (strncmp(xctr_alg->base.cra_name, "xctr(", 5))
|
||||
goto err_free_inst;
|
||||
len = strscpy(blockcipher_name, xctr_alg->base.cra_name + 5,
|
||||
sizeof(blockcipher_name));
|
||||
if (len < 1)
|
||||
goto err_free_inst;
|
||||
if (blockcipher_name[len - 1] != ')')
|
||||
goto err_free_inst;
|
||||
blockcipher_name[len - 1] = 0;
|
||||
|
||||
/* Block cipher, e.g. "aes" */
|
||||
err = crypto_grab_cipher(&ictx->blockcipher_spawn,
|
||||
skcipher_crypto_instance(inst),
|
||||
blockcipher_name, 0, mask);
|
||||
if (err)
|
||||
goto err_free_inst;
|
||||
blockcipher_alg = crypto_spawn_cipher_alg(&ictx->blockcipher_spawn);
|
||||
|
||||
/* Require blocksize of 16 bytes */
|
||||
err = -EINVAL;
|
||||
if (blockcipher_alg->cra_blocksize != BLOCKCIPHER_BLOCK_SIZE)
|
||||
goto err_free_inst;
|
||||
|
||||
/* Polyval ε-∆U hash function */
|
||||
err = crypto_grab_shash(&ictx->polyval_spawn,
|
||||
skcipher_crypto_instance(inst),
|
||||
polyval_name, 0, mask);
|
||||
if (err)
|
||||
goto err_free_inst;
|
||||
polyval_alg = crypto_spawn_shash_alg(&ictx->polyval_spawn);
|
||||
|
||||
/* Ensure Polyval is being used */
|
||||
err = -EINVAL;
|
||||
if (strcmp(polyval_alg->base.cra_name, "polyval") != 0)
|
||||
goto err_free_inst;
|
||||
|
||||
/* Instance fields */
|
||||
|
||||
err = -ENAMETOOLONG;
|
||||
if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "hctr2(%s)",
|
||||
blockcipher_alg->cra_name) >= CRYPTO_MAX_ALG_NAME)
|
||||
goto err_free_inst;
|
||||
if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
|
||||
"hctr2_base(%s,%s)",
|
||||
xctr_alg->base.cra_driver_name,
|
||||
polyval_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
|
||||
goto err_free_inst;
|
||||
|
||||
inst->alg.base.cra_blocksize = BLOCKCIPHER_BLOCK_SIZE;
|
||||
inst->alg.base.cra_ctxsize = sizeof(struct hctr2_tfm_ctx) +
|
||||
polyval_alg->statesize * 2;
|
||||
inst->alg.base.cra_alignmask = xctr_alg->base.cra_alignmask |
|
||||
polyval_alg->base.cra_alignmask;
|
||||
/*
|
||||
* The hash function is called twice, so it is weighted higher than the
|
||||
* xctr and blockcipher.
|
||||
*/
|
||||
inst->alg.base.cra_priority = (2 * xctr_alg->base.cra_priority +
|
||||
4 * polyval_alg->base.cra_priority +
|
||||
blockcipher_alg->cra_priority) / 7;
|
||||
|
||||
inst->alg.setkey = hctr2_setkey;
|
||||
inst->alg.encrypt = hctr2_encrypt;
|
||||
inst->alg.decrypt = hctr2_decrypt;
|
||||
inst->alg.init = hctr2_init_tfm;
|
||||
inst->alg.exit = hctr2_exit_tfm;
|
||||
inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(xctr_alg);
|
||||
inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(xctr_alg);
|
||||
inst->alg.ivsize = TWEAK_SIZE;
|
||||
|
||||
inst->free = hctr2_free_instance;
|
||||
|
||||
err = skcipher_register_instance(tmpl, inst);
|
||||
if (err) {
|
||||
err_free_inst:
|
||||
hctr2_free_instance(inst);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int hctr2_create_base(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
{
|
||||
const char *xctr_name;
|
||||
const char *polyval_name;
|
||||
|
||||
xctr_name = crypto_attr_alg_name(tb[1]);
|
||||
if (IS_ERR(xctr_name))
|
||||
return PTR_ERR(xctr_name);
|
||||
|
||||
polyval_name = crypto_attr_alg_name(tb[2]);
|
||||
if (IS_ERR(polyval_name))
|
||||
return PTR_ERR(polyval_name);
|
||||
|
||||
return hctr2_create_common(tmpl, tb, xctr_name, polyval_name);
|
||||
}
|
||||
|
||||
static int hctr2_create(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
{
|
||||
const char *blockcipher_name;
|
||||
char xctr_name[CRYPTO_MAX_ALG_NAME];
|
||||
|
||||
blockcipher_name = crypto_attr_alg_name(tb[1]);
|
||||
if (IS_ERR(blockcipher_name))
|
||||
return PTR_ERR(blockcipher_name);
|
||||
|
||||
if (snprintf(xctr_name, CRYPTO_MAX_ALG_NAME, "xctr(%s)",
|
||||
blockcipher_name) >= CRYPTO_MAX_ALG_NAME)
|
||||
return -ENAMETOOLONG;
|
||||
|
||||
return hctr2_create_common(tmpl, tb, xctr_name, "polyval");
|
||||
}
|
||||
|
||||
static struct crypto_template hctr2_tmpls[] = {
|
||||
{
|
||||
/* hctr2_base(xctr_name, polyval_name) */
|
||||
.name = "hctr2_base",
|
||||
.create = hctr2_create_base,
|
||||
.module = THIS_MODULE,
|
||||
}, {
|
||||
/* hctr2(blockcipher_name) */
|
||||
.name = "hctr2",
|
||||
.create = hctr2_create,
|
||||
.module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
||||
static int __init hctr2_module_init(void)
|
||||
{
|
||||
return crypto_register_templates(hctr2_tmpls, ARRAY_SIZE(hctr2_tmpls));
|
||||
}
|
||||
|
||||
static void __exit hctr2_module_exit(void)
|
||||
{
|
||||
return crypto_unregister_templates(hctr2_tmpls,
|
||||
ARRAY_SIZE(hctr2_tmpls));
|
||||
}
|
||||
|
||||
subsys_initcall(hctr2_module_init);
|
||||
module_exit(hctr2_module_exit);
|
||||
|
||||
MODULE_DESCRIPTION("HCTR2 length-preserving encryption mode");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_ALIAS_CRYPTO("hctr2");
|
||||
MODULE_IMPORT_NS(CRYPTO_INTERNAL);
|
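For context, a hedged sketch of how a kernel user might drive the template above through the regular skcipher API; the helper name and the trimmed error handling are illustrative, not from this patch. Note that cryptlen must be at least one 16-byte block, and the 32-byte tweak travels through the IV slot (ivsize = TWEAK_SIZE above).

#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <crypto/skcipher.h>

/* Hypothetical one-shot in-place encrypt helper for hctr2(aes). */
static int hctr2_encrypt_buf(const u8 *key, unsigned int keylen,
			     u8 *tweak /* 32 bytes */, u8 *buf, unsigned int len)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_skcipher("hctr2(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	/* One key feeds the xctr instance and the raw block cipher. */
	err = crypto_skcipher_setkey(tfm, key, keylen);
	if (err)
		goto out_free_tfm;
	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}
	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	/* len >= 16 required by hctr2_crypt(); tweak is passed as the IV. */
	skcipher_request_set_crypt(req, &sg, &sg, len, tweak);
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}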
crypto/polyval-generic.c (new file, 245 lines)
@@ -0,0 +1,245 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * POLYVAL: hash function for HCTR2.
 *
 * Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi>
 * Copyright (c) 2009 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 * Copyright 2021 Google LLC
 */

/*
 * Code based on crypto/ghash-generic.c
 *
 * POLYVAL is a keyed hash function similar to GHASH. POLYVAL uses a different
 * modulus for finite field multiplication which makes hardware accelerated
 * implementations on little-endian machines faster. POLYVAL is used in the
 * kernel to implement HCTR2, but was originally specified for AES-GCM-SIV
 * (RFC 8452).
 *
 * For more information see:
 * Length-preserving encryption with HCTR2:
 *	https://eprint.iacr.org/2021/1441.pdf
 * AES-GCM-SIV: Nonce Misuse-Resistant Authenticated Encryption:
 *	https://datatracker.ietf.org/doc/html/rfc8452
 *
 * Like GHASH, POLYVAL is not a cryptographic hash function and should
 * not be used outside of crypto modes explicitly designed to use POLYVAL.
 *
 * This implementation uses a convenient trick involving the GHASH and POLYVAL
 * fields. This trick allows multiplication in the POLYVAL field to be
 * implemented by using multiplication in the GHASH field as a subroutine. An
 * element of the POLYVAL field can be converted to an element of the GHASH
 * field by computing x*REVERSE(a), where REVERSE reverses the byte-ordering of
 * a. Similarly, an element of the GHASH field can be converted back to the
 * POLYVAL field by computing REVERSE(x^{-1}*a). For more information, see:
 *	https://datatracker.ietf.org/doc/html/rfc8452#appendix-A
 *
 * By using this trick, we do not need to implement the POLYVAL field for the
 * generic implementation.
 *
 * Warning: this generic implementation is not intended to be used in practice
 * and is not constant time. For practical use, a hardware accelerated
 * implementation of POLYVAL should be used instead.
 */

#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/gf128mul.h>
#include <crypto/polyval.h>
#include <crypto/internal/hash.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>

struct polyval_tfm_ctx {
	struct gf128mul_4k *gf128;
};

struct polyval_desc_ctx {
	union {
		u8 buffer[POLYVAL_BLOCK_SIZE];
		be128 buffer128;
	};
	u32 bytes;
};

static void copy_and_reverse(u8 dst[POLYVAL_BLOCK_SIZE],
			     const u8 src[POLYVAL_BLOCK_SIZE])
{
	u64 a = get_unaligned((const u64 *)&src[0]);
	u64 b = get_unaligned((const u64 *)&src[8]);

	put_unaligned(swab64(a), (u64 *)&dst[8]);
	put_unaligned(swab64(b), (u64 *)&dst[0]);
}

/*
 * Performs multiplication in the POLYVAL field using the GHASH field as a
 * subroutine. This function is used as a fallback for hardware accelerated
 * implementations when simd registers are unavailable.
 *
 * Note: This function is not used for polyval-generic; instead, the 4k
 * lookup table implementation is used for finite field multiplication.
 */
void polyval_mul_non4k(u8 *op1, const u8 *op2)
{
	be128 a, b;

	// Assume one argument is in Montgomery form and one is not.
	copy_and_reverse((u8 *)&a, op1);
	copy_and_reverse((u8 *)&b, op2);
	gf128mul_x_lle(&a, &a);
	gf128mul_lle(&a, &b);
	copy_and_reverse(op1, (u8 *)&a);
}
EXPORT_SYMBOL_GPL(polyval_mul_non4k);

/*
 * Perform a POLYVAL update using non4k multiplication. This function is used
 * as a fallback for hardware accelerated implementations when simd registers
 * are unavailable.
 *
 * Note: This function is not used for polyval-generic; instead, the 4k
 * lookup table implementation is used for finite field multiplication.
 */
void polyval_update_non4k(const u8 *key, const u8 *in,
			  size_t nblocks, u8 *accumulator)
{
	while (nblocks--) {
		crypto_xor(accumulator, in, POLYVAL_BLOCK_SIZE);
		polyval_mul_non4k(accumulator, key);
		in += POLYVAL_BLOCK_SIZE;
	}
}
EXPORT_SYMBOL_GPL(polyval_update_non4k);

static int polyval_setkey(struct crypto_shash *tfm,
			  const u8 *key, unsigned int keylen)
{
	struct polyval_tfm_ctx *ctx = crypto_shash_ctx(tfm);
	be128 k;

	if (keylen != POLYVAL_BLOCK_SIZE)
		return -EINVAL;

	gf128mul_free_4k(ctx->gf128);

	BUILD_BUG_ON(sizeof(k) != POLYVAL_BLOCK_SIZE);
	copy_and_reverse((u8 *)&k, key);
	gf128mul_x_lle(&k, &k);

	ctx->gf128 = gf128mul_init_4k_lle(&k);
	memzero_explicit(&k, POLYVAL_BLOCK_SIZE);

	if (!ctx->gf128)
		return -ENOMEM;

	return 0;
}

static int polyval_init(struct shash_desc *desc)
{
	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);

	memset(dctx, 0, sizeof(*dctx));

	return 0;
}

static int polyval_update(struct shash_desc *desc,
			  const u8 *src, unsigned int srclen)
{
	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
	const struct polyval_tfm_ctx *ctx = crypto_shash_ctx(desc->tfm);
	u8 *pos;
	u8 tmp[POLYVAL_BLOCK_SIZE];
	int n;

	if (dctx->bytes) {
		n = min(srclen, dctx->bytes);
		pos = dctx->buffer + dctx->bytes - 1;

		dctx->bytes -= n;
		srclen -= n;

		while (n--)
			*pos-- ^= *src++;

		if (!dctx->bytes)
			gf128mul_4k_lle(&dctx->buffer128, ctx->gf128);
	}

	while (srclen >= POLYVAL_BLOCK_SIZE) {
		copy_and_reverse(tmp, src);
		crypto_xor(dctx->buffer, tmp, POLYVAL_BLOCK_SIZE);
		gf128mul_4k_lle(&dctx->buffer128, ctx->gf128);
		src += POLYVAL_BLOCK_SIZE;
		srclen -= POLYVAL_BLOCK_SIZE;
	}

	if (srclen) {
		dctx->bytes = POLYVAL_BLOCK_SIZE - srclen;
		pos = dctx->buffer + POLYVAL_BLOCK_SIZE - 1;
		while (srclen--)
			*pos-- ^= *src++;
	}

	return 0;
}

static int polyval_final(struct shash_desc *desc, u8 *dst)
{
	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
	const struct polyval_tfm_ctx *ctx = crypto_shash_ctx(desc->tfm);

	if (dctx->bytes)
		gf128mul_4k_lle(&dctx->buffer128, ctx->gf128);
	copy_and_reverse(dst, dctx->buffer);
	return 0;
}

static void polyval_exit_tfm(struct crypto_tfm *tfm)
{
	struct polyval_tfm_ctx *ctx = crypto_tfm_ctx(tfm);

	gf128mul_free_4k(ctx->gf128);
}

static struct shash_alg polyval_alg = {
	.digestsize	= POLYVAL_DIGEST_SIZE,
	.init		= polyval_init,
	.update		= polyval_update,
	.final		= polyval_final,
	.setkey		= polyval_setkey,
	.descsize	= sizeof(struct polyval_desc_ctx),
	.base		= {
		.cra_name		= "polyval",
		.cra_driver_name	= "polyval-generic",
		.cra_priority		= 100,
		.cra_blocksize		= POLYVAL_BLOCK_SIZE,
		.cra_ctxsize		= sizeof(struct polyval_tfm_ctx),
		.cra_module		= THIS_MODULE,
		.cra_exit		= polyval_exit_tfm,
	},
};

static int __init polyval_mod_init(void)
{
	return crypto_register_shash(&polyval_alg);
}

static void __exit polyval_mod_exit(void)
{
	crypto_unregister_shash(&polyval_alg);
}

subsys_initcall(polyval_mod_init);
module_exit(polyval_mod_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("POLYVAL hash function");
MODULE_ALIAS_CRYPTO("polyval");
MODULE_ALIAS_CRYPTO("polyval-generic");
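A hedged usage sketch for the shash registered above (the helper is hypothetical, not part of this patch): POLYVAL takes a 16-byte key and produces a 16-byte digest through the ordinary shash interface.

#include <crypto/hash.h>

/* Hypothetical helper: one-shot POLYVAL of msg under a 16-byte key. */
static int polyval_digest_buf(const u8 key[16], const u8 *msg,
			      unsigned int len, u8 out[16])
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("polyval", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	/* keylen must be POLYVAL_BLOCK_SIZE (16), per polyval_setkey(). */
	err = crypto_shash_setkey(tfm, key, 16);
	if (!err)
		err = crypto_shash_tfm_digest(tfm, msg, len, out);
	crypto_free_shash(tfm);
	return err;
}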
crypto/rsa.c (78 lines changed)
@@ -17,6 +17,11 @@ struct rsa_mpi_key {
 	MPI n;
 	MPI e;
 	MPI d;
+	MPI p;
+	MPI q;
+	MPI dp;
+	MPI dq;
+	MPI qinv;
 };
 
 /*
@@ -35,16 +40,49 @@ static int _rsa_enc(const struct rsa_mpi_key *key, MPI c, MPI m)
 
 /*
  * RSADP function [RFC3447 sec 5.1.2]
- * m = c^d mod n;
+ * m_1 = c^dP mod p;
+ * m_2 = c^dQ mod q;
+ * h = (m_1 - m_2) * qInv mod p;
+ * m = m_2 + q * h;
  */
-static int _rsa_dec(const struct rsa_mpi_key *key, MPI m, MPI c)
+static int _rsa_dec_crt(const struct rsa_mpi_key *key, MPI m_or_m1_or_h, MPI c)
 {
+	MPI m2, m12_or_qh;
+	int ret = -ENOMEM;
+
 	/* (1) Validate 0 <= c < n */
 	if (mpi_cmp_ui(c, 0) < 0 || mpi_cmp(c, key->n) >= 0)
 		return -EINVAL;
 
-	/* (2) m = c^d mod n */
-	return mpi_powm(m, c, key->d, key->n);
+	m2 = mpi_alloc(0);
+	m12_or_qh = mpi_alloc(0);
+	if (!m2 || !m12_or_qh)
+		goto err_free_mpi;
+
+	/* (2i) m_1 = c^dP mod p */
+	ret = mpi_powm(m_or_m1_or_h, c, key->dp, key->p);
+	if (ret)
+		goto err_free_mpi;
+
+	/* (2ii) m_2 = c^dQ mod q */
+	ret = mpi_powm(m2, c, key->dq, key->q);
+	if (ret)
+		goto err_free_mpi;
+
+	/* (2iii) h = (m_1 - m_2) * qInv mod p */
+	mpi_sub(m12_or_qh, m_or_m1_or_h, m2);
+	mpi_mulm(m_or_m1_or_h, m12_or_qh, key->qinv, key->p);
+
+	/* (2iv) m = m_2 + q * h */
+	mpi_mul(m12_or_qh, key->q, m_or_m1_or_h);
+	mpi_addm(m_or_m1_or_h, m2, m12_or_qh, key->n);
+
+	ret = 0;
+
+err_free_mpi:
+	mpi_free(m12_or_qh);
+	mpi_free(m2);
+	return ret;
 }
 
 static inline struct rsa_mpi_key *rsa_get_key(struct crypto_akcipher *tfm)
@@ -112,7 +150,7 @@ static int rsa_dec(struct akcipher_request *req)
 	if (!c)
 		goto err_free_m;
 
-	ret = _rsa_dec(pkey, m, c);
+	ret = _rsa_dec_crt(pkey, m, c);
 	if (ret)
 		goto err_free_c;
 
@@ -134,9 +172,19 @@ static void rsa_free_mpi_key(struct rsa_mpi_key *key)
 	mpi_free(key->d);
 	mpi_free(key->e);
 	mpi_free(key->n);
+	mpi_free(key->p);
+	mpi_free(key->q);
+	mpi_free(key->dp);
+	mpi_free(key->dq);
+	mpi_free(key->qinv);
 	key->d = NULL;
 	key->e = NULL;
 	key->n = NULL;
+	key->p = NULL;
+	key->q = NULL;
+	key->dp = NULL;
+	key->dq = NULL;
+	key->qinv = NULL;
 }
 
 static int rsa_check_key_length(unsigned int len)
@@ -217,6 +265,26 @@ static int rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
 	if (!mpi_key->n)
 		goto err;
 
+	mpi_key->p = mpi_read_raw_data(raw_key.p, raw_key.p_sz);
+	if (!mpi_key->p)
+		goto err;
+
+	mpi_key->q = mpi_read_raw_data(raw_key.q, raw_key.q_sz);
+	if (!mpi_key->q)
+		goto err;
+
+	mpi_key->dp = mpi_read_raw_data(raw_key.dp, raw_key.dp_sz);
+	if (!mpi_key->dp)
+		goto err;
+
+	mpi_key->dq = mpi_read_raw_data(raw_key.dq, raw_key.dq_sz);
+	if (!mpi_key->dq)
+		goto err;
+
+	mpi_key->qinv = mpi_read_raw_data(raw_key.qinv, raw_key.qinv_sz);
+	if (!mpi_key->qinv)
+		goto err;
+
 	if (rsa_check_key_length(mpi_get_size(mpi_key->n) << 3)) {
 		rsa_free_mpi_key(mpi_key);
 		return -EINVAL;
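To see the CRT identities from the RSADP comment in action, here is a standalone toy computation with throwaway parameters chosen only for this example (p=11, q=13, e=7; nothing here comes from the patch):

#include <stdio.h>

/* Toy modular exponentiation; fine for the tiny operands below. */
static unsigned long modpow(unsigned long b, unsigned long e, unsigned long m)
{
	unsigned long r = 1;

	for (b %= m; e; e >>= 1, b = b * b % m)
		if (e & 1)
			r = r * b % m;
	return r;
}

int main(void)
{
	/* n = p*q = 143, d = 103; dp = d mod (p-1), dq = d mod (q-1). */
	const unsigned long p = 11, q = 13, n = 143, d = 103;
	const unsigned long dp = 3, dq = 7, qinv = 6;	/* 13*6 = 78 = 1 (mod 11) */
	unsigned long c = 48;				/* 9^7 mod 143 */
	unsigned long m1, m2, h, m;

	m1 = modpow(c, dp, p);				/* m_1 = c^dP mod p = 9 */
	m2 = modpow(c, dq, q);				/* m_2 = c^dQ mod q = 9 */
	h = (m1 + p - m2 % p) % p * qinv % p;		/* h = (m_1-m_2)*qInv mod p = 0 */
	m = m2 + q * h;					/* m = m_2 + q*h = 9 */
	printf("CRT: m=%lu, direct c^d mod n: m=%lu\n", m, modpow(c, d, n));
	return 0;
}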
@@ -58,7 +58,7 @@
  */
 static unsigned int sec;
 
-static char *alg = NULL;
+static char *alg;
 static u32 type;
 static u32 mask;
 static int mode;
@@ -71,7 +71,7 @@ static const char *check[] = {
 	"blowfish", "twofish", "serpent", "sha384", "sha512", "md4", "aes",
 	"cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea",
 	"khazad", "wp512", "wp384", "wp256", "xeta", "fcrypt",
-	"camellia", "seed", "rmd160",
+	"camellia", "seed", "rmd160", "aria",
 	"lzo", "lzo-rle", "cts", "sha3-224", "sha3-256", "sha3-384",
 	"sha3-512", "streebog256", "streebog512",
 	NULL
@@ -1556,6 +1556,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		ret += tcrypt_test("rfc3686(ctr(aes))");
 		ret += tcrypt_test("ofb(aes)");
 		ret += tcrypt_test("cfb(aes)");
+		ret += tcrypt_test("xctr(aes)");
 		break;
 
 	case 11:
@@ -1669,10 +1670,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		ret += tcrypt_test("rmd160");
 		break;
 
-	case 41:
-		ret += tcrypt_test("blake2s-256");
-		break;
-
 	case 42:
 		ret += tcrypt_test("blake2b-512");
 		break;
@@ -1729,6 +1726,14 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		ret += tcrypt_test("ccm(sm4)");
 		break;
 
+	case 57:
+		ret += tcrypt_test("polyval");
+		break;
+
+	case 58:
+		ret += tcrypt_test("gcm(aria)");
+		break;
+
 	case 100:
 		ret += tcrypt_test("hmac(md5)");
 		break;
@@ -1865,6 +1870,12 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		ret += tcrypt_test("cfb(sm4)");
 		ret += tcrypt_test("ctr(sm4)");
 		break;
+	case 192:
+		ret += tcrypt_test("ecb(aria)");
+		ret += tcrypt_test("cbc(aria)");
+		ret += tcrypt_test("cfb(aria)");
+		ret += tcrypt_test("ctr(aria)");
+		break;
 	case 200:
 		test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
 				  speed_template_16_24_32);
@@ -2186,6 +2197,37 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 				  16, 16, aead_speed_template_19, num_mb);
 		break;
 
+	case 226:
+		test_cipher_speed("hctr2(aes)", ENCRYPT, sec, NULL,
+				  0, speed_template_32);
+		break;
+
+	case 227:
+		test_cipher_speed("ecb(aria)", ENCRYPT, sec, NULL, 0,
+				  speed_template_16_24_32);
+		test_cipher_speed("ecb(aria)", DECRYPT, sec, NULL, 0,
+				  speed_template_16_24_32);
+		test_cipher_speed("cbc(aria)", ENCRYPT, sec, NULL, 0,
+				  speed_template_16_24_32);
+		test_cipher_speed("cbc(aria)", DECRYPT, sec, NULL, 0,
+				  speed_template_16_24_32);
+		test_cipher_speed("cfb(aria)", ENCRYPT, sec, NULL, 0,
+				  speed_template_16_24_32);
+		test_cipher_speed("cfb(aria)", DECRYPT, sec, NULL, 0,
+				  speed_template_16_24_32);
+		test_cipher_speed("ctr(aria)", ENCRYPT, sec, NULL, 0,
+				  speed_template_16_24_32);
+		test_cipher_speed("ctr(aria)", DECRYPT, sec, NULL, 0,
+				  speed_template_16_24_32);
+		break;
+
+	case 228:
+		test_aead_speed("gcm(aria)", ENCRYPT, sec,
+				NULL, 0, 16, 8, speed_template_16_24_32);
+		test_aead_speed("gcm(aria)", DECRYPT, sec,
+				NULL, 0, 16, 8, speed_template_16_24_32);
+		break;
+
 	case 300:
 		if (alg) {
 			test_hash_speed(alg, sec, generic_hash_speed_template);
@@ -2240,10 +2282,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		test_hash_speed("rmd160", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 		fallthrough;
-	case 316:
-		test_hash_speed("blake2s-256", sec, generic_hash_speed_template);
-		if (mode > 300 && mode < 400) break;
-		fallthrough;
 	case 317:
 		test_hash_speed("blake2b-512", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
@@ -2352,10 +2390,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		test_ahash_speed("rmd160", sec, generic_hash_speed_template);
 		if (mode > 400 && mode < 500) break;
 		fallthrough;
-	case 416:
-		test_ahash_speed("blake2s-256", sec, generic_hash_speed_template);
-		if (mode > 400 && mode < 500) break;
-		fallthrough;
 	case 417:
 		test_ahash_speed("blake2b-512", sec, generic_hash_speed_template);
 		if (mode > 400 && mode < 500) break;
@@ -4375,30 +4375,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.suite = {
 			.hash = __VECS(blake2b_512_tv_template)
 		}
-	}, {
-		.alg = "blake2s-128",
-		.test = alg_test_hash,
-		.suite = {
-			.hash = __VECS(blakes2s_128_tv_template)
-		}
-	}, {
-		.alg = "blake2s-160",
-		.test = alg_test_hash,
-		.suite = {
-			.hash = __VECS(blakes2s_160_tv_template)
-		}
-	}, {
-		.alg = "blake2s-224",
-		.test = alg_test_hash,
-		.suite = {
-			.hash = __VECS(blakes2s_224_tv_template)
-		}
-	}, {
-		.alg = "blake2s-256",
-		.test = alg_test_hash,
-		.suite = {
-			.hash = __VECS(blakes2s_256_tv_template)
-		}
 	}, {
 		.alg = "cbc(aes)",
 		.test = alg_test_skcipher,
@@ -4412,6 +4388,12 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.suite = {
 			.cipher = __VECS(anubis_cbc_tv_template)
 		},
+	}, {
+		.alg = "cbc(aria)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(aria_cbc_tv_template)
+		},
 	}, {
 		.alg = "cbc(blowfish)",
 		.test = alg_test_skcipher,
@@ -4529,6 +4511,12 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.suite = {
 			.cipher = __VECS(aes_cfb_tv_template)
 		},
+	}, {
+		.alg = "cfb(aria)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(aria_cfb_tv_template)
+		},
 	}, {
 		.alg = "cfb(sm4)",
 		.test = alg_test_skcipher,
@@ -4598,6 +4586,12 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.suite = {
 			.cipher = __VECS(aes_ctr_tv_template)
 		}
+	}, {
+		.alg = "ctr(aria)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(aria_ctr_tv_template)
+		}
 	}, {
 		.alg = "ctr(blowfish)",
 		.test = alg_test_skcipher,
@@ -4858,6 +4852,12 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.suite = {
 			.cipher = __VECS(arc4_tv_template)
 		}
+	}, {
+		.alg = "ecb(aria)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(aria_tv_template)
+		}
 	}, {
 		.alg = "ecb(blowfish)",
 		.test = alg_test_skcipher,
@@ -5074,6 +5074,13 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.suite = {
 			.aead = __VECS(aes_gcm_tv_template)
 		}
+	}, {
+		.alg = "gcm(aria)",
+		.generic_driver = "gcm_base(ctr(aria-generic),ghash-generic)",
+		.test = alg_test_aead,
+		.suite = {
+			.aead = __VECS(aria_gcm_tv_template)
+		}
 	}, {
 		.alg = "gcm(sm4)",
 		.generic_driver = "gcm_base(ctr(sm4-generic),ghash-generic)",
@@ -5088,6 +5095,14 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.suite = {
 			.hash = __VECS(ghash_tv_template)
 		}
+	}, {
+		.alg = "hctr2(aes)",
+		.generic_driver =
+		    "hctr2_base(xctr(aes-generic),polyval-generic)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(aes_hctr2_tv_template)
+		}
 	}, {
 		.alg = "hmac(md5)",
 		.test = alg_test_hash,
@@ -5342,6 +5357,12 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.suite = {
 			.hash = __VECS(poly1305_tv_template)
 		}
+	}, {
+		.alg = "polyval",
+		.test = alg_test_hash,
+		.suite = {
+			.hash = __VECS(polyval_tv_template)
+		}
 	}, {
 		.alg = "rfc3686(ctr(aes))",
 		.test = alg_test_skcipher,
@@ -5548,6 +5569,12 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.suite = {
 			.cipher = __VECS(xchacha20_tv_template)
 		},
+	}, {
+		.alg = "xctr(aes)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(aes_xctr_tv_template)
+		}
 	}, {
 		.alg = "xts(aes)",
 		.generic_driver = "xts(ecb(aes-generic))",
crypto/testmgr.h (4830 lines changed; diff suppressed because it is too large)
@@ -298,7 +298,7 @@ static const u32 mds[4][256] = {
  * multiplication is inefficient without hardware support. To multiply
  * faster, I make use of the fact x is a generator for the nonzero elements,
  * so that every element p of GF(2)[x]/w(x) is either 0 or equal to (x)^n for
- * some n in 0..254. Note that that caret is exponentiation in GF(2^8),
+ * some n in 0..254. Note that caret is exponentiation in GF(2^8),
  * *not* polynomial notation. So if I want to compute pq where p and q are
  * in GF(2^8), I can just say:
  *    1. if p=0 or q=0 then pq=0
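The log/exp trick this comment describes can be sketched in a few lines; the reduction polynomial below (0x169, i.e. w(x) = x^8+x^6+x^5+x^3+1) is my assumption for illustration and should be replaced with the field actually in use:

#include <stdint.h>

#define REDPOLY 0x169	/* assumed w(x); substitute the real field polynomial */

static uint8_t gf_exp[255], gf_log[256];

static void gf_init(void)
{
	unsigned int n, p = 1;

	for (n = 0; n < 255; n++) {
		gf_exp[n] = p;		/* gf_exp[n] = x^n */
		gf_log[p] = n;		/* inverse table */
		p <<= 1;		/* multiply by the generator x */
		if (p & 0x100)
			p ^= REDPOLY;	/* reduce mod w(x) */
	}
}

static uint8_t gf_mul(uint8_t p, uint8_t q)
{
	if (!p || !q)
		return 0;		/* rule 1 in the comment */
	/* pq = x^(log p + log q), exponents taken mod 255 */
	return gf_exp[(gf_log[p] + gf_log[q]) % 255];
}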
crypto/xctr.c (new file, 191 lines)
@@ -0,0 +1,191 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * XCTR: XOR Counter mode - Adapted from ctr.c
 *
 * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
 * Copyright 2021 Google LLC
 */

/*
 * XCTR mode is a blockcipher mode of operation used to implement HCTR2. XCTR is
 * closely related to the CTR mode of operation; the main difference is that CTR
 * generates the keystream using E(CTR + IV) whereas XCTR generates the
 * keystream using E(CTR ^ IV). This allows implementations to avoid dealing
 * with multi-limb integers (as is required in CTR mode). XCTR is also specified
 * using little-endian arithmetic which makes it slightly faster on LE machines.
 *
 * See the HCTR2 paper for more details:
 *	Length-preserving encryption with HCTR2
 *	(https://eprint.iacr.org/2021/1441.pdf)
 */

#include <crypto/algapi.h>
#include <crypto/internal/cipher.h>
#include <crypto/internal/skcipher.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>

/* For now this implementation is limited to 16-byte blocks for simplicity */
#define XCTR_BLOCKSIZE 16

static void crypto_xctr_crypt_final(struct skcipher_walk *walk,
				    struct crypto_cipher *tfm, u32 byte_ctr)
{
	u8 keystream[XCTR_BLOCKSIZE];
	const u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;
	__le32 ctr32 = cpu_to_le32(byte_ctr / XCTR_BLOCKSIZE + 1);

	crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32));
	crypto_cipher_encrypt_one(tfm, keystream, walk->iv);
	crypto_xor_cpy(dst, keystream, src, nbytes);
	crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32));
}

static int crypto_xctr_crypt_segment(struct skcipher_walk *walk,
				     struct crypto_cipher *tfm, u32 byte_ctr)
{
	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
		   crypto_cipher_alg(tfm)->cia_encrypt;
	const u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;
	__le32 ctr32 = cpu_to_le32(byte_ctr / XCTR_BLOCKSIZE + 1);

	do {
		crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32));
		fn(crypto_cipher_tfm(tfm), dst, walk->iv);
		crypto_xor(dst, src, XCTR_BLOCKSIZE);
		crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32));

		le32_add_cpu(&ctr32, 1);

		src += XCTR_BLOCKSIZE;
		dst += XCTR_BLOCKSIZE;
	} while ((nbytes -= XCTR_BLOCKSIZE) >= XCTR_BLOCKSIZE);

	return nbytes;
}

static int crypto_xctr_crypt_inplace(struct skcipher_walk *walk,
				     struct crypto_cipher *tfm, u32 byte_ctr)
{
	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
		   crypto_cipher_alg(tfm)->cia_encrypt;
	unsigned long alignmask = crypto_cipher_alignmask(tfm);
	unsigned int nbytes = walk->nbytes;
	u8 *data = walk->src.virt.addr;
	u8 tmp[XCTR_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
	u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1);
	__le32 ctr32 = cpu_to_le32(byte_ctr / XCTR_BLOCKSIZE + 1);

	do {
		crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32));
		fn(crypto_cipher_tfm(tfm), keystream, walk->iv);
		crypto_xor(data, keystream, XCTR_BLOCKSIZE);
		crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32));

		le32_add_cpu(&ctr32, 1);

		data += XCTR_BLOCKSIZE;
	} while ((nbytes -= XCTR_BLOCKSIZE) >= XCTR_BLOCKSIZE);

	return nbytes;
}

static int crypto_xctr_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_cipher *cipher = skcipher_cipher_simple(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;
	u32 byte_ctr = 0;

	err = skcipher_walk_virt(&walk, req, false);

	while (walk.nbytes >= XCTR_BLOCKSIZE) {
		if (walk.src.virt.addr == walk.dst.virt.addr)
			nbytes = crypto_xctr_crypt_inplace(&walk, cipher,
							   byte_ctr);
		else
			nbytes = crypto_xctr_crypt_segment(&walk, cipher,
							   byte_ctr);

		byte_ctr += walk.nbytes - nbytes;
		err = skcipher_walk_done(&walk, nbytes);
	}

	if (walk.nbytes) {
		crypto_xctr_crypt_final(&walk, cipher, byte_ctr);
		err = skcipher_walk_done(&walk, 0);
	}

	return err;
}

static int crypto_xctr_create(struct crypto_template *tmpl, struct rtattr **tb)
{
	struct skcipher_instance *inst;
	struct crypto_alg *alg;
	int err;

	inst = skcipher_alloc_instance_simple(tmpl, tb);
	if (IS_ERR(inst))
		return PTR_ERR(inst);

	alg = skcipher_ialg_simple(inst);

	/* Block size must be 16 bytes. */
	err = -EINVAL;
	if (alg->cra_blocksize != XCTR_BLOCKSIZE)
		goto out_free_inst;

	/* XCTR mode is a stream cipher. */
	inst->alg.base.cra_blocksize = 1;

	/*
	 * To simplify the implementation, configure the skcipher walk to only
	 * give a partial block at the very end, never earlier.
	 */
	inst->alg.chunksize = alg->cra_blocksize;

	inst->alg.encrypt = crypto_xctr_crypt;
	inst->alg.decrypt = crypto_xctr_crypt;

	err = skcipher_register_instance(tmpl, inst);
	if (err) {
out_free_inst:
		inst->free(inst);
	}

	return err;
}

static struct crypto_template crypto_xctr_tmpl = {
	.name = "xctr",
	.create = crypto_xctr_create,
	.module = THIS_MODULE,
};

static int __init crypto_xctr_module_init(void)
{
	return crypto_register_template(&crypto_xctr_tmpl);
}

static void __exit crypto_xctr_module_exit(void)
{
	crypto_unregister_template(&crypto_xctr_tmpl);
}

subsys_initcall(crypto_xctr_module_init);
module_exit(crypto_xctr_module_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("XCTR block cipher mode of operation");
MODULE_ALIAS_CRYPTO("xctr");
MODULE_IMPORT_NS(CRYPTO_INTERNAL);
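The CTR-versus-XCTR difference described in the header comment is easy to see in isolation. A hedged standalone sketch (the block cipher E is a stand-in parameter, not a real API), mirroring the crypto_xor(walk->iv, &ctr32, ...) dance above:

#include <stdint.h>
#include <string.h>

typedef void (*block_fn)(uint8_t out[16], const uint8_t in[16]);

/* keystream block i (1-based) = E(IV ^ i), i XORed in little-endian. */
static void xctr_keystream_block(block_fn E, const uint8_t iv[16],
				 uint32_t i, uint8_t out[16])
{
	uint8_t blk[16];

	memcpy(blk, iv, 16);
	blk[0] ^= (uint8_t)i;		/* no carries can propagate, */
	blk[1] ^= (uint8_t)(i >> 8);	/* unlike CTR's E(IV + i) which */
	blk[2] ^= (uint8_t)(i >> 16);	/* needs multi-limb addition */
	blk[3] ^= (uint8_t)(i >> 24);
	E(out, blk);
}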
@@ -145,7 +145,7 @@ static int via_rng_init(struct hwrng *rng)
 	}
 
 	/* Control the RNG via MSR. Tread lightly and pay very close
-	 * close attention to values written, as the reserved fields
+	 * attention to values written, as the reserved fields
 	 * are documented to be "undefined and unpredictable"; but it
 	 * does not say to write them as zero, so I make a guess that
 	 * we restore the values we find in the register.
@@ -170,6 +170,7 @@ dma_iv_error:
 	while (i >= 0) {
 		dma_unmap_single(ss->dev, rctx->p_iv[i], ivsize, DMA_TO_DEVICE);
+		memzero_explicit(sf->iv[i], ivsize);
 		i--;
 	}
 	return err;
 }
@@ -528,25 +528,33 @@ static int allocate_flows(struct sun8i_ss_dev *ss)
 
 		ss->flows[i].biv = devm_kmalloc(ss->dev, AES_BLOCK_SIZE,
 						GFP_KERNEL | GFP_DMA);
-		if (!ss->flows[i].biv)
+		if (!ss->flows[i].biv) {
+			err = -ENOMEM;
 			goto error_engine;
+		}
 
 		for (j = 0; j < MAX_SG; j++) {
 			ss->flows[i].iv[j] = devm_kmalloc(ss->dev, AES_BLOCK_SIZE,
 							  GFP_KERNEL | GFP_DMA);
-			if (!ss->flows[i].iv[j])
+			if (!ss->flows[i].iv[j]) {
+				err = -ENOMEM;
 				goto error_engine;
+			}
 		}
 
 		/* the padding could be up to two block. */
 		ss->flows[i].pad = devm_kmalloc(ss->dev, MAX_PAD_SIZE,
 						GFP_KERNEL | GFP_DMA);
-		if (!ss->flows[i].pad)
+		if (!ss->flows[i].pad) {
+			err = -ENOMEM;
 			goto error_engine;
+		}
 		ss->flows[i].result = devm_kmalloc(ss->dev, SHA256_DIGEST_SIZE,
 						   GFP_KERNEL | GFP_DMA);
-		if (!ss->flows[i].result)
+		if (!ss->flows[i].result) {
+			err = -ENOMEM;
 			goto error_engine;
+		}
 
 		ss->flows[i].engine = crypto_engine_alloc_init(ss->dev, true);
 		if (!ss->flows[i].engine) {
@@ -30,8 +30,8 @@ static int sun8i_ss_hashkey(struct sun8i_ss_hash_tfm_ctx *tfmctx, const u8 *key,
 	int ret = 0;
 
 	xtfm = crypto_alloc_shash("sha1", 0, CRYPTO_ALG_NEED_FALLBACK);
-	if (!xtfm)
-		return -ENOMEM;
+	if (IS_ERR(xtfm))
+		return PTR_ERR(xtfm);
 
 	len = sizeof(*sdesc) + crypto_shash_descsize(xtfm);
 	sdesc = kmalloc(len, GFP_KERNEL);
@@ -586,7 +586,8 @@ retry:
 		rctx->t_dst[k + 1].len = rctx->t_dst[k].len;
 	}
 	addr_xpad = dma_map_single(ss->dev, tfmctx->ipad, bs, DMA_TO_DEVICE);
-	if (dma_mapping_error(ss->dev, addr_xpad)) {
+	err = dma_mapping_error(ss->dev, addr_xpad);
+	if (err) {
 		dev_err(ss->dev, "Fail to create DMA mapping of ipad\n");
 		goto err_dma_xpad;
 	}
@@ -612,7 +613,8 @@ retry:
 		goto err_dma_result;
 	}
 	addr_xpad = dma_map_single(ss->dev, tfmctx->opad, bs, DMA_TO_DEVICE);
-	if (dma_mapping_error(ss->dev, addr_xpad)) {
+	err = dma_mapping_error(ss->dev, addr_xpad);
+	if (err) {
 		dev_err(ss->dev, "Fail to create DMA mapping of opad\n");
 		goto err_dma_xpad;
 	}
@@ -1378,6 +1378,7 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 	struct resource res;
 	struct device *dev = &ofdev->dev;
 	struct crypto4xx_core_device *core_dev;
+	struct device_node *np;
 	u32 pvr;
 	bool is_revb = true;
 
@@ -1385,29 +1386,36 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 	if (rc)
 		return -ENODEV;
 
-	if (of_find_compatible_node(NULL, NULL, "amcc,ppc460ex-crypto")) {
+	np = of_find_compatible_node(NULL, NULL, "amcc,ppc460ex-crypto");
+	if (np) {
 		mtdcri(SDR0, PPC460EX_SDR0_SRST,
 		       mfdcri(SDR0, PPC460EX_SDR0_SRST) | PPC460EX_CE_RESET);
 		mtdcri(SDR0, PPC460EX_SDR0_SRST,
 		       mfdcri(SDR0, PPC460EX_SDR0_SRST) & ~PPC460EX_CE_RESET);
-	} else if (of_find_compatible_node(NULL, NULL,
-			"amcc,ppc405ex-crypto")) {
-		mtdcri(SDR0, PPC405EX_SDR0_SRST,
-		       mfdcri(SDR0, PPC405EX_SDR0_SRST) | PPC405EX_CE_RESET);
-		mtdcri(SDR0, PPC405EX_SDR0_SRST,
-		       mfdcri(SDR0, PPC405EX_SDR0_SRST) & ~PPC405EX_CE_RESET);
-		is_revb = false;
-	} else if (of_find_compatible_node(NULL, NULL,
-			"amcc,ppc460sx-crypto")) {
-		mtdcri(SDR0, PPC460SX_SDR0_SRST,
-		       mfdcri(SDR0, PPC460SX_SDR0_SRST) | PPC460SX_CE_RESET);
-		mtdcri(SDR0, PPC460SX_SDR0_SRST,
-		       mfdcri(SDR0, PPC460SX_SDR0_SRST) & ~PPC460SX_CE_RESET);
-	} else {
-		printk(KERN_ERR "Crypto Function Not supported!\n");
-		return -EINVAL;
+	} else {
+		np = of_find_compatible_node(NULL, NULL, "amcc,ppc405ex-crypto");
+		if (np) {
+			mtdcri(SDR0, PPC405EX_SDR0_SRST,
+			       mfdcri(SDR0, PPC405EX_SDR0_SRST) | PPC405EX_CE_RESET);
+			mtdcri(SDR0, PPC405EX_SDR0_SRST,
+			       mfdcri(SDR0, PPC405EX_SDR0_SRST) & ~PPC405EX_CE_RESET);
+			is_revb = false;
+		} else {
+			np = of_find_compatible_node(NULL, NULL, "amcc,ppc460sx-crypto");
+			if (np) {
+				mtdcri(SDR0, PPC460SX_SDR0_SRST,
+				       mfdcri(SDR0, PPC460SX_SDR0_SRST) | PPC460SX_CE_RESET);
+				mtdcri(SDR0, PPC460SX_SDR0_SRST,
+				       mfdcri(SDR0, PPC460SX_SDR0_SRST) & ~PPC460SX_CE_RESET);
+			} else {
+				printk(KERN_ERR "Crypto Function Not supported!\n");
+				return -EINVAL;
+			}
+		}
 	}
 
+	of_node_put(np);
+
 	core_dev = kzalloc(sizeof(struct crypto4xx_core_device), GFP_KERNEL);
 	if (!core_dev)
 		return -ENOMEM;
@@ -2669,8 +2669,7 @@ static int atmel_aes_remove(struct platform_device *pdev)
 	struct atmel_aes_dev *aes_dd;
 
 	aes_dd = platform_get_drvdata(pdev);
-	if (!aes_dd)
-		return -ENODEV;
 
 	spin_lock(&atmel_aes.lock);
 	list_del(&aes_dd->list);
 	spin_unlock(&atmel_aes.lock);
@@ -349,8 +349,16 @@ static int atmel_ecc_remove(struct i2c_client *client)
 
 	/* Return EBUSY if i2c client already allocated. */
 	if (atomic_read(&i2c_priv->tfm_count)) {
-		dev_err(&client->dev, "Device is busy\n");
-		return -EBUSY;
+		/*
+		 * After we return here, the memory backing the device is freed.
+		 * That happens no matter what the return value of this function
+		 * is because in the Linux device model there is no error
+		 * handling for unbinding a driver.
+		 * If there is still some action pending, it probably involves
+		 * accessing the freed memory.
+		 */
+		dev_emerg(&client->dev, "Device is busy, expect memory corruption.\n");
+		return 0;
 	}
 
 	crypto_unregister_kpp(&atmel_ecdh_nist_p256);
@@ -2666,11 +2666,8 @@ err_tasklet_kill:
 
 static int atmel_sha_remove(struct platform_device *pdev)
 {
-	struct atmel_sha_dev *sha_dd;
+	struct atmel_sha_dev *sha_dd = platform_get_drvdata(pdev);
 
-	sha_dd = platform_get_drvdata(pdev);
-	if (!sha_dd)
-		return -ENODEV;
 	spin_lock(&atmel_sha.lock);
 	list_del(&sha_dd->list);
 	spin_unlock(&atmel_sha.lock);
@@ -1263,11 +1263,8 @@ err_tasklet_kill:
 
 static int atmel_tdes_remove(struct platform_device *pdev)
 {
-	struct atmel_tdes_dev *tdes_dd;
+	struct atmel_tdes_dev *tdes_dd = platform_get_drvdata(pdev);
 
-	tdes_dd = platform_get_drvdata(pdev);
-	if (!tdes_dd)
-		return -ENODEV;
 	spin_lock(&atmel_tdes.lock);
 	list_del(&tdes_dd->list);
 	spin_unlock(&atmel_tdes.lock);
@@ -29,7 +29,7 @@
 	 SHA512_DIGEST_SIZE * 2)
 
 /*
- * This is a a cache of buffers, from which the users of CAAM QI driver
+ * This is a cache of buffers, from which the users of CAAM QI driver
  * can allocate short buffers. It's speedier than doing kmalloc on the hotpath.
  * NOTE: A more elegant solution would be to have some headroom in the frames
  * being processed. This can be added by the dpaa2-eth driver. This would
@@ -5083,8 +5083,9 @@ static int __cold dpaa2_dpseci_setup(struct fsl_mc_device *ls_dev)
 
 		ppriv->net_dev.dev = *dev;
 		INIT_LIST_HEAD(&ppriv->net_dev.napi_list);
-		netif_napi_add(&ppriv->net_dev, &ppriv->napi, dpaa2_dpseci_poll,
-			       DPAA2_CAAM_NAPI_WEIGHT);
+		netif_napi_add_tx_weight(&ppriv->net_dev, &ppriv->napi,
+					 dpaa2_dpseci_poll,
+					 DPAA2_CAAM_NAPI_WEIGHT);
 	}
 
 	return 0;
@@ -22,7 +22,7 @@
  * @ctx_len: size of Context Register
  * @import_ctx: true if previous Context Register needs to be restored
  *              must be true for ahash update and final
- *              must be false for for ahash first and digest
+ *              must be false for ahash first and digest
  * @era: SEC Era
  */
 void cnstr_shdsc_ahash(u32 * const desc, struct alginfo *adata, u32 state,
@@ -75,7 +75,7 @@ bool caam_congested __read_mostly;
 EXPORT_SYMBOL(caam_congested);
 
 /*
- * This is a a cache of buffers, from which the users of CAAM QI driver
+ * This is a cache of buffers, from which the users of CAAM QI driver
  * can allocate short (CAAM_QI_MEMCACHE_SIZE) buffers. It's faster than
  * doing malloc on the hotpath.
 * NOTE: A more elegant solution would be to have some headroom in the frames
|
||||
net_dev->dev = *qidev;
|
||||
INIT_LIST_HEAD(&net_dev->napi_list);
|
||||
|
||||
netif_napi_add(net_dev, irqtask, caam_qi_poll,
|
||||
CAAM_NAPI_WEIGHT);
|
||||
netif_napi_add_tx_weight(net_dev, irqtask, caam_qi_poll,
|
||||
CAAM_NAPI_WEIGHT);
|
||||
|
||||
napi_enable(irqtask);
|
||||
}
|
||||
|
@@ -265,7 +265,7 @@ union cptx_pf_exe_bist_status {
  * big-endian format in memory.
  * iqb_ldwb:1 [7:7](R/W) Instruction load don't write back.
  *   0 = The hardware issues NCB transient load (LDT) towards the cache,
- *   which if the line hits and is is dirty will cause the line to be
+ *   which if the line hits and it is dirty will cause the line to be
  *   written back before being replaced.
  *   1 = The hardware issues NCB LDWB read-and-invalidate command towards
  *   the cache when fetching the last word of instructions; as a result the
@@ -366,7 +366,7 @@ struct ccp_device {
 
 	/* Master lists that all cmds are queued on. Because there can be
 	 * more than one CCP command queue that can process a cmd a separate
-	 * backlog list is neeeded so that the backlog completion call
+	 * backlog list is needed so that the backlog completion call
 	 * completes before the cmd is available for execution.
 	 */
 	spinlock_t cmd_lock ____cacheline_aligned;
@ -503,7 +503,7 @@ static int __sev_platform_shutdown_locked(int *error)
|
||||
struct sev_device *sev = psp_master->sev_data;
|
||||
int ret;
|
||||
|
||||
if (sev->state == SEV_STATE_UNINIT)
|
||||
if (!sev || sev->state == SEV_STATE_UNINIT)
|
||||
return 0;
|
||||
|
||||
ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
|
||||
@ -577,6 +577,8 @@ static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp)
|
||||
struct sev_user_data_status data;
|
||||
int ret;
|
||||
|
||||
memset(&data, 0, sizeof(data));
|
||||
|
||||
ret = __sev_do_cmd_locked(SEV_CMD_PLATFORM_STATUS, &data, &argp->error);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -630,7 +632,7 @@ static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable)
|
||||
if (input.length > SEV_FW_BLOB_MAX_SIZE)
|
||||
return -EFAULT;
|
||||
|
||||
blob = kmalloc(input.length, GFP_KERNEL);
|
||||
blob = kzalloc(input.length, GFP_KERNEL);
|
||||
if (!blob)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -854,7 +856,7 @@ static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp)
|
||||
input_address = (void __user *)input.address;
|
||||
|
||||
if (input.address && input.length) {
|
||||
id_blob = kmalloc(input.length, GFP_KERNEL);
|
||||
id_blob = kzalloc(input.length, GFP_KERNEL);
|
||||
if (!id_blob)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -973,14 +975,14 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable)
|
||||
if (input.cert_chain_len > SEV_FW_BLOB_MAX_SIZE)
|
||||
return -EFAULT;
|
||||
|
||||
pdh_blob = kmalloc(input.pdh_cert_len, GFP_KERNEL);
|
||||
pdh_blob = kzalloc(input.pdh_cert_len, GFP_KERNEL);
|
||||
if (!pdh_blob)
|
||||
return -ENOMEM;
|
||||
|
||||
data.pdh_cert_address = __psp_pa(pdh_blob);
|
||||
data.pdh_cert_len = input.pdh_cert_len;
|
||||
|
||||
cert_blob = kmalloc(input.cert_chain_len, GFP_KERNEL);
|
||||
cert_blob = kzalloc(input.cert_chain_len, GFP_KERNEL);
|
||||
if (!cert_blob) {
|
||||
ret = -ENOMEM;
|
||||
goto e_free_pdh;
|
||||
|
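The kmalloc-to-kzalloc swaps above all guard blobs whose contents are later copied back to user space; zero-initialising them means a short or failed firmware fill cannot leak stale heap data. A hedged sketch of the idiom (fw_fill_blob is a hypothetical firmware call, not from this diff):

#include <linux/slab.h>
#include <linux/uaccess.h>

int fw_fill_blob(void *blob, size_t len);	/* hypothetical */

static int export_blob(void __user *dst, size_t len)
{
	void *blob;
	int ret;

	/* kzalloc, not kmalloc: if the device fills less than 'len',
	 * the remainder is zeroes rather than old kernel heap contents.
	 */
	blob = kzalloc(len, GFP_KERNEL);
	if (!blob)
		return -ENOMEM;

	ret = fw_fill_blob(blob, len);
	if (!ret && copy_to_user(dst, blob, len))
		ret = -EFAULT;

	kfree(blob);
	return ret;
}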
@@ -427,6 +427,12 @@ static const struct sp_dev_vdata dev_vdata[] = {
		.bar = 2,
 #ifdef CONFIG_CRYPTO_DEV_SP_PSP
		.psp_vdata = &pspv2,
 #endif
	},
+	{	/* 6 */
+		.bar = 2,
+#ifdef CONFIG_CRYPTO_DEV_SP_PSP
+		.psp_vdata = &pspv3,
+#endif
+	},
 };
@@ -438,6 +444,7 @@ static const struct pci_device_id sp_pci_table[] = {
	{ PCI_VDEVICE(AMD, 0x15DF), (kernel_ulong_t)&dev_vdata[4] },
	{ PCI_VDEVICE(AMD, 0x1649), (kernel_ulong_t)&dev_vdata[4] },
	{ PCI_VDEVICE(AMD, 0x14CA), (kernel_ulong_t)&dev_vdata[5] },
+	{ PCI_VDEVICE(AMD, 0x15C7), (kernel_ulong_t)&dev_vdata[6] },
	/* Last entry must be zero */
	{ 0, }
 };

@@ -372,17 +372,10 @@ static int init_cc_resources(struct platform_device *plat_dev)
	dev->dma_mask = &dev->coherent_dma_mask;

	dma_mask = DMA_BIT_MASK(DMA_BIT_MASK_LEN);
-	while (dma_mask > 0x7fffffffUL) {
-		if (dma_supported(dev, dma_mask)) {
-			rc = dma_set_coherent_mask(dev, dma_mask);
-			if (!rc)
-				break;
-		}
-		dma_mask >>= 1;
-	}
-
+	rc = dma_set_coherent_mask(dev, dma_mask);
	if (rc) {
-		dev_err(dev, "Failed in dma_set_mask, mask=%llx\n", dma_mask);
+		dev_err(dev, "Failed in dma_set_coherent_mask, mask=%llx\n",
+			dma_mask);
		return rc;
	}
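The ccree hunk drops an open-coded probe loop because dma_set_coherent_mask() already validates the mask internally; dma_supported() was never meant to be called directly by drivers. A minimal sketch of the preferred pattern (a hypothetical probe; MY_DMA_BITS is illustrative):

#include <linux/dma-mapping.h>
#include <linux/platform_device.h>

#define MY_DMA_BITS 48	/* hypothetical device addressing capability */

static int my_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	int rc;

	/* Ask for the widest mask the hardware supports; the DMA core
	 * rejects it if the platform cannot honour it - no manual
	 * dma_supported() probing loop is needed.
	 */
	rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(MY_DMA_BITS));
	if (rc)
		dev_err(dev, "no usable DMA configuration\n");
	return rc;
}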
@@ -41,6 +41,7 @@ static int cc_pm_resume(struct device *dev)
	/* wait for Cryptocell reset completion */
	if (!cc_wait_for_reset_completion(drvdata)) {
		dev_err(dev, "Cryptocell reset not completed");
+		clk_disable_unprepare(drvdata->clk);
		return -EBUSY;
	}

@@ -48,6 +49,7 @@ static int cc_pm_resume(struct device *dev)
	rc = init_cc_regs(drvdata);
	if (rc) {
		dev_err(dev, "init_cc_regs (%x)\n", rc);
+		clk_disable_unprepare(drvdata->clk);
		return rc;
	}
	/* check if tee fips error occurred during power down */

@@ -252,7 +252,7 @@ static int hpre_prepare_dma_buf(struct hpre_asym_request *hpre_req,
	if (unlikely(shift < 0))
		return -EINVAL;

-	ptr = dma_alloc_coherent(dev, ctx->key_sz, tmp, GFP_KERNEL);
+	ptr = dma_alloc_coherent(dev, ctx->key_sz, tmp, GFP_ATOMIC);
	if (unlikely(!ptr))
		return -ENOMEM;
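The hpre change matters because this allocation can be reached from softirq context, where GFP_KERNEL may sleep. A hedged, generic sketch of the rule (the helper is illustrative; real drivers usually know their context statically rather than testing it):

#include <linux/dma-mapping.h>
#include <linux/preempt.h>

static void *alloc_dma_buf(struct device *dev, size_t len, dma_addr_t *dma)
{
	/* GFP_KERNEL may sleep, so it is only safe in process context;
	 * code running in softirq/tasklet context must use GFP_ATOMIC.
	 */
	gfp_t gfp = in_task() ? GFP_KERNEL : GFP_ATOMIC;

	return dma_alloc_coherent(dev, len, dma, gfp);
}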
@@ -877,13 +877,6 @@ static void qm_pm_put_sync(struct hisi_qm *qm)
	pm_runtime_put_autosuspend(dev);
 }

-static struct hisi_qp *qm_to_hisi_qp(struct hisi_qm *qm, struct qm_eqe *eqe)
-{
-	u16 cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
-
-	return &qm->qp_array[cqn];
-}
-
 static void qm_cq_head_update(struct hisi_qp *qp)
 {
	if (qp->qp_status.cq_head == QM_Q_DEPTH - 1) {
@@ -894,47 +887,37 @@ static void qm_cq_head_update(struct hisi_qp *qp)
	}
 }

-static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm)
+static void qm_poll_req_cb(struct hisi_qp *qp)
 {
-	if (unlikely(atomic_read(&qp->qp_status.flags) == QP_STOP))
-		return;
+	struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
+	struct hisi_qm *qm = qp->qm;

-	if (qp->event_cb) {
-		qp->event_cb(qp);
-		return;
-	}
-
-	if (qp->req_cb) {
-		struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
-
-		while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
-			dma_rmb();
-			qp->req_cb(qp, qp->sqe + qm->sqe_size *
-				   le16_to_cpu(cqe->sq_head));
-			qm_cq_head_update(qp);
-			cqe = qp->cqe + qp->qp_status.cq_head;
-			qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ,
-			      qp->qp_status.cq_head, 0);
-			atomic_dec(&qp->qp_status.used);
-		}
-
-		/* set c_flag */
+	while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
+		dma_rmb();
+		qp->req_cb(qp, qp->sqe + qm->sqe_size *
+			   le16_to_cpu(cqe->sq_head));
+		qm_cq_head_update(qp);
+		cqe = qp->cqe + qp->qp_status.cq_head;
		qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ,
-		      qp->qp_status.cq_head, 1);
+		      qp->qp_status.cq_head, 0);
+		atomic_dec(&qp->qp_status.used);
	}
+
+	/* set c_flag */
+	qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ, qp->qp_status.cq_head, 1);
 }

-static void qm_work_process(struct work_struct *work)
+static int qm_get_complete_eqe_num(struct hisi_qm_poll_data *poll_data)
 {
-	struct hisi_qm *qm = container_of(work, struct hisi_qm, work);
+	struct hisi_qm *qm = poll_data->qm;
	struct qm_eqe *eqe = qm->eqe + qm->status.eq_head;
-	struct hisi_qp *qp;
	int eqe_num = 0;
+	u16 cqn;

	while (QM_EQE_PHASE(eqe) == qm->status.eqc_phase) {
+		cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
+		poll_data->qp_finish_id[eqe_num] = cqn;
		eqe_num++;
-		qp = qm_to_hisi_qp(qm, eqe);
-		qm_poll_qp(qp, qm);

		if (qm->status.eq_head == QM_EQ_DEPTH - 1) {
			qm->status.eqc_phase = !qm->status.eqc_phase;
@@ -945,37 +928,70 @@ static void qm_work_process(struct work_struct *work)
			qm->status.eq_head++;
		}

-		if (eqe_num == QM_EQ_DEPTH / 2 - 1) {
-			eqe_num = 0;
-			qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
-		}
+		if (eqe_num == (QM_EQ_DEPTH >> 1) - 1)
+			break;
	}

	qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
+
+	return eqe_num;
 }

-static irqreturn_t do_qm_irq(int irq, void *data)
+static void qm_work_process(struct work_struct *work)
 {
-	struct hisi_qm *qm = (struct hisi_qm *)data;
+	struct hisi_qm_poll_data *poll_data =
+		container_of(work, struct hisi_qm_poll_data, work);
+	struct hisi_qm *qm = poll_data->qm;
+	struct hisi_qp *qp;
+	int eqe_num, i;

-	/* the workqueue created by device driver of QM */
-	if (qm->wq)
-		queue_work(qm->wq, &qm->work);
-	else
-		schedule_work(&qm->work);
+	/* Get qp id of completed tasks and re-enable the interrupt. */
+	eqe_num = qm_get_complete_eqe_num(poll_data);
+	for (i = eqe_num - 1; i >= 0; i--) {
+		qp = &qm->qp_array[poll_data->qp_finish_id[i]];
+		if (unlikely(atomic_read(&qp->qp_status.flags) == QP_STOP))
+			continue;

-	return IRQ_HANDLED;
+		if (qp->event_cb) {
+			qp->event_cb(qp);
+			continue;
+		}
+
+		if (likely(qp->req_cb))
+			qm_poll_req_cb(qp);
+	}
 }

+static bool do_qm_irq(struct hisi_qm *qm)
+{
+	struct qm_eqe *eqe = qm->eqe + qm->status.eq_head;
+	struct hisi_qm_poll_data *poll_data;
+	u16 cqn;
+
+	if (!readl(qm->io_base + QM_VF_EQ_INT_SOURCE))
+		return false;
+
+	if (QM_EQE_PHASE(eqe) == qm->status.eqc_phase) {
+		cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
+		poll_data = &qm->poll_data[cqn];
+		queue_work(qm->wq, &poll_data->work);
+
+		return true;
+	}
+
+	return false;
+}
+
 static irqreturn_t qm_irq(int irq, void *data)
 {
	struct hisi_qm *qm = data;
+	bool ret;

-	if (readl(qm->io_base + QM_VF_EQ_INT_SOURCE))
-		return do_qm_irq(irq, data);
+	ret = do_qm_irq(qm);
+	if (ret)
+		return IRQ_HANDLED;

	atomic64_inc(&qm->debug.dfx.err_irq_cnt);
	dev_err(&qm->pdev->dev, "invalid int source\n");
	qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);

	return IRQ_NONE;
@@ -3134,11 +3150,8 @@ static int qm_stop_qp_nolock(struct hisi_qp *qp)
	if (ret)
		dev_err(dev, "Failed to drain out data for stopping!\n");

-	if (qp->qm->wq)
-		flush_workqueue(qp->qm->wq);
-	else
-		flush_work(&qp->qm->work);
-
+	flush_workqueue(qp->qm->wq);
	if (unlikely(qp->is_resetting && atomic_read(&qp->qp_status.used)))
		qp_stop_fail_cb(qp);

@@ -3557,8 +3570,10 @@ static void hisi_qp_memory_uninit(struct hisi_qm *qm, int num)
	for (i = num - 1; i >= 0; i--) {
		qdma = &qm->qp_array[i].qdma;
		dma_free_coherent(dev, qdma->size, qdma->va, qdma->dma);
+		kfree(qm->poll_data[i].qp_finish_id);
	}

+	kfree(qm->poll_data);
	kfree(qm->qp_array);
 }

@@ -3567,12 +3582,18 @@ static int hisi_qp_memory_init(struct hisi_qm *qm, size_t dma_size, int id)
	struct device *dev = &qm->pdev->dev;
	size_t off = qm->sqe_size * QM_Q_DEPTH;
	struct hisi_qp *qp;
+	int ret = -ENOMEM;
+
+	qm->poll_data[id].qp_finish_id = kcalloc(qm->qp_num, sizeof(u16),
+						 GFP_KERNEL);
+	if (!qm->poll_data[id].qp_finish_id)
+		return -ENOMEM;

	qp = &qm->qp_array[id];
	qp->qdma.va = dma_alloc_coherent(dev, dma_size, &qp->qdma.dma,
					 GFP_KERNEL);
	if (!qp->qdma.va)
-		return -ENOMEM;
+		goto err_free_qp_finish_id;

	qp->sqe = qp->qdma.va;
	qp->sqe_dma = qp->qdma.dma;
@@ -3583,6 +3604,10 @@ static int hisi_qp_memory_init(struct hisi_qm *qm, size_t dma_size, int id)
	qp->qp_id = id;

	return 0;
+
+err_free_qp_finish_id:
+	kfree(qm->poll_data[id].qp_finish_id);
+	return ret;
 }

 static void hisi_qm_pre_init(struct hisi_qm *qm)
@@ -3672,6 +3697,26 @@ static void qm_last_regs_uninit(struct hisi_qm *qm)
	debug->qm_last_words = NULL;
 }

+static void hisi_qm_unint_work(struct hisi_qm *qm)
+{
+	destroy_workqueue(qm->wq);
+}
+
+static void hisi_qm_memory_uninit(struct hisi_qm *qm)
+{
+	struct device *dev = &qm->pdev->dev;
+
+	hisi_qp_memory_uninit(qm, qm->qp_num);
+	if (qm->qdma.va) {
+		hisi_qm_cache_wb(qm);
+		dma_free_coherent(dev, qm->qdma.size,
+				  qm->qdma.va, qm->qdma.dma);
+	}
+
+	idr_destroy(&qm->qp_idr);
+	kfree(qm->factor);
+}
+
 /**
  * hisi_qm_uninit() - Uninitialize qm.
  * @qm: The qm needed uninit.
@@ -3680,13 +3725,10 @@ static void qm_last_regs_uninit(struct hisi_qm *qm)
  */
 void hisi_qm_uninit(struct hisi_qm *qm)
 {
-	struct pci_dev *pdev = qm->pdev;
-	struct device *dev = &pdev->dev;
-
	qm_last_regs_uninit(qm);

	qm_cmd_uninit(qm);
-	kfree(qm->factor);
+	hisi_qm_unint_work(qm);
	down_write(&qm->qps_lock);

	if (!qm_avail_state(qm, QM_CLOSE)) {
@@ -3694,14 +3736,7 @@ void hisi_qm_uninit(struct hisi_qm *qm)
		return;
	}

-	hisi_qp_memory_uninit(qm, qm->qp_num);
-	idr_destroy(&qm->qp_idr);
-
-	if (qm->qdma.va) {
-		hisi_qm_cache_wb(qm);
-		dma_free_coherent(dev, qm->qdma.size,
-				  qm->qdma.va, qm->qdma.dma);
-	}
-
+	hisi_qm_memory_uninit(qm);
	hisi_qm_set_state(qm, QM_NOT_READY);
	up_write(&qm->qps_lock);

@@ -6018,14 +6053,28 @@ err_disable_pcidev:
	return ret;
 }

-static void hisi_qm_init_work(struct hisi_qm *qm)
+static int hisi_qm_init_work(struct hisi_qm *qm)
 {
-	INIT_WORK(&qm->work, qm_work_process);
+	int i;
+
+	for (i = 0; i < qm->qp_num; i++)
+		INIT_WORK(&qm->poll_data[i].work, qm_work_process);
+
	if (qm->fun_type == QM_HW_PF)
		INIT_WORK(&qm->rst_work, hisi_qm_controller_reset);

	if (qm->ver > QM_HW_V2)
		INIT_WORK(&qm->cmd_process, qm_cmd_process);
+
+	qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_MEM_RECLAIM |
+				 WQ_UNBOUND, num_online_cpus(),
+				 pci_name(qm->pdev));
+	if (!qm->wq) {
+		pci_err(qm->pdev, "failed to alloc workqueue!\n");
+		return -ENOMEM;
+	}
+
+	return 0;
 }

 static int hisi_qp_alloc_memory(struct hisi_qm *qm)
@@ -6038,11 +6087,18 @@ static int hisi_qp_alloc_memory(struct hisi_qm *qm)
	if (!qm->qp_array)
		return -ENOMEM;

+	qm->poll_data = kcalloc(qm->qp_num, sizeof(struct hisi_qm_poll_data), GFP_KERNEL);
+	if (!qm->poll_data) {
+		kfree(qm->qp_array);
+		return -ENOMEM;
+	}
+
	/* one more page for device or qp statuses */
	qp_dma_size = qm->sqe_size * QM_Q_DEPTH +
		      sizeof(struct qm_cqe) * QM_Q_DEPTH;
	qp_dma_size = PAGE_ALIGN(qp_dma_size) + PAGE_SIZE;
	for (i = 0; i < qm->qp_num; i++) {
+		qm->poll_data[i].qm = qm;
		ret = hisi_qp_memory_init(qm, qp_dma_size, i);
		if (ret)
			goto err_init_qp_mem;
@@ -6176,7 +6232,10 @@ int hisi_qm_init(struct hisi_qm *qm)
	if (ret)
		goto err_alloc_uacce;

-	hisi_qm_init_work(qm);
+	ret = hisi_qm_init_work(qm);
+	if (ret)
+		goto err_free_qm_memory;
+
	qm_cmd_init(qm);
	atomic_set(&qm->status.flags, QM_INIT);

@@ -6184,6 +6243,8 @@ int hisi_qm_init(struct hisi_qm *qm)

	return 0;

+err_free_qm_memory:
+	hisi_qm_memory_uninit(qm);
 err_alloc_uacce:
	if (qm->use_sva) {
		uacce_remove(qm->uacce);
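The qm.c rework replaces one global work item with a per-qp struct hisi_qm_poll_data, so completions for different queue pairs can be polled concurrently on an unbound workqueue. A hedged, generic sketch of the per-object work pattern (all names are illustrative, not the driver's):

#include <linux/workqueue.h>
#include <linux/slab.h>

struct poll_data {
	struct work_struct work;
	int qp_id;		/* which queue this work item serves */
};

static void poll_fn(struct work_struct *work)
{
	struct poll_data *pd = container_of(work, struct poll_data, work);

	/* ... drain completions for pd->qp_id ... */
	(void)pd;
}

/* One work item per queue: the IRQ handler picks the right one and
 * queues it, so independent queues no longer serialize on one work.
 */
static struct poll_data *init_poll(int nr_qp)
{
	struct poll_data *pd = kcalloc(nr_qp, sizeof(*pd), GFP_KERNEL);
	int i;

	if (!pd)
		return NULL;
	for (i = 0; i < nr_qp; i++) {
		pd[i].qp_id = i;
		INIT_WORK(&pd[i].work, poll_fn);
	}
	return pd;
}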
@@ -449,7 +449,7 @@ static void sec_skcipher_alg_callback(struct sec_bd_info *sec_resp,
	 */
	}

-	mutex_lock(&ctx->queue->queuelock);
+	spin_lock_bh(&ctx->queue->queuelock);
	/* Put the IV in place for chained cases */
	switch (ctx->cipher_alg) {
	case SEC_C_AES_CBC_128:
@@ -509,7 +509,7 @@ static void sec_skcipher_alg_callback(struct sec_bd_info *sec_resp,
			list_del(&backlog_req->backlog_head);
		}
	}
-	mutex_unlock(&ctx->queue->queuelock);
+	spin_unlock_bh(&ctx->queue->queuelock);

	mutex_lock(&sec_req->lock);
	list_del(&sec_req_el->head);
@@ -798,7 +798,7 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq,
	 */

	/* Grab a big lock for a long time to avoid concurrency issues */
-	mutex_lock(&queue->queuelock);
+	spin_lock_bh(&queue->queuelock);

	/*
	 * Can go on to queue if we have space in either:
@@ -814,15 +814,15 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq,
		ret = -EBUSY;
		if ((skreq->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
			list_add_tail(&sec_req->backlog_head, &ctx->backlog);
-			mutex_unlock(&queue->queuelock);
+			spin_unlock_bh(&queue->queuelock);
			goto out;
		}

-		mutex_unlock(&queue->queuelock);
+		spin_unlock_bh(&queue->queuelock);
		goto err_free_elements;
	}
	ret = sec_send_request(sec_req, queue);
-	mutex_unlock(&queue->queuelock);
+	spin_unlock_bh(&queue->queuelock);
	if (ret)
		goto err_free_elements;

@@ -881,7 +881,7 @@ static int sec_alg_skcipher_init(struct crypto_skcipher *tfm)
	if (IS_ERR(ctx->queue))
		return PTR_ERR(ctx->queue);

-	mutex_init(&ctx->queue->queuelock);
+	spin_lock_init(&ctx->queue->queuelock);
	ctx->queue->havesoftqueue = false;

	return 0;

@@ -892,7 +892,7 @@ bool sec_queue_can_enqueue(struct sec_queue *queue, int num)
 static void sec_queue_hw_init(struct sec_queue *queue)
 {
	sec_queue_ar_alloc(queue, SEC_QUEUE_AR_FROCE_NOALLOC);
-	sec_queue_aw_alloc(queue, SEC_QUEUE_AR_FROCE_NOALLOC);
+	sec_queue_aw_alloc(queue, SEC_QUEUE_AW_FROCE_NOALLOC);
	sec_queue_ar_pkgattr(queue, 1);
	sec_queue_aw_pkgattr(queue, 1);

@@ -347,7 +347,7 @@ struct sec_queue {
	DECLARE_BITMAP(unprocessed, SEC_QUEUE_LEN);
	DECLARE_KFIFO_PTR(softqueue, typeof(struct sec_request_el *));
	bool havesoftqueue;
-	struct mutex queuelock;
+	spinlock_t queuelock;
	void *shadow[SEC_QUEUE_LEN];
 };
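All of the queuelock hunks are one conversion: the lock is taken from a completion callback that may run in softirq context, where a sleeping lock like a mutex is illegal, so it becomes a spinlock taken with the _bh variants. A minimal sketch of the conversion (hypothetical structure, not the driver's):

#include <linux/spinlock.h>

struct queue_state {
	spinlock_t lock;	/* was: struct mutex lock */
	unsigned int inflight;
};

/* Process context and BH context both touch 'inflight', so take the
 * _bh variant everywhere: it disables softirqs and never sleeps,
 * which a mutex cannot guarantee from a completion callback.
 */
static void submit_one(struct queue_state *q)	/* process context */
{
	spin_lock_bh(&q->lock);
	q->inflight++;
	spin_unlock_bh(&q->lock);
}

static void complete_one(struct queue_state *q)	/* softirq context */
{
	spin_lock_bh(&q->lock);
	q->inflight--;
	spin_unlock_bh(&q->lock);
}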
@@ -119,7 +119,7 @@ struct sec_qp_ctx {
	struct idr req_idr;
	struct sec_alg_res res[QM_Q_DEPTH];
	struct sec_ctx *ctx;
-	struct mutex req_lock;
+	spinlock_t req_lock;
	struct list_head backlog;
	struct hisi_acc_sgl_pool *c_in_pool;
	struct hisi_acc_sgl_pool *c_out_pool;
@@ -143,10 +143,10 @@ struct sec_ctx {
	/* Threshold for fake busy, trigger to return -EBUSY to user */
	u32 fake_req_limit;

-	/* Currrent cyclic index to select a queue for encipher */
+	/* Current cyclic index to select a queue for encipher */
	atomic_t enc_qcyclic;

-	/* Currrent cyclic index to select a queue for decipher */
+	/* Current cyclic index to select a queue for decipher */
	atomic_t dec_qcyclic;

	enum sec_alg_type alg_type;

@@ -127,11 +127,11 @@ static int sec_alloc_req_id(struct sec_req *req, struct sec_qp_ctx *qp_ctx)
 {
	int req_id;

-	mutex_lock(&qp_ctx->req_lock);
+	spin_lock_bh(&qp_ctx->req_lock);

	req_id = idr_alloc_cyclic(&qp_ctx->req_idr, NULL,
				  0, QM_Q_DEPTH, GFP_ATOMIC);
-	mutex_unlock(&qp_ctx->req_lock);
+	spin_unlock_bh(&qp_ctx->req_lock);
	if (unlikely(req_id < 0)) {
		dev_err(req->ctx->dev, "alloc req id fail!\n");
		return req_id;
@@ -156,9 +156,9 @@ static void sec_free_req_id(struct sec_req *req)
	qp_ctx->req_list[req_id] = NULL;
	req->qp_ctx = NULL;

-	mutex_lock(&qp_ctx->req_lock);
+	spin_lock_bh(&qp_ctx->req_lock);
	idr_remove(&qp_ctx->req_idr, req_id);
-	mutex_unlock(&qp_ctx->req_lock);
+	spin_unlock_bh(&qp_ctx->req_lock);
 }

 static u8 pre_parse_finished_bd(struct bd_status *status, void *resp)
@@ -273,7 +273,7 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req)
	    !(req->flag & CRYPTO_TFM_REQ_MAY_BACKLOG))
		return -EBUSY;

-	mutex_lock(&qp_ctx->req_lock);
+	spin_lock_bh(&qp_ctx->req_lock);
	ret = hisi_qp_send(qp_ctx->qp, &req->sec_sqe);

	if (ctx->fake_req_limit <=
@@ -281,10 +281,10 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req)
		list_add_tail(&req->backlog_head, &qp_ctx->backlog);
		atomic64_inc(&ctx->sec->debug.dfx.send_cnt);
		atomic64_inc(&ctx->sec->debug.dfx.send_busy_cnt);
-		mutex_unlock(&qp_ctx->req_lock);
+		spin_unlock_bh(&qp_ctx->req_lock);
		return -EBUSY;
	}
-	mutex_unlock(&qp_ctx->req_lock);
+	spin_unlock_bh(&qp_ctx->req_lock);

	if (unlikely(ret == -EBUSY))
		return -ENOBUFS;
@@ -487,7 +487,7 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,

	qp->req_cb = sec_req_cb;

-	mutex_init(&qp_ctx->req_lock);
+	spin_lock_init(&qp_ctx->req_lock);
	idr_init(&qp_ctx->req_idr);
	INIT_LIST_HEAD(&qp_ctx->backlog);

@@ -620,7 +620,7 @@ static int sec_auth_init(struct sec_ctx *ctx)
 {
	struct sec_auth_ctx *a_ctx = &ctx->a_ctx;

-	a_ctx->a_key = dma_alloc_coherent(ctx->dev, SEC_MAX_KEY_SIZE,
+	a_ctx->a_key = dma_alloc_coherent(ctx->dev, SEC_MAX_AKEY_SIZE,
					  &a_ctx->a_key_dma, GFP_KERNEL);
	if (!a_ctx->a_key)
		return -ENOMEM;
@@ -632,8 +632,8 @@ static void sec_auth_uninit(struct sec_ctx *ctx)
 {
	struct sec_auth_ctx *a_ctx = &ctx->a_ctx;

-	memzero_explicit(a_ctx->a_key, SEC_MAX_KEY_SIZE);
-	dma_free_coherent(ctx->dev, SEC_MAX_KEY_SIZE,
+	memzero_explicit(a_ctx->a_key, SEC_MAX_AKEY_SIZE);
+	dma_free_coherent(ctx->dev, SEC_MAX_AKEY_SIZE,
			  a_ctx->a_key, a_ctx->a_key_dma);
 }

@@ -1382,7 +1382,7 @@ static struct sec_req *sec_back_req_clear(struct sec_ctx *ctx,
 {
	struct sec_req *backlog_req = NULL;

-	mutex_lock(&qp_ctx->req_lock);
+	spin_lock_bh(&qp_ctx->req_lock);
	if (ctx->fake_req_limit >=
	    atomic_read(&qp_ctx->qp->qp_status.used) &&
	    !list_empty(&qp_ctx->backlog)) {
@@ -1390,7 +1390,7 @@ static struct sec_req *sec_back_req_clear(struct sec_ctx *ctx,
			typeof(*backlog_req), backlog_head);
		list_del(&backlog_req->backlog_head);
	}
-	mutex_unlock(&qp_ctx->req_lock);
+	spin_unlock_bh(&qp_ctx->req_lock);

	return backlog_req;
 }

@@ -7,6 +7,7 @@
 #define SEC_AIV_SIZE		12
 #define SEC_IV_SIZE		24
 #define SEC_MAX_KEY_SIZE	64
+#define SEC_MAX_AKEY_SIZE	128
 #define SEC_COMM_SCENE		0
 #define SEC_MIN_BLOCK_SZ	1
@@ -508,16 +508,17 @@ static int sec_engine_init(struct hisi_qm *qm)

	writel(SEC_SAA_ENABLE, qm->io_base + SEC_SAA_EN_REG);

-	/* HW V2 enable sm4 extra mode, as ctr/ecb */
-	if (qm->ver < QM_HW_V3)
+	if (qm->ver < QM_HW_V3) {
+		/* HW V2 enable sm4 extra mode, as ctr/ecb */
		writel_relaxed(SEC_BD_ERR_CHK_EN0,
			       qm->io_base + SEC_BD_ERR_CHK_EN_REG0);

-	/* Enable sm4 xts mode multiple iv */
-	writel_relaxed(SEC_BD_ERR_CHK_EN1,
-		       qm->io_base + SEC_BD_ERR_CHK_EN_REG1);
-	writel_relaxed(SEC_BD_ERR_CHK_EN3,
-		       qm->io_base + SEC_BD_ERR_CHK_EN_REG3);
+		/* HW V2 enable sm4 xts mode multiple iv */
+		writel_relaxed(SEC_BD_ERR_CHK_EN1,
+			       qm->io_base + SEC_BD_ERR_CHK_EN_REG1);
+		writel_relaxed(SEC_BD_ERR_CHK_EN3,
+			       qm->io_base + SEC_BD_ERR_CHK_EN_REG3);
+	}

	/* config endian */
	sec_set_endian(qm);
@@ -1002,8 +1003,6 @@ static int sec_pf_probe_init(struct sec_dev *sec)

 static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 {
-	int ret;
-
	qm->pdev = pdev;
	qm->ver = pdev->revision;
	qm->algs = "cipher\ndigest\naead";
@@ -1029,25 +1028,7 @@ static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
		qm->qp_num = SEC_QUEUE_NUM_V1 - SEC_PF_DEF_Q_NUM;
	}

-	/*
-	 * WQ_HIGHPRI: SEC request must be low delayed,
-	 * so need a high priority workqueue.
-	 * WQ_UNBOUND: SEC task is likely with long
-	 * running CPU intensive workloads.
-	 */
-	qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_MEM_RECLAIM |
-				 WQ_UNBOUND, num_online_cpus(),
-				 pci_name(qm->pdev));
-	if (!qm->wq) {
-		pci_err(qm->pdev, "fail to alloc workqueue\n");
-		return -ENOMEM;
-	}
-
-	ret = hisi_qm_init(qm);
-	if (ret)
-		destroy_workqueue(qm->wq);
-
-	return ret;
+	return hisi_qm_init(qm);
 }

 static void sec_qm_uninit(struct hisi_qm *qm)
@@ -1078,8 +1059,6 @@ static int sec_probe_init(struct sec_dev *sec)
 static void sec_probe_uninit(struct hisi_qm *qm)
 {
	hisi_qm_dev_err_uninit(qm);
-
-	destroy_workqueue(qm->wq);
 }

 static void sec_iommu_used_check(struct sec_dev *sec)

@@ -185,7 +185,7 @@ static int hisi_trng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
	struct hisi_trng *trng;
	int currsize = 0;
	u32 val = 0;
-	u32 ret;
+	int ret;

	trng = container_of(rng, struct hisi_trng, rng);
@@ -2,6 +2,7 @@
 /* Copyright (c) 2019 HiSilicon Limited. */
 #include <crypto/internal/acompress.h>
 #include <linux/bitfield.h>
+#include <linux/bitmap.h>
 #include <linux/dma-mapping.h>
 #include <linux/scatterlist.h>
 #include "zip.h"
@@ -606,8 +607,7 @@ static int hisi_zip_create_req_q(struct hisi_zip_ctx *ctx)
		req_q = &ctx->qp_ctx[i].req_q;
		req_q->size = QM_Q_DEPTH;

-		req_q->req_bitmap = kcalloc(BITS_TO_LONGS(req_q->size),
-					    sizeof(long), GFP_KERNEL);
+		req_q->req_bitmap = bitmap_zalloc(req_q->size, GFP_KERNEL);
		if (!req_q->req_bitmap) {
			ret = -ENOMEM;
			if (i == 0)
@@ -631,11 +631,11 @@ static int hisi_zip_create_req_q(struct hisi_zip_ctx *ctx)
	return 0;

 err_free_loop1:
-	kfree(ctx->qp_ctx[HZIP_QPC_DECOMP].req_q.req_bitmap);
+	bitmap_free(ctx->qp_ctx[HZIP_QPC_DECOMP].req_q.req_bitmap);
 err_free_loop0:
	kfree(ctx->qp_ctx[HZIP_QPC_COMP].req_q.q);
 err_free_bitmap:
-	kfree(ctx->qp_ctx[HZIP_QPC_COMP].req_q.req_bitmap);
+	bitmap_free(ctx->qp_ctx[HZIP_QPC_COMP].req_q.req_bitmap);
	return ret;
 }

@@ -645,7 +645,7 @@ static void hisi_zip_release_req_q(struct hisi_zip_ctx *ctx)

	for (i = 0; i < HZIP_CTX_Q_NUM; i++) {
		kfree(ctx->qp_ctx[i].req_q.q);
-		kfree(ctx->qp_ctx[i].req_q.req_bitmap);
+		bitmap_free(ctx->qp_ctx[i].req_q.req_bitmap);
	}
 }
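The bitmap hunks swap an open-coded kcalloc(BITS_TO_LONGS(...), sizeof(long), ...) for bitmap_zalloc()/bitmap_free(), which say what is meant and keep the sizing arithmetic in one place. A small sketch of the API (helper names are illustrative):

#include <linux/bitmap.h>

/* Track which of 'nbits' request slots are busy. bitmap_zalloc()
 * allocates and zeroes exactly BITS_TO_LONGS(nbits) longs for us.
 */
static unsigned long *alloc_slot_map(unsigned int nbits)
{
	return bitmap_zalloc(nbits, GFP_KERNEL);
}

static int take_slot(unsigned long *map, unsigned int nbits)
{
	unsigned int slot = find_first_zero_bit(map, nbits);

	if (slot >= nbits)
		return -EBUSY;
	set_bit(slot, map);
	return slot;
}

static void free_slot_map(unsigned long *map)
{
	bitmap_free(map);	/* pairs with bitmap_zalloc() */
}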
@@ -990,8 +990,6 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip)

 static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 {
-	int ret;
-
	qm->pdev = pdev;
	qm->ver = pdev->revision;
	if (pdev->revision >= QM_HW_V3)
@@ -1021,25 +1019,12 @@ static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
		qm->qp_num = HZIP_QUEUE_NUM_V1 - HZIP_PF_DEF_Q_NUM;
	}

-	qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_MEM_RECLAIM |
-				 WQ_UNBOUND, num_online_cpus(),
-				 pci_name(qm->pdev));
-	if (!qm->wq) {
-		pci_err(qm->pdev, "fail to alloc workqueue\n");
-		return -ENOMEM;
-	}
-
-	ret = hisi_qm_init(qm);
-	if (ret)
-		destroy_workqueue(qm->wq);
-
-	return ret;
+	return hisi_qm_init(qm);
 }

 static void hisi_zip_qm_uninit(struct hisi_qm *qm)
 {
	hisi_qm_uninit(qm);
-	destroy_workqueue(qm->wq);
 }

 static int hisi_zip_probe_init(struct hisi_zip *hisi_zip)

@@ -1831,6 +1831,8 @@ static const struct of_device_id safexcel_of_match_table[] = {
	{},
 };

+MODULE_DEVICE_TABLE(of, safexcel_of_match_table);
+
 static struct platform_driver  crypto_safexcel = {
	.probe		= safexcel_probe,
	.remove		= safexcel_remove,
@@ -497,15 +497,15 @@ struct result_data_desc {
	u32 packet_length:17;
	u32 error_code:15;

-	u8 bypass_length:4;
-	u8 e15:1;
-	u16 rsvd0;
-	u8 hash_bytes:1;
-	u8 hash_length:6;
-	u8 generic_bytes:1;
-	u8 checksum:1;
-	u8 next_header:1;
-	u8 length:1;
+	u32 bypass_length:4;
+	u32 e15:1;
+	u32 rsvd0:16;
+	u32 hash_bytes:1;
+	u32 hash_length:6;
+	u32 generic_bytes:1;
+	u32 checksum:1;
+	u32 next_header:1;
+	u32 length:1;

	u16 application_id;
	u16 rsvd1;
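The safexcel fix works because C only packs adjacent bit-fields into one storage unit when they share a base type; mixing u8/u16 members let the compiler insert padding, so the struct stopped matching the 32-bit hardware word. A sketch demonstrating the repaired layout, with a compile-time check added for illustration (the struct name is hypothetical):

#include <linux/types.h>
#include <linux/build_bug.h>

/* All fields share the u32 base type and their widths sum to 32,
 * so the compiler must pack them into exactly one 32-bit word.
 */
struct hw_word {
	u32 bypass_length:4;
	u32 e15:1;
	u32 rsvd0:16;
	u32 hash_bytes:1;
	u32 hash_length:6;
	u32 generic_bytes:1;
	u32 checksum:1;
	u32 next_header:1;
	u32 length:1;
};

/* Catch layout regressions at compile time. */
static_assert(sizeof(struct hw_word) == sizeof(u32));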
@@ -976,8 +976,6 @@ static int kmb_ocs_ecc_remove(struct platform_device *pdev)
	struct ocs_ecc_dev *ecc_dev;

	ecc_dev = platform_get_drvdata(pdev);
-	if (!ecc_dev)
-		return -ENODEV;

	crypto_unregister_kpp(&ocs_ecdh_p384);
	crypto_unregister_kpp(&ocs_ecdh_p256);

@@ -51,11 +51,47 @@ static const struct devlink_param otx2_cpt_dl_params[] = {
			     NULL),
 };

-static int otx2_cpt_devlink_info_get(struct devlink *devlink,
+static int otx2_cpt_dl_info_firmware_version_put(struct devlink_info_req *req,
+						 struct otx2_cpt_eng_grp_info grp[],
+						 const char *ver_name, int eng_type)
+{
+	struct otx2_cpt_engs_rsvd *eng;
+	int i;
+
+	for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) {
+		eng = find_engines_by_type(&grp[i], eng_type);
+		if (eng)
+			return devlink_info_version_running_put(req, ver_name,
+								eng->ucode->ver_str);
+	}
+
+	return 0;
+}
+
+static int otx2_cpt_devlink_info_get(struct devlink *dl,
				     struct devlink_info_req *req,
				     struct netlink_ext_ack *extack)
 {
-	return devlink_info_driver_name_put(req, "rvu_cptpf");
+	struct otx2_cpt_devlink *cpt_dl = devlink_priv(dl);
+	struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf;
+	int err;
+
+	err = devlink_info_driver_name_put(req, "rvu_cptpf");
+	if (err)
+		return err;
+
+	err = otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp,
+						    "fw.ae", OTX2_CPT_AE_TYPES);
+	if (err)
+		return err;
+
+	err = otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp,
+						    "fw.se", OTX2_CPT_SE_TYPES);
+	if (err)
+		return err;
+
+	return otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp,
+						     "fw.ie", OTX2_CPT_IE_TYPES);
 }

 static const struct devlink_ops otx2_cpt_devlink_ops = {
@@ -476,7 +476,7 @@ release_fw:
	return ret;
 }

-static struct otx2_cpt_engs_rsvd *find_engines_by_type(
+struct otx2_cpt_engs_rsvd *find_engines_by_type(
					struct otx2_cpt_eng_grp_info *eng_grp,
					int eng_type)
 {
@@ -1605,7 +1605,10 @@ int otx2_cpt_dl_custom_egrp_create(struct otx2_cptpf_dev *cptpf,
		if (!strncasecmp(val, "se", 2) && strchr(val, ':')) {
			if (has_se || ucode_idx)
				goto err_print;
-			tmp = strim(strsep(&val, ":"));
+			tmp = strsep(&val, ":");
+			if (!tmp)
+				goto err_print;
+			tmp = strim(tmp);
			if (!val)
				goto err_print;
			if (strlen(tmp) != 2)
@@ -1617,7 +1620,10 @@ int otx2_cpt_dl_custom_egrp_create(struct otx2_cptpf_dev *cptpf,
		} else if (!strncasecmp(val, "ae", 2) && strchr(val, ':')) {
			if (has_ae || ucode_idx)
				goto err_print;
-			tmp = strim(strsep(&val, ":"));
+			tmp = strsep(&val, ":");
+			if (!tmp)
+				goto err_print;
+			tmp = strim(tmp);
			if (!val)
				goto err_print;
			if (strlen(tmp) != 2)
@@ -1629,7 +1635,10 @@ int otx2_cpt_dl_custom_egrp_create(struct otx2_cptpf_dev *cptpf,
		} else if (!strncasecmp(val, "ie", 2) && strchr(val, ':')) {
			if (has_ie || ucode_idx)
				goto err_print;
-			tmp = strim(strsep(&val, ":"));
+			tmp = strsep(&val, ":");
+			if (!tmp)
+				goto err_print;
+			tmp = strim(tmp);
			if (!val)
				goto err_print;
			if (strlen(tmp) != 2)

@@ -166,4 +166,7 @@ int otx2_cpt_dl_custom_egrp_create(struct otx2_cptpf_dev *cptpf,
 int otx2_cpt_dl_custom_egrp_delete(struct otx2_cptpf_dev *cptpf,
				   struct devlink_param_gset_ctx *ctx);
 void otx2_cpt_print_uc_dbg_info(struct otx2_cptpf_dev *cptpf);
+struct otx2_cpt_engs_rsvd *find_engines_by_type(
+					struct otx2_cpt_eng_grp_info *eng_grp,
+					int eng_type);
 #endif /* __OTX2_CPTPF_UCODE_H */
@@ -75,7 +75,7 @@ static int (*nx842_powernv_exec)(const unsigned char *in,
 /**
  * setup_indirect_dde - Setup an indirect DDE
  *
- * The DDE is setup with the the DDE count, byte count, and address of
+ * The DDE is setup with the DDE count, byte count, and address of
  * first direct DDE in the list.
  */
 static void setup_indirect_dde(struct data_descriptor_entry *dde,
@@ -1208,10 +1208,13 @@ static struct vio_driver nx842_vio_driver = {
 static int __init nx842_pseries_init(void)
 {
	struct nx842_devdata *new_devdata;
+	struct device_node *np;
	int ret;

-	if (!of_find_compatible_node(NULL, NULL, "ibm,compression"))
+	np = of_find_compatible_node(NULL, NULL, "ibm,compression");
+	if (!np)
		return -ENODEV;
+	of_node_put(np);

	RCU_INIT_POINTER(devdata, NULL);
	new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL);

@@ -1261,9 +1261,6 @@ static int omap_aes_remove(struct platform_device *pdev)
	struct aead_alg *aalg;
	int i, j;

-	if (!dd)
-		return -ENODEV;
-
	spin_lock_bh(&list_lock);
	list_del(&dd->list);
	spin_unlock_bh(&list_lock);
@@ -1279,7 +1276,6 @@ static int omap_aes_remove(struct platform_device *pdev)
		aalg = &dd->pdata->aead_algs_info->algs_list[i];
		crypto_unregister_aead(aalg);
		dd->pdata->aead_algs_info->registered--;
-
	}

	crypto_engine_exit(dd->engine);

@@ -1091,9 +1091,6 @@ static int omap_des_remove(struct platform_device *pdev)
	struct omap_des_dev *dd = platform_get_drvdata(pdev);
	int i, j;

-	if (!dd)
-		return -ENODEV;
-
	spin_lock_bh(&list_lock);
	list_del(&dd->list);
	spin_unlock_bh(&list_lock);
@@ -1106,7 +1103,6 @@ static int omap_des_remove(struct platform_device *pdev)
	tasklet_kill(&dd->done_task);
	omap_des_dma_cleanup(dd);
	pm_runtime_disable(dd->dev);
-	dd = NULL;

	return 0;
 }

@@ -2197,8 +2197,7 @@ static int omap_sham_remove(struct platform_device *pdev)
	int i, j;

	dd = platform_get_drvdata(pdev);
-	if (!dd)
-		return -ENODEV;
+
	spin_lock_bh(&sham.lock);
	list_del(&dd->list);
	spin_unlock_bh(&sham.lock);
@@ -17,7 +17,7 @@ config CRYPTO_DEV_QAT

 config CRYPTO_DEV_QAT_DH895xCC
	tristate "Support for Intel(R) DH895xCC"
-	depends on X86 && PCI
+	depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
	select CRYPTO_DEV_QAT
	help
	  Support for Intel(R) DH895xcc with Intel(R) QuickAssist Technology
@@ -28,7 +28,7 @@ config CRYPTO_DEV_QAT_DH895xCC

 config CRYPTO_DEV_QAT_C3XXX
	tristate "Support for Intel(R) C3XXX"
-	depends on X86 && PCI
+	depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
	select CRYPTO_DEV_QAT
	help
	  Support for Intel(R) C3xxx with Intel(R) QuickAssist Technology
@@ -39,7 +39,7 @@ config CRYPTO_DEV_QAT_C3XXX

 config CRYPTO_DEV_QAT_C62X
	tristate "Support for Intel(R) C62X"
-	depends on X86 && PCI
+	depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
	select CRYPTO_DEV_QAT
	help
	  Support for Intel(R) C62x with Intel(R) QuickAssist Technology
@@ -50,7 +50,7 @@ config CRYPTO_DEV_QAT_C62X

 config CRYPTO_DEV_QAT_4XXX
	tristate "Support for Intel(R) QAT_4XXX"
-	depends on X86 && PCI
+	depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
	select CRYPTO_DEV_QAT
	help
	  Support for Intel(R) QuickAssist Technology QAT_4xxx
@@ -61,7 +61,7 @@ config CRYPTO_DEV_QAT_4XXX

 config CRYPTO_DEV_QAT_DH895xCCVF
	tristate "Support for Intel(R) DH895xCC Virtual Function"
-	depends on X86 && PCI
+	depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
	select PCI_IOV
	select CRYPTO_DEV_QAT

@@ -74,7 +74,7 @@ config CRYPTO_DEV_QAT_DH895xCCVF

 config CRYPTO_DEV_QAT_C3XXXVF
	tristate "Support for Intel(R) C3XXX Virtual Function"
-	depends on X86 && PCI
+	depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
	select PCI_IOV
	select CRYPTO_DEV_QAT
	help
@@ -86,7 +86,7 @@ config CRYPTO_DEV_QAT_C3XXXVF

 config CRYPTO_DEV_QAT_C62XVF
	tristate "Support for Intel(R) C62X Virtual Function"
-	depends on X86 && PCI
+	depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
	select PCI_IOV
	select CRYPTO_DEV_QAT
	help

@@ -357,6 +357,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data)
	hw_data->ring_pair_reset = adf_gen4_ring_pair_reset;
	hw_data->enable_pm = adf_gen4_enable_pm;
	hw_data->handle_pm_interrupt = adf_gen4_handle_pm_interrupt;
+	hw_data->dev_config = adf_crypto_dev_config;

	adf_gen4_init_hw_csr_ops(&hw_data->csr_ops);
	adf_gen4_init_pf_pfvf_ops(&hw_data->pfvf_ops);

@@ -70,5 +70,6 @@ enum icp_qat_4xxx_slice_mask {

 void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data);
 void adf_clean_hw_data_4xxx(struct adf_hw_device_data *hw_data);
+int adf_crypto_dev_config(struct adf_accel_dev *accel_dev);

 #endif

@@ -53,7 +53,7 @@ static int adf_cfg_dev_init(struct adf_accel_dev *accel_dev)
	return 0;
 }

-static int adf_crypto_dev_config(struct adf_accel_dev *accel_dev)
+int adf_crypto_dev_config(struct adf_accel_dev *accel_dev)
 {
	char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES];
	int banks = GET_MAX_BANKS(accel_dev);
@@ -289,6 +289,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
		goto out_err_disable_aer;
	}

+	ret = adf_sysfs_init(accel_dev);
+	if (ret)
+		goto out_err_disable_aer;
+
	ret = adf_crypto_dev_config(accel_dev);
	if (ret)
		goto out_err_disable_aer;

@@ -10,6 +10,7 @@ intel_qat-objs := adf_cfg.o \
	adf_transport.o \
	adf_admin.o \
	adf_hw_arbiter.o \
+	adf_sysfs.o \
	adf_gen2_hw_data.o \
	adf_gen4_hw_data.o \
	adf_gen4_pm.o \

@@ -199,6 +199,7 @@ struct adf_hw_device_data {
	char *(*uof_get_name)(struct adf_accel_dev *accel_dev, u32 obj_num);
	u32 (*uof_get_num_objs)(void);
	u32 (*uof_get_ae_mask)(struct adf_accel_dev *accel_dev, u32 obj_num);
+	int (*dev_config)(struct adf_accel_dev *accel_dev);
	struct adf_pfvf_ops pfvf_ops;
	struct adf_hw_csr_ops csr_ops;
	const char *fw_name;
@@ -128,6 +128,24 @@ static void adf_cfg_keyval_add(struct adf_cfg_key_val *new,
	list_add_tail(&new->list, &sec->param_head);
 }

+static void adf_cfg_keyval_remove(const char *key, struct adf_cfg_section *sec)
+{
+	struct list_head *head = &sec->param_head;
+	struct list_head *list_ptr, *tmp;
+
+	list_for_each_prev_safe(list_ptr, tmp, head) {
+		struct adf_cfg_key_val *ptr =
+			list_entry(list_ptr, struct adf_cfg_key_val, list);
+
+		if (strncmp(ptr->key, key, sizeof(ptr->key)))
+			continue;
+
+		list_del(list_ptr);
+		kfree(ptr);
+		break;
+	}
+}
+
 static void adf_cfg_keyval_del_all(struct list_head *head)
 {
	struct list_head *list_ptr, *tmp;
@@ -208,7 +226,8 @@ static int adf_cfg_key_val_get(struct adf_accel_dev *accel_dev,
  * @type: Type - string, int or address
  *
  * Function adds configuration key - value entry in the appropriate section
- * in the given acceleration device
+ * in the given acceleration device. If the key exists already, the value
+ * is updated.
  * To be used by QAT device specific drivers.
  *
  * Return: 0 on success, error code otherwise.
@@ -222,6 +241,8 @@ int adf_cfg_add_key_value_param(struct adf_accel_dev *accel_dev,
	struct adf_cfg_key_val *key_val;
	struct adf_cfg_section *section = adf_cfg_sec_find(accel_dev,
							   section_name);
+	char temp_val[ADF_CFG_MAX_VAL_LEN_IN_BYTES];
+
	if (!section)
		return -EFAULT;

@@ -246,6 +267,24 @@ int adf_cfg_add_key_value_param(struct adf_accel_dev *accel_dev,
		return -EINVAL;
	}
	key_val->type = type;
+
+	/* Add the key-value pair as below policy:
+	 * 1. if the key doesn't exist, add it;
+	 * 2. if the key already exists with a different value then update it
+	 *    to the new value (the key is deleted and the newly created
+	 *    key_val containing the new value is added to the database);
+	 * 3. if the key exists with the same value, then return without doing
+	 *    anything (the newly created key_val is freed).
+	 */
+	if (!adf_cfg_key_val_get(accel_dev, section_name, key, temp_val)) {
+		if (strncmp(temp_val, key_val->val, sizeof(temp_val))) {
+			adf_cfg_keyval_remove(key, section);
+		} else {
+			kfree(key_val);
+			return 0;
+		}
+	}
+
	down_write(&cfg->lock);
	adf_cfg_keyval_add(key_val, section);
	up_write(&cfg->lock);
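The update policy above relies on the _safe list walker so that an entry can be unlinked and freed mid-iteration. A tiny generic sketch of that deletion idiom (the kv type is hypothetical):

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/string.h>

struct kv {
	struct list_head list;
	char key[64];
};

/* list_for_each_prev_safe() caches the next cursor before the body
 * runs, so list_del() + kfree() on the current node is safe.
 */
static void kv_remove(struct list_head *head, const char *key)
{
	struct list_head *pos, *tmp;

	list_for_each_prev_safe(pos, tmp, head) {
		struct kv *ptr = list_entry(pos, struct kv, list);

		if (strncmp(ptr->key, key, sizeof(ptr->key)))
			continue;
		list_del(pos);
		kfree(ptr);
		break;
	}
}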
@@ -49,11 +49,6 @@ struct service_hndl {
	struct list_head list;
 };

-static inline int get_current_node(void)
-{
-	return topology_physical_package_id(raw_smp_processor_id());
-}
-
 int adf_service_register(struct service_hndl *service);
 int adf_service_unregister(struct service_hndl *service);

@@ -61,6 +56,7 @@ int adf_dev_init(struct adf_accel_dev *accel_dev);
 int adf_dev_start(struct adf_accel_dev *accel_dev);
 void adf_dev_stop(struct adf_accel_dev *accel_dev);
 void adf_dev_shutdown(struct adf_accel_dev *accel_dev);
+int adf_dev_shutdown_cache_cfg(struct adf_accel_dev *accel_dev);

 void adf_devmgr_update_class_index(struct adf_hw_device_data *hw_data);
 void adf_clean_vf_map(bool);
@@ -132,6 +128,8 @@ void adf_vf_isr_resource_free(struct adf_accel_dev *accel_dev);

 int adf_pfvf_comms_disabled(struct adf_accel_dev *accel_dev);

+int adf_sysfs_init(struct adf_accel_dev *accel_dev);
+
 int qat_hal_init(struct adf_accel_dev *accel_dev);
 void qat_hal_deinit(struct icp_qat_fw_loader_handle *handle);
 int qat_hal_start(struct icp_qat_fw_loader_handle *handle);

@@ -363,3 +363,29 @@ int adf_dev_restarted_notify(struct adf_accel_dev *accel_dev)
	}
	return 0;
 }
+
+int adf_dev_shutdown_cache_cfg(struct adf_accel_dev *accel_dev)
+{
+	char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0};
+	int ret;
+
+	ret = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC,
+				      ADF_SERVICES_ENABLED, services);
+
+	adf_dev_stop(accel_dev);
+	adf_dev_shutdown(accel_dev);
+
+	if (!ret) {
+		ret = adf_cfg_section_add(accel_dev, ADF_GENERAL_SEC);
+		if (ret)
+			return ret;
+
+		ret = adf_cfg_add_key_value_param(accel_dev, ADF_GENERAL_SEC,
+						  ADF_SERVICES_ENABLED,
+						  services, ADF_STR);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}

@@ -120,32 +120,6 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev)
 }
 EXPORT_SYMBOL_GPL(adf_disable_sriov);

-static int adf_sriov_prepare_restart(struct adf_accel_dev *accel_dev)
-{
-	char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0};
-	int ret;
-
-	ret = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC,
-				      ADF_SERVICES_ENABLED, services);
-
-	adf_dev_stop(accel_dev);
-	adf_dev_shutdown(accel_dev);
-
-	if (!ret) {
-		ret = adf_cfg_section_add(accel_dev, ADF_GENERAL_SEC);
-		if (ret)
-			return ret;
-
-		ret = adf_cfg_add_key_value_param(accel_dev, ADF_GENERAL_SEC,
-						  ADF_SERVICES_ENABLED,
-						  services, ADF_STR);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
 /**
  * adf_sriov_configure() - Enable SRIOV for the device
  * @pdev: Pointer to PCI device.
@@ -185,7 +159,7 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs)
			return -EBUSY;
		}

-		ret = adf_sriov_prepare_restart(accel_dev);
+		ret = adf_dev_shutdown_cache_cfg(accel_dev);
		if (ret)
			return ret;
	}
drivers/crypto/qat/qat_common/adf_sysfs.c (new file, 191 lines)
@@ -0,0 +1,191 @@
// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
/* Copyright(c) 2022 Intel Corporation */
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include "adf_accel_devices.h"
#include "adf_cfg.h"
#include "adf_common_drv.h"

static const char * const state_operations[] = {
	[DEV_DOWN] = "down",
	[DEV_UP] = "up",
};

static ssize_t state_show(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	struct adf_accel_dev *accel_dev;
	char *state;

	accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev));
	if (!accel_dev)
		return -EINVAL;

	state = adf_dev_started(accel_dev) ? "up" : "down";
	return sysfs_emit(buf, "%s\n", state);
}

static ssize_t state_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t count)
{
	struct adf_accel_dev *accel_dev;
	u32 accel_id;
	int ret;

	accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev));
	if (!accel_dev)
		return -EINVAL;

	accel_id = accel_dev->accel_id;

	if (adf_devmgr_in_reset(accel_dev) || adf_dev_in_use(accel_dev)) {
		dev_info(dev, "Device qat_dev%d is busy\n", accel_id);
		return -EBUSY;
	}

	ret = sysfs_match_string(state_operations, buf);
	if (ret < 0)
		return ret;

	switch (ret) {
	case DEV_DOWN:
		if (!adf_dev_started(accel_dev)) {
			dev_info(dev, "Device qat_dev%d already down\n",
				 accel_id);
			return -EINVAL;
		}

		dev_info(dev, "Stopping device qat_dev%d\n", accel_id);

		ret = adf_dev_shutdown_cache_cfg(accel_dev);
		if (ret < 0)
			return -EINVAL;

		break;
	case DEV_UP:
		if (adf_dev_started(accel_dev)) {
			dev_info(dev, "Device qat_dev%d already up\n",
				 accel_id);
			return -EINVAL;
		}

		dev_info(dev, "Starting device qat_dev%d\n", accel_id);

		ret = GET_HW_DATA(accel_dev)->dev_config(accel_dev);
		if (!ret)
			ret = adf_dev_init(accel_dev);
		if (!ret)
			ret = adf_dev_start(accel_dev);

		if (ret < 0) {
			dev_err(dev, "Failed to start device qat_dev%d\n",
				accel_id);
			adf_dev_shutdown_cache_cfg(accel_dev);
			return ret;
		}
		break;
	default:
		return -EINVAL;
	}

	return count;
}

static const char * const services_operations[] = {
	ADF_CFG_CY,
	ADF_CFG_DC,
};

static ssize_t cfg_services_show(struct device *dev, struct device_attribute *attr,
				 char *buf)
{
	char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0};
	struct adf_accel_dev *accel_dev;
	int ret;

	accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev));
	if (!accel_dev)
		return -EINVAL;

	ret = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC,
				      ADF_SERVICES_ENABLED, services);
	if (ret)
		return ret;

	return sysfs_emit(buf, "%s\n", services);
}

static int adf_sysfs_update_dev_config(struct adf_accel_dev *accel_dev,
				       const char *services)
{
	return adf_cfg_add_key_value_param(accel_dev, ADF_GENERAL_SEC,
					   ADF_SERVICES_ENABLED, services,
					   ADF_STR);
}

static ssize_t cfg_services_store(struct device *dev, struct device_attribute *attr,
				  const char *buf, size_t count)
{
	struct adf_hw_device_data *hw_data;
	struct adf_accel_dev *accel_dev;
	int ret;

	ret = sysfs_match_string(services_operations, buf);
	if (ret < 0)
		return ret;

	accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev));
	if (!accel_dev)
		return -EINVAL;

	if (adf_dev_started(accel_dev)) {
		dev_info(dev, "Device qat_dev%d must be down to reconfigure the service.\n",
			 accel_dev->accel_id);
		return -EINVAL;
	}

	ret = adf_sysfs_update_dev_config(accel_dev, services_operations[ret]);
	if (ret < 0)
		return ret;

	hw_data = GET_HW_DATA(accel_dev);

	/* Update capabilities mask after change in configuration.
	 * A call to this function is required as capabilities are, at the
	 * moment, tied to configuration
	 */
	hw_data->accel_capabilities_mask = hw_data->get_accel_cap(accel_dev);
	if (!hw_data->accel_capabilities_mask)
		return -EINVAL;

	return count;
}

static DEVICE_ATTR_RW(state);
static DEVICE_ATTR_RW(cfg_services);

static struct attribute *qat_attrs[] = {
	&dev_attr_state.attr,
	&dev_attr_cfg_services.attr,
	NULL,
};

static struct attribute_group qat_group = {
	.attrs = qat_attrs,
	.name = "qat",
};

int adf_sysfs_init(struct adf_accel_dev *accel_dev)
{
	int ret;

	ret = devm_device_add_group(&GET_DEV(accel_dev), &qat_group);
	if (ret) {
		dev_err(&GET_DEV(accel_dev),
			"Failed to create qat attribute group: %d\n", ret);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(adf_sysfs_init);
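adf_sysfs.c is the backing for the /sys/bus/pci/devices/<BDF>/qat/state attribute documented at the top of this pull. The pattern generalizes: DEVICE_ATTR_RW pairs collected into a named attribute group. A stripped-down sketch for a hypothetical driver (all names are illustrative):

#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/sysfs.h>

static int my_enabled;	/* illustrative driver state */

static ssize_t enable_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	return sysfs_emit(buf, "%d\n", my_enabled);
}

static ssize_t enable_store(struct device *dev, struct device_attribute *attr,
			    const char *buf, size_t count)
{
	int ret = kstrtoint(buf, 0, &my_enabled);

	return ret ? ret : count;
}
static DEVICE_ATTR_RW(enable);	/* expands to dev_attr_enable */

static struct attribute *my_attrs[] = {
	&dev_attr_enable.attr,
	NULL,
};

/* .name creates a subdirectory under the device node, like the
 * "qat/" directory above; registering with devm_device_add_group()
 * unwinds it automatically on unbind.
 */
static const struct attribute_group my_group = {
	.attrs = my_attrs,
	.name = "my_group",
};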
@@ -605,7 +605,7 @@ static int qat_alg_aead_newkey(struct crypto_aead *tfm, const u8 *key,
 {
	struct qat_alg_aead_ctx *ctx = crypto_aead_ctx(tfm);
	struct qat_crypto_instance *inst = NULL;
-	int node = get_current_node();
+	int node = numa_node_id();
	struct device *dev;
	int ret;

@@ -1065,7 +1065,7 @@ static int qat_alg_skcipher_newkey(struct qat_alg_skcipher_ctx *ctx,
 {
	struct qat_crypto_instance *inst = NULL;
	struct device *dev;
-	int node = get_current_node();
+	int node = numa_node_id();
	int ret;

	inst = qat_crypto_get_instance_node(node);

@@ -489,7 +489,7 @@ static int qat_dh_init_tfm(struct crypto_kpp *tfm)
 {
	struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm);
	struct qat_crypto_instance *inst =
-			qat_crypto_get_instance_node(get_current_node());
+			qat_crypto_get_instance_node(numa_node_id());

	if (!inst)
		return -EINVAL;
@@ -1225,7 +1225,7 @@ static int qat_rsa_init_tfm(struct crypto_akcipher *tfm)
 {
	struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
	struct qat_crypto_instance *inst =
-			qat_crypto_get_instance_node(get_current_node());
+			qat_crypto_get_instance_node(numa_node_id());

	if (!inst)
		return -EINVAL;
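The removed helper derived a "node" from the physical package id, which is not guaranteed to be a valid NUMA node id on every topology; numa_node_id() asks the scheduler/allocator's notion of locality directly. A small sketch of picking a node-local resource (the instance type and lookup are hypothetical):

#include <linux/topology.h>

struct my_instance;				/* hypothetical per-node context */
struct my_instance *my_get_instance(int node);	/* hypothetical lookup */

static struct my_instance *pick_local_instance(void)
{
	/* numa_node_id() returns the NUMA node of the CPU we are
	 * currently running on, which is what device locality wants;
	 * a physical package id only coincides with it by luck.
	 */
	return my_get_instance(numa_node_id());
}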
@@ -2321,9 +2321,6 @@ static int s5p_aes_remove(struct platform_device *pdev)
	struct s5p_aes_dev *pdata = platform_get_drvdata(pdev);
	int i;

-	if (!pdata)
-		return -ENODEV;
-
	for (i = 0; i < ARRAY_SIZE(algs); i++)
		crypto_unregister_skcipher(&algs[i]);

@@ -86,7 +86,6 @@ struct sa_match_data {
	u8 priv;
	u8 priv_id;
	u32 supported_algos;
-	bool skip_engine_control;
 };

 static struct device *sa_k3_dev;
@@ -2361,7 +2360,15 @@ static int sa_link_child(struct device *dev, void *data)
 static struct sa_match_data am654_match_data = {
	.priv = 1,
	.priv_id = 1,
-	.supported_algos = GENMASK(SA_ALG_AUTHENC_SHA256_AES, 0),
+	.supported_algos = BIT(SA_ALG_CBC_AES) |
+			   BIT(SA_ALG_EBC_AES) |
+			   BIT(SA_ALG_CBC_DES3) |
+			   BIT(SA_ALG_ECB_DES3) |
+			   BIT(SA_ALG_SHA1) |
+			   BIT(SA_ALG_SHA256) |
+			   BIT(SA_ALG_SHA512) |
+			   BIT(SA_ALG_AUTHENC_SHA1_AES) |
+			   BIT(SA_ALG_AUTHENC_SHA256_AES),
 };

 static struct sa_match_data am64_match_data = {
@@ -2372,7 +2379,6 @@ static struct sa_match_data am64_match_data = {
			   BIT(SA_ALG_SHA256) |
			   BIT(SA_ALG_SHA512) |
			   BIT(SA_ALG_AUTHENC_SHA256_AES),
-	.skip_engine_control = true,
 };

 static const struct of_device_id of_match[] = {
@@ -2390,6 +2396,7 @@ static int sa_ul_probe(struct platform_device *pdev)
	struct device_node *node = dev->of_node;
	static void __iomem *saul_base;
	struct sa_crypto_data *dev_data;
+	u32 status, val;
	int ret;

	dev_data = devm_kzalloc(dev, sizeof(*dev_data), GFP_KERNEL);
@@ -2426,13 +2433,13 @@ static int sa_ul_probe(struct platform_device *pdev)

	spin_lock_init(&dev_data->scid_lock);

-	if (!dev_data->match_data->skip_engine_control) {
-		u32 val = SA_EEC_ENCSS_EN | SA_EEC_AUTHSS_EN | SA_EEC_CTXCACH_EN |
-			  SA_EEC_CPPI_PORT_IN_EN | SA_EEC_CPPI_PORT_OUT_EN |
-			  SA_EEC_TRNG_EN;
-
+	val = SA_EEC_ENCSS_EN | SA_EEC_AUTHSS_EN | SA_EEC_CTXCACH_EN |
+	      SA_EEC_CPPI_PORT_IN_EN | SA_EEC_CPPI_PORT_OUT_EN |
+	      SA_EEC_TRNG_EN;
+	status = readl_relaxed(saul_base + SA_ENGINE_STATUS);
+	/* Only enable engines if all are not already enabled */
+	if (val & ~status)
		writel_relaxed(val, saul_base + SA_ENGINE_ENABLE_CONTROL);
-	}

	sa_register_algos(dev_data);

@@ -16,6 +16,7 @@
 #include <crypto/sha1.h>
 #include <crypto/sha2.h>

+#define SA_ENGINE_STATUS		0x0008
 #define SA_ENGINE_ENABLE_CONTROL	0x1000

 struct sa_tfm_ctx;
@@ -421,7 +421,7 @@ static int hash_get_device_data(struct hash_ctx *ctx,
 * @keylen:	The lengt of the key.
 *
 * Note! This function DOES NOT write to the NBLW registry, even though
- * specified in the the hw design spec. Either due to incorrect info in the
+ * specified in the hw design spec. Either due to incorrect info in the
 * spec or due to a bug in the hw.
 */
 static void hash_hw_write_key(struct hash_device_data *device_data,

@@ -23,6 +23,7 @@
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
 #include <crypto/b128ops.h>
+#include "aesp8-ppc.h"

 void gcm_init_p8(u128 htable[16], const u64 Xi[2]);
 void gcm_gmult_p8(u64 Xi[2], const u128 htable[16]);

@@ -16,7 +16,7 @@
 # details see https://www.openssl.org/~appro/cryptogams/.
 # ====================================================================
 #
-# GHASH for for PowerISA v2.07.
+# GHASH for PowerISA v2.07.
 #
 # July 2014
 #

@@ -31,7 +31,7 @@
 #define FSCRYPT_CONTEXT_V2	2

 /* Keep this in sync with include/uapi/linux/fscrypt.h */
-#define FSCRYPT_MODE_MAX	FSCRYPT_MODE_ADIANTUM
+#define FSCRYPT_MODE_MAX	FSCRYPT_MODE_AES_256_HCTR2

 struct fscrypt_context_v1 {
	u8 version; /* FSCRYPT_CONTEXT_V1 */
@@ -53,6 +53,13 @@ struct fscrypt_mode fscrypt_modes[] = {
		.ivsize = 32,
		.blk_crypto_mode = BLK_ENCRYPTION_MODE_ADIANTUM,
	},
+	[FSCRYPT_MODE_AES_256_HCTR2] = {
+		.friendly_name = "AES-256-HCTR2",
+		.cipher_str = "hctr2(aes)",
+		.keysize = 32,
+		.security_strength = 32,
+		.ivsize = 32,
+	},
 };

 static DEFINE_MUTEX(fscrypt_mode_key_setup_mutex);
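"hctr2(aes)" is a crypto API template instantiation: fscrypt merely asks the API for a transform by that string, and the HCTR2 template (added elsewhere in this pull) is composed over AES. A hedged sketch of allocating and keying such a transform directly (error handling and an actual request are omitted):

#include <crypto/skcipher.h>
#include <linux/err.h>

/* Minimal sketch: allocate the same "hctr2(aes)" instance fscrypt
 * names above. A 32-byte key gives AES-256-HCTR2.
 */
static struct crypto_skcipher *get_hctr2_tfm(const u8 *key)
{
	struct crypto_skcipher *tfm;

	tfm = crypto_alloc_skcipher("hctr2(aes)", 0, 0);
	if (IS_ERR(tfm))
		return tfm;

	if (crypto_skcipher_setkey(tfm, key, 32)) {
		crypto_free_skcipher(tfm);
		return ERR_PTR(-EINVAL);
	}
	return tfm;
}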
@@ -61,7 +61,7 @@ fscrypt_get_dummy_policy(struct super_block *sb)
	return sb->s_cop->get_dummy_policy(sb);
}

static bool fscrypt_valid_enc_modes(u32 contents_mode, u32 filenames_mode)
static bool fscrypt_valid_enc_modes_v1(u32 contents_mode, u32 filenames_mode)
{
	if (contents_mode == FSCRYPT_MODE_AES_256_XTS &&
	    filenames_mode == FSCRYPT_MODE_AES_256_CTS)
@@ -78,6 +78,14 @@ static bool fscrypt_valid_enc_modes(u32 contents_mode, u32 filenames_mode)
	return false;
}

static bool fscrypt_valid_enc_modes_v2(u32 contents_mode, u32 filenames_mode)
{
	if (contents_mode == FSCRYPT_MODE_AES_256_XTS &&
	    filenames_mode == FSCRYPT_MODE_AES_256_HCTR2)
		return true;
	return fscrypt_valid_enc_modes_v1(contents_mode, filenames_mode);
}

static bool supported_direct_key_modes(const struct inode *inode,
				       u32 contents_mode, u32 filenames_mode)
{
@@ -151,7 +159,7 @@ static bool supported_iv_ino_lblk_policy(const struct fscrypt_policy_v2 *policy,
static bool fscrypt_supported_v1_policy(const struct fscrypt_policy_v1 *policy,
					const struct inode *inode)
{
	if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
	if (!fscrypt_valid_enc_modes_v1(policy->contents_encryption_mode,
				     policy->filenames_encryption_mode)) {
		fscrypt_warn(inode,
			     "Unsupported encryption modes (contents %d, filenames %d)",
@@ -187,7 +195,7 @@ static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy,
{
	int count = 0;

	if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
	if (!fscrypt_valid_enc_modes_v2(policy->contents_encryption_mode,
				     policy->filenames_encryption_mode)) {
		fscrypt_warn(inode,
			     "Unsupported encryption modes (contents %d, filenames %d)",
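With fscrypt_valid_enc_modes_v2() in place, a v2 policy may pair AES-256-XTS contents encryption with AES-256-HCTR2 filenames encryption. A hedged user-space sketch follows; field names mirror include/uapi/linux/fscrypt.h, set_hctr2_policy() is a made-up name, and the key identifier must come from a prior FS_IOC_ADD_ENCRYPTION_KEY call rather than the placeholder argument shown here.

#include <linux/fscrypt.h>
#include <sys/ioctl.h>
#include <string.h>

/* Sketch: request the XTS + HCTR2 pairing on an unencrypted directory. */
static int set_hctr2_policy(int dirfd,
			    const __u8 key_id[FSCRYPT_KEY_IDENTIFIER_SIZE])
{
	struct fscrypt_policy_v2 policy;

	memset(&policy, 0, sizeof(policy));
	policy.version = FSCRYPT_POLICY_V2;
	policy.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS;
	policy.filenames_encryption_mode = FSCRYPT_MODE_AES_256_HCTR2;
	memcpy(policy.master_key_identifier, key_id,
	       FSCRYPT_KEY_IDENTIFIER_SIZE);

	return ioctl(dirfd, FS_IOC_SET_ENCRYPTION_POLICY, &policy);
}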
461
include/crypto/aria.h
Normal file
@@ -0,0 +1,461 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Cryptographic API.
 *
 * ARIA Cipher Algorithm.
 *
 * Documentation of ARIA can be found in RFC 5794.
 * Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com>
 *
 * Information for ARIA
 *     http://210.104.33.10/ARIA/index-e.html (English)
 *     http://seed.kisa.or.kr/ (Korean)
 *
 * Public domain version is distributed above.
 */

#ifndef _CRYPTO_ARIA_H
#define _CRYPTO_ARIA_H

#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/crypto.h>
#include <asm/byteorder.h>

#define ARIA_MIN_KEY_SIZE	16
#define ARIA_MAX_KEY_SIZE	32
#define ARIA_BLOCK_SIZE		16
#define ARIA_MAX_RD_KEYS	17
#define ARIA_RD_KEY_WORDS	(ARIA_BLOCK_SIZE / sizeof(u32))

struct aria_ctx {
	int key_length;
	int rounds;
	u32 enc_key[ARIA_MAX_RD_KEYS][ARIA_RD_KEY_WORDS];
	u32 dec_key[ARIA_MAX_RD_KEYS][ARIA_RD_KEY_WORDS];
};

static const u32 key_rc[5][4] = {
	{ 0x517cc1b7, 0x27220a94, 0xfe13abe8, 0xfa9a6ee0 },
	{ 0x6db14acc, 0x9e21c820, 0xff28b1d5, 0xef5de2b0 },
	{ 0xdb92371d, 0x2126e970, 0x03249775, 0x04e8c90e },
	{ 0x517cc1b7, 0x27220a94, 0xfe13abe8, 0xfa9a6ee0 },
	{ 0x6db14acc, 0x9e21c820, 0xff28b1d5, 0xef5de2b0 }
};

static const u32 s1[256] = {
	0x00636363, 0x007c7c7c, 0x00777777, 0x007b7b7b,
	0x00f2f2f2, 0x006b6b6b, 0x006f6f6f, 0x00c5c5c5,
	0x00303030, 0x00010101, 0x00676767, 0x002b2b2b,
	0x00fefefe, 0x00d7d7d7, 0x00ababab, 0x00767676,
	0x00cacaca, 0x00828282, 0x00c9c9c9, 0x007d7d7d,
	0x00fafafa, 0x00595959, 0x00474747, 0x00f0f0f0,
	0x00adadad, 0x00d4d4d4, 0x00a2a2a2, 0x00afafaf,
	0x009c9c9c, 0x00a4a4a4, 0x00727272, 0x00c0c0c0,
	0x00b7b7b7, 0x00fdfdfd, 0x00939393, 0x00262626,
	0x00363636, 0x003f3f3f, 0x00f7f7f7, 0x00cccccc,
	0x00343434, 0x00a5a5a5, 0x00e5e5e5, 0x00f1f1f1,
	0x00717171, 0x00d8d8d8, 0x00313131, 0x00151515,
	0x00040404, 0x00c7c7c7, 0x00232323, 0x00c3c3c3,
	0x00181818, 0x00969696, 0x00050505, 0x009a9a9a,
	0x00070707, 0x00121212, 0x00808080, 0x00e2e2e2,
	0x00ebebeb, 0x00272727, 0x00b2b2b2, 0x00757575,
	0x00090909, 0x00838383, 0x002c2c2c, 0x001a1a1a,
	0x001b1b1b, 0x006e6e6e, 0x005a5a5a, 0x00a0a0a0,
	0x00525252, 0x003b3b3b, 0x00d6d6d6, 0x00b3b3b3,
	0x00292929, 0x00e3e3e3, 0x002f2f2f, 0x00848484,
	0x00535353, 0x00d1d1d1, 0x00000000, 0x00ededed,
	0x00202020, 0x00fcfcfc, 0x00b1b1b1, 0x005b5b5b,
	0x006a6a6a, 0x00cbcbcb, 0x00bebebe, 0x00393939,
	0x004a4a4a, 0x004c4c4c, 0x00585858, 0x00cfcfcf,
	0x00d0d0d0, 0x00efefef, 0x00aaaaaa, 0x00fbfbfb,
	0x00434343, 0x004d4d4d, 0x00333333, 0x00858585,
	0x00454545, 0x00f9f9f9, 0x00020202, 0x007f7f7f,
	0x00505050, 0x003c3c3c, 0x009f9f9f, 0x00a8a8a8,
	0x00515151, 0x00a3a3a3, 0x00404040, 0x008f8f8f,
	0x00929292, 0x009d9d9d, 0x00383838, 0x00f5f5f5,
	0x00bcbcbc, 0x00b6b6b6, 0x00dadada, 0x00212121,
	0x00101010, 0x00ffffff, 0x00f3f3f3, 0x00d2d2d2,
	0x00cdcdcd, 0x000c0c0c, 0x00131313, 0x00ececec,
	0x005f5f5f, 0x00979797, 0x00444444, 0x00171717,
	0x00c4c4c4, 0x00a7a7a7, 0x007e7e7e, 0x003d3d3d,
	0x00646464, 0x005d5d5d, 0x00191919, 0x00737373,
	0x00606060, 0x00818181, 0x004f4f4f, 0x00dcdcdc,
	0x00222222, 0x002a2a2a, 0x00909090, 0x00888888,
	0x00464646, 0x00eeeeee, 0x00b8b8b8, 0x00141414,
	0x00dedede, 0x005e5e5e, 0x000b0b0b, 0x00dbdbdb,
	0x00e0e0e0, 0x00323232, 0x003a3a3a, 0x000a0a0a,
	0x00494949, 0x00060606, 0x00242424, 0x005c5c5c,
	0x00c2c2c2, 0x00d3d3d3, 0x00acacac, 0x00626262,
	0x00919191, 0x00959595, 0x00e4e4e4, 0x00797979,
	0x00e7e7e7, 0x00c8c8c8, 0x00373737, 0x006d6d6d,
	0x008d8d8d, 0x00d5d5d5, 0x004e4e4e, 0x00a9a9a9,
	0x006c6c6c, 0x00565656, 0x00f4f4f4, 0x00eaeaea,
	0x00656565, 0x007a7a7a, 0x00aeaeae, 0x00080808,
	0x00bababa, 0x00787878, 0x00252525, 0x002e2e2e,
	0x001c1c1c, 0x00a6a6a6, 0x00b4b4b4, 0x00c6c6c6,
	0x00e8e8e8, 0x00dddddd, 0x00747474, 0x001f1f1f,
	0x004b4b4b, 0x00bdbdbd, 0x008b8b8b, 0x008a8a8a,
	0x00707070, 0x003e3e3e, 0x00b5b5b5, 0x00666666,
	0x00484848, 0x00030303, 0x00f6f6f6, 0x000e0e0e,
	0x00616161, 0x00353535, 0x00575757, 0x00b9b9b9,
	0x00868686, 0x00c1c1c1, 0x001d1d1d, 0x009e9e9e,
	0x00e1e1e1, 0x00f8f8f8, 0x00989898, 0x00111111,
	0x00696969, 0x00d9d9d9, 0x008e8e8e, 0x00949494,
	0x009b9b9b, 0x001e1e1e, 0x00878787, 0x00e9e9e9,
	0x00cecece, 0x00555555, 0x00282828, 0x00dfdfdf,
	0x008c8c8c, 0x00a1a1a1, 0x00898989, 0x000d0d0d,
	0x00bfbfbf, 0x00e6e6e6, 0x00424242, 0x00686868,
	0x00414141, 0x00999999, 0x002d2d2d, 0x000f0f0f,
	0x00b0b0b0, 0x00545454, 0x00bbbbbb, 0x00161616
};

static const u32 s2[256] = {
	0xe200e2e2, 0x4e004e4e, 0x54005454, 0xfc00fcfc,
	0x94009494, 0xc200c2c2, 0x4a004a4a, 0xcc00cccc,
	0x62006262, 0x0d000d0d, 0x6a006a6a, 0x46004646,
	0x3c003c3c, 0x4d004d4d, 0x8b008b8b, 0xd100d1d1,
	0x5e005e5e, 0xfa00fafa, 0x64006464, 0xcb00cbcb,
	0xb400b4b4, 0x97009797, 0xbe00bebe, 0x2b002b2b,
	0xbc00bcbc, 0x77007777, 0x2e002e2e, 0x03000303,
	0xd300d3d3, 0x19001919, 0x59005959, 0xc100c1c1,
	0x1d001d1d, 0x06000606, 0x41004141, 0x6b006b6b,
	0x55005555, 0xf000f0f0, 0x99009999, 0x69006969,
	0xea00eaea, 0x9c009c9c, 0x18001818, 0xae00aeae,
	0x63006363, 0xdf00dfdf, 0xe700e7e7, 0xbb00bbbb,
	0x00000000, 0x73007373, 0x66006666, 0xfb00fbfb,
	0x96009696, 0x4c004c4c, 0x85008585, 0xe400e4e4,
	0x3a003a3a, 0x09000909, 0x45004545, 0xaa00aaaa,
	0x0f000f0f, 0xee00eeee, 0x10001010, 0xeb00ebeb,
	0x2d002d2d, 0x7f007f7f, 0xf400f4f4, 0x29002929,
	0xac00acac, 0xcf00cfcf, 0xad00adad, 0x91009191,
	0x8d008d8d, 0x78007878, 0xc800c8c8, 0x95009595,
	0xf900f9f9, 0x2f002f2f, 0xce00cece, 0xcd00cdcd,
	0x08000808, 0x7a007a7a, 0x88008888, 0x38003838,
	0x5c005c5c, 0x83008383, 0x2a002a2a, 0x28002828,
	0x47004747, 0xdb00dbdb, 0xb800b8b8, 0xc700c7c7,
	0x93009393, 0xa400a4a4, 0x12001212, 0x53005353,
	0xff00ffff, 0x87008787, 0x0e000e0e, 0x31003131,
	0x36003636, 0x21002121, 0x58005858, 0x48004848,
	0x01000101, 0x8e008e8e, 0x37003737, 0x74007474,
	0x32003232, 0xca00caca, 0xe900e9e9, 0xb100b1b1,
	0xb700b7b7, 0xab00abab, 0x0c000c0c, 0xd700d7d7,
	0xc400c4c4, 0x56005656, 0x42004242, 0x26002626,
	0x07000707, 0x98009898, 0x60006060, 0xd900d9d9,
	0xb600b6b6, 0xb900b9b9, 0x11001111, 0x40004040,
	0xec00ecec, 0x20002020, 0x8c008c8c, 0xbd00bdbd,
	0xa000a0a0, 0xc900c9c9, 0x84008484, 0x04000404,
	0x49004949, 0x23002323, 0xf100f1f1, 0x4f004f4f,
	0x50005050, 0x1f001f1f, 0x13001313, 0xdc00dcdc,
	0xd800d8d8, 0xc000c0c0, 0x9e009e9e, 0x57005757,
	0xe300e3e3, 0xc300c3c3, 0x7b007b7b, 0x65006565,
	0x3b003b3b, 0x02000202, 0x8f008f8f, 0x3e003e3e,
	0xe800e8e8, 0x25002525, 0x92009292, 0xe500e5e5,
	0x15001515, 0xdd00dddd, 0xfd00fdfd, 0x17001717,
	0xa900a9a9, 0xbf00bfbf, 0xd400d4d4, 0x9a009a9a,
	0x7e007e7e, 0xc500c5c5, 0x39003939, 0x67006767,
	0xfe00fefe, 0x76007676, 0x9d009d9d, 0x43004343,
	0xa700a7a7, 0xe100e1e1, 0xd000d0d0, 0xf500f5f5,
	0x68006868, 0xf200f2f2, 0x1b001b1b, 0x34003434,
	0x70007070, 0x05000505, 0xa300a3a3, 0x8a008a8a,
	0xd500d5d5, 0x79007979, 0x86008686, 0xa800a8a8,
	0x30003030, 0xc600c6c6, 0x51005151, 0x4b004b4b,
	0x1e001e1e, 0xa600a6a6, 0x27002727, 0xf600f6f6,
	0x35003535, 0xd200d2d2, 0x6e006e6e, 0x24002424,
	0x16001616, 0x82008282, 0x5f005f5f, 0xda00dada,
	0xe600e6e6, 0x75007575, 0xa200a2a2, 0xef00efef,
	0x2c002c2c, 0xb200b2b2, 0x1c001c1c, 0x9f009f9f,
	0x5d005d5d, 0x6f006f6f, 0x80008080, 0x0a000a0a,
	0x72007272, 0x44004444, 0x9b009b9b, 0x6c006c6c,
	0x90009090, 0x0b000b0b, 0x5b005b5b, 0x33003333,
	0x7d007d7d, 0x5a005a5a, 0x52005252, 0xf300f3f3,
	0x61006161, 0xa100a1a1, 0xf700f7f7, 0xb000b0b0,
	0xd600d6d6, 0x3f003f3f, 0x7c007c7c, 0x6d006d6d,
	0xed00eded, 0x14001414, 0xe000e0e0, 0xa500a5a5,
	0x3d003d3d, 0x22002222, 0xb300b3b3, 0xf800f8f8,
	0x89008989, 0xde00dede, 0x71007171, 0x1a001a1a,
	0xaf00afaf, 0xba00baba, 0xb500b5b5, 0x81008181
};

static const u32 x1[256] = {
	0x52520052, 0x09090009, 0x6a6a006a, 0xd5d500d5,
	0x30300030, 0x36360036, 0xa5a500a5, 0x38380038,
	0xbfbf00bf, 0x40400040, 0xa3a300a3, 0x9e9e009e,
	0x81810081, 0xf3f300f3, 0xd7d700d7, 0xfbfb00fb,
	0x7c7c007c, 0xe3e300e3, 0x39390039, 0x82820082,
	0x9b9b009b, 0x2f2f002f, 0xffff00ff, 0x87870087,
	0x34340034, 0x8e8e008e, 0x43430043, 0x44440044,
	0xc4c400c4, 0xdede00de, 0xe9e900e9, 0xcbcb00cb,
	0x54540054, 0x7b7b007b, 0x94940094, 0x32320032,
	0xa6a600a6, 0xc2c200c2, 0x23230023, 0x3d3d003d,
	0xeeee00ee, 0x4c4c004c, 0x95950095, 0x0b0b000b,
	0x42420042, 0xfafa00fa, 0xc3c300c3, 0x4e4e004e,
	0x08080008, 0x2e2e002e, 0xa1a100a1, 0x66660066,
	0x28280028, 0xd9d900d9, 0x24240024, 0xb2b200b2,
	0x76760076, 0x5b5b005b, 0xa2a200a2, 0x49490049,
	0x6d6d006d, 0x8b8b008b, 0xd1d100d1, 0x25250025,
	0x72720072, 0xf8f800f8, 0xf6f600f6, 0x64640064,
	0x86860086, 0x68680068, 0x98980098, 0x16160016,
	0xd4d400d4, 0xa4a400a4, 0x5c5c005c, 0xcccc00cc,
	0x5d5d005d, 0x65650065, 0xb6b600b6, 0x92920092,
	0x6c6c006c, 0x70700070, 0x48480048, 0x50500050,
	0xfdfd00fd, 0xeded00ed, 0xb9b900b9, 0xdada00da,
	0x5e5e005e, 0x15150015, 0x46460046, 0x57570057,
	0xa7a700a7, 0x8d8d008d, 0x9d9d009d, 0x84840084,
	0x90900090, 0xd8d800d8, 0xabab00ab, 0x00000000,
	0x8c8c008c, 0xbcbc00bc, 0xd3d300d3, 0x0a0a000a,
	0xf7f700f7, 0xe4e400e4, 0x58580058, 0x05050005,
	0xb8b800b8, 0xb3b300b3, 0x45450045, 0x06060006,
	0xd0d000d0, 0x2c2c002c, 0x1e1e001e, 0x8f8f008f,
	0xcaca00ca, 0x3f3f003f, 0x0f0f000f, 0x02020002,
	0xc1c100c1, 0xafaf00af, 0xbdbd00bd, 0x03030003,
	0x01010001, 0x13130013, 0x8a8a008a, 0x6b6b006b,
	0x3a3a003a, 0x91910091, 0x11110011, 0x41410041,
	0x4f4f004f, 0x67670067, 0xdcdc00dc, 0xeaea00ea,
	0x97970097, 0xf2f200f2, 0xcfcf00cf, 0xcece00ce,
	0xf0f000f0, 0xb4b400b4, 0xe6e600e6, 0x73730073,
	0x96960096, 0xacac00ac, 0x74740074, 0x22220022,
	0xe7e700e7, 0xadad00ad, 0x35350035, 0x85850085,
	0xe2e200e2, 0xf9f900f9, 0x37370037, 0xe8e800e8,
	0x1c1c001c, 0x75750075, 0xdfdf00df, 0x6e6e006e,
	0x47470047, 0xf1f100f1, 0x1a1a001a, 0x71710071,
	0x1d1d001d, 0x29290029, 0xc5c500c5, 0x89890089,
	0x6f6f006f, 0xb7b700b7, 0x62620062, 0x0e0e000e,
	0xaaaa00aa, 0x18180018, 0xbebe00be, 0x1b1b001b,
	0xfcfc00fc, 0x56560056, 0x3e3e003e, 0x4b4b004b,
	0xc6c600c6, 0xd2d200d2, 0x79790079, 0x20200020,
	0x9a9a009a, 0xdbdb00db, 0xc0c000c0, 0xfefe00fe,
	0x78780078, 0xcdcd00cd, 0x5a5a005a, 0xf4f400f4,
	0x1f1f001f, 0xdddd00dd, 0xa8a800a8, 0x33330033,
	0x88880088, 0x07070007, 0xc7c700c7, 0x31310031,
	0xb1b100b1, 0x12120012, 0x10100010, 0x59590059,
	0x27270027, 0x80800080, 0xecec00ec, 0x5f5f005f,
	0x60600060, 0x51510051, 0x7f7f007f, 0xa9a900a9,
	0x19190019, 0xb5b500b5, 0x4a4a004a, 0x0d0d000d,
	0x2d2d002d, 0xe5e500e5, 0x7a7a007a, 0x9f9f009f,
	0x93930093, 0xc9c900c9, 0x9c9c009c, 0xefef00ef,
	0xa0a000a0, 0xe0e000e0, 0x3b3b003b, 0x4d4d004d,
	0xaeae00ae, 0x2a2a002a, 0xf5f500f5, 0xb0b000b0,
	0xc8c800c8, 0xebeb00eb, 0xbbbb00bb, 0x3c3c003c,
	0x83830083, 0x53530053, 0x99990099, 0x61610061,
	0x17170017, 0x2b2b002b, 0x04040004, 0x7e7e007e,
	0xbaba00ba, 0x77770077, 0xd6d600d6, 0x26260026,
	0xe1e100e1, 0x69690069, 0x14140014, 0x63630063,
	0x55550055, 0x21210021, 0x0c0c000c, 0x7d7d007d
};

static const u32 x2[256] = {
	0x30303000, 0x68686800, 0x99999900, 0x1b1b1b00,
	0x87878700, 0xb9b9b900, 0x21212100, 0x78787800,
	0x50505000, 0x39393900, 0xdbdbdb00, 0xe1e1e100,
	0x72727200, 0x09090900, 0x62626200, 0x3c3c3c00,
	0x3e3e3e00, 0x7e7e7e00, 0x5e5e5e00, 0x8e8e8e00,
	0xf1f1f100, 0xa0a0a000, 0xcccccc00, 0xa3a3a300,
	0x2a2a2a00, 0x1d1d1d00, 0xfbfbfb00, 0xb6b6b600,
	0xd6d6d600, 0x20202000, 0xc4c4c400, 0x8d8d8d00,
	0x81818100, 0x65656500, 0xf5f5f500, 0x89898900,
	0xcbcbcb00, 0x9d9d9d00, 0x77777700, 0xc6c6c600,
	0x57575700, 0x43434300, 0x56565600, 0x17171700,
	0xd4d4d400, 0x40404000, 0x1a1a1a00, 0x4d4d4d00,
	0xc0c0c000, 0x63636300, 0x6c6c6c00, 0xe3e3e300,
	0xb7b7b700, 0xc8c8c800, 0x64646400, 0x6a6a6a00,
	0x53535300, 0xaaaaaa00, 0x38383800, 0x98989800,
	0x0c0c0c00, 0xf4f4f400, 0x9b9b9b00, 0xededed00,
	0x7f7f7f00, 0x22222200, 0x76767600, 0xafafaf00,
	0xdddddd00, 0x3a3a3a00, 0x0b0b0b00, 0x58585800,
	0x67676700, 0x88888800, 0x06060600, 0xc3c3c300,
	0x35353500, 0x0d0d0d00, 0x01010100, 0x8b8b8b00,
	0x8c8c8c00, 0xc2c2c200, 0xe6e6e600, 0x5f5f5f00,
	0x02020200, 0x24242400, 0x75757500, 0x93939300,
	0x66666600, 0x1e1e1e00, 0xe5e5e500, 0xe2e2e200,
	0x54545400, 0xd8d8d800, 0x10101000, 0xcecece00,
	0x7a7a7a00, 0xe8e8e800, 0x08080800, 0x2c2c2c00,
	0x12121200, 0x97979700, 0x32323200, 0xababab00,
	0xb4b4b400, 0x27272700, 0x0a0a0a00, 0x23232300,
	0xdfdfdf00, 0xefefef00, 0xcacaca00, 0xd9d9d900,
	0xb8b8b800, 0xfafafa00, 0xdcdcdc00, 0x31313100,
	0x6b6b6b00, 0xd1d1d100, 0xadadad00, 0x19191900,
	0x49494900, 0xbdbdbd00, 0x51515100, 0x96969600,
	0xeeeeee00, 0xe4e4e400, 0xa8a8a800, 0x41414100,
	0xdadada00, 0xffffff00, 0xcdcdcd00, 0x55555500,
	0x86868600, 0x36363600, 0xbebebe00, 0x61616100,
	0x52525200, 0xf8f8f800, 0xbbbbbb00, 0x0e0e0e00,
	0x82828200, 0x48484800, 0x69696900, 0x9a9a9a00,
	0xe0e0e000, 0x47474700, 0x9e9e9e00, 0x5c5c5c00,
	0x04040400, 0x4b4b4b00, 0x34343400, 0x15151500,
	0x79797900, 0x26262600, 0xa7a7a700, 0xdedede00,
	0x29292900, 0xaeaeae00, 0x92929200, 0xd7d7d700,
	0x84848400, 0xe9e9e900, 0xd2d2d200, 0xbababa00,
	0x5d5d5d00, 0xf3f3f300, 0xc5c5c500, 0xb0b0b000,
	0xbfbfbf00, 0xa4a4a400, 0x3b3b3b00, 0x71717100,
	0x44444400, 0x46464600, 0x2b2b2b00, 0xfcfcfc00,
	0xebebeb00, 0x6f6f6f00, 0xd5d5d500, 0xf6f6f600,
	0x14141400, 0xfefefe00, 0x7c7c7c00, 0x70707000,
	0x5a5a5a00, 0x7d7d7d00, 0xfdfdfd00, 0x2f2f2f00,
	0x18181800, 0x83838300, 0x16161600, 0xa5a5a500,
	0x91919100, 0x1f1f1f00, 0x05050500, 0x95959500,
	0x74747400, 0xa9a9a900, 0xc1c1c100, 0x5b5b5b00,
	0x4a4a4a00, 0x85858500, 0x6d6d6d00, 0x13131300,
	0x07070700, 0x4f4f4f00, 0x4e4e4e00, 0x45454500,
	0xb2b2b200, 0x0f0f0f00, 0xc9c9c900, 0x1c1c1c00,
	0xa6a6a600, 0xbcbcbc00, 0xececec00, 0x73737300,
	0x90909000, 0x7b7b7b00, 0xcfcfcf00, 0x59595900,
	0x8f8f8f00, 0xa1a1a100, 0xf9f9f900, 0x2d2d2d00,
	0xf2f2f200, 0xb1b1b100, 0x00000000, 0x94949400,
	0x37373700, 0x9f9f9f00, 0xd0d0d000, 0x2e2e2e00,
	0x9c9c9c00, 0x6e6e6e00, 0x28282800, 0x3f3f3f00,
	0x80808000, 0xf0f0f000, 0x3d3d3d00, 0xd3d3d300,
	0x25252500, 0x8a8a8a00, 0xb5b5b500, 0xe7e7e700,
	0x42424200, 0xb3b3b300, 0xc7c7c700, 0xeaeaea00,
	0xf7f7f700, 0x4c4c4c00, 0x11111100, 0x33333300,
	0x03030300, 0xa2a2a200, 0xacacac00, 0x60606000
};

static inline u32 rotl32(u32 v, u32 r)
{
	return ((v << r) | (v >> (32 - r)));
}

static inline u32 rotr32(u32 v, u32 r)
{
	return ((v >> r) | (v << (32 - r)));
}

static inline u32 bswap32(u32 v)
{
	return ((v << 24) ^
		(v >> 24) ^
		((v & 0x0000ff00) << 8) ^
		((v & 0x00ff0000) >> 8));
}

static inline u8 get_u8(u32 x, u32 y)
{
	return (x >> ((3 - y) * 8));
}

static inline u32 make_u32(u8 v0, u8 v1, u8 v2, u8 v3)
{
	return ((u32)v0 << 24) | ((u32)v1 << 16) | ((u32)v2 << 8) | ((u32)v3);
}

static inline u32 aria_m(u32 t0)
{
	return rotr32(t0, 8) ^ rotr32(t0 ^ rotr32(t0, 8), 16);
}

/* S-Box Layer 1 + M */
static inline void aria_sbox_layer1_with_pre_diff(u32 *t0, u32 *t1, u32 *t2,
						  u32 *t3)
{
	*t0 = s1[get_u8(*t0, 0)] ^
	      s2[get_u8(*t0, 1)] ^
	      x1[get_u8(*t0, 2)] ^
	      x2[get_u8(*t0, 3)];
	*t1 = s1[get_u8(*t1, 0)] ^
	      s2[get_u8(*t1, 1)] ^
	      x1[get_u8(*t1, 2)] ^
	      x2[get_u8(*t1, 3)];
	*t2 = s1[get_u8(*t2, 0)] ^
	      s2[get_u8(*t2, 1)] ^
	      x1[get_u8(*t2, 2)] ^
	      x2[get_u8(*t2, 3)];
	*t3 = s1[get_u8(*t3, 0)] ^
	      s2[get_u8(*t3, 1)] ^
	      x1[get_u8(*t3, 2)] ^
	      x2[get_u8(*t3, 3)];
}

/* S-Box Layer 2 + M */
static inline void aria_sbox_layer2_with_pre_diff(u32 *t0, u32 *t1, u32 *t2,
						  u32 *t3)
{
	*t0 = x1[get_u8(*t0, 0)] ^
	      x2[get_u8(*t0, 1)] ^
	      s1[get_u8(*t0, 2)] ^
	      s2[get_u8(*t0, 3)];
	*t1 = x1[get_u8(*t1, 0)] ^
	      x2[get_u8(*t1, 1)] ^
	      s1[get_u8(*t1, 2)] ^
	      s2[get_u8(*t1, 3)];
	*t2 = x1[get_u8(*t2, 0)] ^
	      x2[get_u8(*t2, 1)] ^
	      s1[get_u8(*t2, 2)] ^
	      s2[get_u8(*t2, 3)];
	*t3 = x1[get_u8(*t3, 0)] ^
	      x2[get_u8(*t3, 1)] ^
	      s1[get_u8(*t3, 2)] ^
	      s2[get_u8(*t3, 3)];
}

/* Word-level diffusion */
static inline void aria_diff_word(u32 *t0, u32 *t1, u32 *t2, u32 *t3)
{
	*t1 ^= *t2;
	*t2 ^= *t3;
	*t0 ^= *t1;

	*t3 ^= *t1;
	*t2 ^= *t0;
	*t1 ^= *t2;
}

/* Byte-level diffusion */
static inline void aria_diff_byte(u32 *t1, u32 *t2, u32 *t3)
{
	*t1 = ((*t1 << 8) & 0xff00ff00) ^ ((*t1 >> 8) & 0x00ff00ff);
	*t2 = rotr32(*t2, 16);
	*t3 = bswap32(*t3);
}

/* Key XOR Layer */
static inline void aria_add_round_key(u32 *rk, u32 *t0, u32 *t1, u32 *t2,
				      u32 *t3)
{
	*t0 ^= rk[0];
	*t1 ^= rk[1];
	*t2 ^= rk[2];
	*t3 ^= rk[3];
}
/* Odd round Substitution & Diffusion */
static inline void aria_subst_diff_odd(u32 *t0, u32 *t1, u32 *t2, u32 *t3)
{
	aria_sbox_layer1_with_pre_diff(t0, t1, t2, t3);
	aria_diff_word(t0, t1, t2, t3);
	aria_diff_byte(t1, t2, t3);
	aria_diff_word(t0, t1, t2, t3);
}

/* Even round Substitution & Diffusion */
static inline void aria_subst_diff_even(u32 *t0, u32 *t1, u32 *t2, u32 *t3)
{
	aria_sbox_layer2_with_pre_diff(t0, t1, t2, t3);
	aria_diff_word(t0, t1, t2, t3);
	aria_diff_byte(t3, t0, t1);
	aria_diff_word(t0, t1, t2, t3);
}

/* Q, R Macro expanded ARIA GSRK */
static inline void aria_gsrk(u32 *rk, u32 *x, u32 *y, u32 n)
{
	int q = 4 - (n / 32);
	int r = n % 32;

	rk[0] = (x[0]) ^
		((y[q % 4]) >> r) ^
		((y[(q + 3) % 4]) << (32 - r));
	rk[1] = (x[1]) ^
		((y[(q + 1) % 4]) >> r) ^
		((y[q % 4]) << (32 - r));
	rk[2] = (x[2]) ^
		((y[(q + 2) % 4]) >> r) ^
		((y[(q + 1) % 4]) << (32 - r));
	rk[3] = (x[3]) ^
		((y[(q + 3) % 4]) >> r) ^
		((y[(q + 2) % 4]) << (32 - r));
}

#endif
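aria_gsrk() above is RFC 5794's GSRK operation with the Q/R macros expanded: rk = x XOR (y right-rotated by n bits), computed on a 128-bit value held as four big-endian u32 words. Note that it relies on n not being a multiple of 32 (otherwise the << (32 - r) shift is undefined), which holds for the rotation amounts ARIA actually uses. The user-space check below compares it against a naive bit-by-bit 128-bit rotation; it is a verification sketch under those assumptions, not kernel code.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uint32_t u32;

/* aria_gsrk() as in the header above */
static void aria_gsrk(u32 *rk, u32 *x, u32 *y, u32 n)
{
	int q = 4 - (n / 32);
	int r = n % 32;

	rk[0] = (x[0]) ^ ((y[q % 4]) >> r) ^ ((y[(q + 3) % 4]) << (32 - r));
	rk[1] = (x[1]) ^ ((y[(q + 1) % 4]) >> r) ^ ((y[q % 4]) << (32 - r));
	rk[2] = (x[2]) ^ ((y[(q + 2) % 4]) >> r) ^ ((y[(q + 1) % 4]) << (32 - r));
	rk[3] = (x[3]) ^ ((y[(q + 3) % 4]) >> r) ^ ((y[(q + 2) % 4]) << (32 - r));
}

/* Reference: rotate a 128-bit big-endian word array right by n bits. */
static void ref_rotr128(const u32 y[4], unsigned int n, u32 out[4])
{
	out[0] = out[1] = out[2] = out[3] = 0;
	for (unsigned int b = 0; b < 128; b++) {
		u32 bit = (y[b / 32] >> (31 - (b % 32))) & 1;
		unsigned int d = (b + n) % 128;	/* MSB-first index moves down */

		out[d / 32] |= bit << (31 - (d % 32));
	}
}

int main(void)
{
	u32 x[4] = { 0 };	/* zero x isolates the rotation term */
	u32 y[4] = { 0x01234567, 0x89abcdef, 0xfedcba98, 0x76543210 };
	const u32 rot[] = { 19, 31, 67, 97, 109 }; /* ARIA's GSRK amounts */

	for (size_t i = 0; i < sizeof(rot) / sizeof(rot[0]); i++) {
		u32 rk[4], ref[4];

		aria_gsrk(rk, x, y, rot[i]);
		ref_rotr128(y, rot[i], ref);
		printf("n=%3u %s\n", (unsigned int)rot[i],
		       memcmp(rk, ref, sizeof(ref)) ? "MISMATCH" : "ok");
	}
	return 0;
}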
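These helpers back the generic ARIA implementation added in this pull, which registers a single-block cipher under the algorithm name "aria", so other templates such as ECB can wrap it. A hedged kernel-side usage sketch follows; demo_aria_ecb_one_block() is a made-up name, the all-zero key and block are placeholders rather than an RFC 5794 test vector, and the allocation only succeeds where the generic ARIA module (CONFIG_CRYPTO_ARIA) is available.

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/scatterlist.h>
#include <linux/crypto.h>
#include <crypto/skcipher.h>
#include <crypto/aria.h>

static int demo_aria_ecb_one_block(void)
{
	u8 key[ARIA_MAX_KEY_SIZE] = { 0 };	/* placeholder 256-bit key */
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	u8 *buf;
	int err;

	tfm = crypto_alloc_skcipher("ecb(aria)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, ARIA_MAX_KEY_SIZE);
	if (err)
		goto out_free_tfm;

	/* scatterlists need non-stack memory, so allocate the block */
	buf = kzalloc(ARIA_BLOCK_SIZE, GFP_KERNEL);
	if (!buf) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_buf;
	}

	sg_init_one(&sg, buf, ARIA_BLOCK_SIZE);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
				      CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, ARIA_BLOCK_SIZE, NULL);
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_buf:
	kfree(buf);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}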