Merge patch series "RISC-V crypto with reworked asm files"

Eric Biggers <ebiggers@kernel.org> says:

This patchset, which applies to v6.8-rc1, adds cryptographic algorithm
implementations accelerated using the RISC-V vector crypto extensions
(https://github.com/riscv/riscv-crypto/releases/download/v1.0.0/riscv-crypto-spec-vector.pdf)
and RISC-V vector extension
(https://github.com/riscv/riscv-v-spec/releases/download/v1.0/riscv-v-spec-1.0.pdf).
The following algorithms are included: AES in ECB, CBC, CTR, and XTS modes;
ChaCha20; GHASH; SHA-2; SM3; and SM4.

In general, the assembly code requires a 64-bit RISC-V CPU with VLEN >= 128,
little endian byte order, and vector unaligned access support.  The ECB, CTR,
XTS, and ChaCha20 code is designed to naturally scale up to larger VLEN values.
Building the assembly code requires tip-of-tree binutils (future 2.42) or
tip-of-tree clang (future 18.x).  All algorithms pass testing in QEMU, using
CONFIG_CRYPTO_MANAGER_EXTRA_TESTS=y.  Much of the assembly code is derived from
OpenSSL code that was added by https://github.com/openssl/openssl/pull/21923.
It's been cleaned up for integration with the kernel, e.g. reducing code
duplication, eliminating use of .inst and perlasm, and fixing a few bugs.

This patchset incorporates the work of multiple people, including Jerry Shih,
Heiko Stuebner, Christoph Müllner, Phoebe Chen, Charalampos Mitrodimas, and
myself.  This patchset went through several versions from Heiko (last version:
https://lore.kernel.org/linux-crypto/20230711153743.1970625-1-heiko@sntech.de),
then several versions from Jerry (last version:
https://lore.kernel.org/linux-crypto/20231231152743.6304-1-jerry.shih@sifive.com),
then finally several versions from me.  Thanks to everyone who has contributed
to this patchset or its prerequisites.

* b4-shazam-merge:
  crypto: riscv - add vector crypto accelerated SM4
  crypto: riscv - add vector crypto accelerated SM3
  crypto: riscv - add vector crypto accelerated SHA-{512,384}
  crypto: riscv - add vector crypto accelerated SHA-{256,224}
  crypto: riscv - add vector crypto accelerated GHASH
  crypto: riscv - add vector crypto accelerated ChaCha20
  crypto: riscv - add vector crypto accelerated AES-{ECB,CBC,CTR,XTS}
  RISC-V: hook new crypto subdir into build-system
  RISC-V: add TOOLCHAIN_HAS_VECTOR_CRYPTO
  RISC-V: add helper function to read the vector VLEN

Link: https://lore.kernel.org/r/20240122002024.27477-1-ebiggers@kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
This commit is contained in:
Palmer Dabbelt 2024-01-22 17:55:40 -08:00
commit 67daf84203
No known key found for this signature in database
GPG Key ID: 2E1319F35FBB1889
23 changed files with 3274 additions and 0 deletions

View File

@ -2,6 +2,7 @@
obj-y += kernel/ mm/ net/
obj-$(CONFIG_BUILTIN_DTB) += boot/dts/
obj-$(CONFIG_CRYPTO) += crypto/
obj-y += errata/
obj-$(CONFIG_KVM) += kvm/

View File

@ -581,6 +581,13 @@ config TOOLCHAIN_HAS_ZBB
depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
depends on AS_HAS_OPTION_ARCH
# This symbol indicates that the toolchain supports all v1.0 vector crypto
# extensions, including Zvk*, Zvbb, and Zvbc. LLVM added all of these at once.
# binutils added all except Zvkb, then added Zvkb. So we just check for Zvkb.
# The def_bool below probes the assembler with ".option arch, +zvkb"; the
# probe itself relies on ".option arch" support, hence the dependency on
# AS_HAS_OPTION_ARCH.
config TOOLCHAIN_HAS_VECTOR_CRYPTO
def_bool $(as-instr, .option arch$(comma) +zvkb)
depends on AS_HAS_OPTION_ARCH
config RISCV_ISA_ZBB
bool "Zbb extension support for bit manipulation instructions"
depends on TOOLCHAIN_HAS_ZBB

93
arch/riscv/crypto/Kconfig Normal file
View File

@ -0,0 +1,93 @@
# SPDX-License-Identifier: GPL-2.0
#
# Build options for the RISC-V accelerated crypto implementations.
#
# Every option in this menu has the same prerequisites: a 64-bit kernel,
# RISCV_ISA_V, and a toolchain for which TOOLCHAIN_HAS_VECTOR_CRYPTO is set.
# The CPU extensions each implementation uses are listed in its help text.
menu "Accelerated Cryptographic Algorithms for CPU (riscv)"
config CRYPTO_AES_RISCV64
tristate "Ciphers: AES, modes: ECB, CBC, CTR, XTS"
depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
select CRYPTO_ALGAPI
select CRYPTO_LIB_AES
select CRYPTO_SKCIPHER
help
Block cipher: AES cipher algorithms
Length-preserving ciphers: AES with ECB, CBC, CTR, XTS
Architecture: riscv64 using:
- Zvkned vector crypto extension
- Zvbb vector extension (XTS)
- Zvkb vector crypto extension (CTR)
- Zvkg vector crypto extension (XTS)
config CRYPTO_CHACHA_RISCV64
tristate "Ciphers: ChaCha"
depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
select CRYPTO_SKCIPHER
select CRYPTO_LIB_CHACHA_GENERIC
help
Length-preserving ciphers: ChaCha20 stream cipher algorithm
Architecture: riscv64 using:
- Zvkb vector crypto extension
config CRYPTO_GHASH_RISCV64
tristate "Hash functions: GHASH"
depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
select CRYPTO_GCM
help
GCM GHASH function (NIST SP 800-38D)
Architecture: riscv64 using:
- Zvkg vector crypto extension
config CRYPTO_SHA256_RISCV64
tristate "Hash functions: SHA-224 and SHA-256"
depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
select CRYPTO_SHA256
help
SHA-224 and SHA-256 secure hash algorithm (FIPS 180)
Architecture: riscv64 using:
- Zvknha or Zvknhb vector crypto extensions
- Zvkb vector crypto extension
config CRYPTO_SHA512_RISCV64
tristate "Hash functions: SHA-384 and SHA-512"
depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
select CRYPTO_SHA512
help
SHA-384 and SHA-512 secure hash algorithm (FIPS 180)
Architecture: riscv64 using:
- Zvknhb vector crypto extension
- Zvkb vector crypto extension
config CRYPTO_SM3_RISCV64
tristate "Hash functions: SM3 (ShangMi 3)"
depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
select CRYPTO_HASH
select CRYPTO_SM3
help
SM3 (ShangMi 3) secure hash function (OSCCA GM/T 0004-2012)
Architecture: riscv64 using:
- Zvksh vector crypto extension
- Zvkb vector crypto extension
config CRYPTO_SM4_RISCV64
tristate "Ciphers: SM4 (ShangMi 4)"
depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
select CRYPTO_ALGAPI
select CRYPTO_SM4
help
SM4 block cipher algorithm (OSCCA GB/T 32907-2016,
ISO/IEC 18033-3:2010/Amd 1:2021)
SM4 (GBT.32907-2016) is a cryptographic standard issued by the
Organization of State Commercial Administration of China (OSCCA)
as an authorized cryptographic algorithm for use within China.
Architecture: riscv64 using:
- Zvksed vector crypto extension
- Zvkb vector crypto extension
endmenu

View File

@ -0,0 +1,23 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# One composite object per CONFIG_CRYPTO_*_RISCV64 option.  Each is linked
# from a *-glue.o object plus one or more objects whose file names carry ISA
# extension names (zvkned, zvkb, zvbb, zvkg, zvknha/zvknhb, zvksh, zvksed).
obj-$(CONFIG_CRYPTO_AES_RISCV64) += aes-riscv64.o
aes-riscv64-y := aes-riscv64-glue.o aes-riscv64-zvkned.o \
aes-riscv64-zvkned-zvbb-zvkg.o aes-riscv64-zvkned-zvkb.o
obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o
chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o
obj-$(CONFIG_CRYPTO_GHASH_RISCV64) += ghash-riscv64.o
ghash-riscv64-y := ghash-riscv64-glue.o ghash-riscv64-zvkg.o
obj-$(CONFIG_CRYPTO_SHA256_RISCV64) += sha256-riscv64.o
sha256-riscv64-y := sha256-riscv64-glue.o sha256-riscv64-zvknha_or_zvknhb-zvkb.o
obj-$(CONFIG_CRYPTO_SHA512_RISCV64) += sha512-riscv64.o
sha512-riscv64-y := sha512-riscv64-glue.o sha512-riscv64-zvknhb-zvkb.o
obj-$(CONFIG_CRYPTO_SM3_RISCV64) += sm3-riscv64.o
sm3-riscv64-y := sm3-riscv64-glue.o sm3-riscv64-zvksh-zvkb.o
obj-$(CONFIG_CRYPTO_SM4_RISCV64) += sm4-riscv64.o
sm4-riscv64-y := sm4-riscv64-glue.o sm4-riscv64-zvksed-zvkb.o

View File

@ -0,0 +1,156 @@
/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// This file contains macros that are shared by the other aes-*.S files. The
// generated code of these macros depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector AES block cipher extension ('Zvkned')
// Loads the AES round keys from \keyp into vector registers and jumps to code
// specific to the length of the key. Specifically:
// - If AES-128, loads round keys into v1-v11 and jumps to \label128.
// - If AES-192, loads round keys into v1-v13 and jumps to \label192.
// - If AES-256, loads round keys into v1-v15 and continues onwards.
//
// Parameters:
//   \keyp      register holding the address of the first round key. The key
//              length in bytes is read as a 32-bit word at offset 480 from it.
//   \label128  branch target taken when the key length is less than 24.
//   \label192  branch target taken when the key length equals 24.
//
// Also sets vl=4 and vtype=e32,m1,ta,ma. Clobbers t0 and t1.
//
// \keyp is modified as well: it is advanced by 16 bytes for each round key
// after the first, so on exit it holds the address of the last round key that
// was loaded, not the address it held on entry.
.macro aes_begin keyp, label128, label192
lwu t0, 480(\keyp) // t0 = key length in bytes
li t1, 24 // t1 = key length for AES-192
vsetivli zero, 4, e32, m1, ta, ma
// Round keys 1-11 are needed for every key length.
vle32.v v1, (\keyp)
addi \keyp, \keyp, 16
vle32.v v2, (\keyp)
addi \keyp, \keyp, 16
vle32.v v3, (\keyp)
addi \keyp, \keyp, 16
vle32.v v4, (\keyp)
addi \keyp, \keyp, 16
vle32.v v5, (\keyp)
addi \keyp, \keyp, 16
vle32.v v6, (\keyp)
addi \keyp, \keyp, 16
vle32.v v7, (\keyp)
addi \keyp, \keyp, 16
vle32.v v8, (\keyp)
addi \keyp, \keyp, 16
vle32.v v9, (\keyp)
addi \keyp, \keyp, 16
vle32.v v10, (\keyp)
addi \keyp, \keyp, 16
vle32.v v11, (\keyp)
blt t0, t1, \label128 // If AES-128, goto label128.
// Round keys 12-13 are needed for AES-192 and AES-256.
addi \keyp, \keyp, 16
vle32.v v12, (\keyp)
addi \keyp, \keyp, 16
vle32.v v13, (\keyp)
beq t0, t1, \label192 // If AES-192, goto label192.
// Else, it's AES-256.
addi \keyp, \keyp, 16
vle32.v v14, (\keyp)
addi \keyp, \keyp, 16
vle32.v v15, (\keyp)
.endm
// AES-encrypts \data in place with the zvkned instructions.
//
// The round keys must already be in v1-v11 (AES-128), v1-v13 (AES-192) or
// v1-v15 (AES-256). \keylen is the key length in bits and selects, at assembly
// time, how many rounds are emitted; any value other than 128 or 192 is
// treated as AES-256. vl and vtype must have been set up by the caller. With
// vl > 4 several blocks are encrypted at once.
.macro	aes_encrypt	data, keylen
	// Round zero, then the nine middle rounds every key length shares.
	vaesz.vs	\data, v1
	vaesem.vs	\data, v2
	vaesem.vs	\data, v3
	vaesem.vs	\data, v4
	vaesem.vs	\data, v5
	vaesem.vs	\data, v6
	vaesem.vs	\data, v7
	vaesem.vs	\data, v8
	vaesem.vs	\data, v9
	vaesem.vs	\data, v10
.if \keylen != 128
	// Two more middle rounds for AES-192 and AES-256.
	vaesem.vs	\data, v11
	vaesem.vs	\data, v12
.if \keylen != 192
	// And a further two for AES-256.
	vaesem.vs	\data, v13
	vaesem.vs	\data, v14
.endif
.endif
	// Final round, using the last round key for this key length.
.if \keylen == 128
	vaesef.vs	\data, v11
.elseif \keylen == 192
	vaesef.vs	\data, v13
.else
	vaesef.vs	\data, v15
.endif
.endm
// AES-decrypts \data in place with the zvkned instructions.
//
// Same register and vl/vtype requirements as aes_encrypt: round keys in
// v1-v11, v1-v13 or v1-v15 depending on \keylen (128, 192, anything else is
// treated as 256). The round keys are applied in the reverse order.
.macro	aes_decrypt	data, keylen
	// Round zero uses the last round key for this key length.
.if \keylen == 128
	vaesz.vs	\data, v11
.elseif \keylen == 192
	vaesz.vs	\data, v13
.else
	vaesz.vs	\data, v15
.endif
.if \keylen != 128
.if \keylen != 192
	// Extra middle rounds that only AES-256 has.
	vaesdm.vs	\data, v14
	vaesdm.vs	\data, v13
.endif
	// Extra middle rounds shared by AES-192 and AES-256.
	vaesdm.vs	\data, v12
	vaesdm.vs	\data, v11
.endif
	// Nine middle rounds common to every key length, then the final round.
	vaesdm.vs	\data, v10
	vaesdm.vs	\data, v9
	vaesdm.vs	\data, v8
	vaesdm.vs	\data, v7
	vaesdm.vs	\data, v6
	vaesdm.vs	\data, v5
	vaesdm.vs	\data, v4
	vaesdm.vs	\data, v3
	vaesdm.vs	\data, v2
	vaesdf.vs	\data, v1
.endm
// Direction-selecting wrapper: expands to aes_decrypt when \enc is 0 and to
// aes_encrypt for any other value (callers pass 1 or 0).
.macro	aes_crypt	data, enc, keylen
.if (\enc) == 0
	aes_decrypt	\data, \keylen
.else
	aes_encrypt	\data, \keylen
.endif
.endm

View File

@ -0,0 +1,550 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* AES using the RISC-V vector crypto extensions. Includes the bare block
* cipher and the ECB, CBC, CTR, and XTS modes.
*
* Copyright (C) 2023 VRULL GmbH
* Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
*
* Copyright (C) 2023 SiFive, Inc.
* Author: Jerry Shih <jerry.shih@sifive.com>
*/
#include <asm/simd.h>
#include <asm/vector.h>
#include <crypto/aes.h>
#include <crypto/internal/cipher.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <crypto/xts.h>
#include <linux/linkage.h>
#include <linux/module.h>
/*
 * Assembly routines.  The suffix of each name lists the vector crypto
 * extensions that the module init code checks before registering the
 * algorithm that uses it.  Every caller in this file invokes them between
 * kernel_vector_begin() and kernel_vector_end().
 */

/* Single-block encryption and decryption. */
asmlinkage void aes_encrypt_zvkned(const struct crypto_aes_ctx *key,
const u8 in[AES_BLOCK_SIZE],
u8 out[AES_BLOCK_SIZE]);
asmlinkage void aes_decrypt_zvkned(const struct crypto_aes_ctx *key,
const u8 in[AES_BLOCK_SIZE],
u8 out[AES_BLOCK_SIZE]);
/* ECB and CBC: callers in this file pass a len that is a whole number of blocks. */
asmlinkage void aes_ecb_encrypt_zvkned(const struct crypto_aes_ctx *key,
const u8 *in, u8 *out, size_t len);
asmlinkage void aes_ecb_decrypt_zvkned(const struct crypto_aes_ctx *key,
const u8 *in, u8 *out, size_t len);
asmlinkage void aes_cbc_encrypt_zvkned(const struct crypto_aes_ctx *key,
const u8 *in, u8 *out, size_t len,
u8 iv[AES_BLOCK_SIZE]);
asmlinkage void aes_cbc_decrypt_zvkned(const struct crypto_aes_ctx *key,
const u8 *in, u8 *out, size_t len,
u8 iv[AES_BLOCK_SIZE]);
/*
 * CTR: the caller splits requests so that the low 32-bit counter word never
 * wraps within one call.  len may include a final partial block.
 */
asmlinkage void aes_ctr32_crypt_zvkned_zvkb(const struct crypto_aes_ctx *key,
const u8 *in, u8 *out, size_t len,
u8 iv[AES_BLOCK_SIZE]);
/*
 * XTS: 'tweak' is the already-encrypted first tweak.  len may be a
 * non-multiple of the block size, in which case the routine is expected to
 * perform ciphertext stealing on the final full block plus partial block.
 */
asmlinkage void aes_xts_encrypt_zvkned_zvbb_zvkg(
const struct crypto_aes_ctx *key,
const u8 *in, u8 *out, size_t len,
u8 tweak[AES_BLOCK_SIZE]);
asmlinkage void aes_xts_decrypt_zvkned_zvbb_zvkg(
const struct crypto_aes_ctx *key,
const u8 *in, u8 *out, size_t len,
u8 tweak[AES_BLOCK_SIZE]);
/*
 * Expand @key (@keylen bytes) into @ctx with the generic AES key schedule and
 * return whatever aes_expandkey() returns.
 *
 * zvkned's own key-expansion instructions are deliberately not used, for now:
 *
 *   1. They do not support AES-192, so a non-zvkned path would be needed in
 *      any case.
 *
 *   2. In-kernel AES users rarely change keys, which makes key expansion a
 *      cold path.
 *
 *   3. The single-block "cipher" algorithm can be asked to decrypt from a
 *      context where the vector unit is unusable, and then has to fall back
 *      to aes_decrypt().  That fallback needs the Equivalent Inverse Cipher
 *      round keys in the 'key_dec' field of struct crypto_aes_ctx, whereas
 *      zvkned can only generate and use the normal round keys.  Since other
 *      modes (e.g. XTS) also do a single-block AES encryption, it is simplest
 *      to use struct crypto_aes_ctx and aes_expandkey() everywhere instead of
 *      special-casing "cipher".
 */
static int riscv64_aes_setkey(struct crypto_aes_ctx *ctx,
			      const u8 *key, unsigned int keylen)
{
	int err = aes_expandkey(ctx, key, keylen);

	return err;
}
/* setkey hook of the single-block "cipher": expand into the tfm's AES context. */
static int riscv64_aes_setkey_cipher(struct crypto_tfm *tfm,
				     const u8 *key, unsigned int keylen)
{
	return riscv64_aes_setkey(crypto_tfm_ctx(tfm), key, keylen);
}
/* setkey hook of the ECB, CBC and CTR skciphers: expand into the tfm's AES context. */
static int riscv64_aes_setkey_skcipher(struct crypto_skcipher *tfm,
				       const u8 *key, unsigned int keylen)
{
	return riscv64_aes_setkey(crypto_skcipher_ctx(tfm), key, keylen);
}
/* Bare AES, without a mode of operation */

/*
 * Encrypt the single block at @src into @dst.  The generic aes_encrypt() is
 * used whenever crypto_simd_usable() says the vector unit may not be touched.
 */
static void riscv64_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);

	if (!crypto_simd_usable()) {
		aes_encrypt(ctx, dst, src);
		return;
	}

	kernel_vector_begin();
	aes_encrypt_zvkned(ctx, src, dst);
	kernel_vector_end();
}
/*
 * Decrypt the single block at @src into @dst.  The generic aes_decrypt() is
 * used whenever crypto_simd_usable() says the vector unit may not be touched.
 */
static void riscv64_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);

	if (!crypto_simd_usable()) {
		aes_decrypt(ctx, dst, src);
		return;
	}

	kernel_vector_begin();
	aes_decrypt_zvkned(ctx, src, dst);
	kernel_vector_end();
}
/* AES-ECB */

/*
 * Shared ECB encrypt/decrypt path.  For every step of the scatterlist walk the
 * whole blocks are handed to the assembly; any trailing bytes that do not
 * fill a block are reported back to skcipher_walk_done() as unprocessed.
 * Returns the status of the last walk call.
 */
static inline int riscv64_aes_ecb_crypt(struct skcipher_request *req, bool enc)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, false);
	while (walk.nbytes != 0) {
		const unsigned int leftover = walk.nbytes % AES_BLOCK_SIZE;
		const unsigned int full = walk.nbytes - leftover;

		kernel_vector_begin();
		if (!enc)
			aes_ecb_decrypt_zvkned(ctx, walk.src.virt.addr,
					       walk.dst.virt.addr, full);
		else
			aes_ecb_encrypt_zvkned(ctx, walk.src.virt.addr,
					       walk.dst.virt.addr, full);
		kernel_vector_end();

		err = skcipher_walk_done(&walk, leftover);
	}

	return err;
}
static int riscv64_aes_ecb_encrypt(struct skcipher_request *req)
{
return riscv64_aes_ecb_crypt(req, true);
}
static int riscv64_aes_ecb_decrypt(struct skcipher_request *req)
{
return riscv64_aes_ecb_crypt(req, false);
}
/* AES-CBC */

/*
 * Shared CBC encrypt/decrypt path.  Each walk step passes its whole blocks,
 * together with walk.iv, to the assembly; trailing bytes that do not fill a
 * block are reported back to skcipher_walk_done() as unprocessed.
 * Returns the status of the last walk call.
 */
static inline int riscv64_aes_cbc_crypt(struct skcipher_request *req, bool enc)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, false);
	while (walk.nbytes != 0) {
		const unsigned int leftover = walk.nbytes % AES_BLOCK_SIZE;
		const unsigned int full = walk.nbytes - leftover;

		kernel_vector_begin();
		if (!enc)
			aes_cbc_decrypt_zvkned(ctx, walk.src.virt.addr,
					       walk.dst.virt.addr, full,
					       walk.iv);
		else
			aes_cbc_encrypt_zvkned(ctx, walk.src.virt.addr,
					       walk.dst.virt.addr, full,
					       walk.iv);
		kernel_vector_end();

		err = skcipher_walk_done(&walk, leftover);
	}

	return err;
}
static int riscv64_aes_cbc_encrypt(struct skcipher_request *req)
{
return riscv64_aes_cbc_crypt(req, true);
}
static int riscv64_aes_cbc_decrypt(struct skcipher_request *req)
{
return riscv64_aes_cbc_crypt(req, false);
}
/* AES-CTR */

/*
 * CTR encryption and decryption (the same operation).
 *
 * The assembly does not handle a wrap of the low 32-bit word of the counter,
 * so this function tracks that word in 'ctr32' and, when a walk step would
 * wrap it, splits the step into the part before the wrap and the part after
 * it, propagating the carry into the upper 96 bits of the IV in between.
 */
static int riscv64_aes_ctr_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
	unsigned int nbytes, p1_nbytes;
	struct skcipher_walk walk;
	u32 ctr32, nblocks;
	int err;

	/* Low 32-bit word of the 128-bit big endian counter. */
	ctr32 = get_unaligned_be32(req->iv + 12);

	err = skcipher_walk_virt(&walk, req, false);
	while ((nbytes = walk.nbytes) != 0) {
		if (nbytes >= walk.total) {
			/* Final step: a trailing partial block is included. */
			nblocks = DIV_ROUND_UP(nbytes, AES_BLOCK_SIZE);
		} else {
			/* More data follows: process whole blocks only. */
			nbytes = round_down(nbytes, AES_BLOCK_SIZE);
			nblocks = nbytes / AES_BLOCK_SIZE;
		}

		/* ctr32 now holds the counter word as it will be after this step. */
		ctr32 += nblocks;

		kernel_vector_begin();
		if (ctr32 < nblocks) {
			/*
			 * The addition wrapped, i.e. the low counter word
			 * overflows during this step.  (nblocks - ctr32)
			 * blocks fit before the wrap; do those first.
			 */
			p1_nbytes = (nblocks - ctr32) * AES_BLOCK_SIZE;
			if (p1_nbytes > nbytes)
				p1_nbytes = nbytes;

			aes_ctr32_crypt_zvkned_zvkb(ctx, walk.src.virt.addr,
						    walk.dst.virt.addr,
						    p1_nbytes, req->iv);

			/* Add the carry bit to the upper 96 bits. */
			crypto_inc(req->iv, 12);

			/* ctr32 == 0 means nothing is left after the wrap. */
			if (ctr32) {
				aes_ctr32_crypt_zvkned_zvkb(
					ctx,
					walk.src.virt.addr + p1_nbytes,
					walk.dst.virt.addr + p1_nbytes,
					nbytes - p1_nbytes, req->iv);
			}
		} else {
			/* No overflow of the low counter word in this step. */
			aes_ctr32_crypt_zvkned_zvkb(ctx, walk.src.virt.addr,
						    walk.dst.virt.addr, nbytes,
						    req->iv);
		}
		kernel_vector_end();

		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}

	return err;
}
/* AES-XTS */
/*
 * Per-transform XTS context: the two halves of the XTS key, each expanded
 * independently by riscv64_aes_xts_setkey().
 */
struct riscv64_aes_xts_ctx {
struct crypto_aes_ctx ctx1; /* first key half: passed to the XTS assembly for the data */
struct crypto_aes_ctx ctx2; /* second key half: encrypts the IV to form the first tweak */
};
/*
 * XTS setkey hook.  After xts_verify_key() accepts the key, its first half is
 * expanded into ctx1 and its second half into ctx2.  The first failing step
 * determines the return value; 0 is returned when all three succeed.
 */
static int riscv64_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
				  unsigned int keylen)
{
	struct riscv64_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	int err;

	err = xts_verify_key(tfm, key, keylen);
	if (err)
		return err;

	err = riscv64_aes_setkey(&ctx->ctx1, key, keylen / 2);
	if (err)
		return err;

	return riscv64_aes_setkey(&ctx->ctx2, key + keylen / 2, keylen / 2);
}
/*
 * Shared XTS encrypt/decrypt path.
 *
 * Returns -EINVAL for messages shorter than one block, otherwise the status
 * of the scatterlist walk.  req->iv is overwritten with the first tweak and is
 * then passed to the assembly as the tweak buffer for every call.
 */
static int riscv64_aes_xts_crypt(struct skcipher_request *req, bool enc)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	const struct riscv64_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	int tail = req->cryptlen % AES_BLOCK_SIZE;
	struct scatterlist sg_src[2], sg_dst[2];
	struct skcipher_request subreq;
	struct scatterlist *src, *dst;
	struct skcipher_walk walk;
	int err;

	if (req->cryptlen < AES_BLOCK_SIZE)
		return -EINVAL;

	/* The first tweak is the IV encrypted with the tweak key. */
	kernel_vector_begin();
	aes_encrypt_zvkned(&ctx->ctx2, req->iv, req->iv);
	kernel_vector_end();

	err = skcipher_walk_virt(&walk, req, false);

	if (likely(tail <= 0 || walk.nbytes >= walk.total)) {
		/*
		 * Either the length is block-aligned or the whole message is
		 * available in a single walk step, so no separate ciphertext
		 * stealing pass is needed.
		 */
		tail = 0;
	} else {
		/*
		 * The length isn't a multiple of the AES block size and the
		 * message spans several walk steps.  Split off the last full
		 * block together with the partial block, so that both reach
		 * the assembly in the same call as ciphertext stealing
		 * requires, and walk only the part in front of them for now.
		 */
		skcipher_walk_abort(&walk);

		skcipher_request_set_tfm(&subreq, tfm);
		skcipher_request_set_callback(&subreq,
					      skcipher_request_flags(req),
					      NULL, NULL);
		skcipher_request_set_crypt(&subreq, req->src, req->dst,
					   req->cryptlen - tail - AES_BLOCK_SIZE,
					   req->iv);
		req = &subreq;
		err = skcipher_walk_virt(&walk, req, false);
	}

	while (walk.nbytes) {
		/* Only the final step may hand over a non-block-aligned length. */
		unsigned int chunk = walk.nbytes < walk.total ?
				     round_down(walk.nbytes, AES_BLOCK_SIZE) :
				     walk.nbytes;

		kernel_vector_begin();
		if (!enc)
			aes_xts_decrypt_zvkned_zvbb_zvkg(
				&ctx->ctx1, walk.src.virt.addr,
				walk.dst.virt.addr, chunk, req->iv);
		else
			aes_xts_encrypt_zvkned_zvbb_zvkg(
				&ctx->ctx1, walk.src.virt.addr,
				walk.dst.virt.addr, chunk, req->iv);
		kernel_vector_end();

		err = skcipher_walk_done(&walk, walk.nbytes - chunk);
	}

	if (err)
		return err;
	if (likely(!tail))
		return 0;

	/* Ciphertext stealing on the last full block plus the partial block. */

	src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
	dst = src;
	if (req->dst != req->src)
		dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);

	skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
				   req->iv);

	err = skcipher_walk_virt(&walk, req, false);
	if (err)
		return err;

	kernel_vector_begin();
	if (!enc)
		aes_xts_decrypt_zvkned_zvbb_zvkg(
			&ctx->ctx1, walk.src.virt.addr,
			walk.dst.virt.addr, walk.nbytes, req->iv);
	else
		aes_xts_encrypt_zvkned_zvbb_zvkg(
			&ctx->ctx1, walk.src.virt.addr,
			walk.dst.virt.addr, walk.nbytes, req->iv);
	kernel_vector_end();

	return skcipher_walk_done(&walk, 0);
}
static int riscv64_aes_xts_encrypt(struct skcipher_request *req)
{
return riscv64_aes_xts_crypt(req, true);
}
static int riscv64_aes_xts_decrypt(struct skcipher_request *req)
{
return riscv64_aes_xts_crypt(req, false);
}
/* Algorithm definitions */

/* Single-block AES, exposed through the "cipher" interface. */
static struct crypto_alg riscv64_zvkned_aes_cipher_alg = {
	/* Identity */
	.cra_name = "aes",
	.cra_driver_name = "aes-riscv64-zvkned",
	.cra_priority = 300,
	.cra_module = THIS_MODULE,

	/* Shape */
	.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize = AES_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct crypto_aes_ctx),

	/* Operations */
	.cra_cipher = {
		.cia_min_keysize = AES_MIN_KEY_SIZE,
		.cia_max_keysize = AES_MAX_KEY_SIZE,
		.cia_setkey = riscv64_aes_setkey_cipher,
		.cia_encrypt = riscv64_aes_encrypt,
		.cia_decrypt = riscv64_aes_decrypt,
	},
};
/* Modes registered together once Zvkned is available: ECB and CBC. */
static struct skcipher_alg riscv64_zvkned_aes_skcipher_algs[] = {
	{
		/* ecb(aes) */
		.base = {
			.cra_name = "ecb(aes)",
			.cra_driver_name = "ecb-aes-riscv64-zvkned",
			.cra_priority = 300,
			.cra_blocksize = AES_BLOCK_SIZE,
			.cra_ctxsize = sizeof(struct crypto_aes_ctx),
			.cra_module = THIS_MODULE,
		},
		.min_keysize = AES_MIN_KEY_SIZE,
		.max_keysize = AES_MAX_KEY_SIZE,
		.walksize = 8 * AES_BLOCK_SIZE, /* matches LMUL=8 */
		.setkey = riscv64_aes_setkey_skcipher,
		.encrypt = riscv64_aes_ecb_encrypt,
		.decrypt = riscv64_aes_ecb_decrypt,
	}, {
		/* cbc(aes) */
		.base = {
			.cra_name = "cbc(aes)",
			.cra_driver_name = "cbc-aes-riscv64-zvkned",
			.cra_priority = 300,
			.cra_blocksize = AES_BLOCK_SIZE,
			.cra_ctxsize = sizeof(struct crypto_aes_ctx),
			.cra_module = THIS_MODULE,
		},
		.min_keysize = AES_MIN_KEY_SIZE,
		.max_keysize = AES_MAX_KEY_SIZE,
		.ivsize = AES_BLOCK_SIZE,
		.setkey = riscv64_aes_setkey_skcipher,
		.encrypt = riscv64_aes_cbc_encrypt,
		.decrypt = riscv64_aes_cbc_decrypt,
	}
};
/* ctr(aes); registered only when Zvkb is available in addition to Zvkned. */
static struct skcipher_alg riscv64_zvkned_zvkb_aes_skcipher_alg = {
	.base = {
		.cra_name = "ctr(aes)",
		.cra_driver_name = "ctr-aes-riscv64-zvkned-zvkb",
		.cra_priority = 300,
		.cra_blocksize = 1,
		.cra_ctxsize = sizeof(struct crypto_aes_ctx),
		.cra_module = THIS_MODULE,
	},
	.min_keysize = AES_MIN_KEY_SIZE,
	.max_keysize = AES_MAX_KEY_SIZE,
	.ivsize = AES_BLOCK_SIZE,
	.chunksize = AES_BLOCK_SIZE,
	.walksize = 4 * AES_BLOCK_SIZE, /* matches LMUL=4 */
	.setkey = riscv64_aes_setkey_skcipher,
	/* CTR is its own inverse, so both directions share one handler. */
	.encrypt = riscv64_aes_ctr_crypt,
	.decrypt = riscv64_aes_ctr_crypt,
};
static struct skcipher_alg riscv64_zvkned_zvbb_zvkg_aes_skcipher_alg = {
.setkey = riscv64_aes_xts_setkey,
.encrypt = riscv64_aes_xts_encrypt,
.decrypt = riscv64_aes_xts_decrypt,
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.chunksize = AES_BLOCK_SIZE,
.walksize = 4 * AES_BLOCK_SIZE, /* matches LMUL=4 */
.base = {
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct riscv64_aes_xts_ctx),
.cra_priority = 300,
.cra_name = "xts(aes)",
.cra_driver_name = "xts-aes-riscv64-zvkned-zvbb-zvkg",
.cra_module = THIS_MODULE,
},
};
static inline bool riscv64_aes_xts_supported(void)
{
return riscv_isa_extension_available(NULL, ZVBB) &&
riscv_isa_extension_available(NULL, ZVKG) &&
riscv_vector_vlen() < 2048 /* Implementation limitation */;
}
/*
 * Module init.  Registers, in order: the bare cipher, ECB + CBC, CTR (only
 * with Zvkb) and XTS (only if riscv64_aes_xts_supported()).  Returns -ENODEV
 * when Zvkned is missing or VLEN is below 128.  If a registration fails,
 * everything registered before it is unregistered again and the error is
 * returned.
 */
static int __init riscv64_aes_mod_init(void)
{
	int err;

	if (!riscv_isa_extension_available(NULL, ZVKNED) ||
	    riscv_vector_vlen() < 128)
		return -ENODEV;

	err = crypto_register_alg(&riscv64_zvkned_aes_cipher_alg);
	if (err)
		return err;

	err = crypto_register_skciphers(
		riscv64_zvkned_aes_skcipher_algs,
		ARRAY_SIZE(riscv64_zvkned_aes_skcipher_algs));
	if (err)
		goto unregister_zvkned_cipher_alg;

	if (riscv_isa_extension_available(NULL, ZVKB)) {
		err = crypto_register_skcipher(
			&riscv64_zvkned_zvkb_aes_skcipher_alg);
		if (err)
			goto unregister_zvkned_skcipher_algs;
	}

	if (riscv64_aes_xts_supported()) {
		err = crypto_register_skcipher(
			&riscv64_zvkned_zvbb_zvkg_aes_skcipher_alg);
		if (err)
			goto unregister_zvkned_zvkb_skcipher_alg;
	}

	return 0;

unregister_zvkned_zvkb_skcipher_alg:
	/* CTR was only registered when Zvkb is present. */
	if (riscv_isa_extension_available(NULL, ZVKB))
		crypto_unregister_skcipher(&riscv64_zvkned_zvkb_aes_skcipher_alg);
unregister_zvkned_skcipher_algs:
	crypto_unregister_skciphers(riscv64_zvkned_aes_skcipher_algs,
				    ARRAY_SIZE(riscv64_zvkned_aes_skcipher_algs));
unregister_zvkned_cipher_alg:
	crypto_unregister_alg(&riscv64_zvkned_aes_cipher_alg);
	return err;
}
/*
 * Module exit: unregister the algorithms in the reverse of the order used by
 * riscv64_aes_mod_init().  The same feature checks are evaluated again to
 * decide whether the optional XTS and CTR algorithms had been registered.
 */
static void __exit riscv64_aes_mod_exit(void)
{
/* XTS was registered only if riscv64_aes_xts_supported() held at init. */
if (riscv64_aes_xts_supported())
crypto_unregister_skcipher(&riscv64_zvkned_zvbb_zvkg_aes_skcipher_alg);
/* CTR was registered only if Zvkb was available at init. */
if (riscv_isa_extension_available(NULL, ZVKB))
crypto_unregister_skcipher(&riscv64_zvkned_zvkb_aes_skcipher_alg);
/* ECB/CBC and the bare cipher are always registered on a successful init. */
crypto_unregister_skciphers(riscv64_zvkned_aes_skcipher_algs,
ARRAY_SIZE(riscv64_zvkned_aes_skcipher_algs));
crypto_unregister_alg(&riscv64_zvkned_aes_cipher_alg);
}
/* Module entry and exit points. */
module_init(riscv64_aes_mod_init);
module_exit(riscv64_aes_mod_exit);
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS (RISC-V accelerated)");
MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>");
MODULE_LICENSE("GPL");
/* One crypto alias per cra_name used by the algorithm definitions in this file. */
MODULE_ALIAS_CRYPTO("aes");
MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");

View File

@ -0,0 +1,312 @@
/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128 && VLEN < 2048
// - RISC-V Vector AES block cipher extension ('Zvkned')
// - RISC-V Vector Bit-manipulation extension ('Zvbb')
// - RISC-V Vector GCM/GMAC extension ('Zvkg')
#include <linux/linkage.h>
.text
.option arch, +zvkned, +zvbb, +zvkg
#include "aes-macros.S"
// Scalar register aliases.
//
// KEYP, INP, OUTP, LEN and TWEAKP are a0-a4 in that order.
// NOTE(review): this presumably mirrors the (key, in, out, len, tweak)
// argument order of the C callers under the standard RISC-V calling
// convention - confirm against the function entry points.
#define KEYP a0
#define INP a1
#define OUTP a2
#define LEN a3
#define TWEAKP a4
// LEN32 is used as the vsetvli AVL with SEW=32 and is decremented by VL in the
// main loop. NOTE(review): it therefore looks like the remaining length in
// 32-bit words; where it is first computed is not visible here - confirm.
#define LEN32 a5
// NOTE(review): TAIL_LEN is not referenced in the part of the file reviewed
// here; presumably the length of the final partial block - confirm.
#define TAIL_LEN a6
// VL receives the vl value returned by vsetvli.
#define VL a7
// VLMAX receives the result of vsetvli with rs1=zero at e32,m4.
#define VLMAX t4
// v1-v15 contain the AES round keys, but they are used for temporaries before
// the AES round keys have been loaded.
#define TWEAKS v16 // LMUL=4 (most of the time)
#define TWEAKS_BREV v20 // LMUL=4 (most of the time)
#define MULTS_BREV v24 // LMUL=4 (most of the time)
// Single-register temporaries (each is the base of a group when LMUL > 1).
#define TMP0 v28
#define TMP1 v29
#define TMP2 v30
#define TMP3 v31
// xts_init initializes the following values:
//
// TWEAKS: N 128-bit tweaks T*(x^i) for i in 0..(N - 1)
// TWEAKS_BREV: same as TWEAKS, but bit-reversed
// MULTS_BREV: N 128-bit values x^N, bit-reversed. Only if N > 1.
//
// N is the maximum number of blocks that will be processed per loop iteration,
// computed using vsetvli.
//
// The field convention used by XTS is the same as that of GHASH, but with the
// bits reversed within each byte. The zvkg extension provides the vgmul
// instruction which does multiplication in this field. Therefore, for tweak
// computation we use vgmul to do multiplications in parallel, instead of
// serially multiplying by x using shifting+xoring. Note that for this to work,
// the inputs and outputs to vgmul must be bit-reversed (we do it with vbrev8).
//
// Inputs: TWEAKP (address of the first tweak T), LEN and LEN32.
//
// Clobbers: t0 always. When LEN > 16 it additionally writes VL and t1 and uses
// v0, v1 and the register groups starting at v2, v4, v8 and v12 as
// temporaries. Those overlap the registers that hold the AES round keys, so
// the round keys must be loaded after this macro, not before it.
//
// On exit vl/vtype are whatever the last vsetvli/vsetivli executed on the
// taken path left behind; callers must set them again as needed.
.macro xts_init
// Load the first tweak T.
vsetivli zero, 4, e32, m1, ta, ma
vle32.v TWEAKS, (TWEAKP)
// If there's only one block (or no blocks at all), then skip the tweak
// sequence computation because (at most) T itself is needed.
li t0, 16
ble LEN, t0, .Linit_single_block\@
// Save a copy of T bit-reversed in v12.
vbrev8.v v12, TWEAKS
//
// Generate x^i for i in 0..(N - 1), i.e. 128-bit values 1 << i assuming
// that N <= 128. Though, this code actually requires N < 64 (or
// equivalently VLEN < 2048) due to the use of 64-bit intermediate
// values here and in the x^N computation later.
//
vsetvli VL, LEN32, e32, m4, ta, ma
srli t0, VL, 2 // t0 = N (num blocks)
// Generate two sequences, each with N 32-bit values:
// v0=[1, 1, 1, ...] and v1=[0, 1, 2, ...].
vsetvli zero, t0, e32, m1, ta, ma
vmv.v.i v0, 1
vid.v v1
// Use vzext to zero-extend the sequences to 64 bits. Reinterpret them
// as two sequences, each with 2*N 32-bit values:
// v2=[1, 0, 1, 0, 1, 0, ...] and v4=[0, 0, 1, 0, 2, 0, ...].
vsetvli zero, t0, e64, m2, ta, ma
vzext.vf2 v2, v0
vzext.vf2 v4, v1
slli t1, t0, 1 // t1 = 2*N
vsetvli zero, t1, e32, m2, ta, ma
// Use vwsll to compute [1<<0, 0<<0, 1<<1, 0<<0, 1<<2, 0<<0, ...],
// widening to 64 bits per element. When reinterpreted as N 128-bit
// values, this is the needed sequence of 128-bit values 1 << i (x^i).
vwsll.vv v8, v2, v4
// Copy the bit-reversed T to all N elements of TWEAKS_BREV, then
// multiply by x^i. This gives the sequence T*(x^i), bit-reversed.
// (vaesz.vs XORs the single 128-bit element group of v12 into every element
// group of the zeroed destination, which is what performs the broadcast.)
vsetvli zero, LEN32, e32, m4, ta, ma
vmv.v.i TWEAKS_BREV, 0
vaesz.vs TWEAKS_BREV, v12
vbrev8.v v8, v8
vgmul.vv TWEAKS_BREV, v8
// Save a copy of the sequence T*(x^i) with the bit reversal undone.
vbrev8.v TWEAKS, TWEAKS_BREV
// Generate N copies of x^N, i.e. 128-bit values 1 << N, bit-reversed.
li t1, 1
sll t1, t1, t0 // t1 = 1 << N
// Build the 128-bit value in v0: zero both 64-bit halves, then overwrite only
// the low half with t1 (vl=1 with tail-undisturbed keeps the high half zero).
vsetivli zero, 2, e64, m1, ta, ma
vmv.v.i v0, 0
vsetivli zero, 1, e64, m1, tu, ma
vmv.v.x v0, t1
vbrev8.v v0, v0
// Broadcast it to all N element groups of MULTS_BREV, same trick as above.
vsetvli zero, LEN32, e32, m4, ta, ma
vmv.v.i MULTS_BREV, 0
vaesz.vs MULTS_BREV, v0
j .Linit_done\@
.Linit_single_block\@:
// Single-block case: only T and its bit-reversed copy are produced.
vbrev8.v TWEAKS_BREV, TWEAKS
.Linit_done\@:
.endm
// Set the first 128 bits of MULTS_BREV to 0x40, i.e. 'x' bit-reversed. This is
// the multiplier required to advance the tweak by one.
// Clobbers t0 and leaves vtype at e8/m1 (tail-undisturbed) with vl=1.
.macro load_x
li t0, 0x40
// Clear the whole first 128 bits of MULTS_BREV (4 x 32-bit elements).
vsetivli zero, 4, e32, m1, ta, ma
vmv.v.i MULTS_BREV, 0
// Write only byte 0. vl=1 with 'tu' (tail undisturbed) keeps the other
// 15 bytes at the zero value stored just above.
vsetivli zero, 1, e8, m1, tu, ma
vmv.v.x MULTS_BREV, t0
.endm
// Main body of AES-XTS encryption (\enc=1) or decryption (\enc=0) for a
// given AES key length \keylen.
//
// Expects xts_init to have been run, so that TWEAKS, TWEAKS_BREV and (when
// more than one block is processed) MULTS_BREV are set up, and expects:
//   INP, OUTP - source and destination pointers
//   LEN32     - length handled by the main loop, in 32-bit words
//   TAIL_LEN  - number of bytes in the trailing partial block (0 if none)
//   TWEAKP    - pointer to the tweak, updated only when TAIL_LEN == 0
//
// Every path through the macro ends in 'ret'. Clobbers t0, t1, VL, VLMAX
// and TMP0-TMP3, and advances INP, OUTP and LEN32.
.macro __aes_xts_crypt enc, keylen
// With 16 < len <= 31, there's no main loop, just ciphertext stealing.
beqz LEN32, .Lcts_without_main_loop\@
// VLMAX = the largest vl available for e32/m4 (AVL given as x0).
vsetvli VLMAX, zero, e32, m4, ta, ma
1:
vsetvli VL, LEN32, e32, m4, ta, ma
2:
// Encrypt or decrypt VL/4 blocks.
vle32.v TMP0, (INP)
vxor.vv TMP0, TMP0, TWEAKS
aes_crypt TMP0, \enc, \keylen
vxor.vv TMP0, TMP0, TWEAKS
vse32.v TMP0, (OUTP)
// Update the pointers and the remaining length.
slli t0, VL, 2
add INP, INP, t0
add OUTP, OUTP, t0
sub LEN32, LEN32, VL
// Check whether more blocks remain.
beqz LEN32, .Lmain_loop_done\@
// Compute the next sequence of tweaks by multiplying the previous
// sequence by x^N. Store the result in both bit-reversed order and
// regular order (i.e. with the bit reversal undone).
vgmul.vv TWEAKS_BREV, MULTS_BREV
vbrev8.v TWEAKS, TWEAKS_BREV
// Since we compute the tweak multipliers x^N in advance, we require
// that each iteration process the same length except possibly the last.
// This conflicts slightly with the behavior allowed by RISC-V Vector
// Extension, where CPUs can select a lower length for both of the last
// two iterations. E.g., vl might take the sequence of values
// [16, 16, 16, 12, 12], whereas we need [16, 16, 16, 16, 8] so that we
// can use x^4 again instead of computing x^3. Therefore, we explicitly
// keep the vl at VLMAX if there is at least VLMAX remaining.
//
// Both operands are unsigned word counts, so compare them unsigned.
bgeu LEN32, VLMAX, 2b
j 1b
.Lmain_loop_done\@:
load_x
// Compute the next tweak.
// t0 = index (in 32-bit words) of the last block's tweak within the
// group processed by the final iteration.
addi t0, VL, -4
vsetivli zero, 4, e32, m4, ta, ma
vslidedown.vx TWEAKS_BREV, TWEAKS_BREV, t0 // Extract last tweak
vsetivli zero, 4, e32, m1, ta, ma
vgmul.vv TWEAKS_BREV, MULTS_BREV // Advance to next tweak
bnez TAIL_LEN, .Lcts\@
// Update *TWEAKP to contain the next tweak.
vbrev8.v TWEAKS, TWEAKS_BREV
vse32.v TWEAKS, (TWEAKP)
ret
.Lcts_without_main_loop\@:
load_x
.Lcts\@:
// TWEAKS_BREV now contains the next tweak. Compute the one after that.
vsetivli zero, 4, e32, m1, ta, ma
vmv.v.v TMP0, TWEAKS_BREV
vgmul.vv TMP0, MULTS_BREV
// Undo the bit reversal of the next two tweaks and store them in TMP1
// and TMP2, such that TMP1 is the first needed and TMP2 the second.
.if \enc
vbrev8.v TMP1, TWEAKS_BREV
vbrev8.v TMP2, TMP0
.else
vbrev8.v TMP1, TMP0
vbrev8.v TMP2, TWEAKS_BREV
.endif
// Encrypt/decrypt the last full block.
vle32.v TMP0, (INP)
vxor.vv TMP0, TMP0, TMP1
aes_crypt TMP0, \enc, \keylen
vxor.vv TMP0, TMP0, TMP1
// Swap the first TAIL_LEN bytes of the above result with the tail.
// Note that to support in-place encryption/decryption, the load from
// the input tail must happen before the store to the output tail.
addi t0, INP, 16
addi t1, OUTP, 16
vmv.v.v TMP3, TMP0
// 'tu' keeps bytes TAIL_LEN..15 of TMP0 unchanged by the partial load.
vsetvli zero, TAIL_LEN, e8, m1, tu, ma
vle8.v TMP0, (t0)
vse8.v TMP3, (t1)
// Encrypt/decrypt again and store the last full block.
vsetivli zero, 4, e32, m1, ta, ma
vxor.vv TMP0, TMP0, TMP2
aes_crypt TMP0, \enc, \keylen
vxor.vv TMP0, TMP0, TMP2
vse32.v TMP0, (OUTP)
ret
.endm
// Top-level XTS body shared by the encrypt (\enc=1) and decrypt (\enc=0)
// entry points. Splits the length into a main-loop part and an optional
// ciphertext-stealing part, initializes the tweaks, then expands
// __aes_xts_crypt once per AES key length.
.macro aes_xts_crypt enc
// Check whether the length is a multiple of the AES block size.
andi TAIL_LEN, LEN, 15
beqz TAIL_LEN, 1f
// The length isn't a multiple of the AES block size, so ciphertext
// stealing will be required. Ciphertext stealing involves special
// handling of the partial block and the last full block, so subtract
// the length of both from the length to be processed in the main loop.
sub LEN, LEN, TAIL_LEN
addi LEN, LEN, -16
1:
srli LEN32, LEN, 2
// LEN and LEN32 now contain the total length of the blocks that will be
// processed in the main loop, in bytes and 32-bit words respectively.
xts_init
// NOTE(review): aes_begin comes from aes-macros.S (not visible here);
// presumably it loads the round keys and branches to 128f/192f for
// those key sizes, falling through for AES-256 -- confirm.
aes_begin KEYP, 128f, 192f
// Each __aes_xts_crypt expansion ends in 'ret' on every path, so the
// three variants below never fall through into one another.
__aes_xts_crypt \enc, 256
128:
__aes_xts_crypt \enc, 128
192:
__aes_xts_crypt \enc, 192
.endm
// void aes_xts_encrypt_zvkned_zvbb_zvkg(const struct crypto_aes_ctx *key,
// const u8 *in, u8 *out, size_t len,
// u8 tweak[16]);
//
// |key| is the data key. |tweak| contains the next tweak; the encryption of
// the original IV with the tweak key was already done. This function supports
// incremental computation, but |len| must always be >= 16 (AES_BLOCK_SIZE), and
// |len| must be a multiple of 16 except on the last call. If |len| is a
// multiple of 16, then this function updates |tweak| to contain the next tweak.
SYM_FUNC_START(aes_xts_encrypt_zvkned_zvbb_zvkg)
// enc=1 selects encryption; the macro provides the function's 'ret's.
aes_xts_crypt 1
SYM_FUNC_END(aes_xts_encrypt_zvkned_zvbb_zvkg)
// Same prototype and calling convention as the encryption function
SYM_FUNC_START(aes_xts_decrypt_zvkned_zvbb_zvkg)
// enc=0 selects decryption; the macro provides the function's 'ret's.
aes_xts_crypt 0
SYM_FUNC_END(aes_xts_decrypt_zvkned_zvbb_zvkg)

View File

@ -0,0 +1,146 @@
/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector AES block cipher extension ('Zvkned')
// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
#include <linux/linkage.h>
.text
.option arch, +zvkned, +zvkb
#include "aes-macros.S"
// Function arguments of aes_ctr32_crypt_zvkned_zvkb(key, in, out, len, iv),
// in order.
#define KEYP a0
#define INP a1
#define OUTP a2
#define LEN a3
#define IVP a4
// Scratch registers used by aes_ctr32_crypt.
#define LEN32 a5 // Remaining length, rounded up to blocks, in 32-bit words
#define VL_E32 a6 // vl of the current iteration, in 32-bit words
#define VL_BLOCKS a7 // Number of blocks handled by the previous iteration
// AES-CTR body for one AES key length. The counter is the last 32-bit word
// of the IV, stored big-endian; only that word is incremented.
// Uses v0 as the mask register, v16 for the counter blocks, v20 and v24 as
// data/keystream temporaries and v31 for the IV. Clobbers t0, t1, LEN32,
// VL_E32 and VL_BLOCKS, advances INP/OUTP/LEN, writes the next counter block
// back to *IVP and ends in 'ret'.
.macro aes_ctr32_crypt keylen
// LEN32 = number of blocks, rounded up, in 32-bit words.
addi t0, LEN, 15
srli t0, t0, 4
slli LEN32, t0, 2
// Create a mask that selects the last 32-bit word of each 128-bit
// block. This is the word that contains the (big-endian) counter.
// Every mask byte is 0x88 (bits 3 and 7 set), so mask bit i is set exactly
// when i % 4 == 3.
li t0, 0x88
vsetvli t1, zero, e8, m1, ta, ma
vmv.v.x v0, t0
// Load the IV into v31. The last 32-bit word contains the counter.
vsetivli zero, 4, e32, m1, ta, ma
vle32.v v31, (IVP)
// Convert the big-endian counter into little-endian.
// 'mu' (mask undisturbed) leaves the three non-counter words unchanged.
vsetivli zero, 4, e32, m1, ta, mu
vrev8.v v31, v31, v0.t
// Splat the IV to v16 (with LMUL=4). The number of copies is the
// maximum number of blocks that will be processed per iteration.
vsetvli zero, LEN32, e32, m4, ta, ma
vmv.v.i v16, 0
vaesz.vs v16, v31
// v20 = [x, x, x, 0, x, x, x, 1, ...]
viota.m v20, v0, v0.t
// v16 = [IV0, IV1, IV2, counter+0, IV0, IV1, IV2, counter+1, ...]
vsetvli VL_E32, LEN32, e32, m4, ta, mu
vadd.vv v16, v16, v20, v0.t
// The first iteration needs no counter increment; skip it.
j 2f
1:
// Set the number of blocks to process in this iteration. vl=VL_E32 is
// the length in 32-bit words, i.e. 4 times the number of blocks.
vsetvli VL_E32, LEN32, e32, m4, ta, mu
// Increment the counters by the number of blocks processed in the
// previous iteration.
vadd.vx v16, v16, VL_BLOCKS, v0.t
2:
// Prepare the AES inputs into v24.
vmv.v.v v24, v16
vrev8.v v24, v24, v0.t // Convert counters back to big-endian.
// Encrypt the AES inputs to create the next portion of the keystream.
aes_encrypt v24, \keylen
// XOR the data with the keystream.
// The data is handled in bytes (vl taken from LEN), so a final partial
// block only reads and writes the bytes that actually exist.
vsetvli t0, LEN, e8, m4, ta, ma
vle8.v v20, (INP)
vxor.vv v20, v20, v24
vse8.v v20, (OUTP)
// Advance the pointers and update the remaining length.
add INP, INP, t0
add OUTP, OUTP, t0
sub LEN, LEN, t0
sub LEN32, LEN32, VL_E32
srli VL_BLOCKS, VL_E32, 2
// Repeat if more data remains.
bnez LEN, 1b
// Update *IVP to contain the next counter.
// Only the first block of v16 is needed: its counter plus the number
// of blocks processed by the last iteration.
vsetivli zero, 4, e32, m1, ta, mu
vadd.vx v16, v16, VL_BLOCKS, v0.t
vrev8.v v16, v16, v0.t // Convert counters back to big-endian.
vse32.v v16, (IVP)
ret
.endm
// void aes_ctr32_crypt_zvkned_zvkb(const struct crypto_aes_ctx *key,
// const u8 *in, u8 *out, size_t len,
// u8 iv[16]);
SYM_FUNC_START(aes_ctr32_crypt_zvkned_zvkb)
// NOTE(review): aes_begin is defined in aes-macros.S (not visible here);
// presumably it loads the round keys and branches to 128f/192f for those
// key sizes -- confirm. Each aes_ctr32_crypt expansion ends in 'ret', so
// the variants do not fall through into each other.
aes_begin KEYP, 128f, 192f
aes_ctr32_crypt 256
128:
aes_ctr32_crypt 128
192:
aes_ctr32_crypt 192
SYM_FUNC_END(aes_ctr32_crypt_zvkned_zvkb)

View File

@ -0,0 +1,180 @@
/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector AES block cipher extension ('Zvkned')
#include <linux/linkage.h>
.text
.option arch, +zvkned
#include "aes-macros.S"
// Argument registers shared by the functions in this file. The single-block
// and ECB functions take (key, in, out[, len]); the CBC functions also take
// the IV pointer as the fifth argument.
#define KEYP a0
#define INP a1
#define OUTP a2
#define LEN a3
#define IVP a4
// Encrypt (\enc=1) or decrypt (\enc=0) the block at INP into OUTP using v16,
// then return.
// NOTE(review): no vsetvli is issued here; presumably aes_begin leaves
// vl=4/e32 configured for a single 128-bit block -- confirm in aes-macros.S.
.macro __aes_crypt_zvkned enc, keylen
vle32.v v16, (INP)
aes_crypt v16, \enc, \keylen
vse32.v v16, (OUTP)
ret
.endm
// Single-block AES for all key lengths. Each __aes_crypt_zvkned expansion
// ends in 'ret', so the three variants never fall through into each other.
.macro aes_crypt_zvkned enc
aes_begin KEYP, 128f, 192f
__aes_crypt_zvkned \enc, 256
128:
__aes_crypt_zvkned \enc, 128
192:
__aes_crypt_zvkned \enc, 192
.endm
// void aes_encrypt_zvkned(const struct crypto_aes_ctx *key,
// const u8 in[16], u8 out[16]);
SYM_FUNC_START(aes_encrypt_zvkned)
// enc=1: encrypt one 16-byte block.
aes_crypt_zvkned 1
SYM_FUNC_END(aes_encrypt_zvkned)
// Same prototype and calling convention as the encryption function
SYM_FUNC_START(aes_decrypt_zvkned)
// enc=0: decrypt one 16-byte block.
aes_crypt_zvkned 0
SYM_FUNC_END(aes_decrypt_zvkned)
// ECB loop for one key length: processes LEN bytes from INP to OUTP, as many
// blocks per iteration as vsetvli grants for e32/m8 (register group v16-v23).
// Clobbers t0 and t1, advances INP and OUTP, and ends in 'ret'.
.macro __aes_ecb_crypt enc, keylen
srli t0, LEN, 2
// t0 is the remaining length in 32-bit words. It's a multiple of 4.
1:
vsetvli t1, t0, e32, m8, ta, ma
sub t0, t0, t1 // Subtract number of words processed
slli t1, t1, 2 // Words to bytes
vle32.v v16, (INP)
aes_crypt v16, \enc, \keylen
vse32.v v16, (OUTP)
add INP, INP, t1
add OUTP, OUTP, t1
bnez t0, 1b
ret
.endm
// ECB for all key lengths. Each __aes_ecb_crypt expansion ends in 'ret', so
// the three variants never fall through into each other.
.macro aes_ecb_crypt enc
aes_begin KEYP, 128f, 192f
__aes_ecb_crypt \enc, 256
128:
__aes_ecb_crypt \enc, 128
192:
__aes_ecb_crypt \enc, 192
.endm
// void aes_ecb_encrypt_zvkned(const struct crypto_aes_ctx *key,
// const u8 *in, u8 *out, size_t len);
//
// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
SYM_FUNC_START(aes_ecb_encrypt_zvkned)
// enc=1: ECB encryption.
aes_ecb_crypt 1
SYM_FUNC_END(aes_ecb_encrypt_zvkned)
// Same prototype and calling convention as the encryption function
SYM_FUNC_START(aes_ecb_decrypt_zvkned)
// enc=0: ECB decryption.
aes_ecb_crypt 0
SYM_FUNC_END(aes_ecb_decrypt_zvkned)
// CBC encryption for one key length. Blocks are processed strictly one at a
// time because each block's input depends on the previous ciphertext block.
// v16 carries the chaining value (IV, then the latest ciphertext block) and
// is written back to *IVP at the end.
// NOTE(review): no vsetvli is issued here; presumably aes_begin leaves
// vl=4/e32 configured -- confirm in aes-macros.S.
.macro aes_cbc_encrypt keylen
vle32.v v16, (IVP) // Load IV
1:
vle32.v v17, (INP) // Load plaintext block
vxor.vv v16, v16, v17 // XOR with IV or prev ciphertext block
aes_encrypt v16, \keylen // Encrypt
vse32.v v16, (OUTP) // Store ciphertext block
addi INP, INP, 16
addi OUTP, OUTP, 16
addi LEN, LEN, -16
bnez LEN, 1b
vse32.v v16, (IVP) // Store next IV
ret
.endm
// CBC decryption for one key length, one block per iteration. v16 holds the
// chaining value (IV, then the previous ciphertext block); v18 keeps a copy
// of the current ciphertext block so that it survives the decryption of v17
// and can become the next chaining value. The last ciphertext block is
// written back to *IVP.
// NOTE(review): no vsetvli is issued here; presumably aes_begin leaves
// vl=4/e32 configured -- confirm in aes-macros.S.
.macro aes_cbc_decrypt keylen
vle32.v v16, (IVP) // Load IV
1:
vle32.v v17, (INP) // Load ciphertext block
vmv.v.v v18, v17 // Save ciphertext block
aes_decrypt v17, \keylen // Decrypt
vxor.vv v17, v17, v16 // XOR with IV or prev ciphertext block
vse32.v v17, (OUTP) // Store plaintext block
vmv.v.v v16, v18 // Next "IV" is prev ciphertext block
addi INP, INP, 16
addi OUTP, OUTP, 16
addi LEN, LEN, -16
bnez LEN, 1b
vse32.v v16, (IVP) // Store next IV
ret
.endm
// void aes_cbc_encrypt_zvkned(const struct crypto_aes_ctx *key,
// const u8 *in, u8 *out, size_t len, u8 iv[16]);
//
// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
SYM_FUNC_START(aes_cbc_encrypt_zvkned)
// One expansion of the CBC loop per key length; each ends in 'ret'.
aes_begin KEYP, 128f, 192f
aes_cbc_encrypt 256
128:
aes_cbc_encrypt 128
192:
aes_cbc_encrypt 192
SYM_FUNC_END(aes_cbc_encrypt_zvkned)
// Same prototype and calling convention as the encryption function
SYM_FUNC_START(aes_cbc_decrypt_zvkned)
// One expansion of the CBC loop per key length; each ends in 'ret'.
aes_begin KEYP, 128f, 192f
aes_cbc_decrypt 256
128:
aes_cbc_decrypt 128
192:
aes_cbc_decrypt 192
SYM_FUNC_END(aes_cbc_decrypt_zvkned)

View File

@ -0,0 +1,101 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* ChaCha20 using the RISC-V vector crypto extensions
*
* Copyright (C) 2023 SiFive, Inc.
* Author: Jerry Shih <jerry.shih@sifive.com>
*/
#include <asm/simd.h>
#include <asm/vector.h>
#include <crypto/internal/chacha.h>
#include <crypto/internal/skcipher.h>
#include <linux/linkage.h>
#include <linux/module.h>
/*
 * Assembly routine that XORs |len| bytes from |in| with the ChaCha20
 * keystream into |out|. The callers in this file always pass a nonzero
 * multiple of CHACHA_BLOCK_SIZE as |len|, and advance the block counter in
 * iv[0] themselves afterwards.
 */
asmlinkage void chacha20_zvkb(const u32 key[8], const u8 *in, u8 *out,
size_t len, const u32 iv[4]);
/*
 * Encrypt or decrypt a request with ChaCha20 (the two operations are the
 * same XOR with the keystream).
 *
 * Each step of the scatterlist walk is split into a run of whole ChaCha
 * blocks, which is handed directly to the assembly routine, and a trailing
 * partial block.  A partial block is only processed at the very end of the
 * request, by bouncing it through an on-stack block-sized buffer; otherwise
 * it is returned to the walk as unprocessed bytes.
 *
 * Returns 0 on success or the error reported by the skcipher walk.
 */
static int riscv64_chacha20_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	const struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
	u32 iv[CHACHA_IV_SIZE / sizeof(u32)];
	u8 block_buffer[CHACHA_BLOCK_SIZE];
	struct skcipher_walk walk;
	unsigned int whole;	/* bytes in complete blocks in this step */
	unsigned int partial;	/* bytes left over after the whole blocks */
	unsigned int i;
	int err;

	/* The IV is four little-endian words; word 0 is the block counter. */
	for (i = 0; i < CHACHA_IV_SIZE / sizeof(u32); i++)
		iv[i] = get_unaligned_le32(req->iv + i * sizeof(u32));

	err = skcipher_walk_virt(&walk, req, false);
	while (walk.nbytes) {
		partial = walk.nbytes % CHACHA_BLOCK_SIZE;
		whole = walk.nbytes - partial;

		kernel_vector_begin();
		if (whole) {
			chacha20_zvkb(ctx->key, walk.src.virt.addr,
				      walk.dst.virt.addr, whole, iv);
			/* Advance the counter past the blocks just used. */
			iv[0] += whole / CHACHA_BLOCK_SIZE;
		}
		if (partial > 0 && walk.nbytes == walk.total) {
			/*
			 * Final partial block: run a full block through the
			 * bounce buffer and copy out only the valid bytes.
			 */
			memcpy(block_buffer, walk.src.virt.addr + whole,
			       partial);
			chacha20_zvkb(ctx->key, block_buffer, block_buffer,
				      CHACHA_BLOCK_SIZE, iv);
			memcpy(walk.dst.virt.addr + whole, block_buffer,
			       partial);
			partial = 0;
		}
		kernel_vector_end();

		/* Report how many bytes of this step remain unprocessed. */
		err = skcipher_walk_done(&walk, partial);
	}

	return err;
}
/* skcipher registration record for the Zvkb-accelerated ChaCha20. */
static struct skcipher_alg riscv64_chacha_alg = {
	.base = {
		.cra_name = "chacha20",
		.cra_driver_name = "chacha20-riscv64-zvkb",
		.cra_priority = 300,
		.cra_blocksize = 1,
		.cra_ctxsize = sizeof(struct chacha_ctx),
		.cra_module = THIS_MODULE,
	},
	.min_keysize = CHACHA_KEY_SIZE,
	.max_keysize = CHACHA_KEY_SIZE,
	.ivsize = CHACHA_IV_SIZE,
	.chunksize = CHACHA_BLOCK_SIZE,
	.walksize = 4 * CHACHA_BLOCK_SIZE,
	.setkey = chacha20_setkey,
	/* Encryption and decryption are the same keystream XOR. */
	.encrypt = riscv64_chacha20_crypt,
	.decrypt = riscv64_chacha20_crypt,
};
/*
 * Register the algorithm only when the CPU has the Zvkb extension and a
 * vector length of at least 128 bits; otherwise report -ENODEV.
 */
static int __init riscv64_chacha_mod_init(void)
{
	if (!riscv_isa_extension_available(NULL, ZVKB))
		return -ENODEV;

	if (riscv_vector_vlen() < 128)
		return -ENODEV;

	return crypto_register_skcipher(&riscv64_chacha_alg);
}
/* Module unload: unregister the algorithm registered by the init function. */
static void __exit riscv64_chacha_mod_exit(void)
{
crypto_unregister_skcipher(&riscv64_chacha_alg);
}
module_init(riscv64_chacha_mod_init);
module_exit(riscv64_chacha_mod_exit);
MODULE_DESCRIPTION("ChaCha20 (RISC-V accelerated)");
MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("chacha20");

View File

@ -0,0 +1,294 @@
/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
#include <linux/linkage.h>
.text
.option arch, +zvkb
// Function arguments of chacha20_zvkb(key, in, out, len, iv), in order.
#define KEYP a0
#define INP a1
#define OUTP a2
#define LEN a3
#define IVP a4
// The four ChaCha constant words, kept in scalar registers.
#define CONSTS0 a5
#define CONSTS1 a6
#define CONSTS2 a7
#define CONSTS3 t0
// Temporaries and loop state.
#define TMP t1
#define VL t2 // Number of blocks processed in the current iteration
#define STRIDE t3 // Byte stride between blocks for the strided loads/stores
#define NROUNDS t4 // Remaining round count inside the round loop
// Key, counter and nonce words. These live in s0-s11, which chacha20_zvkb
// saves to the stack on entry and restores before returning.
#define KEY0 s0
#define KEY1 s1
#define KEY2 s2
#define KEY3 s3
#define KEY4 s4
#define KEY5 s5
#define KEY6 s6
#define KEY7 s7
#define COUNTER s8
#define NONCE0 s9
#define NONCE1 s10
#define NONCE2 s11
// Perform four ChaCha quarter-rounds, one per (a, b, c, d) register
// quadruple, with the instructions of the four quarter-rounds interleaved
// step by step. Each register holds one state word for all blocks being
// processed. Rotations are written as right-rotates by (32 - n), which equal
// left-rotates by n on the 32-bit elements the caller configures.
.macro chacha_round a0, b0, c0, d0, a1, b1, c1, d1, \
a2, b2, c2, d2, a3, b3, c3, d3
// a += b; d ^= a; d = rol(d, 16);
vadd.vv \a0, \a0, \b0
vadd.vv \a1, \a1, \b1
vadd.vv \a2, \a2, \b2
vadd.vv \a3, \a3, \b3
vxor.vv \d0, \d0, \a0
vxor.vv \d1, \d1, \a1
vxor.vv \d2, \d2, \a2
vxor.vv \d3, \d3, \a3
vror.vi \d0, \d0, 32 - 16
vror.vi \d1, \d1, 32 - 16
vror.vi \d2, \d2, 32 - 16
vror.vi \d3, \d3, 32 - 16
// c += d; b ^= c; b = rol(b, 12);
vadd.vv \c0, \c0, \d0
vadd.vv \c1, \c1, \d1
vadd.vv \c2, \c2, \d2
vadd.vv \c3, \c3, \d3
vxor.vv \b0, \b0, \c0
vxor.vv \b1, \b1, \c1
vxor.vv \b2, \b2, \c2
vxor.vv \b3, \b3, \c3
vror.vi \b0, \b0, 32 - 12
vror.vi \b1, \b1, 32 - 12
vror.vi \b2, \b2, 32 - 12
vror.vi \b3, \b3, 32 - 12
// a += b; d ^= a; d = rol(d, 8);
vadd.vv \a0, \a0, \b0
vadd.vv \a1, \a1, \b1
vadd.vv \a2, \a2, \b2
vadd.vv \a3, \a3, \b3
vxor.vv \d0, \d0, \a0
vxor.vv \d1, \d1, \a1
vxor.vv \d2, \d2, \a2
vxor.vv \d3, \d3, \a3
vror.vi \d0, \d0, 32 - 8
vror.vi \d1, \d1, 32 - 8
vror.vi \d2, \d2, 32 - 8
vror.vi \d3, \d3, 32 - 8
// c += d; b ^= c; b = rol(b, 7);
vadd.vv \c0, \c0, \d0
vadd.vv \c1, \c1, \d1
vadd.vv \c2, \c2, \d2
vadd.vv \c3, \c3, \d3
vxor.vv \b0, \b0, \c0
vxor.vv \b1, \b1, \c1
vxor.vv \b2, \b2, \c2
vxor.vv \b3, \b3, \c3
vror.vi \b0, \b0, 32 - 7
vror.vi \b1, \b1, 32 - 7
vror.vi \b2, \b2, 32 - 7
vror.vi \b3, \b3, 32 - 7
.endm
// void chacha20_zvkb(const u32 key[8], const u8 *in, u8 *out, size_t len,
// const u32 iv[4]);
//
// |len| must be nonzero and a multiple of 64 (CHACHA_BLOCK_SIZE).
// The counter is treated as 32-bit, following the RFC7539 convention.
SYM_FUNC_START(chacha20_zvkb)
srli LEN, LEN, 6 // Bytes to blocks
// Save s0-s11 (12 registers x 8 bytes = 96 bytes); they are used below
// to hold the key, counter and nonce words.
addi sp, sp, -96
sd s0, 0(sp)
sd s1, 8(sp)
sd s2, 16(sp)
sd s3, 24(sp)
sd s4, 32(sp)
sd s5, 40(sp)
sd s6, 48(sp)
sd s7, 56(sp)
sd s8, 64(sp)
sd s9, 72(sp)
sd s10, 80(sp)
sd s11, 88(sp)
// Consecutive blocks are 64 bytes apart in memory.
li STRIDE, 64
// Set up the initial state matrix in scalar registers.
li CONSTS0, 0x61707865 // "expa" little endian
li CONSTS1, 0x3320646e // "nd 3" little endian
li CONSTS2, 0x79622d32 // "2-by" little endian
li CONSTS3, 0x6b206574 // "te k" little endian
lw KEY0, 0(KEYP)
lw KEY1, 4(KEYP)
lw KEY2, 8(KEYP)
lw KEY3, 12(KEYP)
lw KEY4, 16(KEYP)
lw KEY5, 20(KEYP)
lw KEY6, 24(KEYP)
lw KEY7, 28(KEYP)
lw COUNTER, 0(IVP)
lw NONCE0, 4(IVP)
lw NONCE1, 8(IVP)
lw NONCE2, 12(IVP)
.Lblock_loop:
// Set vl to the number of blocks to process in this iteration.
vsetvli VL, LEN, e32, m1, ta, ma
// Set up the initial state matrix for the next VL blocks in v0-v15.
// v{i} holds the i'th 32-bit word of the state matrix for all blocks.
// Note that only the counter word, at index 12, differs across blocks.
vmv.v.x v0, CONSTS0
vmv.v.x v1, CONSTS1
vmv.v.x v2, CONSTS2
vmv.v.x v3, CONSTS3
vmv.v.x v4, KEY0
vmv.v.x v5, KEY1
vmv.v.x v6, KEY2
vmv.v.x v7, KEY3
vmv.v.x v8, KEY4
vmv.v.x v9, KEY5
vmv.v.x v10, KEY6
vmv.v.x v11, KEY7
// v12 = [COUNTER + 0, COUNTER + 1, ...]: one counter value per block.
vid.v v12
vadd.vx v12, v12, COUNTER
vmv.v.x v13, NONCE0
vmv.v.x v14, NONCE1
vmv.v.x v15, NONCE2
// Load the first half of the input data for each block into v16-v23.
// v{16+i} holds the i'th 32-bit word for all blocks.
vlsseg8e32.v v16, (INP), STRIDE
// 20 rounds, executed as 10 iterations of (column round, diagonal round).
li NROUNDS, 20
.Lnext_doubleround:
addi NROUNDS, NROUNDS, -2
// column round
chacha_round v0, v4, v8, v12, v1, v5, v9, v13, \
v2, v6, v10, v14, v3, v7, v11, v15
// diagonal round
chacha_round v0, v5, v10, v15, v1, v6, v11, v12, \
v2, v7, v8, v13, v3, v4, v9, v14
bnez NROUNDS, .Lnext_doubleround
// Load the second half of the input data for each block into v24-v31.
// v{24+i} holds the {8+i}'th 32-bit word for all blocks.
addi TMP, INP, 32
vlsseg8e32.v v24, (TMP), STRIDE
// Finalize the first half of the keystream for each block.
// (Add the initial state words back onto the permuted state.)
vadd.vx v0, v0, CONSTS0
vadd.vx v1, v1, CONSTS1
vadd.vx v2, v2, CONSTS2
vadd.vx v3, v3, CONSTS3
vadd.vx v4, v4, KEY0
vadd.vx v5, v5, KEY1
vadd.vx v6, v6, KEY2
vadd.vx v7, v7, KEY3
// Encrypt/decrypt the first half of the data for each block.
vxor.vv v16, v16, v0
vxor.vv v17, v17, v1
vxor.vv v18, v18, v2
vxor.vv v19, v19, v3
vxor.vv v20, v20, v4
vxor.vv v21, v21, v5
vxor.vv v22, v22, v6
vxor.vv v23, v23, v7
// Store the first half of the output data for each block.
vssseg8e32.v v16, (OUTP), STRIDE
// Finalize the second half of the keystream for each block.
vadd.vx v8, v8, KEY4
vadd.vx v9, v9, KEY5
vadd.vx v10, v10, KEY6
vadd.vx v11, v11, KEY7
// The initial counter word was COUNTER + block index, so both parts are
// added to v12: COUNTER here and the index vector (rebuilt in v0, which
// is no longer needed) at the end of this group.
vid.v v0
vadd.vx v12, v12, COUNTER
vadd.vx v13, v13, NONCE0
vadd.vx v14, v14, NONCE1
vadd.vx v15, v15, NONCE2
vadd.vv v12, v12, v0
// Encrypt/decrypt the second half of the data for each block.
vxor.vv v24, v24, v8
vxor.vv v25, v25, v9
vxor.vv v26, v26, v10
vxor.vv v27, v27, v11
vxor.vv v29, v29, v13
vxor.vv v28, v28, v12
vxor.vv v30, v30, v14
vxor.vv v31, v31, v15
// Store the second half of the output data for each block.
addi TMP, OUTP, 32
vssseg8e32.v v24, (TMP), STRIDE
// Update the counter, the remaining number of blocks, and the input and
// output pointers according to the number of blocks processed (VL).
add COUNTER, COUNTER, VL
sub LEN, LEN, VL
slli TMP, VL, 6
add OUTP, OUTP, TMP
add INP, INP, TMP
bnez LEN, .Lblock_loop
// Restore the saved registers and release the stack frame.
ld s0, 0(sp)
ld s1, 8(sp)
ld s2, 16(sp)
ld s3, 24(sp)
ld s4, 32(sp)
ld s5, 40(sp)
ld s6, 48(sp)
ld s7, 56(sp)
ld s8, 64(sp)
ld s9, 72(sp)
ld s10, 80(sp)
ld s11, 88(sp)
addi sp, sp, 96
ret
SYM_FUNC_END(chacha20_zvkb)

View File

@ -0,0 +1,168 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* GHASH using the RISC-V vector crypto extensions
*
* Copyright (C) 2023 VRULL GmbH
* Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
*
* Copyright (C) 2023 SiFive, Inc.
* Author: Jerry Shih <jerry.shih@sifive.com>
*/
#include <asm/simd.h>
#include <asm/vector.h>
#include <crypto/ghash.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <linux/linkage.h>
#include <linux/module.h>
/*
 * Assembly routine that folds |len| bytes of |data| into |accumulator| using
 * the hash key |key|. The callers in this file always pass a nonzero
 * multiple of GHASH_BLOCK_SIZE as |len|.
 */
asmlinkage void ghash_zvkg(be128 *accumulator, const be128 *key, const u8 *data,
size_t len);
/* Per-transform context: the GHASH key, stored exactly as given to setkey. */
struct riscv64_ghash_tfm_ctx {
be128 key;
};
/* Per-request hashing state. */
struct riscv64_ghash_desc_ctx {
/* Running GHASH value; copied out as the digest by final(). */
be128 accumulator;
/* Holds input bytes that do not yet make up a full block. */
u8 buffer[GHASH_BLOCK_SIZE];
/* Number of valid bytes in buffer; stays below GHASH_BLOCK_SIZE. */
u32 bytes;
};
/*
 * Install the hash key.  The key must be exactly GHASH_BLOCK_SIZE bytes;
 * any other length is rejected with -EINVAL.  Returns 0 on success.
 */
static int riscv64_ghash_setkey(struct crypto_shash *tfm, const u8 *key,
				unsigned int keylen)
{
	struct riscv64_ghash_tfm_ctx *tctx = crypto_shash_ctx(tfm);

	if (keylen == GHASH_BLOCK_SIZE) {
		memcpy(&tctx->key, key, GHASH_BLOCK_SIZE);
		return 0;
	}

	return -EINVAL;
}
static int riscv64_ghash_init(struct shash_desc *desc)
{
struct riscv64_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
*dctx = (struct riscv64_ghash_desc_ctx){};
return 0;
}
/*
 * Fold @srclen bytes from @src into the accumulator.  @srclen must be
 * nonzero and a multiple of GHASH_BLOCK_SIZE.
 *
 * The vector implementation is used whenever SIMD is usable in the current
 * context; otherwise each block is processed with the generic
 * XOR-then-multiply sequence.
 */
static inline void
riscv64_ghash_blocks(const struct riscv64_ghash_tfm_ctx *tctx,
		     struct riscv64_ghash_desc_ctx *dctx,
		     const u8 *src, size_t srclen)
{
	if (!crypto_simd_usable()) {
		/* Scalar fallback, one block per iteration. */
		do {
			crypto_xor((u8 *)&dctx->accumulator, src,
				   GHASH_BLOCK_SIZE);
			gf128mul_lle(&dctx->accumulator, &tctx->key);
			srclen -= GHASH_BLOCK_SIZE;
			src += GHASH_BLOCK_SIZE;
		} while (srclen);
		return;
	}

	kernel_vector_begin();
	ghash_zvkg(&dctx->accumulator, &tctx->key, src, srclen);
	kernel_vector_end();
}
/*
 * Absorb @srclen bytes of input.
 *
 * Bytes left over from a previous call are completed to a full block first;
 * then all whole blocks in @src are hashed directly; finally any trailing
 * bytes (fewer than GHASH_BLOCK_SIZE) are stashed in the buffer for the next
 * update() or final().  Always returns 0.
 */
static int riscv64_ghash_update(struct shash_desc *desc, const u8 *src,
				unsigned int srclen)
{
	const struct riscv64_ghash_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
	struct riscv64_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
	unsigned int len;

	if (dctx->bytes) {
		/*
		 * Number of bytes needed to complete the buffered block.
		 * dctx->bytes is nonzero and below GHASH_BLOCK_SIZE here, so
		 * this is in 1..GHASH_BLOCK_SIZE-1.
		 */
		unsigned int room = GHASH_BLOCK_SIZE - dctx->bytes;

		/*
		 * Compare against the remaining room instead of computing
		 * dctx->bytes + srclen, which could wrap around for a very
		 * large srclen and wrongly take the "just buffer it" path.
		 */
		if (srclen < room) {
			memcpy(dctx->buffer + dctx->bytes, src, srclen);
			dctx->bytes += srclen;
			return 0;
		}
		memcpy(dctx->buffer + dctx->bytes, src, room);
		riscv64_ghash_blocks(tctx, dctx, dctx->buffer,
				     GHASH_BLOCK_SIZE);
		src += room;
		srclen -= room;
		dctx->bytes = 0;
	}

	/* Hash all whole blocks straight from the caller's buffer. */
	len = round_down(srclen, GHASH_BLOCK_SIZE);
	if (len) {
		riscv64_ghash_blocks(tctx, dctx, src, len);
		src += len;
		srclen -= len;
	}

	/* Keep the remaining partial block for later. */
	if (srclen) {
		memcpy(dctx->buffer, src, srclen);
		dctx->bytes = srclen;
	}

	return 0;
}
/*
 * Finish the hash: if a partial block is buffered, pad it with zero bytes
 * and hash it, then copy the accumulator to @out as the digest.
 * Always returns 0.
 */
static int riscv64_ghash_final(struct shash_desc *desc, u8 *out)
{
	const struct riscv64_ghash_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
	struct riscv64_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
	int pos;

	if (dctx->bytes) {
		/* Zero-pad from the first unused byte to the block end. */
		pos = dctx->bytes;
		while (pos < GHASH_BLOCK_SIZE) {
			dctx->buffer[pos] = 0;
			pos++;
		}

		riscv64_ghash_blocks(tctx, dctx, dctx->buffer,
				     GHASH_BLOCK_SIZE);
	}

	memcpy(out, &dctx->accumulator, GHASH_DIGEST_SIZE);

	return 0;
}
/* shash registration record for the Zvkg-accelerated GHASH. */
static struct shash_alg riscv64_ghash_alg = {
	.base = {
		.cra_name = "ghash",
		.cra_driver_name = "ghash-riscv64-zvkg",
		.cra_priority = 300,
		.cra_blocksize = GHASH_BLOCK_SIZE,
		.cra_ctxsize = sizeof(struct riscv64_ghash_tfm_ctx),
		.cra_module = THIS_MODULE,
	},
	.digestsize = GHASH_DIGEST_SIZE,
	.descsize = sizeof(struct riscv64_ghash_desc_ctx),
	.setkey = riscv64_ghash_setkey,
	.init = riscv64_ghash_init,
	.update = riscv64_ghash_update,
	.final = riscv64_ghash_final,
};
/*
 * Register the algorithm only when the CPU has the Zvkg extension and a
 * vector length of at least 128 bits; otherwise report -ENODEV.
 */
static int __init riscv64_ghash_mod_init(void)
{
	if (!riscv_isa_extension_available(NULL, ZVKG))
		return -ENODEV;

	if (riscv_vector_vlen() < 128)
		return -ENODEV;

	return crypto_register_shash(&riscv64_ghash_alg);
}
/* Module unload: unregister the algorithm registered by the init function. */
static void __exit riscv64_ghash_mod_exit(void)
{
crypto_unregister_shash(&riscv64_ghash_alg);
}
module_init(riscv64_ghash_mod_init);
module_exit(riscv64_ghash_mod_exit);
MODULE_DESCRIPTION("GHASH (RISC-V accelerated)");
MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("ghash");

View File

@ -0,0 +1,72 @@
/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector GCM/GMAC extension ('Zvkg')
#include <linux/linkage.h>
.text
.option arch, +zvkg

// Scalar argument registers, in prototype order.
#define ACCUMULATOR	a0
#define KEY		a1
#define DATA		a2
#define LEN		a3

// Vector registers used by ghash_zvkg.
#define VACC		v1	// running 128-bit accumulator
#define VKEY		v2	// 128-bit hash key
#define VBLOCK		v3	// current 16-byte data block

// void ghash_zvkg(be128 *accumulator, const be128 *key, const u8 *data,
//		   size_t len);
//
// Feeds |len| bytes of |data| through vghsh.vv, 16 bytes per iteration,
// reading the accumulator from |accumulator| on entry and writing the updated
// value back before returning.
//
// |len| must be nonzero and a multiple of 16 (GHASH_BLOCK_SIZE): the loop
// subtracts 16 from LEN before testing it for zero.
SYM_FUNC_START(ghash_zvkg)
	// All vector accesses below operate on 4 x 32-bit elements (128 bits).
	vsetivli	zero, 4, e32, m1, ta, ma
	vle32.v		VKEY, (KEY)
	vle32.v		VACC, (ACCUMULATOR)

.Lghash_next_block:
	// Fetch the next block, then advance the cursor and the byte counter.
	vle32.v		VBLOCK, (DATA)
	addi		LEN, LEN, -16
	addi		DATA, DATA, 16
	// Fold the block into the accumulator.
	vghsh.vv	VACC, VKEY, VBLOCK
	bnez		LEN, .Lghash_next_block

	// Write the final accumulator back to memory.
	vse32.v		VACC, (ACCUMULATOR)
	ret
SYM_FUNC_END(ghash_zvkg)

View File

@ -0,0 +1,137 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* SHA-256 and SHA-224 using the RISC-V vector crypto extensions
*
* Copyright (C) 2022 VRULL GmbH
* Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
*
* Copyright (C) 2023 SiFive, Inc.
* Author: Jerry Shih <jerry.shih@sifive.com>
*/
#include <asm/simd.h>
#include <asm/vector.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sha256_base.h>
#include <linux/linkage.h>
#include <linux/module.h>
/*
* Prototype of the assembly SHA-256 block function.
*
* Note: the asm function only uses the 'state' field of struct sha256_state.
* It is assumed to be the first field.
*
* @state: hash state, updated in place
* @data: input, consumed 64 bytes per block
* @num_blocks: number of blocks to process
*/
asmlinkage void sha256_transform_zvknha_or_zvknhb_zvkb(
struct sha256_state *state, const u8 *data, int num_blocks);
/*
 * shash .update handler for SHA-256 and SHA-224.
 *
 * Feeds @len bytes of @data into the hash state held in @desc.  The vector
 * block function is used when crypto_simd_usable() allows it; otherwise the
 * work is handed to crypto_sha256_update().  Always returns 0.
 */
static int riscv64_sha256_update(struct shash_desc *desc, const u8 *data,
				 unsigned int len)
{
	/*
	 * The asm block function is handed a struct sha256_state pointer but
	 * treats it as the raw 256-bit state, so 'state' must sit at offset 0.
	 */
	BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);

	if (!crypto_simd_usable()) {
		crypto_sha256_update(desc, data, len);
		return 0;
	}

	kernel_vector_begin();
	sha256_base_do_update(desc, data, len,
			      sha256_transform_zvknha_or_zvknhb_zvkb);
	kernel_vector_end();

	return 0;
}
/*
 * shash .finup handler for SHA-256 and SHA-224.
 *
 * Absorbs the trailing @len bytes of @data (if any), finalizes the hash and
 * writes the digest to @out.  When crypto_simd_usable() is false the whole
 * operation is delegated to crypto_sha256_finup().
 */
static int riscv64_sha256_finup(struct shash_desc *desc, const u8 *data,
				unsigned int len, u8 *out)
{
	if (!crypto_simd_usable())
		return crypto_sha256_finup(desc, data, len, out);

	/* Update and finalize inside a single vector-enabled section. */
	kernel_vector_begin();
	if (len != 0)
		sha256_base_do_update(desc, data, len,
				      sha256_transform_zvknha_or_zvknhb_zvkb);
	sha256_base_do_finalize(desc, sha256_transform_zvknha_or_zvknhb_zvkb);
	kernel_vector_end();

	return sha256_base_finish(desc, out);
}
/*
* shash .final handler: finish the hash without any further input by calling
* riscv64_sha256_finup() with a NULL data pointer and a length of 0.
*/
static int riscv64_sha256_final(struct shash_desc *desc, u8 *out)
{
return riscv64_sha256_finup(desc, NULL, 0, out);
}
/*
 * shash .digest handler for SHA-256: one-shot init + finup.
 *
 * Returns the nonzero result of sha256_base_init() if there is one, otherwise
 * whatever riscv64_sha256_finup() returns.
 */
static int riscv64_sha256_digest(struct shash_desc *desc, const u8 *data,
				 unsigned int len, u8 *out)
{
	int err = sha256_base_init(desc);

	if (err)
		return err;

	return riscv64_sha256_finup(desc, data, len, out);
}
/*
* shash descriptors registered by this module: entry 0 is "sha256", entry 1
* is "sha224".  Both share the same update/final/finup handlers and differ in
* their init function, digest size and names.  Only the SHA-256 entry sets a
* one-shot .digest handler.
*/
static struct shash_alg riscv64_sha256_algs[] = {
{
.init = sha256_base_init,
.update = riscv64_sha256_update,
.final = riscv64_sha256_final,
.finup = riscv64_sha256_finup,
.digest = riscv64_sha256_digest,
.descsize = sizeof(struct sha256_state),
.digestsize = SHA256_DIGEST_SIZE,
.base = {
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_priority = 300,
.cra_name = "sha256",
.cra_driver_name = "sha256-riscv64-zvknha_or_zvknhb-zvkb",
.cra_module = THIS_MODULE,
},
}, {
.init = sha224_base_init,
.update = riscv64_sha256_update,
.final = riscv64_sha256_final,
.finup = riscv64_sha256_finup,
.descsize = sizeof(struct sha256_state),
.digestsize = SHA224_DIGEST_SIZE,
.base = {
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_priority = 300,
.cra_name = "sha224",
.cra_driver_name = "sha224-riscv64-zvknha_or_zvknhb-zvkb",
.cra_module = THIS_MODULE,
},
},
};
/*
 * Module init: register the SHA-256/SHA-224 algorithms if the CPU provides
 * everything the assembly needs, otherwise fail with -ENODEV.
 */
static int __init riscv64_sha256_mod_init(void)
{
	/* Both zvknha and zvknhb provide the SHA-256 instructions. */
	if (!riscv_isa_extension_available(NULL, ZVKNHA) &&
	    !riscv_isa_extension_available(NULL, ZVKNHB))
		return -ENODEV;

	if (!riscv_isa_extension_available(NULL, ZVKB))
		return -ENODEV;

	if (riscv_vector_vlen() < 128)
		return -ENODEV;

	return crypto_register_shashes(riscv64_sha256_algs,
				       ARRAY_SIZE(riscv64_sha256_algs));
}
/* Module exit hook: unregister every entry of riscv64_sha256_algs. */
static void __exit riscv64_sha256_mod_exit(void)
{
crypto_unregister_shashes(riscv64_sha256_algs,
ARRAY_SIZE(riscv64_sha256_algs));
}
/* Wire up the module init/exit hooks and declare the module metadata. */
module_init(riscv64_sha256_mod_init);
module_exit(riscv64_sha256_mod_exit);
MODULE_DESCRIPTION("SHA-256 (RISC-V accelerated)");
MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
MODULE_LICENSE("GPL");
/* Crypto-API aliases, one per algorithm name registered by this module. */
MODULE_ALIAS_CRYPTO("sha256");
MODULE_ALIAS_CRYPTO("sha224");

View File

@ -0,0 +1,225 @@
/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector SHA-2 Secure Hash extension ('Zvknha' or 'Zvknhb')
// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
#include <linux/cfi_types.h>
.text
// Let the assembler accept Zvknha and Zvkb instructions in this file.
.option arch, +zvknha, +zvkb
// Scalar registers. a0-a2 hold the three function arguments in prototype
// order; STATEP_C is set to STATEP + 8 by the function itself.
#define STATEP a0
#define DATA a1
#define NUM_BLOCKS a2
#define STATEP_C a3
// v0: mask operand of the vmerge in the message schedule.
#define MASK v0
// v1: byte offsets for the indexed state load/store.
#define INDICES v1
// Four groups of 4 message schedule words each.
#define W0 v2
#define W1 v3
#define W2 v4
#define W3 v5
// Scratch register.
#define VTMP v6
// Working state, kept as the word groups {f,e,b,a} and {h,g,d,c}.
#define FEBA v7
#define HGDC v8
// Round constants, 4 words per register, loaded once from K256.
#define K0 v10
#define K1 v11
#define K2 v12
#define K3 v13
#define K4 v14
#define K5 v15
#define K6 v16
#define K7 v17
#define K8 v18
#define K9 v19
#define K10 v20
#define K11 v21
#define K12 v22
#define K13 v23
#define K14 v24
#define K15 v25
// Copy of the state taken at the start of each block.
#define PREV_FEBA v26
#define PREV_HGDC v27
// Do 4 rounds of SHA-256. w0 contains the current 4 message schedule words.
//
// If not all the message schedule words have been computed yet, then this also
// computes 4 more message schedule words. w1-w3 contain the next 3 groups of 4
// message schedule words; this macro computes the group after w3 and writes it
// to w0. This means that the next (w0, w1, w2, w3) is the current (w1, w2, w3,
// w0), so the caller must cycle through the registers accordingly.
//
// Arguments:
//   last - nonzero to skip the message schedule update
//   k    - vector register holding the 4 round constants for these rounds
//   w0-w3 - message schedule registers as described above
// Clobbers VTMP; updates FEBA and HGDC (and \w0 unless \last is set).
.macro sha256_4rounds last, k, w0, w1, w2, w3
// VTMP = round constants + message words.
vadd.vv VTMP, \k, \w0
// The two compression steps alternate destinations: HGDC is updated from
// FEBA first, then FEBA from the new HGDC.
vsha2cl.vv HGDC, FEBA, VTMP
vsha2ch.vv FEBA, HGDC, VTMP
.if !\last
// VTMP = \w2 with element 0 taken from \w1 (MASK has only bit 0 set).
vmerge.vvm VTMP, \w2, \w1, MASK
// Overwrite \w0 with the next 4 message schedule words.
vsha2ms.vv \w0, VTMP, \w3
.endif
.endm
// Do 16 rounds of SHA-256 as four sha256_4rounds invocations, using round
// constant registers k0-k3 in order and rotating (W0, W1, W2, W3) by one
// position per invocation so that the rotation is back at its start afterwards.
// \last is forwarded unchanged to every invocation.
.macro sha256_16rounds last, k0, k1, k2, k3
sha256_4rounds \last, \k0, W0, W1, W2, W3
sha256_4rounds \last, \k1, W1, W2, W3, W0
sha256_4rounds \last, \k2, W2, W3, W0, W1
sha256_4rounds \last, \k3, W3, W0, W1, W2
.endm
// void sha256_transform_zvknha_or_zvknhb_zvkb(u32 state[8], const u8 *data,
// int num_blocks);
//
// Updates |state| in place by processing |num_blocks| consecutive 64-byte
// blocks starting at |data|.
//
// NUM_BLOCKS is decremented before it is tested, so the loop body always runs
// at least once; presumably callers guarantee num_blocks >= 1.
SYM_TYPED_FUNC_START(sha256_transform_zvknha_or_zvknhb_zvkb)
// Load the round constants into K0-K15.
vsetivli zero, 4, e32, m1, ta, ma
la t0, K256
vle32.v K0, (t0)
addi t0, t0, 16
vle32.v K1, (t0)
addi t0, t0, 16
vle32.v K2, (t0)
addi t0, t0, 16
vle32.v K3, (t0)
addi t0, t0, 16
vle32.v K4, (t0)
addi t0, t0, 16
vle32.v K5, (t0)
addi t0, t0, 16
vle32.v K6, (t0)
addi t0, t0, 16
vle32.v K7, (t0)
addi t0, t0, 16
vle32.v K8, (t0)
addi t0, t0, 16
vle32.v K9, (t0)
addi t0, t0, 16
vle32.v K10, (t0)
addi t0, t0, 16
vle32.v K11, (t0)
addi t0, t0, 16
vle32.v K12, (t0)
addi t0, t0, 16
vle32.v K13, (t0)
addi t0, t0, 16
vle32.v K14, (t0)
addi t0, t0, 16
vle32.v K15, (t0)
// Setup mask for the vmerge to replace the first word (idx==0) in
// message scheduling. There are 4 words, so an 8-bit mask suffices.
vsetivli zero, 1, e8, m1, ta, ma
vmv.v.i MASK, 0x01
// Load the state. The state is stored as {a,b,c,d,e,f,g,h}, but we
// need {f,e,b,a},{h,g,d,c}. The dst vtype is e32m1 and the index vtype
// is e8mf4. We use index-load with the i8 indices {20, 16, 4, 0},
// loaded using the 32-bit little endian value 0x00041014.
li t0, 0x00041014
vsetivli zero, 1, e32, m1, ta, ma
vmv.v.x INDICES, t0
// STATEP_C points at word c, so the same indices select {h,g,d,c}.
addi STATEP_C, STATEP, 8
vsetivli zero, 4, e32, m1, ta, ma
vluxei8.v FEBA, (STATEP), INDICES
vluxei8.v HGDC, (STATEP_C), INDICES
.Lnext_block:
addi NUM_BLOCKS, NUM_BLOCKS, -1
// Save the previous state, as it's needed later.
vmv.v.v PREV_FEBA, FEBA
vmv.v.v PREV_HGDC, HGDC
// Load the next 512-bit message block and endian-swap each 32-bit word.
vle32.v W0, (DATA)
vrev8.v W0, W0
addi DATA, DATA, 16
vle32.v W1, (DATA)
vrev8.v W1, W1
addi DATA, DATA, 16
vle32.v W2, (DATA)
vrev8.v W2, W2
addi DATA, DATA, 16
vle32.v W3, (DATA)
vrev8.v W3, W3
addi DATA, DATA, 16
// Do the 64 rounds of SHA-256.
// The final group passes last=1: no further message schedule words are
// needed after it.
sha256_16rounds 0, K0, K1, K2, K3
sha256_16rounds 0, K4, K5, K6, K7
sha256_16rounds 0, K8, K9, K10, K11
sha256_16rounds 1, K12, K13, K14, K15
// Add the previous state.
vadd.vv FEBA, FEBA, PREV_FEBA
vadd.vv HGDC, HGDC, PREV_HGDC
// Repeat if more blocks remain.
bnez NUM_BLOCKS, .Lnext_block
// Store the new state and return.
// The indexed stores use the same offsets as the loads, putting the
// words back in {a,b,c,d,e,f,g,h} order.
vsuxei8.v FEBA, (STATEP), INDICES
vsuxei8.v HGDC, (STATEP_C), INDICES
ret
SYM_FUNC_END(sha256_transform_zvknha_or_zvknhb_zvkb)
// SHA-256 round constants: 64 32-bit words in read-only data, laid out 4 per
// row. The transform function loads each row into one of K0-K15.
.section ".rodata"
.p2align 2
.type K256, @object
K256:
.word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
.word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
.word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
.word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
.word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
.word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
.word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
.word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
.word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
.word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
.word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
.word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
.word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
.word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
.word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
.word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
.size K256, . - K256

View File

@ -0,0 +1,133 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* SHA-512 and SHA-384 using the RISC-V vector crypto extensions
*
* Copyright (C) 2023 VRULL GmbH
* Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
*
* Copyright (C) 2023 SiFive, Inc.
* Author: Jerry Shih <jerry.shih@sifive.com>
*/
#include <asm/simd.h>
#include <asm/vector.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sha512_base.h>
#include <linux/linkage.h>
#include <linux/module.h>
/*
* Prototype of the assembly SHA-512 block function.
*
* Note: the asm function only uses the 'state' field of struct sha512_state.
* It is assumed to be the first field.
*
* @state: hash state, updated in place
* @data: input, consumed 128 bytes per block
* @num_blocks: number of blocks to process
*/
asmlinkage void sha512_transform_zvknhb_zvkb(
struct sha512_state *state, const u8 *data, int num_blocks);
/*
 * shash .update handler for SHA-512 and SHA-384.
 *
 * Feeds @len bytes of @data into the hash state held in @desc.  The vector
 * block function is used when crypto_simd_usable() allows it; otherwise the
 * work is handed to crypto_sha512_update().  Always returns 0.
 */
static int riscv64_sha512_update(struct shash_desc *desc, const u8 *data,
				 unsigned int len)
{
	/*
	 * The asm block function is handed a struct sha512_state pointer but
	 * treats it as the raw 512-bit state, so 'state' must sit at offset 0.
	 */
	BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0);

	if (!crypto_simd_usable()) {
		crypto_sha512_update(desc, data, len);
		return 0;
	}

	kernel_vector_begin();
	sha512_base_do_update(desc, data, len, sha512_transform_zvknhb_zvkb);
	kernel_vector_end();

	return 0;
}
/*
 * shash .finup handler for SHA-512 and SHA-384.
 *
 * Absorbs the trailing @len bytes of @data (if any), finalizes the hash and
 * writes the digest to @out.  When crypto_simd_usable() is false the whole
 * operation is delegated to crypto_sha512_finup().
 */
static int riscv64_sha512_finup(struct shash_desc *desc, const u8 *data,
				unsigned int len, u8 *out)
{
	if (!crypto_simd_usable())
		return crypto_sha512_finup(desc, data, len, out);

	/* Update and finalize inside a single vector-enabled section. */
	kernel_vector_begin();
	if (len != 0)
		sha512_base_do_update(desc, data, len,
				      sha512_transform_zvknhb_zvkb);
	sha512_base_do_finalize(desc, sha512_transform_zvknhb_zvkb);
	kernel_vector_end();

	return sha512_base_finish(desc, out);
}
/*
* shash .final handler: finish the hash without any further input by calling
* riscv64_sha512_finup() with a NULL data pointer and a length of 0.
*/
static int riscv64_sha512_final(struct shash_desc *desc, u8 *out)
{
return riscv64_sha512_finup(desc, NULL, 0, out);
}
/*
 * shash .digest handler for SHA-512: one-shot init + finup.
 *
 * Returns the nonzero result of sha512_base_init() if there is one, otherwise
 * whatever riscv64_sha512_finup() returns.
 */
static int riscv64_sha512_digest(struct shash_desc *desc, const u8 *data,
				 unsigned int len, u8 *out)
{
	int err = sha512_base_init(desc);

	if (err)
		return err;

	return riscv64_sha512_finup(desc, data, len, out);
}
/*
* shash descriptors registered by this module: entry 0 is "sha512", entry 1
* is "sha384".  Both share the same update/final/finup handlers and differ in
* their init function, digest size and names.  Only the SHA-512 entry sets a
* one-shot .digest handler.
*/
static struct shash_alg riscv64_sha512_algs[] = {
{
.init = sha512_base_init,
.update = riscv64_sha512_update,
.final = riscv64_sha512_final,
.finup = riscv64_sha512_finup,
.digest = riscv64_sha512_digest,
.descsize = sizeof(struct sha512_state),
.digestsize = SHA512_DIGEST_SIZE,
.base = {
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_priority = 300,
.cra_name = "sha512",
.cra_driver_name = "sha512-riscv64-zvknhb-zvkb",
.cra_module = THIS_MODULE,
},
}, {
.init = sha384_base_init,
.update = riscv64_sha512_update,
.final = riscv64_sha512_final,
.finup = riscv64_sha512_finup,
.descsize = sizeof(struct sha512_state),
.digestsize = SHA384_DIGEST_SIZE,
.base = {
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_priority = 300,
.cra_name = "sha384",
.cra_driver_name = "sha384-riscv64-zvknhb-zvkb",
.cra_module = THIS_MODULE,
},
},
};
/*
 * Module init: register the SHA-512/SHA-384 algorithms if the CPU reports
 * Zvknhb, Zvkb and a vector length of at least 128 bits, otherwise fail with
 * -ENODEV.
 */
static int __init riscv64_sha512_mod_init(void)
{
	if (!riscv_isa_extension_available(NULL, ZVKNHB))
		return -ENODEV;

	if (!riscv_isa_extension_available(NULL, ZVKB))
		return -ENODEV;

	if (riscv_vector_vlen() < 128)
		return -ENODEV;

	return crypto_register_shashes(riscv64_sha512_algs,
				       ARRAY_SIZE(riscv64_sha512_algs));
}
/* Module exit hook: unregister every entry of riscv64_sha512_algs. */
static void __exit riscv64_sha512_mod_exit(void)
{
crypto_unregister_shashes(riscv64_sha512_algs,
ARRAY_SIZE(riscv64_sha512_algs));
}
/* Wire up the module init/exit hooks and declare the module metadata. */
module_init(riscv64_sha512_mod_init);
module_exit(riscv64_sha512_mod_exit);
MODULE_DESCRIPTION("SHA-512 (RISC-V accelerated)");
MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
MODULE_LICENSE("GPL");
/* Crypto-API aliases, one per algorithm name registered by this module. */
MODULE_ALIAS_CRYPTO("sha512");
MODULE_ALIAS_CRYPTO("sha384");

View File

@ -0,0 +1,203 @@
/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector SHA-2 Secure Hash extension ('Zvknhb')
// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
#include <linux/cfi_types.h>
.text
// Let the assembler accept Zvknhb and Zvkb instructions in this file.
.option arch, +zvknhb, +zvkb
// Scalar registers. a0-a2 hold the three function arguments in prototype
// order; STATEP_C is set to STATEP + 16 by the function itself.
#define STATEP a0
#define DATA a1
#define NUM_BLOCKS a2
#define STATEP_C a3
// Cursor into the K512 round constant table; reset for every block and
// advanced by 32 bytes per sha512_4rounds invocation.
#define K a4
// v0: mask operand of the vmerge in the message schedule.
#define MASK v0
// v1: byte offsets for the indexed state load/store.
#define INDICES v1
// Everything below is a register pair (LMUL=2) holding 4 x 64-bit words:
// message schedule groups, scratch, working state and saved state.
#define W0 v10 // LMUL=2
#define W1 v12 // LMUL=2
#define W2 v14 // LMUL=2
#define W3 v16 // LMUL=2
#define VTMP v20 // LMUL=2
#define FEBA v22 // LMUL=2
#define HGDC v24 // LMUL=2
#define PREV_FEBA v26 // LMUL=2
#define PREV_HGDC v28 // LMUL=2
// Do 4 rounds of SHA-512. w0 contains the current 4 message schedule words.
//
// If not all the message schedule words have been computed yet, then this also
// computes 4 more message schedule words. w1-w3 contain the next 3 groups of 4
// message schedule words; this macro computes the group after w3 and writes it
// to w0. This means that the next (w0, w1, w2, w3) is the current (w1, w2, w3,
// w0), so the caller must cycle through the registers accordingly.
//
// Arguments:
//   last - nonzero to skip the message schedule update
//   w0-w3 - message schedule registers as described above
// Reads 4 round constants from (K) and advances K by 32 bytes. Clobbers VTMP;
// updates FEBA and HGDC (and \w0 unless \last is set).
.macro sha512_4rounds last, w0, w1, w2, w3
// Fetch the next 4 round constants and step the table cursor.
vle64.v VTMP, (K)
addi K, K, 32
// VTMP = round constants + message words.
vadd.vv VTMP, VTMP, \w0
// The two compression steps alternate destinations: HGDC is updated from
// FEBA first, then FEBA from the new HGDC.
vsha2cl.vv HGDC, FEBA, VTMP
vsha2ch.vv FEBA, HGDC, VTMP
.if !\last
// VTMP = \w2 with element 0 taken from \w1 (MASK has only bit 0 set).
vmerge.vvm VTMP, \w2, \w1, MASK
// Overwrite \w0 with the next 4 message schedule words.
vsha2ms.vv \w0, VTMP, \w3
.endif
.endm
// Do 16 rounds of SHA-512 as four sha512_4rounds invocations, rotating
// (W0, W1, W2, W3) by one position per invocation so that the rotation is back
// at its start afterwards. \last is forwarded unchanged to every invocation.
.macro sha512_16rounds last
sha512_4rounds \last, W0, W1, W2, W3
sha512_4rounds \last, W1, W2, W3, W0
sha512_4rounds \last, W2, W3, W0, W1
sha512_4rounds \last, W3, W0, W1, W2
.endm
// void sha512_transform_zvknhb_zvkb(u64 state[8], const u8 *data,
// int num_blocks);
//
// Updates |state| in place by processing |num_blocks| consecutive 128-byte
// blocks starting at |data|.
//
// NUM_BLOCKS is decremented before it is tested, so the loop body always runs
// at least once; presumably callers guarantee num_blocks >= 1.
SYM_TYPED_FUNC_START(sha512_transform_zvknhb_zvkb)
// Setup mask for the vmerge to replace the first word (idx==0) in
// message scheduling. There are 4 words, so an 8-bit mask suffices.
vsetivli zero, 1, e8, m1, ta, ma
vmv.v.i MASK, 0x01
// Load the state. The state is stored as {a,b,c,d,e,f,g,h}, but we
// need {f,e,b,a},{h,g,d,c}. The dst vtype is e64m2 and the index vtype
// is e8mf4. We use index-load with the i8 indices {40, 32, 8, 0},
// loaded using the 32-bit little endian value 0x00082028.
li t0, 0x00082028
vsetivli zero, 1, e32, m1, ta, ma
vmv.v.x INDICES, t0
// STATEP_C points at word c, so the same indices select {h,g,d,c}.
addi STATEP_C, STATEP, 16
vsetivli zero, 4, e64, m2, ta, ma
vluxei8.v FEBA, (STATEP), INDICES
vluxei8.v HGDC, (STATEP_C), INDICES
.Lnext_block:
// Rewind the round constant cursor to the start of the table.
la K, K512
addi NUM_BLOCKS, NUM_BLOCKS, -1
// Save the previous state, as it's needed later.
vmv.v.v PREV_FEBA, FEBA
vmv.v.v PREV_HGDC, HGDC
// Load the next 1024-bit message block and endian-swap each 64-bit word.
vle64.v W0, (DATA)
vrev8.v W0, W0
addi DATA, DATA, 32
vle64.v W1, (DATA)
vrev8.v W1, W1
addi DATA, DATA, 32
vle64.v W2, (DATA)
vrev8.v W2, W2
addi DATA, DATA, 32
vle64.v W3, (DATA)
vrev8.v W3, W3
addi DATA, DATA, 32
// Do the 80 rounds of SHA-512.
// The final group passes last=1: no further message schedule words are
// needed after it.
sha512_16rounds 0
sha512_16rounds 0
sha512_16rounds 0
sha512_16rounds 0
sha512_16rounds 1
// Add the previous state.
vadd.vv FEBA, FEBA, PREV_FEBA
vadd.vv HGDC, HGDC, PREV_HGDC
// Repeat if more blocks remain.
bnez NUM_BLOCKS, .Lnext_block
// Store the new state and return.
// The indexed stores use the same offsets as the loads, putting the
// words back in {a,b,c,d,e,f,g,h} order.
vsuxei8.v FEBA, (STATEP), INDICES
vsuxei8.v HGDC, (STATEP_C), INDICES
ret
SYM_FUNC_END(sha512_transform_zvknhb_zvkb)
// SHA-512 round constants: 80 64-bit words in read-only data, 8-byte aligned.
// sha512_4rounds reads them sequentially, 4 words (32 bytes) at a time.
.section ".rodata"
.p2align 3
.type K512, @object
K512:
.dword 0x428a2f98d728ae22, 0x7137449123ef65cd
.dword 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc
.dword 0x3956c25bf348b538, 0x59f111f1b605d019
.dword 0x923f82a4af194f9b, 0xab1c5ed5da6d8118
.dword 0xd807aa98a3030242, 0x12835b0145706fbe
.dword 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2
.dword 0x72be5d74f27b896f, 0x80deb1fe3b1696b1
.dword 0x9bdc06a725c71235, 0xc19bf174cf692694
.dword 0xe49b69c19ef14ad2, 0xefbe4786384f25e3
.dword 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65
.dword 0x2de92c6f592b0275, 0x4a7484aa6ea6e483
.dword 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5
.dword 0x983e5152ee66dfab, 0xa831c66d2db43210
.dword 0xb00327c898fb213f, 0xbf597fc7beef0ee4
.dword 0xc6e00bf33da88fc2, 0xd5a79147930aa725
.dword 0x06ca6351e003826f, 0x142929670a0e6e70
.dword 0x27b70a8546d22ffc, 0x2e1b21385c26c926
.dword 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df
.dword 0x650a73548baf63de, 0x766a0abb3c77b2a8
.dword 0x81c2c92e47edaee6, 0x92722c851482353b
.dword 0xa2bfe8a14cf10364, 0xa81a664bbc423001
.dword 0xc24b8b70d0f89791, 0xc76c51a30654be30
.dword 0xd192e819d6ef5218, 0xd69906245565a910
.dword 0xf40e35855771202a, 0x106aa07032bbd1b8
.dword 0x19a4c116b8d2d0c8, 0x1e376c085141ab53
.dword 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8
.dword 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb
.dword 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3
.dword 0x748f82ee5defb2fc, 0x78a5636f43172f60
.dword 0x84c87814a1f0ab72, 0x8cc702081a6439ec
.dword 0x90befffa23631e28, 0xa4506cebde82bde9
.dword 0xbef9a3f7b2c67915, 0xc67178f2e372532b
.dword 0xca273eceea26619c, 0xd186b8c721c0c207
.dword 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178
.dword 0x06f067aa72176fba, 0x0a637dc5a2c898a6
.dword 0x113f9804bef90dae, 0x1b710b35131c471b
.dword 0x28db77f523047d84, 0x32caab7b40c72493
.dword 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c
.dword 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a
.dword 0x5fcb6fab3ad6faec, 0x6c44198c4a475817
.size K512, . - K512

View File

@ -0,0 +1,112 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* SM3 using the RISC-V vector crypto extensions
*
* Copyright (C) 2023 VRULL GmbH
* Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
*
* Copyright (C) 2023 SiFive, Inc.
* Author: Jerry Shih <jerry.shih@sifive.com>
*/
#include <asm/simd.h>
#include <asm/vector.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sm3_base.h>
#include <linux/linkage.h>
#include <linux/module.h>
/*
* Prototype of the assembly SM3 block function.
*
* Note: the asm function only uses the 'state' field of struct sm3_state.
* It is assumed to be the first field.
*
* @state: hash state, updated in place
* @data: input, consumed 64 bytes per block
* @num_blocks: number of blocks to process
*/
asmlinkage void sm3_transform_zvksh_zvkb(
struct sm3_state *state, const u8 *data, int num_blocks);
/*
 * shash .update handler for SM3.
 *
 * Feeds @len bytes of @data into the hash state held in @desc.  The vector
 * block function is used when crypto_simd_usable() allows it; otherwise the
 * work is handed to sm3_update() on the descriptor context.  Always returns 0.
 */
static int riscv64_sm3_update(struct shash_desc *desc, const u8 *data,
			      unsigned int len)
{
	/*
	 * The asm block function is handed a struct sm3_state pointer but
	 * treats it as the raw 256-bit state, so 'state' must sit at offset 0.
	 */
	BUILD_BUG_ON(offsetof(struct sm3_state, state) != 0);

	if (!crypto_simd_usable()) {
		sm3_update(shash_desc_ctx(desc), data, len);
		return 0;
	}

	kernel_vector_begin();
	sm3_base_do_update(desc, data, len, sm3_transform_zvksh_zvkb);
	kernel_vector_end();

	return 0;
}
/*
 * shash .finup handler for SM3.
 *
 * Absorbs the trailing @len bytes of @data (if any), finalizes the hash and
 * writes the digest to @out.  When crypto_simd_usable() is false the
 * sm3_update()/sm3_final() pair is used instead and 0 is returned.
 */
static int riscv64_sm3_finup(struct shash_desc *desc, const u8 *data,
			     unsigned int len, u8 *out)
{
	if (!crypto_simd_usable()) {
		struct sm3_state *sctx = shash_desc_ctx(desc);

		if (len != 0)
			sm3_update(sctx, data, len);
		sm3_final(sctx, out);
		return 0;
	}

	/* Update and finalize inside a single vector-enabled section. */
	kernel_vector_begin();
	if (len != 0)
		sm3_base_do_update(desc, data, len, sm3_transform_zvksh_zvkb);
	sm3_base_do_finalize(desc, sm3_transform_zvksh_zvkb);
	kernel_vector_end();

	return sm3_base_finish(desc, out);
}
/*
* shash .final handler: finish the hash without any further input by calling
* riscv64_sm3_finup() with a NULL data pointer and a length of 0.
*/
static int riscv64_sm3_final(struct shash_desc *desc, u8 *out)
{
return riscv64_sm3_finup(desc, NULL, 0, out);
}
/*
* shash descriptor for "sm3" registered by this module.  No one-shot .digest
* handler is set.
*/
static struct shash_alg riscv64_sm3_alg = {
.init = sm3_base_init,
.update = riscv64_sm3_update,
.final = riscv64_sm3_final,
.finup = riscv64_sm3_finup,
.descsize = sizeof(struct sm3_state),
.digestsize = SM3_DIGEST_SIZE,
.base = {
.cra_blocksize = SM3_BLOCK_SIZE,
.cra_priority = 300,
.cra_name = "sm3",
.cra_driver_name = "sm3-riscv64-zvksh-zvkb",
.cra_module = THIS_MODULE,
},
};
/*
 * Module init: register the SM3 algorithm if the CPU reports Zvksh, Zvkb and
 * a vector length of at least 128 bits, otherwise fail with -ENODEV.
 */
static int __init riscv64_sm3_mod_init(void)
{
	if (!riscv_isa_extension_available(NULL, ZVKSH))
		return -ENODEV;

	if (!riscv_isa_extension_available(NULL, ZVKB))
		return -ENODEV;

	if (riscv_vector_vlen() < 128)
		return -ENODEV;

	return crypto_register_shash(&riscv64_sm3_alg);
}
/* Module exit hook: unregister the shash algorithm riscv64_sm3_alg. */
static void __exit riscv64_sm3_mod_exit(void)
{
crypto_unregister_shash(&riscv64_sm3_alg);
}
/* Wire up the module init/exit hooks and declare the module metadata. */
module_init(riscv64_sm3_mod_init);
module_exit(riscv64_sm3_mod_exit);
MODULE_DESCRIPTION("SM3 (RISC-V accelerated)");
MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
MODULE_LICENSE("GPL");
/* Crypto-API alias for the algorithm name "sm3". */
MODULE_ALIAS_CRYPTO("sm3");

View File

@ -0,0 +1,123 @@
/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector SM3 Secure Hash extension ('Zvksh')
// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
#include <linux/cfi_types.h>
.text
// Let the assembler accept Zvksh and Zvkb instructions in this file.
.option arch, +zvksh, +zvkb
// Scalar registers: a0-a2 hold the three function arguments in prototype
// order.
#define STATEP a0
#define DATA a1
#define NUM_BLOCKS a2
// Vector register pairs (LMUL=2), each holding 8 x 32-bit words:
// working state, state saved at the start of a block, the two halves of
// the message word window, and scratch.
#define STATE v0 // LMUL=2
#define PREV_STATE v2 // LMUL=2
#define W0 v4 // LMUL=2
#define W1 v6 // LMUL=2
#define VTMP v8 // LMUL=2
// Do 8 rounds of SM3 as four vsm3c.vi steps, and unless this is the last
// invocation also extend the message schedule.
//
// Arguments:
//   i  - immediate passed to the first vsm3c.vi; the following three use
//        \i + 1 .. \i + 3. Callers step it by 4 per invocation (0, 4, .. 28).
//   w0 - register pair holding the 8 message words consumed first
//   w1 - register pair holding the 8 message words that follow
// Clobbers VTMP; updates STATE, and overwrites \w0 when \i < 28.
//
// NOTE(review): the "i" in the W_{..} comments below looks like a message
// word index rather than the macro argument \i (with \i == 4 the first words
// consumed would be W_8.., not W_4..) -- confirm against the SM3 definition.
.macro sm3_8rounds i, w0, w1
// Do 4 rounds using W_{0+i}..W_{7+i}.
vsm3c.vi STATE, \w0, \i + 0
vslidedown.vi VTMP, \w0, 2
vsm3c.vi STATE, VTMP, \i + 1
// Compute W_{4+i}..W_{11+i}.
// VTMP = upper 4 words of \w0 followed by lower 4 words of \w1.
vslidedown.vi VTMP, \w0, 4
vslideup.vi VTMP, \w1, 4
// Do 4 rounds using W_{4+i}..W_{11+i}.
vsm3c.vi STATE, VTMP, \i + 2
vslidedown.vi VTMP, VTMP, 2
vsm3c.vi STATE, VTMP, \i + 3
.if \i < 28
// Compute W_{16+i}..W_{23+i}.
vsm3me.vv \w0, \w1, \w0
.endif
// For the next 8 rounds, w0 and w1 are swapped.
.endm
// void sm3_transform_zvksh_zvkb(u32 state[8], const u8 *data, int num_blocks);
//
// Updates |state| in place by processing |num_blocks| consecutive 64-byte
// blocks starting at |data|.
//
// NUM_BLOCKS is decremented before it is tested, so the loop body always runs
// at least once; presumably callers guarantee num_blocks >= 1.
SYM_TYPED_FUNC_START(sm3_transform_zvksh_zvkb)
// Load the state and endian-swap each 32-bit word.
vsetivli zero, 8, e32, m2, ta, ma
vle32.v STATE, (STATEP)
vrev8.v STATE, STATE
.Lnext_block:
addi NUM_BLOCKS, NUM_BLOCKS, -1
// Save the previous state, as it's needed later.
vmv.v.v PREV_STATE, STATE
// Load the next 512-bit message block into W0-W1.
// Unlike the state, the message words are not byte-swapped here.
vle32.v W0, (DATA)
addi DATA, DATA, 32
vle32.v W1, (DATA)
addi DATA, DATA, 32
// Do the 64 rounds of SM3.
// W0 and W1 swap roles on every invocation, as sm3_8rounds requires.
sm3_8rounds 0, W0, W1
sm3_8rounds 4, W1, W0
sm3_8rounds 8, W0, W1
sm3_8rounds 12, W1, W0
sm3_8rounds 16, W0, W1
sm3_8rounds 20, W1, W0
sm3_8rounds 24, W0, W1
sm3_8rounds 28, W1, W0
// XOR in the previous state.
vxor.vv STATE, STATE, PREV_STATE
// Repeat if more blocks remain.
bnez NUM_BLOCKS, .Lnext_block
// Store the new state and return.
// Undo the byte swap applied when the state was loaded.
vrev8.v STATE, STATE
vse32.v STATE, (STATEP)
ret
SYM_FUNC_END(sm3_transform_zvksh_zvkb)

View File

@ -0,0 +1,107 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* SM4 using the RISC-V vector crypto extensions
*
* Copyright (C) 2023 VRULL GmbH
* Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
*
* Copyright (C) 2023 SiFive, Inc.
* Author: Jerry Shih <jerry.shih@sifive.com>
*/
#include <asm/simd.h>
#include <asm/vector.h>
#include <crypto/internal/cipher.h>
#include <crypto/internal/simd.h>
#include <crypto/sm4.h>
#include <linux/linkage.h>
#include <linux/module.h>
/*
* Prototypes of the assembly SM4 routines.
*
* sm4_expandkey_zvksed_zvkb() takes a SM4_KEY_SIZE-byte user key and fills
* two SM4_RKEY_WORDS-word round key arrays, one for each direction.
*/
asmlinkage void sm4_expandkey_zvksed_zvkb(const u8 user_key[SM4_KEY_SIZE],
u32 rkey_enc[SM4_RKEY_WORDS],
u32 rkey_dec[SM4_RKEY_WORDS]);
/*
* sm4_crypt_zvksed_zvkb() transforms one SM4_BLOCK_SIZE-byte block from @in
* to @out using the given round keys; the glue code passes rkey_enc to
* encrypt and rkey_dec to decrypt.
*/
asmlinkage void sm4_crypt_zvksed_zvkb(const u32 rkey[SM4_RKEY_WORDS],
const u8 in[SM4_BLOCK_SIZE],
u8 out[SM4_BLOCK_SIZE]);
/*
 * .cia_setkey handler for SM4.
 *
 * Expands @key into the encryption and decryption round keys stored in the
 * transform context.  When crypto_simd_usable() is false, sm4_expandkey() does
 * the work and supplies the return value.  On the vector path a key length
 * other than SM4_KEY_SIZE is rejected with -EINVAL, and 0 is returned on
 * success.
 */
static int riscv64_sm4_setkey(struct crypto_tfm *tfm, const u8 *key,
			      unsigned int keylen)
{
	struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);

	if (!crypto_simd_usable())
		return sm4_expandkey(ctx, key, keylen);

	if (keylen != SM4_KEY_SIZE)
		return -EINVAL;

	kernel_vector_begin();
	sm4_expandkey_zvksed_zvkb(key, ctx->rkey_enc, ctx->rkey_dec);
	kernel_vector_end();

	return 0;
}
/*
 * .cia_encrypt handler for SM4: encrypt one block from @src into @dst with
 * the encryption round keys, using the vector routine when
 * crypto_simd_usable() allows it and sm4_crypt_block() otherwise.
 */
static void riscv64_sm4_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);

	if (!crypto_simd_usable()) {
		sm4_crypt_block(ctx->rkey_enc, dst, src);
		return;
	}

	/* Note the argument order: the asm routine takes (rkey, in, out). */
	kernel_vector_begin();
	sm4_crypt_zvksed_zvkb(ctx->rkey_enc, src, dst);
	kernel_vector_end();
}
/*
 * .cia_decrypt handler for SM4: decrypt one block from @src into @dst with
 * the decryption round keys, using the vector routine when
 * crypto_simd_usable() allows it and sm4_crypt_block() otherwise.
 */
static void riscv64_sm4_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);

	if (!crypto_simd_usable()) {
		sm4_crypt_block(ctx->rkey_dec, dst, src);
		return;
	}

	/* Note the argument order: the asm routine takes (rkey, in, out). */
	kernel_vector_begin();
	sm4_crypt_zvksed_zvkb(ctx->rkey_dec, src, dst);
	kernel_vector_end();
}
/*
* Single-block cipher descriptor for "sm4" registered by this module.  The
* minimum and maximum key sizes are both SM4_KEY_SIZE.
*/
static struct crypto_alg riscv64_sm4_alg = {
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_priority = 300,
.cra_name = "sm4",
.cra_driver_name = "sm4-riscv64-zvksed-zvkb",
.cra_cipher = {
.cia_min_keysize = SM4_KEY_SIZE,
.cia_max_keysize = SM4_KEY_SIZE,
.cia_setkey = riscv64_sm4_setkey,
.cia_encrypt = riscv64_sm4_encrypt,
.cia_decrypt = riscv64_sm4_decrypt,
},
.cra_module = THIS_MODULE,
};
/*
 * Module init: register the SM4 cipher if the CPU reports Zvksed, Zvkb and a
 * vector length of at least 128 bits, otherwise fail with -ENODEV.
 */
static int __init riscv64_sm4_mod_init(void)
{
	if (!riscv_isa_extension_available(NULL, ZVKSED))
		return -ENODEV;

	if (!riscv_isa_extension_available(NULL, ZVKB))
		return -ENODEV;

	if (riscv_vector_vlen() < 128)
		return -ENODEV;

	return crypto_register_alg(&riscv64_sm4_alg);
}
/* Module exit hook: unregister the cipher algorithm riscv64_sm4_alg. */
static void __exit riscv64_sm4_mod_exit(void)
{
crypto_unregister_alg(&riscv64_sm4_alg);
}
/* Wire up the module init/exit hooks and declare the module metadata. */
module_init(riscv64_sm4_mod_init);
module_exit(riscv64_sm4_mod_exit);
MODULE_DESCRIPTION("SM4 (RISC-V accelerated)");
MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
MODULE_LICENSE("GPL");
/* Crypto-API alias for the algorithm name "sm4". */
MODULE_ALIAS_CRYPTO("sm4");

View File

@ -0,0 +1,117 @@
/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector SM4 Block Cipher extension ('Zvksed')
// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
#include <linux/linkage.h>
.text
.option arch, +zvksed, +zvkb
// void sm4_expandkey_zvksed_zvkb(const u8 user_key[16], u32 rkey_enc[32],
// u32 rkey_dec[32]);
//
// Expand a 16-byte SM4 user key into 32 round keys and write them out twice:
// in generation order to rkey_enc and in the opposite order to rkey_dec.
//
// In: a0 = user_key, a1 = rkey_enc, a2 = rkey_dec
// Clobbers: a1, a2, t0, v1, v2, and the vl/vtype vector configuration
SYM_FUNC_START(sm4_expandkey_zvksed_zvkb)
// Work on a single group of four 32-bit elements (128 bits).
vsetivli zero, 4, e32, m1, ta, ma
// Load the user key.
vle32.v v1, (a0)
// Reverse the bytes within each 32-bit word of the key.
vrev8.v v1, v1
// XOR the user key with the family key.
la t0, FAMILY_KEY
vle32.v v2, (t0)
vxor.vv v1, v1, v2
// Compute the round keys. Store them in forwards order in rkey_enc
// and in reverse order in rkey_dec.
//
// a2 is moved to the last u32 of rkey_dec and t0 holds a byte stride of -4,
// so each strided store below places its four words at descending addresses.
addi a2, a2, 31*4
li t0, -4
// Eight unrolled steps of four round keys each (8 * 4 = 32); 'i' is an
// assembly-time counter that is also passed to vsm4k.vi as its immediate.
.set i, 0
.rept 8
// v1 holds the previous four key words on input and the next four on output.
vsm4k.vi v1, v1, i
vse32.v v1, (a1) // Store to rkey_enc.
vsse32.v v1, (a2), t0 // Store to rkey_dec.
// Advance both pointers by four words, except after the final step.
.if i < 7
addi a1, a1, 16
addi a2, a2, -16
.endif
.set i, i + 1
.endr
ret
SYM_FUNC_END(sm4_expandkey_zvksed_zvkb)
// void sm4_crypt_zvksed_zvkb(const u32 rkey[32], const u8 in[16], u8 out[16]);
//
// Process one 16-byte block with the 32 round keys in rkey, applied in the
// order they are stored. There is no separate direction argument: the result
// depends only on which key schedule the caller passes.
//
// In: a0 = rkey, a1 = in, a2 = out
// Clobbers: a0, a2, t0, v1, v2, and the vl/vtype vector configuration
SYM_FUNC_START(sm4_crypt_zvksed_zvkb)
// Work on a single group of four 32-bit elements (128 bits).
vsetivli zero, 4, e32, m1, ta, ma
// Load the input data.
vle32.v v1, (a1)
// Reverse the bytes within each 32-bit word of the block.
vrev8.v v1, v1
// Do the 32 rounds of SM4, 4 at a time.
// 'i' is an assembly-time counter used only to skip the last pointer bump.
.set i, 0
.rept 8
// Fetch the next four round keys and apply them to the state in v1.
vle32.v v2, (a0)
vsm4r.vs v1, v2
.if i < 7
addi a0, a0, 16
.endif
.set i, i + 1
.endr
// Store the output data (in reverse element order).
// Undo the per-word byte reversal first. Then, starting at out + 12 with a
// byte stride of -4, element 0 lands in out[12..15] and element 3 in out[0..3].
vrev8.v v1, v1
li t0, -4
addi a2, a2, 12
vsse32.v v1, (a2), t0
ret
SYM_FUNC_END(sm4_crypt_zvksed_zvkb)
// Read-only constant: the four 32-bit family key words that
// sm4_expandkey_zvksed_zvkb XORs into the user key before deriving the round
// keys. Aligned to 4 bytes (.p2align 2).
.section ".rodata"
.p2align 2
.type FAMILY_KEY, @object
FAMILY_KEY:
.word 0xA3B1BAC6, 0x56AA3350, 0x677D9197, 0xB27022DC
.size FAMILY_KEY, . - FAMILY_KEY

View File

@ -284,4 +284,15 @@ static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
#endif /* CONFIG_RISCV_ISA_V */
/*
 * riscv_vector_vlen() - report the implementation's VLEN, in bits.
 *
 * riscv_v_vsize holds the combined size in bytes of all 32 vector registers
 * (32 * vlenb).  Dividing by the register count gives the size of one
 * register in bytes; multiplying by 8 converts that to bits.
 */
static inline int riscv_vector_vlen(void)
{
	const int nr_vector_regs = 32;
	const int bits_in_byte = 8;

	/* Divide first, then multiply: same left-to-right integer arithmetic. */
	return riscv_v_vsize / nr_vector_regs * bits_in_byte;
}
#endif /* ! __ASM_RISCV_VECTOR_H */

View File

@ -1496,6 +1496,9 @@ endif
if PPC
source "arch/powerpc/crypto/Kconfig"
endif
if RISCV
source "arch/riscv/crypto/Kconfig"
endif
if S390
source "arch/s390/crypto/Kconfig"
endif