forked from Minki/linux
3759ee0572
Implement a NEON fallback for systems that do support NEON but have no support for the optional 64x64->128 polynomial multiplication instruction that is part of the ARMv8 Crypto Extensions. It is based on the paper "Fast Software Polynomial Multiplication on ARM Processors Using the NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and Ricardo Dahab (https://hal.inria.fr/hal-01506572) On a 32-bit guest executing under KVM on a Cortex-A57, the new code is not only 4x faster than the generic table based GHASH driver, it is also time invariant. (Note that the existing vmull.p64 code is 16x faster on this core). Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
355 lines
9.4 KiB
C
355 lines
9.4 KiB
C
/*
|
|
* Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
|
|
*
|
|
* Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
* under the terms of the GNU General Public License version 2 as published
|
|
* by the Free Software Foundation.
|
|
*/
|
|
|
|
#include <asm/hwcap.h>
|
|
#include <asm/neon.h>
|
|
#include <asm/simd.h>
|
|
#include <asm/unaligned.h>
|
|
#include <crypto/cryptd.h>
|
|
#include <crypto/internal/hash.h>
|
|
#include <crypto/gf128mul.h>
|
|
#include <linux/cpufeature.h>
|
|
#include <linux/crypto.h>
|
|
#include <linux/module.h>
|
|
|
|
MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
|
|
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
|
MODULE_LICENSE("GPL v2");
|
|
MODULE_ALIAS_CRYPTO("ghash");
|
|
|
|
#define GHASH_BLOCK_SIZE 16
|
|
#define GHASH_DIGEST_SIZE 16
|
|
|
|
struct ghash_key {
|
|
u64 a;
|
|
u64 b;
|
|
};
|
|
|
|
struct ghash_desc_ctx {
|
|
u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
|
|
u8 buf[GHASH_BLOCK_SIZE];
|
|
u32 count;
|
|
};
|
|
|
|
struct ghash_async_ctx {
|
|
struct cryptd_ahash *cryptd_tfm;
|
|
};
|
|
|
|
asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
|
|
struct ghash_key const *k,
|
|
const char *head);
|
|
|
|
asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
|
|
struct ghash_key const *k,
|
|
const char *head);
|
|
|
|
static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
|
|
struct ghash_key const *k,
|
|
const char *head);
|
|
|
|
static int ghash_init(struct shash_desc *desc)
|
|
{
|
|
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
|
|
|
|
*ctx = (struct ghash_desc_ctx){};
|
|
return 0;
|
|
}
|
|
|
|
static int ghash_update(struct shash_desc *desc, const u8 *src,
|
|
unsigned int len)
|
|
{
|
|
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
|
|
unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
|
|
|
|
ctx->count += len;
|
|
|
|
if ((partial + len) >= GHASH_BLOCK_SIZE) {
|
|
struct ghash_key *key = crypto_shash_ctx(desc->tfm);
|
|
int blocks;
|
|
|
|
if (partial) {
|
|
int p = GHASH_BLOCK_SIZE - partial;
|
|
|
|
memcpy(ctx->buf + partial, src, p);
|
|
src += p;
|
|
len -= p;
|
|
}
|
|
|
|
blocks = len / GHASH_BLOCK_SIZE;
|
|
len %= GHASH_BLOCK_SIZE;
|
|
|
|
kernel_neon_begin();
|
|
pmull_ghash_update(blocks, ctx->digest, src, key,
|
|
partial ? ctx->buf : NULL);
|
|
kernel_neon_end();
|
|
src += blocks * GHASH_BLOCK_SIZE;
|
|
partial = 0;
|
|
}
|
|
if (len)
|
|
memcpy(ctx->buf + partial, src, len);
|
|
return 0;
|
|
}
|
|
|
|
static int ghash_final(struct shash_desc *desc, u8 *dst)
|
|
{
|
|
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
|
|
unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
|
|
|
|
if (partial) {
|
|
struct ghash_key *key = crypto_shash_ctx(desc->tfm);
|
|
|
|
memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
|
|
kernel_neon_begin();
|
|
pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
|
|
kernel_neon_end();
|
|
}
|
|
put_unaligned_be64(ctx->digest[1], dst);
|
|
put_unaligned_be64(ctx->digest[0], dst + 8);
|
|
|
|
*ctx = (struct ghash_desc_ctx){};
|
|
return 0;
|
|
}
|
|
|
|
static int ghash_setkey(struct crypto_shash *tfm,
|
|
const u8 *inkey, unsigned int keylen)
|
|
{
|
|
struct ghash_key *key = crypto_shash_ctx(tfm);
|
|
u64 a, b;
|
|
|
|
if (keylen != GHASH_BLOCK_SIZE) {
|
|
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* perform multiplication by 'x' in GF(2^128) */
|
|
b = get_unaligned_be64(inkey);
|
|
a = get_unaligned_be64(inkey + 8);
|
|
|
|
key->a = (a << 1) | (b >> 63);
|
|
key->b = (b << 1) | (a >> 63);
|
|
|
|
if (b >> 63)
|
|
key->b ^= 0xc200000000000000UL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct shash_alg ghash_alg = {
|
|
.digestsize = GHASH_DIGEST_SIZE,
|
|
.init = ghash_init,
|
|
.update = ghash_update,
|
|
.final = ghash_final,
|
|
.setkey = ghash_setkey,
|
|
.descsize = sizeof(struct ghash_desc_ctx),
|
|
.base = {
|
|
.cra_name = "__ghash",
|
|
.cra_driver_name = "__driver-ghash-ce",
|
|
.cra_priority = 0,
|
|
.cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_INTERNAL,
|
|
.cra_blocksize = GHASH_BLOCK_SIZE,
|
|
.cra_ctxsize = sizeof(struct ghash_key),
|
|
.cra_module = THIS_MODULE,
|
|
},
|
|
};
|
|
|
|
static int ghash_async_init(struct ahash_request *req)
|
|
{
|
|
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
|
|
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
|
|
struct ahash_request *cryptd_req = ahash_request_ctx(req);
|
|
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
|
|
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
|
|
struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
|
|
|
|
desc->tfm = child;
|
|
desc->flags = req->base.flags;
|
|
return crypto_shash_init(desc);
|
|
}
|
|
|
|
static int ghash_async_update(struct ahash_request *req)
|
|
{
|
|
struct ahash_request *cryptd_req = ahash_request_ctx(req);
|
|
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
|
|
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
|
|
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
|
|
|
|
if (!may_use_simd() ||
|
|
(in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
|
|
memcpy(cryptd_req, req, sizeof(*req));
|
|
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
|
|
return crypto_ahash_update(cryptd_req);
|
|
} else {
|
|
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
|
|
return shash_ahash_update(req, desc);
|
|
}
|
|
}
|
|
|
|
static int ghash_async_final(struct ahash_request *req)
|
|
{
|
|
struct ahash_request *cryptd_req = ahash_request_ctx(req);
|
|
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
|
|
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
|
|
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
|
|
|
|
if (!may_use_simd() ||
|
|
(in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
|
|
memcpy(cryptd_req, req, sizeof(*req));
|
|
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
|
|
return crypto_ahash_final(cryptd_req);
|
|
} else {
|
|
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
|
|
return crypto_shash_final(desc, req->result);
|
|
}
|
|
}
|
|
|
|
static int ghash_async_digest(struct ahash_request *req)
|
|
{
|
|
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
|
|
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
|
|
struct ahash_request *cryptd_req = ahash_request_ctx(req);
|
|
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
|
|
|
|
if (!may_use_simd() ||
|
|
(in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
|
|
memcpy(cryptd_req, req, sizeof(*req));
|
|
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
|
|
return crypto_ahash_digest(cryptd_req);
|
|
} else {
|
|
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
|
|
struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
|
|
|
|
desc->tfm = child;
|
|
desc->flags = req->base.flags;
|
|
return shash_ahash_digest(req, desc);
|
|
}
|
|
}
|
|
|
|
static int ghash_async_import(struct ahash_request *req, const void *in)
|
|
{
|
|
struct ahash_request *cryptd_req = ahash_request_ctx(req);
|
|
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
|
|
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
|
|
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
|
|
|
|
desc->tfm = cryptd_ahash_child(ctx->cryptd_tfm);
|
|
desc->flags = req->base.flags;
|
|
|
|
return crypto_shash_import(desc, in);
|
|
}
|
|
|
|
static int ghash_async_export(struct ahash_request *req, void *out)
|
|
{
|
|
struct ahash_request *cryptd_req = ahash_request_ctx(req);
|
|
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
|
|
|
|
return crypto_shash_export(desc, out);
|
|
}
|
|
|
|
static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
|
|
unsigned int keylen)
|
|
{
|
|
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
|
|
struct crypto_ahash *child = &ctx->cryptd_tfm->base;
|
|
int err;
|
|
|
|
crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
|
|
crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
|
|
& CRYPTO_TFM_REQ_MASK);
|
|
err = crypto_ahash_setkey(child, key, keylen);
|
|
crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child)
|
|
& CRYPTO_TFM_RES_MASK);
|
|
|
|
return err;
|
|
}
|
|
|
|
static int ghash_async_init_tfm(struct crypto_tfm *tfm)
|
|
{
|
|
struct cryptd_ahash *cryptd_tfm;
|
|
struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
|
|
|
|
cryptd_tfm = cryptd_alloc_ahash("__driver-ghash-ce",
|
|
CRYPTO_ALG_INTERNAL,
|
|
CRYPTO_ALG_INTERNAL);
|
|
if (IS_ERR(cryptd_tfm))
|
|
return PTR_ERR(cryptd_tfm);
|
|
ctx->cryptd_tfm = cryptd_tfm;
|
|
crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
|
|
sizeof(struct ahash_request) +
|
|
crypto_ahash_reqsize(&cryptd_tfm->base));
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
|
|
{
|
|
struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
|
|
|
|
cryptd_free_ahash(ctx->cryptd_tfm);
|
|
}
|
|
|
|
static struct ahash_alg ghash_async_alg = {
|
|
.init = ghash_async_init,
|
|
.update = ghash_async_update,
|
|
.final = ghash_async_final,
|
|
.setkey = ghash_async_setkey,
|
|
.digest = ghash_async_digest,
|
|
.import = ghash_async_import,
|
|
.export = ghash_async_export,
|
|
.halg.digestsize = GHASH_DIGEST_SIZE,
|
|
.halg.statesize = sizeof(struct ghash_desc_ctx),
|
|
.halg.base = {
|
|
.cra_name = "ghash",
|
|
.cra_driver_name = "ghash-ce",
|
|
.cra_priority = 300,
|
|
.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
|
|
.cra_blocksize = GHASH_BLOCK_SIZE,
|
|
.cra_type = &crypto_ahash_type,
|
|
.cra_ctxsize = sizeof(struct ghash_async_ctx),
|
|
.cra_module = THIS_MODULE,
|
|
.cra_init = ghash_async_init_tfm,
|
|
.cra_exit = ghash_async_exit_tfm,
|
|
},
|
|
};
|
|
|
|
static int __init ghash_ce_mod_init(void)
|
|
{
|
|
int err;
|
|
|
|
if (!(elf_hwcap & HWCAP_NEON))
|
|
return -ENODEV;
|
|
|
|
if (elf_hwcap2 & HWCAP2_PMULL)
|
|
pmull_ghash_update = pmull_ghash_update_p64;
|
|
else
|
|
pmull_ghash_update = pmull_ghash_update_p8;
|
|
|
|
err = crypto_register_shash(&ghash_alg);
|
|
if (err)
|
|
return err;
|
|
err = crypto_register_ahash(&ghash_async_alg);
|
|
if (err)
|
|
goto err_shash;
|
|
|
|
return 0;
|
|
|
|
err_shash:
|
|
crypto_unregister_shash(&ghash_alg);
|
|
return err;
|
|
}
|
|
|
|
static void __exit ghash_ce_mod_exit(void)
|
|
{
|
|
crypto_unregister_ahash(&ghash_async_alg);
|
|
crypto_unregister_shash(&ghash_alg);
|
|
}
|
|
|
|
module_init(ghash_ce_mod_init);
|
|
module_exit(ghash_ce_mod_exit);
|