604682551a
This patch adds an ARM NEON assembly implementation of the SHA-1 algorithm.

tcrypt benchmark results on Cortex-A8, sha1-arm-asm vs sha1-neon-asm:

block-size  bytes/update  old-vs-new
16          16            1.04x
64          16            1.02x
64          64            1.05x
256         16            1.03x
256         64            1.04x
256         256           1.30x
1024        16            1.03x
1024        256           1.36x
1024        1024          1.52x
2048        16            1.03x
2048        256           1.39x
2048        1024          1.55x
2048        2048          1.59x
4096        16            1.03x
4096        256           1.40x
4096        1024          1.57x
4096        4096          1.62x
8192        16            1.03x
8192        256           1.40x
8192        1024          1.58x
8192        4096          1.63x
8192        8192          1.63x

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
198 lines
4.8 KiB
C
198 lines
4.8 KiB
C
/*
|
|
* Glue code for the SHA1 Secure Hash Algorithm assembler implementation using
|
|
* ARM NEON instructions.
|
|
*
|
|
* Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
|
*
|
|
* This file is based on sha1_generic.c and sha1_ssse3_glue.c:
|
|
* Copyright (c) Alan Smithee.
|
|
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
|
|
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
|
|
* Copyright (c) Mathias Krause <minipli@googlemail.com>
|
|
* Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
* under the terms of the GNU General Public License as published by the Free
|
|
* Software Foundation; either version 2 of the License, or (at your option)
|
|
* any later version.
|
|
*
|
|
*/
|
|
|
|
#include <crypto/internal/hash.h>
|
|
#include <linux/init.h>
|
|
#include <linux/module.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/cryptohash.h>
|
|
#include <linux/types.h>
|
|
#include <crypto/sha.h>
|
|
#include <asm/byteorder.h>
|
|
#include <asm/neon.h>
|
|
#include <asm/simd.h>
|
|
#include <asm/crypto/sha1.h>
|
|
|
|
|
|
/*
 * NEON SHA-1 block transform; only declared here, the definition lives
 * outside this file.
 *
 * As used by the callers below: state_h points at the hash state words,
 * data at the input bytes, and rounds is the number of
 * SHA1_BLOCK_SIZE-byte blocks to process starting at data.
 */
asmlinkage void sha1_transform_neon(void *state_h, const char *data,
				    unsigned int rounds);
|
|
|
|
|
|
static int sha1_neon_init(struct shash_desc *desc)
|
|
{
|
|
struct sha1_state *sctx = shash_desc_ctx(desc);
|
|
|
|
*sctx = (struct sha1_state){
|
|
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
|
|
};
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int __sha1_neon_update(struct shash_desc *desc, const u8 *data,
|
|
unsigned int len, unsigned int partial)
|
|
{
|
|
struct sha1_state *sctx = shash_desc_ctx(desc);
|
|
unsigned int done = 0;
|
|
|
|
sctx->count += len;
|
|
|
|
if (partial) {
|
|
done = SHA1_BLOCK_SIZE - partial;
|
|
memcpy(sctx->buffer + partial, data, done);
|
|
sha1_transform_neon(sctx->state, sctx->buffer, 1);
|
|
}
|
|
|
|
if (len - done >= SHA1_BLOCK_SIZE) {
|
|
const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
|
|
|
|
sha1_transform_neon(sctx->state, data + done, rounds);
|
|
done += rounds * SHA1_BLOCK_SIZE;
|
|
}
|
|
|
|
memcpy(sctx->buffer, data + done, len - done);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
|
|
unsigned int len)
|
|
{
|
|
struct sha1_state *sctx = shash_desc_ctx(desc);
|
|
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
|
|
int res;
|
|
|
|
/* Handle the fast case right here */
|
|
if (partial + len < SHA1_BLOCK_SIZE) {
|
|
sctx->count += len;
|
|
memcpy(sctx->buffer + partial, data, len);
|
|
|
|
return 0;
|
|
}
|
|
|
|
if (!may_use_simd()) {
|
|
res = sha1_update_arm(desc, data, len);
|
|
} else {
|
|
kernel_neon_begin();
|
|
res = __sha1_neon_update(desc, data, len, partial);
|
|
kernel_neon_end();
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
|
|
/* Add padding and return the message digest. */
|
|
static int sha1_neon_final(struct shash_desc *desc, u8 *out)
|
|
{
|
|
struct sha1_state *sctx = shash_desc_ctx(desc);
|
|
unsigned int i, index, padlen;
|
|
__be32 *dst = (__be32 *)out;
|
|
__be64 bits;
|
|
static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
|
|
|
|
bits = cpu_to_be64(sctx->count << 3);
|
|
|
|
/* Pad out to 56 mod 64 and append length */
|
|
index = sctx->count % SHA1_BLOCK_SIZE;
|
|
padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
|
|
if (!may_use_simd()) {
|
|
sha1_update_arm(desc, padding, padlen);
|
|
sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits));
|
|
} else {
|
|
kernel_neon_begin();
|
|
/* We need to fill a whole block for __sha1_neon_update() */
|
|
if (padlen <= 56) {
|
|
sctx->count += padlen;
|
|
memcpy(sctx->buffer + index, padding, padlen);
|
|
} else {
|
|
__sha1_neon_update(desc, padding, padlen, index);
|
|
}
|
|
__sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56);
|
|
kernel_neon_end();
|
|
}
|
|
|
|
/* Store state in digest */
|
|
for (i = 0; i < 5; i++)
|
|
dst[i] = cpu_to_be32(sctx->state[i]);
|
|
|
|
/* Wipe context */
|
|
memset(sctx, 0, sizeof(*sctx));
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int sha1_neon_export(struct shash_desc *desc, void *out)
|
|
{
|
|
struct sha1_state *sctx = shash_desc_ctx(desc);
|
|
|
|
memcpy(out, sctx, sizeof(*sctx));
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int sha1_neon_import(struct shash_desc *desc, const void *in)
|
|
{
|
|
struct sha1_state *sctx = shash_desc_ctx(desc);
|
|
|
|
memcpy(sctx, in, sizeof(*sctx));
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct shash_alg alg = {
|
|
.digestsize = SHA1_DIGEST_SIZE,
|
|
.init = sha1_neon_init,
|
|
.update = sha1_neon_update,
|
|
.final = sha1_neon_final,
|
|
.export = sha1_neon_export,
|
|
.import = sha1_neon_import,
|
|
.descsize = sizeof(struct sha1_state),
|
|
.statesize = sizeof(struct sha1_state),
|
|
.base = {
|
|
.cra_name = "sha1",
|
|
.cra_driver_name = "sha1-neon",
|
|
.cra_priority = 250,
|
|
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
|
.cra_blocksize = SHA1_BLOCK_SIZE,
|
|
.cra_module = THIS_MODULE,
|
|
}
|
|
};
|
|
|
|
/*
 * Module entry point: register the algorithm, but only when
 * cpu_has_neon() says NEON is present.
 *
 * Returns -ENODEV without NEON, else the result of
 * crypto_register_shash().
 */
static int __init sha1_neon_mod_init(void)
{
	return cpu_has_neon() ? crypto_register_shash(&alg) : -ENODEV;
}
|
|
|
|
/* Module exit point: unregister the algorithm registered at init. */
static void __exit sha1_neon_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}
|
|
|
|
module_init(sha1_neon_mod_init);
|
|
module_exit(sha1_neon_mod_fini);
|
|
|
|
MODULE_LICENSE("GPL");
|
|
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, NEON accelerated");
|
|
MODULE_ALIAS("sha1");
|