mirror of
https://github.com/torvalds/linux.git
synced 2024-11-22 04:02:20 +00:00
powerpc: Add a powerpc implementation of SHA-1
This patch adds a crypto driver which provides a powerpc accelerated implementation of SHA-1, accelerated in that it is written in asm. Original patch by Paul, minor fixups for upstream by moi. Lightly tested on 64-bit with the test program here: http://michael.ellerman.id.au/files/junkcode/sha1test.c Seems to work, and is "not slower" than the generic version. Needs testing on 32-bit. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Michael Ellerman <michael@ellerman.id.au> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
This commit is contained in:
parent
5c49985c21
commit
323a6bf1d6
@ -143,6 +143,7 @@ core-y += arch/powerpc/kernel/ \
|
||||
arch/powerpc/sysdev/ \
|
||||
arch/powerpc/platforms/ \
|
||||
arch/powerpc/math-emu/ \
|
||||
arch/powerpc/crypto/ \
|
||||
arch/powerpc/net/
|
||||
core-$(CONFIG_XMON) += arch/powerpc/xmon/
|
||||
core-$(CONFIG_KVM) += arch/powerpc/kvm/
|
||||
|
9
arch/powerpc/crypto/Makefile
Normal file
9
arch/powerpc/crypto/Makefile
Normal file
@ -0,0 +1,9 @@
|
||||
#
|
||||
# powerpc/crypto/Makefile
|
||||
#
|
||||
# Arch-specific CryptoAPI modules.
|
||||
#
|
||||
|
||||
obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o
|
||||
|
||||
sha1-powerpc-y := sha1-powerpc-asm.o sha1.o
|
179
arch/powerpc/crypto/sha1-powerpc-asm.S
Normal file
179
arch/powerpc/crypto/sha1-powerpc-asm.S
Normal file
@ -0,0 +1,179 @@
|
||||
/*
|
||||
* SHA-1 implementation for PowerPC.
|
||||
*
|
||||
* Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
|
||||
*/
|
||||
|
||||
#include <asm/ppc_asm.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
/*
|
||||
* We roll the registers for T, A, B, C, D, E around on each
|
||||
* iteration; T on iteration t is A on iteration t+1, and so on.
|
||||
* We use registers 7 - 12 for this.
|
||||
*/
|
||||
/*
 * Register allocation for the six rolling values T, A, B, C, D, E.
 * Each macro evaluates to a register number in the range 7..12. The
 * offsets are chosen so that RT(t) == RA(t+1), RA(t) == RB(t+1), ...,
 * RD(t) == RE(t+1): advancing t by one renames the registers instead of
 * moving data between them.
 */
#define RT(t) ((((t)+5)%6)+7)
|
||||
#define RA(t) ((((t)+4)%6)+7)
|
||||
#define RB(t) ((((t)+3)%6)+7)
|
||||
#define RC(t) ((((t)+2)%6)+7)
|
||||
#define RD(t) ((((t)+1)%6)+7)
|
||||
#define RE(t) ((((t)+0)%6)+7)
|
||||
|
||||
/* We use registers 16 - 31 for the W values */
|
||||
/*
 * Register number holding message-schedule word t. The index wraps
 * modulo 16, so W(t) and W(t+16) name the same register (16..31).
 */
#define W(t) (((t)%16)+16)
|
||||
|
||||
/*
 * Load the 32-bit word at byte offset 4*t from the address in r4 into
 * register W(t).
 * NOTE(review): r4 is presumably the `src` block pointer (second C
 * argument) -- confirm against the calling convention.
 */
#define LOADW(t) \
|
||||
lwz W(t),(t)*4(r4)
|
||||
|
||||
/*
 * One round with f = (B & C) | (~B & D), combined with a load of input
 * word t+4:
 *
 *   T = rotl(A, 5) + f + E + r15 + W(t)
 *   B = rotl(B, 30)
 *   W(t+4) = 32-bit word at offset 4*(t+4) from r4
 *
 * r15 must already hold the round constant. r0, r6 and r14 are used as
 * scratch registers.
 */
#define STEPD0_LOAD(t) \
|
||||
andc r0,RD(t),RB(t); \
|
||||
and r6,RB(t),RC(t); \
|
||||
rotlwi RT(t),RA(t),5; \
|
||||
or r6,r6,r0; \
|
||||
add r0,RE(t),r15; \
|
||||
add RT(t),RT(t),r6; \
|
||||
add r14,r0,W(t); \
|
||||
lwz W((t)+4),((t)+4)*4(r4); \
|
||||
rotlwi RB(t),RB(t),30; \
|
||||
add RT(t),RT(t),r14
|
||||
|
||||
/*
 * One round with f = (B & C) | (~B & D), combined with computing
 * message-schedule word t+4 from earlier words:
 *
 *   T = rotl(A, 5) + f + E + r15 + W(t)
 *   B = rotl(B, 30)
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 *
 * W(t-12) and W(t+4) share a register (indices wrap modulo 16), so the
 * old word is consumed as the new one is produced. r15 must already hold
 * the round constant. r0, r5 and r6 are used as scratch registers.
 */
#define STEPD0_UPDATE(t) \
|
||||
and r6,RB(t),RC(t); \
|
||||
andc r0,RD(t),RB(t); \
|
||||
rotlwi RT(t),RA(t),5; \
|
||||
rotlwi RB(t),RB(t),30; \
|
||||
or r6,r6,r0; \
|
||||
add r0,RE(t),r15; \
|
||||
xor r5,W((t)+4-3),W((t)+4-8); \
|
||||
add RT(t),RT(t),r6; \
|
||||
xor W((t)+4),W((t)+4-16),W((t)+4-14); \
|
||||
add r0,r0,W(t); \
|
||||
xor W((t)+4),W((t)+4),r5; \
|
||||
add RT(t),RT(t),r0; \
|
||||
rotlwi W((t)+4),W((t)+4),1
|
||||
|
||||
/*
 * One round with f = B ^ C ^ D and no message-schedule work:
 *
 *   T = rotl(A, 5) + f + E + r15 + W(t)
 *   B = rotl(B, 30)
 *
 * r15 must already hold the round constant. r0 and r6 are used as
 * scratch registers.
 */
#define STEPD1(t) \
|
||||
xor r6,RB(t),RC(t); \
|
||||
rotlwi RT(t),RA(t),5; \
|
||||
rotlwi RB(t),RB(t),30; \
|
||||
xor r6,r6,RD(t); \
|
||||
add r0,RE(t),r15; \
|
||||
add RT(t),RT(t),r6; \
|
||||
add r0,r0,W(t); \
|
||||
add RT(t),RT(t),r0
|
||||
|
||||
/*
 * One round with f = B ^ C ^ D, combined with computing message-schedule
 * word t+4:
 *
 *   T = rotl(A, 5) + f + E + r15 + W(t)
 *   B = rotl(B, 30)
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 *
 * r15 must already hold the round constant. r0, r5 and r6 are used as
 * scratch registers.
 */
#define STEPD1_UPDATE(t) \
|
||||
xor r6,RB(t),RC(t); \
|
||||
rotlwi RT(t),RA(t),5; \
|
||||
rotlwi RB(t),RB(t),30; \
|
||||
xor r6,r6,RD(t); \
|
||||
add r0,RE(t),r15; \
|
||||
xor r5,W((t)+4-3),W((t)+4-8); \
|
||||
add RT(t),RT(t),r6; \
|
||||
xor W((t)+4),W((t)+4-16),W((t)+4-14); \
|
||||
add r0,r0,W(t); \
|
||||
xor W((t)+4),W((t)+4),r5; \
|
||||
add RT(t),RT(t),r0; \
|
||||
rotlwi W((t)+4),W((t)+4),1
|
||||
|
||||
/*
 * One round with f = (B & C) | (B & D) | (C & D), combined with
 * computing message-schedule word t+4:
 *
 *   T = rotl(A, 5) + f + E + r15 + W(t)
 *   B = rotl(B, 30)
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 *
 * The (C & D) term is formed after B has been rotated, which is safe
 * because that term does not read B. r15 must already hold the round
 * constant. r0, r5 and r6 are used as scratch registers.
 */
#define STEPD2_UPDATE(t) \
|
||||
and r6,RB(t),RC(t); \
|
||||
and r0,RB(t),RD(t); \
|
||||
rotlwi RT(t),RA(t),5; \
|
||||
or r6,r6,r0; \
|
||||
rotlwi RB(t),RB(t),30; \
|
||||
and r0,RC(t),RD(t); \
|
||||
xor r5,W((t)+4-3),W((t)+4-8); \
|
||||
or r6,r6,r0; \
|
||||
xor W((t)+4),W((t)+4-16),W((t)+4-14); \
|
||||
add r0,RE(t),r15; \
|
||||
add RT(t),RT(t),r6; \
|
||||
add r0,r0,W(t); \
|
||||
xor W((t)+4),W((t)+4),r5; \
|
||||
add RT(t),RT(t),r0; \
|
||||
rotlwi W((t)+4),W((t)+4),1
|
||||
|
||||
/*
 * Four consecutive STEPD0_LOAD rounds, t .. t+3. As a side effect these
 * load input words t+4 .. t+7.
 */
#define STEP0LD4(t) \
|
||||
STEPD0_LOAD(t); \
|
||||
STEPD0_LOAD((t)+1); \
|
||||
STEPD0_LOAD((t)+2); \
|
||||
STEPD0_LOAD((t)+3)
|
||||
|
||||
/*
 * Four consecutive rounds, t .. t+3, of the STEP<fn>_UPDATE flavour.
 * `fn` is pasted into the macro name, so it must be one of the suffixes
 * for which a STEP<fn>_UPDATE macro exists (D0, D1 or D2 in this file).
 */
#define STEPUP4(t, fn) \
|
||||
STEP##fn##_UPDATE(t); \
|
||||
STEP##fn##_UPDATE((t)+1); \
|
||||
STEP##fn##_UPDATE((t)+2); \
|
||||
STEP##fn##_UPDATE((t)+3)
|
||||
|
||||
/*
 * Twenty consecutive rounds, t .. t+19, of the STEP<fn>_UPDATE flavour,
 * expressed as five groups of STEPUP4.
 */
#define STEPUP20(t, fn) \
|
||||
STEPUP4(t, fn); \
|
||||
STEPUP4((t)+4, fn); \
|
||||
STEPUP4((t)+8, fn); \
|
||||
STEPUP4((t)+12, fn); \
|
||||
STEPUP4((t)+16, fn)
|
||||
|
||||
/*
 * powerpc_sha_transform: run the 80 SHA-1 rounds over one 64-byte block
 * and add the result into the five-word state.
 *
 *   r3: pointer to the five 32-bit state words (read, then written back)
 *   r4: pointer to the 16 input words of the block
 *
 * r0, r5, r6 and r14 are scratch, r15 holds the current round constant,
 * r7-r12 hold the rolling T/A/B/C/D/E values and r16-r31 the W ring.
 * NOTE(review): r5 carries the third C argument (`temp`) on entry; it is
 * only ever overwritten here, never read -- confirm that is intended.
 */
_GLOBAL(powerpc_sha_transform)
|
||||
/*
 * Open a stack frame and save the registers the rounds overwrite.
 * NOTE(review): PPC_STLU, STACKFRAMESIZE, SAVE_8GPRS and SAVE_10GPRS come
 * from headers not visible here; presumably they save r14-r21 and
 * r22-r31 -- confirm, including availability on 32-bit builds.
 */
PPC_STLU r1,-STACKFRAMESIZE(r1)
|
||||
SAVE_8GPRS(14, r1)
|
||||
SAVE_10GPRS(22, r1)
|
||||
|
||||
/* Load up A - E */
|
||||
lwz RA(0),0(r3) /* A */
|
||||
lwz RB(0),4(r3) /* B */
|
||||
lwz RC(0),8(r3) /* C */
|
||||
lwz RD(0),12(r3) /* D */
|
||||
lwz RE(0),16(r3) /* E */
|
||||
|
||||
/* Preload words 0-3; each STEPD0_LOAD(t) round fetches word t+4. */
LOADW(0)
|
||||
LOADW(1)
|
||||
LOADW(2)
|
||||
LOADW(3)
|
||||
|
||||
/* r15 = 0x5a827999 */
lis r15,0x5a82 /* K0-19 */
|
||||
ori r15,r15,0x7999
|
||||
/* Rounds 0-11 also load input words 4-15. */
STEP0LD4(0)
|
||||
STEP0LD4(4)
|
||||
STEP0LD4(8)
|
||||
/* Rounds 12-19 start computing the schedule (words 16-23). */
STEPUP4(12, D0)
|
||||
STEPUP4(16, D0)
|
||||
|
||||
/* r15 = 0x6ed9eba1 */
lis r15,0x6ed9 /* K20-39 */
|
||||
ori r15,r15,0xeba1
|
||||
STEPUP20(20, D1)
|
||||
|
||||
/* r15 = 0x8f1bbcdc */
lis r15,0x8f1b /* K40-59 */
|
||||
ori r15,r15,0xbcdc
|
||||
STEPUP20(40, D2)
|
||||
|
||||
/* r15 = 0xca62c1d6 */
lis r15,0xca62 /* K60-79 */
|
||||
ori r15,r15,0xc1d6
|
||||
/* Rounds 60-75 still extend the schedule, up to word 79. */
STEPUP4(60, D1)
|
||||
STEPUP4(64, D1)
|
||||
STEPUP4(68, D1)
|
||||
STEPUP4(72, D1)
|
||||
/*
 * Rounds 76-79 need no further schedule words. They read W(76)..W(79)
 * (r28-r31), so r16-r20 are free and are used to re-read the incoming
 * state words, interleaved with the rounds.
 */
lwz r20,16(r3)
|
||||
STEPD1(76)
|
||||
lwz r19,12(r3)
|
||||
STEPD1(77)
|
||||
lwz r18,8(r3)
|
||||
STEPD1(78)
|
||||
lwz r17,4(r3)
|
||||
STEPD1(79)
|
||||
|
||||
lwz r16,0(r3)
|
||||
/*
 * Add the old state to the final working values, writing the sums into
 * the round-0 register slots. The E sum stays in r20 until the end
 * because RE(0) and RA(80) are the same register (r7), which must be
 * read for the A sum before it is overwritten.
 */
add r20,RE(80),r20
|
||||
add RD(0),RD(80),r19
|
||||
add RC(0),RC(80),r18
|
||||
add RB(0),RB(80),r17
|
||||
add RA(0),RA(80),r16
|
||||
mr RE(0),r20
|
||||
/* Write the updated state back. */
stw RA(0),0(r3)
|
||||
stw RB(0),4(r3)
|
||||
stw RC(0),8(r3)
|
||||
stw RD(0),12(r3)
|
||||
stw RE(0),16(r3)
|
||||
|
||||
/* Restore the saved registers, release the frame and return. */
REST_8GPRS(14, r1)
|
||||
REST_10GPRS(22, r1)
|
||||
addi r1,r1,STACKFRAMESIZE
|
||||
blr
|
157
arch/powerpc/crypto/sha1.c
Normal file
157
arch/powerpc/crypto/sha1.c
Normal file
@ -0,0 +1,157 @@
|
||||
/*
|
||||
* Cryptographic API.
|
||||
*
|
||||
* powerpc implementation of the SHA1 Secure Hash Algorithm.
|
||||
*
|
||||
* Derived from cryptoapi implementation, adapted for in-place
|
||||
* scatterlist interface.
|
||||
*
|
||||
* Derived from "crypto/sha1.c"
|
||||
* Copyright (c) Alan Smithee.
|
||||
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
|
||||
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
*/
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/cryptohash.h>
|
||||
#include <linux/types.h>
|
||||
#include <crypto/sha.h>
|
||||
#include <asm/byteorder.h>
|
||||
|
||||
extern void powerpc_sha_transform(u32 *state, const u8 *src, u32 *temp);
|
||||
|
||||
static int sha1_init(struct shash_desc *desc)
|
||||
{
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
*sctx = (struct sha1_state){
|
||||
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
|
||||
};
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha1_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len)
|
||||
{
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
unsigned int partial, done;
|
||||
const u8 *src;
|
||||
|
||||
partial = sctx->count & 0x3f;
|
||||
sctx->count += len;
|
||||
done = 0;
|
||||
src = data;
|
||||
|
||||
if ((partial + len) > 63) {
|
||||
u32 temp[SHA_WORKSPACE_WORDS];
|
||||
|
||||
if (partial) {
|
||||
done = -partial;
|
||||
memcpy(sctx->buffer + partial, data, done + 64);
|
||||
src = sctx->buffer;
|
||||
}
|
||||
|
||||
do {
|
||||
powerpc_sha_transform(sctx->state, src, temp);
|
||||
done += 64;
|
||||
src = data + done;
|
||||
} while (done + 63 < len);
|
||||
|
||||
memset(temp, 0, sizeof(temp));
|
||||
partial = 0;
|
||||
}
|
||||
memcpy(sctx->buffer + partial, src, len - done);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Add padding and return the message digest. */
|
||||
static int sha1_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
__be32 *dst = (__be32 *)out;
|
||||
u32 i, index, padlen;
|
||||
__be64 bits;
|
||||
static const u8 padding[64] = { 0x80, };
|
||||
|
||||
bits = cpu_to_be64(sctx->count << 3);
|
||||
|
||||
/* Pad out to 56 mod 64 */
|
||||
index = sctx->count & 0x3f;
|
||||
padlen = (index < 56) ? (56 - index) : ((64+56) - index);
|
||||
sha1_update(desc, padding, padlen);
|
||||
|
||||
/* Append length */
|
||||
sha1_update(desc, (const u8 *)&bits, sizeof(bits));
|
||||
|
||||
/* Store state in digest */
|
||||
for (i = 0; i < 5; i++)
|
||||
dst[i] = cpu_to_be32(sctx->state[i]);
|
||||
|
||||
/* Wipe context */
|
||||
memset(sctx, 0, sizeof *sctx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha1_export(struct shash_desc *desc, void *out)
|
||||
{
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
memcpy(out, sctx, sizeof(*sctx));
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha1_import(struct shash_desc *desc, const void *in)
|
||||
{
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
memcpy(sctx, in, sizeof(*sctx));
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct shash_alg alg = {
|
||||
.digestsize = SHA1_DIGEST_SIZE,
|
||||
.init = sha1_init,
|
||||
.update = sha1_update,
|
||||
.final = sha1_final,
|
||||
.export = sha1_export,
|
||||
.import = sha1_import,
|
||||
.descsize = sizeof(struct sha1_state),
|
||||
.statesize = sizeof(struct sha1_state),
|
||||
.base = {
|
||||
.cra_name = "sha1",
|
||||
.cra_driver_name= "sha1-powerpc",
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.cra_blocksize = SHA1_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
||||
/* Module load: register the algorithm and pass on the result. */
static int __init sha1_powerpc_mod_init(void)
{
	int err = crypto_register_shash(&alg);

	return err;
}
|
||||
|
||||
/* Module unload: withdraw the algorithm registered at load time. */
static void __exit sha1_powerpc_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}
|
||||
|
||||
module_init(sha1_powerpc_mod_init);
|
||||
module_exit(sha1_powerpc_mod_fini);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm");
|
||||
|
||||
MODULE_ALIAS("sha1-powerpc");
|
@ -479,6 +479,13 @@ config CRYPTO_SHA1_ARM
|
||||
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
|
||||
using optimized ARM assembler.
|
||||
|
||||
config CRYPTO_SHA1_PPC
|
||||
tristate "SHA1 digest algorithm (powerpc)"
|
||||
depends on PPC
|
||||
help
|
||||
This is the powerpc assembler-optimized implementation of the
|
||||
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
|
||||
|
||||
config CRYPTO_SHA256
|
||||
tristate "SHA224 and SHA256 digest algorithm"
|
||||
select CRYPTO_HASH
|
||||
|
Loading…
Reference in New Issue
Block a user