Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux into next

Pull arm64 updates from Catalin Marinas:

 - Optimised assembly string/memory routines (based on the AArch64 Cortex
   Strings library contributed to glibc but re-licensed under GPLv2)
 - Optimised crypto algorithms making use of the ARMv8 crypto extensions
   (together with kernel API for using FPSIMD instructions in interrupt
   context)
 - Ftrace support
 - CPU topology parsing from DT
 - ESR_EL1 (Exception Syndrome Register) exposed to user space signal
   handlers for SIGSEGV/SIGBUS (useful to emulation tools like Qemu)
 - 1GB section linear mapping if applicable
 - Barriers usage clean-up
 - Default pgprot clean-up

Conflicts as per Catalin.

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (57 commits)
  arm64: kernel: initialize broadcast hrtimer based clock event device
  arm64: ftrace: Add system call tracepoint
  arm64: ftrace: Add CALLER_ADDRx macros
  arm64: ftrace: Add dynamic ftrace support
  arm64: Add ftrace support
  ftrace: Add arm64 support to recordmcount
  arm64: Add 'notrace' attribute to unwind_frame() for ftrace
  arm64: add __ASSEMBLY__ in asm/insn.h
  arm64: Fix linker script entry point
  arm64: lib: Implement optimized string length routines
  arm64: lib: Implement optimized string compare routines
  arm64: lib: Implement optimized memcmp routine
  arm64: lib: Implement optimized memset routine
  arm64: lib: Implement optimized memmove routine
  arm64: lib: Implement optimized memcpy routine
  arm64: defconfig: enable a few more common/useful options in defconfig
  ftrace: Make CALLER_ADDRx macros more generic
  arm64: Fix deadlock scenario with smp_send_stop()
  arm64: Fix machine_shutdown() definition
  arm64: Support arch_irq_work_raise() via self IPIs
  ...
commit cc07aabc53
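One item in the log above, the "kernel API for using FPSIMD instructions in interrupt context", is what the crypto glue code in this diff builds on. A minimal usage sketch, assuming the kernel_neon_begin_partial()/kernel_neon_end() helpers that appear later in the diff; the surrounding function is hypothetical, not part of the series:

#include <asm/neon.h>

/* Hypothetical caller illustrating the pattern used by the glue code below.
 * kernel_neon_begin_partial(n) makes the first n NEON registers usable,
 * even from interrupt context; kernel_neon_end() gives them back. */
static void neon_user_example(void)
{
	kernel_neon_begin_partial(4);	/* we will only touch v0-v3 */
	/* ... NEON / Crypto Extensions instructions go here ... */
	kernel_neon_end();
}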
@@ -52,15 +52,7 @@ extern inline void *return_address(unsigned int level)

#endif

#define HAVE_ARCH_CALLER_ADDR

#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
#define CALLER_ADDR1 ((unsigned long)return_address(1))
#define CALLER_ADDR2 ((unsigned long)return_address(2))
#define CALLER_ADDR3 ((unsigned long)return_address(3))
#define CALLER_ADDR4 ((unsigned long)return_address(4))
#define CALLER_ADDR5 ((unsigned long)return_address(5))
#define CALLER_ADDR6 ((unsigned long)return_address(6))
#define ftrace_return_addr(n) return_address(n)

#endif /* ifndef __ASSEMBLY__ */
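The hunk above keeps a single ftrace_return_addr() definition in place of the per-level CALLER_ADDRx macros. As a quick, hedged illustration of what these macros evaluate to, here is a stand-alone userspace sketch reusing the same __builtin_return_address() expression; it is not kernel code:

#include <stdio.h>

/* Same expression as CALLER_ADDR0 in the hunk above. */
#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))

static void trace_me(void)
{
	/* Prints the address trace_me() will return to, i.e. its call site. */
	printf("called from %#lx\n", CALLER_ADDR0);
}

int main(void)
{
	trace_me();
	return 0;
}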
arch/arm64/Kconfig
@@ -30,12 +30,17 @@ config ARM64
	select HAVE_ARCH_JUMP_LABEL
	select HAVE_ARCH_KGDB
	select HAVE_ARCH_TRACEHOOK
	select HAVE_C_RECORDMCOUNT
	select HAVE_DEBUG_BUGVERBOSE
	select HAVE_DEBUG_KMEMLEAK
	select HAVE_DMA_API_DEBUG
	select HAVE_DMA_ATTRS
	select HAVE_DMA_CONTIGUOUS
	select HAVE_DYNAMIC_FTRACE
	select HAVE_EFFICIENT_UNALIGNED_ACCESS
	select HAVE_FTRACE_MCOUNT_RECORD
	select HAVE_FUNCTION_TRACER
	select HAVE_FUNCTION_GRAPH_TRACER
	select HAVE_GENERIC_DMA_COHERENT
	select HAVE_HW_BREAKPOINT if PERF_EVENTS
	select HAVE_MEMBLOCK
@@ -43,6 +48,7 @@ config ARM64
	select HAVE_PERF_EVENTS
	select HAVE_PERF_REGS
	select HAVE_PERF_USER_STACK_DUMP
	select HAVE_SYSCALL_TRACEPOINTS
	select IRQ_DOMAIN
	select MODULES_USE_ELF_RELA
	select NO_BOOTMEM
@@ -245,6 +251,9 @@ config ARCH_WANT_HUGE_PMD_SHARE
config HAVE_ARCH_TRANSPARENT_HUGEPAGE
	def_bool y

config ARCH_HAS_CACHE_LINE_SIZE
	def_bool y

source "mm/Kconfig"

config XEN_DOM0
@@ -359,5 +368,8 @@ source "arch/arm64/Kconfig.debug"
source "security/Kconfig"

source "crypto/Kconfig"
if CRYPTO
source "arch/arm64/crypto/Kconfig"
endif

source "lib/Kconfig"
arch/arm64/Makefile
@@ -45,6 +45,7 @@ export TEXT_OFFSET GZFLAGS
core-y		+= arch/arm64/kernel/ arch/arm64/mm/
core-$(CONFIG_KVM) += arch/arm64/kvm/
core-$(CONFIG_XEN) += arch/arm64/xen/
core-$(CONFIG_CRYPTO) += arch/arm64/crypto/
libs-y		:= arch/arm64/lib/ $(libs-y)
libs-y		+= $(LIBGCC)
arch/arm64/configs/defconfig
@@ -1,11 +1,11 @@
# CONFIG_LOCALVERSION_AUTO is not set
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_AUDIT=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
@@ -27,6 +27,7 @@ CONFIG_ARCH_VEXPRESS=y
CONFIG_ARCH_XGENE=y
CONFIG_SMP=y
CONFIG_PREEMPT=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CMA=y
CONFIG_CMDLINE="console=ttyAMA0"
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
@@ -44,7 +45,7 @@ CONFIG_IP_PNP_BOOTP=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DMA_CMA=y
CONFIG_SCSI=y
CONFIG_VIRTIO_BLK=y
# CONFIG_SCSI_PROC_FS is not set
CONFIG_BLK_DEV_SD=y
# CONFIG_SCSI_LOWLEVEL is not set
@@ -56,20 +57,18 @@ CONFIG_SMC91X=y
CONFIG_SMSC911X=y
# CONFIG_WLAN is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_SERIO_I8042 is not set
# CONFIG_SERIO_SERPORT is not set
CONFIG_LEGACY_PTY_COUNT=16
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
CONFIG_REGULATOR=y
CONFIG_REGULATOR_FIXED_VOLTAGE=y
CONFIG_FB=y
# CONFIG_VGA_CONSOLE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
@@ -79,27 +78,38 @@ CONFIG_USB_ISP1760_HCD=y
CONFIG_USB_STORAGE=y
CONFIG_MMC=y
CONFIG_MMC_ARMMMCI=y
CONFIG_VIRTIO_MMIO=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
CONFIG_EXT4_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
# CONFIG_EXT3_FS_XATTR is not set
CONFIG_EXT4_FS=y
CONFIG_FUSE_FS=y
CONFIG_CUSE=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
CONFIG_HUGETLBFS=y
# CONFIG_MISC_FILESYSTEMS is not set
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_VIRTUALIZATION=y
CONFIG_KVM=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_LOCKUP_DETECTOR=y
# CONFIG_SCHED_DEBUG is not set
# CONFIG_FTRACE is not set
CONFIG_ATOMIC64_SELFTEST=y
CONFIG_VIRTIO_MMIO=y
CONFIG_VIRTIO_BLK=y
CONFIG_CRYPTO_ANSI_CPRNG=y
CONFIG_ARM64_CRYPTO=y
CONFIG_CRYPTO_SHA1_ARM64_CE=y
CONFIG_CRYPTO_SHA2_ARM64_CE=y
CONFIG_CRYPTO_GHASH_ARM64_CE=y
CONFIG_CRYPTO_AES_ARM64_CE=y
CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y
arch/arm64/crypto/Kconfig (new file, 53 lines)
@@ -0,0 +1,53 @@
menuconfig ARM64_CRYPTO
	bool "ARM64 Accelerated Cryptographic Algorithms"
	depends on ARM64
	help
	  Say Y here to choose from a selection of cryptographic algorithms
	  implemented using ARM64 specific CPU features or instructions.

if ARM64_CRYPTO

config CRYPTO_SHA1_ARM64_CE
	tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
	depends on ARM64 && KERNEL_MODE_NEON
	select CRYPTO_HASH

config CRYPTO_SHA2_ARM64_CE
	tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
	depends on ARM64 && KERNEL_MODE_NEON
	select CRYPTO_HASH

config CRYPTO_GHASH_ARM64_CE
	tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
	depends on ARM64 && KERNEL_MODE_NEON
	select CRYPTO_HASH

config CRYPTO_AES_ARM64_CE
	tristate "AES core cipher using ARMv8 Crypto Extensions"
	depends on ARM64 && KERNEL_MODE_NEON
	select CRYPTO_ALGAPI
	select CRYPTO_AES

config CRYPTO_AES_ARM64_CE_CCM
	tristate "AES in CCM mode using ARMv8 Crypto Extensions"
	depends on ARM64 && KERNEL_MODE_NEON
	select CRYPTO_ALGAPI
	select CRYPTO_AES
	select CRYPTO_AEAD

config CRYPTO_AES_ARM64_CE_BLK
	tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
	depends on ARM64 && KERNEL_MODE_NEON
	select CRYPTO_BLKCIPHER
	select CRYPTO_AES
	select CRYPTO_ABLK_HELPER

config CRYPTO_AES_ARM64_NEON_BLK
	tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
	depends on ARM64 && KERNEL_MODE_NEON
	select CRYPTO_BLKCIPHER
	select CRYPTO_AES
	select CRYPTO_ABLK_HELPER

endif
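These options only build the drivers; at run time the crypto core picks an implementation by priority (the CE drivers below register with cra_priority 300). A hedged sketch, not part of this series, of how a kernel user would end up with the accelerated ccm(aes):

#include <linux/crypto.h>

/* Illustrative only: requesting "ccm(aes)" with no type/mask constraints
 * returns the highest-priority registered implementation, which is the
 * ARMv8-CE driver from this diff when it is loaded. */
static struct crypto_aead *get_ccm_aes(void)
{
	return crypto_alloc_aead("ccm(aes)", 0, 0);
}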
arch/arm64/crypto/Makefile (new file, 38 lines)
@@ -0,0 +1,38 @@
#
# linux/arch/arm64/crypto/Makefile
#
# Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#

obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o
sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o

obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o
sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o

obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o

obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto

obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o

obj-$(CONFIG_CRYPTO_AES_ARM64_CE_BLK) += aes-ce-blk.o
aes-ce-blk-y := aes-glue-ce.o aes-ce.o

obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
aes-neon-blk-y := aes-glue-neon.o aes-neon.o

AFLAGS_aes-ce.o		:= -DINTERLEAVE=2 -DINTERLEAVE_INLINE
AFLAGS_aes-neon.o	:= -DINTERLEAVE=4

CFLAGS_aes-glue-ce.o	:= -DUSE_V8_CRYPTO_EXTENSIONS

$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
	$(call if_changed_dep,cc_o_c)
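The pattern rule at the end builds aes-glue.c twice, once with -DUSE_V8_CRYPTO_EXTENSIONS and once without, so a single source file yields both the Crypto Extensions driver and the plain NEON driver. The mechanism is the preprocessor block at the top of aes-glue.c further down in this diff, abridged here for illustration:

/* Abridged from aes-glue.c below: the define selects names and priority. */
#ifdef USE_V8_CRYPTO_EXTENSIONS
#define MODE "ce"			/* built as aes-glue-ce.o   */
#define PRIO 300
#define aes_ecb_encrypt ce_aes_ecb_encrypt
#else
#define MODE "neon"			/* built as aes-glue-neon.o */
#define PRIO 200
#define aes_ecb_encrypt neon_aes_ecb_encrypt
#endif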
arch/arm64/crypto/aes-ce-ccm-core.S (new file, 222 lines)
@@ -0,0 +1,222 @@
|
||||
/*
|
||||
* aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
|
||||
*
|
||||
* Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.text
|
||||
.arch armv8-a+crypto
|
||||
|
||||
/*
|
||||
* void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
|
||||
* u32 *macp, u8 const rk[], u32 rounds);
|
||||
*/
|
||||
ENTRY(ce_aes_ccm_auth_data)
|
||||
ldr w8, [x3] /* leftover from prev round? */
|
||||
ld1 {v0.2d}, [x0] /* load mac */
|
||||
cbz w8, 1f
|
||||
sub w8, w8, #16
|
||||
eor v1.16b, v1.16b, v1.16b
|
||||
0: ldrb w7, [x1], #1 /* get 1 byte of input */
|
||||
subs w2, w2, #1
|
||||
add w8, w8, #1
|
||||
ins v1.b[0], w7
|
||||
ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */
|
||||
beq 8f /* out of input? */
|
||||
cbnz w8, 0b
|
||||
eor v0.16b, v0.16b, v1.16b
|
||||
1: ld1 {v3.2d}, [x4] /* load first round key */
|
||||
prfm pldl1strm, [x1]
|
||||
cmp w5, #12 /* which key size? */
|
||||
add x6, x4, #16
|
||||
sub w7, w5, #2 /* modified # of rounds */
|
||||
bmi 2f
|
||||
bne 5f
|
||||
mov v5.16b, v3.16b
|
||||
b 4f
|
||||
2: mov v4.16b, v3.16b
|
||||
ld1 {v5.2d}, [x6], #16 /* load 2nd round key */
|
||||
3: aese v0.16b, v4.16b
|
||||
aesmc v0.16b, v0.16b
|
||||
4: ld1 {v3.2d}, [x6], #16 /* load next round key */
|
||||
aese v0.16b, v5.16b
|
||||
aesmc v0.16b, v0.16b
|
||||
5: ld1 {v4.2d}, [x6], #16 /* load next round key */
|
||||
subs w7, w7, #3
|
||||
aese v0.16b, v3.16b
|
||||
aesmc v0.16b, v0.16b
|
||||
ld1 {v5.2d}, [x6], #16 /* load next round key */
|
||||
bpl 3b
|
||||
aese v0.16b, v4.16b
|
||||
subs w2, w2, #16 /* last data? */
|
||||
eor v0.16b, v0.16b, v5.16b /* final round */
|
||||
bmi 6f
|
||||
ld1 {v1.16b}, [x1], #16 /* load next input block */
|
||||
eor v0.16b, v0.16b, v1.16b /* xor with mac */
|
||||
bne 1b
|
||||
6: st1 {v0.2d}, [x0] /* store mac */
|
||||
beq 10f
|
||||
adds w2, w2, #16
|
||||
beq 10f
|
||||
mov w8, w2
|
||||
7: ldrb w7, [x1], #1
|
||||
umov w6, v0.b[0]
|
||||
eor w6, w6, w7
|
||||
strb w6, [x0], #1
|
||||
subs w2, w2, #1
|
||||
beq 10f
|
||||
ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */
|
||||
b 7b
|
||||
8: mov w7, w8
|
||||
add w8, w8, #16
|
||||
9: ext v1.16b, v1.16b, v1.16b, #1
|
||||
adds w7, w7, #1
|
||||
bne 9b
|
||||
eor v0.16b, v0.16b, v1.16b
|
||||
st1 {v0.2d}, [x0]
|
||||
10: str w8, [x3]
|
||||
ret
|
||||
ENDPROC(ce_aes_ccm_auth_data)
|
||||
|
||||
/*
|
||||
* void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
|
||||
* u32 rounds);
|
||||
*/
|
||||
ENTRY(ce_aes_ccm_final)
|
||||
ld1 {v3.2d}, [x2], #16 /* load first round key */
|
||||
ld1 {v0.2d}, [x0] /* load mac */
|
||||
cmp w3, #12 /* which key size? */
|
||||
sub w3, w3, #2 /* modified # of rounds */
|
||||
ld1 {v1.2d}, [x1] /* load 1st ctriv */
|
||||
bmi 0f
|
||||
bne 3f
|
||||
mov v5.16b, v3.16b
|
||||
b 2f
|
||||
0: mov v4.16b, v3.16b
|
||||
1: ld1 {v5.2d}, [x2], #16 /* load next round key */
|
||||
aese v0.16b, v4.16b
|
||||
aese v1.16b, v4.16b
|
||||
aesmc v0.16b, v0.16b
|
||||
aesmc v1.16b, v1.16b
|
||||
2: ld1 {v3.2d}, [x2], #16 /* load next round key */
|
||||
aese v0.16b, v5.16b
|
||||
aese v1.16b, v5.16b
|
||||
aesmc v0.16b, v0.16b
|
||||
aesmc v1.16b, v1.16b
|
||||
3: ld1 {v4.2d}, [x2], #16 /* load next round key */
|
||||
subs w3, w3, #3
|
||||
aese v0.16b, v3.16b
|
||||
aese v1.16b, v3.16b
|
||||
aesmc v0.16b, v0.16b
|
||||
aesmc v1.16b, v1.16b
|
||||
bpl 1b
|
||||
aese v0.16b, v4.16b
|
||||
aese v1.16b, v4.16b
|
||||
/* final round key cancels out */
|
||||
eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */
|
||||
st1 {v0.2d}, [x0] /* store result */
|
||||
ret
|
||||
ENDPROC(ce_aes_ccm_final)
|
||||
|
||||
.macro aes_ccm_do_crypt,enc
|
||||
ldr x8, [x6, #8] /* load lower ctr */
|
||||
ld1 {v0.2d}, [x5] /* load mac */
|
||||
rev x8, x8 /* keep swabbed ctr in reg */
|
||||
0: /* outer loop */
|
||||
ld1 {v1.1d}, [x6] /* load upper ctr */
|
||||
prfm pldl1strm, [x1]
|
||||
add x8, x8, #1
|
||||
rev x9, x8
|
||||
cmp w4, #12 /* which key size? */
|
||||
sub w7, w4, #2 /* get modified # of rounds */
|
||||
ins v1.d[1], x9 /* no carry in lower ctr */
|
||||
ld1 {v3.2d}, [x3] /* load first round key */
|
||||
add x10, x3, #16
|
||||
bmi 1f
|
||||
bne 4f
|
||||
mov v5.16b, v3.16b
|
||||
b 3f
|
||||
1: mov v4.16b, v3.16b
|
||||
ld1 {v5.2d}, [x10], #16 /* load 2nd round key */
|
||||
2: /* inner loop: 3 rounds, 2x interleaved */
|
||||
aese v0.16b, v4.16b
|
||||
aese v1.16b, v4.16b
|
||||
aesmc v0.16b, v0.16b
|
||||
aesmc v1.16b, v1.16b
|
||||
3: ld1 {v3.2d}, [x10], #16 /* load next round key */
|
||||
aese v0.16b, v5.16b
|
||||
aese v1.16b, v5.16b
|
||||
aesmc v0.16b, v0.16b
|
||||
aesmc v1.16b, v1.16b
|
||||
4: ld1 {v4.2d}, [x10], #16 /* load next round key */
|
||||
subs w7, w7, #3
|
||||
aese v0.16b, v3.16b
|
||||
aese v1.16b, v3.16b
|
||||
aesmc v0.16b, v0.16b
|
||||
aesmc v1.16b, v1.16b
|
||||
ld1 {v5.2d}, [x10], #16 /* load next round key */
|
||||
bpl 2b
|
||||
aese v0.16b, v4.16b
|
||||
aese v1.16b, v4.16b
|
||||
subs w2, w2, #16
|
||||
bmi 6f /* partial block? */
|
||||
ld1 {v2.16b}, [x1], #16 /* load next input block */
|
||||
.if \enc == 1
|
||||
eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
|
||||
eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */
|
||||
.else
|
||||
eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */
|
||||
eor v1.16b, v2.16b, v5.16b /* final round enc */
|
||||
.endif
|
||||
eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
|
||||
st1 {v1.16b}, [x0], #16 /* write output block */
|
||||
bne 0b
|
||||
rev x8, x8
|
||||
st1 {v0.2d}, [x5] /* store mac */
|
||||
str x8, [x6, #8] /* store lsb end of ctr (BE) */
|
||||
5: ret
|
||||
|
||||
6: eor v0.16b, v0.16b, v5.16b /* final round mac */
|
||||
eor v1.16b, v1.16b, v5.16b /* final round enc */
|
||||
st1 {v0.2d}, [x5] /* store mac */
|
||||
add w2, w2, #16 /* process partial tail block */
|
||||
7: ldrb w9, [x1], #1 /* get 1 byte of input */
|
||||
umov w6, v1.b[0] /* get top crypted ctr byte */
|
||||
umov w7, v0.b[0] /* get top mac byte */
|
||||
.if \enc == 1
|
||||
eor w7, w7, w9
|
||||
eor w9, w9, w6
|
||||
.else
|
||||
eor w9, w9, w6
|
||||
eor w7, w7, w9
|
||||
.endif
|
||||
strb w9, [x0], #1 /* store out byte */
|
||||
strb w7, [x5], #1 /* store mac byte */
|
||||
subs w2, w2, #1
|
||||
beq 5b
|
||||
ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */
|
||||
ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */
|
||||
b 7b
|
||||
.endm
|
||||
|
||||
/*
|
||||
* void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
|
||||
* u8 const rk[], u32 rounds, u8 mac[],
|
||||
* u8 ctr[]);
|
||||
* void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
|
||||
* u8 const rk[], u32 rounds, u8 mac[],
|
||||
* u8 ctr[]);
|
||||
*/
|
||||
ENTRY(ce_aes_ccm_encrypt)
|
||||
aes_ccm_do_crypt 1
|
||||
ENDPROC(ce_aes_ccm_encrypt)
|
||||
|
||||
ENTRY(ce_aes_ccm_decrypt)
|
||||
aes_ccm_do_crypt 0
|
||||
ENDPROC(ce_aes_ccm_decrypt)
|
arch/arm64/crypto/aes-ce-ccm-glue.c (new file, 297 lines)
@@ -0,0 +1,297 @@
|
||||
/*
|
||||
* aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
|
||||
*
|
||||
* Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <asm/neon.h>
|
||||
#include <asm/unaligned.h>
|
||||
#include <crypto/aes.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
static int num_rounds(struct crypto_aes_ctx *ctx)
|
||||
{
|
||||
/*
|
||||
* # of rounds specified by AES:
|
||||
* 128 bit key 10 rounds
|
||||
* 192 bit key 12 rounds
|
||||
* 256 bit key 14 rounds
|
||||
* => n byte key => 6 + (n/4) rounds
|
||||
*/
|
||||
return 6 + ctx->key_length / 4;
|
||||
}
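A quick check of the round-count formula in the comment above (stand-alone sketch; the assumption is that key_length is in bytes, as in struct crypto_aes_ctx):

#include <assert.h>

static int rounds_for(int key_bytes)
{
	return 6 + key_bytes / 4;	/* same formula as num_rounds() */
}

int main(void)
{
	assert(rounds_for(16) == 10);	/* AES-128 */
	assert(rounds_for(24) == 12);	/* AES-192 */
	assert(rounds_for(32) == 14);	/* AES-256 */
	return 0;
}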
|
||||
|
||||
asmlinkage void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
|
||||
u32 *macp, u32 const rk[], u32 rounds);
|
||||
|
||||
asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
|
||||
u32 const rk[], u32 rounds, u8 mac[],
|
||||
u8 ctr[]);
|
||||
|
||||
asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
|
||||
u32 const rk[], u32 rounds, u8 mac[],
|
||||
u8 ctr[]);
|
||||
|
||||
asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
|
||||
u32 rounds);
|
||||
|
||||
static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
int ret;
|
||||
|
||||
ret = crypto_aes_expand_key(ctx, in_key, key_len);
|
||||
if (!ret)
|
||||
return 0;
|
||||
|
||||
tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
|
||||
{
|
||||
if ((authsize & 1) || authsize < 4)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
|
||||
{
|
||||
struct crypto_aead *aead = crypto_aead_reqtfm(req);
|
||||
__be32 *n = (__be32 *)&maciv[AES_BLOCK_SIZE - 8];
|
||||
u32 l = req->iv[0] + 1;
|
||||
|
||||
/* verify that CCM dimension 'L' is set correctly in the IV */
|
||||
if (l < 2 || l > 8)
|
||||
return -EINVAL;
|
||||
|
||||
/* verify that msglen can in fact be represented in L bytes */
|
||||
if (l < 4 && msglen >> (8 * l))
|
||||
return -EOVERFLOW;
|
||||
|
||||
/*
|
||||
* Even if the CCM spec allows L values of up to 8, the Linux cryptoapi
|
||||
* uses a u32 type to represent msglen so the top 4 bytes are always 0.
|
||||
*/
|
||||
n[0] = 0;
|
||||
n[1] = cpu_to_be32(msglen);
|
||||
|
||||
memcpy(maciv, req->iv, AES_BLOCK_SIZE - l);
|
||||
|
||||
/*
|
||||
* Meaning of byte 0 according to CCM spec (RFC 3610/NIST 800-38C)
|
||||
* - bits 0..2 : max # of bytes required to represent msglen, minus 1
|
||||
* (already set by caller)
|
||||
* - bits 3..5 : size of auth tag (1 => 4 bytes, 2 => 6 bytes, etc)
|
||||
* - bit 6 : indicates presence of authenticate-only data
|
||||
*/
|
||||
maciv[0] |= (crypto_aead_authsize(aead) - 2) << 2;
|
||||
if (req->assoclen)
|
||||
maciv[0] |= 0x40;
|
||||
|
||||
memset(&req->iv[AES_BLOCK_SIZE - l], 0, l);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
|
||||
{
|
||||
struct crypto_aead *aead = crypto_aead_reqtfm(req);
|
||||
struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
|
||||
struct __packed { __be16 l; __be32 h; u16 len; } ltag;
|
||||
struct scatter_walk walk;
|
||||
u32 len = req->assoclen;
|
||||
u32 macp = 0;
|
||||
|
||||
/* prepend the AAD with a length tag */
|
||||
if (len < 0xff00) {
|
||||
ltag.l = cpu_to_be16(len);
|
||||
ltag.len = 2;
|
||||
} else {
|
||||
ltag.l = cpu_to_be16(0xfffe);
|
||||
put_unaligned_be32(len, <ag.h);
|
||||
ltag.len = 6;
|
||||
}
|
||||
|
||||
ce_aes_ccm_auth_data(mac, (u8 *)<ag, ltag.len, &macp, ctx->key_enc,
|
||||
num_rounds(ctx));
|
||||
scatterwalk_start(&walk, req->assoc);
|
||||
|
||||
do {
|
||||
u32 n = scatterwalk_clamp(&walk, len);
|
||||
u8 *p;
|
||||
|
||||
if (!n) {
|
||||
scatterwalk_start(&walk, sg_next(walk.sg));
|
||||
n = scatterwalk_clamp(&walk, len);
|
||||
}
|
||||
p = scatterwalk_map(&walk);
|
||||
ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc,
|
||||
num_rounds(ctx));
|
||||
len -= n;
|
||||
|
||||
scatterwalk_unmap(p);
|
||||
scatterwalk_advance(&walk, n);
|
||||
scatterwalk_done(&walk, 0, len);
|
||||
} while (len);
|
||||
}
|
||||
|
||||
static int ccm_encrypt(struct aead_request *req)
|
||||
{
|
||||
struct crypto_aead *aead = crypto_aead_reqtfm(req);
|
||||
struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
|
||||
struct blkcipher_desc desc = { .info = req->iv };
|
||||
struct blkcipher_walk walk;
|
||||
u8 __aligned(8) mac[AES_BLOCK_SIZE];
|
||||
u8 buf[AES_BLOCK_SIZE];
|
||||
u32 len = req->cryptlen;
|
||||
int err;
|
||||
|
||||
err = ccm_init_mac(req, mac, len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
kernel_neon_begin_partial(6);
|
||||
|
||||
if (req->assoclen)
|
||||
ccm_calculate_auth_mac(req, mac);
|
||||
|
||||
/* preserve the original iv for the final round */
|
||||
memcpy(buf, req->iv, AES_BLOCK_SIZE);
|
||||
|
||||
blkcipher_walk_init(&walk, req->dst, req->src, len);
|
||||
err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
|
||||
AES_BLOCK_SIZE);
|
||||
|
||||
while (walk.nbytes) {
|
||||
u32 tail = walk.nbytes % AES_BLOCK_SIZE;
|
||||
|
||||
if (walk.nbytes == len)
|
||||
tail = 0;
|
||||
|
||||
ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
walk.nbytes - tail, ctx->key_enc,
|
||||
num_rounds(ctx), mac, walk.iv);
|
||||
|
||||
len -= walk.nbytes - tail;
|
||||
err = blkcipher_walk_done(&desc, &walk, tail);
|
||||
}
|
||||
if (!err)
|
||||
ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
|
||||
|
||||
kernel_neon_end();
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* copy authtag to end of dst */
|
||||
scatterwalk_map_and_copy(mac, req->dst, req->cryptlen,
|
||||
crypto_aead_authsize(aead), 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ccm_decrypt(struct aead_request *req)
|
||||
{
|
||||
struct crypto_aead *aead = crypto_aead_reqtfm(req);
|
||||
struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
|
||||
unsigned int authsize = crypto_aead_authsize(aead);
|
||||
struct blkcipher_desc desc = { .info = req->iv };
|
||||
struct blkcipher_walk walk;
|
||||
u8 __aligned(8) mac[AES_BLOCK_SIZE];
|
||||
u8 buf[AES_BLOCK_SIZE];
|
||||
u32 len = req->cryptlen - authsize;
|
||||
int err;
|
||||
|
||||
err = ccm_init_mac(req, mac, len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
kernel_neon_begin_partial(6);
|
||||
|
||||
if (req->assoclen)
|
||||
ccm_calculate_auth_mac(req, mac);
|
||||
|
||||
/* preserve the original iv for the final round */
|
||||
memcpy(buf, req->iv, AES_BLOCK_SIZE);
|
||||
|
||||
blkcipher_walk_init(&walk, req->dst, req->src, len);
|
||||
err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
|
||||
AES_BLOCK_SIZE);
|
||||
|
||||
while (walk.nbytes) {
|
||||
u32 tail = walk.nbytes % AES_BLOCK_SIZE;
|
||||
|
||||
if (walk.nbytes == len)
|
||||
tail = 0;
|
||||
|
||||
ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
walk.nbytes - tail, ctx->key_enc,
|
||||
num_rounds(ctx), mac, walk.iv);
|
||||
|
||||
len -= walk.nbytes - tail;
|
||||
err = blkcipher_walk_done(&desc, &walk, tail);
|
||||
}
|
||||
if (!err)
|
||||
ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
|
||||
|
||||
kernel_neon_end();
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* compare calculated auth tag with the stored one */
|
||||
scatterwalk_map_and_copy(buf, req->src, req->cryptlen - authsize,
|
||||
authsize, 0);
|
||||
|
||||
if (memcmp(mac, buf, authsize))
|
||||
return -EBADMSG;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct crypto_alg ccm_aes_alg = {
|
||||
.cra_name = "ccm(aes)",
|
||||
.cra_driver_name = "ccm-aes-ce",
|
||||
.cra_priority = 300,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_AEAD,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_type = &crypto_aead_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_aead = {
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.maxauthsize = AES_BLOCK_SIZE,
|
||||
.setkey = ccm_setkey,
|
||||
.setauthsize = ccm_setauthsize,
|
||||
.encrypt = ccm_encrypt,
|
||||
.decrypt = ccm_decrypt,
|
||||
}
|
||||
};
|
||||
|
||||
static int __init aes_mod_init(void)
|
||||
{
|
||||
if (!(elf_hwcap & HWCAP_AES))
|
||||
return -ENODEV;
|
||||
return crypto_register_alg(&ccm_aes_alg);
|
||||
}
|
||||
|
||||
static void __exit aes_mod_exit(void)
|
||||
{
|
||||
crypto_unregister_alg(&ccm_aes_alg);
|
||||
}
|
||||
|
||||
module_init(aes_mod_init);
|
||||
module_exit(aes_mod_exit);
|
||||
|
||||
MODULE_DESCRIPTION("Synchronous AES in CCM mode using ARMv8 Crypto Extensions");
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_ALIAS("ccm(aes)");
|
arch/arm64/crypto/aes-ce-cipher.c (new file, 155 lines)
@@ -0,0 +1,155 @@
|
||||
/*
|
||||
* aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions
|
||||
*
|
||||
* Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <asm/neon.h>
|
||||
#include <crypto/aes.h>
|
||||
#include <linux/cpufeature.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions");
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
||||
struct aes_block {
|
||||
u8 b[AES_BLOCK_SIZE];
|
||||
};
|
||||
|
||||
static int num_rounds(struct crypto_aes_ctx *ctx)
|
||||
{
|
||||
/*
|
||||
* # of rounds specified by AES:
|
||||
* 128 bit key 10 rounds
|
||||
* 192 bit key 12 rounds
|
||||
* 256 bit key 14 rounds
|
||||
* => n byte key => 6 + (n/4) rounds
|
||||
*/
|
||||
return 6 + ctx->key_length / 4;
|
||||
}
|
||||
|
||||
static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
|
||||
{
|
||||
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
struct aes_block *out = (struct aes_block *)dst;
|
||||
struct aes_block const *in = (struct aes_block *)src;
|
||||
void *dummy0;
|
||||
int dummy1;
|
||||
|
||||
kernel_neon_begin_partial(4);
|
||||
|
||||
__asm__(" ld1 {v0.16b}, %[in] ;"
|
||||
" ld1 {v1.2d}, [%[key]], #16 ;"
|
||||
" cmp %w[rounds], #10 ;"
|
||||
" bmi 0f ;"
|
||||
" bne 3f ;"
|
||||
" mov v3.16b, v1.16b ;"
|
||||
" b 2f ;"
|
||||
"0: mov v2.16b, v1.16b ;"
|
||||
" ld1 {v3.2d}, [%[key]], #16 ;"
|
||||
"1: aese v0.16b, v2.16b ;"
|
||||
" aesmc v0.16b, v0.16b ;"
|
||||
"2: ld1 {v1.2d}, [%[key]], #16 ;"
|
||||
" aese v0.16b, v3.16b ;"
|
||||
" aesmc v0.16b, v0.16b ;"
|
||||
"3: ld1 {v2.2d}, [%[key]], #16 ;"
|
||||
" subs %w[rounds], %w[rounds], #3 ;"
|
||||
" aese v0.16b, v1.16b ;"
|
||||
" aesmc v0.16b, v0.16b ;"
|
||||
" ld1 {v3.2d}, [%[key]], #16 ;"
|
||||
" bpl 1b ;"
|
||||
" aese v0.16b, v2.16b ;"
|
||||
" eor v0.16b, v0.16b, v3.16b ;"
|
||||
" st1 {v0.16b}, %[out] ;"
|
||||
|
||||
: [out] "=Q"(*out),
|
||||
[key] "=r"(dummy0),
|
||||
[rounds] "=r"(dummy1)
|
||||
: [in] "Q"(*in),
|
||||
"1"(ctx->key_enc),
|
||||
"2"(num_rounds(ctx) - 2)
|
||||
: "cc");
|
||||
|
||||
kernel_neon_end();
|
||||
}
|
||||
|
||||
static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
|
||||
{
|
||||
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
struct aes_block *out = (struct aes_block *)dst;
|
||||
struct aes_block const *in = (struct aes_block *)src;
|
||||
void *dummy0;
|
||||
int dummy1;
|
||||
|
||||
kernel_neon_begin_partial(4);
|
||||
|
||||
__asm__(" ld1 {v0.16b}, %[in] ;"
|
||||
" ld1 {v1.2d}, [%[key]], #16 ;"
|
||||
" cmp %w[rounds], #10 ;"
|
||||
" bmi 0f ;"
|
||||
" bne 3f ;"
|
||||
" mov v3.16b, v1.16b ;"
|
||||
" b 2f ;"
|
||||
"0: mov v2.16b, v1.16b ;"
|
||||
" ld1 {v3.2d}, [%[key]], #16 ;"
|
||||
"1: aesd v0.16b, v2.16b ;"
|
||||
" aesimc v0.16b, v0.16b ;"
|
||||
"2: ld1 {v1.2d}, [%[key]], #16 ;"
|
||||
" aesd v0.16b, v3.16b ;"
|
||||
" aesimc v0.16b, v0.16b ;"
|
||||
"3: ld1 {v2.2d}, [%[key]], #16 ;"
|
||||
" subs %w[rounds], %w[rounds], #3 ;"
|
||||
" aesd v0.16b, v1.16b ;"
|
||||
" aesimc v0.16b, v0.16b ;"
|
||||
" ld1 {v3.2d}, [%[key]], #16 ;"
|
||||
" bpl 1b ;"
|
||||
" aesd v0.16b, v2.16b ;"
|
||||
" eor v0.16b, v0.16b, v3.16b ;"
|
||||
" st1 {v0.16b}, %[out] ;"
|
||||
|
||||
: [out] "=Q"(*out),
|
||||
[key] "=r"(dummy0),
|
||||
[rounds] "=r"(dummy1)
|
||||
: [in] "Q"(*in),
|
||||
"1"(ctx->key_dec),
|
||||
"2"(num_rounds(ctx) - 2)
|
||||
: "cc");
|
||||
|
||||
kernel_neon_end();
|
||||
}
|
||||
|
||||
static struct crypto_alg aes_alg = {
|
||||
.cra_name = "aes",
|
||||
.cra_driver_name = "aes-ce",
|
||||
.cra_priority = 300,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_cipher = {
|
||||
.cia_min_keysize = AES_MIN_KEY_SIZE,
|
||||
.cia_max_keysize = AES_MAX_KEY_SIZE,
|
||||
.cia_setkey = crypto_aes_set_key,
|
||||
.cia_encrypt = aes_cipher_encrypt,
|
||||
.cia_decrypt = aes_cipher_decrypt
|
||||
}
|
||||
};
|
||||
|
||||
static int __init aes_mod_init(void)
|
||||
{
|
||||
return crypto_register_alg(&aes_alg);
|
||||
}
|
||||
|
||||
static void __exit aes_mod_exit(void)
|
||||
{
|
||||
crypto_unregister_alg(&aes_alg);
|
||||
}
|
||||
|
||||
module_cpu_feature_match(AES, aes_mod_init);
|
||||
module_exit(aes_mod_exit);
|
arch/arm64/crypto/aes-ce.S (new file, 133 lines)
@@ -0,0 +1,133 @@
|
||||
/*
|
||||
* linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
|
||||
* Crypto Extensions
|
||||
*
|
||||
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
#define AES_ENTRY(func) ENTRY(ce_ ## func)
|
||||
#define AES_ENDPROC(func) ENDPROC(ce_ ## func)
|
||||
|
||||
.arch armv8-a+crypto
|
||||
|
||||
/* preload all round keys */
|
||||
.macro load_round_keys, rounds, rk
|
||||
cmp \rounds, #12
|
||||
blo 2222f /* 128 bits */
|
||||
beq 1111f /* 192 bits */
|
||||
ld1 {v17.16b-v18.16b}, [\rk], #32
|
||||
1111: ld1 {v19.16b-v20.16b}, [\rk], #32
|
||||
2222: ld1 {v21.16b-v24.16b}, [\rk], #64
|
||||
ld1 {v25.16b-v28.16b}, [\rk], #64
|
||||
ld1 {v29.16b-v31.16b}, [\rk]
|
||||
.endm
|
||||
|
||||
/* prepare for encryption with key in rk[] */
|
||||
.macro enc_prepare, rounds, rk, ignore
|
||||
load_round_keys \rounds, \rk
|
||||
.endm
|
||||
|
||||
/* prepare for encryption (again) but with new key in rk[] */
|
||||
.macro enc_switch_key, rounds, rk, ignore
|
||||
load_round_keys \rounds, \rk
|
||||
.endm
|
||||
|
||||
/* prepare for decryption with key in rk[] */
|
||||
.macro dec_prepare, rounds, rk, ignore
|
||||
load_round_keys \rounds, \rk
|
||||
.endm
|
||||
|
||||
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
|
||||
aes\de \i0\().16b, \k\().16b
|
||||
.ifnb \i1
|
||||
aes\de \i1\().16b, \k\().16b
|
||||
.ifnb \i3
|
||||
aes\de \i2\().16b, \k\().16b
|
||||
aes\de \i3\().16b, \k\().16b
|
||||
.endif
|
||||
.endif
|
||||
aes\mc \i0\().16b, \i0\().16b
|
||||
.ifnb \i1
|
||||
aes\mc \i1\().16b, \i1\().16b
|
||||
.ifnb \i3
|
||||
aes\mc \i2\().16b, \i2\().16b
|
||||
aes\mc \i3\().16b, \i3\().16b
|
||||
.endif
|
||||
.endif
|
||||
.endm
|
||||
|
||||
/* up to 4 interleaved encryption rounds with the same round key */
|
||||
.macro round_Nx, enc, k, i0, i1, i2, i3
|
||||
.ifc \enc, e
|
||||
do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3
|
||||
.else
|
||||
do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3
|
||||
.endif
|
||||
.endm
|
||||
|
||||
/* up to 4 interleaved final rounds */
|
||||
.macro fin_round_Nx, de, k, k2, i0, i1, i2, i3
|
||||
aes\de \i0\().16b, \k\().16b
|
||||
.ifnb \i1
|
||||
aes\de \i1\().16b, \k\().16b
|
||||
.ifnb \i3
|
||||
aes\de \i2\().16b, \k\().16b
|
||||
aes\de \i3\().16b, \k\().16b
|
||||
.endif
|
||||
.endif
|
||||
eor \i0\().16b, \i0\().16b, \k2\().16b
|
||||
.ifnb \i1
|
||||
eor \i1\().16b, \i1\().16b, \k2\().16b
|
||||
.ifnb \i3
|
||||
eor \i2\().16b, \i2\().16b, \k2\().16b
|
||||
eor \i3\().16b, \i3\().16b, \k2\().16b
|
||||
.endif
|
||||
.endif
|
||||
.endm
|
||||
|
||||
/* up to 4 interleaved blocks */
|
||||
.macro do_block_Nx, enc, rounds, i0, i1, i2, i3
|
||||
cmp \rounds, #12
|
||||
blo 2222f /* 128 bits */
|
||||
beq 1111f /* 192 bits */
|
||||
round_Nx \enc, v17, \i0, \i1, \i2, \i3
|
||||
round_Nx \enc, v18, \i0, \i1, \i2, \i3
|
||||
1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3
|
||||
round_Nx \enc, v20, \i0, \i1, \i2, \i3
|
||||
2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
|
||||
round_Nx \enc, \key, \i0, \i1, \i2, \i3
|
||||
.endr
|
||||
fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3
|
||||
.endm
|
||||
|
||||
.macro encrypt_block, in, rounds, t0, t1, t2
|
||||
do_block_Nx e, \rounds, \in
|
||||
.endm
|
||||
|
||||
.macro encrypt_block2x, i0, i1, rounds, t0, t1, t2
|
||||
do_block_Nx e, \rounds, \i0, \i1
|
||||
.endm
|
||||
|
||||
.macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
|
||||
do_block_Nx e, \rounds, \i0, \i1, \i2, \i3
|
||||
.endm
|
||||
|
||||
.macro decrypt_block, in, rounds, t0, t1, t2
|
||||
do_block_Nx d, \rounds, \in
|
||||
.endm
|
||||
|
||||
.macro decrypt_block2x, i0, i1, rounds, t0, t1, t2
|
||||
do_block_Nx d, \rounds, \i0, \i1
|
||||
.endm
|
||||
|
||||
.macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
|
||||
do_block_Nx d, \rounds, \i0, \i1, \i2, \i3
|
||||
.endm
|
||||
|
||||
#include "aes-modes.S"
|
arch/arm64/crypto/aes-glue.c (new file, 446 lines)
@@ -0,0 +1,446 @@
|
||||
/*
|
||||
* linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES
|
||||
*
|
||||
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <asm/neon.h>
|
||||
#include <asm/hwcap.h>
|
||||
#include <crypto/aes.h>
|
||||
#include <crypto/ablk_helper.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/cpufeature.h>
|
||||
|
||||
#ifdef USE_V8_CRYPTO_EXTENSIONS
|
||||
#define MODE "ce"
|
||||
#define PRIO 300
|
||||
#define aes_ecb_encrypt ce_aes_ecb_encrypt
|
||||
#define aes_ecb_decrypt ce_aes_ecb_decrypt
|
||||
#define aes_cbc_encrypt ce_aes_cbc_encrypt
|
||||
#define aes_cbc_decrypt ce_aes_cbc_decrypt
|
||||
#define aes_ctr_encrypt ce_aes_ctr_encrypt
|
||||
#define aes_xts_encrypt ce_aes_xts_encrypt
|
||||
#define aes_xts_decrypt ce_aes_xts_decrypt
|
||||
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
|
||||
#else
|
||||
#define MODE "neon"
|
||||
#define PRIO 200
|
||||
#define aes_ecb_encrypt neon_aes_ecb_encrypt
|
||||
#define aes_ecb_decrypt neon_aes_ecb_decrypt
|
||||
#define aes_cbc_encrypt neon_aes_cbc_encrypt
|
||||
#define aes_cbc_decrypt neon_aes_cbc_decrypt
|
||||
#define aes_ctr_encrypt neon_aes_ctr_encrypt
|
||||
#define aes_xts_encrypt neon_aes_xts_encrypt
|
||||
#define aes_xts_decrypt neon_aes_xts_decrypt
|
||||
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
|
||||
MODULE_ALIAS("ecb(aes)");
|
||||
MODULE_ALIAS("cbc(aes)");
|
||||
MODULE_ALIAS("ctr(aes)");
|
||||
MODULE_ALIAS("xts(aes)");
|
||||
#endif
|
||||
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
||||
/* defined in aes-modes.S */
|
||||
asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks, int first);
|
||||
asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks, int first);
|
||||
|
||||
asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks, u8 iv[], int first);
|
||||
asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks, u8 iv[], int first);
|
||||
|
||||
asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks, u8 ctr[], int first);
|
||||
|
||||
asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
|
||||
int rounds, int blocks, u8 const rk2[], u8 iv[],
|
||||
int first);
|
||||
asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
|
||||
int rounds, int blocks, u8 const rk2[], u8 iv[],
|
||||
int first);
|
||||
|
||||
struct crypto_aes_xts_ctx {
|
||||
struct crypto_aes_ctx key1;
|
||||
struct crypto_aes_ctx __aligned(8) key2;
|
||||
};
|
||||
|
||||
static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
int ret;
|
||||
|
||||
ret = crypto_aes_expand_key(&ctx->key1, in_key, key_len / 2);
|
||||
if (!ret)
|
||||
ret = crypto_aes_expand_key(&ctx->key2, &in_key[key_len / 2],
|
||||
key_len / 2);
|
||||
if (!ret)
|
||||
return 0;
|
||||
|
||||
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
int err, first, rounds = 6 + ctx->key_length / 4;
|
||||
struct blkcipher_walk walk;
|
||||
unsigned int blocks;
|
||||
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt(desc, &walk);
|
||||
|
||||
kernel_neon_begin();
|
||||
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
|
||||
aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key_enc, rounds, blocks, first);
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
}
|
||||
kernel_neon_end();
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
int err, first, rounds = 6 + ctx->key_length / 4;
|
||||
struct blkcipher_walk walk;
|
||||
unsigned int blocks;
|
||||
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt(desc, &walk);
|
||||
|
||||
kernel_neon_begin();
|
||||
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
|
||||
aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key_dec, rounds, blocks, first);
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
}
|
||||
kernel_neon_end();
|
||||
return err;
|
||||
}
|
||||
|
||||
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
int err, first, rounds = 6 + ctx->key_length / 4;
|
||||
struct blkcipher_walk walk;
|
||||
unsigned int blocks;
|
||||
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt(desc, &walk);
|
||||
|
||||
kernel_neon_begin();
|
||||
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
|
||||
aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
|
||||
first);
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
}
|
||||
kernel_neon_end();
|
||||
return err;
|
||||
}
|
||||
|
||||
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
int err, first, rounds = 6 + ctx->key_length / 4;
|
||||
struct blkcipher_walk walk;
|
||||
unsigned int blocks;
|
||||
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt(desc, &walk);
|
||||
|
||||
kernel_neon_begin();
|
||||
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
|
||||
aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key_dec, rounds, blocks, walk.iv,
|
||||
first);
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
}
|
||||
kernel_neon_end();
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
int err, first, rounds = 6 + ctx->key_length / 4;
|
||||
struct blkcipher_walk walk;
|
||||
int blocks;
|
||||
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
|
||||
|
||||
first = 1;
|
||||
kernel_neon_begin();
|
||||
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
|
||||
aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
|
||||
first);
|
||||
first = 0;
|
||||
nbytes -= blocks * AES_BLOCK_SIZE;
|
||||
if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
|
||||
break;
|
||||
err = blkcipher_walk_done(desc, &walk,
|
||||
walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
if (nbytes) {
|
||||
u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
|
||||
u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
|
||||
u8 __aligned(8) tail[AES_BLOCK_SIZE];
|
||||
|
||||
/*
|
||||
* Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
|
||||
* to tell aes_ctr_encrypt() to only read half a block.
|
||||
*/
|
||||
blocks = (nbytes <= 8) ? -1 : 1;
|
||||
|
||||
aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds,
|
||||
blocks, walk.iv, first);
|
||||
memcpy(tdst, tail, nbytes);
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
}
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
int err, first, rounds = 6 + ctx->key1.key_length / 4;
|
||||
struct blkcipher_walk walk;
|
||||
unsigned int blocks;
|
||||
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt(desc, &walk);
|
||||
|
||||
kernel_neon_begin();
|
||||
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
|
||||
aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key1.key_enc, rounds, blocks,
|
||||
(u8 *)ctx->key2.key_enc, walk.iv, first);
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
}
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
{
|
||||
struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
|
||||
int err, first, rounds = 6 + ctx->key1.key_length / 4;
|
||||
struct blkcipher_walk walk;
|
||||
unsigned int blocks;
|
||||
|
||||
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt(desc, &walk);
|
||||
|
||||
kernel_neon_begin();
|
||||
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
|
||||
aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key1.key_dec, rounds, blocks,
|
||||
(u8 *)ctx->key2.key_enc, walk.iv, first);
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
}
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static struct crypto_alg aes_algs[] = { {
|
||||
.cra_name = "__ecb-aes-" MODE,
|
||||
.cra_driver_name = "__driver-ecb-aes-" MODE,
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_blkcipher = {
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = crypto_aes_set_key,
|
||||
.encrypt = ecb_encrypt,
|
||||
.decrypt = ecb_decrypt,
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__cbc-aes-" MODE,
|
||||
.cra_driver_name = "__driver-cbc-aes-" MODE,
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_blkcipher = {
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = crypto_aes_set_key,
|
||||
.encrypt = cbc_encrypt,
|
||||
.decrypt = cbc_decrypt,
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__ctr-aes-" MODE,
|
||||
.cra_driver_name = "__driver-ctr-aes-" MODE,
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_blkcipher = {
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = crypto_aes_set_key,
|
||||
.encrypt = ctr_encrypt,
|
||||
.decrypt = ctr_encrypt,
|
||||
},
|
||||
}, {
|
||||
.cra_name = "__xts-aes-" MODE,
|
||||
.cra_driver_name = "__driver-xts-aes-" MODE,
|
||||
.cra_priority = 0,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_blkcipher = {
|
||||
.min_keysize = 2 * AES_MIN_KEY_SIZE,
|
||||
.max_keysize = 2 * AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = xts_set_key,
|
||||
.encrypt = xts_encrypt,
|
||||
.decrypt = xts_decrypt,
|
||||
},
|
||||
}, {
|
||||
.cra_name = "ecb(aes)",
|
||||
.cra_driver_name = "ecb-aes-" MODE,
|
||||
.cra_priority = PRIO,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_ablkcipher = {
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
}
|
||||
}, {
|
||||
.cra_name = "cbc(aes)",
|
||||
.cra_driver_name = "cbc-aes-" MODE,
|
||||
.cra_priority = PRIO,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_ablkcipher = {
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
}
|
||||
}, {
|
||||
.cra_name = "ctr(aes)",
|
||||
.cra_driver_name = "ctr-aes-" MODE,
|
||||
.cra_priority = PRIO,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_ablkcipher = {
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
}
|
||||
}, {
|
||||
.cra_name = "xts(aes)",
|
||||
.cra_driver_name = "xts-aes-" MODE,
|
||||
.cra_priority = PRIO,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct async_helper_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_type = &crypto_ablkcipher_type,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = ablk_init,
|
||||
.cra_exit = ablk_exit,
|
||||
.cra_ablkcipher = {
|
||||
.min_keysize = 2 * AES_MIN_KEY_SIZE,
|
||||
.max_keysize = 2 * AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = ablk_set_key,
|
||||
.encrypt = ablk_encrypt,
|
||||
.decrypt = ablk_decrypt,
|
||||
}
|
||||
} };
|
||||
|
||||
static int __init aes_init(void)
|
||||
{
|
||||
return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
|
||||
}
|
||||
|
||||
static void __exit aes_exit(void)
|
||||
{
|
||||
crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
|
||||
}
|
||||
|
||||
#ifdef USE_V8_CRYPTO_EXTENSIONS
|
||||
module_cpu_feature_match(AES, aes_init);
|
||||
#else
|
||||
module_init(aes_init);
|
||||
#endif
|
||||
module_exit(aes_exit);
|
arch/arm64/crypto/aes-modes.S (new file, 532 lines)
@@ -0,0 +1,532 @@
|
||||
/*
|
||||
* linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
|
||||
*
|
||||
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
/* included by aes-ce.S and aes-neon.S */
|
||||
|
||||
.text
|
||||
.align 4
|
||||
|
||||
/*
|
||||
* There are several ways to instantiate this code:
|
||||
* - no interleave, all inline
|
||||
* - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
|
||||
* - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
|
||||
* - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
|
||||
* - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
|
||||
*
|
||||
* Macros imported by this code:
|
||||
* - enc_prepare - setup NEON registers for encryption
|
||||
* - dec_prepare - setup NEON registers for decryption
|
||||
* - enc_switch_key - change to new key after having prepared for encryption
|
||||
* - encrypt_block - encrypt a single block
|
||||
* - decrypt block - decrypt a single block
|
||||
* - encrypt_block2x - encrypt 2 blocks in parallel (if INTERLEAVE == 2)
|
||||
* - decrypt_block2x - decrypt 2 blocks in parallel (if INTERLEAVE == 2)
|
||||
* - encrypt_block4x - encrypt 4 blocks in parallel (if INTERLEAVE == 4)
|
||||
* - decrypt_block4x - decrypt 4 blocks in parallel (if INTERLEAVE == 4)
|
||||
*/
|
||||
|
||||
#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
|
||||
#define FRAME_PUSH stp x29, x30, [sp,#-16]! ; mov x29, sp
|
||||
#define FRAME_POP ldp x29, x30, [sp],#16
|
||||
|
||||
#if INTERLEAVE == 2
|
||||
|
||||
aes_encrypt_block2x:
|
||||
encrypt_block2x v0, v1, w3, x2, x6, w7
|
||||
ret
|
||||
ENDPROC(aes_encrypt_block2x)
|
||||
|
||||
aes_decrypt_block2x:
|
||||
decrypt_block2x v0, v1, w3, x2, x6, w7
|
||||
ret
|
||||
ENDPROC(aes_decrypt_block2x)
|
||||
|
||||
#elif INTERLEAVE == 4
|
||||
|
||||
aes_encrypt_block4x:
|
||||
encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
|
||||
ret
|
||||
ENDPROC(aes_encrypt_block4x)
|
||||
|
||||
aes_decrypt_block4x:
|
||||
decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
|
||||
ret
|
||||
ENDPROC(aes_decrypt_block4x)
|
||||
|
||||
#else
|
||||
#error INTERLEAVE should equal 2 or 4
|
||||
#endif
|
||||
|
||||
.macro do_encrypt_block2x
|
||||
bl aes_encrypt_block2x
|
||||
.endm
|
||||
|
||||
.macro do_decrypt_block2x
|
||||
bl aes_decrypt_block2x
|
||||
.endm
|
||||
|
||||
.macro do_encrypt_block4x
|
||||
bl aes_encrypt_block4x
|
||||
.endm
|
||||
|
||||
.macro do_decrypt_block4x
|
||||
bl aes_decrypt_block4x
|
||||
.endm
|
||||
|
||||
#else
|
||||
#define FRAME_PUSH
|
||||
#define FRAME_POP
|
||||
|
||||
.macro do_encrypt_block2x
|
||||
encrypt_block2x v0, v1, w3, x2, x6, w7
|
||||
.endm
|
||||
|
||||
.macro do_decrypt_block2x
|
||||
decrypt_block2x v0, v1, w3, x2, x6, w7
|
||||
.endm
|
||||
|
||||
.macro do_encrypt_block4x
|
||||
encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
|
||||
.endm
|
||||
|
||||
.macro do_decrypt_block4x
|
||||
decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
|
||||
.endm
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
* int blocks, int first)
|
||||
* aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
* int blocks, int first)
|
||||
*/
|
||||
|
||||
AES_ENTRY(aes_ecb_encrypt)
|
||||
FRAME_PUSH
|
||||
cbz w5, .LecbencloopNx
|
||||
|
||||
enc_prepare w3, x2, x5
|
||||
|
||||
.LecbencloopNx:
|
||||
#if INTERLEAVE >= 2
|
||||
subs w4, w4, #INTERLEAVE
|
||||
bmi .Lecbenc1x
|
||||
#if INTERLEAVE == 2
|
||||
ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
|
||||
do_encrypt_block2x
|
||||
st1 {v0.16b-v1.16b}, [x0], #32
|
||||
#else
|
||||
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
|
||||
do_encrypt_block4x
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
#endif
|
||||
b .LecbencloopNx
|
||||
.Lecbenc1x:
|
||||
adds w4, w4, #INTERLEAVE
|
||||
beq .Lecbencout
|
||||
#endif
|
||||
.Lecbencloop:
|
||||
ld1 {v0.16b}, [x1], #16 /* get next pt block */
|
||||
encrypt_block v0, w3, x2, x5, w6
|
||||
st1 {v0.16b}, [x0], #16
|
||||
subs w4, w4, #1
|
||||
bne .Lecbencloop
|
||||
.Lecbencout:
|
||||
FRAME_POP
|
||||
ret
|
||||
AES_ENDPROC(aes_ecb_encrypt)
|
||||
|
||||
|
||||
AES_ENTRY(aes_ecb_decrypt)
|
||||
FRAME_PUSH
|
||||
cbz w5, .LecbdecloopNx
|
||||
|
||||
dec_prepare w3, x2, x5
|
||||
|
||||
.LecbdecloopNx:
|
||||
#if INTERLEAVE >= 2
|
||||
subs w4, w4, #INTERLEAVE
|
||||
bmi .Lecbdec1x
|
||||
#if INTERLEAVE == 2
|
||||
ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
|
||||
do_decrypt_block2x
|
||||
st1 {v0.16b-v1.16b}, [x0], #32
|
||||
#else
|
||||
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
|
||||
do_decrypt_block4x
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
#endif
|
||||
b .LecbdecloopNx
|
||||
.Lecbdec1x:
|
||||
adds w4, w4, #INTERLEAVE
|
||||
beq .Lecbdecout
|
||||
#endif
|
||||
.Lecbdecloop:
|
||||
ld1 {v0.16b}, [x1], #16 /* get next ct block */
|
||||
decrypt_block v0, w3, x2, x5, w6
|
||||
st1 {v0.16b}, [x0], #16
|
||||
subs w4, w4, #1
|
||||
bne .Lecbdecloop
|
||||
.Lecbdecout:
|
||||
FRAME_POP
|
||||
ret
|
||||
AES_ENDPROC(aes_ecb_decrypt)
|
||||
|
||||
|
||||
/*
|
||||
* aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
* int blocks, u8 iv[], int first)
|
||||
* aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
* int blocks, u8 iv[], int first)
|
||||
*/
|
||||
|
||||
AES_ENTRY(aes_cbc_encrypt)
|
||||
cbz w6, .Lcbcencloop
|
||||
|
||||
ld1 {v0.16b}, [x5] /* get iv */
|
||||
enc_prepare w3, x2, x5
|
||||
|
||||
.Lcbcencloop:
|
||||
ld1 {v1.16b}, [x1], #16 /* get next pt block */
|
||||
eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */
|
||||
encrypt_block v0, w3, x2, x5, w6
|
||||
st1 {v0.16b}, [x0], #16
|
||||
subs w4, w4, #1
|
||||
bne .Lcbcencloop
|
||||
ret
|
||||
AES_ENDPROC(aes_cbc_encrypt)
|
||||
|
||||
|
||||
AES_ENTRY(aes_cbc_decrypt)
|
||||
FRAME_PUSH
|
||||
cbz w6, .LcbcdecloopNx
|
||||
|
||||
ld1 {v7.16b}, [x5] /* get iv */
|
||||
dec_prepare w3, x2, x5
|
||||
|
||||
.LcbcdecloopNx:
|
||||
#if INTERLEAVE >= 2
|
||||
subs w4, w4, #INTERLEAVE
|
||||
bmi .Lcbcdec1x
|
||||
#if INTERLEAVE == 2
|
||||
ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
|
||||
mov v2.16b, v0.16b
|
||||
mov v3.16b, v1.16b
|
||||
do_decrypt_block2x
|
||||
eor v0.16b, v0.16b, v7.16b
|
||||
eor v1.16b, v1.16b, v2.16b
|
||||
mov v7.16b, v3.16b
|
||||
st1 {v0.16b-v1.16b}, [x0], #32
|
||||
#else
|
||||
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
|
||||
mov v4.16b, v0.16b
|
||||
mov v5.16b, v1.16b
|
||||
mov v6.16b, v2.16b
|
||||
do_decrypt_block4x
|
||||
sub x1, x1, #16
|
||||
eor v0.16b, v0.16b, v7.16b
|
||||
eor v1.16b, v1.16b, v4.16b
|
||||
ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
|
||||
eor v2.16b, v2.16b, v5.16b
|
||||
eor v3.16b, v3.16b, v6.16b
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
#endif
|
||||
b .LcbcdecloopNx
|
||||
.Lcbcdec1x:
|
||||
adds w4, w4, #INTERLEAVE
|
||||
beq .Lcbcdecout
|
||||
#endif
|
||||
.Lcbcdecloop:
|
||||
ld1 {v1.16b}, [x1], #16 /* get next ct block */
|
||||
mov v0.16b, v1.16b /* ...and copy to v0 */
|
||||
decrypt_block v0, w3, x2, x5, w6
|
||||
eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
|
||||
mov v7.16b, v1.16b /* ct is next iv */
|
||||
st1 {v0.16b}, [x0], #16
|
||||
subs w4, w4, #1
|
||||
bne .Lcbcdecloop
|
||||
.Lcbcdecout:
|
||||
FRAME_POP
|
||||
ret
|
||||
AES_ENDPROC(aes_cbc_decrypt)
|
||||
|
||||
|
||||
/*
|
||||
* aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
* int blocks, u8 ctr[], int first)
|
||||
*/
|
||||
|
||||
AES_ENTRY(aes_ctr_encrypt)
|
||||
FRAME_PUSH
|
||||
cbnz w6, .Lctrfirst /* 1st time around? */
|
||||
umov x5, v4.d[1] /* keep swabbed ctr in reg */
|
||||
rev x5, x5
|
||||
#if INTERLEAVE >= 2
|
||||
cmn w5, w4 /* 32 bit overflow? */
|
||||
bcs .Lctrinc
|
||||
add x5, x5, #1 /* increment BE ctr */
|
||||
b .LctrincNx
|
||||
#else
|
||||
b .Lctrinc
|
||||
#endif
|
||||
.Lctrfirst:
|
||||
enc_prepare w3, x2, x6
|
||||
ld1 {v4.16b}, [x5]
|
||||
umov x5, v4.d[1] /* keep swabbed ctr in reg */
|
||||
rev x5, x5
|
||||
#if INTERLEAVE >= 2
|
||||
cmn w5, w4 /* 32 bit overflow? */
|
||||
bcs .Lctrloop
|
||||
.LctrloopNx:
|
||||
subs w4, w4, #INTERLEAVE
|
||||
bmi .Lctr1x
|
||||
#if INTERLEAVE == 2
|
||||
mov v0.8b, v4.8b
|
||||
mov v1.8b, v4.8b
|
||||
rev x7, x5
|
||||
add x5, x5, #1
|
||||
ins v0.d[1], x7
|
||||
rev x7, x5
|
||||
add x5, x5, #1
|
||||
ins v1.d[1], x7
|
||||
ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */
|
||||
do_encrypt_block2x
|
||||
eor v0.16b, v0.16b, v2.16b
|
||||
eor v1.16b, v1.16b, v3.16b
|
||||
st1 {v0.16b-v1.16b}, [x0], #32
|
||||
#else
|
||||
ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
|
||||
dup v7.4s, w5
|
||||
mov v0.16b, v4.16b
|
||||
add v7.4s, v7.4s, v8.4s
|
||||
mov v1.16b, v4.16b
|
||||
rev32 v8.16b, v7.16b
|
||||
mov v2.16b, v4.16b
|
||||
mov v3.16b, v4.16b
|
||||
mov v1.s[3], v8.s[0]
|
||||
mov v2.s[3], v8.s[1]
|
||||
mov v3.s[3], v8.s[2]
|
||||
ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
|
||||
do_encrypt_block4x
|
||||
eor v0.16b, v5.16b, v0.16b
|
||||
ld1 {v5.16b}, [x1], #16 /* get 1 input block */
|
||||
eor v1.16b, v6.16b, v1.16b
|
||||
eor v2.16b, v7.16b, v2.16b
|
||||
eor v3.16b, v5.16b, v3.16b
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
add x5, x5, #INTERLEAVE
|
||||
#endif
|
||||
cbz w4, .LctroutNx
|
||||
.LctrincNx:
|
||||
rev x7, x5
|
||||
ins v4.d[1], x7
|
||||
b .LctrloopNx
|
||||
.LctroutNx:
|
||||
sub x5, x5, #1
|
||||
rev x7, x5
|
||||
ins v4.d[1], x7
|
||||
b .Lctrout
|
||||
.Lctr1x:
|
||||
adds w4, w4, #INTERLEAVE
|
||||
beq .Lctrout
|
||||
#endif
|
||||
.Lctrloop:
|
||||
mov v0.16b, v4.16b
|
||||
encrypt_block v0, w3, x2, x6, w7
|
||||
subs w4, w4, #1
|
||||
bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */
|
||||
ld1 {v3.16b}, [x1], #16
|
||||
eor v3.16b, v0.16b, v3.16b
|
||||
st1 {v3.16b}, [x0], #16
|
||||
beq .Lctrout
|
||||
.Lctrinc:
|
||||
adds x5, x5, #1 /* increment BE ctr */
|
||||
rev x7, x5
|
||||
ins v4.d[1], x7
|
||||
bcc .Lctrloop /* no overflow? */
|
||||
umov x7, v4.d[0] /* load upper word of ctr */
|
||||
rev x7, x7 /* ... to handle the carry */
|
||||
add x7, x7, #1
|
||||
rev x7, x7
|
||||
ins v4.d[0], x7
|
||||
b .Lctrloop
|
||||
.Lctrhalfblock:
|
||||
ld1 {v3.8b}, [x1]
|
||||
eor v3.8b, v0.8b, v3.8b
|
||||
st1 {v3.8b}, [x0]
|
||||
.Lctrout:
|
||||
FRAME_POP
|
||||
ret
|
||||
AES_ENDPROC(aes_ctr_encrypt)
|
||||
.ltorg
|
||||
|
||||
|
||||
/*
|
||||
* aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
|
||||
* int blocks, u8 const rk2[], u8 iv[], int first)
|
||||
* aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
|
||||
* int blocks, u8 const rk2[], u8 iv[], int first)
|
||||
*/
|
||||
|
||||
.macro next_tweak, out, in, const, tmp
|
||||
sshr \tmp\().2d, \in\().2d, #63
|
||||
and \tmp\().16b, \tmp\().16b, \const\().16b
|
||||
add \out\().2d, \in\().2d, \in\().2d
|
||||
ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
|
||||
eor \out\().16b, \out\().16b, \tmp\().16b
|
||||
.endm
|
||||
|
||||
.Lxts_mul_x:
|
||||
.word 1, 0, 0x87, 0
|
||||
|
||||
AES_ENTRY(aes_xts_encrypt)
|
||||
FRAME_PUSH
|
||||
cbz w7, .LxtsencloopNx
|
||||
|
||||
ld1 {v4.16b}, [x6]
|
||||
enc_prepare w3, x5, x6
|
||||
encrypt_block v4, w3, x5, x6, w7 /* first tweak */
|
||||
enc_switch_key w3, x2, x6
|
||||
ldr q7, .Lxts_mul_x
|
||||
b .LxtsencNx
|
||||
|
||||
.LxtsencloopNx:
|
||||
ldr q7, .Lxts_mul_x
|
||||
next_tweak v4, v4, v7, v8
|
||||
.LxtsencNx:
|
||||
#if INTERLEAVE >= 2
|
||||
subs w4, w4, #INTERLEAVE
|
||||
bmi .Lxtsenc1x
|
||||
#if INTERLEAVE == 2
|
||||
ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
|
||||
next_tweak v5, v4, v7, v8
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
do_encrypt_block2x
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
st1 {v0.16b-v1.16b}, [x0], #32
|
||||
cbz w4, .LxtsencoutNx
|
||||
next_tweak v4, v5, v7, v8
|
||||
b .LxtsencNx
|
||||
.LxtsencoutNx:
|
||||
mov v4.16b, v5.16b
|
||||
b .Lxtsencout
|
||||
#else
|
||||
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
|
||||
next_tweak v5, v4, v7, v8
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
next_tweak v6, v5, v7, v8
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
eor v2.16b, v2.16b, v6.16b
|
||||
next_tweak v7, v6, v7, v8
|
||||
eor v3.16b, v3.16b, v7.16b
|
||||
do_encrypt_block4x
|
||||
eor v3.16b, v3.16b, v7.16b
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
eor v2.16b, v2.16b, v6.16b
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
mov v4.16b, v7.16b
|
||||
cbz w4, .Lxtsencout
|
||||
b .LxtsencloopNx
|
||||
#endif
|
||||
.Lxtsenc1x:
|
||||
adds w4, w4, #INTERLEAVE
|
||||
beq .Lxtsencout
|
||||
#endif
|
||||
.Lxtsencloop:
|
||||
ld1 {v1.16b}, [x1], #16
|
||||
eor v0.16b, v1.16b, v4.16b
|
||||
encrypt_block v0, w3, x2, x6, w7
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
st1 {v0.16b}, [x0], #16
|
||||
subs w4, w4, #1
|
||||
beq .Lxtsencout
|
||||
next_tweak v4, v4, v7, v8
|
||||
b .Lxtsencloop
|
||||
.Lxtsencout:
|
||||
FRAME_POP
|
||||
ret
|
||||
AES_ENDPROC(aes_xts_encrypt)
|
||||
|
||||
|
||||
AES_ENTRY(aes_xts_decrypt)
|
||||
FRAME_PUSH
|
||||
cbz w7, .LxtsdecloopNx
|
||||
|
||||
ld1 {v4.16b}, [x6]
|
||||
enc_prepare w3, x5, x6
|
||||
encrypt_block v4, w3, x5, x6, w7 /* first tweak */
|
||||
dec_prepare w3, x2, x6
|
||||
ldr q7, .Lxts_mul_x
|
||||
b .LxtsdecNx
|
||||
|
||||
.LxtsdecloopNx:
|
||||
ldr q7, .Lxts_mul_x
|
||||
next_tweak v4, v4, v7, v8
|
||||
.LxtsdecNx:
|
||||
#if INTERLEAVE >= 2
|
||||
subs w4, w4, #INTERLEAVE
|
||||
bmi .Lxtsdec1x
|
||||
#if INTERLEAVE == 2
|
||||
ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
|
||||
next_tweak v5, v4, v7, v8
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
do_decrypt_block2x
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
st1 {v0.16b-v1.16b}, [x0], #32
|
||||
cbz w4, .LxtsdecoutNx
|
||||
next_tweak v4, v5, v7, v8
|
||||
b .LxtsdecNx
|
||||
.LxtsdecoutNx:
|
||||
mov v4.16b, v5.16b
|
||||
b .Lxtsdecout
|
||||
#else
|
||||
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
|
||||
next_tweak v5, v4, v7, v8
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
next_tweak v6, v5, v7, v8
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
eor v2.16b, v2.16b, v6.16b
|
||||
next_tweak v7, v6, v7, v8
|
||||
eor v3.16b, v3.16b, v7.16b
|
||||
do_decrypt_block4x
|
||||
eor v3.16b, v3.16b, v7.16b
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
eor v2.16b, v2.16b, v6.16b
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
mov v4.16b, v7.16b
|
||||
cbz w4, .Lxtsdecout
|
||||
b .LxtsdecloopNx
|
||||
#endif
|
||||
.Lxtsdec1x:
|
||||
adds w4, w4, #INTERLEAVE
|
||||
beq .Lxtsdecout
|
||||
#endif
|
||||
.Lxtsdecloop:
|
||||
ld1 {v1.16b}, [x1], #16
|
||||
eor v0.16b, v1.16b, v4.16b
|
||||
decrypt_block v0, w3, x2, x6, w7
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
st1 {v0.16b}, [x0], #16
|
||||
subs w4, w4, #1
|
||||
beq .Lxtsdecout
|
||||
next_tweak v4, v4, v7, v8
|
||||
b .Lxtsdecloop
|
||||
.Lxtsdecout:
|
||||
FRAME_POP
|
||||
ret
|
||||
AES_ENDPROC(aes_xts_decrypt)
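
The next_tweak macro used throughout the XTS code above multiplies the 128-bit tweak by x in GF(2^128), folding the carry back in with the 0x87 constant held in .Lxts_mul_x. A scalar reference of the same operation on a 16-byte, least-significant-byte-first tweak (an illustrative sketch, not the kernel's implementation):

#include <stdint.h>

/* Multiply a 16-byte XTS tweak by x in GF(2^128), reducing by
 * x^128 + x^7 + x^2 + x + 1, i.e. folding the carry in as 0x87. */
static void xts_next_tweak(uint8_t t[16])
{
        uint8_t carry = 0;
        int i;

        for (i = 0; i < 16; i++) {
                uint8_t next = t[i] >> 7;

                t[i] = (uint8_t)((t[i] << 1) | carry);
                carry = next;
        }
        if (carry)
                t[0] ^= 0x87;
}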
arch/arm64/crypto/aes-neon.S (new file, 382 lines)
@@ -0,0 +1,382 @@
|
||||
/*
|
||||
* linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
|
||||
*
|
||||
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
#define AES_ENTRY(func) ENTRY(neon_ ## func)
|
||||
#define AES_ENDPROC(func) ENDPROC(neon_ ## func)
|
||||
|
||||
/* multiply by polynomial 'x' in GF(2^8) */
|
||||
.macro mul_by_x, out, in, temp, const
|
||||
sshr \temp, \in, #7
|
||||
add \out, \in, \in
|
||||
and \temp, \temp, \const
|
||||
eor \out, \out, \temp
|
||||
.endm
|
||||
|
||||
/* preload the entire Sbox */
|
||||
.macro prepare, sbox, shiftrows, temp
|
||||
adr \temp, \sbox
|
||||
movi v12.16b, #0x40
|
||||
ldr q13, \shiftrows
|
||||
movi v14.16b, #0x1b
|
||||
ld1 {v16.16b-v19.16b}, [\temp], #64
|
||||
ld1 {v20.16b-v23.16b}, [\temp], #64
|
||||
ld1 {v24.16b-v27.16b}, [\temp], #64
|
||||
ld1 {v28.16b-v31.16b}, [\temp]
|
||||
.endm
|
||||
|
||||
/* do preload for encryption */
|
||||
.macro enc_prepare, ignore0, ignore1, temp
|
||||
prepare .LForward_Sbox, .LForward_ShiftRows, \temp
|
||||
.endm
|
||||
|
||||
.macro enc_switch_key, ignore0, ignore1, temp
|
||||
/* do nothing */
|
||||
.endm
|
||||
|
||||
/* do preload for decryption */
|
||||
.macro dec_prepare, ignore0, ignore1, temp
|
||||
prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp
|
||||
.endm
|
||||
|
||||
/* apply SubBytes transformation using the preloaded Sbox */
|
||||
.macro sub_bytes, in
|
||||
sub v9.16b, \in\().16b, v12.16b
|
||||
tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b
|
||||
sub v10.16b, v9.16b, v12.16b
|
||||
tbx \in\().16b, {v20.16b-v23.16b}, v9.16b
|
||||
sub v11.16b, v10.16b, v12.16b
|
||||
tbx \in\().16b, {v24.16b-v27.16b}, v10.16b
|
||||
tbx \in\().16b, {v28.16b-v31.16b}, v11.16b
|
||||
.endm
|
||||
|
||||
/* apply MixColumns transformation */
|
||||
.macro mix_columns, in
|
||||
mul_by_x v10.16b, \in\().16b, v9.16b, v14.16b
|
||||
rev32 v8.8h, \in\().8h
|
||||
eor \in\().16b, v10.16b, \in\().16b
|
||||
shl v9.4s, v8.4s, #24
|
||||
shl v11.4s, \in\().4s, #24
|
||||
sri v9.4s, v8.4s, #8
|
||||
sri v11.4s, \in\().4s, #8
|
||||
eor v9.16b, v9.16b, v8.16b
|
||||
eor v10.16b, v10.16b, v9.16b
|
||||
eor \in\().16b, v10.16b, v11.16b
|
||||
.endm
|
||||
|
||||
/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
|
||||
.macro inv_mix_columns, in
|
||||
mul_by_x v11.16b, \in\().16b, v10.16b, v14.16b
|
||||
mul_by_x v11.16b, v11.16b, v10.16b, v14.16b
|
||||
eor \in\().16b, \in\().16b, v11.16b
|
||||
rev32 v11.8h, v11.8h
|
||||
eor \in\().16b, \in\().16b, v11.16b
|
||||
mix_columns \in
|
||||
.endm
|
||||
|
||||
.macro do_block, enc, in, rounds, rk, rkp, i
|
||||
ld1 {v15.16b}, [\rk]
|
||||
add \rkp, \rk, #16
|
||||
mov \i, \rounds
|
||||
1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
|
||||
tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
|
||||
sub_bytes \in
|
||||
ld1 {v15.16b}, [\rkp], #16
|
||||
subs \i, \i, #1
|
||||
beq 2222f
|
||||
.if \enc == 1
|
||||
mix_columns \in
|
||||
.else
|
||||
inv_mix_columns \in
|
||||
.endif
|
||||
b 1111b
|
||||
2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
|
||||
.endm
|
||||
|
||||
.macro encrypt_block, in, rounds, rk, rkp, i
|
||||
do_block 1, \in, \rounds, \rk, \rkp, \i
|
||||
.endm
|
||||
|
||||
.macro decrypt_block, in, rounds, rk, rkp, i
|
||||
do_block 0, \in, \rounds, \rk, \rkp, \i
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Interleaved versions: functionally equivalent to the
|
||||
* ones above, but applied to 2 or 4 AES states in parallel.
|
||||
*/
|
||||
|
||||
.macro sub_bytes_2x, in0, in1
|
||||
sub v8.16b, \in0\().16b, v12.16b
|
||||
sub v9.16b, \in1\().16b, v12.16b
|
||||
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
|
||||
tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
|
||||
sub v10.16b, v8.16b, v12.16b
|
||||
sub v11.16b, v9.16b, v12.16b
|
||||
tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
|
||||
tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
|
||||
sub v8.16b, v10.16b, v12.16b
|
||||
sub v9.16b, v11.16b, v12.16b
|
||||
tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
|
||||
tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
|
||||
tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
|
||||
tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
|
||||
.endm
|
||||
|
||||
.macro sub_bytes_4x, in0, in1, in2, in3
|
||||
sub v8.16b, \in0\().16b, v12.16b
|
||||
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
|
||||
sub v9.16b, \in1\().16b, v12.16b
|
||||
tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
|
||||
sub v10.16b, \in2\().16b, v12.16b
|
||||
tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
|
||||
sub v11.16b, \in3\().16b, v12.16b
|
||||
tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
|
||||
tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
|
||||
tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
|
||||
sub v8.16b, v8.16b, v12.16b
|
||||
tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b
|
||||
sub v9.16b, v9.16b, v12.16b
|
||||
tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b
|
||||
sub v10.16b, v10.16b, v12.16b
|
||||
tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b
|
||||
sub v11.16b, v11.16b, v12.16b
|
||||
tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b
|
||||
sub v8.16b, v8.16b, v12.16b
|
||||
tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b
|
||||
sub v9.16b, v9.16b, v12.16b
|
||||
tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b
|
||||
sub v10.16b, v10.16b, v12.16b
|
||||
tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
|
||||
sub v11.16b, v11.16b, v12.16b
|
||||
tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
|
||||
tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b
|
||||
tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b
|
||||
.endm
|
||||
|
||||
.macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
|
||||
sshr \tmp0\().16b, \in0\().16b, #7
|
||||
add \out0\().16b, \in0\().16b, \in0\().16b
|
||||
sshr \tmp1\().16b, \in1\().16b, #7
|
||||
and \tmp0\().16b, \tmp0\().16b, \const\().16b
|
||||
add \out1\().16b, \in1\().16b, \in1\().16b
|
||||
and \tmp1\().16b, \tmp1\().16b, \const\().16b
|
||||
eor \out0\().16b, \out0\().16b, \tmp0\().16b
|
||||
eor \out1\().16b, \out1\().16b, \tmp1\().16b
|
||||
.endm
|
||||
|
||||
.macro mix_columns_2x, in0, in1
|
||||
mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
|
||||
rev32 v10.8h, \in0\().8h
|
||||
rev32 v11.8h, \in1\().8h
|
||||
eor \in0\().16b, v8.16b, \in0\().16b
|
||||
eor \in1\().16b, v9.16b, \in1\().16b
|
||||
shl v12.4s, v10.4s, #24
|
||||
shl v13.4s, v11.4s, #24
|
||||
eor v8.16b, v8.16b, v10.16b
|
||||
sri v12.4s, v10.4s, #8
|
||||
shl v10.4s, \in0\().4s, #24
|
||||
eor v9.16b, v9.16b, v11.16b
|
||||
sri v13.4s, v11.4s, #8
|
||||
shl v11.4s, \in1\().4s, #24
|
||||
sri v10.4s, \in0\().4s, #8
|
||||
eor \in0\().16b, v8.16b, v12.16b
|
||||
sri v11.4s, \in1\().4s, #8
|
||||
eor \in1\().16b, v9.16b, v13.16b
|
||||
eor \in0\().16b, v10.16b, \in0\().16b
|
||||
eor \in1\().16b, v11.16b, \in1\().16b
|
||||
.endm
|
||||
|
||||
.macro inv_mix_cols_2x, in0, in1
|
||||
mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
|
||||
mul_by_x_2x v8, v9, v8, v9, v10, v11, v14
|
||||
eor \in0\().16b, \in0\().16b, v8.16b
|
||||
eor \in1\().16b, \in1\().16b, v9.16b
|
||||
rev32 v8.8h, v8.8h
|
||||
rev32 v9.8h, v9.8h
|
||||
eor \in0\().16b, \in0\().16b, v8.16b
|
||||
eor \in1\().16b, \in1\().16b, v9.16b
|
||||
mix_columns_2x \in0, \in1
|
||||
.endm
|
||||
|
||||
.macro inv_mix_cols_4x, in0, in1, in2, in3
|
||||
mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
|
||||
mul_by_x_2x v10, v11, \in2, \in3, v12, v13, v14
|
||||
mul_by_x_2x v8, v9, v8, v9, v12, v13, v14
|
||||
mul_by_x_2x v10, v11, v10, v11, v12, v13, v14
|
||||
eor \in0\().16b, \in0\().16b, v8.16b
|
||||
eor \in1\().16b, \in1\().16b, v9.16b
|
||||
eor \in2\().16b, \in2\().16b, v10.16b
|
||||
eor \in3\().16b, \in3\().16b, v11.16b
|
||||
rev32 v8.8h, v8.8h
|
||||
rev32 v9.8h, v9.8h
|
||||
rev32 v10.8h, v10.8h
|
||||
rev32 v11.8h, v11.8h
|
||||
eor \in0\().16b, \in0\().16b, v8.16b
|
||||
eor \in1\().16b, \in1\().16b, v9.16b
|
||||
eor \in2\().16b, \in2\().16b, v10.16b
|
||||
eor \in3\().16b, \in3\().16b, v11.16b
|
||||
mix_columns_2x \in0, \in1
|
||||
mix_columns_2x \in2, \in3
|
||||
.endm
|
||||
|
||||
.macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i
|
||||
ld1 {v15.16b}, [\rk]
|
||||
add \rkp, \rk, #16
|
||||
mov \i, \rounds
|
||||
1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
|
||||
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
|
||||
sub_bytes_2x \in0, \in1
|
||||
tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
|
||||
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
|
||||
ld1 {v15.16b}, [\rkp], #16
|
||||
subs \i, \i, #1
|
||||
beq 2222f
|
||||
.if \enc == 1
|
||||
mix_columns_2x \in0, \in1
|
||||
ldr q13, .LForward_ShiftRows
|
||||
.else
|
||||
inv_mix_cols_2x \in0, \in1
|
||||
ldr q13, .LReverse_ShiftRows
|
||||
.endif
|
||||
movi v12.16b, #0x40
|
||||
b 1111b
|
||||
2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
|
||||
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
|
||||
.endm
|
||||
|
||||
.macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
|
||||
ld1 {v15.16b}, [\rk]
|
||||
add \rkp, \rk, #16
|
||||
mov \i, \rounds
|
||||
1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
|
||||
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
|
||||
eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
|
||||
eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
|
||||
sub_bytes_4x \in0, \in1, \in2, \in3
|
||||
tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
|
||||
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
|
||||
tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
|
||||
tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
|
||||
ld1 {v15.16b}, [\rkp], #16
|
||||
subs \i, \i, #1
|
||||
beq 2222f
|
||||
.if \enc == 1
|
||||
mix_columns_2x \in0, \in1
|
||||
mix_columns_2x \in2, \in3
|
||||
ldr q13, .LForward_ShiftRows
|
||||
.else
|
||||
inv_mix_cols_4x \in0, \in1, \in2, \in3
|
||||
ldr q13, .LReverse_ShiftRows
|
||||
.endif
|
||||
movi v12.16b, #0x40
|
||||
b 1111b
|
||||
2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
|
||||
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
|
||||
eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
|
||||
eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
|
||||
.endm
|
||||
|
||||
.macro encrypt_block2x, in0, in1, rounds, rk, rkp, i
|
||||
do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i
|
||||
.endm
|
||||
|
||||
.macro decrypt_block2x, in0, in1, rounds, rk, rkp, i
|
||||
do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i
|
||||
.endm
|
||||
|
||||
.macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
|
||||
do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
|
||||
.endm
|
||||
|
||||
.macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
|
||||
do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
|
||||
.endm
|
||||
|
||||
#include "aes-modes.S"
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.LForward_ShiftRows:
|
||||
.byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3
|
||||
.byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb
|
||||
|
||||
.LReverse_ShiftRows:
|
||||
.byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb
|
||||
.byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3
|
||||
|
||||
.LForward_Sbox:
|
||||
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
|
||||
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
|
||||
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
|
||||
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
|
||||
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
|
||||
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
|
||||
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
|
||||
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
|
||||
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
|
||||
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
|
||||
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
|
||||
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
|
||||
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
|
||||
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
|
||||
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
|
||||
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
|
||||
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
|
||||
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
|
||||
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
|
||||
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
|
||||
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
|
||||
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
|
||||
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
|
||||
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
|
||||
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
|
||||
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
|
||||
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
|
||||
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
|
||||
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
|
||||
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
|
||||
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
|
||||
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
|
||||
|
||||
.LReverse_Sbox:
|
||||
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
|
||||
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
|
||||
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
|
||||
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
|
||||
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
|
||||
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
|
||||
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
|
||||
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
|
||||
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
|
||||
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
|
||||
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
|
||||
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
|
||||
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
|
||||
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
|
||||
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
|
||||
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
|
||||
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
|
||||
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
|
||||
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
|
||||
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
|
||||
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
|
||||
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
|
||||
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
|
||||
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
|
||||
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
|
||||
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
|
||||
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
|
||||
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
|
||||
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
|
||||
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
|
||||
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
|
||||
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
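
The mul_by_x macro near the top of this file is the classic AES xtime operation: double a byte in GF(2^8) and, when the high bit was set, reduce with 0x1b (the sign-extending sshr builds the conditional mask, matching the v14 constant loaded in prepare). A scalar sketch of the same step:

#include <stdint.h>

/* Multiply one byte by x in GF(2^8) using the AES reduction
 * polynomial x^8 + x^4 + x^3 + x + 1 (0x1b). */
static uint8_t xtime(uint8_t b)
{
        uint8_t mask = (uint8_t)-(b >> 7);      /* 0xff iff the top bit is set */

        return (uint8_t)((b << 1) ^ (mask & 0x1b));
}

MixColumns only ever needs multiplications by 1, 2 and 3, and 3*b is xtime(b) ^ b, so the doubled value plus the original column are enough for the mix_columns macros above.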
arch/arm64/crypto/ghash-ce-core.S (new file, 95 lines)
@@ -0,0 +1,95 @@
|
||||
/*
|
||||
* Accelerated GHASH implementation with ARMv8 PMULL instructions.
|
||||
*
|
||||
* Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* Based on arch/x86/crypto/ghash-clmulni-intel_asm.S
|
||||
*
|
||||
* Copyright (c) 2009 Intel Corp.
|
||||
* Author: Huang Ying <ying.huang@intel.com>
|
||||
* Vinodh Gopal
|
||||
* Erdinc Ozturk
|
||||
* Deniz Karakoyunlu
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
DATA .req v0
|
||||
SHASH .req v1
|
||||
IN1 .req v2
|
||||
T1 .req v2
|
||||
T2 .req v3
|
||||
T3 .req v4
|
||||
VZR .req v5
|
||||
|
||||
.text
|
||||
.arch armv8-a+crypto
|
||||
|
||||
/*
|
||||
* void pmull_ghash_update(int blocks, u64 dg[], const char *src,
|
||||
* struct ghash_key const *k, const char *head)
|
||||
*/
|
||||
ENTRY(pmull_ghash_update)
|
||||
ld1 {DATA.16b}, [x1]
|
||||
ld1 {SHASH.16b}, [x3]
|
||||
eor VZR.16b, VZR.16b, VZR.16b
|
||||
|
||||
/* do the head block first, if supplied */
|
||||
cbz x4, 0f
|
||||
ld1 {IN1.2d}, [x4]
|
||||
b 1f
|
||||
|
||||
0: ld1 {IN1.2d}, [x2], #16
|
||||
sub w0, w0, #1
|
||||
1: ext IN1.16b, IN1.16b, IN1.16b, #8
|
||||
CPU_LE( rev64 IN1.16b, IN1.16b )
|
||||
eor DATA.16b, DATA.16b, IN1.16b
|
||||
|
||||
/* multiply DATA by SHASH in GF(2^128) */
|
||||
ext T2.16b, DATA.16b, DATA.16b, #8
|
||||
ext T3.16b, SHASH.16b, SHASH.16b, #8
|
||||
eor T2.16b, T2.16b, DATA.16b
|
||||
eor T3.16b, T3.16b, SHASH.16b
|
||||
|
||||
pmull2 T1.1q, SHASH.2d, DATA.2d // a1 * b1
|
||||
pmull DATA.1q, SHASH.1d, DATA.1d // a0 * b0
|
||||
pmull T2.1q, T2.1d, T3.1d // (a1 + a0)(b1 + b0)
|
||||
eor T2.16b, T2.16b, T1.16b // (a0 * b1) + (a1 * b0)
|
||||
eor T2.16b, T2.16b, DATA.16b
|
||||
|
||||
ext T3.16b, VZR.16b, T2.16b, #8
|
||||
ext T2.16b, T2.16b, VZR.16b, #8
|
||||
eor DATA.16b, DATA.16b, T3.16b
|
||||
eor T1.16b, T1.16b, T2.16b // <T1:DATA> is result of
|
||||
// carry-less multiplication
|
||||
|
||||
/* first phase of the reduction */
|
||||
shl T3.2d, DATA.2d, #1
|
||||
eor T3.16b, T3.16b, DATA.16b
|
||||
shl T3.2d, T3.2d, #5
|
||||
eor T3.16b, T3.16b, DATA.16b
|
||||
shl T3.2d, T3.2d, #57
|
||||
ext T2.16b, VZR.16b, T3.16b, #8
|
||||
ext T3.16b, T3.16b, VZR.16b, #8
|
||||
eor DATA.16b, DATA.16b, T2.16b
|
||||
eor T1.16b, T1.16b, T3.16b
|
||||
|
||||
/* second phase of the reduction */
|
||||
ushr T2.2d, DATA.2d, #5
|
||||
eor T2.16b, T2.16b, DATA.16b
|
||||
ushr T2.2d, T2.2d, #1
|
||||
eor T2.16b, T2.16b, DATA.16b
|
||||
ushr T2.2d, T2.2d, #1
|
||||
eor T1.16b, T1.16b, T2.16b
|
||||
eor DATA.16b, DATA.16b, T1.16b
|
||||
|
||||
cbnz w0, 0b
|
||||
|
||||
st1 {DATA.16b}, [x1]
|
||||
ret
|
||||
ENDPROC(pmull_ghash_update)
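
The three pmull/pmull2 instructions plus the two eor instructions that follow them implement a Karatsuba-style carry-less multiply: with a = a1*x^64 + a0 and b = b1*x^64 + b0, the middle term a1*b0 ^ a0*b1 is recovered as (a1^a0)*(b1^b0) ^ a1*b1 ^ a0*b0. For reference, a sketch of the 64x64-bit carry-less multiplication that a single PMULL performs (illustrative only, far slower than the instruction):

#include <stdint.h>

/* Reference carry-less (polynomial) multiplication of two 64-bit
 * values, producing a 128-bit result split into high and low halves. */
static void clmul64(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo)
{
        uint64_t h = 0, l = 0;
        int i;

        for (i = 0; i < 64; i++) {
                if ((b >> i) & 1) {
                        l ^= a << i;
                        if (i)
                                h ^= a >> (64 - i);
                }
        }
        *hi = h;
        *lo = l;
}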
arch/arm64/crypto/ghash-ce-glue.c (new file, 155 lines)
@@ -0,0 +1,155 @@
|
||||
/*
|
||||
* Accelerated GHASH implementation with ARMv8 PMULL instructions.
|
||||
*
|
||||
* Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <asm/neon.h>
|
||||
#include <asm/unaligned.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <linux/cpufeature.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
||||
#define GHASH_BLOCK_SIZE 16
|
||||
#define GHASH_DIGEST_SIZE 16
|
||||
|
||||
struct ghash_key {
|
||||
u64 a;
|
||||
u64 b;
|
||||
};
|
||||
|
||||
struct ghash_desc_ctx {
|
||||
u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
|
||||
u8 buf[GHASH_BLOCK_SIZE];
|
||||
u32 count;
|
||||
};
|
||||
|
||||
asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
|
||||
struct ghash_key const *k, const char *head);
|
||||
|
||||
static int ghash_init(struct shash_desc *desc)
|
||||
{
|
||||
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
*ctx = (struct ghash_desc_ctx){};
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ghash_update(struct shash_desc *desc, const u8 *src,
|
||||
unsigned int len)
|
||||
{
|
||||
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
|
||||
|
||||
ctx->count += len;
|
||||
|
||||
if ((partial + len) >= GHASH_BLOCK_SIZE) {
|
||||
struct ghash_key *key = crypto_shash_ctx(desc->tfm);
|
||||
int blocks;
|
||||
|
||||
if (partial) {
|
||||
int p = GHASH_BLOCK_SIZE - partial;
|
||||
|
||||
memcpy(ctx->buf + partial, src, p);
|
||||
src += p;
|
||||
len -= p;
|
||||
}
|
||||
|
||||
blocks = len / GHASH_BLOCK_SIZE;
|
||||
len %= GHASH_BLOCK_SIZE;
|
||||
|
||||
kernel_neon_begin_partial(6);
|
||||
pmull_ghash_update(blocks, ctx->digest, src, key,
|
||||
partial ? ctx->buf : NULL);
|
||||
kernel_neon_end();
|
||||
src += blocks * GHASH_BLOCK_SIZE;
partial = 0;
|
||||
}
|
||||
if (len)
|
||||
memcpy(ctx->buf + partial, src, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ghash_final(struct shash_desc *desc, u8 *dst)
|
||||
{
|
||||
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
|
||||
|
||||
if (partial) {
|
||||
struct ghash_key *key = crypto_shash_ctx(desc->tfm);
|
||||
|
||||
memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
|
||||
|
||||
kernel_neon_begin_partial(6);
|
||||
pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
|
||||
kernel_neon_end();
|
||||
}
|
||||
put_unaligned_be64(ctx->digest[1], dst);
|
||||
put_unaligned_be64(ctx->digest[0], dst + 8);
|
||||
|
||||
*ctx = (struct ghash_desc_ctx){};
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ghash_setkey(struct crypto_shash *tfm,
|
||||
const u8 *inkey, unsigned int keylen)
|
||||
{
|
||||
struct ghash_key *key = crypto_shash_ctx(tfm);
|
||||
u64 a, b;
|
||||
|
||||
if (keylen != GHASH_BLOCK_SIZE) {
|
||||
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* perform multiplication by 'x' in GF(2^128) */
|
||||
b = get_unaligned_be64(inkey);
|
||||
a = get_unaligned_be64(inkey + 8);
|
||||
|
||||
key->a = (a << 1) | (b >> 63);
|
||||
key->b = (b << 1) | (a >> 63);
|
||||
|
||||
if (b >> 63)
|
||||
key->b ^= 0xc200000000000000UL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct shash_alg ghash_alg = {
|
||||
.digestsize = GHASH_DIGEST_SIZE,
|
||||
.init = ghash_init,
|
||||
.update = ghash_update,
|
||||
.final = ghash_final,
|
||||
.setkey = ghash_setkey,
|
||||
.descsize = sizeof(struct ghash_desc_ctx),
|
||||
.base = {
|
||||
.cra_name = "ghash",
|
||||
.cra_driver_name = "ghash-ce",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.cra_blocksize = GHASH_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct ghash_key),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
};
|
||||
|
||||
static int __init ghash_ce_mod_init(void)
|
||||
{
|
||||
return crypto_register_shash(&ghash_alg);
|
||||
}
|
||||
|
||||
static void __exit ghash_ce_mod_exit(void)
|
||||
{
|
||||
crypto_unregister_shash(&ghash_alg);
|
||||
}
|
||||
|
||||
module_cpu_feature_match(PMULL, ghash_ce_mod_init);
|
||||
module_exit(ghash_ce_mod_exit);
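
ghash_update() above follows the standard partial-block buffering pattern that the SHA-1/SHA-256 glue code later in this series uses as well: stash bytes in buf[] until a whole 16-byte block is available, hand whole blocks to the NEON routine, and keep the tail for the next call. A condensed sketch of that pattern (process_blocks() is a stand-in, not a kernel function):

#include <stddef.h>
#include <string.h>

#define BLK 16                          /* GHASH_BLOCK_SIZE */

struct buf_ctx {
        unsigned char buf[BLK];
        unsigned long count;
};

static void process_blocks(const unsigned char *src, size_t blocks)
{
        (void)src;                      /* stand-in for the NEON transform */
        (void)blocks;
}

static void buffered_update(struct buf_ctx *ctx, const unsigned char *src,
                            size_t len)
{
        size_t partial = ctx->count % BLK;

        ctx->count += len;

        if (partial + len >= BLK) {
                size_t blocks;

                if (partial) {
                        size_t p = BLK - partial;

                        memcpy(ctx->buf + partial, src, p);
                        process_blocks(ctx->buf, 1);
                        src += p;
                        len -= p;
                }

                blocks = len / BLK;
                process_blocks(src, blocks);
                src += blocks * BLK;
                len %= BLK;
                partial = 0;
        }
        if (len)
                memcpy(ctx->buf + partial, src, len);
}

The real code passes the buffered head and the bulk data to pmull_ghash_update() in one call, so the kernel_neon_begin()/kernel_neon_end() window is entered only once per update.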
arch/arm64/crypto/sha1-ce-core.S (new file, 153 lines)
@@ -0,0 +1,153 @@
|
||||
/*
|
||||
* sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
|
||||
*
|
||||
* Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
.text
|
||||
.arch armv8-a+crypto
|
||||
|
||||
k0 .req v0
|
||||
k1 .req v1
|
||||
k2 .req v2
|
||||
k3 .req v3
|
||||
|
||||
t0 .req v4
|
||||
t1 .req v5
|
||||
|
||||
dga .req q6
|
||||
dgav .req v6
|
||||
dgb .req s7
|
||||
dgbv .req v7
|
||||
|
||||
dg0q .req q12
|
||||
dg0s .req s12
|
||||
dg0v .req v12
|
||||
dg1s .req s13
|
||||
dg1v .req v13
|
||||
dg2s .req s14
|
||||
|
||||
.macro add_only, op, ev, rc, s0, dg1
|
||||
.ifc \ev, ev
|
||||
add t1.4s, v\s0\().4s, \rc\().4s
|
||||
sha1h dg2s, dg0s
|
||||
.ifnb \dg1
|
||||
sha1\op dg0q, \dg1, t0.4s
|
||||
.else
|
||||
sha1\op dg0q, dg1s, t0.4s
|
||||
.endif
|
||||
.else
|
||||
.ifnb \s0
|
||||
add t0.4s, v\s0\().4s, \rc\().4s
|
||||
.endif
|
||||
sha1h dg1s, dg0s
|
||||
sha1\op dg0q, dg2s, t1.4s
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro add_update, op, ev, rc, s0, s1, s2, s3, dg1
|
||||
sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s
|
||||
add_only \op, \ev, \rc, \s1, \dg1
|
||||
sha1su1 v\s0\().4s, v\s3\().4s
|
||||
.endm
|
||||
|
||||
/*
|
||||
* The SHA1 round constants
|
||||
*/
|
||||
.align 4
|
||||
.Lsha1_rcon:
|
||||
.word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
|
||||
|
||||
/*
|
||||
* void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
|
||||
* u8 *head, long bytes)
|
||||
*/
|
||||
ENTRY(sha1_ce_transform)
|
||||
/* load round constants */
|
||||
adr x6, .Lsha1_rcon
|
||||
ld1r {k0.4s}, [x6], #4
|
||||
ld1r {k1.4s}, [x6], #4
|
||||
ld1r {k2.4s}, [x6], #4
|
||||
ld1r {k3.4s}, [x6]
|
||||
|
||||
/* load state */
|
||||
ldr dga, [x2]
|
||||
ldr dgb, [x2, #16]
|
||||
|
||||
/* load partial state (if supplied) */
|
||||
cbz x3, 0f
|
||||
ld1 {v8.4s-v11.4s}, [x3]
|
||||
b 1f
|
||||
|
||||
/* load input */
|
||||
0: ld1 {v8.4s-v11.4s}, [x1], #64
|
||||
sub w0, w0, #1
|
||||
|
||||
1:
|
||||
CPU_LE( rev32 v8.16b, v8.16b )
|
||||
CPU_LE( rev32 v9.16b, v9.16b )
|
||||
CPU_LE( rev32 v10.16b, v10.16b )
|
||||
CPU_LE( rev32 v11.16b, v11.16b )
|
||||
|
||||
2: add t0.4s, v8.4s, k0.4s
|
||||
mov dg0v.16b, dgav.16b
|
||||
|
||||
add_update c, ev, k0, 8, 9, 10, 11, dgb
|
||||
add_update c, od, k0, 9, 10, 11, 8
|
||||
add_update c, ev, k0, 10, 11, 8, 9
|
||||
add_update c, od, k0, 11, 8, 9, 10
|
||||
add_update c, ev, k1, 8, 9, 10, 11
|
||||
|
||||
add_update p, od, k1, 9, 10, 11, 8
|
||||
add_update p, ev, k1, 10, 11, 8, 9
|
||||
add_update p, od, k1, 11, 8, 9, 10
|
||||
add_update p, ev, k1, 8, 9, 10, 11
|
||||
add_update p, od, k2, 9, 10, 11, 8
|
||||
|
||||
add_update m, ev, k2, 10, 11, 8, 9
|
||||
add_update m, od, k2, 11, 8, 9, 10
|
||||
add_update m, ev, k2, 8, 9, 10, 11
|
||||
add_update m, od, k2, 9, 10, 11, 8
|
||||
add_update m, ev, k3, 10, 11, 8, 9
|
||||
|
||||
add_update p, od, k3, 11, 8, 9, 10
|
||||
add_only p, ev, k3, 9
|
||||
add_only p, od, k3, 10
|
||||
add_only p, ev, k3, 11
|
||||
add_only p, od
|
||||
|
||||
/* update state */
|
||||
add dgbv.2s, dgbv.2s, dg1v.2s
|
||||
add dgav.4s, dgav.4s, dg0v.4s
|
||||
|
||||
cbnz w0, 0b
|
||||
|
||||
/*
|
||||
* Final block: add padding and total bit count.
|
||||
* Skip if we have no total byte count in x4. In that case, the input
|
||||
* size was not a round multiple of the block size, and the padding is
|
||||
* handled by the C code.
|
||||
*/
|
||||
cbz x4, 3f
|
||||
movi v9.2d, #0
|
||||
mov x8, #0x80000000
|
||||
movi v10.2d, #0
|
||||
ror x7, x4, #29 // ror(lsl(x4, 3), 32)
|
||||
fmov d8, x8
|
||||
mov x4, #0
|
||||
mov v11.d[0], xzr
|
||||
mov v11.d[1], x7
|
||||
b 2b
|
||||
|
||||
/* store new state */
|
||||
3: str dga, [x2]
|
||||
str dgb, [x2, #16]
|
||||
ret
|
||||
ENDPROC(sha1_ce_transform)
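
The final-block path above (after cbz x4, 3f) only runs when the caller passed the total byte count in x4, i.e. when the message length is an exact multiple of the 64-byte block; it then appends the standard SHA padding block itself: a 0x80 byte, zero fill, and the bit count as a big-endian 64-bit value in the last eight bytes. A sketch of the block being assembled there:

#include <stdint.h>
#include <string.h>

/* Final padding block for a message whose byte length is already a
 * multiple of 64: 0x80, zero fill, then the length in bits, big-endian,
 * in the last 8 bytes. */
static void build_final_block(uint8_t block[64], uint64_t total_bytes)
{
        uint64_t bits = total_bytes * 8;
        int i;

        memset(block, 0, 64);
        block[0] = 0x80;
        for (i = 0; i < 8; i++)
                block[56 + i] = (uint8_t)(bits >> (56 - 8 * i));
}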
arch/arm64/crypto/sha1-ce-glue.c (new file, 174 lines)
@@ -0,0 +1,174 @@
|
||||
/*
|
||||
* sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
|
||||
*
|
||||
* Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <asm/neon.h>
|
||||
#include <asm/unaligned.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <crypto/sha.h>
|
||||
#include <linux/cpufeature.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
||||
asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
|
||||
u8 *head, long bytes);
|
||||
|
||||
static int sha1_init(struct shash_desc *desc)
|
||||
{
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
*sctx = (struct sha1_state){
|
||||
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha1_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len)
|
||||
{
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
|
||||
|
||||
sctx->count += len;
|
||||
|
||||
if ((partial + len) >= SHA1_BLOCK_SIZE) {
|
||||
int blocks;
|
||||
|
||||
if (partial) {
|
||||
int p = SHA1_BLOCK_SIZE - partial;
|
||||
|
||||
memcpy(sctx->buffer + partial, data, p);
|
||||
data += p;
|
||||
len -= p;
|
||||
}
|
||||
|
||||
blocks = len / SHA1_BLOCK_SIZE;
|
||||
len %= SHA1_BLOCK_SIZE;
|
||||
|
||||
kernel_neon_begin_partial(16);
|
||||
sha1_ce_transform(blocks, data, sctx->state,
|
||||
partial ? sctx->buffer : NULL, 0);
|
||||
kernel_neon_end();
|
||||
|
||||
data += blocks * SHA1_BLOCK_SIZE;
|
||||
partial = 0;
|
||||
}
|
||||
if (len)
|
||||
memcpy(sctx->buffer + partial, data, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha1_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
|
||||
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
__be64 bits = cpu_to_be64(sctx->count << 3);
|
||||
__be32 *dst = (__be32 *)out;
|
||||
int i;
|
||||
|
||||
u32 padlen = SHA1_BLOCK_SIZE
|
||||
- ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
|
||||
|
||||
sha1_update(desc, padding, padlen);
|
||||
sha1_update(desc, (const u8 *)&bits, sizeof(bits));
|
||||
|
||||
for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
|
||||
put_unaligned_be32(sctx->state[i], dst++);
|
||||
|
||||
*sctx = (struct sha1_state){};
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha1_finup(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len, u8 *out)
|
||||
{
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
__be32 *dst = (__be32 *)out;
|
||||
int blocks;
|
||||
int i;
|
||||
|
||||
if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
|
||||
sha1_update(desc, data, len);
|
||||
return sha1_final(desc, out);
|
||||
}
|
||||
|
||||
/*
|
||||
* Use a fast path if the input is a multiple of 64 bytes. In
|
||||
* this case, there is no need to copy data around, and we can
|
||||
* perform the entire digest calculation in a single invocation
|
||||
* of sha1_ce_transform()
|
||||
*/
|
||||
blocks = len / SHA1_BLOCK_SIZE;
|
||||
|
||||
kernel_neon_begin_partial(16);
|
||||
sha1_ce_transform(blocks, data, sctx->state, NULL, len);
|
||||
kernel_neon_end();
|
||||
|
||||
for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
|
||||
put_unaligned_be32(sctx->state[i], dst++);
|
||||
|
||||
*sctx = (struct sha1_state){};
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha1_export(struct shash_desc *desc, void *out)
|
||||
{
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
struct sha1_state *dst = out;
|
||||
|
||||
*dst = *sctx;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha1_import(struct shash_desc *desc, const void *in)
|
||||
{
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
struct sha1_state const *src = in;
|
||||
|
||||
*sctx = *src;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct shash_alg alg = {
|
||||
.init = sha1_init,
|
||||
.update = sha1_update,
|
||||
.final = sha1_final,
|
||||
.finup = sha1_finup,
|
||||
.export = sha1_export,
|
||||
.import = sha1_import,
|
||||
.descsize = sizeof(struct sha1_state),
|
||||
.digestsize = SHA1_DIGEST_SIZE,
|
||||
.statesize = sizeof(struct sha1_state),
|
||||
.base = {
|
||||
.cra_name = "sha1",
|
||||
.cra_driver_name = "sha1-ce",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.cra_blocksize = SHA1_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
||||
static int __init sha1_ce_mod_init(void)
|
||||
{
|
||||
return crypto_register_shash(&alg);
|
||||
}
|
||||
|
||||
static void __exit sha1_ce_mod_fini(void)
|
||||
{
|
||||
crypto_unregister_shash(&alg);
|
||||
}
|
||||
|
||||
module_cpu_feature_match(SHA1, sha1_ce_mod_init);
|
||||
module_exit(sha1_ce_mod_fini);
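
sha1_final() above sizes its padding so that count + padlen + 8 always lands on a 64-byte boundary, with padlen between 1 and 64 (the lone 0x80 byte lives in the padding array). A tiny self-checking sketch of that arithmetic:

#include <assert.h>
#include <stdint.h>

#define BLOCK 64                        /* SHA1_BLOCK_SIZE */

static unsigned int sha1_padlen(uint64_t count)
{
        /* same expression as in sha1_final() above */
        return BLOCK - (unsigned int)((count + sizeof(uint64_t)) % BLOCK);
}

int main(void)
{
        uint64_t count;

        for (count = 0; count < 4 * BLOCK; count++)
                assert((count + sha1_padlen(count) + sizeof(uint64_t)) % BLOCK == 0);
        return 0;
}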
arch/arm64/crypto/sha2-ce-core.S (new file, 156 lines)
@@ -0,0 +1,156 @@
|
||||
/*
|
||||
* sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions
|
||||
*
|
||||
* Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
.text
|
||||
.arch armv8-a+crypto
|
||||
|
||||
dga .req q20
|
||||
dgav .req v20
|
||||
dgb .req q21
|
||||
dgbv .req v21
|
||||
|
||||
t0 .req v22
|
||||
t1 .req v23
|
||||
|
||||
dg0q .req q24
|
||||
dg0v .req v24
|
||||
dg1q .req q25
|
||||
dg1v .req v25
|
||||
dg2q .req q26
|
||||
dg2v .req v26
|
||||
|
||||
.macro add_only, ev, rc, s0
|
||||
mov dg2v.16b, dg0v.16b
|
||||
.ifeq \ev
|
||||
add t1.4s, v\s0\().4s, \rc\().4s
|
||||
sha256h dg0q, dg1q, t0.4s
|
||||
sha256h2 dg1q, dg2q, t0.4s
|
||||
.else
|
||||
.ifnb \s0
|
||||
add t0.4s, v\s0\().4s, \rc\().4s
|
||||
.endif
|
||||
sha256h dg0q, dg1q, t1.4s
|
||||
sha256h2 dg1q, dg2q, t1.4s
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro add_update, ev, rc, s0, s1, s2, s3
|
||||
sha256su0 v\s0\().4s, v\s1\().4s
|
||||
add_only \ev, \rc, \s1
|
||||
sha256su1 v\s0\().4s, v\s2\().4s, v\s3\().4s
|
||||
.endm
|
||||
|
||||
/*
|
||||
* The SHA-256 round constants
|
||||
*/
|
||||
.align 4
|
||||
.Lsha2_rcon:
|
||||
.word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
|
||||
.word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
|
||||
.word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
|
||||
.word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
|
||||
.word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
|
||||
.word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
|
||||
.word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
|
||||
.word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
|
||||
.word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
|
||||
.word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
|
||||
.word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
|
||||
.word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
|
||||
.word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
|
||||
.word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
|
||||
.word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
|
||||
.word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
|
||||
|
||||
/*
|
||||
* void sha2_ce_transform(int blocks, u8 const *src, u32 *state,
|
||||
* u8 *head, long bytes)
|
||||
*/
|
||||
ENTRY(sha2_ce_transform)
|
||||
/* load round constants */
|
||||
adr x8, .Lsha2_rcon
|
||||
ld1 { v0.4s- v3.4s}, [x8], #64
|
||||
ld1 { v4.4s- v7.4s}, [x8], #64
|
||||
ld1 { v8.4s-v11.4s}, [x8], #64
|
||||
ld1 {v12.4s-v15.4s}, [x8]
|
||||
|
||||
/* load state */
|
||||
ldp dga, dgb, [x2]
|
||||
|
||||
/* load partial input (if supplied) */
|
||||
cbz x3, 0f
|
||||
ld1 {v16.4s-v19.4s}, [x3]
|
||||
b 1f
|
||||
|
||||
/* load input */
|
||||
0: ld1 {v16.4s-v19.4s}, [x1], #64
|
||||
sub w0, w0, #1
|
||||
|
||||
1:
|
||||
CPU_LE( rev32 v16.16b, v16.16b )
|
||||
CPU_LE( rev32 v17.16b, v17.16b )
|
||||
CPU_LE( rev32 v18.16b, v18.16b )
|
||||
CPU_LE( rev32 v19.16b, v19.16b )
|
||||
|
||||
2: add t0.4s, v16.4s, v0.4s
|
||||
mov dg0v.16b, dgav.16b
|
||||
mov dg1v.16b, dgbv.16b
|
||||
|
||||
add_update 0, v1, 16, 17, 18, 19
|
||||
add_update 1, v2, 17, 18, 19, 16
|
||||
add_update 0, v3, 18, 19, 16, 17
|
||||
add_update 1, v4, 19, 16, 17, 18
|
||||
|
||||
add_update 0, v5, 16, 17, 18, 19
|
||||
add_update 1, v6, 17, 18, 19, 16
|
||||
add_update 0, v7, 18, 19, 16, 17
|
||||
add_update 1, v8, 19, 16, 17, 18
|
||||
|
||||
add_update 0, v9, 16, 17, 18, 19
|
||||
add_update 1, v10, 17, 18, 19, 16
|
||||
add_update 0, v11, 18, 19, 16, 17
|
||||
add_update 1, v12, 19, 16, 17, 18
|
||||
|
||||
add_only 0, v13, 17
|
||||
add_only 1, v14, 18
|
||||
add_only 0, v15, 19
|
||||
add_only 1
|
||||
|
||||
/* update state */
|
||||
add dgav.4s, dgav.4s, dg0v.4s
|
||||
add dgbv.4s, dgbv.4s, dg1v.4s
|
||||
|
||||
/* handled all input blocks? */
|
||||
cbnz w0, 0b
|
||||
|
||||
/*
|
||||
* Final block: add padding and total bit count.
|
||||
* Skip if we have no total byte count in x4. In that case, the input
|
||||
* size was not a round multiple of the block size, and the padding is
|
||||
* handled by the C code.
|
||||
*/
|
||||
cbz x4, 3f
|
||||
movi v17.2d, #0
|
||||
mov x8, #0x80000000
|
||||
movi v18.2d, #0
|
||||
ror x7, x4, #29 // ror(lsl(x4, 3), 32)
|
||||
fmov d16, x8
|
||||
mov x4, #0
|
||||
mov v19.d[0], xzr
|
||||
mov v19.d[1], x7
|
||||
b 2b
|
||||
|
||||
/* store new state */
|
||||
3: stp dga, dgb, [x2]
|
||||
ret
|
||||
ENDPROC(sha2_ce_transform)
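
Each add_update above pairs a sha256su0/sha256su1 with a round computation; the su instructions extend the message schedule four words at a time using the standard SHA-256 recurrence. A scalar reference of that recurrence over a rolling 16-word window (a sketch, not the kernel code):

#include <stdint.h>

static uint32_t ror32(uint32_t x, unsigned int n)
{
        return (x >> n) | (x << (32 - n));
}

/* For t >= 16: W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16],
 * computed over a 16-word circular buffer holding W[t-16]..W[t-1]. */
static uint32_t sha256_schedule(const uint32_t w[16], unsigned int t)
{
        uint32_t w15 = w[(t - 15) % 16];
        uint32_t w2  = w[(t - 2) % 16];
        uint32_t s0  = ror32(w15, 7) ^ ror32(w15, 18) ^ (w15 >> 3);
        uint32_t s1  = ror32(w2, 17) ^ ror32(w2, 19) ^ (w2 >> 10);

        return w[t % 16] + s0 + w[(t - 7) % 16] + s1;
}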
arch/arm64/crypto/sha2-ce-glue.c (new file, 255 lines)
@@ -0,0 +1,255 @@
|
||||
/*
|
||||
* sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
|
||||
*
|
||||
* Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <asm/neon.h>
|
||||
#include <asm/unaligned.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <crypto/sha.h>
|
||||
#include <linux/cpufeature.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
||||
asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state,
|
||||
u8 *head, long bytes);
|
||||
|
||||
static int sha224_init(struct shash_desc *desc)
|
||||
{
|
||||
struct sha256_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
*sctx = (struct sha256_state){
|
||||
.state = {
|
||||
SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
|
||||
SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
|
||||
}
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha256_init(struct shash_desc *desc)
|
||||
{
|
||||
struct sha256_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
*sctx = (struct sha256_state){
|
||||
.state = {
|
||||
SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
|
||||
SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
|
||||
}
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha2_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len)
|
||||
{
|
||||
struct sha256_state *sctx = shash_desc_ctx(desc);
|
||||
unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
|
||||
|
||||
sctx->count += len;
|
||||
|
||||
if ((partial + len) >= SHA256_BLOCK_SIZE) {
|
||||
int blocks;
|
||||
|
||||
if (partial) {
|
||||
int p = SHA256_BLOCK_SIZE - partial;
|
||||
|
||||
memcpy(sctx->buf + partial, data, p);
|
||||
data += p;
|
||||
len -= p;
|
||||
}
|
||||
|
||||
blocks = len / SHA256_BLOCK_SIZE;
|
||||
len %= SHA256_BLOCK_SIZE;
|
||||
|
||||
kernel_neon_begin_partial(28);
|
||||
sha2_ce_transform(blocks, data, sctx->state,
|
||||
partial ? sctx->buf : NULL, 0);
|
||||
kernel_neon_end();
|
||||
|
||||
data += blocks * SHA256_BLOCK_SIZE;
|
||||
partial = 0;
|
||||
}
|
||||
if (len)
|
||||
memcpy(sctx->buf + partial, data, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void sha2_final(struct shash_desc *desc)
{
	static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };

	struct sha256_state *sctx = shash_desc_ctx(desc);
	__be64 bits = cpu_to_be64(sctx->count << 3);
	u32 padlen = SHA256_BLOCK_SIZE
		     - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);

	sha2_update(desc, padding, padlen);
	sha2_update(desc, (const u8 *)&bits, sizeof(bits));
}

static int sha224_final(struct shash_desc *desc, u8 *out)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	__be32 *dst = (__be32 *)out;
	int i;

	sha2_final(desc);

	for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
		put_unaligned_be32(sctx->state[i], dst++);

	*sctx = (struct sha256_state){};
	return 0;
}

static int sha256_final(struct shash_desc *desc, u8 *out)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	__be32 *dst = (__be32 *)out;
	int i;

	sha2_final(desc);

	for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
		put_unaligned_be32(sctx->state[i], dst++);

	*sctx = (struct sha256_state){};
	return 0;
}

static void sha2_finup(struct shash_desc *desc, const u8 *data,
		       unsigned int len)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	int blocks;

	if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) {
		sha2_update(desc, data, len);
		sha2_final(desc);
		return;
	}

	/*
	 * Use a fast path if the input is a multiple of 64 bytes. In
	 * this case, there is no need to copy data around, and we can
	 * perform the entire digest calculation in a single invocation
	 * of sha2_ce_transform()
	 */
	blocks = len / SHA256_BLOCK_SIZE;

	kernel_neon_begin_partial(28);
	sha2_ce_transform(blocks, data, sctx->state, NULL, len);
	kernel_neon_end();
	data += blocks * SHA256_BLOCK_SIZE;
}

static int sha224_finup(struct shash_desc *desc, const u8 *data,
			unsigned int len, u8 *out)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	__be32 *dst = (__be32 *)out;
	int i;

	sha2_finup(desc, data, len);

	for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
		put_unaligned_be32(sctx->state[i], dst++);

	*sctx = (struct sha256_state){};
	return 0;
}

static int sha256_finup(struct shash_desc *desc, const u8 *data,
			unsigned int len, u8 *out)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	__be32 *dst = (__be32 *)out;
	int i;

	sha2_finup(desc, data, len);

	for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
		put_unaligned_be32(sctx->state[i], dst++);

	*sctx = (struct sha256_state){};
	return 0;
}

static int sha2_export(struct shash_desc *desc, void *out)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	struct sha256_state *dst = out;

	*dst = *sctx;
	return 0;
}

static int sha2_import(struct shash_desc *desc, const void *in)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	struct sha256_state const *src = in;

	*sctx = *src;
	return 0;
}

static struct shash_alg algs[] = { {
	.init = sha224_init,
	.update = sha2_update,
	.final = sha224_final,
	.finup = sha224_finup,
	.export = sha2_export,
	.import = sha2_import,
	.descsize = sizeof(struct sha256_state),
	.digestsize = SHA224_DIGEST_SIZE,
	.statesize = sizeof(struct sha256_state),
	.base = {
		.cra_name = "sha224",
		.cra_driver_name = "sha224-ce",
		.cra_priority = 200,
		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize = SHA256_BLOCK_SIZE,
		.cra_module = THIS_MODULE,
	}
}, {
	.init = sha256_init,
	.update = sha2_update,
	.final = sha256_final,
	.finup = sha256_finup,
	.export = sha2_export,
	.import = sha2_import,
	.descsize = sizeof(struct sha256_state),
	.digestsize = SHA256_DIGEST_SIZE,
	.statesize = sizeof(struct sha256_state),
	.base = {
		.cra_name = "sha256",
		.cra_driver_name = "sha256-ce",
		.cra_priority = 200,
		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize = SHA256_BLOCK_SIZE,
		.cra_module = THIS_MODULE,
	}
} };

static int __init sha2_ce_mod_init(void)
{
	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}

static void __exit sha2_ce_mod_fini(void)
{
	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}

module_cpu_feature_match(SHA2, sha2_ce_mod_init);
module_exit(sha2_ce_mod_fini);
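Illustrative usage sketch, not part of the merged patch: once the driver above is registered, any in-kernel caller asking the crypto API for "sha256" is routed to the higher-priority "sha256-ce" implementation on CPUs with the SHA2 extension. The helper below is an assumed example; only the crypto_* calls are real APIs.

/*
 * Minimal shash consumer sketch (assumption, for illustration only).
 * Error handling is trimmed to the essentials.
 */
#include <crypto/hash.h>
#include <linux/slab.h>

static int example_sha256(const u8 *data, unsigned int len, u8 *digest)
{
	struct crypto_shash *tfm = crypto_alloc_shash("sha256", 0, 0);
	struct shash_desc *desc;
	int ret;

	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		crypto_free_shash(tfm);
		return -ENOMEM;
	}
	desc->tfm = tfm;

	ret = crypto_shash_digest(desc, data, len, digest); /* init+update+final */

	kfree(desc);
	crypto_free_shash(tfm);
	return ret;
}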
@ -40,6 +40,7 @@ generic-y += segment.h
generic-y += sembuf.h
generic-y += serial.h
generic-y += shmbuf.h
generic-y += simd.h
generic-y += sizes.h
generic-y += socket.h
generic-y += sockios.h
@ -21,6 +21,7 @@
#endif

#include <asm/ptrace.h>
#include <asm/thread_info.h>

/*
 * Stack pushing/popping (register pairs only). Equivalent to store decrement
@ -68,23 +69,31 @@
	msr	daifclr, #8
	.endm

	.macro	disable_step, tmp
	.macro	disable_step_tsk, flgs, tmp
	tbz	\flgs, #TIF_SINGLESTEP, 9990f
	mrs	\tmp, mdscr_el1
	bic	\tmp, \tmp, #1
	msr	mdscr_el1, \tmp
	isb	// Synchronise with enable_dbg
9990:
	.endm

	.macro	enable_step, tmp
	.macro	enable_step_tsk, flgs, tmp
	tbz	\flgs, #TIF_SINGLESTEP, 9990f
	disable_dbg
	mrs	\tmp, mdscr_el1
	orr	\tmp, \tmp, #1
	msr	mdscr_el1, \tmp
9990:
	.endm

	.macro	enable_dbg_if_not_stepping, tmp
	mrs	\tmp, mdscr_el1
	tbnz	\tmp, #0, 9990f
	enable_dbg
9990:
/*
 * Enable both debug exceptions and interrupts. This is likely to be
 * faster than two daifclr operations, since writes to this register
 * are self-synchronising.
 */
	.macro	enable_dbg_and_irq
	msr	daifclr, #(8 | 2)
	.endm

/*
@ -157,7 +157,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 */
#define ATOMIC64_INIT(i) { (i) }

#define atomic64_read(v) (*(volatile long long *)&(v)->counter)
#define atomic64_read(v) (*(volatile long *)&(v)->counter)
#define atomic64_set(v,i) (((v)->counter) = (i))

static inline void atomic64_add(u64 i, atomic64_t *v)
@ -25,12 +25,12 @@
#define wfi() asm volatile("wfi" : : : "memory")

#define isb() asm volatile("isb" : : : "memory")
#define dmb(opt) asm volatile("dmb sy" : : : "memory")
#define dsb(opt) asm volatile("dsb sy" : : : "memory")
#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
#define dsb(opt) asm volatile("dsb " #opt : : : "memory")

#define mb() dsb()
#define rmb() asm volatile("dsb ld" : : : "memory")
#define wmb() asm volatile("dsb st" : : : "memory")
#define mb() dsb(sy)
#define rmb() dsb(ld)
#define wmb() dsb(st)

#ifndef CONFIG_SMP
#define smp_mb() barrier()
@ -40,7 +40,7 @@
#define smp_store_release(p, v) \
do { \
	compiletime_assert_atomic_type(*p); \
	smp_mb(); \
	barrier(); \
	ACCESS_ONCE(*p) = (v); \
} while (0)

@ -48,15 +48,15 @@ do { \
({ \
	typeof(*p) ___p1 = ACCESS_ONCE(*p); \
	compiletime_assert_atomic_type(*p); \
	smp_mb(); \
	barrier(); \
	___p1; \
})

#else

#define smp_mb() asm volatile("dmb ish" : : : "memory")
#define smp_rmb() asm volatile("dmb ishld" : : : "memory")
#define smp_wmb() asm volatile("dmb ishst" : : : "memory")
#define smp_mb() dmb(ish)
#define smp_rmb() dmb(ishld)
#define smp_wmb() dmb(ishst)

#define smp_store_release(p, v) \
do { \
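Illustrative sketch, not from the patch: the smp_store_release()/smp_load_acquire() accessors reworked above are typically used to publish data behind a flag. On arm64 SMP, with the definitions shown, both sides resolve to a dmb(ish) ordered around the access.

/* Publish/consume sketch (assumption, for illustration only). */
struct msg {
	int payload;
	int ready;
};

static void publish(struct msg *m, int v)
{
	m->payload = v;				/* ordinary store              */
	smp_store_release(&m->ready, 1);	/* ordered after the payload   */
}

static int consume(struct msg *m)
{
	if (smp_load_acquire(&m->ready))	/* ordered before the read below */
		return m->payload;
	return -1;
}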
@ -16,6 +16,8 @@
#ifndef __ASM_CACHE_H
#define __ASM_CACHE_H

#include <asm/cachetype.h>

#define L1_CACHE_SHIFT 6
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)

@ -27,6 +29,15 @@
 * the CPU.
 */
#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
#define ARCH_SLAB_MINALIGN 8

#ifndef __ASSEMBLY__

static inline int cache_line_size(void)
{
	u32 cwg = cache_type_cwg();
	return cwg ? 4 << cwg : L1_CACHE_BYTES;
}

#endif /* __ASSEMBLY__ */

#endif
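Worked example (illustrative only) of how the new cache_line_size() helper above evaluates:

/*
 * cache_line_size() decodes CTR_EL0.CWG, the maximum cache writeback
 * granule, which is log2-encoded in units of 4-byte words:
 *   CWG == 4  ->  4 << 4 = 64-byte lines
 *   CWG == 5  ->  4 << 5 = 128-byte lines
 *   CWG == 0  ->  not reported, fall back to L1_CACHE_BYTES (64)
 */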
@ -123,7 +123,7 @@ extern void flush_dcache_page(struct page *);
static inline void __flush_icache_all(void)
{
	asm("ic	ialluis");
	dsb();
	dsb(ish);
}

#define flush_dcache_mmap_lock(mapping) \
@ -150,7 +150,7 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end)
	 * set_pte_at() called from vmap_pte_range() does not
	 * have a DSB after cleaning the cache line.
	 */
	dsb();
	dsb(ish);
}

static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
@ -20,12 +20,16 @@

#define CTR_L1IP_SHIFT 14
#define CTR_L1IP_MASK 3
#define CTR_CWG_SHIFT 24
#define CTR_CWG_MASK 15

#define ICACHE_POLICY_RESERVED 0
#define ICACHE_POLICY_AIVIVT 1
#define ICACHE_POLICY_VIPT 2
#define ICACHE_POLICY_PIPT 3

#ifndef __ASSEMBLY__

static inline u32 icache_policy(void)
{
	return (read_cpuid_cachetype() >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK;
@ -45,4 +49,11 @@ static inline int icache_is_aivivt(void)
	return icache_policy() == ICACHE_POLICY_AIVIVT;
}

static inline u32 cache_type_cwg(void)
{
	return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK;
}

#endif /* __ASSEMBLY__ */

#endif /* __ASM_CACHETYPE_H */
@ -72,7 +72,12 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
}

#define xchg(ptr,x) \
	((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
({ \
	__typeof__(*(ptr)) __ret; \
	__ret = (__typeof__(*(ptr))) \
		__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \
	__ret; \
})

static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
				      unsigned long new, int size)
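Illustrative sketch, not from the patch, of why xchg() is now wrapped in a statement expression: the result can be used or silently discarded without "value computed is not used" warnings.

/* Assumed example: xchg() used both for its value and as a plain statement. */
static int example_xchg(int *flag)
{
	int old = xchg(flag, 1);	/* value used: previous contents of *flag */

	xchg(flag, 0);			/* result intentionally ignored */
	return old;
}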
@ -305,11 +305,6 @@ static inline int is_compat_thread(struct thread_info *thread)

#else /* !CONFIG_COMPAT */

static inline int is_compat_task(void)
{
	return 0;
}

static inline int is_compat_thread(struct thread_info *thread)
{
	return 0;
@ -18,9 +18,11 @@
#ifndef __ASM_ESR_H
#define __ASM_ESR_H

#define ESR_EL1_EC_SHIFT (26)
#define ESR_EL1_IL (1U << 25)
#define ESR_EL1_WRITE (1 << 6)
#define ESR_EL1_CM (1 << 8)
#define ESR_EL1_IL (1 << 25)

#define ESR_EL1_EC_SHIFT (26)
#define ESR_EL1_EC_UNKNOWN (0x00)
#define ESR_EL1_EC_WFI (0x01)
#define ESR_EL1_EC_CP15_32 (0x03)
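Illustrative sketch, not from the patch: decoding an ESR_EL1 value with the constants defined above. ESR_EL1_WRITE and ESR_EL1_CM are only meaningful for data aborts.

/* Assumed example: splitting a syndrome value into its main fields. */
static void example_decode_esr(unsigned int esr)
{
	unsigned int ec  = esr >> ESR_EL1_EC_SHIFT;	/* exception class        */
	unsigned int il  = !!(esr & ESR_EL1_IL);	/* 32-bit instruction     */
	unsigned int wnr = !!(esr & ESR_EL1_WRITE);	/* data abort was a write */
	unsigned int cm  = !!(esr & ESR_EL1_CM);	/* cache maintenance op   */

	pr_debug("esr=%#x ec=%#x il=%u wnr=%u cm=%u\n", esr, ec, il, wnr, cm);
}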
@ -37,8 +37,21 @@ struct fpsimd_state {
		u32 fpcr;
	};
};
	/* the id of the last cpu to have restored this state */
	unsigned int cpu;
};

/*
 * Struct for stacking the bottom 'n' FP/SIMD registers.
 */
struct fpsimd_partial_state {
	u32 fpsr;
	u32 fpcr;
	u32 num_regs;
	__uint128_t vregs[32];
};


#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* Masks for extracting the FPSR and FPCR from the FPSCR */
#define VFP_FPSCR_STAT_MASK 0xf800009f
@ -58,6 +71,16 @@ extern void fpsimd_load_state(struct fpsimd_state *state);
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);

extern void fpsimd_preserve_current_state(void);
extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct fpsimd_state *state);

extern void fpsimd_flush_task_state(struct task_struct *target);

extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
				      u32 num_regs);
extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);

#endif

#endif
@ -62,3 +62,38 @@
|
||||
ldr w\tmpnr, [\state, #16 * 2 + 4]
|
||||
msr fpcr, x\tmpnr
|
||||
.endm
|
||||
|
||||
.altmacro
|
||||
.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
|
||||
mrs x\tmpnr1, fpsr
|
||||
str w\numnr, [\state, #8]
|
||||
mrs x\tmpnr2, fpcr
|
||||
stp w\tmpnr1, w\tmpnr2, [\state]
|
||||
adr x\tmpnr1, 0f
|
||||
add \state, \state, x\numnr, lsl #4
|
||||
sub x\tmpnr1, x\tmpnr1, x\numnr, lsl #1
|
||||
br x\tmpnr1
|
||||
.irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
|
||||
.irp qb, %(qa + 1)
|
||||
stp q\qa, q\qb, [\state, # -16 * \qa - 16]
|
||||
.endr
|
||||
.endr
|
||||
0:
|
||||
.endm
|
||||
|
||||
.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
|
||||
ldp w\tmpnr1, w\tmpnr2, [\state]
|
||||
msr fpsr, x\tmpnr1
|
||||
msr fpcr, x\tmpnr2
|
||||
adr x\tmpnr1, 0f
|
||||
ldr w\tmpnr2, [\state, #8]
|
||||
add \state, \state, x\tmpnr2, lsl #4
|
||||
sub x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1
|
||||
br x\tmpnr1
|
||||
.irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
|
||||
.irp qb, %(qa + 1)
|
||||
ldp q\qa, q\qb, [\state, # -16 * \qa - 16]
|
||||
.endr
|
||||
.endr
|
||||
0:
|
||||
.endm
|
||||
|
59
arch/arm64/include/asm/ftrace.h
Normal file
59
arch/arm64/include/asm/ftrace.h
Normal file
@ -0,0 +1,59 @@
|
||||
/*
|
||||
* arch/arm64/include/asm/ftrace.h
|
||||
*
|
||||
* Copyright (C) 2013 Linaro Limited
|
||||
* Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
#ifndef __ASM_FTRACE_H
|
||||
#define __ASM_FTRACE_H
|
||||
|
||||
#include <asm/insn.h>
|
||||
|
||||
#define MCOUNT_ADDR ((unsigned long)_mcount)
|
||||
#define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
#include <linux/compat.h>
|
||||
|
||||
extern void _mcount(unsigned long);
|
||||
extern void *return_address(unsigned int);
|
||||
|
||||
struct dyn_arch_ftrace {
|
||||
/* No extra data needed for arm64 */
|
||||
};
|
||||
|
||||
extern unsigned long ftrace_graph_call;
|
||||
|
||||
static inline unsigned long ftrace_call_adjust(unsigned long addr)
|
||||
{
|
||||
/*
|
||||
* addr is the address of the mcount call instruction.
|
||||
* recordmcount does the necessary offset calculation.
|
||||
*/
|
||||
return addr;
|
||||
}
|
||||
|
||||
#define ftrace_return_address(n) return_address(n)
|
||||
|
||||
/*
|
||||
* Because AArch32 mode does not share the same syscall table with AArch64,
|
||||
* tracing compat syscalls may result in reporting bogus syscalls or even
|
||||
* hang-up, so just do not trace them.
|
||||
* See kernel/trace/trace_syscalls.c
|
||||
*
|
||||
* x86 code says:
|
||||
* If the user realy wants these, then they should use the
|
||||
* raw syscall tracepoints with filtering.
|
||||
*/
|
||||
#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
|
||||
static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
|
||||
{
|
||||
return is_compat_task();
|
||||
}
|
||||
#endif /* ifndef __ASSEMBLY__ */
|
||||
|
||||
#endif /* __ASM_FTRACE_H */
|
@ -20,7 +20,7 @@
#include <linux/threads.h>
#include <asm/irq.h>

#define NR_IPI 5
#define NR_IPI 6

typedef struct {
	unsigned int __softirq_pending;
@ -21,6 +21,7 @@
|
||||
/* A64 instructions are always 32 bits. */
|
||||
#define AARCH64_INSN_SIZE 4
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
/*
|
||||
* ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a
|
||||
* Section C3.1 "A64 instruction index by encoding":
|
||||
@ -104,5 +105,6 @@ bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
|
||||
int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
|
||||
int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
|
||||
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* __ASM_INSN_H */
|
||||
|
@ -230,19 +230,11 @@ extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot
|
||||
extern void __iounmap(volatile void __iomem *addr);
|
||||
extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
|
||||
|
||||
#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_DIRTY)
|
||||
#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
|
||||
#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL_NC))
|
||||
#define PROT_NORMAL (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
|
||||
|
||||
#define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
|
||||
#define ioremap_nocache(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
|
||||
#define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
|
||||
#define iounmap __iounmap
|
||||
|
||||
#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF)
|
||||
#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PTE_PXN | PTE_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
|
||||
|
||||
#define ARCH_HAS_IOREMAP_WC
|
||||
#include <asm-generic/iomap.h>
|
||||
|
||||
|
@ -8,7 +8,11 @@
 * published by the Free Software Foundation.
 */

#include <linux/types.h>

#define cpu_has_neon() (1)

void kernel_neon_begin(void);
#define kernel_neon_begin() kernel_neon_begin_partial(32)

void kernel_neon_begin_partial(u32 num_regs);
void kernel_neon_end(void);
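Illustrative sketch, not from the patch: a caller of the partial-save interface declared above only pays for the registers it actually clobbers (the SHA-2 glue earlier in this diff passes 28). do_neon_work() is a hypothetical placeholder, not a real kernel function.

/* Assumed example of kernel-mode NEON with a partial register save. */
extern void do_neon_work(const u8 *in, u8 *out, int blocks);	/* hypothetical */

static void example_neon_user(const u8 *in, u8 *out, int blocks)
{
	kernel_neon_begin_partial(4);	/* only q0-q3 are clobbered below */
	do_neon_work(in, out, blocks);
	kernel_neon_end();
}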
@ -29,6 +29,8 @@
|
||||
*/
|
||||
|
||||
#define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1)
|
||||
#define PUD_TYPE_MASK (_AT(pgdval_t, 3) << 0)
|
||||
#define PUD_TYPE_SECT (_AT(pgdval_t, 1) << 0)
|
||||
|
||||
/*
|
||||
* Level 2 descriptor (PMD).
|
||||
|
@ -52,66 +52,59 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
|
||||
#endif
|
||||
#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd))
|
||||
|
||||
/*
|
||||
* The pgprot_* and protection_map entries will be fixed up at runtime to
|
||||
* include the cachable and bufferable bits based on memory policy, as well as
|
||||
* any architecture dependent bits like global/ASID and SMP shared mapping
|
||||
* bits.
|
||||
*/
|
||||
#define _PAGE_DEFAULT PTE_TYPE_PAGE | PTE_AF
|
||||
#ifdef CONFIG_SMP
|
||||
#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
|
||||
#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
|
||||
#else
|
||||
#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF)
|
||||
#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF)
|
||||
#endif
|
||||
|
||||
extern pgprot_t pgprot_default;
|
||||
#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
|
||||
#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
|
||||
#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
|
||||
|
||||
#define __pgprot_modify(prot,mask,bits) \
|
||||
__pgprot((pgprot_val(prot) & ~(mask)) | (bits))
|
||||
#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
|
||||
#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
|
||||
#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
|
||||
|
||||
#define _MOD_PROT(p, b) __pgprot_modify(p, 0, b)
|
||||
#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
|
||||
|
||||
#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_PXN | PTE_UXN)
|
||||
#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
|
||||
#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
|
||||
#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
|
||||
#define PAGE_COPY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN)
|
||||
#define PAGE_READONLY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
|
||||
#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN)
|
||||
#define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
|
||||
#define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY | PTE_WRITE)
|
||||
#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
|
||||
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
|
||||
|
||||
#define PAGE_HYP _MOD_PROT(pgprot_default, PTE_HYP)
|
||||
#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP)
|
||||
#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
|
||||
|
||||
#define PAGE_S2 __pgprot_modify(pgprot_default, PTE_S2_MEMATTR_MASK, PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
|
||||
#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
|
||||
#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN)
|
||||
|
||||
#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
|
||||
#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
|
||||
#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
|
||||
#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
|
||||
#define __PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
|
||||
#define __PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
|
||||
#define __PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
|
||||
#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
|
||||
#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
|
||||
#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
|
||||
#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
|
||||
#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
|
||||
#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
|
||||
#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
#define __P000 PAGE_NONE
|
||||
#define __P001 PAGE_READONLY
|
||||
#define __P010 PAGE_COPY
|
||||
#define __P011 PAGE_COPY
|
||||
#define __P100 PAGE_READONLY_EXEC
|
||||
#define __P101 PAGE_READONLY_EXEC
|
||||
#define __P110 PAGE_COPY_EXEC
|
||||
#define __P111 PAGE_COPY_EXEC
|
||||
|
||||
#define __P000 __PAGE_NONE
|
||||
#define __P001 __PAGE_READONLY
|
||||
#define __P010 __PAGE_COPY
|
||||
#define __P011 __PAGE_COPY
|
||||
#define __P100 __PAGE_READONLY_EXEC
|
||||
#define __P101 __PAGE_READONLY_EXEC
|
||||
#define __P110 __PAGE_COPY_EXEC
|
||||
#define __P111 __PAGE_COPY_EXEC
|
||||
#define __S000 PAGE_NONE
|
||||
#define __S001 PAGE_READONLY
|
||||
#define __S010 PAGE_SHARED
|
||||
#define __S011 PAGE_SHARED
|
||||
#define __S100 PAGE_READONLY_EXEC
|
||||
#define __S101 PAGE_READONLY_EXEC
|
||||
#define __S110 PAGE_SHARED_EXEC
|
||||
#define __S111 PAGE_SHARED_EXEC
|
||||
|
||||
#define __S000 __PAGE_NONE
|
||||
#define __S001 __PAGE_READONLY
|
||||
#define __S010 __PAGE_SHARED
|
||||
#define __S011 __PAGE_SHARED
|
||||
#define __S100 __PAGE_READONLY_EXEC
|
||||
#define __S101 __PAGE_READONLY_EXEC
|
||||
#define __S110 __PAGE_SHARED_EXEC
|
||||
#define __S111 __PAGE_SHARED_EXEC
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
/*
|
||||
* ZERO_PAGE is a global shared page that is always zero: used
|
||||
* for zero-mapped memory areas etc..
|
||||
@ -265,6 +258,7 @@ static inline pmd_t pte_pmd(pte_t pte)
|
||||
#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
|
||||
|
||||
#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
|
||||
#define pud_pfn(pud) (((pud_val(pud) & PUD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
|
||||
|
||||
#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
|
||||
|
||||
@ -273,6 +267,9 @@ static inline int has_transparent_hugepage(void)
|
||||
return 1;
|
||||
}
|
||||
|
||||
#define __pgprot_modify(prot,mask,bits) \
|
||||
__pgprot((pgprot_val(prot) & ~(mask)) | (bits))
|
||||
|
||||
/*
|
||||
* Mark the prot value as uncacheable and unbufferable.
|
||||
*/
|
||||
@ -295,11 +292,17 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
|
||||
#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
|
||||
PMD_TYPE_SECT)
|
||||
|
||||
#ifdef ARM64_64K_PAGES
|
||||
#define pud_sect(pud) (0)
|
||||
#else
|
||||
#define pud_sect(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \
|
||||
PUD_TYPE_SECT)
|
||||
#endif
|
||||
|
||||
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
|
||||
{
|
||||
*pmdp = pmd;
|
||||
dsb();
|
||||
dsb(ishst);
|
||||
}
|
||||
|
||||
static inline void pmd_clear(pmd_t *pmdp)
|
||||
@ -329,7 +332,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
|
||||
static inline void set_pud(pud_t *pudp, pud_t pud)
|
||||
{
|
||||
*pudp = pud;
|
||||
dsb();
|
||||
dsb(ishst);
|
||||
}
|
||||
|
||||
static inline void pud_clear(pud_t *pudp)
|
||||
|
@ -79,6 +79,7 @@ struct thread_struct {
|
||||
unsigned long tp_value;
|
||||
struct fpsimd_state fpsimd_state;
|
||||
unsigned long fault_address; /* fault info */
|
||||
unsigned long fault_code; /* ESR_EL1 value */
|
||||
struct debug_info debug; /* debugging */
|
||||
};
|
||||
|
||||
|
@ -135,6 +135,11 @@ struct pt_regs {
|
||||
#define user_stack_pointer(regs) \
|
||||
(!compat_user_mode(regs)) ? ((regs)->sp) : ((regs)->compat_sp)
|
||||
|
||||
static inline unsigned long regs_return_value(struct pt_regs *regs)
|
||||
{
|
||||
return regs->regs[0];
|
||||
}
|
||||
|
||||
/*
|
||||
* Are the current registers suitable for user mode? (used to maintain
|
||||
* security in signal handlers)
|
||||
|
@ -1,31 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2012 ARM Ltd.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef __ASM_SIGCONTEXT_H
|
||||
#define __ASM_SIGCONTEXT_H
|
||||
|
||||
#include <uapi/asm/sigcontext.h>
|
||||
|
||||
/*
|
||||
* Auxiliary context saved in the sigcontext.__reserved array. Not exported to
|
||||
* user space as it will change with the addition of new context. User space
|
||||
* should check the magic/size information.
|
||||
*/
|
||||
struct aux_context {
|
||||
struct fpsimd_context fpsimd;
|
||||
/* additional context to be added before "end" */
|
||||
struct _aarch64_ctx end;
|
||||
};
|
||||
#endif
|
@ -22,6 +22,18 @@ extern char *strrchr(const char *, int c);
|
||||
#define __HAVE_ARCH_STRCHR
|
||||
extern char *strchr(const char *, int c);
|
||||
|
||||
#define __HAVE_ARCH_STRCMP
|
||||
extern int strcmp(const char *, const char *);
|
||||
|
||||
#define __HAVE_ARCH_STRNCMP
|
||||
extern int strncmp(const char *, const char *, __kernel_size_t);
|
||||
|
||||
#define __HAVE_ARCH_STRLEN
|
||||
extern __kernel_size_t strlen(const char *);
|
||||
|
||||
#define __HAVE_ARCH_STRNLEN
|
||||
extern __kernel_size_t strnlen(const char *, __kernel_size_t);
|
||||
|
||||
#define __HAVE_ARCH_MEMCPY
|
||||
extern void *memcpy(void *, const void *, __kernel_size_t);
|
||||
|
||||
@ -34,4 +46,7 @@ extern void *memchr(const void *, int, __kernel_size_t);
|
||||
#define __HAVE_ARCH_MEMSET
|
||||
extern void *memset(void *, int, __kernel_size_t);
|
||||
|
||||
#define __HAVE_ARCH_MEMCMP
|
||||
extern int memcmp(const void *, const void *, size_t);
|
||||
|
||||
#endif
|
||||
|
@ -18,6 +18,7 @@
|
||||
|
||||
#include <linux/err.h>
|
||||
|
||||
extern const void *sys_call_table[];
|
||||
|
||||
static inline int syscall_get_nr(struct task_struct *task,
|
||||
struct pt_regs *regs)
|
||||
|
@ -91,6 +91,9 @@ static inline struct thread_info *current_thread_info(void)
|
||||
/*
|
||||
* thread information flags:
|
||||
* TIF_SYSCALL_TRACE - syscall trace active
|
||||
* TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace
|
||||
* TIF_SYSCALL_AUDIT - syscall auditing
|
||||
* TIF_SECOMP - syscall secure computing
|
||||
* TIF_SIGPENDING - signal pending
|
||||
* TIF_NEED_RESCHED - rescheduling necessary
|
||||
* TIF_NOTIFY_RESUME - callback before returning to user
|
||||
@ -99,7 +102,11 @@ static inline struct thread_info *current_thread_info(void)
|
||||
#define TIF_SIGPENDING 0
|
||||
#define TIF_NEED_RESCHED 1
|
||||
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
|
||||
#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
|
||||
#define TIF_SYSCALL_TRACE 8
|
||||
#define TIF_SYSCALL_AUDIT 9
|
||||
#define TIF_SYSCALL_TRACEPOINT 10
|
||||
#define TIF_SECCOMP 11
|
||||
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
|
||||
#define TIF_FREEZE 19
|
||||
#define TIF_RESTORE_SIGMASK 20
|
||||
@ -110,10 +117,18 @@ static inline struct thread_info *current_thread_info(void)
|
||||
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
|
||||
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
|
||||
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
|
||||
#define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
|
||||
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
|
||||
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
|
||||
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
|
||||
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
|
||||
#define _TIF_32BIT (1 << TIF_32BIT)
|
||||
|
||||
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
|
||||
_TIF_NOTIFY_RESUME)
|
||||
_TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
|
||||
|
||||
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
|
||||
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP)
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
#endif /* __ASM_THREAD_INFO_H */
|
||||
|
@ -72,9 +72,9 @@ extern struct cpu_tlb_fns cpu_tlb;
|
||||
*/
|
||||
static inline void flush_tlb_all(void)
|
||||
{
|
||||
dsb();
|
||||
dsb(ishst);
|
||||
asm("tlbi vmalle1is");
|
||||
dsb();
|
||||
dsb(ish);
|
||||
isb();
|
||||
}
|
||||
|
||||
@ -82,9 +82,9 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
|
||||
{
|
||||
unsigned long asid = (unsigned long)ASID(mm) << 48;
|
||||
|
||||
dsb();
|
||||
dsb(ishst);
|
||||
asm("tlbi aside1is, %0" : : "r" (asid));
|
||||
dsb();
|
||||
dsb(ish);
|
||||
}
|
||||
|
||||
static inline void flush_tlb_page(struct vm_area_struct *vma,
|
||||
@ -93,16 +93,36 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
|
||||
unsigned long addr = uaddr >> 12 |
|
||||
((unsigned long)ASID(vma->vm_mm) << 48);
|
||||
|
||||
dsb();
|
||||
dsb(ishst);
|
||||
asm("tlbi vae1is, %0" : : "r" (addr));
|
||||
dsb();
|
||||
dsb(ish);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert calls to our calling convention.
|
||||
*/
|
||||
#define flush_tlb_range(vma,start,end) __cpu_flush_user_tlb_range(start,end,vma)
|
||||
#define flush_tlb_kernel_range(s,e) __cpu_flush_kern_tlb_range(s,e)
|
||||
static inline void flush_tlb_range(struct vm_area_struct *vma,
|
||||
unsigned long start, unsigned long end)
|
||||
{
|
||||
unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48;
|
||||
unsigned long addr;
|
||||
start = asid | (start >> 12);
|
||||
end = asid | (end >> 12);
|
||||
|
||||
dsb(ishst);
|
||||
for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
|
||||
asm("tlbi vae1is, %0" : : "r"(addr));
|
||||
dsb(ish);
|
||||
}
|
||||
|
||||
static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
|
||||
{
|
||||
unsigned long addr;
|
||||
start >>= 12;
|
||||
end >>= 12;
|
||||
|
||||
dsb(ishst);
|
||||
for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
|
||||
asm("tlbi vaae1is, %0" : : "r"(addr));
|
||||
dsb(ish);
|
||||
}
|
||||
|
||||
/*
|
||||
* On AArch64, the cache coherency is handled via the set_pte_at() function.
|
||||
@ -114,7 +134,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
|
||||
* set_pte() does not have a DSB, so make sure that the page table
|
||||
* write is visible.
|
||||
*/
|
||||
dsb();
|
||||
dsb(ishst);
|
||||
}
|
||||
|
||||
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
|
||||
|
@ -20,9 +20,6 @@ extern struct cpu_topology cpu_topology[NR_CPUS];
|
||||
#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling)
|
||||
#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling)
|
||||
|
||||
#define mc_capable() (cpu_topology[0].cluster_id != -1)
|
||||
#define smt_capable() (cpu_topology[0].thread_id != -1)
|
||||
|
||||
void init_cpu_topology(void);
|
||||
void store_cpu_topology(unsigned int cpuid);
|
||||
const struct cpumask *cpu_coregroup_mask(int cpu);
|
||||
|
@ -29,3 +29,5 @@
|
||||
#endif
|
||||
#define __ARCH_WANT_SYS_CLONE
|
||||
#include <uapi/asm/unistd.h>
|
||||
|
||||
#define NR_syscalls (__NR_syscalls)
|
||||
|
@ -53,5 +53,12 @@ struct fpsimd_context {
|
||||
__uint128_t vregs[32];
|
||||
};
|
||||
|
||||
/* ESR_EL1 context */
|
||||
#define ESR_MAGIC 0x45535201
|
||||
|
||||
struct esr_context {
|
||||
struct _aarch64_ctx head;
|
||||
u64 esr;
|
||||
};
|
||||
|
||||
#endif /* _UAPI__ASM_SIGCONTEXT_H */
|
||||
|
@ -7,14 +7,19 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
|
||||
CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) \
|
||||
-I$(src)/../../../scripts/dtc/libfdt
|
||||
|
||||
CFLAGS_REMOVE_ftrace.o = -pg
|
||||
CFLAGS_REMOVE_insn.o = -pg
|
||||
CFLAGS_REMOVE_return_address.o = -pg
|
||||
|
||||
# Object file lists.
|
||||
arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \
|
||||
entry-fpsimd.o process.o ptrace.o setup.o signal.o \
|
||||
sys.o stacktrace.o time.o traps.o io.o vdso.o \
|
||||
hyp-stub.o psci.o cpu_ops.o insn.o
|
||||
hyp-stub.o psci.o cpu_ops.o insn.o return_address.o
|
||||
|
||||
arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
|
||||
sys_compat.o
|
||||
arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
|
||||
arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o
|
||||
arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o topology.o
|
||||
arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
|
||||
|
@ -44,10 +44,15 @@ EXPORT_SYMBOL(memstart_addr);
|
||||
/* string / mem functions */
|
||||
EXPORT_SYMBOL(strchr);
|
||||
EXPORT_SYMBOL(strrchr);
|
||||
EXPORT_SYMBOL(strcmp);
|
||||
EXPORT_SYMBOL(strncmp);
|
||||
EXPORT_SYMBOL(strlen);
|
||||
EXPORT_SYMBOL(strnlen);
|
||||
EXPORT_SYMBOL(memset);
|
||||
EXPORT_SYMBOL(memcpy);
|
||||
EXPORT_SYMBOL(memmove);
|
||||
EXPORT_SYMBOL(memchr);
|
||||
EXPORT_SYMBOL(memcmp);
|
||||
|
||||
/* atomic bitops */
|
||||
EXPORT_SYMBOL(set_bit);
|
||||
@ -56,3 +61,7 @@ EXPORT_SYMBOL(clear_bit);
|
||||
EXPORT_SYMBOL(test_and_clear_bit);
|
||||
EXPORT_SYMBOL(change_bit);
|
||||
EXPORT_SYMBOL(test_and_change_bit);
|
||||
|
||||
#ifdef CONFIG_FUNCTION_TRACER
|
||||
EXPORT_SYMBOL(_mcount);
|
||||
#endif
|
||||
|
@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state)
|
||||
fpsimd_restore x0, 8
|
||||
ret
|
||||
ENDPROC(fpsimd_load_state)
|
||||
|
||||
#ifdef CONFIG_KERNEL_MODE_NEON
|
||||
|
||||
/*
|
||||
* Save the bottom n FP registers.
|
||||
*
|
||||
* x0 - pointer to struct fpsimd_partial_state
|
||||
*/
|
||||
ENTRY(fpsimd_save_partial_state)
|
||||
fpsimd_save_partial x0, 1, 8, 9
|
||||
ret
|
||||
ENDPROC(fpsimd_load_partial_state)
|
||||
|
||||
/*
|
||||
* Load the bottom n FP registers.
|
||||
*
|
||||
* x0 - pointer to struct fpsimd_partial_state
|
||||
*/
|
||||
ENTRY(fpsimd_load_partial_state)
|
||||
fpsimd_restore_partial x0, 8, 9
|
||||
ret
|
||||
ENDPROC(fpsimd_load_partial_state)
|
||||
|
||||
#endif
|
||||
|
218
arch/arm64/kernel/entry-ftrace.S
Normal file
218
arch/arm64/kernel/entry-ftrace.S
Normal file
@ -0,0 +1,218 @@
|
||||
/*
|
||||
* arch/arm64/kernel/entry-ftrace.S
|
||||
*
|
||||
* Copyright (C) 2013 Linaro Limited
|
||||
* Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/ftrace.h>
|
||||
#include <asm/insn.h>
|
||||
|
||||
/*
|
||||
* Gcc with -pg will put the following code in the beginning of each function:
|
||||
* mov x0, x30
|
||||
* bl _mcount
|
||||
* [function's body ...]
|
||||
* "bl _mcount" may be replaced to "bl ftrace_caller" or NOP if dynamic
|
||||
* ftrace is enabled.
|
||||
*
|
||||
* Please note that x0 as an argument will not be used here because we can
|
||||
* get lr(x30) of instrumented function at any time by winding up call stack
|
||||
* as long as the kernel is compiled without -fomit-frame-pointer.
|
||||
* (or CONFIG_FRAME_POINTER, this is forced on arm64)
|
||||
*
|
||||
* stack layout after mcount_enter in _mcount():
|
||||
*
|
||||
* current sp/fp => 0:+-----+
|
||||
* in _mcount() | x29 | -> instrumented function's fp
|
||||
* +-----+
|
||||
* | x30 | -> _mcount()'s lr (= instrumented function's pc)
|
||||
* old sp => +16:+-----+
|
||||
* when instrumented | |
|
||||
* function calls | ... |
|
||||
* _mcount() | |
|
||||
* | |
|
||||
* instrumented => +xx:+-----+
|
||||
* function's fp | x29 | -> parent's fp
|
||||
* +-----+
|
||||
* | x30 | -> instrumented function's lr (= parent's pc)
|
||||
* +-----+
|
||||
* | ... |
|
||||
*/
|
||||
|
||||
.macro mcount_enter
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
.endm
|
||||
|
||||
.macro mcount_exit
|
||||
ldp x29, x30, [sp], #16
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro mcount_adjust_addr rd, rn
|
||||
sub \rd, \rn, #AARCH64_INSN_SIZE
|
||||
.endm
|
||||
|
||||
/* for instrumented function's parent */
|
||||
.macro mcount_get_parent_fp reg
|
||||
ldr \reg, [x29]
|
||||
ldr \reg, [\reg]
|
||||
.endm
|
||||
|
||||
/* for instrumented function */
|
||||
.macro mcount_get_pc0 reg
|
||||
mcount_adjust_addr \reg, x30
|
||||
.endm
|
||||
|
||||
.macro mcount_get_pc reg
|
||||
ldr \reg, [x29, #8]
|
||||
mcount_adjust_addr \reg, \reg
|
||||
.endm
|
||||
|
||||
.macro mcount_get_lr reg
|
||||
ldr \reg, [x29]
|
||||
ldr \reg, [\reg, #8]
|
||||
mcount_adjust_addr \reg, \reg
|
||||
.endm
|
||||
|
||||
.macro mcount_get_lr_addr reg
|
||||
ldr \reg, [x29]
|
||||
add \reg, \reg, #8
|
||||
.endm
|
||||
|
||||
#ifndef CONFIG_DYNAMIC_FTRACE
|
||||
/*
|
||||
* void _mcount(unsigned long return_address)
|
||||
* @return_address: return address to instrumented function
|
||||
*
|
||||
* This function makes calls, if enabled, to:
|
||||
* - tracer function to probe instrumented function's entry,
|
||||
* - ftrace_graph_caller to set up an exit hook
|
||||
*/
|
||||
ENTRY(_mcount)
|
||||
#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
|
||||
ldr x0, =ftrace_trace_stop
|
||||
ldr x0, [x0] // if ftrace_trace_stop
|
||||
ret // return;
|
||||
#endif
|
||||
mcount_enter
|
||||
|
||||
ldr x0, =ftrace_trace_function
|
||||
ldr x2, [x0]
|
||||
adr x0, ftrace_stub
|
||||
cmp x0, x2 // if (ftrace_trace_function
|
||||
b.eq skip_ftrace_call // != ftrace_stub) {
|
||||
|
||||
mcount_get_pc x0 // function's pc
|
||||
mcount_get_lr x1 // function's lr (= parent's pc)
|
||||
blr x2 // (*ftrace_trace_function)(pc, lr);
|
||||
|
||||
#ifndef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
skip_ftrace_call: // return;
|
||||
mcount_exit // }
|
||||
#else
|
||||
mcount_exit // return;
|
||||
// }
|
||||
skip_ftrace_call:
|
||||
ldr x1, =ftrace_graph_return
|
||||
ldr x2, [x1] // if ((ftrace_graph_return
|
||||
cmp x0, x2 // != ftrace_stub)
|
||||
b.ne ftrace_graph_caller
|
||||
|
||||
ldr x1, =ftrace_graph_entry // || (ftrace_graph_entry
|
||||
ldr x2, [x1] // != ftrace_graph_entry_stub))
|
||||
ldr x0, =ftrace_graph_entry_stub
|
||||
cmp x0, x2
|
||||
b.ne ftrace_graph_caller // ftrace_graph_caller();
|
||||
|
||||
mcount_exit
|
||||
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
|
||||
ENDPROC(_mcount)
|
||||
|
||||
#else /* CONFIG_DYNAMIC_FTRACE */
|
||||
/*
|
||||
* _mcount() is used to build the kernel with -pg option, but all the branch
|
||||
* instructions to _mcount() are replaced to NOP initially at kernel start up,
|
||||
* and later on, NOP to branch to ftrace_caller() when enabled or branch to
|
||||
* NOP when disabled per-function base.
|
||||
*/
|
||||
ENTRY(_mcount)
|
||||
ret
|
||||
ENDPROC(_mcount)
|
||||
|
||||
/*
|
||||
* void ftrace_caller(unsigned long return_address)
|
||||
* @return_address: return address to instrumented function
|
||||
*
|
||||
* This function is a counterpart of _mcount() in 'static' ftrace, and
|
||||
* makes calls to:
|
||||
* - tracer function to probe instrumented function's entry,
|
||||
* - ftrace_graph_caller to set up an exit hook
|
||||
*/
|
||||
ENTRY(ftrace_caller)
|
||||
mcount_enter
|
||||
|
||||
mcount_get_pc0 x0 // function's pc
|
||||
mcount_get_lr x1 // function's lr
|
||||
|
||||
.global ftrace_call
|
||||
ftrace_call: // tracer(pc, lr);
|
||||
nop // This will be replaced with "bl xxx"
|
||||
// where xxx can be any kind of tracer.
|
||||
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
.global ftrace_graph_call
|
||||
ftrace_graph_call: // ftrace_graph_caller();
|
||||
nop // If enabled, this will be replaced
|
||||
// "b ftrace_graph_caller"
|
||||
#endif
|
||||
|
||||
mcount_exit
|
||||
ENDPROC(ftrace_caller)
|
||||
#endif /* CONFIG_DYNAMIC_FTRACE */
|
||||
|
||||
ENTRY(ftrace_stub)
|
||||
ret
|
||||
ENDPROC(ftrace_stub)
|
||||
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
/*
|
||||
* void ftrace_graph_caller(void)
|
||||
*
|
||||
* Called from _mcount() or ftrace_caller() when function_graph tracer is
|
||||
* selected.
|
||||
* This function w/ prepare_ftrace_return() fakes link register's value on
|
||||
* the call stack in order to intercept instrumented function's return path
|
||||
* and run return_to_handler() later on its exit.
|
||||
*/
|
||||
ENTRY(ftrace_graph_caller)
|
||||
mcount_get_lr_addr x0 // pointer to function's saved lr
|
||||
mcount_get_pc x1 // function's pc
|
||||
mcount_get_parent_fp x2 // parent's fp
|
||||
bl prepare_ftrace_return // prepare_ftrace_return(&lr, pc, fp)
|
||||
|
||||
mcount_exit
|
||||
ENDPROC(ftrace_graph_caller)
|
||||
|
||||
/*
|
||||
* void return_to_handler(void)
|
||||
*
|
||||
* Run ftrace_return_to_handler() before going back to parent.
|
||||
* @fp is checked against the value passed by ftrace_graph_caller()
|
||||
* only when CONFIG_FUNCTION_GRAPH_FP_TEST is enabled.
|
||||
*/
|
||||
ENTRY(return_to_handler)
|
||||
str x0, [sp, #-16]!
|
||||
mov x0, x29 // parent's fp
|
||||
bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp);
|
||||
mov x30, x0 // restore the original return address
|
||||
ldr x0, [sp], #16
|
||||
ret
|
||||
END(return_to_handler)
|
||||
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
|
@ -60,6 +60,9 @@
|
||||
push x0, x1
|
||||
.if \el == 0
|
||||
mrs x21, sp_el0
|
||||
get_thread_info tsk // Ensure MDSCR_EL1.SS is clear,
|
||||
ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug
|
||||
disable_step_tsk x19, x20 // exceptions when scheduling.
|
||||
.else
|
||||
add x21, sp, #S_FRAME_SIZE
|
||||
.endif
|
||||
@ -259,7 +262,7 @@ el1_da:
|
||||
* Data abort handling
|
||||
*/
|
||||
mrs x0, far_el1
|
||||
enable_dbg_if_not_stepping x2
|
||||
enable_dbg
|
||||
// re-enable interrupts if they were enabled in the aborted context
|
||||
tbnz x23, #7, 1f // PSR_I_BIT
|
||||
enable_irq
|
||||
@ -275,6 +278,7 @@ el1_sp_pc:
|
||||
* Stack or PC alignment exception handling
|
||||
*/
|
||||
mrs x0, far_el1
|
||||
enable_dbg
|
||||
mov x1, x25
|
||||
mov x2, sp
|
||||
b do_sp_pc_abort
|
||||
@ -282,6 +286,7 @@ el1_undef:
|
||||
/*
|
||||
* Undefined instruction
|
||||
*/
|
||||
enable_dbg
|
||||
mov x0, sp
|
||||
b do_undefinstr
|
||||
el1_dbg:
|
||||
@ -294,10 +299,11 @@ el1_dbg:
|
||||
mrs x0, far_el1
|
||||
mov x2, sp // struct pt_regs
|
||||
bl do_debug_exception
|
||||
|
||||
enable_dbg
|
||||
kernel_exit 1
|
||||
el1_inv:
|
||||
// TODO: add support for undefined instructions in kernel mode
|
||||
enable_dbg
|
||||
mov x0, sp
|
||||
mov x1, #BAD_SYNC
|
||||
mrs x2, esr_el1
|
||||
@ -307,7 +313,7 @@ ENDPROC(el1_sync)
|
||||
.align 6
|
||||
el1_irq:
|
||||
kernel_entry 1
|
||||
enable_dbg_if_not_stepping x0
|
||||
enable_dbg
|
||||
#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
bl trace_hardirqs_off
|
||||
#endif
|
||||
@ -332,8 +338,7 @@ ENDPROC(el1_irq)
|
||||
#ifdef CONFIG_PREEMPT
|
||||
el1_preempt:
|
||||
mov x24, lr
|
||||
1: enable_dbg
|
||||
bl preempt_schedule_irq // irq en/disable is done inside
|
||||
1: bl preempt_schedule_irq // irq en/disable is done inside
|
||||
ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS
|
||||
tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
|
||||
ret x24
|
||||
@ -349,7 +354,7 @@ el0_sync:
|
||||
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
|
||||
cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state
|
||||
b.eq el0_svc
|
||||
adr lr, ret_from_exception
|
||||
adr lr, ret_to_user
|
||||
cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0
|
||||
b.eq el0_da
|
||||
cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0
|
||||
@ -378,7 +383,7 @@ el0_sync_compat:
|
||||
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
|
||||
cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state
|
||||
b.eq el0_svc_compat
|
||||
adr lr, ret_from_exception
|
||||
adr lr, ret_to_user
|
||||
cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0
|
||||
b.eq el0_da
|
||||
cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0
|
||||
@ -423,11 +428,8 @@ el0_da:
|
||||
*/
|
||||
mrs x0, far_el1
|
||||
bic x0, x0, #(0xff << 56)
|
||||
disable_step x1
|
||||
isb
|
||||
enable_dbg
|
||||
// enable interrupts before calling the main handler
|
||||
enable_irq
|
||||
enable_dbg_and_irq
|
||||
mov x1, x25
|
||||
mov x2, sp
|
||||
b do_mem_abort
|
||||
@ -436,11 +438,8 @@ el0_ia:
|
||||
* Instruction abort handling
|
||||
*/
|
||||
mrs x0, far_el1
|
||||
disable_step x1
|
||||
isb
|
||||
enable_dbg
|
||||
// enable interrupts before calling the main handler
|
||||
enable_irq
|
||||
enable_dbg_and_irq
|
||||
orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts
|
||||
mov x2, sp
|
||||
b do_mem_abort
|
||||
@ -448,6 +447,7 @@ el0_fpsimd_acc:
|
||||
/*
|
||||
* Floating Point or Advanced SIMD access
|
||||
*/
|
||||
enable_dbg
|
||||
mov x0, x25
|
||||
mov x1, sp
|
||||
b do_fpsimd_acc
|
||||
@ -455,6 +455,7 @@ el0_fpsimd_exc:
|
||||
/*
|
||||
* Floating Point or Advanced SIMD exception
|
||||
*/
|
||||
enable_dbg
|
||||
mov x0, x25
|
||||
mov x1, sp
|
||||
b do_fpsimd_exc
|
||||
@ -463,11 +464,8 @@ el0_sp_pc:
|
||||
* Stack or PC alignment exception handling
|
||||
*/
|
||||
mrs x0, far_el1
|
||||
disable_step x1
|
||||
isb
|
||||
enable_dbg
|
||||
// enable interrupts before calling the main handler
|
||||
enable_irq
|
||||
enable_dbg_and_irq
|
||||
mov x1, x25
|
||||
mov x2, sp
|
||||
b do_sp_pc_abort
|
||||
@ -475,9 +473,9 @@ el0_undef:
|
||||
/*
|
||||
* Undefined instruction
|
||||
*/
|
||||
mov x0, sp
|
||||
// enable interrupts before calling the main handler
|
||||
enable_irq
|
||||
enable_dbg_and_irq
|
||||
mov x0, sp
|
||||
b do_undefinstr
|
||||
el0_dbg:
|
||||
/*
|
||||
@ -485,11 +483,13 @@ el0_dbg:
|
||||
*/
|
||||
tbnz x24, #0, el0_inv // EL0 only
|
||||
mrs x0, far_el1
|
||||
disable_step x1
|
||||
mov x1, x25
|
||||
mov x2, sp
|
||||
b do_debug_exception
|
||||
bl do_debug_exception
|
||||
enable_dbg
|
||||
b ret_to_user
|
||||
el0_inv:
|
||||
enable_dbg
|
||||
mov x0, sp
|
||||
mov x1, #BAD_SYNC
|
||||
mrs x2, esr_el1
|
||||
@ -500,15 +500,12 @@ ENDPROC(el0_sync)
|
||||
el0_irq:
|
||||
kernel_entry 0
|
||||
el0_irq_naked:
|
||||
disable_step x1
|
||||
isb
|
||||
enable_dbg
|
||||
#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
bl trace_hardirqs_off
|
||||
#endif
|
||||
|
||||
irq_handler
|
||||
get_thread_info tsk
|
||||
|
||||
#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
bl trace_hardirqs_on
|
||||
@ -516,14 +513,6 @@ el0_irq_naked:
|
||||
b ret_to_user
|
||||
ENDPROC(el0_irq)
|
||||
|
||||
/*
|
||||
* This is the return code to user mode for abort handlers
|
||||
*/
|
||||
ret_from_exception:
|
||||
get_thread_info tsk
|
||||
b ret_to_user
|
||||
ENDPROC(ret_from_exception)
|
||||
|
||||
/*
|
||||
* Register switch for AArch64. The callee-saved registers need to be saved
|
||||
* and restored. On entry:
|
||||
@ -563,10 +552,7 @@ ret_fast_syscall:
|
||||
ldr x1, [tsk, #TI_FLAGS]
|
||||
and x2, x1, #_TIF_WORK_MASK
|
||||
cbnz x2, fast_work_pending
|
||||
tbz x1, #TIF_SINGLESTEP, fast_exit
|
||||
disable_dbg
|
||||
enable_step x2
|
||||
fast_exit:
|
||||
enable_step_tsk x1, x2
|
||||
kernel_exit 0, ret = 1
|
||||
|
||||
/*
|
||||
@ -576,7 +562,7 @@ fast_work_pending:
|
||||
str x0, [sp, #S_X0] // returned x0
|
||||
work_pending:
|
||||
tbnz x1, #TIF_NEED_RESCHED, work_resched
|
||||
/* TIF_SIGPENDING or TIF_NOTIFY_RESUME case */
|
||||
/* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
|
||||
ldr x2, [sp, #S_PSTATE]
|
||||
mov x0, sp // 'regs'
|
||||
tst x2, #PSR_MODE_MASK // user mode regs?
|
||||
@ -585,7 +571,6 @@ work_pending:
|
||||
bl do_notify_resume
|
||||
b ret_to_user
|
||||
work_resched:
|
||||
enable_dbg
|
||||
bl schedule
|
||||
|
||||
/*
|
||||
@ -596,9 +581,7 @@ ret_to_user:
|
||||
ldr x1, [tsk, #TI_FLAGS]
|
||||
and x2, x1, #_TIF_WORK_MASK
|
||||
cbnz x2, work_pending
|
||||
tbz x1, #TIF_SINGLESTEP, no_work_pending
|
||||
disable_dbg
|
||||
enable_step x2
|
||||
enable_step_tsk x1, x2
|
||||
no_work_pending:
|
||||
kernel_exit 0, ret = 0
|
||||
ENDPROC(ret_to_user)
|
||||
@ -625,14 +608,11 @@ el0_svc:
|
||||
mov sc_nr, #__NR_syscalls
|
||||
el0_svc_naked: // compat entry point
|
||||
stp x0, scno, [sp, #S_ORIG_X0] // save the original x0 and syscall number
|
||||
disable_step x16
|
||||
isb
|
||||
enable_dbg
|
||||
enable_irq
|
||||
enable_dbg_and_irq
|
||||
|
||||
get_thread_info tsk
|
||||
ldr x16, [tsk, #TI_FLAGS] // check for syscall tracing
|
||||
tbnz x16, #TIF_SYSCALL_TRACE, __sys_trace // are we tracing syscalls?
|
||||
ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks
|
||||
tst x16, #_TIF_SYSCALL_WORK
|
||||
b.ne __sys_trace
|
||||
adr lr, ret_fast_syscall // return address
|
||||
cmp scno, sc_nr // check upper syscall limit
|
||||
b.hs ni_sys
|
||||
@ -648,9 +628,8 @@ ENDPROC(el0_svc)
|
||||
* switches, and waiting for our parent to respond.
|
||||
*/
|
||||
__sys_trace:
|
||||
mov x1, sp
|
||||
mov w0, #0 // trace entry
|
||||
bl syscall_trace
|
||||
mov x0, sp
|
||||
bl syscall_trace_enter
|
||||
adr lr, __sys_trace_return // return address
|
||||
uxtw scno, w0 // syscall number (possibly new)
|
||||
mov x1, sp // pointer to regs
|
||||
@ -665,9 +644,8 @@ __sys_trace:
|
||||
|
||||
__sys_trace_return:
|
||||
str x0, [sp] // save returned x0
|
||||
mov x1, sp
|
||||
mov w0, #1 // trace exit
|
||||
bl syscall_trace
|
||||
mov x0, sp
|
||||
bl syscall_trace_exit
|
||||
b ret_to_user
|
||||
|
||||
/*
|
||||
|
@ -34,6 +34,60 @@
|
||||
#define FPEXC_IXF (1 << 4)
|
||||
#define FPEXC_IDF (1 << 7)
|
||||
|
||||
/*
|
||||
* In order to reduce the number of times the FPSIMD state is needlessly saved
|
||||
* and restored, we need to keep track of two things:
|
||||
* (a) for each task, we need to remember which CPU was the last one to have
|
||||
* the task's FPSIMD state loaded into its FPSIMD registers;
|
||||
* (b) for each CPU, we need to remember which task's userland FPSIMD state has
|
||||
* been loaded into its FPSIMD registers most recently, or whether it has
|
||||
* been used to perform kernel mode NEON in the meantime.
|
||||
*
|
||||
* For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to
|
||||
* the id of the current CPU everytime the state is loaded onto a CPU. For (b),
|
||||
* we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
|
||||
* address of the userland FPSIMD state of the task that was loaded onto the CPU
|
||||
* the most recently, or NULL if kernel mode NEON has been performed after that.
|
||||
*
|
||||
* With this in place, we no longer have to restore the next FPSIMD state right
|
||||
* when switching between tasks. Instead, we can defer this check to userland
|
||||
* resume, at which time we verify whether the CPU's fpsimd_last_state and the
|
||||
* task's fpsimd_state.cpu are still mutually in sync. If this is the case, we
|
||||
* can omit the FPSIMD restore.
|
||||
*
|
||||
* As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to
|
||||
* indicate whether or not the userland FPSIMD state of the current task is
|
||||
* present in the registers. The flag is set unless the FPSIMD registers of this
|
||||
* CPU currently contain the most recent userland FPSIMD state of the current
|
||||
* task.
|
||||
*
|
||||
* For a certain task, the sequence may look something like this:
|
||||
* - the task gets scheduled in; if both the task's fpsimd_state.cpu field
|
||||
* contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
|
||||
* variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is
|
||||
* cleared, otherwise it is set;
|
||||
*
|
||||
* - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's
|
||||
* userland FPSIMD state is copied from memory to the registers, the task's
|
||||
* fpsimd_state.cpu field is set to the id of the current CPU, the current
|
||||
* CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the
|
||||
* TIF_FOREIGN_FPSTATE flag is cleared;
|
||||
*
|
||||
* - the task executes an ordinary syscall; upon return to userland, the
|
||||
* TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is
|
||||
* restored;
|
||||
*
|
||||
* - the task executes a syscall which executes some NEON instructions; this is
|
||||
* preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD
|
||||
* register contents to memory, clears the fpsimd_last_state per-cpu variable
|
||||
* and sets the TIF_FOREIGN_FPSTATE flag;
|
||||
*
|
||||
* - the task gets preempted after kernel_neon_end() is called; as we have not
|
||||
* returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
|
||||
* whatever is in the FPSIMD registers is not saved to memory, but discarded.
|
||||
*/
|
||||
static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
|
||||
|
||||
/*
|
||||
* Trapped FP/ASIMD access.
|
||||
*/
|
||||
@ -72,43 +126,137 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
|
||||
|
||||
void fpsimd_thread_switch(struct task_struct *next)
|
||||
{
|
||||
/* check if not kernel threads */
|
||||
if (current->mm)
|
||||
/*
|
||||
* Save the current FPSIMD state to memory, but only if whatever is in
|
||||
* the registers is in fact the most recent userland FPSIMD state of
|
||||
* 'current'.
|
||||
*/
|
||||
if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
|
||||
fpsimd_save_state(¤t->thread.fpsimd_state);
|
||||
if (next->mm)
|
||||
fpsimd_load_state(&next->thread.fpsimd_state);
|
||||
|
||||
if (next->mm) {
|
||||
/*
|
||||
* If we are switching to a task whose most recent userland
|
||||
* FPSIMD state is already in the registers of *this* cpu,
|
||||
* we can skip loading the state from memory. Otherwise, set
|
||||
* the TIF_FOREIGN_FPSTATE flag so the state will be loaded
|
||||
* upon the next return to userland.
|
||||
*/
|
||||
struct fpsimd_state *st = &next->thread.fpsimd_state;
|
||||
|
||||
if (__this_cpu_read(fpsimd_last_state) == st
|
||||
&& st->cpu == smp_processor_id())
|
||||
clear_ti_thread_flag(task_thread_info(next),
|
||||
TIF_FOREIGN_FPSTATE);
|
||||
else
|
||||
set_ti_thread_flag(task_thread_info(next),
|
||||
TIF_FOREIGN_FPSTATE);
|
||||
}
|
||||
}
|
||||
|
||||
void fpsimd_flush_thread(void)
|
||||
{
|
||||
preempt_disable();
|
||||
memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
|
||||
fpsimd_load_state(¤t->thread.fpsimd_state);
|
||||
set_thread_flag(TIF_FOREIGN_FPSTATE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Save the userland FPSIMD state of 'current' to memory, but only if the state
|
||||
* currently held in the registers does in fact belong to 'current'
|
||||
*/
|
||||
void fpsimd_preserve_current_state(void)
|
||||
{
|
||||
preempt_disable();
|
||||
if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
|
||||
fpsimd_save_state(¤t->thread.fpsimd_state);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/*
|
||||
* Load the userland FPSIMD state of 'current' from memory, but only if the
|
||||
* FPSIMD state already held in the registers is /not/ the most recent FPSIMD
|
||||
* state of 'current'
|
||||
*/
|
||||
void fpsimd_restore_current_state(void)
|
||||
{
|
||||
preempt_disable();
|
||||
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
|
||||
struct fpsimd_state *st = ¤t->thread.fpsimd_state;
|
||||
|
||||
fpsimd_load_state(st);
|
||||
this_cpu_write(fpsimd_last_state, st);
|
||||
st->cpu = smp_processor_id();
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/*
|
||||
* Load an updated userland FPSIMD state for 'current' from memory and set the
|
||||
* flag that indicates that the FPSIMD register contents are the most recent
|
||||
* FPSIMD state of 'current'
|
||||
*/
|
||||
void fpsimd_update_current_state(struct fpsimd_state *state)
|
||||
{
|
||||
preempt_disable();
|
||||
fpsimd_load_state(state);
|
||||
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
|
||||
struct fpsimd_state *st = ¤t->thread.fpsimd_state;
|
||||
|
||||
this_cpu_write(fpsimd_last_state, st);
|
||||
st->cpu = smp_processor_id();
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/*
|
||||
* Invalidate live CPU copies of task t's FPSIMD state
|
||||
*/
|
||||
void fpsimd_flush_task_state(struct task_struct *t)
|
||||
{
|
||||
t->thread.fpsimd_state.cpu = NR_CPUS;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KERNEL_MODE_NEON
|
||||
|
||||
static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate);
|
||||
static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
|
||||
|
||||
/*
|
||||
* Kernel-side NEON support functions
|
||||
*/
|
||||
void kernel_neon_begin(void)
|
||||
void kernel_neon_begin_partial(u32 num_regs)
|
||||
{
|
||||
/* Avoid using the NEON in interrupt context */
|
||||
BUG_ON(in_interrupt());
|
||||
preempt_disable();
|
||||
if (in_interrupt()) {
|
||||
struct fpsimd_partial_state *s = this_cpu_ptr(
|
||||
in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
|
||||
|
||||
if (current->mm)
|
||||
fpsimd_save_state(¤t->thread.fpsimd_state);
|
||||
BUG_ON(num_regs > 32);
|
||||
fpsimd_save_partial_state(s, roundup(num_regs, 2));
|
||||
} else {
|
||||
/*
|
||||
* Save the userland FPSIMD state if we have one and if we
|
||||
* haven't done so already. Clear fpsimd_last_state to indicate
|
||||
* that there is no longer userland FPSIMD state in the
|
||||
* registers.
|
||||
*/
|
||||
preempt_disable();
|
||||
if (current->mm &&
|
||||
!test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
|
||||
fpsimd_save_state(¤t->thread.fpsimd_state);
|
||||
this_cpu_write(fpsimd_last_state, NULL);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(kernel_neon_begin);
|
||||
EXPORT_SYMBOL(kernel_neon_begin_partial);
|
||||
|
||||
void kernel_neon_end(void)
|
||||
{
|
||||
if (current->mm)
|
||||
fpsimd_load_state(¤t->thread.fpsimd_state);
|
||||
|
||||
preempt_enable();
|
||||
if (in_interrupt()) {
|
||||
struct fpsimd_partial_state *s = this_cpu_ptr(
|
||||
in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
|
||||
fpsimd_load_partial_state(s);
|
||||
} else {
|
||||
preempt_enable();
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(kernel_neon_end);
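Typical kernel-mode NEON usage under this API brackets the FPSIMD instructions with begin/end calls; callers that may run in interrupt context use kernel_neon_begin_partial() and declare how many registers they need. A rough sketch, with a hypothetical worker body:

/*
 * Sketch only: a caller that needs at most four NEON registers and may be
 * invoked from interrupt context.
 */
static void example_neon_worker(void *dst, const void *src, unsigned int len)
{
	kernel_neon_begin_partial(4);
	/* ... FPSIMD/NEON code operating on dst/src ... */
	kernel_neon_end();
}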
|
||||
|
||||
@ -120,12 +268,12 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
|
||||
{
|
||||
switch (cmd) {
|
||||
case CPU_PM_ENTER:
|
||||
if (current->mm)
|
||||
if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
|
||||
fpsimd_save_state(¤t->thread.fpsimd_state);
|
||||
break;
|
||||
case CPU_PM_EXIT:
|
||||
if (current->mm)
|
||||
fpsimd_load_state(¤t->thread.fpsimd_state);
|
||||
set_thread_flag(TIF_FOREIGN_FPSTATE);
|
||||
break;
|
||||
case CPU_PM_ENTER_FAILED:
|
||||
default:
|
||||
|
arch/arm64/kernel/ftrace.c (new file, 176 lines)
@ -0,0 +1,176 @@
|
||||
/*
|
||||
* arch/arm64/kernel/ftrace.c
|
||||
*
|
||||
* Copyright (C) 2013 Linaro Limited
|
||||
* Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/swab.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/ftrace.h>
|
||||
#include <asm/insn.h>
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
/*
|
||||
* Replace a single instruction, which may be a branch or NOP.
|
||||
* If @validate == true, a replaced instruction is checked against 'old'.
|
||||
*/
|
||||
static int ftrace_modify_code(unsigned long pc, u32 old, u32 new,
|
||||
bool validate)
|
||||
{
|
||||
u32 replaced;
|
||||
|
||||
	/*
	 * Note:
	 * Due to modules and __init, code can disappear and change; we need
	 * to protect against faulting as well as code changing. We do this
	 * via the aarch64_insn_*() helpers, which in turn use
	 * probe_kernel_*().
	 *
	 * No lock is held here because all the modifications are run
	 * through stop_machine().
	 */
|
||||
if (validate) {
|
||||
if (aarch64_insn_read((void *)pc, &replaced))
|
||||
return -EFAULT;
|
||||
|
||||
if (replaced != old)
|
||||
return -EINVAL;
|
||||
}
|
||||
if (aarch64_insn_patch_text_nosync((void *)pc, new))
|
||||
return -EPERM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Replace tracer function in ftrace_caller()
|
||||
*/
|
||||
int ftrace_update_ftrace_func(ftrace_func_t func)
|
||||
{
|
||||
unsigned long pc;
|
||||
u32 new;
|
||||
|
||||
pc = (unsigned long)&ftrace_call;
|
||||
new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func, true);
|
||||
|
||||
return ftrace_modify_code(pc, 0, new, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* Turn on the call to ftrace_caller() in instrumented function
|
||||
*/
|
||||
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
|
||||
{
|
||||
unsigned long pc = rec->ip;
|
||||
u32 old, new;
|
||||
|
||||
old = aarch64_insn_gen_nop();
|
||||
new = aarch64_insn_gen_branch_imm(pc, addr, true);
|
||||
|
||||
return ftrace_modify_code(pc, old, new, true);
|
||||
}
|
||||
|
||||
/*
|
||||
* Turn off the call to ftrace_caller() in instrumented function
|
||||
*/
|
||||
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
|
||||
unsigned long addr)
|
||||
{
|
||||
unsigned long pc = rec->ip;
|
||||
u32 old, new;
|
||||
|
||||
old = aarch64_insn_gen_branch_imm(pc, addr, true);
|
||||
new = aarch64_insn_gen_nop();
|
||||
|
||||
return ftrace_modify_code(pc, old, new, true);
|
||||
}
|
||||
|
||||
int __init ftrace_dyn_arch_init(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_DYNAMIC_FTRACE */
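With the mcount sites patchable between NOP and branch, the generic ftrace API can attach callbacks on this architecture. A hedged sketch of a module-side user, assuming the generic register_ftrace_function() interface; the callback and ops names are illustrative, not part of this patch:

#include <linux/ftrace.h>

/* Sketch only: called on entry to every traced function once registered. */
static void example_trace_cb(unsigned long ip, unsigned long parent_ip,
			     struct ftrace_ops *op, struct pt_regs *regs)
{
	/* e.g. count or filter call sites here */
}

static struct ftrace_ops example_ops = {
	.func	= example_trace_cb,
};

/* register_ftrace_function(&example_ops) flips the patched NOPs into calls */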
|
||||
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
/*
|
||||
* function_graph tracer expects ftrace_return_to_handler() to be called
|
||||
* on the way back to parent. For this purpose, this function is called
|
||||
* in _mcount() or ftrace_caller() to replace return address (*parent) on
|
||||
* the call stack to return_to_handler.
|
||||
*
|
||||
* Note that @frame_pointer is used only for sanity check later.
|
||||
*/
|
||||
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
|
||||
unsigned long frame_pointer)
|
||||
{
|
||||
unsigned long return_hooker = (unsigned long)&return_to_handler;
|
||||
unsigned long old;
|
||||
struct ftrace_graph_ent trace;
|
||||
int err;
|
||||
|
||||
if (unlikely(atomic_read(¤t->tracing_graph_pause)))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Note:
|
||||
* No protection against faulting at *parent, which may be seen
|
||||
* on other archs. It's unlikely on AArch64.
|
||||
*/
|
||||
old = *parent;
|
||||
*parent = return_hooker;
|
||||
|
||||
trace.func = self_addr;
|
||||
trace.depth = current->curr_ret_stack + 1;
|
||||
|
||||
/* Only trace if the calling function expects to */
|
||||
if (!ftrace_graph_entry(&trace)) {
|
||||
*parent = old;
|
||||
return;
|
||||
}
|
||||
|
||||
err = ftrace_push_return_trace(old, self_addr, &trace.depth,
|
||||
frame_pointer);
|
||||
if (err == -EBUSY) {
|
||||
*parent = old;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
/*
|
||||
* Turn on/off the call to ftrace_graph_caller() in ftrace_caller()
|
||||
* depending on @enable.
|
||||
*/
|
||||
static int ftrace_modify_graph_caller(bool enable)
|
||||
{
|
||||
unsigned long pc = (unsigned long)&ftrace_graph_call;
|
||||
u32 branch, nop;
|
||||
|
||||
branch = aarch64_insn_gen_branch_imm(pc,
|
||||
(unsigned long)ftrace_graph_caller, false);
|
||||
nop = aarch64_insn_gen_nop();
|
||||
|
||||
if (enable)
|
||||
return ftrace_modify_code(pc, nop, branch, true);
|
||||
else
|
||||
return ftrace_modify_code(pc, branch, nop, true);
|
||||
}
|
||||
|
||||
int ftrace_enable_ftrace_graph_caller(void)
|
||||
{
|
||||
return ftrace_modify_graph_caller(true);
|
||||
}
|
||||
|
||||
int ftrace_disable_ftrace_graph_caller(void)
|
||||
{
|
||||
return ftrace_modify_graph_caller(false);
|
||||
}
|
||||
#endif /* CONFIG_DYNAMIC_FTRACE */
|
||||
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
|
@ -342,11 +342,9 @@ ENTRY(set_cpu_boot_mode_flag)
|
||||
cmp w20, #BOOT_CPU_MODE_EL2
|
||||
b.ne 1f
|
||||
add x1, x1, #4
|
||||
1: dc cvac, x1 // Clean potentially dirty cache line
|
||||
dsb sy
|
||||
str w20, [x1] // This CPU has booted in EL1
|
||||
dc civac, x1 // Clean&invalidate potentially stale cache line
|
||||
dsb sy
|
||||
1: str w20, [x1] // This CPU has booted in EL1
|
||||
dmb sy
|
||||
dc ivac, x1 // Invalidate potentially stale cache line
|
||||
ret
|
||||
ENDPROC(set_cpu_boot_mode_flag)
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
|
||||
#define pr_fmt(fmt) "hw-breakpoint: " fmt
|
||||
|
||||
#include <linux/compat.h>
|
||||
#include <linux/cpu_pm.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/hw_breakpoint.h>
|
||||
@ -27,7 +28,6 @@
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/compat.h>
|
||||
#include <asm/current.h>
|
||||
#include <asm/debug-monitors.h>
|
||||
#include <asm/hw_breakpoint.h>
|
||||
|
@ -20,6 +20,7 @@
|
||||
|
||||
#include <stdarg.h>
|
||||
|
||||
#include <linux/compat.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kernel.h>
|
||||
@ -113,32 +114,62 @@ void arch_cpu_idle_dead(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Called by kexec, immediately prior to machine_kexec().
 *
 * This must completely disable all secondary CPUs; simply causing those CPUs
 * to execute e.g. a RAM-based pin loop is not sufficient. This allows the
 * kexec'd kernel to use any and all RAM as it sees fit, without having to
 * avoid any code or data used by any SW CPU pin loop. The CPU hotplug
 * functionality embodied in disable_nonboot_cpus() is used to achieve this.
 */
|
||||
void machine_shutdown(void)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
smp_send_stop();
|
||||
#endif
|
||||
disable_nonboot_cpus();
|
||||
}
|
||||
|
||||
/*
|
||||
* Halting simply requires that the secondary CPUs stop performing any
|
||||
* activity (executing tasks, handling interrupts). smp_send_stop()
|
||||
* achieves this.
|
||||
*/
|
||||
void machine_halt(void)
|
||||
{
|
||||
machine_shutdown();
|
||||
local_irq_disable();
|
||||
smp_send_stop();
|
||||
while (1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Power-off simply requires that the secondary CPUs stop performing any
|
||||
* activity (executing tasks, handling interrupts). smp_send_stop()
|
||||
* achieves this. When the system power is turned off, it will take all CPUs
|
||||
* with it.
|
||||
*/
|
||||
void machine_power_off(void)
|
||||
{
|
||||
machine_shutdown();
|
||||
local_irq_disable();
|
||||
smp_send_stop();
|
||||
if (pm_power_off)
|
||||
pm_power_off();
|
||||
}
|
||||
|
||||
/*
|
||||
* Restart requires that the secondary CPUs stop performing any activity
|
||||
* while the primary CPU resets the system. Systems with a single CPU can
|
||||
* use soft_restart() as their machine descriptor's .restart hook, since that
|
||||
* will cause the only available CPU to reset. Systems with multiple CPUs must
|
||||
* provide a HW restart implementation, to ensure that all CPUs reset at once.
|
||||
* This is required so that any code running after reset on the primary CPU
|
||||
* doesn't have to co-ordinate with other CPUs to ensure they aren't still
|
||||
* executing pre-reset code, and using RAM that the primary CPU's code wishes
|
||||
* to use. Implementing such co-ordination would be essentially impossible.
|
||||
*/
|
||||
void machine_restart(char *cmd)
|
||||
{
|
||||
machine_shutdown();
|
||||
|
||||
/* Disable interrupts first */
|
||||
local_irq_disable();
|
||||
smp_send_stop();
|
||||
|
||||
/* Now call the architecture specific reboot code. */
|
||||
if (arm_pm_restart)
|
||||
@ -205,7 +236,7 @@ void release_thread(struct task_struct *dead_task)
|
||||
|
||||
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
|
||||
{
|
||||
fpsimd_save_state(¤t->thread.fpsimd_state);
|
||||
fpsimd_preserve_current_state();
|
||||
*dst = *src;
|
||||
return 0;
|
||||
}
|
||||
@ -300,7 +331,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
|
||||
* Complete any pending TLB or cache maintenance on this CPU in case
|
||||
* the thread migrates to a different CPU.
|
||||
*/
|
||||
dsb();
|
||||
dsb(ish);
|
||||
|
||||
/* the actual thread switch */
|
||||
last = cpu_switch_to(prev, next);
|
||||
|
@ -19,6 +19,7 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <linux/compat.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm.h>
|
||||
@ -41,6 +42,9 @@
|
||||
#include <asm/traps.h>
|
||||
#include <asm/system_misc.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/syscalls.h>
|
||||
|
||||
/*
|
||||
* TODO: does not yet catch signals sent when the child dies.
|
||||
* in exit.c or in signal.c.
|
||||
@ -517,6 +521,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
|
||||
return ret;
|
||||
|
||||
target->thread.fpsimd_state.user_fpsimd = newstate;
|
||||
fpsimd_flush_task_state(target);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -764,6 +769,7 @@ static int compat_vfp_set(struct task_struct *target,
|
||||
uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
|
||||
}
|
||||
|
||||
fpsimd_flush_task_state(target);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1058,35 +1064,49 @@ long arch_ptrace(struct task_struct *child, long request,
|
||||
return ptrace_request(child, request, addr, data);
|
||||
}
|
||||
|
||||
asmlinkage int syscall_trace(int dir, struct pt_regs *regs)
|
||||
enum ptrace_syscall_dir {
|
||||
PTRACE_SYSCALL_ENTER = 0,
|
||||
PTRACE_SYSCALL_EXIT,
|
||||
};
|
||||
|
||||
static void tracehook_report_syscall(struct pt_regs *regs,
|
||||
enum ptrace_syscall_dir dir)
|
||||
{
|
||||
int regno;
|
||||
unsigned long saved_reg;
|
||||
|
||||
if (!test_thread_flag(TIF_SYSCALL_TRACE))
|
||||
return regs->syscallno;
|
||||
/*
|
||||
* A scratch register (ip(r12) on AArch32, x7 on AArch64) is
|
||||
* used to denote syscall entry/exit:
|
||||
*/
|
||||
regno = (is_compat_task() ? 12 : 7);
|
||||
saved_reg = regs->regs[regno];
|
||||
regs->regs[regno] = dir;
|
||||
|
||||
if (is_compat_task()) {
|
||||
/* AArch32 uses ip (r12) for scratch */
|
||||
saved_reg = regs->regs[12];
|
||||
regs->regs[12] = dir;
|
||||
} else {
|
||||
/*
|
||||
* Save X7. X7 is used to denote syscall entry/exit:
|
||||
* X7 = 0 -> entry, = 1 -> exit
|
||||
*/
|
||||
saved_reg = regs->regs[7];
|
||||
regs->regs[7] = dir;
|
||||
}
|
||||
|
||||
if (dir)
|
||||
if (dir == PTRACE_SYSCALL_EXIT)
|
||||
tracehook_report_syscall_exit(regs, 0);
|
||||
else if (tracehook_report_syscall_entry(regs))
|
||||
regs->syscallno = ~0UL;
|
||||
|
||||
if (is_compat_task())
|
||||
regs->regs[12] = saved_reg;
|
||||
else
|
||||
regs->regs[7] = saved_reg;
|
||||
regs->regs[regno] = saved_reg;
|
||||
}
|
||||
|
||||
asmlinkage int syscall_trace_enter(struct pt_regs *regs)
|
||||
{
|
||||
if (test_thread_flag(TIF_SYSCALL_TRACE))
|
||||
tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
|
||||
|
||||
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
|
||||
trace_sys_enter(regs, regs->syscallno);
|
||||
|
||||
return regs->syscallno;
|
||||
}
|
||||
|
||||
asmlinkage void syscall_trace_exit(struct pt_regs *regs)
|
||||
{
|
||||
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
|
||||
trace_sys_exit(regs, regs_return_value(regs));
|
||||
|
||||
if (test_thread_flag(TIF_SYSCALL_TRACE))
|
||||
tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
|
||||
}
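From userspace, a PTRACE_SYSCALL tracer can distinguish the two stops by reading the scratch register described above (x7 on AArch64): it reads 0 at syscall entry and 1 at exit while the tracee is stopped in the tracehook. A rough userspace-side sketch, not part of this patch; error handling is omitted and the function name is made up:

#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <linux/elf.h>
#include <asm/ptrace.h>

/* Sketch only: report whether a syscall-trace stop is an entry or an exit. */
static void example_report_syscall_stop(pid_t child)
{
	struct user_pt_regs regs;
	struct iovec iov = { .iov_base = &regs, .iov_len = sizeof(regs) };

	ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov);
	printf("syscall %llu: %s\n",
	       (unsigned long long)regs.regs[8],	/* w8 holds the syscall number */
	       regs.regs[7] ? "exit" : "entry");
}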
|
||||
|
arch/arm64/kernel/return_address.c (new file, 55 lines)
@ -0,0 +1,55 @@
|
||||
/*
|
||||
* arch/arm64/kernel/return_address.c
|
||||
*
|
||||
* Copyright (C) 2013 Linaro Limited
|
||||
* Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/export.h>
|
||||
#include <linux/ftrace.h>
|
||||
|
||||
#include <asm/stacktrace.h>
|
||||
|
||||
struct return_address_data {
|
||||
unsigned int level;
|
||||
void *addr;
|
||||
};
|
||||
|
||||
static int save_return_addr(struct stackframe *frame, void *d)
|
||||
{
|
||||
struct return_address_data *data = d;
|
||||
|
||||
if (!data->level) {
|
||||
data->addr = (void *)frame->pc;
|
||||
return 1;
|
||||
} else {
|
||||
--data->level;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void *return_address(unsigned int level)
|
||||
{
|
||||
struct return_address_data data;
|
||||
struct stackframe frame;
|
||||
register unsigned long current_sp asm ("sp");
|
||||
|
||||
data.level = level + 2;
|
||||
data.addr = NULL;
|
||||
|
||||
frame.fp = (unsigned long)__builtin_frame_address(0);
|
||||
frame.sp = current_sp;
|
||||
frame.pc = (unsigned long)return_address; /* dummy */
|
||||
|
||||
walk_stackframe(&frame, save_return_addr, &data);
|
||||
|
||||
if (!data.level)
|
||||
return data.addr;
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(return_address);
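return_address(level) gives callers a cheap way to identify who invoked them a given number of frames up, e.g. for debug prints; level 0 is the immediate caller. A small sketch, with a hypothetical helper name:

/* Sketch only: report the caller of this function's caller. */
static void example_report_caller(void)
{
	pr_debug("invoked via %pS\n", return_address(1));
}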
|
@ -25,6 +25,7 @@
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/initrd.h>
|
||||
#include <linux/console.h>
|
||||
#include <linux/cache.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/screen_info.h>
|
||||
@ -200,6 +201,8 @@ static void __init setup_processor(void)
|
||||
{
|
||||
struct cpu_info *cpu_info;
|
||||
u64 features, block;
|
||||
u32 cwg;
|
||||
int cls;
|
||||
|
||||
cpu_info = lookup_processor_type(read_cpuid_id());
|
||||
if (!cpu_info) {
|
||||
@ -216,6 +219,18 @@ static void __init setup_processor(void)
|
||||
sprintf(init_utsname()->machine, ELF_PLATFORM);
|
||||
elf_hwcap = 0;
|
||||
|
||||
/*
|
||||
* Check for sane CTR_EL0.CWG value.
|
||||
*/
|
||||
cwg = cache_type_cwg();
|
||||
cls = cache_line_size();
|
||||
if (!cwg)
|
||||
pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
|
||||
cls);
|
||||
if (L1_CACHE_BYTES < cls)
|
||||
pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
|
||||
L1_CACHE_BYTES, cls);
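For reference, the Cache Writeback Granule being checked here comes from CTR_EL0: the CWG field is the log2 of the granule in words, so the granule in bytes is 4 << CWG. A hedged sketch of that decode, assuming the ARMv8 field position (bits [27:24]); this is illustrative and not a quote of the cache_line_size() helper:

/* Sketch only: derive the write-back granule in bytes from CTR_EL0. */
static inline unsigned int example_cwg_bytes(unsigned int ctr_el0)
{
	unsigned int cwg = (ctr_el0 >> 24) & 0xf;

	return cwg ? 4U << cwg : 0;	/* 0 means the CPU did not report it */
}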
|
||||
|
||||
/*
|
||||
* ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks.
|
||||
* The blocks we test below represent incremental functionality
|
||||
@ -363,7 +378,6 @@ void __init setup_arch(char **cmdline_p)
|
||||
|
||||
*cmdline_p = boot_command_line;
|
||||
|
||||
init_mem_pgprot();
|
||||
early_ioremap_init();
|
||||
|
||||
parse_early_param();
|
||||
|
@ -17,6 +17,7 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <linux/compat.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/signal.h>
|
||||
#include <linux/personality.h>
|
||||
@ -25,7 +26,6 @@
|
||||
#include <linux/tracehook.h>
|
||||
#include <linux/ratelimit.h>
|
||||
|
||||
#include <asm/compat.h>
|
||||
#include <asm/debug-monitors.h>
|
||||
#include <asm/elf.h>
|
||||
#include <asm/cacheflush.h>
|
||||
@ -51,7 +51,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
|
||||
int err;
|
||||
|
||||
/* dump the hardware registers to the fpsimd_state structure */
|
||||
fpsimd_save_state(fpsimd);
|
||||
fpsimd_preserve_current_state();
|
||||
|
||||
/* copy the FP and status/control registers */
|
||||
err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs));
|
||||
@ -86,11 +86,8 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
|
||||
__get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
|
||||
|
||||
/* load the hardware registers from the fpsimd_state structure */
|
||||
if (!err) {
|
||||
preempt_disable();
|
||||
fpsimd_load_state(&fpsimd);
|
||||
preempt_enable();
|
||||
}
|
||||
if (!err)
|
||||
fpsimd_update_current_state(&fpsimd);
|
||||
|
||||
return err ? -EFAULT : 0;
|
||||
}
|
||||
@ -100,8 +97,7 @@ static int restore_sigframe(struct pt_regs *regs,
|
||||
{
|
||||
sigset_t set;
|
||||
int i, err;
|
||||
struct aux_context __user *aux =
|
||||
(struct aux_context __user *)sf->uc.uc_mcontext.__reserved;
|
||||
void *aux = sf->uc.uc_mcontext.__reserved;
|
||||
|
||||
err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
|
||||
if (err == 0)
|
||||
@ -121,8 +117,11 @@ static int restore_sigframe(struct pt_regs *regs,
|
||||
|
||||
err |= !valid_user_regs(®s->user_regs);
|
||||
|
||||
if (err == 0)
|
||||
err |= restore_fpsimd_context(&aux->fpsimd);
|
||||
if (err == 0) {
|
||||
struct fpsimd_context *fpsimd_ctx =
|
||||
container_of(aux, struct fpsimd_context, head);
|
||||
err |= restore_fpsimd_context(fpsimd_ctx);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
@ -167,8 +166,8 @@ static int setup_sigframe(struct rt_sigframe __user *sf,
|
||||
struct pt_regs *regs, sigset_t *set)
|
||||
{
|
||||
int i, err = 0;
|
||||
struct aux_context __user *aux =
|
||||
(struct aux_context __user *)sf->uc.uc_mcontext.__reserved;
|
||||
void *aux = sf->uc.uc_mcontext.__reserved;
|
||||
struct _aarch64_ctx *end;
|
||||
|
||||
/* set up the stack frame for unwinding */
|
||||
__put_user_error(regs->regs[29], &sf->fp, err);
|
||||
@ -185,12 +184,27 @@ static int setup_sigframe(struct rt_sigframe __user *sf,
|
||||
|
||||
err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set));
|
||||
|
||||
if (err == 0)
|
||||
err |= preserve_fpsimd_context(&aux->fpsimd);
|
||||
if (err == 0) {
|
||||
struct fpsimd_context *fpsimd_ctx =
|
||||
container_of(aux, struct fpsimd_context, head);
|
||||
err |= preserve_fpsimd_context(fpsimd_ctx);
|
||||
aux += sizeof(*fpsimd_ctx);
|
||||
}
|
||||
|
||||
/* fault information, if valid */
|
||||
if (current->thread.fault_code) {
|
||||
struct esr_context *esr_ctx =
|
||||
container_of(aux, struct esr_context, head);
|
||||
__put_user_error(ESR_MAGIC, &esr_ctx->head.magic, err);
|
||||
__put_user_error(sizeof(*esr_ctx), &esr_ctx->head.size, err);
|
||||
__put_user_error(current->thread.fault_code, &esr_ctx->esr, err);
|
||||
aux += sizeof(*esr_ctx);
|
||||
}
|
||||
|
||||
/* set the "end" magic */
|
||||
__put_user_error(0, &aux->end.magic, err);
|
||||
__put_user_error(0, &aux->end.size, err);
|
||||
end = aux;
|
||||
__put_user_error(0, &end->magic, err);
|
||||
__put_user_error(0, &end->size, err);
|
||||
|
||||
return err;
|
||||
}
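User space can recover the ESR value recorded here by walking the __reserved area of the signal frame until it finds the ESR_MAGIC record, which is how emulators obtain fault syndrome information for SIGSEGV/SIGBUS. A rough user-side sketch, assuming the record layout from asm/sigcontext.h; the handler name is made up and fprintf() is used purely for illustration (it is not async-signal-safe):

#include <signal.h>
#include <stdio.h>
#include <ucontext.h>
#include <asm/sigcontext.h>

/* Sketch only: locate the esr_context record inside a SIGSEGV/SIGBUS frame. */
static void example_fault_handler(int sig, siginfo_t *info, void *ucontext)
{
	ucontext_t *uc = ucontext;
	unsigned char *p = (unsigned char *)uc->uc_mcontext.__reserved;
	struct _aarch64_ctx *head = (struct _aarch64_ctx *)p;

	while (head->magic) {
		if (head->magic == ESR_MAGIC) {
			struct esr_context *esr = (struct esr_context *)head;

			fprintf(stderr, "fault ESR: 0x%llx\n",
				(unsigned long long)esr->esr);
			break;
		}
		p += head->size;
		head = (struct _aarch64_ctx *)p;
	}
}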
|
||||
@ -416,4 +430,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
|
||||
clear_thread_flag(TIF_NOTIFY_RESUME);
|
||||
tracehook_notify_resume(regs);
|
||||
}
|
||||
|
||||
if (thread_flags & _TIF_FOREIGN_FPSTATE)
|
||||
fpsimd_restore_current_state();
|
||||
|
||||
}
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/ratelimit.h>
|
||||
|
||||
#include <asm/esr.h>
|
||||
#include <asm/fpsimd.h>
|
||||
#include <asm/signal32.h>
|
||||
#include <asm/uaccess.h>
|
||||
@ -81,6 +82,8 @@ struct compat_vfp_sigframe {
|
||||
#define VFP_MAGIC 0x56465001
|
||||
#define VFP_STORAGE_SIZE sizeof(struct compat_vfp_sigframe)
|
||||
|
||||
#define FSR_WRITE_SHIFT (11)
|
||||
|
||||
struct compat_aux_sigframe {
|
||||
struct compat_vfp_sigframe vfp;
|
||||
|
||||
@ -219,7 +222,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
|
||||
* Note that this also saves V16-31, which aren't visible
|
||||
* in AArch32.
|
||||
*/
|
||||
fpsimd_save_state(fpsimd);
|
||||
fpsimd_preserve_current_state();
|
||||
|
||||
/* Place structure header on the stack */
|
||||
__put_user_error(magic, &frame->magic, err);
|
||||
@ -282,11 +285,8 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
|
||||
* We don't need to touch the exception register, so
|
||||
* reload the hardware state.
|
||||
*/
|
||||
if (!err) {
|
||||
preempt_disable();
|
||||
fpsimd_load_state(&fpsimd);
|
||||
preempt_enable();
|
||||
}
|
||||
if (!err)
|
||||
fpsimd_update_current_state(&fpsimd);
|
||||
|
||||
return err ? -EFAULT : 0;
|
||||
}
|
||||
@ -500,7 +500,9 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf,
|
||||
__put_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err);
|
||||
|
||||
__put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.trap_no, err);
|
||||
__put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.error_code, err);
|
||||
/* set the compat FSR WnR */
|
||||
__put_user_error(!!(current->thread.fault_code & ESR_EL1_WRITE) <<
|
||||
FSR_WRITE_SHIFT, &sf->uc.uc_mcontext.error_code, err);
|
||||
__put_user_error(current->thread.fault_address, &sf->uc.uc_mcontext.fault_address, err);
|
||||
__put_user_error(set->sig[0], &sf->uc.uc_mcontext.oldmask, err);
|
||||
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include <linux/clockchips.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/irq_work.h>
|
||||
|
||||
#include <asm/atomic.h>
|
||||
#include <asm/cacheflush.h>
|
||||
@ -62,6 +63,7 @@ enum ipi_msg_type {
|
||||
IPI_CALL_FUNC_SINGLE,
|
||||
IPI_CPU_STOP,
|
||||
IPI_TIMER,
|
||||
IPI_IRQ_WORK,
|
||||
};
|
||||
|
||||
/*
|
||||
@ -477,6 +479,14 @@ void arch_send_call_function_single_ipi(int cpu)
|
||||
smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_IRQ_WORK
|
||||
void arch_irq_work_raise(void)
|
||||
{
|
||||
if (smp_cross_call)
|
||||
smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
|
||||
}
|
||||
#endif
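The new IPI is raised through the generic irq_work interface: queueing an irq_work item from atomic or NMI-safe code sends the self-IPI above and runs the callback from the IPI_IRQ_WORK case in handle_IPI(). A minimal sketch with hypothetical names:

#include <linux/irq_work.h>

/* Sketch only: runs in hard IRQ context on the CPU that queued the work. */
static void example_irq_work_fn(struct irq_work *work)
{
	/* deferred processing goes here */
}

static struct irq_work example_work;

static void example_queue_from_atomic_context(void)
{
	init_irq_work(&example_work, example_irq_work_fn);
	irq_work_queue(&example_work);	/* raises IPI_IRQ_WORK on this CPU */
}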
|
||||
|
||||
static const char *ipi_types[NR_IPI] = {
|
||||
#define S(x,s) [x - IPI_RESCHEDULE] = s
|
||||
S(IPI_RESCHEDULE, "Rescheduling interrupts"),
|
||||
@ -484,6 +494,7 @@ static const char *ipi_types[NR_IPI] = {
|
||||
S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
|
||||
S(IPI_CPU_STOP, "CPU stop interrupts"),
|
||||
S(IPI_TIMER, "Timer broadcast interrupts"),
|
||||
S(IPI_IRQ_WORK, "IRQ work interrupts"),
|
||||
};
|
||||
|
||||
void show_ipi_list(struct seq_file *p, int prec)
|
||||
@ -576,6 +587,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
|
||||
break;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_IRQ_WORK
|
||||
case IPI_IRQ_WORK:
|
||||
irq_enter();
|
||||
irq_work_run();
|
||||
irq_exit();
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
|
||||
break;
|
||||
|
@ -30,7 +30,6 @@ extern void secondary_holding_pen(void);
|
||||
volatile unsigned long secondary_holding_pen_release = INVALID_HWID;
|
||||
|
||||
static phys_addr_t cpu_release_addr[NR_CPUS];
|
||||
static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
/*
|
||||
* Write secondary_holding_pen_release in a way that is guaranteed to be
|
||||
@ -94,14 +93,6 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
|
||||
|
||||
static int smp_spin_table_cpu_boot(unsigned int cpu)
|
||||
{
|
||||
unsigned long timeout;
|
||||
|
||||
/*
|
||||
* Set synchronisation state between this boot processor
|
||||
* and the secondary one
|
||||
*/
|
||||
raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* Update the pen release flag.
|
||||
*/
|
||||
@ -112,34 +103,7 @@ static int smp_spin_table_cpu_boot(unsigned int cpu)
|
||||
*/
|
||||
sev();
|
||||
|
||||
timeout = jiffies + (1 * HZ);
|
||||
while (time_before(jiffies, timeout)) {
|
||||
if (secondary_holding_pen_release == INVALID_HWID)
|
||||
break;
|
||||
udelay(10);
|
||||
}
|
||||
|
||||
/*
|
||||
* Now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
raw_spin_unlock(&boot_lock);
|
||||
|
||||
return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0;
|
||||
}
|
||||
|
||||
static void smp_spin_table_cpu_postboot(void)
|
||||
{
|
||||
/*
|
||||
* Let the primary processor know we're out of the pen.
|
||||
*/
|
||||
write_pen_release(INVALID_HWID);
|
||||
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
raw_spin_lock(&boot_lock);
|
||||
raw_spin_unlock(&boot_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct cpu_operations smp_spin_table_ops = {
|
||||
@ -147,5 +111,4 @@ const struct cpu_operations smp_spin_table_ops = {
|
||||
.cpu_init = smp_spin_table_cpu_init,
|
||||
.cpu_prepare = smp_spin_table_cpu_prepare,
|
||||
.cpu_boot = smp_spin_table_cpu_boot,
|
||||
.cpu_postboot = smp_spin_table_cpu_postboot,
|
||||
};
|
||||
|
@ -35,7 +35,7 @@
|
||||
* ldp x29, x30, [sp]
|
||||
* add sp, sp, #0x10
|
||||
*/
|
||||
int unwind_frame(struct stackframe *frame)
|
||||
int notrace unwind_frame(struct stackframe *frame)
|
||||
{
|
||||
unsigned long high, low;
|
||||
unsigned long fp = frame->fp;
|
||||
|
@ -18,6 +18,7 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <linux/clockchips.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/interrupt.h>
|
||||
@ -69,6 +70,8 @@ void __init time_init(void)
|
||||
of_clk_init(NULL);
|
||||
clocksource_of_init();
|
||||
|
||||
tick_setup_hrtimer_broadcast();
|
||||
|
||||
arch_timer_rate = arch_timer_get_rate();
|
||||
if (!arch_timer_rate)
|
||||
panic("Unable to initialise architected timer.\n");
|
||||
|
@ -17,10 +17,192 @@
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/node.h>
|
||||
#include <linux/nodemask.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
#include <asm/topology.h>
|
||||
|
||||
static int __init get_cpu_for_node(struct device_node *node)
|
||||
{
|
||||
struct device_node *cpu_node;
|
||||
int cpu;
|
||||
|
||||
cpu_node = of_parse_phandle(node, "cpu", 0);
|
||||
if (!cpu_node)
|
||||
return -1;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
if (of_get_cpu_node(cpu, NULL) == cpu_node) {
|
||||
of_node_put(cpu_node);
|
||||
return cpu;
|
||||
}
|
||||
}
|
||||
|
||||
pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name);
|
||||
|
||||
of_node_put(cpu_node);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int __init parse_core(struct device_node *core, int cluster_id,
|
||||
int core_id)
|
||||
{
|
||||
char name[10];
|
||||
bool leaf = true;
|
||||
int i = 0;
|
||||
int cpu;
|
||||
struct device_node *t;
|
||||
|
||||
do {
|
||||
snprintf(name, sizeof(name), "thread%d", i);
|
||||
t = of_get_child_by_name(core, name);
|
||||
if (t) {
|
||||
leaf = false;
|
||||
cpu = get_cpu_for_node(t);
|
||||
if (cpu >= 0) {
|
||||
cpu_topology[cpu].cluster_id = cluster_id;
|
||||
cpu_topology[cpu].core_id = core_id;
|
||||
cpu_topology[cpu].thread_id = i;
|
||||
} else {
|
||||
pr_err("%s: Can't get CPU for thread\n",
|
||||
t->full_name);
|
||||
of_node_put(t);
|
||||
return -EINVAL;
|
||||
}
|
||||
of_node_put(t);
|
||||
}
|
||||
i++;
|
||||
} while (t);
|
||||
|
||||
cpu = get_cpu_for_node(core);
|
||||
if (cpu >= 0) {
|
||||
if (!leaf) {
|
||||
pr_err("%s: Core has both threads and CPU\n",
|
||||
core->full_name);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
cpu_topology[cpu].cluster_id = cluster_id;
|
||||
cpu_topology[cpu].core_id = core_id;
|
||||
} else if (leaf) {
|
||||
pr_err("%s: Can't get CPU for leaf core\n", core->full_name);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init parse_cluster(struct device_node *cluster, int depth)
|
||||
{
|
||||
char name[10];
|
||||
bool leaf = true;
|
||||
bool has_cores = false;
|
||||
struct device_node *c;
|
||||
static int cluster_id __initdata;
|
||||
int core_id = 0;
|
||||
int i, ret;
|
||||
|
||||
/*
|
||||
* First check for child clusters; we currently ignore any
|
||||
* information about the nesting of clusters and present the
|
||||
* scheduler with a flat list of them.
|
||||
*/
|
||||
i = 0;
|
||||
do {
|
||||
snprintf(name, sizeof(name), "cluster%d", i);
|
||||
c = of_get_child_by_name(cluster, name);
|
||||
if (c) {
|
||||
leaf = false;
|
||||
ret = parse_cluster(c, depth + 1);
|
||||
of_node_put(c);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
}
|
||||
i++;
|
||||
} while (c);
|
||||
|
||||
/* Now check for cores */
|
||||
i = 0;
|
||||
do {
|
||||
snprintf(name, sizeof(name), "core%d", i);
|
||||
c = of_get_child_by_name(cluster, name);
|
||||
if (c) {
|
||||
has_cores = true;
|
||||
|
||||
if (depth == 0) {
|
||||
pr_err("%s: cpu-map children should be clusters\n",
|
||||
c->full_name);
|
||||
of_node_put(c);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (leaf) {
|
||||
ret = parse_core(c, cluster_id, core_id++);
|
||||
} else {
|
||||
pr_err("%s: Non-leaf cluster with core %s\n",
|
||||
cluster->full_name, name);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
of_node_put(c);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
}
|
||||
i++;
|
||||
} while (c);
|
||||
|
||||
if (leaf && !has_cores)
|
||||
pr_warn("%s: empty cluster\n", cluster->full_name);
|
||||
|
||||
if (leaf)
|
||||
cluster_id++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init parse_dt_topology(void)
|
||||
{
|
||||
struct device_node *cn, *map;
|
||||
int ret = 0;
|
||||
int cpu;
|
||||
|
||||
cn = of_find_node_by_path("/cpus");
|
||||
if (!cn) {
|
||||
pr_err("No CPU information found in DT\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* When topology is provided cpu-map is essentially a root
|
||||
* cluster with restricted subnodes.
|
||||
*/
|
||||
map = of_get_child_by_name(cn, "cpu-map");
|
||||
if (!map)
|
||||
goto out;
|
||||
|
||||
ret = parse_cluster(map, 0);
|
||||
if (ret != 0)
|
||||
goto out_map;
|
||||
|
||||
/*
|
||||
* Check that all cores are in the topology; the SMP code will
|
||||
* only mark cores described in the DT as possible.
|
||||
*/
|
||||
for_each_possible_cpu(cpu) {
|
||||
if (cpu_topology[cpu].cluster_id == -1) {
|
||||
pr_err("CPU%d: No topology information specified\n",
|
||||
cpu);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
out_map:
|
||||
of_node_put(map);
|
||||
out:
|
||||
of_node_put(cn);
|
||||
return ret;
|
||||
}
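Once parse_dt_topology() succeeds, the parsed IDs and the sibling masks filled in by update_siblings_masks() are what the scheduler consumes. A small debug sketch that dumps the parsed table, using the cpu_topology fields defined for this series; the function itself is hypothetical:

/* Sketch only: dump the DT-derived topology for each possible CPU. */
static void __init example_dump_topology(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		struct cpu_topology *t = &cpu_topology[cpu];

		pr_debug("CPU%u: cluster %d core %d thread %d\n",
			 cpu, t->cluster_id, t->core_id, t->thread_id);
	}
}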
|
||||
|
||||
/*
|
||||
* cpu topology table
|
||||
*/
|
||||
@ -39,13 +221,9 @@ static void update_siblings_masks(unsigned int cpuid)
|
||||
|
||||
if (cpuid_topo->cluster_id == -1) {
|
||||
/*
|
||||
* DT does not contain topology information for this cpu
|
||||
* reset it to default behaviour
|
||||
* DT does not contain topology information for this cpu.
|
||||
*/
|
||||
pr_debug("CPU%u: No topology information configured\n", cpuid);
|
||||
cpuid_topo->core_id = 0;
|
||||
cpumask_set_cpu(cpuid, &cpuid_topo->core_sibling);
|
||||
cpumask_set_cpu(cpuid, &cpuid_topo->thread_sibling);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -74,22 +252,32 @@ void store_cpu_topology(unsigned int cpuid)
|
||||
update_siblings_masks(cpuid);
|
||||
}
|
||||
|
||||
/*
 * init_cpu_topology is called at boot when only one CPU is running, which
 * prevents simultaneous write access to the cpu_topology array.
 */
|
||||
void __init init_cpu_topology(void)
|
||||
static void __init reset_cpu_topology(void)
|
||||
{
|
||||
unsigned int cpu;
|
||||
|
||||
/* init core mask and power*/
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct cpu_topology *cpu_topo = &cpu_topology[cpu];
|
||||
|
||||
cpu_topo->thread_id = -1;
|
||||
cpu_topo->core_id = -1;
|
||||
cpu_topo->core_id = 0;
|
||||
cpu_topo->cluster_id = -1;
|
||||
|
||||
cpumask_clear(&cpu_topo->core_sibling);
|
||||
cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
|
||||
cpumask_clear(&cpu_topo->thread_sibling);
|
||||
cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
|
||||
}
|
||||
}
|
||||
|
||||
void __init init_cpu_topology(void)
|
||||
{
|
||||
reset_cpu_topology();
|
||||
|
||||
/*
|
||||
* Discard anything that was parsed if we hit an error so we
|
||||
* don't use partial information.
|
||||
*/
|
||||
if (parse_dt_topology())
|
||||
reset_cpu_topology();
|
||||
}
|
||||
|
@ -251,10 +251,13 @@ void die(const char *str, struct pt_regs *regs, int err)
|
||||
void arm64_notify_die(const char *str, struct pt_regs *regs,
|
||||
struct siginfo *info, int err)
|
||||
{
|
||||
if (user_mode(regs))
|
||||
if (user_mode(regs)) {
|
||||
current->thread.fault_address = 0;
|
||||
current->thread.fault_code = err;
|
||||
force_sig_info(info->si_signo, info, current);
|
||||
else
|
||||
} else {
|
||||
die(str, regs, err);
|
||||
}
|
||||
}
|
||||
|
||||
asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
|
||||
|
@ -13,7 +13,7 @@
|
||||
#define ARM_EXIT_DISCARD(x) x
|
||||
|
||||
OUTPUT_ARCH(aarch64)
|
||||
ENTRY(stext)
|
||||
ENTRY(_text)
|
||||
|
||||
jiffies = jiffies_64;
|
||||
|
||||
|
@ -630,9 +630,15 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
|
||||
* whole of Stage-1. Weep...
|
||||
*/
|
||||
tlbi ipas2e1is, x1
|
||||
dsb sy
|
||||
/*
|
||||
* We have to ensure completion of the invalidation at Stage-2,
|
||||
* since a table walk on another CPU could refill a TLB with a
|
||||
* complete (S1 + S2) walk based on the old Stage-2 mapping if
|
||||
* the Stage-1 invalidation happened first.
|
||||
*/
|
||||
dsb ish
|
||||
tlbi vmalle1is
|
||||
dsb sy
|
||||
dsb ish
|
||||
isb
|
||||
|
||||
msr vttbr_el2, xzr
|
||||
@ -643,7 +649,7 @@ ENTRY(__kvm_flush_vm_context)
|
||||
dsb ishst
|
||||
tlbi alle1is
|
||||
ic ialluis
|
||||
dsb sy
|
||||
dsb ish
|
||||
ret
|
||||
ENDPROC(__kvm_flush_vm_context)
|
||||
|
||||
|
@ -71,13 +71,13 @@ static u32 get_ccsidr(u32 csselr)
|
||||
static void do_dc_cisw(u32 val)
|
||||
{
|
||||
asm volatile("dc cisw, %x0" : : "r" (val));
|
||||
dsb();
|
||||
dsb(ish);
|
||||
}
|
||||
|
||||
static void do_dc_csw(u32 val)
|
||||
{
|
||||
asm volatile("dc csw, %x0" : : "r" (val));
|
||||
dsb();
|
||||
dsb(ish);
|
||||
}
|
||||
|
||||
/* See note at ARM ARM B1.14.4 */
|
||||
|
@ -1,4 +1,5 @@
|
||||
lib-y := bitops.o clear_user.o delay.o copy_from_user.o \
|
||||
copy_to_user.o copy_in_user.o copy_page.o \
|
||||
clear_page.o memchr.o memcpy.o memmove.o memset.o \
|
||||
memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \
|
||||
strchr.o strrchr.o
|
||||
|
arch/arm64/lib/memcmp.S (new file, 258 lines)
@ -0,0 +1,258 @@
|
||||
/*
|
||||
* Copyright (C) 2013 ARM Ltd.
|
||||
* Copyright (C) 2013 Linaro.
|
||||
*
|
||||
* This code is based on glibc cortex strings work originally authored by Linaro
|
||||
* and re-licensed under GPLv2 for the Linux kernel. The original code can
|
||||
* be found @
|
||||
*
|
||||
* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
|
||||
* files/head:/src/aarch64/
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
/*
|
||||
 * compare memory areas (when the two areas have different alignment
 * offsets, alignment is handled by the hardware)
|
||||
*
|
||||
* Parameters:
|
||||
* x0 - const memory area 1 pointer
|
||||
* x1 - const memory area 2 pointer
|
||||
* x2 - the maximal compare byte length
|
||||
* Returns:
|
||||
* x0 - a compare result, maybe less than, equal to, or greater than ZERO
|
||||
*/
|
||||
|
||||
/* Parameters and result. */
|
||||
src1 .req x0
|
||||
src2 .req x1
|
||||
limit .req x2
|
||||
result .req x0
|
||||
|
||||
/* Internal variables. */
|
||||
data1 .req x3
|
||||
data1w .req w3
|
||||
data2 .req x4
|
||||
data2w .req w4
|
||||
has_nul .req x5
|
||||
diff .req x6
|
||||
endloop .req x7
|
||||
tmp1 .req x8
|
||||
tmp2 .req x9
|
||||
tmp3 .req x10
|
||||
pos .req x11
|
||||
limit_wd .req x12
|
||||
mask .req x13
|
||||
|
||||
ENTRY(memcmp)
|
||||
cbz limit, .Lret0
|
||||
eor tmp1, src1, src2
|
||||
tst tmp1, #7
|
||||
b.ne .Lmisaligned8
|
||||
ands tmp1, src1, #7
|
||||
b.ne .Lmutual_align
|
||||
sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
|
||||
lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
|
||||
/*
|
||||
* The input source addresses are at alignment boundary.
|
||||
* Directly compare eight bytes each time.
|
||||
*/
|
||||
.Lloop_aligned:
|
||||
ldr data1, [src1], #8
|
||||
ldr data2, [src2], #8
|
||||
.Lstart_realigned:
|
||||
subs limit_wd, limit_wd, #1
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
csinv endloop, diff, xzr, cs /* Last Dword or differences. */
|
||||
cbz endloop, .Lloop_aligned
|
||||
|
||||
/* Not reached the limit, must have found a diff. */
|
||||
tbz limit_wd, #63, .Lnot_limit
|
||||
|
||||
/* Limit % 8 == 0 => the diff is in the last 8 bytes. */
|
||||
ands limit, limit, #7
|
||||
b.eq .Lnot_limit
|
||||
/*
|
||||
* The remained bytes less than 8. It is needed to extract valid data
|
||||
* from last eight bytes of the intended memory range.
|
||||
*/
|
||||
lsl limit, limit, #3 /* bytes-> bits. */
|
||||
mov mask, #~0
|
||||
CPU_BE( lsr mask, mask, limit )
|
||||
CPU_LE( lsl mask, mask, limit )
|
||||
bic data1, data1, mask
|
||||
bic data2, data2, mask
|
||||
|
||||
orr diff, diff, mask
|
||||
b .Lnot_limit
|
||||
|
||||
.Lmutual_align:
|
||||
/*
|
||||
* Sources are mutually aligned, but are not currently at an
|
||||
* alignment boundary. Round down the addresses and then mask off
|
||||
* the bytes that precede the start point.
|
||||
*/
|
||||
bic src1, src1, #7
|
||||
bic src2, src2, #7
|
||||
ldr data1, [src1], #8
|
||||
ldr data2, [src2], #8
|
||||
	/*
	 * We cannot simply add the alignment offset (tmp1) to limit here,
	 * since the addition could overflow the limit.
	 */
|
||||
sub limit_wd, limit, #1/*limit != 0, so no underflow.*/
|
||||
and tmp3, limit_wd, #7
|
||||
lsr limit_wd, limit_wd, #3
|
||||
add tmp3, tmp3, tmp1
|
||||
add limit_wd, limit_wd, tmp3, lsr #3
|
||||
add limit, limit, tmp1/* Adjust the limit for the extra. */
|
||||
|
||||
lsl tmp1, tmp1, #3/* Bytes beyond alignment -> bits.*/
|
||||
neg tmp1, tmp1/* Bits to alignment -64. */
|
||||
mov tmp2, #~0
|
||||
/*mask off the non-intended bytes before the start address.*/
|
||||
CPU_BE( lsl tmp2, tmp2, tmp1 )/*Big-endian.Early bytes are at MSB*/
|
||||
/* Little-endian. Early bytes are at LSB. */
|
||||
CPU_LE( lsr tmp2, tmp2, tmp1 )
|
||||
|
||||
orr data1, data1, tmp2
|
||||
orr data2, data2, tmp2
|
||||
b .Lstart_realigned
|
||||
|
||||
/*src1 and src2 have different alignment offset.*/
|
||||
.Lmisaligned8:
|
||||
cmp limit, #8
|
||||
b.lo .Ltiny8proc /*limit < 8: compare byte by byte*/
|
||||
|
||||
and tmp1, src1, #7
|
||||
neg tmp1, tmp1
|
||||
add tmp1, tmp1, #8/*valid length in the first 8 bytes of src1*/
|
||||
and tmp2, src2, #7
|
||||
neg tmp2, tmp2
|
||||
add tmp2, tmp2, #8/*valid length in the first 8 bytes of src2*/
|
||||
subs tmp3, tmp1, tmp2
|
||||
csel pos, tmp1, tmp2, hi /*Choose the maximum.*/
|
||||
|
||||
sub limit, limit, pos
|
||||
/*compare the proceeding bytes in the first 8 byte segment.*/
|
||||
.Ltinycmp:
|
||||
ldrb data1w, [src1], #1
|
||||
ldrb data2w, [src2], #1
|
||||
subs pos, pos, #1
|
||||
ccmp data1w, data2w, #0, ne /* NZCV = 0b0000. */
|
||||
b.eq .Ltinycmp
|
||||
cbnz pos, 1f /*diff occurred before the last byte.*/
|
||||
cmp data1w, data2w
|
||||
b.eq .Lstart_align
|
||||
1:
|
||||
sub result, data1, data2
|
||||
ret
|
||||
|
||||
.Lstart_align:
|
||||
lsr limit_wd, limit, #3
|
||||
cbz limit_wd, .Lremain8
|
||||
|
||||
ands xzr, src1, #7
|
||||
b.eq .Lrecal_offset
|
||||
/*process more leading bytes to make src1 aligned...*/
|
||||
add src1, src1, tmp3 /*backwards src1 to alignment boundary*/
|
||||
add src2, src2, tmp3
|
||||
sub limit, limit, tmp3
|
||||
lsr limit_wd, limit, #3
|
||||
cbz limit_wd, .Lremain8
|
||||
/*load 8 bytes from aligned SRC1..*/
|
||||
ldr data1, [src1], #8
|
||||
ldr data2, [src2], #8
|
||||
|
||||
subs limit_wd, limit_wd, #1
|
||||
eor diff, data1, data2 /*Non-zero if differences found.*/
|
||||
csinv endloop, diff, xzr, ne
|
||||
cbnz endloop, .Lunequal_proc
|
||||
/*How far is the current SRC2 from the alignment boundary...*/
|
||||
and tmp3, tmp3, #7
|
||||
|
||||
.Lrecal_offset:/*src1 is aligned now..*/
|
||||
neg pos, tmp3
|
||||
.Lloopcmp_proc:
|
||||
	/*
	 * Divide the eight bytes into two parts. First, step src2 back to an
	 * alignment boundary, load eight bytes and compare from the SRC2
	 * alignment boundary. If all 8 bytes are equal, start the second
	 * part's comparison; otherwise finish the comparison here. This
	 * special handling guarantees that all accesses stay within the
	 * thread/task address space and never overrange.
	 */
|
||||
ldr data1, [src1,pos]
|
||||
ldr data2, [src2,pos]
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
cbnz diff, .Lnot_limit
|
||||
|
||||
/*The second part process*/
|
||||
ldr data1, [src1], #8
|
||||
ldr data2, [src2], #8
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
subs limit_wd, limit_wd, #1
|
||||
csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
|
||||
cbz endloop, .Lloopcmp_proc
|
||||
.Lunequal_proc:
|
||||
cbz diff, .Lremain8
|
||||
|
||||
	/* A difference occurred in the latest comparison. */
.Lnot_limit:
	/*
	 * For little endian, byte-reverse the values so that the least
	 * significant (equal) bits move towards the MSB; the following CLZ
	 * can then count how many equal bits exist.
	 */
|
||||
CPU_LE( rev diff, diff )
|
||||
CPU_LE( rev data1, data1 )
|
||||
CPU_LE( rev data2, data2 )
|
||||
|
||||
/*
|
||||
* The MS-non-zero bit of DIFF marks either the first bit
|
||||
* that is different, or the end of the significant data.
|
||||
* Shifting left now will bring the critical information into the
|
||||
* top bits.
|
||||
*/
|
||||
clz pos, diff
|
||||
lsl data1, data1, pos
|
||||
lsl data2, data2, pos
|
||||
/*
|
||||
* We need to zero-extend (char is unsigned) the value and then
|
||||
* perform a signed subtraction.
|
||||
*/
|
||||
lsr data1, data1, #56
|
||||
sub result, data1, data2, lsr #56
|
||||
ret
|
||||
|
||||
.Lremain8:
|
||||
/* Limit % 8 == 0 =>. all data are equal.*/
|
||||
ands limit, limit, #7
|
||||
b.eq .Lret0
|
||||
|
||||
.Ltiny8proc:
|
||||
ldrb data1w, [src1], #1
|
||||
ldrb data2w, [src2], #1
|
||||
subs limit, limit, #1
|
||||
|
||||
ccmp data1w, data2w, #0, ne /* NZCV = 0b0000. */
|
||||
b.eq .Ltiny8proc
|
||||
sub result, data1, data2
|
||||
ret
|
||||
.Lret0:
|
||||
mov result, #0
|
||||
ret
|
||||
ENDPROC(memcmp)
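The aligned fast path above boils down to comparing eight bytes at a time and, on a mismatch, locating the first differing byte. A plain C sketch of that idea (little-endian only, without the alignment and tail handling of the real routine; the function name is illustrative):

#include <stdint.h>
#include <string.h>

/* Sketch only: word-at-a-time compare, little-endian hosts. */
static int example_memcmp64_le(const void *p1, const void *p2, size_t n)
{
	const unsigned char *s1 = p1, *s2 = p2;

	while (n >= 8) {
		uint64_t a, b;

		memcpy(&a, s1, 8);
		memcpy(&b, s2, 8);
		if (a != b) {
			/* first differing byte is the lowest set byte of a^b */
			unsigned int byte = __builtin_ctzll(a ^ b) / 8;

			return s1[byte] - s2[byte];
		}
		s1 += 8;
		s2 += 8;
		n -= 8;
	}
	while (n--) {
		if (*s1 != *s2)
			return *s1 - *s2;
		s1++;
		s2++;
	}
	return 0;
}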
|
@ -1,5 +1,13 @@
|
||||
/*
|
||||
* Copyright (C) 2013 ARM Ltd.
|
||||
* Copyright (C) 2013 Linaro.
|
||||
*
|
||||
* This code is based on glibc cortex strings work originally authored by Linaro
|
||||
* and re-licensed under GPLv2 for the Linux kernel. The original code can
|
||||
* be found @
|
||||
*
|
||||
* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
|
||||
* files/head:/src/aarch64/
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
@ -16,6 +24,7 @@
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
#include <asm/cache.h>
|
||||
|
||||
/*
|
||||
* Copy a buffer from src to dest (alignment handled by the hardware)
|
||||
@ -27,27 +36,166 @@
|
||||
* Returns:
|
||||
* x0 - dest
|
||||
*/
|
||||
dstin .req x0
|
||||
src .req x1
|
||||
count .req x2
|
||||
tmp1 .req x3
|
||||
tmp1w .req w3
|
||||
tmp2 .req x4
|
||||
tmp2w .req w4
|
||||
tmp3 .req x5
|
||||
tmp3w .req w5
|
||||
dst .req x6
|
||||
|
||||
A_l .req x7
|
||||
A_h .req x8
|
||||
B_l .req x9
|
||||
B_h .req x10
|
||||
C_l .req x11
|
||||
C_h .req x12
|
||||
D_l .req x13
|
||||
D_h .req x14
|
||||
|
||||
ENTRY(memcpy)
|
||||
mov x4, x0
|
||||
subs x2, x2, #8
|
||||
b.mi 2f
|
||||
1: ldr x3, [x1], #8
|
||||
subs x2, x2, #8
|
||||
str x3, [x4], #8
|
||||
b.pl 1b
|
||||
2: adds x2, x2, #4
|
||||
b.mi 3f
|
||||
ldr w3, [x1], #4
|
||||
sub x2, x2, #4
|
||||
str w3, [x4], #4
|
||||
3: adds x2, x2, #2
|
||||
b.mi 4f
|
||||
ldrh w3, [x1], #2
|
||||
sub x2, x2, #2
|
||||
strh w3, [x4], #2
|
||||
4: adds x2, x2, #1
|
||||
b.mi 5f
|
||||
ldrb w3, [x1]
|
||||
strb w3, [x4]
|
||||
5: ret
|
||||
mov dst, dstin
|
||||
cmp count, #16
|
||||
	/* When the length is less than 16, the accesses may be unaligned. */
|
||||
b.lo .Ltiny15
|
||||
|
||||
neg tmp2, src
|
||||
ands tmp2, tmp2, #15/* Bytes to reach alignment. */
|
||||
b.eq .LSrcAligned
|
||||
sub count, count, tmp2
|
||||
	/*
	 * Copy the leading data from src to dst in increasing address order.
	 * This eliminates the risk of overwriting the source data when the
	 * distance between src and dst is less than 16. The memory accesses
	 * here are aligned.
	 */
|
||||
tbz tmp2, #0, 1f
|
||||
ldrb tmp1w, [src], #1
|
||||
strb tmp1w, [dst], #1
|
||||
1:
|
||||
tbz tmp2, #1, 2f
|
||||
ldrh tmp1w, [src], #2
|
||||
strh tmp1w, [dst], #2
|
||||
2:
|
||||
tbz tmp2, #2, 3f
|
||||
ldr tmp1w, [src], #4
|
||||
str tmp1w, [dst], #4
|
||||
3:
|
||||
tbz tmp2, #3, .LSrcAligned
|
||||
ldr tmp1, [src],#8
|
||||
str tmp1, [dst],#8
|
||||
|
||||
.LSrcAligned:
|
||||
cmp count, #64
|
||||
b.ge .Lcpy_over64
|
||||
/*
|
||||
* Deal with small copies quickly by dropping straight into the
|
||||
* exit block.
|
||||
*/
|
||||
.Ltail63:
|
||||
/*
|
||||
* Copy up to 48 bytes of data. At this point we only need the
|
||||
* bottom 6 bits of count to be accurate.
|
||||
*/
|
||||
ands tmp1, count, #0x30
|
||||
b.eq .Ltiny15
|
||||
cmp tmp1w, #0x20
|
||||
b.eq 1f
|
||||
b.lt 2f
|
||||
ldp A_l, A_h, [src], #16
|
||||
stp A_l, A_h, [dst], #16
|
||||
1:
|
||||
ldp A_l, A_h, [src], #16
|
||||
stp A_l, A_h, [dst], #16
|
||||
2:
|
||||
ldp A_l, A_h, [src], #16
|
||||
stp A_l, A_h, [dst], #16
|
||||
.Ltiny15:
|
||||
	/*
	 * Prefer to break one ldp/stp into several loads/stores that access
	 * memory in increasing address order, rather than loading/storing 16
	 * bytes from (src-16) to (dst-16) after stepping src back to an
	 * aligned address, as the original cortex memcpy does. Keeping the
	 * original scheme here would require memmove to guarantee that src
	 * is at least 16 bytes above dst, otherwise some source data would
	 * be overwritten when memmove calls memcpy directly. To keep memmove
	 * simple and decouple memcpy from memmove, the original scheme was
	 * dropped.
	 */
|
||||
tbz count, #3, 1f
|
||||
ldr tmp1, [src], #8
|
||||
str tmp1, [dst], #8
|
||||
1:
|
||||
tbz count, #2, 2f
|
||||
ldr tmp1w, [src], #4
|
||||
str tmp1w, [dst], #4
|
||||
2:
|
||||
tbz count, #1, 3f
|
||||
ldrh tmp1w, [src], #2
|
||||
strh tmp1w, [dst], #2
|
||||
3:
|
||||
tbz count, #0, .Lexitfunc
|
||||
ldrb tmp1w, [src]
|
||||
strb tmp1w, [dst]
|
||||
|
||||
.Lexitfunc:
|
||||
ret
|
||||
|
||||
.Lcpy_over64:
|
||||
subs count, count, #128
|
||||
b.ge .Lcpy_body_large
|
||||
/*
|
||||
* Less than 128 bytes to copy, so handle 64 here and then jump
|
||||
* to the tail.
|
||||
*/
|
||||
ldp A_l, A_h, [src],#16
|
||||
stp A_l, A_h, [dst],#16
|
||||
ldp B_l, B_h, [src],#16
|
||||
ldp C_l, C_h, [src],#16
|
||||
stp B_l, B_h, [dst],#16
|
||||
stp C_l, C_h, [dst],#16
|
||||
ldp D_l, D_h, [src],#16
|
||||
stp D_l, D_h, [dst],#16
|
||||
|
||||
tst count, #0x3f
|
||||
b.ne .Ltail63
|
||||
ret
|
||||
|
||||
/*
|
||||
* Critical loop. Start at a new cache line boundary. Assuming
|
||||
* 64 bytes per line this ensures the entire loop is in one line.
|
||||
*/
|
||||
.p2align L1_CACHE_SHIFT
|
||||
.Lcpy_body_large:
|
||||
/* pre-get 64 bytes data. */
|
||||
ldp A_l, A_h, [src],#16
|
||||
ldp B_l, B_h, [src],#16
|
||||
ldp C_l, C_h, [src],#16
|
||||
ldp D_l, D_h, [src],#16
|
||||
1:
|
||||
/*
|
||||
* interlace the load of next 64 bytes data block with store of the last
|
||||
* loaded 64 bytes data.
|
||||
*/
|
||||
stp A_l, A_h, [dst],#16
|
||||
ldp A_l, A_h, [src],#16
|
||||
stp B_l, B_h, [dst],#16
|
||||
ldp B_l, B_h, [src],#16
|
||||
stp C_l, C_h, [dst],#16
|
||||
ldp C_l, C_h, [src],#16
|
||||
stp D_l, D_h, [dst],#16
|
||||
ldp D_l, D_h, [src],#16
|
||||
subs count, count, #64
|
||||
b.ge 1b
|
||||
stp A_l, A_h, [dst],#16
|
||||
stp B_l, B_h, [dst],#16
|
||||
stp C_l, C_h, [dst],#16
|
||||
stp D_l, D_h, [dst],#16
|
||||
|
||||
tst count, #0x3f
|
||||
b.ne .Ltail63
|
||||
ret
|
||||
ENDPROC(memcpy)
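The .Lcpy_body_large loop above is a software-pipelined copy: 64 bytes are pre-loaded, and each iteration stores the previous block while loading the next, so loads and stores stay interleaved. A plain C sketch of the same structure, assuming at least 128 bytes to copy; names are illustrative and the sub-64-byte tail is left out, as in .Ltail63:

#include <stdint.h>
#include <string.h>

/* Sketch only: software-pipelined 64-byte block copy, n >= 128 assumed. */
static void example_copy64_pipelined(uint8_t *dst, const uint8_t *src, size_t n)
{
	uint64_t buf[8];

	memcpy(buf, src, 64);			/* pre-load the first block */
	src += 64;
	for (n -= 64; n >= 64; n -= 64) {
		memcpy(dst, buf, 64);		/* store the previous block */
		memcpy(buf, src, 64);		/* load the next block */
		dst += 64;
		src += 64;
	}
	memcpy(dst, buf, 64);			/* store the final pre-loaded block */
	dst += 64;
	/* the remaining n (< 64) bytes at src/dst are handled as in .Ltail63 */
}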
|
||||
|
@ -1,5 +1,13 @@
|
||||
/*
|
||||
* Copyright (C) 2013 ARM Ltd.
|
||||
* Copyright (C) 2013 Linaro.
|
||||
*
|
||||
* This code is based on glibc cortex strings work originally authored by Linaro
|
||||
* and re-licensed under GPLv2 for the Linux kernel. The original code can
|
||||
* be found @
|
||||
*
|
||||
* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
|
||||
* files/head:/src/aarch64/
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
@ -16,6 +24,7 @@
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
#include <asm/cache.h>
|
||||
|
||||
/*
|
||||
 * Move a buffer from src to dest (alignment handled by the hardware).
|
||||
@ -28,30 +37,161 @@
|
||||
* Returns:
|
||||
* x0 - dest
|
||||
*/
|
||||
dstin .req x0
|
||||
src .req x1
|
||||
count .req x2
|
||||
tmp1 .req x3
|
||||
tmp1w .req w3
|
||||
tmp2 .req x4
|
||||
tmp2w .req w4
|
||||
tmp3 .req x5
|
||||
tmp3w .req w5
|
||||
dst .req x6
|
||||
|
||||
A_l .req x7
|
||||
A_h .req x8
|
||||
B_l .req x9
|
||||
B_h .req x10
|
||||
C_l .req x11
|
||||
C_h .req x12
|
||||
D_l .req x13
|
||||
D_h .req x14
|
||||
|
||||
ENTRY(memmove)
|
||||
cmp x0, x1
|
||||
b.ls memcpy
|
||||
add x4, x0, x2
|
||||
add x1, x1, x2
|
||||
subs x2, x2, #8
|
||||
b.mi 2f
|
||||
1: ldr x3, [x1, #-8]!
|
||||
subs x2, x2, #8
|
||||
str x3, [x4, #-8]!
|
||||
b.pl 1b
|
||||
2: adds x2, x2, #4
|
||||
b.mi 3f
|
||||
ldr w3, [x1, #-4]!
|
||||
sub x2, x2, #4
|
||||
str w3, [x4, #-4]!
|
||||
3: adds x2, x2, #2
|
||||
b.mi 4f
|
||||
ldrh w3, [x1, #-2]!
|
||||
sub x2, x2, #2
|
||||
strh w3, [x4, #-2]!
|
||||
4: adds x2, x2, #1
|
||||
b.mi 5f
|
||||
ldrb w3, [x1, #-1]
|
||||
strb w3, [x4, #-1]
|
||||
5: ret
|
||||
cmp dstin, src
|
||||
b.lo memcpy
|
||||
add tmp1, src, count
|
||||
cmp dstin, tmp1
|
||||
b.hs memcpy /* No overlap. */
|
||||
|
||||
add dst, dstin, count
|
||||
add src, src, count
|
||||
cmp count, #16
|
||||
	b.lo	.Ltail15	/* probably unaligned accesses */
|
||||
|
||||
ands tmp2, src, #15 /* Bytes to reach alignment. */
|
||||
b.eq .LSrcAligned
|
||||
sub count, count, tmp2
|
||||
	/*
	 * Process the unaligned leading bytes first so that src becomes
	 * aligned; the cost of these extra instructions is acceptable, and
	 * it ensures the subsequent accesses are based on aligned addresses.
	 */
|
||||
tbz tmp2, #0, 1f
|
||||
ldrb tmp1w, [src, #-1]!
|
||||
strb tmp1w, [dst, #-1]!
|
||||
1:
|
||||
tbz tmp2, #1, 2f
|
||||
ldrh tmp1w, [src, #-2]!
|
||||
strh tmp1w, [dst, #-2]!
|
||||
2:
|
||||
tbz tmp2, #2, 3f
|
||||
ldr tmp1w, [src, #-4]!
|
||||
str tmp1w, [dst, #-4]!
|
||||
3:
|
||||
tbz tmp2, #3, .LSrcAligned
|
||||
ldr tmp1, [src, #-8]!
|
||||
str tmp1, [dst, #-8]!
|
||||
|
||||
.LSrcAligned:
|
||||
cmp count, #64
|
||||
b.ge .Lcpy_over64
|
||||
|
||||
/*
|
||||
* Deal with small copies quickly by dropping straight into the
|
||||
* exit block.
|
||||
*/
|
||||
.Ltail63:
|
||||
/*
|
||||
* Copy up to 48 bytes of data. At this point we only need the
|
||||
* bottom 6 bits of count to be accurate.
|
||||
*/
|
||||
ands tmp1, count, #0x30
|
||||
b.eq .Ltail15
|
||||
cmp tmp1w, #0x20
|
||||
b.eq 1f
|
||||
b.lt 2f
|
||||
ldp A_l, A_h, [src, #-16]!
|
||||
stp A_l, A_h, [dst, #-16]!
|
||||
1:
|
||||
ldp A_l, A_h, [src, #-16]!
|
||||
stp A_l, A_h, [dst, #-16]!
|
||||
2:
|
||||
ldp A_l, A_h, [src, #-16]!
|
||||
stp A_l, A_h, [dst, #-16]!
|
||||
|
||||
.Ltail15:
|
||||
tbz count, #3, 1f
|
||||
ldr tmp1, [src, #-8]!
|
||||
str tmp1, [dst, #-8]!
|
||||
1:
|
||||
tbz count, #2, 2f
|
||||
ldr tmp1w, [src, #-4]!
|
||||
str tmp1w, [dst, #-4]!
|
||||
2:
|
||||
tbz count, #1, 3f
|
||||
ldrh tmp1w, [src, #-2]!
|
||||
strh tmp1w, [dst, #-2]!
|
||||
3:
|
||||
tbz count, #0, .Lexitfunc
|
||||
ldrb tmp1w, [src, #-1]
|
||||
strb tmp1w, [dst, #-1]
|
||||
|
||||
.Lexitfunc:
|
||||
ret
|
||||
|
||||
.Lcpy_over64:
|
||||
subs count, count, #128
|
||||
b.ge .Lcpy_body_large
|
||||
/*
|
||||
* Less than 128 bytes to copy, so handle 64 bytes here and then jump
|
||||
* to the tail.
|
||||
*/
|
||||
ldp A_l, A_h, [src, #-16]
|
||||
stp A_l, A_h, [dst, #-16]
|
||||
ldp B_l, B_h, [src, #-32]
|
||||
ldp C_l, C_h, [src, #-48]
|
||||
stp B_l, B_h, [dst, #-32]
|
||||
stp C_l, C_h, [dst, #-48]
|
||||
ldp D_l, D_h, [src, #-64]!
|
||||
stp D_l, D_h, [dst, #-64]!
|
||||
|
||||
tst count, #0x3f
|
||||
b.ne .Ltail63
|
||||
ret
|
||||
|
||||
/*
|
||||
* Critical loop. Start at a new cache line boundary. Assuming
|
||||
* 64 bytes per line this ensures the entire loop is in one line.
|
||||
*/
|
||||
.p2align L1_CACHE_SHIFT
|
||||
.Lcpy_body_large:
|
||||
/* pre-load 64 bytes data. */
|
||||
ldp A_l, A_h, [src, #-16]
|
||||
ldp B_l, B_h, [src, #-32]
|
||||
ldp C_l, C_h, [src, #-48]
|
||||
ldp D_l, D_h, [src, #-64]!
|
||||
1:
|
||||
/*
|
||||
* interlace the load of next 64 bytes data block with store of the last
|
||||
* loaded 64 bytes data.
|
||||
*/
|
||||
stp A_l, A_h, [dst, #-16]
|
||||
ldp A_l, A_h, [src, #-16]
|
||||
stp B_l, B_h, [dst, #-32]
|
||||
ldp B_l, B_h, [src, #-32]
|
||||
stp C_l, C_h, [dst, #-48]
|
||||
ldp C_l, C_h, [src, #-48]
|
||||
stp D_l, D_h, [dst, #-64]!
|
||||
ldp D_l, D_h, [src, #-64]!
|
||||
subs count, count, #64
|
||||
b.ge 1b
|
||||
stp A_l, A_h, [dst, #-16]
|
||||
stp B_l, B_h, [dst, #-32]
|
||||
stp C_l, C_h, [dst, #-48]
|
||||
stp D_l, D_h, [dst, #-64]!
|
||||
|
||||
tst count, #0x3f
|
||||
b.ne .Ltail63
|
||||
ret
|
||||
ENDPROC(memmove)
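
The entry sequence above falls back to memcpy whenever a forward copy is safe: either dst sits below src, or dst starts at or beyond src + count. The following is a rough C restatement of that dispatch; copy_forward and copy_backward are hypothetical stand-ins for the two assembly paths, not kernel functions.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical helpers standing in for the two assembly code paths. */
static void copy_forward(void *dst, const void *src, size_t n)
{
    memcpy(dst, src, n);            /* a forward copy is safe here */
}

static void copy_backward(void *dst, const void *src, size_t n)
{
    unsigned char *d = (unsigned char *)dst + n;
    const unsigned char *s = (const unsigned char *)src + n;

    while (n--)                     /* copy from the end, as memmove.S does */
        *--d = *--s;
}

void *my_memmove(void *dst, const void *src, size_t n)
{
    uintptr_t d = (uintptr_t)dst, s = (uintptr_t)src;

    /* Same test as the assembly: "b.lo memcpy" and "b.hs memcpy". */
    if (d < s || d >= s + n)
        copy_forward(dst, src, n);
    else
        copy_backward(dst, src, n);
    return dst;
}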
|
||||
|
@ -1,5 +1,13 @@
|
||||
/*
|
||||
* Copyright (C) 2013 ARM Ltd.
|
||||
* Copyright (C) 2013 Linaro.
|
||||
*
|
||||
* This code is based on glibc cortex strings work originally authored by Linaro
|
||||
* and re-licensed under GPLv2 for the Linux kernel. The original code can
|
||||
* be found @
|
||||
*
|
||||
* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
|
||||
* files/head:/src/aarch64/
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
@ -16,6 +24,7 @@
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
#include <asm/cache.h>
|
||||
|
||||
/*
|
||||
* Fill in the buffer with character c (alignment handled by the hardware)
|
||||
@ -27,27 +36,181 @@
|
||||
* Returns:
|
||||
* x0 - buf
|
||||
*/
|
||||
|
||||
dstin .req x0
|
||||
val .req w1
|
||||
count .req x2
|
||||
tmp1 .req x3
|
||||
tmp1w .req w3
|
||||
tmp2 .req x4
|
||||
tmp2w .req w4
|
||||
zva_len_x .req x5
|
||||
zva_len .req w5
|
||||
zva_bits_x .req x6
|
||||
|
||||
A_l .req x7
|
||||
A_lw .req w7
|
||||
dst .req x8
|
||||
tmp3w .req w9
|
||||
tmp3 .req x9
|
||||
|
||||
ENTRY(memset)
|
||||
mov x4, x0
|
||||
and w1, w1, #0xff
|
||||
orr w1, w1, w1, lsl #8
|
||||
orr w1, w1, w1, lsl #16
|
||||
orr x1, x1, x1, lsl #32
|
||||
subs x2, x2, #8
|
||||
b.mi 2f
|
||||
1: str x1, [x4], #8
|
||||
subs x2, x2, #8
|
||||
b.pl 1b
|
||||
2: adds x2, x2, #4
|
||||
b.mi 3f
|
||||
sub x2, x2, #4
|
||||
str w1, [x4], #4
|
||||
3: adds x2, x2, #2
|
||||
b.mi 4f
|
||||
sub x2, x2, #2
|
||||
strh w1, [x4], #2
|
||||
4: adds x2, x2, #1
|
||||
b.mi 5f
|
||||
strb w1, [x4]
|
||||
5: ret
|
||||
mov dst, dstin /* Preserve return value. */
|
||||
and A_lw, val, #255
|
||||
orr A_lw, A_lw, A_lw, lsl #8
|
||||
orr A_lw, A_lw, A_lw, lsl #16
|
||||
orr A_l, A_l, A_l, lsl #32
|
||||
|
||||
cmp count, #15
|
||||
b.hi .Lover16_proc
|
||||
/* All stores may be unaligned. */
|
||||
tbz count, #3, 1f
|
||||
str A_l, [dst], #8
|
||||
1:
|
||||
tbz count, #2, 2f
|
||||
str A_lw, [dst], #4
|
||||
2:
|
||||
tbz count, #1, 3f
|
||||
strh A_lw, [dst], #2
|
||||
3:
|
||||
tbz count, #0, 4f
|
||||
strb A_lw, [dst]
|
||||
4:
|
||||
ret
|
||||
|
||||
.Lover16_proc:
|
||||
/* Check whether the start address is 16-byte aligned. */
|
||||
neg tmp2, dst
|
||||
ands tmp2, tmp2, #15
|
||||
b.eq .Laligned
|
||||
/*
* The count is at least 16, so we can use stp to store the first 16 bytes
* and then advance dst to a 16-byte boundary, leaving the current memory
* address aligned for the following stores.
*/
|
||||
stp A_l, A_l, [dst] /* unaligned store */
/* make dst 16-byte aligned */
|
||||
sub count, count, tmp2
|
||||
add dst, dst, tmp2
|
||||
|
||||
.Laligned:
|
||||
cbz A_l, .Lzero_mem
|
||||
|
||||
.Ltail_maybe_long:
|
||||
cmp count, #64
|
||||
b.ge .Lnot_short
|
||||
.Ltail63:
|
||||
ands tmp1, count, #0x30
|
||||
b.eq 3f
|
||||
cmp tmp1w, #0x20
|
||||
b.eq 1f
|
||||
b.lt 2f
|
||||
stp A_l, A_l, [dst], #16
|
||||
1:
|
||||
stp A_l, A_l, [dst], #16
|
||||
2:
|
||||
stp A_l, A_l, [dst], #16
|
||||
/*
* The remaining length is less than 16; use stp to write the last 16 bytes.
* Some bytes may be written twice and the access may be unaligned.
*/
|
||||
3:
|
||||
ands count, count, #15
|
||||
cbz count, 4f
|
||||
add dst, dst, count
|
||||
stp A_l, A_l, [dst, #-16] /* Repeat some/all of last store. */
|
||||
4:
|
||||
ret
|
||||
|
||||
/*
|
||||
* Critical loop. Start at a new cache line boundary. Assuming
|
||||
* 64 bytes per line, this ensures the entire loop is in one line.
|
||||
*/
|
||||
.p2align L1_CACHE_SHIFT
|
||||
.Lnot_short:
|
||||
sub dst, dst, #16/* Pre-bias. */
|
||||
sub count, count, #64
|
||||
1:
|
||||
stp A_l, A_l, [dst, #16]
|
||||
stp A_l, A_l, [dst, #32]
|
||||
stp A_l, A_l, [dst, #48]
|
||||
stp A_l, A_l, [dst, #64]!
|
||||
subs count, count, #64
|
||||
b.ge 1b
|
||||
tst count, #0x3f
|
||||
add dst, dst, #16
|
||||
b.ne .Ltail63
|
||||
.Lexitfunc:
|
||||
ret
|
||||
|
||||
/*
|
||||
* For zeroing memory, check to see if we can use the ZVA feature to
|
||||
* zero entire 'cache' lines.
|
||||
*/
|
||||
.Lzero_mem:
|
||||
cmp count, #63
|
||||
b.le .Ltail63
|
||||
/*
|
||||
* For zeroing small amounts of memory, it's not worth setting up
|
||||
* the line-clear code.
|
||||
*/
|
||||
cmp count, #128
|
||||
b.lt .Lnot_short /* fall through only if count >= 128 */
|
||||
|
||||
mrs tmp1, dczid_el0
|
||||
tbnz tmp1, #4, .Lnot_short
|
||||
mov tmp3w, #4
|
||||
and zva_len, tmp1w, #15 /* Safety: other bits reserved. */
|
||||
lsl zva_len, tmp3w, zva_len
|
||||
|
||||
ands tmp3w, zva_len, #63
|
||||
/*
* Ensure zva_len is at least 64; using ZVA is not worthwhile if the
* block size is smaller than 64 bytes.
*/
|
||||
b.ne .Lnot_short
|
||||
.Lzero_by_line:
|
||||
/*
|
||||
* Compute how far we need to go to become suitably aligned. We're
|
||||
* already at quad-word alignment.
|
||||
*/
|
||||
cmp count, zva_len_x
|
||||
b.lt .Lnot_short /* Not enough to reach alignment. */
|
||||
sub zva_bits_x, zva_len_x, #1
|
||||
neg tmp2, dst
|
||||
ands tmp2, tmp2, zva_bits_x
|
||||
b.eq 2f /* Already aligned. */
|
||||
/* Not aligned, check that there's enough to copy after alignment.*/
|
||||
sub tmp1, count, tmp2
|
||||
/*
* Guarantee that the remaining length to be zeroed with ZVA is larger
* than 64, so that the processing at 2f does not run past the end of
* the memory range.
*/
|
||||
cmp tmp1, #64
|
||||
ccmp tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */
|
||||
b.lt .Lnot_short
|
||||
/*
|
||||
* We know that there's at least 64 bytes to zero and that it's safe
|
||||
* to overrun by 64 bytes.
|
||||
*/
|
||||
mov count, tmp1
|
||||
1:
|
||||
stp A_l, A_l, [dst]
|
||||
stp A_l, A_l, [dst, #16]
|
||||
stp A_l, A_l, [dst, #32]
|
||||
subs tmp2, tmp2, #64
|
||||
stp A_l, A_l, [dst, #48]
|
||||
add dst, dst, #64
|
||||
b.ge 1b
|
||||
/* We've overrun a bit, so adjust dst downwards.*/
|
||||
add dst, dst, tmp2
|
||||
2:
|
||||
sub count, count, zva_len_x
|
||||
3:
|
||||
dc zva, dst
|
||||
add dst, dst, zva_len_x
|
||||
subs count, count, zva_len_x
|
||||
b.ge 3b
|
||||
ands count, count, zva_bits_x
|
||||
b.ne .Ltail_maybe_long
|
||||
ret
|
||||
ENDPROC(memset)
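
The zeroing path above sizes its "dc zva" blocks from DCZID_EL0: bits [3:0] hold log2 of the block size in 4-byte words, so the byte size is 4 << (dczid & 0xf), and a set bit 4 means ZVA is prohibited. Below is a minimal user-space C sketch of that decode (the register is readable at EL0 on AArch64; read_dczid is our own helper name, not a kernel interface).

#include <stdint.h>
#include <stdio.h>

static inline uint64_t read_dczid(void)
{
    uint64_t v;

    asm volatile("mrs %0, dczid_el0" : "=r"(v));   /* AArch64 only */
    return v;
}

int main(void)
{
    uint64_t dczid = read_dczid();

    if (dczid & (1u << 4)) {
        puts("DC ZVA prohibited, fall back to stp stores");
        return 0;
    }

    /* Same computation as "mov tmp3w, #4; lsl zva_len, tmp3w, zva_len". */
    unsigned int zva_len = 4u << (dczid & 0xf);

    /* memset.S only uses ZVA when the block is at least 64 bytes. */
    printf("DC ZVA block size: %u bytes%s\n", zva_len,
           zva_len < 64 ? " (too small, not used)" : "");
    return 0;
}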
|
||||
|
arch/arm64/lib/strcmp.S (new file, 234 lines)
@ -0,0 +1,234 @@
|
||||
/*
|
||||
* Copyright (C) 2013 ARM Ltd.
|
||||
* Copyright (C) 2013 Linaro.
|
||||
*
|
||||
* This code is based on glibc cortex strings work originally authored by Linaro
|
||||
* and re-licensed under GPLv2 for the Linux kernel. The original code can
|
||||
* be found @
|
||||
*
|
||||
* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
|
||||
* files/head:/src/aarch64/
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
/*
|
||||
* compare two strings
|
||||
*
|
||||
* Parameters:
|
||||
* x0 - const string 1 pointer
|
||||
* x1 - const string 2 pointer
|
||||
* Returns:
|
||||
* x0 - an integer less than, equal to, or greater than zero
|
||||
* if s1 is found, respectively, to be less than, to match,
|
||||
* or be greater than s2.
|
||||
*/
|
||||
|
||||
#define REP8_01 0x0101010101010101
|
||||
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||
#define REP8_80 0x8080808080808080
|
||||
|
||||
/* Parameters and result. */
|
||||
src1 .req x0
|
||||
src2 .req x1
|
||||
result .req x0
|
||||
|
||||
/* Internal variables. */
|
||||
data1 .req x2
|
||||
data1w .req w2
|
||||
data2 .req x3
|
||||
data2w .req w3
|
||||
has_nul .req x4
|
||||
diff .req x5
|
||||
syndrome .req x6
|
||||
tmp1 .req x7
|
||||
tmp2 .req x8
|
||||
tmp3 .req x9
|
||||
zeroones .req x10
|
||||
pos .req x11
|
||||
|
||||
ENTRY(strcmp)
|
||||
eor tmp1, src1, src2
|
||||
mov zeroones, #REP8_01
|
||||
tst tmp1, #7
|
||||
b.ne .Lmisaligned8
|
||||
ands tmp1, src1, #7
|
||||
b.ne .Lmutual_align
|
||||
|
||||
/*
|
||||
* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||
* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||
* can be done in parallel across the entire word.
|
||||
*/
|
||||
.Lloop_aligned:
|
||||
ldr data1, [src1], #8
|
||||
ldr data2, [src2], #8
|
||||
.Lstart_realigned:
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
|
||||
orr syndrome, diff, has_nul
|
||||
cbz syndrome, .Lloop_aligned
|
||||
b .Lcal_cmpresult
|
||||
|
||||
.Lmutual_align:
|
||||
/*
|
||||
* Sources are mutually aligned, but are not currently at an
|
||||
* alignment boundary. Round down the addresses and then mask off
|
||||
* the bytes that precede the start point.
|
||||
*/
|
||||
bic src1, src1, #7
|
||||
bic src2, src2, #7
|
||||
lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
|
||||
ldr data1, [src1], #8
|
||||
neg tmp1, tmp1 /* Bits to alignment -64. */
|
||||
ldr data2, [src2], #8
|
||||
mov tmp2, #~0
|
||||
/* Big-endian. Early bytes are at MSB. */
|
||||
CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
|
||||
/* Little-endian. Early bytes are at LSB. */
|
||||
CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
|
||||
|
||||
orr data1, data1, tmp2
|
||||
orr data2, data2, tmp2
|
||||
b .Lstart_realigned
|
||||
|
||||
.Lmisaligned8:
|
||||
/*
* Get the alignment offset and compare that many bytes one at a time
* first. After this, one string's address will be aligned.
*/
|
||||
and tmp1, src1, #7
|
||||
neg tmp1, tmp1
|
||||
add tmp1, tmp1, #8
|
||||
and tmp2, src2, #7
|
||||
neg tmp2, tmp2
|
||||
add tmp2, tmp2, #8
|
||||
subs tmp3, tmp1, tmp2
|
||||
csel pos, tmp1, tmp2, hi /*Choose the maximum. */
|
||||
.Ltinycmp:
|
||||
ldrb data1w, [src1], #1
|
||||
ldrb data2w, [src2], #1
|
||||
subs pos, pos, #1
|
||||
ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */
|
||||
ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
|
||||
b.eq .Ltinycmp
|
||||
cbnz pos, 1f /*find the null or unequal...*/
|
||||
cmp data1w, #1
|
||||
ccmp data1w, data2w, #0, cs
|
||||
b.eq .Lstart_align /*the last bytes are equal....*/
|
||||
1:
|
||||
sub result, data1, data2
|
||||
ret
|
||||
|
||||
.Lstart_align:
|
||||
ands xzr, src1, #7
|
||||
b.eq .Lrecal_offset
|
||||
/*process more leading bytes to make str1 aligned...*/
|
||||
add src1, src1, tmp3
|
||||
add src2, src2, tmp3
|
||||
/*load 8 bytes from aligned str1 and non-aligned str2..*/
|
||||
ldr data1, [src1], #8
|
||||
ldr data2, [src2], #8
|
||||
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
bic has_nul, tmp1, tmp2
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
orr syndrome, diff, has_nul
|
||||
cbnz syndrome, .Lcal_cmpresult
|
||||
/*How far is the current str2 from the alignment boundary...*/
|
||||
and tmp3, tmp3, #7
|
||||
.Lrecal_offset:
|
||||
neg pos, tmp3
|
||||
.Lloopcmp_proc:
|
||||
/*
* Divide the eight bytes into two parts. First, step src2 back to an
* alignment boundary, load eight bytes from that aligned SRC2 address
* and compare them with the corresponding bytes from SRC1.
* If all eight bytes are equal, start the second part of the comparison;
* otherwise finish the comparison here.
* This special handling guarantees that all accesses stay within the
* thread/task address space and never read out of range.
*/
|
||||
ldr data1, [src1,pos]
|
||||
ldr data2, [src2,pos]
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
bic has_nul, tmp1, tmp2
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
orr syndrome, diff, has_nul
|
||||
cbnz syndrome, .Lcal_cmpresult
|
||||
|
||||
/* the second part of the comparison */
|
||||
ldr data1, [src1], #8
|
||||
ldr data2, [src2], #8
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
bic has_nul, tmp1, tmp2
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
orr syndrome, diff, has_nul
|
||||
cbz syndrome, .Lloopcmp_proc
|
||||
|
||||
.Lcal_cmpresult:
|
||||
/*
* Reverse the byte order to big-endian so that CLZ can find the most
* significant zero bits.
*/
|
||||
CPU_LE( rev syndrome, syndrome )
|
||||
CPU_LE( rev data1, data1 )
|
||||
CPU_LE( rev data2, data2 )
|
||||
|
||||
/*
|
||||
* For big-endian we cannot use the trick with the syndrome value
|
||||
* as carry-propagation can corrupt the upper bits if the trailing
|
||||
* bytes in the string contain 0x01.
|
||||
* However, if there is no NUL byte in the dword, we can generate
|
||||
* the result directly. We cannot just subtract the bytes as the
|
||||
* MSB might be significant.
|
||||
*/
|
||||
CPU_BE( cbnz has_nul, 1f )
|
||||
CPU_BE( cmp data1, data2 )
|
||||
CPU_BE( cset result, ne )
|
||||
CPU_BE( cneg result, result, lo )
|
||||
CPU_BE( ret )
|
||||
CPU_BE( 1: )
|
||||
/*Re-compute the NUL-byte detection, using a byte-reversed value. */
|
||||
CPU_BE( rev tmp3, data1 )
|
||||
CPU_BE( sub tmp1, tmp3, zeroones )
|
||||
CPU_BE( orr tmp2, tmp3, #REP8_7f )
|
||||
CPU_BE( bic has_nul, tmp1, tmp2 )
|
||||
CPU_BE( rev has_nul, has_nul )
|
||||
CPU_BE( orr syndrome, diff, has_nul )
|
||||
|
||||
clz pos, syndrome
|
||||
/*
|
||||
* The MS-non-zero bit of the syndrome marks either the first bit
|
||||
* that is different, or the top bit of the first zero byte.
|
||||
* Shifting left now will bring the critical information into the
|
||||
* top bits.
|
||||
*/
|
||||
lsl data1, data1, pos
|
||||
lsl data2, data2, pos
|
||||
/*
|
||||
* But we need to zero-extend (char is unsigned) the value and then
|
||||
* perform a signed 32-bit subtraction.
|
||||
*/
|
||||
lsr data1, data1, #56
|
||||
sub result, data1, data2, lsr #56
|
||||
ret
|
||||
ENDPROC(strcmp)
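
The aligned loop above hinges on the zero-byte test the comments describe: (x - 0x01...01) & ~x & 0x80...80 is non-zero iff some byte of x is zero, and a differing byte shows up in x ^ y; their OR is the "syndrome". Below is a hedged little-endian C sketch of how a per-chunk result falls out of that syndrome. It is an illustration only, not the kernel code; the byte-reverse-plus-CLZ step in the assembly is replaced by a count-trailing-zeros builtin here.

#include <stdint.h>

#define REP8_01 0x0101010101010101ULL
#define REP8_80 0x8080808080808080ULL

/* Non-zero iff some byte of x is zero. */
static uint64_t has_zero(uint64_t x)
{
    return (x - REP8_01) & ~x & REP8_80;
}

/*
 * Compare one little-endian 8-byte chunk: build the syndrome from
 * "bytes differ" and "NUL seen", find its first interesting byte and
 * return the difference of the bytes at that position.
 */
static int cmp_chunk(uint64_t d1, uint64_t d2, int *done)
{
    uint64_t syndrome = (d1 ^ d2) | has_zero(d1);

    if (!syndrome) {
        *done = 0;              /* equal so far, no NUL: keep looping */
        return 0;
    }
    *done = 1;
    /* On little-endian the lowest set bit marks the first NUL/differing byte. */
    unsigned int byte = __builtin_ctzll(syndrome) / 8;
    return (int)((d1 >> (8 * byte)) & 0xff) -
           (int)((d2 >> (8 * byte)) & 0xff);
}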
|
arch/arm64/lib/strlen.S (new file, 126 lines)
@ -0,0 +1,126 @@
|
||||
/*
|
||||
* Copyright (C) 2013 ARM Ltd.
|
||||
* Copyright (C) 2013 Linaro.
|
||||
*
|
||||
* This code is based on glibc cortex strings work originally authored by Linaro
|
||||
* and re-licensed under GPLv2 for the Linux kernel. The original code can
|
||||
* be found @
|
||||
*
|
||||
* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
|
||||
* files/head:/src/aarch64/
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
/*
|
||||
* calculate the length of a string
|
||||
*
|
||||
* Parameters:
|
||||
* x0 - const string pointer
|
||||
* Returns:
|
||||
* x0 - the return length of specific string
|
||||
*/
|
||||
|
||||
/* Arguments and results. */
|
||||
srcin .req x0
|
||||
len .req x0
|
||||
|
||||
/* Locals and temporaries. */
|
||||
src .req x1
|
||||
data1 .req x2
|
||||
data2 .req x3
|
||||
data2a .req x4
|
||||
has_nul1 .req x5
|
||||
has_nul2 .req x6
|
||||
tmp1 .req x7
|
||||
tmp2 .req x8
|
||||
tmp3 .req x9
|
||||
tmp4 .req x10
|
||||
zeroones .req x11
|
||||
pos .req x12
|
||||
|
||||
#define REP8_01 0x0101010101010101
|
||||
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||
#define REP8_80 0x8080808080808080
|
||||
|
||||
ENTRY(strlen)
|
||||
mov zeroones, #REP8_01
|
||||
bic src, srcin, #15
|
||||
ands tmp1, srcin, #15
|
||||
b.ne .Lmisaligned
|
||||
/*
|
||||
* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||
* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||
* can be done in parallel across the entire word.
|
||||
*/
|
||||
/*
|
||||
* The inner loop deals with two Dwords at a time. This has a
|
||||
* slightly higher start-up cost, but we should win quite quickly,
|
||||
* especially on cores with a high number of issue slots per
|
||||
* cycle, as we get much better parallelism out of the operations.
|
||||
*/
|
||||
.Lloop:
|
||||
ldp data1, data2, [src], #16
|
||||
.Lrealigned:
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp4, data2, #REP8_7f
|
||||
bic has_nul1, tmp1, tmp2
|
||||
bics has_nul2, tmp3, tmp4
|
||||
ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
|
||||
b.eq .Lloop
|
||||
|
||||
sub len, src, srcin
|
||||
cbz has_nul1, .Lnul_in_data2
|
||||
CPU_BE( mov data2, data1 ) /*prepare data to re-calculate the syndrome*/
|
||||
sub len, len, #8
|
||||
mov has_nul2, has_nul1
|
||||
.Lnul_in_data2:
|
||||
/*
|
||||
* For big-endian, carry propagation (if the final byte in the
|
||||
* string is 0x01) means we cannot use has_nul directly. The
|
||||
* easiest way to get the correct byte is to byte-swap the data
|
||||
* and calculate the syndrome a second time.
|
||||
*/
|
||||
CPU_BE( rev data2, data2 )
|
||||
CPU_BE( sub tmp1, data2, zeroones )
|
||||
CPU_BE( orr tmp2, data2, #REP8_7f )
|
||||
CPU_BE( bic has_nul2, tmp1, tmp2 )
|
||||
|
||||
sub len, len, #8
|
||||
rev has_nul2, has_nul2
|
||||
clz pos, has_nul2
|
||||
add len, len, pos, lsr #3 /* Bits to bytes. */
|
||||
ret
|
||||
|
||||
.Lmisaligned:
|
||||
cmp tmp1, #8
|
||||
neg tmp1, tmp1
|
||||
ldp data1, data2, [src], #16
|
||||
lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
|
||||
mov tmp2, #~0
|
||||
/* Big-endian. Early bytes are at MSB. */
|
||||
CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
|
||||
/* Little-endian. Early bytes are at LSB. */
|
||||
CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
|
||||
|
||||
orr data1, data1, tmp2
|
||||
orr data2a, data2, tmp2
|
||||
csinv data1, data1, xzr, le
|
||||
csel data2, data2, data2a, le
|
||||
b .Lrealigned
|
||||
ENDPROC(strlen)
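
Once the loop above spots a NUL, the tail turns the has_nul mask into a byte offset by byte-reversing it, counting leading zeros and shifting right by three. A short C equivalent for one little-endian 8-byte chunk, assuming GCC/Clang builtins:

#include <stddef.h>
#include <stdint.h>

#define REP8_01 0x0101010101010101ULL
#define REP8_80 0x8080808080808080ULL

/*
 * Offset of the first NUL byte inside a little-endian 8-byte chunk,
 * or 8 if the chunk contains none; mirrors the "rev; clz; lsr #3" step.
 */
static size_t nul_offset(uint64_t chunk)
{
    uint64_t has_nul = (chunk - REP8_01) & ~chunk & REP8_80;

    if (!has_nul)
        return 8;
    return (size_t)(__builtin_clzll(__builtin_bswap64(has_nul)) >> 3);
}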
|
arch/arm64/lib/strncmp.S (new file, 310 lines)
@ -0,0 +1,310 @@
|
||||
/*
|
||||
* Copyright (C) 2013 ARM Ltd.
|
||||
* Copyright (C) 2013 Linaro.
|
||||
*
|
||||
* This code is based on glibc cortex strings work originally authored by Linaro
|
||||
* and re-licensed under GPLv2 for the Linux kernel. The original code can
|
||||
* be found @
|
||||
*
|
||||
* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
|
||||
* files/head:/src/aarch64/
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
/*
|
||||
* compare two strings
|
||||
*
|
||||
* Parameters:
|
||||
* x0 - const string 1 pointer
|
||||
* x1 - const string 2 pointer
|
||||
* x2 - the maximal length to be compared
|
||||
* Returns:
|
||||
* x0 - an integer less than, equal to, or greater than zero if s1 is found,
|
||||
* respectively, to be less than, to match, or be greater than s2.
|
||||
*/
|
||||
|
||||
#define REP8_01 0x0101010101010101
|
||||
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||
#define REP8_80 0x8080808080808080
|
||||
|
||||
/* Parameters and result. */
|
||||
src1 .req x0
|
||||
src2 .req x1
|
||||
limit .req x2
|
||||
result .req x0
|
||||
|
||||
/* Internal variables. */
|
||||
data1 .req x3
|
||||
data1w .req w3
|
||||
data2 .req x4
|
||||
data2w .req w4
|
||||
has_nul .req x5
|
||||
diff .req x6
|
||||
syndrome .req x7
|
||||
tmp1 .req x8
|
||||
tmp2 .req x9
|
||||
tmp3 .req x10
|
||||
zeroones .req x11
|
||||
pos .req x12
|
||||
limit_wd .req x13
|
||||
mask .req x14
|
||||
endloop .req x15
|
||||
|
||||
ENTRY(strncmp)
|
||||
cbz limit, .Lret0
|
||||
eor tmp1, src1, src2
|
||||
mov zeroones, #REP8_01
|
||||
tst tmp1, #7
|
||||
b.ne .Lmisaligned8
|
||||
ands tmp1, src1, #7
|
||||
b.ne .Lmutual_align
|
||||
/* Calculate the number of full and partial words -1. */
|
||||
/*
* When limit is a multiple of 8, we must subtract 1 first, otherwise
* the test for the last dword would be wrong.
*/
|
||||
sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
|
||||
lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
|
||||
|
||||
/*
|
||||
* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||
* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||
* can be done in parallel across the entire word.
|
||||
*/
|
||||
.Lloop_aligned:
|
||||
ldr data1, [src1], #8
|
||||
ldr data2, [src2], #8
|
||||
.Lstart_realigned:
|
||||
subs limit_wd, limit_wd, #1
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
csinv endloop, diff, xzr, pl /* Last Dword or differences.*/
|
||||
bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
|
||||
ccmp endloop, #0, #0, eq
|
||||
b.eq .Lloop_aligned
|
||||
|
||||
/*Not reached the limit, must have found the end or a diff. */
|
||||
tbz limit_wd, #63, .Lnot_limit
|
||||
|
||||
/* Limit % 8 == 0 => all bytes significant. */
|
||||
ands limit, limit, #7
|
||||
b.eq .Lnot_limit
|
||||
|
||||
lsl limit, limit, #3 /* Bits -> bytes. */
|
||||
mov mask, #~0
|
||||
CPU_BE( lsr mask, mask, limit )
|
||||
CPU_LE( lsl mask, mask, limit )
|
||||
bic data1, data1, mask
|
||||
bic data2, data2, mask
|
||||
|
||||
/* Make sure that the NUL byte is marked in the syndrome. */
|
||||
orr has_nul, has_nul, mask
|
||||
|
||||
.Lnot_limit:
|
||||
orr syndrome, diff, has_nul
|
||||
b .Lcal_cmpresult
|
||||
|
||||
.Lmutual_align:
|
||||
/*
|
||||
* Sources are mutually aligned, but are not currently at an
|
||||
* alignment boundary. Round down the addresses and then mask off
|
||||
* the bytes that precede the start point.
|
||||
* We also need to adjust the limit calculations, but without
|
||||
* overflowing if the limit is near ULONG_MAX.
|
||||
*/
|
||||
bic src1, src1, #7
|
||||
bic src2, src2, #7
|
||||
ldr data1, [src1], #8
|
||||
neg tmp3, tmp1, lsl #3 /* 64 - bits(bytes beyond align). */
|
||||
ldr data2, [src2], #8
|
||||
mov tmp2, #~0
|
||||
sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
|
||||
/* Big-endian. Early bytes are at MSB. */
|
||||
CPU_BE( lsl tmp2, tmp2, tmp3 ) /* Shift (tmp1 & 63). */
|
||||
/* Little-endian. Early bytes are at LSB. */
|
||||
CPU_LE( lsr tmp2, tmp2, tmp3 ) /* Shift (tmp1 & 63). */
|
||||
|
||||
and tmp3, limit_wd, #7
|
||||
lsr limit_wd, limit_wd, #3
|
||||
/* Adjust the limit. Only low 3 bits used, so overflow irrelevant.*/
|
||||
add limit, limit, tmp1
|
||||
add tmp3, tmp3, tmp1
|
||||
orr data1, data1, tmp2
|
||||
orr data2, data2, tmp2
|
||||
add limit_wd, limit_wd, tmp3, lsr #3
|
||||
b .Lstart_realigned
|
||||
|
||||
/* src1 and src2 have different alignment offsets */
|
||||
.Lmisaligned8:
|
||||
cmp limit, #8
|
||||
b.lo .Ltiny8proc /*limit < 8... */
|
||||
/*
* Get the alignment offset and compare that many bytes one at a time
* first. After this, one string's address will be aligned.
*/
|
||||
and tmp1, src1, #7
|
||||
neg tmp1, tmp1
|
||||
add tmp1, tmp1, #8
|
||||
and tmp2, src2, #7
|
||||
neg tmp2, tmp2
|
||||
add tmp2, tmp2, #8
|
||||
subs tmp3, tmp1, tmp2
|
||||
csel pos, tmp1, tmp2, hi /*Choose the maximum. */
|
||||
/*
* Here limit is at least 8, so .Ltinycmp can run directly
* without checking the limit.
*/
|
||||
sub limit, limit, pos
|
||||
.Ltinycmp:
|
||||
ldrb data1w, [src1], #1
|
||||
ldrb data2w, [src2], #1
|
||||
subs pos, pos, #1
|
||||
ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */
|
||||
ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
|
||||
b.eq .Ltinycmp
|
||||
cbnz pos, 1f /*find the null or unequal...*/
|
||||
cmp data1w, #1
|
||||
ccmp data1w, data2w, #0, cs
|
||||
b.eq .Lstart_align /*the last bytes are equal....*/
|
||||
1:
|
||||
sub result, data1, data2
|
||||
ret
|
||||
|
||||
.Lstart_align:
|
||||
lsr limit_wd, limit, #3
|
||||
cbz limit_wd, .Lremain8
|
||||
/*process more leading bytes to make str1 aligned...*/
|
||||
ands xzr, src1, #7
|
||||
b.eq .Lrecal_offset
|
||||
add src1, src1, tmp3 /*tmp3 is positive in this branch.*/
|
||||
add src2, src2, tmp3
|
||||
ldr data1, [src1], #8
|
||||
ldr data2, [src2], #8
|
||||
|
||||
sub limit, limit, tmp3
|
||||
lsr limit_wd, limit, #3
|
||||
subs limit_wd, limit_wd, #1
|
||||
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
csinv endloop, diff, xzr, ne /* if limit_wd is 0, the comparison finishes */
|
||||
bics has_nul, tmp1, tmp2
|
||||
ccmp endloop, #0, #0, eq /* has_nul is zero: no NUL byte */
|
||||
b.ne .Lunequal_proc
|
||||
/*How far is the current str2 from the alignment boundary...*/
|
||||
and tmp3, tmp3, #7
|
||||
.Lrecal_offset:
|
||||
neg pos, tmp3
|
||||
.Lloopcmp_proc:
|
||||
/*
* Divide the eight bytes into two parts. First, step src2 back to an
* alignment boundary, load eight bytes from that aligned SRC2 address
* and compare them with the corresponding bytes from SRC1.
* If all eight bytes are equal, start the second part of the comparison;
* otherwise finish the comparison here.
* This special handling guarantees that all accesses stay within the
* thread/task address space and never read out of range.
*/
|
||||
ldr data1, [src1,pos]
|
||||
ldr data2, [src2,pos]
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
csinv endloop, diff, xzr, eq
|
||||
cbnz endloop, .Lunequal_proc
|
||||
|
||||
/* the second part of the comparison */
|
||||
ldr data1, [src1], #8
|
||||
ldr data2, [src2], #8
|
||||
subs limit_wd, limit_wd, #1
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
eor diff, data1, data2 /* Non-zero if differences found. */
|
||||
csinv endloop, diff, xzr, ne /* if limit_wd is 0, the comparison finishes */
|
||||
bics has_nul, tmp1, tmp2
|
||||
ccmp endloop, #0, #0, eq /* has_nul is zero: no NUL byte */
|
||||
b.eq .Lloopcmp_proc
|
||||
|
||||
.Lunequal_proc:
|
||||
orr syndrome, diff, has_nul
|
||||
cbz syndrome, .Lremain8
|
||||
.Lcal_cmpresult:
|
||||
/*
* Reverse the byte order to big-endian so that CLZ can find the most
* significant zero bits.
*/
|
||||
CPU_LE( rev syndrome, syndrome )
|
||||
CPU_LE( rev data1, data1 )
|
||||
CPU_LE( rev data2, data2 )
|
||||
/*
|
||||
* For big-endian we cannot use the trick with the syndrome value
|
||||
* as carry-propagation can corrupt the upper bits if the trailing
|
||||
* bytes in the string contain 0x01.
|
||||
* However, if there is no NUL byte in the dword, we can generate
|
||||
* the result directly. We can't just subtract the bytes as the
|
||||
* MSB might be significant.
|
||||
*/
|
||||
CPU_BE( cbnz has_nul, 1f )
|
||||
CPU_BE( cmp data1, data2 )
|
||||
CPU_BE( cset result, ne )
|
||||
CPU_BE( cneg result, result, lo )
|
||||
CPU_BE( ret )
|
||||
CPU_BE( 1: )
|
||||
/* Re-compute the NUL-byte detection, using a byte-reversed value.*/
|
||||
CPU_BE( rev tmp3, data1 )
|
||||
CPU_BE( sub tmp1, tmp3, zeroones )
|
||||
CPU_BE( orr tmp2, tmp3, #REP8_7f )
|
||||
CPU_BE( bic has_nul, tmp1, tmp2 )
|
||||
CPU_BE( rev has_nul, has_nul )
|
||||
CPU_BE( orr syndrome, diff, has_nul )
|
||||
/*
|
||||
* The MS-non-zero bit of the syndrome marks either the first bit
|
||||
* that is different, or the top bit of the first zero byte.
|
||||
* Shifting left now will bring the critical information into the
|
||||
* top bits.
|
||||
*/
|
||||
clz pos, syndrome
|
||||
lsl data1, data1, pos
|
||||
lsl data2, data2, pos
|
||||
/*
|
||||
* But we need to zero-extend (char is unsigned) the value and then
|
||||
* perform a signed 32-bit subtraction.
|
||||
*/
|
||||
lsr data1, data1, #56
|
||||
sub result, data1, data2, lsr #56
|
||||
ret
|
||||
|
||||
.Lremain8:
|
||||
/* Limit % 8 == 0 => all bytes significant. */
|
||||
ands limit, limit, #7
|
||||
b.eq .Lret0
|
||||
.Ltiny8proc:
|
||||
ldrb data1w, [src1], #1
|
||||
ldrb data2w, [src2], #1
|
||||
subs limit, limit, #1
|
||||
|
||||
ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */
|
||||
ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
|
||||
b.eq .Ltiny8proc
|
||||
sub result, data1, data2
|
||||
ret
|
||||
|
||||
.Lret0:
|
||||
mov result, #0
|
||||
ret
|
||||
ENDPROC(strncmp)
|
arch/arm64/lib/strnlen.S (new file, 171 lines)
@ -0,0 +1,171 @@
|
||||
/*
|
||||
* Copyright (C) 2013 ARM Ltd.
|
||||
* Copyright (C) 2013 Linaro.
|
||||
*
|
||||
* This code is based on glibc cortex strings work originally authored by Linaro
|
||||
* and re-licensed under GPLv2 for the Linux kernel. The original code can
|
||||
* be found @
|
||||
*
|
||||
* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
|
||||
* files/head:/src/aarch64/
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
/*
|
||||
* determine the length of a fixed-size string
|
||||
*
|
||||
* Parameters:
|
||||
* x0 - const string pointer
|
||||
* x1 - maximal string length
|
||||
* Returns:
|
||||
* x0 - the return length of specific string
|
||||
*/
|
||||
|
||||
/* Arguments and results. */
|
||||
srcin .req x0
|
||||
len .req x0
|
||||
limit .req x1
|
||||
|
||||
/* Locals and temporaries. */
|
||||
src .req x2
|
||||
data1 .req x3
|
||||
data2 .req x4
|
||||
data2a .req x5
|
||||
has_nul1 .req x6
|
||||
has_nul2 .req x7
|
||||
tmp1 .req x8
|
||||
tmp2 .req x9
|
||||
tmp3 .req x10
|
||||
tmp4 .req x11
|
||||
zeroones .req x12
|
||||
pos .req x13
|
||||
limit_wd .req x14
|
||||
|
||||
#define REP8_01 0x0101010101010101
|
||||
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||
#define REP8_80 0x8080808080808080
|
||||
|
||||
ENTRY(strnlen)
|
||||
cbz limit, .Lhit_limit
|
||||
mov zeroones, #REP8_01
|
||||
bic src, srcin, #15
|
||||
ands tmp1, srcin, #15
|
||||
b.ne .Lmisaligned
|
||||
/* Calculate the number of full and partial words -1. */
|
||||
sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */
|
||||
lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */
|
||||
|
||||
/*
|
||||
* NUL detection works on the principle that (X - 1) & (~X) & 0x80
|
||||
* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
|
||||
* can be done in parallel across the entire word.
|
||||
*/
|
||||
/*
|
||||
* The inner loop deals with two Dwords at a time. This has a
|
||||
* slightly higher start-up cost, but we should win quite quickly,
|
||||
* especially on cores with a high number of issue slots per
|
||||
* cycle, as we get much better parallelism out of the operations.
|
||||
*/
|
||||
.Lloop:
|
||||
ldp data1, data2, [src], #16
|
||||
.Lrealigned:
|
||||
sub tmp1, data1, zeroones
|
||||
orr tmp2, data1, #REP8_7f
|
||||
sub tmp3, data2, zeroones
|
||||
orr tmp4, data2, #REP8_7f
|
||||
bic has_nul1, tmp1, tmp2
|
||||
bic has_nul2, tmp3, tmp4
|
||||
subs limit_wd, limit_wd, #1
|
||||
orr tmp1, has_nul1, has_nul2
|
||||
ccmp tmp1, #0, #0, pl /* NZCV = 0000 */
|
||||
b.eq .Lloop
|
||||
|
||||
cbz tmp1, .Lhit_limit /* No null in final Qword. */
|
||||
|
||||
/*
|
||||
* We know there's a null in the final Qword. The easiest thing
|
||||
* to do now is work out the length of the string and return
|
||||
* MIN (len, limit).
|
||||
*/
|
||||
sub len, src, srcin
|
||||
cbz has_nul1, .Lnul_in_data2
|
||||
CPU_BE( mov data2, data1 ) /* prepare data to re-calculate the syndrome */
|
||||
|
||||
sub len, len, #8
|
||||
mov has_nul2, has_nul1
|
||||
.Lnul_in_data2:
|
||||
/*
|
||||
* For big-endian, carry propagation (if the final byte in the
|
||||
* string is 0x01) means we cannot use has_nul directly. The
|
||||
* easiest way to get the correct byte is to byte-swap the data
|
||||
* and calculate the syndrome a second time.
|
||||
*/
|
||||
CPU_BE( rev data2, data2 )
|
||||
CPU_BE( sub tmp1, data2, zeroones )
|
||||
CPU_BE( orr tmp2, data2, #REP8_7f )
|
||||
CPU_BE( bic has_nul2, tmp1, tmp2 )
|
||||
|
||||
sub len, len, #8
|
||||
rev has_nul2, has_nul2
|
||||
clz pos, has_nul2
|
||||
add len, len, pos, lsr #3 /* Bits to bytes. */
|
||||
cmp len, limit
|
||||
csel len, len, limit, ls /* Return the lower value. */
|
||||
ret
|
||||
|
||||
.Lmisaligned:
|
||||
/*
|
||||
* Deal with a partial first word.
|
||||
* We're doing two things in parallel here;
|
||||
* 1) Calculate the number of words (but avoiding overflow if
|
||||
* limit is near ULONG_MAX) - to do this we need to work out
|
||||
* limit + tmp1 - 1 as a 65-bit value before shifting it;
|
||||
* 2) Load and mask the initial data words - we force the bytes
|
||||
* before the ones we are interested in to 0xff - this ensures
|
||||
* early bytes will not hit any zero detection.
|
||||
*/
|
||||
ldp data1, data2, [src], #16
|
||||
|
||||
sub limit_wd, limit, #1
|
||||
and tmp3, limit_wd, #15
|
||||
lsr limit_wd, limit_wd, #4
|
||||
|
||||
add tmp3, tmp3, tmp1
|
||||
add limit_wd, limit_wd, tmp3, lsr #4
|
||||
|
||||
neg tmp4, tmp1
|
||||
lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */
|
||||
|
||||
mov tmp2, #~0
|
||||
/* Big-endian. Early bytes are at MSB. */
|
||||
CPU_BE( lsl tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */
|
||||
/* Little-endian. Early bytes are at LSB. */
|
||||
CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */
|
||||
|
||||
cmp tmp1, #8
|
||||
|
||||
orr data1, data1, tmp2
|
||||
orr data2a, data2, tmp2
|
||||
|
||||
csinv data1, data1, xzr, le
|
||||
csel data2, data2, data2a, le
|
||||
b .Lrealigned
|
||||
|
||||
.Lhit_limit:
|
||||
mov len, limit
|
||||
ret
|
||||
ENDPROC(strnlen)
|
@ -1,5 +1,5 @@
|
||||
obj-y := dma-mapping.o extable.o fault.o init.o \
|
||||
cache.o copypage.o flush.o \
|
||||
ioremap.o mmap.o pgd.o mmu.o \
|
||||
context.o tlb.o proc.o
|
||||
context.o proc.o
|
||||
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
|
||||
|
@ -31,7 +31,7 @@
|
||||
* Corrupted registers: x0-x7, x9-x11
|
||||
*/
|
||||
__flush_dcache_all:
|
||||
dsb sy // ensure ordering with previous memory accesses
|
||||
dmb sy // ensure ordering with previous memory accesses
|
||||
mrs x0, clidr_el1 // read clidr
|
||||
and x3, x0, #0x7000000 // extract loc from clidr
|
||||
lsr x3, x3, #23 // left align loc bit field
|
||||
@ -128,7 +128,7 @@ USER(9f, dc cvau, x4 ) // clean D line to PoU
|
||||
add x4, x4, x2
|
||||
cmp x4, x1
|
||||
b.lo 1b
|
||||
dsb sy
|
||||
dsb ish
|
||||
|
||||
icache_line_size x2, x3
|
||||
sub x3, x2, #1
|
||||
@ -139,7 +139,7 @@ USER(9f, ic ivau, x4 ) // invalidate I line PoU
|
||||
cmp x4, x1
|
||||
b.lo 1b
|
||||
9: // ignore any faulting cache operation
|
||||
dsb sy
|
||||
dsb ish
|
||||
isb
|
||||
ret
|
||||
ENDPROC(flush_icache_range)
|
||||
|
@ -115,7 +115,7 @@ static void *__dma_alloc_noncoherent(struct device *dev, size_t size,
|
||||
for (i = 0; i < (size >> PAGE_SHIFT); i++)
|
||||
map[i] = page + i;
|
||||
coherent_ptr = vmap(map, size >> PAGE_SHIFT, VM_MAP,
|
||||
__get_dma_pgprot(attrs, pgprot_default, false));
|
||||
__get_dma_pgprot(attrs, __pgprot(PROT_NORMAL_NC), false));
|
||||
kfree(map);
|
||||
if (!coherent_ptr)
|
||||
goto no_map;
|
||||
|
@ -32,6 +32,7 @@
|
||||
|
||||
#include <asm/exception.h>
|
||||
#include <asm/debug-monitors.h>
|
||||
#include <asm/esr.h>
|
||||
#include <asm/system_misc.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/tlbflush.h>
|
||||
@ -123,6 +124,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
|
||||
}
|
||||
|
||||
tsk->thread.fault_address = addr;
|
||||
tsk->thread.fault_code = esr;
|
||||
si.si_signo = sig;
|
||||
si.si_errno = 0;
|
||||
si.si_code = code;
|
||||
@ -148,8 +150,6 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
|
||||
#define VM_FAULT_BADMAP 0x010000
|
||||
#define VM_FAULT_BADACCESS 0x020000
|
||||
|
||||
#define ESR_WRITE (1 << 6)
|
||||
#define ESR_CM (1 << 8)
|
||||
#define ESR_LNX_EXEC (1 << 24)
|
||||
|
||||
static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
|
||||
@ -218,7 +218,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
|
||||
|
||||
if (esr & ESR_LNX_EXEC) {
|
||||
vm_flags = VM_EXEC;
|
||||
} else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
|
||||
} else if ((esr & ESR_EL1_WRITE) && !(esr & ESR_EL1_CM)) {
|
||||
vm_flags = VM_WRITE;
|
||||
mm_flags |= FAULT_FLAG_WRITE;
|
||||
}
|
||||
@ -525,7 +525,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
|
||||
info.si_errno = 0;
|
||||
info.si_code = inf->code;
|
||||
info.si_addr = (void __user *)addr;
|
||||
arm64_notify_die("", regs, &info, esr);
|
||||
arm64_notify_die("", regs, &info, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -43,11 +43,6 @@
|
||||
struct page *empty_zero_page;
|
||||
EXPORT_SYMBOL(empty_zero_page);
|
||||
|
||||
pgprot_t pgprot_default;
|
||||
EXPORT_SYMBOL(pgprot_default);
|
||||
|
||||
static pmdval_t prot_sect_kernel;
|
||||
|
||||
struct cachepolicy {
|
||||
const char policy[16];
|
||||
u64 mair;
|
||||
@ -122,33 +117,6 @@ static int __init early_cachepolicy(char *p)
|
||||
}
|
||||
early_param("cachepolicy", early_cachepolicy);
|
||||
|
||||
/*
|
||||
* Adjust the PMD section entries according to the CPU in use.
|
||||
*/
|
||||
void __init init_mem_pgprot(void)
|
||||
{
|
||||
pteval_t default_pgprot;
|
||||
int i;
|
||||
|
||||
default_pgprot = PTE_ATTRINDX(MT_NORMAL);
|
||||
prot_sect_kernel = PMD_TYPE_SECT | PMD_SECT_AF | PMD_ATTRINDX(MT_NORMAL);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Mark memory with the "shared" attribute for SMP systems
|
||||
*/
|
||||
default_pgprot |= PTE_SHARED;
|
||||
prot_sect_kernel |= PMD_SECT_S;
|
||||
#endif
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
unsigned long v = pgprot_val(protection_map[i]);
|
||||
protection_map[i] = __pgprot(v | default_pgprot);
|
||||
}
|
||||
|
||||
pgprot_default = __pgprot(PTE_TYPE_PAGE | PTE_AF | default_pgprot);
|
||||
}
|
||||
|
||||
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
|
||||
unsigned long size, pgprot_t vma_prot)
|
||||
{
|
||||
@ -196,11 +164,10 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
|
||||
pgprot_t prot_pte;
|
||||
|
||||
if (map_io) {
|
||||
prot_sect = PMD_TYPE_SECT | PMD_SECT_AF |
|
||||
PMD_ATTRINDX(MT_DEVICE_nGnRE);
|
||||
prot_sect = PROT_SECT_DEVICE_nGnRE;
|
||||
prot_pte = __pgprot(PROT_DEVICE_nGnRE);
|
||||
} else {
|
||||
prot_sect = prot_sect_kernel;
|
||||
prot_sect = PROT_SECT_NORMAL_EXEC;
|
||||
prot_pte = PAGE_KERNEL_EXEC;
|
||||
}
|
||||
|
||||
@ -242,7 +209,30 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
|
||||
|
||||
do {
|
||||
next = pud_addr_end(addr, end);
|
||||
alloc_init_pmd(pud, addr, next, phys, map_io);
|
||||
|
||||
/*
|
||||
* For 4K granule only, attempt to put down a 1GB block
|
||||
*/
|
||||
if (!map_io && (PAGE_SHIFT == 12) &&
|
||||
((addr | next | phys) & ~PUD_MASK) == 0) {
|
||||
pud_t old_pud = *pud;
|
||||
set_pud(pud, __pud(phys | PROT_SECT_NORMAL_EXEC));
|
||||
|
||||
/*
|
||||
* If we have an old value for a pud, it will
|
||||
* be pointing to a pmd table that we no longer
|
||||
* need (from swapper_pg_dir).
|
||||
*
|
||||
* Look up the old pmd table and free it.
|
||||
*/
|
||||
if (!pud_none(old_pud)) {
|
||||
phys_addr_t table = __pa(pmd_offset(&old_pud, 0));
|
||||
memblock_free(table, PAGE_SIZE);
|
||||
flush_tlb_all();
|
||||
}
|
||||
} else {
|
||||
alloc_init_pmd(pud, addr, next, phys, map_io);
|
||||
}
|
||||
phys += next - addr;
|
||||
} while (pud++, addr = next, addr != end);
|
||||
}
|
||||
@ -399,6 +389,9 @@ int kern_addr_valid(unsigned long addr)
|
||||
if (pud_none(*pud))
|
||||
return 0;
|
||||
|
||||
if (pud_sect(*pud))
|
||||
return pfn_valid(pud_pfn(*pud));
|
||||
|
||||
pmd = pmd_offset(pud, addr);
|
||||
if (pmd_none(*pmd))
|
||||
return 0;
|
||||
@ -446,7 +439,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
|
||||
set_pmd(pmd, __pmd(__pa(p) | prot_sect_kernel));
|
||||
set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL));
|
||||
} else
|
||||
vmemmap_verify((pte_t *)pmd, node, addr, next);
|
||||
} while (addr = next, addr != end);
|
||||
|
@ -182,7 +182,7 @@ ENDPROC(cpu_do_switch_mm)
|
||||
ENTRY(__cpu_setup)
|
||||
ic iallu // I+BTB cache invalidate
|
||||
tlbi vmalle1is // invalidate I + D TLBs
|
||||
dsb sy
|
||||
dsb ish
|
||||
|
||||
mov x0, #3 << 20
|
||||
msr cpacr_el1, x0 // Enable FP/ASIMD
|
||||
|
@ -1,71 +0,0 @@
|
||||
/*
|
||||
* Based on arch/arm/mm/tlb.S
|
||||
*
|
||||
* Copyright (C) 1997-2002 Russell King
|
||||
* Copyright (C) 2012 ARM Ltd.
|
||||
* Written by Catalin Marinas <catalin.marinas@arm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include "proc-macros.S"
|
||||
|
||||
/*
|
||||
* __cpu_flush_user_tlb_range(start, end, vma)
|
||||
*
|
||||
* Invalidate a range of TLB entries in the specified address space.
|
||||
*
|
||||
* - start - start address (may not be aligned)
|
||||
* - end - end address (exclusive, may not be aligned)
|
||||
* - vma - vma_struct describing address range
|
||||
*/
|
||||
ENTRY(__cpu_flush_user_tlb_range)
|
||||
vma_vm_mm x3, x2 // get vma->vm_mm
|
||||
mmid w3, x3 // get vm_mm->context.id
|
||||
dsb sy
|
||||
lsr x0, x0, #12 // align address
|
||||
lsr x1, x1, #12
|
||||
bfi x0, x3, #48, #16 // start VA and ASID
|
||||
bfi x1, x3, #48, #16 // end VA and ASID
|
||||
1: tlbi vae1is, x0 // TLB invalidate by address and ASID
|
||||
add x0, x0, #1
|
||||
cmp x0, x1
|
||||
b.lo 1b
|
||||
dsb sy
|
||||
ret
|
||||
ENDPROC(__cpu_flush_user_tlb_range)
|
||||
|
||||
/*
|
||||
* __cpu_flush_kern_tlb_range(start,end)
|
||||
*
|
||||
* Invalidate a range of kernel TLB entries.
|
||||
*
|
||||
* - start - start address (may not be aligned)
|
||||
* - end - end address (exclusive, may not be aligned)
|
||||
*/
|
||||
ENTRY(__cpu_flush_kern_tlb_range)
|
||||
dsb sy
|
||||
lsr x0, x0, #12 // align address
|
||||
lsr x1, x1, #12
|
||||
1: tlbi vaae1is, x0 // TLB invalidate by address
|
||||
add x0, x0, #1
|
||||
cmp x0, x1
|
||||
b.lo 1b
|
||||
dsb sy
|
||||
isb
|
||||
ret
|
||||
ENDPROC(__cpu_flush_kern_tlb_range)
|
@ -66,16 +66,7 @@ extern inline void *return_address(unsigned int level)
|
||||
|
||||
#endif /* CONFIG_FRAME_POINTER */
|
||||
|
||||
#define HAVE_ARCH_CALLER_ADDR
|
||||
|
||||
/* inline function or macro may lead to unexpected result */
|
||||
#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
|
||||
#define CALLER_ADDR1 ((unsigned long)return_address(1))
|
||||
#define CALLER_ADDR2 ((unsigned long)return_address(2))
|
||||
#define CALLER_ADDR3 ((unsigned long)return_address(3))
|
||||
#define CALLER_ADDR4 ((unsigned long)return_address(4))
|
||||
#define CALLER_ADDR5 ((unsigned long)return_address(5))
|
||||
#define CALLER_ADDR6 ((unsigned long)return_address(6))
|
||||
#define ftrace_return_address(n) return_address(n)
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
|
@ -24,15 +24,7 @@ extern void return_to_handler(void);
|
||||
|
||||
extern unsigned long return_address(unsigned int);
|
||||
|
||||
#define HAVE_ARCH_CALLER_ADDR
|
||||
|
||||
#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
|
||||
#define CALLER_ADDR1 return_address(1)
|
||||
#define CALLER_ADDR2 return_address(2)
|
||||
#define CALLER_ADDR3 return_address(3)
|
||||
#define CALLER_ADDR4 return_address(4)
|
||||
#define CALLER_ADDR5 return_address(5)
|
||||
#define CALLER_ADDR6 return_address(6)
|
||||
#define ftrace_return_address(n) return_address(n)
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
|
@ -40,15 +40,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
|
||||
/* arch/sh/kernel/return_address.c */
|
||||
extern void *return_address(unsigned int);
|
||||
|
||||
#define HAVE_ARCH_CALLER_ADDR
|
||||
|
||||
#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
|
||||
#define CALLER_ADDR1 ((unsigned long)return_address(1))
|
||||
#define CALLER_ADDR2 ((unsigned long)return_address(2))
|
||||
#define CALLER_ADDR3 ((unsigned long)return_address(3))
|
||||
#define CALLER_ADDR4 ((unsigned long)return_address(4))
|
||||
#define CALLER_ADDR5 ((unsigned long)return_address(5))
|
||||
#define CALLER_ADDR6 ((unsigned long)return_address(6))
|
||||
#define ftrace_return_address(n) return_address(n)
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
|
@ -12,24 +12,18 @@
|
||||
|
||||
#include <asm/processor.h>
|
||||
|
||||
#define HAVE_ARCH_CALLER_ADDR
|
||||
#ifndef __ASSEMBLY__
|
||||
#define CALLER_ADDR0 ({ unsigned long a0, a1; \
|
||||
#define ftrace_return_address0 ({ unsigned long a0, a1; \
|
||||
__asm__ __volatile__ ( \
|
||||
"mov %0, a0\n" \
|
||||
"mov %1, a1\n" \
|
||||
: "=r"(a0), "=r"(a1)); \
|
||||
MAKE_PC_FROM_RA(a0, a1); })
|
||||
|
||||
#ifdef CONFIG_FRAME_POINTER
|
||||
extern unsigned long return_address(unsigned level);
|
||||
#define CALLER_ADDR1 return_address(1)
|
||||
#define CALLER_ADDR2 return_address(2)
|
||||
#define CALLER_ADDR3 return_address(3)
|
||||
#else /* CONFIG_FRAME_POINTER */
|
||||
#define CALLER_ADDR1 (0)
|
||||
#define CALLER_ADDR2 (0)
|
||||
#define CALLER_ADDR3 (0)
|
||||
#endif /* CONFIG_FRAME_POINTER */
|
||||
#define ftrace_return_address(n) return_address(n)
|
||||
#endif
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#ifdef CONFIG_FUNCTION_TRACER
|
||||
|
@ -4,22 +4,27 @@
|
||||
/*
|
||||
* This is the most generic implementation of unaligned accesses
|
||||
* and should work almost anywhere.
|
||||
*
|
||||
* If an architecture can handle unaligned accesses in hardware,
|
||||
* it may want to use the linux/unaligned/access_ok.h implementation
|
||||
* instead.
|
||||
*/
|
||||
#include <asm/byteorder.h>
|
||||
|
||||
/* Set by the arch if it can handle unaligned accesses in hardware. */
|
||||
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
# include <linux/unaligned/access_ok.h>
|
||||
#endif
|
||||
|
||||
#if defined(__LITTLE_ENDIAN)
|
||||
# include <linux/unaligned/le_struct.h>
|
||||
# include <linux/unaligned/be_byteshift.h>
|
||||
# ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
# include <linux/unaligned/le_struct.h>
|
||||
# include <linux/unaligned/be_byteshift.h>
|
||||
# endif
|
||||
# include <linux/unaligned/generic.h>
|
||||
# define get_unaligned __get_unaligned_le
|
||||
# define put_unaligned __put_unaligned_le
|
||||
#elif defined(__BIG_ENDIAN)
|
||||
# include <linux/unaligned/be_struct.h>
|
||||
# include <linux/unaligned/le_byteshift.h>
|
||||
# ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
# include <linux/unaligned/be_struct.h>
|
||||
# include <linux/unaligned/le_byteshift.h>
|
||||
# endif
|
||||
# include <linux/unaligned/generic.h>
|
||||
# define get_unaligned __get_unaligned_be
|
||||
# define put_unaligned __put_unaligned_be
|
||||
|
@ -616,25 +616,27 @@ static inline void __ftrace_enabled_restore(int enabled)
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef HAVE_ARCH_CALLER_ADDR
|
||||
/* All archs should have this, but we define it for consistency */
|
||||
#ifndef ftrace_return_address0
|
||||
# define ftrace_return_address0 __builtin_return_address(0)
|
||||
#endif
|
||||
|
||||
/* Archs may use other ways for ADDR1 and beyond */
|
||||
#ifndef ftrace_return_address
|
||||
# ifdef CONFIG_FRAME_POINTER
|
||||
# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
|
||||
# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1))
|
||||
# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2))
|
||||
# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3))
|
||||
# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4))
|
||||
# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5))
|
||||
# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6))
|
||||
# define ftrace_return_address(n) __builtin_return_address(n)
|
||||
# else
|
||||
# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
|
||||
# define CALLER_ADDR1 0UL
|
||||
# define CALLER_ADDR2 0UL
|
||||
# define CALLER_ADDR3 0UL
|
||||
# define CALLER_ADDR4 0UL
|
||||
# define CALLER_ADDR5 0UL
|
||||
# define CALLER_ADDR6 0UL
|
||||
# define ftrace_return_address(n) 0UL
|
||||
# endif
|
||||
#endif /* ifndef HAVE_ARCH_CALLER_ADDR */
|
||||
#endif
|
||||
|
||||
#define CALLER_ADDR0 ((unsigned long)ftrace_return_address0)
|
||||
#define CALLER_ADDR1 ((unsigned long)ftrace_return_address(1))
|
||||
#define CALLER_ADDR2 ((unsigned long)ftrace_return_address(2))
|
||||
#define CALLER_ADDR3 ((unsigned long)ftrace_return_address(3))
|
||||
#define CALLER_ADDR4 ((unsigned long)ftrace_return_address(4))
|
||||
#define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5))
|
||||
#define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6))
|
||||
|
||||
#ifdef CONFIG_IRQSOFF_TRACER
|
||||
extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
|
||||
|
@ -40,6 +40,11 @@
|
||||
#define R_METAG_NONE 3
|
||||
#endif
|
||||
|
||||
#ifndef EM_AARCH64
|
||||
#define EM_AARCH64 183
|
||||
#define R_AARCH64_ABS64 257
|
||||
#endif
|
||||
|
||||
static int fd_map; /* File descriptor for file being modified. */
|
||||
static int mmap_failed; /* Boolean flag. */
|
||||
static void *ehdr_curr; /* current ElfXX_Ehdr * for resource cleanup */
|
||||
@ -347,6 +352,8 @@ do_file(char const *const fname)
|
||||
case EM_ARM: reltype = R_ARM_ABS32;
|
||||
altmcount = "__gnu_mcount_nc";
|
||||
break;
|
||||
case EM_AARCH64:
|
||||
reltype = R_AARCH64_ABS64; gpfx = '_'; break;
|
||||
case EM_IA_64: reltype = R_IA64_IMM64; gpfx = '_'; break;
|
||||
case EM_METAG: reltype = R_METAG_ADDR32;
|
||||
altmcount = "_mcount_wrapper";
|
||||
|
@ -279,6 +279,11 @@ if ($arch eq "x86_64") {
|
||||
$mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_ARM_(CALL|PC24|THM_CALL)" .
|
||||
"\\s+(__gnu_mcount_nc|mcount)\$";
|
||||
|
||||
} elsif ($arch eq "arm64") {
|
||||
$alignment = 3;
|
||||
$section_type = '%progbits';
|
||||
$mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_AARCH64_CALL26\\s+_mcount\$";
|
||||
$type = ".quad";
|
||||
} elsif ($arch eq "ia64") {
|
||||
$mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
|
||||
$type = "data8";
|
||||
|