diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 4121662dea5a..ccf55cef6ab9 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -183,6 +183,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
 #define PHYS_OFFSET     ((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT)
 #define PHYS_PFN_OFFSET (__pv_phys_pfn_offset)
 
+#ifndef CONFIG_THUMB2_KERNEL
 #define __pv_stub(from,to,instr)                        \
         __asm__("@ __pv_stub\n"                         \
         "1:     " instr "       %0, %1, %2\n"           \
@@ -192,26 +193,46 @@ extern const void *__pv_table_begin, *__pv_table_end;
         : "=r" (to)                                     \
         : "r" (from), "I" (__PV_BITS_31_24))
 
-#define __pv_stub_mov_hi(t)                             \
-        __asm__ volatile("@ __pv_stub_mov\n"            \
-        "1:     mov     %R0, %1\n"                      \
-        "       .pushsection .pv_table,\"a\"\n"         \
-        "       .long   1b - .\n"                       \
-        "       .popsection\n"                          \
-        : "=r" (t)                                      \
-        : "I" (__PV_BITS_7_0))
-
 #define __pv_add_carry_stub(x, y)                       \
-        __asm__ volatile("@ __pv_add_carry_stub\n"      \
-        "1:     adds    %Q0, %1, %2\n"                  \
+        __asm__("@ __pv_add_carry_stub\n"               \
+        "0:     movw    %R0, #0\n"                      \
+        "       adds    %Q0, %1, %R0, lsl #24\n"        \
+        "1:     mov     %R0, %2\n"                      \
         "       adc     %R0, %R0, #0\n"                 \
         "       .pushsection .pv_table,\"a\"\n"         \
-        "       .long   1b - .\n"                       \
+        "       .long   0b - ., 1b - .\n"               \
         "       .popsection\n"                          \
-        : "+r" (y)                                      \
-        : "r" (x), "I" (__PV_BITS_31_24)                \
+        : "=&r" (y)                                     \
+        : "r" (x), "I" (__PV_BITS_7_0)                  \
         : "cc")
+#else
+#define __pv_stub(from,to,instr)                        \
+        __asm__("@ __pv_stub\n"                         \
+        "0:     movw    %0, #0\n"                       \
+        "       lsl     %0, #24\n"                      \
+        "       " instr "       %0, %1, %0\n"           \
+        "       .pushsection .pv_table,\"a\"\n"         \
+        "       .long   0b - .\n"                       \
+        "       .popsection\n"                          \
+        : "=&r" (to)                                    \
+        : "r" (from))
+
+#define __pv_add_carry_stub(x, y)                       \
+        __asm__("@ __pv_add_carry_stub\n"               \
+        "0:     movw    %R0, #0\n"                      \
+        "       lsls    %R0, #24\n"                     \
+        "       adds    %Q0, %1, %R0\n"                 \
+        "1:     mvn     %R0, #0\n"                      \
+        "       adc     %R0, %R0, #0\n"                 \
+        "       .pushsection .pv_table,\"a\"\n"         \
+        "       .long   0b - ., 1b - .\n"               \
+        "       .popsection\n"                          \
+        : "=&r" (y)                                     \
+        : "r" (x)                                       \
+        : "cc")
+#endif
 
 static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
 {
         phys_addr_t t;
@@ -219,7 +240,6 @@ static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
         if (sizeof(phys_addr_t) == 4) {
                 __pv_stub(x, t, "add");
         } else {
-                __pv_stub_mov_hi(t);
                 __pv_add_carry_stub(x, t);
         }
         return t;
diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S
index be8fb0d89877..a4e364689663 100644
--- a/arch/arm/kernel/phys2virt.S
+++ b/arch/arm/kernel/phys2virt.S
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  Copyright (C) 1994-2002 Russell King
- *  Copyright (c) 2003 ARM Limited
+ *  Copyright (c) 2003, 2020 ARM Limited
  *  All Rights Reserved
  */
 
@@ -58,55 +58,140 @@ __fixup_a_pv_table:
         mov     r6, r6, lsr #24
         cmn     r0, #1
 #ifdef CONFIG_THUMB2_KERNEL
+        @
+        @ The Thumb-2 versions of the patchable sequences are
+        @
+        @ phys-to-virt:            movw    <reg>, #offset<31:24>
+        @                          lsl     <reg>, #24
+        @                          sub     <VA>, <PA>, <reg>
+        @
+        @ virt-to-phys (non-LPAE): movw    <reg>, #offset<31:24>
+        @                          lsl     <reg>, #24
+        @                          add     <PA>, <VA>, <reg>
+        @
+        @ virt-to-phys (LPAE):     movw    <reg>, #offset<31:24>
+        @                          lsl     <reg>, #24
+        @                          adds    <PAlo>, <VA>, <reg>
+        @                          mov     <PAhi>, #offset<39:32>
+        @                          adc     <PAhi>, <PAhi>, #0
+        @
+        @ In the non-LPAE case, all patchable instructions are MOVW
+        @ instructions, where we need to patch in the offset into the
+        @ second halfword of the opcode (the 16-bit immediate is encoded
+        @ as imm4:i:imm3:imm8)
+        @
+        @       15       11 10  9           4    3  0  15  14  12 11 8 7    0
+        @      +-----------+---+-------------+------++---+------+----+------+
+        @ MOVW | 1 1 1 1 0 | i | 1 0 0 1 0 0 | imm4 || 0 | imm3 | Rd | imm8 |
+        @      +-----------+---+-------------+------++---+------+----+------+
+        @
+        @ In the LPAE case, we also need to patch in the high word of the
+        @ offset into the immediate field of the MOV instruction, or patch it
+        @ to a MVN instruction if the offset is negative. In this case, we
+        @ need to inspect the first halfword of the opcode, to check whether
+        @ it is MOVW or MOV/MVN, and to perform the MOV to MVN patching if
+        @ needed. The encoding of the immediate is rather complex for values
+        @ of i:imm3 != 0b0000, but fortunately, we never need more than 8 lower
+        @ order bits, which can be patched into imm8 directly (and i:imm3
+        @ cleared)
+        @
+        @       15       11 10  9                    0  15  14  12 11 8 7    0
+        @      +-----------+---+---------------------++---+------+----+------+
+        @ MOV  | 1 1 1 1 0 | i | 0 0 0 1 0 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
+        @ MVN  | 1 1 1 1 0 | i | 0 0 0 1 1 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
+        @      +-----------+---+---------------------++---+------+----+------+
+        @
         moveq   r0, #0x200000           @ set bit 21, mov to mvn instruction
-        lsls    r6, #24
-        beq     .Lnext
-        clz     r7, r6
-        lsr     r6, #24
-        lsl     r6, r7
-        bic     r6, #0x0080
-        lsrs    r7, #1
-        orrcs   r6, #0x0080
-        orr     r6, r6, r7, lsl #12
-        orr     r6, #0x4000
         b       .Lnext
 .Lloop: add     r7, r4
-        adds    r4, #4
-        ldrh    ip, [r7, #2]
-ARM_BE8(rev16   ip, ip)
-        tst     ip, #0x4000
-        and     ip, #0x8f00
-        orrne   ip, r6                  @ mask in offset bits 31-24
-        orreq   ip, r0                  @ mask in offset bits 7-0
-ARM_BE8(rev16   ip, ip)
-        strh    ip, [r7, #2]
-        bne     .Lnext
+        adds    r4, #4                  @ clears Z flag
+#ifdef CONFIG_ARM_LPAE
         ldrh    ip, [r7]
 ARM_BE8(rev16   ip, ip)
-        bic     ip, #0x20
-        orr     ip, ip, r0, lsr #16
+        tst     ip, #0x200              @ MOVW has bit 9 set, MVN has it clear
+        bne     0f                      @ skip to MOVW handling (Z flag is clear)
+        bic     ip, #0x20               @ clear bit 5 (MVN -> MOV)
+        orr     ip, ip, r0, lsr #16     @ MOV -> MVN if offset < 0
 ARM_BE8(rev16   ip, ip)
         strh    ip, [r7]
+        @ Z flag is set
+0:
+#endif
+        ldrh    ip, [r7, #2]
+ARM_BE8(rev16   ip, ip)
+        and     ip, #0xf00              @ clear everything except Rd field
+        orreq   ip, r0                  @ Z flag set -> MOV/MVN -> patch in high bits
+        orrne   ip, r6                  @ Z flag clear -> MOVW -> patch in low bits
+ARM_BE8(rev16   ip, ip)
+        strh    ip, [r7, #2]
 #else
 #ifdef CONFIG_CPU_ENDIAN_BE8
 @ in BE8, we load data in BE, but instructions still in LE
-#define PV_BIT22        0x00004000
+#define PV_BIT24        0x00000001
 #define PV_IMM8_MASK    0xff000000
-#define PV_ROT_MASK     0x000f0000
 #else
-#define PV_BIT22        0x00400000
+#define PV_BIT24        0x01000000
 #define PV_IMM8_MASK    0x000000ff
-#define PV_ROT_MASK     0xf00
 #endif
 
+        @
+        @ The ARM versions of the patchable sequences are
+        @
+        @ phys-to-virt:            sub     <VA>, <PA>, #offset<31:24>, lsl #24
+        @
+        @ virt-to-phys (non-LPAE): add     <PA>, <VA>, #offset<31:24>, lsl #24
+        @
+        @ virt-to-phys (LPAE):     movw    <reg>, #offset<31:24>
+        @                          adds    <PAlo>, <VA>, <reg>, lsl #24
+        @                          mov     <PAhi>, #offset<39:32>
+        @                          adc     <PAhi>, <PAhi>, #0
+        @
+        @ In the non-LPAE case, all patchable instructions are ADD or SUB
+        @ instructions, where we need to patch in the offset into the
+        @ immediate field of the opcode, which is emitted with the correct
+        @ rotation value. (The effective value of the immediate is imm12<7:0>
+        @ rotated right by [2 * imm12<11:8>] bits)
+        @
+        @      31   28 27      23 22  20 19  16 15  12 11    0
+        @      +------+-----------------+------+------+-------+
+        @  ADD | cond | 0 0 1 0 1 0 0 0 | Rn   | Rd   | imm12 |
+        @  SUB | cond | 0 0 1 0 0 1 0 0 | Rn   | Rd   | imm12 |
+        @  MOV | cond | 0 0 1 1 1 0 1 0 | Rn   | Rd   | imm12 |
+        @  MVN | cond | 0 0 1 1 1 1 1 0 | Rn   | Rd   | imm12 |
+        @      +------+-----------------+------+------+-------+
+        @
+        @ In the LPAE case, we use a MOVW instruction to carry the low offset
+        @ word, and patch in the high word of the offset into the immediate
+        @ field of the subsequent MOV instruction, or patch it to a MVN
+        @ instruction if the offset is negative. We can distinguish MOVW
+        @ instructions based on bits 23:22 of the opcode, and ADD/SUB can be
+        @ distinguished from MOV/MVN (all using the encodings above) using
+        @ bit 24.
+        @
+        @      31   28 27      23 22  20 19  16 15  12 11    0
+        @      +------+-----------------+------+------+-------+
+        @ MOVW | cond | 0 0 1 1 0 0 0 0 | imm4 | Rd   | imm12 |
+        @      +------+-----------------+------+------+-------+
+        @
         moveq   r0, #0x400000           @ set bit 22, mov to mvn instruction
         b       .Lnext
 .Lloop: ldr     ip, [r7, r4]
+#ifdef CONFIG_ARM_LPAE
+        tst     ip, #PV_BIT24           @ ADD/SUB have bit 24 clear
+        beq     1f
+ARM_BE8(rev     ip, ip)
+        tst     ip, #0xc00000           @ MOVW has bits 23:22 clear
+        bic     ip, ip, #0x400000       @ clear bit 22
+        bfc     ip, #0, #12             @ clear imm12 field of MOV[W] instruction
+        orreq   ip, ip, r6              @ MOVW -> mask in offset bits 31-24
+        orrne   ip, ip, r0              @ MOV -> mask in offset bits 7-0 (or bit 22)
+ARM_BE8(rev     ip, ip)
+        b       2f
+1:
+#endif
         bic     ip, ip, #PV_IMM8_MASK
-        tst     ip, #PV_ROT_MASK                @ check the rotation field
-        orrne   ip, ip, r6 ARM_BE8(, lsl #24)   @ mask in offset bits 31-24
-        biceq   ip, ip, #PV_BIT22               @ clear bit 22
-        orreq   ip, ip, r0 ARM_BE8(, ror #8)    @ mask in offset bits 7-0 (or bit 22)
+        orr     ip, ip, r6 ARM_BE8(, lsl #24)   @ mask in offset bits 31-24
+2:
         str     ip, [r7, r4]
         add     r4, r4, #4
 #endif