linux/arch/x86/lib/insn-eval.c
Ricardo Neri 536b815388 x86/insn-eval: Add utility function to identify string instructions
String instructions are special because, in protected mode, the linear
address is always obtained via the ES segment register in operands that
use the (E)DI register; the DS segment register in operands that use
the (E)SI register. Furthermore, segment override prefixes are ignored
when calculating a linear address involving the (E)DI register; segment
override prefixes can be used when calculating linear addresses involving
the (E)SI register.

It follows that linear addresses are calculated differently for the case of
string instructions. The purpose of this utility function is to identify
such instructions for callers to determine a linear address correctly.

Note that this function only identifies string instructions; it does not
determine what segment register to use in the address computation. That is
left to callers. A subsequent commmit introduces a function to determine
the segment register to use given the instruction, operands and
segment override prefixes.

Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: ricardo.neri@intel.com
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Huang Rui <ray.huang@amd.com>
Cc: Qiaowei Ren <qiaowei.ren@intel.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: "Ravi V. Shankar" <ravi.v.shankar@intel.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Colin Ian King <colin.king@canonical.com>
Cc: Chen Yucong <slaoub@gmail.com>
Cc: Adam Buchbinder <adam.buchbinder@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Garnier <thgarnie@google.com>
Link: https://lkml.kernel.org/r/1509135945-13762-13-git-send-email-ricardo.neri-calderon@linux.intel.com
2017-11-01 21:50:11 +01:00

212 lines
5.3 KiB
C

/*
* Utility functions for x86 operand and address decoding
*
* Copyright (C) Intel Corporation 2017
*/
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/ratelimit.h>
#include <asm/inat.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
#undef pr_fmt
#define pr_fmt(fmt) "insn: " fmt
enum reg_type {
REG_TYPE_RM = 0,
REG_TYPE_INDEX,
REG_TYPE_BASE,
};
/**
* is_string_insn() - Determine if instruction is a string instruction
* @insn: Instruction containing the opcode to inspect
*
* Returns:
*
* true if the instruction, determined by the opcode, is any of the
* string instructions as defined in the Intel Software Development manual.
* False otherwise.
*/
static bool is_string_insn(struct insn *insn)
{
insn_get_opcode(insn);
/* All string instructions have a 1-byte opcode. */
if (insn->opcode.nbytes != 1)
return false;
switch (insn->opcode.bytes[0]) {
case 0x6c ... 0x6f: /* INS, OUTS */
case 0xa4 ... 0xa7: /* MOVS, CMPS */
case 0xaa ... 0xaf: /* STOS, LODS, SCAS */
return true;
default:
return false;
}
}
static int get_reg_offset(struct insn *insn, struct pt_regs *regs,
enum reg_type type)
{
int regno = 0;
static const int regoff[] = {
offsetof(struct pt_regs, ax),
offsetof(struct pt_regs, cx),
offsetof(struct pt_regs, dx),
offsetof(struct pt_regs, bx),
offsetof(struct pt_regs, sp),
offsetof(struct pt_regs, bp),
offsetof(struct pt_regs, si),
offsetof(struct pt_regs, di),
#ifdef CONFIG_X86_64
offsetof(struct pt_regs, r8),
offsetof(struct pt_regs, r9),
offsetof(struct pt_regs, r10),
offsetof(struct pt_regs, r11),
offsetof(struct pt_regs, r12),
offsetof(struct pt_regs, r13),
offsetof(struct pt_regs, r14),
offsetof(struct pt_regs, r15),
#endif
};
int nr_registers = ARRAY_SIZE(regoff);
/*
* Don't possibly decode a 32-bit instructions as
* reading a 64-bit-only register.
*/
if (IS_ENABLED(CONFIG_X86_64) && !insn->x86_64)
nr_registers -= 8;
switch (type) {
case REG_TYPE_RM:
regno = X86_MODRM_RM(insn->modrm.value);
if (X86_REX_B(insn->rex_prefix.value))
regno += 8;
break;
case REG_TYPE_INDEX:
regno = X86_SIB_INDEX(insn->sib.value);
if (X86_REX_X(insn->rex_prefix.value))
regno += 8;
/*
* If ModRM.mod != 3 and SIB.index = 4 the scale*index
* portion of the address computation is null. This is
* true only if REX.X is 0. In such a case, the SIB index
* is used in the address computation.
*/
if (X86_MODRM_MOD(insn->modrm.value) != 3 && regno == 4)
return -EDOM;
break;
case REG_TYPE_BASE:
regno = X86_SIB_BASE(insn->sib.value);
/*
* If ModRM.mod is 0 and SIB.base == 5, the base of the
* register-indirect addressing is 0. In this case, a
* 32-bit displacement follows the SIB byte.
*/
if (!X86_MODRM_MOD(insn->modrm.value) && regno == 5)
return -EDOM;
if (X86_REX_B(insn->rex_prefix.value))
regno += 8;
break;
default:
pr_err_ratelimited("invalid register type: %d\n", type);
return -EINVAL;
}
if (regno >= nr_registers) {
WARN_ONCE(1, "decoded an instruction with an invalid register");
return -EINVAL;
}
return regoff[regno];
}
/**
* insn_get_modrm_rm_off() - Obtain register in r/m part of the ModRM byte
* @insn: Instruction containing the ModRM byte
* @regs: Register values as seen when entering kernel mode
*
* Returns:
*
* The register indicated by the r/m part of the ModRM byte. The
* register is obtained as an offset from the base of pt_regs. In specific
* cases, the returned value can be -EDOM to indicate that the particular value
* of ModRM does not refer to a register and shall be ignored.
*/
int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs)
{
return get_reg_offset(insn, regs, REG_TYPE_RM);
}
/*
* return the address being referenced be instruction
* for rm=3 returning the content of the rm reg
* for rm!=3 calculates the address using SIB and Disp
*/
void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
{
int addr_offset, base_offset, indx_offset;
unsigned long linear_addr = -1L;
long eff_addr, base, indx;
insn_byte_t sib;
insn_get_modrm(insn);
insn_get_sib(insn);
sib = insn->sib.value;
if (X86_MODRM_MOD(insn->modrm.value) == 3) {
addr_offset = get_reg_offset(insn, regs, REG_TYPE_RM);
if (addr_offset < 0)
goto out;
eff_addr = regs_get_register(regs, addr_offset);
} else {
if (insn->sib.nbytes) {
/*
* Negative values in the base and index offset means
* an error when decoding the SIB byte. Except -EDOM,
* which means that the registers should not be used
* in the address computation.
*/
base_offset = get_reg_offset(insn, regs, REG_TYPE_BASE);
if (base_offset == -EDOM)
base = 0;
else if (base_offset < 0)
goto out;
else
base = regs_get_register(regs, base_offset);
indx_offset = get_reg_offset(insn, regs, REG_TYPE_INDEX);
if (indx_offset == -EDOM)
indx = 0;
else if (indx_offset < 0)
goto out;
else
indx = regs_get_register(regs, indx_offset);
eff_addr = base + indx * (1 << X86_SIB_SCALE(sib));
} else {
addr_offset = get_reg_offset(insn, regs, REG_TYPE_RM);
if (addr_offset < 0)
goto out;
eff_addr = regs_get_register(regs, addr_offset);
}
eff_addr += insn->displacement.value;
}
linear_addr = (unsigned long)eff_addr;
out:
return (void __user *)linear_addr;
}