4bb3c7a020
POWER9 has hardware bugs relating to transactional memory and thread reconfiguration (changes to hardware SMT mode). Specifically, the core does not have enough storage to store a complete checkpoint of all the architected state for all four threads. The DD2.2 version of POWER9 includes hardware modifications designed to allow hypervisor software to implement workarounds for these problems. This patch implements those workarounds in KVM code so that KVM guests see a full, working transactional memory implementation. The problems center around the use of TM suspended state, where the CPU has a checkpointed state but execution is not transactional. The workaround is to implement a "fake suspend" state, which looks to the guest like suspended state but the CPU does not store a checkpoint. In this state, any instruction that would cause a transition to transactional state (rfid, rfebb, mtmsrd, tresume) or would use the checkpointed state (treclaim) causes a "soft patch" interrupt (vector 0x1500) to the hypervisor so that it can be emulated. The trechkpt instruction also causes a soft patch interrupt. On POWER9 DD2.2, we avoid returning to the guest in any state which would require a checkpoint to be present. The trechkpt in the guest entry path which would normally create that checkpoint is replaced by either a transition to fake suspend state, if the guest is in suspend state, or a rollback to the pre-transactional state if the guest is in transactional state. Fake suspend state is indicated by a flag in the PACA plus a new bit in the PSSCR. The new PSSCR bit is write-only and reads back as 0. On exit from the guest, if the guest is in fake suspend state, we still do the treclaim instruction as we would in real suspend state, in order to get into non-transactional state, but we do not save the resulting register state since there was no checkpoint. Emulation of the instructions that cause a softpatch interrupt is handled in two paths. If the guest is in real suspend mode, we call kvmhv_p9_tm_emulation_early() to handle the cases where the guest is transitioning to transactional state. This is called before we do the treclaim in the guest exit path; because we haven't done treclaim, we can get back to the guest with the transaction still active. If the instruction is a case that kvmhv_p9_tm_emulation_early() doesn't handle, or if the guest is in fake suspend state, then we proceed to do the complete guest exit path and subsequently call kvmhv_p9_tm_emulation() in host context with the MMU on. This handles all the cases including the cases that generate program interrupts (illegal instruction or TM Bad Thing) and facility unavailable interrupts. The emulation is reasonably straightforward and is mostly concerned with checking for exception conditions and updating the state of registers such as MSR and CR0. The treclaim emulation takes care to ensure that the TEXASR register gets updated as if it were the guest treclaim instruction that had done failure recording, not the treclaim done in hypervisor state in the guest exit path. With this, the KVM_CAP_PPC_HTM capability returns true (1) even if transactional memory is not available to host userspace. Signed-off-by: Paul Mackerras <paulus@ozlabs.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
183 lines
4.4 KiB
C
183 lines
4.4 KiB
C
/*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License, version 2, as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*
|
|
* Copyright SUSE Linux Products GmbH 2009
|
|
*
|
|
* Authors: Alexander Graf <agraf@suse.de>
|
|
*/
|
|
|
|
#ifndef __ASM_KVM_BOOK3S_ASM_H__
|
|
#define __ASM_KVM_BOOK3S_ASM_H__
|
|
|
|
/* XICS ICP register offsets */
|
|
#define XICS_XIRR 4
|
|
#define XICS_MFRR 0xc
|
|
#define XICS_IPI 2 /* interrupt source # for IPIs */
|
|
|
|
/* Maximum number of threads per physical core */
|
|
#define MAX_SMT_THREADS 8
|
|
|
|
/* Maximum number of subcores per physical core */
|
|
#define MAX_SUBCORES 4
|
|
|
|
#ifdef __ASSEMBLY__
|
|
|
|
#ifdef CONFIG_KVM_BOOK3S_HANDLER
|
|
|
|
#include <asm/kvm_asm.h>
|
|
|
|
.macro DO_KVM intno
|
|
.if (\intno == BOOK3S_INTERRUPT_SYSTEM_RESET) || \
|
|
(\intno == BOOK3S_INTERRUPT_MACHINE_CHECK) || \
|
|
(\intno == BOOK3S_INTERRUPT_DATA_STORAGE) || \
|
|
(\intno == BOOK3S_INTERRUPT_INST_STORAGE) || \
|
|
(\intno == BOOK3S_INTERRUPT_DATA_SEGMENT) || \
|
|
(\intno == BOOK3S_INTERRUPT_INST_SEGMENT) || \
|
|
(\intno == BOOK3S_INTERRUPT_EXTERNAL) || \
|
|
(\intno == BOOK3S_INTERRUPT_EXTERNAL_HV) || \
|
|
(\intno == BOOK3S_INTERRUPT_ALIGNMENT) || \
|
|
(\intno == BOOK3S_INTERRUPT_PROGRAM) || \
|
|
(\intno == BOOK3S_INTERRUPT_FP_UNAVAIL) || \
|
|
(\intno == BOOK3S_INTERRUPT_DECREMENTER) || \
|
|
(\intno == BOOK3S_INTERRUPT_SYSCALL) || \
|
|
(\intno == BOOK3S_INTERRUPT_TRACE) || \
|
|
(\intno == BOOK3S_INTERRUPT_PERFMON) || \
|
|
(\intno == BOOK3S_INTERRUPT_ALTIVEC) || \
|
|
(\intno == BOOK3S_INTERRUPT_VSX)
|
|
|
|
b kvmppc_trampoline_\intno
|
|
kvmppc_resume_\intno:
|
|
|
|
.endif
|
|
.endm
|
|
|
|
#else
|
|
|
|
.macro DO_KVM intno
|
|
.endm
|
|
|
|
#endif /* CONFIG_KVM_BOOK3S_HANDLER */
|
|
|
|
#else /*__ASSEMBLY__ */
|
|
|
|
struct kvmppc_vcore;
|
|
|
|
/* Struct used for coordinating micro-threading (split-core) mode changes */
|
|
struct kvm_split_mode {
|
|
unsigned long rpr;
|
|
unsigned long pmmar;
|
|
unsigned long ldbar;
|
|
u8 subcore_size;
|
|
u8 do_nap;
|
|
u8 napped[MAX_SMT_THREADS];
|
|
struct kvmppc_vcore *vc[MAX_SUBCORES];
|
|
/* Bits for changing lpcr on P9 */
|
|
unsigned long lpcr_req;
|
|
unsigned long lpidr_req;
|
|
unsigned long host_lpcr;
|
|
u32 do_set;
|
|
u32 do_restore;
|
|
union {
|
|
u32 allphases;
|
|
u8 phase[4];
|
|
} lpcr_sync;
|
|
};
|
|
|
|
/*
|
|
* This struct goes in the PACA on 64-bit processors. It is used
|
|
* to store host state that needs to be saved when we enter a guest
|
|
* and restored when we exit, but isn't specific to any particular
|
|
* guest or vcpu. It also has some scratch fields used by the guest
|
|
* exit code.
|
|
*/
|
|
struct kvmppc_host_state {
|
|
ulong host_r1;
|
|
ulong host_r2;
|
|
ulong host_msr;
|
|
ulong vmhandler;
|
|
ulong scratch0;
|
|
ulong scratch1;
|
|
ulong scratch2;
|
|
u8 in_guest;
|
|
u8 restore_hid5;
|
|
u8 napping;
|
|
|
|
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
|
u8 hwthread_req;
|
|
u8 hwthread_state;
|
|
u8 host_ipi;
|
|
u8 ptid; /* thread number within subcore when split */
|
|
u8 tid; /* thread number within whole core */
|
|
u8 fake_suspend;
|
|
struct kvm_vcpu *kvm_vcpu;
|
|
struct kvmppc_vcore *kvm_vcore;
|
|
void __iomem *xics_phys;
|
|
void __iomem *xive_tima_phys;
|
|
void __iomem *xive_tima_virt;
|
|
u32 saved_xirr;
|
|
u64 dabr;
|
|
u64 host_mmcr[7]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */
|
|
u32 host_pmc[8];
|
|
u64 host_purr;
|
|
u64 host_spurr;
|
|
u64 host_dscr;
|
|
u64 dec_expires;
|
|
struct kvm_split_mode *kvm_split_mode;
|
|
#endif
|
|
#ifdef CONFIG_PPC_BOOK3S_64
|
|
u64 cfar;
|
|
u64 ppr;
|
|
u64 host_fscr;
|
|
#endif
|
|
};
|
|
|
|
struct kvmppc_book3s_shadow_vcpu {
|
|
bool in_use;
|
|
ulong gpr[14];
|
|
u32 cr;
|
|
ulong xer;
|
|
ulong ctr;
|
|
ulong lr;
|
|
ulong pc;
|
|
|
|
ulong shadow_srr1;
|
|
ulong fault_dar;
|
|
u32 fault_dsisr;
|
|
u32 last_inst;
|
|
|
|
#ifdef CONFIG_PPC_BOOK3S_32
|
|
u32 sr[16]; /* Guest SRs */
|
|
|
|
struct kvmppc_host_state hstate;
|
|
#endif
|
|
|
|
#ifdef CONFIG_PPC_BOOK3S_64
|
|
u8 slb_max; /* highest used guest slb entry */
|
|
struct {
|
|
u64 esid;
|
|
u64 vsid;
|
|
} slb[64]; /* guest SLB */
|
|
u64 shadow_fscr;
|
|
#endif
|
|
};
|
|
|
|
#endif /*__ASSEMBLY__ */
|
|
|
|
/* Values for kvm_state */
|
|
#define KVM_HWTHREAD_IN_KERNEL 0
|
|
#define KVM_HWTHREAD_IN_IDLE 1
|
|
#define KVM_HWTHREAD_IN_KVM 2
|
|
|
|
#endif /* __ASM_KVM_BOOK3S_ASM_H__ */
|