4bb3c7a020
POWER9 has hardware bugs relating to transactional memory and thread reconfiguration (changes to hardware SMT mode). Specifically, the core does not have enough storage to store a complete checkpoint of all the architected state for all four threads. The DD2.2 version of POWER9 includes hardware modifications designed to allow hypervisor software to implement workarounds for these problems. This patch implements those workarounds in KVM code so that KVM guests see a full, working transactional memory implementation. The problems center around the use of TM suspended state, where the CPU has a checkpointed state but execution is not transactional. The workaround is to implement a "fake suspend" state, which looks to the guest like suspended state but the CPU does not store a checkpoint. In this state, any instruction that would cause a transition to transactional state (rfid, rfebb, mtmsrd, tresume) or would use the checkpointed state (treclaim) causes a "soft patch" interrupt (vector 0x1500) to the hypervisor so that it can be emulated. The trechkpt instruction also causes a soft patch interrupt. On POWER9 DD2.2, we avoid returning to the guest in any state which would require a checkpoint to be present. The trechkpt in the guest entry path which would normally create that checkpoint is replaced by either a transition to fake suspend state, if the guest is in suspend state, or a rollback to the pre-transactional state if the guest is in transactional state. Fake suspend state is indicated by a flag in the PACA plus a new bit in the PSSCR. The new PSSCR bit is write-only and reads back as 0. On exit from the guest, if the guest is in fake suspend state, we still do the treclaim instruction as we would in real suspend state, in order to get into non-transactional state, but we do not save the resulting register state since there was no checkpoint. Emulation of the instructions that cause a softpatch interrupt is handled in two paths. 
If the guest is in real suspend mode, we call kvmhv_p9_tm_emulation_early() to handle the cases where the guest is transitioning to transactional state. This is called before we do the treclaim in the guest exit path; because we haven't done treclaim, we can get back to the guest with the transaction still active. If the instruction is a case that kvmhv_p9_tm_emulation_early() doesn't handle, or if the guest is in fake suspend state, then we proceed to do the complete guest exit path and subsequently call kvmhv_p9_tm_emulation() in host context with the MMU on. This handles all the cases including the cases that generate program interrupts (illegal instruction or TM Bad Thing) and facility unavailable interrupts. The emulation is reasonably straightforward and is mostly concerned with checking for exception conditions and updating the state of registers such as MSR and CR0. The treclaim emulation takes care to ensure that the TEXASR register gets updated as if it were the guest treclaim instruction that had done failure recording, not the treclaim done in hypervisor state in the guest exit path. With this, the KVM_CAP_PPC_HTM capability returns true (1) even if transactional memory is not available to host userspace. Signed-off-by: Paul Mackerras <paulus@ozlabs.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
376 lines
12 KiB
C
376 lines
12 KiB
C
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 *
 * Copyright SUSE Linux Products GmbH 2009
 *
 * Authors: Alexander Graf <agraf@suse.de>
 */
|
|
|
|
#ifndef __ASM_KVM_BOOK3S_H__
|
|
#define __ASM_KVM_BOOK3S_H__
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/kvm_host.h>
|
|
#include <asm/kvm_book3s_asm.h>
|
|
|
|
struct kvmppc_bat {
|
|
u64 raw;
|
|
u32 bepi;
|
|
u32 bepi_mask;
|
|
u32 brpn;
|
|
u8 wimg;
|
|
u8 pp;
|
|
bool vs : 1;
|
|
bool vp : 1;
|
|
};
|
|
|
|
struct kvmppc_sid_map {
|
|
u64 guest_vsid;
|
|
u64 guest_esid;
|
|
u64 host_vsid;
|
|
bool valid : 1;
|
|
};
|
|
|
|
/*
 * Geometry of a sid_map[] table: SID_MAP_NUM (2^SID_MAP_BITS) entries,
 * with SID_MAP_MASK being the matching all-ones index mask.
 */
#define SID_MAP_BITS	9
#define SID_MAP_NUM	(1 << SID_MAP_BITS)
#define SID_MAP_MASK	(SID_MAP_NUM - 1)
|
|
|
|
/*
 * Number of context ids per vcpu.  Builds without CONFIG_PPC_BOOK3S_64
 * use 128 contexts and additionally define VSID_POOL_SIZE (16 entries
 * per context); Book3S-64 builds use a single context and no pool size.
 */
#ifndef CONFIG_PPC_BOOK3S_64
#define SID_CONTEXTS	128
#define VSID_POOL_SIZE	(SID_CONTEXTS * 16)
#else
#define SID_CONTEXTS	1
#endif
|
|
|
|
struct hpte_cache {
|
|
struct hlist_node list_pte;
|
|
struct hlist_node list_pte_long;
|
|
struct hlist_node list_vpte;
|
|
struct hlist_node list_vpte_long;
|
|
#ifdef CONFIG_PPC_BOOK3S_64
|
|
struct hlist_node list_vpte_64k;
|
|
#endif
|
|
struct rcu_head rcu_head;
|
|
u64 host_vpn;
|
|
u64 pfn;
|
|
ulong slot;
|
|
struct kvmppc_pte pte;
|
|
int pagesize;
|
|
};
|
|
|
|
/*
|
|
* Struct for a virtual core.
|
|
* Note: entry_exit_map combines a bitmap of threads that have entered
|
|
* in the bottom 8 bits and a bitmap of threads that have exited in the
|
|
* next 8 bits. This is so that we can atomically set the entry bit
|
|
* iff the exit map is 0 without taking a lock.
|
|
*/
|
|
struct kvmppc_vcore {
|
|
int n_runnable;
|
|
int num_threads;
|
|
int entry_exit_map;
|
|
int napping_threads;
|
|
int first_vcpuid;
|
|
u16 pcpu;
|
|
u16 last_cpu;
|
|
u8 vcore_state;
|
|
u8 in_guest;
|
|
struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
|
|
struct list_head preempt_list;
|
|
spinlock_t lock;
|
|
struct swait_queue_head wq;
|
|
spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
|
|
u64 stolen_tb;
|
|
u64 preempt_tb;
|
|
struct kvm_vcpu *runner;
|
|
struct kvm *kvm;
|
|
u64 tb_offset; /* guest timebase - host timebase */
|
|
ulong lpcr;
|
|
u32 arch_compat;
|
|
ulong pcr;
|
|
ulong dpdes; /* doorbell state (POWER8) */
|
|
ulong vtb; /* virtual timebase */
|
|
ulong conferring_threads;
|
|
unsigned int halt_poll_ns;
|
|
};
|
|
|
|
struct kvmppc_vcpu_book3s {
|
|
struct kvmppc_sid_map sid_map[SID_MAP_NUM];
|
|
struct {
|
|
u64 esid;
|
|
u64 vsid;
|
|
} slb_shadow[64];
|
|
u8 slb_shadow_max;
|
|
struct kvmppc_bat ibat[8];
|
|
struct kvmppc_bat dbat[8];
|
|
u64 hid[6];
|
|
u64 gqr[8];
|
|
u64 sdr1;
|
|
u64 hior;
|
|
u64 msr_mask;
|
|
u64 vtb;
|
|
#ifdef CONFIG_PPC_BOOK3S_32
|
|
u32 vsid_pool[VSID_POOL_SIZE];
|
|
u32 vsid_next;
|
|
#else
|
|
u64 proto_vsid_first;
|
|
u64 proto_vsid_max;
|
|
u64 proto_vsid_next;
|
|
#endif
|
|
int context_id[SID_CONTEXTS];
|
|
|
|
bool hior_explicit; /* HIOR is set by ioctl, not PVR */
|
|
|
|
struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE];
|
|
struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG];
|
|
struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE];
|
|
struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG];
|
|
#ifdef CONFIG_PPC_BOOK3S_64
|
|
struct hlist_head hpte_hash_vpte_64k[HPTEG_HASH_NUM_VPTE_64K];
|
|
#endif
|
|
int hpte_cache_count;
|
|
spinlock_t mmu_lock;
|
|
};
|
|
|
|
/*
 * VSID_* constants (64-bit, unsigned long long).
 * NOTE(review): VSID_64K, VSID_1T, VSID_REAL_DR, VSID_REAL_IR and
 * VSID_PR are single-bit values and look like flags in the upper bits
 * of a VSID, while VSID_REAL and VSID_BAT look like reserved base
 * values -- confirm against the code that uses them.
 */
#define VSID_REAL	0x07ffffffffc00000ULL
#define VSID_BAT	0x07ffffffffb00000ULL
#define VSID_64K	0x0800000000000000ULL
#define VSID_1T		0x1000000000000000ULL
#define VSID_REAL_DR	0x2000000000000000ULL
#define VSID_REAL_IR	0x4000000000000000ULL
#define VSID_PR		0x8000000000000000ULL
|
|
|
|
extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong ea, ulong ea_mask);
|
|
extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask);
|
|
extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end);
|
|
extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr);
|
|
extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu);
|
|
extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu);
|
|
extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu);
|
|
extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte,
|
|
bool iswrite);
|
|
extern void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
|
|
extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
|
|
extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size);
|
|
extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
|
|
extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run,
|
|
struct kvm_vcpu *vcpu, unsigned long addr,
|
|
unsigned long status);
|
|
extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr,
|
|
unsigned long slb_v, unsigned long valid);
|
|
extern int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
|
unsigned long gpa, gva_t ea, int is_store);
|
|
|
|
/*
 * HPTE cache handling (operating on struct hpte_cache), plus the HV MMU
 * init hook and the hcall-implemented query.  Declarations only.
 */
void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte);
void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu);
int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu);
void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
int kvmppc_mmu_hpte_sysinit(void);
void kvmppc_mmu_hpte_sysexit(void);
int kvmppc_mmu_hv_init(void);
int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
|
|
|
|
extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run,
|
|
struct kvm_vcpu *vcpu,
|
|
unsigned long ea, unsigned long dsisr);
|
|
extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
|
struct kvmppc_pte *gpte, bool data, bool iswrite);
|
|
extern int kvmppc_init_vm_radix(struct kvm *kvm);
|
|
extern void kvmppc_free_radix(struct kvm *kvm);
|
|
extern int kvmppc_radix_init(void);
|
|
extern void kvmppc_radix_exit(void);
|
|
extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
|
unsigned long gfn);
|
|
extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
|
unsigned long gfn);
|
|
extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
|
unsigned long gfn);
|
|
extern long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
|
|
struct kvm_memory_slot *memslot, unsigned long *map);
|
|
extern int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
|
|
|
|
/* XXX remove this export when load_last_inst() is generic */
|
|
extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
|
|
extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
|
|
extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
|
|
unsigned int vec);
|
|
extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags);
|
|
extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
|
|
bool upper, u32 val);
|
|
extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
|
|
extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
|
|
extern kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa,
|
|
bool writing, bool *writable);
|
|
extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
|
|
unsigned long *rmap, long pte_index, int realmode);
|
|
extern void kvmppc_update_dirty_map(struct kvm_memory_slot *memslot,
|
|
unsigned long gfn, unsigned long psize);
|
|
extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
|
|
unsigned long pte_index);
|
|
void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
|
|
unsigned long pte_index);
|
|
extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
|
|
unsigned long *nb_ret);
|
|
extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr,
|
|
unsigned long gpa, bool dirty);
|
|
extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
|
|
long pte_index, unsigned long pteh, unsigned long ptel,
|
|
pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
|
|
extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
|
|
unsigned long pte_index, unsigned long avpn,
|
|
unsigned long *hpret);
|
|
extern long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
|
|
struct kvm_memory_slot *memslot, unsigned long *map);
|
|
extern void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
|
|
struct kvm_memory_slot *memslot,
|
|
unsigned long *map);
|
|
extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
|
|
unsigned long mask);
|
|
extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
|
|
|
|
/*
 * Transactional-memory emulation entry points (POWER9, per the "p9"
 * in the names).  Declarations only.
 */
int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu);
int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu);
void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu);
|
|
|
|
extern void kvmppc_entry_trampoline(void);
|
|
extern void kvmppc_hv_entry_trampoline(void);
|
|
extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
|
|
extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst);
|
|
extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
|
|
extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm);
|
|
extern int kvmppc_hcall_impl_pr(unsigned long cmd);
|
|
extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd);
|
|
extern void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu);
|
|
extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu);
|
|
extern int kvm_irq_bypass;
|
|
|
|
static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
|
|
{
|
|
return vcpu->arch.book3s;
|
|
}
|
|
|
|
/* Also add subarch specific defines */
|
|
|
|
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
|
|
#include <asm/kvm_book3s_32.h>
|
|
#endif
|
|
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
|
|
#include <asm/kvm_book3s_64.h>
|
|
#endif
|
|
|
|
/*
 * Store @val into general-purpose register slot @num of @vcpu.
 * @num is used as an array index as-is; no range check is performed.
 */
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
	vcpu->arch.gpr[num] = val;
}
|
|
|
|
static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
|
|
{
|
|
return vcpu->arch.gpr[num];
|
|
}
|
|
|
|
/* Store @val as the saved CR value of @vcpu (vcpu->arch.cr). */
static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
{
	vcpu->arch.cr = val;
}
|
|
|
|
static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
|
|
{
|
|
return vcpu->arch.cr;
|
|
}
|
|
|
|
/* Store @val as the saved XER value of @vcpu (vcpu->arch.xer). */
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
{
	vcpu->arch.xer = val;
}
|
|
|
|
static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
|
|
{
|
|
return vcpu->arch.xer;
|
|
}
|
|
|
|
/* Store @val as the saved CTR value of @vcpu (vcpu->arch.ctr). */
static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
{
	vcpu->arch.ctr = val;
}
|
|
|
|
static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
|
|
{
|
|
return vcpu->arch.ctr;
|
|
}
|
|
|
|
/* Store @val as the saved LR value of @vcpu (vcpu->arch.lr). */
static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
{
	vcpu->arch.lr = val;
}
|
|
|
|
static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
|
|
{
|
|
return vcpu->arch.lr;
|
|
}
|
|
|
|
/* Store @val as the saved program counter of @vcpu (vcpu->arch.pc). */
static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
{
	vcpu->arch.pc = val;
}
|
|
|
|
static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
|
|
{
|
|
return vcpu->arch.pc;
|
|
}
|
|
|
|
static inline u64 kvmppc_get_msr(struct kvm_vcpu *vcpu);
|
|
static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu)
|
|
{
|
|
return (kvmppc_get_msr(vcpu) & MSR_LE) != (MSR_KERNEL & MSR_LE);
|
|
}
|
|
|
|
static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
|
|
{
|
|
return vcpu->arch.fault_dar;
|
|
}
|
|
|
|
static inline bool is_kvmppc_resume_guest(int r)
|
|
{
|
|
return (r == RESUME_GUEST || r == RESUME_GUEST_NV);
|
|
}
|
|
|
|
static inline bool is_kvmppc_hv_enabled(struct kvm *kvm);
|
|
static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu)
|
|
{
|
|
/* Only PR KVM supports the magic page */
|
|
return !is_kvmppc_hv_enabled(vcpu->kvm);
|
|
}
|
|
|
|
/* H_LOGICAL_CI load/store handlers (per the names).  Declarations only. */
int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu);
int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
|
|
|
|
/*
 * Magic register values loaded into r3 and r4 before the 'sc' assembly
 * instruction for the OSI hypercalls.
 */
#define OSI_SC_MAGIC_R3		0x113724FA
#define OSI_SC_MAGIC_R4		0x77810F9B

/* NOTE(review): presumably the dcbz instruction encoding -- confirm. */
#define INS_DCBZ		0x7c0007ec
/* TO = 31 for unconditional trap */
#define INS_TW			0x7fe00008

/* LPIDs we support with this build -- runtime limit may be lower */
#define KVMPPC_NR_LPIDS		(LPID_RSVD + 1)

/* Mask and offset constants for the "split hack". */
#define SPLIT_HACK_MASK		0xff000000
#define SPLIT_HACK_OFFS		0xfb000000
|
|
|
|
#endif /* __ASM_KVM_BOOK3S_H__ */
|