KVM: s390: Add FAULT_FLAG_RETRY_NOWAIT for guest fault

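In the case of a guest fault that would have to wait and be retried, we
now exit sie64 with the gmap fault indication set for this thread instead
of retrying in place. The fault is then resolved after the SIE exit, which
makes it possible to handle async page faults.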

Based on a patch from Martin Schwidefsky.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
This commit is contained in:
Dominik Dingel 2013-06-17 16:25:18 +02:00 committed by Christian Borntraeger
parent a91b8ebe86
commit 24eb3a824c
4 changed files with 46 additions and 6 deletions

View File

@ -767,6 +767,7 @@ static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
* @table: pointer to the page directory * @table: pointer to the page directory
* @asce: address space control element for gmap page table * @asce: address space control element for gmap page table
* @crst_list: list of all crst tables used in the guest address space * @crst_list: list of all crst tables used in the guest address space
* @pfault_enabled: defines if pfaults are applicable for the guest
*/ */
struct gmap { struct gmap {
struct list_head list; struct list_head list;
@ -775,6 +776,7 @@ struct gmap {
unsigned long asce; unsigned long asce;
void *private; void *private;
struct list_head crst_list; struct list_head crst_list;
bool pfault_enabled;
}; };
/** /**

View File

@ -79,6 +79,7 @@ struct thread_struct {
unsigned long ksp; /* kernel stack pointer */ unsigned long ksp; /* kernel stack pointer */
mm_segment_t mm_segment; mm_segment_t mm_segment;
unsigned long gmap_addr; /* address of last gmap fault. */ unsigned long gmap_addr; /* address of last gmap fault. */
unsigned int gmap_pfault; /* signal of a pending guest pfault */
struct per_regs per_user; /* User specified PER registers */ struct per_regs per_user; /* User specified PER registers */
struct per_event per_event; /* Cause of the last PER trap */ struct per_event per_event; /* Cause of the last PER trap */
unsigned long per_flags; /* Flags to control debug behavior */ unsigned long per_flags; /* Flags to control debug behavior */

View File

@ -255,6 +255,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!kvm->arch.gmap) if (!kvm->arch.gmap)
goto out_nogmap; goto out_nogmap;
kvm->arch.gmap->private = kvm; kvm->arch.gmap->private = kvm;
kvm->arch.gmap->pfault_enabled = 0;
} }
kvm->arch.css_support = 0; kvm->arch.css_support = 0;
@ -701,6 +702,17 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
return 0; return 0;
} }
/*
 * Synchronously fault in the page behind the last gmap fault recorded for
 * the current thread.
 *
 * The address stored in current->thread.gmap_addr is passed through
 * gmap_fault() for this vcpu's gmap; the result is used as the start
 * address of a single-page get_user_pages() call made while holding
 * mmap_sem for reading.
 *
 * Returns the value of get_user_pages(); the caller in vcpu_post_run()
 * treats any result >= 0 as success.
 */
static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
{
	struct mm_struct *mm = current->mm;
	hva_t hva;
	long ret;

	/*
	 * Resolve the address first; mmap_sem is only taken afterwards.
	 * NOTE(review): the result of gmap_fault() is used unchecked --
	 * confirm get_user_pages() rejects an error value passed as address.
	 */
	hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);

	down_read(&mm->mmap_sem);
	ret = get_user_pages(current, mm, hva, 1, 1, 0, NULL, NULL);
	up_read(&mm->mmap_sem);

	return ret;
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu) static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{ {
int rc, cpuflags; int rc, cpuflags;
@ -730,7 +742,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{ {
int rc; int rc = -1;
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode); vcpu->arch.sie_block->icptcode);
@ -744,7 +756,14 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
current->thread.gmap_addr; current->thread.gmap_addr;
vcpu->run->s390_ucontrol.pgm_code = 0x10; vcpu->run->s390_ucontrol.pgm_code = 0x10;
rc = -EREMOTE; rc = -EREMOTE;
} else {
} else if (current->thread.gmap_pfault) {
current->thread.gmap_pfault = 0;
if (kvm_arch_fault_in_sync(vcpu) >= 0)
rc = 0;
}
if (rc == -1) {
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
trace_kvm_s390_sie_fault(vcpu); trace_kvm_s390_sie_fault(vcpu);
rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);

View File

@ -50,6 +50,7 @@
#define VM_FAULT_BADMAP 0x020000 #define VM_FAULT_BADMAP 0x020000
#define VM_FAULT_BADACCESS 0x040000 #define VM_FAULT_BADACCESS 0x040000
#define VM_FAULT_SIGNAL 0x080000 #define VM_FAULT_SIGNAL 0x080000
#define VM_FAULT_PFAULT 0x100000
static unsigned long store_indication __read_mostly; static unsigned long store_indication __read_mostly;
@ -227,6 +228,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
return; return;
} }
case VM_FAULT_BADCONTEXT: case VM_FAULT_BADCONTEXT:
case VM_FAULT_PFAULT:
do_no_context(regs); do_no_context(regs);
break; break;
case VM_FAULT_SIGNAL: case VM_FAULT_SIGNAL:
@ -264,6 +266,9 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
*/ */
static inline int do_exception(struct pt_regs *regs, int access) static inline int do_exception(struct pt_regs *regs, int access)
{ {
#ifdef CONFIG_PGSTE
struct gmap *gmap;
#endif
struct task_struct *tsk; struct task_struct *tsk;
struct mm_struct *mm; struct mm_struct *mm;
struct vm_area_struct *vma; struct vm_area_struct *vma;
@ -304,9 +309,10 @@ static inline int do_exception(struct pt_regs *regs, int access)
down_read(&mm->mmap_sem); down_read(&mm->mmap_sem);
#ifdef CONFIG_PGSTE #ifdef CONFIG_PGSTE
if ((current->flags & PF_VCPU) && S390_lowcore.gmap) { gmap = (struct gmap *)
address = __gmap_fault(address, ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0);
(struct gmap *) S390_lowcore.gmap); if (gmap) {
address = __gmap_fault(address, gmap);
if (address == -EFAULT) { if (address == -EFAULT) {
fault = VM_FAULT_BADMAP; fault = VM_FAULT_BADMAP;
goto out_up; goto out_up;
@ -315,6 +321,8 @@ static inline int do_exception(struct pt_regs *regs, int access)
fault = VM_FAULT_OOM; fault = VM_FAULT_OOM;
goto out_up; goto out_up;
} }
if (gmap->pfault_enabled)
flags |= FAULT_FLAG_RETRY_NOWAIT;
} }
#endif #endif
@ -371,9 +379,19 @@ retry:
regs, address); regs, address);
} }
if (fault & VM_FAULT_RETRY) { if (fault & VM_FAULT_RETRY) {
#ifdef CONFIG_PGSTE
if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) {
/* FAULT_FLAG_RETRY_NOWAIT has been set,
* mmap_sem has not been released */
current->thread.gmap_pfault = 1;
fault = VM_FAULT_PFAULT;
goto out_up;
}
#endif
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */ * of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY; flags &= ~(FAULT_FLAG_ALLOW_RETRY |
FAULT_FLAG_RETRY_NOWAIT);
flags |= FAULT_FLAG_TRIED; flags |= FAULT_FLAG_TRIED;
down_read(&mm->mmap_sem); down_read(&mm->mmap_sem);
goto retry; goto retry;