KVM: Intelligent device lookup on I/O bus
Currently the method of dealing with an IO operation on a bus (PIO/MMIO) is to call the read or write callback for each device registered on the bus until we find a device which handles it. Since the number of devices on a bus can be significant due to ioeventfds and coalesced MMIO zones, this leads to a lot of overhead on each IO operation. Instead of registering devices, we now register ranges which points to a device. Lookup is done using an efficient bsearch instead of a linear search. Performance test was conducted by comparing exit count per second with 200 ioeventfds created on one byte and the guest is trying to access a different byte continuously (triggering usermode exits). Before the patch the guest has achieved 259k exits per second, after the patch the guest does 274k exits per second. Cc: Avi Kivity <avi@redhat.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Signed-off-by: Sasha Levin <levinsasha928@gmail.com> Signed-off-by: Avi Kivity <avi@redhat.com>
This commit is contained in:
		
							parent
							
								
									0d460ffc09
								
							
						
					
					
						commit
						743eeb0b01
					
				| @ -713,14 +713,16 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | ||||
| 	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); | ||||
| 
 | ||||
| 	kvm_iodevice_init(&pit->dev, &pit_dev_ops); | ||||
| 	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev); | ||||
| 	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS, | ||||
| 				      KVM_PIT_MEM_LENGTH, &pit->dev); | ||||
| 	if (ret < 0) | ||||
| 		goto fail; | ||||
| 
 | ||||
| 	if (flags & KVM_PIT_SPEAKER_DUMMY) { | ||||
| 		kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); | ||||
| 		ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, | ||||
| 						&pit->speaker_dev); | ||||
| 					      KVM_SPEAKER_BASE_ADDRESS, 4, | ||||
| 					      &pit->speaker_dev); | ||||
| 		if (ret < 0) | ||||
| 			goto fail_unregister; | ||||
| 	} | ||||
|  | ||||
| @ -459,15 +459,9 @@ static int picdev_in_range(gpa_t addr) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static inline struct kvm_pic *to_pic(struct kvm_io_device *dev) | ||||
| { | ||||
| 	return container_of(dev, struct kvm_pic, dev); | ||||
| } | ||||
| 
 | ||||
| static int picdev_write(struct kvm_io_device *this, | ||||
| static int picdev_write(struct kvm_pic *s, | ||||
| 			 gpa_t addr, int len, const void *val) | ||||
| { | ||||
| 	struct kvm_pic *s = to_pic(this); | ||||
| 	unsigned char data = *(unsigned char *)val; | ||||
| 	if (!picdev_in_range(addr)) | ||||
| 		return -EOPNOTSUPP; | ||||
| @ -494,10 +488,9 @@ static int picdev_write(struct kvm_io_device *this, | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int picdev_read(struct kvm_io_device *this, | ||||
| static int picdev_read(struct kvm_pic *s, | ||||
| 		       gpa_t addr, int len, void *val) | ||||
| { | ||||
| 	struct kvm_pic *s = to_pic(this); | ||||
| 	unsigned char data = 0; | ||||
| 	if (!picdev_in_range(addr)) | ||||
| 		return -EOPNOTSUPP; | ||||
| @ -525,6 +518,48 @@ static int picdev_read(struct kvm_io_device *this, | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int picdev_master_write(struct kvm_io_device *dev, | ||||
| 			       gpa_t addr, int len, const void *val) | ||||
| { | ||||
| 	return picdev_write(container_of(dev, struct kvm_pic, dev_master), | ||||
| 			    addr, len, val); | ||||
| } | ||||
| 
 | ||||
| static int picdev_master_read(struct kvm_io_device *dev, | ||||
| 			      gpa_t addr, int len, void *val) | ||||
| { | ||||
| 	return picdev_read(container_of(dev, struct kvm_pic, dev_master), | ||||
| 			    addr, len, val); | ||||
| } | ||||
| 
 | ||||
| static int picdev_slave_write(struct kvm_io_device *dev, | ||||
| 			      gpa_t addr, int len, const void *val) | ||||
| { | ||||
| 	return picdev_write(container_of(dev, struct kvm_pic, dev_slave), | ||||
| 			    addr, len, val); | ||||
| } | ||||
| 
 | ||||
| static int picdev_slave_read(struct kvm_io_device *dev, | ||||
| 			     gpa_t addr, int len, void *val) | ||||
| { | ||||
| 	return picdev_read(container_of(dev, struct kvm_pic, dev_slave), | ||||
| 			    addr, len, val); | ||||
| } | ||||
| 
 | ||||
| static int picdev_eclr_write(struct kvm_io_device *dev, | ||||
| 			     gpa_t addr, int len, const void *val) | ||||
| { | ||||
| 	return picdev_write(container_of(dev, struct kvm_pic, dev_eclr), | ||||
| 			    addr, len, val); | ||||
| } | ||||
| 
 | ||||
| static int picdev_eclr_read(struct kvm_io_device *dev, | ||||
| 			    gpa_t addr, int len, void *val) | ||||
| { | ||||
| 	return picdev_read(container_of(dev, struct kvm_pic, dev_eclr), | ||||
| 			    addr, len, val); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * callback when PIC0 irq status changed | ||||
|  */ | ||||
| @ -537,9 +572,19 @@ static void pic_irq_request(struct kvm *kvm, int level) | ||||
| 	s->output = level; | ||||
| } | ||||
| 
 | ||||
| static const struct kvm_io_device_ops picdev_ops = { | ||||
| 	.read     = picdev_read, | ||||
| 	.write    = picdev_write, | ||||
| static const struct kvm_io_device_ops picdev_master_ops = { | ||||
| 	.read     = picdev_master_read, | ||||
| 	.write    = picdev_master_write, | ||||
| }; | ||||
| 
 | ||||
| static const struct kvm_io_device_ops picdev_slave_ops = { | ||||
| 	.read     = picdev_slave_read, | ||||
| 	.write    = picdev_slave_write, | ||||
| }; | ||||
| 
 | ||||
| static const struct kvm_io_device_ops picdev_eclr_ops = { | ||||
| 	.read     = picdev_eclr_read, | ||||
| 	.write    = picdev_eclr_write, | ||||
| }; | ||||
| 
 | ||||
| struct kvm_pic *kvm_create_pic(struct kvm *kvm) | ||||
| @ -560,16 +605,39 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | ||||
| 	/*
 | ||||
| 	 * Initialize PIO device | ||||
| 	 */ | ||||
| 	kvm_iodevice_init(&s->dev, &picdev_ops); | ||||
| 	kvm_iodevice_init(&s->dev_master, &picdev_master_ops); | ||||
| 	kvm_iodevice_init(&s->dev_slave, &picdev_slave_ops); | ||||
| 	kvm_iodevice_init(&s->dev_eclr, &picdev_eclr_ops); | ||||
| 	mutex_lock(&kvm->slots_lock); | ||||
| 	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev); | ||||
| 	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2, | ||||
| 				      &s->dev_master); | ||||
| 	if (ret < 0) | ||||
| 		goto fail_unlock; | ||||
| 
 | ||||
| 	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2, &s->dev_slave); | ||||
| 	if (ret < 0) | ||||
| 		goto fail_unreg_2; | ||||
| 
 | ||||
| 	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2, &s->dev_eclr); | ||||
| 	if (ret < 0) | ||||
| 		goto fail_unreg_1; | ||||
| 
 | ||||
| 	mutex_unlock(&kvm->slots_lock); | ||||
| 	if (ret < 0) { | ||||
| 		kfree(s); | ||||
| 		return NULL; | ||||
| 	} | ||||
| 
 | ||||
| 	return s; | ||||
| 
 | ||||
| fail_unreg_1: | ||||
| 	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_slave); | ||||
| 
 | ||||
| fail_unreg_2: | ||||
| 	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_master); | ||||
| 
 | ||||
| fail_unlock: | ||||
| 	mutex_unlock(&kvm->slots_lock); | ||||
| 
 | ||||
| 	kfree(s); | ||||
| 
 | ||||
| 	return NULL; | ||||
| } | ||||
| 
 | ||||
| void kvm_destroy_pic(struct kvm *kvm) | ||||
| @ -577,7 +645,9 @@ void kvm_destroy_pic(struct kvm *kvm) | ||||
| 	struct kvm_pic *vpic = kvm->arch.vpic; | ||||
| 
 | ||||
| 	if (vpic) { | ||||
| 		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev); | ||||
| 		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_master); | ||||
| 		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_slave); | ||||
| 		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_eclr); | ||||
| 		kvm->arch.vpic = NULL; | ||||
| 		kfree(vpic); | ||||
| 	} | ||||
|  | ||||
| @ -66,7 +66,9 @@ struct kvm_pic { | ||||
| 	struct kvm *kvm; | ||||
| 	struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ | ||||
| 	int output;		/* intr from master PIC */ | ||||
| 	struct kvm_io_device dev; | ||||
| 	struct kvm_io_device dev_master; | ||||
| 	struct kvm_io_device dev_slave; | ||||
| 	struct kvm_io_device dev_eclr; | ||||
| 	void (*ack_notifier)(void *opaque, int irq); | ||||
| 	unsigned long irq_states[16]; | ||||
| }; | ||||
|  | ||||
| @ -3562,7 +3562,11 @@ long kvm_arch_vm_ioctl(struct file *filp, | ||||
| 			if (r) { | ||||
| 				mutex_lock(&kvm->slots_lock); | ||||
| 				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | ||||
| 							  &vpic->dev); | ||||
| 							  &vpic->dev_master); | ||||
| 				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | ||||
| 							  &vpic->dev_slave); | ||||
| 				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | ||||
| 							  &vpic->dev_eclr); | ||||
| 				mutex_unlock(&kvm->slots_lock); | ||||
| 				kfree(vpic); | ||||
| 				goto create_irqchip_unlock; | ||||
|  | ||||
| @ -55,16 +55,16 @@ struct kvm; | ||||
| struct kvm_vcpu; | ||||
| extern struct kmem_cache *kvm_vcpu_cache; | ||||
| 
 | ||||
| /*
 | ||||
|  * It would be nice to use something smarter than a linear search, TBD... | ||||
|  * Thankfully we dont expect many devices to register (famous last words :), | ||||
|  * so until then it will suffice.  At least its abstracted so we can change | ||||
|  * in one place. | ||||
|  */ | ||||
| struct kvm_io_range { | ||||
| 	gpa_t addr; | ||||
| 	int len; | ||||
| 	struct kvm_io_device *dev; | ||||
| }; | ||||
| 
 | ||||
| struct kvm_io_bus { | ||||
| 	int                   dev_count; | ||||
| #define NR_IOBUS_DEVS 300 | ||||
| 	struct kvm_io_device *devs[NR_IOBUS_DEVS]; | ||||
| 	struct kvm_io_range range[NR_IOBUS_DEVS]; | ||||
| }; | ||||
| 
 | ||||
| enum kvm_bus { | ||||
| @ -77,8 +77,8 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | ||||
| 		     int len, const void *val); | ||||
| int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, | ||||
| 		    void *val); | ||||
| int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, | ||||
| 			    struct kvm_io_device *dev); | ||||
| int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | ||||
| 			    int len, struct kvm_io_device *dev); | ||||
| int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | ||||
| 			      struct kvm_io_device *dev); | ||||
| 
 | ||||
|  | ||||
| @ -141,7 +141,8 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, | ||||
| 	dev->zone = *zone; | ||||
| 
 | ||||
| 	mutex_lock(&kvm->slots_lock); | ||||
| 	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev); | ||||
| 	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr, | ||||
| 				      zone->size, &dev->dev); | ||||
| 	if (ret < 0) | ||||
| 		goto out_free_dev; | ||||
| 	list_add_tail(&dev->list, &kvm->coalesced_zones); | ||||
|  | ||||
| @ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | ||||
| 
 | ||||
| 	kvm_iodevice_init(&p->dev, &ioeventfd_ops); | ||||
| 
 | ||||
| 	ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev); | ||||
| 	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length, | ||||
| 				      &p->dev); | ||||
| 	if (ret < 0) | ||||
| 		goto unlock_fail; | ||||
| 
 | ||||
|  | ||||
| @ -394,7 +394,8 @@ int kvm_ioapic_init(struct kvm *kvm) | ||||
| 	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); | ||||
| 	ioapic->kvm = kvm; | ||||
| 	mutex_lock(&kvm->slots_lock); | ||||
| 	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); | ||||
| 	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address, | ||||
| 				      IOAPIC_MEM_LENGTH, &ioapic->dev); | ||||
| 	mutex_unlock(&kvm->slots_lock); | ||||
| 	if (ret < 0) { | ||||
| 		kvm->arch.vioapic = NULL; | ||||
|  | ||||
| @ -47,6 +47,8 @@ | ||||
| #include <linux/srcu.h> | ||||
| #include <linux/hugetlb.h> | ||||
| #include <linux/slab.h> | ||||
| #include <linux/sort.h> | ||||
| #include <linux/bsearch.h> | ||||
| 
 | ||||
| #include <asm/processor.h> | ||||
| #include <asm/io.h> | ||||
| @ -2391,24 +2393,92 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) | ||||
| 	int i; | ||||
| 
 | ||||
| 	for (i = 0; i < bus->dev_count; i++) { | ||||
| 		struct kvm_io_device *pos = bus->devs[i]; | ||||
| 		struct kvm_io_device *pos = bus->range[i].dev; | ||||
| 
 | ||||
| 		kvm_iodevice_destructor(pos); | ||||
| 	} | ||||
| 	kfree(bus); | ||||
| } | ||||
| 
 | ||||
| int kvm_io_bus_sort_cmp(const void *p1, const void *p2) | ||||
| { | ||||
| 	const struct kvm_io_range *r1 = p1; | ||||
| 	const struct kvm_io_range *r2 = p2; | ||||
| 
 | ||||
| 	if (r1->addr < r2->addr) | ||||
| 		return -1; | ||||
| 	if (r1->addr + r1->len > r2->addr + r2->len) | ||||
| 		return 1; | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, | ||||
| 			  gpa_t addr, int len) | ||||
| { | ||||
| 	if (bus->dev_count == NR_IOBUS_DEVS) | ||||
| 		return -ENOSPC; | ||||
| 
 | ||||
| 	bus->range[bus->dev_count++] = (struct kvm_io_range) { | ||||
| 		.addr = addr, | ||||
| 		.len = len, | ||||
| 		.dev = dev, | ||||
| 	}; | ||||
| 
 | ||||
| 	sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range), | ||||
| 		kvm_io_bus_sort_cmp, NULL); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, | ||||
| 			     gpa_t addr, int len) | ||||
| { | ||||
| 	struct kvm_io_range *range, key; | ||||
| 	int off; | ||||
| 
 | ||||
| 	key = (struct kvm_io_range) { | ||||
| 		.addr = addr, | ||||
| 		.len = len, | ||||
| 	}; | ||||
| 
 | ||||
| 	range = bsearch(&key, bus->range, bus->dev_count, | ||||
| 			sizeof(struct kvm_io_range), kvm_io_bus_sort_cmp); | ||||
| 	if (range == NULL) | ||||
| 		return -ENOENT; | ||||
| 
 | ||||
| 	off = range - bus->range; | ||||
| 
 | ||||
| 	while (off > 0 && kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0) | ||||
| 		off--; | ||||
| 
 | ||||
| 	return off; | ||||
| } | ||||
| 
 | ||||
| /* kvm_io_bus_write - called under kvm->slots_lock */ | ||||
| int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | ||||
| 		     int len, const void *val) | ||||
| { | ||||
| 	int i; | ||||
| 	int idx; | ||||
| 	struct kvm_io_bus *bus; | ||||
| 	struct kvm_io_range range; | ||||
| 
 | ||||
| 	range = (struct kvm_io_range) { | ||||
| 		.addr = addr, | ||||
| 		.len = len, | ||||
| 	}; | ||||
| 
 | ||||
| 	bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | ||||
| 	for (i = 0; i < bus->dev_count; i++) | ||||
| 		if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) | ||||
| 	idx = kvm_io_bus_get_first_dev(bus, addr, len); | ||||
| 	if (idx < 0) | ||||
| 		return -EOPNOTSUPP; | ||||
| 
 | ||||
| 	while (idx < bus->dev_count && | ||||
| 		kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) { | ||||
| 		if (!kvm_iodevice_write(bus->range[idx].dev, addr, len, val)) | ||||
| 			return 0; | ||||
| 		idx++; | ||||
| 	} | ||||
| 
 | ||||
| 	return -EOPNOTSUPP; | ||||
| } | ||||
| 
 | ||||
| @ -2416,19 +2486,33 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | ||||
| int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | ||||
| 		    int len, void *val) | ||||
| { | ||||
| 	int i; | ||||
| 	int idx; | ||||
| 	struct kvm_io_bus *bus; | ||||
| 	struct kvm_io_range range; | ||||
| 
 | ||||
| 	range = (struct kvm_io_range) { | ||||
| 		.addr = addr, | ||||
| 		.len = len, | ||||
| 	}; | ||||
| 
 | ||||
| 	bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | ||||
| 	for (i = 0; i < bus->dev_count; i++) | ||||
| 		if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) | ||||
| 	idx = kvm_io_bus_get_first_dev(bus, addr, len); | ||||
| 	if (idx < 0) | ||||
| 		return -EOPNOTSUPP; | ||||
| 
 | ||||
| 	while (idx < bus->dev_count && | ||||
| 		kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) { | ||||
| 		if (!kvm_iodevice_read(bus->range[idx].dev, addr, len, val)) | ||||
| 			return 0; | ||||
| 		idx++; | ||||
| 	} | ||||
| 
 | ||||
| 	return -EOPNOTSUPP; | ||||
| } | ||||
| 
 | ||||
| /* Caller must hold slots_lock. */ | ||||
| int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, | ||||
| 			    struct kvm_io_device *dev) | ||||
| int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | ||||
| 			    int len, struct kvm_io_device *dev) | ||||
| { | ||||
| 	struct kvm_io_bus *new_bus, *bus; | ||||
| 
 | ||||
| @ -2440,7 +2524,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, | ||||
| 	if (!new_bus) | ||||
| 		return -ENOMEM; | ||||
| 	memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); | ||||
| 	new_bus->devs[new_bus->dev_count++] = dev; | ||||
| 	kvm_io_bus_insert_dev(new_bus, dev, addr, len); | ||||
| 	rcu_assign_pointer(kvm->buses[bus_idx], new_bus); | ||||
| 	synchronize_srcu_expedited(&kvm->srcu); | ||||
| 	kfree(bus); | ||||
| @ -2464,9 +2548,13 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | ||||
| 
 | ||||
| 	r = -ENOENT; | ||||
| 	for (i = 0; i < new_bus->dev_count; i++) | ||||
| 		if (new_bus->devs[i] == dev) { | ||||
| 		if (new_bus->range[i].dev == dev) { | ||||
| 			r = 0; | ||||
| 			new_bus->devs[i] = new_bus->devs[--new_bus->dev_count]; | ||||
| 			new_bus->dev_count--; | ||||
| 			new_bus->range[i] = new_bus->range[new_bus->dev_count]; | ||||
| 			sort(new_bus->range, new_bus->dev_count, | ||||
| 			     sizeof(struct kvm_io_range), | ||||
| 			     kvm_io_bus_sort_cmp, NULL); | ||||
| 			break; | ||||
| 		} | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user