io_uring: improve poll completion performance
For busy IORING_OP_POLL_ADD workloads, we can have enough contention on the completion lock that we fail the inline completion path quite often, as the trylock on that lock fails. Add a list for deferred completions that we can use in that case. This helps reduce the number of async offloads we have to do: if we get multiple completions in a row, we piggyback onto the poll_llist instead of having to queue our own offload.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
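The pattern the patch uses is more general than io_uring: try to complete inline under a trylock; if the lock is contended, push the item onto a lock-free list, and only the completion that turns the list from empty to non-empty queues a single flush job that drains everything under the lock. Below is a minimal userspace sketch of that idea, not the kernel code itself: the names (struct request, on_poll_event, flush_deferred, queue_flush) are hypothetical, and a pthread mutex plus C11 atomics stand in for completion_lock and the kernel's llist.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-ins for io_kiocb and ctx->poll_llist/completion_lock. */
struct request {
	int result;
	struct request *next;	/* deferred-completion list linkage */
};

static pthread_mutex_t completion_lock = PTHREAD_MUTEX_INITIALIZER;
static _Atomic(struct request *) poll_list = NULL;	/* lock-free LIFO */

static void complete_locked(struct request *req)
{
	/* caller holds completion_lock; post the completion here */
	printf("completed, result=%d\n", req->result);
}

/* Push onto the lock-free list; return true if the list was empty before,
 * i.e. this caller is responsible for scheduling the flush. */
static bool defer_completion(struct request *req)
{
	struct request *old = atomic_load(&poll_list);

	do {
		req->next = old;
	} while (!atomic_compare_exchange_weak(&poll_list, &old, req));

	return old == NULL;
}

/* Called on a poll wakeup: complete inline if the lock is uncontended. */
void on_poll_event(struct request *req, int result, void (*queue_flush)(void))
{
	req->result = result;

	if (atomic_load(&poll_list) == NULL &&
	    pthread_mutex_trylock(&completion_lock) == 0) {
		complete_locked(req);
		pthread_mutex_unlock(&completion_lock);
		return;
	}

	/* Lock is busy: defer; only the first adder schedules one flush. */
	if (defer_completion(req))
		queue_flush();
}

/* Flush worker: grab the whole list at once and complete under the lock. */
void flush_deferred(void)
{
	struct request *req = atomic_exchange(&poll_list, NULL);

	pthread_mutex_lock(&completion_lock);
	while (req) {
		struct request *next = req->next;

		complete_locked(req);
		req = next;
	}
	pthread_mutex_unlock(&completion_lock);
}

The property mirrored here is the one the patch relies on: llist_add() reports whether the list was empty beforehand, so only one request arms io_poll_flush per batch, while every later contended completion just links itself in and is picked up by that single flush.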
commit e94f141bd2
parent ad3eb2c89f

fs/io_uring.c | 108
@@ -286,7 +286,8 @@ struct io_ring_ctx {
 
 	struct {
 		spinlock_t		completion_lock;
-		bool			poll_multi_file;
+		struct llist_head	poll_llist;
+
 		/*
 		 * ->poll_list is protected by the ctx->uring_lock for
 		 * io_uring instances that don't use IORING_SETUP_SQPOLL.
@@ -296,6 +297,7 @@ struct io_ring_ctx {
 		struct list_head	poll_list;
 		struct hlist_head	*cancel_hash;
 		unsigned		cancel_hash_bits;
+		bool			poll_multi_file;
 
 		spinlock_t		inflight_lock;
 		struct list_head	inflight_list;
@@ -453,7 +455,14 @@ struct io_kiocb {
 	};
 
 	struct io_async_ctx		*io;
-	struct file			*ring_file;
+	union {
+		/*
+		 * ring_file is only used in the submission path, and
+		 * llist_node is only used for poll deferred completions
+		 */
+		struct file		*ring_file;
+		struct llist_node	llist_node;
+	};
 	int				ring_fd;
 	bool				has_user;
 	bool				in_async;
@@ -725,6 +734,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	mutex_init(&ctx->uring_lock);
 	init_waitqueue_head(&ctx->wait);
 	spin_lock_init(&ctx->completion_lock);
+	init_llist_head(&ctx->poll_llist);
 	INIT_LIST_HEAD(&ctx->poll_list);
 	INIT_LIST_HEAD(&ctx->defer_list);
 	INIT_LIST_HEAD(&ctx->timeout_list);
@@ -1320,6 +1330,20 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
 	return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
 }
 
+static inline bool io_req_multi_free(struct io_kiocb *req)
+{
+	/*
+	 * If we're not using fixed files, we have to pair the completion part
+	 * with the file put. Use regular completions for those, only batch
+	 * free for fixed file and non-linked commands.
+	 */
+	if (((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) == REQ_F_FIXED_FILE)
+	    && !io_is_fallback_req(req) && !req->io)
+		return true;
+
+	return false;
+}
+
 /*
  * Find and free completed poll iocbs
  */
@@ -1339,14 +1363,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		(*nr_events)++;
 
 		if (refcount_dec_and_test(&req->refs)) {
-			/* If we're not using fixed files, we have to pair the
-			 * completion part with the file put. Use regular
-			 * completions for those, only batch free for fixed
-			 * file and non-linked commands.
-			 */
-			if (((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) ==
-			    REQ_F_FIXED_FILE) && !io_is_fallback_req(req) &&
-			    !req->io) {
+			if (io_req_multi_free(req)) {
 				reqs[to_free++] = req;
 				if (to_free == ARRAY_SIZE(reqs))
 					io_free_req_many(ctx, reqs, &to_free);
@@ -3081,6 +3098,44 @@ static void io_poll_complete_work(struct io_wq_work **workptr)
 		io_wq_assign_next(workptr, nxt);
 }
 
+static void __io_poll_flush(struct io_ring_ctx *ctx, struct llist_node *nodes)
+{
+	void *reqs[IO_IOPOLL_BATCH];
+	struct io_kiocb *req, *tmp;
+	int to_free = 0;
+
+	spin_lock_irq(&ctx->completion_lock);
+	llist_for_each_entry_safe(req, tmp, nodes, llist_node) {
+		hash_del(&req->hash_node);
+		io_poll_complete(req, req->result, 0);
+
+		if (refcount_dec_and_test(&req->refs)) {
+			if (io_req_multi_free(req)) {
+				reqs[to_free++] = req;
+				if (to_free == ARRAY_SIZE(reqs))
+					io_free_req_many(ctx, reqs, &to_free);
+			} else {
+				req->flags |= REQ_F_COMP_LOCKED;
+				io_free_req(req);
+			}
+		}
+	}
+	spin_unlock_irq(&ctx->completion_lock);
+
+	io_cqring_ev_posted(ctx);
+	io_free_req_many(ctx, reqs, &to_free);
+}
+
+static void io_poll_flush(struct io_wq_work **workptr)
+{
+	struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+	struct llist_node *nodes;
+
+	nodes = llist_del_all(&req->ctx->poll_llist);
+	if (nodes)
+		__io_poll_flush(req->ctx, nodes);
+}
+
 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 			void *key)
 {
@@ -3088,7 +3143,6 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	struct io_kiocb *req = container_of(poll, struct io_kiocb, poll);
 	struct io_ring_ctx *ctx = req->ctx;
 	__poll_t mask = key_to_poll(key);
-	unsigned long flags;
 
 	/* for instances that support it check for an event match first: */
 	if (mask && !(mask & poll->events))
@@ -3102,17 +3156,31 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	 * If we have a link timeout we're going to need the completion_lock
 	 * for finalizing the request, mark us as having grabbed that already.
 	 */
-	if (mask && spin_trylock_irqsave(&ctx->completion_lock, flags)) {
-		hash_del(&req->hash_node);
-		io_poll_complete(req, mask, 0);
-		req->flags |= REQ_F_COMP_LOCKED;
-		io_put_req(req);
-		spin_unlock_irqrestore(&ctx->completion_lock, flags);
+	if (mask) {
+		unsigned long flags;
 
-		io_cqring_ev_posted(ctx);
-	} else {
-		io_queue_async_work(req);
+		if (llist_empty(&ctx->poll_llist) &&
+		    spin_trylock_irqsave(&ctx->completion_lock, flags)) {
+			hash_del(&req->hash_node);
+			io_poll_complete(req, mask, 0);
+			req->flags |= REQ_F_COMP_LOCKED;
+			io_put_req(req);
+			spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+			io_cqring_ev_posted(ctx);
+			req = NULL;
+		} else {
+			req->result = mask;
+			req->llist_node.next = NULL;
+			/* if the list wasn't empty, we're done */
+			if (!llist_add(&req->llist_node, &ctx->poll_llist))
+				req = NULL;
+			else
+				req->work.func = io_poll_flush;
+		}
 	}
+	if (req)
+		io_queue_async_work(req);
 
 	return 1;
 }