RDS: add receive message trace used by application
Socket option to tap receive path latency in various stages in nano seconds. It can be enabled on selective sockets using using SO_RDS_MSG_RXPATH_LATENCY socket option. RDS will return the data to application with RDS_CMSG_RXPATH_LATENCY in defined format. Scope is left to add more trace points for future without need of change in the interface. Reviewed-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
This commit is contained in:
		
							parent
							
								
									f9fb69adb6
								
							
						
					
					
						commit
						3289025aed
					
				| @ -52,6 +52,13 @@ | ||||
| #define RDS_GET_MR_FOR_DEST		7 | ||||
| #define SO_RDS_TRANSPORT		8 | ||||
| 
 | ||||
| /* Socket option to tap receive path latency
 | ||||
|  *	SO_RDS: SO_RDS_MSG_RXPATH_LATENCY | ||||
|  *	Format used struct rds_rx_trace_so | ||||
|  */ | ||||
| #define SO_RDS_MSG_RXPATH_LATENCY	10 | ||||
| 
 | ||||
| 
 | ||||
| /* supported values for SO_RDS_TRANSPORT */ | ||||
| #define	RDS_TRANS_IB	0 | ||||
| #define	RDS_TRANS_IWARP	1 | ||||
| @ -77,6 +84,12 @@ | ||||
|  *	the same as for the GET_MR setsockopt. | ||||
|  * RDS_CMSG_RDMA_STATUS (recvmsg) | ||||
|  *	Returns the status of a completed RDMA operation. | ||||
|  * RDS_CMSG_RXPATH_LATENCY(recvmsg) | ||||
|  *	Returns rds message latencies in various stages of receive | ||||
|  *	path in nS. Its set per socket using SO_RDS_MSG_RXPATH_LATENCY | ||||
|  *	socket option. Legitimate points are defined in | ||||
|  *	enum rds_message_rxpath_latency. More points can be added in | ||||
|  *	future. CSMG format is struct rds_cmsg_rx_trace. | ||||
|  */ | ||||
| #define RDS_CMSG_RDMA_ARGS		1 | ||||
| #define RDS_CMSG_RDMA_DEST		2 | ||||
| @ -87,6 +100,7 @@ | ||||
| #define RDS_CMSG_ATOMIC_CSWP		7 | ||||
| #define RDS_CMSG_MASKED_ATOMIC_FADD	8 | ||||
| #define RDS_CMSG_MASKED_ATOMIC_CSWP	9 | ||||
| #define RDS_CMSG_RXPATH_LATENCY		11 | ||||
| 
 | ||||
| #define RDS_INFO_FIRST			10000 | ||||
| #define RDS_INFO_COUNTERS		10000 | ||||
| @ -171,6 +185,25 @@ struct rds_info_rdma_connection { | ||||
| 	uint32_t	rdma_mr_size; | ||||
| }; | ||||
| 
 | ||||
| /* RDS message Receive Path Latency points */ | ||||
| enum rds_message_rxpath_latency { | ||||
| 	RDS_MSG_RX_HDR_TO_DGRAM_START = 0, | ||||
| 	RDS_MSG_RX_DGRAM_REASSEMBLE, | ||||
| 	RDS_MSG_RX_DGRAM_DELIVERED, | ||||
| 	RDS_MSG_RX_DGRAM_TRACE_MAX | ||||
| }; | ||||
| 
 | ||||
| struct rds_rx_trace_so { | ||||
| 	u8 rx_traces; | ||||
| 	u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX]; | ||||
| }; | ||||
| 
 | ||||
| struct rds_cmsg_rx_trace { | ||||
| 	u8 rx_traces; | ||||
| 	u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX]; | ||||
| 	u64 rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Congestion monitoring. | ||||
|  * Congestion control in RDS happens at the host connection | ||||
|  | ||||
| @ -298,6 +298,30 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval, | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval, | ||||
| 				  int optlen) | ||||
| { | ||||
| 	struct rds_rx_trace_so trace; | ||||
| 	int i; | ||||
| 
 | ||||
| 	if (optlen != sizeof(struct rds_rx_trace_so)) | ||||
| 		return -EFAULT; | ||||
| 
 | ||||
| 	if (copy_from_user(&trace, optval, sizeof(trace))) | ||||
| 		return -EFAULT; | ||||
| 
 | ||||
| 	rs->rs_rx_traces = trace.rx_traces; | ||||
| 	for (i = 0; i < rs->rs_rx_traces; i++) { | ||||
| 		if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) { | ||||
| 			rs->rs_rx_traces = 0; | ||||
| 			return -EFAULT; | ||||
| 		} | ||||
| 		rs->rs_rx_trace[i] = trace.rx_trace_pos[i]; | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int rds_setsockopt(struct socket *sock, int level, int optname, | ||||
| 			  char __user *optval, unsigned int optlen) | ||||
| { | ||||
| @ -338,6 +362,9 @@ static int rds_setsockopt(struct socket *sock, int level, int optname, | ||||
| 		ret = rds_enable_recvtstamp(sock->sk, optval, optlen); | ||||
| 		release_sock(sock->sk); | ||||
| 		break; | ||||
| 	case SO_RDS_MSG_RXPATH_LATENCY: | ||||
| 		ret = rds_recv_track_latency(rs, optval, optlen); | ||||
| 		break; | ||||
| 	default: | ||||
| 		ret = -ENOPROTOOPT; | ||||
| 	} | ||||
| @ -484,6 +511,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol) | ||||
| 	INIT_LIST_HEAD(&rs->rs_cong_list); | ||||
| 	spin_lock_init(&rs->rs_rdma_lock); | ||||
| 	rs->rs_rdma_keys = RB_ROOT; | ||||
| 	rs->rs_rx_traces = 0; | ||||
| 
 | ||||
| 	spin_lock_bh(&rds_sock_lock); | ||||
| 	list_add_tail(&rs->rs_item, &rds_sock_list); | ||||
|  | ||||
| @ -911,8 +911,12 @@ static void rds_ib_process_recv(struct rds_connection *conn, | ||||
| 		ic->i_ibinc = ibinc; | ||||
| 
 | ||||
| 		hdr = &ibinc->ii_inc.i_hdr; | ||||
| 		ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] = | ||||
| 				local_clock(); | ||||
| 		memcpy(hdr, ihdr, sizeof(*hdr)); | ||||
| 		ic->i_recv_data_rem = be32_to_cpu(hdr->h_len); | ||||
| 		ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_START] = | ||||
| 				local_clock(); | ||||
| 
 | ||||
| 		rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc, | ||||
| 			 ic->i_recv_data_rem, hdr->h_flags); | ||||
|  | ||||
| @ -253,6 +253,11 @@ struct rds_ext_header_rdma_dest { | ||||
| #define RDS_EXTHDR_GEN_NUM	6 | ||||
| 
 | ||||
| #define __RDS_EXTHDR_MAX	16 /* for now */ | ||||
| #define RDS_RX_MAX_TRACES	(RDS_MSG_RX_DGRAM_TRACE_MAX + 1) | ||||
| #define	RDS_MSG_RX_HDR		0 | ||||
| #define	RDS_MSG_RX_START	1 | ||||
| #define	RDS_MSG_RX_END		2 | ||||
| #define	RDS_MSG_RX_CMSG		3 | ||||
| 
 | ||||
| struct rds_incoming { | ||||
| 	atomic_t		i_refcount; | ||||
| @ -265,6 +270,7 @@ struct rds_incoming { | ||||
| 
 | ||||
| 	rds_rdma_cookie_t	i_rdma_cookie; | ||||
| 	struct timeval		i_rx_tstamp; | ||||
| 	u64			i_rx_lat_trace[RDS_RX_MAX_TRACES]; | ||||
| }; | ||||
| 
 | ||||
| struct rds_mr { | ||||
| @ -575,6 +581,10 @@ struct rds_sock { | ||||
| 	unsigned char		rs_recverr, | ||||
| 				rs_cong_monitor; | ||||
| 	u32			rs_hash_initval; | ||||
| 
 | ||||
| 	/* Socket receive path trace points*/ | ||||
| 	u8			rs_rx_traces; | ||||
| 	u8			rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; | ||||
| }; | ||||
| 
 | ||||
| static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk) | ||||
|  | ||||
| @ -43,6 +43,8 @@ | ||||
| void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, | ||||
| 		  __be32 saddr) | ||||
| { | ||||
| 	int i; | ||||
| 
 | ||||
| 	atomic_set(&inc->i_refcount, 1); | ||||
| 	INIT_LIST_HEAD(&inc->i_item); | ||||
| 	inc->i_conn = conn; | ||||
| @ -50,6 +52,9 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, | ||||
| 	inc->i_rdma_cookie = 0; | ||||
| 	inc->i_rx_tstamp.tv_sec = 0; | ||||
| 	inc->i_rx_tstamp.tv_usec = 0; | ||||
| 
 | ||||
| 	for (i = 0; i < RDS_RX_MAX_TRACES; i++) | ||||
| 		inc->i_rx_lat_trace[i] = 0; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(rds_inc_init); | ||||
| 
 | ||||
| @ -373,6 +378,7 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, | ||||
| 		if (sock_flag(sk, SOCK_RCVTSTAMP)) | ||||
| 			do_gettimeofday(&inc->i_rx_tstamp); | ||||
| 		rds_inc_addref(inc); | ||||
| 		inc->i_rx_lat_trace[RDS_MSG_RX_END] = local_clock(); | ||||
| 		list_add_tail(&inc->i_item, &rs->rs_recv_queue); | ||||
| 		__rds_wake_sk_sleep(sk); | ||||
| 	} else { | ||||
| @ -534,7 +540,7 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg, | ||||
| 		ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST, | ||||
| 				sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie); | ||||
| 		if (ret) | ||||
| 			return ret; | ||||
| 			goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	if ((inc->i_rx_tstamp.tv_sec != 0) && | ||||
| @ -543,10 +549,30 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg, | ||||
| 			       sizeof(struct timeval), | ||||
| 			       &inc->i_rx_tstamp); | ||||
| 		if (ret) | ||||
| 			return ret; | ||||
| 			goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
| 	if (rs->rs_rx_traces) { | ||||
| 		struct rds_cmsg_rx_trace t; | ||||
| 		int i, j; | ||||
| 
 | ||||
| 		inc->i_rx_lat_trace[RDS_MSG_RX_CMSG] = local_clock(); | ||||
| 		t.rx_traces =  rs->rs_rx_traces; | ||||
| 		for (i = 0; i < rs->rs_rx_traces; i++) { | ||||
| 			j = rs->rs_rx_trace[i]; | ||||
| 			t.rx_trace_pos[i] = j; | ||||
| 			t.rx_trace[i] = inc->i_rx_lat_trace[j + 1] - | ||||
| 					  inc->i_rx_lat_trace[j]; | ||||
| 		} | ||||
| 
 | ||||
| 		ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RXPATH_LATENCY, | ||||
| 			       sizeof(t), &t); | ||||
| 		if (ret) | ||||
| 			goto out; | ||||
| 	} | ||||
| 
 | ||||
| out: | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, | ||||
|  | ||||
| @ -180,6 +180,9 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, | ||||
| 			rdsdebug("alloced tinc %p\n", tinc); | ||||
| 			rds_inc_path_init(&tinc->ti_inc, cp, | ||||
| 					  cp->cp_conn->c_faddr); | ||||
| 			tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] = | ||||
| 					local_clock(); | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * XXX * we might be able to use the __ variants when | ||||
| 			 * we've already serialized at a higher level. | ||||
| @ -204,6 +207,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, | ||||
| 				/* could be 0 for a 0 len message */ | ||||
| 				tc->t_tinc_data_rem = | ||||
| 					be32_to_cpu(tinc->ti_inc.i_hdr.h_len); | ||||
| 				tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_START] = | ||||
| 					local_clock(); | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user