// SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2020 Intel Corporation. */ /* * Some functions in this program are taken from * Linux kernel samples/bpf/xdpsock* and modified * for use. * * See test_xsk.sh for detailed information on test topology * and prerequisite network setup. * * This test program contains two threads, each thread is single socket with * a unique UMEM. It validates in-order packet delivery and packet content * by sending packets to each other. * * Tests Information: * ------------------ * These selftests test AF_XDP SKB and Native/DRV modes using veth * Virtual Ethernet interfaces. * * For each mode, the following tests are run: * a. nopoll - soft-irq processing * b. poll - using poll() syscall * c. Socket Teardown * Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy * both sockets, then repeat multiple times. Only nopoll mode is used * d. Bi-directional sockets * Configure sockets as bi-directional tx/rx sockets, sets up fill and * completion rings on each socket, tx/rx in both directions. Only nopoll * mode is used * e. Statistics * Trigger some error conditions and ensure that the appropriate statistics * are incremented. Within this test, the following statistics are tested: * i. rx dropped * Increase the UMEM frame headroom to a value which results in * insufficient space in the rx buffer for both the packet and the headroom. * ii. tx invalid * Set the 'len' field of tx descriptors to an invalid value (umem frame * size + 1). * iii. rx ring full * Reduce the size of the RX ring to a fraction of the fill ring size. * iv. fill queue empty * Do not populate the fill queue and then try to receive pkts. * * Total tests: 10 * * Flow: * ----- * - Single process spawns two threads: Tx and Rx * - Each of these two threads attach to a veth interface within their assigned * namespaces * - Each thread Creates one AF_XDP socket connected to a unique umem for each * veth interface * - Tx thread Transmits 10k packets from veth to veth * - Rx thread verifies if all 10k packets were received and delivered in-order, * and have the right content * * Enable/disable packet dump mode: * -------------------------- * To enable L2 - L4 headers and payload dump of each packet on STDOUT, add * parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D") */ #define _GNU_SOURCE #include #include #include #include typedef __u16 __sum16; #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "xdpxceiver.h" #include "../kselftest.h" static void __exit_with_error(int error, const char *file, const char *func, int line) { if (configured_mode == TEST_MODE_UNCONFIGURED) { ksft_exit_fail_msg ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error)); } else { ksft_test_result_fail ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error)); ksft_exit_xfail(); } } #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) #define print_ksft_result(void)\ (ksft_test_result_pass("PASS: %s %s %s%s%s\n", configured_mode ? "DRV" : "SKB",\ test_type == TEST_TYPE_POLL ? "POLL" : "NOPOLL",\ test_type == TEST_TYPE_TEARDOWN ? "Socket Teardown" : "",\ test_type == TEST_TYPE_BIDI ? "Bi-directional Sockets" : "",\ test_type == TEST_TYPE_STATS ? "Stats" : "")) static void pthread_init_mutex(void) { pthread_mutex_init(&sync_mutex, NULL); pthread_mutex_init(&sync_mutex_tx, NULL); pthread_cond_init(&signal_rx_condition, NULL); pthread_cond_init(&signal_tx_condition, NULL); } static void pthread_destroy_mutex(void) { pthread_mutex_destroy(&sync_mutex); pthread_mutex_destroy(&sync_mutex_tx); pthread_cond_destroy(&signal_rx_condition); pthread_cond_destroy(&signal_tx_condition); } static void *memset32_htonl(void *dest, u32 val, u32 size) { u32 *ptr = (u32 *)dest; int i; val = htonl(val); for (i = 0; i < (size & (~0x3)); i += 4) ptr[i >> 2] = val; for (; i < size; i++) ((char *)dest)[i] = ((char *)&val)[i & 3]; return dest; } /* * This function code has been taken from * Linux kernel lib/checksum.c */ static inline unsigned short from32to16(unsigned int x) { /* add up 16-bit and 16-bit for 16+c bit */ x = (x & 0xffff) + (x >> 16); /* add up carry.. */ x = (x & 0xffff) + (x >> 16); return x; } /* * Fold a partial checksum * This function code has been taken from * Linux kernel include/asm-generic/checksum.h */ static inline __u16 csum_fold(__u32 csum) { u32 sum = (__force u32)csum; sum = (sum & 0xffff) + (sum >> 16); sum = (sum & 0xffff) + (sum >> 16); return (__force __u16)~sum; } /* * This function code has been taken from * Linux kernel lib/checksum.c */ static inline u32 from64to32(u64 x) { /* add up 32-bit and 32-bit for 32+c bit */ x = (x & 0xffffffff) + (x >> 32); /* add up carry.. */ x = (x & 0xffffffff) + (x >> 32); return (u32)x; } __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum); /* * This function code has been taken from * Linux kernel lib/checksum.c */ __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) { unsigned long long s = (__force u32)sum; s += (__force u32)saddr; s += (__force u32)daddr; #ifdef __BIG_ENDIAN__ s += proto + len; #else s += (proto + len) << 8; #endif return (__force __u32)from64to32(s); } /* * This function has been taken from * Linux kernel include/asm-generic/checksum.h */ static inline __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) { return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); } static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt) { u32 csum = 0; u32 cnt = 0; /* udp hdr and data */ for (; cnt < len; cnt += 2) csum += udp_pkt[cnt >> 1]; return csum_tcpudp_magic(saddr, daddr, len, proto, csum); } static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr) { memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN); memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN); eth_hdr->h_proto = htons(ETH_P_IP); } static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr) { ip_hdr->version = IP_PKT_VER; ip_hdr->ihl = 0x5; ip_hdr->tos = IP_PKT_TOS; ip_hdr->tot_len = htons(IP_PKT_SIZE); ip_hdr->id = 0; ip_hdr->frag_off = 0; ip_hdr->ttl = IPDEFTTL; ip_hdr->protocol = IPPROTO_UDP; ip_hdr->saddr = ifobject->src_ip; ip_hdr->daddr = ifobject->dst_ip; ip_hdr->check = 0; } static void gen_udp_hdr(struct generic_data *data, struct ifobject *ifobject, struct udphdr *udp_hdr) { udp_hdr->source = htons(ifobject->src_port); udp_hdr->dest = htons(ifobject->dst_port); udp_hdr->len = htons(UDP_PKT_SIZE); memset32_htonl(pkt_data + PKT_HDR_SIZE, htonl(data->seqnum), UDP_PKT_DATA_SIZE); } static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr) { udp_hdr->check = 0; udp_hdr->check = udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr); } static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr) { memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE); } static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size) { int ret; struct xsk_umem_config cfg = { .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, .frame_headroom = frame_headroom, .flags = XSK_UMEM__DEFAULT_FLAGS }; data->umem = calloc(1, sizeof(struct xsk_umem_info)); if (!data->umem) exit_with_error(errno); ret = xsk_umem__create(&data->umem->umem, buffer, size, &data->umem->fq, &data->umem->cq, &cfg); if (ret) exit_with_error(ret); data->umem->buffer = buffer; } static void xsk_populate_fill_ring(struct xsk_umem_info *umem) { int ret, i; u32 idx = 0; ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx); if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS) exit_with_error(ret); for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++) *xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * XSK_UMEM__DEFAULT_FRAME_SIZE; xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS); } static int xsk_configure_socket(struct ifobject *ifobject) { struct xsk_socket_config cfg; struct xsk_ring_cons *rxr; struct xsk_ring_prod *txr; int ret; ifobject->xsk = calloc(1, sizeof(struct xsk_socket_info)); if (!ifobject->xsk) exit_with_error(errno); ifobject->xsk->umem = ifobject->umem; cfg.rx_size = rxqsize; cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; cfg.libbpf_flags = 0; cfg.xdp_flags = xdp_flags; cfg.bind_flags = xdp_bind_flags; if (test_type != TEST_TYPE_BIDI) { rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL; txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL; } else { rxr = &ifobject->xsk->rx; txr = &ifobject->xsk->tx; } ret = xsk_socket__create(&ifobject->xsk->xsk, ifobject->ifname, opt_queue, ifobject->umem->umem, rxr, txr, &cfg); if (ret) return 1; return 0; } static struct option long_options[] = { {"interface", required_argument, 0, 'i'}, {"queue", optional_argument, 0, 'q'}, {"dump-pkts", optional_argument, 0, 'D'}, {"verbose", no_argument, 0, 'v'}, {"tx-pkt-count", optional_argument, 0, 'C'}, {0, 0, 0, 0} }; static void usage(const char *prog) { const char *str = " Usage: %s [OPTIONS]\n" " Options:\n" " -i, --interface Use interface\n" " -q, --queue=n Use queue n (default 0)\n" " -D, --dump-pkts Dump packets L2 - L5\n" " -v, --verbose Verbose output\n" " -C, --tx-pkt-count=n Number of packets to send\n"; ksft_print_msg(str, prog); } static bool switch_namespace(int idx) { char fqns[26] = "/var/run/netns/"; int nsfd; strncat(fqns, ifdict[idx]->nsname, sizeof(fqns) - strlen(fqns) - 1); nsfd = open(fqns, O_RDONLY); if (nsfd == -1) exit_with_error(errno); if (setns(nsfd, 0) == -1) exit_with_error(errno); return true; } static void *nsswitchthread(void *args) { struct targs *targs = args; targs->retptr = false; if (switch_namespace(targs->idx)) { ifdict[targs->idx]->ifindex = if_nametoindex(ifdict[targs->idx]->ifname); if (!ifdict[targs->idx]->ifindex) { ksft_test_result_fail("ERROR: [%s] interface \"%s\" does not exist\n", __func__, ifdict[targs->idx]->ifname); } else { print_verbose("Interface found: %s\n", ifdict[targs->idx]->ifname); targs->retptr = true; } } pthread_exit(NULL); } static int validate_interfaces(void) { bool ret = true; for (int i = 0; i < MAX_INTERFACES; i++) { if (!strcmp(ifdict[i]->ifname, "")) { ret = false; ksft_test_result_fail("ERROR: interfaces: -i , -i ,."); } if (strcmp(ifdict[i]->nsname, "")) { struct targs *targs; targs = malloc(sizeof(*targs)); if (!targs) exit_with_error(errno); targs->idx = i; if (pthread_create(&ns_thread, NULL, nsswitchthread, targs)) exit_with_error(errno); pthread_join(ns_thread, NULL); if (targs->retptr) print_verbose("NS switched: %s\n", ifdict[i]->nsname); free(targs); } else { ifdict[i]->ifindex = if_nametoindex(ifdict[i]->ifname); if (!ifdict[i]->ifindex) { ksft_test_result_fail ("ERROR: interface \"%s\" does not exist\n", ifdict[i]->ifname); ret = false; } else { print_verbose("Interface found: %s\n", ifdict[i]->ifname); } } } return ret; } static void parse_command_line(int argc, char **argv) { int option_index, interface_index = 0, c; opterr = 0; for (;;) { c = getopt_long(argc, argv, "i:q:DC:v", long_options, &option_index); if (c == -1) break; switch (c) { case 'i': if (interface_index == MAX_INTERFACES) break; char *sptr, *token; sptr = strndupa(optarg, strlen(optarg)); memcpy(ifdict[interface_index]->ifname, strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS); token = strsep(&sptr, ","); if (token) memcpy(ifdict[interface_index]->nsname, token, MAX_INTERFACES_NAMESPACE_CHARS); interface_index++; break; case 'q': opt_queue = atoi(optarg); break; case 'D': debug_pkt_dump = 1; break; case 'C': opt_pkt_count = atoi(optarg); break; case 'v': opt_verbose = 1; break; default: usage(basename(argv[0])); ksft_exit_xfail(); } } if (!opt_pkt_count) { print_verbose("No tx-pkt-count specified, using default %u\n", DEFAULT_PKT_CNT); opt_pkt_count = DEFAULT_PKT_CNT; } if (!validate_interfaces()) { usage(basename(argv[0])); ksft_exit_xfail(); } } static void kick_tx(struct xsk_socket_info *xsk) { int ret; ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) return; exit_with_error(errno); } static inline void complete_tx_only(struct xsk_socket_info *xsk, int batch_size) { unsigned int rcvd; u32 idx; if (!xsk->outstanding_tx) return; if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx)) kick_tx(xsk); rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); if (rcvd) { xsk_ring_cons__release(&xsk->umem->cq, rcvd); xsk->outstanding_tx -= rcvd; xsk->tx_npkts += rcvd; } } static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds) { unsigned int rcvd, i; u32 idx_rx = 0, idx_fq = 0; int ret; rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); if (!rcvd) { if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { ret = poll(fds, 1, POLL_TMOUT); if (ret < 0) exit_with_error(ret); } return; } ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); while (ret != rcvd) { if (ret < 0) exit_with_error(ret); if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { ret = poll(fds, 1, POLL_TMOUT); if (ret < 0) exit_with_error(ret); } ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); } for (i = 0; i < rcvd; i++) { u64 addr, orig; addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); orig = xsk_umem__extract_addr(addr); addr = xsk_umem__add_offset_to_addr(addr); pkt_node_rx = malloc(sizeof(struct pkt) + PKT_SIZE); if (!pkt_node_rx) exit_with_error(errno); pkt_node_rx->pkt_frame = malloc(PKT_SIZE); if (!pkt_node_rx->pkt_frame) exit_with_error(errno); memcpy(pkt_node_rx->pkt_frame, xsk_umem__get_data(xsk->umem->buffer, addr), PKT_SIZE); TAILQ_INSERT_HEAD(&head, pkt_node_rx, pkt_nodes); *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig; } xsk_ring_prod__submit(&xsk->umem->fq, rcvd); xsk_ring_cons__release(&xsk->rx, rcvd); xsk->rx_npkts += rcvd; } static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size) { u32 idx = 0; unsigned int i; bool tx_invalid_test = stat_test_type == STAT_TEST_TX_INVALID; u32 len = tx_invalid_test ? XSK_UMEM__DEFAULT_FRAME_SIZE + 1 : PKT_SIZE; while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size) complete_tx_only(xsk, batch_size); for (i = 0; i < batch_size; i++) { struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); tx_desc->addr = (*frameptr + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; tx_desc->len = len; } xsk_ring_prod__submit(&xsk->tx, batch_size); if (!tx_invalid_test) { xsk->outstanding_tx += batch_size; } else { if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx)) kick_tx(xsk); } *frameptr += batch_size; *frameptr %= num_frames; complete_tx_only(xsk, batch_size); } static inline int get_batch_size(int pkt_cnt) { if (!opt_pkt_count) return BATCH_SIZE; if (pkt_cnt + BATCH_SIZE <= opt_pkt_count) return BATCH_SIZE; return opt_pkt_count - pkt_cnt; } static void complete_tx_only_all(struct ifobject *ifobject) { bool pending; do { pending = false; if (ifobject->xsk->outstanding_tx) { complete_tx_only(ifobject->xsk, BATCH_SIZE); pending = !!ifobject->xsk->outstanding_tx; } } while (pending); } static void tx_only_all(struct ifobject *ifobject) { struct pollfd fds[MAX_SOCKS] = { }; u32 frame_nb = 0; int pkt_cnt = 0; int ret; fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk); fds[0].events = POLLOUT; while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { int batch_size = get_batch_size(pkt_cnt); if (test_type == TEST_TYPE_POLL) { ret = poll(fds, 1, POLL_TMOUT); if (ret <= 0) continue; if (!(fds[0].revents & POLLOUT)) continue; } tx_only(ifobject->xsk, &frame_nb, batch_size); pkt_cnt += batch_size; } if (opt_pkt_count) complete_tx_only_all(ifobject); } static void worker_pkt_dump(void) { struct in_addr ipaddr; fprintf(stdout, "---------------------------------------\n"); for (int iter = 0; iter < num_frames - 1; iter++) { /*extract L2 frame */ fprintf(stdout, "DEBUG>> L2: dst mac: "); for (int i = 0; i < ETH_ALEN; i++) fprintf(stdout, "%02X", ((struct ethhdr *) pkt_buf[iter]->payload)->h_dest[i]); fprintf(stdout, "\nDEBUG>> L2: src mac: "); for (int i = 0; i < ETH_ALEN; i++) fprintf(stdout, "%02X", ((struct ethhdr *) pkt_buf[iter]->payload)->h_source[i]); /*extract L3 frame */ fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->ihl); ipaddr.s_addr = ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->saddr; fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", inet_ntoa(ipaddr)); ipaddr.s_addr = ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->daddr; fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", inet_ntoa(ipaddr)); /*extract L4 frame */ fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(((struct udphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr) + sizeof(struct iphdr)))->source)); fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(((struct udphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr) + sizeof(struct iphdr)))->dest)); /*extract L5 frame */ int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE)); if (payload == EOT) { print_verbose("End-of-transmission frame received\n"); fprintf(stdout, "---------------------------------------\n"); break; } fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload); fprintf(stdout, "---------------------------------------\n"); } } static void worker_stats_validate(struct ifobject *ifobject) { struct xdp_statistics stats; socklen_t optlen; int err; struct xsk_socket *xsk = stat_test_type == STAT_TEST_TX_INVALID ? ifdict[!ifobject->ifdict_index]->xsk->xsk : ifobject->xsk->xsk; int fd = xsk_socket__fd(xsk); unsigned long xsk_stat = 0, expected_stat = opt_pkt_count; sigvar = 0; optlen = sizeof(stats); err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); if (err) return; if (optlen == sizeof(struct xdp_statistics)) { switch (stat_test_type) { case STAT_TEST_RX_DROPPED: xsk_stat = stats.rx_dropped; break; case STAT_TEST_TX_INVALID: xsk_stat = stats.tx_invalid_descs; break; case STAT_TEST_RX_FULL: xsk_stat = stats.rx_ring_full; expected_stat -= RX_FULL_RXQSIZE; break; case STAT_TEST_RX_FILL_EMPTY: xsk_stat = stats.rx_fill_ring_empty_descs; break; default: break; } if (xsk_stat == expected_stat) sigvar = 1; } } static void worker_pkt_validate(void) { u32 payloadseqnum = -2; struct iphdr *iphdr; while (1) { pkt_node_rx_q = TAILQ_LAST(&head, head_s); if (!pkt_node_rx_q) break; iphdr = (struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr)); /*do not increment pktcounter if !(tos=0x9 and ipv4) */ if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) { payloadseqnum = *((uint32_t *)(pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE)); if (debug_pkt_dump && payloadseqnum != EOT) { pkt_obj = malloc(sizeof(*pkt_obj)); pkt_obj->payload = malloc(PKT_SIZE); memcpy(pkt_obj->payload, pkt_node_rx_q->pkt_frame, PKT_SIZE); pkt_buf[payloadseqnum] = pkt_obj; } if (payloadseqnum == EOT) { print_verbose("End-of-transmission frame received: PASS\n"); sigvar = 1; break; } if (prev_pkt + 1 != payloadseqnum) { ksft_test_result_fail ("ERROR: [%s] prev_pkt [%d], payloadseqnum [%d]\n", __func__, prev_pkt, payloadseqnum); ksft_exit_xfail(); } prev_pkt = payloadseqnum; pkt_counter++; } else { ksft_print_msg("Invalid frame received: "); ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version, iphdr->tos); } TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes); free(pkt_node_rx_q->pkt_frame); free(pkt_node_rx_q); pkt_node_rx_q = NULL; } } static void thread_common_ops(struct ifobject *ifobject, void *bufs, pthread_mutex_t *mutexptr, atomic_int *spinningptr) { int ctr = 0; int ret; xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE); ret = xsk_configure_socket(ifobject); /* Retry Create Socket if it fails as xsk_socket__create() * is asynchronous * * Essential to lock Mutex here to prevent Tx thread from * entering before Rx and causing a deadlock */ pthread_mutex_lock(mutexptr); while (ret && ctr < SOCK_RECONF_CTR) { atomic_store(spinningptr, 1); xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE); ret = xsk_configure_socket(ifobject); usleep(USLEEP_MAX); ctr++; } atomic_store(spinningptr, 0); pthread_mutex_unlock(mutexptr); if (ctr >= SOCK_RECONF_CTR) exit_with_error(ret); } static void *worker_testapp_validate(void *arg) { struct udphdr *udp_hdr = (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr)); struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr)); struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; struct ifobject *ifobject = (struct ifobject *)arg; struct generic_data data; void *bufs = NULL; pthread_attr_setstacksize(&attr, THREAD_STACK); if (!bidi_pass) { bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (bufs == MAP_FAILED) exit_with_error(errno); if (strcmp(ifobject->nsname, "")) switch_namespace(ifobject->ifdict_index); } if (ifobject->fv.vector == tx) { int spinningrxctr = 0; if (!bidi_pass) thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_tx); while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) { spinningrxctr++; usleep(USLEEP_MAX); } print_verbose("Interface [%s] vector [Tx]\n", ifobject->ifname); for (int i = 0; i < num_frames; i++) { /*send EOT frame */ if (i == (num_frames - 1)) data.seqnum = -1; else data.seqnum = i; gen_udp_hdr(&data, ifobject, udp_hdr); gen_ip_hdr(ifobject, ip_hdr); gen_udp_csum(udp_hdr, ip_hdr); gen_eth_hdr(ifobject, eth_hdr); gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE); } print_verbose("Sending %d packets on interface %s\n", (opt_pkt_count - 1), ifobject->ifname); tx_only_all(ifobject); } else if (ifobject->fv.vector == rx) { struct pollfd fds[MAX_SOCKS] = { }; int ret; if (!bidi_pass) thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_rx); print_verbose("Interface [%s] vector [Rx]\n", ifobject->ifname); if (stat_test_type != STAT_TEST_RX_FILL_EMPTY) xsk_populate_fill_ring(ifobject->umem); TAILQ_INIT(&head); if (debug_pkt_dump) { pkt_buf = calloc(num_frames, sizeof(*pkt_buf)); if (!pkt_buf) exit_with_error(errno); } fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk); fds[0].events = POLLIN; pthread_mutex_lock(&sync_mutex); pthread_cond_signal(&signal_rx_condition); pthread_mutex_unlock(&sync_mutex); while (1) { if (test_type == TEST_TYPE_POLL) { ret = poll(fds, 1, POLL_TMOUT); if (ret <= 0) continue; } if (test_type != TEST_TYPE_STATS) { rx_pkt(ifobject->xsk, fds); worker_pkt_validate(); } else { worker_stats_validate(ifobject); } if (sigvar) break; } if (test_type != TEST_TYPE_STATS) print_verbose("Received %d packets on interface %s\n", pkt_counter, ifobject->ifname); if (test_type == TEST_TYPE_TEARDOWN) print_verbose("Destroying socket\n"); } if ((test_type != TEST_TYPE_BIDI) || bidi_pass) { xsk_socket__delete(ifobject->xsk->xsk); (void)xsk_umem__delete(ifobject->umem->umem); } pthread_exit(NULL); } static void testapp_validate(void) { struct timespec max_wait = { 0, 0 }; bool bidi = test_type == TEST_TYPE_BIDI; pthread_attr_init(&attr); pthread_attr_setstacksize(&attr, THREAD_STACK); if ((test_type == TEST_TYPE_BIDI) && bidi_pass) { pthread_init_mutex(); if (!switching_notify) { print_verbose("Switching Tx/Rx vectors\n"); switching_notify++; } } pthread_mutex_lock(&sync_mutex); /*Spawn RX thread */ if (!bidi || !bidi_pass) { if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[1])) exit_with_error(errno); } else if (bidi && bidi_pass) { /*switch Tx/Rx vectors */ ifdict[0]->fv.vector = rx; if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[0])) exit_with_error(errno); } if (clock_gettime(CLOCK_REALTIME, &max_wait)) exit_with_error(errno); max_wait.tv_sec += TMOUT_SEC; if (pthread_cond_timedwait(&signal_rx_condition, &sync_mutex, &max_wait) == ETIMEDOUT) exit_with_error(errno); pthread_mutex_unlock(&sync_mutex); /*Spawn TX thread */ if (!bidi || !bidi_pass) { if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[0])) exit_with_error(errno); } else if (bidi && bidi_pass) { /*switch Tx/Rx vectors */ ifdict[1]->fv.vector = tx; if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[1])) exit_with_error(errno); } pthread_join(t1, NULL); pthread_join(t0, NULL); if (debug_pkt_dump) { worker_pkt_dump(); for (int iter = 0; iter < num_frames - 1; iter++) { free(pkt_buf[iter]->payload); free(pkt_buf[iter]); } free(pkt_buf); } if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !(test_type == TEST_TYPE_STATS)) print_ksft_result(); } static void testapp_sockets(void) { for (int i = 0; i < ((test_type == TEST_TYPE_TEARDOWN) ? MAX_TEARDOWN_ITER : MAX_BIDI_ITER); i++) { pkt_counter = 0; prev_pkt = -1; sigvar = 0; print_verbose("Creating socket\n"); testapp_validate(); test_type == TEST_TYPE_BIDI ? bidi_pass++ : bidi_pass; } print_ksft_result(); } static void testapp_stats(void) { for (int i = 0; i < STAT_TEST_TYPE_MAX; i++) { stat_test_type = i; /* reset defaults */ rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; switch (stat_test_type) { case STAT_TEST_RX_DROPPED: frame_headroom = XSK_UMEM__DEFAULT_FRAME_SIZE - XDP_PACKET_HEADROOM - 1; break; case STAT_TEST_RX_FULL: rxqsize = RX_FULL_RXQSIZE; break; default: break; } testapp_validate(); } print_ksft_result(); } static void init_iface_config(struct ifaceconfigobj *ifaceconfig) { /*Init interface0 */ ifdict[0]->fv.vector = tx; memcpy(ifdict[0]->dst_mac, ifaceconfig->dst_mac, ETH_ALEN); memcpy(ifdict[0]->src_mac, ifaceconfig->src_mac, ETH_ALEN); ifdict[0]->dst_ip = ifaceconfig->dst_ip.s_addr; ifdict[0]->src_ip = ifaceconfig->src_ip.s_addr; ifdict[0]->dst_port = ifaceconfig->dst_port; ifdict[0]->src_port = ifaceconfig->src_port; /*Init interface1 */ ifdict[1]->fv.vector = rx; memcpy(ifdict[1]->dst_mac, ifaceconfig->src_mac, ETH_ALEN); memcpy(ifdict[1]->src_mac, ifaceconfig->dst_mac, ETH_ALEN); ifdict[1]->dst_ip = ifaceconfig->src_ip.s_addr; ifdict[1]->src_ip = ifaceconfig->dst_ip.s_addr; ifdict[1]->dst_port = ifaceconfig->src_port; ifdict[1]->src_port = ifaceconfig->dst_port; } static void *nsdisablemodethread(void *args) { struct targs *targs = args; targs->retptr = false; if (switch_namespace(targs->idx)) { targs->retptr = bpf_set_link_xdp_fd(ifdict[targs->idx]->ifindex, -1, targs->flags); } else { targs->retptr = errno; print_verbose("Failed to switch namespace to %s\n", ifdict[targs->idx]->nsname); } pthread_exit(NULL); } static void disable_xdp_mode(int mode) { int err = 0; __u32 flags = XDP_FLAGS_UPDATE_IF_NOEXIST | mode; char *mode_str = mode & XDP_FLAGS_SKB_MODE ? "skb" : "drv"; for (int i = 0; i < MAX_INTERFACES; i++) { if (strcmp(ifdict[i]->nsname, "")) { struct targs *targs; targs = malloc(sizeof(*targs)); memset(targs, 0, sizeof(*targs)); if (!targs) exit_with_error(errno); targs->idx = i; targs->flags = flags; if (pthread_create(&ns_thread, NULL, nsdisablemodethread, targs)) exit_with_error(errno); pthread_join(ns_thread, NULL); err = targs->retptr; free(targs); } else { err = bpf_set_link_xdp_fd(ifdict[i]->ifindex, -1, flags); } if (err) { print_verbose("Failed to disable %s mode on interface %s\n", mode_str, ifdict[i]->ifname); exit_with_error(err); } print_verbose("Disabled %s mode for interface: %s\n", mode_str, ifdict[i]->ifname); configured_mode = mode & XDP_FLAGS_SKB_MODE ? TEST_MODE_DRV : TEST_MODE_SKB; } } static void run_pkt_test(int mode, int type) { test_type = type; /* reset defaults after potential previous test */ xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; pkt_counter = 0; switching_notify = 0; bidi_pass = 0; prev_pkt = -1; ifdict[0]->fv.vector = tx; ifdict[1]->fv.vector = rx; sigvar = 0; stat_test_type = -1; rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; switch (mode) { case (TEST_MODE_SKB): if (configured_mode == TEST_MODE_DRV) disable_xdp_mode(XDP_FLAGS_DRV_MODE); xdp_flags |= XDP_FLAGS_SKB_MODE; break; case (TEST_MODE_DRV): if (configured_mode == TEST_MODE_SKB) disable_xdp_mode(XDP_FLAGS_SKB_MODE); xdp_flags |= XDP_FLAGS_DRV_MODE; break; default: break; } pthread_init_mutex(); if (test_type == TEST_TYPE_STATS) testapp_stats(); else if ((test_type != TEST_TYPE_TEARDOWN) && (test_type != TEST_TYPE_BIDI)) testapp_validate(); else testapp_sockets(); pthread_destroy_mutex(); } int main(int argc, char **argv) { struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY }; if (setrlimit(RLIMIT_MEMLOCK, &_rlim)) exit_with_error(errno); const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; const char *IP1 = "192.168.100.162"; const char *IP2 = "192.168.100.161"; u16 UDP_DST_PORT = 2020; u16 UDP_SRC_PORT = 2121; int i, j; ifaceconfig = malloc(sizeof(struct ifaceconfigobj)); memcpy(ifaceconfig->dst_mac, MAC1, ETH_ALEN); memcpy(ifaceconfig->src_mac, MAC2, ETH_ALEN); inet_aton(IP1, &ifaceconfig->dst_ip); inet_aton(IP2, &ifaceconfig->src_ip); ifaceconfig->dst_port = UDP_DST_PORT; ifaceconfig->src_port = UDP_SRC_PORT; for (int i = 0; i < MAX_INTERFACES; i++) { ifdict[i] = malloc(sizeof(struct ifobject)); if (!ifdict[i]) exit_with_error(errno); ifdict[i]->ifdict_index = i; } setlocale(LC_ALL, ""); parse_command_line(argc, argv); num_frames = ++opt_pkt_count; init_iface_config(ifaceconfig); disable_xdp_mode(XDP_FLAGS_DRV_MODE); ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX); for (i = 0; i < TEST_MODE_MAX; i++) { for (j = 0; j < TEST_TYPE_MAX; j++) run_pkt_test(i, j); } for (int i = 0; i < MAX_INTERFACES; i++) free(ifdict[i]); ksft_exit_pass(); return 0; }