linux/drivers/net/thunderbolt.c
Mika Westerberg ff7cd07f30 net: thunderbolt: Enable DMA paths only after rings are enabled
If the other host starts sending packets early on, it is possible that we
are still in the middle of populating the Rx ring with its initial
buffers. This causes tbnet_poll() to mess up the queue, which results in
list corruption. This happens specifically when connected to macOS, as
it seems to start sending various IP discovery packets as soon as its side
of the paths is configured.

To prevent this we move the DMA path enabling to happen after we have
primed the Rx ring. This makes sure no incoming packets can arrive
before we are ready to handle them.

Fixes: e69b6c02b4 ("net: Add support for networking over Thunderbolt cable")
Cc: stable@vger.kernel.org
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-08-31 14:05:11 +01:00

1404 lines
36 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Networking over Thunderbolt cable using Apple ThunderboltIP protocol
*
* Copyright (C) 2017, Intel Corporation
* Authors: Amir Levy <amir.jer.levy@intel.com>
* Michael Jamet <michael.jamet@intel.com>
* Mika Westerberg <mika.westerberg@linux.intel.com>
*/
#include <linux/atomic.h>
#include <linux/highmem.h>
#include <linux/if_vlan.h>
#include <linux/jhash.h>
#include <linux/module.h>
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
#include <linux/sizes.h>
#include <linux/thunderbolt.h>
#include <linux/uuid.h>
#include <linux/workqueue.h>
#include <net/ip6_checksum.h>
/* Protocol timeouts in ms */
#define TBNET_LOGIN_DELAY 4500
#define TBNET_LOGIN_TIMEOUT 500
#define TBNET_LOGOUT_TIMEOUT 1000
#define TBNET_RING_SIZE 256
#define TBNET_LOGIN_RETRIES 60
#define TBNET_LOGOUT_RETRIES 10
#define TBNET_MATCH_FRAGS_ID BIT(1)
#define TBNET_64K_FRAMES BIT(2)
#define TBNET_MAX_MTU SZ_64K
#define TBNET_FRAME_SIZE SZ_4K
#define TBNET_MAX_PAYLOAD_SIZE \
(TBNET_FRAME_SIZE - sizeof(struct thunderbolt_ip_frame_header))
/* Rx packets need to hold space for skb_shared_info */
#define TBNET_RX_MAX_SIZE \
(TBNET_FRAME_SIZE + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
#define TBNET_RX_PAGE_ORDER get_order(TBNET_RX_MAX_SIZE)
#define TBNET_RX_PAGE_SIZE (PAGE_SIZE << TBNET_RX_PAGE_ORDER)
#define TBNET_L0_PORT_NUM(route) ((route) & GENMASK(5, 0))
/**
 * struct thunderbolt_ip_frame_header - Header for each Thunderbolt frame
 * @frame_size: size of the payload carried by the frame (the Tx path
 *		stores the data length here, excluding this header)
 * @frame_index: running index of the frame within its packet
 * @frame_id: ID of the frame to match frames to specific packet
 * @frame_count: how many frames make up a full packet
 *
 * Each data frame passed to the high-speed DMA ring has this header. If
 * the XDomain network directory announces that %TBNET_MATCH_FRAGS_ID is
 * supported then @frame_id is filled, otherwise it stays %0.
 *
 * The Tx and Rx paths convert the fields with cpu_to_le*()/le*_to_cpu()
 * when accessing frame memory.
 */
struct thunderbolt_ip_frame_header {
u32 frame_size;
u16 frame_index;
u16 frame_id;
u32 frame_count;
};
/*
 * Start/end of frame markers. They are programmed into the sof/eof fields
 * of every Tx frame and used to build the sof/eof masks of the Rx ring.
 */
enum thunderbolt_ip_frame_pdf {
TBIP_PDF_FRAME_START = 1,
TBIP_PDF_FRAME_END,
};
/* Message types stored in the type field of struct thunderbolt_ip_header */
enum thunderbolt_ip_type {
TBIP_LOGIN,
TBIP_LOGIN_RESPONSE,
TBIP_LOGOUT,
TBIP_STATUS,
};
/*
 * Common header of every ThunderboltIP configuration message. It is
 * populated by tbnet_fill_header().
 */
struct thunderbolt_ip_header {
/* Upper and lower 32 bits of the route */
u32 route_hi;
u32 route_lo;
/* Message length in dwords (first three dwords excluded) plus sequence */
u32 length_sn;
/* ThunderboltIP protocol UUID (tbnet_svc_uuid) */
uuid_t uuid;
uuid_t initiator_uuid;
uuid_t target_uuid;
/* One of enum thunderbolt_ip_type */
u32 type;
u32 command_id;
};
/* Layout of thunderbolt_ip_header.length_sn */
#define TBIP_HDR_LENGTH_MASK GENMASK(5, 0)
#define TBIP_HDR_SN_MASK GENMASK(28, 27)
#define TBIP_HDR_SN_SHIFT 27
/* ThunderboltIP login request (message type TBIP_LOGIN) */
struct thunderbolt_ip_login {
struct thunderbolt_ip_header hdr;
/* Set to TBIP_LOGIN_PROTO_VERSION when we send the request */
u32 proto_version;
/* HopID the sender uses for transmitting packets */
u32 transmit_path;
u32 reserved[4];
};
#define TBIP_LOGIN_PROTO_VERSION 1
/* Reply to a login request (message type TBIP_LOGIN_RESPONSE) */
struct thunderbolt_ip_login_response {
struct thunderbolt_ip_header hdr;
u32 status;
/* MAC address of the responder; receiver_mac_len holds its byte length */
u32 receiver_mac[2];
u32 receiver_mac_len;
u32 reserved[4];
};
/* Logout request (message type TBIP_LOGOUT); carries only the header */
struct thunderbolt_ip_logout {
struct thunderbolt_ip_header hdr;
};
/* Status message (TBIP_STATUS); used as the reply to a logout request */
struct thunderbolt_ip_status {
struct thunderbolt_ip_header hdr;
u32 status;
};
/*
 * Driver maintained counters that tbnet_get_stats64() copies into
 * struct rtnl_link_stats64.
 */
struct tbnet_stats {
u64 tx_packets;
u64 rx_packets;
u64 tx_bytes;
u64 rx_bytes;
u64 rx_errors;
u64 tx_errors;
u64 rx_length_errors;
u64 rx_over_errors;
u64 rx_crc_errors;
u64 rx_missed_errors;
};
/* One slot of a software ring */
struct tbnet_frame {
/* Network device owning the frame; the Tx callback uses it to find tbnet */
struct net_device *dev;
/* Page backing the frame data, NULL when the slot has no buffer */
struct page *page;
/* Descriptor handed to the Thunderbolt ring (tb_ring_rx()/tb_ring_tx()) */
struct ring_frame frame;
};
/* Software ring wrapping a Thunderbolt DMA ring */
struct tbnet_ring {
struct tbnet_frame frames[TBNET_RING_SIZE];
/* Free running counters; tbnet_available_buffers() returns prod - cons */
unsigned int cons;
unsigned int prod;
/* Underlying Thunderbolt ring, allocated in tbnet_open() */
struct tb_ring *ring;
};
/**
 * struct tbnet - ThunderboltIP network driver private data
 * @svc: XDomain service the driver is bound to
 * @xd: XDomain the service belongs to
 * @handler: ThunderboltIP configuration protocol handler
 * @dev: Networking device
 * @napi: NAPI structure for Rx polling
 * @stats: Network statistics
 * @skb: Network packet that is currently processed on Rx path
 * @command_id: ID used for next configuration protocol packet
 * @login_sent: ThunderboltIP login message successfully sent
 * @login_received: ThunderboltIP login message received from the remote
 *		    host
 * @local_transmit_path: HopID we are using to send out packets
 * @remote_transmit_path: HopID the other end is using to send packets to us
 * @connection_lock: Lock serializing access to @login_sent and
 *		     @login_received; @remote_transmit_path is also
 *		     written with this lock held.
 * @login_retries: Number of login retries currently done
 * @login_work: Worker to send ThunderboltIP login packets
 * @connected_work: Worker that finalizes the ThunderboltIP connection
 *		    setup and enables DMA paths for high speed data
 *		    transfers
 * @disconnect_work: Worker that handles tearing down the ThunderboltIP
 *		     connection
 * @rx_hdr: Copy of the currently processed Rx frame. Used when a
 *	    network packet consists of multiple Thunderbolt frames.
 *	    In host byte order.
 * @rx_ring: Software ring holding Rx frames
 * @frame_id: Frame ID used for the next Tx packet
 *	      (if %TBNET_MATCH_FRAGS_ID is supported in both ends)
 * @tx_ring: Software ring holding Tx frames
 */
struct tbnet {
const struct tb_service *svc;
struct tb_xdomain *xd;
struct tb_protocol_handler handler;
struct net_device *dev;
struct napi_struct napi;
struct tbnet_stats stats;
struct sk_buff *skb;
atomic_t command_id;
bool login_sent;
bool login_received;
int local_transmit_path;
int remote_transmit_path;
struct mutex connection_lock;
int login_retries;
struct delayed_work login_work;
struct work_struct connected_work;
struct work_struct disconnect_work;
struct thunderbolt_ip_frame_header rx_hdr;
struct tbnet_ring rx_ring;
atomic_t frame_id;
struct tbnet_ring tx_ring;
};
/* Network property directory UUID: c66189ca-1cce-4195-bdb8-49592e5f5a4f */
static const uuid_t tbnet_dir_uuid =
UUID_INIT(0xc66189ca, 0x1cce, 0x4195,
0xbd, 0xb8, 0x49, 0x59, 0x2e, 0x5f, 0x5a, 0x4f);
/* ThunderboltIP protocol UUID: 798f589e-3616-8a47-97c6-5664a920c8dd */
static const uuid_t tbnet_svc_uuid =
UUID_INIT(0x798f589e, 0x3616, 0x8a47,
0x97, 0xc6, 0x56, 0x64, 0xa9, 0x20, 0xc8, 0xdd);
/* "network" property directory; created in tbnet_init(), freed in tbnet_exit() */
static struct tb_property_dir *tbnet_dir;
/*
 * Populate the common ThunderboltIP message header.
 *
 * @size is the size in bytes of the whole message @hdr is embedded in and
 * @sequence is placed into the sequence number bits of the length_sn field.
 */
static void tbnet_fill_header(struct thunderbolt_ip_header *hdr, u64 route,
	u8 sequence, const uuid_t *initiator_uuid, const uuid_t *target_uuid,
	enum thunderbolt_ip_type type, size_t size, u32 command_id)
{
	/* Length does not include route_hi/lo and length_sn fields */
	u32 dwords = (size - 3 * 4) / 4;
	u32 sn = (sequence << TBIP_HDR_SN_SHIFT) & TBIP_HDR_SN_MASK;

	hdr->route_hi = upper_32_bits(route);
	hdr->route_lo = lower_32_bits(route);
	hdr->length_sn = dwords | sn;

	uuid_copy(&hdr->uuid, &tbnet_svc_uuid);
	uuid_copy(&hdr->initiator_uuid, initiator_uuid);
	uuid_copy(&hdr->target_uuid, target_uuid);

	hdr->type = type;
	hdr->command_id = command_id;
}
/*
 * Send a login response carrying our MAC address to the remote host.
 * @command_id is echoed back in the header. Returns whatever
 * tb_xdomain_response() returns.
 */
static int tbnet_login_response(struct tbnet *net, u64 route, u8 sequence,
	u32 command_id)
{
	struct tb_xdomain *xd = net->xd;
	struct thunderbolt_ip_login_response resp;

	memset(&resp, 0, sizeof(resp));

	/* Tell the peer which MAC address receives on this end */
	resp.receiver_mac_len = ETH_ALEN;
	memcpy(resp.receiver_mac, net->dev->dev_addr, ETH_ALEN);

	tbnet_fill_header(&resp.hdr, route, sequence, xd->local_uuid,
			  xd->remote_uuid, TBIP_LOGIN_RESPONSE, sizeof(resp),
			  command_id);

	return tb_xdomain_response(xd, &resp, sizeof(resp),
				   TB_CFG_PKG_XDOMAIN_RESP);
}
/*
 * Send a login request announcing our transmit HopID and wait up to
 * TBNET_LOGIN_TIMEOUT ms for the reply. Returns the result of
 * tb_xdomain_request().
 */
static int tbnet_login_request(struct tbnet *net, u8 sequence)
{
	struct tb_xdomain *xd = net->xd;
	struct thunderbolt_ip_login_response resp;
	struct thunderbolt_ip_login req;
	u32 id;

	memset(&req, 0, sizeof(req));

	/* Each request gets a fresh command ID */
	id = atomic_inc_return(&net->command_id);
	tbnet_fill_header(&req.hdr, xd->route, sequence, xd->local_uuid,
			  xd->remote_uuid, TBIP_LOGIN, sizeof(req), id);

	req.proto_version = TBIP_LOGIN_PROTO_VERSION;
	req.transmit_path = net->local_transmit_path;

	return tb_xdomain_request(xd, &req, sizeof(req),
				  TB_CFG_PKG_XDOMAIN_RESP, &resp,
				  sizeof(resp), TB_CFG_PKG_XDOMAIN_RESP,
				  TBNET_LOGIN_TIMEOUT);
}
/*
 * Acknowledge a logout request from the remote host with a status message.
 *
 * NOTE(review): @command_id is accepted but not used; the header gets a
 * newly incremented local command ID instead. Confirm whether the reply is
 * expected to echo the ID of the request, as the login response does.
 */
static int tbnet_logout_response(struct tbnet *net, u64 route, u8 sequence,
	u32 command_id)
{
	struct tb_xdomain *xd = net->xd;
	struct thunderbolt_ip_status status;
	u32 id;

	memset(&status, 0, sizeof(status));

	id = atomic_inc_return(&net->command_id);
	tbnet_fill_header(&status.hdr, route, sequence, xd->local_uuid,
			  xd->remote_uuid, TBIP_STATUS, sizeof(status), id);

	return tb_xdomain_response(xd, &status, sizeof(status),
				   TB_CFG_PKG_XDOMAIN_RESP);
}
static int tbnet_logout_request(struct tbnet *net)
{
struct thunderbolt_ip_logout request;
struct thunderbolt_ip_status reply;
struct tb_xdomain *xd = net->xd;
memset(&request, 0, sizeof(request));
tbnet_fill_header(&request.hdr, xd->route, 0, xd->local_uuid,
xd->remote_uuid, TBIP_LOGOUT, sizeof(request),
atomic_inc_return(&net->command_id));
return tb_xdomain_request(xd, &request, sizeof(request),
TB_CFG_PKG_XDOMAIN_RESP, &reply,
sizeof(reply), TB_CFG_PKG_XDOMAIN_RESP,
TBNET_LOGOUT_TIMEOUT);
}
/* Reset the login state and schedule the first login attempt in one second */
static void start_login(struct tbnet *net)
{
	unsigned long first_attempt = msecs_to_jiffies(1000);

	mutex_lock(&net->connection_lock);
	net->login_received = false;
	net->login_sent = false;
	mutex_unlock(&net->connection_lock);

	queue_delayed_work(system_long_wq, &net->login_work, first_attempt);
}
/*
 * Cancel the login worker and then the connected worker, waiting for
 * either of them to finish if it is currently running.
 */
static void stop_login(struct tbnet *net)
{
	struct delayed_work *login = &net->login_work;
	struct work_struct *connected = &net->connected_work;

	cancel_delayed_work_sync(login);
	cancel_work_sync(connected);
}
/* Size of the frame in bytes; a stored size of zero means a full frame */
static inline unsigned int tbnet_frame_size(const struct tbnet_frame *tf)
{
	if (tf->frame.size)
		return tf->frame.size;

	return TBNET_FRAME_SIZE;
}
static void tbnet_free_buffers(struct tbnet_ring *ring)
{
unsigned int i;
for (i = 0; i < TBNET_RING_SIZE; i++) {
struct device *dma_dev = tb_ring_dma_device(ring->ring);
struct tbnet_frame *tf = &ring->frames[i];
enum dma_data_direction dir;
unsigned int order;
size_t size;
if (!tf->page)
continue;
if (ring->ring->is_tx) {
dir = DMA_TO_DEVICE;
order = 0;
size = TBNET_FRAME_SIZE;
} else {
dir = DMA_FROM_DEVICE;
order = TBNET_RX_PAGE_ORDER;
size = TBNET_RX_PAGE_SIZE;
}
if (tf->frame.buffer_phy)
dma_unmap_page(dma_dev, tf->frame.buffer_phy, size,
dir);
__free_pages(tf->page, order);
tf->page = NULL;
}
ring->cons = 0;
ring->prod = 0;
}
/*
 * Bring the ThunderboltIP connection down.
 *
 * The carrier and Tx queue are stopped and pending login work is
 * cancelled. If both logins had completed, the rings are stopped, their
 * buffers freed, the DMA paths disabled and the Rx HopID released. When
 * @send_logout is true the remote host is told about the logout first.
 * The login state is reset in every case.
 */
static void tbnet_tear_down(struct tbnet *net, bool send_logout)
{
	int ret;

	netif_carrier_off(net->dev);
	netif_stop_queue(net->dev);

	stop_login(net);

	mutex_lock(&net->connection_lock);

	if (!net->login_sent || !net->login_received)
		goto out_reset;

	if (send_logout) {
		int attempts = TBNET_LOGOUT_RETRIES;

		/* Only a timed out request is worth another attempt */
		while (attempts-- > 0) {
			if (tbnet_logout_request(net) != -ETIMEDOUT)
				break;
		}
	}

	tb_ring_stop(net->rx_ring.ring);
	tb_ring_stop(net->tx_ring.ring);
	tbnet_free_buffers(&net->rx_ring);
	tbnet_free_buffers(&net->tx_ring);

	ret = tb_xdomain_disable_paths(net->xd, net->local_transmit_path,
				       net->rx_ring.ring->hop,
				       net->remote_transmit_path,
				       net->tx_ring.ring->hop);
	if (ret)
		netdev_warn(net->dev, "failed to disable DMA paths\n");

	tb_xdomain_release_in_hopid(net->xd, net->remote_transmit_path);
	net->remote_transmit_path = 0;

out_reset:
	net->login_retries = 0;
	net->login_sent = false;
	net->login_received = false;

	mutex_unlock(&net->connection_lock);
}
/*
 * ThunderboltIP configuration protocol handler callback.
 *
 * Returns 0 when the packet is not addressed to this connection or is of
 * an unknown type, and 1 once a login or logout packet has been consumed
 * (also when sending the response failed).
 */
static int tbnet_handle_packet(const void *buf, size_t size, void *data)
{
	const struct thunderbolt_ip_login *pkg = buf;
	const struct thunderbolt_ip_header *hdr = &pkg->hdr;
	struct tbnet *net = data;
	u32 command_id, sequence;
	int ret = 0;
	u64 route;

	/* Make sure the packet is for us */
	if (size < sizeof(*hdr))
		return 0;
	if (!uuid_equal(&hdr->initiator_uuid, net->xd->remote_uuid) ||
	    !uuid_equal(&hdr->target_uuid, net->xd->local_uuid))
		return 0;

	/* Bit 63 is dropped before comparing against our route */
	route = ((u64)hdr->route_hi << 32) | hdr->route_lo;
	route &= ~BIT_ULL(63);
	if (route != net->xd->route)
		return 0;

	sequence = (hdr->length_sn & TBIP_HDR_SN_MASK) >> TBIP_HDR_SN_SHIFT;
	command_id = hdr->command_id;

	switch (hdr->type) {
	case TBIP_LOGIN:
		if (!netif_running(net->dev))
			break;

		ret = tbnet_login_response(net, route, sequence, command_id);
		if (ret)
			break;

		mutex_lock(&net->connection_lock);
		net->login_received = true;
		/* NOTE(review): only the common header length was validated
		 * above, yet transmit_path lies beyond the header in
		 * struct thunderbolt_ip_login - confirm the transport
		 * always delivers a complete login message.
		 */
		net->remote_transmit_path = pkg->transmit_path;

		/* Kick off a new round of login attempts if ours ran out
		 * of retries or we have not logged in since the previous
		 * logout.
		 */
		if (!net->login_sent ||
		    net->login_retries >= TBNET_LOGIN_RETRIES) {
			net->login_retries = 0;
			queue_delayed_work(system_long_wq,
					   &net->login_work, 0);
		}
		mutex_unlock(&net->connection_lock);

		queue_work(system_long_wq, &net->connected_work);
		break;

	case TBIP_LOGOUT:
		ret = tbnet_logout_response(net, route, sequence, command_id);
		if (!ret)
			queue_work(system_long_wq, &net->disconnect_work);
		break;

	default:
		return 0;
	}

	if (ret)
		netdev_warn(net->dev, "failed to send ThunderboltIP response\n");

	return 1;
}
/* Distance between the producer and consumer counters of @ring */
static unsigned int tbnet_available_buffers(const struct tbnet_ring *ring)
{
	unsigned int available = ring->prod - ring->cons;

	return available;
}
static int tbnet_alloc_rx_buffers(struct tbnet *net, unsigned int nbuffers)
{
struct tbnet_ring *ring = &net->rx_ring;
int ret;
while (nbuffers--) {
struct device *dma_dev = tb_ring_dma_device(ring->ring);
unsigned int index = ring->prod & (TBNET_RING_SIZE - 1);
struct tbnet_frame *tf = &ring->frames[index];
dma_addr_t dma_addr;
if (tf->page)
break;
/* Allocate page (order > 0) so that it can hold maximum
* ThunderboltIP frame (4kB) and the additional room for
* SKB shared info required by build_skb().
*/
tf->page = dev_alloc_pages(TBNET_RX_PAGE_ORDER);
if (!tf->page) {
ret = -ENOMEM;
goto err_free;
}
dma_addr = dma_map_page(dma_dev, tf->page, 0,
TBNET_RX_PAGE_SIZE, DMA_FROM_DEVICE);
if (dma_mapping_error(dma_dev, dma_addr)) {
ret = -ENOMEM;
goto err_free;
}
tf->frame.buffer_phy = dma_addr;
tf->dev = net->dev;
tb_ring_rx(ring->ring, &tf->frame);
ring->prod++;
}
return 0;
err_free:
tbnet_free_buffers(ring);
return ret;
}
/*
 * Take the next free Tx frame from the ring and hand it over to the CPU.
 * Returns NULL when no buffer is available.
 */
static struct tbnet_frame *tbnet_get_tx_buffer(struct tbnet *net)
{
	struct tbnet_ring *ring = &net->tx_ring;
	struct device *dma_dev = tb_ring_dma_device(ring->ring);
	struct tbnet_frame *tf;

	if (tbnet_available_buffers(ring) == 0)
		return NULL;

	tf = &ring->frames[ring->cons & (TBNET_RING_SIZE - 1)];
	ring->cons++;

	/* Size zero stands for a full frame, see tbnet_frame_size() */
	tf->frame.size = 0;

	dma_sync_single_for_cpu(dma_dev, tf->frame.buffer_phy,
				tbnet_frame_size(tf), DMA_TO_DEVICE);

	return tf;
}
/*
 * Tx frame callback installed on every Tx frame. Gives the buffer back to
 * the software ring and wakes the queue once at least half of the ring is
 * available again.
 */
static void tbnet_tx_callback(struct tb_ring *ring, struct ring_frame *frame,
	bool canceled)
{
	struct tbnet_frame *tf = container_of(frame, struct tbnet_frame, frame);
	struct tbnet *net = netdev_priv(tf->dev);
	struct tbnet_ring *tx = &net->tx_ring;

	/* Return buffer to the ring */
	tx->prod++;

	if (tbnet_available_buffers(tx) < TBNET_RING_SIZE / 2)
		return;

	netif_wake_queue(net->dev);
}
/*
 * Allocate and DMA map one page for every slot of the Tx ring and set up
 * the frame descriptors. Returns 0 on success; on failure everything
 * allocated so far is released and -ENOMEM is returned.
 */
static int tbnet_alloc_tx_buffers(struct tbnet *net)
{
	struct tbnet_ring *ring = &net->tx_ring;
	struct device *dma_dev = tb_ring_dma_device(ring->ring);
	struct tbnet_frame *tf;
	dma_addr_t dma_addr;
	unsigned int i;

	for (i = 0; i < TBNET_RING_SIZE; i++) {
		tf = &ring->frames[i];

		tf->page = alloc_page(GFP_KERNEL);
		if (!tf->page)
			goto err_free;

		dma_addr = dma_map_page(dma_dev, tf->page, 0, TBNET_FRAME_SIZE,
					DMA_TO_DEVICE);
		if (dma_mapping_error(dma_dev, dma_addr)) {
			/* Never mapped, so drop the page before cleanup */
			__free_page(tf->page);
			tf->page = NULL;
			goto err_free;
		}

		tf->dev = net->dev;
		tf->frame.buffer_phy = dma_addr;
		tf->frame.callback = tbnet_tx_callback;
		tf->frame.sof = TBIP_PDF_FRAME_START;
		tf->frame.eof = TBIP_PDF_FRAME_END;
	}

	ring->cons = 0;
	ring->prod = TBNET_RING_SIZE - 1;

	return 0;

err_free:
	tbnet_free_buffers(ring);
	return -ENOMEM;
}
static void tbnet_connected_work(struct work_struct *work)
{
struct tbnet *net = container_of(work, typeof(*net), connected_work);
bool connected;
int ret;
if (netif_carrier_ok(net->dev))
return;
mutex_lock(&net->connection_lock);
connected = net->login_sent && net->login_received;
mutex_unlock(&net->connection_lock);
if (!connected)
return;
ret = tb_xdomain_alloc_in_hopid(net->xd, net->remote_transmit_path);
if (ret != net->remote_transmit_path) {
netdev_err(net->dev, "failed to allocate Rx HopID\n");
return;
}
/* Both logins successful so enable the rings, high-speed DMA
* paths and start the network device queue.
*
* Note we enable the DMA paths last to make sure we have primed
* the Rx ring before any incoming packets are allowed to
* arrive.
*/
tb_ring_start(net->tx_ring.ring);
tb_ring_start(net->rx_ring.ring);
ret = tbnet_alloc_rx_buffers(net, TBNET_RING_SIZE);
if (ret)
goto err_stop_rings;
ret = tbnet_alloc_tx_buffers(net);
if (ret)
goto err_free_rx_buffers;
ret = tb_xdomain_enable_paths(net->xd, net->local_transmit_path,
net->rx_ring.ring->hop,
net->remote_transmit_path,
net->tx_ring.ring->hop);
if (ret) {
netdev_err(net->dev, "failed to enable DMA paths\n");
goto err_free_tx_buffers;
}
netif_carrier_on(net->dev);
netif_start_queue(net->dev);
return;
err_free_tx_buffers:
tbnet_free_buffers(&net->tx_ring);
err_free_rx_buffers:
tbnet_free_buffers(&net->rx_ring);
err_stop_rings:
tb_ring_stop(net->rx_ring.ring);
tb_ring_stop(net->tx_ring.ring);
tb_xdomain_release_in_hopid(net->xd, net->remote_transmit_path);
}
static void tbnet_login_work(struct work_struct *work)
{
struct tbnet *net = container_of(work, typeof(*net), login_work.work);
unsigned long delay = msecs_to_jiffies(TBNET_LOGIN_DELAY);
int ret;
if (netif_carrier_ok(net->dev))
return;
ret = tbnet_login_request(net, net->login_retries % 4);
if (ret) {
if (net->login_retries++ < TBNET_LOGIN_RETRIES) {
queue_delayed_work(system_long_wq, &net->login_work,
delay);
} else {
netdev_info(net->dev, "ThunderboltIP login timed out\n");
}
} else {
net->login_retries = 0;
mutex_lock(&net->connection_lock);
net->login_sent = true;
mutex_unlock(&net->connection_lock);
queue_work(system_long_wq, &net->connected_work);
}
}
static void tbnet_disconnect_work(struct work_struct *work)
{
struct tbnet *net = container_of(work, typeof(*net), disconnect_work);
tbnet_tear_down(net, false);
}
/*
 * Validate a received frame and its header.
 *
 * Checks the ring descriptor error flags and the frame length, then, based
 * on whether a packet is currently being assembled, checks that the header
 * either continues that packet or starts a new one. The matching error
 * counter is incremented on failure.
 *
 * Returns true if the frame can be accepted, false otherwise.
 */
static bool tbnet_check_frame(struct tbnet *net, const struct tbnet_frame *tf,
	const struct thunderbolt_ip_frame_header *hdr)
{
	const unsigned int size = tbnet_frame_size(tf);
	u32 count, payload, index, id;

	if (tf->frame.flags & RING_DESC_CRC_ERROR) {
		net->stats.rx_crc_errors++;
		return false;
	}
	if (tf->frame.flags & RING_DESC_BUFFER_OVERRUN) {
		net->stats.rx_over_errors++;
		return false;
	}

	/* Should be greater than just header i.e. contains data */
	if (size <= sizeof(*hdr))
		goto err_length;

	count = le32_to_cpu(hdr->frame_count);
	payload = le32_to_cpu(hdr->frame_size);
	index = le16_to_cpu(hdr->frame_index);
	id = le16_to_cpu(hdr->frame_id);

	/* Payload must be non-empty and fit into what was received */
	if (!payload || payload > size - sizeof(*hdr))
		goto err_length;

	if (net->skb && net->rx_hdr.frame_count) {
		/* In the middle of a packet: the header has to agree with
		 * the previous fragment of the same packet.
		 */
		if (count != net->rx_hdr.frame_count)
			goto err_length;

		/* Index must advance by one and the ID must match */
		if (index != net->rx_hdr.frame_index + 1 ||
		    id != net->rx_hdr.frame_id)
			goto err_missed;

		if (net->skb->len + payload > TBNET_MAX_MTU)
			goto err_length;

		return true;
	}

	/* Start of packet, validate the frame header */
	if (count == 0 || count > TBNET_RING_SIZE / 4)
		goto err_length;
	if (index != 0)
		goto err_missed;

	return true;

err_length:
	net->stats.rx_length_errors++;
	return false;

err_missed:
	net->stats.rx_missed_errors++;
	return false;
}
/*
 * NAPI poll callback for the Rx ring.
 *
 * Pulls completed frames from the Rx ring, validates them with
 * tbnet_check_frame() and assembles them into net->skb: the first frame of
 * a packet becomes the skb head through build_skb(), following frames are
 * attached as page fragments. The skb is passed to the stack once the
 * frame whose index equals frame_count - 1 has been added.
 *
 * Returns @budget when the budget was used up; otherwise completes NAPI,
 * re-enables the ring interrupt and returns the number of frames accepted.
 */
static int tbnet_poll(struct napi_struct *napi, int budget)
{
struct tbnet *net = container_of(napi, struct tbnet, napi);
/* Starts out as prod - cons of the Rx ring and grows with every frame
 * taken off the ring below.
 */
unsigned int cleaned_count = tbnet_available_buffers(&net->rx_ring);
struct device *dma_dev = tb_ring_dma_device(net->rx_ring.ring);
unsigned int rx_packets = 0;
while (rx_packets < budget) {
const struct thunderbolt_ip_frame_header *hdr;
unsigned int hdr_size = sizeof(*hdr);
struct sk_buff *skb = NULL;
struct ring_frame *frame;
struct tbnet_frame *tf;
struct page *page;
bool last = true;
u32 frame_size;
/* Return some buffers to hardware, one at a time is too
* slow so allocate MAX_SKB_FRAGS buffers at the same
* time.
*/
if (cleaned_count >= MAX_SKB_FRAGS) {
tbnet_alloc_rx_buffers(net, cleaned_count);
cleaned_count = 0;
}
frame = tb_ring_poll(net->rx_ring.ring);
if (!frame)
break;
dma_unmap_page(dma_dev, frame->buffer_phy,
TBNET_RX_PAGE_SIZE, DMA_FROM_DEVICE);
tf = container_of(frame, typeof(*tf), frame);
/* The page leaves the ring slot here; from now on it is either freed
 * below or becomes part of the skb.
 */
page = tf->page;
tf->page = NULL;
net->rx_ring.cons++;
cleaned_count++;
hdr = page_address(page);
/* A bad frame also discards the packet that was being assembled */
if (!tbnet_check_frame(net, tf, hdr)) {
__free_pages(page, TBNET_RX_PAGE_ORDER);
dev_kfree_skb_any(net->skb);
net->skb = NULL;
continue;
}
frame_size = le32_to_cpu(hdr->frame_size);
skb = net->skb;
if (!skb) {
/* First frame of a packet: wrap the page into a new skb and skip
 * over the frame header.
 */
skb = build_skb(page_address(page),
TBNET_RX_PAGE_SIZE);
if (!skb) {
__free_pages(page, TBNET_RX_PAGE_ORDER);
net->stats.rx_errors++;
break;
}
skb_reserve(skb, hdr_size);
skb_put(skb, frame_size);
net->skb = skb;
} else {
/* Following frame: attach the payload as a page fragment */
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
page, hdr_size, frame_size,
TBNET_RX_PAGE_SIZE - hdr_size);
}
/* Remember the header (host byte order) for checking the next frame */
net->rx_hdr.frame_size = frame_size;
net->rx_hdr.frame_count = le32_to_cpu(hdr->frame_count);
net->rx_hdr.frame_index = le16_to_cpu(hdr->frame_index);
net->rx_hdr.frame_id = le16_to_cpu(hdr->frame_id);
last = net->rx_hdr.frame_index == net->rx_hdr.frame_count - 1;
/* Note: this counter is incremented per accepted frame */
rx_packets++;
net->stats.rx_bytes += frame_size;
if (last) {
skb->protocol = eth_type_trans(skb, net->dev);
napi_gro_receive(&net->napi, skb);
net->skb = NULL;
}
}
net->stats.rx_packets += rx_packets;
if (cleaned_count)
tbnet_alloc_rx_buffers(net, cleaned_count);
if (rx_packets >= budget)
return budget;
napi_complete_done(napi, rx_packets);
/* Re-enable the ring interrupt */
tb_ring_poll_complete(net->rx_ring.ring);
return rx_packets;
}
/* Rx ring callback registered in tbnet_open(); schedules NAPI polling */
static void tbnet_start_poll(void *data)
{
	struct tbnet *net = (struct tbnet *)data;
	struct napi_struct *napi = &net->napi;

	napi_schedule(napi);
}
/*
 * ndo_open callback. Allocates the Tx ring, the Tx HopID and the Rx ring,
 * enables NAPI and starts the ThunderboltIP login sequence.
 *
 * Returns 0 on success or a negative errno. Everything acquired before a
 * failure is released again, including the Tx HopID when Rx ring
 * allocation fails.
 */
static int tbnet_open(struct net_device *dev)
{
	struct tbnet *net = netdev_priv(dev);
	struct tb_xdomain *xd = net->xd;
	u16 sof_mask, eof_mask;
	struct tb_ring *ring;
	int hopid;
	int ret;

	netif_carrier_off(dev);

	ring = tb_ring_alloc_tx(xd->tb->nhi, -1, TBNET_RING_SIZE,
				RING_FLAG_FRAME);
	if (!ring) {
		netdev_err(dev, "failed to allocate Tx ring\n");
		return -ENOMEM;
	}
	net->tx_ring.ring = ring;

	hopid = tb_xdomain_alloc_out_hopid(xd, -1);
	if (hopid < 0) {
		netdev_err(dev, "failed to allocate Tx HopID\n");
		ret = hopid;
		goto err_free_tx_ring;
	}
	net->local_transmit_path = hopid;

	sof_mask = BIT(TBIP_PDF_FRAME_START);
	eof_mask = BIT(TBIP_PDF_FRAME_END);

	ring = tb_ring_alloc_rx(xd->tb->nhi, -1, TBNET_RING_SIZE,
				RING_FLAG_FRAME, 0, sof_mask, eof_mask,
				tbnet_start_poll, net);
	if (!ring) {
		netdev_err(dev, "failed to allocate Rx ring\n");
		ret = -ENOMEM;
		goto err_release_hopid;
	}
	net->rx_ring.ring = ring;

	napi_enable(&net->napi);
	start_login(net);

	return 0;

err_release_hopid:
	/* Give back the Tx HopID allocated above so it is not leaked */
	tb_xdomain_release_out_hopid(xd, hopid);
err_free_tx_ring:
	tb_ring_free(net->tx_ring.ring);
	net->tx_ring.ring = NULL;
	return ret;
}
static int tbnet_stop(struct net_device *dev)
{
struct tbnet *net = netdev_priv(dev);
napi_disable(&net->napi);
cancel_work_sync(&net->disconnect_work);
tbnet_tear_down(net, true);
tb_ring_free(net->rx_ring.ring);
net->rx_ring.ring = NULL;
tb_xdomain_release_out_hopid(net->xd, net->local_transmit_path);
tb_ring_free(net->tx_ring.ring);
net->tx_ring.ring = NULL;
return 0;
}
/*
 * Finalize the Tx frames of one packet: write the total frame count into
 * every frame header, compute the IP/TCP/UDP checksums in software when
 * the stack asked for it (CHECKSUM_PARTIAL) and sync the frames for DMA.
 *
 * @frames holds the @frame_count frames the packet was copied into; the
 * first one starts with the packet headers.
 *
 * Returns true when the frames are ready to be queued, false if the
 * checksum cannot be computed for this packet (the caller drops it).
 */
static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb,
	struct tbnet_frame **frames, u32 frame_count)
{
	struct thunderbolt_ip_frame_header *hdr = page_address(frames[0]->page);
	struct device *dma_dev = tb_ring_dma_device(net->tx_ring.ring);
	__wsum wsum = htonl(skb->len - skb_transport_offset(skb));
	unsigned int i, len, offset = skb_transport_offset(skb);
	__be16 protocol = skb->protocol;
	void *data = skb->data;
	void *dest = hdr + 1;
	__sum16 *tucso;

	if (skb->ip_summed != CHECKSUM_PARTIAL) {
		/* No need to calculate checksum so we just update the
		 * total frame count and sync the frames for DMA.
		 */
		for (i = 0; i < frame_count; i++) {
			hdr = page_address(frames[i]->page);
			hdr->frame_count = cpu_to_le32(frame_count);
			dma_sync_single_for_device(dma_dev,
				frames[i]->frame.buffer_phy,
				tbnet_frame_size(frames[i]), DMA_TO_DEVICE);
		}

		return true;
	}

	if (protocol == htons(ETH_P_8021Q)) {
		struct vlan_hdr *vhdr, vh;

		vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(vh), &vh);
		if (!vhdr)
			return false;

		protocol = vhdr->h_vlan_encapsulated_proto;
	}

	/* Data points to the beginning of the packet. The checksum fields
	 * are located by their offset from it and patched in the copy that
	 * lives in the first frame (dest):
	 * ipcso will update IP checksum.
	 * tucso will update TCP/UDP checksum.
	 */
	if (protocol == htons(ETH_P_IP)) {
		__sum16 *ipcso = dest + ((void *)&(ip_hdr(skb)->check) - data);

		*ipcso = 0;
		*ipcso = ip_fast_csum(dest + skb_network_offset(skb),
				      ip_hdr(skb)->ihl);

		if (ip_hdr(skb)->protocol == IPPROTO_TCP)
			tucso = dest + ((void *)&(tcp_hdr(skb)->check) - data);
		else if (ip_hdr(skb)->protocol == IPPROTO_UDP)
			tucso = dest + ((void *)&(udp_hdr(skb)->check) - data);
		else
			return false;

		*tucso = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
					    ip_hdr(skb)->daddr, 0,
					    ip_hdr(skb)->protocol, 0);
	} else if (skb_is_gso(skb) && skb_is_gso_v6(skb)) {
		/* TCPv6 GSO packet. Only look at the GSO type once we know
		 * the skb is GSO at all, and fall through to the checksum
		 * calculation below instead of rejecting the packet.
		 */
		tucso = dest + ((void *)&(tcp_hdr(skb)->check) - data);
		*tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
					  &ipv6_hdr(skb)->daddr, 0,
					  IPPROTO_TCP, 0);
	} else if (protocol == htons(ETH_P_IPV6)) {
		tucso = dest + skb_checksum_start_offset(skb) + skb->csum_offset;
		*tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
					  &ipv6_hdr(skb)->daddr, 0,
					  ipv6_hdr(skb)->nexthdr, 0);
	} else {
		return false;
	}

	/* First frame was headers, rest of the frames contain data.
	 * Calculate checksum over each frame.
	 */
	for (i = 0; i < frame_count; i++) {
		hdr = page_address(frames[i]->page);
		dest = (void *)(hdr + 1) + offset;
		len = le32_to_cpu(hdr->frame_size) - offset;
		wsum = csum_partial(dest, len, wsum);
		hdr->frame_count = cpu_to_le32(frame_count);

		/* Only the first frame carries the headers to skip */
		offset = 0;
	}

	*tucso = csum_fold(wsum);

	/* Checksum is finally calculated and we don't touch the memory
	 * anymore, so DMA sync the frames now.
	 */
	for (i = 0; i < frame_count; i++) {
		dma_sync_single_for_device(dma_dev, frames[i]->frame.buffer_phy,
			tbnet_frame_size(frames[i]), DMA_TO_DEVICE);
	}

	return true;
}
/*
 * Map fragment @frag_num of @skb with kmap_atomic(). Stores the fragment
 * size in @len and returns the address of the fragment data.
 */
static void *tbnet_kmap_frag(struct sk_buff *skb, unsigned int frag_num,
	unsigned int *len)
{
	const skb_frag_t *frag = &skb_shinfo(skb)->frags[frag_num];
	void *vaddr;

	*len = skb_frag_size(frag);

	vaddr = kmap_atomic(skb_frag_page(frag));
	return vaddr + skb_frag_off(frag);
}
/*
 * ndo_start_xmit callback.
 *
 * Copies the linear part and the page fragments of @skb into as many Tx
 * frames as needed (TBNET_MAX_PAYLOAD_SIZE bytes of payload each), fills
 * the frame headers, lets tbnet_xmit_csum_and_map() finalize the frames
 * and queues them to the Tx ring.
 *
 * Returns NETDEV_TX_BUSY (after stopping the queue) when the ring does not
 * have enough free buffers, NETDEV_TX_OK otherwise - also when the packet
 * had to be dropped, in which case tx_errors is incremented.
 */
static netdev_tx_t tbnet_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct tbnet *net = netdev_priv(dev);
struct tbnet_frame *frames[MAX_SKB_FRAGS];
u16 frame_id = atomic_read(&net->frame_id);
struct thunderbolt_ip_frame_header *hdr;
/* len: bytes left in the current source chunk (linear data or fragment) */
unsigned int len = skb_headlen(skb);
/* data_len: bytes of the packet not yet assigned to a frame */
unsigned int data_len = skb->len;
unsigned int nframes, i;
unsigned int frag = 0;
void *src = skb->data;
u32 frame_index = 0;
/* unmap: true while src points into a fragment mapped by tbnet_kmap_frag() */
bool unmap = false;
void *dest;
nframes = DIV_ROUND_UP(data_len, TBNET_MAX_PAYLOAD_SIZE);
if (tbnet_available_buffers(&net->tx_ring) < nframes) {
netif_stop_queue(net->dev);
return NETDEV_TX_BUSY;
}
frames[frame_index] = tbnet_get_tx_buffer(net);
if (!frames[frame_index])
goto err_drop;
hdr = page_address(frames[frame_index]->page);
dest = hdr + 1;
/* If overall packet is bigger than the frame data size */
while (data_len > TBNET_MAX_PAYLOAD_SIZE) {
unsigned int size_left = TBNET_MAX_PAYLOAD_SIZE;
hdr->frame_size = cpu_to_le32(TBNET_MAX_PAYLOAD_SIZE);
hdr->frame_index = cpu_to_le16(frame_index);
hdr->frame_id = cpu_to_le16(frame_id);
do {
if (len > size_left) {
/* Copy data onto Tx buffer data with
* full frame size then break and go to
* next frame
*/
memcpy(dest, src, size_left);
len -= size_left;
dest += size_left;
src += size_left;
break;
}
memcpy(dest, src, len);
size_left -= len;
dest += len;
if (unmap) {
kunmap_atomic(src);
unmap = false;
}
/* Ensure all fragments have been processed */
if (frag < skb_shinfo(skb)->nr_frags) {
/* Map and then unmap quickly */
src = tbnet_kmap_frag(skb, frag++, &len);
unmap = true;
} else if (unlikely(size_left > 0)) {
goto err_drop;
}
} while (size_left > 0);
data_len -= TBNET_MAX_PAYLOAD_SIZE;
frame_index++;
/* NOTE(review): if this fails while unmap is true, err_drop is reached
 * with a fragment still mapped - confirm this cannot happen given the
 * availability check at the top.
 */
frames[frame_index] = tbnet_get_tx_buffer(net);
if (!frames[frame_index])
goto err_drop;
hdr = page_address(frames[frame_index]->page);
dest = hdr + 1;
}
/* Last (or only) frame: it carries the remaining data_len bytes, so its
 * ring frame size is set explicitly instead of meaning "full frame".
 */
hdr->frame_size = cpu_to_le32(data_len);
hdr->frame_index = cpu_to_le16(frame_index);
hdr->frame_id = cpu_to_le16(frame_id);
frames[frame_index]->frame.size = data_len + sizeof(*hdr);
/* In case the remaining data_len is smaller than a frame */
while (len < data_len) {
memcpy(dest, src, len);
data_len -= len;
dest += len;
if (unmap) {
kunmap_atomic(src);
unmap = false;
}
if (frag < skb_shinfo(skb)->nr_frags) {
src = tbnet_kmap_frag(skb, frag++, &len);
unmap = true;
} else if (unlikely(data_len > 0)) {
goto err_drop;
}
}
memcpy(dest, src, data_len);
if (unmap)
kunmap_atomic(src);
if (!tbnet_xmit_csum_and_map(net, skb, frames, frame_index + 1))
goto err_drop;
for (i = 0; i < frame_index + 1; i++)
tb_ring_tx(net->tx_ring.ring, &frames[i]->frame);
/* The frame ID only advances when the remote end announced support */
if (net->svc->prtcstns & TBNET_MATCH_FRAGS_ID)
atomic_inc(&net->frame_id);
net->stats.tx_packets++;
net->stats.tx_bytes += skb->len;
dev_consume_skb_any(skb);
return NETDEV_TX_OK;
err_drop:
/* We can re-use the buffers */
/* NOTE(review): on the paths where frames[frame_index] had already been
 * obtained (copy errors and checksum failure) frame_index + 1 buffers
 * were taken from the ring, yet only frame_index are given back here -
 * confirm whether one buffer is lost on those paths.
 */
net->tx_ring.cons -= frame_index;
dev_kfree_skb_any(skb);
net->stats.tx_errors++;
return NETDEV_TX_OK;
}
static void tbnet_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *stats)
{
struct tbnet *net = netdev_priv(dev);
stats->tx_packets = net->stats.tx_packets;
stats->rx_packets = net->stats.rx_packets;
stats->tx_bytes = net->stats.tx_bytes;
stats->rx_bytes = net->stats.rx_bytes;
stats->rx_errors = net->stats.rx_errors + net->stats.rx_length_errors +
net->stats.rx_over_errors + net->stats.rx_crc_errors +
net->stats.rx_missed_errors;
stats->tx_errors = net->stats.tx_errors;
stats->rx_length_errors = net->stats.rx_length_errors;
stats->rx_over_errors = net->stats.rx_over_errors;
stats->rx_crc_errors = net->stats.rx_crc_errors;
stats->rx_missed_errors = net->stats.rx_missed_errors;
}
/* Network device operations; installed on the net_device in tbnet_probe() */
static const struct net_device_ops tbnet_netdev_ops = {
.ndo_open = tbnet_open,
.ndo_stop = tbnet_stop,
.ndo_start_xmit = tbnet_start_xmit,
.ndo_get_stats64 = tbnet_get_stats64,
};
/*
 * Derive the MAC address of @dev from the physical port number and hashes
 * of the local XDomain UUID, and set it as the device hardware address.
 */
static void tbnet_generate_mac(struct net_device *dev)
{
	const struct tbnet *net = netdev_priv(dev);
	const struct tb_xdomain *xd = net->xd;
	const u32 *uuid_words = (const u32 *)xd->local_uuid;
	u32 first, second;
	u8 addr[ETH_ALEN];
	u8 phy_port;

	phy_port = tb_phy_port_from_link(TBNET_L0_PORT_NUM(xd->route));

	/* Unicast and locally administered MAC */
	addr[0] = phy_port << 4 | 0x02;

	/* Bytes 1-4 come from the first hash, byte 5 from a second hash
	 * seeded with the first one.
	 */
	first = jhash2(uuid_words, 4, 0);
	memcpy(&addr[1], &first, sizeof(first));

	second = jhash2(uuid_words, 4, first);
	addr[5] = second & 0xff;

	eth_hw_addr_set(dev, addr);
}
/*
 * Service driver probe. Allocates and initializes the network device for
 * the XDomain service, registers the ThunderboltIP protocol handler and
 * finally registers the network device.
 *
 * Returns 0 on success or a negative errno.
 */
static int tbnet_probe(struct tb_service *svc, const struct tb_service_id *id)
{
	struct tb_xdomain *xd = tb_service_parent(svc);
	struct net_device *dev;
	struct tbnet *net;
	int ret;

	dev = alloc_etherdev(sizeof(struct tbnet));
	if (!dev)
		return -ENOMEM;

	SET_NETDEV_DEV(dev, &svc->dev);

	net = netdev_priv(dev);

	INIT_DELAYED_WORK(&net->login_work, tbnet_login_work);
	INIT_WORK(&net->connected_work, tbnet_connected_work);
	INIT_WORK(&net->disconnect_work, tbnet_disconnect_work);
	mutex_init(&net->connection_lock);
	atomic_set(&net->command_id, 0);
	atomic_set(&net->frame_id, 0);

	net->svc = svc;
	net->dev = dev;
	net->xd = xd;

	tbnet_generate_mac(dev);

	strcpy(dev->name, "thunderbolt%d");
	dev->netdev_ops = &tbnet_netdev_ops;

	/* ThunderboltIP does not segment TSO packets. A large packet is
	 * split into Thunderbolt frames (maximum payload size of each
	 * frame is 4084 bytes) and the checksum is calculated here over
	 * the whole packet.
	 *
	 * The receiving side does the opposite if the host OS supports
	 * LRO, otherwise it needs to split the large packet into MTU
	 * sized smaller packets.
	 *
	 * To get large packets from the networking stack in the first
	 * place, support for most of the offloading features has to be
	 * announced here.
	 */
	dev->hw_features = NETIF_F_SG | NETIF_F_ALL_TSO | NETIF_F_GRO |
			   NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
	dev->features = dev->hw_features | NETIF_F_HIGHDMA;
	dev->hard_header_len += sizeof(struct thunderbolt_ip_frame_header);

	netif_napi_add(dev, &net->napi, tbnet_poll, NAPI_POLL_WEIGHT);

	/* MTU range: 68 - 65522 */
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = TBNET_MAX_MTU - ETH_HLEN;

	net->handler.uuid = &tbnet_svc_uuid;
	net->handler.callback = tbnet_handle_packet;
	net->handler.data = net;
	tb_register_protocol_handler(&net->handler);

	tb_service_set_drvdata(svc, net);

	ret = register_netdev(dev);
	if (!ret)
		return 0;

	/* Registration failed, undo what was set up above */
	tb_unregister_protocol_handler(&net->handler);
	free_netdev(dev);
	return ret;
}
/*
 * Service driver remove. Unregisters the network device and the protocol
 * handler, then frees the network device.
 */
static void tbnet_remove(struct tb_service *svc)
{
	struct tbnet *net = tb_service_get_drvdata(svc);
	struct net_device *dev = net->dev;

	unregister_netdev(dev);
	tb_unregister_protocol_handler(&net->handler);
	free_netdev(dev);
}
/* Service driver shutdown: tear the connection down and send a logout */
static void tbnet_shutdown(struct tb_service *svc)
{
	struct tbnet *net = tb_service_get_drvdata(svc);

	tbnet_tear_down(net, true);
}
/*
 * System sleep suspend callback. Stops login work, detaches and tears down
 * a running interface, and unregisters the protocol handler. Always
 * returns 0.
 */
static int __maybe_unused tbnet_suspend(struct device *dev)
{
	struct tbnet *net = tb_service_get_drvdata(tb_to_service(dev));
	struct net_device *ndev = net->dev;

	stop_login(net);

	if (netif_running(ndev)) {
		netif_device_detach(ndev);
		tbnet_tear_down(net, true);
	}

	tb_unregister_protocol_handler(&net->handler);

	return 0;
}
/*
 * System sleep resume callback. Registers the protocol handler again and,
 * if the interface is running, re-attaches it and restarts the login
 * sequence. Always returns 0.
 */
static int __maybe_unused tbnet_resume(struct device *dev)
{
	struct tbnet *net = tb_service_get_drvdata(tb_to_service(dev));
	struct net_device *ndev = net->dev;

	tb_register_protocol_handler(&net->handler);

	/* The carrier stays off until the connection is set up again */
	netif_carrier_off(ndev);

	if (netif_running(ndev)) {
		netif_device_attach(ndev);
		start_login(net);
	}

	return 0;
}
/* System sleep power management callbacks of the driver */
static const struct dev_pm_ops tbnet_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(tbnet_suspend, tbnet_resume)
};
/* Thunderbolt services this driver binds to: the "network" service */
static const struct tb_service_id tbnet_ids[] = {
{ TB_SERVICE("network", 1) },
{ },
};
MODULE_DEVICE_TABLE(tbsvc, tbnet_ids);
/* Service driver descriptor registered in tbnet_init() */
static struct tb_service_driver tbnet_driver = {
.driver = {
.owner = THIS_MODULE,
.name = "thunderbolt-net",
.pm = &tbnet_pm_ops,
},
.probe = tbnet_probe,
.remove = tbnet_remove,
.shutdown = tbnet_shutdown,
.id_table = tbnet_ids,
};
/*
 * Module init. Creates and registers the "network" XDomain property
 * directory and registers the service driver.
 *
 * Returns 0 on success or a negative errno. On failure the property
 * directory is unregistered (if it was registered) and freed, so nothing
 * is left behind.
 */
static int __init tbnet_init(void)
{
	int ret;

	tbnet_dir = tb_property_create_dir(&tbnet_dir_uuid);
	if (!tbnet_dir)
		return -ENOMEM;

	tb_property_add_immediate(tbnet_dir, "prtcid", 1);
	tb_property_add_immediate(tbnet_dir, "prtcvers", 1);
	tb_property_add_immediate(tbnet_dir, "prtcrevs", 1);
	/* Announce support for match frags ID (bit 1) and 64K frames
	 * (bit 2). Bit 0 is reserved for full E2E flow control which we do
	 * not support at the moment.
	 */
	tb_property_add_immediate(tbnet_dir, "prtcstns",
				  TBNET_MATCH_FRAGS_ID | TBNET_64K_FRAMES);

	ret = tb_register_property_dir("network", tbnet_dir);
	if (ret)
		goto err_free_dir;

	ret = tb_register_service_driver(&tbnet_driver);
	if (ret)
		goto err_unregister;

	return 0;

err_unregister:
	tb_unregister_property_dir("network", tbnet_dir);
err_free_dir:
	tb_property_free_dir(tbnet_dir);

	return ret;
}
module_init(tbnet_init);
/*
 * Module exit. Undoes tbnet_init() in reverse order: the service driver is
 * unregistered first, then the property directory is unregistered and
 * freed.
 */
static void __exit tbnet_exit(void)
{
	struct tb_property_dir *dir = tbnet_dir;

	tb_unregister_service_driver(&tbnet_driver);

	tb_unregister_property_dir("network", dir);
	tb_property_free_dir(dir);
}
module_exit(tbnet_exit);
MODULE_AUTHOR("Amir Levy <amir.jer.levy@intel.com>");
MODULE_AUTHOR("Michael Jamet <michael.jamet@intel.com>");
MODULE_AUTHOR("Mika Westerberg <mika.westerberg@linux.intel.com>");
MODULE_DESCRIPTION("Thunderbolt network driver");
MODULE_LICENSE("GPL v2");