From 3167b4d72226ec4c65bc295c25893230608d2135 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Mon, 26 Sep 2022 15:35:58 +0930 Subject: [PATCH 01/17] liteeth: LiteX Ethernet device LiteX is a soft system-on-chip that targets FPGAs. LiteETH is a basic network device that is commonly used in LiteX designs. Signed-off-by: Joel Stanley Reviewed-by: Ramon Fried --- drivers/net/Kconfig | 5 + drivers/net/Makefile | 1 + drivers/net/liteeth.c | 214 ++++++++++++++++++++++++++++++++++++++++++ include/linux/litex.h | 84 +++++++++++++++++ 4 files changed, 304 insertions(+) create mode 100644 drivers/net/liteeth.c create mode 100644 include/linux/litex.h diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 8df3dce6df..029bf3872a 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -438,6 +438,11 @@ config KSZ9477 This driver implements a DSA switch driver for the KSZ9477 family of GbE switches using the I2C interface. +config LITEETH + bool "LiteX LiteEth Ethernet MAC" + help + Driver for the LiteEth Ethernet MAC from LiteX. + config MVGBE bool "Marvell Orion5x/Kirkwood network interface support" depends on ARCH_KIRKWOOD || ARCH_ORION5X diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 96b7678e98..d3fc6b7d3e 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -47,6 +47,7 @@ obj-$(CONFIG_GMAC_ROCKCHIP) += gmac_rockchip.o obj-$(CONFIG_HIGMACV300_ETH) += higmacv300.o obj-$(CONFIG_KS8851_MLL) += ks8851_mll.o obj-$(CONFIG_KSZ9477) += ksz9477.o +obj-$(CONFIG_LITEETH) += liteeth.o obj-$(CONFIG_MACB) += macb.o obj-$(CONFIG_MCFFEC) += mcffec.o mcfmii.o obj-$(CONFIG_MDIO_IPQ4019) += mdio-ipq4019.o diff --git a/drivers/net/liteeth.c b/drivers/net/liteeth.c new file mode 100644 index 0000000000..84d3852723 --- /dev/null +++ b/drivers/net/liteeth.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * LiteX Liteeth Ethernet + * + * Copyright 2021 Joel Stanley , IBM Corp. 
+ */ + +#include + +#include +#include +#include + +#define LITEETH_WRITER_SLOT 0x00 +#define LITEETH_WRITER_LENGTH 0x04 +#define LITEETH_WRITER_ERRORS 0x08 +#define LITEETH_WRITER_EV_STATUS 0x0C +#define LITEETH_WRITER_EV_PENDING 0x10 +#define LITEETH_WRITER_EV_ENABLE 0x14 +#define LITEETH_READER_START 0x18 +#define LITEETH_READER_READY 0x1C +#define LITEETH_READER_LEVEL 0x20 +#define LITEETH_READER_SLOT 0x24 +#define LITEETH_READER_LENGTH 0x28 +#define LITEETH_READER_EV_STATUS 0x2C +#define LITEETH_READER_EV_PENDING 0x30 +#define LITEETH_READER_EV_ENABLE 0x34 +#define LITEETH_PREAMBLE_CRC 0x38 +#define LITEETH_PREAMBLE_ERRORS 0x3C +#define LITEETH_CRC_ERRORS 0x40 + +struct liteeth { + struct udevice *dev; + + void __iomem *base; + u32 slot_size; + + /* Tx */ + u32 tx_slot; + u32 num_tx_slots; + void __iomem *tx_base; + + /* Rx */ + u32 rx_slot; + u32 num_rx_slots; + void __iomem *rx_base; +}; + +static int liteeth_recv(struct udevice *dev, int flags, uchar **packetp) +{ + struct liteeth *priv = dev_get_priv(dev); + u8 rx_slot; + int len; + + if (!litex_read8(priv->base + LITEETH_WRITER_EV_PENDING)) { + debug("liteeth: No packet ready\n"); + return -EAGAIN; + } + + rx_slot = litex_read8(priv->base + LITEETH_WRITER_SLOT); + len = litex_read32(priv->base + LITEETH_WRITER_LENGTH); + + debug("%s: slot %d len 0x%x\n", __func__, rx_slot, len); + + *packetp = priv->rx_base + rx_slot * priv->slot_size; + + return len; +} + +static int liteeth_free_pkt(struct udevice *dev, uchar *packet, int length) +{ + struct liteeth *priv = dev_get_priv(dev); + + litex_write8(priv->base + LITEETH_WRITER_EV_PENDING, 1); + + return 0; +} + +static int liteeth_start(struct udevice *dev) +{ + struct liteeth *priv = dev_get_priv(dev); + + /* Clear pending events */ + litex_write8(priv->base + LITEETH_WRITER_EV_PENDING, 1); + litex_write8(priv->base + LITEETH_READER_EV_PENDING, 1); + + /* Enable events */ + litex_write8(priv->base + LITEETH_WRITER_EV_ENABLE, 1); + litex_write8(priv->base + 
LITEETH_READER_EV_ENABLE, 1); + + return 0; +} + +static void liteeth_stop(struct udevice *dev) +{ + struct liteeth *priv = dev_get_priv(dev); + + litex_write8(priv->base + LITEETH_WRITER_EV_ENABLE, 0); + litex_write8(priv->base + LITEETH_READER_EV_ENABLE, 0); +} + +static int liteeth_send(struct udevice *dev, void *packet, int len) +{ + struct liteeth *priv = dev_get_priv(dev); + void __iomem *txbuffer; + + if (!litex_read8(priv->base + LITEETH_READER_READY)) { + printf("liteeth: reader not ready\n"); + return -EAGAIN; + } + + /* Reject oversize packets */ + if (unlikely(len > priv->slot_size)) + return -EMSGSIZE; + + txbuffer = priv->tx_base + priv->tx_slot * priv->slot_size; + memcpy_toio(txbuffer, packet, len); + litex_write8(priv->base + LITEETH_READER_SLOT, priv->tx_slot); + litex_write16(priv->base + LITEETH_READER_LENGTH, len); + litex_write8(priv->base + LITEETH_READER_START, 1); + + priv->tx_slot = (priv->tx_slot + 1) % priv->num_tx_slots; + + return 0; +} + +static void liteeth_setup_slots(struct liteeth *priv) +{ + int err; + + err = ofnode_read_u32(dev_ofnode(priv->dev), "litex,rx-slots", &priv->num_rx_slots); + if (err) { + dev_dbg(priv->dev, "unable to get litex,rx-slots, using 2\n"); + priv->num_rx_slots = 2; + } + + err = ofnode_read_u32(dev_ofnode(priv->dev), "litex,tx-slots", &priv->num_tx_slots); + if (err) { + dev_dbg(priv->dev, "unable to get litex,tx-slots, using 2\n"); + priv->num_tx_slots = 2; + } + + err = ofnode_read_u32(dev_ofnode(priv->dev), "litex,slot-size", &priv->slot_size); + if (err) { + dev_dbg(priv->dev, "unable to get litex,slot-size, using 0x800\n"); + priv->slot_size = 0x800; + } +} + +static int liteeth_remove(struct udevice *dev) +{ + liteeth_stop(dev); + + return 0; +} + +static const struct eth_ops liteeth_ops = { + .start = liteeth_start, + .stop = liteeth_stop, + .send = liteeth_send, + .recv = liteeth_recv, + .free_pkt = liteeth_free_pkt, +}; + +static int liteeth_of_to_plat(struct udevice *dev) +{ + struct eth_pdata 
*pdata = dev_get_plat(dev); + struct liteeth *priv = dev_get_priv(dev); + void __iomem *buf_base; + + pdata->iobase = dev_read_addr(dev); + + priv->dev = dev; + + priv->base = dev_remap_addr_name(dev, "mac"); + if (!priv->base) { + dev_err(dev, "failed to map registers\n"); + return -EINVAL; + } + + buf_base = dev_remap_addr_name(dev, "buffer"); + if (!buf_base) { + dev_err(dev, "failed to map buffer\n"); + return -EINVAL; + } + + liteeth_setup_slots(priv); + + /* Rx slots */ + priv->rx_base = buf_base; + priv->rx_slot = 0; + + /* Tx slots come after Rx slots */ + priv->tx_base = buf_base + priv->num_rx_slots * priv->slot_size; + priv->tx_slot = 0; + + return 0; +} + +static const struct udevice_id liteeth_ids[] = { + { .compatible = "litex,liteeth" }, + {} +}; + +U_BOOT_DRIVER(liteeth) = { + .name = "liteeth", + .id = UCLASS_ETH, + .of_match = liteeth_ids, + .of_to_plat = liteeth_of_to_plat, + .plat_auto = sizeof(struct eth_pdata), + .remove = liteeth_remove, + .ops = &liteeth_ops, + .priv_auto = sizeof(struct liteeth), +}; diff --git a/include/linux/litex.h b/include/linux/litex.h new file mode 100644 index 0000000000..5e91db41fd --- /dev/null +++ b/include/linux/litex.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common LiteX header providing + * helper functions for accessing CSRs. + * + * Copyright (C) 2019-2020 Antmicro + */ + +#ifndef _LINUX_LITEX_H +#define _LINUX_LITEX_H + +#include +#include + +static inline void _write_litex_subregister(u32 val, void __iomem *addr) +{ + writel((u32 __force)cpu_to_le32(val), addr); +} + +static inline u32 _read_litex_subregister(void __iomem *addr) +{ + return le32_to_cpu((__le32 __force)readl(addr)); +} + +/* + * LiteX SoC Generator, depending on the configuration, can split a single + * logical CSR (Control&Status Register) into a series of consecutive physical + * registers. 
+ * + * For example, in the configuration with 8-bit CSR Bus, a 32-bit aligned, + * 32-bit wide logical CSR will be laid out as four 32-bit physical + * subregisters, each one containing one byte of meaningful data. + * + * For Linux support, upstream LiteX enforces a 32-bit wide CSR bus, which + * means that only larger-than-32-bit CSRs will be split across multiple + * subregisters (e.g., a 64-bit CSR will be spread across two consecutive + * 32-bit subregisters). + * + * For details see: https://github.com/enjoy-digital/litex/wiki/CSR-Bus + */ + +static inline void litex_write8(void __iomem *reg, u8 val) +{ + _write_litex_subregister(val, reg); +} + +static inline void litex_write16(void __iomem *reg, u16 val) +{ + _write_litex_subregister(val, reg); +} + +static inline void litex_write32(void __iomem *reg, u32 val) +{ + _write_litex_subregister(val, reg); +} + +static inline void litex_write64(void __iomem *reg, u64 val) +{ + _write_litex_subregister(val >> 32, reg); + _write_litex_subregister(val, reg + 4); +} + +static inline u8 litex_read8(void __iomem *reg) +{ + return _read_litex_subregister(reg); +} + +static inline u16 litex_read16(void __iomem *reg) +{ + return _read_litex_subregister(reg); +} + +static inline u32 litex_read32(void __iomem *reg) +{ + return _read_litex_subregister(reg); +} + +static inline u64 litex_read64(void __iomem *reg) +{ + return ((u64)_read_litex_subregister(reg) << 32) | + _read_litex_subregister(reg + 4); +} + +#endif /* _LINUX_LITEX_H */ From f94d008a9d8c3f5c0ef6a9863ae6590b3cbe48d5 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 9 Oct 2022 17:51:45 +0200 Subject: [PATCH 02/17] net: dwc_eth_qos: Split TX and RX DMA rings Separate TX and RX DMA rings to make their handling slightly clearer. This is a preparatory patch for bulk RX descriptor flushing. 
Signed-off-by: Marek Vasut Reviewed-by: Patrice Chotard Reviewed-by: Ramon Fried --- drivers/net/dwc_eth_qos.c | 33 ++++++++++++++++++++++----------- drivers/net/dwc_eth_qos.h | 3 ++- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/net/dwc_eth_qos.c b/drivers/net/dwc_eth_qos.c index 001b028fa1..dde2c183b0 100644 --- a/drivers/net/dwc_eth_qos.c +++ b/drivers/net/dwc_eth_qos.c @@ -75,9 +75,6 @@ */ static void *eqos_alloc_descs(struct eqos_priv *eqos, unsigned int num) { - eqos->desc_size = ALIGN(sizeof(struct eqos_desc), - (unsigned int)ARCH_DMA_MINALIGN); - return memalign(eqos->desc_size, num * eqos->desc_size); } @@ -89,8 +86,8 @@ static void eqos_free_descs(void *descs) static struct eqos_desc *eqos_get_desc(struct eqos_priv *eqos, unsigned int num, bool rx) { - return eqos->descs + - ((rx ? EQOS_DESCRIPTORS_TX : 0) + num) * eqos->desc_size; + return (rx ? eqos->rx_descs : eqos->tx_descs) + + (num * eqos->desc_size); } void eqos_inval_desc_generic(void *desc) @@ -1001,7 +998,8 @@ static int eqos_start(struct udevice *dev) /* Set up descriptors */ - memset(eqos->descs, 0, eqos->desc_size * EQOS_DESCRIPTORS_NUM); + memset(eqos->tx_descs, 0, eqos->desc_size * EQOS_DESCRIPTORS_TX); + memset(eqos->rx_descs, 0, eqos->desc_size * EQOS_DESCRIPTORS_RX); for (i = 0; i < EQOS_DESCRIPTORS_TX; i++) { struct eqos_desc *tx_desc = eqos_get_desc(eqos, i, false); @@ -1234,13 +1232,23 @@ static int eqos_probe_resources_core(struct udevice *dev) debug("%s(dev=%p):\n", __func__, dev); - eqos->descs = eqos_alloc_descs(eqos, EQOS_DESCRIPTORS_NUM); - if (!eqos->descs) { - debug("%s: eqos_alloc_descs() failed\n", __func__); + eqos->desc_size = ALIGN(sizeof(struct eqos_desc), + (unsigned int)ARCH_DMA_MINALIGN); + + eqos->tx_descs = eqos_alloc_descs(eqos, EQOS_DESCRIPTORS_TX); + if (!eqos->tx_descs) { + debug("%s: eqos_alloc_descs(tx) failed\n", __func__); ret = -ENOMEM; goto err; } + eqos->rx_descs = eqos_alloc_descs(eqos, EQOS_DESCRIPTORS_RX); + if 
(!eqos->rx_descs) { + debug("%s: eqos_alloc_descs(rx) failed\n", __func__); + ret = -ENOMEM; + goto err_free_tx_descs; + } + eqos->tx_dma_buf = memalign(EQOS_BUFFER_ALIGN, EQOS_MAX_PACKET_SIZE); if (!eqos->tx_dma_buf) { debug("%s: memalign(tx_dma_buf) failed\n", __func__); @@ -1276,7 +1284,9 @@ err_free_rx_dma_buf: err_free_tx_dma_buf: free(eqos->tx_dma_buf); err_free_descs: - eqos_free_descs(eqos->descs); + eqos_free_descs(eqos->rx_descs); +err_free_tx_descs: + eqos_free_descs(eqos->tx_descs); err: debug("%s: returns %d\n", __func__, ret); @@ -1292,7 +1302,8 @@ static int eqos_remove_resources_core(struct udevice *dev) free(eqos->rx_pkt); free(eqos->rx_dma_buf); free(eqos->tx_dma_buf); - eqos_free_descs(eqos->descs); + eqos_free_descs(eqos->rx_descs); + eqos_free_descs(eqos->tx_descs); debug("%s: OK\n", __func__); return 0; diff --git a/drivers/net/dwc_eth_qos.h b/drivers/net/dwc_eth_qos.h index b35e774263..e3e43c86d1 100644 --- a/drivers/net/dwc_eth_qos.h +++ b/drivers/net/dwc_eth_qos.h @@ -264,7 +264,8 @@ struct eqos_priv { struct phy_device *phy; ofnode phy_of_node; u32 max_speed; - void *descs; + void *tx_descs; + void *rx_descs; int tx_desc_idx, rx_desc_idx; unsigned int desc_size; void *tx_dma_buf; From e9d3fc7e46a81be3a9530713b4e75f4205961170 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 9 Oct 2022 17:51:46 +0200 Subject: [PATCH 03/17] net: dwc_eth_qos: Add support for bulk RX descriptor cleaning Add new desc_per_cacheline property which lets a platform run RX descriptor cleanup after every power-of-2 - 1 received packets instead of every packet. This is useful on platforms where (axi_bus_width EQOS_AXI_WIDTH_n * DMA DSL inter-descriptor word skip count + DMA descriptor size) is less than cache line size, which necessitates packing multiple DMA descriptors into single cache line. In case of TX descriptors, this is not a problem, since the driver always does synchronous TX, i.e. 
the TX descriptor is always written, flushed and polled for completion in eqos_send(). In case of RX descriptors, it is necessary to update their status in bulk, i.e. after the entire cache line worth of RX descriptors has been used up to receive data. Signed-off-by: Marek Vasut Reviewed-by: Patrice Chotard Reviewed-by: Ramon Fried --- drivers/net/dwc_eth_qos.c | 67 +++++++++++++++++++++++++-------------- drivers/net/dwc_eth_qos.h | 2 ++ 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/drivers/net/dwc_eth_qos.c b/drivers/net/dwc_eth_qos.c index dde2c183b0..afc47b56ff 100644 --- a/drivers/net/dwc_eth_qos.c +++ b/drivers/net/dwc_eth_qos.c @@ -75,7 +75,7 @@ */ static void *eqos_alloc_descs(struct eqos_priv *eqos, unsigned int num) { - return memalign(eqos->desc_size, num * eqos->desc_size); + return memalign(ARCH_DMA_MINALIGN, num * eqos->desc_size); } static void eqos_free_descs(void *descs) @@ -92,7 +92,7 @@ static struct eqos_desc *eqos_get_desc(struct eqos_priv *eqos, void eqos_inval_desc_generic(void *desc) { - unsigned long start = (unsigned long)desc; + unsigned long start = (unsigned long)desc & ~(ARCH_DMA_MINALIGN - 1); unsigned long end = ALIGN(start + sizeof(struct eqos_desc), ARCH_DMA_MINALIGN); @@ -101,7 +101,7 @@ void eqos_inval_desc_generic(void *desc) void eqos_flush_desc_generic(void *desc) { - unsigned long start = (unsigned long)desc; + unsigned long start = (unsigned long)desc & ~(ARCH_DMA_MINALIGN - 1); unsigned long end = ALIGN(start + sizeof(struct eqos_desc), ARCH_DMA_MINALIGN); @@ -1185,6 +1185,7 @@ static int eqos_recv(struct udevice *dev, int flags, uchar **packetp) static int eqos_free_pkt(struct udevice *dev, uchar *packet, int length) { struct eqos_priv *eqos = dev_get_priv(dev); + u32 idx, idx_mask = eqos->desc_per_cacheline - 1; uchar *packet_expected; struct eqos_desc *rx_desc; @@ -1200,24 +1201,30 @@ static int eqos_free_pkt(struct udevice *dev, uchar *packet, int length) eqos->config->ops->eqos_inval_buffer(packet, 
length); - rx_desc = eqos_get_desc(eqos, eqos->rx_desc_idx, true); - - rx_desc->des0 = 0; - mb(); - eqos->config->ops->eqos_flush_desc(rx_desc); - eqos->config->ops->eqos_inval_buffer(packet, length); - rx_desc->des0 = (u32)(ulong)packet; - rx_desc->des1 = 0; - rx_desc->des2 = 0; - /* - * Make sure that if HW sees the _OWN write below, it will see all the - * writes to the rest of the descriptor too. - */ - mb(); - rx_desc->des3 = EQOS_DESC3_OWN | EQOS_DESC3_BUF1V; - eqos->config->ops->eqos_flush_desc(rx_desc); - - writel((ulong)rx_desc, &eqos->dma_regs->ch0_rxdesc_tail_pointer); + if ((eqos->rx_desc_idx & idx_mask) == idx_mask) { + for (idx = eqos->rx_desc_idx - idx_mask; + idx <= eqos->rx_desc_idx; + idx++) { + rx_desc = eqos_get_desc(eqos, idx, true); + rx_desc->des0 = 0; + mb(); + eqos->config->ops->eqos_flush_desc(rx_desc); + eqos->config->ops->eqos_inval_buffer(packet, length); + rx_desc->des0 = (u32)(ulong)(eqos->rx_dma_buf + + (idx * EQOS_MAX_PACKET_SIZE)); + rx_desc->des1 = 0; + rx_desc->des2 = 0; + /* + * Make sure that if HW sees the _OWN write below, + * it will see all the writes to the rest of the + * descriptor too. + */ + mb(); + rx_desc->des3 = EQOS_DESC3_OWN | EQOS_DESC3_BUF1V; + eqos->config->ops->eqos_flush_desc(rx_desc); + } + writel((ulong)rx_desc, &eqos->dma_regs->ch0_rxdesc_tail_pointer); + } eqos->rx_desc_idx++; eqos->rx_desc_idx %= EQOS_DESCRIPTORS_RX; @@ -1228,12 +1235,26 @@ static int eqos_free_pkt(struct udevice *dev, uchar *packet, int length) static int eqos_probe_resources_core(struct udevice *dev) { struct eqos_priv *eqos = dev_get_priv(dev); + unsigned int desc_step; int ret; debug("%s(dev=%p):\n", __func__, dev); - eqos->desc_size = ALIGN(sizeof(struct eqos_desc), - (unsigned int)ARCH_DMA_MINALIGN); + /* Maximum distance between neighboring descriptors, in Bytes. 
*/ + desc_step = sizeof(struct eqos_desc) + + EQOS_DMA_CH0_CONTROL_DSL_MASK * eqos->config->axi_bus_width; + if (desc_step < ARCH_DMA_MINALIGN) { + /* + * The EQoS hardware implementation cannot place one descriptor + * per cacheline, it is necessary to place multiple descriptors + * per cacheline in memory and do cache management carefully. + */ + eqos->desc_size = BIT(fls(desc_step) - 1); + } else { + eqos->desc_size = ALIGN(sizeof(struct eqos_desc), + (unsigned int)ARCH_DMA_MINALIGN); + } + eqos->desc_per_cacheline = ARCH_DMA_MINALIGN / eqos->desc_size; eqos->tx_descs = eqos_alloc_descs(eqos, EQOS_DESCRIPTORS_TX); if (!eqos->tx_descs) { diff --git a/drivers/net/dwc_eth_qos.h b/drivers/net/dwc_eth_qos.h index e3e43c86d1..8fccd6f057 100644 --- a/drivers/net/dwc_eth_qos.h +++ b/drivers/net/dwc_eth_qos.h @@ -162,6 +162,7 @@ struct eqos_dma_regs { #define EQOS_DMA_SYSBUS_MODE_BLEN4 BIT(1) #define EQOS_DMA_CH0_CONTROL_DSL_SHIFT 18 +#define EQOS_DMA_CH0_CONTROL_DSL_MASK 0x7 #define EQOS_DMA_CH0_CONTROL_PBLX8 BIT(16) #define EQOS_DMA_CH0_TX_CONTROL_TXPBL_SHIFT 16 @@ -268,6 +269,7 @@ struct eqos_priv { void *rx_descs; int tx_desc_idx, rx_desc_idx; unsigned int desc_size; + unsigned int desc_per_cacheline; void *tx_dma_buf; void *rx_dma_buf; void *rx_pkt; From b0fcc48cb37057ccbe29481d3297f7b9243a4b92 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 14 Oct 2022 19:43:37 +0200 Subject: [PATCH 04/17] net: improve check for no IP options There's no reason we should accept an IP packet with a malformed IHL field. So ensure that it is exactly 5, not just <= 5. 
Signed-off-by: Rasmus Villemoes Reviewed-by: Ramon Fried --- net/net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/net.c b/net/net.c index b27b021d07..be4374ffc4 100644 --- a/net/net.c +++ b/net/net.c @@ -1226,7 +1226,7 @@ void net_process_received_packet(uchar *in_packet, int len) if ((ip->ip_hl_v & 0xf0) != 0x40) return; /* Can't deal with IP options (headers != 20 bytes) */ - if ((ip->ip_hl_v & 0x0f) > 0x05) + if ((ip->ip_hl_v & 0x0f) != 0x05) return; /* Check the Checksum of the header */ if (!ip_checksum_ok((uchar *)ip, IP_HDR_SIZE)) { From ad359d89ec5424fc18a289fa5fcc1a4947930dba Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 14 Oct 2022 19:43:38 +0200 Subject: [PATCH 05/17] net: compare received length to sizeof(ip_hdr), not sizeof(ip_udp_hdr) While the code mostly/only handles UDP packets, it's possible for the last fragment of a fragmented UDP packet to be smaller than 28 bytes; it can be as small as 21 bytes (an IP header plus one byte of payload). So until we've performed the defragmentation step and thus know whether we're now holding a full packet, we should only check for the existence of the fields in the ip header, i.e. that there are at least 20 bytes present. In practice, we always seem to be handed a "len" of minimum 60 from the device layer, i.e. minimal ethernet frame length minus FCS, so this is mostly theoretical. After we've fetched the header's claimed length and used that to update the len variable, check that the header itself claims to be the minimal possible length. 
This is probably how CVE-2022-30552 should have been dealt with in the first place, because net_defragment() is not the only place that wants to know the size of the IP datagram payload: If we receive a non-fragmented ICMP packet, we pass "len" to receive_icmp() which in turn may pass it to ping_receive() which does compute_ip_checksum(icmph, len - IP_HDR_SIZE) and due to the signature of compute_ip_checksum(), that would then lead to accessing ~4G of address space, very likely leading to a crash. Signed-off-by: Rasmus Villemoes --- net/net.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/net.c b/net/net.c index be4374ffc4..434c3b411e 100644 --- a/net/net.c +++ b/net/net.c @@ -1208,9 +1208,9 @@ void net_process_received_packet(uchar *in_packet, int len) case PROT_IP: debug_cond(DEBUG_NET_PKT, "Got IP\n"); /* Before we start poking the header, make sure it is there */ - if (len < IP_UDP_HDR_SIZE) { + if (len < IP_HDR_SIZE) { debug("len bad %d < %lu\n", len, - (ulong)IP_UDP_HDR_SIZE); + (ulong)IP_HDR_SIZE); return; } /* Check the packet length */ @@ -1219,6 +1219,10 @@ void net_process_received_packet(uchar *in_packet, int len) return; } len = ntohs(ip->ip_len); + if (len < IP_HDR_SIZE) { + debug("bad ip->ip_len %d < %d\n", len, (int)IP_HDR_SIZE); + return; + } debug_cond(DEBUG_NET_PKT, "len=%d, v=%02x\n", len, ip->ip_hl_v & 0xff); From 1817c3824a08bbad7fd2fbae1a6e73be896e8e5e Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 14 Oct 2022 19:43:39 +0200 Subject: [PATCH 06/17] net: (actually/better) deal with CVE-2022-{30790,30552} I hit a strange problem with v2022.10: Sometimes my tftp transfer would seemingly just hang. It only happened for some files. Moreover, changing tftpblocksize from 65464 to 65460 or 65000 made it work again for all the files I tried. So I started suspecting it had something to do with the file sizes and in particular the way the tftp blocks get fragmented and reassembled. 
v2022.01 showed no problems with any of the files or any value of tftpblocksize. Looking at what had changed in net.c or tftp.c since January showed only one remotely interesting thing, b85d130ea0ca. So I fired up wireshark on my host to see if somehow one of the packets would be too small. But no, with both v2022.01 and v2022.10, the exact same sequence of packets were sent, all but the last of size 1500, and the last being 1280 bytes. But then it struck me that 1280 is 5*256, so one of the two bytes on-the-wire is 0 and the other is 5, and when then looking at the code again the lack of endianness conversion becomes obvious. [ntohs is both applied to ip->ip_off just above, as well as to ip->ip_len just a little further down when the "len" is actually computed]. IOWs the current code would falsely reject any packet which happens to be a multiple of 256 bytes in size, breaking tftp transfers somewhat randomly, and if it did get one of those "malicious" packets with ip_len set to, say, 27, it would be seen by this check as being 6912 and hence not rejected. ==== Now, just adding the missing ntohs() would make my initial problem go away, in that I can now download the file where the last fragment ends up being 1280 bytes. But there's another bug in the code and/or analysis: The right-hand side is too strict, in that it is ok for the last fragment not to have a multiple of 8 bytes as payload - it really must be ok, because nothing in the IP spec says that IP datagrams must have a multiple of 8 bytes as payload. And comments in the code also mention this. To fix that, replace the comparison with <= IP_HDR_SIZE and add another check that len is actually a multiple of 8 when the "more fragments" bit is set - which it necessarily is for the case where offset8 ends up being 0, since we're only called when (ip_off & (IP_OFFS | IP_FLAGS_MFRAG)). ==== So, does this fix CVE-2022-30790 for real? 
It certainly correctly rejects the POC code which relies on sending a packet of size 27 with the MFRAG flag set. Can the attack be carried out with a size 27 packet that doesn't set MFRAG (hence must set a non-zero fragment offset)? I dunno. If we get a packet without MFRAG, we update h->last_byte in the hole we've found to be start+len, hence we'd enter one of if ((h >= thisfrag) && (h->last_byte <= start + len)) { or } else if (h->last_byte <= start + len) { and thus won't reach any of the /* overlaps with initial part of the hole: move this hole */ newh = thisfrag + (len / 8); /* fragment sits in the middle: split the hole */ newh = thisfrag + (len / 8); IOW these division are now guaranteed to be exact, and thus I think the scenario in CVE-2022-30790 cannot happen anymore. ==== However, there's a big elephant in the room, which has always been spelled out in the comments, and which makes me believe that one can still cause mayhem even with packets whose payloads are all 8-byte aligned: This code doesn't deal with a fragment that overlaps with two different holes (thus being a superset of a previously-received fragment). Suppose each character below represents 8 bytes, with D being already received data, H being a hole descriptor (struct hole), h being non-populated chunks, and P representing where the payload of a just received packet should go: DDDHhhhhDDDDHhhhDDDD PPPPPPPPP I'm pretty sure in this case we'd end up with h being the first hole, enter the simple } else if (h->last_byte <= start + len) { /* overlaps with final part of the hole: shorten this hole */ h->last_byte = start; case, and thus in the memcpy happily overwrite the second H with our chosen payload. This is probably worth fixing... 
Signed-off-by: Rasmus Villemoes --- net/net.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/net.c b/net/net.c index 434c3b411e..987c25931e 100644 --- a/net/net.c +++ b/net/net.c @@ -924,7 +924,11 @@ static struct ip_udp_hdr *__net_defragment(struct ip_udp_hdr *ip, int *lenp) int offset8, start, len, done = 0; u16 ip_off = ntohs(ip->ip_off); - if (ip->ip_len < IP_MIN_FRAG_DATAGRAM_SIZE) + /* + * Calling code already rejected <, but we don't have to deal + * with an IP fragment with no payload. + */ + if (ntohs(ip->ip_len) <= IP_HDR_SIZE) return NULL; /* payload starts after IP header, this fragment is in there */ @@ -934,6 +938,10 @@ static struct ip_udp_hdr *__net_defragment(struct ip_udp_hdr *ip, int *lenp) start = offset8 * 8; len = ntohs(ip->ip_len) - IP_HDR_SIZE; + /* All but last fragment must have a multiple-of-8 payload. */ + if ((len & 7) && (ip_off & IP_FLAGS_MFRAG)) + return NULL; + if (start + len > IP_MAXUDP) /* fragment extends too far */ return NULL; From 06653c701040f34e05d587bf14c2600f8cb3460f Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 14 Oct 2022 19:43:40 +0200 Subject: [PATCH 07/17] net: fix ip_len in reassembled IP datagram For some reason, the ip_len field in a reassembled IP datagram is set to just the size of the payload, but it should be set to the value it would have had if the datagram had never been fragmented in the first place, i.e. size of payload plus size of IP header. That latter value is currently returned correctly via the "len" variable. And before entering net_defragment(), len does have the value ntohs(ip->ip_len), so if we're not dealing with a fragment (so net_defragment leaves *len alone), that relationship of course also holds after the net_defragment() call. 
The only use I can find of ip->ip_len after the net_defragment call is the ntohs(ip->udp_len) > ntohs(ip->ip_len) sanity check - none of the functions that are passed the "ip" pointer themselves inspect ->ip_len but instead use the passed len. But that sanity check is a bit odd, since the RHS really should be "ntohs(ip->ip_len) - 20", i.e. the IP payload size. Now that we've fixed things so that len == ntohs(ip->ip_len) in all cases, change that sanity check to use len-20 as the RHS. Signed-off-by: Rasmus Villemoes --- net/net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/net.c b/net/net.c index 987c25931e..073fb681e5 100644 --- a/net/net.c +++ b/net/net.c @@ -1040,8 +1040,8 @@ static struct ip_udp_hdr *__net_defragment(struct ip_udp_hdr *ip, int *lenp) if (!done) return NULL; - localip->ip_len = htons(total_len); *lenp = total_len + IP_HDR_SIZE; + localip->ip_len = htons(*lenp); return localip; } @@ -1289,7 +1289,7 @@ void net_process_received_packet(uchar *in_packet, int len) return; } - if (ntohs(ip->udp_len) < UDP_HDR_SIZE || ntohs(ip->udp_len) > ntohs(ip->ip_len)) + if (ntohs(ip->udp_len) < UDP_HDR_SIZE || ntohs(ip->udp_len) > len - IP_HDR_SIZE) return; debug_cond(DEBUG_DEV_PKT, From 4b8c44e39c9eb1717831e3b3f31c33e0932b0767 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 14 Oct 2022 19:43:41 +0200 Subject: [PATCH 08/17] net: tftp: use IS_ENABLED(CONFIG_NET_TFTP_VARS) instead of #if Nothing inside this block depends on NET_TFTP_VARS to be set to parse correctly. Switch to C if() in preparation for adding code before this (to avoid a declaration-after-statement warning). 
Signed-off-by: Rasmus Villemoes [trini: Update to cover CONFIG_TFTP_PORT case as well] Signed-off-by: Tom Rini --- net/tftp.c | 60 +++++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/net/tftp.c b/net/tftp.c index dea9c25ffd..9d3370f60a 100644 --- a/net/tftp.c +++ b/net/tftp.c @@ -710,43 +710,43 @@ static int tftp_init_load_addr(void) void tftp_start(enum proto_t protocol) { -#if CONFIG_NET_TFTP_VARS - char *ep; /* Environment pointer */ + __maybe_unused char *ep; /* Environment pointer */ + if (IS_ENABLED(CONFIG_NET_TFTP_VARS)) { - /* - * Allow the user to choose TFTP blocksize and timeout. - * TFTP protocol has a minimal timeout of 1 second. - */ + /* + * Allow the user to choose TFTP blocksize and timeout. + * TFTP protocol has a minimal timeout of 1 second. + */ - ep = env_get("tftpblocksize"); - if (ep != NULL) - tftp_block_size_option = simple_strtol(ep, NULL, 10); + ep = env_get("tftpblocksize"); + if (ep != NULL) + tftp_block_size_option = simple_strtol(ep, NULL, 10); - ep = env_get("tftpwindowsize"); - if (ep != NULL) - tftp_window_size_option = simple_strtol(ep, NULL, 10); + ep = env_get("tftpwindowsize"); + if (ep != NULL) + tftp_window_size_option = simple_strtol(ep, NULL, 10); - ep = env_get("tftptimeout"); - if (ep != NULL) - timeout_ms = simple_strtol(ep, NULL, 10); + ep = env_get("tftptimeout"); + if (ep != NULL) + timeout_ms = simple_strtol(ep, NULL, 10); - if (timeout_ms < 1000) { - printf("TFTP timeout (%ld ms) too low, set min = 1000 ms\n", - timeout_ms); - timeout_ms = 1000; + if (timeout_ms < 1000) { + printf("TFTP timeout (%ld ms) too low, set min = 1000 ms\n", + timeout_ms); + timeout_ms = 1000; + } + + ep = env_get("tftptimeoutcountmax"); + if (ep != NULL) + tftp_timeout_count_max = simple_strtol(ep, NULL, 10); + + if (tftp_timeout_count_max < 0) { + printf("TFTP timeout count max (%d ms) negative, set to 0\n", + tftp_timeout_count_max); + tftp_timeout_count_max = 0; + } } - 
ep = env_get("tftptimeoutcountmax"); - if (ep != NULL) - tftp_timeout_count_max = simple_strtol(ep, NULL, 10); - - if (tftp_timeout_count_max < 0) { - printf("TFTP timeout count max (%d ms) negative, set to 0\n", - tftp_timeout_count_max); - tftp_timeout_count_max = 0; - } -#endif - debug("TFTP blocksize = %i, TFTP windowsize = %d timeout = %ld ms\n", tftp_block_size_option, tftp_window_size_option, timeout_ms); From 087648b5df8db0d4786ff86e61e7616ebc181cf4 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 14 Oct 2022 19:43:42 +0200 Subject: [PATCH 09/17] net: tftp: sanitize tftp block size, especially for TX U-Boot does not support IP fragmentation on TX (and unless CONFIG_IP_DEFRAG is set, neither on RX). So the blocks we send must fit in a single ethernet packet. Currently, if tftpblocksize is set to something like 5000 and I tftpput a large enough file, U-Boot crashes because we overflow net_tx_packet (which only has room for 1500 bytes plus change). Similarly, if tftpblocksize is set to something larger than what we can actually receive (e.g. 50000, with NET_MAXDEFRAG being 16384), any tftp get just hangs because we never receive any packets. Signed-off-by: Rasmus Villemoes Reviewed-by: Ramon Fried --- net/tftp.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/net/tftp.c b/net/tftp.c index 9d3370f60a..39421f8daa 100644 --- a/net/tftp.c +++ b/net/tftp.c @@ -708,9 +708,54 @@ static int tftp_init_load_addr(void) return 0; } +static int saved_tftp_block_size_option; +static void sanitize_tftp_block_size_option(enum proto_t protocol) +{ + int cap, max_defrag; + + switch (protocol) { + case TFTPGET: + max_defrag = config_opt_enabled(CONFIG_IP_DEFRAG, CONFIG_NET_MAXDEFRAG, 0); + if (max_defrag) { + /* Account for IP, UDP and TFTP headers. */ + cap = max_defrag - (20 + 8 + 4); + /* RFC2348 sets a hard upper limit. 
*/ + cap = min(cap, 65464); + break; + } + /* + * If not CONFIG_IP_DEFRAG, cap at the same value as + * for tftp put, namely normal MTU minus protocol + * overhead. + */ + fallthrough; + case TFTPPUT: + default: + /* + * U-Boot does not support IP fragmentation on TX, so + * this must be small enough that it fits normal MTU + * (and small enough that it fits net_tx_packet which + * has room for PKTSIZE_ALIGN bytes). + */ + cap = 1468; + } + if (tftp_block_size_option > cap) { + printf("Capping tftp block size option to %d (was %d)\n", + cap, tftp_block_size_option); + saved_tftp_block_size_option = tftp_block_size_option; + tftp_block_size_option = cap; + } +} + void tftp_start(enum proto_t protocol) { __maybe_unused char *ep; /* Environment pointer */ + + if (saved_tftp_block_size_option) { + tftp_block_size_option = saved_tftp_block_size_option; + saved_tftp_block_size_option = 0; + } + if (IS_ENABLED(CONFIG_NET_TFTP_VARS)) { /* @@ -747,6 +792,8 @@ void tftp_start(enum proto_t protocol) } } + sanitize_tftp_block_size_option(protocol); + debug("TFTP blocksize = %i, TFTP windowsize = %d timeout = %ld ms\n", tftp_block_size_option, tftp_window_size_option, timeout_ms); From 068696863980f86504934bdb1e4d0d6359b76092 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 17 Oct 2022 09:52:51 +0200 Subject: [PATCH 10/17] net: deal with fragment-overlapping-two-holes case With a suitable sequence of malicious packets, it's currently possible to get a hole descriptor to contain arbitrary attacker-controlled contents, and then with one more packet to use that as an arbitrary write vector. While one could possibly change the algorithm so we instead loop over all holes, and in each hole puts as much of the current fragment as belongs there (taking care to carefully update the hole list as appropriate), it's not worth the complexity: In real, non-malicious scenarios, one never gets overlapping fragments, and certainly not fragments that would be supersets of one another. 
So instead opt for this simple protection: Simply don't allow the eventual memcpy() to write beyond the last_byte of the current hole. Signed-off-by: Rasmus Villemoes --- net/net.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/net.c b/net/net.c index 073fb681e5..6f0a48361c 100644 --- a/net/net.c +++ b/net/net.c @@ -985,10 +985,14 @@ static struct ip_udp_hdr *__net_defragment(struct ip_udp_hdr *ip, int *lenp) } /* - * There is some overlap: fix the hole list. This code doesn't - * deal with a fragment that overlaps with two different holes - * (thus being a superset of a previously-received fragment). + * There is some overlap: fix the hole list. This code deals + * with a fragment that overlaps with two different holes + * (thus being a superset of a previously-received fragment) + * by only using the part of the fragment that fits in the + * first hole. */ + if (h->last_byte < start + len) + len = h->last_byte - start; if ((h >= thisfrag) && (h->last_byte <= start + len)) { /* complete overlap with hole: remove hole */ From 3cdbbe52f70ff4fdd7aa9b66de2040b9f304c0b2 Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Thu, 3 Nov 2022 14:44:22 -0700 Subject: [PATCH 11/17] drivers: net: aquantia: fix typos Fix a couple of typos: - s/Acquantia/Aquantia/ - s/firmare/firmware/ Signed-off-by: Tim Harvey Reviewed-by: Ramon Fried --- drivers/net/phy/aquantia.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/aquantia.c b/drivers/net/phy/aquantia.c index 7e950fe0c2..79fbc11536 100644 --- a/drivers/net/phy/aquantia.c +++ b/drivers/net/phy/aquantia.c @@ -136,7 +136,7 @@ static int aquantia_read_fw(u8 **fw_addr, size_t *fw_length) *fw_addr = NULL; *fw_length = 0; - debug("Loading Acquantia microcode from %s %s\n", + debug("Loading Aquantia microcode from %s %s\n", CONFIG_PHY_AQUANTIA_FW_PART, CONFIG_PHY_AQUANTIA_FW_NAME); ret = fs_set_blk_dev("mmc", CONFIG_PHY_AQUANTIA_FW_PART, FS_TYPE_ANY); if (ret < 0) @@ 
-163,7 +163,7 @@ static int aquantia_read_fw(u8 **fw_addr, size_t *fw_length) *fw_addr = addr; *fw_length = length; - debug("Found Acquantia microcode.\n"); + debug("Found Aquantia microcode.\n"); cleanup: if (ret < 0) { @@ -257,7 +257,7 @@ static int aquantia_upload_firmware(struct phy_device *phydev) strlcpy(version, (char *)&addr[dram_offset + VERSION_STRING_OFFSET], VERSION_STRING_SIZE); - printf("%s loading firmare version '%s'\n", phydev->dev->name, version); + printf("%s loading firmware version '%s'\n", phydev->dev->name, version); /* stall the microcprocessor */ phy_write(phydev, MDIO_MMD_VEND1, UP_CONTROL, @@ -288,7 +288,7 @@ static int aquantia_upload_firmware(struct phy_device *phydev) phy_write(phydev, MDIO_MMD_VEND1, UP_CONTROL, UP_RUN_STALL_OVERRIDE); - printf("%s firmare loading done.\n", phydev->dev->name); + printf("%s firmware loading done.\n", phydev->dev->name); done: free(addr); return ret; From a3bf193bf4ea8703bcf96b1a34713fb2ae87aa39 Mon Sep 17 00:00:00 2001 From: "Ying-Chun Liu (PaulLiu)" Date: Tue, 8 Nov 2022 14:17:28 +0800 Subject: [PATCH 12/17] net: Add TCP protocol Currently file transfers are done using tftp or NFS both over udp. This requires a request to be sent from client (u-boot) to the boot server. The current standard is TCP with selective acknowledgment. 
Signed-off-by: Duncan Hare Signed-off-by: Duncan Hare Signed-off-by: Ying-Chun Liu (PaulLiu) Reviewed-by: Simon Glass Cc: Christian Gmeiner Cc: Joe Hershberger Cc: Michal Simek Cc: Ramon Fried Reviewed-by: Ramon Fried --- include/net.h | 34 ++- include/net/tcp.h | 299 +++++++++++++++++++ net/Kconfig | 16 ++ net/Makefile | 1 + net/net.c | 30 ++ net/tcp.c | 720 ++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 1092 insertions(+), 8 deletions(-) create mode 100644 include/net/tcp.h create mode 100644 net/tcp.c diff --git a/include/net.h b/include/net.h index 32364ed0ce..f4140523c2 100644 --- a/include/net.h +++ b/include/net.h @@ -365,6 +365,7 @@ struct vlan_ethernet_hdr { #define PROT_NCSI 0x88f8 /* NC-SI control packets */ #define IPPROTO_ICMP 1 /* Internet Control Message Protocol */ +#define IPPROTO_TCP 6 /* Transmission Control Protocol */ #define IPPROTO_UDP 17 /* User Datagram Protocol */ /* @@ -690,19 +691,36 @@ static inline void net_send_packet(uchar *pkt, int len) (void) eth_send(pkt, len); } -/* - * Transmit "net_tx_packet" as UDP packet, performing ARP request if needed - * (ether will be populated) +/** + * net_send_ip_packet() - Transmit "net_tx_packet" as UDP or TCP packet, + * send ARP request if needed (ether will be populated) + * @ether: Raw packet buffer + * @dest: IP address to send the datagram to + * @dport: Destination UDP port + * @sport: Source UDP port + * @payload_len: Length of data after the UDP header + * @action: TCP action to be performed + * @tcp_seq_num: TCP sequence number of this transmission + * @tcp_ack_num: TCP stream acknolegement number * - * @param ether Raw packet buffer - * @param dest IP address to send the datagram to - * @param dport Destination UDP port - * @param sport Source UDP port - * @param payload_len Length of data after the UDP header + * Return: 0 on success, other value on failure */ int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport, int payload_len, int proto, u8 
action, u32 tcp_seq_num, u32 tcp_ack_num); +/** + * net_send_tcp_packet() - Transmit TCP packet. + * @payload_len: length of payload + * @dport: Destination TCP port + * @sport: Source TCP port + * @action: TCP action to be performed + * @tcp_seq_num: TCP sequence number of this transmission + * @tcp_ack_num: TCP stream acknolegement number + * + * Return: 0 on success, other value on failure + */ +int net_send_tcp_packet(int payload_len, int dport, int sport, u8 action, + u32 tcp_seq_num, u32 tcp_ack_num); int net_send_udp_packet(uchar *ether, struct in_addr dest, int dport, int sport, int payload_len); diff --git a/include/net/tcp.h b/include/net/tcp.h new file mode 100644 index 0000000000..322551694f --- /dev/null +++ b/include/net/tcp.h @@ -0,0 +1,299 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * TCP Support with SACK for file transfer. + * + * Copyright 2017 Duncan Hare, All rights reserved. + */ + +#define TCP_ACTIVITY 127 /* Number of packets received */ + /* before console progress mark */ +/** + * struct ip_tcp_hdr - IP and TCP header + * @ip_hl_v: header length and version + * @ip_tos: type of service + * @ip_len: total length + * @ip_id: identification + * @ip_off: fragment offset field + * @ip_ttl: time to live + * @ip_p: protocol + * @ip_sum: checksum + * @ip_src: Source IP address + * @ip_dst: Destination IP address + * @tcp_src: TCP source port + * @tcp_dst: TCP destination port + * @tcp_seq: TCP sequence number + * @tcp_ack: TCP Acknowledgment number + * @tcp_hlen: 4 bits TCP header Length/4, 4 bits reserved, 2 more bits reserved + * @tcp_flag: flags of TCP + * @tcp_win: TCP windows size + * @tcp_xsum: Checksum + * @tcp_ugr: Pointer to urgent data + */ +struct ip_tcp_hdr { + u8 ip_hl_v; + u8 ip_tos; + u16 ip_len; + u16 ip_id; + u16 ip_off; + u8 ip_ttl; + u8 ip_p; + u16 ip_sum; + struct in_addr ip_src; + struct in_addr ip_dst; + u16 tcp_src; + u16 tcp_dst; + u32 tcp_seq; + u32 tcp_ack; + u8 tcp_hlen; + u8 tcp_flags; + u16 tcp_win; + u16 
tcp_xsum; + u16 tcp_ugr; +} __packed; + +#define IP_TCP_HDR_SIZE (sizeof(struct ip_tcp_hdr)) +#define TCP_HDR_SIZE (IP_TCP_HDR_SIZE - IP_HDR_SIZE) + +#define TCP_DATA 0x00 /* Data Packet - internal use only */ +#define TCP_FIN 0x01 /* Finish flag */ +#define TCP_SYN 0x02 /* Synch (start) flag */ +#define TCP_RST 0x04 /* reset flag */ +#define TCP_PUSH 0x08 /* Push - Notify app */ +#define TCP_ACK 0x10 /* Acknowledgment of data received */ +#define TCP_URG 0x20 /* Urgent */ +#define TCP_ECE 0x40 /* Congestion control */ +#define TCP_CWR 0x80 /* Congestion Control */ + +/* + * TCP header options, Seq, MSS, and SACK + */ + +#define TCP_SACK 32 /* Number of packets analyzed */ + /* on leading edge of stream */ + +#define TCP_O_END 0x00 /* End of option list */ +#define TCP_1_NOP 0x01 /* Single padding NOP */ +#define TCP_O_NOP 0x01010101 /* NOPs pad to 32 bit boundary */ +#define TCP_O_MSS 0x02 /* MSS Size option */ +#define TCP_O_SCL 0x03 /* Window Scale option */ +#define TCP_P_SACK 0x04 /* SACK permitted */ +#define TCP_V_SACK 0x05 /* SACK values */ +#define TCP_O_TS 0x08 /* Timestamp option */ +#define TCP_OPT_LEN_2 0x02 +#define TCP_OPT_LEN_3 0x03 +#define TCP_OPT_LEN_4 0x04 +#define TCP_OPT_LEN_6 0x06 +#define TCP_OPT_LEN_8 0x08 +#define TCP_OPT_LEN_A 0x0a /* Timestamp Length */ +#define TCP_MSS 1460 /* Max segment size */ +#define TCP_SCALE 0x01 /* Scale */ + +/** + * struct tcp_mss - TCP option structure for MSS (Max segment size) + * @kind: Field ID + * @len: Field length + * @mss: Segment size value + */ +struct tcp_mss { + u8 kind; + u8 len; + u16 mss; +} __packed; + +/** + * struct tcp_scale - TCP option structure for Windows scale + * @kind: Field ID + * @len: Field length + * @scale: windows shift value used for networks with many hops. 
+ * Typically 4 or more hops + */ +struct tcp_scale { + u8 kind; + u8 len; + u8 scale; +} __packed; + +/** + * struct tcp_sack_p - TCP option structure for SACK permitted + * @kind: Field ID + * @len: Field length + */ +struct tcp_sack_p { + u8 kind; + u8 len; +} __packed; + +/** + * struct sack_edges - structure for SACK edges + * @l: Left edge of stream + * @r: right edge of stream + */ +struct sack_edges { + u32 l; + u32 r; +} __packed; + +#define TCP_SACK_SIZE (sizeof(struct sack_edges)) + +/* + * A TCP stream has holes when packets are missing or disordered. + * A hill is the inverse of a hole, and is data received. + * TCP received hills (a sequence of data), and inferrs Holes + * from the "hills" or packets received. + */ + +#define TCP_SACK_HILLS 4 + +/** + * struct tcp_sack_v - TCP option structure for SACK + * @kind: Field ID + * @len: Field length + * @hill: L & R window edges + */ +struct tcp_sack_v { + u8 kind; + u8 len; + struct sack_edges hill[TCP_SACK_HILLS]; +} __packed; + +/** + * struct tcp_t_opt - TCP option structure for time stamps + * @kind: Field ID + * @len: Field length + * @t_snd: Sender timestamp + * @t_rcv: Receiver timestamp + */ +struct tcp_t_opt { + u8 kind; + u8 len; + u32 t_snd; + u32 t_rcv; +} __packed; + +#define TCP_TSOPT_SIZE (sizeof(struct tcp_t_opt)) + +/* + * ip tcp structure with options + */ + +/** + * struct ip_tcp_hdr_o - IP + TCP header + TCP options + * @hdr: IP + TCP header + * @mss: TCP MSS Option + * @scale: TCP Windows Scale Option + * @sack_p: TCP Sack-Permitted Option + * @t_opt: TCP Timestamp Option + * @end: end of options + */ +struct ip_tcp_hdr_o { + struct ip_tcp_hdr hdr; + struct tcp_mss mss; + struct tcp_scale scale; + struct tcp_sack_p sack_p; + struct tcp_t_opt t_opt; + u8 end; +} __packed; + +#define IP_TCP_O_SIZE (sizeof(struct ip_tcp_hdr_o)) + +/** + * struct ip_tcp_hdr_s - IP + TCP header + TCP options + * @hdr: IP + TCP header + * @t_opt: TCP Timestamp Option + * @sack_v: TCP SACK Option + * @end: 
end of options + */ +struct ip_tcp_hdr_s { + struct ip_tcp_hdr hdr; + struct tcp_t_opt t_opt; + struct tcp_sack_v sack_v; + u8 end; +} __packed; + +#define IP_TCP_SACK_SIZE (sizeof(struct ip_tcp_hdr_s)) + +/* + * TCP pseudo header definitions + */ +#define PSEUDO_PAD_SIZE 8 + +/** + * struct pseudo_hdr - Pseudo Header + * @padding: pseudo hdr size = ip_tcp hdr size + * @p_src: Source IP address + * @p_dst: Destination IP address + * @rsvd: reserved + * @p: protocol + * @len: length of header + */ +struct pseudo_hdr { + u8 padding[PSEUDO_PAD_SIZE]; + struct in_addr p_src; + struct in_addr p_dst; + u8 rsvd; + u8 p; + u16 len; +} __packed; + +#define PSEUDO_HDR_SIZE (sizeof(struct pseudo_hdr)) - PSEUDO_PAD_SIZE + +/** + * union tcp_build_pkt - union for building TCP/IP packet. + * @ph: pseudo header + * @ip: IP and TCP header plus TCP options + * @sack: IP and TCP header plus SACK options + * @raw: buffer + * + * Build Pseudo header in packed buffer + * first, calculate TCP checksum, then build IP header in packed buffer. 
+ * + */ +union tcp_build_pkt { + struct pseudo_hdr ph; + struct ip_tcp_hdr_o ip; + struct ip_tcp_hdr_s sack; + uchar raw[1600]; +} __packed; + +/** + * enum tcp_state - TCP State machine states for connection + * @TCP_CLOSED: Need to send SYN to connect + * @TCP_SYN_SENT: Trying to connect, waiting for SYN ACK + * @TCP_ESTABLISHED: both server & client have a connection + * @TCP_CLOSE_WAIT: Rec FIN, passed to app for FIN, ACK rsp + * @TCP_CLOSING: Rec FIN, sent FIN, ACK waiting for ACK + * @TCP_FIN_WAIT_1: Sent FIN waiting for response + * @TCP_FIN_WAIT_2: Rec ACK from FIN sent, waiting for FIN + */ +enum tcp_state { + TCP_CLOSED, + TCP_SYN_SENT, + TCP_ESTABLISHED, + TCP_CLOSE_WAIT, + TCP_CLOSING, + TCP_FIN_WAIT_1, + TCP_FIN_WAIT_2 +}; + +enum tcp_state tcp_get_tcp_state(void); +void tcp_set_tcp_state(enum tcp_state new_state); +int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len, + u8 action, u32 tcp_seq_num, u32 tcp_ack_num); + +/** + * rxhand_tcp() - An incoming packet handler. + * @pkt: pointer to the application packet + * @dport: destination UDP port + * @sip: source IP address + * @sport: source UDP port + * @len: packet length + */ +typedef void rxhand_tcp(uchar *pkt, unsigned int dport, + struct in_addr sip, unsigned int sport, + unsigned int len); +void tcp_set_tcp_handler(rxhand_tcp *f); + +void rxhand_tcp_f(union tcp_build_pkt *b, unsigned int len); + +u16 tcp_set_pseudo_header(uchar *pkt, struct in_addr src, struct in_addr dest, + int tcp_len, int pkt_len); diff --git a/net/Kconfig b/net/Kconfig index 52e261884d..cb600fe5eb 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -174,6 +174,22 @@ config BOOTP_MAX_ROOT_PATH_LEN help Select maximal length of option 17 root path. +config PROT_TCP + bool "TCP stack" + help + Enable a generic tcp framework that allows defining a custom + handler for tcp protocol. + +config PROT_TCP_SACK + bool "TCP SACK support" + depends on PROT_TCP + help + TCP protocol with SACK. 
SACK means selective acknowledgements. + By turning this option on TCP will learn what segments are already + received. So that it improves TCP's retransmission efficiency. + This option should be turn on if you want to achieve the fastest + file transfer possible. + endif # if NET config SYS_RX_ETH_BUFFER diff --git a/net/Makefile b/net/Makefile index 6c812502d3..d131d1cb1a 100644 --- a/net/Makefile +++ b/net/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_CMD_TFTPBOOT) += tftp.o obj-$(CONFIG_UDP_FUNCTION_FASTBOOT) += fastboot.o obj-$(CONFIG_CMD_WOL) += wol.o obj-$(CONFIG_PROT_UDP) += udp.o +obj-$(CONFIG_PROT_TCP) += tcp.o # Disable this warning as it is triggered by: # sprintf(buf, index ? "foo%d" : "foo", index) diff --git a/net/net.c b/net/net.c index 6f0a48361c..9cb2aab09d 100644 --- a/net/net.c +++ b/net/net.c @@ -117,6 +117,7 @@ #if defined(CONFIG_CMD_WOL) #include "wol.h" #endif +#include /** BOOTP EXTENTIONS **/ @@ -387,6 +388,8 @@ int net_init(void) /* Only need to setup buffer pointers once. 
*/ first_call = 0; + if (IS_ENABLED(CONFIG_PROT_TCP)) + tcp_set_tcp_state(TCP_CLOSED); } return net_init_loop(); @@ -833,6 +836,16 @@ int net_send_udp_packet(uchar *ether, struct in_addr dest, int dport, int sport, IPPROTO_UDP, 0, 0, 0); } +#if defined(CONFIG_PROT_TCP) +int net_send_tcp_packet(int payload_len, int dport, int sport, u8 action, + u32 tcp_seq_num, u32 tcp_ack_num) +{ + return net_send_ip_packet(net_server_ethaddr, net_server_ip, dport, + sport, payload_len, IPPROTO_TCP, action, + tcp_seq_num, tcp_ack_num); +} +#endif + int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport, int payload_len, int proto, u8 action, u32 tcp_seq_num, u32 tcp_ack_num) @@ -864,6 +877,14 @@ int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport, payload_len); pkt_hdr_size = eth_hdr_size + IP_UDP_HDR_SIZE; break; +#if defined(CONFIG_PROT_TCP) + case IPPROTO_TCP: + pkt_hdr_size = eth_hdr_size + + tcp_set_tcp_header(pkt + eth_hdr_size, dport, sport, + payload_len, action, tcp_seq_num, + tcp_ack_num); + break; +#endif default: return -EINVAL; } @@ -1289,6 +1310,15 @@ void net_process_received_packet(uchar *in_packet, int len) if (ip->ip_p == IPPROTO_ICMP) { receive_icmp(ip, len, src_ip, et); return; +#if defined(CONFIG_PROT_TCP) + } else if (ip->ip_p == IPPROTO_TCP) { + debug_cond(DEBUG_DEV_PKT, + "TCP PH (to=%pI4, from=%pI4, len=%d)\n", + &dst_ip, &src_ip, len); + + rxhand_tcp_f((union tcp_build_pkt *)ip, len); + return; +#endif } else if (ip->ip_p != IPPROTO_UDP) { /* Only UDP packets */ return; } diff --git a/net/tcp.c b/net/tcp.c new file mode 100644 index 0000000000..8d338c72e8 --- /dev/null +++ b/net/tcp.c @@ -0,0 +1,720 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2017 Duncan Hare, all rights reserved. + */ + +/* + * General Desription: + * + * TCP support for the wget command, for fast file downloading. 
+ * + * HTTP/TCP Receiver: + * + * Prerequisites: - own ethernet address + * - own IP address + * - Server IP address + * - Server with TCP + * - TCP application (eg wget) + * Next Step HTTPS? + */ +#include +#include +#include +#include +#include +#include +#include + +/* + * TCP sliding window control used by us to request re-TX + */ +static struct tcp_sack_v tcp_lost; + +/* TCP option timestamp */ +static u32 loc_timestamp; +static u32 rmt_timestamp; + +static u32 tcp_seq_init; +static u32 tcp_ack_edge; +static u32 tcp_seq_max; + +static int tcp_activity_count; + +/* + * Search for TCP_SACK and review the comments before the code section + * TCP_SACK is the number of packets at the front of the stream + */ + +enum pkt_state {PKT, NOPKT}; +struct sack_r { + struct sack_edges se; + enum pkt_state st; +}; + +static struct sack_r edge_a[TCP_SACK]; +static unsigned int sack_idx; +static unsigned int prev_len; + +/* + * TCP lengths are stored as a rounded up number of 32 bit words. + * Add 3 to length round up, rounded, then divided into the + * length in 32 bit words. 
+ */ +#define LEN_B_TO_DW(x) ((x) >> 2) +#define ROUND_TCPHDR_LEN(x) (LEN_B_TO_DW((x) + 3)) +#define SHIFT_TO_TCPHDRLEN_FIELD(x) ((x) << 4) +#define GET_TCP_HDR_LEN_IN_BYTES(x) ((x) >> 2) + +/* TCP connection state */ +static enum tcp_state current_tcp_state; + +/* Current TCP RX packet handler */ +static rxhand_tcp *tcp_packet_handler; + +/** + * tcp_get_tcp_state() - get current TCP state + * + * Return: Current TCP state + */ +enum tcp_state tcp_get_tcp_state(void) +{ + return current_tcp_state; +} + +/** + * tcp_set_tcp_state() - set current TCP state + * @new_state: new TCP state + */ +void tcp_set_tcp_state(enum tcp_state new_state) +{ + current_tcp_state = new_state; +} + +static void dummy_handler(uchar *pkt, unsigned int dport, + struct in_addr sip, unsigned int sport, + unsigned int len) +{ +} + +/** + * tcp_set_tcp_handler() - set a handler to receive data + * @f: handler + */ +void tcp_set_tcp_handler(rxhand_tcp *f) +{ + debug_cond(DEBUG_INT_STATE, "--- net_loop TCP handler set (%p)\n", f); + if (!f) + tcp_packet_handler = dummy_handler; + else + tcp_packet_handler = f; +} + +/** + * tcp_set_pseudo_header() - set TCP pseudo header + * @pkt: the packet + * @src: source IP address + * @dest: destinaion IP address + * @tcp_len: tcp length + * @pkt_len: packet length + * + * Return: the checksum of the packet + */ +u16 tcp_set_pseudo_header(uchar *pkt, struct in_addr src, struct in_addr dest, + int tcp_len, int pkt_len) +{ + union tcp_build_pkt *b = (union tcp_build_pkt *)pkt; + int checksum_len; + + /* + * Pseudo header + * + * Zero the byte after the last byte so that the header checksum + * will always work. 
+ */ + pkt[pkt_len] = 0; + + net_copy_ip((void *)&b->ph.p_src, &src); + net_copy_ip((void *)&b->ph.p_dst, &dest); + b->ph.rsvd = 0; + b->ph.p = IPPROTO_TCP; + b->ph.len = htons(tcp_len); + checksum_len = tcp_len + PSEUDO_HDR_SIZE; + + debug_cond(DEBUG_DEV_PKT, + "TCP Pesudo Header (to=%pI4, from=%pI4, Len=%d)\n", + &b->ph.p_dst, &b->ph.p_src, checksum_len); + + return compute_ip_checksum(pkt + PSEUDO_PAD_SIZE, checksum_len); +} + +/** + * net_set_ack_options() - set TCP options in acknowledge packets + * @b: the packet + * + * Return: TCP header length + */ +int net_set_ack_options(union tcp_build_pkt *b) +{ + b->sack.hdr.tcp_hlen = SHIFT_TO_TCPHDRLEN_FIELD(LEN_B_TO_DW(TCP_HDR_SIZE)); + + b->sack.t_opt.kind = TCP_O_TS; + b->sack.t_opt.len = TCP_OPT_LEN_A; + b->sack.t_opt.t_snd = htons(loc_timestamp); + b->sack.t_opt.t_rcv = rmt_timestamp; + b->sack.sack_v.kind = TCP_1_NOP; + b->sack.sack_v.len = 0; + + if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) { + if (tcp_lost.len > TCP_OPT_LEN_2) { + debug_cond(DEBUG_DEV_PKT, "TCP ack opt lost.len %x\n", + tcp_lost.len); + b->sack.sack_v.len = tcp_lost.len; + b->sack.sack_v.kind = TCP_V_SACK; + b->sack.sack_v.hill[0].l = htonl(tcp_lost.hill[0].l); + b->sack.sack_v.hill[0].r = htonl(tcp_lost.hill[0].r); + + /* + * These SACK structures are initialized with NOPs to + * provide TCP header alignment padding. There are 4 + * SACK structures used for both header padding and + * internally. 
+ */ + b->sack.sack_v.hill[1].l = htonl(tcp_lost.hill[1].l); + b->sack.sack_v.hill[1].r = htonl(tcp_lost.hill[1].r); + b->sack.sack_v.hill[2].l = htonl(tcp_lost.hill[2].l); + b->sack.sack_v.hill[2].r = htonl(tcp_lost.hill[2].r); + b->sack.sack_v.hill[3].l = TCP_O_NOP; + b->sack.sack_v.hill[3].r = TCP_O_NOP; + } + + b->sack.hdr.tcp_hlen = SHIFT_TO_TCPHDRLEN_FIELD(ROUND_TCPHDR_LEN(TCP_HDR_SIZE + + TCP_TSOPT_SIZE + + tcp_lost.len)); + } else { + b->sack.sack_v.kind = 0; + b->sack.hdr.tcp_hlen = SHIFT_TO_TCPHDRLEN_FIELD(ROUND_TCPHDR_LEN(TCP_HDR_SIZE + + TCP_TSOPT_SIZE)); + } + + /* + * This returns the actual rounded up length of the + * TCP header to add to the total packet length + */ + + return GET_TCP_HDR_LEN_IN_BYTES(b->sack.hdr.tcp_hlen); +} + +/** + * net_set_ack_options() - set TCP options in SYN packets + * @b: the packet + */ +void net_set_syn_options(union tcp_build_pkt *b) +{ + if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) + tcp_lost.len = 0; + + b->ip.hdr.tcp_hlen = 0xa0; + + b->ip.mss.kind = TCP_O_MSS; + b->ip.mss.len = TCP_OPT_LEN_4; + b->ip.mss.mss = htons(TCP_MSS); + b->ip.scale.kind = TCP_O_SCL; + b->ip.scale.scale = TCP_SCALE; + b->ip.scale.len = TCP_OPT_LEN_3; + if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) { + b->ip.sack_p.kind = TCP_P_SACK; + b->ip.sack_p.len = TCP_OPT_LEN_2; + } else { + b->ip.sack_p.kind = TCP_1_NOP; + b->ip.sack_p.len = TCP_1_NOP; + } + b->ip.t_opt.kind = TCP_O_TS; + b->ip.t_opt.len = TCP_OPT_LEN_A; + loc_timestamp = get_ticks(); + rmt_timestamp = 0; + b->ip.t_opt.t_snd = 0; + b->ip.t_opt.t_rcv = 0; + b->ip.end = TCP_O_END; +} + +int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len, + u8 action, u32 tcp_seq_num, u32 tcp_ack_num) +{ + union tcp_build_pkt *b = (union tcp_build_pkt *)pkt; + int pkt_hdr_len; + int pkt_len; + int tcp_len; + + /* + * Header: 5 32 bit words. 
4 bits TCP header Length, + * 4 bits reserved options + */ + b->ip.hdr.tcp_flags = action; + pkt_hdr_len = IP_TCP_HDR_SIZE; + b->ip.hdr.tcp_hlen = SHIFT_TO_TCPHDRLEN_FIELD(LEN_B_TO_DW(TCP_HDR_SIZE)); + + switch (action) { + case TCP_SYN: + debug_cond(DEBUG_DEV_PKT, + "TCP Hdr:SYN (%pI4, %pI4, sq=%d, ak=%d)\n", + &net_server_ip, &net_ip, + tcp_seq_num, tcp_ack_num); + tcp_activity_count = 0; + net_set_syn_options(b); + tcp_seq_num = 0; + tcp_ack_num = 0; + pkt_hdr_len = IP_TCP_O_SIZE; + if (current_tcp_state == TCP_SYN_SENT) { /* Too many SYNs */ + action = TCP_FIN; + current_tcp_state = TCP_FIN_WAIT_1; + } else { + current_tcp_state = TCP_SYN_SENT; + } + break; + case TCP_ACK: + pkt_hdr_len = IP_HDR_SIZE + net_set_ack_options(b); + b->ip.hdr.tcp_flags = action; + debug_cond(DEBUG_DEV_PKT, + "TCP Hdr:ACK (%pI4, %pI4, s=%d, a=%d, A=%x)\n", + &net_server_ip, &net_ip, tcp_seq_num, tcp_ack_num, + action); + break; + case TCP_FIN: + debug_cond(DEBUG_DEV_PKT, + "TCP Hdr:FIN (%pI4, %pI4, s=%d, a=%d)\n", + &net_server_ip, &net_ip, tcp_seq_num, tcp_ack_num); + payload_len = 0; + pkt_hdr_len = IP_TCP_HDR_SIZE; + current_tcp_state = TCP_FIN_WAIT_1; + break; + + /* Notify connection closing */ + + case (TCP_FIN | TCP_ACK): + case (TCP_FIN | TCP_ACK | TCP_PUSH): + if (current_tcp_state == TCP_CLOSE_WAIT) + current_tcp_state = TCP_CLOSING; + + tcp_ack_edge++; + debug_cond(DEBUG_DEV_PKT, + "TCP Hdr:FIN ACK PSH(%pI4, %pI4, s=%d, a=%d, A=%x)\n", + &net_server_ip, &net_ip, + tcp_seq_num, tcp_ack_edge, action); + fallthrough; + default: + pkt_hdr_len = IP_HDR_SIZE + net_set_ack_options(b); + b->ip.hdr.tcp_flags = action | TCP_PUSH | TCP_ACK; + debug_cond(DEBUG_DEV_PKT, + "TCP Hdr:dft (%pI4, %pI4, s=%d, a=%d, A=%x)\n", + &net_server_ip, &net_ip, + tcp_seq_num, tcp_ack_num, action); + } + + pkt_len = pkt_hdr_len + payload_len; + tcp_len = pkt_len - IP_HDR_SIZE; + + /* TCP Header */ + b->ip.hdr.tcp_ack = htonl(tcp_ack_edge); + b->ip.hdr.tcp_src = htons(sport); + b->ip.hdr.tcp_dst = 
htons(dport); + b->ip.hdr.tcp_seq = htonl(tcp_seq_num); + tcp_seq_num = tcp_seq_num + payload_len; + + /* + * TCP window size - TCP header variable tcp_win. + * Change tcp_win only if you have an understanding of network + * overrun, congestion, TCP segment sizes, TCP windows, TCP scale, + * queuing theory and packet buffering. If there are too few buffers, + * there will be data loss, recovery may work or the sending TCP, + * the server, could abort the stream transmission. + * MSS is governed by maximum Ethernet frame length. + * The number of buffers is governed by the desire to have a queue of + * full buffers to be processed at the destination to maximize + * throughput. Temporary memory use for the boot phase on modern + * SOCs is may not be considered a constraint to buffer space, if + * it is, then the u-boot tftp or nfs kernel netboot should be + * considered. + */ + b->ip.hdr.tcp_win = htons(PKTBUFSRX * TCP_MSS >> TCP_SCALE); + + b->ip.hdr.tcp_xsum = 0; + b->ip.hdr.tcp_ugr = 0; + + b->ip.hdr.tcp_xsum = tcp_set_pseudo_header(pkt, net_ip, net_server_ip, + tcp_len, pkt_len); + + net_set_ip_header((uchar *)&b->ip, net_server_ip, net_ip, + pkt_len, IPPROTO_TCP); + + return pkt_hdr_len; +} + +/** + * tcp_hole() - Selective Acknowledgment (Essential for fast stream transfer) + * @tcp_seq_num: TCP sequence start number + * @len: the length of sequence numbers + * @tcp_seq_max: maximum of sequence numbers + */ +void tcp_hole(u32 tcp_seq_num, u32 len, u32 tcp_seq_max) +{ + u32 idx_sack, sack_in; + u32 sack_end = TCP_SACK - 1; + u32 hill = 0; + enum pkt_state expect = PKT; + u32 seq = tcp_seq_num - tcp_seq_init; + u32 hol_l = tcp_ack_edge - tcp_seq_init; + u32 hol_r = 0; + + /* Place new seq number in correct place in receive array */ + if (prev_len == 0) + prev_len = len; + + idx_sack = sack_idx + ((tcp_seq_num - tcp_ack_edge) / prev_len); + if (idx_sack < TCP_SACK) { + edge_a[idx_sack].se.l = tcp_seq_num; + edge_a[idx_sack].se.r = tcp_seq_num + len; + 
edge_a[idx_sack].st = PKT; + + /* + * The fin (last) packet is not the same length as data + * packets, and if it's length is recorded and used for + * array index calculation, calculation breaks. + */ + if (prev_len < len) + prev_len = len; + } + + debug_cond(DEBUG_DEV_PKT, + "TCP 1 seq %d, edg %d, len %d, sack_idx %d, sack_end %d\n", + seq, hol_l, len, sack_idx, sack_end); + + /* Right edge of contiguous stream, is the left edge of first hill */ + hol_l = tcp_seq_num - tcp_seq_init; + hol_r = hol_l + len; + + if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) + tcp_lost.len = TCP_OPT_LEN_2; + + debug_cond(DEBUG_DEV_PKT, + "TCP 1 in %d, seq %d, pkt_l %d, pkt_r %d, sack_idx %d, sack_end %d\n", + idx_sack, seq, hol_l, hol_r, sack_idx, sack_end); + + for (sack_in = sack_idx; sack_in < sack_end && hill < TCP_SACK_HILLS; + sack_in++) { + switch (expect) { + case NOPKT: + switch (edge_a[sack_in].st) { + case NOPKT: + debug_cond(DEBUG_INT_STATE, "N"); + break; + case PKT: + debug_cond(DEBUG_INT_STATE, "n"); + if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) { + tcp_lost.hill[hill].l = + edge_a[sack_in].se.l; + tcp_lost.hill[hill].r = + edge_a[sack_in].se.r; + } + expect = PKT; + break; + } + break; + case PKT: + switch (edge_a[sack_in].st) { + case NOPKT: + debug_cond(DEBUG_INT_STATE, "p"); + if (sack_in > sack_idx && + hill < TCP_SACK_HILLS) { + hill++; + if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) + tcp_lost.len += TCP_OPT_LEN_8; + } + expect = NOPKT; + break; + case PKT: + debug_cond(DEBUG_INT_STATE, "P"); + + if (tcp_ack_edge == edge_a[sack_in].se.l) { + tcp_ack_edge = edge_a[sack_in].se.r; + edge_a[sack_in].st = NOPKT; + sack_idx++; + } else { + if (IS_ENABLED(CONFIG_PROT_TCP_SACK) && + hill < TCP_SACK_HILLS) + tcp_lost.hill[hill].r = + edge_a[sack_in].se.r; + if (IS_ENABLED(CONFIG_PROT_TCP_SACK) && + sack_in == sack_end - 1) + tcp_lost.hill[hill].r = + edge_a[sack_in].se.r; + } + break; + } + break; + } + } + debug_cond(DEBUG_INT_STATE, "\n"); + if (!IS_ENABLED(CONFIG_PROT_TCP_SACK) || 
tcp_lost.len <= TCP_OPT_LEN_2) + sack_idx = 0; +} + +/** + * tcp_parse_options() - parsing TCP options + * @o: pointer to the option field. + * @o_len: length of the option field. + */ +void tcp_parse_options(uchar *o, int o_len) +{ + struct tcp_t_opt *tsopt; + uchar *p = o; + + /* + * NOPs are options with a zero length, and thus are special. + * All other options have length fields. + */ + for (p = o; p < (o + o_len); p = p + p[1]) { + if (!p[1]) + return; /* Finished processing options */ + + switch (p[0]) { + case TCP_O_END: + return; + case TCP_O_MSS: + case TCP_O_SCL: + case TCP_P_SACK: + case TCP_V_SACK: + break; + case TCP_O_TS: + tsopt = (struct tcp_t_opt *)p; + rmt_timestamp = tsopt->t_snd; + return; + } + + /* Process optional NOPs */ + if (p[0] == TCP_O_NOP) + p++; + } +} + +static u8 tcp_state_machine(u8 tcp_flags, u32 *tcp_seq_num, int payload_len) +{ + u8 tcp_fin = tcp_flags & TCP_FIN; + u8 tcp_syn = tcp_flags & TCP_SYN; + u8 tcp_rst = tcp_flags & TCP_RST; + u8 tcp_push = tcp_flags & TCP_PUSH; + u8 tcp_ack = tcp_flags & TCP_ACK; + u8 action = TCP_DATA; + int i; + + /* + * tcp_flags are examined to determine TX action in a given state + * tcp_push is interpreted to mean "inform the app" + * urg, ece, cer and nonce flags are not supported. + * + * exe and crw are use to signal and confirm knowledge of congestion. + * This TCP only sends a file request and acks. If it generates + * congestion, the network is broken. 
+ */ + debug_cond(DEBUG_INT_STATE, "TCP STATE ENTRY %x\n", action); + if (tcp_rst) { + action = TCP_DATA; + current_tcp_state = TCP_CLOSED; + net_set_state(NETLOOP_FAIL); + debug_cond(DEBUG_INT_STATE, "TCP Reset %x\n", tcp_flags); + return TCP_RST; + } + + switch (current_tcp_state) { + case TCP_CLOSED: + debug_cond(DEBUG_INT_STATE, "TCP CLOSED %x\n", tcp_flags); + if (tcp_ack) + action = TCP_DATA; + else if (tcp_syn) + action = TCP_RST; + else if (tcp_fin) + action = TCP_DATA; + break; + case TCP_SYN_SENT: + debug_cond(DEBUG_INT_STATE, "TCP_SYN_SENT %x, %d\n", + tcp_flags, *tcp_seq_num); + if (tcp_fin) { + action = action | TCP_PUSH; + current_tcp_state = TCP_CLOSE_WAIT; + } + if (tcp_syn) { + action = action | TCP_ACK | TCP_PUSH; + if (tcp_ack) { + tcp_seq_init = *tcp_seq_num; + *tcp_seq_num = *tcp_seq_num + 1; + tcp_seq_max = *tcp_seq_num; + tcp_ack_edge = *tcp_seq_num; + sack_idx = 0; + edge_a[sack_idx].se.l = *tcp_seq_num; + edge_a[sack_idx].se.r = *tcp_seq_num; + prev_len = 0; + current_tcp_state = TCP_ESTABLISHED; + for (i = 0; i < TCP_SACK; i++) + edge_a[i].st = NOPKT; + } + } else if (tcp_ack) { + action = TCP_DATA; + } + + break; + case TCP_ESTABLISHED: + debug_cond(DEBUG_INT_STATE, "TCP_ESTABLISHED %x\n", tcp_flags); + if (*tcp_seq_num > tcp_seq_max) + tcp_seq_max = *tcp_seq_num; + if (payload_len > 0) { + tcp_hole(*tcp_seq_num, payload_len, tcp_seq_max); + tcp_fin = TCP_DATA; /* cause standalone FIN */ + } + + if ((tcp_fin) && + (!IS_ENABLED(CONFIG_PROT_TCP_SACK) || + tcp_lost.len <= TCP_OPT_LEN_2)) { + action = action | TCP_FIN | TCP_PUSH | TCP_ACK; + current_tcp_state = TCP_CLOSE_WAIT; + } else if (tcp_ack) { + action = TCP_DATA; + } + + if (tcp_syn) + action = TCP_ACK + TCP_RST; + else if (tcp_push) + action = action | TCP_PUSH; + break; + case TCP_CLOSE_WAIT: + debug_cond(DEBUG_INT_STATE, "TCP_CLOSE_WAIT (%x)\n", tcp_flags); + action = TCP_DATA; + break; + case TCP_FIN_WAIT_2: + debug_cond(DEBUG_INT_STATE, "TCP_FIN_WAIT_2 (%x)\n", tcp_flags); + if 
(tcp_ack) { + action = TCP_PUSH | TCP_ACK; + current_tcp_state = TCP_CLOSED; + puts("\n"); + } else if (tcp_syn) { + action = TCP_DATA; + } else if (tcp_fin) { + action = TCP_DATA; + } + break; + case TCP_FIN_WAIT_1: + debug_cond(DEBUG_INT_STATE, "TCP_FIN_WAIT_1 (%x)\n", tcp_flags); + if (tcp_fin) { + action = TCP_ACK | TCP_FIN; + current_tcp_state = TCP_FIN_WAIT_2; + } + if (tcp_syn) + action = TCP_RST; + if (tcp_ack) { + current_tcp_state = TCP_CLOSED; + tcp_seq_num = tcp_seq_num + 1; + } + break; + case TCP_CLOSING: + debug_cond(DEBUG_INT_STATE, "TCP_CLOSING (%x)\n", tcp_flags); + if (tcp_ack) { + action = TCP_PUSH; + current_tcp_state = TCP_CLOSED; + puts("\n"); + } else if (tcp_syn) { + action = TCP_RST; + } else if (tcp_fin) { + action = TCP_DATA; + } + break; + } + return action; +} + +/** + * rxhand_tcp_f() - process receiving data and call data handler. + * @b: the packet + * @pkt_len: the length of packet. + */ +void rxhand_tcp_f(union tcp_build_pkt *b, unsigned int pkt_len) +{ + int tcp_len = pkt_len - IP_HDR_SIZE; + u16 tcp_rx_xsum = b->ip.hdr.ip_sum; + u8 tcp_action = TCP_DATA; + u32 tcp_seq_num, tcp_ack_num; + struct in_addr action_and_state; + int tcp_hdr_len, payload_len; + + /* Verify IP header */ + debug_cond(DEBUG_DEV_PKT, + "TCP RX in RX Sum (to=%pI4, from=%pI4, len=%d)\n", + &b->ip.hdr.ip_src, &b->ip.hdr.ip_dst, pkt_len); + + b->ip.hdr.ip_src = net_server_ip; + b->ip.hdr.ip_dst = net_ip; + b->ip.hdr.ip_sum = 0; + if (tcp_rx_xsum != compute_ip_checksum(b, IP_HDR_SIZE)) { + debug_cond(DEBUG_DEV_PKT, + "TCP RX IP xSum Error (%pI4, =%pI4, len=%d)\n", + &net_ip, &net_server_ip, pkt_len); + return; + } + + /* Build pseudo header and verify TCP header */ + tcp_rx_xsum = b->ip.hdr.tcp_xsum; + b->ip.hdr.tcp_xsum = 0; + if (tcp_rx_xsum != tcp_set_pseudo_header((uchar *)b, b->ip.hdr.ip_src, + b->ip.hdr.ip_dst, tcp_len, + pkt_len)) { + debug_cond(DEBUG_DEV_PKT, + "TCP RX TCP xSum Error (%pI4, %pI4, len=%d)\n", + &net_ip, &net_server_ip, tcp_len); + return; 
+ } + + tcp_hdr_len = GET_TCP_HDR_LEN_IN_BYTES(b->ip.hdr.tcp_hlen); + payload_len = tcp_len - tcp_hdr_len; + + if (tcp_hdr_len > TCP_HDR_SIZE) + tcp_parse_options((uchar *)b + IP_TCP_HDR_SIZE, + tcp_hdr_len - TCP_HDR_SIZE); + /* + * Incoming sequence and ack numbers are server's view of the numbers. + * The app must swap the numbers when responding. + */ + tcp_seq_num = ntohl(b->ip.hdr.tcp_seq); + tcp_ack_num = ntohl(b->ip.hdr.tcp_ack); + + /* Packets are not ordered. Send to app as received. */ + tcp_action = tcp_state_machine(b->ip.hdr.tcp_flags, + &tcp_seq_num, payload_len); + + tcp_activity_count++; + if (tcp_activity_count > TCP_ACTIVITY) { + puts("| "); + tcp_activity_count = 0; + } + + if ((tcp_action & TCP_PUSH) || payload_len > 0) { + debug_cond(DEBUG_DEV_PKT, + "TCP Notify (action=%x, Seq=%d,Ack=%d,Pay%d)\n", + tcp_action, tcp_seq_num, tcp_ack_num, payload_len); + + action_and_state.s_addr = tcp_action; + (*tcp_packet_handler) ((uchar *)b + pkt_len - payload_len, + tcp_seq_num, action_and_state, + tcp_ack_num, payload_len); + + } else if (tcp_action != TCP_DATA) { + debug_cond(DEBUG_DEV_PKT, + "TCP Action (action=%x,Seq=%d,Ack=%d,Pay=%d)\n", + tcp_action, tcp_seq_num, tcp_ack_num, payload_len); + + /* + * Warning: Incoming Ack & Seq sequence numbers are transposed + * here to outgoing Seq & Ack sequence numbers + */ + net_send_tcp_packet(0, ntohs(b->ip.hdr.tcp_src), + ntohs(b->ip.hdr.tcp_dst), + (tcp_action & (~TCP_PUSH)), + tcp_seq_num, tcp_ack_num); + } +} From cfbae48219fd81f6c9e1a7b5ee160cdd3005f958 Mon Sep 17 00:00:00 2001 From: "Ying-Chun Liu (PaulLiu)" Date: Tue, 8 Nov 2022 14:17:29 +0800 Subject: [PATCH 13/17] net: Add wget application This commit adds a simple wget command that can download files from http server. 
The command syntax is wget ${loadaddr} Signed-off-by: Duncan Hare Signed-off-by: Ying-Chun Liu (PaulLiu) Reviewed-by: Simon Glass Cc: Christian Gmeiner Cc: Joe Hershberger Cc: Michal Simek Cc: Ramon Fried Reviewed-by: Ramon Fried --- cmd/Kconfig | 7 + cmd/net.c | 13 ++ include/net.h | 2 +- include/net/wget.h | 22 +++ net/Makefile | 1 + net/net.c | 6 + net/wget.c | 438 +++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 488 insertions(+), 1 deletion(-) create mode 100644 include/net/wget.h create mode 100644 net/wget.c diff --git a/cmd/Kconfig b/cmd/Kconfig index 79848cc6ab..a4b6fc6a5d 100644 --- a/cmd/Kconfig +++ b/cmd/Kconfig @@ -1812,6 +1812,13 @@ config SYS_DISABLE_AUTOLOAD is complete. Enable this option to disable this behavior and instead require files to be loaded over the network by subsequent commands. +config CMD_WGET + bool "wget" + select TCP + help + wget is a simple command to download kernel, or other files, + from a http server over TCP. + config CMD_MII bool "mii" imply CMD_MDIO diff --git a/cmd/net.c b/cmd/net.c index addcad3ac1..f6d9f5ea3a 100644 --- a/cmd/net.c +++ b/cmd/net.c @@ -125,6 +125,19 @@ U_BOOT_CMD( ); #endif +#if defined(CONFIG_CMD_WGET) +static int do_wget(struct cmd_tbl *cmdtp, int flag, int argc, char * const argv[]) +{ + return netboot_common(WGET, cmdtp, argc, argv); +} + +U_BOOT_CMD( + wget, 3, 1, do_wget, + "boot image via network using HTTP protocol", + "[loadAddress] [[hostIPaddr:]path and image name]" +); +#endif + static void netboot_update_env(void) { char tmp[22]; diff --git a/include/net.h b/include/net.h index f4140523c2..e0c7804827 100644 --- a/include/net.h +++ b/include/net.h @@ -561,7 +561,7 @@ extern int net_restart_wrap; /* Tried all network devices */ enum proto_t { BOOTP, RARP, ARP, TFTPGET, DHCP, PING, DNS, NFS, CDP, NETCONS, SNTP, - TFTPSRV, TFTPPUT, LINKLOCAL, FASTBOOT, WOL, UDP, NCSI + TFTPSRV, TFTPPUT, LINKLOCAL, FASTBOOT, WOL, UDP, NCSI, WGET }; extern char net_boot_file_name[1024];/* Boot File 
name */ diff --git a/include/net/wget.h b/include/net/wget.h new file mode 100644 index 0000000000..da0920de11 --- /dev/null +++ b/include/net/wget.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Duncan Hare Copyright 2017 + */ + +/** + * wget_start() - begin wget + */ +void wget_start(void); + +enum wget_state { + WGET_CLOSED, + WGET_CONNECTING, + WGET_CONNECTED, + WGET_TRANSFERRING, + WGET_TRANSFERRED +}; + +#define DEBUG_WGET 0 /* Set to 1 for debug messages */ +#define SERVER_PORT 80 +#define WGET_RETRY_COUNT 30 +#define WGET_TIMEOUT 2000UL diff --git a/net/Makefile b/net/Makefile index d131d1cb1a..4f757a224c 100644 --- a/net/Makefile +++ b/net/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_UDP_FUNCTION_FASTBOOT) += fastboot.o obj-$(CONFIG_CMD_WOL) += wol.o obj-$(CONFIG_PROT_UDP) += udp.o obj-$(CONFIG_PROT_TCP) += tcp.o +obj-$(CONFIG_CMD_WGET) += wget.o # Disable this warning as it is triggered by: # sprintf(buf, index ? "foo%d" : "foo", index) diff --git a/net/net.c b/net/net.c index 9cb2aab09d..aca20e43b0 100644 --- a/net/net.c +++ b/net/net.c @@ -118,6 +118,7 @@ #include "wol.h" #endif #include +#include /** BOOTP EXTENTIONS **/ @@ -517,6 +518,11 @@ restart: nfs_start(); break; #endif +#if defined(CONFIG_CMD_WGET) + case WGET: + wget_start(); + break; +#endif #if defined(CONFIG_CMD_CDP) case CDP: cdp_start(); diff --git a/net/wget.c b/net/wget.c new file mode 100644 index 0000000000..3826c4b364 --- /dev/null +++ b/net/wget.c @@ -0,0 +1,438 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * WGET/HTTP support driver based on U-BOOT's nfs.c + * Copyright Duncan Hare 2017 + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static const char bootfile1[] = "GET "; +static const char bootfile3[] = " HTTP/1.0\r\n\r\n"; +static const char http_eom[] = "\r\n\r\n"; +static const char http_ok[] = "200"; +static const char content_len[] = "Content-Length"; +static const char linefeed[] = "\r\n"; +static struct in_addr 
web_server_ip; +static int our_port; +static int wget_timeout_count; + +struct pkt_qd { + uchar *pkt; + unsigned int tcp_seq_num; + unsigned int len; +}; + +/* + * This is a control structure for out of order packets received. + * The actual packet bufers are in the kernel space, and are + * expected to be overwritten by the downloaded image. + */ +static struct pkt_qd pkt_q[PKTBUFSRX / 4]; +static int pkt_q_idx; +static unsigned long content_length; +static unsigned int packets; + +static unsigned int initial_data_seq_num; + +static enum wget_state current_wget_state; + +static char *image_url; +static unsigned int wget_timeout = WGET_TIMEOUT; + +static enum net_loop_state wget_loop_state; + +/* Timeout retry parameters */ +static u8 retry_action; /* actions for TCP retry */ +static unsigned int retry_tcp_ack_num; /* TCP retry acknowledge number*/ +static unsigned int retry_tcp_seq_num; /* TCP retry sequence number */ +static int retry_len; /* TCP retry length */ + +/** + * store_block() - store block in memory + * @src: source of data + * @offset: offset + * @len: length + */ +static inline int store_block(uchar *src, unsigned int offset, unsigned int len) +{ + ulong newsize = offset + len; + uchar *ptr; + + ptr = map_sysmem(image_load_addr + offset, len); + memcpy(ptr, src, len); + unmap_sysmem(ptr); + + if (net_boot_file_size < (offset + len)) + net_boot_file_size = newsize; + + return 0; +} + +/** + * wget_send_stored() - wget response dispatcher + * + * WARNING, This, and only this, is the place in wget.c where + * SEQUENCE NUMBERS are swapped between incoming (RX) + * and outgoing (TX). + * Procedure wget_handler() is correct for RX traffic. 
+ */ +static void wget_send_stored(void) +{ + u8 action = retry_action; + int len = retry_len; + unsigned int tcp_ack_num = retry_tcp_ack_num + len; + unsigned int tcp_seq_num = retry_tcp_seq_num; + uchar *ptr, *offset; + + switch (current_wget_state) { + case WGET_CLOSED: + debug_cond(DEBUG_WGET, "wget: send SYN\n"); + current_wget_state = WGET_CONNECTING; + net_send_tcp_packet(0, SERVER_PORT, our_port, action, + tcp_seq_num, tcp_ack_num); + packets = 0; + break; + case WGET_CONNECTING: + pkt_q_idx = 0; + net_send_tcp_packet(0, SERVER_PORT, our_port, action, + tcp_seq_num, tcp_ack_num); + + ptr = net_tx_packet + net_eth_hdr_size() + + IP_TCP_HDR_SIZE + TCP_TSOPT_SIZE + 2; + offset = ptr; + + memcpy(offset, &bootfile1, strlen(bootfile1)); + offset += strlen(bootfile1); + + memcpy(offset, image_url, strlen(image_url)); + offset += strlen(image_url); + + memcpy(offset, &bootfile3, strlen(bootfile3)); + offset += strlen(bootfile3); + net_send_tcp_packet((offset - ptr), SERVER_PORT, our_port, + TCP_PUSH, tcp_seq_num, tcp_ack_num); + current_wget_state = WGET_CONNECTED; + break; + case WGET_CONNECTED: + case WGET_TRANSFERRING: + case WGET_TRANSFERRED: + net_send_tcp_packet(0, SERVER_PORT, our_port, action, + tcp_seq_num, tcp_ack_num); + break; + } +} + +static void wget_send(u8 action, unsigned int tcp_ack_num, + unsigned int tcp_seq_num, int len) +{ + retry_action = action; + retry_tcp_ack_num = tcp_ack_num; + retry_tcp_seq_num = tcp_seq_num; + retry_len = len; + + wget_send_stored(); +} + +void wget_fail(char *error_message, unsigned int tcp_seq_num, + unsigned int tcp_ack_num, u8 action) +{ + printf("wget: Transfer Fail - %s\n", error_message); + net_set_timeout_handler(0, NULL); + wget_send(action, tcp_seq_num, tcp_ack_num, 0); +} + +void wget_success(u8 action, unsigned int tcp_seq_num, + unsigned int tcp_ack_num, int len, int packets) +{ + printf("Packets received %d, Transfer Successful\n", packets); + wget_send(action, tcp_seq_num, tcp_ack_num, len); +} + +/* + 
* Interfaces of U-BOOT + */ +static void wget_timeout_handler(void) +{ + if (++wget_timeout_count > WGET_RETRY_COUNT) { + puts("\nRetry count exceeded; starting again\n"); + wget_send(TCP_RST, 0, 0, 0); + net_start_again(); + } else { + puts("T "); + net_set_timeout_handler(wget_timeout + + WGET_TIMEOUT * wget_timeout_count, + wget_timeout_handler); + wget_send_stored(); + } +} + +#define PKT_QUEUE_OFFSET 0x20000 +#define PKT_QUEUE_PACKET_SIZE 0x800 + +static void wget_connected(uchar *pkt, unsigned int tcp_seq_num, + struct in_addr action_and_state, + unsigned int tcp_ack_num, unsigned int len) +{ + u8 action = action_and_state.s_addr; + uchar *pkt_in_q; + char *pos; + int hlen, i; + uchar *ptr1; + + pkt[len] = '\0'; + pos = strstr((char *)pkt, http_eom); + + if (!pos) { + debug_cond(DEBUG_WGET, + "wget: Connected, data before Header %p\n", pkt); + pkt_in_q = (void *)image_load_addr + PKT_QUEUE_OFFSET + + (pkt_q_idx * PKT_QUEUE_PACKET_SIZE); + + ptr1 = map_sysmem((phys_addr_t)pkt_in_q, len); + memcpy(ptr1, pkt, len); + unmap_sysmem(ptr1); + + pkt_q[pkt_q_idx].pkt = pkt_in_q; + pkt_q[pkt_q_idx].tcp_seq_num = tcp_seq_num; + pkt_q[pkt_q_idx].len = len; + pkt_q_idx++; + } else { + debug_cond(DEBUG_WGET, "wget: Connected HTTP Header %p\n", pkt); + /* sizeof(http_eom) - 1 is the string length of (http_eom) */ + hlen = pos - (char *)pkt + sizeof(http_eom) - 1; + pos = strstr((char *)pkt, linefeed); + if (pos > 0) + i = pos - (char *)pkt; + else + i = hlen; + printf("%.*s", i, pkt); + + current_wget_state = WGET_TRANSFERRING; + + if (strstr((char *)pkt, http_ok) == 0) { + debug_cond(DEBUG_WGET, + "wget: Connected Bad Xfer\n"); + initial_data_seq_num = tcp_seq_num + hlen; + wget_loop_state = NETLOOP_FAIL; + wget_send(action, tcp_seq_num, tcp_ack_num, len); + } else { + debug_cond(DEBUG_WGET, + "wget: Connctd pkt %p hlen %x\n", + pkt, hlen); + initial_data_seq_num = tcp_seq_num + hlen; + + pos = strstr((char *)pkt, content_len); + if (!pos) { + content_length = -1; + } else 
{ + pos += sizeof(content_len) + 2; + strict_strtoul(pos, 10, &content_length); + debug_cond(DEBUG_WGET, + "wget: Connected Len %lu\n", + content_length); + } + + net_boot_file_size = 0; + + if (len > hlen) + store_block(pkt + hlen, 0, len - hlen); + + debug_cond(DEBUG_WGET, + "wget: Connected Pkt %p hlen %x\n", + pkt, hlen); + + for (i = 0; i < pkt_q_idx; i++) { + ptr1 = map_sysmem( + (phys_addr_t)(pkt_q[i].pkt), + pkt_q[i].len); + store_block(ptr1, + pkt_q[i].tcp_seq_num - + initial_data_seq_num, + pkt_q[i].len); + unmap_sysmem(ptr1); + debug_cond(DEBUG_WGET, + "wget: Connctd pkt Q %p len %x\n", + pkt_q[i].pkt, pkt_q[i].len); + } + } + } + wget_send(action, tcp_seq_num, tcp_ack_num, len); +} + +/** + * wget_handler() - handler of wget + * @pkt: the pointer to the payload + * @tcp_seq_num: tcp sequence number + * @action_and_state: TCP state + * @tcp_ack_num: tcp acknowledge number + * @len: length of the payload + * + * In the "application push" invocation, the TCP header with all + * its information is pointed to by the packet pointer. 
+ */ +static void wget_handler(uchar *pkt, unsigned int tcp_seq_num, + struct in_addr action_and_state, + unsigned int tcp_ack_num, unsigned int len) +{ + enum tcp_state wget_tcp_state = tcp_get_tcp_state(); + u8 action = action_and_state.s_addr; + + net_set_timeout_handler(wget_timeout, wget_timeout_handler); + packets++; + + switch (current_wget_state) { + case WGET_CLOSED: + debug_cond(DEBUG_WGET, "wget: Handler: Error!, State wrong\n"); + break; + case WGET_CONNECTING: + debug_cond(DEBUG_WGET, + "wget: Connecting In len=%x, Seq=%x, Ack=%x\n", + len, tcp_seq_num, tcp_ack_num); + if (!len) { + if (wget_tcp_state == TCP_ESTABLISHED) { + debug_cond(DEBUG_WGET, + "wget: Cting, send, len=%x\n", len); + wget_send(action, tcp_seq_num, tcp_ack_num, + len); + } else { + printf("%.*s", len, pkt); + wget_fail("wget: Handler Connected Fail\n", + tcp_seq_num, tcp_ack_num, action); + } + } + break; + case WGET_CONNECTED: + debug_cond(DEBUG_WGET, "wget: Connected seq=%x, len=%x\n", + tcp_seq_num, len); + if (!len) { + wget_fail("Image not found, no data returned\n", + tcp_seq_num, tcp_ack_num, action); + } else { + wget_connected(pkt, tcp_seq_num, action_and_state, + tcp_ack_num, len); + } + break; + case WGET_TRANSFERRING: + debug_cond(DEBUG_WGET, + "wget: Transferring, seq=%x, ack=%x,len=%x\n", + tcp_seq_num, tcp_ack_num, len); + + if (tcp_seq_num >= initial_data_seq_num && + store_block(pkt, tcp_seq_num - initial_data_seq_num, + len) != 0) { + wget_fail("wget: store error\n", + tcp_seq_num, tcp_ack_num, action); + return; + } + + switch (wget_tcp_state) { + case TCP_FIN_WAIT_2: + wget_send(TCP_ACK, tcp_seq_num, tcp_ack_num, len); + fallthrough; + case TCP_SYN_SENT: + case TCP_CLOSING: + case TCP_FIN_WAIT_1: + case TCP_CLOSED: + net_set_state(NETLOOP_FAIL); + break; + case TCP_ESTABLISHED: + wget_send(TCP_ACK, tcp_seq_num, tcp_ack_num, + len); + wget_loop_state = NETLOOP_SUCCESS; + break; + case TCP_CLOSE_WAIT: /* End of transfer */ + current_wget_state = WGET_TRANSFERRED; + 
wget_send(action | TCP_ACK | TCP_FIN, + tcp_seq_num, tcp_ack_num, len); + break; + } + break; + case WGET_TRANSFERRED: + printf("Packets received %d, Transfer Successful\n", packets); + net_set_state(wget_loop_state); + break; + } +} + +#define RANDOM_PORT_START 1024 +#define RANDOM_PORT_RANGE 0x4000 + +/** + * random_port() - make port a little random (1024-17407) + * + * Return: random port number from 1024 to 17407 + * + * This keeps the math somewhat trivial to compute, and seems to work with + * all supported protocols/clients/servers + */ +static unsigned int random_port(void) +{ + return RANDOM_PORT_START + (get_timer(0) % RANDOM_PORT_RANGE); +} + +#define BLOCKSIZE 512 + +void wget_start(void) +{ + image_url = strchr(net_boot_file_name, ':'); + if (image_url > 0) { + web_server_ip = string_to_ip(net_boot_file_name); + ++image_url; + net_server_ip = web_server_ip; + } else { + web_server_ip = net_server_ip; + image_url = net_boot_file_name; + } + + debug_cond(DEBUG_WGET, + "wget: Transfer HTTP Server %pI4; our IP %pI4\n", + &web_server_ip, &net_ip); + + /* Check if we need to send across this subnet */ + if (net_gateway.s_addr && net_netmask.s_addr) { + struct in_addr our_net; + struct in_addr server_net; + + our_net.s_addr = net_ip.s_addr & net_netmask.s_addr; + server_net.s_addr = net_server_ip.s_addr & net_netmask.s_addr; + if (our_net.s_addr != server_net.s_addr) + debug_cond(DEBUG_WGET, + "wget: sending through gateway %pI4", + &net_gateway); + } + debug_cond(DEBUG_WGET, "URL '%s'\n", image_url); + + if (net_boot_file_expected_size_in_blocks) { + debug_cond(DEBUG_WGET, "wget: Size is 0x%x Bytes = ", + net_boot_file_expected_size_in_blocks * BLOCKSIZE); + print_size(net_boot_file_expected_size_in_blocks * BLOCKSIZE, + ""); + } + debug_cond(DEBUG_WGET, + "\nwget:Load address: 0x%lx\nLoading: *\b", image_load_addr); + + net_set_timeout_handler(wget_timeout, wget_timeout_handler); + tcp_set_tcp_handler(wget_handler); + + wget_timeout_count = 0; + 
current_wget_state = WGET_CLOSED; + + our_port = random_port(); + + /* + * Zero out server ether to force arp resolution in case + * the server ip for the previous u-boot command, for example dns + * is not the same as the web server ip. + */ + + memset(net_server_ethaddr, 0, 6); + + wget_send(TCP_SYN, 0, 0, 0); +} From bfce0ca9d48dbee7c06526db808ae20759545db5 Mon Sep 17 00:00:00 2001 From: "Ying-Chun Liu (PaulLiu)" Date: Tue, 8 Nov 2022 14:17:30 +0800 Subject: [PATCH 14/17] doc: cmd: wget: add documentation Add documentation for the wget command. Signed-off-by: Ying-Chun Liu (PaulLiu) Cc: Christian Gmeiner Cc: Joe Hershberger Cc: Michal Simek Cc: Ramon Fried Cc: Simon Glass Reviewed-by: Simon Glass Reviewed-by: Ramon Fried --- doc/usage/cmd/wget.rst | 61 ++++++++++++++++++++++++++++++++++++++++++ doc/usage/index.rst | 1 + 2 files changed, 62 insertions(+) create mode 100644 doc/usage/cmd/wget.rst diff --git a/doc/usage/cmd/wget.rst b/doc/usage/cmd/wget.rst new file mode 100644 index 0000000000..4fcfa03954 --- /dev/null +++ b/doc/usage/cmd/wget.rst @@ -0,0 +1,61 @@ +.. SPDX-License-Identifier: GPL-2.0+: + +wget command +============ + +Synopsis +-------- + +:: + wget address [[hostIPaddr:]path] + +Description +----------- + +The wget command is used to download a file from an HTTP server. + +wget command will use HTTP over TCP to download files from an HTTP server. +Currently it can only download image from an HTTP server hosted on port 80. + +address + memory address for the data downloaded + +hostIPaddr + IP address of the HTTP server, defaults to the value of environment + variable *serverip* + +path + path of the file to be downloaded. 
+ +Example +------- + +In the example the following steps are executed: + +* setup client network address +* download a file from the HTTP server + +:: + + => setenv autoload no + => dhcp + BOOTP broadcast 1 + *** Unhandled DHCP Option in OFFER/ACK: 23 + *** Unhandled DHCP Option in OFFER/ACK: 23 + DHCP client bound to address 192.168.1.105 (210 ms) + => wget ${loadaddr} 192.168.1.254:/index.html + HTTP/1.0 302 Found + Packets received 4, Transfer Successful + +Configuration +------------- + +The command is only available if CONFIG_CMD_WGET=y. + +CONFIG_PROT_TCP_SACK can be turned on for the TCP SACK options. This will +help increasing the downloading speed. + +Return value +------------ + +The return value $? is 0 (true) on success and 1 (false) otherwise. diff --git a/doc/usage/index.rst b/doc/usage/index.rst index d6283fab8e..0bc82887e9 100644 --- a/doc/usage/index.rst +++ b/doc/usage/index.rst @@ -79,6 +79,7 @@ Shell commands cmd/ums cmd/ut cmd/wdt + cmd/wget cmd/xxd Booting OS From d6abc7e2e027956eb0b5e73bb2be5cf995b15ae6 Mon Sep 17 00:00:00 2001 From: "Ying-Chun Liu (PaulLiu)" Date: Tue, 8 Nov 2022 14:17:31 +0800 Subject: [PATCH 15/17] test: cmd: add test for wget command. Simulate a TCP HTTP server's response for testing wget command. 
Signed-off-by: Ying-Chun Liu (PaulLiu) Cc: Christian Gmeiner Cc: Joe Hershberger Cc: Michal Simek Cc: Ramon Fried Cc: Simon Glass Reviewed-by: Simon Glass Reviewed-by: Ramon Fried --- test/cmd/Makefile | 1 + test/cmd/wget.c | 206 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 207 insertions(+) create mode 100644 test/cmd/wget.c diff --git a/test/cmd/Makefile b/test/cmd/Makefile index 6dd6e81875..bc961df3dc 100644 --- a/test/cmd/Makefile +++ b/test/cmd/Makefile @@ -20,3 +20,4 @@ ifdef CONFIG_SANDBOX obj-$(CONFIG_CMD_SETEXPR) += setexpr.o endif obj-$(CONFIG_CMD_TEMPERATURE) += temperature.o +obj-$(CONFIG_CMD_WGET) += wget.o diff --git a/test/cmd/wget.c b/test/cmd/wget.c new file mode 100644 index 0000000000..ed83fc94a5 --- /dev/null +++ b/test/cmd/wget.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 Linaro + * + * (C) Copyright 2022 + * Ying-Chun Liu (PaulLiu) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SHIFT_TO_TCPHDRLEN_FIELD(x) ((x) << 4) +#define LEN_B_TO_DW(x) ((x) >> 2) + +static int sb_arp_handler(struct udevice *dev, void *packet, + unsigned int len) +{ + struct eth_sandbox_priv *priv = dev_get_priv(dev); + struct arp_hdr *arp = packet + ETHER_HDR_SIZE; + int ret = 0; + + if (ntohs(arp->ar_op) == ARPOP_REQUEST) { + priv->fake_host_ipaddr = net_read_ip(&arp->ar_spa); + + ret = sandbox_eth_recv_arp_req(dev); + if (ret) + return ret; + ret = sandbox_eth_arp_req_to_reply(dev, packet, len); + return ret; + } + + return -EPROTONOSUPPORT; +} + +static int sb_syn_handler(struct udevice *dev, void *packet, + unsigned int len) +{ + struct eth_sandbox_priv *priv = dev_get_priv(dev); + struct ethernet_hdr *eth = packet; + struct ip_tcp_hdr *tcp = packet + ETHER_HDR_SIZE; + struct ethernet_hdr *eth_send; + struct ip_tcp_hdr *tcp_send; + + /* Don't allow the buffer to overrun */ 
+ if (priv->recv_packets >= PKTBUFSRX) + return 0; + + eth_send = (void *)priv->recv_packet_buffer[priv->recv_packets]; + memcpy(eth_send->et_dest, eth->et_src, ARP_HLEN); + memcpy(eth_send->et_src, priv->fake_host_hwaddr, ARP_HLEN); + eth_send->et_protlen = htons(PROT_IP); + tcp_send = (void *)eth_send + ETHER_HDR_SIZE; + tcp_send->tcp_src = tcp->tcp_dst; + tcp_send->tcp_dst = tcp->tcp_src; + tcp_send->tcp_seq = htonl(0); + tcp_send->tcp_ack = htonl(ntohl(tcp->tcp_seq) + 1); + tcp_send->tcp_hlen = SHIFT_TO_TCPHDRLEN_FIELD(LEN_B_TO_DW(TCP_HDR_SIZE)); + tcp_send->tcp_flags = TCP_SYN | TCP_ACK; + tcp_send->tcp_win = htons(PKTBUFSRX * TCP_MSS >> TCP_SCALE); + tcp_send->tcp_xsum = 0; + tcp_send->tcp_ugr = 0; + tcp_send->tcp_xsum = tcp_set_pseudo_header((uchar *)tcp_send, + tcp->ip_src, + tcp->ip_dst, + TCP_HDR_SIZE, + IP_TCP_HDR_SIZE); + net_set_ip_header((uchar *)tcp_send, + tcp->ip_src, + tcp->ip_dst, + IP_TCP_HDR_SIZE, + IPPROTO_TCP); + + priv->recv_packet_length[priv->recv_packets] = + ETHER_HDR_SIZE + IP_TCP_HDR_SIZE; + ++priv->recv_packets; + + return 0; +} + +static int sb_ack_handler(struct udevice *dev, void *packet, + unsigned int len) +{ + struct eth_sandbox_priv *priv = dev_get_priv(dev); + struct ethernet_hdr *eth = packet; + struct ip_tcp_hdr *tcp = packet + ETHER_HDR_SIZE; + struct ethernet_hdr *eth_send; + struct ip_tcp_hdr *tcp_send; + void *data; + int pkt_len; + int payload_len = 0; + const char *payload1 = "HTTP/1.1 200 OK\r\n" + "Content-Length: 30\r\n\r\n\r\n" + "Hi\r\n"; + + /* Don't allow the buffer to overrun */ + if (priv->recv_packets >= PKTBUFSRX) + return 0; + + eth_send = (void *)priv->recv_packet_buffer[priv->recv_packets]; + memcpy(eth_send->et_dest, eth->et_src, ARP_HLEN); + memcpy(eth_send->et_src, priv->fake_host_hwaddr, ARP_HLEN); + eth_send->et_protlen = htons(PROT_IP); + tcp_send = (void *)eth_send + ETHER_HDR_SIZE; + tcp_send->tcp_src = tcp->tcp_dst; + tcp_send->tcp_dst = tcp->tcp_src; + data = (void *)tcp_send + IP_TCP_HDR_SIZE; 
+ + if (ntohl(tcp->tcp_seq) == 1 && ntohl(tcp->tcp_ack) == 1) { + tcp_send->tcp_seq = htonl(ntohl(tcp->tcp_ack)); + tcp_send->tcp_ack = htonl(ntohl(tcp->tcp_seq) + 1); + payload_len = strlen(payload1); + memcpy(data, payload1, payload_len); + tcp_send->tcp_flags = TCP_ACK; + } else if (ntohl(tcp->tcp_seq) == 2) { + tcp_send->tcp_seq = htonl(ntohl(tcp->tcp_ack)); + tcp_send->tcp_ack = htonl(ntohl(tcp->tcp_seq) + 1); + payload_len = 0; + tcp_send->tcp_flags = TCP_ACK | TCP_FIN; + } + + tcp_send->tcp_hlen = SHIFT_TO_TCPHDRLEN_FIELD(LEN_B_TO_DW(TCP_HDR_SIZE)); + tcp_send->tcp_win = htons(PKTBUFSRX * TCP_MSS >> TCP_SCALE); + tcp_send->tcp_xsum = 0; + tcp_send->tcp_ugr = 0; + pkt_len = IP_TCP_HDR_SIZE + payload_len; + tcp_send->tcp_xsum = tcp_set_pseudo_header((uchar *)tcp_send, + tcp->ip_src, + tcp->ip_dst, + pkt_len - IP_HDR_SIZE, + pkt_len); + net_set_ip_header((uchar *)tcp_send, + tcp->ip_src, + tcp->ip_dst, + pkt_len, + IPPROTO_TCP); + + if (ntohl(tcp->tcp_seq) == 1 || ntohl(tcp->tcp_seq) == 2) { + priv->recv_packet_length[priv->recv_packets] = + ETHER_HDR_SIZE + IP_TCP_HDR_SIZE + payload_len; + ++priv->recv_packets; + } + + return 0; +} + +static int sb_http_handler(struct udevice *dev, void *packet, + unsigned int len) +{ + struct ethernet_hdr *eth = packet; + struct ip_hdr *ip; + struct ip_tcp_hdr *tcp; + + if (ntohs(eth->et_protlen) == PROT_ARP) { + return sb_arp_handler(dev, packet, len); + } else if (ntohs(eth->et_protlen) == PROT_IP) { + ip = packet + ETHER_HDR_SIZE; + if (ip->ip_p == IPPROTO_TCP) { + tcp = packet + ETHER_HDR_SIZE; + if (tcp->tcp_flags == TCP_SYN) + return sb_syn_handler(dev, packet, len); + else if (tcp->tcp_flags & TCP_ACK && !(tcp->tcp_flags & TCP_SYN)) + return sb_ack_handler(dev, packet, len); + return 0; + } + return -EPROTONOSUPPORT; + } + + return -EPROTONOSUPPORT; +} + +static int net_test_wget(struct unit_test_state *uts) +{ + sandbox_eth_set_tx_handler(0, sb_http_handler); + sandbox_eth_set_priv(0, uts); + + env_set("ethact", 
"eth@10002000"); + env_set("ethrotate", "no"); + env_set("loadaddr", "0x20000"); + ut_assertok(run_command("wget ${loadaddr} 1.1.2.2:/index.html", 0)); + + sandbox_eth_set_tx_handler(0, NULL); + + ut_assertok(console_record_reset_enable()); + run_command("md5sum ${loadaddr} ${filesize}", 0); + ut_assert_nextline("md5 for 00020000 ... 0002001f ==> 234af48e94b0085060249ecb5942ab57"); + ut_assertok(ut_check_console_end(uts)); + + return 0; +} + +LIB_TEST(net_test_wget, 0); From d1559435d7f03517c7306e1c43e2ef497479f34b Mon Sep 17 00:00:00 2001 From: Yaron Micher Date: Thu, 10 Nov 2022 19:31:34 +0200 Subject: [PATCH 16/17] net: macb: Fix race caused by flushing unwanted descriptors The rx descriptor list is in cached memory, and there may be multiple descriptors per cache-line. After reclaim_rx_buffers marks a descriptor as unused it does a cache flush, which causes the entire cache-line to be written to memory, which may override other descriptors in the same cache-line that the controller may have written to. The fix skips freeing descriptors that are not the last in a cache-line, and if the freed descriptor is the last one in a cache-line, it marks all the descriptors in the cache-line as unused. This is similarly to what is done in drivers/net/fec_mxc.c In my case this bug caused tftpboot to fail some times when other packets are sent to u-boot in addition to the ongoing tftp (e.g. ping). The driver would stop receiving new packets because it is waiting on a descriptor that is marked unused, when in reality the descriptor contains a new unprocessed packet but while freeing the previous buffer descriptor & flushing the cache, the driver accidentally marked the descriptor as unused. 
Signed-off-by: Yaron Micher --- drivers/net/macb.c | 51 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/drivers/net/macb.c b/drivers/net/macb.c index e02a57b411..65ec1f24ad 100644 --- a/drivers/net/macb.c +++ b/drivers/net/macb.c @@ -98,6 +98,9 @@ struct macb_dma_desc_64 { #define MACB_RX_DMA_DESC_SIZE (DMA_DESC_BYTES(MACB_RX_RING_SIZE)) #define MACB_TX_DUMMY_DMA_DESC_SIZE (DMA_DESC_BYTES(1)) +#define DESC_PER_CACHELINE_32 (ARCH_DMA_MINALIGN/sizeof(struct macb_dma_desc)) +#define DESC_PER_CACHELINE_64 (ARCH_DMA_MINALIGN/DMA_DESC_SIZE) + #define RXBUF_FRMLEN_MASK 0x00000fff #define TXBUF_FRMLEN_MASK 0x000007ff @@ -401,32 +404,56 @@ static int _macb_send(struct macb_device *macb, const char *name, void *packet, return 0; } +static void reclaim_rx_buffer(struct macb_device *macb, + unsigned int idx) +{ + unsigned int mask; + unsigned int shift; + unsigned int i; + + /* + * There may be multiple descriptors per CPU cacheline, + * so a cache flush would flush the whole line, meaning the content of other descriptors + * in the cacheline would also flush. If one of the other descriptors had been + * written to by the controller, the flush would cause those changes to be lost. + * + * To circumvent this issue, we do the actual freeing only when we need to free + * the last descriptor in the current cacheline. When the current descriptor is the + * last in the cacheline, we free all the descriptors that belong to that cacheline. 
+ */ + if (macb->config->hw_dma_cap & HW_DMA_CAP_64B) { + mask = DESC_PER_CACHELINE_64 - 1; + shift = 1; + } else { + mask = DESC_PER_CACHELINE_32 - 1; + shift = 0; + } + + /* we exit without freeing if idx is not the last descriptor in the cacheline */ + if ((idx & mask) != mask) + return; + + for (i = idx & (~mask); i <= idx; i++) + macb->rx_ring[i << shift].addr &= ~MACB_BIT(RX_USED); +} + static void reclaim_rx_buffers(struct macb_device *macb, unsigned int new_tail) { unsigned int i; - unsigned int count; i = macb->rx_tail; macb_invalidate_ring_desc(macb, RX); while (i > new_tail) { - if (macb->config->hw_dma_cap & HW_DMA_CAP_64B) - count = i * 2; - else - count = i; - macb->rx_ring[count].addr &= ~MACB_BIT(RX_USED); + reclaim_rx_buffer(macb, i); i++; - if (i > MACB_RX_RING_SIZE) + if (i >= MACB_RX_RING_SIZE) i = 0; } while (i < new_tail) { - if (macb->config->hw_dma_cap & HW_DMA_CAP_64B) - count = i * 2; - else - count = i; - macb->rx_ring[count].addr &= ~MACB_BIT(RX_USED); + reclaim_rx_buffer(macb, i); i++; } From 5e6c069b2c6b37083da685f39fa56ab5137dbdf9 Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Thu, 17 Nov 2022 13:27:09 -0800 Subject: [PATCH 17/17] phy: add driver for Intel XWAY PHY Add a driver for the Intel XWAY GbE PHY: - configure RGMII using dt phy-mode and standard delay properties - use genphy_config Signed-off-by: Tim Harvey --- drivers/net/phy/Kconfig | 5 ++++ drivers/net/phy/Makefile | 1 + drivers/net/phy/intel_xway.c | 48 ++++++++++++++++++++++++++++++++++++ drivers/net/phy/phy.c | 3 +++ include/phy.h | 1 + 5 files changed, 58 insertions(+) create mode 100644 drivers/net/phy/intel_xway.c diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 52ce08b3b3..86e698190f 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -321,6 +321,11 @@ config PHY_XILINX_GMII2RGMII as bridge between MAC connected over GMII and external phy that is connected over RGMII interface. 
+config PHY_XWAY + bool "Intel XWAY PHY support" + help + This adds support for the Intel XWAY (formerly Lantiq) GbE PHYs. + config PHY_ETHERNET_ID bool "Read ethernet PHY id" depends on DM_GPIO diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 9d87eb212c..d38e99e717 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_PHY_TI_DP83867) += dp83867.o obj-$(CONFIG_PHY_TI_DP83869) += dp83869.o obj-$(CONFIG_PHY_XILINX) += xilinx_phy.o obj-$(CONFIG_PHY_XILINX_GMII2RGMII) += xilinx_gmii2rgmii.o +obj-$(CONFIG_PHY_XWAY) += intel_xway.o obj-$(CONFIG_PHY_ETHERNET_ID) += ethernet_id.o obj-$(CONFIG_PHY_VITESSE) += vitesse.o obj-$(CONFIG_PHY_MSCC) += mscc.o diff --git a/drivers/net/phy/intel_xway.c b/drivers/net/phy/intel_xway.c new file mode 100644 index 0000000000..dfce3f8332 --- /dev/null +++ b/drivers/net/phy/intel_xway.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include +#include +#include + +#define XWAY_MDIO_MIICTRL 0x17 /* mii control */ + +#define XWAY_MDIO_MIICTRL_RXSKEW_MASK GENMASK(14, 12) +#define XWAY_MDIO_MIICTRL_TXSKEW_MASK GENMASK(10, 8) + +static int xway_config(struct phy_device *phydev) +{ + ofnode node = phy_get_ofnode(phydev); + u32 val = 0; + + if (ofnode_valid(node)) { + u32 rx_delay, tx_delay; + + rx_delay = ofnode_read_u32_default(node, "rx-internal-delay-ps", 2000); + tx_delay = ofnode_read_u32_default(node, "tx-internal-delay-ps", 2000); + val |= FIELD_PREP(XWAY_MDIO_MIICTRL_RXSKEW_MASK, rx_delay / 500); + val |= FIELD_PREP(XWAY_MDIO_MIICTRL_TXSKEW_MASK, tx_delay / 500); + phy_modify(phydev, MDIO_DEVAD_NONE, XWAY_MDIO_MIICTRL, + XWAY_MDIO_MIICTRL_TXSKEW_MASK | + XWAY_MDIO_MIICTRL_RXSKEW_MASK, val); + } + + genphy_config_aneg(phydev); + + return 0; +} + +static struct phy_driver XWAY_driver = { + .name = "XWAY", + .uid = 0xD565A400, + .mask = 0xffffff00, + .features = PHY_GBIT_FEATURES, + .config = xway_config, + .startup = genphy_startup, + .shutdown = genphy_shutdown, 
+}; + +int phy_xway_init(void) +{ + phy_register(&XWAY_driver); + + return 0; +} diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 9087663053..92143cf236 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -556,6 +556,9 @@ int phy_init(void) #ifdef CONFIG_PHY_XILINX phy_xilinx_init(); #endif +#ifdef CONFIG_PHY_XWAY + phy_xway_init(); +#endif #ifdef CONFIG_PHY_MSCC phy_mscc_init(); #endif diff --git a/include/phy.h b/include/phy.h index 0737c4e8f9..ff69536fca 100644 --- a/include/phy.h +++ b/include/phy.h @@ -380,6 +380,7 @@ int phy_teranetics_init(void); int phy_ti_init(void); int phy_vitesse_init(void); int phy_xilinx_init(void); +int phy_xway_init(void); int phy_mscc_init(void); int phy_fixed_init(void); int phy_ncsi_init(void);