forked from Minki/linux
cxgb4/cxgb4vf: Add support for SGE doorbell queue timer
T6 introduced a Timer Mechanism in SGE called the SGE Doorbell Queue Timer. With this we can now configure TX Queues to get CIDX Updates when: Time(CIDX == PIDX) >= Timer. Previously we relied on TX Queue Status Page updates by hardware for DMA completions. This will make Hardware/Firmware actually deliver the CIDX Updates as Ingress Queue messages with commensurate Interrupts. So we now have a new RX Path component for processing CIDX Updates and reclaiming TX Descriptors faster. Original work by: Casey Leedom <leedom@chelsio.com> Signed-off-by: Vishal Kulkarni <vishal@chelsio.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
f694be27b7
commit
d429005fdf
@ -617,6 +617,7 @@ enum { /* adapter flags */
|
||||
FW_OFLD_CONN = (1 << 9),
|
||||
ROOT_NO_RELAXED_ORDERING = (1 << 10),
|
||||
SHUTTING_DOWN = (1 << 11),
|
||||
SGE_DBQ_TIMER = (1 << 12),
|
||||
};
|
||||
|
||||
enum {
|
||||
@ -756,6 +757,8 @@ struct sge_eth_txq { /* state for an SGE Ethernet Tx queue */
|
||||
#ifdef CONFIG_CHELSIO_T4_DCB
|
||||
u8 dcb_prio; /* DCB Priority bound to queue */
|
||||
#endif
|
||||
u8 dbqt; /* SGE Doorbell Queue Timer in use */
|
||||
unsigned int dbqtimerix; /* SGE Doorbell Queue Timer Index */
|
||||
unsigned long tso; /* # of TSO requests */
|
||||
unsigned long tx_cso; /* # of Tx checksum offloads */
|
||||
unsigned long vlan_ins; /* # of Tx VLAN insertions */
|
||||
@ -816,6 +819,7 @@ struct sge {
|
||||
u16 nqs_per_uld; /* # of Rx queues per ULD */
|
||||
u16 timer_val[SGE_NTIMERS];
|
||||
u8 counter_val[SGE_NCOUNTERS];
|
||||
u16 dbqtimer_val[SGE_NDBQTIMERS];
|
||||
u32 fl_pg_order; /* large page allocation size */
|
||||
u32 stat_len; /* length of status page at ring end */
|
||||
u32 pktshift; /* padding between CPL & packet data */
|
||||
@ -1402,7 +1406,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
|
||||
rspq_flush_handler_t flush_handler, int cong);
|
||||
int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
|
||||
struct net_device *dev, struct netdev_queue *netdevq,
|
||||
unsigned int iqid);
|
||||
unsigned int iqid, u8 dbqt);
|
||||
int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
|
||||
struct net_device *dev, unsigned int iqid,
|
||||
unsigned int cmplqid);
|
||||
@ -1415,6 +1419,8 @@ irqreturn_t t4_sge_intr_msix(int irq, void *cookie);
|
||||
int t4_sge_init(struct adapter *adap);
|
||||
void t4_sge_start(struct adapter *adap);
|
||||
void t4_sge_stop(struct adapter *adap);
|
||||
int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *q,
|
||||
int maxreclaim);
|
||||
void cxgb4_set_ethtool_ops(struct net_device *netdev);
|
||||
int cxgb4_write_rss(const struct port_info *pi, const u16 *queues);
|
||||
enum cpl_tx_tnl_lso_type cxgb_encap_offload_supported(struct sk_buff *skb);
|
||||
@ -1821,6 +1827,8 @@ int t4_ctrl_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf,
|
||||
int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf,
|
||||
unsigned int vf, unsigned int eqid);
|
||||
int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox, int ctxt_type);
|
||||
int t4_read_sge_dbqtimers(struct adapter *adap, unsigned int ndbqtimers,
|
||||
u16 *dbqtimers);
|
||||
void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl);
|
||||
int t4_update_port_info(struct port_info *pi);
|
||||
int t4_get_link_params(struct port_info *pi, unsigned int *link_okp,
|
||||
|
@ -575,7 +575,7 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
|
||||
struct sge_eth_txq *eq;
|
||||
|
||||
eq = container_of(txq, struct sge_eth_txq, q);
|
||||
netif_tx_wake_queue(eq->txq);
|
||||
t4_sge_eth_txq_egress_update(q->adap, eq, -1);
|
||||
} else {
|
||||
struct sge_uld_txq *oq;
|
||||
|
||||
@ -933,10 +933,13 @@ static int setup_sge_queues(struct adapter *adap)
|
||||
q->rspq.idx = j;
|
||||
memset(&q->stats, 0, sizeof(q->stats));
|
||||
}
|
||||
for (j = 0; j < pi->nqsets; j++, t++) {
|
||||
|
||||
q = &s->ethrxq[pi->first_qset];
|
||||
for (j = 0; j < pi->nqsets; j++, t++, q++) {
|
||||
err = t4_sge_alloc_eth_txq(adap, t, dev,
|
||||
netdev_get_tx_queue(dev, j),
|
||||
s->fw_evtq.cntxt_id);
|
||||
q->rspq.cntxt_id,
|
||||
!!(adap->flags & SGE_DBQ_TIMER));
|
||||
if (err)
|
||||
goto freeout;
|
||||
}
|
||||
@ -958,7 +961,7 @@ static int setup_sge_queues(struct adapter *adap)
|
||||
if (!is_t4(adap->params.chip)) {
|
||||
err = t4_sge_alloc_eth_txq(adap, &s->ptptxq, adap->port[0],
|
||||
netdev_get_tx_queue(adap->port[0], 0)
|
||||
, s->fw_evtq.cntxt_id);
|
||||
, s->fw_evtq.cntxt_id, false);
|
||||
if (err)
|
||||
goto freeout;
|
||||
}
|
||||
@ -4325,6 +4328,14 @@ static int adap_init0(struct adapter *adap)
|
||||
if (ret < 0)
|
||||
goto bye;
|
||||
|
||||
/* Grab the SGE Doorbell Queue Timer values. If successful, that
|
||||
* indicates that the Firmware and Hardware support this.
|
||||
*/
|
||||
ret = t4_read_sge_dbqtimers(adap, ARRAY_SIZE(adap->sge.dbqtimer_val),
|
||||
adap->sge.dbqtimer_val);
|
||||
if (!ret)
|
||||
adap->flags |= SGE_DBQ_TIMER;
|
||||
|
||||
if (is_bypass_device(adap->pdev->device))
|
||||
adap->params.bypass = 1;
|
||||
|
||||
|
@ -80,9 +80,10 @@
|
||||
* Max number of Tx descriptors we clean up at a time. Should be modest as
|
||||
* freeing skbs isn't cheap and it happens while holding locks. We just need
|
||||
* to free packets faster than they arrive, we eventually catch up and keep
|
||||
* the amortized cost reasonable. Must be >= 2 * TXQ_STOP_THRES.
|
||||
* the amortized cost reasonable. Must be >= 2 * TXQ_STOP_THRES. It should
|
||||
* also match the CIDX Flush Threshold.
|
||||
*/
|
||||
#define MAX_TX_RECLAIM 16
|
||||
#define MAX_TX_RECLAIM 32
|
||||
|
||||
/*
|
||||
* Max number of Rx buffers we replenish at a time. Again keep this modest,
|
||||
@ -400,6 +401,39 @@ static inline int reclaimable(const struct sge_txq *q)
|
||||
return hw_cidx < 0 ? hw_cidx + q->size : hw_cidx;
|
||||
}
|
||||
|
||||
/**
|
||||
* reclaim_completed_tx - reclaims completed TX Descriptors
|
||||
* @adap: the adapter
|
||||
* @q: the Tx queue to reclaim completed descriptors from
|
||||
* @maxreclaim: the maximum number of TX Descriptors to reclaim or -1
|
||||
* @unmap: whether the buffers should be unmapped for DMA
|
||||
*
|
||||
* Reclaims Tx Descriptors that the SGE has indicated it has processed,
|
||||
* and frees the associated buffers if possible. If @maxreclaim == -1, then
|
||||
* we'll use a default maximum. Called with the TX Queue locked.
|
||||
*/
|
||||
static inline int reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
|
||||
int maxreclaim, bool unmap)
|
||||
{
|
||||
int reclaim = reclaimable(q);
|
||||
|
||||
if (reclaim) {
|
||||
/*
|
||||
* Limit the amount of clean up work we do at a time to keep
|
||||
* the Tx lock hold time O(1).
|
||||
*/
|
||||
if (maxreclaim < 0)
|
||||
maxreclaim = MAX_TX_RECLAIM;
|
||||
if (reclaim > maxreclaim)
|
||||
reclaim = maxreclaim;
|
||||
|
||||
free_tx_desc(adap, q, reclaim, unmap);
|
||||
q->in_use -= reclaim;
|
||||
}
|
||||
|
||||
return reclaim;
|
||||
}
|
||||
|
||||
/**
|
||||
* cxgb4_reclaim_completed_tx - reclaims completed Tx descriptors
|
||||
* @adap: the adapter
|
||||
@ -410,22 +444,10 @@ static inline int reclaimable(const struct sge_txq *q)
|
||||
* and frees the associated buffers if possible. Called with the Tx
|
||||
* queue locked.
|
||||
*/
|
||||
inline void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
|
||||
bool unmap)
|
||||
void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
|
||||
bool unmap)
|
||||
{
|
||||
int avail = reclaimable(q);
|
||||
|
||||
if (avail) {
|
||||
/*
|
||||
* Limit the amount of clean up work we do at a time to keep
|
||||
* the Tx lock hold time O(1).
|
||||
*/
|
||||
if (avail > MAX_TX_RECLAIM)
|
||||
avail = MAX_TX_RECLAIM;
|
||||
|
||||
free_tx_desc(adap, q, avail, unmap);
|
||||
q->in_use -= avail;
|
||||
}
|
||||
(void)reclaim_completed_tx(adap, q, -1, unmap);
|
||||
}
|
||||
EXPORT_SYMBOL(cxgb4_reclaim_completed_tx);
|
||||
|
||||
@ -1287,6 +1309,44 @@ static inline void t6_fill_tnl_lso(struct sk_buff *skb,
|
||||
tnl_lso->EthLenOffset_Size = htonl(CPL_TX_TNL_LSO_SIZE_V(skb->len));
|
||||
}
|
||||
|
||||
/**
|
||||
* t4_sge_eth_txq_egress_update - handle Ethernet TX Queue update
|
||||
* @adap: the adapter
|
||||
* @eq: the Ethernet TX Queue
|
||||
* @maxreclaim: the maximum number of TX Descriptors to reclaim or -1
|
||||
*
|
||||
* We're typically called here to update the state of an Ethernet TX
|
||||
* Queue with respect to the hardware's progress in consuming the TX
|
||||
* Work Requests that we've put on that Egress Queue. This happens
|
||||
* when we get Egress Queue Update messages and also prophylactically
|
||||
* in regular timer-based Ethernet TX Queue maintenance.
|
||||
*/
|
||||
int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq,
|
||||
int maxreclaim)
|
||||
{
|
||||
struct sge_txq *q = &eq->q;
|
||||
unsigned int reclaimed;
|
||||
|
||||
if (!q->in_use || !__netif_tx_trylock(eq->txq))
|
||||
return 0;
|
||||
|
||||
/* Reclaim pending completed TX Descriptors. */
|
||||
reclaimed = reclaim_completed_tx(adap, &eq->q, maxreclaim, true);
|
||||
|
||||
/* If the TX Queue is currently stopped and there's now more than half
|
||||
* the queue available, restart it. Otherwise bail out since the rest
|
||||
* of what we want to do here deals with the possibility of shipping any
|
||||
* currently buffered Coalesced TX Work Request.
|
||||
*/
|
||||
if (netif_tx_queue_stopped(eq->txq) && txq_avail(q) > (q->size / 2)) {
|
||||
netif_tx_wake_queue(eq->txq);
|
||||
eq->q.restarts++;
|
||||
}
|
||||
|
||||
__netif_tx_unlock(eq->txq);
|
||||
return reclaimed;
|
||||
}
|
||||
|
||||
/**
|
||||
* cxgb4_eth_xmit - add a packet to an Ethernet Tx queue
|
||||
* @skb: the packet
|
||||
@ -1357,7 +1417,7 @@ out_free: dev_kfree_skb_any(skb);
|
||||
}
|
||||
skb_tx_timestamp(skb);
|
||||
|
||||
cxgb4_reclaim_completed_tx(adap, &q->q, true);
|
||||
reclaim_completed_tx(adap, &q->q, -1, true);
|
||||
cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
|
||||
|
||||
#ifdef CONFIG_CHELSIO_T4_FCOE
|
||||
@ -1400,8 +1460,25 @@ out_free: dev_kfree_skb_any(skb);
|
||||
|
||||
wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2));
|
||||
if (unlikely(credits < ETHTXQ_STOP_THRES)) {
|
||||
/* After we're done injecting the Work Request for this
|
||||
* packet, we'll be below our "stop threshold" so stop the TX
|
||||
* Queue now and schedule a request for an SGE Egress Queue
|
||||
* Update message. The queue will get started later on when
|
||||
* the firmware processes this Work Request and sends us an
|
||||
* Egress Queue Status Update message indicating that space
|
||||
* has opened up.
|
||||
*/
|
||||
eth_txq_stop(q);
|
||||
wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
|
||||
|
||||
/* If we're using the SGE Doorbell Queue Timer facility, we
|
||||
* don't need to ask the Firmware to send us Egress Queue CIDX
|
||||
* Updates: the Hardware will do this automatically. And
|
||||
* since we send the Ingress Queue CIDX Updates to the
|
||||
* corresponding Ethernet Response Queue, we'll get them very
|
||||
* quickly.
|
||||
*/
|
||||
if (!q->dbqt)
|
||||
wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
|
||||
}
|
||||
|
||||
wr = (void *)&q->q.desc[q->q.pidx];
|
||||
@ -1671,7 +1748,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
|
||||
/* Take this opportunity to reclaim any TX Descriptors whose DMA
|
||||
* transfers have completed.
|
||||
*/
|
||||
cxgb4_reclaim_completed_tx(adapter, &txq->q, true);
|
||||
reclaim_completed_tx(adapter, &txq->q, -1, true);
|
||||
|
||||
/* Calculate the number of flits and TX Descriptors we're going to
|
||||
* need along with how many TX Descriptors will be left over after
|
||||
@ -1715,7 +1792,16 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
|
||||
* has opened up.
|
||||
*/
|
||||
eth_txq_stop(txq);
|
||||
wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
|
||||
|
||||
/* If we're using the SGE Doorbell Queue Timer facility, we
|
||||
* don't need to ask the Firmware to send us Egress Queue CIDX
|
||||
* Updates: the Hardware will do this automatically. And
|
||||
* since we send the Ingress Queue CIDX Updates to the
|
||||
* corresponding Ethernet Response Queue, we'll get them very
|
||||
* quickly.
|
||||
*/
|
||||
if (!txq->dbqt)
|
||||
wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
|
||||
}
|
||||
|
||||
/* Start filling in our Work Request. Note that we do _not_ handle
|
||||
@ -2793,6 +2879,74 @@ static int t4_tx_hststamp(struct adapter *adapter, struct sk_buff *skb,
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* t4_tx_completion_handler - handle CPL_SGE_EGR_UPDATE messages
|
||||
* @rspq: Ethernet RX Response Queue associated with Ethernet TX Queue
|
||||
* @rsp: Response Entry pointer into Response Queue
|
||||
* @gl: Gather List pointer
|
||||
*
|
||||
* For adapters which support the SGE Doorbell Queue Timer facility,
|
||||
* we configure the Ethernet TX Queues to send CIDX Updates to the
|
||||
* Associated Ethernet RX Response Queue with CPL_SGE_EGR_UPDATE
|
||||
* messages. This adds a small load to PCIe Link RX bandwidth and,
|
||||
* potentially, higher CPU Interrupt load, but allows us to respond
|
||||
* much more quickly to the CIDX Updates. This is important for
|
||||
* Upper Layer Software which isn't willing to have a large amount
|
||||
* of TX Data outstanding before receiving DMA Completions.
|
||||
*/
|
||||
static void t4_tx_completion_handler(struct sge_rspq *rspq,
|
||||
const __be64 *rsp,
|
||||
const struct pkt_gl *gl)
|
||||
{
|
||||
u8 opcode = ((const struct rss_header *)rsp)->opcode;
|
||||
struct port_info *pi = netdev_priv(rspq->netdev);
|
||||
struct adapter *adapter = rspq->adap;
|
||||
struct sge *s = &adapter->sge;
|
||||
struct sge_eth_txq *txq;
|
||||
|
||||
/* skip RSS header */
|
||||
rsp++;
|
||||
|
||||
/* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
|
||||
*/
|
||||
if (unlikely(opcode == CPL_FW4_MSG &&
|
||||
((const struct cpl_fw4_msg *)rsp)->type ==
|
||||
FW_TYPE_RSSCPL)) {
|
||||
rsp++;
|
||||
opcode = ((const struct rss_header *)rsp)->opcode;
|
||||
rsp++;
|
||||
}
|
||||
|
||||
if (unlikely(opcode != CPL_SGE_EGR_UPDATE)) {
|
||||
pr_info("%s: unexpected FW4/CPL %#x on Rx queue\n",
|
||||
__func__, opcode);
|
||||
return;
|
||||
}
|
||||
|
||||
txq = &s->ethtxq[pi->first_qset + rspq->idx];
|
||||
|
||||
/* We've got the Hardware Consumer Index Update in the Egress Update
|
||||
* message. If we're using the SGE Doorbell Queue Timer mechanism,
|
||||
* these Egress Update messages will be our sole CIDX Updates we get
|
||||
* since we don't want to chew up PCIe bandwidth for both Ingress
|
||||
* Messages and Status Page writes. However, the code which manages
|
||||
* reclaiming successfully DMA'ed TX Work Requests uses the CIDX value
|
||||
* stored in the Status Page at the end of the TX Queue. It's easiest
|
||||
* to simply copy the CIDX Update value from the Egress Update message
|
||||
* to the Status Page. Also note that no Endian issues need to be
|
||||
* considered here since both are Big Endian and we're just copying
|
||||
* bytes consistently ...
|
||||
*/
|
||||
if (txq->dbqt) {
|
||||
struct cpl_sge_egr_update *egr;
|
||||
|
||||
egr = (struct cpl_sge_egr_update *)rsp;
|
||||
WRITE_ONCE(txq->q.stat->cidx, egr->cidx);
|
||||
}
|
||||
|
||||
t4_sge_eth_txq_egress_update(adapter, txq, -1);
|
||||
}
|
||||
|
||||
/**
|
||||
* t4_ethrx_handler - process an ingress ethernet packet
|
||||
* @q: the response queue that received the packet
|
||||
@ -2816,6 +2970,15 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
|
||||
struct port_info *pi;
|
||||
int ret = 0;
|
||||
|
||||
/* If we're looking at TX Queue CIDX Update, handle that separately
|
||||
* and return.
|
||||
*/
|
||||
if (unlikely((*(u8 *)rsp == CPL_FW4_MSG) ||
|
||||
(*(u8 *)rsp == CPL_SGE_EGR_UPDATE))) {
|
||||
t4_tx_completion_handler(q, rsp, si);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (unlikely(*(u8 *)rsp == cpl_trace_pkt))
|
||||
return handle_trace_pkt(q->adap, si);
|
||||
|
||||
@ -3289,10 +3452,10 @@ done:
|
||||
|
||||
static void sge_tx_timer_cb(struct timer_list *t)
|
||||
{
|
||||
unsigned long m;
|
||||
unsigned int i, budget;
|
||||
struct adapter *adap = from_timer(adap, t, sge.tx_timer);
|
||||
struct sge *s = &adap->sge;
|
||||
unsigned long m, period;
|
||||
unsigned int i, budget;
|
||||
|
||||
for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
|
||||
for (m = s->txq_maperr[i]; m; m &= m - 1) {
|
||||
@ -3320,29 +3483,29 @@ static void sge_tx_timer_cb(struct timer_list *t)
|
||||
budget = MAX_TIMER_TX_RECLAIM;
|
||||
i = s->ethtxq_rover;
|
||||
do {
|
||||
struct sge_eth_txq *q = &s->ethtxq[i];
|
||||
|
||||
if (q->q.in_use &&
|
||||
time_after_eq(jiffies, q->txq->trans_start + HZ / 100) &&
|
||||
__netif_tx_trylock(q->txq)) {
|
||||
int avail = reclaimable(&q->q);
|
||||
|
||||
if (avail) {
|
||||
if (avail > budget)
|
||||
avail = budget;
|
||||
|
||||
free_tx_desc(adap, &q->q, avail, true);
|
||||
q->q.in_use -= avail;
|
||||
budget -= avail;
|
||||
}
|
||||
__netif_tx_unlock(q->txq);
|
||||
}
|
||||
budget -= t4_sge_eth_txq_egress_update(adap, &s->ethtxq[i],
|
||||
budget);
|
||||
if (!budget)
|
||||
break;
|
||||
|
||||
if (++i >= s->ethqsets)
|
||||
i = 0;
|
||||
} while (budget && i != s->ethtxq_rover);
|
||||
} while (i != s->ethtxq_rover);
|
||||
s->ethtxq_rover = i;
|
||||
mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2));
|
||||
|
||||
if (budget == 0) {
|
||||
/* If we found too many reclaimable packets schedule a timer
|
||||
* in the near future to continue where we left off.
|
||||
*/
|
||||
period = 2;
|
||||
} else {
|
||||
/* We reclaimed all reclaimable TX Descriptors, so reschedule
|
||||
* at the normal period.
|
||||
*/
|
||||
period = TX_QCHECK_PERIOD;
|
||||
}
|
||||
|
||||
mod_timer(&s->tx_timer, jiffies + period);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -3421,7 +3584,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
|
||||
: FW_IQ_IQTYPE_OFLD));
|
||||
|
||||
if (fl) {
|
||||
enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip);
|
||||
unsigned int chip_ver =
|
||||
CHELSIO_CHIP_VERSION(adap->params.chip);
|
||||
|
||||
/* Allocate the ring for the hardware free list (with space
|
||||
* for its status page) along with the associated software
|
||||
@ -3459,10 +3623,10 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
|
||||
* the smaller 64-byte value there).
|
||||
*/
|
||||
c.fl0dcaen_to_fl0cidxfthresh =
|
||||
htons(FW_IQ_CMD_FL0FBMIN_V(chip <= CHELSIO_T5 ?
|
||||
htons(FW_IQ_CMD_FL0FBMIN_V(chip_ver <= CHELSIO_T5 ?
|
||||
FETCHBURSTMIN_128B_X :
|
||||
FETCHBURSTMIN_64B_X) |
|
||||
FW_IQ_CMD_FL0FBMAX_V((chip <= CHELSIO_T5) ?
|
||||
FETCHBURSTMIN_64B_T6_X) |
|
||||
FW_IQ_CMD_FL0FBMAX_V((chip_ver <= CHELSIO_T5) ?
|
||||
FETCHBURSTMAX_512B_X :
|
||||
FETCHBURSTMAX_256B_X));
|
||||
c.fl0size = htons(flsz);
|
||||
@ -3584,14 +3748,24 @@ static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id)
|
||||
adap->sge.egr_map[id - adap->sge.egr_start] = q;
|
||||
}
|
||||
|
||||
/**
|
||||
* t4_sge_alloc_eth_txq - allocate an Ethernet TX Queue
|
||||
* @adap: the adapter
|
||||
* @txq: the SGE Ethernet TX Queue to initialize
|
||||
* @dev: the Linux Network Device
|
||||
* @netdevq: the corresponding Linux TX Queue
|
||||
* @iqid: the Ingress Queue to which to deliver CIDX Update messages
|
||||
* @dbqt: whether this TX Queue will use the SGE Doorbell Queue Timers
|
||||
*/
|
||||
int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
|
||||
struct net_device *dev, struct netdev_queue *netdevq,
|
||||
unsigned int iqid)
|
||||
unsigned int iqid, u8 dbqt)
|
||||
{
|
||||
int ret, nentries;
|
||||
struct fw_eq_eth_cmd c;
|
||||
struct sge *s = &adap->sge;
|
||||
unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
|
||||
struct port_info *pi = netdev_priv(dev);
|
||||
struct sge *s = &adap->sge;
|
||||
struct fw_eq_eth_cmd c;
|
||||
int ret, nentries;
|
||||
|
||||
/* Add status entries */
|
||||
nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
|
||||
@ -3610,19 +3784,47 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
|
||||
FW_EQ_ETH_CMD_VFN_V(0));
|
||||
c.alloc_to_len16 = htonl(FW_EQ_ETH_CMD_ALLOC_F |
|
||||
FW_EQ_ETH_CMD_EQSTART_F | FW_LEN16(c));
|
||||
c.viid_pkd = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
|
||||
FW_EQ_ETH_CMD_VIID_V(pi->viid));
|
||||
|
||||
/* For TX Ethernet Queues using the SGE Doorbell Queue Timer
|
||||
* mechanism, we use Ingress Queue messages for Hardware Consumer
|
||||
* Index Updates on the TX Queue. Otherwise we have the Hardware
|
||||
* write the CIDX Updates into the Status Page at the end of the
|
||||
* TX Queue.
|
||||
*/
|
||||
c.autoequiqe_to_viid = htonl((dbqt
|
||||
? FW_EQ_ETH_CMD_AUTOEQUIQE_F
|
||||
: FW_EQ_ETH_CMD_AUTOEQUEQE_F) |
|
||||
FW_EQ_ETH_CMD_VIID_V(pi->viid));
|
||||
|
||||
c.fetchszm_to_iqid =
|
||||
htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) |
|
||||
htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(dbqt
|
||||
? HOSTFCMODE_INGRESS_QUEUE_X
|
||||
: HOSTFCMODE_STATUS_PAGE_X) |
|
||||
FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) |
|
||||
FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid));
|
||||
|
||||
/* Note that the CIDX Flush Threshold should match MAX_TX_RECLAIM. */
|
||||
c.dcaen_to_eqsize =
|
||||
htonl(FW_EQ_ETH_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) |
|
||||
htonl(FW_EQ_ETH_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
|
||||
? FETCHBURSTMIN_64B_X
|
||||
: FETCHBURSTMIN_64B_T6_X) |
|
||||
FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
|
||||
FW_EQ_ETH_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
|
||||
FW_EQ_ETH_CMD_EQSIZE_V(nentries));
|
||||
|
||||
c.eqaddr = cpu_to_be64(txq->q.phys_addr);
|
||||
|
||||
/* If we're using the SGE Doorbell Queue Timer mechanism, pass in the
|
||||
* currently configured Timer Index. This can be changed later via an
|
||||
* ethtool -C tx-usecs {Timer Val} command. Note that the SGE
|
||||
* Doorbell Queue mode is currently automatically enabled in the
|
||||
* Firmware by setting either AUTOEQUEQE or AUTOEQUIQE ...
|
||||
*/
|
||||
if (dbqt)
|
||||
c.timeren_timerix =
|
||||
cpu_to_be32(FW_EQ_ETH_CMD_TIMEREN_F |
|
||||
FW_EQ_ETH_CMD_TIMERIX_V(txq->dbqtimerix));
|
||||
|
||||
ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);
|
||||
if (ret) {
|
||||
kfree(txq->q.sdesc);
|
||||
@ -3639,6 +3841,8 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
|
||||
txq->txq = netdevq;
|
||||
txq->tso = txq->tx_cso = txq->vlan_ins = 0;
|
||||
txq->mapping_err = 0;
|
||||
txq->dbqt = dbqt;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -3646,10 +3850,11 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
|
||||
struct net_device *dev, unsigned int iqid,
|
||||
unsigned int cmplqid)
|
||||
{
|
||||
int ret, nentries;
|
||||
struct fw_eq_ctrl_cmd c;
|
||||
struct sge *s = &adap->sge;
|
||||
unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
|
||||
struct port_info *pi = netdev_priv(dev);
|
||||
struct sge *s = &adap->sge;
|
||||
struct fw_eq_ctrl_cmd c;
|
||||
int ret, nentries;
|
||||
|
||||
/* Add status entries */
|
||||
nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
|
||||
@ -3673,7 +3878,9 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
|
||||
FW_EQ_CTRL_CMD_PCIECHN_V(pi->tx_chan) |
|
||||
FW_EQ_CTRL_CMD_FETCHRO_F | FW_EQ_CTRL_CMD_IQID_V(iqid));
|
||||
c.dcaen_to_eqsize =
|
||||
htonl(FW_EQ_CTRL_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) |
|
||||
htonl(FW_EQ_CTRL_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
|
||||
? FETCHBURSTMIN_64B_X
|
||||
: FETCHBURSTMIN_64B_T6_X) |
|
||||
FW_EQ_CTRL_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
|
||||
FW_EQ_CTRL_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
|
||||
FW_EQ_CTRL_CMD_EQSIZE_V(nentries));
|
||||
@ -3713,6 +3920,7 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
|
||||
struct net_device *dev, unsigned int iqid,
|
||||
unsigned int uld_type)
|
||||
{
|
||||
unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
|
||||
int ret, nentries;
|
||||
struct fw_eq_ofld_cmd c;
|
||||
struct sge *s = &adap->sge;
|
||||
@ -3743,7 +3951,9 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
|
||||
FW_EQ_OFLD_CMD_PCIECHN_V(pi->tx_chan) |
|
||||
FW_EQ_OFLD_CMD_FETCHRO_F | FW_EQ_OFLD_CMD_IQID_V(iqid));
|
||||
c.dcaen_to_eqsize =
|
||||
htonl(FW_EQ_OFLD_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) |
|
||||
htonl(FW_EQ_OFLD_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
|
||||
? FETCHBURSTMIN_64B_X
|
||||
: FETCHBURSTMIN_64B_T6_X) |
|
||||
FW_EQ_OFLD_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
|
||||
FW_EQ_OFLD_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
|
||||
FW_EQ_OFLD_CMD_EQSIZE_V(nentries));
|
||||
|
@ -6712,6 +6712,47 @@ int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox, int ctxt_type)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* t4_read_sge_dbqtimers - read SGE Doorbell Queue Timer values
|
||||
* @adap: the adapter
|
||||
* @ndbqtimers: size of the provided SGE Doorbell Queue Timer table
|
||||
* @dbqtimers: SGE Doorbell Queue Timer table
|
||||
*
|
||||
* Reads the SGE Doorbell Queue Timer values into the provided table.
|
||||
* Returns 0 on success (Firmware and Hardware support this feature),
|
||||
* an error on failure.
|
||||
*/
|
||||
int t4_read_sge_dbqtimers(struct adapter *adap, unsigned int ndbqtimers,
|
||||
u16 *dbqtimers)
|
||||
{
|
||||
int ret, dbqtimerix;
|
||||
|
||||
ret = 0;
|
||||
dbqtimerix = 0;
|
||||
while (dbqtimerix < ndbqtimers) {
|
||||
int nparams, param;
|
||||
u32 params[7], vals[7];
|
||||
|
||||
nparams = ndbqtimers - dbqtimerix;
|
||||
if (nparams > ARRAY_SIZE(params))
|
||||
nparams = ARRAY_SIZE(params);
|
||||
|
||||
for (param = 0; param < nparams; param++)
|
||||
params[param] =
|
||||
(FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
|
||||
FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DBQ_TIMER) |
|
||||
FW_PARAMS_PARAM_Y_V(dbqtimerix + param));
|
||||
ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
|
||||
nparams, params, vals);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
for (param = 0; param < nparams; param++)
|
||||
dbqtimers[dbqtimerix++] = vals[param];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* t4_fw_hello - establish communication with FW
|
||||
* @adap: the adapter
|
||||
|
@ -91,6 +91,7 @@ enum {
|
||||
SGE_CTXT_SIZE = 24, /* size of SGE context */
|
||||
SGE_NTIMERS = 6, /* # of interrupt holdoff timer values */
|
||||
SGE_NCOUNTERS = 4, /* # of interrupt packet counter values */
|
||||
SGE_NDBQTIMERS = 8, /* # of Doorbell Queue Timer values */
|
||||
SGE_MAX_IQ_SIZE = 65520,
|
||||
|
||||
SGE_TIMER_RSTRT_CNTR = 6, /* restart RX packet threshold counter */
|
||||
|
@ -71,12 +71,18 @@
|
||||
#define FETCHBURSTMIN_64B_X 2
|
||||
#define FETCHBURSTMIN_128B_X 3
|
||||
|
||||
/* T6 and later use a single-bit encoding for FetchBurstMin */
|
||||
#define FETCHBURSTMIN_64B_T6_X 0
|
||||
#define FETCHBURSTMIN_128B_T6_X 1
|
||||
|
||||
#define FETCHBURSTMAX_256B_X 2
|
||||
#define FETCHBURSTMAX_512B_X 3
|
||||
|
||||
#define HOSTFCMODE_INGRESS_QUEUE_X 1
|
||||
#define HOSTFCMODE_STATUS_PAGE_X 2
|
||||
|
||||
#define CIDXFLUSHTHRESH_32_X 5
|
||||
#define CIDXFLUSHTHRESH_128_X 7
|
||||
|
||||
#define UPDATEDELIVERY_INTERRUPT_X 1
|
||||
|
||||
|
@ -1254,6 +1254,8 @@ enum fw_params_param_dev {
|
||||
FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21,
|
||||
FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR = 0x24,
|
||||
FW_PARAMS_PARAM_DEV_OPAQUE_VIID_SMT_EXTN = 0x27,
|
||||
FW_PARAMS_PARAM_DEV_DBQ_TIMER = 0x29,
|
||||
FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK = 0x2A,
|
||||
};
|
||||
|
||||
/*
|
||||
@ -1322,6 +1324,7 @@ enum fw_params_param_dmaq {
|
||||
FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL = 0x11,
|
||||
FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH = 0x12,
|
||||
FW_PARAMS_PARAM_DMAQ_EQ_DCBPRIO_ETH = 0x13,
|
||||
FW_PARAMS_PARAM_DMAQ_EQ_TIMERIX = 0x15,
|
||||
FW_PARAMS_PARAM_DMAQ_CONM_CTXT = 0x20,
|
||||
};
|
||||
|
||||
@ -1751,8 +1754,8 @@ struct fw_eq_eth_cmd {
|
||||
__be32 fetchszm_to_iqid;
|
||||
__be32 dcaen_to_eqsize;
|
||||
__be64 eqaddr;
|
||||
__be32 viid_pkd;
|
||||
__be32 r8_lo;
|
||||
__be32 autoequiqe_to_viid;
|
||||
__be32 timeren_timerix;
|
||||
__be64 r9;
|
||||
};
|
||||
|
||||
@ -1847,6 +1850,10 @@ struct fw_eq_eth_cmd {
|
||||
#define FW_EQ_ETH_CMD_EQSIZE_S 0
|
||||
#define FW_EQ_ETH_CMD_EQSIZE_V(x) ((x) << FW_EQ_ETH_CMD_EQSIZE_S)
|
||||
|
||||
#define FW_EQ_ETH_CMD_AUTOEQUIQE_S 31
|
||||
#define FW_EQ_ETH_CMD_AUTOEQUIQE_V(x) ((x) << FW_EQ_ETH_CMD_AUTOEQUIQE_S)
|
||||
#define FW_EQ_ETH_CMD_AUTOEQUIQE_F FW_EQ_ETH_CMD_AUTOEQUIQE_V(1U)
|
||||
|
||||
#define FW_EQ_ETH_CMD_AUTOEQUEQE_S 30
|
||||
#define FW_EQ_ETH_CMD_AUTOEQUEQE_V(x) ((x) << FW_EQ_ETH_CMD_AUTOEQUEQE_S)
|
||||
#define FW_EQ_ETH_CMD_AUTOEQUEQE_F FW_EQ_ETH_CMD_AUTOEQUEQE_V(1U)
|
||||
@ -1854,6 +1861,19 @@ struct fw_eq_eth_cmd {
|
||||
#define FW_EQ_ETH_CMD_VIID_S 16
|
||||
#define FW_EQ_ETH_CMD_VIID_V(x) ((x) << FW_EQ_ETH_CMD_VIID_S)
|
||||
|
||||
#define FW_EQ_ETH_CMD_TIMEREN_S 3
|
||||
#define FW_EQ_ETH_CMD_TIMEREN_M 0x1
|
||||
#define FW_EQ_ETH_CMD_TIMEREN_V(x) ((x) << FW_EQ_ETH_CMD_TIMEREN_S)
|
||||
#define FW_EQ_ETH_CMD_TIMEREN_G(x) \
|
||||
(((x) >> FW_EQ_ETH_CMD_TIMEREN_S) & FW_EQ_ETH_CMD_TIMEREN_M)
|
||||
#define FW_EQ_ETH_CMD_TIMEREN_F FW_EQ_ETH_CMD_TIMEREN_V(1U)
|
||||
|
||||
#define FW_EQ_ETH_CMD_TIMERIX_S 0
|
||||
#define FW_EQ_ETH_CMD_TIMERIX_M 0x7
|
||||
#define FW_EQ_ETH_CMD_TIMERIX_V(x) ((x) << FW_EQ_ETH_CMD_TIMERIX_S)
|
||||
#define FW_EQ_ETH_CMD_TIMERIX_G(x) \
|
||||
(((x) >> FW_EQ_ETH_CMD_TIMERIX_S) & FW_EQ_ETH_CMD_TIMERIX_M)
|
||||
|
||||
struct fw_eq_ctrl_cmd {
|
||||
__be32 op_to_vfn;
|
||||
__be32 alloc_to_len16;
|
||||
|
@ -2268,7 +2268,7 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq,
|
||||
cmd.iqaddr = cpu_to_be64(rspq->phys_addr);
|
||||
|
||||
if (fl) {
|
||||
enum chip_type chip =
|
||||
unsigned int chip_ver =
|
||||
CHELSIO_CHIP_VERSION(adapter->params.chip);
|
||||
/*
|
||||
* Allocate the ring for the hardware free list (with space
|
||||
@ -2319,10 +2319,10 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq,
|
||||
*/
|
||||
cmd.fl0dcaen_to_fl0cidxfthresh =
|
||||
cpu_to_be16(
|
||||
FW_IQ_CMD_FL0FBMIN_V(chip <= CHELSIO_T5 ?
|
||||
FETCHBURSTMIN_128B_X :
|
||||
FETCHBURSTMIN_64B_X) |
|
||||
FW_IQ_CMD_FL0FBMAX_V((chip <= CHELSIO_T5) ?
|
||||
FW_IQ_CMD_FL0FBMIN_V(chip_ver <= CHELSIO_T5
|
||||
? FETCHBURSTMIN_128B_X
|
||||
: FETCHBURSTMIN_64B_T6_X) |
|
||||
FW_IQ_CMD_FL0FBMAX_V((chip_ver <= CHELSIO_T5) ?
|
||||
FETCHBURSTMAX_512B_X :
|
||||
FETCHBURSTMAX_256B_X));
|
||||
cmd.fl0size = cpu_to_be16(flsz);
|
||||
@ -2411,10 +2411,11 @@ int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq,
|
||||
struct net_device *dev, struct netdev_queue *devq,
|
||||
unsigned int iqid)
|
||||
{
|
||||
unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip);
|
||||
struct port_info *pi = netdev_priv(dev);
|
||||
struct fw_eq_eth_cmd cmd, rpl;
|
||||
struct sge *s = &adapter->sge;
|
||||
int ret, nentries;
|
||||
struct fw_eq_eth_cmd cmd, rpl;
|
||||
struct port_info *pi = netdev_priv(dev);
|
||||
|
||||
/*
|
||||
* Calculate the size of the hardware TX Queue (including the Status
|
||||
@ -2448,17 +2449,19 @@ int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq,
|
||||
cmd.alloc_to_len16 = cpu_to_be32(FW_EQ_ETH_CMD_ALLOC_F |
|
||||
FW_EQ_ETH_CMD_EQSTART_F |
|
||||
FW_LEN16(cmd));
|
||||
cmd.viid_pkd = cpu_to_be32(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
|
||||
FW_EQ_ETH_CMD_VIID_V(pi->viid));
|
||||
cmd.autoequiqe_to_viid = cpu_to_be32(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
|
||||
FW_EQ_ETH_CMD_VIID_V(pi->viid));
|
||||
cmd.fetchszm_to_iqid =
|
||||
cpu_to_be32(FW_EQ_ETH_CMD_HOSTFCMODE_V(SGE_HOSTFCMODE_STPG) |
|
||||
FW_EQ_ETH_CMD_PCIECHN_V(pi->port_id) |
|
||||
FW_EQ_ETH_CMD_IQID_V(iqid));
|
||||
cmd.dcaen_to_eqsize =
|
||||
cpu_to_be32(FW_EQ_ETH_CMD_FBMIN_V(SGE_FETCHBURSTMIN_64B) |
|
||||
FW_EQ_ETH_CMD_FBMAX_V(SGE_FETCHBURSTMAX_512B) |
|
||||
cpu_to_be32(FW_EQ_ETH_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
|
||||
? FETCHBURSTMIN_64B_X
|
||||
: FETCHBURSTMIN_64B_T6_X) |
|
||||
FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
|
||||
FW_EQ_ETH_CMD_CIDXFTHRESH_V(
|
||||
SGE_CIDXFLUSHTHRESH_32) |
|
||||
CIDXFLUSHTHRESH_32_X) |
|
||||
FW_EQ_ETH_CMD_EQSIZE_V(nentries));
|
||||
cmd.eqaddr = cpu_to_be64(txq->q.phys_addr);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user