summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h26
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c11
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/sge.c160
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.c133
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_values.h9
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/sge.c15
8 files changed, 262 insertions, 96 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 524d11098c56..1f52d9f66e41 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -328,6 +328,17 @@ struct adapter_params {
unsigned int max_ird_adapter; /* Max read depth per adapter */
};
+/* State needed to monitor the forward progress of SGE Ingress DMA activities
+ * and possible hangs.
+ *
+ * The two-element arrays hold one entry per monitored Ingress DMA channel
+ * (reported as idma0/idma1 by t4_idma_monitor()).  Times are kept in the
+ * caller's tick domain, i.e. "hz" ticks per second as passed to
+ * t4_idma_monitor().
+ */
+struct sge_idma_monitor_state {
+ /* Same State Counter value, in Core Clock ticks, that corresponds to 1s */
+ unsigned int idma_1s_thresh; /* 1s threshold in Core Clock ticks */
+ /* 0 while the channel is making progress; otherwise how long, in ticks,
+  * the channel has appeared stalled
+  */
+ unsigned int idma_stalled[2]; /* synthesized stalled timers in ticks */
+ /* IDMA state captured the last time a stall warning was issued */
+ unsigned int idma_state[2]; /* IDMA Hang detect state */
+ /* Ingress Queue ID captured the last time a stall warning was issued */
+ unsigned int idma_qid[2]; /* IDMA Hung Ingress Queue ID */
+ /* ticks remaining until the next repeated stall warning is due */
+ unsigned int idma_warn[2]; /* time to warning in ticks */
+};
+
#include "t4fw_api.h"
#define FW_VERSION(chip) ( \
@@ -630,12 +641,7 @@ struct sge {
u32 fl_align; /* response queue message alignment */
u32 fl_starve_thres; /* Free List starvation threshold */
- /* State variables for detecting an SGE Ingress DMA hang */
- unsigned int idma_1s_thresh;/* SGE same State Counter 1s threshold */
- unsigned int idma_stalled[2];/* SGE synthesized stalled timers in HZ */
- unsigned int idma_state[2]; /* SGE IDMA Hang detect state */
- unsigned int idma_qid[2]; /* SGE IDMA Hung Ingress Queue ID */
-
+ struct sge_idma_monitor_state idma_monitor;
unsigned int egr_start;
unsigned int egr_sz;
unsigned int ingr_start;
@@ -1055,7 +1061,7 @@ int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb);
int t4_ofld_send(struct adapter *adap, struct sk_buff *skb);
int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
struct net_device *dev, int intr_idx,
- struct sge_fl *fl, rspq_handler_t hnd);
+ struct sge_fl *fl, rspq_handler_t hnd, int cong);
int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
struct net_device *dev, struct netdev_queue *netdevq,
unsigned int iqid);
@@ -1215,6 +1221,7 @@ int t4_mc_read(struct adapter *adap, int idx, u32 addr, __be32 *data,
u64 *parity);
int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data,
u64 *parity);
+unsigned int t4_get_mps_bg_map(struct adapter *adapter, int idx);
void t4_pmtx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]);
void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]);
int t4_read_cim_ibq(struct adapter *adap, unsigned int qid, u32 *data,
@@ -1310,4 +1317,9 @@ int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox,
u32 addr, u32 val);
void t4_sge_decode_idma_state(struct adapter *adapter, int state);
void t4_free_mem(void *addr);
+void t4_idma_monitor_init(struct adapter *adapter,
+ struct sge_idma_monitor_state *idma);
+void t4_idma_monitor(struct adapter *adapter,
+ struct sge_idma_monitor_state *idma,
+ int hz, int ticks);
#endif /* __CXGB4_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 803d91beec6f..6c781c1b8fb8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -977,7 +977,7 @@ static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q,
err = t4_sge_alloc_rxq(adap, &q->rspq, false,
adap->port[i / per_chan],
msi_idx, q->fl.size ? &q->fl : NULL,
- uldrx_handler);
+ uldrx_handler, 0);
if (err)
return err;
memset(&q->stats, 0, sizeof(q->stats));
@@ -1007,7 +1007,7 @@ static int setup_sge_queues(struct adapter *adap)
msi_idx = 1; /* vector 0 is for non-queue interrupts */
else {
err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
- NULL, NULL);
+ NULL, NULL, -1);
if (err)
return err;
msi_idx = -((int)s->intrq.abs_id + 1);
@@ -1027,7 +1027,7 @@ static int setup_sge_queues(struct adapter *adap)
* new/deleted queues.
*/
err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
- msi_idx, NULL, fwevtq_handler);
+ msi_idx, NULL, fwevtq_handler, -1);
if (err) {
freeout: t4_free_sge_resources(adap);
return err;
@@ -1044,7 +1044,9 @@ freeout: t4_free_sge_resources(adap);
msi_idx++;
err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev,
msi_idx, &q->fl,
- t4_ethrx_handler);
+ t4_ethrx_handler,
+ t4_get_mps_bg_map(adap,
+ pi->tx_chan));
if (err)
goto freeout;
q->rspq.idx = j;
@@ -2432,6 +2434,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld)
lli.max_ordird_qp = adap->params.max_ordird_qp;
lli.max_ird_adapter = adap->params.max_ird_adapter;
lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl;
+ lli.nodeid = dev_to_node(adap->pdev_dev);
handle = ulds[uld].add(&lli);
if (IS_ERR(handle)) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 78ab4d406ce2..df34293f35e8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -264,6 +264,7 @@ struct cxgb4_lld_info {
unsigned int max_ordird_qp; /* Max ORD/IRD depth per RDMA QP */
unsigned int max_ird_adapter; /* Max IRD memory per adapter */
bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */
+ int nodeid; /* device numa node id */
};
struct cxgb4_uld_info {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 0d2eddab04ef..898842df38fc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -100,16 +100,6 @@
*/
#define TX_QCHECK_PERIOD (HZ / 2)
-/* SGE Hung Ingress DMA Threshold Warning time (in Hz) and Warning Repeat Rate
- * (in RX_QCHECK_PERIOD multiples). If we find one of the SGE Ingress DMA
- * State Machines in the same state for this amount of time (in HZ) then we'll
- * issue a warning about a potential hang. We'll repeat the warning as the
- * SGE Ingress DMA Channel appears to be hung every N RX_QCHECK_PERIODs till
- * the situation clears. If the situation clears, we'll note that as well.
- */
-#define SGE_IDMA_WARN_THRESH (1 * HZ)
-#define SGE_IDMA_WARN_REPEAT (20 * RX_QCHECK_PERIOD)
-
/*
* Max number of Tx descriptors to be reclaimed by the Tx timer.
*/
@@ -1130,7 +1120,6 @@ cxgb_fcoe_offload(struct sk_buff *skb, struct adapter *adap,
*/
netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
{
- int len;
u32 wr_mid;
u64 cntrl, *end;
int qidx, credits;
@@ -1143,6 +1132,7 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
const struct skb_shared_info *ssi;
dma_addr_t addr[MAX_SKB_FRAGS + 1];
bool immediate = false;
+ int len, max_pkt_len;
#ifdef CONFIG_CHELSIO_T4_FCOE
int err;
#endif /* CONFIG_CHELSIO_T4_FCOE */
@@ -1156,6 +1146,13 @@ out_free: dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
+ /* Discard the packet if the length is greater than mtu */
+ max_pkt_len = ETH_HLEN + dev->mtu;
+ if (skb_vlan_tag_present(skb))
+ max_pkt_len += VLAN_HLEN;
+ if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len)))
+ goto out_free;
+
pi = netdev_priv(dev);
adap = pi->adapter;
qidx = skb_get_queue_mapping(skb);
@@ -2279,7 +2276,7 @@ irq_handler_t t4_intr_handler(struct adapter *adap)
static void sge_rx_timer_cb(unsigned long data)
{
unsigned long m;
- unsigned int i, idma_same_state_cnt[2];
+ unsigned int i;
struct adapter *adap = (struct adapter *)data;
struct sge *s = &adap->sge;
@@ -2300,67 +2297,16 @@ static void sge_rx_timer_cb(unsigned long data)
set_bit(id, s->starving_fl);
}
}
+ /* The remainder of the SGE RX Timer Callback routine is dedicated to
+ * global Master PF activities like checking for chip ingress stalls,
+ * etc.
+ */
+ if (!(adap->flags & MASTER_PF))
+ goto done;
- t4_write_reg(adap, SGE_DEBUG_INDEX_A, 13);
- idma_same_state_cnt[0] = t4_read_reg(adap, SGE_DEBUG_DATA_HIGH_A);
- idma_same_state_cnt[1] = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A);
-
- for (i = 0; i < 2; i++) {
- u32 debug0, debug11;
-
- /* If the Ingress DMA Same State Counter ("timer") is less
- * than 1s, then we can reset our synthesized Stall Timer and
- * continue. If we have previously emitted warnings about a
- * potential stalled Ingress Queue, issue a note indicating
- * that the Ingress Queue has resumed forward progress.
- */
- if (idma_same_state_cnt[i] < s->idma_1s_thresh) {
- if (s->idma_stalled[i] >= SGE_IDMA_WARN_THRESH)
- CH_WARN(adap, "SGE idma%d, queue%u,resumed after %d sec\n",
- i, s->idma_qid[i],
- s->idma_stalled[i]/HZ);
- s->idma_stalled[i] = 0;
- continue;
- }
-
- /* Synthesize an SGE Ingress DMA Same State Timer in the Hz
- * domain. The first time we get here it'll be because we
- * passed the 1s Threshold; each additional time it'll be
- * because the RX Timer Callback is being fired on its regular
- * schedule.
- *
- * If the stall is below our Potential Hung Ingress Queue
- * Warning Threshold, continue.
- */
- if (s->idma_stalled[i] == 0)
- s->idma_stalled[i] = HZ;
- else
- s->idma_stalled[i] += RX_QCHECK_PERIOD;
-
- if (s->idma_stalled[i] < SGE_IDMA_WARN_THRESH)
- continue;
-
- /* We'll issue a warning every SGE_IDMA_WARN_REPEAT Hz */
- if (((s->idma_stalled[i] - HZ) % SGE_IDMA_WARN_REPEAT) != 0)
- continue;
-
- /* Read and save the SGE IDMA State and Queue ID information.
- * We do this every time in case it changes across time ...
- */
- t4_write_reg(adap, SGE_DEBUG_INDEX_A, 0);
- debug0 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A);
- s->idma_state[i] = (debug0 >> (i * 9)) & 0x3f;
-
- t4_write_reg(adap, SGE_DEBUG_INDEX_A, 11);
- debug11 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A);
- s->idma_qid[i] = (debug11 >> (i * 16)) & 0xffff;
-
- CH_WARN(adap, "SGE idma%u, queue%u, maybe stuck state%u %dsecs (debug0=%#x, debug11=%#x)\n",
- i, s->idma_qid[i], s->idma_state[i],
- s->idma_stalled[i]/HZ, debug0, debug11);
- t4_sge_decode_idma_state(adap, s->idma_state[i]);
- }
+ t4_idma_monitor(adap, &s->idma_monitor, HZ, RX_QCHECK_PERIOD);
+done:
mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD);
}
@@ -2437,9 +2383,12 @@ static void __iomem *bar2_address(struct adapter *adapter,
return adapter->bar2 + bar2_qoffset;
}
+/* @intr_idx: MSI/MSI-X vector if >=0, -(absolute qid + 1) if < 0
+ * @cong: < 0 -> no congestion feedback, >= 0 -> congestion channel map
+ */
int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
struct net_device *dev, int intr_idx,
- struct sge_fl *fl, rspq_handler_t hnd)
+ struct sge_fl *fl, rspq_handler_t hnd, int cong)
{
int ret, flsz = 0;
struct fw_iq_cmd c;
@@ -2471,8 +2420,19 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
FW_IQ_CMD_IQESIZE_V(ilog2(iq->iqe_len) - 4));
c.iqsize = htons(iq->size);
c.iqaddr = cpu_to_be64(iq->phys_addr);
+ if (cong >= 0)
+ c.iqns_to_fl0congen = htonl(FW_IQ_CMD_IQFLINTCONGEN_F);
if (fl) {
+ /* Allocate the ring for the hardware free list (with space
+ * for its status page) along with the associated software
+ * descriptor ring. The free list size needs to be a multiple
+ * of the Egress Queue Unit and at least 2 Egress Units larger
+ * than the SGE's Egress Congestion Threshold
+ * (fl_starve_thres - 1).
+ */
+ if (fl->size < s->fl_starve_thres - 1 + 2 * 8)
+ fl->size = s->fl_starve_thres - 1 + 2 * 8;
fl->size = roundup(fl->size, 8);
fl->desc = alloc_ring(adap->pdev_dev, fl->size, sizeof(__be64),
sizeof(struct rx_sw_desc), &fl->addr,
@@ -2481,10 +2441,15 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
goto fl_nomem;
flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
- c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN_F |
- FW_IQ_CMD_FL0FETCHRO_F |
- FW_IQ_CMD_FL0DATARO_F |
- FW_IQ_CMD_FL0PADEN_F);
+ c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F |
+ FW_IQ_CMD_FL0FETCHRO_F |
+ FW_IQ_CMD_FL0DATARO_F |
+ FW_IQ_CMD_FL0PADEN_F);
+ if (cong >= 0)
+ c.iqns_to_fl0congen |=
+ htonl(FW_IQ_CMD_FL0CNGCHMAP_V(cong) |
+ FW_IQ_CMD_FL0CONGCIF_F |
+ FW_IQ_CMD_FL0CONGEN_F);
c.fl0dcaen_to_fl0cidxfthresh = htons(FW_IQ_CMD_FL0FBMIN_V(2) |
FW_IQ_CMD_FL0FBMAX_V(3));
c.fl0size = htons(flsz);
@@ -2532,6 +2497,41 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
&fl->bar2_qid);
refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL);
}
+
+ /* For T5 and later we attempt to set up the Congestion Manager values
+ * of the new RX Ethernet Queue. This should really be handled by
+ * firmware because it's more complex than any host driver wants to
+ * get involved with and it's different per chip and this is almost
+ * certainly wrong. Firmware would be wrong as well, but it would be
+ * a lot easier to fix in one place ... For now we do something very
+ * simple (and hopefully less wrong).
+ */
+ if (!is_t4(adap->params.chip) && cong >= 0) {
+ u32 param, val;
+ int i;
+
+ param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
+ FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
+ FW_PARAMS_PARAM_YZ_V(iq->cntxt_id));
+ if (cong == 0) {
+ val = CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_QUEUE_X);
+ } else {
+ val =
+ CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_CHANNEL_X);
+ for (i = 0; i < 4; i++) {
+ if (cong & (1 << i))
+ val |=
+ CONMCTXT_CNGCHMAP_V(1 << (i << 2));
+ }
+ }
+ ret = t4_set_params(adap, adap->mbox, adap->fn, 0, 1,
+ &param, &val);
+ if (ret)
+ dev_warn(adap->pdev_dev, "Failed to set Congestion"
+ " Manager Context for Ingress Queue %d: %d\n",
+ iq->cntxt_id, -ret);
+ }
+
return 0;
fl_nomem:
@@ -2637,7 +2637,7 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
txq->q.desc = alloc_ring(adap->pdev_dev, nentries,
sizeof(struct tx_desc), 0, &txq->q.phys_addr,
- NULL, 0, NUMA_NO_NODE);
+ NULL, 0, dev_to_node(adap->pdev_dev));
if (!txq->q.desc)
return -ENOMEM;
@@ -3067,11 +3067,11 @@ int t4_sge_init(struct adapter *adap)
egress_threshold = EGRTHRESHOLDPACKING_G(sge_conm_ctrl);
s->fl_starve_thres = 2*egress_threshold + 1;
+ t4_idma_monitor_init(adap, &s->idma_monitor);
+
setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap);
setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap);
- s->idma_1s_thresh = core_ticks_per_usec(adap) * 1000000; /* 1 s */
- s->idma_stalled[0] = 0;
- s->idma_stalled[1] = 0;
+
spin_lock_init(&s->intrq_lock);
return 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index e8578a742f2a..6164ef3e1376 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -3401,7 +3401,7 @@ void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[])
}
/**
- * get_mps_bg_map - return the buffer groups associated with a port
+ * t4_get_mps_bg_map - return the buffer groups associated with a port
* @adap: the adapter
* @idx: the port index
*
@@ -3409,7 +3409,7 @@ void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[])
* with the given port. Bit i is set if buffer group i is used by the
* port.
*/
-static unsigned int get_mps_bg_map(struct adapter *adap, int idx)
+unsigned int t4_get_mps_bg_map(struct adapter *adap, int idx)
{
u32 n = NUMPORTS_G(t4_read_reg(adap, MPS_CMN_CTL_A));
@@ -3460,7 +3460,7 @@ const char *t4_get_port_type_description(enum fw_port_type port_type)
*/
void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p)
{
- u32 bgmap = get_mps_bg_map(adap, idx);
+ u32 bgmap = t4_get_mps_bg_map(adap, idx);
#define GET_STAT(name) \
t4_read_reg64(adap, \
@@ -5717,3 +5717,130 @@ void t4_tp_read_la(struct adapter *adap, u64 *la_buf, unsigned int *wrptr)
t4_write_reg(adap, TP_DBG_LA_CONFIG_A,
cfg | adap->params.tp.la_mask);
}
+
+/* SGE Hung Ingress DMA Warning Threshold time and Warning Repeat Rate (in
+ * seconds). If we find one of the SGE Ingress DMA State Machines in the same
+ * state for more than the Warning Threshold then we'll issue a warning about
+ * a potential hang. We'll repeat the warning as the SGE Ingress DMA Channel
+ * appears to be hung every Warning Repeat seconds till the situation clears.
+ * If the situation clears, we'll note that as well.
+ */
+#define SGE_IDMA_WARN_THRESH 1
+#define SGE_IDMA_WARN_REPEAT 300
+
+/**
+ *	t4_idma_monitor_init - initialize SGE Ingress DMA Monitor
+ *	@adapter: the adapter
+ *	@idma: the adapter IDMA Monitor state
+ *
+ *	Puts an SGE Ingress DMA Monitor state block into its "no stall seen"
+ *	starting condition and records the Core Clock tick count that
+ *	corresponds to one second.
+ */
+void t4_idma_monitor_init(struct adapter *adapter,
+			  struct sge_idma_monitor_state *idma)
+{
+	int chan;
+
+	/* The SGE keeps hardware counters that advance on every Core Clock
+	 * tick for which an Ingress DMA State Engine is still in the state
+	 * it was in on the previous tick.  Being driven by the Core Clock,
+	 * those counters can only represent a few seconds (for instance, a
+	 * bit more than 7s with a 600MHz Core Clock).  We therefore only use
+	 * them to detect "unchanged for at least 1s" and build a wider,
+	 * software-maintained stall timer on top of that, which cannot
+	 * overflow in practice and lets us track hangs over a much longer
+	 * time frame.
+	 */
+	idma->idma_1s_thresh = core_ticks_per_usec(adapter) * 1000000; /* 1s */
+
+	/* No channel is considered stalled yet. */
+	for (chan = 0; chan < 2; chan++)
+		idma->idma_stalled[chan] = 0;
+}
+
+/**
+ *	t4_idma_monitor - monitor SGE Ingress DMA state
+ *	@adapter: the adapter
+ *	@idma: the adapter IDMA Monitor state
+ *	@hz: number of ticks/second
+ *	@ticks: number of ticks since the last IDMA Monitor call
+ *
+ *	Samples the two SGE Ingress DMA Same State Counters.  For each channel
+ *	whose counter is at or above the 1s threshold, a synthesized stall
+ *	timer in @idma is advanced.  A warning is logged when the stall is
+ *	first noticed and then again every SGE_IDMA_WARN_REPEAT seconds for as
+ *	long as it persists; a note is logged once the channel resumes.
+ */
+void t4_idma_monitor(struct adapter *adapter,
+		     struct sge_idma_monitor_state *idma,
+		     int hz, int ticks)
+{
+	/* Raw 32-bit register values, so keep them unsigned. */
+	u32 idma_same_state_cnt[2];
+	int i;
+
+	/* Read the SGE Debug Ingress DMA Same State Count registers.  These
+	 * are counters inside the SGE which count up on each clock when the
+	 * SGE finds its Ingress DMA State Engines in the same states they
+	 * were in the previous clock.  The counters will peg out at
+	 * 0xffffffff without wrapping around so once they pass the 1s
+	 * threshold they'll stay above that till the IDMA state changes.
+	 */
+	t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 13);
+	idma_same_state_cnt[0] = t4_read_reg(adapter, SGE_DEBUG_DATA_HIGH_A);
+	idma_same_state_cnt[1] = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A);
+
+	for (i = 0; i < 2; i++) {
+		u32 debug0, debug11;
+
+		/* If the Ingress DMA Same State Counter ("timer") is less
+		 * than 1s, then we can reset our synthesized Stall Timer and
+		 * continue.  If we have previously emitted warnings about a
+		 * potential stalled Ingress Queue, issue a note indicating
+		 * that the Ingress Queue has resumed forward progress.
+		 */
+		if (idma_same_state_cnt[i] < idma->idma_1s_thresh) {
+			if (idma->idma_stalled[i] >= SGE_IDMA_WARN_THRESH * hz)
+				dev_warn(adapter->pdev_dev, "SGE idma%d, queue %u, "
+					 "resumed after %d seconds\n",
+					 i, idma->idma_qid[i],
+					 idma->idma_stalled[i] / hz);
+			idma->idma_stalled[i] = 0;
+			continue;
+		}
+
+		/* Synthesize an SGE Ingress DMA Same State Timer in the Hz
+		 * domain.  The first time we get here it'll be because we
+		 * passed the 1s Threshold; each additional time it'll be
+		 * because the caller is invoking us on its regular schedule.
+		 *
+		 * If the stall is below our Potential Hung Ingress Queue
+		 * Warning Threshold, continue.
+		 */
+		if (idma->idma_stalled[i] == 0) {
+			idma->idma_stalled[i] = hz;
+			idma->idma_warn[i] = 0;
+		} else {
+			idma->idma_stalled[i] += ticks;
+
+			/* idma_warn[] is unsigned: count down but saturate at
+			 * zero.  A plain subtraction would wrap to a huge
+			 * value whenever the repeat interval is not an exact
+			 * multiple of @ticks and silence all further repeat
+			 * warnings for this stall.
+			 */
+			if (idma->idma_warn[i] > (unsigned int)ticks)
+				idma->idma_warn[i] -= ticks;
+			else
+				idma->idma_warn[i] = 0;
+		}
+
+		if (idma->idma_stalled[i] < SGE_IDMA_WARN_THRESH * hz)
+			continue;
+
+		/* We'll issue a warning every SGE_IDMA_WARN_REPEAT seconds.
+		 */
+		if (idma->idma_warn[i] > 0)
+			continue;
+		idma->idma_warn[i] = SGE_IDMA_WARN_REPEAT * hz;
+
+		/* Read and save the SGE IDMA State and Queue ID information.
+		 * We do this every time in case it changes across time ...
+		 * can't be too careful ...
+		 */
+		t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 0);
+		debug0 = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A);
+		idma->idma_state[i] = (debug0 >> (i * 9)) & 0x3f;
+
+		t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 11);
+		debug11 = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A);
+		idma->idma_qid[i] = (debug11 >> (i * 16)) & 0xffff;
+
+		dev_warn(adapter->pdev_dev, "SGE idma%u, queue %u, potentially stuck in "
+			 "state %u for %d seconds (debug0=%#x, debug11=%#x)\n",
+			 i, idma->idma_qid[i], idma->idma_state[i],
+			 idma->idma_stalled[i] / hz,
+			 debug0, debug11);
+		t4_sge_decode_idma_state(adapter, idma->idma_state[i]);
+	}
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h
index 19b2dcf6acde..c4d9952f814b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h
@@ -61,6 +61,15 @@
#define SGE_TIMERREGS 6
#define TIMERREG_COUNTER0_X 0
+/* Congestion Manager Definitions.
+ *
+ * Field encodings for the Congestion Manager Context value.  For each field
+ * the _S macro is its bit offset and _V(x) shifts a value into position.
+ * The _X macros are the values passed to CONMCTXT_CNGTPMODE_V() to select
+ * channel-based or queue-based congestion mode.
+ */
+#define CONMCTXT_CNGTPMODE_S 19
+#define CONMCTXT_CNGTPMODE_V(x) ((x) << CONMCTXT_CNGTPMODE_S)
+#define CONMCTXT_CNGCHMAP_S 0
+#define CONMCTXT_CNGCHMAP_V(x) ((x) << CONMCTXT_CNGCHMAP_S)
+#define CONMCTXT_CNGTPMODE_CHANNEL_X 2
+#define CONMCTXT_CNGTPMODE_QUEUE_X 1
+
/* T5 and later support a new BAR2-based doorbell mechanism for Egress Queues.
* The User Doorbells are each 128 bytes in length with a Simple Doorbell at
* offsets 8x and a Write Combining single 64-byte Egress Queue Unit
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 03fbfd1fb3df..d75fca7695eb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -1123,6 +1123,7 @@ enum fw_params_param_dmaq {
FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL = 0x11,
FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH = 0x12,
FW_PARAMS_PARAM_DMAQ_EQ_DCBPRIO_ETH = 0x13,
+ FW_PARAMS_PARAM_DMAQ_CONM_CTXT = 0x20,
};
enum fw_params_param_dev_diag {
@@ -1377,6 +1378,7 @@ struct fw_iq_cmd {
#define FW_IQ_CMD_IQFLINTCONGEN_S 27
#define FW_IQ_CMD_IQFLINTCONGEN_V(x) ((x) << FW_IQ_CMD_IQFLINTCONGEN_S)
+#define FW_IQ_CMD_IQFLINTCONGEN_F FW_IQ_CMD_IQFLINTCONGEN_V(1U)
#define FW_IQ_CMD_IQFLINTISCSIC_S 26
#define FW_IQ_CMD_IQFLINTISCSIC_V(x) ((x) << FW_IQ_CMD_IQFLINTISCSIC_S)
@@ -1399,6 +1401,7 @@ struct fw_iq_cmd {
#define FW_IQ_CMD_FL0CONGCIF_S 11
#define FW_IQ_CMD_FL0CONGCIF_V(x) ((x) << FW_IQ_CMD_FL0CONGCIF_S)
+#define FW_IQ_CMD_FL0CONGCIF_F FW_IQ_CMD_FL0CONGCIF_V(1U)
#define FW_IQ_CMD_FL0ONCHIP_S 10
#define FW_IQ_CMD_FL0ONCHIP_V(x) ((x) << FW_IQ_CMD_FL0ONCHIP_S)
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index 482f6de6817d..98cd47c373c5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -1160,7 +1160,7 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev)
{
u32 wr_mid;
u64 cntrl, *end;
- int qidx, credits;
+ int qidx, credits, max_pkt_len;
unsigned int flits, ndesc;
struct adapter *adapter;
struct sge_eth_txq *txq;
@@ -1183,6 +1183,13 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev)
if (unlikely(skb->len < fw_hdr_copy_len))
goto out_free;
+ /* Discard the packet if the length is greater than mtu */
+ max_pkt_len = ETH_HLEN + dev->mtu;
+ if (skb_vlan_tag_present(skb))
+ max_pkt_len += VLAN_HLEN;
+ if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len)))
+ goto out_free;
+
/*
* Figure out which TX Queue we're going to use.
*/
@@ -2243,8 +2250,12 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq,
* Allocate the ring for the hardware free list (with space
* for its status page) along with the associated software
* descriptor ring. The free list size needs to be a multiple
- * of the Egress Queue Unit.
+ * of the Egress Queue Unit and at least 2 Egress Units larger
+ * than the SGE's Egress Congestion Threshold
+ * (fl_starve_thres - 1).
*/
+ if (fl->size < s->fl_starve_thres - 1 + 2 * FL_PER_EQ_UNIT)
+ fl->size = s->fl_starve_thres - 1 + 2 * FL_PER_EQ_UNIT;
fl->size = roundup(fl->size, FL_PER_EQ_UNIT);
fl->desc = alloc_ring(adapter->pdev_dev, fl->size,
sizeof(__be64), sizeof(struct rx_sw_desc),