diff options
75 files changed, 791 insertions, 549 deletions
diff --git a/Documentation/devicetree/bindings/net/micrel.txt b/Documentation/devicetree/bindings/net/micrel.txt new file mode 100644 index 000000000000..98a3e61f9ee8 --- /dev/null +++ b/Documentation/devicetree/bindings/net/micrel.txt @@ -0,0 +1,18 @@ +Micrel PHY properties. + +These properties cover the base properties Micrel PHYs. + +Optional properties: + + - micrel,led-mode : LED mode value to set for PHYs with configurable LEDs. + + Configure the LED mode with single value. The list of PHYs and + the bits that are currently supported: + + KSZ8001: register 0x1e, bits 15..14 + KSZ8041: register 0x1e, bits 15..14 + KSZ8021: register 0x1f, bits 5..4 + KSZ8031: register 0x1f, bits 5..4 + KSZ8051: register 0x1f, bits 5..4 + + See the respective PHY datasheet for the mode values. diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index b41c9481b67b..f43e1c13b300 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -736,8 +736,8 @@ static void process_txdone_queue (struct fs_dev *dev, struct queue *q) skb = td->skb; if (skb == FS_VCC (ATM_SKB(skb)->vcc)->last_skb) { - wake_up_interruptible (& FS_VCC (ATM_SKB(skb)->vcc)->close_wait); FS_VCC (ATM_SKB(skb)->vcc)->last_skb = NULL; + wake_up_interruptible (& FS_VCC (ATM_SKB(skb)->vcc)->close_wait); } td->dev->ntxpckts--; @@ -1123,7 +1123,7 @@ static void fs_close(struct atm_vcc *atm_vcc) this sleep_on, we'll lose any reference to these packets. Memory leak! On the other hand, it's awfully convenient that we can abort a "close" that is taking too long. Maybe just use non-interruptible sleep on? -- REW */ - interruptible_sleep_on (& vcc->close_wait); + wait_event_interruptible(vcc->close_wait, !vcc->last_skb); } txtp = &atm_vcc->qos.txtp; diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 13ed54cf2c31..9988ac98b6d8 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -1739,10 +1739,10 @@ static int push_scqe(ns_dev * card, vc_map * vc, scq_info * scq, ns_scqe * tbd, } scq->full = 1; - spin_unlock_irqrestore(&scq->lock, flags); - interruptible_sleep_on_timeout(&scq->scqfull_waitq, - SCQFULL_TIMEOUT); - spin_lock_irqsave(&scq->lock, flags); + wait_event_interruptible_lock_irq_timeout(scq->scqfull_waitq, + scq->tail != scq->next, + scq->lock, + SCQFULL_TIMEOUT); if (scq->full) { spin_unlock_irqrestore(&scq->lock, flags); @@ -1789,10 +1789,10 @@ static int push_scqe(ns_dev * card, vc_map * vc, scq_info * scq, ns_scqe * tbd, scq->full = 1; if (has_run++) break; - spin_unlock_irqrestore(&scq->lock, flags); - interruptible_sleep_on_timeout(&scq->scqfull_waitq, - SCQFULL_TIMEOUT); - spin_lock_irqsave(&scq->lock, flags); + wait_event_interruptible_lock_irq_timeout(scq->scqfull_waitq, + scq->tail != scq->next, + scq->lock, + SCQFULL_TIMEOUT); } if (!scq->full) { diff --git a/drivers/isdn/divert/divert_procfs.c b/drivers/isdn/divert/divert_procfs.c index fb4f1bac0133..1c5dc345e7c5 100644 --- a/drivers/isdn/divert/divert_procfs.c +++ b/drivers/isdn/divert/divert_procfs.c @@ -86,12 +86,13 @@ isdn_divert_read(struct file *file, char __user *buf, size_t count, loff_t *off) struct divert_info *inf; int len; - if (!*((struct divert_info **) file->private_data)) { + if (!(inf = *((struct divert_info **) file->private_data))) { if (file->f_flags & O_NONBLOCK) return -EAGAIN; - interruptible_sleep_on(&(rd_queue)); + wait_event_interruptible(rd_queue, (inf = + *((struct divert_info **) file->private_data))); } - if (!(inf = *((struct divert_info **) file->private_data))) + if (!inf) return (0); inf->usage_cnt--; /* new usage count */ diff --git a/drivers/isdn/hisax/elsa.c b/drivers/isdn/hisax/elsa.c index 2be1c8a3bb5f..d8ef64da26f1 100644 --- a/drivers/isdn/hisax/elsa.c +++ b/drivers/isdn/hisax/elsa.c @@ -509,7 +509,8 @@ static void set_arcofi(struct IsdnCardState *cs, int bc) { cs->dc.isac.arcofi_bc = bc; arcofi_fsm(cs, ARCOFI_START, &ARCOFI_COP_5); - interruptible_sleep_on(&cs->dc.isac.arcofi_wait); + wait_event_interruptible(cs->dc.isac.arcofi_wait, + cs->dc.isac.arcofi_state == ARCOFI_NOP); } static int @@ -528,7 +529,8 @@ check_arcofi(struct IsdnCardState *cs) } cs->dc.isac.arcofi_bc = 0; arcofi_fsm(cs, ARCOFI_START, &ARCOFI_VERSION); - interruptible_sleep_on(&cs->dc.isac.arcofi_wait); + wait_event_interruptible(cs->dc.isac.arcofi_wait, + cs->dc.isac.arcofi_state == ARCOFI_NOP); if (!test_and_clear_bit(FLG_ARCOFI_ERROR, &cs->HW_Flags)) { debugl1(cs, "Arcofi response received %d bytes", cs->dc.isac.mon_rxp); p = cs->dc.isac.mon_rx; @@ -595,7 +597,8 @@ check_arcofi(struct IsdnCardState *cs) Elsa_Types[cs->subtyp], cs->hw.elsa.base + 8); arcofi_fsm(cs, ARCOFI_START, &ARCOFI_XOP_0); - interruptible_sleep_on(&cs->dc.isac.arcofi_wait); + wait_event_interruptible(cs->dc.isac.arcofi_wait, + cs->dc.isac.arcofi_state == ARCOFI_NOP); return (1); } return (0); diff --git a/drivers/isdn/hisax/elsa_ser.c b/drivers/isdn/hisax/elsa_ser.c index 3f84dd8f1757..a2a358c1dc8e 100644 --- a/drivers/isdn/hisax/elsa_ser.c +++ b/drivers/isdn/hisax/elsa_ser.c @@ -573,7 +573,8 @@ modem_l2l1(struct PStack *st, int pr, void *arg) test_and_clear_bit(BC_FLG_ACTIV, &bcs->Flag); bcs->cs->dc.isac.arcofi_bc = st->l1.bc; arcofi_fsm(bcs->cs, ARCOFI_START, &ARCOFI_XOP_0); - interruptible_sleep_on(&bcs->cs->dc.isac.arcofi_wait); + wait_event_interruptible(bcs->cs->dc.isac.arcofi_wait, + bcs->cs->dc.isac.arcofi_state == ARCOFI_NOP); bcs->cs->hw.elsa.MFlag = 1; } else { printk(KERN_WARNING "ElsaSer: unknown pr %x\n", pr); diff --git a/drivers/isdn/hysdn/hysdn_proclog.c b/drivers/isdn/hysdn/hysdn_proclog.c index b61e8d5e84ad..7b5fd8fb1761 100644 --- a/drivers/isdn/hysdn/hysdn_proclog.c +++ b/drivers/isdn/hysdn/hysdn_proclog.c @@ -175,14 +175,15 @@ hysdn_log_read(struct file *file, char __user *buf, size_t count, loff_t *off) int len; hysdn_card *card = PDE_DATA(file_inode(file)); - if (!*((struct log_data **) file->private_data)) { + if (!(inf = *((struct log_data **) file->private_data))) { struct procdata *pd = card->proclog; if (file->f_flags & O_NONBLOCK) return (-EAGAIN); - interruptible_sleep_on(&(pd->rd_queue)); + wait_event_interruptible(pd->rd_queue, (inf = + *((struct log_data **) file->private_data))); } - if (!(inf = *((struct log_data **) file->private_data))) + if (!inf) return (0); inf->usage_cnt--; /* new usage count */ diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c index 9bb12ba3191f..130f21643154 100644 --- a/drivers/isdn/i4l/isdn_common.c +++ b/drivers/isdn/i4l/isdn_common.c @@ -777,7 +777,8 @@ isdn_readbchan(int di, int channel, u_char *buf, u_char *fp, int len, wait_queue return 0; if (skb_queue_empty(&dev->drv[di]->rpqueue[channel])) { if (sleep) - interruptible_sleep_on(sleep); + wait_event_interruptible(*sleep, + !skb_queue_empty(&dev->drv[di]->rpqueue[channel])); else return 0; } @@ -1072,7 +1073,8 @@ isdn_read(struct file *file, char __user *buf, size_t count, loff_t *off) retval = -EAGAIN; goto out; } - interruptible_sleep_on(&(dev->info_waitq)); + wait_event_interruptible(dev->info_waitq, + file->private_data); } p = isdn_statstr(); file->private_data = NULL; @@ -1128,7 +1130,8 @@ isdn_read(struct file *file, char __user *buf, size_t count, loff_t *off) retval = -EAGAIN; goto out; } - interruptible_sleep_on(&(dev->drv[drvidx]->st_waitq)); + wait_event_interruptible(dev->drv[drvidx]->st_waitq, + dev->drv[drvidx]->stavail); } if (dev->drv[drvidx]->interface->readstat) { if (count > dev->drv[drvidx]->stavail) @@ -1188,8 +1191,8 @@ isdn_write(struct file *file, const char __user *buf, size_t count, loff_t *off) goto out; } chidx = isdn_minor2chan(minor); - while ((retval = isdn_writebuf_stub(drvidx, chidx, buf, count)) == 0) - interruptible_sleep_on(&dev->drv[drvidx]->snd_waitq[chidx]); + wait_event_interruptible(dev->drv[drvidx]->snd_waitq[chidx], + (retval = isdn_writebuf_stub(drvidx, chidx, buf, count))); goto out; } if (minor <= ISDN_MINOR_CTRLMAX) { diff --git a/drivers/isdn/pcbit/drv.c b/drivers/isdn/pcbit/drv.c index 1eaf62273903..f02cc506fbfa 100644 --- a/drivers/isdn/pcbit/drv.c +++ b/drivers/isdn/pcbit/drv.c @@ -796,6 +796,7 @@ static void set_running_timeout(unsigned long ptr) #endif dev = (struct pcbit_dev *) ptr; + dev->l2_state = L2_DOWN; wake_up_interruptible(&dev->set_running_wq); } @@ -818,7 +819,8 @@ static int set_protocol_running(struct pcbit_dev *dev) add_timer(&dev->set_running_timer); - interruptible_sleep_on(&dev->set_running_wq); + wait_event(dev->set_running_wq, dev->l2_state == L2_RUNNING || + dev->l2_state == L2_DOWN); del_timer(&dev->set_running_timer); @@ -842,8 +844,6 @@ static int set_protocol_running(struct pcbit_dev *dev) printk(KERN_DEBUG "pcbit: initialization failed\n"); printk(KERN_DEBUG "pcbit: firmware not loaded\n"); - dev->l2_state = L2_DOWN; - #ifdef DEBUG printk(KERN_DEBUG "Bank3 = %02x\n", readb(dev->sh_mem + BANK3)); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 5ee13af78e53..26bc25bd465d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -61,10 +61,14 @@ static void bnx2x_add_all_napi(struct bnx2x *bp) static int bnx2x_calc_num_queues(struct bnx2x *bp) { - return bnx2x_num_queues ? - min_t(int, bnx2x_num_queues, BNX2X_MAX_QUEUES(bp)) : - min_t(int, netif_get_num_default_rss_queues(), - BNX2X_MAX_QUEUES(bp)); + int nq = bnx2x_num_queues ? : netif_get_num_default_rss_queues(); + + /* Reduce memory usage in kdump environment by using only one queue */ + if (reset_devices) + nq = 1; + + nq = clamp(nq, 1, BNX2X_MAX_QUEUES(bp)); + return nq; } /** @@ -2228,8 +2232,10 @@ static int bnx2x_alloc_fw_stats_mem(struct bnx2x *bp) sizeof(struct per_queue_stats) * num_queue_stats + sizeof(struct stats_counter); - BNX2X_PCI_ALLOC(bp->fw_stats, &bp->fw_stats_mapping, - bp->fw_stats_data_sz + bp->fw_stats_req_sz); + bp->fw_stats = BNX2X_PCI_ALLOC(&bp->fw_stats_mapping, + bp->fw_stats_data_sz + bp->fw_stats_req_sz); + if (!bp->fw_stats) + goto alloc_mem_err; /* Set shortcuts */ bp->fw_stats_req = (struct bnx2x_fw_stats_req *)bp->fw_stats; @@ -4357,14 +4363,17 @@ static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index) if (!IS_FCOE_IDX(index)) { /* status blocks */ - if (!CHIP_IS_E1x(bp)) - BNX2X_PCI_ALLOC(sb->e2_sb, - &bnx2x_fp(bp, index, status_blk_mapping), - sizeof(struct host_hc_status_block_e2)); - else - BNX2X_PCI_ALLOC(sb->e1x_sb, - &bnx2x_fp(bp, index, status_blk_mapping), - sizeof(struct host_hc_status_block_e1x)); + if (!CHIP_IS_E1x(bp)) { + sb->e2_sb = BNX2X_PCI_ALLOC(&bnx2x_fp(bp, index, status_blk_mapping), + sizeof(struct host_hc_status_block_e2)); + if (!sb->e2_sb) + goto alloc_mem_err; + } else { + sb->e1x_sb = BNX2X_PCI_ALLOC(&bnx2x_fp(bp, index, status_blk_mapping), + sizeof(struct host_hc_status_block_e1x)); + if (!sb->e1x_sb) + goto alloc_mem_err; + } } /* FCoE Queue uses Default SB and doesn't ACK the SB, thus no need to @@ -4383,35 +4392,49 @@ static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index) "allocating tx memory of fp %d cos %d\n", index, cos); - BNX2X_ALLOC(txdata->tx_buf_ring, - sizeof(struct sw_tx_bd) * NUM_TX_BD); - BNX2X_PCI_ALLOC(txdata->tx_desc_ring, - &txdata->tx_desc_mapping, - sizeof(union eth_tx_bd_types) * NUM_TX_BD); + txdata->tx_buf_ring = kcalloc(NUM_TX_BD, + sizeof(struct sw_tx_bd), + GFP_KERNEL); + if (!txdata->tx_buf_ring) + goto alloc_mem_err; + txdata->tx_desc_ring = BNX2X_PCI_ALLOC(&txdata->tx_desc_mapping, + sizeof(union eth_tx_bd_types) * NUM_TX_BD); + if (!txdata->tx_desc_ring) + goto alloc_mem_err; } } /* Rx */ if (!skip_rx_queue(bp, index)) { /* fastpath rx rings: rx_buf rx_desc rx_comp */ - BNX2X_ALLOC(bnx2x_fp(bp, index, rx_buf_ring), - sizeof(struct sw_rx_bd) * NUM_RX_BD); - BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_desc_ring), - &bnx2x_fp(bp, index, rx_desc_mapping), - sizeof(struct eth_rx_bd) * NUM_RX_BD); + bnx2x_fp(bp, index, rx_buf_ring) = + kcalloc(NUM_RX_BD, sizeof(struct sw_rx_bd), GFP_KERNEL); + if (!bnx2x_fp(bp, index, rx_buf_ring)) + goto alloc_mem_err; + bnx2x_fp(bp, index, rx_desc_ring) = + BNX2X_PCI_ALLOC(&bnx2x_fp(bp, index, rx_desc_mapping), + sizeof(struct eth_rx_bd) * NUM_RX_BD); + if (!bnx2x_fp(bp, index, rx_desc_ring)) + goto alloc_mem_err; /* Seed all CQEs by 1s */ - BNX2X_PCI_FALLOC(bnx2x_fp(bp, index, rx_comp_ring), - &bnx2x_fp(bp, index, rx_comp_mapping), - sizeof(struct eth_fast_path_rx_cqe) * - NUM_RCQ_BD); + bnx2x_fp(bp, index, rx_comp_ring) = + BNX2X_PCI_FALLOC(&bnx2x_fp(bp, index, rx_comp_mapping), + sizeof(struct eth_fast_path_rx_cqe) * NUM_RCQ_BD); + if (!bnx2x_fp(bp, index, rx_comp_ring)) + goto alloc_mem_err; /* SGE ring */ - BNX2X_ALLOC(bnx2x_fp(bp, index, rx_page_ring), - sizeof(struct sw_rx_page) * NUM_RX_SGE); - BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_sge_ring), - &bnx2x_fp(bp, index, rx_sge_mapping), - BCM_PAGE_SIZE * NUM_RX_SGE_PAGES); + bnx2x_fp(bp, index, rx_page_ring) = + kcalloc(NUM_RX_SGE, sizeof(struct sw_rx_page), + GFP_KERNEL); + if (!bnx2x_fp(bp, index, rx_page_ring)) + goto alloc_mem_err; + bnx2x_fp(bp, index, rx_sge_ring) = + BNX2X_PCI_ALLOC(&bnx2x_fp(bp, index, rx_sge_mapping), + BCM_PAGE_SIZE * NUM_RX_SGE_PAGES); + if (!bnx2x_fp(bp, index, rx_sge_ring)) + goto alloc_mem_err; /* RX BD ring */ bnx2x_set_next_page_rx_bd(fp); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index ec02b15fba32..05f4f5f52635 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -47,31 +47,26 @@ extern int bnx2x_num_queues; } \ } while (0) -#define BNX2X_PCI_ALLOC(x, y, size) \ - do { \ - x = dma_zalloc_coherent(&bp->pdev->dev, size, y, GFP_KERNEL); \ - if (x == NULL) \ - goto alloc_mem_err; \ - DP(NETIF_MSG_HW, "BNX2X_PCI_ALLOC: Physical %Lx Virtual %p\n", \ - (unsigned long long)(*y), x); \ - } while (0) - -#define BNX2X_PCI_FALLOC(x, y, size) \ - do { \ - x = dma_alloc_coherent(&bp->pdev->dev, size, y, GFP_KERNEL); \ - if (x == NULL) \ - goto alloc_mem_err; \ - memset((void *)x, 0xFFFFFFFF, size); \ - DP(NETIF_MSG_HW, "BNX2X_PCI_FALLOC: Physical %Lx Virtual %p\n",\ - (unsigned long long)(*y), x); \ - } while (0) - -#define BNX2X_ALLOC(x, size) \ - do { \ - x = kzalloc(size, GFP_KERNEL); \ - if (x == NULL) \ - goto alloc_mem_err; \ - } while (0) +#define BNX2X_PCI_ALLOC(y, size) \ +({ \ + void *x = dma_zalloc_coherent(&bp->pdev->dev, size, y, GFP_KERNEL); \ + if (x) \ + DP(NETIF_MSG_HW, \ + "BNX2X_PCI_ALLOC: Physical %Lx Virtual %p\n", \ + (unsigned long long)(*y), x); \ + x; \ +}) +#define BNX2X_PCI_FALLOC(y, size) \ +({ \ + void *x = dma_alloc_coherent(&bp->pdev->dev, size, y, GFP_KERNEL); \ + if (x) { \ + memset(x, 0xff, size); \ + DP(NETIF_MSG_HW, \ + "BNX2X_PCI_FALLOC: Physical %Lx Virtual %p\n", \ + (unsigned long long)(*y), x); \ + } \ + x; \ +}) /*********************** Interfaces **************************** * Functions that need to be implemented by each driver version diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 84439152e499..5e74599b05c7 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -8001,19 +8001,25 @@ void bnx2x_free_mem(struct bnx2x *bp) int bnx2x_alloc_mem_cnic(struct bnx2x *bp) { - if (!CHIP_IS_E1x(bp)) + if (!CHIP_IS_E1x(bp)) { /* size = the status block + ramrod buffers */ - BNX2X_PCI_ALLOC(bp->cnic_sb.e2_sb, &bp->cnic_sb_mapping, - sizeof(struct host_hc_status_block_e2)); - else - BNX2X_PCI_ALLOC(bp->cnic_sb.e1x_sb, - &bp->cnic_sb_mapping, - sizeof(struct - host_hc_status_block_e1x)); + bp->cnic_sb.e2_sb = BNX2X_PCI_ALLOC(&bp->cnic_sb_mapping, + sizeof(struct host_hc_status_block_e2)); + if (!bp->cnic_sb.e2_sb) + goto alloc_mem_err; + } else { + bp->cnic_sb.e1x_sb = BNX2X_PCI_ALLOC(&bp->cnic_sb_mapping, + sizeof(struct host_hc_status_block_e1x)); + if (!bp->cnic_sb.e1x_sb) + goto alloc_mem_err; + } - if (CONFIGURE_NIC_MODE(bp) && !bp->t2) + if (CONFIGURE_NIC_MODE(bp) && !bp->t2) { /* allocate searcher T2 table, as it wasn't allocated before */ - BNX2X_PCI_ALLOC(bp->t2, &bp->t2_mapping, SRC_T2_SZ); + bp->t2 = BNX2X_PCI_ALLOC(&bp->t2_mapping, SRC_T2_SZ); + if (!bp->t2) + goto alloc_mem_err; + } /* write address to which L5 should insert its values */ bp->cnic_eth_dev.addr_drv_info_to_mcp = @@ -8034,15 +8040,22 @@ int bnx2x_alloc_mem(struct bnx2x *bp) { int i, allocated, context_size; - if (!CONFIGURE_NIC_MODE(bp) && !bp->t2) + if (!CONFIGURE_NIC_MODE(bp) && !bp->t2) { /* allocate searcher T2 table */ - BNX2X_PCI_ALLOC(bp->t2, &bp->t2_mapping, SRC_T2_SZ); + bp->t2 = BNX2X_PCI_ALLOC(&bp->t2_mapping, SRC_T2_SZ); + if (!bp->t2) + goto alloc_mem_err; + } - BNX2X_PCI_ALLOC(bp->def_status_blk, &bp->def_status_blk_mapping, - sizeof(struct host_sp_status_block)); + bp->def_status_blk = BNX2X_PCI_ALLOC(&bp->def_status_blk_mapping, + sizeof(struct host_sp_status_block)); + if (!bp->def_status_blk) + goto alloc_mem_err; - BNX2X_PCI_ALLOC(bp->slowpath, &bp->slowpath_mapping, - sizeof(struct bnx2x_slowpath)); + bp->slowpath = BNX2X_PCI_ALLOC(&bp->slowpath_mapping, + sizeof(struct bnx2x_slowpath)); + if (!bp->slowpath) + goto alloc_mem_err; /* Allocate memory for CDU context: * This memory is allocated separately and not in the generic ILT @@ -8062,12 +8075,16 @@ int bnx2x_alloc_mem(struct bnx2x *bp) for (i = 0, allocated = 0; allocated < context_size; i++) { bp->context[i].size = min(CDU_ILT_PAGE_SZ, (context_size - allocated)); - BNX2X_PCI_ALLOC(bp->context[i].vcxt, - &bp->context[i].cxt_mapping, - bp->context[i].size); + bp->context[i].vcxt = BNX2X_PCI_ALLOC(&bp->context[i].cxt_mapping, + bp->context[i].size); + if (!bp->context[i].vcxt) + goto alloc_mem_err; allocated += bp->context[i].size; } - BNX2X_ALLOC(bp->ilt->lines, sizeof(struct ilt_line) * ILT_MAX_LINES); + bp->ilt->lines = kcalloc(ILT_MAX_LINES, sizeof(struct ilt_line), + GFP_KERNEL); + if (!bp->ilt->lines) + goto alloc_mem_err; if (bnx2x_ilt_mem_op(bp, ILT_MEMOP_ALLOC)) goto alloc_mem_err; @@ -8076,11 +8093,15 @@ int bnx2x_alloc_mem(struct bnx2x *bp) goto alloc_mem_err; /* Slow path ring */ - BNX2X_PCI_ALLOC(bp->spq, &bp->spq_mapping, BCM_PAGE_SIZE); + bp->spq = BNX2X_PCI_ALLOC(&bp->spq_mapping, BCM_PAGE_SIZE); + if (!bp->spq) + goto alloc_mem_err; /* EQ */ - BNX2X_PCI_ALLOC(bp->eq_ring, &bp->eq_mapping, - BCM_PAGE_SIZE * NUM_EQ_PAGES); + bp->eq_ring = BNX2X_PCI_ALLOC(&bp->eq_mapping, + BCM_PAGE_SIZE * NUM_EQ_PAGES); + if (!bp->eq_ring) + goto alloc_mem_err; return 0; @@ -11783,6 +11804,8 @@ static int bnx2x_init_bp(struct bnx2x *bp) bp->disable_tpa = disable_tpa; bp->disable_tpa |= IS_MF_STORAGE_SD(bp) || IS_MF_FCOE_AFEX(bp); + /* Reduce memory usage in kdump environment by disabling TPA */ + bp->disable_tpa |= reset_devices; /* Set TPA flags */ if (bp->disable_tpa) { @@ -11954,7 +11977,7 @@ static int bnx2x_init_mcast_macs_list(struct bnx2x *bp, { int mc_count = netdev_mc_count(bp->dev); struct bnx2x_mcast_list_elem *mc_mac = - kzalloc(sizeof(*mc_mac) * mc_count, GFP_ATOMIC); + kcalloc(mc_count, sizeof(*mc_mac), GFP_ATOMIC); struct netdev_hw_addr *ha; if (!mc_mac) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 98b53671a652..61e6f606d8a4 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -2120,7 +2120,9 @@ int bnx2x_iov_alloc_mem(struct bnx2x *bp) cxt->size = min_t(size_t, tot_size, CDU_ILT_PAGE_SZ); if (cxt->size) { - BNX2X_PCI_ALLOC(cxt->addr, &cxt->mapping, cxt->size); + cxt->addr = BNX2X_PCI_ALLOC(&cxt->mapping, cxt->size); + if (!cxt->addr) + goto alloc_mem_err; } else { cxt->addr = NULL; cxt->mapping = 0; @@ -2130,20 +2132,28 @@ int bnx2x_iov_alloc_mem(struct bnx2x *bp) /* allocate vfs ramrods dma memory - client_init and set_mac */ tot_size = BNX2X_NR_VIRTFN(bp) * sizeof(struct bnx2x_vf_sp); - BNX2X_PCI_ALLOC(BP_VFDB(bp)->sp_dma.addr, &BP_VFDB(bp)->sp_dma.mapping, - tot_size); + BP_VFDB(bp)->sp_dma.addr = BNX2X_PCI_ALLOC(&BP_VFDB(bp)->sp_dma.mapping, + tot_size); + if (!BP_VFDB(bp)->sp_dma.addr) + goto alloc_mem_err; BP_VFDB(bp)->sp_dma.size = tot_size; /* allocate mailboxes */ tot_size = BNX2X_NR_VIRTFN(bp) * MBX_MSG_ALIGNED_SIZE; - BNX2X_PCI_ALLOC(BP_VF_MBX_DMA(bp)->addr, &BP_VF_MBX_DMA(bp)->mapping, - tot_size); + BP_VF_MBX_DMA(bp)->addr = BNX2X_PCI_ALLOC(&BP_VF_MBX_DMA(bp)->mapping, + tot_size); + if (!BP_VF_MBX_DMA(bp)->addr) + goto alloc_mem_err; + BP_VF_MBX_DMA(bp)->size = tot_size; /* allocate local bulletin boards */ tot_size = BNX2X_NR_VIRTFN(bp) * BULLETIN_CONTENT_SIZE; - BNX2X_PCI_ALLOC(BP_VF_BULLETIN_DMA(bp)->addr, - &BP_VF_BULLETIN_DMA(bp)->mapping, tot_size); + BP_VF_BULLETIN_DMA(bp)->addr = BNX2X_PCI_ALLOC(&BP_VF_BULLETIN_DMA(bp)->mapping, + tot_size); + if (!BP_VF_BULLETIN_DMA(bp)->addr) + goto alloc_mem_err; + BP_VF_BULLETIN_DMA(bp)->size = tot_size; return 0; @@ -3825,12 +3835,16 @@ int bnx2x_vf_pci_alloc(struct bnx2x *bp) mutex_init(&bp->vf2pf_mutex); /* allocate vf2pf mailbox for vf to pf channel */ - BNX2X_PCI_ALLOC(bp->vf2pf_mbox, &bp->vf2pf_mbox_mapping, - sizeof(struct bnx2x_vf_mbx_msg)); + bp->vf2pf_mbox = BNX2X_PCI_ALLOC(&bp->vf2pf_mbox_mapping, + sizeof(struct bnx2x_vf_mbx_msg)); + if (!bp->vf2pf_mbox) + goto alloc_mem_err; /* allocate pf 2 vf bulletin board */ - BNX2X_PCI_ALLOC(bp->pf2vf_bulletin, &bp->pf2vf_bulletin_mapping, - sizeof(union pf_vf_bulletin)); + bp->pf2vf_bulletin = BNX2X_PCI_ALLOC(&bp->pf2vf_bulletin_mapping, + sizeof(union pf_vf_bulletin)); + if (!bp->pf2vf_bulletin) + goto alloc_mem_err; return 0; diff --git a/drivers/net/ethernet/intel/igb/Makefile b/drivers/net/ethernet/intel/igb/Makefile index f19700e285bb..5bcb2de75933 100644 --- a/drivers/net/ethernet/intel/igb/Makefile +++ b/drivers/net/ethernet/intel/igb/Makefile @@ -1,7 +1,7 @@ ################################################################################ # # Intel 82575 PCI-Express Ethernet Linux driver -# Copyright(c) 1999 - 2013 Intel Corporation. +# Copyright(c) 1999 - 2014 Intel Corporation. # # This program is free software; you can redistribute it and/or modify it # under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ # more details. # # You should have received a copy of the GNU General Public License along with -# this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. +# this program; if not, see <http://www.gnu.org/licenses/>. # # The full GNU General Public License is included in this distribution in # the file called "COPYING". diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index 06df6928f44c..45947b3f7d92 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". @@ -2720,7 +2719,7 @@ static const u8 e1000_emc_therm_limit[4] = { * * Updates the temperatures in mac.thermal_sensor_data **/ -s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw) +static s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw) { s32 status = E1000_SUCCESS; u16 ets_offset; @@ -2774,7 +2773,7 @@ s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw) * Sets the thermal sensor thresholds according to the NVM map * and save off the threshold and location values into mac.thermal_sensor_data **/ -s32 igb_init_thermal_sensor_thresh_generic(struct e1000_hw *hw) +static s32 igb_init_thermal_sensor_thresh_generic(struct e1000_hw *hw) { s32 status = E1000_SUCCESS; u16 ets_offset; diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h index 8c2437722aad..f12b086e578d 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.h +++ b/drivers/net/ethernet/intel/igb/e1000_82575.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". @@ -266,8 +265,6 @@ u16 igb_rxpbs_adjust_82580(u32 data); s32 igb_read_emi_reg(struct e1000_hw *, u16 addr, u16 *data); s32 igb_set_eee_i350(struct e1000_hw *); s32 igb_set_eee_i354(struct e1000_hw *); -s32 igb_init_thermal_sensor_thresh_generic(struct e1000_hw *); -s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw); #define E1000_I2C_THERMAL_SENSOR_ADDR 0xF8 #define E1000_EMC_INTERNAL_DATA 0x00 diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index 0571b973be80..393c896ac7e7 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h index ab99e2b582a8..10741d170f2d 100644 --- a/drivers/net/ethernet/intel/igb/e1000_hw.h +++ b/drivers/net/ethernet/intel/igb/e1000_hw.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c index 0c0393316a3a..db963397cc27 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.c +++ b/drivers/net/ethernet/intel/igb/e1000_i210.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". @@ -35,6 +34,8 @@ #include "e1000_hw.h" #include "e1000_i210.h" +static s32 igb_update_flash_i210(struct e1000_hw *hw); + /** * igb_get_hw_semaphore_i210 - Acquire hardware semaphore * @hw: pointer to the HW structure @@ -111,7 +112,7 @@ static s32 igb_get_hw_semaphore_i210(struct e1000_hw *hw) * Return successful if access grant bit set, else clear the request for * EEPROM access and return -E1000_ERR_NVM (-1). **/ -s32 igb_acquire_nvm_i210(struct e1000_hw *hw) +static s32 igb_acquire_nvm_i210(struct e1000_hw *hw) { return igb_acquire_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); } @@ -123,7 +124,7 @@ s32 igb_acquire_nvm_i210(struct e1000_hw *hw) * Stop any current commands to the EEPROM and clear the EEPROM request bit, * then release the semaphores acquired. **/ -void igb_release_nvm_i210(struct e1000_hw *hw) +static void igb_release_nvm_i210(struct e1000_hw *hw) { igb_release_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); } @@ -206,8 +207,8 @@ void igb_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask) * Reads a 16 bit word from the Shadow Ram using the EERD register. * Uses necessary synchronization semaphores. **/ -s32 igb_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words, - u16 *data) +static s32 igb_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data) { s32 status = E1000_SUCCESS; u16 i, count; @@ -306,8 +307,8 @@ out: * If error code is returned, data and Shadow RAM may be inconsistent - buffer * partially written. **/ -s32 igb_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words, - u16 *data) +static s32 igb_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data) { s32 status = E1000_SUCCESS; u16 i, count; @@ -555,7 +556,7 @@ s32 igb_read_invm_version(struct e1000_hw *hw, * Calculates the EEPROM checksum by reading/adding each word of the EEPROM * and then verifies that the sum of the EEPROM is equal to 0xBABA. **/ -s32 igb_validate_nvm_checksum_i210(struct e1000_hw *hw) +static s32 igb_validate_nvm_checksum_i210(struct e1000_hw *hw) { s32 status = E1000_SUCCESS; s32 (*read_op_ptr)(struct e1000_hw *, u16, u16, u16 *); @@ -590,7 +591,7 @@ s32 igb_validate_nvm_checksum_i210(struct e1000_hw *hw) * up to the checksum. Then calculates the EEPROM checksum and writes the * value to the EEPROM. Next commit EEPROM data onto the Flash. **/ -s32 igb_update_nvm_checksum_i210(struct e1000_hw *hw) +static s32 igb_update_nvm_checksum_i210(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 checksum = 0; @@ -684,7 +685,7 @@ bool igb_get_flash_presence_i210(struct e1000_hw *hw) * @hw: pointer to the HW structure * **/ -s32 igb_update_flash_i210(struct e1000_hw *hw) +static s32 igb_update_flash_i210(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u32 flup; diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.h b/drivers/net/ethernet/intel/igb/e1000_i210.h index 2d913716573a..907fe99a9813 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.h +++ b/drivers/net/ethernet/intel/igb/e1000_i210.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". @@ -28,17 +27,8 @@ #ifndef _E1000_I210_H_ #define _E1000_I210_H_ -s32 igb_update_flash_i210(struct e1000_hw *hw); -s32 igb_update_nvm_checksum_i210(struct e1000_hw *hw); -s32 igb_validate_nvm_checksum_i210(struct e1000_hw *hw); -s32 igb_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words, - u16 *data); -s32 igb_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words, - u16 *data); s32 igb_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask); void igb_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask); -s32 igb_acquire_nvm_i210(struct e1000_hw *hw); -void igb_release_nvm_i210(struct e1000_hw *hw); s32 igb_valid_led_default_i210(struct e1000_hw *hw, u16 *data); s32 igb_read_invm_version(struct e1000_hw *hw, struct e1000_fw_version *invm_ver); diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c index 298f0ed50670..5910a932ea7c 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.c +++ b/drivers/net/ethernet/intel/igb/e1000_mac.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.h b/drivers/net/ethernet/intel/igb/e1000_mac.h index e4cbe8ef67b3..99299ba8ee3a 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.h +++ b/drivers/net/ethernet/intel/igb/e1000_mac.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.c b/drivers/net/ethernet/intel/igb/e1000_mbx.c index dac1447fabf7..d5b121771c31 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mbx.c +++ b/drivers/net/ethernet/intel/igb/e1000_mbx.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.h b/drivers/net/ethernet/intel/igb/e1000_mbx.h index de9bba41acf3..f52f5515e5a8 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mbx.h +++ b/drivers/net/ethernet/intel/igb/e1000_mbx.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.c b/drivers/net/ethernet/intel/igb/e1000_nvm.c index a7db7f3db914..9abf82919c65 100644 --- a/drivers/net/ethernet/intel/igb/e1000_nvm.c +++ b/drivers/net/ethernet/intel/igb/e1000_nvm.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.h b/drivers/net/ethernet/intel/igb/e1000_nvm.h index 433b7419cb98..5b101170b17e 100644 --- a/drivers/net/ethernet/intel/igb/e1000_nvm.h +++ b/drivers/net/ethernet/intel/igb/e1000_nvm.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index ad2b74d95138..4009bbab7407 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". @@ -394,77 +393,6 @@ s32 igb_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data) } /** - * e1000_write_sfp_data_byte - Writes SFP module data. - * @hw: pointer to the HW structure - * @offset: byte location offset to write to - * @data: data to write - * - * Writes one byte to SFP module data stored - * in SFP resided EEPROM memory or SFP diagnostic area. - * Function should be called with - * E1000_I2CCMD_SFP_DATA_ADDR(<byte offset>) for SFP module database access - * E1000_I2CCMD_SFP_DIAG_ADDR(<byte offset>) for SFP diagnostics parameters - * access - **/ -s32 e1000_write_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 data) -{ - u32 i = 0; - u32 i2ccmd = 0; - u32 data_local = 0; - - if (offset > E1000_I2CCMD_SFP_DIAG_ADDR(255)) { - hw_dbg("I2CCMD command address exceeds upper limit\n"); - return -E1000_ERR_PHY; - } - /* The programming interface is 16 bits wide - * so we need to read the whole word first - * then update appropriate byte lane and write - * the updated word back. - */ - /* Set up Op-code, EEPROM Address,in the I2CCMD - * register. The MAC will take care of interfacing - * with an EEPROM to write the data given. - */ - i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | - E1000_I2CCMD_OPCODE_READ); - /* Set a command to read single word */ - wr32(E1000_I2CCMD, i2ccmd); - for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { - udelay(50); - /* Poll the ready bit to see if lastly - * launched I2C operation completed - */ - i2ccmd = rd32(E1000_I2CCMD); - if (i2ccmd & E1000_I2CCMD_READY) { - /* Check if this is READ or WRITE phase */ - if ((i2ccmd & E1000_I2CCMD_OPCODE_READ) == - E1000_I2CCMD_OPCODE_READ) { - /* Write the selected byte - * lane and update whole word - */ - data_local = i2ccmd & 0xFF00; - data_local |= data; - i2ccmd = ((offset << - E1000_I2CCMD_REG_ADDR_SHIFT) | - E1000_I2CCMD_OPCODE_WRITE | data_local); - wr32(E1000_I2CCMD, i2ccmd); - } else { - break; - } - } - } - if (!(i2ccmd & E1000_I2CCMD_READY)) { - hw_dbg("I2CCMD Write did not complete\n"); - return -E1000_ERR_PHY; - } - if (i2ccmd & E1000_I2CCMD_ERROR) { - hw_dbg("I2CCMD Error bit set\n"); - return -E1000_ERR_PHY; - } - return 0; -} - -/** * igb_read_phy_reg_igp - Read igp PHY register * @hw: pointer to the HW structure * @offset: register offset to be read diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.h b/drivers/net/ethernet/intel/igb/e1000_phy.h index 6a0873f2095a..4c2c36c46a73 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.h +++ b/drivers/net/ethernet/intel/igb/e1000_phy.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". @@ -70,7 +69,6 @@ s32 igb_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data); s32 igb_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data); s32 igb_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data); s32 igb_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data); -s32 e1000_write_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 data); s32 igb_copper_link_setup_82580(struct e1000_hw *hw); s32 igb_get_phy_info_82580(struct e1000_hw *hw); s32 igb_phy_force_speed_duplex_82580(struct e1000_hw *hw); diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 82632c6c53af..abdd935a9dad 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index ccf472f073dd..fc3fc2c6fe40 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". @@ -525,9 +524,7 @@ void igb_set_fw_version(struct igb_adapter *); void igb_ptp_init(struct igb_adapter *adapter); void igb_ptp_stop(struct igb_adapter *adapter); void igb_ptp_reset(struct igb_adapter *adapter); -void igb_ptp_tx_work(struct work_struct *work); void igb_ptp_rx_hang(struct igb_adapter *adapter); -void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter); void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb); void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, unsigned char *va, struct sk_buff *skb); diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 1df02378de69..c7f574165298 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". diff --git a/drivers/net/ethernet/intel/igb/igb_hwmon.c b/drivers/net/ethernet/intel/igb/igb_hwmon.c index e0af5bc61613..8333f67acf96 100644 --- a/drivers/net/ethernet/intel/igb/igb_hwmon.c +++ b/drivers/net/ethernet/intel/igb/igb_hwmon.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 84dfa3f0e3b8..3384156cf1b5 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. + Copyright(c) 2007-2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -13,8 +13,7 @@ more details. You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + this program; if not, see <http://www.gnu.org/licenses/>. The full GNU General Public License is included in this distribution in the file called "COPYING". @@ -70,7 +69,7 @@ char igb_driver_version[] = DRV_VERSION; static const char igb_driver_string[] = "Intel(R) Gigabit Ethernet Network Driver"; static const char igb_copyright[] = - "Copyright (c) 2007-2013 Intel Corporation."; + "Copyright (c) 2007-2014 Intel Corporation."; static const struct e1000_info *igb_info_tbl[] = { [board_82575] = &e1000_82575_info, diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 5a54e3dc535d..9c9c141f089a 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -12,9 +12,8 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * You should have received a copy of the GNU General Public License along with + * this program; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/module.h> #include <linux/device.h> @@ -75,6 +74,8 @@ #define INCVALUE_82576 (16 << IGB_82576_TSYNC_SHIFT) #define IGB_NBITS_82580 40 +static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter); + /* SYSTIM read access for the 82576 */ static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc) { @@ -372,7 +373,7 @@ static int igb_ptp_enable(struct ptp_clock_info *ptp, * This work function polls the TSYNCTXCTL valid bit to determine when a * timestamp has been taken for the current stored skb. **/ -void igb_ptp_tx_work(struct work_struct *work) +static void igb_ptp_tx_work(struct work_struct *work) { struct igb_adapter *adapter = container_of(work, struct igb_adapter, ptp_tx_work); @@ -466,7 +467,7 @@ void igb_ptp_rx_hang(struct igb_adapter *adapter) * available, then it must have been for this skb here because we only * allow only one such packet into the queue. **/ -void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) +static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct skb_shared_hwtstamps shhwtstamps; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 18076c4178b4..a345cc7b378c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2630,9 +2630,12 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data) switch (hw->mac.type) { case ixgbe_mac_82599EB: case ixgbe_mac_X540: - if (eicr & IXGBE_EICR_ECC) - e_info(link, "Received unrecoverable ECC Err, please " - "reboot\n"); + if (eicr & IXGBE_EICR_ECC) { + e_info(link, "Received ECC Err, initiating reset\n"); + adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED; + ixgbe_service_event_schedule(adapter); + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); + } /* Handle Flow Director Full threshold interrupt */ if (eicr & IXGBE_EICR_FLOW_DIR) { int reinit_count = 0; @@ -2846,9 +2849,12 @@ static irqreturn_t ixgbe_intr(int irq, void *data) ixgbe_check_sfp_event(adapter, eicr); /* Fall through */ case ixgbe_mac_X540: - if (eicr & IXGBE_EICR_ECC) - e_info(link, "Received unrecoverable ECC err, please " - "reboot\n"); + if (eicr & IXGBE_EICR_ECC) { + e_info(link, "Received ECC Err, initiating reset\n"); + adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED; + ixgbe_service_event_schedule(adapter); + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); + } ixgbe_check_overtemp_event(adapter, eicr); break; default: @@ -7792,6 +7798,7 @@ int ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id, case IXGBE_DEV_ID_82599_SFP: /* Only these subdevices could supports WOL */ switch (subdevice_id) { + case IXGBE_SUBDEV_ID_82599_SFP_WOL0: case IXGBE_SUBDEV_ID_82599_560FLR: /* only support first port */ if (hw->bus.func != 0) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 5184e2a1a7d8..9e54fcc13bc9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -840,7 +840,9 @@ void ixgbe_ptp_init(struct ixgbe_adapter *adapter) switch (adapter->hw.mac.type) { case ixgbe_mac_X540: - snprintf(adapter->ptp_caps.name, 16, "%s", netdev->name); + snprintf(adapter->ptp_caps.name, + sizeof(adapter->ptp_caps.name), + "%s", netdev->name); adapter->ptp_caps.owner = THIS_MODULE; adapter->ptp_caps.max_adj = 250000000; adapter->ptp_caps.n_alarm = 0; @@ -854,7 +856,9 @@ void ixgbe_ptp_init(struct ixgbe_adapter *adapter) adapter->ptp_caps.enable = ixgbe_ptp_enable; break; case ixgbe_mac_82599EB: - snprintf(adapter->ptp_caps.name, 16, "%s", netdev->name); + snprintf(adapter->ptp_caps.name, + sizeof(adapter->ptp_caps.name), + "%s", netdev->name); adapter->ptp_caps.owner = THIS_MODULE; adapter->ptp_caps.max_adj = 250000000; adapter->ptp_caps.n_alarm = 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 0d39cfc4a3bf..9283cffd1b57 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -54,6 +54,7 @@ #define IXGBE_DEV_ID_82599_BACKPLANE_FCOE 0x152a #define IXGBE_DEV_ID_82599_SFP_FCOE 0x1529 #define IXGBE_SUBDEV_ID_82599_SFP 0x11A9 +#define IXGBE_SUBDEV_ID_82599_SFP_WOL0 0x1071 #define IXGBE_SUBDEV_ID_82599_RNDC 0x1F72 #define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0 #define IXGBE_SUBDEV_ID_82599_SP_560FLR 0x211B diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index fad45316200a..2c0823bf3e05 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2341,7 +2341,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, netif_set_real_num_rx_queues(dev, prof->rx_ring_num); SET_NETDEV_DEV(dev, &mdev->dev->pdev->dev); - dev->dev_id = port - 1; + dev->dev_port = port - 1; /* * Initialize driver private data diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 34bf011584fb..dd9ef5e1c730 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -57,7 +57,7 @@ struct at86rf230_local { int rssi_base_val; }; -static inline int is_rf212(struct at86rf230_local *local) +static bool is_rf212(struct at86rf230_local *local) { return local->part == 7; } diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 5a8993b0cafc..0c9e4342f11d 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -148,15 +148,52 @@ static int ks8737_config_intr(struct phy_device *phydev) return rc < 0 ? rc : 0; } +static int kszphy_setup_led(struct phy_device *phydev, + unsigned int reg, unsigned int shift) +{ + + struct device *dev = &phydev->dev; + struct device_node *of_node = dev->of_node; + int rc, temp; + u32 val; + + if (!of_node && dev->parent->of_node) + of_node = dev->parent->of_node; + + if (of_property_read_u32(of_node, "micrel,led-mode", &val)) + return 0; + + temp = phy_read(phydev, reg); + if (temp < 0) + return temp; + + temp &= 3 << shift; + temp |= val << shift; + rc = phy_write(phydev, reg, temp); + + return rc < 0 ? rc : 0; +} + static int kszphy_config_init(struct phy_device *phydev) { return 0; } +static int kszphy_config_init_led8041(struct phy_device *phydev) +{ + /* single led control, register 0x1e bits 15..14 */ + return kszphy_setup_led(phydev, 0x1e, 14); +} + static int ksz8021_config_init(struct phy_device *phydev) { - int rc; const u16 val = KSZPHY_OMSO_B_CAST_OFF | KSZPHY_OMSO_RMII_OVERRIDE; + int rc; + + rc = kszphy_setup_led(phydev, 0x1f, 4); + if (rc) + dev_err(&phydev->dev, "failed to set led mode\n"); + phy_write(phydev, MII_KSZPHY_OMSO, val); rc = ksz_config_flags(phydev); return rc < 0 ? rc : 0; @@ -166,6 +203,10 @@ static int ks8051_config_init(struct phy_device *phydev) { int rc; + rc = kszphy_setup_led(phydev, 0x1f, 4); + if (rc) + dev_err(&phydev->dev, "failed to set led mode\n"); + rc = ksz_config_flags(phydev); return rc < 0 ? rc : 0; } @@ -327,7 +368,7 @@ static struct phy_driver ksphy_driver[] = { .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause), .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, - .config_init = kszphy_config_init, + .config_init = kszphy_config_init_led8041, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, @@ -342,7 +383,7 @@ static struct phy_driver ksphy_driver[] = { .features = PHY_BASIC_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause, .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, - .config_init = kszphy_config_init, + .config_init = kszphy_config_init_led8041, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, @@ -371,7 +412,7 @@ static struct phy_driver ksphy_driver[] = { .phy_id_mask = 0x00ffffff, .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause), .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, - .config_init = kszphy_config_init, + .config_init = kszphy_config_init_led8041, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index dec9820bc182..eb59b14d5ee0 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1132,7 +1132,6 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct vxlan_sock *vs; struct vxlanhdr *vxh; - __be16 port; /* Need Vxlan and inner Ethernet header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) @@ -1150,8 +1149,6 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB))) goto drop; - port = inet_sk(sk)->inet_sport; - vs = rcu_dereference_sk_user_data(sk); if (!vs) goto drop; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5e4756553c18..1a869488b8ae 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1147,6 +1147,89 @@ struct net_device_ops { void *priv); }; +/** + * enum net_device_priv_flags - &struct net_device priv_flags + * + * These are the &struct net_device, they are only set internally + * by drivers and used in the kernel. These flags are invisible to + * userspace, this means that the order of these flags can change + * during any kernel release. + * + * You should have a pretty good reason to be extending these flags. + * + * @IFF_802_1Q_VLAN: 802.1Q VLAN device + * @IFF_EBRIDGE: Ethernet bridging device + * @IFF_SLAVE_INACTIVE: bonding slave not the curr. active + * @IFF_MASTER_8023AD: bonding master, 802.3ad + * @IFF_MASTER_ALB: bonding master, balance-alb + * @IFF_BONDING: bonding master or slave + * @IFF_SLAVE_NEEDARP: need ARPs for validation + * @IFF_ISATAP: ISATAP interface (RFC4214) + * @IFF_MASTER_ARPMON: bonding master, ARP mon in use + * @IFF_WAN_HDLC: WAN HDLC device + * @IFF_XMIT_DST_RELEASE: dev_hard_start_xmit() is allowed to + * release skb->dst + * @IFF_DONT_BRIDGE: disallow bridging this ether dev + * @IFF_DISABLE_NETPOLL: disable netpoll at run-time + * @IFF_MACVLAN_PORT: device used as macvlan port + * @IFF_BRIDGE_PORT: device used as bridge port + * @IFF_OVS_DATAPATH: device used as Open vSwitch datapath port + * @IFF_TX_SKB_SHARING: The interface supports sharing skbs on transmit + * @IFF_UNICAST_FLT: Supports unicast filtering + * @IFF_TEAM_PORT: device used as team port + * @IFF_SUPP_NOFCS: device supports sending custom FCS + * @IFF_LIVE_ADDR_CHANGE: device supports hardware address + * change when it's running + * @IFF_MACVLAN: Macvlan device + */ +enum netdev_priv_flags { + IFF_802_1Q_VLAN = 1<<0, + IFF_EBRIDGE = 1<<1, + IFF_SLAVE_INACTIVE = 1<<2, + IFF_MASTER_8023AD = 1<<3, + IFF_MASTER_ALB = 1<<4, + IFF_BONDING = 1<<5, + IFF_SLAVE_NEEDARP = 1<<6, + IFF_ISATAP = 1<<7, + IFF_MASTER_ARPMON = 1<<8, + IFF_WAN_HDLC = 1<<9, + IFF_XMIT_DST_RELEASE = 1<<10, + IFF_DONT_BRIDGE = 1<<11, + IFF_DISABLE_NETPOLL = 1<<12, + IFF_MACVLAN_PORT = 1<<13, + IFF_BRIDGE_PORT = 1<<14, + IFF_OVS_DATAPATH = 1<<15, + IFF_TX_SKB_SHARING = 1<<16, + IFF_UNICAST_FLT = 1<<17, + IFF_TEAM_PORT = 1<<18, + IFF_SUPP_NOFCS = 1<<19, + IFF_LIVE_ADDR_CHANGE = 1<<20, + IFF_MACVLAN = 1<<21, +}; + +#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN +#define IFF_EBRIDGE IFF_EBRIDGE +#define IFF_SLAVE_INACTIVE IFF_SLAVE_INACTIVE +#define IFF_MASTER_8023AD IFF_MASTER_8023AD +#define IFF_MASTER_ALB IFF_MASTER_ALB +#define IFF_BONDING IFF_BONDING +#define IFF_SLAVE_NEEDARP IFF_SLAVE_NEEDARP +#define IFF_ISATAP IFF_ISATAP +#define IFF_MASTER_ARPMON IFF_MASTER_ARPMON +#define IFF_WAN_HDLC IFF_WAN_HDLC +#define IFF_XMIT_DST_RELEASE IFF_XMIT_DST_RELEASE +#define IFF_DONT_BRIDGE IFF_DONT_BRIDGE +#define IFF_DISABLE_NETPOLL IFF_DISABLE_NETPOLL +#define IFF_MACVLAN_PORT IFF_MACVLAN_PORT +#define IFF_BRIDGE_PORT IFF_BRIDGE_PORT +#define IFF_OVS_DATAPATH IFF_OVS_DATAPATH +#define IFF_TX_SKB_SHARING IFF_TX_SKB_SHARING +#define IFF_UNICAST_FLT IFF_UNICAST_FLT +#define IFF_TEAM_PORT IFF_TEAM_PORT +#define IFF_SUPP_NOFCS IFF_SUPP_NOFCS +#define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE +#define IFF_MACVLAN IFF_MACVLAN + /* * The DEVICE structure. * Actually, this whole structure is a big mistake. It mixes I/O @@ -1279,6 +1362,10 @@ struct net_device { * that share the same link * layer address */ + unsigned short dev_port; /* Used to differentiate + * devices that share the same + * function + */ spinlock_t addr_list_lock; struct netdev_hw_addr_list uc; /* Unicast mac addresses */ struct netdev_hw_addr_list mc; /* Multicast mac addresses */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 11b6925f0e96..6d8ed22accb4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -32,6 +32,7 @@ #include <linux/hrtimer.h> #include <linux/dma-mapping.h> #include <linux/netdev_features.h> +#include <linux/sched.h> #include <net/flow_keys.h> /* A. Checksumming of received packets by device. @@ -356,11 +357,62 @@ typedef unsigned int sk_buff_data_t; typedef unsigned char *sk_buff_data_t; #endif +/** + * struct skb_mstamp - multi resolution time stamps + * @stamp_us: timestamp in us resolution + * @stamp_jiffies: timestamp in jiffies + */ +struct skb_mstamp { + union { + u64 v64; + struct { + u32 stamp_us; + u32 stamp_jiffies; + }; + }; +}; + +/** + * skb_mstamp_get - get current timestamp + * @cl: place to store timestamps + */ +static inline void skb_mstamp_get(struct skb_mstamp *cl) +{ + u64 val = local_clock(); + + do_div(val, NSEC_PER_USEC); + cl->stamp_us = (u32)val; + cl->stamp_jiffies = (u32)jiffies; +} + +/** + * skb_mstamp_delta - compute the difference in usec between two skb_mstamp + * @t1: pointer to newest sample + * @t0: pointer to oldest sample + */ +static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, + const struct skb_mstamp *t0) +{ + s32 delta_us = t1->stamp_us - t0->stamp_us; + u32 delta_jiffies = t1->stamp_jiffies - t0->stamp_jiffies; + + /* If delta_us is negative, this might be because interval is too big, + * or local_clock() drift is too big : fallback using jiffies. + */ + if (delta_us <= 0 || + delta_jiffies >= (INT_MAX / (USEC_PER_SEC / HZ))) + + delta_us = jiffies_to_usecs(delta_jiffies); + + return delta_us; +} + + /** * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list - * @tstamp: Time we arrived + * @tstamp: Time we arrived/left * @sk: Socket we are owned by * @dev: Device we arrived on/are leaving by * @cb: Control buffer. Free for use by every layer. Put private vars here @@ -429,7 +481,10 @@ struct sk_buff { struct sk_buff *next; struct sk_buff *prev; - ktime_t tstamp; + union { + ktime_t tstamp; + struct skb_mstamp skb_mstamp; + }; struct sock *sk; struct net_device *dev; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4ad0706d40eb..239946868142 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -201,10 +201,10 @@ struct tcp_sock { u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ /* RTT measurement */ - u32 srtt; /* smoothed round trip time << 3 */ - u32 mdev; /* medium deviation */ - u32 mdev_max; /* maximal mdev for the last rtt period */ - u32 rttvar; /* smoothed mdev_max */ + u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + u32 mdev_us; /* medium deviation */ + u32 mdev_max_us; /* maximal mdev for the last rtt period */ + u32 rttvar_us; /* smoothed mdev_max */ u32 rtt_seq; /* sequence number to update rttvar */ u32 packets_out; /* Packets which are "in flight" */ diff --git a/include/net/ip.h b/include/net/ip.h index 4aa781b7f609..b885d75cede4 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -266,7 +266,8 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst) static inline bool ip_sk_accept_pmtu(const struct sock *sk) { - return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE; + return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE && + inet_sk(sk)->pmtudisc != IP_PMTUDISC_OMIT; } static inline bool ip_sk_use_pmtu(const struct sock *sk) @@ -274,6 +275,12 @@ static inline bool ip_sk_use_pmtu(const struct sock *sk) return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE; } +static inline bool ip_sk_local_df(const struct sock *sk) +{ + return inet_sk(sk)->pmtudisc < IP_PMTUDISC_DO || + inet_sk(sk)->pmtudisc == IP_PMTUDISC_OMIT; +} + static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 017badb1aec7..00e3f12cb2f9 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -171,7 +171,14 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb) static inline bool ip6_sk_accept_pmtu(const struct sock *sk) { - return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE; + return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE && + inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT; +} + +static inline bool ip6_sk_local_df(const struct sock *sk) +{ + return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO || + inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT; } static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt) diff --git a/include/net/tcp.h b/include/net/tcp.h index 1f820537741a..93eab0b9da60 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -31,6 +31,7 @@ #include <linux/crypto.h> #include <linux/cryptohash.h> #include <linux/kref.h> +#include <linux/ktime.h> #include <net/inet_connection_sock.h> #include <net/inet_timewait_sock.h> @@ -478,7 +479,6 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt); #ifdef CONFIG_SYN_COOKIES -#include <linux/ktime.h> /* Syncookies use a monotonic timer which increments every 64 seconds. * This counter is used both as a hash input and partially encoded into @@ -619,7 +619,7 @@ static inline void tcp_bound_rto(const struct sock *sk) static inline u32 __tcp_set_rto(const struct tcp_sock *tp) { - return (tp->srtt >> 3) + tp->rttvar; + return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); } static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) @@ -656,6 +656,11 @@ static inline u32 tcp_rto_min(struct sock *sk) return rto_min; } +static inline u32 tcp_rto_min_us(struct sock *sk) +{ + return jiffies_to_usecs(tcp_rto_min(sk)); +} + /* Compute the actual receive window we are currently advertising. * Rcv_nxt can be after the window if our peer push more data * than the offered window. @@ -778,7 +783,6 @@ enum tcp_ca_event { #define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX) #define TCP_CONG_NON_RESTRICTED 0x1 -#define TCP_CONG_RTT_STAMP 0x2 struct tcp_congestion_ops { struct list_head list; diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h index d758163b0e43..9cf2394f0bcf 100644 --- a/include/uapi/linux/if.h +++ b/include/uapi/linux/if.h @@ -27,65 +27,91 @@ #define IFALIASZ 256 #include <linux/hdlc/ioctl.h> -/* Standard interface flags (netdevice->flags). */ -#define IFF_UP 0x1 /* interface is up */ -#define IFF_BROADCAST 0x2 /* broadcast address valid */ -#define IFF_DEBUG 0x4 /* turn on debugging */ -#define IFF_LOOPBACK 0x8 /* is a loopback net */ -#define IFF_POINTOPOINT 0x10 /* interface is has p-p link */ -#define IFF_NOTRAILERS 0x20 /* avoid use of trailers */ -#define IFF_RUNNING 0x40 /* interface RFC2863 OPER_UP */ -#define IFF_NOARP 0x80 /* no ARP protocol */ -#define IFF_PROMISC 0x100 /* receive all packets */ -#define IFF_ALLMULTI 0x200 /* receive all multicast packets*/ - -#define IFF_MASTER 0x400 /* master of a load balancer */ -#define IFF_SLAVE 0x800 /* slave of a load balancer */ - -#define IFF_MULTICAST 0x1000 /* Supports multicast */ - -#define IFF_PORTSEL 0x2000 /* can set media type */ -#define IFF_AUTOMEDIA 0x4000 /* auto media select active */ -#define IFF_DYNAMIC 0x8000 /* dialup device with changing addresses*/ - -#define IFF_LOWER_UP 0x10000 /* driver signals L1 up */ -#define IFF_DORMANT 0x20000 /* driver signals dormant */ +/** + * enum net_device_flags - &struct net_device flags + * + * These are the &struct net_device flags, they can be set by drivers, the + * kernel and some can be triggered by userspace. Userspace can query and + * set these flags using userspace utilities but there is also a sysfs + * entry available for all dev flags which can be queried and set. These flags + * are shared for all types of net_devices. The sysfs entries are available + * via /sys/class/net/<dev>/flags. Flags which can be toggled through sysfs + * are annotated below, note that only a few flags can be toggled and some + * other flags are always always preserved from the original net_device flags + * even if you try to set them via sysfs. Flags which are always preserved + * are kept under the flag grouping @IFF_VOLATILE. Flags which are volatile + * are annotated below as such. + * + * You should have a pretty good reason to be extending these flags. + * + * @IFF_UP: interface is up. Can be toggled through sysfs. + * @IFF_BROADCAST: broadcast address valid. Volatile. + * @IFF_DEBUG: turn on debugging. Can be toggled through sysfs. + * @IFF_LOOPBACK: is a loopback net. Volatile. + * @IFF_POINTOPOINT: interface is has p-p link. Volatile. + * @IFF_NOTRAILERS: avoid use of trailers. Can be toggled through sysfs. + * Volatile. + * @IFF_RUNNING: interface RFC2863 OPER_UP. Volatile. + * @IFF_NOARP: no ARP protocol. Can be toggled through sysfs. Volatile. + * @IFF_PROMISC: receive all packets. Can be toggled through sysfs. + * @IFF_ALLMULTI: receive all multicast packets. Can be toggled through + * sysfs. + * @IFF_MASTER: master of a load balancer. Volatile. + * @IFF_SLAVE: slave of a load balancer. Volatile. + * @IFF_MULTICAST: Supports multicast. Can be toggled through sysfs. + * @IFF_PORTSEL: can set media type. Can be toggled through sysfs. + * @IFF_AUTOMEDIA: auto media select active. Can be toggled through sysfs. + * @IFF_DYNAMIC: dialup device with changing addresses. Can be toggled + * through sysfs. + * @IFF_LOWER_UP: driver signals L1 up. Volatile. + * @IFF_DORMANT: driver signals dormant. Volatile. + * @IFF_ECHO: echo sent packets. Volatile. + */ +enum net_device_flags { + IFF_UP = 1<<0, /* sysfs */ + IFF_BROADCAST = 1<<1, /* volatile */ + IFF_DEBUG = 1<<2, /* sysfs */ + IFF_LOOPBACK = 1<<3, /* volatile */ + IFF_POINTOPOINT = 1<<4, /* volatile */ + IFF_NOTRAILERS = 1<<5, /* sysfs */ + IFF_RUNNING = 1<<6, /* volatile */ + IFF_NOARP = 1<<7, /* sysfs */ + IFF_PROMISC = 1<<8, /* sysfs */ + IFF_ALLMULTI = 1<<9, /* sysfs */ + IFF_MASTER = 1<<10, /* volatile */ + IFF_SLAVE = 1<<11, /* volatile */ + IFF_MULTICAST = 1<<12, /* sysfs */ + IFF_PORTSEL = 1<<13, /* sysfs */ + IFF_AUTOMEDIA = 1<<14, /* sysfs */ + IFF_DYNAMIC = 1<<15, /* sysfs */ + IFF_LOWER_UP = 1<<16, /* volatile */ + IFF_DORMANT = 1<<17, /* volatile */ + IFF_ECHO = 1<<18, /* volatile */ +}; -#define IFF_ECHO 0x40000 /* echo sent packets */ +#define IFF_UP IFF_UP +#define IFF_BROADCAST IFF_BROADCAST +#define IFF_DEBUG IFF_DEBUG +#define IFF_LOOPBACK IFF_LOOPBACK +#define IFF_POINTOPOINT IFF_POINTOPOINT +#define IFF_NOTRAILERS IFF_NOTRAILERS +#define IFF_RUNNING IFF_RUNNING +#define IFF_NOARP IFF_NOARP +#define IFF_PROMISC IFF_PROMISC +#define IFF_ALLMULTI IFF_ALLMULTI +#define IFF_MASTER IFF_MASTER +#define IFF_SLAVE IFF_SLAVE +#define IFF_MULTICAST IFF_MULTICAST +#define IFF_PORTSEL IFF_PORTSEL +#define IFF_AUTOMEDIA IFF_AUTOMEDIA +#define IFF_DYNAMIC IFF_DYNAMIC +#define IFF_LOWER_UP IFF_LOWER_UP +#define IFF_DORMANT IFF_DORMANT +#define IFF_ECHO IFF_ECHO #define IFF_VOLATILE (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\ IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT) -/* Private (from user) interface flags (netdevice->priv_flags). */ -#define IFF_802_1Q_VLAN 0x1 /* 802.1Q VLAN device. */ -#define IFF_EBRIDGE 0x2 /* Ethernet bridging device. */ -#define IFF_SLAVE_INACTIVE 0x4 /* bonding slave not the curr. active */ -#define IFF_MASTER_8023AD 0x8 /* bonding master, 802.3ad. */ -#define IFF_MASTER_ALB 0x10 /* bonding master, balance-alb. */ -#define IFF_BONDING 0x20 /* bonding master or slave */ -#define IFF_SLAVE_NEEDARP 0x40 /* need ARPs for validation */ -#define IFF_ISATAP 0x80 /* ISATAP interface (RFC4214) */ -#define IFF_MASTER_ARPMON 0x100 /* bonding master, ARP mon in use */ -#define IFF_WAN_HDLC 0x200 /* WAN HDLC device */ -#define IFF_XMIT_DST_RELEASE 0x400 /* dev_hard_start_xmit() is allowed to - * release skb->dst - */ -#define IFF_DONT_BRIDGE 0x800 /* disallow bridging this ether dev */ -#define IFF_DISABLE_NETPOLL 0x1000 /* disable netpoll at run-time */ -#define IFF_MACVLAN_PORT 0x2000 /* device used as macvlan port */ -#define IFF_BRIDGE_PORT 0x4000 /* device used as bridge port */ -#define IFF_OVS_DATAPATH 0x8000 /* device used as Open vSwitch - * datapath port */ -#define IFF_TX_SKB_SHARING 0x10000 /* The interface supports sharing - * skbs on transmit */ -#define IFF_UNICAST_FLT 0x20000 /* Supports unicast filtering */ -#define IFF_TEAM_PORT 0x40000 /* device used as team port */ -#define IFF_SUPP_NOFCS 0x80000 /* device supports sending custom FCS */ -#define IFF_LIVE_ADDR_CHANGE 0x100000 /* device supports hardware address - * change when it's running */ -#define IFF_MACVLAN 0x200000 /* Macvlan device */ - - #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 2ce0f6a78fa5..ba8b15f07940 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -68,11 +68,11 @@ #define ETH_P_SLOW 0x8809 /* Slow Protocol. See 802.3ad 43B */ #define ETH_P_WCCP 0x883E /* Web-cache coordination protocol * defined in draft-wilson-wrec-wccp-v2-00.txt */ -#define ETH_P_PPP_DISC 0x8863 /* PPPoE discovery messages */ -#define ETH_P_PPP_SES 0x8864 /* PPPoE session messages */ #define ETH_P_MPLS_UC 0x8847 /* MPLS Unicast traffic */ #define ETH_P_MPLS_MC 0x8848 /* MPLS Multicast traffic */ #define ETH_P_ATMMPOA 0x884c /* MultiProtocol Over ATM */ +#define ETH_P_PPP_DISC 0x8863 /* PPPoE discovery messages */ +#define ETH_P_PPP_SES 0x8864 /* PPPoE session messages */ #define ETH_P_LINK_CTL 0x886c /* HPNA, wlan link local tunnel */ #define ETH_P_ATMFATE 0x8884 /* Frame-based ATM Transport * over Ethernet diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 393c5de09d42..c33a65e3d62c 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -120,6 +120,10 @@ struct in_addr { * this socket to prevent accepting spoofed ones. */ #define IP_PMTUDISC_INTERFACE 4 +/* weaker version of IP_PMTUDISC_INTERFACE, which allos packets to get + * fragmented if they exeed the interface mtu + */ +#define IP_PMTUDISC_OMIT 5 #define IP_MULTICAST_IF 32 #define IP_MULTICAST_TTL 33 diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index e9a1d2d973b6..0d8e0f0342dc 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -185,6 +185,10 @@ struct in6_flowlabel_req { * also see comments on IP_PMTUDISC_INTERFACE */ #define IPV6_PMTUDISC_INTERFACE 4 +/* weaker version of IPV6_PMTUDISC_INTERFACE, which allows packets to + * get fragmented if they exceed the interface mtu + */ +#define IPV6_PMTUDISC_OMIT 5 /* Flowlabel */ #define IPV6_FLOWLABEL_MGR 32 diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index bbaba22f2d1b..8d64a7e5d371 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -259,6 +259,9 @@ enum LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES, /* TCPSpuriousRtxHostQueues */ LINUX_MIB_BUSYPOLLRXPACKETS, /* BusyPollRxPackets */ LINUX_MIB_TCPAUTOCORKING, /* TCPAutoCorking */ + LINUX_MIB_TCPFROMZEROWINDOWADV, /* TCPFromZeroWindowAdv */ + LINUX_MIB_TCPTOZEROWINDOWADV, /* TCPToZeroWindowAdv */ + LINUX_MIB_TCPWANTZEROWINDOWADV, /* TCPWantZeroWindowAdv */ __LINUX_MIB_MAX }; diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h index 54a37b13f2c4..93533926035c 100644 --- a/include/uapi/linux/tcp_metrics.h +++ b/include/uapi/linux/tcp_metrics.h @@ -11,12 +11,15 @@ #define TCP_METRICS_GENL_VERSION 0x1 enum tcp_metric_index { - TCP_METRIC_RTT, - TCP_METRIC_RTTVAR, + TCP_METRIC_RTT, /* in ms units */ + TCP_METRIC_RTTVAR, /* in ms units */ TCP_METRIC_SSTHRESH, TCP_METRIC_CWND, TCP_METRIC_REORDERING, + TCP_METRIC_RTT_US, /* in usec units */ + TCP_METRIC_RTTVAR_US, /* in usec units */ + /* Always last. */ __TCP_METRIC_MAX, }; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index b9e9e0d38672..36d3f8c1a2dd 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -836,10 +836,10 @@ out: static __inline__ int neigh_max_probes(struct neighbour *n) { struct neigh_parms *p = n->parms; - return (n->nud_state & NUD_PROBE) ? - NEIGH_VAR(p, UCAST_PROBES) : - NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) + - NEIGH_VAR(p, MCAST_PROBES); + int max_probes = NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES); + if (!(n->nud_state & NUD_PROBE)) + max_probes += NEIGH_VAR(p, MCAST_PROBES); + return max_probes; } static void neigh_invalidate(struct neighbour *neigh) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 73aa594674ef..daed9a64c6f6 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -104,6 +104,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, } NETDEVICE_SHOW_RO(dev_id, fmt_hex); +NETDEVICE_SHOW_RO(dev_port, fmt_dec); NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec); NETDEVICE_SHOW_RO(addr_len, fmt_dec); NETDEVICE_SHOW_RO(iflink, fmt_dec); @@ -373,6 +374,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_netdev_group.attr, &dev_attr_type.attr, &dev_attr_dev_id.attr, + &dev_attr_dev_port.attr, &dev_attr_iflink.attr, &dev_attr_ifindex.attr, &dev_attr_addr_assign_type.attr, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8971780aec7c..e85445b2b102 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -449,7 +449,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) __be16 not_last_frag; struct rtable *rt = skb_rtable(skb); int err = 0; - bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED; dev = rt->dst.dev; @@ -459,7 +458,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) iph = ip_hdr(skb); - mtu = ip_dst_mtu_maybe_forward(&rt->dst, forwarding); + mtu = ip_skb_dst_mtu(skb); if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) || (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size > mtu))) { @@ -825,8 +824,7 @@ static int __ip_append_data(struct sock *sk, fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; - maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ? - mtu : 0xFFFF; + maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu; if (cork->length + length > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, @@ -1149,8 +1147,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; - maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ? - mtu : 0xFFFF; + maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu; if (cork->length + size > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, @@ -1311,8 +1308,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, * to fragment the frame generated here. No matter, what transforms * how transforms change size of the packet, it will come out. */ - if (inet->pmtudisc < IP_PMTUDISC_DO) - skb->local_df = 1; + skb->local_df = ip_sk_local_df(sk); /* DF bit is set when we want to see DF on outgoing frames. * If local_df is set too, we still allow to fragment this frame diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 0968b28c4cf3..64741b938632 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -643,7 +643,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, inet->nodefrag = val ? 1 : 0; break; case IP_MTU_DISCOVER: - if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_INTERFACE) + if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) goto e_inval; inet->pmtudisc = val; break; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index a6c8a80ec9d6..99d2e9b6fac9 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -280,6 +280,9 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS), SNMP_MIB_ITEM("TCPAutoCorking", LINUX_MIB_TCPAUTOCORKING), + SNMP_MIB_ITEM("TCPFromZeroWindowAdv", LINUX_MIB_TCPFROMZEROWINDOWADV), + SNMP_MIB_ITEM("TCPToZeroWindowAdv", LINUX_MIB_TCPTOZEROWINDOWADV), + SNMP_MIB_ITEM("TCPWantZeroWindowAdv", LINUX_MIB_TCPWANTZEROWINDOWADV), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bed379c7abcd..7374905b3701 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -387,7 +387,7 @@ void tcp_init_sock(struct sock *sk) INIT_LIST_HEAD(&tp->tsq_node); icsk->icsk_rto = TCP_TIMEOUT_INIT; - tp->mdev = TCP_TIMEOUT_INIT; + tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control @@ -2339,7 +2339,7 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); - tp->srtt = 0; + tp->srtt_us = 0; if ((tp->write_seq += tp->max_window + 2) == 0) tp->write_seq = 1; icsk->icsk_backoff = 0; @@ -2783,8 +2783,8 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info) info->tcpi_pmtu = icsk->icsk_pmtu_cookie; info->tcpi_rcv_ssthresh = tp->rcv_ssthresh; - info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3; - info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2; + info->tcpi_rtt = tp->srtt_us >> 3; + info->tcpi_rttvar = tp->mdev_us >> 2; info->tcpi_snd_ssthresh = tp->snd_ssthresh; info->tcpi_snd_cwnd = tp->snd_cwnd; info->tcpi_advmss = tp->advmss; diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 828e4c3ffbaf..8bf224516ba2 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -476,10 +476,6 @@ static int __init cubictcp_register(void) /* divide by bic_scale and by constant Srtt (100ms) */ do_div(cube_factor, bic_scale * 10); - /* hystart needs ms clock resolution */ - if (hystart && HZ < 1000) - cubictcp.flags |= TCP_CONG_RTT_STAMP; - return tcp_register_congestion_control(&cubictcp); } diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 2a1a9e2a4e51..a15a799bf768 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -21,7 +21,7 @@ struct hybla { u32 rho2; /* Rho * Rho, integer part */ u32 rho_3ls; /* Rho parameter, <<3 */ u32 rho2_7ls; /* Rho^2, <<7 */ - u32 minrtt; /* Minimum smoothed round trip time value seen */ + u32 minrtt_us; /* Minimum smoothed round trip time value seen */ }; /* Hybla reference round trip time (default= 1/40 sec = 25 ms), in ms */ @@ -35,7 +35,9 @@ static inline void hybla_recalc_param (struct sock *sk) { struct hybla *ca = inet_csk_ca(sk); - ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8); + ca->rho_3ls = max_t(u32, + tcp_sk(sk)->srtt_us / (rtt0 * USEC_PER_MSEC), + 8U); ca->rho = ca->rho_3ls >> 3; ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1; ca->rho2 = ca->rho2_7ls >> 7; @@ -59,7 +61,7 @@ static void hybla_init(struct sock *sk) hybla_recalc_param(sk); /* set minimum rtt as this is the 1st ever seen */ - ca->minrtt = tp->srtt; + ca->minrtt_us = tp->srtt_us; tp->snd_cwnd = ca->rho; } @@ -94,9 +96,9 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked, int is_slowstart = 0; /* Recalculate rho only if this srtt is the lowest */ - if (tp->srtt < ca->minrtt){ + if (tp->srtt_us < ca->minrtt_us) { hybla_recalc_param(sk); - ca->minrtt = tp->srtt; + ca->minrtt_us = tp->srtt_us; } if (!tcp_is_cwnd_limited(sk, in_flight)) diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index be047c63ca10..863d105e3015 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -325,7 +325,6 @@ static void tcp_illinois_info(struct sock *sk, u32 ext, } static struct tcp_congestion_ops tcp_illinois __read_mostly = { - .flags = TCP_CONG_RTT_STAMP, .init = tcp_illinois_init, .ssthresh = tcp_illinois_ssthresh, .cong_avoid = tcp_illinois_cong_avoid, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 227cba79fa6b..23a41d978fad 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -667,11 +667,11 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) * To save cycles in the RFC 1323 implementation it was better to break * it up into three procedures. -- erics */ -static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) +static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) { struct tcp_sock *tp = tcp_sk(sk); - long m = mrtt; /* RTT */ - u32 srtt = tp->srtt; + long m = mrtt_us; /* RTT */ + u32 srtt = tp->srtt_us; /* The following amusing code comes from Jacobson's * article in SIGCOMM '88. Note that rtt and mdev @@ -694,7 +694,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) srtt += m; /* rtt = 7/8 rtt + 1/8 new */ if (m < 0) { m = -m; /* m is now abs(error) */ - m -= (tp->mdev >> 2); /* similar update on mdev */ + m -= (tp->mdev_us >> 2); /* similar update on mdev */ /* This is similar to one of Eifel findings. * Eifel blocks mdev updates when rtt decreases. * This solution is a bit different: we use finer gain @@ -706,28 +706,29 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) if (m > 0) m >>= 3; } else { - m -= (tp->mdev >> 2); /* similar update on mdev */ + m -= (tp->mdev_us >> 2); /* similar update on mdev */ } - tp->mdev += m; /* mdev = 3/4 mdev + 1/4 new */ - if (tp->mdev > tp->mdev_max) { - tp->mdev_max = tp->mdev; - if (tp->mdev_max > tp->rttvar) - tp->rttvar = tp->mdev_max; + tp->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ + if (tp->mdev_us > tp->mdev_max_us) { + tp->mdev_max_us = tp->mdev_us; + if (tp->mdev_max_us > tp->rttvar_us) + tp->rttvar_us = tp->mdev_max_us; } if (after(tp->snd_una, tp->rtt_seq)) { - if (tp->mdev_max < tp->rttvar) - tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2; + if (tp->mdev_max_us < tp->rttvar_us) + tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2; tp->rtt_seq = tp->snd_nxt; - tp->mdev_max = tcp_rto_min(sk); + tp->mdev_max_us = tcp_rto_min_us(sk); } } else { /* no previous measure. */ srtt = m << 3; /* take the measured time to be rtt */ - tp->mdev = m << 1; /* make sure rto = 3*rtt */ - tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); + tp->mdev_us = m << 1; /* make sure rto = 3*rtt */ + tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk)); + tp->mdev_max_us = tp->rttvar_us; tp->rtt_seq = tp->snd_nxt; } - tp->srtt = max(1U, srtt); + tp->srtt_us = max(1U, srtt); } /* Set the sk_pacing_rate to allow proper sizing of TSO packets. @@ -742,20 +743,12 @@ static void tcp_update_pacing_rate(struct sock *sk) u64 rate; /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */ - rate = (u64)tp->mss_cache * 2 * (HZ << 3); + rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3); rate *= max(tp->snd_cwnd, tp->packets_out); - /* Correction for small srtt and scheduling constraints. - * For small rtt, consider noise is too high, and use - * the minimal value (srtt = 1 -> 125 us for HZ=1000) - * - * We probably need usec resolution in the future. - * Note: This also takes care of possible srtt=0 case, - * when tcp_rtt_estimator() was not yet called. - */ - if (tp->srtt > 8 + 2) - do_div(rate, tp->srtt); + if (likely(tp->srtt_us)) + do_div(rate, tp->srtt_us); /* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate * without any lock. We want to make sure compiler wont store @@ -1122,10 +1115,10 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, } struct tcp_sacktag_state { - int reord; - int fack_count; - int flag; - s32 rtt; /* RTT measured by SACKing never-retransmitted data */ + int reord; + int fack_count; + long rtt_us; /* RTT measured by SACKing never-retransmitted data */ + int flag; }; /* Check if skb is fully within the SACK block. In presence of GSO skbs, @@ -1186,7 +1179,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, static u8 tcp_sacktag_one(struct sock *sk, struct tcp_sacktag_state *state, u8 sacked, u32 start_seq, u32 end_seq, - int dup_sack, int pcount, u32 xmit_time) + int dup_sack, int pcount, + const struct skb_mstamp *xmit_time) { struct tcp_sock *tp = tcp_sk(sk); int fack_count = state->fack_count; @@ -1227,8 +1221,13 @@ static u8 tcp_sacktag_one(struct sock *sk, if (!after(end_seq, tp->high_seq)) state->flag |= FLAG_ORIG_SACK_ACKED; /* Pick the earliest sequence sacked for RTT */ - if (state->rtt < 0) - state->rtt = tcp_time_stamp - xmit_time; + if (state->rtt_us < 0) { + struct skb_mstamp now; + + skb_mstamp_get(&now); + state->rtt_us = skb_mstamp_us_delta(&now, + xmit_time); + } } if (sacked & TCPCB_LOST) { @@ -1287,7 +1286,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, */ tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, start_seq, end_seq, dup_sack, pcount, - TCP_SKB_CB(skb)->when); + &skb->skb_mstamp); if (skb == tp->lost_skb_hint) tp->lost_cnt_hint += pcount; @@ -1565,7 +1564,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, TCP_SKB_CB(skb)->end_seq, dup_sack, tcp_skb_pcount(skb), - TCP_SKB_CB(skb)->when); + &skb->skb_mstamp); if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) @@ -1622,7 +1621,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl static int tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, - u32 prior_snd_una, s32 *sack_rtt) + u32 prior_snd_una, long *sack_rtt_us) { struct tcp_sock *tp = tcp_sk(sk); const unsigned char *ptr = (skb_transport_header(ack_skb) + @@ -1640,7 +1639,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, state.flag = 0; state.reord = tp->packets_out; - state.rtt = -1; + state.rtt_us = -1L; if (!tp->sacked_out) { if (WARN_ON(tp->fackets_out)) @@ -1824,7 +1823,7 @@ out: WARN_ON((int)tp->retrans_out < 0); WARN_ON((int)tcp_packets_in_flight(tp) < 0); #endif - *sack_rtt = state.rtt; + *sack_rtt_us = state.rtt_us; return state.flag; } @@ -2034,10 +2033,12 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag) * available, or RTO is scheduled to fire first. */ if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 || - (flag & FLAG_ECE) || !tp->srtt) + (flag & FLAG_ECE) || !tp->srtt_us) return false; - delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2)); + delay = max(usecs_to_jiffies(tp->srtt_us >> 5), + msecs_to_jiffies(2)); + if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) return false; @@ -2884,7 +2885,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, } static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, - s32 seq_rtt, s32 sack_rtt) + long seq_rtt_us, long sack_rtt_us) { const struct tcp_sock *tp = tcp_sk(sk); @@ -2894,10 +2895,10 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, * is acked (RFC6298). */ if (flag & FLAG_RETRANS_DATA_ACKED) - seq_rtt = -1; + seq_rtt_us = -1L; - if (seq_rtt < 0) - seq_rtt = sack_rtt; + if (seq_rtt_us < 0) + seq_rtt_us = sack_rtt_us; /* RTTM Rule: A TSecr value received in a segment is used to * update the averaged RTT measurement only if the segment @@ -2905,14 +2906,14 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, * left edge of the send window. * See draft-ietf-tcplw-high-performance-00, section 3.3. */ - if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && + if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED) - seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; + seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - tp->rx_opt.rcv_tsecr); - if (seq_rtt < 0) + if (seq_rtt_us < 0) return false; - tcp_rtt_estimator(sk, seq_rtt); + tcp_rtt_estimator(sk, seq_rtt_us); tcp_set_rto(sk); /* RFC6298: only reset backoff on valid RTT measurement. */ @@ -2924,16 +2925,16 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp) { struct tcp_sock *tp = tcp_sk(sk); - s32 seq_rtt = -1; + long seq_rtt_us = -1L; if (synack_stamp && !tp->total_retrans) - seq_rtt = tcp_time_stamp - synack_stamp; + seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - synack_stamp); /* If the ACK acks both the SYNACK and the (Fast Open'd) data packets * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack() */ - if (!tp->srtt) - tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1); + if (!tp->srtt_us) + tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L); } static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) @@ -3022,26 +3023,27 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) * arrived at the other end. */ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, - u32 prior_snd_una, s32 sack_rtt) + u32 prior_snd_una, long sack_rtt_us) { - struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - struct sk_buff *skb; - u32 now = tcp_time_stamp; + struct skb_mstamp first_ackt, last_ackt, now; + struct tcp_sock *tp = tcp_sk(sk); + u32 prior_sacked = tp->sacked_out; + u32 reord = tp->packets_out; bool fully_acked = true; - int flag = 0; + long ca_seq_rtt_us = -1L; + long seq_rtt_us = -1L; + struct sk_buff *skb; u32 pkts_acked = 0; - u32 reord = tp->packets_out; - u32 prior_sacked = tp->sacked_out; - s32 seq_rtt = -1; - s32 ca_seq_rtt = -1; - ktime_t last_ackt = net_invalid_timestamp(); bool rtt_update; + int flag = 0; + + first_ackt.v64 = 0; while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); - u32 acked_pcount; u8 sacked = scb->sacked; + u32 acked_pcount; /* Determine how many packets and what bytes were acked, tso and else */ if (after(scb->end_seq, tp->snd_una)) { @@ -3063,11 +3065,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->retrans_out -= acked_pcount; flag |= FLAG_RETRANS_DATA_ACKED; } else { - ca_seq_rtt = now - scb->when; - last_ackt = skb->tstamp; - if (seq_rtt < 0) { - seq_rtt = ca_seq_rtt; - } + last_ackt = skb->skb_mstamp; + if (!first_ackt.v64) + first_ackt = last_ackt; + if (!(sacked & TCPCB_SACKED_ACKED)) reord = min(pkts_acked, reord); if (!after(scb->end_seq, tp->high_seq)) @@ -3113,7 +3114,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) flag |= FLAG_SACK_RENEGING; - rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt); + skb_mstamp_get(&now); + if (first_ackt.v64) { + seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt); + ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt); + } + + rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us); if (flag & FLAG_ACKED) { const struct tcp_congestion_ops *ca_ops @@ -3141,25 +3148,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->fackets_out -= min(pkts_acked, tp->fackets_out); - if (ca_ops->pkts_acked) { - s32 rtt_us = -1; - - /* Is the ACK triggering packet unambiguous? */ - if (!(flag & FLAG_RETRANS_DATA_ACKED)) { - /* High resolution needed and available? */ - if (ca_ops->flags & TCP_CONG_RTT_STAMP && - !ktime_equal(last_ackt, - net_invalid_timestamp())) - rtt_us = ktime_us_delta(ktime_get_real(), - last_ackt); - else if (ca_seq_rtt >= 0) - rtt_us = jiffies_to_usecs(ca_seq_rtt); - } + if (ca_ops->pkts_acked) + ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us); - ca_ops->pkts_acked(sk, pkts_acked, rtt_us); - } - } else if (skb && rtt_update && sack_rtt >= 0 && - sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) { + } else if (skb && rtt_update && sack_rtt_us >= 0 && + sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { /* Do not re-arm RTO if the sack RTT is measured from data sent * after when the head was last (re)transmitted. Otherwise the * timeout may continue to extend in loss recovery. @@ -3369,12 +3362,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; - u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt; + u32 prior_in_flight; u32 prior_fackets; int prior_packets = tp->packets_out; const int prior_unsacked = tp->packets_out - tp->sacked_out; int acked = 0; /* Number of packets newly acked */ - s32 sack_rtt = -1; + long sack_rtt_us = -1L; /* If the ack is older than previous acks * then we can probably ignore it. @@ -3432,7 +3425,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (TCP_SKB_CB(skb)->sacked) flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, - &sack_rtt); + &sack_rtt_us); if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb))) flag |= FLAG_ECE; @@ -3451,7 +3444,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ acked = tp->packets_out; - flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt); + flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, + sack_rtt_us); acked -= tp->packets_out; /* Advance cwnd if state allows */ @@ -3474,8 +3468,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (icsk->icsk_pending == ICSK_TIME_RETRANS) tcp_schedule_loss_probe(sk); - if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd) - tcp_update_pacing_rate(sk); + tcp_update_pacing_rate(sk); return 1; no_queue: @@ -3504,7 +3497,7 @@ old_ack: */ if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, - &sack_rtt); + &sack_rtt_us); tcp_fastretrans_alert(sk, acked, prior_unsacked, is_dupack, flag); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3cf976510497..17c0fb172fba 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -435,7 +435,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) break; icsk->icsk_backoff--; - inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) : + inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) : TCP_TIMEOUT_INIT) << icsk->icsk_backoff; tcp_bound_rto(sk); diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 503798f2fcd6..c9aecae31327 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -315,7 +315,6 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us) } static struct tcp_congestion_ops tcp_lp __read_mostly = { - .flags = TCP_CONG_RTT_STAMP, .init = tcp_lp_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_lp_cong_avoid, diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index d547075d8300..dcaf72f10216 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -33,6 +33,11 @@ struct tcp_fastopen_metrics { struct tcp_fastopen_cookie cookie; }; +/* TCP_METRIC_MAX includes 2 extra fields for userspace compatibility + * Kernel only stores RTT and RTTVAR in usec resolution + */ +#define TCP_METRIC_MAX_KERNEL (TCP_METRIC_MAX - 2) + struct tcp_metrics_block { struct tcp_metrics_block __rcu *tcpm_next; struct inetpeer_addr tcpm_saddr; @@ -41,7 +46,7 @@ struct tcp_metrics_block { u32 tcpm_ts; u32 tcpm_ts_stamp; u32 tcpm_lock; - u32 tcpm_vals[TCP_METRIC_MAX + 1]; + u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1]; struct tcp_fastopen_metrics tcpm_fastopen; struct rcu_head rcu_head; @@ -59,12 +64,6 @@ static u32 tcp_metric_get(struct tcp_metrics_block *tm, return tm->tcpm_vals[idx]; } -static u32 tcp_metric_get_jiffies(struct tcp_metrics_block *tm, - enum tcp_metric_index idx) -{ - return msecs_to_jiffies(tm->tcpm_vals[idx]); -} - static void tcp_metric_set(struct tcp_metrics_block *tm, enum tcp_metric_index idx, u32 val) @@ -72,13 +71,6 @@ static void tcp_metric_set(struct tcp_metrics_block *tm, tm->tcpm_vals[idx] = val; } -static void tcp_metric_set_msecs(struct tcp_metrics_block *tm, - enum tcp_metric_index idx, - u32 val) -{ - tm->tcpm_vals[idx] = jiffies_to_msecs(val); -} - static bool addr_same(const struct inetpeer_addr *a, const struct inetpeer_addr *b) { @@ -101,9 +93,11 @@ struct tcpm_hash_bucket { static DEFINE_SPINLOCK(tcp_metrics_lock); -static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst, +static void tcpm_suck_dst(struct tcp_metrics_block *tm, + const struct dst_entry *dst, bool fastopen_clear) { + u32 msval; u32 val; tm->tcpm_stamp = jiffies; @@ -121,8 +115,11 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst, val |= 1 << TCP_METRIC_REORDERING; tm->tcpm_lock = val; - tm->tcpm_vals[TCP_METRIC_RTT] = dst_metric_raw(dst, RTAX_RTT); - tm->tcpm_vals[TCP_METRIC_RTTVAR] = dst_metric_raw(dst, RTAX_RTTVAR); + msval = dst_metric_raw(dst, RTAX_RTT); + tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC; + + msval = dst_metric_raw(dst, RTAX_RTTVAR); + tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC; tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); @@ -384,7 +381,7 @@ void tcp_update_metrics(struct sock *sk) dst_confirm(dst); rcu_read_lock(); - if (icsk->icsk_backoff || !tp->srtt) { + if (icsk->icsk_backoff || !tp->srtt_us) { /* This session failed to estimate rtt. Why? * Probably, no packets returned in time. Reset our * results. @@ -399,8 +396,8 @@ void tcp_update_metrics(struct sock *sk) if (!tm) goto out_unlock; - rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT); - m = rtt - tp->srtt; + rtt = tcp_metric_get(tm, TCP_METRIC_RTT); + m = rtt - tp->srtt_us; /* If newly calculated rtt larger than stored one, store new * one. Otherwise, use EWMA. Remember, rtt overestimation is @@ -408,10 +405,10 @@ void tcp_update_metrics(struct sock *sk) */ if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) { if (m <= 0) - rtt = tp->srtt; + rtt = tp->srtt_us; else rtt -= (m >> 3); - tcp_metric_set_msecs(tm, TCP_METRIC_RTT, rtt); + tcp_metric_set(tm, TCP_METRIC_RTT, rtt); } if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) { @@ -422,16 +419,16 @@ void tcp_update_metrics(struct sock *sk) /* Scale deviation to rttvar fixed point */ m >>= 1; - if (m < tp->mdev) - m = tp->mdev; + if (m < tp->mdev_us) + m = tp->mdev_us; - var = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR); + var = tcp_metric_get(tm, TCP_METRIC_RTTVAR); if (m >= var) var = m; else var -= (var - m) >> 2; - tcp_metric_set_msecs(tm, TCP_METRIC_RTTVAR, var); + tcp_metric_set(tm, TCP_METRIC_RTTVAR, var); } if (tcp_in_initial_slowstart(tp)) { @@ -528,7 +525,7 @@ void tcp_init_metrics(struct sock *sk) tp->reordering = val; } - crtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT); + crtt = tcp_metric_get(tm, TCP_METRIC_RTT); rcu_read_unlock(); reset: /* The initial RTT measurement from the SYN/SYN-ACK is not ideal @@ -551,18 +548,20 @@ reset: * to low value, and then abruptly stops to do it and starts to delay * ACKs, wait for troubles. */ - if (crtt > tp->srtt) { + if (crtt > tp->srtt_us) { /* Set RTO like tcp_rtt_estimator(), but from cached RTT. */ - crtt >>= 3; + crtt /= 8 * USEC_PER_MSEC; inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk)); - } else if (tp->srtt == 0) { + } else if (tp->srtt_us == 0) { /* RFC6298: 5.7 We've failed to get a valid RTT sample from * 3WHS. This is most likely due to retransmission, * including spurious one. Reset the RTO back to 3secs * from the more aggressive 1sec to avoid more spurious * retransmission. */ - tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; + tp->rttvar_us = jiffies_to_usecs(TCP_TIMEOUT_FALLBACK); + tp->mdev_us = tp->mdev_max_us = tp->rttvar_us; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; } /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been @@ -809,10 +808,26 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS); if (!nest) goto nla_put_failure; - for (i = 0; i < TCP_METRIC_MAX + 1; i++) { - if (!tm->tcpm_vals[i]) + for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) { + u32 val = tm->tcpm_vals[i]; + + if (!val) continue; - if (nla_put_u32(msg, i + 1, tm->tcpm_vals[i]) < 0) + if (i == TCP_METRIC_RTT) { + if (nla_put_u32(msg, TCP_METRIC_RTT_US + 1, + val) < 0) + goto nla_put_failure; + n++; + val = max(val / 1000, 1U); + } + if (i == TCP_METRIC_RTTVAR) { + if (nla_put_u32(msg, TCP_METRIC_RTTVAR_US + 1, + val) < 0) + goto nla_put_failure; + n++; + val = max(val / 1000, 1U); + } + if (nla_put_u32(msg, i + 1, val) < 0) goto nla_put_failure; n++; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 7a436c517e44..ca788ada5bd3 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -398,8 +398,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_init_wl(newtp, treq->rcv_isn); - newtp->srtt = 0; - newtp->mdev = TCP_TIMEOUT_INIT; + newtp->srtt_us = 0; + newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); newicsk->icsk_rto = TCP_TIMEOUT_INIT; newtp->packets_out = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 21e8a9f33287..bf38b1fb63ab 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -269,6 +269,7 @@ EXPORT_SYMBOL(tcp_select_initial_window); static u16 tcp_select_window(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + u32 old_win = tp->rcv_wnd; u32 cur_win = tcp_receive_window(tp); u32 new_win = __tcp_select_window(sk); @@ -281,6 +282,9 @@ static u16 tcp_select_window(struct sock *sk) * * Relax Will Robinson. */ + if (new_win == 0) + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPWANTZEROWINDOWADV); new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale); } tp->rcv_wnd = new_win; @@ -298,8 +302,14 @@ static u16 tcp_select_window(struct sock *sk) new_win >>= tp->rx_opt.rcv_wscale; /* If we advertise zero window, disable fast path. */ - if (new_win == 0) + if (new_win == 0) { tp->pred_flags = 0; + if (old_win) + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPTOZEROWINDOWADV); + } else if (old_win == 0) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV); + } return new_win; } @@ -856,11 +866,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (clone_it) { const struct sk_buff *fclone = skb + 1; - /* If congestion control is doing timestamping, we must - * take such a timestamp before we potentially clone/copy. - */ - if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP) - __net_timestamp(skb); + skb_mstamp_get(&skb->skb_mstamp); if (unlikely(skb->fclone == SKB_FCLONE_ORIG && fclone->fclone == SKB_FCLONE_CLONE)) @@ -1964,7 +1970,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); u32 timeout, tlp_time_stamp, rto_time_stamp; - u32 rtt = tp->srtt >> 3; + u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3); if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS)) return false; @@ -1986,7 +1992,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) /* Schedule a loss probe in 2*RTT for SACK capable connections * in Open state, that are either limited by cwnd or application. */ - if (sysctl_tcp_early_retrans < 3 || !tp->srtt || !tp->packets_out || + if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out || !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) return false; @@ -3040,8 +3046,9 @@ void tcp_send_delayed_ack(struct sock *sk) * Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements * directly. */ - if (tp->srtt) { - int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN); + if (tp->srtt_us) { + int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3), + TCP_DELACK_MIN); if (rtt < max_ato) max_ato = rtt; diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 1f2d37613c9e..3b66610d4156 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -154,7 +154,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, p->snd_wnd = tp->snd_wnd; p->rcv_wnd = tp->rcv_wnd; p->ssthresh = tcp_current_ssthresh(sk); - p->srtt = tp->srtt >> 3; + p->srtt = tp->srtt_us >> 3; tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1); } diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index a022c17c9cf1..48539fff6357 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -306,7 +306,6 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) EXPORT_SYMBOL_GPL(tcp_vegas_get_info); static struct tcp_congestion_ops tcp_vegas __read_mostly = { - .flags = TCP_CONG_RTT_STAMP, .init = tcp_vegas_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_vegas_cong_avoid, diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 326475a94865..1b8e28fcd7e1 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -203,7 +203,6 @@ static u32 tcp_veno_ssthresh(struct sock *sk) } static struct tcp_congestion_ops tcp_veno __read_mostly = { - .flags = TCP_CONG_RTT_STAMP, .init = tcp_veno_init, .ssthresh = tcp_veno_ssthresh, .cong_avoid = tcp_veno_cong_avoid, diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 8eab02030ed0..5ede0e727945 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -227,7 +227,6 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) { } static struct tcp_congestion_ops tcp_yeah __read_mostly = { - .flags = TCP_CONG_RTT_STAMP, .init = tcp_yeah_init, .ssthresh = tcp_yeah_ssthresh, .cong_avoid = tcp_yeah_cong_avoid, diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index 72d198b8e4d2..ee7a97f510cb 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -79,7 +79,9 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) /* RFC 2460 section 8.1 says that we SHOULD log this error. Well, it is reasonable. */ - LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); + LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", + &ipv6_hdr(skb)->saddr, ntohs(uh->source), + &ipv6_hdr(skb)->daddr, ntohs(uh->dest)); return 1; } if (skb->ip_summed == CHECKSUM_COMPLETE && diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 070a2fae2375..be1b7f5a3a54 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1234,8 +1234,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; - maxnonfragsize = (np->pmtudisc >= IPV6_PMTUDISC_DO) ? - mtu : sizeof(struct ipv6hdr) + IPV6_MAXPLEN; + if (ip6_sk_local_df(sk)) + maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; + else + maxnonfragsize = mtu; /* dontfrag active */ if ((cork->length + length > mtu - headersize) && dontfrag && @@ -1543,8 +1545,7 @@ int ip6_push_pending_frames(struct sock *sk) } /* Allow local fragmentation. */ - if (np->pmtudisc < IPV6_PMTUDISC_DO) - skb->local_df = 1; + skb->local_df = ip6_sk_local_df(sk); *final_dst = fl6->daddr; __skb_pull(skb, skb_network_header_len(skb)); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 0a00f449de5e..edb58aff4ae7 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -722,7 +722,7 @@ done: case IPV6_MTU_DISCOVER: if (optlen < sizeof(int)) goto e_inval; - if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_INTERFACE) + if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) goto e_inval; np->pmtudisc = val; retv = 0; |