diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-17 16:31:08 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-17 16:31:08 -0400 |
commit | 388f997620cb57372c494a194e9698b28cc179b8 (patch) | |
tree | 31f2b7f01793f1711794193450f9047f78ee5370 | |
parent | e2fdae7e7c5a690b10b2d2891ec819e554dc033d (diff) | |
parent | e3122b7fae7b4e3d1d49fa84f6515bcbe6cbc6fc (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:
1) Fix verifier memory corruption and other bugs in BPF layer, from
Alexei Starovoitov.
2) Add a conservative fix for doing BPF properly in the BPF classifier
of the packet scheduler on ingress. Also from Alexei.
3) The SKB scrubber should not clear out the packet MARK and security
label, from Herbert Xu.
4) Fix oops on rmmod in stmmac driver, from Bryan O'Donoghue.
5) Pause handling is not correct in the stmmac driver because it
doesn't take into consideration the RX and TX fifo sizes. From
Vince Bridgers.
6) Failure path missing unlock in FOU driver, from Wang Cong.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (44 commits)
net: dsa: use DEVICE_ATTR_RW to declare temp1_max
netns: remove BUG_ONs from net_generic()
IB/ipoib: Fix ndo_get_iflink
sfc: Fix memcpy() with const destination compiler warning.
altera tse: Fix network-delays and -retransmissions after high throughput.
net: remove unused 'dev' argument from netif_needs_gso()
act_mirred: Fix bogus header when redirecting from VLAN
inet_diag: fix access to tcp cc information
tcp: tcp_get_info() should fetch socket fields once
net: dsa: mv88e6xxx: Add missing initialization in mv88e6xxx_set_port_state()
skbuff: Do not scrub skb mark within the same name space
Revert "net: Reset secmark when scrubbing packet"
bpf: fix two bugs in verification logic when accessing 'ctx' pointer
bpf: fix bpf helpers to use skb->mac_header relative offsets
stmmac: Configure Flow Control to work correctly based on rxfifo size
stmmac: Enable unicast pause frame detect in GMAC Register 6
stmmac: Read tx-fifo-depth and rx-fifo-depth from the devicetree
stmmac: Add defines and documentation for enabling flow control
stmmac: Add properties for transmit and receive fifo sizes
stmmac: fix oops on rmmod after assigning ip addr
...
62 files changed, 656 insertions, 352 deletions
diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt index 3fc360523bc9..41b3f3f864e8 100644 --- a/Documentation/devicetree/bindings/net/ethernet.txt +++ b/Documentation/devicetree/bindings/net/ethernet.txt @@ -19,6 +19,12 @@ The following properties are common to the Ethernet controllers: - phy: the same as "phy-handle" property, not recommended for new bindings. - phy-device: the same as "phy-handle" property, not recommended for new bindings. +- rx-fifo-depth: the size of the controller's receive fifo in bytes. This + is used for components that can have configurable receive fifo sizes, + and is useful for determining certain configuration settings such as + flow control thresholds. +- tx-fifo-depth: the size of the controller's transmit fifo in bytes. This + is used for components that can have configurable fifo sizes. Child nodes of the Ethernet controller are typically the individual PHY devices connected via the MDIO bus (sometimes the MDIO bus controller is separate). diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt index 29aca8591b16..f34fc3c81a75 100644 --- a/Documentation/devicetree/bindings/net/stmmac.txt +++ b/Documentation/devicetree/bindings/net/stmmac.txt @@ -45,6 +45,8 @@ Optional properties: If not passed then the system clock will be used and this is fine on some platforms. - snps,burst_len: The AXI burst lenth value of the AXI BUS MODE register. +- tx-fifo-depth: See ethernet.txt file in the same directory +- rx-fifo-depth: See ethernet.txt file in the same directory Examples: @@ -59,6 +61,8 @@ Examples: phy-mode = "gmii"; snps,multicast-filter-bins = <256>; snps,perfect-filter-entries = <128>; + rx-fifo-depth = <16384>; + tx-fifo-depth = <16384>; clocks = <&clock>; clock-names = "stmmaceth"; }; diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 6791fd16272c..3ef0cf9f5c44 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -73,7 +73,7 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, c4iw_init_wr_wait(&wr_wait); wr_len = roundup(sizeof(*req) + sizeof(*sgl), 16); - skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + skb = alloc_skb(wr_len, GFP_KERNEL); if (!skb) return -ENOMEM; set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 657b89b1d291..915ad04a827e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -846,6 +846,11 @@ static int ipoib_get_iflink(const struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); + /* parent interface */ + if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) + return dev->ifindex; + + /* child/vlan interface */ return priv->parent->ifindex; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 4dd1313056a4..fca1a882de27 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -58,6 +58,7 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, /* MTU will be reset when mcast join happens */ priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; + priv->parent = ppriv->dev; set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); result = ipoib_set_dev_features(priv, ppriv->ca); @@ -84,8 +85,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, goto register_failed; } - priv->parent = ppriv->dev; - ipoib_create_debug_files(priv->dev); /* RTNL childs don't need proprietary sysfs entries */ diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index fc8d3b6ffe8e..9f0c2b9d58ae 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -602,8 +602,6 @@ static void _mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, u32 high = 0; if (s->reg >= 0x100) { - int ret; - ret = mv88e6xxx_reg_read(ds, REG_PORT(port), s->reg - 0x100); if (ret < 0) @@ -902,14 +900,16 @@ static int _mv88e6xxx_flush_fid(struct dsa_switch *ds, int fid) static int mv88e6xxx_set_port_state(struct dsa_switch *ds, int port, u8 state) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int reg, ret; + int reg, ret = 0; u8 oldstate; mutex_lock(&ps->smi_mutex); reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_CONTROL); - if (reg < 0) + if (reg < 0) { + ret = reg; goto abort; + } oldstate = reg & PORT_CONTROL_STATE_MASK; if (oldstate != state) { diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 79ea35869e1e..90a76306ad0f 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -376,8 +376,13 @@ static int tse_rx(struct altera_tse_private *priv, int limit) u16 pktlength; u16 pktstatus; - while (((rxstatus = priv->dmaops->get_rx_status(priv)) != 0) && - (count < limit)) { + /* Check for count < limit first as get_rx_status is changing + * the response-fifo so we must process the next packet + * after calling get_rx_status if a response is pending. + * (reading the last byte of the response pops the value from the fifo.) + */ + while ((count < limit) && + ((rxstatus = priv->dmaops->get_rx_status(priv)) != 0)) { pktstatus = rxstatus >> 16; pktlength = rxstatus & 0xffff; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 4085c4b31047..355d5fea5be9 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -531,20 +531,8 @@ struct bnx2x_fastpath { struct napi_struct napi; #ifdef CONFIG_NET_RX_BUSY_POLL - unsigned int state; -#define BNX2X_FP_STATE_IDLE 0 -#define BNX2X_FP_STATE_NAPI (1 << 0) /* NAPI owns this FP */ -#define BNX2X_FP_STATE_POLL (1 << 1) /* poll owns this FP */ -#define BNX2X_FP_STATE_DISABLED (1 << 2) -#define BNX2X_FP_STATE_NAPI_YIELD (1 << 3) /* NAPI yielded this FP */ -#define BNX2X_FP_STATE_POLL_YIELD (1 << 4) /* poll yielded this FP */ -#define BNX2X_FP_OWNED (BNX2X_FP_STATE_NAPI | BNX2X_FP_STATE_POLL) -#define BNX2X_FP_YIELD (BNX2X_FP_STATE_NAPI_YIELD | BNX2X_FP_STATE_POLL_YIELD) -#define BNX2X_FP_LOCKED (BNX2X_FP_OWNED | BNX2X_FP_STATE_DISABLED) -#define BNX2X_FP_USER_PEND (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_POLL_YIELD) - /* protect state */ - spinlock_t lock; -#endif /* CONFIG_NET_RX_BUSY_POLL */ + unsigned long busy_poll_state; +#endif union host_hc_status_block status_blk; /* chip independent shortcuts into sb structure */ @@ -619,104 +607,83 @@ struct bnx2x_fastpath { #define bnx2x_fp_qstats(bp, fp) (&((bp)->fp_stats[(fp)->index].eth_q_stats)) #ifdef CONFIG_NET_RX_BUSY_POLL -static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp) + +enum bnx2x_fp_state { + BNX2X_STATE_FP_NAPI = BIT(0), /* NAPI handler owns the queue */ + + BNX2X_STATE_FP_NAPI_REQ_BIT = 1, /* NAPI would like to own the queue */ + BNX2X_STATE_FP_NAPI_REQ = BIT(1), + + BNX2X_STATE_FP_POLL_BIT = 2, + BNX2X_STATE_FP_POLL = BIT(2), /* busy_poll owns the queue */ + + BNX2X_STATE_FP_DISABLE_BIT = 3, /* queue is dismantled */ +}; + +static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp) { - spin_lock_init(&fp->lock); - fp->state = BNX2X_FP_STATE_IDLE; + WRITE_ONCE(fp->busy_poll_state, 0); } /* called from the device poll routine to get ownership of a FP */ static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp) { - bool rc = true; - - spin_lock_bh(&fp->lock); - if (fp->state & BNX2X_FP_LOCKED) { - WARN_ON(fp->state & BNX2X_FP_STATE_NAPI); - fp->state |= BNX2X_FP_STATE_NAPI_YIELD; - rc = false; - } else { - /* we don't care if someone yielded */ - fp->state = BNX2X_FP_STATE_NAPI; + unsigned long prev, old = READ_ONCE(fp->busy_poll_state); + + while (1) { + switch (old) { + case BNX2X_STATE_FP_POLL: + /* make sure bnx2x_fp_lock_poll() wont starve us */ + set_bit(BNX2X_STATE_FP_NAPI_REQ_BIT, + &fp->busy_poll_state); + /* fallthrough */ + case BNX2X_STATE_FP_POLL | BNX2X_STATE_FP_NAPI_REQ: + return false; + default: + break; + } + prev = cmpxchg(&fp->busy_poll_state, old, BNX2X_STATE_FP_NAPI); + if (unlikely(prev != old)) { + old = prev; + continue; + } + return true; } - spin_unlock_bh(&fp->lock); - return rc; } -/* returns true is someone tried to get the FP while napi had it */ -static inline bool bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp) +static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp) { - bool rc = false; - - spin_lock_bh(&fp->lock); - WARN_ON(fp->state & - (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_NAPI_YIELD)); - - if (fp->state & BNX2X_FP_STATE_POLL_YIELD) - rc = true; - - /* state ==> idle, unless currently disabled */ - fp->state &= BNX2X_FP_STATE_DISABLED; - spin_unlock_bh(&fp->lock); - return rc; + smp_wmb(); + fp->busy_poll_state = 0; } /* called from bnx2x_low_latency_poll() */ static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp) { - bool rc = true; - - spin_lock_bh(&fp->lock); - if ((fp->state & BNX2X_FP_LOCKED)) { - fp->state |= BNX2X_FP_STATE_POLL_YIELD; - rc = false; - } else { - /* preserve yield marks */ - fp->state |= BNX2X_FP_STATE_POLL; - } - spin_unlock_bh(&fp->lock); - return rc; + return cmpxchg(&fp->busy_poll_state, 0, BNX2X_STATE_FP_POLL) == 0; } -/* returns true if someone tried to get the FP while it was locked */ -static inline bool bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp) +static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp) { - bool rc = false; - - spin_lock_bh(&fp->lock); - WARN_ON(fp->state & BNX2X_FP_STATE_NAPI); - - if (fp->state & BNX2X_FP_STATE_POLL_YIELD) - rc = true; - - /* state ==> idle, unless currently disabled */ - fp->state &= BNX2X_FP_STATE_DISABLED; - spin_unlock_bh(&fp->lock); - return rc; + smp_mb__before_atomic(); + clear_bit(BNX2X_STATE_FP_POLL_BIT, &fp->busy_poll_state); } -/* true if a socket is polling, even if it did not get the lock */ +/* true if a socket is polling */ static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp) { - WARN_ON(!(fp->state & BNX2X_FP_OWNED)); - return fp->state & BNX2X_FP_USER_PEND; + return READ_ONCE(fp->busy_poll_state) & BNX2X_STATE_FP_POLL; } /* false if fp is currently owned */ static inline bool bnx2x_fp_ll_disable(struct bnx2x_fastpath *fp) { - int rc = true; - - spin_lock_bh(&fp->lock); - if (fp->state & BNX2X_FP_OWNED) - rc = false; - fp->state |= BNX2X_FP_STATE_DISABLED; - spin_unlock_bh(&fp->lock); + set_bit(BNX2X_STATE_FP_DISABLE_BIT, &fp->busy_poll_state); + return !bnx2x_fp_ll_polling(fp); - return rc; } #else -static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp) +static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp) { } @@ -725,9 +692,8 @@ static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp) return true; } -static inline bool bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp) +static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp) { - return false; } static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp) @@ -735,9 +701,8 @@ static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp) return false; } -static inline bool bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp) +static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp) { - return false; } static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 0a9faa134a9a..2f63467bce46 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -1849,7 +1849,7 @@ static void bnx2x_napi_enable_cnic(struct bnx2x *bp) int i; for_each_rx_queue_cnic(bp, i) { - bnx2x_fp_init_lock(&bp->fp[i]); + bnx2x_fp_busy_poll_init(&bp->fp[i]); napi_enable(&bnx2x_fp(bp, i, napi)); } } @@ -1859,7 +1859,7 @@ static void bnx2x_napi_enable(struct bnx2x *bp) int i; for_each_eth_queue(bp, i) { - bnx2x_fp_init_lock(&bp->fp[i]); + bnx2x_fp_busy_poll_init(&bp->fp[i]); napi_enable(&bnx2x_fp(bp, i, napi)); } } @@ -3191,9 +3191,10 @@ static int bnx2x_poll(struct napi_struct *napi, int budget) } } + bnx2x_fp_unlock_napi(fp); + /* Fall out from the NAPI loop if needed */ - if (!bnx2x_fp_unlock_napi(fp) && - !(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) { + if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) { /* No need to update SB for FCoE L2 ring as long as * it's connected to the default SB and the SB diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 6de054404156..803d91beec6f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -1140,6 +1140,10 @@ static int set_filter_wr(struct adapter *adapter, int fidx) struct fw_filter_wr *fwr; unsigned int ftid; + skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); + if (!skb) + return -ENOMEM; + /* If the new filter requires loopback Destination MAC and/or VLAN * rewriting then we need to allocate a Layer 2 Table (L2T) entry for * the filter. @@ -1147,19 +1151,21 @@ static int set_filter_wr(struct adapter *adapter, int fidx) if (f->fs.newdmac || f->fs.newvlan) { /* allocate L2T entry for new filter */ f->l2t = t4_l2t_alloc_switching(adapter->l2t); - if (f->l2t == NULL) + if (f->l2t == NULL) { + kfree_skb(skb); return -EAGAIN; + } if (t4_l2t_set_switching(adapter, f->l2t, f->fs.vlan, f->fs.eport, f->fs.dmac)) { cxgb4_l2t_release(f->l2t); f->l2t = NULL; + kfree_skb(skb); return -ENOMEM; } } ftid = adapter->tids.ftid_base + fidx; - skb = alloc_skb(sizeof(*fwr), GFP_KERNEL | __GFP_NOFAIL); fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); memset(fwr, 0, sizeof(*fwr)); @@ -1257,7 +1263,10 @@ static int del_filter_wr(struct adapter *adapter, int fidx) len = sizeof(*fwr); ftid = adapter->tids.ftid_base + fidx; - skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL); + skb = alloc_skb(len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + fwr = (struct fw_filter_wr *)__skb_put(skb, len); t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id); diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index b72d238695d7..3b39fdddeb57 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -413,6 +413,15 @@ out: return count; } +static void hip04_start_tx_timer(struct hip04_priv *priv) +{ + unsigned long ns = priv->tx_coalesce_usecs * NSEC_PER_USEC / 2; + + /* allow timer to fire after half the time at the earliest */ + hrtimer_start_range_ns(&priv->tx_coalesce_timer, ns_to_ktime(ns), + ns, HRTIMER_MODE_REL); +} + static int hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) { struct hip04_priv *priv = netdev_priv(ndev); @@ -466,8 +475,7 @@ static int hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) } } else if (!hrtimer_is_queued(&priv->tx_coalesce_timer)) { /* cleanup not pending yet, start a new timer */ - hrtimer_start_expires(&priv->tx_coalesce_timer, - HRTIMER_MODE_REL); + hip04_start_tx_timer(priv); } return NETDEV_TX_OK; @@ -549,7 +557,7 @@ done: /* clean up tx descriptors and start a new timer if necessary */ tx_remaining = hip04_tx_reclaim(ndev, false); if (rx < budget && tx_remaining) - hrtimer_start_expires(&priv->tx_coalesce_timer, HRTIMER_MODE_REL); + hip04_start_tx_timer(priv); return rx; } @@ -809,7 +817,6 @@ static int hip04_mac_probe(struct platform_device *pdev) struct hip04_priv *priv; struct resource *res; unsigned int irq; - ktime_t txtime; int ret; ndev = alloc_etherdev(sizeof(struct hip04_priv)); @@ -846,9 +853,6 @@ static int hip04_mac_probe(struct platform_device *pdev) */ priv->tx_coalesce_frames = TX_DESC_NUM * 3 / 4; priv->tx_coalesce_usecs = 200; - /* allow timer to fire after half the time at the earliest */ - txtime = ktime_set(0, priv->tx_coalesce_usecs * NSEC_PER_USEC / 2); - hrtimer_set_expires_range(&priv->tx_coalesce_timer, txtime, txtime); priv->tx_coalesce_timer.function = tx_done; priv->map = syscon_node_to_regmap(arg.np); diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index d596f6624025..0bae22da014d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -2397,6 +2397,7 @@ i40e_aq_erase_nvm_exit: #define I40E_DEV_FUNC_CAP_LED 0x61 #define I40E_DEV_FUNC_CAP_SDP 0x62 #define I40E_DEV_FUNC_CAP_MDIO 0x63 +#define I40E_DEV_FUNC_CAP_WR_CSR_PROT 0x64 /** * i40e_parse_discover_capabilities @@ -2541,11 +2542,18 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, p->fd_filters_guaranteed = number; p->fd_filters_best_effort = logical_id; break; + case I40E_DEV_FUNC_CAP_WR_CSR_PROT: + p->wr_csr_prot = (u64)number; + p->wr_csr_prot |= (u64)logical_id << 32; + break; default: break; } } + if (p->fcoe) + i40e_debug(hw, I40E_DEBUG_ALL, "device is FCoE capable\n"); + /* Software override ensuring FCoE is disabled if npar or mfp * mode because it is not supported in these modes. */ @@ -3503,6 +3511,63 @@ void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status) } /** + * i40e_aq_debug_dump + * @hw: pointer to the hardware structure + * @cluster_id: specific cluster to dump + * @table_id: table id within cluster + * @start_index: index of line in the block to read + * @buff_size: dump buffer size + * @buff: dump buffer + * @ret_buff_size: actual buffer size returned + * @ret_next_table: next block to read + * @ret_next_index: next index to read + * + * Dump internal FW/HW data for debug purposes. + * + **/ +i40e_status i40e_aq_debug_dump(struct i40e_hw *hw, u8 cluster_id, + u8 table_id, u32 start_index, u16 buff_size, + void *buff, u16 *ret_buff_size, + u8 *ret_next_table, u32 *ret_next_index, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_debug_dump_internals *cmd = + (struct i40e_aqc_debug_dump_internals *)&desc.params.raw; + struct i40e_aqc_debug_dump_internals *resp = + (struct i40e_aqc_debug_dump_internals *)&desc.params.raw; + i40e_status status; + + if (buff_size == 0 || !buff) + return I40E_ERR_PARAM; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_debug_dump_internals); + /* Indirect Command */ + desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF); + if (buff_size > I40E_AQ_LARGE_BUF) + desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); + + cmd->cluster_id = cluster_id; + cmd->table_id = table_id; + cmd->idx = cpu_to_le32(start_index); + + desc.datalen = cpu_to_le16(buff_size); + + status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details); + if (!status) { + if (ret_buff_size) + *ret_buff_size = le16_to_cpu(desc.datalen); + if (ret_next_table) + *ret_next_table = resp->table_id; + if (ret_next_index) + *ret_next_index = le32_to_cpu(resp->idx); + } + + return status; +} + +/** * i40e_read_bw_from_alt_ram * @hw: pointer to the hardware structure * @max_bw: pointer for max_bw read diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c index 6e1466756760..2547aa21b2ca 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c @@ -419,7 +419,7 @@ static void i40e_cee_to_dcb_v1_config( { u16 status, tlv_status = le16_to_cpu(cee_cfg->tlv_status); u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio); - u8 i, tc, err, sync, oper; + u8 i, tc, err; /* CEE PG data to ETS config */ dcbcfg->etscfg.maxtcs = cee_cfg->oper_num_tc; @@ -456,9 +456,7 @@ static void i40e_cee_to_dcb_v1_config( status = (tlv_status & I40E_AQC_CEE_APP_STATUS_MASK) >> I40E_AQC_CEE_APP_STATUS_SHIFT; err = (status & I40E_TLV_STATUS_ERR) ? 1 : 0; - sync = (status & I40E_TLV_STATUS_SYNC) ? 1 : 0; - oper = (status & I40E_TLV_STATUS_OPER) ? 1 : 0; - /* Add APPs if Error is False and Oper/Sync is True */ + /* Add APPs if Error is False */ if (!err) { /* CEE operating configuration supports FCoE/iSCSI/FIP only */ dcbcfg->numapps = I40E_CEE_OPER_MAX_APPS; diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index daa88263af66..34170eabca7d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1388,6 +1388,50 @@ static ssize_t i40e_dbg_command_write(struct file *filp, r_cfg->app[i].selector, r_cfg->app[i].protocolid); } + } else if (strncmp(&cmd_buf[5], "debug fwdata", 12) == 0) { + int cluster_id, table_id; + int index, ret; + u16 buff_len = 4096; + u32 next_index; + u8 next_table; + u8 *buff; + u16 rlen; + + cnt = sscanf(&cmd_buf[18], "%i %i %i", + &cluster_id, &table_id, &index); + if (cnt != 3) { + dev_info(&pf->pdev->dev, + "dump debug fwdata <cluster_id> <table_id> <index>\n"); + goto command_write_done; + } + + dev_info(&pf->pdev->dev, + "AQ debug dump fwdata params %x %x %x %x\n", + cluster_id, table_id, index, buff_len); + buff = kzalloc(buff_len, GFP_KERNEL); + if (!buff) + goto command_write_done; + + ret = i40e_aq_debug_dump(&pf->hw, cluster_id, table_id, + index, buff_len, buff, &rlen, + &next_table, &next_index, + NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "debug dump fwdata AQ Failed %d 0x%x\n", + ret, pf->hw.aq.asq_last_status); + kfree(buff); + buff = NULL; + goto command_write_done; + } + dev_info(&pf->pdev->dev, + "AQ debug dump fwdata rlen=0x%x next_table=0x%x next_index=0x%x\n", + rlen, next_table, next_index); + print_hex_dump(KERN_INFO, "AQ buffer WB: ", + DUMP_PREFIX_OFFSET, 16, 1, + buff, rlen, true); + kfree(buff); + buff = NULL; } else { dev_info(&pf->pdev->dev, "dump desc tx <vsi_seid> <ring_id> [<desc_n>], dump desc rx <vsi_seid> <ring_id> [<desc_n>],\n"); @@ -1903,6 +1947,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, dev_info(&pf->pdev->dev, " dump desc rx <vsi_seid> <ring_id> [<desc_n>]\n"); dev_info(&pf->pdev->dev, " dump desc aq\n"); dev_info(&pf->pdev->dev, " dump reset stats\n"); + dev_info(&pf->pdev->dev, " dump debug fwdata <cluster_id> <table_id> <index>\n"); dev_info(&pf->pdev->dev, " msg_enable [level]\n"); dev_info(&pf->pdev->dev, " read <reg>\n"); dev_info(&pf->pdev->dev, " write <reg> <value>\n"); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index c848b1862512..4cbaaeb902c4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -356,8 +356,7 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, /* Set speed and duplex */ switch (link_speed) { case I40E_LINK_SPEED_40GB: - /* need a SPEED_40000 in ethtool.h */ - ethtool_cmd_speed_set(ecmd, 40000); + ethtool_cmd_speed_set(ecmd, SPEED_40000); break; case I40E_LINK_SPEED_20GB: ethtool_cmd_speed_set(ecmd, SPEED_20000); @@ -1914,6 +1913,16 @@ static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf, else fsp->ring_cookie = rule->q_index; + if (rule->dest_vsi != pf->vsi[pf->lan_vsi]->id) { + struct i40e_vsi *vsi; + + vsi = i40e_find_vsi_from_id(pf, rule->dest_vsi); + if (vsi && vsi->type == I40E_VSI_SRIOV) { + fsp->h_ext.data[1] = htonl(vsi->vf_id); + fsp->m_ext.data[1] = htonl(0x1); + } + } + return 0; } @@ -2207,6 +2216,7 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi, struct i40e_fdir_filter *input; struct i40e_pf *pf; int ret = -EINVAL; + u16 vf_id; if (!vsi) return -EINVAL; @@ -2267,7 +2277,22 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi, input->dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src; input->src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst; + if (ntohl(fsp->m_ext.data[1])) { + if (ntohl(fsp->h_ext.data[1]) >= pf->num_alloc_vfs) { + netif_info(pf, drv, vsi->netdev, "Invalid VF id\n"); + goto free_input; + } + vf_id = ntohl(fsp->h_ext.data[1]); + /* Find vsi id from vf id and override dest vsi */ + input->dest_vsi = pf->vf[vf_id].lan_vsi_id; + if (input->q_index >= pf->vf[vf_id].num_queue_pairs) { + netif_info(pf, drv, vsi->netdev, "Invalid queue id\n"); + goto free_input; + } + } + ret = i40e_add_del_fdir(vsi, input, true); +free_input: if (ret) kfree(input); else diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 63de3f4b7a94..24481cd7e59a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -39,7 +39,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 3 -#define DRV_VERSION_BUILD 1 +#define DRV_VERSION_BUILD 2 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -7301,7 +7301,7 @@ err_out: * i40e_init_interrupt_scheme - Determine proper interrupt scheme * @pf: board private structure to initialize **/ -static void i40e_init_interrupt_scheme(struct i40e_pf *pf) +static int i40e_init_interrupt_scheme(struct i40e_pf *pf) { int vectors = 0; ssize_t size; @@ -7343,11 +7343,17 @@ static void i40e_init_interrupt_scheme(struct i40e_pf *pf) /* set up vector assignment tracking */ size = sizeof(struct i40e_lump_tracking) + (sizeof(u16) * vectors); pf->irq_pile = kzalloc(size, GFP_KERNEL); + if (!pf->irq_pile) { + dev_err(&pf->pdev->dev, "error allocating irq_pile memory\n"); + return -ENOMEM; + } pf->irq_pile->num_entries = vectors; pf->irq_pile->search_hint = 0; - /* track first vector for misc interrupts */ + /* track first vector for misc interrupts, ignore return */ (void)i40e_get_lump(pf, pf->irq_pile, 1, I40E_PILE_VALID_BIT - 1); + + return 0; } /** @@ -9827,7 +9833,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* set up the main switch operations */ i40e_determine_queue_usage(pf); - i40e_init_interrupt_scheme(pf); + err = i40e_init_interrupt_scheme(pf); + if (err) + goto err_switch_setup; /* The number of VSIs reported by the FW is the minimum guaranteed * to us; HW supports far more and we share the remaining pool with diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index e49acd2accd3..554e49d02683 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -821,13 +821,12 @@ static enum i40e_nvmupd_cmd i40e_nvmupd_validate_command(struct i40e_hw *hw, int *errno) { enum i40e_nvmupd_cmd upd_cmd; - u8 transaction, module; + u8 transaction; /* anything that doesn't match a recognized case is an error */ upd_cmd = I40E_NVMUPD_INVALID; transaction = i40e_nvmupd_get_transaction(cmd->config); - module = i40e_nvmupd_get_module(cmd->config); /* limits on data size */ if ((cmd->data_size < 1) || diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index fea0d37ecc72..7b34f1e660ea 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -303,4 +303,9 @@ i40e_status i40e_aq_add_rem_control_packet_filter(struct i40e_hw *hw, u16 vsi_seid, u16 queue, bool is_add, struct i40e_control_filter_stats *stats, struct i40e_asq_cmd_details *cmd_details); +i40e_status i40e_aq_debug_dump(struct i40e_hw *hw, u8 cluster_id, + u8 table_id, u32 start_index, u16 buff_size, + void *buff, u16 *ret_buff_size, + u8 *ret_next_table, u32 *ret_next_index, + struct i40e_asq_cmd_details *cmd_details); #endif /* _I40E_PROTOTYPE_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index 67c7bc9e9c21..568e855da0f3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -242,6 +242,7 @@ struct i40e_hw_capabilities { u8 rx_buf_chain_len; u32 enabled_tcmap; u32 maxtc; + u64 wr_csr_prot; }; struct i40e_mac_info { diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 4d69e1f04901..78d1c4ff565e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -26,6 +26,129 @@ #include "i40e.h" +/*********************notification routines***********************/ + +/** + * i40e_vc_vf_broadcast + * @pf: pointer to the PF structure + * @opcode: operation code + * @retval: return value + * @msg: pointer to the msg buffer + * @msglen: msg length + * + * send a message to all VFs on a given PF + **/ +static void i40e_vc_vf_broadcast(struct i40e_pf *pf, + enum i40e_virtchnl_ops v_opcode, + i40e_status v_retval, u8 *msg, + u16 msglen) +{ + struct i40e_hw *hw = &pf->hw; + struct i40e_vf *vf = pf->vf; + int i; + + for (i = 0; i < pf->num_alloc_vfs; i++, vf++) { + int abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id; + /* Not all vfs are enabled so skip the ones that are not */ + if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states) && + !test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) + continue; + + /* Ignore return value on purpose - a given VF may fail, but + * we need to keep going and send to all of them + */ + i40e_aq_send_msg_to_vf(hw, abs_vf_id, v_opcode, v_retval, + msg, msglen, NULL); + } +} + +/** + * i40e_vc_notify_link_state + * @vf: pointer to the VF structure + * + * send a link status message to a single VF + **/ +static void i40e_vc_notify_vf_link_state(struct i40e_vf *vf) +{ + struct i40e_virtchnl_pf_event pfe; + struct i40e_pf *pf = vf->pf; + struct i40e_hw *hw = &pf->hw; + struct i40e_link_status *ls = &pf->hw.phy.link_info; + int abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id; + + pfe.event = I40E_VIRTCHNL_EVENT_LINK_CHANGE; + pfe.severity = I40E_PF_EVENT_SEVERITY_INFO; + if (vf->link_forced) { + pfe.event_data.link_event.link_status = vf->link_up; + pfe.event_data.link_event.link_speed = + (vf->link_up ? I40E_LINK_SPEED_40GB : 0); + } else { + pfe.event_data.link_event.link_status = + ls->link_info & I40E_AQ_LINK_UP; + pfe.event_data.link_event.link_speed = ls->link_speed; + } + i40e_aq_send_msg_to_vf(hw, abs_vf_id, I40E_VIRTCHNL_OP_EVENT, + 0, (u8 *)&pfe, sizeof(pfe), NULL); +} + +/** + * i40e_vc_notify_link_state + * @pf: pointer to the PF structure + * + * send a link status message to all VFs on a given PF + **/ +void i40e_vc_notify_link_state(struct i40e_pf *pf) +{ + int i; + + for (i = 0; i < pf->num_alloc_vfs; i++) + i40e_vc_notify_vf_link_state(&pf->vf[i]); +} + +/** + * i40e_vc_notify_reset + * @pf: pointer to the PF structure + * + * indicate a pending reset to all VFs on a given PF + **/ +void i40e_vc_notify_reset(struct i40e_pf *pf) +{ + struct i40e_virtchnl_pf_event pfe; + + pfe.event = I40E_VIRTCHNL_EVENT_RESET_IMPENDING; + pfe.severity = I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM; + i40e_vc_vf_broadcast(pf, I40E_VIRTCHNL_OP_EVENT, 0, + (u8 *)&pfe, sizeof(struct i40e_virtchnl_pf_event)); +} + +/** + * i40e_vc_notify_vf_reset + * @vf: pointer to the VF structure + * + * indicate a pending reset to the given VF + **/ +void i40e_vc_notify_vf_reset(struct i40e_vf *vf) +{ + struct i40e_virtchnl_pf_event pfe; + int abs_vf_id; + + /* validate the request */ + if (!vf || vf->vf_id >= vf->pf->num_alloc_vfs) + return; + + /* verify if the VF is in either init or active before proceeding */ + if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states) && + !test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) + return; + + abs_vf_id = vf->vf_id + vf->pf->hw.func_caps.vf_base_id; + + pfe.event = I40E_VIRTCHNL_EVENT_RESET_IMPENDING; + pfe.severity = I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM; + i40e_aq_send_msg_to_vf(&vf->pf->hw, abs_vf_id, I40E_VIRTCHNL_OP_EVENT, + 0, (u8 *)&pfe, + sizeof(struct i40e_virtchnl_pf_event), NULL); +} /***********************misc routines*****************************/ /** @@ -689,6 +812,9 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr) } } + if (flr) + usleep_range(10000, 20000); + if (!rsd) dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n", vf->vf_id); @@ -733,6 +859,11 @@ void i40e_free_vfs(struct i40e_pf *pf) while (test_and_set_bit(__I40E_VF_DISABLE, &pf->state)) usleep_range(1000, 2000); + for (i = 0; i < pf->num_alloc_vfs; i++) + if (test_bit(I40E_VF_STAT_INIT, &pf->vf[i].vf_states)) + i40e_vsi_control_rings(pf->vsi[pf->vf[i].lan_vsi_idx], + false); + /* Disable IOV before freeing resources. This lets any VF drivers * running in the host get themselves cleaned up before we yank * the carpet out from underneath their feet. @@ -1762,6 +1893,7 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, u16 vf_id, u32 v_opcode, break; case I40E_VIRTCHNL_OP_ENABLE_QUEUES: ret = i40e_vc_enable_queues_msg(vf, msg, msglen); + i40e_vc_notify_vf_link_state(vf); break; case I40E_VIRTCHNL_OP_DISABLE_QUEUES: ret = i40e_vc_disable_queues_msg(vf, msg, msglen); @@ -1835,118 +1967,6 @@ int i40e_vc_process_vflr_event(struct i40e_pf *pf) } /** - * i40e_vc_vf_broadcast - * @pf: pointer to the PF structure - * @opcode: operation code - * @retval: return value - * @msg: pointer to the msg buffer - * @msglen: msg length - * - * send a message to all VFs on a given PF - **/ -static void i40e_vc_vf_broadcast(struct i40e_pf *pf, - enum i40e_virtchnl_ops v_opcode, - i40e_status v_retval, u8 *msg, - u16 msglen) -{ - struct i40e_hw *hw = &pf->hw; - struct i40e_vf *vf = pf->vf; - int i; - - for (i = 0; i < pf->num_alloc_vfs; i++, vf++) { - int abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id; - /* Not all VFs are enabled so skip the ones that are not */ - if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states) && - !test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) - continue; - - /* Ignore return value on purpose - a given VF may fail, but - * we need to keep going and send to all of them - */ - i40e_aq_send_msg_to_vf(hw, abs_vf_id, v_opcode, v_retval, - msg, msglen, NULL); - } -} - -/** - * i40e_vc_notify_link_state - * @pf: pointer to the PF structure - * - * send a link status message to all VFs on a given PF - **/ -void i40e_vc_notify_link_state(struct i40e_pf *pf) -{ - struct i40e_virtchnl_pf_event pfe; - struct i40e_hw *hw = &pf->hw; - struct i40e_vf *vf = pf->vf; - struct i40e_link_status *ls = &pf->hw.phy.link_info; - int i; - - pfe.event = I40E_VIRTCHNL_EVENT_LINK_CHANGE; - pfe.severity = I40E_PF_EVENT_SEVERITY_INFO; - for (i = 0; i < pf->num_alloc_vfs; i++, vf++) { - int abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id; - if (vf->link_forced) { - pfe.event_data.link_event.link_status = vf->link_up; - pfe.event_data.link_event.link_speed = - (vf->link_up ? I40E_LINK_SPEED_40GB : 0); - } else { - pfe.event_data.link_event.link_status = - ls->link_info & I40E_AQ_LINK_UP; - pfe.event_data.link_event.link_speed = ls->link_speed; - } - i40e_aq_send_msg_to_vf(hw, abs_vf_id, I40E_VIRTCHNL_OP_EVENT, - 0, (u8 *)&pfe, sizeof(pfe), - NULL); - } -} - -/** - * i40e_vc_notify_reset - * @pf: pointer to the PF structure - * - * indicate a pending reset to all VFs on a given PF - **/ -void i40e_vc_notify_reset(struct i40e_pf *pf) -{ - struct i40e_virtchnl_pf_event pfe; - - pfe.event = I40E_VIRTCHNL_EVENT_RESET_IMPENDING; - pfe.severity = I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM; - i40e_vc_vf_broadcast(pf, I40E_VIRTCHNL_OP_EVENT, I40E_SUCCESS, - (u8 *)&pfe, sizeof(struct i40e_virtchnl_pf_event)); -} - -/** - * i40e_vc_notify_vf_reset - * @vf: pointer to the VF structure - * - * indicate a pending reset to the given VF - **/ -void i40e_vc_notify_vf_reset(struct i40e_vf *vf) -{ - struct i40e_virtchnl_pf_event pfe; - int abs_vf_id; - - /* validate the request */ - if (!vf || vf->vf_id >= vf->pf->num_alloc_vfs) - return; - - /* verify if the VF is in either init or active before proceeding */ - if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states) && - !test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) - return; - - abs_vf_id = vf->vf_id + vf->pf->hw.func_caps.vf_base_id; - - pfe.event = I40E_VIRTCHNL_EVENT_RESET_IMPENDING; - pfe.severity = I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM; - i40e_aq_send_msg_to_vf(&vf->pf->hw, abs_vf_id, I40E_VIRTCHNL_OP_EVENT, - I40E_SUCCESS, (u8 *)&pfe, - sizeof(struct i40e_virtchnl_pf_event), NULL); -} - -/** * i40e_ndo_set_vf_mac * @netdev: network interface device structure * @vf_id: VF identifier diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index 9c79cb6abb2b..ec9d83a93379 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -242,6 +242,7 @@ struct i40e_hw_capabilities { u8 rx_buf_chain_len; u32 enabled_tcmap; u32 maxtc; + u64 wr_csr_prot; }; struct i40e_mac_info { diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 34c8565031f6..1b98c25b3092 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -225,7 +225,6 @@ struct i40evf_adapter { #define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED /* flags for admin queue service task */ u32 aq_required; - u32 aq_pending; #define I40EVF_FLAG_AQ_ENABLE_QUEUES (u32)(1) #define I40EVF_FLAG_AQ_DISABLE_QUEUES (u32)(1 << 1) #define I40EVF_FLAG_AQ_ADD_MAC_FILTER (u32)(1 << 2) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 6d5f3b21c68a..7c53aca4b5a6 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1008,7 +1008,6 @@ void i40evf_down(struct i40evf_adapter *adapter) adapter->state != __I40EVF_RESETTING) { /* cancel any current operation */ adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN; - adapter->aq_pending = 0; /* Schedule operations to close down the HW. Don't wait * here for this to complete. The watchdog is still running * and it will take care of this. @@ -1335,7 +1334,6 @@ static void i40evf_watchdog_task(struct work_struct *work) */ return; } - adapter->aq_pending = 0; adapter->aq_required = 0; adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN; goto watchdog_done; @@ -1355,7 +1353,6 @@ static void i40evf_watchdog_task(struct work_struct *work) adapter->flags |= I40EVF_FLAG_RESET_PENDING; dev_err(&adapter->pdev->dev, "Hardware reset detected\n"); schedule_work(&adapter->reset_task); - adapter->aq_pending = 0; adapter->aq_required = 0; adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN; goto watchdog_done; @@ -1364,7 +1361,7 @@ static void i40evf_watchdog_task(struct work_struct *work) /* Process admin queue tasks. After init, everything gets done * here so we don't race on the admin queue. */ - if (adapter->aq_pending) { + if (adapter->current_op) { if (!i40evf_asq_done(hw)) { dev_dbg(&adapter->pdev->dev, "Admin queue timeout\n"); i40evf_send_api_ver(adapter); @@ -2029,7 +2026,7 @@ static void i40evf_init_task(struct work_struct *work) if (err) { dev_err(&pdev->dev, "Failed to set MAC type (%d)\n", err); - goto err; + goto err; } err = i40evf_check_reset_complete(hw); if (err) { @@ -2249,7 +2246,6 @@ static void i40evf_shutdown(struct pci_dev *pdev) /* Prevent the watchdog from running. */ adapter->state = __I40EVF_REMOVE; adapter->aq_required = 0; - adapter->aq_pending = 0; #ifdef CONFIG_PM pci_save_state(pdev); @@ -2467,7 +2463,6 @@ static void i40evf_remove(struct pci_dev *pdev) /* Shut down all the garbage mashers on the detention level */ adapter->state = __I40EVF_REMOVE; adapter->aq_required = 0; - adapter->aq_pending = 0; i40evf_request_reset(adapter); msleep(20); /* If the FW isn't responding, kick it once, but only once. */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 4240a496dc50..61e090558f31 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -250,7 +250,6 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter) vqpi++; } - adapter->aq_pending |= I40EVF_FLAG_AQ_CONFIGURE_QUEUES; adapter->aq_required &= ~I40EVF_FLAG_AQ_CONFIGURE_QUEUES; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, (u8 *)vqci, len); @@ -277,7 +276,6 @@ void i40evf_enable_queues(struct i40evf_adapter *adapter) vqs.vsi_id = adapter->vsi_res->vsi_id; vqs.tx_queues = (1 << adapter->num_active_queues) - 1; vqs.rx_queues = vqs.tx_queues; - adapter->aq_pending |= I40EVF_FLAG_AQ_ENABLE_QUEUES; adapter->aq_required &= ~I40EVF_FLAG_AQ_ENABLE_QUEUES; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_ENABLE_QUEUES, (u8 *)&vqs, sizeof(vqs)); @@ -303,7 +301,6 @@ void i40evf_disable_queues(struct i40evf_adapter *adapter) vqs.vsi_id = adapter->vsi_res->vsi_id; vqs.tx_queues = (1 << adapter->num_active_queues) - 1; vqs.rx_queues = vqs.tx_queues; - adapter->aq_pending |= I40EVF_FLAG_AQ_DISABLE_QUEUES; adapter->aq_required &= ~I40EVF_FLAG_AQ_DISABLE_QUEUES; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_DISABLE_QUEUES, (u8 *)&vqs, sizeof(vqs)); @@ -354,7 +351,6 @@ void i40evf_map_queues(struct i40evf_adapter *adapter) vimi->vecmap[v_idx].txq_map = 0; vimi->vecmap[v_idx].rxq_map = 0; - adapter->aq_pending |= I40EVF_FLAG_AQ_MAP_VECTORS; adapter->aq_required &= ~I40EVF_FLAG_AQ_MAP_VECTORS; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, (u8 *)vimi, len); @@ -415,7 +411,6 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter) f->add = false; } } - adapter->aq_pending |= I40EVF_FLAG_AQ_ADD_MAC_FILTER; adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_MAC_FILTER; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS, (u8 *)veal, len); @@ -476,7 +471,6 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter) kfree(f); } } - adapter->aq_pending |= I40EVF_FLAG_AQ_DEL_MAC_FILTER; adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_MAC_FILTER; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS, (u8 *)veal, len); @@ -537,7 +531,6 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter) f->add = false; } } - adapter->aq_pending |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER; adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_VLAN_FILTER; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_ADD_VLAN, (u8 *)vvfl, len); kfree(vvfl); @@ -598,7 +591,6 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter) kfree(f); } } - adapter->aq_pending |= I40EVF_FLAG_AQ_DEL_VLAN_FILTER; adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_VLAN_FILTER; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_DEL_VLAN, (u8 *)vvfl, len); kfree(vvfl); @@ -720,9 +712,6 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, __func__, v_retval, v_opcode); } switch (v_opcode) { - case I40E_VIRTCHNL_OP_VERSION: - /* no action, but also not an error */ - break; case I40E_VIRTCHNL_OP_GET_STATS: { struct i40e_eth_stats *stats = (struct i40e_eth_stats *)msg; @@ -740,39 +729,30 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, adapter->current_stats = *stats; } break; - case I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS: - adapter->aq_pending &= ~(I40EVF_FLAG_AQ_ADD_MAC_FILTER); - break; - case I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS: - adapter->aq_pending &= ~(I40EVF_FLAG_AQ_DEL_MAC_FILTER); - break; - case I40E_VIRTCHNL_OP_ADD_VLAN: - adapter->aq_pending &= ~(I40EVF_FLAG_AQ_ADD_VLAN_FILTER); - break; - case I40E_VIRTCHNL_OP_DEL_VLAN: - adapter->aq_pending &= ~(I40EVF_FLAG_AQ_DEL_VLAN_FILTER); - break; case I40E_VIRTCHNL_OP_ENABLE_QUEUES: - adapter->aq_pending &= ~(I40EVF_FLAG_AQ_ENABLE_QUEUES); /* enable transmits */ i40evf_irq_enable(adapter, true); netif_tx_start_all_queues(adapter->netdev); netif_carrier_on(adapter->netdev); break; case I40E_VIRTCHNL_OP_DISABLE_QUEUES: - adapter->aq_pending &= ~(I40EVF_FLAG_AQ_DISABLE_QUEUES); i40evf_free_all_tx_resources(adapter); i40evf_free_all_rx_resources(adapter); break; - case I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES: - adapter->aq_pending &= ~(I40EVF_FLAG_AQ_CONFIGURE_QUEUES); - break; + case I40E_VIRTCHNL_OP_VERSION: + case I40E_VIRTCHNL_OP_GET_VF_RESOURCES: case I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP: - adapter->aq_pending &= ~(I40EVF_FLAG_AQ_MAP_VECTORS); + /* Don't display an error if we get these out of sequence. + * If the firmware needed to get kicked, we'll get these and + * it's no problem. + */ + if (v_opcode != adapter->current_op) + return; break; default: - dev_info(&adapter->pdev->dev, "Received unexpected message %d from PF\n", - v_opcode); + if (v_opcode != adapter->current_op) + dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n", + adapter->current_op, v_opcode); break; } /* switch v_opcode */ adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN; diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 4a42e960d331..f66641d961e3 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -41,7 +41,6 @@ #include <linux/skbuff.h> #include <linux/spi/spi.h> #include <linux/types.h> -#include <linux/version.h> #include "qca_7k.h" #include "qca_debug.h" diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index a87b177bd723..a570a60533be 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4759,6 +4759,7 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (pci_resource_len(pdev, 0) < ROCKER_PCI_BAR0_SIZE) { dev_err(&pdev->dev, "invalid PCI region size\n"); + err = -EINVAL; goto err_pci_resource_len_check; } diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c index 10b6173d557d..b605dfd5c7bc 100644 --- a/drivers/net/ethernet/sfc/selftest.c +++ b/drivers/net/ethernet/sfc/selftest.c @@ -46,7 +46,7 @@ struct efx_loopback_payload { struct iphdr ip; struct udphdr udp; __be16 iteration; - const char msg[64]; + char msg[64]; } __packed; /* Loopback test source MAC address */ diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index cd77289c3cfe..623c6ed8764a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -150,7 +150,7 @@ struct stmmac_extra_stats { #define MAC_CSR_H_FRQ_MASK 0x20 #define HASH_TABLE_SIZE 64 -#define PAUSE_TIME 0x200 +#define PAUSE_TIME 0xffff /* Flow Control defines */ #define FLOW_OFF 0 @@ -357,7 +357,8 @@ struct stmmac_dma_ops { void (*dump_regs) (void __iomem *ioaddr); /* Set tx/rx threshold in the csr6 register * An invalid value enables the store-and-forward mode */ - void (*dma_mode) (void __iomem *ioaddr, int txmode, int rxmode); + void (*dma_mode)(void __iomem *ioaddr, int txmode, int rxmode, + int rxfifosz); /* To track extra statistic (if supported) */ void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x, void __iomem *ioaddr); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index 64d8f56a9c17..b3fe0575ff6b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -172,6 +172,7 @@ enum inter_frame_gap { /* GMAC FLOW CTRL defines */ #define GMAC_FLOW_CTRL_PT_MASK 0xffff0000 /* Pause Time Mask */ #define GMAC_FLOW_CTRL_PT_SHIFT 16 +#define GMAC_FLOW_CTRL_UP 0x00000008 /* Unicast pause frame enable */ #define GMAC_FLOW_CTRL_RFE 0x00000004 /* Rx Flow Control Enable */ #define GMAC_FLOW_CTRL_TFE 0x00000002 /* Tx Flow Control Enable */ #define GMAC_FLOW_CTRL_FCB_BPA 0x00000001 /* Flow Control Busy ... */ @@ -246,6 +247,56 @@ enum ttc_control { #define DMA_CONTROL_FEF 0x00000080 #define DMA_CONTROL_FUF 0x00000040 +/* Receive flow control activation field + * RFA field in DMA control register, bits 23,10:9 + */ +#define DMA_CONTROL_RFA_MASK 0x00800600 + +/* Receive flow control deactivation field + * RFD field in DMA control register, bits 22,12:11 + */ +#define DMA_CONTROL_RFD_MASK 0x00401800 + +/* RFD and RFA fields are encoded as follows + * + * Bit Field + * 0,00 - Full minus 1KB (only valid when rxfifo >= 4KB and EFC enabled) + * 0,01 - Full minus 2KB (only valid when rxfifo >= 4KB and EFC enabled) + * 0,10 - Full minus 3KB (only valid when rxfifo >= 4KB and EFC enabled) + * 0,11 - Full minus 4KB (only valid when rxfifo > 4KB and EFC enabled) + * 1,00 - Full minus 5KB (only valid when rxfifo > 8KB and EFC enabled) + * 1,01 - Full minus 6KB (only valid when rxfifo > 8KB and EFC enabled) + * 1,10 - Full minus 7KB (only valid when rxfifo > 8KB and EFC enabled) + * 1,11 - Reserved + * + * RFD should always be > RFA for a given FIFO size. RFD == RFA may work, + * but packet throughput performance may not be as expected. + * + * Be sure that bit 3 in GMAC Register 6 is set for Unicast Pause frame + * detection (IEEE Specification Requirement, Annex 31B, 31B.1, Pause + * Description). + * + * Be sure that DZPA (bit 7 in Flow Control Register, GMAC Register 6), + * is set to 0. This allows pause frames with a quanta of 0 to be sent + * as an XOFF message to the link peer. + */ + +#define RFA_FULL_MINUS_1K 0x00000000 +#define RFA_FULL_MINUS_2K 0x00000200 +#define RFA_FULL_MINUS_3K 0x00000400 +#define RFA_FULL_MINUS_4K 0x00000600 +#define RFA_FULL_MINUS_5K 0x00800000 +#define RFA_FULL_MINUS_6K 0x00800200 +#define RFA_FULL_MINUS_7K 0x00800400 + +#define RFD_FULL_MINUS_1K 0x00000000 +#define RFD_FULL_MINUS_2K 0x00000800 +#define RFD_FULL_MINUS_3K 0x00001000 +#define RFD_FULL_MINUS_4K 0x00001800 +#define RFD_FULL_MINUS_5K 0x00400000 +#define RFD_FULL_MINUS_6K 0x00400800 +#define RFD_FULL_MINUS_7K 0x00401000 + enum rtc_control { DMA_CONTROL_RTC_64 = 0x00000000, DMA_CONTROL_RTC_32 = 0x00000008, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 0adcf73cf722..371a669d69fd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -201,7 +201,10 @@ static void dwmac1000_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, unsigned int fc, unsigned int pause_time) { void __iomem *ioaddr = hw->pcsr; - unsigned int flow = 0; + /* Set flow such that DZPQ in Mac Register 6 is 0, + * and unicast pause detect is enabled. + */ + unsigned int flow = GMAC_FLOW_CTRL_UP; pr_debug("GMAC Flow-Control:\n"); if (fc & FLOW_RX) { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c index 59d92e811750..0e8937c1184a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c @@ -106,8 +106,29 @@ static int dwmac1000_dma_init(void __iomem *ioaddr, int pbl, int fb, int mb, return 0; } +static u32 dwmac1000_configure_fc(u32 csr6, int rxfifosz) +{ + csr6 &= ~DMA_CONTROL_RFA_MASK; + csr6 &= ~DMA_CONTROL_RFD_MASK; + + /* Leave flow control disabled if receive fifo size is less than + * 4K or 0. Otherwise, send XOFF when fifo is 1K less than full, + * and send XON when 2K less than full. + */ + if (rxfifosz < 4096) { + csr6 &= ~DMA_CONTROL_EFC; + pr_debug("GMAC: disabling flow control, rxfifo too small(%d)\n", + rxfifosz); + } else { + csr6 |= DMA_CONTROL_EFC; + csr6 |= RFA_FULL_MINUS_1K; + csr6 |= RFD_FULL_MINUS_2K; + } + return csr6; +} + static void dwmac1000_dma_operation_mode(void __iomem *ioaddr, int txmode, - int rxmode) + int rxmode, int rxfifosz) { u32 csr6 = readl(ioaddr + DMA_CONTROL); @@ -153,6 +174,9 @@ static void dwmac1000_dma_operation_mode(void __iomem *ioaddr, int txmode, csr6 |= DMA_CONTROL_RTC_128; } + /* Configure flow control based on rx fifo size */ + csr6 = dwmac1000_configure_fc(csr6, rxfifosz); + writel(csr6, ioaddr + DMA_CONTROL); } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c index 7d1dce9e7ffc..9d0971c1c2ee 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c @@ -72,7 +72,7 @@ static int dwmac100_dma_init(void __iomem *ioaddr, int pbl, int fb, int mb, * control register. */ static void dwmac100_dma_operation_mode(void __iomem *ioaddr, int txmode, - int rxmode) + int rxmode, int rxfifosz) { u32 csr6 = readl(ioaddr + DMA_CONTROL); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 06103cad7c77..05c146f718a3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1277,8 +1277,10 @@ static void free_dma_desc_resources(struct stmmac_priv *priv) */ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) { + int rxfifosz = priv->plat->rx_fifo_size; + if (priv->plat->force_thresh_dma_mode) - priv->hw->dma->dma_mode(priv->ioaddr, tc, tc); + priv->hw->dma->dma_mode(priv->ioaddr, tc, tc, rxfifosz); else if (priv->plat->force_sf_dma_mode || priv->plat->tx_coe) { /* * In case of GMAC, SF mode can be enabled @@ -1287,10 +1289,12 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) * 2) There is no bugged Jumbo frame support * that needs to not insert csum in the TDES. */ - priv->hw->dma->dma_mode(priv->ioaddr, SF_DMA_MODE, SF_DMA_MODE); + priv->hw->dma->dma_mode(priv->ioaddr, SF_DMA_MODE, SF_DMA_MODE, + rxfifosz); priv->xstats.threshold = SF_DMA_MODE; } else - priv->hw->dma->dma_mode(priv->ioaddr, tc, SF_DMA_MODE); + priv->hw->dma->dma_mode(priv->ioaddr, tc, SF_DMA_MODE, + rxfifosz); } /** @@ -1442,6 +1446,7 @@ static void stmmac_tx_err(struct stmmac_priv *priv) static void stmmac_dma_interrupt(struct stmmac_priv *priv) { int status; + int rxfifosz = priv->plat->rx_fifo_size; status = priv->hw->dma->dma_interrupt(priv->ioaddr, &priv->xstats); if (likely((status & handle_rx)) || (status & handle_tx)) { @@ -1456,10 +1461,11 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv) (tc <= 256)) { tc += 64; if (priv->plat->force_thresh_dma_mode) - priv->hw->dma->dma_mode(priv->ioaddr, tc, tc); + priv->hw->dma->dma_mode(priv->ioaddr, tc, tc, + rxfifosz); else priv->hw->dma->dma_mode(priv->ioaddr, tc, - SF_DMA_MODE); + SF_DMA_MODE, rxfifosz); priv->xstats.threshold = tc; } } else if (unlikely(status == tx_hard_error)) @@ -2970,15 +2976,15 @@ int stmmac_dvr_remove(struct net_device *ndev) priv->hw->dma->stop_tx(priv->ioaddr); stmmac_set_mac(priv->ioaddr, false); - if (priv->pcs != STMMAC_PCS_RGMII && priv->pcs != STMMAC_PCS_TBI && - priv->pcs != STMMAC_PCS_RTBI) - stmmac_mdio_unregister(ndev); netif_carrier_off(ndev); unregister_netdev(ndev); if (priv->stmmac_rst) reset_control_assert(priv->stmmac_rst); clk_disable_unprepare(priv->pclk); clk_disable_unprepare(priv->stmmac_clk); + if (priv->pcs != STMMAC_PCS_RGMII && priv->pcs != STMMAC_PCS_TBI && + priv->pcs != STMMAC_PCS_RTBI) + stmmac_mdio_unregister(ndev); free_netdev(ndev); return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index f9b42f11950f..705bbdf93940 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -181,6 +181,10 @@ static int stmmac_probe_config_dt(struct platform_device *pdev, sizeof(struct stmmac_mdio_bus_data), GFP_KERNEL); + of_property_read_u32(np, "tx-fifo-depth", &plat->tx_fifo_size); + + of_property_read_u32(np, "rx-fifo-depth", &plat->rx_fifo_size); + plat->force_sf_dma_mode = of_property_read_bool(np, "snps,force_sf_dma_mode"); diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 34f846b4bd05..94570aace241 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -105,7 +105,7 @@ static void ri_tasklet(unsigned long dev) if (from & AT_EGRESS) { dev_queue_xmit(skb); } else if (from & AT_INGRESS) { - skb_pull(skb, skb->dev->hard_header_len); + skb_pull(skb, skb->mac_len); netif_receive_skb(skb); } else BUG(); diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 9c91ff872485..8c350c5d54ad 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -313,7 +313,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) */ if (q->flags & IFF_VNET_HDR) features |= vlan->tap_features; - if (netif_needs_gso(dev, skb, features)) { + if (netif_needs_gso(skb, features)) { struct sk_buff *segs = __skb_gso_segment(skb, features, false); if (IS_ERR(segs)) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 4c08f98f4484..3f45afd4382e 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -560,7 +560,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(!netif_carrier_ok(dev) || (slots > 1 && !xennet_can_sg(dev)) || - netif_needs_gso(dev, skb, netif_skb_features(skb)))) { + netif_needs_gso(skb, netif_skb_features(skb)))) { spin_unlock_irqrestore(&queue->tx_lock, flags); goto drop; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b5679aed660b..bcbde799ec69 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3713,7 +3713,7 @@ static inline bool skb_gso_ok(struct sk_buff *skb, netdev_features_t features) (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); } -static inline bool netif_needs_gso(struct net_device *dev, struct sk_buff *skb, +static inline bool netif_needs_gso(struct sk_buff *skb, netdev_features_t features) { return skb_is_gso(skb) && (!skb_gso_ok(skb, features) || diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index cd63851b57f2..7f484a239f53 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -114,6 +114,8 @@ struct plat_stmmacenet_data { int maxmtu; int multicast_filter_bins; int unicast_filter_entries; + int tx_fifo_size; + int rx_fifo_size; void (*fix_mac_speed)(void *priv, unsigned int speed); void (*bus_setup)(void __iomem *ioaddr); void *(*setup)(struct platform_device *pdev); diff --git a/include/net/netns/generic.h b/include/net/netns/generic.h index 0931618c0f7f..70e158551704 100644 --- a/include/net/netns/generic.h +++ b/include/net/netns/generic.h @@ -38,11 +38,9 @@ static inline void *net_generic(const struct net *net, int id) rcu_read_lock(); ng = rcu_dereference(net->gen); - BUG_ON(id == 0 || id > ng->len); ptr = ng->ptr[id - 1]; rcu_read_unlock(); - BUG_ON(!ptr); return ptr; } #endif diff --git a/include/net/tcp.h b/include/net/tcp.h index 9598871485ce..051dc5c2802d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -829,7 +829,7 @@ struct tcp_congestion_ops { /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, u32 num_acked, s32 rtt_us); /* get info for inet_diag (optional) */ - void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); + int (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); char name[TCP_CA_NAME_MAX]; struct module *owner; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5c1cee11f777..a9ebdf5701e8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -177,7 +177,7 @@ enum bpf_func_id { /** * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet * @skb: pointer to skb - * @offset: offset within packet from skb->data + * @offset: offset within packet from skb->mac_header * @from: pointer where to copy bytes from * @len: number of bytes to store into packet * @flags: bit 0 - if true, recompute skb->csum diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h index 34c7936ca114..c97340e43dd6 100644 --- a/include/uapi/linux/filter.h +++ b/include/uapi/linux/filter.h @@ -79,8 +79,11 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_RANDOM 56 #define SKF_AD_VLAN_TPID 60 #define SKF_AD_MAX 64 -#define SKF_NET_OFF (-0x100000) -#define SKF_LL_OFF (-0x200000) +#define SKF_NET_OFF (-0x100000) +#define SKF_LL_OFF (-0x200000) + +#define BPF_NET_OFF SKF_NET_OFF +#define BPF_LL_OFF SKF_LL_OFF #endif /* _UAPI__LINUX_FILTER_H__ */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 630a7bac1e51..47dcd3aa6e23 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1397,7 +1397,8 @@ peek_stack: /* tell verifier to check for equivalent states * after every call and jump */ - env->explored_states[t + 1] = STATE_LIST_MARK; + if (t + 1 < insn_cnt) + env->explored_states[t + 1] = STATE_LIST_MARK; } else { /* conditional jump with two edges */ ret = push_insn(t, t + 1, FALLTHROUGH, env); @@ -1636,6 +1637,8 @@ static int do_check(struct verifier_env *env) if (err) return err; + src_reg_type = regs[insn->src_reg].type; + /* check that memory (src_reg + off) is readable, * the state of dst_reg will be updated by this func */ @@ -1645,9 +1648,12 @@ static int do_check(struct verifier_env *env) if (err) return err; - src_reg_type = regs[insn->src_reg].type; + if (BPF_SIZE(insn->code) != BPF_W) { + insn_idx++; + continue; + } - if (insn->imm == 0 && BPF_SIZE(insn->code) == BPF_W) { + if (insn->imm == 0) { /* saw a valid insn * dst_reg = *(u32 *)(src_reg + off) * use reserved 'imm' field to mark this insn diff --git a/net/core/dev.c b/net/core/dev.c index af4a1b0adc10..1796cef55ab5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2713,7 +2713,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device if (unlikely(!skb)) goto out_null; - if (netif_needs_gso(dev, skb, features)) { + if (netif_needs_gso(skb, features)) { struct sk_buff *segs; segs = skb_gso_segment(skb, features); diff --git a/net/core/filter.c b/net/core/filter.c index b669e75d2b36..bf831a85c315 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1175,12 +1175,27 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) return 0; } +/** + * bpf_skb_clone_not_writable - is the header of a clone not writable + * @skb: buffer to check + * @len: length up to which to write, can be negative + * + * Returns true if modifying the header part of the cloned buffer + * does require the data to be copied. I.e. this version works with + * negative lengths needed for eBPF case! + */ +static bool bpf_skb_clone_unwritable(const struct sk_buff *skb, int len) +{ + return skb_header_cloned(skb) || + (int) skb_headroom(skb) + len > skb->hdr_len; +} + #define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; - unsigned int offset = (unsigned int) r2; + int offset = (int) r2; void *from = (void *) (long) r3; unsigned int len = (unsigned int) r4; char buf[16]; @@ -1194,10 +1209,12 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) * * so check for invalid 'offset' and too large 'len' */ - if (unlikely(offset > 0xffff || len > sizeof(buf))) + if (unlikely((u32) offset > 0xffff || len > sizeof(buf))) return -EFAULT; - if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len)) + offset -= skb->data - skb_mac_header(skb); + if (unlikely(skb_cloned(skb) && + bpf_skb_clone_unwritable(skb, offset + len))) return -EFAULT; ptr = skb_header_pointer(skb, offset, len, buf); @@ -1232,15 +1249,18 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = { #define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f) #define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10) -static u64 bpf_l3_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags) +static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; + int offset = (int) r2; __sum16 sum, *ptr; - if (unlikely(offset > 0xffff)) + if (unlikely((u32) offset > 0xffff)) return -EFAULT; - if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum))) + offset -= skb->data - skb_mac_header(skb); + if (unlikely(skb_cloned(skb) && + bpf_skb_clone_unwritable(skb, offset + sizeof(sum)))) return -EFAULT; ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); @@ -1276,16 +1296,19 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = { .arg5_type = ARG_ANYTHING, }; -static u64 bpf_l4_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags) +static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags); + int offset = (int) r2; __sum16 sum, *ptr; - if (unlikely(offset > 0xffff)) + if (unlikely((u32) offset > 0xffff)) return -EFAULT; - if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum))) + offset -= skb->data - skb_mac_header(skb); + if (unlikely(skb_cloned(skb) && + bpf_skb_clone_unwritable(skb, offset + sizeof(sum)))) return -EFAULT; ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index a3abb719221f..78fc04ad36fc 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -16,7 +16,6 @@ #include <linux/export.h> #include <linux/user_namespace.h> #include <linux/net_namespace.h> -#include <linux/rtnetlink.h> #include <net/sock.h> #include <net/netlink.h> #include <net/net_namespace.h> diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3b6e5830256e..d1967dab9cc6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4124,19 +4124,21 @@ EXPORT_SYMBOL(skb_try_coalesce); */ void skb_scrub_packet(struct sk_buff *skb, bool xnet) { - if (xnet) - skb_orphan(skb); skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; skb->ignore_df = 0; skb_dst_drop(skb); - skb->mark = 0; skb_sender_cpu_clear(skb); - skb_init_secmark(skb); secpath_reset(skb); nf_reset(skb); nf_reset_trace(skb); + + if (!xnet) + return; + + skb_orphan(skb); + skb->mark = 0; } EXPORT_SYMBOL_GPL(skb_scrub_packet); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 5eaadabe23a1..079a224471e7 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -124,7 +124,7 @@ static ssize_t temp1_max_store(struct device *dev, return count; } -static DEVICE_ATTR(temp1_max, S_IRUGO, temp1_max_show, temp1_max_store); +static DEVICE_ATTR_RW(temp1_max); static ssize_t temp1_max_alarm_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -159,8 +159,8 @@ static umode_t dsa_hwmon_attrs_visible(struct kobject *kobj, if (index == 1) { if (!drv->get_temp_limit) mode = 0; - else if (drv->set_temp_limit) - mode |= S_IWUSR; + else if (!drv->set_temp_limit) + mode &= ~S_IWUSR; } else if (index == 2 && !drv->get_temp_alarm) { mode = 0; } diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index af150b43b214..34968cd5c146 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -711,11 +711,10 @@ static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, NLM_F_MULTI, skb, FOU_CMD_GET); if (ret) - goto done; + break; } mutex_unlock(&fn->fou_lock); -done: cb->args[0] = idx; return skb->len; } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 70e8b3c308ec..bb77ebdae3b3 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -111,6 +111,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); + const struct tcp_congestion_ops *ca_ops; const struct inet_diag_handler *handler; int ext = req->idiag_ext; struct inet_diag_msg *r; @@ -208,16 +209,31 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, info = nla_data(attr); } - if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) - if (nla_put_string(skb, INET_DIAG_CONG, - icsk->icsk_ca_ops->name) < 0) + if (ext & (1 << (INET_DIAG_CONG - 1))) { + int err = 0; + + rcu_read_lock(); + ca_ops = READ_ONCE(icsk->icsk_ca_ops); + if (ca_ops) + err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name); + rcu_read_unlock(); + if (err < 0) goto errout; + } handler->idiag_get_info(sk, r, info); - if (sk->sk_state < TCP_TIME_WAIT && - icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) - icsk->icsk_ca_ops->get_info(sk, ext, skb); + if (sk->sk_state < TCP_TIME_WAIT) { + int err = 0; + + rcu_read_lock(); + ca_ops = READ_ONCE(icsk->icsk_ca_ops); + if (ca_ops && ca_ops->get_info) + err = ca_ops->get_info(sk, ext, skb); + rcu_read_unlock(); + if (err < 0) + goto errout; + } out: nlmsg_end(skb, nlh); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 18e3a12eb1b2..59c8a027721b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2595,6 +2595,7 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info) const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; + u32 rate; memset(info, 0, sizeof(*info)); @@ -2655,10 +2656,11 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info) info->tcpi_total_retrans = tp->total_retrans; - info->tcpi_pacing_rate = sk->sk_pacing_rate != ~0U ? - sk->sk_pacing_rate : ~0ULL; - info->tcpi_max_pacing_rate = sk->sk_max_pacing_rate != ~0U ? - sk->sk_max_pacing_rate : ~0ULL; + rate = READ_ONCE(sk->sk_pacing_rate); + info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL; + + rate = READ_ONCE(sk->sk_max_pacing_rate); + info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL; } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index b504371af742..4376016f7fa5 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -277,7 +277,7 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) } } -static void dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) +static int dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) { const struct dctcp *ca = inet_csk_ca(sk); @@ -297,8 +297,9 @@ static void dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) info.dctcp_ab_tot = ca->acked_bytes_total; } - nla_put(skb, INET_DIAG_DCTCPINFO, sizeof(info), &info); + return nla_put(skb, INET_DIAG_DCTCPINFO, sizeof(info), &info); } + return 0; } static struct tcp_congestion_ops dctcp __read_mostly = { diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 1d5a30a90adf..67476f085e48 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -300,8 +300,7 @@ static u32 tcp_illinois_ssthresh(struct sock *sk) } /* Extract info for Tcp socket info provided via netlink. */ -static void tcp_illinois_info(struct sock *sk, u32 ext, - struct sk_buff *skb) +static int tcp_illinois_info(struct sock *sk, u32 ext, struct sk_buff *skb) { const struct illinois *ca = inet_csk_ca(sk); @@ -318,8 +317,9 @@ static void tcp_illinois_info(struct sock *sk, u32 ext, do_div(t, info.tcpv_rttcnt); info.tcpv_rtt = t; } - nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); + return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); } + return 0; } static struct tcp_congestion_ops tcp_illinois __read_mostly = { diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index a6afde666ab1..c71a1b8f7bde 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -286,7 +286,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) } /* Extract info for Tcp socket info provided via netlink. */ -void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) +int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) { const struct vegas *ca = inet_csk_ca(sk); if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { @@ -297,8 +297,9 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) .tcpv_minrtt = ca->minRTT, }; - nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); + return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); } + return 0; } EXPORT_SYMBOL_GPL(tcp_vegas_get_info); diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h index 0531b99d8637..e8a6b33cc61d 100644 --- a/net/ipv4/tcp_vegas.h +++ b/net/ipv4/tcp_vegas.h @@ -19,6 +19,6 @@ void tcp_vegas_init(struct sock *sk); void tcp_vegas_state(struct sock *sk, u8 ca_state); void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us); void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event); -void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb); +int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb); #endif /* __TCP_VEGAS_H */ diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index bb63fba47d47..b3c57cceb990 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -256,8 +256,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) } /* Extract info for Tcp socket info provided via netlink. */ -static void tcp_westwood_info(struct sock *sk, u32 ext, - struct sk_buff *skb) +static int tcp_westwood_info(struct sock *sk, u32 ext, struct sk_buff *skb) { const struct westwood *ca = inet_csk_ca(sk); @@ -268,8 +267,9 @@ static void tcp_westwood_info(struct sock *sk, u32 ext, .tcpv_minrtt = jiffies_to_usecs(ca->rtt_min), }; - nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); + return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); } + return 0; } static struct tcp_congestion_ops tcp_westwood __read_mostly = { diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 4d2cede17468..dc6a2d324bd8 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -38,6 +38,9 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, struct tcf_bpf *prog = act->priv; int action, filter_res; + if (unlikely(!skb_mac_header_was_set(skb))) + return TC_ACT_UNSPEC; + spin_lock(&prog->tcf_lock); prog->tcf_tm.lastuse = jiffies; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 5953517ec059..3f63ceac8e01 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -157,7 +157,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, if (!(at & AT_EGRESS)) { if (m->tcfm_ok_push) - skb_push(skb2, skb2->dev->hard_header_len); + skb_push(skb2, skb->mac_len); } /* mirror is always swallowed */ diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 5c4171c5d2bd..91bd9c19471d 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -66,6 +66,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_bpf_prog *prog; int ret = -1; + if (unlikely(!skb_mac_header_was_set(skb))) + return -1; + /* Needed here for accessing maps. */ rcu_read_lock(); list_for_each_entry_rcu(prog, &head->plist, link) { diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c index 7cf3f42a6e39..7c27710f8296 100644 --- a/samples/bpf/tcbpf1_kern.c +++ b/samples/bpf/tcbpf1_kern.c @@ -4,6 +4,8 @@ #include <uapi/linux/ip.h> #include <uapi/linux/in.h> #include <uapi/linux/tcp.h> +#include <uapi/linux/filter.h> + #include "bpf_helpers.h" /* compiler workaround */ @@ -14,18 +16,12 @@ static inline void set_dst_mac(struct __sk_buff *skb, char *mac) bpf_skb_store_bytes(skb, 0, mac, ETH_ALEN, 1); } -/* use 1 below for ingress qdisc and 0 for egress */ -#if 0 -#undef ETH_HLEN -#define ETH_HLEN 0 -#endif - #define IP_CSUM_OFF (ETH_HLEN + offsetof(struct iphdr, check)) #define TOS_OFF (ETH_HLEN + offsetof(struct iphdr, tos)) static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos) { - __u8 old_tos = load_byte(skb, TOS_OFF); + __u8 old_tos = load_byte(skb, BPF_LL_OFF + TOS_OFF); bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2); bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0); @@ -38,7 +34,7 @@ static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos) static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip) { - __u32 old_ip = _htonl(load_word(skb, IP_SRC_OFF)); + __u32 old_ip = _htonl(load_word(skb, BPF_LL_OFF + IP_SRC_OFF)); bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip)); bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip)); @@ -48,7 +44,7 @@ static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip) #define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest)) static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port) { - __u16 old_port = htons(load_half(skb, TCP_DPORT_OFF)); + __u16 old_port = htons(load_half(skb, BPF_LL_OFF + TCP_DPORT_OFF)); bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port)); bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0); @@ -57,7 +53,7 @@ static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port) SEC("classifier") int bpf_prog1(struct __sk_buff *skb) { - __u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol)); + __u8 proto = load_byte(skb, BPF_LL_OFF + ETH_HLEN + offsetof(struct iphdr, protocol)); long *value; if (proto == IPPROTO_TCP) { diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index 9ab645698ffb..12f3780af73f 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -721,6 +721,28 @@ static struct bpf_test tests[] = { .errstr = "different pointers", .result = REJECT, }, + { + "access skb fields bad4", + .insns = { + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 3), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JA, 0, 0, -13), + }, + .fixup = {7}, + .errstr = "different pointers", + .result = REJECT, + }, }; static int probe_filter_length(struct bpf_insn *fp) |