diff options
47 files changed, 2180 insertions, 155 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index c5fd905206e7..3075a2a29511 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1800,6 +1800,9 @@ F: include/linux/cfag12864b.h CFG80211 and NL80211 M: Johannes Berg <johannes@sipsolutions.net> L: linux-wireless@vger.kernel.org +W: http://wireless.kernel.org/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git S: Maintained F: include/linux/nl80211.h F: include/net/cfg80211.h @@ -4340,7 +4343,8 @@ MAC80211 M: Johannes Berg <johannes@sipsolutions.net> L: linux-wireless@vger.kernel.org W: http://wireless.kernel.org/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git S: Maintained F: Documentation/networking/mac80211-injection.txt F: include/net/mac80211.h @@ -4351,7 +4355,8 @@ M: Stefano Brivio <stefano.brivio@polimi.it> M: Mattias Nissler <mattias.nissler@gmx.de> L: linux-wireless@vger.kernel.org W: http://wireless.kernel.org/en/developers/Documentation/mac80211/RateControl/PID -T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git S: Maintained F: net/mac80211/rc80211_pid* @@ -5695,6 +5700,9 @@ F: include/linux/remoteproc.h RFKILL M: Johannes Berg <johannes@sipsolutions.net> L: linux-wireless@vger.kernel.org +W: http://wireless.kernel.org/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git S: Maintained F: Documentation/rfkill.txt F: net/rfkill/ diff --git a/drivers/bcma/driver_pci.c b/drivers/bcma/driver_pci.c index 9a96f14c8f47..c32ebd537abe 100644 --- a/drivers/bcma/driver_pci.c +++ b/drivers/bcma/driver_pci.c @@ -232,17 +232,19 @@ void __devinit bcma_core_pci_init(struct bcma_drv_pci *pc) int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, struct bcma_device *core, bool enable) { - struct pci_dev *pdev = pc->core->bus->host_pci; + struct pci_dev *pdev; u32 coremask, tmp; int err = 0; - if (core->bus->hosttype != BCMA_HOSTTYPE_PCI) { + if (!pc || core->bus->hosttype != BCMA_HOSTTYPE_PCI) { /* This bcma device is not on a PCI host-bus. So the IRQs are * not routed through the PCI core. * So we must not enable routing through the PCI core. */ goto out; } + pdev = pc->core->bus->host_pci; + err = pci_read_config_dword(pdev, BCMA_PCI_IRQMASK, &tmp); if (err) goto out; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 9e2301eef386..af506321500b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -76,6 +76,7 @@ #include <net/route.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/pkt_sched.h> #include "bonding.h" #include "bond_3ad.h" #include "bond_alb.h" @@ -381,8 +382,6 @@ struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr) return next; } -#define bond_queue_mapping(skb) (*(u16 *)((skb)->cb)) - /** * bond_dev_queue_xmit - Prepare skb for xmit. * @@ -395,7 +394,9 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, { skb->dev = slave_dev; - skb->queue_mapping = bond_queue_mapping(skb); + BUILD_BUG_ON(sizeof(skb->queue_mapping) != + sizeof(qdisc_skb_cb(skb)->bond_queue_mapping)); + skb->queue_mapping = qdisc_skb_cb(skb)->bond_queue_mapping; if (unlikely(netpoll_tx_running(slave_dev))) bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb); @@ -4174,7 +4175,7 @@ static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb) /* * Save the original txq to restore before passing to the driver */ - bond_queue_mapping(skb) = skb->queue_mapping; + qdisc_skb_cb(skb)->bond_queue_mapping = skb->queue_mapping; if (unlikely(txq >= dev->real_num_tx_queues)) { do { diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index aef42f045320..485bedb8278c 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -1082,8 +1082,12 @@ static ssize_t bonding_store_primary(struct device *d, } } - pr_info("%s: Unable to set %.*s as primary slave.\n", - bond->dev->name, (int)strlen(buf) - 1, buf); + strncpy(bond->params.primary, ifname, IFNAMSIZ); + bond->params.primary[IFNAMSIZ - 1] = 0; + + pr_info("%s: Recording %s as primary, " + "but it has not been enslaved to %s yet.\n", + bond->dev->name, ifname, bond->dev->name); out: write_unlock_bh(&bond->curr_slave_lock); read_unlock(&bond->lock); diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index 442d91a2747b..bab0158f1cc3 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -187,8 +187,10 @@ static int __init dummy_init_module(void) rtnl_lock(); err = __rtnl_link_register(&dummy_link_ops); - for (i = 0; i < numdummies && !err; i++) + for (i = 0; i < numdummies && !err; i++) { err = dummy_init_one(); + cond_resched(); + } if (err < 0) __rtnl_link_unregister(&dummy_link_ops); rtnl_unlock(); diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index edeeb516807a..e47ff8be1d7b 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -14275,7 +14275,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) } } - if (tg3_flag(tp, 5755_PLUS)) + if (tg3_flag(tp, 5755_PLUS) || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) tg3_flag_set(tp, SHORT_DMA_BUG); if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 896f283967d4..5a34503b6a14 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -736,6 +736,8 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, copied = make_tx_wrbs(adapter, txq, skb, wrb_cnt, dummy_wrb); if (copied) { + int gso_segs = skb_shinfo(skb)->gso_segs; + /* record the sent skb in the sent_skb table */ BUG_ON(txo->sent_skb_list[start]); txo->sent_skb_list[start] = skb; @@ -753,8 +755,7 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, be_txq_notify(adapter, txq->id, wrb_cnt); - be_tx_stats_update(txo, wrb_cnt, copied, - skb_shinfo(skb)->gso_segs, stopped); + be_tx_stats_update(txo, wrb_cnt, copied, gso_segs, stopped); } else { txq->head = start; dev_kfree_skb_any(skb); diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index cace36f2ab92..28a54451a3e5 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4381,10 +4381,12 @@ static int sky2_set_features(struct net_device *dev, netdev_features_t features) struct sky2_port *sky2 = netdev_priv(dev); netdev_features_t changed = dev->features ^ features; - if (changed & NETIF_F_RXCSUM) { - bool on = features & NETIF_F_RXCSUM; - sky2_write32(sky2->hw, Q_ADDR(rxqaddr[sky2->port], Q_CSR), - on ? BMU_ENA_RX_CHKSUM : BMU_DIS_RX_CHKSUM); + if ((changed & NETIF_F_RXCSUM) && + !(sky2->hw->flags & SKY2_HW_NEW_LE)) { + sky2_write32(sky2->hw, + Q_ADDR(rxqaddr[sky2->port], Q_CSR), + (features & NETIF_F_RXCSUM) + ? BMU_ENA_RX_CHKSUM : BMU_DIS_RX_CHKSUM); } if (changed & NETIF_F_RXHASH) diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 8d2666fcffd7..083d6715335c 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -946,16 +946,16 @@ static void __lpc_handle_xmit(struct net_device *ndev) /* Update stats */ ndev->stats.tx_packets++; ndev->stats.tx_bytes += skb->len; - - /* Free buffer */ - dev_kfree_skb_irq(skb); } + dev_kfree_skb_irq(skb); txcidx = readl(LPC_ENET_TXCONSUMEINDEX(pldat->net_base)); } - if (netif_queue_stopped(ndev)) - netif_wake_queue(ndev); + if (pldat->num_used_tx_buffs <= ENET_TX_DESC/2) { + if (netif_queue_stopped(ndev)) + netif_wake_queue(ndev); + } } static int __lpc_handle_recv(struct net_device *ndev, int budget) @@ -1320,6 +1320,7 @@ static const struct net_device_ops lpc_netdev_ops = { .ndo_set_rx_mode = lpc_eth_set_multicast_list, .ndo_do_ioctl = lpc_eth_ioctl, .ndo_set_mac_address = lpc_set_mac_address, + .ndo_change_mtu = eth_change_mtu, }; static int lpc_eth_drv_probe(struct platform_device *pdev) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 9757ce3543a0..7260aa79466a 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -5889,11 +5889,7 @@ static void rtl_slow_event_work(struct rtl8169_private *tp) if (status & LinkChg) __rtl8169_check_link_status(dev, tp, tp->mmio_addr, true); - napi_disable(&tp->napi); - rtl_irq_disable(tp); - - napi_enable(&tp->napi); - napi_schedule(&tp->napi); + rtl_irq_enable_all(tp); } static void rtl_task(struct work_struct *work) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 0076f770e637..9f448279e12a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -15,6 +15,7 @@ if STMMAC_ETH config STMMAC_PLATFORM bool "STMMAC Platform bus support" depends on STMMAC_ETH + default y ---help--- This selects the platform specific bus support for the stmmac device driver. This is the driver used diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 6d07ba2c8661..dc20c56efc9d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -26,6 +26,7 @@ #include <linux/clk.h> #include <linux/stmmac.h> #include <linux/phy.h> +#include <linux/pci.h> #include "common.h" #ifdef CONFIG_STMMAC_TIMER #include "stmmac_timer.h" @@ -95,8 +96,6 @@ extern int stmmac_mdio_register(struct net_device *ndev); extern void stmmac_set_ethtool_ops(struct net_device *netdev); extern const struct stmmac_desc_ops enh_desc_ops; extern const struct stmmac_desc_ops ndesc_ops; -extern struct pci_driver stmmac_pci_driver; -extern struct platform_driver stmmac_pltfr_driver; int stmmac_freeze(struct net_device *ndev); int stmmac_restore(struct net_device *ndev); int stmmac_resume(struct net_device *ndev); @@ -110,7 +109,7 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device, static inline int stmmac_clk_enable(struct stmmac_priv *priv) { if (!IS_ERR(priv->stmmac_clk)) - return clk_enable(priv->stmmac_clk); + return clk_prepare_enable(priv->stmmac_clk); return 0; } @@ -120,7 +119,7 @@ static inline void stmmac_clk_disable(struct stmmac_priv *priv) if (IS_ERR(priv->stmmac_clk)) return; - clk_disable(priv->stmmac_clk); + clk_disable_unprepare(priv->stmmac_clk); } static inline int stmmac_clk_get(struct stmmac_priv *priv) { @@ -144,3 +143,60 @@ static inline int stmmac_clk_get(struct stmmac_priv *priv) return 0; } #endif /* CONFIG_HAVE_CLK */ + + +#ifdef CONFIG_STMMAC_PLATFORM +extern struct platform_driver stmmac_pltfr_driver; +static inline int stmmac_register_platform(void) +{ + int err; + + err = platform_driver_register(&stmmac_pltfr_driver); + if (err) + pr_err("stmmac: failed to register the platform driver\n"); + + return err; +} +static inline void stmmac_unregister_platform(void) +{ + platform_driver_register(&stmmac_pltfr_driver); +} +#else +static inline int stmmac_register_platform(void) +{ + pr_debug("stmmac: do not register the platf driver\n"); + + return -EINVAL; +} +static inline void stmmac_unregister_platform(void) +{ +} +#endif /* CONFIG_STMMAC_PLATFORM */ + +#ifdef CONFIG_STMMAC_PCI +extern struct pci_driver stmmac_pci_driver; +static inline int stmmac_register_pci(void) +{ + int err; + + err = pci_register_driver(&stmmac_pci_driver); + if (err) + pr_err("stmmac: failed to register the PCI driver\n"); + + return err; +} +static inline void stmmac_unregister_pci(void) +{ + pci_unregister_driver(&stmmac_pci_driver); +} +#else +static inline int stmmac_register_pci(void) +{ + pr_debug("stmmac: do not register the PCI driver\n"); + + return -EINVAL; +} +static inline void stmmac_unregister_pci(void) +{ +} +#endif /* CONFIG_STMMAC_PCI */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 8899e105da9f..590e95b4cbfa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -42,7 +42,6 @@ #include <linux/dma-mapping.h> #include <linux/slab.h> #include <linux/prefetch.h> -#include <linux/pci.h> #ifdef CONFIG_STMMAC_DEBUG_FS #include <linux/debugfs.h> #include <linux/seq_file.h> @@ -2093,25 +2092,29 @@ int stmmac_restore(struct net_device *ndev) } #endif /* CONFIG_PM */ +/* Driver can be configured w/ and w/ both PCI and Platf drivers + * depending on the configuration selected. + */ static int __init stmmac_init(void) { - int err = 0; + int err_plt = 0; + int err_pci = 0; - err = platform_driver_register(&stmmac_pltfr_driver); + err_plt = stmmac_register_platform(); + err_pci = stmmac_register_pci(); - if (!err) { - err = pci_register_driver(&stmmac_pci_driver); - if (err) - platform_driver_unregister(&stmmac_pltfr_driver); + if ((err_pci) && (err_plt)) { + pr_err("stmmac: driver registration failed\n"); + return -EINVAL; } - return err; + return 0; } static void __exit stmmac_exit(void) { - pci_unregister_driver(&stmmac_pci_driver); - platform_driver_unregister(&stmmac_pltfr_driver); + stmmac_unregister_platform(); + stmmac_unregister_pci(); } module_init(stmmac_init); diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 703c8cce2a2c..8c726b7004d3 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -3598,7 +3598,6 @@ static int release_tx_packet(struct niu *np, struct tx_ring_info *rp, int idx) static void niu_tx_work(struct niu *np, struct tx_ring_info *rp) { struct netdev_queue *txq; - unsigned int tx_bytes; u16 pkt_cnt, tmp; int cons, index; u64 cs; @@ -3621,18 +3620,12 @@ static void niu_tx_work(struct niu *np, struct tx_ring_info *rp) netif_printk(np, tx_done, KERN_DEBUG, np->dev, "%s() pkt_cnt[%u] cons[%d]\n", __func__, pkt_cnt, cons); - tx_bytes = 0; - tmp = pkt_cnt; - while (tmp--) { - tx_bytes += rp->tx_buffs[cons].skb->len; + while (pkt_cnt--) cons = release_tx_packet(np, rp, cons); - } rp->cons = cons; smp_mb(); - netdev_tx_completed_queue(txq, pkt_cnt, tx_bytes); - out: if (unlikely(netif_tx_queue_stopped(txq) && (niu_tx_avail(rp) > NIU_TX_WAKEUP_THRESH(rp)))) { @@ -4333,7 +4326,6 @@ static void niu_free_channels(struct niu *np) struct tx_ring_info *rp = &np->tx_rings[i]; niu_free_tx_ring_info(np, rp); - netdev_tx_reset_queue(netdev_get_tx_queue(np->dev, i)); } kfree(np->tx_rings); np->tx_rings = NULL; @@ -6739,8 +6731,6 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, prod = NEXT_TX(rp, prod); } - netdev_tx_sent_queue(txq, skb->len); - if (prod < rp->prod) rp->wrap_bit ^= TX_RING_KICK_WRAP; rp->prod = prod; diff --git a/drivers/net/ethernet/tile/Kconfig b/drivers/net/ethernet/tile/Kconfig index 2d9218f86bca..098b1c42b393 100644 --- a/drivers/net/ethernet/tile/Kconfig +++ b/drivers/net/ethernet/tile/Kconfig @@ -7,6 +7,8 @@ config TILE_NET depends on TILE default y select CRC32 + select TILE_GXIO_MPIPE if TILEGX + select HIGH_RES_TIMERS if TILEGX ---help--- This is a standard Linux network device driver for the on-chip Tilera Gigabit Ethernet and XAUI interfaces. diff --git a/drivers/net/ethernet/tile/Makefile b/drivers/net/ethernet/tile/Makefile index f634f142cab4..0ef9eefd3211 100644 --- a/drivers/net/ethernet/tile/Makefile +++ b/drivers/net/ethernet/tile/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_TILE_NET) += tile_net.o ifdef CONFIG_TILEGX -tile_net-objs := tilegx.o mpipe.o iorpc_mpipe.o dma_queue.o +tile_net-y := tilegx.o else -tile_net-objs := tilepro.o +tile_net-y := tilepro.o endif diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c new file mode 100644 index 000000000000..83b4b388ad49 --- /dev/null +++ b/drivers/net/ethernet/tile/tilegx.c @@ -0,0 +1,1898 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/moduleparam.h> +#include <linux/sched.h> +#include <linux/kernel.h> /* printk() */ +#include <linux/slab.h> /* kmalloc() */ +#include <linux/errno.h> /* error codes */ +#include <linux/types.h> /* size_t */ +#include <linux/interrupt.h> +#include <linux/in.h> +#include <linux/irq.h> +#include <linux/netdevice.h> /* struct device, and other headers */ +#include <linux/etherdevice.h> /* eth_type_trans */ +#include <linux/skbuff.h> +#include <linux/ioctl.h> +#include <linux/cdev.h> +#include <linux/hugetlb.h> +#include <linux/in6.h> +#include <linux/timer.h> +#include <linux/hrtimer.h> +#include <linux/ktime.h> +#include <linux/io.h> +#include <linux/ctype.h> +#include <linux/ip.h> +#include <linux/tcp.h> + +#include <asm/checksum.h> +#include <asm/homecache.h> +#include <gxio/mpipe.h> +#include <arch/sim.h> + +/* Default transmit lockup timeout period, in jiffies. */ +#define TILE_NET_TIMEOUT (5 * HZ) + +/* The maximum number of distinct channels (idesc.channel is 5 bits). */ +#define TILE_NET_CHANNELS 32 + +/* Maximum number of idescs to handle per "poll". */ +#define TILE_NET_BATCH 128 + +/* Maximum number of packets to handle per "poll". */ +#define TILE_NET_WEIGHT 64 + +/* Number of entries in each iqueue. */ +#define IQUEUE_ENTRIES 512 + +/* Number of entries in each equeue. */ +#define EQUEUE_ENTRIES 2048 + +/* Total header bytes per equeue slot. Must be big enough for 2 bytes + * of NET_IP_ALIGN alignment, plus 14 bytes (?) of L2 header, plus up to + * 60 bytes of actual TCP header. We round up to align to cache lines. + */ +#define HEADER_BYTES 128 + +/* Maximum completions per cpu per device (must be a power of two). + * ISSUE: What is the right number here? If this is too small, then + * egress might block waiting for free space in a completions array. + * ISSUE: At the least, allocate these only for initialized echannels. + */ +#define TILE_NET_MAX_COMPS 64 + +#define MAX_FRAGS (MAX_SKB_FRAGS + 1) + +/* Size of completions data to allocate. + * ISSUE: Probably more than needed since we don't use all the channels. + */ +#define COMPS_SIZE (TILE_NET_CHANNELS * sizeof(struct tile_net_comps)) + +/* Size of NotifRing data to allocate. */ +#define NOTIF_RING_SIZE (IQUEUE_ENTRIES * sizeof(gxio_mpipe_idesc_t)) + +/* Timeout to wake the per-device TX timer after we stop the queue. + * We don't want the timeout too short (adds overhead, and might end + * up causing stop/wake/stop/wake cycles) or too long (affects performance). + * For the 10 Gb NIC, 30 usec means roughly 30+ 1500-byte packets. + */ +#define TX_TIMER_DELAY_USEC 30 + +/* Timeout to wake the per-cpu egress timer to free completions. */ +#define EGRESS_TIMER_DELAY_USEC 1000 + +MODULE_AUTHOR("Tilera Corporation"); +MODULE_LICENSE("GPL"); + +/* A "packet fragment" (a chunk of memory). */ +struct frag { + void *buf; + size_t length; +}; + +/* A single completion. */ +struct tile_net_comp { + /* The "complete_count" when the completion will be complete. */ + s64 when; + /* The buffer to be freed when the completion is complete. */ + struct sk_buff *skb; +}; + +/* The completions for a given cpu and echannel. */ +struct tile_net_comps { + /* The completions. */ + struct tile_net_comp comp_queue[TILE_NET_MAX_COMPS]; + /* The number of completions used. */ + unsigned long comp_next; + /* The number of completions freed. */ + unsigned long comp_last; +}; + +/* The transmit wake timer for a given cpu and echannel. */ +struct tile_net_tx_wake { + struct hrtimer timer; + struct net_device *dev; +}; + +/* Info for a specific cpu. */ +struct tile_net_info { + /* The NAPI struct. */ + struct napi_struct napi; + /* Packet queue. */ + gxio_mpipe_iqueue_t iqueue; + /* Our cpu. */ + int my_cpu; + /* True if iqueue is valid. */ + bool has_iqueue; + /* NAPI flags. */ + bool napi_added; + bool napi_enabled; + /* Number of small sk_buffs which must still be provided. */ + unsigned int num_needed_small_buffers; + /* Number of large sk_buffs which must still be provided. */ + unsigned int num_needed_large_buffers; + /* A timer for handling egress completions. */ + struct hrtimer egress_timer; + /* True if "egress_timer" is scheduled. */ + bool egress_timer_scheduled; + /* Comps for each egress channel. */ + struct tile_net_comps *comps_for_echannel[TILE_NET_CHANNELS]; + /* Transmit wake timer for each egress channel. */ + struct tile_net_tx_wake tx_wake[TILE_NET_CHANNELS]; +}; + +/* Info for egress on a particular egress channel. */ +struct tile_net_egress { + /* The "equeue". */ + gxio_mpipe_equeue_t *equeue; + /* The headers for TSO. */ + unsigned char *headers; +}; + +/* Info for a specific device. */ +struct tile_net_priv { + /* Our network device. */ + struct net_device *dev; + /* The primary link. */ + gxio_mpipe_link_t link; + /* The primary channel, if open, else -1. */ + int channel; + /* The "loopify" egress link, if needed. */ + gxio_mpipe_link_t loopify_link; + /* The "loopify" egress channel, if open, else -1. */ + int loopify_channel; + /* The egress channel (channel or loopify_channel). */ + int echannel; + /* Total stats. */ + struct net_device_stats stats; +}; + +/* Egress info, indexed by "priv->echannel" (lazily created as needed). */ +static struct tile_net_egress egress_for_echannel[TILE_NET_CHANNELS]; + +/* Devices currently associated with each channel. + * NOTE: The array entry can become NULL after ifconfig down, but + * we do not free the underlying net_device structures, so it is + * safe to use a pointer after reading it from this array. + */ +static struct net_device *tile_net_devs_for_channel[TILE_NET_CHANNELS]; + +/* A mutex for "tile_net_devs_for_channel". */ +static DEFINE_MUTEX(tile_net_devs_for_channel_mutex); + +/* The per-cpu info. */ +static DEFINE_PER_CPU(struct tile_net_info, per_cpu_info); + +/* The "context" for all devices. */ +static gxio_mpipe_context_t context; + +/* Buffer sizes and mpipe enum codes for buffer stacks. + * See arch/tile/include/gxio/mpipe.h for the set of possible values. + */ +#define BUFFER_SIZE_SMALL_ENUM GXIO_MPIPE_BUFFER_SIZE_128 +#define BUFFER_SIZE_SMALL 128 +#define BUFFER_SIZE_LARGE_ENUM GXIO_MPIPE_BUFFER_SIZE_1664 +#define BUFFER_SIZE_LARGE 1664 + +/* The small/large "buffer stacks". */ +static int small_buffer_stack = -1; +static int large_buffer_stack = -1; + +/* Amount of memory allocated for each buffer stack. */ +static size_t buffer_stack_size; + +/* The actual memory allocated for the buffer stacks. */ +static void *small_buffer_stack_va; +static void *large_buffer_stack_va; + +/* The buckets. */ +static int first_bucket = -1; +static int num_buckets = 1; + +/* The ingress irq. */ +static int ingress_irq = -1; + +/* Text value of tile_net.cpus if passed as a module parameter. */ +static char *network_cpus_string; + +/* The actual cpus in "network_cpus". */ +static struct cpumask network_cpus_map; + +/* If "loopify=LINK" was specified, this is "LINK". */ +static char *loopify_link_name; + +/* If "tile_net.custom" was specified, this is non-NULL. */ +static char *custom_str; + +/* The "tile_net.cpus" argument specifies the cpus that are dedicated + * to handle ingress packets. + * + * The parameter should be in the form "tile_net.cpus=m-n[,x-y]", where + * m, n, x, y are integer numbers that represent the cpus that can be + * neither a dedicated cpu nor a dataplane cpu. + */ +static bool network_cpus_init(void) +{ + char buf[1024]; + int rc; + + if (network_cpus_string == NULL) + return false; + + rc = cpulist_parse_crop(network_cpus_string, &network_cpus_map); + if (rc != 0) { + pr_warn("tile_net.cpus=%s: malformed cpu list\n", + network_cpus_string); + return false; + } + + /* Remove dedicated cpus. */ + cpumask_and(&network_cpus_map, &network_cpus_map, cpu_possible_mask); + + if (cpumask_empty(&network_cpus_map)) { + pr_warn("Ignoring empty tile_net.cpus='%s'.\n", + network_cpus_string); + return false; + } + + cpulist_scnprintf(buf, sizeof(buf), &network_cpus_map); + pr_info("Linux network CPUs: %s\n", buf); + return true; +} + +module_param_named(cpus, network_cpus_string, charp, 0444); +MODULE_PARM_DESC(cpus, "cpulist of cores that handle network interrupts"); + +/* The "tile_net.loopify=LINK" argument causes the named device to + * actually use "loop0" for ingress, and "loop1" for egress. This + * allows an app to sit between the actual link and linux, passing + * (some) packets along to linux, and forwarding (some) packets sent + * out by linux. + */ +module_param_named(loopify, loopify_link_name, charp, 0444); +MODULE_PARM_DESC(loopify, "name the device to use loop0/1 for ingress/egress"); + +/* The "tile_net.custom" argument causes us to ignore the "conventional" + * classifier metadata, in particular, the "l2_offset". + */ +module_param_named(custom, custom_str, charp, 0444); +MODULE_PARM_DESC(custom, "indicates a (heavily) customized classifier"); + +/* Atomically update a statistics field. + * Note that on TILE-Gx, this operation is fire-and-forget on the + * issuing core (single-cycle dispatch) and takes only a few cycles + * longer than a regular store when the request reaches the home cache. + * No expensive bus management overhead is required. + */ +static void tile_net_stats_add(unsigned long value, unsigned long *field) +{ + BUILD_BUG_ON(sizeof(atomic_long_t) != sizeof(unsigned long)); + atomic_long_add(value, (atomic_long_t *)field); +} + +/* Allocate and push a buffer. */ +static bool tile_net_provide_buffer(bool small) +{ + int stack = small ? small_buffer_stack : large_buffer_stack; + const unsigned long buffer_alignment = 128; + struct sk_buff *skb; + int len; + + len = sizeof(struct sk_buff **) + buffer_alignment; + len += (small ? BUFFER_SIZE_SMALL : BUFFER_SIZE_LARGE); + skb = dev_alloc_skb(len); + if (skb == NULL) + return false; + + /* Make room for a back-pointer to 'skb' and guarantee alignment. */ + skb_reserve(skb, sizeof(struct sk_buff **)); + skb_reserve(skb, -(long)skb->data & (buffer_alignment - 1)); + + /* Save a back-pointer to 'skb'. */ + *(struct sk_buff **)(skb->data - sizeof(struct sk_buff **)) = skb; + + /* Make sure "skb" and the back-pointer have been flushed. */ + wmb(); + + gxio_mpipe_push_buffer(&context, stack, + (void *)va_to_tile_io_addr(skb->data)); + + return true; +} + +/* Convert a raw mpipe buffer to its matching skb pointer. */ +static struct sk_buff *mpipe_buf_to_skb(void *va) +{ + /* Acquire the associated "skb". */ + struct sk_buff **skb_ptr = va - sizeof(*skb_ptr); + struct sk_buff *skb = *skb_ptr; + + /* Paranoia. */ + if (skb->data != va) { + /* Panic here since there's a reasonable chance + * that corrupt buffers means generic memory + * corruption, with unpredictable system effects. + */ + panic("Corrupt linux buffer! va=%p, skb=%p, skb->data=%p", + va, skb, skb->data); + } + + return skb; +} + +static void tile_net_pop_all_buffers(int stack) +{ + for (;;) { + tile_io_addr_t addr = + (tile_io_addr_t)gxio_mpipe_pop_buffer(&context, stack); + if (addr == 0) + break; + dev_kfree_skb_irq(mpipe_buf_to_skb(tile_io_addr_to_va(addr))); + } +} + +/* Provide linux buffers to mPIPE. */ +static void tile_net_provide_needed_buffers(void) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + + while (info->num_needed_small_buffers != 0) { + if (!tile_net_provide_buffer(true)) + goto oops; + info->num_needed_small_buffers--; + } + + while (info->num_needed_large_buffers != 0) { + if (!tile_net_provide_buffer(false)) + goto oops; + info->num_needed_large_buffers--; + } + + return; + +oops: + /* Add a description to the page allocation failure dump. */ + pr_notice("Tile %d still needs some buffers\n", info->my_cpu); +} + +static inline bool filter_packet(struct net_device *dev, void *buf) +{ + /* Filter packets received before we're up. */ + if (dev == NULL || !(dev->flags & IFF_UP)) + return true; + + /* Filter out packets that aren't for us. */ + if (!(dev->flags & IFF_PROMISC) && + !is_multicast_ether_addr(buf) && + compare_ether_addr(dev->dev_addr, buf) != 0) + return true; + + return false; +} + +static void tile_net_receive_skb(struct net_device *dev, struct sk_buff *skb, + gxio_mpipe_idesc_t *idesc, unsigned long len) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_priv *priv = netdev_priv(dev); + + /* Encode the actual packet length. */ + skb_put(skb, len); + + skb->protocol = eth_type_trans(skb, dev); + + /* Acknowledge "good" hardware checksums. */ + if (idesc->cs && idesc->csum_seed_val == 0xFFFF) + skb->ip_summed = CHECKSUM_UNNECESSARY; + + netif_receive_skb(skb); + + /* Update stats. */ + tile_net_stats_add(1, &priv->stats.rx_packets); + tile_net_stats_add(len, &priv->stats.rx_bytes); + + /* Need a new buffer. */ + if (idesc->size == BUFFER_SIZE_SMALL_ENUM) + info->num_needed_small_buffers++; + else + info->num_needed_large_buffers++; +} + +/* Handle a packet. Return true if "processed", false if "filtered". */ +static bool tile_net_handle_packet(gxio_mpipe_idesc_t *idesc) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct net_device *dev = tile_net_devs_for_channel[idesc->channel]; + uint8_t l2_offset; + void *va; + void *buf; + unsigned long len; + bool filter; + + /* Drop packets for which no buffer was available. + * NOTE: This happens under heavy load. + */ + if (idesc->be) { + struct tile_net_priv *priv = netdev_priv(dev); + tile_net_stats_add(1, &priv->stats.rx_dropped); + gxio_mpipe_iqueue_consume(&info->iqueue, idesc); + if (net_ratelimit()) + pr_info("Dropping packet (insufficient buffers).\n"); + return false; + } + + /* Get the "l2_offset", if allowed. */ + l2_offset = custom_str ? 0 : gxio_mpipe_idesc_get_l2_offset(idesc); + + /* Get the raw buffer VA (includes "headroom"). */ + va = tile_io_addr_to_va((unsigned long)(long)idesc->va); + + /* Get the actual packet start/length. */ + buf = va + l2_offset; + len = idesc->l2_size - l2_offset; + + /* Point "va" at the raw buffer. */ + va -= NET_IP_ALIGN; + + filter = filter_packet(dev, buf); + if (filter) { + gxio_mpipe_iqueue_drop(&info->iqueue, idesc); + } else { + struct sk_buff *skb = mpipe_buf_to_skb(va); + + /* Skip headroom, and any custom header. */ + skb_reserve(skb, NET_IP_ALIGN + l2_offset); + + tile_net_receive_skb(dev, skb, idesc, len); + } + + gxio_mpipe_iqueue_consume(&info->iqueue, idesc); + return !filter; +} + +/* Handle some packets for the current CPU. + * + * This function handles up to TILE_NET_BATCH idescs per call. + * + * ISSUE: Since we do not provide new buffers until this function is + * complete, we must initially provide enough buffers for each network + * cpu to fill its iqueue and also its batched idescs. + * + * ISSUE: The "rotting packet" race condition occurs if a packet + * arrives after the queue appears to be empty, and before the + * hypervisor interrupt is re-enabled. + */ +static int tile_net_poll(struct napi_struct *napi, int budget) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + unsigned int work = 0; + gxio_mpipe_idesc_t *idesc; + int i, n; + + /* Process packets. */ + while ((n = gxio_mpipe_iqueue_try_peek(&info->iqueue, &idesc)) > 0) { + for (i = 0; i < n; i++) { + if (i == TILE_NET_BATCH) + goto done; + if (tile_net_handle_packet(idesc + i)) { + if (++work >= budget) + goto done; + } + } + } + + /* There are no packets left. */ + napi_complete(&info->napi); + + /* Re-enable hypervisor interrupts. */ + gxio_mpipe_enable_notif_ring_interrupt(&context, info->iqueue.ring); + + /* HACK: Avoid the "rotting packet" problem. */ + if (gxio_mpipe_iqueue_try_peek(&info->iqueue, &idesc) > 0) + napi_schedule(&info->napi); + + /* ISSUE: Handle completions? */ + +done: + tile_net_provide_needed_buffers(); + + return work; +} + +/* Handle an ingress interrupt on the current cpu. */ +static irqreturn_t tile_net_handle_ingress_irq(int irq, void *unused) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + napi_schedule(&info->napi); + return IRQ_HANDLED; +} + +/* Free some completions. This must be called with interrupts blocked. */ +static int tile_net_free_comps(gxio_mpipe_equeue_t *equeue, + struct tile_net_comps *comps, + int limit, bool force_update) +{ + int n = 0; + while (comps->comp_last < comps->comp_next) { + unsigned int cid = comps->comp_last % TILE_NET_MAX_COMPS; + struct tile_net_comp *comp = &comps->comp_queue[cid]; + if (!gxio_mpipe_equeue_is_complete(equeue, comp->when, + force_update || n == 0)) + break; + dev_kfree_skb_irq(comp->skb); + comps->comp_last++; + if (++n == limit) + break; + } + return n; +} + +/* Add a completion. This must be called with interrupts blocked. + * tile_net_equeue_try_reserve() will have ensured a free completion entry. + */ +static void add_comp(gxio_mpipe_equeue_t *equeue, + struct tile_net_comps *comps, + uint64_t when, struct sk_buff *skb) +{ + int cid = comps->comp_next % TILE_NET_MAX_COMPS; + comps->comp_queue[cid].when = when; + comps->comp_queue[cid].skb = skb; + comps->comp_next++; +} + +static void tile_net_schedule_tx_wake_timer(struct net_device *dev) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_priv *priv = netdev_priv(dev); + + hrtimer_start(&info->tx_wake[priv->echannel].timer, + ktime_set(0, TX_TIMER_DELAY_USEC * 1000UL), + HRTIMER_MODE_REL_PINNED); +} + +static enum hrtimer_restart tile_net_handle_tx_wake_timer(struct hrtimer *t) +{ + struct tile_net_tx_wake *tx_wake = + container_of(t, struct tile_net_tx_wake, timer); + netif_wake_subqueue(tx_wake->dev, smp_processor_id()); + return HRTIMER_NORESTART; +} + +/* Make sure the egress timer is scheduled. */ +static void tile_net_schedule_egress_timer(void) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + + if (!info->egress_timer_scheduled) { + hrtimer_start(&info->egress_timer, + ktime_set(0, EGRESS_TIMER_DELAY_USEC * 1000UL), + HRTIMER_MODE_REL_PINNED); + info->egress_timer_scheduled = true; + } +} + +/* The "function" for "info->egress_timer". + * + * This timer will reschedule itself as long as there are any pending + * completions expected for this tile. + */ +static enum hrtimer_restart tile_net_handle_egress_timer(struct hrtimer *t) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + unsigned long irqflags; + bool pending = false; + int i; + + local_irq_save(irqflags); + + /* The timer is no longer scheduled. */ + info->egress_timer_scheduled = false; + + /* Free all possible comps for this tile. */ + for (i = 0; i < TILE_NET_CHANNELS; i++) { + struct tile_net_egress *egress = &egress_for_echannel[i]; + struct tile_net_comps *comps = info->comps_for_echannel[i]; + if (comps->comp_last >= comps->comp_next) + continue; + tile_net_free_comps(egress->equeue, comps, -1, true); + pending = pending || (comps->comp_last < comps->comp_next); + } + + /* Reschedule timer if needed. */ + if (pending) + tile_net_schedule_egress_timer(); + + local_irq_restore(irqflags); + + return HRTIMER_NORESTART; +} + +/* Helper function for "tile_net_update()". + * "dev" (i.e. arg) is the device being brought up or down, + * or NULL if all devices are now down. + */ +static void tile_net_update_cpu(void *arg) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct net_device *dev = arg; + + if (!info->has_iqueue) + return; + + if (dev != NULL) { + if (!info->napi_added) { + netif_napi_add(dev, &info->napi, + tile_net_poll, TILE_NET_WEIGHT); + info->napi_added = true; + } + if (!info->napi_enabled) { + napi_enable(&info->napi); + info->napi_enabled = true; + } + enable_percpu_irq(ingress_irq, 0); + } else { + disable_percpu_irq(ingress_irq); + if (info->napi_enabled) { + napi_disable(&info->napi); + info->napi_enabled = false; + } + /* FIXME: Drain the iqueue. */ + } +} + +/* Helper function for tile_net_open() and tile_net_stop(). + * Always called under tile_net_devs_for_channel_mutex. + */ +static int tile_net_update(struct net_device *dev) +{ + static gxio_mpipe_rules_t rules; /* too big to fit on the stack */ + bool saw_channel = false; + int channel; + int rc; + int cpu; + + gxio_mpipe_rules_init(&rules, &context); + + for (channel = 0; channel < TILE_NET_CHANNELS; channel++) { + if (tile_net_devs_for_channel[channel] == NULL) + continue; + if (!saw_channel) { + saw_channel = true; + gxio_mpipe_rules_begin(&rules, first_bucket, + num_buckets, NULL); + gxio_mpipe_rules_set_headroom(&rules, NET_IP_ALIGN); + } + gxio_mpipe_rules_add_channel(&rules, channel); + } + + /* NOTE: This can fail if there is no classifier. + * ISSUE: Can anything else cause it to fail? + */ + rc = gxio_mpipe_rules_commit(&rules); + if (rc != 0) { + netdev_warn(dev, "gxio_mpipe_rules_commit failed: %d\n", rc); + return -EIO; + } + + /* Update all cpus, sequentially (to protect "netif_napi_add()"). */ + for_each_online_cpu(cpu) + smp_call_function_single(cpu, tile_net_update_cpu, + (saw_channel ? dev : NULL), 1); + + /* HACK: Allow packets to flow in the simulator. */ + if (saw_channel) + sim_enable_mpipe_links(0, -1); + + return 0; +} + +/* Allocate and initialize mpipe buffer stacks, and register them in + * the mPIPE TLBs, for both small and large packet sizes. + * This routine supports tile_net_init_mpipe(), below. + */ +static int init_buffer_stacks(struct net_device *dev, int num_buffers) +{ + pte_t hash_pte = pte_set_home((pte_t) { 0 }, PAGE_HOME_HASH); + int rc; + + /* Compute stack bytes; we round up to 64KB and then use + * alloc_pages() so we get the required 64KB alignment as well. + */ + buffer_stack_size = + ALIGN(gxio_mpipe_calc_buffer_stack_bytes(num_buffers), + 64 * 1024); + + /* Allocate two buffer stack indices. */ + rc = gxio_mpipe_alloc_buffer_stacks(&context, 2, 0, 0); + if (rc < 0) { + netdev_err(dev, "gxio_mpipe_alloc_buffer_stacks failed: %d\n", + rc); + return rc; + } + small_buffer_stack = rc; + large_buffer_stack = rc + 1; + + /* Allocate the small memory stack. */ + small_buffer_stack_va = + alloc_pages_exact(buffer_stack_size, GFP_KERNEL); + if (small_buffer_stack_va == NULL) { + netdev_err(dev, + "Could not alloc %zd bytes for buffer stacks\n", + buffer_stack_size); + return -ENOMEM; + } + rc = gxio_mpipe_init_buffer_stack(&context, small_buffer_stack, + BUFFER_SIZE_SMALL_ENUM, + small_buffer_stack_va, + buffer_stack_size, 0); + if (rc != 0) { + netdev_err(dev, "gxio_mpipe_init_buffer_stack: %d\n", rc); + return rc; + } + rc = gxio_mpipe_register_client_memory(&context, small_buffer_stack, + hash_pte, 0); + if (rc != 0) { + netdev_err(dev, + "gxio_mpipe_register_buffer_memory failed: %d\n", + rc); + return rc; + } + + /* Allocate the large buffer stack. */ + large_buffer_stack_va = + alloc_pages_exact(buffer_stack_size, GFP_KERNEL); + if (large_buffer_stack_va == NULL) { + netdev_err(dev, + "Could not alloc %zd bytes for buffer stacks\n", + buffer_stack_size); + return -ENOMEM; + } + rc = gxio_mpipe_init_buffer_stack(&context, large_buffer_stack, + BUFFER_SIZE_LARGE_ENUM, + large_buffer_stack_va, + buffer_stack_size, 0); + if (rc != 0) { + netdev_err(dev, "gxio_mpipe_init_buffer_stack failed: %d\n", + rc); + return rc; + } + rc = gxio_mpipe_register_client_memory(&context, large_buffer_stack, + hash_pte, 0); + if (rc != 0) { + netdev_err(dev, + "gxio_mpipe_register_buffer_memory failed: %d\n", + rc); + return rc; + } + + return 0; +} + +/* Allocate per-cpu resources (memory for completions and idescs). + * This routine supports tile_net_init_mpipe(), below. + */ +static int alloc_percpu_mpipe_resources(struct net_device *dev, + int cpu, int ring) +{ + struct tile_net_info *info = &per_cpu(per_cpu_info, cpu); + int order, i, rc; + struct page *page; + void *addr; + + /* Allocate the "comps". */ + order = get_order(COMPS_SIZE); + page = homecache_alloc_pages(GFP_KERNEL, order, cpu); + if (page == NULL) { + netdev_err(dev, "Failed to alloc %zd bytes comps memory\n", + COMPS_SIZE); + return -ENOMEM; + } + addr = pfn_to_kaddr(page_to_pfn(page)); + memset(addr, 0, COMPS_SIZE); + for (i = 0; i < TILE_NET_CHANNELS; i++) + info->comps_for_echannel[i] = + addr + i * sizeof(struct tile_net_comps); + + /* If this is a network cpu, create an iqueue. */ + if (cpu_isset(cpu, network_cpus_map)) { + order = get_order(NOTIF_RING_SIZE); + page = homecache_alloc_pages(GFP_KERNEL, order, cpu); + if (page == NULL) { + netdev_err(dev, + "Failed to alloc %zd bytes iqueue memory\n", + NOTIF_RING_SIZE); + return -ENOMEM; + } + addr = pfn_to_kaddr(page_to_pfn(page)); + rc = gxio_mpipe_iqueue_init(&info->iqueue, &context, ring++, + addr, NOTIF_RING_SIZE, 0); + if (rc < 0) { + netdev_err(dev, + "gxio_mpipe_iqueue_init failed: %d\n", rc); + return rc; + } + info->has_iqueue = true; + } + + return ring; +} + +/* Initialize NotifGroup and buckets. + * This routine supports tile_net_init_mpipe(), below. + */ +static int init_notif_group_and_buckets(struct net_device *dev, + int ring, int network_cpus_count) +{ + int group, rc; + + /* Allocate one NotifGroup. */ + rc = gxio_mpipe_alloc_notif_groups(&context, 1, 0, 0); + if (rc < 0) { + netdev_err(dev, "gxio_mpipe_alloc_notif_groups failed: %d\n", + rc); + return rc; + } + group = rc; + + /* Initialize global num_buckets value. */ + if (network_cpus_count > 4) + num_buckets = 256; + else if (network_cpus_count > 1) + num_buckets = 16; + + /* Allocate some buckets, and set global first_bucket value. */ + rc = gxio_mpipe_alloc_buckets(&context, num_buckets, 0, 0); + if (rc < 0) { + netdev_err(dev, "gxio_mpipe_alloc_buckets failed: %d\n", rc); + return rc; + } + first_bucket = rc; + + /* Init group and buckets. */ + rc = gxio_mpipe_init_notif_group_and_buckets( + &context, group, ring, network_cpus_count, + first_bucket, num_buckets, + GXIO_MPIPE_BUCKET_STICKY_FLOW_LOCALITY); + if (rc != 0) { + netdev_err( + dev, + "gxio_mpipe_init_notif_group_and_buckets failed: %d\n", + rc); + return rc; + } + + return 0; +} + +/* Create an irq and register it, then activate the irq and request + * interrupts on all cores. Note that "ingress_irq" being initialized + * is how we know not to call tile_net_init_mpipe() again. + * This routine supports tile_net_init_mpipe(), below. + */ +static int tile_net_setup_interrupts(struct net_device *dev) +{ + int cpu, rc; + + rc = create_irq(); + if (rc < 0) { + netdev_err(dev, "create_irq failed: %d\n", rc); + return rc; + } + ingress_irq = rc; + tile_irq_activate(ingress_irq, TILE_IRQ_PERCPU); + rc = request_irq(ingress_irq, tile_net_handle_ingress_irq, + 0, NULL, NULL); + if (rc != 0) { + netdev_err(dev, "request_irq failed: %d\n", rc); + destroy_irq(ingress_irq); + ingress_irq = -1; + return rc; + } + + for_each_online_cpu(cpu) { + struct tile_net_info *info = &per_cpu(per_cpu_info, cpu); + if (info->has_iqueue) { + gxio_mpipe_request_notif_ring_interrupt( + &context, cpu_x(cpu), cpu_y(cpu), + 1, ingress_irq, info->iqueue.ring); + } + } + + return 0; +} + +/* Undo any state set up partially by a failed call to tile_net_init_mpipe. */ +static void tile_net_init_mpipe_fail(void) +{ + int cpu; + + /* Do cleanups that require the mpipe context first. */ + if (small_buffer_stack >= 0) + tile_net_pop_all_buffers(small_buffer_stack); + if (large_buffer_stack >= 0) + tile_net_pop_all_buffers(large_buffer_stack); + + /* Destroy mpipe context so the hardware no longer owns any memory. */ + gxio_mpipe_destroy(&context); + + for_each_online_cpu(cpu) { + struct tile_net_info *info = &per_cpu(per_cpu_info, cpu); + free_pages((unsigned long)(info->comps_for_echannel[0]), + get_order(COMPS_SIZE)); + info->comps_for_echannel[0] = NULL; + free_pages((unsigned long)(info->iqueue.idescs), + get_order(NOTIF_RING_SIZE)); + info->iqueue.idescs = NULL; + } + + if (small_buffer_stack_va) + free_pages_exact(small_buffer_stack_va, buffer_stack_size); + if (large_buffer_stack_va) + free_pages_exact(large_buffer_stack_va, buffer_stack_size); + + small_buffer_stack_va = NULL; + large_buffer_stack_va = NULL; + large_buffer_stack = -1; + small_buffer_stack = -1; + first_bucket = -1; +} + +/* The first time any tilegx network device is opened, we initialize + * the global mpipe state. If this step fails, we fail to open the + * device, but if it succeeds, we never need to do it again, and since + * tile_net can't be unloaded, we never undo it. + * + * Note that some resources in this path (buffer stack indices, + * bindings from init_buffer_stack, etc.) are hypervisor resources + * that are freed implicitly by gxio_mpipe_destroy(). + */ +static int tile_net_init_mpipe(struct net_device *dev) +{ + int i, num_buffers, rc; + int cpu; + int first_ring, ring; + int network_cpus_count = cpus_weight(network_cpus_map); + + if (!hash_default) { + netdev_err(dev, "Networking requires hash_default!\n"); + return -EIO; + } + + rc = gxio_mpipe_init(&context, 0); + if (rc != 0) { + netdev_err(dev, "gxio_mpipe_init failed: %d\n", rc); + return -EIO; + } + + /* Set up the buffer stacks. */ + num_buffers = + network_cpus_count * (IQUEUE_ENTRIES + TILE_NET_BATCH); + rc = init_buffer_stacks(dev, num_buffers); + if (rc != 0) + goto fail; + + /* Provide initial buffers. */ + rc = -ENOMEM; + for (i = 0; i < num_buffers; i++) { + if (!tile_net_provide_buffer(true)) { + netdev_err(dev, "Cannot allocate initial sk_bufs!\n"); + goto fail; + } + } + for (i = 0; i < num_buffers; i++) { + if (!tile_net_provide_buffer(false)) { + netdev_err(dev, "Cannot allocate initial sk_bufs!\n"); + goto fail; + } + } + + /* Allocate one NotifRing for each network cpu. */ + rc = gxio_mpipe_alloc_notif_rings(&context, network_cpus_count, 0, 0); + if (rc < 0) { + netdev_err(dev, "gxio_mpipe_alloc_notif_rings failed %d\n", + rc); + goto fail; + } + + /* Init NotifRings per-cpu. */ + first_ring = rc; + ring = first_ring; + for_each_online_cpu(cpu) { + rc = alloc_percpu_mpipe_resources(dev, cpu, ring); + if (rc < 0) + goto fail; + ring = rc; + } + + /* Initialize NotifGroup and buckets. */ + rc = init_notif_group_and_buckets(dev, first_ring, network_cpus_count); + if (rc != 0) + goto fail; + + /* Create and enable interrupts. */ + rc = tile_net_setup_interrupts(dev); + if (rc != 0) + goto fail; + + return 0; + +fail: + tile_net_init_mpipe_fail(); + return rc; +} + +/* Create persistent egress info for a given egress channel. + * Note that this may be shared between, say, "gbe0" and "xgbe0". + * ISSUE: Defer header allocation until TSO is actually needed? + */ +static int tile_net_init_egress(struct net_device *dev, int echannel) +{ + struct page *headers_page, *edescs_page, *equeue_page; + gxio_mpipe_edesc_t *edescs; + gxio_mpipe_equeue_t *equeue; + unsigned char *headers; + int headers_order, edescs_order, equeue_order; + size_t edescs_size; + int edma; + int rc = -ENOMEM; + + /* Only initialize once. */ + if (egress_for_echannel[echannel].equeue != NULL) + return 0; + + /* Allocate memory for the "headers". */ + headers_order = get_order(EQUEUE_ENTRIES * HEADER_BYTES); + headers_page = alloc_pages(GFP_KERNEL, headers_order); + if (headers_page == NULL) { + netdev_warn(dev, + "Could not alloc %zd bytes for TSO headers.\n", + PAGE_SIZE << headers_order); + goto fail; + } + headers = pfn_to_kaddr(page_to_pfn(headers_page)); + + /* Allocate memory for the "edescs". */ + edescs_size = EQUEUE_ENTRIES * sizeof(*edescs); + edescs_order = get_order(edescs_size); + edescs_page = alloc_pages(GFP_KERNEL, edescs_order); + if (edescs_page == NULL) { + netdev_warn(dev, + "Could not alloc %zd bytes for eDMA ring.\n", + edescs_size); + goto fail_headers; + } + edescs = pfn_to_kaddr(page_to_pfn(edescs_page)); + + /* Allocate memory for the "equeue". */ + equeue_order = get_order(sizeof(*equeue)); + equeue_page = alloc_pages(GFP_KERNEL, equeue_order); + if (equeue_page == NULL) { + netdev_warn(dev, + "Could not alloc %zd bytes for equeue info.\n", + PAGE_SIZE << equeue_order); + goto fail_edescs; + } + equeue = pfn_to_kaddr(page_to_pfn(equeue_page)); + + /* Allocate an edma ring. Note that in practice this can't + * fail, which is good, because we will leak an edma ring if so. + */ + rc = gxio_mpipe_alloc_edma_rings(&context, 1, 0, 0); + if (rc < 0) { + netdev_warn(dev, "gxio_mpipe_alloc_edma_rings failed: %d\n", + rc); + goto fail_equeue; + } + edma = rc; + + /* Initialize the equeue. */ + rc = gxio_mpipe_equeue_init(equeue, &context, edma, echannel, + edescs, edescs_size, 0); + if (rc != 0) { + netdev_err(dev, "gxio_mpipe_equeue_init failed: %d\n", rc); + goto fail_equeue; + } + + /* Done. */ + egress_for_echannel[echannel].equeue = equeue; + egress_for_echannel[echannel].headers = headers; + return 0; + +fail_equeue: + __free_pages(equeue_page, equeue_order); + +fail_edescs: + __free_pages(edescs_page, edescs_order); + +fail_headers: + __free_pages(headers_page, headers_order); + +fail: + return rc; +} + +/* Return channel number for a newly-opened link. */ +static int tile_net_link_open(struct net_device *dev, gxio_mpipe_link_t *link, + const char *link_name) +{ + int rc = gxio_mpipe_link_open(link, &context, link_name, 0); + if (rc < 0) { + netdev_err(dev, "Failed to open '%s'\n", link_name); + return rc; + } + rc = gxio_mpipe_link_channel(link); + if (rc < 0 || rc >= TILE_NET_CHANNELS) { + netdev_err(dev, "gxio_mpipe_link_channel bad value: %d\n", rc); + gxio_mpipe_link_close(link); + return -EINVAL; + } + return rc; +} + +/* Help the kernel activate the given network interface. */ +static int tile_net_open(struct net_device *dev) +{ + struct tile_net_priv *priv = netdev_priv(dev); + int cpu, rc; + + mutex_lock(&tile_net_devs_for_channel_mutex); + + /* Do one-time initialization the first time any device is opened. */ + if (ingress_irq < 0) { + rc = tile_net_init_mpipe(dev); + if (rc != 0) + goto fail; + } + + /* Determine if this is the "loopify" device. */ + if (unlikely((loopify_link_name != NULL) && + !strcmp(dev->name, loopify_link_name))) { + rc = tile_net_link_open(dev, &priv->link, "loop0"); + if (rc < 0) + goto fail; + priv->channel = rc; + rc = tile_net_link_open(dev, &priv->loopify_link, "loop1"); + if (rc < 0) + goto fail; + priv->loopify_channel = rc; + priv->echannel = rc; + } else { + rc = tile_net_link_open(dev, &priv->link, dev->name); + if (rc < 0) + goto fail; + priv->channel = rc; + priv->echannel = rc; + } + + /* Initialize egress info (if needed). Once ever, per echannel. */ + rc = tile_net_init_egress(dev, priv->echannel); + if (rc != 0) + goto fail; + + tile_net_devs_for_channel[priv->channel] = dev; + + rc = tile_net_update(dev); + if (rc != 0) + goto fail; + + mutex_unlock(&tile_net_devs_for_channel_mutex); + + /* Initialize the transmit wake timer for this device for each cpu. */ + for_each_online_cpu(cpu) { + struct tile_net_info *info = &per_cpu(per_cpu_info, cpu); + struct tile_net_tx_wake *tx_wake = + &info->tx_wake[priv->echannel]; + + hrtimer_init(&tx_wake->timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + tx_wake->timer.function = tile_net_handle_tx_wake_timer; + tx_wake->dev = dev; + } + + for_each_online_cpu(cpu) + netif_start_subqueue(dev, cpu); + netif_carrier_on(dev); + return 0; + +fail: + if (priv->loopify_channel >= 0) { + if (gxio_mpipe_link_close(&priv->loopify_link) != 0) + netdev_warn(dev, "Failed to close loopify link!\n"); + priv->loopify_channel = -1; + } + if (priv->channel >= 0) { + if (gxio_mpipe_link_close(&priv->link) != 0) + netdev_warn(dev, "Failed to close link!\n"); + priv->channel = -1; + } + priv->echannel = -1; + tile_net_devs_for_channel[priv->channel] = NULL; + mutex_unlock(&tile_net_devs_for_channel_mutex); + + /* Don't return raw gxio error codes to generic Linux. */ + return (rc > -512) ? rc : -EIO; +} + +/* Help the kernel deactivate the given network interface. */ +static int tile_net_stop(struct net_device *dev) +{ + struct tile_net_priv *priv = netdev_priv(dev); + int cpu; + + for_each_online_cpu(cpu) { + struct tile_net_info *info = &per_cpu(per_cpu_info, cpu); + struct tile_net_tx_wake *tx_wake = + &info->tx_wake[priv->echannel]; + + hrtimer_cancel(&tx_wake->timer); + netif_stop_subqueue(dev, cpu); + } + + mutex_lock(&tile_net_devs_for_channel_mutex); + tile_net_devs_for_channel[priv->channel] = NULL; + (void)tile_net_update(dev); + if (priv->loopify_channel >= 0) { + if (gxio_mpipe_link_close(&priv->loopify_link) != 0) + netdev_warn(dev, "Failed to close loopify link!\n"); + priv->loopify_channel = -1; + } + if (priv->channel >= 0) { + if (gxio_mpipe_link_close(&priv->link) != 0) + netdev_warn(dev, "Failed to close link!\n"); + priv->channel = -1; + } + priv->echannel = -1; + mutex_unlock(&tile_net_devs_for_channel_mutex); + + return 0; +} + +/* Determine the VA for a fragment. */ +static inline void *tile_net_frag_buf(skb_frag_t *f) +{ + unsigned long pfn = page_to_pfn(skb_frag_page(f)); + return pfn_to_kaddr(pfn) + f->page_offset; +} + +/* Acquire a completion entry and an egress slot, or if we can't, + * stop the queue and schedule the tx_wake timer. + */ +static s64 tile_net_equeue_try_reserve(struct net_device *dev, + struct tile_net_comps *comps, + gxio_mpipe_equeue_t *equeue, + int num_edescs) +{ + /* Try to acquire a completion entry. */ + if (comps->comp_next - comps->comp_last < TILE_NET_MAX_COMPS - 1 || + tile_net_free_comps(equeue, comps, 32, false) != 0) { + + /* Try to acquire an egress slot. */ + s64 slot = gxio_mpipe_equeue_try_reserve(equeue, num_edescs); + if (slot >= 0) + return slot; + + /* Freeing some completions gives the equeue time to drain. */ + tile_net_free_comps(equeue, comps, TILE_NET_MAX_COMPS, false); + + slot = gxio_mpipe_equeue_try_reserve(equeue, num_edescs); + if (slot >= 0) + return slot; + } + + /* Still nothing; give up and stop the queue for a short while. */ + netif_stop_subqueue(dev, smp_processor_id()); + tile_net_schedule_tx_wake_timer(dev); + return -1; +} + +/* Determine how many edesc's are needed for TSO. + * + * Sometimes, if "sendfile()" requires copying, we will be called with + * "data" containing the header and payload, with "frags" being empty. + * Sometimes, for example when using NFS over TCP, a single segment can + * span 3 fragments. This requires special care. + */ +static int tso_count_edescs(struct sk_buff *skb) +{ + struct skb_shared_info *sh = skb_shinfo(skb); + unsigned int data_len = skb->data_len; + unsigned int p_len = sh->gso_size; + long f_id = -1; /* id of the current fragment */ + long f_size = -1; /* size of the current fragment */ + long f_used = -1; /* bytes used from the current fragment */ + long n; /* size of the current piece of payload */ + int num_edescs = 0; + int segment; + + for (segment = 0; segment < sh->gso_segs; segment++) { + + unsigned int p_used = 0; + + /* One edesc for header and for each piece of the payload. */ + for (num_edescs++; p_used < p_len; num_edescs++) { + + /* Advance as needed. */ + while (f_used >= f_size) { + f_id++; + f_size = sh->frags[f_id].size; + f_used = 0; + } + + /* Use bytes from the current fragment. */ + n = p_len - p_used; + if (n > f_size - f_used) + n = f_size - f_used; + f_used += n; + p_used += n; + } + + /* The last segment may be less than gso_size. */ + data_len -= p_len; + if (data_len < p_len) + p_len = data_len; + } + + return num_edescs; +} + +/* Prepare modified copies of the skbuff headers. + * FIXME: add support for IPv6. + */ +static void tso_headers_prepare(struct sk_buff *skb, unsigned char *headers, + s64 slot) +{ + struct skb_shared_info *sh = skb_shinfo(skb); + struct iphdr *ih; + struct tcphdr *th; + unsigned int data_len = skb->data_len; + unsigned char *data = skb->data; + unsigned int ih_off, th_off, sh_len, p_len; + unsigned int isum_seed, tsum_seed, id, seq; + long f_id = -1; /* id of the current fragment */ + long f_size = -1; /* size of the current fragment */ + long f_used = -1; /* bytes used from the current fragment */ + long n; /* size of the current piece of payload */ + int segment; + + /* Locate original headers and compute various lengths. */ + ih = ip_hdr(skb); + th = tcp_hdr(skb); + ih_off = skb_network_offset(skb); + th_off = skb_transport_offset(skb); + sh_len = th_off + tcp_hdrlen(skb); + p_len = sh->gso_size; + + /* Set up seed values for IP and TCP csum and initialize id and seq. */ + isum_seed = ((0xFFFF - ih->check) + + (0xFFFF - ih->tot_len) + + (0xFFFF - ih->id)); + tsum_seed = th->check + (0xFFFF ^ htons(skb->len)); + id = ntohs(ih->id); + seq = ntohl(th->seq); + + /* Prepare all the headers. */ + for (segment = 0; segment < sh->gso_segs; segment++) { + unsigned char *buf; + unsigned int p_used = 0; + + /* Copy to the header memory for this segment. */ + buf = headers + (slot % EQUEUE_ENTRIES) * HEADER_BYTES + + NET_IP_ALIGN; + memcpy(buf, data, sh_len); + + /* Update copied ip header. */ + ih = (struct iphdr *)(buf + ih_off); + ih->tot_len = htons(sh_len + p_len - ih_off); + ih->id = htons(id); + ih->check = csum_long(isum_seed + ih->tot_len + + ih->id) ^ 0xffff; + + /* Update copied tcp header. */ + th = (struct tcphdr *)(buf + th_off); + th->seq = htonl(seq); + th->check = csum_long(tsum_seed + htons(sh_len + p_len)); + if (segment != sh->gso_segs - 1) { + th->fin = 0; + th->psh = 0; + } + + /* Skip past the header. */ + slot++; + + /* Skip past the payload. */ + while (p_used < p_len) { + + /* Advance as needed. */ + while (f_used >= f_size) { + f_id++; + f_size = sh->frags[f_id].size; + f_used = 0; + } + + /* Use bytes from the current fragment. */ + n = p_len - p_used; + if (n > f_size - f_used) + n = f_size - f_used; + f_used += n; + p_used += n; + + slot++; + } + + id++; + seq += p_len; + + /* The last segment may be less than gso_size. */ + data_len -= p_len; + if (data_len < p_len) + p_len = data_len; + } + + /* Flush the headers so they are ready for hardware DMA. */ + wmb(); +} + +/* Pass all the data to mpipe for egress. */ +static void tso_egress(struct net_device *dev, gxio_mpipe_equeue_t *equeue, + struct sk_buff *skb, unsigned char *headers, s64 slot) +{ + struct tile_net_priv *priv = netdev_priv(dev); + struct skb_shared_info *sh = skb_shinfo(skb); + unsigned int data_len = skb->data_len; + unsigned int p_len = sh->gso_size; + gxio_mpipe_edesc_t edesc_head = { { 0 } }; + gxio_mpipe_edesc_t edesc_body = { { 0 } }; + long f_id = -1; /* id of the current fragment */ + long f_size = -1; /* size of the current fragment */ + long f_used = -1; /* bytes used from the current fragment */ + long n; /* size of the current piece of payload */ + unsigned long tx_packets = 0, tx_bytes = 0; + unsigned int csum_start, sh_len; + int segment; + + /* Prepare to egress the headers: set up header edesc. */ + csum_start = skb_checksum_start_offset(skb); + sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + edesc_head.csum = 1; + edesc_head.csum_start = csum_start; + edesc_head.csum_dest = csum_start + skb->csum_offset; + edesc_head.xfer_size = sh_len; + + /* This is only used to specify the TLB. */ + edesc_head.stack_idx = large_buffer_stack; + edesc_body.stack_idx = large_buffer_stack; + + /* Egress all the edescs. */ + for (segment = 0; segment < sh->gso_segs; segment++) { + void *va; + unsigned char *buf; + unsigned int p_used = 0; + + /* Egress the header. */ + buf = headers + (slot % EQUEUE_ENTRIES) * HEADER_BYTES + + NET_IP_ALIGN; + edesc_head.va = va_to_tile_io_addr(buf); + gxio_mpipe_equeue_put_at(equeue, edesc_head, slot); + slot++; + + /* Egress the payload. */ + while (p_used < p_len) { + + /* Advance as needed. */ + while (f_used >= f_size) { + f_id++; + f_size = sh->frags[f_id].size; + f_used = 0; + } + + va = tile_net_frag_buf(&sh->frags[f_id]) + f_used; + + /* Use bytes from the current fragment. */ + n = p_len - p_used; + if (n > f_size - f_used) + n = f_size - f_used; + f_used += n; + p_used += n; + + /* Egress a piece of the payload. */ + edesc_body.va = va_to_tile_io_addr(va); + edesc_body.xfer_size = n; + edesc_body.bound = !(p_used < p_len); + gxio_mpipe_equeue_put_at(equeue, edesc_body, slot); + slot++; + } + + tx_packets++; + tx_bytes += sh_len + p_len; + + /* The last segment may be less than gso_size. */ + data_len -= p_len; + if (data_len < p_len) + p_len = data_len; + } + + /* Update stats. */ + tile_net_stats_add(tx_packets, &priv->stats.tx_packets); + tile_net_stats_add(tx_bytes, &priv->stats.tx_bytes); +} + +/* Do "TSO" handling for egress. + * + * Normally drivers set NETIF_F_TSO only to support hardware TSO; + * otherwise the stack uses scatter-gather to implement GSO in software. + * On our testing, enabling GSO support (via NETIF_F_SG) drops network + * performance down to around 7.5 Gbps on the 10G interfaces, although + * also dropping cpu utilization way down, to under 8%. But + * implementing "TSO" in the driver brings performance back up to line + * rate, while dropping cpu usage even further, to less than 4%. In + * practice, profiling of GSO shows that skb_segment() is what causes + * the performance overheads; we benefit in the driver from using + * preallocated memory to duplicate the TCP/IP headers. + */ +static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_priv *priv = netdev_priv(dev); + int channel = priv->echannel; + struct tile_net_egress *egress = &egress_for_echannel[channel]; + struct tile_net_comps *comps = info->comps_for_echannel[channel]; + gxio_mpipe_equeue_t *equeue = egress->equeue; + unsigned long irqflags; + int num_edescs; + s64 slot; + + /* Determine how many mpipe edesc's are needed. */ + num_edescs = tso_count_edescs(skb); + + local_irq_save(irqflags); + + /* Try to acquire a completion entry and an egress slot. */ + slot = tile_net_equeue_try_reserve(dev, comps, equeue, num_edescs); + if (slot < 0) { + local_irq_restore(irqflags); + return NETDEV_TX_BUSY; + } + + /* Set up copies of header data properly. */ + tso_headers_prepare(skb, egress->headers, slot); + + /* Actually pass the data to the network hardware. */ + tso_egress(dev, equeue, skb, egress->headers, slot); + + /* Add a completion record. */ + add_comp(equeue, comps, slot + num_edescs - 1, skb); + + local_irq_restore(irqflags); + + /* Make sure the egress timer is scheduled. */ + tile_net_schedule_egress_timer(); + + return NETDEV_TX_OK; +} + +/* Analyze the body and frags for a transmit request. */ +static unsigned int tile_net_tx_frags(struct frag *frags, + struct sk_buff *skb, + void *b_data, unsigned int b_len) +{ + unsigned int i, n = 0; + + struct skb_shared_info *sh = skb_shinfo(skb); + + if (b_len != 0) { + frags[n].buf = b_data; + frags[n++].length = b_len; + } + + for (i = 0; i < sh->nr_frags; i++) { + skb_frag_t *f = &sh->frags[i]; + frags[n].buf = tile_net_frag_buf(f); + frags[n++].length = skb_frag_size(f); + } + + return n; +} + +/* Help the kernel transmit a packet. */ +static int tile_net_tx(struct sk_buff *skb, struct net_device *dev) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_priv *priv = netdev_priv(dev); + struct tile_net_egress *egress = &egress_for_echannel[priv->echannel]; + gxio_mpipe_equeue_t *equeue = egress->equeue; + struct tile_net_comps *comps = + info->comps_for_echannel[priv->echannel]; + unsigned int len = skb->len; + unsigned char *data = skb->data; + unsigned int num_edescs; + struct frag frags[MAX_FRAGS]; + gxio_mpipe_edesc_t edescs[MAX_FRAGS]; + unsigned long irqflags; + gxio_mpipe_edesc_t edesc = { { 0 } }; + unsigned int i; + s64 slot; + + if (skb_is_gso(skb)) + return tile_net_tx_tso(skb, dev); + + num_edescs = tile_net_tx_frags(frags, skb, data, skb_headlen(skb)); + + /* This is only used to specify the TLB. */ + edesc.stack_idx = large_buffer_stack; + + /* Prepare the edescs. */ + for (i = 0; i < num_edescs; i++) { + edesc.xfer_size = frags[i].length; + edesc.va = va_to_tile_io_addr(frags[i].buf); + edescs[i] = edesc; + } + + /* Mark the final edesc. */ + edescs[num_edescs - 1].bound = 1; + + /* Add checksum info to the initial edesc, if needed. */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + unsigned int csum_start = skb_checksum_start_offset(skb); + edescs[0].csum = 1; + edescs[0].csum_start = csum_start; + edescs[0].csum_dest = csum_start + skb->csum_offset; + } + + local_irq_save(irqflags); + + /* Try to acquire a completion entry and an egress slot. */ + slot = tile_net_equeue_try_reserve(dev, comps, equeue, num_edescs); + if (slot < 0) { + local_irq_restore(irqflags); + return NETDEV_TX_BUSY; + } + + for (i = 0; i < num_edescs; i++) + gxio_mpipe_equeue_put_at(equeue, edescs[i], slot++); + + /* Add a completion record. */ + add_comp(equeue, comps, slot - 1, skb); + + /* NOTE: Use ETH_ZLEN for short packets (e.g. 42 < 60). */ + tile_net_stats_add(1, &priv->stats.tx_packets); + tile_net_stats_add(max_t(unsigned int, len, ETH_ZLEN), + &priv->stats.tx_bytes); + + local_irq_restore(irqflags); + + /* Make sure the egress timer is scheduled. */ + tile_net_schedule_egress_timer(); + + return NETDEV_TX_OK; +} + +/* Return subqueue id on this core (one per core). */ +static u16 tile_net_select_queue(struct net_device *dev, struct sk_buff *skb) +{ + return smp_processor_id(); +} + +/* Deal with a transmit timeout. */ +static void tile_net_tx_timeout(struct net_device *dev) +{ + int cpu; + + for_each_online_cpu(cpu) + netif_wake_subqueue(dev, cpu); +} + +/* Ioctl commands. */ +static int tile_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + return -EOPNOTSUPP; +} + +/* Get system network statistics for device. */ +static struct net_device_stats *tile_net_get_stats(struct net_device *dev) +{ + struct tile_net_priv *priv = netdev_priv(dev); + return &priv->stats; +} + +/* Change the MTU. */ +static int tile_net_change_mtu(struct net_device *dev, int new_mtu) +{ + if ((new_mtu < 68) || (new_mtu > 1500)) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +/* Change the Ethernet address of the NIC. + * + * The hypervisor driver does not support changing MAC address. However, + * the hardware does not do anything with the MAC address, so the address + * which gets used on outgoing packets, and which is accepted on incoming + * packets, is completely up to us. + * + * Returns 0 on success, negative on failure. + */ +static int tile_net_set_mac_address(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EINVAL; + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + return 0; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +/* Polling 'interrupt' - used by things like netconsole to send skbs + * without having to re-enable interrupts. It's not called while + * the interrupt routine is executing. + */ +static void tile_net_netpoll(struct net_device *dev) +{ + disable_percpu_irq(ingress_irq); + tile_net_handle_ingress_irq(ingress_irq, NULL); + enable_percpu_irq(ingress_irq, 0); +} +#endif + +static const struct net_device_ops tile_net_ops = { + .ndo_open = tile_net_open, + .ndo_stop = tile_net_stop, + .ndo_start_xmit = tile_net_tx, + .ndo_select_queue = tile_net_select_queue, + .ndo_do_ioctl = tile_net_ioctl, + .ndo_get_stats = tile_net_get_stats, + .ndo_change_mtu = tile_net_change_mtu, + .ndo_tx_timeout = tile_net_tx_timeout, + .ndo_set_mac_address = tile_net_set_mac_address, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = tile_net_netpoll, +#endif +}; + +/* The setup function. + * + * This uses ether_setup() to assign various fields in dev, including + * setting IFF_BROADCAST and IFF_MULTICAST, then sets some extra fields. + */ +static void tile_net_setup(struct net_device *dev) +{ + ether_setup(dev); + dev->netdev_ops = &tile_net_ops; + dev->watchdog_timeo = TILE_NET_TIMEOUT; + dev->features |= NETIF_F_LLTX; + dev->features |= NETIF_F_HW_CSUM; + dev->features |= NETIF_F_SG; + dev->features |= NETIF_F_TSO; + dev->mtu = 1500; +} + +/* Allocate the device structure, register the device, and obtain the + * MAC address from the hypervisor. + */ +static void tile_net_dev_init(const char *name, const uint8_t *mac) +{ + int ret; + int i; + int nz_addr = 0; + struct net_device *dev; + struct tile_net_priv *priv; + + /* HACK: Ignore "loop" links. */ + if (strncmp(name, "loop", 4) == 0) + return; + + /* Allocate the device structure. Normally, "name" is a + * template, instantiated by register_netdev(), but not for us. + */ + dev = alloc_netdev_mqs(sizeof(*priv), name, tile_net_setup, + NR_CPUS, 1); + if (!dev) { + pr_err("alloc_netdev_mqs(%s) failed\n", name); + return; + } + + /* Initialize "priv". */ + priv = netdev_priv(dev); + memset(priv, 0, sizeof(*priv)); + priv->dev = dev; + priv->channel = -1; + priv->loopify_channel = -1; + priv->echannel = -1; + + /* Get the MAC address and set it in the device struct; this must + * be done before the device is opened. If the MAC is all zeroes, + * we use a random address, since we're probably on the simulator. + */ + for (i = 0; i < 6; i++) + nz_addr |= mac[i]; + + if (nz_addr) { + memcpy(dev->dev_addr, mac, 6); + dev->addr_len = 6; + } else { + random_ether_addr(dev->dev_addr); + } + + /* Register the network device. */ + ret = register_netdev(dev); + if (ret) { + netdev_err(dev, "register_netdev failed %d\n", ret); + free_netdev(dev); + return; + } +} + +/* Per-cpu module initialization. */ +static void tile_net_init_module_percpu(void *unused) +{ + struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + int my_cpu = smp_processor_id(); + + info->has_iqueue = false; + + info->my_cpu = my_cpu; + + /* Initialize the egress timer. */ + hrtimer_init(&info->egress_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + info->egress_timer.function = tile_net_handle_egress_timer; +} + +/* Module initialization. */ +static int __init tile_net_init_module(void) +{ + int i; + char name[GXIO_MPIPE_LINK_NAME_LEN]; + uint8_t mac[6]; + + pr_info("Tilera Network Driver\n"); + + mutex_init(&tile_net_devs_for_channel_mutex); + + /* Initialize each CPU. */ + on_each_cpu(tile_net_init_module_percpu, NULL, 1); + + /* Find out what devices we have, and initialize them. */ + for (i = 0; gxio_mpipe_link_enumerate_mac(i, name, mac) >= 0; i++) + tile_net_dev_init(name, mac); + + if (!network_cpus_init()) + network_cpus_map = *cpu_online_mask; + + return 0; +} + +module_init(tile_net_init_module); diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index d4a4992b4935..31470b0d0c32 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -99,7 +99,7 @@ static int of_mdio_bus_match(struct device *dev, void *mdio_bus_np) } /** * of_mdio_find_bus - Given an mii_bus node, find the mii_bus. - * @mdio_np: Pointer to the mii_bus. + * @mdio_bus_np: Pointer to the mii_bus. * * Returns a pointer to the mii_bus, or NULL if none found. * diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 5214b1eceb95..f18149ae2588 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -42,7 +42,8 @@ module_param(gso, bool, 0444); #define VIRTNET_DRIVER_VERSION "1.0.0" struct virtnet_stats { - struct u64_stats_sync syncp; + struct u64_stats_sync tx_syncp; + struct u64_stats_sync rx_syncp; u64 tx_bytes; u64 tx_packets; @@ -300,10 +301,10 @@ static void receive_buf(struct net_device *dev, void *buf, unsigned int len) hdr = skb_vnet_hdr(skb); - u64_stats_update_begin(&stats->syncp); + u64_stats_update_begin(&stats->rx_syncp); stats->rx_bytes += skb->len; stats->rx_packets++; - u64_stats_update_end(&stats->syncp); + u64_stats_update_end(&stats->rx_syncp); if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { pr_debug("Needs csum!\n"); @@ -565,10 +566,10 @@ static unsigned int free_old_xmit_skbs(struct virtnet_info *vi) while ((skb = virtqueue_get_buf(vi->svq, &len)) != NULL) { pr_debug("Sent skb %p\n", skb); - u64_stats_update_begin(&stats->syncp); + u64_stats_update_begin(&stats->tx_syncp); stats->tx_bytes += skb->len; stats->tx_packets++; - u64_stats_update_end(&stats->syncp); + u64_stats_update_end(&stats->tx_syncp); tot_sgs += skb_vnet_hdr(skb)->num_sg; dev_kfree_skb_any(skb); @@ -703,12 +704,16 @@ static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev, u64 tpackets, tbytes, rpackets, rbytes; do { - start = u64_stats_fetch_begin(&stats->syncp); + start = u64_stats_fetch_begin(&stats->tx_syncp); tpackets = stats->tx_packets; tbytes = stats->tx_bytes; + } while (u64_stats_fetch_retry(&stats->tx_syncp, start)); + + do { + start = u64_stats_fetch_begin(&stats->rx_syncp); rpackets = stats->rx_packets; rbytes = stats->rx_bytes; - } while (u64_stats_fetch_retry(&stats->syncp, start)); + } while (u64_stats_fetch_retry(&stats->rx_syncp, start)); tot->rx_packets += rpackets; tot->tx_packets += tpackets; diff --git a/drivers/net/wireless/b43/b43.h b/drivers/net/wireless/b43/b43.h index 67c13af6f206..c06b6cb5c91e 100644 --- a/drivers/net/wireless/b43/b43.h +++ b/drivers/net/wireless/b43/b43.h @@ -877,6 +877,10 @@ struct b43_wl { * from the mac80211 subsystem. */ u16 mac80211_initially_registered_queues; + /* Set this if we call ieee80211_register_hw() and check if we call + * ieee80211_unregister_hw(). */ + bool hw_registred; + /* We can only have one operating interface (802.11 core) * at a time. General information about this interface follows. */ diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 5a39b226b2e3..acd03a4f9730 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -2437,6 +2437,7 @@ start_ieee80211: err = ieee80211_register_hw(wl->hw); if (err) goto err_one_core_detach; + wl->hw_registred = true; b43_leds_register(wl->current_dev); goto out; @@ -5299,6 +5300,7 @@ static struct b43_wl *b43_wireless_init(struct b43_bus_dev *dev) hw->queues = modparam_qos ? B43_QOS_QUEUE_NUM : 1; wl->mac80211_initially_registered_queues = hw->queues; + wl->hw_registred = false; hw->max_rates = 2; SET_IEEE80211_DEV(hw, dev->dev); if (is_valid_ether_addr(sprom->et1mac)) @@ -5370,12 +5372,15 @@ static void b43_bcma_remove(struct bcma_device *core) * as the ieee80211 unreg will destroy the workqueue. */ cancel_work_sync(&wldev->restart_work); - /* Restore the queues count before unregistering, because firmware detect - * might have modified it. Restoring is important, so the networking - * stack can properly free resources. */ - wl->hw->queues = wl->mac80211_initially_registered_queues; - b43_leds_stop(wldev); - ieee80211_unregister_hw(wl->hw); + B43_WARN_ON(!wl); + if (wl->current_dev == wldev && wl->hw_registred) { + /* Restore the queues count before unregistering, because firmware detect + * might have modified it. Restoring is important, so the networking + * stack can properly free resources. */ + wl->hw->queues = wl->mac80211_initially_registered_queues; + b43_leds_stop(wldev); + ieee80211_unregister_hw(wl->hw); + } b43_one_core_detach(wldev->dev); @@ -5446,7 +5451,7 @@ static void b43_ssb_remove(struct ssb_device *sdev) cancel_work_sync(&wldev->restart_work); B43_WARN_ON(!wl); - if (wl->current_dev == wldev) { + if (wl->current_dev == wldev && wl->hw_registred) { /* Restore the queues count before unregistering, because firmware detect * might have modified it. Restoring is important, so the networking * stack can properly free resources. */ diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c index 9cfae0c08707..95aa8e1683ec 100644 --- a/drivers/net/wireless/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/ipw2x00/ipw2100.c @@ -1903,14 +1903,6 @@ static void ipw2100_down(struct ipw2100_priv *priv) netif_stop_queue(priv->net_dev); } -/* Called by register_netdev() */ -static int ipw2100_net_init(struct net_device *dev) -{ - struct ipw2100_priv *priv = libipw_priv(dev); - - return ipw2100_up(priv, 1); -} - static int ipw2100_wdev_init(struct net_device *dev) { struct ipw2100_priv *priv = libipw_priv(dev); @@ -6087,7 +6079,6 @@ static const struct net_device_ops ipw2100_netdev_ops = { .ndo_stop = ipw2100_close, .ndo_start_xmit = libipw_xmit, .ndo_change_mtu = libipw_change_mtu, - .ndo_init = ipw2100_net_init, .ndo_tx_timeout = ipw2100_tx_timeout, .ndo_set_mac_address = ipw2100_set_address, .ndo_validate_addr = eth_validate_addr, @@ -6329,6 +6320,10 @@ static int ipw2100_pci_init_one(struct pci_dev *pci_dev, printk(KERN_INFO DRV_NAME ": Detected Intel PRO/Wireless 2100 Network Connection\n"); + err = ipw2100_up(priv, 1); + if (err) + goto fail; + err = ipw2100_wdev_init(dev); if (err) goto fail; @@ -6338,12 +6333,7 @@ static int ipw2100_pci_init_one(struct pci_dev *pci_dev, * network device we would call ipw2100_up. This introduced a race * condition with newer hotplug configurations (network was coming * up and making calls before the device was initialized). - * - * If we called ipw2100_up before we registered the device, then the - * device name wasn't registered. So, we instead use the net_dev->init - * member to call a function that then just turns and calls ipw2100_up. - * net_dev->init is called after name allocation but before the - * notifier chain is called */ + */ err = register_netdev(dev); if (err) { printk(KERN_WARNING DRV_NAME diff --git a/drivers/net/wireless/iwlwifi/dvm/sta.c b/drivers/net/wireless/iwlwifi/dvm/sta.c index 286ce4e18068..b29b798f7550 100644 --- a/drivers/net/wireless/iwlwifi/dvm/sta.c +++ b/drivers/net/wireless/iwlwifi/dvm/sta.c @@ -1251,7 +1251,7 @@ int iwl_remove_dynamic_key(struct iwl_priv *priv, key_flags |= STA_KEY_MULTICAST_MSK; sta_cmd.key.key_flags = key_flags; - sta_cmd.key.key_offset = WEP_INVALID_OFFSET; + sta_cmd.key.key_offset = keyconf->hw_key_idx; sta_cmd.sta.modify_mask = STA_MODIFY_KEY_MASK; sta_cmd.mode = STA_CONTROL_MODIFY_MSK; diff --git a/drivers/net/wireless/iwlwifi/iwl-prph.h b/drivers/net/wireless/iwlwifi/iwl-prph.h index a9f0415916c7..9253ef1dba72 100644 --- a/drivers/net/wireless/iwlwifi/iwl-prph.h +++ b/drivers/net/wireless/iwlwifi/iwl-prph.h @@ -224,6 +224,7 @@ #define SCD_TXFACT (SCD_BASE + 0x10) #define SCD_ACTIVE (SCD_BASE + 0x14) #define SCD_QUEUECHAIN_SEL (SCD_BASE + 0xe8) +#define SCD_CHAINEXT_EN (SCD_BASE + 0x244) #define SCD_AGGR_SEL (SCD_BASE + 0x248) #define SCD_INTERRUPT_MASK (SCD_BASE + 0x108) diff --git a/drivers/net/wireless/iwlwifi/pcie/6000.c b/drivers/net/wireless/iwlwifi/pcie/6000.c index 8dd8a6fe61e8..cb08ba03aae7 100644 --- a/drivers/net/wireless/iwlwifi/pcie/6000.c +++ b/drivers/net/wireless/iwlwifi/pcie/6000.c @@ -35,17 +35,20 @@ #define IWL6000_UCODE_API_MAX 6 #define IWL6050_UCODE_API_MAX 5 #define IWL6000G2_UCODE_API_MAX 6 +#define IWL6035_UCODE_API_MAX 6 /* Oldest version we won't warn about */ #define IWL6000_UCODE_API_OK 4 #define IWL6000G2_UCODE_API_OK 5 #define IWL6050_UCODE_API_OK 5 #define IWL6000G2B_UCODE_API_OK 6 +#define IWL6035_UCODE_API_OK 6 /* Lowest firmware API version supported */ #define IWL6000_UCODE_API_MIN 4 #define IWL6050_UCODE_API_MIN 4 -#define IWL6000G2_UCODE_API_MIN 4 +#define IWL6000G2_UCODE_API_MIN 5 +#define IWL6035_UCODE_API_MIN 6 /* EEPROM versions */ #define EEPROM_6000_TX_POWER_VERSION (4) @@ -243,9 +246,25 @@ const struct iwl_cfg iwl6030_2bg_cfg = { IWL_DEVICE_6030, }; +#define IWL_DEVICE_6035 \ + .fw_name_pre = IWL6030_FW_PRE, \ + .ucode_api_max = IWL6035_UCODE_API_MAX, \ + .ucode_api_ok = IWL6035_UCODE_API_OK, \ + .ucode_api_min = IWL6035_UCODE_API_MIN, \ + .device_family = IWL_DEVICE_FAMILY_6030, \ + .max_inst_size = IWL60_RTC_INST_SIZE, \ + .max_data_size = IWL60_RTC_DATA_SIZE, \ + .eeprom_ver = EEPROM_6030_EEPROM_VERSION, \ + .eeprom_calib_ver = EEPROM_6030_TX_POWER_VERSION, \ + .base_params = &iwl6000_g2_base_params, \ + .bt_params = &iwl6000_bt_params, \ + .need_temp_offset_calib = true, \ + .led_mode = IWL_LED_RF_STATE, \ + .adv_pm = true + const struct iwl_cfg iwl6035_2agn_cfg = { .name = "Intel(R) Centrino(R) Advanced-N 6235 AGN", - IWL_DEVICE_6030, + IWL_DEVICE_6035, .ht_params = &iwl6000_ht_params, }; diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 969f78f421df..7461a6a14338 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -1065,6 +1065,12 @@ static void iwl_tx_start(struct iwl_trans *trans) /* Activate all Tx DMA/FIFO channels */ iwl_trans_txq_set_sched(trans, IWL_MASK(0, 7)); + /* The chain extension of the SCD doesn't work well. This feature is + * enabled by default by the HW, so we need to disable it manually. + */ + iwl_write_prph(trans, SCD_CHAINEXT_EN, 0); + + /* Enable DMA channel */ for (chan = 0; chan < FH_TCSR_CHNL_NUM ; chan++) iwl_write_direct32(trans, FH_TCSR_CHNL_TX_CONFIG_REG(chan), diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 4c9336cee817..a0b7cfd34685 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1555,6 +1555,7 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, hdr = (struct ieee80211_hdr *) skb->data; mac80211_hwsim_monitor_ack(data2->hw, hdr->addr2); } + txi->flags |= IEEE80211_TX_STAT_ACK; } ieee80211_tx_status_irqsafe(data2->hw, skb); return 0; diff --git a/drivers/net/wireless/rtl818x/rtl8187/leds.c b/drivers/net/wireless/rtl818x/rtl8187/leds.c index 2e0de2f5f0f9..c2d5b495c179 100644 --- a/drivers/net/wireless/rtl818x/rtl8187/leds.c +++ b/drivers/net/wireless/rtl818x/rtl8187/leds.c @@ -117,7 +117,7 @@ static void rtl8187_led_brightness_set(struct led_classdev *led_dev, radio_on = true; } else if (radio_on) { radio_on = false; - cancel_delayed_work_sync(&priv->led_on); + cancel_delayed_work(&priv->led_on); ieee80211_queue_delayed_work(hw, &priv->led_off, 0); } } else if (radio_on) { diff --git a/include/linux/netfilter/xt_HMARK.h b/include/linux/netfilter/xt_HMARK.h index abb1650940d2..826fc5807577 100644 --- a/include/linux/netfilter/xt_HMARK.h +++ b/include/linux/netfilter/xt_HMARK.h @@ -27,7 +27,12 @@ union hmark_ports { __u16 src; __u16 dst; } p16; + struct { + __be16 src; + __be16 dst; + } b16; __u32 v32; + __be32 b32; }; struct xt_hmark_info { diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 23e8234f75a5..7d3bcedc062a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -69,16 +69,16 @@ union tcp_word_hdr { #define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) enum { - TCP_FLAG_CWR = __cpu_to_be32(0x00800000), - TCP_FLAG_ECE = __cpu_to_be32(0x00400000), - TCP_FLAG_URG = __cpu_to_be32(0x00200000), - TCP_FLAG_ACK = __cpu_to_be32(0x00100000), - TCP_FLAG_PSH = __cpu_to_be32(0x00080000), - TCP_FLAG_RST = __cpu_to_be32(0x00040000), - TCP_FLAG_SYN = __cpu_to_be32(0x00020000), - TCP_FLAG_FIN = __cpu_to_be32(0x00010000), - TCP_RESERVED_BITS = __cpu_to_be32(0x0F000000), - TCP_DATA_OFFSET = __cpu_to_be32(0xF0000000) + TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000), + TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000), + TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000), + TCP_FLAG_ACK = __constant_cpu_to_be32(0x00100000), + TCP_FLAG_PSH = __constant_cpu_to_be32(0x00080000), + TCP_FLAG_RST = __constant_cpu_to_be32(0x00040000), + TCP_FLAG_SYN = __constant_cpu_to_be32(0x00020000), + TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000), + TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000), + TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000) }; /* diff --git a/include/net/route.h b/include/net/route.h index 2bfbc9329ea9..a36ae429ed5d 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -164,9 +164,9 @@ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr, { struct flowi4 fl4 = { .flowi4_oif = oif, + .flowi4_tos = tos, .daddr = daddr, .saddr = saddr, - .flowi4_tos = tos, }; return ip_route_output_key(net, &fl4); } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 55ce96b53b09..9d7d54a00e63 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -220,13 +220,16 @@ struct tcf_proto { struct qdisc_skb_cb { unsigned int pkt_len; - unsigned char data[24]; + u16 bond_queue_mapping; + u16 _pad; + unsigned char data[20]; }; static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) { struct qdisc_skb_cb *qcb; - BUILD_BUG_ON(sizeof(skb->cb) < sizeof(unsigned int) + sz); + + BUILD_BUG_ON(sizeof(skb->cb) < offsetof(struct qdisc_skb_cb, data) + sz); BUILD_BUG_ON(sizeof(qcb->data) < sz); } diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 0301b328cf0f..86852963b7f7 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1208,9 +1208,7 @@ static int atalk_connect(struct socket *sock, struct sockaddr *uaddr, if (addr->sat_addr.s_node == ATADDR_BCAST && !sock_flag(sk, SOCK_BROADCAST)) { #if 1 - printk(KERN_WARNING "%s is broken and did not set " - "SO_BROADCAST. It will break when 2.2 is " - "released.\n", + pr_warn("atalk_connect: %s is broken and did not set SO_BROADCAST.\n", current->comm); #else return -EACCES; diff --git a/net/core/filter.c b/net/core/filter.c index a3eddb515d1b..d4ce2dc712e3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -616,9 +616,9 @@ static int __sk_prepare_filter(struct sk_filter *fp) /** * sk_unattached_filter_create - create an unattached filter * @fprog: the filter program - * @sk: the socket to use + * @pfp: the unattached filter that is created * - * Create a filter independent ofr any socket. We first run some + * Create a filter independent of any socket. We first run some * sanity checks on it to make sure it does not explode on us later. * If an error occurs or there is insufficient memory for the filter * a negative errno code is returned. On success the return is zero. diff --git a/net/core/neighbour.c b/net/core/neighbour.c index eb09f8bbbf07..d81d026138f0 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2219,9 +2219,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); - for (h = 0; h < (1 << nht->hash_shift); h++) { - if (h < s_h) - continue; + for (h = s_h; h < (1 << nht->hash_shift); h++) { if (h > s_h) s_idx = 0; for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0; @@ -2260,9 +2258,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, read_lock_bh(&tbl->lock); - for (h = 0; h <= PNEIGH_HASHMASK; h++) { - if (h < s_h) - continue; + for (h = s_h; h <= PNEIGH_HASHMASK; h++) { if (h > s_h) s_idx = 0; for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { @@ -2297,7 +2293,7 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) struct neigh_table *tbl; int t, family, s_t; int proxy = 0; - int err = 0; + int err; read_lock(&neigh_tbl_lock); family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; @@ -2311,7 +2307,7 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; - for (tbl = neigh_tables, t = 0; tbl && (err >= 0); + for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) { if (t < s_t || (family && tbl->family != family)) continue; @@ -2322,6 +2318,8 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) err = pneigh_dump_table(tbl, skb, cb); else err = neigh_dump_table(tbl, skb, cb); + if (err < 0) + break; } read_unlock(&neigh_tbl_lock); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1d74cea22aaa..5b21522ed0e1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3361,7 +3361,7 @@ EXPORT_SYMBOL(kfree_skb_partial); * @to: prior buffer * @from: buffer to add * @fragstolen: pointer to boolean - * + * @delta_truesize: how much more was allocated than was requested */ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, bool *fragstolen, int *delta_truesize) diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index e5c44fc586ab..ab09b126423c 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -44,6 +44,7 @@ static int ip_forward_finish(struct sk_buff *skb) struct ip_options *opt = &(IPCB(skb)->opt); IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len); if (unlikely(opt->optlen)) ip_forward_options(skb); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a9e519ad6db5..c94bbc6f2ba3 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1574,6 +1574,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) struct ip_options *opt = &(IPCB(skb)->opt); IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len); if (unlikely(opt->optlen)) ip_forward_options(skb); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 7ef0743f06f0..215afc74d8aa 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1562,7 +1562,7 @@ static int fib6_age(struct rt6_info *rt, void *arg) neigh_flags = neigh->flags; neigh_release(neigh); } - if (neigh_flags & NTF_ROUTER) { + if (!(neigh_flags & NTF_ROUTER)) { RT6_TRACE("purging route %p via non-router but gateway\n", rt); return -1; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ee1bb450bfe4..a233a7ccbc3a 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -513,6 +513,7 @@ int ip6_forward(struct sk_buff *skb) hdr->hop_limit--; IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index b15dc08643a4..461e47c8e956 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1886,6 +1886,8 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb) { IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTOCTETS, skb->len); return dst_output(skb); } diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 443591d629ca..185f12f4a5fa 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -162,6 +162,7 @@ static void l2tp_eth_delete(struct l2tp_session *session) if (dev) { unregister_netdev(dev); spriv->dev = NULL; + module_put(THIS_MODULE); } } } @@ -249,6 +250,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p if (rc < 0) goto out_del_dev; + __module_get(THIS_MODULE); /* Must be done after register_netdev() */ strlcpy(session->ifname, dev->name, IFNAMSIZ); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 70614e7affab..61d8b75d2686 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -464,10 +464,12 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m sk->sk_bound_dev_if); if (IS_ERR(rt)) goto no_route; - if (connected) + if (connected) { sk_setup_caps(sk, &rt->dst); - else - dst_release(&rt->dst); /* safe since we hold rcu_read_lock */ + } else { + skb_dst_set(skb, &rt->dst); + goto xmit; + } } /* We dont need to clone dst here, it is guaranteed to not disappear. @@ -475,6 +477,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m */ skb_dst_set_noref(skb, &rt->dst); +xmit: /* Queue the packet to IP for output */ rc = ip_queue_xmit(skb, &inet->cork.fl); rcu_read_unlock(); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0f45d02e0ba7..d7134c170336 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3100,7 +3100,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, } local->oper_channel = cbss->channel; - ieee80211_hw_config(local, 0); + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); if (!have_sta) { u32 rates = 0, basic_rates = 0; diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 46d69d7f1bb4..31f50bc3a312 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -270,9 +270,8 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, return 0; /* RTP port is even */ - port &= htons(~1); - rtp_port = port; - rtcp_port = htons(ntohs(port) + 1); + rtp_port = port & ~htons(1); + rtcp_port = port | htons(1); /* Create expect for RTP */ if ((rtp_exp = nf_ct_expect_alloc(ct)) == NULL) diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c index 0a96a43108ed..1686ca1b53a1 100644 --- a/net/netfilter/xt_HMARK.c +++ b/net/netfilter/xt_HMARK.c @@ -32,13 +32,13 @@ MODULE_ALIAS("ipt_HMARK"); MODULE_ALIAS("ip6t_HMARK"); struct hmark_tuple { - u32 src; - u32 dst; + __be32 src; + __be32 dst; union hmark_ports uports; - uint8_t proto; + u8 proto; }; -static inline u32 hmark_addr6_mask(const __u32 *addr32, const __u32 *mask) +static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask) { return (addr32[0] & mask[0]) ^ (addr32[1] & mask[1]) ^ @@ -46,8 +46,8 @@ static inline u32 hmark_addr6_mask(const __u32 *addr32, const __u32 *mask) (addr32[3] & mask[3]); } -static inline u32 -hmark_addr_mask(int l3num, const __u32 *addr32, const __u32 *mask) +static inline __be32 +hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask) { switch (l3num) { case AF_INET: @@ -58,6 +58,22 @@ hmark_addr_mask(int l3num, const __u32 *addr32, const __u32 *mask) return 0; } +static inline void hmark_swap_ports(union hmark_ports *uports, + const struct xt_hmark_info *info) +{ + union hmark_ports hp; + u16 src, dst; + + hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32; + src = ntohs(hp.b16.src); + dst = ntohs(hp.b16.dst); + + if (dst > src) + uports->v32 = (dst << 16) | src; + else + uports->v32 = (src << 16) | dst; +} + static int hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t, const struct xt_hmark_info *info) @@ -74,22 +90,19 @@ hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t, otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.all, - info->src_mask.all); - t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.all, - info->dst_mask.all); + t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6, + info->src_mask.ip6); + t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6, + info->dst_mask.ip6); if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) return 0; t->proto = nf_ct_protonum(ct); if (t->proto != IPPROTO_ICMP) { - t->uports.p16.src = otuple->src.u.all; - t->uports.p16.dst = rtuple->src.u.all; - t->uports.v32 = (t->uports.v32 & info->port_mask.v32) | - info->port_set.v32; - if (t->uports.p16.dst < t->uports.p16.src) - swap(t->uports.p16.dst, t->uports.p16.src); + t->uports.b16.src = otuple->src.u.all; + t->uports.b16.dst = rtuple->src.u.all; + hmark_swap_ports(&t->uports, info); } return 0; @@ -98,15 +111,19 @@ hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t, #endif } +/* This hash function is endian independent, to ensure consistent hashing if + * the cluster is composed of big and little endian systems. */ static inline u32 hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info) { u32 hash; + u32 src = ntohl(t->src); + u32 dst = ntohl(t->dst); - if (t->dst < t->src) - swap(t->src, t->dst); + if (dst < src) + swap(src, dst); - hash = jhash_3words(t->src, t->dst, t->uports.v32, info->hashrnd); + hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd); hash = hash ^ (t->proto & info->proto_mask); return (((u64)hash * info->hmodulus) >> 32) + info->hoffset; @@ -126,11 +143,7 @@ hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff, if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0) return; - t->uports.v32 = (t->uports.v32 & info->port_mask.v32) | - info->port_set.v32; - - if (t->uports.p16.dst < t->uports.p16.src) - swap(t->uports.p16.dst, t->uports.p16.src); + hmark_swap_ports(&t->uports, info); } #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) @@ -178,8 +191,8 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t, return -1; } noicmp: - t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.all); - t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.all); + t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6); + t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6); if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) return 0; @@ -255,11 +268,8 @@ hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t, } } - t->src = (__force u32) ip->saddr; - t->dst = (__force u32) ip->daddr; - - t->src &= info->src_mask.ip; - t->dst &= info->dst_mask.ip; + t->src = ip->saddr & info->src_mask.ip; + t->dst = ip->daddr & info->dst_mask.ip; if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) return 0; diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index 30e3cc71be7a..2c0b317344b7 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -292,6 +292,9 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *addr, pr_debug("%p\n", sk); + if (llcp_sock == NULL) + return -EBADFD; + addr->sa_family = AF_NFC; *len = sizeof(struct sockaddr_nfc_llcp); |