diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/isdn/hardware/eicon/divasmain.c | 2 | ||||
-rw-r--r-- | drivers/isdn/sc/init.c | 2 | ||||
-rw-r--r-- | drivers/net/bonding/bond_3ad.c | 10 | ||||
-rw-r--r-- | drivers/net/bonding/bond_alb.c | 58 | ||||
-rw-r--r-- | drivers/net/bonding/bond_sysfs.c | 30 | ||||
-rw-r--r-- | drivers/net/bonding/bonding.h | 14 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/cmd.c | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/fw.c | 4 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/main.c | 29 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/mcg.c | 6 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/srq.c | 1 | ||||
-rw-r--r-- | drivers/net/xen-netback/common.h | 12 | ||||
-rw-r--r-- | drivers/net/xen-netback/interface.c | 16 | ||||
-rw-r--r-- | drivers/net/xen-netback/netback.c | 294 | ||||
-rw-r--r-- | drivers/net/xen-netback/xenbus.c | 52 |
16 files changed, 426 insertions, 108 deletions
diff --git a/drivers/isdn/hardware/eicon/divasmain.c b/drivers/isdn/hardware/eicon/divasmain.c index 52377b4bf039..a2e0ed6c9a4d 100644 --- a/drivers/isdn/hardware/eicon/divasmain.c +++ b/drivers/isdn/hardware/eicon/divasmain.c @@ -481,7 +481,7 @@ void __inline__ outpp(void __iomem *addr, word p) int diva_os_register_irq(void *context, byte irq, const char *name) { int result = request_irq(irq, diva_os_irq_wrapper, - IRQF_DISABLED | IRQF_SHARED, name, context); + IRQF_SHARED, name, context); return (result); } diff --git a/drivers/isdn/sc/init.c b/drivers/isdn/sc/init.c index ca997bd4e818..92acc81f844d 100644 --- a/drivers/isdn/sc/init.c +++ b/drivers/isdn/sc/init.c @@ -336,7 +336,7 @@ static int __init sc_init(void) */ sc_adapter[cinst]->interrupt = irq[b]; if (request_irq(sc_adapter[cinst]->interrupt, interrupt_handler, - IRQF_DISABLED, interface->id, + 0, interface->id, (void *)(unsigned long) cinst)) { kfree(sc_adapter[cinst]->channel); diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index ea3e64e22e22..187b1b7772ef 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2344,7 +2344,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond, struct slave *slave; struct port *port; - bond_for_each_slave(bond, slave, iter) { + bond_for_each_slave_rcu(bond, slave, iter) { port = &(SLAVE_AD_INFO(slave).port); if (port->aggregator && port->aggregator->is_active) { aggregator = port->aggregator; @@ -2369,9 +2369,9 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) { int ret; - read_lock(&bond->lock); + rcu_read_lock(); ret = __bond_3ad_get_active_agg_info(bond, ad_info); - read_unlock(&bond->lock); + rcu_read_unlock(); return ret; } @@ -2388,7 +2388,6 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) int res = 1; int agg_id; - read_lock(&bond->lock); if (__bond_3ad_get_active_agg_info(bond, &ad_info)) { pr_debug("%s: Error: __bond_3ad_get_active_agg_info failed\n", dev->name); @@ -2406,7 +2405,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) slave_agg_no = bond_xmit_hash(bond, skb, slaves_in_agg); first_ok_slave = NULL; - bond_for_each_slave(bond, slave, iter) { + bond_for_each_slave_rcu(bond, slave, iter) { agg = SLAVE_AD_INFO(slave).port.aggregator; if (!agg || agg->aggregator_identifier != agg_id) continue; @@ -2436,7 +2435,6 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) res = bond_dev_queue_xmit(bond, skb, first_ok_slave->dev); out: - read_unlock(&bond->lock); if (res) { /* no suitable interface, frame not sent */ kfree_skb(skb); diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 576cceae026a..02872405d35d 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -230,7 +230,7 @@ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond) max_gap = LLONG_MIN; /* Find the slave with the largest gap */ - bond_for_each_slave(bond, slave, iter) { + bond_for_each_slave_rcu(bond, slave, iter) { if (SLAVE_IS_OK(slave)) { long long gap = compute_gap(slave); @@ -412,6 +412,39 @@ static struct slave *rlb_next_rx_slave(struct bonding *bond) return rx_slave; } +/* Caller must hold rcu_read_lock() for read */ +static struct slave *__rlb_next_rx_slave(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct slave *before = NULL, *rx_slave = NULL, *slave; + struct list_head *iter; + bool found = false; + + bond_for_each_slave_rcu(bond, slave, iter) { + if (!SLAVE_IS_OK(slave)) + continue; + if (!found) { + if (!before || before->speed < slave->speed) + before = slave; + } else { + if (!rx_slave || rx_slave->speed < slave->speed) + rx_slave = slave; + } + if (slave == bond_info->rx_slave) + found = true; + } + /* we didn't find anything after the current or we have something + * better before and up to the current slave + */ + if (!rx_slave || (before && rx_slave->speed < before->speed)) + rx_slave = before; + + if (rx_slave) + bond_info->rx_slave = rx_slave; + + return rx_slave; +} + /* teach the switch the mac of a disabled slave * on the primary for fault tolerance * @@ -628,12 +661,14 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct arp_pkt *arp = arp_pkt(skb); - struct slave *assigned_slave; + struct slave *assigned_slave, *curr_active_slave; struct rlb_client_info *client_info; u32 hash_index = 0; _lock_rx_hashtbl(bond); + curr_active_slave = rcu_dereference(bond->curr_active_slave); + hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst)); client_info = &(bond_info->rx_hashtbl[hash_index]); @@ -658,14 +693,14 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon * that the new client can be assigned to this entry. */ if (bond->curr_active_slave && - client_info->slave != bond->curr_active_slave) { - client_info->slave = bond->curr_active_slave; + client_info->slave != curr_active_slave) { + client_info->slave = curr_active_slave; rlb_update_client(client_info); } } } /* assign a new slave */ - assigned_slave = rlb_next_rx_slave(bond); + assigned_slave = __rlb_next_rx_slave(bond); if (assigned_slave) { if (!(client_info->assigned && @@ -728,7 +763,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) /* Don't modify or load balance ARPs that do not originate locally * (e.g.,arrive via a bridge). */ - if (!bond_slave_has_mac(bond, arp->mac_src)) + if (!bond_slave_has_mac_rcu(bond, arp->mac_src)) return NULL; if (arp->op_code == htons(ARPOP_REPLY)) { @@ -1343,11 +1378,6 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) skb_reset_mac_header(skb); eth_data = eth_hdr(skb); - /* make sure that the curr_active_slave do not change during tx - */ - read_lock(&bond->lock); - read_lock(&bond->curr_slave_lock); - switch (ntohs(skb->protocol)) { case ETH_P_IP: { const struct iphdr *iph = ip_hdr(skb); @@ -1429,12 +1459,12 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) if (!tx_slave) { /* unbalanced or unassigned, send through primary */ - tx_slave = bond->curr_active_slave; + tx_slave = rcu_dereference(bond->curr_active_slave); bond_info->unbalanced_load += skb->len; } if (tx_slave && SLAVE_IS_OK(tx_slave)) { - if (tx_slave != bond->curr_active_slave) { + if (tx_slave != rcu_dereference(bond->curr_active_slave)) { memcpy(eth_data->h_source, tx_slave->dev->dev_addr, ETH_ALEN); @@ -1449,8 +1479,6 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) } } - read_unlock(&bond->curr_slave_lock); - read_unlock(&bond->lock); if (res) { /* no suitable interface, frame not sent */ kfree_skb(skb); diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index e9249527e7e7..03bed0ca935e 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -179,7 +179,9 @@ static ssize_t bonding_show_slaves(struct device *d, struct slave *slave; int res = 0; - read_lock(&bond->lock); + if (!rtnl_trylock()) + return restart_syscall(); + bond_for_each_slave(bond, slave, iter) { if (res > (PAGE_SIZE - IFNAMSIZ)) { /* not enough space for another interface name */ @@ -190,7 +192,9 @@ static ssize_t bonding_show_slaves(struct device *d, } res += sprintf(buf + res, "%s ", slave->dev->name); } - read_unlock(&bond->lock); + + rtnl_unlock(); + if (res) buf[res-1] = '\n'; /* eat the leftover space */ @@ -626,6 +630,9 @@ static ssize_t bonding_store_arp_targets(struct device *d, unsigned long *targets_rx; int ind, i, j, ret = -EINVAL; + if (!rtnl_trylock()) + return restart_syscall(); + targets = bond->params.arp_targets; newtarget = in_aton(buf + 1); /* look for adds */ @@ -699,6 +706,7 @@ static ssize_t bonding_store_arp_targets(struct device *d, ret = count; out: + rtnl_unlock(); return ret; } static DEVICE_ATTR(arp_ip_target, S_IRUGO | S_IWUSR , bonding_show_arp_targets, bonding_store_arp_targets); @@ -1467,7 +1475,6 @@ static ssize_t bonding_show_queue_id(struct device *d, if (!rtnl_trylock()) return restart_syscall(); - read_lock(&bond->lock); bond_for_each_slave(bond, slave, iter) { if (res > (PAGE_SIZE - IFNAMSIZ - 6)) { /* not enough space for another interface_name:queue_id pair */ @@ -1479,9 +1486,9 @@ static ssize_t bonding_show_queue_id(struct device *d, res += sprintf(buf + res, "%s:%d ", slave->dev->name, slave->queue_id); } - read_unlock(&bond->lock); if (res) buf[res-1] = '\n'; /* eat the leftover space */ + rtnl_unlock(); return res; @@ -1530,8 +1537,6 @@ static ssize_t bonding_store_queue_id(struct device *d, if (!sdev) goto err_no_cmd; - read_lock(&bond->lock); - /* Search for thes slave and check for duplicate qids */ update_slave = NULL; bond_for_each_slave(bond, slave, iter) { @@ -1542,23 +1547,20 @@ static ssize_t bonding_store_queue_id(struct device *d, */ update_slave = slave; else if (qid && qid == slave->queue_id) { - goto err_no_cmd_unlock; + goto err_no_cmd; } } if (!update_slave) - goto err_no_cmd_unlock; + goto err_no_cmd; /* Actually set the qids for the slave */ update_slave->queue_id = qid; - read_unlock(&bond->lock); out: rtnl_unlock(); return ret; -err_no_cmd_unlock: - read_unlock(&bond->lock); err_no_cmd: pr_info("invalid input for queue_id set for %s.\n", bond->dev->name); @@ -1591,6 +1593,9 @@ static ssize_t bonding_store_slaves_active(struct device *d, struct list_head *iter; struct slave *slave; + if (!rtnl_trylock()) + return restart_syscall(); + if (sscanf(buf, "%d", &new_value) != 1) { pr_err("%s: no all_slaves_active value specified.\n", bond->dev->name); @@ -1610,7 +1615,6 @@ static ssize_t bonding_store_slaves_active(struct device *d, goto out; } - read_lock(&bond->lock); bond_for_each_slave(bond, slave, iter) { if (!bond_is_active_slave(slave)) { if (new_value) @@ -1619,8 +1623,8 @@ static ssize_t bonding_store_slaves_active(struct device *d, slave->inactive = 1; } } - read_unlock(&bond->lock); out: + rtnl_unlock(); return ret; } static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR, diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 0bd04fbda8e9..bb5c731e2560 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -464,6 +464,20 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond, return NULL; } +/* Caller must hold rcu_read_lock() for read */ +static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond, + const u8 *mac) +{ + struct list_head *iter; + struct slave *tmp; + + bond_for_each_slave_rcu(bond, tmp, iter) + if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) + return tmp; + + return NULL; +} + /* Check if the ip is present in arp ip list, or first free slot if ip == 0 * Returns -1 if not found, index if found */ diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index ea20182c6969..735765c21c95 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2253,7 +2253,6 @@ EXPORT_SYMBOL_GPL(mlx4_set_vf_mac); int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) { struct mlx4_priv *priv = mlx4_priv(dev); - struct mlx4_vport_oper_state *vf_oper; struct mlx4_vport_state *vf_admin; int slave; @@ -2269,7 +2268,6 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) return -EINVAL; vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; - vf_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if ((0 == vlan) && (0 == qos)) vf_admin->default_vlan = MLX4_VGT; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index fa37b7a61213..85d91665d400 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1733,7 +1733,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) /* Unregister Mac address for the port */ mlx4_en_put_qp(priv); - if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN)) + if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN)) mdev->mac_removed[priv->port] = 1; /* Free RX Rings */ diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 0d63daa2f422..c151e7a6710a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -652,7 +652,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) QUERY_DEV_CAP_RSVD_LKEY_OFFSET); MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC); if (field & 1<<6) - dev_cap->flags2 |= MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN; + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN; MLX4_GET(dev_cap->max_icm_sz, outbox, QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET); if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS) @@ -1713,7 +1713,6 @@ void mlx4_opreq_action(struct work_struct *work) u32 *outbox; u32 modifier; u16 token; - u16 type_m; u16 type; int err; u32 num_qps; @@ -1746,7 +1745,6 @@ void mlx4_opreq_action(struct work_struct *work) MLX4_GET(modifier, outbox, GET_OP_REQ_MODIFIER_OFFSET); MLX4_GET(token, outbox, GET_OP_REQ_TOKEN_OFFSET); MLX4_GET(type, outbox, GET_OP_REQ_TYPE_OFFSET); - type_m = type >> 12; type &= 0xfff; switch (type) { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 60c9f4f103fc..179d26709c94 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -42,6 +42,7 @@ #include <linux/io-mapping.h> #include <linux/delay.h> #include <linux/netdevice.h> +#include <linux/kmod.h> #include <linux/mlx4/device.h> #include <linux/mlx4/doorbell.h> @@ -650,6 +651,27 @@ err_mem: return err; } +static void mlx4_request_modules(struct mlx4_dev *dev) +{ + int port; + int has_ib_port = false; + int has_eth_port = false; +#define EN_DRV_NAME "mlx4_en" +#define IB_DRV_NAME "mlx4_ib" + + for (port = 1; port <= dev->caps.num_ports; port++) { + if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) + has_ib_port = true; + else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) + has_eth_port = true; + } + + if (has_ib_port) + request_module_nowait(IB_DRV_NAME); + if (has_eth_port) + request_module_nowait(EN_DRV_NAME); +} + /* * Change the port configuration of the device. * Every user of this function must hold the port mutex. @@ -681,6 +703,11 @@ int mlx4_change_port_types(struct mlx4_dev *dev, } mlx4_set_port_mask(dev); err = mlx4_register_device(dev); + if (err) { + mlx4_err(dev, "Failed to register device\n"); + goto out; + } + mlx4_request_modules(dev); } out: @@ -2305,6 +2332,8 @@ slave_start: if (err) goto err_port; + mlx4_request_modules(dev); + mlx4_sense_init(dev); mlx4_start_sense(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 55f6245efb6c..70f0213d68c4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -645,7 +645,7 @@ static const u8 __promisc_mode[] = { int mlx4_map_sw_to_hw_steering_mode(struct mlx4_dev *dev, enum mlx4_net_trans_promisc_mode flow_type) { - if (flow_type >= MLX4_FS_MODE_NUM || flow_type < 0) { + if (flow_type >= MLX4_FS_MODE_NUM) { mlx4_err(dev, "Invalid flow type. type = %d\n", flow_type); return -EINVAL; } @@ -681,7 +681,7 @@ const u16 __sw_id_hw[] = { int mlx4_map_sw_to_hw_steering_id(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id) { - if (id >= MLX4_NET_TRANS_RULE_NUM || id < 0) { + if (id >= MLX4_NET_TRANS_RULE_NUM) { mlx4_err(dev, "Invalid network rule id. id = %d\n", id); return -EINVAL; } @@ -706,7 +706,7 @@ static const int __rule_hw_sz[] = { int mlx4_hw_rule_sz(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id) { - if (id >= MLX4_NET_TRANS_RULE_NUM || id < 0) { + if (id >= MLX4_NET_TRANS_RULE_NUM) { mlx4_err(dev, "Invalid network rule id. id = %d\n", id); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index 79fd269e2c54..9e08e35ce351 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -34,6 +34,7 @@ #include <linux/init.h> #include <linux/mlx4/cmd.h> +#include <linux/mlx4/srq.h> #include <linux/export.h> #include <linux/gfp.h> diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 5715318d6bab..55b8dec86233 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -87,9 +87,13 @@ struct pending_tx_info { struct xenvif_rx_meta { int id; int size; + int gso_type; int gso_size; }; +#define GSO_BIT(type) \ + (1 << XEN_NETIF_GSO_TYPE_ ## type) + /* Discriminate from any valid pending_idx value. */ #define INVALID_PENDING_IDX 0xFFFF @@ -150,10 +154,12 @@ struct xenvif { u8 fe_dev_addr[6]; /* Frontend feature information. */ + int gso_mask; + int gso_prefix_mask; + u8 can_sg:1; - u8 gso:1; - u8 gso_prefix:1; - u8 csum:1; + u8 ip_csum:1; + u8 ipv6_csum:1; /* Internal feature information. */ u8 can_queue:1; /* can queue packets for receiver? */ diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 01bb854c7f62..e4aa26748f80 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -214,10 +214,14 @@ static netdev_features_t xenvif_fix_features(struct net_device *dev, if (!vif->can_sg) features &= ~NETIF_F_SG; - if (!vif->gso && !vif->gso_prefix) + if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV4)) features &= ~NETIF_F_TSO; - if (!vif->csum) + if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV6)) + features &= ~NETIF_F_TSO6; + if (!vif->ip_csum) features &= ~NETIF_F_IP_CSUM; + if (!vif->ipv6_csum) + features &= ~NETIF_F_IPV6_CSUM; return features; } @@ -306,7 +310,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, vif->domid = domid; vif->handle = handle; vif->can_sg = 1; - vif->csum = 1; + vif->ip_csum = 1; vif->dev = dev; vif->credit_bytes = vif->remaining_credit = ~0UL; @@ -316,8 +320,10 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, vif->credit_timeout.expires = jiffies; dev->netdev_ops = &xenvif_netdev_ops; - dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO; - dev->features = dev->hw_features; + dev->hw_features = NETIF_F_SG | + NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_TSO | NETIF_F_TSO6; + dev->features = dev->hw_features | NETIF_F_RXCSUM; SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops); dev->tx_queue_len = XENVIF_QUEUE_LENGTH; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index f3e591c611de..828fdab4f1a4 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -109,15 +109,12 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif, return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx)); } -/* - * This is the amount of packet we copy rather than map, so that the - * guest can't fiddle with the contents of the headers while we do - * packet processing on them (netfilter, routing, etc). +/* This is a miniumum size for the linear area to avoid lots of + * calls to __pskb_pull_tail() as we set up checksum offsets. The + * value 128 was chosen as it covers all IPv4 and most likely + * IPv6 headers. */ -#define PKT_PROT_LEN (ETH_HLEN + \ - VLAN_HLEN + \ - sizeof(struct iphdr) + MAX_IPOPTLEN + \ - sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE) +#define PKT_PROT_LEN 128 static u16 frag_get_pending_idx(skb_frag_t *frag) { @@ -145,7 +142,7 @@ static int max_required_rx_slots(struct xenvif *vif) int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE); /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */ - if (vif->can_sg || vif->gso || vif->gso_prefix) + if (vif->can_sg || vif->gso_mask || vif->gso_prefix_mask) max += MAX_SKB_FRAGS + 1; /* extra_info + frags */ return max; @@ -317,6 +314,7 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif, req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); meta = npo->meta + npo->meta_prod++; + meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; meta->gso_size = 0; meta->size = 0; meta->id = req->id; @@ -339,6 +337,7 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, struct gnttab_copy *copy_gop; struct xenvif_rx_meta *meta; unsigned long bytes; + int gso_type; /* Data must not cross a page boundary. */ BUG_ON(size + offset > PAGE_SIZE<<compound_order(page)); @@ -397,7 +396,14 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, } /* Leave a gap for the GSO descriptor. */ - if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix) + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) + gso_type = XEN_NETIF_GSO_TYPE_TCPV4; + else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + gso_type = XEN_NETIF_GSO_TYPE_TCPV6; + else + gso_type = XEN_NETIF_GSO_TYPE_NONE; + + if (*head && ((1 << gso_type) & vif->gso_mask)) vif->rx.req_cons++; *head = 0; /* There must be something in this buffer now. */ @@ -428,14 +434,28 @@ static int xenvif_gop_skb(struct sk_buff *skb, unsigned char *data; int head = 1; int old_meta_prod; + int gso_type; + int gso_size; old_meta_prod = npo->meta_prod; + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { + gso_type = XEN_NETIF_GSO_TYPE_TCPV4; + gso_size = skb_shinfo(skb)->gso_size; + } else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { + gso_type = XEN_NETIF_GSO_TYPE_TCPV6; + gso_size = skb_shinfo(skb)->gso_size; + } else { + gso_type = XEN_NETIF_GSO_TYPE_NONE; + gso_size = 0; + } + /* Set up a GSO prefix descriptor, if necessary */ - if (skb_shinfo(skb)->gso_size && vif->gso_prefix) { + if ((1 << skb_shinfo(skb)->gso_type) & vif->gso_prefix_mask) { req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); meta = npo->meta + npo->meta_prod++; - meta->gso_size = skb_shinfo(skb)->gso_size; + meta->gso_type = gso_type; + meta->gso_size = gso_size; meta->size = 0; meta->id = req->id; } @@ -443,10 +463,13 @@ static int xenvif_gop_skb(struct sk_buff *skb, req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); meta = npo->meta + npo->meta_prod++; - if (!vif->gso_prefix) - meta->gso_size = skb_shinfo(skb)->gso_size; - else + if ((1 << gso_type) & vif->gso_mask) { + meta->gso_type = gso_type; + meta->gso_size = gso_size; + } else { + meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; meta->gso_size = 0; + } meta->size = 0; meta->id = req->id; @@ -592,7 +615,8 @@ void xenvif_rx_action(struct xenvif *vif) vif = netdev_priv(skb->dev); - if (vif->meta[npo.meta_cons].gso_size && vif->gso_prefix) { + if ((1 << vif->meta[npo.meta_cons].gso_type) & + vif->gso_prefix_mask) { resp = RING_GET_RESPONSE(&vif->rx, vif->rx.rsp_prod_pvt++); @@ -629,7 +653,8 @@ void xenvif_rx_action(struct xenvif *vif) vif->meta[npo.meta_cons].size, flags); - if (vif->meta[npo.meta_cons].gso_size && !vif->gso_prefix) { + if ((1 << vif->meta[npo.meta_cons].gso_type) & + vif->gso_mask) { struct xen_netif_extra_info *gso = (struct xen_netif_extra_info *) RING_GET_RESPONSE(&vif->rx, @@ -637,8 +662,8 @@ void xenvif_rx_action(struct xenvif *vif) resp->flags |= XEN_NETRXF_extra_info; + gso->u.gso.type = vif->meta[npo.meta_cons].gso_type; gso->u.gso.size = vif->meta[npo.meta_cons].gso_size; - gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; gso->u.gso.pad = 0; gso->u.gso.features = 0; @@ -1101,15 +1126,20 @@ static int xenvif_set_skb_gso(struct xenvif *vif, return -EINVAL; } - /* Currently only TCPv4 S.O. is supported. */ - if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { + switch (gso->u.gso.type) { + case XEN_NETIF_GSO_TYPE_TCPV4: + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + break; + case XEN_NETIF_GSO_TYPE_TCPV6: + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + break; + default: netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type); xenvif_fatal_tx_err(vif); return -EINVAL; } skb_shinfo(skb)->gso_size = gso->u.gso.size; - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; /* Header must be checked, and gso_segs computed. */ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; @@ -1118,61 +1148,74 @@ static int xenvif_set_skb_gso(struct xenvif *vif, return 0; } -static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) +static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len) +{ + if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) { + /* If we need to pullup then pullup to the max, so we + * won't need to do it again. + */ + int target = min_t(int, skb->len, MAX_TCP_HEADER); + __pskb_pull_tail(skb, target - skb_headlen(skb)); + } +} + +static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb, + int recalculate_partial_csum) { - struct iphdr *iph; + struct iphdr *iph = (void *)skb->data; + unsigned int header_size; + unsigned int off; int err = -EPROTO; - int recalculate_partial_csum = 0; - /* - * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy - * peers can fail to set NETRXF_csum_blank when sending a GSO - * frame. In this case force the SKB to CHECKSUM_PARTIAL and - * recalculate the partial checksum. - */ - if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { - vif->rx_gso_checksum_fixup++; - skb->ip_summed = CHECKSUM_PARTIAL; - recalculate_partial_csum = 1; - } + off = sizeof(struct iphdr); - /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ - if (skb->ip_summed != CHECKSUM_PARTIAL) - return 0; + header_size = skb->network_header + off + MAX_IPOPTLEN; + maybe_pull_tail(skb, header_size); - if (skb->protocol != htons(ETH_P_IP)) - goto out; + off = iph->ihl * 4; - iph = (void *)skb->data; switch (iph->protocol) { case IPPROTO_TCP: - if (!skb_partial_csum_set(skb, 4 * iph->ihl, + if (!skb_partial_csum_set(skb, off, offsetof(struct tcphdr, check))) goto out; if (recalculate_partial_csum) { struct tcphdr *tcph = tcp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct tcphdr); + maybe_pull_tail(skb, header_size); + tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - iph->ihl*4, + skb->len - off, IPPROTO_TCP, 0); } break; case IPPROTO_UDP: - if (!skb_partial_csum_set(skb, 4 * iph->ihl, + if (!skb_partial_csum_set(skb, off, offsetof(struct udphdr, check))) goto out; if (recalculate_partial_csum) { struct udphdr *udph = udp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct udphdr); + maybe_pull_tail(skb, header_size); + udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - iph->ihl*4, + skb->len - off, IPPROTO_UDP, 0); } break; default: if (net_ratelimit()) netdev_err(vif->dev, - "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n", + "Attempting to checksum a non-TCP/UDP packet, " + "dropping a protocol %d packet\n", iph->protocol); goto out; } @@ -1183,6 +1226,158 @@ out: return err; } +static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, + int recalculate_partial_csum) +{ + int err = -EPROTO; + struct ipv6hdr *ipv6h = (void *)skb->data; + u8 nexthdr; + unsigned int header_size; + unsigned int off; + bool fragment; + bool done; + + done = false; + + off = sizeof(struct ipv6hdr); + + header_size = skb->network_header + off; + maybe_pull_tail(skb, header_size); + + nexthdr = ipv6h->nexthdr; + + while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) && + !done) { + switch (nexthdr) { + case IPPROTO_DSTOPTS: + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: { + struct ipv6_opt_hdr *hp = (void *)(skb->data + off); + + header_size = skb->network_header + + off + + sizeof(struct ipv6_opt_hdr); + maybe_pull_tail(skb, header_size); + + nexthdr = hp->nexthdr; + off += ipv6_optlen(hp); + break; + } + case IPPROTO_AH: { + struct ip_auth_hdr *hp = (void *)(skb->data + off); + + header_size = skb->network_header + + off + + sizeof(struct ip_auth_hdr); + maybe_pull_tail(skb, header_size); + + nexthdr = hp->nexthdr; + off += (hp->hdrlen+2)<<2; + break; + } + case IPPROTO_FRAGMENT: + fragment = true; + /* fall through */ + default: + done = true; + break; + } + } + + if (!done) { + if (net_ratelimit()) + netdev_err(vif->dev, "Failed to parse packet header\n"); + goto out; + } + + if (fragment) { + if (net_ratelimit()) + netdev_err(vif->dev, "Packet is a fragment!\n"); + goto out; + } + + switch (nexthdr) { + case IPPROTO_TCP: + if (!skb_partial_csum_set(skb, off, + offsetof(struct tcphdr, check))) + goto out; + + if (recalculate_partial_csum) { + struct tcphdr *tcph = tcp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct tcphdr); + maybe_pull_tail(skb, header_size); + + tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, + &ipv6h->daddr, + skb->len - off, + IPPROTO_TCP, 0); + } + break; + case IPPROTO_UDP: + if (!skb_partial_csum_set(skb, off, + offsetof(struct udphdr, check))) + goto out; + + if (recalculate_partial_csum) { + struct udphdr *udph = udp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct udphdr); + maybe_pull_tail(skb, header_size); + + udph->check = ~csum_ipv6_magic(&ipv6h->saddr, + &ipv6h->daddr, + skb->len - off, + IPPROTO_UDP, 0); + } + break; + default: + if (net_ratelimit()) + netdev_err(vif->dev, + "Attempting to checksum a non-TCP/UDP packet, " + "dropping a protocol %d packet\n", + nexthdr); + goto out; + } + + err = 0; + +out: + return err; +} + +static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) +{ + int err = -EPROTO; + int recalculate_partial_csum = 0; + + /* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy + * peers can fail to set NETRXF_csum_blank when sending a GSO + * frame. In this case force the SKB to CHECKSUM_PARTIAL and + * recalculate the partial checksum. + */ + if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { + vif->rx_gso_checksum_fixup++; + skb->ip_summed = CHECKSUM_PARTIAL; + recalculate_partial_csum = 1; + } + + /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + if (skb->protocol == htons(ETH_P_IP)) + err = checksum_setup_ip(vif, skb, recalculate_partial_csum); + else if (skb->protocol == htons(ETH_P_IPV6)) + err = checksum_setup_ipv6(vif, skb, recalculate_partial_csum); + + return err; +} + static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) { unsigned long now = jiffies; @@ -1428,12 +1623,7 @@ static int xenvif_tx_submit(struct xenvif *vif, int budget) xenvif_fill_frags(vif, skb); - /* - * If the initial fragment was < PKT_PROT_LEN then - * pull through some bytes from the other fragments to - * increase the linear region to PKT_PROT_LEN bytes. - */ - if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) { + if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) { int target = min_t(int, skb->len, PKT_PROT_LEN); __pskb_pull_tail(skb, target - skb_headlen(skb)); } diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 1b08d8798372..f0358992b04f 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -105,6 +105,22 @@ static int netback_probe(struct xenbus_device *dev, goto abort_transaction; } + err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6", + "%d", sg); + if (err) { + message = "writing feature-gso-tcpv6"; + goto abort_transaction; + } + + /* We support partial checksum setup for IPv6 packets */ + err = xenbus_printf(xbt, dev->nodename, + "feature-ipv6-csum-offload", + "%d", 1); + if (err) { + message = "writing feature-ipv6-csum-offload"; + goto abort_transaction; + } + /* We support rx-copy path. */ err = xenbus_printf(xbt, dev->nodename, "feature-rx-copy", "%d", 1); @@ -561,20 +577,50 @@ static int connect_rings(struct backend_info *be) val = 0; vif->can_sg = !!val; + vif->gso_mask = 0; + vif->gso_prefix_mask = 0; + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d", &val) < 0) val = 0; - vif->gso = !!val; + if (val) + vif->gso_mask |= GSO_BIT(TCPV4); if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix", "%d", &val) < 0) val = 0; - vif->gso_prefix = !!val; + if (val) + vif->gso_prefix_mask |= GSO_BIT(TCPV4); + + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6", + "%d", &val) < 0) + val = 0; + if (val) + vif->gso_mask |= GSO_BIT(TCPV6); + + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6-prefix", + "%d", &val) < 0) + val = 0; + if (val) + vif->gso_prefix_mask |= GSO_BIT(TCPV6); + + if (vif->gso_mask & vif->gso_prefix_mask) { + xenbus_dev_fatal(dev, err, + "%s: gso and gso prefix flags are not " + "mutually exclusive", + dev->otherend); + return -EOPNOTSUPP; + } if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload", "%d", &val) < 0) val = 0; - vif->csum = !val; + vif->ip_csum = !val; + + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-ipv6-csum-offload", + "%d", &val) < 0) + val = 0; + vif->ipv6_csum = !!val; /* Map the shared frame, irq etc. */ err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref, |