diff options
author | David S. Miller <davem@davemloft.net> | 2017-02-09 17:24:30 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-02-09 17:24:30 -0500 |
commit | b66a8043d0baa9818a407ed857eada5cb21cd9bb (patch) | |
tree | a6ed185caf60e4db08ef708993c6a4eaac1129b9 /drivers/net/ethernet/cisco/enic/enic_main.c | |
parent | c0e4dadb3494bd0bf4fdeac341509deac8f4b47c (diff) | |
parent | 9c744d10870c5d6f36264be39bf5de87447f1052 (diff) |
Merge branch 'enic-vxlan-offload'
Govindarajulu Varadarajan says:
====================
enic: add vxlan offload support
This series adds vxlan offload support for enic driver. The first
patch adds vxlan devcmd for configuring vxland offload parameters.
Second patch adds ndo_udp_tunnel_add/del and offload on rx path.
There are to modes in which fw supports vxlan offload.
mode 0: fcoe bit is set for encapsulated packet. fcoe_fc_crc_ok is set
if checksum of csum is ok. This bit is or of ip_csum_ok and
tcp_udp_csum_ok
mode 2: BIT(0) in rss_hash is set if it is encapsulated packet.
BIT(1) is set if outer_ip_csum_ok/
BIT(2) is set if outer_tcp_csum_ok
Some hw supports only mode 0, some support mode 0 and 2. Driver gets
the supported modes bitmap using get_supported_feature_ver devcmd
and selects the highest mode both driver and fw supports.
Third patch adds offload support on tx path by adding
enic_features_check().
v2: Order local variable declarations from longest to shortest line,
on all three patches.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/cisco/enic/enic_main.c')
-rw-r--r-- | drivers/net/ethernet/cisco/enic/enic_main.c | 282 |
1 files changed, 267 insertions, 15 deletions
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index c009f6ddabf7..4b87beeabce1 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -45,6 +45,7 @@ #endif #include <linux/crash_dump.h> #include <net/busy_poll.h> +#include <net/vxlan.h> #include "cq_enet_desc.h" #include "vnic_dev.h" @@ -176,6 +177,134 @@ static void enic_unset_affinity_hint(struct enic *enic) irq_set_affinity_hint(enic->msix_entry[i].vector, NULL); } +static void enic_udp_tunnel_add(struct net_device *netdev, + struct udp_tunnel_info *ti) +{ + struct enic *enic = netdev_priv(netdev); + __be16 port = ti->port; + int err; + + spin_lock_bh(&enic->devcmd_lock); + + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) { + netdev_info(netdev, "udp_tnl: only vxlan tunnel offload supported"); + goto error; + } + + if (ti->sa_family != AF_INET) { + netdev_info(netdev, "vxlan: only IPv4 offload supported"); + goto error; + } + + if (enic->vxlan.vxlan_udp_port_number) { + if (ntohs(port) == enic->vxlan.vxlan_udp_port_number) + netdev_warn(netdev, "vxlan: udp port already offloaded"); + else + netdev_info(netdev, "vxlan: offload supported for only one UDP port"); + + goto error; + } + + err = vnic_dev_overlay_offload_cfg(enic->vdev, + OVERLAY_CFG_VXLAN_PORT_UPDATE, + ntohs(port)); + if (err) + goto error; + + err = vnic_dev_overlay_offload_ctrl(enic->vdev, OVERLAY_FEATURE_VXLAN, + enic->vxlan.patch_level); + if (err) + goto error; + + enic->vxlan.vxlan_udp_port_number = ntohs(port); + + netdev_info(netdev, "vxlan fw-vers-%d: offload enabled for udp port: %d, sa_family: %d ", + (int)enic->vxlan.patch_level, ntohs(port), ti->sa_family); + + goto unlock; + +error: + netdev_info(netdev, "failed to offload udp port: %d, sa_family: %d, type: %d", + ntohs(port), ti->sa_family, ti->type); +unlock: + spin_unlock_bh(&enic->devcmd_lock); +} + +static void enic_udp_tunnel_del(struct net_device *netdev, + struct udp_tunnel_info *ti) +{ + struct enic *enic = netdev_priv(netdev); + int err; + + spin_lock_bh(&enic->devcmd_lock); + + if ((ti->sa_family != AF_INET) || + ((ntohs(ti->port) != enic->vxlan.vxlan_udp_port_number)) || + (ti->type != UDP_TUNNEL_TYPE_VXLAN)) { + netdev_info(netdev, "udp_tnl: port:%d, sa_family: %d, type: %d not offloaded", + ntohs(ti->port), ti->sa_family, ti->type); + goto unlock; + } + + err = vnic_dev_overlay_offload_ctrl(enic->vdev, OVERLAY_FEATURE_VXLAN, + OVERLAY_OFFLOAD_DISABLE); + if (err) { + netdev_err(netdev, "vxlan: del offload udp port: %d failed", + ntohs(ti->port)); + goto unlock; + } + + enic->vxlan.vxlan_udp_port_number = 0; + + netdev_info(netdev, "vxlan: del offload udp port %d, family %d\n", + ntohs(ti->port), ti->sa_family); + +unlock: + spin_unlock_bh(&enic->devcmd_lock); +} + +static netdev_features_t enic_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb); + struct enic *enic = netdev_priv(dev); + struct udphdr *udph; + u16 port = 0; + u16 proto; + + if (!skb->encapsulation) + return features; + + features = vxlan_features_check(skb, features); + + /* hardware only supports IPv4 vxlan tunnel */ + if (vlan_get_protocol(skb) != htons(ETH_P_IP)) + goto out; + + /* hardware does not support offload of ipv6 inner pkt */ + if (eth->h_proto != ntohs(ETH_P_IP)) + goto out; + + proto = ip_hdr(skb)->protocol; + + if (proto == IPPROTO_UDP) { + udph = udp_hdr(skb); + port = be16_to_cpu(udph->dest); + } + + /* HW supports offload of only one UDP port. Remove CSUM and GSO MASK + * for other UDP port tunnels + */ + if (port != enic->vxlan.vxlan_udp_port_number) + goto out; + + return features; + +out: + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); +} + int enic_is_dynamic(struct enic *enic) { return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN; @@ -504,20 +633,19 @@ static int enic_queue_wq_skb_csum_l4(struct enic *enic, struct vnic_wq *wq, return err; } -static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq, - struct sk_buff *skb, unsigned int mss, - int vlan_tag_insert, unsigned int vlan_tag, - int loopback) +static void enic_preload_tcp_csum_encap(struct sk_buff *skb) { - unsigned int frag_len_left = skb_headlen(skb); - unsigned int len_left = skb->len - frag_len_left; - unsigned int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); - int eop = (len_left == 0); - unsigned int len; - dma_addr_t dma_addr; - unsigned int offset = 0; - skb_frag_t *frag; + if (skb->protocol == cpu_to_be16(ETH_P_IP)) { + inner_ip_hdr(skb)->check = 0; + inner_tcp_hdr(skb)->check = + ~csum_tcpudp_magic(inner_ip_hdr(skb)->saddr, + inner_ip_hdr(skb)->daddr, 0, + IPPROTO_TCP, 0); + } +} +static void enic_preload_tcp_csum(struct sk_buff *skb) +{ /* Preload TCP csum field with IP pseudo hdr calculated * with IP length set to zero. HW will later add in length * to each TCP segment resulting from the TSO. @@ -531,6 +659,30 @@ static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq, tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); } +} + +static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq, + struct sk_buff *skb, unsigned int mss, + int vlan_tag_insert, unsigned int vlan_tag, + int loopback) +{ + unsigned int frag_len_left = skb_headlen(skb); + unsigned int len_left = skb->len - frag_len_left; + int eop = (len_left == 0); + unsigned int offset = 0; + unsigned int hdr_len; + dma_addr_t dma_addr; + unsigned int len; + skb_frag_t *frag; + + if (skb->encapsulation) { + hdr_len = skb_inner_transport_header(skb) - skb->data; + hdr_len += inner_tcp_hdrlen(skb); + enic_preload_tcp_csum_encap(skb); + } else { + hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + enic_preload_tcp_csum(skb); + } /* Queue WQ_ENET_MAX_DESC_LEN length descriptors * for the main skb fragment @@ -579,6 +731,38 @@ static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq, return 0; } +static inline int enic_queue_wq_skb_encap(struct enic *enic, struct vnic_wq *wq, + struct sk_buff *skb, + int vlan_tag_insert, + unsigned int vlan_tag, int loopback) +{ + unsigned int head_len = skb_headlen(skb); + unsigned int len_left = skb->len - head_len; + /* Hardware will overwrite the checksum fields, calculating from + * scratch and ignoring the value placed by software. + * Offload mode = 00 + * mss[2], mss[1], mss[0] bits are set + */ + unsigned int mss_or_csum = 7; + int eop = (len_left == 0); + dma_addr_t dma_addr; + int err = 0; + + dma_addr = pci_map_single(enic->pdev, skb->data, head_len, + PCI_DMA_TODEVICE); + if (unlikely(enic_dma_map_check(enic, dma_addr))) + return -ENOMEM; + + enic_queue_wq_desc_ex(wq, skb, dma_addr, head_len, mss_or_csum, 0, + vlan_tag_insert, vlan_tag, + WQ_ENET_OFFLOAD_MODE_CSUM, eop, 1 /* SOP */, eop, + loopback); + if (!eop) + err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback); + + return err; +} + static inline void enic_queue_wq_skb(struct enic *enic, struct vnic_wq *wq, struct sk_buff *skb) { @@ -601,6 +785,9 @@ static inline void enic_queue_wq_skb(struct enic *enic, err = enic_queue_wq_skb_tso(enic, wq, skb, mss, vlan_tag_insert, vlan_tag, loopback); + else if (skb->encapsulation) + err = enic_queue_wq_skb_encap(enic, wq, skb, vlan_tag_insert, + vlan_tag, loopback); else if (skb->ip_summed == CHECKSUM_PARTIAL) err = enic_queue_wq_skb_csum_l4(enic, wq, skb, vlan_tag_insert, vlan_tag, loopback); @@ -1113,6 +1300,7 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, u8 packet_error; u16 q_number, completed_index, bytes_written, vlan_tci, checksum; u32 rss_hash; + bool outer_csum_ok = true, encap = false; if (skipped) return; @@ -1161,7 +1349,8 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, skb_put(skb, bytes_written); skb->protocol = eth_type_trans(skb, netdev); skb_record_rx_queue(skb, q_number); - if (netdev->features & NETIF_F_RXHASH) { + if ((netdev->features & NETIF_F_RXHASH) && rss_hash && + (type == 3)) { switch (rss_type) { case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4: case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6: @@ -1175,15 +1364,39 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, break; } } + if (enic->vxlan.vxlan_udp_port_number) { + switch (enic->vxlan.patch_level) { + case 0: + if (fcoe) { + encap = true; + outer_csum_ok = fcoe_fc_crc_ok; + } + break; + case 2: + if ((type == 7) && + (rss_hash & BIT(0))) { + encap = true; + outer_csum_ok = (rss_hash & BIT(1)) && + (rss_hash & BIT(2)); + } + break; + } + } /* Hardware does not provide whole packet checksum. It only * provides pseudo checksum. Since hw validates the packet * checksum but not provide us the checksum value. use * CHECSUM_UNNECESSARY. + * + * In case of encap pkt tcp_udp_csum_ok/tcp_udp_csum_ok is + * inner csum_ok. outer_csum_ok is set by hw when outer udp + * csum is correct or is zero. */ - if ((netdev->features & NETIF_F_RXCSUM) && tcp_udp_csum_ok && - ipv4_csum_ok) + if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc && + tcp_udp_csum_ok && ipv4_csum_ok && outer_csum_ok) { skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = encap; + } if (vlan_stripped) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); @@ -2285,6 +2498,9 @@ static const struct net_device_ops enic_netdev_dynamic_ops = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = enic_rx_flow_steer, #endif + .ndo_udp_tunnel_add = enic_udp_tunnel_add, + .ndo_udp_tunnel_del = enic_udp_tunnel_del, + .ndo_features_check = enic_features_check, }; static const struct net_device_ops enic_netdev_ops = { @@ -2308,6 +2524,9 @@ static const struct net_device_ops enic_netdev_ops = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = enic_rx_flow_steer, #endif + .ndo_udp_tunnel_add = enic_udp_tunnel_add, + .ndo_udp_tunnel_del = enic_udp_tunnel_del, + .ndo_features_check = enic_features_check, }; static void enic_dev_deinit(struct enic *enic) @@ -2683,6 +2902,39 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features |= NETIF_F_RXHASH; if (ENIC_SETTING(enic, RXCSUM)) netdev->hw_features |= NETIF_F_RXCSUM; + if (ENIC_SETTING(enic, VXLAN)) { + u64 patch_level; + + netdev->hw_enc_features |= NETIF_F_RXCSUM | + NETIF_F_TSO | + NETIF_F_TSO_ECN | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_HW_CSUM | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_features |= netdev->hw_enc_features; + /* get bit mask from hw about supported offload bit level + * BIT(0) = fw supports patch_level 0 + * fcoe bit = encap + * fcoe_fc_crc_ok = outer csum ok + * BIT(1) = always set by fw + * BIT(2) = fw supports patch_level 2 + * BIT(0) in rss_hash = encap + * BIT(1,2) in rss_hash = outer_ip_csum_ok/ + * outer_tcp_csum_ok + * used in enic_rq_indicate_buf + */ + err = vnic_dev_get_supported_feature_ver(enic->vdev, + VIC_FEATURE_VXLAN, + &patch_level); + if (err) + patch_level = 0; + /* mask bits that are supported by driver + */ + patch_level &= BIT_ULL(0) | BIT_ULL(2); + patch_level = fls(patch_level); + patch_level = patch_level ? patch_level - 1 : 0; + enic->vxlan.patch_level = patch_level; + } netdev->features |= netdev->hw_features; netdev->vlan_features |= netdev->features; |