diff options
Diffstat (limited to 'drivers/net/vrf.c')
-rw-r--r-- | drivers/net/vrf.c | 272 |
1 files changed, 150 insertions, 122 deletions
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 8a8f1e58b415..dff08842f26d 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -42,12 +42,9 @@ #define DRV_NAME "vrf" #define DRV_VERSION "1.0" -#define vrf_master_get_rcu(dev) \ - ((struct net_device *)rcu_dereference(dev->rx_handler_data)) - struct net_vrf { - struct rtable *rth; - struct rt6_info *rt6; + struct rtable __rcu *rth; + struct rt6_info __rcu *rt6; u32 tb_id; }; @@ -60,90 +57,12 @@ struct pcpu_dstats { struct u64_stats_sync syncp; }; -/* neighbor handling is done with actual device; do not want - * to flip skb->dev for those ndisc packets. This really fails - * for multiple next protocols (e.g., NEXTHDR_HOP). But it is - * a start. - */ -#if IS_ENABLED(CONFIG_IPV6) -static bool check_ipv6_frame(const struct sk_buff *skb) -{ - const struct ipv6hdr *ipv6h; - struct ipv6hdr _ipv6h; - bool rc = true; - - ipv6h = skb_header_pointer(skb, 0, sizeof(_ipv6h), &_ipv6h); - if (!ipv6h) - goto out; - - if (ipv6h->nexthdr == NEXTHDR_ICMP) { - const struct icmp6hdr *icmph; - struct icmp6hdr _icmph; - - icmph = skb_header_pointer(skb, sizeof(_ipv6h), - sizeof(_icmph), &_icmph); - if (!icmph) - goto out; - - switch (icmph->icmp6_type) { - case NDISC_ROUTER_SOLICITATION: - case NDISC_ROUTER_ADVERTISEMENT: - case NDISC_NEIGHBOUR_SOLICITATION: - case NDISC_NEIGHBOUR_ADVERTISEMENT: - case NDISC_REDIRECT: - rc = false; - break; - } - } - -out: - return rc; -} -#else -static bool check_ipv6_frame(const struct sk_buff *skb) -{ - return false; -} -#endif - -static bool is_ip_rx_frame(struct sk_buff *skb) -{ - switch (skb->protocol) { - case htons(ETH_P_IP): - return true; - case htons(ETH_P_IPV6): - return check_ipv6_frame(skb); - } - return false; -} - static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb) { vrf_dev->stats.tx_errors++; kfree_skb(skb); } -/* note: already called with rcu_read_lock */ -static rx_handler_result_t vrf_handle_frame(struct sk_buff **pskb) -{ - struct sk_buff *skb = *pskb; - - if (is_ip_rx_frame(skb)) { - struct net_device *dev = vrf_master_get_rcu(skb->dev); - struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); - - u64_stats_update_begin(&dstats->syncp); - dstats->rx_pkts++; - dstats->rx_bytes += skb->len; - u64_stats_update_end(&dstats->syncp); - - skb->dev = dev; - - return RX_HANDLER_ANOTHER; - } - return RX_HANDLER_PASS; -} - static struct rtnl_link_stats64 *vrf_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { @@ -354,28 +273,40 @@ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb) !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } +/* holding rtnl */ static void vrf_rt6_release(struct net_vrf *vrf) { - dst_release(&vrf->rt6->dst); - vrf->rt6 = NULL; + struct rt6_info *rt6 = rtnl_dereference(vrf->rt6); + + rcu_assign_pointer(vrf->rt6, NULL); + + if (rt6) + dst_release(&rt6->dst); } static int vrf_rt6_create(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); struct net *net = dev_net(dev); + struct fib6_table *rt6i_table; struct rt6_info *rt6; int rc = -ENOMEM; + rt6i_table = fib6_new_table(net, vrf->tb_id); + if (!rt6i_table) + goto out; + rt6 = ip6_dst_alloc(net, dev, DST_HOST | DST_NOPOLICY | DST_NOXFRM | DST_NOCACHE); if (!rt6) goto out; - rt6->dst.output = vrf_output6; - rt6->rt6i_table = fib6_get_table(net, vrf->tb_id); dst_hold(&rt6->dst); - vrf->rt6 = rt6; + + rt6->rt6i_table = rt6i_table; + rt6->dst.output = vrf_output6; + rcu_assign_pointer(vrf->rt6, rt6); + rc = 0; out: return rc; @@ -449,26 +380,35 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } +/* holding rtnl */ static void vrf_rtable_release(struct net_vrf *vrf) { - struct dst_entry *dst = (struct dst_entry *)vrf->rth; + struct rtable *rth = rtnl_dereference(vrf->rth); - dst_release(dst); - vrf->rth = NULL; + rcu_assign_pointer(vrf->rth, NULL); + + if (rth) + dst_release(&rth->dst); } -static struct rtable *vrf_rtable_create(struct net_device *dev) +static int vrf_rtable_create(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); struct rtable *rth; + if (!fib_new_table(dev_net(dev), vrf->tb_id)) + return -ENOMEM; + rth = rt_dst_alloc(dev, 0, RTN_UNICAST, 1, 1, 0); - if (rth) { - rth->dst.output = vrf_output; - rth->rt_table_id = vrf->tb_id; - } + if (!rth) + return -ENOMEM; - return rth; + rth->dst.output = vrf_output; + rth->rt_table_id = vrf->tb_id; + + rcu_assign_pointer(vrf->rth, rth); + + return 0; } /**************************** device handling ********************/ @@ -497,28 +437,14 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) { int ret; - /* register the packet handler for slave ports */ - ret = netdev_rx_handler_register(port_dev, vrf_handle_frame, dev); - if (ret) { - netdev_err(port_dev, - "Device %s failed to register rx_handler\n", - port_dev->name); - goto out_fail; - } - ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL); if (ret < 0) - goto out_unregister; + return ret; port_dev->priv_flags |= IFF_L3MDEV_SLAVE; cycle_netdev(port_dev); return 0; - -out_unregister: - netdev_rx_handler_unregister(port_dev); -out_fail: - return ret; } static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev) @@ -535,8 +461,6 @@ static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev) netdev_upper_dev_unlink(port_dev, dev); port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE; - netdev_rx_handler_unregister(port_dev); - cycle_netdev(port_dev); return 0; @@ -572,8 +496,7 @@ static int vrf_dev_init(struct net_device *dev) goto out_nomem; /* create the default dst which points back to us */ - vrf->rth = vrf_rtable_create(dev); - if (!vrf->rth) + if (vrf_rtable_create(dev) != 0) goto out_stats; if (vrf_rt6_create(dev) != 0) @@ -616,8 +539,13 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev, if (!(fl4->flowi4_flags & FLOWI_FLAG_L3MDEV_SRC)) { struct net_vrf *vrf = netdev_priv(dev); - rth = vrf->rth; - dst_hold(&rth->dst); + rcu_read_lock(); + + rth = rcu_dereference(vrf->rth); + if (likely(rth)) + dst_hold(&rth->dst); + + rcu_read_unlock(); } return rth; @@ -639,6 +567,8 @@ static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4) fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF; fl4->flowi4_iif = LOOPBACK_IFINDEX; + /* make sure oif is set to VRF device for lookup */ + fl4->flowi4_oif = dev->ifindex; fl4->flowi4_tos = tos & IPTOS_RT_MASK; fl4->flowi4_scope = ((tos & RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); @@ -659,19 +589,116 @@ static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4) } #if IS_ENABLED(CONFIG_IPV6) +/* neighbor handling is done with actual device; do not want + * to flip skb->dev for those ndisc packets. This really fails + * for multiple next protocols (e.g., NEXTHDR_HOP). But it is + * a start. + */ +static bool ipv6_ndisc_frame(const struct sk_buff *skb) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + bool rc = false; + + if (iph->nexthdr == NEXTHDR_ICMP) { + const struct icmp6hdr *icmph; + struct icmp6hdr _icmph; + + icmph = skb_header_pointer(skb, sizeof(*iph), + sizeof(_icmph), &_icmph); + if (!icmph) + goto out; + + switch (icmph->icmp6_type) { + case NDISC_ROUTER_SOLICITATION: + case NDISC_ROUTER_ADVERTISEMENT: + case NDISC_NEIGHBOUR_SOLICITATION: + case NDISC_NEIGHBOUR_ADVERTISEMENT: + case NDISC_REDIRECT: + rc = true; + break; + } + } + +out: + return rc; +} + +static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, + struct sk_buff *skb) +{ + /* if packet is NDISC keep the ingress interface */ + if (!ipv6_ndisc_frame(skb)) { + skb->dev = vrf_dev; + skb->skb_iif = vrf_dev->ifindex; + + skb_push(skb, skb->mac_len); + dev_queue_xmit_nit(skb, vrf_dev); + skb_pull(skb, skb->mac_len); + + IP6CB(skb)->flags |= IP6SKB_L3SLAVE; + } + + return skb; +} + +#else +static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, + struct sk_buff *skb) +{ + return skb; +} +#endif + +static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, + struct sk_buff *skb) +{ + skb->dev = vrf_dev; + skb->skb_iif = vrf_dev->ifindex; + + skb_push(skb, skb->mac_len); + dev_queue_xmit_nit(skb, vrf_dev); + skb_pull(skb, skb->mac_len); + + return skb; +} + +/* called with rcu lock held */ +static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev, + struct sk_buff *skb, + u16 proto) +{ + switch (proto) { + case AF_INET: + return vrf_ip_rcv(vrf_dev, skb); + case AF_INET6: + return vrf_ip6_rcv(vrf_dev, skb); + } + + return skb; +} + +#if IS_ENABLED(CONFIG_IPV6) static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev, const struct flowi6 *fl6) { - struct rt6_info *rt = NULL; + struct dst_entry *dst = NULL; if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) { struct net_vrf *vrf = netdev_priv(dev); + struct rt6_info *rt; + + rcu_read_lock(); + + rt = rcu_dereference(vrf->rt6); + if (likely(rt)) { + dst = &rt->dst; + dst_hold(dst); + } - rt = vrf->rt6; - dst_hold(&rt->dst); + rcu_read_unlock(); } - return (struct dst_entry *)rt; + return dst; } #endif @@ -679,6 +706,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_fib_table = vrf_fib_table, .l3mdev_get_rtable = vrf_get_rtable, .l3mdev_get_saddr = vrf_get_saddr, + .l3mdev_l3_rcv = vrf_l3_rcv, #if IS_ENABLED(CONFIG_IPV6) .l3mdev_get_rt6_dst = vrf_get_rt6_dst, #endif |