diff options
author | David S. Miller <davem@davemloft.net> | 2016-06-17 21:25:29 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-06-17 21:25:29 -0700 |
commit | 8c0c07a3fcf824d39aed92f5e4e18ef9643dceff (patch) | |
tree | 70ba765b4370e8f37a8b7859f054ec4e18727074 | |
parent | 0023a061d75a37f50fb6952015b826d86de8d2c4 (diff) | |
parent | afbac6010aec514998214fb19a1f37732b7a1d77 (diff) |
Merge branch 'vrf-next'
David Ahern says:
====================
net: vrf: Fix ipv6 source address selection
IPv6 address selection is currently messed up for several use cases such
as unnumbered deployments with global addresses on the VRF device and none
on the enslaved devices.
Update the source address selection to consider the real output route as
opposed to the VRF route that sends packets to the VRF device first (ie.,
implement get_saddr6 similar to the IPv4 method) and update the IPv6
address selection to consider L3 domains and preference for addresses on
the VRF device).
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/vrf.c | 41 | ||||
-rw-r--r-- | include/net/ip6_route.h | 21 | ||||
-rw-r--r-- | include/net/l3mdev.h | 42 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 48 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 12 | ||||
-rw-r--r-- | net/ipv6/route.c | 17 | ||||
-rw-r--r-- | net/l3mdev/l3mdev.c | 24 |
7 files changed, 183 insertions, 22 deletions
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 32173aa9208e..b3762822b653 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -999,6 +999,46 @@ static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev, return dst; } + +/* called under rcu_read_lock */ +static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk, + struct flowi6 *fl6) +{ + struct net *net = dev_net(dev); + struct dst_entry *dst; + struct rt6_info *rt; + int err; + + if (rt6_need_strict(&fl6->daddr)) { + rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, + RT6_LOOKUP_F_IFACE); + if (unlikely(!rt)) + return 0; + + dst = &rt->dst; + } else { + __u8 flags = fl6->flowi6_flags; + + fl6->flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; + fl6->flowi6_flags |= FLOWI_FLAG_SKIP_NH_OIF; + + dst = ip6_route_output(net, sk, fl6); + rt = (struct rt6_info *)dst; + + fl6->flowi6_flags = flags; + } + + err = dst->error; + if (!err) { + err = ip6_route_get_saddr(net, rt, &fl6->daddr, + sk ? inet6_sk(sk)->srcprefs : 0, + &fl6->saddr); + } + + dst_release(dst); + + return err; +} #endif static const struct l3mdev_ops vrf_l3mdev_ops = { @@ -1008,6 +1048,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_l3_rcv = vrf_l3_rcv, #if IS_ENABLED(CONFIG_IPV6) .l3mdev_get_rt6_dst = vrf_get_rt6_dst, + .l3mdev_get_saddr6 = vrf_get_saddr6, #endif }; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f55bf3d294aa..d97305d0e71f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -18,6 +18,7 @@ struct route_info { __u8 prefix[0]; /* 0,8 or 16 */ }; +#include <net/addrconf.h> #include <net/flow.h> #include <net/ip6_fib.h> #include <net/sock.h> @@ -88,9 +89,23 @@ int ip6_route_add(struct fib6_config *cfg); int ip6_ins_rt(struct rt6_info *); int ip6_del_rt(struct rt6_info *); -int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, - const struct in6_addr *daddr, unsigned int prefs, - struct in6_addr *saddr); +static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, + const struct in6_addr *daddr, + unsigned int prefs, + struct in6_addr *saddr) +{ + struct inet6_dev *idev = + rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL; + int err = 0; + + if (rt && rt->rt6i_prefsrc.plen) + *saddr = rt->rt6i_prefsrc.addr; + else + err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, + daddr, prefs, saddr); + + return err; +} struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int flags); diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index f8a416ec674c..e90095091aa0 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -39,6 +39,9 @@ struct l3mdev_ops { /* IPv6 ops */ struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev, struct flowi6 *fl6); + int (*l3mdev_get_saddr6)(struct net_device *dev, + const struct sock *sk, + struct flowi6 *fl6); }; #ifdef CONFIG_NET_L3_MASTER_DEV @@ -76,6 +79,31 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) return rc; } +static inline +const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) +{ + /* netdev_master_upper_dev_get_rcu calls + * list_first_or_null_rcu to walk the upper dev list. + * list_first_or_null_rcu does not handle a const arg. We aren't + * making changes, just want the master device from that list so + * typecast to remove the const + */ + struct net_device *dev = (struct net_device *)_dev; + const struct net_device *master; + + if (!dev) + return NULL; + + if (netif_is_l3_master(dev)) + master = dev; + else if (netif_is_l3_slave(dev)) + master = netdev_master_upper_dev_get_rcu(dev); + else + master = NULL; + + return master; +} + /* get index of an interface to use for FIB lookups. For devices * enslaved to an L3 master device FIB lookups are based on the * master index @@ -140,6 +168,8 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4); struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6); +int l3mdev_get_saddr6(struct net *net, const struct sock *sk, + struct flowi6 *fl6); static inline struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) @@ -185,6 +215,12 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) return 0; } +static inline +const struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) +{ + return NULL; +} + static inline int l3mdev_fib_oif_rcu(struct net_device *dev) { return dev ? dev->ifindex : 0; @@ -230,6 +266,12 @@ struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6) return NULL; } +static inline int l3mdev_get_saddr6(struct net *net, const struct sock *sk, + struct flowi6 *fl6) +{ + return 0; +} + static inline struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6c8fc3f96b11..a1f6b7b31531 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1524,6 +1524,28 @@ out: return hiscore_idx; } +static int ipv6_get_saddr_master(struct net *net, + const struct net_device *dst_dev, + const struct net_device *master, + struct ipv6_saddr_dst *dst, + struct ipv6_saddr_score *scores, + int hiscore_idx) +{ + struct inet6_dev *idev; + + idev = __in6_dev_get(dst_dev); + if (idev) + hiscore_idx = __ipv6_dev_get_saddr(net, dst, idev, + scores, hiscore_idx); + + idev = __in6_dev_get(master); + if (idev) + hiscore_idx = __ipv6_dev_get_saddr(net, dst, idev, + scores, hiscore_idx); + + return hiscore_idx; +} + int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, const struct in6_addr *daddr, unsigned int prefs, struct in6_addr *saddr) @@ -1577,13 +1599,39 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, if (idev) hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx); } else { + const struct net_device *master; + int master_idx = 0; + + /* if dst_dev exists and is enslaved to an L3 device, then + * prefer addresses from dst_dev and then the master over + * any other enslaved devices in the L3 domain. + */ + master = l3mdev_master_dev_rcu(dst_dev); + if (master) { + master_idx = master->ifindex; + + hiscore_idx = ipv6_get_saddr_master(net, dst_dev, + master, &dst, + scores, hiscore_idx); + + if (scores[hiscore_idx].ifa) + goto out; + } + for_each_netdev_rcu(net, dev) { + /* only consider addresses on devices in the + * same L3 domain + */ + if (l3mdev_master_ifindex_rcu(dev) != master_idx) + continue; idev = __in6_dev_get(dev); if (!idev) continue; hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx); } } + +out: rcu_read_unlock(); hiscore = &scores[hiscore_idx]; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index fd3217579b65..1dfc402d9ad1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -910,6 +910,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, int err; int flags = 0; + if (ipv6_addr_any(&fl6->saddr) && fl6->flowi6_oif && + (!*dst || !(*dst)->error)) { + err = l3mdev_get_saddr6(net, sk, fl6); + if (err) + goto out_err; + } + /* The correct way to handle this would be to do * ip6_route_get_saddr, and then ip6_route_output; however, * the route-specific preferred source forces the @@ -999,10 +1006,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, return 0; out_err_release: - if (err == -ENETUNREACH) - IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); dst_release(*dst); *dst = NULL; +out_err: + if (err == -ENETUNREACH) + IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); return err; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9e1516785dac..08b77f421268 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2586,23 +2586,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, return rt; } -int ip6_route_get_saddr(struct net *net, - struct rt6_info *rt, - const struct in6_addr *daddr, - unsigned int prefs, - struct in6_addr *saddr) -{ - struct inet6_dev *idev = - rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL; - int err = 0; - if (rt && rt->rt6i_prefsrc.plen) - *saddr = rt->rt6i_prefsrc.addr; - else - err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, - daddr, prefs, saddr); - return err; -} - /* remove deleted ip from prefsrc entries */ struct arg_dev_net_ip { struct net_device *dev; diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index d90e4ef09e85..c4a1c3e84e12 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -162,6 +162,30 @@ int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4) } EXPORT_SYMBOL_GPL(l3mdev_get_saddr); +int l3mdev_get_saddr6(struct net *net, const struct sock *sk, + struct flowi6 *fl6) +{ + struct net_device *dev; + int rc = 0; + + if (fl6->flowi6_oif) { + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); + if (dev && netif_is_l3_slave(dev)) + dev = netdev_master_upper_dev_get_rcu(dev); + + if (dev && netif_is_l3_master(dev) && + dev->l3mdev_ops->l3mdev_get_saddr6) + rc = dev->l3mdev_ops->l3mdev_get_saddr6(dev, sk, fl6); + + rcu_read_unlock(); + } + + return rc; +} +EXPORT_SYMBOL_GPL(l3mdev_get_saddr6); + /** * l3mdev_fib_rule_match - Determine if flowi references an * L3 master device |