diff options
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 229 |
1 files changed, 140 insertions, 89 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6fdf1c195d8e..11ddc276776e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -434,37 +434,46 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr) { + const struct rtable *rt = container_of(dst, struct rtable, dst); struct net_device *dev = dst->dev; - const __be32 *pkey = daddr; - const struct rtable *rt; struct neighbour *n; - rt = (const struct rtable *) dst; - if (rt->rt_gateway) - pkey = (const __be32 *) &rt->rt_gateway; - else if (skb) - pkey = &ip_hdr(skb)->daddr; + rcu_read_lock_bh(); + + if (likely(rt->rt_gw_family == AF_INET)) { + n = ip_neigh_gw4(dev, rt->rt_gw4); + } else if (rt->rt_gw_family == AF_INET6) { + n = ip_neigh_gw6(dev, &rt->rt_gw6); + } else { + __be32 pkey; + + pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr); + n = ip_neigh_gw4(dev, pkey); + } + + if (n && !refcount_inc_not_zero(&n->refcnt)) + n = NULL; + + rcu_read_unlock_bh(); - n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey); - if (n) - return n; - return neigh_create(&arp_tbl, pkey, dev); + return n; } static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) { + const struct rtable *rt = container_of(dst, struct rtable, dst); struct net_device *dev = dst->dev; const __be32 *pkey = daddr; - const struct rtable *rt; - rt = (const struct rtable *)dst; - if (rt->rt_gateway) - pkey = (const __be32 *)&rt->rt_gateway; - else if (!daddr || + if (rt->rt_gw_family == AF_INET) { + pkey = (const __be32 *)&rt->rt_gw4; + } else if (rt->rt_gw_family == AF_INET6) { + return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6); + } else if (!daddr || (rt->rt_flags & - (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) + (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) { return; - + } __ipv4_confirm_neigh(dev, *(__force u32 *)pkey); } @@ -500,15 +509,17 @@ EXPORT_SYMBOL(ip_idents_reserve); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) { - static u32 ip_idents_hashrnd __read_mostly; u32 hash, id; - net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); + /* Note the following code is not safe, but this is okay. */ + if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) + get_random_bytes(&net->ipv4.ip_id_key, + sizeof(net->ipv4.ip_id_key)); - hash = jhash_3words((__force u32)iph->daddr, + hash = siphash_3u32((__force u32)iph->daddr, (__force u32)iph->saddr, - iph->protocol ^ net_hash_mix(net), - ip_idents_hashrnd); + iph->protocol, + &net->ipv4.ip_id_key); id = ip_idents_reserve(hash, segs); iph->id = htons(id); } @@ -627,13 +638,14 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh if (fnhe->fnhe_gw) { rt->rt_flags |= RTCF_REDIRECTED; - rt->rt_gateway = fnhe->fnhe_gw; - rt->rt_uses_gateway = 1; + rt->rt_gw_family = AF_INET; + rt->rt_gw4 = fnhe->fnhe_gw; } } -static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, - u32 pmtu, bool lock, unsigned long expires) +static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr, + __be32 gw, u32 pmtu, bool lock, + unsigned long expires) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; @@ -642,17 +654,17 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, unsigned int i; int depth; - genid = fnhe_genid(dev_net(nh->nh_dev)); + genid = fnhe_genid(dev_net(nhc->nhc_dev)); hval = fnhe_hashfun(daddr); spin_lock_bh(&fnhe_lock); - hash = rcu_dereference(nh->nh_exceptions); + hash = rcu_dereference(nhc->nhc_exceptions); if (!hash) { hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC); if (!hash) goto out_unlock; - rcu_assign_pointer(nh->nh_exceptions, hash); + rcu_assign_pointer(nhc->nhc_exceptions, hash); } hash += hval; @@ -704,13 +716,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, * stale, so anyone caching it rechecks if this exception * applies to them. */ - rt = rcu_dereference(nh->nh_rth_input); + rt = rcu_dereference(nhc->nhc_rth_input); if (rt) rt->dst.obsolete = DST_OBSOLETE_KILL; for_each_possible_cpu(i) { struct rtable __rcu **prt; - prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i); + prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i); rt = rcu_dereference(*prt); if (rt) rt->dst.obsolete = DST_OBSOLETE_KILL; @@ -745,7 +757,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow return; } - if (rt->rt_gateway != old_gw) + if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw) return; in_dev = __in_dev_get_rcu(dev); @@ -776,9 +788,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow neigh_event_send(n, NULL); } else { if (fib_lookup(net, fl4, &res, 0) == 0) { - struct fib_nh *nh = &FIB_RES_NH(res); + struct fib_nh_common *nhc = FIB_RES_NHC(res); - update_or_create_fnhe(nh, fl4->daddr, new_gw, + update_or_create_fnhe(nhc, fl4->daddr, new_gw, 0, false, jiffies + ip_rt_gc_timeout); } @@ -1025,9 +1037,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) rcu_read_lock(); if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { - struct fib_nh *nh = &FIB_RES_NH(res); + struct fib_nh_common *nhc = FIB_RES_NHC(res); - update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock, + update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + ip_rt_mtu_expires); } rcu_read_unlock(); @@ -1176,7 +1188,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) * * When a PMTU/redirect information update invalidates a route, * this is indicated by setting obsolete to DST_OBSOLETE_KILL or - * DST_OBSOLETE_DEAD by dst_free(). + * DST_OBSOLETE_DEAD. */ if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt)) return NULL; @@ -1261,7 +1273,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) rcu_read_lock(); if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0) - src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); + src = fib_result_prefsrc(dev_net(rt->dst.dev), &res); else src = inet_select_addr(rt->dst.dev, rt_nexthop(rt, iph->daddr), @@ -1304,7 +1316,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = READ_ONCE(dst->dev->mtu); if (unlikely(ip_mtu_locked(dst))) { - if (rt->rt_uses_gateway && mtu > 576) + if (rt->rt_gw_family && mtu > 576) mtu = 576; } @@ -1313,7 +1325,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } -static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) +static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe, __rcu **fnhe_p; @@ -1321,7 +1333,7 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) spin_lock_bh(&fnhe_lock); - hash = rcu_dereference_protected(nh->nh_exceptions, + hash = rcu_dereference_protected(nhc->nhc_exceptions, lockdep_is_held(&fnhe_lock)); hash += hval; @@ -1347,9 +1359,10 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) spin_unlock_bh(&fnhe_lock); } -static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) +static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc, + __be32 daddr) { - struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions); + struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions); struct fib_nh_exception *fnhe; u32 hval; @@ -1363,7 +1376,7 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) if (fnhe->fnhe_daddr == daddr) { if (fnhe->fnhe_expires && time_after(jiffies, fnhe->fnhe_expires)) { - ip_del_fnhe(nh, daddr); + ip_del_fnhe(nhc, daddr); break; } return fnhe; @@ -1380,9 +1393,9 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) { + struct fib_nh_common *nhc = res->nhc; + struct net_device *dev = nhc->nhc_dev; struct fib_info *fi = res->fi; - struct fib_nh *nh = &fi->fib_nh[res->nh_sel]; - struct net_device *dev = nh->nh_dev; u32 mtu = 0; if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu || @@ -1392,7 +1405,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) if (likely(!mtu)) { struct fib_nh_exception *fnhe; - fnhe = find_exception(nh, daddr); + fnhe = find_exception(nhc, daddr); if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires)) mtu = fnhe->fnhe_pmtu; } @@ -1400,7 +1413,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) if (likely(!mtu)) mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU); - return mtu - lwtunnel_headroom(nh->nh_lwtstate, mtu); + return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu); } static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, @@ -1431,8 +1444,10 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, orig = NULL; } fill_route_from_fnhe(rt, fnhe); - if (!rt->rt_gateway) - rt->rt_gateway = daddr; + if (!rt->rt_gw4) { + rt->rt_gw4 = daddr; + rt->rt_gw_family = AF_INET; + } if (do_cache) { dst_hold(&rt->dst); @@ -1451,15 +1466,15 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, return ret; } -static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) +static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt) { struct rtable *orig, *prev, **p; bool ret = true; if (rt_is_input_route(rt)) { - p = (struct rtable **)&nh->nh_rth_input; + p = (struct rtable **)&nhc->nhc_rth_input; } else { - p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output); + p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output); } orig = *p; @@ -1555,30 +1570,42 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, bool cached = false; if (fi) { - struct fib_nh *nh = &FIB_RES_NH(*res); + struct fib_nh_common *nhc = FIB_RES_NHC(*res); - if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) { - rt->rt_gateway = nh->nh_gw; - rt->rt_uses_gateway = 1; + if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) { + rt->rt_gw_family = nhc->nhc_gw_family; + /* only INET and INET6 are supported */ + if (likely(nhc->nhc_gw_family == AF_INET)) + rt->rt_gw4 = nhc->nhc_gw.ipv4; + else + rt->rt_gw6 = nhc->nhc_gw.ipv6; } + ip_dst_init_metrics(&rt->dst, fi->fib_metrics); #ifdef CONFIG_IP_ROUTE_CLASSID - rt->dst.tclassid = nh->nh_tclassid; + { + struct fib_nh *nh; + + nh = container_of(nhc, struct fib_nh, nh_common); + rt->dst.tclassid = nh->nh_tclassid; + } #endif - rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate); + rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); if (unlikely(fnhe)) cached = rt_bind_exception(rt, fnhe, daddr, do_cache); else if (do_cache) - cached = rt_cache_route(nh, rt); + cached = rt_cache_route(nhc, rt); if (unlikely(!cached)) { /* Routes we intend to cache in nexthop exception or * FIB nexthop have the DST_NOCACHE bit clear. * However, if we are unsuccessful at storing this * route into the cache we really need to set it. */ - if (!rt->rt_gateway) - rt->rt_gateway = daddr; + if (!rt->rt_gw4) { + rt->rt_gw_family = AF_INET; + rt->rt_gw4 = daddr; + } rt_add_uncached_list(rt); } } else @@ -1611,8 +1638,8 @@ struct rtable *rt_dst_alloc(struct net_device *dev, rt->rt_iif = 0; rt->rt_pmtu = 0; rt->rt_mtu_locked = 0; - rt->rt_gateway = 0; - rt->rt_uses_gateway = 0; + rt->rt_gw_family = 0; + rt->rt_gw4 = 0; INIT_LIST_HEAD(&rt->rt_uncached); rt->dst.output = ip_output; @@ -1725,6 +1752,8 @@ static int __mkroute_input(struct sk_buff *skb, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) { + struct fib_nh_common *nhc = FIB_RES_NHC(*res); + struct net_device *dev = nhc->nhc_dev; struct fib_nh_exception *fnhe; struct rtable *rth; int err; @@ -1733,7 +1762,7 @@ static int __mkroute_input(struct sk_buff *skb, u32 itag = 0; /* get a working reference to the output device */ - out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); + out_dev = __in_dev_get_rcu(dev); if (!out_dev) { net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n"); return -EINVAL; @@ -1750,10 +1779,14 @@ static int __mkroute_input(struct sk_buff *skb, do_cache = res->fi && !itag; if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && - skb->protocol == htons(ETH_P_IP) && - (IN_DEV_SHARED_MEDIA(out_dev) || - inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) - IPCB(skb)->flags |= IPSKB_DOREDIRECT; + skb->protocol == htons(ETH_P_IP)) { + __be32 gw; + + gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0; + if (IN_DEV_SHARED_MEDIA(out_dev) || + inet_addr_onlink(out_dev, saddr, gw)) + IPCB(skb)->flags |= IPSKB_DOREDIRECT; + } if (skb->protocol != htons(ETH_P_IP)) { /* Not IP (i.e. ARP). Do not create route, if it is @@ -1770,12 +1803,12 @@ static int __mkroute_input(struct sk_buff *skb, } } - fnhe = find_exception(&FIB_RES_NH(*res), daddr); + fnhe = find_exception(nhc, daddr); if (do_cache) { if (fnhe) rth = rcu_dereference(fnhe->fnhe_rth_input); else - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + rth = rcu_dereference(nhc->nhc_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -2069,7 +2102,9 @@ local_input: do_cache = false; if (res->fi) { if (!itag) { - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + struct fib_nh_common *nhc = FIB_RES_NHC(*res); + + rth = rcu_dereference(nhc->nhc_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); err = 0; @@ -2099,16 +2134,16 @@ local_input: } if (do_cache) { - struct fib_nh *nh = &FIB_RES_NH(*res); + struct fib_nh_common *nhc = FIB_RES_NHC(*res); - rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate); + rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); if (lwtunnel_input_redirect(rth->dst.lwtstate)) { WARN_ON(rth->dst.input == lwtunnel_input); rth->dst.lwtstate->orig_input = rth->dst.input; rth->dst.input = lwtunnel_input; } - if (unlikely(!rt_cache_route(nh, rth))) + if (unlikely(!rt_cache_route(nhc, rth))) rt_add_uncached_list(rth); } skb_dst_set(skb, &rth->dst); @@ -2279,10 +2314,10 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fnhe = NULL; do_cache &= fi != NULL; if (fi) { + struct fib_nh_common *nhc = FIB_RES_NHC(*res); struct rtable __rcu **prth; - struct fib_nh *nh = &FIB_RES_NH(*res); - fnhe = find_exception(nh, fl4->daddr); + fnhe = find_exception(nhc, fl4->daddr); if (!do_cache) goto add; if (fnhe) { @@ -2290,12 +2325,12 @@ static struct rtable *__mkroute_output(const struct fib_result *res, } else { if (unlikely(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH && - !(nh->nh_gw && - nh->nh_scope == RT_SCOPE_LINK))) { + !(nhc->nhc_gw_family && + nhc->nhc_scope == RT_SCOPE_LINK))) { do_cache = false; goto add; } - prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); + prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output); } rth = rcu_dereference(*prth); if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst)) @@ -2600,8 +2635,11 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_genid = rt_genid_ipv4(net); rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; - rt->rt_gateway = ort->rt_gateway; - rt->rt_uses_gateway = ort->rt_uses_gateway; + rt->rt_gw_family = ort->rt_gw_family; + if (rt->rt_gw_family == AF_INET) + rt->rt_gw4 = ort->rt_gw4; + else if (rt->rt_gw_family == AF_INET6) + rt->rt_gw6 = ort->rt_gw6; INIT_LIST_HEAD(&rt->rt_uncached); } @@ -2680,9 +2718,22 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr)) goto nla_put_failure; } - if (rt->rt_uses_gateway && - nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway)) + if (rt->rt_gw_family == AF_INET && + nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) { goto nla_put_failure; + } else if (rt->rt_gw_family == AF_INET6) { + int alen = sizeof(struct in6_addr); + struct nlattr *nla; + struct rtvia *via; + + nla = nla_reserve(skb, RTA_VIA, alen + 2); + if (!nla) + goto nla_put_failure; + + via = nla_data(nla); + via->rtvia_family = AF_INET6; + memcpy(via->rtvia_addr, &rt->rt_gw6, alen); + } expires = rt->dst.expires; if (expires) { @@ -2819,8 +2870,8 @@ static int inet_rtm_valid_getroute_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv4_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); rtm = nlmsg_data(nlh); if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || @@ -2838,8 +2889,8 @@ static int inet_rtm_valid_getroute_req(struct sk_buff *skb, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv4_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); if (err) return err; |