summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c27
-rw-r--r--net/ipv6/addrconf_core.c40
-rw-r--r--net/ipv6/af_inet6.c24
-rw-r--r--net/ipv6/fib6_rules.c53
-rw-r--r--net/ipv6/icmp.c59
-rw-r--r--net/ipv6/ila/ila_main.c5
-rw-r--r--net/ipv6/ip6_fib.c50
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/ip6mr.c1
-rw-r--r--net/ipv6/ndisc.c25
-rw-r--r--net/ipv6/netfilter/Kconfig19
-rw-r--r--net/ipv6/netfilter/Makefile2
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c81
-rw-r--r--net/ipv6/netfilter/nft_chain_route_ipv6.c91
-rw-r--r--net/ipv6/output_core.c30
-rw-r--r--net/ipv6/raw.c1
-rw-r--r--net/ipv6/route.c1139
-rw-r--r--net/ipv6/seg6.c5
-rw-r--r--net/ipv6/tcp_ipv6.c57
-rw-r--r--net/ipv6/udp.c10
20 files changed, 828 insertions, 893 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4ae17a966ae3..340a0f06f974 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -173,7 +173,8 @@ static int addrconf_ifdown(struct net_device *dev, int how);
static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
int plen,
const struct net_device *dev,
- u32 flags, u32 noflags);
+ u32 flags, u32 noflags,
+ bool no_gw);
static void addrconf_dad_start(struct inet6_ifaddr *ifp);
static void addrconf_dad_work(struct work_struct *w);
@@ -1230,10 +1231,8 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_r
{
struct fib6_info *f6i;
- f6i = addrconf_get_prefix_route(&ifp->addr,
- ifp->prefix_len,
- ifp->idev->dev,
- 0, RTF_GATEWAY | RTF_DEFAULT);
+ f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len,
+ ifp->idev->dev, 0, RTF_DEFAULT, true);
if (f6i) {
if (del_rt)
ip6_del_rt(dev_net(ifp->idev->dev), f6i);
@@ -2402,7 +2401,8 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, u32 metric,
static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
int plen,
const struct net_device *dev,
- u32 flags, u32 noflags)
+ u32 flags, u32 noflags,
+ bool no_gw)
{
struct fib6_node *fn;
struct fib6_info *rt = NULL;
@@ -2419,7 +2419,9 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
goto out;
for_each_fib6_node_rt_rcu(fn) {
- if (rt->fib6_nh.nh_dev->ifindex != dev->ifindex)
+ if (rt->fib6_nh.fib_nh_dev->ifindex != dev->ifindex)
+ continue;
+ if (no_gw && rt->fib6_nh.fib_nh_gw_family)
continue;
if ((rt->fib6_flags & flags) != flags)
continue;
@@ -2717,7 +2719,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
pinfo->prefix_len,
dev,
RTF_ADDRCONF | RTF_PREFIX_RT,
- RTF_GATEWAY | RTF_DEFAULT);
+ RTF_DEFAULT, true);
if (rt) {
/* Autoconf prefix route */
@@ -4588,10 +4590,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
struct fib6_info *f6i;
u32 prio;
- f6i = addrconf_get_prefix_route(&ifp->addr,
- ifp->prefix_len,
- ifp->idev->dev,
- 0, RTF_GATEWAY | RTF_DEFAULT);
+ f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len,
+ ifp->idev->dev, 0, RTF_DEFAULT, true);
if (!f6i)
return -ENOENT;
@@ -5972,7 +5972,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
struct fib6_info *rt;
rt = addrconf_get_prefix_route(&ifp->peer_addr, 128,
- ifp->idev->dev, 0, 0);
+ ifp->idev->dev, 0, 0,
+ false);
if (rt)
ip6_del_rt(net, rt);
}
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 6c79af056d9b..763a947e0d14 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -5,7 +5,7 @@
#include <linux/export.h>
#include <net/ipv6.h>
-#include <net/addrconf.h>
+#include <net/ipv6_stubs.h>
#include <net/ip.h>
/* if ipv6 module registers this function is used by xfrm to force all
@@ -144,43 +144,53 @@ static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id)
return NULL;
}
-static struct fib6_info *
+static int
eafnosupport_fib6_table_lookup(struct net *net, struct fib6_table *table,
- int oif, struct flowi6 *fl6, int flags)
+ int oif, struct flowi6 *fl6,
+ struct fib6_result *res, int flags)
{
- return NULL;
+ return -EAFNOSUPPORT;
}
-static struct fib6_info *
+static int
eafnosupport_fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
- int flags)
+ struct fib6_result *res, int flags)
{
- return NULL;
+ return -EAFNOSUPPORT;
}
-static struct fib6_info *
-eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i,
- struct flowi6 *fl6, int oif,
- const struct sk_buff *skb, int strict)
+static void
+eafnosupport_fib6_select_path(const struct net *net, struct fib6_result *res,
+ struct flowi6 *fl6, int oif, bool have_oif_match,
+ const struct sk_buff *skb, int strict)
{
- return f6i;
}
static u32
-eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
- struct in6_addr *saddr)
+eafnosupport_ip6_mtu_from_fib6(const struct fib6_result *res,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
return 0;
}
+static int eafnosupport_fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
+ struct fib6_config *cfg, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack)
+{
+ NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel");
+ return -EAFNOSUPPORT;
+}
+
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
.ipv6_route_input = eafnosupport_ipv6_route_input,
.fib6_get_table = eafnosupport_fib6_get_table,
.fib6_table_lookup = eafnosupport_fib6_table_lookup,
.fib6_lookup = eafnosupport_fib6_lookup,
- .fib6_multipath_select = eafnosupport_fib6_multipath_select,
+ .fib6_select_path = eafnosupport_fib6_select_path,
.ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6,
+ .fib6_nh_init = eafnosupport_fib6_nh_init,
};
EXPORT_SYMBOL_GPL(ipv6_stub);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 2f45d2a3e3a3..c04ae282f4e4 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -56,6 +56,7 @@
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
+#include <net/ipv6_stubs.h>
#include <net/ndisc.h>
#ifdef CONFIG_IPV6_TUNNEL
#include <net/ip6_tunnel.h>
@@ -546,12 +547,6 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct net *net = sock_net(sk);
switch (cmd) {
- case SIOCGSTAMP:
- return sock_get_timestamp(sk, (struct timeval __user *)arg);
-
- case SIOCGSTAMPNS:
- return sock_get_timestampns(sk, (struct timespec __user *)arg);
-
case SIOCADDRT:
case SIOCDELRT:
@@ -584,6 +579,7 @@ const struct proto_ops inet6_stream_ops = {
.getname = inet6_getname,
.poll = tcp_poll, /* ok */
.ioctl = inet6_ioctl, /* must change */
+ .gettstamp = sock_gettstamp,
.listen = inet_listen, /* ok */
.shutdown = inet_shutdown, /* ok */
.setsockopt = sock_common_setsockopt, /* ok */
@@ -617,6 +613,7 @@ const struct proto_ops inet6_dgram_ops = {
.getname = inet6_getname,
.poll = udp_poll, /* ok */
.ioctl = inet6_ioctl, /* must change */
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen, /* ok */
.shutdown = inet_shutdown, /* ok */
.setsockopt = sock_common_setsockopt, /* ok */
@@ -847,6 +844,17 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.bindv6only = 0;
net->ipv6.sysctl.icmpv6_time = 1*HZ;
net->ipv6.sysctl.icmpv6_echo_ignore_all = 0;
+ net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0;
+ net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0;
+
+ /* By default, rate limit error messages.
+ * Except for pmtu discovery, it would break it.
+ * proc_do_large_bitmap needs pointer to the bitmap.
+ */
+ bitmap_set(net->ipv6.sysctl.icmpv6_ratemask, 0, ICMPV6_ERRMSG_MAX + 1);
+ bitmap_clear(net->ipv6.sysctl.icmpv6_ratemask, ICMPV6_PKT_TOOBIG, 1);
+ net->ipv6.sysctl.icmpv6_ratemask_ptr = net->ipv6.sysctl.icmpv6_ratemask;
+
net->ipv6.sysctl.flowlabel_consistency = 1;
net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS;
net->ipv6.sysctl.idgen_retries = 3;
@@ -914,8 +922,10 @@ static const struct ipv6_stub ipv6_stub_impl = {
.fib6_get_table = fib6_get_table,
.fib6_table_lookup = fib6_table_lookup,
.fib6_lookup = fib6_lookup,
- .fib6_multipath_select = fib6_multipath_select,
+ .fib6_select_path = fib6_select_path,
.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
+ .fib6_nh_init = fib6_nh_init,
+ .fib6_nh_release = fib6_nh_release,
.udpv6_encap_enable = udpv6_encap_enable,
.ndisc_send_na = ndisc_send_na,
.nd_tbl = &nd_tbl,
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index f590446595d8..06d1b7763600 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -61,16 +61,16 @@ unsigned int fib6_rules_seq_read(struct net *net)
}
/* called with rcu lock held; no reference taken on fib6_info */
-struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
- int flags)
+int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+ struct fib6_result *res, int flags)
{
- struct fib6_info *f6i;
int err;
if (net->ipv6.fib6_has_custom_rules) {
struct fib_lookup_arg arg = {
.lookup_ptr = fib6_table_lookup,
.lookup_data = &oif,
+ .result = res,
.flags = FIB_LOOKUP_NOREF,
};
@@ -78,19 +78,15 @@ struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
err = fib_rules_lookup(net->ipv6.fib6_rules_ops,
flowi6_to_flowi(fl6), flags, &arg);
- if (err)
- return ERR_PTR(err);
-
- f6i = arg.result ? : net->ipv6.fib6_null_entry;
} else {
- f6i = fib6_table_lookup(net, net->ipv6.fib6_local_tbl,
- oif, fl6, flags);
- if (!f6i || f6i == net->ipv6.fib6_null_entry)
- f6i = fib6_table_lookup(net, net->ipv6.fib6_main_tbl,
- oif, fl6, flags);
+ err = fib6_table_lookup(net, net->ipv6.fib6_local_tbl, oif,
+ fl6, res, flags);
+ if (err || res->f6i == net->ipv6.fib6_null_entry)
+ err = fib6_table_lookup(net, net->ipv6.fib6_main_tbl,
+ oif, fl6, res, flags);
}
- return f6i;
+ return err;
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
@@ -98,9 +94,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
int flags, pol_lookup_t lookup)
{
if (net->ipv6.fib6_has_custom_rules) {
+ struct fib6_result res = {};
struct fib_lookup_arg arg = {
.lookup_ptr = lookup,
.lookup_data = skb,
+ .result = &res,
.flags = FIB_LOOKUP_NOREF,
};
@@ -110,8 +108,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
fib_rules_lookup(net->ipv6.fib6_rules_ops,
flowi6_to_flowi(fl6), flags, &arg);
- if (arg.result)
- return arg.result;
+ if (res.rt6)
+ return &res.rt6->dst;
} else {
struct rt6_info *rt;
@@ -157,11 +155,11 @@ static int fib6_rule_saddr(struct net *net, struct fib_rule *rule, int flags,
static int fib6_rule_action_alt(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
+ struct fib6_result *res = arg->result;
struct flowi6 *flp6 = &flp->u.ip6;
struct net *net = rule->fr_net;
struct fib6_table *table;
- struct fib6_info *f6i;
- int err = -EAGAIN, *oif;
+ int err, *oif;
u32 tb_id;
switch (rule->action) {
@@ -182,14 +180,12 @@ static int fib6_rule_action_alt(struct fib_rule *rule, struct flowi *flp,
return -EAGAIN;
oif = (int *)arg->lookup_data;
- f6i = fib6_table_lookup(net, table, *oif, flp6, flags);
- if (f6i != net->ipv6.fib6_null_entry) {
+ err = fib6_table_lookup(net, table, *oif, flp6, res, flags);
+ if (!err && res->f6i != net->ipv6.fib6_null_entry)
err = fib6_rule_saddr(net, rule, flags, flp6,
- fib6_info_nh_dev(f6i));
-
- if (likely(!err))
- arg->result = f6i;
- }
+ res->nh->fib_nh_dev);
+ else
+ err = -EAGAIN;
return err;
}
@@ -197,6 +193,7 @@ static int fib6_rule_action_alt(struct fib_rule *rule, struct flowi *flp,
static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
+ struct fib6_result *res = arg->result;
struct flowi6 *flp6 = &flp->u.ip6;
struct rt6_info *rt = NULL;
struct fib6_table *table;
@@ -251,7 +248,7 @@ again:
discard_pkt:
dst_hold(&rt->dst);
out:
- arg->result = rt;
+ res->rt6 = rt;
return err;
}
@@ -266,9 +263,13 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
{
- struct rt6_info *rt = (struct rt6_info *) arg->result;
+ struct fib6_result *res = arg->result;
+ struct rt6_info *rt = res->rt6;
struct net_device *dev = NULL;
+ if (!rt)
+ return false;
+
if (rt->rt6i_idev)
dev = rt->rt6i_idev->dev;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 802faa2fcc0e..afb915807cd0 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -168,22 +168,21 @@ static bool is_ineligible(const struct sk_buff *skb)
return false;
}
-static bool icmpv6_mask_allow(int type)
+static bool icmpv6_mask_allow(struct net *net, int type)
{
- /* Informational messages are not limited. */
- if (type & ICMPV6_INFOMSG_MASK)
+ if (type > ICMPV6_MSG_MAX)
return true;
- /* Do not limit pmtu discovery, it would break it. */
- if (type == ICMPV6_PKT_TOOBIG)
+ /* Limit if icmp type is set in ratemask. */
+ if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
return true;
return false;
}
-static bool icmpv6_global_allow(int type)
+static bool icmpv6_global_allow(struct net *net, int type)
{
- if (icmpv6_mask_allow(type))
+ if (icmpv6_mask_allow(net, type))
return true;
if (icmp_global_allow())
@@ -202,7 +201,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
struct dst_entry *dst;
bool res = false;
- if (icmpv6_mask_allow(type))
+ if (icmpv6_mask_allow(net, type))
return true;
/*
@@ -511,7 +510,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
local_bh_disable();
/* Check global sysctl_icmp_msgs_per_sec ratelimit */
- if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
+ if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
goto out_bh_enable;
mip6_addr_swap(skb);
@@ -683,12 +682,20 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
struct dst_entry *dst;
struct ipcm6_cookie ipc6;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
+ bool acast;
+
+ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
+ net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
+ return;
saddr = &ipv6_hdr(skb)->daddr;
+ acast = ipv6_anycast_destination(skb_dst(skb), saddr);
+ if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
+ return;
+
if (!ipv6_unicast_destination(skb) &&
- !(net->ipv6.sysctl.anycast_src_echo_reply &&
- ipv6_anycast_destination(skb_dst(skb), saddr)))
+ !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
saddr = NULL;
memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
@@ -723,6 +730,11 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
if (IS_ERR(dst))
goto out;
+ /* Check the ratelimit */
+ if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
+ !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
+ goto out_dst_release;
+
idev = __in6_dev_get(skb->dev);
msg.skb = skb;
@@ -743,6 +755,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
skb->len + sizeof(struct icmp6hdr));
}
+out_dst_release:
dst_release(dst);
out:
icmpv6_xmit_unlock(sk);
@@ -1115,6 +1128,27 @@ static struct ctl_table ipv6_icmp_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "echo_ignore_multicast",
+ .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "echo_ignore_anycast",
+ .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "ratemask",
+ .data = &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
+ .maxlen = ICMPV6_MSG_MAX + 1,
+ .mode = 0644,
+ .proc_handler = proc_do_large_bitmap,
+ },
{ },
};
@@ -1129,6 +1163,9 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
if (table) {
table[0].data = &net->ipv6.sysctl.icmpv6_time;
table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
+ table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
+ table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
+ table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
}
return table;
}
diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c
index 18fac76b9520..8d31a5066d0c 100644
--- a/net/ipv6/ila/ila_main.c
+++ b/net/ipv6/ila/ila_main.c
@@ -17,19 +17,16 @@ static const struct genl_ops ila_nl_ops[] = {
{
.cmd = ILA_CMD_ADD,
.doit = ila_xlat_nl_cmd_add_mapping,
- .policy = ila_nl_policy,
.flags = GENL_ADMIN_PERM,
},
{
.cmd = ILA_CMD_DEL,
.doit = ila_xlat_nl_cmd_del_mapping,
- .policy = ila_nl_policy,
.flags = GENL_ADMIN_PERM,
},
{
.cmd = ILA_CMD_FLUSH,
.doit = ila_xlat_nl_cmd_flush,
- .policy = ila_nl_policy,
.flags = GENL_ADMIN_PERM,
},
{
@@ -38,7 +35,6 @@ static const struct genl_ops ila_nl_ops[] = {
.start = ila_xlat_nl_dump_start,
.dumpit = ila_xlat_nl_dump,
.done = ila_xlat_nl_dump_done,
- .policy = ila_nl_policy,
},
};
@@ -49,6 +45,7 @@ struct genl_family ila_nl_family __ro_after_init = {
.name = ILA_GENL_NAME,
.version = ILA_GENL_VERSION,
.maxattr = ILA_ATTR_MAX,
+ .policy = ila_nl_policy,
.netnsok = true,
.parallel_ops = true,
.module = THIS_MODULE,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 6613d8dbb0e5..a8919c217cc2 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -162,7 +162,7 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
}
INIT_LIST_HEAD(&f6i->fib6_siblings);
- atomic_inc(&f6i->fib6_ref);
+ refcount_set(&f6i->fib6_ref, 1);
return f6i;
}
@@ -175,10 +175,7 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
WARN_ON(f6i->fib6_node);
bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
- if (bucket) {
- f6i->rt6i_exception_bucket = NULL;
- kfree(bucket);
- }
+ kfree(bucket);
if (f6i->rt6i_pcpu) {
int cpu;
@@ -199,10 +196,7 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
free_percpu(f6i->rt6i_pcpu);
}
- lwtstate_put(f6i->fib6_nh.nh_lwtstate);
-
- if (f6i->fib6_nh.nh_dev)
- dev_put(f6i->fib6_nh.nh_dev);
+ fib6_nh_release(&f6i->fib6_nh);
ip_fib_metrics_put(f6i->fib6_metrics);
@@ -357,10 +351,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
}
/* called with rcu lock held; no reference taken on fib6_info */
-struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
- int flags)
+int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+ struct fib6_result *res, int flags)
{
- return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, flags);
+ return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6,
+ res, flags);
}
static void __net_init fib6_tables_init(struct net *net)
@@ -851,8 +846,8 @@ insert_above:
RCU_INIT_POINTER(in->parent, pn);
in->leaf = fn->leaf;
- atomic_inc(&rcu_dereference_protected(in->leaf,
- lockdep_is_held(&table->tb6_lock))->fib6_ref);
+ fib6_info_hold(rcu_dereference_protected(in->leaf,
+ lockdep_is_held(&table->tb6_lock)));
/* update parent pointer */
if (dir)
@@ -934,7 +929,7 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
{
struct fib6_table *table = rt->fib6_table;
- if (atomic_read(&rt->fib6_ref) != 1) {
+ if (refcount_read(&rt->fib6_ref) != 1) {
/* This route is used as dummy address holder in some split
* nodes. It is not leaked, but it still holds other resources,
* which must be released in time. So, scan ascendant nodes
@@ -947,7 +942,7 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
struct fib6_info *new_leaf;
if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
new_leaf = fib6_find_prefix(net, table, fn);
- atomic_inc(&new_leaf->fib6_ref);
+ fib6_info_hold(new_leaf);
rcu_assign_pointer(fn->leaf, new_leaf);
fib6_info_release(rt);
@@ -1113,7 +1108,7 @@ add:
return err;
rcu_assign_pointer(rt->fib6_next, iter);
- atomic_inc(&rt->fib6_ref);
+ fib6_info_hold(rt);
rcu_assign_pointer(rt->fib6_node, fn);
rcu_assign_pointer(*ins, rt);
if (!info->skip_notify)
@@ -1141,7 +1136,7 @@ add:
if (err)
return err;
- atomic_inc(&rt->fib6_ref);
+ fib6_info_hold(rt);
rcu_assign_pointer(rt->fib6_node, fn);
rt->fib6_next = iter->fib6_next;
rcu_assign_pointer(*ins, rt);
@@ -1283,7 +1278,7 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
if (!sfn)
goto failure;
- atomic_inc(&info->nl_net->ipv6.fib6_null_entry->fib6_ref);
+ fib6_info_hold(info->nl_net->ipv6.fib6_null_entry);
rcu_assign_pointer(sfn->leaf,
info->nl_net->ipv6.fib6_null_entry);
sfn->fn_flags = RTN_ROOT;
@@ -1326,7 +1321,7 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
rcu_assign_pointer(fn->leaf,
info->nl_net->ipv6.fib6_null_entry);
} else {
- atomic_inc(&rt->fib6_ref);
+ fib6_info_hold(rt);
rcu_assign_pointer(fn->leaf, rt);
}
}
@@ -2297,6 +2292,7 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
{
struct fib6_info *rt = v;
struct ipv6_route_iter *iter = seq->private;
+ unsigned int flags = rt->fib6_flags;
const struct net_device *dev;
seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
@@ -2306,15 +2302,17 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
#else
seq_puts(seq, "00000000000000000000000000000000 00 ");
#endif
- if (rt->fib6_flags & RTF_GATEWAY)
- seq_printf(seq, "%pi6", &rt->fib6_nh.nh_gw);
- else
+ if (rt->fib6_nh.fib_nh_gw_family) {
+ flags |= RTF_GATEWAY;
+ seq_printf(seq, "%pi6", &rt->fib6_nh.fib_nh_gw6);
+ } else {
seq_puts(seq, "00000000000000000000000000000000");
+ }
- dev = rt->fib6_nh.nh_dev;
+ dev = rt->fib6_nh.fib_nh_dev;
seq_printf(seq, " %08x %08x %08x %08x %8s\n",
- rt->fib6_metric, atomic_read(&rt->fib6_ref), 0,
- rt->fib6_flags, dev ? dev->name : "");
+ rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
+ flags, dev ? dev->name : "");
iter->w.leaf = NULL;
return 0;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e51f3c648b09..adef2236abe2 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -117,7 +117,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
- ret = neigh_output(neigh, skb);
+ ret = neigh_output(neigh, skb, false);
rcu_read_unlock_bh();
return ret;
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index e4dd57976737..4e69847ed5be 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -355,7 +355,6 @@ static const struct rhashtable_params ip6mr_rht_params = {
.key_offset = offsetof(struct mfc6_cache, cmparg),
.key_len = sizeof(struct mfc6_cache_cmp_arg),
.nelem_hint = 3,
- .locks_mul = 1,
.obj_cmpfn = ip6mr_hash_cmp,
.automatic_shrinking = true,
};
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 659ecf4e4b3c..4c8e2ea8bf19 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -77,6 +77,8 @@ static u32 ndisc_hash(const void *pkey,
const struct net_device *dev,
__u32 *hash_rnd);
static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey);
+static bool ndisc_allow_add(const struct net_device *dev,
+ struct netlink_ext_ack *extack);
static int ndisc_constructor(struct neighbour *neigh);
static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
@@ -117,6 +119,7 @@ struct neigh_table nd_tbl = {
.pconstructor = pndisc_constructor,
.pdestructor = pndisc_destructor,
.proxy_redo = pndisc_redo,
+ .allow_add = ndisc_allow_add,
.id = "ndisc_cache",
.parms = {
.tbl = &nd_tbl,
@@ -392,6 +395,20 @@ static void pndisc_destructor(struct pneigh_entry *n)
ipv6_dev_mc_dec(dev, &maddr);
}
+/* called with rtnl held */
+static bool ndisc_allow_add(const struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ struct inet6_dev *idev = __in6_dev_get(dev);
+
+ if (!idev || idev->cnf.disable_ipv6) {
+ NL_SET_ERR_MSG(extack, "IPv6 is disabled on this device");
+ return false;
+ }
+
+ return true;
+}
+
static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
int len)
{
@@ -1276,8 +1293,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
if (rt) {
- neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw,
- rt->fib6_nh.nh_dev, NULL,
+ neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6,
+ rt->fib6_nh.fib_nh_dev, NULL,
&ipv6_hdr(skb)->saddr);
if (!neigh) {
ND_PRINTK(0, err,
@@ -1306,8 +1323,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
- neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw,
- rt->fib6_nh.nh_dev, NULL,
+ neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6,
+ rt->fib6_nh.fib_nh_dev, NULL,
&ipv6_hdr(skb)->saddr);
if (!neigh) {
ND_PRINTK(0, err,
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index ddc99a1653aa..086fc669279e 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -23,14 +23,6 @@ config NF_TABLES_IPV6
if NF_TABLES_IPV6
-config NFT_CHAIN_ROUTE_IPV6
- tristate "IPv6 nf_tables route chain support"
- help
- This option enables the "route" chain for IPv6 in nf_tables. This
- chain type is used to force packet re-routing after mangling header
- fields such as the source, destination, flowlabel, hop-limit and
- the packet mark.
-
config NFT_REJECT_IPV6
select NF_REJECT_IPV6
default NFT_REJECT
@@ -278,15 +270,10 @@ if IP6_NF_NAT
config IP6_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
- select NF_NAT_MASQUERADE
+ select NETFILTER_XT_TARGET_MASQUERADE
help
- Masquerading is a special case of NAT: all outgoing connections are
- changed to seem to come from a particular interface's address, and
- if the interface goes down, those connections are lost. This is
- only useful for dialup accounts with dynamic IP address (ie. your IP
- address will be different on next dialup).
-
- To compile it as a module, choose M here. If unsure, say N.
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE.
config IP6_NF_TARGET_NPT
tristate "NPT (Network Prefix translation) target support"
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 3853c648ebaa..731a74c60dca 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -27,7 +27,6 @@ obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o
obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o
# nf_tables
-obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
@@ -47,7 +46,6 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
obj-$(CONFIG_IP6_NF_MATCH_SRH) += ip6t_srh.o
# targets
-obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o
obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
obj-$(CONFIG_IP6_NF_TARGET_SYNPROXY) += ip6t_SYNPROXY.o
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
deleted file mode 100644
index 29c7f1915a96..000000000000
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6
- * NAT funded by Astaro.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/ipv6.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/addrconf.h>
-#include <net/ipv6.h>
-#include <net/netfilter/ipv6/nf_nat_masquerade.h>
-
-static unsigned int
-masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
-{
- return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par));
-}
-
-static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
-{
- const struct nf_nat_range2 *range = par->targinfo;
-
- if (range->flags & NF_NAT_RANGE_MAP_IPS)
- return -EINVAL;
- return nf_ct_netns_get(par->net, par->family);
-}
-
-static void masquerade_tg6_destroy(const struct xt_tgdtor_param *par)
-{
- nf_ct_netns_put(par->net, par->family);
-}
-
-static struct xt_target masquerade_tg6_reg __read_mostly = {
- .name = "MASQUERADE",
- .family = NFPROTO_IPV6,
- .checkentry = masquerade_tg6_checkentry,
- .destroy = masquerade_tg6_destroy,
- .target = masquerade_tg6,
- .targetsize = sizeof(struct nf_nat_range),
- .table = "nat",
- .hooks = 1 << NF_INET_POST_ROUTING,
- .me = THIS_MODULE,
-};
-
-static int __init masquerade_tg6_init(void)
-{
- int err;
-
- err = xt_register_target(&masquerade_tg6_reg);
- if (err)
- return err;
-
- err = nf_nat_masquerade_ipv6_register_notifier();
- if (err)
- xt_unregister_target(&masquerade_tg6_reg);
-
- return err;
-}
-static void __exit masquerade_tg6_exit(void)
-{
- nf_nat_masquerade_ipv6_unregister_notifier();
- xt_unregister_target(&masquerade_tg6_reg);
-}
-
-module_init(masquerade_tg6_init);
-module_exit(masquerade_tg6_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("Xtables: automatic address SNAT");
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
deleted file mode 100644
index da3f1f8cb325..000000000000
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/skbuff.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-#include <net/route.h>
-
-static unsigned int nf_route_table_hook(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- unsigned int ret;
- struct nft_pktinfo pkt;
- struct in6_addr saddr, daddr;
- u_int8_t hop_limit;
- u32 mark, flowlabel;
- int err;
-
- nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_ipv6(&pkt, skb);
-
- /* save source/dest address, mark, hoplimit, flowlabel, priority */
- memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
- memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
- mark = skb->mark;
- hop_limit = ipv6_hdr(skb)->hop_limit;
-
- /* flowlabel and prio (includes version, which shouldn't change either */
- flowlabel = *((u32 *)ipv6_hdr(skb));
-
- ret = nft_do_chain(&pkt, priv);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
- memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
- skb->mark != mark ||
- ipv6_hdr(skb)->hop_limit != hop_limit ||
- flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
- err = ip6_route_me_harder(state->net, skb);
- if (err < 0)
- ret = NF_DROP_ERR(err);
- }
-
- return ret;
-}
-
-static const struct nft_chain_type nft_chain_route_ipv6 = {
- .name = "route",
- .type = NFT_CHAIN_T_ROUTE,
- .family = NFPROTO_IPV6,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_INET_LOCAL_OUT),
- .hooks = {
- [NF_INET_LOCAL_OUT] = nf_route_table_hook,
- },
-};
-
-static int __init nft_chain_route_init(void)
-{
- nft_register_chain_type(&nft_chain_route_ipv6);
-
- return 0;
-}
-
-static void __exit nft_chain_route_exit(void)
-{
- nft_unregister_chain_type(&nft_chain_route_ipv6);
-}
-
-module_init(nft_chain_route_init);
-module_exit(nft_chain_route_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET6, "route");
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 4fe7c90962dd..868ae23dbae1 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -10,15 +10,25 @@
#include <net/secure_seq.h>
#include <linux/netfilter.h>
-static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
+static u32 __ipv6_select_ident(struct net *net,
const struct in6_addr *dst,
const struct in6_addr *src)
{
+ const struct {
+ struct in6_addr dst;
+ struct in6_addr src;
+ } __aligned(SIPHASH_ALIGNMENT) combined = {
+ .dst = *dst,
+ .src = *src,
+ };
u32 hash, id;
- hash = __ipv6_addr_jhash(dst, hashrnd);
- hash = __ipv6_addr_jhash(src, hash);
- hash ^= net_hash_mix(net);
+ /* Note the following code is not safe, but this is okay. */
+ if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
+ get_random_bytes(&net->ipv4.ip_id_key,
+ sizeof(net->ipv4.ip_id_key));
+
+ hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key);
/* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve,
* set the hight order instead thus minimizing possible future
@@ -41,7 +51,6 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
*/
__be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
{
- static u32 ip6_proxy_idents_hashrnd __read_mostly;
struct in6_addr buf[2];
struct in6_addr *addrs;
u32 id;
@@ -53,11 +62,7 @@ __be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
if (!addrs)
return 0;
- net_get_random_once(&ip6_proxy_idents_hashrnd,
- sizeof(ip6_proxy_idents_hashrnd));
-
- id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd,
- &addrs[1], &addrs[0]);
+ id = __ipv6_select_ident(net, &addrs[1], &addrs[0]);
return htonl(id);
}
EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
@@ -66,12 +71,9 @@ __be32 ipv6_select_ident(struct net *net,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
- static u32 ip6_idents_hashrnd __read_mostly;
u32 id;
- net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
-
- id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr);
+ id = __ipv6_select_ident(net, daddr, saddr);
return htonl(id);
}
EXPORT_SYMBOL(ipv6_select_ident);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 5a426226c762..84dbe21b71e5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1356,6 +1356,7 @@ const struct proto_ops inet6_sockraw_ops = {
.getname = inet6_getname,
.poll = datagram_poll, /* ok */
.ioctl = inet6_ioctl, /* must change */
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen, /* ok */
.shutdown = inet_shutdown, /* ok */
.setsockopt = sock_common_setsockopt, /* ok */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7178e32eb15d..9c0127a44f9f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -59,7 +59,7 @@
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
@@ -102,14 +102,15 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
-static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
+static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
+ int strict);
static size_t rt6_nlmsg_size(struct fib6_info *rt);
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
struct fib6_info *rt, struct dst_entry *dst,
struct in6_addr *dest, struct in6_addr *src,
int iif, int type, u32 portid, u32 seq,
unsigned int flags);
-static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
+static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
struct in6_addr *daddr,
struct in6_addr *saddr);
@@ -295,7 +296,7 @@ static const struct fib6_info fib6_null_entry_template = {
.fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
.fib6_protocol = RTPROT_KERNEL,
.fib6_metric = ~(u32)0,
- .fib6_ref = ATOMIC_INIT(1),
+ .fib6_ref = REFCOUNT_INIT(1),
.fib6_type = RTN_UNREACHABLE,
.fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
};
@@ -427,13 +428,15 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false;
}
-struct fib6_info *fib6_multipath_select(const struct net *net,
- struct fib6_info *match,
- struct flowi6 *fl6, int oif,
- const struct sk_buff *skb,
- int strict)
+void fib6_select_path(const struct net *net, struct fib6_result *res,
+ struct flowi6 *fl6, int oif, bool have_oif_match,
+ const struct sk_buff *skb, int strict)
{
struct fib6_info *sibling, *next_sibling;
+ struct fib6_info *match = res->f6i;
+
+ if (!match->fib6_nsiblings || have_oif_match)
+ goto out;
/* We might have already computed the hash for ICMPv6 errors. In such
* case it will always be non-zero. Otherwise now is the time to do it.
@@ -441,61 +444,89 @@ struct fib6_info *fib6_multipath_select(const struct net *net,
if (!fl6->mp_hash)
fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
- if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
- return match;
+ if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
+ goto out;
list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
fib6_siblings) {
+ const struct fib6_nh *nh = &sibling->fib6_nh;
int nh_upper_bound;
- nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
+ nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
if (fl6->mp_hash > nh_upper_bound)
continue;
- if (rt6_score_route(sibling, oif, strict) < 0)
+ if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
break;
match = sibling;
break;
}
- return match;
+out:
+ res->f6i = match;
+ res->nh = &match->fib6_nh;
}
/*
* Route lookup. rcu_read_lock() should be held.
*/
-static inline struct fib6_info *rt6_device_match(struct net *net,
- struct fib6_info *rt,
- const struct in6_addr *saddr,
- int oif,
- int flags)
+static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
+ const struct in6_addr *saddr, int oif, int flags)
{
- struct fib6_info *sprt;
+ const struct net_device *dev;
- if (!oif && ipv6_addr_any(saddr) &&
- !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
- return rt;
+ if (nh->fib_nh_flags & RTNH_F_DEAD)
+ return false;
- for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
- const struct net_device *dev = sprt->fib6_nh.nh_dev;
+ dev = nh->fib_nh_dev;
+ if (oif) {
+ if (dev->ifindex == oif)
+ return true;
+ } else {
+ if (ipv6_chk_addr(net, saddr, dev,
+ flags & RT6_LOOKUP_F_IFACE))
+ return true;
+ }
- if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
- continue;
+ return false;
+}
- if (oif) {
- if (dev->ifindex == oif)
- return sprt;
- } else {
- if (ipv6_chk_addr(net, saddr, dev,
- flags & RT6_LOOKUP_F_IFACE))
- return sprt;
+static void rt6_device_match(struct net *net, struct fib6_result *res,
+ const struct in6_addr *saddr, int oif, int flags)
+{
+ struct fib6_info *f6i = res->f6i;
+ struct fib6_info *spf6i;
+ struct fib6_nh *nh;
+
+ if (!oif && ipv6_addr_any(saddr)) {
+ nh = &f6i->fib6_nh;
+ if (!(nh->fib_nh_flags & RTNH_F_DEAD))
+ goto out;
+ }
+
+ for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
+ nh = &spf6i->fib6_nh;
+ if (__rt6_device_match(net, nh, saddr, oif, flags)) {
+ res->f6i = spf6i;
+ goto out;
}
}
- if (oif && flags & RT6_LOOKUP_F_IFACE)
- return net->ipv6.fib6_null_entry;
+ if (oif && flags & RT6_LOOKUP_F_IFACE) {
+ res->f6i = net->ipv6.fib6_null_entry;
+ nh = &res->f6i->fib6_nh;
+ goto out;
+ }
- return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
+ nh = &f6i->fib6_nh;
+ if (nh->fib_nh_flags & RTNH_F_DEAD) {
+ res->f6i = net->ipv6.fib6_null_entry;
+ nh = &res->f6i->fib6_nh;
+ }
+out:
+ res->nh = nh;
+ res->fib6_type = res->f6i->fib6_type;
+ res->fib6_flags = res->f6i->fib6_flags;
}
#ifdef CONFIG_IPV6_ROUTER_PREF
@@ -517,7 +548,7 @@ static void rt6_probe_deferred(struct work_struct *w)
kfree(work);
}
-static void rt6_probe(struct fib6_info *rt)
+static void rt6_probe(struct fib6_nh *fib6_nh)
{
struct __rt6_probe_work *work = NULL;
const struct in6_addr *nh_gw;
@@ -533,11 +564,11 @@ static void rt6_probe(struct fib6_info *rt)
* Router Reachability Probe MUST be rate-limited
* to no more than one per minute.
*/
- if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
+ if (fib6_nh->fib_nh_gw_family)
return;
- nh_gw = &rt->fib6_nh.nh_gw;
- dev = rt->fib6_nh.nh_dev;
+ nh_gw = &fib6_nh->fib_nh_gw6;
+ dev = fib6_nh->fib_nh_dev;
rcu_read_lock_bh();
idev = __in6_dev_get(dev);
neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
@@ -554,13 +585,13 @@ static void rt6_probe(struct fib6_info *rt)
__neigh_set_probe_once(neigh);
}
write_unlock(&neigh->lock);
- } else if (time_after(jiffies, rt->last_probe +
+ } else if (time_after(jiffies, fib6_nh->last_probe +
idev->cnf.rtr_probe_interval)) {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
}
if (work) {
- rt->last_probe = jiffies;
+ fib6_nh->last_probe = jiffies;
INIT_WORK(&work->work, rt6_probe_deferred);
work->target = *nh_gw;
dev_hold(dev);
@@ -572,7 +603,7 @@ out:
rcu_read_unlock_bh();
}
#else
-static inline void rt6_probe(struct fib6_info *rt)
+static inline void rt6_probe(struct fib6_nh *fib6_nh)
{
}
#endif
@@ -580,27 +611,14 @@ static inline void rt6_probe(struct fib6_info *rt)
/*
* Default Router Selection (RFC 2461 6.3.6)
*/
-static inline int rt6_check_dev(struct fib6_info *rt, int oif)
-{
- const struct net_device *dev = rt->fib6_nh.nh_dev;
-
- if (!oif || dev->ifindex == oif)
- return 2;
- return 0;
-}
-
-static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
+static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
{
enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
struct neighbour *neigh;
- if (rt->fib6_flags & RTF_NONEXTHOP ||
- !(rt->fib6_flags & RTF_GATEWAY))
- return RT6_NUD_SUCCEED;
-
rcu_read_lock_bh();
- neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
- &rt->fib6_nh.nh_gw);
+ neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
+ &fib6_nh->fib_nh_gw6);
if (neigh) {
read_lock(&neigh->lock);
if (neigh->nud_state & NUD_VALID)
@@ -621,58 +639,44 @@ static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
return ret;
}
-static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
+static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
+ int strict)
{
- int m;
+ int m = 0;
+
+ if (!oif || nh->fib_nh_dev->ifindex == oif)
+ m = 2;
- m = rt6_check_dev(rt, oif);
if (!m && (strict & RT6_LOOKUP_F_IFACE))
return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
- m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
+ m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
#endif
- if (strict & RT6_LOOKUP_F_REACHABLE) {
- int n = rt6_check_neigh(rt);
+ if ((strict & RT6_LOOKUP_F_REACHABLE) &&
+ !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
+ int n = rt6_check_neigh(nh);
if (n < 0)
return n;
}
return m;
}
-/* called with rc_read_lock held */
-static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
+static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
+ int oif, int strict, int *mpri, bool *do_rr)
{
- const struct net_device *dev = fib6_info_nh_dev(f6i);
+ bool match_do_rr = false;
bool rc = false;
-
- if (dev) {
- const struct inet6_dev *idev = __in6_dev_get(dev);
-
- rc = !!idev->cnf.ignore_routes_with_linkdown;
- }
-
- return rc;
-}
-
-static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
- int *mpri, struct fib6_info *match,
- bool *do_rr)
-{
int m;
- bool match_do_rr = false;
- if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
+ if (nh->fib_nh_flags & RTNH_F_DEAD)
goto out;
- if (fib6_ignore_linkdown(rt) &&
- rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
+ if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
+ nh->fib_nh_flags & RTNH_F_LINKDOWN &&
!(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
goto out;
- if (fib6_check_expired(rt))
- goto out;
-
- m = rt6_score_route(rt, oif, strict);
+ m = rt6_score_route(nh, fib6_flags, oif, strict);
if (m == RT6_NUD_FAIL_DO_RR) {
match_do_rr = true;
m = 0; /* lowest valid score */
@@ -681,67 +685,82 @@ static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
}
if (strict & RT6_LOOKUP_F_REACHABLE)
- rt6_probe(rt);
+ rt6_probe(nh);
/* note that m can be RT6_NUD_FAIL_PROBE at this point */
if (m > *mpri) {
*do_rr = match_do_rr;
*mpri = m;
- match = rt;
+ rc = true;
}
out:
- return match;
+ return rc;
}
-static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
- struct fib6_info *leaf,
- struct fib6_info *rr_head,
- u32 metric, int oif, int strict,
- bool *do_rr)
+static void __find_rr_leaf(struct fib6_info *f6i_start,
+ struct fib6_info *nomatch, u32 metric,
+ struct fib6_result *res, struct fib6_info **cont,
+ int oif, int strict, bool *do_rr, int *mpri)
{
- struct fib6_info *rt, *match, *cont;
- int mpri = -1;
+ struct fib6_info *f6i;
- match = NULL;
- cont = NULL;
- for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
- if (rt->fib6_metric != metric) {
- cont = rt;
- break;
+ for (f6i = f6i_start;
+ f6i && f6i != nomatch;
+ f6i = rcu_dereference(f6i->fib6_next)) {
+ struct fib6_nh *nh;
+
+ if (cont && f6i->fib6_metric != metric) {
+ *cont = f6i;
+ return;
}
- match = find_match(rt, oif, strict, &mpri, match, do_rr);
- }
+ if (fib6_check_expired(f6i))
+ continue;
- for (rt = leaf; rt && rt != rr_head;
- rt = rcu_dereference(rt->fib6_next)) {
- if (rt->fib6_metric != metric) {
- cont = rt;
- break;
+ nh = &f6i->fib6_nh;
+ if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) {
+ res->f6i = f6i;
+ res->nh = nh;
+ res->fib6_flags = f6i->fib6_flags;
+ res->fib6_type = f6i->fib6_type;
}
-
- match = find_match(rt, oif, strict, &mpri, match, do_rr);
}
+}
- if (match || !cont)
- return match;
+static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
+ struct fib6_info *rr_head, int oif, int strict,
+ bool *do_rr, struct fib6_result *res)
+{
+ u32 metric = rr_head->fib6_metric;
+ struct fib6_info *cont = NULL;
+ int mpri = -1;
- for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
- match = find_match(rt, oif, strict, &mpri, match, do_rr);
+ __find_rr_leaf(rr_head, NULL, metric, res, &cont,
+ oif, strict, do_rr, &mpri);
- return match;
+ __find_rr_leaf(leaf, rr_head, metric, res, &cont,
+ oif, strict, do_rr, &mpri);
+
+ if (res->f6i || !cont)
+ return;
+
+ __find_rr_leaf(cont, NULL, metric, res, NULL,
+ oif, strict, do_rr, &mpri);
}
-static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
- int oif, int strict)
+static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
+ struct fib6_result *res, int strict)
{
struct fib6_info *leaf = rcu_dereference(fn->leaf);
- struct fib6_info *match, *rt0;
+ struct fib6_info *rt0;
bool do_rr = false;
int key_plen;
+ /* make sure this function or its helpers sets f6i */
+ res->f6i = NULL;
+
if (!leaf || leaf == net->ipv6.fib6_null_entry)
- return net->ipv6.fib6_null_entry;
+ goto out;
rt0 = rcu_dereference(fn->rr_ptr);
if (!rt0)
@@ -758,11 +777,9 @@ static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
key_plen = rt0->fib6_src.plen;
#endif
if (fn->fn_bit != key_plen)
- return net->ipv6.fib6_null_entry;
-
- match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
- &do_rr);
+ goto out;
+ find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
if (do_rr) {
struct fib6_info *next = rcu_dereference(rt0->fib6_next);
@@ -779,12 +796,19 @@ static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
}
}
- return match ? match : net->ipv6.fib6_null_entry;
+out:
+ if (!res->f6i) {
+ res->f6i = net->ipv6.fib6_null_entry;
+ res->nh = &res->f6i->fib6_nh;
+ res->fib6_flags = res->f6i->fib6_flags;
+ res->fib6_type = res->f6i->fib6_type;
+ }
}
-static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
+static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
{
- return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
+ return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
+ res->nh->fib_nh_gw_family;
}
#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -868,17 +892,17 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
*/
/* called with rcu_lock held */
-static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
+static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
{
- struct net_device *dev = rt->fib6_nh.nh_dev;
+ struct net_device *dev = res->nh->fib_nh_dev;
- if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
+ if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
/* for copies of local routes, dst->dev needs to be the
* device if it is a master device, the master device if
* device is enslaved, and the loopback as the default
*/
if (netif_is_l3_slave(dev) &&
- !rt6_need_strict(&rt->fib6_dst.addr))
+ !rt6_need_strict(&res->f6i->fib6_dst.addr))
dev = l3mdev_master_dev_rcu(dev);
else if (!netif_is_l3_master(dev))
dev = dev_net(dev)->loopback_dev;
@@ -924,11 +948,11 @@ static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
return flags;
}
-static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
+static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
{
- rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
+ rt->dst.error = ip6_rt_type_to_error(fib6_type);
- switch (ort->fib6_type) {
+ switch (fib6_type) {
case RTN_BLACKHOLE:
rt->dst.output = dst_discard_out;
rt->dst.input = dst_discard;
@@ -946,26 +970,28 @@ static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
}
}
-static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
+static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
{
- if (ort->fib6_flags & RTF_REJECT) {
- ip6_rt_init_dst_reject(rt, ort);
+ struct fib6_info *f6i = res->f6i;
+
+ if (res->fib6_flags & RTF_REJECT) {
+ ip6_rt_init_dst_reject(rt, res->fib6_type);
return;
}
rt->dst.error = 0;
rt->dst.output = ip6_output;
- if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
+ if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
rt->dst.input = ip6_input;
- } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
+ } else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
rt->dst.input = ip6_mc_input;
} else {
rt->dst.input = ip6_forward;
}
- if (ort->fib6_nh.nh_lwtstate) {
- rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
+ if (res->nh->fib_nh_lws) {
+ rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
lwtunnel_set_redirect(&rt->dst);
}
@@ -980,20 +1006,25 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
}
-/* Caller must already hold reference to @ort */
-static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
+/* Caller must already hold reference to f6i in result */
+static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
{
- struct net_device *dev = fib6_info_nh_dev(ort);
+ const struct fib6_nh *nh = res->nh;
+ const struct net_device *dev = nh->fib_nh_dev;
+ struct fib6_info *f6i = res->f6i;
- ip6_rt_init_dst(rt, ort);
+ ip6_rt_init_dst(rt, res);
- rt->rt6i_dst = ort->fib6_dst;
+ rt->rt6i_dst = f6i->fib6_dst;
rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
- rt->rt6i_gateway = ort->fib6_nh.nh_gw;
- rt->rt6i_flags = ort->fib6_flags;
- rt6_set_from(rt, ort);
+ rt->rt6i_flags = res->fib6_flags;
+ if (nh->fib_nh_gw_family) {
+ rt->rt6i_gateway = nh->fib_nh_gw6;
+ rt->rt6i_flags |= RTF_GATEWAY;
+ }
+ rt6_set_from(rt, f6i);
#ifdef CONFIG_IPV6_SUBTREES
- rt->rt6i_src = ort->fib6_src;
+ rt->rt6i_src = f6i->fib6_src;
#endif
}
@@ -1015,14 +1046,13 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
}
}
-static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
- bool null_fallback)
+static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
{
struct rt6_info *rt = *prt;
if (dst_hold_safe(&rt->dst))
return true;
- if (null_fallback) {
+ if (net) {
rt = net->ipv6.ip6_null_entry;
dst_hold(&rt->dst);
} else {
@@ -1033,22 +1063,24 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
}
/* called with rcu_lock held */
-static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
+static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
{
- unsigned short flags = fib6_info_dst_flags(rt);
- struct net_device *dev = rt->fib6_nh.nh_dev;
+ struct net_device *dev = res->nh->fib_nh_dev;
+ struct fib6_info *f6i = res->f6i;
+ unsigned short flags;
struct rt6_info *nrt;
- if (!fib6_info_hold_safe(rt))
+ if (!fib6_info_hold_safe(f6i))
goto fallback;
+ flags = fib6_info_dst_flags(f6i);
nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
if (!nrt) {
- fib6_info_release(rt);
+ fib6_info_release(f6i);
goto fallback;
}
- ip6_rt_copy_init(nrt, rt);
+ ip6_rt_copy_init(nrt, res);
return nrt;
fallback:
@@ -1063,7 +1095,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
const struct sk_buff *skb,
int flags)
{
- struct fib6_info *f6i;
+ struct fib6_result res = {};
struct fib6_node *fn;
struct rt6_info *rt;
@@ -1073,37 +1105,38 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
rcu_read_lock();
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
- f6i = rcu_dereference(fn->leaf);
- if (!f6i) {
- f6i = net->ipv6.fib6_null_entry;
- } else {
- f6i = rt6_device_match(net, f6i, &fl6->saddr,
- fl6->flowi6_oif, flags);
- if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
- f6i = fib6_multipath_select(net, f6i, fl6,
- fl6->flowi6_oif, skb,
- flags);
- }
- if (f6i == net->ipv6.fib6_null_entry) {
+ res.f6i = rcu_dereference(fn->leaf);
+ if (!res.f6i)
+ res.f6i = net->ipv6.fib6_null_entry;
+ else
+ rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
+ flags);
+
+ if (res.f6i == net->ipv6.fib6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
goto restart;
+
+ rt = net->ipv6.ip6_null_entry;
+ dst_hold(&rt->dst);
+ goto out;
}
- trace_fib6_table_lookup(net, f6i, table, fl6);
+ fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
+ fl6->flowi6_oif != 0, skb, flags);
/* Search through exception table */
- rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+ rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
if (rt) {
- if (ip6_hold_safe(net, &rt, true))
+ if (ip6_hold_safe(net, &rt))
dst_use_noref(&rt->dst, jiffies);
- } else if (f6i == net->ipv6.fib6_null_entry) {
- rt = net->ipv6.ip6_null_entry;
- dst_hold(&rt->dst);
} else {
- rt = ip6_create_rt_rcu(f6i);
+ rt = ip6_create_rt_rcu(&res);
}
+out:
+ trace_fib6_table_lookup(net, &res, table, fl6);
+
rcu_read_unlock();
return rt;
@@ -1169,10 +1202,11 @@ int ip6_ins_rt(struct net *net, struct fib6_info *rt)
return __ip6_ins_rt(rt, &info, NULL);
}
-static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
+static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
+ struct fib6_info *f6i = res->f6i;
struct net_device *dev;
struct rt6_info *rt;
@@ -1180,25 +1214,25 @@ static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
* Clone the route.
*/
- if (!fib6_info_hold_safe(ort))
+ if (!fib6_info_hold_safe(f6i))
return NULL;
- dev = ip6_rt_get_dev_rcu(ort);
+ dev = ip6_rt_get_dev_rcu(res);
rt = ip6_dst_alloc(dev_net(dev), dev, 0);
if (!rt) {
- fib6_info_release(ort);
+ fib6_info_release(f6i);
return NULL;
}
- ip6_rt_copy_init(rt, ort);
+ ip6_rt_copy_init(rt, res);
rt->rt6i_flags |= RTF_CACHE;
rt->dst.flags |= DST_HOST;
rt->rt6i_dst.addr = *daddr;
rt->rt6i_dst.plen = 128;
- if (!rt6_is_gw_or_nonexthop(ort)) {
- if (ort->fib6_dst.plen != 128 &&
- ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
+ if (!rt6_is_gw_or_nonexthop(res)) {
+ if (f6i->fib6_dst.plen != 128 &&
+ ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
if (rt->rt6i_src.plen && saddr) {
@@ -1211,55 +1245,56 @@ static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
return rt;
}
-static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
+static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
{
- unsigned short flags = fib6_info_dst_flags(rt);
+ struct fib6_info *f6i = res->f6i;
+ unsigned short flags = fib6_info_dst_flags(f6i);
struct net_device *dev;
struct rt6_info *pcpu_rt;
- if (!fib6_info_hold_safe(rt))
+ if (!fib6_info_hold_safe(f6i))
return NULL;
rcu_read_lock();
- dev = ip6_rt_get_dev_rcu(rt);
+ dev = ip6_rt_get_dev_rcu(res);
pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
rcu_read_unlock();
if (!pcpu_rt) {
- fib6_info_release(rt);
+ fib6_info_release(f6i);
return NULL;
}
- ip6_rt_copy_init(pcpu_rt, rt);
+ ip6_rt_copy_init(pcpu_rt, res);
pcpu_rt->rt6i_flags |= RTF_PCPU;
return pcpu_rt;
}
/* It should be called with rcu_read_lock() acquired */
-static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
+static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
{
struct rt6_info *pcpu_rt, **p;
- p = this_cpu_ptr(rt->rt6i_pcpu);
+ p = this_cpu_ptr(res->f6i->rt6i_pcpu);
pcpu_rt = *p;
if (pcpu_rt)
- ip6_hold_safe(NULL, &pcpu_rt, false);
+ ip6_hold_safe(NULL, &pcpu_rt);
return pcpu_rt;
}
static struct rt6_info *rt6_make_pcpu_route(struct net *net,
- struct fib6_info *rt)
+ const struct fib6_result *res)
{
struct rt6_info *pcpu_rt, *prev, **p;
- pcpu_rt = ip6_rt_pcpu_alloc(rt);
+ pcpu_rt = ip6_rt_pcpu_alloc(res);
if (!pcpu_rt) {
dst_hold(&net->ipv6.ip6_null_entry->dst);
return net->ipv6.ip6_null_entry;
}
dst_hold(&pcpu_rt->dst);
- p = this_cpu_ptr(rt->rt6i_pcpu);
+ p = this_cpu_ptr(res->f6i->rt6i_pcpu);
prev = cmpxchg(p, NULL, pcpu_rt);
BUG_ON(prev);
@@ -1402,14 +1437,15 @@ __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
return NULL;
}
-static unsigned int fib6_mtu(const struct fib6_info *rt)
+static unsigned int fib6_mtu(const struct fib6_result *res)
{
+ const struct fib6_nh *nh = res->nh;
unsigned int mtu;
- if (rt->fib6_pmtu) {
- mtu = rt->fib6_pmtu;
+ if (res->f6i->fib6_pmtu) {
+ mtu = res->f6i->fib6_pmtu;
} else {
- struct net_device *dev = fib6_info_nh_dev(rt);
+ struct net_device *dev = nh->fib_nh_dev;
struct inet6_dev *idev;
rcu_read_lock();
@@ -1420,26 +1456,27 @@ static unsigned int fib6_mtu(const struct fib6_info *rt)
mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
- return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
+ return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
}
static int rt6_insert_exception(struct rt6_info *nrt,
- struct fib6_info *ort)
+ const struct fib6_result *res)
{
struct net *net = dev_net(nrt->dst.dev);
struct rt6_exception_bucket *bucket;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
+ struct fib6_info *f6i = res->f6i;
int err = 0;
spin_lock_bh(&rt6_exception_lock);
- if (ort->exception_bucket_flushed) {
+ if (f6i->exception_bucket_flushed) {
err = -EINVAL;
goto out;
}
- bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
+ bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket,
lockdep_is_held(&rt6_exception_lock));
if (!bucket) {
bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
@@ -1448,24 +1485,24 @@ static int rt6_insert_exception(struct rt6_info *nrt,
err = -ENOMEM;
goto out;
}
- rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
+ rcu_assign_pointer(f6i->rt6i_exception_bucket, bucket);
}
#ifdef CONFIG_IPV6_SUBTREES
- /* rt6i_src.plen != 0 indicates ort is in subtree
+ /* fib6_src.plen != 0 indicates f6i is in subtree
* and exception table is indexed by a hash of
- * both rt6i_dst and rt6i_src.
+ * both fib6_dst and fib6_src.
* Otherwise, the exception table is indexed by
- * a hash of only rt6i_dst.
+ * a hash of only fib6_dst.
*/
- if (ort->fib6_src.plen)
+ if (f6i->fib6_src.plen)
src_key = &nrt->rt6i_src.addr;
#endif
- /* rt6_mtu_change() might lower mtu on ort.
+ /* rt6_mtu_change() might lower mtu on f6i.
* Only insert this exception route if its mtu
- * is less than ort's mtu value.
+ * is less than f6i's mtu value.
*/
- if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
+ if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
err = -EINVAL;
goto out;
}
@@ -1494,9 +1531,9 @@ out:
/* Update fn->fn_sernum to invalidate all cached dst */
if (!err) {
- spin_lock_bh(&ort->fib6_table->tb6_lock);
- fib6_update_sernum(net, ort);
- spin_unlock_bh(&ort->fib6_table->tb6_lock);
+ spin_lock_bh(&f6i->fib6_table->tb6_lock);
+ fib6_update_sernum(net, f6i);
+ spin_unlock_bh(&f6i->fib6_table->tb6_lock);
fib6_force_start_gc(net);
}
@@ -1533,33 +1570,33 @@ out:
/* Find cached rt in the hash table inside passed in rt
* Caller has to hold rcu_read_lock()
*/
-static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
+static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
struct in6_addr *daddr,
struct in6_addr *saddr)
{
struct rt6_exception_bucket *bucket;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
- struct rt6_info *res = NULL;
+ struct rt6_info *ret = NULL;
- bucket = rcu_dereference(rt->rt6i_exception_bucket);
+ bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
#ifdef CONFIG_IPV6_SUBTREES
- /* rt6i_src.plen != 0 indicates rt is in subtree
+ /* fib6i_src.plen != 0 indicates f6i is in subtree
* and exception table is indexed by a hash of
- * both rt6i_dst and rt6i_src.
+ * both fib6_dst and fib6_src.
* Otherwise, the exception table is indexed by
- * a hash of only rt6i_dst.
+ * a hash of only fib6_dst.
*/
- if (rt->fib6_src.plen)
+ if (res->f6i->fib6_src.plen)
src_key = saddr;
#endif
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
- res = rt6_ex->rt6i;
+ ret = rt6_ex->rt6i;
- return res;
+ return ret;
}
/* Remove the passed in cached rt from the hash table that contains it */
@@ -1807,11 +1844,10 @@ void rt6_age_exceptions(struct fib6_info *rt,
}
/* must be called with rcu lock held */
-struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
- int oif, struct flowi6 *fl6, int strict)
+int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
+ struct flowi6 *fl6, struct fib6_result *res, int strict)
{
struct fib6_node *fn, *saved_fn;
- struct fib6_info *f6i;
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
saved_fn = fn;
@@ -1820,8 +1856,8 @@ struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
oif = 0;
redo_rt6_select:
- f6i = rt6_select(net, fn, oif, strict);
- if (f6i == net->ipv6.fib6_null_entry) {
+ rt6_select(net, fn, oif, res, strict);
+ if (res->f6i == net->ipv6.fib6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
goto redo_rt6_select;
@@ -1833,16 +1869,16 @@ redo_rt6_select:
}
}
- trace_fib6_table_lookup(net, f6i, table, fl6);
+ trace_fib6_table_lookup(net, res, table, fl6);
- return f6i;
+ return 0;
}
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
int oif, struct flowi6 *fl6,
const struct sk_buff *skb, int flags)
{
- struct fib6_info *f6i;
+ struct fib6_result res = {};
struct rt6_info *rt;
int strict = 0;
@@ -1853,27 +1889,26 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
rcu_read_lock();
- f6i = fib6_table_lookup(net, table, oif, fl6, strict);
- if (f6i->fib6_nsiblings)
- f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
-
- if (f6i == net->ipv6.fib6_null_entry) {
+ fib6_table_lookup(net, table, oif, fl6, &res, strict);
+ if (res.f6i == net->ipv6.fib6_null_entry) {
rt = net->ipv6.ip6_null_entry;
rcu_read_unlock();
dst_hold(&rt->dst);
return rt;
}
+ fib6_select_path(net, &res, fl6, oif, false, skb, strict);
+
/*Search through exception table */
- rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+ rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
if (rt) {
- if (ip6_hold_safe(net, &rt, true))
+ if (ip6_hold_safe(net, &rt))
dst_use_noref(&rt->dst, jiffies);
rcu_read_unlock();
return rt;
} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
- !(f6i->fib6_flags & RTF_GATEWAY))) {
+ !res.nh->fib_nh_gw_family)) {
/* Create a RTF_CACHE clone which will not be
* owned by the fib6 tree. It is for the special case where
* the daddr in the skb during the neighbor look-up is different
@@ -1881,7 +1916,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
*/
struct rt6_info *uncached_rt;
- uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
+ uncached_rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
rcu_read_unlock();
@@ -1903,10 +1938,10 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
struct rt6_info *pcpu_rt;
local_bh_disable();
- pcpu_rt = rt6_get_pcpu_route(f6i);
+ pcpu_rt = rt6_get_pcpu_route(&res);
if (!pcpu_rt)
- pcpu_rt = rt6_make_pcpu_route(net, f6i);
+ pcpu_rt = rt6_make_pcpu_route(net, &res);
local_bh_enable();
rcu_read_unlock();
@@ -2325,19 +2360,23 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (rt6->rt6i_flags & RTF_CACHE)
rt6_update_exception_stamp_rt(rt6);
} else if (daddr) {
- struct fib6_info *from;
+ struct fib6_result res = {};
struct rt6_info *nrt6;
rcu_read_lock();
- from = rcu_dereference(rt6->from);
- if (!from) {
+ res.f6i = rcu_dereference(rt6->from);
+ if (!res.f6i) {
rcu_read_unlock();
return;
}
- nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
+ res.nh = &res.f6i->fib6_nh;
+ res.fib6_flags = res.f6i->fib6_flags;
+ res.fib6_type = res.f6i->fib6_type;
+
+ nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
if (nrt6) {
rt6_do_update_pmtu(nrt6, mtu);
- if (rt6_insert_exception(nrt6, from))
+ if (rt6_insert_exception(nrt6, &res))
dst_release_immediate(&nrt6->dst);
}
rcu_read_unlock();
@@ -2410,6 +2449,36 @@ void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
NULL);
}
+static bool ip6_redirect_nh_match(const struct fib6_result *res,
+ struct flowi6 *fl6,
+ const struct in6_addr *gw,
+ struct rt6_info **ret)
+{
+ const struct fib6_nh *nh = res->nh;
+
+ if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
+ fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
+ return false;
+
+ /* rt_cache's gateway might be different from its 'parent'
+ * in the case of an ip redirect.
+ * So we keep searching in the exception table if the gateway
+ * is different.
+ */
+ if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
+ struct rt6_info *rt_cache;
+
+ rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
+ if (rt_cache &&
+ ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
+ *ret = rt_cache;
+ return true;
+ }
+ return false;
+ }
+ return true;
+}
+
/* Handle redirects */
struct ip6rd_flowi {
struct flowi6 fl6;
@@ -2423,7 +2492,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
int flags)
{
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
- struct rt6_info *ret = NULL, *rt_cache;
+ struct rt6_info *ret = NULL;
+ struct fib6_result res = {};
struct fib6_info *rt;
struct fib6_node *fn;
@@ -2441,34 +2511,15 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
for_each_fib6_node_rt_rcu(fn) {
- if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
- continue;
+ res.f6i = rt;
+ res.nh = &rt->fib6_nh;
+
if (fib6_check_expired(rt))
continue;
if (rt->fib6_flags & RTF_REJECT)
break;
- if (!(rt->fib6_flags & RTF_GATEWAY))
- continue;
- if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
- continue;
- /* rt_cache's gateway might be different from its 'parent'
- * in the case of an ip redirect.
- * So we keep searching in the exception table if the gateway
- * is different.
- */
- if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
- rt_cache = rt6_find_cached_rt(rt,
- &fl6->daddr,
- &fl6->saddr);
- if (rt_cache &&
- ipv6_addr_equal(&rdfl->gateway,
- &rt_cache->rt6i_gateway)) {
- ret = rt_cache;
- break;
- }
- continue;
- }
- break;
+ if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, &ret))
+ goto out;
}
if (!rt)
@@ -2484,15 +2535,20 @@ restart:
goto restart;
}
+ res.f6i = rt;
+ res.nh = &rt->fib6_nh;
out:
- if (ret)
- ip6_hold_safe(net, &ret, true);
- else
- ret = ip6_create_rt_rcu(rt);
+ if (ret) {
+ ip6_hold_safe(net, &ret);
+ } else {
+ res.fib6_flags = res.f6i->fib6_flags;
+ res.fib6_type = res.f6i->fib6_type;
+ ret = ip6_create_rt_rcu(&res);
+ }
rcu_read_unlock();
- trace_fib6_table_lookup(net, rt, table, fl6);
+ trace_fib6_table_lookup(net, &res, table, fl6);
return ret;
};
@@ -2610,12 +2666,15 @@ out:
* based on ip6_dst_mtu_forward and exception logic of
* rt6_find_cached_rt; called with rcu_read_lock
*/
-u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
- struct in6_addr *saddr)
+u32 ip6_mtu_from_fib6(const struct fib6_result *res,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
struct rt6_exception_bucket *bucket;
+ const struct fib6_nh *nh = res->nh;
+ struct fib6_info *f6i = res->f6i;
+ const struct in6_addr *src_key;
struct rt6_exception *rt6_ex;
- struct in6_addr *src_key;
struct inet6_dev *idev;
u32 mtu = 0;
@@ -2637,7 +2696,7 @@ u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
if (likely(!mtu)) {
- struct net_device *dev = fib6_info_nh_dev(f6i);
+ struct net_device *dev = nh->fib_nh_dev;
mtu = IPV6_MIN_MTU;
idev = __in6_dev_get(dev);
@@ -2647,7 +2706,7 @@ u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
out:
- return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
+ return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
}
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
@@ -2903,17 +2962,143 @@ out:
return err;
}
+static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
+{
+ if ((flags & RTF_REJECT) ||
+ (dev && (dev->flags & IFF_LOOPBACK) &&
+ !(addr_type & IPV6_ADDR_LOOPBACK) &&
+ !(flags & RTF_LOCAL)))
+ return true;
+
+ return false;
+}
+
+int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
+ struct fib6_config *cfg, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *dev = NULL;
+ struct inet6_dev *idev = NULL;
+ int addr_type;
+ int err;
+
+ fib6_nh->fib_nh_family = AF_INET6;
+
+ err = -ENODEV;
+ if (cfg->fc_ifindex) {
+ dev = dev_get_by_index(net, cfg->fc_ifindex);
+ if (!dev)
+ goto out;
+ idev = in6_dev_get(dev);
+ if (!idev)
+ goto out;
+ }
+
+ if (cfg->fc_flags & RTNH_F_ONLINK) {
+ if (!dev) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop device required for onlink");
+ goto out;
+ }
+
+ if (!(dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ err = -ENETDOWN;
+ goto out;
+ }
+
+ fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
+ }
+
+ fib6_nh->fib_nh_weight = 1;
+
+ /* We cannot add true routes via loopback here,
+ * they would result in kernel looping; promote them to reject routes
+ */
+ addr_type = ipv6_addr_type(&cfg->fc_dst);
+ if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
+ /* hold loopback dev/idev if we haven't done so. */
+ if (dev != net->loopback_dev) {
+ if (dev) {
+ dev_put(dev);
+ in6_dev_put(idev);
+ }
+ dev = net->loopback_dev;
+ dev_hold(dev);
+ idev = in6_dev_get(dev);
+ if (!idev) {
+ err = -ENODEV;
+ goto out;
+ }
+ }
+ goto set_dev;
+ }
+
+ if (cfg->fc_flags & RTF_GATEWAY) {
+ err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
+ if (err)
+ goto out;
+
+ fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
+ fib6_nh->fib_nh_gw_family = AF_INET6;
+ }
+
+ err = -ENODEV;
+ if (!dev)
+ goto out;
+
+ if (idev->cnf.disable_ipv6) {
+ NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
+ err = -EACCES;
+ goto out;
+ }
+
+ if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ err = -ENETDOWN;
+ goto out;
+ }
+
+ if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
+ !netif_carrier_ok(dev))
+ fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
+
+ err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
+ cfg->fc_encap_type, cfg, gfp_flags, extack);
+ if (err)
+ goto out;
+set_dev:
+ fib6_nh->fib_nh_dev = dev;
+ fib6_nh->fib_nh_oif = dev->ifindex;
+ err = 0;
+out:
+ if (idev)
+ in6_dev_put(idev);
+
+ if (err) {
+ lwtstate_put(fib6_nh->fib_nh_lws);
+ fib6_nh->fib_nh_lws = NULL;
+ if (dev)
+ dev_put(dev);
+ }
+
+ return err;
+}
+
+void fib6_nh_release(struct fib6_nh *fib6_nh)
+{
+ fib_nh_common_release(&fib6_nh->nh_common);
+}
+
static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
gfp_t gfp_flags,
struct netlink_ext_ack *extack)
{
struct net *net = cfg->fc_nlinfo.nl_net;
struct fib6_info *rt = NULL;
- struct net_device *dev = NULL;
- struct inet6_dev *idev = NULL;
struct fib6_table *table;
- int addr_type;
int err = -EINVAL;
+ int addr_type;
/* RTF_PCPU is an internal flag; can not be set by userspace */
if (cfg->fc_flags & RTF_PCPU) {
@@ -2947,33 +3132,6 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
#endif
- if (cfg->fc_ifindex) {
- err = -ENODEV;
- dev = dev_get_by_index(net, cfg->fc_ifindex);
- if (!dev)
- goto out;
- idev = in6_dev_get(dev);
- if (!idev)
- goto out;
- }
-
- if (cfg->fc_metric == 0)
- cfg->fc_metric = IP6_RT_PRIO_USER;
-
- if (cfg->fc_flags & RTNH_F_ONLINK) {
- if (!dev) {
- NL_SET_ERR_MSG(extack,
- "Nexthop device required for onlink");
- err = -ENODEV;
- goto out;
- }
-
- if (!(dev->flags & IFF_UP)) {
- NL_SET_ERR_MSG(extack, "Nexthop device is not up");
- err = -ENETDOWN;
- goto out;
- }
- }
err = -ENOBUFS;
if (cfg->fc_nlinfo.nlh &&
@@ -3017,18 +3175,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
cfg->fc_protocol = RTPROT_BOOT;
rt->fib6_protocol = cfg->fc_protocol;
- addr_type = ipv6_addr_type(&cfg->fc_dst);
-
- if (cfg->fc_encap) {
- struct lwtunnel_state *lwtstate;
-
- err = lwtunnel_build_state(cfg->fc_encap_type,
- cfg->fc_encap, AF_INET6, cfg,
- &lwtstate, extack);
- if (err)
- goto out;
- rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
- }
+ rt->fib6_table = table;
+ rt->fib6_metric = cfg->fc_metric;
+ rt->fib6_type = cfg->fc_type;
+ rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
rt->fib6_dst.plen = cfg->fc_dst_len;
@@ -3039,62 +3189,20 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
rt->fib6_src.plen = cfg->fc_src_len;
#endif
-
- rt->fib6_metric = cfg->fc_metric;
- rt->fib6_nh.nh_weight = 1;
-
- rt->fib6_type = cfg->fc_type;
+ err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
+ if (err)
+ goto out;
/* We cannot add true routes via loopback here,
- they would result in kernel looping; promote them to reject routes
+ * they would result in kernel looping; promote them to reject routes
*/
- if ((cfg->fc_flags & RTF_REJECT) ||
- (dev && (dev->flags & IFF_LOOPBACK) &&
- !(addr_type & IPV6_ADDR_LOOPBACK) &&
- !(cfg->fc_flags & RTF_LOCAL))) {
- /* hold loopback dev/idev if we haven't done so. */
- if (dev != net->loopback_dev) {
- if (dev) {
- dev_put(dev);
- in6_dev_put(idev);
- }
- dev = net->loopback_dev;
- dev_hold(dev);
- idev = in6_dev_get(dev);
- if (!idev) {
- err = -ENODEV;
- goto out;
- }
- }
- rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
- goto install_route;
- }
-
- if (cfg->fc_flags & RTF_GATEWAY) {
- err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
- if (err)
- goto out;
-
- rt->fib6_nh.nh_gw = cfg->fc_gateway;
- }
-
- err = -ENODEV;
- if (!dev)
- goto out;
-
- if (idev->cnf.disable_ipv6) {
- NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
- err = -EACCES;
- goto out;
- }
-
- if (!(dev->flags & IFF_UP)) {
- NL_SET_ERR_MSG(extack, "Nexthop device is not up");
- err = -ENETDOWN;
- goto out;
- }
+ addr_type = ipv6_addr_type(&cfg->fc_dst);
+ if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
+ rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
+ struct net_device *dev = fib6_info_nh_dev(rt);
+
if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
NL_SET_ERR_MSG(extack, "Invalid source address");
err = -EINVAL;
@@ -3105,26 +3213,8 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
} else
rt->fib6_prefsrc.plen = 0;
- rt->fib6_flags = cfg->fc_flags;
-
-install_route:
- if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
- !netif_carrier_ok(dev))
- rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
- rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
- rt->fib6_nh.nh_dev = dev;
- rt->fib6_table = table;
-
- if (idev)
- in6_dev_put(idev);
-
return rt;
out:
- if (dev)
- dev_put(dev);
- if (idev)
- in6_dev_put(idev);
-
fib6_info_release(rt);
return ERR_PTR(err);
}
@@ -3265,10 +3355,16 @@ static int ip6_route_del(struct fib6_config *cfg,
if (fn) {
for_each_fib6_node_rt_rcu(fn) {
+ struct fib6_nh *nh;
+
if (cfg->fc_flags & RTF_CACHE) {
+ struct fib6_result res = {
+ .f6i = rt,
+ };
int rc;
- rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
+ rt_cache = rt6_find_cached_rt(&res,
+ &cfg->fc_dst,
&cfg->fc_src);
if (rt_cache) {
rc = ip6_del_cached_rt(rt_cache, cfg);
@@ -3279,12 +3375,14 @@ static int ip6_route_del(struct fib6_config *cfg,
}
continue;
}
+
+ nh = &rt->fib6_nh;
if (cfg->fc_ifindex &&
- (!rt->fib6_nh.nh_dev ||
- rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
+ (!nh->fib_nh_dev ||
+ nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
continue;
if (cfg->fc_flags & RTF_GATEWAY &&
- !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
+ !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
continue;
if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
continue;
@@ -3310,10 +3408,10 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
{
struct netevent_redirect netevent;
struct rt6_info *rt, *nrt = NULL;
+ struct fib6_result res = {};
struct ndisc_options ndopts;
struct inet6_dev *in6_dev;
struct neighbour *neigh;
- struct fib6_info *from;
struct rd_msg *msg;
int optlen, on_link;
u8 *lladdr;
@@ -3396,14 +3494,17 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
NDISC_REDIRECT, &ndopts);
rcu_read_lock();
- from = rcu_dereference(rt->from);
+ res.f6i = rcu_dereference(rt->from);
/* This fib6_info_hold() is safe here because we hold reference to rt
* and rt already holds reference to fib6_info.
*/
- fib6_info_hold(from);
+ fib6_info_hold(res.f6i);
rcu_read_unlock();
- nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
+ res.nh = &res.f6i->fib6_nh;
+ res.fib6_flags = res.f6i->fib6_flags;
+ res.fib6_type = res.f6i->fib6_type;
+ nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
if (!nrt)
goto out;
@@ -3417,7 +3518,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
* a cached route because rt6_insert_exception() will
* takes care of it
*/
- if (rt6_insert_exception(nrt, from)) {
+ if (rt6_insert_exception(nrt, &res)) {
dst_release_immediate(&nrt->dst);
goto out;
}
@@ -3429,7 +3530,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
out:
- fib6_info_release(from);
+ fib6_info_release(res.f6i);
neigh_release(neigh);
}
@@ -3455,11 +3556,12 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
goto out;
for_each_fib6_node_rt_rcu(fn) {
- if (rt->fib6_nh.nh_dev->ifindex != ifindex)
+ if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
continue;
- if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
+ if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
+ !rt->fib6_nh.fib_nh_gw_family)
continue;
- if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
+ if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
continue;
if (!fib6_info_hold_safe(rt))
continue;
@@ -3517,9 +3619,11 @@ struct fib6_info *rt6_get_dflt_router(struct net *net,
rcu_read_lock();
for_each_fib6_node_rt_rcu(&table->tb6_root) {
- if (dev == rt->fib6_nh.nh_dev &&
+ struct fib6_nh *nh = &rt->fib6_nh;
+
+ if (dev == nh->fib_nh_dev &&
((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
- ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
+ ipv6_addr_equal(&nh->fib_nh_gw6, addr))
break;
}
if (rt && !fib6_info_hold_safe(rt))
@@ -3610,7 +3714,7 @@ static void rtmsg_to_fib6_config(struct net *net,
.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
: RT6_TABLE_MAIN,
.fc_ifindex = rtmsg->rtmsg_ifindex,
- .fc_metric = rtmsg->rtmsg_metric,
+ .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
.fc_expires = rtmsg->rtmsg_info,
.fc_dst_len = rtmsg->rtmsg_dst_len,
.fc_src_len = rtmsg->rtmsg_src_len,
@@ -3721,36 +3825,26 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
const struct in6_addr *addr,
bool anycast, gfp_t gfp_flags)
{
- u32 tb_id;
- struct net_device *dev = idev->dev;
- struct fib6_info *f6i;
-
- f6i = fib6_info_alloc(gfp_flags);
- if (!f6i)
- return ERR_PTR(-ENOMEM);
+ struct fib6_config cfg = {
+ .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
+ .fc_ifindex = idev->dev->ifindex,
+ .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
+ .fc_dst = *addr,
+ .fc_dst_len = 128,
+ .fc_protocol = RTPROT_KERNEL,
+ .fc_nlinfo.nl_net = net,
+ .fc_ignore_dev_down = true,
+ };
- f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0, NULL);
- f6i->dst_nocount = true;
- f6i->dst_host = true;
- f6i->fib6_protocol = RTPROT_KERNEL;
- f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
if (anycast) {
- f6i->fib6_type = RTN_ANYCAST;
- f6i->fib6_flags |= RTF_ANYCAST;
+ cfg.fc_type = RTN_ANYCAST;
+ cfg.fc_flags |= RTF_ANYCAST;
} else {
- f6i->fib6_type = RTN_LOCAL;
- f6i->fib6_flags |= RTF_LOCAL;
+ cfg.fc_type = RTN_LOCAL;
+ cfg.fc_flags |= RTF_LOCAL;
}
- f6i->fib6_nh.nh_gw = *addr;
- dev_hold(dev);
- f6i->fib6_nh.nh_dev = dev;
- f6i->fib6_dst.addr = *addr;
- f6i->fib6_dst.plen = 128;
- tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
- f6i->fib6_table = fib6_get_table(net, tb_id);
-
- return f6i;
+ return ip6_route_info_create(&cfg, gfp_flags, NULL);
}
/* remove deleted ip from prefsrc entries */
@@ -3766,7 +3860,7 @@ static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
struct net *net = ((struct arg_dev_net_ip *)arg)->net;
struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
- if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
+ if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
rt != net->ipv6.fib6_null_entry &&
ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
spin_lock_bh(&rt6_exception_lock);
@@ -3788,7 +3882,7 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
-#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
+#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT)
/* Remove routers and update dst entries when gateway turn into host. */
static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
@@ -3796,7 +3890,8 @@ static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
struct in6_addr *gateway = (struct in6_addr *)arg;
if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
- ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
+ rt->fib6_nh.fib_nh_gw_family &&
+ ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
return -1;
}
@@ -3817,7 +3912,7 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
struct arg_netdev_event {
const struct net_device *dev;
union {
- unsigned int nh_flags;
+ unsigned char nh_flags;
unsigned long event;
};
};
@@ -3844,9 +3939,9 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
static bool rt6_is_dead(const struct fib6_info *rt)
{
- if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
- (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
- fib6_ignore_linkdown(rt)))
+ if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
+ (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
+ ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
return true;
return false;
@@ -3858,11 +3953,11 @@ static int rt6_multipath_total_weight(const struct fib6_info *rt)
int total = 0;
if (!rt6_is_dead(rt))
- total += rt->fib6_nh.nh_weight;
+ total += rt->fib6_nh.fib_nh_weight;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
if (!rt6_is_dead(iter))
- total += iter->fib6_nh.nh_weight;
+ total += iter->fib6_nh.fib_nh_weight;
}
return total;
@@ -3873,11 +3968,11 @@ static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
int upper_bound = -1;
if (!rt6_is_dead(rt)) {
- *weight += rt->fib6_nh.nh_weight;
+ *weight += rt->fib6_nh.fib_nh_weight;
upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
total) - 1;
}
- atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
+ atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
}
static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
@@ -3920,8 +4015,9 @@ static int fib6_ifup(struct fib6_info *rt, void *p_arg)
const struct arg_netdev_event *arg = p_arg;
struct net *net = dev_net(arg->dev);
- if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
- rt->fib6_nh.nh_flags &= ~arg->nh_flags;
+ if (rt != net->ipv6.fib6_null_entry &&
+ rt->fib6_nh.fib_nh_dev == arg->dev) {
+ rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
fib6_update_sernum_upto_root(net, rt);
rt6_multipath_rebalance(rt);
}
@@ -3929,7 +4025,7 @@ static int fib6_ifup(struct fib6_info *rt, void *p_arg)
return 0;
}
-void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
+void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
{
struct arg_netdev_event arg = {
.dev = dev,
@@ -3949,10 +4045,10 @@ static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
{
struct fib6_info *iter;
- if (rt->fib6_nh.nh_dev == dev)
+ if (rt->fib6_nh.fib_nh_dev == dev)
return true;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
- if (iter->fib6_nh.nh_dev == dev)
+ if (iter->fib6_nh.fib_nh_dev == dev)
return true;
return false;
@@ -3973,12 +4069,12 @@ static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
struct fib6_info *iter;
unsigned int dead = 0;
- if (rt->fib6_nh.nh_dev == down_dev ||
- rt->fib6_nh.nh_flags & RTNH_F_DEAD)
+ if (rt->fib6_nh.fib_nh_dev == down_dev ||
+ rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
dead++;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
- if (iter->fib6_nh.nh_dev == down_dev ||
- iter->fib6_nh.nh_flags & RTNH_F_DEAD)
+ if (iter->fib6_nh.fib_nh_dev == down_dev ||
+ iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
dead++;
return dead;
@@ -3986,15 +4082,15 @@ static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
const struct net_device *dev,
- unsigned int nh_flags)
+ unsigned char nh_flags)
{
struct fib6_info *iter;
- if (rt->fib6_nh.nh_dev == dev)
- rt->fib6_nh.nh_flags |= nh_flags;
+ if (rt->fib6_nh.fib_nh_dev == dev)
+ rt->fib6_nh.fib_nh_flags |= nh_flags;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
- if (iter->fib6_nh.nh_dev == dev)
- iter->fib6_nh.nh_flags |= nh_flags;
+ if (iter->fib6_nh.fib_nh_dev == dev)
+ iter->fib6_nh.fib_nh_flags |= nh_flags;
}
/* called with write lock held for table with rt */
@@ -4009,12 +4105,12 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
switch (arg->event) {
case NETDEV_UNREGISTER:
- return rt->fib6_nh.nh_dev == dev ? -1 : 0;
+ return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
case NETDEV_DOWN:
if (rt->should_flush)
return -1;
if (!rt->fib6_nsiblings)
- return rt->fib6_nh.nh_dev == dev ? -1 : 0;
+ return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
if (rt6_multipath_uses_dev(rt, dev)) {
unsigned int count;
@@ -4030,10 +4126,10 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
}
return -2;
case NETDEV_CHANGE:
- if (rt->fib6_nh.nh_dev != dev ||
+ if (rt->fib6_nh.fib_nh_dev != dev ||
rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
break;
- rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
+ rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
rt6_multipath_rebalance(rt);
break;
}
@@ -4089,7 +4185,7 @@ static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
Since RFC 1981 doesn't include administrative MTU increase
update PMTU increase is a MUST. (i.e. jumbo frame)
*/
- if (rt->fib6_nh.nh_dev == arg->dev &&
+ if (rt->fib6_nh.fib_nh_dev == arg->dev &&
!fib6_metric_locked(rt, RTAX_MTU)) {
u32 mtu = rt->fib6_pmtu;
@@ -4380,7 +4476,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
goto cleanup;
}
- rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
+ rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
rt, &r_cfg);
@@ -4530,6 +4626,9 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
+ if (cfg.fc_metric == 0)
+ cfg.fc_metric = IP6_RT_PRIO_USER;
+
if (cfg.fc_mp)
return ip6_route_multipath_add(&cfg, extack);
else
@@ -4544,7 +4643,7 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt)
nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
+ NLA_ALIGN(sizeof(struct rtnexthop))
+ nla_total_size(16) /* RTA_GATEWAY */
- + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
+ + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
nexthop_len *= rt->fib6_nsiblings;
}
@@ -4562,77 +4661,10 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt)
+ nla_total_size(sizeof(struct rta_cacheinfo))
+ nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
+ nla_total_size(1) /* RTA_PREF */
- + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
+ + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
+ nexthop_len;
}
-static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
- unsigned int *flags, bool skip_oif)
-{
- if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
- *flags |= RTNH_F_DEAD;
-
- if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
- *flags |= RTNH_F_LINKDOWN;
-
- rcu_read_lock();
- if (fib6_ignore_linkdown(rt))
- *flags |= RTNH_F_DEAD;
- rcu_read_unlock();
- }
-
- if (rt->fib6_flags & RTF_GATEWAY) {
- if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
- goto nla_put_failure;
- }
-
- *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
- if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
- *flags |= RTNH_F_OFFLOAD;
-
- /* not needed for multipath encoding b/c it has a rtnexthop struct */
- if (!skip_oif && rt->fib6_nh.nh_dev &&
- nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
- goto nla_put_failure;
-
- if (rt->fib6_nh.nh_lwtstate &&
- lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
- goto nla_put_failure;
-
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
-}
-
-/* add multipath next hop */
-static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
-{
- const struct net_device *dev = rt->fib6_nh.nh_dev;
- struct rtnexthop *rtnh;
- unsigned int flags = 0;
-
- rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
- if (!rtnh)
- goto nla_put_failure;
-
- rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
- rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
-
- if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
- goto nla_put_failure;
-
- rtnh->rtnh_flags = flags;
-
- /* length of rtnetlink header + attributes */
- rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
-
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
-}
-
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
struct fib6_info *rt, struct dst_entry *dst,
struct in6_addr *dest, struct in6_addr *src,
@@ -4749,19 +4781,26 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
if (!mp)
goto nla_put_failure;
- if (rt6_add_nexthop(skb, rt) < 0)
+ if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common,
+ rt->fib6_nh.fib_nh_weight) < 0)
goto nla_put_failure;
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, fib6_siblings) {
- if (rt6_add_nexthop(skb, sibling) < 0)
+ if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common,
+ sibling->fib6_nh.fib_nh_weight) < 0)
goto nla_put_failure;
}
nla_nest_end(skb, mp);
} else {
- if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
+ unsigned char nh_flags = 0;
+
+ if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common,
+ &nh_flags, false) < 0)
goto nla_put_failure;
+
+ rtm->rtm_flags |= nh_flags;
}
if (rt6_flags & RTF_EXPIRES) {
@@ -4787,7 +4826,7 @@ nla_put_failure:
static bool fib6_info_uses_dev(const struct fib6_info *f6i,
const struct net_device *dev)
{
- if (f6i->fib6_nh.nh_dev == dev)
+ if (f6i->fib6_nh.fib_nh_dev == dev)
return true;
if (f6i->fib6_nsiblings) {
@@ -4795,7 +4834,7 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i,
list_for_each_entry_safe(sibling, next_sibling,
&f6i->fib6_siblings, fib6_siblings) {
- if (sibling->fib6_nh.nh_dev == dev)
+ if (sibling->fib6_nh.fib_nh_dev == dev)
return true;
}
}
@@ -5080,7 +5119,7 @@ static int ip6_route_dev_notify(struct notifier_block *this,
return NOTIFY_OK;
if (event == NETDEV_REGISTER) {
- net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
+ net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
net->ipv6.ip6_null_entry->dst.dev = dev;
net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@ -5415,7 +5454,7 @@ void __init ip6_route_init_special_entries(void)
/* Registering of the loopback is done before this portion of code,
* the loopback reference in rt6_info will not be taken, do it
* manually for init_net */
- init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
+ init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 9b2f272ca164..ceff773471e7 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -399,7 +399,6 @@ static const struct genl_ops seg6_genl_ops[] = {
{
.cmd = SEG6_CMD_SETHMAC,
.doit = seg6_genl_sethmac,
- .policy = seg6_genl_policy,
.flags = GENL_ADMIN_PERM,
},
{
@@ -407,19 +406,16 @@ static const struct genl_ops seg6_genl_ops[] = {
.start = seg6_genl_dumphmac_start,
.dumpit = seg6_genl_dumphmac,
.done = seg6_genl_dumphmac_done,
- .policy = seg6_genl_policy,
.flags = GENL_ADMIN_PERM,
},
{
.cmd = SEG6_CMD_SET_TUNSRC,
.doit = seg6_genl_set_tunsrc,
- .policy = seg6_genl_policy,
.flags = GENL_ADMIN_PERM,
},
{
.cmd = SEG6_CMD_GET_TUNSRC,
.doit = seg6_genl_get_tunsrc,
- .policy = seg6_genl_policy,
.flags = GENL_ADMIN_PERM,
},
};
@@ -429,6 +425,7 @@ static struct genl_family seg6_genl_family __ro_after_init = {
.name = SEG6_GENL_NAME,
.version = SEG6_GENL_VERSION,
.maxattr = SEG6_ATTR_MAX,
+ .policy = seg6_genl_policy,
.netnsok = true,
.parallel_ops = true,
.ops = seg6_genl_ops,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 44d431849d39..82018bdce863 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -90,6 +90,18 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
}
#endif
+/* Helper returning the inet6 address from a given tcp socket.
+ * It can be used in TCP stack instead of inet6_sk(sk).
+ * This avoids a dereference and allow compiler optimizations.
+ * It is a specialized version of inet6_sk_generic().
+ */
+static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
+{
+ unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
+
+ return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
+}
+
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -99,7 +111,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
sk->sk_rx_dst = dst;
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
- inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
+ tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
}
}
@@ -138,7 +150,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
struct inet_sock *inet = inet_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
struct ipv6_txoptions *opt;
@@ -390,7 +402,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (sk->sk_state == TCP_CLOSE)
goto out;
- if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
+ if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
goto out;
}
@@ -405,7 +417,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
goto out;
}
- np = inet6_sk(sk);
+ np = tcp_inet6_sk(sk);
if (type == NDISC_REDIRECT) {
if (!sock_owned_by_user(sk)) {
@@ -478,7 +490,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
enum tcp_synack_type synack_type)
{
struct inet_request_sock *ireq = inet_rsk(req);
- struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct ipv6_txoptions *opt;
struct flowi6 *fl6 = &fl->u.ip6;
struct sk_buff *skb;
@@ -737,7 +749,7 @@ static void tcp_v6_init_req(struct request_sock *req,
{
bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
struct inet_request_sock *ireq = inet_rsk(req);
- const struct ipv6_pinfo *np = inet6_sk(sk_listener);
+ const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
@@ -1066,9 +1078,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
{
struct inet_request_sock *ireq;
struct ipv6_pinfo *newnp;
- const struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct ipv6_txoptions *opt;
- struct tcp6_sock *newtcp6sk;
struct inet_sock *newinet;
struct tcp_sock *newtp;
struct sock *newsk;
@@ -1088,11 +1099,10 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
if (!newsk)
return NULL;
- newtcp6sk = (struct tcp6_sock *)newsk;
- inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
+ inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
newinet = inet_sk(newsk);
- newnp = inet6_sk(newsk);
+ newnp = tcp_inet6_sk(newsk);
newtp = tcp_sk(newsk);
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
@@ -1156,12 +1166,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
ip6_dst_store(newsk, dst, NULL, NULL);
inet6_sk_rx_dst_set(newsk, skb);
- newtcp6sk = (struct tcp6_sock *)newsk;
- inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
+ inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
newtp = tcp_sk(newsk);
newinet = inet_sk(newsk);
- newnp = inet6_sk(newsk);
+ newnp = tcp_inet6_sk(newsk);
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
@@ -1276,9 +1285,9 @@ out:
*/
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct tcp_sock *tp;
+ struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct sk_buff *opt_skb = NULL;
+ struct tcp_sock *tp;
/* Imagine: socket is IPv6. IPv4 packet arrives,
goes to IPv4 receive handler and backlogged.
@@ -1428,6 +1437,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
static int tcp_v6_rcv(struct sk_buff *skb)
{
+ struct sk_buff *skb_to_free;
int sdif = inet6_sdif(skb);
const struct tcphdr *th;
const struct ipv6hdr *hdr;
@@ -1524,7 +1534,7 @@ process:
return 0;
}
}
- if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
+ if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
goto discard_and_relse;
}
@@ -1554,12 +1564,17 @@ process:
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
if (!sock_owned_by_user(sk)) {
+ skb_to_free = sk->sk_rx_skb_cache;
+ sk->sk_rx_skb_cache = NULL;
ret = tcp_v6_do_rcv(sk, skb);
- } else if (tcp_add_backlog(sk, skb)) {
- goto discard_and_relse;
+ } else {
+ if (tcp_add_backlog(sk, skb))
+ goto discard_and_relse;
+ skb_to_free = NULL;
}
bh_unlock_sock(sk);
-
+ if (skb_to_free)
+ __kfree_skb(skb_to_free);
put_and_return:
if (refcounted)
sock_put(sk);
@@ -1669,7 +1684,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
if (dst)
- dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
+ dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
if (dst &&
inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
skb_dst_set_noref(skb, dst);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 622eeaf5732b..2464fba569b4 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -285,8 +285,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
unsigned int ulen, copied;
- int peeked, peeking, off;
- int err;
+ int off, err, peeking = flags & MSG_PEEK;
int is_udplite = IS_UDPLITE(sk);
struct udp_mib __percpu *mib;
bool checksum_valid = false;
@@ -299,9 +298,8 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
try_again:
- peeking = flags & MSG_PEEK;
off = sk_peek_offset(sk, flags);
- skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
+ skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
if (!skb)
return err;
@@ -340,14 +338,14 @@ try_again:
goto csum_copy_err;
}
if (unlikely(err)) {
- if (!peeked) {
+ if (!peeking) {
atomic_inc(&sk->sk_drops);
SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
}
kfree_skb(skb);
return err;
}
- if (!peeked)
+ if (!peeking)
SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS);
sock_recv_ts_and_drops(msg, sk, skb);