31 files changed, 1155 insertions, 860 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index eab3ebde981e..5183a2daba64 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -136,6 +136,10 @@ void inet_sock_destruct(struct sock *sk)
 	struct inet_sock *inet = inet_sk(sk);
 
 	__skb_queue_purge(&sk->sk_receive_queue);
+	if (sk->sk_rx_skb_cache) {
+		__kfree_skb(sk->sk_rx_skb_cache);
+		sk->sk_rx_skb_cache = NULL;
+	}
 	__skb_queue_purge(&sk->sk_error_queue);
 
 	sk_mem_reclaim(sk);
@@ -156,7 +160,7 @@ void inet_sock_destruct(struct sock *sk)
 	WARN_ON(sk->sk_forward_alloc);
 
 	kfree(rcu_dereference_protected(inet->inet_opt, 1));
-	dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
+	dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
 	dst_release(sk->sk_rx_dst);
 	sk_refcnt_debug_dec(sk);
 }
@@ -911,12 +915,6 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	struct rtentry rt;
 
 	switch (cmd) {
-	case SIOCGSTAMP:
-		err = sock_get_timestamp(sk, (struct timeval __user *)arg);
-		break;
-	case SIOCGSTAMPNS:
-		err = sock_get_timestampns(sk, (struct timespec __user *)arg);
-		break;
 	case SIOCADDRT:
 	case SIOCDELRT:
 		if (copy_from_user(&rt, p, sizeof(struct rtentry)))
@@ -988,6 +986,7 @@ const struct proto_ops inet_stream_ops = {
 	.getname	   = inet_getname,
 	.poll		   = tcp_poll,
 	.ioctl		   = inet_ioctl,
+	.gettstamp	   = sock_gettstamp,
 	.listen		   = inet_listen,
 	.shutdown	   = inet_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
@@ -1023,6 +1022,7 @@ const struct proto_ops inet_dgram_ops = {
 	.getname	   = inet_getname,
 	.poll		   = udp_poll,
 	.ioctl		   = inet_ioctl,
+	.gettstamp	   = sock_gettstamp,
 	.listen		   = sock_no_listen,
 	.shutdown	   = inet_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
@@ -1055,6 +1055,7 @@ static const struct proto_ops inet_sockraw_ops = {
 	.getname	   = inet_getname,
 	.poll		   = datagram_poll,
 	.ioctl		   = inet_ioctl,
+	.gettstamp	   = sock_gettstamp,
 	.listen		   = sock_no_listen,
 	.shutdown	   = inet_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
index 1e976bb93d99..15427163a041 100644
--- a/net/ipv4/bpfilter/sockopt.c
+++ b/net/ipv4/bpfilter/sockopt.c
@@ -77,5 +77,4 @@ static int __init bpfilter_sockopt_init(void)
 
 	return 0;
 }
-
-module_init(bpfilter_sockopt_init);
+device_initcall(bpfilter_sockopt_init);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index eb514f312e6f..701c5d113a34 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -621,8 +621,8 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	ASSERT_RTNL();
 
-	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+				     ifa_ipv4_policy, extack);
 	if (err < 0)
 		goto errout;
 
@@ -793,8 +793,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
 	struct in_device *in_dev;
 	int err;
 
-	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+				     ifa_ipv4_policy, extack);
 	if (err < 0)
 		goto errout;
 
@@ -1689,8 +1689,8 @@ static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
 		fillargs->flags |= NLM_F_DUMP_FILTERED;
 	}
 
-	err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
-				 ifa_ipv4_policy, extack);
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
+					    ifa_ipv4_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -1906,7 +1906,8 @@ static int inet_validate_link_af(const struct net_device *dev,
 	if (dev && !__in_dev_get_rcu(dev))
 		return -EAFNOSUPPORT;
 
-	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
+	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
+					  inet_af_policy, NULL);
 	if (err < 0)
 		return err;
 
@@ -1934,7 +1935,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
 	if (!in_dev)
 		return -EAFNOSUPPORT;
 
-	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
+	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
 		BUG();
 
 	if (tb[IFLA_INET_CONF]) {
@@ -2076,11 +2077,13 @@ static int inet_netconf_valid_get_req(struct sk_buff *skb,
 	}
 
 	if (!netlink_strict_get_check(skb))
-		return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb,
-				   NETCONFA_MAX, devconf_ipv4_policy, extack);
+		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
+					      tb, NETCONFA_MAX,
+					      devconf_ipv4_policy, extack);
 
-	err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb,
-				 NETCONFA_MAX, devconf_ipv4_policy, extack);
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
+					    tb, NETCONFA_MAX,
+					    devconf_ipv4_policy, extack);
 	if (err)
 		return err;
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ed14ec245584..b298255f6fdb 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -307,7 +307,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
 			.flowi4_mark = vmark ? skb->mark : 0,
 		};
 		if (!fib_lookup(net, &fl4, &res, 0))
-			return FIB_RES_PREFSRC(net, res);
+			return fib_result_prefsrc(net, &res);
 	} else {
 		scope = RT_SCOPE_LINK;
 	}
@@ -324,16 +324,16 @@ bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev)
 	for (ret = 0; ret < fi->fib_nhs; ret++) {
 		struct fib_nh *nh = &fi->fib_nh[ret];
 
-		if (nh->nh_dev == dev) {
+		if (nh->fib_nh_dev == dev) {
 			dev_match = true;
 			break;
-		} else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
+		} else if (l3mdev_master_ifindex_rcu(nh->fib_nh_dev) == dev->ifindex) {
 			dev_match = true;
 			break;
 		}
 	}
 #else
-	if (fi->fib_nh[0].nh_dev == dev)
+	if (fi->fib_nh[0].fib_nh_dev == dev)
 		dev_match = true;
 #endif
 
@@ -390,7 +390,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 
 	dev_match = fib_info_nh_uses_dev(res.fi, dev);
 	if (dev_match) {
-		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
+		ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST;
 		return ret;
 	}
 	if (no_addr)
@@ -402,7 +402,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 	ret = 0;
 	if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) {
 		if (res.type == RTN_UNICAST)
-			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
+			ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST;
 	}
 	return ret;
 
@@ -558,7 +558,8 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
 	if (rt->rt_gateway.sa_family == AF_INET && addr) {
 		unsigned int addr_type;
 
-		cfg->fc_gw = addr;
+		cfg->fc_gw4 = addr;
+		cfg->fc_gw_family = AF_INET;
 		addr_type = inet_addr_type_table(net, addr, cfg->fc_table);
 		if (rt->rt_flags & RTF_GATEWAY &&
 		    addr_type == RTN_UNICAST)
@@ -568,7 +569,7 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
 	if (cmd == SIOCDELRT)
 		return 0;
 
-	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
+	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw_family)
 		return -EINVAL;
 
 	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
@@ -664,16 +665,71 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
 	[RTA_DPORT]		= { .type = NLA_U16 },
 };
 
+/* Parse an RTA_VIA attribute into the gateway fields of @cfg.
+ *
+ * The payload is a struct rtvia: an address family followed by the raw
+ * address bytes.  On success fc_gw_family and fc_gw4 or fc_gw6 are set
+ * and 0 is returned.  A short attribute, a wrong address length or an
+ * unsupported family sets an extack message and returns -EINVAL.
+ */
+int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla,
+		    struct netlink_ext_ack *extack)
+{
+	struct rtvia *via;
+	int alen;
+
+	if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) {
+		NL_SET_ERR_MSG(extack, "Invalid attribute length for RTA_VIA");
+		return -EINVAL;
+	}
+
+	via = nla_data(nla);
+	alen = nla_len(nla) - offsetof(struct rtvia, rtvia_addr);
+
+	switch (via->rtvia_family) {
+	case AF_INET:
+		if (alen != sizeof(__be32)) {
+			NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_VIA");
+			return -EINVAL;
+		}
+		cfg->fc_gw_family = AF_INET;
+		cfg->fc_gw4 = *((__be32 *)via->rtvia_addr);
+		break;
+	case AF_INET6:
+		/* IS_ENABLED() rather than #ifdef: a modular IPv6 build
+		 * defines CONFIG_IPV6_MODULE, not CONFIG_IPV6.
+		 */
+#if IS_ENABLED(CONFIG_IPV6)
+		if (alen != sizeof(struct in6_addr)) {
+			NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_VIA");
+			return -EINVAL;
+		}
+		cfg->fc_gw_family = AF_INET6;
+		cfg->fc_gw6 = *((struct in6_addr *)via->rtvia_addr);
+#else
+		NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel");
+		return -EINVAL;
+#endif
+		break;
+	default:
+		NL_SET_ERR_MSG(extack, "Unsupported address family in RTA_VIA");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
 			     struct nlmsghdr *nlh, struct fib_config *cfg,
 			     struct netlink_ext_ack *extack)
 {
+	bool has_gw = false, has_via = false;
 	struct nlattr *attr;
 	int err, remaining;
 	struct rtmsg *rtm;
 
-	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy,
-			     extack);
+	err = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX,
+					rtm_ipv4_policy, extack);
 	if (err < 0)
 		goto errout;
 
@@ -708,12 +754,17 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
 			cfg->fc_oif = nla_get_u32(attr);
 			break;
 		case RTA_GATEWAY:
-			cfg->fc_gw = nla_get_be32(attr);
+			has_gw = true;
+			cfg->fc_gw4 = nla_get_be32(attr);
+			if (cfg->fc_gw4)
+				cfg->fc_gw_family = AF_INET;
 			break;
 		case RTA_VIA:
-			NL_SET_ERR_MSG(extack, "IPv4 does not support RTA_VIA attribute");
-			err = -EINVAL;
-			goto errout;
+			has_via = true;
+			err = fib_gw_from_via(cfg, attr, extack);
+			if (err)
+				goto errout;
+			break;
 		case RTA_PRIORITY:
 			cfg->fc_priority = nla_get_u32(attr);
 			break;
@@ -752,6 +803,14 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
 		}
 	}
 
+	if (has_gw && has_via) {
+		NL_SET_ERR_MSG(extack,
+			       "Nexthop configuration can not contain both GATEWAY and VIA");
+		/* Set err explicitly; nothing above leaves an error code in it. */
+		err = -EINVAL;
+		goto errout;
+	}
+
 	return 0;
 errout:
 	return err;
@@ -839,8 +896,8 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
 	filter->rt_type  = rtm->rtm_type;
 	filter->table_id = rtm->rtm_table;
 
-	err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
-				 rtm_ipv4_policy, extack);
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
+					    rtm_ipv4_policy, extack);
 	if (err < 0)
 		return err;
 
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index e6ff282bb7f4..7945f0534db7 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -45,6 +45,7 @@ static inline void fib_result_assign(struct fib_result *res,
 {
 	/* we used to play games with refcounts, but we now use RCU */
 	res->fi = fi;
+	res->nhc = fib_info_nhc(fi, 0);
 }
 
 struct fib_prop {
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 8e185b5a2bf6..71c2165a2ce3 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -41,10 +41,12 @@
 #include <net/tcp.h>
 #include <net/sock.h>
 #include <net/ip_fib.h>
+#include <net/ip6_fib.h>
 #include <net/netlink.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
 #include <net/lwtunnel.h>
 #include <net/fib_notifier.h>
+#include <net/addrconf.h>
 
 #include "fib_lookup.h"
 
@@ -204,18 +206,34 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
 	free_percpu(rtp);
 }
 
+void fib_nh_common_release(struct fib_nh_common *nhc)
+{
+	if (nhc->nhc_dev)
+		dev_put(nhc->nhc_dev);
+
+	lwtstate_put(nhc->nhc_lwtstate);
+}
+EXPORT_SYMBOL_GPL(fib_nh_common_release);
+
+void fib_nh_release(struct net *net, struct fib_nh *fib_nh)
+{
+#ifdef CONFIG_IP_ROUTE_CLASSID
+	if (fib_nh->nh_tclassid)
+		net->ipv4.fib_num_tclassid_users--;
+#endif
+	fib_nh_common_release(&fib_nh->nh_common);
+	free_nh_exceptions(fib_nh);
+	rt_fibinfo_free_cpus(fib_nh->nh_pcpu_rth_output);
+	rt_fibinfo_free(&fib_nh->nh_rth_input);
+}
+
 /* Release a nexthop info record */
 static void free_fib_info_rcu(struct rcu_head *head)
 {
 	struct fib_info *fi = container_of(head, struct fib_info, rcu);
 
 	change_nexthops(fi) {
-		if (nexthop_nh->nh_dev)
-			dev_put(nexthop_nh->nh_dev);
-		lwtstate_put(nexthop_nh->nh_lwtstate);
-		free_nh_exceptions(nexthop_nh);
-		rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
-		rt_fibinfo_free(&nexthop_nh->nh_rth_input);
+		fib_nh_release(fi->fib_net, nexthop_nh);
 	} endfor_nexthops(fi);
 
 	ip_fib_metrics_put(fi->fib_metrics);
@@ -230,12 +248,7 @@ void free_fib_info(struct fib_info *fi)
 		return;
 	}
 	fib_info_cnt--;
-#ifdef CONFIG_IP_ROUTE_CLASSID
-	change_nexthops(fi) {
-		if (nexthop_nh->nh_tclassid)
-			fi->fib_net->ipv4.fib_num_tclassid_users--;
-	} endfor_nexthops(fi);
-#endif
+
 	call_rcu(&fi->rcu, free_fib_info_rcu);
 }
 EXPORT_SYMBOL_GPL(free_fib_info);
@@ -248,7 +261,7 @@ void fib_release_info(struct fib_info *fi)
 		if (fi->fib_prefsrc)
 			hlist_del(&fi->fib_lhash);
 		change_nexthops(fi) {
-			if (!nexthop_nh->nh_dev)
+			if (!nexthop_nh->fib_nh_dev)
 				continue;
 			hlist_del(&nexthop_nh->nh_hash);
 		} endfor_nexthops(fi)
@@ -263,18 +276,27 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
 	const struct fib_nh *onh = ofi->fib_nh;
 
 	for_nexthops(fi) {
-		if (nh->nh_oif != onh->nh_oif ||
-		    nh->nh_gw  != onh->nh_gw ||
-		    nh->nh_scope != onh->nh_scope ||
+		if (nh->fib_nh_oif != onh->fib_nh_oif ||
+		    nh->fib_nh_gw_family != onh->fib_nh_gw_family ||
+		    nh->fib_nh_scope != onh->fib_nh_scope ||
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-		    nh->nh_weight != onh->nh_weight ||
+		    nh->fib_nh_weight != onh->fib_nh_weight ||
 #endif
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		    nh->nh_tclassid != onh->nh_tclassid ||
 #endif
-		    lwtunnel_cmp_encap(nh->nh_lwtstate, onh->nh_lwtstate) ||
-		    ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK))
+		    lwtunnel_cmp_encap(nh->fib_nh_lws, onh->fib_nh_lws) ||
+		    ((nh->fib_nh_flags ^ onh->fib_nh_flags) & ~RTNH_COMPARE_MASK))
+			return -1;
+
+		if (nh->fib_nh_gw_family == AF_INET &&
+		    nh->fib_nh_gw4 != onh->fib_nh_gw4)
 			return -1;
+
+		if (nh->fib_nh_gw_family == AF_INET6 &&
+		    ipv6_addr_cmp(&nh->fib_nh_gw6, &onh->fib_nh_gw6))
+			return -1;
+
 		onh++;
 	} endfor_nexthops(fi);
 	return 0;
@@ -298,7 +320,7 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
 	val ^= (__force u32)fi->fib_prefsrc;
 	val ^= fi->fib_priority;
 	for_nexthops(fi) {
-		val ^= fib_devindex_hashfn(nh->nh_oif);
+		val ^= fib_devindex_hashfn(nh->fib_nh_oif);
 	} endfor_nexthops(fi)
 
 	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
@@ -347,9 +369,9 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
 	hash = fib_devindex_hashfn(dev->ifindex);
 	head = &fib_info_devhash[hash];
 	hlist_for_each_entry(nh, head, nh_hash) {
-		if (nh->nh_dev == dev &&
-		    nh->nh_gw == gw &&
-		    !(nh->nh_flags & RTNH_F_DEAD)) {
+		if (nh->fib_nh_dev == dev &&
+		    nh->fib_nh_gw4 == gw &&
+		    !(nh->fib_nh_flags & RTNH_F_DEAD)) {
 			spin_unlock(&fib_info_lock);
 			return 0;
 		}
@@ -384,10 +406,10 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
 
 		/* grab encap info */
 		for_nexthops(fi) {
-			if (nh->nh_lwtstate) {
+			if (nh->fib_nh_lws) {
 				/* RTA_ENCAP_TYPE */
 				nh_encapsize += lwtunnel_get_encap_size(
-						nh->nh_lwtstate);
+						nh->fib_nh_lws);
 				/* RTA_ENCAP */
 				nh_encapsize +=  nla_total_size(2);
 			}
@@ -435,10 +457,18 @@ static int fib_detect_death(struct fib_info *fi, int order,
 			    struct fib_info **last_resort, int *last_idx,
 			    int dflt)
 {
+	const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
 	struct neighbour *n;
 	int state = NUD_NONE;
 
-	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
+	if (likely(nhc->nhc_gw_family == AF_INET))
+		n = neigh_lookup(&arp_tbl, &nhc->nhc_gw.ipv4, nhc->nhc_dev);
+	else if (nhc->nhc_gw_family == AF_INET6)
+		n = neigh_lookup(ipv6_stub->nd_tbl, &nhc->nhc_gw.ipv6,
+				 nhc->nhc_dev);
+	else
+		n = NULL;
+
 	if (n) {
 		state = n->nud_state;
 		neigh_release(n);
@@ -457,6 +487,73 @@ static int fib_detect_death(struct fib_info *fi, int order,
 	return 1;
 }
 
+int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap,
+		       u16 encap_type, void *cfg, gfp_t gfp_flags,
+		       struct netlink_ext_ack *extack)
+{
+	if (encap) {
+		struct lwtunnel_state *lwtstate;
+		int err;
+
+		if (encap_type == LWTUNNEL_ENCAP_NONE) {
+			NL_SET_ERR_MSG(extack, "LWT encap type not specified");
+			return -EINVAL;
+		}
+		err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family,
+					   cfg, &lwtstate, extack);
+		if (err)
+			return err;
+
+		nhc->nhc_lwtstate = lwtstate_get(lwtstate);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fib_nh_common_init);
+
+int fib_nh_init(struct net *net, struct fib_nh *nh,
+		struct fib_config *cfg, int nh_weight,
+		struct netlink_ext_ack *extack)
+{
+	int err = -ENOMEM;
+
+	nh->fib_nh_family = AF_INET;
+
+	nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
+	if (!nh->nh_pcpu_rth_output)
+		goto err_out;
+
+	err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap,
+				 cfg->fc_encap_type, cfg, GFP_KERNEL, extack);
+	if (err)
+		goto init_failure;
+
+	nh->fib_nh_oif = cfg->fc_oif;
+	nh->fib_nh_gw_family = cfg->fc_gw_family;
+	if (cfg->fc_gw_family == AF_INET)
+		nh->fib_nh_gw4 = cfg->fc_gw4;
+	else if (cfg->fc_gw_family == AF_INET6)
+		nh->fib_nh_gw6 = cfg->fc_gw6;
+
+	nh->fib_nh_flags = cfg->fc_flags;
+
+#ifdef CONFIG_IP_ROUTE_CLASSID
+	nh->nh_tclassid = cfg->fc_flow;
+	if (nh->nh_tclassid)
+		net->ipv4.fib_num_tclassid_users++;
+#endif
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	nh->fib_nh_weight = nh_weight;
+#endif
+	return 0;
+
+init_failure:
+	rt_fibinfo_free_cpus(nh->nh_pcpu_rth_output);
+	nh->nh_pcpu_rth_output = NULL;
+err_out:
+	return err;
+}
+
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 
 static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
@@ -483,11 +580,15 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
 		       int remaining, struct fib_config *cfg,
 		       struct netlink_ext_ack *extack)
 {
+	struct net *net = fi->fib_net;
+	struct fib_config fib_cfg;
 	int ret;
 
 	change_nexthops(fi) {
 		int attrlen;
 
+		memset(&fib_cfg, 0, sizeof(fib_cfg));
+
 		if (!rtnh_ok(rtnh, remaining)) {
 			NL_SET_ERR_MSG(extack,
 				       "Invalid nexthop configuration - extra data after nexthop");
@@ -500,56 +601,73 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
 			return -EINVAL;
 		}
 
-		nexthop_nh->nh_flags =
-			(cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
-		nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
-		nexthop_nh->nh_weight = rtnh->rtnh_hops + 1;
+		fib_cfg.fc_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
+		fib_cfg.fc_oif = rtnh->rtnh_ifindex;
 
 		attrlen = rtnh_attrlen(rtnh);
 		if (attrlen > 0) {
-			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+			struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);
 
 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
-			nexthop_nh->nh_gw = nla ? nla_get_in_addr(nla) : 0;
-#ifdef CONFIG_IP_ROUTE_CLASSID
-			nla = nla_find(attrs, attrlen, RTA_FLOW);
-			nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
-			if (nexthop_nh->nh_tclassid)
-				fi->fib_net->ipv4.fib_num_tclassid_users++;
-#endif
-			nla = nla_find(attrs, attrlen, RTA_ENCAP);
+			nlav = nla_find(attrs, attrlen, RTA_VIA);
+			if (nla && nlav) {
+				NL_SET_ERR_MSG(extack,
+					       "Nexthop configuration can not contain both GATEWAY and VIA");
+				return -EINVAL;
+			}
 			if (nla) {
-				struct lwtunnel_state *lwtstate;
-				struct nlattr *nla_entype;
-
-				nla_entype = nla_find(attrs, attrlen,
-						      RTA_ENCAP_TYPE);
-				if (!nla_entype) {
-					NL_SET_BAD_ATTR(extack, nla);
-					NL_SET_ERR_MSG(extack,
-						       "Encap type is missing");
-					goto err_inval;
-				}
-
-				ret = lwtunnel_build_state(nla_get_u16(
-							   nla_entype),
-							   nla,  AF_INET, cfg,
-							   &lwtstate, extack);
+				fib_cfg.fc_gw4 = nla_get_in_addr(nla);
+				if (fib_cfg.fc_gw4)
+					fib_cfg.fc_gw_family = AF_INET;
+			} else if (nlav) {
+				ret = fib_gw_from_via(&fib_cfg, nlav, extack);
 				if (ret)
 					goto errout;
-				nexthop_nh->nh_lwtstate =
-					lwtstate_get(lwtstate);
 			}
+
+			nla = nla_find(attrs, attrlen, RTA_FLOW);
+			if (nla)
+				fib_cfg.fc_flow = nla_get_u32(nla);
+
+			fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
+			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
+			if (nla)
+				fib_cfg.fc_encap_type = nla_get_u16(nla);
 		}
 
+		ret = fib_nh_init(net, nexthop_nh, &fib_cfg,
+				  rtnh->rtnh_hops + 1, extack);
+		if (ret)
+			goto errout;
+
 		rtnh = rtnh_next(rtnh, &remaining);
 	} endfor_nexthops(fi);
 
-	return 0;
-
-err_inval:
 	ret = -EINVAL;
-
+	if (cfg->fc_oif && fi->fib_nh->fib_nh_oif != cfg->fc_oif) {
+		NL_SET_ERR_MSG(extack,
+			       "Nexthop device index does not match RTA_OIF");
+		goto errout;
+	}
+	if (cfg->fc_gw_family) {
+		if (cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family ||
+		    (cfg->fc_gw_family == AF_INET &&
+		     fi->fib_nh->fib_nh_gw4 != cfg->fc_gw4) ||
+		    (cfg->fc_gw_family == AF_INET6 &&
+		     ipv6_addr_cmp(&fi->fib_nh->fib_nh_gw6, &cfg->fc_gw6))) {
+			NL_SET_ERR_MSG(extack,
+				       "Nexthop gateway does not match RTA_GATEWAY or RTA_VIA");
+			goto errout;
+		}
+	}
+#ifdef CONFIG_IP_ROUTE_CLASSID
+	if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) {
+		NL_SET_ERR_MSG(extack,
+			       "Nexthop class id does not match RTA_FLOW");
+		goto errout;
+	}
+#endif
+	ret = 0;
 errout:
 	return ret;
 }
@@ -558,49 +676,51 @@ static void fib_rebalance(struct fib_info *fi)
 {
 	int total;
 	int w;
-	struct in_device *in_dev;
 
 	if (fi->fib_nhs < 2)
 		return;
 
 	total = 0;
 	for_nexthops(fi) {
-		if (nh->nh_flags & RTNH_F_DEAD)
+		if (nh->fib_nh_flags & RTNH_F_DEAD)
 			continue;
 
-		in_dev = __in_dev_get_rtnl(nh->nh_dev);
-
-		if (in_dev &&
-		    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
-		    nh->nh_flags & RTNH_F_LINKDOWN)
+		if (ip_ignore_linkdown(nh->fib_nh_dev) &&
+		    nh->fib_nh_flags & RTNH_F_LINKDOWN)
 			continue;
 
-		total += nh->nh_weight;
+		total += nh->fib_nh_weight;
 	} endfor_nexthops(fi);
 
 	w = 0;
 	change_nexthops(fi) {
 		int upper_bound;
 
-		in_dev = __in_dev_get_rtnl(nexthop_nh->nh_dev);
-
-		if (nexthop_nh->nh_flags & RTNH_F_DEAD) {
+		if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD) {
 			upper_bound = -1;
-		} else if (in_dev &&
-			   IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
-			   nexthop_nh->nh_flags & RTNH_F_LINKDOWN) {
+		} else if (ip_ignore_linkdown(nexthop_nh->fib_nh_dev) &&
+			   nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
 			upper_bound = -1;
 		} else {
-			w += nexthop_nh->nh_weight;
+			w += nexthop_nh->fib_nh_weight;
 			upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31,
 							    total) - 1;
 		}
 
-		atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
+		atomic_set(&nexthop_nh->fib_nh_upper_bound, upper_bound);
 	} endfor_nexthops(fi);
 }
 #else /* CONFIG_IP_ROUTE_MULTIPATH */
 
+static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
+		       int remaining, struct fib_config *cfg,
+		       struct netlink_ext_ack *extack)
+{
+	NL_SET_ERR_MSG(extack, "Multipath support not enabled in kernel");
+
+	return -EINVAL;
+}
+
 #define fib_rebalance(fi) do { } while (0)
 
 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
@@ -620,7 +740,7 @@ static int fib_encap_match(u16 encap_type,
 	ret = lwtunnel_build_state(encap_type, encap, AF_INET,
 				   cfg, &lwtstate, extack);
 	if (!ret) {
-		result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate);
+		result = lwtunnel_cmp_encap(lwtstate, nh->fib_nh_lws);
 		lwtstate_free(lwtstate);
 	}
 
@@ -638,7 +758,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
 	if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
 		return 1;
 
-	if (cfg->fc_oif || cfg->fc_gw) {
+	if (cfg->fc_oif || cfg->fc_gw_family) {
 		if (cfg->fc_encap) {
 			if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
 					    fi->fib_nh, cfg, extack))
@@ -649,10 +769,20 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
 		    cfg->fc_flow != fi->fib_nh->nh_tclassid)
 			return 1;
 #endif
-		if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
-		    (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
-			return 0;
-		return 1;
+		if ((cfg->fc_oif && cfg->fc_oif != fi->fib_nh->fib_nh_oif) ||
+		    (cfg->fc_gw_family &&
+		     cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family))
+			return 1;
+
+		if (cfg->fc_gw_family == AF_INET &&
+		    cfg->fc_gw4 != fi->fib_nh->fib_nh_gw4)
+			return 1;
+
+		if (cfg->fc_gw_family == AF_INET6 &&
+		    ipv6_addr_cmp(&cfg->fc_gw6, &fi->fib_nh->fib_nh_gw6))
+			return 1;
+
+		return 0;
 	}
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -668,16 +798,53 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
 		if (!rtnh_ok(rtnh, remaining))
 			return -EINVAL;
 
-		if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
+		if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->fib_nh_oif)
 			return 1;
 
 		attrlen = rtnh_attrlen(rtnh);
 		if (attrlen > 0) {
-			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+			struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);
 
 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
-			if (nla && nla_get_in_addr(nla) != nh->nh_gw)
-				return 1;
+			nlav = nla_find(attrs, attrlen, RTA_VIA);
+			if (nla && nlav) {
+				NL_SET_ERR_MSG(extack,
+					       "Nexthop configuration can not contain both GATEWAY and VIA");
+				return -EINVAL;
+			}
+
+			if (nla) {
+				if (nh->fib_nh_gw_family != AF_INET ||
+				    nla_get_in_addr(nla) != nh->fib_nh_gw4)
+					return 1;
+			} else if (nlav) {
+				struct fib_config cfg2;
+				int err;
+
+				err = fib_gw_from_via(&cfg2, nlav, extack);
+				if (err)
+					return err;
+
+				switch (nh->fib_nh_gw_family) {
+				case AF_INET:
+					if (cfg2.fc_gw_family != AF_INET ||
+					    cfg2.fc_gw4 != nh->fib_nh_gw4)
+						return 1;
+					break;
+				case AF_INET6:
+					if (cfg2.fc_gw_family != AF_INET6 ||
+					    ipv6_addr_cmp(&cfg2.fc_gw6,
+							  &nh->fib_nh_gw6))
+						return 1;
+					break;
+				default:
+					/* a gateway was requested but this
+					 * nexthop has none
+					 */
+					return 1;
+				}
+			}
+
 #ifdef CONFIG_IP_ROUTE_CLASSID
 			nla = nla_find(attrs, attrlen, RTA_FLOW);
 			if (nla && nla_get_u32(nla) != nh->nh_tclassid)
@@ -731,6 +893,30 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
 	return true;
 }
 
+static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh,
+			      u32 table, struct netlink_ext_ack *extack)
+{
+	struct fib6_config cfg = {
+		.fc_table = table,
+		.fc_flags = nh->fib_nh_flags | RTF_GATEWAY,
+		.fc_ifindex = nh->fib_nh_oif,
+		.fc_gateway = nh->fib_nh_gw6,
+	};
+	struct fib6_nh fib6_nh = {};
+	int err;
+
+	err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack);
+	if (!err) {
+		nh->fib_nh_dev = fib6_nh.fib_nh_dev;
+		dev_hold(nh->fib_nh_dev);
+		nh->fib_nh_oif = nh->fib_nh_dev->ifindex;
+		nh->fib_nh_scope = RT_SCOPE_LINK;
+
+		ipv6_stub->fib6_nh_release(&fib6_nh);
+	}
+
+	return err;
+}
 
 /*
  * Picture
@@ -775,133 +961,152 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
  *					|
  *					|-> {local prefix} (terminal node)
  */
-static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
-			struct netlink_ext_ack *extack)
+static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table,
+			      u8 scope, struct netlink_ext_ack *extack)
 {
-	int err = 0;
-	struct net *net;
 	struct net_device *dev;
+	struct fib_result res;
+	int err;
 
-	net = cfg->fc_nlinfo.nl_net;
-	if (nh->nh_gw) {
-		struct fib_result res;
-
-		if (nh->nh_flags & RTNH_F_ONLINK) {
-			unsigned int addr_type;
+	if (nh->fib_nh_flags & RTNH_F_ONLINK) {
+		unsigned int addr_type;
 
-			if (cfg->fc_scope >= RT_SCOPE_LINK) {
-				NL_SET_ERR_MSG(extack,
-					       "Nexthop has invalid scope");
-				return -EINVAL;
-			}
-			dev = __dev_get_by_index(net, nh->nh_oif);
-			if (!dev) {
-				NL_SET_ERR_MSG(extack, "Nexthop device required for onlink");
-				return -ENODEV;
-			}
-			if (!(dev->flags & IFF_UP)) {
-				NL_SET_ERR_MSG(extack,
-					       "Nexthop device is not up");
-				return -ENETDOWN;
-			}
-			addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw);
-			if (addr_type != RTN_UNICAST) {
-				NL_SET_ERR_MSG(extack,
-					       "Nexthop has invalid gateway");
-				return -EINVAL;
-			}
-			if (!netif_carrier_ok(dev))
-				nh->nh_flags |= RTNH_F_LINKDOWN;
-			nh->nh_dev = dev;
-			dev_hold(dev);
-			nh->nh_scope = RT_SCOPE_LINK;
-			return 0;
+		if (scope >= RT_SCOPE_LINK) {
+			NL_SET_ERR_MSG(extack, "Nexthop has invalid scope");
+			return -EINVAL;
 		}
-		rcu_read_lock();
-		{
-			struct fib_table *tbl = NULL;
-			struct flowi4 fl4 = {
-				.daddr = nh->nh_gw,
-				.flowi4_scope = cfg->fc_scope + 1,
-				.flowi4_oif = nh->nh_oif,
-				.flowi4_iif = LOOPBACK_IFINDEX,
-			};
-
-			/* It is not necessary, but requires a bit of thinking */
-			if (fl4.flowi4_scope < RT_SCOPE_LINK)
-				fl4.flowi4_scope = RT_SCOPE_LINK;
-
-			if (cfg->fc_table)
-				tbl = fib_get_table(net, cfg->fc_table);
-
-			if (tbl)
-				err = fib_table_lookup(tbl, &fl4, &res,
-						       FIB_LOOKUP_IGNORE_LINKSTATE |
-						       FIB_LOOKUP_NOREF);
-
-			/* on error or if no table given do full lookup. This
-			 * is needed for example when nexthops are in the local
-			 * table rather than the given table
-			 */
-			if (!tbl || err) {
-				err = fib_lookup(net, &fl4, &res,
-						 FIB_LOOKUP_IGNORE_LINKSTATE);
-			}
-
-			if (err) {
-				NL_SET_ERR_MSG(extack,
-					       "Nexthop has invalid gateway");
-				rcu_read_unlock();
-				return err;
-			}
+		dev = __dev_get_by_index(net, nh->fib_nh_oif);
+		if (!dev) {
+			NL_SET_ERR_MSG(extack, "Nexthop device required for onlink");
+			return -ENODEV;
 		}
-		err = -EINVAL;
-		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
-			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
-			goto out;
+		if (!(dev->flags & IFF_UP)) {
+			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+			return -ENETDOWN;
 		}
-		nh->nh_scope = res.scope;
-		nh->nh_oif = FIB_RES_OIF(res);
-		nh->nh_dev = dev = FIB_RES_DEV(res);
-		if (!dev) {
-			NL_SET_ERR_MSG(extack,
-				       "No egress device for nexthop gateway");
-			goto out;
+		addr_type = inet_addr_type_dev_table(net, dev, nh->fib_nh_gw4);
+		if (addr_type != RTN_UNICAST) {
+			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
+			return -EINVAL;
 		}
-		dev_hold(dev);
 		if (!netif_carrier_ok(dev))
-			nh->nh_flags |= RTNH_F_LINKDOWN;
-		err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
-	} else {
-		struct in_device *in_dev;
-
-		if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
-			NL_SET_ERR_MSG(extack,
-				       "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
-			return -EINVAL;
+			nh->fib_nh_flags |= RTNH_F_LINKDOWN;
+		nh->fib_nh_dev = dev;
+		dev_hold(dev);
+		nh->fib_nh_scope = RT_SCOPE_LINK;
+		return 0;
+	}
+	rcu_read_lock();
+	{
+		struct fib_table *tbl = NULL;
+		struct flowi4 fl4 = {
+			.daddr = nh->fib_nh_gw4,
+			.flowi4_scope = scope + 1,
+			.flowi4_oif = nh->fib_nh_oif,
+			.flowi4_iif = LOOPBACK_IFINDEX,
+		};
+
+		/* It is not necessary, but requires a bit of thinking */
+		if (fl4.flowi4_scope < RT_SCOPE_LINK)
+			fl4.flowi4_scope = RT_SCOPE_LINK;
+
+		if (table)
+			tbl = fib_get_table(net, table);
+
+		if (tbl)
+			err = fib_table_lookup(tbl, &fl4, &res,
+					       FIB_LOOKUP_IGNORE_LINKSTATE |
+					       FIB_LOOKUP_NOREF);
+
+		/* on error or if no table given do full lookup. This
+		 * is needed for example when nexthops are in the local
+		 * table rather than the given table
+		 */
+		if (!tbl || err) {
+			err = fib_lookup(net, &fl4, &res,
+					 FIB_LOOKUP_IGNORE_LINKSTATE);
 		}
-		rcu_read_lock();
-		err = -ENODEV;
-		in_dev = inetdev_by_index(net, nh->nh_oif);
-		if (!in_dev)
-			goto out;
-		err = -ENETDOWN;
-		if (!(in_dev->dev->flags & IFF_UP)) {
-			NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
+
+		if (err) {
+			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
 			goto out;
 		}
-		nh->nh_dev = in_dev->dev;
-		dev_hold(nh->nh_dev);
-		nh->nh_scope = RT_SCOPE_HOST;
-		if (!netif_carrier_ok(nh->nh_dev))
-			nh->nh_flags |= RTNH_F_LINKDOWN;
-		err = 0;
 	}
+
+	err = -EINVAL;
+	if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
+		NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
+		goto out;
+	}
+	nh->fib_nh_scope = res.scope;
+	nh->fib_nh_oif = FIB_RES_OIF(res);
+	nh->fib_nh_dev = dev = FIB_RES_DEV(res);
+	if (!dev) {
+		NL_SET_ERR_MSG(extack,
+			       "No egress device for nexthop gateway");
+		goto out;
+	}
+	dev_hold(dev);
+	if (!netif_carrier_ok(dev))
+		nh->fib_nh_flags |= RTNH_F_LINKDOWN;
+	err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
+out:
+	rcu_read_unlock();
+	return err;
+}
+
+static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh,
+			      struct netlink_ext_ack *extack)
+{
+	struct in_device *in_dev;
+	int err;
+
+	if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
+		return -EINVAL;
+	}
+
+	rcu_read_lock();
+
+	err = -ENODEV;
+	in_dev = inetdev_by_index(net, nh->fib_nh_oif);
+	if (!in_dev)
+		goto out;
+	err = -ENETDOWN;
+	if (!(in_dev->dev->flags & IFF_UP)) {
+		NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
+		goto out;
+	}
+
+	nh->fib_nh_dev = in_dev->dev;
+	dev_hold(nh->fib_nh_dev);
+	nh->fib_nh_scope = RT_SCOPE_HOST;
+	if (!netif_carrier_ok(nh->fib_nh_dev))
+		nh->fib_nh_flags |= RTNH_F_LINKDOWN;
+	err = 0;
 out:
 	rcu_read_unlock();
 	return err;
 }
 
+static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
+			struct netlink_ext_ack *extack)
+{
+	struct net *net = cfg->fc_nlinfo.nl_net;
+	u32 table = cfg->fc_table;
+	int err;
+
+	if (nh->fib_nh_gw_family == AF_INET)
+		err = fib_check_nh_v4_gw(net, nh, table, cfg->fc_scope, extack);
+	else if (nh->fib_nh_gw_family == AF_INET6)
+		err = fib_check_nh_v6_gw(net, nh, table, extack);
+	else
+		err = fib_check_nh_nongw(net, nh, extack);
+
+	return err;
+}
+
 static inline unsigned int fib_laddr_hashfn(__be32 val)
 {
 	unsigned int mask = (fib_info_hash_size - 1);
@@ -986,14 +1191,29 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
 
 __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
 {
-	nh->nh_saddr = inet_select_addr(nh->nh_dev,
-					nh->nh_gw,
+	nh->nh_saddr = inet_select_addr(nh->fib_nh_dev,
+					nh->fib_nh_gw4,
 					nh->nh_parent->fib_scope);
 	nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
 
 	return nh->nh_saddr;
 }
 
+/* Route prefsrc if set, else the nexthop saddr (refreshed when stale). */
+__be32 fib_result_prefsrc(struct net *net, struct fib_result *res)
+{
+	__be32 prefsrc = res->fi->fib_prefsrc;
+	struct fib_nh *nh;
+
+	if (prefsrc)
+		return prefsrc;
+
+	nh = container_of(res->nhc, struct fib_nh, nh_common);
+	if (nh->nh_saddr_genid != atomic_read(&net->ipv4.dev_addr_genid))
+		return fib_info_update_nh_saddr(net, nh);
+	return nh->nh_saddr;
+}
+
 static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
 {
 	if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
@@ -1096,72 +1316,18 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 	fi->fib_nhs = nhs;
 	change_nexthops(fi) {
 		nexthop_nh->nh_parent = fi;
-		nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
-		if (!nexthop_nh->nh_pcpu_rth_output)
-			goto failure;
 	} endfor_nexthops(fi)
 
-	if (cfg->fc_mp) {
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	if (cfg->fc_mp)
 		err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
-		if (err != 0)
-			goto failure;
-		if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) {
-			NL_SET_ERR_MSG(extack,
-				       "Nexthop device index does not match RTA_OIF");
-			goto err_inval;
-		}
-		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) {
-			NL_SET_ERR_MSG(extack,
-				       "Nexthop gateway does not match RTA_GATEWAY");
-			goto err_inval;
-		}
-#ifdef CONFIG_IP_ROUTE_CLASSID
-		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) {
-			NL_SET_ERR_MSG(extack,
-				       "Nexthop class id does not match RTA_FLOW");
-			goto err_inval;
-		}
-#endif
-#else
-		NL_SET_ERR_MSG(extack,
-			       "Multipath support not enabled in kernel");
-		goto err_inval;
-#endif
-	} else {
-		struct fib_nh *nh = fi->fib_nh;
-
-		if (cfg->fc_encap) {
-			struct lwtunnel_state *lwtstate;
-
-			if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) {
-				NL_SET_ERR_MSG(extack,
-					       "LWT encap type not specified");
-				goto err_inval;
-			}
-			err = lwtunnel_build_state(cfg->fc_encap_type,
-						   cfg->fc_encap, AF_INET, cfg,
-						   &lwtstate, extack);
-			if (err)
-				goto failure;
+	else
+		err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack);
 
-			nh->nh_lwtstate = lwtstate_get(lwtstate);
-		}
-		nh->nh_oif = cfg->fc_oif;
-		nh->nh_gw = cfg->fc_gw;
-		nh->nh_flags = cfg->fc_flags;
-#ifdef CONFIG_IP_ROUTE_CLASSID
-		nh->nh_tclassid = cfg->fc_flow;
-		if (nh->nh_tclassid)
-			fi->fib_net->ipv4.fib_num_tclassid_users++;
-#endif
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-		nh->nh_weight = 1;
-#endif
-	}
+	if (err != 0)
+		goto failure;
 
 	if (fib_props[cfg->fc_type].error) {
-		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) {
+		if (cfg->fc_gw_family || cfg->fc_oif || cfg->fc_mp) {
 			NL_SET_ERR_MSG(extack,
 				       "Gateway, device and multipath can not be specified for this route type");
 			goto err_inval;
@@ -1195,15 +1361,15 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 				       "Route with host scope can not have multiple nexthops");
 			goto err_inval;
 		}
-		if (nh->nh_gw) {
+		if (nh->fib_nh_gw_family) {
 			NL_SET_ERR_MSG(extack,
 				       "Route with host scope can not have a gateway");
 			goto err_inval;
 		}
-		nh->nh_scope = RT_SCOPE_NOWHERE;
-		nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
+		nh->fib_nh_scope = RT_SCOPE_NOWHERE;
+		nh->fib_nh_dev = dev_get_by_index(net, fi->fib_nh->fib_nh_oif);
 		err = -ENODEV;
-		if (!nh->nh_dev)
+		if (!nh->fib_nh_dev)
 			goto failure;
 	} else {
 		int linkdown = 0;
@@ -1212,7 +1378,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 			err = fib_check_nh(cfg, nexthop_nh, extack);
 			if (err != 0)
 				goto failure;
-			if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
+			if (nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN)
 				linkdown++;
 		} endfor_nexthops(fi)
 		if (linkdown == fi->fib_nhs)
@@ -1226,6 +1392,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 
 	change_nexthops(fi) {
 		fib_info_update_nh_saddr(net, nexthop_nh);
+		if (nexthop_nh->fib_nh_gw_family == AF_INET6)
+			fi->fib_nh_is_v6 = true;
 	} endfor_nexthops(fi)
 
 	fib_rebalance(fi);
@@ -1254,9 +1422,9 @@ link_it:
 		struct hlist_head *head;
 		unsigned int hash;
 
-		if (!nexthop_nh->nh_dev)
+		if (!nexthop_nh->fib_nh_dev)
 			continue;
-		hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
+		hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
 		head = &fib_info_devhash[hash];
 		hlist_add_head(&nexthop_nh->nh_hash, head);
 	} endfor_nexthops(fi)
@@ -1275,6 +1443,141 @@ failure:
 	return ERR_PTR(err);
 }
 
+/**
+ * fib_nexthop_info - add the netlink attributes of one nexthop
+ * @skb: netlink message being filled
+ * @nhc: nexthop to describe
+ * @flags: RTNH_F_* bits of @nhc are OR-ed into this byte
+ * @skip_oif: when true, RTA_OIF is not emitted
+ *
+ * Returns 0, or -EMSGSIZE as soon as adding an attribute fails.
+ */
+int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc,
+		     unsigned char *flags, bool skip_oif)
+{
+	if (nhc->nhc_flags & RTNH_F_DEAD)
+		*flags |= RTNH_F_DEAD;
+
+	if (nhc->nhc_flags & RTNH_F_LINKDOWN) {
+		bool ignore = false;
+
+		*flags |= RTNH_F_LINKDOWN;
+
+		rcu_read_lock();
+		if (nhc->nhc_family == AF_INET)
+			ignore = ip_ignore_linkdown(nhc->nhc_dev);
+		else if (nhc->nhc_family == AF_INET6)
+			ignore = ip6_ignore_linkdown(nhc->nhc_dev);
+		rcu_read_unlock();
+
+		if (ignore)
+			*flags |= RTNH_F_DEAD;
+	}
+
+	if (nhc->nhc_gw_family == AF_INET) {
+		if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4))
+			return -EMSGSIZE;
+	} else if (nhc->nhc_gw_family == AF_INET6) {
+		/* a gateway whose family differs from the nexthop family
+		 * is carried in RTA_VIA rather than RTA_GATEWAY
+		 */
+		if (nhc->nhc_gw_family != nhc->nhc_family) {
+			const int alen = sizeof(struct in6_addr);
+			struct rtvia *via;
+			struct nlattr *nla;
+
+			nla = nla_reserve(skb, RTA_VIA, alen + 2);
+			if (!nla)
+				return -EMSGSIZE;
+
+			via = nla_data(nla);
+			via->rtvia_family = AF_INET6;
+			memcpy(via->rtvia_addr, &nhc->nhc_gw.ipv6, alen);
+		} else if (nla_put_in6_addr(skb, RTA_GATEWAY,
+					    &nhc->nhc_gw.ipv6) < 0) {
+			return -EMSGSIZE;
+		}
+	}
+
+	*flags |= nhc->nhc_flags & (RTNH_F_ONLINK | RTNH_F_OFFLOAD);
+
+	if (!skip_oif && nhc->nhc_dev &&
+	    nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex))
+		return -EMSGSIZE;
+
+	if (nhc->nhc_lwtstate &&
+	    lwtunnel_fill_encap(skb, nhc->nhc_lwtstate,
+				RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
+		return -EMSGSIZE;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fib_nexthop_info);
+
+#if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6)
+/* fib_add_nexthop - append one struct rtnexthop entry for @nhc to @skb
+ *
+ * The entry carries @nh_weight - 1 as hop count and the ifindex of the
+ * nexthop device (0 when there is none); fib_nexthop_info() then adds the
+ * per-nexthop attributes with RTA_OIF skipped.  Returns 0 or -EMSGSIZE.
+ */
+int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc,
+		    int nh_weight)
+{
+	struct rtnexthop *rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
+	unsigned char nh_flags = 0;
+
+	if (!rtnh)
+		return -EMSGSIZE;
+
+	rtnh->rtnh_hops = nh_weight - 1;
+	rtnh->rtnh_ifindex = nhc->nhc_dev ? nhc->nhc_dev->ifindex : 0;
+
+	if (fib_nexthop_info(skb, nhc, &nh_flags, true) < 0)
+		return -EMSGSIZE;
+
+	rtnh->rtnh_flags = nh_flags;
+	/* entry length spans the rtnexthop header and the attributes above */
+	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fib_add_nexthop);
+#endif
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+/* Emit an RTA_MULTIPATH nest holding one rtnexthop entry per nexthop of
+ * @fi, plus (with CONFIG_IP_ROUTE_CLASSID) RTA_FLOW for a non-zero class id.
+ */
+static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
+{
+	struct nlattr *nest = nla_nest_start_noflag(skb, RTA_MULTIPATH);
+
+	if (!nest)
+		return -EMSGSIZE;
+
+	for_nexthops(fi) {
+		if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight) < 0)
+			return -EMSGSIZE;
+#ifdef CONFIG_IP_ROUTE_CLASSID
+		if (nh->nh_tclassid &&
+		    nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
+			return -EMSGSIZE;
+#endif
+	} endfor_nexthops(fi);
+
+	nla_nest_end(skb, nest);
+
+	return 0;
+}
+#else
+/* Built without CONFIG_IP_ROUTE_MULTIPATH: nothing is added to @skb. */
+static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
+{
+	return 0;
+}
+#endif
+
 int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 		  u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
 		  struct fib_info *fi, unsigned int flags)
@@ -1315,80 +1618,23 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 	    nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
 		goto nla_put_failure;
 	if (fi->fib_nhs == 1) {
-		if (fi->fib_nh->nh_gw &&
-		    nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
-			goto nla_put_failure;
-		if (fi->fib_nh->nh_oif &&
-		    nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
+		struct fib_nh *nh = &fi->fib_nh[0];
+		unsigned char flags = 0;
+
+		if (fib_nexthop_info(skb, &nh->nh_common, &flags, false) < 0)
 			goto nla_put_failure;
-		if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
-			struct in_device *in_dev;
-
-			rcu_read_lock();
-			in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev);
-			if (in_dev &&
-			    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
-				rtm->rtm_flags |= RTNH_F_DEAD;
-			rcu_read_unlock();
-		}
-		if (fi->fib_nh->nh_flags & RTNH_F_OFFLOAD)
-			rtm->rtm_flags |= RTNH_F_OFFLOAD;
+
+		rtm->rtm_flags = flags;
 #ifdef CONFIG_IP_ROUTE_CLASSID
-		if (fi->fib_nh[0].nh_tclassid &&
-		    nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
+		if (nh->nh_tclassid &&
+		    nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
 			goto nla_put_failure;
 #endif
-		if (fi->fib_nh->nh_lwtstate &&
-		    lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate) < 0)
+	} else {
+		if (fib_add_multipath(skb, fi) < 0)
 			goto nla_put_failure;
 	}
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (fi->fib_nhs > 1) {
-		struct rtnexthop *rtnh;
-		struct nlattr *mp;
 
-		mp = nla_nest_start(skb, RTA_MULTIPATH);
-		if (!mp)
-			goto nla_put_failure;
-
-		for_nexthops(fi) {
-			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
-			if (!rtnh)
-				goto nla_put_failure;
-
-			rtnh->rtnh_flags = nh->nh_flags & 0xFF;
-			if (nh->nh_flags & RTNH_F_LINKDOWN) {
-				struct in_device *in_dev;
-
-				rcu_read_lock();
-				in_dev = __in_dev_get_rcu(nh->nh_dev);
-				if (in_dev &&
-				    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
-					rtnh->rtnh_flags |= RTNH_F_DEAD;
-				rcu_read_unlock();
-			}
-			rtnh->rtnh_hops = nh->nh_weight - 1;
-			rtnh->rtnh_ifindex = nh->nh_oif;
-
-			if (nh->nh_gw &&
-			    nla_put_in_addr(skb, RTA_GATEWAY, nh->nh_gw))
-				goto nla_put_failure;
-#ifdef CONFIG_IP_ROUTE_CLASSID
-			if (nh->nh_tclassid &&
-			    nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
-				goto nla_put_failure;
-#endif
-			if (nh->nh_lwtstate &&
-			    lwtunnel_fill_encap(skb, nh->nh_lwtstate) < 0)
-				goto nla_put_failure;
-
-			/* length of rtnetlink header + attributes */
-			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
-		} endfor_nexthops(fi);
-
-		nla_nest_end(skb, mp);
-	}
-#endif
 	nlmsg_end(skb, nlh);
 	return 0;
 
@@ -1427,28 +1673,26 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local)
 	return ret;
 }
 
-static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
+static int call_fib_nh_notifiers(struct fib_nh *nh,
 				 enum fib_event_type event_type)
 {
-	struct in_device *in_dev = __in_dev_get_rtnl(fib_nh->nh_dev);
+	bool ignore_link_down = ip_ignore_linkdown(nh->fib_nh_dev);
 	struct fib_nh_notifier_info info = {
-		.fib_nh = fib_nh,
+		.fib_nh = nh,
 	};
 
 	switch (event_type) {
 	case FIB_EVENT_NH_ADD:
-		if (fib_nh->nh_flags & RTNH_F_DEAD)
+		if (nh->fib_nh_flags & RTNH_F_DEAD)
 			break;
-		if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
-		    fib_nh->nh_flags & RTNH_F_LINKDOWN)
+		if (ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN)
 			break;
-		return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type,
+		return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type,
 					   &info.info);
 	case FIB_EVENT_NH_DEL:
-		if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
-		     fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
-		    (fib_nh->nh_flags & RTNH_F_DEAD))
-			return call_fib4_notifiers(dev_net(fib_nh->nh_dev),
+		if ((ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) ||
+		    (nh->fib_nh_flags & RTNH_F_DEAD))
+			return call_fib4_notifiers(dev_net(nh->fib_nh_dev),
 						   event_type, &info.info);
 	default:
 		break;
@@ -1502,7 +1746,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
 	struct fib_nh *nh;
 
 	hlist_for_each_entry(nh, head, nh_hash) {
-		if (nh->nh_dev == dev)
+		if (nh->fib_nh_dev == dev)
 			nh_update_mtu(nh, dev->mtu, orig_mtu);
 	}
 }
@@ -1530,22 +1774,22 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
 		int dead;
 
 		BUG_ON(!fi->fib_nhs);
-		if (nh->nh_dev != dev || fi == prev_fi)
+		if (nh->fib_nh_dev != dev || fi == prev_fi)
 			continue;
 		prev_fi = fi;
 		dead = 0;
 		change_nexthops(fi) {
-			if (nexthop_nh->nh_flags & RTNH_F_DEAD)
+			if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD)
 				dead++;
-			else if (nexthop_nh->nh_dev == dev &&
-				 nexthop_nh->nh_scope != scope) {
+			else if (nexthop_nh->fib_nh_dev == dev &&
+				 nexthop_nh->fib_nh_scope != scope) {
 				switch (event) {
 				case NETDEV_DOWN:
 				case NETDEV_UNREGISTER:
-					nexthop_nh->nh_flags |= RTNH_F_DEAD;
+					nexthop_nh->fib_nh_flags |= RTNH_F_DEAD;
 					/* fall through */
 				case NETDEV_CHANGE:
-					nexthop_nh->nh_flags |= RTNH_F_LINKDOWN;
+					nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
 					break;
 				}
 				call_fib_nh_notifiers(nexthop_nh,
@@ -1554,7 +1798,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
 			}
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 			if (event == NETDEV_UNREGISTER &&
-			    nexthop_nh->nh_dev == dev) {
+			    nexthop_nh->fib_nh_dev == dev) {
 				dead = fi->fib_nhs;
 				break;
 			}
@@ -1614,8 +1858,8 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
 		if (next_fi->fib_scope != res->scope ||
 		    fa->fa_type != RTN_UNICAST)
 			continue;
-		if (!next_fi->fib_nh[0].nh_gw ||
-		    next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
+		if (!next_fi->fib_nh[0].fib_nh_gw4 ||
+		    next_fi->fib_nh[0].fib_nh_scope != RT_SCOPE_LINK)
 			continue;
 
 		fib_alias_accessed(fa);
@@ -1658,7 +1902,7 @@ out:
  * Dead device goes up. We wake up dead nexthops.
  * It takes sense only on multipath routes.
  */
-int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
+int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
 {
 	struct fib_info *prev_fi;
 	unsigned int hash;
@@ -1686,24 +1930,24 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
 		int alive;
 
 		BUG_ON(!fi->fib_nhs);
-		if (nh->nh_dev != dev || fi == prev_fi)
+		if (nh->fib_nh_dev != dev || fi == prev_fi)
 			continue;
 
 		prev_fi = fi;
 		alive = 0;
 		change_nexthops(fi) {
-			if (!(nexthop_nh->nh_flags & nh_flags)) {
+			if (!(nexthop_nh->fib_nh_flags & nh_flags)) {
 				alive++;
 				continue;
 			}
-			if (!nexthop_nh->nh_dev ||
-			    !(nexthop_nh->nh_dev->flags & IFF_UP))
+			if (!nexthop_nh->fib_nh_dev ||
+			    !(nexthop_nh->fib_nh_dev->flags & IFF_UP))
 				continue;
-			if (nexthop_nh->nh_dev != dev ||
+			if (nexthop_nh->fib_nh_dev != dev ||
 			    !__in_dev_get_rtnl(dev))
 				continue;
 			alive++;
-			nexthop_nh->nh_flags &= ~nh_flags;
+			nexthop_nh->fib_nh_flags &= ~nh_flags;
 			call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD);
 		} endfor_nexthops(fi)
 
@@ -1723,13 +1967,19 @@ static bool fib_good_nh(const struct fib_nh *nh)
 {
 	int state = NUD_REACHABLE;
 
-	if (nh->nh_scope == RT_SCOPE_LINK) {
+	if (nh->fib_nh_scope == RT_SCOPE_LINK) {
 		struct neighbour *n;
 
 		rcu_read_lock_bh();
 
-		n = __ipv4_neigh_lookup_noref(nh->nh_dev,
-					      (__force u32)nh->nh_gw);
+		if (likely(nh->fib_nh_gw_family == AF_INET))
+			n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
+						   (__force u32)nh->fib_nh_gw4);
+		else if (nh->fib_nh_gw_family == AF_INET6)
+			n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev,
+							   &nh->fib_nh_gw6);
+		else
+			n = NULL;
 		if (n)
 			state = n->nud_state;
 
@@ -1745,20 +1995,22 @@ void fib_select_multipath(struct fib_result *res, int hash)
 	struct net *net = fi->fib_net;
 	bool first = false;
 
-	for_nexthops(fi) {
+	change_nexthops(fi) {
 		if (net->ipv4.sysctl_fib_multipath_use_neigh) {
-			if (!fib_good_nh(nh))
+			if (!fib_good_nh(nexthop_nh))
 				continue;
 			if (!first) {
 				res->nh_sel = nhsel;
+				res->nhc = &nexthop_nh->nh_common;
 				first = true;
 			}
 		}
 
-		if (hash > atomic_read(&nh->nh_upper_bound))
+		if (hash > atomic_read(&nexthop_nh->fib_nh_upper_bound))
 			continue;
 
 		res->nh_sel = nhsel;
+		res->nhc = &nexthop_nh->nh_common;
 		return;
 	} endfor_nexthops(fi);
 }
@@ -1785,5 +2037,5 @@ void fib_select_path(struct net *net, struct fib_result *res,
 
 check_saddr:
 	if (!fl4->saddr)
-		fl4->saddr = FIB_RES_PREFSRC(net, *res);
+		fl4->saddr = fib_result_prefsrc(net, res);
 }
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1704f432de1f..334f723bdf80 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1470,19 +1470,17 @@ found:
 		if (fi->fib_flags & RTNH_F_DEAD)
 			continue;
 		for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
-			const struct fib_nh *nh = &fi->fib_nh[nhsel];
-			struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev);
+			struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
 
-			if (nh->nh_flags & RTNH_F_DEAD)
+			if (nhc->nhc_flags & RTNH_F_DEAD)
 				continue;
-			if (in_dev &&
-			    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
-			    nh->nh_flags & RTNH_F_LINKDOWN &&
+			if (ip_ignore_linkdown(nhc->nhc_dev) &&
+			    nhc->nhc_flags & RTNH_F_LINKDOWN &&
 			    !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
 				continue;
 			if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
 				if (flp->flowi4_oif &&
-				    flp->flowi4_oif != nh->nh_oif)
+				    flp->flowi4_oif != nhc->nhc_oif)
 					continue;
 			}
 
@@ -1492,6 +1490,7 @@ found:
 			res->prefix = htonl(n->key);
 			res->prefixlen = KEYLENGTH - fa->fa_slen;
 			res->nh_sel = nhsel;
+			res->nhc = nhc;
 			res->type = fa->fa_type;
 			res->scope = fi->fib_scope;
 			res->fi = fi;
@@ -1500,7 +1499,7 @@ found:
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 			this_cpu_inc(stats->semantic_match_passed);
 #endif
-			trace_fib_table_lookup(tb->tb_id, flp, nh, err);
+			trace_fib_table_lookup(tb->tb_id, flp, nhc, err);
 
 			return err;
 		}
@@ -2653,7 +2652,7 @@ static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info
 
 	if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
 		flags = RTF_REJECT;
-	if (fi && fi->fib_nh->nh_gw)
+	if (fi && fi->fib_nh->fib_nh_gw4)
 		flags |= RTF_GATEWAY;
 	if (mask == htonl(0xFFFFFFFF))
 		flags |= RTF_HOST;
@@ -2704,7 +2703,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 				   "%d\t%08X\t%d\t%u\t%u",
 				   fi->fib_dev ? fi->fib_dev->name : "*",
 				   prefix,
-				   fi->fib_nh->nh_gw, flags, 0, 0,
+				   fi->fib_nh->fib_nh_gw4, flags, 0, 0,
 				   fi->fib_priority,
 				   mask,
 				   (fi->fib_advmss ?
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index a23fbb52d265..ca95051317ed 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -121,6 +121,7 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
 	struct guehdr *guehdr;
 	void *data;
 	u16 doffset = 0;
+	u8 proto_ctype;
 
 	if (!fou)
 		return 1;
@@ -136,7 +137,7 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
 		break;
 
 	case 1: {
-		/* Direct encasulation of IPv4 or IPv6 */
+		/* Direct encapsulation of IPv4 or IPv6 */
 
 		int prot;
 
@@ -170,9 +171,7 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
 	/* guehdr may change after pull */
 	guehdr = (struct guehdr *)&udp_hdr(skb)[1];
 
-	hdrlen = sizeof(struct guehdr) + optlen;
-
-	if (guehdr->version != 0 || validate_gue_flags(guehdr, optlen))
+	if (validate_gue_flags(guehdr, optlen))
 		goto drop;
 
 	hdrlen = sizeof(struct guehdr) + optlen;
@@ -212,13 +211,14 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
 	if (unlikely(guehdr->control))
 		return gue_control_message(skb, guehdr);
 
+	proto_ctype = guehdr->proto_ctype;
 	__skb_pull(skb, sizeof(struct udphdr) + hdrlen);
 	skb_reset_transport_header(skb);
 
 	if (iptunnel_pull_offloads(skb))
 		goto drop;
 
-	return -guehdr->proto_ctype;
+	return -proto_ctype;
 
 drop:
 	kfree_skb(skb);
@@ -499,15 +499,45 @@ out_unlock:
 	return err;
 }
 
-static int fou_add_to_port_list(struct net *net, struct fou *fou)
+/* fou_cfg_cmp - check whether a FOU port matches a requested config
+ *
+ * Address family, local port, peer port and bound interface index are
+ * compared first, then the local and peer addresses.  Without
+ * CONFIG_IPV6 only AF_INET entries can match.  Returns true only if
+ * every compared field is equal.
+ */
+static bool fou_cfg_cmp(struct fou *fou, struct fou_cfg *cfg)
+{
+	struct udp_port_cfg *udp_cfg = &cfg->udp_config;
+	struct sock *sk = fou->sock->sk;
+
+	if (fou->family != udp_cfg->family ||
+	    fou->port != udp_cfg->local_udp_port ||
+	    sk->sk_dport != udp_cfg->peer_udp_port ||
+	    sk->sk_bound_dev_if != udp_cfg->bind_ifindex)
+		return false;
+
+	if (fou->family == AF_INET)
+		return sk->sk_rcv_saddr == udp_cfg->local_ip.s_addr &&
+		       sk->sk_daddr == udp_cfg->peer_ip.s_addr;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	return !ipv6_addr_cmp(&sk->sk_v6_rcv_saddr, &udp_cfg->local_ip6) &&
+	       !ipv6_addr_cmp(&sk->sk_v6_daddr, &udp_cfg->peer_ip6);
+#else
+	return false;
+#endif
+}
+
+static int fou_add_to_port_list(struct net *net, struct fou *fou,
+				struct fou_cfg *cfg)
 {
 	struct fou_net *fn = net_generic(net, fou_net_id);
 	struct fou *fout;
 
 	mutex_lock(&fn->fou_lock);
 	list_for_each_entry(fout, &fn->fou_list, list) {
-		if (fou->port == fout->port &&
-		    fou->family == fout->family) {
+		if (fou_cfg_cmp(fout, cfg)) {
 			mutex_unlock(&fn->fou_lock);
 			return -EALREADY;
 		}
@@ -585,7 +615,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
 
 	sk->sk_allocation = GFP_ATOMIC;
 
-	err = fou_add_to_port_list(net, fou);
+	err = fou_add_to_port_list(net, fou, cfg);
 	if (err)
 		goto error;
 
@@ -605,14 +635,12 @@ error:
 static int fou_destroy(struct net *net, struct fou_cfg *cfg)
 {
 	struct fou_net *fn = net_generic(net, fou_net_id);
-	__be16 port = cfg->udp_config.local_udp_port;
-	u8 family = cfg->udp_config.family;
 	int err = -EINVAL;
 	struct fou *fou;
 
 	mutex_lock(&fn->fou_lock);
 	list_for_each_entry(fou, &fn->fou_list, list) {
-		if (fou->port == port && fou->family == family) {
+		if (fou_cfg_cmp(fou, cfg)) {
 			fou_release(fou);
 			err = 0;
 			break;
@@ -626,16 +654,27 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg)
 static struct genl_family fou_nl_family;
 
 static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
-	[FOU_ATTR_PORT] = { .type = NLA_U16, },
-	[FOU_ATTR_AF] = { .type = NLA_U8, },
-	[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
-	[FOU_ATTR_TYPE] = { .type = NLA_U8, },
-	[FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, },
+	[FOU_ATTR_PORT]			= { .type = NLA_U16, },
+	[FOU_ATTR_AF]			= { .type = NLA_U8, },
+	[FOU_ATTR_IPPROTO]		= { .type = NLA_U8, },
+	[FOU_ATTR_TYPE]			= { .type = NLA_U8, },
+	[FOU_ATTR_REMCSUM_NOPARTIAL]	= { .type = NLA_FLAG, },
+	[FOU_ATTR_LOCAL_V4]		= { .type = NLA_U32, },
+	[FOU_ATTR_PEER_V4]		= { .type = NLA_U32, },
+	[FOU_ATTR_LOCAL_V6]		= { .len = sizeof(struct in6_addr), },
+	[FOU_ATTR_PEER_V6]		= { .len = sizeof(struct in6_addr), },
+	[FOU_ATTR_PEER_PORT]		= { .type = NLA_U16, },
+	[FOU_ATTR_IFINDEX]		= { .type = NLA_S32, },
 };
 
 static int parse_nl_config(struct genl_info *info,
 			   struct fou_cfg *cfg)
 {
+	bool has_local = false, has_peer = false;
+	struct nlattr *attr;
+	int ifindex;
+	__be16 port;
+
 	memset(cfg, 0, sizeof(*cfg));
 
 	cfg->udp_config.family = AF_INET;
@@ -657,8 +696,7 @@ static int parse_nl_config(struct genl_info *info,
 	}
 
 	if (info->attrs[FOU_ATTR_PORT]) {
-		__be16 port = nla_get_be16(info->attrs[FOU_ATTR_PORT]);
-
+		port = nla_get_be16(info->attrs[FOU_ATTR_PORT]);
 		cfg->udp_config.local_udp_port = port;
 	}
 
@@ -671,6 +709,52 @@ static int parse_nl_config(struct genl_info *info,
 	if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL])
 		cfg->flags |= FOU_F_REMCSUM_NOPARTIAL;
 
+	if (cfg->udp_config.family == AF_INET) {
+		if (info->attrs[FOU_ATTR_LOCAL_V4]) {
+			attr = info->attrs[FOU_ATTR_LOCAL_V4];
+			cfg->udp_config.local_ip.s_addr = nla_get_in_addr(attr);
+			has_local = true;
+		}
+
+		if (info->attrs[FOU_ATTR_PEER_V4]) {
+			attr = info->attrs[FOU_ATTR_PEER_V4];
+			cfg->udp_config.peer_ip.s_addr = nla_get_in_addr(attr);
+			has_peer = true;
+		}
+#if IS_ENABLED(CONFIG_IPV6)
+	} else {
+		if (info->attrs[FOU_ATTR_LOCAL_V6]) {
+			attr = info->attrs[FOU_ATTR_LOCAL_V6];
+			cfg->udp_config.local_ip6 = nla_get_in6_addr(attr);
+			has_local = true;
+		}
+
+		if (info->attrs[FOU_ATTR_PEER_V6]) {
+			attr = info->attrs[FOU_ATTR_PEER_V6];
+			cfg->udp_config.peer_ip6 = nla_get_in6_addr(attr);
+			has_peer = true;
+		}
+#endif
+	}
+
+	if (has_peer) {
+		if (info->attrs[FOU_ATTR_PEER_PORT]) {
+			port = nla_get_be16(info->attrs[FOU_ATTR_PEER_PORT]);
+			cfg->udp_config.peer_udp_port = port;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	if (info->attrs[FOU_ATTR_IFINDEX]) {
+		if (!has_local)
+			return -EINVAL;
+
+		ifindex = nla_get_s32(info->attrs[FOU_ATTR_IFINDEX]);
+
+		cfg->udp_config.bind_ifindex = ifindex;
+	}
+
 	return 0;
 }
 
@@ -702,15 +786,37 @@ static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
 
 static int fou_fill_info(struct fou *fou, struct sk_buff *msg)
 {
+	struct sock *sk = fou->sock->sk;
+
 	if (nla_put_u8(msg, FOU_ATTR_AF, fou->sock->sk->sk_family) ||
 	    nla_put_be16(msg, FOU_ATTR_PORT, fou->port) ||
+	    nla_put_be16(msg, FOU_ATTR_PEER_PORT, sk->sk_dport) ||
 	    nla_put_u8(msg, FOU_ATTR_IPPROTO, fou->protocol) ||
-	    nla_put_u8(msg, FOU_ATTR_TYPE, fou->type))
+	    nla_put_u8(msg, FOU_ATTR_TYPE, fou->type) ||
+	    nla_put_s32(msg, FOU_ATTR_IFINDEX, sk->sk_bound_dev_if))
 		return -1;
 
 	if (fou->flags & FOU_F_REMCSUM_NOPARTIAL)
 		if (nla_put_flag(msg, FOU_ATTR_REMCSUM_NOPARTIAL))
 			return -1;
+
+	if (fou->sock->sk->sk_family == AF_INET) {
+		if (nla_put_in_addr(msg, FOU_ATTR_LOCAL_V4, sk->sk_rcv_saddr))
+			return -1;
+
+		if (nla_put_in_addr(msg, FOU_ATTR_PEER_V4, sk->sk_daddr))
+			return -1;
+#if IS_ENABLED(CONFIG_IPV6)
+	} else {
+		if (nla_put_in6_addr(msg, FOU_ATTR_LOCAL_V6,
+				     &sk->sk_v6_rcv_saddr))
+			return -1;
+
+		if (nla_put_in6_addr(msg, FOU_ATTR_PEER_V6, &sk->sk_v6_daddr))
+			return -1;
+#endif
+	}
+
 	return 0;
 }
 
@@ -763,7 +869,7 @@ static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info)
 	ret = -ESRCH;
 	mutex_lock(&fn->fou_lock);
 	list_for_each_entry(fout, &fn->fou_list, list) {
-		if (port == fout->port && family == fout->family) {
+		if (fou_cfg_cmp(fout, &cfg)) {
 			ret = fou_dump_info(fout, info->snd_portid,
 					    info->snd_seq, 0, msg,
 					    info->genlhdr->cmd);
@@ -807,16 +913,19 @@ static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 static const struct genl_ops fou_nl_ops[] = {
 	{
 		.cmd = FOU_CMD_ADD,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = fou_nl_cmd_add_port,
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = FOU_CMD_DEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = fou_nl_cmd_rm_port,
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = FOU_CMD_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = fou_nl_cmd_get_port,
 		.dumpit = fou_nl_dump,
 	},
@@ -1031,7 +1140,7 @@ static int gue_err(struct sk_buff *skb, u32 info)
 	case 0: /* Full GUE header present */
 		break;
 	case 1: {
-		/* Direct encasulation of IPv4 or IPv6 */
+		/* Direct encapsulation of IPv4 or IPv6 */
 		skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));
 
 		switch (((struct iphdr *)guehdr)->version) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 6ea523d71947..a175e3e7ae97 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -564,7 +564,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
 	rt = ip_route_output_flow(net, fl4, sk);
 	if (IS_ERR(rt))
 		goto no_route;
-	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
+	if (opt && opt->opt.is_strictroute && rt->rt_gw_family)
 		goto route_err;
 	rcu_read_unlock();
 	return &rt->dst;
@@ -602,7 +602,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
 	rt = ip_route_output_flow(net, fl4, sk);
 	if (IS_ERR(rt))
 		goto no_route;
-	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
+	if (opt && opt->opt.is_strictroute && rt->rt_gw_family)
 		goto route_err;
 	return &rt->dst;
 
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 00ec819f949b..06f6f280b9ff 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -123,7 +123,7 @@ int ip_forward(struct sk_buff *skb)
 
 	rt = skb_rtable(skb);
 
-	if (opt->is_strictroute && rt->rt_uses_gateway)
+	if (opt->is_strictroute && rt->rt_gw_family)
 		goto sr_failed;
 
 	IPCB(skb)->flags |= IPSKB_FORWARDED;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index fd219f7bd3ea..4b0526441476 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -259,7 +259,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 	struct net *net = dev_net(skb->dev);
 	struct metadata_dst *tun_dst = NULL;
 	struct erspan_base_hdr *ershdr;
-	struct erspan_metadata *pkt_md;
 	struct ip_tunnel_net *itn;
 	struct ip_tunnel *tunnel;
 	const struct iphdr *iph;
@@ -282,9 +281,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 		if (unlikely(!pskb_may_pull(skb, len)))
 			return PACKET_REJECT;
 
-		ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
-		pkt_md = (struct erspan_metadata *)(ershdr + 1);
-
 		if (__iptunnel_pull_header(skb,
 					   len,
 					   htons(ETH_P_TEB),
@@ -292,8 +288,9 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 			goto drop;
 
 		if (tunnel->collect_md) {
+			struct erspan_metadata *pkt_md, *md;
 			struct ip_tunnel_info *info;
-			struct erspan_metadata *md;
+			unsigned char *gh;
 			__be64 tun_id;
 			__be16 flags;
 
@@ -306,6 +303,14 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 			if (!tun_dst)
 				return PACKET_REJECT;
 
+			/* skb can be uncloned in __iptunnel_pull_header, so
+			 * old pkt_md is no longer valid and we need to reset
+			 * it
+			 */
+			gh = skb_network_header(skb) +
+			     skb_network_header_len(skb);
+			pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
+							    sizeof(*ershdr));
 			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
 			md->version = ver;
 			md2 = &md->u.md2;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index ecce2dc78f17..1132d6d1796a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -257,11 +257,10 @@ int ip_local_deliver(struct sk_buff *skb)
 		       ip_local_deliver_finish);
 }
 
-static inline bool ip_rcv_options(struct sk_buff *skb)
+static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_options *opt;
 	const struct iphdr *iph;
-	struct net_device *dev = skb->dev;
 
 	/* It looks as overkill, because not all
 	   IP options require packet mangling.
@@ -297,7 +296,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb)
 			}
 		}
 
-		if (ip_options_rcv_srr(skb))
+		if (ip_options_rcv_srr(skb, dev))
 			goto drop;
 	}
 
@@ -353,7 +352,7 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
 	}
 #endif
 
-	if (iph->ihl > 5 && ip_rcv_options(skb))
+	if (iph->ihl > 5 && ip_rcv_options(skb, dev))
 		goto drop;
 
 	rt = skb_rtable(skb);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 32a35043c9f5..3db31bb9df50 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -612,7 +612,7 @@ void ip_forward_options(struct sk_buff *skb)
 	}
 }
 
-int ip_options_rcv_srr(struct sk_buff *skb)
+int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_options *opt = &(IPCB(skb)->opt);
 	int srrspace, srrptr;
@@ -647,7 +647,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 
 		orefdst = skb->_skb_refdst;
 		skb_dst_set(skb, NULL);
-		err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
+		err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, dev);
 		rt2 = skb_rtable(skb);
 		if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
 			skb_dst_drop(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c80188875f39..4e42c1974ba2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -188,7 +188,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
 	struct net_device *dev = dst->dev;
 	unsigned int hh_len = LL_RESERVED_SPACE(dev);
 	struct neighbour *neigh;
-	u32 nexthop;
+	bool is_v6gw = false;
 
 	if (rt->rt_type == RTN_MULTICAST) {
 		IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len);
@@ -218,16 +218,13 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
 	}
 
 	rcu_read_lock_bh();
-	nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
-	neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
-	if (unlikely(!neigh))
-		neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
+	neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
 	if (!IS_ERR(neigh)) {
 		int res;
 
 		sock_confirm_neigh(skb, neigh);
-		res = neigh_output(neigh, skb);
-
+		/* if crossing protocols, can not use the cached header */
+		res = neigh_output(neigh, skb, is_v6gw);
 		rcu_read_unlock_bh();
 		return res;
 	}
@@ -472,7 +469,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 	skb_dst_set_noref(skb, &rt->dst);
 
 packet_routed:
-	if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway)
+	if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gw_family)
 		goto no_route;
 
 	/* OK, we know where to send it, allocate and build IP header. */
@@ -693,11 +690,8 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 			return 0;
 		}
 
-		while (frag) {
-			skb = frag->next;
-			kfree_skb(frag);
-			frag = skb;
-		}
+		kfree_skb_list(frag);
+
 		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
 		return err;
 
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index c3f3d28d1087..30c1c264bdfc 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -239,8 +239,8 @@ static int ip_tun_build_state(struct nlattr *attr,
 	struct nlattr *tb[LWTUNNEL_IP_MAX + 1];
 	int err;
 
-	err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy,
-			       extack);
+	err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_MAX, attr,
+					  ip_tun_policy, extack);
 	if (err < 0)
 		return err;
 
@@ -356,8 +356,8 @@ static int ip6_tun_build_state(struct nlattr *attr,
 	struct nlattr *tb[LWTUNNEL_IP6_MAX + 1];
 	int err;
 
-	err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy,
-			       extack);
+	err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP6_MAX, attr,
+					  ip6_tun_policy, extack);
 	if (err < 0)
 		return err;
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 2c931120c494..2c61e10a60e3 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -66,7 +66,7 @@
 #include <net/netlink.h>
 #include <net/fib_rules.h>
 #include <linux/netconf.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
 
 #include <linux/nospec.h>
 
@@ -373,7 +373,6 @@ static const struct rhashtable_params ipmr_rht_params = {
 	.key_offset = offsetof(struct mfc_cache, cmparg),
 	.key_len = sizeof(struct mfc_cache_cmp_arg),
 	.nelem_hint = 3,
-	.locks_mul = 1,
 	.obj_cmpfn = ipmr_hash_cmp,
 	.automatic_shrinking = true,
 };
@@ -2499,8 +2498,8 @@ static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb,
 	}
 
 	if (!netlink_strict_get_check(skb))
-		return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
-				   rtm_ipv4_policy, extack);
+		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
+					      rtm_ipv4_policy, extack);
 
 	rtm = nlmsg_data(nlh);
 	if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
@@ -2511,8 +2510,8 @@ static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb,
 		return -EINVAL;
 	}
 
-	err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
-				 rtm_ipv4_policy, extack);
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
+					    rtm_ipv4_policy, extack);
 	if (err)
 		return err;
 
@@ -2675,8 +2674,8 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
 	struct rtmsg *rtm;
 	int ret, rem;
 
-	ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy,
-			     extack);
+	ret = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX,
+					rtm_ipmr_policy, extack);
 	if (ret < 0)
 		goto out;
 	rtm = nlmsg_data(nlh);
@@ -2784,7 +2783,7 @@ static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb)
 		return true;
 
 	vif = &mrt->vif_table[vifid];
-	vif_nest = nla_nest_start(skb, IPMRA_VIF);
+	vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF);
 	if (!vif_nest)
 		return false;
 	if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) ||
@@ -2868,7 +2867,7 @@ static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb)
 		memset(hdr, 0, sizeof(*hdr));
 		hdr->ifi_family = RTNL_FAMILY_IPMR;
 
-		af = nla_nest_start(skb, IFLA_AF_SPEC);
+		af = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
 		if (!af) {
 			nlmsg_cancel(skb, nlh);
 			goto out;
@@ -2879,7 +2878,7 @@ static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb)
 			goto out;
 		}
 
-		vifs = nla_nest_start(skb, IPMRA_TABLE_VIFS);
+		vifs = nla_nest_start_noflag(skb, IPMRA_TABLE_VIFS);
 		if (!vifs) {
 			nla_nest_end(skb, af);
 			nlmsg_end(skb, nlh);
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 3e614cc824f7..278834d4babc 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -228,7 +228,7 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 	if (c->mfc_flags & MFC_OFFLOAD)
 		rtm->rtm_flags |= RTNH_F_OFFLOAD;
 
-	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
+	mp_attr = nla_nest_start_noflag(skb, RTA_MULTIPATH);
 	if (!mp_attr)
 		return -EMSGSIZE;
 
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index c98391d49200..1412b029f37f 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -27,14 +27,6 @@ config NF_TABLES_IPV4
 
 if NF_TABLES_IPV4
 
-config NFT_CHAIN_ROUTE_IPV4
-	tristate "IPv4 nf_tables route chain support"
-	help
-	  This option enables the "route" chain for IPv4 in nf_tables. This
-	  chain type is used to force packet re-routing after mangling header
-	  fields such as the source, destination, type of service and
-	  the packet mark.
-
 config NFT_REJECT_IPV4
 	select NF_REJECT_IPV4
 	default NFT_REJECT
@@ -232,16 +224,10 @@ if IP_NF_NAT
 
 config IP_NF_TARGET_MASQUERADE
 	tristate "MASQUERADE target support"
-	select NF_NAT_MASQUERADE
-	default m if NETFILTER_ADVANCED=n
+	select NETFILTER_XT_TARGET_MASQUERADE
 	help
-	  Masquerading is a special case of NAT: all outgoing connections are
-	  changed to seem to come from a particular interface's address, and
-	  if the interface goes down, those connections are lost.  This is
-	  only useful for dialup accounts with dynamic IP address (ie. your IP
-	  address will be different on next dialup).
-
-	  To compile it as a module, choose M here.  If unsure, say N.
+	  This is a backwards-compat option for the user's convenience
+	  (e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE.
 
 config IP_NF_TARGET_NETMAP
 	tristate "NETMAP target support"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index e241f5188ebe..c50e0ec095d2 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -24,7 +24,6 @@ nf_nat_snmp_basic-y := nf_nat_snmp_basic.asn1.o nf_nat_snmp_basic_main.o
 $(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h
 obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
 
-obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
 obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
 obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
 obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
@@ -49,7 +48,6 @@ obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o
 # targets
 obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
 obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
-obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
 obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
 obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o
 
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
deleted file mode 100644
index fd3f9e8a74da..000000000000
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/* Masquerade.  Simple mapping which alters range to a local IP address
-   (depending on route). */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/types.h>
-#include <linux/inetdevice.h>
-#include <linux/ip.h>
-#include <linux/timer.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <net/protocol.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <net/route.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/ipv4/nf_nat_masquerade.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
-
-/* FIXME: Multiple targets. --RR */
-static int masquerade_tg_check(const struct xt_tgchk_param *par)
-{
-	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-
-	if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
-		pr_debug("bad MAP_IPS.\n");
-		return -EINVAL;
-	}
-	if (mr->rangesize != 1) {
-		pr_debug("bad rangesize %u\n", mr->rangesize);
-		return -EINVAL;
-	}
-	return nf_ct_netns_get(par->net, par->family);
-}
-
-static unsigned int
-masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
-{
-	struct nf_nat_range2 range;
-	const struct nf_nat_ipv4_multi_range_compat *mr;
-
-	mr = par->targinfo;
-	range.flags = mr->range[0].flags;
-	range.min_proto = mr->range[0].min;
-	range.max_proto = mr->range[0].max;
-
-	return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range,
-				      xt_out(par));
-}
-
-static void masquerade_tg_destroy(const struct xt_tgdtor_param *par)
-{
-	nf_ct_netns_put(par->net, par->family);
-}
-
-static struct xt_target masquerade_tg_reg __read_mostly = {
-	.name		= "MASQUERADE",
-	.family		= NFPROTO_IPV4,
-	.target		= masquerade_tg,
-	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
-	.table		= "nat",
-	.hooks		= 1 << NF_INET_POST_ROUTING,
-	.checkentry	= masquerade_tg_check,
-	.destroy	= masquerade_tg_destroy,
-	.me		= THIS_MODULE,
-};
-
-static int __init masquerade_tg_init(void)
-{
-	int ret;
-
-	ret = xt_register_target(&masquerade_tg_reg);
-	if (ret)
-		return ret;
-
-	ret = nf_nat_masquerade_ipv4_register_notifier();
-	if (ret)
-		xt_unregister_target(&masquerade_tg_reg);
-
-	return ret;
-}
-
-static void __exit masquerade_tg_exit(void)
-{
-	xt_unregister_target(&masquerade_tg_reg);
-	nf_nat_masquerade_ipv4_unregister_notifier();
-}
-
-module_init(masquerade_tg_init);
-module_exit(masquerade_tg_exit);
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
deleted file mode 100644
index 7d82934c46f4..000000000000
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/skbuff.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_ipv4.h>
-#include <net/route.h>
-#include <net/ip.h>
-
-static unsigned int nf_route_table_hook(void *priv,
-					struct sk_buff *skb,
-					const struct nf_hook_state *state)
-{
-	unsigned int ret;
-	struct nft_pktinfo pkt;
-	u32 mark;
-	__be32 saddr, daddr;
-	u_int8_t tos;
-	const struct iphdr *iph;
-	int err;
-
-	nft_set_pktinfo(&pkt, skb, state);
-	nft_set_pktinfo_ipv4(&pkt, skb);
-
-	mark = skb->mark;
-	iph = ip_hdr(skb);
-	saddr = iph->saddr;
-	daddr = iph->daddr;
-	tos = iph->tos;
-
-	ret = nft_do_chain(&pkt, priv);
-	if (ret != NF_DROP && ret != NF_STOLEN) {
-		iph = ip_hdr(skb);
-
-		if (iph->saddr != saddr ||
-		    iph->daddr != daddr ||
-		    skb->mark != mark ||
-		    iph->tos != tos) {
-			err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
-			if (err < 0)
-				ret = NF_DROP_ERR(err);
-		}
-	}
-	return ret;
-}
-
-static const struct nft_chain_type nft_chain_route_ipv4 = {
-	.name		= "route",
-	.type		= NFT_CHAIN_T_ROUTE,
-	.family		= NFPROTO_IPV4,
-	.owner		= THIS_MODULE,
-	.hook_mask	= (1 << NF_INET_LOCAL_OUT),
-	.hooks		= {
-		[NF_INET_LOCAL_OUT]	= nf_route_table_hook,
-	},
-};
-
-static int __init nft_chain_route_init(void)
-{
-	nft_register_chain_type(&nft_chain_route_ipv4);
-
-	return 0;
-}
-
-static void __exit nft_chain_route_exit(void)
-{
-	nft_unregister_chain_type(&nft_chain_route_ipv4);
-}
-
-module_init(nft_chain_route_init);
-module_exit(nft_chain_route_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET, "route");
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 14c7fdacaa72..795aed6e4720 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -434,37 +434,46 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
 					   struct sk_buff *skb,
 					   const void *daddr)
 {
+	const struct rtable *rt = container_of(dst, struct rtable, dst);
 	struct net_device *dev = dst->dev;
-	const __be32 *pkey = daddr;
-	const struct rtable *rt;
 	struct neighbour *n;
 
-	rt = (const struct rtable *) dst;
-	if (rt->rt_gateway)
-		pkey = (const __be32 *) &rt->rt_gateway;
-	else if (skb)
-		pkey = &ip_hdr(skb)->daddr;
+	rcu_read_lock_bh();
+	/* Resolve the gateway if set, else the packet/caller daddr. */
+	if (likely(rt->rt_gw_family == AF_INET)) {
+		n = ip_neigh_gw4(dev, rt->rt_gw4);
+	} else if (rt->rt_gw_family == AF_INET6) {
+		n = ip_neigh_gw6(dev, &rt->rt_gw6);
+	} else {
+		__be32 pkey;
+
+		pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr);
+		n = ip_neigh_gw4(dev, pkey);
+	}
+	/* Helpers may return ERR_PTR (assumed): never deref those. */
+	if (!IS_ERR_OR_NULL(n) && !refcount_inc_not_zero(&n->refcnt))
+		n = NULL;
+
+	rcu_read_unlock_bh();
 
-	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
-	if (n)
-		return n;
-	return neigh_create(&arp_tbl, pkey, dev);
+	return n;
 }
 
 static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
 {
+	const struct rtable *rt = container_of(dst, struct rtable, dst);
 	struct net_device *dev = dst->dev;
 	const __be32 *pkey = daddr;
-	const struct rtable *rt;
 
-	rt = (const struct rtable *)dst;
-	if (rt->rt_gateway)
-		pkey = (const __be32 *)&rt->rt_gateway;
-	else if (!daddr ||
+	if (rt->rt_gw_family == AF_INET) {
+		pkey = (const __be32 *)&rt->rt_gw4;
+	} else if (rt->rt_gw_family == AF_INET6) {
+		return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6);
+	} else if (!daddr ||
 		 (rt->rt_flags &
-		  (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL)))
+		  (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) {
 		return;
-
+	}
 	__ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
 }
 
@@ -500,15 +509,17 @@ EXPORT_SYMBOL(ip_idents_reserve);
 
 void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
 {
-	static u32 ip_idents_hashrnd __read_mostly;
 	u32 hash, id;
 
-	net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
+	/* Note: this lazy, unlocked key init can race, but this is okay. */
+	if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
+		get_random_bytes(&net->ipv4.ip_id_key,
+				 sizeof(net->ipv4.ip_id_key));
 
-	hash = jhash_3words((__force u32)iph->daddr,
+	hash = siphash_3u32((__force u32)iph->daddr,
 			    (__force u32)iph->saddr,
-			    iph->protocol ^ net_hash_mix(net),
-			    ip_idents_hashrnd);
+			    iph->protocol,
+			    &net->ipv4.ip_id_key);
 	id = ip_idents_reserve(hash, segs);
 	iph->id = htons(id);
 }
@@ -627,8 +638,8 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
 
 	if (fnhe->fnhe_gw) {
 		rt->rt_flags |= RTCF_REDIRECTED;
-		rt->rt_gateway = fnhe->fnhe_gw;
-		rt->rt_uses_gateway = 1;
+		rt->rt_gw_family = AF_INET;
+		rt->rt_gw4 = fnhe->fnhe_gw;
 	}
 }
 
@@ -642,7 +653,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 	unsigned int i;
 	int depth;
 
-	genid = fnhe_genid(dev_net(nh->nh_dev));
+	genid = fnhe_genid(dev_net(nh->fib_nh_dev));
 	hval = fnhe_hashfun(daddr);
 
 	spin_lock_bh(&fnhe_lock);
@@ -745,7 +756,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
 		return;
 	}
 
-	if (rt->rt_gateway != old_gw)
+	if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw)
 		return;
 
 	in_dev = __in_dev_get_rcu(dev);
@@ -776,8 +787,10 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
 			neigh_event_send(n, NULL);
 		} else {
 			if (fib_lookup(net, fl4, &res, 0) == 0) {
-				struct fib_nh *nh = &FIB_RES_NH(res);
+				struct fib_nh_common *nhc = FIB_RES_NHC(res);
+				struct fib_nh *nh;
 
+				nh = container_of(nhc, struct fib_nh, nh_common);
 				update_or_create_fnhe(nh, fl4->daddr, new_gw,
 						0, false,
 						jiffies + ip_rt_gc_timeout);
@@ -1025,8 +1038,10 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 
 	rcu_read_lock();
 	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
-		struct fib_nh *nh = &FIB_RES_NH(res);
+		struct fib_nh_common *nhc = FIB_RES_NHC(res);
+		struct fib_nh *nh;
 
+		nh = container_of(nhc, struct fib_nh, nh_common);
 		update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
 				      jiffies + ip_rt_mtu_expires);
 	}
@@ -1183,11 +1198,39 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
 	return dst;
 }
 
+static void ipv4_send_dest_unreach(struct sk_buff *skb)
+{
+	struct ip_options opt;
+	int res;
+
+	/* Recompile ip options from the packet since IPCB may not be valid
+	 * anymore. Send nothing unless we have a reasonable ipv4 header.
+	 */
+	if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) ||
+	    ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5)
+		return;
+	/* Start from empty options; only filled in when ihl > 5. */
+	memset(&opt, 0, sizeof(opt));
+	if (ip_hdr(skb)->ihl > 5) {
+		if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4))
+			return;
+		opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
+		/* Compile under RCU; presumably needed by the helper. */
+		rcu_read_lock();
+		res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
+		rcu_read_unlock();
+		/* Options failed to compile: give up without sending. */
+		if (res)
+			return;
+	}
+	__icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt);
+}
+
 static void ipv4_link_failure(struct sk_buff *skb)
 {
 	struct rtable *rt;
 
-	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+	ipv4_send_dest_unreach(skb);
 
 	rt = skb_rtable(skb);
 	if (rt)
@@ -1233,7 +1276,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
 
 		rcu_read_lock();
 		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
-			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
+			src = fib_result_prefsrc(dev_net(rt->dst.dev), &res);
 		else
 			src = inet_select_addr(rt->dst.dev,
 					       rt_nexthop(rt, iph->daddr),
@@ -1276,7 +1319,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 	mtu = READ_ONCE(dst->dev->mtu);
 
 	if (unlikely(ip_mtu_locked(dst))) {
-		if (rt->rt_uses_gateway && mtu > 576)
+		if (rt->rt_gw_family && mtu > 576)
 			mtu = 576;
 	}
 
@@ -1352,9 +1395,9 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
 
 u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
 {
+	struct fib_nh_common *nhc = res->nhc;
+	struct net_device *dev = nhc->nhc_dev;
 	struct fib_info *fi = res->fi;
-	struct fib_nh *nh = &fi->fib_nh[res->nh_sel];
-	struct net_device *dev = nh->nh_dev;
 	u32 mtu = 0;
 
 	if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
@@ -1362,6 +1405,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
 		mtu = fi->fib_mtu;
 
 	if (likely(!mtu)) {
+		struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common);
 		struct fib_nh_exception *fnhe;
 
 		fnhe = find_exception(nh, daddr);
@@ -1372,7 +1416,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
 	if (likely(!mtu))
 		mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);
 
-	return mtu - lwtunnel_headroom(nh->nh_lwtstate, mtu);
+	return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu);
 }
 
 static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
@@ -1403,8 +1447,10 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
 			orig = NULL;
 		}
 		fill_route_from_fnhe(rt, fnhe);
-		if (!rt->rt_gateway)
-			rt->rt_gateway = daddr;
+		if (!rt->rt_gw4) {
+			rt->rt_gw4 = daddr;
+			rt->rt_gw_family = AF_INET;
+		}
 
 		if (do_cache) {
 			dst_hold(&rt->dst);
@@ -1527,18 +1573,25 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 	bool cached = false;
 
 	if (fi) {
-		struct fib_nh *nh = &FIB_RES_NH(*res);
-
-		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
-			rt->rt_gateway = nh->nh_gw;
-			rt->rt_uses_gateway = 1;
+		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
+		struct fib_nh *nh;
+
+		if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
+			rt->rt_gw_family = nhc->nhc_gw_family;
+			/* only INET and INET6 are supported */
+			if (likely(nhc->nhc_gw_family == AF_INET))
+				rt->rt_gw4 = nhc->nhc_gw.ipv4;
+			else
+				rt->rt_gw6 = nhc->nhc_gw.ipv6;
 		}
+
 		ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
 
+		nh = container_of(nhc, struct fib_nh, nh_common);
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		rt->dst.tclassid = nh->nh_tclassid;
 #endif
-		rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
+		rt->dst.lwtstate = lwtstate_get(nh->fib_nh_lws);
 		if (unlikely(fnhe))
 			cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
 		else if (do_cache)
@@ -1549,8 +1602,10 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 			 * However, if we are unsuccessful at storing this
 			 * route into the cache we really need to set it.
 			 */
-			if (!rt->rt_gateway)
-				rt->rt_gateway = daddr;
+			if (!rt->rt_gw4) {
+				rt->rt_gw_family = AF_INET;
+				rt->rt_gw4 = daddr;
+			}
 			rt_add_uncached_list(rt);
 		}
 	} else
@@ -1583,8 +1638,8 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
 		rt->rt_iif = 0;
 		rt->rt_pmtu = 0;
 		rt->rt_mtu_locked = 0;
-		rt->rt_gateway = 0;
-		rt->rt_uses_gateway = 0;
+		rt->rt_gw_family = 0;
+		rt->rt_gw4 = 0;
 		INIT_LIST_HEAD(&rt->rt_uncached);
 
 		rt->dst.output = ip_output;
@@ -1697,15 +1752,18 @@ static int __mkroute_input(struct sk_buff *skb,
 			   struct in_device *in_dev,
 			   __be32 daddr, __be32 saddr, u32 tos)
 {
+	struct fib_nh_common *nhc = FIB_RES_NHC(*res);
+	struct net_device *dev = nhc->nhc_dev;
 	struct fib_nh_exception *fnhe;
 	struct rtable *rth;
+	struct fib_nh *nh;
 	int err;
 	struct in_device *out_dev;
 	bool do_cache;
 	u32 itag = 0;
 
 	/* get a working reference to the output device */
-	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
+	out_dev = __in_dev_get_rcu(dev);
 	if (!out_dev) {
 		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
 		return -EINVAL;
@@ -1722,10 +1780,14 @@ static int __mkroute_input(struct sk_buff *skb,
 
 	do_cache = res->fi && !itag;
 	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
-	    skb->protocol == htons(ETH_P_IP) &&
-	    (IN_DEV_SHARED_MEDIA(out_dev) ||
-	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
-		IPCB(skb)->flags |= IPSKB_DOREDIRECT;
+	    skb->protocol == htons(ETH_P_IP)) {
+		__be32 gw;
+
+		gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0;
+		if (IN_DEV_SHARED_MEDIA(out_dev) ||
+		    inet_addr_onlink(out_dev, saddr, gw))
+			IPCB(skb)->flags |= IPSKB_DOREDIRECT;
+	}
 
 	if (skb->protocol != htons(ETH_P_IP)) {
 		/* Not IP (i.e. ARP). Do not create route, if it is
@@ -1742,12 +1804,13 @@ static int __mkroute_input(struct sk_buff *skb,
 		}
 	}
 
-	fnhe = find_exception(&FIB_RES_NH(*res), daddr);
+	nh = container_of(nhc, struct fib_nh, nh_common);
+	fnhe = find_exception(nh, daddr);
 	if (do_cache) {
 		if (fnhe)
 			rth = rcu_dereference(fnhe->fnhe_rth_input);
 		else
-			rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+			rth = rcu_dereference(nh->nh_rth_input);
 		if (rt_cache_valid(rth)) {
 			skb_dst_set_noref(skb, &rth->dst);
 			goto out;
@@ -2041,7 +2104,11 @@ local_input:
 	do_cache = false;
 	if (res->fi) {
 		if (!itag) {
-			rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+			struct fib_nh_common *nhc = FIB_RES_NHC(*res);
+			struct fib_nh *nh;
+
+			nh = container_of(nhc, struct fib_nh, nh_common);
+			rth = rcu_dereference(nh->nh_rth_input);
 			if (rt_cache_valid(rth)) {
 				skb_dst_set_noref(skb, &rth->dst);
 				err = 0;
@@ -2071,15 +2138,17 @@ local_input:
 	}
 
 	if (do_cache) {
-		struct fib_nh *nh = &FIB_RES_NH(*res);
+		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
+		struct fib_nh *nh;
 
-		rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
+		rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
 		if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
 			WARN_ON(rth->dst.input == lwtunnel_input);
 			rth->dst.lwtstate->orig_input = rth->dst.input;
 			rth->dst.input = lwtunnel_input;
 		}
 
+		nh = container_of(nhc, struct fib_nh, nh_common);
 		if (unlikely(!rt_cache_route(nh, rth)))
 			rt_add_uncached_list(rth);
 	}
@@ -2251,8 +2320,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 	fnhe = NULL;
 	do_cache &= fi != NULL;
 	if (fi) {
+		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
+		struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common);
 		struct rtable __rcu **prth;
-		struct fib_nh *nh = &FIB_RES_NH(*res);
 
 		fnhe = find_exception(nh, fl4->daddr);
 		if (!do_cache)
@@ -2262,8 +2332,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 		} else {
 			if (unlikely(fl4->flowi4_flags &
 				     FLOWI_FLAG_KNOWN_NH &&
-				     !(nh->nh_gw &&
-				       nh->nh_scope == RT_SCOPE_LINK))) {
+				     !(nhc->nhc_gw_family &&
+				       nhc->nhc_scope == RT_SCOPE_LINK))) {
 				do_cache = false;
 				goto add;
 			}
@@ -2572,8 +2642,11 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 		rt->rt_genid = rt_genid_ipv4(net);
 		rt->rt_flags = ort->rt_flags;
 		rt->rt_type = ort->rt_type;
-		rt->rt_gateway = ort->rt_gateway;
-		rt->rt_uses_gateway = ort->rt_uses_gateway;
+		rt->rt_gw_family = ort->rt_gw_family;
+		if (rt->rt_gw_family == AF_INET)
+			rt->rt_gw4 = ort->rt_gw4;
+		else if (rt->rt_gw_family == AF_INET6)
+			rt->rt_gw6 = ort->rt_gw6;
 
 		INIT_LIST_HEAD(&rt->rt_uncached);
 	}
@@ -2652,9 +2725,22 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
 		if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
 			goto nla_put_failure;
 	}
-	if (rt->rt_uses_gateway &&
-	    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway))
+	if (rt->rt_gw_family == AF_INET &&
+	    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
 		goto nla_put_failure;
+	} else if (rt->rt_gw_family == AF_INET6) {
+		int alen = sizeof(struct in6_addr);
+		struct nlattr *nla;
+		struct rtvia *via;
+
+		nla = nla_reserve(skb, RTA_VIA, alen + 2);
+		if (!nla)
+			goto nla_put_failure;
+
+		via = nla_data(nla);
+		via->rtvia_family = AF_INET6;
+		memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
+	}
 
 	expires = rt->dst.expires;
 	if (expires) {
@@ -2791,8 +2877,8 @@ static int inet_rtm_valid_getroute_req(struct sk_buff *skb,
 	}
 
 	if (!netlink_strict_get_check(skb))
-		return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
-				   rtm_ipv4_policy, extack);
+		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
+					      rtm_ipv4_policy, extack);
 
 	rtm = nlmsg_data(nlh);
 	if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
@@ -2810,8 +2896,8 @@ static int inet_rtm_valid_getroute_req(struct sk_buff *skb,
 		return -EINVAL;
 	}
 
-	err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
-				 rtm_ipv4_policy, extack);
+	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
+					    rtm_ipv4_policy, extack);
 	if (err)
 		return err;
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 2316c08e9591..875867b64d6a 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -49,6 +49,7 @@ static int ip_ping_group_range_min[] = { 0, 0 };
 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
 static int comp_sack_nr_max = 255;
 static u32 u32_max_div_HZ = UINT_MAX / HZ;
+static int one_day_secs = 24 * 3600;
 
 /* obsolete */
 static int sysctl_tcp_low_latency __read_mostly;
@@ -1160,7 +1161,9 @@ static struct ctl_table ipv4_net_table[] = {
 		.data		= &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one_day_secs
 	},
 	{
 		.procname	= "tcp_autocorking",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6baa6dc1b13b..f7567a3698eb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -865,6 +865,18 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
 {
 	struct sk_buff *skb;
 
+	if (likely(!size)) {
+		skb = sk->sk_tx_skb_cache;
+		if (skb && !skb_cloned(skb)) {
+			skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
+			sk->sk_tx_skb_cache = NULL;
+			pskb_trim(skb, 0);
+			INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
+			skb_shinfo(skb)->tx_flags = 0;
+			memset(TCP_SKB_CB(skb), 0, sizeof(struct tcp_skb_cb));
+			return skb;
+		}
+	}
 	/* The TCP header must be at least 32-bit aligned.  */
 	size = ALIGN(size, 4);
 
@@ -1098,30 +1110,6 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
 }
 EXPORT_SYMBOL(tcp_sendpage);
 
-/* Do not bother using a page frag for very small frames.
- * But use this heuristic only for the first skb in write queue.
- *
- * Having no payload in skb->head allows better SACK shifting
- * in tcp_shift_skb_data(), reducing sack/rack overhead, because
- * write queue has less skbs.
- * Each skb can hold up to MAX_SKB_FRAGS * 32Kbytes, or ~0.5 MB.
- * This also speeds up tso_fragment(), since it wont fallback
- * to tcp_fragment().
- */
-static int linear_payload_sz(bool first_skb)
-{
-	if (first_skb)
-		return SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
-	return 0;
-}
-
-static int select_size(bool first_skb, bool zc)
-{
-	if (zc)
-		return 0;
-	return linear_payload_sz(first_skb);
-}
-
 void tcp_free_fastopen_req(struct tcp_sock *tp)
 {
 	if (tp->fastopen_req) {
@@ -1272,7 +1260,6 @@ restart:
 
 		if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
 			bool first_skb;
-			int linear;
 
 new_segment:
 			if (!sk_stream_memory_free(sk))
@@ -1283,8 +1270,7 @@ new_segment:
 				goto restart;
 			}
 			first_skb = tcp_rtx_and_write_queues_empty(sk);
-			linear = select_size(first_skb, zc);
-			skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
+			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
 						  first_skb);
 			if (!skb)
 				goto wait_for_memory;
@@ -2552,6 +2538,11 @@ void tcp_write_queue_purge(struct sock *sk)
 		sk_wmem_free_skb(sk, skb);
 	}
 	tcp_rtx_queue_purge(sk);
+	skb = sk->sk_tx_skb_cache;
+	if (skb) {
+		__kfree_skb(skb);
+		sk->sk_tx_skb_cache = NULL;
+	}
 	INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
 	sk_mem_reclaim(sk);
 	tcp_clear_all_retrans_hints(tcp_sk(sk));
@@ -2587,6 +2578,10 @@ int tcp_disconnect(struct sock *sk, int flags)
 
 	tcp_clear_xmit_timers(sk);
 	__skb_queue_purge(&sk->sk_receive_queue);
+	if (sk->sk_rx_skb_cache) {
+		__kfree_skb(sk->sk_rx_skb_cache);
+		sk->sk_rx_skb_cache = NULL;
+	}
 	tp->copied_seq = tp->rcv_nxt;
 	tp->urg_data = 0;
 	tcp_write_queue_purge(sk);
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index cd4814f7e962..477cb4aa456c 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -49,9 +49,8 @@
 #define DCTCP_MAX_ALPHA	1024U
 
 struct dctcp {
-	u32 acked_bytes_ecn;
-	u32 acked_bytes_total;
-	u32 prior_snd_una;
+	u32 old_delivered;
+	u32 old_delivered_ce;
 	u32 prior_rcv_nxt;
 	u32 dctcp_alpha;
 	u32 next_seq;
@@ -67,19 +66,14 @@ static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA;
 module_param(dctcp_alpha_on_init, uint, 0644);
 MODULE_PARM_DESC(dctcp_alpha_on_init, "parameter for initial alpha value");
 
-static unsigned int dctcp_clamp_alpha_on_loss __read_mostly;
-module_param(dctcp_clamp_alpha_on_loss, uint, 0644);
-MODULE_PARM_DESC(dctcp_clamp_alpha_on_loss,
-		 "parameter for clamping alpha on loss");
-
 static struct tcp_congestion_ops dctcp_reno;
 
 static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca)
 {
 	ca->next_seq = tp->snd_nxt;
 
-	ca->acked_bytes_ecn = 0;
-	ca->acked_bytes_total = 0;
+	ca->old_delivered = tp->delivered;
+	ca->old_delivered_ce = tp->delivered_ce;
 }
 
 static void dctcp_init(struct sock *sk)
@@ -91,7 +85,6 @@ static void dctcp_init(struct sock *sk)
 	     sk->sk_state == TCP_CLOSE)) {
 		struct dctcp *ca = inet_csk_ca(sk);
 
-		ca->prior_snd_una = tp->snd_una;
 		ca->prior_rcv_nxt = tp->rcv_nxt;
 
 		ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
@@ -123,37 +116,25 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct dctcp *ca = inet_csk_ca(sk);
-	u32 acked_bytes = tp->snd_una - ca->prior_snd_una;
-
-	/* If ack did not advance snd_una, count dupack as MSS size.
-	 * If ack did update window, do not count it at all.
-	 */
-	if (acked_bytes == 0 && !(flags & CA_ACK_WIN_UPDATE))
-		acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss;
-	if (acked_bytes) {
-		ca->acked_bytes_total += acked_bytes;
-		ca->prior_snd_una = tp->snd_una;
-
-		if (flags & CA_ACK_ECE)
-			ca->acked_bytes_ecn += acked_bytes;
-	}
 
 	/* Expired RTT */
 	if (!before(tp->snd_una, ca->next_seq)) {
-		u64 bytes_ecn = ca->acked_bytes_ecn;
+		u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce;
 		u32 alpha = ca->dctcp_alpha;
 
 		/* alpha = (1 - g) * alpha + g * F */
 
 		alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g);
-		if (bytes_ecn) {
+		if (delivered_ce) {
+			u32 delivered = tp->delivered - ca->old_delivered;
+
 			/* If dctcp_shift_g == 1, a 32bit value would overflow
-			 * after 8 Mbytes.
+			 * after 8 M packets.
 			 */
-			bytes_ecn <<= (10 - dctcp_shift_g);
-			do_div(bytes_ecn, max(1U, ca->acked_bytes_total));
+			delivered_ce <<= (10 - dctcp_shift_g);
+			delivered_ce /= max(1U, delivered);
 
-			alpha = min(alpha + (u32)bytes_ecn, DCTCP_MAX_ALPHA);
+			alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA);
 		}
 		/* dctcp_alpha can be read from dctcp_get_info() without
 		 * synchro, so we ask compiler to not use dctcp_alpha
@@ -164,21 +145,23 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags)
 	}
 }
 
-static void dctcp_state(struct sock *sk, u8 new_state)
+static void dctcp_react_to_loss(struct sock *sk)
 {
-	if (dctcp_clamp_alpha_on_loss && new_state == TCP_CA_Loss) {
-		struct dctcp *ca = inet_csk_ca(sk);
+	struct dctcp *ca = inet_csk_ca(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
 
-		/* If this extension is enabled, we clamp dctcp_alpha to
-		 * max on packet loss; the motivation is that dctcp_alpha
-		 * is an indicator to the extend of congestion and packet
-		 * loss is an indicator of extreme congestion; setting
-		 * this in practice turned out to be beneficial, and
-		 * effectively assumes total congestion which reduces the
-		 * window by half.
-		 */
-		ca->dctcp_alpha = DCTCP_MAX_ALPHA;
-	}
+	ca->loss_cwnd = tp->snd_cwnd;
+	tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
+}
+
+static void dctcp_state(struct sock *sk, u8 new_state)
+{
+	if (new_state == TCP_CA_Recovery &&
+	    new_state != inet_csk(sk)->icsk_ca_state)
+		dctcp_react_to_loss(sk);
+	/* We handle RTO in dctcp_cwnd_event to ensure that we perform only
+	 * one loss-adjustment per RTT.
+	 */
 }
 
 static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
@@ -190,6 +173,9 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
 	case CA_EVENT_ECN_NO_CE:
 		dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state);
 		break;
+	case CA_EVENT_LOSS:
+		dctcp_react_to_loss(sk);
+		break;
 	default:
 		/* Don't care for the rest. */
 		break;
@@ -200,6 +186,7 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
 			     union tcp_cc_info *info)
 {
 	const struct dctcp *ca = inet_csk_ca(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	/* Fill it also in case of VEGASINFO due to req struct limits.
 	 * We can still correctly retrieve it later.
@@ -211,8 +198,10 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
 			info->dctcp.dctcp_enabled = 1;
 			info->dctcp.dctcp_ce_state = (u16) ca->ce_state;
 			info->dctcp.dctcp_alpha = ca->dctcp_alpha;
-			info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn;
-			info->dctcp.dctcp_ab_tot = ca->acked_bytes_total;
+			info->dctcp.dctcp_ab_ecn = tp->mss_cache *
+						   (tp->delivered_ce - ca->old_delivered_ce);
+			info->dctcp.dctcp_ab_tot = tp->mss_cache *
+						   (tp->delivered - ca->old_delivered);
 		}
 
 		*attr = INET_DIAG_DCTCPINFO;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5dfbc333e79a..97671bff597a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -402,11 +402,12 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
 static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	int room;
+
+	room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;
 
 	/* Check #1 */
-	if (tp->rcv_ssthresh < tp->window_clamp &&
-	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
-	    !tcp_under_memory_pressure(sk)) {
+	if (room > 0 && !tcp_under_memory_pressure(sk)) {
 		int incr;
 
 		/* Check #2. Increase window, if skb with such overhead
@@ -419,8 +420,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 
 		if (incr) {
 			incr = max_t(int, incr, 2 * skb->len);
-			tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
-					       tp->window_clamp);
+			tp->rcv_ssthresh += min(room, incr);
 			inet_csk(sk)->icsk_ack.quick |= 1;
 		}
 	}
@@ -6263,6 +6263,11 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
  * congestion control: Linux DCTCP asserts ECT on all packets,
  * including SYN, which is most optimal solution; however,
  * others, such as FreeBSD do not.
+ *
+ * Exception: At least one of the reserved bits of the TCP header (th->res1) is
+ * set, indicating the use of a future TCP extension (such as AccECN). See
+ * RFC8311 §4.3 which updates RFC3168 to allow the development of such
+ * extensions.
  */
 static void tcp_ecn_create_request(struct request_sock *req,
 				   const struct sk_buff *skb,
@@ -6282,7 +6287,7 @@ static void tcp_ecn_create_request(struct request_sock *req,
 	ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
 	ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
 
-	if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
+	if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
 	    (ecn_ok_dst & DST_FEATURE_ECN_CA) ||
 	    tcp_bpf_ca_needs_ecn((struct sock *)req))
 		inet_rsk(req)->ecn_ok = 1;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 277d71239d75..faa6fa619f59 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1774,6 +1774,7 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
 int tcp_v4_rcv(struct sk_buff *skb)
 {
 	struct net *net = dev_net(skb->dev);
+	struct sk_buff *skb_to_free;
 	int sdif = inet_sdif(skb);
 	const struct iphdr *iph;
 	const struct tcphdr *th;
@@ -1905,11 +1906,17 @@ process:
 	tcp_segs_in(tcp_sk(sk), skb);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
+		skb_to_free = sk->sk_rx_skb_cache;
+		sk->sk_rx_skb_cache = NULL;
 		ret = tcp_v4_do_rcv(sk, skb);
-	} else if (tcp_add_backlog(sk, skb)) {
-		goto discard_and_relse;
+	} else {
+		if (tcp_add_backlog(sk, skb))
+			goto discard_and_relse;
+		skb_to_free = NULL;
 	}
 	bh_unlock_sock(sk);
+	if (skb_to_free)
+		__kfree_skb(skb_to_free);
 
 put_and_return:
 	if (refcounted)
@@ -2578,7 +2585,8 @@ static void __net_exit tcp_sk_exit(struct net *net)
 {
 	int cpu;
 
-	module_put(net->ipv4.tcp_congestion_control->owner);
+	if (net->ipv4.tcp_congestion_control)
+		module_put(net->ipv4.tcp_congestion_control->owner);
 
 	for_each_possible_cpu(cpu)
 		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 4ccec4c705f7..f262f2cace29 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -658,7 +658,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
 	{
 		int n = 0;
 
-		nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS);
+		nest = nla_nest_start_noflag(msg, TCP_METRICS_ATTR_VALS);
 		if (!nest)
 			goto nla_put_failure;
 		for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
@@ -951,11 +951,13 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info)
 static const struct genl_ops tcp_metrics_nl_ops[] = {
 	{
 		.cmd = TCP_METRICS_CMD_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = tcp_metrics_nl_cmd_get,
 		.dumpit = tcp_metrics_nl_dump,
 	},
 	{
 		.cmd = TCP_METRICS_CMD_DEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = tcp_metrics_nl_cmd_del,
 		.flags = GENL_ADMIN_PERM,
 	},
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e265d1aeeb66..32061928b054 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3088,7 +3088,6 @@ void tcp_send_fin(struct sock *sk)
 		tskb = skb_rb_last(&sk->tcp_rtx_queue);
 
 	if (tskb) {
-coalesce:
 		TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
 		TCP_SKB_CB(tskb)->end_seq++;
 		tp->write_seq++;
@@ -3104,11 +3103,9 @@ coalesce:
 		}
 	} else {
 		skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
-		if (unlikely(!skb)) {
-			if (tskb)
-				goto coalesce;
+		if (unlikely(!skb))
 			return;
-		}
+
 		INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
 		skb_reserve(skb, MAX_TCP_HEADER);
 		sk_forced_mem_schedule(sk, skb->truesize);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 372fdc5381a9..3c58ba02af7d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1631,7 +1631,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 EXPORT_SYMBOL(udp_ioctl);
 
 struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
-			       int noblock, int *peeked, int *off, int *err)
+			       int noblock, int *off, int *err)
 {
 	struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
 	struct sk_buff_head *queue;
@@ -1650,13 +1650,11 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
 			break;
 
 		error = -EAGAIN;
-		*peeked = 0;
 		do {
 			spin_lock_bh(&queue->lock);
 			skb = __skb_try_recv_from_queue(sk, queue, flags,
 							udp_skb_destructor,
-							peeked, off, err,
-							&last);
+							off, err, &last);
 			if (skb) {
 				spin_unlock_bh(&queue->lock);
 				return skb;
@@ -1677,8 +1675,7 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
 
 			skb = __skb_try_recv_from_queue(sk, queue, flags,
 							udp_skb_dtor_locked,
-							peeked, off, err,
-							&last);
+							off, err, &last);
 			spin_unlock(&sk_queue->lock);
 			spin_unlock_bh(&queue->lock);
 			if (skb)
@@ -1713,8 +1710,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
 	DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
 	struct sk_buff *skb;
 	unsigned int ulen, copied;
-	int peeked, peeking, off;
-	int err;
+	int off, err, peeking = flags & MSG_PEEK;
 	int is_udplite = IS_UDPLITE(sk);
 	bool checksum_valid = false;
 
@@ -1722,9 +1718,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
 		return ip_recv_error(sk, msg, len, addr_len);
 
 try_again:
-	peeking = flags & MSG_PEEK;
 	off = sk_peek_offset(sk, flags);
-	skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
+	skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
 	if (!skb)
 		return err;
 
@@ -1762,7 +1757,7 @@ try_again:
 	}
 
 	if (unlikely(err)) {
-		if (!peeked) {
+		if (!peeking) {
 			atomic_inc(&sk->sk_drops);
 			UDP_INC_STATS(sock_net(sk),
 				      UDP_MIB_INERRORS, is_udplite);
@@ -1771,7 +1766,7 @@ try_again:
 		return err;
 	}
 
-	if (!peeked)
+	if (!peeking)
 		UDP_INC_STATS(sock_net(sk),
 			      UDP_MIB_INDATAGRAMS, is_udplite);
 
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 414ab0420604..cdef8f9a3b01 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -85,8 +85,11 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
 					      RTCF_LOCAL);
 	xdst->u.rt.rt_type = rt->rt_type;
-	xdst->u.rt.rt_gateway = rt->rt_gateway;
-	xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
+	xdst->u.rt.rt_gw_family = rt->rt_gw_family;
+	if (rt->rt_gw_family == AF_INET)
+		xdst->u.rt.rt_gw4 = rt->rt_gw4;
+	else if (rt->rt_gw_family == AF_INET6)
+		xdst->u.rt.rt_gw6 = rt->rt_gw6;
 	xdst->u.rt.rt_pmtu = rt->rt_pmtu;
 	xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
 	INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);