summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/Makefile2
-rw-r--r--net/ipv6/addrconf.c200
-rw-r--r--net/ipv6/addrlabel.c59
-rw-r--r--net/ipv6/ah6.c80
-rw-r--r--net/ipv6/esp6.c26
-rw-r--r--net/ipv6/exthdrs_core.c2
-rw-r--r--net/ipv6/exthdrs_offload.c4
-rw-r--r--net/ipv6/icmp.c4
-rw-r--r--net/ipv6/ip6_checksum.c4
-rw-r--r--net/ipv6/ip6_fib.c121
-rw-r--r--net/ipv6/ip6_flowlabel.c6
-rw-r--r--net/ipv6/ip6_gre.c9
-rw-r--r--net/ipv6/ip6_offload.c20
-rw-r--r--net/ipv6/ip6_output.c53
-rw-r--r--net/ipv6/ip6_tunnel.c13
-rw-r--r--net/ipv6/ip6_vti.c316
-rw-r--r--net/ipv6/ip6mr.c13
-rw-r--r--net/ipv6/ipcomp6.c22
-rw-r--r--net/ipv6/ipv6_sockglue.c2
-rw-r--r--net/ipv6/mcast.c11
-rw-r--r--net/ipv6/netfilter/Kconfig5
-rw-r--r--net/ipv6/netfilter/Makefile1
-rw-r--r--net/ipv6/netfilter/ip6_tables.c6
-rw-r--r--net/ipv6/netfilter/nft_reject_ipv6.c76
-rw-r--r--net/ipv6/output_core.c27
-rw-r--r--net/ipv6/ping.c5
-rw-r--r--net/ipv6/route.c52
-rw-r--r--net/ipv6/sit.c29
-rw-r--r--net/ipv6/tcp_ipv6.c46
-rw-r--r--net/ipv6/udp_offload.c2
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c63
-rw-r--r--net/ipv6/xfrm6_policy.c7
-rw-r--r--net/ipv6/xfrm6_protocol.c270
34 files changed, 1078 insertions, 479 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index d92e5586783e..438a73aa777c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -138,6 +138,7 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION
config IPV6_VTI
tristate "Virtual (secure) IPv6: tunneling"
select IPV6_TUNNEL
+ select NET_IP_TUNNEL
depends on INET6_XFRM_MODE_TUNNEL
---help---
Tunneling means encapsulating data of one protocol type within
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 17bb830872db..2fe68364bb20 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -16,7 +16,7 @@ ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o
ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
- xfrm6_output.o
+ xfrm6_output.o xfrm6_protocol.o
ipv6-$(CONFIG_NETFILTER) += netfilter.o
ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
ipv6-$(CONFIG_PROC_FS) += proc.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ad235690684c..6c7fa0853fc7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -133,10 +133,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev);
static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
static DEFINE_SPINLOCK(addrconf_hash_lock);
-static void addrconf_verify(unsigned long);
+static void addrconf_verify(void);
+static void addrconf_verify_rtnl(void);
+static void addrconf_verify_work(struct work_struct *);
-static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0);
-static DEFINE_SPINLOCK(addrconf_verify_lock);
+static struct workqueue_struct *addrconf_wq;
+static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work);
static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
@@ -151,7 +153,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
u32 flags, u32 noflags);
static void addrconf_dad_start(struct inet6_ifaddr *ifp);
-static void addrconf_dad_timer(unsigned long data);
+static void addrconf_dad_work(struct work_struct *w);
static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
static void addrconf_dad_run(struct inet6_dev *idev);
static void addrconf_rs_timer(unsigned long data);
@@ -247,9 +249,9 @@ static void addrconf_del_rs_timer(struct inet6_dev *idev)
__in6_dev_put(idev);
}
-static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp)
+static void addrconf_del_dad_work(struct inet6_ifaddr *ifp)
{
- if (del_timer(&ifp->dad_timer))
+ if (cancel_delayed_work(&ifp->dad_work))
__in6_ifa_put(ifp);
}
@@ -261,12 +263,12 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev,
mod_timer(&idev->rs_timer, jiffies + when);
}
-static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp,
- unsigned long when)
+static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
+ unsigned long delay)
{
- if (!timer_pending(&ifp->dad_timer))
+ if (!delayed_work_pending(&ifp->dad_work))
in6_ifa_hold(ifp);
- mod_timer(&ifp->dad_timer, jiffies + when);
+ mod_delayed_work(addrconf_wq, &ifp->dad_work, delay);
}
static int snmp6_alloc_dev(struct inet6_dev *idev)
@@ -751,8 +753,9 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
in6_dev_put(ifp->idev);
- if (del_timer(&ifp->dad_timer))
- pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
+ if (cancel_delayed_work(&ifp->dad_work))
+ pr_notice("delayed DAD work was pending while freeing ifa=%p\n",
+ ifp);
if (ifp->state != INET6_IFADDR_STATE_DEAD) {
pr_warn("Freeing alive inet6 address %p\n", ifp);
@@ -849,8 +852,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
spin_lock_init(&ifa->lock);
spin_lock_init(&ifa->state_lock);
- setup_timer(&ifa->dad_timer, addrconf_dad_timer,
- (unsigned long)ifa);
+ INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work);
INIT_HLIST_NODE(&ifa->addr_lst);
ifa->scope = scope;
ifa->prefix_len = pfxlen;
@@ -990,6 +992,8 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP;
unsigned long expires;
+ ASSERT_RTNL();
+
spin_lock_bh(&ifp->state_lock);
state = ifp->state;
ifp->state = INET6_IFADDR_STATE_DEAD;
@@ -1021,7 +1025,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
write_unlock_bh(&ifp->idev->lock);
- addrconf_del_dad_timer(ifp);
+ addrconf_del_dad_work(ifp);
ipv6_ifa_notify(RTM_DELADDR, ifp);
@@ -1103,8 +1107,11 @@ retry:
* Lifetime is greater than REGEN_ADVANCE time units. In particular,
* an implementation must not create a temporary address with a zero
* Preferred Lifetime.
+ * Use age calculation as in addrconf_verify to avoid unnecessary
+ * temporary addresses being generated.
*/
- if (tmp_prefered_lft <= regen_advance) {
+ age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
+ if (tmp_prefered_lft <= regen_advance + age) {
in6_ifa_put(ifp);
in6_dev_put(idev);
ret = -1;
@@ -1601,7 +1608,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
{
if (ifp->flags&IFA_F_PERMANENT) {
spin_lock_bh(&ifp->lock);
- addrconf_del_dad_timer(ifp);
+ addrconf_del_dad_work(ifp);
ifp->flags |= IFA_F_TENTATIVE;
if (dad_failed)
ifp->flags |= IFA_F_DADFAILED;
@@ -1622,20 +1629,21 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
spin_unlock_bh(&ifp->lock);
}
ipv6_del_addr(ifp);
- } else
+ } else {
ipv6_del_addr(ifp);
+ }
}
static int addrconf_dad_end(struct inet6_ifaddr *ifp)
{
int err = -ENOENT;
- spin_lock(&ifp->state_lock);
+ spin_lock_bh(&ifp->state_lock);
if (ifp->state == INET6_IFADDR_STATE_DAD) {
ifp->state = INET6_IFADDR_STATE_POSTDAD;
err = 0;
}
- spin_unlock(&ifp->state_lock);
+ spin_unlock_bh(&ifp->state_lock);
return err;
}
@@ -1668,7 +1676,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
}
}
- addrconf_dad_stop(ifp, 1);
+ spin_lock_bh(&ifp->state_lock);
+ /* transition from _POSTDAD to _ERRDAD */
+ ifp->state = INET6_IFADDR_STATE_ERRDAD;
+ spin_unlock_bh(&ifp->state_lock);
+
+ addrconf_mod_dad_work(ifp, 0);
}
/* Join to solicited addr multicast group. */
@@ -1677,6 +1690,8 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
{
struct in6_addr maddr;
+ ASSERT_RTNL();
+
if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
return;
@@ -1688,6 +1703,8 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct in6_addr maddr;
+ ASSERT_RTNL();
+
if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
return;
@@ -1698,6 +1715,9 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
{
struct in6_addr addr;
+
+ ASSERT_RTNL();
+
if (ifp->prefix_len >= 127) /* RFC 6164 */
return;
ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -1709,6 +1729,9 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
{
struct in6_addr addr;
+
+ ASSERT_RTNL();
+
if (ifp->prefix_len >= 127) /* RFC 6164 */
return;
ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -2268,11 +2291,13 @@ ok:
return;
}
- ifp->flags |= IFA_F_MANAGETEMPADDR;
update_lft = 0;
create = 1;
+ spin_lock_bh(&ifp->lock);
+ ifp->flags |= IFA_F_MANAGETEMPADDR;
ifp->cstamp = jiffies;
ifp->tokenized = tokenized;
+ spin_unlock_bh(&ifp->lock);
addrconf_dad_start(ifp);
}
@@ -2323,7 +2348,7 @@ ok:
create, now);
in6_ifa_put(ifp);
- addrconf_verify(0);
+ addrconf_verify();
}
}
inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
@@ -2472,7 +2497,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
manage_tempaddrs(idev, ifp, valid_lft, prefered_lft,
true, jiffies);
in6_ifa_put(ifp);
- addrconf_verify(0);
+ addrconf_verify_rtnl();
return 0;
}
@@ -2783,6 +2808,8 @@ static void addrconf_gre_config(struct net_device *dev)
ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
addrconf_add_linklocal(idev, &addr);
+ else
+ addrconf_prefix_route(&addr, 64, dev, 0, 0);
}
#endif
@@ -3006,7 +3033,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
hlist_for_each_entry_rcu(ifa, h, addr_lst) {
if (ifa->idev == idev) {
hlist_del_init_rcu(&ifa->addr_lst);
- addrconf_del_dad_timer(ifa);
+ addrconf_del_dad_work(ifa);
goto restart;
}
}
@@ -3044,7 +3071,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
while (!list_empty(&idev->addr_list)) {
ifa = list_first_entry(&idev->addr_list,
struct inet6_ifaddr, if_list);
- addrconf_del_dad_timer(ifa);
+ addrconf_del_dad_work(ifa);
list_del(&ifa->if_list);
@@ -3143,10 +3170,10 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1);
ifp->dad_probes = idev->cnf.dad_transmits;
- addrconf_mod_dad_timer(ifp, rand_num);
+ addrconf_mod_dad_work(ifp, rand_num);
}
-static void addrconf_dad_start(struct inet6_ifaddr *ifp)
+static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
struct net_device *dev = idev->dev;
@@ -3198,25 +3225,68 @@ out:
read_unlock_bh(&idev->lock);
}
-static void addrconf_dad_timer(unsigned long data)
+static void addrconf_dad_start(struct inet6_ifaddr *ifp)
+{
+ bool begin_dad = false;
+
+ spin_lock_bh(&ifp->state_lock);
+ if (ifp->state != INET6_IFADDR_STATE_DEAD) {
+ ifp->state = INET6_IFADDR_STATE_PREDAD;
+ begin_dad = true;
+ }
+ spin_unlock_bh(&ifp->state_lock);
+
+ if (begin_dad)
+ addrconf_mod_dad_work(ifp, 0);
+}
+
+static void addrconf_dad_work(struct work_struct *w)
{
- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+ struct inet6_ifaddr *ifp = container_of(to_delayed_work(w),
+ struct inet6_ifaddr,
+ dad_work);
struct inet6_dev *idev = ifp->idev;
struct in6_addr mcaddr;
+ enum {
+ DAD_PROCESS,
+ DAD_BEGIN,
+ DAD_ABORT,
+ } action = DAD_PROCESS;
+
+ rtnl_lock();
+
+ spin_lock_bh(&ifp->state_lock);
+ if (ifp->state == INET6_IFADDR_STATE_PREDAD) {
+ action = DAD_BEGIN;
+ ifp->state = INET6_IFADDR_STATE_DAD;
+ } else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) {
+ action = DAD_ABORT;
+ ifp->state = INET6_IFADDR_STATE_POSTDAD;
+ }
+ spin_unlock_bh(&ifp->state_lock);
+
+ if (action == DAD_BEGIN) {
+ addrconf_dad_begin(ifp);
+ goto out;
+ } else if (action == DAD_ABORT) {
+ addrconf_dad_stop(ifp, 1);
+ goto out;
+ }
+
if (!ifp->dad_probes && addrconf_dad_end(ifp))
goto out;
- write_lock(&idev->lock);
+ write_lock_bh(&idev->lock);
if (idev->dead || !(idev->if_flags & IF_READY)) {
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
goto out;
}
spin_lock(&ifp->lock);
if (ifp->state == INET6_IFADDR_STATE_DEAD) {
spin_unlock(&ifp->lock);
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
goto out;
}
@@ -3227,7 +3297,7 @@ static void addrconf_dad_timer(unsigned long data)
ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
spin_unlock(&ifp->lock);
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
addrconf_dad_completed(ifp);
@@ -3235,16 +3305,17 @@ static void addrconf_dad_timer(unsigned long data)
}
ifp->dad_probes--;
- addrconf_mod_dad_timer(ifp,
- NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME));
+ addrconf_mod_dad_work(ifp,
+ NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME));
spin_unlock(&ifp->lock);
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
/* send a neighbour solicitation for our addr */
addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
out:
in6_ifa_put(ifp);
+ rtnl_unlock();
}
/* ifp->idev must be at least read locked */
@@ -3271,7 +3342,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
struct in6_addr lladdr;
bool send_rs, send_mld;
- addrconf_del_dad_timer(ifp);
+ addrconf_del_dad_work(ifp);
/*
* Configure the address for reception. Now it is valid.
@@ -3512,23 +3583,23 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
* Periodic address status verification
*/
-static void addrconf_verify(unsigned long foo)
+static void addrconf_verify_rtnl(void)
{
unsigned long now, next, next_sec, next_sched;
struct inet6_ifaddr *ifp;
int i;
+ ASSERT_RTNL();
+
rcu_read_lock_bh();
- spin_lock(&addrconf_verify_lock);
now = jiffies;
next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
- del_timer(&addr_chk_timer);
+ cancel_delayed_work(&addr_chk_work);
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
restart:
- hlist_for_each_entry_rcu_bh(ifp,
- &inet6_addr_lst[i], addr_lst) {
+ hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) {
unsigned long age;
/* When setting preferred_lft to a value not zero or
@@ -3623,13 +3694,22 @@ restart:
ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
now, next, next_sec, next_sched);
-
- addr_chk_timer.expires = next_sched;
- add_timer(&addr_chk_timer);
- spin_unlock(&addrconf_verify_lock);
+ mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
rcu_read_unlock_bh();
}
+static void addrconf_verify_work(struct work_struct *w)
+{
+ rtnl_lock();
+ addrconf_verify_rtnl();
+ rtnl_unlock();
+}
+
+static void addrconf_verify(void)
+{
+ mod_delayed_work(addrconf_wq, &addr_chk_work, 0);
+}
+
static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local,
struct in6_addr **peer_pfx)
{
@@ -3686,6 +3766,8 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
bool was_managetempaddr;
bool had_prefixroute;
+ ASSERT_RTNL();
+
if (!valid_lft || (prefered_lft > valid_lft))
return -EINVAL;
@@ -3751,7 +3833,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
!was_managetempaddr, jiffies);
}
- addrconf_verify(0);
+ addrconf_verify_rtnl();
return 0;
}
@@ -4381,6 +4463,8 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
bool update_rs = false;
struct in6_addr ll_addr;
+ ASSERT_RTNL();
+
if (token == NULL)
return -EINVAL;
if (ipv6_addr_any(token))
@@ -4429,7 +4513,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
}
write_unlock_bh(&idev->lock);
- addrconf_verify(0);
+ addrconf_verify_rtnl();
return 0;
}
@@ -4631,6 +4715,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
{
struct net *net = dev_net(ifp->idev->dev);
+ if (event)
+ ASSERT_RTNL();
+
inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
switch (event) {
@@ -5239,6 +5326,12 @@ int __init addrconf_init(void)
if (err < 0)
goto out_addrlabel;
+ addrconf_wq = create_workqueue("ipv6_addrconf");
+ if (!addrconf_wq) {
+ err = -ENOMEM;
+ goto out_nowq;
+ }
+
/* The addrconf netdev notifier requires that loopback_dev
* has it's ipv6 private information allocated and setup
* before it can bring up and give link-local addresses
@@ -5269,7 +5362,7 @@ int __init addrconf_init(void)
register_netdevice_notifier(&ipv6_dev_notf);
- addrconf_verify(0);
+ addrconf_verify();
rtnl_af_register(&inet6_ops);
@@ -5297,6 +5390,8 @@ errout:
rtnl_af_unregister(&inet6_ops);
unregister_netdevice_notifier(&ipv6_dev_notf);
errlo:
+ destroy_workqueue(addrconf_wq);
+out_nowq:
unregister_pernet_subsys(&addrconf_ops);
out_addrlabel:
ipv6_addr_label_cleanup();
@@ -5332,7 +5427,8 @@ void addrconf_cleanup(void)
for (i = 0; i < IN6_ADDR_HSIZE; i++)
WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
spin_unlock_bh(&addrconf_hash_lock);
-
- del_timer(&addr_chk_timer);
+ cancel_delayed_work(&addr_chk_work);
rtnl_unlock();
+
+ destroy_workqueue(addrconf_wq);
}
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index b30ad3741b46..731e1e1722d9 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -6,7 +6,7 @@
*/
/*
* Author:
- * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
+ * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
*/
#include <linux/kernel.h>
@@ -22,14 +22,13 @@
#if 0
#define ADDRLABEL(x...) printk(x)
#else
-#define ADDRLABEL(x...) do { ; } while(0)
+#define ADDRLABEL(x...) do { ; } while (0)
#endif
/*
* Policy Table
*/
-struct ip6addrlbl_entry
-{
+struct ip6addrlbl_entry {
#ifdef CONFIG_NET_NS
struct net *lbl_net;
#endif
@@ -88,39 +87,39 @@ static const __net_initconst struct ip6addrlbl_init_table
{ /* ::/0 */
.prefix = &in6addr_any,
.label = 1,
- },{ /* fc00::/7 */
- .prefix = &(struct in6_addr){{{ 0xfc }}},
+ }, { /* fc00::/7 */
+ .prefix = &(struct in6_addr){ { { 0xfc } } } ,
.prefixlen = 7,
.label = 5,
- },{ /* fec0::/10 */
- .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}},
+ }, { /* fec0::/10 */
+ .prefix = &(struct in6_addr){ { { 0xfe, 0xc0 } } },
.prefixlen = 10,
.label = 11,
- },{ /* 2002::/16 */
- .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
+ }, { /* 2002::/16 */
+ .prefix = &(struct in6_addr){ { { 0x20, 0x02 } } },
.prefixlen = 16,
.label = 2,
- },{ /* 3ffe::/16 */
- .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}},
+ }, { /* 3ffe::/16 */
+ .prefix = &(struct in6_addr){ { { 0x3f, 0xfe } } },
.prefixlen = 16,
.label = 12,
- },{ /* 2001::/32 */
- .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
+ }, { /* 2001::/32 */
+ .prefix = &(struct in6_addr){ { { 0x20, 0x01 } } },
.prefixlen = 32,
.label = 6,
- },{ /* 2001:10::/28 */
- .prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}},
+ }, { /* 2001:10::/28 */
+ .prefix = &(struct in6_addr){ { { 0x20, 0x01, 0x00, 0x10 } } },
.prefixlen = 28,
.label = 7,
- },{ /* ::ffff:0:0 */
- .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}},
+ }, { /* ::ffff:0:0 */
+ .prefix = &(struct in6_addr){ { { [10] = 0xff, [11] = 0xff } } },
.prefixlen = 96,
.label = 4,
- },{ /* ::/96 */
+ }, { /* ::/96 */
.prefix = &in6addr_any,
.prefixlen = 96,
.label = 3,
- },{ /* ::1/128 */
+ }, { /* ::1/128 */
.prefix = &in6addr_loopback,
.prefixlen = 128,
.label = 0,
@@ -441,7 +440,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh)
if (label == IPV6_ADDR_LABEL_DEFAULT)
return -EINVAL;
- switch(nlh->nlmsg_type) {
+ switch (nlh->nlmsg_type) {
case RTM_NEWADDRLABEL:
if (ifal->ifal_index &&
!__dev_get_by_index(net, ifal->ifal_index))
@@ -505,12 +504,13 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
if (idx >= s_idx &&
net_eq(ip6addrlbl_net(p), net)) {
- if ((err = ip6addrlbl_fill(skb, p,
- ip6addrlbl_table.seq,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWADDRLABEL,
- NLM_F_MULTI)) <= 0)
+ err = ip6addrlbl_fill(skb, p,
+ ip6addrlbl_table.seq,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWADDRLABEL,
+ NLM_F_MULTI);
+ if (err <= 0)
break;
}
idx++;
@@ -527,7 +527,7 @@ static inline int ip6addrlbl_msgsize(void)
+ nla_total_size(4); /* IFAL_LABEL */
}
-static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh)
+static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(in_skb->sk);
struct ifaddrlblmsg *ifal;
@@ -568,7 +568,8 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh)
goto out;
}
- if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) {
+ skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL);
+ if (!skb) {
ip6addrlbl_put(p);
return -ENOBUFS;
}
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 81e496a2e008..72a4930bdc0a 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -346,6 +346,10 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
struct ip_auth_hdr *ah;
struct ah_data *ahp;
struct tmp_ext *iph_ext;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
ahp = x->data;
ahash = ahp->ahash;
@@ -359,15 +363,22 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
if (extlen)
extlen += sizeof(*iph_ext);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
err = -ENOMEM;
- iph_base = ah_alloc_tmp(ahash, nfrags, IPV6HDR_BASELEN + extlen);
+ iph_base = ah_alloc_tmp(ahash, nfrags + sglists, IPV6HDR_BASELEN +
+ extlen + seqhi_len);
if (!iph_base)
goto out;
iph_ext = ah_tmp_ext(iph_base);
- icv = ah_tmp_icv(ahash, iph_ext, extlen);
+ seqhi = (__be32 *)((char *)iph_ext + extlen);
+ icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
ah = ip_auth_hdr(skb);
memset(ah->auth_data, 0, ahp->icv_trunc_len);
@@ -411,10 +422,15 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
ah->spi = x->id.spi;
ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah6_output_done, skb);
AH_SKB_CB(skb)->tmp = iph_base;
@@ -514,6 +530,10 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
int nexthdr;
int nfrags;
int err = -ENOMEM;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
goto out;
@@ -550,14 +570,22 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
skb_push(skb, hdr_len);
- work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
+
+ work_iph = ah_alloc_tmp(ahash, nfrags + sglists, hdr_len +
+ ahp->icv_trunc_len + seqhi_len);
if (!work_iph)
goto out;
- auth_data = ah_tmp_auth(work_iph, hdr_len);
- icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
+ auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len);
+ seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len);
+ icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
memcpy(work_iph, ip6h, hdr_len);
memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
@@ -572,10 +600,16 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
ip6h->flow_lbl[2] = 0;
ip6h->hop_limit = 0;
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah6_input_done, skb);
AH_SKB_CB(skb)->tmp = work_iph;
@@ -609,8 +643,8 @@ out:
return err;
}
-static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
@@ -619,17 +653,19 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
- return;
+ return 0;
x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
if (!x)
- return;
+ return 0;
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static int ah6_init_state(struct xfrm_state *x)
@@ -714,6 +750,11 @@ static void ah6_destroy(struct xfrm_state *x)
kfree(ahp);
}
+static int ah6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type ah6_type =
{
.description = "AH6",
@@ -727,10 +768,11 @@ static const struct xfrm_type ah6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol ah6_protocol = {
+static struct xfrm6_protocol ah6_protocol = {
.handler = xfrm6_rcv,
+ .cb_handler = ah6_rcv_cb,
.err_handler = ah6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init ah6_init(void)
@@ -740,7 +782,7 @@ static int __init ah6_init(void)
return -EAGAIN;
}
- if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) {
+ if (xfrm6_protocol_register(&ah6_protocol, IPPROTO_AH) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&ah6_type, AF_INET6);
return -EAGAIN;
@@ -751,7 +793,7 @@ static int __init ah6_init(void)
static void __exit ah6_fini(void)
{
- if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0)
+ if (xfrm6_protocol_deregister(&ah6_protocol, IPPROTO_AH) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0)
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 6eef8a7e35f2..d15da1377149 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -421,8 +421,8 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
net_adj) & ~(blksize - 1)) + net_adj - 2;
}
-static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
@@ -431,18 +431,20 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
- return;
+ return 0;
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
esph->spi, IPPROTO_ESP, AF_INET6);
if (!x)
- return;
+ return 0;
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static void esp6_destroy(struct xfrm_state *x)
@@ -614,6 +616,11 @@ error:
return err;
}
+static int esp6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type esp6_type =
{
.description = "ESP6",
@@ -628,10 +635,11 @@ static const struct xfrm_type esp6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol esp6_protocol = {
- .handler = xfrm6_rcv,
+static struct xfrm6_protocol esp6_protocol = {
+ .handler = xfrm6_rcv,
+ .cb_handler = esp6_rcv_cb,
.err_handler = esp6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init esp6_init(void)
@@ -640,7 +648,7 @@ static int __init esp6_init(void)
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) {
+ if (xfrm6_protocol_register(&esp6_protocol, IPPROTO_ESP) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&esp6_type, AF_INET6);
return -EAGAIN;
@@ -651,7 +659,7 @@ static int __init esp6_init(void)
static void __exit esp6_fini(void)
{
- if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0)
+ if (xfrm6_protocol_deregister(&esp6_protocol, IPPROTO_ESP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 140748debc4a..8af3eb57f438 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -212,7 +212,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
found = (nexthdr == target);
if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
- if (target < 0)
+ if (target < 0 || found)
break;
return -ENOENT;
}
diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c
index cf77f3abfd06..447a7fbd1bb6 100644
--- a/net/ipv6/exthdrs_offload.c
+++ b/net/ipv6/exthdrs_offload.c
@@ -25,11 +25,11 @@ int __init ipv6_exthdrs_offload_init(void)
int ret;
ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING);
- if (!ret)
+ if (ret)
goto out;
ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS);
- if (!ret)
+ if (ret)
goto out_rt;
out:
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index f81f59686f21..7b326529e6a2 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -414,7 +414,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
addr_type = ipv6_addr_type(&hdr->daddr);
if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
- ipv6_anycast_destination(skb))
+ ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
saddr = &hdr->daddr;
/*
@@ -520,7 +520,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
np->tclass, NULL, &fl6, (struct rt6_info *)dst,
MSG_DONTWAIT, np->dontfrag);
if (err) {
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index 72d198b8e4d2..ee7a97f510cb 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -79,7 +79,9 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
/* RFC 2460 section 8.1 says that we SHOULD log
this error. Well, it is reasonable.
*/
- LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
+ LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
+ &ipv6_hdr(skb)->saddr, ntohs(uh->source),
+ &ipv6_hdr(skb)->daddr, ntohs(uh->dest));
return 1;
}
if (skb->ip_summed == CHECKSUM_COMPLETE &&
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 075602fc6b6a..34e0ded5c14b 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -9,14 +9,12 @@
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
- */
-
-/*
- * Changes:
- * Yuji SEKIYA @USAGI: Support default route on router node;
- * remove ip6_null_entry from the top of
- * routing table.
- * Ville Nuorvala: Fixed routing subtrees.
+ *
+ * Changes:
+ * Yuji SEKIYA @USAGI: Support default route on router node;
+ * remove ip6_null_entry from the top of
+ * routing table.
+ * Ville Nuorvala: Fixed routing subtrees.
*/
#define pr_fmt(fmt) "IPv6: " fmt
@@ -46,10 +44,9 @@
#define RT6_TRACE(x...) do { ; } while (0)
#endif
-static struct kmem_cache * fib6_node_kmem __read_mostly;
+static struct kmem_cache *fib6_node_kmem __read_mostly;
-enum fib_walk_state_t
-{
+enum fib_walk_state_t {
#ifdef CONFIG_IPV6_SUBTREES
FWS_S,
#endif
@@ -59,8 +56,7 @@ enum fib_walk_state_t
FWS_U
};
-struct fib6_cleaner_t
-{
+struct fib6_cleaner_t {
struct fib6_walker_t w;
struct net *net;
int (*func)(struct rt6_info *, void *arg);
@@ -138,7 +134,7 @@ static __inline__ __be32 addr_bit_set(const void *token, int fn_bit)
const __be32 *addr = token;
/*
* Here,
- * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
+ * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
* is optimized version of
* htonl(1 << ((~fn_bit)&0x1F))
* See include/asm-generic/bitops/le.h.
@@ -147,7 +143,7 @@ static __inline__ __be32 addr_bit_set(const void *token, int fn_bit)
addr[fn_bit >> 5];
}
-static __inline__ struct fib6_node * node_alloc(void)
+static __inline__ struct fib6_node *node_alloc(void)
{
struct fib6_node *fn;
@@ -156,7 +152,7 @@ static __inline__ struct fib6_node * node_alloc(void)
return fn;
}
-static __inline__ void node_free(struct fib6_node * fn)
+static __inline__ void node_free(struct fib6_node *fn)
{
kmem_cache_free(fib6_node_kmem, fn);
}
@@ -292,7 +288,7 @@ static int fib6_dump_node(struct fib6_walker_t *w)
static void fib6_dump_end(struct netlink_callback *cb)
{
- struct fib6_walker_t *w = (void*)cb->args[2];
+ struct fib6_walker_t *w = (void *)cb->args[2];
if (w) {
if (cb->args[4]) {
@@ -302,7 +298,7 @@ static void fib6_dump_end(struct netlink_callback *cb)
cb->args[2] = 0;
kfree(w);
}
- cb->done = (void*)cb->args[3];
+ cb->done = (void *)cb->args[3];
cb->args[1] = 3;
}
@@ -485,7 +481,7 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
fn->fn_sernum = sernum;
dir = addr_bit_set(addr, fn->fn_bit);
pn = fn;
- fn = dir ? fn->right: fn->left;
+ fn = dir ? fn->right : fn->left;
} while (fn);
if (!allow_create) {
@@ -638,12 +634,41 @@ static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt)
RTF_GATEWAY;
}
+static int fib6_commit_metrics(struct dst_entry *dst,
+ struct nlattr *mx, int mx_len)
+{
+ struct nlattr *nla;
+ int remaining;
+ u32 *mp;
+
+ if (dst->flags & DST_HOST) {
+ mp = dst_metrics_write_ptr(dst);
+ } else {
+ mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
+ if (!mp)
+ return -ENOMEM;
+ dst_init_metrics(dst, mp, 0);
+ }
+
+ nla_for_each_attr(nla, mx, mx_len, remaining) {
+ int type = nla_type(nla);
+
+ if (type) {
+ if (type > RTAX_MAX)
+ return -EINVAL;
+
+ mp[type - 1] = nla_get_u32(nla);
+ }
+ }
+ return 0;
+}
+
/*
* Insert routing information in a node.
*/
static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
- struct nl_info *info)
+ struct nl_info *info, struct nlattr *mx, int mx_len)
{
struct rt6_info *iter = NULL;
struct rt6_info **ins;
@@ -653,6 +678,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
(info->nlh->nlmsg_flags & NLM_F_CREATE));
int found = 0;
bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
+ int err;
ins = &fn->leaf;
@@ -751,6 +777,11 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
pr_warn("NLM_F_CREATE should be set when creating new route\n");
add:
+ if (mx) {
+ err = fib6_commit_metrics(&rt->dst, mx, mx_len);
+ if (err)
+ return err;
+ }
rt->dst.rt6_next = iter;
*ins = rt;
rt->rt6i_node = fn;
@@ -770,6 +801,11 @@ add:
pr_warn("NLM_F_REPLACE set, but no existing node found!\n");
return -ENOENT;
}
+ if (mx) {
+ err = fib6_commit_metrics(&rt->dst, mx, mx_len);
+ if (err)
+ return err;
+ }
*ins = rt;
rt->rt6i_node = fn;
rt->dst.rt6_next = iter->dst.rt6_next;
@@ -806,7 +842,8 @@ void fib6_force_start_gc(struct net *net)
* with source addr info in sub-trees
*/
-int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
+int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info,
+ struct nlattr *mx, int mx_len)
{
struct fib6_node *fn, *pn = NULL;
int err = -ENOMEM;
@@ -900,7 +937,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
}
#endif
- err = fib6_add_rt2node(fn, rt, info);
+ err = fib6_add_rt2node(fn, rt, info, mx, mx_len);
if (!err) {
fib6_start_gc(info->nl_net, rt);
if (!(rt->rt6i_flags & RTF_CACHE))
@@ -955,8 +992,8 @@ struct lookup_args {
const struct in6_addr *addr; /* search key */
};
-static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
- struct lookup_args *args)
+static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
+ struct lookup_args *args)
{
struct fib6_node *fn;
__be32 dir;
@@ -1018,8 +1055,8 @@ backtrack:
return NULL;
}
-struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
- const struct in6_addr *saddr)
+struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
struct fib6_node *fn;
struct lookup_args args[] = {
@@ -1051,9 +1088,9 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *da
*/
-static struct fib6_node * fib6_locate_1(struct fib6_node *root,
- const struct in6_addr *addr,
- int plen, int offset)
+static struct fib6_node *fib6_locate_1(struct fib6_node *root,
+ const struct in6_addr *addr,
+ int plen, int offset)
{
struct fib6_node *fn;
@@ -1081,9 +1118,9 @@ static struct fib6_node * fib6_locate_1(struct fib6_node *root,
return NULL;
}
-struct fib6_node * fib6_locate(struct fib6_node *root,
- const struct in6_addr *daddr, int dst_len,
- const struct in6_addr *saddr, int src_len)
+struct fib6_node *fib6_locate(struct fib6_node *root,
+ const struct in6_addr *daddr, int dst_len,
+ const struct in6_addr *saddr, int src_len)
{
struct fib6_node *fn;
@@ -1151,8 +1188,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
children = 0;
child = NULL;
- if (fn->right) child = fn->right, children |= 1;
- if (fn->left) child = fn->left, children |= 2;
+ if (fn->right)
+ child = fn->right, children |= 1;
+ if (fn->left)
+ child = fn->left, children |= 2;
if (children == 3 || FIB6_SUBTREE(fn)
#ifdef CONFIG_IPV6_SUBTREES
@@ -1180,8 +1219,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
} else {
WARN_ON(fn->fn_flags & RTN_ROOT);
#endif
- if (pn->right == fn) pn->right = child;
- else if (pn->left == fn) pn->left = child;
+ if (pn->right == fn)
+ pn->right = child;
+ else if (pn->left == fn)
+ pn->left = child;
#if RT6_DEBUG >= 2
else
WARN_ON(1);
@@ -1213,10 +1254,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
w->node = child;
if (children&2) {
RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
- w->state = w->state>=FWS_R ? FWS_U : FWS_INIT;
+ w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;
} else {
RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
- w->state = w->state>=FWS_C ? FWS_U : FWS_INIT;
+ w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;
}
}
}
@@ -1314,7 +1355,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
struct rt6_info **rtp;
#if RT6_DEBUG >= 2
- if (rt->dst.obsolete>0) {
+ if (rt->dst.obsolete > 0) {
WARN_ON(fn != NULL);
return -ENOENT;
}
@@ -1707,7 +1748,7 @@ out_rt6_stats:
kfree(net->ipv6.rt6_stats);
out_timer:
return -ENOMEM;
- }
+}
static void fib6_net_exit(struct net *net)
{
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index dfa41bb4e0dc..0961b5ef866d 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -15,9 +15,7 @@
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
-#include <linux/if_arp.h>
#include <linux/in6.h>
-#include <linux/route.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
@@ -28,12 +26,8 @@
#include <net/sock.h>
#include <net/ipv6.h>
-#include <net/ndisc.h>
-#include <net/protocol.h>
-#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
-#include <net/icmp.h>
#include <net/transp_v6.h>
#include <asm/uaccess.h>
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index f3ffb43f59c0..c98338b81d30 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1454,7 +1454,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
static int ip6gre_tap_init(struct net_device *dev)
{
struct ip6_tnl *tunnel;
- int i;
tunnel = netdev_priv(dev);
@@ -1464,16 +1463,10 @@ static int ip6gre_tap_init(struct net_device *dev)
ip6gre_tnl_link_config(tunnel, 1);
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *ip6gre_tap_stats;
- ip6gre_tap_stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&ip6gre_tap_stats->syncp);
- }
-
return 0;
}
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 1e8683b135bb..59f95affceb0 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -89,7 +89,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
unsigned int unfrag_ip6hlen;
u8 *prevhdr;
int offset = 0;
- bool tunnel;
+ bool encap, udpfrag;
int nhoff;
if (unlikely(skb_shinfo(skb)->gso_type &
@@ -110,8 +110,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
goto out;
- tunnel = SKB_GSO_CB(skb)->encap_level > 0;
- if (tunnel)
+ encap = SKB_GSO_CB(skb)->encap_level > 0;
+ if (encap)
features = skb->dev->hw_enc_features & netif_skb_features(skb);
SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h);
@@ -121,6 +121,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
+ if (skb->encapsulation &&
+ skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP))
+ udpfrag = proto == IPPROTO_UDP && encap;
+ else
+ udpfrag = proto == IPPROTO_UDP && !skb->encapsulation;
+
ops = rcu_dereference(inet6_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment)) {
skb_reset_transport_header(skb);
@@ -133,13 +139,9 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
for (skb = segs; skb; skb = skb->next) {
ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h));
- if (tunnel) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- }
skb->network_header = (u8 *)ipv6h - skb->head;
- if (!tunnel && proto == IPPROTO_UDP) {
+ if (udpfrag) {
unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen);
fptr->frag_off = htons(offset);
@@ -148,6 +150,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
offset += (ntohs(ipv6h->payload_len) -
sizeof(struct frag_hdr));
}
+ if (encap)
+ skb_reset_inner_headers(skb);
}
out:
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index ef02b26ccf81..3284d61577c0 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -342,6 +342,20 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
return mtu;
}
+static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu || skb->local_df)
+ return false;
+
+ if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
+ return true;
+
+ if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ return false;
+
+ return true;
+}
+
int ip6_forward(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -353,6 +367,9 @@ int ip6_forward(struct sk_buff *skb)
if (net->ipv6.devconf_all->forwarding == 0)
goto error;
+ if (skb->pkt_type != PACKET_HOST)
+ goto drop;
+
if (skb_warn_if_lro(skb))
goto drop;
@@ -362,9 +379,6 @@ int ip6_forward(struct sk_buff *skb)
goto drop;
}
- if (skb->pkt_type != PACKET_HOST)
- goto drop;
-
skb_forward_csum(skb);
/*
@@ -466,8 +480,7 @@ int ip6_forward(struct sk_buff *skb)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
- (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
+ if (ip6_pkt_too_big(skb, mtu)) {
/* Again, force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -517,9 +530,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
-#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
- to->nf_trace = from->nf_trace;
-#endif
skb_copy_secmark(to, from);
}
@@ -1091,21 +1101,19 @@ static void ip6_append_data_mtu(unsigned int *mtu,
unsigned int fragheaderlen,
struct sk_buff *skb,
struct rt6_info *rt,
- bool pmtuprobe)
+ unsigned int orig_mtu)
{
if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
if (skb == NULL) {
/* first fragment, reserve header_len */
- *mtu = *mtu - rt->dst.header_len;
+ *mtu = orig_mtu - rt->dst.header_len;
} else {
/*
* this fragment is not first, the headers
* space is regarded as data space.
*/
- *mtu = min(*mtu, pmtuprobe ?
- rt->dst.dev->mtu :
- dst_mtu(rt->dst.path));
+ *mtu = orig_mtu;
}
*maxfraglen = ((*mtu - fragheaderlen) & ~7)
+ fragheaderlen - sizeof(struct frag_hdr);
@@ -1122,7 +1130,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_cork *cork;
struct sk_buff *skb, *skb_prev = NULL;
- unsigned int maxfraglen, fragheaderlen, mtu;
+ unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
int exthdrlen;
int dst_exthdrlen;
int hh_len;
@@ -1204,6 +1212,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
dst_exthdrlen = 0;
mtu = cork->fragsize;
}
+ orig_mtu = mtu;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -1221,8 +1230,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
sizeof(struct frag_hdr) : 0) +
rt->rt6i_nfheader_len;
- maxnonfragsize = (np->pmtudisc >= IPV6_PMTUDISC_DO) ?
- mtu : sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
+ if (ip6_sk_local_df(sk))
+ maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
+ else
+ maxnonfragsize = mtu;
/* dontfrag active */
if ((cork->length + length > mtu - headersize) && dontfrag &&
@@ -1301,8 +1312,7 @@ alloc_new_skb:
if (skb == NULL || skb_prev == NULL)
ip6_append_data_mtu(&mtu, &maxfraglen,
fragheaderlen, skb, rt,
- np->pmtudisc >=
- IPV6_PMTUDISC_PROBE);
+ orig_mtu);
skb_prev = skb;
@@ -1530,8 +1540,7 @@ int ip6_push_pending_frames(struct sock *sk)
}
/* Allow local fragmentation. */
- if (np->pmtudisc < IPV6_PMTUDISC_DO)
- skb->local_df = 1;
+ skb->local_df = ip6_sk_local_df(sk);
*final_dst = fl6->daddr;
__skb_pull(skb, skb_network_header_len(skb));
@@ -1558,8 +1567,8 @@ int ip6_push_pending_frames(struct sock *sk)
if (proto == IPPROTO_ICMPV6) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
- ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
+ ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
}
err = ip6_local_out(skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5db8d310f9c0..e1df691d78be 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -108,12 +108,12 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
per_cpu_ptr(dev->tstats, i);
do {
- start = u64_stats_fetch_begin_bh(&tstats->syncp);
+ start = u64_stats_fetch_begin_irq(&tstats->syncp);
tmp.rx_packets = tstats->rx_packets;
tmp.rx_bytes = tstats->rx_bytes;
tmp.tx_packets = tstats->tx_packets;
tmp.tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
+ } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
sum.rx_packets += tmp.rx_packets;
sum.rx_bytes += tmp.rx_bytes;
@@ -1502,19 +1502,12 @@ static inline int
ip6_tnl_dev_init_gen(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- int i;
t->dev = dev;
t->net = dev_net(dev);
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
-
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *ip6_tnl_stats;
- ip6_tnl_stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&ip6_tnl_stats->syncp);
- }
return 0;
}
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 2d19272b8cee..b7c0f827140b 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -278,7 +278,6 @@ static void vti6_dev_uninit(struct net_device *dev)
RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
else
vti6_tnl_unlink(ip6n, t);
- ip6_tnl_dst_reset(t);
dev_put(dev);
}
@@ -288,11 +287,8 @@ static int vti6_rcv(struct sk_buff *skb)
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
rcu_read_lock();
-
if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
&ipv6h->daddr)) != NULL) {
- struct pcpu_sw_netstats *tstats;
-
if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) {
rcu_read_unlock();
goto discard;
@@ -309,27 +305,58 @@ static int vti6_rcv(struct sk_buff *skb)
goto discard;
}
- tstats = this_cpu_ptr(t->dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
-
- skb->mark = 0;
- secpath_reset(skb);
- skb->dev = t->dev;
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
+ skb->mark = be32_to_cpu(t->parms.i_key);
rcu_read_unlock();
- return 0;
+
+ return xfrm6_rcv(skb);
}
rcu_read_unlock();
- return 1;
-
+ return -EINVAL;
discard:
kfree_skb(skb);
return 0;
}
+static int vti6_rcv_cb(struct sk_buff *skb, int err)
+{
+ unsigned short family;
+ struct net_device *dev;
+ struct pcpu_sw_netstats *tstats;
+ struct xfrm_state *x;
+ struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
+
+ if (!t)
+ return 1;
+
+ dev = t->dev;
+
+ if (err) {
+ dev->stats.rx_errors++;
+ dev->stats.rx_dropped++;
+
+ return 0;
+ }
+
+ x = xfrm_input_state(skb);
+ family = x->inner_mode->afinfo->family;
+
+ if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+ return -EPERM;
+
+ skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev)));
+ skb->dev = dev;
+
+ tstats = this_cpu_ptr(dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
+
+ return 0;
+}
+
/**
* vti6_addr_conflict - compare packet addresses to tunnel's own
* @t: the outgoing tunnel device
@@ -349,44 +376,56 @@ vti6_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}
+static bool vti6_state_check(const struct xfrm_state *x,
+ const struct in6_addr *dst,
+ const struct in6_addr *src)
+{
+ xfrm_address_t *daddr = (xfrm_address_t *)dst;
+ xfrm_address_t *saddr = (xfrm_address_t *)src;
+
+ /* if there is no transform then this tunnel is not functional.
+ * Or if the xfrm is not mode tunnel.
+ */
+ if (!x || x->props.mode != XFRM_MODE_TUNNEL ||
+ x->props.family != AF_INET6)
+ return false;
+
+ if (ipv6_addr_any(dst))
+ return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET6);
+
+ if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET6))
+ return false;
+
+ return true;
+}
+
/**
* vti6_xmit - send a packet
* @skb: the outgoing socket buffer
* @dev: the outgoing tunnel device
+ * @fl: the flow informations for the xfrm_lookup
**/
-static int vti6_xmit(struct sk_buff *skb, struct net_device *dev)
+static int
+vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
{
- struct net *net = dev_net(dev);
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
- struct dst_entry *dst = NULL, *ndst = NULL;
- struct flowi6 fl6;
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev;
int err = -1;
- if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
- !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h))
- return err;
-
- dst = ip6_tnl_dst_check(t);
- if (!dst) {
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-
- ndst = ip6_route_output(net, NULL, &fl6);
+ if (!dst)
+ goto tx_err_link_failure;
- if (ndst->error)
- goto tx_err_link_failure;
- ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(&fl6), NULL, 0);
- if (IS_ERR(ndst)) {
- err = PTR_ERR(ndst);
- ndst = NULL;
- goto tx_err_link_failure;
- }
- dst = ndst;
+ dst_hold(dst);
+ dst = xfrm_lookup(t->net, dst, fl, NULL, 0);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ dst = NULL;
+ goto tx_err_link_failure;
}
- if (!dst->xfrm || dst->xfrm->props.mode != XFRM_MODE_TUNNEL)
+ if (!vti6_state_check(dst->xfrm, &t->parms.raddr, &t->parms.laddr))
goto tx_err_link_failure;
tdev = dst->dev;
@@ -398,14 +437,21 @@ static int vti6_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_err_dst_release;
}
+ skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
+ skb_dst_set(skb, dst);
+ skb->dev = skb_dst(skb)->dev;
- skb_dst_drop(skb);
- skb_dst_set_noref(skb, dst);
+ err = dst_output(skb);
+ if (net_xmit_eval(err) == 0) {
+ struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
- ip6tunnel_xmit(skb, dev);
- if (ndst) {
- dev->mtu = dst_mtu(ndst);
- ip6_tnl_dst_store(t, ndst);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->tx_bytes += skb->len;
+ tstats->tx_packets++;
+ u64_stats_update_end(&tstats->syncp);
+ } else {
+ stats->tx_errors++;
+ stats->tx_aborted_errors++;
}
return 0;
@@ -413,7 +459,7 @@ tx_err_link_failure:
stats->tx_carrier_errors++;
dst_link_failure(skb);
tx_err_dst_release:
- dst_release(ndst);
+ dst_release(dst);
return err;
}
@@ -422,16 +468,33 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
+ struct ipv6hdr *ipv6h;
+ struct flowi fl;
int ret;
+ memset(&fl, 0, sizeof(fl));
+ skb->mark = be32_to_cpu(t->parms.o_key);
+
switch (skb->protocol) {
case htons(ETH_P_IPV6):
- ret = vti6_xmit(skb, dev);
+ ipv6h = ipv6_hdr(skb);
+
+ if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
+ !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h))
+ goto tx_err;
+
+ xfrm_decode_session(skb, &fl, AF_INET6);
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ break;
+ case htons(ETH_P_IP):
+ xfrm_decode_session(skb, &fl, AF_INET);
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
break;
default:
goto tx_err;
}
+ ret = vti6_xmit(skb, dev, &fl);
if (ret < 0)
goto tx_err;
@@ -444,24 +507,66 @@ tx_err:
return NETDEV_TX_OK;
}
+static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ __be32 spi;
+ struct xfrm_state *x;
+ struct ip6_tnl *t;
+ struct ip_esp_hdr *esph;
+ struct ip_auth_hdr *ah;
+ struct ip_comp_hdr *ipch;
+ struct net *net = dev_net(skb->dev);
+ const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
+ int protocol = iph->nexthdr;
+
+ t = vti6_tnl_lookup(dev_net(skb->dev), &iph->daddr, &iph->saddr);
+ if (!t)
+ return -1;
+
+ switch (protocol) {
+ case IPPROTO_ESP:
+ esph = (struct ip_esp_hdr *)(skb->data + offset);
+ spi = esph->spi;
+ break;
+ case IPPROTO_AH:
+ ah = (struct ip_auth_hdr *)(skb->data + offset);
+ spi = ah->spi;
+ break;
+ case IPPROTO_COMP:
+ ipch = (struct ip_comp_hdr *)(skb->data + offset);
+ spi = htonl(ntohs(ipch->cpi));
+ break;
+ default:
+ return 0;
+ }
+
+ if (type != ICMPV6_PKT_TOOBIG &&
+ type != NDISC_REDIRECT)
+ return 0;
+
+ x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+ spi, protocol, AF_INET6);
+ if (!x)
+ return 0;
+
+ if (type == NDISC_REDIRECT)
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
+ else
+ ip6_update_pmtu(skb, net, info, 0, 0);
+ xfrm_state_put(x);
+
+ return 0;
+}
+
static void vti6_link_config(struct ip6_tnl *t)
{
- struct dst_entry *dst;
struct net_device *dev = t->dev;
struct __ip6_tnl_parm *p = &t->parms;
- struct flowi6 *fl6 = &t->fl.u.ip6;
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
- /* Set up flowi template */
- fl6->saddr = p->laddr;
- fl6->daddr = p->raddr;
- fl6->flowi6_oif = p->link;
- fl6->flowi6_mark = be32_to_cpu(p->i_key);
- fl6->flowi6_proto = p->proto;
- fl6->flowlabel = 0;
-
p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV |
IP6_TNL_F_CAP_PER_PACKET);
p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
@@ -472,28 +577,6 @@ static void vti6_link_config(struct ip6_tnl *t)
dev->flags &= ~IFF_POINTOPOINT;
dev->iflink = p->link;
-
- if (p->flags & IP6_TNL_F_CAP_XMIT) {
-
- dst = ip6_route_output(dev_net(dev), NULL, fl6);
- if (dst->error)
- return;
-
- dst = xfrm_lookup(dev_net(dev), dst, flowi6_to_flowi(fl6),
- NULL, 0);
- if (IS_ERR(dst))
- return;
-
- if (dst->dev) {
- dev->hard_header_len = dst->dev->hard_header_len;
-
- dev->mtu = dst_mtu(dst);
-
- if (dev->mtu < IPV6_MIN_MTU)
- dev->mtu = IPV6_MIN_MTU;
- }
- dst_release(dst);
- }
}
/**
@@ -720,7 +803,6 @@ static void vti6_dev_setup(struct net_device *dev)
t = netdev_priv(dev);
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
}
@@ -731,18 +813,12 @@ static void vti6_dev_setup(struct net_device *dev)
static inline int vti6_dev_init_gen(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- int i;
t->dev = dev;
t->net = dev_net(dev);
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *stats;
- stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&stats->syncp);
- }
return 0;
}
@@ -914,11 +990,6 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = {
.fill_info = vti6_fill_info,
};
-static struct xfrm_tunnel_notifier vti6_handler __read_mostly = {
- .handler = vti6_rcv,
- .priority = 1,
-};
-
static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
{
int h;
@@ -990,6 +1061,27 @@ static struct pernet_operations vti6_net_ops = {
.size = sizeof(struct vti6_net),
};
+static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
+static struct xfrm6_protocol vti_ah6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
+static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
/**
* vti6_tunnel_init - register protocol and reserve needed resources
*
@@ -1003,11 +1095,33 @@ static int __init vti6_tunnel_init(void)
if (err < 0)
goto out_pernet;
- err = xfrm6_mode_tunnel_input_register(&vti6_handler);
+ err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP);
if (err < 0) {
- pr_err("%s: can't register vti6\n", __func__);
+ unregister_pernet_device(&vti6_net_ops);
+ pr_err("%s: can't register vti6 protocol\n", __func__);
+
goto out;
}
+
+ err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH);
+ if (err < 0) {
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
+ unregister_pernet_device(&vti6_net_ops);
+ pr_err("%s: can't register vti6 protocol\n", __func__);
+
+ goto out;
+ }
+
+ err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP);
+ if (err < 0) {
+ xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
+ unregister_pernet_device(&vti6_net_ops);
+ pr_err("%s: can't register vti6 protocol\n", __func__);
+
+ goto out;
+ }
+
err = rtnl_link_register(&vti6_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -1015,7 +1129,9 @@ static int __init vti6_tunnel_init(void)
return 0;
rtnl_link_failed:
- xfrm6_mode_tunnel_input_deregister(&vti6_handler);
+ xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP);
+ xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
out:
unregister_pernet_device(&vti6_net_ops);
out_pernet:
@@ -1028,8 +1144,12 @@ out_pernet:
static void __exit vti6_tunnel_cleanup(void)
{
rtnl_link_unregister(&vti6_link_ops);
- if (xfrm6_mode_tunnel_input_deregister(&vti6_handler))
- pr_info("%s: can't deregister vti6\n", __func__);
+ if (xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP))
+ pr_info("%s: can't deregister protocol\n", __func__);
+ if (xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH))
+ pr_info("%s: can't deregister protocol\n", __func__);
+ if (xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP))
+ pr_info("%s: can't deregister protocol\n", __func__);
unregister_pernet_device(&vti6_net_ops);
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 0eb4038a4d63..8737400af0a0 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2349,13 +2349,14 @@ int ip6mr_get_route(struct net *net,
}
static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- u32 portid, u32 seq, struct mfc6_cache *c, int cmd)
+ u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
+ int flags)
{
struct nlmsghdr *nlh;
struct rtmsg *rtm;
int err;
- nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI);
+ nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -2423,7 +2424,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
if (skb == NULL)
goto errout;
- err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd);
+ err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
if (err < 0)
goto errout;
@@ -2462,7 +2463,8 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
if (ip6mr_fill_mroute(mrt, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE) < 0)
+ mfc, RTM_NEWROUTE,
+ NLM_F_MULTI) < 0)
goto done;
next_entry:
e++;
@@ -2476,7 +2478,8 @@ next_entry:
if (ip6mr_fill_mroute(mrt, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE) < 0) {
+ mfc, RTM_NEWROUTE,
+ NLM_F_MULTI) < 0) {
spin_unlock_bh(&mfc_unres_lock);
goto done;
}
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index da9becb42e81..d1c793cffcb5 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -53,7 +53,7 @@
#include <linux/icmpv6.h>
#include <linux/mutex.h>
-static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
@@ -65,19 +65,21 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
- return;
+ return 0;
spi = htonl(ntohs(ipcomph->cpi));
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
spi, IPPROTO_COMP, AF_INET6);
if (!x)
- return;
+ return 0;
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
@@ -174,6 +176,11 @@ out:
return err;
}
+static int ipcomp6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type ipcomp6_type =
{
.description = "IPCOMP6",
@@ -186,11 +193,12 @@ static const struct xfrm_type ipcomp6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol ipcomp6_protocol =
+static struct xfrm6_protocol ipcomp6_protocol =
{
.handler = xfrm6_rcv,
+ .cb_handler = ipcomp6_rcv_cb,
.err_handler = ipcomp6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init ipcomp6_init(void)
@@ -199,7 +207,7 @@ static int __init ipcomp6_init(void)
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
+ if (xfrm6_protocol_register(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&ipcomp6_type, AF_INET6);
return -EAGAIN;
@@ -209,7 +217,7 @@ static int __init ipcomp6_init(void)
static void __exit ipcomp6_fini(void)
{
- if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0)
+ if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 0a00f449de5e..edb58aff4ae7 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -722,7 +722,7 @@ done:
case IPV6_MTU_DISCOVER:
if (optlen < sizeof(int))
goto e_inval;
- if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_INTERFACE)
+ if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
goto e_inval;
np->pmtudisc = val;
retv = 0;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index e1e47350784b..08b367c6b9cf 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1620,11 +1620,12 @@ static void mld_sendpack(struct sk_buff *skb)
dst_output);
out:
if (!err) {
- ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT);
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
- IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
- } else
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
+ } else {
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ }
rcu_read_unlock();
return;
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 35750df744dc..4bff1f297e39 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -50,6 +50,11 @@ config NFT_CHAIN_NAT_IPV6
packet transformations such as the source, destination address and
source and destination ports.
+config NFT_REJECT_IPV6
+ depends on NF_TABLES_IPV6
+ default NFT_REJECT
+ tristate
+
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
depends on INET && IPV6
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index d1b4928f34f7..70d3dd66f2cd 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
+obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 710238f58aa9..e080fbbbc0e5 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1241,8 +1241,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
xt_free_table_info(oldinfo);
if (copy_to_user(counters_ptr, counters,
- sizeof(struct xt_counters) * num_counters) != 0)
- ret = -EFAULT;
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n");
+ }
vfree(counters);
xt_table_unlock(t);
return ret;
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
new file mode 100644
index 000000000000..0bc19fa87821
--- /dev/null
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_reject.h>
+#include <net/netfilter/ipv6/nf_reject.h>
+
+void nft_reject_ipv6_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+ struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
+
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nf_send_unreach6(net, pkt->skb, priv->icmp_code,
+ pkt->ops->hooknum);
+ break;
+ case NFT_REJECT_TCP_RST:
+ nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
+ break;
+ }
+
+ data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+EXPORT_SYMBOL_GPL(nft_reject_ipv6_eval);
+
+static struct nft_expr_type nft_reject_ipv6_type;
+static const struct nft_expr_ops nft_reject_ipv6_ops = {
+ .type = &nft_reject_ipv6_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+ .eval = nft_reject_ipv6_eval,
+ .init = nft_reject_init,
+ .dump = nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_ipv6_type __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .name = "reject",
+ .ops = &nft_reject_ipv6_ops,
+ .policy = nft_reject_policy,
+ .maxattr = NFTA_REJECT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_reject_ipv6_module_init(void)
+{
+ return nft_register_expr(&nft_reject_ipv6_type);
+}
+
+static void __exit nft_reject_ipv6_module_exit(void)
+{
+ nft_unregister_expr(&nft_reject_ipv6_type);
+}
+
+module_init(nft_reject_ipv6_module_init);
+module_exit(nft_reject_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "reject");
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 827f795209cf..6313abd53c9d 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -6,24 +6,24 @@
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/addrconf.h>
+#include <net/secure_seq.h>
void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
{
static atomic_t ipv6_fragmentation_id;
+ struct in6_addr addr;
int old, new;
#if IS_ENABLED(CONFIG_IPV6)
- if (rt && !(rt->dst.flags & DST_NOPEER)) {
- struct inet_peer *peer;
- struct net *net;
-
- net = dev_net(rt->dst.dev);
- peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
- if (peer) {
- fhdr->identification = htonl(inet_getid(peer, 0));
- inet_putpeer(peer);
- return;
- }
+ struct inet_peer *peer;
+ struct net *net;
+
+ net = dev_net(rt->dst.dev);
+ peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+ if (peer) {
+ fhdr->identification = htonl(inet_getid(peer, 0));
+ inet_putpeer(peer);
+ return;
}
#endif
do {
@@ -32,7 +32,10 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
if (!new)
new = 1;
} while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
- fhdr->identification = htonl(new);
+
+ addr = rt->rt6i_dst.addr;
+ addr.s6_addr32[0] ^= (__force __be32)new;
+ fhdr->identification = htonl(secure_ipv6_id(addr.s6_addr32));
}
EXPORT_SYMBOL(ipv6_select_ident);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index fb9beb78f00b..bda74291c3e0 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -135,6 +135,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
fl6.flowi6_proto = IPPROTO_ICMPV6;
fl6.saddr = np->saddr;
fl6.daddr = *daddr;
+ fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_icmp_type = user_icmph.icmp6_type;
fl6.fl6_icmp_code = user_icmph.icmp6_code;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
@@ -181,8 +182,8 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
MSG_DONTWAIT, np->dontfrag);
if (err) {
- ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev,
- ICMP6_MIB_OUTERRORS);
+ ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
+ ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
err = icmpv6_push_pending_frames(sk, &fl6,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 11dac21e6586..5015c50a5ba7 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -149,7 +149,8 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
unsigned long prev, new;
p = peer->metrics;
- if (inet_metrics_new(peer))
+ if (inet_metrics_new(peer) ||
+ (old & DST_METRICS_FORCE_OVERWRITE))
memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
new = (unsigned long) p;
@@ -373,12 +374,6 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false;
}
-static bool rt6_need_strict(const struct in6_addr *daddr)
-{
- return ipv6_addr_type(daddr) &
- (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
-}
-
/* Multipath route selection:
* Hash based function using packet header and flowlabel.
* Adapted from fib_info_hashfn()
@@ -857,14 +852,15 @@ EXPORT_SYMBOL(rt6_lookup);
be destroyed.
*/
-static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
+static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
+ struct nlattr *mx, int mx_len)
{
int err;
struct fib6_table *table;
table = rt->rt6i_table;
write_lock_bh(&table->tb6_lock);
- err = fib6_add(&table->tb6_root, rt, info);
+ err = fib6_add(&table->tb6_root, rt, info, mx, mx_len);
write_unlock_bh(&table->tb6_lock);
return err;
@@ -875,7 +871,7 @@ int ip6_ins_rt(struct rt6_info *rt)
struct nl_info info = {
.nl_net = dev_net(rt->dst.dev),
};
- return __ip6_ins_rt(rt, &info);
+ return __ip6_ins_rt(rt, &info, NULL, 0);
}
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
@@ -1513,7 +1509,7 @@ int ip6_route_add(struct fib6_config *cfg)
if (!table)
goto out;
- rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
+ rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
if (!rt) {
err = -ENOMEM;
@@ -1543,17 +1539,11 @@ int ip6_route_add(struct fib6_config *cfg)
ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
rt->rt6i_dst.plen = cfg->fc_dst_len;
- if (rt->rt6i_dst.plen == 128)
- rt->dst.flags |= DST_HOST;
-
- if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
- u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
- if (!metrics) {
- err = -ENOMEM;
- goto out;
- }
- dst_init_metrics(&rt->dst, metrics, 0);
+ if (rt->rt6i_dst.plen == 128) {
+ rt->dst.flags |= DST_HOST;
+ dst_metrics_set_force_overwrite(&rt->dst);
}
+
#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
rt->rt6i_src.plen = cfg->fc_src_len;
@@ -1672,31 +1662,13 @@ int ip6_route_add(struct fib6_config *cfg)
rt->rt6i_flags = cfg->fc_flags;
install_route:
- if (cfg->fc_mx) {
- struct nlattr *nla;
- int remaining;
-
- nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
- int type = nla_type(nla);
-
- if (type) {
- if (type > RTAX_MAX) {
- err = -EINVAL;
- goto out;
- }
-
- dst_metric_set(&rt->dst, type, nla_get_u32(nla));
- }
- }
- }
-
rt->dst.dev = dev;
rt->rt6i_idev = idev;
rt->rt6i_table = table;
cfg->fc_nlinfo.nl_net = dev_net(dev);
- return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
+ return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len);
out:
if (dev)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 3dfbcf1dcb1c..1693c8d885f0 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -475,6 +475,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
ipip6_tunnel_unlink(sitn, tunnel);
ipip6_tunnel_del_prl(tunnel, NULL);
}
+ ip_tunnel_dst_reset_all(tunnel);
dev_put(dev);
}
@@ -1082,6 +1083,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
t->parms.link = p->link;
ipip6_tunnel_bind_dev(t->dev);
}
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(t->dev);
}
@@ -1112,6 +1114,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
t->ip6rd.relay_prefix = relay_prefix;
t->ip6rd.prefixlen = ip6rd->prefixlen;
t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(t->dev);
return 0;
}
@@ -1271,6 +1274,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
break;
}
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(dev);
break;
@@ -1326,6 +1330,9 @@ static const struct net_device_ops ipip6_netdev_ops = {
static void ipip6_dev_free(struct net_device *dev)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+
+ free_percpu(tunnel->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
}
@@ -1356,7 +1363,6 @@ static void ipip6_tunnel_setup(struct net_device *dev)
static int ipip6_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- int i;
tunnel->dev = dev;
tunnel->net = dev_net(dev);
@@ -1365,14 +1371,14 @@ static int ipip6_tunnel_init(struct net_device *dev)
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
ipip6_tunnel_bind_dev(dev);
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *ipip6_tunnel_stats;
- ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&ipip6_tunnel_stats->syncp);
+ tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
+ if (!tunnel->dst_cache) {
+ free_percpu(dev->tstats);
+ return -ENOMEM;
}
return 0;
@@ -1384,7 +1390,6 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
struct iphdr *iph = &tunnel->parms.iph;
struct net *net = dev_net(dev);
struct sit_net *sitn = net_generic(net, sit_net_id);
- int i;
tunnel->dev = dev;
tunnel->net = dev_net(dev);
@@ -1395,14 +1400,14 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
iph->ihl = 5;
iph->ttl = 64;
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *ipip6_fb_stats;
- ipip6_fb_stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&ipip6_fb_stats->syncp);
+ tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
+ if (!tunnel->dst_cache) {
+ free_percpu(dev->tstats);
+ return -ENOMEM;
}
dev_hold(dev);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 889079b2ea85..5ca56cee2dae 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -39,7 +39,7 @@
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
-
+#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
@@ -65,8 +65,6 @@
#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>
-#include <asm/uaccess.h>
-
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -501,8 +499,10 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
int res;
res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0);
- if (!res)
+ if (!res) {
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+ }
return res;
}
@@ -530,8 +530,8 @@ static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr);
}
-static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
- int optlen)
+static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
+ int optlen)
{
struct tcp_md5sig cmd;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
@@ -715,7 +715,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
.send_ack = tcp_v6_reqsk_send_ack,
.destructor = tcp_v6_reqsk_destructor,
.send_reset = tcp_v6_send_reset,
- .syn_ack_timeout = tcp_syn_ack_timeout,
+ .syn_ack_timeout = tcp_syn_ack_timeout,
};
#ifdef CONFIG_TCP_MD5SIG
@@ -726,7 +726,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
#endif
static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
- u32 tsval, u32 tsecr,
+ u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key, int rst, u8 tclass,
u32 label)
{
@@ -798,8 +798,10 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
fl6.flowi6_proto = IPPROTO_TCP;
- if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
+ if (rt6_need_strict(&fl6.daddr) || !oif)
fl6.flowi6_oif = inet6_iif(skb);
+ else
+ fl6.flowi6_oif = oif;
fl6.fl6_dport = t1->dest;
fl6.fl6_sport = t1->source;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
@@ -833,6 +835,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
int genhash;
struct sock *sk1 = NULL;
#endif
+ int oif;
if (th->rst)
return;
@@ -876,7 +879,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
(th->doff << 2);
- tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0, 0);
+ oif = sk ? sk->sk_bound_dev_if : 0;
+ tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
#ifdef CONFIG_TCP_MD5SIG
release_sk1:
@@ -888,11 +892,11 @@ release_sk1:
}
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
- u32 win, u32 tsval, u32 tsecr,
+ u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key, u8 tclass,
u32 label)
{
- tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass,
+ tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass,
label);
}
@@ -904,7 +908,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp + tcptw->tw_ts_offset,
- tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
+ tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
tw->tw_tclass, (tw->tw_flowlabel << 12));
inet_twsk_put(tw);
@@ -914,7 +918,7 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
- req->rcv_wnd, tcp_time_stamp, req->ts_recent,
+ req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
0, 0);
}
@@ -1259,7 +1263,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
#ifdef CONFIG_TCP_MD5SIG
/* Copy over the MD5 key from the original socket */
- if ((key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr)) != NULL) {
+ key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
+ if (key != NULL) {
/* We're using one, so create a matching key
* on the newsk structure. If we fail to get
* memory, then we end up not copying the key
@@ -1303,9 +1308,8 @@ static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr, 0));
- if (skb->len <= 76) {
+ if (skb->len <= 76)
return __skb_checksum_complete(skb);
- }
return 0;
}
@@ -1335,7 +1339,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return tcp_v4_do_rcv(sk, skb);
#ifdef CONFIG_TCP_MD5SIG
- if (tcp_v6_inbound_md5_hash (sk, skb))
+ if (tcp_v6_inbound_md5_hash(sk, skb))
goto discard;
#endif
@@ -1602,7 +1606,8 @@ do_time_wait:
break;
case TCP_TW_RST:
goto no_tcp_socket;
- case TCP_TW_SUCCESS:;
+ case TCP_TW_SUCCESS:
+ ;
}
goto discard_it;
}
@@ -1647,7 +1652,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
.twsk_unique = tcp_twsk_unique,
- .twsk_destructor= tcp_twsk_destructor,
+ .twsk_destructor = tcp_twsk_destructor,
};
static const struct inet_connection_sock_af_ops ipv6_specific = {
@@ -1681,7 +1686,6 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
/*
* TCP over IPv4 via INET6 API
*/
-
static const struct inet_connection_sock_af_ops ipv6_mapped = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index e7359f9eaa8d..b261ee8b83fc 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -113,7 +113,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
fptr->nexthdr = nexthdr;
fptr->reserved = 0;
- ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
+ fptr->identification = skb_shinfo(skb)->ip6_frag_id;
/* Fragment the skb. ipv6 header and the remaining fields of the
* fragment header are updated in ipv6_gso_segment()
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index cb04f7a16b5e..901ef6f8addc 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -18,65 +18,6 @@
#include <net/ipv6.h>
#include <net/xfrm.h>
-/* Informational hook. The decap is still done here. */
-static struct xfrm_tunnel_notifier __rcu *rcv_notify_handlers __read_mostly;
-static DEFINE_MUTEX(xfrm6_mode_tunnel_input_mutex);
-
-int xfrm6_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler)
-{
- struct xfrm_tunnel_notifier __rcu **pprev;
- struct xfrm_tunnel_notifier *t;
- int ret = -EEXIST;
- int priority = handler->priority;
-
- mutex_lock(&xfrm6_mode_tunnel_input_mutex);
-
- for (pprev = &rcv_notify_handlers;
- (t = rcu_dereference_protected(*pprev,
- lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL;
- pprev = &t->next) {
- if (t->priority > priority)
- break;
- if (t->priority == priority)
- goto err;
-
- }
-
- handler->next = *pprev;
- rcu_assign_pointer(*pprev, handler);
-
- ret = 0;
-
-err:
- mutex_unlock(&xfrm6_mode_tunnel_input_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_register);
-
-int xfrm6_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler)
-{
- struct xfrm_tunnel_notifier __rcu **pprev;
- struct xfrm_tunnel_notifier *t;
- int ret = -ENOENT;
-
- mutex_lock(&xfrm6_mode_tunnel_input_mutex);
- for (pprev = &rcv_notify_handlers;
- (t = rcu_dereference_protected(*pprev,
- lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL;
- pprev = &t->next) {
- if (t == handler) {
- *pprev = handler->next;
- ret = 0;
- break;
- }
- }
- mutex_unlock(&xfrm6_mode_tunnel_input_mutex);
- synchronize_net();
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_deregister);
-
static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
{
const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
@@ -130,7 +71,6 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct xfrm_tunnel_notifier *handler;
int err = -EINVAL;
if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
@@ -138,9 +78,6 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto out;
- for_each_input_rcu(rcv_notify_handlers, handler)
- handler->handler(skb);
-
err = skb_unclone(skb, GFP_ATOMIC);
if (err)
goto out;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 5f8e128c512d..2a0bbda2c76a 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -389,11 +389,17 @@ int __init xfrm6_init(void)
if (ret)
goto out_policy;
+ ret = xfrm6_protocol_init();
+ if (ret)
+ goto out_state;
+
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&xfrm6_net_ops);
#endif
out:
return ret;
+out_state:
+ xfrm6_state_fini();
out_policy:
xfrm6_policy_fini();
goto out;
@@ -404,6 +410,7 @@ void xfrm6_fini(void)
#ifdef CONFIG_SYSCTL
unregister_pernet_subsys(&xfrm6_net_ops);
#endif
+ xfrm6_protocol_fini();
xfrm6_policy_fini();
xfrm6_state_fini();
dst_entries_destroy(&xfrm6_dst_ops);
diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c
new file mode 100644
index 000000000000..6ab989c486f7
--- /dev/null
+++ b/net/ipv6/xfrm6_protocol.c
@@ -0,0 +1,270 @@
+/* xfrm6_protocol.c - Generic xfrm protocol multiplexer for ipv6.
+ *
+ * Copyright (C) 2013 secunet Security Networks AG
+ *
+ * Author:
+ * Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * Based on:
+ * net/ipv4/xfrm4_protocol.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/skbuff.h>
+#include <linux/icmpv6.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+
+static struct xfrm6_protocol __rcu *esp6_handlers __read_mostly;
+static struct xfrm6_protocol __rcu *ah6_handlers __read_mostly;
+static struct xfrm6_protocol __rcu *ipcomp6_handlers __read_mostly;
+static DEFINE_MUTEX(xfrm6_protocol_mutex);
+
+static inline struct xfrm6_protocol __rcu **proto_handlers(u8 protocol)
+{
+ switch (protocol) {
+ case IPPROTO_ESP:
+ return &esp6_handlers;
+ case IPPROTO_AH:
+ return &ah6_handlers;
+ case IPPROTO_COMP:
+ return &ipcomp6_handlers;
+ }
+
+ return NULL;
+}
+
+#define for_each_protocol_rcu(head, handler) \
+ for (handler = rcu_dereference(head); \
+ handler != NULL; \
+ handler = rcu_dereference(handler->next)) \
+
+int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(*proto_handlers(protocol), handler)
+ if ((ret = handler->cb_handler(skb, err)) <= 0)
+ return ret;
+
+ return 0;
+}
+EXPORT_SYMBOL(xfrm6_rcv_cb);
+
+static int xfrm6_esp_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+ for_each_protocol_rcu(esp6_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+
+static void xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(esp6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+
+static int xfrm6_ah_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+ for_each_protocol_rcu(ah6_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+
+static void xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(ah6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+
+static int xfrm6_ipcomp_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+ for_each_protocol_rcu(ipcomp6_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+
+static void xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(ipcomp6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+
+static const struct inet6_protocol esp6_protocol = {
+ .handler = xfrm6_esp_rcv,
+ .err_handler = xfrm6_esp_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static const struct inet6_protocol ah6_protocol = {
+ .handler = xfrm6_ah_rcv,
+ .err_handler = xfrm6_ah_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static const struct inet6_protocol ipcomp6_protocol = {
+ .handler = xfrm6_ipcomp_rcv,
+ .err_handler = xfrm6_ipcomp_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static struct xfrm_input_afinfo xfrm6_input_afinfo = {
+ .family = AF_INET6,
+ .owner = THIS_MODULE,
+ .callback = xfrm6_rcv_cb,
+};
+
+static inline const struct inet6_protocol *netproto(unsigned char protocol)
+{
+ switch (protocol) {
+ case IPPROTO_ESP:
+ return &esp6_protocol;
+ case IPPROTO_AH:
+ return &ah6_protocol;
+ case IPPROTO_COMP:
+ return &ipcomp6_protocol;
+ }
+
+ return NULL;
+}
+
+int xfrm6_protocol_register(struct xfrm6_protocol *handler,
+ unsigned char protocol)
+{
+ struct xfrm6_protocol __rcu **pprev;
+ struct xfrm6_protocol *t;
+ bool add_netproto = false;
+
+ int ret = -EEXIST;
+ int priority = handler->priority;
+
+ mutex_lock(&xfrm6_protocol_mutex);
+
+ if (!rcu_dereference_protected(*proto_handlers(protocol),
+ lockdep_is_held(&xfrm6_protocol_mutex)))
+ add_netproto = true;
+
+ for (pprev = proto_handlers(protocol);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&xfrm6_protocol_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t->priority < priority)
+ break;
+ if (t->priority == priority)
+ goto err;
+ }
+
+ handler->next = *pprev;
+ rcu_assign_pointer(*pprev, handler);
+
+ ret = 0;
+
+err:
+ mutex_unlock(&xfrm6_protocol_mutex);
+
+ if (add_netproto) {
+ if (inet6_add_protocol(netproto(protocol), protocol)) {
+ pr_err("%s: can't add protocol\n", __func__);
+ ret = -EAGAIN;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(xfrm6_protocol_register);
+
+int xfrm6_protocol_deregister(struct xfrm6_protocol *handler,
+ unsigned char protocol)
+{
+ struct xfrm6_protocol __rcu **pprev;
+ struct xfrm6_protocol *t;
+ int ret = -ENOENT;
+
+ mutex_lock(&xfrm6_protocol_mutex);
+
+ for (pprev = proto_handlers(protocol);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&xfrm6_protocol_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t == handler) {
+ *pprev = handler->next;
+ ret = 0;
+ break;
+ }
+ }
+
+ if (!rcu_dereference_protected(*proto_handlers(protocol),
+ lockdep_is_held(&xfrm6_protocol_mutex))) {
+ if (inet6_del_protocol(netproto(protocol), protocol) < 0) {
+ pr_err("%s: can't remove protocol\n", __func__);
+ ret = -EAGAIN;
+ }
+ }
+
+ mutex_unlock(&xfrm6_protocol_mutex);
+
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL(xfrm6_protocol_deregister);
+
+int __init xfrm6_protocol_init(void)
+{
+ return xfrm_input_register_afinfo(&xfrm6_input_afinfo);
+}
+
+void xfrm6_protocol_fini(void)
+{
+ xfrm_input_unregister_afinfo(&xfrm6_input_afinfo);
+}