summary refs log tree commit diff
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r-- net/ipv6/Makefile 2
-rw-r--r-- net/ipv6/addrconf.c 414
-rw-r--r-- net/ipv6/addrconf_core.c 9
-rw-r--r-- net/ipv6/addrlabel.c 61
-rw-r--r-- net/ipv6/af_inet6.c 52
-rw-r--r-- net/ipv6/ah6.c 80
-rw-r--r-- net/ipv6/anycast.c 31
-rw-r--r-- net/ipv6/datagram.c 1
-rw-r--r-- net/ipv6/esp6.c 26
-rw-r--r-- net/ipv6/icmp.c 56
-rw-r--r-- net/ipv6/inet6_connection_sock.c 5
-rw-r--r-- net/ipv6/ip6_checksum.c 61
-rw-r--r-- net/ipv6/ip6_fib.c 156
-rw-r--r-- net/ipv6/ip6_flowlabel.c 7
-rw-r--r-- net/ipv6/ip6_gre.c 99
-rw-r--r-- net/ipv6/ip6_offload.c 8
-rw-r--r-- net/ipv6/ip6_output.c 97
-rw-r--r-- net/ipv6/ip6_tunnel.c 38
-rw-r--r-- net/ipv6/ip6_vti.c 334
-rw-r--r-- net/ipv6/ip6mr.c 19
-rw-r--r-- net/ipv6/ipcomp6.c 22
-rw-r--r-- net/ipv6/ipv6_sockglue.c 15
-rw-r--r-- net/ipv6/mcast.c 38
-rw-r--r-- net/ipv6/ndisc.c 65
-rw-r--r-- net/ipv6/netfilter.c 6
-rw-r--r-- net/ipv6/netfilter/Kconfig 31
-rw-r--r-- net/ipv6/netfilter/Makefile 5
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c 6
-rw-r--r-- net/ipv6/netfilter/ip6t_ipv6header.c 1
-rw-r--r-- net/ipv6/netfilter/ip6t_rpfilter.c 1
-rw-r--r-- net/ipv6/netfilter/ip6table_nat.c 14
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_reasm.c 51
-rw-r--r-- net/ipv6/netfilter/nf_log_ipv6.c 417
-rw-r--r-- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c 4
-rw-r--r-- net/ipv6/netfilter/nft_chain_nat_ipv6.c 12
-rw-r--r-- net/ipv6/output_core.c 31
-rw-r--r-- net/ipv6/ping.c 12
-rw-r--r-- net/ipv6/proc.c 18
-rw-r--r-- net/ipv6/raw.c 25
-rw-r--r-- net/ipv6/reassembly.c 90
-rw-r--r-- net/ipv6/route.c 103
-rw-r--r-- net/ipv6/sit.c 55
-rw-r--r-- net/ipv6/syncookies.c 6
-rw-r--r-- net/ipv6/sysctl_net_ipv6.c 16
-rw-r--r-- net/ipv6/tcp_ipv6.c 319
-rw-r--r-- net/ipv6/tcpv6_offload.c 4
-rw-r--r-- net/ipv6/udp.c 175
-rw-r--r-- net/ipv6/udp_offload.c 5
-rw-r--r-- net/ipv6/udplite.c 1
-rw-r--r-- net/ipv6/xfrm6_mode_tunnel.c 63
-rw-r--r-- net/ipv6/xfrm6_output.c 30
-rw-r--r-- net/ipv6/xfrm6_policy.c 7
-rw-r--r-- net/ipv6/xfrm6_protocol.c 279
53 files changed, 2281 insertions, 1202 deletions
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 17bb830872db..2fe68364bb20 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -16,7 +16,7 @@ ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o
ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
- xfrm6_output.o
+ xfrm6_output.o xfrm6_protocol.o
ipv6-$(CONFIG_NETFILTER) += netfilter.o
ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
ipv6-$(CONFIG_PROC_FS) += proc.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 344e972426df..3e118dfddd02 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -108,11 +108,12 @@ static inline u32 cstamp_delta(unsigned long cstamp)
}
#ifdef CONFIG_SYSCTL
-static void addrconf_sysctl_register(struct inet6_dev *idev);
+static int addrconf_sysctl_register(struct inet6_dev *idev);
static void addrconf_sysctl_unregister(struct inet6_dev *idev);
#else
-static inline void addrconf_sysctl_register(struct inet6_dev *idev)
+static inline int addrconf_sysctl_register(struct inet6_dev *idev)
{
+ return 0;
}
static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
@@ -133,10 +134,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev);
static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
static DEFINE_SPINLOCK(addrconf_hash_lock);
-static void addrconf_verify(unsigned long);
+static void addrconf_verify(void);
+static void addrconf_verify_rtnl(void);
+static void addrconf_verify_work(struct work_struct *);
-static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0);
-static DEFINE_SPINLOCK(addrconf_verify_lock);
+static struct workqueue_struct *addrconf_wq;
+static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work);
static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
@@ -151,7 +154,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
u32 flags, u32 noflags);
static void addrconf_dad_start(struct inet6_ifaddr *ifp);
-static void addrconf_dad_timer(unsigned long data);
+static void addrconf_dad_work(struct work_struct *w);
static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
static void addrconf_dad_run(struct inet6_dev *idev);
static void addrconf_rs_timer(unsigned long data);
@@ -184,6 +187,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.max_desync_factor = MAX_DESYNC_FACTOR,
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
+ .accept_ra_from_local = 0,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -220,6 +224,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.max_desync_factor = MAX_DESYNC_FACTOR,
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
+ .accept_ra_from_local = 0,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -247,9 +252,9 @@ static void addrconf_del_rs_timer(struct inet6_dev *idev)
__in6_dev_put(idev);
}
-static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp)
+static void addrconf_del_dad_work(struct inet6_ifaddr *ifp)
{
- if (del_timer(&ifp->dad_timer))
+ if (cancel_delayed_work(&ifp->dad_work))
__in6_ifa_put(ifp);
}
@@ -261,31 +266,26 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev,
mod_timer(&idev->rs_timer, jiffies + when);
}
-static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp,
- unsigned long when)
+static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
+ unsigned long delay)
{
- if (!timer_pending(&ifp->dad_timer))
+ if (!delayed_work_pending(&ifp->dad_work))
in6_ifa_hold(ifp);
- mod_timer(&ifp->dad_timer, jiffies + when);
+ mod_delayed_work(addrconf_wq, &ifp->dad_work, delay);
}
static int snmp6_alloc_dev(struct inet6_dev *idev)
{
int i;
- if (snmp_mib_init((void __percpu **)idev->stats.ipv6,
- sizeof(struct ipstats_mib),
- __alignof__(struct ipstats_mib)) < 0)
+ idev->stats.ipv6 = alloc_percpu(struct ipstats_mib);
+ if (!idev->stats.ipv6)
goto err_ip;
for_each_possible_cpu(i) {
struct ipstats_mib *addrconf_stats;
- addrconf_stats = per_cpu_ptr(idev->stats.ipv6[0], i);
- u64_stats_init(&addrconf_stats->syncp);
-#if SNMP_ARRAY_SZ == 2
- addrconf_stats = per_cpu_ptr(idev->stats.ipv6[1], i);
+ addrconf_stats = per_cpu_ptr(idev->stats.ipv6, i);
u64_stats_init(&addrconf_stats->syncp);
-#endif
}
@@ -303,7 +303,7 @@ static int snmp6_alloc_dev(struct inet6_dev *idev)
err_icmpmsg:
kfree(idev->stats.icmpv6dev);
err_icmp:
- snmp_mib_free((void __percpu **)idev->stats.ipv6);
+ free_percpu(idev->stats.ipv6);
err_ip:
return -ENOMEM;
}
@@ -311,16 +311,16 @@ err_ip:
static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
{
struct inet6_dev *ndev;
+ int err = -ENOMEM;
ASSERT_RTNL();
if (dev->mtu < IPV6_MIN_MTU)
- return NULL;
+ return ERR_PTR(-EINVAL);
ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL);
-
if (ndev == NULL)
- return NULL;
+ return ERR_PTR(err);
rwlock_init(&ndev->lock);
ndev->dev = dev;
@@ -333,7 +333,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
if (ndev->nd_parms == NULL) {
kfree(ndev);
- return NULL;
+ return ERR_PTR(err);
}
if (ndev->cnf.forwarding)
dev_disable_lro(dev);
@@ -347,17 +347,14 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
neigh_parms_release(&nd_tbl, ndev->nd_parms);
dev_put(dev);
kfree(ndev);
- return NULL;
+ return ERR_PTR(err);
}
if (snmp6_register_dev(ndev) < 0) {
ADBG(KERN_WARNING
"%s: cannot create /proc/net/dev_snmp6/%s\n",
__func__, dev->name);
- neigh_parms_release(&nd_tbl, ndev->nd_parms);
- ndev->dead = 1;
- in6_dev_finish_destroy(ndev);
- return NULL;
+ goto err_release;
}
/* One reference from device. We must do this before
@@ -395,7 +392,12 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ipv6_mc_init_dev(ndev);
ndev->tstamp = jiffies;
- addrconf_sysctl_register(ndev);
+ err = addrconf_sysctl_register(ndev);
+ if (err) {
+ ipv6_mc_destroy_dev(ndev);
+ del_timer(&ndev->regen_timer);
+ goto err_release;
+ }
/* protected by rtnl_lock */
rcu_assign_pointer(dev->ip6_ptr, ndev);
@@ -410,6 +412,12 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
return ndev;
+
+err_release:
+ neigh_parms_release(&nd_tbl, ndev->nd_parms);
+ ndev->dead = 1;
+ in6_dev_finish_destroy(ndev);
+ return ERR_PTR(err);
}
static struct inet6_dev *ipv6_find_idev(struct net_device *dev)
@@ -421,7 +429,7 @@ static struct inet6_dev *ipv6_find_idev(struct net_device *dev)
idev = __in6_dev_get(dev);
if (!idev) {
idev = ipv6_add_dev(dev);
- if (!idev)
+ if (IS_ERR(idev))
return NULL;
}
@@ -751,8 +759,9 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
in6_dev_put(ifp->idev);
- if (del_timer(&ifp->dad_timer))
- pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
+ if (cancel_delayed_work(&ifp->dad_work))
+ pr_notice("delayed DAD work was pending while freeing ifa=%p\n",
+ ifp);
if (ifp->state != INET6_IFADDR_STATE_DEAD) {
pr_warn("Freeing alive inet6 address %p\n", ifp);
@@ -849,8 +858,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
spin_lock_init(&ifa->lock);
spin_lock_init(&ifa->state_lock);
- setup_timer(&ifa->dad_timer, addrconf_dad_timer,
- (unsigned long)ifa);
+ INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work);
INIT_HLIST_NODE(&ifa->addr_lst);
ifa->scope = scope;
ifa->prefix_len = pfxlen;
@@ -990,6 +998,8 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP;
unsigned long expires;
+ ASSERT_RTNL();
+
spin_lock_bh(&ifp->state_lock);
state = ifp->state;
ifp->state = INET6_IFADDR_STATE_DEAD;
@@ -1021,7 +1031,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
write_unlock_bh(&ifp->idev->lock);
- addrconf_del_dad_timer(ifp);
+ addrconf_del_dad_work(ifp);
ipv6_ifa_notify(RTM_DELADDR, ifp);
@@ -1604,7 +1614,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
{
if (ifp->flags&IFA_F_PERMANENT) {
spin_lock_bh(&ifp->lock);
- addrconf_del_dad_timer(ifp);
+ addrconf_del_dad_work(ifp);
ifp->flags |= IFA_F_TENTATIVE;
if (dad_failed)
ifp->flags |= IFA_F_DADFAILED;
@@ -1625,20 +1635,21 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
spin_unlock_bh(&ifp->lock);
}
ipv6_del_addr(ifp);
- } else
+ } else {
ipv6_del_addr(ifp);
+ }
}
static int addrconf_dad_end(struct inet6_ifaddr *ifp)
{
int err = -ENOENT;
- spin_lock(&ifp->state_lock);
+ spin_lock_bh(&ifp->state_lock);
if (ifp->state == INET6_IFADDR_STATE_DAD) {
ifp->state = INET6_IFADDR_STATE_POSTDAD;
err = 0;
}
- spin_unlock(&ifp->state_lock);
+ spin_unlock_bh(&ifp->state_lock);
return err;
}
@@ -1671,11 +1682,16 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
}
}
- addrconf_dad_stop(ifp, 1);
-}
+ spin_lock_bh(&ifp->state_lock);
+ /* transition from _POSTDAD to _ERRDAD */
+ ifp->state = INET6_IFADDR_STATE_ERRDAD;
+ spin_unlock_bh(&ifp->state_lock);
-/* Join to solicited addr multicast group. */
+ addrconf_mod_dad_work(ifp, 0);
+}
+/* Join to solicited addr multicast group.
+ * caller must hold RTNL */
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
{
struct in6_addr maddr;
@@ -1687,6 +1703,7 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
ipv6_dev_mc_inc(dev, &maddr);
}
+/* caller must hold RTNL */
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct in6_addr maddr;
@@ -1698,9 +1715,11 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
__ipv6_dev_mc_dec(idev, &maddr);
}
+/* caller must hold RTNL */
static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
{
struct in6_addr addr;
+
if (ifp->prefix_len >= 127) /* RFC 6164 */
return;
ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -1709,9 +1728,11 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
ipv6_dev_ac_inc(ifp->idev->dev, &addr);
}
+/* caller must hold RTNL */
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
{
struct in6_addr addr;
+
if (ifp->prefix_len >= 127) /* RFC 6164 */
return;
ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -2271,11 +2292,13 @@ ok:
return;
}
- ifp->flags |= IFA_F_MANAGETEMPADDR;
update_lft = 0;
create = 1;
+ spin_lock_bh(&ifp->lock);
+ ifp->flags |= IFA_F_MANAGETEMPADDR;
ifp->cstamp = jiffies;
ifp->tokenized = tokenized;
+ spin_unlock_bh(&ifp->lock);
addrconf_dad_start(ifp);
}
@@ -2326,7 +2349,7 @@ ok:
create, now);
in6_ifa_put(ifp);
- addrconf_verify(0);
+ addrconf_verify();
}
}
inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
@@ -2475,15 +2498,15 @@ static int inet6_addr_add(struct net *net, int ifindex,
manage_tempaddrs(idev, ifp, valid_lft, prefered_lft,
true, jiffies);
in6_ifa_put(ifp);
- addrconf_verify(0);
+ addrconf_verify_rtnl();
return 0;
}
return PTR_ERR(ifp);
}
-static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *pfx,
- unsigned int plen)
+static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
+ const struct in6_addr *pfx, unsigned int plen)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
@@ -2506,7 +2529,12 @@ static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *p
in6_ifa_hold(ifp);
read_unlock_bh(&idev->lock);
+ if (!(ifp->flags & IFA_F_TEMPORARY) &&
+ (ifa_flags & IFA_F_MANAGETEMPADDR))
+ manage_tempaddrs(idev, ifp, 0, 0, false,
+ jiffies);
ipv6_del_addr(ifp);
+ addrconf_verify_rtnl();
return 0;
}
}
@@ -2546,7 +2574,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
return -EFAULT;
rtnl_lock();
- err = inet6_addr_del(net, ireq.ifr6_ifindex, &ireq.ifr6_addr,
+ err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr,
ireq.ifr6_prefixlen);
rtnl_unlock();
return err;
@@ -2706,9 +2734,25 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr
}
}
+static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
+{
+ if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) {
+ struct in6_addr addr;
+
+ ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
+ /* addrconf_add_linklocal also adds a prefix_route and we
+ * only need to care about prefix routes if ipv6_generate_eui64
+ * couldn't generate one.
+ */
+ if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0)
+ addrconf_add_linklocal(idev, &addr);
+ else if (prefix_route)
+ addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
+ }
+}
+
static void addrconf_dev_config(struct net_device *dev)
{
- struct in6_addr addr;
struct inet6_dev *idev;
ASSERT_RTNL();
@@ -2729,11 +2773,7 @@ static void addrconf_dev_config(struct net_device *dev)
if (IS_ERR(idev))
return;
- memset(&addr, 0, sizeof(struct in6_addr));
- addr.s6_addr32[0] = htonl(0xFE800000);
-
- if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0)
- addrconf_add_linklocal(idev, &addr);
+ addrconf_addr_gen(idev, false);
}
#if IS_ENABLED(CONFIG_IPV6_SIT)
@@ -2755,11 +2795,7 @@ static void addrconf_sit_config(struct net_device *dev)
}
if (dev->priv_flags & IFF_ISATAP) {
- struct in6_addr addr;
-
- ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
- if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
- addrconf_add_linklocal(idev, &addr);
+ addrconf_addr_gen(idev, false);
return;
}
@@ -2774,7 +2810,6 @@ static void addrconf_sit_config(struct net_device *dev)
static void addrconf_gre_config(struct net_device *dev)
{
struct inet6_dev *idev;
- struct in6_addr addr;
ASSERT_RTNL();
@@ -2783,26 +2818,10 @@ static void addrconf_gre_config(struct net_device *dev)
return;
}
- ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
- if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
- addrconf_add_linklocal(idev, &addr);
- else
- addrconf_prefix_route(&addr, 64, dev, 0, 0);
+ addrconf_addr_gen(idev, true);
}
#endif
-static inline int
-ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
-{
- struct in6_addr lladdr;
-
- if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) {
- addrconf_add_linklocal(idev, &lladdr);
- return 0;
- }
- return -1;
-}
-
static int addrconf_notify(struct notifier_block *this, unsigned long event,
void *ptr)
{
@@ -2815,8 +2834,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
case NETDEV_REGISTER:
if (!idev && dev->mtu >= IPV6_MIN_MTU) {
idev = ipv6_add_dev(dev);
- if (!idev)
- return notifier_from_errno(-ENOMEM);
+ if (IS_ERR(idev))
+ return notifier_from_errno(PTR_ERR(idev));
}
break;
@@ -2836,7 +2855,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (!idev && dev->mtu >= IPV6_MIN_MTU)
idev = ipv6_add_dev(dev);
- if (idev) {
+ if (!IS_ERR_OR_NULL(idev)) {
idev->if_flags |= IF_READY;
run_pending = 1;
}
@@ -2879,7 +2898,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
break;
}
- if (idev) {
+ if (!IS_ERR_OR_NULL(idev)) {
if (run_pending)
addrconf_dad_run(idev);
@@ -2914,7 +2933,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (!idev && dev->mtu >= IPV6_MIN_MTU) {
idev = ipv6_add_dev(dev);
- if (idev)
+ if (!IS_ERR(idev))
break;
}
@@ -2935,10 +2954,14 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (idev) {
snmp6_unregister_dev(idev);
addrconf_sysctl_unregister(idev);
- addrconf_sysctl_register(idev);
- err = snmp6_register_dev(idev);
+ err = addrconf_sysctl_register(idev);
if (err)
return notifier_from_errno(err);
+ err = snmp6_register_dev(idev);
+ if (err) {
+ addrconf_sysctl_unregister(idev);
+ return notifier_from_errno(err);
+ }
}
break;
@@ -3011,7 +3034,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
hlist_for_each_entry_rcu(ifa, h, addr_lst) {
if (ifa->idev == idev) {
hlist_del_init_rcu(&ifa->addr_lst);
- addrconf_del_dad_timer(ifa);
+ addrconf_del_dad_work(ifa);
goto restart;
}
}
@@ -3049,7 +3072,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
while (!list_empty(&idev->addr_list)) {
ifa = list_first_entry(&idev->addr_list,
struct inet6_ifaddr, if_list);
- addrconf_del_dad_timer(ifa);
+ addrconf_del_dad_work(ifa);
list_del(&ifa->if_list);
@@ -3071,11 +3094,13 @@ static int addrconf_ifdown(struct net_device *dev, int how)
write_unlock_bh(&idev->lock);
- /* Step 5: Discard multicast list */
- if (how)
+ /* Step 5: Discard anycast and multicast list */
+ if (how) {
+ ipv6_ac_destroy_dev(idev);
ipv6_mc_destroy_dev(idev);
- else
+ } else {
ipv6_mc_down(idev);
+ }
idev->tstamp = jiffies;
@@ -3148,10 +3173,10 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1);
ifp->dad_probes = idev->cnf.dad_transmits;
- addrconf_mod_dad_timer(ifp, rand_num);
+ addrconf_mod_dad_work(ifp, rand_num);
}
-static void addrconf_dad_start(struct inet6_ifaddr *ifp)
+static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
struct net_device *dev = idev->dev;
@@ -3203,25 +3228,68 @@ out:
read_unlock_bh(&idev->lock);
}
-static void addrconf_dad_timer(unsigned long data)
+static void addrconf_dad_start(struct inet6_ifaddr *ifp)
+{
+ bool begin_dad = false;
+
+ spin_lock_bh(&ifp->state_lock);
+ if (ifp->state != INET6_IFADDR_STATE_DEAD) {
+ ifp->state = INET6_IFADDR_STATE_PREDAD;
+ begin_dad = true;
+ }
+ spin_unlock_bh(&ifp->state_lock);
+
+ if (begin_dad)
+ addrconf_mod_dad_work(ifp, 0);
+}
+
+static void addrconf_dad_work(struct work_struct *w)
{
- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+ struct inet6_ifaddr *ifp = container_of(to_delayed_work(w),
+ struct inet6_ifaddr,
+ dad_work);
struct inet6_dev *idev = ifp->idev;
struct in6_addr mcaddr;
+ enum {
+ DAD_PROCESS,
+ DAD_BEGIN,
+ DAD_ABORT,
+ } action = DAD_PROCESS;
+
+ rtnl_lock();
+
+ spin_lock_bh(&ifp->state_lock);
+ if (ifp->state == INET6_IFADDR_STATE_PREDAD) {
+ action = DAD_BEGIN;
+ ifp->state = INET6_IFADDR_STATE_DAD;
+ } else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) {
+ action = DAD_ABORT;
+ ifp->state = INET6_IFADDR_STATE_POSTDAD;
+ }
+ spin_unlock_bh(&ifp->state_lock);
+
+ if (action == DAD_BEGIN) {
+ addrconf_dad_begin(ifp);
+ goto out;
+ } else if (action == DAD_ABORT) {
+ addrconf_dad_stop(ifp, 1);
+ goto out;
+ }
+
if (!ifp->dad_probes && addrconf_dad_end(ifp))
goto out;
- write_lock(&idev->lock);
+ write_lock_bh(&idev->lock);
if (idev->dead || !(idev->if_flags & IF_READY)) {
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
goto out;
}
spin_lock(&ifp->lock);
if (ifp->state == INET6_IFADDR_STATE_DEAD) {
spin_unlock(&ifp->lock);
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
goto out;
}
@@ -3232,7 +3300,7 @@ static void addrconf_dad_timer(unsigned long data)
ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
spin_unlock(&ifp->lock);
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
addrconf_dad_completed(ifp);
@@ -3240,16 +3308,17 @@ static void addrconf_dad_timer(unsigned long data)
}
ifp->dad_probes--;
- addrconf_mod_dad_timer(ifp,
- NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME));
+ addrconf_mod_dad_work(ifp,
+ NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME));
spin_unlock(&ifp->lock);
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
/* send a neighbour solicitation for our addr */
addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
out:
in6_ifa_put(ifp);
+ rtnl_unlock();
}
/* ifp->idev must be at least read locked */
@@ -3276,7 +3345,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
struct in6_addr lladdr;
bool send_rs, send_mld;
- addrconf_del_dad_timer(ifp);
+ addrconf_del_dad_work(ifp);
/*
* Configure the address for reception. Now it is valid.
@@ -3517,23 +3586,23 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
* Periodic address status verification
*/
-static void addrconf_verify(unsigned long foo)
+static void addrconf_verify_rtnl(void)
{
unsigned long now, next, next_sec, next_sched;
struct inet6_ifaddr *ifp;
int i;
+ ASSERT_RTNL();
+
rcu_read_lock_bh();
- spin_lock(&addrconf_verify_lock);
now = jiffies;
next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
- del_timer(&addr_chk_timer);
+ cancel_delayed_work(&addr_chk_work);
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
restart:
- hlist_for_each_entry_rcu_bh(ifp,
- &inet6_addr_lst[i], addr_lst) {
+ hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) {
unsigned long age;
/* When setting preferred_lft to a value not zero or
@@ -3628,13 +3697,22 @@ restart:
ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
now, next, next_sec, next_sched);
-
- addr_chk_timer.expires = next_sched;
- add_timer(&addr_chk_timer);
- spin_unlock(&addrconf_verify_lock);
+ mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
rcu_read_unlock_bh();
}
+static void addrconf_verify_work(struct work_struct *w)
+{
+ rtnl_lock();
+ addrconf_verify_rtnl();
+ rtnl_unlock();
+}
+
+static void addrconf_verify(void)
+{
+ mod_delayed_work(addrconf_wq, &addr_chk_work, 0);
+}
+
static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local,
struct in6_addr **peer_pfx)
{
@@ -3668,6 +3746,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
struct in6_addr *pfx, *peer_pfx;
+ u32 ifa_flags;
int err;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
@@ -3679,7 +3758,13 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
if (pfx == NULL)
return -EINVAL;
- return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen);
+ ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags;
+
+ /* We ignore other flags so far. */
+ ifa_flags &= IFA_F_MANAGETEMPADDR;
+
+ return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx,
+ ifm->ifa_prefixlen);
}
static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
@@ -3691,6 +3776,8 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
bool was_managetempaddr;
bool had_prefixroute;
+ ASSERT_RTNL();
+
if (!valid_lft || (prefered_lft > valid_lft))
return -EINVAL;
@@ -3756,7 +3843,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
!was_managetempaddr, jiffies);
}
- addrconf_verify(0);
+ addrconf_verify_rtnl();
return 0;
}
@@ -4249,6 +4336,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
+ array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local;
}
static inline size_t inet6_ifla6_size(void)
@@ -4286,7 +4374,7 @@ static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib,
memset(&stats[items], 0, pad);
}
-static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib,
+static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib,
int items, int bytes, size_t syncpoff)
{
int i;
@@ -4306,7 +4394,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
{
switch (attrtype) {
case IFLA_INET6_STATS:
- __snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6,
+ __snmp6_fill_stats64(stats, idev->stats.ipv6,
IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp));
break;
case IFLA_INET6_ICMP6STATS:
@@ -4348,6 +4436,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
if (nla == NULL)
goto nla_put_failure;
+
+ if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->addr_gen_mode))
+ goto nla_put_failure;
+
read_lock_bh(&idev->lock);
memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
read_unlock_bh(&idev->lock);
@@ -4386,6 +4478,8 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
bool update_rs = false;
struct in6_addr ll_addr;
+ ASSERT_RTNL();
+
if (token == NULL)
return -EINVAL;
if (ipv6_addr_any(token))
@@ -4434,7 +4528,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
}
write_unlock_bh(&idev->lock);
- addrconf_verify(0);
+ addrconf_verify_rtnl();
return 0;
}
@@ -4450,8 +4544,21 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0)
BUG();
- if (tb[IFLA_INET6_TOKEN])
+ if (tb[IFLA_INET6_TOKEN]) {
err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]));
+ if (err)
+ return err;
+ }
+
+ if (tb[IFLA_INET6_ADDR_GEN_MODE]) {
+ u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]);
+
+ if (mode != IN6_ADDR_GEN_MODE_EUI64 &&
+ mode != IN6_ADDR_GEN_MODE_NONE)
+ return -EINVAL;
+ idev->addr_gen_mode = mode;
+ err = 0;
+ }
return err;
}
@@ -4636,6 +4743,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
{
struct net *net = dev_net(ifp->idev->dev);
+ if (event)
+ ASSERT_RTNL();
+
inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
switch (event) {
@@ -4660,24 +4770,21 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
addrconf_leave_solict(ifp->idev, &ifp->addr);
if (!ipv6_addr_any(&ifp->peer_addr)) {
struct rt6_info *rt;
- struct net_device *dev = ifp->idev->dev;
-
- rt = rt6_lookup(dev_net(dev), &ifp->peer_addr, NULL,
- dev->ifindex, 1);
- if (rt) {
- dst_hold(&rt->dst);
- if (ip6_del_rt(rt))
- dst_free(&rt->dst);
- }
+
+ rt = addrconf_get_prefix_route(&ifp->peer_addr, 128,
+ ifp->idev->dev, 0, 0);
+ if (rt && ip6_del_rt(rt))
+ dst_free(&rt->dst);
}
dst_hold(&ifp->rt->dst);
if (ip6_del_rt(ifp->rt))
dst_free(&ifp->rt->dst);
+
+ rt_genid_bump_ipv6(net);
break;
}
atomic_inc(&net->ipv6.dev_addr_genid);
- rt_genid_bump_ipv6(net);
}
static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -5091,6 +5198,13 @@ static struct addrconf_sysctl_table
.proc_handler = proc_dointvec
},
{
+ .procname = "accept_ra_from_local",
+ .data = &ipv6_devconf.accept_ra_from_local,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
/* sentinel */
}
},
@@ -5141,12 +5255,23 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
kfree(t);
}
-static void addrconf_sysctl_register(struct inet6_dev *idev)
+static int addrconf_sysctl_register(struct inet6_dev *idev)
{
- neigh_sysctl_register(idev->dev, idev->nd_parms,
- &ndisc_ifinfo_sysctl_change);
- __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
- idev, &idev->cnf);
+ int err;
+
+ if (!sysctl_dev_name_is_allowed(idev->dev->name))
+ return -EINVAL;
+
+ err = neigh_sysctl_register(idev->dev, idev->nd_parms,
+ &ndisc_ifinfo_sysctl_change);
+ if (err)
+ return err;
+ err = __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
+ idev, &idev->cnf);
+ if (err)
+ neigh_sysctl_unregister(idev->nd_parms);
+
+ return err;
}
static void addrconf_sysctl_unregister(struct inet6_dev *idev)
@@ -5231,6 +5356,7 @@ static struct rtnl_af_ops inet6_ops = {
int __init addrconf_init(void)
{
+ struct inet6_dev *idev;
int i, err;
err = ipv6_addr_label_init();
@@ -5244,6 +5370,12 @@ int __init addrconf_init(void)
if (err < 0)
goto out_addrlabel;
+ addrconf_wq = create_workqueue("ipv6_addrconf");
+ if (!addrconf_wq) {
+ err = -ENOMEM;
+ goto out_nowq;
+ }
+
/* The addrconf netdev notifier requires that loopback_dev
* has it's ipv6 private information allocated and setup
* before it can bring up and give link-local addresses
@@ -5263,18 +5395,19 @@ int __init addrconf_init(void)
* device and it being up should be removed.
*/
rtnl_lock();
- if (!ipv6_add_dev(init_net.loopback_dev))
- err = -ENOMEM;
+ idev = ipv6_add_dev(init_net.loopback_dev);
rtnl_unlock();
- if (err)
+ if (IS_ERR(idev)) {
+ err = PTR_ERR(idev);
goto errlo;
+ }
for (i = 0; i < IN6_ADDR_HSIZE; i++)
INIT_HLIST_HEAD(&inet6_addr_lst[i]);
register_netdevice_notifier(&ipv6_dev_notf);
- addrconf_verify(0);
+ addrconf_verify();
rtnl_af_register(&inet6_ops);
@@ -5302,6 +5435,8 @@ errout:
rtnl_af_unregister(&inet6_ops);
unregister_netdevice_notifier(&ipv6_dev_notf);
errlo:
+ destroy_workqueue(addrconf_wq);
+out_nowq:
unregister_pernet_subsys(&addrconf_ops);
out_addrlabel:
ipv6_addr_label_cleanup();
@@ -5337,7 +5472,8 @@ void addrconf_cleanup(void)
for (i = 0; i < IN6_ADDR_HSIZE; i++)
WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
spin_unlock_bh(&addrconf_hash_lock);
-
- del_timer(&addr_chk_timer);
+ cancel_delayed_work(&addr_chk_work);
rtnl_unlock();
+
+ destroy_workqueue(addrconf_wq);
}
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 4c11cbcf8308..98cc4cd570e2 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -8,6 +8,13 @@
#include <net/addrconf.h>
#include <net/ip.h>
+/* if ipv6 module registers this function is used by xfrm to force all
+ * sockets to relookup their nodes - this is fairly expensive, be
+ * careful
+ */
+void (*__fib6_flush_trees)(struct net *);
+EXPORT_SYMBOL(__fib6_flush_trees);
+
#define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16)
static inline unsigned int ipv6_addr_scope2type(unsigned int scope)
@@ -123,7 +130,7 @@ static void snmp6_free_dev(struct inet6_dev *idev)
{
kfree(idev->stats.icmpv6msgdev);
kfree(idev->stats.icmpv6dev);
- snmp_mib_free((void __percpu **)idev->stats.ipv6);
+ free_percpu(idev->stats.ipv6);
}
/* Nobody refers to this device, we may destroy it. */
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index b30ad3741b46..fd0dc47f471d 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -6,7 +6,7 @@
*/
/*
* Author:
- * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
+ * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
*/
#include <linux/kernel.h>
@@ -22,14 +22,13 @@
#if 0
#define ADDRLABEL(x...) printk(x)
#else
-#define ADDRLABEL(x...) do { ; } while(0)
+#define ADDRLABEL(x...) do { ; } while (0)
#endif
/*
* Policy Table
*/
-struct ip6addrlbl_entry
-{
+struct ip6addrlbl_entry {
#ifdef CONFIG_NET_NS
struct net *lbl_net;
#endif
@@ -88,39 +87,39 @@ static const __net_initconst struct ip6addrlbl_init_table
{ /* ::/0 */
.prefix = &in6addr_any,
.label = 1,
- },{ /* fc00::/7 */
- .prefix = &(struct in6_addr){{{ 0xfc }}},
+ }, { /* fc00::/7 */
+ .prefix = &(struct in6_addr){ { { 0xfc } } } ,
.prefixlen = 7,
.label = 5,
- },{ /* fec0::/10 */
- .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}},
+ }, { /* fec0::/10 */
+ .prefix = &(struct in6_addr){ { { 0xfe, 0xc0 } } },
.prefixlen = 10,
.label = 11,
- },{ /* 2002::/16 */
- .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
+ }, { /* 2002::/16 */
+ .prefix = &(struct in6_addr){ { { 0x20, 0x02 } } },
.prefixlen = 16,
.label = 2,
- },{ /* 3ffe::/16 */
- .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}},
+ }, { /* 3ffe::/16 */
+ .prefix = &(struct in6_addr){ { { 0x3f, 0xfe } } },
.prefixlen = 16,
.label = 12,
- },{ /* 2001::/32 */
- .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
+ }, { /* 2001::/32 */
+ .prefix = &(struct in6_addr){ { { 0x20, 0x01 } } },
.prefixlen = 32,
.label = 6,
- },{ /* 2001:10::/28 */
- .prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}},
+ }, { /* 2001:10::/28 */
+ .prefix = &(struct in6_addr){ { { 0x20, 0x01, 0x00, 0x10 } } },
.prefixlen = 28,
.label = 7,
- },{ /* ::ffff:0:0 */
- .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}},
+ }, { /* ::ffff:0:0 */
+ .prefix = &(struct in6_addr){ { { [10] = 0xff, [11] = 0xff } } },
.prefixlen = 96,
.label = 4,
- },{ /* ::/96 */
+ }, { /* ::/96 */
.prefix = &in6addr_any,
.prefixlen = 96,
.label = 3,
- },{ /* ::1/128 */
+ }, { /* ::1/128 */
.prefix = &in6addr_loopback,
.prefixlen = 128,
.label = 0,
@@ -278,7 +277,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
last = p;
}
if (last)
- hlist_add_after_rcu(&last->list, &newp->list);
+ hlist_add_behind_rcu(&newp->list, &last->list);
else
hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
out:
@@ -441,7 +440,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh)
if (label == IPV6_ADDR_LABEL_DEFAULT)
return -EINVAL;
- switch(nlh->nlmsg_type) {
+ switch (nlh->nlmsg_type) {
case RTM_NEWADDRLABEL:
if (ifal->ifal_index &&
!__dev_get_by_index(net, ifal->ifal_index))
@@ -505,12 +504,13 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
if (idx >= s_idx &&
net_eq(ip6addrlbl_net(p), net)) {
- if ((err = ip6addrlbl_fill(skb, p,
- ip6addrlbl_table.seq,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWADDRLABEL,
- NLM_F_MULTI)) <= 0)
+ err = ip6addrlbl_fill(skb, p,
+ ip6addrlbl_table.seq,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWADDRLABEL,
+ NLM_F_MULTI);
+ if (err <= 0)
break;
}
idx++;
@@ -527,7 +527,7 @@ static inline int ip6addrlbl_msgsize(void)
+ nla_total_size(4); /* IFAL_LABEL */
}
-static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh)
+static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(in_skb->sk);
struct ifaddrlblmsg *ifal;
@@ -568,7 +568,8 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh)
goto out;
}
- if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) {
+ skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL);
+ if (!skb) {
ip6addrlbl_put(p);
return -ENOBUFS;
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d935889f1008..2daa3a133e49 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -106,7 +106,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
struct inet_protosw *answer;
struct proto *answer_prot;
unsigned char answer_flags;
- char answer_no_check;
int try_loading_module = 0;
int err;
@@ -162,7 +161,6 @@ lookup_protocol:
sock->ops = answer->ops;
answer_prot = answer->prot;
- answer_no_check = answer->no_check;
answer_flags = answer->flags;
rcu_read_unlock();
@@ -176,7 +174,6 @@ lookup_protocol:
sock_init_data(sock, sk);
err = 0;
- sk->sk_no_check = answer_no_check;
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = SK_CAN_REUSE;
@@ -200,7 +197,7 @@ lookup_protocol:
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
np->mc_loop = 1;
np->pmtudisc = IPV6_PMTUDISC_WANT;
- np->ipv6only = net->ipv6.sysctl.bindv6only;
+ sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
/* Init the ipv4 part of the socket since we can have sockets
* using v6 API for ipv4.
@@ -297,7 +294,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
/* Binding to v4-mapped address on a v6-only socket
* makes no sense
*/
- if (np->ipv6only) {
+ if (sk->sk_ipv6only) {
err = -EINVAL;
goto out;
}
@@ -374,7 +371,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (addr_type != IPV6_ADDR_ANY) {
sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
if (addr_type != IPV6_ADDR_MAPPED)
- np->ipv6only = 1;
+ sk->sk_ipv6only = 1;
}
if (snum)
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
@@ -715,33 +712,25 @@ static int __net_init ipv6_init_mibs(struct net *net)
{
int i;
- if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6,
- sizeof(struct udp_mib),
- __alignof__(struct udp_mib)) < 0)
+ net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib);
+ if (!net->mib.udp_stats_in6)
return -ENOMEM;
- if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6,
- sizeof(struct udp_mib),
- __alignof__(struct udp_mib)) < 0)
+ net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib);
+ if (!net->mib.udplite_stats_in6)
goto err_udplite_mib;
- if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics,
- sizeof(struct ipstats_mib),
- __alignof__(struct ipstats_mib)) < 0)
+ net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib);
+ if (!net->mib.ipv6_statistics)
goto err_ip_mib;
for_each_possible_cpu(i) {
struct ipstats_mib *af_inet6_stats;
- af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[0], i);
+ af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i);
u64_stats_init(&af_inet6_stats->syncp);
-#if SNMP_ARRAY_SZ == 2
- af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[1], i);
- u64_stats_init(&af_inet6_stats->syncp);
-#endif
}
- if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics,
- sizeof(struct icmpv6_mib),
- __alignof__(struct icmpv6_mib)) < 0)
+ net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib);
+ if (!net->mib.icmpv6_statistics)
goto err_icmp_mib;
net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib),
GFP_KERNEL);
@@ -750,22 +739,22 @@ static int __net_init ipv6_init_mibs(struct net *net)
return 0;
err_icmpmsg_mib:
- snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics);
+ free_percpu(net->mib.icmpv6_statistics);
err_icmp_mib:
- snmp_mib_free((void __percpu **)net->mib.ipv6_statistics);
+ free_percpu(net->mib.ipv6_statistics);
err_ip_mib:
- snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6);
+ free_percpu(net->mib.udplite_stats_in6);
err_udplite_mib:
- snmp_mib_free((void __percpu **)net->mib.udp_stats_in6);
+ free_percpu(net->mib.udp_stats_in6);
return -ENOMEM;
}
static void ipv6_cleanup_mibs(struct net *net)
{
- snmp_mib_free((void __percpu **)net->mib.udp_stats_in6);
- snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6);
- snmp_mib_free((void __percpu **)net->mib.ipv6_statistics);
- snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics);
+ free_percpu(net->mib.udp_stats_in6);
+ free_percpu(net->mib.udplite_stats_in6);
+ free_percpu(net->mib.ipv6_statistics);
+ free_percpu(net->mib.icmpv6_statistics);
kfree(net->mib.icmpv6msg_statistics);
}
@@ -776,6 +765,7 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.bindv6only = 0;
net->ipv6.sysctl.icmpv6_time = 1*HZ;
net->ipv6.sysctl.flowlabel_consistency = 1;
+ net->ipv6.sysctl.auto_flowlabels = 0;
atomic_set(&net->ipv6.rt_genid, 0);
err = ipv6_init_mibs(net);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 81e496a2e008..72a4930bdc0a 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -346,6 +346,10 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
struct ip_auth_hdr *ah;
struct ah_data *ahp;
struct tmp_ext *iph_ext;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
ahp = x->data;
ahash = ahp->ahash;
@@ -359,15 +363,22 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
if (extlen)
extlen += sizeof(*iph_ext);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
err = -ENOMEM;
- iph_base = ah_alloc_tmp(ahash, nfrags, IPV6HDR_BASELEN + extlen);
+ iph_base = ah_alloc_tmp(ahash, nfrags + sglists, IPV6HDR_BASELEN +
+ extlen + seqhi_len);
if (!iph_base)
goto out;
iph_ext = ah_tmp_ext(iph_base);
- icv = ah_tmp_icv(ahash, iph_ext, extlen);
+ seqhi = (__be32 *)((char *)iph_ext + extlen);
+ icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
ah = ip_auth_hdr(skb);
memset(ah->auth_data, 0, ahp->icv_trunc_len);
@@ -411,10 +422,15 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
ah->spi = x->id.spi;
ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah6_output_done, skb);
AH_SKB_CB(skb)->tmp = iph_base;
@@ -514,6 +530,10 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
int nexthdr;
int nfrags;
int err = -ENOMEM;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
goto out;
@@ -550,14 +570,22 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
skb_push(skb, hdr_len);
- work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
+
+ work_iph = ah_alloc_tmp(ahash, nfrags + sglists, hdr_len +
+ ahp->icv_trunc_len + seqhi_len);
if (!work_iph)
goto out;
- auth_data = ah_tmp_auth(work_iph, hdr_len);
- icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
+ auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len);
+ seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len);
+ icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
memcpy(work_iph, ip6h, hdr_len);
memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
@@ -572,10 +600,16 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
ip6h->flow_lbl[2] = 0;
ip6h->hop_limit = 0;
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah6_input_done, skb);
AH_SKB_CB(skb)->tmp = work_iph;
@@ -609,8 +643,8 @@ out:
return err;
}
-static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
@@ -619,17 +653,19 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
- return;
+ return 0;
x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
if (!x)
- return;
+ return 0;
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static int ah6_init_state(struct xfrm_state *x)
@@ -714,6 +750,11 @@ static void ah6_destroy(struct xfrm_state *x)
kfree(ahp);
}
+static int ah6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type ah6_type =
{
.description = "AH6",
@@ -727,10 +768,11 @@ static const struct xfrm_type ah6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol ah6_protocol = {
+static struct xfrm6_protocol ah6_protocol = {
.handler = xfrm6_rcv,
+ .cb_handler = ah6_rcv_cb,
.err_handler = ah6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init ah6_init(void)
@@ -740,7 +782,7 @@ static int __init ah6_init(void)
return -EAGAIN;
}
- if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) {
+ if (xfrm6_protocol_register(&ah6_protocol, IPPROTO_AH) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&ah6_type, AF_INET6);
return -EAGAIN;
@@ -751,7 +793,7 @@ static int __init ah6_init(void)
static void __exit ah6_fini(void)
{
- if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0)
+ if (xfrm6_protocol_deregister(&ah6_protocol, IPPROTO_AH) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0)
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 210183244689..9a386842fd62 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -77,6 +77,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
pac->acl_next = NULL;
pac->acl_addr = *addr;
+ rtnl_lock();
rcu_read_lock();
if (ifindex == 0) {
struct rt6_info *rt;
@@ -137,6 +138,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
error:
rcu_read_unlock();
+ rtnl_unlock();
if (pac)
sock_kfree_s(sk, pac, sizeof(*pac));
return err;
@@ -171,11 +173,13 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
spin_unlock_bh(&ipv6_sk_ac_lock);
+ rtnl_lock();
rcu_read_lock();
dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
if (dev)
ipv6_dev_ac_dec(dev, &pac->acl_addr);
rcu_read_unlock();
+ rtnl_unlock();
sock_kfree_s(sk, pac, sizeof(*pac));
return 0;
@@ -198,6 +202,7 @@ void ipv6_sock_ac_close(struct sock *sk)
spin_unlock_bh(&ipv6_sk_ac_lock);
prev_index = 0;
+ rtnl_lock();
rcu_read_lock();
while (pac) {
struct ipv6_ac_socklist *next = pac->acl_next;
@@ -212,6 +217,7 @@ void ipv6_sock_ac_close(struct sock *sk)
pac = next;
}
rcu_read_unlock();
+ rtnl_unlock();
}
static void aca_put(struct ifacaddr6 *ac)
@@ -233,6 +239,8 @@ int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr)
struct rt6_info *rt;
int err;
+ ASSERT_RTNL();
+
idev = in6_dev_get(dev);
if (idev == NULL)
@@ -302,6 +310,8 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct ifacaddr6 *aca, *prev_aca;
+ ASSERT_RTNL();
+
write_lock_bh(&idev->lock);
prev_aca = NULL;
for (aca = idev->ac_list; aca; aca = aca->aca_next) {
@@ -341,6 +351,27 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
return __ipv6_dev_ac_dec(idev, addr);
}
+void ipv6_ac_destroy_dev(struct inet6_dev *idev)
+{
+ struct ifacaddr6 *aca;
+
+ write_lock_bh(&idev->lock);
+ while ((aca = idev->ac_list) != NULL) {
+ idev->ac_list = aca->aca_next;
+ write_unlock_bh(&idev->lock);
+
+ addrconf_leave_solict(idev, &aca->aca_addr);
+
+ dst_hold(&aca->aca_rt->dst);
+ ip6_del_rt(aca->aca_rt);
+
+ aca_put(aca);
+
+ write_lock_bh(&idev->lock);
+ }
+ write_unlock_bh(&idev->lock);
+}
+
/*
* check if the interface has this anycast address
* called with rcu_read_lock()
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index c3bf2d2e519e..2753319524f1 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -199,6 +199,7 @@ ipv4_connected:
NULL);
sk->sk_state = TCP_ESTABLISHED;
+ ip6_set_txhash(sk);
out:
fl6_sock_release(flowlabel);
return err;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 6eef8a7e35f2..d15da1377149 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -421,8 +421,8 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
net_adj) & ~(blksize - 1)) + net_adj - 2;
}
-static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
@@ -431,18 +431,20 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
- return;
+ return 0;
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
esph->spi, IPPROTO_ESP, AF_INET6);
if (!x)
- return;
+ return 0;
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static void esp6_destroy(struct xfrm_state *x)
@@ -614,6 +616,11 @@ error:
return err;
}
+static int esp6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type esp6_type =
{
.description = "ESP6",
@@ -628,10 +635,11 @@ static const struct xfrm_type esp6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol esp6_protocol = {
- .handler = xfrm6_rcv,
+static struct xfrm6_protocol esp6_protocol = {
+ .handler = xfrm6_rcv,
+ .cb_handler = esp6_rcv_cb,
.err_handler = esp6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init esp6_init(void)
@@ -640,7 +648,7 @@ static int __init esp6_init(void)
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) {
+ if (xfrm6_protocol_register(&esp6_protocol, IPPROTO_ESP) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&esp6_type, AF_INET6);
return -EAGAIN;
@@ -651,7 +659,7 @@ static int __init esp6_init(void)
static void __exit esp6_fini(void)
{
- if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0)
+ if (xfrm6_protocol_deregister(&esp6_protocol, IPPROTO_ESP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index f2610e157660..06ba3e58320b 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -400,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
int len;
int hlimit;
int err = 0;
+ u32 mark = IP6_REPLY_MARK(net, skb->mark);
if ((u8 *)hdr < skb->head ||
(skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
@@ -466,6 +467,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
fl6.daddr = hdr->saddr;
if (saddr)
fl6.saddr = *saddr;
+ fl6.flowi6_mark = mark;
fl6.flowi6_oif = iif;
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
@@ -474,6 +476,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
sk = icmpv6_xmit_lock(net);
if (sk == NULL)
return;
+ sk->sk_mark = mark;
np = inet6_sk(sk);
if (!icmpv6_xrlim_allow(sk, type, &fl6))
@@ -493,12 +496,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
if (IS_ERR(dst))
goto out;
- if (ipv6_addr_is_multicast(&fl6.daddr))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
msg.skb = skb;
msg.offset = skb_network_offset(skb);
@@ -520,7 +518,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
np->tclass, NULL, &fl6, (struct rt6_info *)dst,
MSG_DONTWAIT, np->dontfrag);
if (err) {
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
@@ -556,6 +554,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
int err = 0;
int hlimit;
u8 tclass;
+ u32 mark = IP6_REPLY_MARK(net, skb->mark);
saddr = &ipv6_hdr(skb)->daddr;
@@ -574,11 +573,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
fl6.saddr = *saddr;
fl6.flowi6_oif = skb->dev->ifindex;
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
+ fl6.flowi6_mark = mark;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
if (sk == NULL)
return;
+ sk->sk_mark = mark;
np = inet6_sk(sk);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
@@ -593,12 +594,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
if (IS_ERR(dst))
goto out;
- if (ipv6_addr_is_multicast(&fl6.daddr))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
idev = __in6_dev_get(skb->dev);
@@ -630,9 +626,10 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
int inner_offset;
__be16 frag_off;
u8 nexthdr;
+ struct net *net = dev_net(skb->dev);
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
- return;
+ goto out;
nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
if (ipv6_ext_hdr(nexthdr)) {
@@ -640,14 +637,14 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
&nexthdr, &frag_off);
if (inner_offset<0)
- return;
+ goto out;
} else {
inner_offset = sizeof(struct ipv6hdr);
}
/* Checkin header including 8 bytes of inner protocol header. */
if (!pskb_may_pull(skb, inner_offset+8))
- return;
+ goto out;
/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
Without this we will not able f.e. to make source routed
@@ -656,13 +653,15 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
--ANK (980726)
*/
- rcu_read_lock();
ipprot = rcu_dereference(inet6_protos[nexthdr]);
if (ipprot && ipprot->err_handler)
ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
- rcu_read_unlock();
raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
+ return;
+
+out:
+ ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
}
/*
@@ -702,22 +701,11 @@ static int icmpv6_rcv(struct sk_buff *skb)
saddr = &ipv6_hdr(skb)->saddr;
daddr = &ipv6_hdr(skb)->daddr;
- /* Perform checksum. */
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
- skb->csum))
- break;
- /* fall through */
- case CHECKSUM_NONE:
- skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
- IPPROTO_ICMPV6, 0));
- if (__skb_checksum_complete(skb)) {
- LIMIT_NETDEBUG(KERN_DEBUG
- "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
- saddr, daddr);
- goto csum_error;
- }
+ if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
+ LIMIT_NETDEBUG(KERN_DEBUG
+ "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
+ saddr, daddr);
+ goto csum_error;
}
if (!pskb_pull(skb, sizeof(*hdr)))
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index c9138189415a..a245e5ddffbd 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -81,7 +81,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
final_p = fl6_update_dst(fl6, np->opt, &final);
fl6->saddr = ireq->ir_v6_loc_addr;
fl6->flowi6_oif = ireq->ir_iif;
- fl6->flowi6_mark = sk->sk_mark;
+ fl6->flowi6_mark = ireq->ir_mark;
fl6->fl6_dport = ireq->ir_rmt_port;
fl6->fl6_sport = htons(ireq->ir_num);
security_req_classify_flow(req, flowi6_to_flowi(fl6));
@@ -224,9 +224,8 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
return dst;
}
-int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
+int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused)
{
- struct sock *sk = skb->sk;
struct ipv6_pinfo *np = inet6_sk(sk);
struct flowi6 fl6;
struct dst_entry *dst;
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index 72d198b8e4d2..9a4d7322fb22 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -75,23 +75,50 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
return err;
}
- if (uh->check == 0) {
- /* RFC 2460 section 8.1 says that we SHOULD log
- this error. Well, it is reasonable.
- */
- LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
- return 1;
- }
- if (skb->ip_summed == CHECKSUM_COMPLETE &&
- !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
- skb->len, proto, skb->csum))
- skb->ip_summed = CHECKSUM_UNNECESSARY;
+ /* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels)
+ * we accept a checksum of zero here. When we find the socket
+ * for the UDP packet we'll check if that socket allows zero checksum
+ * for IPv6 (set by socket option).
+ */
+ return skb_checksum_init_zero_check(skb, proto, uh->check,
+ ip6_compute_pseudo);
+}
+EXPORT_SYMBOL(udp6_csum_init);
+
+/* Function to set UDP checksum for an IPv6 UDP packet. This is intended
+ * for the simple case like when setting the checksum for a UDP tunnel.
+ */
+void udp6_set_csum(bool nocheck, struct sk_buff *skb,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr, int len)
+{
+ struct udphdr *uh = udp_hdr(skb);
+
+ if (nocheck)
+ uh->check = 0;
+ else if (skb_is_gso(skb))
+ uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+ else if (skb_dst(skb) && skb_dst(skb)->dev &&
+ (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
- if (!skb_csum_unnecessary(skb))
- skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr,
- skb->len, proto, 0));
+ BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
- return 0;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+ } else {
+ __wsum csum;
+
+ BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+ uh->check = 0;
+ csum = skb_checksum(skb, 0, len, 0);
+ uh->check = udp_v6_check(len, saddr, daddr, csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
}
-EXPORT_SYMBOL(udp6_csum_init);
+EXPORT_SYMBOL(udp6_set_csum);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 075602fc6b6a..97b9fa8de377 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -9,14 +9,12 @@
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
- */
-
-/*
- * Changes:
- * Yuji SEKIYA @USAGI: Support default route on router node;
- * remove ip6_null_entry from the top of
- * routing table.
- * Ville Nuorvala: Fixed routing subtrees.
+ *
+ * Changes:
+ * Yuji SEKIYA @USAGI: Support default route on router node;
+ * remove ip6_null_entry from the top of
+ * routing table.
+ * Ville Nuorvala: Fixed routing subtrees.
*/
#define pr_fmt(fmt) "IPv6: " fmt
@@ -46,10 +44,9 @@
#define RT6_TRACE(x...) do { ; } while (0)
#endif
-static struct kmem_cache * fib6_node_kmem __read_mostly;
+static struct kmem_cache *fib6_node_kmem __read_mostly;
-enum fib_walk_state_t
-{
+enum fib_walk_state_t {
#ifdef CONFIG_IPV6_SUBTREES
FWS_S,
#endif
@@ -59,8 +56,7 @@ enum fib_walk_state_t
FWS_U
};
-struct fib6_cleaner_t
-{
+struct fib6_cleaner_t {
struct fib6_walker_t w;
struct net *net;
int (*func)(struct rt6_info *, void *arg);
@@ -75,8 +71,7 @@ static DEFINE_RWLOCK(fib6_walker_lock);
#define FWS_INIT FWS_L
#endif
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
- struct rt6_info *rt);
+static void fib6_prune_clones(struct net *net, struct fib6_node *fn);
static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
static int fib6_walk(struct fib6_walker_t *w);
@@ -138,7 +133,7 @@ static __inline__ __be32 addr_bit_set(const void *token, int fn_bit)
const __be32 *addr = token;
/*
* Here,
- * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
+ * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
* is optimized version of
* htonl(1 << ((~fn_bit)&0x1F))
* See include/asm-generic/bitops/le.h.
@@ -147,7 +142,7 @@ static __inline__ __be32 addr_bit_set(const void *token, int fn_bit)
addr[fn_bit >> 5];
}
-static __inline__ struct fib6_node * node_alloc(void)
+static __inline__ struct fib6_node *node_alloc(void)
{
struct fib6_node *fn;
@@ -156,7 +151,7 @@ static __inline__ struct fib6_node * node_alloc(void)
return fn;
}
-static __inline__ void node_free(struct fib6_node * fn)
+static __inline__ void node_free(struct fib6_node *fn)
{
kmem_cache_free(fib6_node_kmem, fn);
}
@@ -292,7 +287,7 @@ static int fib6_dump_node(struct fib6_walker_t *w)
static void fib6_dump_end(struct netlink_callback *cb)
{
- struct fib6_walker_t *w = (void*)cb->args[2];
+ struct fib6_walker_t *w = (void *)cb->args[2];
if (w) {
if (cb->args[4]) {
@@ -302,7 +297,7 @@ static void fib6_dump_end(struct netlink_callback *cb)
cb->args[2] = 0;
kfree(w);
}
- cb->done = (void*)cb->args[3];
+ cb->done = (void *)cb->args[3];
cb->args[1] = 3;
}
@@ -485,7 +480,7 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
fn->fn_sernum = sernum;
dir = addr_bit_set(addr, fn->fn_bit);
pn = fn;
- fn = dir ? fn->right: fn->left;
+ fn = dir ? fn->right : fn->left;
} while (fn);
if (!allow_create) {
@@ -638,12 +633,41 @@ static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt)
RTF_GATEWAY;
}
+static int fib6_commit_metrics(struct dst_entry *dst,
+ struct nlattr *mx, int mx_len)
+{
+ struct nlattr *nla;
+ int remaining;
+ u32 *mp;
+
+ if (dst->flags & DST_HOST) {
+ mp = dst_metrics_write_ptr(dst);
+ } else {
+ mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
+ if (!mp)
+ return -ENOMEM;
+ dst_init_metrics(dst, mp, 0);
+ }
+
+ nla_for_each_attr(nla, mx, mx_len, remaining) {
+ int type = nla_type(nla);
+
+ if (type) {
+ if (type > RTAX_MAX)
+ return -EINVAL;
+
+ mp[type - 1] = nla_get_u32(nla);
+ }
+ }
+ return 0;
+}
+
/*
* Insert routing information in a node.
*/
static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
- struct nl_info *info)
+ struct nl_info *info, struct nlattr *mx, int mx_len)
{
struct rt6_info *iter = NULL;
struct rt6_info **ins;
@@ -653,6 +677,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
(info->nlh->nlmsg_flags & NLM_F_CREATE));
int found = 0;
bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
+ int err;
ins = &fn->leaf;
@@ -751,6 +776,11 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
pr_warn("NLM_F_CREATE should be set when creating new route\n");
add:
+ if (mx) {
+ err = fib6_commit_metrics(&rt->dst, mx, mx_len);
+ if (err)
+ return err;
+ }
rt->dst.rt6_next = iter;
*ins = rt;
rt->rt6i_node = fn;
@@ -770,6 +800,11 @@ add:
pr_warn("NLM_F_REPLACE set, but no existing node found!\n");
return -ENOENT;
}
+ if (mx) {
+ err = fib6_commit_metrics(&rt->dst, mx, mx_len);
+ if (err)
+ return err;
+ }
*ins = rt;
rt->rt6i_node = fn;
rt->dst.rt6_next = iter->dst.rt6_next;
@@ -806,7 +841,8 @@ void fib6_force_start_gc(struct net *net)
* with source addr info in sub-trees
*/
-int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
+int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info,
+ struct nlattr *mx, int mx_len)
{
struct fib6_node *fn, *pn = NULL;
int err = -ENOMEM;
@@ -900,11 +936,11 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
}
#endif
- err = fib6_add_rt2node(fn, rt, info);
+ err = fib6_add_rt2node(fn, rt, info, mx, mx_len);
if (!err) {
fib6_start_gc(info->nl_net, rt);
if (!(rt->rt6i_flags & RTF_CACHE))
- fib6_prune_clones(info->nl_net, pn, rt);
+ fib6_prune_clones(info->nl_net, pn);
}
out:
@@ -955,8 +991,8 @@ struct lookup_args {
const struct in6_addr *addr; /* search key */
};
-static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
- struct lookup_args *args)
+static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
+ struct lookup_args *args)
{
struct fib6_node *fn;
__be32 dir;
@@ -1018,8 +1054,8 @@ backtrack:
return NULL;
}
-struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
- const struct in6_addr *saddr)
+struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
struct fib6_node *fn;
struct lookup_args args[] = {
@@ -1051,9 +1087,9 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *da
*/
-static struct fib6_node * fib6_locate_1(struct fib6_node *root,
- const struct in6_addr *addr,
- int plen, int offset)
+static struct fib6_node *fib6_locate_1(struct fib6_node *root,
+ const struct in6_addr *addr,
+ int plen, int offset)
{
struct fib6_node *fn;
@@ -1081,9 +1117,9 @@ static struct fib6_node * fib6_locate_1(struct fib6_node *root,
return NULL;
}
-struct fib6_node * fib6_locate(struct fib6_node *root,
- const struct in6_addr *daddr, int dst_len,
- const struct in6_addr *saddr, int src_len)
+struct fib6_node *fib6_locate(struct fib6_node *root,
+ const struct in6_addr *daddr, int dst_len,
+ const struct in6_addr *saddr, int src_len)
{
struct fib6_node *fn;
@@ -1151,8 +1187,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
children = 0;
child = NULL;
- if (fn->right) child = fn->right, children |= 1;
- if (fn->left) child = fn->left, children |= 2;
+ if (fn->right)
+ child = fn->right, children |= 1;
+ if (fn->left)
+ child = fn->left, children |= 2;
if (children == 3 || FIB6_SUBTREE(fn)
#ifdef CONFIG_IPV6_SUBTREES
@@ -1180,8 +1218,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
} else {
WARN_ON(fn->fn_flags & RTN_ROOT);
#endif
- if (pn->right == fn) pn->right = child;
- else if (pn->left == fn) pn->left = child;
+ if (pn->right == fn)
+ pn->right = child;
+ else if (pn->left == fn)
+ pn->left = child;
#if RT6_DEBUG >= 2
else
WARN_ON(1);
@@ -1213,10 +1253,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
w->node = child;
if (children&2) {
RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
- w->state = w->state>=FWS_R ? FWS_U : FWS_INIT;
+ w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;
} else {
RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
- w->state = w->state>=FWS_C ? FWS_U : FWS_INIT;
+ w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;
}
}
}
@@ -1314,7 +1354,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
struct rt6_info **rtp;
#if RT6_DEBUG >= 2
- if (rt->dst.obsolete>0) {
+ if (rt->dst.obsolete > 0) {
WARN_ON(fn != NULL);
return -ENOENT;
}
@@ -1334,7 +1374,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
pn = pn->parent;
}
#endif
- fib6_prune_clones(info->nl_net, pn, rt);
+ fib6_prune_clones(info->nl_net, pn);
}
/*
@@ -1418,7 +1458,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
if (w->skip) {
w->skip--;
- continue;
+ goto skip;
}
err = w->func(w);
@@ -1428,6 +1468,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
w->count++;
continue;
}
+skip:
w->state = FWS_U;
case FWS_U:
if (fn == w->root)
@@ -1559,10 +1600,27 @@ static int fib6_prune_clone(struct rt6_info *rt, void *arg)
return 0;
}
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
- struct rt6_info *rt)
+static void fib6_prune_clones(struct net *net, struct fib6_node *fn)
{
- fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt);
+ fib6_clean_tree(net, fn, fib6_prune_clone, 1, NULL);
+}
+
+static int fib6_update_sernum(struct rt6_info *rt, void *arg)
+{
+ __u32 sernum = *(__u32 *)arg;
+
+ if (rt->rt6i_node &&
+ rt->rt6i_node->fn_sernum != sernum)
+ rt->rt6i_node->fn_sernum = sernum;
+
+ return 0;
+}
+
+static void fib6_flush_trees(struct net *net)
+{
+ __u32 new_sernum = fib6_new_sernum();
+
+ fib6_clean_all(net, fib6_update_sernum, &new_sernum);
}
/*
@@ -1707,7 +1765,7 @@ out_rt6_stats:
kfree(net->ipv6.rt6_stats);
out_timer:
return -ENOMEM;
- }
+}
static void fib6_net_exit(struct net *net)
{
@@ -1748,6 +1806,8 @@ int __init fib6_init(void)
NULL);
if (ret)
goto out_unregister_subsys;
+
+ __fib6_flush_trees = fib6_flush_trees;
out:
return ret;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index dfa41bb4e0dc..4052694c6f2c 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -15,9 +15,7 @@
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
-#include <linux/if_arp.h>
#include <linux/in6.h>
-#include <linux/route.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
@@ -28,12 +26,7 @@
#include <net/sock.h>
#include <net/ipv6.h>
-#include <net/ndisc.h>
-#include <net/protocol.h>
-#include <net/ip6_route.h>
-#include <net/addrconf.h>
#include <net/rawv6.h>
-#include <net/icmp.h>
#include <net/transp_v6.h>
#include <asm/uaccess.h>
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index f3ffb43f59c0..f304471477dc 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -72,6 +72,7 @@ struct ip6gre_net {
};
static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
+static struct rtnl_link_ops ip6gre_tap_ops __read_mostly;
static int ip6gre_tunnel_init(struct net_device *dev);
static void ip6gre_tunnel_setup(struct net_device *dev);
static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
@@ -313,6 +314,8 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE);
+ if (t && create)
+ return NULL;
if (t || !create)
return t;
@@ -321,7 +324,8 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
else
strcpy(name, "ip6gre%d");
- dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup);
+ dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
+ ip6gre_tunnel_setup);
if (!dev)
return NULL;
@@ -353,10 +357,10 @@ failed_free:
static void ip6gre_tunnel_uninit(struct net_device *dev)
{
- struct net *net = dev_net(dev);
- struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
- ip6gre_tunnel_unlink(ign, netdev_priv(dev));
+ ip6gre_tunnel_unlink(ign, t);
dev_put(dev);
}
@@ -467,17 +471,7 @@ static int ip6gre_rcv(struct sk_buff *skb)
goto drop;
if (flags&GRE_CSUM) {
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- csum = csum_fold(skb->csum);
- if (!csum)
- break;
- /* fall through */
- case CHECKSUM_NONE:
- skb->csum = 0;
- csum = __skb_checksum_complete(skb);
- skb->ip_summed = CHECKSUM_COMPLETE;
- }
+ csum = skb_checksum_simple_validate(skb);
offset += 4;
}
if (flags&GRE_KEY) {
@@ -611,8 +605,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
int encap_limit,
__u32 *pmtu)
{
- struct net *net = dev_net(dev);
struct ip6_tnl *tunnel = netdev_priv(dev);
+ struct net *net = tunnel->net;
struct net_device *tdev; /* Device to other host */
struct ipv6hdr *ipv6h; /* Our new IP header */
unsigned int max_headroom = 0; /* The extra header space needed */
@@ -732,7 +726,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
* Push down and install the IP header.
*/
ipv6h = ipv6_hdr(skb);
- ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
+ ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
+ ip6_make_flowlabel(net, skb, fl6->flowlabel, false));
ipv6h->hop_limit = tunnel->parms.hop_limit;
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
@@ -979,7 +974,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
int strict = (ipv6_addr_type(&p->raddr) &
(IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
- struct rt6_info *rt = rt6_lookup(dev_net(dev),
+ struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
p->link, strict);
@@ -1063,13 +1058,12 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
int err = 0;
struct ip6_tnl_parm2 p;
struct __ip6_tnl_parm p1;
- struct ip6_tnl *t;
- struct net *net = dev_net(dev);
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = t->net;
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
switch (cmd) {
case SIOCGETTUNNEL:
- t = NULL;
if (dev == ign->fb_tunnel_dev) {
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
err = -EFAULT;
@@ -1077,9 +1071,9 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
}
ip6gre_tnl_parm_from_user(&p1, &p);
t = ip6gre_tunnel_locate(net, &p1, 0);
+ if (t == NULL)
+ t = netdev_priv(dev);
}
- if (t == NULL)
- t = netdev_priv(dev);
memset(&p, 0, sizeof(p));
ip6gre_tnl_parm_to_user(&p, &t->parms);
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
@@ -1184,7 +1178,9 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
__be16 *p = (__be16 *)(ipv6h+1);
- ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel);
+ ip6_flow_hdr(ipv6h, 0,
+ ip6_make_flowlabel(dev_net(dev), skb,
+ t->fl.u.ip6.flowlabel, false));
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = NEXTHDR_GRE;
ipv6h->saddr = t->parms.laddr;
@@ -1242,7 +1238,6 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
dev->flags |= IFF_NOARP;
dev->iflink = 0;
dev->addr_len = sizeof(struct in6_addr);
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
}
@@ -1297,11 +1292,17 @@ static struct inet6_protocol ip6gre_protocol __read_mostly = {
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
-static void ip6gre_destroy_tunnels(struct ip6gre_net *ign,
- struct list_head *head)
+static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
{
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ struct net_device *dev, *aux;
int prio;
+ for_each_netdev_safe(net, dev, aux)
+ if (dev->rtnl_link_ops == &ip6gre_link_ops ||
+ dev->rtnl_link_ops == &ip6gre_tap_ops)
+ unregister_netdevice_queue(dev, head);
+
for (prio = 0; prio < 4; prio++) {
int h;
for (h = 0; h < HASH_SIZE; h++) {
@@ -1310,7 +1311,12 @@ static void ip6gre_destroy_tunnels(struct ip6gre_net *ign,
t = rtnl_dereference(ign->tunnels[prio][h]);
while (t != NULL) {
- unregister_netdevice_queue(t->dev, head);
+ /* If dev is in the same netns, it has already
+ * been added to the list by the previous loop.
+ */
+ if (!net_eq(dev_net(t->dev), net))
+ unregister_netdevice_queue(t->dev,
+ head);
t = rtnl_dereference(t->next);
}
}
@@ -1323,12 +1329,18 @@ static int __net_init ip6gre_init_net(struct net *net)
int err;
ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
- ip6gre_tunnel_setup);
+ NET_NAME_UNKNOWN,
+ ip6gre_tunnel_setup);
if (!ign->fb_tunnel_dev) {
err = -ENOMEM;
goto err_alloc_dev;
}
dev_net_set(ign->fb_tunnel_dev, net);
+ /* FB netdevice is special: we have one, and only one per netns.
+ * Allowing it to be moved to another netns is clearly unsafe.
+ */
+ ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+
ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
@@ -1349,12 +1361,10 @@ err_alloc_dev:
static void __net_exit ip6gre_exit_net(struct net *net)
{
- struct ip6gre_net *ign;
LIST_HEAD(list);
- ign = net_generic(net, ip6gre_net_id);
rtnl_lock();
- ip6gre_destroy_tunnels(ign, &list);
+ ip6gre_destroy_tunnels(net, &list);
unregister_netdevice_many(&list);
rtnl_unlock();
}
@@ -1454,7 +1464,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
static int ip6gre_tap_init(struct net_device *dev)
{
struct ip6_tnl *tunnel;
- int i;
tunnel = netdev_priv(dev);
@@ -1464,16 +1473,10 @@ static int ip6gre_tap_init(struct net_device *dev)
ip6gre_tnl_link_config(tunnel, 1);
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *ip6gre_tap_stats;
- ip6gre_tap_stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&ip6gre_tap_stats->syncp);
- }
-
return 0;
}
@@ -1538,15 +1541,14 @@ out:
static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
- struct ip6_tnl *t, *nt;
- struct net *net = dev_net(dev);
+ struct ip6_tnl *t, *nt = netdev_priv(dev);
+ struct net *net = nt->net;
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
struct __ip6_tnl_parm p;
if (dev == ign->fb_tunnel_dev)
return -EINVAL;
- nt = netdev_priv(dev);
ip6gre_netlink_parms(data, &p);
t = ip6gre_tunnel_locate(net, &p, 0);
@@ -1566,6 +1568,15 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
return 0;
}
+static void ip6gre_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct net *net = dev_net(dev);
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+ if (dev != ign->fb_tunnel_dev)
+ unregister_netdevice_queue(dev, head);
+}
+
static size_t ip6gre_get_size(const struct net_device *dev)
{
return
@@ -1643,6 +1654,7 @@ static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
.validate = ip6gre_tunnel_validate,
.newlink = ip6gre_newlink,
.changelink = ip6gre_changelink,
+ .dellink = ip6gre_dellink,
.get_size = ip6gre_get_size,
.fill_info = ip6gre_fill_info,
};
@@ -1714,4 +1726,5 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
MODULE_ALIAS_RTNL_LINK("ip6gre");
+MODULE_ALIAS_RTNL_LINK("ip6gretap");
MODULE_ALIAS_NETDEV("ip6gre0");
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 59f95affceb0..65eda2a8af48 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -97,9 +97,11 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
SKB_GSO_SIT |
SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
SKB_GSO_MPLS |
SKB_GSO_TCPV6 |
0)))
@@ -196,7 +198,6 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
unsigned int off;
u16 flush = 1;
int proto;
- __wsum csum;
off = skb_gro_offset(skb);
hlen = off + sizeof(*iph);
@@ -264,13 +265,10 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
NAPI_GRO_CB(skb)->flush |= flush;
- csum = skb->csum;
- skb_postpull_rcsum(skb, iph, skb_network_header_len(skb));
+ skb_gro_postpull_rcsum(skb, iph, nlen);
pp = ops->callbacks.gro_receive(head, skb);
- skb->csum = csum;
-
out_unlock:
rcu_read_unlock();
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 16f91a2e7888..0a3448b2888f 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -132,7 +132,7 @@ static int ip6_finish_output(struct sk_buff *skb)
return ip6_finish_output2(skb);
}
-int ip6_output(struct sk_buff *skb)
+int ip6_output(struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
@@ -205,7 +205,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
if (hlimit < 0)
hlimit = ip6_dst_hoplimit(dst);
- ip6_flow_hdr(hdr, tclass, fl6->flowlabel);
+ ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
+ np->autoflowlabel));
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
@@ -219,7 +220,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
skb->mark = sk->sk_mark;
mtu = dst_mtu(dst);
- if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
+ if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUT, skb->len);
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
@@ -344,12 +345,16 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
- if (skb->len <= mtu || skb->local_df)
+ if (skb->len <= mtu)
return false;
+ /* ipv6 conntrack defrag sets frag_max_size + ignore_df */
if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
return true;
+ if (skb->ignore_df)
+ return false;
+
if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
return false;
@@ -367,6 +372,9 @@ int ip6_forward(struct sk_buff *skb)
if (net->ipv6.devconf_all->forwarding == 0)
goto error;
+ if (skb->pkt_type != PACKET_HOST)
+ goto drop;
+
if (skb_warn_if_lro(skb))
goto drop;
@@ -376,9 +384,6 @@ int ip6_forward(struct sk_buff *skb)
goto drop;
}
- if (skb->pkt_type != PACKET_HOST)
- goto drop;
-
skb_forward_csum(skb);
/*
@@ -533,6 +538,20 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from);
}
+static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
+{
+ static u32 ip6_idents_hashrnd __read_mostly;
+ u32 hash, id;
+
+ net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
+
+ hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
+ hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash);
+
+ id = ip_idents_reserve(hash, 1);
+ fhdr->identification = htonl(id);
+}
+
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
struct sk_buff *frag;
@@ -555,7 +574,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
/* We must not fragment if the socket is set to force MTU discovery
* or if the skb it not generated by a local socket.
*/
- if (unlikely(!skb->local_df && skb->len > mtu) ||
+ if (unlikely(!skb->ignore_df && skb->len > mtu) ||
(IP6CB(skb)->frag_max_size &&
IP6CB(skb)->frag_max_size > mtu)) {
if (skb->sk && dst_allfrag(skb_dst(skb)))
@@ -784,8 +803,8 @@ slow_path:
/*
* Copy a block of the IP datagram.
*/
- if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
- BUG();
+ BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
+ len));
left -= len;
fh->frag_off = htons(offset);
@@ -990,7 +1009,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
if (final_dst)
fl6->daddr = *final_dst;
- return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+ return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
@@ -1022,7 +1041,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
if (final_dst)
fl6->daddr = *final_dst;
- return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+ return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
@@ -1101,21 +1120,19 @@ static void ip6_append_data_mtu(unsigned int *mtu,
unsigned int fragheaderlen,
struct sk_buff *skb,
struct rt6_info *rt,
- bool pmtuprobe)
+ unsigned int orig_mtu)
{
if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
if (skb == NULL) {
/* first fragment, reserve header_len */
- *mtu = *mtu - rt->dst.header_len;
+ *mtu = orig_mtu - rt->dst.header_len;
} else {
/*
* this fragment is not first, the headers
* space is regarded as data space.
*/
- *mtu = min(*mtu, pmtuprobe ?
- rt->dst.dev->mtu :
- dst_mtu(rt->dst.path));
+ *mtu = orig_mtu;
}
*maxfraglen = ((*mtu - fragheaderlen) & ~7)
+ fragheaderlen - sizeof(struct frag_hdr);
@@ -1132,7 +1149,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_cork *cork;
struct sk_buff *skb, *skb_prev = NULL;
- unsigned int maxfraglen, fragheaderlen, mtu;
+ unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
int exthdrlen;
int dst_exthdrlen;
int hh_len;
@@ -1140,6 +1157,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
int err;
int offset = 0;
__u8 tx_flags = 0;
+ u32 tskey = 0;
if (flags&MSG_PROBE)
return 0;
@@ -1214,6 +1232,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
dst_exthdrlen = 0;
mtu = cork->fragsize;
}
+ orig_mtu = mtu;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -1226,13 +1245,15 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
unsigned int maxnonfragsize, headersize;
headersize = sizeof(struct ipv6hdr) +
- (opt ? opt->tot_len : 0) +
+ (opt ? opt->opt_flen + opt->opt_nflen : 0) +
(dst_allfrag(&rt->dst) ?
sizeof(struct frag_hdr) : 0) +
rt->rt6i_nfheader_len;
- maxnonfragsize = (np->pmtudisc >= IPV6_PMTUDISC_DO) ?
- mtu : sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
+ if (ip6_sk_ignore_df(sk))
+ maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
+ else
+ maxnonfragsize = mtu;
/* dontfrag active */
if ((cork->length + length > mtu - headersize) && dontfrag &&
@@ -1252,9 +1273,12 @@ emsgsize:
}
}
- /* For UDP, check if TX timestamp is enabled */
- if (sk->sk_type == SOCK_DGRAM)
+ if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
sock_tx_timestamp(sk, &tx_flags);
+ if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
+ sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ tskey = sk->sk_tskey++;
+ }
/*
* Let's try using as much space as possible.
@@ -1311,8 +1335,7 @@ alloc_new_skb:
if (skb == NULL || skb_prev == NULL)
ip6_append_data_mtu(&mtu, &maxfraglen,
fragheaderlen, skb, rt,
- np->pmtudisc >=
- IPV6_PMTUDISC_PROBE);
+ orig_mtu);
skb_prev = skb;
@@ -1363,12 +1386,6 @@ alloc_new_skb:
sk->sk_allocation);
if (unlikely(skb == NULL))
err = -ENOBUFS;
- else {
- /* Only the initial fragment
- * is time stamped.
- */
- tx_flags = 0;
- }
}
if (skb == NULL)
goto error;
@@ -1382,8 +1399,11 @@ alloc_new_skb:
skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
dst_exthdrlen);
- if (sk->sk_type == SOCK_DGRAM)
- skb_shinfo(skb)->tx_flags = tx_flags;
+ /* Only the initial fragment is time stamped */
+ skb_shinfo(skb)->tx_flags = tx_flags;
+ tx_flags = 0;
+ skb_shinfo(skb)->tskey = tskey;
+ tskey = 0;
/*
* Find where to start putting bytes
@@ -1540,8 +1560,7 @@ int ip6_push_pending_frames(struct sock *sk)
}
/* Allow local fragmentation. */
- if (np->pmtudisc < IPV6_PMTUDISC_DO)
- skb->local_df = 1;
+ skb->ignore_df = ip6_sk_ignore_df(sk);
*final_dst = fl6->daddr;
__skb_pull(skb, skb_network_header_len(skb));
@@ -1554,7 +1573,9 @@ int ip6_push_pending_frames(struct sock *sk)
skb_reset_network_header(skb);
hdr = ipv6_hdr(skb);
- ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
+ ip6_flow_hdr(hdr, np->cork.tclass,
+ ip6_make_flowlabel(net, skb, fl6->flowlabel,
+ np->autoflowlabel));
hdr->hop_limit = np->cork.hop_limit;
hdr->nexthdr = proto;
hdr->saddr = fl6->saddr;
@@ -1568,8 +1589,8 @@ int ip6_push_pending_frames(struct sock *sk)
if (proto == IPPROTO_ICMPV6) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
- ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
+ ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
}
err = ip6_local_out(skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5db8d310f9c0..69a84b464009 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -61,6 +61,7 @@
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("ip6tnl");
MODULE_ALIAS_NETDEV("ip6tnl0");
#ifdef IP6_TNL_DEBUG
@@ -108,12 +109,12 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
per_cpu_ptr(dev->tstats, i);
do {
- start = u64_stats_fetch_begin_bh(&tstats->syncp);
+ start = u64_stats_fetch_begin_irq(&tstats->syncp);
tmp.rx_packets = tstats->rx_packets;
tmp.rx_bytes = tstats->rx_bytes;
tmp.tx_packets = tstats->tx_packets;
tmp.tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
+ } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
sum.rx_packets += tmp.rx_packets;
sum.rx_bytes += tmp.rx_bytes;
@@ -314,7 +315,8 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
else
sprintf(name, "ip6tnl%%d");
- dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
+ dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
+ ip6_tnl_dev_setup);
if (dev == NULL)
goto failed;
@@ -362,8 +364,12 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net,
(t = rtnl_dereference(*tp)) != NULL;
tp = &t->next) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr))
+ ipv6_addr_equal(remote, &t->parms.raddr)) {
+ if (create)
+ return NULL;
+
return t;
+ }
}
if (!create)
return NULL;
@@ -1045,7 +1051,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
- ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
+ ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
+ ip6_make_flowlabel(net, skb, fl6->flowlabel, false));
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
@@ -1340,8 +1347,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
int err = 0;
struct ip6_tnl_parm p;
struct __ip6_tnl_parm p1;
- struct ip6_tnl *t = NULL;
- struct net *net = dev_net(dev);
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = t->net;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
switch (cmd) {
@@ -1353,11 +1360,11 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
}
ip6_tnl_parm_from_user(&p1, &p);
t = ip6_tnl_locate(net, &p1, 0);
+ if (t == NULL)
+ t = netdev_priv(dev);
} else {
memset(&p, 0, sizeof(p));
}
- if (t == NULL)
- t = netdev_priv(dev);
ip6_tnl_parm_to_user(&p, &t->parms);
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
err = -EFAULT;
@@ -1502,19 +1509,12 @@ static inline int
ip6_tnl_dev_init_gen(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- int i;
t->dev = dev;
t->net = dev_net(dev);
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
-
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *ip6_tnl_stats;
- ip6_tnl_stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&ip6_tnl_stats->syncp);
- }
return 0;
}
@@ -1564,7 +1564,7 @@ static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[])
{
u8 proto;
- if (!data)
+ if (!data || !data[IFLA_IPTUN_PROTO])
return 0;
proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
@@ -1778,7 +1778,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
err = -ENOMEM;
ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
- ip6_tnl_dev_setup);
+ NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
if (!ip6n->fb_tnl_dev)
goto err_alloc_dev;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 2d19272b8cee..5833a2244467 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -204,7 +204,7 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p
else
sprintf(name, "ip6_vti%%d");
- dev = alloc_netdev(sizeof(*t), name, vti6_dev_setup);
+ dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, vti6_dev_setup);
if (dev == NULL)
goto failed;
@@ -253,8 +253,12 @@ static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p,
(t = rtnl_dereference(*tp)) != NULL;
tp = &t->next) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr))
+ ipv6_addr_equal(remote, &t->parms.raddr)) {
+ if (create)
+ return NULL;
+
return t;
+ }
}
if (!create)
return NULL;
@@ -278,7 +282,6 @@ static void vti6_dev_uninit(struct net_device *dev)
RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
else
vti6_tnl_unlink(ip6n, t);
- ip6_tnl_dst_reset(t);
dev_put(dev);
}
@@ -288,11 +291,8 @@ static int vti6_rcv(struct sk_buff *skb)
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
rcu_read_lock();
-
if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
&ipv6h->daddr)) != NULL) {
- struct pcpu_sw_netstats *tstats;
-
if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) {
rcu_read_unlock();
goto discard;
@@ -309,27 +309,58 @@ static int vti6_rcv(struct sk_buff *skb)
goto discard;
}
- tstats = this_cpu_ptr(t->dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
-
- skb->mark = 0;
- secpath_reset(skb);
- skb->dev = t->dev;
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
+ skb->mark = be32_to_cpu(t->parms.i_key);
rcu_read_unlock();
- return 0;
+
+ return xfrm6_rcv(skb);
}
rcu_read_unlock();
- return 1;
-
+ return -EINVAL;
discard:
kfree_skb(skb);
return 0;
}
+static int vti6_rcv_cb(struct sk_buff *skb, int err)
+{
+ unsigned short family;
+ struct net_device *dev;
+ struct pcpu_sw_netstats *tstats;
+ struct xfrm_state *x;
+ struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
+
+ if (!t)
+ return 1;
+
+ dev = t->dev;
+
+ if (err) {
+ dev->stats.rx_errors++;
+ dev->stats.rx_dropped++;
+
+ return 0;
+ }
+
+ x = xfrm_input_state(skb);
+ family = x->inner_mode->afinfo->family;
+
+ if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+ return -EPERM;
+
+ skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev)));
+ skb->dev = dev;
+
+ tstats = this_cpu_ptr(dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
+
+ return 0;
+}
+
/**
* vti6_addr_conflict - compare packet addresses to tunnel's own
* @t: the outgoing tunnel device
@@ -349,44 +380,56 @@ vti6_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}
+static bool vti6_state_check(const struct xfrm_state *x,
+ const struct in6_addr *dst,
+ const struct in6_addr *src)
+{
+ xfrm_address_t *daddr = (xfrm_address_t *)dst;
+ xfrm_address_t *saddr = (xfrm_address_t *)src;
+
+ /* The tunnel is not functional if there is no transform,
+ * or if the transform is not an IPv6 tunnel-mode state.
+ */
+ if (!x || x->props.mode != XFRM_MODE_TUNNEL ||
+ x->props.family != AF_INET6)
+ return false;
+
+ if (ipv6_addr_any(dst))
+ return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET6);
+
+ if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET6))
+ return false;
+
+ return true;
+}
+
/**
* vti6_xmit - send a packet
* @skb: the outgoing socket buffer
* @dev: the outgoing tunnel device
+ * @fl: the flow information for the xfrm_lookup
**/
-static int vti6_xmit(struct sk_buff *skb, struct net_device *dev)
+static int
+vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
{
- struct net *net = dev_net(dev);
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
- struct dst_entry *dst = NULL, *ndst = NULL;
- struct flowi6 fl6;
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev;
int err = -1;
- if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
- !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h))
- return err;
-
- dst = ip6_tnl_dst_check(t);
- if (!dst) {
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-
- ndst = ip6_route_output(net, NULL, &fl6);
+ if (!dst)
+ goto tx_err_link_failure;
- if (ndst->error)
- goto tx_err_link_failure;
- ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(&fl6), NULL, 0);
- if (IS_ERR(ndst)) {
- err = PTR_ERR(ndst);
- ndst = NULL;
- goto tx_err_link_failure;
- }
- dst = ndst;
+ dst_hold(dst);
+ dst = xfrm_lookup(t->net, dst, fl, NULL, 0);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ dst = NULL;
+ goto tx_err_link_failure;
}
- if (!dst->xfrm || dst->xfrm->props.mode != XFRM_MODE_TUNNEL)
+ if (!vti6_state_check(dst->xfrm, &t->parms.raddr, &t->parms.laddr))
goto tx_err_link_failure;
tdev = dst->dev;
@@ -398,14 +441,21 @@ static int vti6_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_err_dst_release;
}
+ skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
+ skb_dst_set(skb, dst);
+ skb->dev = skb_dst(skb)->dev;
- skb_dst_drop(skb);
- skb_dst_set_noref(skb, dst);
+ err = dst_output(skb);
+ if (net_xmit_eval(err) == 0) {
+ struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
- ip6tunnel_xmit(skb, dev);
- if (ndst) {
- dev->mtu = dst_mtu(ndst);
- ip6_tnl_dst_store(t, ndst);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->tx_bytes += skb->len;
+ tstats->tx_packets++;
+ u64_stats_update_end(&tstats->syncp);
+ } else {
+ stats->tx_errors++;
+ stats->tx_aborted_errors++;
}
return 0;
@@ -413,7 +463,7 @@ tx_err_link_failure:
stats->tx_carrier_errors++;
dst_link_failure(skb);
tx_err_dst_release:
- dst_release(ndst);
+ dst_release(dst);
return err;
}
@@ -422,16 +472,33 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
+ struct ipv6hdr *ipv6h;
+ struct flowi fl;
int ret;
+ memset(&fl, 0, sizeof(fl));
+ skb->mark = be32_to_cpu(t->parms.o_key);
+
switch (skb->protocol) {
case htons(ETH_P_IPV6):
- ret = vti6_xmit(skb, dev);
+ ipv6h = ipv6_hdr(skb);
+
+ if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
+ !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h))
+ goto tx_err;
+
+ xfrm_decode_session(skb, &fl, AF_INET6);
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ break;
+ case htons(ETH_P_IP):
+ xfrm_decode_session(skb, &fl, AF_INET);
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
break;
default:
goto tx_err;
}
+ ret = vti6_xmit(skb, dev, &fl);
if (ret < 0)
goto tx_err;
@@ -444,24 +511,69 @@ tx_err:
return NETDEV_TX_OK;
}
+static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ __be32 spi;
+ __u32 mark;
+ struct xfrm_state *x;
+ struct ip6_tnl *t;
+ struct ip_esp_hdr *esph;
+ struct ip_auth_hdr *ah;
+ struct ip_comp_hdr *ipch;
+ struct net *net = dev_net(skb->dev);
+ const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
+ int protocol = iph->nexthdr;
+
+ t = vti6_tnl_lookup(dev_net(skb->dev), &iph->daddr, &iph->saddr);
+ if (!t)
+ return -1;
+
+ mark = be32_to_cpu(t->parms.o_key);
+
+ switch (protocol) {
+ case IPPROTO_ESP:
+ esph = (struct ip_esp_hdr *)(skb->data + offset);
+ spi = esph->spi;
+ break;
+ case IPPROTO_AH:
+ ah = (struct ip_auth_hdr *)(skb->data + offset);
+ spi = ah->spi;
+ break;
+ case IPPROTO_COMP:
+ ipch = (struct ip_comp_hdr *)(skb->data + offset);
+ spi = htonl(ntohs(ipch->cpi));
+ break;
+ default:
+ return 0;
+ }
+
+ if (type != ICMPV6_PKT_TOOBIG &&
+ type != NDISC_REDIRECT)
+ return 0;
+
+ x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr,
+ spi, protocol, AF_INET6);
+ if (!x)
+ return 0;
+
+ if (type == NDISC_REDIRECT)
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
+ else
+ ip6_update_pmtu(skb, net, info, 0, 0);
+ xfrm_state_put(x);
+
+ return 0;
+}
+
static void vti6_link_config(struct ip6_tnl *t)
{
- struct dst_entry *dst;
struct net_device *dev = t->dev;
struct __ip6_tnl_parm *p = &t->parms;
- struct flowi6 *fl6 = &t->fl.u.ip6;
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
- /* Set up flowi template */
- fl6->saddr = p->laddr;
- fl6->daddr = p->raddr;
- fl6->flowi6_oif = p->link;
- fl6->flowi6_mark = be32_to_cpu(p->i_key);
- fl6->flowi6_proto = p->proto;
- fl6->flowlabel = 0;
-
p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV |
IP6_TNL_F_CAP_PER_PACKET);
p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
@@ -472,28 +584,6 @@ static void vti6_link_config(struct ip6_tnl *t)
dev->flags &= ~IFF_POINTOPOINT;
dev->iflink = p->link;
-
- if (p->flags & IP6_TNL_F_CAP_XMIT) {
-
- dst = ip6_route_output(dev_net(dev), NULL, fl6);
- if (dst->error)
- return;
-
- dst = xfrm_lookup(dev_net(dev), dst, flowi6_to_flowi(fl6),
- NULL, 0);
- if (IS_ERR(dst))
- return;
-
- if (dst->dev) {
- dev->hard_header_len = dst->dev->hard_header_len;
-
- dev->mtu = dst_mtu(dst);
-
- if (dev->mtu < IPV6_MIN_MTU)
- dev->mtu = IPV6_MIN_MTU;
- }
- dst_release(dst);
- }
}
/**
@@ -709,18 +799,14 @@ static const struct net_device_ops vti6_netdev_ops = {
**/
static void vti6_dev_setup(struct net_device *dev)
{
- struct ip6_tnl *t;
-
dev->netdev_ops = &vti6_netdev_ops;
dev->destructor = vti6_dev_free;
dev->type = ARPHRD_TUNNEL6;
dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
dev->mtu = ETH_DATA_LEN;
- t = netdev_priv(dev);
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
}
@@ -731,18 +817,12 @@ static void vti6_dev_setup(struct net_device *dev)
static inline int vti6_dev_init_gen(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- int i;
t->dev = dev;
t->net = dev_net(dev);
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *stats;
- stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&stats->syncp);
- }
return 0;
}
@@ -914,11 +994,6 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = {
.fill_info = vti6_fill_info,
};
-static struct xfrm_tunnel_notifier vti6_handler __read_mostly = {
- .handler = vti6_rcv,
- .priority = 1,
-};
-
static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
{
int h;
@@ -949,7 +1024,7 @@ static int __net_init vti6_init_net(struct net *net)
err = -ENOMEM;
ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6_vti0",
- vti6_dev_setup);
+ NET_NAME_UNKNOWN, vti6_dev_setup);
if (!ip6n->fb_tnl_dev)
goto err_alloc_dev;
@@ -990,6 +1065,27 @@ static struct pernet_operations vti6_net_ops = {
.size = sizeof(struct vti6_net),
};
+static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
+static struct xfrm6_protocol vti_ah6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
+static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
/**
* vti6_tunnel_init - register protocol and reserve needed resources
*
@@ -997,17 +1093,26 @@ static struct pernet_operations vti6_net_ops = {
**/
static int __init vti6_tunnel_init(void)
{
- int err;
+ const char *msg;
+ int err;
+ msg = "tunnel device";
err = register_pernet_device(&vti6_net_ops);
if (err < 0)
- goto out_pernet;
+ goto pernet_dev_failed;
- err = xfrm6_mode_tunnel_input_register(&vti6_handler);
- if (err < 0) {
- pr_err("%s: can't register vti6\n", __func__);
- goto out;
- }
+ msg = "tunnel protocols";
+ err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP);
+ if (err < 0)
+ goto xfrm_proto_esp_failed;
+ err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH);
+ if (err < 0)
+ goto xfrm_proto_ah_failed;
+ err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP);
+ if (err < 0)
+ goto xfrm_proto_comp_failed;
+
+ msg = "netlink interface";
err = rtnl_link_register(&vti6_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -1015,10 +1120,15 @@ static int __init vti6_tunnel_init(void)
return 0;
rtnl_link_failed:
- xfrm6_mode_tunnel_input_deregister(&vti6_handler);
-out:
+ xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP);
+xfrm_proto_comp_failed:
+ xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+xfrm_proto_ah_failed:
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
+xfrm_proto_esp_failed:
unregister_pernet_device(&vti6_net_ops);
-out_pernet:
+pernet_dev_failed:
+ pr_err("vti6 init: failed to register %s\n", msg);
return err;
}
@@ -1028,9 +1138,9 @@ out_pernet:
static void __exit vti6_tunnel_cleanup(void)
{
rtnl_link_unregister(&vti6_link_ops);
- if (xfrm6_mode_tunnel_input_deregister(&vti6_handler))
- pr_info("%s: can't deregister vti6\n", __func__);
-
+ xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP);
+ xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti6_net_ops);
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 0eb4038a4d63..f9a3fd320d1d 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -700,7 +700,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
struct mr6_table *mrt;
struct flowi6 fl6 = {
.flowi6_oif = dev->ifindex,
- .flowi6_iif = skb->skb_iif,
+ .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_mark = skb->mark,
};
int err;
@@ -744,7 +744,7 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
else
sprintf(name, "pim6reg%u", mrt->id);
- dev = alloc_netdev(0, name, reg_vif_setup);
+ dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
if (dev == NULL)
return NULL;
@@ -1633,7 +1633,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
{
struct mr6_table *mrt;
struct flowi6 fl6 = {
- .flowi6_iif = skb->skb_iif,
+ .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_oif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
};
@@ -2349,13 +2349,14 @@ int ip6mr_get_route(struct net *net,
}
static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- u32 portid, u32 seq, struct mfc6_cache *c, int cmd)
+ u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
+ int flags)
{
struct nlmsghdr *nlh;
struct rtmsg *rtm;
int err;
- nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI);
+ nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -2423,7 +2424,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
if (skb == NULL)
goto errout;
- err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd);
+ err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
if (err < 0)
goto errout;
@@ -2462,7 +2463,8 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
if (ip6mr_fill_mroute(mrt, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE) < 0)
+ mfc, RTM_NEWROUTE,
+ NLM_F_MULTI) < 0)
goto done;
next_entry:
e++;
@@ -2476,7 +2478,8 @@ next_entry:
if (ip6mr_fill_mroute(mrt, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE) < 0) {
+ mfc, RTM_NEWROUTE,
+ NLM_F_MULTI) < 0) {
spin_unlock_bh(&mfc_unres_lock);
goto done;
}
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index da9becb42e81..d1c793cffcb5 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -53,7 +53,7 @@
#include <linux/icmpv6.h>
#include <linux/mutex.h>
-static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
@@ -65,19 +65,21 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
- return;
+ return 0;
spi = htonl(ntohs(ipcomph->cpi));
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
spi, IPPROTO_COMP, AF_INET6);
if (!x)
- return;
+ return 0;
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
@@ -174,6 +176,11 @@ out:
return err;
}
+static int ipcomp6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type ipcomp6_type =
{
.description = "IPCOMP6",
@@ -186,11 +193,12 @@ static const struct xfrm_type ipcomp6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol ipcomp6_protocol =
+static struct xfrm6_protocol ipcomp6_protocol =
{
.handler = xfrm6_rcv,
+ .cb_handler = ipcomp6_rcv_cb,
.err_handler = ipcomp6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init ipcomp6_init(void)
@@ -199,7 +207,7 @@ static int __init ipcomp6_init(void)
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
+ if (xfrm6_protocol_register(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&ipcomp6_type, AF_INET6);
return -EAGAIN;
@@ -209,7 +217,7 @@ static int __init ipcomp6_init(void)
static void __exit ipcomp6_fini(void)
{
- if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0)
+ if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 0a00f449de5e..0c289982796d 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -235,7 +235,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optlen < sizeof(int) ||
inet_sk(sk)->inet_num)
goto e_inval;
- np->ipv6only = valbool;
+ sk->sk_ipv6only = valbool;
retv = 0;
break;
@@ -722,7 +722,7 @@ done:
case IPV6_MTU_DISCOVER:
if (optlen < sizeof(int))
goto e_inval;
- if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_INTERFACE)
+ if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
goto e_inval;
np->pmtudisc = val;
retv = 0;
@@ -834,6 +834,10 @@ pref_skip_coa:
np->dontfrag = valbool;
retv = 0;
break;
+ case IPV6_AUTOFLOWLABEL:
+ np->autoflowlabel = valbool;
+ retv = 0;
+ break;
}
release_sock(sk);
@@ -1058,7 +1062,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
case IPV6_V6ONLY:
- val = np->ipv6only;
+ val = sk->sk_ipv6only;
break;
case IPV6_RECVPKTINFO:
@@ -1158,7 +1162,6 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
return 0;
- break;
}
case IPV6_TRANSPARENT:
@@ -1273,6 +1276,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->dontfrag;
break;
+ case IPV6_AUTOFLOWLABEL:
+ val = np->autoflowlabel;
+ break;
+
default:
return -ENOPROTOOPT;
}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index e1e47350784b..a23b655a7627 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -172,6 +172,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
mc_lst->next = NULL;
mc_lst->addr = *addr;
+ rtnl_lock();
rcu_read_lock();
if (ifindex == 0) {
struct rt6_info *rt;
@@ -185,6 +186,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (dev == NULL) {
rcu_read_unlock();
+ rtnl_unlock();
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
return -ENODEV;
}
@@ -202,6 +204,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (err) {
rcu_read_unlock();
+ rtnl_unlock();
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
return err;
}
@@ -212,6 +215,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
spin_unlock(&ipv6_sk_mc_lock);
rcu_read_unlock();
+ rtnl_unlock();
return 0;
}
@@ -229,6 +233,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (!ipv6_addr_is_multicast(addr))
return -EINVAL;
+ rtnl_lock();
spin_lock(&ipv6_sk_mc_lock);
for (lnk = &np->ipv6_mc_list;
(mc_lst = rcu_dereference_protected(*lnk,
@@ -252,12 +257,15 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
} else
(void) ip6_mc_leave_src(sk, mc_lst, NULL);
rcu_read_unlock();
+ rtnl_unlock();
+
atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
kfree_rcu(mc_lst, rcu);
return 0;
}
}
spin_unlock(&ipv6_sk_mc_lock);
+ rtnl_unlock();
return -EADDRNOTAVAIL;
}
@@ -302,6 +310,7 @@ void ipv6_sock_mc_close(struct sock *sk)
if (!rcu_access_pointer(np->ipv6_mc_list))
return;
+ rtnl_lock();
spin_lock(&ipv6_sk_mc_lock);
while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list,
lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) {
@@ -328,6 +337,7 @@ void ipv6_sock_mc_close(struct sock *sk)
spin_lock(&ipv6_sk_mc_lock);
}
spin_unlock(&ipv6_sk_mc_lock);
+ rtnl_unlock();
}
int ip6_mc_source(int add, int omode, struct sock *sk,
@@ -845,6 +855,8 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
struct ifmcaddr6 *mc;
struct inet6_dev *idev;
+ ASSERT_RTNL();
+
/* we need to take a reference on idev */
idev = in6_dev_get(dev);
@@ -916,6 +928,8 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct ifmcaddr6 *ma, **map;
+ ASSERT_RTNL();
+
write_lock_bh(&idev->lock);
for (map = &idev->mc_list; (ma=*map) != NULL; map = &ma->next) {
if (ipv6_addr_equal(&ma->mca_addr, addr)) {
@@ -1301,8 +1315,17 @@ int igmp6_event_query(struct sk_buff *skb)
len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
len -= skb_network_header_len(skb);
- /* Drop queries with not link local source */
- if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
+ /* RFC3810 6.2
+ * Upon reception of an MLD message that contains a Query, the node
+ * checks if the source address of the message is a valid link-local
+ * address, if the Hop Limit is set to 1, and if the Router Alert
+ * option is present in the Hop-By-Hop Options header of the IPv6
+ * packet. If any of these checks fails, the packet is dropped.
+ */
+ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL) ||
+ ipv6_hdr(skb)->hop_limit != 1 ||
+ !(IP6CB(skb)->flags & IP6SKB_ROUTERALERT) ||
+ IP6CB(skb)->ra != htons(IPV6_OPT_ROUTERALERT_MLD))
return -EINVAL;
idev = __in6_dev_get(skb->dev);
@@ -1620,11 +1643,12 @@ static void mld_sendpack(struct sk_buff *skb)
dst_output);
out:
if (!err) {
- ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT);
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
- IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
- } else
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
+ } else {
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ }
rcu_read_unlock();
return;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 09a22f4f36c9..339078f95d1b 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -851,7 +851,7 @@ out:
static void ndisc_recv_na(struct sk_buff *skb)
{
struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
- const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+ struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
u8 *lladdr = NULL;
u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
@@ -944,10 +944,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
/*
* Change: router to host
*/
- struct rt6_info *rt;
- rt = rt6_get_dflt_router(saddr, dev);
- if (rt)
- ip6_del_rt(rt);
+ rt6_clean_tohost(dev_net(dev), saddr);
}
out:
@@ -1073,6 +1070,9 @@ static void ndisc_router_discovery(struct sk_buff *skb)
optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) -
sizeof(struct ra_msg);
+ ND_PRINTK(2, info,
+ "RA: %s, dev: %s\n",
+ __func__, skb->dev->name);
if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
ND_PRINTK(2, warn, "RA: source address is not link-local\n");
return;
@@ -1105,13 +1105,21 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
- if (!ipv6_accept_ra(in6_dev))
+ if (!ipv6_accept_ra(in6_dev)) {
+ ND_PRINTK(2, info,
+ "RA: %s, did not accept ra for dev: %s\n",
+ __func__, skb->dev->name);
goto skip_linkparms;
+ }
#ifdef CONFIG_IPV6_NDISC_NODETYPE
/* skip link-specific parameters from interior routers */
- if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
+ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
+ ND_PRINTK(2, info,
+ "RA: %s, nodetype is NODEFAULT, dev: %s\n",
+ __func__, skb->dev->name);
goto skip_linkparms;
+ }
#endif
if (in6_dev->if_flags & IF_RS_SENT) {
@@ -1133,11 +1141,24 @@ static void ndisc_router_discovery(struct sk_buff *skb)
(ra_msg->icmph.icmp6_addrconf_other ?
IF_RA_OTHERCONF : 0);
- if (!in6_dev->cnf.accept_ra_defrtr)
+ if (!in6_dev->cnf.accept_ra_defrtr) {
+ ND_PRINTK(2, info,
+ "RA: %s, defrtr is false for dev: %s\n",
+ __func__, skb->dev->name);
goto skip_defrtr;
+ }
- if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0))
+ /* Do not accept RA with source-addr found on local machine unless
+ * accept_ra_from_local is set to true.
+ */
+ if (!in6_dev->cnf.accept_ra_from_local &&
+ ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
+ NULL, 0)) {
+ ND_PRINTK(2, info,
+ "RA from local address detected on dev: %s: default router ignored\n",
+ skb->dev->name);
goto skip_defrtr;
+ }
lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
@@ -1166,8 +1187,10 @@ static void ndisc_router_discovery(struct sk_buff *skb)
rt = NULL;
}
+ ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, for dev: %s\n",
+ rt, lifetime, skb->dev->name);
if (rt == NULL && lifetime) {
- ND_PRINTK(3, dbg, "RA: adding default router\n");
+ ND_PRINTK(3, info, "RA: adding default router\n");
rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
if (rt == NULL) {
@@ -1263,12 +1286,22 @@ skip_linkparms:
NEIGH_UPDATE_F_ISROUTER);
}
- if (!ipv6_accept_ra(in6_dev))
+ if (!ipv6_accept_ra(in6_dev)) {
+ ND_PRINTK(2, info,
+ "RA: %s, accept_ra is false for dev: %s\n",
+ __func__, skb->dev->name);
goto out;
+ }
#ifdef CONFIG_IPV6_ROUTE_INFO
- if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0))
+ if (!in6_dev->cnf.accept_ra_from_local &&
+ ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
+ NULL, 0)) {
+ ND_PRINTK(2, info,
+ "RA from local address detected on dev: %s: router info ignored.\n",
+ skb->dev->name);
goto skip_routeinfo;
+ }
if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
struct nd_opt_hdr *p;
@@ -1296,8 +1329,12 @@ skip_routeinfo:
#ifdef CONFIG_IPV6_NDISC_NODETYPE
/* skip link-specific ndopts from interior routers */
- if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
+ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
+ ND_PRINTK(2, info,
+ "RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n",
+ __func__, skb->dev->name);
goto out;
+ }
#endif
if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
@@ -1731,7 +1768,7 @@ int __init ndisc_init(void)
#ifdef CONFIG_SYSCTL
err = neigh_sysctl_register(NULL, &nd_tbl.parms,
- &ndisc_ifinfo_sysctl_change);
+ ndisc_ifinfo_sysctl_change);
if (err)
goto out_unregister_pernet;
out:
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 95f3f1da0d7f..d38e6a8d8b9f 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -30,13 +30,15 @@ int ip6_route_me_harder(struct sk_buff *skb)
.daddr = iph->daddr,
.saddr = iph->saddr,
};
+ int err;
dst = ip6_route_output(net, skb->sk, &fl6);
- if (dst->error) {
+ err = dst->error;
+ if (err) {
IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
dst_release(dst);
- return dst->error;
+ return err;
}
/* Drop old route. */
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 4bff1f297e39..2812816aabdc 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -55,6 +55,21 @@ config NFT_REJECT_IPV6
default NFT_REJECT
tristate
+config NF_LOG_IPV6
+ tristate "IPv6 packet logging"
+ default m if NETFILTER_ADVANCED=n
+ select NF_LOG_COMMON
+
+config NF_NAT_IPV6
+ tristate "IPv6 NAT"
+ depends on NF_CONNTRACK_IPV6
+ depends on NETFILTER_ADVANCED
+ select NF_NAT
+ help
+ The IPv6 NAT option allows masquerading, port forwarding and other
+ forms of full Network Address Port Translation. This can be
+ controlled by iptables or nft.
+
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
depends on INET && IPV6
@@ -227,19 +242,21 @@ config IP6_NF_SECURITY
If unsure, say N.
-config NF_NAT_IPV6
- tristate "IPv6 NAT"
+config IP6_NF_NAT
+ tristate "ip6tables NAT support"
depends on NF_CONNTRACK_IPV6
depends on NETFILTER_ADVANCED
select NF_NAT
+ select NF_NAT_IPV6
+ select NETFILTER_XT_NAT
help
- The IPv6 NAT option allows masquerading, port forwarding and other
- forms of full Network Address Port Translation. It is controlled by
- the `nat' table in ip6tables, see the man page for ip6tables(8).
+ This enables the `nat' table in ip6tables. This allows masquerading,
+ port forwarding and other forms of full Network Address Port
+ Translation.
To compile it as a module, choose M here. If unsure, say N.
-if NF_NAT_IPV6
+if IP6_NF_NAT
config IP6_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
@@ -260,7 +277,7 @@ config IP6_NF_TARGET_NPT
To compile it as a module, choose M here. If unsure, say N.
-endif # NF_NAT_IPV6
+endif # IP6_NF_NAT
endif # IP6_NF_IPTABLES
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 70d3dd66f2cd..c3d3286db4bb 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
-obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o
+obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o
# objects for l3 independent conntrack
nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
@@ -23,6 +23,9 @@ obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
+# logging
+obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o
+
# nf_tables
obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 710238f58aa9..e080fbbbc0e5 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1241,8 +1241,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
xt_free_table_info(oldinfo);
if (copy_to_user(counters_ptr, counters,
- sizeof(struct xt_counters) * num_counters) != 0)
- ret = -EFAULT;
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n");
+ }
vfree(counters);
xt_table_unlock(t);
return ret;
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 54bd9790603f..8b147440fbdc 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -94,7 +94,6 @@ ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par)
break;
default:
return false;
- break;
}
nexthdr = hp->nexthdr;
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index e0983f3648a6..790e0c6b19e1 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -33,6 +33,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,
struct ipv6hdr *iph = ipv6_hdr(skb);
bool ret = false;
struct flowi6 fl6 = {
+ .flowi6_iif = LOOPBACK_IFINDEX,
.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
.flowi6_proto = iph->nexthdr,
.daddr = iph->saddr,
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 84c7f33d0cf8..387d8b8fc18d 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -90,17 +90,9 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
if (nf_ct_is_untracked(ct))
return NF_ACCEPT;
- nat = nfct_nat(ct);
- if (!nat) {
- /* NAT module was loaded late. */
- if (nf_ct_is_confirmed(ct))
- return NF_ACCEPT;
- nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
- if (nat == NULL) {
- pr_debug("failed to add NAT extension\n");
- return NF_ACCEPT;
- }
- }
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
switch (ctinfo) {
case IP_CT_RELATED:
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 767ab8da8218..6f187c8d8a1b 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -50,6 +50,7 @@
#include <linux/module.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+static const char nf_frags_cache_name[] = "nf-frags";
struct nf_ct_frag6_skb_cb
{
@@ -63,6 +64,8 @@ struct nf_ct_frag6_skb_cb
static struct inet_frags nf_frags;
#ifdef CONFIG_SYSCTL
+static int zero;
+
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_timeout",
@@ -76,14 +79,17 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
.data = &init_net.nf_frag.frags.low_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &init_net.nf_frag.frags.high_thresh
},
{
.procname = "nf_conntrack_frag6_high_thresh",
.data = &init_net.nf_frag.frags.high_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &init_net.nf_frag.frags.low_thresh
},
{ }
};
@@ -102,7 +108,10 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
table[0].data = &net->nf_frag.frags.timeout;
table[1].data = &net->nf_frag.frags.low_thresh;
+ table[1].extra2 = &net->nf_frag.frags.high_thresh;
table[2].data = &net->nf_frag.frags.high_thresh;
+ table[2].extra1 = &net->nf_frag.frags.low_thresh;
+ table[2].extra2 = &init_net.nf_frag.frags.high_thresh;
}
hdr = register_net_sysctl(net, "net/netfilter", table);
@@ -147,16 +156,13 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
const struct in6_addr *daddr)
{
- u32 c;
-
net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
- c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, nf_frags.rnd);
- return c & (INETFRAGS_HASHSZ - 1);
+ return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
+ (__force u32)id, nf_frags.rnd);
}
-static unsigned int nf_hashfn(struct inet_frag_queue *q)
+static unsigned int nf_hashfn(const struct inet_frag_queue *q)
{
const struct frag_queue *nq;
@@ -196,7 +202,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
arg.dst = dst;
arg.ecn = ecn;
- read_lock_bh(&nf_frags.lock);
+ local_bh_disable();
hash = nf_hash_frag(id, src, dst);
q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
@@ -217,7 +223,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
int offset, end;
u8 ecn;
- if (fq->q.last_in & INET_FRAG_COMPLETE) {
+ if (fq->q.flags & INET_FRAG_COMPLETE) {
pr_debug("Already completed\n");
goto err;
}
@@ -248,11 +254,11 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* or have different end, the segment is corrupted.
*/
if (end < fq->q.len ||
- ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) {
+ ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) {
pr_debug("already received last fragment\n");
goto err;
}
- fq->q.last_in |= INET_FRAG_LAST_IN;
+ fq->q.flags |= INET_FRAG_LAST_IN;
fq->q.len = end;
} else {
/* Check if the fragment is rounded to 8 bytes.
@@ -267,7 +273,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
}
if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
- if (fq->q.last_in & INET_FRAG_LAST_IN) {
+ if (fq->q.flags & INET_FRAG_LAST_IN) {
pr_debug("last packet already reached.\n");
goto err;
}
@@ -349,10 +355,9 @@ found:
*/
if (offset == 0) {
fq->nhoffset = nhoff;
- fq->q.last_in |= INET_FRAG_FIRST_IN;
+ fq->q.flags |= INET_FRAG_FIRST_IN;
}
- inet_frag_lru_move(&fq->q);
return 0;
discard_fq:
@@ -451,7 +456,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
}
sub_frag_mem_limit(&fq->q, head->truesize);
- head->local_df = 1;
+ head->ignore_df = 1;
head->next = NULL;
head->dev = dev;
head->tstamp = fq->q.stamp;
@@ -597,10 +602,6 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
hdr = ipv6_hdr(clone);
fhdr = (struct frag_hdr *)skb_transport_header(clone);
- local_bh_disable();
- inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
- local_bh_enable();
-
fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
ip6_frag_ecn(hdr));
if (fq == NULL) {
@@ -617,7 +618,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
goto ret_orig;
}
- if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
ret_skb = nf_ct_frag6_reasm(fq, dev);
if (ret_skb == NULL)
@@ -677,13 +678,15 @@ int nf_ct_frag6_init(void)
nf_frags.qsize = sizeof(struct frag_queue);
nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
- nf_frags.secret_interval = 10 * 60 * HZ;
- inet_frags_init(&nf_frags);
-
+ nf_frags.frags_cache_name = nf_frags_cache_name;
+ ret = inet_frags_init(&nf_frags);
+ if (ret)
+ goto out;
ret = register_pernet_subsys(&nf_ct_net_ops);
if (ret)
inet_frags_fini(&nf_frags);
+out:
return ret;
}
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
new file mode 100644
index 000000000000..7b17a0be93e7
--- /dev/null
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -0,0 +1,417 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <net/ipv6.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/xt_LOG.h>
+#include <net/netfilter/nf_log.h>
+
+/* Log settings used by nf_log_ip6_packet() when the caller passes no
+ * loginfo: level 5 with every flag in NF_LOG_MASK enabled.
+ */
+static struct nf_loginfo default_loginfo = {
+	.type		= NF_LOG_TYPE_LOG,
+	.u.log.level	= 5,
+	.u.log.logflags	= NF_LOG_MASK,
+};
+
+/* Append a textual dump of the IPv6 packet found at offset ip6hoff in skb
+ * to the log buffer m: the fixed header fields, then the extension header
+ * chain, then the upper-layer protocol. When recurse is non-zero, the
+ * packet embedded in an ICMPv6 error message is dumped as well (with
+ * recurse = 0), and the UID and MARK fields may be appended.
+ * One level of recursion won't kill us.
+ */
+static void dump_ipv6_packet(struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb, unsigned int ip6hoff,
+ int recurse)
+{
+ u_int8_t currenthdr;
+ int fragment;
+ struct ipv6hdr _ip6h;
+ const struct ipv6hdr *ih;
+ unsigned int ptr;
+ unsigned int hdrlen = 0;
+ unsigned int logflags;
+
+ if (info->type == NF_LOG_TYPE_LOG)
+ logflags = info->u.log.logflags;
+ else
+ logflags = NF_LOG_MASK;
+
+ ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
+ if (ih == NULL) {
+ nf_log_buf_add(m, "TRUNCATED");
+ return;
+ }
+
+ /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */
+ nf_log_buf_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr);
+
+ /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
+ nf_log_buf_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
+ ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
+ (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, /* TC: bits 20-27 of the first word */
+ ih->hop_limit,
+ (ntohl(*(__be32 *)ih) & 0x000fffff)); /* FLOWLBL: low 20 bits of the first word */
+
+ fragment = 0;
+ ptr = ip6hoff + sizeof(struct ipv6hdr);
+ currenthdr = ih->nexthdr;
+ while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) {
+ struct ipv6_opt_hdr _hdr;
+ const struct ipv6_opt_hdr *hp;
+
+ hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
+ if (hp == NULL) {
+ nf_log_buf_add(m, "TRUNCATED");
+ return;
+ }
+
+ /* Max length: 48 "OPT (...) " */
+ if (logflags & XT_LOG_IPOPT)
+ nf_log_buf_add(m, "OPT ( ");
+
+ switch (currenthdr) {
+ case IPPROTO_FRAGMENT: {
+ struct frag_hdr _fhdr;
+ const struct frag_hdr *fh;
+
+ nf_log_buf_add(m, "FRAG:");
+ fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
+ &_fhdr);
+ if (fh == NULL) {
+ nf_log_buf_add(m, "TRUNCATED ");
+ return;
+ }
+
+ /* Max length: 6 "65535 " */
+ nf_log_buf_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8);
+
+ /* Max length: 11 "INCOMPLETE " */
+ if (fh->frag_off & htons(0x0001))
+ nf_log_buf_add(m, "INCOMPLETE ");
+
+ nf_log_buf_add(m, "ID:%08x ",
+ ntohl(fh->identification));
+
+ if (ntohs(fh->frag_off) & 0xFFF8)
+ fragment = 1; /* non-zero offset: later headers are not parsed */
+
+ hdrlen = 8;
+
+ break;
+ }
+ case IPPROTO_DSTOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_HOPOPTS:
+ if (fragment) {
+ if (logflags & XT_LOG_IPOPT)
+ nf_log_buf_add(m, ")");
+ return;
+ }
+ hdrlen = ipv6_optlen(hp);
+ break;
+ /* AH: details are logged only with XT_LOG_IPOPT; hdrlen is always computed */
+ case IPPROTO_AH:
+ if (logflags & XT_LOG_IPOPT) {
+ struct ip_auth_hdr _ahdr;
+ const struct ip_auth_hdr *ah;
+
+ /* Max length: 3 "AH " */
+ nf_log_buf_add(m, "AH ");
+
+ if (fragment) {
+ nf_log_buf_add(m, ")");
+ return;
+ }
+
+ ah = skb_header_pointer(skb, ptr, sizeof(_ahdr),
+ &_ahdr);
+ if (ah == NULL) {
+ /*
+ * Max length: 26 "INCOMPLETE [65535
+ * bytes] )"
+ */
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] )",
+ skb->len - ptr);
+ return;
+ }
+
+ /* Length: 15 "SPI=0xF1234567 " */
+ nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi));
+
+ }
+
+ hdrlen = (hp->hdrlen+2)<<2;
+ break;
+ case IPPROTO_ESP:
+ if (logflags & XT_LOG_IPOPT) {
+ struct ip_esp_hdr _esph;
+ const struct ip_esp_hdr *eh;
+
+ /* Max length: 4 "ESP " */
+ nf_log_buf_add(m, "ESP ");
+
+ if (fragment) {
+ nf_log_buf_add(m, ")");
+ return;
+ }
+
+ /*
+ * Max length: 26 "INCOMPLETE [65535 bytes] )"
+ */
+ eh = skb_header_pointer(skb, ptr, sizeof(_esph),
+ &_esph);
+ if (eh == NULL) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] )",
+ skb->len - ptr);
+ return;
+ }
+
+ /* Length: 16 "SPI=0xF1234567 )" */
+ nf_log_buf_add(m, "SPI=0x%x )",
+ ntohl(eh->spi));
+ }
+ return; /* nothing after an ESP header is parsed */
+ default:
+ /* Max length: 20 "Unknown Ext Hdr 255" */
+ nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr);
+ return;
+ }
+ if (logflags & XT_LOG_IPOPT)
+ nf_log_buf_add(m, ") ");
+
+ currenthdr = hp->nexthdr;
+ ptr += hdrlen;
+ }
+
+ switch (currenthdr) {
+ case IPPROTO_TCP:
+ if (nf_log_dump_tcp_header(m, skb, currenthdr, fragment,
+ ptr, logflags))
+ return;
+ break;
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ if (nf_log_dump_udp_header(m, skb, currenthdr, fragment, ptr))
+ return;
+ break;
+ case IPPROTO_ICMPV6: {
+ struct icmp6hdr _icmp6h;
+ const struct icmp6hdr *ic;
+
+ /* Max length: 13 "PROTO=ICMPv6 " */
+ nf_log_buf_add(m, "PROTO=ICMPv6 ");
+
+ if (fragment)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h);
+ if (ic == NULL) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+ skb->len - ptr);
+ return;
+ }
+
+ /* Max length: 18 "TYPE=255 CODE=255 " */
+ nf_log_buf_add(m, "TYPE=%u CODE=%u ",
+ ic->icmp6_type, ic->icmp6_code);
+
+ switch (ic->icmp6_type) {
+ case ICMPV6_ECHO_REQUEST:
+ case ICMPV6_ECHO_REPLY:
+ /* Max length: 19 "ID=65535 SEQ=65535 " */
+ nf_log_buf_add(m, "ID=%u SEQ=%u ",
+ ntohs(ic->icmp6_identifier),
+ ntohs(ic->icmp6_sequence));
+ break;
+ case ICMPV6_MGM_QUERY:
+ case ICMPV6_MGM_REPORT:
+ case ICMPV6_MGM_REDUCTION:
+ break;
+
+ case ICMPV6_PARAMPROB:
+ /* Max length: 17 "POINTER=ffffffff " */
+ nf_log_buf_add(m, "POINTER=%08x ",
+ ntohl(ic->icmp6_pointer));
+ /* Fall through */
+ case ICMPV6_DEST_UNREACH:
+ case ICMPV6_PKT_TOOBIG:
+ case ICMPV6_TIME_EXCEED:
+ /* Max length: 3+maxlen */
+ if (recurse) {
+ nf_log_buf_add(m, "[");
+ dump_ipv6_packet(m, info, skb,
+ ptr + sizeof(_icmp6h), 0);
+ nf_log_buf_add(m, "] ");
+ }
+
+ /* Max length: 10 "MTU=65535 " */
+ if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) {
+ nf_log_buf_add(m, "MTU=%u ",
+ ntohl(ic->icmp6_mtu));
+ }
+ }
+ break;
+ }
+ /* Max length: 10 "PROTO=255 " */
+ default:
+ nf_log_buf_add(m, "PROTO=%u ", currenthdr);
+ }
+
+ /* Max length: 15 "UID=4294967295 " */
+ if ((logflags & XT_LOG_UID) && recurse)
+ nf_log_dump_sk_uid_gid(m, skb->sk);
+
+ /* Max length: 16 "MARK=0xFFFFFFFF " */
+ if (recurse && skb->mark)
+ nf_log_buf_add(m, "MARK=0x%x ", skb->mark);
+}
+
+/* Append the link-layer header of skb to the log buffer m.
+ *
+ * With XT_LOG_MACDECODE set in the log flags and an ARPHRD_ETHER device,
+ * the header is decoded into MACSRC/MACDST/MACPROTO. In every other case
+ * a "MAC=" field of colon-separated hex bytes is written, followed by a
+ * "TUNNEL=" field when the device type is ARPHRD_SIT.
+ */
+static void dump_ipv6_mac_header(struct nf_log_buf *m,
+				 const struct nf_loginfo *info,
+				 const struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	const unsigned char *mac;
+	unsigned int flags;
+	unsigned int hlen;
+	unsigned int n;
+
+	flags = (info->type == NF_LOG_TYPE_LOG) ? info->u.log.logflags : 0;
+
+	if ((flags & XT_LOG_MACDECODE) && dev->type == ARPHRD_ETHER) {
+		nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
+			       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+			       ntohs(eth_hdr(skb)->h_proto));
+		return;
+	}
+
+	nf_log_buf_add(m, "MAC=");
+
+	/* No link-layer header to show: terminate the field and stop. */
+	if (!dev->hard_header_len ||
+	    skb->mac_header == skb->network_header) {
+		nf_log_buf_add(m, " ");
+		return;
+	}
+
+	mac = skb_mac_header(skb);
+	hlen = dev->hard_header_len;
+
+	if (dev->type == ARPHRD_SIT) {
+		/* Step back ETH_HLEN bytes; skip the hex dump if that
+		 * would point before the start of the skb buffer.
+		 */
+		mac -= ETH_HLEN;
+		if (mac < skb->head)
+			mac = NULL;
+	}
+
+	if (mac != NULL) {
+		nf_log_buf_add(m, "%02x", mac[0]);
+		for (n = 1; n < hlen; n++)
+			nf_log_buf_add(m, ":%02x", mac[n]);
+	}
+	nf_log_buf_add(m, " ");
+
+	if (dev->type == ARPHRD_SIT) {
+		const struct iphdr *iph = (struct iphdr *)skb_mac_header(skb);
+
+		nf_log_buf_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr,
+			       &iph->daddr);
+	}
+}
+
+/* Logger callback (installed as nf_ip6_logger.logfn): format one IPv6
+ * packet into an nf_log buffer. Packets from any namespace other than
+ * init_net are ignored. A NULL loginfo selects default_loginfo.
+ */
+static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
+			      unsigned int hooknum, const struct sk_buff *skb,
+			      const struct net_device *in,
+			      const struct net_device *out,
+			      const struct nf_loginfo *loginfo,
+			      const char *prefix)
+{
+	const struct nf_loginfo *li = loginfo ? loginfo : &default_loginfo;
+	struct nf_log_buf *buf;
+
+	/* FIXME: Disabled from containers until syslog ns is supported */
+	if (!net_eq(net, &init_net))
+		return;
+
+	buf = nf_log_buf_open();
+
+	nf_log_dump_packet_common(buf, pf, hooknum, skb, in, out, li, prefix);
+
+	/* The MAC header is only dumped when there is an input device. */
+	if (in)
+		dump_ipv6_mac_header(buf, li, skb);
+
+	dump_ipv6_packet(buf, li, skb, skb_network_offset(skb), 1);
+
+	nf_log_buf_close(buf);
+}
+
+static struct nf_logger nf_ip6_logger __read_mostly = { /* registered for NFPROTO_IPV6 by nf_log_ipv6_init() */
+ .name = "nf_log_ipv6",
+ .type = NF_LOG_TYPE_LOG,
+ .logfn = nf_log_ip6_packet,
+ .me = THIS_MODULE,
+};
+
+static int __net_init nf_log_ipv6_net_init(struct net *net) /* .init hook of nf_log_ipv6_net_ops */
+{
+ nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger);
+ return 0;
+}
+
+static void __net_exit nf_log_ipv6_net_exit(struct net *net) /* .exit hook of nf_log_ipv6_net_ops */
+{
+ nf_log_unset(net, &nf_ip6_logger);
+}
+
+static struct pernet_operations nf_log_ipv6_net_ops = { /* registered in nf_log_ipv6_init(), removed in nf_log_ipv6_exit() */
+ .init = nf_log_ipv6_net_init,
+ .exit = nf_log_ipv6_net_exit,
+};
+
+/* Module init: register the per-netns operations, then the IPv6 logger.
+ *
+ * Returns 0 on success or a negative errno. If the logger cannot be
+ * registered, the pernet registration is rolled back so a failed load
+ * leaves nothing behind.
+ */
+static int __init nf_log_ipv6_init(void)
+{
+	int ret;
+
+	ret = register_pernet_subsys(&nf_log_ipv6_net_ops);
+	if (ret < 0)
+		return ret;
+
+	/* Do not report success when the logger was not actually added. */
+	ret = nf_log_register(NFPROTO_IPV6, &nf_ip6_logger);
+	if (ret < 0)
+		unregister_pernet_subsys(&nf_log_ipv6_net_ops);
+
+	return ret;
+}
+
+static void __exit nf_log_ipv6_exit(void) /* module exit: undo both registrations made by nf_log_ipv6_init() */
+{
+ unregister_pernet_subsys(&nf_log_ipv6_net_ops);
+ nf_log_unregister(&nf_ip6_logger);
+}
+
+/* Module entry points and metadata for the IPv6 packet logger. */
+module_init(nf_log_ipv6_init);
+module_exit(nf_log_ipv6_exit);
+
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter IPv6 packet logging");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NF_LOGGER(AF_INET6, 0);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index abfe75a2e316..fc8e49b2ff3e 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -158,6 +158,7 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
htons(oldlen), htons(datalen), 1);
}
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
struct nf_nat_range *range)
{
@@ -175,6 +176,7 @@ static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
return 0;
}
+#endif
static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
.l3proto = NFPROTO_IPV6,
@@ -183,7 +185,9 @@ static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
.manip_pkt = nf_nat_ipv6_manip_pkt,
.csum_update = nf_nat_ipv6_csum_update,
.csum_recalc = nf_nat_ipv6_csum_recalc,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_ipv6_nlattr_to_range,
+#endif
#ifdef CONFIG_XFRM
.decode_session = nf_nat_ipv6_decode_session,
#endif
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 9c3297a768fd..d189fcb437fe 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -47,15 +47,9 @@ static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
if (ct == NULL || nf_ct_is_untracked(ct))
return NF_ACCEPT;
- nat = nfct_nat(ct);
- if (nat == NULL) {
- /* Conntrack module was loaded late, can't add extension. */
- if (nf_ct_is_confirmed(ct))
- return NF_ACCEPT;
- nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
- if (nat == NULL)
- return NF_ACCEPT;
- }
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
switch (ctinfo) {
case IP_CT_RELATED:
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 827f795209cf..5ec867e4a8b7 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -6,35 +6,7 @@
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/addrconf.h>
-
-void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
-{
- static atomic_t ipv6_fragmentation_id;
- int old, new;
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (rt && !(rt->dst.flags & DST_NOPEER)) {
- struct inet_peer *peer;
- struct net *net;
-
- net = dev_net(rt->dst.dev);
- peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
- if (peer) {
- fhdr->identification = htonl(inet_getid(peer, 0));
- inet_putpeer(peer);
- return;
- }
- }
-#endif
- do {
- old = atomic_read(&ipv6_fragmentation_id);
- new = old + 1;
- if (!new)
- new = 1;
- } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
- fhdr->identification = htonl(new);
-}
-EXPORT_SYMBOL(ipv6_select_ident);
+#include <net/secure_seq.h>
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
@@ -106,6 +78,7 @@ int __ip6_local_out(struct sk_buff *skb)
if (len > IPV6_MAXPLEN)
len = 0;
ipv6_hdr(skb)->payload_len = htons(len);
+ IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
skb_dst(skb)->dev, dst_output);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 587bbdcb22b4..5b7a1ed2aba9 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -51,7 +51,6 @@ static struct inet_protosw pingv6_protosw = {
.protocol = IPPROTO_ICMPV6,
.prot = &pingv6_prot,
.ops = &inet6_dgram_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_REUSE,
};
@@ -168,12 +167,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
pfh.wcheck = 0;
pfh.family = AF_INET6;
- if (ipv6_addr_is_multicast(&fl6.daddr))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
lock_sock(sk);
err = ip6_append_data(sk, ping_getfrag, &pfh, len,
@@ -182,8 +176,8 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
MSG_DONTWAIT, np->dontfrag);
if (err) {
- ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev,
- ICMP6_MIB_OUTERRORS);
+ ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
+ ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
err = icmpv6_push_pending_frames(sk, &fl6,
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 091d066a57b3..2d6f860e5c1e 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -33,6 +33,7 @@
static int sockstat6_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
+ unsigned int frag_mem = ip6_frag_mem(net);
seq_printf(seq, "TCP6: inuse %d\n",
sock_prot_inuse_get(net, &tcpv6_prot));
@@ -42,8 +43,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplitev6_prot));
seq_printf(seq, "RAW6: inuse %d\n",
sock_prot_inuse_get(net, &rawv6_prot));
- seq_printf(seq, "FRAG6: inuse %d memory %d\n",
- ip6_frag_nqueues(net), ip6_frag_mem(net));
+ seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem);
return 0;
}
@@ -186,7 +186,7 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib)
/* can be called either with percpu mib (pcpumib != NULL),
* or shared one (smib != NULL)
*/
-static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib,
+static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib,
atomic_long_t *smib,
const struct snmp_mib *itemlist)
{
@@ -201,7 +201,7 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib,
}
}
-static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib,
+static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
const struct snmp_mib *itemlist, size_t syncpoff)
{
int i;
@@ -215,14 +215,14 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
{
struct net *net = (struct net *)seq->private;
- snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics,
+ snmp6_seq_show_item64(seq, net->mib.ipv6_statistics,
snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
- snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics,
+ snmp6_seq_show_item(seq, net->mib.icmpv6_statistics,
NULL, snmp6_icmp6_list);
snmp6_seq_show_icmpv6msg(seq, net->mib.icmpv6msg_statistics->mibs);
- snmp6_seq_show_item(seq, (void __percpu **)net->mib.udp_stats_in6,
+ snmp6_seq_show_item(seq, net->mib.udp_stats_in6,
NULL, snmp6_udp6_list);
- snmp6_seq_show_item(seq, (void __percpu **)net->mib.udplite_stats_in6,
+ snmp6_seq_show_item(seq, net->mib.udplite_stats_in6,
NULL, snmp6_udplite6_list);
return 0;
}
@@ -245,7 +245,7 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
struct inet6_dev *idev = (struct inet6_dev *)seq->private;
seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
- snmp6_seq_show_item64(seq, (void __percpu **)idev->stats.ipv6,
+ snmp6_seq_show_item64(seq, idev->stats.ipv6,
snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs,
snmp6_icmp6_list);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 1f29996e368a..39d44226e402 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -176,7 +176,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
goto out;
net = dev_net(skb->dev);
- sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
+ sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, inet6_iif(skb));
while (sk) {
int filtered;
@@ -220,7 +220,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
}
}
sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr,
- IP6CB(skb)->iif);
+ inet6_iif(skb));
}
out:
read_unlock(&raw_v6_hashinfo.lock);
@@ -375,7 +375,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
net = dev_net(skb->dev);
while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr,
- IP6CB(skb)->iif))) {
+ inet6_iif(skb)))) {
rawv6_err(sk, skb, NULL, type, code,
inner_offset, info);
sk = sk_next(sk);
@@ -506,7 +506,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
sin6->sin6_addr = ipv6_hdr(skb)->saddr;
sin6->sin6_flowinfo = 0;
sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
- IP6CB(skb)->iif);
+ inet6_iif(skb));
*addr_len = sizeof(*sin6);
}
@@ -588,8 +588,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
}
offset += skb_transport_offset(skb);
- if (skb_copy_bits(skb, offset, &csum, 2))
- BUG();
+ BUG_ON(skb_copy_bits(skb, offset, &csum, 2));
/* in case cksum was not initialized */
if (unlikely(csum))
@@ -601,8 +600,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
if (csum == 0 && fl6->flowi6_proto == IPPROTO_UDP)
csum = CSUM_MANGLED_0;
- if (skb_store_bits(skb, offset, &csum, 2))
- BUG();
+ BUG_ON(skb_store_bits(skb, offset, &csum, 2));
send:
err = ip6_push_pending_frames(sk);
@@ -873,14 +871,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
err = PTR_ERR(dst);
goto out;
}
- if (hlimit < 0) {
- if (ipv6_addr_is_multicast(&fl6.daddr))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
- }
+ if (hlimit < 0)
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
if (tclass < 0)
tclass = np->tclass;
@@ -1328,7 +1320,6 @@ static struct inet_protosw rawv6_protosw = {
.protocol = IPPROTO_IP, /* wild card */
.prot = &rawv6_prot,
.ops = &inet6_sockraw_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_REUSE,
};
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index cc85a9ba5010..c6557d9f7808 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -60,6 +60,8 @@
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
+static const char ip6_frag_cache_name[] = "ip6-frags";
+
struct ip6frag_skb_cb
{
struct inet6_skb_parm h;
@@ -85,27 +87,23 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
const struct in6_addr *daddr)
{
- u32 c;
-
net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
- c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, ip6_frags.rnd);
-
- return c & (INETFRAGS_HASHSZ - 1);
+ return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
+ (__force u32)id, ip6_frags.rnd);
}
-static unsigned int ip6_hashfn(struct inet_frag_queue *q)
+static unsigned int ip6_hashfn(const struct inet_frag_queue *q)
{
- struct frag_queue *fq;
+ const struct frag_queue *fq;
fq = container_of(q, struct frag_queue, q);
return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);
}
-bool ip6_frag_match(struct inet_frag_queue *q, void *a)
+bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
{
- struct frag_queue *fq;
- struct ip6_create_arg *arg = a;
+ const struct frag_queue *fq;
+ const struct ip6_create_arg *arg = a;
fq = container_of(q, struct frag_queue, q);
return fq->id == arg->id &&
@@ -115,10 +113,10 @@ bool ip6_frag_match(struct inet_frag_queue *q, void *a)
}
EXPORT_SYMBOL(ip6_frag_match);
-void ip6_frag_init(struct inet_frag_queue *q, void *a)
+void ip6_frag_init(struct inet_frag_queue *q, const void *a)
{
struct frag_queue *fq = container_of(q, struct frag_queue, q);
- struct ip6_create_arg *arg = a;
+ const struct ip6_create_arg *arg = a;
fq->id = arg->id;
fq->user = arg->user;
@@ -135,7 +133,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
spin_lock(&fq->q.lock);
- if (fq->q.last_in & INET_FRAG_COMPLETE)
+ if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
inet_frag_kill(&fq->q, frags);
@@ -145,17 +143,20 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
if (!dev)
goto out_rcu_unlock;
- IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+ if (fq->q.flags & INET_FRAG_EVICTED)
+ goto out_rcu_unlock;
+
+ IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+
/* Don't send error if the first segment did not arrive. */
- if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments)
+ if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments)
goto out_rcu_unlock;
- /*
- But use as source device on which LAST ARRIVED
- segment was received. And do not use fq->dev
- pointer directly, device might already disappeared.
+ /* But use as source device on which LAST ARRIVED
+ * segment was received. And do not use fq->dev
+ * pointer directly, device might already disappeared.
*/
fq->q.fragments->dev = dev;
icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
@@ -192,7 +193,6 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src,
arg.dst = dst;
arg.ecn = ecn;
- read_lock(&ip6_frags.lock);
hash = inet6_hash_frag(id, src, dst);
q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
@@ -212,7 +212,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
struct net *net = dev_net(skb_dst(skb)->dev);
u8 ecn;
- if (fq->q.last_in & INET_FRAG_COMPLETE)
+ if (fq->q.flags & INET_FRAG_COMPLETE)
goto err;
offset = ntohs(fhdr->frag_off) & ~0x7;
@@ -243,9 +243,9 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
* or have different end, the segment is corrupted.
*/
if (end < fq->q.len ||
- ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len))
+ ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
goto err;
- fq->q.last_in |= INET_FRAG_LAST_IN;
+ fq->q.flags |= INET_FRAG_LAST_IN;
fq->q.len = end;
} else {
/* Check if the fragment is rounded to 8 bytes.
@@ -263,7 +263,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
}
if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
- if (fq->q.last_in & INET_FRAG_LAST_IN)
+ if (fq->q.flags & INET_FRAG_LAST_IN)
goto err;
fq->q.len = end;
}
@@ -338,10 +338,10 @@ found:
*/
if (offset == 0) {
fq->nhoffset = nhoff;
- fq->q.last_in |= INET_FRAG_FIRST_IN;
+ fq->q.flags |= INET_FRAG_FIRST_IN;
}
- if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
int res;
unsigned long orefdst = skb->_skb_refdst;
@@ -353,14 +353,13 @@ found:
}
skb_dst_drop(skb);
- inet_frag_lru_move(&fq->q);
return -1;
discard_fq:
inet_frag_kill(&fq->q, &ip6_frags);
err:
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_REASMFAILS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
return -1;
}
@@ -523,7 +522,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
struct frag_queue *fq;
const struct ipv6hdr *hdr = ipv6_hdr(skb);
struct net *net = dev_net(skb_dst(skb)->dev);
- int evicted;
if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
goto fail_hdr;
@@ -552,11 +550,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return 1;
}
- evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false);
- if (evicted)
- IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_REASMFAILS, evicted);
-
fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
ip6_frag_ecn(hdr));
if (fq != NULL) {
@@ -576,7 +569,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return -1;
fail_hdr:
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
return -1;
}
@@ -588,20 +582,25 @@ static const struct inet6_protocol frag_protocol =
};
#ifdef CONFIG_SYSCTL
+static int zero;
+
static struct ctl_table ip6_frags_ns_ctl_table[] = {
{
.procname = "ip6frag_high_thresh",
.data = &init_net.ipv6.frags.high_thresh,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &init_net.ipv6.frags.low_thresh
},
{
.procname = "ip6frag_low_thresh",
.data = &init_net.ipv6.frags.low_thresh,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &init_net.ipv6.frags.high_thresh
},
{
.procname = "ip6frag_time",
@@ -613,10 +612,12 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
{ }
};
+/* secret interval has been deprecated */
+static int ip6_frags_secret_interval_unused;
static struct ctl_table ip6_frags_ctl_table[] = {
{
.procname = "ip6frag_secret_interval",
- .data = &ip6_frags.secret_interval,
+ .data = &ip6_frags_secret_interval_unused,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -636,7 +637,10 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
goto err_alloc;
table[0].data = &net->ipv6.frags.high_thresh;
+ table[0].extra1 = &net->ipv6.frags.low_thresh;
+ table[0].extra2 = &init_net.ipv6.frags.high_thresh;
table[1].data = &net->ipv6.frags.low_thresh;
+ table[1].extra2 = &net->ipv6.frags.high_thresh;
table[2].data = &net->ipv6.frags.timeout;
/* Don't export sysctls to unprivileged users */
@@ -746,8 +750,10 @@ int __init ipv6_frag_init(void)
ip6_frags.qsize = sizeof(struct frag_queue);
ip6_frags.match = ip6_frag_match;
ip6_frags.frag_expire = ip6_frag_expire;
- ip6_frags.secret_interval = 10 * 60 * HZ;
- inet_frags_init(&ip6_frags);
+ ip6_frags.frags_cache_name = ip6_frag_cache_name;
+ ret = inet_frags_init(&ip6_frags);
+ if (ret)
+ goto err_pernet;
out:
return ret;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fba54a407bb2..bafde82324c5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -84,9 +84,9 @@ static void ip6_dst_ifdown(struct dst_entry *,
static int ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
-static int ip6_pkt_discard_out(struct sk_buff *skb);
+static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
-static int ip6_pkt_prohibit_out(struct sk_buff *skb);
+static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
@@ -149,7 +149,8 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
unsigned long prev, new;
p = peer->metrics;
- if (inet_metrics_new(peer))
+ if (inet_metrics_new(peer) ||
+ (old & DST_METRICS_FORCE_OVERWRITE))
memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
new = (unsigned long) p;
@@ -289,7 +290,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
.obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -EINVAL,
.input = dst_discard,
- .output = dst_discard,
+ .output = dst_discard_sk,
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
.rt6i_protocol = RTPROT_KERNEL,
@@ -313,7 +314,6 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
- rt->rt6i_genid = rt_genid_ipv6(net);
INIT_LIST_HEAD(&rt->rt6i_siblings);
}
return rt;
@@ -373,12 +373,6 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false;
}
-static bool rt6_need_strict(const struct in6_addr *daddr)
-{
- return ipv6_addr_type(daddr) &
- (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
-}
-
/* Multipath route selection:
* Hash based function using packet header and flowlabel.
* Adapted from fib_info_hashfn()
@@ -857,14 +851,15 @@ EXPORT_SYMBOL(rt6_lookup);
be destroyed.
*/
-static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
+static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
+ struct nlattr *mx, int mx_len)
{
int err;
struct fib6_table *table;
table = rt->rt6i_table;
write_lock_bh(&table->tb6_lock);
- err = fib6_add(&table->tb6_root, rt, info);
+ err = fib6_add(&table->tb6_root, rt, info, mx, mx_len);
write_unlock_bh(&table->tb6_lock);
return err;
@@ -875,7 +870,7 @@ int ip6_ins_rt(struct rt6_info *rt)
struct nl_info info = {
.nl_net = dev_net(rt->dst.dev),
};
- return __ip6_ins_rt(rt, &info);
+ return __ip6_ins_rt(rt, &info, NULL, 0);
}
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
@@ -1062,7 +1057,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
new->__use = 1;
new->input = dst_discard;
- new->output = dst_discard;
+ new->output = dst_discard_sk;
if (dst_metrics_read_only(&ort->dst))
new->_metrics = ort->dst._metrics;
@@ -1102,9 +1097,6 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
* DST_OBSOLETE_FORCE_CHK which forces validation calls down
* into this function always.
*/
- if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
- return NULL;
-
if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
return NULL;
@@ -1180,7 +1172,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = oif;
- fl6.flowi6_mark = mark;
+ fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
fl6.daddr = iph->daddr;
fl6.saddr = iph->saddr;
fl6.flowlabel = ip6_flowinfo(iph);
@@ -1277,6 +1269,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
struct flowi6 fl6;
memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_iif = LOOPBACK_IFINDEX;
fl6.flowi6_oif = oif;
fl6.flowi6_mark = mark;
fl6.daddr = iph->daddr;
@@ -1298,6 +1291,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
struct flowi6 fl6;
memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_iif = LOOPBACK_IFINDEX;
fl6.flowi6_oif = oif;
fl6.flowi6_mark = mark;
fl6.daddr = msg->dest;
@@ -1342,7 +1336,7 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
- return mtu;
+ goto out;
mtu = IPV6_MIN_MTU;
@@ -1352,7 +1346,8 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
mtu = idev->cnf.mtu6;
rcu_read_unlock();
- return mtu;
+out:
+ return min_t(unsigned int, mtu, IP6_MAX_MTU);
}
static struct dst_entry *icmp6_dst_gc_list;
@@ -1456,7 +1451,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
goto out;
net->ipv6.ip6_rt_gc_expire++;
- fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
+ fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
entries = dst_entries_get_slow(ops);
if (entries < ops->gc_thresh)
net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
@@ -1543,17 +1538,11 @@ int ip6_route_add(struct fib6_config *cfg)
ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
rt->rt6i_dst.plen = cfg->fc_dst_len;
- if (rt->rt6i_dst.plen == 128)
- rt->dst.flags |= DST_HOST;
-
- if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
- u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
- if (!metrics) {
- err = -ENOMEM;
- goto out;
- }
- dst_init_metrics(&rt->dst, metrics, 0);
+ if (rt->rt6i_dst.plen == 128) {
+ rt->dst.flags |= DST_HOST;
+ dst_metrics_set_force_overwrite(&rt->dst);
}
+
#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
rt->rt6i_src.plen = cfg->fc_src_len;
@@ -1586,7 +1575,7 @@ int ip6_route_add(struct fib6_config *cfg)
switch (cfg->fc_type) {
case RTN_BLACKHOLE:
rt->dst.error = -EINVAL;
- rt->dst.output = dst_discard;
+ rt->dst.output = dst_discard_sk;
rt->dst.input = dst_discard;
break;
case RTN_PROHIBIT:
@@ -1672,31 +1661,13 @@ int ip6_route_add(struct fib6_config *cfg)
rt->rt6i_flags = cfg->fc_flags;
install_route:
- if (cfg->fc_mx) {
- struct nlattr *nla;
- int remaining;
-
- nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
- int type = nla_type(nla);
-
- if (type) {
- if (type > RTAX_MAX) {
- err = -EINVAL;
- goto out;
- }
-
- dst_metric_set(&rt->dst, type, nla_get_u32(nla));
- }
- }
- }
-
rt->dst.dev = dev;
rt->rt6i_idev = idev;
rt->rt6i_table = table;
cfg->fc_nlinfo.nl_net = dev_net(dev);
- return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
+ return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len);
out:
if (dev)
@@ -2156,7 +2127,7 @@ static int ip6_pkt_discard(struct sk_buff *skb)
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_discard_out(struct sk_buff *skb)
+static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
@@ -2167,7 +2138,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb)
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_prohibit_out(struct sk_buff *skb)
+static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
@@ -2259,6 +2230,27 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
+#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
+#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
+
+/* Remove routers and update dst entries when a gateway turns into a host. */
+static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
+{
+ struct in6_addr *gateway = (struct in6_addr *)arg;
+
+ if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
+ ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
+ ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
+ return -1;
+ }
+ return 0;
+}
+
+void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
+{
+ fib6_clean_all(net, fib6_clean_tohost, gateway);
+}
+
struct arg_dev_net {
struct net_device *dev;
struct net *net;
@@ -2734,6 +2726,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
if (tb[RTA_OIF])
oif = nla_get_u32(tb[RTA_OIF]);
+ if (tb[RTA_MARK])
+ fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
+
if (iif) {
struct net_device *dev;
int flags = 0;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index b4d74c86586c..6163f851dc01 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -101,19 +101,19 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
- (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+ (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
if (remote == t->parms.iph.daddr &&
- (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+ (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
if (local == t->parms.iph.saddr &&
- (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+ (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
@@ -250,7 +250,8 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
else
strcpy(name, "sit%d");
- dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup);
+ dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
+ ipip6_tunnel_setup);
if (dev == NULL)
return NULL;
@@ -560,12 +561,12 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->dev->ifindex, 0, IPPROTO_IPV6, 0);
+ t->parms.link, 0, IPPROTO_IPV6, 0);
err = 0;
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
+ ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
IPPROTO_IPV6, 0);
err = 0;
goto out;
@@ -974,8 +975,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
goto out;
}
- err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos,
- ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+ err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
+ IPPROTO_IPV6, tos, ttl, df,
+ !net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return NETDEV_TX_OK;
@@ -1126,8 +1128,8 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
int err = 0;
struct ip_tunnel_parm p;
struct ip_tunnel_prl prl;
- struct ip_tunnel *t;
- struct net *net = dev_net(dev);
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct net *net = t->net;
struct sit_net *sitn = net_generic(net, sit_net_id);
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd ip6rd;
@@ -1138,16 +1140,15 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
#ifdef CONFIG_IPV6_SIT_6RD
case SIOCGET6RD:
#endif
- t = NULL;
if (dev == sitn->fb_tunnel_dev) {
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
err = -EFAULT;
break;
}
t = ipip6_tunnel_locate(net, &p, 0);
+ if (t == NULL)
+ t = netdev_priv(dev);
}
- if (t == NULL)
- t = netdev_priv(dev);
err = -EFAULT;
if (cmd == SIOCGETTUNNEL) {
@@ -1243,9 +1244,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = -EINVAL;
if (dev == sitn->fb_tunnel_dev)
goto done;
- err = -ENOENT;
- if (!(t = netdev_priv(dev)))
- goto done;
err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data);
break;
@@ -1261,9 +1259,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = -EFAULT;
if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
goto done;
- err = -ENOENT;
- if (!(t = netdev_priv(dev)))
- goto done;
switch (cmd) {
case SIOCDELPRL:
@@ -1291,8 +1286,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
sizeof(ip6rd)))
goto done;
- t = netdev_priv(dev);
-
if (cmd != SIOCDEL6RD) {
err = ipip6_tunnel_update_6rd(t, &ip6rd);
if (err < 0)
@@ -1363,7 +1356,6 @@ static void ipip6_tunnel_setup(struct net_device *dev)
static int ipip6_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- int i;
tunnel->dev = dev;
tunnel->net = dev_net(dev);
@@ -1372,16 +1364,10 @@ static int ipip6_tunnel_init(struct net_device *dev)
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
ipip6_tunnel_bind_dev(dev);
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *ipip6_tunnel_stats;
- ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&ipip6_tunnel_stats->syncp);
- }
-
tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
if (!tunnel->dst_cache) {
free_percpu(dev->tstats);
@@ -1397,7 +1383,6 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
struct iphdr *iph = &tunnel->parms.iph;
struct net *net = dev_net(dev);
struct sit_net *sitn = net_generic(net, sit_net_id);
- int i;
tunnel->dev = dev;
tunnel->net = dev_net(dev);
@@ -1408,16 +1393,10 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
iph->ihl = 5;
iph->ttl = 64;
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *ipip6_fb_stats;
- ipip6_fb_stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&ipip6_fb_stats->syncp);
- }
-
tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
if (!tunnel->dst_cache) {
free_percpu(dev->tstats);
@@ -1751,6 +1730,7 @@ static int __net_init sit_init_net(struct net *net)
sitn->tunnels[3] = sitn->tunnels_r_l;
sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
+ NET_NAME_UNKNOWN,
ipip6_tunnel_setup);
if (!sitn->fb_tunnel_dev) {
err = -ENOMEM;
@@ -1850,4 +1830,5 @@ xfrm_tunnel_failed:
module_init(sit_init);
module_exit(sit_cleanup);
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("sit");
MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bb53a5e73c1a..83cea1d39466 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -187,7 +187,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
+ req = inet_reqsk_alloc(&tcp6_request_sock_ops);
if (!req)
goto out;
@@ -216,6 +216,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = inet6_iif(skb);
+ ireq->ir_mark = inet_request_mark(sk, skb);
+
req->expires = 0UL;
req->num_retrans = 0;
ireq->ecn_ok = ecn_ok;
@@ -242,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
final_p = fl6_update_dst(&fl6, np->opt, &final);
fl6.saddr = ireq->ir_v6_loc_addr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_mark = ireq->ir_mark;
fl6.fl6_dport = ireq->ir_rmt_port;
fl6.fl6_sport = inet_sk(sk)->inet_sport;
security_req_classify_flow(req, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 7f405a168822..0c56c93619e0 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -38,6 +38,20 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "auto_flowlabels",
+ .data = &init_net.ipv6.sysctl.auto_flowlabels,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "fwmark_reflect",
+ .data = &init_net.ipv6.sysctl.fwmark_reflect,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{ }
};
@@ -67,6 +81,8 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[0].data = &net->ipv6.sysctl.bindv6only;
ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply;
ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency;
+ ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels;
+ ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index cb21fccf2089..03a5d1ed3340 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -39,7 +39,7 @@
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
-
+#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
@@ -64,8 +64,6 @@
#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>
-#include <asm/uaccess.h>
-
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -199,6 +197,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_v6_daddr = usin->sin6_addr;
np->flow_label = fl6.flowlabel;
+ ip6_set_txhash(sk);
+
/*
* TCP over IPv4
*/
@@ -341,7 +341,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct sock *sk;
int err;
struct tcp_sock *tp;
- __u32 seq;
+ struct request_sock *fastopen;
+ __u32 seq, snd_una;
struct net *net = dev_net(skb->dev);
sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
@@ -372,8 +373,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
tp = tcp_sk(sk);
seq = ntohl(th->seq);
+ /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
+ fastopen = tp->fastopen_rsk;
+ snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
if (sk->sk_state != TCP_LISTEN &&
- !between(seq, tp->snd_una, tp->snd_nxt)) {
+ !between(seq, snd_una, tp->snd_nxt)) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -437,8 +441,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
goto out;
case TCP_SYN_SENT:
- case TCP_SYN_RECV: /* Cannot happen.
- It can, it SYNs are crossed. --ANK */
+ case TCP_SYN_RECV:
+ /* Only in fast or simultaneous open. If a fast open socket is
+ * already accepted it is treated as a connected one below.
+ */
+ if (fastopen && fastopen->sk == NULL)
+ break;
+
if (!sock_owned_by_user(sk)) {
sk->sk_err = err;
sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
@@ -462,12 +471,14 @@ out:
static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
- struct flowi6 *fl6,
+ struct flowi *fl,
struct request_sock *req,
- u16 queue_mapping)
+ u16 queue_mapping,
+ struct tcp_fastopen_cookie *foc)
{
struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct flowi6 *fl6 = &fl->u.ip6;
struct sk_buff *skb;
int err = -ENOMEM;
@@ -475,7 +486,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
goto done;
- skb = tcp_make_synack(sk, dst, req, NULL);
+ skb = tcp_make_synack(sk, dst, req, foc);
if (skb) {
__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
@@ -494,16 +505,6 @@ done:
return err;
}
-static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
-{
- struct flowi6 fl6;
- int res;
-
- res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0);
- if (!res)
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
- return res;
-}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
@@ -529,8 +530,8 @@ static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr);
}
-static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
- int optlen)
+static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
+ int optlen)
{
struct tcp_md5sig cmd;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
@@ -665,7 +666,8 @@ clear_hash_noput:
return 1;
}
-static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
+static int __tcp_v6_inbound_md5_hash(struct sock *sk,
+ const struct sk_buff *skb)
{
const __u8 *hash_location = NULL;
struct tcp_md5sig_key *hash_expected;
@@ -705,27 +707,83 @@ static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
}
return 0;
}
+
+static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
+{
+ int ret;
+
+ rcu_read_lock();
+ ret = __tcp_v6_inbound_md5_hash(sk, skb);
+ rcu_read_unlock();
+
+ return ret;
+}
+
#endif
+static void tcp_v6_init_req(struct request_sock *req, struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+ ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
+
+ ireq->ir_iif = sk->sk_bound_dev_if;
+
+ /* So that link locals have meaning */
+ if (!sk->sk_bound_dev_if &&
+ ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+ ireq->ir_iif = inet6_iif(skb);
+
+ if (!TCP_SKB_CB(skb)->when &&
+ (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo ||
+ np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
+ np->rxopt.bits.rxohlim || np->repflow)) {
+ atomic_inc(&skb->users);
+ ireq->pktopts = skb;
+ }
+}
+
+static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl,
+ const struct request_sock *req,
+ bool *strict)
+{
+ if (strict)
+ *strict = true;
+ return inet6_csk_route_req(sk, &fl->u.ip6, req);
+}
+
struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
.family = AF_INET6,
.obj_size = sizeof(struct tcp6_request_sock),
- .rtx_syn_ack = tcp_v6_rtx_synack,
+ .rtx_syn_ack = tcp_rtx_synack,
.send_ack = tcp_v6_reqsk_send_ack,
.destructor = tcp_v6_reqsk_destructor,
.send_reset = tcp_v6_send_reset,
- .syn_ack_timeout = tcp_syn_ack_timeout,
+ .syn_ack_timeout = tcp_syn_ack_timeout,
};
-#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
+ .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
+ sizeof(struct ipv6hdr),
+#ifdef CONFIG_TCP_MD5SIG
.md5_lookup = tcp_v6_reqsk_md5_lookup,
.calc_md5_hash = tcp_v6_md5_hash_skb,
-};
#endif
+ .init_req = tcp_v6_init_req,
+#ifdef CONFIG_SYN_COOKIES
+ .cookie_init_seq = cookie_v6_init_sequence,
+#endif
+ .route_req = tcp_v6_route_req,
+ .init_seq = tcp_v6_init_sequence,
+ .send_synack = tcp_v6_send_synack,
+ .queue_hash_add = inet6_csk_reqsk_queue_hash_add,
+};
static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
- u32 tsval, u32 tsecr,
+ u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key, int rst, u8 tclass,
u32 label)
{
@@ -797,8 +855,11 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
fl6.flowi6_proto = IPPROTO_TCP;
- if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
+ if (rt6_need_strict(&fl6.daddr) && !oif)
fl6.flowi6_oif = inet6_iif(skb);
+ else
+ fl6.flowi6_oif = oif;
+ fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
fl6.fl6_dport = t1->dest;
fl6.fl6_sport = t1->source;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
@@ -832,6 +893,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
int genhash;
struct sock *sk1 = NULL;
#endif
+ int oif;
if (th->rst)
return;
@@ -875,7 +937,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
(th->doff << 2);
- tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0, 0);
+ oif = sk ? sk->sk_bound_dev_if : 0;
+ tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
#ifdef CONFIG_TCP_MD5SIG
release_sk1:
@@ -887,11 +950,11 @@ release_sk1:
}
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
- u32 win, u32 tsval, u32 tsecr,
+ u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key, u8 tclass,
u32 label)
{
- tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass,
+ tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass,
label);
}
@@ -903,7 +966,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp + tcptw->tw_ts_offset,
- tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
+ tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
tw->tw_tclass, (tw->tw_flowlabel << 12));
inet_twsk_put(tw);
@@ -912,8 +975,13 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
- tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
- req->rcv_wnd, tcp_time_stamp, req->ts_recent,
+ /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
+ * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
+ */
+ tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
+ tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+ tcp_rsk(req)->rcv_nxt,
+ req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
0, 0);
}
@@ -952,142 +1020,17 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
return sk;
}
-/* FIXME: this is substantially similar to the ipv4 code.
- * Can some kind of merge be done? -- erics
- */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
- struct tcp_options_received tmp_opt;
- struct request_sock *req;
- struct inet_request_sock *ireq;
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
- __u32 isn = TCP_SKB_CB(skb)->when;
- struct dst_entry *dst = NULL;
- struct flowi6 fl6;
- bool want_cookie = false;
-
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_conn_request(sk, skb);
if (!ipv6_unicast_destination(skb))
goto drop;
- if ((sysctl_tcp_syncookies == 2 ||
- inet_csk_reqsk_queue_is_full(sk)) && !isn) {
- want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
- if (!want_cookie)
- goto drop;
- }
-
- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
- goto drop;
- }
+ return tcp_conn_request(&tcp6_request_sock_ops,
+ &tcp_request_sock_ipv6_ops, sk, skb);
- req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
- if (req == NULL)
- goto drop;
-
-#ifdef CONFIG_TCP_MD5SIG
- tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
-#endif
-
- tcp_clear_options(&tmp_opt);
- tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
- tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, 0, NULL);
-
- if (want_cookie && !tmp_opt.saw_tstamp)
- tcp_clear_options(&tmp_opt);
-
- tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
- tcp_openreq_init(req, &tmp_opt, skb);
-
- ireq = inet_rsk(req);
- ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
- ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
- if (!want_cookie || tmp_opt.tstamp_ok)
- TCP_ECN_create_request(req, skb, sock_net(sk));
-
- ireq->ir_iif = sk->sk_bound_dev_if;
-
- /* So that link locals have meaning */
- if (!sk->sk_bound_dev_if &&
- ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
- ireq->ir_iif = inet6_iif(skb);
-
- if (!isn) {
- if (ipv6_opt_accepted(sk, skb) ||
- np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
- np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim ||
- np->repflow) {
- atomic_inc(&skb->users);
- ireq->pktopts = skb;
- }
-
- if (want_cookie) {
- isn = cookie_v6_init_sequence(sk, skb, &req->mss);
- req->cookie_ts = tmp_opt.tstamp_ok;
- goto have_isn;
- }
-
- /* VJ's idea. We save last timestamp seen
- * from the destination in peer table, when entering
- * state TIME-WAIT, and check against it before
- * accepting new connection request.
- *
- * If "isn" is not zero, this request hit alive
- * timewait bucket, so that all the necessary checks
- * are made in the function processing timewait state.
- */
- if (tmp_opt.saw_tstamp &&
- tcp_death_row.sysctl_tw_recycle &&
- (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
- if (!tcp_peer_is_proven(req, dst, true)) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
- goto drop_and_release;
- }
- }
- /* Kill the following clause, if you dislike this way. */
- else if (!sysctl_tcp_syncookies &&
- (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
- (sysctl_max_syn_backlog >> 2)) &&
- !tcp_peer_is_proven(req, dst, false)) {
- /* Without syncookies last quarter of
- * backlog is filled with destinations,
- * proven to be alive.
- * It means that we continue to communicate
- * to destinations, already remembered
- * to the moment of synflood.
- */
- LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
- &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source));
- goto drop_and_release;
- }
-
- isn = tcp_v6_init_sequence(skb);
- }
-have_isn:
- tcp_rsk(req)->snt_isn = isn;
-
- if (security_inet_conn_request(sk, skb, req))
- goto drop_and_release;
-
- if (tcp_v6_send_synack(sk, dst, &fl6, req,
- skb_get_queue_mapping(skb)) ||
- want_cookie)
- goto drop_and_free;
-
- tcp_rsk(req)->snt_synack = tcp_time_stamp;
- tcp_rsk(req)->listener = NULL;
- inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
- return 0;
-
-drop_and_release:
- dst_release(dst);
-drop_and_free:
- reqsk_free(req);
drop:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
return 0; /* don't send reset */
@@ -1203,6 +1146,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
newsk->sk_bound_dev_if = ireq->ir_iif;
+ ip6_set_txhash(newsk);
+
/* Now IPv6 options...
First: no IPv4 options.
@@ -1258,7 +1203,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
#ifdef CONFIG_TCP_MD5SIG
/* Copy over the MD5 key from the original socket */
- if ((key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr)) != NULL) {
+ key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
+ if (key != NULL) {
/* We're using one, so create a matching key
* on the newsk structure. If we fail to get
* memory, then we end up not copying the key
@@ -1288,26 +1234,6 @@ out:
return NULL;
}
-static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
-{
- if (skb->ip_summed == CHECKSUM_COMPLETE) {
- if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, skb->csum)) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- return 0;
- }
- }
-
- skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
- &ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, 0));
-
- if (skb->len <= 76) {
- return __skb_checksum_complete(skb);
- }
- return 0;
-}
-
/* The socket must have it's spinlock held when we get
* here.
*
@@ -1333,11 +1259,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_do_rcv(sk, skb);
-#ifdef CONFIG_TCP_MD5SIG
- if (tcp_v6_inbound_md5_hash (sk, skb))
- goto discard;
-#endif
-
if (sk_filter(sk, skb))
goto discard;
@@ -1481,7 +1402,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, th->doff*4))
goto discard_it;
- if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
+ if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
goto csum_error;
th = tcp_hdr(skb);
@@ -1510,6 +1431,11 @@ process:
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
+#ifdef CONFIG_TCP_MD5SIG
+ if (tcp_v6_inbound_md5_hash(sk, skb))
+ goto discard_and_relse;
+#endif
+
if (sk_filter(sk, skb))
goto discard_and_relse;
@@ -1591,7 +1517,8 @@ do_time_wait:
break;
case TCP_TW_RST:
goto no_tcp_socket;
- case TCP_TW_SUCCESS:;
+ case TCP_TW_SUCCESS:
+ ;
}
goto discard_it;
}
@@ -1636,7 +1563,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
.twsk_unique = tcp_twsk_unique,
- .twsk_destructor= tcp_twsk_destructor,
+ .twsk_destructor = tcp_twsk_destructor,
};
static const struct inet_connection_sock_af_ops ipv6_specific = {
@@ -1657,6 +1584,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
#endif
+ .mtu_reduced = tcp_v6_mtu_reduced,
};
#ifdef CONFIG_TCP_MD5SIG
@@ -1670,7 +1598,6 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
/*
* TCP over IPv4 via INET6 API
*/
-
static const struct inet_connection_sock_af_ops ipv6_mapped = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
@@ -1688,6 +1615,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
#endif
+ .mtu_reduced = tcp_v4_mtu_reduced,
};
#ifdef CONFIG_TCP_MD5SIG
@@ -1764,6 +1692,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
const struct inet_sock *inet = inet_sk(sp);
const struct tcp_sock *tp = tcp_sk(sp);
const struct inet_connection_sock *icsk = inet_csk(sp);
+ struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
dest = &sp->sk_v6_daddr;
src = &sp->sk_v6_rcv_saddr;
@@ -1806,7 +1735,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
tp->snd_cwnd,
- tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
+ sp->sk_state == TCP_LISTEN ?
+ (fastopenq ? fastopenq->max_qlen : 0) :
+ (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
);
}
@@ -1924,7 +1855,6 @@ struct proto tcpv6_prot = {
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
.release_cb = tcp_release_cb,
- .mtu_reduced = tcp_v6_mtu_reduced,
.hash = tcp_v6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
@@ -1966,7 +1896,6 @@ static struct inet_protosw tcpv6_protosw = {
.protocol = IPPROTO_TCP,
.prot = &tcpv6_prot,
.ops = &inet6_stream_ops,
- .no_check = 0,
.flags = INET_PROTOSW_PERMANENT |
INET_PROTOSW_ICSK,
};
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 0d78132ff18a..01b0ff9a0c2c 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -42,7 +42,7 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
if (NAPI_GRO_CB(skb)->flush)
goto skip_csum;
- wsum = skb->csum;
+ wsum = NAPI_GRO_CB(skb)->csum;
switch (skb->ip_summed) {
case CHECKSUM_NONE:
@@ -73,7 +73,7 @@ static int tcp6_gro_complete(struct sk_buff *skb, int thoff)
th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
&iph->daddr, 0);
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
return tcp_gro_complete(skb);
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1e586d92260e..4836af8f582d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -79,7 +79,6 @@ static unsigned int udp6_ehashfn(struct net *net,
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
{
const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- int sk_ipv6only = ipv6_only_sock(sk);
int sk2_ipv6only = inet_v6_ipv6only(sk2);
int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
@@ -95,7 +94,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
return 1;
if (addr_type == IPV6_ADDR_ANY &&
- !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
+ !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
return 1;
if (sk2_rcv_saddr6 &&
@@ -473,7 +472,7 @@ try_again:
sin6->sin6_addr = ipv6_hdr(skb)->saddr;
sin6->sin6_scope_id =
ipv6_iface_scope_id(&sin6->sin6_addr,
- IP6CB(skb)->iif);
+ inet6_iif(skb));
}
*addr_len = sizeof(*sin6);
}
@@ -534,11 +533,15 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct udphdr *uh = (struct udphdr*)(skb->data+offset);
struct sock *sk;
int err;
+ struct net *net = dev_net(skb->dev);
- sk = __udp6_lib_lookup(dev_net(skb->dev), daddr, uh->dest,
+ sk = __udp6_lib_lookup(net, daddr, uh->dest,
saddr, uh->source, inet6_iif(skb), udptable);
- if (sk == NULL)
+ if (sk == NULL) {
+ ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
return;
+ }
if (type == ICMPV6_PKT_TOOBIG) {
if (!ip6_sk_accept_pmtu(sk))
@@ -634,6 +637,10 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) {
int ret;
+ /* Verify checksum before giving to encap */
+ if (udp_lib_checksum_complete(skb))
+ goto csum_error;
+
ret = encap_rcv(sk, skb);
if (ret <= 0) {
UDP_INC_STATS_BH(sock_net(sk),
@@ -670,8 +677,11 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto csum_error;
}
- if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf))
+ if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
+ UDP6_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_RCVBUFERRORS, is_udplite);
goto drop;
+ }
skb_dst_drop(skb);
@@ -686,6 +696,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
bh_unlock_sock(sk);
return rc;
+
csum_error:
UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
@@ -695,44 +706,26 @@ drop:
return -1;
}
-static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
- __be16 loc_port, const struct in6_addr *loc_addr,
- __be16 rmt_port, const struct in6_addr *rmt_addr,
- int dif)
+static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
+ __be16 loc_port, const struct in6_addr *loc_addr,
+ __be16 rmt_port, const struct in6_addr *rmt_addr,
+ int dif, unsigned short hnum)
{
- struct hlist_nulls_node *node;
- struct sock *s = sk;
- unsigned short num = ntohs(loc_port);
-
- sk_nulls_for_each_from(s, node) {
- struct inet_sock *inet = inet_sk(s);
-
- if (!net_eq(sock_net(s), net))
- continue;
-
- if (udp_sk(s)->udp_port_hash == num &&
- s->sk_family == PF_INET6) {
- if (inet->inet_dport) {
- if (inet->inet_dport != rmt_port)
- continue;
- }
- if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
- !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
- continue;
-
- if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
- continue;
+ struct inet_sock *inet = inet_sk(sk);
- if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
- if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
- continue;
- }
- if (!inet6_mc_check(s, loc_addr, rmt_addr))
- continue;
- return s;
- }
- }
- return NULL;
+ if (!net_eq(sock_net(sk), net))
+ return false;
+
+ if (udp_sk(sk)->udp_port_hash != hnum ||
+ sk->sk_family != PF_INET6 ||
+ (inet->inet_dport && inet->inet_dport != rmt_port) ||
+ (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+ !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
+ (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+ return false;
+ if (!inet6_mc_check(sk, loc_addr, rmt_addr))
+ return false;
+ return true;
}
static void flush_stack(struct sock **stack, unsigned int count,
@@ -756,10 +749,22 @@ static void flush_stack(struct sock **stack, unsigned int count,
if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0)
skb1 = NULL;
+ sock_put(sk);
}
if (unlikely(skb1))
kfree_skb(skb1);
}
+/* Report an IPv6 UDP datagram that carries a zero checksum.
+ * Emits one LIMIT_NETDEBUG message containing the source and destination
+ * address and port taken from the IPv6 and UDP headers of @skb. The skb
+ * itself is not modified or freed here.
+ */
+static void udp6_csum_zero_error(struct sk_buff *skb)
+{
+ /* RFC 2460 section 8.1 says that we SHOULD log
+ * this error. Well, it is reasonable.
+ */
+ LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
+ &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
+ &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
+}
+
/*
* Note: called only from the BH handler context,
* so we don't need to lock the hashes.
@@ -770,38 +775,51 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
{
struct sock *sk, *stack[256 / sizeof(struct sock *)];
const struct udphdr *uh = udp_hdr(skb);
- struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
- int dif;
- unsigned int i, count = 0;
+ struct hlist_nulls_node *node;
+ unsigned short hnum = ntohs(uh->dest);
+ struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
+ int dif = inet6_iif(skb);
+ unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);
+ unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
+
+ if (use_hash2) {
+ hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
+ udp_table.mask;
+ hash2 = udp6_portaddr_hash(net, daddr, hnum) & udp_table.mask;
+start_lookup:
+ hslot = &udp_table.hash2[hash2];
+ offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
+ }
spin_lock(&hslot->lock);
- sk = sk_nulls_head(&hslot->head);
- dif = inet6_iif(skb);
- sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
- while (sk) {
- stack[count++] = sk;
- sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
- uh->source, saddr, dif);
- if (unlikely(count == ARRAY_SIZE(stack))) {
- if (!sk)
- break;
- flush_stack(stack, count, skb, ~0);
- count = 0;
+ sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) {
+ if (__udp_v6_is_mcast_sock(net, sk,
+ uh->dest, daddr,
+ uh->source, saddr,
+ dif, hnum) &&
+ /* If zero checksum and no_check is not on for
+ * the socket then skip it.
+ */
+ (uh->check || udp_sk(sk)->no_check6_rx)) {
+ if (unlikely(count == ARRAY_SIZE(stack))) {
+ flush_stack(stack, count, skb, ~0);
+ count = 0;
+ }
+ stack[count++] = sk;
+ sock_hold(sk);
}
}
- /*
- * before releasing the lock, we must take reference on sockets
- */
- for (i = 0; i < count; i++)
- sock_hold(stack[i]);
spin_unlock(&hslot->lock);
+ /* Also lookup *:port if we are using hash2 and haven't done so yet. */
+ if (use_hash2 && hash2 != hash2_any) {
+ hash2 = hash2_any;
+ goto start_lookup;
+ }
+
if (count) {
flush_stack(stack, count, skb, count - 1);
-
- for (i = 0; i < count; i++)
- sock_put(stack[i]);
} else {
kfree_skb(skb);
}
@@ -867,6 +885,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (sk != NULL) {
int ret;
+ if (!uh->check && !udp_sk(sk)->no_check6_rx) {
+ sock_put(sk);
+ udp6_csum_zero_error(skb);
+ goto csum_error;
+ }
+
ret = udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);
@@ -879,6 +903,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
return 0;
}
+ if (!uh->check) {
+ udp6_csum_zero_error(skb);
+ goto csum_error;
+ }
+
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard;
@@ -1006,7 +1035,10 @@ static int udp_v6_push_pending_frames(struct sock *sk)
if (is_udplite)
csum = udplite_csum_outgoing(sk, skb);
- else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
+ else if (up->no_check6_tx) { /* UDP csum disabled */
+ skb->ip_summed = CHECKSUM_NONE;
+ goto send;
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr,
up->len);
goto send;
@@ -1232,14 +1264,8 @@ do_udp_sendmsg:
goto out;
}
- if (hlimit < 0) {
- if (ipv6_addr_is_multicast(&fl6.daddr))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
- }
+ if (hlimit < 0)
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
if (tclass < 0)
tclass = np->tclass;
@@ -1479,7 +1505,6 @@ static struct inet_protosw udpv6_protosw = {
.protocol = IPPROTO_UDP,
.prot = &udpv6_prot,
.ops = &inet6_dgram_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_PERMANENT,
};
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index b261ee8b83fc..0ae3d98f83e0 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -63,7 +63,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (unlikely(type & ~(SKB_GSO_UDP |
SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
SKB_GSO_SIT |
SKB_GSO_MPLS) ||
@@ -76,7 +78,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
goto out;
}
- if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
+ if (skb->encapsulation && skb_shinfo(skb)->gso_type &
+ (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
segs = skb_udp_tunnel_segment(skb, features);
else {
/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index dfcc4be46898..9cf097e206e9 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -64,7 +64,6 @@ static struct inet_protosw udplite6_protosw = {
.protocol = IPPROTO_UDPLITE,
.prot = &udplitev6_prot,
.ops = &inet6_dgram_ops,
- .no_check = 0,
.flags = INET_PROTOSW_PERMANENT,
};
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index cb04f7a16b5e..901ef6f8addc 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -18,65 +18,6 @@
#include <net/ipv6.h>
#include <net/xfrm.h>
-/* Informational hook. The decap is still done here. */
-static struct xfrm_tunnel_notifier __rcu *rcv_notify_handlers __read_mostly;
-static DEFINE_MUTEX(xfrm6_mode_tunnel_input_mutex);
-
-int xfrm6_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler)
-{
- struct xfrm_tunnel_notifier __rcu **pprev;
- struct xfrm_tunnel_notifier *t;
- int ret = -EEXIST;
- int priority = handler->priority;
-
- mutex_lock(&xfrm6_mode_tunnel_input_mutex);
-
- for (pprev = &rcv_notify_handlers;
- (t = rcu_dereference_protected(*pprev,
- lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL;
- pprev = &t->next) {
- if (t->priority > priority)
- break;
- if (t->priority == priority)
- goto err;
-
- }
-
- handler->next = *pprev;
- rcu_assign_pointer(*pprev, handler);
-
- ret = 0;
-
-err:
- mutex_unlock(&xfrm6_mode_tunnel_input_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_register);
-
-int xfrm6_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler)
-{
- struct xfrm_tunnel_notifier __rcu **pprev;
- struct xfrm_tunnel_notifier *t;
- int ret = -ENOENT;
-
- mutex_lock(&xfrm6_mode_tunnel_input_mutex);
- for (pprev = &rcv_notify_handlers;
- (t = rcu_dereference_protected(*pprev,
- lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL;
- pprev = &t->next) {
- if (t == handler) {
- *pprev = handler->next;
- ret = 0;
- break;
- }
- }
- mutex_unlock(&xfrm6_mode_tunnel_input_mutex);
- synchronize_net();
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_deregister);
-
static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
{
const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
@@ -130,7 +71,6 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct xfrm_tunnel_notifier *handler;
int err = -EINVAL;
if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
@@ -138,9 +78,6 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto out;
- for_each_input_rcu(rcv_notify_handlers, handler)
- handler->handler(skb);
-
err = skb_unclone(skb, GFP_ATOMIC);
if (err)
goto out;
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6cd625e37706..433672d07d0b 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -78,7 +78,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- if (!skb->local_df && skb->len > mtu) {
+ if (!skb->ignore_df && skb->len > mtu) {
skb->dev = dst->dev;
if (xfrm6_local_dontfrag(skb))
@@ -114,13 +114,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
if (err)
return err;
- memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
-#ifdef CONFIG_NETFILTER
- IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
-#endif
-
- skb->protocol = htons(ETH_P_IPV6);
- skb->local_df = 1;
+ skb->ignore_df = 1;
return x->outer_mode->output2(x, skb);
}
@@ -128,11 +122,13 @@ EXPORT_SYMBOL(xfrm6_prepare_output);
int xfrm6_output_finish(struct sk_buff *skb)
{
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ skb->protocol = htons(ETH_P_IPV6);
+
#ifdef CONFIG_NETFILTER
IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
#endif
- skb->protocol = htons(ETH_P_IPV6);
return xfrm_output(skb);
}
@@ -142,6 +138,13 @@ static int __xfrm6_output(struct sk_buff *skb)
struct xfrm_state *x = dst->xfrm;
int mtu;
+#ifdef CONFIG_NETFILTER
+ if (!x) {
+ IP6CB(skb)->flags |= IP6SKB_REROUTED;
+ return dst_output(skb);
+ }
+#endif
+
if (skb->protocol == htons(ETH_P_IPV6))
mtu = ip6_skb_dst_mtu(skb);
else
@@ -150,7 +153,7 @@ static int __xfrm6_output(struct sk_buff *skb)
if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
xfrm6_local_rxpmtu(skb, mtu);
return -EMSGSIZE;
- } else if (!skb->local_df && skb->len > mtu && skb->sk) {
+ } else if (!skb->ignore_df && skb->len > mtu && skb->sk) {
xfrm_local_error(skb, mtu);
return -EMSGSIZE;
}
@@ -163,8 +166,9 @@ static int __xfrm6_output(struct sk_buff *skb)
return x->outer_mode->afinfo->output_finish(skb);
}
-int xfrm6_output(struct sk_buff *skb)
+int xfrm6_output(struct sock *sk, struct sk_buff *skb)
{
- return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL,
- skb_dst(skb)->dev, __xfrm6_output);
+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb,
+ NULL, skb_dst(skb)->dev, __xfrm6_output,
+ !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 5f8e128c512d..2a0bbda2c76a 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -389,11 +389,17 @@ int __init xfrm6_init(void)
if (ret)
goto out_policy;
+ ret = xfrm6_protocol_init();
+ if (ret)
+ goto out_state;
+
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&xfrm6_net_ops);
#endif
out:
return ret;
+out_state:
+ xfrm6_state_fini();
out_policy:
xfrm6_policy_fini();
goto out;
@@ -404,6 +410,7 @@ void xfrm6_fini(void)
#ifdef CONFIG_SYSCTL
unregister_pernet_subsys(&xfrm6_net_ops);
#endif
+ xfrm6_protocol_fini();
xfrm6_policy_fini();
xfrm6_state_fini();
dst_entries_destroy(&xfrm6_dst_ops);
diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c
new file mode 100644
index 000000000000..54d13f8dbbae
--- /dev/null
+++ b/net/ipv6/xfrm6_protocol.c
@@ -0,0 +1,279 @@
+/* xfrm6_protocol.c - Generic xfrm protocol multiplexer for ipv6.
+ *
+ * Copyright (C) 2013 secunet Security Networks AG
+ *
+ * Author:
+ * Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * Based on:
+ * net/ipv4/xfrm4_protocol.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/skbuff.h>
+#include <linux/icmpv6.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+
+static struct xfrm6_protocol __rcu *esp6_handlers __read_mostly;
+static struct xfrm6_protocol __rcu *ah6_handlers __read_mostly;
+static struct xfrm6_protocol __rcu *ipcomp6_handlers __read_mostly;
+static DEFINE_MUTEX(xfrm6_protocol_mutex);
+/* Return the address of the RCU-protected handler list head for @protocol.
+ * Only IPPROTO_ESP, IPPROTO_AH and IPPROTO_COMP have a list; any other
+ * protocol number yields NULL, so the result must be checked before it is
+ * dereferenced.
+ */
+static inline struct xfrm6_protocol __rcu **proto_handlers(u8 protocol)
+{
+ switch (protocol) {
+ case IPPROTO_ESP:
+ return &esp6_handlers;
+ case IPPROTO_AH:
+ return &ah6_handlers;
+ case IPPROTO_COMP:
+ return &ipcomp6_handlers;
+ }
+
+ return NULL;
+}
+/* Walk a handler list: @handler starts at @head and follows ->next until
+ * NULL, with every pointer loaded through rcu_dereference(). The macro
+ * expands to a bare for-header, so the statement following the invocation
+ * is the loop body. Its last line ends in a continuation backslash, which
+ * makes the empty line after it part of the definition.
+ * NOTE(review): users presumably run inside an RCU read-side critical
+ * section; nothing in this file takes rcu_read_lock() itself - confirm.
+ */
+#define for_each_protocol_rcu(head, handler) \
+ for (handler = rcu_dereference(head); \
+ handler != NULL; \
+ handler = rcu_dereference(handler->next)) \
+
+int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
+{ /* Call ->cb_handler(skb, err) of each handler registered for @protocol,
+   * in list order. The first result <= 0 ends the walk and is returned.
+   * Returns 0 when @protocol has no handler list, when the list is empty,
+   * or when every handler returned a positive value.
+   */
+ int ret;
+ struct xfrm6_protocol *handler;
+ struct xfrm6_protocol __rcu **head = proto_handlers(protocol);
+
+ if (!head)
+ return 0;
+
+ for_each_protocol_rcu(*proto_handlers(protocol), handler) /* same list that head points to */
+ if ((ret = handler->cb_handler(skb, err)) <= 0)
+ return ret;
+
+ return 0;
+}
+EXPORT_SYMBOL(xfrm6_rcv_cb);
+/* Receive entry point for IPPROTO_ESP (the .handler of esp6_protocol).
+ * Resets XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 to NULL, then offers @skb to
+ * each registered ESP handler in list order. The first handler returning
+ * anything other than -EINVAL ends the walk and its value is returned.
+ * If every handler returns -EINVAL (or none is registered), an ICMPv6
+ * destination-unreachable/port-unreachable is sent, @skb is freed and 0 is
+ * returned.
+ */
+static int xfrm6_esp_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+ for_each_protocol_rcu(esp6_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+/* Error entry point for IPPROTO_ESP (the .err_handler of esp6_protocol).
+ * Passes the error, with all arguments unchanged, to ->err_handler of each
+ * registered ESP handler in list order and stops at the first one that
+ * returns 0.
+ */
+static void xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(esp6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+/* Receive entry point for IPPROTO_AH (the .handler of ah6_protocol).
+ * Resets XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 to NULL, then offers @skb to
+ * each registered AH handler in list order. The first handler returning
+ * anything other than -EINVAL ends the walk and its value is returned.
+ * If every handler returns -EINVAL (or none is registered), an ICMPv6
+ * destination-unreachable/port-unreachable is sent, @skb is freed and 0 is
+ * returned.
+ */
+static int xfrm6_ah_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+ for_each_protocol_rcu(ah6_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+/* Error entry point for IPPROTO_AH (the .err_handler of ah6_protocol).
+ * Passes the error, with all arguments unchanged, to ->err_handler of each
+ * registered AH handler in list order and stops at the first one that
+ * returns 0.
+ */
+static void xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(ah6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+/* Receive entry point for IPPROTO_COMP (the .handler of ipcomp6_protocol).
+ * Resets XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 to NULL, then offers @skb to
+ * each registered IPcomp handler in list order. The first handler returning
+ * anything other than -EINVAL ends the walk and its value is returned.
+ * If every handler returns -EINVAL (or none is registered), an ICMPv6
+ * destination-unreachable/port-unreachable is sent, @skb is freed and 0 is
+ * returned.
+ */
+static int xfrm6_ipcomp_rcv(struct sk_buff *skb)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+ for_each_protocol_rcu(ipcomp6_handlers, handler)
+ if ((ret = handler->handler(skb)) != -EINVAL)
+ return ret;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+ kfree_skb(skb);
+ return 0;
+}
+/* Error entry point for IPPROTO_COMP (the .err_handler of ipcomp6_protocol).
+ * Passes the error, with all arguments unchanged, to ->err_handler of each
+ * registered IPcomp handler in list order and stops at the first one that
+ * returns 0.
+ */
+static void xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_protocol *handler;
+
+ for_each_protocol_rcu(ipcomp6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+/* inet6_protocol descriptors for ESP, AH and IPcomp. netproto() returns one
+ * of these, and the register/deregister functions in this file pass it to
+ * inet6_add_protocol()/inet6_del_protocol(). Each descriptor points at the
+ * per-protocol rcv/err multiplexers defined in this file and sets
+ * INET6_PROTO_NOPOLICY.
+ */
+static const struct inet6_protocol esp6_protocol = {
+ .handler = xfrm6_esp_rcv,
+ .err_handler = xfrm6_esp_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static const struct inet6_protocol ah6_protocol = {
+ .handler = xfrm6_ah_rcv,
+ .err_handler = xfrm6_ah_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+static const struct inet6_protocol ipcomp6_protocol = {
+ .handler = xfrm6_ipcomp_rcv,
+ .err_handler = xfrm6_ipcomp_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+/* AF_INET6 input afinfo whose callback is xfrm6_rcv_cb(). It is registered
+ * by xfrm6_protocol_init() and unregistered by xfrm6_protocol_fini().
+ */
+static struct xfrm_input_afinfo xfrm6_input_afinfo = {
+ .family = AF_INET6,
+ .owner = THIS_MODULE,
+ .callback = xfrm6_rcv_cb,
+};
+/* Return the inet6_protocol descriptor that belongs to @protocol
+ * (IPPROTO_ESP, IPPROTO_AH or IPPROTO_COMP), or NULL for any other
+ * protocol number.
+ */
+static inline const struct inet6_protocol *netproto(unsigned char protocol)
+{
+ switch (protocol) {
+ case IPPROTO_ESP:
+ return &esp6_protocol;
+ case IPPROTO_AH:
+ return &ah6_protocol;
+ case IPPROTO_COMP:
+ return &ipcomp6_protocol;
+ }
+
+ return NULL;
+}
+/* Add @handler to the handler list of @protocol.
+ *
+ * The list is kept in descending ->priority order: the walk stops at the
+ * first entry whose priority is lower than @handler's and links @handler
+ * in front of it. An existing entry with the same priority rejects the
+ * registration. The list is modified under xfrm6_protocol_mutex.
+ *
+ * If the list was empty on entry, the matching inet6_protocol descriptor is
+ * additionally registered with inet6_add_protocol() after the mutex has
+ * been released.
+ *
+ * Returns 0 on success, -EINVAL if @protocol is not ESP/AH/IPcomp, -EEXIST
+ * if a handler with the same priority is already present, or -EAGAIN if
+ * inet6_add_protocol() fails.
+ * NOTE(review): in the -EAGAIN case @handler has already been linked into
+ * the list and is not unlinked again here.
+ */
+int xfrm6_protocol_register(struct xfrm6_protocol *handler,
+ unsigned char protocol)
+{
+ struct xfrm6_protocol __rcu **pprev;
+ struct xfrm6_protocol *t;
+ bool add_netproto = false;
+ int ret = -EEXIST;
+ int priority = handler->priority;
+
+ if (!proto_handlers(protocol) || !netproto(protocol))
+ return -EINVAL;
+
+ mutex_lock(&xfrm6_protocol_mutex);
+
+ if (!rcu_dereference_protected(*proto_handlers(protocol),
+ lockdep_is_held(&xfrm6_protocol_mutex)))
+ add_netproto = true;
+
+ for (pprev = proto_handlers(protocol);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&xfrm6_protocol_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t->priority < priority) /* insert in front of the first lower-priority entry */
+ break;
+ if (t->priority == priority)
+ goto err;
+ }
+
+ handler->next = *pprev;
+ rcu_assign_pointer(*pprev, handler);
+
+ ret = 0;
+
+err:
+ mutex_unlock(&xfrm6_protocol_mutex);
+
+ if (add_netproto) { /* list was empty when this call started */
+ if (inet6_add_protocol(netproto(protocol), protocol)) {
+ pr_err("%s: can't add protocol\n", __func__);
+ ret = -EAGAIN;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(xfrm6_protocol_register);
+/* Remove @handler from the handler list of @protocol.
+ *
+ * The list is searched and modified under xfrm6_protocol_mutex. After the
+ * search, and regardless of whether @handler was found, an empty list
+ * causes the matching inet6_protocol descriptor to be removed with
+ * inet6_del_protocol(). synchronize_net() is called after the mutex is
+ * released and before returning.
+ *
+ * Returns 0 if @handler was unlinked, -EINVAL if @protocol is not
+ * ESP/AH/IPcomp, -ENOENT if @handler was not on the list, or -EAGAIN if
+ * inet6_del_protocol() fails.
+ * NOTE(review): the unlink below is a plain pointer store rather than
+ * rcu_assign_pointer(), unlike the insertion in the register path.
+ */
+int xfrm6_protocol_deregister(struct xfrm6_protocol *handler,
+ unsigned char protocol)
+{
+ struct xfrm6_protocol __rcu **pprev;
+ struct xfrm6_protocol *t;
+ int ret = -ENOENT;
+
+ if (!proto_handlers(protocol) || !netproto(protocol))
+ return -EINVAL;
+
+ mutex_lock(&xfrm6_protocol_mutex);
+
+ for (pprev = proto_handlers(protocol);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&xfrm6_protocol_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t == handler) {
+ *pprev = handler->next;
+ ret = 0;
+ break;
+ }
+ }
+
+ if (!rcu_dereference_protected(*proto_handlers(protocol),
+ lockdep_is_held(&xfrm6_protocol_mutex))) {
+ if (inet6_del_protocol(netproto(protocol), protocol) < 0) {
+ pr_err("%s: can't remove protocol\n", __func__);
+ ret = -EAGAIN;
+ }
+ }
+
+ mutex_unlock(&xfrm6_protocol_mutex);
+
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL(xfrm6_protocol_deregister);
+/* Init hook: register xfrm6_input_afinfo through
+ * xfrm_input_register_afinfo() and return that call's result unchanged.
+ */
+int __init xfrm6_protocol_init(void)
+{
+ return xfrm_input_register_afinfo(&xfrm6_input_afinfo);
+}
+/* Teardown counterpart of xfrm6_protocol_init(): unregister
+ * xfrm6_input_afinfo. The result of the unregister call is not checked.
+ */
+void xfrm6_protocol_fini(void)
+{
+ xfrm_input_unregister_afinfo(&xfrm6_input_afinfo);
+}