From bd8c78e78d5011d8111bc2533ee73b13a3bd6c42 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 30 Jul 2014 14:55:26 +0200 Subject: nl80211: clear skb cb before passing to netlink In testmode and vendor command reply/event SKBs we use the skb cb data to store nl80211 parameters between allocation and sending. This causes the code for CONFIG_NETLINK_MMAP to get confused, because it takes ownership of the skb cb data when the SKB is handed off to netlink, and it doesn't explicitly clear it. Clear the skb cb explicitly when we're done and before it gets passed to netlink to avoid this issue. Cc: stable@vger.kernel.org [this goes way back] Reported-by: Assaf Azulay Reported-by: David Spinadel Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index df7b1332a1ec..7257164af91b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6969,6 +6969,9 @@ void __cfg80211_send_event_skb(struct sk_buff *skb, gfp_t gfp) struct nlattr *data = ((void **)skb->cb)[2]; enum nl80211_multicast_groups mcgrp = NL80211_MCGRP_TESTMODE; + /* clear CB data for netlink core to own from now on */ + memset(skb->cb, 0, sizeof(skb->cb)); + nla_nest_end(skb, data); genlmsg_end(skb, hdr); @@ -9294,6 +9297,9 @@ int cfg80211_vendor_cmd_reply(struct sk_buff *skb) void *hdr = ((void **)skb->cb)[1]; struct nlattr *data = ((void **)skb->cb)[2]; + /* clear CB data for netlink core to own from now on */ + memset(skb->cb, 0, sizeof(skb->cb)); + if (WARN_ON(!rdev->cur_cmd_info)) { kfree_skb(skb); return -EINVAL; -- cgit v1.2.3 From 785e21a89d77923852869f83ebd2689ec4d5ce54 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 3 Sep 2014 15:25:04 +0300 Subject: mac80211: use bss_conf->dtim_period instead of conf.ps_dtim_period sta_set_sinfo is obviously takes data for specific station. This specific station is attached to a specific virtual interface. Hence we should use the dtim_period from this virtual interface rather than the system wide dtim_period. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 441875f03750..a1e433b88c66 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1822,7 +1822,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE; if (sdata->vif.bss_conf.use_short_slot) sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME; - sinfo->bss_param.dtim_period = sdata->local->hw.conf.ps_dtim_period; + sinfo->bss_param.dtim_period = sdata->vif.bss_conf.dtim_period; sinfo->bss_param.beacon_interval = sdata->vif.bss_conf.beacon_int; sinfo->sta_flags.set = 0; -- cgit v1.2.3 From 82d5e2b8b466d5bfc7c6278a7c04a53b9b287673 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Sep 2014 04:00:00 -0700 Subject: net: fix skb_page_frag_refill() kerneldoc In commit d9b2938aabf7 ("net: attempt a single high order allocation) I forgot to update kerneldoc, as @prio parameter was renamed to @gfp Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index d372b4bd3f99..9c3f823e76a9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1866,7 +1866,7 @@ EXPORT_SYMBOL(sock_alloc_send_skb); * skb_page_frag_refill - check that a page_frag contains enough room * @sz: minimum size of the fragment we want to get * @pfrag: pointer to page_frag - * @prio: priority for memory allocation + * @gfp: priority for memory allocation * * Note: While this allocator tries to use high order pages, there is * no guarantee that allocations succeed. Therefore, @sz MUST be -- cgit v1.2.3 From 6a2a2b3ae0759843b22c929881cc184b00cc63ff Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Mon, 8 Sep 2014 14:49:59 -0700 Subject: net:socket: set msg_namelen to 0 if msg_name is passed as NULL in msghdr struct from userland. Linux manpage for recvmsg and sendmsg calls does not explicitly mention setting msg_namelen to 0 when msg_name passed set as NULL. When developers don't set msg_namelen member in msghdr, it might contain garbage value which will fail the validation check and sendmsg and recvmsg calls from kernel will return EINVAL. This will break old binaries and any code for which there is no access to source code. To fix this, we set msg_namelen to 0 when msg_name is passed as NULL from userland. Signed-off-by: Ani Sinha Signed-off-by: David S. Miller --- net/socket.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 2e2586e2dee1..4cdbc107606f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1996,6 +1996,9 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) return -EFAULT; + if (kmsg->msg_name == NULL) + kmsg->msg_namelen = 0; + if (kmsg->msg_namelen < 0) return -EINVAL; -- cgit v1.2.3 From ed3bfdfdced76aa7edb0b05c0d739ee3a2a6e619 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Sep 2014 15:19:44 +0100 Subject: RxRPC: Fix missing __user annotation Fix a missing __user annotation in a cast of a user space pointer (found by checker). Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/ar-key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index b45d080e64a7..1b24191167f1 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -1143,7 +1143,7 @@ static long rxrpc_read(const struct key *key, if (copy_to_user(xdr, (s), _l) != 0) \ goto fault; \ if (_l & 3 && \ - copy_to_user((u8 *)xdr + _l, &zero, 4 - (_l & 3)) != 0) \ + copy_to_user((u8 __user *)xdr + _l, &zero, 4 - (_l & 3)) != 0) \ goto fault; \ xdr += (_l + 3) >> 2; \ } while(0) -- cgit v1.2.3 From 381f4dca48d23e155b936b86ccd3ff12f073cf0f Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 10 Sep 2014 23:23:02 +0200 Subject: ipv6: clean up anycast when an interface is destroyed If we try to rmmod the driver for an interface while sockets with setsockopt(JOIN_ANYCAST) are alive, some refcounts aren't cleaned up and we get stuck on: unregister_netdevice: waiting for ens3 to become free. Usage count = 1 If we LEAVE_ANYCAST/close everything before rmmod'ing, there is no problem. We need to perform a cleanup similar to the one for multicast in addrconf_ifdown(how == 1). Signed-off-by: Sabrina Dubroca Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/addrconf.h | 1 + net/ipv6/addrconf.c | 8 +++++--- net/ipv6/anycast.c | 21 +++++++++++++++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index f679877bb601..ec51e673b4b6 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -204,6 +204,7 @@ void ipv6_sock_ac_close(struct sock *sk); int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr); int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr); +void ipv6_ac_destroy_dev(struct inet6_dev *idev); bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fc1fac2a0528..3342ee64f2e3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3094,11 +3094,13 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_unlock_bh(&idev->lock); - /* Step 5: Discard multicast list */ - if (how) + /* Step 5: Discard anycast and multicast list */ + if (how) { + ipv6_ac_destroy_dev(idev); ipv6_mc_destroy_dev(idev); - else + } else { ipv6_mc_down(idev); + } idev->tstamp = jiffies; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index ff2de7d9d8e6..9a386842fd62 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -351,6 +351,27 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr) return __ipv6_dev_ac_dec(idev, addr); } +void ipv6_ac_destroy_dev(struct inet6_dev *idev) +{ + struct ifacaddr6 *aca; + + write_lock_bh(&idev->lock); + while ((aca = idev->ac_list) != NULL) { + idev->ac_list = aca->aca_next; + write_unlock_bh(&idev->lock); + + addrconf_leave_solict(idev, &aca->aca_addr); + + dst_hold(&aca->aca_rt->dst); + ip6_del_rt(aca->aca_rt); + + aca_put(aca); + + write_lock_bh(&idev->lock); + } + write_unlock_bh(&idev->lock); +} + /* * check if the interface has this anycast address * called with rcu_read_lock() -- cgit v1.2.3 From 20adfa1a81af00bf2027644507ad4fa9cd2849cf Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 12 Sep 2014 16:26:16 -0400 Subject: bridge: Check if vlan filtering is enabled only once. The bridge code checks if vlan filtering is enabled on both ingress and egress. When the state flip happens, it is possible for the bridge to currently be forwarding packets and forwarding behavior becomes non-deterministic. Bridge may drop packets on some interfaces, but not others. This patch solves this by caching the filtered state of the packet into skb_cb on ingress. The skb_cb is guaranteed to not be over-written between the time packet entres bridge forwarding path and the time it leaves it. On egress, we can then check the cached state to see if we need to apply filtering information. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- net/bridge/br_private.h | 3 +++ net/bridge/br_vlan.c | 14 ++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 62a7fa2e3569..b6c04cbcfdc5 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -309,6 +309,9 @@ struct br_input_skb_cb { int igmp; int mrouters_only; #endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + bool vlan_filtered; +#endif }; #define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index e1bcd653899b..f645197b33d1 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -125,7 +125,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, { u16 vid; - if (!br->vlan_enabled) + /* If this packet was not filtered at input, let it pass */ + if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) goto out; /* Vlan filter table must be configured at this point. The @@ -164,8 +165,10 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, /* If VLAN filtering is disabled on the bridge, all packets are * permitted. */ - if (!br->vlan_enabled) + if (!br->vlan_enabled) { + BR_INPUT_SKB_CB(skb)->vlan_filtered = false; return true; + } /* If there are no vlan in the permitted list, all packets are * rejected. @@ -173,6 +176,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, if (!v) goto drop; + BR_INPUT_SKB_CB(skb)->vlan_filtered = true; proto = br->vlan_proto; /* If vlan tx offload is disabled on bridge device and frame was @@ -251,7 +255,8 @@ bool br_allowed_egress(struct net_bridge *br, { u16 vid; - if (!br->vlan_enabled) + /* If this packet was not filtered at input, let it pass */ + if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) return true; if (!v) @@ -270,7 +275,8 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) struct net_bridge *br = p->br; struct net_port_vlans *v; - if (!br->vlan_enabled) + /* If filtering was disabled at input, let it pass. */ + if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) return true; v = rcu_dereference(p->vlan_info); -- cgit v1.2.3 From 635126b7ca13a01d6322fbf7bdc5d1c738d26807 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 12 Sep 2014 16:26:17 -0400 Subject: bridge: Allow clearing of pvid and untagged bitmap Currently, it is possible to modify the vlan filter configuration to add pvid or untagged support. For example: bridge vlan add vid 10 dev eth0 bridge vlan add vid 10 dev eth0 untagged pvid The second statement will modify vlan 10 to include untagged and pvid configuration. However, it is currently impossible to go backwards bridge vlan add vid 10 dev eth0 untagged pvid bridge vlan add vid 10 dev eth0 Here nothing happens. This patch correct this so that any modifiers not supplied are removed from the configuration. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/bridge/br_vlan.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index f645197b33d1..4b86738eca9a 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -27,9 +27,13 @@ static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags) { if (flags & BRIDGE_VLAN_INFO_PVID) __vlan_add_pvid(v, vid); + else + __vlan_delete_pvid(v, vid); if (flags & BRIDGE_VLAN_INFO_UNTAGGED) set_bit(vid, v->untagged_bitmap); + else + clear_bit(vid, v->untagged_bitmap); } static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) -- cgit v1.2.3 From 7ce64c79c4decdeb1afe0bf2f6ef834b382871d1 Mon Sep 17 00:00:00 2001 From: "Alexander Y. Fomichev" Date: Mon, 15 Sep 2014 14:22:35 +0400 Subject: net: fix creation adjacent device symlinks __netdev_adjacent_dev_insert may add adjust device of different net namespace, without proper check it leads to emergence of broken sysfs links from/to devices in another namespace. Fix: rewrite netdev_adjacent_is_neigh_list macro as a function, move net_eq check into netdev_adjacent_is_neigh_list. (thanks David) related to: 4c75431ac3520631f1d9e74aa88407e6374dbbc4 Signed-off-by: Alexander Fomichev Signed-off-by: David S. Miller --- net/core/dev.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index ab9a16530c36..cf8a95f48cff 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4809,9 +4809,14 @@ static void netdev_adjacent_sysfs_del(struct net_device *dev, sysfs_remove_link(&(dev->dev.kobj), linkname); } -#define netdev_adjacent_is_neigh_list(dev, dev_list) \ - (dev_list == &dev->adj_list.upper || \ - dev_list == &dev->adj_list.lower) +static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev, + struct net_device *adj_dev, + struct list_head *dev_list) +{ + return (dev_list == &dev->adj_list.upper || + dev_list == &dev->adj_list.lower) && + net_eq(dev_net(dev), dev_net(adj_dev)); +} static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, @@ -4841,7 +4846,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, pr_debug("dev_hold for %s, because of link added from %s to %s\n", adj_dev->name, dev->name, adj_dev->name); - if (netdev_adjacent_is_neigh_list(dev, dev_list)) { + if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) { ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list); if (ret) goto free_adj; @@ -4862,7 +4867,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, return 0; remove_symlinks: - if (netdev_adjacent_is_neigh_list(dev, dev_list)) + if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); free_adj: kfree(adj); @@ -4895,8 +4900,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, if (adj->master) sysfs_remove_link(&(dev->dev.kobj), "master"); - if (netdev_adjacent_is_neigh_list(dev, dev_list) && - net_eq(dev_net(dev),dev_net(adj_dev))) + if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); list_del_rcu(&adj->list); -- cgit v1.2.3 From c095f248e63ada504dd90c90baae673ae10ee3fe Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 15 Sep 2014 15:24:26 -0400 Subject: bridge: Fix br_should_learn to check vlan_enabled As Toshiaki Makita pointed out, the BRIDGE_INPUT_SKB_CB will not be initialized in br_should_learn() as that function is called only from br_handle_local_finish(). That is an input handler for link-local ethernet traffic so it perfectly correct to check br->vlan_enabled here. Reported-by: Toshiaki Makita Fixes: 20adfa1 bridge: Check if vlan filtering is enabled only once. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- net/bridge/br_vlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 4b86738eca9a..3ba57fcdcd13 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -280,7 +280,7 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) struct net_port_vlans *v; /* If filtering was disabled at input, let it pass. */ - if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) + if (!br->vlan_enabled) return true; v = rcu_dereference(p->vlan_info); -- cgit v1.2.3 From f92ee61982d6da15a9e49664ecd6405a15a2ee56 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 16 Sep 2014 10:08:40 +0200 Subject: xfrm: Generate blackhole routes only from route lookup functions Currently we genarate a blackhole route route whenever we have matching policies but can not resolve the states. Here we assume that dst_output() is called to kill the balckholed packets. Unfortunately this assumption is not true in all cases, so it is possible that these packets leave the system unwanted. We fix this by generating blackhole routes only from the route lookup functions, here we can guarantee a call to dst_output() afterwards. Fixes: 2774c131b1d ("xfrm: Handle blackhole route creation via afinfo.") Reported-by: Konstantinos Kolelis Signed-off-by: Steffen Klassert --- include/net/dst.h | 15 ++++++++++++++- net/ipv4/route.c | 6 +++--- net/ipv6/ip6_output.c | 4 ++-- net/xfrm/xfrm_policy.c | 18 +++++++++++++++++- 4 files changed, 36 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/net/dst.h b/include/net/dst.h index 71c60f42be48..fa11c904d219 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -490,7 +490,16 @@ static inline struct dst_entry *xfrm_lookup(struct net *net, int flags) { return dst_orig; -} +} + +static inline struct dst_entry *xfrm_lookup_route(struct net *net, + struct dst_entry *dst_orig, + const struct flowi *fl, + struct sock *sk, + int flags) +{ + return dst_orig; +} static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) { @@ -502,6 +511,10 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, struct sock *sk, int flags); +struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, + const struct flowi *fl, struct sock *sk, + int flags); + /* skb attached with this dst needs transformation if dst->xfrm is valid */ static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index eaa4b000c7b4..173e7ea54c70 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2265,9 +2265,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, return rt; if (flp4->flowi4_proto) - rt = (struct rtable *) xfrm_lookup(net, &rt->dst, - flowi4_to_flowi(flp4), - sk, 0); + rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst, + flowi4_to_flowi(flp4), + sk, 0); return rt; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 315a55d66079..0a3448b2888f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1009,7 +1009,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, if (final_dst) fl6->daddr = *final_dst; - return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); + return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); @@ -1041,7 +1041,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, if (final_dst) fl6->daddr = *final_dst; - return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); + return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index beeed602aeb3..7505674c9faa 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2138,7 +2138,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, xfrm_pols_put(pols, drop_pols); XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); - return make_blackhole(net, family, dst_orig); + return ERR_PTR(-EREMOTE); } err = -EAGAIN; @@ -2195,6 +2195,22 @@ dropdst: } EXPORT_SYMBOL(xfrm_lookup); +/* Callers of xfrm_lookup_route() must ensure a call to dst_output(). + * Otherwise we may send out blackholed packets. + */ +struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, + const struct flowi *fl, + struct sock *sk, int flags) +{ + struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, flags); + + if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) + return make_blackhole(net, dst_orig->ops->family, dst_orig); + + return dst; +} +EXPORT_SYMBOL(xfrm_lookup_route); + static inline int xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl) { -- cgit v1.2.3 From b8c203b2d2fc961bafd53b41d5396bbcdec55998 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 16 Sep 2014 10:08:49 +0200 Subject: xfrm: Generate queueing routes only from route lookup functions Currently we genarate a queueing route if we have matching policies but can not resolve the states and the sysctl xfrm_larval_drop is disabled. Here we assume that dst_output() is called to kill the queued packets. Unfortunately this assumption is not true in all cases, so it is possible that these packets leave the system unwanted. We fix this by generating queueing routes only from the route lookup functions, here we can guarantee a call to dst_output() afterwards. Fixes: a0073fe18e71 ("xfrm: Add a state resolution packet queue") Reported-by: Konstantinos Kolelis Signed-off-by: Steffen Klassert --- include/net/dst.h | 1 + net/xfrm/xfrm_policy.c | 32 ++++++++++++++++++++++++-------- 2 files changed, 25 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/dst.h b/include/net/dst.h index fa11c904d219..a8ae4e760778 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -480,6 +480,7 @@ void dst_init(void); /* Flags for xfrm_lookup flags argument. */ enum { XFRM_LOOKUP_ICMP = 1 << 0, + XFRM_LOOKUP_QUEUE = 1 << 1, }; struct flowi; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7505674c9faa..fdde51f4271a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -39,6 +39,11 @@ #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) #define XFRM_MAX_QUEUE_LEN 100 +struct xfrm_flo { + struct dst_entry *dst_orig; + u8 flags; +}; + static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] __read_mostly; @@ -1877,13 +1882,14 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb) } static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, - struct dst_entry *dst, + struct xfrm_flo *xflo, const struct flowi *fl, int num_xfrms, u16 family) { int err; struct net_device *dev; + struct dst_entry *dst; struct dst_entry *dst1; struct xfrm_dst *xdst; @@ -1891,9 +1897,12 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, if (IS_ERR(xdst)) return xdst; - if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0) + if (!(xflo->flags & XFRM_LOOKUP_QUEUE) || + net->xfrm.sysctl_larval_drop || + num_xfrms <= 0) return xdst; + dst = xflo->dst_orig; dst1 = &xdst->u.dst; dst_hold(dst); xdst->route = dst; @@ -1935,7 +1944,7 @@ static struct flow_cache_object * xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *oldflo, void *ctx) { - struct dst_entry *dst_orig = (struct dst_entry *)ctx; + struct xfrm_flo *xflo = (struct xfrm_flo *)ctx; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct xfrm_dst *xdst, *new_xdst; int num_pols = 0, num_xfrms = 0, i, err, pol_dead; @@ -1976,7 +1985,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, goto make_dummy_bundle; } - new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig); + new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, + xflo->dst_orig); if (IS_ERR(new_xdst)) { err = PTR_ERR(new_xdst); if (err != -EAGAIN) @@ -2010,7 +2020,7 @@ make_dummy_bundle: /* We found policies, but there's no bundles to instantiate: * either because the policy blocks, has no transformations or * we could not build template (no xfrm_states).*/ - xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family); + xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family); if (IS_ERR(xdst)) { xfrm_pols_put(pols, num_pols); return ERR_CAST(xdst); @@ -2104,13 +2114,18 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, } if (xdst == NULL) { + struct xfrm_flo xflo; + + xflo.dst_orig = dst_orig; + xflo.flags = flags; + /* To accelerate a bit... */ if ((dst_orig->flags & DST_NOXFRM) || !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; flo = flow_cache_lookup(net, fl, family, dir, - xfrm_bundle_lookup, dst_orig); + xfrm_bundle_lookup, &xflo); if (flo == NULL) goto nopol; if (IS_ERR(flo)) { @@ -2202,7 +2217,8 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, struct sock *sk, int flags) { - struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, flags); + struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, + flags | XFRM_LOOKUP_QUEUE); if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) return make_blackhole(net, dst_orig->ops->family, dst_orig); @@ -2476,7 +2492,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) skb_dst_force(skb); - dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0); + dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) { res = 0; dst = NULL; -- cgit v1.2.3 From dda3b191eb6c5a56d443723dcb71ade60d97c04f Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 12 Sep 2014 21:49:28 +0200 Subject: net: rfkill: gpio: Enable module auto-loading for ACPI based switches For the ACPI based switches the MODULE_DEVICE_TABLE is missing to export the entries for module auto-loading. Signed-off-by: Marcel Holtmann Signed-off-by: John W. Linville --- net/rfkill/rfkill-gpio.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 02a86a27fd84..5fa54dd78e25 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -163,6 +163,7 @@ static const struct acpi_device_id rfkill_acpi_match[] = { { "LNV4752", RFKILL_TYPE_GPS }, { }, }; +MODULE_DEVICE_TABLE(acpi, rfkill_acpi_match); #endif static struct platform_driver rfkill_gpio_driver = { -- cgit v1.2.3 From 9b67aa4a82492f128adfccc63e61ab57c1ce1dfd Mon Sep 17 00:00:00 2001 From: Samuel Gauthier Date: Thu, 18 Sep 2014 10:31:04 +0200 Subject: openvswitch: restore OVS_FLOW_CMD_NEW notifications Since commit fb5d1e9e127a ("openvswitch: Build flow cmd netlink reply only if needed."), the new flows are not notified to the listeners of OVS_FLOW_MCGROUP. This commit fixes the problem by using the genl function, ie genl_has_listerners() instead of netlink_has_listeners(). Signed-off-by: Samuel Gauthier Signed-off-by: Nicolas Dichtel Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 91d66b7e64ac..64dc864a417f 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -78,11 +78,12 @@ static const struct genl_multicast_group ovs_dp_vport_multicast_group = { /* Check if need to build a reply message. * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */ -static bool ovs_must_notify(struct genl_info *info, - const struct genl_multicast_group *grp) +static bool ovs_must_notify(struct genl_family *family, struct genl_info *info, + unsigned int group) { return info->nlhdr->nlmsg_flags & NLM_F_ECHO || - netlink_has_listeners(genl_info_net(info)->genl_sock, 0); + genl_has_listeners(family, genl_info_net(info)->genl_sock, + group); } static void ovs_notify(struct genl_family *family, @@ -763,7 +764,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act { struct sk_buff *skb; - if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group)) + if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0)) return NULL; skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL); -- cgit v1.2.3 From 257117862634d89de33fec74858b1a0ba5ab444b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Sep 2014 08:02:05 -0700 Subject: net: sched: shrink struct qdisc_skb_cb to 28 bytes We cannot make struct qdisc_skb_cb bigger without impacting IPoIB, or increasing skb->cb[] size. Commit e0f31d849867 ("flow_keys: Record IP layer protocol in skb_flow_dissect()") broke IPoIB. Only current offender is sch_choke, and this one do not need an absolutely precise flow key. If we store 17 bytes of flow key, its more than enough. (Its the actual size of flow_keys if it was a packed structure, but we might add new fields at the end of it later) Signed-off-by: Eric Dumazet Fixes: e0f31d849867 ("flow_keys: Record IP layer protocol in skb_flow_dissect()") Signed-off-by: David S. Miller --- include/net/sch_generic.h | 3 ++- net/sched/sch_choke.c | 18 ++++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a3cfb8ebeb53..620e086c0cbe 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -231,7 +231,8 @@ struct qdisc_skb_cb { unsigned int pkt_len; u16 slave_dev_queue_mapping; u16 _pad; - unsigned char data[24]; +#define QDISC_CB_PRIV_LEN 20 + unsigned char data[QDISC_CB_PRIV_LEN]; }; static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index ed30e436128b..fb666d1e4de3 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -133,10 +133,16 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx) --sch->q.qlen; } +/* private part of skb->cb[] that a qdisc is allowed to use + * is limited to QDISC_CB_PRIV_LEN bytes. + * As a flow key might be too large, we store a part of it only. + */ +#define CHOKE_K_LEN min_t(u32, sizeof(struct flow_keys), QDISC_CB_PRIV_LEN - 3) + struct choke_skb_cb { u16 classid; u8 keys_valid; - struct flow_keys keys; + u8 keys[QDISC_CB_PRIV_LEN - 3]; }; static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb) @@ -163,22 +169,26 @@ static u16 choke_get_classid(const struct sk_buff *skb) static bool choke_match_flow(struct sk_buff *skb1, struct sk_buff *skb2) { + struct flow_keys temp; + if (skb1->protocol != skb2->protocol) return false; if (!choke_skb_cb(skb1)->keys_valid) { choke_skb_cb(skb1)->keys_valid = 1; - skb_flow_dissect(skb1, &choke_skb_cb(skb1)->keys); + skb_flow_dissect(skb1, &temp); + memcpy(&choke_skb_cb(skb1)->keys, &temp, CHOKE_K_LEN); } if (!choke_skb_cb(skb2)->keys_valid) { choke_skb_cb(skb2)->keys_valid = 1; - skb_flow_dissect(skb2, &choke_skb_cb(skb2)->keys); + skb_flow_dissect(skb2, &temp); + memcpy(&choke_skb_cb(skb2)->keys, &temp, CHOKE_K_LEN); } return !memcmp(&choke_skb_cb(skb1)->keys, &choke_skb_cb(skb2)->keys, - sizeof(struct flow_keys)); + CHOKE_K_LEN); } /* -- cgit v1.2.3 From a35165ca101695aa2cc5a6300ef69ae60be39a49 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Sep 2014 10:38:16 -0700 Subject: ipv4: do not use this_cpu_ptr() in preemptible context this_cpu_ptr() in preemptible context is generally bad Sep 22 05:05:55 br kernel: [ 94.608310] BUG: using smp_processor_id() in preemptible [00000000] code: ip/2261 Sep 22 05:05:55 br kernel: [ 94.608316] caller is tunnel_dst_set.isra.28+0x20/0x60 [ip_tunnel] Sep 22 05:05:55 br kernel: [ 94.608319] CPU: 3 PID: 2261 Comm: ip Not tainted 3.17.0-rc5 #82 We can simply use raw_cpu_ptr(), as preemption is safe in these contexts. Should fix https://bugzilla.kernel.org/show_bug.cgi?id=84991 Signed-off-by: Eric Dumazet Reported-by: Joe Fixes: 9a4aa9af447f ("ipv4: Use percpu Cache route in IP tunnels") Acked-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index afed1aac2638..bd41dd1948b6 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -79,10 +79,10 @@ static void __tunnel_dst_set(struct ip_tunnel_dst *idst, idst->saddr = saddr; } -static void tunnel_dst_set(struct ip_tunnel *t, +static noinline void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst, __be32 saddr) { - __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst, saddr); + __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr); } static void tunnel_dst_reset(struct ip_tunnel *t) @@ -106,7 +106,7 @@ static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, struct dst_entry *dst; rcu_read_lock(); - idst = this_cpu_ptr(t->dst_cache); + idst = raw_cpu_ptr(t->dst_cache); dst = rcu_dereference(idst->dst); if (dst && !atomic_inc_not_zero(&dst->__refcnt)) dst = NULL; -- cgit v1.2.3