diff options
author | David S. Miller <davem@davemloft.net> | 2016-06-30 05:03:36 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-06-30 05:03:36 -0400 |
commit | ee58b57100ca953da7320c285315a95db2f7053d (patch) | |
tree | 77b815a31240adc4d6326346908137fc6c2c3a96 /net | |
parent | 6f30e8b022c8e3a722928ddb1a2ae0be852fcc0e (diff) | |
parent | e7bdea7750eb2a64aea4a08fa5c0a31719c8155d (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Several cases of overlapping changes, except the packet scheduler
conflicts which deal with the addition of the free list parameter
to qdisc_enqueue().
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
65 files changed, 465 insertions, 355 deletions
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index fbd0acf80b13..2fdebabbfacd 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -976,7 +976,8 @@ static int ax25_release(struct socket *sock) release_sock(sk); ax25_disconnect(ax25, 0); lock_sock(sk); - ax25_destroy_socket(ax25); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_destroy_socket(ax25); break; case AX25_STATE_3: diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 951cd57bb07d..5237dff6941d 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -102,6 +102,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25) switch (ax25->state) { case AX25_STATE_0: + case AX25_STATE_2: /* Magic here: If we listen() and a new link dies before it is accepted() it isn't 'dead' so doesn't get removed. */ if (!sk || sock_flag(sk, SOCK_DESTROY) || @@ -111,6 +112,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25) sock_hold(sk); ax25_destroy_socket(ax25); bh_unlock_sock(sk); + /* Ungrab socket and destroy it */ sock_put(sk); } else ax25_destroy_socket(ax25); @@ -213,7 +215,8 @@ void ax25_ds_t1_timeout(ax25_cb *ax25) case AX25_STATE_2: if (ax25->n2count == ax25->n2) { ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - ax25_disconnect(ax25, ETIMEDOUT); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_disconnect(ax25, ETIMEDOUT); return; } else { ax25->n2count++; diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c index 004467c9e6e1..2c0d6ef66f9d 100644 --- a/net/ax25/ax25_std_timer.c +++ b/net/ax25/ax25_std_timer.c @@ -38,6 +38,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25) switch (ax25->state) { case AX25_STATE_0: + case AX25_STATE_2: /* Magic here: If we listen() and a new link dies before it is accepted() it isn't 'dead' so doesn't get removed. */ if (!sk || sock_flag(sk, SOCK_DESTROY) || @@ -47,6 +48,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25) sock_hold(sk); ax25_destroy_socket(ax25); bh_unlock_sock(sk); + /* Ungrab socket and destroy it */ sock_put(sk); } else ax25_destroy_socket(ax25); @@ -144,7 +146,8 @@ void ax25_std_t1timer_expiry(ax25_cb *ax25) case AX25_STATE_2: if (ax25->n2count == ax25->n2) { ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - ax25_disconnect(ax25, ETIMEDOUT); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_disconnect(ax25, ETIMEDOUT); return; } else { ax25->n2count++; diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 3b78e8473a01..655a7d4c96e1 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -264,7 +264,8 @@ void ax25_disconnect(ax25_cb *ax25, int reason) { ax25_clear_queues(ax25); - ax25_stop_heartbeat(ax25); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_stop_heartbeat(ax25); ax25_stop_t1timer(ax25); ax25_stop_t2timer(ax25); ax25_stop_t3timer(ax25); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index f75091c983ee..396c0134c5ab 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -374,6 +374,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, if (skb_cow(skb, ETH_HLEN) < 0) goto out; + ethhdr = eth_hdr(skb); icmph = (struct batadv_icmp_header *)skb->data; icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmph; if (icmp_packet_rr->rr_cur >= BATADV_RR_LEN) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 343d2c904399..287a3879ed7e 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1033,7 +1033,9 @@ void batadv_softif_destroy_sysfs(struct net_device *soft_iface) static void batadv_softif_destroy_netlink(struct net_device *soft_iface, struct list_head *head) { + struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct batadv_hard_iface *hard_iface; + struct batadv_softif_vlan *vlan; list_for_each_entry(hard_iface, &batadv_hardif_list, list) { if (hard_iface->soft_iface == soft_iface) @@ -1041,6 +1043,13 @@ static void batadv_softif_destroy_netlink(struct net_device *soft_iface, BATADV_IF_CLEANUP_KEEP); } + /* destroy the "untagged" VLAN */ + vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + if (vlan) { + batadv_softif_destroy_vlan(bat_priv, vlan); + batadv_softif_vlan_put(vlan); + } + batadv_sysfs_del_meshif(soft_iface); unregister_netdevice_queue(soft_iface, head); } diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index feaf492b01ca..57ec87f37050 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -650,8 +650,10 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, /* increase the refcounter of the related vlan */ vlan = batadv_softif_vlan_get(bat_priv, vid); - if (WARN(!vlan, "adding TT local entry %pM to non-existent VLAN %d", - addr, BATADV_PRINT_VID(vid))) { + if (!vlan) { + net_ratelimited_function(batadv_info, soft_iface, + "adding TT local entry %pM to non-existent VLAN %d\n", + addr, BATADV_PRINT_VID(vid)); kfree(tt_local); tt_local = NULL; goto out; @@ -691,7 +693,6 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, if (unlikely(hash_added != 0)) { /* remove the reference for the hash */ batadv_tt_local_entry_put(tt_local); - batadv_softif_vlan_put(vlan); goto out; } @@ -2269,6 +2270,29 @@ static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv, return crc; } +/** + * batadv_tt_req_node_release - free tt_req node entry + * @ref: kref pointer of the tt req_node entry + */ +static void batadv_tt_req_node_release(struct kref *ref) +{ + struct batadv_tt_req_node *tt_req_node; + + tt_req_node = container_of(ref, struct batadv_tt_req_node, refcount); + + kfree(tt_req_node); +} + +/** + * batadv_tt_req_node_put - decrement the tt_req_node refcounter and + * possibly release it + * @tt_req_node: tt_req_node to be free'd + */ +static void batadv_tt_req_node_put(struct batadv_tt_req_node *tt_req_node) +{ + kref_put(&tt_req_node->refcount, batadv_tt_req_node_release); +} + static void batadv_tt_req_list_free(struct batadv_priv *bat_priv) { struct batadv_tt_req_node *node; @@ -2278,7 +2302,7 @@ static void batadv_tt_req_list_free(struct batadv_priv *bat_priv) hlist_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { hlist_del_init(&node->list); - kfree(node); + batadv_tt_req_node_put(node); } spin_unlock_bh(&bat_priv->tt.req_list_lock); @@ -2315,7 +2339,7 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv) if (batadv_has_timed_out(node->issued_at, BATADV_TT_REQUEST_TIMEOUT)) { hlist_del_init(&node->list); - kfree(node); + batadv_tt_req_node_put(node); } } spin_unlock_bh(&bat_priv->tt.req_list_lock); @@ -2347,9 +2371,11 @@ batadv_tt_req_node_new(struct batadv_priv *bat_priv, if (!tt_req_node) goto unlock; + kref_init(&tt_req_node->refcount); ether_addr_copy(tt_req_node->addr, orig_node->orig); tt_req_node->issued_at = jiffies; + kref_get(&tt_req_node->refcount); hlist_add_head(&tt_req_node->list, &bat_priv->tt.req_list); unlock: spin_unlock_bh(&bat_priv->tt.req_list_lock); @@ -2613,13 +2639,19 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv, out: if (primary_if) batadv_hardif_put(primary_if); + if (ret && tt_req_node) { spin_lock_bh(&bat_priv->tt.req_list_lock); - /* hlist_del_init() verifies tt_req_node still is in the list */ - hlist_del_init(&tt_req_node->list); + if (!hlist_unhashed(&tt_req_node->list)) { + hlist_del_init(&tt_req_node->list); + batadv_tt_req_node_put(tt_req_node); + } spin_unlock_bh(&bat_priv->tt.req_list_lock); - kfree(tt_req_node); } + + if (tt_req_node) + batadv_tt_req_node_put(tt_req_node); + kfree(tvlv_tt_data); return ret; } @@ -3055,7 +3087,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv, if (!batadv_compare_eth(node->addr, resp_src)) continue; hlist_del_init(&node->list); - kfree(node); + batadv_tt_req_node_put(node); } spin_unlock_bh(&bat_priv->tt.req_list_lock); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 6a577f4f8ba7..ba846b078af8 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1137,11 +1137,13 @@ struct batadv_tt_change_node { * struct batadv_tt_req_node - data to keep track of the tt requests in flight * @addr: mac address address of the originator this request was sent to * @issued_at: timestamp used for purging stale tt requests + * @refcount: number of contexts the object is used by * @list: list node for batadv_priv_tt::req_list */ struct batadv_tt_req_node { u8 addr[ETH_ALEN]; unsigned long issued_at; + struct kref refcount; struct hlist_node list; }; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 160797722228..43d2cd862bc2 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -213,8 +213,7 @@ drop: } EXPORT_SYMBOL_GPL(br_handle_frame_finish); -/* note: already called with rcu_read_lock */ -static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +static void __br_handle_local_finish(struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); u16 vid = 0; @@ -222,6 +221,14 @@ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_bu /* check if vlan is allowed, to avoid spoofing */ if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid)) br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false); +} + +/* note: already called with rcu_read_lock */ +static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct net_bridge_port *p = br_port_get_rcu(skb->dev); + + __br_handle_local_finish(skb); BR_INPUT_SKB_CB(skb)->brdev = p->br->dev; br_pass_frame_up(skb); @@ -274,7 +281,9 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) if (p->br->stp_enabled == BR_NO_STP || fwd_mask & (1u << dest[5])) goto forward; - break; + *pskb = skb; + __br_handle_local_finish(skb); + return RX_HANDLER_PASS; case 0x01: /* IEEE MAC (Pause) */ goto drop; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 6852f3c7009c..43844144c9c4 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -464,8 +464,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, &ip6h->saddr)) { kfree_skb(skb); + br->has_ipv6_addr = 0; return NULL; } + + br->has_ipv6_addr = 1; ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); hopopt = (u8 *)(ip6h + 1); @@ -1745,6 +1748,7 @@ void br_multicast_init(struct net_bridge *br) br->ip6_other_query.delay_time = 0; br->ip6_querier.port = NULL; #endif + br->has_ipv6_addr = 1; spin_lock_init(&br->multicast_lock); setup_timer(&br->multicast_router_timer, diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index a5343c7232bf..85e89f693589 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1273,7 +1273,7 @@ static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev, struct bridge_vlan_xstats vxi; struct br_vlan_stats stats; - if (vl_idx++ < *prividx) + if (++vl_idx < *prividx) continue; memset(&vxi, 0, sizeof(vxi)); vxi.vid = v->vid; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c7fb5d7a7218..52edecf3c294 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -314,6 +314,7 @@ struct net_bridge u8 multicast_disabled:1; u8 multicast_querier:1; u8 multicast_query_use_ifaddr:1; + u8 has_ipv6_addr:1; u32 hash_elasticity; u32 hash_max; @@ -588,10 +589,22 @@ static inline bool br_multicast_is_router(struct net_bridge *br) static inline bool __br_multicast_querier_exists(struct net_bridge *br, - struct bridge_mcast_other_query *querier) + struct bridge_mcast_other_query *querier, + const bool is_ipv6) { + bool own_querier_enabled; + + if (br->multicast_querier) { + if (is_ipv6 && !br->has_ipv6_addr) + own_querier_enabled = false; + else + own_querier_enabled = true; + } else { + own_querier_enabled = false; + } + return time_is_before_jiffies(querier->delay_time) && - (br->multicast_querier || timer_pending(&querier->timer)); + (own_querier_enabled || timer_pending(&querier->timer)); } static inline bool br_multicast_querier_exists(struct net_bridge *br, @@ -599,10 +612,12 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br, { switch (eth->h_proto) { case (htons(ETH_P_IP)): - return __br_multicast_querier_exists(br, &br->ip4_other_query); + return __br_multicast_querier_exists(br, + &br->ip4_other_query, false); #if IS_ENABLED(CONFIG_IPV6) case (htons(ETH_P_IPV6)): - return __br_multicast_querier_exists(br, &br->ip6_other_query); + return __br_multicast_querier_exists(br, + &br->ip6_other_query, true); #endif default: return false; diff --git a/net/core/filter.c b/net/core/filter.c index df6860c85d72..cb9fc16cac46 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2105,7 +2105,8 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type) } static bool sk_filter_is_valid_access(int off, int size, - enum bpf_access_type type) + enum bpf_access_type type, + enum bpf_reg_type *reg_type) { switch (off) { case offsetof(struct __sk_buff, tc_classid): @@ -2128,7 +2129,8 @@ static bool sk_filter_is_valid_access(int off, int size, } static bool tc_cls_act_is_valid_access(int off, int size, - enum bpf_access_type type) + enum bpf_access_type type, + enum bpf_reg_type *reg_type) { if (type == BPF_WRITE) { switch (off) { @@ -2143,6 +2145,16 @@ static bool tc_cls_act_is_valid_access(int off, int size, return false; } } + + switch (off) { + case offsetof(struct __sk_buff, data): + *reg_type = PTR_TO_PACKET; + break; + case offsetof(struct __sk_buff, data_end): + *reg_type = PTR_TO_PACKET_END; + break; + } + return __is_valid_access(off, size, type); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 29dd8cc22bbf..510cd62fcb99 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2469,13 +2469,17 @@ int neigh_xmit(int index, struct net_device *dev, tbl = neigh_tables[index]; if (!tbl) goto out; + rcu_read_lock_bh(); neigh = __neigh_lookup_noref(tbl, addr, dev); if (!neigh) neigh = __neigh_create(tbl, addr, dev, false); err = PTR_ERR(neigh); - if (IS_ERR(neigh)) + if (IS_ERR(neigh)) { + rcu_read_unlock_bh(); goto out_kfree_skb; + } err = neigh->output(neigh, skb); + rcu_read_unlock_bh(); } else if (index == NEIGH_LINK_TABLE) { err = dev_hard_header(skb, dev, ntohs(skb->protocol), diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 477937465a20..d95631d09248 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -23,6 +23,11 @@ struct esp_skb_cb { void *tmp; }; +struct esp_output_extra { + __be32 seqhi; + u32 esphoff; +}; + #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0])) static u32 esp4_get_mtu(struct xfrm_state *x, int mtu); @@ -35,11 +40,11 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu); * * TODO: Use spare space in skb for this where possible. */ -static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) +static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int extralen) { unsigned int len; - len = seqhilen; + len = extralen; len += crypto_aead_ivsize(aead); @@ -57,15 +62,16 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) return kmalloc(len, GFP_ATOMIC); } -static inline __be32 *esp_tmp_seqhi(void *tmp) +static inline void *esp_tmp_extra(void *tmp) { - return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32)); + return PTR_ALIGN(tmp, __alignof__(struct esp_output_extra)); } -static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) + +static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int extralen) { return crypto_aead_ivsize(aead) ? - PTR_ALIGN((u8 *)tmp + seqhilen, - crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; + PTR_ALIGN((u8 *)tmp + extralen, + crypto_aead_alignmask(aead) + 1) : tmp + extralen; } static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv) @@ -99,7 +105,7 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) { struct ip_esp_hdr *esph = (void *)(skb->data + offset); void *tmp = ESP_SKB_CB(skb)->tmp; - __be32 *seqhi = esp_tmp_seqhi(tmp); + __be32 *seqhi = esp_tmp_extra(tmp); esph->seq_no = esph->spi; esph->spi = *seqhi; @@ -107,7 +113,11 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) static void esp_output_restore_header(struct sk_buff *skb) { - esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32)); + void *tmp = ESP_SKB_CB(skb)->tmp; + struct esp_output_extra *extra = esp_tmp_extra(tmp); + + esp_restore_header(skb, skb_transport_offset(skb) + extra->esphoff - + sizeof(__be32)); } static void esp_output_done_esn(struct crypto_async_request *base, int err) @@ -121,6 +131,7 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) static int esp_output(struct xfrm_state *x, struct sk_buff *skb) { int err; + struct esp_output_extra *extra; struct ip_esp_hdr *esph; struct crypto_aead *aead; struct aead_request *req; @@ -137,8 +148,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int tfclen; int nfrags; int assoclen; - int seqhilen; - __be32 *seqhi; + int extralen; __be64 seqno; /* skb is pure payload to encrypt */ @@ -166,21 +176,21 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) nfrags = err; assoclen = sizeof(*esph); - seqhilen = 0; + extralen = 0; if (x->props.flags & XFRM_STATE_ESN) { - seqhilen += sizeof(__be32); - assoclen += seqhilen; + extralen += sizeof(*extra); + assoclen += sizeof(__be32); } - tmp = esp_alloc_tmp(aead, nfrags, seqhilen); + tmp = esp_alloc_tmp(aead, nfrags, extralen); if (!tmp) { err = -ENOMEM; goto error; } - seqhi = esp_tmp_seqhi(tmp); - iv = esp_tmp_iv(aead, tmp, seqhilen); + extra = esp_tmp_extra(tmp); + iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); @@ -247,8 +257,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) * encryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { - esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); - *seqhi = esph->spi; + extra->esphoff = (unsigned char *)esph - + skb_transport_header(skb); + esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4); + extra->seqhi = esph->spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); aead_request_set_callback(req, 0, esp_output_done_esn, skb); } @@ -445,7 +457,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) goto out; ESP_SKB_CB(skb)->tmp = tmp; - seqhi = esp_tmp_seqhi(tmp); + seqhi = esp_tmp_extra(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index c4c3e439f424..b798862b6be5 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -62,26 +62,26 @@ EXPORT_SYMBOL_GPL(gre_del_protocol); /* Fills in tpi and returns header length to be pulled. */ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, - bool *csum_err, __be16 proto) + bool *csum_err, __be16 proto, int nhs) { const struct gre_base_hdr *greh; __be32 *options; int hdr_len; - if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + if (unlikely(!pskb_may_pull(skb, nhs + sizeof(struct gre_base_hdr)))) return -EINVAL; - greh = (struct gre_base_hdr *)skb_transport_header(skb); + greh = (struct gre_base_hdr *)(skb->data + nhs); if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; tpi->flags = gre_flags_to_tnl_flags(greh->flags); hdr_len = gre_calc_hlen(tpi->flags); - if (!pskb_may_pull(skb, hdr_len)) + if (!pskb_may_pull(skb, nhs + hdr_len)) return -EINVAL; - greh = (struct gre_base_hdr *)skb_transport_header(skb); + greh = (struct gre_base_hdr *)(skb->data + nhs); tpi->proto = greh->protocol; options = (__be32 *)(greh + 1); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 8eec78f53f9e..5b1481be0282 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -49,12 +49,6 @@ #include <net/gre.h> #include <net/dst_metadata.h> -#if IS_ENABLED(CONFIG_IPV6) -#include <net/ipv6.h> -#include <net/ip6_fib.h> -#include <net/ip6_route.h> -#endif - /* Problems & solutions -------------------- @@ -226,12 +220,14 @@ static void gre_err(struct sk_buff *skb, u32 info) * by themselves??? */ + const struct iphdr *iph = (struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct tnl_ptk_info tpi; bool csum_err = false; - if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)) < 0) { + if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), + iph->ihl * 4) < 0) { if (!csum_err) /* ignore csum errors. */ return; } @@ -347,7 +343,7 @@ static int gre_rcv(struct sk_buff *skb) } #endif - hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)); + hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0); if (hdr_len < 0) goto drop; @@ -1154,6 +1150,7 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, { struct nlattr *tb[IFLA_MAX + 1]; struct net_device *dev; + LIST_HEAD(list_kill); struct ip_tunnel *t; int err; @@ -1169,8 +1166,10 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, t->collect_md = true; err = ipgre_newlink(net, dev, tb, NULL); - if (err < 0) - goto out; + if (err < 0) { + free_netdev(dev); + return ERR_PTR(err); + } /* openvswitch users expect packet sizes to be unrestricted, * so set the largest MTU we can. @@ -1179,9 +1178,14 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, if (err) goto out; + err = rtnl_configure_link(dev, NULL); + if (err < 0) + goto out; + return dev; out: - free_netdev(dev); + ip_tunnel_dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(gretap_fb_dev_create); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 2ed9dd2b5f2f..1d71c40eaaf3 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -127,7 +127,9 @@ __be32 ic_myaddr = NONE; /* My IP address */ static __be32 ic_netmask = NONE; /* Netmask for local subnet */ __be32 ic_gateway = NONE; /* Gateway IP address */ -__be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ +#ifdef IPCONFIG_DYNAMIC +static __be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ +#endif __be32 ic_servaddr = NONE; /* Boot server IP address */ diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 21a38e296fe2..5ad48ec77710 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -891,8 +891,10 @@ static struct mfc_cache *ipmr_cache_alloc(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); - if (c) + if (c) { + c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXVIFS; + } return c; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b1bcba0563f2..b26aa870adc0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2753,7 +2753,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; struct sk_buff *hole = NULL; - u32 last_lost; + u32 max_segs, last_lost; int mib_idx; int fwd_rexmitting = 0; @@ -2773,6 +2773,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) last_lost = tp->snd_una; } + max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk)); tcp_for_write_queue_from(skb, sk) { __u8 sacked = TCP_SKB_CB(skb)->sacked; int segs; @@ -2786,6 +2787,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk) segs = tp->snd_cwnd - tcp_packets_in_flight(tp); if (segs <= 0) return; + /* In case tcp_shift_skb_data() have aggregated large skbs, + * we need to make sure not sending too bigs TSO packets + */ + segs = min_t(int, segs, max_segs); if (fwd_rexmitting) { begin_fwd: diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0ff31d97d485..ca5e8ea29538 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -391,9 +391,9 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr); } -static inline int compute_score(struct sock *sk, struct net *net, - __be32 saddr, unsigned short hnum, __be16 sport, - __be32 daddr, __be16 dport, int dif) +static int compute_score(struct sock *sk, struct net *net, + __be32 saddr, __be16 sport, + __be32 daddr, unsigned short hnum, int dif) { int score; struct inet_sock *inet; @@ -434,52 +434,6 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } -/* - * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num) - */ -static inline int compute_score2(struct sock *sk, struct net *net, - __be32 saddr, __be16 sport, - __be32 daddr, unsigned int hnum, int dif) -{ - int score; - struct inet_sock *inet; - - if (!net_eq(sock_net(sk), net) || - ipv6_only_sock(sk)) - return -1; - - inet = inet_sk(sk); - - if (inet->inet_rcv_saddr != daddr || - inet->inet_num != hnum) - return -1; - - score = (sk->sk_family == PF_INET) ? 2 : 1; - - if (inet->inet_daddr) { - if (inet->inet_daddr != saddr) - return -1; - score += 4; - } - - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score += 4; - } - - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score += 4; - } - - if (sk->sk_incoming_cpu == raw_smp_processor_id()) - score++; - - return score; -} - static u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport) @@ -492,11 +446,11 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr, udp_ehash_secret + net_hash_mix(net)); } -/* called with read_rcu_lock() */ +/* called with rcu_read_lock() */ static struct sock *udp4_lib_lookup2(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, unsigned int hnum, int dif, - struct udp_hslot *hslot2, unsigned int slot2, + struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; @@ -506,7 +460,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, result = NULL; badness = 0; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - score = compute_score2(sk, net, saddr, sport, + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; @@ -554,17 +508,22 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, result = udp4_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, slot2, skb); + hslot2, skb); if (!result) { + unsigned int old_slot2 = slot2; hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); slot2 = hash2 & udptable->mask; + /* avoid searching the same slot again. */ + if (unlikely(slot2 == old_slot2)) + return result; + hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) goto begin; result = udp4_lib_lookup2(net, saddr, sport, - htonl(INADDR_ANY), hnum, dif, - hslot2, slot2, skb); + daddr, hnum, dif, + hslot2, skb); } return result; } @@ -572,8 +531,8 @@ begin: result = NULL; badness = 0; sk_for_each_rcu(sk, &hslot->head) { - score = compute_score(sk, net, saddr, hnum, sport, - daddr, dport, dif); + score = compute_score(sk, net, saddr, sport, + daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -1755,8 +1714,11 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, return err; } - return skb_checksum_init_zero_check(skb, proto, uh->check, - inet_compute_pseudo); + /* Note, we are only interested in != 0 or == 0, thus the + * force to int. + */ + return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + inet_compute_pseudo); } /* diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index fd11f5856ce8..bd59c343d35f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -98,7 +98,7 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!(type & ICMPV6_INFOMSG_MASK)) if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) - ping_err(skb, offset, info); + ping_err(skb, offset, ntohl(info)); } static int icmpv6_rcv(struct sk_buff *skb); diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index b2025bf3da4a..c0cbcb259f5a 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -78,9 +78,12 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) * we accept a checksum of zero here. When we find the socket * for the UDP packet we'll check if that socket allows zero checksum * for IPv6 (set by socket option). + * + * Note, we are only interested in != 0 or == 0, thus the + * force to int. */ - return skb_checksum_init_zero_check(skb, proto, uh->check, - ip6_compute_pseudo); + return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + ip6_compute_pseudo); } EXPORT_SYMBOL(udp6_csum_init); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index fdc9de276ab1..776d145113e1 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -468,7 +468,7 @@ static int gre_rcv(struct sk_buff *skb) bool csum_err = false; int hdr_len; - hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6)); + hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6), 0); if (hdr_len < 0) goto drop; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index f2e2013f8346..487ef3bc7bbc 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1074,6 +1074,7 @@ static struct mfc6_cache *ip6mr_cache_alloc(void) struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (!c) return NULL; + c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXMIFS; return c; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 08b77f421268..49817555449e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1783,7 +1783,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, }; struct fib6_table *table; struct rt6_info *rt; - int flags = 0; + int flags = RT6_LOOKUP_F_IFACE; table = fib6_get_table(net, cfg->fc_table); if (!table) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index cdd714690f95..917a5cd4b8fc 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -526,13 +526,13 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, IPPROTO_IPV6, 0); + t->parms.link, 0, iph->protocol, 0); err = 0; goto out; } if (type == ICMP_REDIRECT) { ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, - IPPROTO_IPV6, 0); + iph->protocol, 0); err = 0; goto out; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f36c2d076fce..2255d2bf5f6b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -738,7 +738,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int rst, - u8 tclass, u32 label) + u8 tclass, __be32 label) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -911,7 +911,7 @@ out: static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, u8 tclass, - u32 label) + __be32 label) { tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass, label); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4bb5c13777f1..0a71a312d0d8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -115,11 +115,10 @@ static void udp_v6_rehash(struct sock *sk) udp_lib_rehash(sk, new_hash); } -static inline int compute_score(struct sock *sk, struct net *net, - unsigned short hnum, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, __be16 dport, - int dif) +static int compute_score(struct sock *sk, struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, unsigned short hnum, + int dif) { int score; struct inet_sock *inet; @@ -162,54 +161,11 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } -static inline int compute_score2(struct sock *sk, struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, - unsigned short hnum, int dif) -{ - int score; - struct inet_sock *inet; - - if (!net_eq(sock_net(sk), net) || - udp_sk(sk)->udp_port_hash != hnum || - sk->sk_family != PF_INET6) - return -1; - - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) - return -1; - - score = 0; - inet = inet_sk(sk); - - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score++; - } - - if (!ipv6_addr_any(&sk->sk_v6_daddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) - return -1; - score++; - } - - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score++; - } - - if (sk->sk_incoming_cpu == raw_smp_processor_id()) - score++; - - return score; -} - -/* called with read_rcu_lock() */ +/* called with rcu_read_lock() */ static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned int hnum, int dif, - struct udp_hslot *hslot2, unsigned int slot2, + struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; @@ -219,7 +175,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, result = NULL; badness = -1; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - score = compute_score2(sk, net, saddr, sport, + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; @@ -268,17 +224,22 @@ struct sock *__udp6_lib_lookup(struct net *net, result = udp6_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, slot2, skb); + hslot2, skb); if (!result) { + unsigned int old_slot2 = slot2; hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum); slot2 = hash2 & udptable->mask; + /* avoid searching the same slot again. */ + if (unlikely(slot2 == old_slot2)) + return result; + hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) goto begin; result = udp6_lib_lookup2(net, saddr, sport, - &in6addr_any, hnum, dif, - hslot2, slot2, skb); + daddr, hnum, dif, + hslot2, skb); } return result; } @@ -286,7 +247,7 @@ begin: result = NULL; badness = -1; sk_for_each_rcu(sk, &hslot->head) { - score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index 738008726cc6..fda7f4715c58 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -241,6 +241,7 @@ static const struct file_operations kcm_seq_fops = { .open = kcm_seq_open, .read = seq_read, .llseek = seq_lseek, + .release = seq_release_net, }; static struct kcm_seq_muxinfo kcm_seq_muxinfo = { diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 21b1fdf5d01d..6a1603bcdced 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -148,14 +148,17 @@ u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) void mesh_sta_cleanup(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - u32 changed; + u32 changed = 0; /* * maybe userspace handles peer allocation and peering, but in either * case the beacon is still generated by the kernel and we might need * an update. */ - changed = mesh_accept_plinks_update(sdata); + if (sdata->u.mesh.user_mpm && + sta->mesh->plink_state == NL80211_PLINK_ESTAB) + changed |= mesh_plink_dec_estab_count(sdata); + changed |= mesh_accept_plinks_update(sdata); if (!sdata->u.mesh.user_mpm) { changed |= mesh_plink_deactivate(sta); del_timer_sync(&sta->mesh->plink_timer); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index db2312eeb2a4..f204274a9b6b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1544,6 +1544,8 @@ void nf_conntrack_cleanup_end(void) nf_conntrack_tstamp_fini(); nf_conntrack_acct_fini(); nf_conntrack_expect_fini(); + + kmem_cache_destroy(nf_conntrack_cachep); } /* diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7b7aa871a174..2c881871db38 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2946,24 +2946,20 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, * jumps are already validated for that chain. */ list_for_each_entry(i, &set->bindings, list) { - if (binding->flags & NFT_SET_MAP && + if (i->flags & NFT_SET_MAP && i->chain == binding->chain) goto bind; } + iter.genmask = nft_genmask_next(ctx->net); iter.skip = 0; iter.count = 0; iter.err = 0; iter.fn = nf_tables_bind_check_setelem; set->ops->walk(ctx, set, &iter); - if (iter.err < 0) { - /* Destroy anonymous sets if binding fails */ - if (set->flags & NFT_SET_ANONYMOUS) - nf_tables_set_destroy(ctx, set); - + if (iter.err < 0) return iter.err; - } } bind: binding->chain = ctx->chain; @@ -3192,12 +3188,13 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (nest == NULL) goto nla_put_failure; - args.cb = cb; - args.skb = skb; - args.iter.skip = cb->args[0]; - args.iter.count = 0; - args.iter.err = 0; - args.iter.fn = nf_tables_dump_setelem; + args.cb = cb; + args.skb = skb; + args.iter.genmask = nft_genmask_cur(ctx.net); + args.iter.skip = cb->args[0]; + args.iter.count = 0; + args.iter.err = 0; + args.iter.fn = nf_tables_dump_setelem; set->ops->walk(&ctx, set, &args.iter); nla_nest_end(skb, nest); @@ -4284,6 +4281,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, binding->chain != chain) continue; + iter.genmask = nft_genmask_next(ctx->net); iter.skip = 0; iter.count = 0; iter.err = 0; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index e9f8dffcc244..fb8b5892b5ff 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -143,7 +143,7 @@ next_rule: list_for_each_entry_continue_rcu(rule, &chain->rules, list) { /* This rule is not active, skip. */ - if (unlikely(rule->genmask & (1 << gencursor))) + if (unlikely(rule->genmask & gencursor)) continue; rulenum++; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 6fa016564f90..f39c53a159eb 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -189,7 +189,6 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_hash_elem *he; struct rhashtable_iter hti; struct nft_set_elem elem; - u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int err; err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); @@ -218,7 +217,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, goto cont; if (nft_set_elem_expired(&he->ext)) goto cont; - if (!nft_set_elem_active(&he->ext, genmask)) + if (!nft_set_elem_active(&he->ext, iter->genmask)) goto cont; elem.priv = he; diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index f762094af7c1..7201d57b5a93 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -211,7 +211,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_rbtree_elem *rbe; struct nft_set_elem elem; struct rb_node *node; - u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); spin_lock_bh(&nft_rbtree_lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { @@ -219,7 +218,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, if (iter->count < iter->skip) goto cont; - if (!nft_set_elem_active(&rbe->ext, genmask)) + if (!nft_set_elem_active(&rbe->ext, iter->genmask)) goto cont; elem.priv = rbe; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 52f3b9b89e97..b4069a90e375 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -818,8 +818,18 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, */ state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED; __ovs_ct_update_key(key, state, &info->zone, exp->master); - } else - return __ovs_ct_lookup(net, key, info, skb); + } else { + struct nf_conn *ct; + int err; + + err = __ovs_ct_lookup(net, key, info, skb); + if (err) + return err; + + ct = (struct nf_conn *)skb->nfct; + if (ct) + nf_ct_deliver_cached_events(ct); + } return 0; } diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 334287602b78..e48bb1ba3dfc 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -112,7 +112,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even } } - if (conn->c_version < RDS_PROTOCOL(3,1)) { + if (conn->c_version < RDS_PROTOCOL(3, 1)) { printk(KERN_NOTICE "RDS/IB: Connection to %pI4 version %u.%u failed," " no longer supported\n", &conn->c_faddr, diff --git a/net/rds/loop.c b/net/rds/loop.c index 268f07faaa1a..15f83db78f0c 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -96,8 +96,9 @@ out: */ static void rds_loop_inc_free(struct rds_incoming *inc) { - struct rds_message *rm = container_of(inc, struct rds_message, m_inc); - rds_message_put(rm); + struct rds_message *rm = container_of(inc, struct rds_message, m_inc); + + rds_message_put(rm); } /* we need to at least give the thread something to succeed */ diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index c173f69e1479..e381bbcd9cc1 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -102,7 +102,8 @@ int rds_sysctl_init(void) rds_sysctl_reconnect_min = msecs_to_jiffies(1); rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min; - rds_sysctl_reg_table = register_net_sysctl(&init_net,"net/rds", rds_sysctl_rds_table); + rds_sysctl_reg_table = + register_net_sysctl(&init_net, "net/rds", rds_sysctl_rds_table); if (!rds_sysctl_reg_table) return -ENOMEM; return 0; diff --git a/net/rds/tcp.h b/net/rds/tcp.h index ec0602b0dc24..7940babf6c71 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -83,7 +83,7 @@ int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to); void rds_tcp_xmit_prepare(struct rds_connection *conn); void rds_tcp_xmit_complete(struct rds_connection *conn); int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, - unsigned int hdr_off, unsigned int sg, unsigned int off); + unsigned int hdr_off, unsigned int sg, unsigned int off); void rds_tcp_write_space(struct sock *sk); /* tcp_stats.c */ diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index ba9ec67f4e41..96c2c4d17909 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -55,20 +55,20 @@ void rds_tcp_state_change(struct sock *sk) rdsdebug("sock %p state_change to %d\n", tc->t_sock, sk->sk_state); - switch(sk->sk_state) { - /* ignore connecting sockets as they make progress */ - case TCP_SYN_SENT: - case TCP_SYN_RECV: - break; - case TCP_ESTABLISHED: - rds_connect_path_complete(&conn->c_path[0], - RDS_CONN_CONNECTING); - break; - case TCP_CLOSE_WAIT: - case TCP_CLOSE: - rds_conn_drop(conn); - default: - break; + switch (sk->sk_state) { + /* ignore connecting sockets as they make progress */ + case TCP_SYN_SENT: + case TCP_SYN_RECV: + break; + case TCP_ESTABLISHED: + rds_connect_path_complete(&conn->c_path[0], + RDS_CONN_CONNECTING); + break; + case TCP_CLOSE_WAIT: + case TCP_CLOSE: + rds_conn_drop(conn); + default: + break; } out: read_unlock_bh(&sk->sk_callback_lock); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 22d9bb15f731..f9cc945a77b3 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -140,7 +140,7 @@ int rds_tcp_accept_one(struct socket *sock) conn->c_path[0].cp_outgoing = 0; /* rds_connect_path_complete() marks RDS_CONN_UP */ rds_connect_path_complete(&conn->c_path[0], - RDS_CONN_DISCONNECTING); + RDS_CONN_RESETTING); } } else { rds_tcp_set_callbacks(new_sock, conn); diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index 3f8fb38996c7..4a87d9ef3084 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -172,7 +172,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, while (left) { if (!tinc) { tinc = kmem_cache_alloc(rds_tcp_incoming_slab, - arg->gfp); + arg->gfp); if (!tinc) { desc->error = -ENOMEM; goto out; diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 2b3414f3c45c..710f1aae97ad 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -67,19 +67,19 @@ void rds_tcp_xmit_complete(struct rds_connection *conn) static int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len) { struct kvec vec = { - .iov_base = data, - .iov_len = len, + .iov_base = data, + .iov_len = len, + }; + struct msghdr msg = { + .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL, }; - struct msghdr msg = { - .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL, - }; return kernel_sendmsg(sock, &msg, &vec, 1, vec.iov_len); } /* the core send_sem serializes this with other xmit and shutdown */ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, - unsigned int hdr_off, unsigned int sg, unsigned int off) + unsigned int hdr_off, unsigned int sg, unsigned int off) { struct rds_tcp_connection *tc = conn->c_transport_data; int done = 0; @@ -197,7 +197,7 @@ void rds_tcp_write_space(struct sock *sk) tc->t_last_seen_una = rds_tcp_snd_una(tc); rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked); - if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) + if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) queue_delayed_work(rds_wq, &conn->c_send_w, 0); out: diff --git a/net/rds/transport.c b/net/rds/transport.c index f3afd1d60d3c..2ffd3e30c643 100644 --- a/net/rds/transport.c +++ b/net/rds/transport.c @@ -140,8 +140,7 @@ unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, rds_info_iter_unmap(iter); down_read(&rds_trans_sem); - for (i = 0; i < RDS_TRANS_COUNT; i++) - { + for (i = 0; i < RDS_TRANS_COUNT; i++) { trans = transports[i]; if (!trans || !trans->stats_info_copy) continue; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index f8c61d2a7963..47ec2305f920 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1122,7 +1122,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) nla_nest_end(skb, nest); ret = skb->len; } else - nla_nest_cancel(skb, nest); + nlmsg_trim(skb, b); nlh->nlmsg_len = skb_tail_pointer(skb) - b; if (NETLINK_CB(cb->skb).portid && ret) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index b7fa96926c90..845ab5119c05 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -106,9 +106,9 @@ int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi) } EXPORT_SYMBOL_GPL(ife_get_meta_u16); -int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval) +int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp) { - mi->metaval = kmemdup(metaval, sizeof(u32), GFP_KERNEL); + mi->metaval = kmemdup(metaval, sizeof(u32), gfp); if (!mi->metaval) return -ENOMEM; @@ -116,9 +116,9 @@ int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval) } EXPORT_SYMBOL_GPL(ife_alloc_meta_u32); -int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval) +int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp) { - mi->metaval = kmemdup(metaval, sizeof(u16), GFP_KERNEL); + mi->metaval = kmemdup(metaval, sizeof(u16), gfp); if (!mi->metaval) return -ENOMEM; @@ -240,10 +240,10 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len) } /* called when adding new meta information - * under ife->tcf_lock + * under ife->tcf_lock for existing action */ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, - void *val, int len) + void *val, int len, bool exists) { struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret = 0; @@ -251,11 +251,13 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, if (!ops) { ret = -ENOENT; #ifdef CONFIG_MODULES - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); rtnl_unlock(); request_module("ifemeta%u", metaid); rtnl_lock(); - spin_lock_bh(&ife->tcf_lock); + if (exists) + spin_lock_bh(&ife->tcf_lock); ops = find_ife_oplist(metaid); #endif } @@ -272,10 +274,10 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, } /* called when adding new meta information - * under ife->tcf_lock + * under ife->tcf_lock for existing action */ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len) + int len, bool atomic) { struct tcf_meta_info *mi = NULL; struct tcf_meta_ops *ops = find_ife_oplist(metaid); @@ -284,7 +286,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, if (!ops) return -ENOENT; - mi = kzalloc(sizeof(*mi), GFP_KERNEL); + mi = kzalloc(sizeof(*mi), atomic ? GFP_ATOMIC : GFP_KERNEL); if (!mi) { /*put back what find_ife_oplist took */ module_put(ops->owner); @@ -294,7 +296,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, mi->metaid = metaid; mi->ops = ops; if (len > 0) { - ret = ops->alloc(mi, metaval); + ret = ops->alloc(mi, metaval, atomic ? GFP_ATOMIC : GFP_KERNEL); if (ret != 0) { kfree(mi); module_put(ops->owner); @@ -313,11 +315,13 @@ static int use_all_metadata(struct tcf_ife_info *ife) int rc = 0; int installed = 0; + read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { - rc = add_metainfo(ife, o->metaid, NULL, 0); + rc = add_metainfo(ife, o->metaid, NULL, 0, true); if (rc == 0) installed += 1; } + read_unlock(&ife_mod_lock); if (installed) return 0; @@ -385,8 +389,9 @@ static void tcf_ife_cleanup(struct tc_action *a, int bind) spin_unlock_bh(&ife->tcf_lock); } -/* under ife->tcf_lock */ -static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb) +/* under ife->tcf_lock for existing action */ +static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, + bool exists) { int len = 0; int rc = 0; @@ -398,11 +403,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb) val = nla_data(tb[i]); len = nla_len(tb[i]); - rc = load_metaops_and_vet(ife, i, val, len); + rc = load_metaops_and_vet(ife, i, val, len, exists); if (rc != 0) return rc; - rc = add_metainfo(ife, i, val, len); + rc = add_metainfo(ife, i, val, len, exists); if (rc) return rc; } @@ -475,7 +480,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, saddr = nla_data(tb[TCA_IFE_SMAC]); } - spin_lock_bh(&ife->tcf_lock); + if (exists) + spin_lock_bh(&ife->tcf_lock); ife->tcf_action = parm->action; if (parm->flags & IFE_ENCODE) { @@ -505,11 +511,12 @@ metadata_parse_err: if (ret == ACT_P_CREATED) _tcf_ife_cleanup(a, bind); - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); return err; } - err = populate_metalist(ife, tb2); + err = populate_metalist(ife, tb2, exists); if (err) goto metadata_parse_err; @@ -524,12 +531,14 @@ metadata_parse_err: if (ret == ACT_P_CREATED) _tcf_ife_cleanup(a, bind); - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); return err; } } - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); if (ret == ACT_P_CREATED) tcf_hash_insert(tn, a); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 6148e323ed93..b8c50600697a 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -123,10 +123,13 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, } td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); - if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) + if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) { + if (exists) + tcf_hash_release(a, bind); return -EINVAL; + } - if (!tcf_hash_check(tn, index, a, bind)) { + if (!exists) { ret = tcf_hash_create(tn, index, est, a, sizeof(*ipt), bind, false); if (ret) diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 6ea0db427f91..baeed6a78d28 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -40,14 +40,18 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + unsigned int prev_backlog; + if (likely(skb_queue_len(&sch->q) < sch->limit)) return qdisc_enqueue_tail(skb, sch); + prev_backlog = sch->qstats.backlog; /* queue full, remove one skb to fulfill the limit */ __qdisc_queue_drop_head(sch, &sch->q, to_free); qdisc_qstats_drop(sch); qdisc_enqueue_tail(skb, sch); + qdisc_tree_reduce_backlog(sch, 0, prev_backlog - sch->qstats.backlog); return NET_XMIT_CN; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index ba098f2654b4..91982d9784b3 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -984,7 +984,9 @@ static void htb_work_func(struct work_struct *work) struct htb_sched *q = container_of(work, struct htb_sched, work); struct Qdisc *sch = q->watchdog.qdisc; + rcu_read_lock(); __netif_schedule(qdisc_root(sch)); + rcu_read_unlock(); } static int htb_init(struct Qdisc *sch, struct nlattr *opt) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 6eac3d880048..aaaf02175338 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -621,17 +621,17 @@ deliver: #endif if (q->qdisc) { + unsigned int pkt_len = qdisc_pkt_len(skb); struct sk_buff *to_free = NULL; int err; err = qdisc_enqueue(skb, q->qdisc, &to_free); kfree_skb_list(to_free); - if (unlikely(err != NET_XMIT_SUCCESS)) { - if (net_xmit_drop_count(err)) { - qdisc_qstats_drop(sch); - qdisc_tree_reduce_backlog(sch, 1, - qdisc_pkt_len(skb)); - } + if (err != NET_XMIT_SUCCESS && + net_xmit_drop_count(err)) { + qdisc_qstats_drop(sch); + qdisc_tree_reduce_backlog(sch, 1, + pkt_len); } goto tfifo_dequeue; } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index f4d443aeae54..8f575899adfa 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -153,8 +153,9 @@ prio_destroy(struct Qdisc *sch) static int prio_tune(struct Qdisc *sch, struct nlattr *opt) { struct prio_sched_data *q = qdisc_priv(sch); + struct Qdisc *queues[TCQ_PRIO_BANDS]; + int oldbands = q->bands, i; struct tc_prio_qopt *qopt; - int i; if (nla_len(opt) < sizeof(*qopt)) return -EINVAL; @@ -168,62 +169,42 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) return -EINVAL; } + /* Before commit, make sure we can allocate all new qdiscs */ + for (i = oldbands; i < qopt->bands; i++) { + queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, i + 1)); + if (!queues[i]) { + while (i > oldbands) + qdisc_destroy(queues[--i]); + return -ENOMEM; + } + } + sch_tree_lock(sch); q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); - for (i = q->bands; i < TCQ_PRIO_BANDS; i++) { + for (i = q->bands; i < oldbands; i++) { struct Qdisc *child = q->queues[i]; - q->queues[i] = &noop_qdisc; - if (child != &noop_qdisc) { - qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); - qdisc_destroy(child); - } - } - sch_tree_unlock(sch); - for (i = 0; i < q->bands; i++) { - if (q->queues[i] == &noop_qdisc) { - struct Qdisc *child, *old; - - child = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, - TC_H_MAKE(sch->handle, i + 1)); - if (child) { - sch_tree_lock(sch); - old = q->queues[i]; - q->queues[i] = child; - - if (old != &noop_qdisc) { - qdisc_tree_reduce_backlog(old, - old->q.qlen, - old->qstats.backlog); - qdisc_destroy(old); - } - sch_tree_unlock(sch); - } - } + qdisc_tree_reduce_backlog(child, child->q.qlen, + child->qstats.backlog); + qdisc_destroy(child); } + + for (i = oldbands; i < q->bands; i++) + q->queues[i] = queues[i]; + + sch_tree_unlock(sch); return 0; } static int prio_init(struct Qdisc *sch, struct nlattr *opt) { - struct prio_sched_data *q = qdisc_priv(sch); - int i; - - for (i = 0; i < TCQ_PRIO_BANDS; i++) - q->queues[i] = &noop_qdisc; - - if (opt == NULL) { + if (!opt) return -EINVAL; - } else { - int err; - if ((err = prio_tune(sch, opt)) != 0) - return err; - } - return 0; + return prio_tune(sch, opt); } static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index 1ce724b87618..f69edcf219e5 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -3,12 +3,6 @@ #include <linux/sock_diag.h> #include <net/sctp/sctp.h> -extern void inet_diag_msg_common_fill(struct inet_diag_msg *r, - struct sock *sk); -extern int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, - struct inet_diag_msg *r, int ext, - struct user_namespace *user_ns); - static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *info); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 06b4df9faaa1..2808d550d273 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -446,16 +446,27 @@ out_no_rpciod: return ERR_PTR(err); } -struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, +static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, struct rpc_xprt *xprt) { struct rpc_clnt *clnt = NULL; struct rpc_xprt_switch *xps; - xps = xprt_switch_alloc(xprt, GFP_KERNEL); - if (xps == NULL) - return ERR_PTR(-ENOMEM); - + if (args->bc_xprt && args->bc_xprt->xpt_bc_xps) { + WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP); + xps = args->bc_xprt->xpt_bc_xps; + xprt_switch_get(xps); + } else { + xps = xprt_switch_alloc(xprt, GFP_KERNEL); + if (xps == NULL) { + xprt_put(xprt); + return ERR_PTR(-ENOMEM); + } + if (xprt->bc_xprt) { + xprt_switch_get(xps); + xprt->bc_xprt->xpt_bc_xps = xps; + } + } clnt = rpc_new_client(args, xps, xprt, NULL); if (IS_ERR(clnt)) return clnt; @@ -483,7 +494,6 @@ struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, return clnt; } -EXPORT_SYMBOL_GPL(rpc_create_xprt); /** * rpc_create - create an RPC client and transport with one call @@ -509,6 +519,15 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) }; char servername[48]; + if (args->bc_xprt) { + WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP); + xprt = args->bc_xprt->xpt_bc_xprt; + if (xprt) { + xprt_get(xprt); + return rpc_create_xprt(args, xprt); + } + } + if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS) xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS; if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index f5572e31d518..4f01f63102ee 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -136,6 +136,8 @@ static void svc_xprt_free(struct kref *kref) /* See comment on corresponding get in xs_setup_bc_tcp(): */ if (xprt->xpt_bc_xprt) xprt_put(xprt->xpt_bc_xprt); + if (xprt->xpt_bc_xps) + xprt_switch_put(xprt->xpt_bc_xps); xprt->xpt_ops->xpo_free(xprt); module_put(owner); } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 2d3e0c42361e..7e2b2fa189c3 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -3057,6 +3057,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) return xprt; args->bc_xprt->xpt_bc_xprt = NULL; + args->bc_xprt->xpt_bc_xps = NULL; xprt_put(xprt); ret = ERR_PTR(-EINVAL); out_err: diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 9a70e1d744d2..8584cc48654c 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -411,7 +411,7 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, return 0; /* Send RESET message even if bearer is detached from device */ - tipc_ptr = rtnl_dereference(dev->tipc_ptr); + tipc_ptr = rcu_dereference_rtnl(dev->tipc_ptr); if (unlikely(!tipc_ptr && !msg_is_reset(buf_msg(skb)))) goto drop; diff --git a/net/tipc/link.c b/net/tipc/link.c index 03f8bdf70d8f..c1df33f878b2 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -705,7 +705,8 @@ static void link_profile_stats(struct tipc_link *l) */ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) { - int mtyp, rc = 0; + int mtyp = 0; + int rc = 0; bool state = false; bool probe = false; bool setup = false; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 8740930f0787..17201aa8423d 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -41,6 +41,8 @@ #include "name_table.h" #define MAX_FORWARD_SIZE 1024 +#define BUF_HEADROOM (LL_MAX_HEADER + 48) +#define BUF_TAILROOM 16 static unsigned int align(unsigned int i) { @@ -505,6 +507,10 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) msg_set_hdr_sz(hdr, BASIC_H_SIZE); } + if (skb_cloned(_skb) && + pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_KERNEL)) + goto exit; + /* Now reverse the concerned fields */ msg_set_errcode(hdr, err); msg_set_origport(hdr, msg_destport(&ohdr)); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 024da8af91f0..7cf52fb39bee 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -94,17 +94,6 @@ struct plist; #define TIPC_MEDIA_INFO_OFFSET 5 -/** - * TIPC message buffer code - * - * TIPC message buffer headroom reserves space for the worst-case - * link-level device header (in case the message is sent off-node). - * - * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields - * are word aligned for quicker access - */ -#define BUF_HEADROOM (LL_MAX_HEADER + 48) - struct tipc_skb_cb { void *handle; struct sk_buff *tail; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 88bfcd707064..c49b8df438cb 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -796,9 +796,11 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, * @tsk: receiving socket * @skb: pointer to message buffer. */ -static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb) +static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { struct sock *sk = &tsk->sk; + u32 onode = tsk_own_node(tsk); struct tipc_msg *hdr = buf_msg(skb); int mtyp = msg_type(hdr); bool conn_cong; @@ -811,7 +813,8 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb) if (mtyp == CONN_PROBE) { msg_set_type(hdr, CONN_PROBE_REPLY); - tipc_sk_respond(sk, skb, TIPC_OK); + if (tipc_msg_reverse(onode, &skb, TIPC_OK)) + __skb_queue_tail(xmitq, skb); return; } else if (mtyp == CONN_ACK) { conn_cong = tsk_conn_cong(tsk); @@ -1686,7 +1689,8 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) * * Returns true if message was added to socket receive queue, otherwise false */ -static bool filter_rcv(struct sock *sk, struct sk_buff *skb) +static bool filter_rcv(struct sock *sk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { struct socket *sock = sk->sk_socket; struct tipc_sock *tsk = tipc_sk(sk); @@ -1696,7 +1700,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb) int usr = msg_user(hdr); if (unlikely(msg_user(hdr) == CONN_MANAGER)) { - tipc_sk_proto_rcv(tsk, skb); + tipc_sk_proto_rcv(tsk, skb, xmitq); return false; } @@ -1739,7 +1743,8 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb) return true; reject: - tipc_sk_respond(sk, skb, err); + if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err)) + __skb_queue_tail(xmitq, skb); return false; } @@ -1755,9 +1760,24 @@ reject: static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) { unsigned int truesize = skb->truesize; + struct sk_buff_head xmitq; + u32 dnode, selector; - if (likely(filter_rcv(sk, skb))) + __skb_queue_head_init(&xmitq); + + if (likely(filter_rcv(sk, skb, &xmitq))) { atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt); + return 0; + } + + if (skb_queue_empty(&xmitq)) + return 0; + + /* Send response/rejected message */ + skb = __skb_dequeue(&xmitq); + dnode = msg_destnode(buf_msg(skb)); + selector = msg_origport(buf_msg(skb)); + tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector); return 0; } @@ -1771,12 +1791,13 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) * Caller must hold socket lock */ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, - u32 dport) + u32 dport, struct sk_buff_head *xmitq) { + unsigned long time_limit = jiffies + 2; + struct sk_buff *skb; unsigned int lim; atomic_t *dcnt; - struct sk_buff *skb; - unsigned long time_limit = jiffies + 2; + u32 onode; while (skb_queue_len(inputq)) { if (unlikely(time_after_eq(jiffies, time_limit))) @@ -1788,7 +1809,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, /* Add message directly to receive queue if possible */ if (!sock_owned_by_user(sk)) { - filter_rcv(sk, skb); + filter_rcv(sk, skb, xmitq); continue; } @@ -1801,7 +1822,9 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, continue; /* Overload => reject message back to sender */ - tipc_sk_respond(sk, skb, TIPC_ERR_OVERLOAD); + onode = tipc_own_addr(sock_net(sk)); + if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD)) + __skb_queue_tail(xmitq, skb); break; } } @@ -1814,12 +1837,14 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, */ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) { + struct sk_buff_head xmitq; u32 dnode, dport = 0; int err; struct tipc_sock *tsk; struct sock *sk; struct sk_buff *skb; + __skb_queue_head_init(&xmitq); while (skb_queue_len(inputq)) { dport = tipc_skb_peek_port(inputq, dport); tsk = tipc_sk_lookup(net, dport); @@ -1827,9 +1852,14 @@ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) if (likely(tsk)) { sk = &tsk->sk; if (likely(spin_trylock_bh(&sk->sk_lock.slock))) { - tipc_sk_enqueue(inputq, sk, dport); + tipc_sk_enqueue(inputq, sk, dport, &xmitq); spin_unlock_bh(&sk->sk_lock.slock); } + /* Send pending response/rejected messages, if any */ + while ((skb = __skb_dequeue(&xmitq))) { + dnode = msg_destnode(buf_msg(skb)); + tipc_node_xmit_skb(net, skb, dnode, dport); + } sock_put(sk); continue; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 80aa6a3e6817..735362c26c8e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -315,7 +315,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i) &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->path.dentry; - if (dentry && d_backing_inode(dentry) == i) { + if (dentry && d_real_inode(dentry) == i) { sock_hold(s); goto found; } @@ -911,7 +911,7 @@ static struct sock *unix_find_other(struct net *net, err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path); if (err) goto fail; - inode = d_backing_inode(path.dentry); + inode = d_real_inode(path.dentry); err = inode_permission(inode, MAY_WRITE); if (err) goto put_fail; @@ -1048,7 +1048,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out_up; } addr->hash = UNIX_HASH_SIZE; - hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); + hash = d_real_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); spin_lock(&unix_table_lock); u->path = u_path; list = &unix_socket_table[hash]; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index b5f1221f48d4..b96ac918e0ba 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -61,6 +61,14 @@ * function will also cleanup rejected sockets, those that reach the connected * state but leave it before they have been accepted. * + * - Lock ordering for pending or accept queue sockets is: + * + * lock_sock(listener); + * lock_sock_nested(pending, SINGLE_DEPTH_NESTING); + * + * Using explicit nested locking keeps lockdep happy since normally only one + * lock of a given class may be taken at a time. + * * - Sockets created by user action will be cleaned up when the user process * calls close(2), causing our release implementation to be called. Our release * implementation will perform some cleanup then drop the last reference so our @@ -443,7 +451,7 @@ void vsock_pending_work(struct work_struct *work) cleanup = true; lock_sock(listener); - lock_sock(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); if (vsock_is_pending(sk)) { vsock_remove_pending(listener, sk); @@ -1292,7 +1300,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) if (connected) { listener->sk_ack_backlog--; - lock_sock(connected); + lock_sock_nested(connected, SINGLE_DEPTH_NESTING); vconnected = vsock_sk(connected); /* If the listener socket has received an error, then we should diff --git a/net/wireless/util.c b/net/wireless/util.c index 4e809e978b7d..2443ee30ba5b 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -509,7 +509,7 @@ static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr, * replace EtherType */ hdrlen += ETH_ALEN + 2; else - tmp.h_proto = htons(skb->len); + tmp.h_proto = htons(skb->len - hdrlen); pskb_pull(skb, hdrlen); |