From 33cf7c90fe2f97afb1cadaa0cfb782cb9d1b9ee2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Mar 2015 18:53:14 -0700 Subject: net: add real socket cookies A long standing problem in netlink socket dumps is the use of kernel socket addresses as cookies. 1) It is a security concern. 2) Sockets can be reused quite quickly, so there is no guarantee a cookie is used once and identify a flow. 3) request sock, establish sock, and timewait socks for a given flow have different cookies. Part of our effort to bring better TCP statistics requires to switch to a different allocator. In this patch, I chose to use a per network namespace 64bit generator, and to use it only in the case a socket needs to be dumped to netlink. (This might be refined later if needed) Note that I tried to carry cookies from request sock, to establish sock, then timewait sockets. Signed-off-by: Eric Dumazet Cc: Eric Salo Signed-off-by: David S. Miller --- include/net/inet_sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net/inet_sock.h') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index eb16c7beed1e..e565afdc14ad 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -77,6 +77,8 @@ struct inet_request_sock { #define ir_v6_rmt_addr req.__req_common.skc_v6_daddr #define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr #define ir_iif req.__req_common.skc_bound_dev_if +#define ir_cookie req.__req_common.skc_cookie +#define ireq_net req.__req_common.skc_net kmemcheck_bitfield_begin(flags); u16 snd_wscale : 4, -- cgit v1.2.3 From bd337c581b2b0d933d37f664bf55b342577fed3a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 12 Mar 2015 16:44:03 -0700 Subject: ipv6: add missing ireq_net & ir_cookie initializations I forgot to update dccp_v6_conn_request() & cookie_v6_check(). They both need to set ireq->ireq_net and ireq->ir_cookie Lets clear ireq->ir_cookie in inet_reqsk_alloc() Signed-off-by: Eric Dumazet Fixes: 33cf7c90fe2f ("net: add real socket cookies") Signed-off-by: David S. Miller --- include/net/inet_sock.h | 1 + net/dccp/ipv4.c | 1 - net/dccp/ipv6.c | 1 + net/ipv4/tcp_input.c | 1 - net/ipv6/syncookies.c | 1 + 5 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net/inet_sock.h') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index e565afdc14ad..30f7170abbf3 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -249,6 +249,7 @@ static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops if (req != NULL) { kmemcheck_annotate_bitfield(ireq, flags); ireq->opt = NULL; + atomic64_set(&ireq->ir_cookie, 0); } return req; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index a78e0b999f96..f695874b5ade 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -642,7 +642,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->ir_loc_addr = ip_hdr(skb)->daddr; ireq->ir_rmt_addr = ip_hdr(skb)->saddr; write_pnet(&ireq->ireq_net, sock_net(sk)); - atomic64_set(&ireq->ir_cookie, 0); /* * Step 3: Process LISTEN state diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6bcaa33cd804..703a21acf434 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -403,6 +403,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + write_pnet(&ireq->ireq_net, sock_net(sk)); if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 26f24995bd3d..da61a8e75f68 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5966,7 +5966,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb, sk); write_pnet(&inet_rsk(req)->ireq_net, sock_net(sk)); - atomic64_set(&inet_rsk(req)->ir_cookie, 0); af_ops->init_req(req, sk, skb); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 7337fc7947e2..66bba6a84e47 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -196,6 +196,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); treq = tcp_rsk(req); treq->listener = NULL; + write_pnet(&ireq->ireq_net, sock_net(sk)); if (security_inet_conn_request(sk, skb, req)) goto out_free; -- cgit v1.2.3 From d34ac51b76e8c7de6094cfb11780ef9c2b93469f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 12 Mar 2015 16:44:05 -0700 Subject: inet: add ireq_state field to inet_request_sock We need to identify request sock when they'll be visible in global ehash table. ireq_state is an alias to req.__req_common.skc_state. Its value is set to TCP_NEW_SYN_RECV Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net/inet_sock.h') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 30f7170abbf3..b31f01de5cd6 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -27,6 +27,7 @@ #include #include #include +#include /** struct ip_options - IP Options * @@ -79,6 +80,7 @@ struct inet_request_sock { #define ir_iif req.__req_common.skc_bound_dev_if #define ir_cookie req.__req_common.skc_cookie #define ireq_net req.__req_common.skc_net +#define ireq_state req.__req_common.skc_state kmemcheck_bitfield_begin(flags); u16 snd_wscale : 4, @@ -250,6 +252,7 @@ static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops kmemcheck_annotate_bitfield(ireq, flags); ireq->opt = NULL; atomic64_set(&ireq->ir_cookie, 0); + ireq->ireq_state = TCP_NEW_SYN_RECV; } return req; -- cgit v1.2.3 From 1e2e01172fd11b4dbfee746c0c8fbcaa9dbf22a0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 12 Mar 2015 16:44:06 -0700 Subject: inet: add rsk_refcnt/ireq_refcnt to request socks When request socks will be in ehash, they'll need to be refcounted. This patch adds rsk_refcnt/ireq_refcnt macros, and adds reqsk_put() function, but nothing yet use them. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 1 + include/net/request_sock.h | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include/net/inet_sock.h') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index b31f01de5cd6..9d6470c16a27 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -81,6 +81,7 @@ struct inet_request_sock { #define ir_cookie req.__req_common.skc_cookie #define ireq_net req.__req_common.skc_net #define ireq_state req.__req_common.skc_state +#define ireq_refcnt req.__req_common.skc_refcnt kmemcheck_bitfield_begin(flags); u16 snd_wscale : 4, diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 7f830ff67f08..e255ecf8bb40 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -49,6 +49,8 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req); */ struct request_sock { struct sock_common __req_common; +#define rsk_refcnt __req_common.skc_refcnt + struct request_sock *dl_next; u16 mss; u8 num_retrans; /* number of retransmits */ @@ -86,6 +88,12 @@ static inline void reqsk_free(struct request_sock *req) __reqsk_free(req); } +static inline void reqsk_put(struct request_sock *req) +{ + if (atomic_dec_and_test(&req->rsk_refcnt)) + reqsk_free(req); +} + extern int sysctl_max_syn_backlog; /** struct listen_sock - listen state -- cgit v1.2.3 From 3f66b083a5b7f1a63540c24df3679c24f2e935a9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 12 Mar 2015 16:44:10 -0700 Subject: inet: introduce ireq_family Before inserting request socks into general hash table, fill their socket family. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 1 + net/dccp/ipv4.c | 1 + net/dccp/ipv6.c | 1 + net/ipv4/inet_diag.c | 2 +- net/ipv4/syncookies.c | 1 + net/ipv4/tcp_ipv4.c | 1 + net/ipv6/syncookies.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 8 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/net/inet_sock.h') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 9d6470c16a27..b3053fdd871e 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -82,6 +82,7 @@ struct inet_request_sock { #define ireq_net req.__req_common.skc_net #define ireq_state req.__req_common.skc_state #define ireq_refcnt req.__req_common.skc_refcnt +#define ireq_family req.__req_common.skc_family kmemcheck_bitfield_begin(flags); u16 snd_wscale : 4, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index f695874b5ade..8f6f4004daac 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -642,6 +642,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->ir_loc_addr = ip_hdr(skb)->daddr; ireq->ir_rmt_addr = ip_hdr(skb)->saddr; write_pnet(&ireq->ireq_net, sock_net(sk)); + ireq->ireq_family = AF_INET; /* * Step 3: Process LISTEN state diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 703a21acf434..5166b0043f95 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -404,6 +404,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; write_pnet(&ireq->ireq_net, sock_net(sk)); + ireq->ireq_family = AF_INET6; if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index c55a6fa3162d..43789c99031f 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -728,7 +728,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, return -EMSGSIZE; r = nlmsg_data(nlh); - r->idiag_family = sk->sk_family; + r->idiag_family = ireq->ireq_family; r->idiag_state = TCP_SYN_RECV; r->idiag_timer = 1; r->idiag_retrans = req->num_retrans; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 18e5a67fda81..0c432730c7b4 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -347,6 +347,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; treq->listener = NULL; write_pnet(&ireq->ireq_net, sock_net(sk)); + ireq->ireq_family = AF_INET; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 70b0f701bbdb..1f514a0c5e60 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1228,6 +1228,7 @@ static void tcp_v4_init_req(struct request_sock *req, struct sock *sk, ireq->ir_rmt_addr = ip_hdr(skb)->saddr; ireq->no_srccheck = inet_sk(sk)->transparent; ireq->opt = tcp_v4_save_options(skb); + ireq->ireq_family = AF_INET; } static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl, diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 66bba6a84e47..58875ce8e178 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -197,6 +197,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq = tcp_rsk(req); treq->listener = NULL; write_pnet(&ireq->ireq_net, sock_net(sk)); + ireq->ireq_family = AF_INET6; if (security_inet_conn_request(sk, skb, req)) goto out_free; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1ccfede7d55f..c5fc6a5e4adc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -749,6 +749,7 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, atomic_inc(&skb->users); ireq->pktopts = skb; } + ireq->ireq_family = AF_INET6; } static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl, -- cgit v1.2.3 From 13854e5a60461daee08ce99842b7f4d37553d911 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 15 Mar 2015 21:12:16 -0700 Subject: inet: add proper refcounting to request sock reqsk_put() is the generic function that should be used to release a refcount (and automatically call reqsk_free()) reqsk_free() might be called if refcount is known to be 0 or undefined. refcnt is set to one in inet_csk_reqsk_queue_add() As request socks are not yet in global ehash table, I added temporary debugging checks in reqsk_put() and reqsk_free() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 5 +++++ include/net/inet_sock.h | 5 +++++ include/net/request_sock.h | 13 +++++++------ net/core/request_sock.c | 4 ++-- net/ipv4/inet_connection_sock.c | 8 ++++---- net/ipv4/syncookies.c | 10 +++++----- net/ipv4/tcp_fastopen.c | 2 +- 7 files changed, 29 insertions(+), 18 deletions(-) (limited to 'include/net/inet_sock.h') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b9a6b0a94cc6..191feec60205 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -275,6 +275,11 @@ static inline void inet_csk_reqsk_queue_add(struct sock *sk, struct sock *child) { reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); + /* before letting lookups find us, make sure all req fields + * are committed to memory. + */ + smp_wmb(); + atomic_set(&req->rsk_refcnt, 1); } void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index b3053fdd871e..3d8c09abb097 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -255,6 +255,11 @@ static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops ireq->opt = NULL; atomic64_set(&ireq->ir_cookie, 0); ireq->ireq_state = TCP_NEW_SYN_RECV; + + /* Following is temporary. It is coupled with debugging + * helpers in reqsk_put() & reqsk_free() + */ + atomic_set(&ireq->ireq_refcnt, 0); } return req; diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 3275cf31f731..56dc2faba47e 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -82,19 +82,20 @@ static inline struct request_sock *inet_reqsk(struct sock *sk) return (struct request_sock *)sk; } -static inline void __reqsk_free(struct request_sock *req) -{ - kmem_cache_free(req->rsk_ops->slab, req); -} - static inline void reqsk_free(struct request_sock *req) { + /* temporary debugging */ + WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0); + req->rsk_ops->destructor(req); - __reqsk_free(req); + kmem_cache_free(req->rsk_ops->slab, req); } static inline void reqsk_put(struct request_sock *req) { + /* temporary debugging, until req sock are put into ehash table */ + WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 1); + if (atomic_dec_and_test(&req->rsk_refcnt)) reqsk_free(req); } diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 04db318e6218..e910317ef6d9 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -103,7 +103,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) while ((req = lopt->syn_table[i]) != NULL) { lopt->syn_table[i] = req->dl_next; lopt->qlen--; - reqsk_free(req); + reqsk_put(req); } } } @@ -180,7 +180,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, */ spin_unlock_bh(&fastopenq->lock); sock_put(lsk); - reqsk_free(req); + reqsk_put(req); return; } /* Wait for 60secs before removing a req that has triggered RST. diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 34581f928afa..3390ba6f96b2 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -340,7 +340,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) out: release_sock(sk); if (req) - __reqsk_free(req); + reqsk_put(req); return newsk; out_err: newsk = NULL; @@ -635,7 +635,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, /* Drop this request */ inet_csk_reqsk_queue_unlink(parent, req, reqp); reqsk_queue_removed(queue, req); - reqsk_free(req); + reqsk_put(req); continue; } reqp = &req->dl_next; @@ -837,7 +837,7 @@ void inet_csk_listen_stop(struct sock *sk) sock_put(child); sk_acceptq_removed(sk); - __reqsk_free(req); + reqsk_put(req); } if (queue->fastopenq != NULL) { /* Free all the reqs queued in rskq_rst_head. */ @@ -847,7 +847,7 @@ void inet_csk_listen_stop(struct sock *sk) spin_unlock_bh(&queue->fastopenq->lock); while ((req = acc_req) != NULL) { acc_req = req->dl_next; - __reqsk_free(req); + reqsk_put(req); } } WARN_ON(sk->sk_ack_backlog); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f17db898ed26..5ae0c49f5e2e 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -219,9 +219,9 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, } EXPORT_SYMBOL_GPL(__cookie_v4_check); -static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst) +static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst) { struct inet_connection_sock *icsk = inet_csk(sk); struct sock *child; @@ -357,7 +357,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->opt = tcp_v4_save_options(skb); if (security_inet_conn_request(sk, skb, req)) { - reqsk_free(req); + reqsk_put(req); goto out; } @@ -378,7 +378,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { - reqsk_free(req); + reqsk_put(req); goto out; } diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index fe77417fc137..84381319e1bc 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -253,7 +253,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk) fastopenq->rskq_rst_head = req1->dl_next; fastopenq->qlen--; spin_unlock(&fastopenq->lock); - reqsk_free(req1); + reqsk_put(req1); } return true; } -- cgit v1.2.3 From adc17d6a6ca08d11f70f6c49f3d40b87b68fe53f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Mar 2015 21:06:18 -0700 Subject: inet: move ir_mark to fill a hole On 64bit arches, we can save 8 bytes in inet_request_sock by moving ir_mark to fill a hole. While we are at it, inet_request_mark() can get a const qualifier for listener socket. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/net/inet_sock.h') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 3d8c09abb097..c9ed91891887 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -94,11 +94,11 @@ struct inet_request_sock { acked : 1, no_srccheck: 1; kmemcheck_bitfield_end(flags); + u32 ir_mark; union { struct ip_options_rcu *opt; struct sk_buff *pktopts; }; - u32 ir_mark; }; static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) @@ -106,13 +106,12 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) return (struct inet_request_sock *)sk; } -static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb) +static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) { - if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) { + if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) return skb->mark; - } else { - return sk->sk_mark; - } + + return sk->sk_mark; } struct inet_cork { -- cgit v1.2.3 From 407640de2152e33341ce1131dac269672c3d50f7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Mar 2015 18:32:26 -0700 Subject: inet: add sk_listener argument to inet_reqsk_alloc() listener socket can be used to set net pointer, and will be later used to hold a reference on listener. Add a const qualifier to first argument (struct request_sock_ops *), and factorize all write_pnet(&ireq->ireq_net, sock_net(sk)); Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 9 ++++++--- net/dccp/ipv4.c | 3 +-- net/dccp/ipv6.c | 3 +-- net/ipv4/syncookies.c | 3 +-- net/ipv4/tcp_input.c | 3 +-- net/ipv6/syncookies.c | 3 +-- 6 files changed, 11 insertions(+), 13 deletions(-) (limited to 'include/net/inet_sock.h') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index c9ed91891887..cf7abb00941b 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -244,16 +244,19 @@ static inline unsigned int __inet_ehashfn(const __be32 laddr, initval); } -static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops) +static inline struct request_sock * +inet_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener) { struct request_sock *req = reqsk_alloc(ops); - struct inet_request_sock *ireq = inet_rsk(req); - if (req != NULL) { + if (req) { + struct inet_request_sock *ireq = inet_rsk(req); + kmemcheck_annotate_bitfield(ireq, flags); ireq->opt = NULL; atomic64_set(&ireq->ir_cookie, 0); ireq->ireq_state = TCP_NEW_SYN_RECV; + write_pnet(&ireq->ireq_net, sock_net(sk_listener)); /* Following is temporary. It is coupled with debugging * helpers in reqsk_put() & reqsk_free() diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 7f6456afbaec..bf897829f4f0 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -624,7 +624,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = inet_reqsk_alloc(&dccp_request_sock_ops); + req = inet_reqsk_alloc(&dccp_request_sock_ops, sk); if (req == NULL) goto drop; @@ -641,7 +641,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->ir_loc_addr = ip_hdr(skb)->daddr; ireq->ir_rmt_addr = ip_hdr(skb)->saddr; - write_pnet(&ireq->ireq_net, sock_net(sk)); ireq->ireq_family = AF_INET; ireq->ir_iif = sk->sk_bound_dev_if; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 5166b0043f95..d7e7c7b0a3f1 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -386,7 +386,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = inet_reqsk_alloc(&dccp6_request_sock_ops); + req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk); if (req == NULL) goto drop; @@ -403,7 +403,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; - write_pnet(&ireq->ireq_net, sock_net(sk)); ireq->ireq_family = AF_INET6; if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) || diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 5ae0c49f5e2e..eb940750bb1b 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -325,7 +325,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ + req = inet_reqsk_alloc(&tcp_request_sock_ops, sk); /* for safety */ if (!req) goto out; @@ -346,7 +346,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; treq->listener = NULL; - write_pnet(&ireq->ireq_net, sock_net(sk)); ireq->ireq_family = AF_INET; ireq->ir_iif = sk->sk_bound_dev_if; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7257eb206c07..2a480f6811ea 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6004,7 +6004,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop; } - req = inet_reqsk_alloc(rsk_ops); + req = inet_reqsk_alloc(rsk_ops, sk); if (!req) goto drop; @@ -6020,7 +6020,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb, sk); - write_pnet(&inet_rsk(req)->ireq_net, sock_net(sk)); /* Note: tcp_v6_init_req() might override ir_iif for link locals */ inet_rsk(req)->ir_iif = sk->sk_bound_dev_if; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 58875ce8e178..039e74dd29fe 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -189,14 +189,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet_reqsk_alloc(&tcp6_request_sock_ops); + req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk); if (!req) goto out; ireq = inet_rsk(req); treq = tcp_rsk(req); treq->listener = NULL; - write_pnet(&ireq->ireq_net, sock_net(sk)); ireq->ireq_family = AF_INET6; if (security_inet_conn_request(sk, skb, req)) -- cgit v1.2.3 From e49bb337d77d54afebe4fe5b9008955e1337f83d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Mar 2015 18:32:27 -0700 Subject: inet: uninline inet_reqsk_alloc() inet_reqsk_alloc() is becoming fat and should not be inlined. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 24 ++---------------------- net/ipv4/tcp_input.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 22 deletions(-) (limited to 'include/net/inet_sock.h') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index cf7abb00941b..6fec7343070f 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -244,28 +244,8 @@ static inline unsigned int __inet_ehashfn(const __be32 laddr, initval); } -static inline struct request_sock * -inet_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener) -{ - struct request_sock *req = reqsk_alloc(ops); - - if (req) { - struct inet_request_sock *ireq = inet_rsk(req); - - kmemcheck_annotate_bitfield(ireq, flags); - ireq->opt = NULL; - atomic64_set(&ireq->ir_cookie, 0); - ireq->ireq_state = TCP_NEW_SYN_RECV; - write_pnet(&ireq->ireq_net, sock_net(sk_listener)); - - /* Following is temporary. It is coupled with debugging - * helpers in reqsk_put() & reqsk_free() - */ - atomic_set(&ireq->ireq_refcnt, 0); - } - - return req; -} +struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk_listener); static inline __u8 inet_sk_flowi_flags(const struct sock *sk) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2a480f6811ea..52b74e0eab2a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5967,6 +5967,30 @@ static void tcp_openreq_init(struct request_sock *req, ireq->ir_mark = inet_request_mark(sk, skb); } +struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk_listener) +{ + struct request_sock *req = reqsk_alloc(ops); + + if (req) { + struct inet_request_sock *ireq = inet_rsk(req); + + kmemcheck_annotate_bitfield(ireq, flags); + ireq->opt = NULL; + atomic64_set(&ireq->ir_cookie, 0); + ireq->ireq_state = TCP_NEW_SYN_RECV; + write_pnet(&ireq->ireq_net, sock_net(sk_listener)); + + /* Following is temporary. It is coupled with debugging + * helpers in reqsk_put() & reqsk_free() + */ + atomic_set(&ireq->ireq_refcnt, 0); + } + + return req; +} +EXPORT_SYMBOL(inet_reqsk_alloc); + int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) -- cgit v1.2.3 From 0470c8ca1d57927f2cc3e1d5add1fb2834609447 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Mar 2015 18:32:31 -0700 Subject: inet: fix request sock refcounting While testing last patch series, I found req sock refcounting was wrong. We must set skc_refcnt to 1 for all request socks added in hashes, but also on request sockets created by FastOpen or syncookies. It is tricky because we need to defer this initialization so that future RCU lookups do not try to take a refcount on a not yet fully initialized request socket. Also get rid of ireq_refcnt alias. Signed-off-by: Eric Dumazet Fixes: 13854e5a6046 ("inet: add proper refcounting to request sock") Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 5 ----- include/net/inet_sock.h | 1 - include/net/request_sock.h | 11 +++++++++++ net/ipv4/syncookies.c | 11 ++++++----- net/ipv4/tcp_fastopen.c | 1 + net/ipv4/tcp_input.c | 4 ---- net/ipv6/syncookies.c | 7 ++++--- 7 files changed, 22 insertions(+), 18 deletions(-) (limited to 'include/net/inet_sock.h') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 191feec60205..b9a6b0a94cc6 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -275,11 +275,6 @@ static inline void inet_csk_reqsk_queue_add(struct sock *sk, struct sock *child) { reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); - /* before letting lookups find us, make sure all req fields - * are committed to memory. - */ - smp_wmb(); - atomic_set(&req->rsk_refcnt, 1); } void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 6fec7343070f..b6c3737da4e9 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -81,7 +81,6 @@ struct inet_request_sock { #define ir_cookie req.__req_common.skc_cookie #define ireq_net req.__req_common.skc_net #define ireq_state req.__req_common.skc_state -#define ireq_refcnt req.__req_common.skc_refcnt #define ireq_family req.__req_common.skc_family kmemcheck_bitfield_begin(flags); diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 723d1cbdf20e..3fa4f824900a 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -77,6 +77,11 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener) req->rsk_ops = ops; sock_hold(sk_listener); req->rsk_listener = sk_listener; + + /* Following is temporary. It is coupled with debugging + * helpers in reqsk_put() & reqsk_free() + */ + atomic_set(&req->rsk_refcnt, 0); } return req; } @@ -292,6 +297,12 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, req->sk = NULL; req->dl_next = lopt->syn_table[hash]; + /* before letting lookups find us, make sure all req fields + * are committed to memory and refcnt initialized. + */ + smp_wmb(); + atomic_set(&req->rsk_refcnt, 1); + write_lock(&queue->syn_wait_lock); lopt->syn_table[hash] = req; write_unlock(&queue->syn_wait_lock); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 574b67765a06..34e755403715 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -227,11 +227,12 @@ static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct sock *child; child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); - if (child) + if (child) { + atomic_set(&req->rsk_refcnt, 1); inet_csk_reqsk_queue_add(sk, req, child); - else + } else { reqsk_free(req); - + } return child; } @@ -356,7 +357,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->opt = tcp_v4_save_options(skb); if (security_inet_conn_request(sk, skb, req)) { - reqsk_put(req); + reqsk_free(req); goto out; } @@ -377,7 +378,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { - reqsk_put(req); + reqsk_free(req); goto out; } diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 186fd394ec0a..82e375a0cbcf 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -169,6 +169,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, TCP_TIMEOUT_INIT, TCP_RTO_MAX); + atomic_set(&req->rsk_refcnt, 1); /* Add the child socket directly into the accept queue */ inet_csk_reqsk_queue_add(sk, req, child); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a94ddb96fc85..1dfbaee3554e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5981,10 +5981,6 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, ireq->ireq_state = TCP_NEW_SYN_RECV; write_pnet(&ireq->ireq_net, sock_net(sk_listener)); - /* Following is temporary. It is coupled with debugging - * helpers in reqsk_put() & reqsk_free() - */ - atomic_set(&ireq->ireq_refcnt, 0); } return req; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 1ef0c926ce9d..da5823e5e5a7 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -49,11 +49,12 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct sock *child; child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); - if (child) + if (child) { + atomic_set(&req->rsk_refcnt, 1); inet_csk_reqsk_queue_add(sk, req, child); - else + } else { reqsk_free(req); - + } return child; } -- cgit v1.2.3