diff options
author | David Ahern <dsa@cumulusnetworks.com> | 2015-12-16 13:20:44 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-12-18 14:43:38 -0500 |
commit | 6dd9a14e92e54895e143f10fef4d0b9abe109aa9 (patch) | |
tree | bb02bd14741b26fc9ba0abcaeb9a7f01a8339e68 /net/ipv4 | |
parent | 1a8524794fc7c70f44ac28e3a6e8fd637bc41f14 (diff) |
net: Allow accepted sockets to be bound to l3mdev domain
Allow accepted sockets to derive their sk_bound_dev_if setting from the
l3mdev domain in which the packets originated. A sysctl setting is added
to control the behavior which is similar to sk_mark and
sysctl_tcp_fwmark_accept.
This effectively allow a process to have a "VRF-global" listen socket,
with child sockets bound to the VRF device in which the packet originated.
A similar behavior can be achieved using sk_mark, but a solution using marks
is incomplete as it does not handle duplicate addresses in different L3
domains/VRFs. Allowing sockets to inherit the sk_bound_dev_if from l3mdev
domain provides a complete solution.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/syncookies.c | 4 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 11 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 1 |
4 files changed, 15 insertions, 3 deletions
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 4cbe9f0a4281..643a86c49020 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -351,7 +351,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq->snt_synack.v64 = 0; treq->tfo_listener = false; - ireq->ir_iif = sk->sk_bound_dev_if; + ireq->ir_iif = inet_request_bound_dev_if(sk, skb); /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) @@ -371,7 +371,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) * hasn't changed since we received the original syn, but I see * no easy way to do this. */ - flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark, + flowi4_init_output(&fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), opt->srr ? opt->faddr : ireq->ir_rmt_addr, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a0bd7a55193e..41ff1f87dfd7 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -915,6 +915,17 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_NET_L3_MASTER_DEV + { + .procname = "tcp_l3mdev_accept", + .data = &init_net.ipv4.sysctl_tcp_l3mdev_accept, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, +#endif { .procname = "tcp_mtu_probing", .data = &init_net.ipv4.sysctl_tcp_mtu_probing, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2d656eef7f8e..7b1fddc47019 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6204,7 +6204,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_openreq_init(req, &tmp_opt, skb, sk); /* Note: tcp_v6_init_req() might override ir_iif for link locals */ - inet_rsk(req)->ir_iif = sk->sk_bound_dev_if; + inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb); af_ops->init_req(req, sk, skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 205e6745393f..46e92fbd26a8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1276,6 +1276,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, ireq = inet_rsk(req); sk_daddr_set(newsk, ireq->ir_rmt_addr); sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); + newsk->sk_bound_dev_if = ireq->ir_iif; newinet->inet_saddr = ireq->ir_loc_addr; inet_opt = ireq->opt; rcu_assign_pointer(newinet->inet_opt, inet_opt); |