diff options
author | Ka-Cheong Poon <ka-cheong.poon@oracle.com> | 2018-07-23 20:51:22 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-07-23 21:17:44 -0700 |
commit | 1e2b44e78eead7bcadfbf96f70d95773191541c9 (patch) | |
tree | e7944339dd957ae23cfd690cb0ad6962d98c053c /net/rds/tcp_listen.c | |
parent | eee2fa6ab3225192d6d894c54a6fb02ac9efdff6 (diff) |
rds: Enable RDS IPv6 support
This patch enables RDS to use IPv6 addresses. For RDS/TCP, the
listener is now an IPv6 endpoint which accepts both IPv4 and IPv6
connection requests. RDS/RDMA/IB uses a private data (struct
rds_ib_connect_private) exchange between endpoints at RDS connection
establishment time to support RDMA. This private data exchange uses a
32 bit integer to represent an IP address. This needs to be changed in
order to support IPv6. A new private data struct
rds6_ib_connect_private is introduced to handle this. To ensure
backward compatibility, an IPv6 capable RDS stack uses another RDMA
listener port (RDS_CM_PORT) to accept IPv6 connection. And it
continues to use the original RDS_PORT for IPv4 RDS connections. When
it needs to communicate with an IPv6 peer, it uses the RDS_CM_PORT to
send the connection set up request.
v5: Fixed syntax problem (David Miller).
v4: Changed port history comments in rds.h (Sowmini Varadhan).
v3: Added support to set up IPv4 connection using mapped address
(David Miller).
Added support to set up connection between link local and non-link
addresses.
Various review comments from Santosh Shilimkar and Sowmini Varadhan.
v2: Fixed bound and peer address scope mismatched issue.
Added back rds_connect() IPv6 changes.
Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/rds/tcp_listen.c')
-rw-r--r-- | net/rds/tcp_listen.c | 64 |
1 files changed, 51 insertions, 13 deletions
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 4fdf5b3a47df..0cf0147117d8 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -131,6 +131,8 @@ int rds_tcp_accept_one(struct socket *sock) struct rds_tcp_connection *rs_tcp = NULL; int conn_state; struct rds_conn_path *cp; + struct in6_addr *my_addr, *peer_addr; + int dev_if; if (!sock) /* module unload or netns delete in progress */ return -ENETUNREACH; @@ -163,15 +165,29 @@ int rds_tcp_accept_one(struct socket *sock) inet = inet_sk(new_sock->sk); + my_addr = &new_sock->sk->sk_v6_rcv_saddr; + peer_addr = &new_sock->sk->sk_v6_daddr; rdsdebug("accepted tcp %pI6c:%u -> %pI6c:%u\n", - &new_sock->sk->sk_v6_rcv_saddr, ntohs(inet->inet_sport), - &new_sock->sk->sk_v6_daddr, ntohs(inet->inet_dport)); + my_addr, ntohs(inet->inet_sport), + peer_addr, ntohs(inet->inet_dport)); + /* sk_bound_dev_if is not set if the peer address is not link local + * address. In this case, it happens that mcast_oif is set. So + * just use it. + */ + if ((ipv6_addr_type(my_addr) & IPV6_ADDR_LINKLOCAL) && + !(ipv6_addr_type(peer_addr) & IPV6_ADDR_LINKLOCAL)) { + struct ipv6_pinfo *inet6; + + inet6 = inet6_sk(new_sock->sk); + dev_if = inet6->mcast_oif; + } else { + dev_if = new_sock->sk->sk_bound_dev_if; + } conn = rds_conn_create(sock_net(sock->sk), &new_sock->sk->sk_v6_rcv_saddr, &new_sock->sk->sk_v6_daddr, - &rds_tcp_transport, GFP_KERNEL, - new_sock->sk->sk_bound_dev_if); + &rds_tcp_transport, GFP_KERNEL, dev_if); if (IS_ERR(conn)) { ret = PTR_ERR(conn); @@ -256,15 +272,22 @@ out: ready(sk); } -struct socket *rds_tcp_listen_init(struct net *net) +struct socket *rds_tcp_listen_init(struct net *net, bool isv6) { - struct sockaddr_in sin; struct socket *sock = NULL; + struct sockaddr_storage ss; + struct sockaddr_in6 *sin6; + struct sockaddr_in *sin; + int addr_len; int ret; - ret = sock_create_kern(net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); - if (ret < 0) + ret = sock_create_kern(net, isv6 ? PF_INET6 : PF_INET, SOCK_STREAM, + IPPROTO_TCP, &sock); + if (ret < 0) { + rdsdebug("could not create %s listener socket: %d\n", + isv6 ? "IPv6" : "IPv4", ret); goto out; + } sock->sk->sk_reuse = SK_CAN_REUSE; rds_tcp_nonagle(sock); @@ -274,13 +297,28 @@ struct socket *rds_tcp_listen_init(struct net *net) sock->sk->sk_data_ready = rds_tcp_listen_data_ready; write_unlock_bh(&sock->sk->sk_callback_lock); - sin.sin_family = PF_INET; - sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY); - sin.sin_port = (__force u16)htons(RDS_TCP_PORT); + if (isv6) { + sin6 = (struct sockaddr_in6 *)&ss; + sin6->sin6_family = PF_INET6; + sin6->sin6_addr = in6addr_any; + sin6->sin6_port = (__force u16)htons(RDS_TCP_PORT); + sin6->sin6_scope_id = 0; + sin6->sin6_flowinfo = 0; + addr_len = sizeof(*sin6); + } else { + sin = (struct sockaddr_in *)&ss; + sin->sin_family = PF_INET; + sin->sin_addr.s_addr = INADDR_ANY; + sin->sin_port = (__force u16)htons(RDS_TCP_PORT); + addr_len = sizeof(*sin); + } - ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); - if (ret < 0) + ret = sock->ops->bind(sock, (struct sockaddr *)&ss, addr_len); + if (ret < 0) { + rdsdebug("could not bind %s listener socket: %d\n", + isv6 ? "IPv6" : "IPv4", ret); goto out; + } ret = sock->ops->listen(sock, 64); if (ret < 0) |