summaryrefslogtreecommitdiff
path: root/net/rds/tcp_listen.c
diff options
context:
space:
mode:
author: Ka-Cheong Poon <ka-cheong.poon@oracle.com> 2018-07-23 20:51:22 -0700
committer: David S. Miller <davem@davemloft.net> 2018-07-23 21:17:44 -0700
commit: 1e2b44e78eead7bcadfbf96f70d95773191541c9 (patch)
tree: e7944339dd957ae23cfd690cb0ad6962d98c053c /net/rds/tcp_listen.c
parent: eee2fa6ab3225192d6d894c54a6fb02ac9efdff6 (diff)
rds: Enable RDS IPv6 support
This patch enables RDS to use IPv6 addresses. For RDS/TCP, the listener is now an IPv6 endpoint which accepts both IPv4 and IPv6 connection requests. RDS/RDMA/IB uses a private data (struct rds_ib_connect_private) exchange between endpoints at RDS connection establishment time to support RDMA. This private data exchange uses a 32-bit integer to represent an IP address. This needs to be changed in order to support IPv6. A new private data struct rds6_ib_connect_private is introduced to handle this. To ensure backward compatibility, an IPv6-capable RDS stack uses another RDMA listener port (RDS_CM_PORT) to accept IPv6 connections. It continues to use the original RDS_PORT for IPv4 RDS connections. When it needs to communicate with an IPv6 peer, it uses the RDS_CM_PORT to send the connection setup request.

v5: Fixed syntax problem (David Miller).
v4: Changed port history comments in rds.h (Sowmini Varadhan).
v3: Added support to set up IPv4 connections using mapped addresses (David Miller).
    Added support to set up connections between link-local and non-link-local addresses.
    Various review comments from Santosh Shilimkar and Sowmini Varadhan.
v2: Fixed bound and peer address scope mismatch issue.
    Added back rds_connect() IPv6 changes.

Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/rds/tcp_listen.c')
-rw-r--r-- net/rds/tcp_listen.c 64
1 files changed, 51 insertions, 13 deletions
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 4fdf5b3a47df..0cf0147117d8 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -131,6 +131,8 @@ int rds_tcp_accept_one(struct socket *sock)
struct rds_tcp_connection *rs_tcp = NULL;
int conn_state;
struct rds_conn_path *cp;
+ struct in6_addr *my_addr, *peer_addr;
+ int dev_if;
if (!sock) /* module unload or netns delete in progress */
return -ENETUNREACH;
@@ -163,15 +165,29 @@ int rds_tcp_accept_one(struct socket *sock)
inet = inet_sk(new_sock->sk);
+ my_addr = &new_sock->sk->sk_v6_rcv_saddr;
+ peer_addr = &new_sock->sk->sk_v6_daddr;
rdsdebug("accepted tcp %pI6c:%u -> %pI6c:%u\n",
- &new_sock->sk->sk_v6_rcv_saddr, ntohs(inet->inet_sport),
- &new_sock->sk->sk_v6_daddr, ntohs(inet->inet_dport));
+ my_addr, ntohs(inet->inet_sport),
+ peer_addr, ntohs(inet->inet_dport));
+ /* sk_bound_dev_if is not set if the peer address is not a link-local
+ * address. In this case, mcast_oif happens to be set, so just use
+ * it instead.
+ */
+ if ((ipv6_addr_type(my_addr) & IPV6_ADDR_LINKLOCAL) &&
+ !(ipv6_addr_type(peer_addr) & IPV6_ADDR_LINKLOCAL)) {
+ struct ipv6_pinfo *inet6;
+
+ inet6 = inet6_sk(new_sock->sk);
+ dev_if = inet6->mcast_oif;
+ } else {
+ dev_if = new_sock->sk->sk_bound_dev_if;
+ }
conn = rds_conn_create(sock_net(sock->sk),
&new_sock->sk->sk_v6_rcv_saddr,
&new_sock->sk->sk_v6_daddr,
- &rds_tcp_transport, GFP_KERNEL,
- new_sock->sk->sk_bound_dev_if);
+ &rds_tcp_transport, GFP_KERNEL, dev_if);
if (IS_ERR(conn)) {
ret = PTR_ERR(conn);
@@ -256,15 +272,22 @@ out:
ready(sk);
}
-struct socket *rds_tcp_listen_init(struct net *net)
+struct socket *rds_tcp_listen_init(struct net *net, bool isv6)
{
- struct sockaddr_in sin;
struct socket *sock = NULL;
+ struct sockaddr_storage ss;
+ struct sockaddr_in6 *sin6;
+ struct sockaddr_in *sin;
+ int addr_len;
int ret;
- ret = sock_create_kern(net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
- if (ret < 0)
+ ret = sock_create_kern(net, isv6 ? PF_INET6 : PF_INET, SOCK_STREAM,
+ IPPROTO_TCP, &sock);
+ if (ret < 0) {
+ rdsdebug("could not create %s listener socket: %d\n",
+ isv6 ? "IPv6" : "IPv4", ret);
goto out;
+ }
sock->sk->sk_reuse = SK_CAN_REUSE;
rds_tcp_nonagle(sock);
@@ -274,13 +297,28 @@ struct socket *rds_tcp_listen_init(struct net *net)
sock->sk->sk_data_ready = rds_tcp_listen_data_ready;
write_unlock_bh(&sock->sk->sk_callback_lock);
- sin.sin_family = PF_INET;
- sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
- sin.sin_port = (__force u16)htons(RDS_TCP_PORT);
+ if (isv6) {
+ sin6 = (struct sockaddr_in6 *)&ss;
+ sin6->sin6_family = PF_INET6;
+ sin6->sin6_addr = in6addr_any;
+ sin6->sin6_port = (__force u16)htons(RDS_TCP_PORT);
+ sin6->sin6_scope_id = 0;
+ sin6->sin6_flowinfo = 0;
+ addr_len = sizeof(*sin6);
+ } else {
+ sin = (struct sockaddr_in *)&ss;
+ sin->sin_family = PF_INET;
+ sin->sin_addr.s_addr = INADDR_ANY;
+ sin->sin_port = (__force u16)htons(RDS_TCP_PORT);
+ addr_len = sizeof(*sin);
+ }
- ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
- if (ret < 0)
+ ret = sock->ops->bind(sock, (struct sockaddr *)&ss, addr_len);
+ if (ret < 0) {
+ rdsdebug("could not bind %s listener socket: %d\n",
+ isv6 ? "IPv6" : "IPv4", ret);
goto out;
+ }
ret = sock->ops->listen(sock, 64);
if (ret < 0)