Skip to content

Commit

Permalink
rds: Enable RDS IPv6 support
Browse files Browse the repository at this point in the history
This patch enables RDS to use IPv6 addresses. For RDS/TCP, the
listener is now an IPv6 endpoint which accepts both IPv4 and IPv6
connection requests.  RDS/RDMA/IB uses a private data (struct
rds_ib_connect_private) exchange between endpoints at RDS connection
establishment time to support RDMA. This private data exchange uses a
32 bit integer to represent an IP address. This needs to be changed in
order to support IPv6. A new private data struct
rds6_ib_connect_private is introduced to handle this. To ensure
backward compatibility, an IPv6-capable RDS stack uses a separate RDMA
listener port (RDS_CM_PORT) to accept IPv6 connections, while it
continues to use the original RDS_PORT for IPv4 RDS connections. When
it needs to communicate with an IPv6 peer, it sends the connection
setup request to RDS_CM_PORT.

v5: Fixed syntax problem (David Miller).

v4: Changed port history comments in rds.h (Sowmini Varadhan).

v3: Added support to set up IPv4 connection using mapped address
    (David Miller).
    Added support to set up connections between link-local and
    non-link-local addresses.
    Various review comments from Santosh Shilimkar and Sowmini Varadhan.

v2: Fixed bound and peer address scope mismatch issue.
    Added back rds_connect() IPv6 changes.

Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Ka-Cheong Poon authored and David S. Miller committed Jul 24, 2018
1 parent eee2fa6 commit 1e2b44e
Show file tree
Hide file tree
Showing 14 changed files with 459 additions and 114 deletions.
91 changes: 77 additions & 14 deletions net/rds/af_rds.c
Original file line number Diff line number Diff line change
Expand Up @@ -142,15 +142,32 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
uaddr_len = sizeof(*sin6);
}
} else {
/* If socket is not yet bound, set the return address family
* to be AF_UNSPEC (value 0) and the address size to be that
* of an IPv4 address.
/* If socket is not yet bound and the socket is connected,
* set the return address family to be the same as the
* connected address, but with 0 address value. If it is not
* connected, set the family to be AF_UNSPEC (value 0) and
* the address size to be that of an IPv4 address.
*/
if (ipv6_addr_any(&rs->rs_bound_addr)) {
sin = (struct sockaddr_in *)uaddr;
memset(sin, 0, sizeof(*sin));
sin->sin_family = AF_UNSPEC;
return sizeof(*sin);
if (ipv6_addr_any(&rs->rs_conn_addr)) {
sin = (struct sockaddr_in *)uaddr;
memset(sin, 0, sizeof(*sin));
sin->sin_family = AF_UNSPEC;
return sizeof(*sin);
}

if (ipv6_addr_type(&rs->rs_conn_addr) &
IPV6_ADDR_MAPPED) {
sin = (struct sockaddr_in *)uaddr;
memset(sin, 0, sizeof(*sin));
sin->sin_family = AF_INET;
return sizeof(*sin);
}

sin6 = (struct sockaddr_in6 *)uaddr;
memset(sin6, 0, sizeof(*sin6));
sin6->sin6_family = AF_INET6;
return sizeof(*sin6);
}
if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) {
sin = (struct sockaddr_in *)uaddr;
Expand Down Expand Up @@ -484,16 +501,18 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
{
struct sock *sk = sock->sk;
struct sockaddr_in *sin;
struct sockaddr_in6 *sin6;
struct rds_sock *rs = rds_sk_to_rs(sk);
int addr_type;
int ret = 0;

lock_sock(sk);

switch (addr_len) {
case sizeof(struct sockaddr_in):
switch (uaddr->sa_family) {
case AF_INET:
sin = (struct sockaddr_in *)uaddr;
if (sin->sin_family != AF_INET) {
ret = -EAFNOSUPPORT;
if (addr_len < sizeof(struct sockaddr_in)) {
ret = -EINVAL;
break;
}
if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
Expand All @@ -509,12 +528,56 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
rs->rs_conn_port = sin->sin_port;
break;

case sizeof(struct sockaddr_in6):
ret = -EPROTONOSUPPORT;
case AF_INET6:
sin6 = (struct sockaddr_in6 *)uaddr;
if (addr_len < sizeof(struct sockaddr_in6)) {
ret = -EINVAL;
break;
}
addr_type = ipv6_addr_type(&sin6->sin6_addr);
if (!(addr_type & IPV6_ADDR_UNICAST)) {
__be32 addr4;

if (!(addr_type & IPV6_ADDR_MAPPED)) {
ret = -EPROTOTYPE;
break;
}

/* It is a mapped address. Need to do some sanity
* checks.
*/
addr4 = sin6->sin6_addr.s6_addr32[3];
if (addr4 == htonl(INADDR_ANY) ||
addr4 == htonl(INADDR_BROADCAST) ||
IN_MULTICAST(ntohl(addr4))) {
ret = -EPROTOTYPE;
break;
}
}

if (addr_type & IPV6_ADDR_LINKLOCAL) {
/* If socket is already bound to a link local address,
* the peer address must be on the same link.
*/
if (sin6->sin6_scope_id == 0 ||
(!ipv6_addr_any(&rs->rs_bound_addr) &&
rs->rs_bound_scope_id &&
sin6->sin6_scope_id != rs->rs_bound_scope_id)) {
ret = -EINVAL;
break;
}
/* Remember the connected address scope ID. It will
* be checked against the binding local address when
* the socket is bound.
*/
rs->rs_bound_scope_id = sin6->sin6_scope_id;
}
rs->rs_conn_addr = sin6->sin6_addr;
rs->rs_conn_port = sin6->sin6_port;
break;

default:
ret = -EINVAL;
ret = -EAFNOSUPPORT;
break;
}

Expand Down
59 changes: 50 additions & 9 deletions net/rds/bind.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,10 @@ static int rds_add_bound(struct rds_sock *rs, const struct in6_addr *addr,
if (!rhashtable_insert_fast(&bind_hash_table,
&rs->rs_bound_node, ht_parms)) {
*port = rs->rs_bound_port;
rs->rs_bound_scope_id = scope_id;
ret = 0;
rdsdebug("rs %p binding to %pI4:%d\n",
rs, &addr, (int)ntohs(*port));
rdsdebug("rs %p binding to %pI6c:%d\n",
rs, addr, (int)ntohs(*port));
break;
} else {
rs->rs_bound_addr = in6addr_any;
Expand Down Expand Up @@ -164,23 +165,53 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct in6_addr v6addr, *binding_addr;
struct rds_transport *trans;
__u32 scope_id = 0;
int addr_type;
int ret = 0;
__be16 port;

/* We only allow an RDS socket to be bound to an IPv4 address. IPv6
* address support will be added later.
/* We allow an RDS socket to be bound to either IPv4 or IPv6
* address.
*/
if (addr_len == sizeof(struct sockaddr_in)) {
if (uaddr->sa_family == AF_INET) {
struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;

if (sin->sin_family != AF_INET ||
sin->sin_addr.s_addr == htonl(INADDR_ANY))
if (addr_len < sizeof(struct sockaddr_in) ||
sin->sin_addr.s_addr == htonl(INADDR_ANY) ||
sin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
return -EINVAL;
ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr);
binding_addr = &v6addr;
port = sin->sin_port;
} else if (addr_len == sizeof(struct sockaddr_in6)) {
return -EPROTONOSUPPORT;
} else if (uaddr->sa_family == AF_INET6) {
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)uaddr;

if (addr_len < sizeof(struct sockaddr_in6))
return -EINVAL;
addr_type = ipv6_addr_type(&sin6->sin6_addr);
if (!(addr_type & IPV6_ADDR_UNICAST)) {
__be32 addr4;

if (!(addr_type & IPV6_ADDR_MAPPED))
return -EINVAL;

/* It is a mapped address. Need to do some sanity
* checks.
*/
addr4 = sin6->sin6_addr.s6_addr32[3];
if (addr4 == htonl(INADDR_ANY) ||
addr4 == htonl(INADDR_BROADCAST) ||
IN_MULTICAST(ntohl(addr4)))
return -EINVAL;
}
/* The scope ID must be specified for link local address. */
if (addr_type & IPV6_ADDR_LINKLOCAL) {
if (sin6->sin6_scope_id == 0)
return -EINVAL;
scope_id = sin6->sin6_scope_id;
}
binding_addr = &sin6->sin6_addr;
port = sin6->sin6_port;
} else {
return -EINVAL;
}
Expand All @@ -191,6 +222,16 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
ret = -EINVAL;
goto out;
}
/* Socket is connected. The binding address should have the same
* scope ID as the connected address, except the case when one is
* non-link local address (scope_id is 0).
*/
if (!ipv6_addr_any(&rs->rs_conn_addr) && scope_id &&
rs->rs_bound_scope_id &&
scope_id != rs->rs_bound_scope_id) {
ret = -EINVAL;
goto out;
}

ret = rds_add_bound(rs, binding_addr, &port, scope_id);
if (ret)
Expand Down
54 changes: 39 additions & 15 deletions net/rds/connection.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
Expand Down Expand Up @@ -36,6 +36,7 @@
#include <linux/export.h>
#include <net/ipv6.h>
#include <net/inet6_hashtables.h>
#include <net/addrconf.h>

#include "rds.h"
#include "loop.h"
Expand Down Expand Up @@ -200,6 +201,15 @@ static struct rds_connection *__rds_conn_create(struct net *net,
conn->c_isv6 = !ipv6_addr_v4mapped(laddr);
conn->c_faddr = *faddr;
conn->c_dev_if = dev_if;
/* If the local address is link local, set c_bound_if to be the
* index used for this connection. Otherwise, set it to 0 as
* the socket is not bound to an interface. c_bound_if is used
* to look up a socket when a packet is received
*/
if (ipv6_addr_type(laddr) & IPV6_ADDR_LINKLOCAL)
conn->c_bound_if = dev_if;
else
conn->c_bound_if = 0;

rds_conn_net_set(conn, net);

Expand Down Expand Up @@ -486,10 +496,18 @@ void rds_conn_destroy(struct rds_connection *conn)
}
EXPORT_SYMBOL_GPL(rds_conn_destroy);

static void rds_conn_message_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
int want_send)
/* Copy the info record for one message into @iter via rds_inc_info_copy(),
 * which takes the source and destination as 32-bit (__be32) addresses.
 *
 * @saddr and @daddr are untyped, but the caller passes pointers to the
 * connection's struct in6_addr local and foreign addresses.  For an
 * IPv4-mapped address the IPv4 part is the last 32-bit word, so pass
 * s6_addr32[3].  Dereferencing the pointer directly as a __be32 would
 * read the first word of the in6_addr instead, which is not the IPv4
 * address.
 */
static void __rds_inc_msg_cp(struct rds_incoming *inc,
			     struct rds_info_iterator *iter,
			     void *saddr, void *daddr, int flip)
{
	const struct in6_addr *src = saddr;
	const struct in6_addr *dst = daddr;

	rds_inc_info_copy(inc, iter, src->s6_addr32[3],
			  dst->s6_addr32[3], flip);
}

static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
int want_send)
{
struct hlist_head *head;
struct list_head *list;
Expand Down Expand Up @@ -524,18 +542,13 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,

/* XXX too lazy to maintain counts.. */
list_for_each_entry(rm, list, m_conn_item) {
__be32 laddr;
__be32 faddr;

total++;
laddr = conn->c_laddr.s6_addr32[3];
faddr = conn->c_faddr.s6_addr32[3];
if (total <= len)
rds_inc_info_copy(&rm->m_inc,
iter,
laddr,
faddr,
0);
__rds_inc_msg_cp(&rm->m_inc,
iter,
&conn->c_laddr,
&conn->c_faddr,
0);
}

spin_unlock_irqrestore(&cp->cp_lock, flags);
Expand All @@ -548,6 +561,14 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
lens->each = sizeof(struct rds_info_message);
}

/* Thin wrapper: forwards every argument unchanged to
 * rds_conn_message_info_cmn(), which does the actual work.
 */
static void rds_conn_message_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
int want_send)
{
rds_conn_message_info_cmn(sock, len, iter, lens, want_send);
}

static void rds_conn_message_info_send(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
Expand Down Expand Up @@ -655,6 +676,9 @@ static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
struct rds_info_connection *cinfo = buffer;
struct rds_connection *conn = cp->cp_conn;

if (conn->c_isv6)
return 0;

cinfo->next_tx_seq = cp->cp_next_tx_seq;
cinfo->next_rx_seq = cp->cp_next_rx_seq;
cinfo->laddr = conn->c_laddr.s6_addr32[3];
Expand Down
Loading

0 comments on commit 1e2b44e

Please sign in to comment.