Skip to content

Commit

Permalink
Merge branch 'smc-link-layer-control-enhancements'
Browse files Browse the repository at this point in the history
Ursula Braun says:

====================
net/smc: Link Layer Control enhancements

Here is a series of SMC patches that enable SMC communication with peers
supporting more than one link per link group.

The first three patches are preparatory code cleanups.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Mar 1, 2018
2 parents 3c5aa0b + 9651b93 commit ba60780
Show file tree
Hide file tree
Showing 8 changed files with 592 additions and 138 deletions.
127 changes: 60 additions & 67 deletions net/smc/af_smc.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
* applicable with RoCE-cards only
*
* Initial restrictions:
* - non-blocking connect postponed
* - IPv6 support postponed
* - support for alternate links postponed
* - partial support for non-blocking sockets only
Expand All @@ -24,7 +23,6 @@

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
Expand Down Expand Up @@ -273,46 +271,7 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

/* Determine subnet and prefix length of the internal TCP socket.
 *
 * Looks up the IPv4 address the CLC socket is bound to, walks the IPv4
 * addresses configured on the net_device of the socket's cached route and
 * reports the first one accepted by inet_ifa_match().
 *
 * @clcsock:    internal TCP socket
 * @subnet:     out: ifa_address & ifa_mask of the matching address
 * @prefix_len: out: inet_mask_len() of the matching address' mask
 *
 * Returns 0 on success (outputs are written only in this case),
 * -ENOTCONN if the socket has no cached route,
 * -ENODEV if the route has no device or the device has no IPv4 config,
 * -ENOENT if no configured address matches,
 * or the negative error reported by kernel_getsockname().
 */
int smc_netinfo_by_tcpsk(struct socket *clcsock,
			 __be32 *subnet, u8 *prefix_len)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct in_device *in_dev;
	struct sockaddr_in addr;
	int rc;

	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	/* get address to which the internal TCP socket is bound;
	 * on failure addr is not filled in and must not be evaluated
	 */
	rc = kernel_getsockname(clcsock, (struct sockaddr *)&addr);
	if (rc < 0)
		goto out_rel;

	/* analyze IPv4 specific data of net_device belonging to TCP socket */
	rc = -ENOENT;
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dst->dev);
	if (!in_dev) {
		/* device carries no IPv4 configuration; for_ifa() would
		 * dereference the NULL pointer
		 */
		rc = -ENODEV;
		goto out_unlock;
	}
	for_ifa(in_dev) {
		if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
			continue;
		*prefix_len = inet_mask_len(ifa->ifa_mask);
		*subnet = ifa->ifa_address & ifa->ifa_mask;
		rc = 0;
		break;
	} endfor_ifa(in_dev);
out_unlock:
	rcu_read_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}

static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
struct smc_link_group *lgr = smc->conn.lgr;
struct smc_link *link;
Expand All @@ -332,6 +291,9 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
return rc;
}

if (link->llc_confirm_rc)
return SMC_CLC_DECL_RMBE_EC;

rc = smc_ib_modify_qp_rts(link);
if (rc)
return SMC_CLC_DECL_INTERR;
Expand All @@ -346,11 +308,33 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
/* send CONFIRM LINK response over RoCE fabric */
rc = smc_llc_send_confirm_link(link,
link->smcibdev->mac[link->ibport - 1],
gid, SMC_LLC_RESP);
&link->smcibdev->gid[link->ibport - 1],
SMC_LLC_RESP);
if (rc < 0)
return SMC_CLC_DECL_TCL;

return rc;
/* receive ADD LINK request from server over RoCE fabric */
rest = wait_for_completion_interruptible_timeout(&link->llc_add,
SMC_LLC_WAIT_TIME);
if (rest <= 0) {
struct smc_clc_msg_decline dclc;

rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE);
return rc;
}

/* send add link reject message, only one link supported for now */
rc = smc_llc_send_add_link(link,
link->smcibdev->mac[link->ibport - 1],
&link->smcibdev->gid[link->ibport - 1],
SMC_LLC_RESP);
if (rc < 0)
return SMC_CLC_DECL_TCL;

link->state = SMC_LNK_ACTIVE;

return 0;
}

static void smc_conn_save_peer_info(struct smc_sock *smc,
Expand All @@ -372,19 +356,9 @@ static void smc_link_save_peer_info(struct smc_link *link,
link->peer_mtu = clc->qp_mtu;
}

/* Unlink a link group from its list, under smc_lgr_list.lock, so that it
 * is not used for new connections. Does nothing if the group's list entry
 * is already empty.
 */
static void smc_lgr_forget(struct smc_link_group *lgr)
{
	struct list_head *entry = &lgr->list;

	spin_lock_bh(&smc_lgr_list.lock);
	if (!list_empty(entry)) {
		/* re-initialise the entry so a later call sees it as empty */
		list_del_init(entry);
	}
	spin_unlock_bh(&smc_lgr_list.lock);
}

/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
struct smc_clc_msg_accept_confirm aclc;
int local_contact = SMC_FIRST_CONTACT;
struct smc_ib_device *smcibdev;
Expand Down Expand Up @@ -438,8 +412,8 @@ static int smc_connect_rdma(struct smc_sock *smc)

srv_first_contact = aclc.hdr.flag;
mutex_lock(&smc_create_lgr_pending);
local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
ibport, &aclc.lcl, srv_first_contact);
local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl,
srv_first_contact);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
Expand Down Expand Up @@ -498,8 +472,7 @@ static int smc_connect_rdma(struct smc_sock *smc)

if (local_contact == SMC_FIRST_CONTACT) {
/* QP confirmation over RoCE fabric */
reason_code = smc_clnt_conf_first_link(
smc, &smcibdev->gid[ibport - 1]);
reason_code = smc_clnt_conf_first_link(smc);
if (reason_code < 0) {
rc = reason_code;
goto out_err_unlock;
Expand Down Expand Up @@ -558,7 +531,6 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
goto out_err;
if (addr->sa_family != AF_INET)
goto out_err;
smc->addr = addr; /* needed for nonblocking connect */

lock_sock(sk);
switch (sk->sk_state) {
Expand Down Expand Up @@ -748,9 +720,34 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)

rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE);
return rc;
}

return rc;
if (link->llc_confirm_resp_rc)
return SMC_CLC_DECL_RMBE_EC;

/* send ADD LINK request to client over the RoCE fabric */
rc = smc_llc_send_add_link(link,
link->smcibdev->mac[link->ibport - 1],
&link->smcibdev->gid[link->ibport - 1],
SMC_LLC_REQ);
if (rc < 0)
return SMC_CLC_DECL_TCL;

/* receive ADD LINK response from client over the RoCE fabric */
rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
SMC_LLC_WAIT_TIME);
if (rest <= 0) {
struct smc_clc_msg_decline dclc;

rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE);
return rc;
}

link->state = SMC_LNK_ACTIVE;

return 0;
}

/* setup for RDMA connection of server */
Expand All @@ -766,7 +763,6 @@ static void smc_listen_work(struct work_struct *work)
struct sock *newsmcsk = &new_smc->sk;
struct smc_clc_msg_proposal *pclc;
struct smc_ib_device *smcibdev;
struct sockaddr_in peeraddr;
u8 buf[SMC_CLC_MAX_LEN];
struct smc_link *link;
int reason_code = 0;
Expand Down Expand Up @@ -808,7 +804,7 @@ static void smc_listen_work(struct work_struct *work)
}

/* determine subnet and mask from internal TCP socket */
rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
rc = smc_clc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
if (rc) {
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
Expand All @@ -822,13 +818,10 @@ static void smc_listen_work(struct work_struct *work)
goto decline_rdma;
}

/* get address of the peer connected to the internal TCP socket */
kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr);

/* allocate connection / link group */
mutex_lock(&smc_create_lgr_pending);
local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
smcibdev, ibport, &pclc->lcl, 0);
local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl,
0);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
Expand Down
5 changes: 1 addition & 4 deletions net/smc/smc.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,6 @@ struct smc_sock { /* smc sock container */
struct sock sk;
struct socket *clcsock; /* internal tcp socket */
struct smc_connection conn; /* smc connection */
struct sockaddr *addr; /* inet connect address */
struct smc_sock *listen_smc; /* listen parent */
struct work_struct tcp_listen_work;/* handle tcp socket accepts */
struct work_struct smc_listen_work;/* prepare new accept socket */
Expand Down Expand Up @@ -263,10 +262,8 @@ static inline bool using_ipsec(struct smc_sock *smc)

struct smc_clc_msg_local;

int smc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
u8 *prefix_len);
void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
int smc_conn_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact);
struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
Expand Down
47 changes: 45 additions & 2 deletions net/smc/smc_clc.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
*/

#include <linux/in.h>
#include <linux/inetdevice.h>
#include <linux/if_ether.h>
#include <linux/sched/signal.h>

Expand All @@ -22,6 +23,9 @@
#include "smc_clc.h"
#include "smc_ib.h"

/* eye catcher "SMCR" EBCDIC for CLC messages */
static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};

/* check if received message has a correct header length and contains valid
* heading and trailing eyecatchers
*/
Expand Down Expand Up @@ -70,6 +74,45 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
return true;
}

/* Determine subnet and prefix length of the internal TCP socket.
 *
 * Looks up the IPv4 address the CLC socket is bound to, walks the IPv4
 * addresses configured on the net_device of the socket's cached route and
 * reports the first one accepted by inet_ifa_match().
 *
 * @clcsock:    internal TCP socket
 * @subnet:     out: ifa_address & ifa_mask of the matching address
 * @prefix_len: out: inet_mask_len() of the matching address' mask
 *
 * Returns 0 on success (outputs are written only in this case),
 * -ENOTCONN if the socket has no cached route,
 * -ENODEV if the route has no device or the device has no IPv4 config,
 * -ENOENT if no configured address matches,
 * or the negative error reported by kernel_getsockname().
 */
int smc_clc_netinfo_by_tcpsk(struct socket *clcsock,
			     __be32 *subnet, u8 *prefix_len)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct in_device *in_dev;
	struct sockaddr_in addr;
	int rc;

	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	/* get address to which the internal TCP socket is bound;
	 * on failure addr is not filled in and must not be evaluated
	 */
	rc = kernel_getsockname(clcsock, (struct sockaddr *)&addr);
	if (rc < 0)
		goto out_rel;

	/* analyze IPv4 specific data of net_device belonging to TCP socket */
	rc = -ENOENT;
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dst->dev);
	if (!in_dev) {
		/* device carries no IPv4 configuration; for_ifa() would
		 * dereference the NULL pointer
		 */
		rc = -ENODEV;
		goto out_unlock;
	}
	for_ifa(in_dev) {
		if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
			continue;
		*prefix_len = inet_mask_len(ifa->ifa_mask);
		*subnet = ifa->ifa_address & ifa->ifa_mask;
		rc = 0;
		break;
	} endfor_ifa(in_dev);
out_unlock:
	rcu_read_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}

/* Wait for data on the tcp-socket, analyze received data
* Returns:
* 0 if success and it was not a decline that we received.
Expand Down Expand Up @@ -211,8 +254,8 @@ int smc_clc_send_proposal(struct smc_sock *smc,

memset(&pclc_prfx, 0, sizeof(pclc_prfx));
/* determine subnet and mask from internal TCP socket */
rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
&pclc_prfx.prefix_len);
rc = smc_clc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
&pclc_prfx.prefix_len);
if (rc)
return SMC_CLC_DECL_CNFERR; /* configuration error */
pclc_prfx.ipv6_prefixes_cnt = 0;
Expand Down
9 changes: 3 additions & 6 deletions net/smc/smc_clc.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@
#define SMC_CLC_CONFIRM 0x03
#define SMC_CLC_DECLINE 0x04

/* eye catcher "SMCR" EBCDIC for CLC messages */
static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};

#define SMC_CLC_V1 0x1 /* SMC version */
#define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */
#define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */
Expand All @@ -36,6 +33,7 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
#define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */
#define SMC_CLC_DECL_TCL 0x02040000 /* timeout w4 QP confirm */
#define SMC_CLC_DECL_SEND 0x07000000 /* sending problem */
#define SMC_CLC_DECL_RMBE_EC 0x08000000 /* peer has eyecatcher in RMBE */

struct smc_clc_msg_hdr { /* header1 of clc messages */
u8 eyecatcher[4]; /* eye catcher */
Expand Down Expand Up @@ -124,9 +122,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
}

struct smc_sock;
struct smc_ib_device;

int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
u8 *prefix_len);
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
Expand Down
Loading

0 comments on commit ba60780

Please sign in to comment.