Skip to content

Commit

Permalink
Merge branch 'for_4.11/net-next/rds_v3' of git://git.kernel.org/pub/s…
Browse files Browse the repository at this point in the history
…cm/linux/kernel/git/ssantosh/linux

Santosh Shilimkar says:

====================
net: RDS updates

v2->v3:
- Re-based against latest net-next head.
- Dropped a user visible change after discussing with David Miller.
  It needs some more work to fully support old/new tools matrix.
- Addressed Dave's comment about bool usage in patch
  "RDS: IB: track and log active side..."

v1->v2:
Re-aligned indentation in patch 'RDS: mark few internal functions..."

Series consist of:
 - RDMA transport fixes for map failure, listen sequence, handler panic and
   composite message notification.
 - Couple of sparse fixes.
 - Message logging improvements for bind failure, use once mr semantics
   and connection remote address, active end point.
 - Performance improvement for RDMA transport by reducing the post send
   pressure on the queue and spreading the CQ vectors.
 - Useful statistics for socket send/recv usage and receive cache usage.
 - Additional RDS CMSG used by application to track the RDS message
   stages for certain type of traffic to find out latency spots.
   Can be enabled/disabled per socket.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Jan 3, 2017
2 parents 0a0a8d6 + 3289025 commit 303b0d4
Show file tree
Hide file tree
Showing 18 changed files with 335 additions and 65 deletions.
33 changes: 33 additions & 0 deletions include/uapi/linux/rds.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@
#define RDS_GET_MR_FOR_DEST 7
#define SO_RDS_TRANSPORT 8

/* Socket option to tap receive path latency
* SO_RDS: SO_RDS_MSG_RXPATH_LATENCY
* Format used struct rds_rx_trace_so
*/
#define SO_RDS_MSG_RXPATH_LATENCY 10


/* supported values for SO_RDS_TRANSPORT */
#define RDS_TRANS_IB 0
#define RDS_TRANS_IWARP 1
Expand All @@ -77,6 +84,12 @@
* the same as for the GET_MR setsockopt.
* RDS_CMSG_RDMA_STATUS (recvmsg)
* Returns the status of a completed RDMA operation.
* RDS_CMSG_RXPATH_LATENCY(recvmsg)
* Returns rds message latencies in various stages of receive
* path in nS. Its set per socket using SO_RDS_MSG_RXPATH_LATENCY
* socket option. Legitimate points are defined in
* enum rds_message_rxpath_latency. More points can be added in
* future. CSMG format is struct rds_cmsg_rx_trace.
*/
#define RDS_CMSG_RDMA_ARGS 1
#define RDS_CMSG_RDMA_DEST 2
Expand All @@ -87,6 +100,7 @@
#define RDS_CMSG_ATOMIC_CSWP 7
#define RDS_CMSG_MASKED_ATOMIC_FADD 8
#define RDS_CMSG_MASKED_ATOMIC_CSWP 9
#define RDS_CMSG_RXPATH_LATENCY 11

#define RDS_INFO_FIRST 10000
#define RDS_INFO_COUNTERS 10000
Expand Down Expand Up @@ -171,6 +185,25 @@ struct rds_info_rdma_connection {
uint32_t rdma_mr_size;
};

/* RDS message Receive Path Latency points */
enum rds_message_rxpath_latency {
RDS_MSG_RX_HDR_TO_DGRAM_START = 0,
RDS_MSG_RX_DGRAM_REASSEMBLE,
RDS_MSG_RX_DGRAM_DELIVERED,
RDS_MSG_RX_DGRAM_TRACE_MAX
};

struct rds_rx_trace_so {
u8 rx_traces;
u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
};

struct rds_cmsg_rx_trace {
u8 rx_traces;
u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
u64 rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
};

/*
* Congestion monitoring.
* Congestion control in RDS happens at the host connection
Expand Down
28 changes: 28 additions & 0 deletions net/rds/af_rds.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,30 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
return 0;
}

static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval,
int optlen)
{
struct rds_rx_trace_so trace;
int i;

if (optlen != sizeof(struct rds_rx_trace_so))
return -EFAULT;

if (copy_from_user(&trace, optval, sizeof(trace)))
return -EFAULT;

rs->rs_rx_traces = trace.rx_traces;
for (i = 0; i < rs->rs_rx_traces; i++) {
if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) {
rs->rs_rx_traces = 0;
return -EFAULT;
}
rs->rs_rx_trace[i] = trace.rx_trace_pos[i];
}

return 0;
}

static int rds_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen)
{
Expand Down Expand Up @@ -338,6 +362,9 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
ret = rds_enable_recvtstamp(sock->sk, optval, optlen);
release_sock(sock->sk);
break;
case SO_RDS_MSG_RXPATH_LATENCY:
ret = rds_recv_track_latency(rs, optval, optlen);
break;
default:
ret = -ENOPROTOOPT;
}
Expand Down Expand Up @@ -484,6 +511,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
INIT_LIST_HEAD(&rs->rs_cong_list);
spin_lock_init(&rs->rs_rdma_lock);
rs->rs_rdma_keys = RB_ROOT;
rs->rs_rx_traces = 0;

spin_lock_bh(&rds_sock_lock);
list_add_tail(&rs->rs_item, &rds_sock_list);
Expand Down
4 changes: 2 additions & 2 deletions net/rds/bind.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (!trans) {
ret = -EADDRNOTAVAIL;
rds_remove_bound(rs);
printk_ratelimited(KERN_INFO "RDS: rds_bind() could not find a transport, "
"load rds_tcp or rds_rdma?\n");
pr_info_ratelimited("RDS: %s could not find a transport for %pI4, load rds_tcp or rds_rdma?\n",
__func__, &sin->sin_addr.s_addr);
goto out;
}

Expand Down
10 changes: 5 additions & 5 deletions net/rds/connection.c
Original file line number Diff line number Diff line change
Expand Up @@ -545,11 +545,11 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
}
EXPORT_SYMBOL_GPL(rds_for_each_conn_info);

void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
int (*visitor)(struct rds_conn_path *, void *),
size_t item_len)
static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
int (*visitor)(struct rds_conn_path *, void *),
size_t item_len)
{
u64 buffer[(item_len + 7) / 8];
struct hlist_head *head;
Expand Down
11 changes: 11 additions & 0 deletions net/rds/ib.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ static void rds_ib_dev_free(struct work_struct *work)
kfree(i_ipaddr);
}

if (rds_ibdev->vector_load)
kfree(rds_ibdev->vector_load);

kfree(rds_ibdev);
}

Expand Down Expand Up @@ -159,6 +162,14 @@ static void rds_ib_add_one(struct ib_device *device)
rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom;
rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom;

rds_ibdev->vector_load = kzalloc(sizeof(int) * device->num_comp_vectors,
GFP_KERNEL);
if (!rds_ibdev->vector_load) {
pr_err("RDS/IB: %s failed to allocate vector memory\n",
__func__);
goto put_dev;
}

rds_ibdev->dev = device;
rds_ibdev->pd = ib_alloc_pd(device, 0);
if (IS_ERR(rds_ibdev->pd)) {
Expand Down
22 changes: 20 additions & 2 deletions net/rds/ib.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@

#define RDS_IB_DEFAULT_RECV_WR 1024
#define RDS_IB_DEFAULT_SEND_WR 256
#define RDS_IB_DEFAULT_FR_WR 512
#define RDS_IB_DEFAULT_FR_WR 256
#define RDS_IB_DEFAULT_FR_INV_WR 256

#define RDS_IB_DEFAULT_RETRY_COUNT 2
#define RDS_IB_DEFAULT_RETRY_COUNT 1

#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */

Expand Down Expand Up @@ -125,6 +126,7 @@ struct rds_ib_connection {

/* To control the number of wrs from fastreg */
atomic_t i_fastreg_wrs;
atomic_t i_fastunreg_wrs;

/* interrupt handling */
struct tasklet_struct i_send_tasklet;
Expand All @@ -149,6 +151,7 @@ struct rds_ib_connection {
u64 i_ack_recv; /* last ACK received */
struct rds_ib_refill_cache i_cache_incs;
struct rds_ib_refill_cache i_cache_frags;
atomic_t i_cache_allocs;

/* sending acks */
unsigned long i_ack_flags;
Expand Down Expand Up @@ -179,6 +182,14 @@ struct rds_ib_connection {

/* Batched completions */
unsigned int i_unsignaled_wrs;

/* Endpoint role in connection */
bool i_active_side;
atomic_t i_cq_quiesce;

/* Send/Recv vectors */
int i_scq_vector;
int i_rcq_vector;
};

/* This assumes that atomic_t is at least 32 bits */
Expand Down Expand Up @@ -221,6 +232,7 @@ struct rds_ib_device {
spinlock_t spinlock; /* protect the above */
atomic_t refcount;
struct work_struct free_work;
int *vector_load;
};

#define ibdev_to_node(ibdev) dev_to_node(ibdev->dma_device)
Expand Down Expand Up @@ -249,6 +261,8 @@ struct rds_ib_statistics {
uint64_t s_ib_rx_refill_from_cq;
uint64_t s_ib_rx_refill_from_thread;
uint64_t s_ib_rx_alloc_limit;
uint64_t s_ib_rx_total_frags;
uint64_t s_ib_rx_total_incs;
uint64_t s_ib_rx_credit_updates;
uint64_t s_ib_ack_sent;
uint64_t s_ib_ack_send_failure;
Expand All @@ -271,6 +285,8 @@ struct rds_ib_statistics {
uint64_t s_ib_rdma_mr_1m_reused;
uint64_t s_ib_atomic_cswp;
uint64_t s_ib_atomic_fadd;
uint64_t s_ib_recv_added_to_cache;
uint64_t s_ib_recv_removed_from_cache;
};

extern struct workqueue_struct *rds_ib_wq;
Expand Down Expand Up @@ -401,6 +417,8 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op);
/* ib_stats.c */
DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats);
#define rds_ib_stats_inc(member) rds_stats_inc_which(rds_ib_stats, member)
#define rds_ib_stats_add(member, count) \
rds_stats_add_which(rds_ib_stats, member, count)
unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
unsigned int avail);

Expand Down
Loading

0 comments on commit 303b0d4

Please sign in to comment.