diff --git a/include/linux/sunrpc/rdma_rn.h b/include/linux/sunrpc/rdma_rn.h
new file mode 100644
index 0000000000000..7d032ca057afb
--- /dev/null
+++ b/include/linux/sunrpc/rdma_rn.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2024, Oracle and/or its affiliates.
+ */
+
+#ifndef _LINUX_SUNRPC_RDMA_RN_H
+#define _LINUX_SUNRPC_RDMA_RN_H
+
+#include <rdma/ib_verbs.h>
+
+/**
+ * struct rpcrdma_notification - device removal notification registrant
+ * @rn_done: callback invoked for this registrant when the device is removed
+ * @rn_index: slot assigned to this registrant by rpcrdma_rn_register()
+ */
+struct rpcrdma_notification {
+	void			(*rn_done)(struct rpcrdma_notification *rn);
+	u32			rn_index;
+};
+
+int rpcrdma_rn_register(struct ib_device *device,
+			struct rpcrdma_notification *rn,
+			void (*done)(struct rpcrdma_notification *rn));
+void rpcrdma_rn_unregister(struct ib_device *device,
+			   struct rpcrdma_notification *rn);
+int rpcrdma_ib_client_register(void);
+void rpcrdma_ib_client_unregister(void);
+
+#endif /* _LINUX_SUNRPC_RDMA_RN_H */
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 14392652273ad..ecdaf088219da 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -2220,6 +2220,40 @@ TRACE_EVENT(svcrdma_sq_post_err,
 	)
 );
 
+DECLARE_EVENT_CLASS(rpcrdma_client_device_class,
+	TP_PROTO(
+		const struct ib_device *device
+	),
+
+	TP_ARGS(device),
+
+	TP_STRUCT__entry(
+		__string(name, device->name)
+	),
+
+	TP_fast_assign(
+		__assign_str(name);
+	),
+
+	TP_printk("device=%s",
+		__get_str(name)
+	)
+);
+
+#define DEFINE_CLIENT_DEVICE_EVENT(name)				\
+	DEFINE_EVENT(rpcrdma_client_device_class, name,			\
+		TP_PROTO(						\
+			const struct ib_device *device			\
+		),							\
+		TP_ARGS(device)						\
+	)
+
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_completion);
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_add_one);
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one);
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_wait_on);
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one_done);
+
 #endif /* _TRACE_RPCRDMA_H */
 
 #include <trace/define_trace.h>
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 55b21bae866db..3232aa23cdb49 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
 
-rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o \
+rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o ib_client.o \
 	svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
 	svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \
 	svc_rdma_pcl.o module.o
diff --git a/net/sunrpc/xprtrdma/ib_client.c b/net/sunrpc/xprtrdma/ib_client.c
new file mode 100644
index 0000000000000..a938c19c3490d
--- /dev/null
+++ b/net/sunrpc/xprtrdma/ib_client.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright (c) 2024 Oracle. All rights reserved.
+ */
+
+#include <linux/completion.h>
+#include <linux/kref.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/xarray.h>
+
+#include <linux/sunrpc/svc_rdma.h>
+#include <linux/sunrpc/rdma_rn.h>
+
+#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
+
+/* Per-ib_device private data for rpcrdma */
+struct rpcrdma_device {
+	struct kref		rd_kref;
+	unsigned long		rd_flags;
+	struct ib_device	*rd_device;
+	struct xarray		rd_xa;
+	struct completion	rd_done;
+};
+
+#define RPCRDMA_RD_F_REMOVING	(0)
+
+static struct ib_client rpcrdma_ib_client;
+
+/*
+ * Listeners have no associated device, so we never register them.
+ * Note that ib_get_client_data() does not check if @device is
+ * NULL for us.
+ */
+static struct rpcrdma_device *rpcrdma_get_client_data(struct ib_device *device)
+{
+	if (!device)
+		return NULL;
+	return ib_get_client_data(device, &rpcrdma_ib_client);
+}
+
+/*
+ * Called when the last reference on @kref is dropped. Completes
+ * rd_done, which rpcrdma_remove_one() waits on.
+ */
+static void rpcrdma_rn_release(struct kref *kref)
+{
+	struct rpcrdma_device *rd = container_of(kref, struct rpcrdma_device,
+						 rd_kref);
+
+	trace_rpcrdma_client_completion(rd->rd_device);
+	complete(&rd->rd_done);
+}
+
+/**
+ * rpcrdma_rn_register - register to get device removal notifications
+ * @device: device to monitor
+ * @rn: notification object that wishes to be notified
+ * @done: callback to notify caller of device removal
+ *
+ * Returns zero on success. The callback in rn_done is guaranteed
+ * to be invoked when the device is removed, unless this notification
+ * is unregistered first.
+ *
+ * On failure, a negative errno is returned.
+ */
+int rpcrdma_rn_register(struct ib_device *device,
+			struct rpcrdma_notification *rn,
+			void (*done)(struct rpcrdma_notification *rn))
+{
+	struct rpcrdma_device *rd = rpcrdma_get_client_data(device);
+
+	if (!rd || test_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags))
+		return -ENETUNREACH;
+
+	/*
+	 * Set the callback before @rn becomes visible in rd_xa:
+	 * rpcrdma_remove_one() calls rn_done on every entry it finds.
+	 */
+	rn->rn_done = done;
+
+	kref_get(&rd->rd_kref);
+	if (xa_alloc(&rd->rd_xa, &rn->rn_index, rn, xa_limit_32b,
+		     GFP_KERNEL) < 0) {
+		/* Nothing was registered: drop the reference taken above */
+		kref_put(&rd->rd_kref, rpcrdma_rn_release);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/**
+ * rpcrdma_rn_unregister - stop device removal notifications
+ * @device: monitored device
+ * @rn: notification object that no longer wishes to be notified
+ */
+void rpcrdma_rn_unregister(struct ib_device *device,
+			   struct rpcrdma_notification *rn)
+{
+	struct rpcrdma_device *rd = rpcrdma_get_client_data(device);
+
+	if (!rd)
+		return;
+
+	xa_erase(&rd->rd_xa, rn->rn_index);
+	kref_put(&rd->rd_kref, rpcrdma_rn_release);
+}
+
+/**
+ * rpcrdma_add_one - ib_client device insertion callback
+ * @device: device about to be inserted
+ *
+ * Returns zero on success. xprtrdma private data has been allocated
+ * for this device. On failure, a negative errno is returned.
+ */
+static int rpcrdma_add_one(struct ib_device *device)
+{
+	struct rpcrdma_device *rd;
+
+	rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+	if (!rd)
+		return -ENOMEM;
+
+	kref_init(&rd->rd_kref);
+	xa_init_flags(&rd->rd_xa, XA_FLAGS_ALLOC1);
+	rd->rd_device = device;
+	init_completion(&rd->rd_done);
+	ib_set_client_data(device, &rpcrdma_ib_client, rd);
+
+	trace_rpcrdma_client_add_one(device);
+	return 0;
+}
+
+/**
+ * rpcrdma_remove_one - ib_client device removal callback
+ * @device: device about to be removed
+ * @client_data: this module's private per-device data
+ *
+ * Upon return, all transports associated with @device have divested
+ * themselves from IB hardware resources.
+ */
+static void rpcrdma_remove_one(struct ib_device *device,
+			       void *client_data)
+{
+	struct rpcrdma_device *rd = client_data;
+	struct rpcrdma_notification *rn;
+	unsigned long index;
+
+	trace_rpcrdma_client_remove_one(device);
+
+	set_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags);
+	xa_for_each(&rd->rd_xa, index, rn)
+		rn->rn_done(rn);
+
+	/*
+	 * Wait only if there are still outstanding notification
+	 * registrants for this device.
+	 */
+	if (!refcount_dec_and_test(&rd->rd_kref.refcount)) {
+		trace_rpcrdma_client_wait_on(device);
+		wait_for_completion(&rd->rd_done);
+	}
+
+	trace_rpcrdma_client_remove_one_done(device);
+	xa_destroy(&rd->rd_xa);
+	kfree(rd);
+}
+
+static struct ib_client rpcrdma_ib_client = {
+	.name		= "rpcrdma",
+	.add		= rpcrdma_add_one,
+	.remove		= rpcrdma_remove_one,
+};
+
+/**
+ * rpcrdma_ib_client_unregister - unregister ib_client for xprtrdma
+ *
+ * cel: watch for orphaned rpcrdma_device objects on module unload
+ */
+void rpcrdma_ib_client_unregister(void)
+{
+	ib_unregister_client(&rpcrdma_ib_client);
+}
+
+/**
+ * rpcrdma_ib_client_register - register ib_client for rpcrdma
+ *
+ * Returns zero on success, or a negative errno.
+ */
+int rpcrdma_ib_client_register(void)
+{
+	return ib_register_client(&rpcrdma_ib_client);
+}
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
index 45c5b41ac8dc9..697f571d4c018 100644
--- a/net/sunrpc/xprtrdma/module.c
+++ b/net/sunrpc/xprtrdma/module.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sunrpc/svc_rdma.h>
+#include <linux/sunrpc/rdma_rn.h>
 
 #include <asm/swab.h>
 
@@ -30,21 +31,32 @@ static void __exit rpc_rdma_cleanup(void)
 {
 	xprt_rdma_cleanup();
 	svc_rdma_cleanup();
+	rpcrdma_ib_client_unregister();
 }
 
 static int __init rpc_rdma_init(void)
 {
 	int rc;
 
+	rc = rpcrdma_ib_client_register();
+	if (rc)
+		goto out_rc;
+
 	rc = svc_rdma_init();
 	if (rc)
-		goto out;
+		goto out_ib_client;
 
 	rc = xprt_rdma_init();
 	if (rc)
-		svc_rdma_cleanup();
+		goto out_svc_rdma;
 
-out:
+	return 0;
+
+out_svc_rdma:
+	svc_rdma_cleanup();
+out_ib_client:
+	rpcrdma_ib_client_unregister();
+out_rc:
 	return rc;
 }
 