Skip to content

Commit

Permalink
Merge tag '9p-for-6.1' of https://github.com/martinetd/linux
Browse files Browse the repository at this point in the history
Pull 9p updates from Dominique Martinet:
 "Smaller buffers for small messages and fixes.

  The highlight of this is Christian's patch to allocate smaller buffers
  for most metadata requests: 9p with a big msize would try to allocate
  large buffers when just 4 or 8k would be more than enough; this brings
  in nice performance improvements.

  There are also a few fixes for problems reported by syzkaller (thanks to
  Schspa Shi, Tetsuo Handa for tests and feedback/patches) as well as
  some minor cleanup"

* tag '9p-for-6.1' of https://github.com/martinetd/linux:
  net/9p: clarify trans_fd parse_opt failure handling
  net/9p: add __init/__exit annotations to module init/exit funcs
  net/9p: use a dedicated spinlock for trans_fd
  9p/trans_fd: always use O_NONBLOCK read/write
  net/9p: allocate appropriate reduced message buffers
  net/9p: add 'pooled_rbuffers' flag to struct p9_trans_module
  net/9p: add p9_msg_buf_size()
  9p: add P9_ERRMAX for 9p2000 and 9p2000.u
  net/9p: split message size argument into 't_size' and 'r_size' pair
  9p: trans_fd/p9_conn_cancel: drop client lock earlier
  • Loading branch information
Linus Torvalds committed Oct 11, 2022
2 parents 288fc86 + a8e633c commit 0083340
Show file tree
Hide file tree
Showing 9 changed files with 254 additions and 28 deletions.
3 changes: 3 additions & 0 deletions include/net/9p/9p.h
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,9 @@ enum p9_qid_t {
/* size of header for zero copy read/write */
#define P9_ZC_HDR_SZ 4096

/* maximum length of an error string */
#define P9_ERRMAX 128

/**
* struct p9_qid - file system entity information
* @type: 8-bit type &p9_qid_t
Expand Down
5 changes: 5 additions & 0 deletions include/net/9p/transport.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
* @list: used to maintain a list of currently available transports
* @name: the human-readable name of the transport
* @maxsize: transport provided maximum packet size
* @pooled_rbuffers: currently only set for RDMA transport which pulls the
* response buffers from a shared pool, and accordingly
* we're less flexible when choosing the response message
* size in this case
* @def: set if this transport should be considered the default
* @create: member function to create a new connection on this transport
* @close: member function to discard a connection on this transport
Expand All @@ -38,6 +42,7 @@ struct p9_trans_module {
struct list_head list;
char *name; /* name of transport */
int maxsize; /* max message size of transport */
bool pooled_rbuffers;
int def; /* this transport should be default */
struct module *owner;
int (*create)(struct p9_client *client,
Expand Down
48 changes: 39 additions & 9 deletions net/9p/client.c
Original file line number Diff line number Diff line change
Expand Up @@ -255,24 +255,42 @@ static struct kmem_cache *p9_req_cache;
* p9_tag_alloc - Allocate a new request.
* @c: Client session.
* @type: Transaction type.
* @max_size: Maximum packet size for this request.
* @t_size: Buffer size for holding this request
* (automatic calculation by format template if 0).
* @r_size: Buffer size for holding server's reply on this request
* (automatic calculation by format template if 0).
* @fmt: Format template for assembling 9p request message
* (see p9pdu_vwritef).
* @ap: Variable arguments to be fed to passed format template
* (see p9pdu_vwritef).
*
* Context: Process context.
* Return: Pointer to new request.
*/
static struct p9_req_t *
p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
p9_tag_alloc(struct p9_client *c, int8_t type, uint t_size, uint r_size,
const char *fmt, va_list ap)
{
struct p9_req_t *req = kmem_cache_alloc(p9_req_cache, GFP_NOFS);
int alloc_msize = min(c->msize, max_size);
int alloc_tsize;
int alloc_rsize;
int tag;
va_list apc;

va_copy(apc, ap);
alloc_tsize = min_t(size_t, c->msize,
t_size ?: p9_msg_buf_size(c, type, fmt, apc));
va_end(apc);

alloc_rsize = min_t(size_t, c->msize,
r_size ?: p9_msg_buf_size(c, type + 1, fmt, ap));

if (!req)
return ERR_PTR(-ENOMEM);

if (p9_fcall_init(c, &req->tc, alloc_msize))
if (p9_fcall_init(c, &req->tc, alloc_tsize))
goto free_req;
if (p9_fcall_init(c, &req->rc, alloc_msize))
if (p9_fcall_init(c, &req->rc, alloc_rsize))
goto free;

p9pdu_reset(&req->tc);
Expand Down Expand Up @@ -592,11 +610,12 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
}

static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
int8_t type, int req_size,
int8_t type, uint t_size, uint r_size,
const char *fmt, va_list ap)
{
int err;
struct p9_req_t *req;
va_list apc;

p9_debug(P9_DEBUG_MUX, "client %p op %d\n", c, type);

Expand All @@ -608,7 +627,9 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
if (c->status == BeginDisconnect && type != P9_TCLUNK)
return ERR_PTR(-EIO);

req = p9_tag_alloc(c, type, req_size);
va_copy(apc, ap);
req = p9_tag_alloc(c, type, t_size, r_size, fmt, apc);
va_end(apc);
if (IS_ERR(req))
return req;

Expand Down Expand Up @@ -643,9 +664,18 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
int sigpending, err;
unsigned long flags;
struct p9_req_t *req;
/* Passing zero for tsize/rsize to p9_client_prepare_req() tells it to
* auto determine an appropriate (small) request/response size
* according to actual message data being sent. Currently RDMA
* transport is excluded from this response message size optimization,
* as it would not cope with it, due to its pooled response buffers
* (using an optimized request size for RDMA as well though).
*/
const uint tsize = 0;
const uint rsize = c->trans_mod->pooled_rbuffers ? c->msize : 0;

va_start(ap, fmt);
req = p9_client_prepare_req(c, type, c->msize, fmt, ap);
req = p9_client_prepare_req(c, type, tsize, rsize, fmt, ap);
va_end(ap);
if (IS_ERR(req))
return req;
Expand Down Expand Up @@ -743,7 +773,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
/* We allocate inline protocol data of only 4k bytes.
* The actual content is passed in zero-copy fashion.
*/
req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap);
req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, P9_ZC_HDR_SZ, fmt, ap);
va_end(ap);
if (IS_ERR(req))
return req;
Expand Down
167 changes: 167 additions & 0 deletions net/9p/protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,173 @@

#include <trace/events/9p.h>

/*
 * Encoded size of a 9p string: len[2] text[len].
 *
 * A NULL string counts as an empty one, and the text length is capped at
 * USHRT_MAX to match the two-byte length prefix. The argument is evaluated
 * more than once, so it must be free of side effects.
 */
#define P9_STRLEN(s) \
	(2 + min_t(size_t, (s) ? strlen(s) : 0, USHRT_MAX))

/**
 * p9_msg_buf_size - Compute a buffer size that is big enough to hold the
 * given 9p message.
 * @c: client (its protocol version selects the error reply layout and the
 *     Tcreate layout)
 * @type: message type
 * @fmt: format template used for assembling the request message
 *       (see p9pdu_vwritef)
 * @ap: variable arguments belonging to the format template
 *      (see p9pdu_vwritef)
 *
 * Note: Also for response types (P9_R*), @fmt and @ap must be the ones of
 * the originating request type (P9_T*).
 *
 * Message types that can become large are sized from their actual arguments;
 * replies sized that way are never smaller than the error reply of the
 * client's protocol version. All other types get a fixed 8k ("small") or
 * 4k ("tiny") buffer.
 *
 * Calls BUG() for message types that are never expected here, and when @fmt
 * is not the template expected for an individually sized message type.
 *
 * Return: buffer size in bytes.
 */
size_t p9_msg_buf_size(struct p9_client *c, enum p9_msg_t type,
		       const char *fmt, va_list ap)
{
	/* size[4] type[1] tag[2] */
	const int hdr_sz = 4 + 1 + 2;
	/* ename[s] errno[4] */
	const int rerror_sz = hdr_sz + P9_ERRMAX + 4;
	/* ecode[4] */
	const int rlerror_sz = hdr_sz + 4;
	/* error reply size for the protocol version spoken by this client */
	const int err_sz =
		c->proto_version != p9_proto_2000L ? rerror_sz : rlerror_sz;

	static_assert(NAME_MAX <= 4*1024, "p9_msg_buf_size() currently assumes "
		      "a max. allowed directory entry name length of 4k");

	switch (type) {

	/* message types not used at all */
	case P9_TERROR:
	case P9_TLERROR:
	case P9_TAUTH:
	case P9_RAUTH:
		BUG();

	/* variable length & potentially large message types */
	case P9_TATTACH: {
		const char *uname, *aname;

		BUG_ON(strcmp("ddss?u", fmt));
		(void)va_arg(ap, int32_t);	/* fid */
		(void)va_arg(ap, int32_t);	/* afid */
		uname = va_arg(ap, const char *);
		aname = va_arg(ap, const char *);
		/* fid[4] afid[4] uname[s] aname[s] n_uname[4] */
		return hdr_sz + 4 + 4 + P9_STRLEN(uname) + P9_STRLEN(aname) + 4;
	}
	case P9_TWALK: {
		uint idx, nwname;
		size_t names_sz = 0;
		const char **wnames;

		BUG_ON(strcmp("ddT", fmt));
		(void)va_arg(ap, int32_t);	/* fid */
		(void)va_arg(ap, int32_t);	/* newfid */
		nwname = va_arg(ap, int);
		wnames = va_arg(ap, const char **);
		/* sum up the encoded size of every path element */
		idx = 0;
		while (idx < nwname) {
			names_sz += P9_STRLEN(wnames[idx]);
			++idx;
		}
		/* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
		return hdr_sz + 4 + 4 + 2 + names_sz;
	}
	case P9_RWALK: {
		uint nwname;

		BUG_ON(strcmp("ddT", fmt));
		(void)va_arg(ap, int32_t);	/* fid */
		(void)va_arg(ap, int32_t);	/* newfid */
		nwname = va_arg(ap, int);
		/* nwqid[2] nwqid*(wqid[13]) */
		return max_t(size_t, hdr_sz + 2 + nwname * 13, err_sz);
	}
	case P9_TCREATE: {
		const char *name, *ext;

		BUG_ON(strcmp("dsdb?s", fmt));
		(void)va_arg(ap, int32_t);	/* fid */
		name = va_arg(ap, const char *);
		if (c->proto_version == p9_proto_legacy) {
			/* fid[4] name[s] perm[4] mode[1] */
			return hdr_sz + 4 + P9_STRLEN(name) + 4 + 1;
		}
		(void)va_arg(ap, int32_t);	/* perm */
		(void)va_arg(ap, int);		/* mode */
		ext = va_arg(ap, const char *);
		/* fid[4] name[s] perm[4] mode[1] extension[s] */
		return hdr_sz + 4 + P9_STRLEN(name) + 4 + 1 + P9_STRLEN(ext);
	}
	case P9_TLCREATE: {
		const char *name;

		BUG_ON(strcmp("dsddg", fmt));
		(void)va_arg(ap, int32_t);	/* fid */
		name = va_arg(ap, const char *);
		/* fid[4] name[s] flags[4] mode[4] gid[4] */
		return hdr_sz + 4 + P9_STRLEN(name) + 4 + 4 + 4;
	}
	case P9_RREAD:
	case P9_RREADDIR: {
		int32_t count;

		BUG_ON(strcmp("dqd", fmt));
		(void)va_arg(ap, int32_t);	/* fid */
		(void)va_arg(ap, int64_t);	/* offset */
		count = va_arg(ap, int32_t);
		/* count[4] data[count] */
		return max_t(size_t, hdr_sz + 4 + count, err_sz);
	}
	case P9_TWRITE: {
		int32_t count;

		BUG_ON(strcmp("dqV", fmt));
		(void)va_arg(ap, int32_t);	/* fid */
		(void)va_arg(ap, int64_t);	/* offset */
		count = va_arg(ap, int32_t);
		/* fid[4] offset[8] count[4] data[count] */
		return hdr_sz + 4 + 8 + 4 + count;
	}
	case P9_TRENAMEAT: {
		const char *oldname, *newname;

		BUG_ON(strcmp("dsds", fmt));
		(void)va_arg(ap, int32_t);	/* olddirfid */
		oldname = va_arg(ap, const char *);
		(void)va_arg(ap, int32_t);	/* newdirfid */
		newname = va_arg(ap, const char *);
		/* olddirfid[4] oldname[s] newdirfid[4] newname[s] */
		return hdr_sz + 4 + P9_STRLEN(oldname) + 4 + P9_STRLEN(newname);
	}
	case P9_TSYMLINK: {
		const char *name, *symtgt;

		BUG_ON(strcmp("dssg", fmt));
		(void)va_arg(ap, int32_t);	/* fid */
		name = va_arg(ap, const char *);
		symtgt = va_arg(ap, const char *);
		/* fid[4] name[s] symtgt[s] gid[4] */
		return hdr_sz + 4 + P9_STRLEN(name) + P9_STRLEN(symtgt) + 4;
	}

	case P9_RERROR:
		return rerror_sz;
	case P9_RLERROR:
		return rlerror_sz;

	/* small message types */
	case P9_TWSTAT:
	case P9_RSTAT:
	case P9_RREADLINK:
	case P9_TXATTRWALK:
	case P9_TXATTRCREATE:
	case P9_TLINK:
	case P9_TMKDIR:
	case P9_TMKNOD:
	case P9_TRENAME:
	case P9_TUNLINKAT:
	case P9_TLOCK:
		return 8 * 1024;

	/* tiny message types */
	default:
		return 4 * 1024;

	}
}

static int
p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);

Expand Down
2 changes: 2 additions & 0 deletions net/9p/protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
* Copyright (C) 2008 by IBM, Corp.
*/

size_t p9_msg_buf_size(struct p9_client *c, enum p9_msg_t type,
const char *fmt, va_list ap);
int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
va_list ap);
int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
Expand Down
Loading

0 comments on commit 0083340

Please sign in to comment.