From a22a29655c42d0263a7a84d5d808bfd55f20c53a Mon Sep 17 00:00:00 2001 From: Joshua Murphy Date: Sat, 18 Jan 2025 19:12:36 +0000 Subject: [PATCH 1/8] net/9p/fd: support ipv6 for trans=tcp Allows specifying an IPv6 address when mounting a remote 9p file system. Signed-off-by: Joshua Murphy Message-ID: <20250118192122.327-2-joshuamurphy@posteo.net> Signed-off-by: Dominique Martinet --- net/9p/trans_fd.c | 56 ++++++++++++++++++++--------------------------- 1 file changed, 24 insertions(+), 32 deletions(-) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 196060dc6138a..2fea50bf047ab 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -954,64 +955,55 @@ static void p9_fd_close(struct p9_client *client) kfree(ts); } -/* - * stolen from NFS - maybe should be made a generic function? - */ -static inline int valid_ipaddr4(const char *buf) -{ - int rc, count, in[4]; - - rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); - if (rc != 4) - return -EINVAL; - for (count = 0; count < 4; count++) { - if (in[count] > 255) - return -EINVAL; - } - return 0; -} - static int p9_bind_privport(struct socket *sock) { - struct sockaddr_in cl; + struct sockaddr_storage stor = { 0 }; int port, err = -EINVAL; - memset(&cl, 0, sizeof(cl)); - cl.sin_family = AF_INET; - cl.sin_addr.s_addr = htonl(INADDR_ANY); + stor.ss_family = sock->ops->family; + if (stor.ss_family == AF_INET) + ((struct sockaddr_in *)&stor)->sin_addr.s_addr = htonl(INADDR_ANY); + else + ((struct sockaddr_in6 *)&stor)->sin6_addr = in6addr_any; for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) { - cl.sin_port = htons((ushort)port); - err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl)); + if (stor.ss_family == AF_INET) + ((struct sockaddr_in *)&stor)->sin_port = htons((ushort)port); + else + ((struct sockaddr_in6 *)&stor)->sin6_port = htons((ushort)port); + err = kernel_bind(sock, (struct sockaddr *)&stor, sizeof(stor)); if (err != -EADDRINUSE) break; } return err; } - static int p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) { int err; + char port_str[6]; struct socket *csocket; - struct sockaddr_in sin_server; + struct sockaddr_storage stor = { 0 }; struct p9_fd_opts opts; err = parse_opts(args, &opts); if (err < 0) return err; - if (addr == NULL || valid_ipaddr4(addr) < 0) + if (!addr) return -EINVAL; + sprintf(port_str, "%u", opts.port); + err = inet_pton_with_scope(current->nsproxy->net_ns, AF_UNSPEC, addr, + port_str, &stor); + if (err < 0) + return err; + csocket = NULL; client->trans_opts.tcp.port = opts.port; client->trans_opts.tcp.privport = opts.privport; - sin_server.sin_family = AF_INET; - sin_server.sin_addr.s_addr = in_aton(addr); - sin_server.sin_port = htons(opts.port); - err = __sock_create(current->nsproxy->net_ns, PF_INET, + err = __sock_create(current->nsproxy->net_ns, stor.ss_family, SOCK_STREAM, IPPROTO_TCP, &csocket, 1); if (err) { pr_err("%s (%d): problem creating socket\n", @@ -1030,8 +1022,8 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) } err = READ_ONCE(csocket->ops)->connect(csocket, - (struct sockaddr *)&sin_server, - sizeof(struct sockaddr_in), 0); + (struct sockaddr *)&stor, + sizeof(stor), 0); if (err < 0) { pr_err("%s (%d): problem connecting socket to %s\n", __func__, task_pid_nr(current), addr); From 3f61ac7c65bdb26accb52f9db66313597e759821 Mon Sep 17 00:00:00 2001 From: Christian Schoenebeck Date: Thu, 13 Mar 2025 13:59:32 +0100 Subject: [PATCH 2/8] fs/9p: fix NULL pointer dereference on mkdir When a 9p tree was mounted with option 'posixacl', parent directory had a default ACL set for its subdirectories, e.g.: setfacl -m default:group:simpsons:rwx parentdir then creating a subdirectory crashed 9p client, as v9fs_fid_add() call in function v9fs_vfs_mkdir_dotl() sets the passed 'fid' pointer to NULL (since dafbe689736) even though the subsequent v9fs_set_create_acl() call expects a valid non-NULL 'fid' pointer: [ 37.273191] BUG: kernel NULL pointer dereference, address: 0000000000000000 ... [ 37.322338] Call Trace: [ 37.323043] [ 37.323621] ? __die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434) [ 37.324448] ? page_fault_oops (arch/x86/mm/fault.c:714) [ 37.325532] ? search_module_extables (kernel/module/main.c:3733) [ 37.326742] ? p9_client_walk (net/9p/client.c:1165) 9pnet [ 37.328006] ? search_bpf_extables (kernel/bpf/core.c:804) [ 37.329142] ? exc_page_fault (./arch/x86/include/asm/paravirt.h:686 arch/x86/mm/fault.c:1488 arch/x86/mm/fault.c:1538) [ 37.330196] ? asm_exc_page_fault (./arch/x86/include/asm/idtentry.h:574) [ 37.331330] ? p9_client_walk (net/9p/client.c:1165) 9pnet [ 37.332562] ? v9fs_fid_xattr_get (fs/9p/xattr.c:30) 9p [ 37.333824] v9fs_fid_xattr_set (fs/9p/fid.h:23 fs/9p/xattr.c:121) 9p [ 37.335077] v9fs_set_acl (fs/9p/acl.c:276) 9p [ 37.336112] v9fs_set_create_acl (fs/9p/acl.c:307) 9p [ 37.337326] v9fs_vfs_mkdir_dotl (fs/9p/vfs_inode_dotl.c:411) 9p [ 37.338590] vfs_mkdir (fs/namei.c:4313) [ 37.339535] do_mkdirat (fs/namei.c:4336) [ 37.340465] __x64_sys_mkdir (fs/namei.c:4354) [ 37.341455] do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) [ 37.342447] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) Fix this by simply swapping the sequence of these two calls in v9fs_vfs_mkdir_dotl(), i.e. calling v9fs_set_create_acl() before v9fs_fid_add(). Fixes: dafbe689736f ("9p fid refcount: cleanup p9_fid_put calls") Reported-by: syzbot+5b667f9a1fee4ba3775a@syzkaller.appspotmail.com Signed-off-by: Christian Schoenebeck Message-ID: Signed-off-by: Dominique Martinet --- fs/9p/vfs_inode_dotl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 143ac03b7425c..3397939fd2d5a 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -407,8 +407,8 @@ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap, err); goto error; } - v9fs_fid_add(dentry, &fid); v9fs_set_create_acl(inode, fid, dacl, pacl); + v9fs_fid_add(dentry, &fid); d_instantiate(dentry, inode); err = 0; inc_nlink(dir); From d0259a856afca31d699b706ed5e2adf11086c73b Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Wed, 19 Mar 2025 20:20:15 +0900 Subject: [PATCH 3/8] 9p/net: fix improper handling of bogus negative read/write replies In p9_client_write() and p9_client_read_once(), if the server incorrectly replies with success but a negative write/read count then we would consider written (negative) <= rsize (positive) because both variables were signed. Make variables unsigned to avoid this problem. The reproducer linked below now fails with the following error instead of a null pointer deref: 9pnet: bogus RWRITE count (4294967295 > 3) Reported-by: Robert Morris Closes: https://lore.kernel.org/16271.1734448631@26-5-164.dynamic.csail.mit.edu Message-ID: <20250319-9p_unsigned_rw-v3-1-71327f1503d0@codewreck.org> Reviewed-by: Christian Schoenebeck Signed-off-by: Dominique Martinet --- net/9p/client.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index 09f8ced9f8bb7..52a5497cfca79 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1548,7 +1548,8 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, struct p9_client *clnt = fid->clnt; struct p9_req_t *req; int count = iov_iter_count(to); - int rsize, received, non_zc = 0; + u32 rsize, received; + bool non_zc = false; char *dataptr; *err = 0; @@ -1571,7 +1572,7 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, 0, 11, "dqd", fid->fid, offset, rsize); } else { - non_zc = 1; + non_zc = true; req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize); } @@ -1592,11 +1593,11 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, return 0; } if (rsize < received) { - pr_err("bogus RREAD count (%d > %d)\n", received, rsize); + pr_err("bogus RREAD count (%u > %u)\n", received, rsize); received = rsize; } - p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", received); + p9_debug(P9_DEBUG_9P, "<<< RREAD count %u\n", received); if (non_zc) { int n = copy_to_iter(dataptr, received, to); @@ -1623,9 +1624,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) *err = 0; while (iov_iter_count(from)) { - int count = iov_iter_count(from); - int rsize = fid->iounit; - int written; + size_t count = iov_iter_count(from); + u32 rsize = fid->iounit; + u32 written; if (!rsize || rsize > clnt->msize - P9_IOHDRSZ) rsize = clnt->msize - P9_IOHDRSZ; @@ -1633,7 +1634,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) if (count < rsize) rsize = count; - p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d (/%d)\n", + p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %u (/%zu)\n", fid->fid, offset, rsize, count); /* Don't bother zerocopy for small IO (< 1024) */ @@ -1659,11 +1660,11 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) break; } if (rsize < written) { - pr_err("bogus RWRITE count (%d > %d)\n", written, rsize); + pr_err("bogus RWRITE count (%u > %u)\n", written, rsize); written = rsize; } - p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written); + p9_debug(P9_DEBUG_9P, "<<< RWRITE count %u\n", written); p9_req_put(clnt, req); iov_iter_revert(from, count - written - iov_iter_count(from)); @@ -2098,7 +2099,8 @@ EXPORT_SYMBOL_GPL(p9_client_xattrcreate); int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) { - int err, rsize, non_zc = 0; + int err, non_zc = 0; + u32 rsize; struct p9_client *clnt; struct p9_req_t *req; char *dataptr; @@ -2107,7 +2109,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) iov_iter_kvec(&to, ITER_DEST, &kv, 1, count); - p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", + p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %u\n", fid->fid, offset, count); clnt = fid->clnt; @@ -2142,11 +2144,11 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) goto free_and_error; } if (rsize < count) { - pr_err("bogus RREADDIR count (%d > %d)\n", count, rsize); + pr_err("bogus RREADDIR count (%u > %u)\n", count, rsize); count = rsize; } - p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %u\n", count); if (non_zc) memmove(data, dataptr, count); From ad6d4558a7112af9e5f6727ac24fd8cd17469739 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Mon, 17 Mar 2025 06:51:06 +0900 Subject: [PATCH 4/8] 9p/net: return error on bogus (longer than requested) replies Up until now we've been considering longer than requested replies as acceptable, printing a message and just truncating the data, but it makes more sense to consider these an error. Make these fail with EIO instead. Suggested-by: Christian Schoenebeck Message-ID: <20250317-p9_bogus_io_error-v1-1-9639f6d1561f@codewreck.org> Signed-off-by: Dominique Martinet --- net/9p/client.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index 52a5497cfca79..61461b9fa1343 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1594,7 +1594,9 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, } if (rsize < received) { pr_err("bogus RREAD count (%u > %u)\n", received, rsize); - received = rsize; + *err = -EIO; + p9_req_put(clnt, req); + return 0; } p9_debug(P9_DEBUG_9P, "<<< RREAD count %u\n", received); @@ -1661,7 +1663,10 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) } if (rsize < written) { pr_err("bogus RWRITE count (%u > %u)\n", written, rsize); - written = rsize; + *err = -EIO; + iov_iter_revert(from, count - iov_iter_count(from)); + p9_req_put(clnt, req); + break; } p9_debug(P9_DEBUG_9P, "<<< RWRITE count %u\n", written); @@ -1713,7 +1718,7 @@ p9_client_write_subreq(struct netfs_io_subrequest *subreq) if (written > len) { pr_err("bogus RWRITE count (%d > %u)\n", written, len); - written = len; + written = -EIO; } p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", len); @@ -2145,7 +2150,8 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) } if (rsize < count) { pr_err("bogus RREADDIR count (%u > %u)\n", count, rsize); - count = rsize; + err = -EIO; + goto free_and_error; } p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %u\n", count); From fbc0283fbeae27b88448c90305e738991457fee2 Mon Sep 17 00:00:00 2001 From: Ignacio Encinas Date: Tue, 18 Mar 2025 22:39:02 +0100 Subject: [PATCH 5/8] 9p/trans_fd: mark concurrent read and writes to p9_conn->err Writes for the error value of a connection are spinlock-protected inside p9_conn_cancel, but lockless reads are present elsewhere to avoid performing unnecessary work after an error has been met. Mark the write and lockless reads to make KCSAN happy. Mark the write as exclusive following the recommendation in "Lock-Protected Writes with Lockless Reads" in tools/memory-model/Documentation/access-marking.txt while we are at it. Mark p9_fd_request and p9_conn_cancel m->err reads despite the fact that they do not race with concurrent writes for stylistic reasons. Reported-by: syzbot+d69a7cc8c683c2cb7506@syzkaller.appspotmail.com Reported-by: syzbot+483d6c9b9231ea7e1851@syzkaller.appspotmail.com Signed-off-by: Ignacio Encinas Message-ID: <20250318-p9_conn_err_benign_data_race-v3-1-290bb18335cc@iencinas.com> Signed-off-by: Dominique Martinet --- net/9p/trans_fd.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 2fea50bf047ab..339ec4e54778f 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -192,12 +192,13 @@ static void p9_conn_cancel(struct p9_conn *m, int err) spin_lock(&m->req_lock); - if (m->err) { + if (READ_ONCE(m->err)) { spin_unlock(&m->req_lock); return; } - m->err = err; + WRITE_ONCE(m->err, err); + ASSERT_EXCLUSIVE_WRITER(m->err); list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { list_move(&req->req_list, &cancel_list); @@ -284,7 +285,7 @@ static void p9_read_work(struct work_struct *work) m = container_of(work, struct p9_conn, rq); - if (m->err < 0) + if (READ_ONCE(m->err) < 0) return; p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset); @@ -451,7 +452,7 @@ static void p9_write_work(struct work_struct *work) m = container_of(work, struct p9_conn, wq); - if (m->err < 0) { + if (READ_ONCE(m->err) < 0) { clear_bit(Wworksched, &m->wsched); return; } @@ -623,7 +624,7 @@ static void p9_poll_mux(struct p9_conn *m) __poll_t n; int err = -ECONNRESET; - if (m->err < 0) + if (READ_ONCE(m->err) < 0) return; n = p9_fd_poll(m->client, NULL, &err); @@ -666,6 +667,7 @@ static void p9_poll_mux(struct p9_conn *m) static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) { __poll_t n; + int err; struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; @@ -674,9 +676,10 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) spin_lock(&m->req_lock); - if (m->err < 0) { + err = READ_ONCE(m->err); + if (err < 0) { spin_unlock(&m->req_lock); - return m->err; + return err; } WRITE_ONCE(req->status, REQ_STATUS_UNSENT); From 34ceb69edd6cb9581978e0f3e459da4959c0c387 Mon Sep 17 00:00:00 2001 From: Tuomas Ahola Date: Sat, 22 Mar 2025 17:36:39 +0200 Subject: [PATCH 6/8] Documentation/fs/9p: fix broken link In b529c06f9dc7 (Update the documentation referencing Plan 9 from User Space., 2020-04-26), another instance of the link was left unfixed. Fix that as well. Signed-off-by: Tuomas Ahola Message-ID: <20250322153639.4917-1-taahol@utu.fi> Signed-off-by: Dominique Martinet --- Documentation/filesystems/9p.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/filesystems/9p.rst b/Documentation/filesystems/9p.rst index 2bbf68b56b0d7..28871200e87c7 100644 --- a/Documentation/filesystems/9p.rst +++ b/Documentation/filesystems/9p.rst @@ -40,7 +40,7 @@ For remote file server:: mount -t 9p 10.10.1.2 /mnt/9 -For Plan 9 From User Space applications (http://swtch.com/plan9):: +For Plan 9 From User Space applications (https://9fans.github.io/plan9port/):: mount -t 9p `namespace`/acme /mnt/9 -o trans=unix,uname=$USER From ad2e2a77dcc7912c737a07fe061d93c414c6745e Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 20 Mar 2025 10:52:00 -0400 Subject: [PATCH 7/8] 9p: Use hashtable.h for hash_errmap Convert hash_errmap in error.c to use the generic hashtable implementation from hashtable.h instead of the manual hlist_head array implementation. This simplifies the code and makes it more maintainable by using the standard hashtable API and removes the need for manual hash table management. Signed-off-by: Sasha Levin Message-ID: <20250320145200.3124863-1-sashal@kernel.org> Signed-off-by: Dominique Martinet --- net/9p/error.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/net/9p/error.c b/net/9p/error.c index 8da744494b683..8ba8afc91482d 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -16,6 +16,7 @@ #include #include #include +#include #include /** @@ -33,8 +34,8 @@ struct errormap { struct hlist_node list; }; -#define ERRHASHSZ 32 -static struct hlist_head hash_errmap[ERRHASHSZ]; +#define ERRHASH_BITS 5 +static DEFINE_HASHTABLE(hash_errmap, ERRHASH_BITS); /* FixMe - reduce to a reasonable size */ static struct errormap errmap[] = { @@ -176,18 +177,14 @@ static struct errormap errmap[] = { int p9_error_init(void) { struct errormap *c; - int bucket; - - /* initialize hash table */ - for (bucket = 0; bucket < ERRHASHSZ; bucket++) - INIT_HLIST_HEAD(&hash_errmap[bucket]); + u32 hash; /* load initial error map into hash table */ for (c = errmap; c->name; c++) { c->namelen = strlen(c->name); - bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ; + hash = jhash(c->name, c->namelen, 0); INIT_HLIST_NODE(&c->list); - hlist_add_head(&c->list, &hash_errmap[bucket]); + hash_add(hash_errmap, &c->list, hash); } return 1; @@ -205,12 +202,12 @@ int p9_errstr2errno(char *errstr, int len) { int errno; struct errormap *c; - int bucket; + u32 hash; errno = 0; c = NULL; - bucket = jhash(errstr, len, 0) % ERRHASHSZ; - hlist_for_each_entry(c, &hash_errmap[bucket], list) { + hash = jhash(errstr, len, 0); + hash_for_each_possible(hash_errmap, c, list, hash) { if (c->namelen == len && !memcmp(c->name, errstr, len)) { errno = c->val; break; From 4210030d8bc4834fcb774babc458d02a2432ee76 Mon Sep 17 00:00:00 2001 From: Tingmao Wang Date: Sun, 30 Mar 2025 22:34:42 +0100 Subject: [PATCH 8/8] docs: fs/9p: Add missing "not" in cache documentation A quick fix for what I assume is a typo. Signed-off-by: Tingmao Wang Reviewed-by: Christian Schoenebeck Message-ID: <20250330213443.98434-1-m@maowtm.org> Signed-off-by: Dominique Martinet --- Documentation/filesystems/9p.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/9p.rst b/Documentation/filesystems/9p.rst index 28871200e87c7..522295857f7b1 100644 --- a/Documentation/filesystems/9p.rst +++ b/Documentation/filesystems/9p.rst @@ -165,8 +165,8 @@ Options do not necessarily validate cached values on the server. In other words changes on the server are not guaranteed to be reflected on the client system. Only use this mode of operation if you - have an exclusive mount and the server will modify the filesystem - underneath you. + have an exclusive mount and the server will not modify the + filesystem underneath you. debug=n specifies debug level. The debug level is a bitmask.