Skip to content

nfsd load

Donald Buczek edited this page Jul 24, 2020 · 8 revisions
    MODULE_ALIAS_FS("nfsd");

will make alias fs-nfsd nfsd appear in /lib/modules/$(uname -r)/modules.alias, so when mount -t nfsd nfsd /proc/fs/nfsd is executed by proc-fs-nfsd.mount, the module is requested by get_fs_type(): https://elixir.bootlin.com/linux/v5.7/source/fs/filesystems.c#L273

    struct file_system_type *get_fs_type(const char *name)
    {
        /* ... */
	fs = __get_fs_type(name, len);
	if (!fs && (request_module("fs-%.*s", len, name) == 0)) {
		fs = __get_fs_type(name, len);
        /* ... */

https://elixir.bootlin.com/linux/v5.7/source/fs/nfsd/nfsctl.c#L857

err = nfsd_create_serv(net);
err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);

https://elixir.bootlin.com/linux/v5.7/source/fs/nfsd/nfsctl.c#L1577

module_init(init_nfsd)

https://elixir.bootlin.com/linux/v5.7/source/fs/nfsd/nfsctl.c#L1517

unsigned int nfsd_net_id;
/* ... */
/*
 * Per-network-namespace operations for nfsd, handed to
 * register_pernet_subsys() by init_nfsd().
 *
 * .init/.exit are the per-namespace setup and teardown callbacks,
 * .id points at nfsd_net_id (the key later passed to net_generic() to look
 * up the per-namespace data), and .size is the size of that per-namespace
 * data, a struct nfsd_net.
 */
static struct pernet_operations nfsd_net_ops = {
	.init = nfsd_init_net,
	.exit = nfsd_exit_net,
	.id   = &nfsd_net_id,
	.size = sizeof(struct nfsd_net),
};

static int __init init_nfsd(void)
{
	int retval;
	printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");

	retval = register_pernet_subsys(&nfsd_net_ops);

A zero-filled struct nfsd_net is allocated and nfsd_init_net is called for each existing network namespace; the same happens for every network namespace created in the future.

https://elixir.bootlin.com/linux/v5.7/source/fs/nfsd/nfsctl.c#L1451

static __net_init int nfsd_init_net(struct net *net)
{
	int retval;
	struct vfsmount *mnt;
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	retval = nfsd_export_init(net);
	if (retval)
		goto out_export_error;

init the nfsd.export and nfsd.fh caches and their /proc/net/rpc/NAME directories.

	retval = nfsd_idmap_init(net);
	if (retval)
		goto out_idmap_error;

init "nfs4.nametoid" and "nfs4.idtoname" caches (NFSv4 mappings between UIDs/GIDs and names)

	nn->nfsd_versions = NULL;
	nn->nfsd4_minorversions = NULL;
	retval = nfsd_reply_cache_init(nn);
	if (retval)
		goto out_drc_error;

init reply cache (SLAB "nfsd_drc" but probably merged with another compatible type, so not visible in /proc/slabinfo).

	nn->nfsd_versions = NULL;
	nn->nfsd4_minorversions = NULL;
	retval = nfsd_reply_cache_init(nn);
	if (retval)
		goto out_drc_error;
	nn->nfsd4_lease = 90;	/* default lease time */
	nn->nfsd4_grace = 90;
	nn->somebody_reclaimed = false;
	nn->track_reclaim_completes = false;
	nn->clverifier_counter = prandom_u32();
	nn->clientid_base = prandom_u32();
	nn->clientid_counter = nn->clientid_base + 1;
	nn->s2s_cp_cl_id = nn->clientid_counter++;

	atomic_set(&nn->ntf_refcnt, 0);
	init_waitqueue_head(&nn->ntf_wq);
	seqlock_init(&nn->boot_lock);

	mnt =  vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
	if (IS_ERR(mnt)) {
		retval = PTR_ERR(mnt);
		goto out_mount_err;
	}

the mount is not (yet) attached to anything.

rpc.nfsd from nfsd.service creates the sockets and writes the fd numbers to /proc/fs/nfsd/portlist. Then it writes the number of requested threads to /proc/fs/nfsd/threads

https://elixir.bootlin.com/linux/v5.7/source/fs/nfsd/nfsctl.c#L1419

/*
 * Filesystem type descriptor for the "nfsd" control filesystem.
 *
 * This is the object nfsd_init_net() passes to vfs_kern_mount().
 * .init_fs_context prepares the fs_context used for mounting and
 * .kill_sb names the superblock teardown routine (nfsd_umount).
 */
static struct file_system_type nfsd_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "nfsd",
	.init_fs_context = nfsd_init_fs_context,
	.kill_sb	= nfsd_umount,
};

https://elixir.bootlin.com/linux/v5.7/source/fs/nfsd/nfsctl.c#L1403

/*
 * Set up the fs_context for an nfsd mount.
 *
 * Replaces the user namespace recorded in the context with the one that
 * owns the context's network namespace, and installs nfsd's
 * fs_context_operations. Always returns 0.
 */
static int nfsd_init_fs_context(struct fs_context *fc)
{
	/* Drop the reference on the user namespace the context came with ... */
	put_user_ns(fc->user_ns);
	/* ... and take one on the user namespace of the network namespace. */
	fc->user_ns = get_user_ns(fc->net_ns->user_ns);
	fc->ops = &nfsd_fs_context_ops;
	return 0;
}

https://elixir.bootlin.com/linux/v5.7/source/fs/nfsd/nfsctl.c#L1398

/*
 * fs_context callbacks for nfsd, installed by nfsd_init_fs_context():
 * .get_tree obtains the superblock/root for the mount, .free releases the
 * context (nfsd_fs_free_fc is not shown on this page).
 */
static const struct fs_context_operations nfsd_fs_context_ops = {
	.free		= nfsd_fs_free_fc,
	.get_tree	= nfsd_fs_get_tree,
};

https://elixir.bootlin.com/linux/v5.7/source/fs/nfsd/nfsctl.c#L1387

/*
 * .get_tree callback: hand off to get_tree_keyed() with nfsd_fill_super as
 * the fill routine and a referenced pointer to the context's network
 * namespace (get_net(fc->net_ns)) as the key argument.
 *
 * Returns whatever get_tree_keyed() returns.
 */
static int nfsd_fs_get_tree(struct fs_context *fc)
{
	return get_tree_keyed(fc, nfsd_fill_super, get_net(fc->net_ns));
}

https://elixir.bootlin.com/linux/v5.7/source/fs/nfsd/nfsctl.c#L1341

/*
 * Populate a new nfsd superblock.
 *
 * Creates the fixed set of control files listed in nfsd_files[] via
 * simple_fill_super(), then adds a "clients" directory below the root and
 * remembers its dentry in the per-namespace nfsd_net.
 *
 * Returns 0 on success, the error from simple_fill_super(), or the error
 * encoded in the pointer returned by nfsd_mkdir().
 */
static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
{
	/*
	 * Note: the per-namespace data is looked up through the network
	 * namespace of the calling task (current), not through fc.
	 */
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
							nfsd_net_id);
	struct dentry *dentry;
	int ret;

	/*
	 * Table of files to create, indexed by the NFSD_* constants. Each
	 * entry is {name, file operations, mode}; most entries share
	 * transaction_ops.
	 */
	static const struct tree_descr nfsd_files[] = {
		[NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO},
		[NFSD_Export_features] = {"export_features",
					&export_features_operations, S_IRUGO},
		[NFSD_FO_UnlockIP] = {"unlock_ip",
					&transaction_ops, S_IWUSR|S_IRUSR},
		[NFSD_FO_UnlockFS] = {"unlock_filesystem",
					&transaction_ops, S_IWUSR|S_IRUSR},
		[NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
		[NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
		[NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
		[NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO},
		[NFSD_Reply_Cache_Stats] = {"reply_cache_stats", &reply_cache_stats_operations, S_IRUGO},
		[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
		[NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
		[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
		[NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
		[NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO},
#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
#ifdef CONFIG_NFSD_V4
		[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
		[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
		[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
		[NFSD_V4EndGrace] = {"v4_end_grace", &transaction_ops, S_IWUSR|S_IRUGO},
#endif
		/* last one */ {""}
	};

	/* 0x6e667364 is the filesystem magic; its bytes spell "nfsd" in ASCII. */
	ret = simple_fill_super(sb, 0x6e667364, nfsd_files);
	if (ret)
		return ret;
	/* The "clients" directory is created separately from the file table. */
	dentry = nfsd_mkdir(sb->s_root, NULL, "clients");
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);
	nn->nfsd_client_dir = dentry;
	return 0;
}

Memory

  • starting nfsd without exports consumes about 14 MB memory.

sysctl startup

/usr/sbin/exportfs -ra

  • (try to) read /etc/nfs.conf (which we don't have)
  • lock /var/lib/nfs/export-lock
  • read /etc/exports
  • read /proc/fs/nfsd/export_features
  • lstat export paths and resolve host names
  • (try to) read /etc/exports.d
  • write to /proc/net/rpc/auth.unix.ip/channel "nfsd 0.0.0.0 2147483647 -test-client-\n"
  • loop:
    • open and close /proc/net/rpc/nfsd.export/channel
    • write to /proc/net/rpc/nfsd.export/channel "-test-client- /amd/theinternet/1 3 11296 65534 65534 0\n"
  • create /var/lib/nfs/etab.tmp , compare with /var/lib/nfs/etab, (if needed) move etab.tmp to etab
  • write "1595594180\n" to /proc/net/rpc/auth.unix.ip/flush, /proc/net/rpc/auth.unix.gid/flush, /proc/net/rpc/nfsd.fh/flush, /proc/net/rpc/nfsd.export/flush

/usr/sbin/rpc.nfsd --lease-time 90 --grace-time 90 --no-nfs-version 3 64

  • (try to) read /etc/nfs.conf (which we don't have)
  • read /proc/fs/nfsd/versions : "2 -3 +4 +4.1 +4.2\n"
  • read /proc/fs/nfsd/portlist : ""
  • write /proc/fs/nfsd/versions "-2 -3 +4 +4.1 +4.2\n"
  • write to /proc/fs/nfsd/nfsv4gracetime : "90"
  • write to /proc/sys/fs/nfs/nlm_grace_period : "90"
  • write to /proc/fs/nfsd/nfsv4leasetime : "90"
  • read network config via netlink socket
  • connect to localhost:2049 and close again??
  • create a listen socket 0.0.0.0:2049
  • write to /proc/fs/nfsd/portlist fd-number of listen socket "4\n"
  • create an IPv6 listen socket [::]:2049
  • write to /proc/fs/nfsd/portlist fd-number of listen socket "3\n"
  • write to /proc/fs/nfsd/threads : "64\n"

https://elixir.bootlin.com/linux/v5.7/source/fs/nfsd/nfsctl.c#L857

/*
 * Handler for writes to the "portlist" file.
 *
 * Serializes the real work, done by __write_ports(), under nfsd_mutex and
 * passes along the network namespace derived from the file (netns(file)).
 *
 * Returns the result of __write_ports().
 */
static ssize_t write_ports(struct file *file, char *buf, size_t size)
{
	ssize_t rv;

	mutex_lock(&nfsd_mutex);
	rv = __write_ports(file, buf, size, netns(file));
	mutex_unlock(&nfsd_mutex);
	return rv;
}

https://elixir.bootlin.com/linux/v5.7/source/fs/nfsd/nfsctl.c#L799

/*
 * Dispatch a "portlist" request according to the content of the buffer.
 *
 *   - empty input (size == 0):  __write_ports_names()
 *                               (presumably lists the current listeners;
 *                               the helper is not shown on this page)
 *   - input starting with a digit:  treated as a socket fd number,
 *                                   __write_ports_addfd()
 *   - input starting with a letter: __write_ports_addxprt()
 *                                   (presumably a transport name)
 *   - anything else: -EINVAL
 *
 * The credentials of the opener (file->f_cred) are forwarded to the
 * add helpers. The caller (write_ports) holds nfsd_mutex.
 */
static ssize_t __write_ports(struct file *file, char *buf, size_t size,
			     struct net *net)
{
	if (size == 0)
		return __write_ports_names(buf, net);

	if (isdigit(buf[0]))
		return __write_ports_addfd(buf, net, file->f_cred);

	if (isalpha(buf[0]))
		return __write_ports_addxprt(buf, net, file->f_cred);

	return -EINVAL;
}

https://elixir.bootlin.com/linux/v5.7/source/fs/nfsd/nfsctl.c#L724

/*
 * Add an already-created socket, identified by the fd number written by
 * user space, as a listener of the nfsd service of this network namespace.
 *
 * @buf:  request buffer holding the fd number; also passed to
 *        svc_addsock() together with SIMPLE_TRANSACTION_LIMIT
 * @net:  network namespace the request belongs to
 * @cred: credentials of the writer
 *
 * Returns the value of svc_addsock() on success, -EINVAL for an unparsable
 * or negative fd or for a socket from a different network namespace, or
 * the error from nfsd_create_serv()/svc_addsock().
 */
static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred *cred)
{
	char *mesg = buf;
	int fd, err;
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	/* Parse the fd number from the start of the buffer. */
	err = get_int(&mesg, &fd);
	if (err != 0 || fd < 0)
		return -EINVAL;

	/* Refuse sockets that live in another network namespace. */
	if (svc_alien_sock(net, fd)) {
		printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__);
		return -EINVAL;
	}

	/* The service is created on first use (or just gains a reference). */
	err = nfsd_create_serv(net);
	if (err != 0)
		return err;

	err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
	if (err < 0) {
		/* Undo nfsd_create_serv() if the socket could not be added. */
		nfsd_destroy(net);
		return err;
	}

	/* Decrease the count, but don't shut down the service */
	nn->nfsd_serv->sv_nrthreads--;
	return err;
}

lazy create nfsd service https://elixir.bootlin.com/linux/v5.7/source/fs/nfsd/nfssvc.c#L604

/*
 * Create the nfsd service for a network namespace if it does not exist
 * yet; otherwise just take another reference on the existing one.
 *
 * Must be called with nfsd_mutex held (a warning is emitted otherwise).
 *
 * Returns 0 on success, -ENOMEM if the service cannot be allocated, or the
 * negative error from svc_bind().
 */
int nfsd_create_serv(struct net *net)
{
	int error;
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	WARN_ON(!mutex_is_locked(&nfsd_mutex));
	/* Already created for this namespace: only bump the reference. */
	if (nn->nfsd_serv) {
		svc_get(nn->nfsd_serv);
		return 0;
	}
	/* 0 means "not configured": pick the default maximum block size. */
	if (nfsd_max_blksize == 0)
		nfsd_max_blksize = nfsd_get_default_max_blksize();
	nfsd_reset_versions(nn);
	nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
						&nfsd_thread_sv_ops);
	if (nn->nfsd_serv == NULL)
		return -ENOMEM;

	nn->nfsd_serv->sv_maxconn = nn->max_connections;
	error = svc_bind(nn->nfsd_serv, net);
	if (error < 0) {
		svc_destroy(nn->nfsd_serv);
		return error;
	}

	set_max_drc();
	/* check if the notifier is already set */
	/*
	 * nfsd_notifier_refcount is global: the address notifiers are
	 * registered only when the count goes from 0 to 1.
	 */
	if (atomic_inc_return(&nfsd_notifier_refcount) == 1) {
		register_inetaddr_notifier(&nfsd_inetaddr_notifier);
#if IS_ENABLED(CONFIG_IPV6)
		register_inet6addr_notifier(&nfsd_inet6addr_notifier);
#endif
	}
	/* Per-namespace counter, initialised to 0 in nfsd_init_net(). */
	atomic_inc(&nn->ntf_refcnt);
	nfsd_reset_boot_verifier(nn);
	return 0;
}