Skip to content

Commit

Permalink
Merge tag 'nfsd-4.6-1' of git://linux-nfs.org/~bfields/linux
Browse files Browse the repository at this point in the history
Pull more nfsd updates from Bruce Fields:
 "Apologies for the previous request, which omitted the top 8 commits
  from my for-next branch (including the SCSI layout commits).  Thanks
  to Trond for spotting my error!"

This actually includes the new layout types, so here's that part of
the pull message repeated:

 "Support for a new pnfs layout type from Christoph Hellwig.  The new
  layout type is a variant of the block layout which uses SCSI features
  to offer improved fencing and device identification.

  Note this pull request also includes the client side of SCSI layout,
  with Trond's permission"

* tag 'nfsd-4.6-1' of git://linux-nfs.org/~bfields/linux:
  nfsd: use short read as well as i_size to set eof
  nfsd: better layoutupdate bounds-checking
  nfsd: block and scsi layout drivers need to depend on CONFIG_BLOCK
  nfsd: add SCSI layout support
  nfsd: move some blocklayout code
  nfsd: add a new config option for the block layout driver
  nfs/blocklayout: add SCSI layout support
  nfs4.h: add SCSI layout definitions
  • Loading branch information
Linus Torvalds committed Mar 25, 2016
2 parents 70c5eb8 + ac503e4 commit 8b306a2
Show file tree
Hide file tree
Showing 21 changed files with 723 additions and 92 deletions.
23 changes: 23 additions & 0 deletions Documentation/filesystems/nfs/pnfs-scsi-server.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@

pNFS SCSI layout server user guide
==================================

This document describes support for pNFS SCSI layouts in the Linux NFS server.
With pNFS SCSI layouts, the NFS server acts as Metadata Server (MDS) for pNFS,
which in addition to handling all the metadata access to the NFS export,
also hands out layouts to the clients so that they can directly access the
underlying SCSI LUNs that are shared with the client.

To use pNFS SCSI layouts with with the Linux NFS server, the exported file
system needs to support the pNFS SCSI layouts (currently just XFS), and the
file system must sit on a SCSI LUN that is accessible to the clients in
addition to the MDS. As of now the file system needs to sit directly on the
exported LUN, striping or concatenation of LUNs on the MDS and clients
is not supported yet.

On a server built with CONFIG_NFSD_SCSI, the pNFS SCSI volume support is
automatically enabled if the file system is exported using the "pnfs"
option and the underlying SCSI device support persistent reservations.
On the client make sure the kernel has the CONFIG_PNFS_BLOCK option
enabled, and the file system is mounted using the NFSv4.1 protocol
version (mount -o vers=4.1).
59 changes: 52 additions & 7 deletions fs/nfs/blocklayout/blocklayout.c
Original file line number Diff line number Diff line change
Expand Up @@ -446,8 +446,8 @@ static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo)
kfree(bl);
}

static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
gfp_t gfp_flags)
static struct pnfs_layout_hdr *__bl_alloc_layout_hdr(struct inode *inode,
gfp_t gfp_flags, bool is_scsi_layout)
{
struct pnfs_block_layout *bl;

Expand All @@ -460,9 +460,22 @@ static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
bl->bl_ext_ro = RB_ROOT;
spin_lock_init(&bl->bl_ext_lock);

bl->bl_scsi_layout = is_scsi_layout;
return &bl->bl_layout;
}

static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
gfp_t gfp_flags)
{
return __bl_alloc_layout_hdr(inode, gfp_flags, false);
}

static struct pnfs_layout_hdr *sl_alloc_layout_hdr(struct inode *inode,
gfp_t gfp_flags)
{
return __bl_alloc_layout_hdr(inode, gfp_flags, true);
}

static void bl_free_lseg(struct pnfs_layout_segment *lseg)
{
dprintk("%s enter\n", __func__);
Expand Down Expand Up @@ -889,22 +902,53 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
.sync = pnfs_generic_sync,
};

static struct pnfs_layoutdriver_type scsilayout_type = {
.id = LAYOUT_SCSI,
.name = "LAYOUT_SCSI",
.owner = THIS_MODULE,
.flags = PNFS_LAYOUTRET_ON_SETATTR |
PNFS_READ_WHOLE_PAGE,
.read_pagelist = bl_read_pagelist,
.write_pagelist = bl_write_pagelist,
.alloc_layout_hdr = sl_alloc_layout_hdr,
.free_layout_hdr = bl_free_layout_hdr,
.alloc_lseg = bl_alloc_lseg,
.free_lseg = bl_free_lseg,
.return_range = bl_return_range,
.prepare_layoutcommit = bl_prepare_layoutcommit,
.cleanup_layoutcommit = bl_cleanup_layoutcommit,
.set_layoutdriver = bl_set_layoutdriver,
.alloc_deviceid_node = bl_alloc_deviceid_node,
.free_deviceid_node = bl_free_deviceid_node,
.pg_read_ops = &bl_pg_read_ops,
.pg_write_ops = &bl_pg_write_ops,
.sync = pnfs_generic_sync,
};


static int __init nfs4blocklayout_init(void)
{
int ret;

dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);

ret = pnfs_register_layoutdriver(&blocklayout_type);
ret = bl_init_pipefs();
if (ret)
goto out;
ret = bl_init_pipefs();

ret = pnfs_register_layoutdriver(&blocklayout_type);
if (ret)
goto out_unregister;
goto out_cleanup_pipe;

ret = pnfs_register_layoutdriver(&scsilayout_type);
if (ret)
goto out_unregister_block;
return 0;

out_unregister:
out_unregister_block:
pnfs_unregister_layoutdriver(&blocklayout_type);
out_cleanup_pipe:
bl_cleanup_pipefs();
out:
return ret;
}
Expand All @@ -914,8 +958,9 @@ static void __exit nfs4blocklayout_exit(void)
dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
__func__);

bl_cleanup_pipefs();
pnfs_unregister_layoutdriver(&scsilayout_type);
pnfs_unregister_layoutdriver(&blocklayout_type);
bl_cleanup_pipefs();
}

MODULE_ALIAS("nfs-layouttype4-3");
Expand Down
14 changes: 12 additions & 2 deletions fs/nfs/blocklayout/blocklayout.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ struct pnfs_block_dev;
*/
#define PNFS_BLOCK_UUID_LEN 128


struct pnfs_block_volume {
enum pnfs_block_volume_type type;
union {
Expand All @@ -82,6 +81,13 @@ struct pnfs_block_volume {
u32 volumes_count;
u32 volumes[PNFS_BLOCK_MAX_DEVICES];
} stripe;
struct {
enum scsi_code_set code_set;
enum scsi_designator_type designator_type;
int designator_len;
u8 designator[256];
u64 pr_key;
} scsi;
};
};

Expand All @@ -106,6 +112,9 @@ struct pnfs_block_dev {
struct block_device *bdev;
u64 disk_offset;

u64 pr_key;
bool pr_registered;

bool (*map)(struct pnfs_block_dev *dev, u64 offset,
struct pnfs_block_dev_map *map);
};
Expand All @@ -131,6 +140,7 @@ struct pnfs_block_layout {
struct rb_root bl_ext_rw;
struct rb_root bl_ext_ro;
spinlock_t bl_ext_lock; /* Protects list manipulation */
bool bl_scsi_layout;
};

static inline struct pnfs_block_layout *
Expand Down Expand Up @@ -182,6 +192,6 @@ void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status);
dev_t bl_resolve_deviceid(struct nfs_server *server,
struct pnfs_block_volume *b, gfp_t gfp_mask);
int __init bl_init_pipefs(void);
void __exit bl_cleanup_pipefs(void);
void bl_cleanup_pipefs(void);

#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
144 changes: 143 additions & 1 deletion fs/nfs/blocklayout/dev.c
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
/*
* Copyright (c) 2014 Christoph Hellwig.
* Copyright (c) 2014-2016 Christoph Hellwig.
*/
#include <linux/sunrpc/svc.h>
#include <linux/blkdev.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_xdr.h>
#include <linux/pr.h>

#include "blocklayout.h"

Expand All @@ -21,6 +22,17 @@ bl_free_device(struct pnfs_block_dev *dev)
bl_free_device(&dev->children[i]);
kfree(dev->children);
} else {
if (dev->pr_registered) {
const struct pr_ops *ops =
dev->bdev->bd_disk->fops->pr_ops;
int error;

error = ops->pr_register(dev->bdev, dev->pr_key, 0,
false);
if (error)
pr_err("failed to unregister PR key.\n");
}

if (dev->bdev)
blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
}
Expand Down Expand Up @@ -113,6 +125,24 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
for (i = 0; i < b->stripe.volumes_count; i++)
b->stripe.volumes[i] = be32_to_cpup(p++);
break;
case PNFS_BLOCK_VOLUME_SCSI:
p = xdr_inline_decode(xdr, 4 + 4 + 4);
if (!p)
return -EIO;
b->scsi.code_set = be32_to_cpup(p++);
b->scsi.designator_type = be32_to_cpup(p++);
b->scsi.designator_len = be32_to_cpup(p++);
p = xdr_inline_decode(xdr, b->scsi.designator_len);
if (!p)
return -EIO;
if (b->scsi.designator_len > 256)
return -EIO;
memcpy(&b->scsi.designator, p, b->scsi.designator_len);
p = xdr_inline_decode(xdr, 8);
if (!p)
return -EIO;
p = xdr_decode_hyper(p, &b->scsi.pr_key);
break;
default:
dprintk("unknown volume type!\n");
return -EIO;
Expand Down Expand Up @@ -216,6 +246,116 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
return 0;
}

static bool
bl_validate_designator(struct pnfs_block_volume *v)
{
switch (v->scsi.designator_type) {
case PS_DESIGNATOR_EUI64:
if (v->scsi.code_set != PS_CODE_SET_BINARY)
return false;

if (v->scsi.designator_len != 8 &&
v->scsi.designator_len != 10 &&
v->scsi.designator_len != 16)
return false;

return true;
case PS_DESIGNATOR_NAA:
if (v->scsi.code_set != PS_CODE_SET_BINARY)
return false;

if (v->scsi.designator_len != 8 &&
v->scsi.designator_len != 16)
return false;

return true;
case PS_DESIGNATOR_T10:
case PS_DESIGNATOR_NAME:
pr_err("pNFS: unsupported designator "
"(code set %d, type %d, len %d.\n",
v->scsi.code_set,
v->scsi.designator_type,
v->scsi.designator_len);
return false;
default:
pr_err("pNFS: invalid designator "
"(code set %d, type %d, len %d.\n",
v->scsi.code_set,
v->scsi.designator_type,
v->scsi.designator_len);
return false;
}
}

static int
bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
const struct pr_ops *ops;
const char *devname;
int error;

if (!bl_validate_designator(v))
return -EINVAL;

switch (v->scsi.designator_len) {
case 8:
devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%8phN",
v->scsi.designator);
break;
case 12:
devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%12phN",
v->scsi.designator);
break;
case 16:
devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%16phN",
v->scsi.designator);
break;
default:
return -EINVAL;
}

d->bdev = blkdev_get_by_path(devname, FMODE_READ, NULL);
if (IS_ERR(d->bdev)) {
pr_warn("pNFS: failed to open device %s (%ld)\n",
devname, PTR_ERR(d->bdev));
kfree(devname);
return PTR_ERR(d->bdev);
}

kfree(devname);

d->len = i_size_read(d->bdev->bd_inode);
d->map = bl_map_simple;
d->pr_key = v->scsi.pr_key;

pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
d->bdev->bd_disk->disk_name, d->pr_key);

ops = d->bdev->bd_disk->fops->pr_ops;
if (!ops) {
pr_err("pNFS: block device %s does not support reservations.",
d->bdev->bd_disk->disk_name);
error = -EINVAL;
goto out_blkdev_put;
}

error = ops->pr_register(d->bdev, 0, d->pr_key, true);
if (error) {
pr_err("pNFS: failed to register key for block device %s.",
d->bdev->bd_disk->disk_name);
goto out_blkdev_put;
}

d->pr_registered = true;
return 0;

out_blkdev_put:
blkdev_put(d->bdev, FMODE_READ);
return error;
}

static int
bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
Expand Down Expand Up @@ -303,6 +443,8 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
return bl_parse_concat(server, d, volumes, idx, gfp_mask);
case PNFS_BLOCK_VOLUME_STRIPE:
return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
case PNFS_BLOCK_VOLUME_SCSI:
return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
default:
dprintk("unsupported volume type: %d\n", volumes[idx].type);
return -EIO;
Expand Down
Loading

0 comments on commit 8b306a2

Please sign in to comment.