From 3ab2e420ec1caf4ead233f3161ac7d86fe5d2a9f Mon Sep 17 00:00:00 2001 From: Asias He Date: Sat, 27 Apr 2013 11:16:48 +0800 Subject: [PATCH 01/10] vhost: Allow device specific fields per vq This is useful for any device who wants device specific fields per vq. For example, tcm_vhost wants a per vq field to track requests which are in flight on the vq. Also, on top of this we can add patches to move things like ubufs from vhost.h out to net.c. Signed-off-by: Michael S. Tsirkin Signed-off-by: Asias He Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 64 ++++++++++++++++++---------- drivers/vhost/tcm_vhost.c | 55 ++++++++++++++++-------- drivers/vhost/vhost.c | 88 +++++++++++++++++++-------------------- drivers/vhost/vhost.h | 4 +- 4 files changed, 124 insertions(+), 87 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 87c216c1e54e..176aa030dc5f 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,9 +64,13 @@ enum { VHOST_NET_VQ_MAX = 2, }; +struct vhost_net_virtqueue { + struct vhost_virtqueue vq; +}; + struct vhost_net { struct vhost_dev dev; - struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; + struct vhost_net_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; /* Number of TX recently submitted. * Protected by tx vq lock. */ @@ -198,7 +202,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) * read-size critical section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) { - struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX]; + struct vhost_virtqueue *vq = &net->vqs[VHOST_NET_VQ_TX].vq; unsigned out, in, s; int head; struct msghdr msg = { @@ -417,7 +421,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, * read-size critical section for our kind of RCU. 
*/ static void handle_rx(struct vhost_net *net) { - struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; + struct vhost_virtqueue *vq = &net->vqs[VHOST_NET_VQ_RX].vq; unsigned uninitialized_var(in), log; struct vhost_log *vq_log; struct msghdr msg = { @@ -559,17 +563,26 @@ static int vhost_net_open(struct inode *inode, struct file *f) { struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL); struct vhost_dev *dev; + struct vhost_virtqueue **vqs; int r; if (!n) return -ENOMEM; + vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL); + if (!vqs) { + kfree(n); + return -ENOMEM; + } dev = &n->dev; - n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick; - n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick; - r = vhost_dev_init(dev, n->vqs, VHOST_NET_VQ_MAX); + vqs[VHOST_NET_VQ_TX] = &n->vqs[VHOST_NET_VQ_TX].vq; + vqs[VHOST_NET_VQ_RX] = &n->vqs[VHOST_NET_VQ_RX].vq; + n->vqs[VHOST_NET_VQ_TX].vq.handle_kick = handle_tx_kick; + n->vqs[VHOST_NET_VQ_RX].vq.handle_kick = handle_rx_kick; + r = vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX); if (r < 0) { kfree(n); + kfree(vqs); return r; } @@ -584,7 +597,9 @@ static int vhost_net_open(struct inode *inode, struct file *f) static void vhost_net_disable_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { - struct vhost_poll *poll = n->poll + (vq - n->vqs); + struct vhost_net_virtqueue *nvq = + container_of(vq, struct vhost_net_virtqueue, vq); + struct vhost_poll *poll = n->poll + (nvq - n->vqs); if (!vq->private_data) return; vhost_poll_stop(poll); @@ -593,7 +608,9 @@ static void vhost_net_disable_vq(struct vhost_net *n, static int vhost_net_enable_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { - struct vhost_poll *poll = n->poll + (vq - n->vqs); + struct vhost_net_virtqueue *nvq = + container_of(vq, struct vhost_net_virtqueue, vq); + struct vhost_poll *poll = n->poll + (nvq - n->vqs); struct socket *sock; sock = rcu_dereference_protected(vq->private_data, @@ -621,30 +638,30 @@ static struct socket 
*vhost_net_stop_vq(struct vhost_net *n, static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock, struct socket **rx_sock) { - *tx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_TX); - *rx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_RX); + *tx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_TX].vq); + *rx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_RX].vq); } static void vhost_net_flush_vq(struct vhost_net *n, int index) { vhost_poll_flush(n->poll + index); - vhost_poll_flush(&n->dev.vqs[index].poll); + vhost_poll_flush(&n->vqs[index].vq.poll); } static void vhost_net_flush(struct vhost_net *n) { vhost_net_flush_vq(n, VHOST_NET_VQ_TX); vhost_net_flush_vq(n, VHOST_NET_VQ_RX); - if (n->dev.vqs[VHOST_NET_VQ_TX].ubufs) { - mutex_lock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex); + if (n->vqs[VHOST_NET_VQ_TX].vq.ubufs) { + mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); n->tx_flush = true; - mutex_unlock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex); + mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); /* Wait for all lower device DMAs done. */ - vhost_ubuf_put_and_wait(n->dev.vqs[VHOST_NET_VQ_TX].ubufs); - mutex_lock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex); + vhost_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].vq.ubufs); + mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); n->tx_flush = false; - kref_init(&n->dev.vqs[VHOST_NET_VQ_TX].ubufs->kref); - mutex_unlock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex); + kref_init(&n->vqs[VHOST_NET_VQ_TX].vq.ubufs->kref); + mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); } } @@ -665,6 +682,7 @@ static int vhost_net_release(struct inode *inode, struct file *f) /* We do an extra flush before freeing memory, * since jobs can re-queue themselves. 
*/ vhost_net_flush(n); + kfree(n->dev.vqs); kfree(n); return 0; } @@ -750,7 +768,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) r = -ENOBUFS; goto err; } - vq = n->vqs + index; + vq = &n->vqs[index].vq; mutex_lock(&vq->mutex); /* Verify that ring has been setup correctly. */ @@ -870,10 +888,10 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) n->dev.acked_features = features; smp_wmb(); for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { - mutex_lock(&n->vqs[i].mutex); - n->vqs[i].vhost_hlen = vhost_hlen; - n->vqs[i].sock_hlen = sock_hlen; - mutex_unlock(&n->vqs[i].mutex); + mutex_lock(&n->vqs[i].vq.mutex); + n->vqs[i].vq.vhost_hlen = vhost_hlen; + n->vqs[i].vq.sock_hlen = sock_hlen; + mutex_unlock(&n->vqs[i].vq.mutex); } vhost_net_flush(n); mutex_unlock(&n->dev.mutex); diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 1677238d281f..99d3480450e7 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -74,13 +74,17 @@ enum { #define VHOST_SCSI_MAX_VQ 128 #define VHOST_SCSI_MAX_EVENT 128 +struct vhost_scsi_virtqueue { + struct vhost_virtqueue vq; +}; + struct vhost_scsi { /* Protected by vhost_scsi->dev.mutex */ struct tcm_vhost_tpg **vs_tpg; char vs_vhost_wwpn[TRANSPORT_IQN_LEN]; struct vhost_dev dev; - struct vhost_virtqueue vqs[VHOST_SCSI_MAX_VQ]; + struct vhost_scsi_virtqueue vqs[VHOST_SCSI_MAX_VQ]; struct vhost_work vs_completion_work; /* cmd completion work item */ struct llist_head vs_completion_list; /* cmd completion queue */ @@ -366,7 +370,7 @@ static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt *evt) static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs, u32 event, u32 reason) { - struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; + struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; struct tcm_vhost_evt *evt; if (vs->vs_events_nr > VHOST_SCSI_MAX_EVENT) { @@ -409,7 +413,7 @@ static void 
vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd) static void tcm_vhost_do_evt_work(struct vhost_scsi *vs, struct tcm_vhost_evt *evt) { - struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; + struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; struct virtio_scsi_event *event = &evt->event; struct virtio_scsi_event __user *eventp; unsigned out, in; @@ -460,7 +464,7 @@ static void tcm_vhost_evt_work(struct vhost_work *work) { struct vhost_scsi *vs = container_of(work, struct vhost_scsi, vs_event_work); - struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; + struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; struct tcm_vhost_evt *evt; struct llist_node *llnode; @@ -511,8 +515,10 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) v_rsp.sense_len); ret = copy_to_user(tv_cmd->tvc_resp, &v_rsp, sizeof(v_rsp)); if (likely(ret == 0)) { + struct vhost_scsi_virtqueue *q; vhost_add_used(tv_cmd->tvc_vq, tv_cmd->tvc_vq_desc, 0); - vq = tv_cmd->tvc_vq - vs->vqs; + q = container_of(tv_cmd->tvc_vq, struct vhost_scsi_virtqueue, vq); + vq = q - vs->vqs; __set_bit(vq, signal); } else pr_err("Faulted on virtio_scsi_cmd_resp\n"); @@ -523,7 +529,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) vq = -1; while ((vq = find_next_bit(signal, VHOST_SCSI_MAX_VQ, vq + 1)) < VHOST_SCSI_MAX_VQ) - vhost_signal(&vs->dev, &vs->vqs[vq]); + vhost_signal(&vs->dev, &vs->vqs[vq].vq); } static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd( @@ -938,7 +944,7 @@ static void vhost_scsi_handle_kick(struct vhost_work *work) static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index) { - vhost_poll_flush(&vs->dev.vqs[index].poll); + vhost_poll_flush(&vs->vqs[index].vq.poll); } static void vhost_scsi_flush(struct vhost_scsi *vs) @@ -975,7 +981,7 @@ static int vhost_scsi_set_endpoint( /* Verify that ring has been setup correctly. */ for (index = 0; index < vs->dev.nvqs; ++index) { /* Verify that ring has been setup correctly. 
*/ - if (!vhost_vq_access_ok(&vs->vqs[index])) { + if (!vhost_vq_access_ok(&vs->vqs[index].vq)) { ret = -EFAULT; goto out; } @@ -1022,7 +1028,7 @@ static int vhost_scsi_set_endpoint( memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn, sizeof(vs->vs_vhost_wwpn)); for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { - vq = &vs->vqs[i]; + vq = &vs->vqs[i].vq; /* Flushing the vhost_work acts as synchronize_rcu */ mutex_lock(&vq->mutex); rcu_assign_pointer(vq->private_data, vs_tpg); @@ -1063,7 +1069,7 @@ static int vhost_scsi_clear_endpoint( mutex_lock(&vs->dev.mutex); /* Verify that ring has been setup correctly. */ for (index = 0; index < vs->dev.nvqs; ++index) { - if (!vhost_vq_access_ok(&vs->vqs[index])) { + if (!vhost_vq_access_ok(&vs->vqs[index].vq)) { ret = -EFAULT; goto err_dev; } @@ -1103,7 +1109,7 @@ static int vhost_scsi_clear_endpoint( } if (match) { for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { - vq = &vs->vqs[i]; + vq = &vs->vqs[i].vq; /* Flushing the vhost_work acts as synchronize_rcu */ mutex_lock(&vq->mutex); rcu_assign_pointer(vq->private_data, NULL); @@ -1151,24 +1157,36 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features) static int vhost_scsi_open(struct inode *inode, struct file *f) { struct vhost_scsi *s; + struct vhost_virtqueue **vqs; int r, i; s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; + vqs = kmalloc(VHOST_SCSI_MAX_VQ * sizeof(*vqs), GFP_KERNEL); + if (!vqs) { + kfree(s); + return -ENOMEM; + } + vhost_work_init(&s->vs_completion_work, vhost_scsi_complete_cmd_work); vhost_work_init(&s->vs_event_work, tcm_vhost_evt_work); s->vs_events_nr = 0; s->vs_events_missed = false; - s->vqs[VHOST_SCSI_VQ_CTL].handle_kick = vhost_scsi_ctl_handle_kick; - s->vqs[VHOST_SCSI_VQ_EVT].handle_kick = vhost_scsi_evt_handle_kick; - for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) - s->vqs[i].handle_kick = vhost_scsi_handle_kick; - r = vhost_dev_init(&s->dev, s->vqs, VHOST_SCSI_MAX_VQ); + vqs[VHOST_SCSI_VQ_CTL] = &s->vqs[VHOST_SCSI_VQ_CTL].vq; + 
vqs[VHOST_SCSI_VQ_EVT] = &s->vqs[VHOST_SCSI_VQ_EVT].vq; + s->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick; + s->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick; + for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) { + vqs[i] = &s->vqs[i].vq; + s->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; + } + r = vhost_dev_init(&s->dev, vqs, VHOST_SCSI_MAX_VQ); if (r < 0) { + kfree(vqs); kfree(s); return r; } @@ -1190,6 +1208,7 @@ static int vhost_scsi_release(struct inode *inode, struct file *f) vhost_dev_cleanup(&s->dev, false); /* Jobs can re-queue themselves in evt kick handler. Do extra flush. */ vhost_scsi_flush(s); + kfree(s->dev.vqs); kfree(s); return 0; } @@ -1205,7 +1224,7 @@ static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl, u32 events_missed; u64 features; int r, abi_version = VHOST_SCSI_ABI_VERSION; - struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; + struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; switch (ioctl) { case VHOST_SCSI_SET_ENDPOINT: @@ -1333,7 +1352,7 @@ static void tcm_vhost_do_plug(struct tcm_vhost_tpg *tpg, else reason = VIRTIO_SCSI_EVT_RESET_REMOVED; - vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; + vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; mutex_lock(&vq->mutex); tcm_vhost_send_evt(vs, tpg, lun, VIRTIO_SCSI_T_TRANSPORT_RESET, reason); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 4eecdb867d53..bef8b6bae186 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -269,27 +269,27 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) bool zcopy; for (i = 0; i < dev->nvqs; ++i) { - dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect * + dev->vqs[i]->indirect = kmalloc(sizeof *dev->vqs[i]->indirect * UIO_MAXIOV, GFP_KERNEL); - dev->vqs[i].log = kmalloc(sizeof *dev->vqs[i].log * UIO_MAXIOV, + dev->vqs[i]->log = kmalloc(sizeof *dev->vqs[i]->log * UIO_MAXIOV, GFP_KERNEL); - dev->vqs[i].heads = kmalloc(sizeof *dev->vqs[i].heads * + 
dev->vqs[i]->heads = kmalloc(sizeof *dev->vqs[i]->heads * UIO_MAXIOV, GFP_KERNEL); zcopy = vhost_zcopy_mask & (0x1 << i); if (zcopy) - dev->vqs[i].ubuf_info = - kmalloc(sizeof *dev->vqs[i].ubuf_info * + dev->vqs[i]->ubuf_info = + kmalloc(sizeof *dev->vqs[i]->ubuf_info * UIO_MAXIOV, GFP_KERNEL); - if (!dev->vqs[i].indirect || !dev->vqs[i].log || - !dev->vqs[i].heads || - (zcopy && !dev->vqs[i].ubuf_info)) + if (!dev->vqs[i]->indirect || !dev->vqs[i]->log || + !dev->vqs[i]->heads || + (zcopy && !dev->vqs[i]->ubuf_info)) goto err_nomem; } return 0; err_nomem: for (; i >= 0; --i) - vhost_vq_free_iovecs(&dev->vqs[i]); + vhost_vq_free_iovecs(dev->vqs[i]); return -ENOMEM; } @@ -298,11 +298,11 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev) int i; for (i = 0; i < dev->nvqs; ++i) - vhost_vq_free_iovecs(&dev->vqs[i]); + vhost_vq_free_iovecs(dev->vqs[i]); } long vhost_dev_init(struct vhost_dev *dev, - struct vhost_virtqueue *vqs, int nvqs) + struct vhost_virtqueue **vqs, int nvqs) { int i; @@ -318,16 +318,16 @@ long vhost_dev_init(struct vhost_dev *dev, dev->worker = NULL; for (i = 0; i < dev->nvqs; ++i) { - dev->vqs[i].log = NULL; - dev->vqs[i].indirect = NULL; - dev->vqs[i].heads = NULL; - dev->vqs[i].ubuf_info = NULL; - dev->vqs[i].dev = dev; - mutex_init(&dev->vqs[i].mutex); - vhost_vq_reset(dev, dev->vqs + i); - if (dev->vqs[i].handle_kick) - vhost_poll_init(&dev->vqs[i].poll, - dev->vqs[i].handle_kick, POLLIN, dev); + dev->vqs[i]->log = NULL; + dev->vqs[i]->indirect = NULL; + dev->vqs[i]->heads = NULL; + dev->vqs[i]->ubuf_info = NULL; + dev->vqs[i]->dev = dev; + mutex_init(&dev->vqs[i]->mutex); + vhost_vq_reset(dev, dev->vqs[i]); + if (dev->vqs[i]->handle_kick) + vhost_poll_init(&dev->vqs[i]->poll, + dev->vqs[i]->handle_kick, POLLIN, dev); } return 0; @@ -430,9 +430,9 @@ void vhost_dev_stop(struct vhost_dev *dev) int i; for (i = 0; i < dev->nvqs; ++i) { - if (dev->vqs[i].kick && dev->vqs[i].handle_kick) { - vhost_poll_stop(&dev->vqs[i].poll); - 
vhost_poll_flush(&dev->vqs[i].poll); + if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) { + vhost_poll_stop(&dev->vqs[i]->poll); + vhost_poll_flush(&dev->vqs[i]->poll); } } } @@ -443,17 +443,17 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) int i; for (i = 0; i < dev->nvqs; ++i) { - if (dev->vqs[i].error_ctx) - eventfd_ctx_put(dev->vqs[i].error_ctx); - if (dev->vqs[i].error) - fput(dev->vqs[i].error); - if (dev->vqs[i].kick) - fput(dev->vqs[i].kick); - if (dev->vqs[i].call_ctx) - eventfd_ctx_put(dev->vqs[i].call_ctx); - if (dev->vqs[i].call) - fput(dev->vqs[i].call); - vhost_vq_reset(dev, dev->vqs + i); + if (dev->vqs[i]->error_ctx) + eventfd_ctx_put(dev->vqs[i]->error_ctx); + if (dev->vqs[i]->error) + fput(dev->vqs[i]->error); + if (dev->vqs[i]->kick) + fput(dev->vqs[i]->kick); + if (dev->vqs[i]->call_ctx) + eventfd_ctx_put(dev->vqs[i]->call_ctx); + if (dev->vqs[i]->call) + fput(dev->vqs[i]->call); + vhost_vq_reset(dev, dev->vqs[i]); } vhost_dev_free_iovecs(dev); if (dev->log_ctx) @@ -524,14 +524,14 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, for (i = 0; i < d->nvqs; ++i) { int ok; - mutex_lock(&d->vqs[i].mutex); + mutex_lock(&d->vqs[i]->mutex); /* If ring is inactive, will check when it's enabled. 
*/ - if (d->vqs[i].private_data) - ok = vq_memory_access_ok(d->vqs[i].log_base, mem, + if (d->vqs[i]->private_data) + ok = vq_memory_access_ok(d->vqs[i]->log_base, mem, log_all); else ok = 1; - mutex_unlock(&d->vqs[i].mutex); + mutex_unlock(&d->vqs[i]->mutex); if (!ok) return 0; } @@ -641,7 +641,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) if (idx >= d->nvqs) return -ENOBUFS; - vq = d->vqs + idx; + vq = d->vqs[idx]; mutex_lock(&vq->mutex); @@ -852,7 +852,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) for (i = 0; i < d->nvqs; ++i) { struct vhost_virtqueue *vq; void __user *base = (void __user *)(unsigned long)p; - vq = d->vqs + i; + vq = d->vqs[i]; mutex_lock(&vq->mutex); /* If ring is inactive, will check when it's enabled. */ if (vq->private_data && !vq_log_access_ok(d, vq, base)) @@ -879,9 +879,9 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) } else filep = eventfp; for (i = 0; i < d->nvqs; ++i) { - mutex_lock(&d->vqs[i].mutex); - d->vqs[i].log_ctx = d->log_ctx; - mutex_unlock(&d->vqs[i].mutex); + mutex_lock(&d->vqs[i]->mutex); + d->vqs[i]->log_ctx = d->log_ctx; + mutex_unlock(&d->vqs[i]->mutex); } if (ctx) eventfd_ctx_put(ctx); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 17261e277c02..f3afa8a41fe0 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -150,7 +150,7 @@ struct vhost_dev { struct mm_struct *mm; struct mutex mutex; unsigned acked_features; - struct vhost_virtqueue *vqs; + struct vhost_virtqueue **vqs; int nvqs; struct file *log_file; struct eventfd_ctx *log_ctx; @@ -159,7 +159,7 @@ struct vhost_dev { struct task_struct *worker; }; -long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs); +long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs); long vhost_dev_check_owner(struct vhost_dev *); long vhost_dev_reset_owner(struct vhost_dev *); void 
vhost_dev_cleanup(struct vhost_dev *, bool locked); From f2f0173d6a95fa60e7934f62ce27d6bd24e4e09c Mon Sep 17 00:00:00 2001 From: Asias He Date: Sat, 27 Apr 2013 11:16:49 +0800 Subject: [PATCH 02/10] tcm_vhost: Wait for pending requests in vhost_scsi_flush() Unlike tcm_vhost_evt requests, tcm_vhost_cmd requests are passed to the target core system, so we cannot make sure that all the pending requests will be finished just by flushing the virt queue. In this patch, we refcount every tcm_vhost_cmd request to make vhost_scsi_flush() wait for all the pending requests issued before the flush operation to be finished. This is useful when we call vhost_scsi_clear_endpoint() to stop tcm_vhost. No new requests will be passed to the target core system because we clear the endpoint by setting vs_tpg to NULL, and we wait for all the old requests. Together these guarantee that no requests will be leaked and that existing requests will be completed. Signed-off-by: Asias He Signed-off-by: Michael S. Tsirkin --- drivers/vhost/tcm_vhost.c | 90 ++++++++++++++++++++++++++++++++++++++- drivers/vhost/tcm_vhost.h | 3 ++ 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 99d3480450e7..afb530887936 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -74,8 +74,19 @@ enum { #define VHOST_SCSI_MAX_VQ 128 #define VHOST_SCSI_MAX_EVENT 128 +struct vhost_scsi_inflight { + /* Wait for the flush operation to finish */ + struct completion comp; + /* Refcount for the inflight reqs */ + struct kref kref; +}; + struct vhost_scsi_virtqueue { struct vhost_virtqueue vq; + /* Track inflight reqs, protected by vq->mutex */ + struct vhost_scsi_inflight inflights[2]; + /* Indicate current inflight in use, protected by vq->mutex */ + int inflight_idx; }; struct vhost_scsi { @@ -111,6 +122,59 @@ static int iov_num_pages(struct iovec *iov) ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT; } +void tcm_vhost_done_inflight(struct kref *kref)
+{ + struct vhost_scsi_inflight *inflight; + + inflight = container_of(kref, struct vhost_scsi_inflight, kref); + complete(&inflight->comp); +} + +static void tcm_vhost_init_inflight(struct vhost_scsi *vs, + struct vhost_scsi_inflight *old_inflight[]) +{ + struct vhost_scsi_inflight *new_inflight; + struct vhost_virtqueue *vq; + int idx, i; + + for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { + vq = &vs->vqs[i].vq; + + mutex_lock(&vq->mutex); + + /* store old infight */ + idx = vs->vqs[i].inflight_idx; + if (old_inflight) + old_inflight[i] = &vs->vqs[i].inflights[idx]; + + /* setup new infight */ + vs->vqs[i].inflight_idx = idx ^ 1; + new_inflight = &vs->vqs[i].inflights[idx ^ 1]; + kref_init(&new_inflight->kref); + init_completion(&new_inflight->comp); + + mutex_unlock(&vq->mutex); + } +} + +static struct vhost_scsi_inflight * +tcm_vhost_get_inflight(struct vhost_virtqueue *vq) +{ + struct vhost_scsi_inflight *inflight; + struct vhost_scsi_virtqueue *svq; + + svq = container_of(vq, struct vhost_scsi_virtqueue, vq); + inflight = &svq->inflights[svq->inflight_idx]; + kref_get(&inflight->kref); + + return inflight; +} + +static void tcm_vhost_put_inflight(struct vhost_scsi_inflight *inflight) +{ + kref_put(&inflight->kref, tcm_vhost_done_inflight); +} + static int tcm_vhost_check_true(struct se_portal_group *se_tpg) { return 1; @@ -407,6 +471,8 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd) kfree(tv_cmd->tvc_sgl); } + tcm_vhost_put_inflight(tv_cmd->inflight); + kfree(tv_cmd); } @@ -533,6 +599,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) } static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd( + struct vhost_virtqueue *vq, struct tcm_vhost_tpg *tv_tpg, struct virtio_scsi_cmd_req *v_req, u32 exp_data_len, @@ -557,6 +624,7 @@ static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd( tv_cmd->tvc_exp_data_len = exp_data_len; tv_cmd->tvc_data_direction = data_direction; tv_cmd->tvc_nexus = tv_nexus; + tv_cmd->inflight = 
tcm_vhost_get_inflight(vq); return tv_cmd; } @@ -812,7 +880,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, for (i = 0; i < data_num; i++) exp_data_len += vq->iov[data_first + i].iov_len; - tv_cmd = vhost_scsi_allocate_cmd(tv_tpg, &v_req, + tv_cmd = vhost_scsi_allocate_cmd(vq, tv_tpg, &v_req, exp_data_len, data_direction); if (IS_ERR(tv_cmd)) { vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n", @@ -949,12 +1017,29 @@ static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index) static void vhost_scsi_flush(struct vhost_scsi *vs) { + struct vhost_scsi_inflight *old_inflight[VHOST_SCSI_MAX_VQ]; int i; + /* Init new inflight and remember the old inflight */ + tcm_vhost_init_inflight(vs, old_inflight); + + /* + * The inflight->kref was initialized to 1. We decrement it here to + * indicate the start of the flush operation so that it will reach 0 + * when all the reqs are finished. + */ + for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) + kref_put(&old_inflight[i]->kref, tcm_vhost_done_inflight); + + /* Flush both the vhost poll and vhost work */ for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) vhost_scsi_flush_vq(vs, i); vhost_work_flush(&vs->dev, &vs->vs_completion_work); vhost_work_flush(&vs->dev, &vs->vs_event_work); + + /* Wait for all reqs issued before the flush to be finished */ + for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) + wait_for_completion(&old_inflight[i]->comp); } /* @@ -1185,6 +1270,9 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) s->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; } r = vhost_dev_init(&s->dev, vqs, VHOST_SCSI_MAX_VQ); + + tcm_vhost_init_inflight(s, NULL); + if (r < 0) { kfree(vqs); kfree(s); diff --git a/drivers/vhost/tcm_vhost.h b/drivers/vhost/tcm_vhost.h index 514b9fda230e..26a57c2fdf92 100644 --- a/drivers/vhost/tcm_vhost.h +++ b/drivers/vhost/tcm_vhost.h @@ -2,6 +2,7 @@ #define TCM_VHOST_NAMELEN 256 #define TCM_VHOST_MAX_CDB_SIZE 32 +struct vhost_scsi_inflight; struct tcm_vhost_cmd { /* Descriptor from 
vhost_get_vq_desc() for virt_queue segment */ int tvc_vq_desc; @@ -37,6 +38,8 @@ struct tcm_vhost_cmd { unsigned char tvc_sense_buf[TRANSPORT_SENSE_BUFFER]; /* Completed commands list, serviced from vhost worker thread */ struct llist_node tvc_completion_list; + /* Used to track inflight cmd */ + struct vhost_scsi_inflight *inflight; }; struct tcm_vhost_nexus { From 2839400f8fe28ce216eeeba3fb97bdf90977f7ad Mon Sep 17 00:00:00 2001 From: Asias He Date: Sat, 27 Apr 2013 15:07:46 +0800 Subject: [PATCH 03/10] vhost: move vhost-net zerocopy fields to net.c On top of 'vhost: Allow device specific fields per vq', we can move the vhost-net specific zerocopy fields out of the generic vhost virt queue and into the vhost-net virt queue. Signed-off-by: Asias He Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 164 ++++++++++++++++++++++++++++++++++++------ drivers/vhost/vhost.c | 57 +-------------- drivers/vhost/vhost.h | 22 ------ 3 files changed, 142 insertions(+), 101 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 176aa030dc5f..8672e0538d59 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,8 +64,24 @@ enum { VHOST_NET_VQ_MAX = 2, }; +struct vhost_ubuf_ref { + struct kref kref; + wait_queue_head_t wait; + struct vhost_virtqueue *vq; +}; + struct vhost_net_virtqueue { struct vhost_virtqueue vq; + /* vhost zerocopy support fields below: */ + /* last used idx for outstanding DMA zerocopy buffers */ + int upend_idx; + /* first used idx for DMA done zerocopy buffers */ + int done_idx; + /* an array of userspace buffers info */ + struct ubuf_info *ubuf_info; + /* Reference counting for outstanding ubufs. + * Protected by vq mutex. Writers must also take device mutex.
*/ + struct vhost_ubuf_ref *ubufs; }; struct vhost_net { @@ -82,6 +98,88 @@ struct vhost_net { bool tx_flush; }; +static unsigned vhost_zcopy_mask __read_mostly; + +void vhost_enable_zcopy(int vq) +{ + vhost_zcopy_mask |= 0x1 << vq; +} + +static void vhost_zerocopy_done_signal(struct kref *kref) +{ + struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref, + kref); + wake_up(&ubufs->wait); +} + +struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq, + bool zcopy) +{ + struct vhost_ubuf_ref *ubufs; + /* No zero copy backend? Nothing to count. */ + if (!zcopy) + return NULL; + ubufs = kmalloc(sizeof(*ubufs), GFP_KERNEL); + if (!ubufs) + return ERR_PTR(-ENOMEM); + kref_init(&ubufs->kref); + init_waitqueue_head(&ubufs->wait); + ubufs->vq = vq; + return ubufs; +} + +void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs) +{ + kref_put(&ubufs->kref, vhost_zerocopy_done_signal); +} + +void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs) +{ + kref_put(&ubufs->kref, vhost_zerocopy_done_signal); + wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount)); + kfree(ubufs); +} + +int vhost_net_set_ubuf_info(struct vhost_net *n) +{ + bool zcopy; + int i; + + for (i = 0; i < n->dev.nvqs; ++i) { + zcopy = vhost_zcopy_mask & (0x1 << i); + if (!zcopy) + continue; + n->vqs[i].ubuf_info = kmalloc(sizeof(*n->vqs[i].ubuf_info) * + UIO_MAXIOV, GFP_KERNEL); + if (!n->vqs[i].ubuf_info) + goto err; + } + return 0; + +err: + while (i--) { + zcopy = vhost_zcopy_mask & (0x1 << i); + if (!zcopy) + continue; + kfree(n->vqs[i].ubuf_info); + } + return -ENOMEM; +} + +void vhost_net_reset_ubuf_info(struct vhost_net *n) +{ + int i; + + for (i = 0; i < VHOST_NET_VQ_MAX; i++) { + n->vqs[i].done_idx = 0; + n->vqs[i].upend_idx = 0; + n->vqs[i].ubufs = NULL; + kfree(n->vqs[i].ubuf_info); + n->vqs[i].ubuf_info = NULL; + } + +} + static void vhost_net_tx_packet(struct vhost_net *net) { ++net->tx_packets; @@ -157,10 +255,12 @@ static void copy_iovec_hdr(const struct iovec 
*from, struct iovec *to, static int vhost_zerocopy_signal_used(struct vhost_net *net, struct vhost_virtqueue *vq) { + struct vhost_net_virtqueue *nvq = + container_of(vq, struct vhost_net_virtqueue, vq); int i; int j = 0; - for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) { + for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) { if (vq->heads[i].len == VHOST_DMA_FAILED_LEN) vhost_net_tx_err(net); if (VHOST_DMA_IS_DONE(vq->heads[i].len)) { @@ -172,7 +272,7 @@ static int vhost_zerocopy_signal_used(struct vhost_net *net, break; } if (j) - vq->done_idx = i; + nvq->done_idx = i; return j; } @@ -203,6 +303,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) static void handle_tx(struct vhost_net *net) { struct vhost_virtqueue *vq = &net->vqs[VHOST_NET_VQ_TX].vq; + struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; unsigned out, in, s; int head; struct msghdr msg = { @@ -229,7 +330,7 @@ static void handle_tx(struct vhost_net *net) vhost_disable_notify(&net->dev, vq); hdr_size = vq->vhost_hlen; - zcopy = vq->ubufs; + zcopy = nvq->ubufs; for (;;) { /* Release DMAs done buffers first */ @@ -250,9 +351,10 @@ static void handle_tx(struct vhost_net *net) /* If more outstanding DMAs, queue the work. * Handle upend_idx wrap around */ - num_pends = likely(vq->upend_idx >= vq->done_idx) ? - (vq->upend_idx - vq->done_idx) : - (vq->upend_idx + UIO_MAXIOV - vq->done_idx); + num_pends = likely(nvq->upend_idx >= nvq->done_idx) ? 
+ (nvq->upend_idx - nvq->done_idx) : + (nvq->upend_idx + UIO_MAXIOV - + nvq->done_idx); if (unlikely(num_pends > VHOST_MAX_PEND)) break; if (unlikely(vhost_enable_notify(&net->dev, vq))) { @@ -278,34 +380,34 @@ static void handle_tx(struct vhost_net *net) break; } zcopy_used = zcopy && (len >= VHOST_GOODCOPY_LEN || - vq->upend_idx != vq->done_idx); + nvq->upend_idx != nvq->done_idx); /* use msg_control to pass vhost zerocopy ubuf info to skb */ if (zcopy_used) { - vq->heads[vq->upend_idx].id = head; + vq->heads[nvq->upend_idx].id = head; if (!vhost_net_tx_select_zcopy(net) || len < VHOST_GOODCOPY_LEN) { /* copy don't need to wait for DMA done */ - vq->heads[vq->upend_idx].len = + vq->heads[nvq->upend_idx].len = VHOST_DMA_DONE_LEN; msg.msg_control = NULL; msg.msg_controllen = 0; ubufs = NULL; } else { struct ubuf_info *ubuf; - ubuf = vq->ubuf_info + vq->upend_idx; + ubuf = nvq->ubuf_info + nvq->upend_idx; - vq->heads[vq->upend_idx].len = + vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS; ubuf->callback = vhost_zerocopy_callback; - ubuf->ctx = vq->ubufs; - ubuf->desc = vq->upend_idx; + ubuf->ctx = nvq->ubufs; + ubuf->desc = nvq->upend_idx; msg.msg_control = ubuf; msg.msg_controllen = sizeof(ubuf); - ubufs = vq->ubufs; + ubufs = nvq->ubufs; kref_get(&ubufs->kref); } - vq->upend_idx = (vq->upend_idx + 1) % UIO_MAXIOV; + nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV; } /* TODO: Check specific error and bomb out unless ENOBUFS? 
*/ err = sock->ops->sendmsg(NULL, sock, &msg, len); @@ -313,8 +415,8 @@ static void handle_tx(struct vhost_net *net) if (zcopy_used) { if (ubufs) vhost_ubuf_put(ubufs); - vq->upend_idx = ((unsigned)vq->upend_idx - 1) % - UIO_MAXIOV; + nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) + % UIO_MAXIOV; } vhost_discard_vq_desc(vq, 1); break; @@ -564,7 +666,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL); struct vhost_dev *dev; struct vhost_virtqueue **vqs; - int r; + int r, i; if (!n) return -ENOMEM; @@ -579,6 +681,12 @@ static int vhost_net_open(struct inode *inode, struct file *f) vqs[VHOST_NET_VQ_RX] = &n->vqs[VHOST_NET_VQ_RX].vq; n->vqs[VHOST_NET_VQ_TX].vq.handle_kick = handle_tx_kick; n->vqs[VHOST_NET_VQ_RX].vq.handle_kick = handle_rx_kick; + for (i = 0; i < VHOST_NET_VQ_MAX; i++) { + n->vqs[i].ubufs = NULL; + n->vqs[i].ubuf_info = NULL; + n->vqs[i].upend_idx = 0; + n->vqs[i].done_idx = 0; + } r = vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX); if (r < 0) { kfree(n); @@ -652,15 +760,15 @@ static void vhost_net_flush(struct vhost_net *n) { vhost_net_flush_vq(n, VHOST_NET_VQ_TX); vhost_net_flush_vq(n, VHOST_NET_VQ_RX); - if (n->vqs[VHOST_NET_VQ_TX].vq.ubufs) { + if (n->vqs[VHOST_NET_VQ_TX].ubufs) { mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); n->tx_flush = true; mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); /* Wait for all lower device DMAs done. 
*/ - vhost_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].vq.ubufs); + vhost_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].ubufs); mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); n->tx_flush = false; - kref_init(&n->vqs[VHOST_NET_VQ_TX].vq.ubufs->kref); + kref_init(&n->vqs[VHOST_NET_VQ_TX].ubufs->kref); mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); } } @@ -675,6 +783,7 @@ static int vhost_net_release(struct inode *inode, struct file *f) vhost_net_flush(n); vhost_dev_stop(&n->dev); vhost_dev_cleanup(&n->dev, false); + vhost_net_reset_ubuf_info(n); if (tx_sock) fput(tx_sock->file); if (rx_sock) @@ -756,6 +865,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) { struct socket *sock, *oldsock; struct vhost_virtqueue *vq; + struct vhost_net_virtqueue *nvq; struct vhost_ubuf_ref *ubufs, *oldubufs = NULL; int r; @@ -769,6 +879,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) goto err; } vq = &n->vqs[index].vq; + nvq = &n->vqs[index]; mutex_lock(&vq->mutex); /* Verify that ring has been setup correctly. 
*/ @@ -801,8 +912,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) if (r) goto err_used; - oldubufs = vq->ubufs; - vq->ubufs = ubufs; + oldubufs = nvq->ubufs; + nvq->ubufs = ubufs; n->tx_packets = 0; n->tx_zcopy_err = 0; @@ -853,6 +964,7 @@ static long vhost_net_reset_owner(struct vhost_net *n) vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); err = vhost_dev_reset_owner(&n->dev); + vhost_net_reset_ubuf_info(n); done: mutex_unlock(&n->dev.mutex); if (tx_sock) @@ -928,11 +1040,17 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl, return vhost_net_reset_owner(n); default: mutex_lock(&n->dev.mutex); + if (ioctl == VHOST_SET_OWNER) { + r = vhost_net_set_ubuf_info(n); + if (r) + goto out; + } r = vhost_dev_ioctl(&n->dev, ioctl, argp); if (r == -ENOIOCTLCMD) r = vhost_vring_ioctl(&n->dev, ioctl, argp); else vhost_net_flush(n); +out: mutex_unlock(&n->dev.mutex); return r; } diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index bef8b6bae186..6644812e99b4 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -33,8 +33,6 @@ enum { VHOST_MEMORY_F_LOG = 0x1, }; -static unsigned vhost_zcopy_mask __read_mostly; - #define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num]) #define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num]) @@ -191,9 +189,6 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->call_ctx = NULL; vq->call = NULL; vq->log_ctx = NULL; - vq->upend_idx = 0; - vq->done_idx = 0; - vq->ubufs = NULL; } static int vhost_worker(void *data) @@ -253,20 +248,12 @@ static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) vq->log = NULL; kfree(vq->heads); vq->heads = NULL; - kfree(vq->ubuf_info); - vq->ubuf_info = NULL; -} - -void vhost_enable_zcopy(int vq) -{ - vhost_zcopy_mask |= 0x1 << vq; } /* Helper to allocate iovec buffers for all vqs. 
*/ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) { int i; - bool zcopy; for (i = 0; i < dev->nvqs; ++i) { dev->vqs[i]->indirect = kmalloc(sizeof *dev->vqs[i]->indirect * @@ -275,14 +262,8 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) GFP_KERNEL); dev->vqs[i]->heads = kmalloc(sizeof *dev->vqs[i]->heads * UIO_MAXIOV, GFP_KERNEL); - zcopy = vhost_zcopy_mask & (0x1 << i); - if (zcopy) - dev->vqs[i]->ubuf_info = - kmalloc(sizeof *dev->vqs[i]->ubuf_info * - UIO_MAXIOV, GFP_KERNEL); if (!dev->vqs[i]->indirect || !dev->vqs[i]->log || - !dev->vqs[i]->heads || - (zcopy && !dev->vqs[i]->ubuf_info)) + !dev->vqs[i]->heads) goto err_nomem; } return 0; @@ -321,7 +302,6 @@ long vhost_dev_init(struct vhost_dev *dev, dev->vqs[i]->log = NULL; dev->vqs[i]->indirect = NULL; dev->vqs[i]->heads = NULL; - dev->vqs[i]->ubuf_info = NULL; dev->vqs[i]->dev = dev; mutex_init(&dev->vqs[i]->mutex); vhost_vq_reset(dev, dev->vqs[i]); @@ -1551,38 +1531,3 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) &vq->used->flags, r); } } - -static void vhost_zerocopy_done_signal(struct kref *kref) -{ - struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref, - kref); - wake_up(&ubufs->wait); -} - -struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq, - bool zcopy) -{ - struct vhost_ubuf_ref *ubufs; - /* No zero copy backend? Nothing to count. 
*/ - if (!zcopy) - return NULL; - ubufs = kmalloc(sizeof *ubufs, GFP_KERNEL); - if (!ubufs) - return ERR_PTR(-ENOMEM); - kref_init(&ubufs->kref); - init_waitqueue_head(&ubufs->wait); - ubufs->vq = vq; - return ubufs; -} - -void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs) -{ - kref_put(&ubufs->kref, vhost_zerocopy_done_signal); -} - -void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs) -{ - kref_put(&ubufs->kref, vhost_zerocopy_done_signal); - wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount)); - kfree(ubufs); -} diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index f3afa8a41fe0..3a36712e0792 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -54,18 +54,6 @@ struct vhost_log { struct vhost_virtqueue; -struct vhost_ubuf_ref { - struct kref kref; - wait_queue_head_t wait; - struct vhost_virtqueue *vq; -}; - -struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *, bool zcopy); -void vhost_ubuf_put(struct vhost_ubuf_ref *); -void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *); - -struct ubuf_info; - /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; @@ -130,16 +118,6 @@ struct vhost_virtqueue { /* Log write descriptors */ void __user *log_base; struct vhost_log *log; - /* vhost zerocopy support fields below: */ - /* last used idx for outstanding DMA zerocopy buffers */ - int upend_idx; - /* first used idx for DMA done zerocopy buffers */ - int done_idx; - /* an array of userspace buffers info */ - struct ubuf_info *ubuf_info; - /* Reference counting for outstanding ubufs. - * Protected by vq mutex. Writers must also take device mutex. */ - struct vhost_ubuf_ref *ubufs; }; struct vhost_dev { From 3dfbff328f0491b7049673cf7fd568d26a14fc4d Mon Sep 17 00:00:00 2001 From: "Michael S. 
Tsirkin" Date: Sun, 28 Apr 2013 15:38:52 +0300 Subject: [PATCH 04/10] tcm_vhost: document inflight ref-counting use Add more comments so we remember not to break it next time we change things. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/tcm_vhost.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index afb530887936..96d3b47c82cd 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -83,9 +83,16 @@ struct vhost_scsi_inflight { struct vhost_scsi_virtqueue { struct vhost_virtqueue vq; - /* Track inflight reqs, protected by vq->mutex */ + /* + * Reference counting for inflight reqs, used for flush operation. At + * each time, one reference tracks new commands submitted, while we + * wait for another one to reach 0. + */ struct vhost_scsi_inflight inflights[2]; - /* Indicate current inflight in use, protected by vq->mutex */ + /* + * Indicate current inflight in use, protected by vq->mutex. + * Writers must also take dev mutex and flush under it. + */ int inflight_idx; }; @@ -1015,6 +1022,7 @@ static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index) vhost_poll_flush(&vs->vqs[index].vq.poll); } +/* Callers must hold dev mutex */ static void vhost_scsi_flush(struct vhost_scsi *vs) { struct vhost_scsi_inflight *old_inflight[VHOST_SCSI_MAX_VQ]; From 81f95a55802be669b3191b2828c34006d0f04214 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 28 Apr 2013 15:51:40 +0300 Subject: [PATCH 05/10] vhost: move per-vq net specific fields out to net This will remove the need for vhost scsi to pull in virtio-net.h. Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/net.c | 43 +++++++++++++++++++++++++++---------------- drivers/vhost/vhost.c | 2 -- drivers/vhost/vhost.h | 3 --- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 8672e0538d59..e34e195b9cf6 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -72,6 +72,12 @@ struct vhost_ubuf_ref { struct vhost_net_virtqueue { struct vhost_virtqueue vq; + /* hdr is used to store the virtio header. + * Since each iovec has >= 1 byte length, we never need more than + * header length entries to store the header. */ + struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)]; + size_t vhost_hlen; + size_t sock_hlen; /* vhost zerocopy support fields below: */ /* last used idx for outstanding DMA zerocopy buffers */ int upend_idx; @@ -166,7 +172,7 @@ int vhost_net_set_ubuf_info(struct vhost_net *n) return -ENOMEM; } -void vhost_net_reset_ubuf_info(struct vhost_net *n) +void vhost_net_vq_reset(struct vhost_net *n) { int i; @@ -176,6 +182,8 @@ void vhost_net_reset_ubuf_info(struct vhost_net *n) n->vqs[i].ubufs = NULL; kfree(n->vqs[i].ubuf_info); n->vqs[i].ubuf_info = NULL; + n->vqs[i].vhost_hlen = 0; + n->vqs[i].sock_hlen = 0; } } @@ -302,8 +310,8 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) * read-size critical section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) { - struct vhost_virtqueue *vq = &net->vqs[VHOST_NET_VQ_TX].vq; struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; + struct vhost_virtqueue *vq = &nvq->vq; unsigned out, in, s; int head; struct msghdr msg = { @@ -329,7 +337,7 @@ static void handle_tx(struct vhost_net *net) mutex_lock(&vq->mutex); vhost_disable_notify(&net->dev, vq); - hdr_size = vq->vhost_hlen; + hdr_size = nvq->vhost_hlen; zcopy = nvq->ubufs; for (;;) { @@ -369,14 +377,14 @@ static void handle_tx(struct vhost_net *net) break; } /* Skip header. TODO: support TSO. 
*/ - s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out); + s = move_iovec_hdr(vq->iov, nvq->hdr, hdr_size, out); msg.msg_iovlen = out; len = iov_length(vq->iov, out); /* Sanity check */ if (!len) { vq_err(vq, "Unexpected header len for TX: " "%zd expected %zd\n", - iov_length(vq->hdr, s), hdr_size); + iov_length(nvq->hdr, s), hdr_size); break; } zcopy_used = zcopy && (len >= VHOST_GOODCOPY_LEN || @@ -523,7 +531,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, * read-size critical section for our kind of RCU. */ static void handle_rx(struct vhost_net *net) { - struct vhost_virtqueue *vq = &net->vqs[VHOST_NET_VQ_RX].vq; + struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX]; + struct vhost_virtqueue *vq = &nvq->vq; unsigned uninitialized_var(in), log; struct vhost_log *vq_log; struct msghdr msg = { @@ -551,8 +560,8 @@ static void handle_rx(struct vhost_net *net) mutex_lock(&vq->mutex); vhost_disable_notify(&net->dev, vq); - vhost_hlen = vq->vhost_hlen; - sock_hlen = vq->sock_hlen; + vhost_hlen = nvq->vhost_hlen; + sock_hlen = nvq->sock_hlen; vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? vq->log : NULL; @@ -582,11 +591,11 @@ static void handle_rx(struct vhost_net *net) /* We don't need to be notified again. */ if (unlikely((vhost_hlen))) /* Skip header. TODO: support TSO. */ - move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in); + move_iovec_hdr(vq->iov, nvq->hdr, vhost_hlen, in); else /* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF: * needed because recvmsg can modify msg_iov. 
*/ - copy_iovec_hdr(vq->iov, vq->hdr, sock_hlen, in); + copy_iovec_hdr(vq->iov, nvq->hdr, sock_hlen, in); msg.msg_iovlen = in; err = sock->ops->recvmsg(NULL, sock, &msg, sock_len, MSG_DONTWAIT | MSG_TRUNC); @@ -600,7 +609,7 @@ static void handle_rx(struct vhost_net *net) continue; } if (unlikely(vhost_hlen) && - memcpy_toiovecend(vq->hdr, (unsigned char *)&hdr, 0, + memcpy_toiovecend(nvq->hdr, (unsigned char *)&hdr, 0, vhost_hlen)) { vq_err(vq, "Unable to write vnet_hdr at addr %p\n", vq->iov->iov_base); @@ -608,7 +617,7 @@ static void handle_rx(struct vhost_net *net) } /* TODO: Should check and handle checksum. */ if (likely(mergeable) && - memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount, + memcpy_toiovecend(nvq->hdr, (unsigned char *)&headcount, offsetof(typeof(hdr), num_buffers), sizeof hdr.num_buffers)) { vq_err(vq, "Failed num_buffers write"); @@ -686,6 +695,8 @@ static int vhost_net_open(struct inode *inode, struct file *f) n->vqs[i].ubuf_info = NULL; n->vqs[i].upend_idx = 0; n->vqs[i].done_idx = 0; + n->vqs[i].vhost_hlen = 0; + n->vqs[i].sock_hlen = 0; } r = vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX); if (r < 0) { @@ -783,7 +794,7 @@ static int vhost_net_release(struct inode *inode, struct file *f) vhost_net_flush(n); vhost_dev_stop(&n->dev); vhost_dev_cleanup(&n->dev, false); - vhost_net_reset_ubuf_info(n); + vhost_net_vq_reset(n); if (tx_sock) fput(tx_sock->file); if (rx_sock) @@ -964,7 +975,7 @@ static long vhost_net_reset_owner(struct vhost_net *n) vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); err = vhost_dev_reset_owner(&n->dev); - vhost_net_reset_ubuf_info(n); + vhost_net_vq_reset(n); done: mutex_unlock(&n->dev.mutex); if (tx_sock) @@ -1001,8 +1012,8 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) smp_wmb(); for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { mutex_lock(&n->vqs[i].vq.mutex); - n->vqs[i].vq.vhost_hlen = vhost_hlen; - n->vqs[i].vq.sock_hlen = sock_hlen; + n->vqs[i].vhost_hlen = vhost_hlen; + 
n->vqs[i].sock_hlen = sock_hlen; mutex_unlock(&n->vqs[i].vq.mutex); } vhost_net_flush(n); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 6644812e99b4..6dcd81c87432 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -179,8 +179,6 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->used_flags = 0; vq->log_used = false; vq->log_addr = -1ull; - vq->vhost_hlen = 0; - vq->sock_hlen = 0; vq->private_data = NULL; vq->log_base = NULL; vq->error_ctx = NULL; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 3a36712e0792..1627eec0ca25 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -102,10 +102,7 @@ struct vhost_virtqueue { /* hdr is used to store the virtio header. * Since each iovec has >= 1 byte length, we never need more than * header length entries to store the header. */ - struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)]; struct iovec *indirect; - size_t vhost_hlen; - size_t sock_hlen; struct vring_used_elem *heads; /* We use a kind of RCU to access private pointer. * All readers access it from worker, which makes it possible to From 061b16cfe3dc7a106dd29b76f6355d84464d126c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 28 Apr 2013 15:52:53 +0300 Subject: [PATCH 06/10] tcm_vhost: remove virtio-net.h dependency vhost.h only has generic bits now, so we can drop the virtio-net.h include in tcm_vhost. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/tcm_vhost.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 96d3b47c82cd..19ca021bf88c 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -45,7 +45,6 @@ #include #include #include -#include /* TODO vhost.h currently depends on this */ #include #include #include From 150b9e51ae975ca1fe468c565870fbc4a96e0574 Mon Sep 17 00:00:00 2001 From: "Michael S. 
Tsirkin" Date: Sun, 28 Apr 2013 17:12:08 +0300 Subject: [PATCH 07/10] vhost: fix error handling in RESET_OWNER ioctl RESET_OWNER ioctl would leave the fd in a bad state if memory allocation failed: device is stopped but owner is not reset. Make state changes after allocating memory, such that a failed ioctl has no effect. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 8 +++++++- drivers/vhost/test.c | 9 ++++++++- drivers/vhost/vhost.c | 16 +++++++--------- drivers/vhost/vhost.h | 3 ++- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index e34e195b9cf6..a3645bd163d8 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -967,14 +967,20 @@ static long vhost_net_reset_owner(struct vhost_net *n) struct socket *tx_sock = NULL; struct socket *rx_sock = NULL; long err; + struct vhost_memory *memory; mutex_lock(&n->dev.mutex); err = vhost_dev_check_owner(&n->dev); if (err) goto done; + memory = vhost_dev_reset_owner_prepare(); + if (!memory) { + err = -ENOMEM; + goto done; + } vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); - err = vhost_dev_reset_owner(&n->dev); + vhost_dev_reset_owner(&n->dev, memory); vhost_net_vq_reset(n); done: mutex_unlock(&n->dev.mutex); diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 91d6f060aade..be65414d5bb1 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -219,13 +219,20 @@ static long vhost_test_reset_owner(struct vhost_test *n) { void *priv = NULL; long err; + struct vhost_memory *memory; + mutex_lock(&n->dev.mutex); err = vhost_dev_check_owner(&n->dev); if (err) goto done; + memory = vhost_dev_reset_owner_prepare(); + if (!memory) { + err = -ENOMEM; + goto done; + } vhost_test_stop(n, &priv); vhost_test_flush(n); - err = vhost_dev_reset_owner(&n->dev); + vhost_dev_reset_owner(&n->dev, memory); done: mutex_unlock(&n->dev.mutex); return err; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 
6dcd81c87432..749b5ab5bfbb 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -386,21 +386,19 @@ static long vhost_dev_set_owner(struct vhost_dev *dev) return err; } -/* Caller should have device mutex */ -long vhost_dev_reset_owner(struct vhost_dev *dev) +struct vhost_memory *vhost_dev_reset_owner_prepare(void) { - struct vhost_memory *memory; - - /* Restore memory to default empty mapping. */ - memory = kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL); - if (!memory) - return -ENOMEM; + return kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL); +} +/* Caller should have device mutex */ +void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory) +{ vhost_dev_cleanup(dev, true); + /* Restore memory to default empty mapping. */ memory->nregions = 0; RCU_INIT_POINTER(dev->memory, memory); - return 0; } void vhost_dev_stop(struct vhost_dev *dev) diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 1627eec0ca25..b58f4ae82cb8 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -136,7 +136,8 @@ struct vhost_dev { long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs); long vhost_dev_check_owner(struct vhost_dev *); -long vhost_dev_reset_owner(struct vhost_dev *); +struct vhost_memory *vhost_dev_reset_owner_prepare(void); +void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_memory *); void vhost_dev_cleanup(struct vhost_dev *, bool locked); void vhost_dev_stop(struct vhost_dev *); long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp); From eb62b74e90790dbc1aca7ea2a7161e23de7c9065 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 2 May 2013 03:37:38 +0300 Subject: [PATCH 08/10] vhost: src file renames Move tcm_vhost.c -> scsi.c Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Asias He Acked-by: Nicholas Bellinger --- drivers/vhost/Makefile | 1 + drivers/vhost/{tcm_vhost.c => scsi.c} | 0 2 files changed, 1 insertion(+) rename drivers/vhost/{tcm_vhost.c => scsi.c} (100%) diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile index a27b053bc9ab..6ea9cecb1fb1 100644 --- a/drivers/vhost/Makefile +++ b/drivers/vhost/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_VHOST_NET) += vhost_net.o vhost_net-y := vhost.o net.o obj-$(CONFIG_TCM_VHOST) += tcm_vhost.o +tcm_vhost-y := scsi.o diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/scsi.c similarity index 100% rename from drivers/vhost/tcm_vhost.c rename to drivers/vhost/scsi.c From 5012a3a384ad917b5c72a918607bd0cc64452ff8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 2 May 2013 03:50:34 +0300 Subject: [PATCH 09/10] tcm_vhost: header split up move uapi parts to vhost.h move .c private parts to .c itself Signed-off-by: Michael S. Tsirkin Reviewed-by: Asias He Acked-by: Nicholas Bellinger --- drivers/vhost/scsi.c | 112 ++++++++++++++++++++++++++++--- drivers/vhost/tcm_vhost.h | 131 ------------------------------------- include/uapi/linux/vhost.h | 28 ++++++++ 3 files changed, 132 insertions(+), 139 deletions(-) delete mode 100644 drivers/vhost/tcm_vhost.h diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 19ca021bf88c..eb1aa56dced8 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -51,7 +51,110 @@ #include "vhost.c" #include "vhost.h" -#include "tcm_vhost.h" + +#define TCM_VHOST_VERSION "v0.1" +#define TCM_VHOST_NAMELEN 256 +#define TCM_VHOST_MAX_CDB_SIZE 32 + +struct vhost_scsi_inflight { + /* Wait for the flush operation to finish */ + struct completion comp; + /* Refcount for the inflight reqs */ + struct kref kref; +}; + +struct tcm_vhost_cmd { + /* Descriptor from vhost_get_vq_desc() for virt_queue segment */ + int tvc_vq_desc; + /* virtio-scsi initiator task attribute */ + int tvc_task_attr; + /* virtio-scsi initiator data 
direction */ + enum dma_data_direction tvc_data_direction; + /* Expected data transfer length from virtio-scsi header */ + u32 tvc_exp_data_len; + /* The Tag from include/linux/virtio_scsi.h:struct virtio_scsi_cmd_req */ + u64 tvc_tag; + /* The number of scatterlists associated with this cmd */ + u32 tvc_sgl_count; + /* Saved unpacked SCSI LUN for tcm_vhost_submission_work() */ + u32 tvc_lun; + /* Pointer to the SGL formatted memory from virtio-scsi */ + struct scatterlist *tvc_sgl; + /* Pointer to response */ + struct virtio_scsi_cmd_resp __user *tvc_resp; + /* Pointer to vhost_scsi for our device */ + struct vhost_scsi *tvc_vhost; + /* Pointer to vhost_virtqueue for the cmd */ + struct vhost_virtqueue *tvc_vq; + /* Pointer to vhost nexus memory */ + struct tcm_vhost_nexus *tvc_nexus; + /* The TCM I/O descriptor that is accessed via container_of() */ + struct se_cmd tvc_se_cmd; + /* work item used for cmwq dispatch to tcm_vhost_submission_work() */ + struct work_struct work; + /* Copy of the incoming SCSI command descriptor block (CDB) */ + unsigned char tvc_cdb[TCM_VHOST_MAX_CDB_SIZE]; + /* Sense buffer that will be mapped into outgoing status */ + unsigned char tvc_sense_buf[TRANSPORT_SENSE_BUFFER]; + /* Completed commands list, serviced from vhost worker thread */ + struct llist_node tvc_completion_list; + /* Used to track inflight cmd */ + struct vhost_scsi_inflight *inflight; +}; + +struct tcm_vhost_nexus { + /* Pointer to TCM session for I_T Nexus */ + struct se_session *tvn_se_sess; +}; + +struct tcm_vhost_nacl { + /* Binary World Wide unique Port Name for Vhost Initiator port */ + u64 iport_wwpn; + /* ASCII formatted WWPN for Sas Initiator port */ + char iport_name[TCM_VHOST_NAMELEN]; + /* Returned by tcm_vhost_make_nodeacl() */ + struct se_node_acl se_node_acl; +}; + +struct vhost_scsi; +struct tcm_vhost_tpg { + /* Vhost port target portal group tag for TCM */ + u16 tport_tpgt; + /* Used to track number of TPG Port/Lun Links wrt to explicit I_T Nexus 
shutdown */ + int tv_tpg_port_count; + /* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */ + int tv_tpg_vhost_count; + /* list for tcm_vhost_list */ + struct list_head tv_tpg_list; + /* Used to protect access for tpg_nexus */ + struct mutex tv_tpg_mutex; + /* Pointer to the TCM VHost I_T Nexus for this TPG endpoint */ + struct tcm_vhost_nexus *tpg_nexus; + /* Pointer back to tcm_vhost_tport */ + struct tcm_vhost_tport *tport; + /* Returned by tcm_vhost_make_tpg() */ + struct se_portal_group se_tpg; + /* Pointer back to vhost_scsi, protected by tv_tpg_mutex */ + struct vhost_scsi *vhost_scsi; +}; + +struct tcm_vhost_tport { + /* SCSI protocol the tport is providing */ + u8 tport_proto_id; + /* Binary World Wide unique Port Name for Vhost Target port */ + u64 tport_wwpn; + /* ASCII formatted WWPN for Vhost Target port */ + char tport_name[TCM_VHOST_NAMELEN]; + /* Returned by tcm_vhost_make_tport() */ + struct se_wwn tport_wwn; +}; + +struct tcm_vhost_evt { + /* event to be sent to guest */ + struct virtio_scsi_event event; + /* event list, serviced from vhost worker thread */ + struct llist_node list; +}; enum { VHOST_SCSI_VQ_CTL = 0, @@ -73,13 +176,6 @@ enum { #define VHOST_SCSI_MAX_VQ 128 #define VHOST_SCSI_MAX_EVENT 128 -struct vhost_scsi_inflight { - /* Wait for the flush operation to finish */ - struct completion comp; - /* Refcount for the inflight reqs */ - struct kref kref; -}; - struct vhost_scsi_virtqueue { struct vhost_virtqueue vq; /* diff --git a/drivers/vhost/tcm_vhost.h b/drivers/vhost/tcm_vhost.h deleted file mode 100644 index 26a57c2fdf92..000000000000 --- a/drivers/vhost/tcm_vhost.h +++ /dev/null @@ -1,131 +0,0 @@ -#define TCM_VHOST_VERSION "v0.1" -#define TCM_VHOST_NAMELEN 256 -#define TCM_VHOST_MAX_CDB_SIZE 32 - -struct vhost_scsi_inflight; -struct tcm_vhost_cmd { - /* Descriptor from vhost_get_vq_desc() for virt_queue segment */ - int tvc_vq_desc; - /* virtio-scsi initiator task attribute */ - int tvc_task_attr; - 
/* virtio-scsi initiator data direction */ - enum dma_data_direction tvc_data_direction; - /* Expected data transfer length from virtio-scsi header */ - u32 tvc_exp_data_len; - /* The Tag from include/linux/virtio_scsi.h:struct virtio_scsi_cmd_req */ - u64 tvc_tag; - /* The number of scatterlists associated with this cmd */ - u32 tvc_sgl_count; - /* Saved unpacked SCSI LUN for tcm_vhost_submission_work() */ - u32 tvc_lun; - /* Pointer to the SGL formatted memory from virtio-scsi */ - struct scatterlist *tvc_sgl; - /* Pointer to response */ - struct virtio_scsi_cmd_resp __user *tvc_resp; - /* Pointer to vhost_scsi for our device */ - struct vhost_scsi *tvc_vhost; - /* Pointer to vhost_virtqueue for the cmd */ - struct vhost_virtqueue *tvc_vq; - /* Pointer to vhost nexus memory */ - struct tcm_vhost_nexus *tvc_nexus; - /* The TCM I/O descriptor that is accessed via container_of() */ - struct se_cmd tvc_se_cmd; - /* work item used for cmwq dispatch to tcm_vhost_submission_work() */ - struct work_struct work; - /* Copy of the incoming SCSI command descriptor block (CDB) */ - unsigned char tvc_cdb[TCM_VHOST_MAX_CDB_SIZE]; - /* Sense buffer that will be mapped into outgoing status */ - unsigned char tvc_sense_buf[TRANSPORT_SENSE_BUFFER]; - /* Completed commands list, serviced from vhost worker thread */ - struct llist_node tvc_completion_list; - /* Used to track inflight cmd */ - struct vhost_scsi_inflight *inflight; -}; - -struct tcm_vhost_nexus { - /* Pointer to TCM session for I_T Nexus */ - struct se_session *tvn_se_sess; -}; - -struct tcm_vhost_nacl { - /* Binary World Wide unique Port Name for Vhost Initiator port */ - u64 iport_wwpn; - /* ASCII formatted WWPN for Sas Initiator port */ - char iport_name[TCM_VHOST_NAMELEN]; - /* Returned by tcm_vhost_make_nodeacl() */ - struct se_node_acl se_node_acl; -}; - -struct vhost_scsi; -struct tcm_vhost_tpg { - /* Vhost port target portal group tag for TCM */ - u16 tport_tpgt; - /* Used to track number of TPG Port/Lun Links 
wrt to explict I_T Nexus shutdown */ - int tv_tpg_port_count; - /* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */ - int tv_tpg_vhost_count; - /* list for tcm_vhost_list */ - struct list_head tv_tpg_list; - /* Used to protect access for tpg_nexus */ - struct mutex tv_tpg_mutex; - /* Pointer to the TCM VHost I_T Nexus for this TPG endpoint */ - struct tcm_vhost_nexus *tpg_nexus; - /* Pointer back to tcm_vhost_tport */ - struct tcm_vhost_tport *tport; - /* Returned by tcm_vhost_make_tpg() */ - struct se_portal_group se_tpg; - /* Pointer back to vhost_scsi, protected by tv_tpg_mutex */ - struct vhost_scsi *vhost_scsi; -}; - -struct tcm_vhost_tport { - /* SCSI protocol the tport is providing */ - u8 tport_proto_id; - /* Binary World Wide unique Port Name for Vhost Target port */ - u64 tport_wwpn; - /* ASCII formatted WWPN for Vhost Target port */ - char tport_name[TCM_VHOST_NAMELEN]; - /* Returned by tcm_vhost_make_tport() */ - struct se_wwn tport_wwn; -}; - -struct tcm_vhost_evt { - /* event to be sent to guest */ - struct virtio_scsi_event event; - /* event list, serviced from vhost worker thread */ - struct llist_node list; -}; - -/* - * As per request from MST, keep TCM_VHOST related ioctl defines out of - * linux/vhost.h (user-space) for now.. - */ - -#include - -/* - * Used by QEMU userspace to ensure a consistent vhost-scsi ABI. - * - * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate + - * RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage - * ABI Rev 1: January 2013. Ignore vhost_tpgt filed in struct vhost_scsi_target. - * All the targets under vhost_wwpn can be seen and used by guset. 
- */ - -#define VHOST_SCSI_ABI_VERSION 1 - -struct vhost_scsi_target { - int abi_version; - char vhost_wwpn[TRANSPORT_IQN_LEN]; - unsigned short vhost_tpgt; - unsigned short reserved; -}; - -/* VHOST_SCSI specific defines */ -#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target) -#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target) -/* Changing this breaks userspace. */ -#define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int) -/* Set and get the events missed flag */ -#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32) -#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32) diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index e847f1e30756..bb6a5b4cb3c5 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -127,4 +127,32 @@ struct vhost_memory { /* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ #define VHOST_NET_F_VIRTIO_NET_HDR 27 +/* VHOST_SCSI specific definitions */ + +/* + * Used by QEMU userspace to ensure a consistent vhost-scsi ABI. + * + * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate + + * RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage + * ABI Rev 1: January 2013. Ignore vhost_tpgt field in struct vhost_scsi_target. + * All the targets under vhost_wwpn can be seen and used by guest. + */ + +#define VHOST_SCSI_ABI_VERSION 1 + +struct vhost_scsi_target { + int abi_version; + char vhost_wwpn[224]; /* TRANSPORT_IQN_LEN */ + unsigned short vhost_tpgt; + unsigned short reserved; +}; + +#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target) +#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target) +/* Changing this breaks userspace. 
*/ +#define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int) +/* Set and get the events missed flag */ +#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32) +#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32) + #endif From 181c04a357bb791587c55a99362c2fdde2c64f18 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 2 May 2013 03:52:59 +0300 Subject: [PATCH 10/10] vhost_scsi: module rename Rename module and update Kconfig and Makefile. Add alias for compatibility with old userspace scripts if any. Signed-off-by: Michael S. Tsirkin Reviewed-by: Asias He Acked-by: Nicholas Bellinger --- drivers/vhost/Kconfig | 10 +++++++--- drivers/vhost/Kconfig.tcm | 6 ------ drivers/vhost/Makefile | 4 ++-- drivers/vhost/scsi.c | 3 ++- 4 files changed, 11 insertions(+), 12 deletions(-) delete mode 100644 drivers/vhost/Kconfig.tcm diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index bf243177ffe1..26a64e5b8a58 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -9,6 +9,10 @@ config VHOST_NET To compile this driver as a module, choose M here: the module will be called vhost_net. 
-if STAGING -source "drivers/vhost/Kconfig.tcm" -endif +config VHOST_SCSI + tristate "VHOST_SCSI TCM fabric driver" + depends on TARGET_CORE && EVENTFD && m + default n + ---help--- + Say M here to enable the vhost_scsi TCM fabric module + for use with virtio-scsi guests diff --git a/drivers/vhost/Kconfig.tcm b/drivers/vhost/Kconfig.tcm deleted file mode 100644 index 7e3aa28d999e..000000000000 --- a/drivers/vhost/Kconfig.tcm +++ /dev/null @@ -1,6 +0,0 @@ -config TCM_VHOST - tristate "TCM_VHOST fabric module" - depends on TARGET_CORE && EVENTFD && m - default n - ---help--- - Say M here to enable the TCM_VHOST fabric module for use with virtio-scsi guests diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile index 6ea9cecb1fb1..ef21d5fdfa7d 100644 --- a/drivers/vhost/Makefile +++ b/drivers/vhost/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_VHOST_NET) += vhost_net.o vhost_net-y := vhost.o net.o -obj-$(CONFIG_TCM_VHOST) += tcm_vhost.o -tcm_vhost-y := scsi.o +obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o +vhost_scsi-y := scsi.o diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index eb1aa56dced8..5179f7aa1b0b 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -2136,7 +2136,8 @@ static void tcm_vhost_exit(void) destroy_workqueue(tcm_vhost_workqueue); }; -MODULE_DESCRIPTION("TCM_VHOST series fabric driver"); +MODULE_DESCRIPTION("VHOST_SCSI series fabric driver"); +MODULE_ALIAS("tcm_vhost"); MODULE_LICENSE("GPL"); module_init(tcm_vhost_init); module_exit(tcm_vhost_exit);