From 866f4c8e0e26293b5819fd61c241502c79023775 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 22 May 2018 12:42:57 +0200 Subject: [PATCH 1/8] s390/net: add pnetid support s390 hardware supports the definition of a so-call Physical NETwork IDentifier (short PNETID) per network device port. These PNETIDS can be used to identify network devices that are attached to the same physical network (broadcast domain). This patch provides the interface to extract the PNETID of a port of a device attached to the ccw-bus or pci-bus. Parts of this patch are based on an initial implementation by Thomas Richter. Signed-off-by: Ursula Braun Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 3 ++ arch/s390/include/asm/pnet.h | 23 +++++++++++ arch/s390/net/Makefile | 1 + arch/s390/net/pnet.c | 76 ++++++++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+) create mode 100644 arch/s390/include/asm/pnet.h create mode 100644 arch/s390/net/pnet.c diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 199ac3e4da1d9..33072e0bc5895 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -59,6 +59,9 @@ config PCI_QUIRKS config ARCH_SUPPORTS_UPROBES def_bool y +config HAVE_PNETID + def_bool y if SMC + config S390 def_bool y select ARCH_BINFMT_ELF_STATE diff --git a/arch/s390/include/asm/pnet.h b/arch/s390/include/asm/pnet.h new file mode 100644 index 0000000000000..6e278584f8f1d --- /dev/null +++ b/arch/s390/include/asm/pnet.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * IBM System z PNET ID Support + * + * Copyright IBM Corp. 2018 + */ + +#ifndef _ASM_S390_PNET_H +#define _ASM_S390_PNET_H + +#include +#include + +#define PNETIDS_LEN 64 /* Total utility string length in bytes + * to cover up to 4 PNETIDs of 16 bytes + * for up to 4 device ports + */ +#define MAX_PNETID_LEN 16 /* Max.length of a single port PNETID */ +#define MAX_PNETID_PORTS (PNETIDS_LEN / MAX_PNETID_LEN) + /* Max. # of ports with a PNETID */ + +int pnet_id_by_dev_port(struct device *dev, unsigned short port, u8 *pnetid); +#endif /* _ASM_S390_PNET_H */ diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile index e0d5f245e42bc..e2b85ffdbb0c0 100644 --- a/arch/s390/net/Makefile +++ b/arch/s390/net/Makefile @@ -3,3 +3,4 @@ # Arch-specific network modules # obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o +obj-$(CONFIG_HAVE_PNETID) += pnet.o diff --git a/arch/s390/net/pnet.c b/arch/s390/net/pnet.c new file mode 100644 index 0000000000000..ae958ba5337f1 --- /dev/null +++ b/arch/s390/net/pnet.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * IBM System z PNET ID Support + * + * Copyright IBM Corp. 2018 + */ + +#include +#include +#include +#include +#include +#include + +/* + * Get the PNETIDs from a device. + * s390 hardware supports the definition of a so-called Physical Network + * Identifier (short PNETID) per network device port. These PNETIDs can be + * used to identify network devices that are attached to the same physical + * network (broadcast domain). + * + * The device can be + * - a ccwgroup device with all bundled subchannels having the same PNETID + * - a PCI attached network device + * + * Returns: + * 0: PNETIDs extracted from device. + * -ENOMEM: No memory to extract utility string. 
+ * -EOPNOTSUPP: Device type without utility string support + */ +static int pnet_ids_by_device(struct device *dev, u8 *pnetids) +{ + memset(pnetids, 0, PNETIDS_LEN); + if (dev_is_ccwgroup(dev)) { + struct ccwgroup_device *gdev = to_ccwgroupdev(dev); + u8 *util_str; + + util_str = ccw_device_get_util_str(gdev->cdev[0], 0); + if (!util_str) + return -ENOMEM; + memcpy(pnetids, util_str, PNETIDS_LEN); + kfree(util_str); + return 0; + } + if (dev_is_pci(dev)) { + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); + + memcpy(pnetids, zdev->util_str, sizeof(zdev->util_str)); + return 0; + } + return -EOPNOTSUPP; +} + +/* + * Extract the pnetid for a device port. + * + * Return 0 if a pnetid is found and -ENOENT otherwise. + */ +int pnet_id_by_dev_port(struct device *dev, unsigned short port, u8 *pnetid) +{ + u8 pnetids[MAX_PNETID_PORTS][MAX_PNETID_LEN]; + static const u8 zero[MAX_PNETID_LEN] = { 0 }; + int rc = 0; + + if (!dev || port >= MAX_PNETID_PORTS) + return -ENOENT; + + if (!pnet_ids_by_device(dev, (u8 *)pnetids) && + memcmp(pnetids[port], zero, MAX_PNETID_LEN)) + memcpy(pnetid, pnetids[port], MAX_PNETID_LEN); + else + rc = -ENOENT; + + return rc; +} +EXPORT_SYMBOL_GPL(pnet_id_by_dev_port); From 3376d98021e915196f4894d835325a884e635a04 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 25 Apr 2018 11:43:17 +0200 Subject: [PATCH 2/8] s390/archrandom: Rework arch random implementation. The arch_get_random_seed_long() invocation done by the random device driver is done in interrupt context and may be invoked very very frequently. The existing s390 arch_get_random_seed*() implementation uses the PRNO(TRNG) instruction which produces excellent high quality entropy but is relatively slow and thus expensive. This fix reworks the arch_get_random_seed* implementation. It introduces a buffer concept to decouple the delivery of random data via arch_get_random_seed*() from the generation of new random bytes. The buffer of random data is filled asynchronously by a workqueue thread. If there are enough bytes in the buffer the s390_arch_random_generate() just delivers these bytes. Otherwise false is returned until the worker thread refills the buffer. The worker fills the rng buffer by pulling fresh entropy from the high quality (but slow) true hardware random generator. This entropy is then spread over the buffer with an pseudo random generator. As the arch_get_random_seed_long() fetches 8 bytes and the calling function add_interrupt_randomness() counts this as 1 bit entropy the distribution needs to make sure there is in fact 1 bit entropy contained in 8 bytes of the buffer. The current values pull 32 byte entropy and scatter this into a 2048 byte buffer. So 8 byte in the buffer will contain 1 bit of entropy. The worker thread is rescheduled based on the charge level of the buffer but at least with 500 ms delay to avoid too much cpu consumption. So the max. amount of rng data delivered via arch_get_random_seed is limited to 4Kb per second. Signed-off-by: Harald Freudenberger Reviewed-by: Patrick Steuer Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/arch_random.c | 103 +++++++++++++++++++++++++++-- arch/s390/include/asm/archrandom.h | 13 +--- 2 files changed, 102 insertions(+), 14 deletions(-) diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index 8720e9203ecfb..dd95cdbd22ce8 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -2,14 +2,37 @@ /* * s390 arch random implementation. * - * Copyright IBM Corp. 
2017 - * Author(s): Harald Freudenberger + * Copyright IBM Corp. 2017, 2018 + * Author(s): Harald Freudenberger + * + * The s390_arch_random_generate() function may be called from random.c + * in interrupt context. So this implementation does the best to be very + * fast. There is a buffer of random data which is asynchronously checked + * and filled by a workqueue thread. + * If there are enough bytes in the buffer the s390_arch_random_generate() + * just delivers these bytes. Otherwise false is returned until the + * worker thread refills the buffer. + * The worker fills the rng buffer by pulling fresh entropy from the + * high quality (but slow) true hardware random generator. This entropy + * is then spread over the buffer with an pseudo random generator PRNG. + * As the arch_get_random_seed_long() fetches 8 bytes and the calling + * function add_interrupt_randomness() counts this as 1 bit entropy the + * distribution needs to make sure there is in fact 1 bit entropy contained + * in 8 bytes of the buffer. The current values pull 32 byte entropy + * and scatter this into a 2048 byte buffer. So 8 byte in the buffer + * will contain 1 bit of entropy. + * The worker thread is rescheduled based on the charge level of the + * buffer but at least with 500 ms delay to avoid too much CPU consumption. + * So the max. amount of rng data delivered via arch_get_random_seed is + * limited to 4k bytes per second. */ #include #include #include +#include #include +#include #include DEFINE_STATIC_KEY_FALSE(s390_arch_random_available); @@ -17,11 +40,83 @@ DEFINE_STATIC_KEY_FALSE(s390_arch_random_available); atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0); EXPORT_SYMBOL(s390_arch_random_counter); +#define ARCH_REFILL_TICKS (HZ/2) +#define ARCH_PRNG_SEED_SIZE 32 +#define ARCH_RNG_BUF_SIZE 2048 + +static DEFINE_SPINLOCK(arch_rng_lock); +static u8 *arch_rng_buf; +static unsigned int arch_rng_buf_idx; + +static void arch_rng_refill_buffer(struct work_struct *); +static DECLARE_DELAYED_WORK(arch_rng_work, arch_rng_refill_buffer); + +bool s390_arch_random_generate(u8 *buf, unsigned int nbytes) +{ + /* lock rng buffer */ + if (!spin_trylock(&arch_rng_lock)) + return false; + + /* try to resolve the requested amount of bytes from the buffer */ + arch_rng_buf_idx -= nbytes; + if (arch_rng_buf_idx < ARCH_RNG_BUF_SIZE) { + memcpy(buf, arch_rng_buf + arch_rng_buf_idx, nbytes); + atomic64_add(nbytes, &s390_arch_random_counter); + spin_unlock(&arch_rng_lock); + return true; + } + + /* not enough bytes in rng buffer, refill is done asynchronously */ + spin_unlock(&arch_rng_lock); + + return false; +} +EXPORT_SYMBOL(s390_arch_random_generate); + +static void arch_rng_refill_buffer(struct work_struct *unused) +{ + unsigned int delay = ARCH_REFILL_TICKS; + + spin_lock(&arch_rng_lock); + if (arch_rng_buf_idx > ARCH_RNG_BUF_SIZE) { + /* buffer is exhausted and needs refill */ + u8 seed[ARCH_PRNG_SEED_SIZE]; + u8 prng_wa[240]; + /* fetch ARCH_PRNG_SEED_SIZE bytes of entropy */ + cpacf_trng(NULL, 0, seed, sizeof(seed)); + /* blow this entropy up to ARCH_RNG_BUF_SIZE with PRNG */ + memset(prng_wa, 0, sizeof(prng_wa)); + cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, + &prng_wa, NULL, 0, seed, sizeof(seed)); + cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, + &prng_wa, arch_rng_buf, ARCH_RNG_BUF_SIZE, NULL, 0); + arch_rng_buf_idx = ARCH_RNG_BUF_SIZE; + } + delay += (ARCH_REFILL_TICKS * arch_rng_buf_idx) / ARCH_RNG_BUF_SIZE; + spin_unlock(&arch_rng_lock); + + /* kick next check */ + queue_delayed_work(system_long_wq, &arch_rng_work, 
delay); +} + static int __init s390_arch_random_init(void) { - /* check if subfunction CPACF_PRNO_TRNG is available */ - if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) + /* all the needed PRNO subfunctions available ? */ + if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG) && + cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) { + + /* alloc arch random working buffer */ + arch_rng_buf = kmalloc(ARCH_RNG_BUF_SIZE, GFP_KERNEL); + if (!arch_rng_buf) + return -ENOMEM; + + /* kick worker queue job to fill the random buffer */ + queue_delayed_work(system_long_wq, + &arch_rng_work, ARCH_REFILL_TICKS); + + /* enable arch random to the outside world */ static_branch_enable(&s390_arch_random_available); + } return 0; } diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h index 09aed10953362..c67b82dfa558e 100644 --- a/arch/s390/include/asm/archrandom.h +++ b/arch/s390/include/asm/archrandom.h @@ -15,16 +15,11 @@ #include #include -#include DECLARE_STATIC_KEY_FALSE(s390_arch_random_available); extern atomic64_t s390_arch_random_counter; -static void s390_arch_random_generate(u8 *buf, unsigned int nbytes) -{ - cpacf_trng(NULL, 0, buf, nbytes); - atomic64_add(nbytes, &s390_arch_random_counter); -} +bool s390_arch_random_generate(u8 *buf, unsigned int nbytes); static inline bool arch_has_random(void) { @@ -51,8 +46,7 @@ static inline bool arch_get_random_int(unsigned int *v) static inline bool arch_get_random_seed_long(unsigned long *v) { if (static_branch_likely(&s390_arch_random_available)) { - s390_arch_random_generate((u8 *)v, sizeof(*v)); - return true; + return s390_arch_random_generate((u8 *)v, sizeof(*v)); } return false; } @@ -60,8 +54,7 @@ static inline bool arch_get_random_seed_long(unsigned long *v) static inline bool arch_get_random_seed_int(unsigned int *v) { if (static_branch_likely(&s390_arch_random_available)) { - s390_arch_random_generate((u8 *)v, sizeof(*v)); - return true; + return s390_arch_random_generate((u8 *)v, sizeof(*v)); } return false; } From 2c861d89ccda2fbcea9358eff9cc5f8fae548be5 Mon Sep 17 00:00:00 2001 From: Dong Jia Shi Date: Wed, 2 May 2018 09:25:59 +0200 Subject: [PATCH 3/8] vfio: ccw: fix error return in vfio_ccw_sch_event If the device has not been registered, or there is work pending, we should reschedule a sch_event call again. Signed-off-by: Dong Jia Shi Message-Id: <20180502072559.50691-1-bjsdjshi@linux.vnet.ibm.com> Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index ea6a2d0b2894d..770fa9cfc3104 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -177,6 +177,7 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process) { struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); unsigned long flags; + int rc = -EAGAIN; spin_lock_irqsave(sch->lock, flags); if (!device_is_registered(&sch->dev)) @@ -187,6 +188,7 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process) if (cio_update_schib(sch)) { vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); + rc = 0; goto out_unlock; } @@ -195,11 +197,12 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process) private->state = private->mdev ? 
VFIO_CCW_STATE_IDLE : VFIO_CCW_STATE_STANDBY; } + rc = 0; out_unlock: spin_unlock_irqrestore(sch->lock, flags); - return 0; + return rc; } static struct css_device_id vfio_ccw_sch_ids[] = { From fb9e7880af357f0244f57a3dc4dd365091970b1a Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Wed, 16 May 2018 19:33:42 +0200 Subject: [PATCH 4/8] vfio: ccw: push down unsupported IDA check There is at least one relevant guest OS that doesn't set the IDA flags in the ORB as we would like them, but never uses any IDA. So instead of saying -EOPNOTSUPP when observing an ORB, such that a channel program specified by it could be a not supported one, let us say -EOPNOTSUPP only if the channel program is a not supported one. Of course, the real solution would be doing proper translation for all IDA. This is possible, but given the current code not straight forward. Signed-off-by: Halil Pasic Tested-by: Jason J. Herne Message-Id: <20180516173342.15174-1-pasic@linux.ibm.com> Reviewed-by: Dong Jia Shi Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_cp.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index dce92b2a895d6..9a2a39df10564 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -365,6 +365,9 @@ static void cp_unpin_free(struct channel_program *cp) * This is the chain length not considering any TICs. * You need to do a new round for each TIC target. * + * The program is also validated for absence of not yet supported + * indirect data addressing scenarios. + * * Returns: the length of the ccw chain or -errno. */ static int ccwchain_calc_length(u64 iova, struct channel_program *cp) @@ -391,6 +394,14 @@ static int ccwchain_calc_length(u64 iova, struct channel_program *cp) do { cnt++; + /* + * As we don't want to fail direct addressing even if the + * orb specified one of the unsupported formats, we defer + * checking for IDAWs in unsupported formats to here. + */ + if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw)) + return -EOPNOTSUPP; + if ((!ccw_is_chain(ccw)) && (!ccw_is_tic(ccw))) break; @@ -656,10 +667,8 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb) /* * XXX: * Only support prefetch enable mode now. - * Only support 64bit addressing idal. - * Only support 4k IDAW. */ - if (!orb->cmd.pfch || !orb->cmd.c64 || orb->cmd.i2k) + if (!orb->cmd.pfch) return -EOPNOTSUPP; INIT_LIST_HEAD(&cp->ccwchain_list); @@ -688,6 +697,10 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb) ret = ccwchain_loop_tic(chain, cp); if (ret) cp_unpin_free(cp); + /* It is safe to force: if not set but idals used + * ccwchain_calc_length returns an error. + */ + cp->orb.cmd.c64 = 1; return ret; } From 80c57f7a075b0c53944113e42ce114d8bf0977e4 Mon Sep 17 00:00:00 2001 From: Dong Jia Shi Date: Wed, 23 May 2018 04:56:42 +0200 Subject: [PATCH 5/8] vfio: ccw: shorten kernel doc description for pfn_array_pin() The kernel doc description for usage of the struct pfn_array in pfn_array_pin() is unnecessary long. Let's shorten it by describing the contents of the struct pfn_array fields at the struct's definition instead. 
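To make the documented caller contract concrete, here is a minimal sketch of how a struct pfn_array gets set up before pinning, along the lines of what pfn_array_alloc_pin() in this file already does. This is illustrative only and not part of the patch; the helper name pfn_array_setup and its arguments are hypothetical:

	static int pfn_array_setup(struct pfn_array *pa, u64 iova, unsigned int len)
	{
		unsigned int nr_pages = ((iova & ~PAGE_MASK) + len + PAGE_SIZE - 1)
					>> PAGE_SHIFT;

		pa->pa_iova = iova;	/* starting guest physical I/O address */
		pa->pa_nr = nr_pages;	/* pages covering [iova, iova + len) */
		/* one allocation backs both PFN arrays */
		pa->pa_iova_pfn = kcalloc(nr_pages,
					  sizeof(*pa->pa_iova_pfn) +
					  sizeof(*pa->pa_pfn),
					  GFP_KERNEL);
		if (!pa->pa_iova_pfn)
			return -ENOMEM;
		/* second half of the buffer receives the pinned PFNs */
		pa->pa_pfn = pa->pa_iova_pfn + nr_pages;
		return 0;
	}

The next patch in the series folds this setup and the pinning step into a single helper.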
Suggested-by: Cornelia Huck Signed-off-by: Dong Jia Shi Message-Id: <20180523025645.8978-2-bjsdjshi@linux.ibm.com> Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_cp.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index 9a2a39df10564..c532939c1c3f6 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -23,9 +23,13 @@ #define CCWCHAIN_LEN_MAX 256 struct pfn_array { + /* Starting guest physical I/O address. */ unsigned long pa_iova; + /* Array that stores PFNs of the pages need to pin. */ unsigned long *pa_iova_pfn; + /* Array that receives PFNs of the pages pinned. */ unsigned long *pa_pfn; + /* Number of pages to pin/pinned from @pa_iova. */ int pa_nr; }; @@ -53,14 +57,8 @@ struct ccwchain { * Attempt to pin user pages in memory. * * Usage of pfn_array: - * @pa->pa_iova starting guest physical I/O address. Assigned by caller. - * @pa->pa_iova_pfn array that stores PFNs of the pages need to pin. Allocated - * by caller. - * @pa->pa_pfn array that receives PFNs of the pages pinned. Allocated by - * caller. - * @pa->pa_nr number of pages from @pa->pa_iova to pin. Assigned by - * caller. - * number of pages pinned. Assigned by callee. + * Any field in this structure should be initialized by caller. + * We expect @pa->pa_nr > 0, and its value will be assigned by callee. * * Returns: * Number of pages pinned on success. From 5c1cfb1c3948fe93a32dfcd75223dda0f1558bb7 Mon Sep 17 00:00:00 2001 From: Dong Jia Shi Date: Wed, 23 May 2018 04:56:43 +0200 Subject: [PATCH 6/8] vfio: ccw: refactor and improve pfn_array_alloc_pin() This refactors pfn_array_alloc_pin() and also improves it by adding defensive code in error handling so that calling pfn_array_unpin_free() after error return won't lead to problem. This mainly does: 1. Merge pfn_array_pin() into pfn_array_alloc_pin(), since there is no other user of pfn_array_pin(). As a result, also remove kernel-doc for pfn_array_pin() and add/update kernel-doc for pfn_array_alloc_pin() and struct pfn_array. 2. For a vfio_pin_pages() failure, set pa->pa_nr to zero to indicate zero pages were pinned. 3. Set pa->pa_iova_pfn to NULL right after it was freed. Suggested-by: Pierre Morel Signed-off-by: Dong Jia Shi Message-Id: <20180523025645.8978-3-bjsdjshi@linux.ibm.com> Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_cp.c | 82 +++++++++++++++------------------- 1 file changed, 36 insertions(+), 46 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index c532939c1c3f6..b0f20230fc723 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -29,7 +29,7 @@ struct pfn_array { unsigned long *pa_iova_pfn; /* Array that receives PFNs of the pages pinned. */ unsigned long *pa_pfn; - /* Number of pages to pin/pinned from @pa_iova. */ + /* Number of pages pinned from @pa_iova. */ int pa_nr; }; @@ -50,64 +50,33 @@ struct ccwchain { }; /* - * pfn_array_pin() - pin user pages in memory + * pfn_array_alloc_pin() - alloc memory for PFNs, then pin user pages in memory * @pa: pfn_array on which to perform the operation * @mdev: the mediated device to perform pin/unpin operations + * @iova: target guest physical address + * @len: number of bytes that should be pinned from @iova * - * Attempt to pin user pages in memory. + * Attempt to allocate memory for PFNs, and pin user pages in memory. 
* * Usage of pfn_array: - * Any field in this structure should be initialized by caller. - * We expect @pa->pa_nr > 0, and its value will be assigned by callee. + * We expect (pa_nr == 0) and (pa_iova_pfn == NULL), any field in + * this structure will be filled in by this function. * * Returns: * Number of pages pinned on success. - * If @pa->pa_nr is 0 or negative, returns 0. + * If @pa->pa_nr is not 0, or @pa->pa_iova_pfn is not NULL initially, + * returns -EINVAL. * If no pages were pinned, returns -errno. */ -static int pfn_array_pin(struct pfn_array *pa, struct device *mdev) -{ - int i, ret; - - if (pa->pa_nr <= 0) { - pa->pa_nr = 0; - return 0; - } - - pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT; - for (i = 1; i < pa->pa_nr; i++) - pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1; - - ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr, - IOMMU_READ | IOMMU_WRITE, pa->pa_pfn); - - if (ret > 0 && ret != pa->pa_nr) { - vfio_unpin_pages(mdev, pa->pa_iova_pfn, ret); - pa->pa_nr = 0; - return 0; - } - - return ret; -} - -/* Unpin the pages before releasing the memory. */ -static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev) -{ - vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr); - pa->pa_nr = 0; - kfree(pa->pa_iova_pfn); -} - -/* Alloc memory for PFNs, then pin pages with them. */ static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev, u64 iova, unsigned int len) { - int ret = 0; + int i, ret = 0; if (!len) return 0; - if (pa->pa_nr) + if (pa->pa_nr || pa->pa_iova_pfn) return -EINVAL; pa->pa_iova = iova; @@ -124,18 +93,39 @@ static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev, return -ENOMEM; pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr; - ret = pfn_array_pin(pa, mdev); + pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT; + for (i = 1; i < pa->pa_nr; i++) + pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1; + + ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr, + IOMMU_READ | IOMMU_WRITE, pa->pa_pfn); - if (ret > 0) - return ret; - else if (!ret) + if (ret < 0) { + goto err_out; + } else if (ret > 0 && ret != pa->pa_nr) { + vfio_unpin_pages(mdev, pa->pa_iova_pfn, ret); ret = -EINVAL; + goto err_out; + } + + return ret; +err_out: + pa->pa_nr = 0; kfree(pa->pa_iova_pfn); + pa->pa_iova_pfn = NULL; return ret; } +/* Unpin the pages before releasing the memory. */ +static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev) +{ + vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr); + pa->pa_nr = 0; + kfree(pa->pa_iova_pfn); +} + static int pfn_array_table_init(struct pfn_array_table *pat, int nr) { pat->pat_pa = kcalloc(nr, sizeof(*pat->pat_pa), GFP_KERNEL); From 6238f92132a6da64b731de1a728fa46ffaa21f62 Mon Sep 17 00:00:00 2001 From: Dong Jia Shi Date: Wed, 23 May 2018 04:56:44 +0200 Subject: [PATCH 7/8] vfio: ccw: set ccw->cda to NULL defensively Let's avoid free on ccw->cda that points to a guest address or an already freed memory area by setting it to NULL if memory allocation didn't happen or failed. 
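In other words, cleanup code frees whatever ccw->cda points at, so the field may only hold an address once it refers to kernel memory allocated during translation, and has to be cleared again whenever that allocation is skipped or fails. As a minimal sketch of the pattern (illustrative, not the exact driver code in the diff below):

	idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
	if (!idaws) {
		ccw->cda = 0;	/* still the guest address - never kfree() this */
		return -ENOMEM;
	}
	ccw->cda = (__u32) virt_to_phys(idaws);	/* now safe to free on cleanup */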
Signed-off-by: Dong Jia Shi Message-Id: <20180523025645.8978-4-bjsdjshi@linux.ibm.com> Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_cp.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index b0f20230fc723..dbe7c7ac9ac8c 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -502,7 +502,7 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, struct ccw1 *ccw; struct pfn_array_table *pat; unsigned long *idaws; - int idaw_nr; + int ret; ccw = chain->ch_ccw + idx; @@ -522,18 +522,19 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, * needed when translating a direct ccw to a idal ccw. */ pat = chain->ch_pat + idx; - if (pfn_array_table_init(pat, 1)) - return -ENOMEM; - idaw_nr = pfn_array_alloc_pin(pat->pat_pa, cp->mdev, - ccw->cda, ccw->count); - if (idaw_nr < 0) - return idaw_nr; + ret = pfn_array_table_init(pat, 1); + if (ret) + goto out_init; + + ret = pfn_array_alloc_pin(pat->pat_pa, cp->mdev, ccw->cda, ccw->count); + if (ret < 0) + goto out_init; /* Translate this direct ccw to a idal ccw. */ - idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL); + idaws = kcalloc(ret, sizeof(*idaws), GFP_DMA | GFP_KERNEL); if (!idaws) { - pfn_array_table_unpin_free(pat, cp->mdev); - return -ENOMEM; + ret = -ENOMEM; + goto out_unpin; } ccw->cda = (__u32) virt_to_phys(idaws); ccw->flags |= CCW_FLAG_IDA; @@ -541,6 +542,12 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, pfn_array_table_idal_create_words(pat, idaws); return 0; + +out_unpin: + pfn_array_table_unpin_free(pat, cp->mdev); +out_init: + ccw->cda = 0; + return ret; } static int ccwchain_fetch_idal(struct ccwchain *chain, @@ -570,7 +577,7 @@ static int ccwchain_fetch_idal(struct ccwchain *chain, pat = chain->ch_pat + idx; ret = pfn_array_table_init(pat, idaw_nr); if (ret) - return ret; + goto out_init; /* Translate idal ccw to use new allocated idaws. */ idaws = kzalloc(idaw_len, GFP_DMA | GFP_KERNEL); @@ -602,6 +609,8 @@ static int ccwchain_fetch_idal(struct ccwchain *chain, kfree(idaws); out_unpin: pfn_array_table_unpin_free(pat, cp->mdev); +out_init: + ccw->cda = 0; return ret; } From 3cd90214b70f7f971496bffc3c34d23b2141feb3 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Wed, 23 May 2018 04:56:45 +0200 Subject: [PATCH 8/8] vfio: ccw: add tracepoints for interesting error paths Add some tracepoints so we can inspect what is not working as is should. 
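Once applied, the new event can be enabled at run time through the usual tracefs interface, for example (assuming tracefs is mounted at its standard location) by writing 1 to /sys/kernel/tracing/events/vfio_ccw/vfio_ccw_io_fctl/enable; each instrumented error path then records the subchannel id, the SCSW function control bits, the return code and a short reason string.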
Signed-off-by: Halil Pasic Signed-off-by: Dong Jia Shi Message-Id: <20180523025645.8978-5-bjsdjshi@linux.ibm.com> Signed-off-by: Cornelia Huck --- drivers/s390/cio/Makefile | 1 + drivers/s390/cio/vfio_ccw_fsm.c | 17 +++++++++- drivers/s390/cio/vfio_ccw_trace.h | 54 +++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 drivers/s390/cio/vfio_ccw_trace.h diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile index a070ef0efe65d..f230516abb96d 100644 --- a/drivers/s390/cio/Makefile +++ b/drivers/s390/cio/Makefile @@ -5,6 +5,7 @@ # The following is required for define_trace.h to find ./trace.h CFLAGS_trace.o := -I$(src) +CFLAGS_vfio_ccw_fsm.o := -I$(src) obj-y += airq.o blacklist.o chsc.o cio.o css.o chp.o idset.o isc.o \ fcx.o itcw.o crw.o ccwreq.o trace.o ioasm.o diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c index 3c800642134e4..797a82731159a 100644 --- a/drivers/s390/cio/vfio_ccw_fsm.c +++ b/drivers/s390/cio/vfio_ccw_fsm.c @@ -13,6 +13,9 @@ #include "ioasm.h" #include "vfio_ccw_private.h" +#define CREATE_TRACE_POINTS +#include "vfio_ccw_trace.h" + static int fsm_io_helper(struct vfio_ccw_private *private) { struct subchannel *sch; @@ -110,6 +113,10 @@ static void fsm_disabled_irq(struct vfio_ccw_private *private, */ cio_disable_subchannel(sch); } +inline struct subchannel_id get_schid(struct vfio_ccw_private *p) +{ + return p->sch->schid; +} /* * Deal with the ccw command request from the userspace. @@ -121,6 +128,7 @@ static void fsm_io_request(struct vfio_ccw_private *private, union scsw *scsw = &private->scsw; struct ccw_io_region *io_region = &private->io_region; struct mdev_device *mdev = private->mdev; + char *errstr = "request"; private->state = VFIO_CCW_STATE_BOXED; @@ -132,15 +140,19 @@ static void fsm_io_request(struct vfio_ccw_private *private, /* Don't try to build a cp if transport mode is specified. */ if (orb->tm.b) { io_region->ret_code = -EOPNOTSUPP; + errstr = "transport mode"; goto err_out; } io_region->ret_code = cp_init(&private->cp, mdev_dev(mdev), orb); - if (io_region->ret_code) + if (io_region->ret_code) { + errstr = "cp init"; goto err_out; + } io_region->ret_code = cp_prefetch(&private->cp); if (io_region->ret_code) { + errstr = "cp prefetch"; cp_free(&private->cp); goto err_out; } @@ -148,6 +160,7 @@ static void fsm_io_request(struct vfio_ccw_private *private, /* Start channel program and wait for I/O interrupt. */ io_region->ret_code = fsm_io_helper(private); if (io_region->ret_code) { + errstr = "cp fsm_io_helper"; cp_free(&private->cp); goto err_out; } @@ -164,6 +177,8 @@ static void fsm_io_request(struct vfio_ccw_private *private, err_out: private->state = VFIO_CCW_STATE_IDLE; + trace_vfio_ccw_io_fctl(scsw->cmd.fctl, get_schid(private), + io_region->ret_code, errstr); } /* diff --git a/drivers/s390/cio/vfio_ccw_trace.h b/drivers/s390/cio/vfio_ccw_trace.h new file mode 100644 index 0000000000000..b1da53ddec1f5 --- /dev/null +++ b/drivers/s390/cio/vfio_ccw_trace.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Tracepoints for vfio_ccw driver + * + * Copyright IBM Corp. 
2018 + * + * Author(s): Dong Jia Shi + * Halil Pasic + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM vfio_ccw + +#if !defined(_VFIO_CCW_TRACE_) || defined(TRACE_HEADER_MULTI_READ) +#define _VFIO_CCW_TRACE_ + +#include + +TRACE_EVENT(vfio_ccw_io_fctl, + TP_PROTO(int fctl, struct subchannel_id schid, int errno, char *errstr), + TP_ARGS(fctl, schid, errno, errstr), + + TP_STRUCT__entry( + __field(int, fctl) + __field_struct(struct subchannel_id, schid) + __field(int, errno) + __field(char*, errstr) + ), + + TP_fast_assign( + __entry->fctl = fctl; + __entry->schid = schid; + __entry->errno = errno; + __entry->errstr = errstr; + ), + + TP_printk("schid=%x.%x.%04x fctl=%x errno=%d info=%s", + __entry->schid.cssid, + __entry->schid.ssid, + __entry->schid.sch_no, + __entry->fctl, + __entry->errno, + __entry->errstr) +); + +#endif /* _VFIO_CCW_TRACE_ */ + +/* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE vfio_ccw_trace + +#include
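For reference, a record emitted by this event renders according to the TP_printk() format above, roughly as follows (all values hypothetical, chosen to match the fsm_io_request() path that rejects transport-mode ORBs with -EOPNOTSUPP):

	vfio_ccw_io_fctl: schid=0.0.013f fctl=4 errno=-95 info=transport mode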