From 3777ea7bac3113005b7180e6b9dadf16d19a5827 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Mar 2022 09:48:54 +0100 Subject: [PATCH 01/12] xen/xenbus: don't let xenbus_grant_ring() remove grants in error case Letting xenbus_grant_ring() tear down grants in the error case is problematic, as the other side could already have used these grants. Calling gnttab_end_foreign_access_ref() without checking success is resulting in an unclear situation for any caller of xenbus_grant_ring() as in the error case the memory pages of the ring page might be partially mapped. Freeing them would risk unwanted foreign access to them, while not freeing them would leak memory. In order to remove the need to undo any gnttab_grant_foreign_access() calls, use gnttab_alloc_grant_references() to make sure no further error can occur in the loop granting access to the ring pages. It should be noted that this way of handling removes leaking of grant entries in the error case, too. This is CVE-2022-23040 / part of XSA-396. Reported-by: Demi Marie Obenour Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- drivers/xen/xenbus/xenbus_client.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index e8bed1cb76ba2..df68906812315 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -379,7 +379,14 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, unsigned int nr_pages, grant_ref_t *grefs) { int err; - int i, j; + unsigned int i; + grant_ref_t gref_head; + + err = gnttab_alloc_grant_references(nr_pages, &gref_head); + if (err) { + xenbus_dev_fatal(dev, err, "granting access to ring page"); + return err; + } for (i = 0; i < nr_pages; i++) { unsigned long gfn; @@ -389,23 +396,14 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, else gfn = virt_to_gfn(vaddr); - err = gnttab_grant_foreign_access(dev->otherend_id, gfn, 0); - if (err < 0) { - xenbus_dev_fatal(dev, err, - "granting access to ring page"); - goto fail; - } - grefs[i] = err; + grefs[i] = gnttab_claim_grant_reference(&gref_head); + gnttab_grant_foreign_access_ref(grefs[i], dev->otherend_id, + gfn, 0); vaddr = vaddr + XEN_PAGE_SIZE; } return 0; - -fail: - for (j = 0; j < i; j++) - gnttab_end_foreign_access_ref(grefs[j], 0); - return err; } EXPORT_SYMBOL_GPL(xenbus_grant_ring); From 6b1775f26a2da2b05a6dc8ec2b5d14e9a4701a1a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Mar 2022 09:48:54 +0100 Subject: [PATCH 02/12] xen/grant-table: add gnttab_try_end_foreign_access() Add a new grant table function gnttab_try_end_foreign_access(), which will remove and free a grant if it is not in use. Its main use case is to either free a grant if it is no longer in use, or to take some other action if it is still in use. This other action can be an error exit, or (e.g. in the case of blkfront persistent grant feature) some special handling. This is CVE-2022-23036, CVE-2022-23038 / part of XSA-396. Reported-by: Demi Marie Obenour Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- V2: - new patch V4: - add comments to header (Jan Beulich) --- drivers/xen/grant-table.c | 14 ++++++++++++-- include/xen/grant_table.h | 12 ++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 3729bea0c9895..1b82e7a3722a3 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -435,11 +435,21 @@ static void gnttab_add_deferred(grant_ref_t ref, bool readonly, what, ref, page ? page_to_pfn(page) : -1); } +int gnttab_try_end_foreign_access(grant_ref_t ref) +{ + int ret = _gnttab_end_foreign_access_ref(ref, 0); + + if (ret) + put_free_entry(ref); + + return ret; +} +EXPORT_SYMBOL_GPL(gnttab_try_end_foreign_access); + void gnttab_end_foreign_access(grant_ref_t ref, int readonly, unsigned long page) { - if (gnttab_end_foreign_access_ref(ref, readonly)) { - put_free_entry(ref); + if (gnttab_try_end_foreign_access(ref)) { if (page != 0) put_page(virt_to_page(page)); } else diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index cb854df031ce0..358d2817741b7 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -104,10 +104,22 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly); * access has been ended, free the given page too. Access will be ended * immediately iff the grant entry is not in use, otherwise it will happen * some time later. page may be 0, in which case no freeing will occur. + * Note that the granted page might still be accessed (read or write) by the + * other side after gnttab_end_foreign_access() returns, so even if page was + * specified as 0 it is not allowed to just reuse the page for other + * purposes immediately. */ void gnttab_end_foreign_access(grant_ref_t ref, int readonly, unsigned long page); +/* + * End access through the given grant reference, iff the grant entry is + * no longer in use. In case of success ending foreign access, the + * grant reference is deallocated. + * Return 1 if the grant entry was freed, 0 if it is still in use. + */ +int gnttab_try_end_foreign_access(grant_ref_t ref); + int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn); unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); From abf1fd5919d6238ee3bc5eb4a9b6c3947caa6638 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Mar 2022 09:48:54 +0100 Subject: [PATCH 03/12] xen/blkfront: don't use gnttab_query_foreign_access() for mapped status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It isn't enough to check whether a grant is still being in use by calling gnttab_query_foreign_access(), as a mapping could be realized by the other side just after having called that function. In case the call was done in preparation of revoking a grant it is better to do so via gnttab_end_foreign_access_ref() and check the success of that operation instead. For the ring allocation use alloc_pages_exact() in order to avoid high order pages in case of a multi-page ring. If a grant wasn't unmapped by the backend without persistent grants being used, set the device state to "error". This is CVE-2022-23036 / part of XSA-396. Reported-by: Demi Marie Obenour Signed-off-by: Juergen Gross Reviewed-by: Roger Pau Monné --- V2: - use gnttab_try_end_foreign_access() V4: - use alloc_pages_exact() and free_pages_exact() - set state to error if backend didn't unmap (Roger Pau Monné) --- drivers/block/xen-blkfront.c | 63 +++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index ca71a0585333f..03b5fb341e589 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1288,7 +1288,8 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) rinfo->ring_ref[i] = GRANT_INVALID_REF; } } - free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE)); + free_pages_exact(rinfo->ring.sring, + info->nr_ring_pages * XEN_PAGE_SIZE); rinfo->ring.sring = NULL; if (rinfo->irq) @@ -1372,9 +1373,15 @@ static int blkif_get_final_status(enum blk_req_status s1, return BLKIF_RSP_OKAY; } -static bool blkif_completion(unsigned long *id, - struct blkfront_ring_info *rinfo, - struct blkif_response *bret) +/* + * Return values: + * 1 response processed. + * 0 missing further responses. + * -1 error while processing. + */ +static int blkif_completion(unsigned long *id, + struct blkfront_ring_info *rinfo, + struct blkif_response *bret) { int i = 0; struct scatterlist *sg; @@ -1397,7 +1404,7 @@ static bool blkif_completion(unsigned long *id, /* Wait the second response if not yet here. */ if (s2->status < REQ_DONE) - return false; + return 0; bret->status = blkif_get_final_status(s->status, s2->status); @@ -1448,42 +1455,43 @@ static bool blkif_completion(unsigned long *id, } /* Add the persistent grant into the list of free grants */ for (i = 0; i < num_grant; i++) { - if (gnttab_query_foreign_access(s->grants_used[i]->gref)) { + if (!gnttab_try_end_foreign_access(s->grants_used[i]->gref)) { /* * If the grant is still mapped by the backend (the * backend has chosen to make this grant persistent) * we add it at the head of the list, so it will be * reused first. */ - if (!info->feature_persistent) - pr_alert_ratelimited("backed has not unmapped grant: %u\n", - s->grants_used[i]->gref); + if (!info->feature_persistent) { + pr_alert("backed has not unmapped grant: %u\n", + s->grants_used[i]->gref); + return -1; + } list_add(&s->grants_used[i]->node, &rinfo->grants); rinfo->persistent_gnts_c++; } else { /* - * If the grant is not mapped by the backend we end the - * foreign access and add it to the tail of the list, - * so it will not be picked again unless we run out of - * persistent grants. + * If the grant is not mapped by the backend we add it + * to the tail of the list, so it will not be picked + * again unless we run out of persistent grants. */ - gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL); s->grants_used[i]->gref = GRANT_INVALID_REF; list_add_tail(&s->grants_used[i]->node, &rinfo->grants); } } if (s->req.operation == BLKIF_OP_INDIRECT) { for (i = 0; i < INDIRECT_GREFS(num_grant); i++) { - if (gnttab_query_foreign_access(s->indirect_grants[i]->gref)) { - if (!info->feature_persistent) - pr_alert_ratelimited("backed has not unmapped grant: %u\n", - s->indirect_grants[i]->gref); + if (!gnttab_try_end_foreign_access(s->indirect_grants[i]->gref)) { + if (!info->feature_persistent) { + pr_alert("backed has not unmapped grant: %u\n", + s->indirect_grants[i]->gref); + return -1; + } list_add(&s->indirect_grants[i]->node, &rinfo->grants); rinfo->persistent_gnts_c++; } else { struct page *indirect_page; - gnttab_end_foreign_access(s->indirect_grants[i]->gref, 0, 0UL); /* * Add the used indirect page back to the list of * available pages for indirect grefs. @@ -1498,7 +1506,7 @@ static bool blkif_completion(unsigned long *id, } } - return true; + return 1; } static irqreturn_t blkif_interrupt(int irq, void *dev_id) @@ -1564,12 +1572,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) } if (bret.operation != BLKIF_OP_DISCARD) { + int ret; + /* * We may need to wait for an extra response if the * I/O request is split in 2 */ - if (!blkif_completion(&id, rinfo, &bret)) + ret = blkif_completion(&id, rinfo, &bret); + if (!ret) continue; + if (unlikely(ret < 0)) + goto err; } if (add_id_to_freelist(rinfo, id)) { @@ -1676,8 +1689,7 @@ static int setup_blkring(struct xenbus_device *dev, for (i = 0; i < info->nr_ring_pages; i++) rinfo->ring_ref[i] = GRANT_INVALID_REF; - sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH, - get_order(ring_size)); + sring = alloc_pages_exact(ring_size, GFP_NOIO); if (!sring) { xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); return -ENOMEM; @@ -1687,7 +1699,7 @@ static int setup_blkring(struct xenbus_device *dev, err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref); if (err < 0) { - free_pages((unsigned long)sring, get_order(ring_size)); + free_pages_exact(sring, ring_size); rinfo->ring.sring = NULL; goto fail; } @@ -2532,11 +2544,10 @@ static void purge_persistent_grants(struct blkfront_info *info) list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants, node) { if (gnt_list_entry->gref == GRANT_INVALID_REF || - gnttab_query_foreign_access(gnt_list_entry->gref)) + !gnttab_try_end_foreign_access(gnt_list_entry->gref)) continue; list_del(&gnt_list_entry->node); - gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL); rinfo->persistent_gnts_c--; gnt_list_entry->gref = GRANT_INVALID_REF; list_add_tail(&gnt_list_entry->node, &rinfo->grants); From 31185df7e2b1d2fa1de4900247a12d7b9c7087eb Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Mar 2022 09:48:54 +0100 Subject: [PATCH 04/12] xen/netfront: don't use gnttab_query_foreign_access() for mapped status It isn't enough to check whether a grant is still being in use by calling gnttab_query_foreign_access(), as a mapping could be realized by the other side just after having called that function. In case the call was done in preparation of revoking a grant it is better to do so via gnttab_end_foreign_access_ref() and check the success of that operation instead. This is CVE-2022-23037 / part of XSA-396. Reported-by: Demi Marie Obenour Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- V2: - use gnttab_try_end_foreign_access() V3: - don't use gnttab_try_end_foreign_access() --- drivers/net/xen-netfront.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 7748f07e2cf1d..13926d03418ef 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -424,14 +424,12 @@ static bool xennet_tx_buf_gc(struct netfront_queue *queue) queue->tx_link[id] = TX_LINK_NONE; skb = queue->tx_skbs[id]; queue->tx_skbs[id] = NULL; - if (unlikely(gnttab_query_foreign_access( - queue->grant_tx_ref[id]) != 0)) { + if (unlikely(!gnttab_end_foreign_access_ref( + queue->grant_tx_ref[id], GNTMAP_readonly))) { dev_alert(dev, "Grant still in use by backend domain\n"); goto err; } - gnttab_end_foreign_access_ref( - queue->grant_tx_ref[id], GNTMAP_readonly); gnttab_release_grant_reference( &queue->gref_tx_head, queue->grant_tx_ref[id]); queue->grant_tx_ref[id] = GRANT_INVALID_REF; From 33172ab50a53578a95691310f49567c9266968b0 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Mar 2022 09:48:54 +0100 Subject: [PATCH 05/12] xen/scsifront: don't use gnttab_query_foreign_access() for mapped status It isn't enough to check whether a grant is still being in use by calling gnttab_query_foreign_access(), as a mapping could be realized by the other side just after having called that function. In case the call was done in preparation of revoking a grant it is better to do so via gnttab_try_end_foreign_access() and check the success of that operation instead. This is CVE-2022-23038 / part of XSA-396. Reported-by: Demi Marie Obenour Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- V2: - use gnttab_try_end_foreign_access() --- drivers/scsi/xen-scsifront.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c index 12c10a5e3d93a..7f421600cb66d 100644 --- a/drivers/scsi/xen-scsifront.c +++ b/drivers/scsi/xen-scsifront.c @@ -233,12 +233,11 @@ static void scsifront_gnttab_done(struct vscsifrnt_info *info, return; for (i = 0; i < shadow->nr_grants; i++) { - if (unlikely(gnttab_query_foreign_access(shadow->gref[i]))) { + if (unlikely(!gnttab_try_end_foreign_access(shadow->gref[i]))) { shost_printk(KERN_ALERT, info->host, KBUILD_MODNAME "grant still in use by backend\n"); BUG(); } - gnttab_end_foreign_access(shadow->gref[i], 0, 0UL); } kfree(shadow->sg); From d3b6372c5881cb54925212abb62c521df8ba4809 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Mar 2022 09:48:54 +0100 Subject: [PATCH 06/12] xen/gntalloc: don't use gnttab_query_foreign_access() Using gnttab_query_foreign_access() is unsafe, as it is racy by design. The use case in the gntalloc driver is not needed at all. While at it replace the call of gnttab_end_foreign_access_ref() with a call of gnttab_end_foreign_access(), which is what is really wanted there. In case the grant wasn't used due to an allocation failure, just free the grant via gnttab_free_grant_reference(). This is CVE-2022-23039 / part of XSA-396. Reported-by: Demi Marie Obenour Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- V3: - fix __del_gref() (Jan Beulich) --- drivers/xen/gntalloc.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index 3fa40c723e8e9..edb0acd0b8323 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -169,20 +169,14 @@ static int add_grefs(struct ioctl_gntalloc_alloc_gref *op, __del_gref(gref); } - /* It's possible for the target domain to map the just-allocated grant - * references by blindly guessing their IDs; if this is done, then - * __del_gref will leave them in the queue_gref list. They need to be - * added to the global list so that we can free them when they are no - * longer referenced. - */ - if (unlikely(!list_empty(&queue_gref))) - list_splice_tail(&queue_gref, &gref_list); mutex_unlock(&gref_mutex); return rc; } static void __del_gref(struct gntalloc_gref *gref) { + unsigned long addr; + if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { uint8_t *tmp = kmap(gref->page); tmp[gref->notify.pgoff] = 0; @@ -196,21 +190,16 @@ static void __del_gref(struct gntalloc_gref *gref) gref->notify.flags = 0; if (gref->gref_id) { - if (gnttab_query_foreign_access(gref->gref_id)) - return; - - if (!gnttab_end_foreign_access_ref(gref->gref_id, 0)) - return; - - gnttab_free_grant_reference(gref->gref_id); + if (gref->page) { + addr = (unsigned long)page_to_virt(gref->page); + gnttab_end_foreign_access(gref->gref_id, 0, addr); + } else + gnttab_free_grant_reference(gref->gref_id); } gref_size--; list_del(&gref->next_gref); - if (gref->page) - __free_page(gref->page); - kfree(gref); } From 1dbd11ca75fe664d3e54607547771d021f531f59 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Mar 2022 09:48:54 +0100 Subject: [PATCH 07/12] xen: remove gnttab_query_foreign_access() Remove gnttab_query_foreign_access(), as it is unused and unsafe to use. All previous use cases assumed a grant would not be in use after gnttab_query_foreign_access() returned 0. This information is useless in best case, as it only refers to a situation in the past, which could have changed already. Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- drivers/xen/grant-table.c | 25 ------------------------- include/xen/grant_table.h | 2 -- 2 files changed, 27 deletions(-) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 1b82e7a3722a3..e6548910e79ff 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -133,13 +133,6 @@ struct gnttab_ops { * return the frame. */ unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref); - /* - * Query the status of a grant entry. Ref parameter is reference of - * queried grant entry, return value is the status of queried entry. - * Detailed status(writing/reading) can be gotten from the return value - * by bit operations. - */ - int (*query_foreign_access)(grant_ref_t ref); }; struct unmap_refs_callback_data { @@ -284,22 +277,6 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); -static int gnttab_query_foreign_access_v1(grant_ref_t ref) -{ - return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing); -} - -static int gnttab_query_foreign_access_v2(grant_ref_t ref) -{ - return grstatus[ref] & (GTF_reading|GTF_writing); -} - -int gnttab_query_foreign_access(grant_ref_t ref) -{ - return gnttab_interface->query_foreign_access(ref); -} -EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); - static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly) { u16 flags, nflags; @@ -1427,7 +1404,6 @@ static const struct gnttab_ops gnttab_v1_ops = { .update_entry = gnttab_update_entry_v1, .end_foreign_access_ref = gnttab_end_foreign_access_ref_v1, .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v1, - .query_foreign_access = gnttab_query_foreign_access_v1, }; static const struct gnttab_ops gnttab_v2_ops = { @@ -1439,7 +1415,6 @@ static const struct gnttab_ops gnttab_v2_ops = { .update_entry = gnttab_update_entry_v2, .end_foreign_access_ref = gnttab_end_foreign_access_ref_v2, .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2, - .query_foreign_access = gnttab_query_foreign_access_v2, }; static bool gnttab_need_v2(void) diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 358d2817741b7..ab9e692a0ef4a 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -125,8 +125,6 @@ int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn); unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); -int gnttab_query_foreign_access(grant_ref_t ref); - /* * operations on reserved batches of grant references */ From cd7bcfab4e73dcb3de92c2014c19f17af3864bfe Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Mar 2022 09:48:55 +0100 Subject: [PATCH 08/12] xen/usb: don't use gnttab_end_foreign_access() in xenhcd_gnttab_done() The usage of gnttab_end_foreign_access() in xenhcd_gnttab_done() is not safe against a malicious backend, as the backend could keep the I/O page mapped and modify it even after the granted memory page is being used for completely other purposes in the local system. So replace that use case with gnttab_try_end_foreign_access() and disable the PV host adapter in case the backend didn't stop using the granted page. In xenhcd_urb_request_done() immediately return in case of setting the device state to "error" instead of looking into further backend responses. Reported-by: Demi Marie Obenour Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- V2: - use gnttab_try_end_foreign_access() --- drivers/usb/host/xen-hcd.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/usb/host/xen-hcd.c b/drivers/usb/host/xen-hcd.c index be09fd9bac586..19b8c7ed74cb1 100644 --- a/drivers/usb/host/xen-hcd.c +++ b/drivers/usb/host/xen-hcd.c @@ -716,8 +716,9 @@ static int xenhcd_map_urb_for_request(struct xenhcd_info *info, struct urb *urb, return 0; } -static void xenhcd_gnttab_done(struct usb_shadow *shadow) +static void xenhcd_gnttab_done(struct xenhcd_info *info, unsigned int id) { + struct usb_shadow *shadow = info->shadow + id; int nr_segs = 0; int i; @@ -726,8 +727,10 @@ static void xenhcd_gnttab_done(struct usb_shadow *shadow) if (xenusb_pipeisoc(shadow->req.pipe)) nr_segs += shadow->req.u.isoc.nr_frame_desc_segs; - for (i = 0; i < nr_segs; i++) - gnttab_end_foreign_access(shadow->req.seg[i].gref, 0, 0UL); + for (i = 0; i < nr_segs; i++) { + if (!gnttab_try_end_foreign_access(shadow->req.seg[i].gref)) + xenhcd_set_error(info, "backend didn't release grant"); + } shadow->req.nr_buffer_segs = 0; shadow->req.u.isoc.nr_frame_desc_segs = 0; @@ -841,7 +844,9 @@ static void xenhcd_cancel_all_enqueued_urbs(struct xenhcd_info *info) list_for_each_entry_safe(urbp, tmp, &info->in_progress_list, list) { req_id = urbp->req_id; if (!urbp->unlinked) { - xenhcd_gnttab_done(&info->shadow[req_id]); + xenhcd_gnttab_done(info, req_id); + if (info->error) + return; if (urbp->urb->status == -EINPROGRESS) /* not dequeued */ xenhcd_giveback_urb(info, urbp->urb, @@ -942,8 +947,7 @@ static int xenhcd_urb_request_done(struct xenhcd_info *info) rp = info->urb_ring.sring->rsp_prod; if (RING_RESPONSE_PROD_OVERFLOW(&info->urb_ring, rp)) { xenhcd_set_error(info, "Illegal index on urb-ring"); - spin_unlock_irqrestore(&info->lock, flags); - return 0; + goto err; } rmb(); /* ensure we see queued responses up to "rp" */ @@ -952,11 +956,13 @@ static int xenhcd_urb_request_done(struct xenhcd_info *info) id = res.id; if (id >= XENUSB_URB_RING_SIZE) { xenhcd_set_error(info, "Illegal data on urb-ring"); - continue; + goto err; } if (likely(xenusb_pipesubmit(info->shadow[id].req.pipe))) { - xenhcd_gnttab_done(&info->shadow[id]); + xenhcd_gnttab_done(info, id); + if (info->error) + goto err; urb = info->shadow[id].urb; if (likely(urb)) { urb->actual_length = res.actual_length; @@ -978,6 +984,10 @@ static int xenhcd_urb_request_done(struct xenhcd_info *info) spin_unlock_irqrestore(&info->lock, flags); return more_to_do; + + err: + spin_unlock_irqrestore(&info->lock, flags); + return 0; } static int xenhcd_conn_notify(struct xenhcd_info *info) From 5cadd4bb1d7fc9ab201ac14620d1a478357e4ebd Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Mar 2022 09:48:55 +0100 Subject: [PATCH 09/12] xen/9p: use alloc/free_pages_exact() Instead of __get_free_pages() and free_pages() use alloc_pages_exact() and free_pages_exact(). This is in preparation of a change of gnttab_end_foreign_access() which will prohibit use of high-order pages. By using the local variable "order" instead of ring->intf->ring_order in the error path of xen_9pfs_front_alloc_dataring() another bug is fixed, as the error path can be entered before ring->intf->ring_order is being set. By using alloc_pages_exact() the size in bytes is specified for the allocation, which fixes another bug for the case of order < (PAGE_SHIFT - XEN_PAGE_SHIFT). This is part of CVE-2022-23041 / XSA-396. Reported-by: Simon Gaiser Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- V4: - new patch --- net/9p/trans_xen.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index eb9fb55280ef8..01f8067994d68 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -281,9 +281,9 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) ref = priv->rings[i].intf->ref[j]; gnttab_end_foreign_access(ref, 0, 0); } - free_pages((unsigned long)priv->rings[i].data.in, - priv->rings[i].intf->ring_order - - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + free_pages_exact(priv->rings[i].data.in, + 1UL << (priv->rings[i].intf->ring_order + + XEN_PAGE_SHIFT)); } gnttab_end_foreign_access(priv->rings[i].ref, 0, 0); free_page((unsigned long)priv->rings[i].intf); @@ -322,8 +322,8 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, if (ret < 0) goto out; ring->ref = ret; - bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - order - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + bytes = alloc_pages_exact(1UL << (order + XEN_PAGE_SHIFT), + GFP_KERNEL | __GFP_ZERO); if (!bytes) { ret = -ENOMEM; goto out; @@ -354,9 +354,7 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, if (bytes) { for (i--; i >= 0; i--) gnttab_end_foreign_access(ring->intf->ref[i], 0, 0); - free_pages((unsigned long)bytes, - ring->intf->ring_order - - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + free_pages_exact(bytes, 1UL << (order + XEN_PAGE_SHIFT)); } gnttab_end_foreign_access(ring->ref, 0, 0); free_page((unsigned long)ring->intf); From b0576cc9c6b843d99c6982888d59a56209341888 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Mar 2022 09:48:55 +0100 Subject: [PATCH 10/12] xen/pvcalls: use alloc/free_pages_exact() Instead of __get_free_pages() and free_pages() use alloc_pages_exact() and free_pages_exact(). This is in preparation of a change of gnttab_end_foreign_access() which will prohibit use of high-order pages. This is part of CVE-2022-23041 / XSA-396. Reported-by: Simon Gaiser Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- V4: - new patch --- drivers/xen/pvcalls-front.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index 3c9ae156b597f..0ca351f30a6d2 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -337,8 +337,8 @@ static void free_active_ring(struct sock_mapping *map) if (!map->active.ring) return; - free_pages((unsigned long)map->active.data.in, - map->active.ring->ring_order); + free_pages_exact(map->active.data.in, + PAGE_SIZE << map->active.ring->ring_order); free_page((unsigned long)map->active.ring); } @@ -352,8 +352,8 @@ static int alloc_active_ring(struct sock_mapping *map) goto out; map->active.ring->ring_order = PVCALLS_RING_ORDER; - bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - PVCALLS_RING_ORDER); + bytes = alloc_pages_exact(PAGE_SIZE << PVCALLS_RING_ORDER, + GFP_KERNEL | __GFP_ZERO); if (!bytes) goto out; From 42baefac638f06314298087394b982ead9ec444b Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Mar 2022 09:48:55 +0100 Subject: [PATCH 11/12] xen/gnttab: fix gnttab_end_foreign_access() without page specified gnttab_end_foreign_access() is used to free a grant reference and optionally to free the associated page. In case the grant is still in use by the other side processing is being deferred. This leads to a problem in case no page to be freed is specified by the caller: the caller doesn't know that the page is still mapped by the other side and thus should not be used for other purposes. The correct way to handle this situation is to take an additional reference to the granted page in case handling is being deferred and to drop that reference when the grant reference could be freed finally. This requires that there are no users of gnttab_end_foreign_access() left directly repurposing the granted page after the call, as this might result in clobbered data or information leaks via the not yet freed grant reference. This is part of CVE-2022-23041 / XSA-396. Reported-by: Simon Gaiser Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- V4: - expand comment in header V5: - get page ref in case of kmalloc() failure, too --- drivers/xen/grant-table.c | 36 +++++++++++++++++++++++++++++------- include/xen/grant_table.h | 7 ++++++- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index e6548910e79ff..5c83d41766c85 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -133,6 +133,10 @@ struct gnttab_ops { * return the frame. */ unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref); + /* + * Read the frame number related to a given grant reference. + */ + unsigned long (*read_frame)(grant_ref_t ref); }; struct unmap_refs_callback_data { @@ -330,6 +334,16 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) } EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); +static unsigned long gnttab_read_frame_v1(grant_ref_t ref) +{ + return gnttab_shared.v1[ref].frame; +} + +static unsigned long gnttab_read_frame_v2(grant_ref_t ref) +{ + return gnttab_shared.v2[ref].full_page.frame; +} + struct deferred_entry { struct list_head list; grant_ref_t ref; @@ -359,12 +373,9 @@ static void gnttab_handle_deferred(struct timer_list *unused) spin_unlock_irqrestore(&gnttab_list_lock, flags); if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) { put_free_entry(entry->ref); - if (entry->page) { - pr_debug("freeing g.e. %#x (pfn %#lx)\n", - entry->ref, page_to_pfn(entry->page)); - put_page(entry->page); - } else - pr_info("freeing g.e. %#x\n", entry->ref); + pr_debug("freeing g.e. %#x (pfn %#lx)\n", + entry->ref, page_to_pfn(entry->page)); + put_page(entry->page); kfree(entry); entry = NULL; } else { @@ -389,9 +400,18 @@ static void gnttab_handle_deferred(struct timer_list *unused) static void gnttab_add_deferred(grant_ref_t ref, bool readonly, struct page *page) { - struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + struct deferred_entry *entry; + gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL; const char *what = KERN_WARNING "leaking"; + entry = kmalloc(sizeof(*entry), gfp); + if (!page) { + unsigned long gfn = gnttab_interface->read_frame(ref); + + page = pfn_to_page(gfn_to_pfn(gfn)); + get_page(page); + } + if (entry) { unsigned long flags; @@ -1404,6 +1424,7 @@ static const struct gnttab_ops gnttab_v1_ops = { .update_entry = gnttab_update_entry_v1, .end_foreign_access_ref = gnttab_end_foreign_access_ref_v1, .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v1, + .read_frame = gnttab_read_frame_v1, }; static const struct gnttab_ops gnttab_v2_ops = { @@ -1415,6 +1436,7 @@ static const struct gnttab_ops gnttab_v2_ops = { .update_entry = gnttab_update_entry_v2, .end_foreign_access_ref = gnttab_end_foreign_access_ref_v2, .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2, + .read_frame = gnttab_read_frame_v2, }; static bool gnttab_need_v2(void) diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index ab9e692a0ef4a..c9fea9389ebec 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -107,7 +107,12 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly); * Note that the granted page might still be accessed (read or write) by the * other side after gnttab_end_foreign_access() returns, so even if page was * specified as 0 it is not allowed to just reuse the page for other - * purposes immediately. + * purposes immediately. gnttab_end_foreign_access() will take an additional + * reference to the granted page in this case, which is dropped only after + * the grant is no longer in use. + * This requires that multi page allocations for areas subject to + * gnttab_end_foreign_access() are done via alloc_pages_exact() (and freeing + * via free_pages_exact()) in order to avoid high order pages. */ void gnttab_end_foreign_access(grant_ref_t ref, int readonly, unsigned long page); From 66e3531b33ee51dad17c463b4d9c9f52e341503d Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Mar 2022 09:48:55 +0100 Subject: [PATCH 12/12] xen/netfront: react properly to failing gnttab_end_foreign_access_ref() When calling gnttab_end_foreign_access_ref() the returned value must be tested and the reaction to that value should be appropriate. In case of failure in xennet_get_responses() the reaction should not be to crash the system, but to disable the network device. The calls in setup_netfront() can be replaced by calls of gnttab_end_foreign_access(). While at it avoid double free of ring pages and grant references via xennet_disconnect_backend() in this case. This is CVE-2022-23042 / part of XSA-396. Reported-by: Demi Marie Obenour Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- V2: - avoid double free V3: - remove pointless initializer (Jan Beulich) --- drivers/net/xen-netfront.c | 48 ++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 13926d03418ef..daa4e6106aacc 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -988,7 +988,6 @@ static int xennet_get_responses(struct netfront_queue *queue, struct device *dev = &queue->info->netdev->dev; struct bpf_prog *xdp_prog; struct xdp_buff xdp; - unsigned long ret; int slots = 1; int err = 0; u32 verdict; @@ -1030,8 +1029,13 @@ static int xennet_get_responses(struct netfront_queue *queue, goto next; } - ret = gnttab_end_foreign_access_ref(ref, 0); - BUG_ON(!ret); + if (!gnttab_end_foreign_access_ref(ref, 0)) { + dev_alert(dev, + "Grant still in use by backend domain\n"); + queue->info->broken = true; + dev_alert(dev, "Disabled for further use\n"); + return -EINVAL; + } gnttab_release_grant_reference(&queue->gref_rx_head, ref); @@ -1252,6 +1256,10 @@ static int xennet_poll(struct napi_struct *napi, int budget) &need_xdp_flush); if (unlikely(err)) { + if (queue->info->broken) { + spin_unlock(&queue->rx_lock); + return 0; + } err: while ((skb = __skb_dequeue(&tmpq))) __skb_queue_tail(&errq, skb); @@ -1916,7 +1924,7 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_queue *queue, unsigned int feature_split_evtchn) { struct xen_netif_tx_sring *txs; - struct xen_netif_rx_sring *rxs; + struct xen_netif_rx_sring *rxs = NULL; grant_ref_t gref; int err; @@ -1936,21 +1944,21 @@ static int setup_netfront(struct xenbus_device *dev, err = xenbus_grant_ring(dev, txs, 1, &gref); if (err < 0) - goto grant_tx_ring_fail; + goto fail; queue->tx_ring_ref = gref; rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH); if (!rxs) { err = -ENOMEM; xenbus_dev_fatal(dev, err, "allocating rx ring page"); - goto alloc_rx_ring_fail; + goto fail; } SHARED_RING_INIT(rxs); FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE); err = xenbus_grant_ring(dev, rxs, 1, &gref); if (err < 0) - goto grant_rx_ring_fail; + goto fail; queue->rx_ring_ref = gref; if (feature_split_evtchn) @@ -1963,22 +1971,28 @@ static int setup_netfront(struct xenbus_device *dev, err = setup_netfront_single(queue); if (err) - goto alloc_evtchn_fail; + goto fail; return 0; /* If we fail to setup netfront, it is safe to just revoke access to * granted pages because backend is not accessing it at this point. */ -alloc_evtchn_fail: - gnttab_end_foreign_access_ref(queue->rx_ring_ref, 0); -grant_rx_ring_fail: - free_page((unsigned long)rxs); -alloc_rx_ring_fail: - gnttab_end_foreign_access_ref(queue->tx_ring_ref, 0); -grant_tx_ring_fail: - free_page((unsigned long)txs); -fail: + fail: + if (queue->rx_ring_ref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(queue->rx_ring_ref, 0, + (unsigned long)rxs); + queue->rx_ring_ref = GRANT_INVALID_REF; + } else { + free_page((unsigned long)rxs); + } + if (queue->tx_ring_ref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(queue->tx_ring_ref, 0, + (unsigned long)txs); + queue->tx_ring_ref = GRANT_INVALID_REF; + } else { + free_page((unsigned long)txs); + } return err; }