From 8310b77b48c5558c140e7a57a702e7819e62f04e Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 25 Feb 2021 16:34:43 +0100 Subject: Xen/gnttab: handle p2m update errors on a per-slot basis Bailing immediately from set_foreign_p2m_mapping() upon a p2m updating error leaves the full batch in an ambiguous state as far as the caller is concerned. Instead flags respective slots as bad, unmapping what was mapped there right away. HYPERVISOR_grant_table_op()'s return value and the individual unmap slots' status fields get used only for a one-time - there's not much we can do in case of a failure. Note that there's no GNTST_enomem or alike, so GNTST_general_error gets used. The map ops' handle fields get overwritten just to be on the safe side. This is part of XSA-367. Cc: Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/96cccf5d-e756-5f53-b91a-ea269bfb9be0@suse.com Signed-off-by: Juergen Gross --- arch/arm/xen/p2m.c | 35 +++++++++++++++++++++++++++++++---- arch/x86/xen/p2m.c | 44 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 72 insertions(+), 7 deletions(-) diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index fd6e3aafe272..acb464547a54 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -93,12 +93,39 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, int i; for (i = 0; i < count; i++) { + struct gnttab_unmap_grant_ref unmap; + int rc; + if (map_ops[i].status) continue; - if (unlikely(!set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT, - map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT))) { - return -ENOMEM; - } + if (likely(set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT, + map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT))) + continue; + + /* + * Signal an error for this slot. This in turn requires + * immediate unmapping. + */ + map_ops[i].status = GNTST_general_error; + unmap.host_addr = map_ops[i].host_addr, + unmap.handle = map_ops[i].handle; + map_ops[i].handle = ~0; + if (map_ops[i].flags & GNTMAP_device_map) + unmap.dev_bus_addr = map_ops[i].dev_bus_addr; + else + unmap.dev_bus_addr = 0; + + /* + * Pre-populate the status field, to be recognizable in + * the log message below. + */ + unmap.status = 1; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, + &unmap, 1); + if (rc || unmap.status != GNTST_okay) + pr_err_once("gnttab unmap failed: rc=%d st=%d\n", + rc, unmap.status); } return 0; diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index b5949e5a83ec..9545b8df5315 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -710,6 +710,8 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, for (i = 0; i < count; i++) { unsigned long mfn, pfn; + struct gnttab_unmap_grant_ref unmap[2]; + int rc; /* Do not add to override if the map failed. */ if (map_ops[i].status != GNTST_okay || @@ -727,10 +729,46 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned"); - if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { - ret = -ENOMEM; - goto out; + if (likely(set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) + continue; + + /* + * Signal an error for this slot. This in turn requires + * immediate unmapping. + */ + map_ops[i].status = GNTST_general_error; + unmap[0].host_addr = map_ops[i].host_addr, + unmap[0].handle = map_ops[i].handle; + map_ops[i].handle = ~0; + if (map_ops[i].flags & GNTMAP_device_map) + unmap[0].dev_bus_addr = map_ops[i].dev_bus_addr; + else + unmap[0].dev_bus_addr = 0; + + if (kmap_ops) { + kmap_ops[i].status = GNTST_general_error; + unmap[1].host_addr = kmap_ops[i].host_addr, + unmap[1].handle = kmap_ops[i].handle; + kmap_ops[i].handle = ~0; + if (kmap_ops[i].flags & GNTMAP_device_map) + unmap[1].dev_bus_addr = kmap_ops[i].dev_bus_addr; + else + unmap[1].dev_bus_addr = 0; } + + /* + * Pre-populate both status fields, to be recognizable in + * the log message below. + */ + unmap[0].status = 1; + unmap[1].status = 1; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, + unmap, 1 + !!kmap_ops); + if (rc || unmap[0].status != GNTST_okay || + unmap[1].status != GNTST_okay) + pr_err_once("gnttab unmap failed: rc=%d st0=%d st1=%d\n", + rc, unmap[0].status, unmap[1].status); } out: -- cgit v1.2.3 From 2991397d23ec597405b116d96de3813420bdcbc3 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 25 Feb 2021 16:35:15 +0100 Subject: xen-netback: respect gnttab_map_refs()'s return value Commit 3194a1746e8a ("xen-netback: don't "handle" error by BUG()") dropped respective a BUG_ON() without noticing that with this the variable's value wouldn't be consumed anymore. With gnttab_set_map_op() setting all status fields to a non-zero value, in case of an error no slot should have a status of GNTST_okay (zero). This is part of XSA-367. Cc: Reported-by: kernel test robot Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/d933f495-619a-0086-5fb4-1ec3cf81a8fc@suse.com Signed-off-by: Juergen Gross --- drivers/net/xen-netback/netback.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index e5c73f819662..6afb5ca999c2 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1343,11 +1343,21 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget) return 0; gnttab_batch_copy(queue->tx_copy_ops, nr_cops); - if (nr_mops != 0) + if (nr_mops != 0) { ret = gnttab_map_refs(queue->tx_map_ops, NULL, queue->pages_to_map, nr_mops); + if (ret) { + unsigned int i; + + netdev_err(queue->vif->dev, "Map fail: nr %u ret %d\n", + nr_mops, ret); + for (i = 0; i < nr_mops; ++i) + WARN_ON_ONCE(queue->tx_map_ops[i].status == + GNTST_okay); + } + } work_done = xenvif_tx_submit(queue); -- cgit v1.2.3 From 882213990d32fd224340a4533f6318dd152be4b2 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 24 Feb 2021 16:03:08 +0100 Subject: xen: fix p2m size in dom0 for disabled memory hotplug case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 9e2369c06c8a18 ("xen: add helpers to allocate unpopulated memory") foreign mappings are using guest physical addresses allocated via ZONE_DEVICE functionality. This will result in problems for the case of no balloon memory hotplug being configured, as the p2m list will only cover the initial memory size of the domain. Any ZONE_DEVICE allocated address will be outside the p2m range and thus a mapping can't be established with that memory address. Fix that by extending the p2m size for that case. At the same time add a check for a to be created mapping to be within the p2m limits in order to detect errors early. While changing a comment, remove some 32-bit leftovers. This is XSA-369. Fixes: 9e2369c06c8a18 ("xen: add helpers to allocate unpopulated memory") Cc: # 5.9 Reported-by: Marek Marczykowski-Górecki Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross --- arch/x86/include/asm/xen/page.h | 12 ++++++++++++ arch/x86/xen/p2m.c | 10 ++++++---- arch/x86/xen/setup.c | 25 +++---------------------- 3 files changed, 21 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 1a162e559753..7068e4bb057d 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -86,6 +86,18 @@ clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, } #endif +/* + * The maximum amount of extra memory compared to the base size. The + * main scaling factor is the size of struct page. At extreme ratios + * of base:extra, all the base memory can be filled with page + * structures for the extra memory, leaving no space for anything + * else. + * + * 10x seems like a reasonable balance between scaling flexibility and + * leaving a practically usable system. + */ +#define XEN_EXTRA_MEM_RATIO (10) + /* * Helper functions to write or read unsigned long values to/from * memory, when the access may fault. diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 9545b8df5315..a3cc33091f46 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -416,6 +416,9 @@ void __init xen_vmalloc_p2m_tree(void) xen_p2m_last_pfn = xen_max_p2m_pfn; p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE; + if (!p2m_limit && IS_ENABLED(CONFIG_XEN_UNPOPULATED_ALLOC)) + p2m_limit = xen_start_info->nr_pages * XEN_EXTRA_MEM_RATIO; + vm.flags = VM_ALLOC; vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit), PMD_SIZE * PMDS_PER_MID_PAGE); @@ -652,10 +655,9 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) pte_t *ptep; unsigned int level; - if (unlikely(pfn >= xen_p2m_size)) { - BUG_ON(mfn != INVALID_P2M_ENTRY); - return true; - } + /* Only invalid entries allowed above the highest p2m covered frame. */ + if (unlikely(pfn >= xen_p2m_size)) + return mfn == INVALID_P2M_ENTRY; /* * The interface requires atomic updates on p2m elements. diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 7eab14d56369..1a3b75652fa4 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -59,18 +59,6 @@ static struct { } xen_remap_buf __initdata __aligned(PAGE_SIZE); static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY; -/* - * The maximum amount of extra memory compared to the base size. The - * main scaling factor is the size of struct page. At extreme ratios - * of base:extra, all the base memory can be filled with page - * structures for the extra memory, leaving no space for anything - * else. - * - * 10x seems like a reasonable balance between scaling flexibility and - * leaving a practically usable system. - */ -#define EXTRA_MEM_RATIO (10) - static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB); static void __init xen_parse_512gb(void) @@ -790,20 +778,13 @@ char * __init xen_memory_setup(void) extra_pages += max_pages - max_pfn; /* - * Clamp the amount of extra memory to a EXTRA_MEM_RATIO - * factor the base size. On non-highmem systems, the base - * size is the full initial memory allocation; on highmem it - * is limited to the max size of lowmem, so that it doesn't - * get completely filled. + * Clamp the amount of extra memory to a XEN_EXTRA_MEM_RATIO + * factor the base size. * * Make sure we have no memory above max_pages, as this area * isn't handled by the p2m management. - * - * In principle there could be a problem in lowmem systems if - * the initial memory is also very large with respect to - * lowmem, but we won't try to deal with that here. */ - extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), + extra_pages = min3(XEN_EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), extra_pages, max_pages - max_pfn); i = 0; addr = xen_e820_table.entries[0].addr; -- cgit v1.2.3