From 6b5e7d9ef793c55d72679f058514f33c4258f286 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 15 Apr 2012 11:27:12 +0200 Subject: [PATCH 1/7] xen/grant-table: add error-handling code on failure of gnttab_resume Jump to the label ini_nomem as done on the failure of the page allocations above. The code at ini_nomem is modified to accommodate different return values. Signed-off-by: Julia Lawall Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/grant-table.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 1cd94daa71db..f805918a993f 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -1026,6 +1026,7 @@ int gnttab_init(void) int i; unsigned int max_nr_glist_frames, nr_glist_frames; unsigned int nr_init_grefs; + int ret; nr_grant_frames = 1; boot_max_nr_grant_frames = __max_nr_grant_frames(); @@ -1044,12 +1045,16 @@ int gnttab_init(void) nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; for (i = 0; i < nr_glist_frames; i++) { gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL); - if (gnttab_list[i] == NULL) + if (gnttab_list[i] == NULL) { + ret = -ENOMEM; goto ini_nomem; + } } - if (gnttab_resume() < 0) - return -ENODEV; + if (gnttab_resume() < 0) { + ret = -ENODEV; + goto ini_nomem; + } nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; @@ -1067,7 +1072,7 @@ int gnttab_init(void) for (i--; i >= 0; i--) free_page((unsigned long)gnttab_list[i]); kfree(gnttab_list); - return -ENOMEM; + return ret; } EXPORT_SYMBOL_GPL(gnttab_init); From e8e937be971d706061dc56220ff3605ab77622a7 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 3 Apr 2012 18:05:47 +0100 Subject: [PATCH 2/7] xen/gntdev: do not set VM_PFNMAP Since we are using the m2p_override we do have struct pages corresponding to the user vma mmap'ed by gntdev. Removing the VM_PFNMAP flag makes get_user_pages work on that vma. An example test case would be using a Xen userspace block backend (QDISK) on a file on NFS using O_DIRECT. CC: stable@kernel.org Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/gntdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 99d8151c824a..1ffd03bf8e10 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -722,7 +722,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) vma->vm_flags |= VM_RESERVED|VM_DONTEXPAND; if (use_ptemod) - vma->vm_flags |= VM_DONTCOPY|VM_PFNMAP; + vma->vm_flags |= VM_DONTCOPY; vma->vm_private_data = map; From b960d6c43a63ebd2d8518b328da3816b833ee8cc Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 27 Mar 2012 14:52:44 +0100 Subject: [PATCH 3/7] xen/p2m: m2p_find_override: use list_for_each_entry_safe Use list_for_each_entry_safe and remove the spin_lock acquisition in m2p_find_override. Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/p2m.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 1b267e75158d..7ed8cc3434c5 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -809,21 +809,17 @@ struct page *m2p_find_override(unsigned long mfn) { unsigned long flags; struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)]; - struct page *p, *ret; + struct page *p, *t, *ret; ret = NULL; - spin_lock_irqsave(&m2p_override_lock, flags); - - list_for_each_entry(p, bucket, lru) { + list_for_each_entry_safe(p, t, bucket, lru) { if (page_private(p) == mfn) { ret = p; break; } } - spin_unlock_irqrestore(&m2p_override_lock, flags); - return ret; } From a71e23d9925517e609dfcb72b5874f33cdb0d2ad Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 16 Apr 2012 21:55:04 -0400 Subject: [PATCH 4/7] xen/blkback: Fix warning error. drivers/block/xen-blkback/xenbus.c: In function 'xen_blkbk_discard': drivers/block/xen-blkback/xenbus.c:419:4: warning: passing argument 1 of 'dev_warn' makes pointer from integer without a cast +[enabled by default] include/linux/device.h:894:5: note: expected 'const struct device *' but argument is of type 'long int' It is unclear how that mistake made it in. It surely is wrong. Acked-by: Jens Axboe Reported-by: Stephen Rothwell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/xenbus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 89860f34a7ec..4f66171c6683 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -416,7 +416,7 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info "discard-secure", "%d", blkif->vbd.discard_secure); if (err) { - dev_warn(dev-dev, "writing discard-secure (%d)", err); + dev_warn(&dev->dev, "writing discard-secure (%d)", err); return; } } From 3066616ce23aad5719c23a0f21f32676402cb44b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 17 Apr 2012 22:21:38 -0400 Subject: [PATCH 5/7] xen/xenbus: Add quirk to deal with misconfigured backends. A rather annoying and common case is when booting a PVonHVM guest and exposing the PV KBD and PV VFB - as broken toolstacks don't always initialize the backends correctly. Normally The HVM guest is using the VGA driver and the emulated keyboard for this (though upstream version of QEMU implements PV KBD, but still uses a VGA driver). We provide a very basic two-stage wait mechanism - where we wait for 30 seconds for all devices, and then for 270 for all them except the two mentioned. That allows us to wait for the essential devices, like network or disk for the full 6 minutes. To trigger this, put this in your guest config: vfb = [ 'vnc=1, vnclisten=0.0.0.0 ,vncunused=1'] instead of this: vnc=1 vnclisten="0.0.0.0" CC: stable@kernel.org Acked-by: Stefano Stabellini [v3: Split delay in non-essential (30 seconds) and essential devices per Ian and Stefano suggestion] [v4: Added comments per Stefano suggestion] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_probe_frontend.c | 67 +++++++++++++++++----- 1 file changed, 52 insertions(+), 15 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index f20c5f178b40..a31b54d48839 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -135,7 +135,7 @@ static int read_backend_details(struct xenbus_device *xendev) return xenbus_read_otherend_details(xendev, "backend-id", "backend"); } -static int is_device_connecting(struct device *dev, void *data) +static int is_device_connecting(struct device *dev, void *data, bool ignore_nonessential) { struct xenbus_device *xendev = to_xenbus_device(dev); struct device_driver *drv = data; @@ -152,16 +152,41 @@ static int is_device_connecting(struct device *dev, void *data) if (drv && (dev->driver != drv)) return 0; + if (ignore_nonessential) { + /* With older QEMU, for PVonHVM guests the guest config files + * could contain: vfb = [ 'vnc=1, vnclisten=0.0.0.0'] + * which is nonsensical as there is no PV FB (there can be + * a PVKB) running as HVM guest. */ + + if ((strncmp(xendev->nodename, "device/vkbd", 11) == 0)) + return 0; + + if ((strncmp(xendev->nodename, "device/vfb", 10) == 0)) + return 0; + } xendrv = to_xenbus_driver(dev->driver); return (xendev->state < XenbusStateConnected || (xendev->state == XenbusStateConnected && xendrv->is_ready && !xendrv->is_ready(xendev))); } +static int essential_device_connecting(struct device *dev, void *data) +{ + return is_device_connecting(dev, data, true /* ignore PV[KBB+FB] */); +} +static int non_essential_device_connecting(struct device *dev, void *data) +{ + return is_device_connecting(dev, data, false); +} -static int exists_connecting_device(struct device_driver *drv) +static int exists_essential_connecting_device(struct device_driver *drv) { return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, - is_device_connecting); + essential_device_connecting); +} +static int exists_non_essential_connecting_device(struct device_driver *drv) +{ + return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, + non_essential_device_connecting); } static int print_device_status(struct device *dev, void *data) @@ -192,6 +217,23 @@ static int print_device_status(struct device *dev, void *data) /* We only wait for device setup after most initcalls have run. */ static int ready_to_wait_for_devices; +static bool wait_loop(unsigned long start, unsigned int max_delay, + unsigned int *seconds_waited) +{ + if (time_after(jiffies, start + (*seconds_waited+5)*HZ)) { + if (!*seconds_waited) + printk(KERN_WARNING "XENBUS: Waiting for " + "devices to initialise: "); + *seconds_waited += 5; + printk("%us...", max_delay - *seconds_waited); + if (*seconds_waited == max_delay) + return true; + } + + schedule_timeout_interruptible(HZ/10); + + return false; +} /* * On a 5-minute timeout, wait for all devices currently configured. We need * to do this to guarantee that the filesystems and / or network devices @@ -215,19 +257,14 @@ static void wait_for_devices(struct xenbus_driver *xendrv) if (!ready_to_wait_for_devices || !xen_domain()) return; - while (exists_connecting_device(drv)) { - if (time_after(jiffies, start + (seconds_waited+5)*HZ)) { - if (!seconds_waited) - printk(KERN_WARNING "XENBUS: Waiting for " - "devices to initialise: "); - seconds_waited += 5; - printk("%us...", 300 - seconds_waited); - if (seconds_waited == 300) - break; - } + while (exists_non_essential_connecting_device(drv)) + if (wait_loop(start, 30, &seconds_waited)) + break; - schedule_timeout_interruptible(HZ/10); - } + /* Skips PVKB and PVFB check.*/ + while (exists_essential_connecting_device(drv)) + if (wait_loop(start, 270, &seconds_waited)) + break; if (seconds_waited) printk("\n"); From 186bab1ce04f99153b7eeb3348438b654c24c24b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 17 Apr 2012 14:35:49 -0400 Subject: [PATCH 6/7] xen/resume: Fix compile warnings. linux/drivers/xen/manage.c: In function 'do_suspend': linux/drivers/xen/manage.c:160:5: warning: 'si.cancelled' may be used uninitialized in this function Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/manage.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 9e14ae6cd49c..412b96cc5305 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -132,6 +132,7 @@ static void do_suspend(void) err = dpm_suspend_end(PMSG_FREEZE); if (err) { printk(KERN_ERR "dpm_suspend_end failed: %d\n", err); + si.cancelled = 0; goto out_resume; } From 3d81acb1cdb242378a1acb3eb1bc28c6bb5895f1 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 20 Apr 2012 11:50:30 -0400 Subject: [PATCH 7/7] Revert "xen/p2m: m2p_find_override: use list_for_each_entry_safe" This reverts commit b960d6c43a63ebd2d8518b328da3816b833ee8cc. If we have another thread (very likely) touched the list, we end up hitting a problem "that the next element is wrong because we should be able to cope with that. The problem is that the next->next pointer would be set LIST_POISON1. " (Stefano's comment on the patch). Reverting for now. Suggested-by: Dan Carpenter Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/p2m.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 7ed8cc3434c5..1b267e75158d 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -809,17 +809,21 @@ struct page *m2p_find_override(unsigned long mfn) { unsigned long flags; struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)]; - struct page *p, *t, *ret; + struct page *p, *ret; ret = NULL; - list_for_each_entry_safe(p, t, bucket, lru) { + spin_lock_irqsave(&m2p_override_lock, flags); + + list_for_each_entry(p, bucket, lru) { if (page_private(p) == mfn) { ret = p; break; } } + spin_unlock_irqrestore(&m2p_override_lock, flags); + return ret; }