From b440bde74f043c8ec31081cb59c9a53ade954701 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 10 Sep 2014 13:45:01 -0600 Subject: [PATCH 1/2] PCI: Add pci_ignore_hotplug() to ignore hotplug events for a device Powering off a hot-pluggable device, e.g., with pci_set_power_state(D3cold), normally generates a hot-remove event that unbinds the driver. Some drivers expect to remain bound to a device even while they power it off and back on again. This can be dangerous, because if the device is removed or replaced while it is powered off, the driver doesn't know that anything changed. But some drivers accept that risk. Add pci_ignore_hotplug() for use by drivers that know their device cannot be removed. Using pci_ignore_hotplug() tells the PCI core that hot-plug events for the device should be ignored. The radeon and nouveau drivers use this to switch between a low-power, integrated GPU and a higher-power, higher-performance discrete GPU. They power off the unused GPU, but they want to remain bound to it. This is a reimplementation of f244d8b623da ("ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug") but extends it to work with both acpiphp and pciehp. This fixes a problem where systems with dual GPUs using the radeon drivers become unusable, freezing every few seconds (see bugzillas below). The resume of the radeon device may also fail, e.g., This fixes problems on dual GPU systems where the radeon driver becomes unusable because of problems while suspending the device, as in bug 79701: [drm] radeon: finishing device. radeon 0000:01:00.0: Userspace still has active objects ! radeon 0000:01:00.0: ffff8800cb4ec288 ffff8800cb4ec000 16384 4294967297 force free ... WARNING: CPU: 0 PID: 67 at /home/apw/COD/linux/drivers/gpu/drm/radeon/radeon_gart.c:234 radeon_gart_unbind+0xd2/0xe0 [radeon]() trying to unbind memory from uninitialized GART ! or while resuming it, as in bug 77261: radeon 0000:01:00.0: ring 0 stalled for more than 10158msec radeon 0000:01:00.0: GPU lockup ... radeon 0000:01:00.0: GPU pci config reset pciehp 0000:00:01.0:pcie04: Card not present on Slot(1-1) radeon 0000:01:00.0: GPU reset succeeded, trying to resume *ERROR* radeon: dpm resume failed radeon 0000:01:00.0: Wait for MC idle timedout ! Link: https://bugzilla.kernel.org/show_bug.cgi?id=77261 Link: https://bugzilla.kernel.org/show_bug.cgi?id=79701 Reported-by: Shawn Starr Reported-by: Jose P. Signed-off-by: Bjorn Helgaas Acked-by: Alex Deucher Acked-by: Rajat Jain Acked-by: Rafael J. Wysocki Acked-by: Dave Airlie CC: stable@vger.kernel.org # v3.15+ --- drivers/gpu/drm/nouveau/nouveau_drm.c | 1 + drivers/gpu/drm/radeon/radeon_drv.c | 1 + drivers/pci/hotplug/acpiphp_glue.c | 16 ++++++---------- drivers/pci/hotplug/pciehp_hpc.c | 12 ++++++++++++ include/linux/pci.h | 6 ++++++ 5 files changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 250a5e88c7516..9c3af96a71538 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -627,6 +627,7 @@ int nouveau_pmops_suspend(struct device *dev) pci_save_state(pdev); pci_disable_device(pdev); + pci_ignore_hotplug(pdev); pci_set_power_state(pdev, PCI_D3hot); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 8df888908833f..abbd87adfd75f 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -440,6 +440,7 @@ static int radeon_pmops_runtime_suspend(struct device *dev) ret = radeon_suspend_kms(drm_dev, false, false); pci_save_state(pdev); pci_disable_device(pdev); + pci_ignore_hotplug(pdev); pci_set_power_state(pdev, PCI_D3cold); drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF; diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 70741c8c46a0c..6cd5160fc0576 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -560,19 +560,15 @@ static void disable_slot(struct acpiphp_slot *slot) slot->flags &= (~SLOT_ENABLED); } -static bool acpiphp_no_hotplug(struct acpi_device *adev) -{ - return adev && adev->flags.no_hotplug; -} - static bool slot_no_hotplug(struct acpiphp_slot *slot) { - struct acpiphp_func *func; + struct pci_bus *bus = slot->bus; + struct pci_dev *dev; - list_for_each_entry(func, &slot->funcs, sibling) - if (acpiphp_no_hotplug(func_to_acpi_device(func))) + list_for_each_entry(dev, &bus->devices, bus_list) { + if (PCI_SLOT(dev->devfn) == slot->device && dev->ignore_hotplug) return true; - + } return false; } @@ -645,7 +641,7 @@ static void trim_stale_devices(struct pci_dev *dev) status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta); alive = (ACPI_SUCCESS(status) && device_status_valid(sta)) - || acpiphp_no_hotplug(adev); + || dev->ignore_hotplug; } if (!alive) alive = pci_device_is_present(dev); diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 9da84b8b27d8a..5e01ae39ec46a 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -506,6 +506,8 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) { struct controller *ctrl = (struct controller *)dev_id; struct pci_dev *pdev = ctrl_dev(ctrl); + struct pci_bus *subordinate = pdev->subordinate; + struct pci_dev *dev; struct slot *slot = ctrl->slot; u16 detected, intr_loc; @@ -539,6 +541,16 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) wake_up(&ctrl->queue); } + if (subordinate) { + list_for_each_entry(dev, &subordinate->devices, bus_list) { + if (dev->ignore_hotplug) { + ctrl_dbg(ctrl, "ignoring hotplug event %#06x (%s requested no hotplug)\n", + intr_loc, pci_name(dev)); + return IRQ_HANDLED; + } + } + } + if (!(intr_loc & ~PCI_EXP_SLTSTA_CC)) return IRQ_HANDLED; diff --git a/include/linux/pci.h b/include/linux/pci.h index 61978a4608412..96453f9bc8baf 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -303,6 +303,7 @@ struct pci_dev { D3cold, not set for devices powered on/off by the corresponding bridge */ + unsigned int ignore_hotplug:1; /* Ignore hotplug events */ unsigned int d3_delay; /* D3->D0 transition time in ms */ unsigned int d3cold_delay; /* D3cold->D0 transition time in ms */ @@ -1021,6 +1022,11 @@ bool pci_dev_run_wake(struct pci_dev *dev); bool pci_check_pme_status(struct pci_dev *dev); void pci_pme_wakeup_bus(struct pci_bus *bus); +static inline void pci_ignore_hotplug(struct pci_dev *dev) +{ + dev->ignore_hotplug = 1; +} + static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable) { From f91ce35e471ae17552ce7bfe355cfd997e3ad781 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 10 Sep 2014 15:30:08 -0600 Subject: [PATCH 2/2] ACPIPHP / radeon / nouveau: Remove acpi_bus_no_hotplug() Revert parts of f244d8b623da ("ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug"). A previous commit 5493b31f0b55 ("PCI: Add pci_ignore_hotplug() to ignore hotplug events for a device") added equivalent functionality implemented in a different way for both acpiphp and pciehp. Signed-off-by: Bjorn Helgaas Acked-by: Alex Deucher Acked-by: Rafael J. Wysocki Acked-by: Dave Airlie Acked-by: Rajat Jain --- drivers/acpi/bus.c | 10 ---------- drivers/gpu/drm/nouveau/nouveau_acpi.c | 16 ++-------------- drivers/gpu/drm/radeon/radeon_atpx_handler.c | 16 ++-------------- include/acpi/acpi_bus.h | 4 +--- 4 files changed, 5 insertions(+), 41 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 8581f5b84f48b..8b67bd0f6bb5e 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -177,16 +177,6 @@ void acpi_bus_detach_private_data(acpi_handle handle) } EXPORT_SYMBOL_GPL(acpi_bus_detach_private_data); -void acpi_bus_no_hotplug(acpi_handle handle) -{ - struct acpi_device *adev = NULL; - - acpi_bus_get_device(handle, &adev); - if (adev) - adev->flags.no_hotplug = true; -} -EXPORT_SYMBOL_GPL(acpi_bus_no_hotplug); - static void acpi_print_osc_error(acpi_handle handle, struct acpi_osc_context *context, char *error) { diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index 279206997e5cd..622424692b3bd 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -46,7 +46,6 @@ static struct nouveau_dsm_priv { bool dsm_detected; bool optimus_detected; acpi_handle dhandle; - acpi_handle other_handle; acpi_handle rom_handle; } nouveau_dsm_priv; @@ -222,10 +221,9 @@ static int nouveau_dsm_pci_probe(struct pci_dev *pdev) if (!dhandle) return false; - if (!acpi_has_method(dhandle, "_DSM")) { - nouveau_dsm_priv.other_handle = dhandle; + if (!acpi_has_method(dhandle, "_DSM")) return false; - } + if (acpi_check_dsm(dhandle, nouveau_dsm_muid, 0x00000102, 1 << NOUVEAU_DSM_POWER)) retval |= NOUVEAU_DSM_HAS_MUX; @@ -301,16 +299,6 @@ static bool nouveau_dsm_detect(void) printk(KERN_INFO "VGA switcheroo: detected DSM switching method %s handle\n", acpi_method_name); nouveau_dsm_priv.dsm_detected = true; - /* - * On some systems hotplug events are generated for the device - * being switched off when _DSM is executed. They cause ACPI - * hotplug to trigger and attempt to remove the device from - * the system, which causes it to break down. Prevent that from - * happening by setting the no_hotplug flag for the involved - * ACPI device objects. - */ - acpi_bus_no_hotplug(nouveau_dsm_priv.dhandle); - acpi_bus_no_hotplug(nouveau_dsm_priv.other_handle); ret = true; } diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index a9fb0d016d387..8bc7d0bbd3c80 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -33,7 +33,6 @@ static struct radeon_atpx_priv { bool atpx_detected; /* handle for device - and atpx */ acpi_handle dhandle; - acpi_handle other_handle; struct radeon_atpx atpx; } radeon_atpx_priv; @@ -453,10 +452,9 @@ static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev) return false; status = acpi_get_handle(dhandle, "ATPX", &atpx_handle); - if (ACPI_FAILURE(status)) { - radeon_atpx_priv.other_handle = dhandle; + if (ACPI_FAILURE(status)) return false; - } + radeon_atpx_priv.dhandle = dhandle; radeon_atpx_priv.atpx.handle = atpx_handle; return true; @@ -540,16 +538,6 @@ static bool radeon_atpx_detect(void) printk(KERN_INFO "VGA switcheroo: detected switching method %s handle\n", acpi_method_name); radeon_atpx_priv.atpx_detected = true; - /* - * On some systems hotplug events are generated for the device - * being switched off when ATPX is executed. They cause ACPI - * hotplug to trigger and attempt to remove the device from - * the system, which causes it to break down. Prevent that from - * happening by setting the no_hotplug flag for the involved - * ACPI device objects. - */ - acpi_bus_no_hotplug(radeon_atpx_priv.dhandle); - acpi_bus_no_hotplug(radeon_atpx_priv.other_handle); return true; } return false; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index bcfd808b1098e..95c506961a134 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -204,10 +204,9 @@ struct acpi_device_flags { u32 match_driver:1; u32 initialized:1; u32 visited:1; - u32 no_hotplug:1; u32 hotplug_notify:1; u32 is_dock_station:1; - u32 reserved:22; + u32 reserved:23; }; /* File System */ @@ -412,7 +411,6 @@ void acpi_bus_private_data_handler(acpi_handle, void *); int acpi_bus_get_private_data(acpi_handle, void **); int acpi_bus_attach_private_data(acpi_handle, void *); void acpi_bus_detach_private_data(acpi_handle); -void acpi_bus_no_hotplug(acpi_handle handle); extern int acpi_notifier_call_chain(struct acpi_device *, u32, u32); extern int register_acpi_notifier(struct notifier_block *); extern int unregister_acpi_notifier(struct notifier_block *);