From 61cf16d8bd38c3dc52033ea75d5b1f8368514a17 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 16 Dec 2013 15:14:31 -0700 Subject: PCI: Add pci_try_reset_function(), pci_try_reset_slot(), pci_try_reset_bus() When doing a function/slot/bus reset PCI grabs the device_lock for each device to block things like suspend and driver probes, but call paths exist where this lock may already be held. This creates an opportunity for deadlock. For instance, vfio allows userspace to issue resets so long as it owns the device(s). If a driver unbind .remove callback races with userspace issuing a reset, we have a deadlock as userspace gets stuck waiting on device_lock while another thread has device_lock and waits for .remove to complete. To resolve this, we can make a version of the reset interfaces which use trylock. With this, we can safely attempt a reset and return error to userspace if there is contention. [bhelgaas: the deadlock happens when A (userspace) has a file descriptor for the device, and B waits in this path: driver_detach device_lock # take device_lock __device_release_driver pci_device_remove # pci_bus_type.remove vfio_pci_remove # pci_driver .remove vfio_del_group_dev wait_event(vfio.release_q, !vfio_dev_present) # wait (holding device_lock) Now B is stuck until A gives up the file descriptor. If A tries to acquire device_lock for any reason, we deadlock because A is waiting for B to release the lock, and B is waiting for A to release the file descriptor.] Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 155 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 3 + 2 files changed, 158 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 1cbd590cf1d1..8386367814ee 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3250,6 +3250,18 @@ static void pci_dev_lock(struct pci_dev *dev) device_lock(&dev->dev); } +/* Return 1 on successful lock, 0 on contention */ +static int pci_dev_trylock(struct pci_dev *dev) +{ + if (pci_cfg_access_trylock(dev)) { + if (device_trylock(&dev->dev)) + return 1; + pci_cfg_access_unlock(dev); + } + + return 0; +} + static void pci_dev_unlock(struct pci_dev *dev) { device_unlock(&dev->dev); @@ -3393,6 +3405,34 @@ int pci_reset_function(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_reset_function); +/** + * pci_try_reset_function - quiesce and reset a PCI device function + * @dev: PCI device to reset + * + * Same as above, except return -EAGAIN if unable to lock device. + */ +int pci_try_reset_function(struct pci_dev *dev) +{ + int rc; + + rc = pci_dev_reset(dev, 1); + if (rc) + return rc; + + pci_dev_save_and_disable(dev); + + if (pci_dev_trylock(dev)) { + rc = __pci_dev_reset(dev, 0); + pci_dev_unlock(dev); + } else + rc = -EAGAIN; + + pci_dev_restore(dev); + + return rc; +} +EXPORT_SYMBOL_GPL(pci_try_reset_function); + /* Lock devices from the top of the tree down */ static void pci_bus_lock(struct pci_bus *bus) { @@ -3417,6 +3457,32 @@ static void pci_bus_unlock(struct pci_bus *bus) } } +/* Return 1 on successful lock, 0 on contention */ +static int pci_bus_trylock(struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + if (!pci_dev_trylock(dev)) + goto unlock; + if (dev->subordinate) { + if (!pci_bus_trylock(dev->subordinate)) { + pci_dev_unlock(dev); + goto unlock; + } + } + } + return 1; + +unlock: + list_for_each_entry_continue_reverse(dev, &bus->devices, bus_list) { + if (dev->subordinate) + pci_bus_unlock(dev->subordinate); + pci_dev_unlock(dev); + } + return 0; +} + /* Lock devices from the top of the tree down */ static void pci_slot_lock(struct pci_slot *slot) { @@ -3445,6 +3511,37 @@ static void pci_slot_unlock(struct pci_slot *slot) } } +/* Return 1 on successful lock, 0 on contention */ +static int pci_slot_trylock(struct pci_slot *slot) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &slot->bus->devices, bus_list) { + if (!dev->slot || dev->slot != slot) + continue; + if (!pci_dev_trylock(dev)) + goto unlock; + if (dev->subordinate) { + if (!pci_bus_trylock(dev->subordinate)) { + pci_dev_unlock(dev); + goto unlock; + } + } + } + return 1; + +unlock: + list_for_each_entry_continue_reverse(dev, + &slot->bus->devices, bus_list) { + if (!dev->slot || dev->slot != slot) + continue; + if (dev->subordinate) + pci_bus_unlock(dev->subordinate); + pci_dev_unlock(dev); + } + return 0; +} + /* Save and disable devices from the top of the tree down */ static void pci_bus_save_and_disable(struct pci_bus *bus) { @@ -3568,6 +3665,35 @@ int pci_reset_slot(struct pci_slot *slot) } EXPORT_SYMBOL_GPL(pci_reset_slot); +/** + * pci_try_reset_slot - Try to reset a PCI slot + * @slot: PCI slot to reset + * + * Same as above except return -EAGAIN if the slot cannot be locked + */ +int pci_try_reset_slot(struct pci_slot *slot) +{ + int rc; + + rc = pci_slot_reset(slot, 1); + if (rc) + return rc; + + pci_slot_save_and_disable(slot); + + if (pci_slot_trylock(slot)) { + might_sleep(); + rc = pci_reset_hotplug_slot(slot->hotplug, 0); + pci_slot_unlock(slot); + } else + rc = -EAGAIN; + + pci_slot_restore(slot); + + return rc; +} +EXPORT_SYMBOL_GPL(pci_try_reset_slot); + static int pci_bus_reset(struct pci_bus *bus, int probe) { if (!bus->self) @@ -3626,6 +3752,35 @@ int pci_reset_bus(struct pci_bus *bus) } EXPORT_SYMBOL_GPL(pci_reset_bus); +/** + * pci_try_reset_bus - Try to reset a PCI bus + * @bus: top level PCI bus to reset + * + * Same as above except return -EAGAIN if the bus cannot be locked + */ +int pci_try_reset_bus(struct pci_bus *bus) +{ + int rc; + + rc = pci_bus_reset(bus, 1); + if (rc) + return rc; + + pci_bus_save_and_disable(bus); + + if (pci_bus_trylock(bus)) { + might_sleep(); + pci_reset_bridge_secondary_bus(bus->self); + pci_bus_unlock(bus); + } else + rc = -EAGAIN; + + pci_bus_restore(bus); + + return rc; +} +EXPORT_SYMBOL_GPL(pci_try_reset_bus); + /** * pcix_get_max_mmrbc - get PCI-X maximum designed memory read byte count * @dev: PCI device to query diff --git a/include/linux/pci.h b/include/linux/pci.h index 9e3ec8b951b7..d21be0343865 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -948,10 +948,13 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed, int __pci_reset_function(struct pci_dev *dev); int __pci_reset_function_locked(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); +int pci_try_reset_function(struct pci_dev *dev); int pci_probe_reset_slot(struct pci_slot *slot); int pci_reset_slot(struct pci_slot *slot); +int pci_try_reset_slot(struct pci_slot *slot); int pci_probe_reset_bus(struct pci_bus *bus); int pci_reset_bus(struct pci_bus *bus); +int pci_try_reset_bus(struct pci_bus *bus); void pci_reset_bridge_secondary_bus(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); -- cgit v1.2.3 From 890ed578df82f5b7b5a874f9f2fa4f117305df5f Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 14 Jan 2014 20:45:09 -0700 Subject: vfio-pci: Use pci "try" reset interface PCI resets will attempt to take the device_lock for any device to be reset. This is a problem if that lock is already held, for instance in the device remove path. It's not sufficient to simply kill the user process or skip the reset if called after .remove as a race could result in the same deadlock. Instead, we handle all resets as "best effort" using the PCI "try" reset interfaces. This prevents the user from being able to induce a deadlock by triggering a reset. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas --- drivers/vfio/pci/vfio_pci.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 6ab71b9fcf8d..2319d206f630 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -139,25 +139,14 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); /* - * Careful, device_lock may already be held. This is the case if - * a driver unbind is blocked. Try to get the locks ourselves to - * prevent a deadlock. + * Try to reset the device. The success of this is dependent on + * being able to lock the device, which is not always possible. */ if (vdev->reset_works) { - bool reset_done = false; - - if (pci_cfg_access_trylock(pdev)) { - if (device_trylock(&pdev->dev)) { - __pci_reset_function_locked(pdev); - reset_done = true; - device_unlock(&pdev->dev); - } - pci_cfg_access_unlock(pdev); - } - - if (!reset_done) - pr_warn("%s: Unable to acquire locks for reset of %s\n", - __func__, dev_name(&pdev->dev)); + int ret = pci_try_reset_function(pdev); + if (ret) + pr_warn("%s: Failed to reset device %s (%d)\n", + __func__, dev_name(&pdev->dev), ret); } pci_restore_state(pdev); @@ -514,7 +503,7 @@ static long vfio_pci_ioctl(void *device_data, } else if (cmd == VFIO_DEVICE_RESET) { return vdev->reset_works ? - pci_reset_function(vdev->pdev) : -EINVAL; + pci_try_reset_function(vdev->pdev) : -EINVAL; } else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) { struct vfio_pci_hot_reset_info hdr; @@ -684,8 +673,8 @@ reset_info_exit: &info, slot); if (!ret) /* User has access, do the reset */ - ret = slot ? pci_reset_slot(vdev->pdev->slot) : - pci_reset_bus(vdev->pdev->bus); + ret = slot ? pci_try_reset_slot(vdev->pdev->slot) : + pci_try_reset_bus(vdev->pdev->bus); hot_reset_release: for (i--; i >= 0; i--) -- cgit v1.2.3