Merge branch 'lorenzo/pci/hv'

- fix Hyper-V bus registration failure caused by domain/serial number
    confusion (Sridhar Pitchai)

  - improve Hyper-V refcounting and coding style (Stephen Hemminger)

  - avoid potential Hyper-V hang waiting for a response that will never
    come (Dexuan Cui)

* lorenzo/pci/hv:
  PCI: hv: Do not wait forever on a device that has disappeared
  PCI: hv: Use list_for_each_entry()
  PCI: hv: Convert remove_lock to refcount
  PCI: hv: Remove unused reason for refcount handler
  PCI: hv: Make sure the bus domain is really unique
This commit is contained in:
Bjorn Helgaas 2018-06-06 16:10:35 -05:00
commit 741f8e7ecc

View File

@ -433,7 +433,7 @@ enum hv_pcibus_state {
struct hv_pcibus_device {
struct pci_sysdata sysdata;
enum hv_pcibus_state state;
atomic_t remove_lock;
refcount_t remove_lock;
struct hv_device *hdev;
resource_size_t low_mmio_space;
resource_size_t high_mmio_space;
@ -488,17 +488,6 @@ enum hv_pcichild_state {
hv_pcichild_maximum
};
enum hv_pcidev_ref_reason {
hv_pcidev_ref_invalid = 0,
hv_pcidev_ref_initial,
hv_pcidev_ref_by_slot,
hv_pcidev_ref_packet,
hv_pcidev_ref_pnp,
hv_pcidev_ref_childlist,
hv_pcidev_irqdata,
hv_pcidev_ref_max
};
struct hv_pci_dev {
/* List protected by pci_rescan_remove_lock */
struct list_head list_entry;
@ -548,14 +537,41 @@ static void hv_pci_generic_compl(void *context, struct pci_response *resp,
static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus,
u32 wslot);
static void get_pcichild(struct hv_pci_dev *hv_pcidev,
enum hv_pcidev_ref_reason reason);
static void put_pcichild(struct hv_pci_dev *hv_pcidev,
enum hv_pcidev_ref_reason reason);
static void get_pcichild(struct hv_pci_dev *hpdev)
{
refcount_inc(&hpdev->refs);
}
static void put_pcichild(struct hv_pci_dev *hpdev)
{
if (refcount_dec_and_test(&hpdev->refs))
kfree(hpdev);
}
static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
/*
* There is no good way to get notified from vmbus_onoffer_rescind(),
* so let's use polling here, since this is not a hot path.
*/
static int wait_for_response(struct hv_device *hdev,
struct completion *comp)
{
while (true) {
if (hdev->channel->rescind) {
dev_warn_once(&hdev->device, "The device is gone.\n");
return -ENODEV;
}
if (wait_for_completion_timeout(comp, HZ / 10))
break;
}
return 0;
}
/**
* devfn_to_wslot() - Convert from Linux PCI slot to Windows
* @devfn: The Linux representation of PCI slot
@ -762,7 +778,7 @@ static int hv_pcifront_read_config(struct pci_bus *bus, unsigned int devfn,
_hv_pcifront_read_config(hpdev, where, size, val);
put_pcichild(hpdev, hv_pcidev_ref_by_slot);
put_pcichild(hpdev);
return PCIBIOS_SUCCESSFUL;
}
@ -790,7 +806,7 @@ static int hv_pcifront_write_config(struct pci_bus *bus, unsigned int devfn,
_hv_pcifront_write_config(hpdev, where, size, val);
put_pcichild(hpdev, hv_pcidev_ref_by_slot);
put_pcichild(hpdev);
return PCIBIOS_SUCCESSFUL;
}
@ -856,7 +872,7 @@ static void hv_msi_free(struct irq_domain *domain, struct msi_domain_info *info,
}
hv_int_desc_free(hpdev, int_desc);
put_pcichild(hpdev, hv_pcidev_ref_by_slot);
put_pcichild(hpdev);
}
static int hv_set_affinity(struct irq_data *data, const struct cpumask *dest,
@ -1186,13 +1202,13 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
msg->address_lo = comp.int_desc.address & 0xffffffff;
msg->data = comp.int_desc.data;
put_pcichild(hpdev, hv_pcidev_ref_by_slot);
put_pcichild(hpdev);
return;
free_int_desc:
kfree(int_desc);
drop_reference:
put_pcichild(hpdev, hv_pcidev_ref_by_slot);
put_pcichild(hpdev);
return_null_message:
msg->address_hi = 0;
msg->address_lo = 0;
@ -1283,7 +1299,6 @@ static u64 get_bar_size(u64 bar_val)
*/
static void survey_child_resources(struct hv_pcibus_device *hbus)
{
struct list_head *iter;
struct hv_pci_dev *hpdev;
resource_size_t bar_size = 0;
unsigned long flags;
@ -1309,8 +1324,7 @@ static void survey_child_resources(struct hv_pcibus_device *hbus)
* for a child device are a power of 2 in size and aligned in memory,
* so it's sufficient to just add them up without tracking alignment.
*/
list_for_each(iter, &hbus->children) {
hpdev = container_of(iter, struct hv_pci_dev, list_entry);
list_for_each_entry(hpdev, &hbus->children, list_entry) {
for (i = 0; i < 6; i++) {
if (hpdev->probed_bar[i] & PCI_BASE_ADDRESS_SPACE_IO)
dev_err(&hbus->hdev->device,
@ -1363,7 +1377,6 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus)
resource_size_t low_base = 0;
resource_size_t bar_size;
struct hv_pci_dev *hpdev;
struct list_head *iter;
unsigned long flags;
u64 bar_val;
u32 command;
@ -1385,9 +1398,7 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus)
/* Pick addresses for the BARs. */
do {
list_for_each(iter, &hbus->children) {
hpdev = container_of(iter, struct hv_pci_dev,
list_entry);
list_for_each_entry(hpdev, &hbus->children, list_entry) {
for (i = 0; i < 6; i++) {
bar_val = hpdev->probed_bar[i];
if (bar_val == 0)
@ -1508,19 +1519,6 @@ static void q_resource_requirements(void *context, struct pci_response *resp,
complete(&completion->host_event);
}
static void get_pcichild(struct hv_pci_dev *hpdev,
enum hv_pcidev_ref_reason reason)
{
refcount_inc(&hpdev->refs);
}
static void put_pcichild(struct hv_pci_dev *hpdev,
enum hv_pcidev_ref_reason reason)
{
if (refcount_dec_and_test(&hpdev->refs))
kfree(hpdev);
}
/**
* new_pcichild_device() - Create a new child device
* @hbus: The internal struct tracking this root PCI bus.
@ -1568,24 +1566,14 @@ static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus,
if (ret)
goto error;
wait_for_completion(&comp_pkt.host_event);
if (wait_for_response(hbus->hdev, &comp_pkt.host_event))
goto error;
hpdev->desc = *desc;
refcount_set(&hpdev->refs, 1);
get_pcichild(hpdev, hv_pcidev_ref_childlist);
get_pcichild(hpdev);
spin_lock_irqsave(&hbus->device_list_lock, flags);
/*
* When a device is being added to the bus, we set the PCI domain
* number to be the device serial number, which is non-zero and
* unique on the same VM. The serial numbers start with 1, and
* increase by 1 for each device. So device names including this
* can have shorter names than based on the bus instance UUID.
* Only the first device serial number is used for domain, so the
* domain number will not change after the first device is added.
*/
if (list_empty(&hbus->children))
hbus->sysdata.domain = desc->ser;
list_add_tail(&hpdev->list_entry, &hbus->children);
spin_unlock_irqrestore(&hbus->device_list_lock, flags);
return hpdev;
@ -1618,7 +1606,7 @@ static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus,
list_for_each_entry(iter, &hbus->children, list_entry) {
if (iter->desc.win_slot.slot == wslot) {
hpdev = iter;
get_pcichild(hpdev, hv_pcidev_ref_by_slot);
get_pcichild(hpdev);
break;
}
}
@ -1654,7 +1642,6 @@ static void pci_devices_present_work(struct work_struct *work)
{
u32 child_no;
bool found;
struct list_head *iter;
struct pci_function_description *new_desc;
struct hv_pci_dev *hpdev;
struct hv_pcibus_device *hbus;
@ -1691,10 +1678,8 @@ static void pci_devices_present_work(struct work_struct *work)
/* First, mark all existing children as reported missing. */
spin_lock_irqsave(&hbus->device_list_lock, flags);
list_for_each(iter, &hbus->children) {
hpdev = container_of(iter, struct hv_pci_dev,
list_entry);
hpdev->reported_missing = true;
list_for_each_entry(hpdev, &hbus->children, list_entry) {
hpdev->reported_missing = true;
}
spin_unlock_irqrestore(&hbus->device_list_lock, flags);
@ -1704,11 +1689,8 @@ static void pci_devices_present_work(struct work_struct *work)
new_desc = &dr->func[child_no];
spin_lock_irqsave(&hbus->device_list_lock, flags);
list_for_each(iter, &hbus->children) {
hpdev = container_of(iter, struct hv_pci_dev,
list_entry);
if ((hpdev->desc.win_slot.slot ==
new_desc->win_slot.slot) &&
list_for_each_entry(hpdev, &hbus->children, list_entry) {
if ((hpdev->desc.win_slot.slot == new_desc->win_slot.slot) &&
(hpdev->desc.v_id == new_desc->v_id) &&
(hpdev->desc.d_id == new_desc->d_id) &&
(hpdev->desc.ser == new_desc->ser)) {
@ -1730,12 +1712,10 @@ static void pci_devices_present_work(struct work_struct *work)
spin_lock_irqsave(&hbus->device_list_lock, flags);
do {
found = false;
list_for_each(iter, &hbus->children) {
hpdev = container_of(iter, struct hv_pci_dev,
list_entry);
list_for_each_entry(hpdev, &hbus->children, list_entry) {
if (hpdev->reported_missing) {
found = true;
put_pcichild(hpdev, hv_pcidev_ref_childlist);
put_pcichild(hpdev);
list_move_tail(&hpdev->list_entry, &removed);
break;
}
@ -1748,7 +1728,7 @@ static void pci_devices_present_work(struct work_struct *work)
hpdev = list_first_entry(&removed, struct hv_pci_dev,
list_entry);
list_del(&hpdev->list_entry);
put_pcichild(hpdev, hv_pcidev_ref_initial);
put_pcichild(hpdev);
}
switch (hbus->state) {
@ -1883,8 +1863,8 @@ static void hv_eject_device_work(struct work_struct *work)
sizeof(*ejct_pkt), (unsigned long)&ctxt.pkt,
VM_PKT_DATA_INBAND, 0);
put_pcichild(hpdev, hv_pcidev_ref_childlist);
put_pcichild(hpdev, hv_pcidev_ref_pnp);
put_pcichild(hpdev);
put_pcichild(hpdev);
put_hvpcibus(hpdev->hbus);
}
@ -1899,7 +1879,7 @@ static void hv_eject_device_work(struct work_struct *work)
static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
{
hpdev->state = hv_pcichild_ejecting;
get_pcichild(hpdev, hv_pcidev_ref_pnp);
get_pcichild(hpdev);
INIT_WORK(&hpdev->wrk, hv_eject_device_work);
get_hvpcibus(hpdev->hbus);
queue_work(hpdev->hbus->wq, &hpdev->wrk);
@ -1999,8 +1979,7 @@ static void hv_pci_onchannelcallback(void *context)
dev_message->wslot.slot);
if (hpdev) {
hv_pci_eject_device(hpdev);
put_pcichild(hpdev,
hv_pcidev_ref_by_slot);
put_pcichild(hpdev);
}
break;
@ -2069,15 +2048,16 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
sizeof(struct pci_version_request),
(unsigned long)pkt, VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (!ret)
ret = wait_for_response(hdev, &comp_pkt.host_event);
if (ret) {
dev_err(&hdev->device,
"PCI Pass-through VSP failed sending version reqquest: %#x",
"PCI Pass-through VSP failed to request version: %d",
ret);
goto exit;
}
wait_for_completion(&comp_pkt.host_event);
if (comp_pkt.completion_status >= 0) {
pci_protocol_version = pci_protocol_versions[i];
dev_info(&hdev->device,
@ -2286,11 +2266,12 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
ret = vmbus_sendpacket(hdev->channel, d0_entry, sizeof(*d0_entry),
(unsigned long)pkt, VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (!ret)
ret = wait_for_response(hdev, &comp_pkt.host_event);
if (ret)
goto exit;
wait_for_completion(&comp_pkt.host_event);
if (comp_pkt.completion_status < 0) {
dev_err(&hdev->device,
"PCI Pass-through VSP failed D0 Entry with status %x\n",
@ -2330,11 +2311,10 @@ static int hv_pci_query_relations(struct hv_device *hdev)
ret = vmbus_sendpacket(hdev->channel, &message, sizeof(message),
0, VM_PKT_DATA_INBAND, 0);
if (ret)
return ret;
if (!ret)
ret = wait_for_response(hdev, &comp);
wait_for_completion(&comp);
return 0;
return ret;
}
/**
@ -2398,17 +2378,17 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
PCI_RESOURCES_ASSIGNED2;
res_assigned2->wslot.slot = hpdev->desc.win_slot.slot;
}
put_pcichild(hpdev, hv_pcidev_ref_by_slot);
put_pcichild(hpdev);
ret = vmbus_sendpacket(hdev->channel, &pkt->message,
size_res, (unsigned long)pkt,
VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (!ret)
ret = wait_for_response(hdev, &comp_pkt.host_event);
if (ret)
break;
wait_for_completion(&comp_pkt.host_event);
if (comp_pkt.completion_status < 0) {
ret = -EPROTO;
dev_err(&hdev->device,
@ -2446,7 +2426,7 @@ static int hv_send_resources_released(struct hv_device *hdev)
pkt.message_type.type = PCI_RESOURCES_RELEASED;
pkt.wslot.slot = hpdev->desc.win_slot.slot;
put_pcichild(hpdev, hv_pcidev_ref_by_slot);
put_pcichild(hpdev);
ret = vmbus_sendpacket(hdev->channel, &pkt, sizeof(pkt), 0,
VM_PKT_DATA_INBAND, 0);
@ -2459,12 +2439,12 @@ static int hv_send_resources_released(struct hv_device *hdev)
static void get_hvpcibus(struct hv_pcibus_device *hbus)
{
atomic_inc(&hbus->remove_lock);
refcount_inc(&hbus->remove_lock);
}
static void put_hvpcibus(struct hv_pcibus_device *hbus)
{
if (atomic_dec_and_test(&hbus->remove_lock))
if (refcount_dec_and_test(&hbus->remove_lock))
complete(&hbus->remove_event);
}
@ -2508,7 +2488,7 @@ static int hv_pci_probe(struct hv_device *hdev,
hdev->dev_instance.b[8] << 8;
hbus->hdev = hdev;
atomic_inc(&hbus->remove_lock);
refcount_set(&hbus->remove_lock, 1);
INIT_LIST_HEAD(&hbus->children);
INIT_LIST_HEAD(&hbus->dr_list);
INIT_LIST_HEAD(&hbus->resources_for_children);