linux/arch/x86/pci/acpi.c
Yinghai Lu 871d5f8dd0 x86: get mp_bus_to_node early
Currently, on an amd k8 system with multi ht chains, the numa_node of
pci devices under /sys/devices/pci0000:80/* is always 0, even if that
chain is on node 1 or 2 or 3.

Workaround: pcibus_to_node(bus) is used when we want to get the node that
pci_device is on.

In struct device, we already have numa_node member, and we could use
dev_to_node()/set_dev_node() to get and set numa_node in the device.
set_dev_node is called in pci_device_add() with pcibus_to_node(bus),
and pcibus_to_node uses bus->sysdata for nodeid.

The problem is when pci_add_device is called, bus->sysdata is not assigned
correct nodeid yet. The result is that numa_node will always be 0.

pcibios_scan_root and pci_scan_root could take sysdata. So we need to get
mp_bus_to_node mapping before these two are called, and thus
get_mp_bus_to_node could get correct node for sysdata in root bus.

In scanning of the root bus, all child busses will take parent bus sysdata.
So all pci_device->dev.numa_node will be assigned correctly and automatically.

Later we could use dev_to_node(&pci_dev->dev) to get numa_node, and we
could also could make other bus specific device get the correct numa_node
too.

This is an updated version of pci_sysdata and Jeff's pci_domain patch.

[ mingo@elte.hu: build fix ]

Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-04-26 23:41:04 +02:00

298 lines
6.8 KiB
C

#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/dmi.h>
#include <asm/numa.h>
#include "pci.h"
static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d)
{
pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
printk(KERN_INFO "PCI: %s detected, can skip ISA alignment\n", d->ident);
return 0;
}
static struct dmi_system_id acpi_pciprobe_dmi_table[] __devinitdata = {
/*
* Systems where PCI IO resource ISA alignment can be skipped
* when the ISA enable bit in the bridge control is not set
*/
{
.callback = can_skip_ioresource_align,
.ident = "IBM System x3800",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
DMI_MATCH(DMI_PRODUCT_NAME, "x3800"),
},
},
{
.callback = can_skip_ioresource_align,
.ident = "IBM System x3850",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
DMI_MATCH(DMI_PRODUCT_NAME, "x3850"),
},
},
{
.callback = can_skip_ioresource_align,
.ident = "IBM System x3950",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
DMI_MATCH(DMI_PRODUCT_NAME, "x3950"),
},
},
{}
};
struct pci_root_info {
char *name;
unsigned int res_num;
struct resource *res;
struct pci_bus *bus;
int busnum;
};
static acpi_status
resource_to_addr(struct acpi_resource *resource,
struct acpi_resource_address64 *addr)
{
acpi_status status;
status = acpi_resource_to_address64(resource, addr);
if (ACPI_SUCCESS(status) &&
(addr->resource_type == ACPI_MEMORY_RANGE ||
addr->resource_type == ACPI_IO_RANGE) &&
addr->address_length > 0 &&
addr->producer_consumer == ACPI_PRODUCER) {
return AE_OK;
}
return AE_ERROR;
}
static acpi_status
count_resource(struct acpi_resource *acpi_res, void *data)
{
struct pci_root_info *info = data;
struct acpi_resource_address64 addr;
acpi_status status;
if (info->res_num >= PCI_BUS_NUM_RESOURCES)
return AE_OK;
status = resource_to_addr(acpi_res, &addr);
if (ACPI_SUCCESS(status))
info->res_num++;
return AE_OK;
}
static acpi_status
setup_resource(struct acpi_resource *acpi_res, void *data)
{
struct pci_root_info *info = data;
struct resource *res;
struct acpi_resource_address64 addr;
acpi_status status;
unsigned long flags;
struct resource *root;
if (info->res_num >= PCI_BUS_NUM_RESOURCES)
return AE_OK;
status = resource_to_addr(acpi_res, &addr);
if (!ACPI_SUCCESS(status))
return AE_OK;
if (addr.resource_type == ACPI_MEMORY_RANGE) {
root = &iomem_resource;
flags = IORESOURCE_MEM;
if (addr.info.mem.caching == ACPI_PREFETCHABLE_MEMORY)
flags |= IORESOURCE_PREFETCH;
} else if (addr.resource_type == ACPI_IO_RANGE) {
root = &ioport_resource;
flags = IORESOURCE_IO;
} else
return AE_OK;
res = &info->res[info->res_num];
res->name = info->name;
res->flags = flags;
res->start = addr.minimum + addr.translation_offset;
res->end = res->start + addr.address_length - 1;
res->child = NULL;
if (insert_resource(root, res)) {
printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx "
"from %s for %s\n", (unsigned long) res->start,
(unsigned long) res->end, root->name, info->name);
} else {
info->bus->resource[info->res_num] = res;
info->res_num++;
}
return AE_OK;
}
static void
adjust_transparent_bridge_resources(struct pci_bus *bus)
{
struct pci_dev *dev;
list_for_each_entry(dev, &bus->devices, bus_list) {
int i;
u16 class = dev->class >> 8;
if (class == PCI_CLASS_BRIDGE_PCI && dev->transparent) {
for(i = 3; i < PCI_BUS_NUM_RESOURCES; i++)
dev->subordinate->resource[i] =
dev->bus->resource[i - 3];
}
}
}
static void
get_current_resources(struct acpi_device *device, int busnum,
int domain, struct pci_bus *bus)
{
struct pci_root_info info;
size_t size;
info.bus = bus;
info.res_num = 0;
acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource,
&info);
if (!info.res_num)
return;
size = sizeof(*info.res) * info.res_num;
info.res = kmalloc(size, GFP_KERNEL);
if (!info.res)
goto res_alloc_fail;
info.name = kmalloc(16, GFP_KERNEL);
if (!info.name)
goto name_alloc_fail;
sprintf(info.name, "PCI Bus %04x:%02x", domain, busnum);
info.res_num = 0;
acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource,
&info);
if (info.res_num)
adjust_transparent_bridge_resources(bus);
return;
name_alloc_fail:
kfree(info.res);
res_alloc_fail:
return;
}
struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum)
{
struct pci_bus *bus;
struct pci_sysdata *sd;
int node;
#ifdef CONFIG_ACPI_NUMA
int pxm;
#endif
dmi_check_system(acpi_pciprobe_dmi_table);
if (domain && !pci_domains_supported) {
printk(KERN_WARNING "PCI: Multiple domains not supported "
"(dom %d, bus %d)\n", domain, busnum);
return NULL;
}
node = -1;
#ifdef CONFIG_ACPI_NUMA
pxm = acpi_get_pxm(device->handle);
if (pxm >= 0)
node = pxm_to_node(pxm);
if (node != -1)
set_mp_bus_to_node(busnum, node);
else
node = get_mp_bus_to_node(busnum);
#endif
/* Allocate per-root-bus (not per bus) arch-specific data.
* TODO: leak; this memory is never freed.
* It's arguable whether it's worth the trouble to care.
*/
sd = kzalloc(sizeof(*sd), GFP_KERNEL);
if (!sd) {
printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum);
return NULL;
}
sd->domain = domain;
sd->node = node;
/*
* Maybe the desired pci bus has been already scanned. In such case
* it is unnecessary to scan the pci bus with the given domain,busnum.
*/
bus = pci_find_bus(domain, busnum);
if (bus) {
/*
* If the desired bus exits, the content of bus->sysdata will
* be replaced by sd.
*/
memcpy(bus->sysdata, sd, sizeof(*sd));
kfree(sd);
} else
bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd);
if (!bus)
kfree(sd);
#ifdef CONFIG_ACPI_NUMA
if (bus) {
if (pxm >= 0) {
printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n",
busnum, pxm, pxm_to_node(pxm));
}
}
#endif
if (bus && (pci_probe & PCI_USE__CRS))
get_current_resources(device, busnum, domain, bus);
return bus;
}
extern int pci_routeirq;
static int __init pci_acpi_init(void)
{
struct pci_dev *dev = NULL;
if (pcibios_scanned)
return 0;
if (acpi_noirq)
return 0;
printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
acpi_irq_penalty_init();
pcibios_scanned++;
pcibios_enable_irq = acpi_pci_irq_enable;
pcibios_disable_irq = acpi_pci_irq_disable;
if (pci_routeirq) {
/*
* PCI IRQ routing is set up by pci_enable_device(), but we
* also do it here in case there are still broken drivers that
* don't use pci_enable_device().
*/
printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n");
for_each_pci_dev(dev)
acpi_pci_irq_enable(dev);
}
#ifdef CONFIG_X86_IO_APIC
if (acpi_ioapic)
print_IO_APIC();
#endif
return 0;
}
subsys_initcall(pci_acpi_init);