From cf4c43dd439b90a1a876b3f836ebe745abb9a269 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 15 Jul 2009 13:13:00 -0700 Subject: [PATCH 01/30] PCI: Add pci_bus_find_ext_capability For use by code that needs to walk extended capability lists before pci_dev structures are set up. Signed-off-by: Jesse Barnes LKML-Reference: <43F901BD926A4E43B106BF17856F07559FB80CFD@orsmsx508.amr.corp.intel.com> Signed-off-by: Jacob Pan Signed-off-by: H. Peter Anvin --- drivers/pci/pci.c | 43 +++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 2 ++ 2 files changed, 45 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 315fea47e784..aad62af2b4c6 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -297,6 +297,49 @@ int pci_find_ext_capability(struct pci_dev *dev, int cap) } EXPORT_SYMBOL_GPL(pci_find_ext_capability); +/** + * pci_bus_find_ext_capability - find an extended capability + * @bus: the PCI bus to query + * @devfn: PCI device to query + * @cap: capability code + * + * Like pci_find_ext_capability() but works for pci devices that do not have a + * pci_dev structure set up yet. + * + * Returns the address of the requested capability structure within the + * device's PCI configuration space or 0 in case the device does not + * support it. + */ +int pci_bus_find_ext_capability(struct pci_bus *bus, unsigned int devfn, + int cap) +{ + u32 header; + int ttl; + int pos = PCI_CFG_SPACE_SIZE; + + /* minimum 8 bytes per capability */ + ttl = (PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) / 8; + + if (!pci_bus_read_config_dword(bus, devfn, pos, &header)) + return 0; + if (header == 0xffffffff || header == 0) + return 0; + + while (ttl-- > 0) { + if (PCI_EXT_CAP_ID(header) == cap) + return pos; + + pos = PCI_EXT_CAP_NEXT(header); + if (pos < PCI_CFG_SPACE_SIZE) + break; + + if (!pci_bus_read_config_dword(bus, devfn, pos, &header)) + break; + } + + return 0; +} + static int __pci_find_next_ht_cap(struct pci_dev *dev, int pos, int ht_cap) { int rc, ttl = PCI_FIND_CAP_TTL; diff --git a/include/linux/pci.h b/include/linux/pci.h index c1968f464c38..65f8a8f9d3e5 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -631,6 +631,8 @@ enum pci_lost_interrupt_reason pci_lost_interrupt(struct pci_dev *dev); int pci_find_capability(struct pci_dev *dev, int cap); int pci_find_next_capability(struct pci_dev *dev, u8 pos, int cap); int pci_find_ext_capability(struct pci_dev *dev, int cap); +int pci_bus_find_ext_capability(struct pci_bus *bus, unsigned int devfn, + int cap); int pci_find_ht_capability(struct pci_dev *dev, int ht_cap); int pci_find_next_ht_capability(struct pci_dev *dev, int pos, int ht_cap); struct pci_bus *pci_find_next_bus(const struct pci_bus *from); From b72d0db9dd41da1f2ec6274b03e8909583c64e41 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 29 Aug 2009 16:24:51 +0200 Subject: [PATCH 02/30] x86: Move pci init function to x86_init The PCI initialization in pci_subsys_init() is a mess. pci_numaq_init, pci_acpi_init, pci_visws_init and pci_legacy_init are called and each implementation checks and eventually modifies the global variable pcibios_scanned. x86_init functions allow us to do this more elegant. The pci.init function pointer is preset to pci_legacy_init. numaq, acpi and visws can modify the pointer in their early setup functions. The functions return 0 when they did the full initialization including bus scan. A non zero return value indicates that pci_legacy_init needs to be called either because the selected function failed or wants the generic bus scan in pci_legacy_init to happen (e.g. visws). Signed-off-by: Thomas Gleixner LKML-Reference: <43F901BD926A4E43B106BF17856F07559FB80CFE@orsmsx508.amr.corp.intel.com> Acked-by: Jesse Barnes Signed-off-by: Jacob Pan Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/numaq.h | 1 + arch/x86/include/asm/pci.h | 9 ++++++++- arch/x86/include/asm/pci_x86.h | 14 +++++++++++--- arch/x86/include/asm/setup.h | 2 -- arch/x86/include/asm/visws/cobalt.h | 2 ++ arch/x86/include/asm/x86_init.h | 9 +++++++++ arch/x86/kernel/acpi/boot.c | 4 ++++ arch/x86/kernel/apic/numaq_32.c | 1 + arch/x86/kernel/visws_quirks.c | 6 +----- arch/x86/kernel/x86_init.c | 5 +++++ arch/x86/pci/acpi.c | 6 +----- arch/x86/pci/common.c | 6 ------ arch/x86/pci/legacy.c | 22 ++++++++-------------- arch/x86/pci/numaq_32.c | 6 ------ arch/x86/pci/visws.c | 6 ++---- 15 files changed, 53 insertions(+), 46 deletions(-) diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h index 9f0a5f5d29ec..ef6bf81f8460 100644 --- a/arch/x86/include/asm/numaq.h +++ b/arch/x86/include/asm/numaq.h @@ -30,6 +30,7 @@ extern int found_numaq; extern int get_memcfg_numaq(void); +extern int pci_numaq_init(void); extern void *xquad_portio; diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index ada8c201d513..8bd433ccc242 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -45,8 +45,15 @@ static inline int pci_proc_domain(struct pci_bus *bus) #ifdef CONFIG_PCI extern unsigned int pcibios_assign_all_busses(void); +extern int pci_legacy_init(void); +# ifdef CONFIG_ACPI +# define x86_default_pci_init pci_acpi_init +# else +# define x86_default_pci_init pci_legacy_init +# endif #else -#define pcibios_assign_all_busses() 0 +# define pcibios_assign_all_busses() 0 +# define x86_default_pci_init NULL #endif extern unsigned long pci_mem_start; diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index b4bf9a942ed0..440124f1224d 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -82,7 +82,6 @@ struct irq_routing_table { extern unsigned int pcibios_irq_mask; -extern int pcibios_scanned; extern spinlock_t pci_config_lock; extern int (*pcibios_enable_irq)(struct pci_dev *dev); @@ -112,9 +111,8 @@ extern void __init dmi_check_skip_isa_align(void); /* some common used subsys_initcalls */ extern int __init pci_acpi_init(void); extern int __init pcibios_irq_init(void); -extern int __init pci_visws_init(void); -extern int __init pci_numaq_init(void); extern int __init pcibios_init(void); +extern int pci_legacy_init(void); /* pci-mmconfig.c */ @@ -182,3 +180,13 @@ static inline void mmio_config_writel(void __iomem *pos, u32 val) { asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory"); } + +#ifdef CONFIG_PCI +# ifdef CONFIG_ACPI +# define x86_default_pci_init pci_acpi_init +# else +# define x86_default_pci_init pci_legacy_init +# endif +#else +# define x86_default_pci_init NULL +#endif diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 18e496c98ff0..86b1506f4179 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -37,10 +37,8 @@ void setup_bios_corruption_check(void); #ifdef CONFIG_X86_VISWS extern void visws_early_detect(void); -extern int is_visws_box(void); #else static inline void visws_early_detect(void) { } -static inline int is_visws_box(void) { return 0; } #endif extern unsigned long saved_video_mode; diff --git a/arch/x86/include/asm/visws/cobalt.h b/arch/x86/include/asm/visws/cobalt.h index 166adf61e770..2edb37637ead 100644 --- a/arch/x86/include/asm/visws/cobalt.h +++ b/arch/x86/include/asm/visws/cobalt.h @@ -122,4 +122,6 @@ extern char visws_board_type; extern char visws_board_rev; +extern int pci_visws_init(void); + #endif /* _ASM_X86_VISWS_COBALT_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index ea0e8ea15e15..f145d843f03d 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -98,6 +98,14 @@ struct x86_init_iommu { int (*iommu_init)(void); }; + /* + * struct x86_init_pci - platform specific pci init functions + * @init: platform specific pci init + */ +struct x86_init_pci { + int (*init)(void); +}; + /** * struct x86_init_ops - functions for platform specific setup * @@ -110,6 +118,7 @@ struct x86_init_ops { struct x86_init_paging paging; struct x86_init_timers timers; struct x86_init_iommu iommu; + struct x86_init_pci pci; }; /** diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 0acbcdfa5ca4..054a5f5548b0 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -1604,6 +1605,9 @@ int __init acpi_boot_init(void) acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet); + if (!acpi_noirq) + x86_init.pci.init = pci_acpi_init; + return 0; } diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 98c4665f251c..be5c4fd47778 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -277,6 +277,7 @@ static __init void early_check_numaq(void) x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info; x86_init.timers.tsc_pre_init = numaq_tsc_init; + x86_init.pci.init = pci_numaq_init; } } diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 34a279a7471d..843e9d30a1e3 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -49,11 +49,6 @@ extern int no_broadcast; char visws_board_type = -1; char visws_board_rev = -1; -int is_visws_box(void) -{ - return visws_board_type >= 0; -} - static void __init visws_time_init(void) { printk(KERN_INFO "Starting Cobalt Timer system clock\n"); @@ -242,6 +237,7 @@ void __init visws_early_detect(void) x86_init.irqs.pre_vector_init = visws_pre_intr_init; x86_init.irqs.trap_init = visws_trap_init; x86_init.timers.timer_init = visws_time_init; + x86_init.pci.init = pci_visws_init; /* * Install reboot quirks: diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index ccd179dec36e..81faa6d67d69 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -70,6 +71,10 @@ struct x86_init_ops x86_init __initdata = { .iommu = { .iommu_init = iommu_init_noop, }, + + .pci = { + .init = x86_default_pci_init, + }, }; struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 959e548a7039..73b3fe9aa716 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -282,15 +282,11 @@ int __init pci_acpi_init(void) { struct pci_dev *dev = NULL; - if (pcibios_scanned) - return 0; - if (acpi_noirq) - return 0; + return -ENODEV; printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); acpi_irq_penalty_init(); - pcibios_scanned++; pcibios_enable_irq = acpi_pci_irq_enable; pcibios_disable_irq = acpi_pci_irq_disable; diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index d2552c68e94d..f5770b5846a6 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -71,12 +71,6 @@ struct pci_ops pci_root_ops = { .write = pci_write, }; -/* - * legacy, numa, and acpi all want to call pcibios_scan_root - * from their initcalls. This flag prevents that. - */ -int pcibios_scanned; - /* * This interrupt-safe spinlock protects all accesses to PCI * configuration space. diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 4061bb0f267d..0daf264ddb6c 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -35,16 +35,13 @@ static void __devinit pcibios_fixup_peer_bridges(void) } } -static int __init pci_legacy_init(void) +int __init pci_legacy_init(void) { if (!raw_pci_ops) { printk("PCI: System does not support PCI\n"); return 0; } - if (pcibios_scanned++) - return 0; - printk("PCI: Probing PCI hardware\n"); pci_root_bus = pcibios_scan_root(0); if (pci_root_bus) @@ -55,16 +52,13 @@ static int __init pci_legacy_init(void) int __init pci_subsys_init(void) { -#ifdef CONFIG_X86_NUMAQ - pci_numaq_init(); -#endif -#ifdef CONFIG_ACPI - pci_acpi_init(); -#endif -#ifdef CONFIG_X86_VISWS - pci_visws_init(); -#endif - pci_legacy_init(); + /* + * The init function returns an non zero value when + * pci_legacy_init should be invoked. + */ + if (x86_init.pci.init()) + pci_legacy_init(); + pcibios_fixup_peer_bridges(); pcibios_irq_init(); pcibios_init(); diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 8eb295e116f6..45c0c9e45903 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -152,14 +152,8 @@ int __init pci_numaq_init(void) { int quad; - if (!found_numaq) - return 0; - raw_pci_ops = &pci_direct_conf1_mq; - if (pcibios_scanned++) - return 0; - pci_root_bus = pcibios_scan_root(0); if (pci_root_bus) pci_bus_add_devices(pci_root_bus); diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c index bcead7a46871..03008f72eb04 100644 --- a/arch/x86/pci/visws.c +++ b/arch/x86/pci/visws.c @@ -69,9 +69,6 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq) int __init pci_visws_init(void) { - if (!is_visws_box()) - return -1; - pcibios_enable_irq = &pci_visws_enable_irq; pcibios_disable_irq = &pci_visws_disable_irq; @@ -90,5 +87,6 @@ int __init pci_visws_init(void) pci_scan_bus_with_sysdata(pci_bus1); pci_fixup_irqs(pci_common_swizzle, visws_map_irq); pcibios_resource_survey(); - return 0; + /* Request bus scan */ + return 1; } From ab3b37937e8f4fb38dc9780b7bc3fd3c5195cca3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 29 Aug 2009 17:47:33 +0200 Subject: [PATCH 03/30] x86: Add pci_init_irq to x86_init Moorestown wants to reuse pcibios_init_irq but needs to provide its own implementation of pci_enable_irq. After we distangled the init we can move the init_irq call to x86_init and remove the pci_enable_irq != NULL check in pcibios_init_irq. pci_enable_irq is compile time initialized to pirq_enable_irq and the special cases which override it (visws and acpi) set the x86_init function pointer to noop. That allows MSRT to override pci_enable_irq and otherwise run pcibios_init_irq unmodified. Signed-off-by: Thomas Gleixner LKML-Reference: <43F901BD926A4E43B106BF17856F07559FB80CFF@orsmsx508.amr.corp.intel.com> Acked-by: Jesse Barnes Signed-off-by: Jacob Pan Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pci_x86.h | 4 +++- arch/x86/include/asm/x86_init.h | 2 ++ arch/x86/kernel/visws_quirks.c | 1 + arch/x86/kernel/x86_init.c | 1 + arch/x86/pci/acpi.c | 1 + arch/x86/pci/irq.c | 12 ++++-------- arch/x86/pci/legacy.c | 2 +- 7 files changed, 13 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 440124f1224d..46511c5be456 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -110,7 +110,7 @@ extern void __init dmi_check_skip_isa_align(void); /* some common used subsys_initcalls */ extern int __init pci_acpi_init(void); -extern int __init pcibios_irq_init(void); +extern void __init pcibios_irq_init(void); extern int __init pcibios_init(void); extern int pci_legacy_init(void); @@ -187,6 +187,8 @@ static inline void mmio_config_writel(void __iomem *pos, u32 val) # else # define x86_default_pci_init pci_legacy_init # endif +# define x86_default_pci_init_irq pcibios_irq_init #else # define x86_default_pci_init NULL +# define x86_default_pci_init_irq NULL #endif diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index f145d843f03d..34f61cd56f3b 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -101,9 +101,11 @@ struct x86_init_iommu { /* * struct x86_init_pci - platform specific pci init functions * @init: platform specific pci init + * @init_irq: platform specific pci irq init */ struct x86_init_pci { int (*init)(void); + void (*init_irq)(void); }; /** diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 843e9d30a1e3..b48ef6c0d716 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -238,6 +238,7 @@ void __init visws_early_detect(void) x86_init.irqs.trap_init = visws_trap_init; x86_init.timers.timer_init = visws_time_init; x86_init.pci.init = pci_visws_init; + x86_init.pci.init_irq = x86_init_noop; /* * Install reboot quirks: diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 81faa6d67d69..203f26fb7f33 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -74,6 +74,7 @@ struct x86_init_ops x86_init __initdata = { .pci = { .init = x86_default_pci_init, + .init_irq = x86_default_pci_init_irq, }, }; diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 73b3fe9aa716..b53f0487e2d3 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -289,6 +289,7 @@ int __init pci_acpi_init(void) acpi_irq_penalty_init(); pcibios_enable_irq = acpi_pci_irq_enable; pcibios_disable_irq = acpi_pci_irq_disable; + x86_init.pci.init_irq = x86_init_noop; if (pci_routeirq) { /* diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 0696d506c4ad..0f40ff20dd67 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -53,7 +53,7 @@ struct irq_router_handler { int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device); }; -int (*pcibios_enable_irq)(struct pci_dev *dev) = NULL; +int (*pcibios_enable_irq)(struct pci_dev *dev) = pirq_enable_irq; void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL; /* @@ -1110,12 +1110,12 @@ static struct dmi_system_id __initdata pciirq_dmi_table[] = { { } }; -int __init pcibios_irq_init(void) +void __init pcibios_irq_init(void) { DBG(KERN_DEBUG "PCI: IRQ init\n"); - if (pcibios_enable_irq || raw_pci_ops == NULL) - return 0; + if (raw_pci_ops == NULL) + return; dmi_check_system(pciirq_dmi_table); @@ -1142,8 +1142,6 @@ int __init pcibios_irq_init(void) pirq_table = NULL; } - pcibios_enable_irq = pirq_enable_irq; - pcibios_fixup_irqs(); if (io_apic_assign_pci_irqs && pci_routeirq) { @@ -1157,8 +1155,6 @@ int __init pcibios_irq_init(void) for_each_pci_dev(dev) pirq_enable_irq(dev); } - - return 0; } static void pirq_penalize_isa_irq(int irq, int active) diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 0daf264ddb6c..0db5eaf54560 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -60,7 +60,7 @@ int __init pci_subsys_init(void) pci_legacy_init(); pcibios_fixup_peer_bridges(); - pcibios_irq_init(); + x86_init.pci.init_irq(); pcibios_init(); return 0; From 9325a28ce2fa7c597e5ed41455a06c30b82b5710 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 29 Aug 2009 17:51:26 +0200 Subject: [PATCH 04/30] x86: Add pcibios_fixup_irqs to x86_init Platforms like Moorestown want to override the pcibios_fixup_irqs default function. Add it to x86_init.pci. Signed-off-by: Thomas Gleixner LKML-Reference: <43F901BD926A4E43B106BF17856F07559FB80D00@orsmsx508.amr.corp.intel.com> Acked-by: Jesse Barnes Signed-off-by: Jacob Pan Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pci_x86.h | 3 +++ arch/x86/include/asm/x86_init.h | 2 ++ arch/x86/kernel/x86_init.c | 2 ++ arch/x86/pci/irq.c | 4 ++-- 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 46511c5be456..6e69edfbf074 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -113,6 +113,7 @@ extern int __init pci_acpi_init(void); extern void __init pcibios_irq_init(void); extern int __init pcibios_init(void); extern int pci_legacy_init(void); +extern void pcibios_fixup_irqs(void); /* pci-mmconfig.c */ @@ -188,7 +189,9 @@ static inline void mmio_config_writel(void __iomem *pos, u32 val) # define x86_default_pci_init pci_legacy_init # endif # define x86_default_pci_init_irq pcibios_irq_init +# define x86_default_pci_fixup_irqs pcibios_fixup_irqs #else # define x86_default_pci_init NULL # define x86_default_pci_init_irq NULL +# define x86_default_pci_fixup_irqs NULL #endif diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 34f61cd56f3b..8ef56f21f9f0 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -102,10 +102,12 @@ struct x86_init_iommu { * struct x86_init_pci - platform specific pci init functions * @init: platform specific pci init * @init_irq: platform specific pci irq init + * @fixup_irqs: platform specific pci irq fixup */ struct x86_init_pci { int (*init)(void); void (*init_irq)(void); + void (*fixup_irqs)(void); }; /** diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 203f26fb7f33..1817cd7a03fa 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -4,6 +4,7 @@ * For licencing details see kernel-base/COPYING */ #include +#include #include #include @@ -75,6 +76,7 @@ struct x86_init_ops x86_init __initdata = { .pci = { .init = x86_default_pci_init, .init_irq = x86_default_pci_init_irq, + .fixup_irqs = x86_default_pci_fixup_irqs, }, }; diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 0f40ff20dd67..a60deb6e6696 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -1016,7 +1016,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) return 1; } -static void __init pcibios_fixup_irqs(void) +void __init pcibios_fixup_irqs(void) { struct pci_dev *dev = NULL; u8 pin; @@ -1142,7 +1142,7 @@ void __init pcibios_irq_init(void) pirq_table = NULL; } - pcibios_fixup_irqs(); + x86_init.pci.fixup_irqs(); if (io_apic_assign_pci_irqs && pci_routeirq) { struct pci_dev *dev = NULL; From d39f6495f66616b637260405d0b5dc2656bc490e Mon Sep 17 00:00:00 2001 From: Alek Du Date: Mon, 7 Sep 2009 16:25:45 +0800 Subject: [PATCH 05/30] x86, ioapic: Improve handling of i8259A irq init Since we already track the number of legacy vectors by nr_legacy_irqs, we can avoid use static vector allocations -- we can use dynamic one. Signed-off-by: Alek Du LKML-Reference: <43F901BD926A4E43B106BF17856F07559FB80D01@orsmsx508.amr.corp.intel.com> Signed-off-by: Jacob Pan Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 53243ca7816d..75265ab83b17 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -140,27 +140,10 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ #ifdef CONFIG_SPARSE_IRQ -static struct irq_cfg irq_cfgx[] = { +static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; #else -static struct irq_cfg irq_cfgx[NR_IRQS] = { +static struct irq_cfg irq_cfgx[NR_IRQS]; #endif - [0] = { .vector = IRQ0_VECTOR, }, - [1] = { .vector = IRQ1_VECTOR, }, - [2] = { .vector = IRQ2_VECTOR, }, - [3] = { .vector = IRQ3_VECTOR, }, - [4] = { .vector = IRQ4_VECTOR, }, - [5] = { .vector = IRQ5_VECTOR, }, - [6] = { .vector = IRQ6_VECTOR, }, - [7] = { .vector = IRQ7_VECTOR, }, - [8] = { .vector = IRQ8_VECTOR, }, - [9] = { .vector = IRQ9_VECTOR, }, - [10] = { .vector = IRQ10_VECTOR, }, - [11] = { .vector = IRQ11_VECTOR, }, - [12] = { .vector = IRQ12_VECTOR, }, - [13] = { .vector = IRQ13_VECTOR, }, - [14] = { .vector = IRQ14_VECTOR, }, - [15] = { .vector = IRQ15_VECTOR, }, -}; void __init io_apic_disable_legacy(void) { @@ -181,6 +164,8 @@ int __init arch_early_irq_init(void) node= cpu_to_node(boot_cpu_id); for (i = 0; i < count; i++) { + if (i < nr_legacy_irqs) + cfg[i].vector = IRQ0_VECTOR + i; desc = irq_to_desc(i); desc->chip_data = &cfg[i]; zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); From 35f720c5930f689647d51ad77e2a8d6f0abf66c8 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 17 Sep 2009 07:36:43 -0700 Subject: [PATCH 06/30] x86: Initialize stack canary in secondary start Some secondary clockevent setup code needs to call request_irq, which will cause fake stack check failure in schedule() if voluntary preemption model is chosen. It is safe to have stack canary initialized here early, since start_secondary() does not return. Signed-off-by: Jacob Pan LKML-Reference: <43F901BD926A4E43B106BF17856F07559FB80D02@orsmsx508.amr.corp.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/smpboot.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index b4e870cbdc60..3e6150d421e4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -324,6 +325,9 @@ notrace static void __cpuinit start_secondary(void *unused) /* enable local interrupts */ local_irq_enable(); + /* to prevent fake stack check failure in clock setup */ + boot_init_stack_canary(); + x86_cpuinit.setup_percpu_clockev(); wmb(); From ef3548668c02cc8c3922f4423f32b53e662811c6 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Mon, 9 Nov 2009 11:24:14 -0800 Subject: [PATCH 07/30] x86, pic: Introduce legacy_pic abstraction This patch makes i8259A like legacy programmable interrupt controller code into a driver so that legacy pic functions can be selected at runtime based on platform information, such as HW subarchitecure ID. Default structure of legacy_pic maintains the current code path for x86pc. Signed-off-by: Jacob Pan LKML-Reference: <43F901BD926A4E43B106BF17856F07559FB80D03@orsmsx508.amr.corp.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i8259.h | 13 +++++++++++ arch/x86/kernel/i8259.c | 43 ++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index 58d7091eeb1f..e8a3e05c2882 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h @@ -57,6 +57,19 @@ static inline void outb_pic(unsigned char value, unsigned int port) extern struct irq_chip i8259A_chip; +struct legacy_pic { + int nr_legacy_irqs; + struct irq_chip *chip; + void (*mask_all)(void); + void (*restore_mask)(void); + void (*init)(int auto_eoi); + int (*irq_pending)(unsigned int irq); + void (*make_irq)(unsigned int irq); +}; + +extern struct legacy_pic *legacy_pic; +extern struct legacy_pic null_legacy_pic; + extern void mask_8259A(void); extern void unmask_8259A(void); diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index df89102bef80..b80987ca33ea 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -358,3 +358,46 @@ void init_8259A(int auto_eoi) spin_unlock_irqrestore(&i8259A_lock, flags); } +/* + * make i8259 a driver so that we can select pic functions at run time. the goal + * is to make x86 binary compatible among pc compatible and non-pc compatible + * platforms, such as x86 MID. + */ + +static void __init legacy_pic_noop(void) { }; +static void __init legacy_pic_uint_noop(unsigned int unused) { }; +static void __init legacy_pic_int_noop(int unused) { }; + +static struct irq_chip dummy_pic_chip = { + .name = "dummy pic", + .mask = legacy_pic_uint_noop, + .unmask = legacy_pic_uint_noop, + .disable = legacy_pic_uint_noop, + .mask_ack = legacy_pic_uint_noop, +}; +static int legacy_pic_irq_pending_noop(unsigned int irq) +{ + return 0; +} + +struct legacy_pic null_legacy_pic = { + .nr_legacy_irqs = 0, + .chip = &dummy_pic_chip, + .mask_all = legacy_pic_noop, + .restore_mask = legacy_pic_noop, + .init = legacy_pic_int_noop, + .irq_pending = legacy_pic_irq_pending_noop, + .make_irq = legacy_pic_uint_noop, +}; + +struct legacy_pic default_legacy_pic = { + .nr_legacy_irqs = NR_IRQS_LEGACY, + .chip = &i8259A_chip, + .mask_all = mask_8259A, + .restore_mask = unmask_8259A, + .init = init_8259A, + .irq_pending = i8259A_irq_pending, + .make_irq = make_8259A_irq, +}; + +struct legacy_pic *legacy_pic = &default_legacy_pic; From b81bb373a7e832a43921356aa1291044d7f52fb1 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Mon, 9 Nov 2009 11:27:04 -0800 Subject: [PATCH 08/30] x86, pic: Make use of legacy_pic abstraction This patch replaces legacy PIC-related global variable and functions with the new legacy_pic abstraction. Signed-off-by: Jacob Pan LKML-Reference: <43F901BD926A4E43B106BF17856F07559FB80D04@orsmsx508.amr.corp.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/hw_irq.h | 7 ----- arch/x86/include/asm/i8259.h | 8 ----- arch/x86/kernel/apic/apic.c | 8 ++--- arch/x86/kernel/apic/io_apic.c | 57 ++++++++++++++++------------------ arch/x86/kernel/apic/nmi.c | 2 +- arch/x86/kernel/i8259.c | 21 ++++++++----- arch/x86/kernel/irqinit.c | 2 +- arch/x86/kernel/smpboot.c | 5 +-- arch/x86/kernel/visws_quirks.c | 14 ++++++--- 9 files changed, 59 insertions(+), 65 deletions(-) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index eeac829a0f44..a929c9ede33d 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -53,13 +53,6 @@ extern void threshold_interrupt(void); extern void call_function_interrupt(void); extern void call_function_single_interrupt(void); -/* PIC specific functions */ -extern void disable_8259A_irq(unsigned int irq); -extern void enable_8259A_irq(unsigned int irq); -extern int i8259A_irq_pending(unsigned int irq); -extern void make_8259A_irq(unsigned int irq); -extern void init_8259A(int aeoi); - /* IOAPIC */ #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) extern unsigned long io_apic_irqs; diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index e8a3e05c2882..2832babd91fc 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h @@ -26,11 +26,6 @@ extern unsigned int cached_irq_mask; extern spinlock_t i8259A_lock; -extern void init_8259A(int auto_eoi); -extern void enable_8259A_irq(unsigned int irq); -extern void disable_8259A_irq(unsigned int irq); -extern unsigned int startup_8259A_irq(unsigned int irq); - /* the PIC may need a careful delay on some platforms, hence specific calls */ static inline unsigned char inb_pic(unsigned int port) { @@ -70,7 +65,4 @@ struct legacy_pic { extern struct legacy_pic *legacy_pic; extern struct legacy_pic null_legacy_pic; -extern void mask_8259A(void); -extern void unmask_8259A(void); - #endif /* _ASM_X86_I8259_H */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index dfca210f6a10..94f22b12858d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1390,7 +1390,7 @@ void __init enable_IR_x2apic(void) } local_irq_save(flags); - mask_8259A(); + legacy_pic->mask_all(); mask_IO_APIC_setup(ioapic_entries); if (dmar_table_init_ret) @@ -1422,7 +1422,7 @@ void __init enable_IR_x2apic(void) nox2apic: if (!ret) /* IR enabling failed */ restore_IO_APIC_setup(ioapic_entries); - unmask_8259A(); + legacy_pic->restore_mask(); local_irq_restore(flags); out: @@ -2018,7 +2018,7 @@ static int lapic_resume(struct sys_device *dev) } mask_IO_APIC_setup(ioapic_entries); - mask_8259A(); + legacy_pic->mask_all(); } if (x2apic_mode) @@ -2062,7 +2062,7 @@ static int lapic_resume(struct sys_device *dev) if (intr_remapping_enabled) { reenable_intr_remapping(x2apic_mode); - unmask_8259A(); + legacy_pic->restore_mask(); restore_IO_APIC_setup(ioapic_entries); free_ioapic_entries(ioapic_entries); } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 75265ab83b17..1704cd82db5f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -94,8 +94,6 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; -/* Number of legacy interrupts */ -static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY; /* GSI interrupts */ static int nr_irqs_gsi = NR_IRQS_LEGACY; @@ -147,7 +145,6 @@ static struct irq_cfg irq_cfgx[NR_IRQS]; void __init io_apic_disable_legacy(void) { - nr_legacy_irqs = 0; nr_irqs_gsi = 0; } @@ -164,13 +161,13 @@ int __init arch_early_irq_init(void) node= cpu_to_node(boot_cpu_id); for (i = 0; i < count; i++) { - if (i < nr_legacy_irqs) + if (i < legacy_pic->nr_legacy_irqs) cfg[i].vector = IRQ0_VECTOR + i; desc = irq_to_desc(i); desc->chip_data = &cfg[i]; zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); - if (i < nr_legacy_irqs) + if (i < legacy_pic->nr_legacy_irqs) cpumask_setall(cfg[i].domain); } @@ -850,7 +847,7 @@ static int __init find_isa_irq_apic(int irq, int type) */ static int EISA_ELCR(unsigned int irq) { - if (irq < nr_legacy_irqs) { + if (irq < legacy_pic->nr_legacy_irqs) { unsigned int port = 0x4d0 + (irq >> 3); return (inb(port) >> (irq & 7)) & 1; } @@ -1446,8 +1443,8 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq } ioapic_register_intr(irq, desc, trigger); - if (irq < nr_legacy_irqs) - disable_8259A_irq(irq); + if (irq < legacy_pic->nr_legacy_irqs) + legacy_pic->chip->mask(irq); ioapic_write_entry(apic_id, pin, entry); } @@ -1810,7 +1807,7 @@ __apicdebuginit(void) print_PIC(void) unsigned int v; unsigned long flags; - if (!nr_legacy_irqs) + if (!legacy_pic->nr_legacy_irqs) return; printk(KERN_DEBUG "\nprinting PIC contents\n"); @@ -1894,7 +1891,7 @@ void __init enable_IO_APIC(void) nr_ioapic_registers[apic] = reg_01.bits.entries+1; } - if (!nr_legacy_irqs) + if (!legacy_pic->nr_legacy_irqs) return; for(apic = 0; apic < nr_ioapics; apic++) { @@ -1951,7 +1948,7 @@ void disable_IO_APIC(void) */ clear_IO_APIC(); - if (!nr_legacy_irqs) + if (!legacy_pic->nr_legacy_irqs) return; /* @@ -2184,9 +2181,9 @@ static unsigned int startup_ioapic_irq(unsigned int irq) struct irq_cfg *cfg; spin_lock_irqsave(&ioapic_lock, flags); - if (irq < nr_legacy_irqs) { - disable_8259A_irq(irq); - if (i8259A_irq_pending(irq)) + if (irq < legacy_pic->nr_legacy_irqs) { + legacy_pic->chip->mask(irq); + if (legacy_pic->irq_pending(irq)) was_pending = 1; } cfg = irq_cfg(irq); @@ -2719,8 +2716,8 @@ static inline void init_IO_APIC_traps(void) * so default to an old-fashioned 8259 * interrupt if we can.. */ - if (irq < nr_legacy_irqs) - make_8259A_irq(irq); + if (irq < legacy_pic->nr_legacy_irqs) + legacy_pic->make_irq(irq); else /* Strange. Oh, well.. */ desc->chip = &no_irq_chip; @@ -2877,7 +2874,7 @@ static inline void __init check_timer(void) /* * get/set the timer IRQ vector: */ - disable_8259A_irq(0); + legacy_pic->chip->mask(0); assign_irq_vector(0, cfg, apic->target_cpus()); /* @@ -2890,7 +2887,7 @@ static inline void __init check_timer(void) * automatically. */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); - init_8259A(1); + legacy_pic->init(1); #ifdef CONFIG_X86_32 { unsigned int ver; @@ -2949,7 +2946,7 @@ static inline void __init check_timer(void) if (timer_irq_works()) { if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); - enable_8259A_irq(0); + legacy_pic->chip->unmask(0); } if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(0, pin1); @@ -2972,14 +2969,14 @@ static inline void __init check_timer(void) */ replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); - enable_8259A_irq(0); + legacy_pic->chip->unmask(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); timer_through_8259 = 1; if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); + legacy_pic->chip->mask(0); setup_nmi(); - enable_8259A_irq(0); + legacy_pic->chip->unmask(0); } goto out; } @@ -2987,7 +2984,7 @@ static inline void __init check_timer(void) * Cleanup, just in case ... */ local_irq_disable(); - disable_8259A_irq(0); + legacy_pic->chip->mask(0); clear_IO_APIC_pin(apic2, pin2); apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); } @@ -3006,22 +3003,22 @@ static inline void __init check_timer(void) lapic_register_intr(0, desc); apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ - enable_8259A_irq(0); + legacy_pic->chip->unmask(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); goto out; } local_irq_disable(); - disable_8259A_irq(0); + legacy_pic->chip->mask(0); apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as ExtINT IRQ...\n"); - init_8259A(0); - make_8259A_irq(0); + legacy_pic->init(0); + legacy_pic->make_irq(0); apic_write(APIC_LVT0, APIC_DM_EXTINT); unlock_ExtINT_logic(); @@ -3063,7 +3060,7 @@ void __init setup_IO_APIC(void) /* * calling enable_IO_APIC() is moved to setup_local_APIC for BP */ - io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL; + io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL; apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); /* @@ -3074,7 +3071,7 @@ void __init setup_IO_APIC(void) sync_Arb_IDs(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); - if (nr_legacy_irqs) + if (legacy_pic->nr_legacy_irqs) check_timer(); } @@ -3875,7 +3872,7 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq, /* * IRQs < 16 are already in the irq_2_pin[] map */ - if (irq >= nr_legacy_irqs) { + if (irq >= legacy_pic->nr_legacy_irqs) { cfg = desc->chip_data; if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { printk(KERN_INFO "can not add pin %d for irq %d\n", diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 0159a69396cb..3817739acee9 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -177,7 +177,7 @@ int __init check_nmi_watchdog(void) error: if (nmi_watchdog == NMI_IO_APIC) { if (!timer_through_8259) - disable_8259A_irq(0); + legacy_pic->chip->mask(0); on_each_cpu(__acpi_nmi_disable, NULL, 1); } diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index b80987ca33ea..1c790e75f7a0 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -34,6 +34,12 @@ static int i8259A_auto_eoi; DEFINE_SPINLOCK(i8259A_lock); static void mask_and_ack_8259A(unsigned int); +static void mask_8259A(void); +static void unmask_8259A(void); +static void disable_8259A_irq(unsigned int irq); +static void enable_8259A_irq(unsigned int irq); +static void init_8259A(int auto_eoi); +static int i8259A_irq_pending(unsigned int irq); struct irq_chip i8259A_chip = { .name = "XT-PIC", @@ -63,7 +69,7 @@ unsigned int cached_irq_mask = 0xffff; */ unsigned long io_apic_irqs; -void disable_8259A_irq(unsigned int irq) +static void disable_8259A_irq(unsigned int irq) { unsigned int mask = 1 << irq; unsigned long flags; @@ -77,7 +83,7 @@ void disable_8259A_irq(unsigned int irq) spin_unlock_irqrestore(&i8259A_lock, flags); } -void enable_8259A_irq(unsigned int irq) +static void enable_8259A_irq(unsigned int irq) { unsigned int mask = ~(1 << irq); unsigned long flags; @@ -91,7 +97,7 @@ void enable_8259A_irq(unsigned int irq) spin_unlock_irqrestore(&i8259A_lock, flags); } -int i8259A_irq_pending(unsigned int irq) +static int i8259A_irq_pending(unsigned int irq) { unsigned int mask = 1<init(0); /* * 16 old-style INTA-cycle interrupts: diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3e6150d421e4..f7a52f4a21a5 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -68,6 +68,7 @@ #include #include +#include #ifdef CONFIG_X86_32 u8 apicid_2_node[MAX_APICID]; @@ -287,9 +288,9 @@ notrace static void __cpuinit start_secondary(void *unused) check_tsc_sync_target(); if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); + legacy_pic->chip->mask(0); enable_NMI_through_LVT0(); - enable_8259A_irq(0); + legacy_pic->chip->unmask(0); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index b48ef6c0d716..f067e9556a47 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -505,7 +505,7 @@ static struct irq_chip cobalt_irq_type = { */ static unsigned int startup_piix4_master_irq(unsigned int irq) { - init_8259A(0); + legacy_pic->init(0); return startup_cobalt_irq(irq); } @@ -529,9 +529,6 @@ static struct irq_chip piix4_master_irq_type = { static struct irq_chip piix4_virtual_irq_type = { .name = "PIIX4-virtual", - .shutdown = disable_8259A_irq, - .enable = enable_8259A_irq, - .disable = disable_8259A_irq, }; @@ -606,7 +603,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) handle_IRQ_event(realirq, desc->action); if (!(desc->status & IRQ_DISABLED)) - enable_8259A_irq(realirq); + legacy_pic->chip->unmask(realirq); return IRQ_HANDLED; @@ -625,6 +622,12 @@ static struct irqaction cascade_action = { .name = "cascade", }; +static inline void set_piix4_virtual_irq_type(void) +{ + piix4_virtual_irq_type.shutdown = i8259A_chip.mask; + piix4_virtual_irq_type.enable = i8259A_chip.unmask; + piix4_virtual_irq_type.disable = i8259A_chip.mask; +} void init_VISWS_APIC_irqs(void) { @@ -650,6 +653,7 @@ void init_VISWS_APIC_irqs(void) desc->chip = &piix4_master_irq_type; } else if (i < CO_IRQ_APIC0) { + set_piix4_virtual_irq_type(); desc->chip = &piix4_virtual_irq_type; } else if (IS_CO_APIC(i)) { From 1f91233c26fd5f7d6525fd29b95e4b50ca7a3e88 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 5 Feb 2010 04:06:56 -0800 Subject: [PATCH 09/30] x86, apic: Remove ioapic_disable_legacy() The ioapic_disable_legacy() call is no longer needed for platforms do not have legacy pic. the legacy pic abstraction has taken care it automatically. This patch also initialize irq-related static variables based on information obtained from legacy_pic. Signed-off-by: Jacob Pan LKML-Reference: <43F901BD926A4E43B106BF17856F0755A30A7660@orsmsx508.amr.corp.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/io_apic.h | 2 -- arch/x86/kernel/apic/io_apic.c | 10 +++++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 7c7c16cde1f8..84fdd5110948 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -143,8 +143,6 @@ extern int noioapicreroute; /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ extern int timer_through_8259; -extern void io_apic_disable_legacy(void); - /* * If we use the IO-APIC for IRQ routing, disable automatic * assignment of PCI IRQ's. diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1704cd82db5f..3592a72f3f0a 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -143,11 +143,6 @@ static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; static struct irq_cfg irq_cfgx[NR_IRQS]; #endif -void __init io_apic_disable_legacy(void) -{ - nr_irqs_gsi = 0; -} - int __init arch_early_irq_init(void) { struct irq_cfg *cfg; @@ -156,6 +151,11 @@ int __init arch_early_irq_init(void) int node; int i; + if (!legacy_pic->nr_legacy_irqs) { + nr_irqs_gsi = 0; + io_apic_irqs = ~0UL; + } + cfg = irq_cfgx; count = ARRAY_SIZE(irq_cfgx); node= cpu_to_node(boot_cpu_id); From 28a3c93d11212655ce0a9be977c405c703844164 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Tue, 23 Feb 2010 02:03:31 -0800 Subject: [PATCH 10/30] x86, pic: Fix section mismatch in legacy pic Move legacy_pic chip dummy functions out of init section as they might be referenced at run time. Signed-off-by: Jacob Pan LKML-Reference: <43F901BD926A4E43B106BF17856F0755A318D3AA@orsmsx508.amr.corp.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/i8259.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 9bac6817456f..fb725ee15f55 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -371,9 +371,9 @@ static void init_8259A(int auto_eoi) * platforms, such as x86 MID. */ -static void __init legacy_pic_noop(void) { }; -static void __init legacy_pic_uint_noop(unsigned int unused) { }; -static void __init legacy_pic_int_noop(int unused) { }; +static void legacy_pic_noop(void) { }; +static void legacy_pic_uint_noop(unsigned int unused) { }; +static void legacy_pic_int_noop(int unused) { }; static struct irq_chip dummy_pic_chip = { .name = "dummy pic", From 05ddafb17ad1a73c8bc333cb328bad46513e85e7 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 23 Sep 2009 07:20:23 -0700 Subject: [PATCH 11/30] x86, ioapic: Early enable ioapic for timer irq Moorestown platform needs apic ready early for the system timer irq which is delievered via ioapic. Should not impact other platforms. In the longer term, once ioapic setup is moved before late time init, we will not need this patch to do early apic enabling. Signed-off-by: Jacob Pan LKML-Reference: <43F901BD926A4E43B106BF17856F07559FB80D07@orsmsx508.amr.corp.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/io_apic.h | 1 + arch/x86/kernel/apic/io_apic.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 31dfb42d8649..99a416ed16b7 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -187,6 +187,7 @@ extern struct mp_ioapic_gsi mp_gsi_routing[]; int mp_find_ioapic(int gsi); int mp_find_ioapic_pin(int ioapic, int gsi); void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); +extern void __init pre_init_apic_IRQ0(void); #else /* !CONFIG_X86_IO_APIC */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index b34854358ee6..8c848b5877a0 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -4289,3 +4289,24 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) nr_ioapics++; } + +/* Enable IOAPIC early just for system timer */ +void __init pre_init_apic_IRQ0(void) +{ + struct irq_cfg *cfg; + struct irq_desc *desc; + + printk(KERN_INFO "Early APIC setup for system timer0\n"); +#ifndef CONFIG_SMP + phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); +#endif + desc = irq_to_desc_alloc_node(0, 0); + + setup_local_APIC(); + + cfg = irq_cfg(0); + add_pin_to_irq_node(cfg, 0, 0, 0); + set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); + + setup_IO_APIC_irq(0, 0, 0, desc, 0, 0); +} From 4966e1affb45c5fc402969e10e979407b972a7df Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Tue, 23 Feb 2010 10:43:58 -0800 Subject: [PATCH 12/30] x86, ioapic: Add dummy ioapic functions Some ioapic extern functions are used when CONFIG_X86_IO_APIC is not defined. We need the dummy functions to avoid a compile time error. Signed-off-by: Jacob Pan LKML-Reference: <43F901BD926A4E43B106BF17856F0755A318DA07@orsmsx508.amr.corp.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/io_apic.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 99a416ed16b7..35832a03a515 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -197,7 +197,11 @@ static const int timer_through_8259 = 0; static inline void ioapic_init_mappings(void) { } static inline void ioapic_insert_resources(void) { } static inline void probe_nr_irqs_gsi(void) { } +static inline int mp_find_ioapic(int gsi) { return 0; } +struct io_apic_irq_attr; +static inline int io_apic_set_pci_routing(struct device *dev, int irq, + struct io_apic_irq_attr *irq_attr) { return 0; } #endif #endif /* _ASM_X86_IO_APIC_H */ From a712ffbc199849364c46e9112b93b66de08e2c26 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 4 Feb 2010 10:59:27 -0800 Subject: [PATCH 13/30] x86/PCI: Moorestown PCI support The Moorestown platform only has a few devices that actually support PCI config cycles. The rest of the devices use an in-RAM MCFG space for the purposes of device enumeration and initialization. There are a few uglies in the fake support, like BAR sizes that aren't a power of two, sizing detection, and writes to the real devices, but other than that it's pretty straightforward. Another way to think of this is not really as PCI at all, but just a table in RAM describing which devices are present, their capabilities and their offsets in MMIO space. This could have been done with a special new firmware table on this platform, but given that we do have some real PCI devices too, simply describing things in an MCFG type space was pretty simple. Signed-off-by: Jesse Barnes LKML-Reference: <43F901BD926A4E43B106BF17856F07559FB80D08@orsmsx508.amr.corp.intel.com> Signed-off-by: Jacob Pan Signed-off-by: H. Peter Anvin --- arch/x86/pci/Makefile | 2 +- arch/x86/pci/mrst.c | 258 +++++++++++++++++++++++++++++++++++++++ include/linux/pci_regs.h | 1 + 3 files changed, 260 insertions(+), 1 deletion(-) create mode 100644 arch/x86/pci/mrst.c diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 39fba37f702f..4753ebc19cae 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_X86_VISWS) += visws.o obj-$(CONFIG_X86_NUMAQ) += numaq_32.o -obj-y += common.o early.o +obj-y += common.o early.o mrst.o obj-y += amd_bus.o obj-$(CONFIG_X86_64) += bus_numa.o diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c new file mode 100644 index 000000000000..6e9e1a35a5d7 --- /dev/null +++ b/arch/x86/pci/mrst.c @@ -0,0 +1,258 @@ +/* + * Moorestown PCI support + * Copyright (c) 2008 Intel Corporation + * Jesse Barnes + * + * Moorestown has an interesting PCI implementation: + * - configuration space is memory mapped (as defined by MCFG) + * - Lincroft devices also have a real, type 1 configuration space + * - Early Lincroft silicon has a type 1 access bug that will cause + * a hang if non-existent devices are accessed + * - some devices have the "fixed BAR" capability, which means + * they can't be relocated or modified; check for that during + * BAR sizing + * + * So, we use the MCFG space for all reads and writes, but also send + * Lincroft writes to type 1 space. But only read/write if the device + * actually exists, otherwise return all 1s for reads and bit bucket + * the writes. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#define PCIE_CAP_OFFSET 0x100 + +/* Fixed BAR fields */ +#define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00 /* Fixed BAR (TBD) */ +#define PCI_FIXED_BAR_0_SIZE 0x04 +#define PCI_FIXED_BAR_1_SIZE 0x08 +#define PCI_FIXED_BAR_2_SIZE 0x0c +#define PCI_FIXED_BAR_3_SIZE 0x10 +#define PCI_FIXED_BAR_4_SIZE 0x14 +#define PCI_FIXED_BAR_5_SIZE 0x1c + +/** + * fixed_bar_cap - return the offset of the fixed BAR cap if found + * @bus: PCI bus + * @devfn: device in question + * + * Look for the fixed BAR cap on @bus and @devfn, returning its offset + * if found or 0 otherwise. + */ +static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn) +{ + int pos; + u32 pcie_cap = 0, cap_data; + + pos = PCIE_CAP_OFFSET; + while (pos) { + if (raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, + devfn, pos, 4, &pcie_cap)) + return 0; + + if (pcie_cap == 0xffffffff) + return 0; + + if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) { + raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, + devfn, pos + 4, 4, &cap_data); + if ((cap_data & 0xffff) == PCIE_VNDR_CAP_ID_FIXED_BAR) + return pos; + } + + pos = pcie_cap >> 20; + } + + return 0; +} + +static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn, + int reg, int len, u32 val, int offset) +{ + u32 size; + unsigned int domain, busnum; + int bar = (reg - PCI_BASE_ADDRESS_0) >> 2; + + domain = pci_domain_nr(bus); + busnum = bus->number; + + if (val == ~0 && len == 4) { + unsigned long decode; + + raw_pci_ext_ops->read(domain, busnum, devfn, + offset + 8 + (bar * 4), 4, &size); + + /* Turn the size into a decode pattern for the sizing code */ + if (size) { + decode = size - 1; + decode |= decode >> 1; + decode |= decode >> 2; + decode |= decode >> 4; + decode |= decode >> 8; + decode |= decode >> 16; + decode++; + decode = ~(decode - 1); + } else { + decode = ~0; + } + + /* + * If val is all ones, the core code is trying to size the reg, + * so update the mmconfig space with the real size. + * + * Note: this assumes the fixed size we got is a power of two. + */ + return raw_pci_ext_ops->write(domain, busnum, devfn, reg, 4, + decode); + } + + /* This is some other kind of BAR write, so just do it. */ + return raw_pci_ext_ops->write(domain, busnum, devfn, reg, len, val); +} + +/** + * type1_access_ok - check whether to use type 1 + * @bus: bus number + * @devfn: device & function in question + * + * If the bus is on a Lincroft chip and it exists, or is not on a Lincroft at + * all, the we can go ahead with any reads & writes. If it's on a Lincroft, + * but doesn't exist, avoid the access altogether to keep the chip from + * hanging. + */ +static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg) +{ + /* This is a workaround for A0 LNC bug where PCI status register does + * not have new CAP bit set. can not be written by SW either. + * + * PCI header type in real LNC indicates a single function device, this + * will prevent probing other devices under the same function in PCI + * shim. Therefore, use the header type in shim instead. + */ + if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE) + return 0; + if (bus == 0 && (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(0, 0))) + return 1; + return 0; /* langwell on others */ +} + +static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + if (type1_access_ok(bus->number, devfn, where)) + return pci_direct_conf1.read(pci_domain_nr(bus), bus->number, + devfn, where, size, value); + return raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, + devfn, where, size, value); +} + +static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + int offset; + + /* On MRST, there is no PCI ROM BAR, this will cause a subsequent read + * to ROM BAR return 0 then being ignored. + */ + if (where == PCI_ROM_ADDRESS) + return 0; + + /* + * Devices with fixed BARs need special handling: + * - BAR sizing code will save, write ~0, read size, restore + * - so writes to fixed BARs need special handling + * - other writes to fixed BAR devices should go through mmconfig + */ + offset = fixed_bar_cap(bus, devfn); + if (offset && + (where >= PCI_BASE_ADDRESS_0 && where <= PCI_BASE_ADDRESS_5)) { + return pci_device_update_fixed(bus, devfn, where, size, value, + offset); + } + + /* + * On Moorestown update both real & mmconfig space + * Note: early Lincroft silicon can't handle type 1 accesses to + * non-existent devices, so just eat the write in that case. + */ + if (type1_access_ok(bus->number, devfn, where)) + return pci_direct_conf1.write(pci_domain_nr(bus), bus->number, + devfn, where, size, value); + return raw_pci_ext_ops->write(pci_domain_nr(bus), bus->number, devfn, + where, size, value); +} + +static int mrst_pci_irq_enable(struct pci_dev *dev) +{ + u8 pin; + struct io_apic_irq_attr irq_attr; + + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + + /* MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to + * IOAPIC RTE entries, so we just enable RTE for the device. + */ + irq_attr.ioapic = mp_find_ioapic(dev->irq); + irq_attr.ioapic_pin = dev->irq; + irq_attr.trigger = 1; /* level */ + irq_attr.polarity = 1; /* active low */ + io_apic_set_pci_routing(&dev->dev, dev->irq, &irq_attr); + + return 0; +} + +struct pci_ops pci_mrst_ops = { + .read = pci_read, + .write = pci_write, +}; + +/** + * pci_mrst_init - installs pci_mrst_ops + * + * Moorestown has an interesting PCI implementation (see above). + * Called when the early platform detection installs it. + */ +int __init pci_mrst_init(void) +{ + printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n"); + pci_mmcfg_late_init(); + pcibios_enable_irq = mrst_pci_irq_enable; + pci_root_ops = pci_mrst_ops; + /* Continue with standard init */ + return 1; +} + +/* + * Langwell devices reside at fixed offsets, don't try to move them. + */ +static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev) +{ + unsigned long offset; + u32 size; + int i; + + /* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */ + offset = fixed_bar_cap(dev->bus, dev->devfn); + if (!offset || PCI_DEVFN(2, 0) == dev->devfn || + PCI_DEVFN(2, 2) == dev->devfn) + return; + + for (i = 0; i < PCI_ROM_RESOURCE; i++) { + pci_read_config_dword(dev, offset + 8 + (i * 4), &size); + dev->resource[i].end = dev->resource[i].start + size - 1; + dev->resource[i].flags |= IORESOURCE_PCI_FIXED; + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixed_bar_fixup); diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 9f2ad0aa3c39..c8f302991b66 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -507,6 +507,7 @@ #define PCI_EXT_CAP_ID_VC 2 #define PCI_EXT_CAP_ID_DSN 3 #define PCI_EXT_CAP_ID_PWR 4 +#define PCI_EXT_CAP_ID_VNDR 11 #define PCI_EXT_CAP_ID_ACS 13 #define PCI_EXT_CAP_ID_ARI 14 #define PCI_EXT_CAP_ID_ATS 15 From 5b78b6724a405d4b649db53f7aac28c930c89640 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 12 Feb 2010 02:29:11 -0800 Subject: [PATCH 14/30] x86, mrst: Add dummy legacy pic to platform setup Moorestown has no legacy PIC; point it to the null legacy PIC. Signed-off-by: Jacob Pan LKML-Reference: <43F901BD926A4E43B106BF17856F07559FB80D09@orsmsx508.amr.corp.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/mrst.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c index 3b7078abc871..bdf9b17290c6 100644 --- a/arch/x86/kernel/mrst.c +++ b/arch/x86/kernel/mrst.c @@ -12,6 +12,9 @@ #include #include +#include +#include +#include /* * Moorestown specific x86_init function overrides and early setup @@ -21,4 +24,6 @@ void __init x86_mrst_early_setup(void) { x86_init.resources.probe_roms = x86_init_noop; x86_init.resources.reserve_resources = x86_init_noop; + + legacy_pic = &null_legacy_pic; } From af2730f6eefce24c4ef1dc3f8267d33626db81bc Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 12 Feb 2010 10:31:47 -0800 Subject: [PATCH 15/30] x86, mrst: Fill in PCI functions in x86_init layer This patch added Moorestown platform specific PCI init functions. Signed-off-by: Jacob Pan LKML-Reference: <43F901BD926A4E43B106BF17856F07559FB80D0A@orsmsx508.amr.corp.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mrst.h | 15 +++++++++++++++ arch/x86/kernel/mrst.c | 3 +++ 2 files changed, 18 insertions(+) create mode 100644 arch/x86/include/asm/mrst.h diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h new file mode 100644 index 000000000000..57a177a8e823 --- /dev/null +++ b/arch/x86/include/asm/mrst.h @@ -0,0 +1,15 @@ +/* + * mrst.h: Intel Moorestown platform specific setup code + * + * (C) Copyright 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#ifndef _ASM_X86_MRST_H +#define _ASM_X86_MRST_H +extern int pci_mrst_init(void); + +#endif /* _ASM_X86_MRST_H */ diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c index bdf9b17290c6..98440e18919b 100644 --- a/arch/x86/kernel/mrst.c +++ b/arch/x86/kernel/mrst.c @@ -25,5 +25,8 @@ void __init x86_mrst_early_setup(void) x86_init.resources.probe_roms = x86_init_noop; x86_init.resources.reserve_resources = x86_init_noop; + x86_init.pci.init = pci_mrst_init; + x86_init.pci.fixup_irqs = x86_init_noop; + legacy_pic = &null_legacy_pic; } From 16ab5395856d8953ae3d81e81bd6a8c269a1bfd6 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 12 Feb 2010 03:08:30 -0800 Subject: [PATCH 16/30] x86, mrst: Add platform timer info parsing code Moorestown platform timer information is obtained from SFI FW tables. This patch parses SFI table then assign the irq information to mp_irqs. Signed-off-by: Jacob Pan LKML-Reference: <43F901BD926A4E43B106BF17856F07559FB80D0B@orsmsx508.amr.corp.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mrst.h | 2 + arch/x86/kernel/mrst.c | 110 ++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h index 57a177a8e823..fa144f2dd256 100644 --- a/arch/x86/include/asm/mrst.h +++ b/arch/x86/include/asm/mrst.h @@ -12,4 +12,6 @@ #define _ASM_X86_MRST_H extern int pci_mrst_init(void); +#define SFI_MTMR_MAX_NUM 8 + #endif /* _ASM_X86_MRST_H */ diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c index 98440e18919b..bb6e45c71dde 100644 --- a/arch/x86/kernel/mrst.c +++ b/arch/x86/kernel/mrst.c @@ -10,12 +10,122 @@ * of the License. */ #include +#include +#include +#include #include +#include +#include +#include +#include #include #include #include +static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM]; +static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM]; +int sfi_mtimer_num; + +static inline void assign_to_mp_irq(struct mpc_intsrc *m, + struct mpc_intsrc *mp_irq) +{ + memcpy(mp_irq, m, sizeof(struct mpc_intsrc)); +} + +static inline int mp_irq_cmp(struct mpc_intsrc *mp_irq, + struct mpc_intsrc *m) +{ + return memcmp(mp_irq, m, sizeof(struct mpc_intsrc)); +} + +static void save_mp_irq(struct mpc_intsrc *m) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) { + if (!mp_irq_cmp(&mp_irqs[i], m)) + return; + } + + assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]); + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!!\n"); +} + +/* parse all the mtimer info to a static mtimer array */ +static int __init sfi_parse_mtmr(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + struct sfi_timer_table_entry *pentry; + struct mpc_intsrc mp_irq; + int totallen; + + sb = (struct sfi_table_simple *)table; + if (!sfi_mtimer_num) { + sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb, + struct sfi_timer_table_entry); + pentry = (struct sfi_timer_table_entry *) sb->pentry; + totallen = sfi_mtimer_num * sizeof(*pentry); + memcpy(sfi_mtimer_array, pentry, totallen); + } + + printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num); + pentry = sfi_mtimer_array; + for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) { + printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz," + " irq = %d\n", totallen, (u32)pentry->phys_addr, + pentry->freq_hz, pentry->irq); + if (!pentry->irq) + continue; + mp_irq.type = MP_IOAPIC; + mp_irq.irqtype = mp_INT; +/* triggering mode edge bit 2-3, active high polarity bit 0-1 */ + mp_irq.irqflag = 5; + mp_irq.srcbus = 0; + mp_irq.srcbusirq = pentry->irq; /* IRQ */ + mp_irq.dstapic = MP_APIC_ALL; + mp_irq.dstirq = pentry->irq; + save_mp_irq(&mp_irq); + } + + return 0; +} + +struct sfi_timer_table_entry *sfi_get_mtmr(int hint) +{ + int i; + if (hint < sfi_mtimer_num) { + if (!sfi_mtimer_usage[hint]) { + pr_debug("hint taken for timer %d irq %d\n",\ + hint, sfi_mtimer_array[hint].irq); + sfi_mtimer_usage[hint] = 1; + return &sfi_mtimer_array[hint]; + } + } + /* take the first timer available */ + for (i = 0; i < sfi_mtimer_num;) { + if (!sfi_mtimer_usage[i]) { + sfi_mtimer_usage[i] = 1; + return &sfi_mtimer_array[i]; + } + i++; + } + return NULL; +} + +void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr) +{ + int i; + for (i = 0; i < sfi_mtimer_num;) { + if (mtmr->irq == sfi_mtimer_array[i].irq) { + sfi_mtimer_usage[i] = 0; + return; + } + i++; + } +} + /* * Moorestown specific x86_init function overrides and early setup * calls. From cf089455966e21aeb8e4cd2669e0c1885667b04e Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 12 Feb 2010 03:37:38 -0800 Subject: [PATCH 17/30] x86, mrst: Add vrtc platform data setup code vRTC information is obtained from SFI tables on Moorestown, this patch parses these tables and assign the information. Signed-off-by: Feng Tang LKML-Reference: <43F901BD926A4E43B106BF17856F07559FB80D0D@orsmsx508.amr.corp.intel.com> Signed-off-by: Jacob Pan Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mrst.h | 2 ++ arch/x86/kernel/mrst.c | 45 +++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h index fa144f2dd256..451d30e7f62d 100644 --- a/arch/x86/include/asm/mrst.h +++ b/arch/x86/include/asm/mrst.h @@ -11,7 +11,9 @@ #ifndef _ASM_X86_MRST_H #define _ASM_X86_MRST_H extern int pci_mrst_init(void); +int __init sfi_parse_mrtc(struct sfi_table_header *table); #define SFI_MTMR_MAX_NUM 8 +#define SFI_MRTC_MAX 8 #endif /* _ASM_X86_MRST_H */ diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c index bb6e45c71dde..b7fa049c826c 100644 --- a/arch/x86/kernel/mrst.c +++ b/arch/x86/kernel/mrst.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -27,6 +28,10 @@ static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM]; static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM]; int sfi_mtimer_num; +struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX]; +EXPORT_SYMBOL_GPL(sfi_mrtc_array); +int sfi_mrtc_num; + static inline void assign_to_mp_irq(struct mpc_intsrc *m, struct mpc_intsrc *mp_irq) { @@ -126,6 +131,46 @@ void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr) } } +/* parse all the mrtc info to a global mrtc array */ +int __init sfi_parse_mrtc(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + struct sfi_rtc_table_entry *pentry; + struct mpc_intsrc mp_irq; + + int totallen; + + sb = (struct sfi_table_simple *)table; + if (!sfi_mrtc_num) { + sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb, + struct sfi_rtc_table_entry); + pentry = (struct sfi_rtc_table_entry *)sb->pentry; + totallen = sfi_mrtc_num * sizeof(*pentry); + memcpy(sfi_mrtc_array, pentry, totallen); + } + + printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num); + pentry = sfi_mrtc_array; + for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { + printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n", + totallen, (u32)pentry->phys_addr, pentry->irq); + mp_irq.type = MP_IOAPIC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = 0; + mp_irq.srcbus = 0; + mp_irq.srcbusirq = pentry->irq; /* IRQ */ + mp_irq.dstapic = MP_APIC_ALL; + mp_irq.dstirq = pentry->irq; + save_mp_irq(&mp_irq); + } + return 0; +} + +void __init mrst_rtc_init(void) +{ + sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); +} + /* * Moorestown specific x86_init function overrides and early setup * calls. From bb24c4716185f6e116c440462c65c1f56649183b Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 2 Sep 2009 07:37:17 -0700 Subject: [PATCH 18/30] x86, apbt: Moorestown APB system timer driver Moorestown platform does not have PIT or HPET platform timers. Instead it has a bank of eight APB timers. The number of available timers to the os is exposed via SFI mtmr tables. All APB timer interrupts are routed via ioapic rtes and delivered as MSI. Currently, we use timer 0 and 1 for per cpu clockevent devices, timer 2 for clocksource. Signed-off-by: Jacob Pan LKML-Reference: <43F901BD926A4E43B106BF17856F0755A318D2D2@orsmsx508.amr.corp.intel.com> Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 6 + arch/x86/Kconfig | 11 + arch/x86/include/asm/apb_timer.h | 70 +++ arch/x86/kernel/Makefile | 1 + arch/x86/kernel/apb_timer.c | 780 ++++++++++++++++++++++++++++ 5 files changed, 868 insertions(+) create mode 100644 arch/x86/include/asm/apb_timer.h create mode 100644 arch/x86/kernel/apb_timer.c diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 51bceb0fb277..917220459e2a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2793,6 +2793,12 @@ and is between 256 and 4096 characters. It is defined in the file default x2apic cluster mode on platforms supporting x2apic. + x86_mrst_timer= [X86-32,APBT] + Choose timer option for x86 Moorestown MID platform. + Two valid options are apbt timer only and lapic timer + plus one apbt timer for broadcast timer. + x86_mrst_timer=apbt_only | lapic_and_apbt + xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks. xd_geo= See header of drivers/block/xd.c. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index eb4092568f9e..0ab2dcef7d84 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -390,6 +390,7 @@ config X86_MRST bool "Moorestown MID platform" depends on X86_32 depends on X86_EXTENDED_PLATFORM + select APB_TIMER ---help--- Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin Internet Device(MID) platform. Moorestown consists of two chips: @@ -612,6 +613,16 @@ config HPET_EMULATE_RTC def_bool y depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y) +config APB_TIMER + def_bool y if MRST + prompt "Langwell APB Timer Support" if X86_MRST + help + APB timer is the replacement for 8254, HPET on X86 MID platforms. + The APBT provides a stable time base on SMP + systems, unlike the TSC, but it is more expensive to access, + as it is off-chip. APB timers are always running regardless of CPU + C states, they are used as per CPU clockevent device when possible. + # Mark as embedded because too many people got it wrong. # The code disables itself when not needed. config DMI diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h new file mode 100644 index 000000000000..c74a2eebe570 --- /dev/null +++ b/arch/x86/include/asm/apb_timer.h @@ -0,0 +1,70 @@ +/* + * apb_timer.h: Driver for Langwell APB timer based on Synopsis DesignWare + * + * (C) Copyright 2009 Intel Corporation + * Author: Jacob Pan (jacob.jun.pan@intel.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + * + * Note: + */ + +#ifndef ASM_X86_APBT_H +#define ASM_X86_APBT_H +#include + +#ifdef CONFIG_APB_TIMER + +/* Langwell DW APB timer registers */ +#define APBTMR_N_LOAD_COUNT 0x00 +#define APBTMR_N_CURRENT_VALUE 0x04 +#define APBTMR_N_CONTROL 0x08 +#define APBTMR_N_EOI 0x0c +#define APBTMR_N_INT_STATUS 0x10 + +#define APBTMRS_INT_STATUS 0xa0 +#define APBTMRS_EOI 0xa4 +#define APBTMRS_RAW_INT_STATUS 0xa8 +#define APBTMRS_COMP_VERSION 0xac +#define APBTMRS_REG_SIZE 0x14 + +/* register bits */ +#define APBTMR_CONTROL_ENABLE (1<<0) +#define APBTMR_CONTROL_MODE_PERIODIC (1<<1) /*1: periodic 0:free running */ +#define APBTMR_CONTROL_INT (1<<2) + +/* default memory mapped register base */ +#define LNW_SCU_ADDR 0xFF100000 +#define LNW_EXT_TIMER_OFFSET 0x1B800 +#define APBT_DEFAULT_BASE (LNW_SCU_ADDR+LNW_EXT_TIMER_OFFSET) +#define LNW_EXT_TIMER_PGOFFSET 0x800 + +/* APBT clock speed range from PCLK to fabric base, 25-100MHz */ +#define APBT_MAX_FREQ 50 +#define APBT_MIN_FREQ 1 +#define APBT_MMAP_SIZE 1024 + +#define APBT_DEV_USED 1 + +extern void apbt_time_init(void); +extern struct clock_event_device *global_clock_event; +extern unsigned long apbt_quick_calibrate(void); +extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu); +extern void apbt_setup_secondary_clock(void); +extern unsigned int boot_cpu_id; +extern int disable_apbt_percpu; + +extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint); +extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr); +extern int sfi_mtimer_num; + +#else /* CONFIG_APB_TIMER */ + +static inline unsigned long apbt_quick_calibrate(void) {return 0; } +static inline void apbt_time_init(void) {return 0; } + +#endif +#endif /* ASM_X86_APBT_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d87f09bc5a52..4c58352209e0 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -87,6 +87,7 @@ obj-$(CONFIG_VM86) += vm86_32.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_HPET_TIMER) += hpet.o +obj-$(CONFIG_APB_TIMER) += apb_timer.o obj-$(CONFIG_K8_NB) += k8.o obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c new file mode 100644 index 000000000000..83a345b0256c --- /dev/null +++ b/arch/x86/kernel/apb_timer.c @@ -0,0 +1,780 @@ +/* + * apb_timer.c: Driver for Langwell APB timers + * + * (C) Copyright 2009 Intel Corporation + * Author: Jacob Pan (jacob.jun.pan@intel.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + * + * Note: + * Langwell is the south complex of Intel Moorestown MID platform. There are + * eight external timers in total that can be used by the operating system. + * The timer information, such as frequency and addresses, is provided to the + * OS via SFI tables. + * Timer interrupts are routed via FW/HW emulated IOAPIC independently via + * individual redirection table entries (RTE). + * Unlike HPET, there is no master counter, therefore one of the timers are + * used as clocksource. The overall allocation looks like: + * - timer 0 - NR_CPUs for per cpu timer + * - one timer for clocksource + * - one timer for watchdog driver. + * It is also worth notice that APB timer does not support true one-shot mode, + * free-running mode will be used here to emulate one-shot mode. + * APB timer can also be used as broadcast timer along with per cpu local APIC + * timer, but by default APB timer has higher rating than local APIC timers. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define APBT_MASK CLOCKSOURCE_MASK(32) +#define APBT_SHIFT 22 +#define APBT_CLOCKEVENT_RATING 150 +#define APBT_CLOCKSOURCE_RATING 250 +#define APBT_MIN_DELTA_USEC 200 + +#define EVT_TO_APBT_DEV(evt) container_of(evt, struct apbt_dev, evt) +#define APBT_CLOCKEVENT0_NUM (0) +#define APBT_CLOCKEVENT1_NUM (1) +#define APBT_CLOCKSOURCE_NUM (2) + +static unsigned long apbt_address; +static int apb_timer_block_enabled; +static void __iomem *apbt_virt_address; +static int phy_cs_timer_id; + +/* + * Common DW APB timer info + */ +static uint64_t apbt_freq; + +static void apbt_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt); +static int apbt_next_event(unsigned long delta, + struct clock_event_device *evt); +static cycle_t apbt_read_clocksource(struct clocksource *cs); +static void apbt_restart_clocksource(void); + +struct apbt_dev { + struct clock_event_device evt; + unsigned int num; + int cpu; + unsigned int irq; + unsigned int tick; + unsigned int count; + unsigned int flags; + char name[10]; +}; + +int disable_apbt_percpu __cpuinitdata; + +#ifdef CONFIG_SMP +static unsigned int apbt_num_timers_used; +static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev); +static struct apbt_dev *apbt_devs; +#endif + +static inline unsigned long apbt_readl_reg(unsigned long a) +{ + return readl(apbt_virt_address + a); +} + +static inline void apbt_writel_reg(unsigned long d, unsigned long a) +{ + writel(d, apbt_virt_address + a); +} + +static inline unsigned long apbt_readl(int n, unsigned long a) +{ + return readl(apbt_virt_address + a + n * APBTMRS_REG_SIZE); +} + +static inline void apbt_writel(int n, unsigned long d, unsigned long a) +{ + writel(d, apbt_virt_address + a + n * APBTMRS_REG_SIZE); +} + +static inline void apbt_set_mapping(void) +{ + struct sfi_timer_table_entry *mtmr; + + if (apbt_virt_address) { + pr_debug("APBT base already mapped\n"); + return; + } + mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM); + if (mtmr == NULL) { + printk(KERN_ERR "Failed to get MTMR %d from SFI\n", + APBT_CLOCKEVENT0_NUM); + return; + } + apbt_address = (unsigned long)mtmr->phys_addr; + if (!apbt_address) { + printk(KERN_WARNING "No timer base from SFI, use default\n"); + apbt_address = APBT_DEFAULT_BASE; + } + apbt_virt_address = ioremap_nocache(apbt_address, APBT_MMAP_SIZE); + if (apbt_virt_address) { + pr_debug("Mapped APBT physical addr %p at virtual addr %p\n",\ + (void *)apbt_address, (void *)apbt_virt_address); + } else { + pr_debug("Failed mapping APBT phy address at %p\n",\ + (void *)apbt_address); + goto panic_noapbt; + } + apbt_freq = mtmr->freq_hz / USEC_PER_SEC; + sfi_free_mtmr(mtmr); + + /* Now figure out the physical timer id for clocksource device */ + mtmr = sfi_get_mtmr(APBT_CLOCKSOURCE_NUM); + if (mtmr == NULL) + goto panic_noapbt; + + /* Now figure out the physical timer id */ + phy_cs_timer_id = (unsigned int)(mtmr->phys_addr & 0xff) + / APBTMRS_REG_SIZE; + pr_debug("Use timer %d for clocksource\n", phy_cs_timer_id); + return; + +panic_noapbt: + panic("Failed to setup APB system timer\n"); + +} + +static inline void apbt_clear_mapping(void) +{ + iounmap(apbt_virt_address); + apbt_virt_address = NULL; +} + +/* + * APBT timer interrupt enable / disable + */ +static inline int is_apbt_capable(void) +{ + return apbt_virt_address ? 1 : 0; +} + +static struct clocksource clocksource_apbt = { + .name = "apbt", + .rating = APBT_CLOCKSOURCE_RATING, + .read = apbt_read_clocksource, + .mask = APBT_MASK, + .shift = APBT_SHIFT, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .resume = apbt_restart_clocksource, +}; + +/* boot APB clock event device */ +static struct clock_event_device apbt_clockevent = { + .name = "apbt0", + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, + .set_mode = apbt_set_mode, + .set_next_event = apbt_next_event, + .shift = APBT_SHIFT, + .irq = 0, + .rating = APBT_CLOCKEVENT_RATING, +}; + +/* + * if user does not want to use per CPU apb timer, just give it a lower rating + * than local apic timer and skip the late per cpu timer init. + */ +static inline int __init setup_x86_mrst_timer(char *arg) +{ + if (!arg) + return -EINVAL; + + if (strcmp("apbt_only", arg) == 0) + disable_apbt_percpu = 0; + else if (strcmp("lapic_and_apbt", arg) == 0) + disable_apbt_percpu = 1; + else { + pr_warning("X86 MRST timer option %s not recognised" + " use x86_mrst_timer=apbt_only or lapic_and_apbt\n", + arg); + return -EINVAL; + } + return 0; +} +__setup("x86_mrst_timer=", setup_x86_mrst_timer); + +/* + * start count down from 0xffff_ffff. this is done by toggling the enable bit + * then load initial load count to ~0. + */ +static void apbt_start_counter(int n) +{ + unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); + + ctrl &= ~APBTMR_CONTROL_ENABLE; + apbt_writel(n, ctrl, APBTMR_N_CONTROL); + apbt_writel(n, ~0, APBTMR_N_LOAD_COUNT); + /* enable, mask interrupt */ + ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC; + ctrl |= (APBTMR_CONTROL_ENABLE | APBTMR_CONTROL_INT); + apbt_writel(n, ctrl, APBTMR_N_CONTROL); + /* read it once to get cached counter value initialized */ + apbt_read_clocksource(&clocksource_apbt); +} + +static irqreturn_t apbt_interrupt_handler(int irq, void *data) +{ + struct apbt_dev *dev = (struct apbt_dev *)data; + struct clock_event_device *aevt = &dev->evt; + + if (!aevt->event_handler) { + printk(KERN_INFO "Spurious APBT timer interrupt on %d\n", + dev->num); + return IRQ_NONE; + } + aevt->event_handler(aevt); + return IRQ_HANDLED; +} + +static void apbt_restart_clocksource(void) +{ + apbt_start_counter(phy_cs_timer_id); +} + +/* Setup IRQ routing via IOAPIC */ +#ifdef CONFIG_SMP +static void apbt_setup_irq(struct apbt_dev *adev) +{ + struct irq_chip *chip; + struct irq_desc *desc; + + /* timer0 irq has been setup early */ + if (adev->irq == 0) + return; + desc = irq_to_desc(adev->irq); + chip = get_irq_chip(adev->irq); + disable_irq(adev->irq); + desc->status |= IRQ_MOVE_PCNTXT; + irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); + /* APB timer irqs are set up as mp_irqs, timer is edge triggerred */ + set_irq_chip_and_handler_name(adev->irq, chip, handle_edge_irq, "edge"); + enable_irq(adev->irq); + if (system_state == SYSTEM_BOOTING) + if (request_irq(adev->irq, apbt_interrupt_handler, + IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, + adev->name, adev)) { + printk(KERN_ERR "Failed request IRQ for APBT%d\n", + adev->num); + } +} +#endif + +static void apbt_enable_int(int n) +{ + unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); + /* clear pending intr */ + apbt_readl(n, APBTMR_N_EOI); + ctrl &= ~APBTMR_CONTROL_INT; + apbt_writel(n, ctrl, APBTMR_N_CONTROL); +} + +static void apbt_disable_int(int n) +{ + unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); + + ctrl |= APBTMR_CONTROL_INT; + apbt_writel(n, ctrl, APBTMR_N_CONTROL); +} + + +static int __init apbt_clockevent_register(void) +{ + struct sfi_timer_table_entry *mtmr; + + mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM); + if (mtmr == NULL) { + printk(KERN_ERR "Failed to get MTMR %d from SFI\n", + APBT_CLOCKEVENT0_NUM); + return -ENODEV; + } + + /* + * We need to calculate the scaled math multiplication factor for + * nanosecond to apbt tick conversion. + * mult = (nsec/cycle)*2^APBT_SHIFT + */ + apbt_clockevent.mult = div_sc((unsigned long) mtmr->freq_hz + , NSEC_PER_SEC, APBT_SHIFT); + + /* Calculate the min / max delta */ + apbt_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, + &apbt_clockevent); + apbt_clockevent.min_delta_ns = clockevent_delta2ns( + APBT_MIN_DELTA_USEC*apbt_freq, + &apbt_clockevent); + /* + * Start apbt with the boot cpu mask and make it + * global if not used for per cpu timer. + */ + apbt_clockevent.cpumask = cpumask_of(smp_processor_id()); + + if (disable_apbt_percpu) { + apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100; + global_clock_event = &apbt_clockevent; + printk(KERN_DEBUG "%s clockevent registered as global\n", + global_clock_event->name); + } + + if (request_irq(apbt_clockevent.irq, apbt_interrupt_handler, + IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, + apbt_clockevent.name, &apbt_clockevent)) { + printk(KERN_ERR "Failed request IRQ for APBT%d\n", + apbt_clockevent.irq); + } + + clockevents_register_device(&apbt_clockevent); + /* Start APBT 0 interrupts */ + apbt_enable_int(APBT_CLOCKEVENT0_NUM); + + sfi_free_mtmr(mtmr); + return 0; +} + +#ifdef CONFIG_SMP +/* Should be called with per cpu */ +void apbt_setup_secondary_clock(void) +{ + struct apbt_dev *adev; + struct clock_event_device *aevt; + int cpu; + + /* Don't register boot CPU clockevent */ + cpu = smp_processor_id(); + if (cpu == boot_cpu_id) + return; + /* + * We need to calculate the scaled math multiplication factor for + * nanosecond to apbt tick conversion. + * mult = (nsec/cycle)*2^APBT_SHIFT + */ + printk(KERN_INFO "Init per CPU clockevent %d\n", cpu); + adev = &per_cpu(cpu_apbt_dev, cpu); + aevt = &adev->evt; + + memcpy(aevt, &apbt_clockevent, sizeof(*aevt)); + aevt->cpumask = cpumask_of(cpu); + aevt->name = adev->name; + aevt->mode = CLOCK_EVT_MODE_UNUSED; + + printk(KERN_INFO "Registering CPU %d clockevent device %s, mask %08x\n", + cpu, aevt->name, *(u32 *)aevt->cpumask); + + apbt_setup_irq(adev); + + clockevents_register_device(aevt); + + apbt_enable_int(cpu); + + return; +} + +/* + * this notify handler process CPU hotplug events. in case of S0i3, nonboot + * cpus are disabled/enabled frequently, for performance reasons, we keep the + * per cpu timer irq registered so that we do need to do free_irq/request_irq. + * + * TODO: it might be more reliable to directly disable percpu clockevent device + * without the notifier chain. currently, cpu 0 may get interrupts from other + * cpu timers during the offline process due to the ordering of notification. + * the extra interrupt is harmless. + */ +static int apbt_cpuhp_notify(struct notifier_block *n, + unsigned long action, void *hcpu) +{ + unsigned long cpu = (unsigned long)hcpu; + struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu); + + switch (action & 0xf) { + case CPU_DEAD: + apbt_disable_int(cpu); + if (system_state == SYSTEM_RUNNING) + pr_debug("skipping APBT CPU %lu offline\n", cpu); + else if (adev) { + pr_debug("APBT clockevent for cpu %lu offline\n", cpu); + free_irq(adev->irq, adev); + } + break; + default: + pr_debug(KERN_INFO "APBT notified %lu, no action\n", action); + } + return NOTIFY_OK; +} + +static __init int apbt_late_init(void) +{ + if (disable_apbt_percpu) + return 0; + /* This notifier should be called after workqueue is ready */ + hotcpu_notifier(apbt_cpuhp_notify, -20); + return 0; +} +fs_initcall(apbt_late_init); +#else + +void apbt_setup_secondary_clock(void) {} + +#endif /* CONFIG_SMP */ + +static void apbt_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + unsigned long ctrl; + uint64_t delta; + int timer_num; + struct apbt_dev *adev = EVT_TO_APBT_DEV(evt); + + timer_num = adev->num; + pr_debug("%s CPU %d timer %d mode=%d\n", + __func__, first_cpu(*evt->cpumask), timer_num, mode); + + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * apbt_clockevent.mult; + delta >>= apbt_clockevent.shift; + ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); + ctrl |= APBTMR_CONTROL_MODE_PERIODIC; + apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); + /* + * DW APB p. 46, have to disable timer before load counter, + * may cause sync problem. + */ + ctrl &= ~APBTMR_CONTROL_ENABLE; + apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); + udelay(1); + pr_debug("Setting clock period %d for HZ %d\n", (int)delta, HZ); + apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT); + ctrl |= APBTMR_CONTROL_ENABLE; + apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); + break; + /* APB timer does not have one-shot mode, use free running mode */ + case CLOCK_EVT_MODE_ONESHOT: + ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); + /* + * set free running mode, this mode will let timer reload max + * timeout which will give time (3min on 25MHz clock) to rearm + * the next event, therefore emulate the one-shot mode. + */ + ctrl &= ~APBTMR_CONTROL_ENABLE; + ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC; + + apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); + /* write again to set free running mode */ + apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); + + /* + * DW APB p. 46, load counter with all 1s before starting free + * running mode. + */ + apbt_writel(timer_num, ~0, APBTMR_N_LOAD_COUNT); + ctrl &= ~APBTMR_CONTROL_INT; + ctrl |= APBTMR_CONTROL_ENABLE; + apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); + break; + + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + apbt_disable_int(timer_num); + ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); + ctrl &= ~APBTMR_CONTROL_ENABLE; + apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); + break; + + case CLOCK_EVT_MODE_RESUME: + apbt_enable_int(timer_num); + break; + } +} + +static int apbt_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + unsigned long ctrl; + int timer_num; + + struct apbt_dev *adev = EVT_TO_APBT_DEV(evt); + + timer_num = adev->num; + /* Disable timer */ + ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); + ctrl &= ~APBTMR_CONTROL_ENABLE; + apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); + /* write new count */ + apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT); + ctrl |= APBTMR_CONTROL_ENABLE; + apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); + return 0; +} + +/* + * APB timer clock is not in sync with pclk on Langwell, which translates to + * unreliable read value caused by sampling error. the error does not add up + * overtime and only happens when sampling a 0 as a 1 by mistake. so the time + * would go backwards. the following code is trying to prevent time traveling + * backwards. little bit paranoid. + */ +static cycle_t apbt_read_clocksource(struct clocksource *cs) +{ + unsigned long t0, t1, t2; + static unsigned long last_read; + +bad_count: + t1 = apbt_readl(phy_cs_timer_id, + APBTMR_N_CURRENT_VALUE); + t2 = apbt_readl(phy_cs_timer_id, + APBTMR_N_CURRENT_VALUE); + if (unlikely(t1 < t2)) { + pr_debug("APBT: read current count error %lx:%lx:%lx\n", + t1, t2, t2 - t1); + goto bad_count; + } + /* + * check against cached last read, makes sure time does not go back. + * it could be a normal rollover but we will do tripple check anyway + */ + if (unlikely(t2 > last_read)) { + /* check if we have a normal rollover */ + unsigned long raw_intr_status = + apbt_readl_reg(APBTMRS_RAW_INT_STATUS); + /* + * cs timer interrupt is masked but raw intr bit is set if + * rollover occurs. then we read EOI reg to clear it. + */ + if (raw_intr_status & (1 << phy_cs_timer_id)) { + apbt_readl(phy_cs_timer_id, APBTMR_N_EOI); + goto out; + } + pr_debug("APB CS going back %lx:%lx:%lx ", + t2, last_read, t2 - last_read); +bad_count_x3: + pr_debug(KERN_INFO "tripple check enforced\n"); + t0 = apbt_readl(phy_cs_timer_id, + APBTMR_N_CURRENT_VALUE); + udelay(1); + t1 = apbt_readl(phy_cs_timer_id, + APBTMR_N_CURRENT_VALUE); + udelay(1); + t2 = apbt_readl(phy_cs_timer_id, + APBTMR_N_CURRENT_VALUE); + if ((t2 > t1) || (t1 > t0)) { + printk(KERN_ERR "Error: APB CS tripple check failed\n"); + goto bad_count_x3; + } + } +out: + last_read = t2; + return (cycle_t)~t2; +} + +static int apbt_clocksource_register(void) +{ + u64 start, now; + cycle_t t1; + + /* Start the counter, use timer 2 as source, timer 0/1 for event */ + apbt_start_counter(phy_cs_timer_id); + + /* Verify whether apbt counter works */ + t1 = apbt_read_clocksource(&clocksource_apbt); + rdtscll(start); + + /* + * We don't know the TSC frequency yet, but waiting for + * 200000 TSC cycles is safe: + * 4 GHz == 50us + * 1 GHz == 200us + */ + do { + rep_nop(); + rdtscll(now); + } while ((now - start) < 200000UL); + + /* APBT is the only always on clocksource, it has to work! */ + if (t1 == apbt_read_clocksource(&clocksource_apbt)) + panic("APBT counter not counting. APBT disabled\n"); + + /* + * initialize and register APBT clocksource + * convert that to ns/clock cycle + * mult = (ns/c) * 2^APBT_SHIFT + */ + clocksource_apbt.mult = div_sc(MSEC_PER_SEC, + (unsigned long) apbt_freq, APBT_SHIFT); + clocksource_register(&clocksource_apbt); + + return 0; +} + +/* + * Early setup the APBT timer, only use timer 0 for booting then switch to + * per CPU timer if possible. + * returns 1 if per cpu apbt is setup + * returns 0 if no per cpu apbt is chosen + * panic if set up failed, this is the only platform timer on Moorestown. + */ +void __init apbt_time_init(void) +{ +#ifdef CONFIG_SMP + int i; + struct sfi_timer_table_entry *p_mtmr; + unsigned int percpu_timer; + struct apbt_dev *adev; +#endif + + if (apb_timer_block_enabled) + return; + apbt_set_mapping(); + if (apbt_virt_address) { + pr_debug("Found APBT version 0x%lx\n",\ + apbt_readl_reg(APBTMRS_COMP_VERSION)); + } else + goto out_noapbt; + /* + * Read the frequency and check for a sane value, for ESL model + * we extend the possible clock range to allow time scaling. + */ + + if (apbt_freq < APBT_MIN_FREQ || apbt_freq > APBT_MAX_FREQ) { + pr_debug("APBT has invalid freq 0x%llx\n", apbt_freq); + goto out_noapbt; + } + if (apbt_clocksource_register()) { + pr_debug("APBT has failed to register clocksource\n"); + goto out_noapbt; + } + if (!apbt_clockevent_register()) + apb_timer_block_enabled = 1; + else { + pr_debug("APBT has failed to register clockevent\n"); + goto out_noapbt; + } +#ifdef CONFIG_SMP + /* kernel cmdline disable apb timer, so we will use lapic timers */ + if (disable_apbt_percpu) { + printk(KERN_INFO "apbt: disabled per cpu timer\n"); + return; + } + pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus()); + if (num_possible_cpus() <= sfi_mtimer_num) { + percpu_timer = 1; + apbt_num_timers_used = num_possible_cpus(); + } else { + percpu_timer = 0; + apbt_num_timers_used = 1; + adev = &per_cpu(cpu_apbt_dev, 0); + adev->flags &= ~APBT_DEV_USED; + } + pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used); + + /* here we set up per CPU timer data structure */ + apbt_devs = kzalloc(sizeof(struct apbt_dev) * apbt_num_timers_used, + GFP_KERNEL); + if (!apbt_devs) { + printk(KERN_ERR "Failed to allocate APB timer devices\n"); + return; + } + for (i = 0; i < apbt_num_timers_used; i++) { + adev = &per_cpu(cpu_apbt_dev, i); + adev->num = i; + adev->cpu = i; + p_mtmr = sfi_get_mtmr(i); + if (p_mtmr) { + adev->tick = p_mtmr->freq_hz; + adev->irq = p_mtmr->irq; + } else + printk(KERN_ERR "Failed to get timer for cpu %d\n", i); + adev->count = 0; + sprintf(adev->name, "apbt%d", i); + } +#endif + + return; + +out_noapbt: + apbt_clear_mapping(); + apb_timer_block_enabled = 0; + panic("failed to enable APB timer\n"); +} + +static inline void apbt_disable(int n) +{ + if (is_apbt_capable()) { + unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); + ctrl &= ~APBTMR_CONTROL_ENABLE; + apbt_writel(n, ctrl, APBTMR_N_CONTROL); + } +} + +/* called before apb_timer_enable, use early map */ +unsigned long apbt_quick_calibrate() +{ + int i, scale; + u64 old, new; + cycle_t t1, t2; + unsigned long khz = 0; + u32 loop, shift; + + apbt_set_mapping(); + apbt_start_counter(phy_cs_timer_id); + + /* check if the timer can count down, otherwise return */ + old = apbt_read_clocksource(&clocksource_apbt); + i = 10000; + while (--i) { + if (old != apbt_read_clocksource(&clocksource_apbt)) + break; + } + if (!i) + goto failed; + + /* count 16 ms */ + loop = (apbt_freq * 1000) << 4; + + /* restart the timer to ensure it won't get to 0 in the calibration */ + apbt_start_counter(phy_cs_timer_id); + + old = apbt_read_clocksource(&clocksource_apbt); + old += loop; + + t1 = __native_read_tsc(); + + do { + new = apbt_read_clocksource(&clocksource_apbt); + } while (new < old); + + t2 = __native_read_tsc(); + + shift = 5; + if (unlikely(loop >> shift == 0)) { + printk(KERN_INFO + "APBT TSC calibration failed, not enough resolution\n"); + return 0; + } + scale = (int)div_u64((t2 - t1), loop >> shift); + khz = (scale * apbt_freq * 1000) >> shift; + printk(KERN_INFO "TSC freq calculated by APB timer is %lu khz\n", khz); + return khz; +failed: + return 0; +} From 3746c6b6e26b8ad605f11b43e54acb3481d40980 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 12 Feb 2010 05:01:12 -0800 Subject: [PATCH 19/30] x86, mrst: Platform clock setup code Add Moorestown platform clock setup code to the x86_init abstraction. Signed-off-by: Jacob Pan LKML-Reference: <43F901BD926A4E43B106BF17856F0755A318D2D4@orsmsx508.amr.corp.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/mrst.c | 53 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c index b7fa049c826c..0aad8670858e 100644 --- a/arch/x86/kernel/mrst.c +++ b/arch/x86/kernel/mrst.c @@ -23,6 +23,7 @@ #include #include #include +#include static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM]; static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM]; @@ -166,11 +167,55 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table) return 0; } +/* + * the secondary clock in Moorestown can be APBT or LAPIC clock, default to + * APBT but cmdline option can also override it. + */ +static void __cpuinit mrst_setup_secondary_clock(void) +{ + /* restore default lapic clock if disabled by cmdline */ + if (disable_apbt_percpu) + return setup_secondary_APIC_clock(); + apbt_setup_secondary_clock(); +} + +static unsigned long __init mrst_calibrate_tsc(void) +{ + unsigned long flags, fast_calibrate; + + local_irq_save(flags); + fast_calibrate = apbt_quick_calibrate(); + local_irq_restore(flags); + + if (fast_calibrate) + return fast_calibrate; + + return 0; +} + +void __init mrst_time_init(void) +{ + sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); + pre_init_apic_IRQ0(); + apbt_time_init(); +} + void __init mrst_rtc_init(void) { sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); } +/* + * if we use per cpu apb timer, the bootclock already setup. if we use lapic + * timer and one apbt timer for broadcast, we need to set up lapic boot clock. + */ +static void __init mrst_setup_boot_clock(void) +{ + pr_info("%s: per cpu apbt flag %d \n", __func__, disable_apbt_percpu); + if (disable_apbt_percpu) + setup_boot_APIC_clock(); +}; + /* * Moorestown specific x86_init function overrides and early setup * calls. @@ -180,6 +225,14 @@ void __init x86_mrst_early_setup(void) x86_init.resources.probe_roms = x86_init_noop; x86_init.resources.reserve_resources = x86_init_noop; + x86_init.timers.timer_init = mrst_time_init; + x86_init.timers.setup_percpu_clockev = mrst_setup_boot_clock; + + x86_init.irqs.pre_vector_init = x86_init_noop; + + x86_cpuinit.setup_percpu_clockev = mrst_setup_secondary_clock; + + x86_platform.calibrate_tsc = mrst_calibrate_tsc; x86_init.pci.init = pci_mrst_init; x86_init.pci.fixup_irqs = x86_init_noop; From 28c6a0ba30457380b140d9d7a61530eda8969180 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 23 Feb 2010 20:27:48 -0800 Subject: [PATCH 20/30] x86, legacy_irq: Remove left over nr_legacy_irqs nr_legacy_irqs and its ilk have moved to legacy_pic. -v2: there is one in ioapic_.c Singed-off-by: Yinghai Lu LKML-Reference: <4B84AAC4.2020204@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/irq.h | 1 - arch/x86/kernel/apic/io_apic.c | 2 +- arch/x86/kernel/irqinit.c | 7 ++----- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 262292729fc4..5458380b6ef8 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -48,6 +48,5 @@ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); extern int vector_used_by_percpu_irq(unsigned int vector); extern void init_ISA_irqs(void); -extern int nr_legacy_irqs; #endif /* _ASM_X86_IRQ_H */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8c848b5877a0..b9d08f0e1e33 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1440,7 +1440,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq * controllers like 8259. Now that IO-APIC can handle this irq, update * the cfg->domain. */ - if (irq < nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) + if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) apic->vector_allocation_domain(0, cfg->domain); if (assign_irq_vector(irq, cfg, apic->target_cpus())) diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index d2f787b3de56..ef257fc2921b 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -99,9 +99,6 @@ int vector_used_by_percpu_irq(unsigned int vector) return 0; } -/* Number of legacy interrupts */ -int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY; - void __init init_ISA_irqs(void) { int i; @@ -114,7 +111,7 @@ void __init init_ISA_irqs(void) /* * 16 old-style INTA-cycle interrupts: */ - for (i = 0; i < NR_IRQS_LEGACY; i++) { + for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) { struct irq_desc *desc = irq_to_desc(i); desc->status = IRQ_DISABLED; @@ -138,7 +135,7 @@ void __init init_IRQ(void) * then this vector space can be freed and re-used dynamically as the * irq's migrate etc. */ - for (i = 0; i < nr_legacy_irqs; i++) + for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; x86_init.irqs.intr_init(); From 9eeeb09edba1e3544526611663472743ca584d36 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 23 Feb 2010 18:49:04 -0800 Subject: [PATCH 21/30] x86, legacy_irq: Remove duplicate vector assigment Remove duplicated cfg[i].vector assignment. Signed-off-by: Yinghai Lu LKML-Reference: <4B8493A0.6080501@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index b9d08f0e1e33..b758d49b811c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -161,8 +161,6 @@ int __init arch_early_irq_init(void) node= cpu_to_node(boot_cpu_id); for (i = 0; i < count; i++) { - if (i < legacy_pic->nr_legacy_irqs) - cfg[i].vector = IRQ0_VECTOR + i; desc = irq_to_desc(i); desc->chip_data = &cfg[i]; zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); From c54113823c777f035fa7444f8841fbccda4a5cc0 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 24 Feb 2010 09:42:50 -0800 Subject: [PATCH 22/30] x86, pci: Add sanity check for PCI fixed bar probing While probing for the PCI fixed BAR capability in the extended PCI configuration space we need to make sure raw_pci_ext_ops is actually initialized. Signed-off-by: Jacob Pan LKML-Reference: <43F901BD926A4E43B106BF17856F0755A321E8F7@orsmsx508.amr.corp.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/pci/mrst.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c index 6e9e1a35a5d7..8bf2fcb88d04 100644 --- a/arch/x86/pci/mrst.c +++ b/arch/x86/pci/mrst.c @@ -57,6 +57,10 @@ static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn) u32 pcie_cap = 0, cap_data; pos = PCIE_CAP_OFFSET; + + if (!raw_pci_ext_ops) + return 0; + while (pos) { if (raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, devfn, pos, 4, &pcie_cap)) From a92d152ef9dd89c578ca2ec7118e9de8fb74a75f Mon Sep 17 00:00:00 2001 From: "Pan, Jacob jun" Date: Wed, 24 Feb 2010 16:59:55 -0800 Subject: [PATCH 23/30] x86, numaq: Make CONFIG_X86_NUMAQ depend on CONFIG_PCI The NUMAQ initialization sets x86_init.pci.init to pci_numaq_init, which obviously isn't defined if CONFIG_PCI isn't defined. This dependency was implicit in the past, because pci_numaq_init was invoked from arch/x86/pci/legacy.c, which itself was conditioned on CONFIG_PCI. I suspect that no NUMA-Q machines without PCI were ever built, so instead of complicating the code by adding #ifdefs or stub functions, just disable this bit of the configuration space. [ hpa: rewrote the checkin comment ] Signed-off-by: Jacob Pan LKML-Reference: <43F901BD926A4E43B106BF17856F0755A321EE1F@orsmsx508.amr.corp.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0ab2dcef7d84..f0322949328e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -425,6 +425,7 @@ config X86_32_NON_STANDARD config X86_NUMAQ bool "NUMAQ (IBM/Sequent)" depends on X86_32_NON_STANDARD + depends on PCI select NUMA select X86_MPPARSE ---help--- From 722a639fd2cec44501c04ae32af57fd822c5a2d5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 24 Feb 2010 12:39:37 -0800 Subject: [PATCH 24/30] x86, pci: Exclude Moorestown PCI code if CONFIG_X86_MRST=n If we don't have any Moorestown CPU support compiled in, we don't need the Moorestown PCI support either. Signed-off-by: Yinghai Lu LKML-Reference: <4B858E89.7040807@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/pci/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 4753ebc19cae..56caf2a18baa 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -13,7 +13,9 @@ obj-$(CONFIG_X86_VISWS) += visws.o obj-$(CONFIG_X86_NUMAQ) += numaq_32.o -obj-y += common.o early.o mrst.o +obj-$(CONFIG_X86_MRST) += mrst.o + +obj-y += common.o early.o obj-y += amd_bus.o obj-$(CONFIG_X86_64) += bus_numa.o From 4b2f3f7d0fe97ad312fb572c8f8d88836ae8fb1e Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 25 Feb 2010 10:02:14 -0800 Subject: [PATCH 25/30] x86, mrst: Add Kconfig dependencies for Moorestown The Moorestown platform requires IOAPIC for all interrupts from the south complex, since there is no legacy PIC. Furthermore, Moorestown I/O requires PCI. Moorestown PCI depends on PCI MMCONFIG and DIRECT method to perform device enumeration, as there is no PCI BIOS. [ hpa: rewrote commit message ] Signed-off-by: Jacob Pan LKML-Reference: <1267120934-9505-1-git-send-email-jacob.jun.pan@linux.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f0322949328e..2697fdb25ac2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -388,8 +388,12 @@ config X86_ELAN config X86_MRST bool "Moorestown MID platform" + depends on PCI + depends on PCI_GOANY + depends on PCI_IOAPIC depends on X86_32 depends on X86_EXTENDED_PLATFORM + depends on X86_IO_APIC select APB_TIMER ---help--- Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin From 4fb6088a5cb3a77123fea1279bf2d5b16cf27648 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Feb 2010 05:38:38 -0800 Subject: [PATCH 26/30] x86, pci: Add arch_init to x86_init abstraction Added an abstraction function for arch specific init calls. Signed-off-by: Jacob Pan Cc: Jesse Barnes LKML-Reference: <43F901BD926A4E43B106BF17856F0755A318CE84@orsmsx508.amr.corp.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/x86_init.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 8ef56f21f9f0..b1d62bbcae3d 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -98,13 +98,15 @@ struct x86_init_iommu { int (*iommu_init)(void); }; - /* - * struct x86_init_pci - platform specific pci init functions - * @init: platform specific pci init +/** + * struct x86_init_pci - platform specific pci init functions + * @arch_init: platform specific pci arch init call + * @init: platform specific pci subsystem init * @init_irq: platform specific pci irq init * @fixup_irqs: platform specific pci irq fixup */ struct x86_init_pci { + int (*arch_init)(void); int (*init)(void); void (*init_irq)(void); void (*fixup_irqs)(void); From d5d0e88c1e5b069aadb050ff6ec95df312de876a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Feb 2010 05:42:04 -0800 Subject: [PATCH 27/30] x86, olpc: Use pci subarch init for OLPC Replace the #ifdef'ed OLPC-specific init functions by a conditional x86_init function. If the function returns 0 we leave pci_arch_init, otherwise we continue. Signed-off-by: Thomas Gleixner Cc: Jesse Barnes Cc: Andres Salomon LKML-Reference: <43F901BD926A4E43B106BF17856F0755A318CE89@orsmsx508.amr.corp.intel.com> Signed-off-by: Jacob Pan Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/olpc.h | 20 ++------------------ arch/x86/include/asm/pci_x86.h | 1 - arch/x86/kernel/olpc.c | 10 +++++++--- arch/x86/pci/init.c | 8 ++++---- arch/x86/pci/olpc.c | 3 --- 5 files changed, 13 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h index 3a57385d9fa7..101229b0d8ed 100644 --- a/arch/x86/include/asm/olpc.h +++ b/arch/x86/include/asm/olpc.h @@ -13,7 +13,6 @@ struct olpc_platform_t { #define OLPC_F_PRESENT 0x01 #define OLPC_F_DCON 0x02 -#define OLPC_F_VSA 0x04 #ifdef CONFIG_OLPC @@ -50,18 +49,6 @@ static inline int olpc_has_dcon(void) return (olpc_platform_info.flags & OLPC_F_DCON) ? 1 : 0; } -/* - * The VSA is software from AMD that typical Geode bioses will include. - * It is used to emulate the PCI bus, VGA, etc. OLPC's Open Firmware does - * not include the VSA; instead, PCI is emulated by the kernel. - * - * The VSA is described further in arch/x86/pci/olpc.c. - */ -static inline int olpc_has_vsa(void) -{ - return (olpc_platform_info.flags & OLPC_F_VSA) ? 1 : 0; -} - /* * The "Mass Production" version of OLPC's XO is identified as being model * C2. During the prototype phase, the following models (in chronological @@ -87,13 +74,10 @@ static inline int olpc_has_dcon(void) return 0; } -static inline int olpc_has_vsa(void) -{ - return 0; -} - #endif +extern int pci_olpc_init(void); + /* EC related functions */ extern int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen, diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 6e69edfbf074..36085518badb 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -104,7 +104,6 @@ extern bool port_cf9_safe; extern int pci_direct_probe(void); extern void pci_direct_init(int type); extern void pci_pcbios_init(void); -extern int pci_olpc_init(void); extern void __init dmi_check_pciprobe(void); extern void __init dmi_check_skip_isa_align(void); diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 9d1d263f786f..8297160c41b3 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c @@ -17,7 +17,9 @@ #include #include #include + #include +#include #include #ifdef CONFIG_OPEN_FIRMWARE @@ -243,9 +245,11 @@ static int __init olpc_init(void) olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, (unsigned char *) &olpc_platform_info.ecver, 1); - /* check to see if the VSA exists */ - if (cs5535_has_vsa2()) - olpc_platform_info.flags |= OLPC_F_VSA; +#ifdef CONFIG_PCI_OLPC + /* If the VSA exists let it emulate PCI, if not emulate in kernel */ + if (!cs5535_has_vsa2()) + x86_init.pci.arch_init = pci_olpc_init; +#endif printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n", ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index 25a1f8efed4a..adb62aaa7ecd 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c @@ -1,6 +1,7 @@ #include #include #include +#include /* arch_initcall has too random ordering, so call the initializers in the right sequence from here. */ @@ -15,10 +16,9 @@ static __init int pci_arch_init(void) if (!(pci_probe & PCI_PROBE_NOEARLY)) pci_mmcfg_early_init(); -#ifdef CONFIG_PCI_OLPC - if (!pci_olpc_init()) - return 0; /* skip additional checks if it's an XO */ -#endif + if (x86_init.pci.arch_init && !x86_init.pci.arch_init()) + return 0; + #ifdef CONFIG_PCI_BIOS pci_pcbios_init(); #endif diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c index b889d824f7c6..b34815408f58 100644 --- a/arch/x86/pci/olpc.c +++ b/arch/x86/pci/olpc.c @@ -304,9 +304,6 @@ static struct pci_raw_ops pci_olpc_conf = { int __init pci_olpc_init(void) { - if (!machine_is_olpc() || olpc_has_vsa()) - return -ENODEV; - printk(KERN_INFO "PCI: Using configuration type OLPC\n"); raw_pci_ops = &pci_olpc_conf; is_lx = is_geode_lx(); From d8111cd91abee016d62b401e057fee66ba80be67 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Tue, 2 Mar 2010 09:19:28 -0800 Subject: [PATCH 28/30] x86, mrst: Remove X86_MRST dependency on PCI_IOAPIC PCI_IOAPIC is used for PCI hotplug, Moorestown does not have ACPI PCI hotplug, as it does not have ACPI. This unnecessary dependency causes X86_MRST fail to be selected if ACPI is not selected. Signed-off-by: Jacob Pan LKML-Reference: <1267550368-7435-1-git-send-email-jacob.jun.pan@linux.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2697fdb25ac2..d22686f69de8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -390,7 +390,6 @@ config X86_MRST bool "Moorestown MID platform" depends on PCI depends on PCI_GOANY - depends on PCI_IOAPIC depends on X86_32 depends on X86_EXTENDED_PLATFORM depends on X86_IO_APIC From 3010673ef5f7bef4b4685566a0713de1b4306c93 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Tue, 2 Mar 2010 21:01:34 -0800 Subject: [PATCH 29/30] x86, mrst: Fix APB timer per cpu clockevent The current APB timer code incorrectly registers a static copy of the clockevent device for the boot CPU. The per cpu clockevent should be used instead. This bug was hidden by zero-initialized data; as such it did not get exposed in testing, but was discovered by code review. Signed-off-by: Jacob Pan LKML-Reference: <1267592494-7723-1-git-send-email-jacob.jun.pan@linux.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apb_timer.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index 83a345b0256c..6f27f8b75795 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -84,9 +84,10 @@ struct apbt_dev { int disable_apbt_percpu __cpuinitdata; +static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev); + #ifdef CONFIG_SMP static unsigned int apbt_num_timers_used; -static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev); static struct apbt_dev *apbt_devs; #endif @@ -302,6 +303,7 @@ static void apbt_disable_int(int n) static int __init apbt_clockevent_register(void) { struct sfi_timer_table_entry *mtmr; + struct apbt_dev *adev = &__get_cpu_var(cpu_apbt_dev); mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM); if (mtmr == NULL) { @@ -329,22 +331,24 @@ static int __init apbt_clockevent_register(void) * global if not used for per cpu timer. */ apbt_clockevent.cpumask = cpumask_of(smp_processor_id()); + adev->num = smp_processor_id(); + memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device)); if (disable_apbt_percpu) { apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100; - global_clock_event = &apbt_clockevent; + global_clock_event = &adev->evt; printk(KERN_DEBUG "%s clockevent registered as global\n", global_clock_event->name); } if (request_irq(apbt_clockevent.irq, apbt_interrupt_handler, IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, - apbt_clockevent.name, &apbt_clockevent)) { + apbt_clockevent.name, adev)) { printk(KERN_ERR "Failed request IRQ for APBT%d\n", apbt_clockevent.irq); } - clockevents_register_device(&apbt_clockevent); + clockevents_register_device(&adev->evt); /* Start APBT 0 interrupts */ apbt_enable_int(APBT_CLOCKEVENT0_NUM); From c7bbf52aa4fa332b84c4f2bb33e69561ee6870b4 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 3 Mar 2010 13:38:48 -0800 Subject: [PATCH 30/30] x86, mrst: Fix whitespace breakage in apb_timer.c Checkin bb24c4716185f6e116c440462c65c1f56649183b: "Moorestown APB system timer driver" suffered from severe whitespace damage in arch/x86/kernel/apb_timer.c due to using Microsoft Lookout to send a patch. Fix the whitespace breakage. Reported-by: Jacob Pan Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apb_timer.c | 974 ++++++++++++++++++------------------ 1 file changed, 487 insertions(+), 487 deletions(-) diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index 6f27f8b75795..2afa27d01297 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -43,11 +43,11 @@ #include #include -#define APBT_MASK CLOCKSOURCE_MASK(32) -#define APBT_SHIFT 22 -#define APBT_CLOCKEVENT_RATING 150 -#define APBT_CLOCKSOURCE_RATING 250 -#define APBT_MIN_DELTA_USEC 200 +#define APBT_MASK CLOCKSOURCE_MASK(32) +#define APBT_SHIFT 22 +#define APBT_CLOCKEVENT_RATING 150 +#define APBT_CLOCKSOURCE_RATING 250 +#define APBT_MIN_DELTA_USEC 200 #define EVT_TO_APBT_DEV(evt) container_of(evt, struct apbt_dev, evt) #define APBT_CLOCKEVENT0_NUM (0) @@ -65,21 +65,21 @@ static int phy_cs_timer_id; static uint64_t apbt_freq; static void apbt_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt); + struct clock_event_device *evt); static int apbt_next_event(unsigned long delta, - struct clock_event_device *evt); + struct clock_event_device *evt); static cycle_t apbt_read_clocksource(struct clocksource *cs); static void apbt_restart_clocksource(void); struct apbt_dev { - struct clock_event_device evt; - unsigned int num; - int cpu; - unsigned int irq; - unsigned int tick; - unsigned int count; - unsigned int flags; - char name[10]; + struct clock_event_device evt; + unsigned int num; + int cpu; + unsigned int irq; + unsigned int tick; + unsigned int count; + unsigned int flags; + char name[10]; }; int disable_apbt_percpu __cpuinitdata; @@ -91,77 +91,77 @@ static unsigned int apbt_num_timers_used; static struct apbt_dev *apbt_devs; #endif -static inline unsigned long apbt_readl_reg(unsigned long a) +static inline unsigned long apbt_readl_reg(unsigned long a) { - return readl(apbt_virt_address + a); + return readl(apbt_virt_address + a); } static inline void apbt_writel_reg(unsigned long d, unsigned long a) { - writel(d, apbt_virt_address + a); + writel(d, apbt_virt_address + a); } static inline unsigned long apbt_readl(int n, unsigned long a) { - return readl(apbt_virt_address + a + n * APBTMRS_REG_SIZE); + return readl(apbt_virt_address + a + n * APBTMRS_REG_SIZE); } static inline void apbt_writel(int n, unsigned long d, unsigned long a) { - writel(d, apbt_virt_address + a + n * APBTMRS_REG_SIZE); + writel(d, apbt_virt_address + a + n * APBTMRS_REG_SIZE); } static inline void apbt_set_mapping(void) { - struct sfi_timer_table_entry *mtmr; + struct sfi_timer_table_entry *mtmr; - if (apbt_virt_address) { - pr_debug("APBT base already mapped\n"); - return; - } - mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM); - if (mtmr == NULL) { - printk(KERN_ERR "Failed to get MTMR %d from SFI\n", - APBT_CLOCKEVENT0_NUM); - return; - } - apbt_address = (unsigned long)mtmr->phys_addr; - if (!apbt_address) { - printk(KERN_WARNING "No timer base from SFI, use default\n"); - apbt_address = APBT_DEFAULT_BASE; - } - apbt_virt_address = ioremap_nocache(apbt_address, APBT_MMAP_SIZE); - if (apbt_virt_address) { - pr_debug("Mapped APBT physical addr %p at virtual addr %p\n",\ - (void *)apbt_address, (void *)apbt_virt_address); - } else { - pr_debug("Failed mapping APBT phy address at %p\n",\ - (void *)apbt_address); - goto panic_noapbt; - } - apbt_freq = mtmr->freq_hz / USEC_PER_SEC; - sfi_free_mtmr(mtmr); + if (apbt_virt_address) { + pr_debug("APBT base already mapped\n"); + return; + } + mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM); + if (mtmr == NULL) { + printk(KERN_ERR "Failed to get MTMR %d from SFI\n", + APBT_CLOCKEVENT0_NUM); + return; + } + apbt_address = (unsigned long)mtmr->phys_addr; + if (!apbt_address) { + printk(KERN_WARNING "No timer base from SFI, use default\n"); + apbt_address = APBT_DEFAULT_BASE; + } + apbt_virt_address = ioremap_nocache(apbt_address, APBT_MMAP_SIZE); + if (apbt_virt_address) { + pr_debug("Mapped APBT physical addr %p at virtual addr %p\n",\ + (void *)apbt_address, (void *)apbt_virt_address); + } else { + pr_debug("Failed mapping APBT phy address at %p\n",\ + (void *)apbt_address); + goto panic_noapbt; + } + apbt_freq = mtmr->freq_hz / USEC_PER_SEC; + sfi_free_mtmr(mtmr); - /* Now figure out the physical timer id for clocksource device */ - mtmr = sfi_get_mtmr(APBT_CLOCKSOURCE_NUM); - if (mtmr == NULL) - goto panic_noapbt; + /* Now figure out the physical timer id for clocksource device */ + mtmr = sfi_get_mtmr(APBT_CLOCKSOURCE_NUM); + if (mtmr == NULL) + goto panic_noapbt; - /* Now figure out the physical timer id */ - phy_cs_timer_id = (unsigned int)(mtmr->phys_addr & 0xff) - / APBTMRS_REG_SIZE; - pr_debug("Use timer %d for clocksource\n", phy_cs_timer_id); - return; + /* Now figure out the physical timer id */ + phy_cs_timer_id = (unsigned int)(mtmr->phys_addr & 0xff) + / APBTMRS_REG_SIZE; + pr_debug("Use timer %d for clocksource\n", phy_cs_timer_id); + return; panic_noapbt: - panic("Failed to setup APB system timer\n"); + panic("Failed to setup APB system timer\n"); } static inline void apbt_clear_mapping(void) { - iounmap(apbt_virt_address); - apbt_virt_address = NULL; + iounmap(apbt_virt_address); + apbt_virt_address = NULL; } /* @@ -169,28 +169,28 @@ static inline void apbt_clear_mapping(void) */ static inline int is_apbt_capable(void) { - return apbt_virt_address ? 1 : 0; + return apbt_virt_address ? 1 : 0; } static struct clocksource clocksource_apbt = { - .name = "apbt", - .rating = APBT_CLOCKSOURCE_RATING, - .read = apbt_read_clocksource, - .mask = APBT_MASK, - .shift = APBT_SHIFT, - .flags = CLOCK_SOURCE_IS_CONTINUOUS, - .resume = apbt_restart_clocksource, + .name = "apbt", + .rating = APBT_CLOCKSOURCE_RATING, + .read = apbt_read_clocksource, + .mask = APBT_MASK, + .shift = APBT_SHIFT, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .resume = apbt_restart_clocksource, }; /* boot APB clock event device */ static struct clock_event_device apbt_clockevent = { - .name = "apbt0", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, - .set_mode = apbt_set_mode, - .set_next_event = apbt_next_event, - .shift = APBT_SHIFT, - .irq = 0, - .rating = APBT_CLOCKEVENT_RATING, + .name = "apbt0", + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, + .set_mode = apbt_set_mode, + .set_next_event = apbt_next_event, + .shift = APBT_SHIFT, + .irq = 0, + .rating = APBT_CLOCKEVENT_RATING, }; /* @@ -199,20 +199,20 @@ static struct clock_event_device apbt_clockevent = { */ static inline int __init setup_x86_mrst_timer(char *arg) { - if (!arg) - return -EINVAL; + if (!arg) + return -EINVAL; - if (strcmp("apbt_only", arg) == 0) - disable_apbt_percpu = 0; - else if (strcmp("lapic_and_apbt", arg) == 0) - disable_apbt_percpu = 1; - else { - pr_warning("X86 MRST timer option %s not recognised" - " use x86_mrst_timer=apbt_only or lapic_and_apbt\n", - arg); - return -EINVAL; - } - return 0; + if (strcmp("apbt_only", arg) == 0) + disable_apbt_percpu = 0; + else if (strcmp("lapic_and_apbt", arg) == 0) + disable_apbt_percpu = 1; + else { + pr_warning("X86 MRST timer option %s not recognised" + " use x86_mrst_timer=apbt_only or lapic_and_apbt\n", + arg); + return -EINVAL; + } + return 0; } __setup("x86_mrst_timer=", setup_x86_mrst_timer); @@ -222,176 +222,176 @@ __setup("x86_mrst_timer=", setup_x86_mrst_timer); */ static void apbt_start_counter(int n) { - unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); + unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); - ctrl &= ~APBTMR_CONTROL_ENABLE; - apbt_writel(n, ctrl, APBTMR_N_CONTROL); - apbt_writel(n, ~0, APBTMR_N_LOAD_COUNT); - /* enable, mask interrupt */ - ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC; - ctrl |= (APBTMR_CONTROL_ENABLE | APBTMR_CONTROL_INT); - apbt_writel(n, ctrl, APBTMR_N_CONTROL); - /* read it once to get cached counter value initialized */ - apbt_read_clocksource(&clocksource_apbt); + ctrl &= ~APBTMR_CONTROL_ENABLE; + apbt_writel(n, ctrl, APBTMR_N_CONTROL); + apbt_writel(n, ~0, APBTMR_N_LOAD_COUNT); + /* enable, mask interrupt */ + ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC; + ctrl |= (APBTMR_CONTROL_ENABLE | APBTMR_CONTROL_INT); + apbt_writel(n, ctrl, APBTMR_N_CONTROL); + /* read it once to get cached counter value initialized */ + apbt_read_clocksource(&clocksource_apbt); } static irqreturn_t apbt_interrupt_handler(int irq, void *data) { - struct apbt_dev *dev = (struct apbt_dev *)data; - struct clock_event_device *aevt = &dev->evt; + struct apbt_dev *dev = (struct apbt_dev *)data; + struct clock_event_device *aevt = &dev->evt; - if (!aevt->event_handler) { - printk(KERN_INFO "Spurious APBT timer interrupt on %d\n", - dev->num); - return IRQ_NONE; - } - aevt->event_handler(aevt); - return IRQ_HANDLED; + if (!aevt->event_handler) { + printk(KERN_INFO "Spurious APBT timer interrupt on %d\n", + dev->num); + return IRQ_NONE; + } + aevt->event_handler(aevt); + return IRQ_HANDLED; } static void apbt_restart_clocksource(void) { - apbt_start_counter(phy_cs_timer_id); + apbt_start_counter(phy_cs_timer_id); } /* Setup IRQ routing via IOAPIC */ #ifdef CONFIG_SMP static void apbt_setup_irq(struct apbt_dev *adev) { - struct irq_chip *chip; - struct irq_desc *desc; + struct irq_chip *chip; + struct irq_desc *desc; - /* timer0 irq has been setup early */ - if (adev->irq == 0) - return; - desc = irq_to_desc(adev->irq); - chip = get_irq_chip(adev->irq); - disable_irq(adev->irq); - desc->status |= IRQ_MOVE_PCNTXT; - irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); - /* APB timer irqs are set up as mp_irqs, timer is edge triggerred */ - set_irq_chip_and_handler_name(adev->irq, chip, handle_edge_irq, "edge"); - enable_irq(adev->irq); - if (system_state == SYSTEM_BOOTING) - if (request_irq(adev->irq, apbt_interrupt_handler, - IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, - adev->name, adev)) { - printk(KERN_ERR "Failed request IRQ for APBT%d\n", - adev->num); - } + /* timer0 irq has been setup early */ + if (adev->irq == 0) + return; + desc = irq_to_desc(adev->irq); + chip = get_irq_chip(adev->irq); + disable_irq(adev->irq); + desc->status |= IRQ_MOVE_PCNTXT; + irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); + /* APB timer irqs are set up as mp_irqs, timer is edge triggerred */ + set_irq_chip_and_handler_name(adev->irq, chip, handle_edge_irq, "edge"); + enable_irq(adev->irq); + if (system_state == SYSTEM_BOOTING) + if (request_irq(adev->irq, apbt_interrupt_handler, + IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, + adev->name, adev)) { + printk(KERN_ERR "Failed request IRQ for APBT%d\n", + adev->num); + } } #endif static void apbt_enable_int(int n) { - unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); - /* clear pending intr */ - apbt_readl(n, APBTMR_N_EOI); - ctrl &= ~APBTMR_CONTROL_INT; - apbt_writel(n, ctrl, APBTMR_N_CONTROL); + unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); + /* clear pending intr */ + apbt_readl(n, APBTMR_N_EOI); + ctrl &= ~APBTMR_CONTROL_INT; + apbt_writel(n, ctrl, APBTMR_N_CONTROL); } static void apbt_disable_int(int n) { - unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); + unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); - ctrl |= APBTMR_CONTROL_INT; - apbt_writel(n, ctrl, APBTMR_N_CONTROL); + ctrl |= APBTMR_CONTROL_INT; + apbt_writel(n, ctrl, APBTMR_N_CONTROL); } static int __init apbt_clockevent_register(void) { - struct sfi_timer_table_entry *mtmr; - struct apbt_dev *adev = &__get_cpu_var(cpu_apbt_dev); + struct sfi_timer_table_entry *mtmr; + struct apbt_dev *adev = &__get_cpu_var(cpu_apbt_dev); - mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM); - if (mtmr == NULL) { - printk(KERN_ERR "Failed to get MTMR %d from SFI\n", - APBT_CLOCKEVENT0_NUM); - return -ENODEV; - } + mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM); + if (mtmr == NULL) { + printk(KERN_ERR "Failed to get MTMR %d from SFI\n", + APBT_CLOCKEVENT0_NUM); + return -ENODEV; + } - /* - * We need to calculate the scaled math multiplication factor for - * nanosecond to apbt tick conversion. - * mult = (nsec/cycle)*2^APBT_SHIFT - */ - apbt_clockevent.mult = div_sc((unsigned long) mtmr->freq_hz - , NSEC_PER_SEC, APBT_SHIFT); + /* + * We need to calculate the scaled math multiplication factor for + * nanosecond to apbt tick conversion. + * mult = (nsec/cycle)*2^APBT_SHIFT + */ + apbt_clockevent.mult = div_sc((unsigned long) mtmr->freq_hz + , NSEC_PER_SEC, APBT_SHIFT); - /* Calculate the min / max delta */ - apbt_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, - &apbt_clockevent); - apbt_clockevent.min_delta_ns = clockevent_delta2ns( - APBT_MIN_DELTA_USEC*apbt_freq, - &apbt_clockevent); - /* - * Start apbt with the boot cpu mask and make it - * global if not used for per cpu timer. - */ - apbt_clockevent.cpumask = cpumask_of(smp_processor_id()); - adev->num = smp_processor_id(); - memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device)); + /* Calculate the min / max delta */ + apbt_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, + &apbt_clockevent); + apbt_clockevent.min_delta_ns = clockevent_delta2ns( + APBT_MIN_DELTA_USEC*apbt_freq, + &apbt_clockevent); + /* + * Start apbt with the boot cpu mask and make it + * global if not used for per cpu timer. + */ + apbt_clockevent.cpumask = cpumask_of(smp_processor_id()); + adev->num = smp_processor_id(); + memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device)); - if (disable_apbt_percpu) { - apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100; + if (disable_apbt_percpu) { + apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100; global_clock_event = &adev->evt; - printk(KERN_DEBUG "%s clockevent registered as global\n", - global_clock_event->name); - } + printk(KERN_DEBUG "%s clockevent registered as global\n", + global_clock_event->name); + } - if (request_irq(apbt_clockevent.irq, apbt_interrupt_handler, - IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, - apbt_clockevent.name, adev)) { - printk(KERN_ERR "Failed request IRQ for APBT%d\n", - apbt_clockevent.irq); - } + if (request_irq(apbt_clockevent.irq, apbt_interrupt_handler, + IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, + apbt_clockevent.name, adev)) { + printk(KERN_ERR "Failed request IRQ for APBT%d\n", + apbt_clockevent.irq); + } - clockevents_register_device(&adev->evt); - /* Start APBT 0 interrupts */ - apbt_enable_int(APBT_CLOCKEVENT0_NUM); + clockevents_register_device(&adev->evt); + /* Start APBT 0 interrupts */ + apbt_enable_int(APBT_CLOCKEVENT0_NUM); - sfi_free_mtmr(mtmr); - return 0; + sfi_free_mtmr(mtmr); + return 0; } #ifdef CONFIG_SMP /* Should be called with per cpu */ void apbt_setup_secondary_clock(void) { - struct apbt_dev *adev; - struct clock_event_device *aevt; - int cpu; + struct apbt_dev *adev; + struct clock_event_device *aevt; + int cpu; - /* Don't register boot CPU clockevent */ - cpu = smp_processor_id(); - if (cpu == boot_cpu_id) - return; - /* - * We need to calculate the scaled math multiplication factor for - * nanosecond to apbt tick conversion. - * mult = (nsec/cycle)*2^APBT_SHIFT - */ - printk(KERN_INFO "Init per CPU clockevent %d\n", cpu); - adev = &per_cpu(cpu_apbt_dev, cpu); - aevt = &adev->evt; + /* Don't register boot CPU clockevent */ + cpu = smp_processor_id(); + if (cpu == boot_cpu_id) + return; + /* + * We need to calculate the scaled math multiplication factor for + * nanosecond to apbt tick conversion. + * mult = (nsec/cycle)*2^APBT_SHIFT + */ + printk(KERN_INFO "Init per CPU clockevent %d\n", cpu); + adev = &per_cpu(cpu_apbt_dev, cpu); + aevt = &adev->evt; - memcpy(aevt, &apbt_clockevent, sizeof(*aevt)); - aevt->cpumask = cpumask_of(cpu); - aevt->name = adev->name; - aevt->mode = CLOCK_EVT_MODE_UNUSED; + memcpy(aevt, &apbt_clockevent, sizeof(*aevt)); + aevt->cpumask = cpumask_of(cpu); + aevt->name = adev->name; + aevt->mode = CLOCK_EVT_MODE_UNUSED; - printk(KERN_INFO "Registering CPU %d clockevent device %s, mask %08x\n", - cpu, aevt->name, *(u32 *)aevt->cpumask); + printk(KERN_INFO "Registering CPU %d clockevent device %s, mask %08x\n", + cpu, aevt->name, *(u32 *)aevt->cpumask); - apbt_setup_irq(adev); + apbt_setup_irq(adev); - clockevents_register_device(aevt); + clockevents_register_device(aevt); - apbt_enable_int(cpu); + apbt_enable_int(cpu); - return; + return; } /* @@ -405,34 +405,34 @@ void apbt_setup_secondary_clock(void) * the extra interrupt is harmless. */ static int apbt_cpuhp_notify(struct notifier_block *n, - unsigned long action, void *hcpu) + unsigned long action, void *hcpu) { - unsigned long cpu = (unsigned long)hcpu; - struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu); + unsigned long cpu = (unsigned long)hcpu; + struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu); - switch (action & 0xf) { - case CPU_DEAD: - apbt_disable_int(cpu); - if (system_state == SYSTEM_RUNNING) - pr_debug("skipping APBT CPU %lu offline\n", cpu); - else if (adev) { - pr_debug("APBT clockevent for cpu %lu offline\n", cpu); - free_irq(adev->irq, adev); - } - break; - default: - pr_debug(KERN_INFO "APBT notified %lu, no action\n", action); - } - return NOTIFY_OK; + switch (action & 0xf) { + case CPU_DEAD: + apbt_disable_int(cpu); + if (system_state == SYSTEM_RUNNING) + pr_debug("skipping APBT CPU %lu offline\n", cpu); + else if (adev) { + pr_debug("APBT clockevent for cpu %lu offline\n", cpu); + free_irq(adev->irq, adev); + } + break; + default: + pr_debug(KERN_INFO "APBT notified %lu, no action\n", action); + } + return NOTIFY_OK; } static __init int apbt_late_init(void) { - if (disable_apbt_percpu) - return 0; - /* This notifier should be called after workqueue is ready */ - hotcpu_notifier(apbt_cpuhp_notify, -20); - return 0; + if (disable_apbt_percpu) + return 0; + /* This notifier should be called after workqueue is ready */ + hotcpu_notifier(apbt_cpuhp_notify, -20); + return 0; } fs_initcall(apbt_late_init); #else @@ -442,93 +442,93 @@ void apbt_setup_secondary_clock(void) {} #endif /* CONFIG_SMP */ static void apbt_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) + struct clock_event_device *evt) { - unsigned long ctrl; - uint64_t delta; - int timer_num; - struct apbt_dev *adev = EVT_TO_APBT_DEV(evt); + unsigned long ctrl; + uint64_t delta; + int timer_num; + struct apbt_dev *adev = EVT_TO_APBT_DEV(evt); - timer_num = adev->num; - pr_debug("%s CPU %d timer %d mode=%d\n", - __func__, first_cpu(*evt->cpumask), timer_num, mode); + timer_num = adev->num; + pr_debug("%s CPU %d timer %d mode=%d\n", + __func__, first_cpu(*evt->cpumask), timer_num, mode); - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * apbt_clockevent.mult; - delta >>= apbt_clockevent.shift; - ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); - ctrl |= APBTMR_CONTROL_MODE_PERIODIC; - apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); - /* - * DW APB p. 46, have to disable timer before load counter, - * may cause sync problem. - */ - ctrl &= ~APBTMR_CONTROL_ENABLE; - apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); - udelay(1); - pr_debug("Setting clock period %d for HZ %d\n", (int)delta, HZ); - apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT); - ctrl |= APBTMR_CONTROL_ENABLE; - apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); - break; - /* APB timer does not have one-shot mode, use free running mode */ - case CLOCK_EVT_MODE_ONESHOT: - ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); - /* - * set free running mode, this mode will let timer reload max - * timeout which will give time (3min on 25MHz clock) to rearm - * the next event, therefore emulate the one-shot mode. - */ - ctrl &= ~APBTMR_CONTROL_ENABLE; - ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC; + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * apbt_clockevent.mult; + delta >>= apbt_clockevent.shift; + ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); + ctrl |= APBTMR_CONTROL_MODE_PERIODIC; + apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); + /* + * DW APB p. 46, have to disable timer before load counter, + * may cause sync problem. + */ + ctrl &= ~APBTMR_CONTROL_ENABLE; + apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); + udelay(1); + pr_debug("Setting clock period %d for HZ %d\n", (int)delta, HZ); + apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT); + ctrl |= APBTMR_CONTROL_ENABLE; + apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); + break; + /* APB timer does not have one-shot mode, use free running mode */ + case CLOCK_EVT_MODE_ONESHOT: + ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); + /* + * set free running mode, this mode will let timer reload max + * timeout which will give time (3min on 25MHz clock) to rearm + * the next event, therefore emulate the one-shot mode. + */ + ctrl &= ~APBTMR_CONTROL_ENABLE; + ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC; - apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); - /* write again to set free running mode */ - apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); + apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); + /* write again to set free running mode */ + apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); - /* - * DW APB p. 46, load counter with all 1s before starting free - * running mode. - */ - apbt_writel(timer_num, ~0, APBTMR_N_LOAD_COUNT); - ctrl &= ~APBTMR_CONTROL_INT; - ctrl |= APBTMR_CONTROL_ENABLE; - apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); - break; + /* + * DW APB p. 46, load counter with all 1s before starting free + * running mode. + */ + apbt_writel(timer_num, ~0, APBTMR_N_LOAD_COUNT); + ctrl &= ~APBTMR_CONTROL_INT; + ctrl |= APBTMR_CONTROL_ENABLE; + apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); + break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - apbt_disable_int(timer_num); - ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); - ctrl &= ~APBTMR_CONTROL_ENABLE; - apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); - break; + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + apbt_disable_int(timer_num); + ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); + ctrl &= ~APBTMR_CONTROL_ENABLE; + apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); + break; - case CLOCK_EVT_MODE_RESUME: - apbt_enable_int(timer_num); - break; - } + case CLOCK_EVT_MODE_RESUME: + apbt_enable_int(timer_num); + break; + } } static int apbt_next_event(unsigned long delta, - struct clock_event_device *evt) + struct clock_event_device *evt) { - unsigned long ctrl; - int timer_num; + unsigned long ctrl; + int timer_num; - struct apbt_dev *adev = EVT_TO_APBT_DEV(evt); + struct apbt_dev *adev = EVT_TO_APBT_DEV(evt); - timer_num = adev->num; - /* Disable timer */ - ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); - ctrl &= ~APBTMR_CONTROL_ENABLE; - apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); - /* write new count */ - apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT); - ctrl |= APBTMR_CONTROL_ENABLE; - apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); - return 0; + timer_num = adev->num; + /* Disable timer */ + ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); + ctrl &= ~APBTMR_CONTROL_ENABLE; + apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); + /* write new count */ + apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT); + ctrl |= APBTMR_CONTROL_ENABLE; + apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); + return 0; } /* @@ -540,94 +540,94 @@ static int apbt_next_event(unsigned long delta, */ static cycle_t apbt_read_clocksource(struct clocksource *cs) { - unsigned long t0, t1, t2; - static unsigned long last_read; + unsigned long t0, t1, t2; + static unsigned long last_read; bad_count: - t1 = apbt_readl(phy_cs_timer_id, - APBTMR_N_CURRENT_VALUE); - t2 = apbt_readl(phy_cs_timer_id, - APBTMR_N_CURRENT_VALUE); - if (unlikely(t1 < t2)) { - pr_debug("APBT: read current count error %lx:%lx:%lx\n", - t1, t2, t2 - t1); - goto bad_count; - } - /* - * check against cached last read, makes sure time does not go back. - * it could be a normal rollover but we will do tripple check anyway - */ - if (unlikely(t2 > last_read)) { - /* check if we have a normal rollover */ - unsigned long raw_intr_status = - apbt_readl_reg(APBTMRS_RAW_INT_STATUS); - /* - * cs timer interrupt is masked but raw intr bit is set if - * rollover occurs. then we read EOI reg to clear it. - */ - if (raw_intr_status & (1 << phy_cs_timer_id)) { - apbt_readl(phy_cs_timer_id, APBTMR_N_EOI); - goto out; - } - pr_debug("APB CS going back %lx:%lx:%lx ", - t2, last_read, t2 - last_read); + t1 = apbt_readl(phy_cs_timer_id, + APBTMR_N_CURRENT_VALUE); + t2 = apbt_readl(phy_cs_timer_id, + APBTMR_N_CURRENT_VALUE); + if (unlikely(t1 < t2)) { + pr_debug("APBT: read current count error %lx:%lx:%lx\n", + t1, t2, t2 - t1); + goto bad_count; + } + /* + * check against cached last read, makes sure time does not go back. + * it could be a normal rollover but we will do tripple check anyway + */ + if (unlikely(t2 > last_read)) { + /* check if we have a normal rollover */ + unsigned long raw_intr_status = + apbt_readl_reg(APBTMRS_RAW_INT_STATUS); + /* + * cs timer interrupt is masked but raw intr bit is set if + * rollover occurs. then we read EOI reg to clear it. + */ + if (raw_intr_status & (1 << phy_cs_timer_id)) { + apbt_readl(phy_cs_timer_id, APBTMR_N_EOI); + goto out; + } + pr_debug("APB CS going back %lx:%lx:%lx ", + t2, last_read, t2 - last_read); bad_count_x3: - pr_debug(KERN_INFO "tripple check enforced\n"); - t0 = apbt_readl(phy_cs_timer_id, - APBTMR_N_CURRENT_VALUE); - udelay(1); - t1 = apbt_readl(phy_cs_timer_id, - APBTMR_N_CURRENT_VALUE); - udelay(1); - t2 = apbt_readl(phy_cs_timer_id, - APBTMR_N_CURRENT_VALUE); - if ((t2 > t1) || (t1 > t0)) { - printk(KERN_ERR "Error: APB CS tripple check failed\n"); - goto bad_count_x3; - } - } + pr_debug(KERN_INFO "tripple check enforced\n"); + t0 = apbt_readl(phy_cs_timer_id, + APBTMR_N_CURRENT_VALUE); + udelay(1); + t1 = apbt_readl(phy_cs_timer_id, + APBTMR_N_CURRENT_VALUE); + udelay(1); + t2 = apbt_readl(phy_cs_timer_id, + APBTMR_N_CURRENT_VALUE); + if ((t2 > t1) || (t1 > t0)) { + printk(KERN_ERR "Error: APB CS tripple check failed\n"); + goto bad_count_x3; + } + } out: - last_read = t2; - return (cycle_t)~t2; + last_read = t2; + return (cycle_t)~t2; } static int apbt_clocksource_register(void) { - u64 start, now; - cycle_t t1; + u64 start, now; + cycle_t t1; - /* Start the counter, use timer 2 as source, timer 0/1 for event */ - apbt_start_counter(phy_cs_timer_id); + /* Start the counter, use timer 2 as source, timer 0/1 for event */ + apbt_start_counter(phy_cs_timer_id); - /* Verify whether apbt counter works */ - t1 = apbt_read_clocksource(&clocksource_apbt); - rdtscll(start); + /* Verify whether apbt counter works */ + t1 = apbt_read_clocksource(&clocksource_apbt); + rdtscll(start); - /* - * We don't know the TSC frequency yet, but waiting for - * 200000 TSC cycles is safe: - * 4 GHz == 50us - * 1 GHz == 200us - */ - do { - rep_nop(); - rdtscll(now); - } while ((now - start) < 200000UL); + /* + * We don't know the TSC frequency yet, but waiting for + * 200000 TSC cycles is safe: + * 4 GHz == 50us + * 1 GHz == 200us + */ + do { + rep_nop(); + rdtscll(now); + } while ((now - start) < 200000UL); - /* APBT is the only always on clocksource, it has to work! */ - if (t1 == apbt_read_clocksource(&clocksource_apbt)) - panic("APBT counter not counting. APBT disabled\n"); + /* APBT is the only always on clocksource, it has to work! */ + if (t1 == apbt_read_clocksource(&clocksource_apbt)) + panic("APBT counter not counting. APBT disabled\n"); - /* - * initialize and register APBT clocksource - * convert that to ns/clock cycle - * mult = (ns/c) * 2^APBT_SHIFT - */ - clocksource_apbt.mult = div_sc(MSEC_PER_SEC, - (unsigned long) apbt_freq, APBT_SHIFT); - clocksource_register(&clocksource_apbt); + /* + * initialize and register APBT clocksource + * convert that to ns/clock cycle + * mult = (ns/c) * 2^APBT_SHIFT + */ + clocksource_apbt.mult = div_sc(MSEC_PER_SEC, + (unsigned long) apbt_freq, APBT_SHIFT); + clocksource_register(&clocksource_apbt); - return 0; + return 0; } /* @@ -640,145 +640,145 @@ static int apbt_clocksource_register(void) void __init apbt_time_init(void) { #ifdef CONFIG_SMP - int i; - struct sfi_timer_table_entry *p_mtmr; - unsigned int percpu_timer; - struct apbt_dev *adev; + int i; + struct sfi_timer_table_entry *p_mtmr; + unsigned int percpu_timer; + struct apbt_dev *adev; #endif - if (apb_timer_block_enabled) - return; - apbt_set_mapping(); - if (apbt_virt_address) { - pr_debug("Found APBT version 0x%lx\n",\ - apbt_readl_reg(APBTMRS_COMP_VERSION)); - } else - goto out_noapbt; - /* - * Read the frequency and check for a sane value, for ESL model - * we extend the possible clock range to allow time scaling. - */ + if (apb_timer_block_enabled) + return; + apbt_set_mapping(); + if (apbt_virt_address) { + pr_debug("Found APBT version 0x%lx\n",\ + apbt_readl_reg(APBTMRS_COMP_VERSION)); + } else + goto out_noapbt; + /* + * Read the frequency and check for a sane value, for ESL model + * we extend the possible clock range to allow time scaling. + */ - if (apbt_freq < APBT_MIN_FREQ || apbt_freq > APBT_MAX_FREQ) { - pr_debug("APBT has invalid freq 0x%llx\n", apbt_freq); - goto out_noapbt; - } - if (apbt_clocksource_register()) { - pr_debug("APBT has failed to register clocksource\n"); - goto out_noapbt; - } - if (!apbt_clockevent_register()) - apb_timer_block_enabled = 1; - else { - pr_debug("APBT has failed to register clockevent\n"); - goto out_noapbt; - } + if (apbt_freq < APBT_MIN_FREQ || apbt_freq > APBT_MAX_FREQ) { + pr_debug("APBT has invalid freq 0x%llx\n", apbt_freq); + goto out_noapbt; + } + if (apbt_clocksource_register()) { + pr_debug("APBT has failed to register clocksource\n"); + goto out_noapbt; + } + if (!apbt_clockevent_register()) + apb_timer_block_enabled = 1; + else { + pr_debug("APBT has failed to register clockevent\n"); + goto out_noapbt; + } #ifdef CONFIG_SMP - /* kernel cmdline disable apb timer, so we will use lapic timers */ - if (disable_apbt_percpu) { - printk(KERN_INFO "apbt: disabled per cpu timer\n"); - return; - } - pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus()); - if (num_possible_cpus() <= sfi_mtimer_num) { - percpu_timer = 1; - apbt_num_timers_used = num_possible_cpus(); - } else { - percpu_timer = 0; - apbt_num_timers_used = 1; - adev = &per_cpu(cpu_apbt_dev, 0); - adev->flags &= ~APBT_DEV_USED; - } - pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used); + /* kernel cmdline disable apb timer, so we will use lapic timers */ + if (disable_apbt_percpu) { + printk(KERN_INFO "apbt: disabled per cpu timer\n"); + return; + } + pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus()); + if (num_possible_cpus() <= sfi_mtimer_num) { + percpu_timer = 1; + apbt_num_timers_used = num_possible_cpus(); + } else { + percpu_timer = 0; + apbt_num_timers_used = 1; + adev = &per_cpu(cpu_apbt_dev, 0); + adev->flags &= ~APBT_DEV_USED; + } + pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used); - /* here we set up per CPU timer data structure */ - apbt_devs = kzalloc(sizeof(struct apbt_dev) * apbt_num_timers_used, - GFP_KERNEL); - if (!apbt_devs) { - printk(KERN_ERR "Failed to allocate APB timer devices\n"); - return; - } - for (i = 0; i < apbt_num_timers_used; i++) { - adev = &per_cpu(cpu_apbt_dev, i); - adev->num = i; - adev->cpu = i; - p_mtmr = sfi_get_mtmr(i); - if (p_mtmr) { - adev->tick = p_mtmr->freq_hz; - adev->irq = p_mtmr->irq; - } else - printk(KERN_ERR "Failed to get timer for cpu %d\n", i); - adev->count = 0; - sprintf(adev->name, "apbt%d", i); - } + /* here we set up per CPU timer data structure */ + apbt_devs = kzalloc(sizeof(struct apbt_dev) * apbt_num_timers_used, + GFP_KERNEL); + if (!apbt_devs) { + printk(KERN_ERR "Failed to allocate APB timer devices\n"); + return; + } + for (i = 0; i < apbt_num_timers_used; i++) { + adev = &per_cpu(cpu_apbt_dev, i); + adev->num = i; + adev->cpu = i; + p_mtmr = sfi_get_mtmr(i); + if (p_mtmr) { + adev->tick = p_mtmr->freq_hz; + adev->irq = p_mtmr->irq; + } else + printk(KERN_ERR "Failed to get timer for cpu %d\n", i); + adev->count = 0; + sprintf(adev->name, "apbt%d", i); + } #endif - return; + return; out_noapbt: - apbt_clear_mapping(); - apb_timer_block_enabled = 0; - panic("failed to enable APB timer\n"); + apbt_clear_mapping(); + apb_timer_block_enabled = 0; + panic("failed to enable APB timer\n"); } static inline void apbt_disable(int n) { - if (is_apbt_capable()) { - unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); - ctrl &= ~APBTMR_CONTROL_ENABLE; - apbt_writel(n, ctrl, APBTMR_N_CONTROL); - } + if (is_apbt_capable()) { + unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); + ctrl &= ~APBTMR_CONTROL_ENABLE; + apbt_writel(n, ctrl, APBTMR_N_CONTROL); + } } /* called before apb_timer_enable, use early map */ unsigned long apbt_quick_calibrate() { - int i, scale; - u64 old, new; - cycle_t t1, t2; - unsigned long khz = 0; - u32 loop, shift; + int i, scale; + u64 old, new; + cycle_t t1, t2; + unsigned long khz = 0; + u32 loop, shift; - apbt_set_mapping(); - apbt_start_counter(phy_cs_timer_id); + apbt_set_mapping(); + apbt_start_counter(phy_cs_timer_id); - /* check if the timer can count down, otherwise return */ - old = apbt_read_clocksource(&clocksource_apbt); - i = 10000; - while (--i) { - if (old != apbt_read_clocksource(&clocksource_apbt)) - break; - } - if (!i) - goto failed; + /* check if the timer can count down, otherwise return */ + old = apbt_read_clocksource(&clocksource_apbt); + i = 10000; + while (--i) { + if (old != apbt_read_clocksource(&clocksource_apbt)) + break; + } + if (!i) + goto failed; - /* count 16 ms */ - loop = (apbt_freq * 1000) << 4; + /* count 16 ms */ + loop = (apbt_freq * 1000) << 4; - /* restart the timer to ensure it won't get to 0 in the calibration */ - apbt_start_counter(phy_cs_timer_id); + /* restart the timer to ensure it won't get to 0 in the calibration */ + apbt_start_counter(phy_cs_timer_id); - old = apbt_read_clocksource(&clocksource_apbt); - old += loop; + old = apbt_read_clocksource(&clocksource_apbt); + old += loop; - t1 = __native_read_tsc(); + t1 = __native_read_tsc(); - do { - new = apbt_read_clocksource(&clocksource_apbt); - } while (new < old); + do { + new = apbt_read_clocksource(&clocksource_apbt); + } while (new < old); - t2 = __native_read_tsc(); + t2 = __native_read_tsc(); - shift = 5; - if (unlikely(loop >> shift == 0)) { - printk(KERN_INFO - "APBT TSC calibration failed, not enough resolution\n"); - return 0; - } - scale = (int)div_u64((t2 - t1), loop >> shift); - khz = (scale * apbt_freq * 1000) >> shift; - printk(KERN_INFO "TSC freq calculated by APB timer is %lu khz\n", khz); - return khz; + shift = 5; + if (unlikely(loop >> shift == 0)) { + printk(KERN_INFO + "APBT TSC calibration failed, not enough resolution\n"); + return 0; + } + scale = (int)div_u64((t2 - t1), loop >> shift); + khz = (scale * apbt_freq * 1000) >> shift; + printk(KERN_INFO "TSC freq calculated by APB timer is %lu khz\n", khz); + return khz; failed: - return 0; + return 0; }