diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index 4303e0c12476..74b77f9e91bc 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt @@ -199,6 +199,11 @@ IOMMU allowed overwrite iommu off workarounds for specific chipsets. soft Use software bounce buffering (default for Intel machines) noaperture Don't touch the aperture for AGP. + allowdac Allow DMA >4GB + When off all DMA over >4GB is forced through an IOMMU or bounce + buffering. + nodac Forbid DMA >4GB + panic Always panic when IOMMU overflows swiotlb=pages[,force] diff --git a/arch/i386/defconfig b/arch/i386/defconfig index 1a29bfa26d0c..ee2d79bd8af7 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.18-git5 -# Tue Sep 26 09:30:47 2006 +# Linux kernel version: 2.6.18-git7 +# Wed Sep 27 21:53:10 2006 # CONFIG_X86_32=y CONFIG_GENERIC_TIME=y @@ -210,6 +210,7 @@ CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y CONFIG_PM=y CONFIG_PM_LEGACY=y # CONFIG_PM_DEBUG is not set +CONFIG_PM_SYSFS_DEPRECATED=y # # ACPI (Advanced Configuration and Power Interface) Support @@ -292,6 +293,7 @@ CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y # CONFIG_PCIEPORTBUS is not set CONFIG_PCI_MSI=y +# CONFIG_PCI_MULTITHREAD_PROBE is not set # CONFIG_PCI_DEBUG is not set CONFIG_ISA_DMA_API=y # CONFIG_ISA is not set @@ -1427,6 +1429,7 @@ CONFIG_KPROBES=y # CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set +# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_MAGIC_SYSRQ=y CONFIG_UNUSED_SYMBOLS=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index dbda706fdd14..0fc4997fb143 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -31,6 +31,9 @@ #include "mach_traps.h" +int unknown_nmi_panic; +int nmi_watchdog_enabled; + /* perfctr_nmi_owner tracks the ownership of the perfctr registers: * evtsel_nmi_owner tracks the ownership of the event selection * - different performance counters/ event selection may be reserved for diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 814cdebf7377..000cf03751fe 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -209,9 +209,6 @@ static struct resource adapter_rom_resources[] = { { .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM } }; -#define ADAPTER_ROM_RESOURCES \ - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0]) - static struct resource video_rom_resource = { .name = "Video ROM", .start = 0xc0000, @@ -273,9 +270,6 @@ static struct resource standard_io_resources[] = { { .flags = IORESOURCE_BUSY | IORESOURCE_IO } }; -#define STANDARD_IO_RESOURCES \ - (sizeof standard_io_resources / sizeof standard_io_resources[0]) - #define romsignature(x) (*(unsigned short *)(x) == 0xaa55) static int __init romchecksum(unsigned char *rom, unsigned long length) @@ -332,7 +326,7 @@ static void __init probe_roms(void) } /* check for adapter roms on 2k boundaries */ - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) { + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { rom = isa_bus_to_virt(start); if (!romsignature(rom)) continue; @@ -1272,7 +1266,7 @@ static int __init request_standard_resources(void) request_resource(&iomem_resource, &video_ram_resource); /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < STANDARD_IO_RESOURCES; i++) + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) request_resource(&ioport_resource, &standard_io_resources[i]); return 0; } diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index a13037fe0ee3..6820b8d643c7 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -57,6 +57,8 @@ #include "mach_traps.h" +int panic_on_unrecovered_nmi; + asmlinkage int system_call(void); struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index 647610ecb580..4844b543bed0 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -1,11 +1,12 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.18-git5 -# Tue Sep 26 09:30:47 2006 +# Linux kernel version: 2.6.18-git7 +# Wed Sep 27 21:53:10 2006 # CONFIG_X86_64=y CONFIG_64BIT=y CONFIG_X86=y +CONFIG_ZONE_DMA32=y CONFIG_LOCKDEP_SUPPORT=y CONFIG_STACKTRACE_SUPPORT=y CONFIG_SEMAPHORE_SLEEPERS=y @@ -179,6 +180,7 @@ CONFIG_GENERIC_PENDING_IRQ=y CONFIG_PM=y # CONFIG_PM_LEGACY is not set # CONFIG_PM_DEBUG is not set +# CONFIG_PM_SYSFS_DEPRECATED is not set CONFIG_SOFTWARE_SUSPEND=y CONFIG_PM_STD_PARTITION="" CONFIG_SUSPEND_SMP=y @@ -251,6 +253,7 @@ CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_MSI=y +# CONFIG_PCI_MULTITHREAD_PROBE is not set # CONFIG_PCI_DEBUG is not set # @@ -1458,6 +1461,7 @@ CONFIG_KPROBES=y # CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set +# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_MAGIC_SYSRQ=y CONFIG_UNUSED_SYMBOLS=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index 20e88f4b564b..b8d53dfa9931 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -152,6 +152,21 @@ static void __init MP_bus_info (struct mpc_config_bus *m) } } +static int bad_ioapic(unsigned long address) +{ + if (nr_ioapics >= MAX_IO_APICS) { + printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " + "(found %d)\n", MAX_IO_APICS, nr_ioapics); + panic("Recompile kernel with bigger MAX_IO_APICS!\n"); + } + if (!address) { + printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" + " found in table, skipping!\n"); + return 1; + } + return 0; +} + static void __init MP_ioapic_info (struct mpc_config_ioapic *m) { if (!(m->mpc_flags & MPC_APIC_USABLE)) @@ -159,16 +174,10 @@ static void __init MP_ioapic_info (struct mpc_config_ioapic *m) printk("I/O APIC #%d at 0x%X.\n", m->mpc_apicid, m->mpc_apicaddr); - if (nr_ioapics >= MAX_IO_APICS) { - printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n", - MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); - } - if (!m->mpc_apicaddr) { - printk(KERN_ERR "WARNING: bogus zero I/O APIC address" - " found in MP table, skipping!\n"); + + if (bad_ioapic(m->mpc_apicaddr)) return; - } + mp_ioapics[nr_ioapics] = *m; nr_ioapics++; } @@ -647,16 +656,8 @@ void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) { int idx = 0; - if (nr_ioapics >= MAX_IO_APICS) { - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " - "(found %d)\n", MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!\n"); - } - if (!address) { - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" - " found in MADT table, skipping!\n"); + if (bad_ioapic(address)) return; - } idx = nr_ioapics++; diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 4d6fb047952e..7af9cb3e2d99 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -28,6 +28,10 @@ #include #include +int unknown_nmi_panic; +int nmi_watchdog_enabled; +int panic_on_unrecovered_nmi; + /* perfctr_nmi_owner tracks the ownership of the perfctr registers: * evtsel_nmi_owner tracks the ownership of the event selection * - different performance counters/ event selection may be reserved for diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index 4dcb671bd19f..f8d857453f8a 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c @@ -170,8 +170,20 @@ void dma_free_coherent(struct device *dev, size_t size, } EXPORT_SYMBOL(dma_free_coherent); +static int forbid_dac __read_mostly; + int dma_supported(struct device *dev, u64 mask) { +#ifdef CONFIG_PCI + if (mask > 0xffffffff && forbid_dac > 0) { + + + + printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", dev->bus_id); + return 0; + } +#endif + if (dma_ops->dma_supported) return dma_ops->dma_supported(dev, mask); @@ -231,57 +243,64 @@ EXPORT_SYMBOL(dma_set_mask); allowed overwrite iommu off workarounds for specific chipsets. soft Use software bounce buffering (default for Intel machines) noaperture Don't touch the aperture for AGP. + allowdac Allow DMA >4GB + nodac Forbid DMA >4GB + panic Force panic when IOMMU overflows */ __init int iommu_setup(char *p) { - iommu_merge = 1; + iommu_merge = 1; if (!p) return -EINVAL; - while (*p) { - if (!strncmp(p,"off",3)) - no_iommu = 1; - /* gart_parse_options has more force support */ - if (!strncmp(p,"force",5)) - force_iommu = 1; - if (!strncmp(p,"noforce",7)) { - iommu_merge = 0; - force_iommu = 0; - } + while (*p) { + if (!strncmp(p,"off",3)) + no_iommu = 1; + /* gart_parse_options has more force support */ + if (!strncmp(p,"force",5)) + force_iommu = 1; + if (!strncmp(p,"noforce",7)) { + iommu_merge = 0; + force_iommu = 0; + } - if (!strncmp(p, "biomerge",8)) { - iommu_bio_merge = 4096; - iommu_merge = 1; - force_iommu = 1; - } - if (!strncmp(p, "panic",5)) - panic_on_overflow = 1; - if (!strncmp(p, "nopanic",7)) - panic_on_overflow = 0; - if (!strncmp(p, "merge",5)) { - iommu_merge = 1; - force_iommu = 1; - } - if (!strncmp(p, "nomerge",7)) - iommu_merge = 0; - if (!strncmp(p, "forcesac",8)) - iommu_sac_force = 1; + if (!strncmp(p, "biomerge",8)) { + iommu_bio_merge = 4096; + iommu_merge = 1; + force_iommu = 1; + } + if (!strncmp(p, "panic",5)) + panic_on_overflow = 1; + if (!strncmp(p, "nopanic",7)) + panic_on_overflow = 0; + if (!strncmp(p, "merge",5)) { + iommu_merge = 1; + force_iommu = 1; + } + if (!strncmp(p, "nomerge",7)) + iommu_merge = 0; + if (!strncmp(p, "forcesac",8)) + iommu_sac_force = 1; + if (!strncmp(p, "allowdac", 8)) + forbid_dac = 0; + if (!strncmp(p, "nodac", 5)) + forbid_dac = -1; #ifdef CONFIG_SWIOTLB - if (!strncmp(p, "soft",4)) - swiotlb = 1; + if (!strncmp(p, "soft",4)) + swiotlb = 1; #endif #ifdef CONFIG_IOMMU - gart_parse_options(p); + gart_parse_options(p); #endif - p += strcspn(p, ","); - if (*p == ',') - ++p; - } - return 0; + p += strcspn(p, ","); + if (*p == ',') + ++p; + } + return 0; } early_param("iommu", iommu_setup); diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 0b00bb2ea576..fc944b5e8f4a 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -123,9 +123,6 @@ struct resource standard_io_resources[] = { .flags = IORESOURCE_BUSY | IORESOURCE_IO } }; -#define STANDARD_IO_RESOURCES \ - (sizeof standard_io_resources / sizeof standard_io_resources[0]) - #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM) struct resource data_resource = { @@ -172,9 +169,6 @@ static struct resource adapter_rom_resources[] = { .flags = IORESOURCE_ROM } }; -#define ADAPTER_ROM_RESOURCES \ - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0]) - static struct resource video_rom_resource = { .name = "Video ROM", .start = 0xc0000, @@ -245,7 +239,8 @@ static void __init probe_roms(void) } /* check for adapter roms on 2k boundaries */ - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) { + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; + start += 2048) { rom = isa_bus_to_virt(start); if (!romsignature(rom)) continue; @@ -537,7 +532,7 @@ void __init setup_arch(char **cmdline_p) { unsigned i; /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < STANDARD_IO_RESOURCES; i++) + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) request_resource(&ioport_resource, &standard_io_resources[i]); } diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index d0564f1bcb0b..f8aeccf105fa 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -67,13 +67,6 @@ SECTIONS _edata = .; /* End of data section */ - __bss_start = .; /* BSS */ - .bss : AT(ADDR(.bss) - LOAD_OFFSET) { - *(.bss.page_aligned) - *(.bss) - } - __bss_stop = .; - . = ALIGN(PAGE_SIZE); . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { @@ -229,6 +222,13 @@ SECTIONS . = ALIGN(4096); __nosave_end = .; + __bss_start = .; /* BSS */ + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { + *(.bss.page_aligned) + *(.bss) + } + __bss_stop = .; + _end = . ; /* Sections to be discarded */ diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index ac48c3857ddb..07c086382059 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -155,8 +155,8 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) We do this here because otherwise user space would do it on its own in a likely inferior way (no access to jiffies). If you don't like it pass NULL. */ - if (tcache && tcache->t0 == (j = __jiffies)) { - p = tcache->t1; + if (tcache && tcache->blob[0] == (j = __jiffies)) { + p = tcache->blob[1]; } else if (__vgetcpu_mode == VGETCPU_RDTSCP) { /* Load per CPU data from RDTSCP */ rdtscp(dummy, dummy, p); @@ -165,8 +165,8 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); } if (tcache) { - tcache->t0 = j; - tcache->t1 = p; + tcache->blob[0] = j; + tcache->blob[1] = p; } if (cpu) *cpu = p & 0xfff; diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h index 303bcd4592bb..269d315719ca 100644 --- a/include/asm-i386/nmi.h +++ b/include/asm-i386/nmi.h @@ -36,4 +36,10 @@ extern unsigned int nmi_watchdog; #define NMI_LOCAL_APIC 2 #define NMI_INVALID 3 +struct ctl_table; +struct file; +extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, + void __user *, size_t *, loff_t *); +extern int unknown_nmi_panic; + #endif /* ASM_NMI_H */ diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h index cbf2669bca71..f367d4014b42 100644 --- a/include/asm-x86_64/nmi.h +++ b/include/asm-x86_64/nmi.h @@ -70,4 +70,11 @@ extern unsigned int nmi_watchdog; #define NMI_LOCAL_APIC 2 #define NMI_INVALID 3 +struct ctl_table; +struct file; +extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, + void __user *, size_t *, loff_t *); + +extern int unknown_nmi_panic; + #endif /* ASM_NMI_H */ diff --git a/include/asm-x86_64/semaphore.h b/include/asm-x86_64/semaphore.h index 107bd90429e8..1194888536b9 100644 --- a/include/asm-x86_64/semaphore.h +++ b/include/asm-x86_64/semaphore.h @@ -132,7 +132,7 @@ static inline int down_interruptible(struct semaphore * sem) "jns 2f\n\t" "call __down_failed_interruptible\n" "2:\n" - :"=a" (result), "=m" (sem->count) + :"=&a" (result), "=m" (sem->count) :"D" (sem) :"memory"); return result; @@ -153,7 +153,7 @@ static inline int down_trylock(struct semaphore * sem) "jns 2f\n\t" "call __down_failed_trylock\n\t" "2:\n" - :"=a" (result), "=m" (sem->count) + :"=&a" (result), "=m" (sem->count) :"D" (sem) :"memory","cc"); return result; diff --git a/include/asm-x86_64/uaccess.h b/include/asm-x86_64/uaccess.h index e856570c0689..19f99178fe83 100644 --- a/include/asm-x86_64/uaccess.h +++ b/include/asm-x86_64/uaccess.h @@ -361,6 +361,11 @@ __must_check unsigned long clear_user(void __user *mem, unsigned long len); __must_check unsigned long __clear_user(void __user *mem, unsigned long len); __must_check long __copy_from_user_inatomic(void *dst, const void __user *src, unsigned size); -#define __copy_to_user_inatomic copy_user_generic + +static __must_check __always_inline int +__copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) +{ + return copy_user_generic((__force void *)dst, src, size); +} #endif /* __X86_64_UACCESS_H */ diff --git a/include/linux/getcpu.h b/include/linux/getcpu.h index 031ed3780e45..c7372d7a97be 100644 --- a/include/linux/getcpu.h +++ b/include/linux/getcpu.h @@ -1,16 +1,18 @@ #ifndef _LINUX_GETCPU_H #define _LINUX_GETCPU_H 1 -/* Cache for getcpu() to speed it up. Results might be upto a jiffie +/* Cache for getcpu() to speed it up. Results might be a short time out of date, but will be faster. + User programs should not refer to the contents of this structure. - It is only a cache for vgetcpu(). It might change in future kernels. + I repeat they should not refer to it. If they do they will break + in future kernels. + + It is only a private cache for vgetcpu(). It will change in future kernels. The user program must store this information per thread (__thread) If you want 100% accurate information pass NULL instead. */ struct getcpu_cache { - unsigned long t0; - unsigned long t1; - unsigned long res[4]; + unsigned long blob[128 / sizeof(long)]; }; #endif diff --git a/kernel/panic.c b/kernel/panic.c index 6ceb664fb52a..525e365f7239 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -21,7 +21,6 @@ #include int panic_on_oops; -int panic_on_unrecovered_nmi; int tainted; static int pause_on_oops; static int pause_on_oops_flag; diff --git a/kernel/sys.c b/kernel/sys.c index 8647061c084a..b88806c66244 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2083,12 +2083,12 @@ asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep, * padding */ unsigned long t0, t1; - get_user(t0, &cache->t0); - get_user(t1, &cache->t1); + get_user(t0, &cache->blob[0]); + get_user(t1, &cache->blob[1]); t0++; t1++; - put_user(t0, &cache->t0); - put_user(t1, &cache->t1); + put_user(t0, &cache->blob[0]); + put_user(t1, &cache->blob[1]); } return err ? -EFAULT : 0; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9535a3839930..c57c4532e296 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -52,6 +52,10 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); +#ifdef CONFIG_X86 +#include +#endif + #if defined(CONFIG_SYSCTL) /* External variables not in a header file. */ @@ -74,13 +78,6 @@ extern int sysctl_drop_caches; extern int percpu_pagelist_fraction; extern int compat_log; -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) -int unknown_nmi_panic; -int nmi_watchdog_enabled; -extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, - void __user *, size_t *, loff_t *); -#endif - /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; static int minolduid;