linux/include/asm-x86
Nick Piggin b6c7347fff x86: optimise barriers
According to the latest memory ordering specification documents from Intel
and AMD, both manufacturers are committed to in-order loads from
cacheable memory for the x86 architecture.  Hence, smp_rmb() may be a
simple barrier.

Also according to those documents, and according to existing practice in
Linux (e.g. spin_unlock doesn't enforce ordering), stores to cacheable
memory are visible in program order too.  Special string stores are safe
-- their constituent stores may be out of order, but they must complete
in order WRT surrounding stores.  Nontemporal stores to WB memory can go
out of order, and so they should be fenced explicitly to make them
appear in-order WRT other stores.  Hence, smp_wmb() may be a simple
barrier.

    http://developer.intel.com/products/processor/manuals/318147.pdf
    http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/24593.pdf

In userspace microbenchmarks on a Core 2 system, fence instructions cost
anywhere from around 15 to 50 cycles, which may not be totally
insignificant in performance-critical paths (code size will go down
too).

However, the primary motivation for this is to have the canonical barrier
implementation for the x86 architecture.

smp_rmb on buggy Pentium Pros remains a locked op, which is apparently
required.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-12 18:41:21 -07:00
..
mach-bigsmp
mach-default
mach-es7000
mach-generic
mach-numaq
mach-summit
mach-visws
mach-voyager
xen
a.out_32.h
a.out_64.h
a.out.h
acpi_32.h
acpi_64.h
acpi.h
agp_32.h
agp_64.h
agp.h
alternative_32.h
alternative_64.h
alternative-asm_32.i
alternative-asm_64.i
alternative-asm.i
alternative.h
apic_32.h
apic_64.h x86_64: remove now unused code 2007-10-12 23:04:23 +02:00
apic.h
apicdef_32.h
apicdef_64.h
apicdef.h
arch_hooks.h
atomic_32.h
atomic_64.h
atomic.h
auxvec_32.h
auxvec_64.h
auxvec.h
bitops_32.h
bitops_64.h
bitops.h
boot.h
bootparam.h
bootsetup.h
bug_32.h
bug_64.h
bug.h
bugs_32.h
bugs_64.h
bugs.h
byteorder_32.h
byteorder_64.h
byteorder.h
cache_32.h
cache_64.h
cache.h
cacheflush_32.h
cacheflush_64.h
cacheflush.h
calgary.h
calling.h
checksum_32.h
checksum_64.h
checksum.h
cmpxchg_32.h
cmpxchg_64.h
cmpxchg.h
compat.h
cpu.h
cpufeature_32.h
cpufeature_64.h
cpufeature.h
cputime_32.h
cputime_64.h
cputime.h
current_32.h
current_64.h
current.h
debugreg_32.h
debugreg_64.h
debugreg.h
delay_32.h
delay_64.h
delay.h
desc_32.h
desc_64.h
desc_defs.h
desc.h
device_32.h
device_64.h
device.h
div64_32.h
div64_64.h
div64.h
dma_32.h
dma_64.h
dma-mapping_32.h
dma-mapping_64.h
dma-mapping.h
dma.h
dmi_32.h
dmi_64.h
dmi.h
dwarf2_32.h
dwarf2_64.h
dwarf2.h
e820_32.h
e820_64.h
e820.h
edac_32.h
edac_64.h
edac.h
elf_32.h
elf_64.h
elf.h
emergency-restart.h
errno_32.h
errno_64.h
errno.h
fb_32.h
fb_64.h
fb.h
fcntl.h
fixmap_32.h
fixmap_64.h
fixmap.h
floppy_32.h
floppy_64.h
floppy.h
fpu32.h
frame.i
futex_32.h
futex_64.h
futex.h
genapic_32.h
genapic_64.h
genapic.h
geode.h x86: Geode Multi-Function General Purpose Timers support 2007-10-12 23:04:06 +02:00
hardirq_32.h
hardirq_64.h
hardirq.h
highmem.h
hpet.h x86: HPET force enable for ICH5 2007-10-12 23:04:24 +02:00
hw_irq_32.h
hw_irq_64.h
hw_irq.h
hypertransport.h
i387_32.h
i387_64.h
i387.h
i8253.h i386: Remove the useless #ifdef in i8253.h 2007-10-12 23:04:23 +02:00
i8259.h
ia32_unistd.h
ia32.h
ide.h
idle.h
intel_arch_perfmon_32.h
intel_arch_perfmon_64.h
intel_arch_perfmon.h
io_32.h
io_64.h
io_apic_32.h
io_apic_64.h
io_apic.h
io.h
ioctl.h
ioctls_32.h
ioctls_64.h
ioctls.h
iommu.h
ipc.h
ipcbuf_32.h
ipcbuf_64.h
ipcbuf.h
ipi.h
irq_32.h
irq_64.h
irq_regs_32.h
irq_regs_64.h
irq_regs.h
irq.h
irqflags_32.h
irqflags_64.h
irqflags.h
ist.h
k8.h
Kbuild
kdebug_32.h
kdebug_64.h
kdebug.h
kexec_32.h
kexec_64.h
kexec.h
kmap_types_32.h
kmap_types_64.h
kmap_types.h
kprobes_32.h
kprobes_64.h
kprobes.h
ldt_32.h
ldt_64.h
ldt.h
linkage_32.h
linkage_64.h
linkage.h
local_32.h
local_64.h
local.h
mach_apic.h
math_emu.h
mc146818rtc_32.h
mc146818rtc_64.h
mc146818rtc.h
mca_dma.h
mca.h
mce_32.h
mce_64.h
mce.h
mman_32.h
mman_64.h
mman.h
mmsegment.h
mmu_32.h
mmu_64.h
mmu_context_32.h
mmu_context_64.h
mmu_context.h
mmu.h
mmx.h
mmzone_32.h
mmzone_64.h
mmzone.h
module_32.h
module_64.h
module.h
mpspec_32.h
mpspec_64.h
mpspec_def.h
mpspec.h
msgbuf_32.h
msgbuf_64.h
msgbuf.h
msidef.h
msr_32.h
msr_64.h
msr-index.h
msr.h
mtrr_32.h
mtrr_64.h
mtrr.h
mutex_32.h
mutex_64.h
mutex.h
namei_32.h
namei_64.h
namei.h
nmi_32.h
nmi_64.h
nmi.h
numa_32.h
numa_64.h
numa.h
numaq.h
page_32.h
page_64.h
page.h
param_32.h
param_64.h
param.h
paravirt.h
parport_32.h
parport_64.h
parport.h
pci_32.h PCI: merge almost all of pci_32.h and pci_64.h together 2007-10-12 15:03:20 -07:00
pci_64.h PCI: merge almost all of pci_32.h and pci_64.h together 2007-10-12 15:03:20 -07:00
pci-direct.h
pci.h PCI: merge almost all of pci_32.h and pci_64.h together 2007-10-12 15:03:20 -07:00
pda.h x86: Fix irq0 / local apic timer accounting 2007-10-12 23:04:07 +02:00
percpu_32.h
percpu_64.h
percpu.h
pgalloc_32.h
pgalloc_64.h
pgalloc.h
pgtable_32.h
pgtable_64.h
pgtable-2level-defs.h
pgtable-2level.h
pgtable-3level-defs.h
pgtable-3level.h
pgtable.h
poll.h
posix_types_32.h
posix_types_64.h
posix_types.h
prctl.h
processor_32.h
processor_64.h
processor-cyrix.h
processor-flags.h
processor.h
proto.h x86_64: remove now unused code 2007-10-12 23:04:23 +02:00
ptrace_32.h
ptrace_64.h
ptrace-abi_32.h
ptrace-abi_64.h
ptrace-abi.h
ptrace.h
reboot_fixups.h
reboot.h
required-features_32.h
required-features_64.h
required-features.h
resource_32.h
resource_64.h
resource.h
resume-trace_32.h
resume-trace_64.h
resume-trace.h
rio.h
rtc_32.h
rtc_64.h
rtc.h
rwlock_32.h
rwlock_64.h
rwlock.h
rwsem.h
scatterlist_32.h
scatterlist_64.h
scatterlist.h
seccomp_32.h
seccomp_64.h
seccomp.h
sections_32.h
sections_64.h
sections.h
segment_32.h
segment_64.h
segment.h
semaphore_32.h
semaphore_64.h
semaphore.h
sembuf_32.h
sembuf_64.h
sembuf.h
serial_32.h
serial_64.h
serial.h
setup_32.h
setup_64.h
setup.h
shmbuf_32.h
shmbuf_64.h
shmbuf.h
shmparam_32.h
shmparam_64.h
shmparam.h
sigcontext32.h
sigcontext_32.h
sigcontext_64.h
sigcontext.h
siginfo_32.h
siginfo_64.h
siginfo.h
signal_32.h
signal_64.h
signal.h
smp_32.h
smp_64.h
smp.h
socket.h
sockios_32.h
sockios_64.h
sockios.h
sparsemem_32.h
sparsemem_64.h
sparsemem.h
spinlock_32.h
spinlock_64.h
spinlock_types.h
spinlock.h
srat.h
stacktrace.h
stat_32.h
stat_64.h
stat.h
statfs_32.h
statfs_64.h
statfs.h
string_32.h
string_64.h
string.h
suspend_32.h
suspend_64.h
suspend.h
swiotlb.h
sync_bitops.h
system_32.h x86: optimise barriers 2007-10-12 18:41:21 -07:00
system_64.h x86: optimise barriers 2007-10-12 18:41:21 -07:00
system.h
tce.h
termbits_32.h
termbits_64.h
termbits.h
termios_32.h
termios_64.h
termios.h
therm_throt.h
thread_info_32.h
thread_info_64.h
thread_info.h
time.h
timer.h
timex.h x86: unify timex.h variants 2007-10-12 23:04:23 +02:00
tlb_32.h
tlb_64.h
tlb.h
tlbflush_32.h
tlbflush_64.h
tlbflush.h
topology_32.h
topology_64.h
topology.h
tsc.h x86: unify timex.h variants 2007-10-12 23:04:23 +02:00
types_32.h
types_64.h
types.h
uaccess_32.h
uaccess_64.h
uaccess.h
ucontext_32.h
ucontext_64.h
ucontext.h
unaligned_32.h
unaligned_64.h
unaligned.h
unistd_32.h
unistd_64.h
unistd.h
unwind_32.h
unwind_64.h
unwind.h
user32.h
user_32.h
user_64.h
user.h
vga.h
vgtod.h
vic.h
vm86.h
vmi_time.h
vmi.h
voyager.h
vsyscall32.h
vsyscall.h i386/x8664: cleanup the shared hpet code 2007-10-12 23:04:23 +02:00
xor_32.h
xor_64.h
xor.h