linux/include/asm-x86_64/proto.h
Andi Kleen a2f1b42490 [PATCH] x86_64: Add 4GB DMA32 zone
Add a new 4GB GFP_DMA32 zone between the GFP_DMA and GFP_NORMAL zones.

As a bit of historical background: when the x86-64 port
was originally designed we had some discussion if we should
use a 16MB DMA zone like i386 or a 4GB DMA zone like IA64 or
both. Both was ruled out at this point because it was in early
2.4 when VM is still quite shakey and had bad troubles even
dealing with one DMA zone.  We settled on the 16MB DMA zone mainly
because we worried about older soundcards and the floppy.

But this has always caused problems since then because
device drivers had trouble getting enough DMA able memory. These days
the VM works much better and the wide use of NUMA has proven
it can deal with many zones successfully.

So this patch adds both zones.

This helps drivers who need a lot of memory below 4GB because
their hardware is not accessing more (graphic drivers - proprietary
and free ones, video frame buffer drivers, sound drivers etc.).
Previously they could only use IOMMU+16MB GFP_DMA, which
was not enough memory.

Another common problem is that hardware who has full memory
addressing for >4GB misses it for some control structures in memory
(like transmit rings or other metadata).  They tended to allocate memory
in the 16MB GFP_DMA or the IOMMU/swiotlb then using pci_alloc_consistent,
but that can tie up a lot of precious 16MB GFPDMA/IOMMU/swiotlb memory
(even on AMD systems the IOMMU tends to be quite small) especially if you have
many devices.  With the new zone pci_alloc_consistent can just put
this stuff into memory below 4GB which works better.

One argument was still if the zone should be 4GB or 2GB. The main
motivation for 2GB would be an unnamed not so unpopular hardware
raid controller (mostly found in older machines from a particular four letter
company) who has a strange 2GB restriction in firmware. But
that one works ok with swiotlb/IOMMU anyways, so it doesn't really
need GFP_DMA32. I chose 4GB to be compatible with IA64 and because
it seems to be the most common restriction.

The new zone is so far added only for x86-64.

For other architectures who don't set up this
new zone nothing changes. Architectures can set a compatibility
define in Kconfig CONFIG_DMA_IS_DMA32 that will define GFP_DMA32
as GFP_DMA. Otherwise it's a nop because on 32bit architectures
it's normally not needed because GFP_NORMAL (=0) is DMA able
enough.

One problem is still that GFP_DMA means different things on different
architectures. e.g. some drivers used to have #ifdef ia64  use GFP_DMA
(trusting it to be 4GB) #elif __x86_64__ (use other hacks like
the swiotlb because 16MB is not enough) ... . This was quite
ugly and is now obsolete.

These should be now converted to use GFP_DMA32 unconditionally. I haven't done
this yet. Or best only use pci_alloc_consistent/dma_alloc_coherent
which will use GFP_DMA32 transparently.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-11-14 19:55:13 -08:00

125 lines
3.3 KiB
C

#ifndef _ASM_X8664_PROTO_H
#define _ASM_X8664_PROTO_H 1
#include <asm/ldt.h>
/* misc architecture specific prototypes */
struct cpuinfo_x86;
struct pt_regs;
extern void start_kernel(void);
extern void pda_init(int);
extern void early_idt_handler(void);
extern void mcheck_init(struct cpuinfo_x86 *c);
#ifdef CONFIG_MTRR
extern void mtrr_ap_init(void);
extern void mtrr_bp_init(void);
#else
#define mtrr_ap_init() do {} while (0)
#define mtrr_bp_init() do {} while (0)
#endif
extern void init_memory_mapping(unsigned long start, unsigned long end);
extern void size_zones(unsigned long *z, unsigned long *h,
unsigned long start_pfn, unsigned long end_pfn);
extern void system_call(void);
extern int kernel_syscall(void);
extern void syscall_init(void);
extern void ia32_syscall(void);
extern void ia32_cstar_target(void);
extern void ia32_sysenter_target(void);
extern void config_acpi_tables(void);
extern void ia32_syscall(void);
extern void iommu_hole_init(void);
extern void time_init_gtod(void);
extern int pmtimer_mark_offset(void);
extern unsigned int do_gettimeoffset_pm(void);
extern u32 pmtmr_ioport;
extern unsigned long long monotonic_base;
extern int sysctl_vsyscall;
extern void do_softirq_thunk(void);
extern int numa_setup(char *opt);
extern int setup_early_printk(char *);
extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2)));
extern void early_identify_cpu(struct cpuinfo_x86 *c);
extern int k8_scan_nodes(unsigned long start, unsigned long end);
extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
extern unsigned long numa_free_all_bootmem(void);
extern void reserve_bootmem_generic(unsigned long phys, unsigned len);
extern void free_bootmem_generic(unsigned long phys, unsigned len);
extern void load_gs_index(unsigned gs);
extern unsigned long end_pfn_map;
extern cpumask_t cpu_initialized;
extern void show_trace(unsigned long * rsp);
extern void show_registers(struct pt_regs *regs);
extern void exception_table_check(void);
extern void acpi_reserve_bootmem(void);
extern void swap_low_mappings(void);
extern void __show_regs(struct pt_regs * regs);
extern void show_regs(struct pt_regs * regs);
extern char *syscall32_page;
extern void syscall32_cpu_init(void);
extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end);
extern void check_ioapic(void);
extern void check_efer(void);
extern int unhandled_signal(struct task_struct *tsk, int sig);
extern void select_idle_routine(const struct cpuinfo_x86 *c);
extern void swiotlb_init(void);
extern unsigned long table_start, table_end;
extern int exception_trace;
extern int force_iommu, no_iommu;
extern int using_apic_timer;
extern int disable_apic;
extern unsigned cpu_khz;
extern int ioapic_force;
extern int skip_ioapic_setup;
extern int acpi_ht;
extern int acpi_disabled;
extern int fallback_aper_order;
extern int fallback_aper_force;
extern int iommu_aperture;
extern int iommu_aperture_disabled;
extern int iommu_aperture_allowed;
extern int fix_aperture;
extern int force_iommu;
extern int reboot_force;
extern void smp_local_timer_interrupt(struct pt_regs * regs);
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
#define round_up(x,y) (((x) + (y) - 1) & ~((y)-1))
#define round_down(x,y) ((x) & ~((y)-1))
#endif