sh: flush_cache_range() cleanup and optimizations.

flush_cache_range() wasn't page aligning the end of the range. We can't assume that the end will always be page aligned, and we ended up getting unaligned faults in some rare call paths. Additionally, we add a small optimization to just purge the dcache entirely if the range is large enough that walking the page tables would take longer than flushing the whole dcache. We use an arbitrary value of 64 pages for the large range size, as per sh64. Signed-off-by: Paul Mundt <lethal@linux-sh.org>
2006-09-27 11:29:55 +09:00 · 2006-09-27 11:29:55 +09:00 · a252710fc5
commit a252710fc5
parent e4e3b5ccd7
1 changed files with 46 additions and 26 deletions
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@ -2,7 +2,7 @@
 * arch/sh/mm/cache-sh4.c
 *
 * Copyright (C) 1999, 2000, 2002  Niibe Yutaka
- * Copyright (C) 2001, 2002, 2003, 2004  Paul Mundt
+ * Copyright (C) 2001, 2002, 2003, 2004, 2005  Paul Mundt
 * Copyright (C) 2003  Richard Curnow
 *
 * This file is subject to the terms and conditions of the GNU General Public
@ -25,6 +25,8 @@
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>

+extern void __flush_cache_4096(unsigned long addr, unsigned long phys,
+			       unsigned long exec_offset);
 extern void __flush_cache_4096_all(unsigned long start);
 static void __flush_cache_4096_all_ex(unsigned long start);
 extern void __flush_dcache_all(void);
@ -112,9 +114,14 @@ static void __flush_dcache_all_ex(void)
 {
 	unsigned long addr, end_addr, entry_offset;

-	end_addr = CACHE_OC_ADDRESS_ARRAY + (cpu_data->dcache.sets << cpu_data->dcache.entry_shift) * cpu_data->dcache.ways;
+	end_addr = CACHE_OC_ADDRESS_ARRAY +
+		(cpu_data->dcache.sets << cpu_data->dcache.entry_shift) *
+		 cpu_data->dcache.ways;
+
 	entry_offset = 1 << cpu_data->dcache.entry_shift;
-	for (addr = CACHE_OC_ADDRESS_ARRAY; addr < end_addr; addr += entry_offset) {
+	for (addr = CACHE_OC_ADDRESS_ARRAY;
+	     addr < end_addr;
+	     addr += entry_offset) {
 		ctrl_outl(0, addr);
 	}
 }
@ -125,7 +132,8 @@ static void __flush_cache_4096_all_ex(unsigned long start)
 	int i;

 	entry_offset = 1 << cpu_data->dcache.entry_shift;
-	for (i = 0; i < cpu_data->dcache.ways; i++, start += cpu_data->dcache.way_incr) {
+	for (i = 0; i < cpu_data->dcache.ways;
+	     i++, start += cpu_data->dcache.way_incr) {
 		for (addr = CACHE_OC_ADDRESS_ARRAY + start;
 		     addr < CACHE_OC_ADDRESS_ARRAY + 4096 + start;
 		     addr += entry_offset) {
@ -172,7 +180,8 @@ void flush_cache_sigtramp(unsigned long addr)

 	local_irq_save(flags);
 	jump_to_P2();
-	for(i = 0; i < cpu_data->icache.ways; i++, index += cpu_data->icache.way_incr)
+	for (i = 0; i < cpu_data->icache.ways;
+	     i++, index += cpu_data->icache.way_incr)
 		ctrl_outl(0, index);	/* Clear out Valid-bit */
 	back_to_P1();
 	local_irq_restore(flags);
@ -182,7 +191,6 @@ static inline void flush_cache_4096(unsigned long start,
 				    unsigned long phys)
 {
 	unsigned long flags;
-	extern void __flush_cache_4096(unsigned long addr, unsigned long phys, unsigned long exec_offset);

 	/*
 	 * SH7751, SH7751R, and ST40 have no restriction to handle cache.
@ -191,10 +199,12 @@ static inline void flush_cache_4096(unsigned long start,
 	if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG)
 	   || start < CACHE_OC_ADDRESS_ARRAY) {
 		local_irq_save(flags);
-		__flush_cache_4096(start | SH_CACHE_ASSOC, P1SEGADDR(phys), 0x20000000);
+		__flush_cache_4096(start | SH_CACHE_ASSOC,
+				   P1SEGADDR(phys), 0x20000000);
 		local_irq_restore(flags);
 	} else {
-		__flush_cache_4096(start | SH_CACHE_ASSOC, P1SEGADDR(phys), 0);
+		__flush_cache_4096(start | SH_CACHE_ASSOC,
+				   P1SEGADDR(phys), 0);
 	}
 }

@ -231,29 +241,22 @@ static inline void flush_icache_all(void)
 	local_irq_restore(flags);
 }

-void flush_cache_all(void)
+void flush_dcache_all(void)
 {
 	if (cpu_data->dcache.ways == 1)
 		__flush_dcache_all();
 	else
 		__flush_dcache_all_ex();
+}
+
+void flush_cache_all(void)
+{
+	flush_dcache_all();
 	flush_icache_all();
 }

 void flush_cache_mm(struct mm_struct *mm)
 {
-	/* Is there any good way? */
-	/* XXX: possibly call flush_cache_range for each vm area */
-	/* 
-	 * FIXME: Really, the optimal solution here would be able to flush out
-	 * individual lines created by the specified context, but this isn't
-	 * feasible for a number of architectures (such as MIPS, and some
-	 * SPARC) .. is this possible for SuperH?
-	 *
-	 * In the meantime, we'll just flush all of the caches.. this
-	 * seems to be the simplest way to avoid at least a few wasted
-	 * cache flushes. -Lethal
-	 */
 	flush_cache_all();
 }

@ -301,13 +304,30 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 	unsigned long p = start & PAGE_MASK;
 	pgd_t *dir;
 	pmd_t *pmd;
+	pud_t *pud;
 	pte_t *pte;
 	pte_t entry;
 	unsigned long phys;
 	unsigned long d = 0;

+	/*
+	 * Don't bother with the lookup and alias check if we have a
+	 * wide range to cover, just blow away the dcache in its
+	 * entirety instead. -- PFM.
+	 */
+	if (((end - start) >> PAGE_SHIFT) >= 64) {
+		flush_dcache_all();
+
+		if (vma->vm_flags & VM_EXEC)
+			flush_icache_all();
+
+		return;
+	}
+
 	dir = pgd_offset(vma->vm_mm, p);
-	pmd = pmd_offset(dir, p);
+	pud = pud_offset(dir, p);
+	pmd = pmd_offset(pud, p);
+	end = PAGE_ALIGN(end);

 	do {
 		if (pmd_none(*pmd) || pmd_bad(*pmd)) {