Make asm/word-at-a-time.h available on all architectures
Added the x86 implementation of word-at-a-time to the generic version, which previously supported only big-endian. Omitted the x86-specific load_unaligned_zeropad(), which is likewise absent from the existing big-endian-only word-at-a-time implementation and is used only under CONFIG_DCACHE_WORD_ACCESS. Added word-at-a-time.h as a "generic-y" entry to the Kbuild files of all architectures that did not previously have it. Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com>
This commit is contained in:
parent
d770e558e2
commit
a6e2f029ae
@ -47,4 +47,5 @@ generic-y += types.h
|
||||
generic-y += ucontext.h
|
||||
generic-y += user.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
@ -19,4 +19,5 @@ generic-y += sections.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
@ -45,4 +45,5 @@ generic-y += types.h
|
||||
generic-y += ucontext.h
|
||||
generic-y += unaligned.h
|
||||
generic-y += user.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
@ -58,4 +58,5 @@ generic-y += types.h
|
||||
generic-y += ucontext.h
|
||||
generic-y += user.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
@ -25,4 +25,5 @@ generic-y += sections.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
@ -6,3 +6,4 @@ generic-y += irq_work.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += preempt.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += word-at-a-time.h
|
||||
|
@ -57,4 +57,5 @@ generic-y += types.h
|
||||
generic-y += ucontext.h
|
||||
generic-y += unaligned.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
@ -7,3 +7,4 @@ generic-y += mcs_spinlock.h
|
||||
generic-y += preempt.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += vtime.h
|
||||
generic-y += word-at-a-time.h
|
||||
|
@ -8,3 +8,4 @@ generic-y += module.h
|
||||
generic-y += preempt.h
|
||||
generic-y += sections.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += word-at-a-time.h
|
||||
|
@ -53,4 +53,5 @@ generic-y += ucontext.h
|
||||
generic-y += unaligned.h
|
||||
generic-y += user.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
@ -9,3 +9,4 @@ generic-y += mcs_spinlock.h
|
||||
generic-y += preempt.h
|
||||
generic-y += syscalls.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += word-at-a-time.h
|
||||
|
@ -17,4 +17,5 @@ generic-y += serial.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += ucontext.h
|
||||
generic-y += user.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
@ -8,3 +8,4 @@ generic-y += mcs_spinlock.h
|
||||
generic-y += preempt.h
|
||||
generic-y += sections.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += word-at-a-time.h
|
||||
|
@ -60,4 +60,5 @@ generic-y += types.h
|
||||
generic-y += unaligned.h
|
||||
generic-y += user.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
@ -8,3 +8,4 @@ generic-y += preempt.h
|
||||
generic-y += rwsem.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += vtime.h
|
||||
generic-y += word-at-a-time.h
|
||||
|
@ -5,3 +5,4 @@ generic-y += irq_work.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += preempt.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += word-at-a-time.h
|
||||
|
@ -12,3 +12,4 @@ generic-y += sections.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += xor.h
|
||||
generic-y += serial.h
|
||||
generic-y += word-at-a-time.h
|
||||
|
@ -38,4 +38,5 @@ generic-y += termbits.h
|
||||
generic-y += termios.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += types.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
@ -24,4 +24,5 @@ generic-y += preempt.h
|
||||
generic-y += switch_to.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
@ -61,4 +61,5 @@ generic-y += ucontext.h
|
||||
generic-y += unaligned.h
|
||||
generic-y += user.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
@ -28,4 +28,5 @@ generic-y += statfs.h
|
||||
generic-y += termios.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
@ -1,15 +1,10 @@
|
||||
#ifndef _ASM_WORD_AT_A_TIME_H
|
||||
#define _ASM_WORD_AT_A_TIME_H
|
||||
|
||||
/*
|
||||
* This says "generic", but it's actually big-endian only.
|
||||
* Little-endian can use more efficient versions of these
|
||||
* interfaces, see for example
|
||||
* arch/x86/include/asm/word-at-a-time.h
|
||||
* for those.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <asm/byteorder.h>
|
||||
|
||||
#ifdef __BIG_ENDIAN
|
||||
|
||||
struct word_at_a_time {
|
||||
const unsigned long high_bits, low_bits;
|
||||
@ -53,4 +48,73 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct
|
||||
#define zero_bytemask(mask) (~1ul << __fls(mask))
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
* The optimal byte mask counting is probably going to be something
|
||||
* that is architecture-specific. If you have a reliably fast
|
||||
* bit count instruction, that might be better than the multiply
|
||||
* and shift, for example.
|
||||
*/
|
||||
/*
 * Constants for the little-endian helpers (the #else branch of
 * #ifdef __BIG_ENDIAN). has_zero() reads both fields through its
 * 'c' argument.
 */
struct word_at_a_time {
|
||||
/* has_zero() subtracts one_bits from the word and masks the result with high_bits */
const unsigned long one_bits, high_bits;
|
||||
};
|
||||
|
||||
/*
 * Positional initializer for the little-endian struct word_at_a_time:
 * the first value fills one_bits, the second fills high_bits.
 * NOTE(review): REPEAT_BYTE() is defined outside this file; presumably it
 * replicates the given byte across an unsigned long - confirm.
 */
#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
|
||||
/*
|
||||
* Jan Achrenius on G+: microoptimized version of
|
||||
* the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
|
||||
* that works for the bytemasks without having to
|
||||
* mask them first.
|
||||
*/
|
||||
/*
 * 64-bit variant (compiled when CONFIG_64BIT is defined).
 *
 * @mask: byte mask; the intended inputs are masks made of k low-order
 *        0xff bytes, as returned by create_zero_mask().
 *
 * Returns the top byte (bits 56..63) of mask * 0x0001020304050608.
 * For the intended inputs that is k, e.g. 0 -> 0, 0xff -> 1, 0xffff -> 2.
 */
static inline long count_masked_bytes(unsigned long mask)
|
||||
{
|
||||
/* The multiply wraps modulo 2^64; only the most significant byte is kept. */
return mask*0x0001020304050608ul >> 56;
|
||||
}
|
||||
|
||||
#else /* 32-bit case */
|
||||
|
||||
/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
|
||||
/*
 * 32-bit variant (the #else branch of #ifdef CONFIG_64BIT).
 *
 * @mask: byte mask; the intended inputs are 0, 0xff, 0xffff or 0xffffff.
 *
 * Returns 0, 1, 2 or 3 respectively for those four inputs.
 */
static inline long count_masked_bytes(long mask)
|
||||
{
|
||||
/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
|
||||
long a = (0x0ff0001+mask) >> 23;
|
||||
/* Fix the 1 for 00 case: when mask is 0, the AND forces the result to 0 */
|
||||
return a & mask;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Return nonzero if it has a zero */
|
||||
/*
 * Little-endian zero-byte test.
 *
 * @a:    the word to examine.
 * @bits: out parameter; receives the same value that is returned.
 * @c:    constants; one_bits and high_bits are read from it.
 *
 * Returns ((a - one_bits) & ~a) & high_bits. The lowest set bit of the
 * result is the one create_zero_mask() goes on to use.
 */
static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
|
||||
{
|
||||
/* A bit survives only where the subtraction set it and 'a' had it clear, at a high_bits position. */
unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
|
||||
/* Hand the raw mask back through the out parameter as well as the return value. */
*bits = mask;
|
||||
return mask;
|
||||
}
|
||||
|
||||
/*
 * Little-endian variant: no extra preparation is needed.
 *
 * @a:    unused here.
 * @bits: mask produced by has_zero().
 * @c:    unused here.
 *
 * Returns @bits unchanged.
 */
static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
|
||||
{
|
||||
return bits;
|
||||
}
|
||||
|
||||
/*
 * Turn a has_zero()/prep_zero_mask() result into a byte mask.
 *
 * @bits: mask whose lowest set bit marks the first zero byte.
 *        NOTE(review): bits == 0 yields a non-empty mask; callers
 *        presumably only pass a nonzero value - confirm.
 *
 * Returns a mask of the bits below the lowest set bit of @bits, shifted
 * right by 7. When that lowest bit is the top bit of byte k, the result is
 * k low-order 0xff bytes (e.g. 0x80 -> 0, 0x8000 -> 0xff).
 */
static inline unsigned long create_zero_mask(unsigned long bits)
|
||||
{
|
||||
/* Keep exactly the bits strictly below the lowest set bit. */
bits = (bits - 1) & ~bits;
|
||||
/* Drop the 7 low bits so the mask covers only whole bytes. */
return bits >> 7;
|
||||
}
|
||||
|
||||
/*
 * Little-endian variant: the mask returned by create_zero_mask() is
 * directly usable as a bytemask, so this is the identity.
 */
|
||||
#define zero_bytemask(mask) (mask)
|
||||
|
||||
/*
 * Little-endian variant.
 *
 * @mask: byte mask as returned by create_zero_mask().
 *
 * Returns count_masked_bytes(@mask), using whichever of the 64-bit or
 * 32-bit variants was compiled in.
 */
static inline unsigned long find_zero(unsigned long mask)
|
||||
{
|
||||
return count_masked_bytes(mask);
|
||||
}
|
||||
|
||||
#endif /* __BIG_ENDIAN */
|
||||
|
||||
#endif /* _ASM_WORD_AT_A_TIME_H */
|
||||
|
Loading…
Reference in New Issue
Block a user