Fix constant folding and poor optimization in byte swapping code

Constant folding does not work for the swabXX() byte swapping functions,
and the C versions optimize poorly.

Attempting to initialize a global variable to swab16(0x1234) or put
something like "case swab32(42):" in a switch statement will not compile.
It can work; swab.h just isn't doing it correctly.  This patch fixes that.

Contrary to the comment in asm-i386/byteorder.h, gcc does not recognize the
"C" version of swab16 and turn it into efficient code.  gcc can do this,
just not with the current code.  The simple function:

u16 foo(u16 x) { return swab16(x); }

Would compile to:
        movzwl  %ax, %eax
        movl    %eax, %edx
        shrl    $8, %eax
        sall    $8, %edx
        orl     %eax, %edx

With this patch, it will compile to:
        rolw    $8, %ax

I also attempted to document the maze of different macros/inline functions
that are used to create the final product.

Signed-off-by: Trent Piepho <xyzzy@speakeasy.org>
Cc: Francois-Rene Rideau <fare@tunes.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Trent Piepho 2007-05-08 00:24:05 -07:00 committed by Linus Torvalds
parent 02fb6149f7
commit 8e2c20023f

View File

@ -10,6 +10,10 @@
* separated swab functions from cpu_to_XX, * separated swab functions from cpu_to_XX,
* to clean up support for bizarre-endian architectures. * to clean up support for bizarre-endian architectures.
* *
* Trent Piepho <xyzzy@speakeasy.org> 2007114
* make constant-folding work, provide C versions that
* gcc can optimize better, explain different versions
*
* See asm-i386/byteorder.h and suches for examples of how to provide * See asm-i386/byteorder.h and suches for examples of how to provide
* architecture-dependent optimized versions * architecture-dependent optimized versions
* *
@ -17,40 +21,66 @@
#include <linux/compiler.h> #include <linux/compiler.h>
/* Functions/macros defined, there are a lot:
*
* ___swabXX
* Generic C versions of the swab functions.
*
* ___constant_swabXX
* C versions that gcc can fold into a compile-time constant when
* the argument is a compile-time constant.
*
* __arch__swabXX[sp]?
* Architecture optimized versions of all the swab functions
* (including the s and p versions). These can be defined in
* asm-arch/byteorder.h. Any which are not, are defined here.
* __arch__swabXXs() is defined in terms of __arch__swabXXp(), which
* is defined in terms of __arch__swabXX(), which is in turn defined
* in terms of ___swabXX(x).
* These must be macros. They may be unsafe for arguments with
* side-effects.
*
* __fswabXX
* Inline function versions of the __arch__ macros. These _are_ safe
* if the arguments have side-effects. Note there are no s and p
* versions of these.
*
* __swabXX[sp]
* These are the ones you should actually use. The __swabXX versions
* will be a constant given a constant argument and use the arch
* specific code (if any) for non-constant arguments. The s and p
* versions always use the arch specific code (constant folding
* doesn't apply). They are safe to use with arguments with
* side-effects.
*
* swabXX[sp]
* Nicknames for __swabXX[sp] to use in the kernel.
*/
/* casts are necessary for constants, because we never know how for sure /* casts are necessary for constants, because we never know how for sure
* how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way. * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way.
*/ */
#define ___swab16(x) \
({ \
__u16 __x = (x); \
((__u16)( \
(((__u16)(__x) & (__u16)0x00ffU) << 8) | \
(((__u16)(__x) & (__u16)0xff00U) >> 8) )); \
})
#define ___swab32(x) \ static __inline__ __attribute_const__ __u16 ___swab16(__u16 x)
({ \ {
__u32 __x = (x); \ return x<<8 | x>>8;
((__u32)( \ }
(((__u32)(__x) & (__u32)0x000000ffUL) << 24) | \ static __inline__ __attribute_const__ __u32 ___swab32(__u32 x)
(((__u32)(__x) & (__u32)0x0000ff00UL) << 8) | \ {
(((__u32)(__x) & (__u32)0x00ff0000UL) >> 8) | \ return x<<24 | x>>24 |
(((__u32)(__x) & (__u32)0xff000000UL) >> 24) )); \ (x & (__u32)0x0000ff00UL)<<8 |
}) (x & (__u32)0x00ff0000UL)>>8;
}
#define ___swab64(x) \ static __inline__ __attribute_const__ __u64 ___swab64(__u64 x)
({ \ {
__u64 __x = (x); \ return x<<56 | x>>56 |
((__u64)( \ (x & (__u64)0x000000000000ff00ULL)<<40 |
(__u64)(((__u64)(__x) & (__u64)0x00000000000000ffULL) << 56) | \ (x & (__u64)0x0000000000ff0000ULL)<<24 |
(__u64)(((__u64)(__x) & (__u64)0x000000000000ff00ULL) << 40) | \ (x & (__u64)0x00000000ff000000ULL)<< 8 |
(__u64)(((__u64)(__x) & (__u64)0x0000000000ff0000ULL) << 24) | \ (x & (__u64)0x000000ff00000000ULL)>> 8 |
(__u64)(((__u64)(__x) & (__u64)0x00000000ff000000ULL) << 8) | \ (x & (__u64)0x0000ff0000000000ULL)>>24 |
(__u64)(((__u64)(__x) & (__u64)0x000000ff00000000ULL) >> 8) | \ (x & (__u64)0x00ff000000000000ULL)>>40;
(__u64)(((__u64)(__x) & (__u64)0x0000ff0000000000ULL) >> 24) | \ }
(__u64)(((__u64)(__x) & (__u64)0x00ff000000000000ULL) >> 40) | \
(__u64)(((__u64)(__x) & (__u64)0xff00000000000000ULL) >> 56) )); \
})
#define ___constant_swab16(x) \ #define ___constant_swab16(x) \
((__u16)( \ ((__u16)( \
@ -77,13 +107,13 @@
* provide defaults when no architecture-specific optimization is detected * provide defaults when no architecture-specific optimization is detected
*/ */
#ifndef __arch__swab16 #ifndef __arch__swab16
# define __arch__swab16(x) ({ __u16 __tmp = (x) ; ___swab16(__tmp); }) # define __arch__swab16(x) ___swab16(x)
#endif #endif
#ifndef __arch__swab32 #ifndef __arch__swab32
# define __arch__swab32(x) ({ __u32 __tmp = (x) ; ___swab32(__tmp); }) # define __arch__swab32(x) ___swab32(x)
#endif #endif
#ifndef __arch__swab64 #ifndef __arch__swab64
# define __arch__swab64(x) ({ __u64 __tmp = (x) ; ___swab64(__tmp); }) # define __arch__swab64(x) ___swab64(x)
#endif #endif
#ifndef __arch__swab16p #ifndef __arch__swab16p
@ -97,13 +127,13 @@
#endif #endif
#ifndef __arch__swab16s #ifndef __arch__swab16s
# define __arch__swab16s(x) do { *(x) = __arch__swab16p((x)); } while (0) # define __arch__swab16s(x) ((void)(*(x) = __arch__swab16p(x)))
#endif #endif
#ifndef __arch__swab32s #ifndef __arch__swab32s
# define __arch__swab32s(x) do { *(x) = __arch__swab32p((x)); } while (0) # define __arch__swab32s(x) ((void)(*(x) = __arch__swab32p(x)))
#endif #endif
#ifndef __arch__swab64s #ifndef __arch__swab64s
# define __arch__swab64s(x) do { *(x) = __arch__swab64p((x)); } while (0) # define __arch__swab64s(x) ((void)(*(x) = __arch__swab64p(x)))
#endif #endif
@ -113,15 +143,15 @@
#if defined(__GNUC__) && defined(__OPTIMIZE__) #if defined(__GNUC__) && defined(__OPTIMIZE__)
# define __swab16(x) \ # define __swab16(x) \
(__builtin_constant_p((__u16)(x)) ? \ (__builtin_constant_p((__u16)(x)) ? \
___swab16((x)) : \ ___constant_swab16((x)) : \
__fswab16((x))) __fswab16((x)))
# define __swab32(x) \ # define __swab32(x) \
(__builtin_constant_p((__u32)(x)) ? \ (__builtin_constant_p((__u32)(x)) ? \
___swab32((x)) : \ ___constant_swab32((x)) : \
__fswab32((x))) __fswab32((x)))
# define __swab64(x) \ # define __swab64(x) \
(__builtin_constant_p((__u64)(x)) ? \ (__builtin_constant_p((__u64)(x)) ? \
___swab64((x)) : \ ___constant_swab64((x)) : \
__fswab64((x))) __fswab64((x)))
#else #else
# define __swab16(x) __fswab16(x) # define __swab16(x) __fswab16(x)