xtensa: add helpers for division, remainder and shifts

Don't rely on libgcc presence, build own versions of the helpers with
correct ABI.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
This commit is contained in:
Max Filippov 2021-10-18 04:29:27 -07:00
parent 8c9ab55c0f
commit dbf4ed894c
10 changed files with 539 additions and 0 deletions

View File

@ -191,6 +191,40 @@
#endif
.endm
.macro do_nsau cnt, val, tmp, a
#if XCHAL_HAVE_NSA
nsau \cnt, \val
#else
mov \a, \val
movi \cnt, 0
extui \tmp, \a, 16, 16
bnez \tmp, 0f
movi \cnt, 16
slli \a, \a, 16
0:
extui \tmp, \a, 24, 8
bnez \tmp, 1f
addi \cnt, \cnt, 8
slli \a, \a, 8
1:
movi \tmp, __nsau_data
extui \a, \a, 24, 8
add \tmp, \tmp, \a
l8ui \tmp, \tmp, 0
add \cnt, \cnt, \tmp
#endif /* !XCHAL_HAVE_NSA */
.endm
.macro do_abs dst, src, tmp
#if XCHAL_HAVE_ABS
abs \dst, \src
#else
neg \tmp, \src
movgez \tmp, \src, \src
mov \dst, \tmp
#endif
.endm
#define XTENSA_STACK_ALIGNMENT 16
#if defined(__XTENSA_WINDOWED_ABI__)

View File

@ -4,5 +4,7 @@
#
lib-y += memcopy.o memset.o checksum.o \
ashldi3.o ashrdi3.o lshrdi3.o \
divsi3.o udivsi3.o modsi3.o umodsi3.o mulsi3.o \
usercopy.o strncpy_user.o strnlen_user.o
lib-$(CONFIG_PCI) += pci-auto.o

28
arch/xtensa/lib/ashldi3.S Normal file
View File

@ -0,0 +1,28 @@
/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>
#ifdef __XTENSA_EB__
#define uh a2
#define ul a3
#else
#define uh a3
#define ul a2
#endif /* __XTENSA_EB__ */
ENTRY(__ashldi3)
abi_entry_default
ssl a4
bgei a4, 32, .Llow_only
src uh, uh, ul
sll ul, ul
abi_ret_default
.Llow_only:
sll uh, ul
movi ul, 0
abi_ret_default
ENDPROC(__ashldi3)

28
arch/xtensa/lib/ashrdi3.S Normal file
View File

@ -0,0 +1,28 @@
/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>
#ifdef __XTENSA_EB__
#define uh a2
#define ul a3
#else
#define uh a3
#define ul a2
#endif /* __XTENSA_EB__ */
ENTRY(__ashrdi3)
abi_entry_default
ssr a4
bgei a4, 32, .Lhigh_only
src ul, uh, ul
sra uh, uh
abi_ret_default
.Lhigh_only:
sra ul, uh
srai uh, uh, 31
abi_ret_default
ENDPROC(__ashrdi3)

74
arch/xtensa/lib/divsi3.S Normal file
View File

@ -0,0 +1,74 @@
/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>
ENTRY(__divsi3)
abi_entry_default
#if XCHAL_HAVE_DIV32
quos a2, a2, a3
#else
xor a7, a2, a3 /* sign = dividend ^ divisor */
do_abs a6, a2, a4 /* udividend = abs (dividend) */
do_abs a3, a3, a4 /* udivisor = abs (divisor) */
bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */
do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */
bgeu a5, a4, .Lspecial
sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
ssl a4
sll a3, a3 /* udivisor <<= count */
movi a2, 0 /* quotient = 0 */
/* test-subtract-and-shift loop; one quotient bit on each iteration */
#if XCHAL_HAVE_LOOPS
loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
bltu a6, a3, .Lzerobit
sub a6, a6, a3
addi a2, a2, 1
.Lzerobit:
slli a2, a2, 1
srli a3, a3, 1
#if !XCHAL_HAVE_LOOPS
addi a4, a4, -1
bnez a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:
bltu a6, a3, .Lreturn
addi a2, a2, 1 /* increment if udividend >= udivisor */
.Lreturn:
neg a5, a2
movltz a2, a5, a7 /* return (sign < 0) ? -quotient : quotient */
abi_ret_default
.Lle_one:
beqz a3, .Lerror
neg a2, a6 /* if udivisor == 1, then return... */
movgez a2, a6, a7 /* (sign < 0) ? -udividend : udividend */
abi_ret_default
.Lspecial:
bltu a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
movi a2, 1
movi a4, -1
movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */
abi_ret_default
.Lerror:
/* Divide by zero: Use an illegal instruction to force an exception.
The subsequent "DIV0" string can be recognized by the exception
handler to identify the real cause of the exception. */
ill
.ascii "DIV0"
.Lreturn0:
movi a2, 0
#endif /* XCHAL_HAVE_DIV32 */
abi_ret_default
ENDPROC(__divsi3)

28
arch/xtensa/lib/lshrdi3.S Normal file
View File

@ -0,0 +1,28 @@
/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>
#ifdef __XTENSA_EB__
#define uh a2
#define ul a3
#else
#define uh a3
#define ul a2
#endif /* __XTENSA_EB__ */
ENTRY(__lshrdi3)
abi_entry_default
ssr a4
bgei a4, 32, .Lhigh_only
src ul, uh, ul
srl uh, uh
abi_ret_default
.Lhigh_only:
srl ul, uh
movi uh, 0
abi_ret_default
ENDPROC(__lshrdi3)

87
arch/xtensa/lib/modsi3.S Normal file
View File

@ -0,0 +1,87 @@
/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>
ENTRY(__modsi3)
abi_entry_default
#if XCHAL_HAVE_DIV32
rems a2, a2, a3
#else
mov a7, a2 /* save original (signed) dividend */
do_abs a2, a2, a4 /* udividend = abs (dividend) */
do_abs a3, a3, a4 /* udivisor = abs (divisor) */
bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */
do_nsau a4, a3, a6, a8 /* udivisor_shift = nsau (udivisor) */
bgeu a5, a4, .Lspecial
sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
ssl a4
sll a3, a3 /* udivisor <<= count */
/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
bltu a2, a3, .Lzerobit
sub a2, a2, a3
.Lzerobit:
srli a3, a3, 1
#if !XCHAL_HAVE_LOOPS
addi a4, a4, -1
bnez a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:
.Lspecial:
bltu a2, a3, .Lreturn
sub a2, a2, a3 /* subtract again if udividend >= udivisor */
.Lreturn:
bgez a7, .Lpositive
neg a2, a2 /* if (dividend < 0), return -udividend */
.Lpositive:
abi_ret_default
.Lle_one:
bnez a3, .Lreturn0
/* Divide by zero: Use an illegal instruction to force an exception.
The subsequent "DIV0" string can be recognized by the exception
handler to identify the real cause of the exception. */
ill
.ascii "DIV0"
.Lreturn0:
movi a2, 0
#endif /* XCHAL_HAVE_DIV32 */
abi_ret_default
ENDPROC(__modsi3)
#if !XCHAL_HAVE_NSA
.section .rodata
.align 4
.global __nsau_data
.type __nsau_data, @object
__nsau_data:
.byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
.byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
.byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
.byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
.byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
.byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
.byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
.byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
.size __nsau_data, . - __nsau_data
#endif /* !XCHAL_HAVE_NSA */

133
arch/xtensa/lib/mulsi3.S Normal file
View File

@ -0,0 +1,133 @@
/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>
.macro do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
addx2 \dst, \as, \at
#else
slli \tmp, \as, 1
add \dst, \tmp, \at
#endif
.endm
.macro do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
addx4 \dst, \as, \at
#else
slli \tmp, \as, 2
add \dst, \tmp, \at
#endif
.endm
.macro do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
addx8 \dst, \as, \at
#else
slli \tmp, \as, 3
add \dst, \tmp, \at
#endif
.endm
ENTRY(__mulsi3)
abi_entry_default
#if XCHAL_HAVE_MUL32
mull a2, a2, a3
#elif XCHAL_HAVE_MUL16
or a4, a2, a3
srai a4, a4, 16
bnez a4, .LMUL16
mul16u a2, a2, a3
abi_ret_default
.LMUL16:
srai a4, a2, 16
srai a5, a3, 16
mul16u a7, a4, a3
mul16u a6, a5, a2
mul16u a4, a2, a3
add a7, a7, a6
slli a7, a7, 16
add a2, a7, a4
#elif XCHAL_HAVE_MAC16
mul.aa.hl a2, a3
mula.aa.lh a2, a3
rsr a5, ACCLO
umul.aa.ll a2, a3
rsr a4, ACCLO
slli a5, a5, 16
add a2, a4, a5
#else /* !MUL32 && !MUL16 && !MAC16 */
/* Multiply one bit at a time, but unroll the loop 4x to better
exploit the addx instructions and avoid overhead.
Peel the first iteration to save a cycle on init. */
/* Avoid negative numbers. */
xor a5, a2, a3 /* Top bit is 1 if one input is negative. */
do_abs a3, a3, a6
do_abs a2, a2, a6
/* Swap so the second argument is smaller. */
sub a7, a2, a3
mov a4, a3
movgez a4, a2, a7 /* a4 = max (a2, a3) */
movltz a3, a2, a7 /* a3 = min (a2, a3) */
movi a2, 0
extui a6, a3, 0, 1
movnez a2, a4, a6
do_addx2 a7, a4, a2, a7
extui a6, a3, 1, 1
movnez a2, a7, a6
do_addx4 a7, a4, a2, a7
extui a6, a3, 2, 1
movnez a2, a7, a6
do_addx8 a7, a4, a2, a7
extui a6, a3, 3, 1
movnez a2, a7, a6
bgeui a3, 16, .Lmult_main_loop
neg a3, a2
movltz a2, a3, a5
abi_ret_default
.align 4
.Lmult_main_loop:
srli a3, a3, 4
slli a4, a4, 4
add a7, a4, a2
extui a6, a3, 0, 1
movnez a2, a7, a6
do_addx2 a7, a4, a2, a7
extui a6, a3, 1, 1
movnez a2, a7, a6
do_addx4 a7, a4, a2, a7
extui a6, a3, 2, 1
movnez a2, a7, a6
do_addx8 a7, a4, a2, a7
extui a6, a3, 3, 1
movnez a2, a7, a6
bgeui a3, 16, .Lmult_main_loop
neg a3, a2
movltz a2, a3, a5
#endif /* !MUL32 && !MUL16 && !MAC16 */
abi_ret_default
ENDPROC(__mulsi3)

68
arch/xtensa/lib/udivsi3.S Normal file
View File

@ -0,0 +1,68 @@
/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>
ENTRY(__udivsi3)
abi_entry_default
#if XCHAL_HAVE_DIV32
quou a2, a2, a3
#else
bltui a3, 2, .Lle_one /* check if the divisor <= 1 */
mov a6, a2 /* keep dividend in a6 */
do_nsau a5, a6, a2, a7 /* dividend_shift = nsau (dividend) */
do_nsau a4, a3, a2, a7 /* divisor_shift = nsau (divisor) */
bgeu a5, a4, .Lspecial
sub a4, a4, a5 /* count = divisor_shift - dividend_shift */
ssl a4
sll a3, a3 /* divisor <<= count */
movi a2, 0 /* quotient = 0 */
/* test-subtract-and-shift loop; one quotient bit on each iteration */
#if XCHAL_HAVE_LOOPS
loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
bltu a6, a3, .Lzerobit
sub a6, a6, a3
addi a2, a2, 1
.Lzerobit:
slli a2, a2, 1
srli a3, a3, 1
#if !XCHAL_HAVE_LOOPS
addi a4, a4, -1
bnez a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:
bltu a6, a3, .Lreturn
addi a2, a2, 1 /* increment quotient if dividend >= divisor */
.Lreturn:
abi_ret_default
.Lle_one:
beqz a3, .Lerror /* if divisor == 1, return the dividend */
abi_ret_default
.Lspecial:
/* return dividend >= divisor */
bltu a6, a3, .Lreturn0
movi a2, 1
abi_ret_default
.Lerror:
/* Divide by zero: Use an illegal instruction to force an exception.
The subsequent "DIV0" string can be recognized by the exception
handler to identify the real cause of the exception. */
ill
.ascii "DIV0"
.Lreturn0:
movi a2, 0
#endif /* XCHAL_HAVE_DIV32 */
abi_ret_default
ENDPROC(__udivsi3)

57
arch/xtensa/lib/umodsi3.S Normal file
View File

@ -0,0 +1,57 @@
/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>
ENTRY(__umodsi3)
abi_entry_default
#if XCHAL_HAVE_DIV32
remu a2, a2, a3
#else
bltui a3, 2, .Lle_one /* check if the divisor is <= 1 */
do_nsau a5, a2, a6, a7 /* dividend_shift = nsau (dividend) */
do_nsau a4, a3, a6, a7 /* divisor_shift = nsau (divisor) */
bgeu a5, a4, .Lspecial
sub a4, a4, a5 /* count = divisor_shift - dividend_shift */
ssl a4
sll a3, a3 /* divisor <<= count */
/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
bltu a2, a3, .Lzerobit
sub a2, a2, a3
.Lzerobit:
srli a3, a3, 1
#if !XCHAL_HAVE_LOOPS
addi a4, a4, -1
bnez a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:
.Lspecial:
bltu a2, a3, .Lreturn
sub a2, a2, a3 /* subtract once more if dividend >= divisor */
.Lreturn:
abi_ret_default
.Lle_one:
bnez a3, .Lreturn0
/* Divide by zero: Use an illegal instruction to force an exception.
The subsequent "DIV0" string can be recognized by the exception
handler to identify the real cause of the exception. */
ill
.ascii "DIV0"
.Lreturn0:
movi a2, 0
#endif /* XCHAL_HAVE_DIV32 */
abi_ret_default
ENDPROC(__umodsi3)