b2f1858455
With the existing code, function symbols are defined in .text, and the
body is defined in .text.xxx. This causes (at least some version of) the
linker not to emit the function body into the final binary, since it's
part of a different section to the symbols being referenced. This of
course causes a wide variety of failures.
This change moves the push/pop-section directives before the function
symbols, and after any related ENDPROC macro invocations, so that symbols
and bodies are all in the "pushed" sections, and thus the function bodies
are emitted into the binary.
This solves (at least) the boot problems currently seen on Tegra systems
that use SPL (i.e. all ARMv7 Tegras).
Fixes: 13b0a91a6d
("arm: lib: Split asm symbols into different .text subsections")
Cc: Marek Vasut <marex@denx.de>
Cc: Tom Warren <twarren@nvidia.com>
Cc: Simon Glass <sjg@chromium.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
247 lines
4.5 KiB
ArmAsm
247 lines
4.5 KiB
ArmAsm
/*
|
|
* Copyright 2010, Google Inc.
|
|
*
|
|
* Brought in from coreboot uldivmod.S
|
|
*
|
|
* SPDX-License-Identifier: GPL-2.0
|
|
*/
|
|
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
|
|
/*
|
|
* A, Q = r0 + (r1 << 32)
|
|
* B, R = r2 + (r3 << 32)
|
|
* A / B = Q ... R
|
|
*/
|
|
|
|
/*
 * Register aliases.  Suffix _0 names the low word and _1 the high word of
 * a 64-bit value.  The results are returned in the registers the inputs
 * arrived in, so Q shares registers with A and R shares registers with B.
 */
A_0	.req	r0		@ dividend, low word
Q_0	.req	r0		@ quotient, low word (same register as A_0)
A_1	.req	r1		@ dividend, high word
Q_1	.req	r1		@ quotient, high word (same register as A_1)

B_0	.req	r2		@ divisor, low word
R_0	.req	r2		@ remainder, low word (same register as B_0)
B_1	.req	r3		@ divisor, high word
R_1	.req	r3		@ remainder, high word (same register as B_1)

/* Two 64-bit scratch pairs; their meaning differs per code path. */
C_0	.req	r4
C_1	.req	r5
D_0	.req	r6
D_1	.req	r7

/* Extra scratch register, only defined where THUMB() emits its argument. */
THUMB(	TMP	.req	r8	)
|
|
|
|
/*
 * __aeabi_uldivmod - unsigned 64-bit division with remainder.
 *
 * In:   A = r0 + (r1 << 32)	dividend
 *       B = r2 + (r3 << 32)	divisor
 * Out:  Q = r0 + (r1 << 32)	quotient
 *       R = r2 + (r3 << 32)	remainder
 *
 * r4-r7 (plus TMP where THUMB() is active) are pushed on entry and popped
 * on every return path.  ip is used as scratch and the condition flags are
 * not preserved.
 *
 * Dispatch:
 *   B == 0               -> L_div_by_0:  call __div0, then return Q = R = 0
 *   B is a power of two  -> L_pow2:      R = A & (B - 1), Q = A >> log2(B)
 *   A_1 == B_1 == 0      -> L_div_32_32: hand off to __aeabi_uidivmod
 *   otherwise            -> L_div_64_64: shift-and-subtract long division
 *
 * The ENTRY symbol and the body are both placed inside the pushed
 * .text.__aeabi_uldivmod section so that they end up in the same section.
 *
 * NOTE(review): without TMP the prologue pushes five words, so if sp is
 * 8-byte aligned on entry it is only 4-byte aligned at the two bl sites.
 * Confirm that __aeabi_uidivmod and __div0 tolerate that.
 */
.pushsection .text.__aeabi_uldivmod, "ax"
ENTRY(__aeabi_uldivmod)

	stmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) lr}
	@ Test if B == 0
	orrs	ip, B_0, B_1		@ Z set -> B == 0
	beq	L_div_by_0
	@ Test if B is power of 2: (B & (B - 1)) == 0
	@ C = B - 1 stays live: L_pow2 reuses it as the remainder mask
	subs	C_0, B_0, #1
	sbc	C_1, B_1, #0
	tst	C_0, B_0
	tsteq	B_1, C_1
	beq	L_pow2
	@ Test if A_1 == B_1 == 0
	orrs	ip, A_1, B_1
	beq	L_div_32_32

L_div_64_64:
/* CLZ only exists in ARM architecture version 5 and above. */
#ifdef HAVE_CLZ
	mov	C_0, #1
	mov	C_1, #0
	@ D_0 = clz A
	@ (the low word is only counted when the high word is zero)
	teq	A_1, #0
	clz	D_0, A_1
	clzeq	ip, A_0
	addeq	D_0, D_0, ip
	@ D_1 = clz B
	teq	B_1, #0
	clz	D_1, B_1
	clzeq	ip, B_0
	addeq	D_1, D_1, ip
	@ if clz B - clz A > 0
	@ (otherwise B is already aligned with A and no shift is needed)
	subs	D_0, D_1, D_0
	bls	L_done_shift
	@ B <<= (clz B - clz A)
	@ MI after the subs: shift count < 32, PL: shift count >= 32
	subs	D_1, D_0, #32
	rsb	ip, D_0, #32
	movmi	B_1, B_1, lsl D_0
ARM(	orrmi	B_1, B_1, B_0, lsr ip	)
THUMB(	lsrmi	TMP, B_0, ip		)
THUMB(	orrmi	B_1, B_1, TMP		)
	movpl	B_1, B_0, lsl D_1
	mov	B_0, B_0, lsl D_0
	@ C = 1 << (clz B - clz A)
	movmi	C_1, C_1, lsl D_0
ARM(	orrmi	C_1, C_1, C_0, lsr ip	)
THUMB(	lsrmi	TMP, C_0, ip		)
THUMB(	orrmi	C_1, C_1, TMP		)
	movpl	C_1, C_0, lsl D_1
	mov	C_0, C_0, lsl D_0
L_done_shift:
	mov	D_0, #0
	mov	D_1, #0
	@ C: current bit; D: result
#else
	@ C: current bit; D: result
	mov	C_0, #1
	mov	C_1, #0
	mov	D_0, #0
	mov	D_1, #0
L_lsl_4:
	@ Leave this loop once the top nibble of B is occupied (another
	@ shift by 4 would lose bits) or once B >= A
	cmp	B_1, #0x10000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_lsl_1
	@ B <<= 4
	mov	B_1, B_1, lsl #4
	orr	B_1, B_1, B_0, lsr #28
	mov	B_0, B_0, lsl #4
	@ C <<= 4
	mov	C_1, C_1, lsl #4
	orr	C_1, C_1, C_0, lsr #28
	mov	C_0, C_0, lsl #4
	b	L_lsl_4
L_lsl_1:
	@ Same idea one bit at a time: stop when the top bit of B is set
	@ or once B >= A
	cmp	B_1, #0x80000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_subtract
	@ B <<= 1
	mov	B_1, B_1, lsl #1
	orr	B_1, B_1, B_0, lsr #31
	mov	B_0, B_0, lsl #1
	@ C <<= 1
	mov	C_1, C_1, lsl #1
	orr	C_1, C_1, C_0, lsr #31
	mov	C_0, C_0, lsl #1
	b	L_lsl_1
#endif
L_subtract:
	@ if A >= B
	cmp	A_1, B_1
	cmpeq	A_0, B_0
	bcc	L_update
	@ A -= B
	subs	A_0, A_0, B_0
	sbc	A_1, A_1, B_1
	@ D |= C
	orr	D_0, D_0, C_0
	orr	D_1, D_1, C_1
L_update:
	@ if A == 0: break
	orrs	ip, A_1, A_0
	beq	L_exit
	@ C >>= 1
	@ (the bit shifted out of the high word enters the low word via rrx)
	movs	C_1, C_1, lsr #1
	movs	C_0, C_0, rrx
	@ if C == 0: break
	orrs	ip, C_1, C_0
	beq	L_exit
	@ B >>= 1
	movs	B_1, B_1, lsr #1
	mov	B_0, B_0, rrx
	b	L_subtract
L_exit:
	@ Note: A, B & Q, R are aliases
	@ What is left of A is the remainder; it is copied out before the
	@ quotient overwrites the registers A lived in
	mov	R_0, A_0
	mov	R_1, A_1
	mov	Q_0, D_0
	mov	Q_1, D_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}

L_div_32_32:
	@ Note:	A_0 &	r0 are aliases
	@	Q_1	r1
	@ Both high words are zero: pass A_0 in r0 and B_0 in r1 to the
	@ 32-bit helper.  On return r0 is left in place as Q_0 and r1 is
	@ taken as the remainder.
	mov	r1, B_0
	bl	__aeabi_uidivmod
	mov	R_0, r1
	mov	R_1, #0
	mov	Q_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}

L_pow2:
#ifdef HAVE_CLZ
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)
	@ (C still holds B - 1 from the power-of-2 test at entry)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	clz	D_0, B_0
	add	D_1, D_0, #1
	rsbs	D_0, D_0, #31
	@ PL: the set bit is in B_0 and D_0 = log2(B)
	@ MI: B_0 is zero, so the set bit is in B_1
	bpl	L_1
	clz	D_0, B_1
	rsb	D_0, D_0, #31
	mov	A_0, A_1, lsr D_0
	add	D_0, D_0, #32
L_1:
	@ The flags are still those of the rsbs above
	movpl	A_0, A_0, lsr D_0
ARM(	orrpl	A_0, A_0, A_1, lsl D_1	)
THUMB(	lslpl	TMP, A_1, D_1		)
THUMB(	orrpl	A_0, A_0, TMP		)
	mov	A_1, A_1, lsr D_0
	@ Mov back C to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#else
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)
	@ (C still holds B - 1 from the power-of-2 test at entry)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	@ Count the leading zeroes in B.
	mov	D_0, #0
	orrs	B_0, B_0, B_0
	@ If B is greater than 1 << 31, divide A and B by 1 << 32.
	moveq	A_0, A_1
	moveq	A_1, #0
	moveq	B_0, B_1
	@ Count the remaining leading zeroes in B.
	@ Each step checks whether the low half of the remaining window is
	@ clear; if so the width is added to D_0 and B_0 is shifted down.
	movs	B_1, B_0, lsl #16
	addeq	D_0, #16
	moveq	B_0, B_0, lsr #16
	tst	B_0, #0xff
	addeq	D_0, #8
	moveq	B_0, B_0, lsr #8
	tst	B_0, #0xf
	addeq	D_0, #4
	moveq	B_0, B_0, lsr #4
	tst	B_0, #0x3
	addeq	D_0, #2
	moveq	B_0, B_0, lsr #2
	tst	B_0, #0x1
	addeq	D_0, #1
	@ Shift A to the right by the appropriate amount.
	rsb	D_1, D_0, #32
	mov	Q_0, A_0, lsr D_0
	@ The THUMB variant goes through TMP so that A_1 stays intact.
	@ A_1 is the same register as Q_1 and is still needed, unshifted,
	@ by the final mov below.
ARM(	orr	Q_0, Q_0, A_1, lsl D_1	)
THUMB(	lsl	TMP, A_1, D_1		)
THUMB(	orr	Q_0, Q_0, TMP		)
	mov	Q_1, A_1, lsr D_0
	@ Move C to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#endif

L_div_by_0:
	bl	__div0
	@ As wrong as it could be
	@ (reached only if __div0 returns: report Q = 0 and R = 0)
	mov	Q_0, #0
	mov	Q_1, #0
	mov	R_0, #0
	mov	R_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
ENDPROC(__aeabi_uldivmod)
.popsection
|