From 911014051487e83177689893e57491b86e72589b Mon Sep 17 00:00:00 2001
From: Marc Tiehuis
Date: Wed, 13 Jun 2018 22:25:04 +1200
Subject: [PATCH] Add i128 compiler-rt div/mul support

---
Note: hand-edited after `git format-patch` (line breaks restored, comments
added, one cast changed, one CMake entry moved); the blob ids on the
`index` lines are from the original export.

 CMakeLists.txt                           |  2 +
 std/special/compiler_rt/divti3.zig       | 24 ++++++++
 std/special/compiler_rt/divti3_test.zig  | 23 +++++++
 std/special/compiler_rt/index.zig        |  3 +
 std/special/compiler_rt/muloti4.zig      | 54 +++++++++++++++++
 std/special/compiler_rt/muloti4_test.zig | 77 ++++++++++++++++++++++++
 6 files changed, 183 insertions(+)
 create mode 100644 std/special/compiler_rt/divti3.zig
 create mode 100644 std/special/compiler_rt/divti3_test.zig
 create mode 100644 std/special/compiler_rt/muloti4.zig
 create mode 100644 std/special/compiler_rt/muloti4_test.zig

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 64abb67a8f..cfa0146bb1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -556,6 +556,7 @@ set(ZIG_STD_FILES
     "special/compiler_rt/aulldiv.zig"
     "special/compiler_rt/aullrem.zig"
     "special/compiler_rt/comparetf2.zig"
+    "special/compiler_rt/divti3.zig"
     "special/compiler_rt/fixuint.zig"
     "special/compiler_rt/fixunsdfdi.zig"
     "special/compiler_rt/fixunsdfsi.zig"
@@ -566,6 +567,7 @@ set(ZIG_STD_FILES
     "special/compiler_rt/fixunstfdi.zig"
     "special/compiler_rt/fixunstfsi.zig"
     "special/compiler_rt/fixunstfti.zig"
     "special/compiler_rt/index.zig"
+    "special/compiler_rt/muloti4.zig"
     "special/compiler_rt/udivmod.zig"
     "special/compiler_rt/udivmoddi4.zig"
diff --git a/std/special/compiler_rt/divti3.zig b/std/special/compiler_rt/divti3.zig
new file mode 100644
index 0000000000..f3fccf3746
--- /dev/null
+++ b/std/special/compiler_rt/divti3.zig
@@ -0,0 +1,24 @@
+const udivmod = @import("udivmod.zig").udivmod;
+const builtin = @import("builtin");
+
+/// Signed 128-bit division, exported as the compiler-rt symbol `__divti3`.
+/// Divides the magnitudes with `udivmod` and then applies the sign of the result.
+/// `minInt(i128) / -1` does not trap; it wraps back to `minInt(i128)`.
+pub extern fn __divti3(a: i128, b: i128) i128 {
+    @setRuntimeSafety(builtin.is_test);
+
+    // Arithmetic shift: all ones (-1) for a negative operand, 0 otherwise.
+    const s_a = a >> (i128.bit_count - 1);
+    const s_b = b >> (i128.bit_count - 1);
+
+    // Branch-free absolute value, (x ^ mask) - mask; wraps for minInt(i128).
+    const an = (a ^ s_a) -% s_a;
+    const bn = (b ^ s_b) -% s_b;
+
+    const r = udivmod(u128, @bitCast(u128, an), @bitCast(u128, bn), null);
+    // All ones exactly when the operand signs differ, i.e. the quotient is negative.
+    const s = s_a ^ s_b;
+    // Reinterpret the bits instead of value-casting: the magnitude may be 2^127
+    // (e.g. minInt(i128) / 1), which is not representable as an i128 value.
+    return (@bitCast(i128, r) ^ s) -% s;
+}
diff --git a/std/special/compiler_rt/divti3_test.zig b/std/special/compiler_rt/divti3_test.zig
new file mode 100644
index 0000000000..eef5a9b812
--- /dev/null
+++ b/std/special/compiler_rt/divti3_test.zig
@@ -0,0 +1,23 @@
+const __divti3 = @import("divti3.zig").__divti3;
+const assert = @import("std").debug.assert;
+
+// Asserts that `__divti3(a, b)` returns `expected`.
+fn test__divti3(a: i128, b: i128, expected: i128) void {
+    const x = __divti3(a, b);
+    assert(x == expected);
+}
+
+test "divti3" {
+    test__divti3(0, 1, 0);
+    test__divti3(0, -1, 0);
+    test__divti3(2, 1, 2);
+    test__divti3(2, -1, -2);
+    test__divti3(-2, 1, -2);
+    test__divti3(-2, -1, 2);
+
+    // 0x8 << 124 is the bit pattern of minInt(i128).
+    test__divti3(@bitCast(i128, u128(0x8 << 124)), 1, @bitCast(i128, u128(0x8 << 124)));
+    test__divti3(@bitCast(i128, u128(0x8 << 124)), -1, @bitCast(i128, u128(0x8 << 124)));
+    test__divti3(@bitCast(i128, u128(0x8 << 124)), -2, @bitCast(i128, u128(0x4 << 124)));
+    test__divti3(@bitCast(i128, u128(0x8 << 124)), 2, @bitCast(i128, u128(0xc << 124)));
+}
diff --git a/std/special/compiler_rt/index.zig b/std/special/compiler_rt/index.zig
index d328324320..0573854c91 100644
--- a/std/special/compiler_rt/index.zig
+++ b/std/special/compiler_rt/index.zig
@@ -38,6 +38,9 @@ comptime {
     @export("__umoddi3", __umoddi3, linkage);
     @export("__udivmodsi4", __udivmodsi4, linkage);
 
+    @export("__divti3", @import("divti3.zig").__divti3, linkage);
+    @export("__muloti4", @import("muloti4.zig").__muloti4, linkage);
+
     if (isArmArch()) {
         @export("__aeabi_uldivmod", __aeabi_uldivmod, linkage);
         @export("__aeabi_uidivmod", __aeabi_uidivmod, linkage);
diff --git a/std/special/compiler_rt/muloti4.zig b/std/special/compiler_rt/muloti4.zig
new file mode 100644
index 0000000000..35d33f4ad4
--- /dev/null
+++ b/std/special/compiler_rt/muloti4.zig
@@ -0,0 +1,54 @@
+const udivmod = @import("udivmod.zig").udivmod;
+const builtin = @import("builtin");
+
+/// Signed 128-bit multiplication with overflow detection.
+/// Returns the wrapped product `a *% b` and sets `overflow.*` to 1 when the
+/// exact product does not fit in an i128, or to 0 otherwise.
+pub extern fn __muloti4(a: i128, b: i128, overflow: *c_int) i128 {
+    @setRuntimeSafety(builtin.is_test);
+
+    const min = @bitCast(i128, u128(1 << (i128.bit_count - 1)));
+    const max = ~min;
+    overflow.* = 0;
+
+    const r = a *% b;
+    // minInt(i128) has no positive counterpart, so handle it before taking
+    // absolute values: only multiplying it by 0 or 1 stays in range.
+    if (a == min) {
+        if (b != 0 and b != 1) {
+            overflow.* = 1;
+        }
+        return r;
+    }
+    if (b == min) {
+        if (a != 0 and a != 1) {
+            overflow.* = 1;
+        }
+        return r;
+    }
+
+    // Branch-free absolute values; safe now that neither operand is minInt(i128).
+    const sa = a >> (i128.bit_count - 1);
+    const abs_a = (a ^ sa) -% sa;
+    const sb = b >> (i128.bit_count - 1);
+    const abs_b = (b ^ sb) -% sb;
+
+    // Multiplying by 0, 1 or -1 cannot overflow here.
+    if (abs_a < 2 or abs_b < 2) {
+        return r;
+    }
+
+    if (sa == sb) {
+        // Same signs: the product is positive and must not exceed max.
+        if (abs_a > @divFloor(max, abs_b)) {
+            overflow.* = 1;
+        }
+    } else {
+        // Different signs: the product is negative and must not go below min.
+        if (abs_a > @divFloor(min, -abs_b)) {
+            overflow.* = 1;
+        }
+    }
+
+    return r;
+}
diff --git a/std/special/compiler_rt/muloti4_test.zig b/std/special/compiler_rt/muloti4_test.zig
new file mode 100644
index 0000000000..b61655aaec
--- /dev/null
+++ b/std/special/compiler_rt/muloti4_test.zig
@@ -0,0 +1,77 @@
+const __muloti4 = @import("muloti4.zig").__muloti4;
+const assert = @import("std").debug.assert;
+
+// Asserts the overflow flag, and the product only when no overflow is reported.
+fn test__muloti4(a: i128, b: i128, expected: i128, expected_overflow: c_int) void {
+    var overflow: c_int = undefined;
+    const x = __muloti4(a, b, &overflow);
+    assert(overflow == expected_overflow and (overflow != 0 or x == expected));
+}
+
+test "muloti4" {
+    test__muloti4(0, 0, 0, 0);
+    test__muloti4(0, 1, 0, 0);
+    test__muloti4(1, 0, 0, 0);
+    test__muloti4(0, 10, 0, 0);
+    test__muloti4(10, 0, 0, 0);
+
+    test__muloti4(0, 81985529216486895, 0, 0);
+    test__muloti4(81985529216486895, 0, 0, 0);
+
+    test__muloti4(0, -1, 0, 0);
+    test__muloti4(-1, 0, 0, 0);
+    test__muloti4(0, -10, 0, 0);
+    test__muloti4(-10, 0, 0, 0);
+    test__muloti4(0, -81985529216486895, 0, 0);
+    test__muloti4(-81985529216486895, 0, 0, 0);
+
+    test__muloti4(3037000499, 3037000499, 9223372030926249001, 0);
+    test__muloti4(-3037000499, 3037000499, -9223372030926249001, 0);
+    test__muloti4(3037000499, -3037000499, -9223372030926249001, 0);
+    test__muloti4(-3037000499, -3037000499, 9223372030926249001, 0);
+
+    test__muloti4(4398046511103, 2097152, 9223372036852678656, 0);
+    test__muloti4(-4398046511103, 2097152, -9223372036852678656, 0);
+    test__muloti4(4398046511103, -2097152, -9223372036852678656, 0);
+    test__muloti4(-4398046511103, -2097152, 9223372036852678656, 0);
+
+    test__muloti4(2097152, 4398046511103, 9223372036852678656, 0);
+    test__muloti4(-2097152, 4398046511103, -9223372036852678656, 0);
+    test__muloti4(2097152, -4398046511103, -9223372036852678656, 0);
+    test__muloti4(-2097152, -4398046511103, 9223372036852678656, 0);
+
+    test__muloti4(@bitCast(i128, u128(0x00000000000000B504F333F9DE5BE000)), @bitCast(i128, u128(0x000000000000000000B504F333F9DE5B)), @bitCast(i128, u128(0x7FFFFFFFFFFFF328DF915DA296E8A000)), 0);
+    test__muloti4(@bitCast(i128, u128(0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)), -2, @bitCast(i128, u128(0x80000000000000000000000000000001)), 1);
+    test__muloti4(-2, @bitCast(i128, u128(0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)), @bitCast(i128, u128(0x80000000000000000000000000000001)), 1);
+
+    test__muloti4(@bitCast(i128, u128(0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)), -1, @bitCast(i128, u128(0x80000000000000000000000000000001)), 0);
+    test__muloti4(-1, @bitCast(i128, u128(0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)), @bitCast(i128, u128(0x80000000000000000000000000000001)), 0);
+    test__muloti4(@bitCast(i128, u128(0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)), 0, 0, 0);
+    test__muloti4(0, @bitCast(i128, u128(0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)), 0, 0);
+    test__muloti4(@bitCast(i128, u128(0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)), 1, @bitCast(i128, u128(0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)), 0);
+    test__muloti4(1, @bitCast(i128, u128(0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)), @bitCast(i128, u128(0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)), 0);
+    test__muloti4(@bitCast(i128, u128(0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)), 2, @bitCast(i128, u128(0x80000000000000000000000000000001)), 1);
+    test__muloti4(2, @bitCast(i128, u128(0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)), @bitCast(i128, u128(0x80000000000000000000000000000001)), 1);
+
+    test__muloti4(@bitCast(i128, u128(0x80000000000000000000000000000000)), -2, @bitCast(i128, u128(0x80000000000000000000000000000000)), 1);
+    test__muloti4(-2, @bitCast(i128, u128(0x80000000000000000000000000000000)), @bitCast(i128, u128(0x80000000000000000000000000000000)), 1);
+    test__muloti4(@bitCast(i128, u128(0x80000000000000000000000000000000)), -1, @bitCast(i128, u128(0x80000000000000000000000000000000)), 1);
+    test__muloti4(-1, @bitCast(i128, u128(0x80000000000000000000000000000000)), @bitCast(i128, u128(0x80000000000000000000000000000000)), 1);
+    test__muloti4(@bitCast(i128, u128(0x80000000000000000000000000000000)), 0, 0, 0);
+    test__muloti4(0, @bitCast(i128, u128(0x80000000000000000000000000000000)), 0, 0);
+    test__muloti4(@bitCast(i128, u128(0x80000000000000000000000000000000)), 1, @bitCast(i128, u128(0x80000000000000000000000000000000)), 0);
+    test__muloti4(1, @bitCast(i128, u128(0x80000000000000000000000000000000)), @bitCast(i128, u128(0x80000000000000000000000000000000)), 0);
+    test__muloti4(@bitCast(i128, u128(0x80000000000000000000000000000000)), 2, @bitCast(i128, u128(0x80000000000000000000000000000000)), 1);
+    test__muloti4(2, @bitCast(i128, u128(0x80000000000000000000000000000000)), @bitCast(i128, u128(0x80000000000000000000000000000000)), 1);
+
+    test__muloti4(@bitCast(i128, u128(0x80000000000000000000000000000001)), -2, @bitCast(i128, u128(0x80000000000000000000000000000001)), 1);
+    test__muloti4(-2, @bitCast(i128, u128(0x80000000000000000000000000000001)), @bitCast(i128, u128(0x80000000000000000000000000000001)), 1);
+    test__muloti4(@bitCast(i128, u128(0x80000000000000000000000000000001)), -1, @bitCast(i128, u128(0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)), 0);
+    test__muloti4(-1, @bitCast(i128, u128(0x80000000000000000000000000000001)), @bitCast(i128, u128(0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)), 0);
+    test__muloti4(@bitCast(i128, u128(0x80000000000000000000000000000001)), 0, 0, 0);
+    test__muloti4(0, @bitCast(i128, u128(0x80000000000000000000000000000001)), 0, 0);
+    test__muloti4(@bitCast(i128, u128(0x80000000000000000000000000000001)), 1, @bitCast(i128, u128(0x80000000000000000000000000000001)), 0);
+    test__muloti4(1, @bitCast(i128, u128(0x80000000000000000000000000000001)), @bitCast(i128, u128(0x80000000000000000000000000000001)), 0);
+    test__muloti4(@bitCast(i128, u128(0x80000000000000000000000000000001)), 2, @bitCast(i128, u128(0x80000000000000000000000000000000)), 1);
+    test__muloti4(2, @bitCast(i128, u128(0x80000000000000000000000000000001)), @bitCast(i128, u128(0x80000000000000000000000000000000)), 1);
+}