Merge pull request #11342 from ziglang/stage2-x64-mul-div

x64: use all available registers (caller and callee), and fix how we multiply (fixes @mulWithOverflow for byte-size operands)
This commit is contained in:
Jakub Konka 2022-03-30 11:22:35 +02:00 committed by GitHub
commit b153e156b1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 198 additions and 145 deletions

View File

@ -31,6 +31,8 @@ const bits = @import("bits.zig");
const abi = @import("abi.zig");
const Register = bits.Register;
const callee_preserved_regs = abi.callee_preserved_regs;
const caller_preserved_regs = abi.caller_preserved_regs;
const allocatable_registers = abi.allocatable_registers;
const c_abi_int_param_regs = abi.c_abi_int_param_regs;
const c_abi_int_return_regs = abi.c_abi_int_return_regs;
@ -40,7 +42,7 @@ const InnerError = error{
OutOfRegisters,
};
const RegisterManager = RegisterManagerFn(Self, Register, &callee_preserved_regs);
const RegisterManager = RegisterManagerFn(Self, Register, &allocatable_registers);
gpa: Allocator,
air: Air,
@ -56,7 +58,6 @@ arg_index: u32,
src_loc: Module.SrcLoc,
stack_align: u32,
ret_backpatches: std.ArrayListUnmanaged(Mir.Inst.Index) = .{},
compare_flags_inst: ?Air.Inst.Index = null,
/// MIR Instructions
@ -351,7 +352,6 @@ pub fn generate(
std.AutoHashMap(Mir.Inst.Index, Air.Inst.Index).init(bin_file.allocator)
else {},
};
defer function.ret_backpatches.deinit(bin_file.allocator);
defer function.stack.deinit(bin_file.allocator);
defer function.blocks.deinit(bin_file.allocator);
defer function.exitlude_jump_relocs.deinit(bin_file.allocator);
@ -480,6 +480,21 @@ fn gen(self: *Self) InnerError!void {
.data = undefined,
});
if (self.ret_mcv == .stack_offset) {
// The address where to store the return value for the caller is in `.rdi`
// register which the callee is free to clobber. Therefore, we purposely
// spill it to stack immediately.
const ptr_ty = Type.usize;
const abi_size = @intCast(u32, ptr_ty.abiSize(self.target.*));
const abi_align = ptr_ty.abiAlignment(self.target.*);
const stack_offset = mem.alignForwardGeneric(u32, self.next_stack_offset + abi_size, abi_align);
self.next_stack_offset = stack_offset;
self.max_end_stack = @maximum(self.max_end_stack, self.next_stack_offset);
try self.genSetStack(ptr_ty, @intCast(i32, stack_offset), MCValue{ .register = .rdi }, .{});
self.ret_mcv = MCValue{ .stack_offset = @intCast(i32, stack_offset) };
log.debug("gen: spilling .rdi to stack at offset {}", .{stack_offset});
}
_ = try self.addInst(.{
.tag = .dbg_prologue_end,
.ops = undefined,
@ -517,23 +532,6 @@ fn gen(self: *Self) InnerError!void {
var disp = data.disp + 8;
inline for (callee_preserved_regs) |reg, i| {
if (self.register_manager.isRegAllocated(reg)) {
if (reg.to64() == .rdi) {
for (self.ret_backpatches.items) |inst| {
log.debug(".rdi was spilled, backpatching with mov from stack at offset {}", .{
-@intCast(i32, disp),
});
const ops = Mir.Ops.decode(self.mir_instructions.items(.ops)[inst]);
self.mir_instructions.set(inst, Mir.Inst{
.tag = .mov,
.ops = (Mir.Ops{
.reg1 = ops.reg1,
.reg2 = .rbp,
.flags = 0b01,
}).encode(),
.data = .{ .imm = @bitCast(u32, -@intCast(i32, disp)) },
});
}
}
data.regs |= 1 << @intCast(u5, i);
self.max_end_stack += 8;
disp += 8;
@ -912,8 +910,7 @@ fn allocMem(self: *Self, inst: Air.Inst.Index, abi_size: u32, abi_align: u32) !u
// TODO find a free slot instead of always appending
const offset = mem.alignForwardGeneric(u32, self.next_stack_offset + abi_size, abi_align);
self.next_stack_offset = offset;
if (self.next_stack_offset > self.max_end_stack)
self.max_end_stack = self.next_stack_offset;
self.max_end_stack = @maximum(self.max_end_stack, self.next_stack_offset);
try self.stack.putNoClobber(self.gpa, offset, .{
.inst = inst,
.size = abi_size,
@ -1256,7 +1253,7 @@ fn genPtrBinMathOp(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_r
offset_mcv.freezeIfRegister(&self.register_manager);
defer offset_mcv.unfreezeIfRegister(&self.register_manager);
try self.genIMulOpMir(offset_ty, offset_mcv, .{ .immediate = elem_size });
try self.genIntMulComplexOpMir(offset_ty, offset_mcv, .{ .immediate = elem_size });
const tag = self.air.instructions.items(.tag)[inst];
switch (tag) {
@ -1398,10 +1395,29 @@ fn airSubSat(self: *Self, inst: Air.Inst.Index) !void {
fn airMul(self: *Self, inst: Air.Inst.Index) !void {
const bin_op = self.air.instructions.items(.data)[inst].bin_op;
const result: MCValue = if (self.liveness.isUnused(inst))
.dead
else
try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs);
const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: {
const ty = self.air.typeOfIndex(inst);
if (ty.zigTypeTag() != .Int) {
return self.fail("TODO implement 'mul' for operands of dst type {}", .{ty.zigTypeTag()});
}
// Spill .rax and .rdx upfront to ensure we don't spill the operands too late.
try self.register_manager.getReg(.rax, inst);
try self.register_manager.getReg(.rdx, null);
self.register_manager.freezeRegs(&.{ .rax, .rdx });
defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx });
const lhs = try self.resolveInst(bin_op.lhs);
const rhs = try self.resolveInst(bin_op.rhs);
const signedness = ty.intInfo(self.target.*).signedness;
try self.genIntMulDivOpMir(switch (signedness) {
.signed => .imul,
.unsigned => .mul,
}, ty, signedness, lhs, rhs);
break :result MCValue{ .register = .rax };
};
return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}
@ -1476,23 +1492,33 @@ fn airSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
const result = if (self.liveness.isUnused(inst)) .dead else result: {
const ty = self.air.typeOf(bin_op.lhs);
const signedness: std.builtin.Signedness = blk: {
if (ty.zigTypeTag() != .Int) {
return self.fail("TODO implement airMulWithOverflow for type {}", .{ty.fmtDebug()});
}
break :blk ty.intInfo(self.target.*).signedness;
};
if (self.liveness.isUnused(inst)) {
return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none });
}
// Spill .rax and .rdx upfront to ensure we don't spill the operands too late.
try self.register_manager.getReg(.rax, inst);
try self.register_manager.getReg(.rdx, null);
self.register_manager.freezeRegs(&.{ .rax, .rdx });
defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx });
const ty = self.air.typeOf(bin_op.lhs);
const signedness: std.builtin.Signedness = blk: {
if (ty.zigTypeTag() != .Int) {
return self.fail("TODO implement airMulWithOverflow for type {}", .{ty.fmtDebug()});
const lhs = try self.resolveInst(bin_op.lhs);
const rhs = try self.resolveInst(bin_op.rhs);
try self.genIntMulDivOpMir(switch (signedness) {
.signed => .imul,
.unsigned => .mul,
}, ty, signedness, lhs, rhs);
switch (signedness) {
.signed => break :result MCValue{ .register_overflow_signed = .rax },
.unsigned => break :result MCValue{ .register_overflow_unsigned = .rax },
}
break :blk ty.intInfo(self.target.*).signedness;
};
const partial = try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs);
const result: MCValue = switch (signedness) {
.signed => .{ .register_overflow_signed = partial.register },
.unsigned => .{ .register_overflow_unsigned = partial.register },
};
return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
@ -1503,11 +1529,12 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
return self.fail("TODO implement airShlWithOverflow for {}", .{self.target.cpu.arch});
}
/// Generates signed or unsigned integer division.
/// Requires use of .rax and .rdx registers. Spills them if necessary.
/// Generates signed or unsigned integer multiplication/division.
/// Clobbers .rax and .rdx registers.
/// Quotient is saved in .rax and remainder in .rdx.
fn genIntDivOpMir(
fn genIntMulDivOpMir(
self: *Self,
tag: Mir.Inst.Tag,
ty: Type,
signedness: std.builtin.Signedness,
lhs: MCValue,
@ -1515,25 +1542,10 @@ fn genIntDivOpMir(
) !void {
const abi_size = @intCast(u32, ty.abiSize(self.target.*));
if (abi_size > 8) {
return self.fail("TODO implement genIntDivOpMir for ABI size larger than 8", .{});
return self.fail("TODO implement genIntMulDivOpMir for ABI size larger than 8", .{});
}
try self.register_manager.getReg(.rax, null);
try self.register_manager.getReg(.rdx, null);
self.register_manager.freezeRegs(&.{ .rax, .rdx });
defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx });
const dividend = switch (lhs) {
.register => lhs,
else => blk: {
const reg = try self.copyToTmpRegister(ty, lhs);
break :blk MCValue{ .register = reg };
},
};
try self.genSetReg(ty, .rax, dividend);
self.register_manager.freezeRegs(&.{dividend.register});
defer self.register_manager.unfreezeRegs(&.{dividend.register});
try self.genSetReg(ty, .rax, lhs);
switch (signedness) {
.signed => {
@ -1557,22 +1569,19 @@ fn genIntDivOpMir(
},
}
const divisor = switch (rhs) {
const factor = switch (rhs) {
.register => rhs,
.stack_offset => rhs,
else => blk: {
const reg = try self.copyToTmpRegister(ty, rhs);
break :blk MCValue{ .register = reg };
},
};
const op_tag: Mir.Inst.Tag = switch (signedness) {
.signed => .idiv,
.unsigned => .div,
};
switch (divisor) {
switch (factor) {
.register => |reg| {
_ = try self.addInst(.{
.tag = op_tag,
.tag = tag,
.ops = (Mir.Ops{
.reg1 = reg,
}).encode(),
@ -1581,7 +1590,7 @@ fn genIntDivOpMir(
},
.stack_offset => |off| {
_ = try self.addInst(.{
.tag = op_tag,
.tag = tag,
.ops = (Mir.Ops{
.reg2 = .rbp,
.flags = switch (abi_size) {
@ -1599,6 +1608,7 @@ fn genIntDivOpMir(
}
}
/// Clobbers .rax and .rdx registers.
fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCValue {
const signedness = ty.intInfo(self.target.*).signedness;
const dividend = switch (lhs) {
@ -1614,7 +1624,10 @@ fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCVa
self.register_manager.freezeRegs(&.{divisor});
defer self.register_manager.unfreezeRegs(&.{ dividend, divisor });
try self.genIntDivOpMir(Type.isize, signedness, .{ .register = dividend }, .{ .register = divisor });
try self.genIntMulDivOpMir(switch (signedness) {
.signed => .idiv,
.unsigned => .div,
}, Type.isize, signedness, .{ .register = dividend }, .{ .register = divisor });
_ = try self.addInst(.{
.tag = .xor,
@ -1666,22 +1679,53 @@ fn airDiv(self: *Self, inst: Air.Inst.Index) !void {
return self.fail("TODO implement {}", .{tag});
}
const signedness = ty.intInfo(self.target.*).signedness;
// Spill .rax and .rdx upfront to ensure we don't spill the operands too late.
try self.register_manager.getReg(.rax, null);
const track_rax: ?Air.Inst.Index = blk: {
if (signedness == .unsigned) break :blk inst;
switch (tag) {
.div_exact, .div_trunc => break :blk inst,
else => break :blk null,
}
};
try self.register_manager.getReg(.rax, track_rax);
try self.register_manager.getReg(.rdx, null);
self.register_manager.freezeRegs(&.{ .rax, .rdx });
defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx });
const lhs = try self.resolveInst(bin_op.lhs);
const rhs = try self.resolveInst(bin_op.rhs);
lhs.freezeIfRegister(&self.register_manager);
defer lhs.unfreezeIfRegister(&self.register_manager);
const rhs = blk: {
const rhs = try self.resolveInst(bin_op.rhs);
if (signedness == .signed) {
switch (tag) {
.div_floor => {
rhs.freezeIfRegister(&self.register_manager);
defer rhs.unfreezeIfRegister(&self.register_manager);
break :blk try self.copyToRegisterWithInstTracking(inst, ty, rhs);
},
else => {},
}
}
break :blk rhs;
};
rhs.freezeIfRegister(&self.register_manager);
defer rhs.unfreezeIfRegister(&self.register_manager);
const signedness = ty.intInfo(self.target.*).signedness;
if (signedness == .unsigned) {
try self.genIntDivOpMir(ty, signedness, lhs, rhs);
try self.genIntMulDivOpMir(.div, ty, signedness, lhs, rhs);
break :result MCValue{ .register = .rax };
}
switch (tag) {
.div_exact, .div_trunc => {
try self.genIntDivOpMir(ty, signedness, lhs, rhs);
try self.genIntMulDivOpMir(switch (signedness) {
.signed => .idiv,
.unsigned => .div,
}, ty, signedness, lhs, rhs);
break :result MCValue{ .register = .rax };
},
.div_floor => {
@ -1702,11 +1746,18 @@ fn airRem(self: *Self, inst: Air.Inst.Index) !void {
}
// Spill .rax and .rdx upfront to ensure we don't spill the operands too late.
try self.register_manager.getReg(.rax, null);
try self.register_manager.getReg(.rdx, null);
try self.register_manager.getReg(.rdx, inst);
self.register_manager.freezeRegs(&.{ .rax, .rdx });
defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx });
const lhs = try self.resolveInst(bin_op.lhs);
const rhs = try self.resolveInst(bin_op.rhs);
const signedness = ty.intInfo(self.target.*).signedness;
try self.genIntDivOpMir(ty, signedness, lhs, rhs);
try self.genIntMulDivOpMir(switch (signedness) {
.signed => .idiv,
.unsigned => .div,
}, ty, signedness, lhs, rhs);
break :result MCValue{ .register = .rdx };
};
return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
@ -1719,25 +1770,33 @@ fn airMod(self: *Self, inst: Air.Inst.Index) !void {
if (ty.zigTypeTag() != .Int) {
return self.fail("TODO implement .mod for operands of dst type {}", .{ty.zigTypeTag()});
}
const signedness = ty.intInfo(self.target.*).signedness;
// Spill .rax and .rdx upfront to ensure we don't spill the operands too late.
try self.register_manager.getReg(.rax, null);
try self.register_manager.getReg(.rdx, null);
try self.register_manager.getReg(.rdx, if (signedness == .unsigned) inst else null);
self.register_manager.freezeRegs(&.{ .rax, .rdx });
defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx });
const lhs = try self.resolveInst(bin_op.lhs);
const rhs = try self.resolveInst(bin_op.rhs);
const signedness = ty.intInfo(self.target.*).signedness;
switch (signedness) {
.unsigned => {
try self.genIntDivOpMir(ty, signedness, lhs, rhs);
try self.genIntMulDivOpMir(switch (signedness) {
.signed => .idiv,
.unsigned => .div,
}, ty, signedness, lhs, rhs);
break :result MCValue{ .register = .rdx };
},
.signed => {
const div_floor = try self.genInlineIntDivFloor(ty, lhs, rhs);
try self.genIMulOpMir(ty, div_floor, rhs);
try self.genIntMulComplexOpMir(ty, div_floor, rhs);
const reg = try self.copyToTmpRegister(ty, lhs);
try self.genBinMathOpMir(.sub, ty, .{ .register = reg }, div_floor);
const result = try self.copyToRegisterWithInstTracking(inst, ty, lhs);
try self.genBinMathOpMir(.sub, ty, result, div_floor);
break :result MCValue{ .register = reg };
break :result result;
},
}
};
@ -2134,7 +2193,7 @@ fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void {
fn elemOffset(self: *Self, index_ty: Type, index: MCValue, elem_size: u64) !Register {
const reg = try self.copyToTmpRegister(index_ty, index);
try self.genIMulOpMir(index_ty, .{ .register = reg }, .{ .immediate = elem_size });
try self.genIntMulComplexOpMir(index_ty, .{ .register = reg }, .{ .immediate = elem_size });
return reg;
}
@ -3098,7 +3157,6 @@ fn genBinMathOp(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_rhs:
.bool_or, .bit_or => try self.genBinMathOpMir(.@"or", dst_ty, dst_mcv, src_mcv),
.bool_and, .bit_and => try self.genBinMathOpMir(.@"and", dst_ty, dst_mcv, src_mcv),
.xor, .not => try self.genBinMathOpMir(.xor, dst_ty, dst_mcv, src_mcv),
.mul, .mulwrap, .mul_with_overflow => try self.genIMulOpMir(dst_ty, dst_mcv, src_mcv),
else => unreachable,
}
return dst_mcv;
@ -3254,8 +3312,9 @@ fn genBinMathOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MC
}
}
// Performs integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv.
fn genIMulOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !void {
/// Performs multi-operand integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv.
/// Does not support byte-size operands.
fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !void {
const abi_size = @intCast(u32, dst_ty.abiSize(self.target.*));
switch (dst_mcv) {
.none => unreachable,
@ -3301,7 +3360,7 @@ fn genIMulOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !
} else {
// TODO verify we don't spill and assign to the same register as dst_mcv
const src_reg = try self.copyToTmpRegister(dst_ty, src_mcv);
return self.genIMulOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg });
return self.genIntMulComplexOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg });
}
},
.stack_offset => |off| {
@ -3458,15 +3517,20 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.
try self.spillCompareFlagsIfOccupied();
for (caller_preserved_regs) |reg| {
try self.register_manager.getReg(reg, null);
}
if (info.return_value == .stack_offset) {
const ret_ty = fn_ty.fnReturnType();
const ret_abi_size = @intCast(u32, ret_ty.abiSize(self.target.*));
const ret_abi_align = @intCast(u32, ret_ty.abiAlignment(self.target.*));
const stack_offset = @intCast(i32, try self.allocMem(inst, ret_abi_size, ret_abi_align));
log.debug("airCall: return value on stack at offset {}", .{stack_offset});
try self.register_manager.getReg(.rdi, null);
self.register_manager.freezeRegs(&.{.rdi});
try self.genSetReg(Type.usize, .rdi, .{ .ptr_stack_offset = stack_offset });
self.register_manager.freezeRegs(&.{.rdi});
info.return_value.stack_offset = stack_offset;
}
@ -3653,15 +3717,13 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.
const result: MCValue = result: {
switch (info.return_value) {
.register => |reg| {
if (RegisterManager.indexOfReg(&callee_preserved_regs, reg) == null) {
// Save function return value in a callee saved register
break :result try self.copyToRegisterWithInstTracking(
inst,
self.air.typeOfIndex(inst),
info.return_value,
);
}
.register => {
// Save function return value in a new register
break :result try self.copyToRegisterWithInstTracking(
inst,
self.air.typeOfIndex(inst),
info.return_value,
);
},
else => {},
}
@ -3688,19 +3750,11 @@ fn airRet(self: *Self, inst: Air.Inst.Index) !void {
const ret_ty = self.fn_type.fnReturnType();
switch (self.ret_mcv) {
.stack_offset => {
// TODO audit register allocation!
self.register_manager.freezeRegs(&.{ .rax, .rcx, .rdi });
defer self.register_manager.unfreezeRegs(&.{ .rax, .rcx, .rdi });
const reg = try self.register_manager.allocReg(null);
const backpatch = try self.addInst(.{
.tag = .mov,
.ops = (Mir.Ops{
.reg1 = reg,
.reg2 = .rdi,
}).encode(),
.data = undefined,
});
try self.ret_backpatches.append(self.gpa, backpatch);
self.register_manager.freezeRegs(&.{ .rax, .rcx });
defer self.register_manager.unfreezeRegs(&.{ .rax, .rcx });
const reg = try self.copyToTmpRegister(Type.usize, self.ret_mcv);
self.register_manager.freezeRegs(&.{reg});
defer self.register_manager.unfreezeRegs(&.{reg});
try self.genSetStack(ret_ty, 0, operand, .{
.source_stack_base = .rbp,
.dest_stack_base = reg,
@ -3731,19 +3785,11 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void {
const elem_ty = ptr_ty.elemType();
switch (self.ret_mcv) {
.stack_offset => {
// TODO audit register allocation!
self.register_manager.freezeRegs(&.{ .rax, .rcx, .rdi });
defer self.register_manager.unfreezeRegs(&.{ .rax, .rcx, .rdi });
const reg = try self.register_manager.allocReg(null);
const backpatch = try self.addInst(.{
.tag = .mov,
.ops = (Mir.Ops{
.reg1 = reg,
.reg2 = .rdi,
}).encode(),
.data = undefined,
});
try self.ret_backpatches.append(self.gpa, backpatch);
self.register_manager.freezeRegs(&.{ .rax, .rcx });
defer self.register_manager.unfreezeRegs(&.{ .rax, .rcx });
const reg = try self.copyToTmpRegister(Type.usize, self.ret_mcv);
self.register_manager.freezeRegs(&.{reg});
defer self.register_manager.unfreezeRegs(&.{reg});
try self.genInlineMemcpy(.{ .stack_offset = 0 }, ptr, .{ .immediate = elem_ty.abiSize(self.target.*) }, .{
.source_stack_base = .rbp,
.dest_stack_base = reg,
@ -5033,6 +5079,7 @@ const InlineMemcpyOpts = struct {
dest_stack_base: ?Register = null,
};
/// Spills .rax and .rcx.
fn genInlineMemcpy(
self: *Self,
dst_ptr: MCValue,
@ -5040,13 +5087,7 @@ fn genInlineMemcpy(
len: MCValue,
opts: InlineMemcpyOpts,
) InnerError!void {
// TODO this is wrong. We should check first if any of the operands is in `.rax` or `.rcx` before
// spilling. Consolidate with other TODOs regarding register allocation mechanics.
try self.register_manager.getReg(.rax, null);
try self.register_manager.getReg(.rcx, null);
self.register_manager.freezeRegs(&.{ .rax, .rcx });
defer self.register_manager.unfreezeRegs(&.{ .rax, .rcx });
if (opts.source_stack_base) |reg| self.register_manager.freezeRegs(&.{reg});
defer if (opts.source_stack_base) |reg| self.register_manager.unfreezeRegs(&.{reg});
@ -5086,7 +5127,6 @@ fn genInlineMemcpy(
return self.fail("TODO implement memcpy for setting stack when dest is {}", .{dst_ptr});
},
}
self.register_manager.freezeRegs(&.{dst_addr_reg});
defer self.register_manager.unfreezeRegs(&.{dst_addr_reg});
@ -5122,7 +5162,6 @@ fn genInlineMemcpy(
return self.fail("TODO implement memcpy for setting stack when src is {}", .{src_ptr});
},
}
self.register_manager.freezeRegs(&.{src_addr_reg});
defer self.register_manager.unfreezeRegs(&.{src_addr_reg});
@ -5130,6 +5169,11 @@ fn genInlineMemcpy(
const count_reg = regs[0].to64();
const tmp_reg = regs[1].to8();
self.register_manager.unfreezeRegs(&.{ .rax, .rcx });
try self.register_manager.getReg(.rax, null);
try self.register_manager.getReg(.rcx, null);
try self.genSetReg(Type.usize, count_reg, len);
// mov rcx, 0
@ -5225,6 +5269,7 @@ fn genInlineMemcpy(
try self.performReloc(loop_reloc);
}
/// Spills .rax register.
fn genInlineMemset(
self: *Self,
dst_ptr: MCValue,
@ -5232,9 +5277,7 @@ fn genInlineMemset(
len: MCValue,
opts: InlineMemcpyOpts,
) InnerError!void {
try self.register_manager.getReg(.rax, null);
self.register_manager.freezeRegs(&.{.rax});
defer self.register_manager.unfreezeRegs(&.{.rax});
const addr_reg = try self.register_manager.allocReg(null);
switch (dst_ptr) {
@ -5271,6 +5314,9 @@ fn genInlineMemset(
self.register_manager.freezeRegs(&.{addr_reg});
defer self.register_manager.unfreezeRegs(&.{addr_reg});
self.register_manager.unfreezeRegs(&.{.rax});
try self.register_manager.getReg(.rax, null);
try self.genSetReg(Type.usize, .rax, len);
try self.genBinMathOpMir(.sub, Type.usize, .{ .register = .rax }, .{ .immediate = 1 });

View File

@ -145,6 +145,7 @@ pub fn lowerMir(emit: *Emit) InnerError!void {
.sar => try emit.mirShift(.sar, inst),
.imul => try emit.mirMulDiv(.imul, inst),
.mul => try emit.mirMulDiv(.mul, inst),
.idiv => try emit.mirMulDiv(.idiv, inst),
.div => try emit.mirMulDiv(.div, inst),
.imul_complex => try emit.mirIMulComplex(inst),
@ -1164,6 +1165,7 @@ const Tag = enum {
brk,
nop,
imul,
mul,
idiv,
div,
syscall,
@ -1411,7 +1413,7 @@ inline fn getOpCode(tag: Tag, enc: Encoding, is_one_byte: bool) ?OpCode {
.setnl, .setge => OpCode.twoByte(0x0f, 0x9d),
.setle, .setng => OpCode.twoByte(0x0f, 0x9e),
.setnle, .setg => OpCode.twoByte(0x0f, 0x9f),
.idiv, .div, .imul => OpCode.oneByte(if (is_one_byte) 0xf6 else 0xf7),
.idiv, .div, .imul, .mul => OpCode.oneByte(if (is_one_byte) 0xf6 else 0xf7),
.fisttp16 => OpCode.oneByte(0xdf),
.fisttp32 => OpCode.oneByte(0xdb),
.fisttp64 => OpCode.oneByte(0xdd),
@ -1554,9 +1556,10 @@ inline fn getModRmExt(tag: Tag) ?u3 {
=> 0x4,
.shr => 0x5,
.sar => 0x7,
.mul => 0x4,
.imul => 0x5,
.idiv => 0x7,
.div => 0x6,
.idiv => 0x7,
.fisttp16 => 0x1,
.fisttp32 => 0x1,
.fisttp64 => 0x1,

View File

@ -227,6 +227,7 @@ pub const Inst = struct {
/// 0b11 qword ptr [reg2 + imm32]
imul,
idiv,
mul,
div,
/// ops flags: form:

View File

@ -370,8 +370,13 @@ pub fn classifySystemV(ty: Type, target: Target) [8]Class {
}
}
/// These registers need to be preserved (saved on the stack) and restored by the callee before getting clobbered
/// and when the callee returns.
pub const callee_preserved_regs = [_]Register{ .rcx, .rsi, .rdi, .r8, .r9, .r10, .r11 };
/// Note that .rsp and .rbp also belong to this set, however, we never expect to use them
/// for anything else but stack offset tracking therefore we exclude them from this set.
pub const callee_preserved_regs = [_]Register{ .rbx, .r12, .r13, .r14, .r15 };
/// These registers need to be preserved (saved on the stack) and restored by the caller before
/// the caller relinquishes control to a subroutine via call instruction (or similar).
/// In other words, these registers are free to use by the callee.
pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11 };
pub const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs;
pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 };
pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx };

View File

@ -697,11 +697,9 @@ test "@mulWithOverflow" {
try expect(!@mulWithOverflow(u8, a, b, &result));
try expect(result == 246);
if (builtin.zig_backend != .stage2_x86_64) { // TODO fix mul/imul on x86_64
b = 4;
try expect(@mulWithOverflow(u8, a, b, &result));
try expect(result == 236);
}
b = 4;
try expect(@mulWithOverflow(u8, a, b, &result));
try expect(result == 236);
}
test "@subWithOverflow" {