update @memcpy to require equal src and dest lens

* Sema: upgrade operands to array pointers if possible when emitting
   AIR.
 * Implement safety checks for length mismatch and aliasing.
 * AIR: make ptrtoint support slice operands. Implement in LLVM backend.
 * C backend: implement new `@memset` semantics. `@memcpy` is not done
   yet.
This commit is contained in:
Andrew Kelley 2023-04-21 18:03:33 -07:00
parent a5c910adb6
commit edb5e493e6
12 changed files with 280 additions and 78 deletions

View File

@ -8683,18 +8683,20 @@ test "integer cast panic" {
{#header_open|@memcpy#}
<pre>{#syntax#}@memcpy(noalias dest, noalias source) void{#endsyntax#}</pre>
<p>This function copies bytes from one region of memory to another.</p>
<p>{#syntax#}dest{#endsyntax#} must be a mutable slice, or a mutable pointer to an array.
It may have any alignment, and it may have any element type.</p>
<p>{#syntax#}source{#endsyntax#} must be an array, pointer, or a slice
with the same element type as {#syntax#}dest{#endsyntax#}. It may have
any alignment. Only {#syntax#}const{#endsyntax#} access is required. It
is sliced from 0 to the same length as
{#syntax#}dest{#endsyntax#}, triggering the same set of safety checks and
possible compile errors as
{#syntax#}source[0..dest.len]{#endsyntax#}.</p>
<p>It is illegal for {#syntax#}dest{#endsyntax#} and
{#syntax#}source[0..dest.len]{#endsyntax#} to overlap. If safety
checks are enabled, there will be a runtime check for such overlapping.</p>
<p>{#syntax#}dest{#endsyntax#} must be a mutable slice, a mutable pointer to an array, or
a mutable many-item {#link|pointer|Pointer#}. It may have any
alignment, and it may have any element type.</p>
<p>Likewise, {#syntax#}source{#endsyntax#} must be a slice, a pointer to
an array, or a many-item {#link|pointer|Pointer#}. Only
{#syntax#}const{#endsyntax#} access is required. It may have any
alignment, and it may have any element type.</p>
<p>The {#syntax#}source{#endsyntax#} element type must support {#link|Type Coercion#}
into the {#syntax#}dest{#endsyntax#} element type. The element types may have
different ABI sizes; however, that may incur a performance penalty.</p>
<p>Similar to {#link|for#} loops, at least one of {#syntax#}source{#endsyntax#} and
{#syntax#}dest{#endsyntax#} must provide a length, and if two lengths are provided,
they must be equal.</p>
<p>Finally, the two memory regions must not overlap.</p>
{#header_close#}
{#header_open|@memset#}

View File

@ -1002,6 +1002,8 @@ pub const panic_messages = struct {
pub const index_out_of_bounds = "index out of bounds";
pub const start_index_greater_than_end = "start index is larger than end index";
pub const for_len_mismatch = "for loop over objects with non-equal lengths";
pub const memcpy_len_mismatch = "@memcpy arguments have non-equal lengths";
pub const memcpy_alias = "@memcpy arguments alias";
};
pub noinline fn returnError(st: *StackTrace) void {

View File

@ -209,7 +209,7 @@ fn Aegis128LGeneric(comptime tag_bits: u9) type {
acc |= (computed_tag[j] ^ tag[j]);
}
if (acc != 0) {
@memset(m.ptr, undefined, m.len);
@memset(m, undefined);
return error.AuthenticationFailed;
}
}
@ -390,7 +390,7 @@ fn Aegis256Generic(comptime tag_bits: u9) type {
acc |= (computed_tag[j] ^ tag[j]);
}
if (acc != 0) {
@memset(m.ptr, undefined, m.len);
@memset(m, undefined);
return error.AuthenticationFailed;
}
}

View File

@ -99,9 +99,8 @@ pub const Murmur2_64 = struct {
pub fn hashWithSeed(str: []const u8, seed: u64) u64 {
const m: u64 = 0xc6a4a7935bd1e995;
const len = @as(u64, str.len);
var h1: u64 = seed ^ (len *% m);
for (@ptrCast([*]align(1) const u64, str.ptr)[0..@intCast(usize, len >> 3)]) |v| {
var h1: u64 = seed ^ (@as(u64, str.len) *% m);
for (@ptrCast([*]align(1) const u64, str.ptr)[0..str.len / 8]) |v| {
var k1: u64 = v;
if (native_endian == .Big)
k1 = @byteSwap(k1);
@ -111,11 +110,11 @@ pub const Murmur2_64 = struct {
h1 ^= k1;
h1 *%= m;
}
const rest = len & 7;
const offset = len - rest;
const rest = str.len & 7;
const offset = str.len - rest;
if (rest > 0) {
var k1: u64 = 0;
@memcpy(@ptrCast([*]u8, &k1)[0..@intCast(usize, rest)], @ptrCast([*]const u8, &str[@intCast(usize, offset)]));
@memcpy(@ptrCast([*]u8, &k1)[0..rest], str[offset..]);
if (native_endian == .Big)
k1 = @byteSwap(k1);
h1 ^= k1;

View File

@ -282,7 +282,8 @@ pub fn reallocAdvanced(
const new_mem = self.rawAlloc(byte_count, log2a(Slice.alignment), return_address) orelse
return error.OutOfMemory;
@memcpy(new_mem[0..@min(byte_count, old_byte_slice.len)], old_byte_slice);
const copy_len = @min(byte_count, old_byte_slice.len);
@memcpy(new_mem[0..copy_len], old_byte_slice[0..copy_len]);
// TODO https://github.com/ziglang/zig/issues/4298
@memset(old_byte_slice, undefined);
self.rawFree(old_byte_slice, log2a(Slice.alignment), return_address);

View File

@ -462,6 +462,7 @@ pub const Inst = struct {
/// Uses the `ty_op` field.
load,
/// Converts a pointer to its address. Result type is always `usize`.
/// Pointer type size may be any, including slice.
/// Uses the `un_op` field.
ptrtoint,
/// Given a boolean, returns 0 or 1.

View File

@ -8455,7 +8455,7 @@ fn builtinCall(
.memcpy => {
_ = try gz.addPlNode(.memcpy, node, Zir.Inst.Bin{
.lhs = try expr(gz, scope, .{ .rl = .none }, params[0]),
.rhs = try expr(gz, scope, .{ .rl = .ref }, params[1]),
.rhs = try expr(gz, scope, .{ .rl = .none }, params[1]),
});
return rvalue(gz, ri, .void_value, node);
},

View File

@ -254,6 +254,8 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
.set_union_tag,
.min,
.max,
.memset,
.memcpy,
=> {
const bin_op = data[inst].bin_op;
try self.verifyInst(inst, .{ bin_op.lhs, bin_op.rhs, .none });
@ -306,13 +308,6 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
const extra = self.air.extraData(Air.Bin, vector_store_elem.payload).data;
try self.verifyInst(inst, .{ vector_store_elem.vector_ptr, extra.lhs, extra.rhs });
},
.memset,
.memcpy,
=> {
const pl_op = data[inst].pl_op;
const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
try self.verifyInst(inst, .{ pl_op.operand, extra.lhs, extra.rhs });
},
.cmpxchg_strong,
.cmpxchg_weak,
=> {

View File

@ -3386,17 +3386,39 @@ fn zirIndexablePtrLen(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileE
const inst_data = sema.code.instructions.items(.data)[inst].un_node;
const src = inst_data.src();
const object = try sema.resolveInst(inst_data.operand);
return indexablePtrLen(sema, block, src, object);
}
fn indexablePtrLen(
sema: *Sema,
block: *Block,
src: LazySrcLoc,
object: Air.Inst.Ref,
) CompileError!Air.Inst.Ref {
const object_ty = sema.typeOf(object);
const is_pointer_to = object_ty.isSinglePointer();
const array_ty = if (is_pointer_to)
object_ty.childType()
else
object_ty;
const array_ty = if (is_pointer_to) object_ty.childType() else object_ty;
try checkIndexable(sema, block, src, array_ty);
return sema.fieldVal(block, src, object, "len", src);
}
/// Resolves the `len` of `object`, or returns `.none` when `object` is a
/// many-item pointer.
///
/// For a single-item pointer the pointee type is the one checked for
/// indexability; for every other operand (including non-pointers) the
/// operand's own type is checked. After the check, the result is the value
/// of the operand's `len` field.
fn indexablePtrLenOrNone(
    sema: *Sema,
    block: *Block,
    src: LazySrcLoc,
    object: Air.Inst.Ref,
) CompileError!Air.Inst.Ref {
    const object_ty = sema.typeOf(object);
    const indexable_ty = if (object_ty.ptrSizeOrNull()) |size| switch (size) {
        // Many-item pointers carry no length information.
        .Many => return .none,
        // Look through the pointer at what it points to.
        .One => object_ty.childType(),
        else => object_ty,
    } else object_ty;
    try checkIndexable(sema, block, src, indexable_ty);
    return sema.fieldVal(block, src, object, "len", src);
}
@ -21773,6 +21795,29 @@ fn analyzeMinMax(
return block.addBinOp(air_tag, simd_op.lhs, simd_op.rhs);
}
/// Converts `ptr` into a single-item pointer to an array of `len` elements.
///
/// A pointer whose size is already `.One` is returned unchanged. Otherwise a
/// new pointer type is built whose pointee is an array of `len` elements of
/// the operand's element type (the operand's sentinel becomes the array's
/// sentinel), keeping the operand's alignment, address space, mutability,
/// allowzero and volatile attributes. Slices are converted with a
/// `slice_ptr` instruction typed as the new pointer; any other pointer size
/// is bitcast to it.
fn upgradeToArrayPtr(sema: *Sema, block: *Block, ptr: Air.Inst.Ref, len: u64) !Air.Inst.Ref {
    const info = sema.typeOf(ptr).ptrInfo().data;
    // Nothing to upgrade for single-item pointers.
    // NOTE(review): presumably callers only pass pointer-to-array here — confirm.
    if (info.size == .One) return ptr;

    const mod = sema.mod;
    const array_ty = try Type.array(sema.arena, len, info.sentinel, info.pointee_type, mod);
    const array_ptr_ty = try Type.ptr(sema.arena, mod, .{
        .pointee_type = array_ty,
        .sentinel = null,
        .@"align" = info.@"align",
        .@"addrspace" = info.@"addrspace",
        .mutable = info.mutable,
        .@"allowzero" = info.@"allowzero",
        .@"volatile" = info.@"volatile",
        .size = .One,
    });

    if (info.size != .Slice) {
        return block.addBitCast(array_ptr_ty, ptr);
    }
    return block.addTyOp(.slice_ptr, array_ptr_ty, ptr);
}
fn zirMemcpy(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void {
const inst_data = sema.code.instructions.items(.data)[inst].pl_node;
const extra = sema.code.extraData(Zir.Inst.Bin, inst_data.payload_index).data;
@ -21780,27 +21825,125 @@ fn zirMemcpy(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void
const dest_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = inst_data.src_node };
const src_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = inst_data.src_node };
const dest_ptr = try sema.resolveInst(extra.lhs);
const src_ptr_ptr = try sema.resolveInst(extra.rhs);
const dest_ptr_ty = sema.typeOf(dest_ptr);
try checkSliceOrArrayType(sema, block, dest_src, dest_ptr_ty);
const src_ptr = try sema.resolveInst(extra.rhs);
const dest_len = try indexablePtrLenOrNone(sema, block, dest_src, dest_ptr);
const src_len = try indexablePtrLenOrNone(sema, block, src_src, src_ptr);
const dest_len = try sema.fieldVal(block, dest_src, dest_ptr, "len", dest_src);
const src_ptr = try sema.analyzeSlice(block, src_src, src_ptr_ptr, .zero_usize, dest_len, .none, .unneeded, src_src, src_src, src_src);
if (dest_len == .none and src_len == .none) {
const msg = msg: {
const msg = try sema.errMsg(block, src, "unknown @memcpy length", .{});
errdefer msg.destroy(sema.gpa);
try sema.errNote(block, dest_src, msg, "destination type {} provides no length", .{
sema.typeOf(dest_ptr).fmt(sema.mod),
});
try sema.errNote(block, src_src, msg, "source type {} provides no length", .{
sema.typeOf(src_ptr).fmt(sema.mod),
});
break :msg msg;
};
return sema.failWithOwnedErrorMsg(msg);
}
var len_val: ?Value = null;
if (dest_len != .none and src_len != .none) check: {
// If we can check at compile-time, no need for runtime safety.
if (try sema.resolveDefinedValue(block, dest_src, dest_len)) |dest_len_val| {
len_val = dest_len_val;
if (try sema.resolveDefinedValue(block, src_src, src_len)) |src_len_val| {
if (!(try sema.valuesEqual(dest_len_val, src_len_val, Type.usize))) {
const msg = msg: {
const msg = try sema.errMsg(block, src, "non-matching @memcpy lengths", .{});
errdefer msg.destroy(sema.gpa);
try sema.errNote(block, dest_src, msg, "length {} here", .{
dest_len_val.fmtValue(Type.usize, sema.mod),
});
try sema.errNote(block, src_src, msg, "length {} here", .{
src_len_val.fmtValue(Type.usize, sema.mod),
});
break :msg msg;
};
return sema.failWithOwnedErrorMsg(msg);
}
break :check;
}
} else if (try sema.resolveDefinedValue(block, src_src, src_len)) |src_len_val| {
len_val = src_len_val;
}
if (block.wantSafety()) {
const ok = try block.addBinOp(.cmp_eq, dest_len, src_len);
try sema.addSafetyCheck(block, ok, .memcpy_len_mismatch);
}
}
const runtime_src = if (try sema.resolveDefinedValue(block, dest_src, dest_ptr)) |dest_ptr_val| rs: {
if (!dest_ptr_val.isComptimeMutablePtr()) break :rs dest_src;
if (try sema.resolveDefinedValue(block, src_src, src_ptr)) |src_ptr_val| {
if (!src_ptr_val.isComptimeMutablePtr()) break :rs src_src;
_ = src_ptr_val;
return sema.fail(block, src, "TODO: @memcpy at comptime", .{});
} else break :rs src_src;
} else dest_src;
try sema.requireRuntimeBlock(block, src, runtime_src);
const dest_ty = sema.typeOf(dest_ptr);
const src_ty = sema.typeOf(src_ptr);
// If in-memory coercion is not allowed, explode this memcpy call into a
// for loop that copies element-wise.
// Likewise if this is an iterable rather than a pointer, do the same
// lowering. The AIR instruction requires pointers with element types of
// equal ABI size.
if (dest_ty.zigTypeTag() != .Pointer or src_ty.zigTypeTag() != .Pointer) {
return sema.fail(block, src, "TODO: lower @memcpy to a for loop because the source or destination iterable is a tuple", .{});
}
const dest_elem_ty = dest_ty.elemType2();
const src_elem_ty = src_ty.elemType2();
const target = sema.mod.getTarget();
if (.ok != try sema.coerceInMemoryAllowed(block, dest_elem_ty, src_elem_ty, true, target, dest_src, src_src)) {
return sema.fail(block, src, "TODO: lower @memcpy to a for loop because the element types have different ABI sizes", .{});
}
// If the length is comptime-known, then upgrade src and destination types
// into pointer-to-array. At this point we know they are both pointers
// already.
var new_dest_ptr = dest_ptr;
var new_src_ptr = src_ptr;
if (len_val) |val| {
const len = val.toUnsignedInt(target);
new_dest_ptr = try upgradeToArrayPtr(sema, block, dest_ptr, len);
new_src_ptr = try upgradeToArrayPtr(sema, block, src_ptr, len);
}
// Aliasing safety check.
if (block.wantSafety()) {
const dest_int = try block.addUnOp(.ptrtoint, new_dest_ptr);
const src_int = try block.addUnOp(.ptrtoint, new_src_ptr);
const len = if (len_val) |v|
try sema.addConstant(Type.usize, v)
else if (dest_len != .none)
dest_len
else
src_len;
// ok1: dest >= src + len
// ok2: src >= dest + len
const src_plus_len = try block.addBinOp(.add, src_int, len);
const dest_plus_len = try block.addBinOp(.add, dest_int, len);
const ok1 = try block.addBinOp(.cmp_gte, dest_int, src_plus_len);
const ok2 = try block.addBinOp(.cmp_gte, src_int, dest_plus_len);
const ok = try block.addBinOp(.bit_or, ok1, ok2);
try sema.addSafetyCheck(block, ok, .memcpy_alias);
}
_ = try block.addInst(.{
.tag = .memcpy,
.data = .{ .bin_op = .{
.lhs = dest_ptr,
.rhs = src_ptr,
.lhs = new_dest_ptr,
.rhs = new_src_ptr,
} },
});
}
@ -22949,6 +23092,8 @@ pub const PanicId = enum {
index_out_of_bounds,
start_index_greater_than_end,
for_len_mismatch,
memcpy_len_mismatch,
memcpy_alias,
};
fn addSafetyCheck(

View File

@ -6177,18 +6177,43 @@ fn airAtomicStore(f: *Function, inst: Air.Inst.Index, order: [*:0]const u8) !CVa
}
fn airMemset(f: *Function, inst: Air.Inst.Index) !CValue {
const pl_op = f.air.instructions.items(.data)[inst].pl_op;
const extra = f.air.extraData(Air.Bin, pl_op.payload).data;
const dest_ty = f.air.typeOf(pl_op.operand);
const dest_ptr = try f.resolveInst(pl_op.operand);
const value = try f.resolveInst(extra.lhs);
const len = try f.resolveInst(extra.rhs);
const bin_op = f.air.instructions.items(.data)[inst].bin_op;
const dest_ty = f.air.typeOf(bin_op.lhs);
const dest_slice = try f.resolveInst(bin_op.lhs);
const value = try f.resolveInst(bin_op.rhs);
const elem_ty = f.air.typeOf(bin_op.rhs);
const target = f.object.dg.module.getTarget();
const elem_abi_size = elem_ty.abiSize(target);
const val_is_undef = if (f.air.value(bin_op.rhs)) |val| val.isUndefDeep() else false;
const writer = f.object.writer();
if (dest_ty.isVolatilePtr()) {
var u8_ptr_pl = dest_ty.ptrInfo();
u8_ptr_pl.data.pointee_type = Type.u8;
const u8_ptr_ty = Type.initPayload(&u8_ptr_pl.base);
if (val_is_undef) {
try writer.writeAll("memset(");
switch (dest_ty.ptrSize()) {
.Slice => {
try f.writeCValueMember(writer, dest_slice, .{ .identifier = "ptr" });
try writer.writeAll(", 0xaa, ");
try f.writeCValueMember(writer, dest_slice, .{ .identifier = "len" });
if (elem_abi_size > 1) {
try writer.print(" * {d});\n", .{elem_abi_size});
} else {
try writer.writeAll(");\n");
}
},
.One => {
const array_ty = dest_ty.childType();
const len = array_ty.arrayLen() * elem_abi_size;
try f.writeCValue(writer, dest_slice, .FunctionArgument);
try writer.print(", 0xaa, {d});\n", .{len});
},
.Many, .C => unreachable,
}
try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs });
return .none;
}
if (elem_abi_size > 1 or dest_ty.isVolatilePtr()) {
const index = try f.allocLocal(inst, Type.usize);
try writer.writeAll("for (");
@ -6198,36 +6223,61 @@ fn airMemset(f: *Function, inst: Air.Inst.Index) !CValue {
try writer.writeAll("; ");
try f.writeCValue(writer, index, .Other);
try writer.writeAll(" != ");
try f.writeCValue(writer, len, .Other);
switch (dest_ty.ptrSize()) {
.Slice => {
try f.writeCValueMember(writer, dest_slice, .{ .identifier = "len" });
},
.One => {
const array_ty = dest_ty.childType();
const len = array_ty.arrayLen() * elem_abi_size;
try writer.print("{d}", .{len});
},
.Many, .C => unreachable,
}
try writer.writeAll("; ");
try f.writeCValue(writer, index, .Other);
try writer.writeAll(" += ");
try f.object.dg.renderValue(writer, Type.usize, Value.one, .Other);
try writer.writeAll(") ((");
try f.renderType(writer, u8_ptr_ty);
try writer.writeByte(')');
try f.writeCValue(writer, dest_ptr, .FunctionArgument);
try writer.writeAll(") (");
switch (dest_ty.ptrSize()) {
.Slice => try f.writeCValueMember(writer, dest_slice, .{ .identifier = "ptr" }),
.One => try f.writeCValue(writer, dest_slice, .FunctionArgument),
.Many, .C => unreachable,
}
try writer.writeAll(")[");
try f.writeCValue(writer, index, .Other);
try writer.writeAll("] = ");
try f.writeCValue(writer, value, .FunctionArgument);
try writer.writeAll(";\n");
try reap(f, inst, &.{ pl_op.operand, extra.lhs, extra.rhs });
try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs });
try freeLocal(f, inst, index.new_local, 0);
return .none;
}
try reap(f, inst, &.{ pl_op.operand, extra.lhs, extra.rhs });
try writer.writeAll("memset(");
try f.writeCValue(writer, dest_ptr, .FunctionArgument);
try writer.writeAll(", ");
try f.writeCValue(writer, value, .FunctionArgument);
try writer.writeAll(", ");
try f.writeCValue(writer, len, .FunctionArgument);
try writer.writeAll(");\n");
switch (dest_ty.ptrSize()) {
.Slice => {
try f.writeCValueMember(writer, dest_slice, .{ .identifier = "ptr" });
try writer.writeAll(", ");
try f.writeCValue(writer, value, .FunctionArgument);
try writer.writeAll(", ");
try f.writeCValueMember(writer, dest_slice, .{ .identifier = "len" });
try writer.writeAll(");\n");
},
.One => {
const array_ty = dest_ty.childType();
const len = array_ty.arrayLen() * elem_abi_size;
try f.writeCValue(writer, dest_slice, .FunctionArgument);
try writer.writeAll(", ");
try f.writeCValue(writer, value, .FunctionArgument);
try writer.print(", {d});\n", .{len});
},
.Many, .C => unreachable,
}
try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs });
return .none;
}

View File

@ -5777,10 +5777,10 @@ pub const FuncGen = struct {
}
fn sliceOrArrayPtr(fg: *FuncGen, ptr: *llvm.Value, ty: Type) *llvm.Value {
switch (ty.ptrSize()) {
.Slice => return fg.builder.buildExtractValue(ptr, 0, ""),
.One => return ptr,
.Many, .C => unreachable,
if (ty.isSlice()) {
return fg.builder.buildExtractValue(ptr, 0, "");
} else {
return ptr;
}
}
@ -7917,8 +7917,10 @@ pub const FuncGen = struct {
fn airPtrToInt(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
const un_op = self.air.instructions.items(.data)[inst].un_op;
const operand = try self.resolveInst(un_op);
const ptr_ty = self.air.typeOf(un_op);
const operand_ptr = self.sliceOrArrayPtr(operand, ptr_ty);
const dest_llvm_ty = try self.dg.lowerType(self.air.typeOfIndex(inst));
return self.builder.buildPtrToInt(operand, dest_llvm_ty, "");
return self.builder.buildPtrToInt(operand_ptr, dest_llvm_ty, "");
}
fn airBitCast(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {

View File

@ -3843,9 +3843,14 @@ pub const Type = extern union {
};
}
/// Asserts the `Type` is a pointer.
pub fn ptrSize(self: Type) std.builtin.Type.Pointer.Size {
return switch (self.tag()) {
/// Asserts `ty` is a pointer.
pub fn ptrSize(ty: Type) std.builtin.Type.Pointer.Size {
return ptrSizeOrNull(ty).?;
}
/// Returns `null` if `ty` is not a pointer.
pub fn ptrSizeOrNull(ty: Type) ?std.builtin.Type.Pointer.Size {
return switch (ty.tag()) {
.const_slice,
.mut_slice,
.const_slice_u8,
@ -3870,9 +3875,9 @@ pub const Type = extern union {
.inferred_alloc_mut,
=> .One,
.pointer => self.castTag(.pointer).?.data.size,
.pointer => ty.castTag(.pointer).?.data.size,
else => unreachable,
else => null,
};
}