build system: implement lazy dependencies, part 1

Build manifest files support `lazy: true` for dependency sections.
This causes the auto-generated dependencies.zig to have 2 more
possibilities:
1. It communicates whether a dependency is lazy or not.
2. The dependency might be acknowledged, but missing due to being lazy
   and not fetched.

Lazy dependencies are not fetched by default, but if they are already
fetched then they are provided to the build script.

The build runner reports the set of missing lazy dependencies that are
required to the parent process via stdout and indicates the situation
with exit code 3.

std.Build now has a `lazyDependency` function. I'll let the doc comments
speak for themselves:

When this function is called, it means that the current build does, in
fact, require this dependency. If the dependency is already fetched, it
proceeds in the same manner as `dependency`. However if the dependency
was not fetched, then when the build script is finished running, the
build will not proceed to the make phase. Instead, the parent process
will additionally fetch all the lazy dependencies that were actually
required by running the build script, rebuild the build script, and then
run it again.
In other words, if this function returns `null` it means that the only
purpose of completing the configure phase is to find out all the other
lazy dependencies that are also required.
It is allowed to use this function for non-lazy dependencies, in which
case it will never return `null`. This allows toggling laziness via
build.zig.zon without changing build.zig logic.

The CLI for `zig build` detects this situation, but the logic for then
redoing the build process with these extra dependencies fetched is not
yet implemented.
This commit is contained in:
Andrew Kelley 2024-02-02 14:42:29 -07:00
parent 434a6a4f63
commit 252f4ab2a5
5 changed files with 163 additions and 17 deletions

View File

@ -100,8 +100,6 @@ pub fn main() !void {
var help_menu: bool = false;
var steps_menu: bool = false;
const stdout_stream = io.getStdOut().writer();
while (nextArg(args, &arg_idx)) |arg| {
if (mem.startsWith(u8, arg, "-D")) {
const option_contents = arg[2..];
@ -308,17 +306,29 @@ pub fn main() !void {
try builder.runBuild(root);
}
if (graph.needed_lazy_dependencies.entries.len != 0) {
var buffer: std.ArrayListUnmanaged(u8) = .{};
for (graph.needed_lazy_dependencies.keys()) |k| {
try buffer.appendSlice(arena, k);
try buffer.append(arena, '\n');
}
try io.getStdOut().writeAll(buffer.items);
process.exit(3); // Indicate configure phase failed with meaningful stdout.
}
if (builder.validateUserInputDidItFail()) {
fatal(" access the help menu with 'zig build -h'", .{});
}
validateSystemLibraryOptions(builder);
const stdout_writer = io.getStdOut().writer();
if (help_menu)
return usage(builder, stdout_stream);
return usage(builder, stdout_writer);
if (steps_menu)
return steps(builder, stdout_stream);
return steps(builder, stdout_writer);
var run: Run = .{
.max_rss = max_rss,

View File

@ -117,6 +117,7 @@ pub const Graph = struct {
env_map: EnvMap,
global_cache_root: Cache.Directory,
host_query_options: std.Target.Query.ParseOptions = .{},
needed_lazy_dependencies: std.StringArrayHashMapUnmanaged(void) = .{},
};
const AvailableDeps = []const struct { []const u8, []const u8 };
@ -1802,21 +1803,63 @@ pub const Dependency = struct {
}
};
pub fn dependency(b: *Build, name: []const u8, args: anytype) *Dependency {
/// Searches `b.available_deps` for an entry whose first element equals `name`
/// and returns that entry's second element (the package hash).
/// If no entry matches, panics with a message naming the missing dependency
/// and the path produced by `b.pathFromRoot("build.zig.zon")`.
fn findPkgHashOrFatal(b: *Build, name: []const u8) []const u8 {
    return for (b.available_deps) |entry| {
        if (mem.eql(u8, entry[0], name)) break entry[1];
    } else {
        // Nothing matched: report which manifest was expected to declare it.
        const manifest_path = b.pathFromRoot("build.zig.zon");
        std.debug.panic("no dependency named '{s}' in '{s}'. All packages used in build.zig must be declared in this file", .{ name, manifest_path });
    };
}
/// Records `pkg_hash` in the graph's `needed_lazy_dependencies` set.
/// The set is keyed by hash, so recording the same hash again leaves a
/// single entry. Panics with "OOM" if the insertion fails.
fn markNeededLazyDep(b: *Build, pkg_hash: []const u8) void {
    const needed = &b.graph.needed_lazy_dependencies;
    needed.put(b.graph.arena, pkg_hash, {}) catch @panic("OOM");
}
/// When this function is called, it means that the current build does, in
/// fact, require this dependency. If the dependency is already fetched, it
/// proceeds in the same manner as `dependency`. However if the dependency was
/// not fetched, then when the build script is finished running, the build will
/// not proceed to the make phase. Instead, the parent process will
/// additionally fetch all the lazy dependencies that were actually required by
/// running the build script, rebuild the build script, and then run it again.
/// In other words, if this function returns `null` it means that the only
/// purpose of completing the configure phase is to find out all the other lazy
/// dependencies that are also required.
/// It is allowed to use this function for non-lazy dependencies, in which case
/// it will never return `null`. This allows toggling laziness via
/// build.zig.zon without changing build.zig logic.
pub fn lazyDependency(b: *Build, name: []const u8, args: anytype) ?*Dependency {
const build_runner = @import("root");
const deps = build_runner.dependencies;
const pkg_hash = for (b.available_deps) |dep| {
if (mem.eql(u8, dep[0], name)) break dep[1];
} else {
const full_path = b.pathFromRoot("build.zig.zon");
std.debug.print("no dependency named '{s}' in '{s}'. All packages used in build.zig must be declared in this file.\n", .{ name, full_path });
process.exit(1);
};
const pkg_hash = findPkgHashOrFatal(b, name);
inline for (@typeInfo(deps.packages).Struct.decls) |decl| {
if (mem.eql(u8, decl.name, pkg_hash)) {
const pkg = @field(deps.packages, decl.name);
const available = !@hasDecl(pkg, "available") or pkg.available;
if (!available) {
markNeededLazyDep(b, pkg_hash);
return null;
}
return dependencyInner(b, name, pkg.build_root, if (@hasDecl(pkg, "build_zig")) pkg.build_zig else null, pkg.deps, args);
}
}
unreachable; // Bad @dependencies source
}
pub fn dependency(b: *Build, name: []const u8, args: anytype) *Dependency {
const build_runner = @import("root");
const deps = build_runner.dependencies;
const pkg_hash = findPkgHashOrFatal(b, name);
inline for (@typeInfo(deps.packages).Struct.decls) |decl| {
if (mem.eql(u8, decl.name, pkg_hash)) {
const pkg = @field(deps.packages, decl.name);
if (@hasDecl(pkg, "available")) {
@compileError("dependency is marked as lazy in build.zig.zon which means it must use the lazyDependency function instead");
}
return dependencyInner(b, name, pkg.build_root, if (@hasDecl(pkg, "build_zig")) pkg.build_zig else null, pkg.deps, args);
}
}

View File

@ -31,6 +31,8 @@ arena: std.heap.ArenaAllocator,
location: Location,
location_tok: std.zig.Ast.TokenIndex,
hash_tok: std.zig.Ast.TokenIndex,
name_tok: std.zig.Ast.TokenIndex,
lazy_status: LazyStatus,
parent_package_root: Package.Path,
parent_manifest_ast: ?*const std.zig.Ast,
prog_node: *std.Progress.Node,
@ -64,6 +66,15 @@ oom_flag: bool,
/// the root source file.
module: ?*Package.Module,
/// Describes whether the dependency handled by a `Fetch` is lazy and, for a
/// lazy one, whether its package was found.
pub const LazyStatus = enum {
/// Not lazy.
eager,
/// Lazy, found.
/// Dependencies marked lazy are queued with this value; it is replaced by
/// `unavailable` if the package is then not found.
available,
/// Lazy, not found.
/// The generated dependencies source declares `available = false` for
/// such a package and emits nothing else for it.
unavailable,
};
/// Contains shared state among all `Fetch` tasks.
pub const JobQueue = struct {
mutex: std.Thread.Mutex = .{},
@ -150,11 +161,37 @@ pub const JobQueue = struct {
// The first one is a dummy package for the current project.
continue;
}
try buf.writer().print(
\\ pub const {} = struct {{
\\
, .{std.zig.fmtId(&hash)});
lazy: {
switch (fetch.lazy_status) {
.eager => break :lazy,
.available => {
try buf.appendSlice(
\\ pub const available = true;
\\
);
break :lazy;
},
.unavailable => {
try buf.appendSlice(
\\ pub const available = false;
\\ };
\\
);
continue;
},
}
}
try buf.writer().print(
\\ pub const build_root = "{q}";
\\
, .{ std.zig.fmtId(&hash), fetch.package_root });
, .{fetch.package_root});
if (fetch.has_build_zig) {
try buf.writer().print(
@ -325,6 +362,7 @@ pub fn run(f: *Fetch) RunError!void {
const prefix_len: usize = if (f.job_queue.read_only) "p/".len else 0;
const pkg_sub_path = prefixed_pkg_sub_path[prefix_len..];
if (cache_root.handle.access(pkg_sub_path, .{})) |_| {
assert(f.lazy_status != .unavailable);
f.package_root = .{
.root_dir = cache_root,
.sub_path = try arena.dupe(u8, pkg_sub_path),
@ -335,8 +373,16 @@ pub fn run(f: *Fetch) RunError!void {
return queueJobsForDeps(f);
} else |err| switch (err) {
error.FileNotFound => {
switch (f.lazy_status) {
.eager => {},
.available => {
f.lazy_status = .unavailable;
return;
},
.unavailable => unreachable,
}
if (f.job_queue.read_only) return f.fail(
f.location_tok,
f.name_tok,
try eb.printString("package not found at '{}{s}'", .{
cache_root, pkg_sub_path,
}),
@ -627,6 +673,8 @@ fn queueJobsForDeps(f: *Fetch) RunError!void {
.location = location,
.location_tok = dep.location_tok,
.hash_tok = dep.hash_tok,
.name_tok = dep.name_tok,
.lazy_status = if (dep.lazy) .available else .eager,
.parent_package_root = f.package_root,
.parent_manifest_ast = &f.manifest_ast,
.prog_node = f.prog_node,

View File

@ -12,6 +12,8 @@ pub const Dependency = struct {
hash: ?[]const u8,
hash_tok: Ast.TokenIndex,
node: Ast.Node.Index,
name_tok: Ast.TokenIndex,
lazy: bool,
pub const Location = union(enum) {
url: []const u8,
@ -303,11 +305,14 @@ const Parse = struct {
.hash = null,
.hash_tok = 0,
.node = node,
.name_tok = 0,
.lazy = false,
};
var has_location = false;
for (struct_init.ast.fields) |field_init| {
const name_token = ast.firstToken(field_init) - 2;
dep.name_tok = name_token;
const field_name = try identifierTokenString(p, name_token);
// We could get fancy with reflection and comptime logic here but doing
// things manually provides an opportunity to do any additional verification
@ -342,6 +347,11 @@ const Parse = struct {
else => |e| return e,
};
dep.hash_tok = main_tokens[field_init];
} else if (mem.eql(u8, field_name, "lazy")) {
dep.lazy = parseBool(p, field_init) catch |err| switch (err) {
error.ParseFailure => continue,
else => |e| return e,
};
} else {
// Ignore unknown fields so that we can add fields in future zig
// versions without breaking older zig versions.
@ -374,6 +384,24 @@ const Parse = struct {
}
}
/// Interprets `node` as a boolean literal.
/// Returns `true` or `false` when the node is an identifier whose token text
/// is exactly "true" or "false". Otherwise reports a failure through `fail`
/// at the node's main token: "expected identifier" when the node is not an
/// identifier, "expected boolean" when it is some other identifier.
fn parseBool(p: *Parse, node: Ast.Node.Index) !bool {
    const ast = p.ast;
    const tok = ast.nodes.items(.main_token)[node];

    // Booleans are spelled as plain identifiers, so reject any other node kind first.
    if (ast.nodes.items(.tag)[node] != .identifier) {
        return fail(p, tok, "expected identifier", .{});
    }

    const text = ast.tokenSlice(tok);
    if (mem.eql(u8, text, "true")) return true;
    if (mem.eql(u8, text, "false")) return false;
    return fail(p, tok, "expected boolean", .{});
}
fn parseString(p: *Parse, node: Ast.Node.Index) ![]const u8 {
const ast = p.ast;
const node_tags = ast.nodes.items(.tag);

View File

@ -5464,6 +5464,8 @@ pub fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !voi
.location = .{ .relative_path = build_mod.root },
.location_tok = 0,
.hash_tok = 0,
.name_tok = 0,
.lazy_status = .eager,
.parent_package_root = build_mod.root,
.parent_manifest_ast = null,
.prog_node = root_prog_node,
@ -5618,10 +5620,14 @@ pub fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !voi
if (process.can_spawn) {
var child = std.ChildProcess.init(child_argv, gpa);
child.stdin_behavior = .Inherit;
child.stdout_behavior = .Inherit;
child.stdout_behavior = .Pipe;
child.stderr_behavior = .Inherit;
const term = try child.spawnAndWait();
try child.spawn();
// Since only one output stream is piped, we can simply do a blocking
// read until the stream is finished.
const stdout = try child.stdout.?.readToEndAlloc(arena, 50 * 1024 * 1024);
const term = try child.wait();
switch (term) {
.Exited => |code| {
if (code == 0) return cleanExit();
@ -5630,6 +5636,15 @@ pub fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !voi
// diagnostics.
if (code == 2) process.exit(2);
if (code == 3) {
// Indicates the configure phase failed due to missing lazy
// dependencies and stdout contains the hashes of the ones
// that are missing.
std.debug.print("missing lazy dependencies: '{s}'\n", .{stdout});
std.debug.print("TODO: fetch them and rebuild the build script\n", .{});
process.exit(1);
}
const cmd = try std.mem.join(arena, " ", child_argv);
fatal("the following build command failed with exit code {d}:\n{s}", .{ code, cmd });
},
@ -7395,6 +7410,8 @@ fn cmdFetch(
.location = .{ .path_or_url = path_or_url },
.location_tok = 0,
.hash_tok = 0,
.name_tok = 0,
.lazy_status = .eager,
.parent_package_root = undefined,
.parent_manifest_ast = null,
.prog_node = root_prog_node,