From 2682f2c7d70c90fedd048cc4fc6d44ccb14a0301 Mon Sep 17 00:00:00 2001 From: David Rubin <87927264+Rexicon226@users.noreply.github.com> Date: Fri, 21 Jun 2024 21:57:46 -0700 Subject: [PATCH] CFG Graph Visualizer (#11) * add graph support * add a requirements.txt for plot.py --- .gitignore | 12 +- graph/plot.py | 107 ++++++++++++++ graph/requirements.txt | 2 + src/compiler/Instruction.zig | 9 ++ src/graph/Graph.zig | 274 +++++++++++++++++++++++++++++++++++ src/main.zig | 26 +++- src/modules/builtins.zig | 2 +- src/vm/Object.zig | 46 +++++- src/vm/Vm.zig | 65 ++++++--- 9 files changed, 510 insertions(+), 33 deletions(-) create mode 100644 graph/plot.py create mode 100644 graph/requirements.txt create mode 100644 src/graph/Graph.zig diff --git a/.gitignore b/.gitignore index 50a373a..27ff112 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,11 @@ .zig-cache/ -zig-out/ -temp/ .vscode/ -traces/ +zig-out/ -# Don't want any artifacts +graph/* +!graph/plot.py +!graph/requirements.txt + +traces/ **/__pycache__ -demo/ \ No newline at end of file +demo/ diff --git a/graph/plot.py b/graph/plot.py new file mode 100644 index 0000000..7edbbd0 --- /dev/null +++ b/graph/plot.py @@ -0,0 +1,107 @@ +import struct +import matplotlib.pyplot as plt +import networkx as nx +import math + +def get_offset_pos(pos, edges, offset_scale=0.03): + offset_pos = {node: pos[node] for node in pos} + for edge in edges: + x1, y1 = pos[edge[0]] + x2, y2 = pos[edge[1]] + dx = x2 - x1 + dy = y2 - y1 + length = math.sqrt(dx*dx + dy*dy) + if length == 0: + continue + offset_x = offset_scale * dy / length + offset_y = -offset_scale * dx / length + offset_pos[edge[0]] = (offset_pos[edge[0]][0] + offset_x, offset_pos[edge[0]][1] + offset_y) + offset_pos[edge[1]] = (offset_pos[edge[1]][0] + offset_x, offset_pos[edge[1]][1] + offset_y) + return offset_pos + +def read_graph_binary(filename): + nodes = [] + edges = [] + cfg_edges = [] + + with open(filename, "rb") as file: + while True: + node_id_bytes = 
file.read(4) + if not node_id_bytes: + break + node_id = struct.unpack("i", node_id_bytes)[0] + if node_id == -1: + break + data_len = struct.unpack("Q", file.read(8))[0] + data = file.read(data_len).decode("utf-8") + nodes.append((node_id, data)) + + while True: + edge_from_bytes = file.read(4) + if not edge_from_bytes: + break + edge_from = struct.unpack("i", edge_from_bytes)[0] + if edge_from == -1: + break + edge_to = struct.unpack("i", file.read(4))[0] + edges.append((edge_from, edge_to)) + + while True: + cfg_from_bytes = file.read(4) + if not cfg_from_bytes: + break + cfg_from = struct.unpack("i", cfg_from_bytes)[0] + cfg_to = struct.unpack("i", file.read(4))[0] + cfg_edges.append((cfg_from, cfg_to)) + + return nodes, edges, cfg_edges + +nodes, edges, cfg_edges = read_graph_binary("graph.bin") + +G = nx.DiGraph() + +for node in nodes: + G.add_node(node[0], label=node[1]) + +for edge in edges: + G.add_edge(edge[0], edge[1]) + +predecessors = {cfg_edge[1] for cfg_edge in cfg_edges} +nodes_with_edges = [node for node in G.nodes if G.out_degree(node) > 0 or G.in_degree(node) > 0 and node in predecessors] + +G_filtered = G.subgraph(nodes_with_edges).copy() + +# Coloring the first and last nodes in the CFG +first_node = cfg_edges[0][0] +predecessors = {cfg_edge[1] for cfg_edge in cfg_edges} +nodes_with_no_successors = {node for node in G_filtered.nodes if all(cfg_edge[0] != node for cfg_edge in cfg_edges) and node in predecessors} + +node_colors = ['green' if node == first_node else 'red' if node in nodes_with_no_successors else 'skyblue' for node in G_filtered.nodes] + +pos = nx.nx_agraph.graphviz_layout(G_filtered, prog='dot') +plt.figure(figsize=(15, 10)) +nx.draw(G_filtered, pos, with_labels=True, labels=nx.get_node_attributes(G_filtered, 'label'), node_color=node_colors, node_size=3000, font_size=10, font_color='black', font_weight='bold', edge_color='gray') + +data_edges_pos = pos +cfg_edges_pos = get_offset_pos(pos, cfg_edges) + +for edge in edges: + if 
edge[0] in G_filtered and edge[1] in G_filtered: + nx.draw_networkx_edges(G_filtered, data_edges_pos, edgelist=[edge], edge_color='gray', arrows=True) + +scale = 4 + +for cfg_edge in cfg_edges: + if cfg_edge[0] in G_filtered and cfg_edge[1] in G_filtered: + x1, y1 = pos[cfg_edge[0]] + x2, y2 = cfg_edges_pos[cfg_edge[1]] + dx = x2 - x1 + dy = y2 - y1 + length = math.sqrt(dx*dx + dy*dy) + if length == 0: + continue + offset_x = scale * dy / length + offset_y = -scale * dx / length + plt.arrow(x1 + offset_x, y1 + offset_y, dx, dy, color='red', linewidth=1, head_width=0.1, head_length=0.2) + +plt.savefig("out.png") diff --git a/graph/requirements.txt b/graph/requirements.txt new file mode 100644 index 0000000..92ec552 --- /dev/null +++ b/graph/requirements.txt @@ -0,0 +1,2 @@ +matplotlib >= 3.8.4 +networkx >= 2.8.8 \ No newline at end of file diff --git a/src/compiler/Instruction.zig b/src/compiler/Instruction.zig index 0e69131..7112bcf 100644 --- a/src/compiler/Instruction.zig +++ b/src/compiler/Instruction.zig @@ -71,6 +71,15 @@ fn format2( } } +pub fn returns( + inst: Instruction, +) bool { + return switch (inst.op) { + .RETURN_VALUE => true, + else => false, + }; +} + pub fn fmt(inst: Instruction, co: CodeObject) std.fmt.Formatter(format2) { return .{ .data = .{ .co = co, diff --git a/src/graph/Graph.zig b/src/graph/Graph.zig new file mode 100644 index 0000000..721e310 --- /dev/null +++ b/src/graph/Graph.zig @@ -0,0 +1,274 @@ +//! 
Creates a temporal graph of a CodeObject + +const std = @import("std"); +const CodeObject = @import("../compiler/CodeObject.zig"); +const Instruction = @import("../compiler/Instruction.zig"); +const builtins = @import("../modules/builtins.zig"); +const log = std.log.scoped(.graph); +const Graph = @This(); + +const assert = std.debug.assert; + +allocator: std.mem.Allocator, +nodes: std.MultiArrayList(Node) = .{}, +edges: std.MultiArrayList(Edge) = .{}, +co: CodeObject, + +/// shows the control flow of nodes +cfg: std.MultiArrayList(Edge) = .{}, + +scope: std.StringHashMapUnmanaged(u32) = .{}, + +pub fn evaluate( + allocator: std.mem.Allocator, + input_co: CodeObject, +) !Graph { + var co = try input_co.clone(allocator); + try co.process(allocator); + const instructions = co.instructions.?; + + var graph: Graph = .{ + .allocator = allocator, + .co = co, + }; + + // insert some names that will always exist into the graph to depend on + inline for (builtins.builtin_fns) |entry| { + const name = entry[0]; + try graph.nodes.append(allocator, .{ + .data = .none, + .name = name, + }); + try graph.scope.put( + allocator, + name, + @intCast(graph.nodes.len - 1), + ); + } + + for (instructions, 0..) |inst, i| { + try graph.walkInst(inst); + const new_index: u32 = @intCast(graph.nodes.len - 1); + if (i != 0 and !instructions[i - 1].returns()) { + try graph.cfg.append(allocator, .{ + .from = new_index - 1, + .to = new_index, + }); + } + } + + return graph; +} + +pub fn walkInst(graph: *Graph, inst: Instruction) !void { + log.debug("walkInst: {s}", .{@tagName(inst.op)}); + + const allocator = graph.allocator; + try graph.nodes.append(allocator, .{ + .data = .none, + .name = @tagName(inst.op), + }); + const new_index: u32 = @intCast(graph.nodes.len - 1); + + switch (inst.op) { + // these instructions have a direct edge to the instruction above them. + // usually when the instruction pops one off of the stack. 
+ .POP_TOP, + .RETURN_VALUE, + .LOAD_METHOD, + => { + try graph.edges.append(allocator, .{ + .from = new_index - 1, + .to = new_index, + }); + }, + + // same thing as above, but relies on the two above instructions + .CALL_FUNCTION, + .MAKE_FUNCTION, + .COMPARE_OP, + .LIST_EXTEND, + .INPLACE_ADD, + .BINARY_ADD, + .INPLACE_SUBTRACT, + .BINARY_SUBTRACT, + => { + try graph.edges.append(allocator, .{ + .from = new_index - 1, + .to = new_index, + }); + + try graph.edges.append(allocator, .{ + .from = new_index - 2, + .to = new_index, + }); + }, + + // instructions that only have N arguments + .BUILD_LIST, + => { + for (0..inst.extra) |i| { + try graph.edges.append(allocator, .{ + .from = new_index - @as(u32, @intCast(i)) - 1, + .to = new_index, + }); + } + }, + + // function calls have N amount of arguments + .CALL_METHOD, + => { + try graph.edges.append(allocator, .{ + .from = new_index - 1, + .to = new_index, + }); + + try graph.edges.append(allocator, .{ + .from = new_index - 2, + .to = new_index, + }); + + for (0..inst.extra) |i| { + try graph.edges.append(allocator, .{ + .from = new_index - @as(u32, @intCast(i)) - 3, // 1 for offset, 2 for the above two edges + .to = new_index, + }); + } + }, + + // we try to create two edges between the node + // and both targets it could jump to. 
in theory, + // the number of insts should be the same as the number of nodes, + // so we can simply create a forward edge for the node that doesn't exist yet + .POP_JUMP_IF_FALSE, + .POP_JUMP_IF_TRUE, + => { + // the compare op + // (data dependency on the previous node) + try graph.edges.append(allocator, .{ + .from = new_index - 1, + .to = new_index, + }); + + // fall through + try graph.edges.append(allocator, .{ + .from = new_index, // ourselves + .to = new_index + 1, + }); + + // target + try graph.edges.append(allocator, .{ + .from = new_index, // ourselves + .to = inst.extra + @as(u32, @intCast(builtins.builtin_fns.len)), // instruction nodes are appended after the builtin placeholder nodes + }); + + try graph.cfg.append(allocator, .{ + .from = new_index, + .to = new_index + 1, + }); + + try graph.cfg.append(allocator, .{ + .from = new_index, + .to = inst.extra + @as(u32, @intCast(builtins.builtin_fns.len)), + }); + }, + + .JUMP_FORWARD => { + try graph.edges.append(allocator, .{ + .from = new_index, + .to = new_index + inst.extra, + }); + + try graph.cfg.append(allocator, .{ + .from = new_index, + .to = new_index + inst.extra, + }); + }, + + .STORE_NAME => { + try graph.edges.append(allocator, .{ + .from = new_index - 1, + .to = new_index, + }); + + try graph.scope.put( + allocator, + graph.co.getName(inst.extra), + new_index, + ); + }, + + .LOAD_NAME => { + const dependee = graph.scope.get(graph.co.getName(inst.extra)) orelse { + @panic("didn't find dependee"); + }; + + try graph.edges.append(allocator, .{ + .from = dependee, + .to = new_index, + }); + }, + + // a dependee, has no dependencies + .LOAD_CONST, + => {}, + else => std.debug.panic("TODO: walkInst {s}", .{@tagName(inst.op)}), + } +} + +pub fn deinit(graph: *Graph) void { + graph.cfg.deinit(graph.allocator); + graph.nodes.deinit(graph.allocator); + graph.edges.deinit(graph.allocator); + graph.co.deinit(graph.allocator); + graph.scope.deinit(graph.allocator); + + graph.* = undefined; +} + +pub fn dump( + graph: Graph, +) !void { + const outfile = try std.fs.cwd().createFile("graph.bin", 
.{}); + defer outfile.close(); + const writer = outfile.writer(); + + const node_names = graph.nodes.items(.name); + for (node_names, 0..) |name, id| { + try writer.writeInt(u32, @intCast(id), .little); + try writer.writeInt(u64, name.len, .little); // fixed 8-byte length on every target; plot.py reads exactly 8 bytes here + try writer.writeAll(name); + } + + try writer.writeInt(i32, -1, .little); + + const edge_froms = graph.edges.items(.from); + const edge_tos = graph.edges.items(.to); + for (edge_froms, edge_tos) |from, to| { + try writer.writeInt(u32, from, .little); + try writer.writeInt(u32, to, .little); + } + + try writer.writeInt(i32, -1, .little); + + const cfg_froms = graph.cfg.items(.from); + const cfg_tos = graph.cfg.items(.to); + for (cfg_froms, cfg_tos) |from, to| { + try writer.writeInt(u32, from, .little); + try writer.writeInt(u32, to, .little); + } +} + +pub const Node = struct { + data: Data, + name: []const u8, + + pub const Data = union(enum) { + none, + }; +}; + +pub const Edge = struct { + from: u32, + to: u32, +}; diff --git a/src/main.zig b/src/main.zig index 111a097..ed87340 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,6 +1,7 @@ const std = @import("std"); const builtin = @import("builtin"); +const Graph = @import("graph/Graph.zig"); const Python = @import("frontend/Python.zig"); const Marshal = @import("compiler/Marshal.zig"); const Vm = @import("vm/Vm.zig"); @@ -63,6 +64,10 @@ pub fn log( std.debug.print(prefix1 ++ prefix2 ++ format ++ "\n", args); } +const Args = struct { + make_graph: bool, +}; + pub fn main() !u8 { crash_report.initialize(); @@ -93,6 +98,9 @@ pub fn main() !u8 { defer args.deinit(); var file_path: ?[:0]const u8 = null; + var options: Args = .{ + .make_graph = false, + }; while (args.next()) |arg| { if (std.mem.eql(u8, arg, "--help") or std.mem.eql(u8, arg, "-h")) { @@ -113,11 +121,13 @@ pub fn main() !u8 { const scope = args.next() orelse fatal("--debug-log expects scope", .{}); try log_scopes.append(allocator, scope); } + } else if (std.mem.eql(u8, arg, "--graph")) { + 
options.make_graph = true; } } if (file_path) |path| { - try run_file(allocator, path); + try run_file(allocator, path, options); return 0; } @@ -155,7 +165,11 @@ fn fatal(comptime fmt: []const u8, args: anytype) noreturn { std.posix.exit(1); } -pub fn run_file(allocator: std.mem.Allocator, file_name: [:0]const u8) !void { +pub fn run_file( + allocator: std.mem.Allocator, + file_name: [:0]const u8, + options: Args, +) !void { const t = tracer.trace(@src(), "", .{}); defer t.end(); @@ -183,6 +197,14 @@ pub fn run_file(allocator: std.mem.Allocator, file_name: [:0]const u8) !void { defer marshal.deinit(); const seed = try marshal.parse(); + + if (options.make_graph) { + var graph = try Graph.evaluate(allocator, seed); + defer graph.deinit(); + + try graph.dump(); + } + var vm = try Vm.init(gc_allocator, file_name, seed); { var dir_path_buf: [std.fs.MAX_PATH_BYTES]u8 = undefined; diff --git a/src/modules/builtins.zig b/src/modules/builtins.zig index bf4e126..d207917 100644 --- a/src/modules/builtins.zig +++ b/src/modules/builtins.zig @@ -49,7 +49,7 @@ pub fn create(allocator: std.mem.Allocator) !Module { } /// https://docs.python.org/3.10/library/functions.html -const builtin_fns = &.{ +pub const builtin_fns = &.{ // // zig fmt: off .{ "abs", abs }, .{ "bool", @"bool" }, diff --git a/src/vm/Object.zig b/src/vm/Object.zig index e79b88c..979a0ce 100644 --- a/src/vm/Object.zig +++ b/src/vm/Object.zig @@ -285,12 +285,35 @@ pub fn getMemberFunction(object: *const Object, name: []const u8, allocator: All const member_list: Payload.MemberFuncTy = switch (object.tag) { .list => Payload.List.MemberFns, .set => Payload.Set.MemberFns, + .module => blk: { + // we parse out all of the functions within the module. 
+ var list = std.ArrayList(std.meta.Child(Payload.MemberFuncTy)).init(allocator); + const module = object.get(.module); + var iter = module.dict.iterator(); + while (iter.next()) |entry| { + if (entry.value_ptr.tag == .function) { + const cloned = try entry.value_ptr.clone(allocator); + + try list.append(.{ + .name = try allocator.dupe(u8, entry.key_ptr.*), + .func = .{ .py_func = cloned.get(.function).* }, + }); + } + } + break :blk try list.toOwnedSlice(); + }, else => std.debug.panic("{s} has no member functions", .{@tagName(object.tag)}), }; for (member_list) |func| { if (std.mem.eql(u8, func.name, name)) { - const func_ptr = func.func; - return try Object.create(.zig_function, allocator, func_ptr); + switch (func.func) { + .zig_func => |func_ptr| { + return try Object.create(.zig_function, allocator, func_ptr); + }, + .py_func => |py_func| { + return try Object.create(.function, allocator, py_func); + }, + } } } return null; @@ -309,6 +332,17 @@ pub fn callMemberFunction( try @call(.auto, func_ptr.*, .{ vm, self_args, kw }); } +/// The return belongs to the `object`. 
+pub fn ident(object: *const Object) []const u8 { + switch (object.tag) { + .module => { + const mod = object.get(.module); + return mod.name; + }, + else => return @tagName(object.tag), + } +} + pub const Payload = union(enum) { int: Int, string: String, @@ -324,7 +358,7 @@ pub const Payload = union(enum) { pub const MemberFuncTy = []const struct { name: []const u8, - func: *const builtins.func_proto, + func: union(enum) { zig_func: ZigFunc, py_func: PythonFunction }, }; pub const ZigFunc = *const builtins.func_proto; @@ -337,7 +371,7 @@ pub const Payload = union(enum) { pub const HashMap = std.ArrayListUnmanaged(Object); pub const MemberFns: MemberFuncTy = &.{ - .{ .name = "append", .func = append }, + .{ .name = "append", .func = .{ .zig_func = append } }, }; fn append(vm: *Vm, args: []const Object, kw: ?builtins.KW_Type) !void { @@ -402,8 +436,8 @@ pub const Payload = union(enum) { // zig fmt: off pub const MemberFns: MemberFuncTy = &.{ - .{ .name = "update", .func = update }, - .{ .name = "add" , .func = add }, + .{ .name = "update", .func = .{ .zig_func = update } }, + .{ .name = "add" , .func = .{ .zig_func = add } }, }; // zig fmt: on diff --git a/src/vm/Vm.zig b/src/vm/Vm.zig index e35886b..6d93ea6 100644 --- a/src/vm/Vm.zig +++ b/src/vm/Vm.zig @@ -219,6 +219,8 @@ fn exec(vm: *Vm, inst: Instruction) !void { .POP_JUMP_IF_TRUE => try vm.execPopJump(inst, true), .POP_JUMP_IF_FALSE => try vm.execPopJump(inst, false), + .JUMP_FORWARD => try vm.execJumpForward(inst), + .INPLACE_ADD, .BINARY_ADD => try vm.execBinaryOperation(.add), .INPLACE_SUBTRACT, .BINARY_SUBTRACT => try vm.execBinaryOperation(.sub), .INPLACE_MULTIPLY, .BINARY_MULTIPLY => try vm.execBinaryOperation(.mul), @@ -253,16 +255,16 @@ fn execLoadName(vm: *Vm, inst: Instruction) !void { const name = vm.co.getName(inst.extra); const val = vm.lookUpwards(name) orelse vm.fail("couldn't find '{s}'", .{name}); + log.debug("load name: {}", .{val}); try vm.stack.append(vm.allocator, val); } fn 
execLoadMethod(vm: *Vm, inst: Instruction) !void { const name = vm.co.getName(inst.extra); - const tos = vm.stack.pop(); const func = try tos.getMemberFunction(name, vm.allocator) orelse { - vm.fail("couldn't find '{s}.{s}'", .{ @tagName(tos.tag), name }); + vm.fail("couldn't find '{s}.{s}'", .{ tos.ident(), name }); }; try vm.stack.append(vm.allocator, func); @@ -272,8 +274,7 @@ fn execLoadMethod(vm: *Vm, inst: Instruction) !void { fn execLoadGlobal(vm: *Vm, inst: Instruction) !void { const name = vm.co.getName(inst.extra); const val = vm.scopes.items[0].get(name) orelse blk: { - // python is allowed to load builtin function using LOAD_GLOBAl - // as well + // python is allowed to load builtin function using LOAD_GLOBAL as well const builtin = vm.builtin_mods.get("builtins") orelse @panic("didn't init builtins"); const print_obj = builtin.dict.get(name) orelse vm.fail("name '{s}' not defined in the global scope", .{name}); break :blk print_obj; @@ -356,24 +357,25 @@ fn execBuildSet(vm: *Vm, inst: Instruction) !void { } fn execListExtend(vm: *Vm, inst: Instruction) !void { - _ = inst; - - const tos = vm.stack.pop(); - const list = vm.stack.pop(); + const additions = vm.stack.pop(); + const list = vm.stack.items[vm.stack.items.len - inst.extra]; assert(list.tag == .list); - assert(tos.tag == .list or tos.tag == .tuple); + assert(additions.tag == .list or additions.tag == .tuple); const list_ptr = list.get(.list); - switch (tos.tag) { - .tuple => { - const tuple = tos.get(.tuple); - try list_ptr.list.appendSlice(vm.allocator, tuple); + // LIST_EXTEND(i) extends the list at STACK[-i] with the popped TOS: append, never insert mid-list. + const new: []const Object = switch (additions.tag) { + .tuple => additions.get(.tuple), + .list => list: { + const additions_list = additions.get(.list); + break :list try vm.allocator.dupe(Object, additions_list.list.items); }, - .list => @panic("TODO: execListExtend list"), else => unreachable, - } + }; + + try list_ptr.list.appendSlice(vm.allocator, new); } fn execCallFunction(vm: 
*Vm, inst: Instruction) !void { @@ -433,11 +435,25 @@ fn execCallMethod(vm: *Vm, inst: Instruction) !void { const self = vm.stack.pop(); const func = vm.stack.pop(); - const func_ptr = func.get(.zig_function); - - const self_args = try std.mem.concat(vm.allocator, Object, &.{ &.{self}, args }); - try @call(.auto, func_ptr.*, .{ vm, self_args, null }); + switch (func.tag) { + .function => { + const py_func = func.get(.function); + try vm.scopes.append(vm.allocator, .{}); + try vm.co_stack.append(vm.allocator, vm.co); + vm.setNewCo(py_func.co); + for (args, 0..) |arg, i| { + vm.co.varnames[i] = arg; + } + vm.depth += 1; + }, + .zig_function => { + const func_ptr = func.get(.zig_function); + const self_args = try std.mem.concat(vm.allocator, Object, &.{ &.{self}, args }); + try @call(.auto, func_ptr.*, .{ vm, self_args, null }); + }, + else => unreachable, + } } fn execPopTop(vm: *Vm) !void { @@ -601,6 +617,10 @@ fn execPopJump(vm: *Vm, inst: Instruction, case: bool) !void { } } +fn execJumpForward(vm: *Vm, inst: Instruction) !void { + vm.co.index += inst.extra; +} + fn execMakeFunction(vm: *Vm, inst: Instruction) !void { const arg_ty: Object.Payload.PythonFunction.ArgType = @enumFromInt(inst.extra); @@ -807,3 +827,10 @@ pub fn fail( std.posix.exit(1); } + +pub fn absIndex(index: i128, length: usize) usize { + return if (length == 0) 0 else if (index < 0) val: { + const back: usize = std.math.cast(usize, @abs(index)) orelse length; // offsets too large for usize clamp to the start + break :val length -| back; // saturate at 0 instead of underflowing when |index| > length + } else @intCast(index); +}