Skip to content

Commit

Permalink
Spruce Up Disassembler (#550)
Browse files Browse the repository at this point in the history
Bring disassembler code into the modern Python era by using f-strings for
formatting output. Also, add an option for giving verbose information
about the fields of the disassembled instructions. Finally, add the
ability to print warnings when unused fields in the instruction contain
non-zero values.

Signed-off-by: Will Hawkins <[email protected]>
Co-authored-by: Alan Jowett <[email protected]>
  • Loading branch information
hawkinsw and Alan-Jowett authored Sep 23, 2024
1 parent ffb5d34 commit fe8a79f
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 54 deletions.
4 changes: 3 additions & 1 deletion bin/ubpf-disassembler
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def main():
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('input', type=argparse.FileType('rb'), default='-', nargs='?')
parser.add_argument('output', type=argparse.FileType('w'), default='-', nargs='?')
parser.add_argument('--verbose', dest='verbose', action='store_true', default=False)
args = parser.parse_args()

if args.input.name == "<stdin>" and hasattr(args.input, "buffer"):
Expand All @@ -29,7 +30,8 @@ def main():
else:
input_ = args.input.read()

args.output.write(ubpf.disassembler.disassemble(input_))
print(f"{args.verbose=}")
args.output.write(ubpf.disassembler.disassemble(input_, args.verbose))

if __name__ == "__main__":
main()
170 changes: 117 additions & 53 deletions ubpf/disassembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,10 @@
BPF_CLASS_LDX = 1
BPF_CLASS_ST = 2
BPF_CLASS_STX = 3
BPF_CLASS_ALU = 4
BPF_CLASS_ALU32 = 4
BPF_CLASS_JMP = 5
BPF_CLASS_JMP32 = 6
BPF_CLASS_ALU64 = 7
BPF_CLASS_ALU = 7

BPF_ALU_NEG = 8
BPF_ALU_END = 13
Expand All @@ -96,94 +96,158 @@ def O(off):
else:
return "-" + str(65536-off)

def disassemble_one(data, offset):
def disassemble_one(data, offset, verbose = False):
code, regs, off, imm = Inst.unpack_from(data, offset)
dst_reg = regs & 0xf
src_reg = (regs >> 4) & 0xf
cls = code & 7
clz = code & 7

class_name = CLASSES.get(cls)
increment = 8

if cls == BPF_CLASS_ALU or cls == BPF_CLASS_ALU64:
class Field(object):
def __init__(self, name, value):
self.name = name
self.used = False
self.value = value

def set_used(self):
self.used = True

def set_unused(self):
self.used = False

fields = {}
fields['off'] = Field("offset", off)
fields['dst_reg'] = Field("destination register", dst_reg)
fields['src_reg'] = Field("source register", src_reg)
fields['imm'] = Field("immediate", imm)

disassembled = ""

class_name = CLASSES.get(clz)

if clz == BPF_CLASS_ALU or clz == BPF_CLASS_ALU32:
source = (code >> 3) & 1
opcode = (code >> 4) & 0xf
opcode_name = ALU_OPCODES.get(opcode)
if cls == BPF_CLASS_ALU:
if clz == BPF_CLASS_ALU32:
opcode_name += "32"

if opcode == BPF_ALU_END:
opcode_name = source == 1 and "be" or "le"
return "%s%d %s" % (opcode_name, imm, R(dst_reg))
fields["imm"].used = True
fields["dst_reg"].used = True
disassembled = f'{opcode_name}{imm} {R(dst_reg)}'
elif opcode == BPF_ALU_NEG:
return "%s %s" % (opcode_name, R(dst_reg))
fields["dst_reg"].used = True
disassembled = f'{opcode_name} {R(dst_reg)}'
elif source == 0:
return "%s %s, %s" % (opcode_name, R(dst_reg), I(imm))
fields["dst_reg"].used = True
fields["imm"].used = True
disassembled = f'{opcode_name} {R(dst_reg)}, {I(imm)}'
else:
return "%s %s, %s" % (opcode_name, R(dst_reg), R(src_reg))
elif cls == BPF_CLASS_JMP:
fields["dst_reg"].used = True
fields["src_reg"].used = True
disassembled = f'{opcode_name} {R(dst_reg)}, {R(src_reg)}'
elif clz == BPF_CLASS_JMP or clz == BPF_CLASS_JMP32:
source = (code >> 3) & 1
opcode = (code >> 4) & 0xf
opcode_name = JMP_OPCODES.get(opcode)
if clz == BPF_CLASS_JMP32:
opcode_name += "32"

if opcode_name == "exit":
return opcode_name
elif opcode_name == "call":
if src_reg == 1:
opcode_name += " local"
return "%s %s" % (opcode_name, I(imm))
elif opcode_name == "ja":
return "%s %s" % (opcode_name, O(off))
elif source == 0:
return "%s %s, %s, %s" % (opcode_name, R(dst_reg), I(imm), O(off))
else:
return "%s %s, %s, %s" % (opcode_name, R(dst_reg), R(src_reg), O(off))
elif cls == BPF_CLASS_JMP32:
source = (code >> 3) & 1
opcode = (code >> 4) & 0xf
opcode_name = JMP_OPCODES.get(opcode) + "32"

if opcode_name == "exit":
return opcode_name
disassembled = f'{opcode_name}'
elif opcode_name == "call":
if src_reg == 1:
opcode_name += " local"
return "%s %s" % (opcode_name, I(imm))
fields["imm"].used = True
disassembled = f'{opcode_name} {I(imm)}'
elif opcode_name == "ja":
return "%s %s" % (opcode_name, O(off))
fields["off"].used = True
disassembled = f'{opcode_name} {O(off)}'
elif source == 0:
return "%s %s, %s, %s" % (opcode_name, R(dst_reg), I(imm), O(off))
fields["dst_reg"].used = True
fields["imm"].used = True
fields["off"].used = True
disassembled = f'{opcode_name} {R(dst_reg)}, {I(imm)}, {O(off)}'
else:
return "%s %s, %s, %s" % (opcode_name, R(dst_reg), R(src_reg), O(off))
elif cls == BPF_CLASS_LD or cls == BPF_CLASS_LDX or cls == BPF_CLASS_ST or cls == BPF_CLASS_STX:
fields["dst_reg"].used = True
fields["src_reg"].used = True
fields["off"].used = True
disassembled = f'{opcode_name} {R(dst_reg)}, {R(src_reg)}, {O(off)}'
elif clz == BPF_CLASS_LD:
size = (code >> 3) & 3
mode = (code >> 5) & 7
mode_name = MODES.get(mode, str(mode))
# TODO use different syntax for non-MEM instructions
size_name = SIZES.get(size, str(size))
if code == 0x18: # lddw
if clz == BPF_CLASS_LD and size == 0x3 and src_reg == 0:
# Make sure that we skip the next instruction because we use it here!
increment += 8
_, _, _, imm2 = Inst.unpack_from(data, offset+8)
imm = (imm2 << 32) | imm
return "%s %s, %s" % (class_name + size_name, R(dst_reg), I(imm))
elif code == 0x00:
# Second instruction of lddw
return None
elif cls == BPF_CLASS_LDX:
return "%s %s, %s" % (class_name + size_name, R(dst_reg), M(R(src_reg), off))
elif cls == BPF_CLASS_ST:
return "%s %s, %s" % (class_name + size_name, M(R(dst_reg), off), I(imm))
elif cls == BPF_CLASS_STX:
return "%s %s, %s" % (class_name + size_name, M(R(dst_reg), off), R(src_reg))
fields["dst_reg"].used = True
fields["imm"].used = True
disassembled = f'{class_name}{size_name} {R(dst_reg)}, {I(imm)}'
else:
return "unknown mem instruction %#x" % code
else:
return "unknown instruction %#x" % code
result = f"unknown/unsupported special LOAD instruction {code=:x}"

def disassemble(data):
elif clz == BPF_CLASS_LD or clz == BPF_CLASS_LDX or clz == BPF_CLASS_ST or clz == BPF_CLASS_STX:
size = (code >> 3) & 3
mode = (code >> 5) & 7
mode_name = MODES.get(mode, str(mode))
size_name = SIZES.get(size, str(size))
if clz == BPF_CLASS_LDX:
fields["dst_reg"].used = True
fields["src_reg"].used = True
fields["off"].used = True
disassembled = f'{class_name}{size_name} {R(dst_reg)}, {M(R(src_reg), off)}'
elif clz == BPF_CLASS_ST:
fields["dst_reg"].used = True
fields["off"].used = True
fields["imm"].used = True
disassembled = f'{class_name}{size_name} {M(R(dst_reg), off)}, {I(imm)}'
elif clz == BPF_CLASS_STX:
fields["dst_reg"].used = True
fields["src_reg"].used = True
fields["off"].used = True
disassembled = f'{class_name}{size_name} {M(R(dst_reg), off)}, {R(src_reg)}'
else:
disassembled = f'unknown/unsupported mem instruction {code=:x}'
else:
disassembled = f'unknown/unsupported instruction {code=:x}'

warnings = ""
for k in fields.keys():
if not fields[k].used and fields[k].value != 0:
if len(warnings) != 0:
warnings += "; "
warnings += f"The {fields[k].name} field of the instruction has a value but it is not used by the instruction"

if len(warnings) != 0:
disassembled += f"\n\tWarnings: {warnings}."
disassembled += "\n"

if verbose:
disassembled += "\nDetails:\n"
disassembled += f"\tClass: 0x{clz:x}"
disassembled += "\n"
disassembled += f"\tRegs: 0x{regs:x}"
disassembled += "\n"
disassembled += f"\tOffset: 0x{off:x}"
disassembled += "\n"
disassembled += f"\tImmediate: 0x{imm:x}"
disassembled += "\n"
disassembled += "-----------------"

return disassembled, increment

def disassemble(data, verbose = False):
output = io()
offset = 0
while offset < len(data):
s = disassemble_one(data, offset)
(s, increment) = disassemble_one(data, offset, verbose)
if s:
output.write(s + "\n")
offset += 8
offset += increment
return output.getvalue()

0 comments on commit fe8a79f

Please sign in to comment.