diff --git a/custom_tests/data/ubpf_test_frame_pointer.input b/custom_tests/data/ubpf_test_frame_pointer.input new file mode 100644 index 000000000..222f3f580 --- /dev/null +++ b/custom_tests/data/ubpf_test_frame_pointer.input @@ -0,0 +1 @@ +b7 06 00 00 0a 00 00 00 b7 07 00 00 0a 00 00 00 b7 08 00 00 0a 00 00 00 b7 09 00 00 0a 00 00 00 b7 01 00 00 05 00 00 00 7b 1a f8 ff 00 00 00 00 85 10 00 00 02 00 00 00 79 a0 f8 ff 00 00 00 00 95 00 00 00 00 00 00 00 b7 01 00 00 37 00 00 00 7b 1a f8 ff 00 00 00 00 95 00 00 00 00 00 00 00 diff --git a/custom_tests/descrs/ubpf_test_frame_pointer.md b/custom_tests/descrs/ubpf_test_frame_pointer.md new file mode 100644 index 000000000..e0a872ad3 --- /dev/null +++ b/custom_tests/descrs/ubpf_test_frame_pointer.md @@ -0,0 +1,4 @@ +## Test Description + +This custom test program tests whether it is possible to update the external helper +functions for an eBPF program that has already been JIT'd. diff --git a/custom_tests/srcs/ubpf_test_frame_pointer.cc b/custom_tests/srcs/ubpf_test_frame_pointer.cc new file mode 100644 index 000000000..e75121861 --- /dev/null +++ b/custom_tests/srcs/ubpf_test_frame_pointer.cc @@ -0,0 +1,103 @@ +// Copyright (c) Will Hawkins +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include +#include +#include +#include + +extern "C" +{ +#include "ubpf.h" +} + +#include "ubpf_custom_test_support.h" + +int +stack_usage_calculator(const struct ubpf_vm *vm, uint16_t pc, void *cookie) +{ + UNREFERENCED_PARAMETER(vm); + UNREFERENCED_PARAMETER(pc); + UNREFERENCED_PARAMETER(cookie); + return 16; +} + +int +overwrite_stack_usage_calculator(const struct ubpf_vm *vm, uint16_t pc, void *cookie) +{ + UNREFERENCED_PARAMETER(vm); + UNREFERENCED_PARAMETER(pc); + UNREFERENCED_PARAMETER(cookie); + return 0; +} + +int main(int argc, char **argv) +{ + std::vector args(argv, argv + argc); + std::string program_string{}; + ubpf_jit_fn jit_fn; + + std::getline(std::cin, program_string); + + uint64_t 
no_overwrite_interp_result = 0; + uint64_t no_overwrite_jit_result = 0; + uint64_t overwrite_interp_result = 0; + uint64_t overwrite_jit_result = 0; + + { + + std::unique_ptr vm(ubpf_create(), ubpf_destroy); + std::string error{}; + if (!ubpf_setup_custom_test( + vm, + program_string, + [](ubpf_vm_up& vm, std::string& error) { + if (ubpf_register_stack_usage_calculator(vm.get(), stack_usage_calculator, nullptr) < 0) { + error = "Failed to register stack usage calculator."; + return false; + } + return true; + }, + jit_fn, + error)) { + std::cerr << "Problem setting up custom test: " << error << std::endl; + return 1; + } + + no_overwrite_jit_result = jit_fn(nullptr, 0); + [[maybe_unused]] auto exec_result = ubpf_exec(vm.get(), NULL, 0, &no_overwrite_interp_result); + } + + { + + std::unique_ptr vm(ubpf_create(), ubpf_destroy); + std::string error{}; + if (!ubpf_setup_custom_test( + vm, + program_string, + [](ubpf_vm_up& vm, std::string& error) { + if (ubpf_register_stack_usage_calculator(vm.get(), overwrite_stack_usage_calculator, nullptr) < 0) { + error = "Failed to register stack usage calculator."; + return false; + } + return true; + }, + jit_fn, + error)) { + std::cerr << "Problem setting up custom test: " << error << std::endl; + return 1; + } + + overwrite_jit_result = jit_fn(nullptr, 0); + + [[maybe_unused]] auto exec_result = ubpf_exec(vm.get(), NULL, 0, &overwrite_interp_result); + } + // ... because of the semantics of external_dispatcher, the result of the eBPF + // program execution should point to the same place to which &memory points. 
+ return !(no_overwrite_interp_result == no_overwrite_jit_result && + no_overwrite_interp_result == 0x5 && + overwrite_interp_result == overwrite_jit_result && + overwrite_interp_result == 0x37); +} diff --git a/tests/call-save.data b/tests/call-save.data index bbf5300a8..423947ff9 100644 --- a/tests/call-save.data +++ b/tests/call-save.data @@ -3,13 +3,68 @@ mov %r6, 0x0001 mov %r7, 0x0020 mov %r8, 0x0300 mov %r9, 0x4000 + +# r1 should contain pointer to program memory. +# Don't screw that up because helper function 1 (memfrob) +# needs it. +mov %r2, 0x0001 +mov %r3, 0x0001 +mov %r4, 0x0001 +mov %r5, 0x0001 +call 1 +mov %r0, 0 +or %r0, %r6 +or %r0, %r7 +or %r0, %r8 +or %r0, %r9 +jeq %r0, 0x4321, +1 +exit + +# Call helper function 0 -- the memory pointer is +# no longer needed for any other helper functions, so +# we don't have to worry about keeping it safe. +mov %r1, 0x0001 +mov %r2, 0x0001 +mov %r3, 0x0001 +mov %r4, 0x0001 +mov %r5, 0x0001 +call 0 +mov %r0, 0 +or %r0, %r6 +or %r0, %r7 +or %r0, %r8 +or %r0, %r9 +jeq %r0, 0x4321, +1 +exit + +mov %r1, 0x0001 +mov %r2, 0x0001 +mov %r3, 0x0001 +mov %r4, 0x0001 +mov %r5, 0x0001 call 2 mov %r0, 0 or %r0, %r6 or %r0, %r7 or %r0, %r8 or %r0, %r9 +jeq %r0, 0x4321, +1 +exit + +mov %r1, 0x0001 +mov %r2, 0x0001 +mov %r3, 0x0001 +mov %r4, 0x0001 +mov %r5, 0x0001 +call 3 +mov %r0, 0 +or %r0, %r6 +or %r0, %r7 +or %r0, %r8 +or %r0, %r9 exit +-- mem +01 02 03 04 05 06 07 08 -- result 0x4321 -- no register offset diff --git a/ubpf_plugin/ubpf_plugin.cc b/ubpf_plugin/ubpf_plugin.cc index ec48ff175..8ba047922 100644 --- a/ubpf_plugin/ubpf_plugin.cc +++ b/ubpf_plugin/ubpf_plugin.cc @@ -73,6 +73,24 @@ bytes_to_ebpf_inst(std::vector bytes) return instructions; } +/** + * @brief The handler to determine the stack usage of local functions. + * + * @param[in] vm Pointer to the VM of which the local function at pc is a part. + * @param[in] pc The instruction address of the local function. 
+ * @param[in] cookie A pointer to the context cookie given when this callback + * was registered. + * @return The amount of stack used by the local function starting at pc. + */ +int stack_usage_calculator(const struct ubpf_vm *vm, uint16_t pc, void *cookie) { + UNREFERENCED_PARAMETER(pc); + UNREFERENCED_PARAMETER(cookie); + UNREFERENCED_PARAMETER(vm); + // We will default to a conservative 64 bytes of stack usage for each local function. + // That should be enough for all the conformance tests. + return 64; +} + /** * @brief This program reads BPF instructions from stdin and memory contents from * the first agument. It then executes the BPF program and prints the @@ -138,6 +156,8 @@ int main(int argc, char **argv) ubpf_register_external_dispatcher(vm.get(), test_helpers_dispatcher, test_helpers_validater); + ubpf_register_stack_usage_calculator(vm.get(), stack_usage_calculator, nullptr); + if (ubpf_set_unwind_function_index(vm.get(), 5) != 0) { std::cerr << "Failed to set unwind function index" << std::endl; @@ -246,7 +266,7 @@ int main(int argc, char **argv) } } - // ... but first reset program memory. + // ... but first reset program memory ... usable_program_memory = memory; usable_program_memory_pointer = nullptr; if (usable_program_memory.size() != 0) { @@ -260,6 +280,30 @@ int main(int argc, char **argv) return 1; } + // ... and, for the cherry on the sundae, execute the program by specifying a stack ... + uint64_t* external_stack = NULL; + + external_stack = (uint64_t*)calloc(512, 1); + if (!external_stack) { + return -1; + } + + // ... but first, reset that pesky memory again ... 
+ usable_program_memory = memory; + usable_program_memory_pointer = nullptr; + if (usable_program_memory.size() != 0) { + usable_program_memory_pointer = usable_program_memory.data(); + } + + uint64_t external_memory_index_helper_result; + if (ubpf_exec_ex(vm.get(), usable_program_memory_pointer, usable_program_memory.size(), &external_memory_index_helper_result, (uint8_t*)external_stack, 512) != 0) + { + std::cerr << "Failed to execute program" << std::endl; + return 1; + } + + free(external_stack); + // ... and make sure the results are the same. if (external_dispatcher_result != index_helper_result) { std::cerr << "Execution of the interpreted code with external and indexed helpers gave difference results: 0x" diff --git a/vm/inc/ubpf.h b/vm/inc/ubpf.h index 6ee77cbe0..e305c6132 100644 --- a/vm/inc/ubpf.h +++ b/vm/inc/ubpf.h @@ -40,12 +40,15 @@ extern "C" #endif /** - * @brief Default stack size for the VM. Must be divisible by 16. + * @brief Default stack size for the eBPF program. Must be divisible by 16. */ -#if !defined(UBPF_STACK_SIZE) -#define UBPF_STACK_SIZE 512 +#if !defined(UBPF_EBPF_STACK_SIZE) +// #define UBPF_EBPF_STACK_SIZE 512 +#define UBPF_EBPF_STACK_SIZE 4096 #endif +#define UBPF_EBPF_NONVOLATILE_SIZE (sizeof(uint64_t) * 5) + /** * @brief Default maximum number of nested calls in the VM. */ @@ -63,6 +66,16 @@ extern "C" */ typedef uint64_t (*ubpf_jit_fn)(void* mem, size_t mem_len); + /** + * @brief Enum to describe JIT mode. + */ + + enum JitMode + { + ExtendedJitMode, + BasicJitMode + }; + /** * @brief Create a new uBPF VM. 
* @@ -160,9 +173,12 @@ extern "C" */ int ubpf_register_external_dispatcher( - struct ubpf_vm* vm, - external_function_dispatcher_t dispatcher, - external_function_validate_t validater); + struct ubpf_vm* vm, external_function_dispatcher_t dispatcher, external_function_validate_t validater); + + typedef int (*stack_usage_calculator_t)(const struct ubpf_vm* vm, uint16_t pc, void* cookie); + + int + ubpf_register_stack_usage_calculator(struct ubpf_vm* vm, stack_usage_calculator_t calculator, void* cookie); /** * @brief Load code into a VM. @@ -268,6 +284,15 @@ extern "C" int ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_return_value); + int + ubpf_exec_ex( + const struct ubpf_vm* vm, + void* mem, + size_t mem_len, + uint64_t* bpf_return_value, + uint8_t* stack, + size_t stack_len); + /** * @brief Compile a BPF program in the VM to native code. * @@ -294,7 +319,7 @@ extern "C" * NULL on failure. */ ubpf_jit_fn - ubpf_copy_jit(struct ubpf_vm* vm, void *buffer, size_t size, char** errmsg); + ubpf_copy_jit(struct ubpf_vm* vm, void* buffer, size_t size, char** errmsg); /** * @brief Translate the eBPF byte code to machine code. @@ -428,7 +453,6 @@ extern "C" int ubpf_set_instruction_limit(struct ubpf_vm* vm, uint32_t limit, uint32_t* previous_limit); - #ifdef __cplusplus } #endif diff --git a/vm/test.c b/vm/test.c index 80f4d1000..fa50b413d 100644 --- a/vm/test.c +++ b/vm/test.c @@ -176,6 +176,25 @@ map_relocation_bounds_check_function(void* user_context, uint64_t addr, uint64_t } return false; } +/** + * @brief The handler to determine the stack usage of local functions. + * + * @param[in] vm Pointer to the VM of which the local function at pc is a part. + * @param[in] pc The instruction address of the local function. + * @param[in] cookie A pointer to the context cookie given when this callback + * was registered. + * @return The amount of stack used by the local function starting at pc. 
+ */ +int +stack_usage_calculator(const struct ubpf_vm* vm, uint16_t pc, void* cookie) +{ + (void)(pc); + (void)(cookie); + (void)(vm); + // This is sized large enough that the rel_64_32.bpf.c program has enough space + // for each local function! + return 32; +} int main(int argc, char** argv) @@ -283,6 +302,7 @@ main(int argc, char** argv) register_functions(vm); + ubpf_register_stack_usage_calculator(vm, stack_usage_calculator, NULL); /* * The ELF magic corresponds to an RSH instruction with an offset, * which is invalid. diff --git a/vm/ubpf_int.h b/vm/ubpf_int.h index 1389886b3..41a0da0b2 100644 --- a/vm/ubpf_int.h +++ b/vm/ubpf_int.h @@ -21,6 +21,7 @@ #ifndef UBPF_INT_H #define UBPF_INT_H +#include #include #include "ebpf.h" @@ -29,16 +30,24 @@ struct ebpf_inst; typedef uint64_t (*ext_func)(uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4); -typedef enum { +typedef enum +{ UBPF_JIT_COMPILE_SUCCESS, UBPF_JIT_COMPILE_FAILURE, } upbf_jit_result_t; -struct ubpf_jit_result { +struct ubpf_jit_result +{ uint32_t external_dispatcher_offset; uint32_t external_helper_offset; upbf_jit_result_t compile_result; - char *errmsg; + char* errmsg; +}; + +struct ubpf_stack_usage +{ + bool stack_usage_calculated; + uint16_t stack_usage; }; #define MAX_EXT_FUNCS 64 @@ -56,14 +65,24 @@ struct ubpf_vm bool* int_funcs; const char** ext_func_names; + struct ubpf_stack_usage* local_func_stack_usage; + void* stack_usage_calculator_cookie; + stack_usage_calculator_t stack_usage_calculator; + external_function_dispatcher_t dispatcher; external_function_validate_t dispatcher_validate; bool bounds_check_enabled; int (*error_printf)(FILE* stream, const char* format, ...); - struct ubpf_jit_result (*jit_translate)(struct ubpf_vm* vm, uint8_t* buffer, size_t* size); - bool (*jit_update_dispatcher)(struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset); - bool (*jit_update_helper)(struct ubpf_vm* vm, ext_func 
new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset); + struct ubpf_jit_result (*jit_translate)(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode); + bool (*jit_update_dispatcher)( + struct ubpf_vm* vm, + external_function_dispatcher_t new_dispatcher, + uint8_t* buffer, + size_t size, + uint32_t offset); + bool (*jit_update_helper)( + struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset); int unwind_stack_extension_index; uint64_t pointer_secret; ubpf_data_relocation data_relocation_function; @@ -78,29 +97,42 @@ struct ubpf_vm struct ubpf_stack_frame { + uint16_t stack_usage; uint16_t return_address; - uint64_t saved_registers[4]; + uint64_t saved_registers[5]; }; /* The various JIT targets. */ // arm64 struct ubpf_jit_result -ubpf_translate_arm64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size); -bool ubpf_jit_update_dispatcher_arm64(struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset); -bool ubpf_jit_update_helper_arm64(struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset); - -//x86_64 +ubpf_translate_arm64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode); +bool +ubpf_jit_update_dispatcher_arm64( + struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset); +bool +ubpf_jit_update_helper_arm64( + struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset); + +// x86_64 struct ubpf_jit_result -ubpf_translate_x86_64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size); -bool ubpf_jit_update_dispatcher_x86_64(struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset); -bool ubpf_jit_update_helper_x86_64(struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, 
size_t size, uint32_t offset); - -//uhm, hello? +ubpf_translate_x86_64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode); +bool +ubpf_jit_update_dispatcher_x86_64( + struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset); +bool +ubpf_jit_update_helper_x86_64( + struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset); + +// uhm, hello? struct ubpf_jit_result -ubpf_translate_null(struct ubpf_vm* vm, uint8_t* buffer, size_t* size); -bool ubpf_jit_update_dispatcher_null(struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset); -bool ubpf_jit_update_helper_null(struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset); +ubpf_translate_null(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode); +bool +ubpf_jit_update_dispatcher_null( + struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset); +bool +ubpf_jit_update_helper_null( + struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset); char* ubpf_error(const char* fmt, ...); @@ -130,4 +162,10 @@ ubpf_fetch_instruction(const struct ubpf_vm* vm, uint16_t pc); void ubpf_store_instruction(const struct ubpf_vm* vm, uint16_t pc, struct ebpf_inst inst); -#endif +uint16_t +ubpf_stack_usage_for_local_func(const struct ubpf_vm* vm, uint16_t pc); + +bool +ubpf_calculate_stack_usage_for_local_func(const struct ubpf_vm* vm, uint16_t pc, char** errmsg); + +#endif \ No newline at end of file diff --git a/vm/ubpf_jit.c b/vm/ubpf_jit.c index cc534ade6..f94a22bcd 100644 --- a/vm/ubpf_jit.c +++ b/vm/ubpf_jit.c @@ -19,6 +19,7 @@ * limitations under the License. 
*/ +#include "ubpf.h" #define _GNU_SOURCE #include #include @@ -29,11 +30,10 @@ #include #include "ubpf_int.h" - int -ubpf_translate(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, char** errmsg) +ubpf_translate_ex(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, char** errmsg, enum JitMode jit_mode) { - struct ubpf_jit_result jit_result = vm->jit_translate(vm, buffer, size); + struct ubpf_jit_result jit_result = vm->jit_translate(vm, buffer, size, jit_mode); vm->jitted_result = jit_result; if (jit_result.errmsg) { *errmsg = jit_result.errmsg; @@ -41,8 +41,14 @@ ubpf_translate(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, char** errmsg) return jit_result.compile_result == UBPF_JIT_COMPILE_SUCCESS ? 0 : -1; } +int +ubpf_translate(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, char** errmsg) +{ + return ubpf_translate_ex(vm, buffer, size, errmsg, BasicJitMode); +} + struct ubpf_jit_result -ubpf_translate_null(struct ubpf_vm* vm, uint8_t* buffer, size_t* size) +ubpf_translate_null(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode) { struct ubpf_jit_result compile_result; compile_result.compile_result = UBPF_JIT_COMPILE_FAILURE; @@ -52,11 +58,14 @@ ubpf_translate_null(struct ubpf_vm* vm, uint8_t* buffer, size_t* size) UNUSED_PARAMETER(vm); UNUSED_PARAMETER(buffer); UNUSED_PARAMETER(size); + UNUSED_PARAMETER(jit_mode); compile_result.errmsg = ubpf_error("Code can not be JITed on this target."); return compile_result; } -bool ubpf_jit_update_dispatcher_null(struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset) +bool +ubpf_jit_update_dispatcher_null( + struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset) { UNUSED_PARAMETER(vm); UNUSED_PARAMETER(new_dispatcher); @@ -66,7 +75,9 @@ bool ubpf_jit_update_dispatcher_null(struct ubpf_vm* vm, external_function_dispa return false; } -bool ubpf_jit_update_helper_null(struct 
ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset) +bool +ubpf_jit_update_helper_null( + struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset) { UNUSED_PARAMETER(vm); UNUSED_PARAMETER(new_helper); @@ -138,7 +149,7 @@ ubpf_compile(struct ubpf_vm* vm, char** errmsg) } ubpf_jit_fn -ubpf_copy_jit(struct ubpf_vm *vm, void *buffer, size_t size, char **errmsg) +ubpf_copy_jit(struct ubpf_vm* vm, void* buffer, size_t size, char** errmsg) { // If compilation was not successfull or it has not even been attempted, // we cannot copy. diff --git a/vm/ubpf_jit_arm64.c b/vm/ubpf_jit_arm64.c index c67104069..8cdc758d5 100644 --- a/vm/ubpf_jit_arm64.c +++ b/vm/ubpf_jit_arm64.c @@ -24,7 +24,6 @@ #define _GNU_SOURCE #include -#include #include #include #include @@ -245,8 +244,7 @@ emit_loadstore_register( } static void -emit_loadstore_literal( - struct jit_state* state, enum LoadStoreOpcode op, enum Registers rt, uint32_t target) +emit_loadstore_literal(struct jit_state* state, enum LoadStoreOpcode op, enum Registers rt, uint32_t target) { note_load(state, target); const uint32_t reg_op_base = 0x08000000U; @@ -254,7 +252,7 @@ emit_loadstore_literal( } static void -emit_adr(struct jit_state *state, uint32_t offset, enum Registers rd) +emit_adr(struct jit_state* state, uint32_t offset, enum Registers rd) { note_lea(state, offset); uint32_t instr = 0x10000000 | rd; @@ -502,8 +500,9 @@ emit_movewide_immediate(struct jit_state* state, bool sixty_four, enum Registers /* Generate the function prologue. * * We set the stack to look like: - * SP on entry * ubpf_stack_size bytes of UBPF stack + * SP on entry + * SP on entry * Callee saved registers * Frame <- SP. * Precondition: The runtime stack pointer is 16-byte aligned. 
@@ -512,24 +511,27 @@ emit_movewide_immediate(struct jit_state* state, bool sixty_four, enum Registers static void emit_jit_prologue(struct jit_state* state, size_t ubpf_stack_size) { - uint32_t register_space = _countof(callee_saved_registers) * 8 + 2 * 8; - state->stack_size = align_to(ubpf_stack_size + register_space, 16); - emit_addsub_immediate(state, true, AS_SUB, SP, SP, state->stack_size); - - /* Set up frame */ + ubpf_stack_size = 512; + emit_addsub_immediate(state, true, AS_SUB, SP, SP, 16); emit_loadstorepair_immediate(state, LSP_STPX, R29, R30, SP, 0); - /* In ARM64 calling convention, R29 is the frame pointer. */ - emit_addsub_immediate(state, true, AS_ADD, R29, SP, 0); + state->stack_size = _countof(callee_saved_registers) * 8; + emit_addsub_immediate(state, true, AS_SUB, SP, SP, state->stack_size); /* Save callee saved registers */ unsigned i; for (i = 0; i < _countof(callee_saved_registers); i += 2) { emit_loadstorepair_immediate( - state, LSP_STPX, callee_saved_registers[i], callee_saved_registers[i + 1], SP, (i + 2) * 8); + state, LSP_STPX, callee_saved_registers[i], callee_saved_registers[i + 1], SP, (i)*8); } + emit_addsub_immediate(state, true, AS_ADD, R29, SP, 0); - /* Setup UBPF frame pointer. */ - emit_addsub_immediate(state, true, AS_ADD, map_register(10), SP, state->stack_size); + if (state->jit_mode == BasicJitMode) { + /* Setup UBPF frame pointer. */ + emit_addsub_immediate(state, true, AS_ADD, map_register(10), SP, 0); + emit_addsub_immediate(state, true, AS_SUB, SP, SP, ubpf_stack_size); + } else { + emit_addsub_immediate(state, true, AS_ADD, map_register(10), R2, 0); + } /* Copy R0 to the volatile context for safe keeping. 
*/ emit_logical_register(state, true, LOG_ORR, VOLATILE_CTXT, RZ, R0); @@ -539,6 +541,33 @@ emit_jit_prologue(struct jit_state* state, size_t ubpf_stack_size) state->entry_loc = state->offset; } +static void +emit_jit_epilogue(struct jit_state* state) +{ + state->exit_loc = state->offset; + + /* Move register 0 into R0 */ + if (map_register(0) != R0) { + emit_logical_register(state, true, LOG_ORR, R0, RZ, map_register(0)); + } + + /* We could be anywhere in the stack if we excepted. Get our head right. */ + emit_addsub_immediate(state, true, AS_ADD, SP, R29, 0); + + /* Restore callee-saved registers). */ + size_t i; + for (i = 0; i < _countof(callee_saved_registers); i += 2) { + emit_loadstorepair_immediate( + state, LSP_LDPX, callee_saved_registers[i], callee_saved_registers[i + 1], SP, (i)*8); + } + emit_addsub_immediate(state, true, AS_ADD, SP, SP, state->stack_size); + + emit_loadstorepair_immediate(state, LSP_LDPX, R29, R30, SP, 0); + emit_addsub_immediate(state, true, AS_ADD, SP, SP, 16); + + emit_unconditionalbranch_register(state, BR_RET, R30); +} + static void emit_dispatched_external_helper_call(struct jit_state* state, struct ubpf_vm* vm, unsigned int idx) { @@ -602,44 +631,31 @@ emit_dispatched_external_helper_call(struct jit_state* state, struct ubpf_vm* vm static void emit_local_call(struct jit_state* state, uint32_t target_pc) { - uint32_t stack_movement = align_to(40, 16); + emit_loadstore_immediate(state, LS_LDRX, temp_register, SP, 0); + emit_addsub_register(state, true, AS_SUB, map_register(10), map_register(10), temp_register); + + uint32_t stack_movement = align_to(48, 16); emit_addsub_immediate(state, true, AS_SUB, SP, SP, stack_movement); + emit_loadstore_immediate(state, LS_STRX, R30, SP, 0); - emit_loadstorepair_immediate(state, LSP_STPX, map_register(6), map_register(7), SP, 8); - emit_loadstorepair_immediate(state, LSP_STPX, map_register(8), map_register(9), SP, 24); - emit_unconditionalbranch_immediate(state, UBR_BL, target_pc); - 
emit_loadstore_immediate(state, LS_LDRX, R30, SP, 0); - emit_loadstorepair_immediate(state, LSP_LDPX, map_register(6), map_register(7), SP, 8); - emit_loadstorepair_immediate(state, LSP_LDPX, map_register(8), map_register(9), SP, 24); - emit_addsub_immediate(state, true, AS_ADD, SP, SP, stack_movement); -} + emit_loadstore_immediate(state, LS_STRX, temp_register, SP, 8); + emit_loadstorepair_immediate(state, LSP_STPX, map_register(6), map_register(7), SP, 16); + emit_loadstorepair_immediate(state, LSP_STPX, map_register(8), map_register(9), SP, 32); -static void -emit_jit_epilogue(struct jit_state* state) -{ - state->exit_loc = state->offset; + emit_unconditionalbranch_immediate(state, UBR_BL, target_pc); - /* Move register 0 into R0 */ - if (map_register(0) != R0) { - emit_logical_register(state, true, LOG_ORR, R0, RZ, map_register(0)); - } + emit_loadstore_immediate(state, LS_LDRX, R30, SP, 0); + emit_loadstore_immediate(state, LS_LDRX, temp_register, SP, 8); + emit_loadstorepair_immediate(state, LSP_LDPX, map_register(6), map_register(7), SP, 16); + emit_loadstorepair_immediate(state, LSP_LDPX, map_register(8), map_register(9), SP, 32); - /* We could be anywhere in the stack if we excepted. Get our head right. */ - emit_addsub_immediate(state, true, AS_ADD, SP, R29, 0); + emit_addsub_immediate(state, true, AS_ADD, SP, SP, stack_movement); - /* Restore callee-saved registers). 
*/ - size_t i; - for (i = 0; i < _countof(callee_saved_registers); i += 2) { - emit_loadstorepair_immediate( - state, LSP_LDPX, callee_saved_registers[i], callee_saved_registers[i + 1], SP, (i + 2) * 8); - } - emit_loadstorepair_immediate(state, LSP_LDPX, R29, R30, SP, 0); - emit_addsub_immediate(state, true, AS_ADD, SP, SP, state->stack_size); - emit_unconditionalbranch_register(state, BR_RET, R30); + emit_addsub_register(state, true, AS_ADD, map_register(10), map_register(10), temp_register); } static uint32_t -emit_dispatched_external_helper_address(struct jit_state *state, uint64_t dispatcher_addr) +emit_dispatched_external_helper_address(struct jit_state* state, uint64_t dispatcher_addr) { // We will assume that the buffer of memory holding the JIT'd code is 4-byte aligned. // And, because ARM is 32-bit instructions, we know that each instruction is 4-byte aligned. @@ -658,10 +674,11 @@ emit_dispatched_external_helper_address(struct jit_state *state, uint64_t dispat } static uint32_t -emit_helper_table(struct jit_state* state, struct ubpf_vm* vm) { +emit_helper_table(struct jit_state* state, struct ubpf_vm* vm) +{ uint32_t helper_table_address_target = state->offset; - for (int i = 0; iext_funcs[i], sizeof(uint64_t)); } return helper_table_address_target; @@ -938,7 +955,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) { int i; - emit_jit_prologue(state, UBPF_STACK_SIZE); + emit_jit_prologue(state, UBPF_EBPF_STACK_SIZE); for (i = 0; i < vm->num_insts; i++) { @@ -951,6 +968,12 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) struct ebpf_inst inst = ubpf_fetch_instruction(vm, i); state->pc_locs[i] = state->offset; + if (i == 0 || vm->int_funcs[i]) { + emit_movewide_immediate(state, true, temp_register, ubpf_stack_usage_for_local_func(vm, i)); + emit_addsub_immediate(state, true, AS_SUB, SP, SP, 16); + emit_loadstorepair_immediate(state, LSP_STPX, temp_register, temp_register, SP, 0); + } + enum Registers dst = 
map_register(inst.dst); enum Registers src = map_register(inst.src); uint8_t opcode = inst.opcode; @@ -1112,6 +1135,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) } break; case EBPF_OP_EXIT: + emit_addsub_immediate(state, true, AS_ADD, SP, SP, 16); emit_unconditionalbranch_register(state, BR_RET, R30); break; @@ -1177,43 +1201,42 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) if (state->jit_status != NoError) { switch (state->jit_status) { - case TooManyJumps: { - *errmsg = ubpf_error("Too many jump instructions."); - break; - } - case TooManyLoads: { - *errmsg = ubpf_error("Too many load instructions."); - break; - } - case TooManyLeas: { - *errmsg = ubpf_error("Too many LEA calculations."); - break; - } - case UnexpectedInstruction: { - // errmsg set at time the error was detected because the message requires - // information about the unexpected instruction. - break; - } - case UnknownInstruction: { - // errmsg set at time the error was detected because the message requires - // information about the unknown instruction. - break; - } - case NotEnoughSpace: { - *errmsg = ubpf_error("Target buffer too small"); - break; - } - case NoError: { - assert(false); - } + case TooManyJumps: { + *errmsg = ubpf_error("Too many jump instructions."); + break; + } + case TooManyLoads: { + *errmsg = ubpf_error("Too many load instructions."); + break; + } + case TooManyLeas: { + *errmsg = ubpf_error("Too many LEA calculations."); + break; + } + case UnexpectedInstruction: { + // errmsg set at time the error was detected because the message requires + // information about the unexpected instruction. + break; + } + case UnknownInstruction: { + // errmsg set at time the error was detected because the message requires + // information about the unknown instruction. 
+ break; + } + case NotEnoughSpace: { + *errmsg = ubpf_error("Target buffer too small"); + break; + } + case NoError: { + assert(false); + } } return -1; } - emit_jit_epilogue(state); - state->dispatcher_loc = emit_dispatched_external_helper_address(state, (uint64_t)vm->dispatcher); + state->dispatcher_loc = emit_dispatched_external_helper_address(state, (uint64_t)vm->dispatcher); state->helper_table_loc = emit_helper_table(state, vm); return 0; @@ -1277,7 +1300,6 @@ resolve_adr(struct jit_state* state, uint32_t instr_offset, int32_t immediate) memcpy(state->buf + instr_offset, &instr, sizeof(uint32_t)); } - static bool resolve_jumps(struct jit_state* state) { @@ -1345,12 +1367,13 @@ resolve_leas(struct jit_state* state) return true; } - -bool ubpf_jit_update_dispatcher_arm64(struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset) +bool +ubpf_jit_update_dispatcher_arm64( + struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset) { UNUSED_PARAMETER(vm); uint64_t jit_upper_bound = (uint64_t)buffer + size; - void *dispatcher_address = (void*)((uint64_t)buffer + offset); + void* dispatcher_address = (void*)((uint64_t)buffer + offset); if ((uint64_t)dispatcher_address + sizeof(void*) < jit_upper_bound) { memcpy(dispatcher_address, &new_dispatcher, sizeof(void*)); return true; @@ -1359,7 +1382,9 @@ bool ubpf_jit_update_dispatcher_arm64(struct ubpf_vm* vm, external_function_disp return false; } -bool ubpf_jit_update_helper_arm64(struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset) +bool +ubpf_jit_update_helper_arm64( + struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset) { UNUSED_PARAMETER(vm); uint64_t jit_upper_bound = (uint64_t)buffer + size; @@ -1373,12 +1398,12 @@ bool ubpf_jit_update_helper_arm64(struct ubpf_vm* vm, ext_func new_helper, unsig } 
struct ubpf_jit_result -ubpf_translate_arm64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size) +ubpf_translate_arm64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode) { struct jit_state state; struct ubpf_jit_result compile_result; - if (initialize_jit_state_result(&state, &compile_result, buffer, *size, &compile_result.errmsg) < 0) { + if (initialize_jit_state_result(&state, &compile_result, buffer, *size, jit_mode, &compile_result.errmsg) < 0) { goto out; } diff --git a/vm/ubpf_jit_support.c b/vm/ubpf_jit_support.c index ba96b9be0..e4984067c 100644 --- a/vm/ubpf_jit_support.c +++ b/vm/ubpf_jit_support.c @@ -19,10 +19,18 @@ #include "ubpf_jit_support.h" #include +#include "ubpf.h" #include "ubpf_int.h" int -initialize_jit_state_result(struct jit_state *state, struct ubpf_jit_result *compile_result, uint8_t *buffer, uint32_t size, char **errmsg) { +initialize_jit_state_result( + struct jit_state* state, + struct ubpf_jit_result* compile_result, + uint8_t* buffer, + uint32_t size, + enum JitMode jit_mode, + char** errmsg) +{ compile_result->compile_result = UBPF_JIT_COMPILE_FAILURE; compile_result->errmsg = NULL; compile_result->external_dispatcher_offset = 0; @@ -38,6 +46,7 @@ initialize_jit_state_result(struct jit_state *state, struct ubpf_jit_result *com state->num_loads = 0; state->num_leas = 0; state->jit_status = NoError; + state->jit_mode = jit_mode; if (!state->pc_locs || !state->jumps || !state->loads || !state->leas) { *errmsg = ubpf_error("Could not allocate space needed to JIT compile eBPF program"); @@ -48,7 +57,7 @@ initialize_jit_state_result(struct jit_state *state, struct ubpf_jit_result *com } void -release_jit_state_result(struct jit_state *state, struct ubpf_jit_result *compile_result) +release_jit_state_result(struct jit_state* state, struct ubpf_jit_result* compile_result) { UNUSED_PARAMETER(compile_result); free(state->pc_locs); @@ -62,7 +71,8 @@ release_jit_state_result(struct jit_state *state, struct ubpf_jit_result 
*compil } void -emit_patchable_relative(uint32_t offset, uint32_t target_pc, uint32_t manual_target_offset, struct patchable_relative *table, size_t index) +emit_patchable_relative( + uint32_t offset, uint32_t target_pc, uint32_t manual_target_offset, struct patchable_relative* table, size_t index) { struct patchable_relative* jump = &table[index]; jump->offset_loc = offset; @@ -82,9 +92,8 @@ note_lea(struct jit_state* state, uint32_t offset) emit_patchable_relative(state->offset, offset, 0, state->leas, state->num_leas++); } - void -fixup_jump_target(struct patchable_relative *table, size_t table_size, uint32_t src_offset, uint32_t dest_offset) +fixup_jump_target(struct patchable_relative* table, size_t table_size, uint32_t src_offset, uint32_t dest_offset) { for (size_t index = 0; index < table_size; index++) { if (table[index].offset_loc == src_offset) { diff --git a/vm/ubpf_jit_support.h b/vm/ubpf_jit_support.h index 197389752..659bcc85a 100644 --- a/vm/ubpf_jit_support.h +++ b/vm/ubpf_jit_support.h @@ -28,7 +28,8 @@ #include #include "ubpf_int.h" -enum JitProgress { +enum JitProgress +{ NoError, TooManyJumps, TooManyLoads, @@ -51,10 +52,10 @@ struct patchable_relative /* Special values for target_pc in struct jump */ #define TARGET_PC_EXIT ~UINT32_C(0) -#define TARGET_PC_ENTER (~UINT32_C(0) & 0x01) -#define TARGET_PC_RETPOLINE (~UINT32_C(0) & 0x0101) +#define TARGET_PC_ENTER (~UINT32_C(0) & 0x01) +#define TARGET_PC_RETPOLINE (~UINT32_C(0) & 0x0101) #define TARGET_PC_EXTERNAL_DISPATCHER (~UINT32_C(0) & 0x010101) -#define TARGET_LOAD_HELPER_TABLE (~UINT32_C(0) & 0x01010101) +#define TARGET_LOAD_HELPER_TABLE (~UINT32_C(0) & 0x01010101) struct jit_state { @@ -85,6 +86,7 @@ struct jit_state */ uint32_t helper_table_loc; enum JitProgress jit_status; + enum JitMode jit_mode; struct patchable_relative* jumps; struct patchable_relative* loads; struct patchable_relative* leas; @@ -95,13 +97,20 @@ struct jit_state }; int -initialize_jit_state_result(struct jit_state 
*state, struct ubpf_jit_result *compile_result, uint8_t *buffer, uint32_t size, char **errmsg); +initialize_jit_state_result( + struct jit_state* state, + struct ubpf_jit_result* compile_result, + uint8_t* buffer, + uint32_t size, + enum JitMode jit_mode, + char** errmsg); void -release_jit_state_result(struct jit_state *state, struct ubpf_jit_result *compile_result); +release_jit_state_result(struct jit_state* state, struct ubpf_jit_result* compile_result); void -emit_patchable_relative(uint32_t offset, uint32_t target_pc, uint32_t manual_target_offset, struct patchable_relative *table, size_t index); +emit_patchable_relative( + uint32_t offset, uint32_t target_pc, uint32_t manual_target_offset, struct patchable_relative* table, size_t index); void note_load(struct jit_state* state, uint32_t target_pc); @@ -113,5 +122,5 @@ void emit_jump_target(struct jit_state* state, uint32_t jump_src); void -fixup_jump_target(struct patchable_relative *table, size_t table_size, uint32_t src_offset, uint32_t dest_offset); +fixup_jump_target(struct patchable_relative* table, size_t table_size, uint32_t src_offset, uint32_t dest_offset); #endif diff --git a/vm/ubpf_jit_x86_64.c b/vm/ubpf_jit_x86_64.c index 467d6daab..c5d348194 100644 --- a/vm/ubpf_jit_x86_64.c +++ b/vm/ubpf_jit_x86_64.c @@ -18,6 +18,7 @@ * limitations under the License. */ +#include "ubpf.h" #include "ubpf_jit_support.h" #define _GNU_SOURCE @@ -63,36 +64,43 @@ muldivmod(struct jit_state* state, uint8_t opcode, int src, int dst, int32_t imm #define RCX_ALT R10 #if defined(_WIN32) -static int platform_nonvolatile_registers[] = {RBP, RBX, RDI, RSI, R13, R14, R15}; +static int platform_nonvolatile_registers[] = {RBP, RBX, RDI, RSI, R12, R13, R14, R15}; // Callee-saved registers. +static int platform_volatile_registers[] = {RAX, RDX, RCX, R8, R9, R10, R11}; // Caller-saved registers (if needed). 
static int platform_parameter_registers[] = {RCX, RDX, R8, R9}; static int register_map[REGISTER_MAP_SIZE] = { + // Scratch registers RAX, R10, RDX, R8, R9, - R14, - R15, + R12, + // Non-volatile registers + RBX, RDI, RSI, - RBX, - RBP, + R14, + R15, // Until further notice, r15 must be mapped to eBPF register r10 }; #else -static int platform_nonvolatile_registers[] = {RBP, RBX, R13, R14, R15}; +static int platform_nonvolatile_registers[] = {RBP, RBX, R12, R13, R14, R15}; // Callee-saved registers. +static int platform_volatile_registers[] = { + RAX, RDI, RSI, RDX, RCX, R8, R9, R10, R11}; // Caller-saved registers (if needed). static int platform_parameter_registers[] = {RDI, RSI, RDX, RCX, R8, R9}; static int register_map[REGISTER_MAP_SIZE] = { + // Scratch registers RAX, RDI, RSI, RDX, R10, R8, + // Non-volatile registers RBX, + R12, R13, R14, - R15, - RBP, + R15, // Until further notice, r15 must be mapped to eBPF register r10 }; #endif @@ -104,17 +112,39 @@ map_register(int r) return register_map[r % _BPF_REG_MAX]; } +#if 0 static inline void -emit_local_call(struct jit_state* state, uint32_t target_pc) +emit_bpf_push(struct jit_state* state, int bpf_register) { - /* - * Pushing 4 * 8 = 32 bytes will maintain the invariant - * that the stack is 16-byte aligned. - */ + emit_store(state, S64, map_register(bpf_register), map_register(BPF_REG_10), -8); + emit_alu64_imm32(state, 0x81, 5, map_register(BPF_REG_10), 8); +} + +static inline void +emit_bpf_pop(struct jit_state* state, int bpf_register) +{ + emit_alu64_imm32(state, 0x81, 0, map_register(BPF_REG_10), 8); + emit_load(state, S64, map_register(BPF_REG_10), map_register(bpf_register), -8); +} +#endif + +static inline void +emit_local_call(struct ubpf_vm* vm, struct jit_state* state, uint32_t target_pc) +{ + UNUSED_PARAMETER(vm); + // Because the top of the stack holds the stack usage of the calling function, + // we adjust the base pointer down by that value! 
+ // sub r15, [rsp] + emit1(state, 0x4c); + emit1(state, 0x2B); + emit1(state, 0x3C); // Mod: 00b Reg: 111b RM: 100b + emit1(state, 0x24); // Scale: 00b Index: 100b Base: 100b + emit_push(state, map_register(BPF_REG_6)); emit_push(state, map_register(BPF_REG_7)); emit_push(state, map_register(BPF_REG_8)); emit_push(state, map_register(BPF_REG_9)); + #if defined(_WIN32) /* Windows x64 ABI requires home register space */ /* Allocate home register space - 4 registers */ @@ -122,6 +152,7 @@ emit_local_call(struct jit_state* state, uint32_t target_pc) #endif emit1(state, 0xe8); // e8 is the opcode for a CALL emit_jump_address_reloc(state, target_pc); + #if defined(_WIN32) /* Deallocate home register space - 4 registers */ emit_alu64_imm32(state, 0x81, 0, RSP, 4 * sizeof(uint64_t)); @@ -130,12 +161,19 @@ emit_local_call(struct jit_state* state, uint32_t target_pc) emit_pop(state, map_register(BPF_REG_8)); emit_pop(state, map_register(BPF_REG_7)); emit_pop(state, map_register(BPF_REG_6)); + + // Because the top of the stack holds the stack usage of the calling function, + // we adjust the base pointer back up by that value! 
+ // add r15, [rsp] + emit1(state, 0x4c); + emit1(state, 0x03); + emit1(state, 0x3C); // Mod: 00b Reg: 111b RM: 100b + emit1(state, 0x24); // Scale: 00b Index: 100b Base: 100b } static uint32_t emit_dispatched_external_helper_address(struct jit_state* state, struct ubpf_vm* vm) { - uint32_t external_helper_address_target = state->offset; emit8(state, (uint64_t)vm->dispatcher); return external_helper_address_target; @@ -237,6 +275,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) { int i; + (void)platform_volatile_registers; /* Save platform non-volatile registers */ for (i = 0; i < _countof(platform_nonvolatile_registers); i++) { emit_push(state, platform_nonvolatile_registers[i]); @@ -247,7 +286,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) emit_mov(state, platform_parameter_registers[0], map_register(BPF_REG_1)); } - /* Move the platform parameter register to the (volatile) register + /* Move the first platform parameter register to the (volatile) register * that holds the pointer to the context. */ emit_mov(state, platform_parameter_registers[0], VOLATILE_CTXT); @@ -256,7 +295,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) * Assuming that the stack is 16-byte aligned right before * the call insn that brought us to this code, when * we start executing the jit'd code, we need to regain a 16-byte - * alignment. The UBPF_STACK_SIZE is guaranteed to be + * alignment. The UBPF_EBPF_STACK_SIZE is guaranteed to be * divisible by 16. However, if we pushed an even number of * registers on the stack when we are saving state (see above), * then we have to add an additional 8 bytes to get back @@ -267,12 +306,23 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) } /* - * Set BPF R10 (the way to access the frame in eBPF) to match RSP. + * Let's set RBP to RSP so that we can restore RSP later! 
*/ - emit_mov(state, RSP, map_register(BPF_REG_10)); - - /* Allocate stack space */ - emit_alu64_imm32(state, 0x81, 5, RSP, UBPF_STACK_SIZE); + emit_mov(state, RSP, RBP); + + /* Configure eBPF program stack space */ + if (state->jit_mode == BasicJitMode) { + /* + * Set BPF R10 (the way to access the frame in eBPF) to the beginning + * of the eBPF program's stack space. + */ + emit_mov(state, RSP, map_register(BPF_REG_10)); + /* Allocate eBPF program stack space */ + emit_alu64_imm32(state, 0x81, 5, RSP, UBPF_EBPF_STACK_SIZE); + } else { + /* Use given eBPF program stack space */ + emit_mov(state, platform_parameter_registers[2], map_register(BPF_REG_10)); + } #if defined(_WIN32) /* Windows x64 ABI requires home register space */ @@ -308,10 +358,13 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) uint32_t target_pc = i + inst.offset + 1; if (i == 0 || vm->int_funcs[i]) { - /* When we are the subject of a call, we have to properly align our - * stack pointer. - */ + uint16_t stack_usage = ubpf_stack_usage_for_local_func(vm, i); emit_alu64_imm32(state, 0x81, 5, RSP, 8); + emit1(state, 0x48); + emit1(state, 0xC7); + emit1(state, 0x04); // Mod: 00b Reg: 000b RM: 100b + emit1(state, 0x24); // Scale: 00b Index: 100b Base: 100b + emit4(state, stack_usage); } switch (inst.opcode) { @@ -662,7 +715,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) } } else if (inst.src == 1) { target_pc = i + inst.imm + 1; - emit_local_call(state, target_pc); + emit_local_call(vm, state, target_pc); } break; case EBPF_OP_EXIT: @@ -768,8 +821,8 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) emit_mov(state, map_register(BPF_REG_0), RAX); } - /* Deallocate stack space by restoring RSP from BPF R10. */ - emit_mov(state, map_register(BPF_REG_10), RSP); + /* Deallocate stack space by restoring RSP from RBP.
*/ + emit_mov(state, RBP, RSP); if (!(_countof(platform_nonvolatile_registers) % 2)) { emit_alu64_imm32(state, 0x81, 0, RSP, 0x8); @@ -977,12 +1030,12 @@ resolve_patchable_relatives(struct jit_state* state) } struct ubpf_jit_result -ubpf_translate_x86_64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size) +ubpf_translate_x86_64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode) { struct jit_state state; struct ubpf_jit_result compile_result; - if (initialize_jit_state_result(&state, &compile_result, buffer, *size, &compile_result.errmsg) < 0) { + if (initialize_jit_state_result(&state, &compile_result, buffer, *size, jit_mode, &compile_result.errmsg) < 0) { goto out; } diff --git a/vm/ubpf_vm.c b/vm/ubpf_vm.c index e2d71ae00..57acc9bb5 100644 --- a/vm/ubpf_vm.c +++ b/vm/ubpf_vm.c @@ -45,7 +45,8 @@ bounds_check( uint16_t cur_pc, void* mem, size_t mem_len, - void* stack); + void* stack, + size_t stack_len); bool ubpf_toggle_bounds_check(struct ubpf_vm* vm, bool enable) @@ -65,7 +66,14 @@ ubpf_set_error_print(struct ubpf_vm* vm, int (*error_printf)(FILE* stream, const } static uint64_t -ubpf_default_external_dispatcher(uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, unsigned int index, external_function_t *external_fns) +ubpf_default_external_dispatcher( + uint64_t arg1, + uint64_t arg2, + uint64_t arg3, + uint64_t arg4, + uint64_t arg5, + unsigned int index, + external_function_t* external_fns) { return external_fns[index](arg1, arg2, arg3, arg4, arg5); } @@ -90,6 +98,12 @@ ubpf_create(void) return NULL; } + vm->local_func_stack_usage = calloc(UBPF_MAX_INSTS, sizeof(struct ubpf_stack_usage)); + if (vm->local_func_stack_usage == NULL) { + ubpf_destroy(vm); + return NULL; + } + vm->bounds_check_enabled = true; vm->error_printf = fprintf; @@ -118,6 +132,7 @@ ubpf_destroy(struct ubpf_vm* vm) free(vm->int_funcs); free(vm->ext_funcs); free(vm->ext_func_names); + free(vm->local_func_stack_usage); free(vm); } @@ -127,7 +142,6 
@@ as_external_function_t(void* f) return (external_function_t)f; }; - int ubpf_register(struct ubpf_vm* vm, unsigned int idx, const char* name, external_function_t fn) { @@ -146,7 +160,8 @@ ubpf_register(struct ubpf_vm* vm, unsigned int idx, const char* name, external_f } // Now, update! - if (!vm->jit_update_helper(vm, fn, idx, (uint8_t*)vm->jitted, vm->jitted_size, vm->jitted_result.external_helper_offset)) { + if (!vm->jit_update_helper( + vm, fn, idx, (uint8_t*)vm->jitted, vm->jitted_size, vm->jitted_result.external_helper_offset)) { // Can't immediately stop here because we have unprotected memory! success = -1; } @@ -173,7 +188,8 @@ ubpf_register_external_dispatcher( } // Now, update! - if (!vm->jit_update_dispatcher(vm, dispatcher, (uint8_t*)vm->jitted, vm->jitted_size, vm->jitted_result.external_dispatcher_offset)) { + if (!vm->jit_update_dispatcher( + vm, dispatcher, (uint8_t*)vm->jitted, vm->jitted_size, vm->jitted_result.external_dispatcher_offset)) { // Can't immediately stop here because we have unprotected memory! success = -1; } @@ -215,8 +231,8 @@ ubpf_load(struct ubpf_vm* vm, const void* code, uint32_t code_len, char** errmsg const struct ebpf_inst* source_inst = code; *errmsg = NULL; - if (UBPF_STACK_SIZE % sizeof(uint64_t) != 0) { - *errmsg = ubpf_error("UBPF_STACK_SIZE must be a multiple of 8"); + if (UBPF_EBPF_STACK_SIZE % sizeof(uint64_t) != 0) { + *errmsg = ubpf_error("UBPF_EBPF_STACK_SIZE must be a multiple of 8"); return -1; } @@ -299,12 +315,13 @@ i32(uint64_t x) * @param[in] immediate The signed 32-bit immediate value to sign extend. * @return The sign extended 64-bit value. 
*/ -static int64_t i64(int32_t immediate) { +static int64_t +i64(int32_t immediate) +{ return (int64_t)immediate; - } -#define IS_ALIGNED(x, a) (((uintptr_t)(x) & ((a)-1)) == 0) +#define IS_ALIGNED(x, a) (((uintptr_t)(x) & ((a) - 1)) == 0) inline static uint64_t ubpf_mem_load(uint64_t address, size_t size) @@ -357,44 +374,30 @@ ubpf_mem_store(uint64_t address, uint64_t value, size_t size) } int -ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_return_value) +ubpf_exec_ex( + const struct ubpf_vm* vm, + void* mem, + size_t mem_len, + uint64_t* bpf_return_value, + uint8_t* stack_start, + size_t stack_length) { uint16_t pc = 0; const struct ebpf_inst* insts = vm->insts; uint64_t* reg; uint64_t _reg[16]; - uint64_t ras_index = 0; + uint64_t stack_frame_index = 0; int return_value = -1; - void *external_dispatcher_cookie = mem; - -// Windows Kernel mode limits stack usage to 12K, so we need to allocate it dynamically. -#if defined(NTDDI_VERSION) && defined(WINNT) - uint64_t* stack = NULL; - struct ubpf_stack_frame* stack_frames = NULL; + void* external_dispatcher_cookie = mem; - stack = calloc(UBPF_STACK_SIZE, 1); - if (!stack) { - return_value = -1; - goto cleanup; - } - - stack_frames = calloc(UBPF_MAX_CALL_DEPTH, sizeof(struct ubpf_stack_frame)); - if (!stack_frames) { - return_value = -1; - goto cleanup; + if (!insts) { + /* Code must be loaded before we can execute */ + return -1; } -#else - uint64_t stack[UBPF_STACK_SIZE / sizeof(uint64_t)]; struct ubpf_stack_frame stack_frames[UBPF_MAX_CALL_DEPTH] = { 0, }; -#endif - - if (!insts) { - /* Code must be loaded before we can execute */ - return -1; - } #ifdef DEBUG if (vm->regs) @@ -407,7 +410,7 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret reg[1] = (uintptr_t)mem; reg[2] = (uint64_t)mem_len; - reg[10] = (uintptr_t)stack + UBPF_STACK_SIZE; + reg[10] = (uintptr_t)stack_start + stack_length; int instruction_limit = vm->instruction_limit; @@ -421,6 +424,11 
@@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret return_value = -1; goto cleanup; } + + if (pc == 0 || vm->int_funcs[pc]) { + stack_frames[stack_frame_index].stack_usage = ubpf_stack_usage_for_local_func(vm, pc); + } + struct ebpf_inst inst = ubpf_fetch_instruction(vm, pc++); switch (inst.opcode) { @@ -622,19 +630,37 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret * * Needed since we don't have a verifier yet. */ -#define BOUNDS_CHECK_LOAD(size) \ - do { \ - if (!bounds_check(vm, (char*)reg[inst.src] + inst.offset, size, "load", cur_pc, mem, mem_len, stack)) { \ - return_value = -1; \ - goto cleanup; \ - } \ +#define BOUNDS_CHECK_LOAD(size) \ + do { \ + if (!bounds_check( \ + vm, \ + (char*)reg[inst.src] + inst.offset, \ + size, \ + "load", \ + cur_pc, \ + mem, \ + mem_len, \ + stack_start, \ + stack_length)) { \ + return_value = -1; \ + goto cleanup; \ + } \ } while (0) -#define BOUNDS_CHECK_STORE(size) \ - do { \ - if (!bounds_check(vm, (char*)reg[inst.dst] + inst.offset, size, "store", cur_pc, mem, mem_len, stack)) { \ - return_value = -1; \ - goto cleanup; \ - } \ +#define BOUNDS_CHECK_STORE(size) \ + do { \ + if (!bounds_check( \ + vm, \ + (char*)reg[inst.dst] + inst.offset, \ + size, \ + "store", \ + cur_pc, \ + mem, \ + mem_len, \ + stack_start, \ + stack_length)) { \ + return_value = -1; \ + goto cleanup; \ + } \ } while (0) case EBPF_OP_LDXW: @@ -916,13 +942,14 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret } break; case EBPF_OP_EXIT: - if (ras_index > 0) { - ras_index--; - pc = stack_frames[ras_index].return_address; - reg[BPF_REG_6] = stack_frames[ras_index].saved_registers[0]; - reg[BPF_REG_7] = stack_frames[ras_index].saved_registers[1]; - reg[BPF_REG_8] = stack_frames[ras_index].saved_registers[2]; - reg[BPF_REG_9] = stack_frames[ras_index].saved_registers[3]; + if (stack_frame_index > 0) { + stack_frame_index--; + pc = 
stack_frames[stack_frame_index].return_address; + reg[BPF_REG_6] = stack_frames[stack_frame_index].saved_registers[0]; + reg[BPF_REG_7] = stack_frames[stack_frame_index].saved_registers[1]; + reg[BPF_REG_8] = stack_frames[stack_frame_index].saved_registers[2]; + reg[BPF_REG_9] = stack_frames[stack_frame_index].saved_registers[3]; + reg[BPF_REG_10] += stack_frames[stack_frame_index].stack_usage; break; } *bpf_return_value = reg[0]; @@ -934,9 +961,11 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret if (inst.src == 0) { // Handle call by address to external function. if (vm->dispatcher != NULL) { - reg[0] = vm->dispatcher(reg[1], reg[2], reg[3], reg[4], reg[5], inst.imm, external_dispatcher_cookie); + reg[0] = + vm->dispatcher(reg[1], reg[2], reg[3], reg[4], reg[5], inst.imm, external_dispatcher_cookie); } else { - reg[0] = ubpf_default_external_dispatcher(reg[1], reg[2], reg[3], reg[4], reg[5], inst.imm, vm->ext_funcs); + reg[0] = ubpf_default_external_dispatcher( + reg[1], reg[2], reg[3], reg[4], reg[5], inst.imm, vm->ext_funcs); } if (inst.imm == vm->unwind_stack_extension_index && reg[0] == 0) { *bpf_return_value = reg[0]; @@ -944,22 +973,25 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret goto cleanup; } } else if (inst.src == 1) { - if (ras_index >= UBPF_MAX_CALL_DEPTH) { + if (stack_frame_index >= UBPF_MAX_CALL_DEPTH) { vm->error_printf( stderr, "uBPF error: number of nested functions calls (%lu) exceeds max (%lu) at PC %u\n", - ras_index + 1, + stack_frame_index + 1, UBPF_MAX_CALL_DEPTH, cur_pc); return_value = -1; goto cleanup; } - stack_frames[ras_index].saved_registers[0] = reg[BPF_REG_6]; - stack_frames[ras_index].saved_registers[1] = reg[BPF_REG_7]; - stack_frames[ras_index].saved_registers[2] = reg[BPF_REG_8]; - stack_frames[ras_index].saved_registers[3] = reg[BPF_REG_9]; - stack_frames[ras_index].return_address = pc; - ras_index++; + stack_frames[stack_frame_index].saved_registers[0] 
= reg[BPF_REG_6]; + stack_frames[stack_frame_index].saved_registers[1] = reg[BPF_REG_7]; + stack_frames[stack_frame_index].saved_registers[2] = reg[BPF_REG_8]; + stack_frames[stack_frame_index].saved_registers[3] = reg[BPF_REG_9]; + stack_frames[stack_frame_index].return_address = pc; + + reg[BPF_REG_10] -= stack_frames[stack_frame_index].stack_usage; + + stack_frame_index++; pc += inst.imm; break; } else if (inst.src == 2) { @@ -976,11 +1008,32 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret cleanup: #if defined(NTDDI_VERSION) && defined(WINNT) free(stack_frames); - free(stack); #endif return return_value; } +int +ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_return_value) +{ +// Windows Kernel mode limits stack usage to 12K, so we need to allocate it dynamically. +#if defined(NTDDI_VERSION) && defined(WINNT) + uint64_t* stack = NULL; + struct ubpf_stack_frame* stack_frames = NULL; + + stack = calloc(UBPF_EBPF_STACK_SIZE, 1); + if (!stack) { + return -1; + } +#else + uint64_t stack[UBPF_EBPF_STACK_SIZE / sizeof(uint64_t)]; +#endif + int result = ubpf_exec_ex(vm, mem, mem_len, bpf_return_value, (uint8_t*)stack, UBPF_EBPF_STACK_SIZE); +#if defined(NTDDI_VERSION) && defined(WINNT) + free(stack); +#endif + return result; +} + static bool validate(const struct ubpf_vm* vm, const struct ebpf_inst* insts, uint32_t num_insts, char** errmsg) { @@ -989,6 +1042,10 @@ validate(const struct ubpf_vm* vm, const struct ebpf_inst* insts, uint32_t num_i return false; } + if (!ubpf_calculate_stack_usage_for_local_func(vm, 0, errmsg)) { + return false; + } + int i; for (i = 0; i < num_insts; i++) { struct ebpf_inst inst = insts[i]; @@ -1047,8 +1104,11 @@ validate(const struct ubpf_vm* vm, const struct ebpf_inst* insts, uint32_t num_i case EBPF_OP_MOD64_REG: case EBPF_OP_XOR64_IMM: case EBPF_OP_XOR64_REG: + break; case EBPF_OP_MOV64_IMM: case EBPF_OP_MOV64_REG: + store = true; + break; case EBPF_OP_ARSH64_IMM: case 
EBPF_OP_ARSH64_REG: break; @@ -1159,6 +1219,9 @@ validate(const struct ubpf_vm* vm, const struct ebpf_inst* insts, uint32_t num_i ubpf_error("call to local function (at PC %d) is out of bounds (target: %d)", i, call_target); return false; } + if (!ubpf_calculate_stack_usage_for_local_func(vm, call_target, errmsg)) { + return false; + } } else if (inst.src == 2) { *errmsg = ubpf_error("call to external function by BTF ID (at PC %d) is not supported", i); return false; @@ -1205,15 +1268,16 @@ bounds_check( uint16_t cur_pc, void* mem, size_t mem_len, - void* stack) + void* stack, + size_t stack_len) { if (!vm->bounds_check_enabled) return true; - uintptr_t access_start= (uintptr_t)addr; + uintptr_t access_start = (uintptr_t)addr; uintptr_t access_end = access_start + size; uintptr_t stack_start = (uintptr_t)stack; - uintptr_t stack_end = stack_start + UBPF_STACK_SIZE; + uintptr_t stack_end = stack_start + stack_len; uintptr_t mem_start = (uintptr_t)mem; uintptr_t mem_end = mem_start + mem_len; @@ -1223,12 +1287,7 @@ bounds_check( if (access_start > access_end) { vm->error_printf( - stderr, - "uBPF error: invalid memory access %s at PC %u, addr %p, size %d\n", - type, - cur_pc, - addr, - size); + stderr, "uBPF error: invalid memory access %s at PC %u, addr %p, size %d\n", type, cur_pc, addr, size); return false; } @@ -1249,7 +1308,8 @@ bounds_check( // The address may be invalid or it may be a region of memory that the caller // is aware of but that is not part of the stack or memory. // Call any registered bounds check function to determine if the access is valid. 
- if (vm->bounds_check_function != NULL && vm->bounds_check_function(vm->bounds_check_user_data, access_start, size)) { + if (vm->bounds_check_function != NULL && + vm->bounds_check_function(vm->bounds_check_user_data, access_start, size)) { return true; } @@ -1266,7 +1326,7 @@ bounds_check( mem, mem_len, stack, - UBPF_STACK_SIZE); + UBPF_EBPF_STACK_SIZE); return false; } @@ -1388,3 +1448,45 @@ ubpf_set_instruction_limit(struct ubpf_vm* vm, uint32_t limit, uint32_t* previou vm->instruction_limit = limit; return 0; } + +bool +ubpf_calculate_stack_usage_for_local_func(const struct ubpf_vm* vm, uint16_t pc, char** errmsg) +{ + // If there is a stack usage calculator and we have not invoked it before for the target, + // then now is the time to call it! + if (vm->stack_usage_calculator && !vm->local_func_stack_usage[pc].stack_usage_calculated) { + uint16_t stack_usage = (vm->stack_usage_calculator)(vm, pc, vm->stack_usage_calculator_cookie); + vm->local_func_stack_usage[pc].stack_usage = stack_usage; + } + vm->local_func_stack_usage[pc].stack_usage_calculated = true; + // Now that we are guaranteed to have a value for the amount of the stack used by the function + // starting at pc, let's make sure that it is 16-byte aligned. Note: The amount of stack + // used might be 0 (in the case where there is no registered stack usage calculator callback). That + // is okay because ubpf_stack_usage_for_local_func will give us a meaningful default.
+ if (vm->local_func_stack_usage[pc].stack_usage % 16) { + *errmsg = ubpf_error( + "local function (at PC %d) has improperly sized stack use (%d)", + pc, + vm->local_func_stack_usage[pc].stack_usage); + return false; + } + return true; +} + +uint16_t +ubpf_stack_usage_for_local_func(const struct ubpf_vm* vm, uint16_t pc) +{ + uint16_t stack_usage = 0; + if (vm->local_func_stack_usage[pc].stack_usage_calculated) { + stack_usage = vm->local_func_stack_usage[pc].stack_usage; + } + return stack_usage; +} + +int +ubpf_register_stack_usage_calculator(struct ubpf_vm* vm, stack_usage_calculator_t calculator, void* cookie) +{ + vm->stack_usage_calculator_cookie = cookie; + vm->stack_usage_calculator = calculator; + return 0; +} \ No newline at end of file