This is based on a video by Molly Rocket in which he shows how "clean code" can lead to poor performance. It is more or less a Rust port of his demonstration about 'hiding internal implementation details'.
The branching code is about 4.1x slower than the non-branching code (roughly 617 µs versus 149 µs in the benchmark results below).
match shape.shape_type {
Square => 1.0 * shape.width * shape.height,
Rectangle => 1.0 * shape.width * shape.height,
Triangle => 0.5 * shape.width * shape.height,
Circle => PI * shape.width * shape.height,
let coefficient = match shape.shape_type {
Square => 1.0,
Rectangle => 1.0,
Triangle => 0.5,
Circle => PI,
coefficient * shape.width * shape.height
$ cargo bench
time: [608.63 µs 616.95 µs 625.30 µs]
time: [146.78 µs 149.05 µs 151.31 µs]
$ cargo asm shape::get_area_branching
movzx eax, byte, ptr, [rdi, +, 8]
lea rcx, [rip, +, .LJTI0_0]
movsxd rax, dword, ptr, [rcx, +, 4*rax]
add rax, rcx
jmp rax
movss xmm0, dword, ptr, [rdi]
mulss xmm0, dword, ptr, [rdi, +, 4]
movss xmm0, dword, ptr, [rdi]
mulss xmm0, dword, ptr, [rip, +, .LCPI0_1]
mulss xmm0, dword, ptr, [rdi, +, 4]
movss xmm0, dword, ptr, [rdi]
mulss xmm0, dword, ptr, [rip, +, .LCPI0_0]
mulss xmm0, dword, ptr, [rdi, +, 4]
$ cargo asm shape::get_area_non_branching
movzx eax, byte, ptr, [rdi, +, 8]
lea rcx, [rip, +, .Lswitch.table._ZN5shape22get_area_non_branching17h104da2585c6dbc8eE]
movss xmm0, dword, ptr, [rcx, +, 4*rax]
mulss xmm0, dword, ptr, [rdi]
mulss xmm0, dword, ptr, [rdi, +, 4]