From c5cf8862d3a0476d7eff868bf1bd430ffaa45c4a Mon Sep 17 00:00:00 2001 From: Vesa Karvonen Date: Fri, 27 Sep 2024 16:33:45 +0300 Subject: [PATCH] Port to 4.12 and tweak the implementation for size --- .ocamlformat | 2 +- backoff.opam | 2 +- dune-project | 2 +- src/backoff.ml | 24 ++++++++++++++++++------ 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/.ocamlformat b/.ocamlformat index 29a9cf7..0b457a3 100644 --- a/.ocamlformat +++ b/.ocamlformat @@ -1,2 +1,2 @@ profile = default -version = 0.26.0 +version = 0.26.2 diff --git a/backoff.opam b/backoff.opam index 24d62af..e76fb58 100644 --- a/backoff.opam +++ b/backoff.opam @@ -8,7 +8,7 @@ homepage: "https://github.com/ocaml-multicore/backoff" bug-reports: "https://github.com/ocaml-multicore/backoff/issues" depends: [ "dune" {>= "3.3"} - "ocaml" {>= "4.13"} + "ocaml" {>= "4.08"} "alcotest" {>= "1.7.0" & with-test} "domain_shims" {>= "0.1.0" & with-test} "odoc" {with-doc} diff --git a/dune-project b/dune-project index 51f48cf..6bce681 100644 --- a/dune-project +++ b/dune-project @@ -10,6 +10,6 @@ (name backoff) (synopsis "Exponential backoff mechanism for OCaml") (depends - (ocaml (>= 4.13)) + (ocaml (>= 4.08)) (alcotest (and (>= 1.7.0) :with-test)) (domain_shims (and (>= 0.1.0) :with-test)))) diff --git a/src/backoff.ml b/src/backoff.ml index cd8d06f..f0874fc 100644 --- a/src/backoff.ml +++ b/src/backoff.ml @@ -40,15 +40,27 @@ let reset backoff = let lower_wait_log = get_lower_wait_log backoff in backoff land lnot mask lor lower_wait_log -let once backoff = +(* We don't want [once] to be inlined. This may avoid code bloat. *) +let[@inline never] once backoff = + (* We call [Random.bits] first. In this case this helps to reduce register + pressure so that fewer words will be allocated from the stack. *) + let t = Random.bits () in let wait_log = get_wait_log backoff in let wait_mask = (1 lsl wait_log) - 1 in - let t = Random.bits () land wait_mask land single_mask in - for _ = 0 to t do - Domain.cpu_relax () + (* We use a ref and countdown while-loop (uses one variable) instead of a + for-loop (uses two variables) to reduce register pressure. Local ref does + not allocate with native compiler. *) + let t = ref (t land wait_mask land single_mask) in + while 0 <= !t do + Domain.cpu_relax (); + t := !t - 1 done; let upper_wait_log = get_upper_wait_log backoff in - let next_wait_log = Int.min upper_wait_log (wait_log + 1) in - backoff lxor wait_log lor next_wait_log + (* We recompute [wait_log] to reduce register pressure: *) + let wait_log = get_wait_log backoff in + (* [Bool.to_int] generates branchless code, this reduces branch predictor + pressure and generates shorter code. *) + let next_wait_log = wait_log + Bool.to_int (wait_log < upper_wait_log) in + backoff - wait_log + next_wait_log let default = create ()