Benchmarks for the Agda CEK machine (PLT-9187) (#5725)

* Add benchmarks for Agda CEK machine * Update test results (hash size changed) * Update test results again * Tidying up * Tidying up * More tidying up * Rename file for consistency * Remove blank line
IntersectMBO · Jan 17, 2024 · 8f55aa1 · 8f55aa1 · github-actions · Jan 17, 2024
1 parent 907c537
commit 8f55aa1
Show file tree

Hide file tree

Showing 15 changed files with 211 additions and 90 deletions.
diff --git a/plutus-benchmark/common/PlutusBenchmark/Common.hs b/plutus-benchmark/common/PlutusBenchmark/Common.hs
@@ -4,6 +4,7 @@
 {- | Miscellaneous shared code for benchmarking-related things. -}
 module PlutusBenchmark.Common
     ( module Export
+    , Program
     , Term
     , getConfig
     , toAnonDeBruijnTerm
@@ -15,6 +16,8 @@ module PlutusBenchmark.Common
     , unsafeRunTermCek
     , runTermCek
     , cekResultMatchesHaskellValue
+    , benchTermAgdaCek
+    , benchProgramAgdaCek
     , TestSize (..)
     , printHeader
     , printSizeStatistics
@@ -35,6 +38,8 @@ import PlutusTx qualified as Tx
 import UntypedPlutusCore qualified as UPLC
 import UntypedPlutusCore.Evaluation.Machine.Cek as Cek
 
+import MAlonzo.Code.Main (runUAgda)
+
 import Criterion.Main
 import Criterion.Types (Config (..))
 import Data.ByteString qualified as BS
@@ -101,15 +106,15 @@ haskellValueToTerm
 haskellValueToTerm = compiledCodeToTerm . Tx.liftCodeDef
 
 
-{- | Convert a de-Bruijn-named UPLC term to a Benchmark -}
+{- | Convert a de-Bruijn-named UPLC term to a CEK Benchmark -}
 benchTermCek :: Term -> Benchmarkable
 benchTermCek term =
-    nf (unsafeRunTermCek) $! term -- Or whnf?
+    nf unsafeRunTermCek $! term -- Or whnf?
 
-{- | Convert a de-Bruijn-named UPLC term to a Benchmark -}
+{- | Convert a de-Bruijn-named UPLC program to a CEK Benchmark -}
 benchProgramCek :: Program -> Benchmarkable
 benchProgramCek (UPLC.Program _ _ term) =
-    nf (unsafeRunTermCek) $! term -- Or whnf?
+    nf unsafeRunTermCek $! term -- Or whnf?
 
 {- | Just run a term to obtain an `EvaluationResult` (used for tests etc.) -}
 unsafeRunTermCek :: Term -> EvaluationResult Term
@@ -151,6 +156,21 @@ cekResultMatchesHaskellValue
 cekResultMatchesHaskellValue term matches value =
     (unsafeRunTermCek term) `matches` (unsafeRunTermCek $ haskellValueToTerm value)
 
+
+---------------- Run a term or program using the plutus-metatheory CEK evaluator ----------------
+
+benchTermAgdaCek :: Term -> Benchmarkable
+benchTermAgdaCek term =
+    nf unsafeRunAgdaCek $! term
+
+benchProgramAgdaCek :: Program -> Benchmarkable
+benchProgramAgdaCek (UPLC.Program _ _ term) =
+    nf unsafeRunAgdaCek $! term
+
+unsafeRunAgdaCek :: Term -> EvaluationResult Term
+unsafeRunAgdaCek =
+    either (error . \e -> "Agda evaluation error: " ++ show e) EvaluationSuccess . runUAgda
+
 ---------------- Printing tables of information about costs ----------------
 
 data TestSize =

diff --git a/plutus-benchmark/marlowe/bench/BenchAgdaCek.hs b/plutus-benchmark/marlowe/bench/BenchAgdaCek.hs
@@ -0,0 +1,9 @@
+{- | Benchmarks for the Agda CEK machine based on some Marlowe examples. -}
+
+module Main where
+
+import PlutusBenchmark.Common (benchProgramAgdaCek)
+import Shared (runBenchmarks)
+
+main :: IO ()
+main = runBenchmarks benchProgramAgdaCek
diff --git a/plutus-benchmark/marlowe/bench/BenchCek.hs b/plutus-benchmark/marlowe/bench/BenchCek.hs
@@ -0,0 +1,9 @@
+{- | Benchmarks for the CEK machine based on some Marlowe examples. -}
+
+module Main where
+
+import PlutusBenchmark.Common (benchProgramCek)
+import Shared (runBenchmarks)
+
+main :: IO ()
+main = runBenchmarks benchProgramCek
diff --git a/plutus-benchmark/marlowe/bench/Bench.hs → plutus-benchmark/marlowe/bench/Shared.hs b/plutus-benchmark/marlowe/bench/Bench.hs → plutus-benchmark/marlowe/bench/Shared.hs
@@ -1,11 +1,11 @@
-{-# LANGUAGE RecordWildCards #-}
-
 {- | Plutus benchmarks based on some Marlowe examples. -}
 
-module Main where
+{-# LANGUAGE RecordWildCards #-}
+
+module Shared where
 import Criterion.Main (Benchmark, Benchmarkable, bench, bgroup, defaultMainWith)
 
-import PlutusBenchmark.Common (benchProgramCek, getConfig)
+import PlutusBenchmark.Common (Program, getConfig)
 import PlutusBenchmark.Marlowe.BenchUtil (benchmarkToUPLC, rolePayoutBenchmarks,
                                           semanticsBenchmarks)
 import PlutusBenchmark.Marlowe.Scripts.RolePayout (rolePayoutValidator)
@@ -14,14 +14,18 @@ import PlutusBenchmark.Marlowe.Types qualified as M
 import PlutusLedgerApi.V2 (scriptContextTxInfo, txInfoId)
 import PlutusTx.Code (CompiledCode)
 
-mkBenchmarkable :: CompiledCode a -> M.Benchmark -> (String, Benchmarkable)
-mkBenchmarkable validator bm@M.Benchmark{..} =
+mkBenchmarkable
+    :: (Program -> Benchmarkable)
+    -> CompiledCode a
+    -> M.Benchmark
+    -> (String, Benchmarkable)
+mkBenchmarkable benchmarker validator bm@M.Benchmark{..} =
   let benchName = show $ txInfoId $ scriptContextTxInfo bScriptContext
   in
-    (benchName, benchProgramCek $ benchmarkToUPLC validator bm )
+    (benchName, benchmarker $ benchmarkToUPLC validator bm )
 
-main :: IO ()
-main = do
+runBenchmarks :: (Program -> Benchmarkable) -> IO ()
+runBenchmarks benchmarker = do
 
   -- Read the semantics benchmark files.
   semanticsMBench <- either error id <$> semanticsBenchmarks
@@ -34,10 +38,10 @@ main = do
     uncurriedBench = uncurry bench
     semanticsBench :: [Benchmark] -- list of criterion semantics Benchmarks
     semanticsBench =
-      fmap (uncurriedBench . mkBenchmarkable marloweValidator) semanticsMBench
+      fmap (uncurriedBench . mkBenchmarkable benchmarker marloweValidator) semanticsMBench
     rolePayoutBench :: [Benchmark] -- list of criterion role payout Benchmarks
     rolePayoutBench =
-      fmap (uncurriedBench . mkBenchmarkable rolePayoutValidator) rolePayoutMBench
+      fmap (uncurriedBench . mkBenchmarkable benchmarker rolePayoutValidator) rolePayoutMBench
 
   -- Run each benchmark for 5 secs by default. This benchmark runs on the longitudinal
   -- benchmarking flow so we don't want to set it higher by default. One can change this with -L or

diff --git a/plutus-benchmark/nofib/bench/BenchAgdaCek.hs b/plutus-benchmark/nofib/bench/BenchAgdaCek.hs
@@ -0,0 +1,8 @@
+{- | Plutus benchmarks for the Agda CEK machine based on some nofib examples. -}
+module Main where
+
+import PlutusBenchmark.Common (benchTermAgdaCek)
+import Shared (benchWith)
+
+main :: IO ()
+main = benchWith benchTermAgdaCek
diff --git a/plutus-benchmark/nofib/bench/BenchCek.hs b/plutus-benchmark/nofib/bench/BenchCek.hs
@@ -0,0 +1,8 @@
+{- | Plutus benchmarks for the CEK machine based on some nofib examples. -}
+module Main where
+
+import PlutusBenchmark.Common (benchTermCek)
+import Shared (benchWith)
+
+main :: IO ()
+main = benchWith benchTermCek
diff --git a/plutus-benchmark/nofib/bench/BenchHaskell.hs b/plutus-benchmark/nofib/bench/BenchHaskell.hs
@@ -1,6 +1,4 @@
-{- | Benchmarking for the Plutus versions of the Plutus nofib benchmarks. -}
-{-# OPTIONS_GHC -fwarn-unused-imports #-}
-
+{- | Benchmarking for the Haskell versions of the Plutus nofib benchmarks. -}
 module Main (main) where
 
 import Shared (mkBenchMarks)

diff --git a/plutus-benchmark/nofib/bench/BenchPlc.hs b/plutus-benchmark/nofib/bench/BenchPlc.hs
diff --git a/plutus-benchmark/nofib/bench/Shared.hs b/plutus-benchmark/nofib/bench/Shared.hs
@@ -1,13 +1,17 @@
 {- | Shared code for benchmarking Plutus and Haskell versions of the Plutus nofib examples -}
-module Shared (mkBenchMarks, BenchmarkRunners)
+module Shared (benchWith, mkBenchMarks)
 where
 
-import Criterion.Main
+import PlutusBenchmark.Common (Term, getConfig)
 
 import PlutusBenchmark.NoFib.Clausify qualified as Clausify
+import PlutusBenchmark.NoFib.Knights qualified as Knights
 import PlutusBenchmark.NoFib.Prime qualified as Prime
 import PlutusBenchmark.NoFib.Queens qualified as Queens
 
+import Criterion.Main
+
+
 {- | Package together functions to create benchmarks for each program given suitable inputs. -}
 type BenchmarkRunners =
     ( Clausify.StaticFormula -> Benchmarkable
@@ -55,3 +59,49 @@ mkBenchMarks (benchClausify, benchKnights, benchPrime, benchQueens) = [
                     ]
        ]
 
+
+---------------- Create a benchmark with given inputs ----------------
+
+benchClausifyWith :: (Term -> Benchmarkable) -> Clausify.StaticFormula -> Benchmarkable
+benchClausifyWith benchmarker f = benchmarker $ Clausify.mkClausifyTerm f
+
+benchPrimeWith :: (Term -> Benchmarkable) -> Prime.PrimeID -> Benchmarkable
+benchPrimeWith benchmarker pid = benchmarker $ Prime.mkPrimalityBenchTerm pid
+
+benchQueensWith :: (Term -> Benchmarkable) -> Integer -> Queens.Algorithm -> Benchmarkable
+benchQueensWith benchmarker sz alg = benchmarker $ Queens.mkQueensTerm sz alg
+
+benchKnightsWith :: (Term -> Benchmarkable) -> Integer -> Integer -> Benchmarkable
+benchKnightsWith benchmarker depth sz = benchmarker $ Knights.mkKnightsTerm depth sz
+
+{- This runs all of the benchmarks, which will take a long time.
+   To run an individual benchmark, try, for example,
+
+     cabal bench plutus-benchmark:nofib --benchmark-options "primetest/40digits".
+
+   Better results will be obtained with more repetitions of the benchmark.  Set
+   the minimum time for the benchmarking process (in seconds) with the -L
+   option. For example,
+
+     stack bench plutus-benchmark:nofib --ba "primetest/40digits -L300"
+
+   You can list the available benchmarks with
+
+     stack bench plutus-benchmark:nofib --ba --list
+
+   or
+
+     cabal bench plutus-benchmark:nofib --benchmark-options --list
+
+-}
+
+
+-- Given a function (involving some evaluator) which constructs a Benchmarkable
+-- from a Term, use it to construct and run all of the benchmarks
+benchWith :: (Term -> Benchmarkable) -> IO ()
+benchWith benchmarker = do
+  let runners = ( benchClausifyWith benchmarker, benchKnightsWith benchmarker
+                , benchPrimeWith benchmarker, benchQueensWith benchmarker)
+  -- Run each benchmark for at least one minute.  Change this with -L or --time-limit.
+  config <- getConfig 60.0
+  defaultMainWith config $ mkBenchMarks runners