Skip to content

Commit

Permalink
Benchmarks for the Agda CEK machine (PLT-9187) (#5725)
Browse files Browse the repository at this point in the history
* Add benchmarks for Agda CEK machine

* Update test results (hash size changed)

* Update test results again

* Tidying up

* Tidying up

* More tidying up

* Rename file for consistency

* Remove blank line
  • Loading branch information
kwxm authored Jan 17, 2024
1 parent 907c537 commit 8f55aa1
Show file tree
Hide file tree
Showing 15 changed files with 211 additions and 90 deletions.
28 changes: 24 additions & 4 deletions plutus-benchmark/common/PlutusBenchmark/Common.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
{- | Miscellaneous shared code for benchmarking-related things. -}
module PlutusBenchmark.Common
( module Export
, Program
, Term
, getConfig
, toAnonDeBruijnTerm
Expand All @@ -15,6 +16,8 @@ module PlutusBenchmark.Common
, unsafeRunTermCek
, runTermCek
, cekResultMatchesHaskellValue
, benchTermAgdaCek
, benchProgramAgdaCek
, TestSize (..)
, printHeader
, printSizeStatistics
Expand All @@ -35,6 +38,8 @@ import PlutusTx qualified as Tx
import UntypedPlutusCore qualified as UPLC
import UntypedPlutusCore.Evaluation.Machine.Cek as Cek

import MAlonzo.Code.Main (runUAgda)

import Criterion.Main
import Criterion.Types (Config (..))
import Data.ByteString qualified as BS
Expand Down Expand Up @@ -101,15 +106,15 @@ haskellValueToTerm
haskellValueToTerm = compiledCodeToTerm . Tx.liftCodeDef


{- | Convert a de-Bruijn-named UPLC term to a Benchmark -}
{- | Convert a de-Bruijn-named UPLC term to a CEK Benchmark -}
benchTermCek :: Term -> Benchmarkable
benchTermCek term =
nf (unsafeRunTermCek) $! term -- Or whnf?
nf unsafeRunTermCek $! term -- Or whnf?

{- | Convert a de-Bruijn-named UPLC term to a Benchmark -}
{- | Convert a de-Bruijn-named UPLC term to a CEK Benchmark -}
benchProgramCek :: Program -> Benchmarkable
benchProgramCek (UPLC.Program _ _ term) =
nf (unsafeRunTermCek) $! term -- Or whnf?
nf unsafeRunTermCek $! term -- Or whnf?

{- | Just run a term to obtain an `EvaluationResult` (used for tests etc.) -}
unsafeRunTermCek :: Term -> EvaluationResult Term
Expand Down Expand Up @@ -151,6 +156,21 @@ cekResultMatchesHaskellValue
cekResultMatchesHaskellValue term matches value =
(unsafeRunTermCek term) `matches` (unsafeRunTermCek $ haskellValueToTerm value)


---------------- Run a term or program using the plutus-metatheory CEK evaluator ----------------

benchTermAgdaCek :: Term -> Benchmarkable
benchTermAgdaCek term =
nf unsafeRunAgdaCek $! term

benchProgramAgdaCek :: Program -> Benchmarkable
benchProgramAgdaCek (UPLC.Program _ _ term) =
nf unsafeRunAgdaCek $! term

unsafeRunAgdaCek :: Term -> EvaluationResult Term
unsafeRunAgdaCek =
either (error . \e -> "Agda evaluation error: " ++ show e) EvaluationSuccess . runUAgda

---------------- Printing tables of information about costs ----------------

data TestSize =
Expand Down
9 changes: 9 additions & 0 deletions plutus-benchmark/marlowe/bench/BenchAgdaCek.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{- | Benchmarks for the Agda CEK machine based on some Marlowe examples. -}

module Main where

import PlutusBenchmark.Common (benchProgramAgdaCek)
import Shared (runBenchmarks)

main :: IO ()
main = runBenchmarks benchProgramAgdaCek
9 changes: 9 additions & 0 deletions plutus-benchmark/marlowe/bench/BenchCek.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{- | Benchmarks for the CEK machine based on some Marlowe examples. -}

module Main where

import PlutusBenchmark.Common (benchProgramCek)
import Shared (runBenchmarks)

main :: IO ()
main = runBenchmarks benchProgramCek
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{-# LANGUAGE RecordWildCards #-}

{- | Plutus benchmarks based on some Marlowe examples. -}

module Main where
{-# LANGUAGE RecordWildCards #-}

module Shared where
import Criterion.Main (Benchmark, Benchmarkable, bench, bgroup, defaultMainWith)

import PlutusBenchmark.Common (benchProgramCek, getConfig)
import PlutusBenchmark.Common (Program, getConfig)
import PlutusBenchmark.Marlowe.BenchUtil (benchmarkToUPLC, rolePayoutBenchmarks,
semanticsBenchmarks)
import PlutusBenchmark.Marlowe.Scripts.RolePayout (rolePayoutValidator)
Expand All @@ -14,14 +14,18 @@ import PlutusBenchmark.Marlowe.Types qualified as M
import PlutusLedgerApi.V2 (scriptContextTxInfo, txInfoId)
import PlutusTx.Code (CompiledCode)

mkBenchmarkable :: CompiledCode a -> M.Benchmark -> (String, Benchmarkable)
mkBenchmarkable validator bm@M.Benchmark{..} =
mkBenchmarkable
:: (Program -> Benchmarkable)
-> CompiledCode a
-> M.Benchmark
-> (String, Benchmarkable)
mkBenchmarkable benchmarker validator bm@M.Benchmark{..} =
let benchName = show $ txInfoId $ scriptContextTxInfo bScriptContext
in
(benchName, benchProgramCek $ benchmarkToUPLC validator bm )
(benchName, benchmarker $ benchmarkToUPLC validator bm )

main :: IO ()
main = do
runBenchmarks :: (Program -> Benchmarkable) -> IO ()
runBenchmarks benchmarker = do

-- Read the semantics benchmark files.
semanticsMBench <- either error id <$> semanticsBenchmarks
Expand All @@ -34,10 +38,10 @@ main = do
uncurriedBench = uncurry bench
semanticsBench :: [Benchmark] -- list of criterion semantics Benchmarks
semanticsBench =
fmap (uncurriedBench . mkBenchmarkable marloweValidator) semanticsMBench
fmap (uncurriedBench . mkBenchmarkable benchmarker marloweValidator) semanticsMBench
rolePayoutBench :: [Benchmark] -- list of criterion role payout Benchmarks
rolePayoutBench =
fmap (uncurriedBench . mkBenchmarkable rolePayoutValidator) rolePayoutMBench
fmap (uncurriedBench . mkBenchmarkable benchmarker rolePayoutValidator) rolePayoutMBench

-- Run each benchmark for 5 secs by default. This benchmark runs on the longitudinal
-- benchmarking flow so we don't want to set it higher by default. One can change this with -L or
Expand Down
8 changes: 8 additions & 0 deletions plutus-benchmark/nofib/bench/BenchAgdaCek.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{- | Plutus benchmarks for the Agda CEK machine based on some nofib examples. -}
module Main where

import PlutusBenchmark.Common (benchTermAgdaCek)
import Shared (benchWith)

main :: IO ()
main = benchWith benchTermAgdaCek
8 changes: 8 additions & 0 deletions plutus-benchmark/nofib/bench/BenchCek.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{- | Plutus benchmarks for the CEK machine based on some nofib examples. -}
module Main where

import PlutusBenchmark.Common (benchTermCek)
import Shared (benchWith)

main :: IO ()
main = benchWith benchTermCek
4 changes: 1 addition & 3 deletions plutus-benchmark/nofib/bench/BenchHaskell.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
{- | Benchmarking for the Plutus versions of the Plutus nofib benchmarks. -}
{-# OPTIONS_GHC -fwarn-unused-imports #-}

{- | Benchmarking for the Haskell versions of the Plutus nofib benchmarks. -}
module Main (main) where

import Shared (mkBenchMarks)
Expand Down
57 changes: 0 additions & 57 deletions plutus-benchmark/nofib/bench/BenchPlc.hs

This file was deleted.

54 changes: 52 additions & 2 deletions plutus-benchmark/nofib/bench/Shared.hs
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
{- | Shared code for benchmarking Plutus and Haskell versions of the Plutus nofib examples -}
module Shared (mkBenchMarks, BenchmarkRunners)
module Shared (benchWith, mkBenchMarks)
where

import Criterion.Main
import PlutusBenchmark.Common (Term, getConfig)

import PlutusBenchmark.NoFib.Clausify qualified as Clausify
import PlutusBenchmark.NoFib.Knights qualified as Knights
import PlutusBenchmark.NoFib.Prime qualified as Prime
import PlutusBenchmark.NoFib.Queens qualified as Queens

import Criterion.Main


{- | Package together functions to create benchmarks for each program given suitable inputs. -}
type BenchmarkRunners =
( Clausify.StaticFormula -> Benchmarkable
Expand Down Expand Up @@ -55,3 +59,49 @@ mkBenchMarks (benchClausify, benchKnights, benchPrime, benchQueens) = [
]
]


---------------- Create a benchmark with given inputs ----------------

benchClausifyWith :: (Term -> Benchmarkable) -> Clausify.StaticFormula -> Benchmarkable
benchClausifyWith benchmarker f = benchmarker $ Clausify.mkClausifyTerm f

benchPrimeWith :: (Term -> Benchmarkable) -> Prime.PrimeID -> Benchmarkable
benchPrimeWith benchmarker pid = benchmarker $ Prime.mkPrimalityBenchTerm pid

benchQueensWith :: (Term -> Benchmarkable) -> Integer -> Queens.Algorithm -> Benchmarkable
benchQueensWith benchmarker sz alg = benchmarker $ Queens.mkQueensTerm sz alg

benchKnightsWith :: (Term -> Benchmarkable) -> Integer -> Integer -> Benchmarkable
benchKnightsWith benchmarker depth sz = benchmarker $ Knights.mkKnightsTerm depth sz

{- This runs all of the benchmarks, which will take a long time.
To run an individual benmark, try, for example,
cabal bench plutus-benchmark:nofib --benchmark-options "primetest/40digits".
Better results will be obtained with more repetitions of the benchmark. Set
the minimum time for the benchmarking process (in seconds) with the -L
option. For example,
stack bench plutus-benchmark:nofib --ba "primetest/40digits -L300"
You can list the avaiable benchmarks with
stack bench plutus-benchmark:nofib --ba --list
or
cabal bench plutus-benchmark:nofib --benchmark-options --list
-}


-- Given a function (involving some evaluator) which constructs a Benchmarkable
-- from a Term, use it to construct and run all of the benchmarks
benchWith :: (Term -> Benchmarkable) -> IO ()
benchWith benchmarker = do
let runners = ( benchClausifyWith benchmarker, benchKnightsWith benchmarker
, benchPrimeWith benchmarker, benchQueensWith benchmarker)
-- Run each benchmark for at least one minute. Change this with -L or --timeout.
config <- getConfig 60.0
defaultMainWith config $ mkBenchMarks runners
Loading

1 comment on commit 8f55aa1

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for benchmark 'Plutus Benchmarks'.
Benchmark result of this commit is worse than the previous benchmark result exceeding threshold 1.05.

Benchmark suite Current: 8f55aa1 Previous: 907c537 Ratio
validation-decode-crowdfunding-success-3 243 μs 227.9 μs 1.07

This comment was automatically generated by workflow using github-action-benchmark.

CC: @input-output-hk/plutus-core

Please sign in to comment.