diff --git a/plutus-benchmark/common/PlutusBenchmark/Common.hs b/plutus-benchmark/common/PlutusBenchmark/Common.hs
index 44607e72aa1..2019f010710 100644
--- a/plutus-benchmark/common/PlutusBenchmark/Common.hs
+++ b/plutus-benchmark/common/PlutusBenchmark/Common.hs
@@ -4,6 +4,7 @@
 {- | Miscellaneous shared code for benchmarking-related things. -}
 module PlutusBenchmark.Common
     ( module Export
+    , Program
     , Term
     , getConfig
     , toAnonDeBruijnTerm
@@ -15,6 +16,8 @@ module PlutusBenchmark.Common
     , unsafeRunTermCek
     , runTermCek
     , cekResultMatchesHaskellValue
+    , benchTermAgdaCek
+    , benchProgramAgdaCek
     , TestSize (..)
     , printHeader
     , printSizeStatistics
@@ -35,6 +38,8 @@ import PlutusTx qualified as Tx
 import UntypedPlutusCore qualified as UPLC
 import UntypedPlutusCore.Evaluation.Machine.Cek as Cek
 
+import MAlonzo.Code.Main (runUAgda)
+
 import Criterion.Main
 import Criterion.Types (Config (..))
 import Data.ByteString qualified as BS
@@ -101,15 +106,15 @@ haskellValueToTerm
 haskellValueToTerm = compiledCodeToTerm . Tx.liftCodeDef
 
 
-{- | Convert a de-Bruijn-named UPLC term to a Benchmark -}
+{- | Convert a de-Bruijn-named UPLC term to a CEK Benchmark -}
 benchTermCek :: Term -> Benchmarkable
 benchTermCek term =
-    nf (unsafeRunTermCek) $! term -- Or whnf?
+    nf unsafeRunTermCek $! term -- Or whnf?
 
-{- | Convert a de-Bruijn-named UPLC term to a Benchmark -}
+{- | Convert a de-Bruijn-named UPLC term to a CEK Benchmark -}
 benchProgramCek :: Program -> Benchmarkable
 benchProgramCek (UPLC.Program _ _ term) =
-    nf (unsafeRunTermCek) $! term -- Or whnf?
+    nf unsafeRunTermCek $! term -- Or whnf?
 
 {- | Just run a term to obtain an `EvaluationResult` (used for tests etc.) -}
 unsafeRunTermCek :: Term -> EvaluationResult Term
@@ -151,6 +156,21 @@ cekResultMatchesHaskellValue
 cekResultMatchesHaskellValue term matches value =
     (unsafeRunTermCek term) `matches` (unsafeRunTermCek $ haskellValueToTerm value)
 
+
+---------------- Run a term or program using the plutus-metatheory CEK evaluator ----------------
+
+benchTermAgdaCek :: Term -> Benchmarkable
+benchTermAgdaCek term =
+    nf unsafeRunAgdaCek $! term
+
+benchProgramAgdaCek :: Program -> Benchmarkable
+benchProgramAgdaCek (UPLC.Program _ _ term) =
+    nf unsafeRunAgdaCek $! term
+
+unsafeRunAgdaCek :: Term -> EvaluationResult Term
+unsafeRunAgdaCek =
+    either (error . \e -> "Agda evaluation error: " ++ show e) EvaluationSuccess . runUAgda
+
 ---------------- Printing tables of information about costs ----------------
 
 data TestSize =
diff --git a/plutus-benchmark/marlowe/bench/BenchAgdaCek.hs b/plutus-benchmark/marlowe/bench/BenchAgdaCek.hs
new file mode 100644
index 00000000000..33e01d70195
--- /dev/null
+++ b/plutus-benchmark/marlowe/bench/BenchAgdaCek.hs
@@ -0,0 +1,9 @@
+{- | Benchmarks for the Agda CEK machine based on some Marlowe examples. -}
+
+module Main where
+
+import PlutusBenchmark.Common (benchProgramAgdaCek)
+import Shared (runBenchmarks)
+
+main :: IO ()
+main = runBenchmarks benchProgramAgdaCek
diff --git a/plutus-benchmark/marlowe/bench/BenchCek.hs b/plutus-benchmark/marlowe/bench/BenchCek.hs
new file mode 100644
index 00000000000..c51bbaabbe0
--- /dev/null
+++ b/plutus-benchmark/marlowe/bench/BenchCek.hs
@@ -0,0 +1,9 @@
+{- | Benchmarks for the CEK machine based on some Marlowe examples. -}
+
+module Main where
+
+import PlutusBenchmark.Common (benchProgramCek)
+import Shared (runBenchmarks)
+
+main :: IO ()
+main = runBenchmarks benchProgramCek
diff --git a/plutus-benchmark/marlowe/bench/Bench.hs b/plutus-benchmark/marlowe/bench/Shared.hs
similarity index 72%
rename from plutus-benchmark/marlowe/bench/Bench.hs
rename to plutus-benchmark/marlowe/bench/Shared.hs
index 9c57cbb2e89..8dd0aaf2be9 100644
--- a/plutus-benchmark/marlowe/bench/Bench.hs
+++ b/plutus-benchmark/marlowe/bench/Shared.hs
@@ -1,11 +1,11 @@
-{-# LANGUAGE RecordWildCards #-}
-
 {- | Plutus benchmarks based on some Marlowe examples. -}
 
-module Main where
+{-# LANGUAGE RecordWildCards #-}
+
+module Shared where
 import Criterion.Main (Benchmark, Benchmarkable, bench, bgroup, defaultMainWith)
 
-import PlutusBenchmark.Common (benchProgramCek, getConfig)
+import PlutusBenchmark.Common (Program, getConfig)
 import PlutusBenchmark.Marlowe.BenchUtil (benchmarkToUPLC, rolePayoutBenchmarks,
                                           semanticsBenchmarks)
 import PlutusBenchmark.Marlowe.Scripts.RolePayout (rolePayoutValidator)
@@ -14,14 +14,18 @@ import PlutusBenchmark.Marlowe.Types qualified as M
 import PlutusLedgerApi.V2 (scriptContextTxInfo, txInfoId)
 import PlutusTx.Code (CompiledCode)
 
-mkBenchmarkable :: CompiledCode a -> M.Benchmark -> (String, Benchmarkable)
-mkBenchmarkable validator bm@M.Benchmark{..} =
+mkBenchmarkable
+    :: (Program -> Benchmarkable)
+    -> CompiledCode a
+    -> M.Benchmark
+    -> (String, Benchmarkable)
+mkBenchmarkable benchmarker validator bm@M.Benchmark{..} =
   let benchName = show $ txInfoId $ scriptContextTxInfo bScriptContext
   in
-    (benchName, benchProgramCek $ benchmarkToUPLC validator bm )
+    (benchName, benchmarker $ benchmarkToUPLC validator bm )
 
-main :: IO ()
-main = do
+runBenchmarks :: (Program -> Benchmarkable) -> IO ()
+runBenchmarks benchmarker = do
 
   -- Read the semantics benchmark files.
   semanticsMBench <- either error id <$> semanticsBenchmarks
@@ -34,10 +38,10 @@ main = do
     uncurriedBench = uncurry bench
     semanticsBench :: [Benchmark] -- list of criterion semantics Benchmarks
     semanticsBench =
-      fmap (uncurriedBench . mkBenchmarkable marloweValidator) semanticsMBench
+      fmap (uncurriedBench . mkBenchmarkable benchmarker marloweValidator) semanticsMBench
     rolePayoutBench :: [Benchmark] -- list of criterion role payout Benchmarks
     rolePayoutBench =
-      fmap (uncurriedBench . mkBenchmarkable rolePayoutValidator) rolePayoutMBench
+      fmap (uncurriedBench . mkBenchmarkable benchmarker rolePayoutValidator) rolePayoutMBench
 
   -- Run each benchmark for 5 secs by default. This benchmark runs on the longitudinal
   -- benchmarking flow so we don't want to set it higher by default. One can change this with -L or
diff --git a/plutus-benchmark/nofib/bench/BenchAgdaCek.hs b/plutus-benchmark/nofib/bench/BenchAgdaCek.hs
new file mode 100644
index 00000000000..497fa0d7162
--- /dev/null
+++ b/plutus-benchmark/nofib/bench/BenchAgdaCek.hs
@@ -0,0 +1,8 @@
+{- | Plutus benchmarks for the Agda CEK machine based on some nofib examples. -}
+module Main where
+
+import PlutusBenchmark.Common (benchTermAgdaCek)
+import Shared (benchWith)
+
+main :: IO ()
+main = benchWith benchTermAgdaCek
diff --git a/plutus-benchmark/nofib/bench/BenchCek.hs b/plutus-benchmark/nofib/bench/BenchCek.hs
new file mode 100644
index 00000000000..73aaa4a37c7
--- /dev/null
+++ b/plutus-benchmark/nofib/bench/BenchCek.hs
@@ -0,0 +1,8 @@
+{- | Plutus benchmarks for the CEK machine based on some nofib examples. -}
+module Main where
+
+import PlutusBenchmark.Common (benchTermCek)
+import Shared (benchWith)
+
+main :: IO ()
+main = benchWith benchTermCek
diff --git a/plutus-benchmark/nofib/bench/BenchHaskell.hs b/plutus-benchmark/nofib/bench/BenchHaskell.hs
index 35979ff6544..3c6231287cf 100644
--- a/plutus-benchmark/nofib/bench/BenchHaskell.hs
+++ b/plutus-benchmark/nofib/bench/BenchHaskell.hs
@@ -1,6 +1,4 @@
-{- | Benchmarking for the Plutus versions of the Plutus nofib benchmarks. -}
-{-# OPTIONS_GHC -fwarn-unused-imports #-}
-
+{- | Benchmarking for the Haskell versions of the Plutus nofib benchmarks. -}
 module Main (main) where
 
 import Shared (mkBenchMarks)
diff --git a/plutus-benchmark/nofib/bench/BenchPlc.hs b/plutus-benchmark/nofib/bench/BenchPlc.hs
deleted file mode 100644
index 40a694964e9..00000000000
--- a/plutus-benchmark/nofib/bench/BenchPlc.hs
+++ /dev/null
@@ -1,57 +0,0 @@
-{- | Plutus benchmarks based on some nofib examples. -}
-module Main where
-
-import Shared (mkBenchMarks)
-
-import Criterion.Main
-
-import PlutusBenchmark.Common (benchTermCek, getConfig)
-
-import PlutusBenchmark.NoFib.Clausify qualified as Clausify
-import PlutusBenchmark.NoFib.Knights qualified as Knights
-import PlutusBenchmark.NoFib.Prime qualified as Prime
-import PlutusBenchmark.NoFib.Queens qualified as Queens
-
-benchClausify :: Clausify.StaticFormula -> Benchmarkable
-benchClausify f = benchTermCek $ Clausify.mkClausifyTerm f
-
-benchPrime :: Prime.PrimeID -> Benchmarkable
-benchPrime pid = benchTermCek $ Prime.mkPrimalityBenchTerm pid
-
-benchQueens :: Integer -> Queens.Algorithm -> Benchmarkable
-benchQueens sz alg = benchTermCek $ Queens.mkQueensTerm sz alg
-
-benchKnights :: Integer -> Integer -> Benchmarkable
-benchKnights depth sz = benchTermCek $ Knights.mkKnightsTerm depth sz
-
-{- This runs all of the benchmarks, which will take a long time.
-   To run an individual benmark, try, for example,
-
-     stack bench plutus-benchmark:nofib --ba primetest/40digits
-
-   or
-
-     cabal bench plutus-benchmark:nofib --benchmark-options "primetest/40digits".
-
-   Better results will be obtained with more repetitions of the benchmark.  Set
-   the minimum time for the benchmarking process (in seconds) with the -L
-   option. For example,
-
-     stack bench plutus-benchmark:nofib --ba "primetest/40digits -L300"
-
-   You can list the avaiable benchmarks with
-
-     stack bench plutus-benchmark:nofib --ba --list
-
-   or
-
-     cabal bench plutus-benchmark:nofib --benchmark-options --list
-
--}
-
-main :: IO ()
-main = do
-  let runners = (benchClausify, benchKnights, benchPrime, benchQueens)
-  -- Run each benchmark for at least one minute.  Change this with -L or --timeout.
-  config <- getConfig 60.0
-  defaultMainWith config $ mkBenchMarks runners
diff --git a/plutus-benchmark/nofib/bench/Shared.hs b/plutus-benchmark/nofib/bench/Shared.hs
index 4c1aba012f3..35e409e8cd6 100644
--- a/plutus-benchmark/nofib/bench/Shared.hs
+++ b/plutus-benchmark/nofib/bench/Shared.hs
@@ -1,13 +1,17 @@
 {- | Shared code for benchmarking Plutus and Haskell versions of the Plutus nofib examples -}
-module Shared (mkBenchMarks, BenchmarkRunners)
+module Shared (benchWith, mkBenchMarks)
 where
 
-import Criterion.Main
+import PlutusBenchmark.Common (Term, getConfig)
 
 import PlutusBenchmark.NoFib.Clausify qualified as Clausify
+import PlutusBenchmark.NoFib.Knights qualified as Knights
 import PlutusBenchmark.NoFib.Prime qualified as Prime
 import PlutusBenchmark.NoFib.Queens qualified as Queens
 
+import Criterion.Main
+
+
 {- | Package together functions to create benchmarks for each program given suitable inputs. -}
 type BenchmarkRunners =
     ( Clausify.StaticFormula -> Benchmarkable
@@ -55,3 +59,49 @@ mkBenchMarks (benchClausify, benchKnights, benchPrime, benchQueens) = [
                     ]
        ]
 
+
+---------------- Create a benchmark with given inputs ----------------
+
+benchClausifyWith :: (Term -> Benchmarkable) -> Clausify.StaticFormula -> Benchmarkable
+benchClausifyWith benchmarker f = benchmarker $ Clausify.mkClausifyTerm f
+
+benchPrimeWith :: (Term -> Benchmarkable) -> Prime.PrimeID -> Benchmarkable
+benchPrimeWith benchmarker pid = benchmarker $ Prime.mkPrimalityBenchTerm pid
+
+benchQueensWith :: (Term -> Benchmarkable) -> Integer -> Queens.Algorithm -> Benchmarkable
+benchQueensWith benchmarker sz alg = benchmarker $ Queens.mkQueensTerm sz alg
+
+benchKnightsWith :: (Term -> Benchmarkable) -> Integer -> Integer -> Benchmarkable
+benchKnightsWith benchmarker depth sz = benchmarker $ Knights.mkKnightsTerm depth sz
+
+{- This runs all of the benchmarks, which will take a long time.
+   To run an individual benmark, try, for example,
+
+     cabal bench plutus-benchmark:nofib --benchmark-options "primetest/40digits".
+
+   Better results will be obtained with more repetitions of the benchmark.  Set
+   the minimum time for the benchmarking process (in seconds) with the -L
+   option. For example,
+
+     stack bench plutus-benchmark:nofib --ba "primetest/40digits -L300"
+
+   You can list the avaiable benchmarks with
+
+     stack bench plutus-benchmark:nofib --ba --list
+
+   or
+
+     cabal bench plutus-benchmark:nofib --benchmark-options --list
+
+-}
+
+
+-- Given a function (involving some evaluator) which constructs a Benchmarkable
+-- from a Term, use it to construct and run all of the benchmarks
+benchWith :: (Term -> Benchmarkable) -> IO ()
+benchWith benchmarker = do
+  let runners = ( benchClausifyWith benchmarker, benchKnightsWith benchmarker
+                , benchPrimeWith benchmarker, benchQueensWith benchmarker)
+  -- Run each benchmark for at least one minute.  Change this with -L or --timeout.
+  config <- getConfig 60.0
+  defaultMainWith config $ mkBenchMarks runners
diff --git a/plutus-benchmark/plutus-benchmark.cabal b/plutus-benchmark/plutus-benchmark.cabal
index 140488300ae..ae1af882e12 100644
--- a/plutus-benchmark/plutus-benchmark.cabal
+++ b/plutus-benchmark/plutus-benchmark.cabal
@@ -72,14 +72,15 @@ library plutus-benchmark-common
 
   other-modules:   Paths_plutus_benchmark
   build-depends:
-    , base          >=4.9   && <5
+    , base               >=4.9   && <5
     , bytestring
     , criterion
     , directory
     , filepath
-    , flat          ^>=0.6
-    , plutus-core   ^>=1.20
-    , plutus-tx     ^>=1.20
+    , flat               ^>=0.6
+    , plutus-core        ^>=1.20
+    , plutus-metatheory
+    , plutus-tx          ^>=1.20
     , tasty
     , tasty-golden
     , temporary
@@ -134,7 +135,7 @@ executable nofib-exe
 benchmark nofib
   import:         lang, ghc-version-support
   type:           exitcode-stdio-1.0
-  main-is:        BenchPlc.hs
+  main-is:        BenchCek.hs
   hs-source-dirs: nofib/bench
   other-modules:  Shared
   build-depends:
@@ -246,7 +247,7 @@ benchmark validation
   import:         lang
   type:           exitcode-stdio-1.0
   main-is:        BenchCek.hs
-  hs-source-dirs: validation
+  hs-source-dirs: validation/bench
   other-modules:  Common
   build-depends:
     , base                     >=4.9     && <5
@@ -267,7 +268,7 @@ benchmark validation-decode
   import:         lang
   type:           exitcode-stdio-1.0
   main-is:        BenchDec.hs
-  hs-source-dirs: validation
+  hs-source-dirs: validation/bench
   other-modules:  Common
   build-depends:
     , base                     >=4.9     && <5
@@ -288,7 +289,7 @@ benchmark validation-full
   import:         lang
   type:           exitcode-stdio-1.0
   main-is:        BenchFull.hs
-  hs-source-dirs: validation
+  hs-source-dirs: validation/bench
   other-modules:  Common
   build-depends:
     , base                     >=4.9     && <5
@@ -514,7 +515,8 @@ executable marlowe-validators
 benchmark marlowe
   import:         lang, ghc-version-support
   type:           exitcode-stdio-1.0
-  main-is:        Bench.hs
+  main-is:        BenchCek.hs
+  other-modules:  Shared
   hs-source-dirs: marlowe/bench
   build-depends:
     , base                     >=4.9   && <5
@@ -537,3 +539,52 @@ test-suite plutus-benchmark-marlowe-tests
     , plutus-ledger-api                               ^>=1.20
     , plutus-tx:{plutus-tx, plutus-tx-testlib}        ^>=1.20
     , tasty
+
+---------------- agda evaluators ----------------
+
+-- TODO: Add benchmarks for the executable semantics when we have a UPLC version
+
+benchmark validation-agda-cek
+  import:         lang
+  type:           exitcode-stdio-1.0
+  main-is:        BenchAgdaCek.hs
+  hs-source-dirs: validation/bench
+  other-modules:  Common
+  build-depends:
+    , base                     >=4.9     && <5
+    , bytestring
+    , criterion                >=1.5.9.0
+    , deepseq
+    , directory
+    , filepath
+    , flat                     ^>=0.6
+    , optparse-applicative
+    , plutus-benchmark-common
+    , plutus-core              ^>=1.20
+    , plutus-ledger-api        ^>=1.20
+
+benchmark nofib-agda-cek
+  import:         lang, ghc-version-support
+  type:           exitcode-stdio-1.0
+  main-is:        BenchAgdaCek.hs
+  hs-source-dirs: nofib/bench
+  other-modules:  Shared
+  build-depends:
+    , base                     >=4.9     && <5
+    , criterion                >=1.5.9.0
+    , nofib-internal
+    , plutus-benchmark-common
+
+benchmark marlowe-agda-cek
+  import:         lang, ghc-version-support
+  type:           exitcode-stdio-1.0
+  main-is:        BenchAgdaCek.hs
+  other-modules:  Shared
+  hs-source-dirs: marlowe/bench
+  build-depends:
+    , base                     >=4.9   && <5
+    , criterion
+    , marlowe-internal
+    , plutus-benchmark-common
+    , plutus-ledger-api        ^>=1.20
+    , plutus-tx                ^>=1.20
diff --git a/plutus-benchmark/validation/bench/BenchAgdaCek.hs b/plutus-benchmark/validation/bench/BenchAgdaCek.hs
new file mode 100644
index 00000000000..2e85ce555f1
--- /dev/null
+++ b/plutus-benchmark/validation/bench/BenchAgdaCek.hs
@@ -0,0 +1,18 @@
+{- | Validation benchmarks for the Agda CEK machine. -}
+
+{-# LANGUAGE BangPatterns #-}
+module Main where
+
+import Common (benchWith, unsafeUnflat)
+import PlutusBenchmark.Common (benchTermAgdaCek, toNamedDeBruijnTerm)
+import UntypedPlutusCore qualified as UPLC
+
+import Control.DeepSeq (force)
+
+-- Run the validation benchmarks using the Agda CEK machine.
+main :: IO ()
+main = do
+  let mkAgdaCekBM file program =
+          let !benchTerm = force . toNamedDeBruijnTerm . UPLC._progTerm $ unsafeUnflat file program
+          in benchTermAgdaCek benchTerm
+  benchWith mkAgdaCekBM
diff --git a/plutus-benchmark/validation/BenchCek.hs b/plutus-benchmark/validation/bench/BenchCek.hs
similarity index 79%
rename from plutus-benchmark/validation/BenchCek.hs
rename to plutus-benchmark/validation/bench/BenchCek.hs
index a6ae5bd46ff..2884c7a7f73 100644
--- a/plutus-benchmark/validation/BenchCek.hs
+++ b/plutus-benchmark/validation/bench/BenchCek.hs
@@ -1,13 +1,16 @@
+{- | Validation benchmarks for the CEK machine. -}
+
 {-# LANGUAGE BangPatterns #-}
 module Main where
 
-import Common
+import Common (benchWith, evaluateCekLikeInProd, mkEvalCtx, unsafeUnflat)
 import Control.DeepSeq (force)
-import Control.Exception
-import Criterion
-import PlutusBenchmark.Common
+import Control.Exception (evaluate)
+import PlutusBenchmark.Common (toNamedDeBruijnTerm)
 import UntypedPlutusCore as UPLC
 
+import Criterion (whnf)
+
 {-|
  Benchmarks only for the CEK execution time of the data/*.flat validation scripts
 
diff --git a/plutus-benchmark/validation/BenchDec.hs b/plutus-benchmark/validation/bench/BenchDec.hs
similarity index 100%
rename from plutus-benchmark/validation/BenchDec.hs
rename to plutus-benchmark/validation/bench/BenchDec.hs
diff --git a/plutus-benchmark/validation/BenchFull.hs b/plutus-benchmark/validation/bench/BenchFull.hs
similarity index 100%
rename from plutus-benchmark/validation/BenchFull.hs
rename to plutus-benchmark/validation/bench/BenchFull.hs
diff --git a/plutus-benchmark/validation/Common.hs b/plutus-benchmark/validation/bench/Common.hs
similarity index 100%
rename from plutus-benchmark/validation/Common.hs
rename to plutus-benchmark/validation/bench/Common.hs