From 28d293299216011d42caf1867160804d8bae9b43 Mon Sep 17 00:00:00 2001 From: Basil Hess Date: Mon, 9 Dec 2024 17:46:47 +0100 Subject: [PATCH] working C pqcp-mlkem Signed-off-by: Basil Hess --- .CMake/alg_support.cmake | 18 - docs/algorithms/kem/ml_kem.md | 5 +- docs/algorithms/kem/ml_kem.yml | 51 +- docs/cbom.json | 95 +-- .../copy_from_upstream/copy_from_upstream.yml | 9 +- src/kem/ml_kem/CMakeLists.txt | 51 +- src/kem/ml_kem/kem_ml_kem_1024.c | 54 +- src/kem/ml_kem/kem_ml_kem_512.c | 54 +- src/kem/ml_kem/kem_ml_kem_768.c | 54 +- .../LICENSE | 0 .../ml_kem/mlkem-native_ml-kem-1024_ref/api.h | 62 ++ .../arith_native.h | 250 ++++++ .../ml_kem/mlkem-native_ml-kem-1024_ref/cbd.c | 143 ++++ .../ml_kem/mlkem-native_ml-kem-1024_ref/cbd.h | 50 ++ .../mlkem-native_ml-kem-1024_ref/cbmc.h | 143 ++++ .../mlkem-native_ml-kem-1024_ref/common.h | 65 ++ .../mlkem-native_ml-kem-1024_ref/config.h | 22 + .../mlkem-native_ml-kem-1024_ref/cpucap.h | 43 + .../debug/debug.c | 58 ++ .../debug/debug.h | 219 +++++ .../mlkem-native_ml-kem-1024_ref/indcpa.c | 577 +++++++++++++ .../mlkem-native_ml-kem-1024_ref/indcpa.h | 75 ++ .../ml_kem/mlkem-native_ml-kem-1024_ref/kem.c | 182 +++++ .../ml_kem/mlkem-native_ml-kem-1024_ref/kem.h | 141 ++++ .../ml_kem/mlkem-native_ml-kem-1024_ref/ntt.c | 278 +++++++ .../ml_kem/mlkem-native_ml-kem-1024_ref/ntt.h | 102 +++ .../mlkem-native_ml-kem-1024_ref/params.h | 73 ++ .../mlkem-native_ml-kem-1024_ref/poly.c | 568 +++++++++++++ .../mlkem-native_ml-kem-1024_ref/poly.h | 773 ++++++++++++++++++ .../mlkem-native_ml-kem-1024_ref/polyvec.c | 209 +++++ .../mlkem-native_ml-kem-1024_ref/polyvec.h | 304 +++++++ .../mlkem-native_ml-kem-1024_ref/reduce.c | 154 ++++ .../mlkem-native_ml-kem-1024_ref/reduce.h | 64 ++ .../rej_uniform.c | 90 ++ .../rej_uniform.h | 61 ++ .../symmetric-shake.c | 35 + .../mlkem-native_ml-kem-1024_ref/symmetric.h | 82 ++ .../mlkem-native_ml-kem-1024_ref/verify.c | 19 + .../mlkem-native_ml-kem-1024_ref/verify.h | 305 +++++++ 
.../mlkem-native_ml-kem-1024_ref/zetas.c | 30 + .../LICENSE | 0 .../ml_kem/mlkem-native_ml-kem-512_ref/api.h | 62 ++ .../arith_native.h | 250 ++++++ .../ml_kem/mlkem-native_ml-kem-512_ref/cbd.c | 143 ++++ .../ml_kem/mlkem-native_ml-kem-512_ref/cbd.h | 50 ++ .../ml_kem/mlkem-native_ml-kem-512_ref/cbmc.h | 143 ++++ .../mlkem-native_ml-kem-512_ref/common.h | 65 ++ .../mlkem-native_ml-kem-512_ref/config.h | 22 + .../mlkem-native_ml-kem-512_ref/cpucap.h | 43 + .../mlkem-native_ml-kem-512_ref/debug/debug.c | 58 ++ .../mlkem-native_ml-kem-512_ref/debug/debug.h | 219 +++++ .../mlkem-native_ml-kem-512_ref/indcpa.c | 577 +++++++++++++ .../mlkem-native_ml-kem-512_ref/indcpa.h | 75 ++ .../ml_kem/mlkem-native_ml-kem-512_ref/kem.c | 182 +++++ .../ml_kem/mlkem-native_ml-kem-512_ref/kem.h | 141 ++++ .../ml_kem/mlkem-native_ml-kem-512_ref/ntt.c | 278 +++++++ .../ml_kem/mlkem-native_ml-kem-512_ref/ntt.h | 102 +++ .../mlkem-native_ml-kem-512_ref/params.h | 72 ++ .../ml_kem/mlkem-native_ml-kem-512_ref/poly.c | 568 +++++++++++++ .../ml_kem/mlkem-native_ml-kem-512_ref/poly.h | 773 ++++++++++++++++++ .../mlkem-native_ml-kem-512_ref/polyvec.c | 209 +++++ .../mlkem-native_ml-kem-512_ref/polyvec.h | 304 +++++++ .../mlkem-native_ml-kem-512_ref/reduce.c | 154 ++++ .../mlkem-native_ml-kem-512_ref/reduce.h | 64 ++ .../mlkem-native_ml-kem-512_ref/rej_uniform.c | 90 ++ .../mlkem-native_ml-kem-512_ref/rej_uniform.h | 61 ++ .../symmetric-shake.c | 35 + .../mlkem-native_ml-kem-512_ref/symmetric.h | 82 ++ .../mlkem-native_ml-kem-512_ref/verify.c | 19 + .../mlkem-native_ml-kem-512_ref/verify.h | 305 +++++++ .../mlkem-native_ml-kem-512_ref/zetas.c | 30 + .../LICENSE | 0 .../ml_kem/mlkem-native_ml-kem-768_ref/api.h | 62 ++ .../arith_native.h | 250 ++++++ .../ml_kem/mlkem-native_ml-kem-768_ref/cbd.c | 143 ++++ .../ml_kem/mlkem-native_ml-kem-768_ref/cbd.h | 50 ++ .../ml_kem/mlkem-native_ml-kem-768_ref/cbmc.h | 143 ++++ .../mlkem-native_ml-kem-768_ref/common.h | 65 ++ 
.../mlkem-native_ml-kem-768_ref/config.h | 22 + .../mlkem-native_ml-kem-768_ref/cpucap.h | 43 + .../mlkem-native_ml-kem-768_ref/debug/debug.c | 58 ++ .../mlkem-native_ml-kem-768_ref/debug/debug.h | 219 +++++ .../mlkem-native_ml-kem-768_ref/indcpa.c | 578 +++++++++++++ .../mlkem-native_ml-kem-768_ref/indcpa.h | 75 ++ .../ml_kem/mlkem-native_ml-kem-768_ref/kem.c | 182 +++++ .../ml_kem/mlkem-native_ml-kem-768_ref/kem.h | 141 ++++ .../ml_kem/mlkem-native_ml-kem-768_ref/ntt.c | 278 +++++++ .../ml_kem/mlkem-native_ml-kem-768_ref/ntt.h | 102 +++ .../mlkem-native_ml-kem-768_ref/params.h | 72 ++ .../ml_kem/mlkem-native_ml-kem-768_ref/poly.c | 568 +++++++++++++ .../ml_kem/mlkem-native_ml-kem-768_ref/poly.h | 773 ++++++++++++++++++ .../mlkem-native_ml-kem-768_ref/polyvec.c | 209 +++++ .../mlkem-native_ml-kem-768_ref/polyvec.h | 304 +++++++ .../mlkem-native_ml-kem-768_ref/reduce.c | 154 ++++ .../mlkem-native_ml-kem-768_ref/reduce.h | 64 ++ .../mlkem-native_ml-kem-768_ref/rej_uniform.c | 90 ++ .../mlkem-native_ml-kem-768_ref/rej_uniform.h | 61 ++ .../symmetric-shake.c | 35 + .../mlkem-native_ml-kem-768_ref/symmetric.h | 82 ++ .../mlkem-native_ml-kem-768_ref/verify.c | 19 + .../mlkem-native_ml-kem-768_ref/verify.h | 305 +++++++ .../mlkem-native_ml-kem-768_ref/zetas.c | 30 + .../align.h | 19 - .../api.h | 66 -- .../basemul.S | 105 --- .../cbd.c | 144 ---- .../cbd.h | 15 - .../consts.c | 121 --- .../consts.h | 43 - .../fq.S | 88 -- .../fq.inc | 30 - .../indcpa.c | 568 ------------- .../indcpa.h | 27 - .../invntt.S | 193 ----- .../kem.c | 169 ---- .../kem.h | 35 - .../ntt.S | 189 ----- .../ntt.h | 28 - .../params.h | 68 -- .../poly.c | 519 ------------ .../poly.h | 77 -- .../polyvec.c | 307 ------- .../polyvec.h | 36 - .../reduce.h | 12 - .../rejsample.c | 398 --------- .../rejsample.h | 14 - .../shuffle.S | 255 ------ .../shuffle.inc | 25 - .../symmetric-shake.c | 74 -- .../symmetric.h | 34 - .../verify.c | 83 -- .../verify.h | 17 - .../api.h | 66 -- .../cbd.c | 128 --- .../cbd.h 
| 14 - .../indcpa.c | 334 -------- .../indcpa.h | 27 - .../kem.c | 169 ---- .../kem.h | 35 - .../ntt.c | 146 ---- .../ntt.h | 19 - .../params.h | 55 -- .../poly.c | 360 -------- .../poly.h | 53 -- .../polyvec.c | 246 ------ .../polyvec.h | 36 - .../reduce.c | 42 - .../reduce.h | 16 - .../symmetric-shake.c | 74 -- .../symmetric.h | 35 - .../verify.c | 75 -- .../verify.h | 17 - .../align.h | 19 - .../api.h | 66 -- .../basemul.S | 105 --- .../cbd.c | 144 ---- .../cbd.h | 15 - .../consts.c | 121 --- .../consts.h | 43 - .../fq.S | 88 -- .../fq.inc | 30 - .../indcpa.c | 568 ------------- .../indcpa.h | 27 - .../invntt.S | 193 ----- .../kem.c | 169 ---- .../kem.h | 35 - .../ntt.S | 189 ----- .../ntt.h | 28 - .../params.h | 68 -- .../poly.c | 519 ------------ .../poly.h | 77 -- .../polyvec.c | 307 ------- .../polyvec.h | 36 - .../reduce.h | 12 - .../rejsample.c | 398 --------- .../rejsample.h | 14 - .../shuffle.S | 255 ------ .../shuffle.inc | 25 - .../symmetric-shake.c | 74 -- .../symmetric.h | 34 - .../verify.c | 83 -- .../verify.h | 17 - .../LICENSE | 6 - .../api.h | 66 -- .../cbd.c | 128 --- .../cbd.h | 14 - .../indcpa.c | 334 -------- .../indcpa.h | 27 - .../kem.c | 169 ---- .../kem.h | 35 - .../ntt.c | 146 ---- .../ntt.h | 19 - .../params.h | 55 -- .../poly.c | 360 -------- .../poly.h | 53 -- .../polyvec.c | 246 ------ .../polyvec.h | 36 - .../reduce.c | 42 - .../reduce.h | 16 - .../symmetric-shake.c | 74 -- .../symmetric.h | 35 - .../verify.c | 75 -- .../verify.h | 17 - .../LICENSE | 6 - .../align.h | 19 - .../api.h | 66 -- .../basemul.S | 105 --- .../cbd.c | 144 ---- .../cbd.h | 15 - .../consts.c | 121 --- .../consts.h | 43 - .../fq.S | 88 -- .../fq.inc | 30 - .../indcpa.c | 568 ------------- .../indcpa.h | 27 - .../invntt.S | 193 ----- .../kem.c | 169 ---- .../kem.h | 35 - .../ntt.S | 189 ----- .../ntt.h | 28 - .../params.h | 68 -- .../poly.c | 519 ------------ .../poly.h | 77 -- .../polyvec.c | 307 ------- .../polyvec.h | 36 - .../reduce.h | 12 - .../rejsample.c 
| 398 --------- .../rejsample.h | 14 - .../shuffle.S | 255 ------ .../shuffle.inc | 25 - .../symmetric-shake.c | 74 -- .../symmetric.h | 34 - .../verify.c | 83 -- .../verify.h | 17 - .../LICENSE | 6 - .../api.h | 66 -- .../cbd.c | 128 --- .../cbd.h | 14 - .../indcpa.c | 334 -------- .../indcpa.h | 27 - .../kem.c | 169 ---- .../kem.h | 35 - .../ntt.c | 146 ---- .../ntt.h | 19 - .../params.h | 55 -- .../poly.c | 360 -------- .../poly.h | 53 -- .../polyvec.c | 246 ------ .../polyvec.h | 36 - .../reduce.c | 42 - .../reduce.h | 16 - .../symmetric-shake.c | 74 -- .../symmetric.h | 35 - .../verify.c | 75 -- .../verify.h | 17 - src/oqsconfig.h.cmake | 3 - tests/test_binary.py | 2 +- 257 files changed, 15578 insertions(+), 17484 deletions(-) rename src/kem/ml_kem/{pqcrystals-kyber-standard_ml-kem-1024_avx2 => mlkem-native_ml-kem-1024_ref}/LICENSE (100%) create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/api.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/arith_native.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbmc.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/common.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/config.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cpucap.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.c create mode 100644 
src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/params.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/reduce.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/reduce.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/rej_uniform.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/rej_uniform.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/symmetric-shake.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/symmetric.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/verify.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/verify.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/zetas.c rename src/kem/ml_kem/{pqcrystals-kyber-standard_ml-kem-1024_ref => mlkem-native_ml-kem-512_ref}/LICENSE (100%) create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/api.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/arith_native.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbmc.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/common.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/config.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cpucap.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.c create mode 100644 
src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/params.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/reduce.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/reduce.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/symmetric-shake.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/symmetric.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/zetas.c rename src/kem/ml_kem/{pqcrystals-kyber-standard_ml-kem-512_avx2 => mlkem-native_ml-kem-768_ref}/LICENSE (100%) create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/api.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/arith_native.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbmc.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/common.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/config.h create mode 100644 
src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cpucap.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/params.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/reduce.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/reduce.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/symmetric-shake.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/symmetric.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.c create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.h create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/zetas.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/align.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/api.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/basemul.S delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/cbd.c delete mode 100644 
src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/cbd.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/consts.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/consts.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/fq.S delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/fq.inc delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/indcpa.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/indcpa.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/invntt.S delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/kem.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/kem.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/ntt.S delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/ntt.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/params.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/poly.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/poly.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/polyvec.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/polyvec.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/reduce.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/rejsample.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/rejsample.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/shuffle.S delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/shuffle.inc delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/symmetric-shake.c delete mode 100644 
src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/symmetric.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/verify.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/verify.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/api.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/cbd.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/cbd.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/indcpa.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/indcpa.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/kem.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/kem.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/ntt.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/ntt.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/params.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/poly.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/poly.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/polyvec.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/polyvec.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/reduce.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/reduce.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/symmetric-shake.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/symmetric.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/verify.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/verify.h delete mode 100644 
src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/align.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/api.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/basemul.S delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/cbd.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/cbd.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/consts.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/consts.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/fq.S delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/fq.inc delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/indcpa.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/indcpa.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/invntt.S delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/kem.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/kem.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/ntt.S delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/ntt.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/params.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/poly.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/poly.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/polyvec.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/polyvec.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/reduce.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/rejsample.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/rejsample.h 
delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/shuffle.S delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/shuffle.inc delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/symmetric-shake.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/symmetric.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/verify.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/verify.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/LICENSE delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/api.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/cbd.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/cbd.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/indcpa.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/indcpa.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/kem.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/kem.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/ntt.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/ntt.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/params.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/poly.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/poly.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/polyvec.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/polyvec.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/reduce.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/reduce.h delete mode 100644 
src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/symmetric-shake.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/symmetric.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/verify.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/verify.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/LICENSE delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/align.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/api.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/basemul.S delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/cbd.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/cbd.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/consts.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/consts.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/fq.S delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/fq.inc delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/indcpa.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/indcpa.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/invntt.S delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/kem.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/kem.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/ntt.S delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/ntt.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/params.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/poly.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/poly.h 
delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/polyvec.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/polyvec.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/reduce.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/rejsample.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/rejsample.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/shuffle.S delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/shuffle.inc delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/symmetric-shake.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/symmetric.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/verify.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/verify.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/LICENSE delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/api.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/cbd.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/cbd.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/indcpa.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/indcpa.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/kem.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/kem.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/ntt.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/ntt.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/params.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/poly.c delete mode 100644 
src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/poly.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/polyvec.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/polyvec.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/reduce.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/reduce.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/symmetric-shake.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/symmetric.h delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/verify.c delete mode 100644 src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/verify.h diff --git a/.CMake/alg_support.cmake b/.CMake/alg_support.cmake index 9afa6e4b15..114ad69a5d 100644 --- a/.CMake/alg_support.cmake +++ b/.CMake/alg_support.cmake @@ -332,24 +332,6 @@ endif() endif() -if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin") -if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS)) - cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_512_avx2 "" ON "OQS_ENABLE_KEM_ml_kem_512" OFF) -endif() -endif() - -if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin") -if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS)) - cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_768_avx2 "" ON "OQS_ENABLE_KEM_ml_kem_768" OFF) -endif() -endif() - -if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin") -if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS)) - cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_1024_avx2 "" ON "OQS_ENABLE_KEM_ml_kem_1024" OFF) -endif() -endif() - if(CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux") if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS)) diff --git a/docs/algorithms/kem/ml_kem.md 
b/docs/algorithms/kem/ml_kem.md index d1806517ba..4fa0971fa9 100644 --- a/docs/algorithms/kem/ml_kem.md +++ b/docs/algorithms/kem/ml_kem.md @@ -7,7 +7,7 @@ - **Authors' website**: https://pq-crystals.org/kyber/ and https://csrc.nist.gov/pubs/fips/203 - **Specification version**: ML-KEM. - **Primary Source**: - - **Source**: https://github.com/pq-crystals/kyber/commit/10b478fc3cc4ff6215eb0b6a11bd758bf0929cbd with copy_from_upstream patches + - **Source**: https://github.com/bhess/mlkem-native/commit/2b650d6676bf6a3a82ab7e9ecd96acd397ca71cd - **Implementation license (SPDX-Identifier)**: CC0-1.0 or Apache-2.0 @@ -24,7 +24,6 @@ | Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage?‡ | |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:----------------------| | [Primary Source](#primary-source) | ref | All | All | None | True | True | False | -| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False | Are implementations chosen based on runtime CPU feature detection? **Yes**. @@ -35,7 +34,6 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**. | Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? 
| |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------| | [Primary Source](#primary-source) | ref | All | All | None | True | True | False | -| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False | Are implementations chosen based on runtime CPU feature detection? **Yes**. @@ -44,7 +42,6 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**. | Implementation source | Identifier in upstream | Supported architecture(s) | Supported operating system(s) | CPU extension(s) used | No branching-on-secrets claimed? | No branching-on-secrets checked by valgrind? | Large stack usage? | |:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------| | [Primary Source](#primary-source) | ref | All | All | None | True | True | False | -| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False | Are implementations chosen based on runtime CPU feature detection? **Yes**. 
diff --git a/docs/algorithms/kem/ml_kem.yml b/docs/algorithms/kem/ml_kem.yml index 81ef2b6c4a..34561915cb 100644 --- a/docs/algorithms/kem/ml_kem.yml +++ b/docs/algorithms/kem/ml_kem.yml @@ -17,8 +17,7 @@ website: https://pq-crystals.org/kyber/ and https://csrc.nist.gov/pubs/fips/203 nist-round: FIPS203 spec-version: ML-KEM primary-upstream: - source: https://github.com/pq-crystals/kyber/commit/10b478fc3cc4ff6215eb0b6a11bd758bf0929cbd - with copy_from_upstream patches + source: https://github.com/bhess/mlkem-native/commit/2b650d6676bf6a3a82ab7e9ecd96acd397ca71cd spdx-license-identifier: CC0-1.0 or Apache-2.0 parameter-sets: - name: ML-KEM-512 @@ -38,22 +37,6 @@ parameter-sets: no-secret-dependent-branching-claimed: true no-secret-dependent-branching-checked-by-valgrind: true large-stack-usage: false - - upstream: primary-upstream - upstream-id: avx2 - supported-platforms: - - architecture: x86_64 - operating_systems: - - Linux - - Darwin - required_flags: - - avx2 - - bmi2 - - popcnt - common-crypto: - - SHA3: liboqs - no-secret-dependent-branching-claimed: true - no-secret-dependent-branching-checked-by-valgrind: true - large-stack-usage: false - name: ML-KEM-768 claimed-nist-level: 3 claimed-security: IND-CCA2 @@ -71,22 +54,6 @@ parameter-sets: no-secret-dependent-branching-claimed: true no-secret-dependent-branching-checked-by-valgrind: true large-stack-usage: false - - upstream: primary-upstream - upstream-id: avx2 - supported-platforms: - - architecture: x86_64 - operating_systems: - - Linux - - Darwin - required_flags: - - avx2 - - bmi2 - - popcnt - common-crypto: - - SHA3: liboqs - no-secret-dependent-branching-claimed: true - no-secret-dependent-branching-checked-by-valgrind: true - large-stack-usage: false - name: ML-KEM-1024 claimed-nist-level: 5 claimed-security: IND-CCA2 @@ -104,19 +71,3 @@ parameter-sets: no-secret-dependent-branching-claimed: true no-secret-dependent-branching-checked-by-valgrind: true large-stack-usage: false - - upstream: 
primary-upstream - upstream-id: avx2 - supported-platforms: - - architecture: x86_64 - operating_systems: - - Linux - - Darwin - required_flags: - - avx2 - - bmi2 - - popcnt - common-crypto: - - SHA3: liboqs - no-secret-dependent-branching-claimed: true - no-secret-dependent-branching-checked-by-valgrind: true - large-stack-usage: false diff --git a/docs/cbom.json b/docs/cbom.json index 52cf0a0a59..a9361e3756 100644 --- a/docs/cbom.json +++ b/docs/cbom.json @@ -2,23 +2,23 @@ "$schema": "https://raw.githubusercontent.com/CycloneDX/specification/1.6/schema/bom-1.6.schema.json", "bomFormat": "CycloneDX", "specVersion": "1.6", - "serialNumber": "urn:uuid:de1355bb-9681-4a7e-8aa9-0ccc414ebe3b", + "serialNumber": "urn:uuid:d66add05-17dd-4986-8894-ed47d1e910b6", "version": 1, "metadata": { - "timestamp": "2024-11-05T12:25:53.012740+00:00", + "timestamp": "2024-12-09T14:24:28.343759+00:00", "component": { "type": "library", - "bom-ref": "pkg:github/open-quantum-safe/liboqs@69a80f8a66988521d51e94d716cff8c936c07b8d", + "bom-ref": "pkg:github/open-quantum-safe/liboqs@d0d0413dc9fff538296ab86bac492cb4bf54dedb", "name": "liboqs", - "version": "69a80f8a66988521d51e94d716cff8c936c07b8d" + "version": "d0d0413dc9fff538296ab86bac492cb4bf54dedb" } }, "components": [ { "type": "library", - "bom-ref": "pkg:github/open-quantum-safe/liboqs@69a80f8a66988521d51e94d716cff8c936c07b8d", + "bom-ref": "pkg:github/open-quantum-safe/liboqs@d0d0413dc9fff538296ab86bac492cb4bf54dedb", "name": "liboqs", - "version": "69a80f8a66988521d51e94d716cff8c936c07b8d" + "version": "d0d0413dc9fff538296ab86bac492cb4bf54dedb" }, { "type": "cryptographic-asset", @@ -1060,26 +1060,6 @@ } } }, - { - "type": "cryptographic-asset", - "bom-ref": "alg:ML-KEM-512:x86_64", - "name": "ML-KEM", - "cryptoProperties": { - "assetType": "algorithm", - "algorithmProperties": { - "parameterSetIdentifier": "ML-KEM-512", - "primitive": "kem", - "executionEnvironment": "software-plain-ram", - "cryptoFunctions": [ - "keygen", - 
"encapsulate", - "decapsulate" - ], - "nistQuantumSecurityLevel": 1, - "implementationPlatform": "x86_64" - } - } - }, { "type": "cryptographic-asset", "bom-ref": "alg:ML-KEM-768:generic", @@ -1100,26 +1080,6 @@ } } }, - { - "type": "cryptographic-asset", - "bom-ref": "alg:ML-KEM-768:x86_64", - "name": "ML-KEM", - "cryptoProperties": { - "assetType": "algorithm", - "algorithmProperties": { - "parameterSetIdentifier": "ML-KEM-768", - "primitive": "kem", - "executionEnvironment": "software-plain-ram", - "cryptoFunctions": [ - "keygen", - "encapsulate", - "decapsulate" - ], - "nistQuantumSecurityLevel": 3, - "implementationPlatform": "x86_64" - } - } - }, { "type": "cryptographic-asset", "bom-ref": "alg:ML-KEM-1024:generic", @@ -1140,26 +1100,6 @@ } } }, - { - "type": "cryptographic-asset", - "bom-ref": "alg:ML-KEM-1024:x86_64", - "name": "ML-KEM", - "cryptoProperties": { - "assetType": "algorithm", - "algorithmProperties": { - "parameterSetIdentifier": "ML-KEM-1024", - "primitive": "kem", - "executionEnvironment": "software-plain-ram", - "cryptoFunctions": [ - "keygen", - "encapsulate", - "decapsulate" - ], - "nistQuantumSecurityLevel": 5, - "implementationPlatform": "x86_64" - } - } - }, { "type": "cryptographic-asset", "bom-ref": "alg:sntrup761:generic", @@ -3127,7 +3067,7 @@ ], "dependencies": [ { - "ref": "pkg:github/open-quantum-safe/liboqs@69a80f8a66988521d51e94d716cff8c936c07b8d", + "ref": "pkg:github/open-quantum-safe/liboqs@d0d0413dc9fff538296ab86bac492cb4bf54dedb", "provides": [ "alg:BIKE-L1:x86_64", "alg:BIKE-L3:x86_64", @@ -3181,11 +3121,8 @@ "alg:Kyber1024:x86_64", "alg:Kyber1024:armv8-a", "alg:ML-KEM-512:generic", - "alg:ML-KEM-512:x86_64", "alg:ML-KEM-768:generic", - "alg:ML-KEM-768:x86_64", "alg:ML-KEM-1024:generic", - "alg:ML-KEM-1024:x86_64", "alg:sntrup761:generic", "alg:sntrup761:x86_64", "alg:cross-rsdp-128-balanced:generic", @@ -3605,36 +3542,18 @@ "alg:sha3" ] }, - { - "ref": "alg:ML-KEM-512:x86_64", - "dependsOn": [ - "alg:sha3" - ] - }, { 
"ref": "alg:ML-KEM-768:generic", "dependsOn": [ "alg:sha3" ] }, - { - "ref": "alg:ML-KEM-768:x86_64", - "dependsOn": [ - "alg:sha3" - ] - }, { "ref": "alg:ML-KEM-1024:generic", "dependsOn": [ "alg:sha3" ] }, - { - "ref": "alg:ML-KEM-1024:x86_64", - "dependsOn": [ - "alg:sha3" - ] - }, { "ref": "alg:sntrup761:generic", "dependsOn": [ diff --git a/scripts/copy_from_upstream/copy_from_upstream.yml b/scripts/copy_from_upstream/copy_from_upstream.yml index ff23a2287d..ea14e4022f 100644 --- a/scripts/copy_from_upstream/copy_from_upstream.yml +++ b/scripts/copy_from_upstream/copy_from_upstream.yml @@ -38,6 +38,13 @@ upstreams: kem_meta_path: '{pretty_name_full}_META.yml' kem_scheme_path: '.' patches: [pqcrystals-ml_kem.patch] + - + name: mlkem-native + git_url: https://github.com/bhess/mlkem-native.git + git_branch: updates + git_commit: 14141720b0149cad6c2f91b037e3e6a15882840c + kem_meta_path: '{pretty_name_full}_META.yml' + kem_scheme_path: '.' - name: pqcrystals-dilithium git_url: https://github.com/pq-crystals/dilithium.git @@ -166,7 +173,7 @@ kems: - name: ml_kem default_implementation: ref - upstream_location: pqcrystals-kyber-standard + upstream_location: mlkem-native schemes: - scheme: "512" diff --git a/src/kem/ml_kem/CMakeLists.txt b/src/kem/ml_kem/CMakeLists.txt index 14cc9b850d..f9cf7706aa 100644 --- a/src/kem/ml_kem/CMakeLists.txt +++ b/src/kem/ml_kem/CMakeLists.txt @@ -6,57 +6,30 @@ set(_ML_KEM_OBJS "") if(OQS_ENABLE_KEM_ml_kem_512) - add_library(ml_kem_512_ref OBJECT kem_ml_kem_512.c pqcrystals-kyber-standard_ml-kem-512_ref/cbd.c pqcrystals-kyber-standard_ml-kem-512_ref/indcpa.c pqcrystals-kyber-standard_ml-kem-512_ref/kem.c pqcrystals-kyber-standard_ml-kem-512_ref/ntt.c pqcrystals-kyber-standard_ml-kem-512_ref/poly.c pqcrystals-kyber-standard_ml-kem-512_ref/polyvec.c pqcrystals-kyber-standard_ml-kem-512_ref/reduce.c pqcrystals-kyber-standard_ml-kem-512_ref/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-512_ref/verify.c) - 
target_compile_options(ml_kem_512_ref PUBLIC -DKYBER_K=2) - target_include_directories(ml_kem_512_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-kyber-standard_ml-kem-512_ref) + add_library(ml_kem_512_ref OBJECT kem_ml_kem_512.c mlkem-native_ml-kem-512_ref/cbd.c mlkem-native_ml-kem-512_ref/debug/debug.c mlkem-native_ml-kem-512_ref/indcpa.c mlkem-native_ml-kem-512_ref/kem.c mlkem-native_ml-kem-512_ref/ntt.c mlkem-native_ml-kem-512_ref/poly.c mlkem-native_ml-kem-512_ref/polyvec.c mlkem-native_ml-kem-512_ref/reduce.c mlkem-native_ml-kem-512_ref/rej_uniform.c mlkem-native_ml-kem-512_ref/symmetric-shake.c mlkem-native_ml-kem-512_ref/verify.c mlkem-native_ml-kem-512_ref/zetas.c) + target_compile_options(ml_kem_512_ref PUBLIC -DMLKEM_K=2) + target_include_directories(ml_kem_512_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-512_ref) target_include_directories(ml_kem_512_ref PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) - target_compile_options(ml_kem_512_ref PUBLIC -DKYBER_K=2) + target_compile_options(ml_kem_512_ref PUBLIC -DMLKEM_K=2) set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $) endif() -if(OQS_ENABLE_KEM_ml_kem_512_avx2) - add_library(ml_kem_512_avx2 OBJECT pqcrystals-kyber-standard_ml-kem-512_avx2/basemul.S pqcrystals-kyber-standard_ml-kem-512_avx2/cbd.c pqcrystals-kyber-standard_ml-kem-512_avx2/consts.c pqcrystals-kyber-standard_ml-kem-512_avx2/fq.S pqcrystals-kyber-standard_ml-kem-512_avx2/indcpa.c pqcrystals-kyber-standard_ml-kem-512_avx2/invntt.S pqcrystals-kyber-standard_ml-kem-512_avx2/kem.c pqcrystals-kyber-standard_ml-kem-512_avx2/ntt.S pqcrystals-kyber-standard_ml-kem-512_avx2/poly.c pqcrystals-kyber-standard_ml-kem-512_avx2/polyvec.c pqcrystals-kyber-standard_ml-kem-512_avx2/rejsample.c pqcrystals-kyber-standard_ml-kem-512_avx2/shuffle.S pqcrystals-kyber-standard_ml-kem-512_avx2/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-512_avx2/verify.c) - target_include_directories(ml_kem_512_avx2 PRIVATE 
${CMAKE_CURRENT_LIST_DIR}/pqcrystals-kyber-standard_ml-kem-512_avx2) - target_include_directories(ml_kem_512_avx2 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) - target_compile_options(ml_kem_512_avx2 PRIVATE -mavx2 -mbmi2 -mpopcnt ) - target_compile_options(ml_kem_512_avx2 PUBLIC -DKYBER_K=2) - set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $) -endif() - if(OQS_ENABLE_KEM_ml_kem_768) - add_library(ml_kem_768_ref OBJECT kem_ml_kem_768.c pqcrystals-kyber-standard_ml-kem-768_ref/cbd.c pqcrystals-kyber-standard_ml-kem-768_ref/indcpa.c pqcrystals-kyber-standard_ml-kem-768_ref/kem.c pqcrystals-kyber-standard_ml-kem-768_ref/ntt.c pqcrystals-kyber-standard_ml-kem-768_ref/poly.c pqcrystals-kyber-standard_ml-kem-768_ref/polyvec.c pqcrystals-kyber-standard_ml-kem-768_ref/reduce.c pqcrystals-kyber-standard_ml-kem-768_ref/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-768_ref/verify.c) - target_compile_options(ml_kem_768_ref PUBLIC -DKYBER_K=3) - target_include_directories(ml_kem_768_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-kyber-standard_ml-kem-768_ref) + add_library(ml_kem_768_ref OBJECT kem_ml_kem_768.c mlkem-native_ml-kem-768_ref/cbd.c mlkem-native_ml-kem-768_ref/debug/debug.c mlkem-native_ml-kem-768_ref/indcpa.c mlkem-native_ml-kem-768_ref/kem.c mlkem-native_ml-kem-768_ref/ntt.c mlkem-native_ml-kem-768_ref/poly.c mlkem-native_ml-kem-768_ref/polyvec.c mlkem-native_ml-kem-768_ref/reduce.c mlkem-native_ml-kem-768_ref/rej_uniform.c mlkem-native_ml-kem-768_ref/symmetric-shake.c mlkem-native_ml-kem-768_ref/verify.c mlkem-native_ml-kem-768_ref/zetas.c) + target_compile_options(ml_kem_768_ref PUBLIC -DMLKEM_K=3) + target_include_directories(ml_kem_768_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-768_ref) target_include_directories(ml_kem_768_ref PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) - target_compile_options(ml_kem_768_ref PUBLIC -DKYBER_K=3) + target_compile_options(ml_kem_768_ref PUBLIC -DMLKEM_K=3) set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $) 
endif() -if(OQS_ENABLE_KEM_ml_kem_768_avx2) - add_library(ml_kem_768_avx2 OBJECT pqcrystals-kyber-standard_ml-kem-768_avx2/basemul.S pqcrystals-kyber-standard_ml-kem-768_avx2/cbd.c pqcrystals-kyber-standard_ml-kem-768_avx2/consts.c pqcrystals-kyber-standard_ml-kem-768_avx2/fq.S pqcrystals-kyber-standard_ml-kem-768_avx2/indcpa.c pqcrystals-kyber-standard_ml-kem-768_avx2/invntt.S pqcrystals-kyber-standard_ml-kem-768_avx2/kem.c pqcrystals-kyber-standard_ml-kem-768_avx2/ntt.S pqcrystals-kyber-standard_ml-kem-768_avx2/poly.c pqcrystals-kyber-standard_ml-kem-768_avx2/polyvec.c pqcrystals-kyber-standard_ml-kem-768_avx2/rejsample.c pqcrystals-kyber-standard_ml-kem-768_avx2/shuffle.S pqcrystals-kyber-standard_ml-kem-768_avx2/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-768_avx2/verify.c) - target_include_directories(ml_kem_768_avx2 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-kyber-standard_ml-kem-768_avx2) - target_include_directories(ml_kem_768_avx2 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) - target_compile_options(ml_kem_768_avx2 PRIVATE -mavx2 -mbmi2 -mpopcnt ) - target_compile_options(ml_kem_768_avx2 PUBLIC -DKYBER_K=3) - set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $) -endif() - if(OQS_ENABLE_KEM_ml_kem_1024) - add_library(ml_kem_1024_ref OBJECT kem_ml_kem_1024.c pqcrystals-kyber-standard_ml-kem-1024_ref/cbd.c pqcrystals-kyber-standard_ml-kem-1024_ref/indcpa.c pqcrystals-kyber-standard_ml-kem-1024_ref/kem.c pqcrystals-kyber-standard_ml-kem-1024_ref/ntt.c pqcrystals-kyber-standard_ml-kem-1024_ref/poly.c pqcrystals-kyber-standard_ml-kem-1024_ref/polyvec.c pqcrystals-kyber-standard_ml-kem-1024_ref/reduce.c pqcrystals-kyber-standard_ml-kem-1024_ref/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-1024_ref/verify.c) - target_compile_options(ml_kem_1024_ref PUBLIC -DKYBER_K=4) - target_include_directories(ml_kem_1024_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-kyber-standard_ml-kem-1024_ref) + add_library(ml_kem_1024_ref OBJECT kem_ml_kem_1024.c 
mlkem-native_ml-kem-1024_ref/cbd.c mlkem-native_ml-kem-1024_ref/debug/debug.c mlkem-native_ml-kem-1024_ref/indcpa.c mlkem-native_ml-kem-1024_ref/kem.c mlkem-native_ml-kem-1024_ref/ntt.c mlkem-native_ml-kem-1024_ref/poly.c mlkem-native_ml-kem-1024_ref/polyvec.c mlkem-native_ml-kem-1024_ref/reduce.c mlkem-native_ml-kem-1024_ref/rej_uniform.c mlkem-native_ml-kem-1024_ref/symmetric-shake.c mlkem-native_ml-kem-1024_ref/verify.c mlkem-native_ml-kem-1024_ref/zetas.c) + target_compile_options(ml_kem_1024_ref PUBLIC -DMLKEM_K=4) + target_include_directories(ml_kem_1024_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-1024_ref) target_include_directories(ml_kem_1024_ref PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) - target_compile_options(ml_kem_1024_ref PUBLIC -DKYBER_K=4) + target_compile_options(ml_kem_1024_ref PUBLIC -DMLKEM_K=4) set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $) endif() -if(OQS_ENABLE_KEM_ml_kem_1024_avx2) - add_library(ml_kem_1024_avx2 OBJECT pqcrystals-kyber-standard_ml-kem-1024_avx2/basemul.S pqcrystals-kyber-standard_ml-kem-1024_avx2/cbd.c pqcrystals-kyber-standard_ml-kem-1024_avx2/consts.c pqcrystals-kyber-standard_ml-kem-1024_avx2/fq.S pqcrystals-kyber-standard_ml-kem-1024_avx2/indcpa.c pqcrystals-kyber-standard_ml-kem-1024_avx2/invntt.S pqcrystals-kyber-standard_ml-kem-1024_avx2/kem.c pqcrystals-kyber-standard_ml-kem-1024_avx2/ntt.S pqcrystals-kyber-standard_ml-kem-1024_avx2/poly.c pqcrystals-kyber-standard_ml-kem-1024_avx2/polyvec.c pqcrystals-kyber-standard_ml-kem-1024_avx2/rejsample.c pqcrystals-kyber-standard_ml-kem-1024_avx2/shuffle.S pqcrystals-kyber-standard_ml-kem-1024_avx2/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-1024_avx2/verify.c) - target_include_directories(ml_kem_1024_avx2 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqcrystals-kyber-standard_ml-kem-1024_avx2) - target_include_directories(ml_kem_1024_avx2 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) - target_compile_options(ml_kem_1024_avx2 PRIVATE -mavx2 -mbmi2 
-mpopcnt ) - target_compile_options(ml_kem_1024_avx2 PUBLIC -DKYBER_K=4) - set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $) -endif() - set(ML_KEM_OBJS ${_ML_KEM_OBJS} PARENT_SCOPE) diff --git a/src/kem/ml_kem/kem_ml_kem_1024.c b/src/kem/ml_kem/kem_ml_kem_1024.c index bc533aef9e..c4babc3195 100644 --- a/src/kem/ml_kem/kem_ml_kem_1024.c +++ b/src/kem/ml_kem/kem_ml_kem_1024.c @@ -30,62 +30,20 @@ OQS_KEM *OQS_KEM_ml_kem_1024_new(void) { return kem; } -extern int pqcrystals_ml_kem_1024_ref_keypair(uint8_t *pk, uint8_t *sk); -extern int pqcrystals_ml_kem_1024_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -extern int pqcrystals_ml_kem_1024_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#if defined(OQS_ENABLE_KEM_ml_kem_1024_avx2) -extern int pqcrystals_ml_kem_1024_avx2_keypair(uint8_t *pk, uint8_t *sk); -extern int pqcrystals_ml_kem_1024_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -extern int pqcrystals_ml_kem_1024_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); -#endif +extern int PQCP_MLKEM_NATIVE_MLKEM1024_keypair(uint8_t *pk, uint8_t *sk); +extern int PQCP_MLKEM_NATIVE_MLKEM1024_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +extern int PQCP_MLKEM_NATIVE_MLKEM1024_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_keypair(uint8_t *public_key, uint8_t *secret_key) { -#if defined(OQS_ENABLE_KEM_ml_kem_1024_avx2) -#if defined(OQS_DIST_BUILD) - if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { -#endif /* OQS_DIST_BUILD */ - return (OQS_STATUS) pqcrystals_ml_kem_1024_avx2_keypair(public_key, secret_key); -#if defined(OQS_DIST_BUILD) - } else { - return (OQS_STATUS) pqcrystals_ml_kem_1024_ref_keypair(public_key, secret_key); - } -#endif /* OQS_DIST_BUILD */ -#else - return (OQS_STATUS) pqcrystals_ml_kem_1024_ref_keypair(public_key, secret_key); -#endif + return (OQS_STATUS) 
PQCP_MLKEM_NATIVE_MLKEM1024_keypair(public_key, secret_key); } OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) { -#if defined(OQS_ENABLE_KEM_ml_kem_1024_avx2) -#if defined(OQS_DIST_BUILD) - if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { -#endif /* OQS_DIST_BUILD */ - return (OQS_STATUS) pqcrystals_ml_kem_1024_avx2_enc(ciphertext, shared_secret, public_key); -#if defined(OQS_DIST_BUILD) - } else { - return (OQS_STATUS) pqcrystals_ml_kem_1024_ref_enc(ciphertext, shared_secret, public_key); - } -#endif /* OQS_DIST_BUILD */ -#else - return (OQS_STATUS) pqcrystals_ml_kem_1024_ref_enc(ciphertext, shared_secret, public_key); -#endif + return (OQS_STATUS) PQCP_MLKEM_NATIVE_MLKEM1024_enc(ciphertext, shared_secret, public_key); } OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) { -#if defined(OQS_ENABLE_KEM_ml_kem_1024_avx2) -#if defined(OQS_DIST_BUILD) - if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { -#endif /* OQS_DIST_BUILD */ - return (OQS_STATUS) pqcrystals_ml_kem_1024_avx2_dec(shared_secret, ciphertext, secret_key); -#if defined(OQS_DIST_BUILD) - } else { - return (OQS_STATUS) pqcrystals_ml_kem_1024_ref_dec(shared_secret, ciphertext, secret_key); - } -#endif /* OQS_DIST_BUILD */ -#else - return (OQS_STATUS) pqcrystals_ml_kem_1024_ref_dec(shared_secret, ciphertext, secret_key); -#endif + return (OQS_STATUS) PQCP_MLKEM_NATIVE_MLKEM1024_dec(shared_secret, ciphertext, secret_key); } #endif diff --git a/src/kem/ml_kem/kem_ml_kem_512.c b/src/kem/ml_kem/kem_ml_kem_512.c index f2dcde53d2..8f52c2a3b9 100644 --- a/src/kem/ml_kem/kem_ml_kem_512.c +++ b/src/kem/ml_kem/kem_ml_kem_512.c @@ -30,62 +30,20 @@ OQS_KEM *OQS_KEM_ml_kem_512_new(void) { return kem; } -extern 
int pqcrystals_ml_kem_512_ref_keypair(uint8_t *pk, uint8_t *sk); -extern int pqcrystals_ml_kem_512_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -extern int pqcrystals_ml_kem_512_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#if defined(OQS_ENABLE_KEM_ml_kem_512_avx2) -extern int pqcrystals_ml_kem_512_avx2_keypair(uint8_t *pk, uint8_t *sk); -extern int pqcrystals_ml_kem_512_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -extern int pqcrystals_ml_kem_512_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); -#endif +extern int PQCP_MLKEM_NATIVE_MLKEM512_keypair(uint8_t *pk, uint8_t *sk); +extern int PQCP_MLKEM_NATIVE_MLKEM512_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +extern int PQCP_MLKEM_NATIVE_MLKEM512_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); OQS_API OQS_STATUS OQS_KEM_ml_kem_512_keypair(uint8_t *public_key, uint8_t *secret_key) { -#if defined(OQS_ENABLE_KEM_ml_kem_512_avx2) -#if defined(OQS_DIST_BUILD) - if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { -#endif /* OQS_DIST_BUILD */ - return (OQS_STATUS) pqcrystals_ml_kem_512_avx2_keypair(public_key, secret_key); -#if defined(OQS_DIST_BUILD) - } else { - return (OQS_STATUS) pqcrystals_ml_kem_512_ref_keypair(public_key, secret_key); - } -#endif /* OQS_DIST_BUILD */ -#else - return (OQS_STATUS) pqcrystals_ml_kem_512_ref_keypair(public_key, secret_key); -#endif + return (OQS_STATUS) PQCP_MLKEM_NATIVE_MLKEM512_keypair(public_key, secret_key); } OQS_API OQS_STATUS OQS_KEM_ml_kem_512_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) { -#if defined(OQS_ENABLE_KEM_ml_kem_512_avx2) -#if defined(OQS_DIST_BUILD) - if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { -#endif /* OQS_DIST_BUILD */ - return (OQS_STATUS) pqcrystals_ml_kem_512_avx2_enc(ciphertext, 
shared_secret, public_key); -#if defined(OQS_DIST_BUILD) - } else { - return (OQS_STATUS) pqcrystals_ml_kem_512_ref_enc(ciphertext, shared_secret, public_key); - } -#endif /* OQS_DIST_BUILD */ -#else - return (OQS_STATUS) pqcrystals_ml_kem_512_ref_enc(ciphertext, shared_secret, public_key); -#endif + return (OQS_STATUS) PQCP_MLKEM_NATIVE_MLKEM512_enc(ciphertext, shared_secret, public_key); } OQS_API OQS_STATUS OQS_KEM_ml_kem_512_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) { -#if defined(OQS_ENABLE_KEM_ml_kem_512_avx2) -#if defined(OQS_DIST_BUILD) - if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { -#endif /* OQS_DIST_BUILD */ - return (OQS_STATUS) pqcrystals_ml_kem_512_avx2_dec(shared_secret, ciphertext, secret_key); -#if defined(OQS_DIST_BUILD) - } else { - return (OQS_STATUS) pqcrystals_ml_kem_512_ref_dec(shared_secret, ciphertext, secret_key); - } -#endif /* OQS_DIST_BUILD */ -#else - return (OQS_STATUS) pqcrystals_ml_kem_512_ref_dec(shared_secret, ciphertext, secret_key); -#endif + return (OQS_STATUS) PQCP_MLKEM_NATIVE_MLKEM512_dec(shared_secret, ciphertext, secret_key); } #endif diff --git a/src/kem/ml_kem/kem_ml_kem_768.c b/src/kem/ml_kem/kem_ml_kem_768.c index 14eb6ba404..fd7de0a0ff 100644 --- a/src/kem/ml_kem/kem_ml_kem_768.c +++ b/src/kem/ml_kem/kem_ml_kem_768.c @@ -30,62 +30,20 @@ OQS_KEM *OQS_KEM_ml_kem_768_new(void) { return kem; } -extern int pqcrystals_ml_kem_768_ref_keypair(uint8_t *pk, uint8_t *sk); -extern int pqcrystals_ml_kem_768_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -extern int pqcrystals_ml_kem_768_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#if defined(OQS_ENABLE_KEM_ml_kem_768_avx2) -extern int pqcrystals_ml_kem_768_avx2_keypair(uint8_t *pk, uint8_t *sk); -extern int pqcrystals_ml_kem_768_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -extern int 
pqcrystals_ml_kem_768_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); -#endif +extern int PQCP_MLKEM_NATIVE_MLKEM768_keypair(uint8_t *pk, uint8_t *sk); +extern int PQCP_MLKEM_NATIVE_MLKEM768_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +extern int PQCP_MLKEM_NATIVE_MLKEM768_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); OQS_API OQS_STATUS OQS_KEM_ml_kem_768_keypair(uint8_t *public_key, uint8_t *secret_key) { -#if defined(OQS_ENABLE_KEM_ml_kem_768_avx2) -#if defined(OQS_DIST_BUILD) - if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { -#endif /* OQS_DIST_BUILD */ - return (OQS_STATUS) pqcrystals_ml_kem_768_avx2_keypair(public_key, secret_key); -#if defined(OQS_DIST_BUILD) - } else { - return (OQS_STATUS) pqcrystals_ml_kem_768_ref_keypair(public_key, secret_key); - } -#endif /* OQS_DIST_BUILD */ -#else - return (OQS_STATUS) pqcrystals_ml_kem_768_ref_keypair(public_key, secret_key); -#endif + return (OQS_STATUS) PQCP_MLKEM_NATIVE_MLKEM768_keypair(public_key, secret_key); } OQS_API OQS_STATUS OQS_KEM_ml_kem_768_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) { -#if defined(OQS_ENABLE_KEM_ml_kem_768_avx2) -#if defined(OQS_DIST_BUILD) - if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { -#endif /* OQS_DIST_BUILD */ - return (OQS_STATUS) pqcrystals_ml_kem_768_avx2_enc(ciphertext, shared_secret, public_key); -#if defined(OQS_DIST_BUILD) - } else { - return (OQS_STATUS) pqcrystals_ml_kem_768_ref_enc(ciphertext, shared_secret, public_key); - } -#endif /* OQS_DIST_BUILD */ -#else - return (OQS_STATUS) pqcrystals_ml_kem_768_ref_enc(ciphertext, shared_secret, public_key); -#endif + return (OQS_STATUS) PQCP_MLKEM_NATIVE_MLKEM768_enc(ciphertext, shared_secret, public_key); } OQS_API OQS_STATUS OQS_KEM_ml_kem_768_decaps(uint8_t *shared_secret, const uint8_t 
*ciphertext, const uint8_t *secret_key) { -#if defined(OQS_ENABLE_KEM_ml_kem_768_avx2) -#if defined(OQS_DIST_BUILD) - if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) { -#endif /* OQS_DIST_BUILD */ - return (OQS_STATUS) pqcrystals_ml_kem_768_avx2_dec(shared_secret, ciphertext, secret_key); -#if defined(OQS_DIST_BUILD) - } else { - return (OQS_STATUS) pqcrystals_ml_kem_768_ref_dec(shared_secret, ciphertext, secret_key); - } -#endif /* OQS_DIST_BUILD */ -#else - return (OQS_STATUS) pqcrystals_ml_kem_768_ref_dec(shared_secret, ciphertext, secret_key); -#endif + return (OQS_STATUS) PQCP_MLKEM_NATIVE_MLKEM768_dec(shared_secret, ciphertext, secret_key); } #endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/LICENSE b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/LICENSE similarity index 100% rename from src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/LICENSE rename to src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/LICENSE diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/api.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/api.h new file mode 100644 index 0000000000..94597323f1 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/api.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef API_H +#define API_H + +#include <stdint.h> + +#define PQCP_MLKEM_NATIVE_MLKEM512_SECRETKEYBYTES 1632 +#define PQCP_MLKEM_NATIVE_MLKEM512_PUBLICKEYBYTES 800 +#define PQCP_MLKEM_NATIVE_MLKEM512_CIPHERTEXTBYTES 768 +#define PQCP_MLKEM_NATIVE_MLKEM512_KEYPAIRCOINBYTES 64 +#define PQCP_MLKEM_NATIVE_MLKEM512_ENCCOINBYTES 32 +#define PQCP_MLKEM_NATIVE_MLKEM512_BYTES 32 + +int PQCP_MLKEM_NATIVE_MLKEM512_keypair_derand(uint8_t *pk, uint8_t *sk, + const uint8_t *coins); +int PQCP_MLKEM_NATIVE_MLKEM512_keypair(uint8_t *pk, uint8_t *sk); +int PQCP_MLKEM_NATIVE_MLKEM512_enc_derand(uint8_t *ct, uint8_t
*ss, + const uint8_t *pk, + const uint8_t *coins); +int PQCP_MLKEM_NATIVE_MLKEM512_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int PQCP_MLKEM_NATIVE_MLKEM512_dec(uint8_t *ss, const uint8_t *ct, + const uint8_t *sk); + +#define PQCP_MLKEM_NATIVE_MLKEM768_SECRETKEYBYTES 2400 +#define PQCP_MLKEM_NATIVE_MLKEM768_PUBLICKEYBYTES 1184 +#define PQCP_MLKEM_NATIVE_MLKEM768_CIPHERTEXTBYTES 1088 +#define PQCP_MLKEM_NATIVE_MLKEM768_KEYPAIRCOINBYTES 64 +#define PQCP_MLKEM_NATIVE_MLKEM768_ENCCOINBYTES 32 +#define PQCP_MLKEM_NATIVE_MLKEM768_BYTES 32 + +int PQCP_MLKEM_NATIVE_MLKEM768_keypair_derand(uint8_t *pk, uint8_t *sk, + const uint8_t *coins); +int PQCP_MLKEM_NATIVE_MLKEM768_keypair(uint8_t *pk, uint8_t *sk); +int PQCP_MLKEM_NATIVE_MLKEM768_enc_derand(uint8_t *ct, uint8_t *ss, + const uint8_t *pk, + const uint8_t *coins); +int PQCP_MLKEM_NATIVE_MLKEM768_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int PQCP_MLKEM_NATIVE_MLKEM768_dec(uint8_t *ss, const uint8_t *ct, + const uint8_t *sk); + +#define PQCP_MLKEM_NATIVE_MLKEM1024_SECRETKEYBYTES 3168 +#define PQCP_MLKEM_NATIVE_MLKEM1024_PUBLICKEYBYTES 1568 +#define PQCP_MLKEM_NATIVE_MLKEM1024_CIPHERTEXTBYTES 1568 +#define PQCP_MLKEM_NATIVE_MLKEM1024_KEYPAIRCOINBYTES 64 +#define PQCP_MLKEM_NATIVE_MLKEM1024_ENCCOINBYTES 32 +#define PQCP_MLKEM_NATIVE_MLKEM1024_BYTES 32 + +int PQCP_MLKEM_NATIVE_MLKEM1024_keypair_derand(uint8_t *pk, uint8_t *sk, + const uint8_t *coins); +int PQCP_MLKEM_NATIVE_MLKEM1024_keypair(uint8_t *pk, uint8_t *sk); +int PQCP_MLKEM_NATIVE_MLKEM1024_enc_derand(uint8_t *ct, uint8_t *ss, + const uint8_t *pk, + const uint8_t *coins); +int PQCP_MLKEM_NATIVE_MLKEM1024_enc(uint8_t *ct, uint8_t *ss, + const uint8_t *pk); +int PQCP_MLKEM_NATIVE_MLKEM1024_dec(uint8_t *ss, const uint8_t *ct, + const uint8_t *sk); + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/arith_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/arith_native.h new file mode 100644 index 0000000000..b7e921323a --- /dev/null 
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/arith_native.h @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef MLKEM_ARITH_NATIVE_H +#define MLKEM_ARITH_NATIVE_H + +#include <stdint.h> +#include "config.h" +#include "params.h" + +#if defined(MLKEM_USE_NATIVE) + +#include "poly.h" +#include "polyvec.h" +#include "profile.h" + +/* + * MLKEM native arithmetic interface + * + * This is the C<->native arithmetic interface used in this repository + * to allow for the drop-in of native code for performance critical + * components of ML-KEM. + * + * A _profile_ is a specific implementation of (part of) this interface. + * The active profile (if any) is determined in profile.h. + * + * To add a function to a profile, define MLKEM_USE_NATIVE_XXX and + * implement `static inline xxx(...)` in the profile header. + * + * The only exception is MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER. This option can + * be set if there are native implementations for all of NTT, invNTT, and + * base multiplication, and allows the native implementation to use a + * custom order of polynomial coefficients in NTT domain -- the use of such + * custom order is not an implementation-detail since the public matrix + * is generated in NTT domain. In this case, a permutation function + * poly_permute_bitrev_to_custom() needs to be provided that permutes + * polynomials in NTT domain from bitreversed to the custom order. + */ + +/* + * Those functions are meant to be trivial wrappers around + * the chosen native implementation. They are static inline + * to avoid unnecessary calls. + * The macro before each declaration controls whether a native + * implementation is present. + */ + +#if defined(MLKEM_USE_NATIVE_NTT) +/************************************************* + * Name: ntt_native + * + * Description: Computes negacyclic number-theoretic transform (NTT) of + * a polynomial in place.
+ * + * The input polynomial is assumed to be in normal order. + * The output polynomial is in bitreversed order, or of a + * custom order if MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER is set. + * See the documentation of MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER + * for more information. + * + * Arguments: - poly *p: pointer to in/output polynomial + **************************************************/ +static INLINE void ntt_native(poly *); +#endif /* MLKEM_USE_NATIVE_NTT */ + +#if defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER) +/* + * This must only be set if NTT, invNTT, basemul, mulcache, and + * to/from byte stream conversions all have native implementations + * that are adapted to the custom order. + */ +#if !defined(MLKEM_USE_NATIVE_NTT) || !defined(MLKEM_USE_NATIVE_INTT) || \ + !defined(MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE) || \ + !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) || \ + !defined(MLKEM_USE_NATIVE_POLY_TOBYTES) || \ + !defined(MLKEM_USE_NATIVE_POLY_FROMBYTES) +#error \ + "Invalid native profile: MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER can only be \ +set if there are native implementations for NTT, invNTT, mulcache, basemul, \ +and to/from bytes conversions." +#endif + +/************************************************* + * Name: poly_permute_bitrev_to_custom + * + * Description: When MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER is defined, + * convert a polynomial in NTT domain from bitreversed + * order to the custom order output by the native NTT. + * + * This must only be defined if there is native code for + * all of (a) NTT, (b) invNTT, (c) basemul, (d) mulcache. 
+ * Arguments: - poly *p: pointer to in/output polynomial + * + **************************************************/ +static INLINE void poly_permute_bitrev_to_custom(poly *); +#endif /* MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER */ + +#if defined(MLKEM_USE_NATIVE_INTT) +/************************************************* + * Name: intt_native + * + * Description: Computes inverse of negacyclic number-theoretic transform (NTT) + * of a polynomial in place. + * + * The input polynomial is in bitreversed order, or of a + * custom order if MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER is set. + * See the documentation of MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER + * for more information. + * The output polynomial is assumed to be in normal order. + * + * Arguments: - poly *p: pointer to in/output polynomial + **************************************************/ +static INLINE void intt_native(poly *); +#endif /* MLKEM_USE_NATIVE_INTT */ + +#if defined(MLKEM_USE_NATIVE_POLY_REDUCE) +/************************************************* + * Name: poly_reduce_native + * + * Description: Applies modular reduction to all coefficients of a polynomial.
+ * + * Arguments: - poly *r: pointer to input/output polynomial + **************************************************/ +static INLINE void poly_reduce_native(poly *); +#endif /* MLKEM_USE_NATIVE_POLY_REDUCE */ + +#if defined(MLKEM_USE_NATIVE_POLY_TOMONT) +/************************************************* + * Name: poly_tomont_native + * + * Description: Inplace conversion of all coefficients of a polynomial + * from normal domain to Montgomery domain + * + * Arguments: - poly *r: pointer to input/output polynomial + **************************************************/ +static INLINE void poly_tomont_native(poly *); +#endif /* MLKEM_USE_NATIVE_POLY_TOMONT */ + +#if defined(MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE) +/************************************************* + * Name: poly_mulcache_compute_native + * + * Description: Compute multiplication cache for a polynomial + * in NTT domain. + * + * The purpose of the multiplication cache is to + * cache repeated computations required during a + * base multiplication of polynomials in NTT domain. + * The structure of the multiplication-cache is + * implementation defined. + * + * Arguments: INPUT: + * - poly: const pointer to input polynomial. + * This must be in NTT domain and in bitreversed order, or of + * a custom order if MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER is set. + * See the documentation of MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER + * for more information. + * OUTPUT + * - cache: pointer to multiplication cache + **************************************************/ +static INLINE void poly_mulcache_compute_native(poly_mulcache *cache, + const poly *poly); +#endif /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ + +#if defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) +/************************************************* + * Name: polyvec_basemul_acc_montgomery_cached_native + * + * Description: Compute multiplication of polynomials in NTT domain. + * + * Arguments: INPUT: + * - a: First polynomial operand.
+ * This must be in NTT domain and in bitreversed order, or of + * a custom order if MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER is set. + * See the documentation of MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER + * for more information. + * - b: Second polynomial operand. + * As for a. + * - b_cache: Multiplication-cache for b. + * OUTPUT + * - r: Result of the base multiplication. This is again + * in NTT domain, and of the same order as a and b. + **************************************************/ +static INLINE void polyvec_basemul_acc_montgomery_cached_native( + poly *r, const polyvec *a, const polyvec *b, + const polyvec_mulcache *b_cache); +#endif + +#if defined(MLKEM_USE_NATIVE_POLY_TOBYTES) +/************************************************* + * Name: poly_tobytes_native + * + * Description: Serialization of a polynomial. + * Signed coefficients are converted to + * unsigned form before serialization. + * + * Arguments: INPUT: + * - a: const pointer to input polynomial, + * with each coefficient in the range -Q+1 .. Q-1 + * OUTPUT + * - r: pointer to output byte array + * (of MLKEM_POLYBYTES bytes) + **************************************************/ +static INLINE void poly_tobytes_native(uint8_t r[MLKEM_POLYBYTES], + const poly *a); +#endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */ + +#if defined(MLKEM_USE_NATIVE_POLY_FROMBYTES) +/************************************************* + * Name: poly_frombytes_native + * + * Description: De-serialization of a polynomial from a byte array + * (of MLKEM_POLYBYTES bytes); counterpart of + * poly_tobytes_native.
+ * + * Arguments: INPUT: + * - r: const pointer to input byte array + * (of MLKEM_POLYBYTES bytes) + * OUTPUT + * - a: pointer to output polynomial in NTT domain + **************************************************/ +static INLINE void poly_frombytes_native(poly *a, + const uint8_t r[MLKEM_POLYBYTES]); +#endif /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ + +#if defined(MLKEM_USE_NATIVE_REJ_UNIFORM) +/************************************************* + * Name: rej_uniform_native + * + * Description: Run rejection sampling on uniform random bytes to generate + * uniform random integers mod q + * + * Arguments: - int16_t *r: pointer to output buffer + * - unsigned int len: requested number of 16-bit integers + * (uniform mod q). + * - const uint8_t *buf: pointer to input buffer + * (assumed to be uniform random bytes) + * - unsigned int buflen: length of input buffer in bytes. + * + * Return -1 if the native implementation does not support the input lengths. + * Otherwise, returns non-negative number of sampled 16-bit integers (at most + * len).
+ **************************************************/ +static INLINE int rej_uniform_native(int16_t *r, unsigned int len, + const uint8_t *buf, unsigned int buflen); +#endif /* MLKEM_USE_NATIVE_REJ_UNIFORM */ + +#endif /* MLKEM_USE_NATIVE */ +#endif /* MLKEM_ARITH_NATIVE_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.c new file mode 100644 index 0000000000..073f3c81d7 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "cbd.h" +#include +#include "params.h" + +/************************************************* + * Name: load32_littleendian + * + * Description: load 4 bytes into a 32-bit integer + * in little-endian order + * + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns 32-bit unsigned integer loaded from x + **************************************************/ +static uint32_t load32_littleendian(const uint8_t x[4]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + r |= (uint32_t)x[3] << 24; + return r; +} + +/************************************************* + * Name: load24_littleendian + * + * Description: load 3 bytes into a 32-bit integer + * in little-endian order. 
+ * This function is only needed for ML-KEM-512 + * + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) + **************************************************/ +#if MLKEM_ETA1 == 3 +static uint32_t load24_littleendian(const uint8_t x[3]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + return r; +} +#endif + +/************************************************* + * Name: cbd2 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter eta=2 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) +{ + int i; + for (i = 0; i < MLKEM_N / 8; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(array_abs_bound(r->coeffs, 0, (8 * i - 1), 2))) + { + int j; + uint32_t t = load32_littleendian(buf + 4 * i); + uint32_t d = t & 0x55555555; + d += (t >> 1) & 0x55555555; + + for (j = 0; j < 8; j++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(array_abs_bound(r->coeffs, 0, 8 * i + j - 1, 2))) + { + const int16_t a = (d >> (4 * j + 0)) & 0x3; + const int16_t b = (d >> (4 * j + 2)) & 0x3; + r->coeffs[8 * i + j] = a - b; + } + } +} + +/************************************************* + * Name: cbd3 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter eta=3. 
+ * This function is only needed for ML-KEM-512 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +#if MLKEM_ETA1 == 3 +static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) +{ + int i; + for (i = 0; i < MLKEM_N / 4; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 3))) + { + int j; + const uint32_t t = load24_littleendian(buf + 3 * i); + uint32_t d = t & 0x00249249; + d += (t >> 1) & 0x00249249; + d += (t >> 2) & 0x00249249; + + for (j = 0; j < 4; j++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4) + invariant(array_abs_bound(r->coeffs, 0, 4 * i + j - 1, 3))) + { + const int16_t a = (d >> (6 * j + 0)) & 0x7; + const int16_t b = (d >> (6 * j + 3)) & 0x7; + r->coeffs[4 * i + j] = a - b; + } + } +} +#endif + +void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) +{ +#if MLKEM_ETA1 == 2 + cbd2(r, buf); +#elif MLKEM_ETA1 == 3 + cbd3(r, buf); +#else +#error "This implementation requires eta1 in {2,3}" +#endif +} + +void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) +{ +#if MLKEM_ETA2 == 2 + cbd2(r, buf); +#else +#error "This implementation requires eta2 = 2" +#endif +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.h new file mode 100644 index 0000000000..4dc8635bb5 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef CBD_H +#define CBD_H + +#include +#include "params.h" +#include "poly.h" + +#define poly_cbd_eta1 MLKEM_NAMESPACE(poly_cbd_eta1) +/************************************************* + * Name: poly_cbd_eta1 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with 
coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA1. + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA1)) +); + +#define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2) +/************************************************* + * Name: poly_cbd_eta2 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA2. + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2)) +); + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbmc.h new file mode 100644 index 0000000000..317a26421b --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbmc.h @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +/*************************************************** + * Basic replacements for __CPROVER_XXX contracts + ***************************************************/ + +#include "common.h" + +#ifndef CBMC + +#define STATIC_INLINE_TESTABLE static INLINE +#define
STATIC_TESTABLE static + +#define __contract__(x) +#define __loop__(x) +#define cassert(x, y) + +#else /* CBMC _is_ defined, therefore we're doing proof */ + +/* expose certain procedures to CBMC proofs that are static otherwise */ +#define STATIC_TESTABLE +#define STATIC_INLINE_TESTABLE + +#define __contract__(x) x +#define __loop__(x) x + +/* https://diffblue.github.io/cbmc/contracts-assigns.html */ +#define assigns(...) __CPROVER_assigns(__VA_ARGS__) + +/* https://diffblue.github.io/cbmc/contracts-requires-ensures.html */ +#define requires(...) __CPROVER_requires(__VA_ARGS__) +#define ensures(...) __CPROVER_ensures(__VA_ARGS__) +/* https://diffblue.github.io/cbmc/contracts-loops.html */ +#define invariant(...) __CPROVER_loop_invariant(__VA_ARGS__) +#define decreases(...) __CPROVER_decreases(__VA_ARGS__) +/* cassert to avoid confusion with in-built assert */ +#define cassert(...) __CPROVER_assert(__VA_ARGS__) +#define assume(...) __CPROVER_assume(__VA_ARGS__) + +/*************************************************** + * Macros for "expression" forms that may appear + * _inside_ top-level contracts. + ***************************************************/ + +/* + * function return value - useful inside ensures + * https://diffblue.github.io/cbmc/contracts-functions.html + */ +#define return_value (__CPROVER_return_value) + +/* + * assigns l-value targets + * https://diffblue.github.io/cbmc/contracts-assigns.html + */ +#define object_whole(...) __CPROVER_object_whole(__VA_ARGS__) +#define memory_slice(...) __CPROVER_object_upto(__VA_ARGS__) +#define same_object(...) __CPROVER_same_object(__VA_ARGS__) + +/* + * Pointer-related predicates + * https://diffblue.github.io/cbmc/contracts-memory-predicates.html + */ +#define memory_no_alias(...) __CPROVER_is_fresh(__VA_ARGS__) +#define readable(...) __CPROVER_r_ok(__VA_ARGS__) +#define writeable(...) 
__CPROVER_w_ok(__VA_ARGS__) + +/* + * History variables + * https://diffblue.github.io/cbmc/contracts-history-variables.html + */ +#define old(...) __CPROVER_old(__VA_ARGS__) +#define loop_entry(...) __CPROVER_loop_entry(__VA_ARGS__) + +/* + * Quantifiers + * Note that the range on qvar is _inclusive_ between qvar_lb .. qvar_ub + * https://diffblue.github.io/cbmc/contracts-quantifiers.html + */ + +/* + * Prevent clang-format from corrupting CBMC's special ==> operator + */ +/* clang-format off */ +#define forall(type, qvar, qvar_lb, qvar_ub, predicate) \ + __CPROVER_forall \ + { \ + type qvar; \ + ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==> (predicate) \ + } + +#define EXISTS(type, qvar, qvar_lb, qvar_ub, predicate) \ + __CPROVER_exists \ + { \ + type qvar; \ + ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) && (predicate) \ + } +/* clang-format on */ + +/*************************************************** + * Convenience macros for common contract patterns + ***************************************************/ + +/* + * Boolean-valued predicate that asserts that "all values of array_var are in + * range value_lb ..
value_ub (inclusive)" + * Example: + * array_bound(a->coeffs, 0, MLKEM_N-1, -(MLKEM_Q - 1), MLKEM_Q - 1) + * expands to + * __CPROVER_forall { int k; (0 <= k && k <= MLKEM_N-1) ==> ( (-(MLKEM_Q - + * 1) <= a->coeffs[k]) && (a->coeffs[k] <= (MLKEM_Q - 1))) } + */ + +/* + * Prevent clang-format from corrupting CBMC's special ==> operator + */ +/* clang-format off */ +#define CBMC_CONCAT_(left, right) left##right +#define CBMC_CONCAT(left, right) CBMC_CONCAT_(left, right) + +#define array_bound_core(indextype, qvar, qvar_lb, qvar_ub, array_var, \ + value_lb, value_ub) \ + __CPROVER_forall \ + { \ + indextype qvar; \ + ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==> \ + (((value_lb) <= (array_var[(qvar)])) && \ + ((array_var[(qvar)]) <= (value_ub))) \ + } + +#define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \ + array_bound_core(int, CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \ + (qvar_ub), (array_var), (value_lb), (value_ub)) + + +/* Wrapper around array_bound operating on absolute values */ +#define array_abs_bound(arr, lb, ub, k) \ + array_bound((arr), (lb), (ub), (-(k)), (k)) +/* clang-format on */ + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/common.h new file mode 100644 index 0000000000..94c29ed927 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/common.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef COMMON_H +#define COMMON_H + + +/* + * C90 does not have the inline compiler directive yet. + * We don't use it in C90 builds. + * However, in that case the compiler warns about some inline functions in + * header files not being used in every compilation unit that includes that + * header. To work around it we silence that warning in that case using + * __attribute__((unused)). 
+ */ + +/* Do not use inline for C90 builds*/ +#if !defined(inline) +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define INLINE inline +#define ALWAYS_INLINE __attribute__((always_inline)) +#elif defined(_MSC_VER) +#define INLINE __inline +#define ALWAYS_INLINE __forceinline +#else +#define INLINE __attribute__((unused)) +#define ALWAYS_INLINE +#endif + +#else +#define INLINE inline +#define ALWAYS_INLINE __attribute__((always_inline)) +#endif + + +/* + * C90 does not have the restrict compiler directive yet. + * We don't use it in C90 builds. + */ +#if !defined(restrict) +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define RESTRICT restrict +#else +#define RESTRICT +#endif + +#else + +#define RESTRICT restrict +#endif + +#define DEFAULT_ALIGN 32 +#if defined(_WIN32) +#define ALIGN __declspec(align(DEFAULT_ALIGN)) +#define asm __asm +#else +#define asm __asm__ +#define ALIGN __attribute__((aligned(DEFAULT_ALIGN))) +#endif + +#define MLKEM_CONCAT_(left, right) left##right +#define MLKEM_CONCAT(left, right) MLKEM_CONCAT_(left, right) + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/config.h new file mode 100644 index 0000000000..370a141a65 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/config.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef CONFIG_H +#define CONFIG_H + +#include "cpucap.h" + +#if defined(MLKEM_USE_NATIVE) + +#if defined(SYS_AARCH64) +#define MLKEM_USE_NATIVE_AARCH64 +#endif /* SYS_AARCH64 */ + +#if defined(SYS_X86_64) +#define MLKEM_USE_NATIVE_X86_64 +#endif /* SYS_X86_64 */ + +#endif /* MLKEM_USE_NATIVE */ +#endif /* CONFIG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cpucap.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cpucap.h new file mode 100644 index 0000000000..cfcbbc3fe9 --- /dev/null +++ 
b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cpucap.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef CPUCAP_H +#define CPUCAP_H + +/* Check if we're running on an AArch64 system. _M_ARM64 is set by MSVC. */ +#if defined(__AARCH64EL__) || defined(_M_ARM64) +#define SYS_AARCH64 +#endif + +#if defined(__x86_64__) +#define SYS_X86_64 +#if defined(__AVX2__) +#define SYS_X86_64_AVX2 +#endif +#endif /* __x86_64__ */ + +/* Check endianness */ +#if defined(__BYTE_ORDER__) + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define SYS_LITTLE_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define SYS_BIG_ENDIAN +#else /* __BYTE_ORDER__ */ +#error "__BYTE_ORDER__ defined, but don't recognize value." +#endif /* __BYTE_ORDER__ */ +#endif /* defined(__BYTE_ORDER__) */ + +/* If FORCE_AARCH64 is set, assert that we're indeed on an AArch64 system. */ +#if defined(FORCE_AARCH64) && !defined(SYS_AARCH64) +#error "FORCE_AARCH64 is set, but we don't seem to be on an AArch64 system." +#endif + +/* If FORCE_X86_64 is set, assert that we're indeed on an X86_64 system. */ +#if defined(FORCE_X86_64) && !defined(SYS_X86_64) +#error "FORCE_X86_64 is set, but we don't seem to be on an X86_64 system."
+#endif + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.c new file mode 100644 index 0000000000..aa9b578074 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.c @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#define _ISOC99_SOURCE +#include "debug.h" +#include + +#if defined(MLKEM_DEBUG) + +static char debug_buf[256]; + +void mlkem_debug_assert(const char *file, int line, const char *description, + const int val) +{ + if (val == 0) + { + snprintf(debug_buf, sizeof(debug_buf), "Assertion failed: %s (value %d)", + description, val); + mlkem_debug_print_error(file, line, debug_buf); + exit(1); + } +} +void mlkem_debug_check_bounds(const char *file, int line, + const char *description, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive) +{ + int err = 0; + unsigned i; + for (i = 0; i < len; i++) + { + int16_t val = ptr[i]; + if (!(val > lower_bound_exclusive && val < upper_bound_exclusive)) + { + snprintf(debug_buf, sizeof(debug_buf), + "%s, index %u, value %d out of bounds (%d,%d)", description, i, + (int)val, lower_bound_exclusive, upper_bound_exclusive); + mlkem_debug_print_error(file, line, debug_buf); + err = 1; + } + } + + if (err == 1) + exit(1); +} + +void mlkem_debug_print_error(const char *file, int line, const char *msg) +{ + fprintf(stderr, "[ERROR:%s:%04d] %s\n", file, line, msg); + fflush(stderr); +} + +#else /* MLKEM_DEBUG */ + +int empty_cu_debug; + +#endif /* MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.h new file mode 100644 index 0000000000..65208771d2 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.h @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * 
SPDX-License-Identifier: Apache-2.0 + */ +#ifndef MLKEM_DEBUG_H +#define MLKEM_DEBUG_H + +#if defined(MLKEM_DEBUG) +#include +#include +#include + +/************************************************* + * Name: mlkem_debug_assert + * + * Description: Check debug assertion + * + * Prints an error message to stderr and calls + * exit(1) if not. + * + * Arguments: - file: filename + * - line: line number + * - description: Textual description of assertion + * - val: Value asserted to be non-zero + **************************************************/ +void mlkem_debug_assert(const char *file, int line, const char *description, + const int val); + +/************************************************* + * Name: mlkem_debug_check_bounds + * + * Description: Check whether values in an array of int16_t + * are within specified bounds. + * + * Prints an error message to stderr and calls + * exit(1) if not. + * + * Arguments: - file: filename + * - line: line number + * - description: Textual description of check + * - ptr: Base of array to be checked + * - len: Number of int16_t in ptr + * - lower_bound_exclusive: Exclusive lower bound + * - upper_bound_exclusive: Exclusive upper bound + **************************************************/ +void mlkem_debug_check_bounds(const char *file, int line, + const char *description, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive); + +/* Print error message to stderr alongside file and line information */ +void mlkem_debug_print_error(const char *file, int line, const char *msg); + +/* Check assertion, calling exit() upon failure + * + * val: Value that's asserted to be non-zero + * msg: Message to print on failure + * + * Currently called CASSERT to avoid clash with CBMC assert. 
+ */ +#define CASSERT(val, msg) \ + do \ + { \ + mlkem_debug_assert(__FILE__, __LINE__, (msg), (val)); \ + } while (0) + +/* Check absolute bounds of scalar + * val: Scalar to be checked + * abs_bound: Exclusive upper bound on absolute value to check + * msg: Message to print on failure */ +#define SCALAR_BOUND(val, abs_bound, msg) \ + CASSERT((val) > -(abs_bound) && (val) < (abs_bound), msg) + +/* Check that all coefficients in array of int16_t's are non-negative + * and below an exclusive upper bound. + * + * ptr: Base of array, expression of type int16_t* + * len: Number of int16_t in array + * high_bound: Exclusive upper bound on value to check + * msg: Message to print on failure */ +#define UBOUND(ptr, len, high_bound, msg) \ + do \ + { \ + mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ + (len), -1, ((high_bound))); \ + } while (0) + +/* Check absolute bounds in array of int16_t's + * ptr: Base of array, expression of type int16_t* + * len: Number of int16_t in array + * abs_bound: Exclusive upper bound on absolute value to check + * msg: Message to print on failure */ +#define BOUND(ptr, len, abs_bound, msg) \ + do \ + { \ + mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ + (len), -(abs_bound), (abs_bound)); \ + } while (0) + +/* Check absolute bounds on coefficients in polynomial or mulcache + * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check + * abs_bound: Exclusive upper bound on absolute value to check + * msg: Message to print on failure */ +#define POLY_BOUND_MSG(ptr, abs_bound, msg) \ + BOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (abs_bound), \ + msg) + +/* Check unsigned bounds on coefficients in polynomial or mulcache + * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check + * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0.
+ * msg: Message to print on failure */ +#define POLY_UBOUND_MSG(ptr, ubound, msg) \ + UBOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (ubound), \ + msg) + +/* Check absolute bounds on coefficients in polynomial + * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check + * abs_bound: Exclusive upper bound on absolute value to check */ +#define POLY_BOUND(ptr, abs_bound) \ + POLY_BOUND_MSG((ptr), (abs_bound), "poly absolute bound for " #ptr) + +/* Check unsigned bounds on coefficients in polynomial + * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check + * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. + */ +#define POLY_UBOUND(ptr, ubound) \ + POLY_UBOUND_MSG((ptr), (ubound), "poly unsigned bound for " #ptr) + +/* Check absolute bounds on coefficients in vector of polynomials + * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check + * abs_bound: Exclusive upper bound on absolute value to check */ +#define POLYVEC_BOUND(ptr, abs_bound) \ + do \ + { \ + unsigned _debug_polyvec_bound_idx; \ + for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ + _debug_polyvec_bound_idx++) \ + POLY_BOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (abs_bound), \ + "polyvec absolute bound for " #ptr ".vec[i]"); \ + } while (0) + +/* Check unsigned bounds on coefficients in vector of polynomials + * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check + * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0.
+ */ +#define POLYVEC_UBOUND(ptr, ubound) \ + do \ + { \ + unsigned _debug_polyvec_bound_idx; \ + for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ + _debug_polyvec_bound_idx++) \ + POLY_UBOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (ubound), \ + "polyvec unsigned bound for " #ptr ".vec[i]"); \ + } while (0) + +/* Following AWS-LC to define a C99-compliant static assert */ +#define MLKEM_STATIC_ASSERT_DEFINE(cond, msg) \ + typedef struct \ + { \ + unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \ + } MLKEM_CONCAT(static_assertion_, msg) __attribute__((unused)); + +#define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \ + MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix)) +#define MLKEM_STATIC_ASSERT_ADD_LINE1(cond, line, suffix) \ + MLKEM_STATIC_ASSERT_ADD_LINE0(cond, MLKEM_CONCAT(line, suffix)) +#define MLKEM_STATIC_ASSERT_ADD_LINE2(cond, suffix) \ + MLKEM_STATIC_ASSERT_ADD_LINE1(cond, __LINE__, suffix) +#define MLKEM_STATIC_ASSERT_ADD_ERROR(cond, suffix) \ + MLKEM_STATIC_ASSERT_ADD_LINE2(cond, MLKEM_CONCAT(_error_is_, suffix)) +#define STATIC_ASSERT(cond, error) MLKEM_STATIC_ASSERT_ADD_ERROR(cond, error) + +#else /* MLKEM_DEBUG */ + +#define CASSERT(val, msg) \ + do \ + { \ + } while (0) +#define SCALAR_BOUND(val, abs_bound, msg) \ + do \ + { \ + } while (0) +#define BOUND(ptr, len, abs_bound, msg) \ + do \ + { \ + } while (0) +#define POLY_BOUND(ptr, abs_bound) \ + do \ + { \ + } while (0) +#define POLYVEC_BOUND(ptr, abs_bound) \ + do \ + { \ + } while (0) +#define POLY_BOUND_MSG(ptr, ubound, abs_bound) \ + do \ + { \ + } while (0) +#define UBOUND(ptr, len, high_bound, msg) \ + do \ + { \ + } while (0) +#define POLY_UBOUND(ptr, ubound) \ + do \ + { \ + } while (0) +#define POLYVEC_UBOUND(ptr, ubound) \ + do \ + { \ + } while (0) +#define POLY_UBOUND_MSG(ptr, ubound, msg) \ + do \ + { \ + } while (0) +#define STATIC_ASSERT(cond, error) + +#endif /* MLKEM_DEBUG */ + +#endif /* MLKEM_DEBUG_H */ diff --git 
a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.c new file mode 100644 index 0000000000..669460c29c --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.c @@ -0,0 +1,577 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "indcpa.h" +#include +#include +#include +#include "fips202.h" +#include "fips202x4.h" +#include "indcpa.h" +#include "ntt.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include "randombytes.h" +#include "rej_uniform.h" +#include "symmetric.h" + +#include "arith_native.h" +#include "debug/debug.h" + +#include "cbmc.h" + + +/************************************************* + * Name: pack_pk + * + * Description: Serialize the public key as concatenation of the + * serialized vector of polynomials pk + * and the public seed used to generate the matrix A. + * + * Arguments: uint8_t *r: pointer to the output serialized public key + * polyvec *pk: pointer to the input public-key polyvec. + * Must have coefficients within [0,..,q-1]. + * const uint8_t *seed: pointer to the input public seed + **************************************************/ +static void pack_pk(uint8_t r[MLKEM_INDCPA_PUBLICKEYBYTES], polyvec *pk, + const uint8_t seed[MLKEM_SYMBYTES]) +{ + POLYVEC_BOUND(pk, MLKEM_Q); + polyvec_tobytes(r, pk); + memcpy(r + MLKEM_POLYVECBYTES, seed, MLKEM_SYMBYTES); +} + +/************************************************* + * Name: unpack_pk + * + * Description: De-serialize public key from a byte array; + * approximate inverse of pack_pk + * + * Arguments: - polyvec *pk: pointer to output public-key polynomial vector + * Coefficients will be normalized to [0,..,q-1]. + * - uint8_t *seed: pointer to output seed to generate matrix A + * - const uint8_t *packedpk: pointer to input serialized public + * key. 
+ **************************************************/ +static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], + const uint8_t packedpk[MLKEM_INDCPA_PUBLICKEYBYTES]) +{ + polyvec_frombytes(pk, packedpk); + memcpy(seed, packedpk + MLKEM_POLYVECBYTES, MLKEM_SYMBYTES); + + /* + * TODO! We know from the modulus check that this will result in an + * unsigned canonical polynomial, but CBMC does not know it. We should + * weaken the specification of `unpack_pk()` and all depending functions + * to work with the weaker 4096-bound, so that the proofs go through + * without the need of this redundant call to polyvec_reduce(). + */ + polyvec_reduce(pk); +} + +/************************************************* + * Name: pack_sk + * + * Description: Serialize the secret key + * + * Arguments: - uint8_t *r: pointer to output serialized secret key + * - polyvec *sk: pointer to input vector of polynomials (secret + *key) + **************************************************/ +static void pack_sk(uint8_t r[MLKEM_INDCPA_SECRETKEYBYTES], polyvec *sk) +{ + POLYVEC_BOUND(sk, MLKEM_Q); + polyvec_tobytes(r, sk); +} + +/************************************************* + * Name: unpack_sk + * + * Description: De-serialize the secret key; inverse of pack_sk + * + * Arguments: - polyvec *sk: pointer to output vector of polynomials (secret + *key) + * - const uint8_t *packedsk: pointer to input serialized secret + *key + **************************************************/ +static void unpack_sk(polyvec *sk, + const uint8_t packedsk[MLKEM_INDCPA_SECRETKEYBYTES]) +{ + polyvec_frombytes(sk, packedsk); + polyvec_reduce(sk); +} + +/************************************************* + * Name: pack_ciphertext + * + * Description: Serialize the ciphertext as concatenation of the + * compressed and serialized vector of polynomials b + * and the compressed and serialized polynomial v + * + * Arguments: uint8_t *r: pointer to the output serialized ciphertext + * polyvec *b: pointer to the input
vector of polynomials b + * poly *v: pointer to the input polynomial v + **************************************************/ +static void pack_ciphertext(uint8_t r[MLKEM_INDCPA_BYTES], polyvec *b, poly *v) +{ + polyvec_compress_du(r, b); + poly_compress_dv(r + MLKEM_POLYVECCOMPRESSEDBYTES_DU, v); +} + +/************************************************* + * Name: unpack_ciphertext + * + * Description: De-serialize and decompress ciphertext from a byte array; + * approximate inverse of pack_ciphertext + * + * Arguments: - polyvec *b: pointer to the output vector of polynomials b + * - poly *v: pointer to the output polynomial v + * - const uint8_t *c: pointer to the input serialized ciphertext + **************************************************/ +static void unpack_ciphertext(polyvec *b, poly *v, + const uint8_t c[MLKEM_INDCPA_BYTES]) +{ + polyvec_decompress_du(b, c); + poly_decompress_dv(v, c + MLKEM_POLYVECCOMPRESSEDBYTES_DU); +} + +#ifndef MLKEM_GEN_MATRIX_NBLOCKS +#define MLKEM_GEN_MATRIX_NBLOCKS \ + ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + SHAKE128_RATE) / SHAKE128_RATE) +#endif + +/* + * Generate four A matrix entries from a seed, using rejection + * sampling on the output of a XOF. 
+ */ +STATIC_TESTABLE +void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4]) +__contract__( + requires(memory_no_alias(vec, sizeof(poly) * 4)) + requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) + requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) + assigns(memory_slice(vec, sizeof(poly) * 4)) + ensures(array_bound(vec[0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))) + ensures(array_bound(vec[1].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))) + ensures(array_bound(vec[2].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))) + ensures(array_bound(vec[3].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))) +{ + /* Temporary buffers for XOF output before rejection sampling */ + uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE]; + uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE]; + uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE]; + uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE]; + + /* Tracks the number of coefficients we have already sampled */ + unsigned int ctr[KECCAK_WAY]; + shake128x4incctx statex; + unsigned int buflen; + + shake128x4_inc_init(&statex); + + /* Each seed is MLKEM_SYMBYTES + 2 bytes long; all of it is absorbed */ + shake128x4_absorb_once(&statex, seed[0], seed[1], seed[2], seed[3], + MLKEM_SYMBYTES + 2); + + /* + * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + * This should generate the matrix entries with high probability.
+ */ + shake128x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, + &statex); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE; + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); + + /* + * So long as not all matrix entries have been generated, squeeze + * one more block at a time until we're done. + */ + buflen = SHAKE128_RATE; + while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || + ctr[3] < MLKEM_N) + __loop__( + assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), + object_whole(buf1), object_whole(buf2), object_whole(buf3)) + invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) + invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) + invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0] - 1, 0, (MLKEM_Q - 1))) + invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1] - 1, 0, (MLKEM_Q - 1))) + invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2] - 1, 0, (MLKEM_Q - 1))) + invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3] - 1, 0, (MLKEM_Q - 1)))) + { + shake128x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); + } + + shake128x4_inc_ctx_release(&statex); +} + +/* + * Generate a single A matrix entry from a seed, using rejection + * sampling on the output of a XOF.
+ */ +STATIC_TESTABLE +void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) +__contract__( + requires(memory_no_alias(entry, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) + assigns(memory_slice(entry, sizeof(poly))) + ensures(array_bound(entry->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))) +{ + shake128incctx state; + uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE]; + unsigned int ctr, buflen; + + shake128_inc_init(&state); + + shake128_absorb_once(&state, seed, MLKEM_SYMBYTES + 2); + + /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + */ + /* This should generate the matrix entry with high probability. */ + shake128_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE; + ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); + + /* Squeeze + sample one more block at a time until we're done */ + buflen = SHAKE128_RATE; + while (ctr < MLKEM_N) + __loop__( + assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) + invariant(0 <= ctr && ctr <= MLKEM_N) + invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr - 1, + 0, (MLKEM_Q - 1)))) + { + shake128_squeezeblocks(buf, 1, &state); + ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, SHAKE128_RATE); + } + + shake128_inc_ctx_release(&state); +} + +/************************************************* + * Name: gen_matrix + * + * Description: Deterministically generate matrix A (or the transpose of A) + * from a seed. Entries of the matrix are polynomials that look + * uniformly random.
Performs rejection sampling on output of + * a XOF + * + * Arguments: - polyvec *a: pointer to output matrix A + * - const uint8_t *seed: pointer to input seed + * - int transposed: boolean deciding whether A or A^T is generated + **************************************************/ +/* Not static for benchmarking */ +void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) +{ + int i; + unsigned int j; + /* + * We generate four separate seed arrays rather than a single one to work + * around limitations in CBMC function contracts dealing with disjoint slices + * of the same parent object. + */ + + ALIGN uint8_t seed0[MLKEM_SYMBYTES + 2]; + ALIGN uint8_t seed1[MLKEM_SYMBYTES + 2]; + ALIGN uint8_t seed2[MLKEM_SYMBYTES + 2]; + ALIGN uint8_t seed3[MLKEM_SYMBYTES + 2]; + uint8_t *seedxy[4]; + seedxy[0] = seed0; + seedxy[1] = seed1; + seedxy[2] = seed2; + seedxy[3] = seed3; + + for (j = 0; j < KECCAK_WAY; j++) + { + memcpy(seedxy[j], seed, MLKEM_SYMBYTES); + } + + for (i = 0; i < (MLKEM_K * MLKEM_K / KECCAK_WAY) * KECCAK_WAY; + i += KECCAK_WAY) + { + uint8_t x, y; + + for (j = 0; j < KECCAK_WAY; j++) + { + x = (i + j) / MLKEM_K; + y = (i + j) % MLKEM_K; + if (transposed) + { + seedxy[j][MLKEM_SYMBYTES + 0] = x; + seedxy[j][MLKEM_SYMBYTES + 1] = y; + } + else + { + seedxy[j][MLKEM_SYMBYTES + 0] = y; + seedxy[j][MLKEM_SYMBYTES + 1] = x; + } + } + + /* + * This call writes across polyvec boundaries for K=2 and K=3. + * This is intentional and safe. + */ + gen_matrix_entry_x4(&a[0].vec[0] + i, seedxy); + } + + /* For left over polynomial, we use single keccak.
*/ + if (i < MLKEM_K * MLKEM_K) + { + uint8_t x, y; + x = i / MLKEM_K; + y = i % MLKEM_K; + + if (transposed) + { + seed0[MLKEM_SYMBYTES + 0] = x; + seed0[MLKEM_SYMBYTES + 1] = y; + } + else + { + seed0[MLKEM_SYMBYTES + 0] = y; + seed0[MLKEM_SYMBYTES + 1] = x; + } + + gen_matrix_entry(&a[0].vec[0] + i, seed0); + i++; + } + + cassert(i == MLKEM_K * MLKEM_K, + "gen_matrix: failed to generate whole matrix"); + +#if defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER) + /* + * The public matrix is generated in NTT domain. If the native backend + * uses a custom order in NTT domain, permute A accordingly. + */ + for (i = 0; i < MLKEM_K; i++) + { + for (j = 0; j < MLKEM_K; j++) + { + poly_permute_bitrev_to_custom(&a[i].vec[j]); + } + } +#endif /* MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER */ +} + +/************************************************* + * Name: matvec_mul + * + * Description: Computes matrix-vector product in NTT domain, + * via Montgomery multiplication. + * + * Arguments: - polyvec *out: Pointer to output polynomial vector + * - polyvec a[MLKEM_K]: Input matrix. Must be in NTT domain + * and have coefficients of absolute value < MLKEM_Q. + * - polyvec *v: Input polynomial vector. Must be in NTT domain. + * - polyvec *vc: Mulcache for v, computed via + * polyvec_mulcache_compute(). 
+ **************************************************/ +STATIC_TESTABLE +void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v, + const polyvec_mulcache *vc) +__contract__( + requires(memory_no_alias(out, sizeof(polyvec))) + requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K)) + requires(memory_no_alias(v, sizeof(polyvec))) + requires(memory_no_alias(vc, sizeof(polyvec_mulcache))) + requires(forall(int, k0, 0, MLKEM_K - 1, + forall(int, k1, 0, MLKEM_K - 1, + array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))) + assigns(object_whole(out))) +{ + int i; + for (i = 0; i < MLKEM_K; i++) + __loop__( + assigns(i, object_whole(out)) + invariant(i >= 0 && i <= MLKEM_K)) + { + polyvec_basemul_acc_montgomery_cached(&out->vec[i], &a[i], v, vc); + } +} + +/************************************************* + * Name: indcpa_keypair_derand + * + * Description: Generates public and private key for the CPA-secure + * public-key encryption scheme underlying ML-KEM + * + * Arguments: - uint8_t *pk: pointer to output public key + * (of length MLKEM_INDCPA_PUBLICKEYBYTES bytes) + * - uint8_t *sk: pointer to output private key + * (of length MLKEM_INDCPA_SECRETKEYBYTES bytes) + * - const uint8_t *coins: pointer to input randomness + * (of length MLKEM_SYMBYTES bytes) + **************************************************/ + +STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0) + +void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], + uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES], + const uint8_t coins[MLKEM_SYMBYTES]) +{ + ALIGN uint8_t buf[2 * MLKEM_SYMBYTES]; + const uint8_t *publicseed = buf; + const uint8_t *noiseseed = buf + MLKEM_SYMBYTES; + polyvec a[MLKEM_K], e, pkpv, skpv; + polyvec_mulcache skpv_cache; + + ALIGN uint8_t coins_with_domain_separator[MLKEM_SYMBYTES + 1]; + /* Concatenate coins with MLKEM_K for domain separation of security levels */ + memcpy(coins_with_domain_separator, coins, MLKEM_SYMBYTES); + 
coins_with_domain_separator[MLKEM_SYMBYTES] = MLKEM_K; + + hash_g(buf, coins_with_domain_separator, MLKEM_SYMBYTES + 1); + + gen_matrix(a, publicseed, 0 /* no transpose */); + +#if MLKEM_K == 2 + poly_getnoise_eta1_4x(skpv.vec + 0, skpv.vec + 1, e.vec + 0, e.vec + 1, + noiseseed, 0, 1, 2, 3); +#elif MLKEM_K == 3 + /* + * Only the first three output buffers are needed. + * The last parameter is a dummy that's overwritten later. + */ + poly_getnoise_eta1_4x(skpv.vec + 0, skpv.vec + 1, skpv.vec + 2, + pkpv.vec + 0 /* irrelevant */, noiseseed, 0, 1, 2, + 0xFF /* irrelevant */); + /* Same here */ + poly_getnoise_eta1_4x(e.vec + 0, e.vec + 1, e.vec + 2, + pkpv.vec + 0 /* irrelevant */, noiseseed, 3, 4, 5, + 0xFF /* irrelevant */); +#elif MLKEM_K == 4 + poly_getnoise_eta1_4x(skpv.vec + 0, skpv.vec + 1, skpv.vec + 2, skpv.vec + 3, + noiseseed, 0, 1, 2, 3); + poly_getnoise_eta1_4x(e.vec + 0, e.vec + 1, e.vec + 2, e.vec + 3, noiseseed, + 4, 5, 6, 7); +#endif + + polyvec_ntt(&skpv); + polyvec_ntt(&e); + + polyvec_mulcache_compute(&skpv_cache, &skpv); + matvec_mul(&pkpv, a, &skpv, &skpv_cache); + polyvec_tomont(&pkpv); + + /* Arithmetic cannot overflow, see static assertion at the top */ + polyvec_add(&pkpv, &e); + polyvec_reduce(&pkpv); + polyvec_reduce(&skpv); + + pack_sk(sk, &skpv); + pack_pk(pk, &pkpv, publicseed); +} + +/************************************************* + * Name: indcpa_enc + * + * Description: Encryption function of the CPA-secure + * public-key encryption scheme underlying Kyber.
+ * + * Arguments: - uint8_t *c: pointer to output ciphertext + * (of length MLKEM_INDCPA_BYTES bytes) + * - const uint8_t *m: pointer to input message + * (of length MLKEM_INDCPA_MSGBYTES bytes) + * - const uint8_t *pk: pointer to input public key + * (of length MLKEM_INDCPA_PUBLICKEYBYTES) + * - const uint8_t *coins: pointer to input random coins used as + *seed (of length MLKEM_SYMBYTES) to deterministically generate all randomness + **************************************************/ + +/* Check that the arithmetic in indcpa_enc() does not overflow */ +STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0) +STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX, + indcpa_enc_bound_1) + +void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], + const uint8_t m[MLKEM_INDCPA_MSGBYTES], + const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], + const uint8_t coins[MLKEM_SYMBYTES]) +{ + ALIGN uint8_t seed[MLKEM_SYMBYTES]; + polyvec sp, pkpv, ep, at[MLKEM_K], b; + poly v, k, epp; + polyvec_mulcache sp_cache; + + unpack_pk(&pkpv, seed, pk); + poly_frommsg(&k, m); + gen_matrix(at, seed, 1 /* transpose */); + +#if MLKEM_K == 2 + poly_getnoise_eta1122_4x(sp.vec + 0, sp.vec + 1, ep.vec + 0, ep.vec + 1, + coins, 0, 1, 2, 3); + poly_getnoise_eta2(&epp, coins, 4); +#elif MLKEM_K == 3 + /* + * In this call, only the first three output buffers are needed. + * The last parameter is a dummy that's overwritten later. + */ + poly_getnoise_eta1_4x(sp.vec + 0, sp.vec + 1, sp.vec + 2, &b.vec[0], coins, 0, + 1, 2, 0xFF); + /* The fourth output buffer in this call _is_ used. 
*/ + poly_getnoise_eta2_4x(ep.vec + 0, ep.vec + 1, ep.vec + 2, &epp, coins, 3, 4, + 5, 6); +#elif MLKEM_K == 4 + poly_getnoise_eta1_4x(sp.vec + 0, sp.vec + 1, sp.vec + 2, sp.vec + 3, coins, + 0, 1, 2, 3); + poly_getnoise_eta2_4x(ep.vec + 0, ep.vec + 1, ep.vec + 2, ep.vec + 3, coins, + 4, 5, 6, 7); + poly_getnoise_eta2(&epp, coins, 8); +#endif + + polyvec_ntt(&sp); + + polyvec_mulcache_compute(&sp_cache, &sp); + matvec_mul(&b, at, &sp, &sp_cache); + polyvec_basemul_acc_montgomery_cached(&v, &pkpv, &sp, &sp_cache); + + polyvec_invntt_tomont(&b); + poly_invntt_tomont(&v); + + /* Arithmetic cannot overflow, see static assertion at the top */ + polyvec_add(&b, &ep); + poly_add(&v, &epp); + poly_add(&v, &k); + + polyvec_reduce(&b); + poly_reduce(&v); + + pack_ciphertext(c, &b, &v); +} + +/* Check that the arithmetic in indcpa_dec() does not overflow */ +STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0) + +void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], + const uint8_t c[MLKEM_INDCPA_BYTES], + const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES]) +{ + polyvec b, skpv; + poly v, sb; + + unpack_ciphertext(&b, &v, c); + unpack_sk(&skpv, sk); + + polyvec_ntt(&b); + polyvec_basemul_acc_montgomery(&sb, &skpv, &b); + poly_invntt_tomont(&sb); + + /* Arithmetic cannot overflow, see static assertion at the top */ + poly_sub(&v, &sb); + poly_reduce(&v); + + poly_tomsg(m, &v); +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.h new file mode 100644 index 0000000000..3f57eb1295 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef INDCPA_H +#define INDCPA_H + +#include +#include "cbmc.h" +#include "params.h" +#include "polyvec.h" + + +#define gen_matrix MLKEM_NAMESPACE(gen_matrix) + +void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int 
transposed) +__contract__( + requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K)) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires(transposed == 0 || transposed == 1) + assigns(object_whole(a)) + ensures(forall(int, x, 0, MLKEM_K - 1, forall(int, y, 0, MLKEM_K - 1, + array_bound(a[x].vec[y].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))); +); + +#define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand) +void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], + uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES], + const uint8_t coins[MLKEM_SYMBYTES]) +__contract__( + requires(memory_no_alias(pk, MLKEM_INDCPA_PUBLICKEYBYTES)) + requires(memory_no_alias(sk, MLKEM_INDCPA_SECRETKEYBYTES)) + requires(memory_no_alias(coins, MLKEM_SYMBYTES)) + assigns(object_whole(pk)) + assigns(object_whole(sk)) +); + +#define indcpa_enc MLKEM_NAMESPACE(indcpa_enc) +/************************************************* + * Name: indcpa_dec (NOTE: misplaced; indcpa_dec is declared further below) + * + * Description: Decryption function of the CPA-secure + * public-key encryption scheme underlying Kyber.
+ * + * Arguments: - uint8_t *m: pointer to output decrypted message + * (of length MLKEM_INDCPA_MSGBYTES) + * - const uint8_t *c: pointer to input ciphertext + * (of length MLKEM_INDCPA_BYTES) + * - const uint8_t *sk: pointer to input secret key + * (of length MLKEM_INDCPA_SECRETKEYBYTES) + **************************************************/ +void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], + const uint8_t m[MLKEM_INDCPA_MSGBYTES], + const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], + const uint8_t coins[MLKEM_SYMBYTES]) +__contract__( + requires(memory_no_alias(c, MLKEM_INDCPA_BYTES)) + requires(memory_no_alias(m, MLKEM_INDCPA_MSGBYTES)) + requires(memory_no_alias(pk, MLKEM_INDCPA_PUBLICKEYBYTES)) + requires(memory_no_alias(coins, MLKEM_SYMBYTES)) + assigns(object_whole(c)) +); + +#define indcpa_dec MLKEM_NAMESPACE(indcpa_dec) +void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], + const uint8_t c[MLKEM_INDCPA_BYTES], + const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES]) +__contract__( + requires(memory_no_alias(c, MLKEM_INDCPA_BYTES)) + requires(memory_no_alias(m, MLKEM_INDCPA_MSGBYTES)) + requires(memory_no_alias(sk, MLKEM_INDCPA_SECRETKEYBYTES)) + assigns(object_whole(m)) +); + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.c new file mode 100644 index 0000000000..f84ee3f3da --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.c @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "kem.h" +#include +#include +#include +#include "indcpa.h" +#include "params.h" +#include "randombytes.h" +#include "symmetric.h" +#include "verify.h" + +#if defined(CBMC) +/* Redeclaration with contract needed for CBMC only */ +int memcmp(const void *str1, const void *str2, size_t n) +__contract__( + requires(memory_no_alias(str1, n)) + requires(memory_no_alias(str2, n)) +); +#endif + 
+/************************************************* + * Name: check_pk + * + * Description: Implements modulus check mandated by FIPS203, + * i.e., ensures that coefficients are in [0,q-1]. + * Described in Section 7.2 of FIPS203. + * + * Arguments: - const uint8_t *pk: pointer to input public key + * (an already allocated array of MLKEM_PUBLICKEYBYTES bytes) + ** + * Returns 0 on success, and -1 on failure + **************************************************/ +static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES]) +{ + polyvec p; + uint8_t p_reencoded[MLKEM_POLYVECBYTES]; + polyvec_frombytes(&p, pk); + polyvec_reduce(&p); + polyvec_tobytes(p_reencoded, &p); + /* Data is public, so a variable-time memcmp() is OK */ + if (memcmp(pk, p_reencoded, MLKEM_POLYVECBYTES)) + { + return -1; + } + return 0; +} + +/************************************************* + * Name: check_sk + * + * Description: Implements public key hash check mandated by FIPS203, + * i.e., ensures that + * sk[768𝑘+32 ∶ 768𝑘+64] = H(pk)= H(sk[384𝑘 : 768𝑘+32]) + * Described in Section 7.3 of FIPS203. + * + * Arguments: - const uint8_t *sk: pointer to input private key + * (an already allocated array of MLKEM_SECRETKEYBYTES bytes) + * + * Returns 0 on success, and -1 on failure + **************************************************/ +static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES]) +{ + uint8_t test[MLKEM_SYMBYTES]; + /* + * The parts of `sk` being hashed and compared here are public, so + * no secret information is leaked through the runtime or the return value + * of this function.
+ */ + hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_PUBLICKEYBYTES); + if (memcmp(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test, + MLKEM_SYMBYTES)) + { + return -1; + } + return 0; +} + +/************************************************* + * Name: crypto_kem_keypair_derand + * + * Description: Generates public and private key + * for CCA-secure ML-KEM key encapsulation mechanism + * + * Arguments: - uint8_t *pk: pointer to output public key + * (an already allocated array of MLKEM_PUBLICKEYBYTES bytes) + * - uint8_t *sk: pointer to output private key + * (an already allocated array of MLKEM_SECRETKEYBYTES bytes) + * - uint8_t *coins: pointer to input randomness + * (an already allocated array filled with 2*MLKEM_SYMBYTES + *random bytes) + ** + * Returns 0 (success) + **************************************************/ +int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins) +{ + indcpa_keypair_derand(pk, sk, coins); + memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_PUBLICKEYBYTES); + hash_h(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk, + MLKEM_PUBLICKEYBYTES); + /* Value z for pseudo-random output on reject */ + memcpy(sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, coins + MLKEM_SYMBYTES, + MLKEM_SYMBYTES); + return 0; +} + +int crypto_kem_keypair(uint8_t *pk, uint8_t *sk) +{ + ALIGN uint8_t coins[2 * MLKEM_SYMBYTES]; + randombytes(coins, 2 * MLKEM_SYMBYTES); + crypto_kem_keypair_derand(pk, sk, coins); + return 0; +} + +int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, + const uint8_t *coins) +{ + ALIGN uint8_t buf[2 * MLKEM_SYMBYTES]; + /* Will contain key, coins */ + ALIGN uint8_t kr[2 * MLKEM_SYMBYTES]; + + if (check_pk(pk)) + { + return -1; + } + + memcpy(buf, coins, MLKEM_SYMBYTES); + + /* Multitarget countermeasure for coins + contributory KEM */ + hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_PUBLICKEYBYTES); + hash_g(kr, buf, 2 * MLKEM_SYMBYTES); + + /* coins are in kr+MLKEM_SYMBYTES */ + 
indcpa_enc(ct, buf, pk, kr + MLKEM_SYMBYTES); + + memcpy(ss, kr, MLKEM_SYMBYTES); + return 0; +} + +int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) +{ + ALIGN uint8_t coins[MLKEM_SYMBYTES]; + randombytes(coins, MLKEM_SYMBYTES); + return crypto_kem_enc_derand(ct, ss, pk, coins); +} + +int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) +{ + uint8_t fail; + ALIGN uint8_t buf[2 * MLKEM_SYMBYTES]; + /* Will contain key, coins */ + ALIGN uint8_t kr[2 * MLKEM_SYMBYTES]; + ALIGN uint8_t cmp[MLKEM_CIPHERTEXTBYTES + MLKEM_SYMBYTES]; + const uint8_t *pk = sk + MLKEM_INDCPA_SECRETKEYBYTES; + + if (check_sk(sk)) + { + return -1; + } + + indcpa_dec(buf, ct, sk); + + /* Multitarget countermeasure for coins + contributory KEM */ + memcpy(buf + MLKEM_SYMBYTES, sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, + MLKEM_SYMBYTES); + hash_g(kr, buf, 2 * MLKEM_SYMBYTES); + + /* coins are in kr+MLKEM_SYMBYTES */ + indcpa_enc(cmp, buf, pk, kr + MLKEM_SYMBYTES); + + fail = ct_memcmp(ct, cmp, MLKEM_CIPHERTEXTBYTES); + + /* Compute rejection key */ + rkprf(ss, sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, ct); + + /* Copy true key to return buffer if fail is 0 */ + ct_cmov_zero(ss, kr, MLKEM_SYMBYTES, fail); + + return 0; +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.h new file mode 100644 index 0000000000..6a33be7c7e --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef KEM_H +#define KEM_H + +#include +#include "cbmc.h" +#include "params.h" + +#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES +#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES +#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES +#define CRYPTO_BYTES MLKEM_SSBYTES + +#if (MLKEM_K == 2) +#define CRYPTO_ALGNAME "Kyber512" +#elif (MLKEM_K == 3) +#define CRYPTO_ALGNAME "Kyber768" 
+#elif (MLKEM_K == 4) +#define CRYPTO_ALGNAME "Kyber1024" +#endif + +#define crypto_kem_keypair_derand MLKEM_NAMESPACE(keypair_derand) +int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins) +__contract__( + requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES)) + requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES)) + requires(memory_no_alias(coins, 2 * MLKEM_SYMBYTES)) + assigns(object_whole(pk)) + assigns(object_whole(sk)) +); + +#define crypto_kem_keypair MLKEM_NAMESPACE(keypair) +/************************************************* + * Name: crypto_kem_keypair + * + * Description: Generates public and private key + * for CCA-secure ML-KEM key encapsulation mechanism + * + * Arguments: - uint8_t *pk: pointer to output public key + * (an already allocated array of MLKEM_PUBLICKEYBYTES bytes) + * - uint8_t *sk: pointer to output private key + * (an already allocated array of MLKEM_SECRETKEYBYTES bytes) + * + * Returns 0 (success) + **************************************************/ +int crypto_kem_keypair(uint8_t *pk, uint8_t *sk) +__contract__( + requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES)) + requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES)) + assigns(object_whole(pk)) + assigns(object_whole(sk)) +); + +#define crypto_kem_enc_derand MLKEM_NAMESPACE(enc_derand) +/************************************************* + * Name: crypto_kem_enc_derand + * + * Description: Generates cipher text and shared + * secret for given public key + * + * Arguments: - uint8_t *ct: pointer to output cipher text + * (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes) + * - uint8_t *ss: pointer to output shared secret + * (an already allocated array of MLKEM_SSBYTES bytes) + * - const uint8_t *pk: pointer to input public key + * (an already allocated array of MLKEM_PUBLICKEYBYTES bytes) + * - const uint8_t *coins: pointer to input randomness + * (an already allocated array filled with MLKEM_SYMBYTES random + *bytes) + ** + * Returns 0 on success, and 
-1 if the public key modulus check (see Section 7.2 + * of FIPS203) fails. + **************************************************/ +int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, + const uint8_t *coins) +__contract__( + requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES)) + requires(memory_no_alias(ss, MLKEM_SSBYTES)) + requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES)) + requires(memory_no_alias(coins, MLKEM_SYMBYTES)) + assigns(object_whole(ct)) + assigns(object_whole(ss)) +); + +#define crypto_kem_enc MLKEM_NAMESPACE(enc) +/************************************************* + * Name: crypto_kem_enc + * + * Description: Generates cipher text and shared + * secret for given public key + * + * Arguments: - uint8_t *ct: pointer to output cipher text + * (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes) + * - uint8_t *ss: pointer to output shared secret + * (an already allocated array of MLKEM_SSBYTES bytes) + * - const uint8_t *pk: pointer to input public key + * (an already allocated array of MLKEM_PUBLICKEYBYTES bytes) + * + * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2 + * of FIPS203) fails. 
+ **************************************************/ +int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) +__contract__( + requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES)) + requires(memory_no_alias(ss, MLKEM_SSBYTES)) + requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES)) + assigns(object_whole(ct)) + assigns(object_whole(ss)) +); + +#define crypto_kem_dec MLKEM_NAMESPACE(dec) +/************************************************* + * Name: crypto_kem_dec + * + * Description: Generates shared secret for given + * cipher text and private key + * + * Arguments: - uint8_t *ss: pointer to output shared secret + * (an already allocated array of MLKEM_SSBYTES bytes) + * - const uint8_t *ct: pointer to input cipher text + * (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes) + * - const uint8_t *sk: pointer to input private key + * (an already allocated array of MLKEM_SECRETKEYBYTES bytes) + * + * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of + * FIPS203) fails. + * + * On failure, ss will contain a pseudo-random value. 
+ **************************************************/ +int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) +__contract__( + requires(memory_no_alias(ss, MLKEM_SSBYTES)) + requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES)) + requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES)) + assigns(object_whole(ss)) +); + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.c new file mode 100644 index 0000000000..1844ca19fd --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.c @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "ntt.h" +#include +#include "params.h" +#include "reduce.h" + +#include "arith_native.h" +#include "debug/debug.h" +#include "ntt.h" + +#if !defined(MLKEM_USE_NATIVE_NTT) +/* + * Computes a block of CT butterflies with a fixed twiddle factor, + * using Montgomery multiplication. + * Parameters: + * - r: Pointer to base of polynomial (_not_ the base of butterfly block) + * - zeta: Twiddle factor to use for the butterfly. This must be in + * Montgomery form and signed canonical. + * - start: Offset to the beginning of the butterfly block + * - len: Index difference between coefficients subject to a butterfly + * - bound: Ghost variable describing coefficient bound: Prior to `start`, + * coefficients must be bound by `bound + MLKEM_Q`. Post `start`, + * they must be bound by `bound`. + * When this function returns, output coefficients in the index range + * [start, start+2*len) have bound bumped to `bound + MLKEM_Q`.
+ * Example: + * - start=8, len=4 + * This would compute the following four butterflies + * 8 -- 12 + * 9 -- 13 + * 10 -- 14 + * 11 -- 15 + * - start=4, len=2 + * This would compute the following two butterflies + * 4 -- 6 + * 5 -- 7 + */ +STATIC_TESTABLE +void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, int len, + int bound) +__contract__( + requires(0 <= start && start < MLKEM_N) + requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N) + requires(0 <= bound && bound < INT16_MAX - MLKEM_Q) + requires(-HALF_Q < zeta && zeta < HALF_Q) + requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) + requires(array_abs_bound(r, 0, start - 1, bound + MLKEM_Q)) + requires(array_abs_bound(r, start, MLKEM_N - 1, bound)) + assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) + ensures(array_abs_bound(r, 0, start + 2*len - 1, bound + MLKEM_Q)) + ensures(array_abs_bound(r, start + 2 * len, MLKEM_N - 1, bound))) +{ + /* `bound` is a ghost variable only needed in the CBMC specification */ + int j; + ((void)bound); + for (j = start; j < start + len; j++) + __loop__( + invariant(start <= j && j <= start + len) + /* + * Coefficients are updated in strided pairs, so the bounds for the + * intermediate states alternate twice between the old and new bound + */ + invariant(array_abs_bound(r, 0, j - 1, bound + MLKEM_Q)) + invariant(array_abs_bound(r, j, start + len - 1, bound)) + invariant(array_abs_bound(r, start + len, j + len - 1, bound + MLKEM_Q)) + invariant(array_abs_bound(r, j + len, MLKEM_N - 1, bound))) + { + int16_t t; + t = fqmul(r[j + len], zeta); + r[j + len] = r[j] - t; + r[j] = r[j] + t; + } +} + +/* + *Compute one layer of forward NTT + * Parameters: + * - r: Pointer to base of polynomial + * - len: Stride of butterflies in this layer. + * - layer: Ghost variable indicating which layer is being applied. + * Must match `len` via `len == MLKEM_N >> layer`. 
+ * Note: `len` could be dropped and computed in the function, but + * we are following the structure of the reference NTT from the + * official Kyber implementation here, merely adding `layer` as + * a ghost variable for the specifications. + */ +STATIC_TESTABLE +void ntt_layer(int16_t r[MLKEM_N], int len, int layer) +__contract__( + requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) + requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer)) + requires(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1)) + assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) + ensures(array_abs_bound(r, 0, MLKEM_N - 1, (layer + 1) * MLKEM_Q - 1))) +{ + int start, k; + /* `layer` is a ghost variable only needed in the CBMC specification */ + ((void)layer); + /* Twiddle factors for layer n start at index 2^(layer-1) */ + k = MLKEM_N / (2 * len); + for (start = 0; start < MLKEM_N; start += 2 * len) + __loop__( + invariant(0 <= start && start < MLKEM_N + 2 * len) + invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) + invariant(array_abs_bound(r, 0, start - 1, (layer * MLKEM_Q - 1) + MLKEM_Q)) + invariant(array_abs_bound(r, start, MLKEM_N - 1, layer * MLKEM_Q - 1))) + { + int16_t zeta = zetas[k++]; + ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q - 1); + } +} + +/* + * Compute full forward NTT + * NOTE: This particular implementation satisfies a much tighter + * bound on the output coefficients (5*q) than the contractual one (8*q), + * but this is not needed in the calling code. Should we change the + * base multiplication strategy to require smaller NTT output bounds, + * the proof may need strengthening. + * REF-CHANGE: Removed indirection poly_ntt -> ntt() + * and integrated polynomial reduction into the NTT. 
+ */ + + +void poly_ntt(poly *p) +{ + int len, layer; + int16_t *r; + POLY_BOUND_MSG(p, MLKEM_Q, "ref ntt input"); + r = p->coeffs; + + for (len = 128, layer = 1; len >= 2; len >>= 1, layer++) + __loop__( + invariant(1 <= layer && layer <= 8 && len == (MLKEM_N >> layer)) + invariant(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1))) + { + ntt_layer(r, len, layer); + } + + /* Check the contractual output bound (NTT_BOUND) */ + POLY_BOUND_MSG(p, NTT_BOUND, "ref ntt output"); +} +#else /* MLKEM_USE_NATIVE_NTT */ + +/* Check that bound for native NTT implies contractual bound. The label is ntt_bound so that it is distinct from the invNTT assertions (invntt_bound) in this file. */ +STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, ntt_bound) + +void poly_ntt(poly *p) +{ + POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input"); + ntt_native(p); + POLY_BOUND_MSG(p, NTT_BOUND_NATIVE, "native ntt output"); +} +#endif /* MLKEM_USE_NATIVE_NTT */ + +#if !defined(MLKEM_USE_NATIVE_INTT) + +/* Check that bound for reference invNTT implies contractual bound */ +#define INVNTT_BOUND_REF (3 * MLKEM_Q / 4) +STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound) + +/* Compute one layer of inverse NTT */ +STATIC_TESTABLE +void invntt_layer(int16_t *r, int len, int layer) +__contract__( + requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) + requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7) + requires(len == (1 << (8 - layer))) + requires(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) + ensures(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))) +{ + int start, k; + /* `layer` is a ghost variable used only in the specification */ + ((void)layer); + k = MLKEM_N / len - 1; + for (start = 0; start < MLKEM_N; start += 2 * len) + __loop__( + invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)) + invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */ + invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len)) + { + int j; + int16_t zeta = zetas[k--]; + for (j = start; j < 
start + len; j++) + __loop__( + invariant(start <= j && j <= start + len) + invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))) + { /* t keeps the old r[j], which is still needed for the difference below */ + int16_t t = r[j]; + r[j] = barrett_reduce(t + r[j + len]); + r[j + len] = r[j + len] - t; + r[j + len] = fqmul(r[j + len], zeta); + } + } +} + +void poly_invntt_tomont(poly *p) +{ + /* + * Scale input polynomial to account for Montgomery factor + * and NTT twist. This also brings coefficients down to + * absolute value < MLKEM_Q. + * The scaling constant f = 1441 satisfies + * 128 * f == 2^32 (mod MLKEM_Q), i.e. 128 * 1441 mod 3329 == 1353. + */ + int j, len, layer; + const int16_t f = 1441; + int16_t *r = p->coeffs; + + for (j = 0; j < MLKEM_N; j++) + __loop__( + invariant(0 <= j && j <= MLKEM_N) + invariant(array_abs_bound(r, 0, j - 1, MLKEM_Q))) + { + r[j] = fqmul(r[j], f); + } + + /* Run the invNTT layers, from len = 2 (layer 7) up to len = 128 (layer 1) */ + for (len = 2, layer = 7; len <= 128; len <<= 1, layer--) + __loop__( + invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer))) + invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))) + { + invntt_layer(p->coeffs, len, layer); + } + + POLY_BOUND_MSG(p, INVNTT_BOUND_REF, "ref intt output"); +} +#else /* MLKEM_USE_NATIVE_INTT */ + +/* Check that bound for native invNTT implies contractual bound */ +STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound) + +void poly_invntt_tomont(poly *p) +{ + intt_native(p); + POLY_BOUND_MSG(p, INVNTT_BOUND_NATIVE, "native intt output"); +} +#endif /* MLKEM_USE_NATIVE_INTT */ + +/************************************************* + * Name: basemul_cached + * + * Description: Multiplication of polynomials in Zq[X]/(X^2-zeta) + * used for multiplication of elements in Rq in NTT domain + * + * Bounds: + * - a is assumed to be < q in absolute value. 
+ * - Return value < 3/2 q in absolute value + * + * Arguments: - int16_t r[2]: pointer to the output polynomial + * - const int16_t a[2]: pointer to the first factor + * - const int16_t b[2]: pointer to the second factor + * - int16_t b_cached: Cached precomputation of b[1] * zeta + **************************************************/ +void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], + int16_t b_cached) +{ + int32_t t0, t1; + + BOUND(a, 2, MLKEM_Q, "basemul input bound"); + + t0 = (int32_t)a[1] * b_cached; + t0 += (int32_t)a[0] * b[0]; + t1 = (int32_t)a[0] * b[1]; + t1 += (int32_t)a[1] * b[0]; + + /* |ti| < 2 * q * 2^15 */ + r[0] = montgomery_reduce(t0); + r[1] = montgomery_reduce(t1); + + /* |r[i]| < 3/2 q */ + BOUND(r, 2, 3 * MLKEM_Q / 2, "basemul output bound"); +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.h new file mode 100644 index 0000000000..0f7b30624b --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef NTT_H +#define NTT_H + +#include +#include "arith_native.h" +#include "cbmc.h" +#include "params.h" +#include "poly.h" +#include "reduce.h" + +#define zetas MLKEM_NAMESPACE(zetas) +extern const int16_t zetas[128]; + +/************************************************* + * Name: poly_ntt + * + * Description: Computes negacyclic number-theoretic transform (NTT) of + * a polynomial in place. + * + * The input is assumed to be in normal order and + * coefficient-wise bound by MLKEM_Q in absolute value. + * + * The output polynomial is in bitreversed order, and + * coefficient-wise bound by NTT_BOUND in absolute value. + * + * (NOTE: Sometimes the input to the NTT is actually smaller, + * which gives better bounds.) 
+ * + * Arguments: - poly *r: pointer to in/output polynomial + **************************************************/ + +#define poly_ntt MLKEM_NAMESPACE(poly_ntt) +void poly_ntt(poly *r) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_Q - 1)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, NTT_BOUND - 1)) +); + +/************************************************* + * Name: poly_invntt_tomont + * + * Description: Computes inverse of negacyclic number-theoretic transform (NTT) + * of a polynomial in place; + * inputs assumed to be in bitreversed order, output in normal + * order + * + * The input is assumed to be in bitreversed order, and can + * have arbitrary coefficients in int16_t. + * + * The output polynomial is in normal order, and + * coefficient-wise bound by INVNTT_BOUND in absolute value. + * + * Arguments: - poly *r: pointer to in/output polynomial + **************************************************/ +#define poly_invntt_tomont MLKEM_NAMESPACE(poly_invntt_tomont) +void poly_invntt_tomont(poly *r) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, INVNTT_BOUND - 1)) +); + +#define basemul_cached MLKEM_NAMESPACE(basemul_cached) +/************************************************************ + * Name: basemul_cached + * + * Description: Computes a representative modulo q of + * (a0*b0 + a1*b_cached, a0*b1 + a1*b0)/65536 + * + * If b_cached is b1*zeta, this represents the + * product of (a0 + a1*X) and (b0 + b1*X) in + * Fq[X]/(X^2 - zeta). + * + * Arguments: - r: Pointer to output polynomial + * Upon return, coefficients are bound by + * 3*(q+1)/2 in absolute value. + * - a: Pointer to first input polynomial + * Must be coefficient-wise < q in absolute value. 
+ * - b: Pointer to second input polynomial + * Can have arbitrary int16_t coefficients + * - b_cached: Some precomputed value, typically derived from + * b1 and a twiddle factor. Can be an arbitrary int16_t. + ************************************************************/ +void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], + int16_t b_cached) +__contract__( + requires(memory_no_alias(r, 2 * sizeof(int16_t))) + requires(memory_no_alias(a, 2 * sizeof(int16_t))) + requires(memory_no_alias(b, 2 * sizeof(int16_t))) + requires(array_abs_bound(a, 0, 1, MLKEM_Q - 1)) + assigns(memory_slice(r, 2 * sizeof(int16_t))) + ensures(array_abs_bound(r, 0, 1, (3 * HALF_Q - 1))) +); + + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/params.h new file mode 100644 index 0000000000..c5ba1aa656 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/params.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef PARAMS_H +#define PARAMS_H + +#include "common.h" +#include "cpucap.h" + +#define KECCAK_WAY 4 + + +#ifndef MLKEM_K +#define MLKEM_K 3 /* Change this for different security strengths */ +#endif + +/* Don't change parameters below this line */ +#if (MLKEM_K == 2) +#define MLKEM_NAMESPACE(s) PQCP_MLKEM_NATIVE_MLKEM512_##s +#define _MLKEM_NAMESPACE(s) _PQCP_MLKEM_NATIVE_MLKEM512_##s +#elif (MLKEM_K == 3) +#define MLKEM_NAMESPACE(s) PQCP_MLKEM_NATIVE_MLKEM768_##s +#define _MLKEM_NAMESPACE(s) _PQCP_MLKEM_NATIVE_MLKEM768_##s +#elif (MLKEM_K == 4) +#define MLKEM_NAMESPACE(s) PQCP_MLKEM_NATIVE_MLKEM1024_##s +#define _MLKEM_NAMESPACE(s) _PQCP_MLKEM_NATIVE_MLKEM1024_##s +#else +#error "MLKEM_K must be in {2,3,4}" +#endif + +#define MLKEM_N 256 +#define MLKEM_Q 3329 + +#define MLKEM_SYMBYTES 32 /* size in bytes of hashes, and seeds */ +#define MLKEM_SSBYTES 32 /* size in bytes of shared key */ + +#define 
MLKEM_POLYBYTES 384 +#define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES) + +#if MLKEM_K == 2 +#define MLKEM_ETA1 3 +#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 +#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) +#elif MLKEM_K == 3 +#define MLKEM_ETA1 2 +#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 +#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) +#elif MLKEM_K == 4 +#define MLKEM_ETA1 2 +#define MLKEM_POLYCOMPRESSEDBYTES_DV 160 +#define MLKEM_POLYCOMPRESSEDBYTES_DU 352 +#define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) +#endif + +#define MLKEM_ETA2 2 + +#define MLKEM_INDCPA_MSGBYTES (MLKEM_SYMBYTES) +#define MLKEM_INDCPA_PUBLICKEYBYTES (MLKEM_POLYVECBYTES + MLKEM_SYMBYTES) +#define MLKEM_INDCPA_SECRETKEYBYTES (MLKEM_POLYVECBYTES) +#define MLKEM_INDCPA_BYTES \ + (MLKEM_POLYVECCOMPRESSEDBYTES_DU + MLKEM_POLYCOMPRESSEDBYTES_DV) + +#define MLKEM_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES) +/* + * The secret key holds the IND-CPA secret key, the IND-CPA public key and + * 2 * MLKEM_SYMBYTES (64 bytes) of additional space; 32 of those bytes + * save H(pk). NOTE(review): the remaining 32 bytes are presumably the + * implicit-rejection value z of FIPS 203 - confirm against kem.c. + */ +#define MLKEM_SECRETKEYBYTES \ + (MLKEM_INDCPA_SECRETKEYBYTES + MLKEM_INDCPA_PUBLICKEYBYTES + \ + 2 * MLKEM_SYMBYTES) +#define MLKEM_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES) + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.c new file mode 100644 index 0000000000..93a663c12b --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.c @@ -0,0 +1,568 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "poly.h" +#include +#include +#include "cbd.h" +#include "cbmc.h" +#include "fips202x4.h" +#include "ntt.h" +#include "params.h" +#include "reduce.h" +#include "symmetric.h" +#include "verify.h" + +#include "arith_native.h" +#include "debug/debug.h" + +void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const 
poly *a) +{ + int j; +#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) + for (j = 0; j < MLKEM_N / 8; j++) + __loop__(invariant(j >= 0 && j <= MLKEM_N / 8)) + { + int k; + uint16_t t[8]; + for (k = 0; k < 8; k++) + __loop__( + invariant(k >= 0 && k <= 8) + invariant(forall(int, r, 0, k - 1, t[r] < (1u << 11)))) + { + t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + } + + /* + * REF-CHANGE: Use array indexing into + * r rather than pointer-arithmetic to simplify verification + * Make all implicit truncation explicit. No data is being + * truncated for the LHS's since each t[i] is 11-bit in size. + */ + r[11 * j + 0] = (t[0] >> 0) & 0xFF; + r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); + r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); + r[11 * j + 3] = (t[2] >> 2) & 0xFF; + r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); + r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); + r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); + r[11 * j + 7] = (t[5] >> 1) & 0xFF; + r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); + r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); + r[11 * j + 10] = (t[7] >> 3); + } + +#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) + for (j = 0; j < MLKEM_N / 4; j++) + __loop__(invariant(j >= 0 && j <= MLKEM_N / 4)) + { + int k; + uint16_t t[4]; + for (k = 0; k < 4; k++) + __loop__( + invariant(k >= 0 && k <= 4) + invariant(forall(int, r, 0, k - 1, t[r] < (1u << 10)))) + { + t[k] = scalar_compress_d10(a->coeffs[4 * j + k]); + } + + /* + * REF-CHANGE: Use array indexing into + * r rather than pointer-arithmetic to simplify verification + * Make all implicit truncation explicit. No data is being + * truncated for the LHS's since each t[i] is 10-bit in size. 
+ */ + r[5 * j + 0] = (t[0] >> 0) & 0xFF; + r[5 * j + 1] = (t[0] >> 8) | ((t[1] << 2) & 0xFF); + r[5 * j + 2] = (t[1] >> 6) | ((t[2] << 4) & 0xFF); + r[5 * j + 3] = (t[2] >> 4) | ((t[3] << 6) & 0xFF); + r[5 * j + 4] = (t[3] >> 2); + } +#else +#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" +#endif +} + + +void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +{ + int j; +#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) + for (j = 0; j < MLKEM_N / 8; j++) + __loop__( + invariant(0 <= j && j <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, 8 * j - 1, 0, (MLKEM_Q - 1)))) + { + int k; + uint16_t t[8]; + uint8_t const *base = &a[11 * j]; + t[0] = 0x7FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8)); + t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); + t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | + ((uint16_t)base[4] << 10)); + t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); + t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); + t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | + ((uint16_t)base[8] << 9)); + t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); + t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); + + for (k = 0; k < 8; k++) + __loop__( + invariant(0 <= k && k <= 8) + invariant(array_bound(r->coeffs, 0, 8 * j + k - 1, 0, (MLKEM_Q - 1)))) + { + r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); + } + } +#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) + for (j = 0; j < MLKEM_N / 4; j++) + __loop__( + invariant(0 <= j && j <= MLKEM_N / 4) + invariant(array_bound(r->coeffs, 0, 4 * j - 1, 0, (MLKEM_Q - 1)))) + { + int k; + uint16_t t[4]; + uint8_t const *base = &a[5 * j]; + + t[0] = 0x3FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8)); + t[1] = 0x3FF & ((base[1] >> 2) | ((uint16_t)base[2] << 6)); + t[2] = 0x3FF & ((base[2] >> 4) | ((uint16_t)base[3] << 4)); + t[3] = 0x3FF & ((base[3] >> 6) | ((uint16_t)base[4] << 2)); + + for (k = 0; k < 4; k++) + 
__loop__( + invariant(0 <= k && k <= 4) + invariant(array_bound(r->coeffs, 0, 4 * j + k - 1, 0, (MLKEM_Q - 1)))) + { + r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]); + } + } +#else +#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" +#endif +} + +void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) +{ + int i; + POLY_UBOUND(a, MLKEM_Q); + +#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + { + int j; + uint8_t t[8] = {0}; + for (j = 0; j < 8; j++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(array_bound(t, 0, (j-1), 0, 15))) + { + /* REF-CHANGE: Precondition change, we assume unsigned canonical data */ + t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); + } + + /* + * REF-CHANGE: Use array indexing into + * r rather than pointer-arithmetic to simplify verification + */ + r[i * 4] = t[0] | (t[1] << 4); + r[i * 4 + 1] = t[2] | (t[3] << 4); + r[i * 4 + 2] = t[4] | (t[5] << 4); + r[i * 4 + 3] = t[6] | (t[7] << 4); + } +#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + { + int j; + uint8_t t[8] = {0}; + for (j = 0; j < 8; j++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(array_bound(t, 0, (j-1), 0, 31))) + { + /* REF-CHANGE: Precondition change, we assume unsigned canonical data */ + t[j] = scalar_compress_d5(a->coeffs[8 * i + j]); + } + + /* + * REF-CHANGE: Explicitly truncate to avoid warning about + * implicit truncation in CBMC, and use array indexing into + * r rather than pointer-arithmetic to simplify verification + */ + r[i * 5] = 0xFF & ((t[0] >> 0) | (t[1] << 5)); + r[i * 5 + 1] = 0xFF & ((t[1] >> 3) | (t[2] << 2) | (t[3] << 7)); + r[i * 5 + 2] = 0xFF & ((t[3] >> 1) | (t[4] << 4)); + r[i * 5 + 3] = 0xFF & ((t[4] >> 4) | (t[5] << 1) | (t[6] << 6)); + r[i * 5 + 4] = 0xFF & ((t[6] >> 2) | 
(t[7] << 3)); + } +#else +#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" +#endif +} + +void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +{ + int i; +#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) + for (i = 0; i < MLKEM_N / 2; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 2) + invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, (MLKEM_Q - 1)))) + { + /* REF-CHANGE: Hoist scalar decompression into separate function */ + r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); + r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); + } +#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) + for (i = 0; i < MLKEM_N / 8; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1)))) + { + int j; + uint8_t t[8]; + const int offset = i * 5; + /* + * REF-CHANGE: Explicitly truncate to avoid warning about + * implicit truncation in CBMC and unwind loop for ease + * of proof. 
+ */ + + /* + * Decompress 5 8-bit bytes (so 40 bits) into + * 8 5-bit values stored in t[] + */ + t[0] = 0x1F & (a[offset + 0] >> 0); + t[1] = 0x1F & ((a[offset + 0] >> 5) | (a[offset + 1] << 3)); + t[2] = 0x1F & (a[offset + 1] >> 2); + t[3] = 0x1F & ((a[offset + 1] >> 7) | (a[offset + 2] << 1)); + t[4] = 0x1F & ((a[offset + 2] >> 4) | (a[offset + 3] << 4)); + t[5] = 0x1F & (a[offset + 3] >> 1); + t[6] = 0x1F & ((a[offset + 3] >> 6) | (a[offset + 4] << 2)); + t[7] = 0x1F & (a[offset + 4] >> 3); + + /* and copy to the correct slice in r[] */ + for (j = 0; j < 8; j++) + __loop__( + invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1)))) + { + /* REF-CHANGE: Hoist scalar decompression into separate function */ + r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]); + } + } +#else +#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" +#endif + + POLY_UBOUND(r, MLKEM_Q); +} + +#if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES) +void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) +{ + unsigned int i; + POLY_UBOUND(a, MLKEM_Q); + + + for (i = 0; i < MLKEM_N / 2; i++) + __loop__(invariant(i >= 0 && i <= MLKEM_N / 2)) + { + const uint16_t t0 = a->coeffs[2 * i]; + const uint16_t t1 = a->coeffs[2 * i + 1]; + /* REF-CHANGE: Precondition change, we assume unsigned canonical data */ + + /* + * t0 and t1 are both < MLKEM_Q, so contain at most 12 bits each of + * significant data, so these can be packed into 24 bits or exactly + * 3 bytes, as follows. + */ + + /* Least significant bits 0 - 7 of t0. */ + r[3 * i + 0] = t0 & 0xFF; + + /* + * Most significant bits 8 - 11 of t0 become the least significant + * nibble of the second byte. The least significant 4 bits + * of t1 become the upper nibble of the second byte. + */ + r[3 * i + 1] = (t0 >> 8) | ((t1 << 4) & 0xF0); + + /* Bits 4 - 11 of t1 become the third byte. 
*/ + r[3 * i + 2] = t1 >> 4; + } +} +#else /* MLKEM_USE_NATIVE_POLY_TOBYTES */ +void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) +{ + POLY_UBOUND(a, MLKEM_Q); + poly_tobytes_native(r, a); +} +#endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */ + +#if !defined(MLKEM_USE_NATIVE_POLY_FROMBYTES) +void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) +{ + int i; + for (i = 0; i < MLKEM_N / 2; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 2) + invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, 4095))) + { + /* REF-CHANGE: Introduce some locals for better readability */ + const uint8_t t0 = a[3 * i + 0]; + const uint8_t t1 = a[3 * i + 1]; + const uint8_t t2 = a[3 * i + 2]; + r->coeffs[2 * i + 0] = t0 | ((t1 << 8) & 0xFFF); + r->coeffs[2 * i + 1] = (t1 >> 4) | (t2 << 4); + } + + /* Note that the coefficients are not canonical */ + POLY_UBOUND(r, 4096); +} +#else /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ +void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) +{ + poly_frombytes_native(r, a); +} +#endif /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ + +void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) +{ + int i; +#if (MLKEM_INDCPA_MSGBYTES != MLKEM_N / 8) +#error "MLKEM_INDCPA_MSGBYTES must be equal to MLKEM_N/8 bytes!" 
+#endif + + for (i = 0; i < MLKEM_N / 8; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1)))) + { + int j; + for (j = 0; j < 8; j++) + __loop__( + invariant(i >= 0 && i < MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1)))) + { + /* Prevent the compiler from recognizing this as a bit selection */ + uint8_t mask = value_barrier_u8(1u << j); + r->coeffs[8 * i + j] = ct_sel_int16(HALF_Q, 0, msg[i] & mask); + } + } + POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output"); +} +/* + * poly_tomsg: pack polynomial a into MLKEM_INDCPA_MSGBYTES bytes. + * Bit j of msg[i] is scalar_compress_d1() of coefficient 8 * i + j. + */ +void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) +{ + int i; + POLY_UBOUND(a, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + { + int j; + msg[i] = 0; + for (j = 0; j < 8; j++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)) + { + uint32_t t = scalar_compress_d1(a->coeffs[8 * i + j]); + msg[i] |= t << j; + } + } +} +/* + * poly_getnoise_eta1_4x: fill r0..r3 via poly_cbd_eta1(). + * Lane k hashes seed followed by the single byte nonce<k>; shake256x4() + * produces MLKEM_ETA1 * MLKEM_N / 4 bytes per lane. + */ +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +{ + ALIGN uint8_t buf[KECCAK_WAY][MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; + memcpy(extkey[0], seed, MLKEM_SYMBYTES); + memcpy(extkey[1], seed, MLKEM_SYMBYTES); + memcpy(extkey[2], seed, MLKEM_SYMBYTES); + memcpy(extkey[3], seed, MLKEM_SYMBYTES); + extkey[0][MLKEM_SYMBYTES] = nonce0; + extkey[1][MLKEM_SYMBYTES] = nonce1; + extkey[2][MLKEM_SYMBYTES] = nonce2; + extkey[3][MLKEM_SYMBYTES] = nonce3; + shake256x4(buf[0], buf[1], buf[2], buf[3], MLKEM_ETA1 * MLKEM_N / 4, + extkey[0], extkey[1], extkey[2], extkey[3], MLKEM_SYMBYTES + 1); + poly_cbd_eta1(r0, buf[0]); + poly_cbd_eta1(r1, buf[1]); + poly_cbd_eta1(r2, buf[2]); + poly_cbd_eta1(r3, buf[3]); + + POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0"); + POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, 
"poly_getnoise_eta1_4x output 1"); + POLY_BOUND_MSG(r2, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 2"); + POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3"); +} +/* + * poly_getnoise_eta2: fill r via poly_cbd_eta2() from + * MLKEM_ETA2 * MLKEM_N / 4 bytes of prf() output for (seed, nonce). + */ +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +{ + ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; + prf(buf, sizeof(buf), seed, nonce); + poly_cbd_eta2(r, buf); + /* The output comes from poly_cbd_eta2(), so check it against the eta2 bound, as done for r2/r3 in poly_getnoise_eta1122_4x(). MLKEM_ETA1 is larger than MLKEM_ETA2 when MLKEM_K == 2. */ + POLY_BOUND_MSG(r, MLKEM_ETA2 + 1, "poly_getnoise_eta2 output"); +} +/* + * poly_getnoise_eta1122_4x: like poly_getnoise_eta1_4x(), but r0/r1 go + * through poly_cbd_eta1() and r2/r3 through poly_cbd_eta2(). + * shake256x4() takes a single output length, so it is only used when + * MLKEM_ETA1 == MLKEM_ETA2; otherwise four shake256() calls are made. + */ +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +{ + ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; + memcpy(extkey[0], seed, MLKEM_SYMBYTES); + memcpy(extkey[1], seed, MLKEM_SYMBYTES); + memcpy(extkey[2], seed, MLKEM_SYMBYTES); + memcpy(extkey[3], seed, MLKEM_SYMBYTES); + extkey[0][MLKEM_SYMBYTES] = nonce0; + extkey[1][MLKEM_SYMBYTES] = nonce1; + extkey[2][MLKEM_SYMBYTES] = nonce2; + extkey[3][MLKEM_SYMBYTES] = nonce3; + +#if MLKEM_ETA1 == MLKEM_ETA2 + shake256x4(buf1[0], buf1[1], buf2[0], buf2[1], MLKEM_ETA1 * MLKEM_N / 4, + extkey[0], extkey[1], extkey[2], extkey[3], MLKEM_SYMBYTES + 1); +#else + shake256(buf1[0], sizeof(buf1[0]), extkey[0], sizeof(extkey[0])); + shake256(buf1[1], sizeof(buf1[1]), extkey[1], sizeof(extkey[1])); + shake256(buf2[0], sizeof(buf2[0]), extkey[2], sizeof(extkey[2])); + shake256(buf2[1], sizeof(buf2[1]), extkey[3], sizeof(extkey[3])); +#endif + + poly_cbd_eta1(r0, buf1[0]); + poly_cbd_eta1(r1, buf1[1]); + poly_cbd_eta2(r2, buf2[0]); + poly_cbd_eta2(r3, buf2[1]); + + POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 0"); + POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 1"); + POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2"); + POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, 
"poly_getnoise_eta1122_4x output 3"); +} +/* + * poly_basemul_montgomery_cached: for each group of four coefficients, + * call basemul_cached() on the two coefficient pairs of a and b, passing + * b_cache->coeffs[2 * i] and b_cache->coeffs[2 * i + 1] as cached factors. + */ +void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, + const poly_mulcache *b_cache) +{ + int i; + POLY_BOUND(b_cache, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 4; i++) + __loop__( + assigns(i, object_whole(r)) + invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), (3 * HALF_Q - 1)))) + { + basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i], + b_cache->coeffs[2 * i]); + basemul_cached(&r->coeffs[4 * i + 2], &a->coeffs[4 * i + 2], + &b->coeffs[4 * i + 2], b_cache->coeffs[2 * i + 1]); + } +} + +#if !defined(MLKEM_USE_NATIVE_POLY_TOMONT) +void poly_tomont(poly *r) +{ + int i; + const int16_t f = (1ULL << 32) % MLKEM_Q; /* 2^32 mod MLKEM_Q == 1353 */ + for (i = 0; i < MLKEM_N; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N) + invariant(array_abs_bound(r->coeffs ,0, (i - 1), (MLKEM_Q - 1)))) + { + r->coeffs[i] = fqmul(r->coeffs[i], f); + } + + POLY_BOUND(r, MLKEM_Q); +} +#else /* MLKEM_USE_NATIVE_POLY_TOMONT */ +void poly_tomont(poly *r) +{ + poly_tomont_native(r); + POLY_BOUND(r, MLKEM_Q); +} +#endif /* MLKEM_USE_NATIVE_POLY_TOMONT */ + +#if !defined(MLKEM_USE_NATIVE_POLY_REDUCE) +void poly_reduce(poly *r) +{ + int i; + for (i = 0; i < MLKEM_N; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N) + invariant(array_bound(r->coeffs, 0, (i - 1), 0, (MLKEM_Q - 1)))) + { + /* Barrett reduction, giving signed canonical representative */ + int16_t t = barrett_reduce(r->coeffs[i]); + /* Conditional addition to get unsigned canonical representative */ + r->coeffs[i] = scalar_signed_to_unsigned_q(t); + } + + POLY_UBOUND(r, MLKEM_Q); +} +#else /* MLKEM_USE_NATIVE_POLY_REDUCE */ +void poly_reduce(poly *r) +{ + poly_reduce_native(r); + POLY_UBOUND(r, MLKEM_Q); +} +#endif /* MLKEM_USE_NATIVE_POLY_REDUCE */ +/* + * poly_add: r <- r + b coefficient-wise, in place. + * No modular reduction is applied. + */ +void poly_add(poly *r, const poly *b) +{ + int i; + for (i = 0; i < MLKEM_N; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N) + invariant(forall(int, k0, i, MLKEM_N 
- 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) + invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1]))) + { + r->coeffs[i] = r->coeffs[i] + b->coeffs[i]; + } +} +/* + * poly_sub: r <- r - b coefficient-wise, in place. + * No modular reduction is applied. + */ +void poly_sub(poly *r, const poly *b) +{ + int i; + for (i = 0; i < MLKEM_N; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N) + invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) + invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1]))) + { + r->coeffs[i] = r->coeffs[i] - b->coeffs[i]; + } +} +/* + * poly_mulcache_compute: fill the mulcache x for polynomial a. + * The C implementation stores, for each group of four coefficients, + * fqmul(a[4i + 1], zetas[64 + i]) and fqmul(a[4i + 3], -zetas[64 + i]). + */ +#if !defined(MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE) +void poly_mulcache_compute(poly_mulcache *x, const poly *a) +{ + int i; + for (i = 0; i < MLKEM_N / 4; i++) + __loop__(invariant(i >= 0 && i <= MLKEM_N / 4)) + { + x->coeffs[2 * i + 0] = fqmul(a->coeffs[4 * i + 1], zetas[64 + i]); + x->coeffs[2 * i + 1] = fqmul(a->coeffs[4 * i + 3], -zetas[64 + i]); + } + POLY_BOUND(x, MLKEM_Q); +} +#else /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ +void poly_mulcache_compute(poly_mulcache *x, const poly *a) +{ + poly_mulcache_compute_native(x, a); + /* Omitting POLY_BOUND(x, MLKEM_Q) since native implementations may + * decide not to use a mulcache. Note that the C backend implementation + * of poly_basemul_montgomery_cached() does still include the check. 
*/ +} +#endif /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.h new file mode 100644 index 0000000000..35990684b6 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.h @@ -0,0 +1,773 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef POLY_H +#define POLY_H + +#include +#include +#include "cbmc.h" +#include "params.h" +#include "reduce.h" +#include "verify.h" + +/* Absolute exclusive upper bound for the output of the inverse NTT */ +#define INVNTT_BOUND (8 * MLKEM_Q) + +/* Absolute exclusive upper bound for the output of the forward NTT */ +#define NTT_BOUND (8 * MLKEM_Q) + +/* + * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial + * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1] + */ +typedef struct +{ + int16_t coeffs[MLKEM_N]; +} ALIGN poly; + +/* + * INTERNAL presentation of precomputed data speeding up + * the base multiplication of two polynomials in NTT domain. + */ +/* + * REF-CHANGE: This structure does not exist in the reference + * implementation. + */ +typedef struct +{ + int16_t coeffs[MLKEM_N >> 1]; +} poly_mulcache; + +/************************************************************ + * Name: scalar_compress_d1 + * + * Description: Computes round(u * 2 / q) + * + * Implements Compress_d from FIPS203, Eq (4.7), + * for d = 1. + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + ************************************************************/ +/* + * The multiplication in this routine will exceed UINT32_MAX + * and wrap around for large values of u. This is expected and required. 
+ */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif +static INLINE uint32_t scalar_compress_d1(uint16_t u) +__contract__( + requires(u <= MLKEM_Q - 1) + ensures(return_value < 2) + ensures(return_value == (((uint32_t)u * 2 + MLKEM_Q / 2) / MLKEM_Q) % 2) ) +{ + uint32_t d0 = u << 1; + d0 *= 645083; + d0 += 1u << 30; + d0 >>= 31; + return d0; +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: scalar_compress_d4 + * + * Description: Computes round(u * 16 / q) % 16 + * + * Implements Compress_d from FIPS203, Eq (4.7), + * for d = 4. + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + ************************************************************/ +/* + * The multiplication in this routine will exceed UINT32_MAX + * and wrap around for large values of u. This is expected and required. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif +static INLINE uint32_t scalar_compress_d4(uint16_t u) +__contract__( + requires(u <= MLKEM_Q - 1) + ensures(return_value < 16) + ensures(return_value == (((uint32_t)u * 16 + MLKEM_Q / 2) / MLKEM_Q) % 16)) +{ + uint32_t d0 = (uint32_t)u * 1290160; /* 16 * round(2^28 / MLKEM_Q) */ + return (d0 + (1u << 27)) >> 28; /* round(d0/2^28) */ +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: scalar_decompress_d4 + * + * Description: Computes round(u * q / 16) + * + * Implements Decompress_d from FIPS203, Eq (4.8), + * for d = 4. + * + * Arguments: - u: Unsigned canonical modulus modulo 16 + * to be decompressed. 
+ ************************************************************/ +static INLINE uint16_t scalar_decompress_d4(uint32_t u) +__contract__( + requires(0 <= u && u < 16) + ensures(return_value <= (MLKEM_Q - 1)) +) { return ((u * MLKEM_Q) + 8) / 16; } + +/************************************************************ + * Name: scalar_compress_d5 + * + * Description: Computes round(u * 32 / q) % 32 + * + * Implements Compress_d from FIPS203, Eq (4.7), + * for d = 5. + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + ************************************************************/ +/* + * The multiplication in this routine will exceed UINT32_MAX + * and wrap around for large values of u. This is expected and required. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif +static INLINE uint32_t scalar_compress_d5(uint16_t u) +__contract__( + requires(u <= MLKEM_Q - 1) + ensures(return_value < 32) + ensures(return_value == (((uint32_t)u * 32 + MLKEM_Q / 2) / MLKEM_Q) % 32) ) +{ + uint32_t d0 = (uint32_t)u * 1290176; /* 2^5 * round(2^27 / MLKEM_Q) */ + return (d0 + (1u << 26)) >> 27; /* round(d0/2^27) */ +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: scalar_decompress_d5 + * + * Description: Computes round(u * q / 32) + * + * Implements Decompress_d from FIPS203, Eq (4.8), + * for d = 5. + * + * Arguments: - u: Unsigned canonical modulus modulo 32 + * to be decompressed. 
+ ************************************************************/ +static INLINE uint16_t scalar_decompress_d5(uint32_t u) +__contract__( + requires(0 <= u && u < 32) + ensures(return_value <= MLKEM_Q - 1) +) { return ((u * MLKEM_Q) + 16) / 32; } + +/************************************************************ + * Name: scalar_compress_d10 + * + * Description: Computes round(u * 2**10 / q) % 2**10 + * + * Implements Compress_d from FIPS203, Eq (4.7), + * for d = 10. + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + ************************************************************/ +/* + * The multiplication in this routine will exceed UINT32_MAX + * and wrap around for large values of u. This is expected and required. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif +static INLINE uint32_t scalar_compress_d10(uint16_t u) +__contract__( + requires(u <= MLKEM_Q - 1) + ensures(return_value < (1u << 10)) + ensures(return_value == (((uint32_t)u * (1u << 10) + MLKEM_Q / 2) / MLKEM_Q) % (1 << 10))) +{ + uint64_t d0 = (uint64_t)u * 2642263040; /* 2^10 * round(2^33 / MLKEM_Q) */ + d0 = (d0 + ((uint64_t)1u << 32)) >> 33; + return (d0 & 0x3FF); +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: scalar_decompress_d10 + * + * Description: Computes round(u * q / 1024) + * + * Implements Decompress_d from FIPS203, Eq (4.8), + * for d = 10. + * + * Arguments: - u: Unsigned canonical modulus modulo 1024 + * to be decompressed.
+ ************************************************************/ +static INLINE uint16_t scalar_decompress_d10(uint32_t u) +__contract__( + requires(0 <= u && u < 1024) + ensures(return_value <= (MLKEM_Q - 1)) +) { return ((u * MLKEM_Q) + 512) / 1024; } + +/************************************************************ + * Name: scalar_compress_d11 + * + * Description: Computes round(u * 2**11 / q) % 2**11 + * + * Implements Compress_d from FIPS203, Eq (4.7), + * for d = 11. + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + ************************************************************/ +/* + * The multiplication in this routine will exceed UINT32_MAX + * and wrap around for large values of u. This is expected and required. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif +static INLINE uint32_t scalar_compress_d11(uint16_t u) +__contract__( + requires(u <= MLKEM_Q - 1) + ensures(return_value < (1u << 11)) + ensures(return_value == (((uint32_t)u * (1u << 11) + MLKEM_Q / 2) / MLKEM_Q) % (1 << 11))) +{ + uint64_t d0 = (uint64_t)u * 5284526080; /* 2^11 * round(2^33 / MLKEM_Q) */ + d0 = (d0 + ((uint64_t)1u << 32)) >> 33; + return (d0 & 0x7FF); +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: scalar_decompress_d11 + * + * Description: Computes round(u * q / 2048) + * + * Implements Decompress_d from FIPS203, Eq (4.8), + * for d = 11. + * + * Arguments: - u: Unsigned canonical modulus modulo 2048 + * to be decompressed.
+ ************************************************************/ +static INLINE uint16_t scalar_decompress_d11(uint32_t u) +__contract__( + requires(0 <= u && u < 2048) + ensures(return_value <= (MLKEM_Q - 1)) +) { return ((u * MLKEM_Q) + 1024) / 2048; } + +/************************************************************ + * Name: scalar_signed_to_unsigned_q + * + * Description: converts signed polynomial coefficient + * from signed (-3328 .. 3328) form to + * unsigned form (0 .. 3328). + * + * Note: Cryptographic constant time implementation + * + * Examples: 0 -> 0 + * 1 -> 1 + * 3328 -> 3328 + * -1 -> 3328 + * -2 -> 3327 + * -3328 -> 1 + * + * Arguments: c: signed coefficient to be converted + ************************************************************/ +static INLINE uint16_t scalar_signed_to_unsigned_q(int16_t c) +__contract__( + requires(c >= -(MLKEM_Q - 1) && c <= (MLKEM_Q - 1)) + ensures(return_value >= 0 && return_value <= (MLKEM_Q - 1)) + ensures(return_value == (int32_t)c + (((int32_t)c < 0) * MLKEM_Q))) +{ + /* Add Q if c is negative, but in constant time */ + c = ct_sel_int16(c + MLKEM_Q, c, ct_cmask_neg_i16(c)); + + cassert(c >= 0, "scalar_signed_to_unsigned_q result lower bound"); + cassert(c < MLKEM_Q, "scalar_signed_to_unsigned_q result upper bound"); + + /* and therefore cast to uint16_t is safe. */ + return (uint16_t)c; +} + +#define poly_compress_du MLKEM_NAMESPACE(poly_compress_du) +/************************************************* + * Name: poly_compress_du + * + * Description: Compression (du bits) and subsequent serialization of a + *polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1].
+ **************************************************/ +void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) +); + +#define poly_decompress_du MLKEM_NAMESPACE(poly_decompress_du) +/************************************************* + * Name: poly_decompress_du + * + * Description: De-serialization and subsequent decompression (du bits) of a + *polynomial; approximate inverse of poly_compress_du + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) +); + +#define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv) +/************************************************* + * Name: poly_compress_dv + * + * Description: Compression (dv bits) and subsequent serialization of a + *polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1].
+ **************************************************/ +void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) + assigns(object_whole(r)) +); + +#define poly_decompress_dv MLKEM_NAMESPACE(poly_decompress_dv) +/************************************************* + * Name: poly_decompress_dv + * + * Description: De-serialization and subsequent decompression (dv bits) of a + *polynomial; approximate inverse of poly_compress_dv + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV + *bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(object_whole(r)) + ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) +); + +#define poly_tobytes MLKEM_NAMESPACE(poly_tobytes) +/************************************************* + * Name: poly_tobytes + * + * Description: Serialization of a polynomial. + * Signed coefficients are converted to + * unsigned form before serialization.
+ * + * Arguments: INPUT: + * - a: const pointer to input polynomial, + * with each coefficient in the range [0,1,..,Q-1] + * OUTPUT + * - r: pointer to output byte array + * (of MLKEM_POLYBYTES bytes) + **************************************************/ +void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYBYTES)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) + assigns(object_whole(r)) +); + + +#define poly_frombytes MLKEM_NAMESPACE(poly_frombytes) +/************************************************* + * Name: poly_frombytes + * + * Description: De-serialization of a polynomial. + * + * Arguments: INPUT + * - a: pointer to input byte array + * (of MLKEM_POLYBYTES bytes) + * OUTPUT + * - r: pointer to output polynomial, with + * each coefficient unsigned and in the range + * 0 .. 4095 + **************************************************/ +void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYBYTES)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, 4095)) +); + + +#define poly_frommsg MLKEM_NAMESPACE(poly_frommsg) +/************************************************* + * Name: poly_frommsg + * + * Description: Convert 32-byte message to polynomial + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *msg: pointer to input message + **************************************************/ +void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) +__contract__( + requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(object_whole(r)) + ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) +); + +#define poly_tomsg MLKEM_NAMESPACE(poly_tomsg) 
+/************************************************* + * Name: poly_tomsg + * + * Description: Convert polynomial to 32-byte message + * + * Arguments: - uint8_t *msg: pointer to output message + * - const poly *r: pointer to input polynomial + * Coefficients must be unsigned canonical + **************************************************/ +void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *r) +__contract__( + requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES)) + requires(memory_no_alias(r, sizeof(poly))) + requires(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) + assigns(object_whole(msg)) +); + +#define poly_getnoise_eta1_4x MLKEM_NAMESPACE(poly_getnoise_eta1_4x) +/************************************************* + * Name: poly_getnoise_eta1_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial distribution + * with parameter MLKEM_ETA1. + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + **************************************************/ +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +/* Depending on MLKEM_K, the pointers passed to this function belong + to the same objects, so we cannot use memory_no_alias for r0-r3. + + NOTE: Somehow it is important to use memory_no_alias() first in the + conjunctions defining each case. 
+*/ +#if MLKEM_K == 2 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)); +); +#elif MLKEM_K == 4 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case B: r0, r1, r2, r3 consecutive */ + (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)); +); +#elif MLKEM_K == 3 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case C: r0, r1, r2 consecutive */ + (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && + r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) 
+ && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)); +); +#endif /* MLKEM_K */ + +#if MLKEM_ETA1 == MLKEM_ETA2 +/* + * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 + * where MLKEM_ETA2 = MLKEM_ETA1 = 2. + * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. + */ +#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x +#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ + +#define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2) +/************************************************* + * Name: poly_getnoise_eta2 + * + * Description: Sample a polynomial deterministically from a seed and a nonce, + * with output polynomial close to centered binomial distribution + * with parameter MLKEM_ETA2 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce: one-byte input nonce + **************************************************/ +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r)) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2)) +); + +#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x) +/************************************************* + * Name: poly_getnoise_eta1122_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and a nonces, with output polynomials close to centered binomial + * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + **************************************************/ +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], 
+ uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +__contract__( + requires( /* r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) + ensures(array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA2) + && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA2)); +); + +#define poly_basemul_montgomery_cached \ + MLKEM_NAMESPACE(poly_basemul_montgomery_cached) +/************************************************* + * Name: poly_basemul_montgomery_cached + * + * Description: Multiplication of two polynomials in NTT domain, + * using mulcache for second operand. + * + * Bounds: + * - a is assumed to be coefficient-wise < q in absolute value. + * + * The result is coefficient-wise bound by 3/2 q in absolute + * value. + * + * Arguments: - poly *r: pointer to output polynomial + * - const poly *a: pointer to first input polynomial + * - const poly *b: pointer to second input polynomial + * - const poly_mulcache *b_cache: pointer to mulcache + * for second input polynomial. Can be computed + * via poly_mulcache_compute(). 
+ **************************************************/ +void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, + const poly_mulcache *b_cache) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(a, sizeof(poly))) + requires(memory_no_alias(b, sizeof(poly))) + requires(memory_no_alias(b_cache, sizeof(poly_mulcache))) + requires(array_abs_bound(a->coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))) + assigns(object_whole(r)) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, (3 * HALF_Q - 1))) +); + +#define poly_tomont MLKEM_NAMESPACE(poly_tomont) +/************************************************* + * Name: poly_tomont + * + * Description: Inplace conversion of all coefficients of a polynomial + * from normal domain to Montgomery domain + * + * Bounds: Output < q in absolute value. + * + * Arguments: - poly *r: pointer to input/output polynomial + **************************************************/ +void poly_tomont(poly *r) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))) +); + +/* REF-CHANGE: This function does not exist in the reference implementation */ +#define poly_mulcache_compute MLKEM_NAMESPACE(poly_mulcache_compute) +/************************************************************ + * Name: poly_mulcache_compute + * + * Description: Computes the mulcache for a polynomial in NTT domain + * + * The mulcache of a degree-2 polynomial b := b0 + b1*X + * in Fq[X]/(X^2-zeta) is the value b1*zeta, needed when + * computing products of b in Fq[X]/(X^2-zeta). + * + * The mulcache of a polynomial in NTT domain -- which is + * a 128-tuple of degree-2 polynomials in Fq[X]/(X^2-zeta), + * for varying zeta, is the 128-tuple of mulcaches of those + * polynomials. 
+ * + * Arguments: - x: Pointer to mulcache to be populated + * - a: Pointer to input polynomial + ************************************************************/ +/* + * NOTE: The default C implementation of this function populates + * the mulcache with values in (-q,q), but this is not needed for the + * higher level safety proofs, and thus not part of the spec. + */ +void poly_mulcache_compute(poly_mulcache *x, const poly *a) +__contract__( + requires(memory_no_alias(x, sizeof(poly_mulcache))) + requires(memory_no_alias(a, sizeof(poly))) + assigns(object_whole(x)) +); + +#define poly_reduce MLKEM_NAMESPACE(poly_reduce) +/************************************************* + * Name: poly_reduce + * + * Description: Converts polynomial to _unsigned canonical_ representatives. + * + * The input coefficients can be arbitrary integers in int16_t. + * The output coefficients are in [0,1,...,MLKEM_Q-1]. + * + * Arguments: - poly *r: pointer to input/output polynomial + **************************************************/ +/* + * REF-CHANGE: The semantics of poly_reduce() is different in + * the reference implementation, which requires + * signed canonical output data. Unsigned canonical + * outputs are better suited to the only remaining + * use of poly_reduce() in the context of (de)serialization. + */ +void poly_reduce(poly *r) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))) +); + +#define poly_add MLKEM_NAMESPACE(poly_add) +/************************************************************ + * Name: poly_add + * + * Description: Adds two polynomials in place + * + * Arguments: - r: Pointer to input-output polynomial to be added to. + * - b: Pointer to input polynomial that should be added + * to r. Must be disjoint from r. + * + * The coefficients of r and b must be so that the addition does + * not overflow. 
Otherwise, the behaviour of this function is undefined. + * + ************************************************************/ +/* + * REF-CHANGE: + * The reference implementation uses a 3-argument poly_add. + * We specialize to the accumulator form to avoid reasoning about aliasing. + */ +void poly_add(poly *r, const poly *b) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(b, sizeof(poly))) + requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX)) + requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN)) + ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k])) + assigns(memory_slice(r, sizeof(poly))) +); + +#define poly_sub MLKEM_NAMESPACE(poly_sub) +/************************************************* + * Name: poly_sub + * + * Description: Subtract b from r in place; no modular reduction is performed + * + * Arguments: - poly *r: Pointer to input-output polynomial to be subtracted + *from. + * - const poly *b: Pointer to input polynomial to subtract from r + **************************************************/ +/* + * REF-CHANGE: + * The reference implementation uses a 3-argument poly_sub. + * We specialize to the accumulator form to avoid reasoning about aliasing.
+ */ +void poly_sub(poly *r, const poly *b) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(b, sizeof(poly))) + requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX)) + requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN)) + ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k])) + assigns(object_whole(r)) +); + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.c new file mode 100644 index 0000000000..5e4dd0c5c4 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "polyvec.h" +#include +#include "arith_native.h" +#include "config.h" +#include "ntt.h" +#include "params.h" +#include "poly.h" + +#include "debug/debug.h" +void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU], + const polyvec *a) +{ + unsigned int i; + POLYVEC_UBOUND(a, MLKEM_Q); + + for (i = 0; i < MLKEM_K; i++) + { + poly_compress_du(r + i * MLKEM_POLYCOMPRESSEDBYTES_DU, &a->vec[i]); + } +} + +void polyvec_decompress_du(polyvec *r, + const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU]) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU); + } + + POLYVEC_UBOUND(r, MLKEM_Q); +} + +void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]); + } +} + +void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES]) +{ + int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES); + } +} + +void polyvec_ntt(polyvec *r) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + 
poly_ntt(&r->vec[i]); + } +} + +void polyvec_invntt_tomont(polyvec *r) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_invntt_tomont(&r->vec[i]); + } +} + +/************************************************* + * Name: polyvec_basemul_acc_montgomery_cached + * + * Description: Multiply elements of a and b in NTT domain, accumulate into r, + * and multiply by 2^-16. + * + * Bounds: + * - a is assumed to be coefficient-wise < q in absolute value. + * - b is assumed to be the output of a forward NTT and + * thus coefficient-wise bound by NTT_BOUND + * - b_cache is assumed to be coefficient-wise bound by + * MLKEM_Q. + * + * Arguments: - poly *r: pointer to output polynomial + * - const polyvec *a: pointer to first input vector of polynomials + * - const polyvec *b: pointer to second input vector of polynomials + * - const polyvec_mulcache *b_cache: mulcache for b + **************************************************/ +#if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) +void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, + const polyvec *b, + const polyvec_mulcache *b_cache) +{ + int i; + poly t; + + POLYVEC_BOUND(a, MLKEM_Q); + POLYVEC_BOUND(b, NTT_BOUND); + POLYVEC_BOUND(b_cache, MLKEM_Q); + + poly_basemul_montgomery_cached(r, &a->vec[0], &b->vec[0], &b_cache->vec[0]); + for (i = 1; i < MLKEM_K; i++) + { + poly_basemul_montgomery_cached(&t, &a->vec[i], &b->vec[i], + &b_cache->vec[i]); + poly_add(r, &t); + /* abs bounds: < (i+1) * 3/2 * q */ + } + + /* + * Those bounds are true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus best to omit + * them from the spec to not unnecessarily constraint native implementations.
+ */ + cassert( + array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_K * (3 * HALF_Q - 1)), + "polyvec_basemul_acc_montgomery_cached output bounds"); + /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */ + POLY_BOUND(r, MLKEM_K * 3 * HALF_Q); +} +#else /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */ +void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, + const polyvec *b, + const polyvec_mulcache *b_cache) +{ + POLYVEC_BOUND(a, MLKEM_Q); + POLYVEC_BOUND(b, NTT_BOUND); + /* Omitting POLYVEC_BOUND(b_cache, MLKEM_Q) since native implementations may + * decide not to use a mulcache. Note that the C backend implementation + * of poly_basemul_montgomery_cached() does still include the check. */ + polyvec_basemul_acc_montgomery_cached_native(r, a, b, b_cache); +} +#endif /* MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */ + +/************************************************* + * Name: polyvec_basemul_acc_montgomery + * + * Description: Multiply elements of a and b in NTT domain, accumulate into r, + * and multiply by 2^-16. + * + * Arguments: - poly *r: pointer to output polynomial + * - const polyvec *a: pointer to first input vector of polynomials + * - const polyvec *b: pointer to second input vector of polynomials + **************************************************/ +void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) +{ + polyvec_mulcache b_cache; + polyvec_mulcache_compute(&b_cache, b); + polyvec_basemul_acc_montgomery_cached(r, a, b, &b_cache); +} + +/************************************************* + * Name: polyvec_mulcache_compute + * + * Description: Precompute values speeding up + * base multiplications of polynomials + * in NTT domain. + * + * Arguments: - polyvec_mulcache *x: pointer to output cache.
+ * - const polyvec *a: pointer to input vector of polynomials + **************************************************/ +void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_mulcache_compute(&x->vec[i], &a->vec[i]); + } +} + + +/************************************************* + * Name: polyvec_reduce + * + * Description: Applies Barrett reduction to each coefficient + * of each element of a vector of polynomials; + * for details of the Barrett reduction see comments in reduce.c + * + * Arguments: - polyvec *r: pointer to input/output vector of polynomials + **************************************************/ +void polyvec_reduce(polyvec *r) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_reduce(&r->vec[i]); + } +} + +void polyvec_add(polyvec *r, const polyvec *b) +{ + int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_add(&r->vec[i], &b->vec[i]); + } +} + +void polyvec_tomont(polyvec *r) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_tomont(&r->vec[i]); + } +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.h new file mode 100644 index 0000000000..7771fd3b28 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.h @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef POLYVEC_H +#define POLYVEC_H + +#include +#include "params.h" +#include "poly.h" + +typedef struct +{ + poly vec[MLKEM_K]; +} ALIGN polyvec; + +/* REF-CHANGE: This struct does not exist in the reference implementation */ +typedef struct +{ + poly_mulcache vec[MLKEM_K]; +} polyvec_mulcache; + +#define polyvec_compress_du MLKEM_NAMESPACE(polyvec_compress_du) +/************************************************* + * Name: polyvec_compress_du + * + * Description: Compress and serialize vector of polynomials + * + * Arguments: - uint8_t *r: pointer to output
byte array + * (needs space for MLKEM_POLYVECCOMPRESSEDBYTES_DU) + * - const polyvec *a: pointer to input vector of polynomials. + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. + **************************************************/ +void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU], + const polyvec *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYVECCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(a, sizeof(polyvec))) + requires(forall(int, k0, 0, MLKEM_K - 1, + array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))) + assigns(object_whole(r)) +); + +#define polyvec_decompress_du MLKEM_NAMESPACE(polyvec_decompress_du) +/************************************************* + * Name: polyvec_decompress_du + * + * Description: De-serialize and decompress vector of polynomials; + * approximate inverse of polyvec_compress_du + * + * Arguments: - polyvec *r: pointer to output vector of polynomials. + * Output will have coefficients normalized to [0,..,q-1]. + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYVECCOMPRESSEDBYTES_DU) + **************************************************/ +void polyvec_decompress_du(polyvec *r, + const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYVECCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(r, sizeof(polyvec))) + assigns(object_whole(r)) + ensures(forall(int, k0, 0, MLKEM_K - 1, + array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))) +); + +#define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes) +/************************************************* + * Name: polyvec_tobytes + * + * Description: Serialize vector of polynomials + * + * Arguments: - uint8_t *r: pointer to output byte array + * (needs space for MLKEM_POLYVECBYTES) + * - const polyvec *a: pointer to input vector of polynomials + * Each polynomial must have coefficients in [0,..,q-1]. 
#define polyvec_frombytes MLKEM_NAMESPACE(polyvec_frombytes)
/*************************************************
 * Name:        polyvec_frombytes
 *
 * Description: De-serialize vector of polynomials;
 *              inverse of polyvec_tobytes
 *
 * Arguments:   - polyvec *r: pointer to output vector of polynomials.
 *                 The contract below bounds the output coefficients
 *                 by 0 and 4095; it does not promise reduction
 *                 to [0,..,q-1].
 *              - const uint8_t *a: pointer to input byte array
 *                 (of length MLKEM_POLYVECBYTES)
 **************************************************/
void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
__contract__(
  requires(memory_no_alias(r, sizeof(polyvec)))
  requires(memory_no_alias(a, MLKEM_POLYVECBYTES))
  assigns(object_whole(r))
  ensures(forall(int, k0, 0, MLKEM_K - 1,
        array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, 4095)))
);
+ * + * Arguments: - polyvec *r: pointer to in/output vector of polynomials + * + **************************************************/ +void polyvec_ntt(polyvec *r) +__contract__( + requires(memory_no_alias(r, sizeof(polyvec))) + requires(forall(int, j, 0, MLKEM_K - 1, + array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))) + assigns(object_whole(r)) + ensures(forall(int, j, 0, MLKEM_K - 1, + array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (NTT_BOUND - 1)))) +); + +#define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont) +/************************************************* + * Name: polyvec_invntt_tomont + * + * Description: Apply inverse NTT to all elements of a vector of polynomials + * and multiply by Montgomery factor 2^16 + * + * The input is assumed to be in bitreversed order, and can + * have arbitrary coefficients in int16_t. + * + * The output polynomial is in normal order, and + * coefficient-wise bound by INVNTT_BOUND in absolute value. + * + * + * Arguments: - polyvec *r: pointer to in/output vector of polynomials + **************************************************/ +void polyvec_invntt_tomont(polyvec *r) +__contract__( + requires(memory_no_alias(r, sizeof(polyvec))) + assigns(object_whole(r)) + ensures(forall(int, j, 0, MLKEM_K - 1, + array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (INVNTT_BOUND - 1)))) +); + +#define polyvec_basemul_acc_montgomery \ + MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery) +void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, + const polyvec *b); + +/* REF-CHANGE: This function does not exist in the reference implementation */ +#define polyvec_basemul_acc_montgomery_cached \ + MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached) +/************************************************* + * Name: polyvec_basemul_acc_montgomery_cached + * + * Description: Scalar product of two vectors of polynomials in NTT domain, + * using mulcache for second operand. 
+ * + * Bounds: + * - a is assumed to be coefficient-wise < q in absolute value. + * - No bounds guarantees for the coefficients in the result. + * + * Arguments: - poly *r: pointer to output polynomial + * - const polyvec *a: pointer to first input polynomial vector + * - const polyvec *b: pointer to second input polynomial vector + * - const polyvec_mulcache *b_cache: pointer to mulcache + * for second input polynomial vector. Can be computed + * via polyvec_mulcache_compute(). + **************************************************/ +void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, + const polyvec *b, + const polyvec_mulcache *b_cache) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(a, sizeof(polyvec))) + requires(memory_no_alias(b, sizeof(polyvec))) + requires(memory_no_alias(b_cache, sizeof(polyvec_mulcache))) +/* Input is coefficient-wise < q in absolute value */ + requires(forall(int, k1, 0, MLKEM_K - 1, + array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))) + assigns(memory_slice(r, sizeof(poly))) +); + +/* REF-CHANGE: This function does not exist in the reference implementation */ +#define polyvec_mulcache_compute MLKEM_NAMESPACE(polyvec_mulcache_compute) +/************************************************************ + * Name: polyvec_mulcache_compute + * + * Description: Computes the mulcache for a vector of polynomials in NTT domain + * + * The mulcache of a degree-2 polynomial b := b0 + b1*X + * in Fq[X]/(X^2-zeta) is the value b1*zeta, needed when + * computing products of b in Fq[X]/(X^2-zeta). + * + * The mulcache of a polynomial in NTT domain -- which is + * a 128-tuple of degree-2 polynomials in Fq[X]/(X^2-zeta), + * for varying zeta, is the 128-tuple of mulcaches of those + * polynomials. + * + * The mulcache of a vector of polynomials is the vector + * of mulcaches of its entries. 
+ * + * Arguments: - x: Pointer to mulcache to be populated + * - a: Pointer to input polynomial vector + ************************************************************/ +/* + * NOTE: The default C implementation of this function populates + * the mulcache with values in (-q,q), but this is not needed for the + * higher level safety proofs, and thus not part of the spec. + */ +void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a) +__contract__( + requires(memory_no_alias(x, sizeof(polyvec_mulcache))) + requires(memory_no_alias(a, sizeof(polyvec))) + assigns(object_whole(x)) +); + +#define polyvec_reduce MLKEM_NAMESPACE(polyvec_reduce) +/************************************************* + * Name: polyvec_reduce + * + * Description: Applies Barrett reduction to each coefficient + * of each element of a vector of polynomials; + * for details of the Barrett reduction see comments in reduce.c + * + * Arguments: - polyvec *r: pointer to input/output polynomial + **************************************************/ +/* + * REF-CHANGE: The semantics of polyvec_reduce() is different in + * the reference implementation, which requires + * signed canonical output data. Unsigned canonical + * outputs are better suited to the only remaining + * use of poly_reduce() in the context of (de)serialization. + */ +void polyvec_reduce(polyvec *r) +__contract__( + requires(memory_no_alias(r, sizeof(polyvec))) + assigns(object_whole(r)) + ensures(forall(int, k0, 0, MLKEM_K - 1, + array_bound(r->vec[k0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))) +); + +#define polyvec_add MLKEM_NAMESPACE(polyvec_add) +/************************************************* + * Name: polyvec_add + * + * Description: Add vectors of polynomials + * + * Arguments: - polyvec *r: pointer to input-output vector of polynomials to be + * added to + * - const polyvec *b: pointer to second input vector of polynomials + * + * The coefficients of r and b must be so that the addition does + * not overflow. 
Otherwise, the behaviour of this function is undefined. + * + * The coefficients returned in *r are in int16_t which is sufficient + * to prove type-safety of calling units. Therefore, no stronger + * ensures clause is required on this function. + **************************************************/ +void polyvec_add(polyvec *r, const polyvec *b) +__contract__( + requires(memory_no_alias(r, sizeof(polyvec))) + requires(memory_no_alias(b, sizeof(polyvec))) + requires(forall(int, j0, 0, MLKEM_K - 1, + forall(int, k0, 0, MLKEM_N - 1, + (int32_t)r->vec[j0].coeffs[k0] + b->vec[j0].coeffs[k0] <= INT16_MAX))) + requires(forall(int, j1, 0, MLKEM_K - 1, + forall(int, k1, 0, MLKEM_N - 1, + (int32_t)r->vec[j1].coeffs[k1] + b->vec[j1].coeffs[k1] >= INT16_MIN))) + assigns(object_whole(r)) +); + +#define polyvec_tomont MLKEM_NAMESPACE(polyvec_tomont) +/************************************************* + * Name: polyvec_tomont + * + * Description: Inplace conversion of all coefficients of a polynomial + * vector from normal domain to Montgomery domain + * + * Bounds: Output < q in absolute value. 
#define polyvec_tomont MLKEM_NAMESPACE(polyvec_tomont)
/*************************************************
 * Name:        polyvec_tomont
 *
 * Description: Inplace conversion of all coefficients of a polynomial
 *              vector from normal domain to Montgomery domain
 *
 * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
 *
 * Bounds:      Output < q in absolute value.
 *
 **************************************************/
/*
 * NOTE(review): the contract lists two assigns clauses for r
 * (memory_slice and object_whole). Presumably one of them is
 * sufficient -- confirm before removing either.
 */
void polyvec_tomont(polyvec *r)
__contract__(
  requires(memory_no_alias(r, sizeof(polyvec)))
  assigns(memory_slice(r, sizeof(polyvec)))
  assigns(object_whole(r))
  ensures(forall(int, j, 0, MLKEM_K - 1,
    array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
);
/* The CBMC "conversion" check is disabled only around this helper,
 * and restored by the matching pop below. */
#ifdef CBMC
#pragma CPROVER check push
#pragma CPROVER check disable "conversion"
#endif
/* Convert a uint16_t to int16_t with a plain cast; this is the single
 * place where that unsigned-to-signed conversion is performed. */
static INLINE int16_t cast_uint16_to_int16(uint16_t x)
{
  /*
   * PORTABILITY: This relies on uint16_t -> int16_t
   * being implemented as the inverse of int16_t -> uint16_t,
   * which is implementation-defined (C99 6.3.1.3 (3))
   * CBMC (correctly) fails to prove this conversion is OK,
   * so we have to suppress that check here
   */
  return (int16_t)x;
}
#ifdef CBMC
#pragma CPROVER check pop
#endif
+ */ + + /* Compute a*q^{-1} mod 2^16 in unsigned representatives */ + const uint16_t a_reduced = a & UINT16_MAX; + const uint16_t a_inverted = (a_reduced * QINV) & UINT16_MAX; + + /* Lift to signed canonical representative mod 2^16. */ + const int16_t t = cast_uint16_to_int16(a_inverted); + + int32_t r = a - ((int32_t)t * MLKEM_Q); + + /* + * PORTABILITY: Right-shift on a signed integer is, strictly-speaking, + * implementation-defined for negative left argument. Here, + * we assume it's sign-preserving "arithmetic" shift right. (C99 6.5.7 (5)) + */ + r = r >> 16; + + return (int16_t)r; +} + +int16_t montgomery_reduce(int32_t a) +{ + int16_t res; + SCALAR_BOUND(a, 2 * MLKEM_Q * 32768, "montgomery_reduce input"); + + res = montgomery_reduce_generic(a); + + SCALAR_BOUND(res, (3 * (MLKEM_Q + 1)) / 2, "montgomery_reduce output"); + return res; +} + +int16_t fqmul(int16_t a, int16_t b) +{ + int16_t res; + SCALAR_BOUND(b, HALF_Q, "fqmul input"); + + res = montgomery_reduce((int32_t)a * (int32_t)b); + + SCALAR_BOUND(res, MLKEM_Q, "fqmul output"); + return res; +} + +/* + * To divide by MLKEM_Q using Barrett multiplication, the "magic number" + * multiplier is round_to_nearest(2**26/MLKEM_Q) + */ +#define BPOWER 26 +static const int32_t barrett_multiplier = + ((1 << BPOWER) + MLKEM_Q / 2) / MLKEM_Q; + +/************************************************* + * Name: barrett_reduce + * + * Description: Barrett reduction; given a 16-bit integer a, computes + * centered representative congruent to a mod q in + * {-(q-1)/2,...,(q-1)/2} + * + * Arguments: - int16_t a: input integer to be reduced + * + * Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q. + **************************************************/ +int16_t barrett_reduce(int16_t a) +{ + /* + * Compute round_to_nearest(a/MLKEM_Q) using the multiplier + * above and shift by BPOWER places. 
+ * PORTABILITY: Right-shift on a signed integer is, strictly-speaking, + * implementation-defined for negative left argument. Here, + * we assume it's sign-preserving "arithmetic" shift right. (C99 6.5.7 (5)) + */ + const int32_t t = (barrett_multiplier * a + (1 << (BPOWER - 1))) >> BPOWER; + + /* + * t is in -10 .. +10, so we need 32-bit math to + * evaluate t * MLKEM_Q and the subsequent subtraction + */ + return (int16_t)(a - t * MLKEM_Q); +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/reduce.h new file mode 100644 index 0000000000..2a486cf3ec --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/reduce.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef REDUCE_H +#define REDUCE_H + +#include +#include "cbmc.h" +#include "debug/debug.h" +#include "params.h" + +#define MONT -1044 /* 2^16 mod q */ +#define HALF_Q ((MLKEM_Q + 1) / 2) /* 1665 */ + +/************************************************* + * Name: montgomery_reduce + * + * Description: Montgomery reduction + * + * Arguments: - int32_t a: input integer to be reduced + * Must be smaller than 2 * q * 2^15 in absolute value. + * + * Returns: integer congruent to a * R^-1 modulo q, + * smaller than 3/2 q in absolute value. 
/*************************************************
 * Name:        rej_uniform_scalar
 *
 * Description: Run rejection sampling on uniform random bytes to generate
 *              uniform random integers mod q
 *
 * Arguments:   - int16_t *r:          pointer to output buffer
 *              - unsigned int target: requested number of 16-bit integers
 *                                     (uniform mod q).
 *                                     Must be <= 4096.
 *              - unsigned int offset: number of 16-bit integers that have
 *                                     already been sampled.
 *                                     Must be <= target.
 *              - const uint8_t *buf:  pointer to input buffer
 *                                     (assumed to be uniform random bytes)
 *              - unsigned int buflen: length of input buffer in bytes
 *                                     Must be <= 4096.
 *                                     Must be a multiple of 3.
 *
 * Note: Strictly speaking, only a few values of buflen near UINT_MAX need
 * excluding. The limit of 4096 is somewhat arbitrary but sufficient for all
 * uses of this function. Similarly, the actual limit for target is UINT_MAX/2.
 *
 * Returns the new offset of sampled 16-bit integers, at most target,
 * and at least the initial offset.
 * If the new offset is strictly less than target, all of the input buffer
 * is guaranteed to have been consumed. If it is equal to target, no
 * information is provided on how many bytes of the input buffer have been
 * consumed.
 **************************************************/
static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
                                       unsigned int offset, const uint8_t *buf,
                                       unsigned int buflen)
{
  unsigned int ctr, pos;
  uint16_t val0, val1;

  /* ctr counts accepted coefficients, pos counts consumed input bytes */
  ctr = offset;
  pos = 0;
  /* pos + 3 cannot overflow due to the assumption buflen <= 4096 */
  while (ctr < target && pos + 3 <= buflen)
  __loop__(
    invariant(offset <= ctr && ctr <= target && pos <= buflen)
    invariant(ctr > 0 ==> array_bound(r, 0, ctr - 1, 0, (MLKEM_Q - 1))))
  {
    /* Split 3 input bytes into two 12-bit candidates: val0 takes byte 0
     * and the low nibble of byte 1, val1 takes the high nibble of byte 1
     * and byte 2. */
    val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF;
    val1 = ((buf[pos + 1] >> 4) | ((uint16_t)buf[pos + 2] << 4)) & 0xFFF;
    pos += 3;

    /* Keep a candidate only if it is < MLKEM_Q */
    if (val0 < MLKEM_Q)
    {
      r[ctr++] = val0;
    }
    /* Re-check ctr: accepting val0 may already have reached target */
    if (ctr < target && val1 < MLKEM_Q)
    {
      r[ctr++] = val1;
    }
  }
  return ctr;
}
/*
 * rej_uniform() for builds with MLKEM_USE_NATIVE_REJ_UNIFORM:
 * try the native sampler first and fall back to the scalar
 * C sampler if the native one returns -1.
 */
unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                         const uint8_t *buf, unsigned int buflen)
{
  /* Sample from large buffer with full lane as much as possible. */
  const int native_count =
      rej_uniform_native(r + offset, target - offset, buf, buflen);

  if (native_count == -1)
  {
    /* Presumably -1 means the native backend did not handle this
     * request; redo the sampling with the scalar implementation. */
    return rej_uniform_scalar(r, target, offset, buf, buflen);
  }

  /* The native routine counts from r + offset, so add offset back. */
  return offset + (unsigned)native_count;
}
+ * If the new offset is strictly less than len, all of the input buffers + * is guaranteed to have been consumed. If it is equal to len, no information + * is provided on how many bytes of the input buffer have been consumed. + **************************************************/ + +/* + * REF-CHANGE: The signature differs from the Kyber reference implementation + * in that it adds the offset and always expects the base of the target + * buffer. This avoids shifting the buffer base in the caller, which appears + * tricky to reason about. + */ +unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, + const uint8_t *buf, unsigned int buflen) +__contract__( + requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) + requires(memory_no_alias(r, sizeof(int16_t) * target)) + requires(memory_no_alias(buf, buflen)) + requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1))) + assigns(memory_slice(r, sizeof(int16_t) * target)) + ensures(offset <= return_value && return_value <= target) + ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1))) +); +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/symmetric-shake.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/symmetric-shake.c new file mode 100644 index 0000000000..5dd8c10d92 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/symmetric-shake.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include +#include +#include +#include "fips202.h" +#include "params.h" +#include "symmetric.h" + +void mlkem_shake256_prf(uint8_t *out, size_t outlen, + const uint8_t key[MLKEM_SYMBYTES], uint8_t nonce) +{ + uint8_t extkey[MLKEM_SYMBYTES + 1]; + + memcpy(extkey, key, MLKEM_SYMBYTES); + extkey[MLKEM_SYMBYTES] = nonce; + + shake256(out, outlen, extkey, sizeof(extkey)); +} + +void mlkem_shake256_rkprf(uint8_t out[MLKEM_SSBYTES], + const uint8_t 
key[MLKEM_SYMBYTES], + const uint8_t input[MLKEM_CIPHERTEXTBYTES]) +{ + shake256incctx s; + + shake256_inc_init(&s); + shake256_inc_absorb(&s, key, MLKEM_SYMBYTES); + shake256_inc_absorb(&s, input, MLKEM_CIPHERTEXTBYTES); + shake256_inc_finalize(&s); + shake256_inc_squeeze(out, MLKEM_SSBYTES, &s); + shake256_inc_ctx_release(&s); +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/symmetric.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/symmetric.h new file mode 100644 index 0000000000..202741a7b3 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/symmetric.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef SYMMETRIC_H +#define SYMMETRIC_H + +#include +#include +#include "params.h" + +#include "fips202.h" + +#include "cbmc.h" + +#define mlkem_shake256_prf MLKEM_NAMESPACE(mlkem_shake256_prf) +/************************************************* + * Name: mlkem_shake256_prf + * + * Ref: FIPS-203 Section 4.1. Function PRF (eq 4.3) + * + * Description: Usage of SHAKE256 as a PRF, concatenates secret and public input + * and then generates outlen bytes of SHAKE256 output + * + * Arguments: - uint8_t *out: pointer to output + * - size_t outlen: number of requested output bytes + * - const uint8_t *key: pointer to the key (of length + * MLKEM_SYMBYTES) + * - uint8_t nonce: single-byte nonce (public PRF input) + * + * out and key may NOT be aliased. + **************************************************/ +void mlkem_shake256_prf(uint8_t *out, size_t outlen, + const uint8_t key[MLKEM_SYMBYTES], uint8_t nonce) +__contract__( + requires(memory_no_alias(out, outlen)) + requires(memory_no_alias(key, MLKEM_SYMBYTES)) + assigns(memory_slice(out, outlen)) +); + +#define mlkem_shake256_rkprf MLKEM_NAMESPACE(mlkem_shake256_rkprf) +/************************************************* + * Name: mlkem_shake256_rkprf + * + * Ref: FIPS-203 Section 4.1. 
/* Macros denoting FIPS-203 specific Hash functions */

/* Hash function H, FIPS-203 4.1 (eq 4.4) */
#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES)

/* Hash function G, FIPS-203 4.1 (eq 4.5) */
#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES)

/* Macros denoting FIPS-203 specific PRFs */
#define prf(OUT, OUTBYTES, KEY, NONCE) \
  mlkem_shake256_prf(OUT, OUTBYTES, KEY, NONCE)
#define rkprf(OUT, KEY, INPUT) mlkem_shake256_rkprf(OUT, KEY, INPUT)
+ */ +volatile uint64_t ct_opt_blocker_u64 = 0; + +#else /* MLKEM_USE_ASM_VALUE_BARRIER */ + +int empty_cu_verify; + +#endif /* MLKEM_USE_ASM_VALUE_BARRIER */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/verify.h new file mode 100644 index 0000000000..5c62223c3d --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/verify.h @@ -0,0 +1,305 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef VERIFY_H +#define VERIFY_H + +#include +#include +#include +#include "cbmc.h" +#include "params.h" + +/* Constant-time comparisons and conditional operations + + We reduce the risk for compilation into variable-time code + through the use of 'value barriers'. + + Functionally, a value barrier is a no-op. To the compiler, however, + it constitutes an arbitrary modification of its input, and therefore + harden's value propagation and range analysis. + + We consider two approaches to implement a value barrier: + - An empty inline asm block which marks the target value as clobbered. + - XOR'ing with the value of a volatile global that's set to 0. + + The first approach is cheap because it only prevents the compiler + from reasoning about the value of the variable past the barrier, + but does not directly generate additional instructions. + + The second approach generates redundant loads and XOR operations + and therefore comes at a higher runtime cost. However, it appears + more robust towards optimization, as compilers should never drop + a volatile load. + + We use the empty-ASM value barrier for GCC and clang, and fall + back to the global volatile barrier otherwise. + + The global value barrier can be forced by setting MLKEM_NO_ASM_VALUE_BARRIER. 
+ +*/ + +#if (defined(__GNUC__) || defined(__clang__)) && !defined(CBMC) && \ + !defined(MLKEM_NO_ASM_VALUE_BARRIER) +#define MLKEM_USE_ASM_VALUE_BARRIER +#endif + +#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) + +/* + * Declaration of global volatile that the global value barrier + * is loading from and masking with. + */ +#define ct_opt_blocker_u64 MLKEM_NAMESPACE(ct_opt_blocker_u64) +extern volatile uint64_t ct_opt_blocker_u64; + +/* Helper functions for obtaining masks of various sizes */ +STATIC_INLINE_TESTABLE uint8_t get_optblocker_u8(void) +__contract__(ensures(return_value == 0)) { return (uint8_t)ct_opt_blocker_u64; } + +STATIC_INLINE_TESTABLE uint32_t get_optblocker_u32(void) +__contract__(ensures(return_value == 0)) { return (uint32_t)ct_opt_blocker_u64; } + +STATIC_INLINE_TESTABLE int32_t get_optblocker_i32(void) +__contract__(ensures(return_value == 0)) { return (int32_t)ct_opt_blocker_u64; } + +STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b) +__contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u32()); } + +STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b) +__contract__(ensures(return_value == b)) { return (b ^ get_optblocker_i32()); } + +STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b) +__contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u8()); } + +#else /* MLKEM_USE_ASM_VALUE_BARRIER */ + +STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b) +__contract__(ensures(return_value == b)) +{ + asm("" : "+r"(b)); + return b; +} + +STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b) +__contract__(ensures(return_value == b)) +{ + asm("" : "+r"(b)); + return b; +} + +STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b) +__contract__(ensures(return_value == b)) +{ + asm("" : "+r"(b)); + return b; +} + +#endif /* MLKEM_USE_ASM_VALUE_BARRIER */ + +/* + * The ct_cmask_nonzero_xxx functions below make deliberate use of unsigned + * overflow, which is fully defined behaviour in C.
It is thus safe to disable + * this warning. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif + +/************************************************* + * Name: ct_cmask_nonzero_u16 + * + * Description: Return 0 if input is zero, and 0xFFFF otherwise. + * + * Arguments: uint16_t x: Value to be converted into a mask + **************************************************/ +STATIC_INLINE_TESTABLE uint16_t ct_cmask_nonzero_u16(uint16_t x) +__contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF))) +{ + uint32_t tmp = value_barrier_u32(-((uint32_t)x)); + tmp >>= 16; + return tmp; +} + +/************************************************* + * Name: ct_cmask_nonzero_u8 + * + * Description: Return 0 if input is zero, and 0xFF otherwise. + * + * Arguments: uint8_t x: Value to be converted into a mask + **************************************************/ +STATIC_INLINE_TESTABLE uint8_t ct_cmask_nonzero_u8(uint8_t x) +__contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF))) +{ + uint32_t tmp = value_barrier_u32(-((uint32_t)x)); + tmp >>= 24; + return tmp; +} + +/* Put unsigned overflow warnings in CBMC back into scope */ +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/* + * The ct_cmask_neg_i16 function below makes deliberate use of + * signed to unsigned integer conversion, which is fully defined + * behaviour in C. It is thus safe to disable this warning. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "conversion" +#endif + +/************************************************* + * Name: ct_cmask_neg_i16 + * + * Description: Return 0 if input is non-negative, and 0xFFFF otherwise. + * + * Arguments: int16_t x: Value to be converted into a mask + **************************************************/ +STATIC_INLINE_TESTABLE uint16_t ct_cmask_neg_i16(int16_t x) +__contract__(ensures(return_value == ((x < 0) ?
0xFFFF : 0))) +{ + int32_t tmp = value_barrier_i32((int32_t)x); + tmp >>= 16; + return (int16_t)tmp; +} + +/* Put signed-to-unsigned warnings in CBMC back into scope */ +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/* + * The ct_sel_xxx functions below make deliberate use of unsigned + * to signed integer conversion, which is implementation-defined + * behaviour. Here, we assume that uint16_t -> int16_t is inverse + * to int16_t -> uint16_t. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "conversion" +#endif + +/************************************************* + * Name: ct_sel_int16 + * + * Description: Functionally equivalent to cond ? a : b, + * but implemented with guards against + * compiler-introduced branches. + * + * Arguments: int16_t a: First alternative + * int16_t b: Second alternative + * uint16_t cond: Condition variable. + **************************************************/ +STATIC_INLINE_TESTABLE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond) +__contract__(ensures(return_value == (cond ? a : b))) +{ + uint16_t au = a, bu = b; + uint16_t res = bu ^ (ct_cmask_nonzero_u16(cond) & (au ^ bu)); + return (int16_t)res; +} + +/* Put unsigned-to-signed warnings in CBMC back into scope */ +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************* + * Name: ct_sel_uint8 + * + * Description: Functionally equivalent to cond ? a : b, + * but implemented with guards against + * compiler-introduced branches. + * + * Arguments: uint8_t a: First alternative + * uint8_t b: Second alternative + * uint8_t cond: Condition variable. + **************************************************/ +STATIC_INLINE_TESTABLE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond) +__contract__(ensures(return_value == (cond ?
a : b))) +{ + return b ^ (ct_cmask_nonzero_u8(cond) & (a ^ b)); +} + +/************************************************* + * Name: ct_memcmp + * + * Description: Compare two arrays for equality in constant time. + * + * Arguments: const uint8_t *a: pointer to first byte array + * const uint8_t *b: pointer to second byte array + * size_t len: length of the byte arrays + * + * Returns 0 if the byte arrays are equal, a non-zero value otherwise + **************************************************/ +STATIC_INLINE_TESTABLE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b, + const size_t len) +__contract__( + requires(memory_no_alias(a, len)) + requires(memory_no_alias(b, len)) + requires(len <= INT_MAX) + ensures((return_value == 0) == forall(int, i, 0, ((int)len - 1), (a[i] == b[i])))) +{ + uint8_t r = 0, s = 0; + + /* + * Switch to a _signed_ ilen value, so that our loop counter + * can also be signed, and thus (i - 1) in the loop invariant + * can yield -1 as required. + */ + const int ilen = (int)len; + int i; + + for (i = 0; i < ilen; i++) + __loop__( + invariant(i >= 0 && i <= ilen) + invariant((r == 0) == (forall(int, k, 0, (i - 1), (a[k] == b[k]))))) + { + r |= a[i] ^ b[i]; + /* s is useless, but prevents the loop from being aborted once r=0xff. */ + s ^= a[i] ^ b[i]; + } + + /* + * - Convert r into a mask; this may not be necessary, but is an additional + * safeguard + * against leaking information about a and b. + * - XOR twice with s, separated by a value barrier, to prevent the compiler + * from dropping the s computation in the loop. + */ + return (value_barrier_u8(ct_cmask_nonzero_u8(r) ^ s) ^ s); +} + +/************************************************* + * Name: ct_cmov_zero + * + * Description: Copy len bytes from x to r if b is zero; + * don't modify r if b is non-zero. + * assumes two's complement representation of negative integers. + * Runs in constant time.
+ * + * Arguments: uint8_t *r: pointer to output byte array + * const uint8_t *x: pointer to input byte array + * size_t len: Amount of bytes to be copied + * uint8_t b: Condition value. + **************************************************/ +STATIC_INLINE_TESTABLE +void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) +__contract__( + requires(memory_no_alias(r, len)) + requires(memory_no_alias(x, len)) + assigns(memory_slice(r, len))) +{ + size_t i; + for (i = 0; i < len; i++) + __loop__(invariant(i <= len)) + { + r[i] = ct_sel_uint8(r[i], x[i], b); + } +} + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/zetas.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/zetas.c new file mode 100644 index 0000000000..f52b2ff5ad --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/zetas.c @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * WARNING: This file is auto-generated from scripts/autogenerate_files.py + * Do not modify it directly. + */ + +#include "ntt.h" + +/* + * Table of zeta values used in the reference NTT and inverse NTT. + * See autogenerate_files.py for details. 
+ */ +const int16_t zetas[128] = { + -1044, -758, -359, -1517, 1493, 1422, 287, 202, -171, 622, 1577, + 182, 962, -1202, -1474, 1468, 573, -1325, 264, 383, -829, 1458, + -1602, -130, -681, 1017, 732, 608, -1542, 411, -205, -1571, 1223, + 652, -552, 1015, -1293, 1491, -282, -1544, 516, -8, -320, -666, + -1618, -1162, 126, 1469, -853, -90, -271, 830, 107, -1421, -247, + -951, -398, 961, -1508, -725, 448, -1065, 677, -1275, -1103, 430, + 555, 843, -1251, 871, 1550, 105, 422, 587, 177, -235, -291, + -460, 1574, 1653, -246, 778, 1159, -147, -777, 1483, -602, 1119, + -1590, 644, -872, 349, 418, 329, -156, -75, 817, 1097, 603, + 610, 1322, -1285, -1465, 384, -1215, -136, 1218, -1335, -874, 220, + -1187, -1659, -1185, -1530, -1278, 794, -1510, -854, -870, 478, -108, + -308, 996, 991, 958, -1460, 1522, 1628, +}; diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/LICENSE b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/LICENSE similarity index 100% rename from src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/LICENSE rename to src/kem/ml_kem/mlkem-native_ml-kem-512_ref/LICENSE diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/api.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/api.h new file mode 100644 index 0000000000..94597323f1 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/api.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef API_H +#define API_H + +#include + +#define PQCP_MLKEM_NATIVE_MLKEM512_SECRETKEYBYTES 1632 +#define PQCP_MLKEM_NATIVE_MLKEM512_PUBLICKEYBYTES 800 +#define PQCP_MLKEM_NATIVE_MLKEM512_CIPHERTEXTBYTES 768 +#define PQCP_MLKEM_NATIVE_MLKEM512_KEYPAIRCOINBYTES 64 +#define PQCP_MLKEM_NATIVE_MLKEM512_ENCCOINBYTES 32 +#define PQCP_MLKEM_NATIVE_MLKEM512_BYTES 32 + +int PQCP_MLKEM_NATIVE_MLKEM512_keypair_derand(uint8_t *pk, uint8_t *sk, + const uint8_t *coins); +int PQCP_MLKEM_NATIVE_MLKEM512_keypair(uint8_t *pk, uint8_t *sk); +int 
PQCP_MLKEM_NATIVE_MLKEM512_enc_derand(uint8_t *ct, uint8_t *ss, + const uint8_t *pk, + const uint8_t *coins); +int PQCP_MLKEM_NATIVE_MLKEM512_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int PQCP_MLKEM_NATIVE_MLKEM512_dec(uint8_t *ss, const uint8_t *ct, + const uint8_t *sk); + +#define PQCP_MLKEM_NATIVE_MLKEM768_SECRETKEYBYTES 2400 +#define PQCP_MLKEM_NATIVE_MLKEM768_PUBLICKEYBYTES 1184 +#define PQCP_MLKEM_NATIVE_MLKEM768_CIPHERTEXTBYTES 1088 +#define PQCP_MLKEM_NATIVE_MLKEM768_KEYPAIRCOINBYTES 64 +#define PQCP_MLKEM_NATIVE_MLKEM768_ENCCOINBYTES 32 +#define PQCP_MLKEM_NATIVE_MLKEM768_BYTES 32 + +int PQCP_MLKEM_NATIVE_MLKEM768_keypair_derand(uint8_t *pk, uint8_t *sk, + const uint8_t *coins); +int PQCP_MLKEM_NATIVE_MLKEM768_keypair(uint8_t *pk, uint8_t *sk); +int PQCP_MLKEM_NATIVE_MLKEM768_enc_derand(uint8_t *ct, uint8_t *ss, + const uint8_t *pk, + const uint8_t *coins); +int PQCP_MLKEM_NATIVE_MLKEM768_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int PQCP_MLKEM_NATIVE_MLKEM768_dec(uint8_t *ss, const uint8_t *ct, + const uint8_t *sk); + +#define PQCP_MLKEM_NATIVE_MLKEM1024_SECRETKEYBYTES 3168 +#define PQCP_MLKEM_NATIVE_MLKEM1024_PUBLICKEYBYTES 1568 +#define PQCP_MLKEM_NATIVE_MLKEM1024_CIPHERTEXTBYTES 1568 +#define PQCP_MLKEM_NATIVE_MLKEM1024_KEYPAIRCOINBYTES 64 +#define PQCP_MLKEM_NATIVE_MLKEM1024_ENCCOINBYTES 32 +#define PQCP_MLKEM_NATIVE_MLKEM1024_BYTES 32 + +int PQCP_MLKEM_NATIVE_MLKEM1024_keypair_derand(uint8_t *pk, uint8_t *sk, + const uint8_t *coins); +int PQCP_MLKEM_NATIVE_MLKEM1024_keypair(uint8_t *pk, uint8_t *sk); +int PQCP_MLKEM_NATIVE_MLKEM1024_enc_derand(uint8_t *ct, uint8_t *ss, + const uint8_t *pk, + const uint8_t *coins); +int PQCP_MLKEM_NATIVE_MLKEM1024_enc(uint8_t *ct, uint8_t *ss, + const uint8_t *pk); +int PQCP_MLKEM_NATIVE_MLKEM1024_dec(uint8_t *ss, const uint8_t *ct, + const uint8_t *sk); + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/arith_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/arith_native.h new 
file mode 100644 index 0000000000..b7e921323a --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/arith_native.h @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef MLKEM_ARITH_NATIVE_H +#define MLKEM_ARITH_NATIVE_H + +#include +#include "config.h" +#include "params.h" + +#if defined(MLKEM_USE_NATIVE) + +#include "poly.h" +#include "polyvec.h" +#include "profile.h" + +/* + * MLKEM native arithmetic interface + * + * This is the C<->native arithmetic interface used in this repository + * to allow for the drop-in of native code for performance critical + * components of ML-KEM. + * + * A _profile_ is a specific implementation of (part of) this interface. + * The active profile (if any) is determined in profile.h. + * + * To add a function to a profile, define MLKEM_USE_NATIVE_XXX and + * implement `static inline xxx(...)` in the profile header. + * + * The only exception is MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER. This option can + * be set if there are native implementations for all of NTT, invNTT, and + * base multiplication, and allows the native implementation to use a + * custom order of polynomial coefficients in NTT domain -- the use of such + * custom order is not an implementation-detail since the public matrix + * is generated in NTT domain. In this case, a permutation function + * poly_permute_bitrev_to_custom() needs to be provided that permutes + * polynomials in NTT domain from bitreversed to the custom order. + */ + +/* + * Those functions are meant to be trivial wrappers around + * the chosen native implementation. The are static inline + * to avoid unnecessary calls. + * The macro before each declaration controls whether a native + * implementation is present. 
+ */ + +#if defined(MLKEM_USE_NATIVE_NTT) +/************************************************* + * Name: ntt_native + * + * Description: Computes negacyclic number-theoretic transform (NTT) of + * a polynomial in place. + * + * The input polynomial is assumed to be in normal order. + * The output polynomial is in bitreversed order, or of a + * custom order if MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER is set. + * See the documentation of MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER + * for more information. + * + * Arguments: - poly *p: pointer to in/output polynomial + **************************************************/ +static INLINE void ntt_native(poly *); +#endif /* MLKEM_USE_NATIVE_NTT */ + +#if defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER) +/* + * This must only be set if NTT, invNTT, basemul, mulcache, and + * to/from byte stream conversions all have native implementations + * that are adapted to the custom order. + */ +#if !defined(MLKEM_USE_NATIVE_NTT) || !defined(MLKEM_USE_NATIVE_INTT) || \ + !defined(MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE) || \ + !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) || \ + !defined(MLKEM_USE_NATIVE_POLY_TOBYTES) || \ + !defined(MLKEM_USE_NATIVE_POLY_FROMBYTES) +#error \ + "Invalid native profile: MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER can only be \ +set if there are native implementations for NTT, invNTT, mulcache, basemul, \ +and to/from bytes conversions." +#endif + +/************************************************* + * Name: poly_permute_bitrev_to_custom + * + * Description: When MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER is defined, + * convert a polynomial in NTT domain from bitreversed + * order to the custom order output by the native NTT. + * + * This must only be defined if there is native code for + * all of (a) NTT, (b) invNTT, (c) basemul, (d) mulcache. 
+ * Arguments: - poly *p: pointer to in/output polynomial + * + **************************************************/ +static INLINE void poly_permute_bitrev_to_custom(poly *); +#endif /* MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER */ + +#if defined(MLKEM_USE_NATIVE_INTT) +/************************************************* + * Name: intt_native + * + * Description: Computes inverse of negacyclic number-theoretic transform (NTT) + * of a polynomial in place. + * + * The input polynomial is in bitreversed order, or of a + * custom order if MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER is set. + * See the documentation of MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER + * for more information. + * The output polynomial is assumed to be in normal order. + * + * Arguments: - uint16_t *a: pointer to in/output polynomial + **************************************************/ +static INLINE void intt_native(poly *); +#endif /* MLKEM_USE_NATIVE_INTT */ + +#if defined(MLKEM_USE_NATIVE_POLY_REDUCE) +/************************************************* + * Name: poly_reduce_native + * + * Description: Applies modular reduction to all coefficients of a polynomial. 
+ * + * Arguments: - poly *r: pointer to input/output polynomial + **************************************************/ +static INLINE void poly_reduce_native(poly *); +#endif /* MLKEM_USE_NATIVE_POLY_REDUCE */ + +#if defined(MLKEM_USE_NATIVE_POLY_TOMONT) +/************************************************* + * Name: poly_tomont_native + * + * Description: Inplace conversion of all coefficients of a polynomial + * from normal domain to Montgomery domain + * + * Arguments: - poly *r: pointer to input/output polynomial + **************************************************/ +static INLINE void poly_tomont_native(poly *); +#endif /* MLKEM_USE_NATIVE_POLY_TOMONT */ + +#if defined(MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE) +/************************************************* + * Name: poly_mulcache_compute_native + * + * Description: Compute multiplication cache for a polynomial + * in NTT domain. + * + * The purpose of the multiplication cache is to + * cache repeated computations required during a + * base multiplication of polynomials in NTT domain. + * The structure of the multiplication-cache is + * implementation defined. + * + * Arguments: INPUT: + * - poly: const pointer to input polynomial. + * This must be in NTT domain and inin bitreversed order, or of + * a custom order if MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER is set. + * See the documentation of MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER + * for more information. + * OUTPUT + * - cache: pointer to multiplication cache + **************************************************/ +static INLINE void poly_mulcache_compute_native(poly_mulcache *cache, + const poly *poly); +#endif /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ + +#if defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) +/************************************************* + * Name: poly_mulcache_compute_native + * + * Description: Compute multiplication of polynomials in NTT domain. + * + * Arguments: INPUT: + * - a: First polynomial operand. 
+ * This must be in NTT domain and inin bitreversed order, or of + * a custom order if MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER is set. + * See the documentation of MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER + * for more information. + * - b: Second polynomial operand. + * As for a. + * - b_cache: Multiplication-cache for b. + * OUTPUT + * - r: Result of the base multiplication. This is again + * in NTT domain, and of the same order as a and b. + **************************************************/ +static INLINE void polyvec_basemul_acc_montgomery_cached_native( + poly *r, const polyvec *a, const polyvec *b, + const polyvec_mulcache *b_cache); +#endif + +#if defined(MLKEM_USE_NATIVE_POLY_TOBYTES) +/************************************************* + * Name: poly_tobytes_native + * + * Description: Serialization of a polynomial. + * Signed coefficients are converted to + * unsigned form before serialization. + * + * Arguments: INPUT: + * - a: const pointer to input polynomial, + * with each coefficient in the range -Q+1 .. Q-1 + * OUTPUT + * - r: pointer to output byte array + * (of MLKEM_POLYBYTES bytes) + **************************************************/ +static INLINE void poly_tobytes_native(uint8_t r[MLKEM_POLYBYTES], + const poly *a); +#endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */ + +#if defined(MLKEM_USE_NATIVE_POLY_FROMBYTES) +/************************************************* + * Name: poly_frombytes_native + * + * Description: Serialization of a polynomial. + * Signed coefficients are converted to + * unsigned form before serialization. 
+ * + * Arguments: INPUT: + * - r: const pointer to input byte array + * (of MLKEM_POLYBYTES bytes) + * OUTPUT + * - a: pointer to output polynomial in NTT domain + **************************************************/ +static INLINE void poly_frombytes_native(poly *a, + const uint8_t r[MLKEM_POLYBYTES]); +#endif /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ + +#if defined(MLKEM_USE_NATIVE_REJ_UNIFORM) +/************************************************* + * Name: rej_uniform_native + * + * Description: Run rejection sampling on uniform random bytes to generate + * uniform random integers mod q + * + * Arguments: - int16_t *r: pointer to output buffer + * - unsigned int len: requested number of 16-bit integers + * (uniform mod q). + * - const uint8_t *buf: pointer to input buffer + * (assumed to be uniform random bytes) + * - unsigned int buflen: length of input buffer in bytes. + * + * Return -1 if the native implementation does not support the input lengths. + * Otherwise, returns non-negative number of sampled 16-bit integers (at most + * len).
+ **************************************************/ +static INLINE int rej_uniform_native(int16_t *r, unsigned int len, + const uint8_t *buf, unsigned int buflen); +#endif /* MLKEM_USE_NATIVE_REJ_UNIFORM */ + +#endif /* MLKEM_USE_NATIVE */ +#endif /* MLKEM_ARITH_NATIVE_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.c new file mode 100644 index 0000000000..073f3c81d7 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "cbd.h" +#include +#include "params.h" + +/************************************************* + * Name: load32_littleendian + * + * Description: load 4 bytes into a 32-bit integer + * in little-endian order + * + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns 32-bit unsigned integer loaded from x + **************************************************/ +static uint32_t load32_littleendian(const uint8_t x[4]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + r |= (uint32_t)x[3] << 24; + return r; +} + +/************************************************* + * Name: load24_littleendian + * + * Description: load 3 bytes into a 32-bit integer + * in little-endian order. 
+ * This function is only needed for ML-KEM-512 + * + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) + **************************************************/ +#if MLKEM_ETA1 == 3 +static uint32_t load24_littleendian(const uint8_t x[3]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + return r; +} +#endif + +/************************************************* + * Name: cbd2 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter eta=2 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) +{ + int i; + for (i = 0; i < MLKEM_N / 8; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(array_abs_bound(r->coeffs, 0, (8 * i - 1), 2))) + { + int j; + uint32_t t = load32_littleendian(buf + 4 * i); + uint32_t d = t & 0x55555555; + d += (t >> 1) & 0x55555555; + + for (j = 0; j < 8; j++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(array_abs_bound(r->coeffs, 0, 8 * i + j - 1, 2))) + { + const int16_t a = (d >> (4 * j + 0)) & 0x3; + const int16_t b = (d >> (4 * j + 2)) & 0x3; + r->coeffs[8 * i + j] = a - b; + } + } +} + +/************************************************* + * Name: cbd3 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter eta=3. 
+ * This function is only needed for ML-KEM-512 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +#if MLKEM_ETA1 == 3 +static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) +{ + int i; + for (i = 0; i < MLKEM_N / 4; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 3))) + { + int j; + const uint32_t t = load24_littleendian(buf + 3 * i); + uint32_t d = t & 0x00249249; + d += (t >> 1) & 0x00249249; + d += (t >> 2) & 0x00249249; + + for (j = 0; j < 4; j++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4) + invariant(array_abs_bound(r->coeffs, 0, 4 * i + j - 1, 3))) + { + const int16_t a = (d >> (6 * j + 0)) & 0x7; + const int16_t b = (d >> (6 * j + 3)) & 0x7; + r->coeffs[4 * i + j] = a - b; + } + } +} +#endif + +void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) +{ +#if MLKEM_ETA1 == 2 + cbd2(r, buf); +#elif MLKEM_ETA1 == 3 + cbd3(r, buf); +#else +#error "This implementation requires eta1 in {2,3}" +#endif +} + +void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) +{ +#if MLKEM_ETA2 == 2 + cbd2(r, buf); +#else +#error "This implementation requires eta2 = 2" +#endif +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.h new file mode 100644 index 0000000000..4dc8635bb5 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef CBD_H +#define CBD_H + +#include +#include "params.h" +#include "poly.h" + +#define poly_cbd_eta1 MLKEM_NAMESPACE(poly_cbd_eta1) +/************************************************* + * Name: poly_cbd_eta1 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with 
coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA1. + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA1)) +); + +#define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2) +/************************************************* + * Name: poly_cbd_eta2 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA2. + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2)) +); + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbmc.h new file mode 100644 index 0000000000..317a26421b --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbmc.h @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +/*************************************************** + * Basic replacements for __CPROVER_XXX contracts + ***************************************************/ + +#include "common.h" + +#ifndef CBMC + +#define STATIC_INLINE_TESTABLE static INLINE +#define
STATIC_TESTABLE static + +#define __contract__(x) +#define __loop__(x) +#define cassert(x, y) + +#else /* CBMC _is_ defined, therefore we're doing proof */ + +/* expose certain procedures to CBMC proofs that are static otherwise */ +#define STATIC_TESTABLE +#define STATIC_INLINE_TESTABLE + +#define __contract__(x) x +#define __loop__(x) x + +/* https://diffblue.github.io/cbmc/contracts-assigns.html */ +#define assigns(...) __CPROVER_assigns(__VA_ARGS__) + +/* https://diffblue.github.io/cbmc/contracts-requires-ensures.html */ +#define requires(...) __CPROVER_requires(__VA_ARGS__) +#define ensures(...) __CPROVER_ensures(__VA_ARGS__) +/* https://diffblue.github.io/cbmc/contracts-loops.html */ +#define invariant(...) __CPROVER_loop_invariant(__VA_ARGS__) +#define decreases(...) __CPROVER_decreases(__VA_ARGS__) +/* cassert to avoid confusion with in-built assert */ +#define cassert(...) __CPROVER_assert(__VA_ARGS__) +#define assume(...) __CPROVER_assume(__VA_ARGS__) + +/*************************************************** + * Macros for "expression" forms that may appear + * _inside_ top-level contracts. + ***************************************************/ + +/* + * function return value - useful inside ensures + * https://diffblue.github.io/cbmc/contracts-functions.html + */ +#define return_value (__CPROVER_return_value) + +/* + * assigns l-value targets + * https://diffblue.github.io/cbmc/contracts-assigns.html + */ +#define object_whole(...) __CPROVER_object_whole(__VA_ARGS__) +#define memory_slice(...) __CPROVER_object_upto(__VA_ARGS__) +#define same_object(...) __CPROVER_same_object(__VA_ARGS__) + +/* + * Pointer-related predicates + * https://diffblue.github.io/cbmc/contracts-memory-predicates.html + */ +#define memory_no_alias(...) __CPROVER_is_fresh(__VA_ARGS__) +#define readable(...) __CPROVER_r_ok(__VA_ARGS__) +#define writeable(...) 
__CPROVER_w_ok(__VA_ARGS__) + +/* + * History variables + * https://diffblue.github.io/cbmc/contracts-history-variables.html + */ +#define old(...) __CPROVER_old(__VA_ARGS__) +#define loop_entry(...) __CPROVER_loop_entry(__VA_ARGS__) + +/* + * Quantifiers + * Note that the range on qvar is _inclusive_ between qvar_lb .. qvar_ub + * https://diffblue.github.io/cbmc/contracts-quantifiers.html + */ + +/* + * Prevent clang-format from corrupting CBMC's special ==> operator + */ +/* clang-format off */ +#define forall(type, qvar, qvar_lb, qvar_ub, predicate) \ + __CPROVER_forall \ + { \ + type qvar; \ + ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==> (predicate) \ + } + +#define EXISTS(type, qvar, qvar_lb, qvar_ub, predicate) \ + __CPROVER_exists \ + { \ + type qvar; \ + ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) && (predicate) \ + } +/* clang-format on */ + +/*************************************************** + * Convenience macros for common contract patterns + ***************************************************/ + +/* + * Boolean-value predidate that asserts that "all values of array_var are in + * range value_lb .. 
value_ub (inclusive)" + * Example: + * array_bound(a->coeffs, 0, MLKEM_N-1, -(MLKEM_Q - 1), MLKEM_Q - 1) + * expands to + * __CPROVER_forall { int k; (0 <= k && k <= MLKEM_N-1) ==> ( (-(MLKEM_Q - + * 1) <= a->coeffs[k]) && (a->coeffs[k] <= (MLKEM_Q - 1))) } + */ + +/* + * Prevent clang-format from corrupting CBMC's special ==> operator + */ +/* clang-format off */ +#define CBMC_CONCAT_(left, right) left##right +#define CBMC_CONCAT(left, right) CBMC_CONCAT_(left, right) + +#define array_bound_core(indextype, qvar, qvar_lb, qvar_ub, array_var, \ + value_lb, value_ub) \ + __CPROVER_forall \ + { \ + indextype qvar; \ + ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==> \ + (((value_lb) <= (array_var[(qvar)])) && \ + ((array_var[(qvar)]) <= (value_ub))) \ + } + +#define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \ + array_bound_core(int, CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \ + (qvar_ub), (array_var), (value_lb), (value_ub)) + + +/* Wrapper around array_bound operating on absolute values */ +#define array_abs_bound(arr, lb, ub, k) \ + array_bound((arr), (lb), (ub), (-(k)), (k)) +/* clang-format on */ + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/common.h new file mode 100644 index 0000000000..94c29ed927 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/common.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef COMMON_H +#define COMMON_H + + +/* + * C90 does not have the inline compiler directive yet. + * We don't use it in C90 builds. + * However, in that case the compiler warns about some inline functions in + * header files not being used in every compilation unit that includes that + * header. To work around it we silence that warning in that case using + * __attribute__((unused)). 
+ */ + +/* Do not use inline for C90 builds*/ +#if !defined(inline) +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define INLINE inline +#define ALWAYS_INLINE __attribute__((always_inline)) +#elif defined(_MSC_VER) +#define INLINE __inline +#define ALWAYS_INLINE __forceinline +#else +#define INLINE __attribute__((unused)) +#define ALWAYS_INLINE +#endif + +#else +#define INLINE inline +#define ALWAYS_INLINE __attribute__((always_inline)) +#endif + + +/* + * C90 does not have the restrict compiler directive yet. + * We don't use it in C90 builds. + */ +#if !defined(restrict) +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define RESTRICT restrict +#else +#define RESTRICT +#endif + +#else + +#define RESTRICT restrict +#endif + +#define DEFAULT_ALIGN 32 +#if defined(_WIN32) +#define ALIGN __declspec(align(DEFAULT_ALIGN)) +#define asm __asm +#else +#define asm __asm__ +#define ALIGN __attribute__((aligned(DEFAULT_ALIGN))) +#endif + +#define MLKEM_CONCAT_(left, right) left##right +#define MLKEM_CONCAT(left, right) MLKEM_CONCAT_(left, right) + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/config.h new file mode 100644 index 0000000000..370a141a65 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/config.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef CONFIG_H +#define CONFIG_H + +#include "cpucap.h" + +#if defined(MLKEM_USE_NATIVE) + +#if defined(SYS_AARCH64) +#define MLKEM_USE_NATIVE_AARCH64 +#endif /* SYS_AARCH64 */ + +#if defined(SYS_X86_64) +#define MLKEM_USE_NATIVE_X86_64 +#endif /* SYS_X86_64 */ + +#endif /* MLKEM_USE_NATIVE */ +#endif /* CONFIG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cpucap.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cpucap.h new file mode 100644 index 0000000000..cfcbbc3fe9 --- /dev/null +++ 
b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cpucap.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef CPUCAP_H +#define CPUCAP_H + +/* Check if we're running on an AArch64 system. _M_ARM64 is set by MSVC. */ +#if defined(__AARCH64EL__) || defined(_M_ARM64) +#define SYS_AARCH64 +#endif + +#if defined(__x86_64__) +#define SYS_X86_64 +#if defined(__AVX2__) +#define SYS_X86_64_AVX2 +#endif +#endif /* __x86_64__ */ + +/* Check endianness */ +#if defined(__BYTE_ORDER__) + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define SYS_LITTLE_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define SYS_BIG_ENDIAN +#else /* __BYTE_ORDER__ */ +#error "__BYTE_ORDER__ defined, but don't recognize value." +#endif /* __BYTE_ORDER__ */ +#endif /* defined(__BYTE_ORDER__) */ + +/* If FORCE_AARCH64 is set, assert that we're indeed on an AArch64 system. */ +#if defined(FORCE_AARCH64) && !defined(SYS_AARCH64) +#error "FORCE_AARCH64 is set, but we don't seem to be on an AArch64 system." +#endif + +/* If FORCE_X86_64 is set, assert that we're indeed on an X86_64 system. */ +#if defined(FORCE_X86_64) && !defined(SYS_X86_64) +#error "FORCE_X86_64 is set, but we don't seem to be on an X86_64 system." 
+#endif + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.c new file mode 100644 index 0000000000..aa9b578074 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.c @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#define _ISOC99_SOURCE +#include "debug.h" +#include + +#if defined(MLKEM_DEBUG) + +static char debug_buf[256]; + +void mlkem_debug_assert(const char *file, int line, const char *description, + const int val) +{ + if (val == 0) + { + snprintf(debug_buf, sizeof(debug_buf), "Assertion failed: %s (value %d)", + description, val); + mlkem_debug_print_error(file, line, debug_buf); + exit(1); + } +} +void mlkem_debug_check_bounds(const char *file, int line, + const char *description, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive) +{ + int err = 0; + unsigned i; + for (i = 0; i < len; i++) + { + int16_t val = ptr[i]; + if (!(val > lower_bound_exclusive && val < upper_bound_exclusive)) + { + snprintf(debug_buf, sizeof(debug_buf), + "%s, index %u, value %d out of bounds (%d,%d)", description, i, + (int)val, lower_bound_exclusive, upper_bound_exclusive); + mlkem_debug_print_error(file, line, debug_buf); + err = 1; + } + } + + if (err == 1) + exit(1); +} + +void mlkem_debug_print_error(const char *file, int line, const char *msg) +{ + fprintf(stderr, "[ERROR:%s:%04d] %s\n", file, line, msg); + fflush(stderr); +} + +#else /* MLKEM_DEBUG */ + +int empty_cu_debug; + +#endif /* MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.h new file mode 100644 index 0000000000..65208771d2 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.h @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * 
SPDX-License-Identifier: Apache-2.0 + */ +#ifndef MLKEM_DEBUG_H +#define MLKEM_DEBUG_H + +#if defined(MLKEM_DEBUG) +#include +#include +#include + +/************************************************* + * Name: mlkem_debug_assert + * + * Description: Check debug assertion + * + * Prints an error message to stderr and calls + * exit(1) if not. + * + * Arguments: - file: filename + * - line: line number + * - description: Textual description of assertion + * - val: Value asserted to be non-zero + **************************************************/ +void mlkem_debug_assert(const char *file, int line, const char *description, + const int val); + +/************************************************* + * Name: mlkem_debug_check_bounds + * + * Description: Check whether values in an array of int16_t + * are within specified bounds. + * + * Prints an error message to stderr and calls + * exit(1) if not. + * + * Arguments: - file: filename + * - line: line number + * - description: Textual description of check + * - ptr: Base of array to be checked + * - len: Number of int16_t in ptr + * - lower_bound_exclusive: Exclusive lower bound + * - upper_bound_exclusive: Exclusive upper bound + **************************************************/ +void mlkem_debug_check_bounds(const char *file, int line, + const char *description, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive); + +/* Print error message to stderr alongside file and line information */ +void mlkem_debug_print_error(const char *file, int line, const char *msg); + +/* Check assertion, calling exit() upon failure + * + * val: Value that's asserted to be non-zero + * msg: Message to print on failure + * + * Currently called CASSERT to avoid clash with CBMC assert. 
+ */ +#define CASSERT(val, msg) \ + do \ + { \ + mlkem_debug_assert(__FILE__, __LINE__, (msg), (val)); \ + } while (0) + +/* Check absolute bounds of scalar + * val: Scalar to be checked + * abs_bound: Exclusive upper bound on absolute value to check + * msg: Message to print on failure */ +#define SCALAR_BOUND(val, abs_bound, msg) \ + CASSERT((val) > -(abs_bound) && (val) < (abs_bound), msg) + +/* Check that all coefficients in array of int16_t's are non-negative + * and below an exclusive upper bound. + * + * ptr: Base of array, expression of type int16_t* + * len: Number of int16_t in array + * high_bound: Exclusive upper bound on absolute value to check + * msg: Message to print on failure */ +#define UBOUND(ptr, len, high_bound, msg) \ + do \ + { \ + mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ + (len), -1, ((high_bound))); \ + } while (0) + +/* Check absolute bounds in array of int16_t's + * ptr: Base of array, expression of type int16_t* + * len: Number of int16_t in array + * abs_bound: Exclusive upper bound on absolute value to check + * msg: Message to print on failure */ +#define BOUND(ptr, len, abs_bound, msg) \ + do \ + { \ + mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ + (len), -(abs_bound), (abs_bound)); \ + } while (0) + +/* Check absolute bounds on coefficients in polynomial or mulcache + * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check + * abs_bound: Exclusive upper bound on absolute value to check + * msg: Message to print on failure */ +#define POLY_BOUND_MSG(ptr, abs_bound, msg) \ + BOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (abs_bound), \ + msg) + +/* Check unsigned bounds on coefficients in polynomial or mulcache + * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check + * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
+ * msg: Message to print on failure */ +#define POLY_UBOUND_MSG(ptr, ubound, msg) \ + UBOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (ubound), \ + msg) + +/* Check absolute bounds on coefficients in polynomial + * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check + * abs_bound: Exclusive upper bound on absolute value to check */ +#define POLY_BOUND(ptr, abs_bound) \ + POLY_BOUND_MSG((ptr), (abs_bound), "poly absolute bound for " #ptr) + +/* Check unsigned bounds on coefficients in polynomial + * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check + * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. + */ +#define POLY_UBOUND(ptr, ubound) \ + POLY_UBOUND_MSG((ptr), (ubound), "poly unsigned bound for " #ptr) + +/* Check absolute bounds on coefficients in vector of polynomials + * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check + * abs_bound: Exclusive upper bound on absolute value to check */ +#define POLYVEC_BOUND(ptr, abs_bound) \ + do \ + { \ + unsigned _debug_polyvec_bound_idx; \ + for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ + _debug_polyvec_bound_idx++) \ + POLY_BOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (abs_bound), \ + "polyvec absolute bound for " #ptr ".vec[i]"); \ + } while (0) + +/* Check unsigned bounds on coefficients in vector of polynomials + * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check + * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
+ */ +#define POLYVEC_UBOUND(ptr, ubound) \ + do \ + { \ + unsigned _debug_polyvec_bound_idx; \ + for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ + _debug_polyvec_bound_idx++) \ + POLY_UBOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (ubound), \ + "polyvec unsigned bound for " #ptr ".vec[i]"); \ + } while (0) + +/* Following AWS-LC to define a C99-compliant static assert */ +#define MLKEM_STATIC_ASSERT_DEFINE(cond, msg) \ + typedef struct \ + { \ + unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \ + } MLKEM_CONCAT(static_assertion_, msg) __attribute__((unused)); + +#define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \ + MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix)) +#define MLKEM_STATIC_ASSERT_ADD_LINE1(cond, line, suffix) \ + MLKEM_STATIC_ASSERT_ADD_LINE0(cond, MLKEM_CONCAT(line, suffix)) +#define MLKEM_STATIC_ASSERT_ADD_LINE2(cond, suffix) \ + MLKEM_STATIC_ASSERT_ADD_LINE1(cond, __LINE__, suffix) +#define MLKEM_STATIC_ASSERT_ADD_ERROR(cond, suffix) \ + MLKEM_STATIC_ASSERT_ADD_LINE2(cond, MLKEM_CONCAT(_error_is_, suffix)) +#define STATIC_ASSERT(cond, error) MLKEM_STATIC_ASSERT_ADD_ERROR(cond, error) + +#else /* MLKEM_DEBUG */ + +#define CASSERT(val, msg) \ + do \ + { \ + } while (0) +#define SCALAR_BOUND(val, abs_bound, msg) \ + do \ + { \ + } while (0) +#define BOUND(ptr, len, abs_bound, msg) \ + do \ + { \ + } while (0) +#define POLY_BOUND(ptr, abs_bound) \ + do \ + { \ + } while (0) +#define POLYVEC_BOUND(ptr, abs_bound) \ + do \ + { \ + } while (0) +#define POLY_BOUND_MSG(ptr, ubound, abs_bound) \ + do \ + { \ + } while (0) +#define UBOUND(ptr, len, high_bound, msg) \ + do \ + { \ + } while (0) +#define POLY_UBOUND(ptr, ubound) \ + do \ + { \ + } while (0) +#define POLYVEC_UBOUND(ptr, ubound) \ + do \ + { \ + } while (0) +#define POLY_UBOUND_MSG(ptr, ubound, msg) \ + do \ + { \ + } while (0) +#define STATIC_ASSERT(cond, error) + +#endif /* MLKEM_DEBUG */ + +#endif /* MLKEM_DEBUG_H */ diff --git 
a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.c new file mode 100644 index 0000000000..669460c29c --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.c @@ -0,0 +1,577 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "indcpa.h" +#include +#include +#include +#include "fips202.h" +#include "fips202x4.h" +#include "indcpa.h" +#include "ntt.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include "randombytes.h" +#include "rej_uniform.h" +#include "symmetric.h" + +#include "arith_native.h" +#include "debug/debug.h" + +#include "cbmc.h" + + +/************************************************* + * Name: pack_pk + * + * Description: Serialize the public key as concatenation of the + * serialized vector of polynomials pk + * and the public seed used to generate the matrix A. + * + * Arguments: uint8_t *r: pointer to the output serialized public key + * polyvec *pk: pointer to the input public-key polyvec. + * Must have coefficients within [0,..,q-1]. + * const uint8_t *seed: pointer to the input public seed + **************************************************/ +static void pack_pk(uint8_t r[MLKEM_INDCPA_PUBLICKEYBYTES], polyvec *pk, + const uint8_t seed[MLKEM_SYMBYTES]) +{ + POLYVEC_BOUND(pk, MLKEM_Q); + polyvec_tobytes(r, pk); + memcpy(r + MLKEM_POLYVECBYTES, seed, MLKEM_SYMBYTES); +} + +/************************************************* + * Name: unpack_pk + * + * Description: De-serialize public key from a byte array; + * approximate inverse of pack_pk + * + * Arguments: - polyvec *pk: pointer to output public-key polynomial vector + * Coefficients will be normalized to [0,..,q-1]. + * - uint8_t *seed: pointer to output seed to generate matrix A + * - const uint8_t *packedpk: pointer to input serialized public + * key. 
+ **************************************************/ +static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], + const uint8_t packedpk[MLKEM_INDCPA_PUBLICKEYBYTES]) +{ + polyvec_frombytes(pk, packedpk); + memcpy(seed, packedpk + MLKEM_POLYVECBYTES, MLKEM_SYMBYTES); + + /* + * TODO! We know from the modulus check that this will result in an + * unsigned canonical polynomial, but CBMC does not know it. We should + * weaken the specification of `unpack_pk()` and all depending functions + * to work with the weaker 4096-bound, so that the proofs go through + * without the need of this redundant call to polyvec_reduce(). + */ + polyvec_reduce(pk); +} + +/************************************************* + * Name: pack_sk + * + * Description: Serialize the secret key + * + * Arguments: - uint8_t *r: pointer to output serialized secret key + * - polyvec *sk: pointer to input vector of polynomials (secret + *key) + **************************************************/ +static void pack_sk(uint8_t r[MLKEM_INDCPA_SECRETKEYBYTES], polyvec *sk) +{ + POLYVEC_BOUND(sk, MLKEM_Q); + polyvec_tobytes(r, sk); +} + +/************************************************* + * Name: unpack_sk + * + * Description: De-serialize the secret key; inverse of pack_sk + * + * Arguments: - polyvec *sk: pointer to output vector of polynomials (secret + *key) + * - const uint8_t *packedsk: pointer to input serialized secret + *key + **************************************************/ +static void unpack_sk(polyvec *sk, + const uint8_t packedsk[MLKEM_INDCPA_SECRETKEYBYTES]) +{ + polyvec_frombytes(sk, packedsk); + polyvec_reduce(sk); +} + +/************************************************* + * Name: pack_ciphertext + * + * Description: Serialize the ciphertext as concatenation of the + * compressed and serialized vector of polynomials b + * and the compressed and serialized polynomial v + * + * Arguments: uint8_t *r: pointer to the output serialized ciphertext + * poly *pk: pointer to the input 
vector of polynomials b + * poly *v: pointer to the input polynomial v + **************************************************/ +static void pack_ciphertext(uint8_t r[MLKEM_INDCPA_BYTES], polyvec *b, poly *v) +{ + polyvec_compress_du(r, b); + poly_compress_dv(r + MLKEM_POLYVECCOMPRESSEDBYTES_DU, v); +} + +/************************************************* + * Name: unpack_ciphertext + * + * Description: De-serialize and decompress ciphertext from a byte array; + * approximate inverse of pack_ciphertext + * + * Arguments: - polyvec *b: pointer to the output vector of polynomials b + * - poly *v: pointer to the output polynomial v + * - const uint8_t *c: pointer to the input serialized ciphertext + **************************************************/ +static void unpack_ciphertext(polyvec *b, poly *v, + const uint8_t c[MLKEM_INDCPA_BYTES]) +{ + polyvec_decompress_du(b, c); + poly_decompress_dv(v, c + MLKEM_POLYVECCOMPRESSEDBYTES_DU); +} + +#ifndef MLKEM_GEN_MATRIX_NBLOCKS +#define MLKEM_GEN_MATRIX_NBLOCKS \ + ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + SHAKE128_RATE) / SHAKE128_RATE) +#endif + +/* + * Generate four A matrix entries from a seed, using rejection + * sampling on the output of a XOF. 
+ */ +STATIC_TESTABLE +void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4]) +__contract__( + requires(memory_no_alias(vec, sizeof(poly) * 4)) + requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) + requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) + assigns(memory_slice(vec, sizeof(poly) * 4)) + ensures(array_bound(vec[0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))) + ensures(array_bound(vec[1].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))) + ensures(array_bound(vec[2].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))) + ensures(array_bound(vec[3].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))) +{ + /* Temporary buffers for XOF output before rejection sampling */ + uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE]; + uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE]; + uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE]; + uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE]; + + /* Tracks the number of coefficients we have already sampled */ + unsigned int ctr[KECCAK_WAY]; + shake128x4incctx statex; + unsigned int buflen; + + shake128x4_inc_init(&statex); + + /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ + shake128x4_absorb_once(&statex, seed[0], seed[1], seed[2], seed[3], + MLKEM_SYMBYTES + 2); + + /* + * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + * This should generate the matrix entries with high probability. 
+ */ + shake128x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, + &statex); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE; + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); + + /* + * So long as not all matrix entries have been generated, squeeze + * one more block a time until we're done. + */ + buflen = SHAKE128_RATE; + while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || + ctr[3] < MLKEM_N) + __loop__( + assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), + object_whole(buf1), object_whole(buf2), object_whole(buf3)) + invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) + invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) + invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0] - 1, 0, (MLKEM_Q - 1))) + invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1] - 1, 0, (MLKEM_Q - 1))) + invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2] - 1, 0, (MLKEM_Q - 1))) + invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3] - 1, 0, (MLKEM_Q - 1)))) + { + shake128x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); + } + + shake128x4_inc_ctx_release(&statex); +} + +/* + * Generate a single A matrix entry from a seed, using rejection + * sampling on the output of a XOF. 
+ */ +STATIC_TESTABLE +void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) +__contract__( + requires(memory_no_alias(entry, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) + assigns(memory_slice(entry, sizeof(poly))) + ensures(array_bound(entry->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))) +{ + shake128incctx state; + uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE]; + unsigned int ctr, buflen; + + shake128_inc_init(&state); + + shake128_absorb_once(&state, seed, MLKEM_SYMBYTES + 2); + + /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + */ + /* This should generate the matrix entry with high probability. */ + shake128_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE; + ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); + + /* Squeeze + sample one more block a time until we're done */ + buflen = SHAKE128_RATE; + while (ctr < MLKEM_N) + __loop__( + assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) + invariant(0 <= ctr && ctr <= MLKEM_N) + invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr - 1, + 0, (MLKEM_Q - 1)))) + { + shake128_squeezeblocks(buf, 1, &state); + ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, SHAKE128_RATE); + } + + shake128_inc_ctx_release(&state); +} + +/************************************************* + * Name: gen_matrix + * + * Description: Deterministically generate matrix A (or the transpose of A) + * from a seed. Entries of the matrix are polynomials that look + * uniformly random. 
Performs rejection sampling on output of + * a XOF + * + * Arguments: - polyvec *a: pointer to output matrix A + * - const uint8_t *seed: pointer to input seed + * - int transposed: boolean deciding whether A or A^T is generated + **************************************************/ +/* Not static for benchmarking */ +void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) +{ + int i; + unsigned int j; + /* + * We generate four separate seed arrays rather than a single one to work + * around limitations in CBMC function contracts dealing with disjoint slices + * of the same parent object. + */ + + ALIGN uint8_t seed0[MLKEM_SYMBYTES + 2]; + ALIGN uint8_t seed1[MLKEM_SYMBYTES + 2]; + ALIGN uint8_t seed2[MLKEM_SYMBYTES + 2]; + ALIGN uint8_t seed3[MLKEM_SYMBYTES + 2]; + uint8_t *seedxy[4]; + seedxy[0] = seed0; + seedxy[1] = seed1; + seedxy[2] = seed2; + seedxy[3] = seed3; + + for (j = 0; j < KECCAK_WAY; j++) + { + memcpy(seedxy[j], seed, MLKEM_SYMBYTES); + } + + for (i = 0; i < (MLKEM_K * MLKEM_K / KECCAK_WAY) * KECCAK_WAY; + i += KECCAK_WAY) + { + uint8_t x, y; + + for (j = 0; j < KECCAK_WAY; j++) + { + x = (i + j) / MLKEM_K; + y = (i + j) % MLKEM_K; + if (transposed) + { + seedxy[j][MLKEM_SYMBYTES + 0] = x; + seedxy[j][MLKEM_SYMBYTES + 1] = y; + } + else + { + seedxy[j][MLKEM_SYMBYTES + 0] = y; + seedxy[j][MLKEM_SYMBYTES + 1] = x; + } + } + + /* + * This call writes across polyvec boundaries for K=2 and K=3. + * This is intentional and safe. + */ + gen_matrix_entry_x4(&a[0].vec[0] + i, seedxy); + } + + /* For left over polynomial, we use single keccak. 
*/ + if (i < MLKEM_K * MLKEM_K) + { + uint8_t x, y; + x = i / MLKEM_K; + y = i % MLKEM_K; + + if (transposed) + { + seed0[MLKEM_SYMBYTES + 0] = x; + seed0[MLKEM_SYMBYTES + 1] = y; + } + else + { + seed0[MLKEM_SYMBYTES + 0] = y; + seed0[MLKEM_SYMBYTES + 1] = x; + } + + gen_matrix_entry(&a[0].vec[0] + i, seed0); + i++; + } + + cassert(i == MLKEM_K * MLKEM_K, + "gen_matrix: failed to generate whole matrix"); + +#if defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER) + /* + * The public matrix is generated in NTT domain. If the native backend + * uses a custom order in NTT domain, permute A accordingly. + */ + for (i = 0; i < MLKEM_K; i++) + { + for (j = 0; j < MLKEM_K; j++) + { + poly_permute_bitrev_to_custom(&a[i].vec[j]); + } + } +#endif /* MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER */ +} + +/************************************************* + * Name: matvec_mul + * + * Description: Computes matrix-vector product in NTT domain, + * via Montgomery multiplication. + * + * Arguments: - polyvec *out: Pointer to output polynomial vector + * - polyvec a[MLKEM_K]: Input matrix. Must be in NTT domain + * and have coefficients of absolute value < MLKEM_Q. + * - polyvec *v: Input polynomial vector. Must be in NTT domain. + * - polyvec *vc: Mulcache for v, computed via + * polyvec_mulcache_compute(). 
+ **************************************************/ +STATIC_TESTABLE +void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v, + const polyvec_mulcache *vc) +__contract__( + requires(memory_no_alias(out, sizeof(polyvec))) + requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K)) + requires(memory_no_alias(v, sizeof(polyvec))) + requires(memory_no_alias(vc, sizeof(polyvec_mulcache))) + requires(forall(int, k0, 0, MLKEM_K - 1, + forall(int, k1, 0, MLKEM_K - 1, + array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))) + assigns(object_whole(out))) +{ + int i; + for (i = 0; i < MLKEM_K; i++) + __loop__( + assigns(i, object_whole(out)) + invariant(i >= 0 && i <= MLKEM_K)) + { + polyvec_basemul_acc_montgomery_cached(&out->vec[i], &a[i], v, vc); + } +} + +/************************************************* + * Name: indcpa_keypair_derand + * + * Description: Generates public and private key for the CPA-secure + * public-key encryption scheme underlying ML-KEM + * + * Arguments: - uint8_t *pk: pointer to output public key + * (of length MLKEM_INDCPA_PUBLICKEYBYTES bytes) + * - uint8_t *sk: pointer to output private key + * (of length MLKEM_INDCPA_SECRETKEYBYTES bytes) + * - const uint8_t *coins: pointer to input randomness + * (of length MLKEM_SYMBYTES bytes) + **************************************************/ + +STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0) + +void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], + uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES], + const uint8_t coins[MLKEM_SYMBYTES]) +{ + ALIGN uint8_t buf[2 * MLKEM_SYMBYTES]; + const uint8_t *publicseed = buf; + const uint8_t *noiseseed = buf + MLKEM_SYMBYTES; + polyvec a[MLKEM_K], e, pkpv, skpv; + polyvec_mulcache skpv_cache; + + ALIGN uint8_t coins_with_domain_separator[MLKEM_SYMBYTES + 1]; + /* Concatenate coins with MLKEM_K for domain separation of security levels */ + memcpy(coins_with_domain_separator, coins, MLKEM_SYMBYTES); + 
coins_with_domain_separator[MLKEM_SYMBYTES] = MLKEM_K; + + hash_g(buf, coins_with_domain_separator, MLKEM_SYMBYTES + 1); + + gen_matrix(a, publicseed, 0 /* no transpose */); + +#if MLKEM_K == 2 + poly_getnoise_eta1_4x(skpv.vec + 0, skpv.vec + 1, e.vec + 0, e.vec + 1, + noiseseed, 0, 1, 2, 3); +#elif MLKEM_K == 3 + /* + * Only the first three output buffers are needed. + * The last parameter is a dummy that's overwritten later. + */ + poly_getnoise_eta1_4x(skpv.vec + 0, skpv.vec + 1, skpv.vec + 2, + pkpv.vec + 0 /* irrelevant */, noiseseed, 0, 1, 2, + 0xFF /* irrelevant */); + /* Same here */ + poly_getnoise_eta1_4x(e.vec + 0, e.vec + 1, e.vec + 2, + pkpv.vec + 0 /* irrelevant */, noiseseed, 3, 4, 5, + 0xFF /* irrelevant */); +#elif MLKEM_K == 4 + poly_getnoise_eta1_4x(skpv.vec + 0, skpv.vec + 1, skpv.vec + 2, skpv.vec + 3, + noiseseed, 0, 1, 2, 3); + poly_getnoise_eta1_4x(e.vec + 0, e.vec + 1, e.vec + 2, e.vec + 3, noiseseed, + 4, 5, 6, 7); +#endif + + polyvec_ntt(&skpv); + polyvec_ntt(&e); + + polyvec_mulcache_compute(&skpv_cache, &skpv); + matvec_mul(&pkpv, a, &skpv, &skpv_cache); + polyvec_tomont(&pkpv); + + /* Arithmetic cannot overflow, see static assertion at the top */ + polyvec_add(&pkpv, &e); + polyvec_reduce(&pkpv); + polyvec_reduce(&skpv); + + pack_sk(sk, &skpv); + pack_pk(pk, &pkpv, publicseed); +} + +/************************************************* + * Name: indcpa_enc + * + * Description: Encryption function of the CPA-secure + * public-key encryption scheme underlying Kyber. 
+ * + * Arguments: - uint8_t *c: pointer to output ciphertext + * (of length MLKEM_INDCPA_BYTES bytes) + * - const uint8_t *m: pointer to input message + * (of length MLKEM_INDCPA_MSGBYTES bytes) + * - const uint8_t *pk: pointer to input public key + * (of length MLKEM_INDCPA_PUBLICKEYBYTES) + * - const uint8_t *coins: pointer to input random coins used as + *seed (of length MLKEM_SYMBYTES) to deterministically generate all randomness + **************************************************/ + +/* Check that the arithmetic in indcpa_enc() does not overflow */ +STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0) +STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX, + indcpa_enc_bound_1) + +void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], + const uint8_t m[MLKEM_INDCPA_MSGBYTES], + const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], + const uint8_t coins[MLKEM_SYMBYTES]) +{ + ALIGN uint8_t seed[MLKEM_SYMBYTES]; + polyvec sp, pkpv, ep, at[MLKEM_K], b; + poly v, k, epp; + polyvec_mulcache sp_cache; + + unpack_pk(&pkpv, seed, pk); + poly_frommsg(&k, m); + gen_matrix(at, seed, 1 /* transpose */); + +#if MLKEM_K == 2 + poly_getnoise_eta1122_4x(sp.vec + 0, sp.vec + 1, ep.vec + 0, ep.vec + 1, + coins, 0, 1, 2, 3); + poly_getnoise_eta2(&epp, coins, 4); +#elif MLKEM_K == 3 + /* + * In this call, only the first three output buffers are needed. + * The last parameter is a dummy that's overwritten later. + */ + poly_getnoise_eta1_4x(sp.vec + 0, sp.vec + 1, sp.vec + 2, &b.vec[0], coins, 0, + 1, 2, 0xFF); + /* The fourth output buffer in this call _is_ used. 
*/ + poly_getnoise_eta2_4x(ep.vec + 0, ep.vec + 1, ep.vec + 2, &epp, coins, 3, 4, + 5, 6); +#elif MLKEM_K == 4 + poly_getnoise_eta1_4x(sp.vec + 0, sp.vec + 1, sp.vec + 2, sp.vec + 3, coins, + 0, 1, 2, 3); + poly_getnoise_eta2_4x(ep.vec + 0, ep.vec + 1, ep.vec + 2, ep.vec + 3, coins, + 4, 5, 6, 7); + poly_getnoise_eta2(&epp, coins, 8); +#endif + + polyvec_ntt(&sp); + + polyvec_mulcache_compute(&sp_cache, &sp); + matvec_mul(&b, at, &sp, &sp_cache); + polyvec_basemul_acc_montgomery_cached(&v, &pkpv, &sp, &sp_cache); + + polyvec_invntt_tomont(&b); + poly_invntt_tomont(&v); + + /* Arithmetic cannot overflow, see static assertion at the top */ + polyvec_add(&b, &ep); + poly_add(&v, &epp); + poly_add(&v, &k); + + polyvec_reduce(&b); + poly_reduce(&v); + + pack_ciphertext(c, &b, &v); +} + +/* Check that the arithmetic in indcpa_dec() does not overflow */ +STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0) + +void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], + const uint8_t c[MLKEM_INDCPA_BYTES], + const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES]) +{ + polyvec b, skpv; + poly v, sb; + + unpack_ciphertext(&b, &v, c); + unpack_sk(&skpv, sk); + + polyvec_ntt(&b); + polyvec_basemul_acc_montgomery(&sb, &skpv, &b); + poly_invntt_tomont(&sb); + + /* Arithmetic cannot overflow, see static assertion at the top */ + poly_sub(&v, &sb); + poly_reduce(&v); + + poly_tomsg(m, &v); +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.h new file mode 100644 index 0000000000..3f57eb1295 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef INDCPA_H +#define INDCPA_H + +#include +#include "cbmc.h" +#include "params.h" +#include "polyvec.h" + + +#define gen_matrix MLKEM_NAMESPACE(gen_matrix) + +void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int 
transposed) +__contract__( + requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K)) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires(transposed == 0 || transposed == 1) + assigns(object_whole(a)) + ensures(forall(int, x, 0, MLKEM_K - 1, forall(int, y, 0, MLKEM_K - 1, + array_bound(a[x].vec[y].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))); +); + +#define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand) +void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], + uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES], + const uint8_t coins[MLKEM_SYMBYTES]) +__contract__( + requires(memory_no_alias(pk, MLKEM_INDCPA_PUBLICKEYBYTES)) + requires(memory_no_alias(sk, MLKEM_INDCPA_SECRETKEYBYTES)) + requires(memory_no_alias(coins, MLKEM_SYMBYTES)) + assigns(object_whole(pk)) + assigns(object_whole(sk)) +); + +#define indcpa_enc MLKEM_NAMESPACE(indcpa_enc) +/************************************************* + * Name: indcpa_dec + * + * Description: Decryption function of the CPA-secure + * public-key encryption scheme underlying Kyber. 
+ * + * Arguments: - uint8_t *m: pointer to output decrypted message + * (of length MLKEM_INDCPA_MSGBYTES) + * - const uint8_t *c: pointer to input ciphertext + * (of length MLKEM_INDCPA_BYTES) + * - const uint8_t *sk: pointer to input secret key + * (of length MLKEM_INDCPA_SECRETKEYBYTES) + **************************************************/ +void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], + const uint8_t m[MLKEM_INDCPA_MSGBYTES], + const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], + const uint8_t coins[MLKEM_SYMBYTES]) +__contract__( + requires(memory_no_alias(c, MLKEM_INDCPA_BYTES)) + requires(memory_no_alias(m, MLKEM_INDCPA_MSGBYTES)) + requires(memory_no_alias(pk, MLKEM_INDCPA_PUBLICKEYBYTES)) + requires(memory_no_alias(coins, MLKEM_SYMBYTES)) + assigns(object_whole(c)) +); + +#define indcpa_dec MLKEM_NAMESPACE(indcpa_dec) +void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], + const uint8_t c[MLKEM_INDCPA_BYTES], + const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES]) +__contract__( + requires(memory_no_alias(c, MLKEM_INDCPA_BYTES)) + requires(memory_no_alias(m, MLKEM_INDCPA_MSGBYTES)) + requires(memory_no_alias(sk, MLKEM_INDCPA_SECRETKEYBYTES)) + assigns(object_whole(m)) +); + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.c new file mode 100644 index 0000000000..f84ee3f3da --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.c @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "kem.h" +#include +#include +#include +#include "indcpa.h" +#include "params.h" +#include "randombytes.h" +#include "symmetric.h" +#include "verify.h" + +#if defined(CBMC) +/* Redeclaration with contract needed for CBMC only */ +int memcmp(const void *str1, const void *str2, size_t n) +__contract__( + requires(memory_no_alias(str1, n)) + requires(memory_no_alias(str2, n)) +); +#endif + 
+/************************************************* + * Name: check_pk + * + * Description: Implements modulus check mandated by FIPS203, + * i.e., ensures that coefficients are in [0,q-1]. + * Described in Section 7.2 of FIPS203. + * + * Arguments: - const uint8_t *pk: pointer to input public key + * (an already allocated array of MLKEM_PUBLICKEYBYTES bytes) + ** + * Returns 0 on success, and -1 on failure + **************************************************/ +static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES]) +{ + polyvec p; + uint8_t p_reencoded[MLKEM_POLYVECBYTES]; + polyvec_frombytes(&p, pk); + polyvec_reduce(&p); + polyvec_tobytes(p_reencoded, &p); + /* Data is public, so a variable-time memcmp() is OK */ + if (memcmp(pk, p_reencoded, MLKEM_POLYVECBYTES)) + { + return -1; + } + return 0; +} + +/************************************************* + * Name: check_sk + * + * Description: Implements public key hash check mandated by FIPS203, + * i.e., ensures that + * sk[768𝑘+32 ∶ 768𝑘+64] = H(pk)= H(sk[384𝑘 : 768𝑘+32]) + * Described in Section 7.3 of FIPS203. + * + * Arguments: - const uint8_t *sk: pointer to input private key + * (an already allocated array of MLKEM_SECRETKEYBYTES bytes) + * + * Returns 0 on success, and -1 on failure + **************************************************/ +static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES]) +{ + uint8_t test[MLKEM_SYMBYTES]; + /* + * The parts of `sk` being hashed and compared here are public, so + * no secret information is leaked through the runtime or the return value + * of this function. 
+ */ + hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_PUBLICKEYBYTES); + if (memcmp(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test, + MLKEM_SYMBYTES)) + { + return -1; + } + return 0; +} + +/************************************************* + * Name: crypto_kem_keypair_derand + * + * Description: Generates public and private key + * for CCA-secure ML-KEM key encapsulation mechanism + * + * Arguments: - uint8_t *pk: pointer to output public key + * (an already allocated array of MLKEM_PUBLICKEYBYTES bytes) + * - uint8_t *sk: pointer to output private key + * (an already allocated array of MLKEM_SECRETKEYBYTES bytes) + * - uint8_t *coins: pointer to input randomness + * (an already allocated array filled with 2*MLKEM_SYMBYTES + *random bytes) + ** + * Returns 0 (success) + **************************************************/ +int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins) +{ + indcpa_keypair_derand(pk, sk, coins); + memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_PUBLICKEYBYTES); + hash_h(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk, + MLKEM_PUBLICKEYBYTES); + /* Value z for pseudo-random output on reject */ + memcpy(sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, coins + MLKEM_SYMBYTES, + MLKEM_SYMBYTES); + return 0; +} + +int crypto_kem_keypair(uint8_t *pk, uint8_t *sk) +{ + ALIGN uint8_t coins[2 * MLKEM_SYMBYTES]; + randombytes(coins, 2 * MLKEM_SYMBYTES); + crypto_kem_keypair_derand(pk, sk, coins); + return 0; +} + +int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, + const uint8_t *coins) +{ + ALIGN uint8_t buf[2 * MLKEM_SYMBYTES]; + /* Will contain key, coins */ + ALIGN uint8_t kr[2 * MLKEM_SYMBYTES]; + + if (check_pk(pk)) + { + return -1; + } + + memcpy(buf, coins, MLKEM_SYMBYTES); + + /* Multitarget countermeasure for coins + contributory KEM */ + hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_PUBLICKEYBYTES); + hash_g(kr, buf, 2 * MLKEM_SYMBYTES); + + /* coins are in kr+MLKEM_SYMBYTES */ + 
indcpa_enc(ct, buf, pk, kr + MLKEM_SYMBYTES); + + memcpy(ss, kr, MLKEM_SYMBYTES); + return 0; +} + +int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) +{ + ALIGN uint8_t coins[MLKEM_SYMBYTES]; + randombytes(coins, MLKEM_SYMBYTES); + return crypto_kem_enc_derand(ct, ss, pk, coins); +} + +int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) +{ + uint8_t fail; + ALIGN uint8_t buf[2 * MLKEM_SYMBYTES]; + /* Will contain key, coins */ + ALIGN uint8_t kr[2 * MLKEM_SYMBYTES]; + ALIGN uint8_t cmp[MLKEM_CIPHERTEXTBYTES + MLKEM_SYMBYTES]; + const uint8_t *pk = sk + MLKEM_INDCPA_SECRETKEYBYTES; + + if (check_sk(sk)) + { + return -1; + } + + indcpa_dec(buf, ct, sk); + + /* Multitarget countermeasure for coins + contributory KEM */ + memcpy(buf + MLKEM_SYMBYTES, sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, + MLKEM_SYMBYTES); + hash_g(kr, buf, 2 * MLKEM_SYMBYTES); + + /* coins are in kr+MLKEM_SYMBYTES */ + indcpa_enc(cmp, buf, pk, kr + MLKEM_SYMBYTES); + + fail = ct_memcmp(ct, cmp, MLKEM_CIPHERTEXTBYTES); + + /* Compute rejection key */ + rkprf(ss, sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, ct); + + /* Copy true key to return buffer if fail is 0 */ + ct_cmov_zero(ss, kr, MLKEM_SYMBYTES, fail); + + return 0; +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.h new file mode 100644 index 0000000000..6a33be7c7e --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef KEM_H +#define KEM_H + +#include +#include "cbmc.h" +#include "params.h" + +#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES +#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES +#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES +#define CRYPTO_BYTES MLKEM_SSBYTES + +#if (MLKEM_K == 2) +#define CRYPTO_ALGNAME "Kyber512" +#elif (MLKEM_K == 3) +#define CRYPTO_ALGNAME "Kyber768" 
+#elif (MLKEM_K == 4) +#define CRYPTO_ALGNAME "Kyber1024" +#endif + +#define crypto_kem_keypair_derand MLKEM_NAMESPACE(keypair_derand) +int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins) +__contract__( + requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES)) + requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES)) + requires(memory_no_alias(coins, 2 * MLKEM_SYMBYTES)) + assigns(object_whole(pk)) + assigns(object_whole(sk)) +); + +#define crypto_kem_keypair MLKEM_NAMESPACE(keypair) +/************************************************* + * Name: crypto_kem_keypair + * + * Description: Generates public and private key + * for CCA-secure ML-KEM key encapsulation mechanism + * + * Arguments: - uint8_t *pk: pointer to output public key + * (an already allocated array of MLKEM_PUBLICKEYBYTES bytes) + * - uint8_t *sk: pointer to output private key + * (an already allocated array of MLKEM_SECRETKEYBYTES bytes) + * + * Returns 0 (success) + **************************************************/ +int crypto_kem_keypair(uint8_t *pk, uint8_t *sk) +__contract__( + requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES)) + requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES)) + assigns(object_whole(pk)) + assigns(object_whole(sk)) +); + +#define crypto_kem_enc_derand MLKEM_NAMESPACE(enc_derand) +/************************************************* + * Name: crypto_kem_enc_derand + * + * Description: Generates cipher text and shared + * secret for given public key + * + * Arguments: - uint8_t *ct: pointer to output cipher text + * (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes) + * - uint8_t *ss: pointer to output shared secret + * (an already allocated array of MLKEM_SSBYTES bytes) + * - const uint8_t *pk: pointer to input public key + * (an already allocated array of MLKEM_PUBLICKEYBYTES bytes) + * - const uint8_t *coins: pointer to input randomness + * (an already allocated array filled with MLKEM_SYMBYTES random + *bytes) + ** + * Returns 0 on success, and 
-1 if the public key modulus check (see Section 7.2 + * of FIPS203) fails. + **************************************************/ +int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, + const uint8_t *coins) +__contract__( + requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES)) + requires(memory_no_alias(ss, MLKEM_SSBYTES)) + requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES)) + requires(memory_no_alias(coins, MLKEM_SYMBYTES)) + assigns(object_whole(ct)) + assigns(object_whole(ss)) +); + +#define crypto_kem_enc MLKEM_NAMESPACE(enc) +/************************************************* + * Name: crypto_kem_enc + * + * Description: Generates cipher text and shared + * secret for given public key + * + * Arguments: - uint8_t *ct: pointer to output cipher text + * (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes) + * - uint8_t *ss: pointer to output shared secret + * (an already allocated array of MLKEM_SSBYTES bytes) + * - const uint8_t *pk: pointer to input public key + * (an already allocated array of MLKEM_PUBLICKEYBYTES bytes) + * + * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2 + * of FIPS203) fails. 
+ **************************************************/ +int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) +__contract__( + requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES)) + requires(memory_no_alias(ss, MLKEM_SSBYTES)) + requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES)) + assigns(object_whole(ct)) + assigns(object_whole(ss)) +); + +#define crypto_kem_dec MLKEM_NAMESPACE(dec) +/************************************************* + * Name: crypto_kem_dec + * + * Description: Generates shared secret for given + * cipher text and private key + * + * Arguments: - uint8_t *ss: pointer to output shared secret + * (an already allocated array of MLKEM_SSBYTES bytes) + * - const uint8_t *ct: pointer to input cipher text + * (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes) + * - const uint8_t *sk: pointer to input private key + * (an already allocated array of MLKEM_SECRETKEYBYTES bytes) + * + * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of + * FIPS203) fails. + * + * On failure, ss will contain a pseudo-random value. 
+ **************************************************/ +int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) +__contract__( + requires(memory_no_alias(ss, MLKEM_SSBYTES)) + requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES)) + requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES)) + assigns(object_whole(ss)) +); + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.c new file mode 100644 index 0000000000..1844ca19fd --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.c @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "ntt.h" +#include +#include "params.h" +#include "reduce.h" + +#include "arith_native.h" +#include "debug/debug.h" +#include "ntt.h" + +#if !defined(MLKEM_USE_NATIVE_NTT) +/* + * Computes a block of CT butterflies with a fixed twiddle factor, + * using Montgomery multiplication. + * Parameters: + * - r: Pointer to base of polynomial (_not_ the base of butterfly block) + * - zeta: Twiddle factor to use for the butterfly. This must be in + * Montgomery form and signed canonical. + * - start: Offset to the beginning of the butterfly block + * - len: Index difference between coefficients subject to a butterfly + * - bound: Ghost variable describing coefficient bound: Prior to `start`, + * coefficients must be bound by `bound + MLKEM_Q`. Post `start`, + * they must be bound by `bound`. + * When this function returns, output coefficients in the index range + * [start, start+2*len) have bound bumped to `bound + MLKEM_Q`. 
+ * Example: + * - start=8, len=4 + * This would compute the following four butterflies + * 8 -- 12 + * 9 -- 13 + * 10 -- 14 + * 11 -- 15 + * - start=4, len=2 + * This would compute the following two butterflies + * 4 -- 6 + * 5 -- 7 + */ +STATIC_TESTABLE +void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, int len, + int bound) +__contract__( + requires(0 <= start && start < MLKEM_N) + requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N) + requires(0 <= bound && bound < INT16_MAX - MLKEM_Q) + requires(-HALF_Q < zeta && zeta < HALF_Q) + requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) + requires(array_abs_bound(r, 0, start - 1, bound + MLKEM_Q)) + requires(array_abs_bound(r, start, MLKEM_N - 1, bound)) + assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) + ensures(array_abs_bound(r, 0, start + 2*len - 1, bound + MLKEM_Q)) + ensures(array_abs_bound(r, start + 2 * len, MLKEM_N - 1, bound))) +{ + /* `bound` is a ghost variable only needed in the CBMC specification */ + int j; + ((void)bound); + for (j = start; j < start + len; j++) + __loop__( + invariant(start <= j && j <= start + len) + /* + * Coefficients are updated in strided pairs, so the bounds for the + * intermediate states alternate twice between the old and new bound + */ + invariant(array_abs_bound(r, 0, j - 1, bound + MLKEM_Q)) + invariant(array_abs_bound(r, j, start + len - 1, bound)) + invariant(array_abs_bound(r, start + len, j + len - 1, bound + MLKEM_Q)) + invariant(array_abs_bound(r, j + len, MLKEM_N - 1, bound))) + { + int16_t t; + t = fqmul(r[j + len], zeta); + r[j + len] = r[j] - t; + r[j] = r[j] + t; + } +} + +/* + *Compute one layer of forward NTT + * Parameters: + * - r: Pointer to base of polynomial + * - len: Stride of butterflies in this layer. + * - layer: Ghost variable indicating which layer is being applied. + * Must match `len` via `len == MLKEM_N >> layer`. 
+ * Note: `len` could be dropped and computed in the function, but + * we are following the structure of the reference NTT from the + * official Kyber implementation here, merely adding `layer` as + * a ghost variable for the specifications. + */ +STATIC_TESTABLE +void ntt_layer(int16_t r[MLKEM_N], int len, int layer) +__contract__( + requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) + requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer)) + requires(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1)) + assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) + ensures(array_abs_bound(r, 0, MLKEM_N - 1, (layer + 1) * MLKEM_Q - 1))) +{ + int start, k; + /* `layer` is a ghost variable only needed in the CBMC specification */ + ((void)layer); + /* Twiddle factors for layer n start at index 2^(layer-1) */ + k = MLKEM_N / (2 * len); + for (start = 0; start < MLKEM_N; start += 2 * len) + __loop__( + invariant(0 <= start && start < MLKEM_N + 2 * len) + invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) + invariant(array_abs_bound(r, 0, start - 1, (layer * MLKEM_Q - 1) + MLKEM_Q)) + invariant(array_abs_bound(r, start, MLKEM_N - 1, layer * MLKEM_Q - 1))) + { + int16_t zeta = zetas[k++]; + ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q - 1); + } +} + +/* + * Compute full forward NTT + * NOTE: This particular implementation satisfies a much tighter + * bound on the output coefficients (5*q) than the contractual one (8*q), + * but this is not needed in the calling code. Should we change the + * base multiplication strategy to require smaller NTT output bounds, + * the proof may need strengthening. + * REF-CHANGE: Removed indirection poly_ntt -> ntt() + * and integrated polynomial reduction into the NTT. 
+ */ + + +void poly_ntt(poly *p) +{ + int len, layer; + int16_t *r; + POLY_BOUND_MSG(p, MLKEM_Q, "ref ntt input"); + r = p->coeffs; + + for (len = 128, layer = 1; len >= 2; len >>= 1, layer++) + __loop__( + invariant(1 <= layer && layer <= 8 && len == (MLKEM_N >> layer)) + invariant(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1))) + { + ntt_layer(r, len, layer); + } + + /* Check the stronger bound */ + POLY_BOUND_MSG(p, NTT_BOUND, "ref ntt output"); +} +#else /* MLKEM_USE_NATIVE_NTT */ + +/* Check that bound for native NTT implies contractual bound */ +STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound) + +void poly_ntt(poly *p) +{ + POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input"); + ntt_native(p); + POLY_BOUND_MSG(p, NTT_BOUND_NATIVE, "native ntt output"); +} +#endif /* MLKEM_USE_NATIVE_NTT */ + +#if !defined(MLKEM_USE_NATIVE_INTT) + +/* Check that bound for reference invNTT implies contractual bound */ +#define INVNTT_BOUND_REF (3 * MLKEM_Q / 4) +STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound) + +/* Compute one layer of inverse NTT */ +STATIC_TESTABLE +void invntt_layer(int16_t *r, int len, int layer) +__contract__( + requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) + requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7) + requires(len == (1 << (8 - layer))) + requires(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) + ensures(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))) +{ + int start, k; + /* `layer` is a ghost variable used only in the specification */ + ((void)layer); + k = MLKEM_N / len - 1; + for (start = 0; start < MLKEM_N; start += 2 * len) + __loop__( + invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)) + invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */ + invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len)) + { + int j; + int16_t zeta = zetas[k--]; + for (j = start; j < 
start + len; j++) + __loop__( + invariant(start <= j && j <= start + len) + invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))) + { + int16_t t = r[j]; + r[j] = barrett_reduce(t + r[j + len]); + r[j + len] = r[j + len] - t; + r[j + len] = fqmul(r[j + len], zeta); + } + } +} + +void poly_invntt_tomont(poly *p) +{ + /* + * Scale input polynomial to account for Montgomery factor + * and NTT twist. This also brings coefficients down to + * absolute value < MLKEM_Q. + */ + int j, len, layer; + const int16_t f = 1441; + int16_t *r = p->coeffs; + + for (j = 0; j < MLKEM_N; j++) + __loop__( + invariant(0 <= j && j <= MLKEM_N) + invariant(array_abs_bound(r, 0, j - 1, MLKEM_Q))) + { + r[j] = fqmul(r[j], f); + } + + /* Run the invNTT layers */ + for (len = 2, layer = 7; len <= 128; len <<= 1, layer--) + __loop__( + invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer))) + invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))) + { + invntt_layer(p->coeffs, len, layer); + } + + POLY_BOUND_MSG(p, INVNTT_BOUND_REF, "ref intt output"); +} +#else /* MLKEM_USE_NATIVE_INTT */ + +/* Check that bound for native invNTT implies contractual bound */ +STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound) + +void poly_invntt_tomont(poly *p) +{ + intt_native(p); + POLY_BOUND_MSG(p, INVNTT_BOUND_NATIVE, "native intt output"); +} +#endif /* MLKEM_USE_NATIVE_INTT */ + +/************************************************* + * Name: basemul_cached + * + * Description: Multiplication of polynomials in Zq[X]/(X^2-zeta) + * used for multiplication of elements in Rq in NTT domain + * + * Bounds: + * - a is assumed to be < q in absolute value. 
+ * - Return value < 3/2 q in absolute value + * + * Arguments: - int16_t r[2]: pointer to the output polynomial + * - const int16_t a[2]: pointer to the first factor + * - const int16_t b[2]: pointer to the second factor + * - int16_t b_cached: Cached precomputation of b[1] * zeta + **************************************************/ +void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], + int16_t b_cached) +{ + int32_t t0, t1; + + BOUND(a, 2, MLKEM_Q, "basemul input bound"); + + t0 = (int32_t)a[1] * b_cached; + t0 += (int32_t)a[0] * b[0]; + t1 = (int32_t)a[0] * b[1]; + t1 += (int32_t)a[1] * b[0]; + + /* |ti| < 2 * q * 2^15 */ + r[0] = montgomery_reduce(t0); + r[1] = montgomery_reduce(t1); + + /* |r[i]| < 3/2 q */ + BOUND(r, 2, 3 * MLKEM_Q / 2, "basemul output bound"); +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.h new file mode 100644 index 0000000000..0f7b30624b --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef NTT_H +#define NTT_H + +#include +#include "arith_native.h" +#include "cbmc.h" +#include "params.h" +#include "poly.h" +#include "reduce.h" + +#define zetas MLKEM_NAMESPACE(zetas) +extern const int16_t zetas[128]; + +/************************************************* + * Name: poly_ntt + * + * Description: Computes negacyclic number-theoretic transform (NTT) of + * a polynomial in place. + * + * The input is assumed to be in normal order and + * coefficient-wise bound by MLKEM_Q in absolute value. + * + * The output polynomial is in bitreversed order, and + * coefficient-wise bound by NTT_BOUND in absolute value. + * + * (NOTE: Sometimes the input to the NTT is actually smaller, + * which gives better bounds.) 
+ * + * Arguments: - poly *r: pointer to in/output polynomial + **************************************************/ + +#define poly_ntt MLKEM_NAMESPACE(poly_ntt) +void poly_ntt(poly *r) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_Q - 1)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, NTT_BOUND - 1)) +); + +/************************************************* + * Name: poly_invntt_tomont + * + * Description: Computes inverse of negacyclic number-theoretic transform (NTT) + * of a polynomial in place; + * inputs assumed to be in bitreversed order, output in normal + * order + * + * The input is assumed to be in bitreversed order, and can + * have arbitrary coefficients in int16_t. + * + * The output polynomial is in normal order, and + * coefficient-wise bound by INVNTT_BOUND in absolute value. + * + * Arguments: - poly *r: pointer to in/output polynomial + **************************************************/ +#define poly_invntt_tomont MLKEM_NAMESPACE(poly_invntt_tomont) +void poly_invntt_tomont(poly *r) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, INVNTT_BOUND - 1)) +); + +#define basemul_cached MLKEM_NAMESPACE(basemul_cached) +/************************************************************ + * Name: basemul_cached + * + * Description: Computes a representative modulo q of + * (a0*b0 + a1*b_cached, a0*b1 + a1*b0)/65536 + * + * If b_cached is b1*zeta, this represents the + * product of (a0 + a1*X) and (b0 + b1*X) in + * Fq[X]/(X^2 - zeta). + * + * Arguments: - r: Pointer to output polynomial + * Upon return, coefficients are bound by + * 3*(q+1)/2 in absolute value. + * - a: Pointer to first input polynomial + * Must be coefficient-wise < q in absolute value. 
+ * - b: Pointer to second input polynomial + * Can have arbitrary int16_t coefficients + * - b_cached: Some precomputed value, typically derived from + * b1 and a twiddle factor. Can be an arbitrary int16_t. + ************************************************************/ +void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], + int16_t b_cached) +__contract__( + requires(memory_no_alias(r, 2 * sizeof(int16_t))) + requires(memory_no_alias(a, 2 * sizeof(int16_t))) + requires(memory_no_alias(b, 2 * sizeof(int16_t))) + requires(array_abs_bound(a, 0, 1, MLKEM_Q - 1)) + assigns(memory_slice(r, 2 * sizeof(int16_t))) + ensures(array_abs_bound(r, 0, 1, (3 * HALF_Q - 1))) +); + + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/params.h new file mode 100644 index 0000000000..da8041267e --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/params.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef PARAMS_H +#define PARAMS_H + +#include "common.h" +#include "cpucap.h" + +#define KECCAK_WAY 4 + +#ifndef MLKEM_K +#define MLKEM_K 3 /* Change this for different security strengths */ +#endif + +/* Don't change parameters below this line */ +#if (MLKEM_K == 2) +#define MLKEM_NAMESPACE(s) PQCP_MLKEM_NATIVE_MLKEM512_##s +#define _MLKEM_NAMESPACE(s) _PQCP_MLKEM_NATIVE_MLKEM512_##s +#elif (MLKEM_K == 3) +#define MLKEM_NAMESPACE(s) PQCP_MLKEM_NATIVE_MLKEM768_##s +#define _MLKEM_NAMESPACE(s) _PQCP_MLKEM_NATIVE_MLKEM768_##s +#elif (MLKEM_K == 4) +#define MLKEM_NAMESPACE(s) PQCP_MLKEM_NATIVE_MLKEM1024_##s +#define _MLKEM_NAMESPACE(s) _PQCP_MLKEM_NATIVE_MLKEM1024_##s +#else +#error "MLKEM_K must be in {2,3,4}" +#endif + +#define MLKEM_N 256 +#define MLKEM_Q 3329 + +#define MLKEM_SYMBYTES 32 /* size in bytes of hashes, and seeds */ +#define MLKEM_SSBYTES 32 /* size in bytes of shared key */ + +#define MLKEM_POLYBYTES 
384 +#define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES) + +#if MLKEM_K == 2 +#define MLKEM_ETA1 3 +#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 +#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) +#elif MLKEM_K == 3 +#define MLKEM_ETA1 2 +#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 +#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) +#elif MLKEM_K == 4 +#define MLKEM_ETA1 2 +#define MLKEM_POLYCOMPRESSEDBYTES_DV 160 +#define MLKEM_POLYCOMPRESSEDBYTES_DU 352 +#define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) +#endif + +#define MLKEM_ETA2 2 + +#define MLKEM_INDCPA_MSGBYTES (MLKEM_SYMBYTES) +#define MLKEM_INDCPA_PUBLICKEYBYTES (MLKEM_POLYVECBYTES + MLKEM_SYMBYTES) +#define MLKEM_INDCPA_SECRETKEYBYTES (MLKEM_POLYVECBYTES) +#define MLKEM_INDCPA_BYTES \ + (MLKEM_POLYVECCOMPRESSEDBYTES_DU + MLKEM_POLYCOMPRESSEDBYTES_DV) + +#define MLKEM_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES) +/* 32 bytes of additional space to save H(pk) */ +#define MLKEM_SECRETKEYBYTES \ + (MLKEM_INDCPA_SECRETKEYBYTES + MLKEM_INDCPA_PUBLICKEYBYTES + \ + 2 * MLKEM_SYMBYTES) +#define MLKEM_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES) + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.c new file mode 100644 index 0000000000..93a663c12b --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.c @@ -0,0 +1,568 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "poly.h" +#include +#include +#include "cbd.h" +#include "cbmc.h" +#include "fips202x4.h" +#include "ntt.h" +#include "params.h" +#include "reduce.h" +#include "symmetric.h" +#include "verify.h" + +#include "arith_native.h" +#include "debug/debug.h" + +void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) +{ + int j; 
+#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) + for (j = 0; j < MLKEM_N / 8; j++) + __loop__(invariant(j >= 0 && j <= MLKEM_N / 8)) + { + int k; + uint16_t t[8]; + for (k = 0; k < 8; k++) + __loop__( + invariant(k >= 0 && k <= 8) + invariant(forall(int, r, 0, k - 1, t[r] < (1u << 11)))) + { + t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + } + + /* + * REF-CHANGE: Use array indexing into + * r rather than pointer-arithmetic to simplify verification + * Make all implicit truncation explicit. No data is being + * truncated for the LHS's since each t[i] is 11-bit in size. + */ + r[11 * j + 0] = (t[0] >> 0) & 0xFF; + r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); + r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); + r[11 * j + 3] = (t[2] >> 2) & 0xFF; + r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); + r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); + r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); + r[11 * j + 7] = (t[5] >> 1) & 0xFF; + r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); + r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); + r[11 * j + 10] = (t[7] >> 3); + } + +#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) + for (j = 0; j < MLKEM_N / 4; j++) + __loop__(invariant(j >= 0 && j <= MLKEM_N / 4)) + { + int k; + uint16_t t[4]; + for (k = 0; k < 4; k++) + __loop__( + invariant(k >= 0 && k <= 4) + invariant(forall(int, r, 0, k - 1, t[r] < (1u << 10)))) + { + t[k] = scalar_compress_d10(a->coeffs[4 * j + k]); + } + + /* + * REF-CHANGE: Use array indexing into + * r rather than pointer-arithmetic to simplify verification + * Make all implicit truncation explicit. No data is being + * truncated for the LHS's since each t[i] is 10-bit in size. 
+ */ + r[5 * j + 0] = (t[0] >> 0) & 0xFF; + r[5 * j + 1] = (t[0] >> 8) | ((t[1] << 2) & 0xFF); + r[5 * j + 2] = (t[1] >> 6) | ((t[2] << 4) & 0xFF); + r[5 * j + 3] = (t[2] >> 4) | ((t[3] << 6) & 0xFF); + r[5 * j + 4] = (t[3] >> 2); + } +#else +#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" +#endif +} + + +void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +{ + int j; +#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) + for (j = 0; j < MLKEM_N / 8; j++) + __loop__( + invariant(0 <= j && j <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, 8 * j - 1, 0, (MLKEM_Q - 1)))) + { + int k; + uint16_t t[8]; + uint8_t const *base = &a[11 * j]; + t[0] = 0x7FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8)); + t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); + t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | + ((uint16_t)base[4] << 10)); + t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); + t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); + t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | + ((uint16_t)base[8] << 9)); + t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); + t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); + + for (k = 0; k < 8; k++) + __loop__( + invariant(0 <= k && k <= 8) + invariant(array_bound(r->coeffs, 0, 8 * j + k - 1, 0, (MLKEM_Q - 1)))) + { + r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); + } + } +#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) + for (j = 0; j < MLKEM_N / 4; j++) + __loop__( + invariant(0 <= j && j <= MLKEM_N / 4) + invariant(array_bound(r->coeffs, 0, 4 * j - 1, 0, (MLKEM_Q - 1)))) + { + int k; + uint16_t t[4]; + uint8_t const *base = &a[5 * j]; + + t[0] = 0x3FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8)); + t[1] = 0x3FF & ((base[1] >> 2) | ((uint16_t)base[2] << 6)); + t[2] = 0x3FF & ((base[2] >> 4) | ((uint16_t)base[3] << 4)); + t[3] = 0x3FF & ((base[3] >> 6) | ((uint16_t)base[4] << 2)); + + for (k = 0; k < 4; k++) + 
__loop__( + invariant(0 <= k && k <= 4) + invariant(array_bound(r->coeffs, 0, 4 * j + k - 1, 0, (MLKEM_Q - 1)))) + { + r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]); + } + } +#else +#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" +#endif +} + +void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) +{ + int i; + POLY_UBOUND(a, MLKEM_Q); + +#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + { + int j; + uint8_t t[8] = {0}; + for (j = 0; j < 8; j++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(array_bound(t, 0, (j-1), 0, 15))) + { + /* REF-CHANGE: Precondition change, we assume unsigned canonical data */ + t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); + } + + /* + * REF-CHANGE: Use array indexing into + * r rather than pointer-arithmetic to simplify verification + */ + r[i * 4] = t[0] | (t[1] << 4); + r[i * 4 + 1] = t[2] | (t[3] << 4); + r[i * 4 + 2] = t[4] | (t[5] << 4); + r[i * 4 + 3] = t[6] | (t[7] << 4); + } +#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + { + int j; + uint8_t t[8] = {0}; + for (j = 0; j < 8; j++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(array_bound(t, 0, (j-1), 0, 31))) + { + /* REF-CHANGE: Precondition change, we assume unsigned canonical data */ + t[j] = scalar_compress_d5(a->coeffs[8 * i + j]); + } + + /* + * REF-CHANGE: Explicitly truncate to avoid warning about + * implicit truncation in CBMC, and use array indexing into + * r rather than pointer-arithmetic to simplify verification + */ + r[i * 5] = 0xFF & ((t[0] >> 0) | (t[1] << 5)); + r[i * 5 + 1] = 0xFF & ((t[1] >> 3) | (t[2] << 2) | (t[3] << 7)); + r[i * 5 + 2] = 0xFF & ((t[3] >> 1) | (t[4] << 4)); + r[i * 5 + 3] = 0xFF & ((t[4] >> 4) | (t[5] << 1) | (t[6] << 6)); + r[i * 5 + 4] = 0xFF & ((t[6] >> 2) | 
(t[7] << 3)); + } +#else +#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" +#endif +} + +void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +{ + int i; +#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) + for (i = 0; i < MLKEM_N / 2; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 2) + invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, (MLKEM_Q - 1)))) + { + /* REF-CHANGE: Hoist scalar decompression into separate function */ + r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); + r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); + } +#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) + for (i = 0; i < MLKEM_N / 8; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1)))) + { + int j; + uint8_t t[8]; + const int offset = i * 5; + /* + * REF-CHANGE: Explicitly truncate to avoid warning about + * implicit truncation in CBMC and unwind loop for ease + * of proof. 
+ */ + + /* + * Decompress 5 8-bit bytes (so 40 bits) into + * 8 5-bit values stored in t[] + */ + t[0] = 0x1F & (a[offset + 0] >> 0); + t[1] = 0x1F & ((a[offset + 0] >> 5) | (a[offset + 1] << 3)); + t[2] = 0x1F & (a[offset + 1] >> 2); + t[3] = 0x1F & ((a[offset + 1] >> 7) | (a[offset + 2] << 1)); + t[4] = 0x1F & ((a[offset + 2] >> 4) | (a[offset + 3] << 4)); + t[5] = 0x1F & (a[offset + 3] >> 1); + t[6] = 0x1F & ((a[offset + 3] >> 6) | (a[offset + 4] << 2)); + t[7] = 0x1F & (a[offset + 4] >> 3); + + /* and copy to the correct slice in r[] */ + for (j = 0; j < 8; j++) + __loop__( + invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1)))) + { + /* REF-CHANGE: Hoist scalar decompression into separate function */ + r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]); + } + } +#else +#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" +#endif + + POLY_UBOUND(r, MLKEM_Q); +} + +#if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES) +void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) +{ + unsigned int i; + POLY_UBOUND(a, MLKEM_Q); + + + for (i = 0; i < MLKEM_N / 2; i++) + __loop__(invariant(i >= 0 && i <= MLKEM_N / 2)) + { + const uint16_t t0 = a->coeffs[2 * i]; + const uint16_t t1 = a->coeffs[2 * i + 1]; + /* REF-CHANGE: Precondition change, we assume unsigned canonical data */ + + /* + * t0 and t1 are both < MLKEM_Q, so contain at most 12 bits each of + * significant data, so these can be packed into 24 bits or exactly + * 3 bytes, as follows. + */ + + /* Least significant bits 0 - 7 of t0. */ + r[3 * i + 0] = t0 & 0xFF; + + /* + * Most significant bits 8 - 11 of t0 become the least significant + * nibble of the second byte. The least significant 4 bits + * of t1 become the upper nibble of the second byte. + */ + r[3 * i + 1] = (t0 >> 8) | ((t1 << 4) & 0xF0); + + /* Bits 4 - 11 of t1 become the third byte. 
*/ + r[3 * i + 2] = t1 >> 4; + } +} +#else /* MLKEM_USE_NATIVE_POLY_TOBYTES */ +void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) +{ + POLY_UBOUND(a, MLKEM_Q); + poly_tobytes_native(r, a); +} +#endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */ + +#if !defined(MLKEM_USE_NATIVE_POLY_FROMBYTES) +void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) +{ + int i; + for (i = 0; i < MLKEM_N / 2; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 2) + invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, 4095))) + { + /* REF-CHANGE: Introduce some locals for better readability */ + const uint8_t t0 = a[3 * i + 0]; + const uint8_t t1 = a[3 * i + 1]; + const uint8_t t2 = a[3 * i + 2]; + r->coeffs[2 * i + 0] = t0 | ((t1 << 8) & 0xFFF); + r->coeffs[2 * i + 1] = (t1 >> 4) | (t2 << 4); + } + + /* Note that the coefficients are not canonical */ + POLY_UBOUND(r, 4096); +} +#else /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ +void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) +{ + poly_frombytes_native(r, a); +} +#endif /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ + +void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) +{ + int i; +#if (MLKEM_INDCPA_MSGBYTES != MLKEM_N / 8) +#error "MLKEM_INDCPA_MSGBYTES must be equal to MLKEM_N/8 bytes!" 
+#endif + + for (i = 0; i < MLKEM_N / 8; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1)))) + { + int j; + for (j = 0; j < 8; j++) + __loop__( + invariant(i >= 0 && i < MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1)))) + { + /* Prevent the compiler from recognizing this as a bit selection */ + uint8_t mask = value_barrier_u8(1u << j); + r->coeffs[8 * i + j] = ct_sel_int16(HALF_Q, 0, msg[i] & mask); + } + } + POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output"); +} + +void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) +{ + int i; + POLY_UBOUND(a, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + { + int j; + msg[i] = 0; + for (j = 0; j < 8; j++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)) + { + uint32_t t = scalar_compress_d1(a->coeffs[8 * i + j]); + msg[i] |= t << j; + } + } +} + +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +{ + ALIGN uint8_t buf[KECCAK_WAY][MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; + memcpy(extkey[0], seed, MLKEM_SYMBYTES); + memcpy(extkey[1], seed, MLKEM_SYMBYTES); + memcpy(extkey[2], seed, MLKEM_SYMBYTES); + memcpy(extkey[3], seed, MLKEM_SYMBYTES); + extkey[0][MLKEM_SYMBYTES] = nonce0; + extkey[1][MLKEM_SYMBYTES] = nonce1; + extkey[2][MLKEM_SYMBYTES] = nonce2; + extkey[3][MLKEM_SYMBYTES] = nonce3; + shake256x4(buf[0], buf[1], buf[2], buf[3], MLKEM_ETA1 * MLKEM_N / 4, + extkey[0], extkey[1], extkey[2], extkey[3], MLKEM_SYMBYTES + 1); + poly_cbd_eta1(r0, buf[0]); + poly_cbd_eta1(r1, buf[1]); + poly_cbd_eta1(r2, buf[2]); + poly_cbd_eta1(r3, buf[3]); + + POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0"); + POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, 
"poly_getnoise_eta1_4x output 1"); + POLY_BOUND_MSG(r2, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 2"); + POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3"); +} + +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +{ + ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; + prf(buf, sizeof(buf), seed, nonce); + poly_cbd_eta2(r, buf); + + POLY_BOUND_MSG(r, MLKEM_ETA2 + 1, "poly_getnoise_eta2 output"); +} + +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +{ + ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; + memcpy(extkey[0], seed, MLKEM_SYMBYTES); + memcpy(extkey[1], seed, MLKEM_SYMBYTES); + memcpy(extkey[2], seed, MLKEM_SYMBYTES); + memcpy(extkey[3], seed, MLKEM_SYMBYTES); + extkey[0][MLKEM_SYMBYTES] = nonce0; + extkey[1][MLKEM_SYMBYTES] = nonce1; + extkey[2][MLKEM_SYMBYTES] = nonce2; + extkey[3][MLKEM_SYMBYTES] = nonce3; + +#if MLKEM_ETA1 == MLKEM_ETA2 + shake256x4(buf1[0], buf1[1], buf2[0], buf2[1], MLKEM_ETA1 * MLKEM_N / 4, + extkey[0], extkey[1], extkey[2], extkey[3], MLKEM_SYMBYTES + 1); +#else + shake256(buf1[0], sizeof(buf1[0]), extkey[0], sizeof(extkey[0])); + shake256(buf1[1], sizeof(buf1[1]), extkey[1], sizeof(extkey[1])); + shake256(buf2[0], sizeof(buf2[0]), extkey[2], sizeof(extkey[2])); + shake256(buf2[1], sizeof(buf2[1]), extkey[3], sizeof(extkey[3])); +#endif + + poly_cbd_eta1(r0, buf1[0]); + poly_cbd_eta1(r1, buf1[1]); + poly_cbd_eta2(r2, buf2[0]); + poly_cbd_eta2(r3, buf2[1]); + + POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 0"); + POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 1"); + POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2"); + POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1,
"poly_getnoise_eta1122_4x output 3"); +} + +void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, + const poly_mulcache *b_cache) +{ + int i; + POLY_BOUND(b_cache, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 4; i++) + __loop__( + assigns(i, object_whole(r)) + invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), (3 * HALF_Q - 1)))) + { + basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i], + b_cache->coeffs[2 * i]); + basemul_cached(&r->coeffs[4 * i + 2], &a->coeffs[4 * i + 2], + &b->coeffs[4 * i + 2], b_cache->coeffs[2 * i + 1]); + } +} + +#if !defined(MLKEM_USE_NATIVE_POLY_TOMONT) +void poly_tomont(poly *r) +{ + int i; + const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */ + for (i = 0; i < MLKEM_N; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N) + invariant(array_abs_bound(r->coeffs ,0, (i - 1), (MLKEM_Q - 1)))) + { + r->coeffs[i] = fqmul(r->coeffs[i], f); + } + + POLY_BOUND(r, MLKEM_Q); +} +#else /* MLKEM_USE_NATIVE_POLY_TOMONT */ +void poly_tomont(poly *r) +{ + poly_tomont_native(r); + POLY_BOUND(r, MLKEM_Q); +} +#endif /* MLKEM_USE_NATIVE_POLY_TOMONT */ + +#if !defined(MLKEM_USE_NATIVE_POLY_REDUCE) +void poly_reduce(poly *r) +{ + int i; + for (i = 0; i < MLKEM_N; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N) + invariant(array_bound(r->coeffs, 0, (i - 1), 0, (MLKEM_Q - 1)))) + { + /* Barrett reduction, giving signed canonical representative */ + int16_t t = barrett_reduce(r->coeffs[i]); + /* Conditional addition to get unsigned canonical representative */ + r->coeffs[i] = scalar_signed_to_unsigned_q(t); + } + + POLY_UBOUND(r, MLKEM_Q); +} +#else /* MLKEM_USE_NATIVE_POLY_REDUCE */ +void poly_reduce(poly *r) +{ + poly_reduce_native(r); + POLY_UBOUND(r, MLKEM_Q); +} +#endif /* MLKEM_USE_NATIVE_POLY_REDUCE */ + +void poly_add(poly *r, const poly *b) +{ + int i; + for (i = 0; i < MLKEM_N; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N) + invariant(forall(int, k0, i, MLKEM_N 
- 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) + invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1]))) + { + r->coeffs[i] = r->coeffs[i] + b->coeffs[i]; + } +} + +void poly_sub(poly *r, const poly *b) +{ + int i; + for (i = 0; i < MLKEM_N; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N) + invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) + invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1]))) + { + r->coeffs[i] = r->coeffs[i] - b->coeffs[i]; + } +} + +#if !defined(MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE) +void poly_mulcache_compute(poly_mulcache *x, const poly *a) +{ + int i; + for (i = 0; i < MLKEM_N / 4; i++) + __loop__(invariant(i >= 0 && i <= MLKEM_N / 4)) + { + x->coeffs[2 * i + 0] = fqmul(a->coeffs[4 * i + 1], zetas[64 + i]); + x->coeffs[2 * i + 1] = fqmul(a->coeffs[4 * i + 3], -zetas[64 + i]); + } + POLY_BOUND(x, MLKEM_Q); +} +#else /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ +void poly_mulcache_compute(poly_mulcache *x, const poly *a) +{ + poly_mulcache_compute_native(x, a); + /* Omitting POLY_BOUND(x, MLKEM_Q) since native implementations may + * decide not to use a mulcache. Note that the C backend implementation + * of poly_basemul_montgomery_cached() does still include the check. 
*/ +} +#endif /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.h new file mode 100644 index 0000000000..35990684b6 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.h @@ -0,0 +1,773 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef POLY_H +#define POLY_H + +#include +#include +#include "cbmc.h" +#include "params.h" +#include "reduce.h" +#include "verify.h" + +/* Absolute exclusive upper bound for the output of the inverse NTT */ +#define INVNTT_BOUND (8 * MLKEM_Q) + +/* Absolute exclusive upper bound for the output of the forward NTT */ +#define NTT_BOUND (8 * MLKEM_Q) + +/* + * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial + * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1] + */ +typedef struct +{ + int16_t coeffs[MLKEM_N]; +} ALIGN poly; + +/* + * INTERNAL presentation of precomputed data speeding up + * the base multiplication of two polynomials in NTT domain. + */ +/* + * REF-CHANGE: This structure does not exist in the reference + * implementation. + */ +typedef struct +{ + int16_t coeffs[MLKEM_N >> 1]; +} poly_mulcache; + +/************************************************************ + * Name: scalar_compress_d1 + * + * Description: Computes round(u * 2 / q) + * + * Implements Compress_d from FIPS203, Eq (4.7), + * for d = 1. + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + ************************************************************/ +/* + * The multiplication in this routine will exceed UINT32_MAX + * and wrap around for large values of u. This is expected and required. 
+ */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif +static INLINE uint32_t scalar_compress_d1(uint16_t u) +__contract__( + requires(u <= MLKEM_Q - 1) + ensures(return_value < 2) + ensures(return_value == (((uint32_t)u * 2 + MLKEM_Q / 2) / MLKEM_Q) % 2) ) +{ + uint32_t d0 = u << 1; + d0 *= 645083; + d0 += 1u << 30; + d0 >>= 31; + return d0; +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: scalar_compress_d4 + * + * Description: Computes round(u * 16 / q) % 16 + * + * Implements Compress_d from FIPS203, Eq (4.7), + * for d = 4. + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + ************************************************************/ +/* + * The multiplication in this routine will exceed UINT32_MAX + * and wrap around for large values of u. This is expected and required. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif +static INLINE uint32_t scalar_compress_d4(uint16_t u) +__contract__( + requires(u <= MLKEM_Q - 1) + ensures(return_value < 16) + ensures(return_value == (((uint32_t)u * 16 + MLKEM_Q / 2) / MLKEM_Q) % 16)) +{ + uint32_t d0 = (uint32_t)u * 1290160; /* 16 * round(2^28 / MLKEM_Q) */ + return (d0 + (1u << 27)) >> 28; /* round(d0/2^28) */ +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: scalar_decompress_d4 + * + * Description: Computes round(u * q / 16) + * + * Implements Decompress_d from FIPS203, Eq (4.8), + * for d = 4. + * + * Arguments: - u: Unsigned canonical modulus modulo 16 + * to be decompressed. 
+ ************************************************************/ +static INLINE uint16_t scalar_decompress_d4(uint32_t u) +__contract__( + requires(0 <= u && u < 16) + ensures(return_value <= (MLKEM_Q - 1)) +) { return ((u * MLKEM_Q) + 8) / 16; } + +/************************************************************ + * Name: scalar_compress_d5 + * + * Description: Computes round(u * 32 / q) % 32 + * + * Implements Compress_d from FIPS203, Eq (4.7), + * for d = 5. + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + ************************************************************/ +/* + * The multiplication in this routine will exceed UINT32_MAX + * and wrap around for large values of u. This is expected and required. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif +static INLINE uint32_t scalar_compress_d5(uint16_t u) +__contract__( + requires(u <= MLKEM_Q - 1) + ensures(return_value < 32) + ensures(return_value == (((uint32_t)u * 32 + MLKEM_Q / 2) / MLKEM_Q) % 32) ) +{ + uint32_t d0 = (uint32_t)u * 1290176; /* 2^5 * round(2^27 / MLKEM_Q) */ + return (d0 + (1u << 26)) >> 27; /* round(d0/2^27) */ +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: scalar_decompress_d5 + * + * Description: Computes round(u * q / 32) + * + * Implements Decompress_d from FIPS203, Eq (4.8), + * for d = 5. + * + * Arguments: - u: Unsigned canonical modulus modulo 32 + * to be decompressed. 
+ ************************************************************/ +static INLINE uint16_t scalar_decompress_d5(uint32_t u) +__contract__( + requires(0 <= u && u < 32) + ensures(return_value <= MLKEM_Q - 1) +) { return ((u * MLKEM_Q) + 16) / 32; } + +/************************************************************ + * Name: scalar_compress_d10 + * + * Description: Computes round(u * 2**10 / q) % 2**10 + * + * Implements Compress_d from FIPS203, Eq (4.7), + * for d = 10. + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + ************************************************************/ +/* + * The multiplication in this routine will exceed UINT32_MAX + * and wrap around for large values of u. This is expected and required. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif +static INLINE uint32_t scalar_compress_d10(uint16_t u) +__contract__( + requires(u <= MLKEM_Q - 1) + ensures(return_value < (1u << 10)) + ensures(return_value == (((uint32_t)u * (1u << 10) + MLKEM_Q / 2) / MLKEM_Q) % (1 << 10))) +{ + uint64_t d0 = (uint64_t)u * 2642263040; /* 2^10 * round(2^33 / MLKEM_Q) */ + d0 = (d0 + ((uint64_t)1u << 32)) >> 33; + return (d0 & 0x3FF); +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: scalar_decompress_d10 + * + * Description: Computes round(u * q / 1024) + * + * Implements Decompress_d from FIPS203, Eq (4.8), + * for d = 10. + * + * Arguments: - u: Unsigned canonical modulus modulo 1024 + * to be decompressed.
+ ************************************************************/ +static INLINE uint16_t scalar_decompress_d10(uint32_t u) +__contract__( + requires(0 <= u && u < 1024) + ensures(return_value <= (MLKEM_Q - 1)) +) { return ((u * MLKEM_Q) + 512) / 1024; } + +/************************************************************ + * Name: scalar_compress_d11 + * + * Description: Computes round(u * 2**11 / q) % 2**11 + * + * Implements Compress_d from FIPS203, Eq (4.7), + * for d = 11. + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + ************************************************************/ +/* + * The multiplication in this routine will exceed UINT32_MAX + * and wrap around for large values of u. This is expected and required. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif +static INLINE uint32_t scalar_compress_d11(uint16_t u) +__contract__( + requires(u <= MLKEM_Q - 1) + ensures(return_value < (1u << 11)) + ensures(return_value == (((uint32_t)u * (1u << 11) + MLKEM_Q / 2) / MLKEM_Q) % (1 << 11))) +{ + uint64_t d0 = (uint64_t)u * 5284526080; /* 2^11 * round(2^33 / MLKEM_Q) */ + d0 = (d0 + ((uint64_t)1u << 32)) >> 33; + return (d0 & 0x7FF); +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: scalar_decompress_d11 + * + * Description: Computes round(u * q / 2048) + * + * Implements Decompress_d from FIPS203, Eq (4.8), + * for d = 11. + * + * Arguments: - u: Unsigned canonical modulus modulo 2048 + * to be decompressed.
+ ************************************************************/ +static INLINE uint16_t scalar_decompress_d11(uint32_t u) +__contract__( + requires(0 <= u && u < 2048) + ensures(return_value <= (MLKEM_Q - 1)) +) { return ((u * MLKEM_Q) + 1024) / 2048; } + +/************************************************************ + * Name: scalar_signed_to_unsigned_q + * + * Description: converts signed polynomial coefficient + * from signed (-3328 .. 3328) form to + * unsigned form (0 .. 3328). + * + * Note: Cryptographic constant time implementation + * + * Examples: 0 -> 0 + * 1 -> 1 + * 3328 -> 3328 + * -1 -> 3328 + * -2 -> 3327 + * -3328 -> 1 + * + * Arguments: c: signed coefficient to be converted + ************************************************************/ +static INLINE uint16_t scalar_signed_to_unsigned_q(int16_t c) +__contract__( + requires(c >= -(MLKEM_Q - 1) && c <= (MLKEM_Q - 1)) + ensures(return_value >= 0 && return_value <= (MLKEM_Q - 1)) + ensures(return_value == (int32_t)c + (((int32_t)c < 0) * MLKEM_Q))) +{ + /* Add Q if c is negative, but in constant time */ + c = ct_sel_int16(c + MLKEM_Q, c, ct_cmask_neg_i16(c)); + + cassert(c >= 0, "scalar_signed_to_unsigned_q result lower bound"); + cassert(c < MLKEM_Q, "scalar_signed_to_unsigned_q result upper bound"); + + /* and therefore cast to uint16_t is safe. */ + return (uint16_t)c; +} + +#define poly_compress_du MLKEM_NAMESPACE(poly_compress_du) +/************************************************* + * Name: poly_compress_du + * + * Description: Compression (du bits) and subsequent serialization of a + *polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1].
+ **************************************************/ +void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) +); + +#define poly_decompress_du MLKEM_NAMESPACE(poly_decompress_du) +/************************************************* + * Name: poly_decompress_du + * + * Description: De-serialization and subsequent decompression (du bits) of a + *polynomial; approximate inverse of poly_compress_du + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) +); + +#define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv) +/************************************************* + * Name: poly_compress_dv + * + * Description: Compression (dv bits) and subsequent serialization of a + *polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) + assigns(object_whole(r)) +); + +#define poly_decompress_dv MLKEM_NAMESPACE(poly_decompress_dv) +/************************************************* + * Name: poly_decompress_dv + * + * Description: De-serialization and subsequent decompression (dv bits) of a + *polynomial; approximate inverse of poly_compress + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV + *bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(object_whole(r)) + ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) +); + +#define poly_tobytes MLKEM_NAMESPACE(poly_tobytes) +/************************************************* + * Name: poly_tobytes + * + * Description: Serialization of a polynomial. + * Signed coefficients are converted to + * unsigned form before serialization. 
+ * + * Arguments: INPUT: + * - a: const pointer to input polynomial, + * with each coefficient in the range [0,1,..,Q-1] + * OUTPUT + * - r: pointer to output byte array + * (of MLKEM_POLYBYTES bytes) + **************************************************/ +void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYBYTES)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) + assigns(object_whole(r)) +); + + +#define poly_frombytes MLKEM_NAMESPACE(poly_frombytes) +/************************************************* + * Name: poly_frombytes + * + * Description: De-serialization of a polynomial. + * + * Arguments: INPUT + * - a: pointer to input byte array + * (of MLKEM_POLYBYTES bytes) + * OUTPUT + * - r: pointer to output polynomial, with + * each coefficient unsigned and in the range + * 0 .. 4095 + **************************************************/ +void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYBYTES)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, 4095)) +); + + +#define poly_frommsg MLKEM_NAMESPACE(poly_frommsg) +/************************************************* + * Name: poly_frommsg + * + * Description: Convert 32-byte message to polynomial + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *msg: pointer to input message + **************************************************/ +void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) +__contract__( + requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(object_whole(r)) + ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) +); + +#define poly_tomsg MLKEM_NAMESPACE(poly_tomsg) 
+/************************************************* + * Name: poly_tomsg + * + * Description: Convert polynomial to 32-byte message + * + * Arguments: - uint8_t *msg: pointer to output message + * - const poly *r: pointer to input polynomial + * Coefficients must be unsigned canonical + **************************************************/ +void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *r) +__contract__( + requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES)) + requires(memory_no_alias(r, sizeof(poly))) + requires(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) + assigns(object_whole(msg)) +); + +#define poly_getnoise_eta1_4x MLKEM_NAMESPACE(poly_getnoise_eta1_4x) +/************************************************* + * Name: poly_getnoise_eta1_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial distribution + * with parameter MLKEM_ETA1. + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + **************************************************/ +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +/* Depending on MLKEM_K, the pointers passed to this function belong + to the same objects, so we cannot use memory_no_alias for r0-r3. + + NOTE: Somehow it is important to use memory_no_alias() first in the + conjunctions defining each case. 
+*/ +#if MLKEM_K == 2 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)) +); +#elif MLKEM_K == 4 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case B: r0, r1, r2, r3 consecutive */ + (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)) +); +#elif MLKEM_K == 3 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case C: r0, r1, r2 consecutive */ + (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && + r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
+ && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)) +); +#endif /* MLKEM_K */ + +#if MLKEM_ETA1 == MLKEM_ETA2 +/* + * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 + * where MLKEM_ETA2 = MLKEM_ETA1 = 2. + * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. + */ +#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x +#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ + +#define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2) +/************************************************* + * Name: poly_getnoise_eta2 + * + * Description: Sample a polynomial deterministically from a seed and a nonce, + * with output polynomial close to centered binomial distribution + * with parameter MLKEM_ETA2 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce: one-byte input nonce + **************************************************/ +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r)) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2)) +); + +#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x) +/************************************************* + * Name: poly_getnoise_eta1122_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial + * distribution with parameter MLKEM_ETA1 (r0, r1) and MLKEM_ETA2 (r2, r3) + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + **************************************************/ +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES],
+ uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +__contract__( + requires( /* r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) + ensures(array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA2) + && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA2)); +); + +#define poly_basemul_montgomery_cached \ + MLKEM_NAMESPACE(poly_basemul_montgomery_cached) +/************************************************* + * Name: poly_basemul_montgomery_cached + * + * Description: Multiplication of two polynomials in NTT domain, + * using mulcache for second operand. + * + * Bounds: + * - a is assumed to be coefficient-wise < q in absolute value. + * + * The result is coefficient-wise bound by 3/2 q in absolute + * value. + * + * Arguments: - poly *r: pointer to output polynomial + * - const poly *a: pointer to first input polynomial + * - const poly *b: pointer to second input polynomial + * - const poly_mulcache *b_cache: pointer to mulcache + * for second input polynomial. Can be computed + * via poly_mulcache_compute(). 
+ **************************************************/ +void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, + const poly_mulcache *b_cache) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(a, sizeof(poly))) + requires(memory_no_alias(b, sizeof(poly))) + requires(memory_no_alias(b_cache, sizeof(poly_mulcache))) + requires(array_abs_bound(a->coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))) + assigns(object_whole(r)) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, (3 * HALF_Q - 1))) +); + +#define poly_tomont MLKEM_NAMESPACE(poly_tomont) +/************************************************* + * Name: poly_tomont + * + * Description: Inplace conversion of all coefficients of a polynomial + * from normal domain to Montgomery domain + * + * Bounds: Output < q in absolute value. + * + * Arguments: - poly *r: pointer to input/output polynomial + **************************************************/ +void poly_tomont(poly *r) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))) +); + +/* REF-CHANGE: This function does not exist in the reference implementation */ +#define poly_mulcache_compute MLKEM_NAMESPACE(poly_mulcache_compute) +/************************************************************ + * Name: poly_mulcache_compute + * + * Description: Computes the mulcache for a polynomial in NTT domain + * + * The mulcache of a degree-2 polynomial b := b0 + b1*X + * in Fq[X]/(X^2-zeta) is the value b1*zeta, needed when + * computing products of b in Fq[X]/(X^2-zeta). + * + * The mulcache of a polynomial in NTT domain -- which is + * a 128-tuple of degree-2 polynomials in Fq[X]/(X^2-zeta), + * for varying zeta, is the 128-tuple of mulcaches of those + * polynomials. 
+ * + * Arguments: - x: Pointer to mulcache to be populated + * - a: Pointer to input polynomial + ************************************************************/ +/* + * NOTE: The default C implementation of this function populates + * the mulcache with values in (-q,q), but this is not needed for the + * higher level safety proofs, and thus not part of the spec. + */ +void poly_mulcache_compute(poly_mulcache *x, const poly *a) +__contract__( + requires(memory_no_alias(x, sizeof(poly_mulcache))) + requires(memory_no_alias(a, sizeof(poly))) + assigns(object_whole(x)) +); + +#define poly_reduce MLKEM_NAMESPACE(poly_reduce) +/************************************************* + * Name: poly_reduce + * + * Description: Converts polynomial to _unsigned canonical_ representatives. + * + * The input coefficients can be arbitrary integers in int16_t. + * The output coefficients are in [0,1,...,MLKEM_Q-1]. + * + * Arguments: - poly *r: pointer to input/output polynomial + **************************************************/ +/* + * REF-CHANGE: The semantics of poly_reduce() is different in + * the reference implementation, which requires + * signed canonical output data. Unsigned canonical + * outputs are better suited to the only remaining + * use of poly_reduce() in the context of (de)serialization. + */ +void poly_reduce(poly *r) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))) +); + +#define poly_add MLKEM_NAMESPACE(poly_add) +/************************************************************ + * Name: poly_add + * + * Description: Adds two polynomials in place + * + * Arguments: - r: Pointer to input-output polynomial to be added to. + * - b: Pointer to input polynomial that should be added + * to r. Must be disjoint from r. + * + * The coefficients of r and b must be so that the addition does + * not overflow. 
Otherwise, the behaviour of this function is undefined. + * + ************************************************************/ +/* + * REF-CHANGE: + * The reference implementation uses a 3-argument poly_add. + * We specialize to the accumulator form to avoid reasoning about aliasing. + */ +void poly_add(poly *r, const poly *b) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(b, sizeof(poly))) + requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX)) + requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN)) + ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k])) + assigns(memory_slice(r, sizeof(poly))) +); + +#define poly_sub MLKEM_NAMESPACE(poly_sub) +/************************************************* + * Name: poly_sub + * + * Description: Subtract polynomial b from r in place; no modular reduction is performed + * + * Arguments: - poly *r: Pointer to input-output polynomial to be + * subtracted from. + * - const poly *b: Pointer to input polynomial to subtract from r + **************************************************/ +/* + * REF-CHANGE: + * The reference implementation uses a 3-argument poly_sub. + * We specialize to the accumulator form to avoid reasoning about aliasing.
+ */ +void poly_sub(poly *r, const poly *b) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(b, sizeof(poly))) + requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX)) + requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN)) + ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k])) + assigns(object_whole(r)) +); + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.c new file mode 100644 index 0000000000..5e4dd0c5c4 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "polyvec.h" +#include +#include "arith_native.h" +#include "config.h" +#include "ntt.h" +#include "params.h" +#include "poly.h" + +#include "debug/debug.h" +void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU], + const polyvec *a) +{ + unsigned int i; + POLYVEC_UBOUND(a, MLKEM_Q); + + for (i = 0; i < MLKEM_K; i++) + { + poly_compress_du(r + i * MLKEM_POLYCOMPRESSEDBYTES_DU, &a->vec[i]); + } +} + +void polyvec_decompress_du(polyvec *r, + const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU]) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU); + } + + POLYVEC_UBOUND(r, MLKEM_Q); +} + +void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]); + } +} + +void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES]) +{ + int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES); + } +} + +void polyvec_ntt(polyvec *r) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + 
poly_ntt(&r->vec[i]); + } +} + +void polyvec_invntt_tomont(polyvec *r) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_invntt_tomont(&r->vec[i]); + } +} + +/************************************************* + * Name: polyvec_basemul_acc_montgomery_cached + * + * Description: Multiply elements of a and b in NTT domain, accumulate into r, + * and multiply by 2^-16. + * + * Bounds: + * - a is assumed to be coefficient-wise < q in absolute value. + * - b is assumed to be the output of a forward NTT and + * thus coefficient-wise bound by NTT_BOUND + * - b_cache is assumed to be coefficient-wise bound by + * MLKEM_Q. + * + * Arguments: - poly *r: pointer to output polynomial + * - const polyvec *a: pointer to first input vector of polynomials + * - const polyvec *b: pointer to second input vector of polynomials + * - const polyvec_mulcache *b_cache: mulcache for b + **************************************************/ +#if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) +void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, + const polyvec *b, + const polyvec_mulcache *b_cache) +{ + int i; + poly t; + + POLYVEC_BOUND(a, MLKEM_Q); + POLYVEC_BOUND(b, NTT_BOUND); + POLYVEC_BOUND(b_cache, MLKEM_Q); + + poly_basemul_montgomery_cached(r, &a->vec[0], &b->vec[0], &b_cache->vec[0]); + for (i = 1; i < MLKEM_K; i++) + { + poly_basemul_montgomery_cached(&t, &a->vec[i], &b->vec[i], + &b_cache->vec[i]); + poly_add(r, &t); + /* abs bounds: < (i+1) * 3/2 * q */ + } + + /* + * Those bounds are true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus best to omit + * them from the spec to not unnecessarily constraint native implementations.
+ */ + cassert( + array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_K * (3 * HALF_Q - 1)), + "polyvec_basemul_acc_montgomery_cached output bounds"); + /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */ + POLY_BOUND(r, MLKEM_K * 3 * HALF_Q); +} +#else /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */ +void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, + const polyvec *b, + const polyvec_mulcache *b_cache) +{ + POLYVEC_BOUND(a, MLKEM_Q); + POLYVEC_BOUND(b, NTT_BOUND); + /* Omitting POLYVEC_BOUND(b_cache, MLKEM_Q) since native implementations may + * decide not to use a mulcache. Note that the C backend implementation + * of poly_basemul_montgomery_cached() does still include the check. */ + polyvec_basemul_acc_montgomery_cached_native(r, a, b, b_cache); +} +#endif /* MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */ + +/************************************************* + * Name: polyvec_basemul_acc_montgomery + * + * Description: Multiply elements of a and b in NTT domain, accumulate into r, + * and multiply by 2^-16. + * + * Arguments: - poly *r: pointer to output polynomial + * - const polyvec *a: pointer to first input vector of polynomials + * - const polyvec *b: pointer to second input vector of polynomials + **************************************************/ +void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) +{ + polyvec_mulcache b_cache; + polyvec_mulcache_compute(&b_cache, b); + polyvec_basemul_acc_montgomery_cached(r, a, b, &b_cache); +} + +/************************************************* + * Name: polyvec_mulcache_compute + * + * Description: Precompute values speeding up + * base multiplications of polynomials + * in NTT domain. + * + * Arguments: - polyvec_mulcache *x: pointer to output cache. 
+ * - const poly *a: pointer to input polynomial + **************************************************/ +void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_mulcache_compute(&x->vec[i], &a->vec[i]); + } +} + + +/************************************************* + * Name: polyvec_reduce + * + * Description: Applies Barrett reduction to each coefficient + * of each element of a vector of polynomials; + * for details of the Barrett reduction see comments in reduce.c + * + * Arguments: - polyvec *r: pointer to input/output polynomial + **************************************************/ +void polyvec_reduce(polyvec *r) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_reduce(&r->vec[i]); + } +} + +void polyvec_add(polyvec *r, const polyvec *b) +{ + int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_add(&r->vec[i], &b->vec[i]); + } +} + +void polyvec_tomont(polyvec *r) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_tomont(&r->vec[i]); + } +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.h new file mode 100644 index 0000000000..7771fd3b28 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.h @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef POLYVEC_H +#define POLYVEC_H + +#include +#include "params.h" +#include "poly.h" + +typedef struct +{ + poly vec[MLKEM_K]; +} ALIGN polyvec; + +/* REF-CHANGE: This struct does not exist in the reference implementation */ +typedef struct +{ + poly_mulcache vec[MLKEM_K]; +} polyvec_mulcache; + +#define polyvec_compress_du MLKEM_NAMESPACE(polyvec_compress_du) +/************************************************* + * Name: polyvec_compress_du + * + * Description: Compress and serialize vector of polynomials + * + * Arguments: - uint8_t *r: pointer to output byte 
array + * (needs space for MLKEM_POLYVECCOMPRESSEDBYTES_DU) + * - const polyvec *a: pointer to input vector of polynomials. + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. + **************************************************/ +void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU], + const polyvec *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYVECCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(a, sizeof(polyvec))) + requires(forall(int, k0, 0, MLKEM_K - 1, + array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))) + assigns(object_whole(r)) +); + +#define polyvec_decompress_du MLKEM_NAMESPACE(polyvec_decompress_du) +/************************************************* + * Name: polyvec_decompress_du + * + * Description: De-serialize and decompress vector of polynomials; + * approximate inverse of polyvec_compress_du + * + * Arguments: - polyvec *r: pointer to output vector of polynomials. + * Output will have coefficients normalized to [0,..,q-1]. + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYVECCOMPRESSEDBYTES_DU) + **************************************************/ +void polyvec_decompress_du(polyvec *r, + const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYVECCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(r, sizeof(polyvec))) + assigns(object_whole(r)) + ensures(forall(int, k0, 0, MLKEM_K - 1, + array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))) +); + +#define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes) +/************************************************* + * Name: polyvec_tobytes + * + * Description: Serialize vector of polynomials + * + * Arguments: - uint8_t *r: pointer to output byte array + * (needs space for MLKEM_POLYVECBYTES) + * - const polyvec *a: pointer to input vector of polynomials + * Each polynomial must have coefficients in [0,..,q-1]. 
+ **************************************************/ +void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a) +__contract__( + requires(memory_no_alias(a, sizeof(polyvec))) + requires(memory_no_alias(r, MLKEM_POLYVECBYTES)) + requires(forall(int, k0, 0, MLKEM_K - 1, + array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))) + assigns(object_whole(r)) +); + +#define polyvec_frombytes MLKEM_NAMESPACE(polyvec_frombytes) +/************************************************* + * Name: polyvec_frombytes + * + * Description: De-serialize vector of polynomials; + * inverse of polyvec_tobytes + * + * Arguments: - polyvec *r: pointer to output vector of polynomials. + * Output coefficients are unsigned and in the range 0 .. 4095. + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYVECBYTES) + **************************************************/ +void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES]) +__contract__( + requires(memory_no_alias(r, sizeof(polyvec))) + requires(memory_no_alias(a, MLKEM_POLYVECBYTES)) + assigns(object_whole(r)) + ensures(forall(int, k0, 0, MLKEM_K - 1, + array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, 4095))) +); + +#define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt) +/************************************************* + * Name: polyvec_ntt + * + * Description: Apply forward NTT to all elements of a vector of polynomials. + * + * The input is assumed to be in normal order and + * coefficient-wise bound by MLKEM_Q in absolute value. + * + * The output polynomial is in bitreversed order, and + * coefficient-wise bound by NTT_BOUND in absolute value.
+ * + * Arguments: - polyvec *r: pointer to in/output vector of polynomials + * + **************************************************/ +void polyvec_ntt(polyvec *r) +__contract__( + requires(memory_no_alias(r, sizeof(polyvec))) + requires(forall(int, j, 0, MLKEM_K - 1, + array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))) + assigns(object_whole(r)) + ensures(forall(int, j, 0, MLKEM_K - 1, + array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (NTT_BOUND - 1)))) +); + +#define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont) +/************************************************* + * Name: polyvec_invntt_tomont + * + * Description: Apply inverse NTT to all elements of a vector of polynomials + * and multiply by Montgomery factor 2^16 + * + * The input is assumed to be in bitreversed order, and can + * have arbitrary coefficients in int16_t. + * + * The output polynomial is in normal order, and + * coefficient-wise bound by INVNTT_BOUND in absolute value. + * + * + * Arguments: - polyvec *r: pointer to in/output vector of polynomials + **************************************************/ +void polyvec_invntt_tomont(polyvec *r) +__contract__( + requires(memory_no_alias(r, sizeof(polyvec))) + assigns(object_whole(r)) + ensures(forall(int, j, 0, MLKEM_K - 1, + array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (INVNTT_BOUND - 1)))) +); + +#define polyvec_basemul_acc_montgomery \ + MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery) +void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, + const polyvec *b); + +/* REF-CHANGE: This function does not exist in the reference implementation */ +#define polyvec_basemul_acc_montgomery_cached \ + MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached) +/************************************************* + * Name: polyvec_basemul_acc_montgomery_cached + * + * Description: Scalar product of two vectors of polynomials in NTT domain, + * using mulcache for second operand. 
+ * + * Bounds: + * - a is assumed to be coefficient-wise < q in absolute value. + * - No bounds guarantees for the coefficients in the result. + * + * Arguments: - poly *r: pointer to output polynomial + * - const polyvec *a: pointer to first input polynomial vector + * - const polyvec *b: pointer to second input polynomial vector + * - const polyvec_mulcache *b_cache: pointer to mulcache + * for second input polynomial vector. Can be computed + * via polyvec_mulcache_compute(). + **************************************************/ +void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, + const polyvec *b, + const polyvec_mulcache *b_cache) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(a, sizeof(polyvec))) + requires(memory_no_alias(b, sizeof(polyvec))) + requires(memory_no_alias(b_cache, sizeof(polyvec_mulcache))) +/* Input is coefficient-wise < q in absolute value */ + requires(forall(int, k1, 0, MLKEM_K - 1, + array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))) + assigns(memory_slice(r, sizeof(poly))) +); + +/* REF-CHANGE: This function does not exist in the reference implementation */ +#define polyvec_mulcache_compute MLKEM_NAMESPACE(polyvec_mulcache_compute) +/************************************************************ + * Name: polyvec_mulcache_compute + * + * Description: Computes the mulcache for a vector of polynomials in NTT domain + * + * The mulcache of a degree-2 polynomial b := b0 + b1*X + * in Fq[X]/(X^2-zeta) is the value b1*zeta, needed when + * computing products of b in Fq[X]/(X^2-zeta). + * + * The mulcache of a polynomial in NTT domain -- which is + * a 128-tuple of degree-2 polynomials in Fq[X]/(X^2-zeta), + * for varying zeta, is the 128-tuple of mulcaches of those + * polynomials. + * + * The mulcache of a vector of polynomials is the vector + * of mulcaches of its entries. 
+ * + * Arguments: - x: Pointer to mulcache to be populated + * - a: Pointer to input polynomial vector + ************************************************************/ +/* + * NOTE: The default C implementation of this function populates + * the mulcache with values in (-q,q), but this is not needed for the + * higher level safety proofs, and thus not part of the spec. + */ +void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a) +__contract__( + requires(memory_no_alias(x, sizeof(polyvec_mulcache))) + requires(memory_no_alias(a, sizeof(polyvec))) + assigns(object_whole(x)) +); + +#define polyvec_reduce MLKEM_NAMESPACE(polyvec_reduce) +/************************************************* + * Name: polyvec_reduce + * + * Description: Applies Barrett reduction to each coefficient + * of each element of a vector of polynomials; + * for details of the Barrett reduction see comments in reduce.c + * + * Arguments: - polyvec *r: pointer to input/output polynomial + **************************************************/ +/* + * REF-CHANGE: The semantics of polyvec_reduce() is different in + * the reference implementation, which requires + * signed canonical output data. Unsigned canonical + * outputs are better suited to the only remaining + * use of poly_reduce() in the context of (de)serialization. + */ +void polyvec_reduce(polyvec *r) +__contract__( + requires(memory_no_alias(r, sizeof(polyvec))) + assigns(object_whole(r)) + ensures(forall(int, k0, 0, MLKEM_K - 1, + array_bound(r->vec[k0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))) +); + +#define polyvec_add MLKEM_NAMESPACE(polyvec_add) +/************************************************* + * Name: polyvec_add + * + * Description: Add vectors of polynomials + * + * Arguments: - polyvec *r: pointer to input-output vector of polynomials to be + * added to + * - const polyvec *b: pointer to second input vector of polynomials + * + * The coefficients of r and b must be so that the addition does + * not overflow. 
Otherwise, the behaviour of this function is undefined. + * + * The coefficients returned in *r are in int16_t which is sufficient + * to prove type-safety of calling units. Therefore, no stronger + * ensures clause is required on this function. + **************************************************/ +void polyvec_add(polyvec *r, const polyvec *b) +__contract__( + requires(memory_no_alias(r, sizeof(polyvec))) + requires(memory_no_alias(b, sizeof(polyvec))) + requires(forall(int, j0, 0, MLKEM_K - 1, + forall(int, k0, 0, MLKEM_N - 1, + (int32_t)r->vec[j0].coeffs[k0] + b->vec[j0].coeffs[k0] <= INT16_MAX))) + requires(forall(int, j1, 0, MLKEM_K - 1, + forall(int, k1, 0, MLKEM_N - 1, + (int32_t)r->vec[j1].coeffs[k1] + b->vec[j1].coeffs[k1] >= INT16_MIN))) + assigns(object_whole(r)) +); + +#define polyvec_tomont MLKEM_NAMESPACE(polyvec_tomont) +/************************************************* + * Name: polyvec_tomont + * + * Description: Inplace conversion of all coefficients of a polynomial + * vector from normal domain to Montgomery domain + * + * Bounds: Output < q in absolute value. 
+ * + **************************************************/ +void polyvec_tomont(polyvec *r) +__contract__( + requires(memory_no_alias(r, sizeof(polyvec))) + assigns(memory_slice(r, sizeof(polyvec))) + assigns(object_whole(r)) + ensures(forall(int, j, 0, MLKEM_K - 1, + array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))) +); + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/reduce.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/reduce.c new file mode 100644 index 0000000000..db7baf0f56 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/reduce.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "reduce.h" +#include +#include "params.h" + +/* QINV == -3327 converted to uint16_t == -3327 + 65536 == 62209 */ +static const uint32_t QINV = 62209; /* q^-1 mod 2^16 */ + +/************************************************* + * Name: cast_uint16_to_int16 + * + * Description: Cast uint16 value to int16 + * + * Returns: + * input x in 0 .. 32767: returns value unchanged + * input x in 32768 .. 
65535: returns (x - 65536) + **************************************************/ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "conversion" +#endif +static INLINE int16_t cast_uint16_to_int16(uint16_t x) +{ + /* + * PORTABILITY: This relies on uint16_t -> int16_t + * being implemented as the inverse of int16_t -> uint16_t, + * which is implementation-defined (C99 6.3.1.3 (3)) + * CBMC (correctly) fails to prove this conversion is OK, + * so we have to suppress that check here + */ + return (int16_t)x; +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************* + * Name: montgomery_reduce_generic + * + * Description: Generic Montgomery reduction; given a 32-bit integer a, computes + * 16-bit integer congruent to a * R^-1 mod q, where R=2^16 + * + * Arguments: - int32_t a: input integer to be reduced + * + * Returns: integer congruent to a * R^-1 modulo q + * + * Bounds: For any C such that |a| < q * C, the return value + * has absolute value < q (C/2^16 + 1/2). + * + * Notable special cases: + * - The Montgomery multiplication of a value of absolute value + * < q * C with a signed-canonical value ( < q/2 ) has + * absolute value < q * (0.0254 * C + 1/2). + * - The Montgomery multiplication of a value of absolute value + * < q * C with a value t of |t| < q has absolute value + * < q * (0.0508 * C + 1/2). + * - The Montgomery multiplication of a value of absolute value + * < C with a value of abs < q has absolute value + * < q (C/2^16 + 1/2). + **************************************************/ +ALWAYS_INLINE +static INLINE int16_t montgomery_reduce_generic(int32_t a) +{ + /* + *Bounds on paper + * - Case |a| < q * C, for some C + * |t| <= |a|/2^16 + |t|*q/2^16 + * < q * C / 2^16 + q/2 + * = q (C/2^16 + 1/2) + * - Case |a| < (q/2) * C * q, for some C + * Replace C -> C * q in the above and estimate + * q / 2^17 < 0.0254.
+ */ + + /* Compute a*q^{-1} mod 2^16 in unsigned representatives */ + const uint16_t a_reduced = a & UINT16_MAX; + const uint16_t a_inverted = (a_reduced * QINV) & UINT16_MAX; + + /* Lift to signed canonical representative mod 2^16. */ + const int16_t t = cast_uint16_to_int16(a_inverted); + + int32_t r = a - ((int32_t)t * MLKEM_Q); + + /* + * PORTABILITY: Right-shift on a signed integer is, strictly-speaking, + * implementation-defined for negative left argument. Here, + * we assume it's sign-preserving "arithmetic" shift right. (C99 6.5.7 (5)) + */ + r = r >> 16; + + return (int16_t)r; +} + +int16_t montgomery_reduce(int32_t a) +{ + int16_t res; + SCALAR_BOUND(a, 2 * MLKEM_Q * 32768, "montgomery_reduce input"); + + res = montgomery_reduce_generic(a); + + SCALAR_BOUND(res, (3 * (MLKEM_Q + 1)) / 2, "montgomery_reduce output"); + return res; +} + +int16_t fqmul(int16_t a, int16_t b) +{ + int16_t res; + SCALAR_BOUND(b, HALF_Q, "fqmul input"); + + res = montgomery_reduce((int32_t)a * (int32_t)b); + + SCALAR_BOUND(res, MLKEM_Q, "fqmul output"); + return res; +} + +/* + * To divide by MLKEM_Q using Barrett multiplication, the "magic number" + * multiplier is round_to_nearest(2**26/MLKEM_Q) + */ +#define BPOWER 26 +static const int32_t barrett_multiplier = + ((1 << BPOWER) + MLKEM_Q / 2) / MLKEM_Q; + +/************************************************* + * Name: barrett_reduce + * + * Description: Barrett reduction; given a 16-bit integer a, computes + * centered representative congruent to a mod q in + * {-(q-1)/2,...,(q-1)/2} + * + * Arguments: - int16_t a: input integer to be reduced + * + * Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q. + **************************************************/ +int16_t barrett_reduce(int16_t a) +{ + /* + * Compute round_to_nearest(a/MLKEM_Q) using the multiplier + * above and shift by BPOWER places. 
+ * PORTABILITY: Right-shift on a signed integer is, strictly-speaking, + * implementation-defined for negative left argument. Here, + * we assume it's sign-preserving "arithmetic" shift right. (C99 6.5.7 (5)) + */ + const int32_t t = (barrett_multiplier * a + (1 << (BPOWER - 1))) >> BPOWER; + + /* + * t is in -10 .. +10, so we need 32-bit math to + * evaluate t * MLKEM_Q and the subsequent subtraction + */ + return (int16_t)(a - t * MLKEM_Q); +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/reduce.h new file mode 100644 index 0000000000..2a486cf3ec --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/reduce.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef REDUCE_H +#define REDUCE_H + +#include +#include "cbmc.h" +#include "debug/debug.h" +#include "params.h" + +#define MONT -1044 /* 2^16 mod q */ +#define HALF_Q ((MLKEM_Q + 1) / 2) /* 1665 */ + +/************************************************* + * Name: montgomery_reduce + * + * Description: Montgomery reduction + * + * Arguments: - int32_t a: input integer to be reduced + * Must be smaller than 2 * q * 2^15 in absolute value. + * + * Returns: integer congruent to a * R^-1 modulo q, + * smaller than 3/2 q in absolute value. 
+ **************************************************/ +#define montgomery_reduce MLKEM_NAMESPACE(montgomery_reduce) +int16_t montgomery_reduce(int32_t a) +__contract__( + requires(a > -(2 * MLKEM_Q * 32768)) + requires(a < (2 * MLKEM_Q * 32768)) + ensures(return_value > -(3 * HALF_Q) && return_value < (3 * HALF_Q)) +); + +#define barrett_reduce MLKEM_NAMESPACE(barrett_reduce) +int16_t barrett_reduce(int16_t a) +__contract__( + ensures(return_value > -HALF_Q && return_value < HALF_Q) +); + +/************************************************* + * Name: fqmul + * + * Description: Montgomery multiplication modulo q=3329 + * + * Arguments: - int16_t a: first factor + * Can be any int16_t. + * - int16_t b: second factor. + * Must be signed canonical (abs value <(q+1)/2) + * + * Returns 16-bit integer congruent to a*b*R^{-1} mod q, and + * smaller than q in absolute value. + * + **************************************************/ +#define fqmul MLKEM_NAMESPACE(fqmul) +int16_t fqmul(int16_t a, int16_t b) +__contract__( + requires(b > -HALF_Q) + requires(b < HALF_Q) + ensures(return_value > -MLKEM_Q && return_value < MLKEM_Q) +); + + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.c new file mode 100644 index 0000000000..4e8a5ce9b2 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "params.h" + +#include "arith_native.h" +#include "rej_uniform.h" + +/************************************************* + * Name: rej_uniform_scalar + * + * Description: Run rejection sampling on uniform random bytes to generate + * uniform random integers mod q + * + * Arguments: - int16_t *r: pointer to output buffer + * - unsigned int target: requested number of 16-bit integers + * (uniform mod q). + * Must be <= 4096. 
+ * - unsigned int offset: number of 16-bit integers that have + * already been sampled. + * Must be <= target. + * - const uint8_t *buf: pointer to input buffer + * (assumed to be uniform random bytes) + * - unsigned int buflen: length of input buffer in bytes + * Must be <= 4096. + * Must be a multiple of 3. + * + * Note: Strictly speaking, only a few values of buflen near UINT_MAX need + * excluding. The limit of 4096 is somewhat arbitrary but sufficient for all + * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. + * + * Returns the new offset of sampled 16-bit integers, at most target, + * and at least the initial offset. + * If the new offset is strictly less than target, all of the input buffer + * is guaranteed to have been consumed. If it is equal to target, no information + * is provided on how many bytes of the input buffer have been consumed. + **************************************************/ +static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) +{ + unsigned int ctr, pos; + uint16_t val0, val1; + + ctr = offset; + pos = 0; + /* pos + 3 cannot overflow due to the assumption buflen <= 4096 */ + while (ctr < target && pos + 3 <= buflen) + __loop__( + invariant(offset <= ctr && ctr <= target && pos <= buflen) + invariant(ctr > 0 ==> array_bound(r, 0, ctr - 1, 0, (MLKEM_Q - 1)))) + { + val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF; + val1 = ((buf[pos + 1] >> 4) | ((uint16_t)buf[pos + 2] << 4)) & 0xFFF; + pos += 3; + + if (val0 < MLKEM_Q) + { + r[ctr++] = val0; + } + if (ctr < target && val1 < MLKEM_Q) + { + r[ctr++] = val1; + } + } + return ctr; +} + +#if !defined(MLKEM_USE_NATIVE_REJ_UNIFORM) +unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, + const uint8_t *buf, unsigned int buflen) +{ + return rej_uniform_scalar(r, target, offset, buf, buflen); +} +#else /* MLKEM_USE_NATIVE_REJ_UNIFORM */ + 
+unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, + const uint8_t *buf, unsigned int buflen) +{ + int ret; + + /* Sample from large buffer with full lane as much as possible. */ + ret = rej_uniform_native(r + offset, target - offset, buf, buflen); + if (ret != -1) + return offset + (unsigned)ret; + + return rej_uniform_scalar(r, target, offset, buf, buflen); +} +#endif /* MLKEM_USE_NATIVE_REJ_UNIFORM */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.h new file mode 100644 index 0000000000..aeb9cc3eb0 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef REJ_UNIFORM_H +#define REJ_UNIFORM_H + +#include +#include +#include "cbmc.h" +#include "params.h" + +#define rej_uniform MLKEM_NAMESPACE(rej_uniform) +/************************************************* + * Name: rej_uniform + * + * Description: Run rejection sampling on uniform random bytes to generate + * uniform random integers mod q + * + * Arguments: - int16_t *r: pointer to output buffer + * - unsigned int target: requested number of 16-bit integers + * (uniform mod q). + * Must be <= 4096. + * - unsigned int offset: number of 16-bit integers that have + * already been sampled. + * Must be <= target. + * - const uint8_t *buf: pointer to input buffer + * (assumed to be uniform random bytes) + * - unsigned int buflen: length of input buffer in bytes + * Must be <= 4096. + * Must be a multiple of 3. + * + * Note: Strictly speaking, only a few values of buflen near UINT_MAX need + * excluding. The limit of 4096 is somewhat arbitrary but sufficient for all + * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. + * + * Returns the new offset of sampled 16-bit integers, at most target, + * and at least the initial offset. 
+ * If the new offset is strictly less than target, all of the input buffer + * is guaranteed to have been consumed. If it is equal to target, no information + * is provided on how many bytes of the input buffer have been consumed. + **************************************************/ + +/* + * REF-CHANGE: The signature differs from the Kyber reference implementation + * in that it adds the offset and always expects the base of the target + * buffer. This avoids shifting the buffer base in the caller, which appears + * tricky to reason about. + */ +unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, + const uint8_t *buf, unsigned int buflen) +__contract__( + requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) + requires(memory_no_alias(r, sizeof(int16_t) * target)) + requires(memory_no_alias(buf, buflen)) + requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1))) + assigns(memory_slice(r, sizeof(int16_t) * target)) + ensures(offset <= return_value && return_value <= target) + ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1))) +); +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/symmetric-shake.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/symmetric-shake.c new file mode 100644 index 0000000000..5dd8c10d92 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/symmetric-shake.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include +#include +#include +#include "fips202.h" +#include "params.h" +#include "symmetric.h" + +void mlkem_shake256_prf(uint8_t *out, size_t outlen, + const uint8_t key[MLKEM_SYMBYTES], uint8_t nonce) +{ + uint8_t extkey[MLKEM_SYMBYTES + 1]; + + memcpy(extkey, key, MLKEM_SYMBYTES); + extkey[MLKEM_SYMBYTES] = nonce; + + shake256(out, outlen, extkey, sizeof(extkey)); +} + +void mlkem_shake256_rkprf(uint8_t out[MLKEM_SSBYTES], + const uint8_t 
key[MLKEM_SYMBYTES], + const uint8_t input[MLKEM_CIPHERTEXTBYTES]) +{ + shake256incctx s; + + shake256_inc_init(&s); + shake256_inc_absorb(&s, key, MLKEM_SYMBYTES); + shake256_inc_absorb(&s, input, MLKEM_CIPHERTEXTBYTES); + shake256_inc_finalize(&s); + shake256_inc_squeeze(out, MLKEM_SSBYTES, &s); + shake256_inc_ctx_release(&s); +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/symmetric.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/symmetric.h new file mode 100644 index 0000000000..202741a7b3 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/symmetric.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef SYMMETRIC_H +#define SYMMETRIC_H + +#include +#include +#include "params.h" + +#include "fips202.h" + +#include "cbmc.h" + +#define mlkem_shake256_prf MLKEM_NAMESPACE(mlkem_shake256_prf) +/************************************************* + * Name: mlkem_shake256_prf + * + * Ref: FIPS-203 Section 4.1. Function PRF (eq 4.3) + * + * Description: Usage of SHAKE256 as a PRF, concatenates secret and public input + * and then generates outlen bytes of SHAKE256 output + * + * Arguments: - uint8_t *out: pointer to output + * - size_t outlen: number of requested output bytes + * - const uint8_t *key: pointer to the key (of length + * MLKEM_SYMBYTES) + * - uint8_t nonce: single-byte nonce (public PRF input) + * + * out and key may NOT be aliased. + **************************************************/ +void mlkem_shake256_prf(uint8_t *out, size_t outlen, + const uint8_t key[MLKEM_SYMBYTES], uint8_t nonce) +__contract__( + requires(memory_no_alias(out, outlen)) + requires(memory_no_alias(key, MLKEM_SYMBYTES)) + assigns(memory_slice(out, outlen)) +); + +#define mlkem_shake256_rkprf MLKEM_NAMESPACE(mlkem_shake256_rkprf) +/************************************************* + * Name: mlkem_shake256_rkprf + * + * Ref: FIPS-203 Section 4.1. 
Hash function J + * + * Description: Usage of SHAKE256 as a PRF, concatenates key with input + * and then generates MLKEM_SSBYTES bytes of SHAKE256 output + * + * Arguments: - uint8_t *out: pointer to output + * - const uint8_t *key: pointer to the key (of length + * MLKEM_SYMBYTES) + * - const uint8_t *input: pointer to the input (of length + * MLKEM_CIPHERTEXTBYTES) + * + * out, key, and input may NOT be aliased. + **************************************************/ +void mlkem_shake256_rkprf(uint8_t out[MLKEM_SSBYTES], + const uint8_t key[MLKEM_SYMBYTES], + const uint8_t input[MLKEM_CIPHERTEXTBYTES]) +__contract__( + requires(memory_no_alias(out, MLKEM_SSBYTES)) + requires(memory_no_alias(key, MLKEM_SYMBYTES)) + requires(memory_no_alias(input, MLKEM_CIPHERTEXTBYTES)) + assigns(memory_slice(out, MLKEM_SSBYTES)) +); + + +/* Macros denoting FIPS-203 specific Hash functions */ + +/* Hash function H, FIPS-203 4.1 (eq 4.4) */ +#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) + +/* Hash function G, FIPS-203 4.1 (eq 4.5) */ +#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) + +/* Macros denoting FIPS-203 specific PRFs */ +#define prf(OUT, OUTBYTES, KEY, NONCE) \ + mlkem_shake256_prf(OUT, OUTBYTES, KEY, NONCE) +#define rkprf(OUT, KEY, INPUT) mlkem_shake256_rkprf(OUT, KEY, INPUT) + +#endif /* SYMMETRIC_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.c new file mode 100644 index 0000000000..b5b71e023e --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.c @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "verify.h" + +#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) +/* + * Masking value used in constant-time functions from + * verify.h to block the compiler's range analysis and + * thereby reduce the risk of compiler-introduced branches. 
+ */ +volatile uint64_t ct_opt_blocker_u64 = 0; + +#else /* MLKEM_USE_ASM_VALUE_BARRIER */ + +int empty_cu_verify; + +#endif /* MLKEM_USE_ASM_VALUE_BARRIER */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.h new file mode 100644 index 0000000000..5c62223c3d --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.h @@ -0,0 +1,305 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef VERIFY_H +#define VERIFY_H + +#include +#include +#include +#include "cbmc.h" +#include "params.h" + +/* Constant-time comparisons and conditional operations + + We reduce the risk of compilation into variable-time code + through the use of 'value barriers'. + + Functionally, a value barrier is a no-op. To the compiler, however, + it constitutes an arbitrary modification of its input, and therefore + hinders value propagation and range analysis. + + We consider two approaches to implement a value barrier: + - An empty inline asm block which marks the target value as clobbered. + - XOR'ing with the value of a volatile global that's set to 0. + + The first approach is cheap because it only prevents the compiler + from reasoning about the value of the variable past the barrier, + but does not directly generate additional instructions. + + The second approach generates redundant loads and XOR operations + and therefore comes at a higher runtime cost. However, it appears + more robust towards optimization, as compilers should never drop + a volatile load. + + We use the empty-ASM value barrier for GCC and clang, and fall + back to the global volatile barrier otherwise. + + The global value barrier can be forced by setting MLKEM_NO_ASM_VALUE_BARRIER. 
+ +*/ + +#if (defined(__GNUC__) || defined(__clang__)) && !defined(CBMC) && \ + !defined(MLKEM_NO_ASM_VALUE_BARRIER) +#define MLKEM_USE_ASM_VALUE_BARRIER +#endif + +#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) + +/* + * Declaration of global volatile that the global value barrier + * is loading from and masking with. + */ +#define ct_opt_blocker_u64 MLKEM_NAMESPACE(ct_opt_blocker_u64) +extern volatile uint64_t ct_opt_blocker_u64; + +/* Helper functions for obtaining masks of various sizes */ +STATIC_INLINE_TESTABLE uint8_t get_optblocker_u8(void) +__contract__(ensures(return_value == 0)) { return (uint8_t)ct_opt_blocker_u64; } + +STATIC_INLINE_TESTABLE uint32_t get_optblocker_u32(void) +__contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; } + +STATIC_INLINE_TESTABLE uint32_t get_optblocker_i32(void) +__contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; } + +STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b) +__contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u32()); } + +STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b) +__contract__(ensures(return_value == b)) { return (b ^ get_optblocker_i32()); } + +STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b) +__contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u8()); } + +#else /* !MLKEM_USE_ASM_VALUE_BARRIER */ + +STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b) +__contract__(ensures(return_value == b)) +{ + asm("" : "+r"(b)); + return b; +} + +STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b) +__contract__(ensures(return_value == b)) +{ + asm("" : "+r"(b)); + return b; +} + +STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b) +__contract__(ensures(return_value == b)) +{ + asm("" : "+r"(b)); + return b; +} + +#endif /* MLKEM_USE_ASM_VALUE_BARRIER */ + +/* + * The ct_cmask_nonzero_xxx functions below make deliberate use of unsigned + * overflow, which is fully defined behaviour in C. 
It is thus safe to disable + * this warning. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif + +/************************************************* + * Name: ct_cmask_nonzero_u16 + * + * Description: Return 0 if input is zero, and 0xFFFF (all bits set) otherwise. + * + * Arguments: uint16_t x: Value to be converted into a mask + **************************************************/ +STATIC_INLINE_TESTABLE uint16_t ct_cmask_nonzero_u16(uint16_t x) +__contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF))) +{ + uint32_t tmp = value_barrier_u32(-((uint32_t)x)); + tmp >>= 16; + return tmp; +} + +/************************************************* + * Name: ct_cmask_nonzero_u8 + * + * Description: Return 0 if input is zero, and 0xFF (all bits set) otherwise. + * + * Arguments: uint8_t x: Value to be converted into a mask + **************************************************/ +STATIC_INLINE_TESTABLE uint8_t ct_cmask_nonzero_u8(uint8_t x) +__contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF))) +{ + uint32_t tmp = value_barrier_u32(-((uint32_t)x)); + tmp >>= 24; + return tmp; +} + +/* Put unsigned overflow warnings in CBMC back into scope */ +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/* + * The ct_cmask_neg_i16 function below makes deliberate use of + * signed to unsigned integer conversion, which is fully defined + * behaviour in C. It is thus safe to disable this warning. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "conversion" +#endif + +/************************************************* + * Name: ct_cmask_neg_i16 + * + * Description: Return 0 if input is non-negative, and 0xFFFF (all bits set) otherwise. + * + * Arguments: int16_t x: Value to be converted into a mask + **************************************************/ +STATIC_INLINE_TESTABLE uint16_t ct_cmask_neg_i16(int16_t x) +__contract__(ensures(return_value == ((x < 0) ? 
0xFFFF : 0))) +{ + int32_t tmp = value_barrier_i32((int32_t)x); + tmp >>= 16; + return (int16_t)tmp; +} + +/* Put conversion warnings in CBMC back into scope */ +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/* + * The ct_sel_xxx functions below make deliberate use of unsigned + * to signed integer conversion, which is implementation-defined + * behaviour. Here, we assume that uint16_t -> int16_t is inverse + * to int16_t -> uint16_t. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "conversion" +#endif + +/************************************************* + * Name: ct_sel_int16 + * + * Description: Functionally equivalent to cond ? a : b, + * but implemented with guards against + * compiler-introduced branches. + * + * Arguments: int16_t a: First alternative + * int16_t b: Second alternative + * uint16_t cond: Condition variable. + **************************************************/ +STATIC_INLINE_TESTABLE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond) +__contract__(ensures(return_value == (cond ? a : b))) +{ + uint16_t au = a, bu = b; + uint16_t res = bu ^ (ct_cmask_nonzero_u16(cond) & (au ^ bu)); + return (int16_t)res; +} + +/* Put unsigned-to-signed warnings in CBMC back into scope */ +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************* + * Name: ct_sel_uint8 + * + * Description: Functionally equivalent to cond ? a : b, + * but implemented with guards against + * compiler-introduced branches. + * + * Arguments: uint8_t a: First alternative + * uint8_t b: Second alternative + * uint8_t cond: Condition variable. + **************************************************/ +STATIC_INLINE_TESTABLE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond) +__contract__(ensures(return_value == (cond ? 
a : b))) +{ + return b ^ (ct_cmask_nonzero_u8(cond) & (a ^ b)); +} + +/************************************************* + * Name: ct_memcmp + * + * Description: Compare two arrays for equality in constant time. + * + * Arguments: const uint8_t *a: pointer to first byte array + * const uint8_t *b: pointer to second byte array + * size_t len: length of the byte arrays + * + * Returns 0 if the byte arrays are equal, a non-zero value otherwise + **************************************************/ +STATIC_INLINE_TESTABLE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b, + const size_t len) +__contract__( + requires(memory_no_alias(a, len)) + requires(memory_no_alias(b, len)) + requires(len <= INT_MAX) + ensures((return_value == 0) == forall(int, i, 0, ((int)len - 1), (a[i] == b[i])))) +{ + uint8_t r = 0, s = 0; + + /* + * Switch to a _signed_ ilen value, so that our loop counter + * can also be signed, and thus (i - 1) in the loop invariant + * can yield -1 as required. + */ + const int ilen = (int)len; + int i; + + for (i = 0; i < ilen; i++) + __loop__( + invariant(i >= 0 && i <= ilen) + invariant((r == 0) == (forall(int, k, 0, (i - 1), (a[k] == b[k]))))) + { + r |= a[i] ^ b[i]; + /* s is useless, but prevents the loop from being aborted once r=0xff. */ + s ^= a[i] ^ b[i]; + } + + /* + * - Convert r into a mask; this may not be necessary, but is an additional + * safeguard + * against leaking information about a and b. + * - XOR twice with s, separated by a value barrier, to prevent the compiler + * from dropping the s computation in the loop. + */ + return (value_barrier_u8(ct_cmask_nonzero_u8(r) ^ s) ^ s); +} + +/************************************************* + * Name: ct_cmov_zero + * + * Description: Copy len bytes from x to r if b is zero; + * don't modify r if b is non-zero. + * assumes two's complement representation of negative integers. + * Runs in constant time. 
+ * + * Arguments: uint8_t *r: pointer to output byte array + * const uint8_t *x: pointer to input byte array + * size_t len: Amount of bytes to be copied + * uint8_t b: Condition value. + **************************************************/ +STATIC_INLINE_TESTABLE +void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) +__contract__( + requires(memory_no_alias(r, len)) + requires(memory_no_alias(x, len)) + assigns(memory_slice(r, len))) +{ + size_t i; + for (i = 0; i < len; i++) + __loop__(invariant(i <= len)) + { + r[i] = ct_sel_uint8(r[i], x[i], b); + } +} + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/zetas.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/zetas.c new file mode 100644 index 0000000000..f52b2ff5ad --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/zetas.c @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * WARNING: This file is auto-generated from scripts/autogenerate_files.py + * Do not modify it directly. + */ + +#include "ntt.h" + +/* + * Table of zeta values used in the reference NTT and inverse NTT. + * See autogenerate_files.py for details. 
+ */ +const int16_t zetas[128] = { + -1044, -758, -359, -1517, 1493, 1422, 287, 202, -171, 622, 1577, + 182, 962, -1202, -1474, 1468, 573, -1325, 264, 383, -829, 1458, + -1602, -130, -681, 1017, 732, 608, -1542, 411, -205, -1571, 1223, + 652, -552, 1015, -1293, 1491, -282, -1544, 516, -8, -320, -666, + -1618, -1162, 126, 1469, -853, -90, -271, 830, 107, -1421, -247, + -951, -398, 961, -1508, -725, 448, -1065, 677, -1275, -1103, 430, + 555, 843, -1251, 871, 1550, 105, 422, 587, 177, -235, -291, + -460, 1574, 1653, -246, 778, 1159, -147, -777, 1483, -602, 1119, + -1590, 644, -872, 349, 418, 329, -156, -75, 817, 1097, 603, + 610, 1322, -1285, -1465, 384, -1215, -136, 1218, -1335, -874, 220, + -1187, -1659, -1185, -1530, -1278, 794, -1510, -854, -870, 478, -108, + -308, 996, 991, 958, -1460, 1522, 1628, +}; diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/LICENSE b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/LICENSE similarity index 100% rename from src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/LICENSE rename to src/kem/ml_kem/mlkem-native_ml-kem-768_ref/LICENSE diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/api.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/api.h new file mode 100644 index 0000000000..94597323f1 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/api.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef API_H +#define API_H + +#include + +#define PQCP_MLKEM_NATIVE_MLKEM512_SECRETKEYBYTES 1632 +#define PQCP_MLKEM_NATIVE_MLKEM512_PUBLICKEYBYTES 800 +#define PQCP_MLKEM_NATIVE_MLKEM512_CIPHERTEXTBYTES 768 +#define PQCP_MLKEM_NATIVE_MLKEM512_KEYPAIRCOINBYTES 64 +#define PQCP_MLKEM_NATIVE_MLKEM512_ENCCOINBYTES 32 +#define PQCP_MLKEM_NATIVE_MLKEM512_BYTES 32 + +int PQCP_MLKEM_NATIVE_MLKEM512_keypair_derand(uint8_t *pk, uint8_t *sk, + const uint8_t *coins); +int PQCP_MLKEM_NATIVE_MLKEM512_keypair(uint8_t *pk, uint8_t *sk); +int 
PQCP_MLKEM_NATIVE_MLKEM512_enc_derand(uint8_t *ct, uint8_t *ss, + const uint8_t *pk, + const uint8_t *coins); +int PQCP_MLKEM_NATIVE_MLKEM512_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int PQCP_MLKEM_NATIVE_MLKEM512_dec(uint8_t *ss, const uint8_t *ct, + const uint8_t *sk); + +#define PQCP_MLKEM_NATIVE_MLKEM768_SECRETKEYBYTES 2400 +#define PQCP_MLKEM_NATIVE_MLKEM768_PUBLICKEYBYTES 1184 +#define PQCP_MLKEM_NATIVE_MLKEM768_CIPHERTEXTBYTES 1088 +#define PQCP_MLKEM_NATIVE_MLKEM768_KEYPAIRCOINBYTES 64 +#define PQCP_MLKEM_NATIVE_MLKEM768_ENCCOINBYTES 32 +#define PQCP_MLKEM_NATIVE_MLKEM768_BYTES 32 + +int PQCP_MLKEM_NATIVE_MLKEM768_keypair_derand(uint8_t *pk, uint8_t *sk, + const uint8_t *coins); +int PQCP_MLKEM_NATIVE_MLKEM768_keypair(uint8_t *pk, uint8_t *sk); +int PQCP_MLKEM_NATIVE_MLKEM768_enc_derand(uint8_t *ct, uint8_t *ss, + const uint8_t *pk, + const uint8_t *coins); +int PQCP_MLKEM_NATIVE_MLKEM768_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); +int PQCP_MLKEM_NATIVE_MLKEM768_dec(uint8_t *ss, const uint8_t *ct, + const uint8_t *sk); + +#define PQCP_MLKEM_NATIVE_MLKEM1024_SECRETKEYBYTES 3168 +#define PQCP_MLKEM_NATIVE_MLKEM1024_PUBLICKEYBYTES 1568 +#define PQCP_MLKEM_NATIVE_MLKEM1024_CIPHERTEXTBYTES 1568 +#define PQCP_MLKEM_NATIVE_MLKEM1024_KEYPAIRCOINBYTES 64 +#define PQCP_MLKEM_NATIVE_MLKEM1024_ENCCOINBYTES 32 +#define PQCP_MLKEM_NATIVE_MLKEM1024_BYTES 32 + +int PQCP_MLKEM_NATIVE_MLKEM1024_keypair_derand(uint8_t *pk, uint8_t *sk, + const uint8_t *coins); +int PQCP_MLKEM_NATIVE_MLKEM1024_keypair(uint8_t *pk, uint8_t *sk); +int PQCP_MLKEM_NATIVE_MLKEM1024_enc_derand(uint8_t *ct, uint8_t *ss, + const uint8_t *pk, + const uint8_t *coins); +int PQCP_MLKEM_NATIVE_MLKEM1024_enc(uint8_t *ct, uint8_t *ss, + const uint8_t *pk); +int PQCP_MLKEM_NATIVE_MLKEM1024_dec(uint8_t *ss, const uint8_t *ct, + const uint8_t *sk); + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/arith_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/arith_native.h new 
file mode 100644 index 0000000000..b7e921323a --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/arith_native.h @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef MLKEM_ARITH_NATIVE_H +#define MLKEM_ARITH_NATIVE_H + +#include +#include "config.h" +#include "params.h" + +#if defined(MLKEM_USE_NATIVE) + +#include "poly.h" +#include "polyvec.h" +#include "profile.h" + +/* + * MLKEM native arithmetic interface + * + * This is the C<->native arithmetic interface used in this repository + * to allow for the drop-in of native code for performance critical + * components of ML-KEM. + * + * A _profile_ is a specific implementation of (part of) this interface. + * The active profile (if any) is determined in profile.h. + * + * To add a function to a profile, define MLKEM_USE_NATIVE_XXX and + * implement `static inline xxx(...)` in the profile header. + * + * The only exception is MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER. This option can + * be set if there are native implementations for all of NTT, invNTT, and + * base multiplication, and allows the native implementation to use a + * custom order of polynomial coefficients in NTT domain -- the use of such + * custom order is not an implementation-detail since the public matrix + * is generated in NTT domain. In this case, a permutation function + * poly_permute_bitrev_to_custom() needs to be provided that permutes + * polynomials in NTT domain from bitreversed to the custom order. + */ + +/* + * Those functions are meant to be trivial wrappers around + * the chosen native implementation. The are static inline + * to avoid unnecessary calls. + * The macro before each declaration controls whether a native + * implementation is present. 
+ */ + +#if defined(MLKEM_USE_NATIVE_NTT) +/************************************************* + * Name: ntt_native + * + * Description: Computes negacyclic number-theoretic transform (NTT) of + * a polynomial in place. + * + * The input polynomial is assumed to be in normal order. + * The output polynomial is in bitreversed order, or of a + * custom order if MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER is set. + * See the documentation of MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER + * for more information. + * + * Arguments: - poly *p: pointer to in/output polynomial + **************************************************/ +static INLINE void ntt_native(poly *); +#endif /* MLKEM_USE_NATIVE_NTT */ + +#if defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER) +/* + * This must only be set if NTT, invNTT, basemul, mulcache, and + * to/from byte stream conversions all have native implementations + * that are adapted to the custom order. + */ +#if !defined(MLKEM_USE_NATIVE_NTT) || !defined(MLKEM_USE_NATIVE_INTT) || \ + !defined(MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE) || \ + !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) || \ + !defined(MLKEM_USE_NATIVE_POLY_TOBYTES) || \ + !defined(MLKEM_USE_NATIVE_POLY_FROMBYTES) +#error \ + "Invalid native profile: MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER can only be \ +set if there are native implementations for NTT, invNTT, mulcache, basemul, \ +and to/from bytes conversions." +#endif + +/************************************************* + * Name: poly_permute_bitrev_to_custom + * + * Description: When MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER is defined, + * convert a polynomial in NTT domain from bitreversed + * order to the custom order output by the native NTT. + * + * This must only be defined if there is native code for + * all of (a) NTT, (b) invNTT, (c) basemul, (d) mulcache. 
+ * Arguments: - poly *p: pointer to in/output polynomial + * + **************************************************/ +static INLINE void poly_permute_bitrev_to_custom(poly *); +#endif /* MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER */ + +#if defined(MLKEM_USE_NATIVE_INTT) +/************************************************* + * Name: intt_native + * + * Description: Computes inverse of negacyclic number-theoretic transform (NTT) + * of a polynomial in place. + * + * The input polynomial is in bitreversed order, or of a + * custom order if MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER is set. + * See the documentation of MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER + * for more information. + * The output polynomial is assumed to be in normal order. + * + * Arguments: - uint16_t *a: pointer to in/output polynomial + **************************************************/ +static INLINE void intt_native(poly *); +#endif /* MLKEM_USE_NATIVE_INTT */ + +#if defined(MLKEM_USE_NATIVE_POLY_REDUCE) +/************************************************* + * Name: poly_reduce_native + * + * Description: Applies modular reduction to all coefficients of a polynomial. 
+ * + * Arguments: - poly *r: pointer to input/output polynomial + **************************************************/ +static INLINE void poly_reduce_native(poly *); +#endif /* MLKEM_USE_NATIVE_POLY_REDUCE */ + +#if defined(MLKEM_USE_NATIVE_POLY_TOMONT) +/************************************************* + * Name: poly_tomont_native + * + * Description: Inplace conversion of all coefficients of a polynomial + * from normal domain to Montgomery domain + * + * Arguments: - poly *r: pointer to input/output polynomial + **************************************************/ +static INLINE void poly_tomont_native(poly *); +#endif /* MLKEM_USE_NATIVE_POLY_TOMONT */ + +#if defined(MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE) +/************************************************* + * Name: poly_mulcache_compute_native + * + * Description: Compute multiplication cache for a polynomial + * in NTT domain. + * + * The purpose of the multiplication cache is to + * cache repeated computations required during a + * base multiplication of polynomials in NTT domain. + * The structure of the multiplication-cache is + * implementation defined. + * + * Arguments: INPUT: + * - poly: const pointer to input polynomial. + * This must be in NTT domain and in bitreversed order, or of + * a custom order if MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER is set. + * See the documentation of MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER + * for more information. + * OUTPUT + * - cache: pointer to multiplication cache + **************************************************/ +static INLINE void poly_mulcache_compute_native(poly_mulcache *cache, + const poly *poly); +#endif /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ + +#if defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) +/************************************************* + * Name: polyvec_basemul_acc_montgomery_cached_native + * + * Description: Compute multiplication of polynomials in NTT domain. + * + * Arguments: INPUT: + * - a: First polynomial operand. 
+ * This must be in NTT domain and in bitreversed order, or of + * a custom order if MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER is set. + * See the documentation of MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER + * for more information. + * - b: Second polynomial operand. + * As for a. + * - b_cache: Multiplication-cache for b. + * OUTPUT + * - r: Result of the base multiplication. This is again + * in NTT domain, and of the same order as a and b. + **************************************************/ +static INLINE void polyvec_basemul_acc_montgomery_cached_native( + poly *r, const polyvec *a, const polyvec *b, + const polyvec_mulcache *b_cache); +#endif + +#if defined(MLKEM_USE_NATIVE_POLY_TOBYTES) +/************************************************* + * Name: poly_tobytes_native + * + * Description: Serialization of a polynomial. + * Signed coefficients are converted to + * unsigned form before serialization. + * + * Arguments: INPUT: + * - a: const pointer to input polynomial, + * with each coefficient in the range -Q+1 .. Q-1 + * OUTPUT + * - r: pointer to output byte array + * (of MLKEM_POLYBYTES bytes) + **************************************************/ +static INLINE void poly_tobytes_native(uint8_t r[MLKEM_POLYBYTES], + const poly *a); +#endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */ + +#if defined(MLKEM_USE_NATIVE_POLY_FROMBYTES) +/************************************************* + * Name: poly_frombytes_native + * + * Description: De-serialization of a polynomial + * from a byte array of + * MLKEM_POLYBYTES bytes. 
+ * + * Arguments: INPUT: + * - r: const pointer to input byte array + * (of MLKEM_POLYBYTES bytes) + * OUTPUT + * - a: pointer to output polynomial in NTT domain + **************************************************/ +static INLINE void poly_frombytes_native(poly *a, + const uint8_t r[MLKEM_POLYBYTES]); +#endif /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ + +#if defined(MLKEM_USE_NATIVE_REJ_UNIFORM) +/************************************************* + * Name: rej_uniform_native + * + * Description: Run rejection sampling on uniform random bytes to generate + * uniform random integers mod q + * + * Arguments: - int16_t *r: pointer to output buffer + * - unsigned int len: requested number of 16-bit integers + * (uniform mod q). + * - const uint8_t *buf: pointer to input buffer + * (assumed to be uniform random bytes) + * - unsigned int buflen: length of input buffer in bytes. + * + * Return -1 if the native implementation does not support the input lengths. + * Otherwise, returns non-negative number of sampled 16-bit integers (at most + * len). 
+ **************************************************/ +static INLINE int rej_uniform_native(int16_t *r, unsigned int len, + const uint8_t *buf, unsigned int buflen); +#endif /* MLKEM_USE_NATIVE_REJ_UNIFORM */ + +#endif /* MLKEM_USE_NATIVE */ +#endif /* MLKEM_ARITH_NATIVE_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.c new file mode 100644 index 0000000000..073f3c81d7 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "cbd.h" +#include +#include "params.h" + +/************************************************* + * Name: load32_littleendian + * + * Description: load 4 bytes into a 32-bit integer + * in little-endian order + * + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns 32-bit unsigned integer loaded from x + **************************************************/ +static uint32_t load32_littleendian(const uint8_t x[4]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + r |= (uint32_t)x[3] << 24; + return r; +} + +/************************************************* + * Name: load24_littleendian + * + * Description: load 3 bytes into a 32-bit integer + * in little-endian order. 
+ * This function is only needed for ML-KEM-512 + * + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) + **************************************************/ +#if MLKEM_ETA1 == 3 +static uint32_t load24_littleendian(const uint8_t x[3]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + return r; +} +#endif + +/************************************************* + * Name: cbd2 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter eta=2 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4]) +{ + int i; + for (i = 0; i < MLKEM_N / 8; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(array_abs_bound(r->coeffs, 0, (8 * i - 1), 2))) + { + int j; + uint32_t t = load32_littleendian(buf + 4 * i); + uint32_t d = t & 0x55555555; + d += (t >> 1) & 0x55555555; + + for (j = 0; j < 8; j++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(array_abs_bound(r->coeffs, 0, 8 * i + j - 1, 2))) + { + const int16_t a = (d >> (4 * j + 0)) & 0x3; + const int16_t b = (d >> (4 * j + 2)) & 0x3; + r->coeffs[8 * i + j] = a - b; + } + } +} + +/************************************************* + * Name: cbd3 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter eta=3. 
+ * This function is only needed for ML-KEM-512 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +#if MLKEM_ETA1 == 3 +static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4]) +{ + int i; + for (i = 0; i < MLKEM_N / 4; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 3))) + { + int j; + const uint32_t t = load24_littleendian(buf + 3 * i); + uint32_t d = t & 0x00249249; + d += (t >> 1) & 0x00249249; + d += (t >> 2) & 0x00249249; + + for (j = 0; j < 4; j++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4) + invariant(array_abs_bound(r->coeffs, 0, 4 * i + j - 1, 3))) + { + const int16_t a = (d >> (6 * j + 0)) & 0x7; + const int16_t b = (d >> (6 * j + 3)) & 0x7; + r->coeffs[4 * i + j] = a - b; + } + } +} +#endif + +void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) +{ +#if MLKEM_ETA1 == 2 + cbd2(r, buf); +#elif MLKEM_ETA1 == 3 + cbd3(r, buf); +#else +#error "This implementation requires eta1 in {2,3}" +#endif +} + +void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) +{ +#if MLKEM_ETA2 == 2 + cbd2(r, buf); +#else +#error "This implementation requires eta2 = 2" +#endif +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.h new file mode 100644 index 0000000000..4dc8635bb5 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef CBD_H +#define CBD_H + +#include +#include "params.h" +#include "poly.h" + +#define poly_cbd_eta1 MLKEM_NAMESPACE(poly_cbd_eta1) +/************************************************* + * Name: poly_cbd_eta1 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with 
coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA1. + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA1)) +); + +#define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2) +/************************************************* + * Name: poly_cbd_eta2 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA2. + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + **************************************************/ +void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2)) +); + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbmc.h new file mode 100644 index 0000000000..317a26421b --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbmc.h @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +/*************************************************** + * Basic replacements for __CPROVER_XXX contracts + ***************************************************/ + +#include "common.h" + +#ifndef CBMC + +#define STATIC_INLINE_TESTABLE static INLINE +#define 
STATIC_TESTABLE static + +#define __contract__(x) +#define __loop__(x) +#define cassert(x, y) + +#else /* CBMC _is_ defined, therefore we're doing proof */ + +/* expose certain procedures to CBMC proofs that are static otherwise */ +#define STATIC_TESTABLE +#define STATIC_INLINE_TESTABLE + +#define __contract__(x) x +#define __loop__(x) x + +/* https://diffblue.github.io/cbmc/contracts-assigns.html */ +#define assigns(...) __CPROVER_assigns(__VA_ARGS__) + +/* https://diffblue.github.io/cbmc/contracts-requires-ensures.html */ +#define requires(...) __CPROVER_requires(__VA_ARGS__) +#define ensures(...) __CPROVER_ensures(__VA_ARGS__) +/* https://diffblue.github.io/cbmc/contracts-loops.html */ +#define invariant(...) __CPROVER_loop_invariant(__VA_ARGS__) +#define decreases(...) __CPROVER_decreases(__VA_ARGS__) +/* cassert to avoid confusion with in-built assert */ +#define cassert(...) __CPROVER_assert(__VA_ARGS__) +#define assume(...) __CPROVER_assume(__VA_ARGS__) + +/*************************************************** + * Macros for "expression" forms that may appear + * _inside_ top-level contracts. + ***************************************************/ + +/* + * function return value - useful inside ensures + * https://diffblue.github.io/cbmc/contracts-functions.html + */ +#define return_value (__CPROVER_return_value) + +/* + * assigns l-value targets + * https://diffblue.github.io/cbmc/contracts-assigns.html + */ +#define object_whole(...) __CPROVER_object_whole(__VA_ARGS__) +#define memory_slice(...) __CPROVER_object_upto(__VA_ARGS__) +#define same_object(...) __CPROVER_same_object(__VA_ARGS__) + +/* + * Pointer-related predicates + * https://diffblue.github.io/cbmc/contracts-memory-predicates.html + */ +#define memory_no_alias(...) __CPROVER_is_fresh(__VA_ARGS__) +#define readable(...) __CPROVER_r_ok(__VA_ARGS__) +#define writeable(...) 
__CPROVER_w_ok(__VA_ARGS__) + +/* + * History variables + * https://diffblue.github.io/cbmc/contracts-history-variables.html + */ +#define old(...) __CPROVER_old(__VA_ARGS__) +#define loop_entry(...) __CPROVER_loop_entry(__VA_ARGS__) + +/* + * Quantifiers + * Note that the range on qvar is _inclusive_ between qvar_lb .. qvar_ub + * https://diffblue.github.io/cbmc/contracts-quantifiers.html + */ + +/* + * Prevent clang-format from corrupting CBMC's special ==> operator + */ +/* clang-format off */ +#define forall(type, qvar, qvar_lb, qvar_ub, predicate) \ + __CPROVER_forall \ + { \ + type qvar; \ + ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==> (predicate) \ + } + +#define EXISTS(type, qvar, qvar_lb, qvar_ub, predicate) \ + __CPROVER_exists \ + { \ + type qvar; \ + ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) && (predicate) \ + } +/* clang-format on */ + +/*************************************************** + * Convenience macros for common contract patterns + ***************************************************/ + +/* + * Boolean-valued predicate that asserts that "all values of array_var are in + * range value_lb .. 
value_ub (inclusive)" + * Example: + * array_bound(a->coeffs, 0, MLKEM_N-1, -(MLKEM_Q - 1), MLKEM_Q - 1) + * expands to + * __CPROVER_forall { int k; (0 <= k && k <= MLKEM_N-1) ==> ( (-(MLKEM_Q - + * 1) <= a->coeffs[k]) && (a->coeffs[k] <= (MLKEM_Q - 1))) } + */ + +/* + * Prevent clang-format from corrupting CBMC's special ==> operator + */ +/* clang-format off */ +#define CBMC_CONCAT_(left, right) left##right +#define CBMC_CONCAT(left, right) CBMC_CONCAT_(left, right) + +#define array_bound_core(indextype, qvar, qvar_lb, qvar_ub, array_var, \ + value_lb, value_ub) \ + __CPROVER_forall \ + { \ + indextype qvar; \ + ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==> \ + (((value_lb) <= (array_var[(qvar)])) && \ + ((array_var[(qvar)]) <= (value_ub))) \ + } + +#define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \ + array_bound_core(int, CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \ + (qvar_ub), (array_var), (value_lb), (value_ub)) + + +/* Wrapper around array_bound operating on absolute values */ +#define array_abs_bound(arr, lb, ub, k) \ + array_bound((arr), (lb), (ub), (-(k)), (k)) +/* clang-format on */ + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/common.h new file mode 100644 index 0000000000..94c29ed927 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/common.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef COMMON_H +#define COMMON_H + + +/* + * C90 does not have the inline compiler directive yet. + * We don't use it in C90 builds. + * However, in that case the compiler warns about some inline functions in + * header files not being used in every compilation unit that includes that + * header. To work around it we silence that warning in that case using + * __attribute__((unused)). 
+ */ + +/* Do not use inline for C90 builds*/ +#if !defined(inline) +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define INLINE inline +#define ALWAYS_INLINE __attribute__((always_inline)) +#elif defined(_MSC_VER) +#define INLINE __inline +#define ALWAYS_INLINE __forceinline +#else +#define INLINE __attribute__((unused)) +#define ALWAYS_INLINE +#endif + +#else +#define INLINE inline +#define ALWAYS_INLINE __attribute__((always_inline)) +#endif + + +/* + * C90 does not have the restrict compiler directive yet. + * We don't use it in C90 builds. + */ +#if !defined(restrict) +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define RESTRICT restrict +#else +#define RESTRICT +#endif + +#else + +#define RESTRICT restrict +#endif + +#define DEFAULT_ALIGN 32 +#if defined(_WIN32) +#define ALIGN __declspec(align(DEFAULT_ALIGN)) +#define asm __asm +#else +#define asm __asm__ +#define ALIGN __attribute__((aligned(DEFAULT_ALIGN))) +#endif + +#define MLKEM_CONCAT_(left, right) left##right +#define MLKEM_CONCAT(left, right) MLKEM_CONCAT_(left, right) + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/config.h new file mode 100644 index 0000000000..370a141a65 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/config.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef CONFIG_H +#define CONFIG_H + +#include "cpucap.h" + +#if defined(MLKEM_USE_NATIVE) + +#if defined(SYS_AARCH64) +#define MLKEM_USE_NATIVE_AARCH64 +#endif /* SYS_AARCH64 */ + +#if defined(SYS_X86_64) +#define MLKEM_USE_NATIVE_X86_64 +#endif /* SYS_X86_64 */ + +#endif /* MLKEM_USE_NATIVE */ +#endif /* CONFIG_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cpucap.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cpucap.h new file mode 100644 index 0000000000..cfcbbc3fe9 --- /dev/null +++ 
b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cpucap.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef CPUCAP_H +#define CPUCAP_H + +/* Check if we're running on an AArch64 system. _M_ARM64 is set by MSVC. */ +#if defined(__AARCH64EL__) || defined(_M_ARM64) +#define SYS_AARCH64 +#endif + +#if defined(__x86_64__) +#define SYS_X86_64 +#if defined(__AVX2__) +#define SYS_X86_64_AVX2 +#endif +#endif /* __x86_64__ */ + +/* Check endianness */ +#if defined(__BYTE_ORDER__) + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define SYS_LITTLE_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define SYS_BIG_ENDIAN +#else /* __BYTE_ORDER__ */ +#error "__BYTE_ORDER__ defined, but don't recognize value." +#endif /* __BYTE_ORDER__ */ +#endif /* defined(__BYTE_ORDER__) */ + +/* If FORCE_AARCH64 is set, assert that we're indeed on an AArch64 system. */ +#if defined(FORCE_AARCH64) && !defined(SYS_AARCH64) +#error "FORCE_AARCH64 is set, but we don't seem to be on an AArch64 system." +#endif + +/* If FORCE_X86_64 is set, assert that we're indeed on an X86_64 system. */ +#if defined(FORCE_X86_64) && !defined(SYS_X86_64) +#error "FORCE_X86_64 is set, but we don't seem to be on an X86_64 system." 
+#endif + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.c new file mode 100644 index 0000000000..aa9b578074 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.c @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#define _ISOC99_SOURCE +#include "debug.h" +#include + +#if defined(MLKEM_DEBUG) + +static char debug_buf[256]; + +void mlkem_debug_assert(const char *file, int line, const char *description, + const int val) +{ + if (val == 0) + { + snprintf(debug_buf, sizeof(debug_buf), "Assertion failed: %s (value %d)", + description, val); + mlkem_debug_print_error(file, line, debug_buf); + exit(1); + } +} +void mlkem_debug_check_bounds(const char *file, int line, + const char *description, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive) +{ + int err = 0; + unsigned i; + for (i = 0; i < len; i++) + { + int16_t val = ptr[i]; + if (!(val > lower_bound_exclusive && val < upper_bound_exclusive)) + { + snprintf(debug_buf, sizeof(debug_buf), + "%s, index %u, value %d out of bounds (%d,%d)", description, i, + (int)val, lower_bound_exclusive, upper_bound_exclusive); + mlkem_debug_print_error(file, line, debug_buf); + err = 1; + } + } + + if (err == 1) + exit(1); +} + +void mlkem_debug_print_error(const char *file, int line, const char *msg) +{ + fprintf(stderr, "[ERROR:%s:%04d] %s\n", file, line, msg); + fflush(stderr); +} + +#else /* MLKEM_DEBUG */ + +int empty_cu_debug; + +#endif /* MLKEM_DEBUG */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.h new file mode 100644 index 0000000000..65208771d2 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.h @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * 
SPDX-License-Identifier: Apache-2.0 + */ +#ifndef MLKEM_DEBUG_H +#define MLKEM_DEBUG_H + +#if defined(MLKEM_DEBUG) +#include +#include +#include + +/************************************************* + * Name: mlkem_debug_assert + * + * Description: Check debug assertion + * + * Prints an error message to stderr and calls + * exit(1) if not. + * + * Arguments: - file: filename + * - line: line number + * - description: Textual description of assertion + * - val: Value asserted to be non-zero + **************************************************/ +void mlkem_debug_assert(const char *file, int line, const char *description, + const int val); + +/************************************************* + * Name: mlkem_debug_check_bounds + * + * Description: Check whether values in an array of int16_t + * are within specified bounds. + * + * Prints an error message to stderr and calls + * exit(1) if not. + * + * Arguments: - file: filename + * - line: line number + * - description: Textual description of check + * - ptr: Base of array to be checked + * - len: Number of int16_t in ptr + * - lower_bound_exclusive: Exclusive lower bound + * - upper_bound_exclusive: Exclusive upper bound + **************************************************/ +void mlkem_debug_check_bounds(const char *file, int line, + const char *description, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive); + +/* Print error message to stderr alongside file and line information */ +void mlkem_debug_print_error(const char *file, int line, const char *msg); + +/* Check assertion, calling exit() upon failure + * + * val: Value that's asserted to be non-zero + * msg: Message to print on failure + * + * Currently called CASSERT to avoid clash with CBMC assert. 
+ */ +#define CASSERT(val, msg) \ + do \ + { \ + mlkem_debug_assert(__FILE__, __LINE__, (msg), (val)); \ + } while (0) + +/* Check absolute bounds of scalar + * val: Scalar to be checked + * abs_bound: Exclusive upper bound on absolute value to check + * msg: Message to print on failure */ +#define SCALAR_BOUND(val, abs_bound, msg) \ + CASSERT((val) > -(abs_bound) && (val) < (abs_bound), msg) + +/* Check that all coefficients in array of int16_t's are non-negative + * and below an exclusive upper bound. + * + * ptr: Base of array, expression of type int16_t* + * len: Number of int16_t in array + * high_bound: Exclusive upper bound on absolute value to check + * msg: Message to print on failure */ +#define UBOUND(ptr, len, high_bound, msg) \ + do \ + { \ + mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ + (len), -1, ((high_bound))); \ + } while (0) + +/* Check absolute bounds in array of int16_t's + * ptr: Base of array, expression of type int16_t* + * len: Number of int16_t in array + * abs_bound: Exclusive upper bound on absolute value to check + * msg: Message to print on failure */ +#define BOUND(ptr, len, abs_bound, msg) \ + do \ + { \ + mlkem_debug_check_bounds(__FILE__, __LINE__, (msg), (int16_t *)(ptr), \ + (len), -(abs_bound), (abs_bound)); \ + } while (0) + +/* Check absolute bounds on coefficients in polynomial or mulcache + * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check + * abs_bound: Exclusive upper bound on absolute value to check + * msg: Message to print on failure */ +#define POLY_BOUND_MSG(ptr, abs_bound, msg) \ + BOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (abs_bound), \ + msg) + +/* Check unsigned bounds on coefficients in polynomial or mulcache + * ptr: poly* or poly_mulcache* pointer to polynomial (cache) to check + * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
+ * msg: Message to print on failure */ +#define POLY_UBOUND_MSG(ptr, ubound, msg) \ + UBOUND((ptr)->coeffs, (sizeof((ptr)->coeffs) / sizeof(int16_t)), (ubound), \ + msg) + +/* Check absolute bounds on coefficients in polynomial + * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check + * abs_bound: Exclusive upper bound on absolute value to check */ +#define POLY_BOUND(ptr, abs_bound) \ + POLY_BOUND_MSG((ptr), (abs_bound), "poly absolute bound for " #ptr) + +/* Check unsigned bounds on coefficients in polynomial + * ptr: poly* of poly_mulcache* pointer to polynomial (cache) to check + * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. + */ +#define POLY_UBOUND(ptr, ubound) \ + POLY_UBOUND_MSG((ptr), (ubound), "poly unsigned bound for " #ptr) + +/* Check absolute bounds on coefficients in vector of polynomials + * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check + * abs_bound: Exclusive upper bound on absolute value to check */ +#define POLYVEC_BOUND(ptr, abs_bound) \ + do \ + { \ + unsigned _debug_polyvec_bound_idx; \ + for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ + _debug_polyvec_bound_idx++) \ + POLY_BOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (abs_bound), \ + "polyvec absolute bound for " #ptr ".vec[i]"); \ + } while (0) + +/* Check unsigned bounds on coefficients in vector of polynomials + * ptr: polyvec* or polyvec_mulcache* pointer to vector of polynomials to check + * ubound: Exclusive upper bound on value to check. Inclusive lower bound is 0. 
+ */ +#define POLYVEC_UBOUND(ptr, ubound) \ + do \ + { \ + unsigned _debug_polyvec_bound_idx; \ + for (_debug_polyvec_bound_idx = 0; _debug_polyvec_bound_idx < MLKEM_K; \ + _debug_polyvec_bound_idx++) \ + POLY_UBOUND_MSG(&(ptr)->vec[_debug_polyvec_bound_idx], (ubound), \ + "polyvec unsigned bound for " #ptr ".vec[i]"); \ + } while (0) + +/* Following AWS-LC to define a C99-compliant static assert */ +#define MLKEM_STATIC_ASSERT_DEFINE(cond, msg) \ + typedef struct \ + { \ + unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \ + } MLKEM_CONCAT(static_assertion_, msg) __attribute__((unused)); + +#define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \ + MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix)) +#define MLKEM_STATIC_ASSERT_ADD_LINE1(cond, line, suffix) \ + MLKEM_STATIC_ASSERT_ADD_LINE0(cond, MLKEM_CONCAT(line, suffix)) +#define MLKEM_STATIC_ASSERT_ADD_LINE2(cond, suffix) \ + MLKEM_STATIC_ASSERT_ADD_LINE1(cond, __LINE__, suffix) +#define MLKEM_STATIC_ASSERT_ADD_ERROR(cond, suffix) \ + MLKEM_STATIC_ASSERT_ADD_LINE2(cond, MLKEM_CONCAT(_error_is_, suffix)) +#define STATIC_ASSERT(cond, error) MLKEM_STATIC_ASSERT_ADD_ERROR(cond, error) + +#else /* MLKEM_DEBUG */ + +#define CASSERT(val, msg) \ + do \ + { \ + } while (0) +#define SCALAR_BOUND(val, abs_bound, msg) \ + do \ + { \ + } while (0) +#define BOUND(ptr, len, abs_bound, msg) \ + do \ + { \ + } while (0) +#define POLY_BOUND(ptr, abs_bound) \ + do \ + { \ + } while (0) +#define POLYVEC_BOUND(ptr, abs_bound) \ + do \ + { \ + } while (0) +#define POLY_BOUND_MSG(ptr, ubound, abs_bound) \ + do \ + { \ + } while (0) +#define UBOUND(ptr, len, high_bound, msg) \ + do \ + { \ + } while (0) +#define POLY_UBOUND(ptr, ubound) \ + do \ + { \ + } while (0) +#define POLYVEC_UBOUND(ptr, ubound) \ + do \ + { \ + } while (0) +#define POLY_UBOUND_MSG(ptr, ubound, msg) \ + do \ + { \ + } while (0) +#define STATIC_ASSERT(cond, error) + +#endif /* MLKEM_DEBUG */ + +#endif /* MLKEM_DEBUG_H */ diff --git 
a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.c new file mode 100644 index 0000000000..d37e818aeb --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.c @@ -0,0 +1,578 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "indcpa.h" +#include +#include +#include +#include "fips202.h" +#include "fips202x4.h" +#include "indcpa.h" +#include "ntt.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include "randombytes.h" +#include "rej_uniform.h" +#include "symmetric.h" + +#include "arith_native.h" +#include "debug/debug.h" + +#include "cbmc.h" + + + +/************************************************* + * Name: pack_pk + * + * Description: Serialize the public key as concatenation of the + * serialized vector of polynomials pk + * and the public seed used to generate the matrix A. + * + * Arguments: uint8_t *r: pointer to the output serialized public key + * polyvec *pk: pointer to the input public-key polyvec. + * Must have coefficients within [0,..,q-1]. + * const uint8_t *seed: pointer to the input public seed + **************************************************/ +static void pack_pk(uint8_t r[MLKEM_INDCPA_PUBLICKEYBYTES], polyvec *pk, + const uint8_t seed[MLKEM_SYMBYTES]) +{ + POLYVEC_BOUND(pk, MLKEM_Q); + polyvec_tobytes(r, pk); + memcpy(r + MLKEM_POLYVECBYTES, seed, MLKEM_SYMBYTES); +} + +/************************************************* + * Name: unpack_pk + * + * Description: De-serialize public key from a byte array; + * approximate inverse of pack_pk + * + * Arguments: - polyvec *pk: pointer to output public-key polynomial vector + * Coefficients will be normalized to [0,..,q-1]. + * - uint8_t *seed: pointer to output seed to generate matrix A + * - const uint8_t *packedpk: pointer to input serialized public + * key. 
+ **************************************************/ +static void unpack_pk(polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], + const uint8_t packedpk[MLKEM_INDCPA_PUBLICKEYBYTES]) +{ + polyvec_frombytes(pk, packedpk); + memcpy(seed, packedpk + MLKEM_POLYVECBYTES, MLKEM_SYMBYTES); + + /* + * TODO! We know from the modulus check that this will result in an + * unsigned canonical polynomial, but CBMC does not know it. We should + * weaken the specification of `unpack_pk()` and all depending functions + * to work with the weaker 4096-bound, so that the proofs go through + * without the need of this redundant call to polyvec_reduce(). + */ + polyvec_reduce(pk); +} + +/************************************************* + * Name: pack_sk + * + * Description: Serialize the secret key + * + * Arguments: - uint8_t *r: pointer to output serialized secret key + * - polyvec *sk: pointer to input vector of polynomials (secret + *key) + **************************************************/ +static void pack_sk(uint8_t r[MLKEM_INDCPA_SECRETKEYBYTES], polyvec *sk) +{ + POLYVEC_BOUND(sk, MLKEM_Q); + polyvec_tobytes(r, sk); +} + +/************************************************* + * Name: unpack_sk + * + * Description: De-serialize the secret key; inverse of pack_sk + * + * Arguments: - polyvec *sk: pointer to output vector of polynomials (secret + *key) + * - const uint8_t *packedsk: pointer to input serialized secret + *key + **************************************************/ +static void unpack_sk(polyvec *sk, + const uint8_t packedsk[MLKEM_INDCPA_SECRETKEYBYTES]) +{ + polyvec_frombytes(sk, packedsk); + polyvec_reduce(sk); +} + +/************************************************* + * Name: pack_ciphertext + * + * Description: Serialize the ciphertext as concatenation of the + * compressed and serialized vector of polynomials b + * and the compressed and serialized polynomial v + * + * Arguments: uint8_t *r: pointer to the output serialized ciphertext + * polyvec *b: pointer to the input 
vector of polynomials b + * poly *v: pointer to the input polynomial v + **************************************************/ +static void pack_ciphertext(uint8_t r[MLKEM_INDCPA_BYTES], polyvec *b, poly *v) +{ + polyvec_compress_du(r, b); + poly_compress_dv(r + MLKEM_POLYVECCOMPRESSEDBYTES_DU, v); +} + +/************************************************* + * Name: unpack_ciphertext + * + * Description: De-serialize and decompress ciphertext from a byte array; + * approximate inverse of pack_ciphertext + * + * Arguments: - polyvec *b: pointer to the output vector of polynomials b + * - poly *v: pointer to the output polynomial v + * - const uint8_t *c: pointer to the input serialized ciphertext + **************************************************/ +static void unpack_ciphertext(polyvec *b, poly *v, + const uint8_t c[MLKEM_INDCPA_BYTES]) +{ + polyvec_decompress_du(b, c); + poly_decompress_dv(v, c + MLKEM_POLYVECCOMPRESSEDBYTES_DU); +} + +#ifndef MLKEM_GEN_MATRIX_NBLOCKS +#define MLKEM_GEN_MATRIX_NBLOCKS \ + ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + SHAKE128_RATE) / SHAKE128_RATE) +#endif + +/* + * Generate four A matrix entries from a seed, using rejection + * sampling on the output of a XOF. 
+ */ +STATIC_TESTABLE +void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4]) +__contract__( + requires(memory_no_alias(vec, sizeof(poly) * 4)) + requires(memory_no_alias(seed, sizeof(uint8_t*) * 4)) + requires(memory_no_alias(seed[0], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[1], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2)) + requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2)) + assigns(memory_slice(vec, sizeof(poly) * 4)) + ensures(array_bound(vec[0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))) + ensures(array_bound(vec[1].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))) + ensures(array_bound(vec[2].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))) + ensures(array_bound(vec[3].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))) +{ + /* Temporary buffers for XOF output before rejection sampling */ + uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE]; + uint8_t buf1[MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE]; + uint8_t buf2[MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE]; + uint8_t buf3[MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE]; + + /* Tracks the number of coefficients we have already sampled */ + unsigned int ctr[KECCAK_WAY]; + shake128x4incctx statex; + unsigned int buflen; + + shake128x4_inc_init(&statex); + + /* seed is MLKEM_SYMBYTES + 2 bytes long, but padded to MLKEM_SYMBYTES + 16 */ + shake128x4_absorb_once(&statex, seed[0], seed[1], seed[2], seed[3], + MLKEM_SYMBYTES + 2); + + /* + * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + * This should generate the matrix entries with high probability. 
+ */ + shake128x4_squeezeblocks(buf0, buf1, buf2, buf3, MLKEM_GEN_MATRIX_NBLOCKS, + &statex); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE; + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf3, buflen); + + /* + * So long as not all matrix entries have been generated, squeeze + * one more block a time until we're done. + */ + buflen = SHAKE128_RATE; + while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || + ctr[3] < MLKEM_N) + __loop__( + assigns(ctr, statex, memory_slice(vec, sizeof(poly) * 4), object_whole(buf0), + object_whole(buf1), object_whole(buf2), object_whole(buf3)) + invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) + invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) + invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0] - 1, 0, (MLKEM_Q - 1))) + invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1] - 1, 0, (MLKEM_Q - 1))) + invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2] - 1, 0, (MLKEM_Q - 1))) + invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3] - 1, 0, (MLKEM_Q - 1)))) + { + shake128x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex); + ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen); + ctr[1] = rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf1, buflen); + ctr[2] = rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf2, buflen); + ctr[3] = rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf3, buflen); + } + + shake128x4_inc_ctx_release(&statex); +} + +/* + * Generate a single A matrix entry from a seed, using rejection + * sampling on the output of a XOF. 
+ */ +STATIC_TESTABLE +void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) +__contract__( + requires(memory_no_alias(entry, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) + assigns(memory_slice(entry, sizeof(poly))) + ensures(array_bound(entry->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))) +{ + shake128incctx state; + uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE]; + unsigned int ctr, buflen; + + shake128_inc_init(&state); + + shake128_absorb_once(&state, seed, MLKEM_SYMBYTES + 2); + + /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + */ + /* This should generate the matrix entry with high probability. */ + shake128_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * SHAKE128_RATE; + ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); + + /* Squeeze + sample one more block a time until we're done */ + buflen = SHAKE128_RATE; + while (ctr < MLKEM_N) + __loop__( + assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf)) + invariant(0 <= ctr && ctr <= MLKEM_N) + invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr - 1, + 0, (MLKEM_Q - 1)))) + { + shake128_squeezeblocks(buf, 1, &state); + ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, SHAKE128_RATE); + } + + shake128_inc_ctx_release(&state); +} + +/************************************************* + * Name: gen_matrix + * + * Description: Deterministically generate matrix A (or the transpose of A) + * from a seed. Entries of the matrix are polynomials that look + * uniformly random. 
Performs rejection sampling on output of + * a XOF + * + * Arguments: - polyvec *a: pointer to output matrix A + * - const uint8_t *seed: pointer to input seed + * - int transposed: boolean deciding whether A or A^T is generated + **************************************************/ +/* Not static for benchmarking */ +void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed) +{ + int i; + unsigned int j; + /* + * We generate four separate seed arrays rather than a single one to work + * around limitations in CBMC function contracts dealing with disjoint slices + * of the same parent object. + */ + + ALIGN uint8_t seed0[MLKEM_SYMBYTES + 2]; + ALIGN uint8_t seed1[MLKEM_SYMBYTES + 2]; + ALIGN uint8_t seed2[MLKEM_SYMBYTES + 2]; + ALIGN uint8_t seed3[MLKEM_SYMBYTES + 2]; + uint8_t *seedxy[4]; + seedxy[0] = seed0; + seedxy[1] = seed1; + seedxy[2] = seed2; + seedxy[3] = seed3; + + for (j = 0; j < KECCAK_WAY; j++) + { + memcpy(seedxy[j], seed, MLKEM_SYMBYTES); + } + + for (i = 0; i < (MLKEM_K * MLKEM_K / KECCAK_WAY) * KECCAK_WAY; + i += KECCAK_WAY) + { + uint8_t x, y; + + for (j = 0; j < KECCAK_WAY; j++) + { + x = (i + j) / MLKEM_K; + y = (i + j) % MLKEM_K; + if (transposed) + { + seedxy[j][MLKEM_SYMBYTES + 0] = x; + seedxy[j][MLKEM_SYMBYTES + 1] = y; + } + else + { + seedxy[j][MLKEM_SYMBYTES + 0] = y; + seedxy[j][MLKEM_SYMBYTES + 1] = x; + } + } + + /* + * This call writes across polyvec boundaries for K=2 and K=3. + * This is intentional and safe. + */ + gen_matrix_entry_x4(&a[0].vec[0] + i, seedxy); + } + + /* For left over polynomial, we use single keccak.
*/ + if (i < MLKEM_K * MLKEM_K) + { + uint8_t x, y; + x = i / MLKEM_K; + y = i % MLKEM_K; + + if (transposed) + { + seed0[MLKEM_SYMBYTES + 0] = x; + seed0[MLKEM_SYMBYTES + 1] = y; + } + else + { + seed0[MLKEM_SYMBYTES + 0] = y; + seed0[MLKEM_SYMBYTES + 1] = x; + } + + gen_matrix_entry(&a[0].vec[0] + i, seed0); + i++; + } + + cassert(i == MLKEM_K * MLKEM_K, + "gen_matrix: failed to generate whole matrix"); + +#if defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER) + /* + * The public matrix is generated in NTT domain. If the native backend + * uses a custom order in NTT domain, permute A accordingly. + */ + for (i = 0; i < MLKEM_K; i++) + { + for (j = 0; j < MLKEM_K; j++) + { + poly_permute_bitrev_to_custom(&a[i].vec[j]); + } + } +#endif /* MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER */ +} + +/************************************************* + * Name: matvec_mul + * + * Description: Computes matrix-vector product in NTT domain, + * via Montgomery multiplication. + * + * Arguments: - polyvec *out: Pointer to output polynomial vector + * - polyvec a[MLKEM_K]: Input matrix. Must be in NTT domain + * and have coefficients of absolute value < MLKEM_Q. + * - polyvec *v: Input polynomial vector. Must be in NTT domain. + * - polyvec *vc: Mulcache for v, computed via + * polyvec_mulcache_compute(). 
+ **************************************************/ +STATIC_TESTABLE +void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v, + const polyvec_mulcache *vc) +__contract__( + requires(memory_no_alias(out, sizeof(polyvec))) + requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K)) + requires(memory_no_alias(v, sizeof(polyvec))) + requires(memory_no_alias(vc, sizeof(polyvec_mulcache))) + requires(forall(int, k0, 0, MLKEM_K - 1, + forall(int, k1, 0, MLKEM_K - 1, + array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))) + assigns(object_whole(out))) +{ + int i; + for (i = 0; i < MLKEM_K; i++) + __loop__( + assigns(i, object_whole(out)) + invariant(i >= 0 && i <= MLKEM_K)) + { + polyvec_basemul_acc_montgomery_cached(&out->vec[i], &a[i], v, vc); + } +} + +/************************************************* + * Name: indcpa_keypair_derand + * + * Description: Generates public and private key for the CPA-secure + * public-key encryption scheme underlying ML-KEM + * + * Arguments: - uint8_t *pk: pointer to output public key + * (of length MLKEM_INDCPA_PUBLICKEYBYTES bytes) + * - uint8_t *sk: pointer to output private key + * (of length MLKEM_INDCPA_SECRETKEYBYTES bytes) + * - const uint8_t *coins: pointer to input randomness + * (of length MLKEM_SYMBYTES bytes) + **************************************************/ + +STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0) + +void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], + uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES], + const uint8_t coins[MLKEM_SYMBYTES]) +{ + ALIGN uint8_t buf[2 * MLKEM_SYMBYTES]; + const uint8_t *publicseed = buf; + const uint8_t *noiseseed = buf + MLKEM_SYMBYTES; + polyvec a[MLKEM_K], e, pkpv, skpv; + polyvec_mulcache skpv_cache; + + ALIGN uint8_t coins_with_domain_separator[MLKEM_SYMBYTES + 1]; + /* Concatenate coins with MLKEM_K for domain separation of security levels */ + memcpy(coins_with_domain_separator, coins, MLKEM_SYMBYTES); + 
coins_with_domain_separator[MLKEM_SYMBYTES] = MLKEM_K; + + hash_g(buf, coins_with_domain_separator, MLKEM_SYMBYTES + 1); + + gen_matrix(a, publicseed, 0 /* no transpose */); + +#if MLKEM_K == 2 + poly_getnoise_eta1_4x(skpv.vec + 0, skpv.vec + 1, e.vec + 0, e.vec + 1, + noiseseed, 0, 1, 2, 3); +#elif MLKEM_K == 3 + /* + * Only the first three output buffers are needed. + * The last parameter is a dummy that's overwritten later. + */ + poly_getnoise_eta1_4x(skpv.vec + 0, skpv.vec + 1, skpv.vec + 2, + pkpv.vec + 0 /* irrelevant */, noiseseed, 0, 1, 2, + 0xFF /* irrelevant */); + /* Same here */ + poly_getnoise_eta1_4x(e.vec + 0, e.vec + 1, e.vec + 2, + pkpv.vec + 0 /* irrelevant */, noiseseed, 3, 4, 5, + 0xFF /* irrelevant */); +#elif MLKEM_K == 4 + poly_getnoise_eta1_4x(skpv.vec + 0, skpv.vec + 1, skpv.vec + 2, skpv.vec + 3, + noiseseed, 0, 1, 2, 3); + poly_getnoise_eta1_4x(e.vec + 0, e.vec + 1, e.vec + 2, e.vec + 3, noiseseed, + 4, 5, 6, 7); +#endif + + polyvec_ntt(&skpv); + polyvec_ntt(&e); + + polyvec_mulcache_compute(&skpv_cache, &skpv); + matvec_mul(&pkpv, a, &skpv, &skpv_cache); + polyvec_tomont(&pkpv); + + /* Arithmetic cannot overflow, see static assertion at the top */ + polyvec_add(&pkpv, &e); + polyvec_reduce(&pkpv); + polyvec_reduce(&skpv); + + pack_sk(sk, &skpv); + pack_pk(pk, &pkpv, publicseed); +} + +/************************************************* + * Name: indcpa_enc + * + * Description: Encryption function of the CPA-secure + * public-key encryption scheme underlying Kyber.
+ * + * Arguments: - uint8_t *c: pointer to output ciphertext + * (of length MLKEM_INDCPA_BYTES bytes) + * - const uint8_t *m: pointer to input message + * (of length MLKEM_INDCPA_MSGBYTES bytes) + * - const uint8_t *pk: pointer to input public key + * (of length MLKEM_INDCPA_PUBLICKEYBYTES) + * - const uint8_t *coins: pointer to input random coins used as + *seed (of length MLKEM_SYMBYTES) to deterministically generate all randomness + **************************************************/ + +/* Check that the arithmetic in indcpa_enc() does not overflow */ +STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0) +STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX, + indcpa_enc_bound_1) + +void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], + const uint8_t m[MLKEM_INDCPA_MSGBYTES], + const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], + const uint8_t coins[MLKEM_SYMBYTES]) +{ + ALIGN uint8_t seed[MLKEM_SYMBYTES]; + polyvec sp, pkpv, ep, at[MLKEM_K], b; + poly v, k, epp; + polyvec_mulcache sp_cache; + + unpack_pk(&pkpv, seed, pk); + poly_frommsg(&k, m); + gen_matrix(at, seed, 1 /* transpose */); + +#if MLKEM_K == 2 + poly_getnoise_eta1122_4x(sp.vec + 0, sp.vec + 1, ep.vec + 0, ep.vec + 1, + coins, 0, 1, 2, 3); + poly_getnoise_eta2(&epp, coins, 4); +#elif MLKEM_K == 3 + /* + * In this call, only the first three output buffers are needed. + * The last parameter is a dummy that's overwritten later. + */ + poly_getnoise_eta1_4x(sp.vec + 0, sp.vec + 1, sp.vec + 2, &b.vec[0], coins, 0, + 1, 2, 0xFF); + /* The fourth output buffer in this call _is_ used. 
*/ + poly_getnoise_eta2_4x(ep.vec + 0, ep.vec + 1, ep.vec + 2, &epp, coins, 3, 4, + 5, 6); +#elif MLKEM_K == 4 + poly_getnoise_eta1_4x(sp.vec + 0, sp.vec + 1, sp.vec + 2, sp.vec + 3, coins, + 0, 1, 2, 3); + poly_getnoise_eta2_4x(ep.vec + 0, ep.vec + 1, ep.vec + 2, ep.vec + 3, coins, + 4, 5, 6, 7); + poly_getnoise_eta2(&epp, coins, 8); +#endif + + polyvec_ntt(&sp); + + polyvec_mulcache_compute(&sp_cache, &sp); + matvec_mul(&b, at, &sp, &sp_cache); + polyvec_basemul_acc_montgomery_cached(&v, &pkpv, &sp, &sp_cache); + + polyvec_invntt_tomont(&b); + poly_invntt_tomont(&v); + + /* Arithmetic cannot overflow, see static assertion at the top */ + polyvec_add(&b, &ep); + poly_add(&v, &epp); + poly_add(&v, &k); + + polyvec_reduce(&b); + poly_reduce(&v); + + pack_ciphertext(c, &b, &v); +} + +/* Check that the arithmetic in indcpa_dec() does not overflow */ +STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0) + +void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], + const uint8_t c[MLKEM_INDCPA_BYTES], + const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES]) +{ + polyvec b, skpv; + poly v, sb; + + unpack_ciphertext(&b, &v, c); + unpack_sk(&skpv, sk); + + polyvec_ntt(&b); + polyvec_basemul_acc_montgomery(&sb, &skpv, &b); + poly_invntt_tomont(&sb); + + /* Arithmetic cannot overflow, see static assertion at the top */ + poly_sub(&v, &sb); + poly_reduce(&v); + + poly_tomsg(m, &v); +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.h new file mode 100644 index 0000000000..3f57eb1295 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef INDCPA_H +#define INDCPA_H + +#include +#include "cbmc.h" +#include "params.h" +#include "polyvec.h" + + +#define gen_matrix MLKEM_NAMESPACE(gen_matrix) + +void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int 
transposed) +__contract__( + requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K)) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires(transposed == 0 || transposed == 1) + assigns(object_whole(a)) + ensures(forall(int, x, 0, MLKEM_K - 1, forall(int, y, 0, MLKEM_K - 1, + array_bound(a[x].vec[y].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))); +); + +#define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand) +void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], + uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES], + const uint8_t coins[MLKEM_SYMBYTES]) +__contract__( + requires(memory_no_alias(pk, MLKEM_INDCPA_PUBLICKEYBYTES)) + requires(memory_no_alias(sk, MLKEM_INDCPA_SECRETKEYBYTES)) + requires(memory_no_alias(coins, MLKEM_SYMBYTES)) + assigns(object_whole(pk)) + assigns(object_whole(sk)) +); + +#define indcpa_enc MLKEM_NAMESPACE(indcpa_enc) +/************************************************* + * Name: indcpa_dec + * + * Description: Decryption function of the CPA-secure + * public-key encryption scheme underlying Kyber. 
+ * + * Arguments: - uint8_t *m: pointer to output decrypted message + * (of length MLKEM_INDCPA_MSGBYTES) + * - const uint8_t *c: pointer to input ciphertext + * (of length MLKEM_INDCPA_BYTES) + * - const uint8_t *sk: pointer to input secret key + * (of length MLKEM_INDCPA_SECRETKEYBYTES) + **************************************************/ +void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], + const uint8_t m[MLKEM_INDCPA_MSGBYTES], + const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], + const uint8_t coins[MLKEM_SYMBYTES]) +__contract__( + requires(memory_no_alias(c, MLKEM_INDCPA_BYTES)) + requires(memory_no_alias(m, MLKEM_INDCPA_MSGBYTES)) + requires(memory_no_alias(pk, MLKEM_INDCPA_PUBLICKEYBYTES)) + requires(memory_no_alias(coins, MLKEM_SYMBYTES)) + assigns(object_whole(c)) +); + +#define indcpa_dec MLKEM_NAMESPACE(indcpa_dec) +void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES], + const uint8_t c[MLKEM_INDCPA_BYTES], + const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES]) +__contract__( + requires(memory_no_alias(c, MLKEM_INDCPA_BYTES)) + requires(memory_no_alias(m, MLKEM_INDCPA_MSGBYTES)) + requires(memory_no_alias(sk, MLKEM_INDCPA_SECRETKEYBYTES)) + assigns(object_whole(m)) +); + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.c new file mode 100644 index 0000000000..f84ee3f3da --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.c @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "kem.h" +#include +#include +#include +#include "indcpa.h" +#include "params.h" +#include "randombytes.h" +#include "symmetric.h" +#include "verify.h" + +#if defined(CBMC) +/* Redeclaration with contract needed for CBMC only */ +int memcmp(const void *str1, const void *str2, size_t n) +__contract__( + requires(memory_no_alias(str1, n)) + requires(memory_no_alias(str2, n)) +); +#endif + 
+/************************************************* + * Name: check_pk + * + * Description: Implements modulus check mandated by FIPS203, + * i.e., ensures that coefficients are in [0,q-1]. + * Described in Section 7.2 of FIPS203. + * + * Arguments: - const uint8_t *pk: pointer to input public key + * (an already allocated array of MLKEM_PUBLICKEYBYTES bytes) + ** + * Returns 0 on success, and -1 on failure + **************************************************/ +static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES]) +{ + polyvec p; + uint8_t p_reencoded[MLKEM_POLYVECBYTES]; + polyvec_frombytes(&p, pk); + polyvec_reduce(&p); + polyvec_tobytes(p_reencoded, &p); + /* Data is public, so a variable-time memcmp() is OK */ + if (memcmp(pk, p_reencoded, MLKEM_POLYVECBYTES)) + { + return -1; + } + return 0; +} + +/************************************************* + * Name: check_sk + * + * Description: Implements public key hash check mandated by FIPS203, + * i.e., ensures that + * sk[768𝑘+32 ∶ 768𝑘+64] = H(pk)= H(sk[384𝑘 : 768𝑘+32]) + * Described in Section 7.3 of FIPS203. + * + * Arguments: - const uint8_t *sk: pointer to input private key + * (an already allocated array of MLKEM_SECRETKEYBYTES bytes) + * + * Returns 0 on success, and -1 on failure + **************************************************/ +static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES]) +{ + uint8_t test[MLKEM_SYMBYTES]; + /* + * The parts of `sk` being hashed and compared here are public, so + * no secret information is leaked through the runtime or the return value + * of this function.
+ */ + hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_PUBLICKEYBYTES); + if (memcmp(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test, + MLKEM_SYMBYTES)) + { + return -1; + } + return 0; +} + +/************************************************* + * Name: crypto_kem_keypair_derand + * + * Description: Generates public and private key + * for CCA-secure ML-KEM key encapsulation mechanism + * + * Arguments: - uint8_t *pk: pointer to output public key + * (an already allocated array of MLKEM_PUBLICKEYBYTES bytes) + * - uint8_t *sk: pointer to output private key + * (an already allocated array of MLKEM_SECRETKEYBYTES bytes) + * - uint8_t *coins: pointer to input randomness + * (an already allocated array filled with 2*MLKEM_SYMBYTES + *random bytes) + ** + * Returns 0 (success) + **************************************************/ +int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins) +{ + indcpa_keypair_derand(pk, sk, coins); + memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_PUBLICKEYBYTES); + hash_h(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk, + MLKEM_PUBLICKEYBYTES); + /* Value z for pseudo-random output on reject */ + memcpy(sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, coins + MLKEM_SYMBYTES, + MLKEM_SYMBYTES); + return 0; +} + +int crypto_kem_keypair(uint8_t *pk, uint8_t *sk) +{ + ALIGN uint8_t coins[2 * MLKEM_SYMBYTES]; + randombytes(coins, 2 * MLKEM_SYMBYTES); + crypto_kem_keypair_derand(pk, sk, coins); + return 0; +} + +int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, + const uint8_t *coins) +{ + ALIGN uint8_t buf[2 * MLKEM_SYMBYTES]; + /* Will contain key, coins */ + ALIGN uint8_t kr[2 * MLKEM_SYMBYTES]; + + if (check_pk(pk)) + { + return -1; + } + + memcpy(buf, coins, MLKEM_SYMBYTES); + + /* Multitarget countermeasure for coins + contributory KEM */ + hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_PUBLICKEYBYTES); + hash_g(kr, buf, 2 * MLKEM_SYMBYTES); + + /* coins are in kr+MLKEM_SYMBYTES */ + 
indcpa_enc(ct, buf, pk, kr + MLKEM_SYMBYTES); + + memcpy(ss, kr, MLKEM_SYMBYTES); + return 0; +} + +int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) +{ + ALIGN uint8_t coins[MLKEM_SYMBYTES]; + randombytes(coins, MLKEM_SYMBYTES); + return crypto_kem_enc_derand(ct, ss, pk, coins); +} + +int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) +{ + uint8_t fail; + ALIGN uint8_t buf[2 * MLKEM_SYMBYTES]; + /* Will contain key, coins */ + ALIGN uint8_t kr[2 * MLKEM_SYMBYTES]; + ALIGN uint8_t cmp[MLKEM_CIPHERTEXTBYTES + MLKEM_SYMBYTES]; + const uint8_t *pk = sk + MLKEM_INDCPA_SECRETKEYBYTES; + + if (check_sk(sk)) + { + return -1; + } + + indcpa_dec(buf, ct, sk); + + /* Multitarget countermeasure for coins + contributory KEM */ + memcpy(buf + MLKEM_SYMBYTES, sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, + MLKEM_SYMBYTES); + hash_g(kr, buf, 2 * MLKEM_SYMBYTES); + + /* coins are in kr+MLKEM_SYMBYTES */ + indcpa_enc(cmp, buf, pk, kr + MLKEM_SYMBYTES); + + fail = ct_memcmp(ct, cmp, MLKEM_CIPHERTEXTBYTES); + + /* Compute rejection key */ + rkprf(ss, sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, ct); + + /* Copy true key to return buffer if fail is 0 */ + ct_cmov_zero(ss, kr, MLKEM_SYMBYTES, fail); + + return 0; +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.h new file mode 100644 index 0000000000..6a33be7c7e --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef KEM_H +#define KEM_H + +#include +#include "cbmc.h" +#include "params.h" + +#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES +#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES +#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES +#define CRYPTO_BYTES MLKEM_SSBYTES + +#if (MLKEM_K == 2) +#define CRYPTO_ALGNAME "Kyber512" +#elif (MLKEM_K == 3) +#define CRYPTO_ALGNAME "Kyber768" 
+#elif (MLKEM_K == 4) +#define CRYPTO_ALGNAME "Kyber1024" +#endif + +#define crypto_kem_keypair_derand MLKEM_NAMESPACE(keypair_derand) +int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins) +__contract__( + requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES)) + requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES)) + requires(memory_no_alias(coins, 2 * MLKEM_SYMBYTES)) + assigns(object_whole(pk)) + assigns(object_whole(sk)) +); + +#define crypto_kem_keypair MLKEM_NAMESPACE(keypair) +/************************************************* + * Name: crypto_kem_keypair + * + * Description: Generates public and private key + * for CCA-secure ML-KEM key encapsulation mechanism + * + * Arguments: - uint8_t *pk: pointer to output public key + * (an already allocated array of MLKEM_PUBLICKEYBYTES bytes) + * - uint8_t *sk: pointer to output private key + * (an already allocated array of MLKEM_SECRETKEYBYTES bytes) + * + * Returns 0 (success) + **************************************************/ +int crypto_kem_keypair(uint8_t *pk, uint8_t *sk) +__contract__( + requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES)) + requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES)) + assigns(object_whole(pk)) + assigns(object_whole(sk)) +); + +#define crypto_kem_enc_derand MLKEM_NAMESPACE(enc_derand) +/************************************************* + * Name: crypto_kem_enc_derand + * + * Description: Generates cipher text and shared + * secret for given public key + * + * Arguments: - uint8_t *ct: pointer to output cipher text + * (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes) + * - uint8_t *ss: pointer to output shared secret + * (an already allocated array of MLKEM_SSBYTES bytes) + * - const uint8_t *pk: pointer to input public key + * (an already allocated array of MLKEM_PUBLICKEYBYTES bytes) + * - const uint8_t *coins: pointer to input randomness + * (an already allocated array filled with MLKEM_SYMBYTES random + *bytes) + ** + * Returns 0 on success, and 
-1 if the public key modulus check (see Section 7.2 + * of FIPS203) fails. + **************************************************/ +int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, + const uint8_t *coins) +__contract__( + requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES)) + requires(memory_no_alias(ss, MLKEM_SSBYTES)) + requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES)) + requires(memory_no_alias(coins, MLKEM_SYMBYTES)) + assigns(object_whole(ct)) + assigns(object_whole(ss)) +); + +#define crypto_kem_enc MLKEM_NAMESPACE(enc) +/************************************************* + * Name: crypto_kem_enc + * + * Description: Generates cipher text and shared + * secret for given public key + * + * Arguments: - uint8_t *ct: pointer to output cipher text + * (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes) + * - uint8_t *ss: pointer to output shared secret + * (an already allocated array of MLKEM_SSBYTES bytes) + * - const uint8_t *pk: pointer to input public key + * (an already allocated array of MLKEM_PUBLICKEYBYTES bytes) + * + * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2 + * of FIPS203) fails. 
+ **************************************************/ +int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) +__contract__( + requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES)) + requires(memory_no_alias(ss, MLKEM_SSBYTES)) + requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES)) + assigns(object_whole(ct)) + assigns(object_whole(ss)) +); + +#define crypto_kem_dec MLKEM_NAMESPACE(dec) +/************************************************* + * Name: crypto_kem_dec + * + * Description: Generates shared secret for given + * cipher text and private key + * + * Arguments: - uint8_t *ss: pointer to output shared secret + * (an already allocated array of MLKEM_SSBYTES bytes) + * - const uint8_t *ct: pointer to input cipher text + * (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes) + * - const uint8_t *sk: pointer to input private key + * (an already allocated array of MLKEM_SECRETKEYBYTES bytes) + * + * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of + * FIPS203) fails. + * + * If ct fails the re-encryption check, ss is pseudo-random and 0 is returned.
+ **************************************************/ +int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) +__contract__( + requires(memory_no_alias(ss, MLKEM_SSBYTES)) + requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES)) + requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES)) + assigns(object_whole(ss)) +); + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.c new file mode 100644 index 0000000000..1844ca19fd --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.c @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "ntt.h" +#include +#include "params.h" +#include "reduce.h" + +#include "arith_native.h" +#include "debug/debug.h" +#include "ntt.h" + +#if !defined(MLKEM_USE_NATIVE_NTT) +/* + * Computes a block of CT butterflies with a fixed twiddle factor, + * using Montgomery multiplication. + * Parameters: + * - r: Pointer to base of polynomial (_not_ the base of butterfly block) + * - zeta: Twiddle factor to use for the butterfly. This must be in + * Montgomery form and signed canonical. + * - start: Offset to the beginning of the butterfly block + * - len: Index difference between coefficients subject to a butterfly + * - bound: Ghost variable describing coefficient bound: Prior to `start`, + * coefficients must be bound by `bound + MLKEM_Q`. Post `start`, + * they must be bound by `bound`. + * When this function returns, output coefficients in the index range + * [start, start+2*len) have bound bumped to `bound + MLKEM_Q`.
+ * Example: + * - start=8, len=4 + * This would compute the following four butterflies + * 8 -- 12 + * 9 -- 13 + * 10 -- 14 + * 11 -- 15 + * - start=4, len=2 + * This would compute the following two butterflies + * 4 -- 6 + * 5 -- 7 + */ +STATIC_TESTABLE +void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, int len, + int bound) +__contract__( + requires(0 <= start && start < MLKEM_N) + requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N) + requires(0 <= bound && bound < INT16_MAX - MLKEM_Q) + requires(-HALF_Q < zeta && zeta < HALF_Q) + requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) + requires(array_abs_bound(r, 0, start - 1, bound + MLKEM_Q)) + requires(array_abs_bound(r, start, MLKEM_N - 1, bound)) + assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) + ensures(array_abs_bound(r, 0, start + 2*len - 1, bound + MLKEM_Q)) + ensures(array_abs_bound(r, start + 2 * len, MLKEM_N - 1, bound))) +{ + /* `bound` is a ghost variable only needed in the CBMC specification */ + int j; + ((void)bound); + for (j = start; j < start + len; j++) + __loop__( + invariant(start <= j && j <= start + len) + /* + * Coefficients are updated in strided pairs, so the bounds for the + * intermediate states alternate twice between the old and new bound + */ + invariant(array_abs_bound(r, 0, j - 1, bound + MLKEM_Q)) + invariant(array_abs_bound(r, j, start + len - 1, bound)) + invariant(array_abs_bound(r, start + len, j + len - 1, bound + MLKEM_Q)) + invariant(array_abs_bound(r, j + len, MLKEM_N - 1, bound))) + { + int16_t t; + t = fqmul(r[j + len], zeta); + r[j + len] = r[j] - t; + r[j] = r[j] + t; + } +} + +/* + *Compute one layer of forward NTT + * Parameters: + * - r: Pointer to base of polynomial + * - len: Stride of butterflies in this layer. + * - layer: Ghost variable indicating which layer is being applied. + * Must match `len` via `len == MLKEM_N >> layer`. 
+ * Note: `len` could be dropped and computed in the function, but + * we are following the structure of the reference NTT from the + * official Kyber implementation here, merely adding `layer` as + * a ghost variable for the specifications. + */ +STATIC_TESTABLE +void ntt_layer(int16_t r[MLKEM_N], int len, int layer) +__contract__( + requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) + requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer)) + requires(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1)) + assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) + ensures(array_abs_bound(r, 0, MLKEM_N - 1, (layer + 1) * MLKEM_Q - 1))) +{ + int start, k; + /* `layer` is a ghost variable only needed in the CBMC specification */ + ((void)layer); + /* Twiddle factors for layer n start at index 2^(layer-1) */ + k = MLKEM_N / (2 * len); + for (start = 0; start < MLKEM_N; start += 2 * len) + __loop__( + invariant(0 <= start && start < MLKEM_N + 2 * len) + invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) + invariant(array_abs_bound(r, 0, start - 1, (layer * MLKEM_Q - 1) + MLKEM_Q)) + invariant(array_abs_bound(r, start, MLKEM_N - 1, layer * MLKEM_Q - 1))) + { + int16_t zeta = zetas[k++]; + ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q - 1); + } +} + +/* + * Compute full forward NTT + * NOTE: This particular implementation satisfies a much tighter + * bound on the output coefficients (5*q) than the contractual one (8*q), + * but this is not needed in the calling code. Should we change the + * base multiplication strategy to require smaller NTT output bounds, + * the proof may need strengthening. + * REF-CHANGE: Removed indirection poly_ntt -> ntt() + * and integrated polynomial reduction into the NTT. 
+ */ + + +void poly_ntt(poly *p) +{ + int len, layer; + int16_t *r; + POLY_BOUND_MSG(p, MLKEM_Q, "ref ntt input"); + r = p->coeffs; + + for (len = 128, layer = 1; len >= 2; len >>= 1, layer++) + __loop__( + invariant(1 <= layer && layer <= 8 && len == (MLKEM_N >> layer)) + invariant(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1))) + { + ntt_layer(r, len, layer); + } + + /* Check the stronger bound */ + POLY_BOUND_MSG(p, NTT_BOUND, "ref ntt output"); +} +#else /* MLKEM_USE_NATIVE_NTT */ + +/* Check that bound for native NTT implies contractual bound */ +STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, ntt_bound) + +void poly_ntt(poly *p) +{ + POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input"); + ntt_native(p); + POLY_BOUND_MSG(p, NTT_BOUND_NATIVE, "native ntt output"); +} +#endif /* MLKEM_USE_NATIVE_NTT */ + +#if !defined(MLKEM_USE_NATIVE_INTT) + +/* Check that bound for reference invNTT implies contractual bound */ +#define INVNTT_BOUND_REF (3 * MLKEM_Q / 4) +STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound) + +/* Compute one layer of inverse NTT */ +STATIC_TESTABLE +void invntt_layer(int16_t *r, int len, int layer) +__contract__( + requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) + requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7) + requires(len == (1 << (8 - layer))) + requires(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) + ensures(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))) +{ + int start, k; + /* `layer` is a ghost variable used only in the specification */ + ((void)layer); + k = MLKEM_N / len - 1; + for (start = 0; start < MLKEM_N; start += 2 * len) + __loop__( + invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)) + invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */ + invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len)) + { + int j; + int16_t zeta = zetas[k--]; + for (j = start; j < 
start + len; j++) + __loop__( + invariant(start <= j && j <= start + len) + invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127) + invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))) + { + int16_t t = r[j]; + r[j] = barrett_reduce(t + r[j + len]); + r[j + len] = r[j + len] - t; + r[j + len] = fqmul(r[j + len], zeta); + } + } +} + +void poly_invntt_tomont(poly *p) +{ + /* + * Scale input polynomial to account for Montgomery factor + * and NTT twist. This also brings coefficients down to + * absolute value < MLKEM_Q. + */ + int j, len, layer; + const int16_t f = 1441; + int16_t *r = p->coeffs; + + for (j = 0; j < MLKEM_N; j++) + __loop__( + invariant(0 <= j && j <= MLKEM_N) + invariant(array_abs_bound(r, 0, j - 1, MLKEM_Q))) + { + r[j] = fqmul(r[j], f); + } + + /* Run the invNTT layers */ + for (len = 2, layer = 7; len <= 128; len <<= 1, layer--) + __loop__( + invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer))) + invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))) + { + invntt_layer(p->coeffs, len, layer); + } + + POLY_BOUND_MSG(p, INVNTT_BOUND_REF, "ref intt output"); +} +#else /* MLKEM_USE_NATIVE_INTT */ + +/* Check that bound for native invNTT implies contractual bound */ +STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound) + +void poly_invntt_tomont(poly *p) +{ + intt_native(p); + POLY_BOUND_MSG(p, INVNTT_BOUND_NATIVE, "native intt output"); +} +#endif /* MLKEM_USE_NATIVE_INTT */ + +/************************************************* + * Name: basemul_cached + * + * Description: Multiplication of polynomials in Zq[X]/(X^2-zeta) + * used for multiplication of elements in Rq in NTT domain + * + * Bounds: + * - a is assumed to be < q in absolute value. 
+ * - Return value < 3/2 q in absolute value + * + * Arguments: - int16_t r[2]: pointer to the output polynomial + * - const int16_t a[2]: pointer to the first factor + * - const int16_t b[2]: pointer to the second factor + * - int16_t b_cached: Cached precomputation of b[1] * zeta + **************************************************/ +void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], + int16_t b_cached) +{ + int32_t t0, t1; + + BOUND(a, 2, MLKEM_Q, "basemul input bound"); + + t0 = (int32_t)a[1] * b_cached; + t0 += (int32_t)a[0] * b[0]; + t1 = (int32_t)a[0] * b[1]; + t1 += (int32_t)a[1] * b[0]; + + /* |ti| < 2 * q * 2^15 */ + r[0] = montgomery_reduce(t0); + r[1] = montgomery_reduce(t1); + + /* |r[i]| < 3/2 q */ + BOUND(r, 2, 3 * MLKEM_Q / 2, "basemul output bound"); +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.h new file mode 100644 index 0000000000..0f7b30624b --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef NTT_H +#define NTT_H + +#include <stdint.h> +#include "arith_native.h" +#include "cbmc.h" +#include "params.h" +#include "poly.h" +#include "reduce.h" + +#define zetas MLKEM_NAMESPACE(zetas) +extern const int16_t zetas[128]; + +/************************************************* + * Name: poly_ntt + * + * Description: Computes negacyclic number-theoretic transform (NTT) of + * a polynomial in place. + * + * The input is assumed to be in normal order and + * coefficient-wise bound by MLKEM_Q in absolute value. + * + * The output polynomial is in bitreversed order, and + * coefficient-wise bound by NTT_BOUND in absolute value. + * + * (NOTE: Sometimes the input to the NTT is actually smaller, + * which gives better bounds.) 
+ * + * Arguments: - poly *r: pointer to in/output polynomial + **************************************************/ + +#define poly_ntt MLKEM_NAMESPACE(poly_ntt) +void poly_ntt(poly *r) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_Q - 1)) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, NTT_BOUND - 1)) +); + +/************************************************* + * Name: poly_invntt_tomont + * + * Description: Computes inverse of negacyclic number-theoretic transform (NTT) + * of a polynomial in place; + * inputs assumed to be in bitreversed order, output in normal + * order + * + * The input is assumed to be in bitreversed order, and can + * have arbitrary coefficients in int16_t. + * + * The output polynomial is in normal order, and + * coefficient-wise bound by INVNTT_BOUND in absolute value. + * + * Arguments: - poly *r: pointer to in/output polynomial + **************************************************/ +#define poly_invntt_tomont MLKEM_NAMESPACE(poly_invntt_tomont) +void poly_invntt_tomont(poly *r) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, INVNTT_BOUND - 1)) +); + +#define basemul_cached MLKEM_NAMESPACE(basemul_cached) +/************************************************************ + * Name: basemul_cached + * + * Description: Computes a representative modulo q of + * (a0*b0 + a1*b_cached, a0*b1 + a1*b0)/65536 + * + * If b_cached is b1*zeta, this represents the + * product of (a0 + a1*X) and (b0 + b1*X) in + * Fq[X]/(X^2 - zeta). + * + * Arguments: - r: Pointer to output polynomial + * Upon return, coefficients are bound by + * 3*(q+1)/2 in absolute value. + * - a: Pointer to first input polynomial + * Must be coefficient-wise < q in absolute value. 
+ * - b: Pointer to second input polynomial + * Can have arbitrary int16_t coefficients + * - b_cached: Some precomputed value, typically derived from + * b1 and a twiddle factor. Can be an arbitrary int16_t. + ************************************************************/ +void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2], + int16_t b_cached) +__contract__( + requires(memory_no_alias(r, 2 * sizeof(int16_t))) + requires(memory_no_alias(a, 2 * sizeof(int16_t))) + requires(memory_no_alias(b, 2 * sizeof(int16_t))) + requires(array_abs_bound(a, 0, 1, MLKEM_Q - 1)) + assigns(memory_slice(r, 2 * sizeof(int16_t))) + ensures(array_abs_bound(r, 0, 1, (3 * HALF_Q - 1))) +); + + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/params.h new file mode 100644 index 0000000000..da8041267e --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/params.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef PARAMS_H +#define PARAMS_H + +#include "common.h" +#include "cpucap.h" + +#define KECCAK_WAY 4 + +#ifndef MLKEM_K +#define MLKEM_K 3 /* Change this for different security strengths */ +#endif + +/* Don't change parameters below this line */ +#if (MLKEM_K == 2) +#define MLKEM_NAMESPACE(s) PQCP_MLKEM_NATIVE_MLKEM512_##s +#define _MLKEM_NAMESPACE(s) _PQCP_MLKEM_NATIVE_MLKEM512_##s +#elif (MLKEM_K == 3) +#define MLKEM_NAMESPACE(s) PQCP_MLKEM_NATIVE_MLKEM768_##s +#define _MLKEM_NAMESPACE(s) _PQCP_MLKEM_NATIVE_MLKEM768_##s +#elif (MLKEM_K == 4) +#define MLKEM_NAMESPACE(s) PQCP_MLKEM_NATIVE_MLKEM1024_##s +#define _MLKEM_NAMESPACE(s) _PQCP_MLKEM_NATIVE_MLKEM1024_##s +#else +#error "MLKEM_K must be in {2,3,4}" +#endif + +#define MLKEM_N 256 +#define MLKEM_Q 3329 + +#define MLKEM_SYMBYTES 32 /* size in bytes of hashes, and seeds */ +#define MLKEM_SSBYTES 32 /* size in bytes of shared key */ + +#define MLKEM_POLYBYTES 
384 +#define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES) + +#if MLKEM_K == 2 +#define MLKEM_ETA1 3 +#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 +#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) +#elif MLKEM_K == 3 +#define MLKEM_ETA1 2 +#define MLKEM_POLYCOMPRESSEDBYTES_DV 128 +#define MLKEM_POLYCOMPRESSEDBYTES_DU 320 +#define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) +#elif MLKEM_K == 4 +#define MLKEM_ETA1 2 +#define MLKEM_POLYCOMPRESSEDBYTES_DV 160 +#define MLKEM_POLYCOMPRESSEDBYTES_DU 352 +#define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) +#endif + +#define MLKEM_ETA2 2 + +#define MLKEM_INDCPA_MSGBYTES (MLKEM_SYMBYTES) +#define MLKEM_INDCPA_PUBLICKEYBYTES (MLKEM_POLYVECBYTES + MLKEM_SYMBYTES) +#define MLKEM_INDCPA_SECRETKEYBYTES (MLKEM_POLYVECBYTES) +#define MLKEM_INDCPA_BYTES \ + (MLKEM_POLYVECCOMPRESSEDBYTES_DU + MLKEM_POLYCOMPRESSEDBYTES_DV) + +#define MLKEM_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES) +/* 32 bytes of additional space to save H(pk) */ +#define MLKEM_SECRETKEYBYTES \ + (MLKEM_INDCPA_SECRETKEYBYTES + MLKEM_INDCPA_PUBLICKEYBYTES + \ + 2 * MLKEM_SYMBYTES) +#define MLKEM_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES) + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.c new file mode 100644 index 0000000000..93a663c12b --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.c @@ -0,0 +1,568 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "poly.h" +#include <stdint.h> +#include <string.h> +#include "cbd.h" +#include "cbmc.h" +#include "fips202x4.h" +#include "ntt.h" +#include "params.h" +#include "reduce.h" +#include "symmetric.h" +#include "verify.h" + +#include "arith_native.h" +#include "debug/debug.h" + +void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) +{ + int j; 
+#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) + for (j = 0; j < MLKEM_N / 8; j++) + __loop__(invariant(j >= 0 && j <= MLKEM_N / 8)) + { + int k; + uint16_t t[8]; + for (k = 0; k < 8; k++) + __loop__( + invariant(k >= 0 && k <= 8) + invariant(forall(int, r, 0, k - 1, t[r] < (1u << 11)))) + { + t[k] = scalar_compress_d11(a->coeffs[8 * j + k]); + } + + /* + * REF-CHANGE: Use array indexing into + * r rather than pointer-arithmetic to simplify verification + * Make all implicit truncation explicit. No data is being + * truncated for the LHS's since each t[i] is 11-bit in size. + */ + r[11 * j + 0] = (t[0] >> 0) & 0xFF; + r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF); + r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF); + r[11 * j + 3] = (t[2] >> 2) & 0xFF; + r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF); + r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF); + r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF); + r[11 * j + 7] = (t[5] >> 1) & 0xFF; + r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF); + r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF); + r[11 * j + 10] = (t[7] >> 3); + } + +#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) + for (j = 0; j < MLKEM_N / 4; j++) + __loop__(invariant(j >= 0 && j <= MLKEM_N / 4)) + { + int k; + uint16_t t[4]; + for (k = 0; k < 4; k++) + __loop__( + invariant(k >= 0 && k <= 4) + invariant(forall(int, r, 0, k - 1, t[r] < (1u << 10)))) + { + t[k] = scalar_compress_d10(a->coeffs[4 * j + k]); + } + + /* + * REF-CHANGE: Use array indexing into + * r rather than pointer-arithmetic to simplify verification + * Make all implicit truncation explicit. No data is being + * truncated for the LHS's since each t[i] is 10-bit in size. 
+ */ + r[5 * j + 0] = (t[0] >> 0) & 0xFF; + r[5 * j + 1] = (t[0] >> 8) | ((t[1] << 2) & 0xFF); + r[5 * j + 2] = (t[1] >> 6) | ((t[2] << 4) & 0xFF); + r[5 * j + 3] = (t[2] >> 4) | ((t[3] << 6) & 0xFF); + r[5 * j + 4] = (t[3] >> 2); + } +#else +#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" +#endif +} + + +void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +{ + int j; +#if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352) + for (j = 0; j < MLKEM_N / 8; j++) + __loop__( + invariant(0 <= j && j <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, 8 * j - 1, 0, (MLKEM_Q - 1)))) + { + int k; + uint16_t t[8]; + uint8_t const *base = &a[11 * j]; + t[0] = 0x7FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8)); + t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); + t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | + ((uint16_t)base[4] << 10)); + t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); + t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); + t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | + ((uint16_t)base[8] << 9)); + t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); + t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); + + for (k = 0; k < 8; k++) + __loop__( + invariant(0 <= k && k <= 8) + invariant(array_bound(r->coeffs, 0, 8 * j + k - 1, 0, (MLKEM_Q - 1)))) + { + r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]); + } + } +#elif (MLKEM_POLYCOMPRESSEDBYTES_DU == 320) + for (j = 0; j < MLKEM_N / 4; j++) + __loop__( + invariant(0 <= j && j <= MLKEM_N / 4) + invariant(array_bound(r->coeffs, 0, 4 * j - 1, 0, (MLKEM_Q - 1)))) + { + int k; + uint16_t t[4]; + uint8_t const *base = &a[5 * j]; + + t[0] = 0x3FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8)); + t[1] = 0x3FF & ((base[1] >> 2) | ((uint16_t)base[2] << 6)); + t[2] = 0x3FF & ((base[2] >> 4) | ((uint16_t)base[3] << 4)); + t[3] = 0x3FF & ((base[3] >> 6) | ((uint16_t)base[4] << 2)); + + for (k = 0; k < 4; k++) + 
__loop__( + invariant(0 <= k && k <= 4) + invariant(array_bound(r->coeffs, 0, 4 * j + k - 1, 0, (MLKEM_Q - 1)))) + { + r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]); + } + } +#else +#error "MLKEM_POLYCOMPRESSEDBYTES_DU needs to be in {320,352}" +#endif +} + +void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) +{ + int i; + POLY_UBOUND(a, MLKEM_Q); + +#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + { + int j; + uint8_t t[8] = {0}; + for (j = 0; j < 8; j++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(array_bound(t, 0, (j-1), 0, 15))) + { + /* REF-CHANGE: Precondition change, we assume unsigned canonical data */ + t[j] = scalar_compress_d4(a->coeffs[8 * i + j]); + } + + /* + * REF-CHANGE: Use array indexing into + * r rather than pointer-arithmetic to simplify verification + */ + r[i * 4] = t[0] | (t[1] << 4); + r[i * 4 + 1] = t[2] | (t[3] << 4); + r[i * 4 + 2] = t[4] | (t[5] << 4); + r[i * 4 + 3] = t[6] | (t[7] << 4); + } +#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + { + int j; + uint8_t t[8] = {0}; + for (j = 0; j < 8; j++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(array_bound(t, 0, (j-1), 0, 31))) + { + /* REF-CHANGE: Precondition change, we assume unsigned canonical data */ + t[j] = scalar_compress_d5(a->coeffs[8 * i + j]); + } + + /* + * REF-CHANGE: Explicitly truncate to avoid warning about + * implicit truncation in CBMC, and use array indexing into + * r rather than pointer-arithmetic to simplify verification + */ + r[i * 5] = 0xFF & ((t[0] >> 0) | (t[1] << 5)); + r[i * 5 + 1] = 0xFF & ((t[1] >> 3) | (t[2] << 2) | (t[3] << 7)); + r[i * 5 + 2] = 0xFF & ((t[3] >> 1) | (t[4] << 4)); + r[i * 5 + 3] = 0xFF & ((t[4] >> 4) | (t[5] << 1) | (t[6] << 6)); + r[i * 5 + 4] = 0xFF & ((t[6] >> 2) | 
(t[7] << 3)); + } +#else +#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" +#endif +} + +void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +{ + int i; +#if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128) + for (i = 0; i < MLKEM_N / 2; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 2) + invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, (MLKEM_Q - 1)))) + { + /* REF-CHANGE: Hoist scalar decompression into separate function */ + r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF); + r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF); + } +#elif (MLKEM_POLYCOMPRESSEDBYTES_DV == 160) + for (i = 0; i < MLKEM_N / 8; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1)))) + { + int j; + uint8_t t[8]; + const int offset = i * 5; + /* + * REF-CHANGE: Explicitly truncate to avoid warning about + * implicit truncation in CBMC and unwind loop for ease + * of proof. 
+ */ + + /* + * Decompress 5 8-bit bytes (so 40 bits) into + * 8 5-bit values stored in t[] + */ + t[0] = 0x1F & (a[offset + 0] >> 0); + t[1] = 0x1F & ((a[offset + 0] >> 5) | (a[offset + 1] << 3)); + t[2] = 0x1F & (a[offset + 1] >> 2); + t[3] = 0x1F & ((a[offset + 1] >> 7) | (a[offset + 2] << 1)); + t[4] = 0x1F & ((a[offset + 2] >> 4) | (a[offset + 3] << 4)); + t[5] = 0x1F & (a[offset + 3] >> 1); + t[6] = 0x1F & ((a[offset + 3] >> 6) | (a[offset + 4] << 2)); + t[7] = 0x1F & (a[offset + 4] >> 3); + + /* and copy to the correct slice in r[] */ + for (j = 0; j < 8; j++) + __loop__( + invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1)))) + { + /* REF-CHANGE: Hoist scalar decompression into separate function */ + r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]); + } + } +#else +#error "MLKEM_POLYCOMPRESSEDBYTES_DV needs to be in {128, 160}" +#endif + + POLY_UBOUND(r, MLKEM_Q); +} + +#if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES) +void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) +{ + unsigned int i; + POLY_UBOUND(a, MLKEM_Q); + + + for (i = 0; i < MLKEM_N / 2; i++) + __loop__(invariant(i >= 0 && i <= MLKEM_N / 2)) + { + const uint16_t t0 = a->coeffs[2 * i]; + const uint16_t t1 = a->coeffs[2 * i + 1]; + /* REF-CHANGE: Precondition change, we assume unsigned canonical data */ + + /* + * t0 and t1 are both < MLKEM_Q, so contain at most 12 bits each of + * significant data, so these can be packed into 24 bits or exactly + * 3 bytes, as follows. + */ + + /* Least significant bits 0 - 7 of t0. */ + r[3 * i + 0] = t0 & 0xFF; + + /* + * Most significant bits 8 - 11 of t0 become the least significant + * nibble of the second byte. The least significant 4 bits + * of t1 become the upper nibble of the second byte. + */ + r[3 * i + 1] = (t0 >> 8) | ((t1 << 4) & 0xF0); + + /* Bits 4 - 11 of t1 become the third byte. 
*/ + r[3 * i + 2] = t1 >> 4; + } +} +#else /* MLKEM_USE_NATIVE_POLY_TOBYTES */ +void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) +{ + POLY_UBOUND(a, MLKEM_Q); + poly_tobytes_native(r, a); +} +#endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */ + +#if !defined(MLKEM_USE_NATIVE_POLY_FROMBYTES) +void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) +{ + int i; + for (i = 0; i < MLKEM_N / 2; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 2) + invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, 4095))) + { + /* REF-CHANGE: Introduce some locals for better readability */ + const uint8_t t0 = a[3 * i + 0]; + const uint8_t t1 = a[3 * i + 1]; + const uint8_t t2 = a[3 * i + 2]; + r->coeffs[2 * i + 0] = t0 | ((t1 << 8) & 0xFFF); + r->coeffs[2 * i + 1] = (t1 >> 4) | (t2 << 4); + } + + /* Note that the coefficients are not canonical */ + POLY_UBOUND(r, 4096); +} +#else /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ +void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) +{ + poly_frombytes_native(r, a); +} +#endif /* MLKEM_USE_NATIVE_POLY_FROMBYTES */ + +void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) +{ + int i; +#if (MLKEM_INDCPA_MSGBYTES != MLKEM_N / 8) +#error "MLKEM_INDCPA_MSGBYTES must be equal to MLKEM_N/8 bytes!" 
+#endif + + for (i = 0; i < MLKEM_N / 8; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1)))) + { + int j; + for (j = 0; j < 8; j++) + __loop__( + invariant(i >= 0 && i < MLKEM_N / 8 && j >= 0 && j <= 8) + invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1)))) + { + /* Prevent the compiler from recognizing this as a bit selection */ + uint8_t mask = value_barrier_u8(1u << j); + r->coeffs[8 * i + j] = ct_sel_int16(HALF_Q, 0, msg[i] & mask); + } + } + POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output"); +} + +void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a) +{ + int i; + POLY_UBOUND(a, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i >= 0 && i <= MLKEM_N / 8)) + { + int j; + msg[i] = 0; + for (j = 0; j < 8; j++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)) + { + uint32_t t = scalar_compress_d1(a->coeffs[8 * i + j]); + msg[i] |= t << j; + } + } +} + +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +{ + ALIGN uint8_t buf[KECCAK_WAY][MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; + memcpy(extkey[0], seed, MLKEM_SYMBYTES); + memcpy(extkey[1], seed, MLKEM_SYMBYTES); + memcpy(extkey[2], seed, MLKEM_SYMBYTES); + memcpy(extkey[3], seed, MLKEM_SYMBYTES); + extkey[0][MLKEM_SYMBYTES] = nonce0; + extkey[1][MLKEM_SYMBYTES] = nonce1; + extkey[2][MLKEM_SYMBYTES] = nonce2; + extkey[3][MLKEM_SYMBYTES] = nonce3; + shake256x4(buf[0], buf[1], buf[2], buf[3], MLKEM_ETA1 * MLKEM_N / 4, + extkey[0], extkey[1], extkey[2], extkey[3], MLKEM_SYMBYTES + 1); + poly_cbd_eta1(r0, buf[0]); + poly_cbd_eta1(r1, buf[1]); + poly_cbd_eta1(r2, buf[2]); + poly_cbd_eta1(r3, buf[3]); + + POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0"); + POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, 
"poly_getnoise_eta1_4x output 1"); + POLY_BOUND_MSG(r2, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 2"); + POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3"); +} + +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +{ + ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; + prf(buf, sizeof(buf), seed, nonce); + poly_cbd_eta2(r, buf); + + POLY_BOUND_MSG(r, MLKEM_ETA2 + 1, "poly_getnoise_eta2 output"); +} + +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +{ + ALIGN uint8_t buf1[KECCAK_WAY / 2][MLKEM_ETA1 * MLKEM_N / 4]; + ALIGN uint8_t buf2[KECCAK_WAY / 2][MLKEM_ETA2 * MLKEM_N / 4]; + ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1]; + memcpy(extkey[0], seed, MLKEM_SYMBYTES); + memcpy(extkey[1], seed, MLKEM_SYMBYTES); + memcpy(extkey[2], seed, MLKEM_SYMBYTES); + memcpy(extkey[3], seed, MLKEM_SYMBYTES); + extkey[0][MLKEM_SYMBYTES] = nonce0; + extkey[1][MLKEM_SYMBYTES] = nonce1; + extkey[2][MLKEM_SYMBYTES] = nonce2; + extkey[3][MLKEM_SYMBYTES] = nonce3; + +#if MLKEM_ETA1 == MLKEM_ETA2 + shake256x4(buf1[0], buf1[1], buf2[0], buf2[1], MLKEM_ETA1 * MLKEM_N / 4, + extkey[0], extkey[1], extkey[2], extkey[3], MLKEM_SYMBYTES + 1); +#else + shake256(buf1[0], sizeof(buf1[0]), extkey[0], sizeof(extkey[0])); + shake256(buf1[1], sizeof(buf1[1]), extkey[1], sizeof(extkey[1])); + shake256(buf2[0], sizeof(buf2[0]), extkey[2], sizeof(extkey[2])); + shake256(buf2[1], sizeof(buf2[1]), extkey[3], sizeof(extkey[3])); +#endif + + poly_cbd_eta1(r0, buf1[0]); + poly_cbd_eta1(r1, buf1[1]); + poly_cbd_eta2(r2, buf2[0]); + poly_cbd_eta2(r3, buf2[1]); + + POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 0"); + POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1122_4x output 1"); + POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2"); + POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, 
"poly_getnoise_eta1122_4x output 3"); +} + +void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, + const poly_mulcache *b_cache) +{ + int i; + POLY_BOUND(b_cache, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 4; i++) + __loop__( + assigns(i, object_whole(r)) + invariant(i >= 0 && i <= MLKEM_N / 4) + invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), (3 * HALF_Q - 1)))) + { + basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i], + b_cache->coeffs[2 * i]); + basemul_cached(&r->coeffs[4 * i + 2], &a->coeffs[4 * i + 2], + &b->coeffs[4 * i + 2], b_cache->coeffs[2 * i + 1]); + } +} + +#if !defined(MLKEM_USE_NATIVE_POLY_TOMONT) +void poly_tomont(poly *r) +{ + int i; + const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */ + for (i = 0; i < MLKEM_N; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N) + invariant(array_abs_bound(r->coeffs ,0, (i - 1), (MLKEM_Q - 1)))) + { + r->coeffs[i] = fqmul(r->coeffs[i], f); + } + + POLY_BOUND(r, MLKEM_Q); +} +#else /* MLKEM_USE_NATIVE_POLY_TOMONT */ +void poly_tomont(poly *r) +{ + poly_tomont_native(r); + POLY_BOUND(r, MLKEM_Q); +} +#endif /* MLKEM_USE_NATIVE_POLY_TOMONT */ + +#if !defined(MLKEM_USE_NATIVE_POLY_REDUCE) +void poly_reduce(poly *r) +{ + int i; + for (i = 0; i < MLKEM_N; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N) + invariant(array_bound(r->coeffs, 0, (i - 1), 0, (MLKEM_Q - 1)))) + { + /* Barrett reduction, giving signed canonical representative */ + int16_t t = barrett_reduce(r->coeffs[i]); + /* Conditional addition to get unsigned canonical representative */ + r->coeffs[i] = scalar_signed_to_unsigned_q(t); + } + + POLY_UBOUND(r, MLKEM_Q); +} +#else /* MLKEM_USE_NATIVE_POLY_REDUCE */ +void poly_reduce(poly *r) +{ + poly_reduce_native(r); + POLY_UBOUND(r, MLKEM_Q); +} +#endif /* MLKEM_USE_NATIVE_POLY_REDUCE */ + +void poly_add(poly *r, const poly *b) +{ + int i; + for (i = 0; i < MLKEM_N; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N) + invariant(forall(int, k0, i, MLKEM_N 
- 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) + invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1]))) + { + r->coeffs[i] = r->coeffs[i] + b->coeffs[i]; + } +} + +void poly_sub(poly *r, const poly *b) +{ + int i; + for (i = 0; i < MLKEM_N; i++) + __loop__( + invariant(i >= 0 && i <= MLKEM_N) + invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) + invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1]))) + { + r->coeffs[i] = r->coeffs[i] - b->coeffs[i]; + } +} + +#if !defined(MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE) +void poly_mulcache_compute(poly_mulcache *x, const poly *a) +{ + int i; + for (i = 0; i < MLKEM_N / 4; i++) + __loop__(invariant(i >= 0 && i <= MLKEM_N / 4)) + { + x->coeffs[2 * i + 0] = fqmul(a->coeffs[4 * i + 1], zetas[64 + i]); + x->coeffs[2 * i + 1] = fqmul(a->coeffs[4 * i + 3], -zetas[64 + i]); + } + POLY_BOUND(x, MLKEM_Q); +} +#else /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ +void poly_mulcache_compute(poly_mulcache *x, const poly *a) +{ + poly_mulcache_compute_native(x, a); + /* Omitting POLY_BOUND(x, MLKEM_Q) since native implementations may + * decide not to use a mulcache. Note that the C backend implementation + * of poly_basemul_montgomery_cached() does still include the check. 
*/ +} +#endif /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.h new file mode 100644 index 0000000000..35990684b6 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.h @@ -0,0 +1,773 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef POLY_H +#define POLY_H + +#include +#include +#include "cbmc.h" +#include "params.h" +#include "reduce.h" +#include "verify.h" + +/* Absolute exclusive upper bound for the output of the inverse NTT */ +#define INVNTT_BOUND (8 * MLKEM_Q) + +/* Absolute exclusive upper bound for the output of the forward NTT */ +#define NTT_BOUND (8 * MLKEM_Q) + +/* + * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial + * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1] + */ +typedef struct +{ + int16_t coeffs[MLKEM_N]; +} ALIGN poly; + +/* + * INTERNAL presentation of precomputed data speeding up + * the base multiplication of two polynomials in NTT domain. + */ +/* + * REF-CHANGE: This structure does not exist in the reference + * implementation. + */ +typedef struct +{ + int16_t coeffs[MLKEM_N >> 1]; +} poly_mulcache; + +/************************************************************ + * Name: scalar_compress_d1 + * + * Description: Computes round(u * 2 / q) + * + * Implements Compress_d from FIPS203, Eq (4.7), + * for d = 1. + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + ************************************************************/ +/* + * The multiplication in this routine will exceed UINT32_MAX + * and wrap around for large values of u. This is expected and required. 
+ */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif +static INLINE uint32_t scalar_compress_d1(uint16_t u) +__contract__( + requires(u <= MLKEM_Q - 1) + ensures(return_value < 2) + ensures(return_value == (((uint32_t)u * 2 + MLKEM_Q / 2) / MLKEM_Q) % 2) ) +{ + uint32_t d0 = u << 1; + d0 *= 645083; + d0 += 1u << 30; + d0 >>= 31; + return d0; +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: scalar_compress_d4 + * + * Description: Computes round(u * 16 / q) % 16 + * + * Implements Compress_d from FIPS203, Eq (4.7), + * for d = 4. + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + ************************************************************/ +/* + * The multiplication in this routine will exceed UINT32_MAX + * and wrap around for large values of u. This is expected and required. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif +static INLINE uint32_t scalar_compress_d4(uint16_t u) +__contract__( + requires(u <= MLKEM_Q - 1) + ensures(return_value < 16) + ensures(return_value == (((uint32_t)u * 16 + MLKEM_Q / 2) / MLKEM_Q) % 16)) +{ + uint32_t d0 = (uint32_t)u * 1290160; /* 16 * round(2^28 / MLKEM_Q) */ + return (d0 + (1u << 27)) >> 28; /* round(d0/2^28) */ +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: scalar_decompress_d4 + * + * Description: Computes round(u * q / 16) + * + * Implements Decompress_d from FIPS203, Eq (4.8), + * for d = 4. + * + * Arguments: - u: Unsigned canonical modulus modulo 16 + * to be decompressed. 
+ ************************************************************/ +static INLINE uint16_t scalar_decompress_d4(uint32_t u) +__contract__( + requires(0 <= u && u < 16) + ensures(return_value <= (MLKEM_Q - 1)) +) { return ((u * MLKEM_Q) + 8) / 16; } + +/************************************************************ + * Name: scalar_compress_d5 + * + * Description: Computes round(u * 32 / q) % 32 + * + * Implements Compress_d from FIPS203, Eq (4.7), + * for d = 5. + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + ************************************************************/ +/* + * The multiplication in this routine will exceed UINT32_MAX + * and wrap around for large values of u. This is expected and required. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif +static INLINE uint32_t scalar_compress_d5(uint16_t u) +__contract__( + requires(u <= MLKEM_Q - 1) + ensures(return_value < 32) + ensures(return_value == (((uint32_t)u * 32 + MLKEM_Q / 2) / MLKEM_Q) % 32) ) +{ + uint32_t d0 = (uint32_t)u * 1290176; /* 2^5 * round(2^27 / MLKEM_Q) */ + return (d0 + (1u << 26)) >> 27; /* round(d0/2^27) */ +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: scalar_decompress_d5 + * + * Description: Computes round(u * q / 32) + * + * Implements Decompress_d from FIPS203, Eq (4.8), + * for d = 5. + * + * Arguments: - u: Unsigned canonical modulus modulo 32 + * to be decompressed. 
+ ************************************************************/ +static INLINE uint16_t scalar_decompress_d5(uint32_t u) +__contract__( + requires(0 <= u && u < 32) + ensures(return_value <= MLKEM_Q - 1) +) { return ((u * MLKEM_Q) + 16) / 32; } + +/************************************************************ + * Name: scalar_compress_d10 + * + * Description: Computes round(u * 2**10 / q) % 2**10 + * + * Implements Compress_d from FIPS203, Eq (4.7), + * for d = 10. + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + ************************************************************/ +/* + * The multiplication in this routine exceeds UINT32_MAX for large values + * of u; it is carried out in uint64_t, where it cannot wrap around. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif +static INLINE uint32_t scalar_compress_d10(uint16_t u) +__contract__( + requires(u <= MLKEM_Q - 1) + ensures(return_value < (1u << 10)) + ensures(return_value == (((uint32_t)u * (1u << 10) + MLKEM_Q / 2) / MLKEM_Q) % (1 << 10))) +{ + uint64_t d0 = (uint64_t)u * 2642263040; /* 2^10 * round(2^33 / MLKEM_Q) */ + d0 = (d0 + ((uint64_t)1u << 32)) >> 33; + return (d0 & 0x3FF); +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: scalar_decompress_d10 + * + * Description: Computes round(u * q / 1024) + * + * Implements Decompress_d from FIPS203, Eq (4.8), + * for d = 10. + * + * Arguments: - u: Unsigned canonical modulus modulo 1024 + * to be decompressed.
+ ************************************************************/ +static INLINE uint16_t scalar_decompress_d10(uint32_t u) +__contract__( + requires(0 <= u && u < 1024) + ensures(return_value <= (MLKEM_Q - 1)) +) { return ((u * MLKEM_Q) + 512) / 1024; } + +/************************************************************ + * Name: scalar_compress_d11 + * + * Description: Computes round(u * 2**11 / q) % 2**11 + * + * Implements Compress_d from FIPS203, Eq (4.7), + * for d = 11. + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + ************************************************************/ +/* + * The multiplication in this routine exceeds UINT32_MAX for large values + * of u; it is carried out in uint64_t, where it cannot wrap around. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif +static INLINE uint32_t scalar_compress_d11(uint16_t u) +__contract__( + requires(u <= MLKEM_Q - 1) + ensures(return_value < (1u << 11)) + ensures(return_value == (((uint32_t)u * (1u << 11) + MLKEM_Q / 2) / MLKEM_Q) % (1 << 11))) +{ + uint64_t d0 = (uint64_t)u * 5284526080; /* 2^11 * round(2^33 / MLKEM_Q) */ + d0 = (d0 + ((uint64_t)1u << 32)) >> 33; + return (d0 & 0x7FF); +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: scalar_decompress_d11 + * + * Description: Computes round(u * q / 2048) + * + * Implements Decompress_d from FIPS203, Eq (4.8), + * for d = 11. + * + * Arguments: - u: Unsigned canonical modulus modulo 2048 + * to be decompressed.
+ ************************************************************/ +static INLINE uint16_t scalar_decompress_d11(uint32_t u) +__contract__( + requires(0 <= u && u < 2048) + ensures(return_value <= (MLKEM_Q - 1)) +) { return ((u * MLKEM_Q) + 1024) / 2048; } + +/************************************************************ + * Name: scalar_signed_to_unsigned_q + * + * Description: converts signed polynomial coefficient + * from signed (-3328 .. 3328) form to + * unsigned form (0 .. 3328). + * + * Note: Cryptographic constant time implementation + * + * Examples: 0 -> 0 + * 1 -> 1 + * 3328 -> 3328 + * -1 -> 3328 + * -2 -> 3327 + * -3328 -> 1 + * + * Arguments: c: signed coefficient to be converted + ************************************************************/ +static INLINE uint16_t scalar_signed_to_unsigned_q(int16_t c) +__contract__( + requires(c >= -(MLKEM_Q - 1) && c <= (MLKEM_Q - 1)) + ensures(return_value >= 0 && return_value <= (MLKEM_Q - 1)) + ensures(return_value == (int32_t)c + (((int32_t)c < 0) * MLKEM_Q))) +{ + /* Add Q if c is negative, but in constant time */ + c = ct_sel_int16(c + MLKEM_Q, c, ct_cmask_neg_i16(c)); + + cassert(c >= 0, "scalar_signed_to_unsigned_q result lower bound"); + cassert(c < MLKEM_Q, "scalar_signed_to_unsigned_q result upper bound"); + + /* and therefore cast to uint16_t is safe. */ + return (uint16_t)c; +} + +#define poly_compress_du MLKEM_NAMESPACE(poly_compress_du) +/************************************************* + * Name: poly_compress_du + * + * Description: Compression (du bits) and subsequent serialization of a + *polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1].
+ **************************************************/ +void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) +); + +#define poly_decompress_du MLKEM_NAMESPACE(poly_decompress_du) +/************************************************* + * Name: poly_decompress_du + * + * Description: De-serialization and subsequent decompression (du bits) of a + *polynomial; approximate inverse of poly_compress_du + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) +); + +#define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv) +/************************************************* + * Name: poly_compress_dv + * + * Description: Compression (dv bits) and subsequent serialization of a + *polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV) + * - const poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ **************************************************/ +void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) + assigns(object_whole(r)) +); + +#define poly_decompress_dv MLKEM_NAMESPACE(poly_decompress_dv) +/************************************************* + * Name: poly_decompress_dv + * + * Description: De-serialization and subsequent decompression (dv bits) of a + *polynomial; approximate inverse of poly_compress + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV + *bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + **************************************************/ +void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(object_whole(r)) + ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) +); + +#define poly_tobytes MLKEM_NAMESPACE(poly_tobytes) +/************************************************* + * Name: poly_tobytes + * + * Description: Serialization of a polynomial. + * Signed coefficients are converted to + * unsigned form before serialization. 
+ * + * Arguments: INPUT: + * - a: const pointer to input polynomial, + * with each coefficient in the range [0,1,..,Q-1] + * OUTPUT + * - r: pointer to output byte array + * (of MLKEM_POLYBYTES bytes) + **************************************************/ +void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYBYTES)) + requires(memory_no_alias(a, sizeof(poly))) + requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) + assigns(object_whole(r)) +); + + +#define poly_frombytes MLKEM_NAMESPACE(poly_frombytes) +/************************************************* + * Name: poly_frombytes + * + * Description: De-serialization of a polynomial. + * + * Arguments: INPUT + * - a: pointer to input byte array + * (of MLKEM_POLYBYTES bytes) + * OUTPUT + * - r: pointer to output polynomial, with + * each coefficient unsigned and in the range + * 0 .. 4095 + **************************************************/ +void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYBYTES)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, 4095)) +); + + +#define poly_frommsg MLKEM_NAMESPACE(poly_frommsg) +/************************************************* + * Name: poly_frommsg + * + * Description: Convert 32-byte message to polynomial + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *msg: pointer to input message + **************************************************/ +void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) +__contract__( + requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES)) + requires(memory_no_alias(r, sizeof(poly))) + assigns(object_whole(r)) + ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) +); + +#define poly_tomsg MLKEM_NAMESPACE(poly_tomsg) 
+/************************************************* + * Name: poly_tomsg + * + * Description: Convert polynomial to 32-byte message + * + * Arguments: - uint8_t *msg: pointer to output message + * - const poly *r: pointer to input polynomial + * Coefficients must be unsigned canonical + **************************************************/ +void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *r) +__contract__( + requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES)) + requires(memory_no_alias(r, sizeof(poly))) + requires(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))) + assigns(object_whole(msg)) +); + +#define poly_getnoise_eta1_4x MLKEM_NAMESPACE(poly_getnoise_eta1_4x) +/************************************************* + * Name: poly_getnoise_eta1_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial distribution + * with parameter MLKEM_ETA1. + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + **************************************************/ +void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0, + uint8_t nonce1, uint8_t nonce2, uint8_t nonce3) +/* Depending on MLKEM_K, the pointers passed to this function belong + to the same objects, so we cannot use memory_no_alias for r0-r3. + + NOTE: Somehow it is important to use memory_no_alias() first in the + conjunctions defining each case. 
+*/ +#if MLKEM_K == 2 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case A: r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)); +); +#elif MLKEM_K == 4 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case B: r0, r1, r2, r3 consecutive */ + (memory_no_alias(r0, 4 * sizeof(poly)) && r1 == r0 + 1 && r2 == r0 + 2 && r3 == r0 + 3)) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)); +); +#elif MLKEM_K == 3 +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires( /* Case C: r0, r1, r2 consecutive */ + (memory_no_alias(r0, 3 * sizeof(poly)) && memory_no_alias(r3, 1 * sizeof(poly)) && + r1 == r0 + 1 && r2 == r0 + 2 && !same_object(r3, r0))) + assigns(memory_slice(r0, sizeof(poly))) + assigns(memory_slice(r1, sizeof(poly))) + assigns(memory_slice(r2, sizeof(poly))) + assigns(memory_slice(r3, sizeof(poly))) + ensures( + array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) 
+ && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)); +); +#endif /* MLKEM_K */ + +#if MLKEM_ETA1 == MLKEM_ETA2 +/* + * We only require poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 + * where MLKEM_ETA2 = MLKEM_ETA1 = 2. + * For ml-kem-512, poly_getnoise_eta1122_4x is used instead. + */ +#define poly_getnoise_eta2_4x poly_getnoise_eta1_4x +#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ + +#define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2) +/************************************************* + * Name: poly_getnoise_eta2 + * + * Description: Sample a polynomial deterministically from a seed and a nonce, + * with output polynomial close to centered binomial distribution + * with parameter MLKEM_ETA2 + * + * Arguments: - poly *r: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce: one-byte input nonce + **************************************************/ +void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r)) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2)) +); + +#define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x) +/************************************************* + * Name: poly_getnoise_eta1122_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and a nonces, with output polynomials close to centered binomial + * distribution with parameter MLKEM_ETA1 and MLKEM_ETA2 + * + * Arguments: - poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + **************************************************/ +void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], 
+ uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +__contract__( + requires( /* r0, r1 consecutive, r2, r3 consecutive */ + (memory_no_alias(r0, 2 * sizeof(poly)) && memory_no_alias(r2, 2 * sizeof(poly)) && + r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3)) + ensures(array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1) + && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA2) + && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA2)); +); + +#define poly_basemul_montgomery_cached \ + MLKEM_NAMESPACE(poly_basemul_montgomery_cached) +/************************************************* + * Name: poly_basemul_montgomery_cached + * + * Description: Multiplication of two polynomials in NTT domain, + * using mulcache for second operand. + * + * Bounds: + * - a is assumed to be coefficient-wise < q in absolute value. + * + * The result is coefficient-wise bound by 3/2 q in absolute + * value. + * + * Arguments: - poly *r: pointer to output polynomial + * - const poly *a: pointer to first input polynomial + * - const poly *b: pointer to second input polynomial + * - const poly_mulcache *b_cache: pointer to mulcache + * for second input polynomial. Can be computed + * via poly_mulcache_compute(). 
+ **************************************************/ +void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b, + const poly_mulcache *b_cache) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(a, sizeof(poly))) + requires(memory_no_alias(b, sizeof(poly))) + requires(memory_no_alias(b_cache, sizeof(poly_mulcache))) + requires(array_abs_bound(a->coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))) + assigns(object_whole(r)) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, (3 * HALF_Q - 1))) +); + +#define poly_tomont MLKEM_NAMESPACE(poly_tomont) +/************************************************* + * Name: poly_tomont + * + * Description: Inplace conversion of all coefficients of a polynomial + * from normal domain to Montgomery domain + * + * Bounds: Output < q in absolute value. + * + * Arguments: - poly *r: pointer to input/output polynomial + **************************************************/ +void poly_tomont(poly *r) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))) +); + +/* REF-CHANGE: This function does not exist in the reference implementation */ +#define poly_mulcache_compute MLKEM_NAMESPACE(poly_mulcache_compute) +/************************************************************ + * Name: poly_mulcache_compute + * + * Description: Computes the mulcache for a polynomial in NTT domain + * + * The mulcache of a degree-2 polynomial b := b0 + b1*X + * in Fq[X]/(X^2-zeta) is the value b1*zeta, needed when + * computing products of b in Fq[X]/(X^2-zeta). + * + * The mulcache of a polynomial in NTT domain -- which is + * a 128-tuple of degree-2 polynomials in Fq[X]/(X^2-zeta), + * for varying zeta, is the 128-tuple of mulcaches of those + * polynomials. 
+ * + * Arguments: - x: Pointer to mulcache to be populated + * - a: Pointer to input polynomial + ************************************************************/ +/* + * NOTE: The default C implementation of this function populates + * the mulcache with values in (-q,q), but this is not needed for the + * higher level safety proofs, and thus not part of the spec. + */ +void poly_mulcache_compute(poly_mulcache *x, const poly *a) +__contract__( + requires(memory_no_alias(x, sizeof(poly_mulcache))) + requires(memory_no_alias(a, sizeof(poly))) + assigns(object_whole(x)) +); + +#define poly_reduce MLKEM_NAMESPACE(poly_reduce) +/************************************************* + * Name: poly_reduce + * + * Description: Converts polynomial to _unsigned canonical_ representatives. + * + * The input coefficients can be arbitrary integers in int16_t. + * The output coefficients are in [0,1,...,MLKEM_Q-1]. + * + * Arguments: - poly *r: pointer to input/output polynomial + **************************************************/ +/* + * REF-CHANGE: The semantics of poly_reduce() is different in + * the reference implementation, which requires + * signed canonical output data. Unsigned canonical + * outputs are better suited to the only remaining + * use of poly_reduce() in the context of (de)serialization. + */ +void poly_reduce(poly *r) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + assigns(memory_slice(r, sizeof(poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))) +); + +#define poly_add MLKEM_NAMESPACE(poly_add) +/************************************************************ + * Name: poly_add + * + * Description: Adds two polynomials in place + * + * Arguments: - r: Pointer to input-output polynomial to be added to. + * - b: Pointer to input polynomial that should be added + * to r. Must be disjoint from r. + * + * The coefficients of r and b must be so that the addition does + * not overflow. 
Otherwise, the behaviour of this function is undefined. + * + ************************************************************/ +/* + * REF-CHANGE: + * The reference implementation uses a 3-argument poly_add. + * We specialize to the accumulator form to avoid reasoning about aliasing. + */ +void poly_add(poly *r, const poly *b) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(b, sizeof(poly))) + requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX)) + requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN)) + ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k])) + assigns(memory_slice(r, sizeof(poly))) +); + +#define poly_sub MLKEM_NAMESPACE(poly_sub) +/************************************************* + * Name: poly_sub + * + * Description: Subtract two polynomials in place; no modular reduction is performed + * + * Arguments: - poly *r: Pointer to input-output polynomial to be subtracted + *from. + * - const poly *b: Pointer to input polynomial that is subtracted from r + **************************************************/ +/* + * REF-CHANGE: + * The reference implementation uses a 3-argument poly_sub. + * We specialize to the accumulator form to avoid reasoning about aliasing.
+ */ +void poly_sub(poly *r, const poly *b) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(b, sizeof(poly))) + requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX)) + requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN)) + ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k])) + assigns(object_whole(r)) +); + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.c new file mode 100644 index 0000000000..5e4dd0c5c4 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "polyvec.h" +#include +#include "arith_native.h" +#include "config.h" +#include "ntt.h" +#include "params.h" +#include "poly.h" + +#include "debug/debug.h" +void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU], + const polyvec *a) +{ + unsigned int i; + POLYVEC_UBOUND(a, MLKEM_Q); + + for (i = 0; i < MLKEM_K; i++) + { + poly_compress_du(r + i * MLKEM_POLYCOMPRESSEDBYTES_DU, &a->vec[i]); + } +} + +void polyvec_decompress_du(polyvec *r, + const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU]) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU); + } + + POLYVEC_UBOUND(r, MLKEM_Q); +} + +void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]); + } +} + +void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES]) +{ + int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES); + } +} + +void polyvec_ntt(polyvec *r) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + 
poly_ntt(&r->vec[i]); + } +} + +void polyvec_invntt_tomont(polyvec *r) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_invntt_tomont(&r->vec[i]); + } +} + +/************************************************* + * Name: polyvec_basemul_acc_montgomery + * + * Description: Multiply elements of a and b in NTT domain, accumulate into r, + * and multiply by 2^-16. + * + * Bounds: + * - a is assumed to be coefficient-wise < q in absolute value. + * - b is assumed to be the output of a forward NTT and + * thus coefficient-wise bound by NTT_BOUND + * - b_cache is assumed to be coefficient-wise bound by + * MLKEM_Q. + * + * Arguments: - poly *r: pointer to output polynomial + * - const polyvec *a: pointer to first input vector of polynomials + * - const polyvec *b: pointer to second input vector of polynomials + * - const polyvec_mulcache *b_cache: mulcache for b + **************************************************/ +#if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) +void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, + const polyvec *b, + const polyvec_mulcache *b_cache) +{ + int i; + poly t; + + POLYVEC_BOUND(a, MLKEM_Q); + POLYVEC_BOUND(b, NTT_BOUND); + POLYVEC_BOUND(b_cache, MLKEM_Q); + + poly_basemul_montgomery_cached(r, &a->vec[0], &b->vec[0], &b_cache->vec[0]); + for (i = 1; i < MLKEM_K; i++) + { + poly_basemul_montgomery_cached(&t, &a->vec[i], &b->vec[i], + &b_cache->vec[i]); + poly_add(r, &t); + /* abs bounds: < (i+1) * 3/2 * q */ + } + + /* + * Those bounds are true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus best to omit + * them from the spec to not unnecessarily constraint native implementations. 
+ */ + cassert( + array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_K * (3 * HALF_Q - 1)), + "polyvec_basemul_acc_montgomery_cached output bounds"); + /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */ + POLY_BOUND(r, MLKEM_K * 3 * HALF_Q); +} +#else /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */ +void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, + const polyvec *b, + const polyvec_mulcache *b_cache) +{ + POLYVEC_BOUND(a, MLKEM_Q); + POLYVEC_BOUND(b, NTT_BOUND); + /* Omitting POLYVEC_BOUND(b_cache, MLKEM_Q) since native implementations may + * decide not to use a mulcache. Note that the C backend implementation + * of poly_basemul_montgomery_cached() does still include the check. */ + polyvec_basemul_acc_montgomery_cached_native(r, a, b, b_cache); +} +#endif /* MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */ + +/************************************************* + * Name: polyvec_basemul_acc_montgomery + * + * Description: Multiply elements of a and b in NTT domain, accumulate into r, + * and multiply by 2^-16. + * + * Arguments: - poly *r: pointer to output polynomial + * - const polyvec *a: pointer to first input vector of polynomials + * - const polyvec *b: pointer to second input vector of polynomials + **************************************************/ +void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) +{ + polyvec_mulcache b_cache; + polyvec_mulcache_compute(&b_cache, b); + polyvec_basemul_acc_montgomery_cached(r, a, b, &b_cache); +} + +/************************************************* + * Name: polyvec_mulcache_compute + * + * Description: Precompute values speeding up + * base multiplications of polynomials + * in NTT domain. + * + * Arguments: - polyvec_mulcache *x: pointer to output cache. 
+ * - const poly *a: pointer to input polynomial + **************************************************/ +void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_mulcache_compute(&x->vec[i], &a->vec[i]); + } +} + + +/************************************************* + * Name: polyvec_reduce + * + * Description: Applies Barrett reduction to each coefficient + * of each element of a vector of polynomials; + * for details of the Barrett reduction see comments in reduce.c + * + * Arguments: - polyvec *r: pointer to input/output polynomial + **************************************************/ +void polyvec_reduce(polyvec *r) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_reduce(&r->vec[i]); + } +} + +void polyvec_add(polyvec *r, const polyvec *b) +{ + int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_add(&r->vec[i], &b->vec[i]); + } +} + +void polyvec_tomont(polyvec *r) +{ + unsigned int i; + for (i = 0; i < MLKEM_K; i++) + { + poly_tomont(&r->vec[i]); + } +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.h new file mode 100644 index 0000000000..7771fd3b28 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.h @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef POLYVEC_H +#define POLYVEC_H + +#include +#include "params.h" +#include "poly.h" + +typedef struct +{ + poly vec[MLKEM_K]; +} ALIGN polyvec; + +/* REF-CHANGE: This struct does not exist in the reference implementation */ +typedef struct +{ + poly_mulcache vec[MLKEM_K]; +} polyvec_mulcache; + +#define polyvec_compress_du MLKEM_NAMESPACE(polyvec_compress_du) +/************************************************* + * Name: polyvec_compress_du + * + * Description: Compress and serialize vector of polynomials + * + * Arguments: - uint8_t *r: pointer to output byte 
array + * (needs space for MLKEM_POLYVECCOMPRESSEDBYTES_DU) + * - const polyvec *a: pointer to input vector of polynomials. + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. + **************************************************/ +void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU], + const polyvec *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYVECCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(a, sizeof(polyvec))) + requires(forall(int, k0, 0, MLKEM_K - 1, + array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))) + assigns(object_whole(r)) +); + +#define polyvec_decompress_du MLKEM_NAMESPACE(polyvec_decompress_du) +/************************************************* + * Name: polyvec_decompress_du + * + * Description: De-serialize and decompress vector of polynomials; + * approximate inverse of polyvec_compress_du + * + * Arguments: - polyvec *r: pointer to output vector of polynomials. + * Output will have coefficients normalized to [0,..,q-1]. + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYVECCOMPRESSEDBYTES_DU) + **************************************************/ +void polyvec_decompress_du(polyvec *r, + const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYVECCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(r, sizeof(polyvec))) + assigns(object_whole(r)) + ensures(forall(int, k0, 0, MLKEM_K - 1, + array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))) +); + +#define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes) +/************************************************* + * Name: polyvec_tobytes + * + * Description: Serialize vector of polynomials + * + * Arguments: - uint8_t *r: pointer to output byte array + * (needs space for MLKEM_POLYVECBYTES) + * - const polyvec *a: pointer to input vector of polynomials + * Each polynomial must have coefficients in [0,..,q-1]. 
+ **************************************************/ +void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a) +__contract__( + requires(memory_no_alias(a, sizeof(polyvec))) + requires(memory_no_alias(r, MLKEM_POLYVECBYTES)) + requires(forall(int, k0, 0, MLKEM_K - 1, + array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))) + assigns(object_whole(r)) +); + +#define polyvec_frombytes MLKEM_NAMESPACE(polyvec_frombytes) +/************************************************* + * Name: polyvec_frombytes + * + * Description: De-serialize vector of polynomials; + * inverse of polyvec_tobytes + * + * Arguments: - polyvec *r: pointer to output vector of polynomials. + * Output coefficients are unsigned 12-bit values in + * [0,..,4095], not necessarily reduced modulo q. + * - const uint8_t *a: input byte array of length MLKEM_POLYVECBYTES + **************************************************/ +void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES]) +__contract__( + requires(memory_no_alias(r, sizeof(polyvec))) + requires(memory_no_alias(a, MLKEM_POLYVECBYTES)) + assigns(object_whole(r)) + ensures(forall(int, k0, 0, MLKEM_K - 1, + array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, 4095))) +); + +#define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt) +/************************************************* + * Name: polyvec_ntt + * + * Description: Apply forward NTT to all elements of a vector of polynomials. + * + * The input is assumed to be in normal order and + * coefficient-wise bound by MLKEM_Q in absolute value. + * + * The output polynomial is in bitreversed order, and + * coefficient-wise bound by NTT_BOUND in absolute value. 
+ * + * Arguments: - polyvec *r: pointer to in/output vector of polynomials + * + **************************************************/ +void polyvec_ntt(polyvec *r) +__contract__( + requires(memory_no_alias(r, sizeof(polyvec))) + requires(forall(int, j, 0, MLKEM_K - 1, + array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))) + assigns(object_whole(r)) + ensures(forall(int, j, 0, MLKEM_K - 1, + array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (NTT_BOUND - 1)))) +); + +#define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont) +/************************************************* + * Name: polyvec_invntt_tomont + * + * Description: Apply inverse NTT to all elements of a vector of polynomials + * and multiply by Montgomery factor 2^16 + * + * The input is assumed to be in bitreversed order, and can + * have arbitrary coefficients in int16_t. + * + * The output polynomial is in normal order, and + * coefficient-wise bound by INVNTT_BOUND in absolute value. + * + * + * Arguments: - polyvec *r: pointer to in/output vector of polynomials + **************************************************/ +void polyvec_invntt_tomont(polyvec *r) +__contract__( + requires(memory_no_alias(r, sizeof(polyvec))) + assigns(object_whole(r)) + ensures(forall(int, j, 0, MLKEM_K - 1, + array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (INVNTT_BOUND - 1)))) +); + +#define polyvec_basemul_acc_montgomery \ + MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery) +void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, + const polyvec *b); + +/* REF-CHANGE: This function does not exist in the reference implementation */ +#define polyvec_basemul_acc_montgomery_cached \ + MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached) +/************************************************* + * Name: polyvec_basemul_acc_montgomery_cached + * + * Description: Scalar product of two vectors of polynomials in NTT domain, + * using mulcache for second operand. 
+ * + * Bounds: + * - a is assumed to be coefficient-wise < q in absolute value. + * - No bounds guarantees for the coefficients in the result. + * + * Arguments: - poly *r: pointer to output polynomial + * - const polyvec *a: pointer to first input polynomial vector + * - const polyvec *b: pointer to second input polynomial vector + * - const polyvec_mulcache *b_cache: pointer to mulcache + * for second input polynomial vector. Can be computed + * via polyvec_mulcache_compute(). + **************************************************/ +void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, + const polyvec *b, + const polyvec_mulcache *b_cache) +__contract__( + requires(memory_no_alias(r, sizeof(poly))) + requires(memory_no_alias(a, sizeof(polyvec))) + requires(memory_no_alias(b, sizeof(polyvec))) + requires(memory_no_alias(b_cache, sizeof(polyvec_mulcache))) +/* Input is coefficient-wise < q in absolute value */ + requires(forall(int, k1, 0, MLKEM_K - 1, + array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))) + assigns(memory_slice(r, sizeof(poly))) +); + +/* REF-CHANGE: This function does not exist in the reference implementation */ +#define polyvec_mulcache_compute MLKEM_NAMESPACE(polyvec_mulcache_compute) +/************************************************************ + * Name: polyvec_mulcache_compute + * + * Description: Computes the mulcache for a vector of polynomials in NTT domain + * + * The mulcache of a degree-2 polynomial b := b0 + b1*X + * in Fq[X]/(X^2-zeta) is the value b1*zeta, needed when + * computing products of b in Fq[X]/(X^2-zeta). + * + * The mulcache of a polynomial in NTT domain -- which is + * a 128-tuple of degree-2 polynomials in Fq[X]/(X^2-zeta), + * for varying zeta, is the 128-tuple of mulcaches of those + * polynomials. + * + * The mulcache of a vector of polynomials is the vector + * of mulcaches of its entries. 
+ * + * Arguments: - x: Pointer to mulcache to be populated + * - a: Pointer to input polynomial vector + ************************************************************/ +/* + * NOTE: The default C implementation of this function populates + * the mulcache with values in (-q,q), but this is not needed for the + * higher level safety proofs, and thus not part of the spec. + */ +void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a) +__contract__( + requires(memory_no_alias(x, sizeof(polyvec_mulcache))) + requires(memory_no_alias(a, sizeof(polyvec))) + assigns(object_whole(x)) +); + +#define polyvec_reduce MLKEM_NAMESPACE(polyvec_reduce) +/************************************************* + * Name: polyvec_reduce + * + * Description: Applies Barrett reduction to each coefficient + * of each element of a vector of polynomials; + * for details of the Barrett reduction see comments in reduce.c + * + * Arguments: - polyvec *r: pointer to input/output polynomial + **************************************************/ +/* + * REF-CHANGE: The semantics of polyvec_reduce() is different in + * the reference implementation, which requires + * signed canonical output data. Unsigned canonical + * outputs are better suited to the only remaining + * use of poly_reduce() in the context of (de)serialization. + */ +void polyvec_reduce(polyvec *r) +__contract__( + requires(memory_no_alias(r, sizeof(polyvec))) + assigns(object_whole(r)) + ensures(forall(int, k0, 0, MLKEM_K - 1, + array_bound(r->vec[k0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))) +); + +#define polyvec_add MLKEM_NAMESPACE(polyvec_add) +/************************************************* + * Name: polyvec_add + * + * Description: Add vectors of polynomials + * + * Arguments: - polyvec *r: pointer to input-output vector of polynomials to be + * added to + * - const polyvec *b: pointer to second input vector of polynomials + * + * The coefficients of r and b must be so that the addition does + * not overflow. 
Otherwise, the behaviour of this function is undefined. + * + * The coefficients returned in *r are in int16_t which is sufficient + * to prove type-safety of calling units. Therefore, no stronger + * ensures clause is required on this function. + **************************************************/ +void polyvec_add(polyvec *r, const polyvec *b) +__contract__( + requires(memory_no_alias(r, sizeof(polyvec))) + requires(memory_no_alias(b, sizeof(polyvec))) + requires(forall(int, j0, 0, MLKEM_K - 1, + forall(int, k0, 0, MLKEM_N - 1, + (int32_t)r->vec[j0].coeffs[k0] + b->vec[j0].coeffs[k0] <= INT16_MAX))) + requires(forall(int, j1, 0, MLKEM_K - 1, + forall(int, k1, 0, MLKEM_N - 1, + (int32_t)r->vec[j1].coeffs[k1] + b->vec[j1].coeffs[k1] >= INT16_MIN))) + assigns(object_whole(r)) +); + +#define polyvec_tomont MLKEM_NAMESPACE(polyvec_tomont) +/************************************************* + * Name: polyvec_tomont + * + * Description: Inplace conversion of all coefficients of a polynomial + * vector from normal domain to Montgomery domain + * + * Bounds: Output < q in absolute value. 
+ * + **************************************************/ +void polyvec_tomont(polyvec *r) +__contract__( + requires(memory_no_alias(r, sizeof(polyvec))) + assigns(memory_slice(r, sizeof(polyvec))) + assigns(object_whole(r)) + ensures(forall(int, j, 0, MLKEM_K - 1, + array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))) +); + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/reduce.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/reduce.c new file mode 100644 index 0000000000..db7baf0f56 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/reduce.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "reduce.h" +#include +#include "params.h" + +/* QINV == -3327 converted to uint16_t == -3327 + 65536 == 62209 */ +static const uint32_t QINV = 62209; /* q^-1 mod 2^16 */ + +/************************************************* + * Name: cast_uint16_to_int16 + * + * Description: Cast uint16 value to int16 + * + * Returns: + * input x in 0 .. 32767: returns value unchanged + * input x in 32768 .. 
65535: returns (x - 65536) + **************************************************/ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "conversion" +#endif +static INLINE int16_t cast_uint16_to_int16(uint16_t x) +{ + /* + * PORTABILITY: This relies on uint16_t -> int16_t + * being implemented as the inverse of int16_t -> uint16_t, + * which is implementation-defined (C99 6.3.1.3 (3)) + * CBMC (correctly) fails to prove this conversion is OK, + * so we have to suppress that check here + */ + return (int16_t)x; +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************* + * Name: montgomery_reduce_generic + * + * Description: Generic Montgomery reduction; given a 32-bit integer a, computes + * 16-bit integer congruent to a * R^-1 mod q, where R=2^16 + * + * Arguments: - int32_t a: input integer to be reduced + * + * Returns: integer congruent to a * R^-1 modulo q + * + * Bounds: For any C such that |a| < q * C, the return value + * has absolute value < q (C/2^16 + 1/2). + * + * Notable special cases: + * - The Montgomery multiplication of a value of absolute value + * < q * C with a signed-canonical value ( < q/2 ) has + * absolute value q * (0.0254 * C + 1/2). + * - The Montgomery multiplication of a value of absolute value + * < q * C with a value t of |t| < q has absolute value + * < q * (0.0508 * C + 1/2). + * - The Montgomery multiplication of a value of absolute value + * < C with a value of abs < q has absolute value + * < q (C/2^16 + 1/2). + **************************************************/ +ALWAYS_INLINE +static INLINE int16_t montgomery_reduce_generic(int32_t a) +{ + /* + *Bounds on paper + * - Case |a| < q * C, for some C + * |t| <= |a|/2^16 + |t|*q/2^16 + * < q * C / 2^16 + q/2 + * = q (C/2^16 + 1/2) + * - Case |a| < (q/2) * C * q, for some C + * Replace C -> C * q in the above and estimate + * q / 2^17 < 0.0254. 
+ */ + + /* Compute a*q^{-1} mod 2^16 in unsigned representatives */ + const uint16_t a_reduced = a & UINT16_MAX; + const uint16_t a_inverted = (a_reduced * QINV) & UINT16_MAX; + + /* Lift to signed canonical representative mod 2^16. */ + const int16_t t = cast_uint16_to_int16(a_inverted); + + int32_t r = a - ((int32_t)t * MLKEM_Q); + + /* + * PORTABILITY: Right-shift on a signed integer is, strictly-speaking, + * implementation-defined for negative left argument. Here, + * we assume it's sign-preserving "arithmetic" shift right. (C99 6.5.7 (5)) + */ + r = r >> 16; + + return (int16_t)r; +} + +int16_t montgomery_reduce(int32_t a) +{ + int16_t res; + SCALAR_BOUND(a, 2 * MLKEM_Q * 32768, "montgomery_reduce input"); + + res = montgomery_reduce_generic(a); + + SCALAR_BOUND(res, (3 * (MLKEM_Q + 1)) / 2, "montgomery_reduce output"); + return res; +} + +int16_t fqmul(int16_t a, int16_t b) +{ + int16_t res; + SCALAR_BOUND(b, HALF_Q, "fqmul input"); + + res = montgomery_reduce((int32_t)a * (int32_t)b); + + SCALAR_BOUND(res, MLKEM_Q, "fqmul output"); + return res; +} + +/* + * To divide by MLKEM_Q using Barrett multiplication, the "magic number" + * multiplier is round_to_nearest(2**26/MLKEM_Q) + */ +#define BPOWER 26 +static const int32_t barrett_multiplier = + ((1 << BPOWER) + MLKEM_Q / 2) / MLKEM_Q; + +/************************************************* + * Name: barrett_reduce + * + * Description: Barrett reduction; given a 16-bit integer a, computes + * centered representative congruent to a mod q in + * {-(q-1)/2,...,(q-1)/2} + * + * Arguments: - int16_t a: input integer to be reduced + * + * Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q. + **************************************************/ +int16_t barrett_reduce(int16_t a) +{ + /* + * Compute round_to_nearest(a/MLKEM_Q) using the multiplier + * above and shift by BPOWER places. 
+ * PORTABILITY: Right-shift on a signed integer is, strictly-speaking, + * implementation-defined for negative left argument. Here, + * we assume it's sign-preserving "arithmetic" shift right. (C99 6.5.7 (5)) + */ + const int32_t t = (barrett_multiplier * a + (1 << (BPOWER - 1))) >> BPOWER; + + /* + * t is in -10 .. +10, so we need 32-bit math to + * evaluate t * MLKEM_Q and the subsequent subtraction + */ + return (int16_t)(a - t * MLKEM_Q); +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/reduce.h new file mode 100644 index 0000000000..2a486cf3ec --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/reduce.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef REDUCE_H +#define REDUCE_H + +#include +#include "cbmc.h" +#include "debug/debug.h" +#include "params.h" + +#define MONT -1044 /* 2^16 mod q */ +#define HALF_Q ((MLKEM_Q + 1) / 2) /* 1665 */ + +/************************************************* + * Name: montgomery_reduce + * + * Description: Montgomery reduction + * + * Arguments: - int32_t a: input integer to be reduced + * Must be smaller than 2 * q * 2^15 in absolute value. + * + * Returns: integer congruent to a * R^-1 modulo q, + * smaller than 3/2 q in absolute value. 
+ **************************************************/ +#define montgomery_reduce MLKEM_NAMESPACE(montgomery_reduce) +int16_t montgomery_reduce(int32_t a) +__contract__( + requires(a > -(2 * MLKEM_Q * 32768)) + requires(a < (2 * MLKEM_Q * 32768)) + ensures(return_value > -(3 * HALF_Q) && return_value < (3 * HALF_Q)) +); + +#define barrett_reduce MLKEM_NAMESPACE(barrett_reduce) +int16_t barrett_reduce(int16_t a) +__contract__( + ensures(return_value > -HALF_Q && return_value < HALF_Q) +); + +/************************************************* + * Name: fqmul + * + * Description: Montgomery multiplication modulo q=3329 + * + * Arguments: - int16_t a: first factor + * Can be any int16_t. + * - int16_t b: second factor. + * Must be signed canonical (abs value <(q+1)/2) + * + * Returns 16-bit integer congruent to a*b*R^{-1} mod q, and + * smaller than q in absolute value. + * + **************************************************/ +#define fqmul MLKEM_NAMESPACE(fqmul) +int16_t fqmul(int16_t a, int16_t b) +__contract__( + requires(b > -HALF_Q) + requires(b < HALF_Q) + ensures(return_value > -MLKEM_Q && return_value < MLKEM_Q) +); + + +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.c new file mode 100644 index 0000000000..4e8a5ce9b2 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "params.h" + +#include "arith_native.h" +#include "rej_uniform.h" + +/************************************************* + * Name: rej_uniform_scalar + * + * Description: Run rejection sampling on uniform random bytes to generate + * uniform random integers mod q + * + * Arguments: - int16_t *r: pointer to output buffer + * - unsigned int target: requested number of 16-bit integers + * (uniform mod q). + * Must be <= 4096. 
+ * - unsigned int offset: number of 16-bit integers that have + * already been sampled. + * Must be <= target. + * - const uint8_t *buf: pointer to input buffer + * (assumed to be uniform random bytes) + * - unsigned int buflen: length of input buffer in bytes + * Must be <= 4096. + * Must be a multiple of 3. + * + * Note: Strictly speaking, only a few values of buflen near UINT_MAX need + * excluding. The limit of 4096 is somewhat arbitrary but sufficient for all + * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. + * + * Returns the new offset of sampled 16-bit integers, at most target, + * and at least the initial offset. + * If the new offset is strictly less than target, all of the input buffer + * is guaranteed to have been consumed. If it is equal to target, no information + * is provided on how many bytes of the input buffer have been consumed. + **************************************************/ +static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target, + unsigned int offset, const uint8_t *buf, + unsigned int buflen) +{ + unsigned int ctr, pos; + uint16_t val0, val1; + + ctr = offset; + pos = 0; + /* pos + 3 cannot overflow due to the assumption buflen <= 4096 */ + while (ctr < target && pos + 3 <= buflen) + __loop__( + invariant(offset <= ctr && ctr <= target && pos <= buflen) + invariant(ctr > 0 ==> array_bound(r, 0, ctr - 1, 0, (MLKEM_Q - 1)))) + { + val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF; + val1 = ((buf[pos + 1] >> 4) | ((uint16_t)buf[pos + 2] << 4)) & 0xFFF; + pos += 3; + + if (val0 < MLKEM_Q) + { + r[ctr++] = val0; + } + if (ctr < target && val1 < MLKEM_Q) + { + r[ctr++] = val1; + } + } + return ctr; +} + +#if !defined(MLKEM_USE_NATIVE_REJ_UNIFORM) +unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, + const uint8_t *buf, unsigned int buflen) +{ + return rej_uniform_scalar(r, target, offset, buf, buflen); +} +#else /* MLKEM_USE_NATIVE_REJ_UNIFORM */ + 
+unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, + const uint8_t *buf, unsigned int buflen) +{ + int ret; + + /* Sample from large buffer with full lane as much as possible. */ + ret = rej_uniform_native(r + offset, target - offset, buf, buflen); + if (ret != -1) + return offset + (unsigned)ret; + + return rej_uniform_scalar(r, target, offset, buf, buflen); +} +#endif /* MLKEM_USE_NATIVE_REJ_UNIFORM */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.h new file mode 100644 index 0000000000..aeb9cc3eb0 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef REJ_UNIFORM_H +#define REJ_UNIFORM_H + +#include +#include +#include "cbmc.h" +#include "params.h" + +#define rej_uniform MLKEM_NAMESPACE(rej_uniform) +/************************************************* + * Name: rej_uniform + * + * Description: Run rejection sampling on uniform random bytes to generate + * uniform random integers mod q + * + * Arguments: - int16_t *r: pointer to output buffer + * - unsigned int target: requested number of 16-bit integers + * (uniform mod q). + * Must be <= 4096. + * - unsigned int offset: number of 16-bit integers that have + * already been sampled. + * Must be <= target. + * - const uint8_t *buf: pointer to input buffer + * (assumed to be uniform random bytes) + * - unsigned int buflen: length of input buffer in bytes + * Must be <= 4096. + * Must be a multiple of 3. + * + * Note: Strictly speaking, only a few values of buflen near UINT_MAX need + * excluding. The limit of 4096 is somewhat arbitary but sufficient for all + * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. + * + * Returns the new offset of sampled 16-bit integers, at most target, + * and at least the initial offset. 
+ * If the new offset is strictly less than len, all of the input buffers + * is guaranteed to have been consumed. If it is equal to len, no information + * is provided on how many bytes of the input buffer have been consumed. + **************************************************/ + +/* + * REF-CHANGE: The signature differs from the Kyber reference implementation + * in that it adds the offset and always expects the base of the target + * buffer. This avoids shifting the buffer base in the caller, which appears + * tricky to reason about. + */ +unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset, + const uint8_t *buf, unsigned int buflen) +__contract__( + requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) + requires(memory_no_alias(r, sizeof(int16_t) * target)) + requires(memory_no_alias(buf, buflen)) + requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1))) + assigns(memory_slice(r, sizeof(int16_t) * target)) + ensures(offset <= return_value && return_value <= target) + ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1))) +); +#endif diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/symmetric-shake.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/symmetric-shake.c new file mode 100644 index 0000000000..5dd8c10d92 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/symmetric-shake.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include +#include +#include +#include "fips202.h" +#include "params.h" +#include "symmetric.h" + +void mlkem_shake256_prf(uint8_t *out, size_t outlen, + const uint8_t key[MLKEM_SYMBYTES], uint8_t nonce) +{ + uint8_t extkey[MLKEM_SYMBYTES + 1]; + + memcpy(extkey, key, MLKEM_SYMBYTES); + extkey[MLKEM_SYMBYTES] = nonce; + + shake256(out, outlen, extkey, sizeof(extkey)); +} + +void mlkem_shake256_rkprf(uint8_t out[MLKEM_SSBYTES], + const uint8_t 
key[MLKEM_SYMBYTES], + const uint8_t input[MLKEM_CIPHERTEXTBYTES]) +{ + shake256incctx s; + + shake256_inc_init(&s); + shake256_inc_absorb(&s, key, MLKEM_SYMBYTES); + shake256_inc_absorb(&s, input, MLKEM_CIPHERTEXTBYTES); + shake256_inc_finalize(&s); + shake256_inc_squeeze(out, MLKEM_SSBYTES, &s); + shake256_inc_ctx_release(&s); +} diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/symmetric.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/symmetric.h new file mode 100644 index 0000000000..202741a7b3 --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/symmetric.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef SYMMETRIC_H +#define SYMMETRIC_H + +#include +#include +#include "params.h" + +#include "fips202.h" + +#include "cbmc.h" + +#define mlkem_shake256_prf MLKEM_NAMESPACE(mlkem_shake256_prf) +/************************************************* + * Name: mlkem_shake256_prf + * + * Ref: FIPS-203 Section 4.1. Function PRF (eq 4.3) + * + * Description: Usage of SHAKE256 as a PRF, concatenates secret and public input + * and then generates outlen bytes of SHAKE256 output + * + * Arguments: - uint8_t *out: pointer to output + * - size_t outlen: number of requested output bytes + * - const uint8_t *key: pointer to the key (of length + * MLKEM_SYMBYTES) + * - uint8_t nonce: single-byte nonce (public PRF input) + * + * out and key may NOT be aliased. + **************************************************/ +void mlkem_shake256_prf(uint8_t *out, size_t outlen, + const uint8_t key[MLKEM_SYMBYTES], uint8_t nonce) +__contract__( + requires(memory_no_alias(out, outlen)) + requires(memory_no_alias(key, MLKEM_SYMBYTES)) + assigns(memory_slice(out, outlen)) +); + +#define mlkem_shake256_rkprf MLKEM_NAMESPACE(mlkem_shake256_rkprf) +/************************************************* + * Name: mlkem_shake256_rkprf + * + * Ref: FIPS-203 Section 4.1. 
Hash function J + * + * Description: Usage of SHAKE256 as a PRF, concatenates key with input + * and then generates MLKEM_SSBYTES bytes of SHAKE256 output + * + * Arguments: - uint8_t *out: pointer to output + * - const uint8_t *key: pointer to the key (of length + * MLKEM_SYMBYTES) + * - const uint8_t *input: pointer to the input (of length + * MLKEM_CIPHERTEXTBYTES) + * + * out, key, and input may NOT be aliased. + **************************************************/ +void mlkem_shake256_rkprf(uint8_t out[MLKEM_SSBYTES], + const uint8_t key[MLKEM_SYMBYTES], + const uint8_t input[MLKEM_CIPHERTEXTBYTES]) +__contract__( + requires(memory_no_alias(out, MLKEM_SSBYTES)) + requires(memory_no_alias(key, MLKEM_SYMBYTES)) + requires(memory_no_alias(input, MLKEM_CIPHERTEXTBYTES)) + assigns(memory_slice(out, MLKEM_SSBYTES)) +); + + +/* Macros denoting FIPS-203 specific Hash functions */ + +/* Hash function H, FIPS-203 4.1 (eq 4.4) */ +#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) + +/* Hash function G, FIPS-203 4.1 (eq 4.5) */ +#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) + +/* Macros denoting FIPS-203 specific PRFs */ +#define prf(OUT, OUTBYTES, KEY, NONCE) \ + mlkem_shake256_prf(OUT, OUTBYTES, KEY, NONCE) +#define rkprf(OUT, KEY, INPUT) mlkem_shake256_rkprf(OUT, KEY, INPUT) + +#endif /* SYMMETRIC_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.c new file mode 100644 index 0000000000..b5b71e023e --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.c @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#include "verify.h" + +#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) +/* + * Masking value used in constant-time functions from + * verify.h to block the compiler's range analysis and + * thereby reduce the risk of compiler-introduced branches. 
+ */ +volatile uint64_t ct_opt_blocker_u64 = 0; + +#else /* MLKEM_USE_ASM_VALUE_BARRIER */ + +int empty_cu_verify; + +#endif /* MLKEM_USE_ASM_VALUE_BARRIER */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.h new file mode 100644 index 0000000000..5c62223c3d --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.h @@ -0,0 +1,305 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef VERIFY_H +#define VERIFY_H + +#include +#include +#include +#include "cbmc.h" +#include "params.h" + +/* Constant-time comparisons and conditional operations + + We reduce the risk for compilation into variable-time code + through the use of 'value barriers'. + + Functionally, a value barrier is a no-op. To the compiler, however, + it constitutes an arbitrary modification of its input, and therefore + hinders value propagation and range analysis. + + We consider two approaches to implement a value barrier: + - An empty inline asm block which marks the target value as clobbered. + - XOR'ing with the value of a volatile global that's set to 0. + + The first approach is cheap because it only prevents the compiler + from reasoning about the value of the variable past the barrier, + but does not directly generate additional instructions. + + The second approach generates redundant loads and XOR operations + and therefore comes at a higher runtime cost. However, it appears + more robust towards optimization, as compilers should never drop + a volatile load. + + We use the empty-ASM value barrier for GCC and clang, and fall + back to the global volatile barrier otherwise. + + The global value barrier can be forced by setting MLKEM_NO_ASM_VALUE_BARRIER. 
+ +*/ + +#if (defined(__GNUC__) || defined(__clang__)) && !defined(CBMC) && \ + !defined(MLKEM_NO_ASM_VALUE_BARRIER) +#define MLKEM_USE_ASM_VALUE_BARRIER +#endif + +#if !defined(MLKEM_USE_ASM_VALUE_BARRIER) + +/* + * Declaration of global volatile that the global value barrier + * is loading from and masking with. + */ +#define ct_opt_blocker_u64 MLKEM_NAMESPACE(ct_opt_blocker_u64) +extern volatile uint64_t ct_opt_blocker_u64; + +/* Helper functions for obtaining masks of various sizes */ +STATIC_INLINE_TESTABLE uint8_t get_optblocker_u8(void) +__contract__(ensures(return_value == 0)) { return (uint8_t)ct_opt_blocker_u64; } + +STATIC_INLINE_TESTABLE uint32_t get_optblocker_u32(void) +__contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; } + +STATIC_INLINE_TESTABLE uint32_t get_optblocker_i32(void) +__contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; } + +STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b) +__contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u32()); } + +STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b) +__contract__(ensures(return_value == b)) { return (b ^ get_optblocker_i32()); } + +STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b) +__contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u8()); } + +#else /* !MLKEM_USE_ASM_VALUE_BARRIER */ + +STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b) +__contract__(ensures(return_value == b)) +{ + asm("" : "+r"(b)); + return b; +} + +STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b) +__contract__(ensures(return_value == b)) +{ + asm("" : "+r"(b)); + return b; +} + +STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b) +__contract__(ensures(return_value == b)) +{ + asm("" : "+r"(b)); + return b; +} + +#endif /* MLKEM_USE_ASM_VALUE_BARRIER */ + +/* + * The ct_cmask_nonzero_xxx functions below make deliberate use of unsigned + * overflow, which is fully defined behaviour in C. 
It is thus safe to disable + * this warning. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif + +/************************************************* + * Name: ct_cmask_nonzero_u16 + * + * Description: Return 0 if input is zero, and 0xFFFF otherwise. + * + * Arguments: uint16_t x: Value to be converted into a mask + **************************************************/ +STATIC_INLINE_TESTABLE uint16_t ct_cmask_nonzero_u16(uint16_t x) +__contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF))) +{ + uint32_t tmp = value_barrier_u32(-((uint32_t)x)); + tmp >>= 16; + return tmp; +} + +/************************************************* + * Name: ct_cmask_nonzero_u8 + * + * Description: Return 0 if input is zero, and 0xFF otherwise. + * + * Arguments: uint8_t x: Value to be converted into a mask + **************************************************/ +STATIC_INLINE_TESTABLE uint8_t ct_cmask_nonzero_u8(uint8_t x) +__contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF))) +{ + uint32_t tmp = value_barrier_u32(-((uint32_t)x)); + tmp >>= 24; + return tmp; +} + +/* Put unsigned overflow warnings in CBMC back into scope */ +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/* + * The ct_cmask_neg_i16 function below makes deliberate use of + * signed to unsigned integer conversion, which is fully defined + * behaviour in C. It is thus safe to disable this warning. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "conversion" +#endif + +/************************************************* + * Name: ct_cmask_neg_i16 + * + * Description: Return 0 if input is non-negative, and 0xFFFF otherwise. + * + * Arguments: int16_t x: Value to be converted into a mask + **************************************************/ +STATIC_INLINE_TESTABLE uint16_t ct_cmask_neg_i16(int16_t x) +__contract__(ensures(return_value == ((x < 0) ?
0xFFFF : 0))) +{ + int32_t tmp = value_barrier_i32((int32_t)x); + tmp >>= 16; + return (int16_t)tmp; +} + +/* Put signed-to-unsigned conversion warnings in CBMC back into scope */ +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/* + * The ct_sel_xxx functions below make deliberate use of unsigned + * to signed integer conversion, which is implementation-defined + * behaviour. Here, we assume that uint16_t -> int16_t is inverse + * to int16_t -> uint16_t. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "conversion" +#endif + +/************************************************* + * Name: ct_sel_int16 + * + * Description: Functionally equivalent to cond ? a : b, + * but implemented with guards against + * compiler-introduced branches. + * + * Arguments: int16_t a: First alternative + * int16_t b: Second alternative + * uint16_t cond: Condition variable. + **************************************************/ +STATIC_INLINE_TESTABLE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond) +__contract__(ensures(return_value == (cond ? a : b))) +{ + uint16_t au = a, bu = b; + uint16_t res = bu ^ (ct_cmask_nonzero_u16(cond) & (au ^ bu)); + return (int16_t)res; +} + +/* Put unsigned-to-signed warnings in CBMC back into scope */ +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************* + * Name: ct_sel_uint8 + * + * Description: Functionally equivalent to cond ? a : b, + * but implemented with guards against + * compiler-introduced branches. + * + * Arguments: uint8_t a: First alternative + * uint8_t b: Second alternative + * uint8_t cond: Condition variable. + **************************************************/ +STATIC_INLINE_TESTABLE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond) +__contract__(ensures(return_value == (cond ?
a : b))) +{ + return b ^ (ct_cmask_nonzero_u8(cond) & (a ^ b)); +} + +/************************************************* + * Name: ct_memcmp + * + * Description: Compare two arrays for equality in constant time. + * + * Arguments: const uint8_t *a: pointer to first byte array + * const uint8_t *b: pointer to second byte array + * size_t len: length of the byte arrays + * + * Returns 0 if the byte arrays are equal, a non-zero value otherwise + **************************************************/ +STATIC_INLINE_TESTABLE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b, + const size_t len) +__contract__( + requires(memory_no_alias(a, len)) + requires(memory_no_alias(b, len)) + requires(len <= INT_MAX) + ensures((return_value == 0) == forall(int, i, 0, ((int)len - 1), (a[i] == b[i])))) +{ + uint8_t r = 0, s = 0; + + /* + * Switch to a _signed_ ilen value, so that our loop counter + * can also be signed, and thus (i - 1) in the loop invariant + * can yield -1 as required. + */ + const int ilen = (int)len; + int i; + + for (i = 0; i < ilen; i++) + __loop__( + invariant(i >= 0 && i <= ilen) + invariant((r == 0) == (forall(int, k, 0, (i - 1), (a[k] == b[k]))))) + { + r |= a[i] ^ b[i]; + /* s is useless, but prevents the loop from being aborted once r=0xff. */ + s ^= a[i] ^ b[i]; + } + + /* + * - Convert r into a mask; this may not be necessary, but is an additional + * safeguard + * against leaking information about a and b. + * - XOR twice with s, separated by a value barrier, to prevent the compiler + * from dropping the s computation in the loop. + */ + return (value_barrier_u8(ct_cmask_nonzero_u8(r) ^ s) ^ s); +} + +/************************************************* + * Name: ct_cmov_zero + * + * Description: Copy len bytes from x to r if b is zero; + * don't modify r if b is non-zero. + * assumes two's complement representation of negative integers. + * Runs in constant time.
+ * + * Arguments: uint8_t *r: pointer to output byte array + * const uint8_t *x: pointer to input byte array + * size_t len: Amount of bytes to be copied + * uint8_t b: Condition value. + **************************************************/ +STATIC_INLINE_TESTABLE +void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) +__contract__( + requires(memory_no_alias(r, len)) + requires(memory_no_alias(x, len)) + assigns(memory_slice(r, len))) +{ + size_t i; + for (i = 0; i < len; i++) + __loop__(invariant(i <= len)) + { + r[i] = ct_sel_uint8(r[i], x[i], b); + } +} + +#endif /* VERIFY_H */ diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/zetas.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/zetas.c new file mode 100644 index 0000000000..f52b2ff5ad --- /dev/null +++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/zetas.c @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2024 The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * WARNING: This file is auto-generated from scripts/autogenerate_files.py + * Do not modify it directly. + */ + +#include "ntt.h" + +/* + * Table of zeta values used in the reference NTT and inverse NTT. + * See autogenerate_files.py for details.
+ */ +const int16_t zetas[128] = { + -1044, -758, -359, -1517, 1493, 1422, 287, 202, -171, 622, 1577, + 182, 962, -1202, -1474, 1468, 573, -1325, 264, 383, -829, 1458, + -1602, -130, -681, 1017, 732, 608, -1542, 411, -205, -1571, 1223, + 652, -552, 1015, -1293, 1491, -282, -1544, 516, -8, -320, -666, + -1618, -1162, 126, 1469, -853, -90, -271, 830, 107, -1421, -247, + -951, -398, 961, -1508, -725, 448, -1065, 677, -1275, -1103, 430, + 555, 843, -1251, 871, 1550, 105, 422, 587, 177, -235, -291, + -460, 1574, 1653, -246, 778, 1159, -147, -777, 1483, -602, 1119, + -1590, 644, -872, 349, 418, 329, -156, -75, 817, 1097, 603, + 610, 1322, -1285, -1465, 384, -1215, -136, 1218, -1335, -874, 220, + -1187, -1659, -1185, -1530, -1278, 794, -1510, -854, -870, 478, -108, + -308, 996, 991, 958, -1460, 1522, 1628, +}; diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/align.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/align.h deleted file mode 100644 index 3463866f37..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/align.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef ALIGN_H -#define ALIGN_H - -#include -#include - -#define ALIGNED_UINT8(N) \ - union { \ - uint8_t coeffs[N]; \ - __m256i vec[(N+31)/32]; \ - } - -#define ALIGNED_INT16(N) \ - union { \ - int16_t coeffs[N]; \ - __m256i vec[(N+15)/16]; \ - } - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/api.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/api.h deleted file mode 100644 index a154e80f1d..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/api.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef API_H -#define API_H - -#include - -#define pqcrystals_kyber512_SECRETKEYBYTES 1632 -#define pqcrystals_kyber512_PUBLICKEYBYTES 800 -#define pqcrystals_kyber512_CIPHERTEXTBYTES 768 -#define pqcrystals_kyber512_KEYPAIRCOINBYTES 64 -#define pqcrystals_kyber512_ENCCOINBYTES 32 -#define pqcrystals_kyber512_BYTES 32 
- -#define pqcrystals_kyber512_avx2_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES -#define pqcrystals_kyber512_avx2_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES -#define pqcrystals_kyber512_avx2_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES -#define pqcrystals_kyber512_avx2_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES -#define pqcrystals_kyber512_avx2_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES -#define pqcrystals_kyber512_avx2_BYTES pqcrystals_kyber512_BYTES - -int pqcrystals_kyber512_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); -int pqcrystals_kyber512_avx2_keypair(uint8_t *pk, uint8_t *sk); -int pqcrystals_kyber512_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); -int pqcrystals_kyber512_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -int pqcrystals_kyber512_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#define pqcrystals_kyber768_SECRETKEYBYTES 2400 -#define pqcrystals_kyber768_PUBLICKEYBYTES 1184 -#define pqcrystals_kyber768_CIPHERTEXTBYTES 1088 -#define pqcrystals_kyber768_KEYPAIRCOINBYTES 64 -#define pqcrystals_kyber768_ENCCOINBYTES 32 -#define pqcrystals_kyber768_BYTES 32 - -#define pqcrystals_kyber768_avx2_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES -#define pqcrystals_kyber768_avx2_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES -#define pqcrystals_kyber768_avx2_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES -#define pqcrystals_kyber768_avx2_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES -#define pqcrystals_kyber768_avx2_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES -#define pqcrystals_kyber768_avx2_BYTES pqcrystals_kyber768_BYTES - -int pqcrystals_kyber768_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); -int pqcrystals_kyber768_avx2_keypair(uint8_t *pk, uint8_t *sk); -int pqcrystals_kyber768_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); -int 
pqcrystals_kyber768_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -int pqcrystals_kyber768_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#define pqcrystals_kyber1024_SECRETKEYBYTES 3168 -#define pqcrystals_kyber1024_PUBLICKEYBYTES 1568 -#define pqcrystals_kyber1024_CIPHERTEXTBYTES 1568 -#define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64 -#define pqcrystals_kyber1024_ENCCOINBYTES 32 -#define pqcrystals_kyber1024_BYTES 32 - -#define pqcrystals_kyber1024_avx2_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES -#define pqcrystals_kyber1024_avx2_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES -#define pqcrystals_kyber1024_avx2_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES -#define pqcrystals_kyber1024_avx2_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES -#define pqcrystals_kyber1024_avx2_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES -#define pqcrystals_kyber1024_avx2_BYTES pqcrystals_kyber1024_BYTES - -int pqcrystals_kyber1024_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); -int pqcrystals_kyber1024_avx2_keypair(uint8_t *pk, uint8_t *sk); -int pqcrystals_kyber1024_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); -int pqcrystals_kyber1024_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -int pqcrystals_kyber1024_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/basemul.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/basemul.S deleted file mode 100644 index 36990639b2..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/basemul.S +++ /dev/null @@ -1,105 +0,0 @@ -#include "consts.h" - -.macro schoolbook off -vmovdqa _16XQINV*2(%rcx),%ymm0 -vmovdqa (64*\off+ 0)*2(%rsi),%ymm1 # a0 -vmovdqa (64*\off+16)*2(%rsi),%ymm2 # b0 -vmovdqa (64*\off+32)*2(%rsi),%ymm3 # a1 -vmovdqa (64*\off+48)*2(%rsi),%ymm4 # b1 - -vpmullw %ymm0,%ymm1,%ymm9 # a0.lo 
-vpmullw %ymm0,%ymm2,%ymm10 # b0.lo -vpmullw %ymm0,%ymm3,%ymm11 # a1.lo -vpmullw %ymm0,%ymm4,%ymm12 # b1.lo - -vmovdqa (64*\off+ 0)*2(%rdx),%ymm5 # c0 -vmovdqa (64*\off+16)*2(%rdx),%ymm6 # d0 - -vpmulhw %ymm5,%ymm1,%ymm13 # a0c0.hi -vpmulhw %ymm6,%ymm1,%ymm1 # a0d0.hi -vpmulhw %ymm5,%ymm2,%ymm14 # b0c0.hi -vpmulhw %ymm6,%ymm2,%ymm2 # b0d0.hi - -vmovdqa (64*\off+32)*2(%rdx),%ymm7 # c1 -vmovdqa (64*\off+48)*2(%rdx),%ymm8 # d1 - -vpmulhw %ymm7,%ymm3,%ymm15 # a1c1.hi -vpmulhw %ymm8,%ymm3,%ymm3 # a1d1.hi -vpmulhw %ymm7,%ymm4,%ymm0 # b1c1.hi -vpmulhw %ymm8,%ymm4,%ymm4 # b1d1.hi - -vmovdqa %ymm13,(%rsp) - -vpmullw %ymm5,%ymm9,%ymm13 # a0c0.lo -vpmullw %ymm6,%ymm9,%ymm9 # a0d0.lo -vpmullw %ymm5,%ymm10,%ymm5 # b0c0.lo -vpmullw %ymm6,%ymm10,%ymm10 # b0d0.lo - -vpmullw %ymm7,%ymm11,%ymm6 # a1c1.lo -vpmullw %ymm8,%ymm11,%ymm11 # a1d1.lo -vpmullw %ymm7,%ymm12,%ymm7 # b1c1.lo -vpmullw %ymm8,%ymm12,%ymm12 # b1d1.lo - -vmovdqa _16XQ*2(%rcx),%ymm8 -vpmulhw %ymm8,%ymm13,%ymm13 -vpmulhw %ymm8,%ymm9,%ymm9 -vpmulhw %ymm8,%ymm5,%ymm5 -vpmulhw %ymm8,%ymm10,%ymm10 -vpmulhw %ymm8,%ymm6,%ymm6 -vpmulhw %ymm8,%ymm11,%ymm11 -vpmulhw %ymm8,%ymm7,%ymm7 -vpmulhw %ymm8,%ymm12,%ymm12 - -vpsubw (%rsp),%ymm13,%ymm13 # -a0c0 -vpsubw %ymm9,%ymm1,%ymm9 # a0d0 -vpsubw %ymm5,%ymm14,%ymm5 # b0c0 -vpsubw %ymm10,%ymm2,%ymm10 # b0d0 - -vpsubw %ymm6,%ymm15,%ymm6 # a1c1 -vpsubw %ymm11,%ymm3,%ymm11 # a1d1 -vpsubw %ymm7,%ymm0,%ymm7 # b1c1 -vpsubw %ymm12,%ymm4,%ymm12 # b1d1 - -vmovdqa (%r9),%ymm0 -vmovdqa 32(%r9),%ymm1 -vpmullw %ymm0,%ymm10,%ymm2 -vpmullw %ymm0,%ymm12,%ymm3 -vpmulhw %ymm1,%ymm10,%ymm10 -vpmulhw %ymm1,%ymm12,%ymm12 -vpmulhw %ymm8,%ymm2,%ymm2 -vpmulhw %ymm8,%ymm3,%ymm3 -vpsubw %ymm2,%ymm10,%ymm10 # rb0d0 -vpsubw %ymm3,%ymm12,%ymm12 # rb1d1 - -vpaddw %ymm5,%ymm9,%ymm9 -vpaddw %ymm7,%ymm11,%ymm11 -vpsubw %ymm13,%ymm10,%ymm13 -vpsubw %ymm12,%ymm6,%ymm6 - -vmovdqa %ymm13,(64*\off+ 0)*2(%rdi) -vmovdqa %ymm9,(64*\off+16)*2(%rdi) -vmovdqa %ymm6,(64*\off+32)*2(%rdi) -vmovdqa %ymm11,(64*\off+48)*2(%rdi) 
-.endm - -.text -.global cdecl(basemul_avx) -cdecl(basemul_avx): -mov %rsp,%r8 -and $-32,%rsp -sub $32,%rsp - -lea (_ZETAS_EXP+176)*2(%rcx),%r9 -schoolbook 0 - -add $32*2,%r9 -schoolbook 1 - -add $192*2,%r9 -schoolbook 2 - -add $32*2,%r9 -schoolbook 3 - -mov %r8,%rsp -ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/cbd.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/cbd.c deleted file mode 100644 index dad473c79e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/cbd.c +++ /dev/null @@ -1,144 +0,0 @@ -#include -#include -#include "params.h" -#include "cbd.h" - -/************************************************* -* Name: cbd2 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter eta=2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const __m256i *buf: pointer to aligned input byte array -**************************************************/ -static void cbd2(poly * restrict r, const __m256i buf[2*KYBER_N/128]) -{ - unsigned int i; - __m256i f0, f1, f2, f3; - const __m256i mask55 = _mm256_set1_epi32(0x55555555); - const __m256i mask33 = _mm256_set1_epi32(0x33333333); - const __m256i mask03 = _mm256_set1_epi32(0x03030303); - const __m256i mask0F = _mm256_set1_epi32(0x0F0F0F0F); - - for(i = 0; i < KYBER_N/64; i++) { - f0 = _mm256_load_si256(&buf[i]); - - f1 = _mm256_srli_epi16(f0, 1); - f0 = _mm256_and_si256(mask55, f0); - f1 = _mm256_and_si256(mask55, f1); - f0 = _mm256_add_epi8(f0, f1); - - f1 = _mm256_srli_epi16(f0, 2); - f0 = _mm256_and_si256(mask33, f0); - f1 = _mm256_and_si256(mask33, f1); - f0 = _mm256_add_epi8(f0, mask33); - f0 = _mm256_sub_epi8(f0, f1); - - f1 = _mm256_srli_epi16(f0, 4); - f0 = _mm256_and_si256(mask0F, f0); - f1 = _mm256_and_si256(mask0F, f1); - f0 = _mm256_sub_epi8(f0, mask03); - f1 = _mm256_sub_epi8(f1, mask03); - - f2 = 
_mm256_unpacklo_epi8(f0, f1); - f3 = _mm256_unpackhi_epi8(f0, f1); - - f0 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f2)); - f1 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f2,1)); - f2 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f3)); - f3 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f3,1)); - - _mm256_store_si256(&r->vec[4*i+0], f0); - _mm256_store_si256(&r->vec[4*i+1], f2); - _mm256_store_si256(&r->vec[4*i+2], f1); - _mm256_store_si256(&r->vec[4*i+3], f3); - } -} - -#if KYBER_ETA1 == 3 -/************************************************* -* Name: cbd3 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter eta=3 -* This function is only needed for Kyber-512 -* -* Arguments: - poly *r: pointer to output polynomial -* - const __m256i *buf: pointer to aligned input byte array -**************************************************/ -static void cbd3(poly * restrict r, const uint8_t buf[3*KYBER_N/4+8]) -{ - unsigned int i; - __m256i f0, f1, f2, f3; - const __m256i mask249 = _mm256_set1_epi32(0x249249); - const __m256i mask6DB = _mm256_set1_epi32(0x6DB6DB); - const __m256i mask07 = _mm256_set1_epi32(7); - const __m256i mask70 = _mm256_set1_epi32(7 << 16); - const __m256i mask3 = _mm256_set1_epi16(3); - const __m256i shufbidx = _mm256_set_epi8(-1,15,14,13,-1,12,11,10,-1, 9, 8, 7,-1, 6, 5, 4, - -1,11,10, 9,-1, 8, 7, 6,-1, 5, 4, 3,-1, 2, 1, 0); - - for(i = 0; i < KYBER_N/32; i++) { - f0 = _mm256_loadu_si256((__m256i *)&buf[24*i]); - f0 = _mm256_permute4x64_epi64(f0,0x94); - f0 = _mm256_shuffle_epi8(f0,shufbidx); - - f1 = _mm256_srli_epi32(f0,1); - f2 = _mm256_srli_epi32(f0,2); - f0 = _mm256_and_si256(mask249,f0); - f1 = _mm256_and_si256(mask249,f1); - f2 = _mm256_and_si256(mask249,f2); - f0 = _mm256_add_epi32(f0,f1); - f0 = _mm256_add_epi32(f0,f2); - - f1 = _mm256_srli_epi32(f0,3); - f0 = _mm256_add_epi32(f0,mask6DB); - f0 = 
_mm256_sub_epi32(f0,f1); - - f1 = _mm256_slli_epi32(f0,10); - f2 = _mm256_srli_epi32(f0,12); - f3 = _mm256_srli_epi32(f0, 2); - f0 = _mm256_and_si256(f0,mask07); - f1 = _mm256_and_si256(f1,mask70); - f2 = _mm256_and_si256(f2,mask07); - f3 = _mm256_and_si256(f3,mask70); - f0 = _mm256_add_epi16(f0,f1); - f1 = _mm256_add_epi16(f2,f3); - f0 = _mm256_sub_epi16(f0,mask3); - f1 = _mm256_sub_epi16(f1,mask3); - - f2 = _mm256_unpacklo_epi32(f0,f1); - f3 = _mm256_unpackhi_epi32(f0,f1); - - f0 = _mm256_permute2x128_si256(f2,f3,0x20); - f1 = _mm256_permute2x128_si256(f2,f3,0x31); - - _mm256_store_si256(&r->vec[2*i+0], f0); - _mm256_store_si256(&r->vec[2*i+1], f1); - } -} -#endif - -/* buf 32 bytes longer for cbd3 */ -void poly_cbd_eta1(poly *r, const __m256i buf[KYBER_ETA1*KYBER_N/128+1]) -{ -#if KYBER_ETA1 == 2 - cbd2(r, buf); -#elif KYBER_ETA1 == 3 - cbd3(r, (uint8_t *)buf); -#else -#error "This implementation requires eta1 in {2,3}" -#endif -} - -void poly_cbd_eta2(poly *r, const __m256i buf[KYBER_ETA2*KYBER_N/128]) -{ -#if KYBER_ETA2 == 2 - cbd2(r, buf); -#else -#error "This implementation requires eta2 = 2" -#endif -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/cbd.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/cbd.h deleted file mode 100644 index 05788e06b4..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/cbd.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef CBD_H -#define CBD_H - -#include -#include -#include "params.h" -#include "poly.h" - -#define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1) -void poly_cbd_eta1(poly *r, const __m256i buf[KYBER_ETA1*KYBER_N/128+1]); - -#define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2) -void poly_cbd_eta2(poly *r, const __m256i buf[KYBER_ETA2*KYBER_N/128]); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/consts.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/consts.c deleted file mode 100644 index 84e596893d..0000000000 --- 
a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/consts.c +++ /dev/null @@ -1,121 +0,0 @@ -#include "align.h" -#include "params.h" -#include "consts.h" - -#define Q KYBER_Q -#define MONT -1044 // 2^16 mod q -#define QINV -3327 // q^-1 mod 2^16 -#define V 20159 // floor(2^26/q + 0.5) -#define FHI 1441 // mont^2/128 -#define FLO -10079 // qinv*FHI -#define MONTSQHI 1353 // mont^2 -#define MONTSQLO 20553 // qinv*MONTSQHI -#define MASK 4095 -#define SHIFT 32 - -const qdata_t qdata = {{ -#define _16XQ 0 - Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, - -#define _16XQINV 16 - QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, - QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, - -#define _16XV 32 - V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, - -#define _16XFLO 48 - FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO, - FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO, - -#define _16XFHI 64 - FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, - FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, - -#define _16XMONTSQLO 80 - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - -#define _16XMONTSQHI 96 - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - -#define _16XMASK 112 - MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, - MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, - -#define _REVIDXB 128 - 3854, 3340, 2826, 2312, 1798, 1284, 770, 256, - 3854, 3340, 2826, 2312, 1798, 1284, 770, 256, - -#define _REVIDXD 144 - 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0, 0, - -#define _ZETAS_EXP 160 - 31498, 31498, 31498, 31498, -758, -758, -758, -758, - 5237, 5237, 5237, 5237, 1397, 1397, 1397, 1397, - 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745, - 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745, - -359, -359, -359, -359, -359, -359, -359, -359, - -359, -359, -359, -359, 
-359, -359, -359, -359, - 13525, 13525, 13525, 13525, 13525, 13525, 13525, 13525, - -12402, -12402, -12402, -12402, -12402, -12402, -12402, -12402, - 1493, 1493, 1493, 1493, 1493, 1493, 1493, 1493, - 1422, 1422, 1422, 1422, 1422, 1422, 1422, 1422, - -20907, -20907, -20907, -20907, 27758, 27758, 27758, 27758, - -3799, -3799, -3799, -3799, -15690, -15690, -15690, -15690, - -171, -171, -171, -171, 622, 622, 622, 622, - 1577, 1577, 1577, 1577, 182, 182, 182, 182, - -5827, -5827, 17363, 17363, -26360, -26360, -29057, -29057, - 5571, 5571, -1102, -1102, 21438, 21438, -26242, -26242, - 573, 573, -1325, -1325, 264, 264, 383, 383, - -829, -829, 1458, 1458, -1602, -1602, -130, -130, - -5689, -6516, 1496, 30967, -23565, 20179, 20710, 25080, - -12796, 26616, 16064, -12442, 9134, -650, -25986, 27837, - 1223, 652, -552, 1015, -1293, 1491, -282, -1544, - 516, -8, -320, -666, -1618, -1162, 126, 1469, - -335, -11477, -32227, 20494, -27738, 945, -14883, 6182, - 32010, 10631, 29175, -28762, -18486, 17560, -14430, -5276, - -1103, 555, -1251, 1550, 422, 177, -291, 1574, - -246, 1159, -777, -602, -1590, -872, 418, -156, - 11182, 13387, -14233, -21655, 13131, -4587, 23092, 5493, - -32502, 30317, -18741, 12639, 20100, 18525, 19529, -12619, - 430, 843, 871, 105, 587, -235, -460, 1653, - 778, -147, 1483, 1119, 644, 349, 329, -75, - 787, 787, 787, 787, 787, 787, 787, 787, - 787, 787, 787, 787, 787, 787, 787, 787, - -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517, - -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517, - 28191, 28191, 28191, 28191, 28191, 28191, 28191, 28191, - -16694, -16694, -16694, -16694, -16694, -16694, -16694, -16694, - 287, 287, 287, 287, 287, 287, 287, 287, - 202, 202, 202, 202, 202, 202, 202, 202, - 10690, 10690, 10690, 10690, 1358, 1358, 1358, 1358, - -11202, -11202, -11202, -11202, 31164, 31164, 31164, 31164, - 962, 962, 962, 962, -1202, -1202, -1202, -1202, - -1474, -1474, -1474, -1474, 1468, 1468, 1468, 1468, - -28073, -28073, 24313, 24313, -10532, 
-10532, 8800, 8800, - 18426, 18426, 8859, 8859, 26675, 26675, -16163, -16163, - -681, -681, 1017, 1017, 732, 732, 608, 608, - -1542, -1542, 411, 411, -205, -205, -1571, -1571, - 19883, -28250, -15887, -8898, -28309, 9075, -30199, 18249, - 13426, 14017, -29156, -12757, 16832, 4311, -24155, -17915, - -853, -90, -271, 830, 107, -1421, -247, -951, - -398, 961, -1508, -725, 448, -1065, 677, -1275, - -31183, 25435, -7382, 24391, -20927, 10946, 24214, 16989, - 10335, -7934, -22502, 10906, 31636, 28644, 23998, -17422, - 817, 603, 1322, -1465, -1215, 1218, -874, -1187, - -1185, -1278, -1510, -870, -108, 996, 958, 1522, - 20297, 2146, 15355, -32384, -6280, -14903, -11044, 14469, - -21498, -20198, 23210, -17442, -23860, -20257, 7756, 23132, - 1097, 610, -1285, 384, -136, -1335, 220, -1659, - -1530, 794, -854, 478, -308, 991, -1460, 1628, - -#define _16XSHIFT 624 - SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, - SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT -}}; diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/consts.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/consts.h deleted file mode 100644 index f95899cd8e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/consts.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef CONSTS_H -#define CONSTS_H - -#include "params.h" - -#define _16XQ 0 -#define _16XQINV 16 -#define _16XV 32 -#define _16XFLO 48 -#define _16XFHI 64 -#define _16XMONTSQLO 80 -#define _16XMONTSQHI 96 -#define _16XMASK 112 -#define _REVIDXB 128 -#define _REVIDXD 144 -#define _ZETAS_EXP 160 -#define _16XSHIFT 624 - -/* The C ABI on MacOS exports all symbols with a leading - * underscore. This means that any symbols we refer to from - * C files (functions) can't be found, and all symbols we - * refer to from ASM also can't be found. 
- * - * This define helps us get around this - */ -#ifdef __ASSEMBLER__ -#if defined(__WIN32__) || defined(__APPLE__) -#define decorate(s) _##s -#define cdecl2(s) decorate(s) -#define cdecl(s) cdecl2(KYBER_NAMESPACE(##s)) -#else -#define cdecl(s) KYBER_NAMESPACE(##s) -#endif -#endif - -#ifndef __ASSEMBLER__ -#include "align.h" -typedef ALIGNED_INT16(640) qdata_t; -#define qdata KYBER_NAMESPACE(qdata) -extern const qdata_t qdata; -#endif - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/fq.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/fq.S deleted file mode 100644 index 3bb1ebd3d8..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/fq.S +++ /dev/null @@ -1,88 +0,0 @@ -#include "consts.h" -.include "fq.inc" - -.text -reduce128_avx: -#load -vmovdqa (%rdi),%ymm2 -vmovdqa 32(%rdi),%ymm3 -vmovdqa 64(%rdi),%ymm4 -vmovdqa 96(%rdi),%ymm5 -vmovdqa 128(%rdi),%ymm6 -vmovdqa 160(%rdi),%ymm7 -vmovdqa 192(%rdi),%ymm8 -vmovdqa 224(%rdi),%ymm9 - -red16 2 -red16 3 -red16 4 -red16 5 -red16 6 -red16 7 -red16 8 -red16 9 - -#store -vmovdqa %ymm2,(%rdi) -vmovdqa %ymm3,32(%rdi) -vmovdqa %ymm4,64(%rdi) -vmovdqa %ymm5,96(%rdi) -vmovdqa %ymm6,128(%rdi) -vmovdqa %ymm7,160(%rdi) -vmovdqa %ymm8,192(%rdi) -vmovdqa %ymm9,224(%rdi) - -ret - -.global cdecl(reduce_avx) -cdecl(reduce_avx): -#consts -vmovdqa _16XQ*2(%rsi),%ymm0 -vmovdqa _16XV*2(%rsi),%ymm1 -call reduce128_avx -add $256,%rdi -call reduce128_avx -ret - -tomont128_avx: -#load -vmovdqa (%rdi),%ymm3 -vmovdqa 32(%rdi),%ymm4 -vmovdqa 64(%rdi),%ymm5 -vmovdqa 96(%rdi),%ymm6 -vmovdqa 128(%rdi),%ymm7 -vmovdqa 160(%rdi),%ymm8 -vmovdqa 192(%rdi),%ymm9 -vmovdqa 224(%rdi),%ymm10 - -fqmulprecomp 1,2,3,11 -fqmulprecomp 1,2,4,12 -fqmulprecomp 1,2,5,13 -fqmulprecomp 1,2,6,14 -fqmulprecomp 1,2,7,15 -fqmulprecomp 1,2,8,11 -fqmulprecomp 1,2,9,12 -fqmulprecomp 1,2,10,13 - -#store -vmovdqa %ymm3,(%rdi) -vmovdqa %ymm4,32(%rdi) -vmovdqa %ymm5,64(%rdi) -vmovdqa %ymm6,96(%rdi) -vmovdqa 
%ymm7,128(%rdi) -vmovdqa %ymm8,160(%rdi) -vmovdqa %ymm9,192(%rdi) -vmovdqa %ymm10,224(%rdi) - -ret - -.global cdecl(tomont_avx) -cdecl(tomont_avx): -#consts -vmovdqa _16XQ*2(%rsi),%ymm0 -vmovdqa _16XMONTSQLO*2(%rsi),%ymm1 -vmovdqa _16XMONTSQHI*2(%rsi),%ymm2 -call tomont128_avx -add $256,%rdi -call tomont128_avx -ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/fq.inc b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/fq.inc deleted file mode 100644 index 4b7afc3118..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/fq.inc +++ /dev/null @@ -1,30 +0,0 @@ -.macro red16 r,rs=0,x=12 -vpmulhw %ymm1,%ymm\r,%ymm\x -.if \rs -vpmulhrsw %ymm\rs,%ymm\x,%ymm\x -.else -vpsraw $10,%ymm\x,%ymm\x -.endif -vpmullw %ymm0,%ymm\x,%ymm\x -vpsubw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro csubq r,x=12 -vpsubw %ymm0,%ymm\r,%ymm\r -vpsraw $15,%ymm\r,%ymm\x -vpand %ymm0,%ymm\x,%ymm\x -vpaddw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro caddq r,x=12 -vpsraw $15,%ymm\r,%ymm\x -vpand %ymm0,%ymm\x,%ymm\x -vpaddw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro fqmulprecomp al,ah,b,x=12 -vpmullw %ymm\al,%ymm\b,%ymm\x -vpmulhw %ymm\ah,%ymm\b,%ymm\b -vpmulhw %ymm0,%ymm\x,%ymm\x -vpsubw %ymm\x,%ymm\b,%ymm\b -.endm diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/indcpa.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/indcpa.c deleted file mode 100644 index c4b2b3a89f..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/indcpa.c +++ /dev/null @@ -1,568 +0,0 @@ -#include -#include -#include -#include -#include "align.h" -#include "params.h" -#include "indcpa.h" -#include "polyvec.h" -#include "poly.h" -#include "ntt.h" -#include "cbd.h" -#include "rejsample.h" -#include "symmetric.h" -#include "randombytes.h" - -/************************************************* -* Name: pack_pk -* -* Description: Serialize the public key as concatenation of the -* serialized vector of polynomials pk and the -* public seed 
used to generate the matrix A. -* The polynomial coefficients in pk are assumed to -* lie in the invertal [0,q], i.e. pk must be reduced -* by polyvec_reduce(). -* -* Arguments: uint8_t *r: pointer to the output serialized public key -* polyvec *pk: pointer to the input public-key polyvec -* const uint8_t *seed: pointer to the input public seed -**************************************************/ -static void pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES], - polyvec *pk, - const uint8_t seed[KYBER_SYMBYTES]) -{ - polyvec_tobytes(r, pk); - memcpy(r+KYBER_POLYVECBYTES, seed, KYBER_SYMBYTES); -} - -/************************************************* -* Name: unpack_pk -* -* Description: De-serialize public key from a byte array; -* approximate inverse of pack_pk -* -* Arguments: - polyvec *pk: pointer to output public-key polynomial vector -* - uint8_t *seed: pointer to output seed to generate matrix A -* - const uint8_t *packedpk: pointer to input serialized public key -**************************************************/ -static void unpack_pk(polyvec *pk, - uint8_t seed[KYBER_SYMBYTES], - const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES]) -{ - polyvec_frombytes(pk, packedpk); - memcpy(seed, packedpk+KYBER_POLYVECBYTES, KYBER_SYMBYTES); -} - -/************************************************* -* Name: pack_sk -* -* Description: Serialize the secret key. -* The polynomial coefficients in sk are assumed to -* lie in the invertal [0,q], i.e. sk must be reduced -* by polyvec_reduce(). 
-* -* Arguments: - uint8_t *r: pointer to output serialized secret key -* - polyvec *sk: pointer to input vector of polynomials (secret key) -**************************************************/ -static void pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk) -{ - polyvec_tobytes(r, sk); -} - -/************************************************* -* Name: unpack_sk -* -* Description: De-serialize the secret key; inverse of pack_sk -* -* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key) -* - const uint8_t *packedsk: pointer to input serialized secret key -**************************************************/ -static void unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES]) -{ - polyvec_frombytes(sk, packedsk); -} - -/************************************************* -* Name: pack_ciphertext -* -* Description: Serialize the ciphertext as concatenation of the -* compressed and serialized vector of polynomials b -* and the compressed and serialized polynomial v. -* The polynomial coefficients in b and v are assumed to -* lie in the invertal [0,q], i.e. b and v must be reduced -* by polyvec_reduce() and poly_reduce(), respectively. 
-* -* Arguments: uint8_t *r: pointer to the output serialized ciphertext -* poly *pk: pointer to the input vector of polynomials b -* poly *v: pointer to the input polynomial v -**************************************************/ -static void pack_ciphertext(uint8_t r[KYBER_INDCPA_BYTES], polyvec *b, poly *v) -{ - polyvec_compress(r, b); - poly_compress(r+KYBER_POLYVECCOMPRESSEDBYTES, v); -} - -/************************************************* -* Name: unpack_ciphertext -* -* Description: De-serialize and decompress ciphertext from a byte array; -* approximate inverse of pack_ciphertext -* -* Arguments: - polyvec *b: pointer to the output vector of polynomials b -* - poly *v: pointer to the output polynomial v -* - const uint8_t *c: pointer to the input serialized ciphertext -**************************************************/ -static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[KYBER_INDCPA_BYTES]) -{ - polyvec_decompress(b, c); - poly_decompress(v, c+KYBER_POLYVECCOMPRESSEDBYTES); -} - -/************************************************* -* Name: rej_uniform -* -* Description: Run rejection sampling on uniform random bytes to generate -* uniform random integers mod q -* -* Arguments: - int16_t *r: pointer to output array -* - unsigned int len: requested number of 16-bit integers (uniform mod q) -* - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes) -* - unsigned int buflen: length of input buffer in bytes -* -* Returns number of sampled 16-bit integers (at most len) -**************************************************/ -static unsigned int rej_uniform(int16_t *r, - unsigned int len, - const uint8_t *buf, - unsigned int buflen) -{ - unsigned int ctr, pos; - uint16_t val0, val1; - - ctr = pos = 0; - while(ctr < len && pos <= buflen - 3) { // buflen is always at least 3 - val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; - val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF; - pos += 3; - - 
if(val0 < KYBER_Q) - r[ctr++] = val0; - if(ctr < len && val1 < KYBER_Q) - r[ctr++] = val1; - } - - return ctr; -} - -#define gen_a(A,B) gen_matrix(A,B,0) -#define gen_at(A,B) gen_matrix(A,B,1) - -/************************************************* -* Name: gen_matrix -* -* Description: Deterministically generate matrix A (or the transpose of A) -* from a seed. Entries of the matrix are polynomials that look -* uniformly random. Performs rejection sampling on output of -* a XOF -* -* Arguments: - polyvec *a: pointer to ouptput matrix A -* - const uint8_t *seed: pointer to input seed -* - int transposed: boolean deciding whether A or A^T is generated -**************************************************/ -#if KYBER_K == 2 -void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) -{ - unsigned int ctr0, ctr1, ctr2, ctr3; - ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; - __m256i f; - shake128x4incctx state; - - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - if(transposed) { - buf[0].coeffs[32] = 0; - buf[0].coeffs[33] = 0; - buf[1].coeffs[32] = 0; - buf[1].coeffs[33] = 1; - buf[2].coeffs[32] = 1; - buf[2].coeffs[33] = 0; - buf[3].coeffs[32] = 1; - buf[3].coeffs[33] = 1; - } - else { - buf[0].coeffs[32] = 0; - buf[0].coeffs[33] = 0; - buf[1].coeffs[32] = 1; - buf[1].coeffs[33] = 0; - buf[2].coeffs[32] = 0; - buf[2].coeffs[33] = 1; - buf[3].coeffs[32] = 1; - buf[3].coeffs[33] = 1; - } - - shake128x4_inc_init(&state); - shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); - - ctr0 = rej_uniform_avx(a[0].vec[0].coeffs, buf[0].coeffs); - ctr1 = rej_uniform_avx(a[0].vec[1].coeffs, buf[1].coeffs); - ctr2 = rej_uniform_avx(a[1].vec[0].coeffs, 
buf[2].coeffs); - ctr3 = rej_uniform_avx(a[1].vec[1].coeffs, buf[3].coeffs); - - while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); - - ctr0 += rej_uniform(a[0].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); - ctr1 += rej_uniform(a[0].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); - ctr2 += rej_uniform(a[1].vec[0].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); - ctr3 += rej_uniform(a[1].vec[1].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); - } - - poly_nttunpack(&a[0].vec[0]); - poly_nttunpack(&a[0].vec[1]); - poly_nttunpack(&a[1].vec[0]); - poly_nttunpack(&a[1].vec[1]); - shake128x4_inc_ctx_release(&state); -} -#elif KYBER_K == 3 -void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) -{ - unsigned int ctr0, ctr1, ctr2, ctr3; - ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; - __m256i f; - shake128x4incctx state; - shake128incctx state1x; - - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - if(transposed) { - buf[0].coeffs[32] = 0; - buf[0].coeffs[33] = 0; - buf[1].coeffs[32] = 0; - buf[1].coeffs[33] = 1; - buf[2].coeffs[32] = 0; - buf[2].coeffs[33] = 2; - buf[3].coeffs[32] = 1; - buf[3].coeffs[33] = 0; - } - else { - buf[0].coeffs[32] = 0; - buf[0].coeffs[33] = 0; - buf[1].coeffs[32] = 1; - buf[1].coeffs[33] = 0; - buf[2].coeffs[32] = 2; - buf[2].coeffs[33] = 0; - buf[3].coeffs[32] = 0; - buf[3].coeffs[33] = 1; - } - - shake128x4_inc_init(&state); - shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); - - ctr0 = 
rej_uniform_avx(a[0].vec[0].coeffs, buf[0].coeffs); - ctr1 = rej_uniform_avx(a[0].vec[1].coeffs, buf[1].coeffs); - ctr2 = rej_uniform_avx(a[0].vec[2].coeffs, buf[2].coeffs); - ctr3 = rej_uniform_avx(a[1].vec[0].coeffs, buf[3].coeffs); - - while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); - - ctr0 += rej_uniform(a[0].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); - ctr1 += rej_uniform(a[0].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); - ctr2 += rej_uniform(a[0].vec[2].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); - ctr3 += rej_uniform(a[1].vec[0].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); - } - - poly_nttunpack(&a[0].vec[0]); - poly_nttunpack(&a[0].vec[1]); - poly_nttunpack(&a[0].vec[2]); - poly_nttunpack(&a[1].vec[0]); - - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - if(transposed) { - buf[0].coeffs[32] = 1; - buf[0].coeffs[33] = 1; - buf[1].coeffs[32] = 1; - buf[1].coeffs[33] = 2; - buf[2].coeffs[32] = 2; - buf[2].coeffs[33] = 0; - buf[3].coeffs[32] = 2; - buf[3].coeffs[33] = 1; - } - else { - buf[0].coeffs[32] = 1; - buf[0].coeffs[33] = 1; - buf[1].coeffs[32] = 2; - buf[1].coeffs[33] = 1; - buf[2].coeffs[32] = 0; - buf[2].coeffs[33] = 2; - buf[3].coeffs[32] = 1; - buf[3].coeffs[33] = 2; - } - - shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); - - ctr0 = rej_uniform_avx(a[1].vec[1].coeffs, buf[0].coeffs); - ctr1 = rej_uniform_avx(a[1].vec[2].coeffs, buf[1].coeffs); - ctr2 = rej_uniform_avx(a[2].vec[0].coeffs, buf[2].coeffs); - ctr3 = 
rej_uniform_avx(a[2].vec[1].coeffs, buf[3].coeffs); - - while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); - - ctr0 += rej_uniform(a[1].vec[1].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); - ctr1 += rej_uniform(a[1].vec[2].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); - ctr2 += rej_uniform(a[2].vec[0].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); - ctr3 += rej_uniform(a[2].vec[1].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); - } - shake128x4_inc_ctx_release(&state); - - poly_nttunpack(&a[1].vec[1]); - poly_nttunpack(&a[1].vec[2]); - poly_nttunpack(&a[2].vec[0]); - poly_nttunpack(&a[2].vec[1]); - - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - buf[0].coeffs[32] = 2; - buf[0].coeffs[33] = 2; - - shake128_inc_init(&state1x); - shake128_absorb_once(&state1x, buf[0].coeffs, 34); - shake128_squeezeblocks(buf[0].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state1x); - ctr0 = rej_uniform_avx(a[2].vec[2].coeffs, buf[0].coeffs); - while(ctr0 < KYBER_N) { - shake128_squeezeblocks(buf[0].coeffs, 1, &state1x); - ctr0 += rej_uniform(a[2].vec[2].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); - } - shake128_inc_ctx_release(&state1x); - - poly_nttunpack(&a[2].vec[2]); -} -#elif KYBER_K == 4 -void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) -{ - unsigned int i, ctr0, ctr1, ctr2, ctr3; - ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; - __m256i f; - shake128x4incctx state; - shake128x4_inc_init(&state); - - for(i=0;i<4;i++) { - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - if(transposed) { - buf[0].coeffs[32] = i; - buf[0].coeffs[33] = 0; - buf[1].coeffs[32] = i; - buf[1].coeffs[33] = 
1; - buf[2].coeffs[32] = i; - buf[2].coeffs[33] = 2; - buf[3].coeffs[32] = i; - buf[3].coeffs[33] = 3; - } - else { - buf[0].coeffs[32] = 0; - buf[0].coeffs[33] = i; - buf[1].coeffs[32] = 1; - buf[1].coeffs[33] = i; - buf[2].coeffs[32] = 2; - buf[2].coeffs[33] = i; - buf[3].coeffs[32] = 3; - buf[3].coeffs[33] = i; - } - - shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); - - ctr0 = rej_uniform_avx(a[i].vec[0].coeffs, buf[0].coeffs); - ctr1 = rej_uniform_avx(a[i].vec[1].coeffs, buf[1].coeffs); - ctr2 = rej_uniform_avx(a[i].vec[2].coeffs, buf[2].coeffs); - ctr3 = rej_uniform_avx(a[i].vec[3].coeffs, buf[3].coeffs); - - while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); - - ctr0 += rej_uniform(a[i].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); - ctr1 += rej_uniform(a[i].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); - ctr2 += rej_uniform(a[i].vec[2].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); - ctr3 += rej_uniform(a[i].vec[3].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); - } - - poly_nttunpack(&a[i].vec[0]); - poly_nttunpack(&a[i].vec[1]); - poly_nttunpack(&a[i].vec[2]); - poly_nttunpack(&a[i].vec[3]); - } - shake128x4_inc_ctx_release(&state); -} -#endif - -/************************************************* -* Name: indcpa_keypair_derand -* -* Description: Generates public and private key for the CPA-secure -* public-key encryption scheme underlying Kyber -* -* Arguments: - uint8_t *pk: pointer to output public key -* (of length KYBER_INDCPA_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key -* (of length KYBER_INDCPA_SECRETKEYBYTES bytes) -* - const uint8_t *coins: pointer to input 
randomness -* (of length KYBER_SYMBYTES bytes) -**************************************************/ -void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], - uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], - const uint8_t coins[KYBER_SYMBYTES]) -{ - unsigned int i; - uint8_t buf[2*KYBER_SYMBYTES]; - const uint8_t *publicseed = buf; - const uint8_t *noiseseed = buf + KYBER_SYMBYTES; - polyvec a[KYBER_K], e, pkpv, skpv; - - memcpy(buf, coins, KYBER_SYMBYTES); - buf[KYBER_SYMBYTES] = KYBER_K; - hash_g(buf, buf, KYBER_SYMBYTES+1); - - gen_a(a, publicseed); - -#if KYBER_K == 2 - poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, e.vec+0, e.vec+1, noiseseed, 0, 1, 2, 3); -#elif KYBER_K == 3 - poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, e.vec+0, noiseseed, 0, 1, 2, 3); - poly_getnoise_eta1_4x(e.vec+1, e.vec+2, pkpv.vec+0, pkpv.vec+1, noiseseed, 4, 5, 6, 7); -#elif KYBER_K == 4 - poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, skpv.vec+3, noiseseed, 0, 1, 2, 3); - poly_getnoise_eta1_4x(e.vec+0, e.vec+1, e.vec+2, e.vec+3, noiseseed, 4, 5, 6, 7); -#endif - - polyvec_ntt(&skpv); - polyvec_reduce(&skpv); - polyvec_ntt(&e); - - // matrix-vector multiplication - for(i=0;i -#include "params.h" -#include "polyvec.h" - -#define gen_matrix KYBER_NAMESPACE(gen_matrix) -void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed); - -#define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand) -void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], - uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], - const uint8_t coins[KYBER_SYMBYTES]); - -#define indcpa_enc KYBER_NAMESPACE(indcpa_enc) -void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], - const uint8_t m[KYBER_INDCPA_MSGBYTES], - const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], - const uint8_t coins[KYBER_SYMBYTES]); - -#define indcpa_dec KYBER_NAMESPACE(indcpa_dec) -void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES], - const uint8_t c[KYBER_INDCPA_BYTES], - const uint8_t 
sk[KYBER_INDCPA_SECRETKEYBYTES]); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/invntt.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/invntt.S deleted file mode 100644 index 76d4189996..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/invntt.S +++ /dev/null @@ -1,193 +0,0 @@ -#include "consts.h" -.include "shuffle.inc" -.include "fq.inc" - -.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,zl0=2,zl1=2,zh0=3,zh1=3 -vpsubw %ymm\rl0,%ymm\rh0,%ymm12 -vpaddw %ymm\rh0,%ymm\rl0,%ymm\rl0 -vpsubw %ymm\rl1,%ymm\rh1,%ymm13 - -vpmullw %ymm\zl0,%ymm12,%ymm\rh0 -vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl1 -vpsubw %ymm\rl2,%ymm\rh2,%ymm14 - -vpmullw %ymm\zl0,%ymm13,%ymm\rh1 -vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl2 -vpsubw %ymm\rl3,%ymm\rh3,%ymm15 - -vpmullw %ymm\zl1,%ymm14,%ymm\rh2 -vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl3 -vpmullw %ymm\zl1,%ymm15,%ymm\rh3 - -vpmulhw %ymm\zh0,%ymm12,%ymm12 -vpmulhw %ymm\zh0,%ymm13,%ymm13 - -vpmulhw %ymm\zh1,%ymm14,%ymm14 -vpmulhw %ymm\zh1,%ymm15,%ymm15 - -vpmulhw %ymm0,%ymm\rh0,%ymm\rh0 - -vpmulhw %ymm0,%ymm\rh1,%ymm\rh1 - -vpmulhw %ymm0,%ymm\rh2,%ymm\rh2 -vpmulhw %ymm0,%ymm\rh3,%ymm\rh3 - -# - -# - -vpsubw %ymm\rh0,%ymm12,%ymm\rh0 - -vpsubw %ymm\rh1,%ymm13,%ymm\rh1 - -vpsubw %ymm\rh2,%ymm14,%ymm\rh2 -vpsubw %ymm\rh3,%ymm15,%ymm\rh3 -.endm - -.macro intt_levels0t5 off -/* level 0 */ -vmovdqa _16XFLO*2(%rsi),%ymm2 -vmovdqa _16XFHI*2(%rsi),%ymm3 - -vmovdqa (128*\off+ 0)*2(%rdi),%ymm4 -vmovdqa (128*\off+ 32)*2(%rdi),%ymm6 -vmovdqa (128*\off+ 16)*2(%rdi),%ymm5 -vmovdqa (128*\off+ 48)*2(%rdi),%ymm7 - -fqmulprecomp 2,3,4 -fqmulprecomp 2,3,6 -fqmulprecomp 2,3,5 -fqmulprecomp 2,3,7 - -vmovdqa (128*\off+ 64)*2(%rdi),%ymm8 -vmovdqa (128*\off+ 96)*2(%rdi),%ymm10 -vmovdqa (128*\off+ 80)*2(%rdi),%ymm9 -vmovdqa (128*\off+112)*2(%rdi),%ymm11 - -fqmulprecomp 2,3,8 -fqmulprecomp 2,3,10 -fqmulprecomp 2,3,9 -fqmulprecomp 2,3,11 - -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+208)*2(%rsi),%ymm15 -vpermq 
$0x4E,(_ZETAS_EXP+(1-\off)*224+176)*2(%rsi),%ymm1 -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+224)*2(%rsi),%ymm2 -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+192)*2(%rsi),%ymm3 -vmovdqa _REVIDXB*2(%rsi),%ymm12 -vpshufb %ymm12,%ymm15,%ymm15 -vpshufb %ymm12,%ymm1,%ymm1 -vpshufb %ymm12,%ymm2,%ymm2 -vpshufb %ymm12,%ymm3,%ymm3 - -butterfly 4,5,8,9,6,7,10,11,15,1,2,3 - -/* level 1 */ -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+144)*2(%rsi),%ymm2 -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+160)*2(%rsi),%ymm3 -vmovdqa _REVIDXB*2(%rsi),%ymm1 -vpshufb %ymm1,%ymm2,%ymm2 -vpshufb %ymm1,%ymm3,%ymm3 - -butterfly 4,5,6,7,8,9,10,11,2,2,3,3 - -shuffle1 4,5,3,5 -shuffle1 6,7,4,7 -shuffle1 8,9,6,9 -shuffle1 10,11,8,11 - -/* level 2 */ -vmovdqa _REVIDXD*2(%rsi),%ymm12 -vpermd (_ZETAS_EXP+(1-\off)*224+112)*2(%rsi),%ymm12,%ymm2 -vpermd (_ZETAS_EXP+(1-\off)*224+128)*2(%rsi),%ymm12,%ymm10 - -butterfly 3,4,6,8,5,7,9,11,2,2,10,10 - -vmovdqa _16XV*2(%rsi),%ymm1 -red16 3 - -shuffle2 3,4,10,4 -shuffle2 6,8,3,8 -shuffle2 5,7,6,7 -shuffle2 9,11,5,11 - -/* level 3 */ -vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+80)*2(%rsi),%ymm2 -vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+96)*2(%rsi),%ymm9 - -butterfly 10,3,6,5,4,8,7,11,2,2,9,9 - -shuffle4 10,3,9,3 -shuffle4 6,5,10,5 -shuffle4 4,8,6,8 -shuffle4 7,11,4,11 - -/* level 4 */ -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+48)*2(%rsi),%ymm2 -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+64)*2(%rsi),%ymm7 - -butterfly 9,10,6,4,3,5,8,11,2,2,7,7 - -red16 9 - -shuffle8 9,10,7,10 -shuffle8 6,4,9,4 -shuffle8 3,5,6,5 -shuffle8 8,11,3,11 - -/* level 5 */ -vmovdqa (_ZETAS_EXP+(1-\off)*224+16)*2(%rsi),%ymm2 -vmovdqa (_ZETAS_EXP+(1-\off)*224+32)*2(%rsi),%ymm8 - -butterfly 7,9,6,3,10,4,5,11,2,2,8,8 - -vmovdqa %ymm7,(128*\off+ 0)*2(%rdi) -vmovdqa %ymm9,(128*\off+ 16)*2(%rdi) -vmovdqa %ymm6,(128*\off+ 32)*2(%rdi) -vmovdqa %ymm3,(128*\off+ 48)*2(%rdi) -vmovdqa %ymm10,(128*\off+ 64)*2(%rdi) -vmovdqa %ymm4,(128*\off+ 80)*2(%rdi) -vmovdqa %ymm5,(128*\off+ 96)*2(%rdi) -vmovdqa %ymm11,(128*\off+112)*2(%rdi) -.endm - 
-.macro intt_level6 off -/* level 6 */ -vmovdqa (64*\off+ 0)*2(%rdi),%ymm4 -vmovdqa (64*\off+128)*2(%rdi),%ymm8 -vmovdqa (64*\off+ 16)*2(%rdi),%ymm5 -vmovdqa (64*\off+144)*2(%rdi),%ymm9 -vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm2 - -vmovdqa (64*\off+ 32)*2(%rdi),%ymm6 -vmovdqa (64*\off+160)*2(%rdi),%ymm10 -vmovdqa (64*\off+ 48)*2(%rdi),%ymm7 -vmovdqa (64*\off+176)*2(%rdi),%ymm11 -vpbroadcastq (_ZETAS_EXP+4)*2(%rsi),%ymm3 - -butterfly 4,5,6,7,8,9,10,11 - -.if \off == 0 -red16 4 -.endif - -vmovdqa %ymm4,(64*\off+ 0)*2(%rdi) -vmovdqa %ymm5,(64*\off+ 16)*2(%rdi) -vmovdqa %ymm6,(64*\off+ 32)*2(%rdi) -vmovdqa %ymm7,(64*\off+ 48)*2(%rdi) -vmovdqa %ymm8,(64*\off+128)*2(%rdi) -vmovdqa %ymm9,(64*\off+144)*2(%rdi) -vmovdqa %ymm10,(64*\off+160)*2(%rdi) -vmovdqa %ymm11,(64*\off+176)*2(%rdi) -.endm - -.text -.global cdecl(invntt_avx) -cdecl(invntt_avx): -vmovdqa _16XQ*2(%rsi),%ymm0 - -intt_levels0t5 0 -intt_levels0t5 1 - -intt_level6 0 -intt_level6 1 -ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/kem.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/kem.c deleted file mode 100644 index 63abc1029c..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/kem.c +++ /dev/null @@ -1,169 +0,0 @@ -#include -#include -#include -#include "params.h" -#include "kem.h" -#include "indcpa.h" -#include "verify.h" -#include "symmetric.h" -#include "randombytes.h" -/************************************************* -* Name: crypto_kem_keypair_derand -* -* Description: Generates public and private key -* for CCA-secure Kyber key encapsulation mechanism -* -* Arguments: - uint8_t *pk: pointer to output public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key -* (an already allocated array of KYBER_SECRETKEYBYTES bytes) -* - uint8_t *coins: pointer to input randomness -* (an already allocated array filled with 2*KYBER_SYMBYTES random bytes) -** -* Returns 0 (success) 
-**************************************************/ -int crypto_kem_keypair_derand(uint8_t *pk, - uint8_t *sk, - const uint8_t *coins) -{ - indcpa_keypair_derand(pk, sk, coins); - memcpy(sk+KYBER_INDCPA_SECRETKEYBYTES, pk, KYBER_PUBLICKEYBYTES); - hash_h(sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); - /* Value z for pseudo-random output on reject */ - memcpy(sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, coins+KYBER_SYMBYTES, KYBER_SYMBYTES); - return 0; -} - -/************************************************* -* Name: crypto_kem_keypair -* -* Description: Generates public and private key -* for CCA-secure Kyber key encapsulation mechanism -* -* Arguments: - uint8_t *pk: pointer to output public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key -* (an already allocated array of KYBER_SECRETKEYBYTES bytes) -* -* Returns 0 (success) -**************************************************/ -int crypto_kem_keypair(uint8_t *pk, - uint8_t *sk) -{ - uint8_t coins[2*KYBER_SYMBYTES]; - randombytes(coins, 2*KYBER_SYMBYTES); - crypto_kem_keypair_derand(pk, sk, coins); - return 0; -} - -/************************************************* -* Name: crypto_kem_enc_derand -* -* Description: Generates cipher text and shared -* secret for given public key -* -* Arguments: - uint8_t *ct: pointer to output cipher text -* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) -* - uint8_t *ss: pointer to output shared secret -* (an already allocated array of KYBER_SSBYTES bytes) -* - const uint8_t *pk: pointer to input public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* - const uint8_t *coins: pointer to input randomness -* (an already allocated array filled with KYBER_SYMBYTES random bytes) -** -* Returns 0 (success) -**************************************************/ -int crypto_kem_enc_derand(uint8_t *ct, - uint8_t *ss, - const uint8_t *pk, - const uint8_t *coins) -{ - uint8_t 
buf[2*KYBER_SYMBYTES]; - /* Will contain key, coins */ - uint8_t kr[2*KYBER_SYMBYTES]; - - memcpy(buf, coins, KYBER_SYMBYTES); - - /* Multitarget countermeasure for coins + contributory KEM */ - hash_h(buf+KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); - hash_g(kr, buf, 2*KYBER_SYMBYTES); - - /* coins are in kr+KYBER_SYMBYTES */ - indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES); - - memcpy(ss,kr,KYBER_SYMBYTES); - return 0; -} - -/************************************************* -* Name: crypto_kem_enc -* -* Description: Generates cipher text and shared -* secret for given public key -* -* Arguments: - uint8_t *ct: pointer to output cipher text -* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) -* - uint8_t *ss: pointer to output shared secret -* (an already allocated array of KYBER_SSBYTES bytes) -* - const uint8_t *pk: pointer to input public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* -* Returns 0 (success) -**************************************************/ -int crypto_kem_enc(uint8_t *ct, - uint8_t *ss, - const uint8_t *pk) -{ - uint8_t coins[KYBER_SYMBYTES]; - randombytes(coins, KYBER_SYMBYTES); - crypto_kem_enc_derand(ct, ss, pk, coins); - return 0; -} - -/************************************************* -* Name: crypto_kem_dec -* -* Description: Generates shared secret for given -* cipher text and private key -* -* Arguments: - uint8_t *ss: pointer to output shared secret -* (an already allocated array of KYBER_SSBYTES bytes) -* - const uint8_t *ct: pointer to input cipher text -* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) -* - const uint8_t *sk: pointer to input private key -* (an already allocated array of KYBER_SECRETKEYBYTES bytes) -* -* Returns 0. -* -* On failure, ss will contain a pseudo-random value. 
-**************************************************/ -int crypto_kem_dec(uint8_t *ss, - const uint8_t *ct, - const uint8_t *sk) -{ - int fail; - uint8_t buf[2*KYBER_SYMBYTES]; - /* Will contain key, coins */ - uint8_t kr[2*KYBER_SYMBYTES]; - uint8_t cmp[KYBER_CIPHERTEXTBYTES+KYBER_SYMBYTES]; - const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES; - - indcpa_dec(buf, ct, sk); - - /* Multitarget countermeasure for coins + contributory KEM */ - memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES); - hash_g(kr, buf, 2*KYBER_SYMBYTES); - - /* coins are in kr+KYBER_SYMBYTES */ - indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES); - - fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); - - /* Compute rejection key */ - rkprf(ss,sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES,ct); - - /* Copy true key to return buffer if fail is false */ - cmov(ss,kr,KYBER_SYMBYTES,!fail); - - return 0; -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/kem.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/kem.h deleted file mode 100644 index 234f11966b..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/kem.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef KEM_H -#define KEM_H - -#include -#include "params.h" - -#define CRYPTO_SECRETKEYBYTES KYBER_SECRETKEYBYTES -#define CRYPTO_PUBLICKEYBYTES KYBER_PUBLICKEYBYTES -#define CRYPTO_CIPHERTEXTBYTES KYBER_CIPHERTEXTBYTES -#define CRYPTO_BYTES KYBER_SSBYTES - -#if (KYBER_K == 2) -#define CRYPTO_ALGNAME "Kyber512" -#elif (KYBER_K == 3) -#define CRYPTO_ALGNAME "Kyber768" -#elif (KYBER_K == 4) -#define CRYPTO_ALGNAME "Kyber1024" -#endif - -#define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand) -int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); - -#define crypto_kem_keypair KYBER_NAMESPACE(keypair) -int crypto_kem_keypair(uint8_t *pk, uint8_t *sk); - -#define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand) -int crypto_kem_enc_derand(uint8_t *ct, 
uint8_t *ss, const uint8_t *pk, const uint8_t *coins); - -#define crypto_kem_enc KYBER_NAMESPACE(enc) -int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); - -#define crypto_kem_dec KYBER_NAMESPACE(dec) -int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/ntt.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/ntt.S deleted file mode 100644 index 0ce7b41297..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/ntt.S +++ /dev/null @@ -1,189 +0,0 @@ -#include "consts.h" -.include "shuffle.inc" - -.macro mul rh0,rh1,rh2,rh3,zl0=15,zl1=15,zh0=2,zh1=2 -vpmullw %ymm\zl0,%ymm\rh0,%ymm12 -vpmullw %ymm\zl0,%ymm\rh1,%ymm13 - -vpmullw %ymm\zl1,%ymm\rh2,%ymm14 -vpmullw %ymm\zl1,%ymm\rh3,%ymm15 - -vpmulhw %ymm\zh0,%ymm\rh0,%ymm\rh0 -vpmulhw %ymm\zh0,%ymm\rh1,%ymm\rh1 - -vpmulhw %ymm\zh1,%ymm\rh2,%ymm\rh2 -vpmulhw %ymm\zh1,%ymm\rh3,%ymm\rh3 -.endm - -.macro reduce -vpmulhw %ymm0,%ymm12,%ymm12 -vpmulhw %ymm0,%ymm13,%ymm13 - -vpmulhw %ymm0,%ymm14,%ymm14 -vpmulhw %ymm0,%ymm15,%ymm15 -.endm - -.macro update rln,rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 -vpaddw %ymm\rh0,%ymm\rl0,%ymm\rln -vpsubw %ymm\rh0,%ymm\rl0,%ymm\rh0 -vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl0 - -vpsubw %ymm\rh1,%ymm\rl1,%ymm\rh1 -vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl1 -vpsubw %ymm\rh2,%ymm\rl2,%ymm\rh2 - -vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl2 -vpsubw %ymm\rh3,%ymm\rl3,%ymm\rh3 - -vpsubw %ymm12,%ymm\rln,%ymm\rln -vpaddw %ymm12,%ymm\rh0,%ymm\rh0 -vpsubw %ymm13,%ymm\rl0,%ymm\rl0 - -vpaddw %ymm13,%ymm\rh1,%ymm\rh1 -vpsubw %ymm14,%ymm\rl1,%ymm\rl1 -vpaddw %ymm14,%ymm\rh2,%ymm\rh2 - -vpsubw %ymm15,%ymm\rl2,%ymm\rl2 -vpaddw %ymm15,%ymm\rh3,%ymm\rh3 -.endm - -.macro level0 off -vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm15 -vmovdqa (64*\off+128)*2(%rdi),%ymm8 -vmovdqa (64*\off+144)*2(%rdi),%ymm9 -vmovdqa (64*\off+160)*2(%rdi),%ymm10 -vmovdqa (64*\off+176)*2(%rdi),%ymm11 -vpbroadcastq 
(_ZETAS_EXP+4)*2(%rsi),%ymm2 - -mul 8,9,10,11 - -vmovdqa (64*\off+ 0)*2(%rdi),%ymm4 -vmovdqa (64*\off+ 16)*2(%rdi),%ymm5 -vmovdqa (64*\off+ 32)*2(%rdi),%ymm6 -vmovdqa (64*\off+ 48)*2(%rdi),%ymm7 - -reduce -update 3,4,5,6,7,8,9,10,11 - -vmovdqa %ymm3,(64*\off+ 0)*2(%rdi) -vmovdqa %ymm4,(64*\off+ 16)*2(%rdi) -vmovdqa %ymm5,(64*\off+ 32)*2(%rdi) -vmovdqa %ymm6,(64*\off+ 48)*2(%rdi) -vmovdqa %ymm8,(64*\off+128)*2(%rdi) -vmovdqa %ymm9,(64*\off+144)*2(%rdi) -vmovdqa %ymm10,(64*\off+160)*2(%rdi) -vmovdqa %ymm11,(64*\off+176)*2(%rdi) -.endm - -.macro levels1t6 off -/* level 1 */ -vmovdqa (_ZETAS_EXP+224*\off+16)*2(%rsi),%ymm15 -vmovdqa (128*\off+ 64)*2(%rdi),%ymm8 -vmovdqa (128*\off+ 80)*2(%rdi),%ymm9 -vmovdqa (128*\off+ 96)*2(%rdi),%ymm10 -vmovdqa (128*\off+112)*2(%rdi),%ymm11 -vmovdqa (_ZETAS_EXP+224*\off+32)*2(%rsi),%ymm2 - -mul 8,9,10,11 - -vmovdqa (128*\off+ 0)*2(%rdi),%ymm4 -vmovdqa (128*\off+ 16)*2(%rdi),%ymm5 -vmovdqa (128*\off+ 32)*2(%rdi),%ymm6 -vmovdqa (128*\off+ 48)*2(%rdi),%ymm7 - -reduce -update 3,4,5,6,7,8,9,10,11 - -/* level 2 */ -shuffle8 5,10,7,10 -shuffle8 6,11,5,11 - -vmovdqa (_ZETAS_EXP+224*\off+48)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+64)*2(%rsi),%ymm2 - -mul 7,10,5,11 - -shuffle8 3,8,6,8 -shuffle8 4,9,3,9 - -reduce -update 4,6,8,3,9,7,10,5,11 - -/* level 3 */ -shuffle4 8,5,9,5 -shuffle4 3,11,8,11 - -vmovdqa (_ZETAS_EXP+224*\off+80)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+96)*2(%rsi),%ymm2 - -mul 9,5,8,11 - -shuffle4 4,7,3,7 -shuffle4 6,10,4,10 - -reduce -update 6,3,7,4,10,9,5,8,11 - -/* level 4 */ -shuffle2 7,8,10,8 -shuffle2 4,11,7,11 - -vmovdqa (_ZETAS_EXP+224*\off+112)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+128)*2(%rsi),%ymm2 - -mul 10,8,7,11 - -shuffle2 6,9,4,9 -shuffle2 3,5,6,5 - -reduce -update 3,4,9,6,5,10,8,7,11 - -/* level 5 */ -shuffle1 9,7,5,7 -shuffle1 6,11,9,11 - -vmovdqa (_ZETAS_EXP+224*\off+144)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+160)*2(%rsi),%ymm2 - -mul 5,7,9,11 - -shuffle1 3,10,6,10 -shuffle1 4,8,3,8 - 
-reduce -update 4,6,10,3,8,5,7,9,11 - -/* level 6 */ -vmovdqa (_ZETAS_EXP+224*\off+176)*2(%rsi),%ymm14 -vmovdqa (_ZETAS_EXP+224*\off+208)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+192)*2(%rsi),%ymm8 -vmovdqa (_ZETAS_EXP+224*\off+224)*2(%rsi),%ymm2 - -mul 10,3,9,11,14,15,8,2 - -reduce -update 8,4,6,5,7,10,3,9,11 - -vmovdqa %ymm8,(128*\off+ 0)*2(%rdi) -vmovdqa %ymm4,(128*\off+ 16)*2(%rdi) -vmovdqa %ymm10,(128*\off+ 32)*2(%rdi) -vmovdqa %ymm3,(128*\off+ 48)*2(%rdi) -vmovdqa %ymm6,(128*\off+ 64)*2(%rdi) -vmovdqa %ymm5,(128*\off+ 80)*2(%rdi) -vmovdqa %ymm9,(128*\off+ 96)*2(%rdi) -vmovdqa %ymm11,(128*\off+112)*2(%rdi) -.endm - -.text -.global cdecl(ntt_avx) -cdecl(ntt_avx): -vmovdqa _16XQ*2(%rsi),%ymm0 - -level0 0 -level0 1 - -levels1t6 0 -levels1t6 1 - -ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/ntt.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/ntt.h deleted file mode 100644 index a4f48e343b..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/ntt.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef NTT_H -#define NTT_H - -#include -#include - -#define ntt_avx KYBER_NAMESPACE(ntt_avx) -void ntt_avx(__m256i *r, const __m256i *qdata); -#define invntt_avx KYBER_NAMESPACE(invntt_avx) -void invntt_avx(__m256i *r, const __m256i *qdata); - -#define nttpack_avx KYBER_NAMESPACE(nttpack_avx) -void nttpack_avx(__m256i *r, const __m256i *qdata); -#define nttunpack_avx KYBER_NAMESPACE(nttunpack_avx) -void nttunpack_avx(__m256i *r, const __m256i *qdata); - -#define basemul_avx KYBER_NAMESPACE(basemul_avx) -void basemul_avx(__m256i *r, - const __m256i *a, - const __m256i *b, - const __m256i *qdata); - -#define ntttobytes_avx KYBER_NAMESPACE(ntttobytes_avx) -void ntttobytes_avx(uint8_t *r, const __m256i *a, const __m256i *qdata); -#define nttfrombytes_avx KYBER_NAMESPACE(nttfrombytes_avx) -void nttfrombytes_avx(__m256i *r, const uint8_t *a, const __m256i *qdata); - -#endif diff --git 
a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/params.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/params.h deleted file mode 100644 index ecfabce4a5..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/params.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef PARAMS_H -#define PARAMS_H - -#ifndef KYBER_K -#define KYBER_K 3 /* Change this for different security strengths */ -#endif - -//#define KYBER_90S /* Uncomment this if you want the 90S variant */ - -/* Don't change parameters below this line */ -#if (KYBER_K == 2) -#ifdef KYBER_90S -#define KYBER_NAMESPACE(s) pqcrystals_kyber512_90s_avx2_##s -#else -#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_512_avx2_##s -#endif -#elif (KYBER_K == 3) -#ifdef KYBER_90S -#define KYBER_NAMESPACE(s) pqcrystals_kyber768_90s_avx2_##s -#else -#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_768_avx2_##s -#endif -#elif (KYBER_K == 4) -#ifdef KYBER_90S -#define KYBER_NAMESPACE(s) pqcrystals_kyber1024_90s_avx2_##s -#else -#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_1024_avx2_##s -#endif -#else -#error "KYBER_K must be in {2,3,4}" -#endif - -#define KYBER_N 256 -#define KYBER_Q 3329 - -#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ -#define KYBER_SSBYTES 32 /* size in bytes of shared key */ - -#define KYBER_POLYBYTES 384 -#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) - -#if KYBER_K == 2 -#define KYBER_ETA1 3 -#define KYBER_POLYCOMPRESSEDBYTES 128 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) -#elif KYBER_K == 3 -#define KYBER_ETA1 2 -#define KYBER_POLYCOMPRESSEDBYTES 128 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) -#elif KYBER_K == 4 -#define KYBER_ETA1 2 -#define KYBER_POLYCOMPRESSEDBYTES 160 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) -#endif - -#define KYBER_ETA2 2 - -#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES) -#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) -#define KYBER_INDCPA_SECRETKEYBYTES 
(KYBER_POLYVECBYTES) -#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) - -#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) -/* 32 bytes of additional space to save H(pk) */ -#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) -#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES) - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/poly.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/poly.c deleted file mode 100644 index 681fd6d23e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/poly.c +++ /dev/null @@ -1,519 +0,0 @@ -#include -#include -#include -#include "align.h" -#include "fips202x4.h" -#include "params.h" -#include "poly.h" -#include "ntt.h" -#include "consts.h" -#include "reduce.h" -#include "cbd.h" -#include "symmetric.h" - -/************************************************* -* Name: poly_compress -* -* Description: Compression and subsequent serialization of a polynomial. -* The coefficients of the input polynomial are assumed to -* lie in the invertal [0,q], i.e. the polynomial must be reduced -* by poly_reduce(). 
-* -* Arguments: - uint8_t *r: pointer to output byte array -* (of length KYBER_POLYCOMPRESSEDBYTES) -* - const poly *a: pointer to input polynomial -**************************************************/ -#if (KYBER_POLYCOMPRESSEDBYTES == 128) -void poly_compress(uint8_t r[128], const poly * restrict a) -{ - unsigned int i; - __m256i f0, f1, f2, f3; - const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); - const __m256i shift1 = _mm256_set1_epi16(1 << 9); - const __m256i mask = _mm256_set1_epi16(15); - const __m256i shift2 = _mm256_set1_epi16((16 << 8) + 1); - const __m256i permdidx = _mm256_set_epi32(7,3,6,2,5,1,4,0); - - for(i=0;ivec[4*i+0]); - f1 = _mm256_load_si256(&a->vec[4*i+1]); - f2 = _mm256_load_si256(&a->vec[4*i+2]); - f3 = _mm256_load_si256(&a->vec[4*i+3]); - f0 = _mm256_mulhi_epi16(f0,v); - f1 = _mm256_mulhi_epi16(f1,v); - f2 = _mm256_mulhi_epi16(f2,v); - f3 = _mm256_mulhi_epi16(f3,v); - f0 = _mm256_mulhrs_epi16(f0,shift1); - f1 = _mm256_mulhrs_epi16(f1,shift1); - f2 = _mm256_mulhrs_epi16(f2,shift1); - f3 = _mm256_mulhrs_epi16(f3,shift1); - f0 = _mm256_and_si256(f0,mask); - f1 = _mm256_and_si256(f1,mask); - f2 = _mm256_and_si256(f2,mask); - f3 = _mm256_and_si256(f3,mask); - f0 = _mm256_packus_epi16(f0,f1); - f2 = _mm256_packus_epi16(f2,f3); - f0 = _mm256_maddubs_epi16(f0,shift2); - f2 = _mm256_maddubs_epi16(f2,shift2); - f0 = _mm256_packus_epi16(f0,f2); - f0 = _mm256_permutevar8x32_epi32(f0,permdidx); - _mm256_storeu_si256((__m256i *)&r[32*i],f0); - } -} - -void poly_decompress(poly * restrict r, const uint8_t a[128]) -{ - unsigned int i; - __m128i t; - __m256i f; - const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); - const __m256i shufbidx = _mm256_set_epi8(7,7,7,7,6,6,6,6,5,5,5,5,4,4,4,4, - 3,3,3,3,2,2,2,2,1,1,1,1,0,0,0,0); - const __m256i mask = _mm256_set1_epi32(0x00F0000F); - const __m256i shift = _mm256_set1_epi32((128 << 16) + 2048); - - for(i=0;ivec[i],f); - } -} - -#elif (KYBER_POLYCOMPRESSEDBYTES == 160) -void poly_compress(uint8_t 
r[160], const poly * restrict a) -{ - unsigned int i; - __m256i f0, f1; - __m128i t0, t1; - const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); - const __m256i shift1 = _mm256_set1_epi16(1 << 10); - const __m256i mask = _mm256_set1_epi16(31); - const __m256i shift2 = _mm256_set1_epi16((32 << 8) + 1); - const __m256i shift3 = _mm256_set1_epi32((1024 << 16) + 1); - const __m256i sllvdidx = _mm256_set1_epi64x(12); - const __m256i shufbidx = _mm256_set_epi8( 8,-1,-1,-1,-1,-1, 4, 3, 2, 1, 0,-1,12,11,10, 9, - -1,12,11,10, 9, 8,-1,-1,-1,-1,-1 ,4, 3, 2, 1, 0); - - for(i=0;ivec[2*i+0]); - f1 = _mm256_load_si256(&a->vec[2*i+1]); - f0 = _mm256_mulhi_epi16(f0,v); - f1 = _mm256_mulhi_epi16(f1,v); - f0 = _mm256_mulhrs_epi16(f0,shift1); - f1 = _mm256_mulhrs_epi16(f1,shift1); - f0 = _mm256_and_si256(f0,mask); - f1 = _mm256_and_si256(f1,mask); - f0 = _mm256_packus_epi16(f0,f1); - f0 = _mm256_maddubs_epi16(f0,shift2); // a0 a1 a2 a3 b0 b1 b2 b3 a4 a5 a6 a7 b4 b5 b6 b7 - f0 = _mm256_madd_epi16(f0,shift3); // a0 a1 b0 b1 a2 a3 b2 b3 - f0 = _mm256_sllv_epi32(f0,sllvdidx); - f0 = _mm256_srlv_epi64(f0,sllvdidx); - f0 = _mm256_shuffle_epi8(f0,shufbidx); - t0 = _mm256_castsi256_si128(f0); - t1 = _mm256_extracti128_si256(f0,1); - t0 = _mm_blendv_epi8(t0,t1,_mm256_castsi256_si128(shufbidx)); - _mm_storeu_si128((__m128i *)&r[20*i+ 0],t0); - memcpy(&r[20*i+16],&t1,4); - } -} - -void poly_decompress(poly * restrict r, const uint8_t a[160]) -{ - unsigned int i; - __m128i t; - __m256i f; - int16_t ti; - const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); - const __m256i shufbidx = _mm256_set_epi8(9,9,9,8,8,8,8,7,7,6,6,6,6,5,5,5, - 4,4,4,3,3,3,3,2,2,1,1,1,1,0,0,0); - const __m256i mask = _mm256_set_epi16(248,1984,62,496,3968,124,992,31, - 248,1984,62,496,3968,124,992,31); - const __m256i shift = _mm256_set_epi16(128,16,512,64,8,256,32,1024, - 128,16,512,64,8,256,32,1024); - - for(i=0;ivec[i],f); - } -} - -#endif - -/************************************************* -* Name: 
poly_tobytes -* -* Description: Serialization of a polynomial in NTT representation. -* The coefficients of the input polynomial are assumed to -* lie in the invertal [0,q], i.e. the polynomial must be reduced -* by poly_reduce(). The coefficients are orderd as output by -* poly_ntt(); the serialized output coefficients are in bitreversed -* order. -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for KYBER_POLYBYTES bytes) -* - poly *a: pointer to input polynomial -**************************************************/ -void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a) -{ - ntttobytes_avx(r, a->vec, qdata.vec); -} - -/************************************************* -* Name: poly_frombytes -* -* Description: De-serialization of a polynomial; -* inverse of poly_tobytes -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *a: pointer to input byte array -* (of KYBER_POLYBYTES bytes) -**************************************************/ -void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]) -{ - nttfrombytes_avx(r->vec, a, qdata.vec); -} - -/************************************************* -* Name: poly_frommsg -* -* Description: Convert 32-byte message to polynomial -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *msg: pointer to input message -**************************************************/ -void poly_frommsg(poly * restrict r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]) -{ -#if (KYBER_INDCPA_MSGBYTES != 32) -#error "KYBER_INDCPA_MSGBYTES must be equal to 32!" 
-#endif - __m256i f, g0, g1, g2, g3, h0, h1, h2, h3; - const __m256i shift = _mm256_broadcastsi128_si256(_mm_set_epi32(0,1,2,3)); - const __m256i idx = _mm256_broadcastsi128_si256(_mm_set_epi8(15,14,11,10,7,6,3,2,13,12,9,8,5,4,1,0)); - const __m256i hqs = _mm256_set1_epi16((KYBER_Q+1)/2); - -#define FROMMSG64(i) \ - g3 = _mm256_shuffle_epi32(f,0x55*i); \ - g3 = _mm256_sllv_epi32(g3,shift); \ - g3 = _mm256_shuffle_epi8(g3,idx); \ - g0 = _mm256_slli_epi16(g3,12); \ - g1 = _mm256_slli_epi16(g3,8); \ - g2 = _mm256_slli_epi16(g3,4); \ - g0 = _mm256_srai_epi16(g0,15); \ - g1 = _mm256_srai_epi16(g1,15); \ - g2 = _mm256_srai_epi16(g2,15); \ - g3 = _mm256_srai_epi16(g3,15); \ - g0 = _mm256_and_si256(g0,hqs); /* 19 18 17 16 3 2 1 0 */ \ - g1 = _mm256_and_si256(g1,hqs); /* 23 22 21 20 7 6 5 4 */ \ - g2 = _mm256_and_si256(g2,hqs); /* 27 26 25 24 11 10 9 8 */ \ - g3 = _mm256_and_si256(g3,hqs); /* 31 30 29 28 15 14 13 12 */ \ - h0 = _mm256_unpacklo_epi64(g0,g1); \ - h2 = _mm256_unpackhi_epi64(g0,g1); \ - h1 = _mm256_unpacklo_epi64(g2,g3); \ - h3 = _mm256_unpackhi_epi64(g2,g3); \ - g0 = _mm256_permute2x128_si256(h0,h1,0x20); \ - g2 = _mm256_permute2x128_si256(h0,h1,0x31); \ - g1 = _mm256_permute2x128_si256(h2,h3,0x20); \ - g3 = _mm256_permute2x128_si256(h2,h3,0x31); \ - _mm256_store_si256(&r->vec[0+2*i+0],g0); \ - _mm256_store_si256(&r->vec[0+2*i+1],g1); \ - _mm256_store_si256(&r->vec[8+2*i+0],g2); \ - _mm256_store_si256(&r->vec[8+2*i+1],g3) - - f = _mm256_loadu_si256((__m256i *)msg); - FROMMSG64(0); - FROMMSG64(1); - FROMMSG64(2); - FROMMSG64(3); -} - -/************************************************* -* Name: poly_tomsg -* -* Description: Convert polynomial to 32-byte message. -* The coefficients of the input polynomial are assumed to -* lie in the invertal [0,q], i.e. the polynomial must be reduced -* by poly_reduce(). 
-* -* Arguments: - uint8_t *msg: pointer to output message -* - poly *a: pointer to input polynomial -**************************************************/ -void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly * restrict a) -{ - unsigned int i; - uint32_t small; - __m256i f0, f1, g0, g1; - const __m256i hq = _mm256_set1_epi16((KYBER_Q - 1)/2); - const __m256i hhq = _mm256_set1_epi16((KYBER_Q - 1)/4); - - for(i=0;ivec[2*i+0]); - f1 = _mm256_load_si256(&a->vec[2*i+1]); - f0 = _mm256_sub_epi16(hq, f0); - f1 = _mm256_sub_epi16(hq, f1); - g0 = _mm256_srai_epi16(f0, 15); - g1 = _mm256_srai_epi16(f1, 15); - f0 = _mm256_xor_si256(f0, g0); - f1 = _mm256_xor_si256(f1, g1); - f0 = _mm256_sub_epi16(f0, hhq); - f1 = _mm256_sub_epi16(f1, hhq); - f0 = _mm256_packs_epi16(f0, f1); - f0 = _mm256_permute4x64_epi64(f0, 0xD8); - small = _mm256_movemask_epi8(f0); - memcpy(&msg[4*i], &small, 4); - } -} - -/************************************************* -* Name: poly_getnoise_eta1 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter KYBER_ETA1 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *seed: pointer to input seed -* (of length KYBER_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) -{ - ALIGNED_UINT8(KYBER_ETA1*KYBER_N/4+32) buf; // +32 bytes as required by poly_cbd_eta1 - prf(buf.coeffs, KYBER_ETA1*KYBER_N/4, seed, nonce); - poly_cbd_eta1(r, buf.vec); -} - -/************************************************* -* Name: poly_getnoise_eta2 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter KYBER_ETA2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *seed: 
pointer to input seed -* (of length KYBER_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) -{ - ALIGNED_UINT8(KYBER_ETA2*KYBER_N/4) buf; - prf(buf.coeffs, KYBER_ETA2*KYBER_N/4, seed, nonce); - poly_cbd_eta2(r, buf.vec); -} - -#ifndef KYBER_90S -#define NOISE_NBLOCKS ((KYBER_ETA1*KYBER_N/4+SHAKE256_RATE-1)/SHAKE256_RATE) -void poly_getnoise_eta1_4x(poly *r0, - poly *r1, - poly *r2, - poly *r3, - const uint8_t seed[32], - uint8_t nonce0, - uint8_t nonce1, - uint8_t nonce2, - uint8_t nonce3) -{ - ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) buf[4]; - __m256i f; - shake256x4incctx state; - - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - buf[0].coeffs[32] = nonce0; - buf[1].coeffs[32] = nonce1; - buf[2].coeffs[32] = nonce2; - buf[3].coeffs[32] = nonce3; - - shake256x4_inc_init(&state); - shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33); - shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state); - shake256x4_inc_ctx_release(&state); - - poly_cbd_eta1(r0, buf[0].vec); - poly_cbd_eta1(r1, buf[1].vec); - poly_cbd_eta1(r2, buf[2].vec); - poly_cbd_eta1(r3, buf[3].vec); -} - -#if KYBER_K == 2 -void poly_getnoise_eta1122_4x(poly *r0, - poly *r1, - poly *r2, - poly *r3, - const uint8_t seed[32], - uint8_t nonce0, - uint8_t nonce1, - uint8_t nonce2, - uint8_t nonce3) -{ - ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) buf[4]; - __m256i f; - shake256x4incctx state; - - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - buf[0].coeffs[32] = nonce0; - 
buf[1].coeffs[32] = nonce1; - buf[2].coeffs[32] = nonce2; - buf[3].coeffs[32] = nonce3; - - shake256x4_inc_init(&state); - shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33); - shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state); - shake256x4_inc_ctx_release(&state); - - poly_cbd_eta1(r0, buf[0].vec); - poly_cbd_eta1(r1, buf[1].vec); - poly_cbd_eta2(r2, buf[2].vec); - poly_cbd_eta2(r3, buf[3].vec); -} -#endif -#endif - -/************************************************* -* Name: poly_ntt -* -* Description: Computes negacyclic number-theoretic transform (NTT) of -* a polynomial in place. -* Input coefficients assumed to be in normal order, -* output coefficients are in special order that is natural -* for the vectorization. Input coefficients are assumed to be -* bounded by q in absolute value, output coefficients are bounded -* by 16118 in absolute value. -* -* Arguments: - poly *r: pointer to in/output polynomial -**************************************************/ -void poly_ntt(poly *r) -{ - ntt_avx(r->vec, qdata.vec); -} - -/************************************************* -* Name: poly_invntt_tomont -* -* Description: Computes inverse of negacyclic number-theoretic transform (NTT) -* of a polynomial in place; -* Input coefficients assumed to be in special order from vectorized -* forward ntt, output in normal order. Input coefficients can be -* arbitrary 16-bit integers, output coefficients are bounded by 14870 -* in absolute value. -* -* Arguments: - poly *a: pointer to in/output polynomial -**************************************************/ -void poly_invntt_tomont(poly *r) -{ - invntt_avx(r->vec, qdata.vec); -} - -void poly_nttunpack(poly *r) -{ - nttunpack_avx(r->vec, qdata.vec); -} - -/************************************************* -* Name: poly_basemul_montgomery -* -* Description: Multiplication of two polynomials in NTT domain. 
-* One of the input polynomials needs to have coefficients -* bounded by q, the other polynomial can have arbitrary -* coefficients. Output coefficients are bounded by 6656. -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_basemul_montgomery(poly *r, const poly *a, const poly *b) -{ - basemul_avx(r->vec, a->vec, b->vec, qdata.vec); -} - -/************************************************* -* Name: poly_tomont -* -* Description: Inplace conversion of all coefficients of a polynomial -* from normal domain to Montgomery domain -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_tomont(poly *r) -{ - tomont_avx(r->vec, qdata.vec); -} - -/************************************************* -* Name: poly_reduce -* -* Description: Applies Barrett reduction to all coefficients of a polynomial -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_reduce(poly *r) -{ - reduce_avx(r->vec, qdata.vec); -} - -/************************************************* -* Name: poly_add -* -* Description: Add two polynomials. No modular reduction -* is performed. 
-* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_add(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - __m256i f0, f1; - - for(i=0;ivec[i]); - f1 = _mm256_load_si256(&b->vec[i]); - f0 = _mm256_add_epi16(f0, f1); - _mm256_store_si256(&r->vec[i], f0); - } -} - -/************************************************* -* Name: poly_sub -* -* Description: Subtract two polynomials. No modular reduction -* is performed. -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_sub(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - __m256i f0, f1; - - for(i=0;ivec[i]); - f1 = _mm256_load_si256(&b->vec[i]); - f0 = _mm256_sub_epi16(f0, f1); - _mm256_store_si256(&r->vec[i], f0); - } -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/poly.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/poly.h deleted file mode 100644 index 6a9cf71c70..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/poly.h +++ /dev/null @@ -1,77 +0,0 @@ -#ifndef POLY_H -#define POLY_H - -#include -#include "align.h" -#include "params.h" - -typedef ALIGNED_INT16(KYBER_N) poly; - -#define poly_compress KYBER_NAMESPACE(poly_compress) -void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a); -#define poly_decompress KYBER_NAMESPACE(poly_decompress) -void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]); - -#define poly_tobytes KYBER_NAMESPACE(poly_tobytes) -void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a); -#define poly_frombytes KYBER_NAMESPACE(poly_frombytes) -void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]); - -#define 
poly_frommsg KYBER_NAMESPACE(poly_frommsg) -void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]); -#define poly_tomsg KYBER_NAMESPACE(poly_tomsg) -void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r); - -#define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1) -void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); - -#define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2) -void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); - -#ifndef KYBER_90S -#define poly_getnoise_eta1_4x KYBER_NAMESPACE(poly_getnoise_eta2_4x) -void poly_getnoise_eta1_4x(poly *r0, - poly *r1, - poly *r2, - poly *r3, - const uint8_t seed[32], - uint8_t nonce0, - uint8_t nonce1, - uint8_t nonce2, - uint8_t nonce3); - -#if KYBER_K == 2 -#define poly_getnoise_eta1122_4x KYBER_NAMESPACE(poly_getnoise_eta1122_4x) -void poly_getnoise_eta1122_4x(poly *r0, - poly *r1, - poly *r2, - poly *r3, - const uint8_t seed[32], - uint8_t nonce0, - uint8_t nonce1, - uint8_t nonce2, - uint8_t nonce3); -#endif -#endif - - -#define poly_ntt KYBER_NAMESPACE(poly_ntt) -void poly_ntt(poly *r); -#define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont) -void poly_invntt_tomont(poly *r); -#define poly_nttunpack KYBER_NAMESPACE(poly_nttunpack) -void poly_nttunpack(poly *r); -#define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery) -void poly_basemul_montgomery(poly *r, const poly *a, const poly *b); -#define poly_tomont KYBER_NAMESPACE(poly_tomont) -void poly_tomont(poly *r); - -#define poly_reduce KYBER_NAMESPACE(poly_reduce) -void poly_reduce(poly *r); - -#define poly_add KYBER_NAMESPACE(poly_add) -void poly_add(poly *r, const poly *a, const poly *b); -#define poly_sub KYBER_NAMESPACE(poly_sub) -void poly_sub(poly *r, const poly *a, const poly *b); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/polyvec.c 
b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/polyvec.c deleted file mode 100644 index a0174b7b3f..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/polyvec.c +++ /dev/null @@ -1,307 +0,0 @@ -#include -#include -#include -#include "params.h" -#include "polyvec.h" -#include "poly.h" -#include "ntt.h" -#include "consts.h" - -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) -static void poly_compress10(uint8_t r[320], const poly * restrict a) -{ - unsigned int i; - __m256i f0, f1, f2; - __m128i t0, t1; - const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); - const __m256i v8 = _mm256_slli_epi16(v,3); - const __m256i off = _mm256_set1_epi16(15); - const __m256i shift1 = _mm256_set1_epi16(1 << 12); - const __m256i mask = _mm256_set1_epi16(1023); - const __m256i shift2 = _mm256_set1_epi64x((1024LL << 48) + (1LL << 32) + (1024 << 16) + 1); - const __m256i sllvdidx = _mm256_set1_epi64x(12); - const __m256i shufbidx = _mm256_set_epi8( 8, 4, 3, 2, 1, 0,-1,-1,-1,-1,-1,-1,12,11,10, 9, - -1,-1,-1,-1,-1,-1,12,11,10, 9, 8, 4, 3, 2, 1, 0); - - for(i=0;ivec[i]); - f1 = _mm256_mullo_epi16(f0,v8); - f2 = _mm256_add_epi16(f0,off); - f0 = _mm256_slli_epi16(f0,3); - f0 = _mm256_mulhi_epi16(f0,v); - f2 = _mm256_sub_epi16(f1,f2); - f1 = _mm256_andnot_si256(f1,f2); - f1 = _mm256_srli_epi16(f1,15); - f0 = _mm256_sub_epi16(f0,f1); - f0 = _mm256_mulhrs_epi16(f0,shift1); - f0 = _mm256_and_si256(f0,mask); - f0 = _mm256_madd_epi16(f0,shift2); - f0 = _mm256_sllv_epi32(f0,sllvdidx); - f0 = _mm256_srli_epi64(f0,12); - f0 = _mm256_shuffle_epi8(f0,shufbidx); - t0 = _mm256_castsi256_si128(f0); - t1 = _mm256_extracti128_si256(f0,1); - t0 = _mm_blend_epi16(t0,t1,0xE0); - _mm_storeu_si128((__m128i *)&r[20*i+ 0],t0); - memcpy(&r[20*i+16],&t1,4); - } -} - -static void poly_decompress10(poly * restrict r, const uint8_t a[320+12]) -{ - unsigned int i; - __m256i f; - const __m256i q = _mm256_set1_epi32((KYBER_Q << 16) + 4*KYBER_Q); - const __m256i shufbidx = 
_mm256_set_epi8(11,10,10, 9, 9, 8, 8, 7, - 6, 5, 5, 4, 4, 3, 3, 2, - 9, 8, 8, 7, 7, 6, 6, 5, - 4, 3, 3, 2, 2, 1, 1, 0); - const __m256i sllvdidx = _mm256_set1_epi64x(4); - const __m256i mask = _mm256_set1_epi32((32736 << 16) + 8184); - - for(i=0;ivec[i],f); - } -} - -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) -static void poly_compress11(uint8_t r[352+2], const poly * restrict a) -{ - unsigned int i; - __m256i f0, f1, f2; - __m128i t0, t1; - const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); - const __m256i v8 = _mm256_slli_epi16(v,3); - const __m256i off = _mm256_set1_epi16(36); - const __m256i shift1 = _mm256_set1_epi16(1 << 13); - const __m256i mask = _mm256_set1_epi16(2047); - const __m256i shift2 = _mm256_set1_epi64x((2048LL << 48) + (1LL << 32) + (2048 << 16) + 1); - const __m256i sllvdidx = _mm256_set1_epi64x(10); - const __m256i srlvqidx = _mm256_set_epi64x(30,10,30,10); - const __m256i shufbidx = _mm256_set_epi8( 4, 3, 2, 1, 0, 0,-1,-1,-1,-1,10, 9, 8, 7, 6, 5, - -1,-1,-1,-1,-1,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - - for(i=0;ivec[i]); - f1 = _mm256_mullo_epi16(f0,v8); - f2 = _mm256_add_epi16(f0,off); - f0 = _mm256_slli_epi16(f0,3); - f0 = _mm256_mulhi_epi16(f0,v); - f2 = _mm256_sub_epi16(f1,f2); - f1 = _mm256_andnot_si256(f1,f2); - f1 = _mm256_srli_epi16(f1,15); - f0 = _mm256_sub_epi16(f0,f1); - f0 = _mm256_mulhrs_epi16(f0,shift1); - f0 = _mm256_and_si256(f0,mask); - f0 = _mm256_madd_epi16(f0,shift2); - f0 = _mm256_sllv_epi32(f0,sllvdidx); - f1 = _mm256_bsrli_epi128(f0,8); - f0 = _mm256_srlv_epi64(f0,srlvqidx); - f1 = _mm256_slli_epi64(f1,34); - f0 = _mm256_add_epi64(f0,f1); - f0 = _mm256_shuffle_epi8(f0,shufbidx); - t0 = _mm256_castsi256_si128(f0); - t1 = _mm256_extracti128_si256(f0,1); - t0 = _mm_blendv_epi8(t0,t1,_mm256_castsi256_si128(shufbidx)); - _mm_storeu_si128((__m128i *)&r[22*i+ 0],t0); - _mm_storel_epi64((__m128i *)&r[22*i+16],t1); - } -} - -static void poly_decompress11(poly * restrict r, const uint8_t a[352+10]) -{ - 
unsigned int i; - __m256i f; - const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); - const __m256i shufbidx = _mm256_set_epi8(13,12,12,11,10, 9, 9, 8, - 8, 7, 6, 5, 5, 4, 4, 3, - 10, 9, 9, 8, 7, 6, 6, 5, - 5, 4, 3, 2, 2, 1, 1, 0); - const __m256i srlvdidx = _mm256_set_epi32(0,0,1,0,0,0,1,0); - const __m256i srlvqidx = _mm256_set_epi64x(2,0,2,0); - const __m256i shift = _mm256_set_epi16(4,32,1,8,32,1,4,32,4,32,1,8,32,1,4,32); - const __m256i mask = _mm256_set1_epi16(32752); - - for(i=0;ivec[i],f); - } -} - -#endif - -/************************************************* -* Name: polyvec_compress -* -* Description: Compress and serialize vector of polynomials -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for KYBER_POLYVECCOMPRESSEDBYTES) -* - polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES+2], const polyvec *a) -{ - unsigned int i; - -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) - for(i=0;ivec[i]); -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) - for(i=0;ivec[i]); -#endif -} - -/************************************************* -* Name: polyvec_decompress -* -* Description: De-serialize and decompress vector of polynomials; -* approximate inverse of polyvec_compress -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const uint8_t *a: pointer to input byte array -* (of length KYBER_POLYVECCOMPRESSEDBYTES) -**************************************************/ -void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES+12]) -{ - unsigned int i; - -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) - for(i=0;ivec[i],&a[320*i]); -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) - for(i=0;ivec[i],&a[352*i]); -#endif -} - -/************************************************* -* Name: polyvec_tobytes -* -* Description: Serialize vector of polynomials -* 
-* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for KYBER_POLYVECBYTES) -* - polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_frombytes -* -* Description: De-serialize vector of polynomials; -* inverse of polyvec_tobytes -* -* Arguments: - uint8_t *r: pointer to output byte array -* - const polyvec *a: pointer to input vector of polynomials -* (of length KYBER_POLYVECBYTES) -**************************************************/ -void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]) -{ - unsigned int i; - for(i=0;ivec[i], a+i*KYBER_POLYBYTES); -} - -/************************************************* -* Name: polyvec_ntt -* -* Description: Apply forward NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_ntt(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_invntt_tomont -* -* Description: Apply inverse NTT to all elements of a vector of polynomials -* and multiply by Montgomery factor 2^16 -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_invntt_tomont(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_basemul_acc_montgomery -* -* Description: Multiply elements in a and b in NTT domain, accumulate into r, -* and multiply by 2^-16. 
-* -* Arguments: - poly *r: pointer to output polynomial -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) -{ - unsigned int i; - poly tmp; - - poly_basemul_montgomery(r,&a->vec[0],&b->vec[0]); - for(i=1;ivec[i],&b->vec[i]); - poly_add(r,r,&tmp); - } -} - -/************************************************* -* Name: polyvec_reduce -* -* Description: Applies Barrett reduction to each coefficient -* of each element of a vector of polynomials; -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - polyvec *r: pointer to input/output polynomial -**************************************************/ -void polyvec_reduce(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_add -* -* Description: Add vectors of polynomials -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) -{ - unsigned int i; - for(i=0;ivec[i], &a->vec[i], &b->vec[i]); -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/polyvec.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/polyvec.h deleted file mode 100644 index 2ce23c31ff..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/polyvec.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef POLYVEC_H -#define POLYVEC_H - -#include -#include "params.h" -#include "poly.h" - -typedef struct{ - poly vec[KYBER_K]; -} polyvec; - -#define polyvec_compress KYBER_NAMESPACE(polyvec_compress) -void polyvec_compress(uint8_t 
r[KYBER_POLYVECCOMPRESSEDBYTES+2], const polyvec *a); -#define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress) -void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES+12]); - -#define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes) -void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a); -#define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes) -void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]); - -#define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt) -void polyvec_ntt(polyvec *r); -#define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont) -void polyvec_invntt_tomont(polyvec *r); - -#define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery) -void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b); - -#define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce) -void polyvec_reduce(polyvec *r); - -#define polyvec_add KYBER_NAMESPACE(polyvec_add) -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/reduce.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/reduce.h deleted file mode 100644 index 5368185b5f..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/reduce.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef REDUCE_H -#define REDUCE_H - -#include "params.h" -#include - -#define reduce_avx KYBER_NAMESPACE(reduce_avx) -void reduce_avx(__m256i *r, const __m256i *qdata); -#define tomont_avx KYBER_NAMESPACE(tomont_avx) -void tomont_avx(__m256i *r, const __m256i *qdata); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/rejsample.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/rejsample.c deleted file mode 100644 index 9060a44cb9..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/rejsample.c +++ /dev/null @@ -1,398 +0,0 @@ -#include -#include -#include 
-#include "params.h" -#include "consts.h" -#include "rejsample.h" - -//#define BMI - -#ifndef BMI -static const uint8_t idx[256][8] = { - {-1, -1, -1, -1, -1, -1, -1, -1}, - { 0, -1, -1, -1, -1, -1, -1, -1}, - { 2, -1, -1, -1, -1, -1, -1, -1}, - { 0, 2, -1, -1, -1, -1, -1, -1}, - { 4, -1, -1, -1, -1, -1, -1, -1}, - { 0, 4, -1, -1, -1, -1, -1, -1}, - { 2, 4, -1, -1, -1, -1, -1, -1}, - { 0, 2, 4, -1, -1, -1, -1, -1}, - { 6, -1, -1, -1, -1, -1, -1, -1}, - { 0, 6, -1, -1, -1, -1, -1, -1}, - { 2, 6, -1, -1, -1, -1, -1, -1}, - { 0, 2, 6, -1, -1, -1, -1, -1}, - { 4, 6, -1, -1, -1, -1, -1, -1}, - { 0, 4, 6, -1, -1, -1, -1, -1}, - { 2, 4, 6, -1, -1, -1, -1, -1}, - { 0, 2, 4, 6, -1, -1, -1, -1}, - { 8, -1, -1, -1, -1, -1, -1, -1}, - { 0, 8, -1, -1, -1, -1, -1, -1}, - { 2, 8, -1, -1, -1, -1, -1, -1}, - { 0, 2, 8, -1, -1, -1, -1, -1}, - { 4, 8, -1, -1, -1, -1, -1, -1}, - { 0, 4, 8, -1, -1, -1, -1, -1}, - { 2, 4, 8, -1, -1, -1, -1, -1}, - { 0, 2, 4, 8, -1, -1, -1, -1}, - { 6, 8, -1, -1, -1, -1, -1, -1}, - { 0, 6, 8, -1, -1, -1, -1, -1}, - { 2, 6, 8, -1, -1, -1, -1, -1}, - { 0, 2, 6, 8, -1, -1, -1, -1}, - { 4, 6, 8, -1, -1, -1, -1, -1}, - { 0, 4, 6, 8, -1, -1, -1, -1}, - { 2, 4, 6, 8, -1, -1, -1, -1}, - { 0, 2, 4, 6, 8, -1, -1, -1}, - {10, -1, -1, -1, -1, -1, -1, -1}, - { 0, 10, -1, -1, -1, -1, -1, -1}, - { 2, 10, -1, -1, -1, -1, -1, -1}, - { 0, 2, 10, -1, -1, -1, -1, -1}, - { 4, 10, -1, -1, -1, -1, -1, -1}, - { 0, 4, 10, -1, -1, -1, -1, -1}, - { 2, 4, 10, -1, -1, -1, -1, -1}, - { 0, 2, 4, 10, -1, -1, -1, -1}, - { 6, 10, -1, -1, -1, -1, -1, -1}, - { 0, 6, 10, -1, -1, -1, -1, -1}, - { 2, 6, 10, -1, -1, -1, -1, -1}, - { 0, 2, 6, 10, -1, -1, -1, -1}, - { 4, 6, 10, -1, -1, -1, -1, -1}, - { 0, 4, 6, 10, -1, -1, -1, -1}, - { 2, 4, 6, 10, -1, -1, -1, -1}, - { 0, 2, 4, 6, 10, -1, -1, -1}, - { 8, 10, -1, -1, -1, -1, -1, -1}, - { 0, 8, 10, -1, -1, -1, -1, -1}, - { 2, 8, 10, -1, -1, -1, -1, -1}, - { 0, 2, 8, 10, -1, -1, -1, -1}, - { 4, 8, 10, -1, -1, -1, -1, -1}, - { 0, 4, 8, 10, -1, -1, 
-1, -1}, - { 2, 4, 8, 10, -1, -1, -1, -1}, - { 0, 2, 4, 8, 10, -1, -1, -1}, - { 6, 8, 10, -1, -1, -1, -1, -1}, - { 0, 6, 8, 10, -1, -1, -1, -1}, - { 2, 6, 8, 10, -1, -1, -1, -1}, - { 0, 2, 6, 8, 10, -1, -1, -1}, - { 4, 6, 8, 10, -1, -1, -1, -1}, - { 0, 4, 6, 8, 10, -1, -1, -1}, - { 2, 4, 6, 8, 10, -1, -1, -1}, - { 0, 2, 4, 6, 8, 10, -1, -1}, - {12, -1, -1, -1, -1, -1, -1, -1}, - { 0, 12, -1, -1, -1, -1, -1, -1}, - { 2, 12, -1, -1, -1, -1, -1, -1}, - { 0, 2, 12, -1, -1, -1, -1, -1}, - { 4, 12, -1, -1, -1, -1, -1, -1}, - { 0, 4, 12, -1, -1, -1, -1, -1}, - { 2, 4, 12, -1, -1, -1, -1, -1}, - { 0, 2, 4, 12, -1, -1, -1, -1}, - { 6, 12, -1, -1, -1, -1, -1, -1}, - { 0, 6, 12, -1, -1, -1, -1, -1}, - { 2, 6, 12, -1, -1, -1, -1, -1}, - { 0, 2, 6, 12, -1, -1, -1, -1}, - { 4, 6, 12, -1, -1, -1, -1, -1}, - { 0, 4, 6, 12, -1, -1, -1, -1}, - { 2, 4, 6, 12, -1, -1, -1, -1}, - { 0, 2, 4, 6, 12, -1, -1, -1}, - { 8, 12, -1, -1, -1, -1, -1, -1}, - { 0, 8, 12, -1, -1, -1, -1, -1}, - { 2, 8, 12, -1, -1, -1, -1, -1}, - { 0, 2, 8, 12, -1, -1, -1, -1}, - { 4, 8, 12, -1, -1, -1, -1, -1}, - { 0, 4, 8, 12, -1, -1, -1, -1}, - { 2, 4, 8, 12, -1, -1, -1, -1}, - { 0, 2, 4, 8, 12, -1, -1, -1}, - { 6, 8, 12, -1, -1, -1, -1, -1}, - { 0, 6, 8, 12, -1, -1, -1, -1}, - { 2, 6, 8, 12, -1, -1, -1, -1}, - { 0, 2, 6, 8, 12, -1, -1, -1}, - { 4, 6, 8, 12, -1, -1, -1, -1}, - { 0, 4, 6, 8, 12, -1, -1, -1}, - { 2, 4, 6, 8, 12, -1, -1, -1}, - { 0, 2, 4, 6, 8, 12, -1, -1}, - {10, 12, -1, -1, -1, -1, -1, -1}, - { 0, 10, 12, -1, -1, -1, -1, -1}, - { 2, 10, 12, -1, -1, -1, -1, -1}, - { 0, 2, 10, 12, -1, -1, -1, -1}, - { 4, 10, 12, -1, -1, -1, -1, -1}, - { 0, 4, 10, 12, -1, -1, -1, -1}, - { 2, 4, 10, 12, -1, -1, -1, -1}, - { 0, 2, 4, 10, 12, -1, -1, -1}, - { 6, 10, 12, -1, -1, -1, -1, -1}, - { 0, 6, 10, 12, -1, -1, -1, -1}, - { 2, 6, 10, 12, -1, -1, -1, -1}, - { 0, 2, 6, 10, 12, -1, -1, -1}, - { 4, 6, 10, 12, -1, -1, -1, -1}, - { 0, 4, 6, 10, 12, -1, -1, -1}, - { 2, 4, 6, 10, 12, -1, -1, -1}, - { 0, 2, 4, 6, 10, 12, 
-1, -1}, - { 8, 10, 12, -1, -1, -1, -1, -1}, - { 0, 8, 10, 12, -1, -1, -1, -1}, - { 2, 8, 10, 12, -1, -1, -1, -1}, - { 0, 2, 8, 10, 12, -1, -1, -1}, - { 4, 8, 10, 12, -1, -1, -1, -1}, - { 0, 4, 8, 10, 12, -1, -1, -1}, - { 2, 4, 8, 10, 12, -1, -1, -1}, - { 0, 2, 4, 8, 10, 12, -1, -1}, - { 6, 8, 10, 12, -1, -1, -1, -1}, - { 0, 6, 8, 10, 12, -1, -1, -1}, - { 2, 6, 8, 10, 12, -1, -1, -1}, - { 0, 2, 6, 8, 10, 12, -1, -1}, - { 4, 6, 8, 10, 12, -1, -1, -1}, - { 0, 4, 6, 8, 10, 12, -1, -1}, - { 2, 4, 6, 8, 10, 12, -1, -1}, - { 0, 2, 4, 6, 8, 10, 12, -1}, - {14, -1, -1, -1, -1, -1, -1, -1}, - { 0, 14, -1, -1, -1, -1, -1, -1}, - { 2, 14, -1, -1, -1, -1, -1, -1}, - { 0, 2, 14, -1, -1, -1, -1, -1}, - { 4, 14, -1, -1, -1, -1, -1, -1}, - { 0, 4, 14, -1, -1, -1, -1, -1}, - { 2, 4, 14, -1, -1, -1, -1, -1}, - { 0, 2, 4, 14, -1, -1, -1, -1}, - { 6, 14, -1, -1, -1, -1, -1, -1}, - { 0, 6, 14, -1, -1, -1, -1, -1}, - { 2, 6, 14, -1, -1, -1, -1, -1}, - { 0, 2, 6, 14, -1, -1, -1, -1}, - { 4, 6, 14, -1, -1, -1, -1, -1}, - { 0, 4, 6, 14, -1, -1, -1, -1}, - { 2, 4, 6, 14, -1, -1, -1, -1}, - { 0, 2, 4, 6, 14, -1, -1, -1}, - { 8, 14, -1, -1, -1, -1, -1, -1}, - { 0, 8, 14, -1, -1, -1, -1, -1}, - { 2, 8, 14, -1, -1, -1, -1, -1}, - { 0, 2, 8, 14, -1, -1, -1, -1}, - { 4, 8, 14, -1, -1, -1, -1, -1}, - { 0, 4, 8, 14, -1, -1, -1, -1}, - { 2, 4, 8, 14, -1, -1, -1, -1}, - { 0, 2, 4, 8, 14, -1, -1, -1}, - { 6, 8, 14, -1, -1, -1, -1, -1}, - { 0, 6, 8, 14, -1, -1, -1, -1}, - { 2, 6, 8, 14, -1, -1, -1, -1}, - { 0, 2, 6, 8, 14, -1, -1, -1}, - { 4, 6, 8, 14, -1, -1, -1, -1}, - { 0, 4, 6, 8, 14, -1, -1, -1}, - { 2, 4, 6, 8, 14, -1, -1, -1}, - { 0, 2, 4, 6, 8, 14, -1, -1}, - {10, 14, -1, -1, -1, -1, -1, -1}, - { 0, 10, 14, -1, -1, -1, -1, -1}, - { 2, 10, 14, -1, -1, -1, -1, -1}, - { 0, 2, 10, 14, -1, -1, -1, -1}, - { 4, 10, 14, -1, -1, -1, -1, -1}, - { 0, 4, 10, 14, -1, -1, -1, -1}, - { 2, 4, 10, 14, -1, -1, -1, -1}, - { 0, 2, 4, 10, 14, -1, -1, -1}, - { 6, 10, 14, -1, -1, -1, -1, -1}, - { 0, 6, 10, 14, -1, 
-1, -1, -1}, - { 2, 6, 10, 14, -1, -1, -1, -1}, - { 0, 2, 6, 10, 14, -1, -1, -1}, - { 4, 6, 10, 14, -1, -1, -1, -1}, - { 0, 4, 6, 10, 14, -1, -1, -1}, - { 2, 4, 6, 10, 14, -1, -1, -1}, - { 0, 2, 4, 6, 10, 14, -1, -1}, - { 8, 10, 14, -1, -1, -1, -1, -1}, - { 0, 8, 10, 14, -1, -1, -1, -1}, - { 2, 8, 10, 14, -1, -1, -1, -1}, - { 0, 2, 8, 10, 14, -1, -1, -1}, - { 4, 8, 10, 14, -1, -1, -1, -1}, - { 0, 4, 8, 10, 14, -1, -1, -1}, - { 2, 4, 8, 10, 14, -1, -1, -1}, - { 0, 2, 4, 8, 10, 14, -1, -1}, - { 6, 8, 10, 14, -1, -1, -1, -1}, - { 0, 6, 8, 10, 14, -1, -1, -1}, - { 2, 6, 8, 10, 14, -1, -1, -1}, - { 0, 2, 6, 8, 10, 14, -1, -1}, - { 4, 6, 8, 10, 14, -1, -1, -1}, - { 0, 4, 6, 8, 10, 14, -1, -1}, - { 2, 4, 6, 8, 10, 14, -1, -1}, - { 0, 2, 4, 6, 8, 10, 14, -1}, - {12, 14, -1, -1, -1, -1, -1, -1}, - { 0, 12, 14, -1, -1, -1, -1, -1}, - { 2, 12, 14, -1, -1, -1, -1, -1}, - { 0, 2, 12, 14, -1, -1, -1, -1}, - { 4, 12, 14, -1, -1, -1, -1, -1}, - { 0, 4, 12, 14, -1, -1, -1, -1}, - { 2, 4, 12, 14, -1, -1, -1, -1}, - { 0, 2, 4, 12, 14, -1, -1, -1}, - { 6, 12, 14, -1, -1, -1, -1, -1}, - { 0, 6, 12, 14, -1, -1, -1, -1}, - { 2, 6, 12, 14, -1, -1, -1, -1}, - { 0, 2, 6, 12, 14, -1, -1, -1}, - { 4, 6, 12, 14, -1, -1, -1, -1}, - { 0, 4, 6, 12, 14, -1, -1, -1}, - { 2, 4, 6, 12, 14, -1, -1, -1}, - { 0, 2, 4, 6, 12, 14, -1, -1}, - { 8, 12, 14, -1, -1, -1, -1, -1}, - { 0, 8, 12, 14, -1, -1, -1, -1}, - { 2, 8, 12, 14, -1, -1, -1, -1}, - { 0, 2, 8, 12, 14, -1, -1, -1}, - { 4, 8, 12, 14, -1, -1, -1, -1}, - { 0, 4, 8, 12, 14, -1, -1, -1}, - { 2, 4, 8, 12, 14, -1, -1, -1}, - { 0, 2, 4, 8, 12, 14, -1, -1}, - { 6, 8, 12, 14, -1, -1, -1, -1}, - { 0, 6, 8, 12, 14, -1, -1, -1}, - { 2, 6, 8, 12, 14, -1, -1, -1}, - { 0, 2, 6, 8, 12, 14, -1, -1}, - { 4, 6, 8, 12, 14, -1, -1, -1}, - { 0, 4, 6, 8, 12, 14, -1, -1}, - { 2, 4, 6, 8, 12, 14, -1, -1}, - { 0, 2, 4, 6, 8, 12, 14, -1}, - {10, 12, 14, -1, -1, -1, -1, -1}, - { 0, 10, 12, 14, -1, -1, -1, -1}, - { 2, 10, 12, 14, -1, -1, -1, -1}, - { 0, 2, 10, 12, 14, -1, 
-1, -1}, - { 4, 10, 12, 14, -1, -1, -1, -1}, - { 0, 4, 10, 12, 14, -1, -1, -1}, - { 2, 4, 10, 12, 14, -1, -1, -1}, - { 0, 2, 4, 10, 12, 14, -1, -1}, - { 6, 10, 12, 14, -1, -1, -1, -1}, - { 0, 6, 10, 12, 14, -1, -1, -1}, - { 2, 6, 10, 12, 14, -1, -1, -1}, - { 0, 2, 6, 10, 12, 14, -1, -1}, - { 4, 6, 10, 12, 14, -1, -1, -1}, - { 0, 4, 6, 10, 12, 14, -1, -1}, - { 2, 4, 6, 10, 12, 14, -1, -1}, - { 0, 2, 4, 6, 10, 12, 14, -1}, - { 8, 10, 12, 14, -1, -1, -1, -1}, - { 0, 8, 10, 12, 14, -1, -1, -1}, - { 2, 8, 10, 12, 14, -1, -1, -1}, - { 0, 2, 8, 10, 12, 14, -1, -1}, - { 4, 8, 10, 12, 14, -1, -1, -1}, - { 0, 4, 8, 10, 12, 14, -1, -1}, - { 2, 4, 8, 10, 12, 14, -1, -1}, - { 0, 2, 4, 8, 10, 12, 14, -1}, - { 6, 8, 10, 12, 14, -1, -1, -1}, - { 0, 6, 8, 10, 12, 14, -1, -1}, - { 2, 6, 8, 10, 12, 14, -1, -1}, - { 0, 2, 6, 8, 10, 12, 14, -1}, - { 4, 6, 8, 10, 12, 14, -1, -1}, - { 0, 4, 6, 8, 10, 12, 14, -1}, - { 2, 4, 6, 8, 10, 12, 14, -1}, - { 0, 2, 4, 6, 8, 10, 12, 14} -}; -#endif - -#define _mm256_cmpge_epu16(a, b) _mm256_cmpeq_epi16(_mm256_max_epu16(a, b), a) -#define _mm_cmpge_epu16(a, b) _mm_cmpeq_epi16(_mm_max_epu16(a, b), a) - -unsigned int rej_uniform_avx(int16_t * restrict r, const uint8_t *buf) -{ - unsigned int ctr, pos; - uint16_t val0, val1; - uint32_t good; -#ifdef BMI - uint64_t idx0, idx1, idx2, idx3; -#endif - const __m256i bound = _mm256_load_si256(&qdata.vec[_16XQ/16]); - const __m256i ones = _mm256_set1_epi8(1); - const __m256i mask = _mm256_set1_epi16(0xFFF); - const __m256i idx8 = _mm256_set_epi8(15,14,14,13,12,11,11,10, - 9, 8, 8, 7, 6, 5, 5, 4, - 11,10,10, 9, 8, 7, 7, 6, - 5, 4, 4, 3, 2, 1, 1, 0); - __m256i f0, f1, g0, g1, g2, g3; - __m128i f, t, pilo, pihi; - - ctr = pos = 0; - while(ctr <= KYBER_N - 32 && pos <= REJ_UNIFORM_AVX_BUFLEN - 56) { - f0 = _mm256_loadu_si256((__m256i *)&buf[pos]); - f1 = _mm256_loadu_si256((__m256i *)&buf[pos+24]); - f0 = _mm256_permute4x64_epi64(f0, 0x94); - f1 = _mm256_permute4x64_epi64(f1, 0x94); - f0 = _mm256_shuffle_epi8(f0, 
idx8); - f1 = _mm256_shuffle_epi8(f1, idx8); - g0 = _mm256_srli_epi16(f0, 4); - g1 = _mm256_srli_epi16(f1, 4); - f0 = _mm256_blend_epi16(f0, g0, 0xAA); - f1 = _mm256_blend_epi16(f1, g1, 0xAA); - f0 = _mm256_and_si256(f0, mask); - f1 = _mm256_and_si256(f1, mask); - pos += 48; - - g0 = _mm256_cmpgt_epi16(bound, f0); - g1 = _mm256_cmpgt_epi16(bound, f1); - - g0 = _mm256_packs_epi16(g0, g1); - good = _mm256_movemask_epi8(g0); - -#ifdef BMI - idx0 = _pdep_u64(good >> 0, 0x0101010101010101); - idx1 = _pdep_u64(good >> 8, 0x0101010101010101); - idx2 = _pdep_u64(good >> 16, 0x0101010101010101); - idx3 = _pdep_u64(good >> 24, 0x0101010101010101); - idx0 = (idx0 << 8) - idx0; - idx0 = _pext_u64(0x0E0C0A0806040200, idx0); - idx1 = (idx1 << 8) - idx1; - idx1 = _pext_u64(0x0E0C0A0806040200, idx1); - idx2 = (idx2 << 8) - idx2; - idx2 = _pext_u64(0x0E0C0A0806040200, idx2); - idx3 = (idx3 << 8) - idx3; - idx3 = _pext_u64(0x0E0C0A0806040200, idx3); - - g0 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx0)); - g1 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx1)); - g0 = _mm256_inserti128_si256(g0, _mm_cvtsi64_si128(idx2), 1); - g1 = _mm256_inserti128_si256(g1, _mm_cvtsi64_si128(idx3), 1); -#else - g0 = _mm256_castsi128_si256(_mm_loadl_epi64((__m128i *)&idx[(good >> 0) & 0xFF])); - g1 = _mm256_castsi128_si256(_mm_loadl_epi64((__m128i *)&idx[(good >> 8) & 0xFF])); - g0 = _mm256_inserti128_si256(g0, _mm_loadl_epi64((__m128i *)&idx[(good >> 16) & 0xFF]), 1); - g1 = _mm256_inserti128_si256(g1, _mm_loadl_epi64((__m128i *)&idx[(good >> 24) & 0xFF]), 1); -#endif - - g2 = _mm256_add_epi8(g0, ones); - g3 = _mm256_add_epi8(g1, ones); - g0 = _mm256_unpacklo_epi8(g0, g2); - g1 = _mm256_unpacklo_epi8(g1, g3); - - f0 = _mm256_shuffle_epi8(f0, g0); - f1 = _mm256_shuffle_epi8(f1, g1); - - _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f0)); - ctr += _mm_popcnt_u32((good >> 0) & 0xFF); - _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f0, 1)); - ctr += _mm_popcnt_u32((good >> 
16) & 0xFF); - _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f1)); - ctr += _mm_popcnt_u32((good >> 8) & 0xFF); - _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f1, 1)); - ctr += _mm_popcnt_u32((good >> 24) & 0xFF); - } - - while(ctr <= KYBER_N - 8 && pos <= REJ_UNIFORM_AVX_BUFLEN - 16) { - f = _mm_loadu_si128((__m128i *)&buf[pos]); - f = _mm_shuffle_epi8(f, _mm256_castsi256_si128(idx8)); - t = _mm_srli_epi16(f, 4); - f = _mm_blend_epi16(f, t, 0xAA); - f = _mm_and_si128(f, _mm256_castsi256_si128(mask)); - pos += 12; - - t = _mm_cmpgt_epi16(_mm256_castsi256_si128(bound), f); - good = _mm_movemask_epi8(t); - -#ifdef BMI - good &= 0x5555; - idx0 = _pdep_u64(good, 0x1111111111111111); - idx0 = (idx0 << 8) - idx0; - idx0 = _pext_u64(0x0E0C0A0806040200, idx0); - pilo = _mm_cvtsi64_si128(idx0); -#else - good = _pext_u32(good, 0x5555); - pilo = _mm_loadl_epi64((__m128i *)&idx[good]); -#endif - - pihi = _mm_add_epi8(pilo, _mm256_castsi256_si128(ones)); - pilo = _mm_unpacklo_epi8(pilo, pihi); - f = _mm_shuffle_epi8(f, pilo); - _mm_storeu_si128((__m128i *)&r[ctr], f); - ctr += _mm_popcnt_u32(good); - } - - while(ctr < KYBER_N && pos <= REJ_UNIFORM_AVX_BUFLEN - 3) { - val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; - val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)); - pos += 3; - - if(val0 < KYBER_Q) - r[ctr++] = val0; - if(val1 < KYBER_Q && ctr < KYBER_N) - r[ctr++] = val1; - } - - return ctr; -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/rejsample.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/rejsample.h deleted file mode 100644 index 3be5e2192e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/rejsample.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef REJSAMPLE_H -#define REJSAMPLE_H - -#include -#include "params.h" -#include "symmetric.h" - -#define REJ_UNIFORM_AVX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES) -#define 
REJ_UNIFORM_AVX_BUFLEN (REJ_UNIFORM_AVX_NBLOCKS*XOF_BLOCKBYTES) - -#define rej_uniform_avx KYBER_NAMESPACE(rej_uniform_avx) -unsigned int rej_uniform_avx(int16_t *r, const uint8_t *buf); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/shuffle.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/shuffle.S deleted file mode 100644 index 18325ebec0..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/shuffle.S +++ /dev/null @@ -1,255 +0,0 @@ -#include "consts.h" -.include "fq.inc" -.include "shuffle.inc" - -/* -nttpack_avx: -#load -vmovdqa (%rdi),%ymm4 -vmovdqa 32(%rdi),%ymm5 -vmovdqa 64(%rdi),%ymm6 -vmovdqa 96(%rdi),%ymm7 -vmovdqa 128(%rdi),%ymm8 -vmovdqa 160(%rdi),%ymm9 -vmovdqa 192(%rdi),%ymm10 -vmovdqa 224(%rdi),%ymm11 - -shuffle1 4,5,3,5 -shuffle1 6,7,4,7 -shuffle1 8,9,6,9 -shuffle1 10,11,8,11 - -shuffle2 3,4,10,4 -shuffle2 6,8,3,8 -shuffle2 5,7,6,7 -shuffle2 9,11,5,11 - -shuffle4 10,3,9,3 -shuffle4 6,5,10,5 -shuffle4 4,8,6,8 -shuffle4 7,11,4,11 - -shuffle8 9,10,7,10 -shuffle8 6,4,9,4 -shuffle8 3,5,6,5 -shuffle8 8,11,3,11 - -#store -vmovdqa %ymm7,(%rdi) -vmovdqa %ymm9,32(%rdi) -vmovdqa %ymm6,64(%rdi) -vmovdqa %ymm3,96(%rdi) -vmovdqa %ymm10,128(%rdi) -vmovdqa %ymm4,160(%rdi) -vmovdqa %ymm5,192(%rdi) -vmovdqa %ymm11,224(%rdi) - -ret -*/ - -.text -nttunpack128_avx: -#load -vmovdqa (%rdi),%ymm4 -vmovdqa 32(%rdi),%ymm5 -vmovdqa 64(%rdi),%ymm6 -vmovdqa 96(%rdi),%ymm7 -vmovdqa 128(%rdi),%ymm8 -vmovdqa 160(%rdi),%ymm9 -vmovdqa 192(%rdi),%ymm10 -vmovdqa 224(%rdi),%ymm11 - -shuffle8 4,8,3,8 -shuffle8 5,9,4,9 -shuffle8 6,10,5,10 -shuffle8 7,11,6,11 - -shuffle4 3,5,7,5 -shuffle4 8,10,3,10 -shuffle4 4,6,8,6 -shuffle4 9,11,4,11 - -shuffle2 7,8,9,8 -shuffle2 5,6,7,6 -shuffle2 3,4,5,4 -shuffle2 10,11,3,11 - -shuffle1 9,5,10,5 -shuffle1 8,4,9,4 -shuffle1 7,3,8,3 -shuffle1 6,11,7,11 - -#store -vmovdqa %ymm10,(%rdi) -vmovdqa %ymm5,32(%rdi) -vmovdqa %ymm9,64(%rdi) -vmovdqa %ymm4,96(%rdi) -vmovdqa %ymm8,128(%rdi) 
-vmovdqa %ymm3,160(%rdi) -vmovdqa %ymm7,192(%rdi) -vmovdqa %ymm11,224(%rdi) - -ret - -.global cdecl(nttunpack_avx) -cdecl(nttunpack_avx): -call nttunpack128_avx -add $256,%rdi -call nttunpack128_avx -ret - -ntttobytes128_avx: -#load -vmovdqa (%rsi),%ymm5 -vmovdqa 32(%rsi),%ymm6 -vmovdqa 64(%rsi),%ymm7 -vmovdqa 96(%rsi),%ymm8 -vmovdqa 128(%rsi),%ymm9 -vmovdqa 160(%rsi),%ymm10 -vmovdqa 192(%rsi),%ymm11 -vmovdqa 224(%rsi),%ymm12 - -#csubq -csubq 5,13 -csubq 6,13 -csubq 7,13 -csubq 8,13 -csubq 9,13 -csubq 10,13 -csubq 11,13 -csubq 12,13 - -#bitpack -vpsllw $12,%ymm6,%ymm4 -vpor %ymm4,%ymm5,%ymm4 - -vpsrlw $4,%ymm6,%ymm5 -vpsllw $8,%ymm7,%ymm6 -vpor %ymm5,%ymm6,%ymm5 - -vpsrlw $8,%ymm7,%ymm6 -vpsllw $4,%ymm8,%ymm7 -vpor %ymm6,%ymm7,%ymm6 - -vpsllw $12,%ymm10,%ymm7 -vpor %ymm7,%ymm9,%ymm7 - -vpsrlw $4,%ymm10,%ymm8 -vpsllw $8,%ymm11,%ymm9 -vpor %ymm8,%ymm9,%ymm8 - -vpsrlw $8,%ymm11,%ymm9 -vpsllw $4,%ymm12,%ymm10 -vpor %ymm9,%ymm10,%ymm9 - -shuffle1 4,5,3,5 -shuffle1 6,7,4,7 -shuffle1 8,9,6,9 - -shuffle2 3,4,8,4 -shuffle2 6,5,3,5 -shuffle2 7,9,6,9 - -shuffle4 8,3,7,3 -shuffle4 6,4,8,4 -shuffle4 5,9,6,9 - -shuffle8 7,8,5,8 -shuffle8 6,3,7,3 -shuffle8 4,9,6,9 - -#store -vmovdqu %ymm5,(%rdi) -vmovdqu %ymm7,32(%rdi) -vmovdqu %ymm6,64(%rdi) -vmovdqu %ymm8,96(%rdi) -vmovdqu %ymm3,128(%rdi) -vmovdqu %ymm9,160(%rdi) - -ret - -.global cdecl(ntttobytes_avx) -cdecl(ntttobytes_avx): -#consts -vmovdqa _16XQ*2(%rdx),%ymm0 -call ntttobytes128_avx -add $256,%rsi -add $192,%rdi -call ntttobytes128_avx -ret - -nttfrombytes128_avx: -#load -vmovdqu (%rsi),%ymm4 -vmovdqu 32(%rsi),%ymm5 -vmovdqu 64(%rsi),%ymm6 -vmovdqu 96(%rsi),%ymm7 -vmovdqu 128(%rsi),%ymm8 -vmovdqu 160(%rsi),%ymm9 - -shuffle8 4,7,3,7 -shuffle8 5,8,4,8 -shuffle8 6,9,5,9 - -shuffle4 3,8,6,8 -shuffle4 7,5,3,5 -shuffle4 4,9,7,9 - -shuffle2 6,5,4,5 -shuffle2 8,7,6,7 -shuffle2 3,9,8,9 - -shuffle1 4,7,10,7 -shuffle1 5,8,4,8 -shuffle1 6,9,5,9 - -#bitunpack -vpsrlw $12,%ymm10,%ymm11 -vpsllw $4,%ymm7,%ymm12 -vpor %ymm11,%ymm12,%ymm11 
-vpand %ymm0,%ymm10,%ymm10 -vpand %ymm0,%ymm11,%ymm11 - -vpsrlw $8,%ymm7,%ymm12 -vpsllw $8,%ymm4,%ymm13 -vpor %ymm12,%ymm13,%ymm12 -vpand %ymm0,%ymm12,%ymm12 - -vpsrlw $4,%ymm4,%ymm13 -vpand %ymm0,%ymm13,%ymm13 - -vpsrlw $12,%ymm8,%ymm14 -vpsllw $4,%ymm5,%ymm15 -vpor %ymm14,%ymm15,%ymm14 -vpand %ymm0,%ymm8,%ymm8 -vpand %ymm0,%ymm14,%ymm14 - -vpsrlw $8,%ymm5,%ymm15 -vpsllw $8,%ymm9,%ymm1 -vpor %ymm15,%ymm1,%ymm15 -vpand %ymm0,%ymm15,%ymm15 - -vpsrlw $4,%ymm9,%ymm1 -vpand %ymm0,%ymm1,%ymm1 - -#store -vmovdqa %ymm10,(%rdi) -vmovdqa %ymm11,32(%rdi) -vmovdqa %ymm12,64(%rdi) -vmovdqa %ymm13,96(%rdi) -vmovdqa %ymm8,128(%rdi) -vmovdqa %ymm14,160(%rdi) -vmovdqa %ymm15,192(%rdi) -vmovdqa %ymm1,224(%rdi) - -ret - -.global cdecl(nttfrombytes_avx) -cdecl(nttfrombytes_avx): -#consts -vmovdqa _16XMASK*2(%rdx),%ymm0 -call nttfrombytes128_avx -add $256,%rdi -add $192,%rsi -call nttfrombytes128_avx -ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/shuffle.inc b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/shuffle.inc deleted file mode 100644 index 73e9ffe03c..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/shuffle.inc +++ /dev/null @@ -1,25 +0,0 @@ -.macro shuffle8 r0,r1,r2,r3 -vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 -vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle4 r0,r1,r2,r3 -vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 -vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle2 r0,r1,r2,r3 -#vpsllq $32,%ymm\r1,%ymm\r2 -vmovsldup %ymm\r1,%ymm\r2 -vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 -vpsrlq $32,%ymm\r0,%ymm\r0 -#vmovshdup %ymm\r0,%ymm\r0 -vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle1 r0,r1,r2,r3 -vpslld $16,%ymm\r1,%ymm\r2 -vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 -vpsrld $16,%ymm\r0,%ymm\r0 -vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 -.endm diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/symmetric-shake.c 
b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/symmetric-shake.c deleted file mode 100644 index 20f451882e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/symmetric-shake.c +++ /dev/null @@ -1,74 +0,0 @@ -#include -#include -#include -#include "params.h" -#include "symmetric.h" -#include "fips202.h" - -/************************************************* -* Name: kyber_shake128_absorb -* -* Description: Absorb step of the SHAKE128 specialized for the Kyber context. -* -* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state -* - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state -* - uint8_t i: additional byte of input -* - uint8_t j: additional byte of input -**************************************************/ -void kyber_shake128_absorb(shake128incctx *state, - const uint8_t seed[KYBER_SYMBYTES], - uint8_t x, - uint8_t y) -{ - uint8_t extseed[KYBER_SYMBYTES+2]; - - memcpy(extseed, seed, KYBER_SYMBYTES); - extseed[KYBER_SYMBYTES+0] = x; - extseed[KYBER_SYMBYTES+1] = y; - - shake128_absorb_once(state, extseed, sizeof(extseed)); -} - -/************************************************* -* Name: kyber_shake256_prf -* -* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -* and then generates outlen bytes of SHAKE256 output -* -* Arguments: - uint8_t *out: pointer to output -* - size_t outlen: number of requested output bytes -* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) -* - uint8_t nonce: single-byte nonce (public PRF input) -**************************************************/ -void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce) -{ - uint8_t extkey[KYBER_SYMBYTES+1]; - - memcpy(extkey, key, KYBER_SYMBYTES); - extkey[KYBER_SYMBYTES] = nonce; - - shake256(out, outlen, extkey, sizeof(extkey)); -} - -/************************************************* -* Name: kyber_shake256_prf 
-* -* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -* and then generates outlen bytes of SHAKE256 output -* -* Arguments: - uint8_t *out: pointer to output -* - size_t outlen: number of requested output bytes -* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) -* - uint8_t nonce: single-byte nonce (public PRF input) -**************************************************/ -void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]) -{ - shake256incctx s; - - shake256_inc_init(&s); - shake256_inc_absorb(&s, key, KYBER_SYMBYTES); - shake256_inc_absorb(&s, input, KYBER_CIPHERTEXTBYTES); - shake256_inc_finalize(&s); - shake256_inc_squeeze(out, KYBER_SSBYTES, &s); - shake256_inc_ctx_release(&s); -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/symmetric.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/symmetric.h deleted file mode 100644 index e4941f7a86..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/symmetric.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef SYMMETRIC_H -#define SYMMETRIC_H - -#include -#include -#include "params.h" - -#include "fips202.h" -#include "fips202x4.h" - -typedef shake128incctx xof_state; - -#define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb) -void kyber_shake128_absorb(shake128incctx *s, - const uint8_t seed[KYBER_SYMBYTES], - uint8_t x, - uint8_t y); - -#define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf) -void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce); - -#define kyber_shake256_rkprf KYBER_NAMESPACE(kyber_shake256_rkprf) -void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]); - -#define XOF_BLOCKBYTES SHAKE128_RATE - -#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) -#define hash_g(OUT, IN, 
INBYTES) sha3_512(OUT, IN, INBYTES) -#define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y) -#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) -#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE) -#define rkprf(OUT, KEY, INPUT) kyber_shake256_rkprf(OUT, KEY, INPUT) - -#endif /* SYMMETRIC_H */ diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/verify.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/verify.c deleted file mode 100644 index 06243b837f..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_avx2/verify.c +++ /dev/null @@ -1,83 +0,0 @@ -#include -#include -#include -#include "verify.h" - -/************************************************* -* Name: verify -* -* Description: Compare two arrays for equality in constant time. -* -* Arguments: const uint8_t *a: pointer to first byte array -* const uint8_t *b: pointer to second byte array -* size_t len: length of the byte arrays -* -* Returns 0 if the byte arrays are equal, 1 otherwise -**************************************************/ -int verify(const uint8_t *a, const uint8_t *b, size_t len) -{ - size_t i; - uint64_t r; - __m256i f, g, h; - - h = _mm256_setzero_si256(); - for(i=0;i> 63; - return r; -} - -/************************************************* -* Name: cmov -* -* Description: Copy len bytes from x to r if b is 1; -* don't modify x if b is 0. Requires b to be in {0,1}; -* assumes two's complement representation of negative integers. -* Runs in constant time. 
-* -* Arguments: uint8_t *r: pointer to output byte array -* const uint8_t *x: pointer to input byte array -* size_t len: Amount of bytes to be copied -* uint8_t b: Condition bit; has to be in {0,1} -**************************************************/ -void cmov(uint8_t * restrict r, const uint8_t *x, size_t len, uint8_t b) -{ - size_t i; - __m256i xvec, rvec, bvec; - -#if defined(__GNUC__) || defined(__clang__) - // Prevent the compiler from - // 1) inferring that b is 0/1-valued, and - // 2) handling the two cases with a branch. - // This is not necessary when verify.c and kem.c are separate translation - // units, but we expect that downstream consumers will copy this code and/or - // change how it is built. - __asm__("" : "+r"(b) : /* no inputs */); -#endif - - bvec = _mm256_set1_epi64x(-(uint64_t)b); - for(i=0;i -#include -#include "params.h" - -#define verify KYBER_NAMESPACE(verify) -int verify(const uint8_t *a, const uint8_t *b, size_t len); - -#define cmov KYBER_NAMESPACE(cmov) -void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); - -#define cmov_int16 KYBER_NAMESPACE(cmov_int16) -void cmov_int16(int16_t *r, int16_t v, uint16_t b); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/api.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/api.h deleted file mode 100644 index 70d40f3f3e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/api.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef API_H -#define API_H - -#include - -#define pqcrystals_kyber512_SECRETKEYBYTES 1632 -#define pqcrystals_kyber512_PUBLICKEYBYTES 800 -#define pqcrystals_kyber512_CIPHERTEXTBYTES 768 -#define pqcrystals_kyber512_KEYPAIRCOINBYTES 64 -#define pqcrystals_kyber512_ENCCOINBYTES 32 -#define pqcrystals_kyber512_BYTES 32 - -#define pqcrystals_kyber512_ref_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES -#define pqcrystals_kyber512_ref_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES -#define 
pqcrystals_kyber512_ref_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES -#define pqcrystals_kyber512_ref_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES -#define pqcrystals_kyber512_ref_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES -#define pqcrystals_kyber512_ref_BYTES pqcrystals_kyber512_BYTES - -int pqcrystals_kyber512_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); -int pqcrystals_kyber512_ref_keypair(uint8_t *pk, uint8_t *sk); -int pqcrystals_kyber512_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); -int pqcrystals_kyber512_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -int pqcrystals_kyber512_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#define pqcrystals_kyber768_SECRETKEYBYTES 2400 -#define pqcrystals_kyber768_PUBLICKEYBYTES 1184 -#define pqcrystals_kyber768_CIPHERTEXTBYTES 1088 -#define pqcrystals_kyber768_KEYPAIRCOINBYTES 64 -#define pqcrystals_kyber768_ENCCOINBYTES 32 -#define pqcrystals_kyber768_BYTES 32 - -#define pqcrystals_kyber768_ref_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES -#define pqcrystals_kyber768_ref_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES -#define pqcrystals_kyber768_ref_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES -#define pqcrystals_kyber768_ref_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES -#define pqcrystals_kyber768_ref_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES -#define pqcrystals_kyber768_ref_BYTES pqcrystals_kyber768_BYTES - -int pqcrystals_kyber768_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); -int pqcrystals_kyber768_ref_keypair(uint8_t *pk, uint8_t *sk); -int pqcrystals_kyber768_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); -int pqcrystals_kyber768_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -int pqcrystals_kyber768_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#define pqcrystals_kyber1024_SECRETKEYBYTES 3168 -#define 
pqcrystals_kyber1024_PUBLICKEYBYTES 1568 -#define pqcrystals_kyber1024_CIPHERTEXTBYTES 1568 -#define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64 -#define pqcrystals_kyber1024_ENCCOINBYTES 32 -#define pqcrystals_kyber1024_BYTES 32 - -#define pqcrystals_kyber1024_ref_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES -#define pqcrystals_kyber1024_ref_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES -#define pqcrystals_kyber1024_ref_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES -#define pqcrystals_kyber1024_ref_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES -#define pqcrystals_kyber1024_ref_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES -#define pqcrystals_kyber1024_ref_BYTES pqcrystals_kyber1024_BYTES - -int pqcrystals_kyber1024_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); -int pqcrystals_kyber1024_ref_keypair(uint8_t *pk, uint8_t *sk); -int pqcrystals_kyber1024_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); -int pqcrystals_kyber1024_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -int pqcrystals_kyber1024_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/cbd.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/cbd.c deleted file mode 100644 index 1500ffea56..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/cbd.c +++ /dev/null @@ -1,128 +0,0 @@ -#include -#include "params.h" -#include "cbd.h" - -/************************************************* -* Name: load32_littleendian -* -* Description: load 4 bytes into a 32-bit integer -* in little-endian order -* -* Arguments: - const uint8_t *x: pointer to input byte array -* -* Returns 32-bit unsigned integer loaded from x -**************************************************/ -static uint32_t load32_littleendian(const uint8_t x[4]) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; 
- r |= (uint32_t)x[3] << 24; - return r; -} - -/************************************************* -* Name: load24_littleendian -* -* Description: load 3 bytes into a 32-bit integer -* in little-endian order. -* This function is only needed for Kyber-512 -* -* Arguments: - const uint8_t *x: pointer to input byte array -* -* Returns 32-bit unsigned integer loaded from x (most significant byte is zero) -**************************************************/ -#if KYBER_ETA1 == 3 -static uint32_t load24_littleendian(const uint8_t x[3]) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - return r; -} -#endif - - -/************************************************* -* Name: cbd2 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter eta=2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *buf: pointer to input byte array -**************************************************/ -static void cbd2(poly *r, const uint8_t buf[2*KYBER_N/4]) -{ - unsigned int i,j; - uint32_t t,d; - int16_t a,b; - - for(i=0;i>1) & 0x55555555; - - for(j=0;j<8;j++) { - a = (d >> (4*j+0)) & 0x3; - b = (d >> (4*j+2)) & 0x3; - r->coeffs[8*i+j] = a - b; - } - } -} - -/************************************************* -* Name: cbd3 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter eta=3. 
-* This function is only needed for Kyber-512 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *buf: pointer to input byte array -**************************************************/ -#if KYBER_ETA1 == 3 -static void cbd3(poly *r, const uint8_t buf[3*KYBER_N/4]) -{ - unsigned int i,j; - uint32_t t,d; - int16_t a,b; - - for(i=0;i>1) & 0x00249249; - d += (t>>2) & 0x00249249; - - for(j=0;j<4;j++) { - a = (d >> (6*j+0)) & 0x7; - b = (d >> (6*j+3)) & 0x7; - r->coeffs[4*i+j] = a - b; - } - } -} -#endif - -void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]) -{ -#if KYBER_ETA1 == 2 - cbd2(r, buf); -#elif KYBER_ETA1 == 3 - cbd3(r, buf); -#else -#error "This implementation requires eta1 in {2,3}" -#endif -} - -void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]) -{ -#if KYBER_ETA2 == 2 - cbd2(r, buf); -#else -#error "This implementation requires eta2 = 2" -#endif -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/cbd.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/cbd.h deleted file mode 100644 index 7b677d745d..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/cbd.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef CBD_H -#define CBD_H - -#include -#include "params.h" -#include "poly.h" - -#define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1) -void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]); - -#define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2) -void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/indcpa.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/indcpa.c deleted file mode 100644 index 726cfa985d..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/indcpa.c +++ /dev/null @@ -1,334 +0,0 @@ -#include -#include -#include -#include "params.h" -#include "indcpa.h" -#include "polyvec.h" -#include "poly.h" -#include 
"ntt.h" -#include "symmetric.h" -#include "randombytes.h" - -/************************************************* -* Name: pack_pk -* -* Description: Serialize the public key as concatenation of the -* serialized vector of polynomials pk -* and the public seed used to generate the matrix A. -* -* Arguments: uint8_t *r: pointer to the output serialized public key -* polyvec *pk: pointer to the input public-key polyvec -* const uint8_t *seed: pointer to the input public seed -**************************************************/ -static void pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES], - polyvec *pk, - const uint8_t seed[KYBER_SYMBYTES]) -{ - polyvec_tobytes(r, pk); - memcpy(r+KYBER_POLYVECBYTES, seed, KYBER_SYMBYTES); -} - -/************************************************* -* Name: unpack_pk -* -* Description: De-serialize public key from a byte array; -* approximate inverse of pack_pk -* -* Arguments: - polyvec *pk: pointer to output public-key polynomial vector -* - uint8_t *seed: pointer to output seed to generate matrix A -* - const uint8_t *packedpk: pointer to input serialized public key -**************************************************/ -static void unpack_pk(polyvec *pk, - uint8_t seed[KYBER_SYMBYTES], - const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES]) -{ - polyvec_frombytes(pk, packedpk); - memcpy(seed, packedpk+KYBER_POLYVECBYTES, KYBER_SYMBYTES); -} - -/************************************************* -* Name: pack_sk -* -* Description: Serialize the secret key -* -* Arguments: - uint8_t *r: pointer to output serialized secret key -* - polyvec *sk: pointer to input vector of polynomials (secret key) -**************************************************/ -static void pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk) -{ - polyvec_tobytes(r, sk); -} - -/************************************************* -* Name: unpack_sk -* -* Description: De-serialize the secret key; inverse of pack_sk -* -* Arguments: - polyvec *sk: pointer to output vector 
of polynomials (secret key) -* - const uint8_t *packedsk: pointer to input serialized secret key -**************************************************/ -static void unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES]) -{ - polyvec_frombytes(sk, packedsk); -} - -/************************************************* -* Name: pack_ciphertext -* -* Description: Serialize the ciphertext as concatenation of the -* compressed and serialized vector of polynomials b -* and the compressed and serialized polynomial v -* -* Arguments: uint8_t *r: pointer to the output serialized ciphertext -* poly *pk: pointer to the input vector of polynomials b -* poly *v: pointer to the input polynomial v -**************************************************/ -static void pack_ciphertext(uint8_t r[KYBER_INDCPA_BYTES], polyvec *b, poly *v) -{ - polyvec_compress(r, b); - poly_compress(r+KYBER_POLYVECCOMPRESSEDBYTES, v); -} - -/************************************************* -* Name: unpack_ciphertext -* -* Description: De-serialize and decompress ciphertext from a byte array; -* approximate inverse of pack_ciphertext -* -* Arguments: - polyvec *b: pointer to the output vector of polynomials b -* - poly *v: pointer to the output polynomial v -* - const uint8_t *c: pointer to the input serialized ciphertext -**************************************************/ -static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[KYBER_INDCPA_BYTES]) -{ - polyvec_decompress(b, c); - poly_decompress(v, c+KYBER_POLYVECCOMPRESSEDBYTES); -} - -/************************************************* -* Name: rej_uniform -* -* Description: Run rejection sampling on uniform random bytes to generate -* uniform random integers mod q -* -* Arguments: - int16_t *r: pointer to output buffer -* - unsigned int len: requested number of 16-bit integers (uniform mod q) -* - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes) -* - unsigned int buflen: length of input 
buffer in bytes -* -* Returns number of sampled 16-bit integers (at most len) -**************************************************/ -static unsigned int rej_uniform(int16_t *r, - unsigned int len, - const uint8_t *buf, - unsigned int buflen) -{ - unsigned int ctr, pos; - uint16_t val0, val1; - - ctr = pos = 0; - while(ctr < len && pos + 3 <= buflen) { - val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; - val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF; - pos += 3; - - if(val0 < KYBER_Q) - r[ctr++] = val0; - if(ctr < len && val1 < KYBER_Q) - r[ctr++] = val1; - } - - return ctr; -} - -#define gen_a(A,B) gen_matrix(A,B,0) -#define gen_at(A,B) gen_matrix(A,B,1) - -/************************************************* -* Name: gen_matrix -* -* Description: Deterministically generate matrix A (or the transpose of A) -* from a seed. Entries of the matrix are polynomials that look -* uniformly random. Performs rejection sampling on output of -* a XOF -* -* Arguments: - polyvec *a: pointer to ouptput matrix A -* - const uint8_t *seed: pointer to input seed -* - int transposed: boolean deciding whether A or A^T is generated -**************************************************/ -#if(XOF_BLOCKBYTES % 3) -#error "Implementation of gen_matrix assumes that XOF_BLOCKBYTES is a multiple of 3" -#endif - -#define GEN_MATRIX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES) -// Not static for benchmarking -void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed) -{ - unsigned int ctr, i, j; - unsigned int buflen; - uint8_t buf[GEN_MATRIX_NBLOCKS*XOF_BLOCKBYTES]; - xof_state state; - xof_init(&state, seed); - - for(i=0;i -#include "params.h" -#include "polyvec.h" - -#define gen_matrix KYBER_NAMESPACE(gen_matrix) -void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed); - -#define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand) -void indcpa_keypair_derand(uint8_t 
pk[KYBER_INDCPA_PUBLICKEYBYTES], - uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], - const uint8_t coins[KYBER_SYMBYTES]); - -#define indcpa_enc KYBER_NAMESPACE(indcpa_enc) -void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], - const uint8_t m[KYBER_INDCPA_MSGBYTES], - const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], - const uint8_t coins[KYBER_SYMBYTES]); - -#define indcpa_dec KYBER_NAMESPACE(indcpa_dec) -void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES], - const uint8_t c[KYBER_INDCPA_BYTES], - const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/kem.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/kem.c deleted file mode 100644 index 63abc1029c..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/kem.c +++ /dev/null @@ -1,169 +0,0 @@ -#include -#include -#include -#include "params.h" -#include "kem.h" -#include "indcpa.h" -#include "verify.h" -#include "symmetric.h" -#include "randombytes.h" -/************************************************* -* Name: crypto_kem_keypair_derand -* -* Description: Generates public and private key -* for CCA-secure Kyber key encapsulation mechanism -* -* Arguments: - uint8_t *pk: pointer to output public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key -* (an already allocated array of KYBER_SECRETKEYBYTES bytes) -* - uint8_t *coins: pointer to input randomness -* (an already allocated array filled with 2*KYBER_SYMBYTES random bytes) -** -* Returns 0 (success) -**************************************************/ -int crypto_kem_keypair_derand(uint8_t *pk, - uint8_t *sk, - const uint8_t *coins) -{ - indcpa_keypair_derand(pk, sk, coins); - memcpy(sk+KYBER_INDCPA_SECRETKEYBYTES, pk, KYBER_PUBLICKEYBYTES); - hash_h(sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); - /* Value z for pseudo-random output on reject */ - 
memcpy(sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, coins+KYBER_SYMBYTES, KYBER_SYMBYTES); - return 0; -} - -/************************************************* -* Name: crypto_kem_keypair -* -* Description: Generates public and private key -* for CCA-secure Kyber key encapsulation mechanism -* -* Arguments: - uint8_t *pk: pointer to output public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key -* (an already allocated array of KYBER_SECRETKEYBYTES bytes) -* -* Returns 0 (success) -**************************************************/ -int crypto_kem_keypair(uint8_t *pk, - uint8_t *sk) -{ - uint8_t coins[2*KYBER_SYMBYTES]; - randombytes(coins, 2*KYBER_SYMBYTES); - crypto_kem_keypair_derand(pk, sk, coins); - return 0; -} - -/************************************************* -* Name: crypto_kem_enc_derand -* -* Description: Generates cipher text and shared -* secret for given public key -* -* Arguments: - uint8_t *ct: pointer to output cipher text -* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) -* - uint8_t *ss: pointer to output shared secret -* (an already allocated array of KYBER_SSBYTES bytes) -* - const uint8_t *pk: pointer to input public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* - const uint8_t *coins: pointer to input randomness -* (an already allocated array filled with KYBER_SYMBYTES random bytes) -** -* Returns 0 (success) -**************************************************/ -int crypto_kem_enc_derand(uint8_t *ct, - uint8_t *ss, - const uint8_t *pk, - const uint8_t *coins) -{ - uint8_t buf[2*KYBER_SYMBYTES]; - /* Will contain key, coins */ - uint8_t kr[2*KYBER_SYMBYTES]; - - memcpy(buf, coins, KYBER_SYMBYTES); - - /* Multitarget countermeasure for coins + contributory KEM */ - hash_h(buf+KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); - hash_g(kr, buf, 2*KYBER_SYMBYTES); - - /* coins are in kr+KYBER_SYMBYTES */ - indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES); - - 
memcpy(ss,kr,KYBER_SYMBYTES); - return 0; -} - -/************************************************* -* Name: crypto_kem_enc -* -* Description: Generates cipher text and shared -* secret for given public key -* -* Arguments: - uint8_t *ct: pointer to output cipher text -* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) -* - uint8_t *ss: pointer to output shared secret -* (an already allocated array of KYBER_SSBYTES bytes) -* - const uint8_t *pk: pointer to input public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* -* Returns 0 (success) -**************************************************/ -int crypto_kem_enc(uint8_t *ct, - uint8_t *ss, - const uint8_t *pk) -{ - uint8_t coins[KYBER_SYMBYTES]; - randombytes(coins, KYBER_SYMBYTES); - crypto_kem_enc_derand(ct, ss, pk, coins); - return 0; -} - -/************************************************* -* Name: crypto_kem_dec -* -* Description: Generates shared secret for given -* cipher text and private key -* -* Arguments: - uint8_t *ss: pointer to output shared secret -* (an already allocated array of KYBER_SSBYTES bytes) -* - const uint8_t *ct: pointer to input cipher text -* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) -* - const uint8_t *sk: pointer to input private key -* (an already allocated array of KYBER_SECRETKEYBYTES bytes) -* -* Returns 0. -* -* On failure, ss will contain a pseudo-random value. 
-**************************************************/ -int crypto_kem_dec(uint8_t *ss, - const uint8_t *ct, - const uint8_t *sk) -{ - int fail; - uint8_t buf[2*KYBER_SYMBYTES]; - /* Will contain key, coins */ - uint8_t kr[2*KYBER_SYMBYTES]; - uint8_t cmp[KYBER_CIPHERTEXTBYTES+KYBER_SYMBYTES]; - const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES; - - indcpa_dec(buf, ct, sk); - - /* Multitarget countermeasure for coins + contributory KEM */ - memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES); - hash_g(kr, buf, 2*KYBER_SYMBYTES); - - /* coins are in kr+KYBER_SYMBYTES */ - indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES); - - fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); - - /* Compute rejection key */ - rkprf(ss,sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES,ct); - - /* Copy true key to return buffer if fail is false */ - cmov(ss,kr,KYBER_SYMBYTES,!fail); - - return 0; -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/kem.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/kem.h deleted file mode 100644 index 234f11966b..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/kem.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef KEM_H -#define KEM_H - -#include -#include "params.h" - -#define CRYPTO_SECRETKEYBYTES KYBER_SECRETKEYBYTES -#define CRYPTO_PUBLICKEYBYTES KYBER_PUBLICKEYBYTES -#define CRYPTO_CIPHERTEXTBYTES KYBER_CIPHERTEXTBYTES -#define CRYPTO_BYTES KYBER_SSBYTES - -#if (KYBER_K == 2) -#define CRYPTO_ALGNAME "Kyber512" -#elif (KYBER_K == 3) -#define CRYPTO_ALGNAME "Kyber768" -#elif (KYBER_K == 4) -#define CRYPTO_ALGNAME "Kyber1024" -#endif - -#define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand) -int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); - -#define crypto_kem_keypair KYBER_NAMESPACE(keypair) -int crypto_kem_keypair(uint8_t *pk, uint8_t *sk); - -#define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand) -int crypto_kem_enc_derand(uint8_t *ct, 
uint8_t *ss, const uint8_t *pk, const uint8_t *coins); - -#define crypto_kem_enc KYBER_NAMESPACE(enc) -int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); - -#define crypto_kem_dec KYBER_NAMESPACE(dec) -int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/ntt.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/ntt.c deleted file mode 100644 index 2f2eb10b2f..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/ntt.c +++ /dev/null @@ -1,146 +0,0 @@ -#include -#include "params.h" -#include "ntt.h" -#include "reduce.h" - -/* Code to generate zetas and zetas_inv used in the number-theoretic transform: - -#define KYBER_ROOT_OF_UNITY 17 - -static const uint8_t tree[128] = { - 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, - 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, - 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, - 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, - 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, - 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, - 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, - 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127 -}; - -void init_ntt() { - unsigned int i; - int16_t tmp[128]; - - tmp[0] = MONT; - for(i=1;i<128;i++) - tmp[i] = fqmul(tmp[i-1],MONT*KYBER_ROOT_OF_UNITY % KYBER_Q); - - for(i=0;i<128;i++) { - zetas[i] = tmp[tree[i]]; - if(zetas[i] > KYBER_Q/2) - zetas[i] -= KYBER_Q; - if(zetas[i] < -KYBER_Q/2) - zetas[i] += KYBER_Q; - } -} -*/ - -const int16_t zetas[128] = { - -1044, -758, -359, -1517, 1493, 1422, 287, 202, - -171, 622, 1577, 182, 962, -1202, -1474, 1468, - 573, -1325, 264, 383, -829, 1458, -1602, -130, - -681, 1017, 732, 608, -1542, 411, -205, -1571, - 1223, 652, -552, 1015, -1293, 1491, -282, -1544, - 516, -8, -320, -666, 
-1618, -1162, 126, 1469, - -853, -90, -271, 830, 107, -1421, -247, -951, - -398, 961, -1508, -725, 448, -1065, 677, -1275, - -1103, 430, 555, 843, -1251, 871, 1550, 105, - 422, 587, 177, -235, -291, -460, 1574, 1653, - -246, 778, 1159, -147, -777, 1483, -602, 1119, - -1590, 644, -872, 349, 418, 329, -156, -75, - 817, 1097, 603, 610, 1322, -1285, -1465, 384, - -1215, -136, 1218, -1335, -874, 220, -1187, -1659, - -1185, -1530, -1278, 794, -1510, -854, -870, 478, - -108, -308, 996, 991, 958, -1460, 1522, 1628 -}; - -/************************************************* -* Name: fqmul -* -* Description: Multiplication followed by Montgomery reduction -* -* Arguments: - int16_t a: first factor -* - int16_t b: second factor -* -* Returns 16-bit integer congruent to a*b*R^{-1} mod q -**************************************************/ -static int16_t fqmul(int16_t a, int16_t b) { - return montgomery_reduce((int32_t)a*b); -} - -/************************************************* -* Name: ntt -* -* Description: Inplace number-theoretic transform (NTT) in Rq. -* input is in standard order, output is in bitreversed order -* -* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void ntt(int16_t r[256]) { - unsigned int len, start, j, k; - int16_t t, zeta; - - k = 1; - for(len = 128; len >= 2; len >>= 1) { - for(start = 0; start < 256; start = j + len) { - zeta = zetas[k++]; - for(j = start; j < start + len; j++) { - t = fqmul(zeta, r[j + len]); - r[j + len] = r[j] - t; - r[j] = r[j] + t; - } - } - } -} - -/************************************************* -* Name: invntt_tomont -* -* Description: Inplace inverse number-theoretic transform in Rq and -* multiplication by Montgomery factor 2^16. 
-* Input is in bitreversed order, output is in standard order -* -* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void invntt(int16_t r[256]) { - unsigned int start, len, j, k; - int16_t t, zeta; - const int16_t f = 1441; // mont^2/128 - - k = 127; - for(len = 2; len <= 128; len <<= 1) { - for(start = 0; start < 256; start = j + len) { - zeta = zetas[k--]; - for(j = start; j < start + len; j++) { - t = r[j]; - r[j] = barrett_reduce(t + r[j + len]); - r[j + len] = r[j + len] - t; - r[j + len] = fqmul(zeta, r[j + len]); - } - } - } - - for(j = 0; j < 256; j++) - r[j] = fqmul(r[j], f); -} - -/************************************************* -* Name: basemul -* -* Description: Multiplication of polynomials in Zq[X]/(X^2-zeta) -* used for multiplication of elements in Rq in NTT domain -* -* Arguments: - int16_t r[2]: pointer to the output polynomial -* - const int16_t a[2]: pointer to the first factor -* - const int16_t b[2]: pointer to the second factor -* - int16_t zeta: integer defining the reduction polynomial -**************************************************/ -void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) -{ - r[0] = fqmul(a[1], b[1]); - r[0] = fqmul(r[0], zeta); - r[0] += fqmul(a[0], b[0]); - r[1] = fqmul(a[0], b[1]); - r[1] += fqmul(a[1], b[0]); -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/ntt.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/ntt.h deleted file mode 100644 index 227ea74f08..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/ntt.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef NTT_H -#define NTT_H - -#include -#include "params.h" - -#define zetas KYBER_NAMESPACE(zetas) -extern const int16_t zetas[128]; - -#define ntt KYBER_NAMESPACE(ntt) -void ntt(int16_t poly[256]); - -#define invntt KYBER_NAMESPACE(invntt) -void invntt(int16_t poly[256]); - -#define basemul 
KYBER_NAMESPACE(basemul) -void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/params.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/params.h deleted file mode 100644 index fb4190b311..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/params.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef PARAMS_H -#define PARAMS_H - -#ifndef KYBER_K -#define KYBER_K 3 /* Change this for different security strengths */ -#endif - - -/* Don't change parameters below this line */ -#if (KYBER_K == 2) -#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_512_ref_##s -#elif (KYBER_K == 3) -#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_768_ref_##s -#elif (KYBER_K == 4) -#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_1024_ref_##s -#else -#error "KYBER_K must be in {2,3,4}" -#endif - -#define KYBER_N 256 -#define KYBER_Q 3329 - -#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ -#define KYBER_SSBYTES 32 /* size in bytes of shared key */ - -#define KYBER_POLYBYTES 384 -#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) - -#if KYBER_K == 2 -#define KYBER_ETA1 3 -#define KYBER_POLYCOMPRESSEDBYTES 128 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) -#elif KYBER_K == 3 -#define KYBER_ETA1 2 -#define KYBER_POLYCOMPRESSEDBYTES 128 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) -#elif KYBER_K == 4 -#define KYBER_ETA1 2 -#define KYBER_POLYCOMPRESSEDBYTES 160 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) -#endif - -#define KYBER_ETA2 2 - -#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES) -#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) -#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) -#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) - -#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) -/* 32 bytes of additional space to save H(pk) */ -#define 
KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) -#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES) - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/poly.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/poly.c deleted file mode 100644 index cbd3abfb54..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/poly.c +++ /dev/null @@ -1,360 +0,0 @@ -#include -#include "params.h" -#include "poly.h" -#include "ntt.h" -#include "reduce.h" -#include "cbd.h" -#include "symmetric.h" -#include "verify.h" - -/************************************************* -* Name: poly_compress -* -* Description: Compression and subsequent serialization of a polynomial -* -* Arguments: - uint8_t *r: pointer to output byte array -* (of length KYBER_POLYCOMPRESSEDBYTES) -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) -{ - unsigned int i,j; - int16_t u; - uint32_t d0; - uint8_t t[8]; - -#if (KYBER_POLYCOMPRESSEDBYTES == 128) - - for(i=0;icoeffs[8*i+j]; - u += (u >> 15) & KYBER_Q; -/* t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */ - d0 = u << 4; - d0 += 1665; - d0 *= 80635; - d0 >>= 28; - t[j] = d0 & 0xf; - } - - r[0] = t[0] | (t[1] << 4); - r[1] = t[2] | (t[3] << 4); - r[2] = t[4] | (t[5] << 4); - r[3] = t[6] | (t[7] << 4); - r += 4; - } -#elif (KYBER_POLYCOMPRESSEDBYTES == 160) - for(i=0;icoeffs[8*i+j]; - u += (u >> 15) & KYBER_Q; -/* t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */ - d0 = u << 5; - d0 += 1664; - d0 *= 40318; - d0 >>= 27; - t[j] = d0 & 0x1f; - } - - r[0] = (t[0] >> 0) | (t[1] << 5); - r[1] = (t[1] >> 3) | (t[2] << 2) | (t[3] << 7); - r[2] = (t[3] >> 1) | (t[4] << 4); - r[3] = (t[4] >> 4) | (t[5] << 1) | (t[6] << 6); - r[4] = (t[6] >> 2) | (t[7] << 3); - r += 5; - } -#else -#error "KYBER_POLYCOMPRESSEDBYTES needs to be 
in {128, 160}" -#endif -} - -/************************************************* -* Name: poly_decompress -* -* Description: De-serialization and subsequent decompression of a polynomial; -* approximate inverse of poly_compress -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *a: pointer to input byte array -* (of length KYBER_POLYCOMPRESSEDBYTES bytes) -**************************************************/ -void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]) -{ - unsigned int i; - -#if (KYBER_POLYCOMPRESSEDBYTES == 128) - for(i=0;icoeffs[2*i+0] = (((uint16_t)(a[0] & 15)*KYBER_Q) + 8) >> 4; - r->coeffs[2*i+1] = (((uint16_t)(a[0] >> 4)*KYBER_Q) + 8) >> 4; - a += 1; - } -#elif (KYBER_POLYCOMPRESSEDBYTES == 160) - unsigned int j; - uint8_t t[8]; - for(i=0;i> 0); - t[1] = (a[0] >> 5) | (a[1] << 3); - t[2] = (a[1] >> 2); - t[3] = (a[1] >> 7) | (a[2] << 1); - t[4] = (a[2] >> 4) | (a[3] << 4); - t[5] = (a[3] >> 1); - t[6] = (a[3] >> 6) | (a[4] << 2); - t[7] = (a[4] >> 3); - a += 5; - - for(j=0;j<8;j++) - r->coeffs[8*i+j] = ((uint32_t)(t[j] & 31)*KYBER_Q + 16) >> 5; - } -#else -#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}" -#endif -} - -/************************************************* -* Name: poly_tobytes -* -* Description: Serialization of a polynomial -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for KYBER_POLYBYTES bytes) -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a) -{ - unsigned int i; - uint16_t t0, t1; - - for(i=0;icoeffs[2*i]; - t0 += ((int16_t)t0 >> 15) & KYBER_Q; - t1 = a->coeffs[2*i+1]; - t1 += ((int16_t)t1 >> 15) & KYBER_Q; - r[3*i+0] = (t0 >> 0); - r[3*i+1] = (t0 >> 8) | (t1 << 4); - r[3*i+2] = (t1 >> 4); - } -} - -/************************************************* -* Name: poly_frombytes -* -* Description: De-serialization of a polynomial; -* inverse 
of poly_tobytes -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *a: pointer to input byte array -* (of KYBER_POLYBYTES bytes) -**************************************************/ -void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]) -{ - unsigned int i; - for(i=0;icoeffs[2*i] = ((a[3*i+0] >> 0) | ((uint16_t)a[3*i+1] << 8)) & 0xFFF; - r->coeffs[2*i+1] = ((a[3*i+1] >> 4) | ((uint16_t)a[3*i+2] << 4)) & 0xFFF; - } -} - -/************************************************* -* Name: poly_frommsg -* -* Description: Convert 32-byte message to polynomial -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *msg: pointer to input message -**************************************************/ -void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]) -{ - unsigned int i,j; - -#if (KYBER_INDCPA_MSGBYTES != KYBER_N/8) -#error "KYBER_INDCPA_MSGBYTES must be equal to KYBER_N/8 bytes!" -#endif - - for(i=0;icoeffs[8*i+j] = 0; - cmov_int16(r->coeffs+8*i+j, ((KYBER_Q+1)/2), (msg[i] >> j)&1); - } - } -} - -/************************************************* -* Name: poly_tomsg -* -* Description: Convert polynomial to 32-byte message -* -* Arguments: - uint8_t *msg: pointer to output message -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *a) -{ - unsigned int i,j; - uint32_t t; - - for(i=0;icoeffs[8*i+j]; - // t += ((int16_t)t >> 15) & KYBER_Q; - // t = (((t << 1) + KYBER_Q/2)/KYBER_Q) & 1; - t <<= 1; - t += 1665; - t *= 80635; - t >>= 28; - t &= 1; - msg[i] |= t << j; - } - } -} - -/************************************************* -* Name: poly_getnoise_eta1 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter KYBER_ETA1 -* -* Arguments: - poly *r: pointer to output polynomial -* - const 
uint8_t *seed: pointer to input seed -* (of length KYBER_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) -{ - uint8_t buf[KYBER_ETA1*KYBER_N/4]; - prf(buf, sizeof(buf), seed, nonce); - poly_cbd_eta1(r, buf); -} - -/************************************************* -* Name: poly_getnoise_eta2 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter KYBER_ETA2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *seed: pointer to input seed -* (of length KYBER_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) -{ - uint8_t buf[KYBER_ETA2*KYBER_N/4]; - prf(buf, sizeof(buf), seed, nonce); - poly_cbd_eta2(r, buf); -} - - -/************************************************* -* Name: poly_ntt -* -* Description: Computes negacyclic number-theoretic transform (NTT) of -* a polynomial in place; -* inputs assumed to be in normal order, output in bitreversed order -* -* Arguments: - uint16_t *r: pointer to in/output polynomial -**************************************************/ -void poly_ntt(poly *r) -{ - ntt(r->coeffs); - poly_reduce(r); -} - -/************************************************* -* Name: poly_invntt_tomont -* -* Description: Computes inverse of negacyclic number-theoretic transform (NTT) -* of a polynomial in place; -* inputs assumed to be in bitreversed order, output in normal order -* -* Arguments: - uint16_t *a: pointer to in/output polynomial -**************************************************/ -void poly_invntt_tomont(poly *r) -{ - invntt(r->coeffs); -} - -/************************************************* -* Name: poly_basemul_montgomery 
-* -* Description: Multiplication of two polynomials in NTT domain -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_basemul_montgomery(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - for(i=0;icoeffs[4*i], &a->coeffs[4*i], &b->coeffs[4*i], zetas[64+i]); - basemul(&r->coeffs[4*i+2], &a->coeffs[4*i+2], &b->coeffs[4*i+2], -zetas[64+i]); - } -} - -/************************************************* -* Name: poly_tomont -* -* Description: Inplace conversion of all coefficients of a polynomial -* from normal domain to Montgomery domain -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_tomont(poly *r) -{ - unsigned int i; - const int16_t f = (1ULL << 32) % KYBER_Q; - for(i=0;icoeffs[i] = montgomery_reduce((int32_t)r->coeffs[i]*f); -} - -/************************************************* -* Name: poly_reduce -* -* Description: Applies Barrett reduction to all coefficients of a polynomial -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_reduce(poly *r) -{ - unsigned int i; - for(i=0;icoeffs[i] = barrett_reduce(r->coeffs[i]); -} - -/************************************************* -* Name: poly_add -* -* Description: Add two polynomials; no modular reduction is performed -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_add(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - for(i=0;icoeffs[i] = a->coeffs[i] + b->coeffs[i]; -} - -/************************************************* -* 
Name: poly_sub -* -* Description: Subtract two polynomials; no modular reduction is performed -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_sub(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - for(i=0;icoeffs[i] = a->coeffs[i] - b->coeffs[i]; -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/poly.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/poly.h deleted file mode 100644 index 9a99c7cdad..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/poly.h +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef POLY_H -#define POLY_H - -#include -#include "params.h" - -/* - * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial - * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1] - */ -typedef struct{ - int16_t coeffs[KYBER_N]; -} poly; - -#define poly_compress KYBER_NAMESPACE(poly_compress) -void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a); -#define poly_decompress KYBER_NAMESPACE(poly_decompress) -void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]); - -#define poly_tobytes KYBER_NAMESPACE(poly_tobytes) -void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a); -#define poly_frombytes KYBER_NAMESPACE(poly_frombytes) -void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]); - -#define poly_frommsg KYBER_NAMESPACE(poly_frommsg) -void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]); -#define poly_tomsg KYBER_NAMESPACE(poly_tomsg) -void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r); - -#define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1) -void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); - -#define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2) -void poly_getnoise_eta2(poly *r, 
const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); - -#define poly_ntt KYBER_NAMESPACE(poly_ntt) -void poly_ntt(poly *r); -#define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont) -void poly_invntt_tomont(poly *r); -#define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery) -void poly_basemul_montgomery(poly *r, const poly *a, const poly *b); -#define poly_tomont KYBER_NAMESPACE(poly_tomont) -void poly_tomont(poly *r); - -#define poly_reduce KYBER_NAMESPACE(poly_reduce) -void poly_reduce(poly *r); - -#define poly_add KYBER_NAMESPACE(poly_add) -void poly_add(poly *r, const poly *a, const poly *b); -#define poly_sub KYBER_NAMESPACE(poly_sub) -void poly_sub(poly *r, const poly *a, const poly *b); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/polyvec.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/polyvec.c deleted file mode 100644 index 669f6a5f1d..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/polyvec.c +++ /dev/null @@ -1,246 +0,0 @@ -#include -#include "params.h" -#include "poly.h" -#include "polyvec.h" - -/************************************************* -* Name: polyvec_compress -* -* Description: Compress and serialize vector of polynomials -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for KYBER_POLYVECCOMPRESSEDBYTES) -* - const polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) -{ - unsigned int i,j,k; - uint64_t d0; - -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) - uint16_t t[8]; - for(i=0;ivec[i].coeffs[8*j+k]; - t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; -/* t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */ - d0 = t[k]; - d0 <<= 11; - d0 += 1664; - d0 *= 645084; - d0 >>= 31; - t[k] = d0 & 0x7ff; - } - - r[ 0] = (t[0] >> 0); - r[ 1] = (t[0] >> 8) | (t[1] << 3); - r[ 2] = (t[1] >> 
5) | (t[2] << 6); - r[ 3] = (t[2] >> 2); - r[ 4] = (t[2] >> 10) | (t[3] << 1); - r[ 5] = (t[3] >> 7) | (t[4] << 4); - r[ 6] = (t[4] >> 4) | (t[5] << 7); - r[ 7] = (t[5] >> 1); - r[ 8] = (t[5] >> 9) | (t[6] << 2); - r[ 9] = (t[6] >> 6) | (t[7] << 5); - r[10] = (t[7] >> 3); - r += 11; - } - } -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) - uint16_t t[4]; - for(i=0;ivec[i].coeffs[4*j+k]; - t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; -/* t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */ - d0 = t[k]; - d0 <<= 10; - d0 += 1665; - d0 *= 1290167; - d0 >>= 32; - t[k] = d0 & 0x3ff; - } - - r[0] = (t[0] >> 0); - r[1] = (t[0] >> 8) | (t[1] << 2); - r[2] = (t[1] >> 6) | (t[2] << 4); - r[3] = (t[2] >> 4) | (t[3] << 6); - r[4] = (t[3] >> 2); - r += 5; - } - } -#else -#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" -#endif -} - -/************************************************* -* Name: polyvec_decompress -* -* Description: De-serialize and decompress vector of polynomials; -* approximate inverse of polyvec_compress -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const uint8_t *a: pointer to input byte array -* (of length KYBER_POLYVECCOMPRESSEDBYTES) -**************************************************/ -void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]) -{ - unsigned int i,j,k; - -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) - uint16_t t[8]; - for(i=0;i> 0) | ((uint16_t)a[ 1] << 8); - t[1] = (a[1] >> 3) | ((uint16_t)a[ 2] << 5); - t[2] = (a[2] >> 6) | ((uint16_t)a[ 3] << 2) | ((uint16_t)a[4] << 10); - t[3] = (a[4] >> 1) | ((uint16_t)a[ 5] << 7); - t[4] = (a[5] >> 4) | ((uint16_t)a[ 6] << 4); - t[5] = (a[6] >> 7) | ((uint16_t)a[ 7] << 1) | ((uint16_t)a[8] << 9); - t[6] = (a[8] >> 2) | ((uint16_t)a[ 9] << 6); - t[7] = (a[9] >> 5) | ((uint16_t)a[10] << 3); - a += 11; - - for(k=0;k<8;k++) - r->vec[i].coeffs[8*j+k] = ((uint32_t)(t[k] & 0x7FF)*KYBER_Q + 1024) 
>> 11; - } - } -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) - uint16_t t[4]; - for(i=0;i> 0) | ((uint16_t)a[1] << 8); - t[1] = (a[1] >> 2) | ((uint16_t)a[2] << 6); - t[2] = (a[2] >> 4) | ((uint16_t)a[3] << 4); - t[3] = (a[3] >> 6) | ((uint16_t)a[4] << 2); - a += 5; - - for(k=0;k<4;k++) - r->vec[i].coeffs[4*j+k] = ((uint32_t)(t[k] & 0x3FF)*KYBER_Q + 512) >> 10; - } - } -#else -#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" -#endif -} - -/************************************************* -* Name: polyvec_tobytes -* -* Description: Serialize vector of polynomials -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for KYBER_POLYVECBYTES) -* - const polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_frombytes -* -* Description: De-serialize vector of polynomials; -* inverse of polyvec_tobytes -* -* Arguments: - uint8_t *r: pointer to output byte array -* - const polyvec *a: pointer to input vector of polynomials -* (of length KYBER_POLYVECBYTES) -**************************************************/ -void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]) -{ - unsigned int i; - for(i=0;ivec[i], a+i*KYBER_POLYBYTES); -} - -/************************************************* -* Name: polyvec_ntt -* -* Description: Apply forward NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_ntt(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_invntt_tomont -* -* Description: Apply inverse NTT to all elements of a vector of polynomials -* and multiply by 
Montgomery factor 2^16 -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_invntt_tomont(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_basemul_acc_montgomery -* -* Description: Multiply elements of a and b in NTT domain, accumulate into r, -* and multiply by 2^-16. -* -* Arguments: - poly *r: pointer to output polynomial -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) -{ - unsigned int i; - poly t; - - poly_basemul_montgomery(r, &a->vec[0], &b->vec[0]); - for(i=1;ivec[i], &b->vec[i]); - poly_add(r, r, &t); - } - - poly_reduce(r); -} - -/************************************************* -* Name: polyvec_reduce -* -* Description: Applies Barrett reduction to each coefficient -* of each element of a vector of polynomials; -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - polyvec *r: pointer to input/output polynomial -**************************************************/ -void polyvec_reduce(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_add -* -* Description: Add vectors of polynomials -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) -{ - unsigned int i; - for(i=0;ivec[i], &a->vec[i], &b->vec[i]); -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/polyvec.h 
b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/polyvec.h deleted file mode 100644 index 57b605494e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/polyvec.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef POLYVEC_H -#define POLYVEC_H - -#include -#include "params.h" -#include "poly.h" - -typedef struct{ - poly vec[KYBER_K]; -} polyvec; - -#define polyvec_compress KYBER_NAMESPACE(polyvec_compress) -void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a); -#define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress) -void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]); - -#define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes) -void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a); -#define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes) -void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]); - -#define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt) -void polyvec_ntt(polyvec *r); -#define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont) -void polyvec_invntt_tomont(polyvec *r); - -#define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery) -void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b); - -#define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce) -void polyvec_reduce(polyvec *r); - -#define polyvec_add KYBER_NAMESPACE(polyvec_add) -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/reduce.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/reduce.c deleted file mode 100644 index 9d8e7edf83..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/reduce.c +++ /dev/null @@ -1,42 +0,0 @@ -#include -#include "params.h" -#include "reduce.h" - -/************************************************* -* Name: montgomery_reduce -* -* Description: Montgomery reduction; 
given a 32-bit integer a, computes -* 16-bit integer congruent to a * R^-1 mod q, where R=2^16 -* -* Arguments: - int32_t a: input integer to be reduced; -* has to be in {-q2^15,...,q2^15-1} -* -* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. -**************************************************/ -int16_t montgomery_reduce(int32_t a) -{ - int16_t t; - - t = (int16_t)a*QINV; - t = (a - (int32_t)t*KYBER_Q) >> 16; - return t; -} - -/************************************************* -* Name: barrett_reduce -* -* Description: Barrett reduction; given a 16-bit integer a, computes -* centered representative congruent to a mod q in {-(q-1)/2,...,(q-1)/2} -* -* Arguments: - int16_t a: input integer to be reduced -* -* Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q. -**************************************************/ -int16_t barrett_reduce(int16_t a) { - int16_t t; - const int16_t v = ((1<<26) + KYBER_Q/2)/KYBER_Q; - - t = ((int32_t)v*a + (1<<25)) >> 26; - t *= KYBER_Q; - return a - t; -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/reduce.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/reduce.h deleted file mode 100644 index c1bc1e4c7b..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/reduce.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef REDUCE_H -#define REDUCE_H - -#include -#include "params.h" - -#define MONT -1044 // 2^16 mod q -#define QINV -3327 // q^-1 mod 2^16 - -#define montgomery_reduce KYBER_NAMESPACE(montgomery_reduce) -int16_t montgomery_reduce(int32_t a); - -#define barrett_reduce KYBER_NAMESPACE(barrett_reduce) -int16_t barrett_reduce(int16_t a); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/symmetric-shake.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/symmetric-shake.c deleted file mode 100644 index 20f451882e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/symmetric-shake.c +++ 
/dev/null @@ -1,74 +0,0 @@ -#include -#include -#include -#include "params.h" -#include "symmetric.h" -#include "fips202.h" - -/************************************************* -* Name: kyber_shake128_absorb -* -* Description: Absorb step of the SHAKE128 specialized for the Kyber context. -* -* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state -* - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state -* - uint8_t i: additional byte of input -* - uint8_t j: additional byte of input -**************************************************/ -void kyber_shake128_absorb(shake128incctx *state, - const uint8_t seed[KYBER_SYMBYTES], - uint8_t x, - uint8_t y) -{ - uint8_t extseed[KYBER_SYMBYTES+2]; - - memcpy(extseed, seed, KYBER_SYMBYTES); - extseed[KYBER_SYMBYTES+0] = x; - extseed[KYBER_SYMBYTES+1] = y; - - shake128_absorb_once(state, extseed, sizeof(extseed)); -} - -/************************************************* -* Name: kyber_shake256_prf -* -* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -* and then generates outlen bytes of SHAKE256 output -* -* Arguments: - uint8_t *out: pointer to output -* - size_t outlen: number of requested output bytes -* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) -* - uint8_t nonce: single-byte nonce (public PRF input) -**************************************************/ -void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce) -{ - uint8_t extkey[KYBER_SYMBYTES+1]; - - memcpy(extkey, key, KYBER_SYMBYTES); - extkey[KYBER_SYMBYTES] = nonce; - - shake256(out, outlen, extkey, sizeof(extkey)); -} - -/************************************************* -* Name: kyber_shake256_prf -* -* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -* and then generates outlen bytes of SHAKE256 output -* -* Arguments: - uint8_t *out: pointer to output -* - size_t outlen: number 
of requested output bytes -* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) -* - uint8_t nonce: single-byte nonce (public PRF input) -**************************************************/ -void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]) -{ - shake256incctx s; - - shake256_inc_init(&s); - shake256_inc_absorb(&s, key, KYBER_SYMBYTES); - shake256_inc_absorb(&s, input, KYBER_CIPHERTEXTBYTES); - shake256_inc_finalize(&s); - shake256_inc_squeeze(out, KYBER_SSBYTES, &s); - shake256_inc_ctx_release(&s); -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/symmetric.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/symmetric.h deleted file mode 100644 index 2acc66f98d..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/symmetric.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef SYMMETRIC_H -#define SYMMETRIC_H - -#include -#include -#include "params.h" - -#include "fips202.h" - -typedef shake128incctx xof_state; - -#define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb) -void kyber_shake128_absorb(shake128incctx *s, - const uint8_t seed[KYBER_SYMBYTES], - uint8_t x, - uint8_t y); - -#define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf) -void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce); - -#define kyber_shake256_rkprf KYBER_NAMESPACE(kyber_shake256_rkprf) -void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]); - -#define XOF_BLOCKBYTES SHAKE128_RATE - -#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) -#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) -#define xof_init(STATE, SEED) shake128_inc_init(STATE) -#define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y) -#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) 
shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) -#define xof_release(STATE) shake128_inc_ctx_release(STATE) -#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE) -#define rkprf(OUT, KEY, INPUT) kyber_shake256_rkprf(OUT, KEY, INPUT) - -#endif /* SYMMETRIC_H */ diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/verify.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/verify.c deleted file mode 100644 index 914ccd448f..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-1024_ref/verify.c +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include -#include "verify.h" - -/************************************************* -* Name: verify -* -* Description: Compare two arrays for equality in constant time. -* -* Arguments: const uint8_t *a: pointer to first byte array -* const uint8_t *b: pointer to second byte array -* size_t len: length of the byte arrays -* -* Returns 0 if the byte arrays are equal, 1 otherwise -**************************************************/ -int verify(const uint8_t *a, const uint8_t *b, size_t len) -{ - size_t i; - uint8_t r = 0; - - for(i=0;i> 63; -} - -/************************************************* -* Name: cmov -* -* Description: Copy len bytes from x to r if b is 1; -* don't modify x if b is 0. Requires b to be in {0,1}; -* assumes two's complement representation of negative integers. -* Runs in constant time. -* -* Arguments: uint8_t *r: pointer to output byte array -* const uint8_t *x: pointer to input byte array -* size_t len: Amount of bytes to be copied -* uint8_t b: Condition bit; has to be in {0,1} -**************************************************/ -void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) -{ - size_t i; - -#if defined(__GNUC__) || defined(__clang__) - // Prevent the compiler from - // 1) inferring that b is 0/1-valued, and - // 2) handling the two cases with a branch. 
- // This is not necessary when verify.c and kem.c are separate translation - // units, but we expect that downstream consumers will copy this code and/or - // change how it is built. - __asm__("" : "+r"(b) : /* no inputs */); -#endif - - b = -b; - for(i=0;i -#include -#include "params.h" - -#define verify KYBER_NAMESPACE(verify) -int verify(const uint8_t *a, const uint8_t *b, size_t len); - -#define cmov KYBER_NAMESPACE(cmov) -void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); - -#define cmov_int16 KYBER_NAMESPACE(cmov_int16) -void cmov_int16(int16_t *r, int16_t v, uint16_t b); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/align.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/align.h deleted file mode 100644 index 3463866f37..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/align.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef ALIGN_H -#define ALIGN_H - -#include -#include - -#define ALIGNED_UINT8(N) \ - union { \ - uint8_t coeffs[N]; \ - __m256i vec[(N+31)/32]; \ - } - -#define ALIGNED_INT16(N) \ - union { \ - int16_t coeffs[N]; \ - __m256i vec[(N+15)/16]; \ - } - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/api.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/api.h deleted file mode 100644 index a154e80f1d..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/api.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef API_H -#define API_H - -#include - -#define pqcrystals_kyber512_SECRETKEYBYTES 1632 -#define pqcrystals_kyber512_PUBLICKEYBYTES 800 -#define pqcrystals_kyber512_CIPHERTEXTBYTES 768 -#define pqcrystals_kyber512_KEYPAIRCOINBYTES 64 -#define pqcrystals_kyber512_ENCCOINBYTES 32 -#define pqcrystals_kyber512_BYTES 32 - -#define pqcrystals_kyber512_avx2_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES -#define pqcrystals_kyber512_avx2_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES -#define 
pqcrystals_kyber512_avx2_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES -#define pqcrystals_kyber512_avx2_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES -#define pqcrystals_kyber512_avx2_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES -#define pqcrystals_kyber512_avx2_BYTES pqcrystals_kyber512_BYTES - -int pqcrystals_kyber512_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); -int pqcrystals_kyber512_avx2_keypair(uint8_t *pk, uint8_t *sk); -int pqcrystals_kyber512_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); -int pqcrystals_kyber512_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -int pqcrystals_kyber512_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#define pqcrystals_kyber768_SECRETKEYBYTES 2400 -#define pqcrystals_kyber768_PUBLICKEYBYTES 1184 -#define pqcrystals_kyber768_CIPHERTEXTBYTES 1088 -#define pqcrystals_kyber768_KEYPAIRCOINBYTES 64 -#define pqcrystals_kyber768_ENCCOINBYTES 32 -#define pqcrystals_kyber768_BYTES 32 - -#define pqcrystals_kyber768_avx2_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES -#define pqcrystals_kyber768_avx2_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES -#define pqcrystals_kyber768_avx2_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES -#define pqcrystals_kyber768_avx2_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES -#define pqcrystals_kyber768_avx2_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES -#define pqcrystals_kyber768_avx2_BYTES pqcrystals_kyber768_BYTES - -int pqcrystals_kyber768_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); -int pqcrystals_kyber768_avx2_keypair(uint8_t *pk, uint8_t *sk); -int pqcrystals_kyber768_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); -int pqcrystals_kyber768_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -int pqcrystals_kyber768_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#define 
pqcrystals_kyber1024_SECRETKEYBYTES 3168 -#define pqcrystals_kyber1024_PUBLICKEYBYTES 1568 -#define pqcrystals_kyber1024_CIPHERTEXTBYTES 1568 -#define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64 -#define pqcrystals_kyber1024_ENCCOINBYTES 32 -#define pqcrystals_kyber1024_BYTES 32 - -#define pqcrystals_kyber1024_avx2_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES -#define pqcrystals_kyber1024_avx2_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES -#define pqcrystals_kyber1024_avx2_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES -#define pqcrystals_kyber1024_avx2_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES -#define pqcrystals_kyber1024_avx2_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES -#define pqcrystals_kyber1024_avx2_BYTES pqcrystals_kyber1024_BYTES - -int pqcrystals_kyber1024_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); -int pqcrystals_kyber1024_avx2_keypair(uint8_t *pk, uint8_t *sk); -int pqcrystals_kyber1024_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); -int pqcrystals_kyber1024_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -int pqcrystals_kyber1024_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/basemul.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/basemul.S deleted file mode 100644 index 36990639b2..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/basemul.S +++ /dev/null @@ -1,105 +0,0 @@ -#include "consts.h" - -.macro schoolbook off -vmovdqa _16XQINV*2(%rcx),%ymm0 -vmovdqa (64*\off+ 0)*2(%rsi),%ymm1 # a0 -vmovdqa (64*\off+16)*2(%rsi),%ymm2 # b0 -vmovdqa (64*\off+32)*2(%rsi),%ymm3 # a1 -vmovdqa (64*\off+48)*2(%rsi),%ymm4 # b1 - -vpmullw %ymm0,%ymm1,%ymm9 # a0.lo -vpmullw %ymm0,%ymm2,%ymm10 # b0.lo -vpmullw %ymm0,%ymm3,%ymm11 # a1.lo -vpmullw %ymm0,%ymm4,%ymm12 # b1.lo - -vmovdqa (64*\off+ 0)*2(%rdx),%ymm5 # c0 -vmovdqa 
(64*\off+16)*2(%rdx),%ymm6 # d0 - -vpmulhw %ymm5,%ymm1,%ymm13 # a0c0.hi -vpmulhw %ymm6,%ymm1,%ymm1 # a0d0.hi -vpmulhw %ymm5,%ymm2,%ymm14 # b0c0.hi -vpmulhw %ymm6,%ymm2,%ymm2 # b0d0.hi - -vmovdqa (64*\off+32)*2(%rdx),%ymm7 # c1 -vmovdqa (64*\off+48)*2(%rdx),%ymm8 # d1 - -vpmulhw %ymm7,%ymm3,%ymm15 # a1c1.hi -vpmulhw %ymm8,%ymm3,%ymm3 # a1d1.hi -vpmulhw %ymm7,%ymm4,%ymm0 # b1c1.hi -vpmulhw %ymm8,%ymm4,%ymm4 # b1d1.hi - -vmovdqa %ymm13,(%rsp) - -vpmullw %ymm5,%ymm9,%ymm13 # a0c0.lo -vpmullw %ymm6,%ymm9,%ymm9 # a0d0.lo -vpmullw %ymm5,%ymm10,%ymm5 # b0c0.lo -vpmullw %ymm6,%ymm10,%ymm10 # b0d0.lo - -vpmullw %ymm7,%ymm11,%ymm6 # a1c1.lo -vpmullw %ymm8,%ymm11,%ymm11 # a1d1.lo -vpmullw %ymm7,%ymm12,%ymm7 # b1c1.lo -vpmullw %ymm8,%ymm12,%ymm12 # b1d1.lo - -vmovdqa _16XQ*2(%rcx),%ymm8 -vpmulhw %ymm8,%ymm13,%ymm13 -vpmulhw %ymm8,%ymm9,%ymm9 -vpmulhw %ymm8,%ymm5,%ymm5 -vpmulhw %ymm8,%ymm10,%ymm10 -vpmulhw %ymm8,%ymm6,%ymm6 -vpmulhw %ymm8,%ymm11,%ymm11 -vpmulhw %ymm8,%ymm7,%ymm7 -vpmulhw %ymm8,%ymm12,%ymm12 - -vpsubw (%rsp),%ymm13,%ymm13 # -a0c0 -vpsubw %ymm9,%ymm1,%ymm9 # a0d0 -vpsubw %ymm5,%ymm14,%ymm5 # b0c0 -vpsubw %ymm10,%ymm2,%ymm10 # b0d0 - -vpsubw %ymm6,%ymm15,%ymm6 # a1c1 -vpsubw %ymm11,%ymm3,%ymm11 # a1d1 -vpsubw %ymm7,%ymm0,%ymm7 # b1c1 -vpsubw %ymm12,%ymm4,%ymm12 # b1d1 - -vmovdqa (%r9),%ymm0 -vmovdqa 32(%r9),%ymm1 -vpmullw %ymm0,%ymm10,%ymm2 -vpmullw %ymm0,%ymm12,%ymm3 -vpmulhw %ymm1,%ymm10,%ymm10 -vpmulhw %ymm1,%ymm12,%ymm12 -vpmulhw %ymm8,%ymm2,%ymm2 -vpmulhw %ymm8,%ymm3,%ymm3 -vpsubw %ymm2,%ymm10,%ymm10 # rb0d0 -vpsubw %ymm3,%ymm12,%ymm12 # rb1d1 - -vpaddw %ymm5,%ymm9,%ymm9 -vpaddw %ymm7,%ymm11,%ymm11 -vpsubw %ymm13,%ymm10,%ymm13 -vpsubw %ymm12,%ymm6,%ymm6 - -vmovdqa %ymm13,(64*\off+ 0)*2(%rdi) -vmovdqa %ymm9,(64*\off+16)*2(%rdi) -vmovdqa %ymm6,(64*\off+32)*2(%rdi) -vmovdqa %ymm11,(64*\off+48)*2(%rdi) -.endm - -.text -.global cdecl(basemul_avx) -cdecl(basemul_avx): -mov %rsp,%r8 -and $-32,%rsp -sub $32,%rsp - -lea (_ZETAS_EXP+176)*2(%rcx),%r9 -schoolbook 0 - -add 
$32*2,%r9 -schoolbook 1 - -add $192*2,%r9 -schoolbook 2 - -add $32*2,%r9 -schoolbook 3 - -mov %r8,%rsp -ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/cbd.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/cbd.c deleted file mode 100644 index dad473c79e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/cbd.c +++ /dev/null @@ -1,144 +0,0 @@ -#include -#include -#include "params.h" -#include "cbd.h" - -/************************************************* -* Name: cbd2 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter eta=2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const __m256i *buf: pointer to aligned input byte array -**************************************************/ -static void cbd2(poly * restrict r, const __m256i buf[2*KYBER_N/128]) -{ - unsigned int i; - __m256i f0, f1, f2, f3; - const __m256i mask55 = _mm256_set1_epi32(0x55555555); - const __m256i mask33 = _mm256_set1_epi32(0x33333333); - const __m256i mask03 = _mm256_set1_epi32(0x03030303); - const __m256i mask0F = _mm256_set1_epi32(0x0F0F0F0F); - - for(i = 0; i < KYBER_N/64; i++) { - f0 = _mm256_load_si256(&buf[i]); - - f1 = _mm256_srli_epi16(f0, 1); - f0 = _mm256_and_si256(mask55, f0); - f1 = _mm256_and_si256(mask55, f1); - f0 = _mm256_add_epi8(f0, f1); - - f1 = _mm256_srli_epi16(f0, 2); - f0 = _mm256_and_si256(mask33, f0); - f1 = _mm256_and_si256(mask33, f1); - f0 = _mm256_add_epi8(f0, mask33); - f0 = _mm256_sub_epi8(f0, f1); - - f1 = _mm256_srli_epi16(f0, 4); - f0 = _mm256_and_si256(mask0F, f0); - f1 = _mm256_and_si256(mask0F, f1); - f0 = _mm256_sub_epi8(f0, mask03); - f1 = _mm256_sub_epi8(f1, mask03); - - f2 = _mm256_unpacklo_epi8(f0, f1); - f3 = _mm256_unpackhi_epi8(f0, f1); - - f0 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f2)); - f1 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f2,1)); - f2 
= _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f3)); - f3 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f3,1)); - - _mm256_store_si256(&r->vec[4*i+0], f0); - _mm256_store_si256(&r->vec[4*i+1], f2); - _mm256_store_si256(&r->vec[4*i+2], f1); - _mm256_store_si256(&r->vec[4*i+3], f3); - } -} - -#if KYBER_ETA1 == 3 -/************************************************* -* Name: cbd3 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter eta=3 -* This function is only needed for Kyber-512 -* -* Arguments: - poly *r: pointer to output polynomial -* - const __m256i *buf: pointer to aligned input byte array -**************************************************/ -static void cbd3(poly * restrict r, const uint8_t buf[3*KYBER_N/4+8]) -{ - unsigned int i; - __m256i f0, f1, f2, f3; - const __m256i mask249 = _mm256_set1_epi32(0x249249); - const __m256i mask6DB = _mm256_set1_epi32(0x6DB6DB); - const __m256i mask07 = _mm256_set1_epi32(7); - const __m256i mask70 = _mm256_set1_epi32(7 << 16); - const __m256i mask3 = _mm256_set1_epi16(3); - const __m256i shufbidx = _mm256_set_epi8(-1,15,14,13,-1,12,11,10,-1, 9, 8, 7,-1, 6, 5, 4, - -1,11,10, 9,-1, 8, 7, 6,-1, 5, 4, 3,-1, 2, 1, 0); - - for(i = 0; i < KYBER_N/32; i++) { - f0 = _mm256_loadu_si256((__m256i *)&buf[24*i]); - f0 = _mm256_permute4x64_epi64(f0,0x94); - f0 = _mm256_shuffle_epi8(f0,shufbidx); - - f1 = _mm256_srli_epi32(f0,1); - f2 = _mm256_srli_epi32(f0,2); - f0 = _mm256_and_si256(mask249,f0); - f1 = _mm256_and_si256(mask249,f1); - f2 = _mm256_and_si256(mask249,f2); - f0 = _mm256_add_epi32(f0,f1); - f0 = _mm256_add_epi32(f0,f2); - - f1 = _mm256_srli_epi32(f0,3); - f0 = _mm256_add_epi32(f0,mask6DB); - f0 = _mm256_sub_epi32(f0,f1); - - f1 = _mm256_slli_epi32(f0,10); - f2 = _mm256_srli_epi32(f0,12); - f3 = _mm256_srli_epi32(f0, 2); - f0 = _mm256_and_si256(f0,mask07); - f1 = _mm256_and_si256(f1,mask70); - f2 = 
_mm256_and_si256(f2,mask07); - f3 = _mm256_and_si256(f3,mask70); - f0 = _mm256_add_epi16(f0,f1); - f1 = _mm256_add_epi16(f2,f3); - f0 = _mm256_sub_epi16(f0,mask3); - f1 = _mm256_sub_epi16(f1,mask3); - - f2 = _mm256_unpacklo_epi32(f0,f1); - f3 = _mm256_unpackhi_epi32(f0,f1); - - f0 = _mm256_permute2x128_si256(f2,f3,0x20); - f1 = _mm256_permute2x128_si256(f2,f3,0x31); - - _mm256_store_si256(&r->vec[2*i+0], f0); - _mm256_store_si256(&r->vec[2*i+1], f1); - } -} -#endif - -/* buf 32 bytes longer for cbd3 */ -void poly_cbd_eta1(poly *r, const __m256i buf[KYBER_ETA1*KYBER_N/128+1]) -{ -#if KYBER_ETA1 == 2 - cbd2(r, buf); -#elif KYBER_ETA1 == 3 - cbd3(r, (uint8_t *)buf); -#else -#error "This implementation requires eta1 in {2,3}" -#endif -} - -void poly_cbd_eta2(poly *r, const __m256i buf[KYBER_ETA2*KYBER_N/128]) -{ -#if KYBER_ETA2 == 2 - cbd2(r, buf); -#else -#error "This implementation requires eta2 = 2" -#endif -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/cbd.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/cbd.h deleted file mode 100644 index 05788e06b4..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/cbd.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef CBD_H -#define CBD_H - -#include -#include -#include "params.h" -#include "poly.h" - -#define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1) -void poly_cbd_eta1(poly *r, const __m256i buf[KYBER_ETA1*KYBER_N/128+1]); - -#define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2) -void poly_cbd_eta2(poly *r, const __m256i buf[KYBER_ETA2*KYBER_N/128]); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/consts.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/consts.c deleted file mode 100644 index 84e596893d..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/consts.c +++ /dev/null @@ -1,121 +0,0 @@ -#include "align.h" -#include "params.h" -#include "consts.h" - -#define Q KYBER_Q -#define MONT -1044 // 2^16 mod 
q -#define QINV -3327 // q^-1 mod 2^16 -#define V 20159 // floor(2^26/q + 0.5) -#define FHI 1441 // mont^2/128 -#define FLO -10079 // qinv*FHI -#define MONTSQHI 1353 // mont^2 -#define MONTSQLO 20553 // qinv*MONTSQHI -#define MASK 4095 -#define SHIFT 32 - -const qdata_t qdata = {{ -#define _16XQ 0 - Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, - -#define _16XQINV 16 - QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, - QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, - -#define _16XV 32 - V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, - -#define _16XFLO 48 - FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO, - FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO, - -#define _16XFHI 64 - FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, - FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, - -#define _16XMONTSQLO 80 - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - -#define _16XMONTSQHI 96 - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - -#define _16XMASK 112 - MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, - MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, - -#define _REVIDXB 128 - 3854, 3340, 2826, 2312, 1798, 1284, 770, 256, - 3854, 3340, 2826, 2312, 1798, 1284, 770, 256, - -#define _REVIDXD 144 - 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0, 0, - -#define _ZETAS_EXP 160 - 31498, 31498, 31498, 31498, -758, -758, -758, -758, - 5237, 5237, 5237, 5237, 1397, 1397, 1397, 1397, - 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745, - 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745, - -359, -359, -359, -359, -359, -359, -359, -359, - -359, -359, -359, -359, -359, -359, -359, -359, - 13525, 13525, 13525, 13525, 13525, 13525, 13525, 13525, - -12402, -12402, -12402, -12402, -12402, -12402, -12402, -12402, - 1493, 1493, 1493, 1493, 1493, 1493, 1493, 1493, - 1422, 1422, 
1422, 1422, 1422, 1422, 1422, 1422, - -20907, -20907, -20907, -20907, 27758, 27758, 27758, 27758, - -3799, -3799, -3799, -3799, -15690, -15690, -15690, -15690, - -171, -171, -171, -171, 622, 622, 622, 622, - 1577, 1577, 1577, 1577, 182, 182, 182, 182, - -5827, -5827, 17363, 17363, -26360, -26360, -29057, -29057, - 5571, 5571, -1102, -1102, 21438, 21438, -26242, -26242, - 573, 573, -1325, -1325, 264, 264, 383, 383, - -829, -829, 1458, 1458, -1602, -1602, -130, -130, - -5689, -6516, 1496, 30967, -23565, 20179, 20710, 25080, - -12796, 26616, 16064, -12442, 9134, -650, -25986, 27837, - 1223, 652, -552, 1015, -1293, 1491, -282, -1544, - 516, -8, -320, -666, -1618, -1162, 126, 1469, - -335, -11477, -32227, 20494, -27738, 945, -14883, 6182, - 32010, 10631, 29175, -28762, -18486, 17560, -14430, -5276, - -1103, 555, -1251, 1550, 422, 177, -291, 1574, - -246, 1159, -777, -602, -1590, -872, 418, -156, - 11182, 13387, -14233, -21655, 13131, -4587, 23092, 5493, - -32502, 30317, -18741, 12639, 20100, 18525, 19529, -12619, - 430, 843, 871, 105, 587, -235, -460, 1653, - 778, -147, 1483, 1119, 644, 349, 329, -75, - 787, 787, 787, 787, 787, 787, 787, 787, - 787, 787, 787, 787, 787, 787, 787, 787, - -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517, - -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517, - 28191, 28191, 28191, 28191, 28191, 28191, 28191, 28191, - -16694, -16694, -16694, -16694, -16694, -16694, -16694, -16694, - 287, 287, 287, 287, 287, 287, 287, 287, - 202, 202, 202, 202, 202, 202, 202, 202, - 10690, 10690, 10690, 10690, 1358, 1358, 1358, 1358, - -11202, -11202, -11202, -11202, 31164, 31164, 31164, 31164, - 962, 962, 962, 962, -1202, -1202, -1202, -1202, - -1474, -1474, -1474, -1474, 1468, 1468, 1468, 1468, - -28073, -28073, 24313, 24313, -10532, -10532, 8800, 8800, - 18426, 18426, 8859, 8859, 26675, 26675, -16163, -16163, - -681, -681, 1017, 1017, 732, 732, 608, 608, - -1542, -1542, 411, 411, -205, -205, -1571, -1571, - 19883, -28250, -15887, -8898, -28309, 
9075, -30199, 18249, - 13426, 14017, -29156, -12757, 16832, 4311, -24155, -17915, - -853, -90, -271, 830, 107, -1421, -247, -951, - -398, 961, -1508, -725, 448, -1065, 677, -1275, - -31183, 25435, -7382, 24391, -20927, 10946, 24214, 16989, - 10335, -7934, -22502, 10906, 31636, 28644, 23998, -17422, - 817, 603, 1322, -1465, -1215, 1218, -874, -1187, - -1185, -1278, -1510, -870, -108, 996, 958, 1522, - 20297, 2146, 15355, -32384, -6280, -14903, -11044, 14469, - -21498, -20198, 23210, -17442, -23860, -20257, 7756, 23132, - 1097, 610, -1285, 384, -136, -1335, 220, -1659, - -1530, 794, -854, 478, -308, 991, -1460, 1628, - -#define _16XSHIFT 624 - SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, - SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT -}}; diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/consts.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/consts.h deleted file mode 100644 index f95899cd8e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/consts.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef CONSTS_H -#define CONSTS_H - -#include "params.h" - -#define _16XQ 0 -#define _16XQINV 16 -#define _16XV 32 -#define _16XFLO 48 -#define _16XFHI 64 -#define _16XMONTSQLO 80 -#define _16XMONTSQHI 96 -#define _16XMASK 112 -#define _REVIDXB 128 -#define _REVIDXD 144 -#define _ZETAS_EXP 160 -#define _16XSHIFT 624 - -/* The C ABI on MacOS exports all symbols with a leading - * underscore. This means that any symbols we refer to from - * C files (functions) can't be found, and all symbols we - * refer to from ASM also can't be found. 
- * - * This define helps us get around this - */ -#ifdef __ASSEMBLER__ -#if defined(__WIN32__) || defined(__APPLE__) -#define decorate(s) _##s -#define cdecl2(s) decorate(s) -#define cdecl(s) cdecl2(KYBER_NAMESPACE(##s)) -#else -#define cdecl(s) KYBER_NAMESPACE(##s) -#endif -#endif - -#ifndef __ASSEMBLER__ -#include "align.h" -typedef ALIGNED_INT16(640) qdata_t; -#define qdata KYBER_NAMESPACE(qdata) -extern const qdata_t qdata; -#endif - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/fq.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/fq.S deleted file mode 100644 index 3bb1ebd3d8..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/fq.S +++ /dev/null @@ -1,88 +0,0 @@ -#include "consts.h" -.include "fq.inc" - -.text -reduce128_avx: -#load -vmovdqa (%rdi),%ymm2 -vmovdqa 32(%rdi),%ymm3 -vmovdqa 64(%rdi),%ymm4 -vmovdqa 96(%rdi),%ymm5 -vmovdqa 128(%rdi),%ymm6 -vmovdqa 160(%rdi),%ymm7 -vmovdqa 192(%rdi),%ymm8 -vmovdqa 224(%rdi),%ymm9 - -red16 2 -red16 3 -red16 4 -red16 5 -red16 6 -red16 7 -red16 8 -red16 9 - -#store -vmovdqa %ymm2,(%rdi) -vmovdqa %ymm3,32(%rdi) -vmovdqa %ymm4,64(%rdi) -vmovdqa %ymm5,96(%rdi) -vmovdqa %ymm6,128(%rdi) -vmovdqa %ymm7,160(%rdi) -vmovdqa %ymm8,192(%rdi) -vmovdqa %ymm9,224(%rdi) - -ret - -.global cdecl(reduce_avx) -cdecl(reduce_avx): -#consts -vmovdqa _16XQ*2(%rsi),%ymm0 -vmovdqa _16XV*2(%rsi),%ymm1 -call reduce128_avx -add $256,%rdi -call reduce128_avx -ret - -tomont128_avx: -#load -vmovdqa (%rdi),%ymm3 -vmovdqa 32(%rdi),%ymm4 -vmovdqa 64(%rdi),%ymm5 -vmovdqa 96(%rdi),%ymm6 -vmovdqa 128(%rdi),%ymm7 -vmovdqa 160(%rdi),%ymm8 -vmovdqa 192(%rdi),%ymm9 -vmovdqa 224(%rdi),%ymm10 - -fqmulprecomp 1,2,3,11 -fqmulprecomp 1,2,4,12 -fqmulprecomp 1,2,5,13 -fqmulprecomp 1,2,6,14 -fqmulprecomp 1,2,7,15 -fqmulprecomp 1,2,8,11 -fqmulprecomp 1,2,9,12 -fqmulprecomp 1,2,10,13 - -#store -vmovdqa %ymm3,(%rdi) -vmovdqa %ymm4,32(%rdi) -vmovdqa %ymm5,64(%rdi) -vmovdqa %ymm6,96(%rdi) -vmovdqa 
%ymm7,128(%rdi) -vmovdqa %ymm8,160(%rdi) -vmovdqa %ymm9,192(%rdi) -vmovdqa %ymm10,224(%rdi) - -ret - -.global cdecl(tomont_avx) -cdecl(tomont_avx): -#consts -vmovdqa _16XQ*2(%rsi),%ymm0 -vmovdqa _16XMONTSQLO*2(%rsi),%ymm1 -vmovdqa _16XMONTSQHI*2(%rsi),%ymm2 -call tomont128_avx -add $256,%rdi -call tomont128_avx -ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/fq.inc b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/fq.inc deleted file mode 100644 index 4b7afc3118..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/fq.inc +++ /dev/null @@ -1,30 +0,0 @@ -.macro red16 r,rs=0,x=12 -vpmulhw %ymm1,%ymm\r,%ymm\x -.if \rs -vpmulhrsw %ymm\rs,%ymm\x,%ymm\x -.else -vpsraw $10,%ymm\x,%ymm\x -.endif -vpmullw %ymm0,%ymm\x,%ymm\x -vpsubw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro csubq r,x=12 -vpsubw %ymm0,%ymm\r,%ymm\r -vpsraw $15,%ymm\r,%ymm\x -vpand %ymm0,%ymm\x,%ymm\x -vpaddw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro caddq r,x=12 -vpsraw $15,%ymm\r,%ymm\x -vpand %ymm0,%ymm\x,%ymm\x -vpaddw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro fqmulprecomp al,ah,b,x=12 -vpmullw %ymm\al,%ymm\b,%ymm\x -vpmulhw %ymm\ah,%ymm\b,%ymm\b -vpmulhw %ymm0,%ymm\x,%ymm\x -vpsubw %ymm\x,%ymm\b,%ymm\b -.endm diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/indcpa.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/indcpa.c deleted file mode 100644 index c4b2b3a89f..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/indcpa.c +++ /dev/null @@ -1,568 +0,0 @@ -#include -#include -#include -#include -#include "align.h" -#include "params.h" -#include "indcpa.h" -#include "polyvec.h" -#include "poly.h" -#include "ntt.h" -#include "cbd.h" -#include "rejsample.h" -#include "symmetric.h" -#include "randombytes.h" - -/************************************************* -* Name: pack_pk -* -* Description: Serialize the public key as concatenation of the -* serialized vector of polynomials pk and the -* public seed used 
to generate the matrix A. -* The polynomial coefficients in pk are assumed to -* lie in the invertal [0,q], i.e. pk must be reduced -* by polyvec_reduce(). -* -* Arguments: uint8_t *r: pointer to the output serialized public key -* polyvec *pk: pointer to the input public-key polyvec -* const uint8_t *seed: pointer to the input public seed -**************************************************/ -static void pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES], - polyvec *pk, - const uint8_t seed[KYBER_SYMBYTES]) -{ - polyvec_tobytes(r, pk); - memcpy(r+KYBER_POLYVECBYTES, seed, KYBER_SYMBYTES); -} - -/************************************************* -* Name: unpack_pk -* -* Description: De-serialize public key from a byte array; -* approximate inverse of pack_pk -* -* Arguments: - polyvec *pk: pointer to output public-key polynomial vector -* - uint8_t *seed: pointer to output seed to generate matrix A -* - const uint8_t *packedpk: pointer to input serialized public key -**************************************************/ -static void unpack_pk(polyvec *pk, - uint8_t seed[KYBER_SYMBYTES], - const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES]) -{ - polyvec_frombytes(pk, packedpk); - memcpy(seed, packedpk+KYBER_POLYVECBYTES, KYBER_SYMBYTES); -} - -/************************************************* -* Name: pack_sk -* -* Description: Serialize the secret key. -* The polynomial coefficients in sk are assumed to -* lie in the invertal [0,q], i.e. sk must be reduced -* by polyvec_reduce(). 
-* -* Arguments: - uint8_t *r: pointer to output serialized secret key -* - polyvec *sk: pointer to input vector of polynomials (secret key) -**************************************************/ -static void pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk) -{ - polyvec_tobytes(r, sk); -} - -/************************************************* -* Name: unpack_sk -* -* Description: De-serialize the secret key; inverse of pack_sk -* -* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key) -* - const uint8_t *packedsk: pointer to input serialized secret key -**************************************************/ -static void unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES]) -{ - polyvec_frombytes(sk, packedsk); -} - -/************************************************* -* Name: pack_ciphertext -* -* Description: Serialize the ciphertext as concatenation of the -* compressed and serialized vector of polynomials b -* and the compressed and serialized polynomial v. -* The polynomial coefficients in b and v are assumed to -* lie in the invertal [0,q], i.e. b and v must be reduced -* by polyvec_reduce() and poly_reduce(), respectively. 
-* -* Arguments: uint8_t *r: pointer to the output serialized ciphertext -* poly *pk: pointer to the input vector of polynomials b -* poly *v: pointer to the input polynomial v -**************************************************/ -static void pack_ciphertext(uint8_t r[KYBER_INDCPA_BYTES], polyvec *b, poly *v) -{ - polyvec_compress(r, b); - poly_compress(r+KYBER_POLYVECCOMPRESSEDBYTES, v); -} - -/************************************************* -* Name: unpack_ciphertext -* -* Description: De-serialize and decompress ciphertext from a byte array; -* approximate inverse of pack_ciphertext -* -* Arguments: - polyvec *b: pointer to the output vector of polynomials b -* - poly *v: pointer to the output polynomial v -* - const uint8_t *c: pointer to the input serialized ciphertext -**************************************************/ -static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[KYBER_INDCPA_BYTES]) -{ - polyvec_decompress(b, c); - poly_decompress(v, c+KYBER_POLYVECCOMPRESSEDBYTES); -} - -/************************************************* -* Name: rej_uniform -* -* Description: Run rejection sampling on uniform random bytes to generate -* uniform random integers mod q -* -* Arguments: - int16_t *r: pointer to output array -* - unsigned int len: requested number of 16-bit integers (uniform mod q) -* - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes) -* - unsigned int buflen: length of input buffer in bytes -* -* Returns number of sampled 16-bit integers (at most len) -**************************************************/ -static unsigned int rej_uniform(int16_t *r, - unsigned int len, - const uint8_t *buf, - unsigned int buflen) -{ - unsigned int ctr, pos; - uint16_t val0, val1; - - ctr = pos = 0; - while(ctr < len && pos <= buflen - 3) { // buflen is always at least 3 - val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; - val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF; - pos += 3; - - 
if(val0 < KYBER_Q) - r[ctr++] = val0; - if(ctr < len && val1 < KYBER_Q) - r[ctr++] = val1; - } - - return ctr; -} - -#define gen_a(A,B) gen_matrix(A,B,0) -#define gen_at(A,B) gen_matrix(A,B,1) - -/************************************************* -* Name: gen_matrix -* -* Description: Deterministically generate matrix A (or the transpose of A) -* from a seed. Entries of the matrix are polynomials that look -* uniformly random. Performs rejection sampling on output of -* a XOF -* -* Arguments: - polyvec *a: pointer to ouptput matrix A -* - const uint8_t *seed: pointer to input seed -* - int transposed: boolean deciding whether A or A^T is generated -**************************************************/ -#if KYBER_K == 2 -void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) -{ - unsigned int ctr0, ctr1, ctr2, ctr3; - ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; - __m256i f; - shake128x4incctx state; - - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - if(transposed) { - buf[0].coeffs[32] = 0; - buf[0].coeffs[33] = 0; - buf[1].coeffs[32] = 0; - buf[1].coeffs[33] = 1; - buf[2].coeffs[32] = 1; - buf[2].coeffs[33] = 0; - buf[3].coeffs[32] = 1; - buf[3].coeffs[33] = 1; - } - else { - buf[0].coeffs[32] = 0; - buf[0].coeffs[33] = 0; - buf[1].coeffs[32] = 1; - buf[1].coeffs[33] = 0; - buf[2].coeffs[32] = 0; - buf[2].coeffs[33] = 1; - buf[3].coeffs[32] = 1; - buf[3].coeffs[33] = 1; - } - - shake128x4_inc_init(&state); - shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); - - ctr0 = rej_uniform_avx(a[0].vec[0].coeffs, buf[0].coeffs); - ctr1 = rej_uniform_avx(a[0].vec[1].coeffs, buf[1].coeffs); - ctr2 = rej_uniform_avx(a[1].vec[0].coeffs, 
buf[2].coeffs); - ctr3 = rej_uniform_avx(a[1].vec[1].coeffs, buf[3].coeffs); - - while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); - - ctr0 += rej_uniform(a[0].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); - ctr1 += rej_uniform(a[0].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); - ctr2 += rej_uniform(a[1].vec[0].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); - ctr3 += rej_uniform(a[1].vec[1].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); - } - - poly_nttunpack(&a[0].vec[0]); - poly_nttunpack(&a[0].vec[1]); - poly_nttunpack(&a[1].vec[0]); - poly_nttunpack(&a[1].vec[1]); - shake128x4_inc_ctx_release(&state); -} -#elif KYBER_K == 3 -void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) -{ - unsigned int ctr0, ctr1, ctr2, ctr3; - ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; - __m256i f; - shake128x4incctx state; - shake128incctx state1x; - - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - if(transposed) { - buf[0].coeffs[32] = 0; - buf[0].coeffs[33] = 0; - buf[1].coeffs[32] = 0; - buf[1].coeffs[33] = 1; - buf[2].coeffs[32] = 0; - buf[2].coeffs[33] = 2; - buf[3].coeffs[32] = 1; - buf[3].coeffs[33] = 0; - } - else { - buf[0].coeffs[32] = 0; - buf[0].coeffs[33] = 0; - buf[1].coeffs[32] = 1; - buf[1].coeffs[33] = 0; - buf[2].coeffs[32] = 2; - buf[2].coeffs[33] = 0; - buf[3].coeffs[32] = 0; - buf[3].coeffs[33] = 1; - } - - shake128x4_inc_init(&state); - shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); - - ctr0 = 
rej_uniform_avx(a[0].vec[0].coeffs, buf[0].coeffs); - ctr1 = rej_uniform_avx(a[0].vec[1].coeffs, buf[1].coeffs); - ctr2 = rej_uniform_avx(a[0].vec[2].coeffs, buf[2].coeffs); - ctr3 = rej_uniform_avx(a[1].vec[0].coeffs, buf[3].coeffs); - - while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); - - ctr0 += rej_uniform(a[0].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); - ctr1 += rej_uniform(a[0].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); - ctr2 += rej_uniform(a[0].vec[2].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); - ctr3 += rej_uniform(a[1].vec[0].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); - } - - poly_nttunpack(&a[0].vec[0]); - poly_nttunpack(&a[0].vec[1]); - poly_nttunpack(&a[0].vec[2]); - poly_nttunpack(&a[1].vec[0]); - - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - if(transposed) { - buf[0].coeffs[32] = 1; - buf[0].coeffs[33] = 1; - buf[1].coeffs[32] = 1; - buf[1].coeffs[33] = 2; - buf[2].coeffs[32] = 2; - buf[2].coeffs[33] = 0; - buf[3].coeffs[32] = 2; - buf[3].coeffs[33] = 1; - } - else { - buf[0].coeffs[32] = 1; - buf[0].coeffs[33] = 1; - buf[1].coeffs[32] = 2; - buf[1].coeffs[33] = 1; - buf[2].coeffs[32] = 0; - buf[2].coeffs[33] = 2; - buf[3].coeffs[32] = 1; - buf[3].coeffs[33] = 2; - } - - shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); - - ctr0 = rej_uniform_avx(a[1].vec[1].coeffs, buf[0].coeffs); - ctr1 = rej_uniform_avx(a[1].vec[2].coeffs, buf[1].coeffs); - ctr2 = rej_uniform_avx(a[2].vec[0].coeffs, buf[2].coeffs); - ctr3 = 
rej_uniform_avx(a[2].vec[1].coeffs, buf[3].coeffs); - - while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); - - ctr0 += rej_uniform(a[1].vec[1].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); - ctr1 += rej_uniform(a[1].vec[2].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); - ctr2 += rej_uniform(a[2].vec[0].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); - ctr3 += rej_uniform(a[2].vec[1].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); - } - shake128x4_inc_ctx_release(&state); - - poly_nttunpack(&a[1].vec[1]); - poly_nttunpack(&a[1].vec[2]); - poly_nttunpack(&a[2].vec[0]); - poly_nttunpack(&a[2].vec[1]); - - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - buf[0].coeffs[32] = 2; - buf[0].coeffs[33] = 2; - - shake128_inc_init(&state1x); - shake128_absorb_once(&state1x, buf[0].coeffs, 34); - shake128_squeezeblocks(buf[0].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state1x); - ctr0 = rej_uniform_avx(a[2].vec[2].coeffs, buf[0].coeffs); - while(ctr0 < KYBER_N) { - shake128_squeezeblocks(buf[0].coeffs, 1, &state1x); - ctr0 += rej_uniform(a[2].vec[2].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); - } - shake128_inc_ctx_release(&state1x); - - poly_nttunpack(&a[2].vec[2]); -} -#elif KYBER_K == 4 -void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) -{ - unsigned int i, ctr0, ctr1, ctr2, ctr3; - ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; - __m256i f; - shake128x4incctx state; - shake128x4_inc_init(&state); - - for(i=0;i<4;i++) { - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - if(transposed) { - buf[0].coeffs[32] = i; - buf[0].coeffs[33] = 0; - buf[1].coeffs[32] = i; - buf[1].coeffs[33] = 
1; - buf[2].coeffs[32] = i; - buf[2].coeffs[33] = 2; - buf[3].coeffs[32] = i; - buf[3].coeffs[33] = 3; - } - else { - buf[0].coeffs[32] = 0; - buf[0].coeffs[33] = i; - buf[1].coeffs[32] = 1; - buf[1].coeffs[33] = i; - buf[2].coeffs[32] = 2; - buf[2].coeffs[33] = i; - buf[3].coeffs[32] = 3; - buf[3].coeffs[33] = i; - } - - shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); - - ctr0 = rej_uniform_avx(a[i].vec[0].coeffs, buf[0].coeffs); - ctr1 = rej_uniform_avx(a[i].vec[1].coeffs, buf[1].coeffs); - ctr2 = rej_uniform_avx(a[i].vec[2].coeffs, buf[2].coeffs); - ctr3 = rej_uniform_avx(a[i].vec[3].coeffs, buf[3].coeffs); - - while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); - - ctr0 += rej_uniform(a[i].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); - ctr1 += rej_uniform(a[i].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); - ctr2 += rej_uniform(a[i].vec[2].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); - ctr3 += rej_uniform(a[i].vec[3].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); - } - - poly_nttunpack(&a[i].vec[0]); - poly_nttunpack(&a[i].vec[1]); - poly_nttunpack(&a[i].vec[2]); - poly_nttunpack(&a[i].vec[3]); - } - shake128x4_inc_ctx_release(&state); -} -#endif - -/************************************************* -* Name: indcpa_keypair_derand -* -* Description: Generates public and private key for the CPA-secure -* public-key encryption scheme underlying Kyber -* -* Arguments: - uint8_t *pk: pointer to output public key -* (of length KYBER_INDCPA_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key -* (of length KYBER_INDCPA_SECRETKEYBYTES bytes) -* - const uint8_t *coins: pointer to input 
randomness -* (of length KYBER_SYMBYTES bytes) -**************************************************/ -void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], - uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], - const uint8_t coins[KYBER_SYMBYTES]) -{ - unsigned int i; - uint8_t buf[2*KYBER_SYMBYTES]; - const uint8_t *publicseed = buf; - const uint8_t *noiseseed = buf + KYBER_SYMBYTES; - polyvec a[KYBER_K], e, pkpv, skpv; - - memcpy(buf, coins, KYBER_SYMBYTES); - buf[KYBER_SYMBYTES] = KYBER_K; - hash_g(buf, buf, KYBER_SYMBYTES+1); - - gen_a(a, publicseed); - -#if KYBER_K == 2 - poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, e.vec+0, e.vec+1, noiseseed, 0, 1, 2, 3); -#elif KYBER_K == 3 - poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, e.vec+0, noiseseed, 0, 1, 2, 3); - poly_getnoise_eta1_4x(e.vec+1, e.vec+2, pkpv.vec+0, pkpv.vec+1, noiseseed, 4, 5, 6, 7); -#elif KYBER_K == 4 - poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, skpv.vec+3, noiseseed, 0, 1, 2, 3); - poly_getnoise_eta1_4x(e.vec+0, e.vec+1, e.vec+2, e.vec+3, noiseseed, 4, 5, 6, 7); -#endif - - polyvec_ntt(&skpv); - polyvec_reduce(&skpv); - polyvec_ntt(&e); - - // matrix-vector multiplication - for(i=0;i -#include "params.h" -#include "polyvec.h" - -#define gen_matrix KYBER_NAMESPACE(gen_matrix) -void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed); - -#define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand) -void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], - uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], - const uint8_t coins[KYBER_SYMBYTES]); - -#define indcpa_enc KYBER_NAMESPACE(indcpa_enc) -void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], - const uint8_t m[KYBER_INDCPA_MSGBYTES], - const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], - const uint8_t coins[KYBER_SYMBYTES]); - -#define indcpa_dec KYBER_NAMESPACE(indcpa_dec) -void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES], - const uint8_t c[KYBER_INDCPA_BYTES], - const uint8_t 
sk[KYBER_INDCPA_SECRETKEYBYTES]); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/invntt.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/invntt.S deleted file mode 100644 index 76d4189996..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/invntt.S +++ /dev/null @@ -1,193 +0,0 @@ -#include "consts.h" -.include "shuffle.inc" -.include "fq.inc" - -.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,zl0=2,zl1=2,zh0=3,zh1=3 -vpsubw %ymm\rl0,%ymm\rh0,%ymm12 -vpaddw %ymm\rh0,%ymm\rl0,%ymm\rl0 -vpsubw %ymm\rl1,%ymm\rh1,%ymm13 - -vpmullw %ymm\zl0,%ymm12,%ymm\rh0 -vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl1 -vpsubw %ymm\rl2,%ymm\rh2,%ymm14 - -vpmullw %ymm\zl0,%ymm13,%ymm\rh1 -vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl2 -vpsubw %ymm\rl3,%ymm\rh3,%ymm15 - -vpmullw %ymm\zl1,%ymm14,%ymm\rh2 -vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl3 -vpmullw %ymm\zl1,%ymm15,%ymm\rh3 - -vpmulhw %ymm\zh0,%ymm12,%ymm12 -vpmulhw %ymm\zh0,%ymm13,%ymm13 - -vpmulhw %ymm\zh1,%ymm14,%ymm14 -vpmulhw %ymm\zh1,%ymm15,%ymm15 - -vpmulhw %ymm0,%ymm\rh0,%ymm\rh0 - -vpmulhw %ymm0,%ymm\rh1,%ymm\rh1 - -vpmulhw %ymm0,%ymm\rh2,%ymm\rh2 -vpmulhw %ymm0,%ymm\rh3,%ymm\rh3 - -# - -# - -vpsubw %ymm\rh0,%ymm12,%ymm\rh0 - -vpsubw %ymm\rh1,%ymm13,%ymm\rh1 - -vpsubw %ymm\rh2,%ymm14,%ymm\rh2 -vpsubw %ymm\rh3,%ymm15,%ymm\rh3 -.endm - -.macro intt_levels0t5 off -/* level 0 */ -vmovdqa _16XFLO*2(%rsi),%ymm2 -vmovdqa _16XFHI*2(%rsi),%ymm3 - -vmovdqa (128*\off+ 0)*2(%rdi),%ymm4 -vmovdqa (128*\off+ 32)*2(%rdi),%ymm6 -vmovdqa (128*\off+ 16)*2(%rdi),%ymm5 -vmovdqa (128*\off+ 48)*2(%rdi),%ymm7 - -fqmulprecomp 2,3,4 -fqmulprecomp 2,3,6 -fqmulprecomp 2,3,5 -fqmulprecomp 2,3,7 - -vmovdqa (128*\off+ 64)*2(%rdi),%ymm8 -vmovdqa (128*\off+ 96)*2(%rdi),%ymm10 -vmovdqa (128*\off+ 80)*2(%rdi),%ymm9 -vmovdqa (128*\off+112)*2(%rdi),%ymm11 - -fqmulprecomp 2,3,8 -fqmulprecomp 2,3,10 -fqmulprecomp 2,3,9 -fqmulprecomp 2,3,11 - -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+208)*2(%rsi),%ymm15 -vpermq 
$0x4E,(_ZETAS_EXP+(1-\off)*224+176)*2(%rsi),%ymm1 -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+224)*2(%rsi),%ymm2 -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+192)*2(%rsi),%ymm3 -vmovdqa _REVIDXB*2(%rsi),%ymm12 -vpshufb %ymm12,%ymm15,%ymm15 -vpshufb %ymm12,%ymm1,%ymm1 -vpshufb %ymm12,%ymm2,%ymm2 -vpshufb %ymm12,%ymm3,%ymm3 - -butterfly 4,5,8,9,6,7,10,11,15,1,2,3 - -/* level 1 */ -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+144)*2(%rsi),%ymm2 -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+160)*2(%rsi),%ymm3 -vmovdqa _REVIDXB*2(%rsi),%ymm1 -vpshufb %ymm1,%ymm2,%ymm2 -vpshufb %ymm1,%ymm3,%ymm3 - -butterfly 4,5,6,7,8,9,10,11,2,2,3,3 - -shuffle1 4,5,3,5 -shuffle1 6,7,4,7 -shuffle1 8,9,6,9 -shuffle1 10,11,8,11 - -/* level 2 */ -vmovdqa _REVIDXD*2(%rsi),%ymm12 -vpermd (_ZETAS_EXP+(1-\off)*224+112)*2(%rsi),%ymm12,%ymm2 -vpermd (_ZETAS_EXP+(1-\off)*224+128)*2(%rsi),%ymm12,%ymm10 - -butterfly 3,4,6,8,5,7,9,11,2,2,10,10 - -vmovdqa _16XV*2(%rsi),%ymm1 -red16 3 - -shuffle2 3,4,10,4 -shuffle2 6,8,3,8 -shuffle2 5,7,6,7 -shuffle2 9,11,5,11 - -/* level 3 */ -vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+80)*2(%rsi),%ymm2 -vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+96)*2(%rsi),%ymm9 - -butterfly 10,3,6,5,4,8,7,11,2,2,9,9 - -shuffle4 10,3,9,3 -shuffle4 6,5,10,5 -shuffle4 4,8,6,8 -shuffle4 7,11,4,11 - -/* level 4 */ -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+48)*2(%rsi),%ymm2 -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+64)*2(%rsi),%ymm7 - -butterfly 9,10,6,4,3,5,8,11,2,2,7,7 - -red16 9 - -shuffle8 9,10,7,10 -shuffle8 6,4,9,4 -shuffle8 3,5,6,5 -shuffle8 8,11,3,11 - -/* level 5 */ -vmovdqa (_ZETAS_EXP+(1-\off)*224+16)*2(%rsi),%ymm2 -vmovdqa (_ZETAS_EXP+(1-\off)*224+32)*2(%rsi),%ymm8 - -butterfly 7,9,6,3,10,4,5,11,2,2,8,8 - -vmovdqa %ymm7,(128*\off+ 0)*2(%rdi) -vmovdqa %ymm9,(128*\off+ 16)*2(%rdi) -vmovdqa %ymm6,(128*\off+ 32)*2(%rdi) -vmovdqa %ymm3,(128*\off+ 48)*2(%rdi) -vmovdqa %ymm10,(128*\off+ 64)*2(%rdi) -vmovdqa %ymm4,(128*\off+ 80)*2(%rdi) -vmovdqa %ymm5,(128*\off+ 96)*2(%rdi) -vmovdqa %ymm11,(128*\off+112)*2(%rdi) -.endm - 
-.macro intt_level6 off -/* level 6 */ -vmovdqa (64*\off+ 0)*2(%rdi),%ymm4 -vmovdqa (64*\off+128)*2(%rdi),%ymm8 -vmovdqa (64*\off+ 16)*2(%rdi),%ymm5 -vmovdqa (64*\off+144)*2(%rdi),%ymm9 -vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm2 - -vmovdqa (64*\off+ 32)*2(%rdi),%ymm6 -vmovdqa (64*\off+160)*2(%rdi),%ymm10 -vmovdqa (64*\off+ 48)*2(%rdi),%ymm7 -vmovdqa (64*\off+176)*2(%rdi),%ymm11 -vpbroadcastq (_ZETAS_EXP+4)*2(%rsi),%ymm3 - -butterfly 4,5,6,7,8,9,10,11 - -.if \off == 0 -red16 4 -.endif - -vmovdqa %ymm4,(64*\off+ 0)*2(%rdi) -vmovdqa %ymm5,(64*\off+ 16)*2(%rdi) -vmovdqa %ymm6,(64*\off+ 32)*2(%rdi) -vmovdqa %ymm7,(64*\off+ 48)*2(%rdi) -vmovdqa %ymm8,(64*\off+128)*2(%rdi) -vmovdqa %ymm9,(64*\off+144)*2(%rdi) -vmovdqa %ymm10,(64*\off+160)*2(%rdi) -vmovdqa %ymm11,(64*\off+176)*2(%rdi) -.endm - -.text -.global cdecl(invntt_avx) -cdecl(invntt_avx): -vmovdqa _16XQ*2(%rsi),%ymm0 - -intt_levels0t5 0 -intt_levels0t5 1 - -intt_level6 0 -intt_level6 1 -ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/kem.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/kem.c deleted file mode 100644 index 63abc1029c..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/kem.c +++ /dev/null @@ -1,169 +0,0 @@ -#include -#include -#include -#include "params.h" -#include "kem.h" -#include "indcpa.h" -#include "verify.h" -#include "symmetric.h" -#include "randombytes.h" -/************************************************* -* Name: crypto_kem_keypair_derand -* -* Description: Generates public and private key -* for CCA-secure Kyber key encapsulation mechanism -* -* Arguments: - uint8_t *pk: pointer to output public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key -* (an already allocated array of KYBER_SECRETKEYBYTES bytes) -* - uint8_t *coins: pointer to input randomness -* (an already allocated array filled with 2*KYBER_SYMBYTES random bytes) -** -* Returns 0 (success) 
-**************************************************/ -int crypto_kem_keypair_derand(uint8_t *pk, - uint8_t *sk, - const uint8_t *coins) -{ - indcpa_keypair_derand(pk, sk, coins); - memcpy(sk+KYBER_INDCPA_SECRETKEYBYTES, pk, KYBER_PUBLICKEYBYTES); - hash_h(sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); - /* Value z for pseudo-random output on reject */ - memcpy(sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, coins+KYBER_SYMBYTES, KYBER_SYMBYTES); - return 0; -} - -/************************************************* -* Name: crypto_kem_keypair -* -* Description: Generates public and private key -* for CCA-secure Kyber key encapsulation mechanism -* -* Arguments: - uint8_t *pk: pointer to output public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key -* (an already allocated array of KYBER_SECRETKEYBYTES bytes) -* -* Returns 0 (success) -**************************************************/ -int crypto_kem_keypair(uint8_t *pk, - uint8_t *sk) -{ - uint8_t coins[2*KYBER_SYMBYTES]; - randombytes(coins, 2*KYBER_SYMBYTES); - crypto_kem_keypair_derand(pk, sk, coins); - return 0; -} - -/************************************************* -* Name: crypto_kem_enc_derand -* -* Description: Generates cipher text and shared -* secret for given public key -* -* Arguments: - uint8_t *ct: pointer to output cipher text -* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) -* - uint8_t *ss: pointer to output shared secret -* (an already allocated array of KYBER_SSBYTES bytes) -* - const uint8_t *pk: pointer to input public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* - const uint8_t *coins: pointer to input randomness -* (an already allocated array filled with KYBER_SYMBYTES random bytes) -** -* Returns 0 (success) -**************************************************/ -int crypto_kem_enc_derand(uint8_t *ct, - uint8_t *ss, - const uint8_t *pk, - const uint8_t *coins) -{ - uint8_t 
buf[2*KYBER_SYMBYTES]; - /* Will contain key, coins */ - uint8_t kr[2*KYBER_SYMBYTES]; - - memcpy(buf, coins, KYBER_SYMBYTES); - - /* Multitarget countermeasure for coins + contributory KEM */ - hash_h(buf+KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); - hash_g(kr, buf, 2*KYBER_SYMBYTES); - - /* coins are in kr+KYBER_SYMBYTES */ - indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES); - - memcpy(ss,kr,KYBER_SYMBYTES); - return 0; -} - -/************************************************* -* Name: crypto_kem_enc -* -* Description: Generates cipher text and shared -* secret for given public key -* -* Arguments: - uint8_t *ct: pointer to output cipher text -* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) -* - uint8_t *ss: pointer to output shared secret -* (an already allocated array of KYBER_SSBYTES bytes) -* - const uint8_t *pk: pointer to input public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* -* Returns 0 (success) -**************************************************/ -int crypto_kem_enc(uint8_t *ct, - uint8_t *ss, - const uint8_t *pk) -{ - uint8_t coins[KYBER_SYMBYTES]; - randombytes(coins, KYBER_SYMBYTES); - crypto_kem_enc_derand(ct, ss, pk, coins); - return 0; -} - -/************************************************* -* Name: crypto_kem_dec -* -* Description: Generates shared secret for given -* cipher text and private key -* -* Arguments: - uint8_t *ss: pointer to output shared secret -* (an already allocated array of KYBER_SSBYTES bytes) -* - const uint8_t *ct: pointer to input cipher text -* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) -* - const uint8_t *sk: pointer to input private key -* (an already allocated array of KYBER_SECRETKEYBYTES bytes) -* -* Returns 0. -* -* On failure, ss will contain a pseudo-random value. 
-**************************************************/ -int crypto_kem_dec(uint8_t *ss, - const uint8_t *ct, - const uint8_t *sk) -{ - int fail; - uint8_t buf[2*KYBER_SYMBYTES]; - /* Will contain key, coins */ - uint8_t kr[2*KYBER_SYMBYTES]; - uint8_t cmp[KYBER_CIPHERTEXTBYTES+KYBER_SYMBYTES]; - const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES; - - indcpa_dec(buf, ct, sk); - - /* Multitarget countermeasure for coins + contributory KEM */ - memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES); - hash_g(kr, buf, 2*KYBER_SYMBYTES); - - /* coins are in kr+KYBER_SYMBYTES */ - indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES); - - fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); - - /* Compute rejection key */ - rkprf(ss,sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES,ct); - - /* Copy true key to return buffer if fail is false */ - cmov(ss,kr,KYBER_SYMBYTES,!fail); - - return 0; -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/kem.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/kem.h deleted file mode 100644 index 234f11966b..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/kem.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef KEM_H -#define KEM_H - -#include -#include "params.h" - -#define CRYPTO_SECRETKEYBYTES KYBER_SECRETKEYBYTES -#define CRYPTO_PUBLICKEYBYTES KYBER_PUBLICKEYBYTES -#define CRYPTO_CIPHERTEXTBYTES KYBER_CIPHERTEXTBYTES -#define CRYPTO_BYTES KYBER_SSBYTES - -#if (KYBER_K == 2) -#define CRYPTO_ALGNAME "Kyber512" -#elif (KYBER_K == 3) -#define CRYPTO_ALGNAME "Kyber768" -#elif (KYBER_K == 4) -#define CRYPTO_ALGNAME "Kyber1024" -#endif - -#define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand) -int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); - -#define crypto_kem_keypair KYBER_NAMESPACE(keypair) -int crypto_kem_keypair(uint8_t *pk, uint8_t *sk); - -#define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand) -int crypto_kem_enc_derand(uint8_t *ct, 
uint8_t *ss, const uint8_t *pk, const uint8_t *coins); - -#define crypto_kem_enc KYBER_NAMESPACE(enc) -int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); - -#define crypto_kem_dec KYBER_NAMESPACE(dec) -int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/ntt.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/ntt.S deleted file mode 100644 index 0ce7b41297..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/ntt.S +++ /dev/null @@ -1,189 +0,0 @@ -#include "consts.h" -.include "shuffle.inc" - -.macro mul rh0,rh1,rh2,rh3,zl0=15,zl1=15,zh0=2,zh1=2 -vpmullw %ymm\zl0,%ymm\rh0,%ymm12 -vpmullw %ymm\zl0,%ymm\rh1,%ymm13 - -vpmullw %ymm\zl1,%ymm\rh2,%ymm14 -vpmullw %ymm\zl1,%ymm\rh3,%ymm15 - -vpmulhw %ymm\zh0,%ymm\rh0,%ymm\rh0 -vpmulhw %ymm\zh0,%ymm\rh1,%ymm\rh1 - -vpmulhw %ymm\zh1,%ymm\rh2,%ymm\rh2 -vpmulhw %ymm\zh1,%ymm\rh3,%ymm\rh3 -.endm - -.macro reduce -vpmulhw %ymm0,%ymm12,%ymm12 -vpmulhw %ymm0,%ymm13,%ymm13 - -vpmulhw %ymm0,%ymm14,%ymm14 -vpmulhw %ymm0,%ymm15,%ymm15 -.endm - -.macro update rln,rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 -vpaddw %ymm\rh0,%ymm\rl0,%ymm\rln -vpsubw %ymm\rh0,%ymm\rl0,%ymm\rh0 -vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl0 - -vpsubw %ymm\rh1,%ymm\rl1,%ymm\rh1 -vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl1 -vpsubw %ymm\rh2,%ymm\rl2,%ymm\rh2 - -vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl2 -vpsubw %ymm\rh3,%ymm\rl3,%ymm\rh3 - -vpsubw %ymm12,%ymm\rln,%ymm\rln -vpaddw %ymm12,%ymm\rh0,%ymm\rh0 -vpsubw %ymm13,%ymm\rl0,%ymm\rl0 - -vpaddw %ymm13,%ymm\rh1,%ymm\rh1 -vpsubw %ymm14,%ymm\rl1,%ymm\rl1 -vpaddw %ymm14,%ymm\rh2,%ymm\rh2 - -vpsubw %ymm15,%ymm\rl2,%ymm\rl2 -vpaddw %ymm15,%ymm\rh3,%ymm\rh3 -.endm - -.macro level0 off -vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm15 -vmovdqa (64*\off+128)*2(%rdi),%ymm8 -vmovdqa (64*\off+144)*2(%rdi),%ymm9 -vmovdqa (64*\off+160)*2(%rdi),%ymm10 -vmovdqa (64*\off+176)*2(%rdi),%ymm11 -vpbroadcastq 
(_ZETAS_EXP+4)*2(%rsi),%ymm2 - -mul 8,9,10,11 - -vmovdqa (64*\off+ 0)*2(%rdi),%ymm4 -vmovdqa (64*\off+ 16)*2(%rdi),%ymm5 -vmovdqa (64*\off+ 32)*2(%rdi),%ymm6 -vmovdqa (64*\off+ 48)*2(%rdi),%ymm7 - -reduce -update 3,4,5,6,7,8,9,10,11 - -vmovdqa %ymm3,(64*\off+ 0)*2(%rdi) -vmovdqa %ymm4,(64*\off+ 16)*2(%rdi) -vmovdqa %ymm5,(64*\off+ 32)*2(%rdi) -vmovdqa %ymm6,(64*\off+ 48)*2(%rdi) -vmovdqa %ymm8,(64*\off+128)*2(%rdi) -vmovdqa %ymm9,(64*\off+144)*2(%rdi) -vmovdqa %ymm10,(64*\off+160)*2(%rdi) -vmovdqa %ymm11,(64*\off+176)*2(%rdi) -.endm - -.macro levels1t6 off -/* level 1 */ -vmovdqa (_ZETAS_EXP+224*\off+16)*2(%rsi),%ymm15 -vmovdqa (128*\off+ 64)*2(%rdi),%ymm8 -vmovdqa (128*\off+ 80)*2(%rdi),%ymm9 -vmovdqa (128*\off+ 96)*2(%rdi),%ymm10 -vmovdqa (128*\off+112)*2(%rdi),%ymm11 -vmovdqa (_ZETAS_EXP+224*\off+32)*2(%rsi),%ymm2 - -mul 8,9,10,11 - -vmovdqa (128*\off+ 0)*2(%rdi),%ymm4 -vmovdqa (128*\off+ 16)*2(%rdi),%ymm5 -vmovdqa (128*\off+ 32)*2(%rdi),%ymm6 -vmovdqa (128*\off+ 48)*2(%rdi),%ymm7 - -reduce -update 3,4,5,6,7,8,9,10,11 - -/* level 2 */ -shuffle8 5,10,7,10 -shuffle8 6,11,5,11 - -vmovdqa (_ZETAS_EXP+224*\off+48)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+64)*2(%rsi),%ymm2 - -mul 7,10,5,11 - -shuffle8 3,8,6,8 -shuffle8 4,9,3,9 - -reduce -update 4,6,8,3,9,7,10,5,11 - -/* level 3 */ -shuffle4 8,5,9,5 -shuffle4 3,11,8,11 - -vmovdqa (_ZETAS_EXP+224*\off+80)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+96)*2(%rsi),%ymm2 - -mul 9,5,8,11 - -shuffle4 4,7,3,7 -shuffle4 6,10,4,10 - -reduce -update 6,3,7,4,10,9,5,8,11 - -/* level 4 */ -shuffle2 7,8,10,8 -shuffle2 4,11,7,11 - -vmovdqa (_ZETAS_EXP+224*\off+112)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+128)*2(%rsi),%ymm2 - -mul 10,8,7,11 - -shuffle2 6,9,4,9 -shuffle2 3,5,6,5 - -reduce -update 3,4,9,6,5,10,8,7,11 - -/* level 5 */ -shuffle1 9,7,5,7 -shuffle1 6,11,9,11 - -vmovdqa (_ZETAS_EXP+224*\off+144)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+160)*2(%rsi),%ymm2 - -mul 5,7,9,11 - -shuffle1 3,10,6,10 -shuffle1 4,8,3,8 - 
-reduce -update 4,6,10,3,8,5,7,9,11 - -/* level 6 */ -vmovdqa (_ZETAS_EXP+224*\off+176)*2(%rsi),%ymm14 -vmovdqa (_ZETAS_EXP+224*\off+208)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+192)*2(%rsi),%ymm8 -vmovdqa (_ZETAS_EXP+224*\off+224)*2(%rsi),%ymm2 - -mul 10,3,9,11,14,15,8,2 - -reduce -update 8,4,6,5,7,10,3,9,11 - -vmovdqa %ymm8,(128*\off+ 0)*2(%rdi) -vmovdqa %ymm4,(128*\off+ 16)*2(%rdi) -vmovdqa %ymm10,(128*\off+ 32)*2(%rdi) -vmovdqa %ymm3,(128*\off+ 48)*2(%rdi) -vmovdqa %ymm6,(128*\off+ 64)*2(%rdi) -vmovdqa %ymm5,(128*\off+ 80)*2(%rdi) -vmovdqa %ymm9,(128*\off+ 96)*2(%rdi) -vmovdqa %ymm11,(128*\off+112)*2(%rdi) -.endm - -.text -.global cdecl(ntt_avx) -cdecl(ntt_avx): -vmovdqa _16XQ*2(%rsi),%ymm0 - -level0 0 -level0 1 - -levels1t6 0 -levels1t6 1 - -ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/ntt.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/ntt.h deleted file mode 100644 index a4f48e343b..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/ntt.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef NTT_H -#define NTT_H - -#include -#include - -#define ntt_avx KYBER_NAMESPACE(ntt_avx) -void ntt_avx(__m256i *r, const __m256i *qdata); -#define invntt_avx KYBER_NAMESPACE(invntt_avx) -void invntt_avx(__m256i *r, const __m256i *qdata); - -#define nttpack_avx KYBER_NAMESPACE(nttpack_avx) -void nttpack_avx(__m256i *r, const __m256i *qdata); -#define nttunpack_avx KYBER_NAMESPACE(nttunpack_avx) -void nttunpack_avx(__m256i *r, const __m256i *qdata); - -#define basemul_avx KYBER_NAMESPACE(basemul_avx) -void basemul_avx(__m256i *r, - const __m256i *a, - const __m256i *b, - const __m256i *qdata); - -#define ntttobytes_avx KYBER_NAMESPACE(ntttobytes_avx) -void ntttobytes_avx(uint8_t *r, const __m256i *a, const __m256i *qdata); -#define nttfrombytes_avx KYBER_NAMESPACE(nttfrombytes_avx) -void nttfrombytes_avx(__m256i *r, const uint8_t *a, const __m256i *qdata); - -#endif diff --git 
a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/params.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/params.h deleted file mode 100644 index ecfabce4a5..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/params.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef PARAMS_H -#define PARAMS_H - -#ifndef KYBER_K -#define KYBER_K 3 /* Change this for different security strengths */ -#endif - -//#define KYBER_90S /* Uncomment this if you want the 90S variant */ - -/* Don't change parameters below this line */ -#if (KYBER_K == 2) -#ifdef KYBER_90S -#define KYBER_NAMESPACE(s) pqcrystals_kyber512_90s_avx2_##s -#else -#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_512_avx2_##s -#endif -#elif (KYBER_K == 3) -#ifdef KYBER_90S -#define KYBER_NAMESPACE(s) pqcrystals_kyber768_90s_avx2_##s -#else -#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_768_avx2_##s -#endif -#elif (KYBER_K == 4) -#ifdef KYBER_90S -#define KYBER_NAMESPACE(s) pqcrystals_kyber1024_90s_avx2_##s -#else -#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_1024_avx2_##s -#endif -#else -#error "KYBER_K must be in {2,3,4}" -#endif - -#define KYBER_N 256 -#define KYBER_Q 3329 - -#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ -#define KYBER_SSBYTES 32 /* size in bytes of shared key */ - -#define KYBER_POLYBYTES 384 -#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) - -#if KYBER_K == 2 -#define KYBER_ETA1 3 -#define KYBER_POLYCOMPRESSEDBYTES 128 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) -#elif KYBER_K == 3 -#define KYBER_ETA1 2 -#define KYBER_POLYCOMPRESSEDBYTES 128 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) -#elif KYBER_K == 4 -#define KYBER_ETA1 2 -#define KYBER_POLYCOMPRESSEDBYTES 160 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) -#endif - -#define KYBER_ETA2 2 - -#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES) -#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) -#define KYBER_INDCPA_SECRETKEYBYTES 
(KYBER_POLYVECBYTES) -#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) - -#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) -/* 32 bytes of additional space to save H(pk) */ -#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) -#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES) - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/poly.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/poly.c deleted file mode 100644 index 681fd6d23e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/poly.c +++ /dev/null @@ -1,519 +0,0 @@ -#include -#include -#include -#include "align.h" -#include "fips202x4.h" -#include "params.h" -#include "poly.h" -#include "ntt.h" -#include "consts.h" -#include "reduce.h" -#include "cbd.h" -#include "symmetric.h" - -/************************************************* -* Name: poly_compress -* -* Description: Compression and subsequent serialization of a polynomial. -* The coefficients of the input polynomial are assumed to -* lie in the invertal [0,q], i.e. the polynomial must be reduced -* by poly_reduce(). 
-* -* Arguments: - uint8_t *r: pointer to output byte array -* (of length KYBER_POLYCOMPRESSEDBYTES) -* - const poly *a: pointer to input polynomial -**************************************************/ -#if (KYBER_POLYCOMPRESSEDBYTES == 128) -void poly_compress(uint8_t r[128], const poly * restrict a) -{ - unsigned int i; - __m256i f0, f1, f2, f3; - const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); - const __m256i shift1 = _mm256_set1_epi16(1 << 9); - const __m256i mask = _mm256_set1_epi16(15); - const __m256i shift2 = _mm256_set1_epi16((16 << 8) + 1); - const __m256i permdidx = _mm256_set_epi32(7,3,6,2,5,1,4,0); - - for(i=0;ivec[4*i+0]); - f1 = _mm256_load_si256(&a->vec[4*i+1]); - f2 = _mm256_load_si256(&a->vec[4*i+2]); - f3 = _mm256_load_si256(&a->vec[4*i+3]); - f0 = _mm256_mulhi_epi16(f0,v); - f1 = _mm256_mulhi_epi16(f1,v); - f2 = _mm256_mulhi_epi16(f2,v); - f3 = _mm256_mulhi_epi16(f3,v); - f0 = _mm256_mulhrs_epi16(f0,shift1); - f1 = _mm256_mulhrs_epi16(f1,shift1); - f2 = _mm256_mulhrs_epi16(f2,shift1); - f3 = _mm256_mulhrs_epi16(f3,shift1); - f0 = _mm256_and_si256(f0,mask); - f1 = _mm256_and_si256(f1,mask); - f2 = _mm256_and_si256(f2,mask); - f3 = _mm256_and_si256(f3,mask); - f0 = _mm256_packus_epi16(f0,f1); - f2 = _mm256_packus_epi16(f2,f3); - f0 = _mm256_maddubs_epi16(f0,shift2); - f2 = _mm256_maddubs_epi16(f2,shift2); - f0 = _mm256_packus_epi16(f0,f2); - f0 = _mm256_permutevar8x32_epi32(f0,permdidx); - _mm256_storeu_si256((__m256i *)&r[32*i],f0); - } -} - -void poly_decompress(poly * restrict r, const uint8_t a[128]) -{ - unsigned int i; - __m128i t; - __m256i f; - const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); - const __m256i shufbidx = _mm256_set_epi8(7,7,7,7,6,6,6,6,5,5,5,5,4,4,4,4, - 3,3,3,3,2,2,2,2,1,1,1,1,0,0,0,0); - const __m256i mask = _mm256_set1_epi32(0x00F0000F); - const __m256i shift = _mm256_set1_epi32((128 << 16) + 2048); - - for(i=0;ivec[i],f); - } -} - -#elif (KYBER_POLYCOMPRESSEDBYTES == 160) -void poly_compress(uint8_t 
r[160], const poly * restrict a) -{ - unsigned int i; - __m256i f0, f1; - __m128i t0, t1; - const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); - const __m256i shift1 = _mm256_set1_epi16(1 << 10); - const __m256i mask = _mm256_set1_epi16(31); - const __m256i shift2 = _mm256_set1_epi16((32 << 8) + 1); - const __m256i shift3 = _mm256_set1_epi32((1024 << 16) + 1); - const __m256i sllvdidx = _mm256_set1_epi64x(12); - const __m256i shufbidx = _mm256_set_epi8( 8,-1,-1,-1,-1,-1, 4, 3, 2, 1, 0,-1,12,11,10, 9, - -1,12,11,10, 9, 8,-1,-1,-1,-1,-1 ,4, 3, 2, 1, 0); - - for(i=0;ivec[2*i+0]); - f1 = _mm256_load_si256(&a->vec[2*i+1]); - f0 = _mm256_mulhi_epi16(f0,v); - f1 = _mm256_mulhi_epi16(f1,v); - f0 = _mm256_mulhrs_epi16(f0,shift1); - f1 = _mm256_mulhrs_epi16(f1,shift1); - f0 = _mm256_and_si256(f0,mask); - f1 = _mm256_and_si256(f1,mask); - f0 = _mm256_packus_epi16(f0,f1); - f0 = _mm256_maddubs_epi16(f0,shift2); // a0 a1 a2 a3 b0 b1 b2 b3 a4 a5 a6 a7 b4 b5 b6 b7 - f0 = _mm256_madd_epi16(f0,shift3); // a0 a1 b0 b1 a2 a3 b2 b3 - f0 = _mm256_sllv_epi32(f0,sllvdidx); - f0 = _mm256_srlv_epi64(f0,sllvdidx); - f0 = _mm256_shuffle_epi8(f0,shufbidx); - t0 = _mm256_castsi256_si128(f0); - t1 = _mm256_extracti128_si256(f0,1); - t0 = _mm_blendv_epi8(t0,t1,_mm256_castsi256_si128(shufbidx)); - _mm_storeu_si128((__m128i *)&r[20*i+ 0],t0); - memcpy(&r[20*i+16],&t1,4); - } -} - -void poly_decompress(poly * restrict r, const uint8_t a[160]) -{ - unsigned int i; - __m128i t; - __m256i f; - int16_t ti; - const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); - const __m256i shufbidx = _mm256_set_epi8(9,9,9,8,8,8,8,7,7,6,6,6,6,5,5,5, - 4,4,4,3,3,3,3,2,2,1,1,1,1,0,0,0); - const __m256i mask = _mm256_set_epi16(248,1984,62,496,3968,124,992,31, - 248,1984,62,496,3968,124,992,31); - const __m256i shift = _mm256_set_epi16(128,16,512,64,8,256,32,1024, - 128,16,512,64,8,256,32,1024); - - for(i=0;ivec[i],f); - } -} - -#endif - -/************************************************* -* Name: 
poly_tobytes -* -* Description: Serialization of a polynomial in NTT representation. -* The coefficients of the input polynomial are assumed to -* lie in the invertal [0,q], i.e. the polynomial must be reduced -* by poly_reduce(). The coefficients are orderd as output by -* poly_ntt(); the serialized output coefficients are in bitreversed -* order. -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for KYBER_POLYBYTES bytes) -* - poly *a: pointer to input polynomial -**************************************************/ -void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a) -{ - ntttobytes_avx(r, a->vec, qdata.vec); -} - -/************************************************* -* Name: poly_frombytes -* -* Description: De-serialization of a polynomial; -* inverse of poly_tobytes -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *a: pointer to input byte array -* (of KYBER_POLYBYTES bytes) -**************************************************/ -void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]) -{ - nttfrombytes_avx(r->vec, a, qdata.vec); -} - -/************************************************* -* Name: poly_frommsg -* -* Description: Convert 32-byte message to polynomial -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *msg: pointer to input message -**************************************************/ -void poly_frommsg(poly * restrict r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]) -{ -#if (KYBER_INDCPA_MSGBYTES != 32) -#error "KYBER_INDCPA_MSGBYTES must be equal to 32!" 
-#endif - __m256i f, g0, g1, g2, g3, h0, h1, h2, h3; - const __m256i shift = _mm256_broadcastsi128_si256(_mm_set_epi32(0,1,2,3)); - const __m256i idx = _mm256_broadcastsi128_si256(_mm_set_epi8(15,14,11,10,7,6,3,2,13,12,9,8,5,4,1,0)); - const __m256i hqs = _mm256_set1_epi16((KYBER_Q+1)/2); - -#define FROMMSG64(i) \ - g3 = _mm256_shuffle_epi32(f,0x55*i); \ - g3 = _mm256_sllv_epi32(g3,shift); \ - g3 = _mm256_shuffle_epi8(g3,idx); \ - g0 = _mm256_slli_epi16(g3,12); \ - g1 = _mm256_slli_epi16(g3,8); \ - g2 = _mm256_slli_epi16(g3,4); \ - g0 = _mm256_srai_epi16(g0,15); \ - g1 = _mm256_srai_epi16(g1,15); \ - g2 = _mm256_srai_epi16(g2,15); \ - g3 = _mm256_srai_epi16(g3,15); \ - g0 = _mm256_and_si256(g0,hqs); /* 19 18 17 16 3 2 1 0 */ \ - g1 = _mm256_and_si256(g1,hqs); /* 23 22 21 20 7 6 5 4 */ \ - g2 = _mm256_and_si256(g2,hqs); /* 27 26 25 24 11 10 9 8 */ \ - g3 = _mm256_and_si256(g3,hqs); /* 31 30 29 28 15 14 13 12 */ \ - h0 = _mm256_unpacklo_epi64(g0,g1); \ - h2 = _mm256_unpackhi_epi64(g0,g1); \ - h1 = _mm256_unpacklo_epi64(g2,g3); \ - h3 = _mm256_unpackhi_epi64(g2,g3); \ - g0 = _mm256_permute2x128_si256(h0,h1,0x20); \ - g2 = _mm256_permute2x128_si256(h0,h1,0x31); \ - g1 = _mm256_permute2x128_si256(h2,h3,0x20); \ - g3 = _mm256_permute2x128_si256(h2,h3,0x31); \ - _mm256_store_si256(&r->vec[0+2*i+0],g0); \ - _mm256_store_si256(&r->vec[0+2*i+1],g1); \ - _mm256_store_si256(&r->vec[8+2*i+0],g2); \ - _mm256_store_si256(&r->vec[8+2*i+1],g3) - - f = _mm256_loadu_si256((__m256i *)msg); - FROMMSG64(0); - FROMMSG64(1); - FROMMSG64(2); - FROMMSG64(3); -} - -/************************************************* -* Name: poly_tomsg -* -* Description: Convert polynomial to 32-byte message. -* The coefficients of the input polynomial are assumed to -* lie in the invertal [0,q], i.e. the polynomial must be reduced -* by poly_reduce(). 
-* -* Arguments: - uint8_t *msg: pointer to output message -* - poly *a: pointer to input polynomial -**************************************************/ -void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly * restrict a) -{ - unsigned int i; - uint32_t small; - __m256i f0, f1, g0, g1; - const __m256i hq = _mm256_set1_epi16((KYBER_Q - 1)/2); - const __m256i hhq = _mm256_set1_epi16((KYBER_Q - 1)/4); - - for(i=0;ivec[2*i+0]); - f1 = _mm256_load_si256(&a->vec[2*i+1]); - f0 = _mm256_sub_epi16(hq, f0); - f1 = _mm256_sub_epi16(hq, f1); - g0 = _mm256_srai_epi16(f0, 15); - g1 = _mm256_srai_epi16(f1, 15); - f0 = _mm256_xor_si256(f0, g0); - f1 = _mm256_xor_si256(f1, g1); - f0 = _mm256_sub_epi16(f0, hhq); - f1 = _mm256_sub_epi16(f1, hhq); - f0 = _mm256_packs_epi16(f0, f1); - f0 = _mm256_permute4x64_epi64(f0, 0xD8); - small = _mm256_movemask_epi8(f0); - memcpy(&msg[4*i], &small, 4); - } -} - -/************************************************* -* Name: poly_getnoise_eta1 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter KYBER_ETA1 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *seed: pointer to input seed -* (of length KYBER_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) -{ - ALIGNED_UINT8(KYBER_ETA1*KYBER_N/4+32) buf; // +32 bytes as required by poly_cbd_eta1 - prf(buf.coeffs, KYBER_ETA1*KYBER_N/4, seed, nonce); - poly_cbd_eta1(r, buf.vec); -} - -/************************************************* -* Name: poly_getnoise_eta2 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter KYBER_ETA2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *seed: 
pointer to input seed -* (of length KYBER_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) -{ - ALIGNED_UINT8(KYBER_ETA2*KYBER_N/4) buf; - prf(buf.coeffs, KYBER_ETA2*KYBER_N/4, seed, nonce); - poly_cbd_eta2(r, buf.vec); -} - -#ifndef KYBER_90S -#define NOISE_NBLOCKS ((KYBER_ETA1*KYBER_N/4+SHAKE256_RATE-1)/SHAKE256_RATE) -void poly_getnoise_eta1_4x(poly *r0, - poly *r1, - poly *r2, - poly *r3, - const uint8_t seed[32], - uint8_t nonce0, - uint8_t nonce1, - uint8_t nonce2, - uint8_t nonce3) -{ - ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) buf[4]; - __m256i f; - shake256x4incctx state; - - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - buf[0].coeffs[32] = nonce0; - buf[1].coeffs[32] = nonce1; - buf[2].coeffs[32] = nonce2; - buf[3].coeffs[32] = nonce3; - - shake256x4_inc_init(&state); - shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33); - shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state); - shake256x4_inc_ctx_release(&state); - - poly_cbd_eta1(r0, buf[0].vec); - poly_cbd_eta1(r1, buf[1].vec); - poly_cbd_eta1(r2, buf[2].vec); - poly_cbd_eta1(r3, buf[3].vec); -} - -#if KYBER_K == 2 -void poly_getnoise_eta1122_4x(poly *r0, - poly *r1, - poly *r2, - poly *r3, - const uint8_t seed[32], - uint8_t nonce0, - uint8_t nonce1, - uint8_t nonce2, - uint8_t nonce3) -{ - ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) buf[4]; - __m256i f; - shake256x4incctx state; - - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - buf[0].coeffs[32] = nonce0; - 
buf[1].coeffs[32] = nonce1; - buf[2].coeffs[32] = nonce2; - buf[3].coeffs[32] = nonce3; - - shake256x4_inc_init(&state); - shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33); - shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state); - shake256x4_inc_ctx_release(&state); - - poly_cbd_eta1(r0, buf[0].vec); - poly_cbd_eta1(r1, buf[1].vec); - poly_cbd_eta2(r2, buf[2].vec); - poly_cbd_eta2(r3, buf[3].vec); -} -#endif -#endif - -/************************************************* -* Name: poly_ntt -* -* Description: Computes negacyclic number-theoretic transform (NTT) of -* a polynomial in place. -* Input coefficients assumed to be in normal order, -* output coefficients are in special order that is natural -* for the vectorization. Input coefficients are assumed to be -* bounded by q in absolute value, output coefficients are bounded -* by 16118 in absolute value. -* -* Arguments: - poly *r: pointer to in/output polynomial -**************************************************/ -void poly_ntt(poly *r) -{ - ntt_avx(r->vec, qdata.vec); -} - -/************************************************* -* Name: poly_invntt_tomont -* -* Description: Computes inverse of negacyclic number-theoretic transform (NTT) -* of a polynomial in place; -* Input coefficients assumed to be in special order from vectorized -* forward ntt, output in normal order. Input coefficients can be -* arbitrary 16-bit integers, output coefficients are bounded by 14870 -* in absolute value. -* -* Arguments: - poly *a: pointer to in/output polynomial -**************************************************/ -void poly_invntt_tomont(poly *r) -{ - invntt_avx(r->vec, qdata.vec); -} - -void poly_nttunpack(poly *r) -{ - nttunpack_avx(r->vec, qdata.vec); -} - -/************************************************* -* Name: poly_basemul_montgomery -* -* Description: Multiplication of two polynomials in NTT domain. 
-* One of the input polynomials needs to have coefficients -* bounded by q, the other polynomial can have arbitrary -* coefficients. Output coefficients are bounded by 6656. -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_basemul_montgomery(poly *r, const poly *a, const poly *b) -{ - basemul_avx(r->vec, a->vec, b->vec, qdata.vec); -} - -/************************************************* -* Name: poly_tomont -* -* Description: Inplace conversion of all coefficients of a polynomial -* from normal domain to Montgomery domain -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_tomont(poly *r) -{ - tomont_avx(r->vec, qdata.vec); -} - -/************************************************* -* Name: poly_reduce -* -* Description: Applies Barrett reduction to all coefficients of a polynomial -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_reduce(poly *r) -{ - reduce_avx(r->vec, qdata.vec); -} - -/************************************************* -* Name: poly_add -* -* Description: Add two polynomials. No modular reduction -* is performed. 
-* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_add(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - __m256i f0, f1; - - for(i=0;ivec[i]); - f1 = _mm256_load_si256(&b->vec[i]); - f0 = _mm256_add_epi16(f0, f1); - _mm256_store_si256(&r->vec[i], f0); - } -} - -/************************************************* -* Name: poly_sub -* -* Description: Subtract two polynomials. No modular reduction -* is performed. -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_sub(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - __m256i f0, f1; - - for(i=0;ivec[i]); - f1 = _mm256_load_si256(&b->vec[i]); - f0 = _mm256_sub_epi16(f0, f1); - _mm256_store_si256(&r->vec[i], f0); - } -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/poly.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/poly.h deleted file mode 100644 index 6a9cf71c70..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/poly.h +++ /dev/null @@ -1,77 +0,0 @@ -#ifndef POLY_H -#define POLY_H - -#include -#include "align.h" -#include "params.h" - -typedef ALIGNED_INT16(KYBER_N) poly; - -#define poly_compress KYBER_NAMESPACE(poly_compress) -void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a); -#define poly_decompress KYBER_NAMESPACE(poly_decompress) -void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]); - -#define poly_tobytes KYBER_NAMESPACE(poly_tobytes) -void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a); -#define poly_frombytes KYBER_NAMESPACE(poly_frombytes) -void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]); - -#define 
poly_frommsg KYBER_NAMESPACE(poly_frommsg) -void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]); -#define poly_tomsg KYBER_NAMESPACE(poly_tomsg) -void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r); - -#define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1) -void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); - -#define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2) -void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); - -#ifndef KYBER_90S -#define poly_getnoise_eta1_4x KYBER_NAMESPACE(poly_getnoise_eta2_4x) -void poly_getnoise_eta1_4x(poly *r0, - poly *r1, - poly *r2, - poly *r3, - const uint8_t seed[32], - uint8_t nonce0, - uint8_t nonce1, - uint8_t nonce2, - uint8_t nonce3); - -#if KYBER_K == 2 -#define poly_getnoise_eta1122_4x KYBER_NAMESPACE(poly_getnoise_eta1122_4x) -void poly_getnoise_eta1122_4x(poly *r0, - poly *r1, - poly *r2, - poly *r3, - const uint8_t seed[32], - uint8_t nonce0, - uint8_t nonce1, - uint8_t nonce2, - uint8_t nonce3); -#endif -#endif - - -#define poly_ntt KYBER_NAMESPACE(poly_ntt) -void poly_ntt(poly *r); -#define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont) -void poly_invntt_tomont(poly *r); -#define poly_nttunpack KYBER_NAMESPACE(poly_nttunpack) -void poly_nttunpack(poly *r); -#define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery) -void poly_basemul_montgomery(poly *r, const poly *a, const poly *b); -#define poly_tomont KYBER_NAMESPACE(poly_tomont) -void poly_tomont(poly *r); - -#define poly_reduce KYBER_NAMESPACE(poly_reduce) -void poly_reduce(poly *r); - -#define poly_add KYBER_NAMESPACE(poly_add) -void poly_add(poly *r, const poly *a, const poly *b); -#define poly_sub KYBER_NAMESPACE(poly_sub) -void poly_sub(poly *r, const poly *a, const poly *b); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/polyvec.c 
b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/polyvec.c deleted file mode 100644 index a0174b7b3f..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/polyvec.c +++ /dev/null @@ -1,307 +0,0 @@ -#include -#include -#include -#include "params.h" -#include "polyvec.h" -#include "poly.h" -#include "ntt.h" -#include "consts.h" - -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) -static void poly_compress10(uint8_t r[320], const poly * restrict a) -{ - unsigned int i; - __m256i f0, f1, f2; - __m128i t0, t1; - const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); - const __m256i v8 = _mm256_slli_epi16(v,3); - const __m256i off = _mm256_set1_epi16(15); - const __m256i shift1 = _mm256_set1_epi16(1 << 12); - const __m256i mask = _mm256_set1_epi16(1023); - const __m256i shift2 = _mm256_set1_epi64x((1024LL << 48) + (1LL << 32) + (1024 << 16) + 1); - const __m256i sllvdidx = _mm256_set1_epi64x(12); - const __m256i shufbidx = _mm256_set_epi8( 8, 4, 3, 2, 1, 0,-1,-1,-1,-1,-1,-1,12,11,10, 9, - -1,-1,-1,-1,-1,-1,12,11,10, 9, 8, 4, 3, 2, 1, 0); - - for(i=0;ivec[i]); - f1 = _mm256_mullo_epi16(f0,v8); - f2 = _mm256_add_epi16(f0,off); - f0 = _mm256_slli_epi16(f0,3); - f0 = _mm256_mulhi_epi16(f0,v); - f2 = _mm256_sub_epi16(f1,f2); - f1 = _mm256_andnot_si256(f1,f2); - f1 = _mm256_srli_epi16(f1,15); - f0 = _mm256_sub_epi16(f0,f1); - f0 = _mm256_mulhrs_epi16(f0,shift1); - f0 = _mm256_and_si256(f0,mask); - f0 = _mm256_madd_epi16(f0,shift2); - f0 = _mm256_sllv_epi32(f0,sllvdidx); - f0 = _mm256_srli_epi64(f0,12); - f0 = _mm256_shuffle_epi8(f0,shufbidx); - t0 = _mm256_castsi256_si128(f0); - t1 = _mm256_extracti128_si256(f0,1); - t0 = _mm_blend_epi16(t0,t1,0xE0); - _mm_storeu_si128((__m128i *)&r[20*i+ 0],t0); - memcpy(&r[20*i+16],&t1,4); - } -} - -static void poly_decompress10(poly * restrict r, const uint8_t a[320+12]) -{ - unsigned int i; - __m256i f; - const __m256i q = _mm256_set1_epi32((KYBER_Q << 16) + 4*KYBER_Q); - const __m256i shufbidx = 
_mm256_set_epi8(11,10,10, 9, 9, 8, 8, 7, - 6, 5, 5, 4, 4, 3, 3, 2, - 9, 8, 8, 7, 7, 6, 6, 5, - 4, 3, 3, 2, 2, 1, 1, 0); - const __m256i sllvdidx = _mm256_set1_epi64x(4); - const __m256i mask = _mm256_set1_epi32((32736 << 16) + 8184); - - for(i=0;ivec[i],f); - } -} - -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) -static void poly_compress11(uint8_t r[352+2], const poly * restrict a) -{ - unsigned int i; - __m256i f0, f1, f2; - __m128i t0, t1; - const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); - const __m256i v8 = _mm256_slli_epi16(v,3); - const __m256i off = _mm256_set1_epi16(36); - const __m256i shift1 = _mm256_set1_epi16(1 << 13); - const __m256i mask = _mm256_set1_epi16(2047); - const __m256i shift2 = _mm256_set1_epi64x((2048LL << 48) + (1LL << 32) + (2048 << 16) + 1); - const __m256i sllvdidx = _mm256_set1_epi64x(10); - const __m256i srlvqidx = _mm256_set_epi64x(30,10,30,10); - const __m256i shufbidx = _mm256_set_epi8( 4, 3, 2, 1, 0, 0,-1,-1,-1,-1,10, 9, 8, 7, 6, 5, - -1,-1,-1,-1,-1,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - - for(i=0;ivec[i]); - f1 = _mm256_mullo_epi16(f0,v8); - f2 = _mm256_add_epi16(f0,off); - f0 = _mm256_slli_epi16(f0,3); - f0 = _mm256_mulhi_epi16(f0,v); - f2 = _mm256_sub_epi16(f1,f2); - f1 = _mm256_andnot_si256(f1,f2); - f1 = _mm256_srli_epi16(f1,15); - f0 = _mm256_sub_epi16(f0,f1); - f0 = _mm256_mulhrs_epi16(f0,shift1); - f0 = _mm256_and_si256(f0,mask); - f0 = _mm256_madd_epi16(f0,shift2); - f0 = _mm256_sllv_epi32(f0,sllvdidx); - f1 = _mm256_bsrli_epi128(f0,8); - f0 = _mm256_srlv_epi64(f0,srlvqidx); - f1 = _mm256_slli_epi64(f1,34); - f0 = _mm256_add_epi64(f0,f1); - f0 = _mm256_shuffle_epi8(f0,shufbidx); - t0 = _mm256_castsi256_si128(f0); - t1 = _mm256_extracti128_si256(f0,1); - t0 = _mm_blendv_epi8(t0,t1,_mm256_castsi256_si128(shufbidx)); - _mm_storeu_si128((__m128i *)&r[22*i+ 0],t0); - _mm_storel_epi64((__m128i *)&r[22*i+16],t1); - } -} - -static void poly_decompress11(poly * restrict r, const uint8_t a[352+10]) -{ - 
unsigned int i; - __m256i f; - const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); - const __m256i shufbidx = _mm256_set_epi8(13,12,12,11,10, 9, 9, 8, - 8, 7, 6, 5, 5, 4, 4, 3, - 10, 9, 9, 8, 7, 6, 6, 5, - 5, 4, 3, 2, 2, 1, 1, 0); - const __m256i srlvdidx = _mm256_set_epi32(0,0,1,0,0,0,1,0); - const __m256i srlvqidx = _mm256_set_epi64x(2,0,2,0); - const __m256i shift = _mm256_set_epi16(4,32,1,8,32,1,4,32,4,32,1,8,32,1,4,32); - const __m256i mask = _mm256_set1_epi16(32752); - - for(i=0;ivec[i],f); - } -} - -#endif - -/************************************************* -* Name: polyvec_compress -* -* Description: Compress and serialize vector of polynomials -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for KYBER_POLYVECCOMPRESSEDBYTES) -* - polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES+2], const polyvec *a) -{ - unsigned int i; - -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) - for(i=0;ivec[i]); -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) - for(i=0;ivec[i]); -#endif -} - -/************************************************* -* Name: polyvec_decompress -* -* Description: De-serialize and decompress vector of polynomials; -* approximate inverse of polyvec_compress -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const uint8_t *a: pointer to input byte array -* (of length KYBER_POLYVECCOMPRESSEDBYTES) -**************************************************/ -void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES+12]) -{ - unsigned int i; - -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) - for(i=0;ivec[i],&a[320*i]); -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) - for(i=0;ivec[i],&a[352*i]); -#endif -} - -/************************************************* -* Name: polyvec_tobytes -* -* Description: Serialize vector of polynomials -* 
-* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for KYBER_POLYVECBYTES) -* - polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_frombytes -* -* Description: De-serialize vector of polynomials; -* inverse of polyvec_tobytes -* -* Arguments: - uint8_t *r: pointer to output byte array -* - const polyvec *a: pointer to input vector of polynomials -* (of length KYBER_POLYVECBYTES) -**************************************************/ -void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]) -{ - unsigned int i; - for(i=0;ivec[i], a+i*KYBER_POLYBYTES); -} - -/************************************************* -* Name: polyvec_ntt -* -* Description: Apply forward NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_ntt(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_invntt_tomont -* -* Description: Apply inverse NTT to all elements of a vector of polynomials -* and multiply by Montgomery factor 2^16 -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_invntt_tomont(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_basemul_acc_montgomery -* -* Description: Multiply elements in a and b in NTT domain, accumulate into r, -* and multiply by 2^-16. 
-* -* Arguments: - poly *r: pointer to output polynomial -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) -{ - unsigned int i; - poly tmp; - - poly_basemul_montgomery(r,&a->vec[0],&b->vec[0]); - for(i=1;ivec[i],&b->vec[i]); - poly_add(r,r,&tmp); - } -} - -/************************************************* -* Name: polyvec_reduce -* -* Description: Applies Barrett reduction to each coefficient -* of each element of a vector of polynomials; -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - polyvec *r: pointer to input/output polynomial -**************************************************/ -void polyvec_reduce(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_add -* -* Description: Add vectors of polynomials -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) -{ - unsigned int i; - for(i=0;ivec[i], &a->vec[i], &b->vec[i]); -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/polyvec.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/polyvec.h deleted file mode 100644 index 2ce23c31ff..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/polyvec.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef POLYVEC_H -#define POLYVEC_H - -#include -#include "params.h" -#include "poly.h" - -typedef struct{ - poly vec[KYBER_K]; -} polyvec; - -#define polyvec_compress KYBER_NAMESPACE(polyvec_compress) -void polyvec_compress(uint8_t 
r[KYBER_POLYVECCOMPRESSEDBYTES+2], const polyvec *a); -#define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress) -void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES+12]); - -#define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes) -void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a); -#define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes) -void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]); - -#define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt) -void polyvec_ntt(polyvec *r); -#define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont) -void polyvec_invntt_tomont(polyvec *r); - -#define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery) -void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b); - -#define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce) -void polyvec_reduce(polyvec *r); - -#define polyvec_add KYBER_NAMESPACE(polyvec_add) -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/reduce.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/reduce.h deleted file mode 100644 index 5368185b5f..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/reduce.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef REDUCE_H -#define REDUCE_H - -#include "params.h" -#include - -#define reduce_avx KYBER_NAMESPACE(reduce_avx) -void reduce_avx(__m256i *r, const __m256i *qdata); -#define tomont_avx KYBER_NAMESPACE(tomont_avx) -void tomont_avx(__m256i *r, const __m256i *qdata); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/rejsample.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/rejsample.c deleted file mode 100644 index 9060a44cb9..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/rejsample.c +++ /dev/null @@ -1,398 +0,0 @@ -#include -#include -#include 
-#include "params.h" -#include "consts.h" -#include "rejsample.h" - -//#define BMI - -#ifndef BMI -static const uint8_t idx[256][8] = { - {-1, -1, -1, -1, -1, -1, -1, -1}, - { 0, -1, -1, -1, -1, -1, -1, -1}, - { 2, -1, -1, -1, -1, -1, -1, -1}, - { 0, 2, -1, -1, -1, -1, -1, -1}, - { 4, -1, -1, -1, -1, -1, -1, -1}, - { 0, 4, -1, -1, -1, -1, -1, -1}, - { 2, 4, -1, -1, -1, -1, -1, -1}, - { 0, 2, 4, -1, -1, -1, -1, -1}, - { 6, -1, -1, -1, -1, -1, -1, -1}, - { 0, 6, -1, -1, -1, -1, -1, -1}, - { 2, 6, -1, -1, -1, -1, -1, -1}, - { 0, 2, 6, -1, -1, -1, -1, -1}, - { 4, 6, -1, -1, -1, -1, -1, -1}, - { 0, 4, 6, -1, -1, -1, -1, -1}, - { 2, 4, 6, -1, -1, -1, -1, -1}, - { 0, 2, 4, 6, -1, -1, -1, -1}, - { 8, -1, -1, -1, -1, -1, -1, -1}, - { 0, 8, -1, -1, -1, -1, -1, -1}, - { 2, 8, -1, -1, -1, -1, -1, -1}, - { 0, 2, 8, -1, -1, -1, -1, -1}, - { 4, 8, -1, -1, -1, -1, -1, -1}, - { 0, 4, 8, -1, -1, -1, -1, -1}, - { 2, 4, 8, -1, -1, -1, -1, -1}, - { 0, 2, 4, 8, -1, -1, -1, -1}, - { 6, 8, -1, -1, -1, -1, -1, -1}, - { 0, 6, 8, -1, -1, -1, -1, -1}, - { 2, 6, 8, -1, -1, -1, -1, -1}, - { 0, 2, 6, 8, -1, -1, -1, -1}, - { 4, 6, 8, -1, -1, -1, -1, -1}, - { 0, 4, 6, 8, -1, -1, -1, -1}, - { 2, 4, 6, 8, -1, -1, -1, -1}, - { 0, 2, 4, 6, 8, -1, -1, -1}, - {10, -1, -1, -1, -1, -1, -1, -1}, - { 0, 10, -1, -1, -1, -1, -1, -1}, - { 2, 10, -1, -1, -1, -1, -1, -1}, - { 0, 2, 10, -1, -1, -1, -1, -1}, - { 4, 10, -1, -1, -1, -1, -1, -1}, - { 0, 4, 10, -1, -1, -1, -1, -1}, - { 2, 4, 10, -1, -1, -1, -1, -1}, - { 0, 2, 4, 10, -1, -1, -1, -1}, - { 6, 10, -1, -1, -1, -1, -1, -1}, - { 0, 6, 10, -1, -1, -1, -1, -1}, - { 2, 6, 10, -1, -1, -1, -1, -1}, - { 0, 2, 6, 10, -1, -1, -1, -1}, - { 4, 6, 10, -1, -1, -1, -1, -1}, - { 0, 4, 6, 10, -1, -1, -1, -1}, - { 2, 4, 6, 10, -1, -1, -1, -1}, - { 0, 2, 4, 6, 10, -1, -1, -1}, - { 8, 10, -1, -1, -1, -1, -1, -1}, - { 0, 8, 10, -1, -1, -1, -1, -1}, - { 2, 8, 10, -1, -1, -1, -1, -1}, - { 0, 2, 8, 10, -1, -1, -1, -1}, - { 4, 8, 10, -1, -1, -1, -1, -1}, - { 0, 4, 8, 10, -1, -1, 
-1, -1}, - { 2, 4, 8, 10, -1, -1, -1, -1}, - { 0, 2, 4, 8, 10, -1, -1, -1}, - { 6, 8, 10, -1, -1, -1, -1, -1}, - { 0, 6, 8, 10, -1, -1, -1, -1}, - { 2, 6, 8, 10, -1, -1, -1, -1}, - { 0, 2, 6, 8, 10, -1, -1, -1}, - { 4, 6, 8, 10, -1, -1, -1, -1}, - { 0, 4, 6, 8, 10, -1, -1, -1}, - { 2, 4, 6, 8, 10, -1, -1, -1}, - { 0, 2, 4, 6, 8, 10, -1, -1}, - {12, -1, -1, -1, -1, -1, -1, -1}, - { 0, 12, -1, -1, -1, -1, -1, -1}, - { 2, 12, -1, -1, -1, -1, -1, -1}, - { 0, 2, 12, -1, -1, -1, -1, -1}, - { 4, 12, -1, -1, -1, -1, -1, -1}, - { 0, 4, 12, -1, -1, -1, -1, -1}, - { 2, 4, 12, -1, -1, -1, -1, -1}, - { 0, 2, 4, 12, -1, -1, -1, -1}, - { 6, 12, -1, -1, -1, -1, -1, -1}, - { 0, 6, 12, -1, -1, -1, -1, -1}, - { 2, 6, 12, -1, -1, -1, -1, -1}, - { 0, 2, 6, 12, -1, -1, -1, -1}, - { 4, 6, 12, -1, -1, -1, -1, -1}, - { 0, 4, 6, 12, -1, -1, -1, -1}, - { 2, 4, 6, 12, -1, -1, -1, -1}, - { 0, 2, 4, 6, 12, -1, -1, -1}, - { 8, 12, -1, -1, -1, -1, -1, -1}, - { 0, 8, 12, -1, -1, -1, -1, -1}, - { 2, 8, 12, -1, -1, -1, -1, -1}, - { 0, 2, 8, 12, -1, -1, -1, -1}, - { 4, 8, 12, -1, -1, -1, -1, -1}, - { 0, 4, 8, 12, -1, -1, -1, -1}, - { 2, 4, 8, 12, -1, -1, -1, -1}, - { 0, 2, 4, 8, 12, -1, -1, -1}, - { 6, 8, 12, -1, -1, -1, -1, -1}, - { 0, 6, 8, 12, -1, -1, -1, -1}, - { 2, 6, 8, 12, -1, -1, -1, -1}, - { 0, 2, 6, 8, 12, -1, -1, -1}, - { 4, 6, 8, 12, -1, -1, -1, -1}, - { 0, 4, 6, 8, 12, -1, -1, -1}, - { 2, 4, 6, 8, 12, -1, -1, -1}, - { 0, 2, 4, 6, 8, 12, -1, -1}, - {10, 12, -1, -1, -1, -1, -1, -1}, - { 0, 10, 12, -1, -1, -1, -1, -1}, - { 2, 10, 12, -1, -1, -1, -1, -1}, - { 0, 2, 10, 12, -1, -1, -1, -1}, - { 4, 10, 12, -1, -1, -1, -1, -1}, - { 0, 4, 10, 12, -1, -1, -1, -1}, - { 2, 4, 10, 12, -1, -1, -1, -1}, - { 0, 2, 4, 10, 12, -1, -1, -1}, - { 6, 10, 12, -1, -1, -1, -1, -1}, - { 0, 6, 10, 12, -1, -1, -1, -1}, - { 2, 6, 10, 12, -1, -1, -1, -1}, - { 0, 2, 6, 10, 12, -1, -1, -1}, - { 4, 6, 10, 12, -1, -1, -1, -1}, - { 0, 4, 6, 10, 12, -1, -1, -1}, - { 2, 4, 6, 10, 12, -1, -1, -1}, - { 0, 2, 4, 6, 10, 12, 
-1, -1}, - { 8, 10, 12, -1, -1, -1, -1, -1}, - { 0, 8, 10, 12, -1, -1, -1, -1}, - { 2, 8, 10, 12, -1, -1, -1, -1}, - { 0, 2, 8, 10, 12, -1, -1, -1}, - { 4, 8, 10, 12, -1, -1, -1, -1}, - { 0, 4, 8, 10, 12, -1, -1, -1}, - { 2, 4, 8, 10, 12, -1, -1, -1}, - { 0, 2, 4, 8, 10, 12, -1, -1}, - { 6, 8, 10, 12, -1, -1, -1, -1}, - { 0, 6, 8, 10, 12, -1, -1, -1}, - { 2, 6, 8, 10, 12, -1, -1, -1}, - { 0, 2, 6, 8, 10, 12, -1, -1}, - { 4, 6, 8, 10, 12, -1, -1, -1}, - { 0, 4, 6, 8, 10, 12, -1, -1}, - { 2, 4, 6, 8, 10, 12, -1, -1}, - { 0, 2, 4, 6, 8, 10, 12, -1}, - {14, -1, -1, -1, -1, -1, -1, -1}, - { 0, 14, -1, -1, -1, -1, -1, -1}, - { 2, 14, -1, -1, -1, -1, -1, -1}, - { 0, 2, 14, -1, -1, -1, -1, -1}, - { 4, 14, -1, -1, -1, -1, -1, -1}, - { 0, 4, 14, -1, -1, -1, -1, -1}, - { 2, 4, 14, -1, -1, -1, -1, -1}, - { 0, 2, 4, 14, -1, -1, -1, -1}, - { 6, 14, -1, -1, -1, -1, -1, -1}, - { 0, 6, 14, -1, -1, -1, -1, -1}, - { 2, 6, 14, -1, -1, -1, -1, -1}, - { 0, 2, 6, 14, -1, -1, -1, -1}, - { 4, 6, 14, -1, -1, -1, -1, -1}, - { 0, 4, 6, 14, -1, -1, -1, -1}, - { 2, 4, 6, 14, -1, -1, -1, -1}, - { 0, 2, 4, 6, 14, -1, -1, -1}, - { 8, 14, -1, -1, -1, -1, -1, -1}, - { 0, 8, 14, -1, -1, -1, -1, -1}, - { 2, 8, 14, -1, -1, -1, -1, -1}, - { 0, 2, 8, 14, -1, -1, -1, -1}, - { 4, 8, 14, -1, -1, -1, -1, -1}, - { 0, 4, 8, 14, -1, -1, -1, -1}, - { 2, 4, 8, 14, -1, -1, -1, -1}, - { 0, 2, 4, 8, 14, -1, -1, -1}, - { 6, 8, 14, -1, -1, -1, -1, -1}, - { 0, 6, 8, 14, -1, -1, -1, -1}, - { 2, 6, 8, 14, -1, -1, -1, -1}, - { 0, 2, 6, 8, 14, -1, -1, -1}, - { 4, 6, 8, 14, -1, -1, -1, -1}, - { 0, 4, 6, 8, 14, -1, -1, -1}, - { 2, 4, 6, 8, 14, -1, -1, -1}, - { 0, 2, 4, 6, 8, 14, -1, -1}, - {10, 14, -1, -1, -1, -1, -1, -1}, - { 0, 10, 14, -1, -1, -1, -1, -1}, - { 2, 10, 14, -1, -1, -1, -1, -1}, - { 0, 2, 10, 14, -1, -1, -1, -1}, - { 4, 10, 14, -1, -1, -1, -1, -1}, - { 0, 4, 10, 14, -1, -1, -1, -1}, - { 2, 4, 10, 14, -1, -1, -1, -1}, - { 0, 2, 4, 10, 14, -1, -1, -1}, - { 6, 10, 14, -1, -1, -1, -1, -1}, - { 0, 6, 10, 14, -1, 
-1, -1, -1}, - { 2, 6, 10, 14, -1, -1, -1, -1}, - { 0, 2, 6, 10, 14, -1, -1, -1}, - { 4, 6, 10, 14, -1, -1, -1, -1}, - { 0, 4, 6, 10, 14, -1, -1, -1}, - { 2, 4, 6, 10, 14, -1, -1, -1}, - { 0, 2, 4, 6, 10, 14, -1, -1}, - { 8, 10, 14, -1, -1, -1, -1, -1}, - { 0, 8, 10, 14, -1, -1, -1, -1}, - { 2, 8, 10, 14, -1, -1, -1, -1}, - { 0, 2, 8, 10, 14, -1, -1, -1}, - { 4, 8, 10, 14, -1, -1, -1, -1}, - { 0, 4, 8, 10, 14, -1, -1, -1}, - { 2, 4, 8, 10, 14, -1, -1, -1}, - { 0, 2, 4, 8, 10, 14, -1, -1}, - { 6, 8, 10, 14, -1, -1, -1, -1}, - { 0, 6, 8, 10, 14, -1, -1, -1}, - { 2, 6, 8, 10, 14, -1, -1, -1}, - { 0, 2, 6, 8, 10, 14, -1, -1}, - { 4, 6, 8, 10, 14, -1, -1, -1}, - { 0, 4, 6, 8, 10, 14, -1, -1}, - { 2, 4, 6, 8, 10, 14, -1, -1}, - { 0, 2, 4, 6, 8, 10, 14, -1}, - {12, 14, -1, -1, -1, -1, -1, -1}, - { 0, 12, 14, -1, -1, -1, -1, -1}, - { 2, 12, 14, -1, -1, -1, -1, -1}, - { 0, 2, 12, 14, -1, -1, -1, -1}, - { 4, 12, 14, -1, -1, -1, -1, -1}, - { 0, 4, 12, 14, -1, -1, -1, -1}, - { 2, 4, 12, 14, -1, -1, -1, -1}, - { 0, 2, 4, 12, 14, -1, -1, -1}, - { 6, 12, 14, -1, -1, -1, -1, -1}, - { 0, 6, 12, 14, -1, -1, -1, -1}, - { 2, 6, 12, 14, -1, -1, -1, -1}, - { 0, 2, 6, 12, 14, -1, -1, -1}, - { 4, 6, 12, 14, -1, -1, -1, -1}, - { 0, 4, 6, 12, 14, -1, -1, -1}, - { 2, 4, 6, 12, 14, -1, -1, -1}, - { 0, 2, 4, 6, 12, 14, -1, -1}, - { 8, 12, 14, -1, -1, -1, -1, -1}, - { 0, 8, 12, 14, -1, -1, -1, -1}, - { 2, 8, 12, 14, -1, -1, -1, -1}, - { 0, 2, 8, 12, 14, -1, -1, -1}, - { 4, 8, 12, 14, -1, -1, -1, -1}, - { 0, 4, 8, 12, 14, -1, -1, -1}, - { 2, 4, 8, 12, 14, -1, -1, -1}, - { 0, 2, 4, 8, 12, 14, -1, -1}, - { 6, 8, 12, 14, -1, -1, -1, -1}, - { 0, 6, 8, 12, 14, -1, -1, -1}, - { 2, 6, 8, 12, 14, -1, -1, -1}, - { 0, 2, 6, 8, 12, 14, -1, -1}, - { 4, 6, 8, 12, 14, -1, -1, -1}, - { 0, 4, 6, 8, 12, 14, -1, -1}, - { 2, 4, 6, 8, 12, 14, -1, -1}, - { 0, 2, 4, 6, 8, 12, 14, -1}, - {10, 12, 14, -1, -1, -1, -1, -1}, - { 0, 10, 12, 14, -1, -1, -1, -1}, - { 2, 10, 12, 14, -1, -1, -1, -1}, - { 0, 2, 10, 12, 14, -1, 
-1, -1}, - { 4, 10, 12, 14, -1, -1, -1, -1}, - { 0, 4, 10, 12, 14, -1, -1, -1}, - { 2, 4, 10, 12, 14, -1, -1, -1}, - { 0, 2, 4, 10, 12, 14, -1, -1}, - { 6, 10, 12, 14, -1, -1, -1, -1}, - { 0, 6, 10, 12, 14, -1, -1, -1}, - { 2, 6, 10, 12, 14, -1, -1, -1}, - { 0, 2, 6, 10, 12, 14, -1, -1}, - { 4, 6, 10, 12, 14, -1, -1, -1}, - { 0, 4, 6, 10, 12, 14, -1, -1}, - { 2, 4, 6, 10, 12, 14, -1, -1}, - { 0, 2, 4, 6, 10, 12, 14, -1}, - { 8, 10, 12, 14, -1, -1, -1, -1}, - { 0, 8, 10, 12, 14, -1, -1, -1}, - { 2, 8, 10, 12, 14, -1, -1, -1}, - { 0, 2, 8, 10, 12, 14, -1, -1}, - { 4, 8, 10, 12, 14, -1, -1, -1}, - { 0, 4, 8, 10, 12, 14, -1, -1}, - { 2, 4, 8, 10, 12, 14, -1, -1}, - { 0, 2, 4, 8, 10, 12, 14, -1}, - { 6, 8, 10, 12, 14, -1, -1, -1}, - { 0, 6, 8, 10, 12, 14, -1, -1}, - { 2, 6, 8, 10, 12, 14, -1, -1}, - { 0, 2, 6, 8, 10, 12, 14, -1}, - { 4, 6, 8, 10, 12, 14, -1, -1}, - { 0, 4, 6, 8, 10, 12, 14, -1}, - { 2, 4, 6, 8, 10, 12, 14, -1}, - { 0, 2, 4, 6, 8, 10, 12, 14} -}; -#endif - -#define _mm256_cmpge_epu16(a, b) _mm256_cmpeq_epi16(_mm256_max_epu16(a, b), a) -#define _mm_cmpge_epu16(a, b) _mm_cmpeq_epi16(_mm_max_epu16(a, b), a) - -unsigned int rej_uniform_avx(int16_t * restrict r, const uint8_t *buf) -{ - unsigned int ctr, pos; - uint16_t val0, val1; - uint32_t good; -#ifdef BMI - uint64_t idx0, idx1, idx2, idx3; -#endif - const __m256i bound = _mm256_load_si256(&qdata.vec[_16XQ/16]); - const __m256i ones = _mm256_set1_epi8(1); - const __m256i mask = _mm256_set1_epi16(0xFFF); - const __m256i idx8 = _mm256_set_epi8(15,14,14,13,12,11,11,10, - 9, 8, 8, 7, 6, 5, 5, 4, - 11,10,10, 9, 8, 7, 7, 6, - 5, 4, 4, 3, 2, 1, 1, 0); - __m256i f0, f1, g0, g1, g2, g3; - __m128i f, t, pilo, pihi; - - ctr = pos = 0; - while(ctr <= KYBER_N - 32 && pos <= REJ_UNIFORM_AVX_BUFLEN - 56) { - f0 = _mm256_loadu_si256((__m256i *)&buf[pos]); - f1 = _mm256_loadu_si256((__m256i *)&buf[pos+24]); - f0 = _mm256_permute4x64_epi64(f0, 0x94); - f1 = _mm256_permute4x64_epi64(f1, 0x94); - f0 = _mm256_shuffle_epi8(f0, 
idx8); - f1 = _mm256_shuffle_epi8(f1, idx8); - g0 = _mm256_srli_epi16(f0, 4); - g1 = _mm256_srli_epi16(f1, 4); - f0 = _mm256_blend_epi16(f0, g0, 0xAA); - f1 = _mm256_blend_epi16(f1, g1, 0xAA); - f0 = _mm256_and_si256(f0, mask); - f1 = _mm256_and_si256(f1, mask); - pos += 48; - - g0 = _mm256_cmpgt_epi16(bound, f0); - g1 = _mm256_cmpgt_epi16(bound, f1); - - g0 = _mm256_packs_epi16(g0, g1); - good = _mm256_movemask_epi8(g0); - -#ifdef BMI - idx0 = _pdep_u64(good >> 0, 0x0101010101010101); - idx1 = _pdep_u64(good >> 8, 0x0101010101010101); - idx2 = _pdep_u64(good >> 16, 0x0101010101010101); - idx3 = _pdep_u64(good >> 24, 0x0101010101010101); - idx0 = (idx0 << 8) - idx0; - idx0 = _pext_u64(0x0E0C0A0806040200, idx0); - idx1 = (idx1 << 8) - idx1; - idx1 = _pext_u64(0x0E0C0A0806040200, idx1); - idx2 = (idx2 << 8) - idx2; - idx2 = _pext_u64(0x0E0C0A0806040200, idx2); - idx3 = (idx3 << 8) - idx3; - idx3 = _pext_u64(0x0E0C0A0806040200, idx3); - - g0 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx0)); - g1 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx1)); - g0 = _mm256_inserti128_si256(g0, _mm_cvtsi64_si128(idx2), 1); - g1 = _mm256_inserti128_si256(g1, _mm_cvtsi64_si128(idx3), 1); -#else - g0 = _mm256_castsi128_si256(_mm_loadl_epi64((__m128i *)&idx[(good >> 0) & 0xFF])); - g1 = _mm256_castsi128_si256(_mm_loadl_epi64((__m128i *)&idx[(good >> 8) & 0xFF])); - g0 = _mm256_inserti128_si256(g0, _mm_loadl_epi64((__m128i *)&idx[(good >> 16) & 0xFF]), 1); - g1 = _mm256_inserti128_si256(g1, _mm_loadl_epi64((__m128i *)&idx[(good >> 24) & 0xFF]), 1); -#endif - - g2 = _mm256_add_epi8(g0, ones); - g3 = _mm256_add_epi8(g1, ones); - g0 = _mm256_unpacklo_epi8(g0, g2); - g1 = _mm256_unpacklo_epi8(g1, g3); - - f0 = _mm256_shuffle_epi8(f0, g0); - f1 = _mm256_shuffle_epi8(f1, g1); - - _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f0)); - ctr += _mm_popcnt_u32((good >> 0) & 0xFF); - _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f0, 1)); - ctr += _mm_popcnt_u32((good >> 
16) & 0xFF); - _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f1)); - ctr += _mm_popcnt_u32((good >> 8) & 0xFF); - _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f1, 1)); - ctr += _mm_popcnt_u32((good >> 24) & 0xFF); - } - - while(ctr <= KYBER_N - 8 && pos <= REJ_UNIFORM_AVX_BUFLEN - 16) { - f = _mm_loadu_si128((__m128i *)&buf[pos]); - f = _mm_shuffle_epi8(f, _mm256_castsi256_si128(idx8)); - t = _mm_srli_epi16(f, 4); - f = _mm_blend_epi16(f, t, 0xAA); - f = _mm_and_si128(f, _mm256_castsi256_si128(mask)); - pos += 12; - - t = _mm_cmpgt_epi16(_mm256_castsi256_si128(bound), f); - good = _mm_movemask_epi8(t); - -#ifdef BMI - good &= 0x5555; - idx0 = _pdep_u64(good, 0x1111111111111111); - idx0 = (idx0 << 8) - idx0; - idx0 = _pext_u64(0x0E0C0A0806040200, idx0); - pilo = _mm_cvtsi64_si128(idx0); -#else - good = _pext_u32(good, 0x5555); - pilo = _mm_loadl_epi64((__m128i *)&idx[good]); -#endif - - pihi = _mm_add_epi8(pilo, _mm256_castsi256_si128(ones)); - pilo = _mm_unpacklo_epi8(pilo, pihi); - f = _mm_shuffle_epi8(f, pilo); - _mm_storeu_si128((__m128i *)&r[ctr], f); - ctr += _mm_popcnt_u32(good); - } - - while(ctr < KYBER_N && pos <= REJ_UNIFORM_AVX_BUFLEN - 3) { - val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; - val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)); - pos += 3; - - if(val0 < KYBER_Q) - r[ctr++] = val0; - if(val1 < KYBER_Q && ctr < KYBER_N) - r[ctr++] = val1; - } - - return ctr; -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/rejsample.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/rejsample.h deleted file mode 100644 index 3be5e2192e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/rejsample.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef REJSAMPLE_H -#define REJSAMPLE_H - -#include -#include "params.h" -#include "symmetric.h" - -#define REJ_UNIFORM_AVX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES) -#define 
REJ_UNIFORM_AVX_BUFLEN (REJ_UNIFORM_AVX_NBLOCKS*XOF_BLOCKBYTES) - -#define rej_uniform_avx KYBER_NAMESPACE(rej_uniform_avx) -unsigned int rej_uniform_avx(int16_t *r, const uint8_t *buf); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/shuffle.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/shuffle.S deleted file mode 100644 index 18325ebec0..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/shuffle.S +++ /dev/null @@ -1,255 +0,0 @@ -#include "consts.h" -.include "fq.inc" -.include "shuffle.inc" - -/* -nttpack_avx: -#load -vmovdqa (%rdi),%ymm4 -vmovdqa 32(%rdi),%ymm5 -vmovdqa 64(%rdi),%ymm6 -vmovdqa 96(%rdi),%ymm7 -vmovdqa 128(%rdi),%ymm8 -vmovdqa 160(%rdi),%ymm9 -vmovdqa 192(%rdi),%ymm10 -vmovdqa 224(%rdi),%ymm11 - -shuffle1 4,5,3,5 -shuffle1 6,7,4,7 -shuffle1 8,9,6,9 -shuffle1 10,11,8,11 - -shuffle2 3,4,10,4 -shuffle2 6,8,3,8 -shuffle2 5,7,6,7 -shuffle2 9,11,5,11 - -shuffle4 10,3,9,3 -shuffle4 6,5,10,5 -shuffle4 4,8,6,8 -shuffle4 7,11,4,11 - -shuffle8 9,10,7,10 -shuffle8 6,4,9,4 -shuffle8 3,5,6,5 -shuffle8 8,11,3,11 - -#store -vmovdqa %ymm7,(%rdi) -vmovdqa %ymm9,32(%rdi) -vmovdqa %ymm6,64(%rdi) -vmovdqa %ymm3,96(%rdi) -vmovdqa %ymm10,128(%rdi) -vmovdqa %ymm4,160(%rdi) -vmovdqa %ymm5,192(%rdi) -vmovdqa %ymm11,224(%rdi) - -ret -*/ - -.text -nttunpack128_avx: -#load -vmovdqa (%rdi),%ymm4 -vmovdqa 32(%rdi),%ymm5 -vmovdqa 64(%rdi),%ymm6 -vmovdqa 96(%rdi),%ymm7 -vmovdqa 128(%rdi),%ymm8 -vmovdqa 160(%rdi),%ymm9 -vmovdqa 192(%rdi),%ymm10 -vmovdqa 224(%rdi),%ymm11 - -shuffle8 4,8,3,8 -shuffle8 5,9,4,9 -shuffle8 6,10,5,10 -shuffle8 7,11,6,11 - -shuffle4 3,5,7,5 -shuffle4 8,10,3,10 -shuffle4 4,6,8,6 -shuffle4 9,11,4,11 - -shuffle2 7,8,9,8 -shuffle2 5,6,7,6 -shuffle2 3,4,5,4 -shuffle2 10,11,3,11 - -shuffle1 9,5,10,5 -shuffle1 8,4,9,4 -shuffle1 7,3,8,3 -shuffle1 6,11,7,11 - -#store -vmovdqa %ymm10,(%rdi) -vmovdqa %ymm5,32(%rdi) -vmovdqa %ymm9,64(%rdi) -vmovdqa %ymm4,96(%rdi) -vmovdqa %ymm8,128(%rdi) -vmovdqa 
%ymm3,160(%rdi) -vmovdqa %ymm7,192(%rdi) -vmovdqa %ymm11,224(%rdi) - -ret - -.global cdecl(nttunpack_avx) -cdecl(nttunpack_avx): -call nttunpack128_avx -add $256,%rdi -call nttunpack128_avx -ret - -ntttobytes128_avx: -#load -vmovdqa (%rsi),%ymm5 -vmovdqa 32(%rsi),%ymm6 -vmovdqa 64(%rsi),%ymm7 -vmovdqa 96(%rsi),%ymm8 -vmovdqa 128(%rsi),%ymm9 -vmovdqa 160(%rsi),%ymm10 -vmovdqa 192(%rsi),%ymm11 -vmovdqa 224(%rsi),%ymm12 - -#csubq -csubq 5,13 -csubq 6,13 -csubq 7,13 -csubq 8,13 -csubq 9,13 -csubq 10,13 -csubq 11,13 -csubq 12,13 - -#bitpack -vpsllw $12,%ymm6,%ymm4 -vpor %ymm4,%ymm5,%ymm4 - -vpsrlw $4,%ymm6,%ymm5 -vpsllw $8,%ymm7,%ymm6 -vpor %ymm5,%ymm6,%ymm5 - -vpsrlw $8,%ymm7,%ymm6 -vpsllw $4,%ymm8,%ymm7 -vpor %ymm6,%ymm7,%ymm6 - -vpsllw $12,%ymm10,%ymm7 -vpor %ymm7,%ymm9,%ymm7 - -vpsrlw $4,%ymm10,%ymm8 -vpsllw $8,%ymm11,%ymm9 -vpor %ymm8,%ymm9,%ymm8 - -vpsrlw $8,%ymm11,%ymm9 -vpsllw $4,%ymm12,%ymm10 -vpor %ymm9,%ymm10,%ymm9 - -shuffle1 4,5,3,5 -shuffle1 6,7,4,7 -shuffle1 8,9,6,9 - -shuffle2 3,4,8,4 -shuffle2 6,5,3,5 -shuffle2 7,9,6,9 - -shuffle4 8,3,7,3 -shuffle4 6,4,8,4 -shuffle4 5,9,6,9 - -shuffle8 7,8,5,8 -shuffle8 6,3,7,3 -shuffle8 4,9,6,9 - -#store -vmovdqu %ymm5,(%rdi) -vmovdqu %ymm7,32(%rdi) -vmovdqu %ymm6,64(%rdi) -vmovdqu %ymm8,96(%rdi) -vmovdqu %ymm3,128(%rdi) -vmovdqu %ymm9,160(%rdi) - -ret - -.global cdecl(ntttobytes_avx) -cdecl(ntttobytes_avx): -#consts -vmovdqa _16XQ*2(%rdx),%ymm0 -call ntttobytes128_avx -add $256,%rsi -add $192,%rdi -call ntttobytes128_avx -ret - -nttfrombytes128_avx: -#load -vmovdqu (%rsi),%ymm4 -vmovdqu 32(%rsi),%ymm5 -vmovdqu 64(%rsi),%ymm6 -vmovdqu 96(%rsi),%ymm7 -vmovdqu 128(%rsi),%ymm8 -vmovdqu 160(%rsi),%ymm9 - -shuffle8 4,7,3,7 -shuffle8 5,8,4,8 -shuffle8 6,9,5,9 - -shuffle4 3,8,6,8 -shuffle4 7,5,3,5 -shuffle4 4,9,7,9 - -shuffle2 6,5,4,5 -shuffle2 8,7,6,7 -shuffle2 3,9,8,9 - -shuffle1 4,7,10,7 -shuffle1 5,8,4,8 -shuffle1 6,9,5,9 - -#bitunpack -vpsrlw $12,%ymm10,%ymm11 -vpsllw $4,%ymm7,%ymm12 -vpor %ymm11,%ymm12,%ymm11 -vpand 
%ymm0,%ymm10,%ymm10 -vpand %ymm0,%ymm11,%ymm11 - -vpsrlw $8,%ymm7,%ymm12 -vpsllw $8,%ymm4,%ymm13 -vpor %ymm12,%ymm13,%ymm12 -vpand %ymm0,%ymm12,%ymm12 - -vpsrlw $4,%ymm4,%ymm13 -vpand %ymm0,%ymm13,%ymm13 - -vpsrlw $12,%ymm8,%ymm14 -vpsllw $4,%ymm5,%ymm15 -vpor %ymm14,%ymm15,%ymm14 -vpand %ymm0,%ymm8,%ymm8 -vpand %ymm0,%ymm14,%ymm14 - -vpsrlw $8,%ymm5,%ymm15 -vpsllw $8,%ymm9,%ymm1 -vpor %ymm15,%ymm1,%ymm15 -vpand %ymm0,%ymm15,%ymm15 - -vpsrlw $4,%ymm9,%ymm1 -vpand %ymm0,%ymm1,%ymm1 - -#store -vmovdqa %ymm10,(%rdi) -vmovdqa %ymm11,32(%rdi) -vmovdqa %ymm12,64(%rdi) -vmovdqa %ymm13,96(%rdi) -vmovdqa %ymm8,128(%rdi) -vmovdqa %ymm14,160(%rdi) -vmovdqa %ymm15,192(%rdi) -vmovdqa %ymm1,224(%rdi) - -ret - -.global cdecl(nttfrombytes_avx) -cdecl(nttfrombytes_avx): -#consts -vmovdqa _16XMASK*2(%rdx),%ymm0 -call nttfrombytes128_avx -add $256,%rdi -add $192,%rsi -call nttfrombytes128_avx -ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/shuffle.inc b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/shuffle.inc deleted file mode 100644 index 73e9ffe03c..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/shuffle.inc +++ /dev/null @@ -1,25 +0,0 @@ -.macro shuffle8 r0,r1,r2,r3 -vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 -vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle4 r0,r1,r2,r3 -vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 -vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle2 r0,r1,r2,r3 -#vpsllq $32,%ymm\r1,%ymm\r2 -vmovsldup %ymm\r1,%ymm\r2 -vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 -vpsrlq $32,%ymm\r0,%ymm\r0 -#vmovshdup %ymm\r0,%ymm\r0 -vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle1 r0,r1,r2,r3 -vpslld $16,%ymm\r1,%ymm\r2 -vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 -vpsrld $16,%ymm\r0,%ymm\r0 -vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 -.endm diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/symmetric-shake.c 
b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/symmetric-shake.c deleted file mode 100644 index 20f451882e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/symmetric-shake.c +++ /dev/null @@ -1,74 +0,0 @@ -#include -#include -#include -#include "params.h" -#include "symmetric.h" -#include "fips202.h" - -/************************************************* -* Name: kyber_shake128_absorb -* -* Description: Absorb step of the SHAKE128 specialized for the Kyber context. -* -* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state -* - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state -* - uint8_t i: additional byte of input -* - uint8_t j: additional byte of input -**************************************************/ -void kyber_shake128_absorb(shake128incctx *state, - const uint8_t seed[KYBER_SYMBYTES], - uint8_t x, - uint8_t y) -{ - uint8_t extseed[KYBER_SYMBYTES+2]; - - memcpy(extseed, seed, KYBER_SYMBYTES); - extseed[KYBER_SYMBYTES+0] = x; - extseed[KYBER_SYMBYTES+1] = y; - - shake128_absorb_once(state, extseed, sizeof(extseed)); -} - -/************************************************* -* Name: kyber_shake256_prf -* -* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -* and then generates outlen bytes of SHAKE256 output -* -* Arguments: - uint8_t *out: pointer to output -* - size_t outlen: number of requested output bytes -* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) -* - uint8_t nonce: single-byte nonce (public PRF input) -**************************************************/ -void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce) -{ - uint8_t extkey[KYBER_SYMBYTES+1]; - - memcpy(extkey, key, KYBER_SYMBYTES); - extkey[KYBER_SYMBYTES] = nonce; - - shake256(out, outlen, extkey, sizeof(extkey)); -} - -/************************************************* -* Name: kyber_shake256_prf 
-* -* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -* and then generates outlen bytes of SHAKE256 output -* -* Arguments: - uint8_t *out: pointer to output -* - size_t outlen: number of requested output bytes -* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) -* - uint8_t nonce: single-byte nonce (public PRF input) -**************************************************/ -void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]) -{ - shake256incctx s; - - shake256_inc_init(&s); - shake256_inc_absorb(&s, key, KYBER_SYMBYTES); - shake256_inc_absorb(&s, input, KYBER_CIPHERTEXTBYTES); - shake256_inc_finalize(&s); - shake256_inc_squeeze(out, KYBER_SSBYTES, &s); - shake256_inc_ctx_release(&s); -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/symmetric.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/symmetric.h deleted file mode 100644 index e4941f7a86..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/symmetric.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef SYMMETRIC_H -#define SYMMETRIC_H - -#include -#include -#include "params.h" - -#include "fips202.h" -#include "fips202x4.h" - -typedef shake128incctx xof_state; - -#define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb) -void kyber_shake128_absorb(shake128incctx *s, - const uint8_t seed[KYBER_SYMBYTES], - uint8_t x, - uint8_t y); - -#define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf) -void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce); - -#define kyber_shake256_rkprf KYBER_NAMESPACE(kyber_shake256_rkprf) -void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]); - -#define XOF_BLOCKBYTES SHAKE128_RATE - -#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) -#define hash_g(OUT, IN, 
INBYTES) sha3_512(OUT, IN, INBYTES) -#define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y) -#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) -#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE) -#define rkprf(OUT, KEY, INPUT) kyber_shake256_rkprf(OUT, KEY, INPUT) - -#endif /* SYMMETRIC_H */ diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/verify.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/verify.c deleted file mode 100644 index 06243b837f..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_avx2/verify.c +++ /dev/null @@ -1,83 +0,0 @@ -#include -#include -#include -#include "verify.h" - -/************************************************* -* Name: verify -* -* Description: Compare two arrays for equality in constant time. -* -* Arguments: const uint8_t *a: pointer to first byte array -* const uint8_t *b: pointer to second byte array -* size_t len: length of the byte arrays -* -* Returns 0 if the byte arrays are equal, 1 otherwise -**************************************************/ -int verify(const uint8_t *a, const uint8_t *b, size_t len) -{ - size_t i; - uint64_t r; - __m256i f, g, h; - - h = _mm256_setzero_si256(); - for(i=0;i> 63; - return r; -} - -/************************************************* -* Name: cmov -* -* Description: Copy len bytes from x to r if b is 1; -* don't modify x if b is 0. Requires b to be in {0,1}; -* assumes two's complement representation of negative integers. -* Runs in constant time. 
-* -* Arguments: uint8_t *r: pointer to output byte array -* const uint8_t *x: pointer to input byte array -* size_t len: Amount of bytes to be copied -* uint8_t b: Condition bit; has to be in {0,1} -**************************************************/ -void cmov(uint8_t * restrict r, const uint8_t *x, size_t len, uint8_t b) -{ - size_t i; - __m256i xvec, rvec, bvec; - -#if defined(__GNUC__) || defined(__clang__) - // Prevent the compiler from - // 1) inferring that b is 0/1-valued, and - // 2) handling the two cases with a branch. - // This is not necessary when verify.c and kem.c are separate translation - // units, but we expect that downstream consumers will copy this code and/or - // change how it is built. - __asm__("" : "+r"(b) : /* no inputs */); -#endif - - bvec = _mm256_set1_epi64x(-(uint64_t)b); - for(i=0;i -#include -#include "params.h" - -#define verify KYBER_NAMESPACE(verify) -int verify(const uint8_t *a, const uint8_t *b, size_t len); - -#define cmov KYBER_NAMESPACE(cmov) -void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); - -#define cmov_int16 KYBER_NAMESPACE(cmov_int16) -void cmov_int16(int16_t *r, int16_t v, uint16_t b); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/LICENSE b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/LICENSE deleted file mode 100644 index 7922ab8007..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/LICENSE +++ /dev/null @@ -1,6 +0,0 @@ -Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/); -or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html). - -For Keccak and AES we are using public-domain -code from sources and by authors listed in -comments on top of the respective files. 
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/api.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/api.h deleted file mode 100644 index 70d40f3f3e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/api.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef API_H -#define API_H - -#include - -#define pqcrystals_kyber512_SECRETKEYBYTES 1632 -#define pqcrystals_kyber512_PUBLICKEYBYTES 800 -#define pqcrystals_kyber512_CIPHERTEXTBYTES 768 -#define pqcrystals_kyber512_KEYPAIRCOINBYTES 64 -#define pqcrystals_kyber512_ENCCOINBYTES 32 -#define pqcrystals_kyber512_BYTES 32 - -#define pqcrystals_kyber512_ref_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES -#define pqcrystals_kyber512_ref_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES -#define pqcrystals_kyber512_ref_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES -#define pqcrystals_kyber512_ref_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES -#define pqcrystals_kyber512_ref_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES -#define pqcrystals_kyber512_ref_BYTES pqcrystals_kyber512_BYTES - -int pqcrystals_kyber512_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); -int pqcrystals_kyber512_ref_keypair(uint8_t *pk, uint8_t *sk); -int pqcrystals_kyber512_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); -int pqcrystals_kyber512_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -int pqcrystals_kyber512_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#define pqcrystals_kyber768_SECRETKEYBYTES 2400 -#define pqcrystals_kyber768_PUBLICKEYBYTES 1184 -#define pqcrystals_kyber768_CIPHERTEXTBYTES 1088 -#define pqcrystals_kyber768_KEYPAIRCOINBYTES 64 -#define pqcrystals_kyber768_ENCCOINBYTES 32 -#define pqcrystals_kyber768_BYTES 32 - -#define pqcrystals_kyber768_ref_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES -#define pqcrystals_kyber768_ref_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES -#define 
pqcrystals_kyber768_ref_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES -#define pqcrystals_kyber768_ref_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES -#define pqcrystals_kyber768_ref_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES -#define pqcrystals_kyber768_ref_BYTES pqcrystals_kyber768_BYTES - -int pqcrystals_kyber768_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); -int pqcrystals_kyber768_ref_keypair(uint8_t *pk, uint8_t *sk); -int pqcrystals_kyber768_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); -int pqcrystals_kyber768_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -int pqcrystals_kyber768_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#define pqcrystals_kyber1024_SECRETKEYBYTES 3168 -#define pqcrystals_kyber1024_PUBLICKEYBYTES 1568 -#define pqcrystals_kyber1024_CIPHERTEXTBYTES 1568 -#define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64 -#define pqcrystals_kyber1024_ENCCOINBYTES 32 -#define pqcrystals_kyber1024_BYTES 32 - -#define pqcrystals_kyber1024_ref_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES -#define pqcrystals_kyber1024_ref_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES -#define pqcrystals_kyber1024_ref_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES -#define pqcrystals_kyber1024_ref_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES -#define pqcrystals_kyber1024_ref_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES -#define pqcrystals_kyber1024_ref_BYTES pqcrystals_kyber1024_BYTES - -int pqcrystals_kyber1024_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); -int pqcrystals_kyber1024_ref_keypair(uint8_t *pk, uint8_t *sk); -int pqcrystals_kyber1024_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); -int pqcrystals_kyber1024_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -int pqcrystals_kyber1024_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#endif diff --git 
a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/cbd.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/cbd.c deleted file mode 100644 index 1500ffea56..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/cbd.c +++ /dev/null @@ -1,128 +0,0 @@ -#include -#include "params.h" -#include "cbd.h" - -/************************************************* -* Name: load32_littleendian -* -* Description: load 4 bytes into a 32-bit integer -* in little-endian order -* -* Arguments: - const uint8_t *x: pointer to input byte array -* -* Returns 32-bit unsigned integer loaded from x -**************************************************/ -static uint32_t load32_littleendian(const uint8_t x[4]) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - r |= (uint32_t)x[3] << 24; - return r; -} - -/************************************************* -* Name: load24_littleendian -* -* Description: load 3 bytes into a 32-bit integer -* in little-endian order. 
-* This function is only needed for Kyber-512 -* -* Arguments: - const uint8_t *x: pointer to input byte array -* -* Returns 32-bit unsigned integer loaded from x (most significant byte is zero) -**************************************************/ -#if KYBER_ETA1 == 3 -static uint32_t load24_littleendian(const uint8_t x[3]) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - return r; -} -#endif - - -/************************************************* -* Name: cbd2 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter eta=2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *buf: pointer to input byte array -**************************************************/ -static void cbd2(poly *r, const uint8_t buf[2*KYBER_N/4]) -{ - unsigned int i,j; - uint32_t t,d; - int16_t a,b; - - for(i=0;i>1) & 0x55555555; - - for(j=0;j<8;j++) { - a = (d >> (4*j+0)) & 0x3; - b = (d >> (4*j+2)) & 0x3; - r->coeffs[8*i+j] = a - b; - } - } -} - -/************************************************* -* Name: cbd3 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter eta=3. 
-* This function is only needed for Kyber-512 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *buf: pointer to input byte array -**************************************************/ -#if KYBER_ETA1 == 3 -static void cbd3(poly *r, const uint8_t buf[3*KYBER_N/4]) -{ - unsigned int i,j; - uint32_t t,d; - int16_t a,b; - - for(i=0;i>1) & 0x00249249; - d += (t>>2) & 0x00249249; - - for(j=0;j<4;j++) { - a = (d >> (6*j+0)) & 0x7; - b = (d >> (6*j+3)) & 0x7; - r->coeffs[4*i+j] = a - b; - } - } -} -#endif - -void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]) -{ -#if KYBER_ETA1 == 2 - cbd2(r, buf); -#elif KYBER_ETA1 == 3 - cbd3(r, buf); -#else -#error "This implementation requires eta1 in {2,3}" -#endif -} - -void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]) -{ -#if KYBER_ETA2 == 2 - cbd2(r, buf); -#else -#error "This implementation requires eta2 = 2" -#endif -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/cbd.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/cbd.h deleted file mode 100644 index 7b677d745d..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/cbd.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef CBD_H -#define CBD_H - -#include -#include "params.h" -#include "poly.h" - -#define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1) -void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]); - -#define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2) -void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/indcpa.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/indcpa.c deleted file mode 100644 index 726cfa985d..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/indcpa.c +++ /dev/null @@ -1,334 +0,0 @@ -#include -#include -#include -#include "params.h" -#include "indcpa.h" -#include "polyvec.h" -#include "poly.h" -#include "ntt.h" 
-#include "symmetric.h" -#include "randombytes.h" - -/************************************************* -* Name: pack_pk -* -* Description: Serialize the public key as concatenation of the -* serialized vector of polynomials pk -* and the public seed used to generate the matrix A. -* -* Arguments: uint8_t *r: pointer to the output serialized public key -* polyvec *pk: pointer to the input public-key polyvec -* const uint8_t *seed: pointer to the input public seed -**************************************************/ -static void pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES], - polyvec *pk, - const uint8_t seed[KYBER_SYMBYTES]) -{ - polyvec_tobytes(r, pk); - memcpy(r+KYBER_POLYVECBYTES, seed, KYBER_SYMBYTES); -} - -/************************************************* -* Name: unpack_pk -* -* Description: De-serialize public key from a byte array; -* approximate inverse of pack_pk -* -* Arguments: - polyvec *pk: pointer to output public-key polynomial vector -* - uint8_t *seed: pointer to output seed to generate matrix A -* - const uint8_t *packedpk: pointer to input serialized public key -**************************************************/ -static void unpack_pk(polyvec *pk, - uint8_t seed[KYBER_SYMBYTES], - const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES]) -{ - polyvec_frombytes(pk, packedpk); - memcpy(seed, packedpk+KYBER_POLYVECBYTES, KYBER_SYMBYTES); -} - -/************************************************* -* Name: pack_sk -* -* Description: Serialize the secret key -* -* Arguments: - uint8_t *r: pointer to output serialized secret key -* - polyvec *sk: pointer to input vector of polynomials (secret key) -**************************************************/ -static void pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk) -{ - polyvec_tobytes(r, sk); -} - -/************************************************* -* Name: unpack_sk -* -* Description: De-serialize the secret key; inverse of pack_sk -* -* Arguments: - polyvec *sk: pointer to output vector of 
polynomials (secret key) -* - const uint8_t *packedsk: pointer to input serialized secret key -**************************************************/ -static void unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES]) -{ - polyvec_frombytes(sk, packedsk); -} - -/************************************************* -* Name: pack_ciphertext -* -* Description: Serialize the ciphertext as concatenation of the -* compressed and serialized vector of polynomials b -* and the compressed and serialized polynomial v -* -* Arguments: uint8_t *r: pointer to the output serialized ciphertext -* poly *pk: pointer to the input vector of polynomials b -* poly *v: pointer to the input polynomial v -**************************************************/ -static void pack_ciphertext(uint8_t r[KYBER_INDCPA_BYTES], polyvec *b, poly *v) -{ - polyvec_compress(r, b); - poly_compress(r+KYBER_POLYVECCOMPRESSEDBYTES, v); -} - -/************************************************* -* Name: unpack_ciphertext -* -* Description: De-serialize and decompress ciphertext from a byte array; -* approximate inverse of pack_ciphertext -* -* Arguments: - polyvec *b: pointer to the output vector of polynomials b -* - poly *v: pointer to the output polynomial v -* - const uint8_t *c: pointer to the input serialized ciphertext -**************************************************/ -static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[KYBER_INDCPA_BYTES]) -{ - polyvec_decompress(b, c); - poly_decompress(v, c+KYBER_POLYVECCOMPRESSEDBYTES); -} - -/************************************************* -* Name: rej_uniform -* -* Description: Run rejection sampling on uniform random bytes to generate -* uniform random integers mod q -* -* Arguments: - int16_t *r: pointer to output buffer -* - unsigned int len: requested number of 16-bit integers (uniform mod q) -* - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes) -* - unsigned int buflen: length of input buffer 
in bytes -* -* Returns number of sampled 16-bit integers (at most len) -**************************************************/ -static unsigned int rej_uniform(int16_t *r, - unsigned int len, - const uint8_t *buf, - unsigned int buflen) -{ - unsigned int ctr, pos; - uint16_t val0, val1; - - ctr = pos = 0; - while(ctr < len && pos + 3 <= buflen) { - val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; - val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF; - pos += 3; - - if(val0 < KYBER_Q) - r[ctr++] = val0; - if(ctr < len && val1 < KYBER_Q) - r[ctr++] = val1; - } - - return ctr; -} - -#define gen_a(A,B) gen_matrix(A,B,0) -#define gen_at(A,B) gen_matrix(A,B,1) - -/************************************************* -* Name: gen_matrix -* -* Description: Deterministically generate matrix A (or the transpose of A) -* from a seed. Entries of the matrix are polynomials that look -* uniformly random. Performs rejection sampling on output of -* a XOF -* -* Arguments: - polyvec *a: pointer to ouptput matrix A -* - const uint8_t *seed: pointer to input seed -* - int transposed: boolean deciding whether A or A^T is generated -**************************************************/ -#if(XOF_BLOCKBYTES % 3) -#error "Implementation of gen_matrix assumes that XOF_BLOCKBYTES is a multiple of 3" -#endif - -#define GEN_MATRIX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES) -// Not static for benchmarking -void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed) -{ - unsigned int ctr, i, j; - unsigned int buflen; - uint8_t buf[GEN_MATRIX_NBLOCKS*XOF_BLOCKBYTES]; - xof_state state; - xof_init(&state, seed); - - for(i=0;i -#include "params.h" -#include "polyvec.h" - -#define gen_matrix KYBER_NAMESPACE(gen_matrix) -void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed); - -#define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand) -void indcpa_keypair_derand(uint8_t 
pk[KYBER_INDCPA_PUBLICKEYBYTES], - uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], - const uint8_t coins[KYBER_SYMBYTES]); - -#define indcpa_enc KYBER_NAMESPACE(indcpa_enc) -void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], - const uint8_t m[KYBER_INDCPA_MSGBYTES], - const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], - const uint8_t coins[KYBER_SYMBYTES]); - -#define indcpa_dec KYBER_NAMESPACE(indcpa_dec) -void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES], - const uint8_t c[KYBER_INDCPA_BYTES], - const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/kem.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/kem.c deleted file mode 100644 index 63abc1029c..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/kem.c +++ /dev/null @@ -1,169 +0,0 @@ -#include -#include -#include -#include "params.h" -#include "kem.h" -#include "indcpa.h" -#include "verify.h" -#include "symmetric.h" -#include "randombytes.h" -/************************************************* -* Name: crypto_kem_keypair_derand -* -* Description: Generates public and private key -* for CCA-secure Kyber key encapsulation mechanism -* -* Arguments: - uint8_t *pk: pointer to output public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key -* (an already allocated array of KYBER_SECRETKEYBYTES bytes) -* - uint8_t *coins: pointer to input randomness -* (an already allocated array filled with 2*KYBER_SYMBYTES random bytes) -** -* Returns 0 (success) -**************************************************/ -int crypto_kem_keypair_derand(uint8_t *pk, - uint8_t *sk, - const uint8_t *coins) -{ - indcpa_keypair_derand(pk, sk, coins); - memcpy(sk+KYBER_INDCPA_SECRETKEYBYTES, pk, KYBER_PUBLICKEYBYTES); - hash_h(sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); - /* Value z for pseudo-random output on reject */ - memcpy(sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, 
coins+KYBER_SYMBYTES, KYBER_SYMBYTES); - return 0; -} - -/************************************************* -* Name: crypto_kem_keypair -* -* Description: Generates public and private key -* for CCA-secure Kyber key encapsulation mechanism -* -* Arguments: - uint8_t *pk: pointer to output public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key -* (an already allocated array of KYBER_SECRETKEYBYTES bytes) -* -* Returns 0 (success) -**************************************************/ -int crypto_kem_keypair(uint8_t *pk, - uint8_t *sk) -{ - uint8_t coins[2*KYBER_SYMBYTES]; - randombytes(coins, 2*KYBER_SYMBYTES); - crypto_kem_keypair_derand(pk, sk, coins); - return 0; -} - -/************************************************* -* Name: crypto_kem_enc_derand -* -* Description: Generates cipher text and shared -* secret for given public key -* -* Arguments: - uint8_t *ct: pointer to output cipher text -* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) -* - uint8_t *ss: pointer to output shared secret -* (an already allocated array of KYBER_SSBYTES bytes) -* - const uint8_t *pk: pointer to input public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* - const uint8_t *coins: pointer to input randomness -* (an already allocated array filled with KYBER_SYMBYTES random bytes) -** -* Returns 0 (success) -**************************************************/ -int crypto_kem_enc_derand(uint8_t *ct, - uint8_t *ss, - const uint8_t *pk, - const uint8_t *coins) -{ - uint8_t buf[2*KYBER_SYMBYTES]; - /* Will contain key, coins */ - uint8_t kr[2*KYBER_SYMBYTES]; - - memcpy(buf, coins, KYBER_SYMBYTES); - - /* Multitarget countermeasure for coins + contributory KEM */ - hash_h(buf+KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); - hash_g(kr, buf, 2*KYBER_SYMBYTES); - - /* coins are in kr+KYBER_SYMBYTES */ - indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES); - - memcpy(ss,kr,KYBER_SYMBYTES); - return 0; -} - 
-/************************************************* -* Name: crypto_kem_enc -* -* Description: Generates cipher text and shared -* secret for given public key -* -* Arguments: - uint8_t *ct: pointer to output cipher text -* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) -* - uint8_t *ss: pointer to output shared secret -* (an already allocated array of KYBER_SSBYTES bytes) -* - const uint8_t *pk: pointer to input public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* -* Returns 0 (success) -**************************************************/ -int crypto_kem_enc(uint8_t *ct, - uint8_t *ss, - const uint8_t *pk) -{ - uint8_t coins[KYBER_SYMBYTES]; - randombytes(coins, KYBER_SYMBYTES); - crypto_kem_enc_derand(ct, ss, pk, coins); - return 0; -} - -/************************************************* -* Name: crypto_kem_dec -* -* Description: Generates shared secret for given -* cipher text and private key -* -* Arguments: - uint8_t *ss: pointer to output shared secret -* (an already allocated array of KYBER_SSBYTES bytes) -* - const uint8_t *ct: pointer to input cipher text -* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) -* - const uint8_t *sk: pointer to input private key -* (an already allocated array of KYBER_SECRETKEYBYTES bytes) -* -* Returns 0. -* -* On failure, ss will contain a pseudo-random value. 
-**************************************************/ -int crypto_kem_dec(uint8_t *ss, - const uint8_t *ct, - const uint8_t *sk) -{ - int fail; - uint8_t buf[2*KYBER_SYMBYTES]; - /* Will contain key, coins */ - uint8_t kr[2*KYBER_SYMBYTES]; - uint8_t cmp[KYBER_CIPHERTEXTBYTES+KYBER_SYMBYTES]; - const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES; - - indcpa_dec(buf, ct, sk); - - /* Multitarget countermeasure for coins + contributory KEM */ - memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES); - hash_g(kr, buf, 2*KYBER_SYMBYTES); - - /* coins are in kr+KYBER_SYMBYTES */ - indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES); - - fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); - - /* Compute rejection key */ - rkprf(ss,sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES,ct); - - /* Copy true key to return buffer if fail is false */ - cmov(ss,kr,KYBER_SYMBYTES,!fail); - - return 0; -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/kem.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/kem.h deleted file mode 100644 index 234f11966b..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/kem.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef KEM_H -#define KEM_H - -#include -#include "params.h" - -#define CRYPTO_SECRETKEYBYTES KYBER_SECRETKEYBYTES -#define CRYPTO_PUBLICKEYBYTES KYBER_PUBLICKEYBYTES -#define CRYPTO_CIPHERTEXTBYTES KYBER_CIPHERTEXTBYTES -#define CRYPTO_BYTES KYBER_SSBYTES - -#if (KYBER_K == 2) -#define CRYPTO_ALGNAME "Kyber512" -#elif (KYBER_K == 3) -#define CRYPTO_ALGNAME "Kyber768" -#elif (KYBER_K == 4) -#define CRYPTO_ALGNAME "Kyber1024" -#endif - -#define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand) -int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); - -#define crypto_kem_keypair KYBER_NAMESPACE(keypair) -int crypto_kem_keypair(uint8_t *pk, uint8_t *sk); - -#define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand) -int crypto_kem_enc_derand(uint8_t *ct, uint8_t 
*ss, const uint8_t *pk, const uint8_t *coins); - -#define crypto_kem_enc KYBER_NAMESPACE(enc) -int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); - -#define crypto_kem_dec KYBER_NAMESPACE(dec) -int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/ntt.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/ntt.c deleted file mode 100644 index 2f2eb10b2f..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/ntt.c +++ /dev/null @@ -1,146 +0,0 @@ -#include -#include "params.h" -#include "ntt.h" -#include "reduce.h" - -/* Code to generate zetas and zetas_inv used in the number-theoretic transform: - -#define KYBER_ROOT_OF_UNITY 17 - -static const uint8_t tree[128] = { - 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, - 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, - 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, - 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, - 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, - 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, - 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, - 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127 -}; - -void init_ntt() { - unsigned int i; - int16_t tmp[128]; - - tmp[0] = MONT; - for(i=1;i<128;i++) - tmp[i] = fqmul(tmp[i-1],MONT*KYBER_ROOT_OF_UNITY % KYBER_Q); - - for(i=0;i<128;i++) { - zetas[i] = tmp[tree[i]]; - if(zetas[i] > KYBER_Q/2) - zetas[i] -= KYBER_Q; - if(zetas[i] < -KYBER_Q/2) - zetas[i] += KYBER_Q; - } -} -*/ - -const int16_t zetas[128] = { - -1044, -758, -359, -1517, 1493, 1422, 287, 202, - -171, 622, 1577, 182, 962, -1202, -1474, 1468, - 573, -1325, 264, 383, -829, 1458, -1602, -130, - -681, 1017, 732, 608, -1542, 411, -205, -1571, - 1223, 652, -552, 1015, -1293, 1491, -282, -1544, - 516, -8, -320, -666, -1618, -1162, 
126, 1469, - -853, -90, -271, 830, 107, -1421, -247, -951, - -398, 961, -1508, -725, 448, -1065, 677, -1275, - -1103, 430, 555, 843, -1251, 871, 1550, 105, - 422, 587, 177, -235, -291, -460, 1574, 1653, - -246, 778, 1159, -147, -777, 1483, -602, 1119, - -1590, 644, -872, 349, 418, 329, -156, -75, - 817, 1097, 603, 610, 1322, -1285, -1465, 384, - -1215, -136, 1218, -1335, -874, 220, -1187, -1659, - -1185, -1530, -1278, 794, -1510, -854, -870, 478, - -108, -308, 996, 991, 958, -1460, 1522, 1628 -}; - -/************************************************* -* Name: fqmul -* -* Description: Multiplication followed by Montgomery reduction -* -* Arguments: - int16_t a: first factor -* - int16_t b: second factor -* -* Returns 16-bit integer congruent to a*b*R^{-1} mod q -**************************************************/ -static int16_t fqmul(int16_t a, int16_t b) { - return montgomery_reduce((int32_t)a*b); -} - -/************************************************* -* Name: ntt -* -* Description: Inplace number-theoretic transform (NTT) in Rq. -* input is in standard order, output is in bitreversed order -* -* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void ntt(int16_t r[256]) { - unsigned int len, start, j, k; - int16_t t, zeta; - - k = 1; - for(len = 128; len >= 2; len >>= 1) { - for(start = 0; start < 256; start = j + len) { - zeta = zetas[k++]; - for(j = start; j < start + len; j++) { - t = fqmul(zeta, r[j + len]); - r[j + len] = r[j] - t; - r[j] = r[j] + t; - } - } - } -} - -/************************************************* -* Name: invntt_tomont -* -* Description: Inplace inverse number-theoretic transform in Rq and -* multiplication by Montgomery factor 2^16. 
-* Input is in bitreversed order, output is in standard order -* -* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void invntt(int16_t r[256]) { - unsigned int start, len, j, k; - int16_t t, zeta; - const int16_t f = 1441; // mont^2/128 - - k = 127; - for(len = 2; len <= 128; len <<= 1) { - for(start = 0; start < 256; start = j + len) { - zeta = zetas[k--]; - for(j = start; j < start + len; j++) { - t = r[j]; - r[j] = barrett_reduce(t + r[j + len]); - r[j + len] = r[j + len] - t; - r[j + len] = fqmul(zeta, r[j + len]); - } - } - } - - for(j = 0; j < 256; j++) - r[j] = fqmul(r[j], f); -} - -/************************************************* -* Name: basemul -* -* Description: Multiplication of polynomials in Zq[X]/(X^2-zeta) -* used for multiplication of elements in Rq in NTT domain -* -* Arguments: - int16_t r[2]: pointer to the output polynomial -* - const int16_t a[2]: pointer to the first factor -* - const int16_t b[2]: pointer to the second factor -* - int16_t zeta: integer defining the reduction polynomial -**************************************************/ -void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) -{ - r[0] = fqmul(a[1], b[1]); - r[0] = fqmul(r[0], zeta); - r[0] += fqmul(a[0], b[0]); - r[1] = fqmul(a[0], b[1]); - r[1] += fqmul(a[1], b[0]); -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/ntt.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/ntt.h deleted file mode 100644 index 227ea74f08..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/ntt.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef NTT_H -#define NTT_H - -#include -#include "params.h" - -#define zetas KYBER_NAMESPACE(zetas) -extern const int16_t zetas[128]; - -#define ntt KYBER_NAMESPACE(ntt) -void ntt(int16_t poly[256]); - -#define invntt KYBER_NAMESPACE(invntt) -void invntt(int16_t poly[256]); - -#define basemul 
KYBER_NAMESPACE(basemul) -void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/params.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/params.h deleted file mode 100644 index fb4190b311..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/params.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef PARAMS_H -#define PARAMS_H - -#ifndef KYBER_K -#define KYBER_K 3 /* Change this for different security strengths */ -#endif - - -/* Don't change parameters below this line */ -#if (KYBER_K == 2) -#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_512_ref_##s -#elif (KYBER_K == 3) -#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_768_ref_##s -#elif (KYBER_K == 4) -#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_1024_ref_##s -#else -#error "KYBER_K must be in {2,3,4}" -#endif - -#define KYBER_N 256 -#define KYBER_Q 3329 - -#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ -#define KYBER_SSBYTES 32 /* size in bytes of shared key */ - -#define KYBER_POLYBYTES 384 -#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) - -#if KYBER_K == 2 -#define KYBER_ETA1 3 -#define KYBER_POLYCOMPRESSEDBYTES 128 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) -#elif KYBER_K == 3 -#define KYBER_ETA1 2 -#define KYBER_POLYCOMPRESSEDBYTES 128 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) -#elif KYBER_K == 4 -#define KYBER_ETA1 2 -#define KYBER_POLYCOMPRESSEDBYTES 160 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) -#endif - -#define KYBER_ETA2 2 - -#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES) -#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) -#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) -#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) - -#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) -/* 32 bytes of additional space to save H(pk) */ -#define 
KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) -#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES) - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/poly.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/poly.c deleted file mode 100644 index cbd3abfb54..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/poly.c +++ /dev/null @@ -1,360 +0,0 @@ -#include -#include "params.h" -#include "poly.h" -#include "ntt.h" -#include "reduce.h" -#include "cbd.h" -#include "symmetric.h" -#include "verify.h" - -/************************************************* -* Name: poly_compress -* -* Description: Compression and subsequent serialization of a polynomial -* -* Arguments: - uint8_t *r: pointer to output byte array -* (of length KYBER_POLYCOMPRESSEDBYTES) -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) -{ - unsigned int i,j; - int16_t u; - uint32_t d0; - uint8_t t[8]; - -#if (KYBER_POLYCOMPRESSEDBYTES == 128) - - for(i=0;icoeffs[8*i+j]; - u += (u >> 15) & KYBER_Q; -/* t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */ - d0 = u << 4; - d0 += 1665; - d0 *= 80635; - d0 >>= 28; - t[j] = d0 & 0xf; - } - - r[0] = t[0] | (t[1] << 4); - r[1] = t[2] | (t[3] << 4); - r[2] = t[4] | (t[5] << 4); - r[3] = t[6] | (t[7] << 4); - r += 4; - } -#elif (KYBER_POLYCOMPRESSEDBYTES == 160) - for(i=0;icoeffs[8*i+j]; - u += (u >> 15) & KYBER_Q; -/* t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */ - d0 = u << 5; - d0 += 1664; - d0 *= 40318; - d0 >>= 27; - t[j] = d0 & 0x1f; - } - - r[0] = (t[0] >> 0) | (t[1] << 5); - r[1] = (t[1] >> 3) | (t[2] << 2) | (t[3] << 7); - r[2] = (t[3] >> 1) | (t[4] << 4); - r[3] = (t[4] >> 4) | (t[5] << 1) | (t[6] << 6); - r[4] = (t[6] >> 2) | (t[7] << 3); - r += 5; - } -#else -#error "KYBER_POLYCOMPRESSEDBYTES needs to be in 
{128, 160}" -#endif -} - -/************************************************* -* Name: poly_decompress -* -* Description: De-serialization and subsequent decompression of a polynomial; -* approximate inverse of poly_compress -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *a: pointer to input byte array -* (of length KYBER_POLYCOMPRESSEDBYTES bytes) -**************************************************/ -void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]) -{ - unsigned int i; - -#if (KYBER_POLYCOMPRESSEDBYTES == 128) - for(i=0;icoeffs[2*i+0] = (((uint16_t)(a[0] & 15)*KYBER_Q) + 8) >> 4; - r->coeffs[2*i+1] = (((uint16_t)(a[0] >> 4)*KYBER_Q) + 8) >> 4; - a += 1; - } -#elif (KYBER_POLYCOMPRESSEDBYTES == 160) - unsigned int j; - uint8_t t[8]; - for(i=0;i> 0); - t[1] = (a[0] >> 5) | (a[1] << 3); - t[2] = (a[1] >> 2); - t[3] = (a[1] >> 7) | (a[2] << 1); - t[4] = (a[2] >> 4) | (a[3] << 4); - t[5] = (a[3] >> 1); - t[6] = (a[3] >> 6) | (a[4] << 2); - t[7] = (a[4] >> 3); - a += 5; - - for(j=0;j<8;j++) - r->coeffs[8*i+j] = ((uint32_t)(t[j] & 31)*KYBER_Q + 16) >> 5; - } -#else -#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}" -#endif -} - -/************************************************* -* Name: poly_tobytes -* -* Description: Serialization of a polynomial -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for KYBER_POLYBYTES bytes) -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a) -{ - unsigned int i; - uint16_t t0, t1; - - for(i=0;icoeffs[2*i]; - t0 += ((int16_t)t0 >> 15) & KYBER_Q; - t1 = a->coeffs[2*i+1]; - t1 += ((int16_t)t1 >> 15) & KYBER_Q; - r[3*i+0] = (t0 >> 0); - r[3*i+1] = (t0 >> 8) | (t1 << 4); - r[3*i+2] = (t1 >> 4); - } -} - -/************************************************* -* Name: poly_frombytes -* -* Description: De-serialization of a polynomial; -* inverse of 
poly_tobytes -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *a: pointer to input byte array -* (of KYBER_POLYBYTES bytes) -**************************************************/ -void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]) -{ - unsigned int i; - for(i=0;icoeffs[2*i] = ((a[3*i+0] >> 0) | ((uint16_t)a[3*i+1] << 8)) & 0xFFF; - r->coeffs[2*i+1] = ((a[3*i+1] >> 4) | ((uint16_t)a[3*i+2] << 4)) & 0xFFF; - } -} - -/************************************************* -* Name: poly_frommsg -* -* Description: Convert 32-byte message to polynomial -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *msg: pointer to input message -**************************************************/ -void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]) -{ - unsigned int i,j; - -#if (KYBER_INDCPA_MSGBYTES != KYBER_N/8) -#error "KYBER_INDCPA_MSGBYTES must be equal to KYBER_N/8 bytes!" -#endif - - for(i=0;icoeffs[8*i+j] = 0; - cmov_int16(r->coeffs+8*i+j, ((KYBER_Q+1)/2), (msg[i] >> j)&1); - } - } -} - -/************************************************* -* Name: poly_tomsg -* -* Description: Convert polynomial to 32-byte message -* -* Arguments: - uint8_t *msg: pointer to output message -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *a) -{ - unsigned int i,j; - uint32_t t; - - for(i=0;icoeffs[8*i+j]; - // t += ((int16_t)t >> 15) & KYBER_Q; - // t = (((t << 1) + KYBER_Q/2)/KYBER_Q) & 1; - t <<= 1; - t += 1665; - t *= 80635; - t >>= 28; - t &= 1; - msg[i] |= t << j; - } - } -} - -/************************************************* -* Name: poly_getnoise_eta1 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter KYBER_ETA1 -* -* Arguments: - poly *r: pointer to output polynomial -* - const 
uint8_t *seed: pointer to input seed -* (of length KYBER_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) -{ - uint8_t buf[KYBER_ETA1*KYBER_N/4]; - prf(buf, sizeof(buf), seed, nonce); - poly_cbd_eta1(r, buf); -} - -/************************************************* -* Name: poly_getnoise_eta2 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter KYBER_ETA2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *seed: pointer to input seed -* (of length KYBER_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) -{ - uint8_t buf[KYBER_ETA2*KYBER_N/4]; - prf(buf, sizeof(buf), seed, nonce); - poly_cbd_eta2(r, buf); -} - - -/************************************************* -* Name: poly_ntt -* -* Description: Computes negacyclic number-theoretic transform (NTT) of -* a polynomial in place; -* inputs assumed to be in normal order, output in bitreversed order -* -* Arguments: - uint16_t *r: pointer to in/output polynomial -**************************************************/ -void poly_ntt(poly *r) -{ - ntt(r->coeffs); - poly_reduce(r); -} - -/************************************************* -* Name: poly_invntt_tomont -* -* Description: Computes inverse of negacyclic number-theoretic transform (NTT) -* of a polynomial in place; -* inputs assumed to be in bitreversed order, output in normal order -* -* Arguments: - uint16_t *a: pointer to in/output polynomial -**************************************************/ -void poly_invntt_tomont(poly *r) -{ - invntt(r->coeffs); -} - -/************************************************* -* Name: poly_basemul_montgomery 
-* -* Description: Multiplication of two polynomials in NTT domain -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_basemul_montgomery(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - for(i=0;icoeffs[4*i], &a->coeffs[4*i], &b->coeffs[4*i], zetas[64+i]); - basemul(&r->coeffs[4*i+2], &a->coeffs[4*i+2], &b->coeffs[4*i+2], -zetas[64+i]); - } -} - -/************************************************* -* Name: poly_tomont -* -* Description: Inplace conversion of all coefficients of a polynomial -* from normal domain to Montgomery domain -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_tomont(poly *r) -{ - unsigned int i; - const int16_t f = (1ULL << 32) % KYBER_Q; - for(i=0;icoeffs[i] = montgomery_reduce((int32_t)r->coeffs[i]*f); -} - -/************************************************* -* Name: poly_reduce -* -* Description: Applies Barrett reduction to all coefficients of a polynomial -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_reduce(poly *r) -{ - unsigned int i; - for(i=0;icoeffs[i] = barrett_reduce(r->coeffs[i]); -} - -/************************************************* -* Name: poly_add -* -* Description: Add two polynomials; no modular reduction is performed -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_add(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - for(i=0;icoeffs[i] = a->coeffs[i] + b->coeffs[i]; -} - -/************************************************* -* 
Name: poly_sub -* -* Description: Subtract two polynomials; no modular reduction is performed -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_sub(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - for(i=0;icoeffs[i] = a->coeffs[i] - b->coeffs[i]; -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/poly.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/poly.h deleted file mode 100644 index 9a99c7cdad..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/poly.h +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef POLY_H -#define POLY_H - -#include -#include "params.h" - -/* - * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial - * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1] - */ -typedef struct{ - int16_t coeffs[KYBER_N]; -} poly; - -#define poly_compress KYBER_NAMESPACE(poly_compress) -void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a); -#define poly_decompress KYBER_NAMESPACE(poly_decompress) -void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]); - -#define poly_tobytes KYBER_NAMESPACE(poly_tobytes) -void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a); -#define poly_frombytes KYBER_NAMESPACE(poly_frombytes) -void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]); - -#define poly_frommsg KYBER_NAMESPACE(poly_frommsg) -void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]); -#define poly_tomsg KYBER_NAMESPACE(poly_tomsg) -void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r); - -#define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1) -void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); - -#define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2) -void poly_getnoise_eta2(poly *r, 
const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); - -#define poly_ntt KYBER_NAMESPACE(poly_ntt) -void poly_ntt(poly *r); -#define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont) -void poly_invntt_tomont(poly *r); -#define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery) -void poly_basemul_montgomery(poly *r, const poly *a, const poly *b); -#define poly_tomont KYBER_NAMESPACE(poly_tomont) -void poly_tomont(poly *r); - -#define poly_reduce KYBER_NAMESPACE(poly_reduce) -void poly_reduce(poly *r); - -#define poly_add KYBER_NAMESPACE(poly_add) -void poly_add(poly *r, const poly *a, const poly *b); -#define poly_sub KYBER_NAMESPACE(poly_sub) -void poly_sub(poly *r, const poly *a, const poly *b); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/polyvec.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/polyvec.c deleted file mode 100644 index 669f6a5f1d..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/polyvec.c +++ /dev/null @@ -1,246 +0,0 @@ -#include -#include "params.h" -#include "poly.h" -#include "polyvec.h" - -/************************************************* -* Name: polyvec_compress -* -* Description: Compress and serialize vector of polynomials -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for KYBER_POLYVECCOMPRESSEDBYTES) -* - const polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) -{ - unsigned int i,j,k; - uint64_t d0; - -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) - uint16_t t[8]; - for(i=0;ivec[i].coeffs[8*j+k]; - t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; -/* t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */ - d0 = t[k]; - d0 <<= 11; - d0 += 1664; - d0 *= 645084; - d0 >>= 31; - t[k] = d0 & 0x7ff; - } - - r[ 0] = (t[0] >> 0); - r[ 1] = (t[0] >> 8) | (t[1] << 3); - r[ 2] = (t[1] >> 5) 
| (t[2] << 6); - r[ 3] = (t[2] >> 2); - r[ 4] = (t[2] >> 10) | (t[3] << 1); - r[ 5] = (t[3] >> 7) | (t[4] << 4); - r[ 6] = (t[4] >> 4) | (t[5] << 7); - r[ 7] = (t[5] >> 1); - r[ 8] = (t[5] >> 9) | (t[6] << 2); - r[ 9] = (t[6] >> 6) | (t[7] << 5); - r[10] = (t[7] >> 3); - r += 11; - } - } -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) - uint16_t t[4]; - for(i=0;ivec[i].coeffs[4*j+k]; - t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; -/* t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */ - d0 = t[k]; - d0 <<= 10; - d0 += 1665; - d0 *= 1290167; - d0 >>= 32; - t[k] = d0 & 0x3ff; - } - - r[0] = (t[0] >> 0); - r[1] = (t[0] >> 8) | (t[1] << 2); - r[2] = (t[1] >> 6) | (t[2] << 4); - r[3] = (t[2] >> 4) | (t[3] << 6); - r[4] = (t[3] >> 2); - r += 5; - } - } -#else -#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" -#endif -} - -/************************************************* -* Name: polyvec_decompress -* -* Description: De-serialize and decompress vector of polynomials; -* approximate inverse of polyvec_compress -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const uint8_t *a: pointer to input byte array -* (of length KYBER_POLYVECCOMPRESSEDBYTES) -**************************************************/ -void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]) -{ - unsigned int i,j,k; - -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) - uint16_t t[8]; - for(i=0;i> 0) | ((uint16_t)a[ 1] << 8); - t[1] = (a[1] >> 3) | ((uint16_t)a[ 2] << 5); - t[2] = (a[2] >> 6) | ((uint16_t)a[ 3] << 2) | ((uint16_t)a[4] << 10); - t[3] = (a[4] >> 1) | ((uint16_t)a[ 5] << 7); - t[4] = (a[5] >> 4) | ((uint16_t)a[ 6] << 4); - t[5] = (a[6] >> 7) | ((uint16_t)a[ 7] << 1) | ((uint16_t)a[8] << 9); - t[6] = (a[8] >> 2) | ((uint16_t)a[ 9] << 6); - t[7] = (a[9] >> 5) | ((uint16_t)a[10] << 3); - a += 11; - - for(k=0;k<8;k++) - r->vec[i].coeffs[8*j+k] = ((uint32_t)(t[k] & 0x7FF)*KYBER_Q + 1024) >> 
11; - } - } -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) - uint16_t t[4]; - for(i=0;i> 0) | ((uint16_t)a[1] << 8); - t[1] = (a[1] >> 2) | ((uint16_t)a[2] << 6); - t[2] = (a[2] >> 4) | ((uint16_t)a[3] << 4); - t[3] = (a[3] >> 6) | ((uint16_t)a[4] << 2); - a += 5; - - for(k=0;k<4;k++) - r->vec[i].coeffs[4*j+k] = ((uint32_t)(t[k] & 0x3FF)*KYBER_Q + 512) >> 10; - } - } -#else -#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" -#endif -} - -/************************************************* -* Name: polyvec_tobytes -* -* Description: Serialize vector of polynomials -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for KYBER_POLYVECBYTES) -* - const polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_frombytes -* -* Description: De-serialize vector of polynomials; -* inverse of polyvec_tobytes -* -* Arguments: - uint8_t *r: pointer to output byte array -* - const polyvec *a: pointer to input vector of polynomials -* (of length KYBER_POLYVECBYTES) -**************************************************/ -void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]) -{ - unsigned int i; - for(i=0;ivec[i], a+i*KYBER_POLYBYTES); -} - -/************************************************* -* Name: polyvec_ntt -* -* Description: Apply forward NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_ntt(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_invntt_tomont -* -* Description: Apply inverse NTT to all elements of a vector of polynomials -* and multiply by 
Montgomery factor 2^16 -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_invntt_tomont(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_basemul_acc_montgomery -* -* Description: Multiply elements of a and b in NTT domain, accumulate into r, -* and multiply by 2^-16. -* -* Arguments: - poly *r: pointer to output polynomial -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) -{ - unsigned int i; - poly t; - - poly_basemul_montgomery(r, &a->vec[0], &b->vec[0]); - for(i=1;ivec[i], &b->vec[i]); - poly_add(r, r, &t); - } - - poly_reduce(r); -} - -/************************************************* -* Name: polyvec_reduce -* -* Description: Applies Barrett reduction to each coefficient -* of each element of a vector of polynomials; -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - polyvec *r: pointer to input/output polynomial -**************************************************/ -void polyvec_reduce(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_add -* -* Description: Add vectors of polynomials -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) -{ - unsigned int i; - for(i=0;ivec[i], &a->vec[i], &b->vec[i]); -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/polyvec.h 
b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/polyvec.h deleted file mode 100644 index 57b605494e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/polyvec.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef POLYVEC_H -#define POLYVEC_H - -#include -#include "params.h" -#include "poly.h" - -typedef struct{ - poly vec[KYBER_K]; -} polyvec; - -#define polyvec_compress KYBER_NAMESPACE(polyvec_compress) -void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a); -#define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress) -void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]); - -#define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes) -void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a); -#define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes) -void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]); - -#define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt) -void polyvec_ntt(polyvec *r); -#define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont) -void polyvec_invntt_tomont(polyvec *r); - -#define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery) -void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b); - -#define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce) -void polyvec_reduce(polyvec *r); - -#define polyvec_add KYBER_NAMESPACE(polyvec_add) -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/reduce.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/reduce.c deleted file mode 100644 index 9d8e7edf83..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/reduce.c +++ /dev/null @@ -1,42 +0,0 @@ -#include -#include "params.h" -#include "reduce.h" - -/************************************************* -* Name: montgomery_reduce -* -* Description: Montgomery reduction; given a 
32-bit integer a, computes -* 16-bit integer congruent to a * R^-1 mod q, where R=2^16 -* -* Arguments: - int32_t a: input integer to be reduced; -* has to be in {-q2^15,...,q2^15-1} -* -* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. -**************************************************/ -int16_t montgomery_reduce(int32_t a) -{ - int16_t t; - - t = (int16_t)a*QINV; - t = (a - (int32_t)t*KYBER_Q) >> 16; - return t; -} - -/************************************************* -* Name: barrett_reduce -* -* Description: Barrett reduction; given a 16-bit integer a, computes -* centered representative congruent to a mod q in {-(q-1)/2,...,(q-1)/2} -* -* Arguments: - int16_t a: input integer to be reduced -* -* Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q. -**************************************************/ -int16_t barrett_reduce(int16_t a) { - int16_t t; - const int16_t v = ((1<<26) + KYBER_Q/2)/KYBER_Q; - - t = ((int32_t)v*a + (1<<25)) >> 26; - t *= KYBER_Q; - return a - t; -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/reduce.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/reduce.h deleted file mode 100644 index c1bc1e4c7b..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/reduce.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef REDUCE_H -#define REDUCE_H - -#include -#include "params.h" - -#define MONT -1044 // 2^16 mod q -#define QINV -3327 // q^-1 mod 2^16 - -#define montgomery_reduce KYBER_NAMESPACE(montgomery_reduce) -int16_t montgomery_reduce(int32_t a); - -#define barrett_reduce KYBER_NAMESPACE(barrett_reduce) -int16_t barrett_reduce(int16_t a); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/symmetric-shake.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/symmetric-shake.c deleted file mode 100644 index 20f451882e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/symmetric-shake.c +++ /dev/null @@ -1,74 
+0,0 @@ -#include -#include -#include -#include "params.h" -#include "symmetric.h" -#include "fips202.h" - -/************************************************* -* Name: kyber_shake128_absorb -* -* Description: Absorb step of the SHAKE128 specialized for the Kyber context. -* -* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state -* - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state -* - uint8_t i: additional byte of input -* - uint8_t j: additional byte of input -**************************************************/ -void kyber_shake128_absorb(shake128incctx *state, - const uint8_t seed[KYBER_SYMBYTES], - uint8_t x, - uint8_t y) -{ - uint8_t extseed[KYBER_SYMBYTES+2]; - - memcpy(extseed, seed, KYBER_SYMBYTES); - extseed[KYBER_SYMBYTES+0] = x; - extseed[KYBER_SYMBYTES+1] = y; - - shake128_absorb_once(state, extseed, sizeof(extseed)); -} - -/************************************************* -* Name: kyber_shake256_prf -* -* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -* and then generates outlen bytes of SHAKE256 output -* -* Arguments: - uint8_t *out: pointer to output -* - size_t outlen: number of requested output bytes -* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) -* - uint8_t nonce: single-byte nonce (public PRF input) -**************************************************/ -void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce) -{ - uint8_t extkey[KYBER_SYMBYTES+1]; - - memcpy(extkey, key, KYBER_SYMBYTES); - extkey[KYBER_SYMBYTES] = nonce; - - shake256(out, outlen, extkey, sizeof(extkey)); -} - -/************************************************* -* Name: kyber_shake256_prf -* -* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -* and then generates outlen bytes of SHAKE256 output -* -* Arguments: - uint8_t *out: pointer to output -* - size_t outlen: number of requested output 
bytes -* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) -* - uint8_t nonce: single-byte nonce (public PRF input) -**************************************************/ -void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]) -{ - shake256incctx s; - - shake256_inc_init(&s); - shake256_inc_absorb(&s, key, KYBER_SYMBYTES); - shake256_inc_absorb(&s, input, KYBER_CIPHERTEXTBYTES); - shake256_inc_finalize(&s); - shake256_inc_squeeze(out, KYBER_SSBYTES, &s); - shake256_inc_ctx_release(&s); -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/symmetric.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/symmetric.h deleted file mode 100644 index 2acc66f98d..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/symmetric.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef SYMMETRIC_H -#define SYMMETRIC_H - -#include -#include -#include "params.h" - -#include "fips202.h" - -typedef shake128incctx xof_state; - -#define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb) -void kyber_shake128_absorb(shake128incctx *s, - const uint8_t seed[KYBER_SYMBYTES], - uint8_t x, - uint8_t y); - -#define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf) -void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce); - -#define kyber_shake256_rkprf KYBER_NAMESPACE(kyber_shake256_rkprf) -void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]); - -#define XOF_BLOCKBYTES SHAKE128_RATE - -#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) -#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) -#define xof_init(STATE, SEED) shake128_inc_init(STATE) -#define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y) -#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) 
-#define xof_release(STATE) shake128_inc_ctx_release(STATE) -#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE) -#define rkprf(OUT, KEY, INPUT) kyber_shake256_rkprf(OUT, KEY, INPUT) - -#endif /* SYMMETRIC_H */ diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/verify.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/verify.c deleted file mode 100644 index 914ccd448f..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-512_ref/verify.c +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include -#include "verify.h" - -/************************************************* -* Name: verify -* -* Description: Compare two arrays for equality in constant time. -* -* Arguments: const uint8_t *a: pointer to first byte array -* const uint8_t *b: pointer to second byte array -* size_t len: length of the byte arrays -* -* Returns 0 if the byte arrays are equal, 1 otherwise -**************************************************/ -int verify(const uint8_t *a, const uint8_t *b, size_t len) -{ - size_t i; - uint8_t r = 0; - - for(i=0;i> 63; -} - -/************************************************* -* Name: cmov -* -* Description: Copy len bytes from x to r if b is 1; -* don't modify x if b is 0. Requires b to be in {0,1}; -* assumes two's complement representation of negative integers. -* Runs in constant time. -* -* Arguments: uint8_t *r: pointer to output byte array -* const uint8_t *x: pointer to input byte array -* size_t len: Amount of bytes to be copied -* uint8_t b: Condition bit; has to be in {0,1} -**************************************************/ -void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) -{ - size_t i; - -#if defined(__GNUC__) || defined(__clang__) - // Prevent the compiler from - // 1) inferring that b is 0/1-valued, and - // 2) handling the two cases with a branch. 
- // This is not necessary when verify.c and kem.c are separate translation - // units, but we expect that downstream consumers will copy this code and/or - // change how it is built. - __asm__("" : "+r"(b) : /* no inputs */); -#endif - - b = -b; - for(i=0;i -#include -#include "params.h" - -#define verify KYBER_NAMESPACE(verify) -int verify(const uint8_t *a, const uint8_t *b, size_t len); - -#define cmov KYBER_NAMESPACE(cmov) -void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); - -#define cmov_int16 KYBER_NAMESPACE(cmov_int16) -void cmov_int16(int16_t *r, int16_t v, uint16_t b); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/LICENSE b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/LICENSE deleted file mode 100644 index 7922ab8007..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/LICENSE +++ /dev/null @@ -1,6 +0,0 @@ -Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/); -or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html). - -For Keccak and AES we are using public-domain -code from sources and by authors listed in -comments on top of the respective files. 
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/align.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/align.h deleted file mode 100644 index 3463866f37..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/align.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef ALIGN_H -#define ALIGN_H - -#include -#include - -#define ALIGNED_UINT8(N) \ - union { \ - uint8_t coeffs[N]; \ - __m256i vec[(N+31)/32]; \ - } - -#define ALIGNED_INT16(N) \ - union { \ - int16_t coeffs[N]; \ - __m256i vec[(N+15)/16]; \ - } - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/api.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/api.h deleted file mode 100644 index a154e80f1d..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/api.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef API_H -#define API_H - -#include - -#define pqcrystals_kyber512_SECRETKEYBYTES 1632 -#define pqcrystals_kyber512_PUBLICKEYBYTES 800 -#define pqcrystals_kyber512_CIPHERTEXTBYTES 768 -#define pqcrystals_kyber512_KEYPAIRCOINBYTES 64 -#define pqcrystals_kyber512_ENCCOINBYTES 32 -#define pqcrystals_kyber512_BYTES 32 - -#define pqcrystals_kyber512_avx2_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES -#define pqcrystals_kyber512_avx2_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES -#define pqcrystals_kyber512_avx2_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES -#define pqcrystals_kyber512_avx2_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES -#define pqcrystals_kyber512_avx2_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES -#define pqcrystals_kyber512_avx2_BYTES pqcrystals_kyber512_BYTES - -int pqcrystals_kyber512_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); -int pqcrystals_kyber512_avx2_keypair(uint8_t *pk, uint8_t *sk); -int pqcrystals_kyber512_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); -int pqcrystals_kyber512_avx2_enc(uint8_t *ct, uint8_t *ss, 
const uint8_t *pk); -int pqcrystals_kyber512_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#define pqcrystals_kyber768_SECRETKEYBYTES 2400 -#define pqcrystals_kyber768_PUBLICKEYBYTES 1184 -#define pqcrystals_kyber768_CIPHERTEXTBYTES 1088 -#define pqcrystals_kyber768_KEYPAIRCOINBYTES 64 -#define pqcrystals_kyber768_ENCCOINBYTES 32 -#define pqcrystals_kyber768_BYTES 32 - -#define pqcrystals_kyber768_avx2_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES -#define pqcrystals_kyber768_avx2_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES -#define pqcrystals_kyber768_avx2_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES -#define pqcrystals_kyber768_avx2_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES -#define pqcrystals_kyber768_avx2_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES -#define pqcrystals_kyber768_avx2_BYTES pqcrystals_kyber768_BYTES - -int pqcrystals_kyber768_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); -int pqcrystals_kyber768_avx2_keypair(uint8_t *pk, uint8_t *sk); -int pqcrystals_kyber768_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); -int pqcrystals_kyber768_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -int pqcrystals_kyber768_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#define pqcrystals_kyber1024_SECRETKEYBYTES 3168 -#define pqcrystals_kyber1024_PUBLICKEYBYTES 1568 -#define pqcrystals_kyber1024_CIPHERTEXTBYTES 1568 -#define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64 -#define pqcrystals_kyber1024_ENCCOINBYTES 32 -#define pqcrystals_kyber1024_BYTES 32 - -#define pqcrystals_kyber1024_avx2_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES -#define pqcrystals_kyber1024_avx2_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES -#define pqcrystals_kyber1024_avx2_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES -#define pqcrystals_kyber1024_avx2_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES -#define 
pqcrystals_kyber1024_avx2_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES -#define pqcrystals_kyber1024_avx2_BYTES pqcrystals_kyber1024_BYTES - -int pqcrystals_kyber1024_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); -int pqcrystals_kyber1024_avx2_keypair(uint8_t *pk, uint8_t *sk); -int pqcrystals_kyber1024_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); -int pqcrystals_kyber1024_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -int pqcrystals_kyber1024_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/basemul.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/basemul.S deleted file mode 100644 index 36990639b2..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/basemul.S +++ /dev/null @@ -1,105 +0,0 @@ -#include "consts.h" - -.macro schoolbook off -vmovdqa _16XQINV*2(%rcx),%ymm0 -vmovdqa (64*\off+ 0)*2(%rsi),%ymm1 # a0 -vmovdqa (64*\off+16)*2(%rsi),%ymm2 # b0 -vmovdqa (64*\off+32)*2(%rsi),%ymm3 # a1 -vmovdqa (64*\off+48)*2(%rsi),%ymm4 # b1 - -vpmullw %ymm0,%ymm1,%ymm9 # a0.lo -vpmullw %ymm0,%ymm2,%ymm10 # b0.lo -vpmullw %ymm0,%ymm3,%ymm11 # a1.lo -vpmullw %ymm0,%ymm4,%ymm12 # b1.lo - -vmovdqa (64*\off+ 0)*2(%rdx),%ymm5 # c0 -vmovdqa (64*\off+16)*2(%rdx),%ymm6 # d0 - -vpmulhw %ymm5,%ymm1,%ymm13 # a0c0.hi -vpmulhw %ymm6,%ymm1,%ymm1 # a0d0.hi -vpmulhw %ymm5,%ymm2,%ymm14 # b0c0.hi -vpmulhw %ymm6,%ymm2,%ymm2 # b0d0.hi - -vmovdqa (64*\off+32)*2(%rdx),%ymm7 # c1 -vmovdqa (64*\off+48)*2(%rdx),%ymm8 # d1 - -vpmulhw %ymm7,%ymm3,%ymm15 # a1c1.hi -vpmulhw %ymm8,%ymm3,%ymm3 # a1d1.hi -vpmulhw %ymm7,%ymm4,%ymm0 # b1c1.hi -vpmulhw %ymm8,%ymm4,%ymm4 # b1d1.hi - -vmovdqa %ymm13,(%rsp) - -vpmullw %ymm5,%ymm9,%ymm13 # a0c0.lo -vpmullw %ymm6,%ymm9,%ymm9 # a0d0.lo -vpmullw %ymm5,%ymm10,%ymm5 # b0c0.lo -vpmullw %ymm6,%ymm10,%ymm10 # b0d0.lo - -vpmullw %ymm7,%ymm11,%ymm6 # a1c1.lo -vpmullw 
%ymm8,%ymm11,%ymm11 # a1d1.lo -vpmullw %ymm7,%ymm12,%ymm7 # b1c1.lo -vpmullw %ymm8,%ymm12,%ymm12 # b1d1.lo - -vmovdqa _16XQ*2(%rcx),%ymm8 -vpmulhw %ymm8,%ymm13,%ymm13 -vpmulhw %ymm8,%ymm9,%ymm9 -vpmulhw %ymm8,%ymm5,%ymm5 -vpmulhw %ymm8,%ymm10,%ymm10 -vpmulhw %ymm8,%ymm6,%ymm6 -vpmulhw %ymm8,%ymm11,%ymm11 -vpmulhw %ymm8,%ymm7,%ymm7 -vpmulhw %ymm8,%ymm12,%ymm12 - -vpsubw (%rsp),%ymm13,%ymm13 # -a0c0 -vpsubw %ymm9,%ymm1,%ymm9 # a0d0 -vpsubw %ymm5,%ymm14,%ymm5 # b0c0 -vpsubw %ymm10,%ymm2,%ymm10 # b0d0 - -vpsubw %ymm6,%ymm15,%ymm6 # a1c1 -vpsubw %ymm11,%ymm3,%ymm11 # a1d1 -vpsubw %ymm7,%ymm0,%ymm7 # b1c1 -vpsubw %ymm12,%ymm4,%ymm12 # b1d1 - -vmovdqa (%r9),%ymm0 -vmovdqa 32(%r9),%ymm1 -vpmullw %ymm0,%ymm10,%ymm2 -vpmullw %ymm0,%ymm12,%ymm3 -vpmulhw %ymm1,%ymm10,%ymm10 -vpmulhw %ymm1,%ymm12,%ymm12 -vpmulhw %ymm8,%ymm2,%ymm2 -vpmulhw %ymm8,%ymm3,%ymm3 -vpsubw %ymm2,%ymm10,%ymm10 # rb0d0 -vpsubw %ymm3,%ymm12,%ymm12 # rb1d1 - -vpaddw %ymm5,%ymm9,%ymm9 -vpaddw %ymm7,%ymm11,%ymm11 -vpsubw %ymm13,%ymm10,%ymm13 -vpsubw %ymm12,%ymm6,%ymm6 - -vmovdqa %ymm13,(64*\off+ 0)*2(%rdi) -vmovdqa %ymm9,(64*\off+16)*2(%rdi) -vmovdqa %ymm6,(64*\off+32)*2(%rdi) -vmovdqa %ymm11,(64*\off+48)*2(%rdi) -.endm - -.text -.global cdecl(basemul_avx) -cdecl(basemul_avx): -mov %rsp,%r8 -and $-32,%rsp -sub $32,%rsp - -lea (_ZETAS_EXP+176)*2(%rcx),%r9 -schoolbook 0 - -add $32*2,%r9 -schoolbook 1 - -add $192*2,%r9 -schoolbook 2 - -add $32*2,%r9 -schoolbook 3 - -mov %r8,%rsp -ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/cbd.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/cbd.c deleted file mode 100644 index dad473c79e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/cbd.c +++ /dev/null @@ -1,144 +0,0 @@ -#include -#include -#include "params.h" -#include "cbd.h" - -/************************************************* -* Name: cbd2 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed 
according to -* a centered binomial distribution with parameter eta=2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const __m256i *buf: pointer to aligned input byte array -**************************************************/ -static void cbd2(poly * restrict r, const __m256i buf[2*KYBER_N/128]) -{ - unsigned int i; - __m256i f0, f1, f2, f3; - const __m256i mask55 = _mm256_set1_epi32(0x55555555); - const __m256i mask33 = _mm256_set1_epi32(0x33333333); - const __m256i mask03 = _mm256_set1_epi32(0x03030303); - const __m256i mask0F = _mm256_set1_epi32(0x0F0F0F0F); - - for(i = 0; i < KYBER_N/64; i++) { - f0 = _mm256_load_si256(&buf[i]); - - f1 = _mm256_srli_epi16(f0, 1); - f0 = _mm256_and_si256(mask55, f0); - f1 = _mm256_and_si256(mask55, f1); - f0 = _mm256_add_epi8(f0, f1); - - f1 = _mm256_srli_epi16(f0, 2); - f0 = _mm256_and_si256(mask33, f0); - f1 = _mm256_and_si256(mask33, f1); - f0 = _mm256_add_epi8(f0, mask33); - f0 = _mm256_sub_epi8(f0, f1); - - f1 = _mm256_srli_epi16(f0, 4); - f0 = _mm256_and_si256(mask0F, f0); - f1 = _mm256_and_si256(mask0F, f1); - f0 = _mm256_sub_epi8(f0, mask03); - f1 = _mm256_sub_epi8(f1, mask03); - - f2 = _mm256_unpacklo_epi8(f0, f1); - f3 = _mm256_unpackhi_epi8(f0, f1); - - f0 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f2)); - f1 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f2,1)); - f2 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f3)); - f3 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f3,1)); - - _mm256_store_si256(&r->vec[4*i+0], f0); - _mm256_store_si256(&r->vec[4*i+1], f2); - _mm256_store_si256(&r->vec[4*i+2], f1); - _mm256_store_si256(&r->vec[4*i+3], f3); - } -} - -#if KYBER_ETA1 == 3 -/************************************************* -* Name: cbd3 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter eta=3 -* This function is only needed for Kyber-512 -* -* Arguments: - poly *r: pointer 
to output polynomial -* - const __m256i *buf: pointer to aligned input byte array -**************************************************/ -static void cbd3(poly * restrict r, const uint8_t buf[3*KYBER_N/4+8]) -{ - unsigned int i; - __m256i f0, f1, f2, f3; - const __m256i mask249 = _mm256_set1_epi32(0x249249); - const __m256i mask6DB = _mm256_set1_epi32(0x6DB6DB); - const __m256i mask07 = _mm256_set1_epi32(7); - const __m256i mask70 = _mm256_set1_epi32(7 << 16); - const __m256i mask3 = _mm256_set1_epi16(3); - const __m256i shufbidx = _mm256_set_epi8(-1,15,14,13,-1,12,11,10,-1, 9, 8, 7,-1, 6, 5, 4, - -1,11,10, 9,-1, 8, 7, 6,-1, 5, 4, 3,-1, 2, 1, 0); - - for(i = 0; i < KYBER_N/32; i++) { - f0 = _mm256_loadu_si256((__m256i *)&buf[24*i]); - f0 = _mm256_permute4x64_epi64(f0,0x94); - f0 = _mm256_shuffle_epi8(f0,shufbidx); - - f1 = _mm256_srli_epi32(f0,1); - f2 = _mm256_srli_epi32(f0,2); - f0 = _mm256_and_si256(mask249,f0); - f1 = _mm256_and_si256(mask249,f1); - f2 = _mm256_and_si256(mask249,f2); - f0 = _mm256_add_epi32(f0,f1); - f0 = _mm256_add_epi32(f0,f2); - - f1 = _mm256_srli_epi32(f0,3); - f0 = _mm256_add_epi32(f0,mask6DB); - f0 = _mm256_sub_epi32(f0,f1); - - f1 = _mm256_slli_epi32(f0,10); - f2 = _mm256_srli_epi32(f0,12); - f3 = _mm256_srli_epi32(f0, 2); - f0 = _mm256_and_si256(f0,mask07); - f1 = _mm256_and_si256(f1,mask70); - f2 = _mm256_and_si256(f2,mask07); - f3 = _mm256_and_si256(f3,mask70); - f0 = _mm256_add_epi16(f0,f1); - f1 = _mm256_add_epi16(f2,f3); - f0 = _mm256_sub_epi16(f0,mask3); - f1 = _mm256_sub_epi16(f1,mask3); - - f2 = _mm256_unpacklo_epi32(f0,f1); - f3 = _mm256_unpackhi_epi32(f0,f1); - - f0 = _mm256_permute2x128_si256(f2,f3,0x20); - f1 = _mm256_permute2x128_si256(f2,f3,0x31); - - _mm256_store_si256(&r->vec[2*i+0], f0); - _mm256_store_si256(&r->vec[2*i+1], f1); - } -} -#endif - -/* buf 32 bytes longer for cbd3 */ -void poly_cbd_eta1(poly *r, const __m256i buf[KYBER_ETA1*KYBER_N/128+1]) -{ -#if KYBER_ETA1 == 2 - cbd2(r, buf); -#elif KYBER_ETA1 == 3 - 
cbd3(r, (uint8_t *)buf); -#else -#error "This implementation requires eta1 in {2,3}" -#endif -} - -void poly_cbd_eta2(poly *r, const __m256i buf[KYBER_ETA2*KYBER_N/128]) -{ -#if KYBER_ETA2 == 2 - cbd2(r, buf); -#else -#error "This implementation requires eta2 = 2" -#endif -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/cbd.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/cbd.h deleted file mode 100644 index 05788e06b4..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/cbd.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef CBD_H -#define CBD_H - -#include -#include -#include "params.h" -#include "poly.h" - -#define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1) -void poly_cbd_eta1(poly *r, const __m256i buf[KYBER_ETA1*KYBER_N/128+1]); - -#define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2) -void poly_cbd_eta2(poly *r, const __m256i buf[KYBER_ETA2*KYBER_N/128]); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/consts.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/consts.c deleted file mode 100644 index 84e596893d..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/consts.c +++ /dev/null @@ -1,121 +0,0 @@ -#include "align.h" -#include "params.h" -#include "consts.h" - -#define Q KYBER_Q -#define MONT -1044 // 2^16 mod q -#define QINV -3327 // q^-1 mod 2^16 -#define V 20159 // floor(2^26/q + 0.5) -#define FHI 1441 // mont^2/128 -#define FLO -10079 // qinv*FHI -#define MONTSQHI 1353 // mont^2 -#define MONTSQLO 20553 // qinv*MONTSQHI -#define MASK 4095 -#define SHIFT 32 - -const qdata_t qdata = {{ -#define _16XQ 0 - Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, - -#define _16XQINV 16 - QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, - QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, - -#define _16XV 32 - V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, - -#define _16XFLO 48 - FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO, - FLO, FLO, FLO, FLO, FLO, FLO, FLO, 
FLO, - -#define _16XFHI 64 - FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, - FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, - -#define _16XMONTSQLO 80 - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - -#define _16XMONTSQHI 96 - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - -#define _16XMASK 112 - MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, - MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, - -#define _REVIDXB 128 - 3854, 3340, 2826, 2312, 1798, 1284, 770, 256, - 3854, 3340, 2826, 2312, 1798, 1284, 770, 256, - -#define _REVIDXD 144 - 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0, 0, - -#define _ZETAS_EXP 160 - 31498, 31498, 31498, 31498, -758, -758, -758, -758, - 5237, 5237, 5237, 5237, 1397, 1397, 1397, 1397, - 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745, - 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745, - -359, -359, -359, -359, -359, -359, -359, -359, - -359, -359, -359, -359, -359, -359, -359, -359, - 13525, 13525, 13525, 13525, 13525, 13525, 13525, 13525, - -12402, -12402, -12402, -12402, -12402, -12402, -12402, -12402, - 1493, 1493, 1493, 1493, 1493, 1493, 1493, 1493, - 1422, 1422, 1422, 1422, 1422, 1422, 1422, 1422, - -20907, -20907, -20907, -20907, 27758, 27758, 27758, 27758, - -3799, -3799, -3799, -3799, -15690, -15690, -15690, -15690, - -171, -171, -171, -171, 622, 622, 622, 622, - 1577, 1577, 1577, 1577, 182, 182, 182, 182, - -5827, -5827, 17363, 17363, -26360, -26360, -29057, -29057, - 5571, 5571, -1102, -1102, 21438, 21438, -26242, -26242, - 573, 573, -1325, -1325, 264, 264, 383, 383, - -829, -829, 1458, 1458, -1602, -1602, -130, -130, - -5689, -6516, 1496, 30967, -23565, 20179, 20710, 25080, - -12796, 26616, 16064, -12442, 9134, -650, -25986, 27837, - 1223, 652, -552, 1015, -1293, 1491, -282, -1544, - 516, 
-8, -320, -666, -1618, -1162, 126, 1469, - -335, -11477, -32227, 20494, -27738, 945, -14883, 6182, - 32010, 10631, 29175, -28762, -18486, 17560, -14430, -5276, - -1103, 555, -1251, 1550, 422, 177, -291, 1574, - -246, 1159, -777, -602, -1590, -872, 418, -156, - 11182, 13387, -14233, -21655, 13131, -4587, 23092, 5493, - -32502, 30317, -18741, 12639, 20100, 18525, 19529, -12619, - 430, 843, 871, 105, 587, -235, -460, 1653, - 778, -147, 1483, 1119, 644, 349, 329, -75, - 787, 787, 787, 787, 787, 787, 787, 787, - 787, 787, 787, 787, 787, 787, 787, 787, - -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517, - -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517, - 28191, 28191, 28191, 28191, 28191, 28191, 28191, 28191, - -16694, -16694, -16694, -16694, -16694, -16694, -16694, -16694, - 287, 287, 287, 287, 287, 287, 287, 287, - 202, 202, 202, 202, 202, 202, 202, 202, - 10690, 10690, 10690, 10690, 1358, 1358, 1358, 1358, - -11202, -11202, -11202, -11202, 31164, 31164, 31164, 31164, - 962, 962, 962, 962, -1202, -1202, -1202, -1202, - -1474, -1474, -1474, -1474, 1468, 1468, 1468, 1468, - -28073, -28073, 24313, 24313, -10532, -10532, 8800, 8800, - 18426, 18426, 8859, 8859, 26675, 26675, -16163, -16163, - -681, -681, 1017, 1017, 732, 732, 608, 608, - -1542, -1542, 411, 411, -205, -205, -1571, -1571, - 19883, -28250, -15887, -8898, -28309, 9075, -30199, 18249, - 13426, 14017, -29156, -12757, 16832, 4311, -24155, -17915, - -853, -90, -271, 830, 107, -1421, -247, -951, - -398, 961, -1508, -725, 448, -1065, 677, -1275, - -31183, 25435, -7382, 24391, -20927, 10946, 24214, 16989, - 10335, -7934, -22502, 10906, 31636, 28644, 23998, -17422, - 817, 603, 1322, -1465, -1215, 1218, -874, -1187, - -1185, -1278, -1510, -870, -108, 996, 958, 1522, - 20297, 2146, 15355, -32384, -6280, -14903, -11044, 14469, - -21498, -20198, 23210, -17442, -23860, -20257, 7756, 23132, - 1097, 610, -1285, 384, -136, -1335, 220, -1659, - -1530, 794, -854, 478, -308, 991, -1460, 1628, - -#define _16XSHIFT 
624 - SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, - SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT -}}; diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/consts.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/consts.h deleted file mode 100644 index f95899cd8e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/consts.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef CONSTS_H -#define CONSTS_H - -#include "params.h" - -#define _16XQ 0 -#define _16XQINV 16 -#define _16XV 32 -#define _16XFLO 48 -#define _16XFHI 64 -#define _16XMONTSQLO 80 -#define _16XMONTSQHI 96 -#define _16XMASK 112 -#define _REVIDXB 128 -#define _REVIDXD 144 -#define _ZETAS_EXP 160 -#define _16XSHIFT 624 - -/* The C ABI on MacOS exports all symbols with a leading - * underscore. This means that any symbols we refer to from - * C files (functions) can't be found, and all symbols we - * refer to from ASM also can't be found. - * - * This define helps us get around this - */ -#ifdef __ASSEMBLER__ -#if defined(__WIN32__) || defined(__APPLE__) -#define decorate(s) _##s -#define cdecl2(s) decorate(s) -#define cdecl(s) cdecl2(KYBER_NAMESPACE(##s)) -#else -#define cdecl(s) KYBER_NAMESPACE(##s) -#endif -#endif - -#ifndef __ASSEMBLER__ -#include "align.h" -typedef ALIGNED_INT16(640) qdata_t; -#define qdata KYBER_NAMESPACE(qdata) -extern const qdata_t qdata; -#endif - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/fq.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/fq.S deleted file mode 100644 index 3bb1ebd3d8..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/fq.S +++ /dev/null @@ -1,88 +0,0 @@ -#include "consts.h" -.include "fq.inc" - -.text -reduce128_avx: -#load -vmovdqa (%rdi),%ymm2 -vmovdqa 32(%rdi),%ymm3 -vmovdqa 64(%rdi),%ymm4 -vmovdqa 96(%rdi),%ymm5 -vmovdqa 128(%rdi),%ymm6 -vmovdqa 160(%rdi),%ymm7 -vmovdqa 192(%rdi),%ymm8 -vmovdqa 224(%rdi),%ymm9 - -red16 2 -red16 3 
-red16 4 -red16 5 -red16 6 -red16 7 -red16 8 -red16 9 - -#store -vmovdqa %ymm2,(%rdi) -vmovdqa %ymm3,32(%rdi) -vmovdqa %ymm4,64(%rdi) -vmovdqa %ymm5,96(%rdi) -vmovdqa %ymm6,128(%rdi) -vmovdqa %ymm7,160(%rdi) -vmovdqa %ymm8,192(%rdi) -vmovdqa %ymm9,224(%rdi) - -ret - -.global cdecl(reduce_avx) -cdecl(reduce_avx): -#consts -vmovdqa _16XQ*2(%rsi),%ymm0 -vmovdqa _16XV*2(%rsi),%ymm1 -call reduce128_avx -add $256,%rdi -call reduce128_avx -ret - -tomont128_avx: -#load -vmovdqa (%rdi),%ymm3 -vmovdqa 32(%rdi),%ymm4 -vmovdqa 64(%rdi),%ymm5 -vmovdqa 96(%rdi),%ymm6 -vmovdqa 128(%rdi),%ymm7 -vmovdqa 160(%rdi),%ymm8 -vmovdqa 192(%rdi),%ymm9 -vmovdqa 224(%rdi),%ymm10 - -fqmulprecomp 1,2,3,11 -fqmulprecomp 1,2,4,12 -fqmulprecomp 1,2,5,13 -fqmulprecomp 1,2,6,14 -fqmulprecomp 1,2,7,15 -fqmulprecomp 1,2,8,11 -fqmulprecomp 1,2,9,12 -fqmulprecomp 1,2,10,13 - -#store -vmovdqa %ymm3,(%rdi) -vmovdqa %ymm4,32(%rdi) -vmovdqa %ymm5,64(%rdi) -vmovdqa %ymm6,96(%rdi) -vmovdqa %ymm7,128(%rdi) -vmovdqa %ymm8,160(%rdi) -vmovdqa %ymm9,192(%rdi) -vmovdqa %ymm10,224(%rdi) - -ret - -.global cdecl(tomont_avx) -cdecl(tomont_avx): -#consts -vmovdqa _16XQ*2(%rsi),%ymm0 -vmovdqa _16XMONTSQLO*2(%rsi),%ymm1 -vmovdqa _16XMONTSQHI*2(%rsi),%ymm2 -call tomont128_avx -add $256,%rdi -call tomont128_avx -ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/fq.inc b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/fq.inc deleted file mode 100644 index 4b7afc3118..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/fq.inc +++ /dev/null @@ -1,30 +0,0 @@ -.macro red16 r,rs=0,x=12 -vpmulhw %ymm1,%ymm\r,%ymm\x -.if \rs -vpmulhrsw %ymm\rs,%ymm\x,%ymm\x -.else -vpsraw $10,%ymm\x,%ymm\x -.endif -vpmullw %ymm0,%ymm\x,%ymm\x -vpsubw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro csubq r,x=12 -vpsubw %ymm0,%ymm\r,%ymm\r -vpsraw $15,%ymm\r,%ymm\x -vpand %ymm0,%ymm\x,%ymm\x -vpaddw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro caddq r,x=12 -vpsraw $15,%ymm\r,%ymm\x -vpand %ymm0,%ymm\x,%ymm\x 
-vpaddw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro fqmulprecomp al,ah,b,x=12 -vpmullw %ymm\al,%ymm\b,%ymm\x -vpmulhw %ymm\ah,%ymm\b,%ymm\b -vpmulhw %ymm0,%ymm\x,%ymm\x -vpsubw %ymm\x,%ymm\b,%ymm\b -.endm diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/indcpa.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/indcpa.c deleted file mode 100644 index c4b2b3a89f..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/indcpa.c +++ /dev/null @@ -1,568 +0,0 @@ -#include -#include -#include -#include -#include "align.h" -#include "params.h" -#include "indcpa.h" -#include "polyvec.h" -#include "poly.h" -#include "ntt.h" -#include "cbd.h" -#include "rejsample.h" -#include "symmetric.h" -#include "randombytes.h" - -/************************************************* -* Name: pack_pk -* -* Description: Serialize the public key as concatenation of the -* serialized vector of polynomials pk and the -* public seed used to generate the matrix A. -* The polynomial coefficients in pk are assumed to -* lie in the invertal [0,q], i.e. pk must be reduced -* by polyvec_reduce(). 
-* -* Arguments: uint8_t *r: pointer to the output serialized public key -* polyvec *pk: pointer to the input public-key polyvec -* const uint8_t *seed: pointer to the input public seed -**************************************************/ -static void pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES], - polyvec *pk, - const uint8_t seed[KYBER_SYMBYTES]) -{ - polyvec_tobytes(r, pk); - memcpy(r+KYBER_POLYVECBYTES, seed, KYBER_SYMBYTES); -} - -/************************************************* -* Name: unpack_pk -* -* Description: De-serialize public key from a byte array; -* approximate inverse of pack_pk -* -* Arguments: - polyvec *pk: pointer to output public-key polynomial vector -* - uint8_t *seed: pointer to output seed to generate matrix A -* - const uint8_t *packedpk: pointer to input serialized public key -**************************************************/ -static void unpack_pk(polyvec *pk, - uint8_t seed[KYBER_SYMBYTES], - const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES]) -{ - polyvec_frombytes(pk, packedpk); - memcpy(seed, packedpk+KYBER_POLYVECBYTES, KYBER_SYMBYTES); -} - -/************************************************* -* Name: pack_sk -* -* Description: Serialize the secret key. -* The polynomial coefficients in sk are assumed to -* lie in the invertal [0,q], i.e. sk must be reduced -* by polyvec_reduce(). 
-* -* Arguments: - uint8_t *r: pointer to output serialized secret key -* - polyvec *sk: pointer to input vector of polynomials (secret key) -**************************************************/ -static void pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk) -{ - polyvec_tobytes(r, sk); -} - -/************************************************* -* Name: unpack_sk -* -* Description: De-serialize the secret key; inverse of pack_sk -* -* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key) -* - const uint8_t *packedsk: pointer to input serialized secret key -**************************************************/ -static void unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES]) -{ - polyvec_frombytes(sk, packedsk); -} - -/************************************************* -* Name: pack_ciphertext -* -* Description: Serialize the ciphertext as concatenation of the -* compressed and serialized vector of polynomials b -* and the compressed and serialized polynomial v. -* The polynomial coefficients in b and v are assumed to -* lie in the invertal [0,q], i.e. b and v must be reduced -* by polyvec_reduce() and poly_reduce(), respectively. 
-* -* Arguments: uint8_t *r: pointer to the output serialized ciphertext -* poly *pk: pointer to the input vector of polynomials b -* poly *v: pointer to the input polynomial v -**************************************************/ -static void pack_ciphertext(uint8_t r[KYBER_INDCPA_BYTES], polyvec *b, poly *v) -{ - polyvec_compress(r, b); - poly_compress(r+KYBER_POLYVECCOMPRESSEDBYTES, v); -} - -/************************************************* -* Name: unpack_ciphertext -* -* Description: De-serialize and decompress ciphertext from a byte array; -* approximate inverse of pack_ciphertext -* -* Arguments: - polyvec *b: pointer to the output vector of polynomials b -* - poly *v: pointer to the output polynomial v -* - const uint8_t *c: pointer to the input serialized ciphertext -**************************************************/ -static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[KYBER_INDCPA_BYTES]) -{ - polyvec_decompress(b, c); - poly_decompress(v, c+KYBER_POLYVECCOMPRESSEDBYTES); -} - -/************************************************* -* Name: rej_uniform -* -* Description: Run rejection sampling on uniform random bytes to generate -* uniform random integers mod q -* -* Arguments: - int16_t *r: pointer to output array -* - unsigned int len: requested number of 16-bit integers (uniform mod q) -* - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes) -* - unsigned int buflen: length of input buffer in bytes -* -* Returns number of sampled 16-bit integers (at most len) -**************************************************/ -static unsigned int rej_uniform(int16_t *r, - unsigned int len, - const uint8_t *buf, - unsigned int buflen) -{ - unsigned int ctr, pos; - uint16_t val0, val1; - - ctr = pos = 0; - while(ctr < len && pos <= buflen - 3) { // buflen is always at least 3 - val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; - val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF; - pos += 3; - - 
if(val0 < KYBER_Q) - r[ctr++] = val0; - if(ctr < len && val1 < KYBER_Q) - r[ctr++] = val1; - } - - return ctr; -} - -#define gen_a(A,B) gen_matrix(A,B,0) -#define gen_at(A,B) gen_matrix(A,B,1) - -/************************************************* -* Name: gen_matrix -* -* Description: Deterministically generate matrix A (or the transpose of A) -* from a seed. Entries of the matrix are polynomials that look -* uniformly random. Performs rejection sampling on output of -* a XOF -* -* Arguments: - polyvec *a: pointer to ouptput matrix A -* - const uint8_t *seed: pointer to input seed -* - int transposed: boolean deciding whether A or A^T is generated -**************************************************/ -#if KYBER_K == 2 -void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) -{ - unsigned int ctr0, ctr1, ctr2, ctr3; - ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; - __m256i f; - shake128x4incctx state; - - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - if(transposed) { - buf[0].coeffs[32] = 0; - buf[0].coeffs[33] = 0; - buf[1].coeffs[32] = 0; - buf[1].coeffs[33] = 1; - buf[2].coeffs[32] = 1; - buf[2].coeffs[33] = 0; - buf[3].coeffs[32] = 1; - buf[3].coeffs[33] = 1; - } - else { - buf[0].coeffs[32] = 0; - buf[0].coeffs[33] = 0; - buf[1].coeffs[32] = 1; - buf[1].coeffs[33] = 0; - buf[2].coeffs[32] = 0; - buf[2].coeffs[33] = 1; - buf[3].coeffs[32] = 1; - buf[3].coeffs[33] = 1; - } - - shake128x4_inc_init(&state); - shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); - - ctr0 = rej_uniform_avx(a[0].vec[0].coeffs, buf[0].coeffs); - ctr1 = rej_uniform_avx(a[0].vec[1].coeffs, buf[1].coeffs); - ctr2 = rej_uniform_avx(a[1].vec[0].coeffs, 
buf[2].coeffs); - ctr3 = rej_uniform_avx(a[1].vec[1].coeffs, buf[3].coeffs); - - while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); - - ctr0 += rej_uniform(a[0].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); - ctr1 += rej_uniform(a[0].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); - ctr2 += rej_uniform(a[1].vec[0].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); - ctr3 += rej_uniform(a[1].vec[1].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); - } - - poly_nttunpack(&a[0].vec[0]); - poly_nttunpack(&a[0].vec[1]); - poly_nttunpack(&a[1].vec[0]); - poly_nttunpack(&a[1].vec[1]); - shake128x4_inc_ctx_release(&state); -} -#elif KYBER_K == 3 -void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) -{ - unsigned int ctr0, ctr1, ctr2, ctr3; - ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; - __m256i f; - shake128x4incctx state; - shake128incctx state1x; - - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - if(transposed) { - buf[0].coeffs[32] = 0; - buf[0].coeffs[33] = 0; - buf[1].coeffs[32] = 0; - buf[1].coeffs[33] = 1; - buf[2].coeffs[32] = 0; - buf[2].coeffs[33] = 2; - buf[3].coeffs[32] = 1; - buf[3].coeffs[33] = 0; - } - else { - buf[0].coeffs[32] = 0; - buf[0].coeffs[33] = 0; - buf[1].coeffs[32] = 1; - buf[1].coeffs[33] = 0; - buf[2].coeffs[32] = 2; - buf[2].coeffs[33] = 0; - buf[3].coeffs[32] = 0; - buf[3].coeffs[33] = 1; - } - - shake128x4_inc_init(&state); - shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); - - ctr0 = 
rej_uniform_avx(a[0].vec[0].coeffs, buf[0].coeffs); - ctr1 = rej_uniform_avx(a[0].vec[1].coeffs, buf[1].coeffs); - ctr2 = rej_uniform_avx(a[0].vec[2].coeffs, buf[2].coeffs); - ctr3 = rej_uniform_avx(a[1].vec[0].coeffs, buf[3].coeffs); - - while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); - - ctr0 += rej_uniform(a[0].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); - ctr1 += rej_uniform(a[0].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); - ctr2 += rej_uniform(a[0].vec[2].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); - ctr3 += rej_uniform(a[1].vec[0].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); - } - - poly_nttunpack(&a[0].vec[0]); - poly_nttunpack(&a[0].vec[1]); - poly_nttunpack(&a[0].vec[2]); - poly_nttunpack(&a[1].vec[0]); - - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - if(transposed) { - buf[0].coeffs[32] = 1; - buf[0].coeffs[33] = 1; - buf[1].coeffs[32] = 1; - buf[1].coeffs[33] = 2; - buf[2].coeffs[32] = 2; - buf[2].coeffs[33] = 0; - buf[3].coeffs[32] = 2; - buf[3].coeffs[33] = 1; - } - else { - buf[0].coeffs[32] = 1; - buf[0].coeffs[33] = 1; - buf[1].coeffs[32] = 2; - buf[1].coeffs[33] = 1; - buf[2].coeffs[32] = 0; - buf[2].coeffs[33] = 2; - buf[3].coeffs[32] = 1; - buf[3].coeffs[33] = 2; - } - - shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); - - ctr0 = rej_uniform_avx(a[1].vec[1].coeffs, buf[0].coeffs); - ctr1 = rej_uniform_avx(a[1].vec[2].coeffs, buf[1].coeffs); - ctr2 = rej_uniform_avx(a[2].vec[0].coeffs, buf[2].coeffs); - ctr3 = 
rej_uniform_avx(a[2].vec[1].coeffs, buf[3].coeffs); - - while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); - - ctr0 += rej_uniform(a[1].vec[1].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); - ctr1 += rej_uniform(a[1].vec[2].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); - ctr2 += rej_uniform(a[2].vec[0].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); - ctr3 += rej_uniform(a[2].vec[1].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); - } - shake128x4_inc_ctx_release(&state); - - poly_nttunpack(&a[1].vec[1]); - poly_nttunpack(&a[1].vec[2]); - poly_nttunpack(&a[2].vec[0]); - poly_nttunpack(&a[2].vec[1]); - - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - buf[0].coeffs[32] = 2; - buf[0].coeffs[33] = 2; - - shake128_inc_init(&state1x); - shake128_absorb_once(&state1x, buf[0].coeffs, 34); - shake128_squeezeblocks(buf[0].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state1x); - ctr0 = rej_uniform_avx(a[2].vec[2].coeffs, buf[0].coeffs); - while(ctr0 < KYBER_N) { - shake128_squeezeblocks(buf[0].coeffs, 1, &state1x); - ctr0 += rej_uniform(a[2].vec[2].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); - } - shake128_inc_ctx_release(&state1x); - - poly_nttunpack(&a[2].vec[2]); -} -#elif KYBER_K == 4 -void gen_matrix(polyvec *a, const uint8_t seed[32], int transposed) -{ - unsigned int i, ctr0, ctr1, ctr2, ctr3; - ALIGNED_UINT8(REJ_UNIFORM_AVX_NBLOCKS*SHAKE128_RATE) buf[4]; - __m256i f; - shake128x4incctx state; - shake128x4_inc_init(&state); - - for(i=0;i<4;i++) { - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - if(transposed) { - buf[0].coeffs[32] = i; - buf[0].coeffs[33] = 0; - buf[1].coeffs[32] = i; - buf[1].coeffs[33] = 
1; - buf[2].coeffs[32] = i; - buf[2].coeffs[33] = 2; - buf[3].coeffs[32] = i; - buf[3].coeffs[33] = 3; - } - else { - buf[0].coeffs[32] = 0; - buf[0].coeffs[33] = i; - buf[1].coeffs[32] = 1; - buf[1].coeffs[33] = i; - buf[2].coeffs[32] = 2; - buf[2].coeffs[33] = i; - buf[3].coeffs[32] = 3; - buf[3].coeffs[33] = i; - } - - shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_AVX_NBLOCKS, &state); - - ctr0 = rej_uniform_avx(a[i].vec[0].coeffs, buf[0].coeffs); - ctr1 = rej_uniform_avx(a[i].vec[1].coeffs, buf[1].coeffs); - ctr2 = rej_uniform_avx(a[i].vec[2].coeffs, buf[2].coeffs); - ctr3 = rej_uniform_avx(a[i].vec[3].coeffs, buf[3].coeffs); - - while(ctr0 < KYBER_N || ctr1 < KYBER_N || ctr2 < KYBER_N || ctr3 < KYBER_N) { - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); - - ctr0 += rej_uniform(a[i].vec[0].coeffs + ctr0, KYBER_N - ctr0, buf[0].coeffs, SHAKE128_RATE); - ctr1 += rej_uniform(a[i].vec[1].coeffs + ctr1, KYBER_N - ctr1, buf[1].coeffs, SHAKE128_RATE); - ctr2 += rej_uniform(a[i].vec[2].coeffs + ctr2, KYBER_N - ctr2, buf[2].coeffs, SHAKE128_RATE); - ctr3 += rej_uniform(a[i].vec[3].coeffs + ctr3, KYBER_N - ctr3, buf[3].coeffs, SHAKE128_RATE); - } - - poly_nttunpack(&a[i].vec[0]); - poly_nttunpack(&a[i].vec[1]); - poly_nttunpack(&a[i].vec[2]); - poly_nttunpack(&a[i].vec[3]); - } - shake128x4_inc_ctx_release(&state); -} -#endif - -/************************************************* -* Name: indcpa_keypair_derand -* -* Description: Generates public and private key for the CPA-secure -* public-key encryption scheme underlying Kyber -* -* Arguments: - uint8_t *pk: pointer to output public key -* (of length KYBER_INDCPA_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key -* (of length KYBER_INDCPA_SECRETKEYBYTES bytes) -* - const uint8_t *coins: pointer to input 
randomness -* (of length KYBER_SYMBYTES bytes) -**************************************************/ -void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], - uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], - const uint8_t coins[KYBER_SYMBYTES]) -{ - unsigned int i; - uint8_t buf[2*KYBER_SYMBYTES]; - const uint8_t *publicseed = buf; - const uint8_t *noiseseed = buf + KYBER_SYMBYTES; - polyvec a[KYBER_K], e, pkpv, skpv; - - memcpy(buf, coins, KYBER_SYMBYTES); - buf[KYBER_SYMBYTES] = KYBER_K; - hash_g(buf, buf, KYBER_SYMBYTES+1); - - gen_a(a, publicseed); - -#if KYBER_K == 2 - poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, e.vec+0, e.vec+1, noiseseed, 0, 1, 2, 3); -#elif KYBER_K == 3 - poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, e.vec+0, noiseseed, 0, 1, 2, 3); - poly_getnoise_eta1_4x(e.vec+1, e.vec+2, pkpv.vec+0, pkpv.vec+1, noiseseed, 4, 5, 6, 7); -#elif KYBER_K == 4 - poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, skpv.vec+2, skpv.vec+3, noiseseed, 0, 1, 2, 3); - poly_getnoise_eta1_4x(e.vec+0, e.vec+1, e.vec+2, e.vec+3, noiseseed, 4, 5, 6, 7); -#endif - - polyvec_ntt(&skpv); - polyvec_reduce(&skpv); - polyvec_ntt(&e); - - // matrix-vector multiplication - for(i=0;i -#include "params.h" -#include "polyvec.h" - -#define gen_matrix KYBER_NAMESPACE(gen_matrix) -void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed); - -#define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand) -void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], - uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], - const uint8_t coins[KYBER_SYMBYTES]); - -#define indcpa_enc KYBER_NAMESPACE(indcpa_enc) -void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], - const uint8_t m[KYBER_INDCPA_MSGBYTES], - const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], - const uint8_t coins[KYBER_SYMBYTES]); - -#define indcpa_dec KYBER_NAMESPACE(indcpa_dec) -void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES], - const uint8_t c[KYBER_INDCPA_BYTES], - const uint8_t 
sk[KYBER_INDCPA_SECRETKEYBYTES]); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/invntt.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/invntt.S deleted file mode 100644 index 76d4189996..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/invntt.S +++ /dev/null @@ -1,193 +0,0 @@ -#include "consts.h" -.include "shuffle.inc" -.include "fq.inc" - -.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,zl0=2,zl1=2,zh0=3,zh1=3 -vpsubw %ymm\rl0,%ymm\rh0,%ymm12 -vpaddw %ymm\rh0,%ymm\rl0,%ymm\rl0 -vpsubw %ymm\rl1,%ymm\rh1,%ymm13 - -vpmullw %ymm\zl0,%ymm12,%ymm\rh0 -vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl1 -vpsubw %ymm\rl2,%ymm\rh2,%ymm14 - -vpmullw %ymm\zl0,%ymm13,%ymm\rh1 -vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl2 -vpsubw %ymm\rl3,%ymm\rh3,%ymm15 - -vpmullw %ymm\zl1,%ymm14,%ymm\rh2 -vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl3 -vpmullw %ymm\zl1,%ymm15,%ymm\rh3 - -vpmulhw %ymm\zh0,%ymm12,%ymm12 -vpmulhw %ymm\zh0,%ymm13,%ymm13 - -vpmulhw %ymm\zh1,%ymm14,%ymm14 -vpmulhw %ymm\zh1,%ymm15,%ymm15 - -vpmulhw %ymm0,%ymm\rh0,%ymm\rh0 - -vpmulhw %ymm0,%ymm\rh1,%ymm\rh1 - -vpmulhw %ymm0,%ymm\rh2,%ymm\rh2 -vpmulhw %ymm0,%ymm\rh3,%ymm\rh3 - -# - -# - -vpsubw %ymm\rh0,%ymm12,%ymm\rh0 - -vpsubw %ymm\rh1,%ymm13,%ymm\rh1 - -vpsubw %ymm\rh2,%ymm14,%ymm\rh2 -vpsubw %ymm\rh3,%ymm15,%ymm\rh3 -.endm - -.macro intt_levels0t5 off -/* level 0 */ -vmovdqa _16XFLO*2(%rsi),%ymm2 -vmovdqa _16XFHI*2(%rsi),%ymm3 - -vmovdqa (128*\off+ 0)*2(%rdi),%ymm4 -vmovdqa (128*\off+ 32)*2(%rdi),%ymm6 -vmovdqa (128*\off+ 16)*2(%rdi),%ymm5 -vmovdqa (128*\off+ 48)*2(%rdi),%ymm7 - -fqmulprecomp 2,3,4 -fqmulprecomp 2,3,6 -fqmulprecomp 2,3,5 -fqmulprecomp 2,3,7 - -vmovdqa (128*\off+ 64)*2(%rdi),%ymm8 -vmovdqa (128*\off+ 96)*2(%rdi),%ymm10 -vmovdqa (128*\off+ 80)*2(%rdi),%ymm9 -vmovdqa (128*\off+112)*2(%rdi),%ymm11 - -fqmulprecomp 2,3,8 -fqmulprecomp 2,3,10 -fqmulprecomp 2,3,9 -fqmulprecomp 2,3,11 - -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+208)*2(%rsi),%ymm15 -vpermq 
$0x4E,(_ZETAS_EXP+(1-\off)*224+176)*2(%rsi),%ymm1 -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+224)*2(%rsi),%ymm2 -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+192)*2(%rsi),%ymm3 -vmovdqa _REVIDXB*2(%rsi),%ymm12 -vpshufb %ymm12,%ymm15,%ymm15 -vpshufb %ymm12,%ymm1,%ymm1 -vpshufb %ymm12,%ymm2,%ymm2 -vpshufb %ymm12,%ymm3,%ymm3 - -butterfly 4,5,8,9,6,7,10,11,15,1,2,3 - -/* level 1 */ -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+144)*2(%rsi),%ymm2 -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+160)*2(%rsi),%ymm3 -vmovdqa _REVIDXB*2(%rsi),%ymm1 -vpshufb %ymm1,%ymm2,%ymm2 -vpshufb %ymm1,%ymm3,%ymm3 - -butterfly 4,5,6,7,8,9,10,11,2,2,3,3 - -shuffle1 4,5,3,5 -shuffle1 6,7,4,7 -shuffle1 8,9,6,9 -shuffle1 10,11,8,11 - -/* level 2 */ -vmovdqa _REVIDXD*2(%rsi),%ymm12 -vpermd (_ZETAS_EXP+(1-\off)*224+112)*2(%rsi),%ymm12,%ymm2 -vpermd (_ZETAS_EXP+(1-\off)*224+128)*2(%rsi),%ymm12,%ymm10 - -butterfly 3,4,6,8,5,7,9,11,2,2,10,10 - -vmovdqa _16XV*2(%rsi),%ymm1 -red16 3 - -shuffle2 3,4,10,4 -shuffle2 6,8,3,8 -shuffle2 5,7,6,7 -shuffle2 9,11,5,11 - -/* level 3 */ -vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+80)*2(%rsi),%ymm2 -vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+96)*2(%rsi),%ymm9 - -butterfly 10,3,6,5,4,8,7,11,2,2,9,9 - -shuffle4 10,3,9,3 -shuffle4 6,5,10,5 -shuffle4 4,8,6,8 -shuffle4 7,11,4,11 - -/* level 4 */ -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+48)*2(%rsi),%ymm2 -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+64)*2(%rsi),%ymm7 - -butterfly 9,10,6,4,3,5,8,11,2,2,7,7 - -red16 9 - -shuffle8 9,10,7,10 -shuffle8 6,4,9,4 -shuffle8 3,5,6,5 -shuffle8 8,11,3,11 - -/* level 5 */ -vmovdqa (_ZETAS_EXP+(1-\off)*224+16)*2(%rsi),%ymm2 -vmovdqa (_ZETAS_EXP+(1-\off)*224+32)*2(%rsi),%ymm8 - -butterfly 7,9,6,3,10,4,5,11,2,2,8,8 - -vmovdqa %ymm7,(128*\off+ 0)*2(%rdi) -vmovdqa %ymm9,(128*\off+ 16)*2(%rdi) -vmovdqa %ymm6,(128*\off+ 32)*2(%rdi) -vmovdqa %ymm3,(128*\off+ 48)*2(%rdi) -vmovdqa %ymm10,(128*\off+ 64)*2(%rdi) -vmovdqa %ymm4,(128*\off+ 80)*2(%rdi) -vmovdqa %ymm5,(128*\off+ 96)*2(%rdi) -vmovdqa %ymm11,(128*\off+112)*2(%rdi) -.endm - 
-.macro intt_level6 off -/* level 6 */ -vmovdqa (64*\off+ 0)*2(%rdi),%ymm4 -vmovdqa (64*\off+128)*2(%rdi),%ymm8 -vmovdqa (64*\off+ 16)*2(%rdi),%ymm5 -vmovdqa (64*\off+144)*2(%rdi),%ymm9 -vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm2 - -vmovdqa (64*\off+ 32)*2(%rdi),%ymm6 -vmovdqa (64*\off+160)*2(%rdi),%ymm10 -vmovdqa (64*\off+ 48)*2(%rdi),%ymm7 -vmovdqa (64*\off+176)*2(%rdi),%ymm11 -vpbroadcastq (_ZETAS_EXP+4)*2(%rsi),%ymm3 - -butterfly 4,5,6,7,8,9,10,11 - -.if \off == 0 -red16 4 -.endif - -vmovdqa %ymm4,(64*\off+ 0)*2(%rdi) -vmovdqa %ymm5,(64*\off+ 16)*2(%rdi) -vmovdqa %ymm6,(64*\off+ 32)*2(%rdi) -vmovdqa %ymm7,(64*\off+ 48)*2(%rdi) -vmovdqa %ymm8,(64*\off+128)*2(%rdi) -vmovdqa %ymm9,(64*\off+144)*2(%rdi) -vmovdqa %ymm10,(64*\off+160)*2(%rdi) -vmovdqa %ymm11,(64*\off+176)*2(%rdi) -.endm - -.text -.global cdecl(invntt_avx) -cdecl(invntt_avx): -vmovdqa _16XQ*2(%rsi),%ymm0 - -intt_levels0t5 0 -intt_levels0t5 1 - -intt_level6 0 -intt_level6 1 -ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/kem.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/kem.c deleted file mode 100644 index 63abc1029c..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/kem.c +++ /dev/null @@ -1,169 +0,0 @@ -#include -#include -#include -#include "params.h" -#include "kem.h" -#include "indcpa.h" -#include "verify.h" -#include "symmetric.h" -#include "randombytes.h" -/************************************************* -* Name: crypto_kem_keypair_derand -* -* Description: Generates public and private key -* for CCA-secure Kyber key encapsulation mechanism -* -* Arguments: - uint8_t *pk: pointer to output public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key -* (an already allocated array of KYBER_SECRETKEYBYTES bytes) -* - uint8_t *coins: pointer to input randomness -* (an already allocated array filled with 2*KYBER_SYMBYTES random bytes) -** -* Returns 0 (success) 
-**************************************************/ -int crypto_kem_keypair_derand(uint8_t *pk, - uint8_t *sk, - const uint8_t *coins) -{ - indcpa_keypair_derand(pk, sk, coins); - memcpy(sk+KYBER_INDCPA_SECRETKEYBYTES, pk, KYBER_PUBLICKEYBYTES); - hash_h(sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); - /* Value z for pseudo-random output on reject */ - memcpy(sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, coins+KYBER_SYMBYTES, KYBER_SYMBYTES); - return 0; -} - -/************************************************* -* Name: crypto_kem_keypair -* -* Description: Generates public and private key -* for CCA-secure Kyber key encapsulation mechanism -* -* Arguments: - uint8_t *pk: pointer to output public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key -* (an already allocated array of KYBER_SECRETKEYBYTES bytes) -* -* Returns 0 (success) -**************************************************/ -int crypto_kem_keypair(uint8_t *pk, - uint8_t *sk) -{ - uint8_t coins[2*KYBER_SYMBYTES]; - randombytes(coins, 2*KYBER_SYMBYTES); - crypto_kem_keypair_derand(pk, sk, coins); - return 0; -} - -/************************************************* -* Name: crypto_kem_enc_derand -* -* Description: Generates cipher text and shared -* secret for given public key -* -* Arguments: - uint8_t *ct: pointer to output cipher text -* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) -* - uint8_t *ss: pointer to output shared secret -* (an already allocated array of KYBER_SSBYTES bytes) -* - const uint8_t *pk: pointer to input public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* - const uint8_t *coins: pointer to input randomness -* (an already allocated array filled with KYBER_SYMBYTES random bytes) -** -* Returns 0 (success) -**************************************************/ -int crypto_kem_enc_derand(uint8_t *ct, - uint8_t *ss, - const uint8_t *pk, - const uint8_t *coins) -{ - uint8_t 
buf[2*KYBER_SYMBYTES]; - /* Will contain key, coins */ - uint8_t kr[2*KYBER_SYMBYTES]; - - memcpy(buf, coins, KYBER_SYMBYTES); - - /* Multitarget countermeasure for coins + contributory KEM */ - hash_h(buf+KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); - hash_g(kr, buf, 2*KYBER_SYMBYTES); - - /* coins are in kr+KYBER_SYMBYTES */ - indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES); - - memcpy(ss,kr,KYBER_SYMBYTES); - return 0; -} - -/************************************************* -* Name: crypto_kem_enc -* -* Description: Generates cipher text and shared -* secret for given public key -* -* Arguments: - uint8_t *ct: pointer to output cipher text -* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) -* - uint8_t *ss: pointer to output shared secret -* (an already allocated array of KYBER_SSBYTES bytes) -* - const uint8_t *pk: pointer to input public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* -* Returns 0 (success) -**************************************************/ -int crypto_kem_enc(uint8_t *ct, - uint8_t *ss, - const uint8_t *pk) -{ - uint8_t coins[KYBER_SYMBYTES]; - randombytes(coins, KYBER_SYMBYTES); - crypto_kem_enc_derand(ct, ss, pk, coins); - return 0; -} - -/************************************************* -* Name: crypto_kem_dec -* -* Description: Generates shared secret for given -* cipher text and private key -* -* Arguments: - uint8_t *ss: pointer to output shared secret -* (an already allocated array of KYBER_SSBYTES bytes) -* - const uint8_t *ct: pointer to input cipher text -* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) -* - const uint8_t *sk: pointer to input private key -* (an already allocated array of KYBER_SECRETKEYBYTES bytes) -* -* Returns 0. -* -* On failure, ss will contain a pseudo-random value. 
-**************************************************/ -int crypto_kem_dec(uint8_t *ss, - const uint8_t *ct, - const uint8_t *sk) -{ - int fail; - uint8_t buf[2*KYBER_SYMBYTES]; - /* Will contain key, coins */ - uint8_t kr[2*KYBER_SYMBYTES]; - uint8_t cmp[KYBER_CIPHERTEXTBYTES+KYBER_SYMBYTES]; - const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES; - - indcpa_dec(buf, ct, sk); - - /* Multitarget countermeasure for coins + contributory KEM */ - memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES); - hash_g(kr, buf, 2*KYBER_SYMBYTES); - - /* coins are in kr+KYBER_SYMBYTES */ - indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES); - - fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); - - /* Compute rejection key */ - rkprf(ss,sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES,ct); - - /* Copy true key to return buffer if fail is false */ - cmov(ss,kr,KYBER_SYMBYTES,!fail); - - return 0; -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/kem.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/kem.h deleted file mode 100644 index 234f11966b..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/kem.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef KEM_H -#define KEM_H - -#include -#include "params.h" - -#define CRYPTO_SECRETKEYBYTES KYBER_SECRETKEYBYTES -#define CRYPTO_PUBLICKEYBYTES KYBER_PUBLICKEYBYTES -#define CRYPTO_CIPHERTEXTBYTES KYBER_CIPHERTEXTBYTES -#define CRYPTO_BYTES KYBER_SSBYTES - -#if (KYBER_K == 2) -#define CRYPTO_ALGNAME "Kyber512" -#elif (KYBER_K == 3) -#define CRYPTO_ALGNAME "Kyber768" -#elif (KYBER_K == 4) -#define CRYPTO_ALGNAME "Kyber1024" -#endif - -#define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand) -int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); - -#define crypto_kem_keypair KYBER_NAMESPACE(keypair) -int crypto_kem_keypair(uint8_t *pk, uint8_t *sk); - -#define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand) -int crypto_kem_enc_derand(uint8_t *ct, 
uint8_t *ss, const uint8_t *pk, const uint8_t *coins); - -#define crypto_kem_enc KYBER_NAMESPACE(enc) -int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); - -#define crypto_kem_dec KYBER_NAMESPACE(dec) -int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/ntt.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/ntt.S deleted file mode 100644 index 0ce7b41297..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/ntt.S +++ /dev/null @@ -1,189 +0,0 @@ -#include "consts.h" -.include "shuffle.inc" - -.macro mul rh0,rh1,rh2,rh3,zl0=15,zl1=15,zh0=2,zh1=2 -vpmullw %ymm\zl0,%ymm\rh0,%ymm12 -vpmullw %ymm\zl0,%ymm\rh1,%ymm13 - -vpmullw %ymm\zl1,%ymm\rh2,%ymm14 -vpmullw %ymm\zl1,%ymm\rh3,%ymm15 - -vpmulhw %ymm\zh0,%ymm\rh0,%ymm\rh0 -vpmulhw %ymm\zh0,%ymm\rh1,%ymm\rh1 - -vpmulhw %ymm\zh1,%ymm\rh2,%ymm\rh2 -vpmulhw %ymm\zh1,%ymm\rh3,%ymm\rh3 -.endm - -.macro reduce -vpmulhw %ymm0,%ymm12,%ymm12 -vpmulhw %ymm0,%ymm13,%ymm13 - -vpmulhw %ymm0,%ymm14,%ymm14 -vpmulhw %ymm0,%ymm15,%ymm15 -.endm - -.macro update rln,rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 -vpaddw %ymm\rh0,%ymm\rl0,%ymm\rln -vpsubw %ymm\rh0,%ymm\rl0,%ymm\rh0 -vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl0 - -vpsubw %ymm\rh1,%ymm\rl1,%ymm\rh1 -vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl1 -vpsubw %ymm\rh2,%ymm\rl2,%ymm\rh2 - -vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl2 -vpsubw %ymm\rh3,%ymm\rl3,%ymm\rh3 - -vpsubw %ymm12,%ymm\rln,%ymm\rln -vpaddw %ymm12,%ymm\rh0,%ymm\rh0 -vpsubw %ymm13,%ymm\rl0,%ymm\rl0 - -vpaddw %ymm13,%ymm\rh1,%ymm\rh1 -vpsubw %ymm14,%ymm\rl1,%ymm\rl1 -vpaddw %ymm14,%ymm\rh2,%ymm\rh2 - -vpsubw %ymm15,%ymm\rl2,%ymm\rl2 -vpaddw %ymm15,%ymm\rh3,%ymm\rh3 -.endm - -.macro level0 off -vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm15 -vmovdqa (64*\off+128)*2(%rdi),%ymm8 -vmovdqa (64*\off+144)*2(%rdi),%ymm9 -vmovdqa (64*\off+160)*2(%rdi),%ymm10 -vmovdqa (64*\off+176)*2(%rdi),%ymm11 -vpbroadcastq 
(_ZETAS_EXP+4)*2(%rsi),%ymm2 - -mul 8,9,10,11 - -vmovdqa (64*\off+ 0)*2(%rdi),%ymm4 -vmovdqa (64*\off+ 16)*2(%rdi),%ymm5 -vmovdqa (64*\off+ 32)*2(%rdi),%ymm6 -vmovdqa (64*\off+ 48)*2(%rdi),%ymm7 - -reduce -update 3,4,5,6,7,8,9,10,11 - -vmovdqa %ymm3,(64*\off+ 0)*2(%rdi) -vmovdqa %ymm4,(64*\off+ 16)*2(%rdi) -vmovdqa %ymm5,(64*\off+ 32)*2(%rdi) -vmovdqa %ymm6,(64*\off+ 48)*2(%rdi) -vmovdqa %ymm8,(64*\off+128)*2(%rdi) -vmovdqa %ymm9,(64*\off+144)*2(%rdi) -vmovdqa %ymm10,(64*\off+160)*2(%rdi) -vmovdqa %ymm11,(64*\off+176)*2(%rdi) -.endm - -.macro levels1t6 off -/* level 1 */ -vmovdqa (_ZETAS_EXP+224*\off+16)*2(%rsi),%ymm15 -vmovdqa (128*\off+ 64)*2(%rdi),%ymm8 -vmovdqa (128*\off+ 80)*2(%rdi),%ymm9 -vmovdqa (128*\off+ 96)*2(%rdi),%ymm10 -vmovdqa (128*\off+112)*2(%rdi),%ymm11 -vmovdqa (_ZETAS_EXP+224*\off+32)*2(%rsi),%ymm2 - -mul 8,9,10,11 - -vmovdqa (128*\off+ 0)*2(%rdi),%ymm4 -vmovdqa (128*\off+ 16)*2(%rdi),%ymm5 -vmovdqa (128*\off+ 32)*2(%rdi),%ymm6 -vmovdqa (128*\off+ 48)*2(%rdi),%ymm7 - -reduce -update 3,4,5,6,7,8,9,10,11 - -/* level 2 */ -shuffle8 5,10,7,10 -shuffle8 6,11,5,11 - -vmovdqa (_ZETAS_EXP+224*\off+48)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+64)*2(%rsi),%ymm2 - -mul 7,10,5,11 - -shuffle8 3,8,6,8 -shuffle8 4,9,3,9 - -reduce -update 4,6,8,3,9,7,10,5,11 - -/* level 3 */ -shuffle4 8,5,9,5 -shuffle4 3,11,8,11 - -vmovdqa (_ZETAS_EXP+224*\off+80)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+96)*2(%rsi),%ymm2 - -mul 9,5,8,11 - -shuffle4 4,7,3,7 -shuffle4 6,10,4,10 - -reduce -update 6,3,7,4,10,9,5,8,11 - -/* level 4 */ -shuffle2 7,8,10,8 -shuffle2 4,11,7,11 - -vmovdqa (_ZETAS_EXP+224*\off+112)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+128)*2(%rsi),%ymm2 - -mul 10,8,7,11 - -shuffle2 6,9,4,9 -shuffle2 3,5,6,5 - -reduce -update 3,4,9,6,5,10,8,7,11 - -/* level 5 */ -shuffle1 9,7,5,7 -shuffle1 6,11,9,11 - -vmovdqa (_ZETAS_EXP+224*\off+144)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+160)*2(%rsi),%ymm2 - -mul 5,7,9,11 - -shuffle1 3,10,6,10 -shuffle1 4,8,3,8 - 
-reduce -update 4,6,10,3,8,5,7,9,11 - -/* level 6 */ -vmovdqa (_ZETAS_EXP+224*\off+176)*2(%rsi),%ymm14 -vmovdqa (_ZETAS_EXP+224*\off+208)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+192)*2(%rsi),%ymm8 -vmovdqa (_ZETAS_EXP+224*\off+224)*2(%rsi),%ymm2 - -mul 10,3,9,11,14,15,8,2 - -reduce -update 8,4,6,5,7,10,3,9,11 - -vmovdqa %ymm8,(128*\off+ 0)*2(%rdi) -vmovdqa %ymm4,(128*\off+ 16)*2(%rdi) -vmovdqa %ymm10,(128*\off+ 32)*2(%rdi) -vmovdqa %ymm3,(128*\off+ 48)*2(%rdi) -vmovdqa %ymm6,(128*\off+ 64)*2(%rdi) -vmovdqa %ymm5,(128*\off+ 80)*2(%rdi) -vmovdqa %ymm9,(128*\off+ 96)*2(%rdi) -vmovdqa %ymm11,(128*\off+112)*2(%rdi) -.endm - -.text -.global cdecl(ntt_avx) -cdecl(ntt_avx): -vmovdqa _16XQ*2(%rsi),%ymm0 - -level0 0 -level0 1 - -levels1t6 0 -levels1t6 1 - -ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/ntt.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/ntt.h deleted file mode 100644 index a4f48e343b..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/ntt.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef NTT_H -#define NTT_H - -#include -#include - -#define ntt_avx KYBER_NAMESPACE(ntt_avx) -void ntt_avx(__m256i *r, const __m256i *qdata); -#define invntt_avx KYBER_NAMESPACE(invntt_avx) -void invntt_avx(__m256i *r, const __m256i *qdata); - -#define nttpack_avx KYBER_NAMESPACE(nttpack_avx) -void nttpack_avx(__m256i *r, const __m256i *qdata); -#define nttunpack_avx KYBER_NAMESPACE(nttunpack_avx) -void nttunpack_avx(__m256i *r, const __m256i *qdata); - -#define basemul_avx KYBER_NAMESPACE(basemul_avx) -void basemul_avx(__m256i *r, - const __m256i *a, - const __m256i *b, - const __m256i *qdata); - -#define ntttobytes_avx KYBER_NAMESPACE(ntttobytes_avx) -void ntttobytes_avx(uint8_t *r, const __m256i *a, const __m256i *qdata); -#define nttfrombytes_avx KYBER_NAMESPACE(nttfrombytes_avx) -void nttfrombytes_avx(__m256i *r, const uint8_t *a, const __m256i *qdata); - -#endif diff --git 
a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/params.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/params.h deleted file mode 100644 index ecfabce4a5..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/params.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef PARAMS_H -#define PARAMS_H - -#ifndef KYBER_K -#define KYBER_K 3 /* Change this for different security strengths */ -#endif - -//#define KYBER_90S /* Uncomment this if you want the 90S variant */ - -/* Don't change parameters below this line */ -#if (KYBER_K == 2) -#ifdef KYBER_90S -#define KYBER_NAMESPACE(s) pqcrystals_kyber512_90s_avx2_##s -#else -#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_512_avx2_##s -#endif -#elif (KYBER_K == 3) -#ifdef KYBER_90S -#define KYBER_NAMESPACE(s) pqcrystals_kyber768_90s_avx2_##s -#else -#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_768_avx2_##s -#endif -#elif (KYBER_K == 4) -#ifdef KYBER_90S -#define KYBER_NAMESPACE(s) pqcrystals_kyber1024_90s_avx2_##s -#else -#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_1024_avx2_##s -#endif -#else -#error "KYBER_K must be in {2,3,4}" -#endif - -#define KYBER_N 256 -#define KYBER_Q 3329 - -#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ -#define KYBER_SSBYTES 32 /* size in bytes of shared key */ - -#define KYBER_POLYBYTES 384 -#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) - -#if KYBER_K == 2 -#define KYBER_ETA1 3 -#define KYBER_POLYCOMPRESSEDBYTES 128 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) -#elif KYBER_K == 3 -#define KYBER_ETA1 2 -#define KYBER_POLYCOMPRESSEDBYTES 128 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) -#elif KYBER_K == 4 -#define KYBER_ETA1 2 -#define KYBER_POLYCOMPRESSEDBYTES 160 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) -#endif - -#define KYBER_ETA2 2 - -#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES) -#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) -#define KYBER_INDCPA_SECRETKEYBYTES 
(KYBER_POLYVECBYTES) -#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) - -#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) -/* 32 bytes of additional space to save H(pk) */ -#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) -#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES) - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/poly.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/poly.c deleted file mode 100644 index 681fd6d23e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/poly.c +++ /dev/null @@ -1,519 +0,0 @@ -#include -#include -#include -#include "align.h" -#include "fips202x4.h" -#include "params.h" -#include "poly.h" -#include "ntt.h" -#include "consts.h" -#include "reduce.h" -#include "cbd.h" -#include "symmetric.h" - -/************************************************* -* Name: poly_compress -* -* Description: Compression and subsequent serialization of a polynomial. -* The coefficients of the input polynomial are assumed to -* lie in the invertal [0,q], i.e. the polynomial must be reduced -* by poly_reduce(). 
-* -* Arguments: - uint8_t *r: pointer to output byte array -* (of length KYBER_POLYCOMPRESSEDBYTES) -* - const poly *a: pointer to input polynomial -**************************************************/ -#if (KYBER_POLYCOMPRESSEDBYTES == 128) -void poly_compress(uint8_t r[128], const poly * restrict a) -{ - unsigned int i; - __m256i f0, f1, f2, f3; - const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); - const __m256i shift1 = _mm256_set1_epi16(1 << 9); - const __m256i mask = _mm256_set1_epi16(15); - const __m256i shift2 = _mm256_set1_epi16((16 << 8) + 1); - const __m256i permdidx = _mm256_set_epi32(7,3,6,2,5,1,4,0); - - for(i=0;ivec[4*i+0]); - f1 = _mm256_load_si256(&a->vec[4*i+1]); - f2 = _mm256_load_si256(&a->vec[4*i+2]); - f3 = _mm256_load_si256(&a->vec[4*i+3]); - f0 = _mm256_mulhi_epi16(f0,v); - f1 = _mm256_mulhi_epi16(f1,v); - f2 = _mm256_mulhi_epi16(f2,v); - f3 = _mm256_mulhi_epi16(f3,v); - f0 = _mm256_mulhrs_epi16(f0,shift1); - f1 = _mm256_mulhrs_epi16(f1,shift1); - f2 = _mm256_mulhrs_epi16(f2,shift1); - f3 = _mm256_mulhrs_epi16(f3,shift1); - f0 = _mm256_and_si256(f0,mask); - f1 = _mm256_and_si256(f1,mask); - f2 = _mm256_and_si256(f2,mask); - f3 = _mm256_and_si256(f3,mask); - f0 = _mm256_packus_epi16(f0,f1); - f2 = _mm256_packus_epi16(f2,f3); - f0 = _mm256_maddubs_epi16(f0,shift2); - f2 = _mm256_maddubs_epi16(f2,shift2); - f0 = _mm256_packus_epi16(f0,f2); - f0 = _mm256_permutevar8x32_epi32(f0,permdidx); - _mm256_storeu_si256((__m256i *)&r[32*i],f0); - } -} - -void poly_decompress(poly * restrict r, const uint8_t a[128]) -{ - unsigned int i; - __m128i t; - __m256i f; - const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); - const __m256i shufbidx = _mm256_set_epi8(7,7,7,7,6,6,6,6,5,5,5,5,4,4,4,4, - 3,3,3,3,2,2,2,2,1,1,1,1,0,0,0,0); - const __m256i mask = _mm256_set1_epi32(0x00F0000F); - const __m256i shift = _mm256_set1_epi32((128 << 16) + 2048); - - for(i=0;ivec[i],f); - } -} - -#elif (KYBER_POLYCOMPRESSEDBYTES == 160) -void poly_compress(uint8_t 
r[160], const poly * restrict a) -{ - unsigned int i; - __m256i f0, f1; - __m128i t0, t1; - const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); - const __m256i shift1 = _mm256_set1_epi16(1 << 10); - const __m256i mask = _mm256_set1_epi16(31); - const __m256i shift2 = _mm256_set1_epi16((32 << 8) + 1); - const __m256i shift3 = _mm256_set1_epi32((1024 << 16) + 1); - const __m256i sllvdidx = _mm256_set1_epi64x(12); - const __m256i shufbidx = _mm256_set_epi8( 8,-1,-1,-1,-1,-1, 4, 3, 2, 1, 0,-1,12,11,10, 9, - -1,12,11,10, 9, 8,-1,-1,-1,-1,-1 ,4, 3, 2, 1, 0); - - for(i=0;ivec[2*i+0]); - f1 = _mm256_load_si256(&a->vec[2*i+1]); - f0 = _mm256_mulhi_epi16(f0,v); - f1 = _mm256_mulhi_epi16(f1,v); - f0 = _mm256_mulhrs_epi16(f0,shift1); - f1 = _mm256_mulhrs_epi16(f1,shift1); - f0 = _mm256_and_si256(f0,mask); - f1 = _mm256_and_si256(f1,mask); - f0 = _mm256_packus_epi16(f0,f1); - f0 = _mm256_maddubs_epi16(f0,shift2); // a0 a1 a2 a3 b0 b1 b2 b3 a4 a5 a6 a7 b4 b5 b6 b7 - f0 = _mm256_madd_epi16(f0,shift3); // a0 a1 b0 b1 a2 a3 b2 b3 - f0 = _mm256_sllv_epi32(f0,sllvdidx); - f0 = _mm256_srlv_epi64(f0,sllvdidx); - f0 = _mm256_shuffle_epi8(f0,shufbidx); - t0 = _mm256_castsi256_si128(f0); - t1 = _mm256_extracti128_si256(f0,1); - t0 = _mm_blendv_epi8(t0,t1,_mm256_castsi256_si128(shufbidx)); - _mm_storeu_si128((__m128i *)&r[20*i+ 0],t0); - memcpy(&r[20*i+16],&t1,4); - } -} - -void poly_decompress(poly * restrict r, const uint8_t a[160]) -{ - unsigned int i; - __m128i t; - __m256i f; - int16_t ti; - const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); - const __m256i shufbidx = _mm256_set_epi8(9,9,9,8,8,8,8,7,7,6,6,6,6,5,5,5, - 4,4,4,3,3,3,3,2,2,1,1,1,1,0,0,0); - const __m256i mask = _mm256_set_epi16(248,1984,62,496,3968,124,992,31, - 248,1984,62,496,3968,124,992,31); - const __m256i shift = _mm256_set_epi16(128,16,512,64,8,256,32,1024, - 128,16,512,64,8,256,32,1024); - - for(i=0;ivec[i],f); - } -} - -#endif - -/************************************************* -* Name: 
poly_tobytes -* -* Description: Serialization of a polynomial in NTT representation. -* The coefficients of the input polynomial are assumed to -* lie in the invertal [0,q], i.e. the polynomial must be reduced -* by poly_reduce(). The coefficients are orderd as output by -* poly_ntt(); the serialized output coefficients are in bitreversed -* order. -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for KYBER_POLYBYTES bytes) -* - poly *a: pointer to input polynomial -**************************************************/ -void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a) -{ - ntttobytes_avx(r, a->vec, qdata.vec); -} - -/************************************************* -* Name: poly_frombytes -* -* Description: De-serialization of a polynomial; -* inverse of poly_tobytes -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *a: pointer to input byte array -* (of KYBER_POLYBYTES bytes) -**************************************************/ -void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]) -{ - nttfrombytes_avx(r->vec, a, qdata.vec); -} - -/************************************************* -* Name: poly_frommsg -* -* Description: Convert 32-byte message to polynomial -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *msg: pointer to input message -**************************************************/ -void poly_frommsg(poly * restrict r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]) -{ -#if (KYBER_INDCPA_MSGBYTES != 32) -#error "KYBER_INDCPA_MSGBYTES must be equal to 32!" 
-#endif - __m256i f, g0, g1, g2, g3, h0, h1, h2, h3; - const __m256i shift = _mm256_broadcastsi128_si256(_mm_set_epi32(0,1,2,3)); - const __m256i idx = _mm256_broadcastsi128_si256(_mm_set_epi8(15,14,11,10,7,6,3,2,13,12,9,8,5,4,1,0)); - const __m256i hqs = _mm256_set1_epi16((KYBER_Q+1)/2); - -#define FROMMSG64(i) \ - g3 = _mm256_shuffle_epi32(f,0x55*i); \ - g3 = _mm256_sllv_epi32(g3,shift); \ - g3 = _mm256_shuffle_epi8(g3,idx); \ - g0 = _mm256_slli_epi16(g3,12); \ - g1 = _mm256_slli_epi16(g3,8); \ - g2 = _mm256_slli_epi16(g3,4); \ - g0 = _mm256_srai_epi16(g0,15); \ - g1 = _mm256_srai_epi16(g1,15); \ - g2 = _mm256_srai_epi16(g2,15); \ - g3 = _mm256_srai_epi16(g3,15); \ - g0 = _mm256_and_si256(g0,hqs); /* 19 18 17 16 3 2 1 0 */ \ - g1 = _mm256_and_si256(g1,hqs); /* 23 22 21 20 7 6 5 4 */ \ - g2 = _mm256_and_si256(g2,hqs); /* 27 26 25 24 11 10 9 8 */ \ - g3 = _mm256_and_si256(g3,hqs); /* 31 30 29 28 15 14 13 12 */ \ - h0 = _mm256_unpacklo_epi64(g0,g1); \ - h2 = _mm256_unpackhi_epi64(g0,g1); \ - h1 = _mm256_unpacklo_epi64(g2,g3); \ - h3 = _mm256_unpackhi_epi64(g2,g3); \ - g0 = _mm256_permute2x128_si256(h0,h1,0x20); \ - g2 = _mm256_permute2x128_si256(h0,h1,0x31); \ - g1 = _mm256_permute2x128_si256(h2,h3,0x20); \ - g3 = _mm256_permute2x128_si256(h2,h3,0x31); \ - _mm256_store_si256(&r->vec[0+2*i+0],g0); \ - _mm256_store_si256(&r->vec[0+2*i+1],g1); \ - _mm256_store_si256(&r->vec[8+2*i+0],g2); \ - _mm256_store_si256(&r->vec[8+2*i+1],g3) - - f = _mm256_loadu_si256((__m256i *)msg); - FROMMSG64(0); - FROMMSG64(1); - FROMMSG64(2); - FROMMSG64(3); -} - -/************************************************* -* Name: poly_tomsg -* -* Description: Convert polynomial to 32-byte message. -* The coefficients of the input polynomial are assumed to -* lie in the invertal [0,q], i.e. the polynomial must be reduced -* by poly_reduce(). 
-* -* Arguments: - uint8_t *msg: pointer to output message -* - poly *a: pointer to input polynomial -**************************************************/ -void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly * restrict a) -{ - unsigned int i; - uint32_t small; - __m256i f0, f1, g0, g1; - const __m256i hq = _mm256_set1_epi16((KYBER_Q - 1)/2); - const __m256i hhq = _mm256_set1_epi16((KYBER_Q - 1)/4); - - for(i=0;ivec[2*i+0]); - f1 = _mm256_load_si256(&a->vec[2*i+1]); - f0 = _mm256_sub_epi16(hq, f0); - f1 = _mm256_sub_epi16(hq, f1); - g0 = _mm256_srai_epi16(f0, 15); - g1 = _mm256_srai_epi16(f1, 15); - f0 = _mm256_xor_si256(f0, g0); - f1 = _mm256_xor_si256(f1, g1); - f0 = _mm256_sub_epi16(f0, hhq); - f1 = _mm256_sub_epi16(f1, hhq); - f0 = _mm256_packs_epi16(f0, f1); - f0 = _mm256_permute4x64_epi64(f0, 0xD8); - small = _mm256_movemask_epi8(f0); - memcpy(&msg[4*i], &small, 4); - } -} - -/************************************************* -* Name: poly_getnoise_eta1 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter KYBER_ETA1 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *seed: pointer to input seed -* (of length KYBER_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) -{ - ALIGNED_UINT8(KYBER_ETA1*KYBER_N/4+32) buf; // +32 bytes as required by poly_cbd_eta1 - prf(buf.coeffs, KYBER_ETA1*KYBER_N/4, seed, nonce); - poly_cbd_eta1(r, buf.vec); -} - -/************************************************* -* Name: poly_getnoise_eta2 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter KYBER_ETA2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *seed: 
pointer to input seed -* (of length KYBER_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) -{ - ALIGNED_UINT8(KYBER_ETA2*KYBER_N/4) buf; - prf(buf.coeffs, KYBER_ETA2*KYBER_N/4, seed, nonce); - poly_cbd_eta2(r, buf.vec); -} - -#ifndef KYBER_90S -#define NOISE_NBLOCKS ((KYBER_ETA1*KYBER_N/4+SHAKE256_RATE-1)/SHAKE256_RATE) -void poly_getnoise_eta1_4x(poly *r0, - poly *r1, - poly *r2, - poly *r3, - const uint8_t seed[32], - uint8_t nonce0, - uint8_t nonce1, - uint8_t nonce2, - uint8_t nonce3) -{ - ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) buf[4]; - __m256i f; - shake256x4incctx state; - - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - buf[0].coeffs[32] = nonce0; - buf[1].coeffs[32] = nonce1; - buf[2].coeffs[32] = nonce2; - buf[3].coeffs[32] = nonce3; - - shake256x4_inc_init(&state); - shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33); - shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state); - shake256x4_inc_ctx_release(&state); - - poly_cbd_eta1(r0, buf[0].vec); - poly_cbd_eta1(r1, buf[1].vec); - poly_cbd_eta1(r2, buf[2].vec); - poly_cbd_eta1(r3, buf[3].vec); -} - -#if KYBER_K == 2 -void poly_getnoise_eta1122_4x(poly *r0, - poly *r1, - poly *r2, - poly *r3, - const uint8_t seed[32], - uint8_t nonce0, - uint8_t nonce1, - uint8_t nonce2, - uint8_t nonce3) -{ - ALIGNED_UINT8(NOISE_NBLOCKS*SHAKE256_RATE) buf[4]; - __m256i f; - shake256x4incctx state; - - f = _mm256_loadu_si256((__m256i *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - buf[0].coeffs[32] = nonce0; - 
buf[1].coeffs[32] = nonce1; - buf[2].coeffs[32] = nonce2; - buf[3].coeffs[32] = nonce3; - - shake256x4_inc_init(&state); - shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33); - shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state); - shake256x4_inc_ctx_release(&state); - - poly_cbd_eta1(r0, buf[0].vec); - poly_cbd_eta1(r1, buf[1].vec); - poly_cbd_eta2(r2, buf[2].vec); - poly_cbd_eta2(r3, buf[3].vec); -} -#endif -#endif - -/************************************************* -* Name: poly_ntt -* -* Description: Computes negacyclic number-theoretic transform (NTT) of -* a polynomial in place. -* Input coefficients assumed to be in normal order, -* output coefficients are in special order that is natural -* for the vectorization. Input coefficients are assumed to be -* bounded by q in absolute value, output coefficients are bounded -* by 16118 in absolute value. -* -* Arguments: - poly *r: pointer to in/output polynomial -**************************************************/ -void poly_ntt(poly *r) -{ - ntt_avx(r->vec, qdata.vec); -} - -/************************************************* -* Name: poly_invntt_tomont -* -* Description: Computes inverse of negacyclic number-theoretic transform (NTT) -* of a polynomial in place; -* Input coefficients assumed to be in special order from vectorized -* forward ntt, output in normal order. Input coefficients can be -* arbitrary 16-bit integers, output coefficients are bounded by 14870 -* in absolute value. -* -* Arguments: - poly *a: pointer to in/output polynomial -**************************************************/ -void poly_invntt_tomont(poly *r) -{ - invntt_avx(r->vec, qdata.vec); -} - -void poly_nttunpack(poly *r) -{ - nttunpack_avx(r->vec, qdata.vec); -} - -/************************************************* -* Name: poly_basemul_montgomery -* -* Description: Multiplication of two polynomials in NTT domain. 
-* One of the input polynomials needs to have coefficients -* bounded by q, the other polynomial can have arbitrary -* coefficients. Output coefficients are bounded by 6656. -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_basemul_montgomery(poly *r, const poly *a, const poly *b) -{ - basemul_avx(r->vec, a->vec, b->vec, qdata.vec); -} - -/************************************************* -* Name: poly_tomont -* -* Description: Inplace conversion of all coefficients of a polynomial -* from normal domain to Montgomery domain -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_tomont(poly *r) -{ - tomont_avx(r->vec, qdata.vec); -} - -/************************************************* -* Name: poly_reduce -* -* Description: Applies Barrett reduction to all coefficients of a polynomial -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_reduce(poly *r) -{ - reduce_avx(r->vec, qdata.vec); -} - -/************************************************* -* Name: poly_add -* -* Description: Add two polynomials. No modular reduction -* is performed. 
-* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_add(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - __m256i f0, f1; - - for(i=0;ivec[i]); - f1 = _mm256_load_si256(&b->vec[i]); - f0 = _mm256_add_epi16(f0, f1); - _mm256_store_si256(&r->vec[i], f0); - } -} - -/************************************************* -* Name: poly_sub -* -* Description: Subtract two polynomials. No modular reduction -* is performed. -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_sub(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - __m256i f0, f1; - - for(i=0;ivec[i]); - f1 = _mm256_load_si256(&b->vec[i]); - f0 = _mm256_sub_epi16(f0, f1); - _mm256_store_si256(&r->vec[i], f0); - } -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/poly.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/poly.h deleted file mode 100644 index 6a9cf71c70..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/poly.h +++ /dev/null @@ -1,77 +0,0 @@ -#ifndef POLY_H -#define POLY_H - -#include -#include "align.h" -#include "params.h" - -typedef ALIGNED_INT16(KYBER_N) poly; - -#define poly_compress KYBER_NAMESPACE(poly_compress) -void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a); -#define poly_decompress KYBER_NAMESPACE(poly_decompress) -void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]); - -#define poly_tobytes KYBER_NAMESPACE(poly_tobytes) -void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a); -#define poly_frombytes KYBER_NAMESPACE(poly_frombytes) -void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]); - -#define 
poly_frommsg KYBER_NAMESPACE(poly_frommsg) -void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]); -#define poly_tomsg KYBER_NAMESPACE(poly_tomsg) -void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r); - -#define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1) -void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); - -#define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2) -void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); - -#ifndef KYBER_90S -#define poly_getnoise_eta1_4x KYBER_NAMESPACE(poly_getnoise_eta2_4x) -void poly_getnoise_eta1_4x(poly *r0, - poly *r1, - poly *r2, - poly *r3, - const uint8_t seed[32], - uint8_t nonce0, - uint8_t nonce1, - uint8_t nonce2, - uint8_t nonce3); - -#if KYBER_K == 2 -#define poly_getnoise_eta1122_4x KYBER_NAMESPACE(poly_getnoise_eta1122_4x) -void poly_getnoise_eta1122_4x(poly *r0, - poly *r1, - poly *r2, - poly *r3, - const uint8_t seed[32], - uint8_t nonce0, - uint8_t nonce1, - uint8_t nonce2, - uint8_t nonce3); -#endif -#endif - - -#define poly_ntt KYBER_NAMESPACE(poly_ntt) -void poly_ntt(poly *r); -#define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont) -void poly_invntt_tomont(poly *r); -#define poly_nttunpack KYBER_NAMESPACE(poly_nttunpack) -void poly_nttunpack(poly *r); -#define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery) -void poly_basemul_montgomery(poly *r, const poly *a, const poly *b); -#define poly_tomont KYBER_NAMESPACE(poly_tomont) -void poly_tomont(poly *r); - -#define poly_reduce KYBER_NAMESPACE(poly_reduce) -void poly_reduce(poly *r); - -#define poly_add KYBER_NAMESPACE(poly_add) -void poly_add(poly *r, const poly *a, const poly *b); -#define poly_sub KYBER_NAMESPACE(poly_sub) -void poly_sub(poly *r, const poly *a, const poly *b); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/polyvec.c 
b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/polyvec.c deleted file mode 100644 index a0174b7b3f..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/polyvec.c +++ /dev/null @@ -1,307 +0,0 @@ -#include -#include -#include -#include "params.h" -#include "polyvec.h" -#include "poly.h" -#include "ntt.h" -#include "consts.h" - -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) -static void poly_compress10(uint8_t r[320], const poly * restrict a) -{ - unsigned int i; - __m256i f0, f1, f2; - __m128i t0, t1; - const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); - const __m256i v8 = _mm256_slli_epi16(v,3); - const __m256i off = _mm256_set1_epi16(15); - const __m256i shift1 = _mm256_set1_epi16(1 << 12); - const __m256i mask = _mm256_set1_epi16(1023); - const __m256i shift2 = _mm256_set1_epi64x((1024LL << 48) + (1LL << 32) + (1024 << 16) + 1); - const __m256i sllvdidx = _mm256_set1_epi64x(12); - const __m256i shufbidx = _mm256_set_epi8( 8, 4, 3, 2, 1, 0,-1,-1,-1,-1,-1,-1,12,11,10, 9, - -1,-1,-1,-1,-1,-1,12,11,10, 9, 8, 4, 3, 2, 1, 0); - - for(i=0;ivec[i]); - f1 = _mm256_mullo_epi16(f0,v8); - f2 = _mm256_add_epi16(f0,off); - f0 = _mm256_slli_epi16(f0,3); - f0 = _mm256_mulhi_epi16(f0,v); - f2 = _mm256_sub_epi16(f1,f2); - f1 = _mm256_andnot_si256(f1,f2); - f1 = _mm256_srli_epi16(f1,15); - f0 = _mm256_sub_epi16(f0,f1); - f0 = _mm256_mulhrs_epi16(f0,shift1); - f0 = _mm256_and_si256(f0,mask); - f0 = _mm256_madd_epi16(f0,shift2); - f0 = _mm256_sllv_epi32(f0,sllvdidx); - f0 = _mm256_srli_epi64(f0,12); - f0 = _mm256_shuffle_epi8(f0,shufbidx); - t0 = _mm256_castsi256_si128(f0); - t1 = _mm256_extracti128_si256(f0,1); - t0 = _mm_blend_epi16(t0,t1,0xE0); - _mm_storeu_si128((__m128i *)&r[20*i+ 0],t0); - memcpy(&r[20*i+16],&t1,4); - } -} - -static void poly_decompress10(poly * restrict r, const uint8_t a[320+12]) -{ - unsigned int i; - __m256i f; - const __m256i q = _mm256_set1_epi32((KYBER_Q << 16) + 4*KYBER_Q); - const __m256i shufbidx = 
_mm256_set_epi8(11,10,10, 9, 9, 8, 8, 7, - 6, 5, 5, 4, 4, 3, 3, 2, - 9, 8, 8, 7, 7, 6, 6, 5, - 4, 3, 3, 2, 2, 1, 1, 0); - const __m256i sllvdidx = _mm256_set1_epi64x(4); - const __m256i mask = _mm256_set1_epi32((32736 << 16) + 8184); - - for(i=0;ivec[i],f); - } -} - -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) -static void poly_compress11(uint8_t r[352+2], const poly * restrict a) -{ - unsigned int i; - __m256i f0, f1, f2; - __m128i t0, t1; - const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); - const __m256i v8 = _mm256_slli_epi16(v,3); - const __m256i off = _mm256_set1_epi16(36); - const __m256i shift1 = _mm256_set1_epi16(1 << 13); - const __m256i mask = _mm256_set1_epi16(2047); - const __m256i shift2 = _mm256_set1_epi64x((2048LL << 48) + (1LL << 32) + (2048 << 16) + 1); - const __m256i sllvdidx = _mm256_set1_epi64x(10); - const __m256i srlvqidx = _mm256_set_epi64x(30,10,30,10); - const __m256i shufbidx = _mm256_set_epi8( 4, 3, 2, 1, 0, 0,-1,-1,-1,-1,10, 9, 8, 7, 6, 5, - -1,-1,-1,-1,-1,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - - for(i=0;ivec[i]); - f1 = _mm256_mullo_epi16(f0,v8); - f2 = _mm256_add_epi16(f0,off); - f0 = _mm256_slli_epi16(f0,3); - f0 = _mm256_mulhi_epi16(f0,v); - f2 = _mm256_sub_epi16(f1,f2); - f1 = _mm256_andnot_si256(f1,f2); - f1 = _mm256_srli_epi16(f1,15); - f0 = _mm256_sub_epi16(f0,f1); - f0 = _mm256_mulhrs_epi16(f0,shift1); - f0 = _mm256_and_si256(f0,mask); - f0 = _mm256_madd_epi16(f0,shift2); - f0 = _mm256_sllv_epi32(f0,sllvdidx); - f1 = _mm256_bsrli_epi128(f0,8); - f0 = _mm256_srlv_epi64(f0,srlvqidx); - f1 = _mm256_slli_epi64(f1,34); - f0 = _mm256_add_epi64(f0,f1); - f0 = _mm256_shuffle_epi8(f0,shufbidx); - t0 = _mm256_castsi256_si128(f0); - t1 = _mm256_extracti128_si256(f0,1); - t0 = _mm_blendv_epi8(t0,t1,_mm256_castsi256_si128(shufbidx)); - _mm_storeu_si128((__m128i *)&r[22*i+ 0],t0); - _mm_storel_epi64((__m128i *)&r[22*i+16],t1); - } -} - -static void poly_decompress11(poly * restrict r, const uint8_t a[352+10]) -{ - 
unsigned int i; - __m256i f; - const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); - const __m256i shufbidx = _mm256_set_epi8(13,12,12,11,10, 9, 9, 8, - 8, 7, 6, 5, 5, 4, 4, 3, - 10, 9, 9, 8, 7, 6, 6, 5, - 5, 4, 3, 2, 2, 1, 1, 0); - const __m256i srlvdidx = _mm256_set_epi32(0,0,1,0,0,0,1,0); - const __m256i srlvqidx = _mm256_set_epi64x(2,0,2,0); - const __m256i shift = _mm256_set_epi16(4,32,1,8,32,1,4,32,4,32,1,8,32,1,4,32); - const __m256i mask = _mm256_set1_epi16(32752); - - for(i=0;ivec[i],f); - } -} - -#endif - -/************************************************* -* Name: polyvec_compress -* -* Description: Compress and serialize vector of polynomials -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for KYBER_POLYVECCOMPRESSEDBYTES) -* - polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES+2], const polyvec *a) -{ - unsigned int i; - -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) - for(i=0;ivec[i]); -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) - for(i=0;ivec[i]); -#endif -} - -/************************************************* -* Name: polyvec_decompress -* -* Description: De-serialize and decompress vector of polynomials; -* approximate inverse of polyvec_compress -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const uint8_t *a: pointer to input byte array -* (of length KYBER_POLYVECCOMPRESSEDBYTES) -**************************************************/ -void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES+12]) -{ - unsigned int i; - -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) - for(i=0;ivec[i],&a[320*i]); -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) - for(i=0;ivec[i],&a[352*i]); -#endif -} - -/************************************************* -* Name: polyvec_tobytes -* -* Description: Serialize vector of polynomials -* 
-* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for KYBER_POLYVECBYTES) -* - polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_frombytes -* -* Description: De-serialize vector of polynomials; -* inverse of polyvec_tobytes -* -* Arguments: - uint8_t *r: pointer to output byte array -* - const polyvec *a: pointer to input vector of polynomials -* (of length KYBER_POLYVECBYTES) -**************************************************/ -void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]) -{ - unsigned int i; - for(i=0;ivec[i], a+i*KYBER_POLYBYTES); -} - -/************************************************* -* Name: polyvec_ntt -* -* Description: Apply forward NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_ntt(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_invntt_tomont -* -* Description: Apply inverse NTT to all elements of a vector of polynomials -* and multiply by Montgomery factor 2^16 -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_invntt_tomont(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_basemul_acc_montgomery -* -* Description: Multiply elements in a and b in NTT domain, accumulate into r, -* and multiply by 2^-16. 
-* -* Arguments: - poly *r: pointer to output polynomial -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) -{ - unsigned int i; - poly tmp; - - poly_basemul_montgomery(r,&a->vec[0],&b->vec[0]); - for(i=1;ivec[i],&b->vec[i]); - poly_add(r,r,&tmp); - } -} - -/************************************************* -* Name: polyvec_reduce -* -* Description: Applies Barrett reduction to each coefficient -* of each element of a vector of polynomials; -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - polyvec *r: pointer to input/output polynomial -**************************************************/ -void polyvec_reduce(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_add -* -* Description: Add vectors of polynomials -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) -{ - unsigned int i; - for(i=0;ivec[i], &a->vec[i], &b->vec[i]); -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/polyvec.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/polyvec.h deleted file mode 100644 index 2ce23c31ff..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/polyvec.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef POLYVEC_H -#define POLYVEC_H - -#include -#include "params.h" -#include "poly.h" - -typedef struct{ - poly vec[KYBER_K]; -} polyvec; - -#define polyvec_compress KYBER_NAMESPACE(polyvec_compress) -void polyvec_compress(uint8_t 
r[KYBER_POLYVECCOMPRESSEDBYTES+2], const polyvec *a); -#define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress) -void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES+12]); - -#define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes) -void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a); -#define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes) -void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]); - -#define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt) -void polyvec_ntt(polyvec *r); -#define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont) -void polyvec_invntt_tomont(polyvec *r); - -#define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery) -void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b); - -#define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce) -void polyvec_reduce(polyvec *r); - -#define polyvec_add KYBER_NAMESPACE(polyvec_add) -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/reduce.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/reduce.h deleted file mode 100644 index 5368185b5f..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/reduce.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef REDUCE_H -#define REDUCE_H - -#include "params.h" -#include - -#define reduce_avx KYBER_NAMESPACE(reduce_avx) -void reduce_avx(__m256i *r, const __m256i *qdata); -#define tomont_avx KYBER_NAMESPACE(tomont_avx) -void tomont_avx(__m256i *r, const __m256i *qdata); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/rejsample.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/rejsample.c deleted file mode 100644 index 9060a44cb9..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/rejsample.c +++ /dev/null @@ -1,398 +0,0 @@ -#include -#include -#include 
-#include "params.h" -#include "consts.h" -#include "rejsample.h" - -//#define BMI - -#ifndef BMI -static const uint8_t idx[256][8] = { - {-1, -1, -1, -1, -1, -1, -1, -1}, - { 0, -1, -1, -1, -1, -1, -1, -1}, - { 2, -1, -1, -1, -1, -1, -1, -1}, - { 0, 2, -1, -1, -1, -1, -1, -1}, - { 4, -1, -1, -1, -1, -1, -1, -1}, - { 0, 4, -1, -1, -1, -1, -1, -1}, - { 2, 4, -1, -1, -1, -1, -1, -1}, - { 0, 2, 4, -1, -1, -1, -1, -1}, - { 6, -1, -1, -1, -1, -1, -1, -1}, - { 0, 6, -1, -1, -1, -1, -1, -1}, - { 2, 6, -1, -1, -1, -1, -1, -1}, - { 0, 2, 6, -1, -1, -1, -1, -1}, - { 4, 6, -1, -1, -1, -1, -1, -1}, - { 0, 4, 6, -1, -1, -1, -1, -1}, - { 2, 4, 6, -1, -1, -1, -1, -1}, - { 0, 2, 4, 6, -1, -1, -1, -1}, - { 8, -1, -1, -1, -1, -1, -1, -1}, - { 0, 8, -1, -1, -1, -1, -1, -1}, - { 2, 8, -1, -1, -1, -1, -1, -1}, - { 0, 2, 8, -1, -1, -1, -1, -1}, - { 4, 8, -1, -1, -1, -1, -1, -1}, - { 0, 4, 8, -1, -1, -1, -1, -1}, - { 2, 4, 8, -1, -1, -1, -1, -1}, - { 0, 2, 4, 8, -1, -1, -1, -1}, - { 6, 8, -1, -1, -1, -1, -1, -1}, - { 0, 6, 8, -1, -1, -1, -1, -1}, - { 2, 6, 8, -1, -1, -1, -1, -1}, - { 0, 2, 6, 8, -1, -1, -1, -1}, - { 4, 6, 8, -1, -1, -1, -1, -1}, - { 0, 4, 6, 8, -1, -1, -1, -1}, - { 2, 4, 6, 8, -1, -1, -1, -1}, - { 0, 2, 4, 6, 8, -1, -1, -1}, - {10, -1, -1, -1, -1, -1, -1, -1}, - { 0, 10, -1, -1, -1, -1, -1, -1}, - { 2, 10, -1, -1, -1, -1, -1, -1}, - { 0, 2, 10, -1, -1, -1, -1, -1}, - { 4, 10, -1, -1, -1, -1, -1, -1}, - { 0, 4, 10, -1, -1, -1, -1, -1}, - { 2, 4, 10, -1, -1, -1, -1, -1}, - { 0, 2, 4, 10, -1, -1, -1, -1}, - { 6, 10, -1, -1, -1, -1, -1, -1}, - { 0, 6, 10, -1, -1, -1, -1, -1}, - { 2, 6, 10, -1, -1, -1, -1, -1}, - { 0, 2, 6, 10, -1, -1, -1, -1}, - { 4, 6, 10, -1, -1, -1, -1, -1}, - { 0, 4, 6, 10, -1, -1, -1, -1}, - { 2, 4, 6, 10, -1, -1, -1, -1}, - { 0, 2, 4, 6, 10, -1, -1, -1}, - { 8, 10, -1, -1, -1, -1, -1, -1}, - { 0, 8, 10, -1, -1, -1, -1, -1}, - { 2, 8, 10, -1, -1, -1, -1, -1}, - { 0, 2, 8, 10, -1, -1, -1, -1}, - { 4, 8, 10, -1, -1, -1, -1, -1}, - { 0, 4, 8, 10, -1, -1, 
-1, -1}, - { 2, 4, 8, 10, -1, -1, -1, -1}, - { 0, 2, 4, 8, 10, -1, -1, -1}, - { 6, 8, 10, -1, -1, -1, -1, -1}, - { 0, 6, 8, 10, -1, -1, -1, -1}, - { 2, 6, 8, 10, -1, -1, -1, -1}, - { 0, 2, 6, 8, 10, -1, -1, -1}, - { 4, 6, 8, 10, -1, -1, -1, -1}, - { 0, 4, 6, 8, 10, -1, -1, -1}, - { 2, 4, 6, 8, 10, -1, -1, -1}, - { 0, 2, 4, 6, 8, 10, -1, -1}, - {12, -1, -1, -1, -1, -1, -1, -1}, - { 0, 12, -1, -1, -1, -1, -1, -1}, - { 2, 12, -1, -1, -1, -1, -1, -1}, - { 0, 2, 12, -1, -1, -1, -1, -1}, - { 4, 12, -1, -1, -1, -1, -1, -1}, - { 0, 4, 12, -1, -1, -1, -1, -1}, - { 2, 4, 12, -1, -1, -1, -1, -1}, - { 0, 2, 4, 12, -1, -1, -1, -1}, - { 6, 12, -1, -1, -1, -1, -1, -1}, - { 0, 6, 12, -1, -1, -1, -1, -1}, - { 2, 6, 12, -1, -1, -1, -1, -1}, - { 0, 2, 6, 12, -1, -1, -1, -1}, - { 4, 6, 12, -1, -1, -1, -1, -1}, - { 0, 4, 6, 12, -1, -1, -1, -1}, - { 2, 4, 6, 12, -1, -1, -1, -1}, - { 0, 2, 4, 6, 12, -1, -1, -1}, - { 8, 12, -1, -1, -1, -1, -1, -1}, - { 0, 8, 12, -1, -1, -1, -1, -1}, - { 2, 8, 12, -1, -1, -1, -1, -1}, - { 0, 2, 8, 12, -1, -1, -1, -1}, - { 4, 8, 12, -1, -1, -1, -1, -1}, - { 0, 4, 8, 12, -1, -1, -1, -1}, - { 2, 4, 8, 12, -1, -1, -1, -1}, - { 0, 2, 4, 8, 12, -1, -1, -1}, - { 6, 8, 12, -1, -1, -1, -1, -1}, - { 0, 6, 8, 12, -1, -1, -1, -1}, - { 2, 6, 8, 12, -1, -1, -1, -1}, - { 0, 2, 6, 8, 12, -1, -1, -1}, - { 4, 6, 8, 12, -1, -1, -1, -1}, - { 0, 4, 6, 8, 12, -1, -1, -1}, - { 2, 4, 6, 8, 12, -1, -1, -1}, - { 0, 2, 4, 6, 8, 12, -1, -1}, - {10, 12, -1, -1, -1, -1, -1, -1}, - { 0, 10, 12, -1, -1, -1, -1, -1}, - { 2, 10, 12, -1, -1, -1, -1, -1}, - { 0, 2, 10, 12, -1, -1, -1, -1}, - { 4, 10, 12, -1, -1, -1, -1, -1}, - { 0, 4, 10, 12, -1, -1, -1, -1}, - { 2, 4, 10, 12, -1, -1, -1, -1}, - { 0, 2, 4, 10, 12, -1, -1, -1}, - { 6, 10, 12, -1, -1, -1, -1, -1}, - { 0, 6, 10, 12, -1, -1, -1, -1}, - { 2, 6, 10, 12, -1, -1, -1, -1}, - { 0, 2, 6, 10, 12, -1, -1, -1}, - { 4, 6, 10, 12, -1, -1, -1, -1}, - { 0, 4, 6, 10, 12, -1, -1, -1}, - { 2, 4, 6, 10, 12, -1, -1, -1}, - { 0, 2, 4, 6, 10, 12, 
-1, -1}, - { 8, 10, 12, -1, -1, -1, -1, -1}, - { 0, 8, 10, 12, -1, -1, -1, -1}, - { 2, 8, 10, 12, -1, -1, -1, -1}, - { 0, 2, 8, 10, 12, -1, -1, -1}, - { 4, 8, 10, 12, -1, -1, -1, -1}, - { 0, 4, 8, 10, 12, -1, -1, -1}, - { 2, 4, 8, 10, 12, -1, -1, -1}, - { 0, 2, 4, 8, 10, 12, -1, -1}, - { 6, 8, 10, 12, -1, -1, -1, -1}, - { 0, 6, 8, 10, 12, -1, -1, -1}, - { 2, 6, 8, 10, 12, -1, -1, -1}, - { 0, 2, 6, 8, 10, 12, -1, -1}, - { 4, 6, 8, 10, 12, -1, -1, -1}, - { 0, 4, 6, 8, 10, 12, -1, -1}, - { 2, 4, 6, 8, 10, 12, -1, -1}, - { 0, 2, 4, 6, 8, 10, 12, -1}, - {14, -1, -1, -1, -1, -1, -1, -1}, - { 0, 14, -1, -1, -1, -1, -1, -1}, - { 2, 14, -1, -1, -1, -1, -1, -1}, - { 0, 2, 14, -1, -1, -1, -1, -1}, - { 4, 14, -1, -1, -1, -1, -1, -1}, - { 0, 4, 14, -1, -1, -1, -1, -1}, - { 2, 4, 14, -1, -1, -1, -1, -1}, - { 0, 2, 4, 14, -1, -1, -1, -1}, - { 6, 14, -1, -1, -1, -1, -1, -1}, - { 0, 6, 14, -1, -1, -1, -1, -1}, - { 2, 6, 14, -1, -1, -1, -1, -1}, - { 0, 2, 6, 14, -1, -1, -1, -1}, - { 4, 6, 14, -1, -1, -1, -1, -1}, - { 0, 4, 6, 14, -1, -1, -1, -1}, - { 2, 4, 6, 14, -1, -1, -1, -1}, - { 0, 2, 4, 6, 14, -1, -1, -1}, - { 8, 14, -1, -1, -1, -1, -1, -1}, - { 0, 8, 14, -1, -1, -1, -1, -1}, - { 2, 8, 14, -1, -1, -1, -1, -1}, - { 0, 2, 8, 14, -1, -1, -1, -1}, - { 4, 8, 14, -1, -1, -1, -1, -1}, - { 0, 4, 8, 14, -1, -1, -1, -1}, - { 2, 4, 8, 14, -1, -1, -1, -1}, - { 0, 2, 4, 8, 14, -1, -1, -1}, - { 6, 8, 14, -1, -1, -1, -1, -1}, - { 0, 6, 8, 14, -1, -1, -1, -1}, - { 2, 6, 8, 14, -1, -1, -1, -1}, - { 0, 2, 6, 8, 14, -1, -1, -1}, - { 4, 6, 8, 14, -1, -1, -1, -1}, - { 0, 4, 6, 8, 14, -1, -1, -1}, - { 2, 4, 6, 8, 14, -1, -1, -1}, - { 0, 2, 4, 6, 8, 14, -1, -1}, - {10, 14, -1, -1, -1, -1, -1, -1}, - { 0, 10, 14, -1, -1, -1, -1, -1}, - { 2, 10, 14, -1, -1, -1, -1, -1}, - { 0, 2, 10, 14, -1, -1, -1, -1}, - { 4, 10, 14, -1, -1, -1, -1, -1}, - { 0, 4, 10, 14, -1, -1, -1, -1}, - { 2, 4, 10, 14, -1, -1, -1, -1}, - { 0, 2, 4, 10, 14, -1, -1, -1}, - { 6, 10, 14, -1, -1, -1, -1, -1}, - { 0, 6, 10, 14, -1, 
-1, -1, -1}, - { 2, 6, 10, 14, -1, -1, -1, -1}, - { 0, 2, 6, 10, 14, -1, -1, -1}, - { 4, 6, 10, 14, -1, -1, -1, -1}, - { 0, 4, 6, 10, 14, -1, -1, -1}, - { 2, 4, 6, 10, 14, -1, -1, -1}, - { 0, 2, 4, 6, 10, 14, -1, -1}, - { 8, 10, 14, -1, -1, -1, -1, -1}, - { 0, 8, 10, 14, -1, -1, -1, -1}, - { 2, 8, 10, 14, -1, -1, -1, -1}, - { 0, 2, 8, 10, 14, -1, -1, -1}, - { 4, 8, 10, 14, -1, -1, -1, -1}, - { 0, 4, 8, 10, 14, -1, -1, -1}, - { 2, 4, 8, 10, 14, -1, -1, -1}, - { 0, 2, 4, 8, 10, 14, -1, -1}, - { 6, 8, 10, 14, -1, -1, -1, -1}, - { 0, 6, 8, 10, 14, -1, -1, -1}, - { 2, 6, 8, 10, 14, -1, -1, -1}, - { 0, 2, 6, 8, 10, 14, -1, -1}, - { 4, 6, 8, 10, 14, -1, -1, -1}, - { 0, 4, 6, 8, 10, 14, -1, -1}, - { 2, 4, 6, 8, 10, 14, -1, -1}, - { 0, 2, 4, 6, 8, 10, 14, -1}, - {12, 14, -1, -1, -1, -1, -1, -1}, - { 0, 12, 14, -1, -1, -1, -1, -1}, - { 2, 12, 14, -1, -1, -1, -1, -1}, - { 0, 2, 12, 14, -1, -1, -1, -1}, - { 4, 12, 14, -1, -1, -1, -1, -1}, - { 0, 4, 12, 14, -1, -1, -1, -1}, - { 2, 4, 12, 14, -1, -1, -1, -1}, - { 0, 2, 4, 12, 14, -1, -1, -1}, - { 6, 12, 14, -1, -1, -1, -1, -1}, - { 0, 6, 12, 14, -1, -1, -1, -1}, - { 2, 6, 12, 14, -1, -1, -1, -1}, - { 0, 2, 6, 12, 14, -1, -1, -1}, - { 4, 6, 12, 14, -1, -1, -1, -1}, - { 0, 4, 6, 12, 14, -1, -1, -1}, - { 2, 4, 6, 12, 14, -1, -1, -1}, - { 0, 2, 4, 6, 12, 14, -1, -1}, - { 8, 12, 14, -1, -1, -1, -1, -1}, - { 0, 8, 12, 14, -1, -1, -1, -1}, - { 2, 8, 12, 14, -1, -1, -1, -1}, - { 0, 2, 8, 12, 14, -1, -1, -1}, - { 4, 8, 12, 14, -1, -1, -1, -1}, - { 0, 4, 8, 12, 14, -1, -1, -1}, - { 2, 4, 8, 12, 14, -1, -1, -1}, - { 0, 2, 4, 8, 12, 14, -1, -1}, - { 6, 8, 12, 14, -1, -1, -1, -1}, - { 0, 6, 8, 12, 14, -1, -1, -1}, - { 2, 6, 8, 12, 14, -1, -1, -1}, - { 0, 2, 6, 8, 12, 14, -1, -1}, - { 4, 6, 8, 12, 14, -1, -1, -1}, - { 0, 4, 6, 8, 12, 14, -1, -1}, - { 2, 4, 6, 8, 12, 14, -1, -1}, - { 0, 2, 4, 6, 8, 12, 14, -1}, - {10, 12, 14, -1, -1, -1, -1, -1}, - { 0, 10, 12, 14, -1, -1, -1, -1}, - { 2, 10, 12, 14, -1, -1, -1, -1}, - { 0, 2, 10, 12, 14, -1, 
-1, -1}, - { 4, 10, 12, 14, -1, -1, -1, -1}, - { 0, 4, 10, 12, 14, -1, -1, -1}, - { 2, 4, 10, 12, 14, -1, -1, -1}, - { 0, 2, 4, 10, 12, 14, -1, -1}, - { 6, 10, 12, 14, -1, -1, -1, -1}, - { 0, 6, 10, 12, 14, -1, -1, -1}, - { 2, 6, 10, 12, 14, -1, -1, -1}, - { 0, 2, 6, 10, 12, 14, -1, -1}, - { 4, 6, 10, 12, 14, -1, -1, -1}, - { 0, 4, 6, 10, 12, 14, -1, -1}, - { 2, 4, 6, 10, 12, 14, -1, -1}, - { 0, 2, 4, 6, 10, 12, 14, -1}, - { 8, 10, 12, 14, -1, -1, -1, -1}, - { 0, 8, 10, 12, 14, -1, -1, -1}, - { 2, 8, 10, 12, 14, -1, -1, -1}, - { 0, 2, 8, 10, 12, 14, -1, -1}, - { 4, 8, 10, 12, 14, -1, -1, -1}, - { 0, 4, 8, 10, 12, 14, -1, -1}, - { 2, 4, 8, 10, 12, 14, -1, -1}, - { 0, 2, 4, 8, 10, 12, 14, -1}, - { 6, 8, 10, 12, 14, -1, -1, -1}, - { 0, 6, 8, 10, 12, 14, -1, -1}, - { 2, 6, 8, 10, 12, 14, -1, -1}, - { 0, 2, 6, 8, 10, 12, 14, -1}, - { 4, 6, 8, 10, 12, 14, -1, -1}, - { 0, 4, 6, 8, 10, 12, 14, -1}, - { 2, 4, 6, 8, 10, 12, 14, -1}, - { 0, 2, 4, 6, 8, 10, 12, 14} -}; -#endif - -#define _mm256_cmpge_epu16(a, b) _mm256_cmpeq_epi16(_mm256_max_epu16(a, b), a) -#define _mm_cmpge_epu16(a, b) _mm_cmpeq_epi16(_mm_max_epu16(a, b), a) - -unsigned int rej_uniform_avx(int16_t * restrict r, const uint8_t *buf) -{ - unsigned int ctr, pos; - uint16_t val0, val1; - uint32_t good; -#ifdef BMI - uint64_t idx0, idx1, idx2, idx3; -#endif - const __m256i bound = _mm256_load_si256(&qdata.vec[_16XQ/16]); - const __m256i ones = _mm256_set1_epi8(1); - const __m256i mask = _mm256_set1_epi16(0xFFF); - const __m256i idx8 = _mm256_set_epi8(15,14,14,13,12,11,11,10, - 9, 8, 8, 7, 6, 5, 5, 4, - 11,10,10, 9, 8, 7, 7, 6, - 5, 4, 4, 3, 2, 1, 1, 0); - __m256i f0, f1, g0, g1, g2, g3; - __m128i f, t, pilo, pihi; - - ctr = pos = 0; - while(ctr <= KYBER_N - 32 && pos <= REJ_UNIFORM_AVX_BUFLEN - 56) { - f0 = _mm256_loadu_si256((__m256i *)&buf[pos]); - f1 = _mm256_loadu_si256((__m256i *)&buf[pos+24]); - f0 = _mm256_permute4x64_epi64(f0, 0x94); - f1 = _mm256_permute4x64_epi64(f1, 0x94); - f0 = _mm256_shuffle_epi8(f0, 
idx8); - f1 = _mm256_shuffle_epi8(f1, idx8); - g0 = _mm256_srli_epi16(f0, 4); - g1 = _mm256_srli_epi16(f1, 4); - f0 = _mm256_blend_epi16(f0, g0, 0xAA); - f1 = _mm256_blend_epi16(f1, g1, 0xAA); - f0 = _mm256_and_si256(f0, mask); - f1 = _mm256_and_si256(f1, mask); - pos += 48; - - g0 = _mm256_cmpgt_epi16(bound, f0); - g1 = _mm256_cmpgt_epi16(bound, f1); - - g0 = _mm256_packs_epi16(g0, g1); - good = _mm256_movemask_epi8(g0); - -#ifdef BMI - idx0 = _pdep_u64(good >> 0, 0x0101010101010101); - idx1 = _pdep_u64(good >> 8, 0x0101010101010101); - idx2 = _pdep_u64(good >> 16, 0x0101010101010101); - idx3 = _pdep_u64(good >> 24, 0x0101010101010101); - idx0 = (idx0 << 8) - idx0; - idx0 = _pext_u64(0x0E0C0A0806040200, idx0); - idx1 = (idx1 << 8) - idx1; - idx1 = _pext_u64(0x0E0C0A0806040200, idx1); - idx2 = (idx2 << 8) - idx2; - idx2 = _pext_u64(0x0E0C0A0806040200, idx2); - idx3 = (idx3 << 8) - idx3; - idx3 = _pext_u64(0x0E0C0A0806040200, idx3); - - g0 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx0)); - g1 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx1)); - g0 = _mm256_inserti128_si256(g0, _mm_cvtsi64_si128(idx2), 1); - g1 = _mm256_inserti128_si256(g1, _mm_cvtsi64_si128(idx3), 1); -#else - g0 = _mm256_castsi128_si256(_mm_loadl_epi64((__m128i *)&idx[(good >> 0) & 0xFF])); - g1 = _mm256_castsi128_si256(_mm_loadl_epi64((__m128i *)&idx[(good >> 8) & 0xFF])); - g0 = _mm256_inserti128_si256(g0, _mm_loadl_epi64((__m128i *)&idx[(good >> 16) & 0xFF]), 1); - g1 = _mm256_inserti128_si256(g1, _mm_loadl_epi64((__m128i *)&idx[(good >> 24) & 0xFF]), 1); -#endif - - g2 = _mm256_add_epi8(g0, ones); - g3 = _mm256_add_epi8(g1, ones); - g0 = _mm256_unpacklo_epi8(g0, g2); - g1 = _mm256_unpacklo_epi8(g1, g3); - - f0 = _mm256_shuffle_epi8(f0, g0); - f1 = _mm256_shuffle_epi8(f1, g1); - - _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f0)); - ctr += _mm_popcnt_u32((good >> 0) & 0xFF); - _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f0, 1)); - ctr += _mm_popcnt_u32((good >> 
16) & 0xFF); - _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f1)); - ctr += _mm_popcnt_u32((good >> 8) & 0xFF); - _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f1, 1)); - ctr += _mm_popcnt_u32((good >> 24) & 0xFF); - } - - while(ctr <= KYBER_N - 8 && pos <= REJ_UNIFORM_AVX_BUFLEN - 16) { - f = _mm_loadu_si128((__m128i *)&buf[pos]); - f = _mm_shuffle_epi8(f, _mm256_castsi256_si128(idx8)); - t = _mm_srli_epi16(f, 4); - f = _mm_blend_epi16(f, t, 0xAA); - f = _mm_and_si128(f, _mm256_castsi256_si128(mask)); - pos += 12; - - t = _mm_cmpgt_epi16(_mm256_castsi256_si128(bound), f); - good = _mm_movemask_epi8(t); - -#ifdef BMI - good &= 0x5555; - idx0 = _pdep_u64(good, 0x1111111111111111); - idx0 = (idx0 << 8) - idx0; - idx0 = _pext_u64(0x0E0C0A0806040200, idx0); - pilo = _mm_cvtsi64_si128(idx0); -#else - good = _pext_u32(good, 0x5555); - pilo = _mm_loadl_epi64((__m128i *)&idx[good]); -#endif - - pihi = _mm_add_epi8(pilo, _mm256_castsi256_si128(ones)); - pilo = _mm_unpacklo_epi8(pilo, pihi); - f = _mm_shuffle_epi8(f, pilo); - _mm_storeu_si128((__m128i *)&r[ctr], f); - ctr += _mm_popcnt_u32(good); - } - - while(ctr < KYBER_N && pos <= REJ_UNIFORM_AVX_BUFLEN - 3) { - val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; - val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)); - pos += 3; - - if(val0 < KYBER_Q) - r[ctr++] = val0; - if(val1 < KYBER_Q && ctr < KYBER_N) - r[ctr++] = val1; - } - - return ctr; -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/rejsample.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/rejsample.h deleted file mode 100644 index 3be5e2192e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/rejsample.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef REJSAMPLE_H -#define REJSAMPLE_H - -#include -#include "params.h" -#include "symmetric.h" - -#define REJ_UNIFORM_AVX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES) -#define 
REJ_UNIFORM_AVX_BUFLEN (REJ_UNIFORM_AVX_NBLOCKS*XOF_BLOCKBYTES) - -#define rej_uniform_avx KYBER_NAMESPACE(rej_uniform_avx) -unsigned int rej_uniform_avx(int16_t *r, const uint8_t *buf); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/shuffle.S b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/shuffle.S deleted file mode 100644 index 18325ebec0..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/shuffle.S +++ /dev/null @@ -1,255 +0,0 @@ -#include "consts.h" -.include "fq.inc" -.include "shuffle.inc" - -/* -nttpack_avx: -#load -vmovdqa (%rdi),%ymm4 -vmovdqa 32(%rdi),%ymm5 -vmovdqa 64(%rdi),%ymm6 -vmovdqa 96(%rdi),%ymm7 -vmovdqa 128(%rdi),%ymm8 -vmovdqa 160(%rdi),%ymm9 -vmovdqa 192(%rdi),%ymm10 -vmovdqa 224(%rdi),%ymm11 - -shuffle1 4,5,3,5 -shuffle1 6,7,4,7 -shuffle1 8,9,6,9 -shuffle1 10,11,8,11 - -shuffle2 3,4,10,4 -shuffle2 6,8,3,8 -shuffle2 5,7,6,7 -shuffle2 9,11,5,11 - -shuffle4 10,3,9,3 -shuffle4 6,5,10,5 -shuffle4 4,8,6,8 -shuffle4 7,11,4,11 - -shuffle8 9,10,7,10 -shuffle8 6,4,9,4 -shuffle8 3,5,6,5 -shuffle8 8,11,3,11 - -#store -vmovdqa %ymm7,(%rdi) -vmovdqa %ymm9,32(%rdi) -vmovdqa %ymm6,64(%rdi) -vmovdqa %ymm3,96(%rdi) -vmovdqa %ymm10,128(%rdi) -vmovdqa %ymm4,160(%rdi) -vmovdqa %ymm5,192(%rdi) -vmovdqa %ymm11,224(%rdi) - -ret -*/ - -.text -nttunpack128_avx: -#load -vmovdqa (%rdi),%ymm4 -vmovdqa 32(%rdi),%ymm5 -vmovdqa 64(%rdi),%ymm6 -vmovdqa 96(%rdi),%ymm7 -vmovdqa 128(%rdi),%ymm8 -vmovdqa 160(%rdi),%ymm9 -vmovdqa 192(%rdi),%ymm10 -vmovdqa 224(%rdi),%ymm11 - -shuffle8 4,8,3,8 -shuffle8 5,9,4,9 -shuffle8 6,10,5,10 -shuffle8 7,11,6,11 - -shuffle4 3,5,7,5 -shuffle4 8,10,3,10 -shuffle4 4,6,8,6 -shuffle4 9,11,4,11 - -shuffle2 7,8,9,8 -shuffle2 5,6,7,6 -shuffle2 3,4,5,4 -shuffle2 10,11,3,11 - -shuffle1 9,5,10,5 -shuffle1 8,4,9,4 -shuffle1 7,3,8,3 -shuffle1 6,11,7,11 - -#store -vmovdqa %ymm10,(%rdi) -vmovdqa %ymm5,32(%rdi) -vmovdqa %ymm9,64(%rdi) -vmovdqa %ymm4,96(%rdi) -vmovdqa %ymm8,128(%rdi) -vmovdqa 
%ymm3,160(%rdi) -vmovdqa %ymm7,192(%rdi) -vmovdqa %ymm11,224(%rdi) - -ret - -.global cdecl(nttunpack_avx) -cdecl(nttunpack_avx): -call nttunpack128_avx -add $256,%rdi -call nttunpack128_avx -ret - -ntttobytes128_avx: -#load -vmovdqa (%rsi),%ymm5 -vmovdqa 32(%rsi),%ymm6 -vmovdqa 64(%rsi),%ymm7 -vmovdqa 96(%rsi),%ymm8 -vmovdqa 128(%rsi),%ymm9 -vmovdqa 160(%rsi),%ymm10 -vmovdqa 192(%rsi),%ymm11 -vmovdqa 224(%rsi),%ymm12 - -#csubq -csubq 5,13 -csubq 6,13 -csubq 7,13 -csubq 8,13 -csubq 9,13 -csubq 10,13 -csubq 11,13 -csubq 12,13 - -#bitpack -vpsllw $12,%ymm6,%ymm4 -vpor %ymm4,%ymm5,%ymm4 - -vpsrlw $4,%ymm6,%ymm5 -vpsllw $8,%ymm7,%ymm6 -vpor %ymm5,%ymm6,%ymm5 - -vpsrlw $8,%ymm7,%ymm6 -vpsllw $4,%ymm8,%ymm7 -vpor %ymm6,%ymm7,%ymm6 - -vpsllw $12,%ymm10,%ymm7 -vpor %ymm7,%ymm9,%ymm7 - -vpsrlw $4,%ymm10,%ymm8 -vpsllw $8,%ymm11,%ymm9 -vpor %ymm8,%ymm9,%ymm8 - -vpsrlw $8,%ymm11,%ymm9 -vpsllw $4,%ymm12,%ymm10 -vpor %ymm9,%ymm10,%ymm9 - -shuffle1 4,5,3,5 -shuffle1 6,7,4,7 -shuffle1 8,9,6,9 - -shuffle2 3,4,8,4 -shuffle2 6,5,3,5 -shuffle2 7,9,6,9 - -shuffle4 8,3,7,3 -shuffle4 6,4,8,4 -shuffle4 5,9,6,9 - -shuffle8 7,8,5,8 -shuffle8 6,3,7,3 -shuffle8 4,9,6,9 - -#store -vmovdqu %ymm5,(%rdi) -vmovdqu %ymm7,32(%rdi) -vmovdqu %ymm6,64(%rdi) -vmovdqu %ymm8,96(%rdi) -vmovdqu %ymm3,128(%rdi) -vmovdqu %ymm9,160(%rdi) - -ret - -.global cdecl(ntttobytes_avx) -cdecl(ntttobytes_avx): -#consts -vmovdqa _16XQ*2(%rdx),%ymm0 -call ntttobytes128_avx -add $256,%rsi -add $192,%rdi -call ntttobytes128_avx -ret - -nttfrombytes128_avx: -#load -vmovdqu (%rsi),%ymm4 -vmovdqu 32(%rsi),%ymm5 -vmovdqu 64(%rsi),%ymm6 -vmovdqu 96(%rsi),%ymm7 -vmovdqu 128(%rsi),%ymm8 -vmovdqu 160(%rsi),%ymm9 - -shuffle8 4,7,3,7 -shuffle8 5,8,4,8 -shuffle8 6,9,5,9 - -shuffle4 3,8,6,8 -shuffle4 7,5,3,5 -shuffle4 4,9,7,9 - -shuffle2 6,5,4,5 -shuffle2 8,7,6,7 -shuffle2 3,9,8,9 - -shuffle1 4,7,10,7 -shuffle1 5,8,4,8 -shuffle1 6,9,5,9 - -#bitunpack -vpsrlw $12,%ymm10,%ymm11 -vpsllw $4,%ymm7,%ymm12 -vpor %ymm11,%ymm12,%ymm11 -vpand 
%ymm0,%ymm10,%ymm10 -vpand %ymm0,%ymm11,%ymm11 - -vpsrlw $8,%ymm7,%ymm12 -vpsllw $8,%ymm4,%ymm13 -vpor %ymm12,%ymm13,%ymm12 -vpand %ymm0,%ymm12,%ymm12 - -vpsrlw $4,%ymm4,%ymm13 -vpand %ymm0,%ymm13,%ymm13 - -vpsrlw $12,%ymm8,%ymm14 -vpsllw $4,%ymm5,%ymm15 -vpor %ymm14,%ymm15,%ymm14 -vpand %ymm0,%ymm8,%ymm8 -vpand %ymm0,%ymm14,%ymm14 - -vpsrlw $8,%ymm5,%ymm15 -vpsllw $8,%ymm9,%ymm1 -vpor %ymm15,%ymm1,%ymm15 -vpand %ymm0,%ymm15,%ymm15 - -vpsrlw $4,%ymm9,%ymm1 -vpand %ymm0,%ymm1,%ymm1 - -#store -vmovdqa %ymm10,(%rdi) -vmovdqa %ymm11,32(%rdi) -vmovdqa %ymm12,64(%rdi) -vmovdqa %ymm13,96(%rdi) -vmovdqa %ymm8,128(%rdi) -vmovdqa %ymm14,160(%rdi) -vmovdqa %ymm15,192(%rdi) -vmovdqa %ymm1,224(%rdi) - -ret - -.global cdecl(nttfrombytes_avx) -cdecl(nttfrombytes_avx): -#consts -vmovdqa _16XMASK*2(%rdx),%ymm0 -call nttfrombytes128_avx -add $256,%rdi -add $192,%rsi -call nttfrombytes128_avx -ret diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/shuffle.inc b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/shuffle.inc deleted file mode 100644 index 73e9ffe03c..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/shuffle.inc +++ /dev/null @@ -1,25 +0,0 @@ -.macro shuffle8 r0,r1,r2,r3 -vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 -vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle4 r0,r1,r2,r3 -vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 -vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle2 r0,r1,r2,r3 -#vpsllq $32,%ymm\r1,%ymm\r2 -vmovsldup %ymm\r1,%ymm\r2 -vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 -vpsrlq $32,%ymm\r0,%ymm\r0 -#vmovshdup %ymm\r0,%ymm\r0 -vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle1 r0,r1,r2,r3 -vpslld $16,%ymm\r1,%ymm\r2 -vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 -vpsrld $16,%ymm\r0,%ymm\r0 -vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 -.endm diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/symmetric-shake.c 
b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/symmetric-shake.c deleted file mode 100644 index 20f451882e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/symmetric-shake.c +++ /dev/null @@ -1,74 +0,0 @@ -#include -#include -#include -#include "params.h" -#include "symmetric.h" -#include "fips202.h" - -/************************************************* -* Name: kyber_shake128_absorb -* -* Description: Absorb step of the SHAKE128 specialized for the Kyber context. -* -* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state -* - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state -* - uint8_t i: additional byte of input -* - uint8_t j: additional byte of input -**************************************************/ -void kyber_shake128_absorb(shake128incctx *state, - const uint8_t seed[KYBER_SYMBYTES], - uint8_t x, - uint8_t y) -{ - uint8_t extseed[KYBER_SYMBYTES+2]; - - memcpy(extseed, seed, KYBER_SYMBYTES); - extseed[KYBER_SYMBYTES+0] = x; - extseed[KYBER_SYMBYTES+1] = y; - - shake128_absorb_once(state, extseed, sizeof(extseed)); -} - -/************************************************* -* Name: kyber_shake256_prf -* -* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -* and then generates outlen bytes of SHAKE256 output -* -* Arguments: - uint8_t *out: pointer to output -* - size_t outlen: number of requested output bytes -* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) -* - uint8_t nonce: single-byte nonce (public PRF input) -**************************************************/ -void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce) -{ - uint8_t extkey[KYBER_SYMBYTES+1]; - - memcpy(extkey, key, KYBER_SYMBYTES); - extkey[KYBER_SYMBYTES] = nonce; - - shake256(out, outlen, extkey, sizeof(extkey)); -} - -/************************************************* -* Name: kyber_shake256_prf 
-* -* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -* and then generates outlen bytes of SHAKE256 output -* -* Arguments: - uint8_t *out: pointer to output -* - size_t outlen: number of requested output bytes -* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) -* - uint8_t nonce: single-byte nonce (public PRF input) -**************************************************/ -void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]) -{ - shake256incctx s; - - shake256_inc_init(&s); - shake256_inc_absorb(&s, key, KYBER_SYMBYTES); - shake256_inc_absorb(&s, input, KYBER_CIPHERTEXTBYTES); - shake256_inc_finalize(&s); - shake256_inc_squeeze(out, KYBER_SSBYTES, &s); - shake256_inc_ctx_release(&s); -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/symmetric.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/symmetric.h deleted file mode 100644 index e4941f7a86..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/symmetric.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef SYMMETRIC_H -#define SYMMETRIC_H - -#include -#include -#include "params.h" - -#include "fips202.h" -#include "fips202x4.h" - -typedef shake128incctx xof_state; - -#define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb) -void kyber_shake128_absorb(shake128incctx *s, - const uint8_t seed[KYBER_SYMBYTES], - uint8_t x, - uint8_t y); - -#define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf) -void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce); - -#define kyber_shake256_rkprf KYBER_NAMESPACE(kyber_shake256_rkprf) -void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]); - -#define XOF_BLOCKBYTES SHAKE128_RATE - -#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) -#define hash_g(OUT, IN, 
INBYTES) sha3_512(OUT, IN, INBYTES) -#define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y) -#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) -#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE) -#define rkprf(OUT, KEY, INPUT) kyber_shake256_rkprf(OUT, KEY, INPUT) - -#endif /* SYMMETRIC_H */ diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/verify.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/verify.c deleted file mode 100644 index 06243b837f..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_avx2/verify.c +++ /dev/null @@ -1,83 +0,0 @@ -#include -#include -#include -#include "verify.h" - -/************************************************* -* Name: verify -* -* Description: Compare two arrays for equality in constant time. -* -* Arguments: const uint8_t *a: pointer to first byte array -* const uint8_t *b: pointer to second byte array -* size_t len: length of the byte arrays -* -* Returns 0 if the byte arrays are equal, 1 otherwise -**************************************************/ -int verify(const uint8_t *a, const uint8_t *b, size_t len) -{ - size_t i; - uint64_t r; - __m256i f, g, h; - - h = _mm256_setzero_si256(); - for(i=0;i> 63; - return r; -} - -/************************************************* -* Name: cmov -* -* Description: Copy len bytes from x to r if b is 1; -* don't modify x if b is 0. Requires b to be in {0,1}; -* assumes two's complement representation of negative integers. -* Runs in constant time. 
-* -* Arguments: uint8_t *r: pointer to output byte array -* const uint8_t *x: pointer to input byte array -* size_t len: Amount of bytes to be copied -* uint8_t b: Condition bit; has to be in {0,1} -**************************************************/ -void cmov(uint8_t * restrict r, const uint8_t *x, size_t len, uint8_t b) -{ - size_t i; - __m256i xvec, rvec, bvec; - -#if defined(__GNUC__) || defined(__clang__) - // Prevent the compiler from - // 1) inferring that b is 0/1-valued, and - // 2) handling the two cases with a branch. - // This is not necessary when verify.c and kem.c are separate translation - // units, but we expect that downstream consumers will copy this code and/or - // change how it is built. - __asm__("" : "+r"(b) : /* no inputs */); -#endif - - bvec = _mm256_set1_epi64x(-(uint64_t)b); - for(i=0;i -#include -#include "params.h" - -#define verify KYBER_NAMESPACE(verify) -int verify(const uint8_t *a, const uint8_t *b, size_t len); - -#define cmov KYBER_NAMESPACE(cmov) -void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); - -#define cmov_int16 KYBER_NAMESPACE(cmov_int16) -void cmov_int16(int16_t *r, int16_t v, uint16_t b); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/LICENSE b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/LICENSE deleted file mode 100644 index 7922ab8007..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/LICENSE +++ /dev/null @@ -1,6 +0,0 @@ -Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/); -or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html). - -For Keccak and AES we are using public-domain -code from sources and by authors listed in -comments on top of the respective files. 
diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/api.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/api.h deleted file mode 100644 index 70d40f3f3e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/api.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef API_H -#define API_H - -#include - -#define pqcrystals_kyber512_SECRETKEYBYTES 1632 -#define pqcrystals_kyber512_PUBLICKEYBYTES 800 -#define pqcrystals_kyber512_CIPHERTEXTBYTES 768 -#define pqcrystals_kyber512_KEYPAIRCOINBYTES 64 -#define pqcrystals_kyber512_ENCCOINBYTES 32 -#define pqcrystals_kyber512_BYTES 32 - -#define pqcrystals_kyber512_ref_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES -#define pqcrystals_kyber512_ref_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES -#define pqcrystals_kyber512_ref_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES -#define pqcrystals_kyber512_ref_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES -#define pqcrystals_kyber512_ref_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES -#define pqcrystals_kyber512_ref_BYTES pqcrystals_kyber512_BYTES - -int pqcrystals_kyber512_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); -int pqcrystals_kyber512_ref_keypair(uint8_t *pk, uint8_t *sk); -int pqcrystals_kyber512_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); -int pqcrystals_kyber512_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -int pqcrystals_kyber512_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#define pqcrystals_kyber768_SECRETKEYBYTES 2400 -#define pqcrystals_kyber768_PUBLICKEYBYTES 1184 -#define pqcrystals_kyber768_CIPHERTEXTBYTES 1088 -#define pqcrystals_kyber768_KEYPAIRCOINBYTES 64 -#define pqcrystals_kyber768_ENCCOINBYTES 32 -#define pqcrystals_kyber768_BYTES 32 - -#define pqcrystals_kyber768_ref_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES -#define pqcrystals_kyber768_ref_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES -#define 
pqcrystals_kyber768_ref_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES -#define pqcrystals_kyber768_ref_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES -#define pqcrystals_kyber768_ref_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES -#define pqcrystals_kyber768_ref_BYTES pqcrystals_kyber768_BYTES - -int pqcrystals_kyber768_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); -int pqcrystals_kyber768_ref_keypair(uint8_t *pk, uint8_t *sk); -int pqcrystals_kyber768_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); -int pqcrystals_kyber768_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -int pqcrystals_kyber768_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#define pqcrystals_kyber1024_SECRETKEYBYTES 3168 -#define pqcrystals_kyber1024_PUBLICKEYBYTES 1568 -#define pqcrystals_kyber1024_CIPHERTEXTBYTES 1568 -#define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64 -#define pqcrystals_kyber1024_ENCCOINBYTES 32 -#define pqcrystals_kyber1024_BYTES 32 - -#define pqcrystals_kyber1024_ref_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES -#define pqcrystals_kyber1024_ref_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES -#define pqcrystals_kyber1024_ref_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES -#define pqcrystals_kyber1024_ref_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES -#define pqcrystals_kyber1024_ref_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES -#define pqcrystals_kyber1024_ref_BYTES pqcrystals_kyber1024_BYTES - -int pqcrystals_kyber1024_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); -int pqcrystals_kyber1024_ref_keypair(uint8_t *pk, uint8_t *sk); -int pqcrystals_kyber1024_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); -int pqcrystals_kyber1024_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); -int pqcrystals_kyber1024_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#endif diff --git 
a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/cbd.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/cbd.c deleted file mode 100644 index 1500ffea56..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/cbd.c +++ /dev/null @@ -1,128 +0,0 @@ -#include -#include "params.h" -#include "cbd.h" - -/************************************************* -* Name: load32_littleendian -* -* Description: load 4 bytes into a 32-bit integer -* in little-endian order -* -* Arguments: - const uint8_t *x: pointer to input byte array -* -* Returns 32-bit unsigned integer loaded from x -**************************************************/ -static uint32_t load32_littleendian(const uint8_t x[4]) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - r |= (uint32_t)x[3] << 24; - return r; -} - -/************************************************* -* Name: load24_littleendian -* -* Description: load 3 bytes into a 32-bit integer -* in little-endian order. 
-* This function is only needed for Kyber-512 -* -* Arguments: - const uint8_t *x: pointer to input byte array -* -* Returns 32-bit unsigned integer loaded from x (most significant byte is zero) -**************************************************/ -#if KYBER_ETA1 == 3 -static uint32_t load24_littleendian(const uint8_t x[3]) -{ - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - return r; -} -#endif - - -/************************************************* -* Name: cbd2 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter eta=2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *buf: pointer to input byte array -**************************************************/ -static void cbd2(poly *r, const uint8_t buf[2*KYBER_N/4]) -{ - unsigned int i,j; - uint32_t t,d; - int16_t a,b; - - for(i=0;i>1) & 0x55555555; - - for(j=0;j<8;j++) { - a = (d >> (4*j+0)) & 0x3; - b = (d >> (4*j+2)) & 0x3; - r->coeffs[8*i+j] = a - b; - } - } -} - -/************************************************* -* Name: cbd3 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter eta=3. 
-* This function is only needed for Kyber-512 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *buf: pointer to input byte array -**************************************************/ -#if KYBER_ETA1 == 3 -static void cbd3(poly *r, const uint8_t buf[3*KYBER_N/4]) -{ - unsigned int i,j; - uint32_t t,d; - int16_t a,b; - - for(i=0;i>1) & 0x00249249; - d += (t>>2) & 0x00249249; - - for(j=0;j<4;j++) { - a = (d >> (6*j+0)) & 0x7; - b = (d >> (6*j+3)) & 0x7; - r->coeffs[4*i+j] = a - b; - } - } -} -#endif - -void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]) -{ -#if KYBER_ETA1 == 2 - cbd2(r, buf); -#elif KYBER_ETA1 == 3 - cbd3(r, buf); -#else -#error "This implementation requires eta1 in {2,3}" -#endif -} - -void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]) -{ -#if KYBER_ETA2 == 2 - cbd2(r, buf); -#else -#error "This implementation requires eta2 = 2" -#endif -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/cbd.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/cbd.h deleted file mode 100644 index 7b677d745d..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/cbd.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef CBD_H -#define CBD_H - -#include -#include "params.h" -#include "poly.h" - -#define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1) -void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]); - -#define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2) -void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/indcpa.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/indcpa.c deleted file mode 100644 index 726cfa985d..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/indcpa.c +++ /dev/null @@ -1,334 +0,0 @@ -#include -#include -#include -#include "params.h" -#include "indcpa.h" -#include "polyvec.h" -#include "poly.h" -#include "ntt.h" 
-#include "symmetric.h" -#include "randombytes.h" - -/************************************************* -* Name: pack_pk -* -* Description: Serialize the public key as concatenation of the -* serialized vector of polynomials pk -* and the public seed used to generate the matrix A. -* -* Arguments: uint8_t *r: pointer to the output serialized public key -* polyvec *pk: pointer to the input public-key polyvec -* const uint8_t *seed: pointer to the input public seed -**************************************************/ -static void pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES], - polyvec *pk, - const uint8_t seed[KYBER_SYMBYTES]) -{ - polyvec_tobytes(r, pk); - memcpy(r+KYBER_POLYVECBYTES, seed, KYBER_SYMBYTES); -} - -/************************************************* -* Name: unpack_pk -* -* Description: De-serialize public key from a byte array; -* approximate inverse of pack_pk -* -* Arguments: - polyvec *pk: pointer to output public-key polynomial vector -* - uint8_t *seed: pointer to output seed to generate matrix A -* - const uint8_t *packedpk: pointer to input serialized public key -**************************************************/ -static void unpack_pk(polyvec *pk, - uint8_t seed[KYBER_SYMBYTES], - const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES]) -{ - polyvec_frombytes(pk, packedpk); - memcpy(seed, packedpk+KYBER_POLYVECBYTES, KYBER_SYMBYTES); -} - -/************************************************* -* Name: pack_sk -* -* Description: Serialize the secret key -* -* Arguments: - uint8_t *r: pointer to output serialized secret key -* - polyvec *sk: pointer to input vector of polynomials (secret key) -**************************************************/ -static void pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk) -{ - polyvec_tobytes(r, sk); -} - -/************************************************* -* Name: unpack_sk -* -* Description: De-serialize the secret key; inverse of pack_sk -* -* Arguments: - polyvec *sk: pointer to output vector of 
polynomials (secret key) -* - const uint8_t *packedsk: pointer to input serialized secret key -**************************************************/ -static void unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES]) -{ - polyvec_frombytes(sk, packedsk); -} - -/************************************************* -* Name: pack_ciphertext -* -* Description: Serialize the ciphertext as concatenation of the -* compressed and serialized vector of polynomials b -* and the compressed and serialized polynomial v -* -* Arguments: uint8_t *r: pointer to the output serialized ciphertext -* poly *pk: pointer to the input vector of polynomials b -* poly *v: pointer to the input polynomial v -**************************************************/ -static void pack_ciphertext(uint8_t r[KYBER_INDCPA_BYTES], polyvec *b, poly *v) -{ - polyvec_compress(r, b); - poly_compress(r+KYBER_POLYVECCOMPRESSEDBYTES, v); -} - -/************************************************* -* Name: unpack_ciphertext -* -* Description: De-serialize and decompress ciphertext from a byte array; -* approximate inverse of pack_ciphertext -* -* Arguments: - polyvec *b: pointer to the output vector of polynomials b -* - poly *v: pointer to the output polynomial v -* - const uint8_t *c: pointer to the input serialized ciphertext -**************************************************/ -static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[KYBER_INDCPA_BYTES]) -{ - polyvec_decompress(b, c); - poly_decompress(v, c+KYBER_POLYVECCOMPRESSEDBYTES); -} - -/************************************************* -* Name: rej_uniform -* -* Description: Run rejection sampling on uniform random bytes to generate -* uniform random integers mod q -* -* Arguments: - int16_t *r: pointer to output buffer -* - unsigned int len: requested number of 16-bit integers (uniform mod q) -* - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes) -* - unsigned int buflen: length of input buffer 
in bytes -* -* Returns number of sampled 16-bit integers (at most len) -**************************************************/ -static unsigned int rej_uniform(int16_t *r, - unsigned int len, - const uint8_t *buf, - unsigned int buflen) -{ - unsigned int ctr, pos; - uint16_t val0, val1; - - ctr = pos = 0; - while(ctr < len && pos + 3 <= buflen) { - val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; - val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF; - pos += 3; - - if(val0 < KYBER_Q) - r[ctr++] = val0; - if(ctr < len && val1 < KYBER_Q) - r[ctr++] = val1; - } - - return ctr; -} - -#define gen_a(A,B) gen_matrix(A,B,0) -#define gen_at(A,B) gen_matrix(A,B,1) - -/************************************************* -* Name: gen_matrix -* -* Description: Deterministically generate matrix A (or the transpose of A) -* from a seed. Entries of the matrix are polynomials that look -* uniformly random. Performs rejection sampling on output of -* a XOF -* -* Arguments: - polyvec *a: pointer to ouptput matrix A -* - const uint8_t *seed: pointer to input seed -* - int transposed: boolean deciding whether A or A^T is generated -**************************************************/ -#if(XOF_BLOCKBYTES % 3) -#error "Implementation of gen_matrix assumes that XOF_BLOCKBYTES is a multiple of 3" -#endif - -#define GEN_MATRIX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES) -// Not static for benchmarking -void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed) -{ - unsigned int ctr, i, j; - unsigned int buflen; - uint8_t buf[GEN_MATRIX_NBLOCKS*XOF_BLOCKBYTES]; - xof_state state; - xof_init(&state, seed); - - for(i=0;i -#include "params.h" -#include "polyvec.h" - -#define gen_matrix KYBER_NAMESPACE(gen_matrix) -void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed); - -#define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand) -void indcpa_keypair_derand(uint8_t 
pk[KYBER_INDCPA_PUBLICKEYBYTES], - uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], - const uint8_t coins[KYBER_SYMBYTES]); - -#define indcpa_enc KYBER_NAMESPACE(indcpa_enc) -void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], - const uint8_t m[KYBER_INDCPA_MSGBYTES], - const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], - const uint8_t coins[KYBER_SYMBYTES]); - -#define indcpa_dec KYBER_NAMESPACE(indcpa_dec) -void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES], - const uint8_t c[KYBER_INDCPA_BYTES], - const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/kem.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/kem.c deleted file mode 100644 index 63abc1029c..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/kem.c +++ /dev/null @@ -1,169 +0,0 @@ -#include -#include -#include -#include "params.h" -#include "kem.h" -#include "indcpa.h" -#include "verify.h" -#include "symmetric.h" -#include "randombytes.h" -/************************************************* -* Name: crypto_kem_keypair_derand -* -* Description: Generates public and private key -* for CCA-secure Kyber key encapsulation mechanism -* -* Arguments: - uint8_t *pk: pointer to output public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key -* (an already allocated array of KYBER_SECRETKEYBYTES bytes) -* - uint8_t *coins: pointer to input randomness -* (an already allocated array filled with 2*KYBER_SYMBYTES random bytes) -** -* Returns 0 (success) -**************************************************/ -int crypto_kem_keypair_derand(uint8_t *pk, - uint8_t *sk, - const uint8_t *coins) -{ - indcpa_keypair_derand(pk, sk, coins); - memcpy(sk+KYBER_INDCPA_SECRETKEYBYTES, pk, KYBER_PUBLICKEYBYTES); - hash_h(sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); - /* Value z for pseudo-random output on reject */ - memcpy(sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, 
coins+KYBER_SYMBYTES, KYBER_SYMBYTES); - return 0; -} - -/************************************************* -* Name: crypto_kem_keypair -* -* Description: Generates public and private key -* for CCA-secure Kyber key encapsulation mechanism -* -* Arguments: - uint8_t *pk: pointer to output public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key -* (an already allocated array of KYBER_SECRETKEYBYTES bytes) -* -* Returns 0 (success) -**************************************************/ -int crypto_kem_keypair(uint8_t *pk, - uint8_t *sk) -{ - uint8_t coins[2*KYBER_SYMBYTES]; - randombytes(coins, 2*KYBER_SYMBYTES); - crypto_kem_keypair_derand(pk, sk, coins); - return 0; -} - -/************************************************* -* Name: crypto_kem_enc_derand -* -* Description: Generates cipher text and shared -* secret for given public key -* -* Arguments: - uint8_t *ct: pointer to output cipher text -* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) -* - uint8_t *ss: pointer to output shared secret -* (an already allocated array of KYBER_SSBYTES bytes) -* - const uint8_t *pk: pointer to input public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* - const uint8_t *coins: pointer to input randomness -* (an already allocated array filled with KYBER_SYMBYTES random bytes) -** -* Returns 0 (success) -**************************************************/ -int crypto_kem_enc_derand(uint8_t *ct, - uint8_t *ss, - const uint8_t *pk, - const uint8_t *coins) -{ - uint8_t buf[2*KYBER_SYMBYTES]; - /* Will contain key, coins */ - uint8_t kr[2*KYBER_SYMBYTES]; - - memcpy(buf, coins, KYBER_SYMBYTES); - - /* Multitarget countermeasure for coins + contributory KEM */ - hash_h(buf+KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); - hash_g(kr, buf, 2*KYBER_SYMBYTES); - - /* coins are in kr+KYBER_SYMBYTES */ - indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES); - - memcpy(ss,kr,KYBER_SYMBYTES); - return 0; -} - 
-/************************************************* -* Name: crypto_kem_enc -* -* Description: Generates cipher text and shared -* secret for given public key -* -* Arguments: - uint8_t *ct: pointer to output cipher text -* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) -* - uint8_t *ss: pointer to output shared secret -* (an already allocated array of KYBER_SSBYTES bytes) -* - const uint8_t *pk: pointer to input public key -* (an already allocated array of KYBER_PUBLICKEYBYTES bytes) -* -* Returns 0 (success) -**************************************************/ -int crypto_kem_enc(uint8_t *ct, - uint8_t *ss, - const uint8_t *pk) -{ - uint8_t coins[KYBER_SYMBYTES]; - randombytes(coins, KYBER_SYMBYTES); - crypto_kem_enc_derand(ct, ss, pk, coins); - return 0; -} - -/************************************************* -* Name: crypto_kem_dec -* -* Description: Generates shared secret for given -* cipher text and private key -* -* Arguments: - uint8_t *ss: pointer to output shared secret -* (an already allocated array of KYBER_SSBYTES bytes) -* - const uint8_t *ct: pointer to input cipher text -* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) -* - const uint8_t *sk: pointer to input private key -* (an already allocated array of KYBER_SECRETKEYBYTES bytes) -* -* Returns 0. -* -* On failure, ss will contain a pseudo-random value. 
-**************************************************/ -int crypto_kem_dec(uint8_t *ss, - const uint8_t *ct, - const uint8_t *sk) -{ - int fail; - uint8_t buf[2*KYBER_SYMBYTES]; - /* Will contain key, coins */ - uint8_t kr[2*KYBER_SYMBYTES]; - uint8_t cmp[KYBER_CIPHERTEXTBYTES+KYBER_SYMBYTES]; - const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES; - - indcpa_dec(buf, ct, sk); - - /* Multitarget countermeasure for coins + contributory KEM */ - memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES); - hash_g(kr, buf, 2*KYBER_SYMBYTES); - - /* coins are in kr+KYBER_SYMBYTES */ - indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES); - - fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); - - /* Compute rejection key */ - rkprf(ss,sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES,ct); - - /* Copy true key to return buffer if fail is false */ - cmov(ss,kr,KYBER_SYMBYTES,!fail); - - return 0; -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/kem.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/kem.h deleted file mode 100644 index 234f11966b..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/kem.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef KEM_H -#define KEM_H - -#include -#include "params.h" - -#define CRYPTO_SECRETKEYBYTES KYBER_SECRETKEYBYTES -#define CRYPTO_PUBLICKEYBYTES KYBER_PUBLICKEYBYTES -#define CRYPTO_CIPHERTEXTBYTES KYBER_CIPHERTEXTBYTES -#define CRYPTO_BYTES KYBER_SSBYTES - -#if (KYBER_K == 2) -#define CRYPTO_ALGNAME "Kyber512" -#elif (KYBER_K == 3) -#define CRYPTO_ALGNAME "Kyber768" -#elif (KYBER_K == 4) -#define CRYPTO_ALGNAME "Kyber1024" -#endif - -#define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand) -int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); - -#define crypto_kem_keypair KYBER_NAMESPACE(keypair) -int crypto_kem_keypair(uint8_t *pk, uint8_t *sk); - -#define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand) -int crypto_kem_enc_derand(uint8_t *ct, uint8_t 
*ss, const uint8_t *pk, const uint8_t *coins); - -#define crypto_kem_enc KYBER_NAMESPACE(enc) -int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); - -#define crypto_kem_dec KYBER_NAMESPACE(dec) -int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/ntt.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/ntt.c deleted file mode 100644 index 2f2eb10b2f..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/ntt.c +++ /dev/null @@ -1,146 +0,0 @@ -#include -#include "params.h" -#include "ntt.h" -#include "reduce.h" - -/* Code to generate zetas and zetas_inv used in the number-theoretic transform: - -#define KYBER_ROOT_OF_UNITY 17 - -static const uint8_t tree[128] = { - 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, - 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, - 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, - 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, - 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, - 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, - 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, - 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127 -}; - -void init_ntt() { - unsigned int i; - int16_t tmp[128]; - - tmp[0] = MONT; - for(i=1;i<128;i++) - tmp[i] = fqmul(tmp[i-1],MONT*KYBER_ROOT_OF_UNITY % KYBER_Q); - - for(i=0;i<128;i++) { - zetas[i] = tmp[tree[i]]; - if(zetas[i] > KYBER_Q/2) - zetas[i] -= KYBER_Q; - if(zetas[i] < -KYBER_Q/2) - zetas[i] += KYBER_Q; - } -} -*/ - -const int16_t zetas[128] = { - -1044, -758, -359, -1517, 1493, 1422, 287, 202, - -171, 622, 1577, 182, 962, -1202, -1474, 1468, - 573, -1325, 264, 383, -829, 1458, -1602, -130, - -681, 1017, 732, 608, -1542, 411, -205, -1571, - 1223, 652, -552, 1015, -1293, 1491, -282, -1544, - 516, -8, -320, -666, -1618, -1162, 
126, 1469, - -853, -90, -271, 830, 107, -1421, -247, -951, - -398, 961, -1508, -725, 448, -1065, 677, -1275, - -1103, 430, 555, 843, -1251, 871, 1550, 105, - 422, 587, 177, -235, -291, -460, 1574, 1653, - -246, 778, 1159, -147, -777, 1483, -602, 1119, - -1590, 644, -872, 349, 418, 329, -156, -75, - 817, 1097, 603, 610, 1322, -1285, -1465, 384, - -1215, -136, 1218, -1335, -874, 220, -1187, -1659, - -1185, -1530, -1278, 794, -1510, -854, -870, 478, - -108, -308, 996, 991, 958, -1460, 1522, 1628 -}; - -/************************************************* -* Name: fqmul -* -* Description: Multiplication followed by Montgomery reduction -* -* Arguments: - int16_t a: first factor -* - int16_t b: second factor -* -* Returns 16-bit integer congruent to a*b*R^{-1} mod q -**************************************************/ -static int16_t fqmul(int16_t a, int16_t b) { - return montgomery_reduce((int32_t)a*b); -} - -/************************************************* -* Name: ntt -* -* Description: Inplace number-theoretic transform (NTT) in Rq. -* input is in standard order, output is in bitreversed order -* -* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void ntt(int16_t r[256]) { - unsigned int len, start, j, k; - int16_t t, zeta; - - k = 1; - for(len = 128; len >= 2; len >>= 1) { - for(start = 0; start < 256; start = j + len) { - zeta = zetas[k++]; - for(j = start; j < start + len; j++) { - t = fqmul(zeta, r[j + len]); - r[j + len] = r[j] - t; - r[j] = r[j] + t; - } - } - } -} - -/************************************************* -* Name: invntt_tomont -* -* Description: Inplace inverse number-theoretic transform in Rq and -* multiplication by Montgomery factor 2^16. 
-* Input is in bitreversed order, output is in standard order -* -* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void invntt(int16_t r[256]) { - unsigned int start, len, j, k; - int16_t t, zeta; - const int16_t f = 1441; // mont^2/128 - - k = 127; - for(len = 2; len <= 128; len <<= 1) { - for(start = 0; start < 256; start = j + len) { - zeta = zetas[k--]; - for(j = start; j < start + len; j++) { - t = r[j]; - r[j] = barrett_reduce(t + r[j + len]); - r[j + len] = r[j + len] - t; - r[j + len] = fqmul(zeta, r[j + len]); - } - } - } - - for(j = 0; j < 256; j++) - r[j] = fqmul(r[j], f); -} - -/************************************************* -* Name: basemul -* -* Description: Multiplication of polynomials in Zq[X]/(X^2-zeta) -* used for multiplication of elements in Rq in NTT domain -* -* Arguments: - int16_t r[2]: pointer to the output polynomial -* - const int16_t a[2]: pointer to the first factor -* - const int16_t b[2]: pointer to the second factor -* - int16_t zeta: integer defining the reduction polynomial -**************************************************/ -void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) -{ - r[0] = fqmul(a[1], b[1]); - r[0] = fqmul(r[0], zeta); - r[0] += fqmul(a[0], b[0]); - r[1] = fqmul(a[0], b[1]); - r[1] += fqmul(a[1], b[0]); -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/ntt.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/ntt.h deleted file mode 100644 index 227ea74f08..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/ntt.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef NTT_H -#define NTT_H - -#include -#include "params.h" - -#define zetas KYBER_NAMESPACE(zetas) -extern const int16_t zetas[128]; - -#define ntt KYBER_NAMESPACE(ntt) -void ntt(int16_t poly[256]); - -#define invntt KYBER_NAMESPACE(invntt) -void invntt(int16_t poly[256]); - -#define basemul 
KYBER_NAMESPACE(basemul) -void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/params.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/params.h deleted file mode 100644 index fb4190b311..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/params.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef PARAMS_H -#define PARAMS_H - -#ifndef KYBER_K -#define KYBER_K 3 /* Change this for different security strengths */ -#endif - - -/* Don't change parameters below this line */ -#if (KYBER_K == 2) -#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_512_ref_##s -#elif (KYBER_K == 3) -#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_768_ref_##s -#elif (KYBER_K == 4) -#define KYBER_NAMESPACE(s) pqcrystals_ml_kem_1024_ref_##s -#else -#error "KYBER_K must be in {2,3,4}" -#endif - -#define KYBER_N 256 -#define KYBER_Q 3329 - -#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ -#define KYBER_SSBYTES 32 /* size in bytes of shared key */ - -#define KYBER_POLYBYTES 384 -#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) - -#if KYBER_K == 2 -#define KYBER_ETA1 3 -#define KYBER_POLYCOMPRESSEDBYTES 128 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) -#elif KYBER_K == 3 -#define KYBER_ETA1 2 -#define KYBER_POLYCOMPRESSEDBYTES 128 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) -#elif KYBER_K == 4 -#define KYBER_ETA1 2 -#define KYBER_POLYCOMPRESSEDBYTES 160 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) -#endif - -#define KYBER_ETA2 2 - -#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES) -#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) -#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) -#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) - -#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) -/* 32 bytes of additional space to save H(pk) */ -#define 
KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) -#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES) - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/poly.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/poly.c deleted file mode 100644 index cbd3abfb54..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/poly.c +++ /dev/null @@ -1,360 +0,0 @@ -#include -#include "params.h" -#include "poly.h" -#include "ntt.h" -#include "reduce.h" -#include "cbd.h" -#include "symmetric.h" -#include "verify.h" - -/************************************************* -* Name: poly_compress -* -* Description: Compression and subsequent serialization of a polynomial -* -* Arguments: - uint8_t *r: pointer to output byte array -* (of length KYBER_POLYCOMPRESSEDBYTES) -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) -{ - unsigned int i,j; - int16_t u; - uint32_t d0; - uint8_t t[8]; - -#if (KYBER_POLYCOMPRESSEDBYTES == 128) - - for(i=0;icoeffs[8*i+j]; - u += (u >> 15) & KYBER_Q; -/* t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */ - d0 = u << 4; - d0 += 1665; - d0 *= 80635; - d0 >>= 28; - t[j] = d0 & 0xf; - } - - r[0] = t[0] | (t[1] << 4); - r[1] = t[2] | (t[3] << 4); - r[2] = t[4] | (t[5] << 4); - r[3] = t[6] | (t[7] << 4); - r += 4; - } -#elif (KYBER_POLYCOMPRESSEDBYTES == 160) - for(i=0;icoeffs[8*i+j]; - u += (u >> 15) & KYBER_Q; -/* t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */ - d0 = u << 5; - d0 += 1664; - d0 *= 40318; - d0 >>= 27; - t[j] = d0 & 0x1f; - } - - r[0] = (t[0] >> 0) | (t[1] << 5); - r[1] = (t[1] >> 3) | (t[2] << 2) | (t[3] << 7); - r[2] = (t[3] >> 1) | (t[4] << 4); - r[3] = (t[4] >> 4) | (t[5] << 1) | (t[6] << 6); - r[4] = (t[6] >> 2) | (t[7] << 3); - r += 5; - } -#else -#error "KYBER_POLYCOMPRESSEDBYTES needs to be in 
{128, 160}" -#endif -} - -/************************************************* -* Name: poly_decompress -* -* Description: De-serialization and subsequent decompression of a polynomial; -* approximate inverse of poly_compress -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *a: pointer to input byte array -* (of length KYBER_POLYCOMPRESSEDBYTES bytes) -**************************************************/ -void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]) -{ - unsigned int i; - -#if (KYBER_POLYCOMPRESSEDBYTES == 128) - for(i=0;icoeffs[2*i+0] = (((uint16_t)(a[0] & 15)*KYBER_Q) + 8) >> 4; - r->coeffs[2*i+1] = (((uint16_t)(a[0] >> 4)*KYBER_Q) + 8) >> 4; - a += 1; - } -#elif (KYBER_POLYCOMPRESSEDBYTES == 160) - unsigned int j; - uint8_t t[8]; - for(i=0;i> 0); - t[1] = (a[0] >> 5) | (a[1] << 3); - t[2] = (a[1] >> 2); - t[3] = (a[1] >> 7) | (a[2] << 1); - t[4] = (a[2] >> 4) | (a[3] << 4); - t[5] = (a[3] >> 1); - t[6] = (a[3] >> 6) | (a[4] << 2); - t[7] = (a[4] >> 3); - a += 5; - - for(j=0;j<8;j++) - r->coeffs[8*i+j] = ((uint32_t)(t[j] & 31)*KYBER_Q + 16) >> 5; - } -#else -#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}" -#endif -} - -/************************************************* -* Name: poly_tobytes -* -* Description: Serialization of a polynomial -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for KYBER_POLYBYTES bytes) -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a) -{ - unsigned int i; - uint16_t t0, t1; - - for(i=0;icoeffs[2*i]; - t0 += ((int16_t)t0 >> 15) & KYBER_Q; - t1 = a->coeffs[2*i+1]; - t1 += ((int16_t)t1 >> 15) & KYBER_Q; - r[3*i+0] = (t0 >> 0); - r[3*i+1] = (t0 >> 8) | (t1 << 4); - r[3*i+2] = (t1 >> 4); - } -} - -/************************************************* -* Name: poly_frombytes -* -* Description: De-serialization of a polynomial; -* inverse of 
poly_tobytes -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *a: pointer to input byte array -* (of KYBER_POLYBYTES bytes) -**************************************************/ -void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]) -{ - unsigned int i; - for(i=0;icoeffs[2*i] = ((a[3*i+0] >> 0) | ((uint16_t)a[3*i+1] << 8)) & 0xFFF; - r->coeffs[2*i+1] = ((a[3*i+1] >> 4) | ((uint16_t)a[3*i+2] << 4)) & 0xFFF; - } -} - -/************************************************* -* Name: poly_frommsg -* -* Description: Convert 32-byte message to polynomial -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *msg: pointer to input message -**************************************************/ -void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]) -{ - unsigned int i,j; - -#if (KYBER_INDCPA_MSGBYTES != KYBER_N/8) -#error "KYBER_INDCPA_MSGBYTES must be equal to KYBER_N/8 bytes!" -#endif - - for(i=0;icoeffs[8*i+j] = 0; - cmov_int16(r->coeffs+8*i+j, ((KYBER_Q+1)/2), (msg[i] >> j)&1); - } - } -} - -/************************************************* -* Name: poly_tomsg -* -* Description: Convert polynomial to 32-byte message -* -* Arguments: - uint8_t *msg: pointer to output message -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *a) -{ - unsigned int i,j; - uint32_t t; - - for(i=0;icoeffs[8*i+j]; - // t += ((int16_t)t >> 15) & KYBER_Q; - // t = (((t << 1) + KYBER_Q/2)/KYBER_Q) & 1; - t <<= 1; - t += 1665; - t *= 80635; - t >>= 28; - t &= 1; - msg[i] |= t << j; - } - } -} - -/************************************************* -* Name: poly_getnoise_eta1 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter KYBER_ETA1 -* -* Arguments: - poly *r: pointer to output polynomial -* - const 
uint8_t *seed: pointer to input seed -* (of length KYBER_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) -{ - uint8_t buf[KYBER_ETA1*KYBER_N/4]; - prf(buf, sizeof(buf), seed, nonce); - poly_cbd_eta1(r, buf); -} - -/************************************************* -* Name: poly_getnoise_eta2 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter KYBER_ETA2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *seed: pointer to input seed -* (of length KYBER_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) -{ - uint8_t buf[KYBER_ETA2*KYBER_N/4]; - prf(buf, sizeof(buf), seed, nonce); - poly_cbd_eta2(r, buf); -} - - -/************************************************* -* Name: poly_ntt -* -* Description: Computes negacyclic number-theoretic transform (NTT) of -* a polynomial in place; -* inputs assumed to be in normal order, output in bitreversed order -* -* Arguments: - uint16_t *r: pointer to in/output polynomial -**************************************************/ -void poly_ntt(poly *r) -{ - ntt(r->coeffs); - poly_reduce(r); -} - -/************************************************* -* Name: poly_invntt_tomont -* -* Description: Computes inverse of negacyclic number-theoretic transform (NTT) -* of a polynomial in place; -* inputs assumed to be in bitreversed order, output in normal order -* -* Arguments: - uint16_t *a: pointer to in/output polynomial -**************************************************/ -void poly_invntt_tomont(poly *r) -{ - invntt(r->coeffs); -} - -/************************************************* -* Name: poly_basemul_montgomery 
-* -* Description: Multiplication of two polynomials in NTT domain -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_basemul_montgomery(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - for(i=0;icoeffs[4*i], &a->coeffs[4*i], &b->coeffs[4*i], zetas[64+i]); - basemul(&r->coeffs[4*i+2], &a->coeffs[4*i+2], &b->coeffs[4*i+2], -zetas[64+i]); - } -} - -/************************************************* -* Name: poly_tomont -* -* Description: Inplace conversion of all coefficients of a polynomial -* from normal domain to Montgomery domain -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_tomont(poly *r) -{ - unsigned int i; - const int16_t f = (1ULL << 32) % KYBER_Q; - for(i=0;icoeffs[i] = montgomery_reduce((int32_t)r->coeffs[i]*f); -} - -/************************************************* -* Name: poly_reduce -* -* Description: Applies Barrett reduction to all coefficients of a polynomial -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_reduce(poly *r) -{ - unsigned int i; - for(i=0;icoeffs[i] = barrett_reduce(r->coeffs[i]); -} - -/************************************************* -* Name: poly_add -* -* Description: Add two polynomials; no modular reduction is performed -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_add(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - for(i=0;icoeffs[i] = a->coeffs[i] + b->coeffs[i]; -} - -/************************************************* -* 
Name: poly_sub -* -* Description: Subtract two polynomials; no modular reduction is performed -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_sub(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - for(i=0;icoeffs[i] = a->coeffs[i] - b->coeffs[i]; -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/poly.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/poly.h deleted file mode 100644 index 9a99c7cdad..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/poly.h +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef POLY_H -#define POLY_H - -#include -#include "params.h" - -/* - * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial - * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1] - */ -typedef struct{ - int16_t coeffs[KYBER_N]; -} poly; - -#define poly_compress KYBER_NAMESPACE(poly_compress) -void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a); -#define poly_decompress KYBER_NAMESPACE(poly_decompress) -void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]); - -#define poly_tobytes KYBER_NAMESPACE(poly_tobytes) -void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a); -#define poly_frombytes KYBER_NAMESPACE(poly_frombytes) -void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]); - -#define poly_frommsg KYBER_NAMESPACE(poly_frommsg) -void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]); -#define poly_tomsg KYBER_NAMESPACE(poly_tomsg) -void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r); - -#define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1) -void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); - -#define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2) -void poly_getnoise_eta2(poly *r, 
const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); - -#define poly_ntt KYBER_NAMESPACE(poly_ntt) -void poly_ntt(poly *r); -#define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont) -void poly_invntt_tomont(poly *r); -#define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery) -void poly_basemul_montgomery(poly *r, const poly *a, const poly *b); -#define poly_tomont KYBER_NAMESPACE(poly_tomont) -void poly_tomont(poly *r); - -#define poly_reduce KYBER_NAMESPACE(poly_reduce) -void poly_reduce(poly *r); - -#define poly_add KYBER_NAMESPACE(poly_add) -void poly_add(poly *r, const poly *a, const poly *b); -#define poly_sub KYBER_NAMESPACE(poly_sub) -void poly_sub(poly *r, const poly *a, const poly *b); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/polyvec.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/polyvec.c deleted file mode 100644 index 669f6a5f1d..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/polyvec.c +++ /dev/null @@ -1,246 +0,0 @@ -#include -#include "params.h" -#include "poly.h" -#include "polyvec.h" - -/************************************************* -* Name: polyvec_compress -* -* Description: Compress and serialize vector of polynomials -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for KYBER_POLYVECCOMPRESSEDBYTES) -* - const polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) -{ - unsigned int i,j,k; - uint64_t d0; - -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) - uint16_t t[8]; - for(i=0;ivec[i].coeffs[8*j+k]; - t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; -/* t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */ - d0 = t[k]; - d0 <<= 11; - d0 += 1664; - d0 *= 645084; - d0 >>= 31; - t[k] = d0 & 0x7ff; - } - - r[ 0] = (t[0] >> 0); - r[ 1] = (t[0] >> 8) | (t[1] << 3); - r[ 2] = (t[1] >> 5) 
| (t[2] << 6); - r[ 3] = (t[2] >> 2); - r[ 4] = (t[2] >> 10) | (t[3] << 1); - r[ 5] = (t[3] >> 7) | (t[4] << 4); - r[ 6] = (t[4] >> 4) | (t[5] << 7); - r[ 7] = (t[5] >> 1); - r[ 8] = (t[5] >> 9) | (t[6] << 2); - r[ 9] = (t[6] >> 6) | (t[7] << 5); - r[10] = (t[7] >> 3); - r += 11; - } - } -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) - uint16_t t[4]; - for(i=0;ivec[i].coeffs[4*j+k]; - t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; -/* t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */ - d0 = t[k]; - d0 <<= 10; - d0 += 1665; - d0 *= 1290167; - d0 >>= 32; - t[k] = d0 & 0x3ff; - } - - r[0] = (t[0] >> 0); - r[1] = (t[0] >> 8) | (t[1] << 2); - r[2] = (t[1] >> 6) | (t[2] << 4); - r[3] = (t[2] >> 4) | (t[3] << 6); - r[4] = (t[3] >> 2); - r += 5; - } - } -#else -#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" -#endif -} - -/************************************************* -* Name: polyvec_decompress -* -* Description: De-serialize and decompress vector of polynomials; -* approximate inverse of polyvec_compress -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const uint8_t *a: pointer to input byte array -* (of length KYBER_POLYVECCOMPRESSEDBYTES) -**************************************************/ -void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]) -{ - unsigned int i,j,k; - -#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) - uint16_t t[8]; - for(i=0;i> 0) | ((uint16_t)a[ 1] << 8); - t[1] = (a[1] >> 3) | ((uint16_t)a[ 2] << 5); - t[2] = (a[2] >> 6) | ((uint16_t)a[ 3] << 2) | ((uint16_t)a[4] << 10); - t[3] = (a[4] >> 1) | ((uint16_t)a[ 5] << 7); - t[4] = (a[5] >> 4) | ((uint16_t)a[ 6] << 4); - t[5] = (a[6] >> 7) | ((uint16_t)a[ 7] << 1) | ((uint16_t)a[8] << 9); - t[6] = (a[8] >> 2) | ((uint16_t)a[ 9] << 6); - t[7] = (a[9] >> 5) | ((uint16_t)a[10] << 3); - a += 11; - - for(k=0;k<8;k++) - r->vec[i].coeffs[8*j+k] = ((uint32_t)(t[k] & 0x7FF)*KYBER_Q + 1024) >> 
11; - } - } -#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) - uint16_t t[4]; - for(i=0;i> 0) | ((uint16_t)a[1] << 8); - t[1] = (a[1] >> 2) | ((uint16_t)a[2] << 6); - t[2] = (a[2] >> 4) | ((uint16_t)a[3] << 4); - t[3] = (a[3] >> 6) | ((uint16_t)a[4] << 2); - a += 5; - - for(k=0;k<4;k++) - r->vec[i].coeffs[4*j+k] = ((uint32_t)(t[k] & 0x3FF)*KYBER_Q + 512) >> 10; - } - } -#else -#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" -#endif -} - -/************************************************* -* Name: polyvec_tobytes -* -* Description: Serialize vector of polynomials -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for KYBER_POLYVECBYTES) -* - const polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_frombytes -* -* Description: De-serialize vector of polynomials; -* inverse of polyvec_tobytes -* -* Arguments: - uint8_t *r: pointer to output byte array -* - const polyvec *a: pointer to input vector of polynomials -* (of length KYBER_POLYVECBYTES) -**************************************************/ -void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]) -{ - unsigned int i; - for(i=0;ivec[i], a+i*KYBER_POLYBYTES); -} - -/************************************************* -* Name: polyvec_ntt -* -* Description: Apply forward NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_ntt(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_invntt_tomont -* -* Description: Apply inverse NTT to all elements of a vector of polynomials -* and multiply by 
Montgomery factor 2^16 -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_invntt_tomont(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_basemul_acc_montgomery -* -* Description: Multiply elements of a and b in NTT domain, accumulate into r, -* and multiply by 2^-16. -* -* Arguments: - poly *r: pointer to output polynomial -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) -{ - unsigned int i; - poly t; - - poly_basemul_montgomery(r, &a->vec[0], &b->vec[0]); - for(i=1;ivec[i], &b->vec[i]); - poly_add(r, r, &t); - } - - poly_reduce(r); -} - -/************************************************* -* Name: polyvec_reduce -* -* Description: Applies Barrett reduction to each coefficient -* of each element of a vector of polynomials; -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - polyvec *r: pointer to input/output polynomial -**************************************************/ -void polyvec_reduce(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_add -* -* Description: Add vectors of polynomials -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) -{ - unsigned int i; - for(i=0;ivec[i], &a->vec[i], &b->vec[i]); -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/polyvec.h 
b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/polyvec.h deleted file mode 100644 index 57b605494e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/polyvec.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef POLYVEC_H -#define POLYVEC_H - -#include -#include "params.h" -#include "poly.h" - -typedef struct{ - poly vec[KYBER_K]; -} polyvec; - -#define polyvec_compress KYBER_NAMESPACE(polyvec_compress) -void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a); -#define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress) -void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]); - -#define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes) -void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a); -#define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes) -void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]); - -#define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt) -void polyvec_ntt(polyvec *r); -#define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont) -void polyvec_invntt_tomont(polyvec *r); - -#define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery) -void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b); - -#define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce) -void polyvec_reduce(polyvec *r); - -#define polyvec_add KYBER_NAMESPACE(polyvec_add) -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/reduce.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/reduce.c deleted file mode 100644 index 9d8e7edf83..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/reduce.c +++ /dev/null @@ -1,42 +0,0 @@ -#include -#include "params.h" -#include "reduce.h" - -/************************************************* -* Name: montgomery_reduce -* -* Description: Montgomery reduction; given a 
32-bit integer a, computes -* 16-bit integer congruent to a * R^-1 mod q, where R=2^16 -* -* Arguments: - int32_t a: input integer to be reduced; -* has to be in {-q2^15,...,q2^15-1} -* -* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. -**************************************************/ -int16_t montgomery_reduce(int32_t a) -{ - int16_t t; - - t = (int16_t)a*QINV; - t = (a - (int32_t)t*KYBER_Q) >> 16; - return t; -} - -/************************************************* -* Name: barrett_reduce -* -* Description: Barrett reduction; given a 16-bit integer a, computes -* centered representative congruent to a mod q in {-(q-1)/2,...,(q-1)/2} -* -* Arguments: - int16_t a: input integer to be reduced -* -* Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q. -**************************************************/ -int16_t barrett_reduce(int16_t a) { - int16_t t; - const int16_t v = ((1<<26) + KYBER_Q/2)/KYBER_Q; - - t = ((int32_t)v*a + (1<<25)) >> 26; - t *= KYBER_Q; - return a - t; -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/reduce.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/reduce.h deleted file mode 100644 index c1bc1e4c7b..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/reduce.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef REDUCE_H -#define REDUCE_H - -#include -#include "params.h" - -#define MONT -1044 // 2^16 mod q -#define QINV -3327 // q^-1 mod 2^16 - -#define montgomery_reduce KYBER_NAMESPACE(montgomery_reduce) -int16_t montgomery_reduce(int32_t a); - -#define barrett_reduce KYBER_NAMESPACE(barrett_reduce) -int16_t barrett_reduce(int16_t a); - -#endif diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/symmetric-shake.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/symmetric-shake.c deleted file mode 100644 index 20f451882e..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/symmetric-shake.c +++ /dev/null @@ -1,74 
+0,0 @@ -#include -#include -#include -#include "params.h" -#include "symmetric.h" -#include "fips202.h" - -/************************************************* -* Name: kyber_shake128_absorb -* -* Description: Absorb step of the SHAKE128 specialized for the Kyber context. -* -* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state -* - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state -* - uint8_t i: additional byte of input -* - uint8_t j: additional byte of input -**************************************************/ -void kyber_shake128_absorb(shake128incctx *state, - const uint8_t seed[KYBER_SYMBYTES], - uint8_t x, - uint8_t y) -{ - uint8_t extseed[KYBER_SYMBYTES+2]; - - memcpy(extseed, seed, KYBER_SYMBYTES); - extseed[KYBER_SYMBYTES+0] = x; - extseed[KYBER_SYMBYTES+1] = y; - - shake128_absorb_once(state, extseed, sizeof(extseed)); -} - -/************************************************* -* Name: kyber_shake256_prf -* -* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -* and then generates outlen bytes of SHAKE256 output -* -* Arguments: - uint8_t *out: pointer to output -* - size_t outlen: number of requested output bytes -* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) -* - uint8_t nonce: single-byte nonce (public PRF input) -**************************************************/ -void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce) -{ - uint8_t extkey[KYBER_SYMBYTES+1]; - - memcpy(extkey, key, KYBER_SYMBYTES); - extkey[KYBER_SYMBYTES] = nonce; - - shake256(out, outlen, extkey, sizeof(extkey)); -} - -/************************************************* -* Name: kyber_shake256_prf -* -* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -* and then generates outlen bytes of SHAKE256 output -* -* Arguments: - uint8_t *out: pointer to output -* - size_t outlen: number of requested output 
bytes -* - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) -* - uint8_t nonce: single-byte nonce (public PRF input) -**************************************************/ -void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]) -{ - shake256incctx s; - - shake256_inc_init(&s); - shake256_inc_absorb(&s, key, KYBER_SYMBYTES); - shake256_inc_absorb(&s, input, KYBER_CIPHERTEXTBYTES); - shake256_inc_finalize(&s); - shake256_inc_squeeze(out, KYBER_SSBYTES, &s); - shake256_inc_ctx_release(&s); -} diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/symmetric.h b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/symmetric.h deleted file mode 100644 index 2acc66f98d..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/symmetric.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef SYMMETRIC_H -#define SYMMETRIC_H - -#include -#include -#include "params.h" - -#include "fips202.h" - -typedef shake128incctx xof_state; - -#define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb) -void kyber_shake128_absorb(shake128incctx *s, - const uint8_t seed[KYBER_SYMBYTES], - uint8_t x, - uint8_t y); - -#define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf) -void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce); - -#define kyber_shake256_rkprf KYBER_NAMESPACE(kyber_shake256_rkprf) -void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]); - -#define XOF_BLOCKBYTES SHAKE128_RATE - -#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) -#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) -#define xof_init(STATE, SEED) shake128_inc_init(STATE) -#define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y) -#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) 
-#define xof_release(STATE) shake128_inc_ctx_release(STATE) -#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE) -#define rkprf(OUT, KEY, INPUT) kyber_shake256_rkprf(OUT, KEY, INPUT) - -#endif /* SYMMETRIC_H */ diff --git a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/verify.c b/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/verify.c deleted file mode 100644 index 914ccd448f..0000000000 --- a/src/kem/ml_kem/pqcrystals-kyber-standard_ml-kem-768_ref/verify.c +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include -#include "verify.h" - -/************************************************* -* Name: verify -* -* Description: Compare two arrays for equality in constant time. -* -* Arguments: const uint8_t *a: pointer to first byte array -* const uint8_t *b: pointer to second byte array -* size_t len: length of the byte arrays -* -* Returns 0 if the byte arrays are equal, 1 otherwise -**************************************************/ -int verify(const uint8_t *a, const uint8_t *b, size_t len) -{ - size_t i; - uint8_t r = 0; - - for(i=0;i> 63; -} - -/************************************************* -* Name: cmov -* -* Description: Copy len bytes from x to r if b is 1; -* don't modify x if b is 0. Requires b to be in {0,1}; -* assumes two's complement representation of negative integers. -* Runs in constant time. -* -* Arguments: uint8_t *r: pointer to output byte array -* const uint8_t *x: pointer to input byte array -* size_t len: Amount of bytes to be copied -* uint8_t b: Condition bit; has to be in {0,1} -**************************************************/ -void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) -{ - size_t i; - -#if defined(__GNUC__) || defined(__clang__) - // Prevent the compiler from - // 1) inferring that b is 0/1-valued, and - // 2) handling the two cases with a branch. 
- // This is not necessary when verify.c and kem.c are separate translation - // units, but we expect that downstream consumers will copy this code and/or - // change how it is built. - __asm__("" : "+r"(b) : /* no inputs */); -#endif - - b = -b; - for(i=0;i -#include -#include "params.h" - -#define verify KYBER_NAMESPACE(verify) -int verify(const uint8_t *a, const uint8_t *b, size_t len); - -#define cmov KYBER_NAMESPACE(cmov) -void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); - -#define cmov_int16 KYBER_NAMESPACE(cmov_int16) -void cmov_int16(int16_t *r, int16_t v, uint16_t b); - -#endif diff --git a/src/oqsconfig.h.cmake b/src/oqsconfig.h.cmake index 967c35e64e..9acc66c56d 100644 --- a/src/oqsconfig.h.cmake +++ b/src/oqsconfig.h.cmake @@ -128,11 +128,8 @@ #cmakedefine OQS_ENABLE_KEM_ML_KEM 1 #cmakedefine OQS_ENABLE_KEM_ml_kem_512 1 -#cmakedefine OQS_ENABLE_KEM_ml_kem_512_avx2 1 #cmakedefine OQS_ENABLE_KEM_ml_kem_768 1 -#cmakedefine OQS_ENABLE_KEM_ml_kem_768_avx2 1 #cmakedefine OQS_ENABLE_KEM_ml_kem_1024 1 -#cmakedefine OQS_ENABLE_KEM_ml_kem_1024_avx2 1 #cmakedefine OQS_ENABLE_SIG_DILITHIUM 1 #cmakedefine OQS_ENABLE_SIG_dilithium_2 1 diff --git a/tests/test_binary.py b/tests/test_binary.py index 53e114df00..a673e545ae 100644 --- a/tests/test_binary.py +++ b/tests/test_binary.py @@ -33,7 +33,7 @@ def test_namespace(): symbols.append(line) # ideally this would be just ['oqs', 'pqclean'], but contains exceptions (e.g., providing compat implementations of unavailable platform functions) - namespaces = ['oqs', 'pqclean', 'keccak', 'pqcrystals', 'pqmayo', 'init', 'fini', 'seedexpander', '__x86.get_pc_thunk', 'libjade', 'jade', '__jade', '__jasmin_syscall'] + namespaces = ['oqs', 'pqclean', 'keccak', 'pqcrystals', 'pqmayo', 'init', 'fini', 'seedexpander', '__x86.get_pc_thunk', 'libjade', 'jade', '__jade', '__jasmin_syscall', 'pqcp'] non_namespaced = [] for symbolstr in symbols: