diff --git a/.gitignore b/.gitignore index 31c072b121..6f45aaf267 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,6 @@ compilers/concrete-compiler/compiler/hpx* # For tests within ML tmp_directory_for_cml_tests + +# Temp file +frontends/concrete-python/examples/sha1/tmp_sha1_test_file.txt diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 856fe23475..98f0eb5328 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -28,11 +28,11 @@ ## Compilation * [Composition](compilation/composition.md) +* [Modules](compilation/modules.md) * [Compression](compilation/compression.md) * [Reuse arguments](compilation/reuse_arguments.md) * [Multi precision](compilation/multi_precision.md) * [Multi parameters](compilation/multi_parameters.md) -* [Modules](compilation/modules.md) * [Decorator](compilation/decorator.md) * [Direct circuits](compilation/direct_circuits.md) diff --git a/docs/tutorials/see-all-tutorials.md b/docs/tutorials/see-all-tutorials.md index ed8a8781db..8e59b7226b 100644 --- a/docs/tutorials/see-all-tutorials.md +++ b/docs/tutorials/see-all-tutorials.md @@ -13,6 +13,7 @@ * [SHA-256 ](../application-tutorial/sha256.ipynb) * [Game of Life](../../frontends/concrete-python/examples/game_of_life/game_of_life.md) * [XOR distance](../../frontends/concrete-python/examples/xor_distance/xor_distance.md) +* [SHA1 with Modules](../../frontends/concrete-python/examples/sha1/sha1.md) #### Blog tutorials diff --git a/frontends/concrete-python/examples/game_of_life/game_of_life.md b/frontends/concrete-python/examples/game_of_life/game_of_life.md index f68f6cd1e9..b332165abe 100644 --- a/frontends/concrete-python/examples/game_of_life/game_of_life.md +++ b/frontends/concrete-python/examples/game_of_life/game_of_life.md @@ -1,4 +1,4 @@ -# Game of Life +# Game of life In the associated [Python file](game_of_life.py), you can run the Game of Life, written in Concrete Python. 
diff --git a/frontends/concrete-python/examples/sha1/sha1.md b/frontends/concrete-python/examples/sha1/sha1.md new file mode 100644 index 0000000000..c188d26932 --- /dev/null +++ b/frontends/concrete-python/examples/sha1/sha1.md @@ -0,0 +1,298 @@ +# SHA1 computation with Modules + +This document demonstrates the use of Modules in Concrete through a SHA1 computation example. SHA1 +is a deprecated and broken hash function; we use it here for pedagogical purposes, not for its +security. + +The SHA1 code is available [here](sha1.py). Execution times for the different functions are provided +in the final section. We created our example by forking +[python-sha1](https://github.com/ajalt/python-sha1) by [AJ Alt](https://github.com/ajalt) and made +extensive modifications to implement SHA1 in FHE and corresponding tests. + +## SHA1 overview + +SHA1 is a deprecated, broken hash function defined by NIST in 1995. You can find detailed information +on SHA1 on its [Wikipedia page](https://en.wikipedia.org/wiki/SHA-1) or in its +[official description](https://nvlpubs.nist.gov/nistpubs/fips/nist.fips.180-4.pdf). We follow the +structure of its [pseudo-code](https://en.wikipedia.org/wiki/SHA-1#SHA-1_pseudocode) in our +implementation. + +## Our FHE implementation + +In our implementation, only the compression function is implemented in FHE, corresponding to the +`_process_encrypted_chunk_server_side` function. The rest is done client-side in the clear, +including the message expansion. While more of the process could be done in FHE, this tutorial +focuses on demonstrating the use of [Modules](https://docs.zama.ai/concrete/compilation/modules). + +Our Module contains 7 functions which can be combined together: +- `xor3` XORs three values together +- `iftern` computes the ternary IF, i.e., c ? 
t : f +- `maj` computes the majority function +- `rotate30` rotates a 32-bit word by 30 positions +- `rotate5` rotates a 32-bit word by 5 positions +- `add2` adds two values together +- `add5` adds five values together + + +``` +@fhe.module() +class MyModule: + @fhe.function({"x": "encrypted", "y": "encrypted", "z": "encrypted"}) + def xor3(x, y, z): + return x ^ y ^ z + + @fhe.function({"x": "encrypted", "y": "encrypted", "z": "encrypted"}) + def iftern(x, y, z): + return z ^ (x & (y ^ z)) + + @fhe.function({"x": "encrypted", "y": "encrypted", "z": "encrypted"}) + def maj(x, y, z): + return (x & y) | (z & (x | y)) + + @fhe.function({"x": "encrypted"}) + def rotate30(x): + ans = fhe.zeros((32,)) + ans[30:32] = x[0:2] + ans[0:30] = x[2:32] + return ans + + @fhe.function({"x": "encrypted"}) + def rotate5(x): + ans = fhe.zeros((32,)) + ans[5:32] = x[0:27] + ans[0:5] = x[27:32] + return ans + + @fhe.function({"x": "encrypted", "y": "encrypted"}) + def add2(x, y): + ans = fhe.zeros((32,)) + cy = 0 + + for i in range(32): + t = x[i] + y[i] + cy + cy, tr = t >= 2, t % 2 + ans[i] = tr + + return ans + + @fhe.function( + {"x": "encrypted", "y": "encrypted", "u": "encrypted", "v": "encrypted", "w": "encrypted"} + ) + def add5(x, y, u, v, w): + ans = fhe.zeros((32,)) + cy = 0 + + for i in range(32): + t = x[i] + y[i] + cy + cy, tr = t // 2, t % 2 + ans[i] = tr + + cy = 0 + + for i in range(32): + t = ans[i] + u[i] + cy + cy, tr = t // 2, t % 2 + ans[i] = tr + + cy = 0 + + for i in range(32): + t = ans[i] + v[i] + cy + cy, tr = t // 2, t % 2 + ans[i] = tr + + cy = 0 + + for i in range(32): + t = ans[i] + w[i] + cy + cy, tr = t // 2, t % 2 + ans[i] = tr + + return ans +``` + +We then compile this Module, setting `p_error=10**-8` as a very small value to avoid computation +errors. The Module feature allows the combination of all these functions so that the outputs of +some can be used as inputs for others. 
This makes it convenient to create larger functions with +some control flow (conditions, branches, loops) handled in the clear while using these smaller +functions. In our case, this is done in the `_process_encrypted_chunk_server_side` function. + +## Details of `_process_encrypted_chunk_server_side` + +`_process_encrypted_chunk_server_side` uses encrypted inputs and returns encrypted values. In the +clear, all variables are 32-bit words, but here they are represented as 32 encrypted bits to +simplify and accelerate the non-linear operations in SHA1. + +Then, we have the main loop of the compression function: + +``` + for i in range(80): + if 0 <= i <= 19: + + # Do f = d ^ (b & (c ^ d)) + fsplit_enc = my_module.iftern.run(bsplit_enc, csplit_enc, dsplit_enc) + + ksplit = split(0x5A827999) + elif 20 <= i <= 39: + + # Do f = b ^ c ^ d + fsplit_enc = my_module.xor3.run(bsplit_enc, csplit_enc, dsplit_enc) + + ksplit = split(0x6ED9EBA1) + elif 40 <= i <= 59: + + # Do f = (b & c) | (b & d) | (c & d) + fsplit_enc = my_module.maj.run(bsplit_enc, csplit_enc, dsplit_enc) + + ksplit = split(0x8F1BBCDC) + elif 60 <= i <= 79: + + # Do f = b ^ c ^ d + fsplit_enc = my_module.xor3.run(bsplit_enc, csplit_enc, dsplit_enc) + + ksplit = split(0xCA62C1D6) +``` + +In this main loop, we select the appropriate `f` and `k`. Here, we can see a first use of +the different functions in the Module. 
+ +Then we continue with other functions in the Module, to compute `arot5 = _left_rotate(a, 5)` and +`arot5 + f + e + k + w[i]`: + +``` + # Do arot5 = _left_rotate(a, 5) + arot5split_enc = my_module.rotate5.run(asplit_enc) + + # Do arot5 + f + e + k + w[i] + ssplit_enc = my_module.add5.run( + arot5split_enc, + fsplit_enc, + esplit_enc, + wsplit_enc[i], + my_module.rotate5.encrypt(ksplit), # BCM: later remove the encryption on k + ) +``` + +Finally, we update the different `a, b, c, d, e` values as in the clear +implementation but with the encrypted forms: + +``` + # Final update of the a, b, c, d and e registers + newasplit_enc = ssplit_enc + + esplit_enc = dsplit_enc + dsplit_enc = csplit_enc + + # Do c = _left_rotate(b, 30) + csplit_enc = my_module.rotate30.run(bsplit_enc) + + bsplit_enc = asplit_enc + asplit_enc = newasplit_enc +``` + +You can see that we compiled the Module's different functions on inputsets made of +bits. Under the hood, Concrete adds a few programmable bootstrappings to compute the correct +functions in FHE. + + +## MLIR code + +Compiling with `show_mlir = True` allows you to see the different MLIR implementations. + +## Testing or using + +This tutorial focuses on the use of Modules rather than a production-ready implementation. For a +full client-server API, you might want to perform more operations in FHE, including message +expansion, and function optimizations. + +You can verify the implementation in FHE by running `python sha1.py --autotest`: it will +pick a certain number of random inputs, hash them in FHE and compare the result with the `hashlib` +standard implementation. + +You can also hash a given value with +`echo -n "The quick brown fox jumps over the lazy dog" | python sha1.py`, and it will print +something like: + +``` +sha1-digest: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12 +computed in: 320.265383 seconds +``` + +## Benchmarks + +We have executed our implementation on an HPC7a machine with Concrete 2.7.0rc1. 
+ +`python sha1.py --autotest` typically returns: + +``` +Checking SHA1(fdASguUMBwhPcKuDpPqoRlQXLrLQbnxEvPJSQSIUDTBoaqrJlBualgoWEINmDZDYSuGuSOpGBWwWzjAfktWYZZUliv) for an input length 90 +sha1-digest: 5bb539fd423875ccc8a33148dae724f5b2cf9391 +computed in: 295.306287 seconds +Checking SHA1(BYwXTbqE) for an input length 8 +sha1-digest: 90a8dcad6ddff7ca8fd487b80a37fcd250c56bed +computed in: 145.341164 seconds +Checking SHA1(rnPZh) for an input length 5 +sha1-digest: 47610d2c26ee8b45ab0f4c8f8e4d405b2cd37f1f +computed in: 145.318081 seconds +Checking SHA1(orRaJMGbUJtxITQvqiOCPjKJWYuHomuiexCQQgZyTeAAFJcgCftDCRAkcLKjRECelIMPQphGEUlSNthE) for an input length 80 +sha1-digest: bd74b4e64349d308f3b95b54cf61ee416bdd6b18 +computed in: 288.240576 seconds +Checking SHA1(ROokDcdczajNPjlCPoWotaRJHBtOVyiyxMIIeCtxaDCjk) for an input length 45 +sha1-digest: 1ff546c3a64f27339781c095cbc097f392c2cccd +computed in: 143.621941 seconds +Checking SHA1(KbCXFt) for an input length 6 +sha1-digest: 7e5789f0c83fa5102004fbeeef3ac22244d1cdac +computed in: 143.509567 seconds +Checking SHA1(mpKnkHtrgokxgQSzcIjFtxKnhmMfZbIbkJavnkSxW) for an input length 41 +sha1-digest: 1308d9f7cba634ab2617edb5116b8bdf434f16f5 +computed in: 143.341450 seconds +Checking SHA1(oauoWKJGyjjTcXqRIxFGuVuMwiwjKYfttQ) for an input length 34 +sha1-digest: 60367153b7049ca92eb979ad7b809c5a3f47a64e +computed in: 143.693254 seconds +Checking SHA1(ZMGiaIOmBJPncOsUCxj) for an input length 19 +sha1-digest: fafba9f2fe6b5a0fddad4ad765909c8fc32117c6 +computed in: 143.720215 seconds +Checking SHA1(HwCXIHnFoGUgIBqaQrrpDnhEvPBX) for an input length 28 +sha1-digest: 5224cace20f8d20fa3ea8d9974b5ff3a0be7fd48 +computed in: 143.523006 seconds +Checking SHA1(AfyzsimngrqeWoqZKOBRwVuvttfgJTpegMbiHjUNdWzTg) for an input length 45 +sha1-digest: 8ca27aca1c362ca63e50d58aa7065b4322f028a0 +computed in: 143.481069 seconds +Checking SHA1(hNEUPakrqQpGGZvtHvht) for an input length 20 +sha1-digest: 36ae34ed85e62ac0f922e36fc98b23e725695be1 +computed in: 
143.478666 seconds +Checking SHA1(CjgfYYlNKqZdHeXFfqTwhycbGBeSpzpxKPwWItriiNKZCcEJRZlM) for an input length 52 +sha1-digest: 3c012f41c5fe4581f80e2901fc4bbbb70ff7a9ba +computed in: 143.490262 seconds +Checking SHA1(EXIGkYzWpcqpfRKCSbBJJqqmUBkFwWfPGooJvsVAshWjMr) for an input length 46 +sha1-digest: 2518c4d13ec7608f59632ac993b726e572c3aaae +computed in: 143.840785 seconds +Checking SHA1(sgzaAqZnhXmFJOJMyfGxweYFMmLeUHmMCWETfqzstzpFYKaGpnasiLHPTcJtukHztEQpXzquREcbtoJDaoqjfM) for an input length 86 +sha1-digest: 46f4b0653ed7ea0ce89cc18f6720e5e334d63a45 +computed in: 288.155301 seconds +Checking SHA1(oRaisdHJovDxCnwyComEGejqMceBTOVhJucVnwgC) for an input length 40 +sha1-digest: 909f9c6275aa9f41d8ecaf52203bb0e24cf978d7 +computed in: 143.466817 seconds +Checking SHA1(mtTWxtHerQgLdBGftWdiCwBKqtu) for an input length 27 +sha1-digest: 624a7dcec460061a2a6499dae978fe4afd674110 +computed in: 145.389956 seconds +Checking SHA1(beYzkJLvZMmoXbQwqoVThpyaQ) for an input length 25 +sha1-digest: 25a9df47bd055384a9ee614c1dc7213c04f2087c +computed in: 147.234881 seconds +Checking SHA1(CpQWXXRNlXIoSZNxmXUwWHqmUAdlOrDyZPzzOhznlpGntrUgvktlZ) for an input length 53 +sha1-digest: f2bde6574d8f6aa360929f6a5f919700b16e093b +computed in: 147.154393 seconds +Checking SHA1(busWigrVdsXnkjTh) for an input length 16 +sha1-digest: fe47568d433278a38a4729f7891d03eaacdb0e40 +computed in: 147.465694 seconds +Checking SHA1() +sha1-digest: da39a3ee5e6b4b0d3255bfef95601890afd80709 +computed in: 147.256297 seconds +Checking SHA1(The quick brown fox jumps over the lazy dog) +sha1-digest: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12 +computed in: 147.697102 seconds +``` + +These results mean that: +- one block of compression takes about 147 seconds +- two blocks of compression take about 290 seconds diff --git a/frontends/concrete-python/examples/sha1/sha1.py b/frontends/concrete-python/examples/sha1/sha1.py new file mode 100755 index 0000000000..babc3cd965 --- /dev/null +++ 
b/frontends/concrete-python/examples/sha1/sha1.py @@ -0,0 +1,531 @@ +# Forked and modified from https://github.com/ajalt/python-sha1, whose license was +# +# The MIT License (MIT) +# +# Copyright (c) 2013-2015 AJ Alt +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +import io +import random +import string +import struct +import time +from hashlib import sha1 as hashlib_sha1 + +import numpy as np + +from concrete import fhe + + +def _left_rotate(n, b): + """Left rotate a 32-bit integer n by b bits.""" + return ((n << b) | (n >> (32 - b))) & 0xFFFFFFFF + + +def split(b): + """Split an integer into an int8 array of its 32 low bits, least-significant bit first.""" + ans = [] + for _ in range(32): + ans += [b % 2] + b = b // 2 + + return np.array(ans, dtype=np.int8) + + +def unsplit(bits): + """Rebuild an integer from 32 bits given least-significant bit first.""" + ans = 0 + for i in range(32): + ans *= 2 + ans += bits[31 - i] + + return ans + + +def get_random_string(length): + """Return a random string made of `length` ASCII letters.""" + if length == 0: + return "" + + # ruff: noqa:S311 + result_str = "".join(random.choice(string.ascii_letters) for i in range(length)) + return result_str + + +# FHE functions +@fhe.module() +class MyModule: + # ruff: noqa:N805 + @fhe.function({"x": "encrypted", "y": "encrypted", "z": "encrypted"}) + def xor3(x, y, z): + return x ^ y ^ z + + # ruff: noqa:N805 + @fhe.function({"x": "encrypted", "y": "encrypted", "z": "encrypted"}) + def iftern(x, y, z): + return z ^ (x & (y ^ z)) + + # ruff: noqa:N805 + @fhe.function({"x": "encrypted", "y": "encrypted", "z": "encrypted"}) + def maj(x, y, z): + return (x & y) | (z & (x | y)) + + # ruff: noqa:N805 + @fhe.function({"x": "encrypted"}) + def rotate30(x): + ans = fhe.zeros((32,)) + ans[30:32] = x[0:2] + ans[0:30] = x[2:32] + return ans + + # ruff: noqa:N805 + @fhe.function({"x": "encrypted"}) + def rotate5(x): + ans = fhe.zeros((32,)) + ans[5:32] = x[0:27] + ans[0:5] = x[27:32] + return ans + + # ruff: noqa:N805 + @fhe.function({"x": "encrypted", "y": "encrypted"}) + def add2(x, y): + ans = fhe.zeros((32,)) + cy = 0 + + for i in range(32): + t = x[i] + y[i] + cy + cy, tr = t >= 2, t % 2 + ans[i] = tr + + return ans + + # ruff: noqa:N805 + @fhe.function( + {"x": "encrypted", "y": "encrypted", "u": "encrypted", "v": "encrypted", "w": "encrypted"} + ) + def add5(x, y, u, v, w): + 
ans = fhe.zeros((32,)) + cy = 0 + + for i in range(32): + t = x[i] + y[i] + cy + cy, tr = t // 2, t % 2 + ans[i] = tr + + cy = 0 + + for i in range(32): + t = ans[i] + u[i] + cy + cy, tr = t // 2, t % 2 + ans[i] = tr + + cy = 0 + + for i in range(32): + t = ans[i] + v[i] + cy + cy, tr = t // 2, t % 2 + ans[i] = tr + + cy = 0 + + for i in range(32): + t = ans[i] + w[i] + cy + cy, tr = t // 2, t % 2 + ans[i] = tr + + return ans + + +# Compilation of the FHE functions +size_of_inputsets = 1000 +inputset1 = [(np.random.randint(2, size=(32,)),) for _ in range(size_of_inputsets)] +inputset2 = [ + ( + np.random.randint(2, size=(32,)), + np.random.randint(2, size=(32,)), + ) + for _ in range(size_of_inputsets) +] +inputset3 = [ + ( + np.random.randint(2, size=(32,)), + np.random.randint(2, size=(32,)), + np.random.randint(2, size=(32,)), + ) + for _ in range(size_of_inputsets) +] +inputset5 = [ + ( + np.random.randint(2, size=(32,)), + np.random.randint(2, size=(32,)), + np.random.randint(2, size=(32,)), + np.random.randint(2, size=(32,)), + np.random.randint(2, size=(32,)), + ) + for _ in range(size_of_inputsets) +] +# FIXME: remove the mypy exception once https://github.com/zama-ai/concrete-internal/issues/721 +# is fixed +my_module = MyModule.compile( # type: ignore + { + "xor3": inputset3, + "iftern": inputset3, + "maj": inputset3, + "rotate30": inputset1, + "rotate5": inputset1, + "add2": inputset2, + "add5": inputset5, + }, + show_mlir=False, + bitwise_strategy_preference=fhe.BitwiseStrategy.ONE_TLU_PROMOTED, + multivariate_strategy_preference=fhe.MultivariateStrategy.PROMOTED, + p_error=10**-8, +) + + +# Split and encrypt on the client side +def message_schedule_and_split_and_encrypt(chunk): + + assert len(chunk) == 64 + + w = [0] * 80 + + # Break chunk into sixteen 4-byte big-endian words w[i] + for i in range(16): + w[i] = struct.unpack(b">I", chunk[i * 4 : i * 4 + 4])[0] + + # Extend the sixteen 4-byte words into eighty 4-byte words + for i in range(16, 80): + 
w[i] = _left_rotate(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1) + + # Then split and encrypt + wsplit_enc = [0] * 80 + + for i in range(80): + wsplit_enc[i] = my_module.rotate5.encrypt(split(w[i])) + + return wsplit_enc + + +# Perform SHA computation server side, completely in FHE +def _process_encrypted_chunk_server_side( + wsplit_enc, h0split_enc, h1split_enc, h2split_enc, h3split_enc, h4split_enc +): + """Process a chunk of data and return the new digest variables.""" + + # Initialize hash value for this chunk + asplit_enc = h0split_enc + bsplit_enc = h1split_enc + csplit_enc = h2split_enc + dsplit_enc = h3split_enc + esplit_enc = h4split_enc + + for i in range(80): + if 0 <= i <= 19: + + # Do f = d ^ (b & (c ^ d)) + fsplit_enc = my_module.iftern.run(bsplit_enc, csplit_enc, dsplit_enc) + + ksplit = split(0x5A827999) + elif 20 <= i <= 39: + + # Do f = b ^ c ^ d + fsplit_enc = my_module.xor3.run(bsplit_enc, csplit_enc, dsplit_enc) + + ksplit = split(0x6ED9EBA1) + elif 40 <= i <= 59: + + # Do f = (b & c) | (b & d) | (c & d) + fsplit_enc = my_module.maj.run(bsplit_enc, csplit_enc, dsplit_enc) + + ksplit = split(0x8F1BBCDC) + elif 60 <= i <= 79: + + # Do f = b ^ c ^ d + fsplit_enc = my_module.xor3.run(bsplit_enc, csplit_enc, dsplit_enc) + + ksplit = split(0xCA62C1D6) + + # Do arot5 = _left_rotate(a, 5) + arot5split_enc = my_module.rotate5.run(asplit_enc) + + # Do arot5 + f + e + k + w[i] + ssplit_enc = my_module.add5.run( + arot5split_enc, + fsplit_enc, + esplit_enc, + wsplit_enc[i], + my_module.rotate5.encrypt(ksplit), # BCM: later remove the encryption on k + ) + + # Final update of the a, b, c, d and e registers + newasplit_enc = ssplit_enc + + esplit_enc = dsplit_enc + dsplit_enc = csplit_enc + + # Do c = _left_rotate(b, 30) + csplit_enc = my_module.rotate30.run(bsplit_enc) + + bsplit_enc = asplit_enc + asplit_enc = newasplit_enc + + # Add this chunk's hash to result so far + h0split_enc = my_module.add2.run(h0split_enc, asplit_enc) + h1split_enc = 
my_module.add2.run(h1split_enc, bsplit_enc) + h2split_enc = my_module.add2.run(h2split_enc, csplit_enc) + h3split_enc = my_module.add2.run(h3split_enc, dsplit_enc) + h4split_enc = my_module.add2.run(h4split_enc, esplit_enc) + + return h0split_enc, h1split_enc, h2split_enc, h3split_enc, h4split_enc + + +# ruff: noqa:UP004 +class Sha1Hash(object): + """A class that mimics the hashlib API and implements the SHA-1 algorithm.""" + + name = "python-sha1" + digest_size = 20 + block_size = 64 + + def __init__(self): + # Initial digest variables + h0, h1, h2, h3, h4 = (0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0) + + # Split + h0split = split(h0) + h1split = split(h1) + h2split = split(h2) + h3split = split(h3) + h4split = split(h4) + + # Encrypt + h0split_enc = my_module.rotate5.encrypt(h0split) + h1split_enc = my_module.rotate5.encrypt(h1split) + h2split_enc = my_module.rotate5.encrypt(h2split) + h3split_enc = my_module.rotate5.encrypt(h3split) + h4split_enc = my_module.rotate5.encrypt(h4split) + + self._hsplit_enc = (h0split_enc, h1split_enc, h2split_enc, h3split_enc, h4split_enc) + + # bytes object with 0 <= len < 64 used to store the end of the message + # if the message length is not a multiple of 64 + self._unprocessed = b"" + # Length in bytes of all data that has been processed so far + self._message_byte_length = 0 + + def update(self, arg): + """Update the current digest. + + This may be called repeatedly, even after calling digest or hexdigest. + + Arguments: + arg: bytes, bytearray, or BytesIO object to read from. 
+ """ + if isinstance(arg, (bytes, bytearray)): + arg = io.BytesIO(arg) + + # Try to build a chunk out of the unprocessed data, if any + chunk = self._unprocessed + arg.read(64 - len(self._unprocessed)) + + # Read the rest of the data, 64 bytes at a time + while len(chunk) == 64: + + wsplit_enc = message_schedule_and_split_and_encrypt(chunk) + self._hsplit_enc = _process_encrypted_chunk_server_side(wsplit_enc, *self._hsplit_enc) + self._message_byte_length += 64 + chunk = arg.read(64) + + self._unprocessed = chunk + return self + + def digest(self): + """Produce the final hash value (big-endian) as a bytes object""" + return b"".join(struct.pack(b">I", h) for h in self._produce_digest()) + + def hexdigest(self): + """Produce the final hash value (big-endian) as a hex string""" + # ruff: noqa:UP031 + return "%08x%08x%08x%08x%08x" % self._produce_digest() + + def _produce_digest(self): + """Return finalized digest variables for the data processed so far.""" + # Pre-processing: + message = self._unprocessed + message_byte_length = self._message_byte_length + len(message) + + # append the bit '1' to the message + message += b"\x80" + + # append 0 <= k < 512 bits '0', so that the resulting message length (in bytes) + # is congruent to 56 (mod 64) + message += b"\x00" * ((56 - (message_byte_length + 1) % 64) % 64) + + # append length of message (before pre-processing), in bits, as 64-bit big-endian integer + message_bit_length = message_byte_length * 8 + message += struct.pack(b">Q", message_bit_length) + + # Process the final chunk + # At this point, the length of the message is either 64 or 128 bytes. 
+ wsplit_enc = message_schedule_and_split_and_encrypt(message[:64]) + hsplit_enc = _process_encrypted_chunk_server_side(wsplit_enc, *self._hsplit_enc) + + if len(message) != 64: + + wsplit_enc = message_schedule_and_split_and_encrypt(message[64:]) + hsplit_enc = _process_encrypted_chunk_server_side(wsplit_enc, *hsplit_enc) + + # Decrypt + h0split = my_module.rotate5.decrypt(hsplit_enc[0]) + h1split = my_module.rotate5.decrypt(hsplit_enc[1]) + h2split = my_module.rotate5.decrypt(hsplit_enc[2]) + h3split = my_module.rotate5.decrypt(hsplit_enc[3]) + h4split = my_module.rotate5.decrypt(hsplit_enc[4]) + + # Unsplit + h0 = unsplit(h0split) + h1 = unsplit(h1split) + h2 = unsplit(h2split) + h3 = unsplit(h3split) + h4 = unsplit(h4split) + + return h0, h1, h2, h3, h4 + + +def sha1(data): + """SHA-1 Hashing Function + + Hash the message with Sha1Hash, whose chunk processing runs through the compiled FHE Module. + + Arguments: + data: A bytes, bytearray, or BytesIO object containing the input message to hash. + + Returns: + A hex SHA-1 digest of the input message. + """ + return Sha1Hash().update(data).hexdigest() + + +def print_timed_sha1(data): + time_begin = time.time() + ans = sha1(data) + print(f"sha1-digest: {ans}") + print(f"computed in: {time.time() - time_begin:2f} seconds") + return ans + + +if __name__ == "__main__": + # Imports required for command line parsing. 
No need for these elsewhere + import argparse + import os + import sys + + # Parse the incoming arguments + parser = argparse.ArgumentParser() + parser.add_argument("input", nargs="*", help="input file or message to hash") + parser.add_argument("--autotest", action="store_true", help="autotest") + args = parser.parse_args() + + if args.autotest: + + filename = "tmp_sha1_test_file.txt" + + # Checking random patterns + for _ in range(20): + + string_length = np.random.randint(100) + + # Take a random string + hash_input = get_random_string(string_length) + + print(f"Checking SHA1({hash_input}) for an input length {string_length}") + + # Hash it with hashlib_sha1 + # ruff: noqa:S324 + h = hashlib_sha1() + h.update(bytes(hash_input, encoding="utf-8")) + expected_ans = h.hexdigest() + + # Hash it in FHE + with open(filename, "w") as file: + file.write(f"{hash_input}") + + with open(filename, "rb") as data: + # Show the final digest + ans = print_timed_sha1(data) + + # And compare + assert ( + ans == expected_ans + ), f"Wrong computation: {ans} vs expected {expected_ans} for input {hash_input}" + + # Checking a few patterns + for hash_input, expected_ans in [ + ("", "da39a3ee5e6b4b0d3255bfef95601890afd80709"), + ( + "The quick brown fox jumps over the lazy dog", + "2fd4e1c67a2d28fced849ee1bb76e7391b93eb12", + ), + ]: + + with open(filename, "w") as file: + file.write(f"{hash_input}") + + print(f"Checking SHA1({hash_input})") + + with open(filename, "rb") as data: + # Show the final digest + ans = print_timed_sha1(data) + + assert ans == expected_ans, f"Wrong computation: {ans} vs expected {expected_ans}" + + sys.exit(0) + + if len(args.input) == 0: + # No argument given, assume message comes from standard input + try: + # sys.stdin is opened in text mode, which can change line endings, + # leading to incorrect results. 
Detach fixes this issue, but it's + # new in Python 3.1 + data = sys.stdin.detach() # type: ignore + + except AttributeError: + # Linux and OSX both use \n line endings, so only windows is a + # problem. + if sys.platform == "win32": + import msvcrt + + msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY) + data = sys.stdin # type: ignore + + # Output to console + print_timed_sha1(data) + + else: + # Loop through arguments list + for argument in args.input: + if os.path.isfile(argument): + # An argument is given and it's a valid file. Read it + with open(argument, "rb") as data: + + # Show the final digest + print_timed_sha1(data) + + else: + print("Error, could not find " + argument + " file.") diff --git a/frontends/concrete-python/examples/xor_distance/xor_distance.md b/frontends/concrete-python/examples/xor_distance/xor_distance.md index 37555ee0b5..578fdc3618 100644 --- a/frontends/concrete-python/examples/xor_distance/xor_distance.md +++ b/frontends/concrete-python/examples/xor_distance/xor_distance.md @@ -1,4 +1,4 @@ -# Xor Distance +# Xor distance We describe how to compute a XOR distance (as known as an Hamming weight distance) in Concrete. This can be useful in particular for biometry use-cases, where obviously, private is a very interesting