Skip to content

Commit

Permalink
docs(frontend): making an Alphabet class
Browse files Browse the repository at this point in the history
  • Loading branch information
bcm-at-zama committed Jul 5, 2024
1 parent e4c4e07 commit e6ff264
Showing 1 changed file with 174 additions and 148 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,156 @@
from concrete import fhe


class Alphabet:
    """Alphabet of characters, with the tooling to check Levenshtein distances on it.

    An instance stores the letters of the selected alphabet, the char-to-int mapping used
    to encode strings, and the module produced by `compile_module`.
    """

    def __init__(self):
        """Create an alphabet with nothing selected and nothing compiled yet."""
        # State is per-instance on purpose: a class-level dict would be shared (and
        # polluted) by every Alphabet object.
        self.letters = None
        self.mapping_to_int = {}
        self.my_module = None

    def set_lowercase(self):
        """Use the 26 lowercase ASCII letters."""
        self.letters = "".join([chr(97 + i) for i in range(26)])

    def set_uppercase(self):
        """Use the 26 uppercase ASCII letters."""
        self.letters = "".join([chr(65 + i) for i in range(26)])

    def set_anycase(self):
        """Use the 26 lowercase ASCII letters followed by the 26 uppercase ones."""
        self.letters = "".join([chr(97 + i) for i in range(26)] + [chr(65 + i) for i in range(26)])

    def set_dna(self):
        """Use the four letters A, C, T and G."""
        self.letters = "ACTG"

    @staticmethod
    def return_available_alphabets():
        """Return the list of alphabet names accepted by `set_alphabet`."""
        return ["string", "STRING", "StRiNg", "ACTG"]

    def check_alphabet(self, alphabet):
        """Raise ValueError if `alphabet` is not one of the available alphabet names."""
        # An explicit raise (rather than an assert) keeps the check active under `python -O`
        if alphabet not in Alphabet.return_available_alphabets():
            raise ValueError(f"Unknown alphabet {alphabet}")

    def set_alphabet(self, alphabet, verbose=True):
        """Select an alphabet by name and rebuild the char-to-int mapping.

        Args:
            alphabet: one of the names given by `return_available_alphabets`.
            verbose: whether to print the chosen alphabet and its letters.

        Raises:
            ValueError: if `alphabet` is not an available alphabet name.
        """
        self.check_alphabet(alphabet)

        if alphabet == "string":
            self.set_lowercase()
        elif alphabet == "STRING":
            self.set_uppercase()
        elif alphabet == "StRiNg":
            self.set_anycase()
        elif alphabet == "ACTG":
            self.set_dna()

        if verbose:
            print(f"Making random tests with alphabet {alphabet}")
            print(f"Letters are {self.letters}\n")

        # Rebuild from scratch: when the same object switches alphabet, letters of the
        # previous one must not survive, else len(mapping_to_int) would be wrong.
        self.mapping_to_int = {c: i for i, c in enumerate(self.letters)}

    def compile_module(self, args):
        """Compile the FHE module.

        Args:
            args: object providing `max_string_length`, `show_mlir` and `show_optimizer`.

        The compiled module is stored in `self.my_module`.
        """
        assert len(self.mapping_to_int) > 0, "Mapping not defined"

        # Inputset of the `equal` function: pairs of integer-encoded chars
        inputset_equal = [
            (random_pick_in_values(self.mapping_to_int), random_pick_in_values(self.mapping_to_int))
            for _ in range(1000)
        ]

        # Inputset of the `mix` function: one value in {0, 1}, then four values
        # in [0, max_string_length)
        inputset_mix = [
            (
                numpy.random.randint(2),
                numpy.random.randint(args.max_string_length),
                numpy.random.randint(args.max_string_length),
                numpy.random.randint(args.max_string_length),
                numpy.random.randint(args.max_string_length),
            )
            for _ in range(100)
        ]

        self.my_module = LevenshsteinModule.compile(
            {"equal": inputset_equal, "mix": inputset_mix},
            show_mlir=args.show_mlir,
            p_error=10**-20,
            show_optimizer=args.show_optimizer,
            comparison_strategy_preference=fhe.ComparisonStrategy.ONE_TLU_PROMOTED,
            min_max_strategy_preference=fhe.MinMaxStrategy.ONE_TLU_PROMOTED,
        )

    def check_string_is_in_alphabet(self, string):
        """Check a string is a valid string of an alphabet.

        Raises:
            ValueError: if a char of `string` is not in the current alphabet.
        """
        assert len(self.mapping_to_int) > 0, "Mapping not defined"

        for c in string:
            if c not in self.mapping_to_int:
                raise ValueError(
                    f"Char {c} of {string} is not in alphabet {list(self.mapping_to_int.keys())}, please choose the right --alphabet"
                )

    def prepare_random_patterns(self, len_min, len_max, nb_strings):
        """Prepare random patterns of different lengths.

        For each of the `nb_strings` rounds, one pair of random strings is made for every
        (length_1, length_2) combination with both lengths in [len_min, len_max].

        Returns:
            list of (string, string) tuples.
        """
        assert len(self.mapping_to_int) > 0, "Mapping not defined"

        list_patterns = []
        for _ in range(nb_strings):
            for length_1 in range(len_min, len_max + 1):
                for length_2 in range(len_min, len_max + 1):
                    list_patterns.append(
                        (
                            random_string(self.mapping_to_int, length_1),
                            random_string(self.mapping_to_int, length_2),
                        )
                    )

        return list_patterns

    def compute_in_simulation(self, list_patterns):
        """Check equality between distance in simulation and clear distance."""
        print("Computations in simulation\n")

        for a, b in list_patterns:

            print(f" Computing Levenshtein between strings '{a}' and '{b}'", end="")

            a_as_int = tuple(self.mapping_to_int[ai] for ai in a)
            b_as_int = tuple(self.mapping_to_int[bi] for bi in b)

            l1_simulate = levenshtein_simulate(self.my_module, a_as_int, b_as_int)
            l1_clear = levenshtein_clear(a_as_int, b_as_int)

            assert l1_simulate == l1_clear, f" {l1_simulate=} and {l1_clear=} are different"
            print(" - OK")

    def compute_in_fhe(self, list_patterns, verbose=True, show_distance=False):
        """Check equality between distance in FHE and clear distance.

        Args:
            list_patterns: iterable of (string, string) pairs to compare.
            verbose: whether to print the section header.
            show_distance: whether to print the computed distance instead of a plain OK.
        """
        self.my_module.keygen()

        # Checks in FHE
        if verbose:
            print("\nComputations in FHE\n")

        for a, b in list_patterns:

            print(f" Computing Levenshtein between strings '{a}' and '{b}'", end="")

            a_as_int = [self.mapping_to_int[ai] for ai in a]
            b_as_int = [self.mapping_to_int[bi] for bi in b]

            # Chars of `a` are encrypted as first argument of `equal`, chars of `b` as
            # second argument; the other slot is given as None
            a_enc = tuple(self.my_module.equal.encrypt(ai, None)[0] for ai in a_as_int)
            b_enc = tuple(self.my_module.equal.encrypt(None, bi)[1] for bi in b_as_int)

            # Only the distance computation is timed, not encryption or decryption
            time_begin = time.time()
            l1_fhe_enc = levenshtein_fhe(self.my_module, a_enc, b_enc)
            time_end = time.time()

            l1_fhe = self.my_module.mix.decrypt(l1_fhe_enc)

            l1_clear = levenshtein_clear(a, b)

            assert l1_fhe == l1_clear, f" {l1_fhe=} and {l1_clear=} are different"

            if not show_distance:
                print(f" - OK in {time_end - time_begin:.2f} seconds")
            else:
                print(f" - distance is {l1_fhe}, computed in {time_end - time_begin:.2f} seconds")


def random_pick_in_values(mapping_to_int):
    """Draw a random integer in [0, len(mapping_to_int)).

    NOTE(review): this is a valid char code only if the mapping values are exactly
    0..len-1 -- confirm against how the mapping is built.
    """
    alphabet_size = len(mapping_to_int)
    return numpy.random.randint(alphabet_size)
Expand All @@ -25,15 +175,6 @@ def random_string(mapping_to_int, l):
return "".join([random_pick_in_keys(mapping_to_int) for _ in range(l)])


def check_string_is_in_alphabet(string, mapping_to_int):
    """Make sure every char of `string` is a key of `mapping_to_int`.

    Raises:
        ValueError: on the first char which is not in the alphabet.
    """
    # Private sentinel, so that any char value (even a falsy one) can be reported
    nothing_wrong = object()
    c = next((char for char in string if char not in mapping_to_int), nothing_wrong)

    if c is not nothing_wrong:
        raise ValueError(
            f"Char {c} of {string} is not in alphabet {list(mapping_to_int.keys())}, please choose the right --alphabet"
        )


# Module FHE
@fhe.module()
class LevenshsteinModule:
Expand Down Expand Up @@ -169,7 +310,7 @@ def manage_args():
parser.add_argument(
"--alphabet",
dest="alphabet",
choices=["string", "STRING", "StRiNg", "ACTG"],
choices=[Alphabet.return_available_alphabets()],
default="string",
help="Setting the alphabet",
)
Expand All @@ -190,129 +331,6 @@ def manage_args():
return args


def compile_module(mapping_to_int, args):
    """Build random inputsets and compile LevenshsteinModule with them.

    Args:
        mapping_to_int: char-to-int mapping of the alphabet.
        args: object providing `max_string_length`, `show_mlir` and `show_optimizer`.

    Returns:
        the compiled module.
    """
    max_length = args.max_string_length

    # `equal` takes two integer-encoded chars
    equal_samples = []
    for _ in range(1000):
        first_char = random_pick_in_values(mapping_to_int)
        second_char = random_pick_in_values(mapping_to_int)
        equal_samples.append((first_char, second_char))

    # `mix` takes one value in {0, 1} followed by four values in [0, max_length)
    mix_samples = []
    for _ in range(100):
        zero_or_one = numpy.random.randint(2)
        bounded_values = [numpy.random.randint(max_length) for _ in range(4)]
        mix_samples.append((zero_or_one, *bounded_values))

    inputsets = {"equal": equal_samples, "mix": mix_samples}

    return LevenshsteinModule.compile(
        inputsets,
        show_mlir=args.show_mlir,
        p_error=10**-20,
        show_optimizer=args.show_optimizer,
        comparison_strategy_preference=fhe.ComparisonStrategy.ONE_TLU_PROMOTED,
        min_max_strategy_preference=fhe.MinMaxStrategy.ONE_TLU_PROMOTED,
    )


def prepare_alphabet_mapping(alphabet, verbose=True):
    """Turn an alphabet name into its char-to-int mapping.

    Args:
        alphabet: "string", "STRING", "StRiNg" or "ACTG".
        verbose: whether to print the alphabet name and its letters.

    Returns:
        dict giving, for each letter, its index in the alphabet.

    Raises:
        ValueError: if the alphabet name is not known.
    """
    lowercase = "".join(chr(97 + i) for i in range(26))
    uppercase = "".join(chr(65 + i) for i in range(26))

    known_alphabets = (
        ("string", lowercase),
        ("STRING", uppercase),
        ("StRiNg", lowercase + uppercase),
        ("ACTG", "ACTG"),
    )

    for name, letters in known_alphabets:
        if alphabet == name:
            break
    else:
        raise ValueError(f"Unknown alphabet {alphabet}")

    if verbose:
        print(f"Making random tests with alphabet {alphabet}")
        print(f"Letters are {letters}\n")

    return {c: i for i, c in enumerate(letters)}


def prepare_random_patterns(mapping_to_int, len_min, len_max, nb_strings):
    """Make pairs of random strings, for all combinations of lengths.

    Each of the `nb_strings` rounds gives one pair per (length_1, length_2) combination,
    with both lengths in [len_min, len_max].

    Returns:
        list of (string, string) tuples.
    """
    lengths = range(len_min, len_max + 1)

    return [
        (random_string(mapping_to_int, length_1), random_string(mapping_to_int, length_2))
        for _ in range(nb_strings)
        for length_1 in lengths
        for length_2 in lengths
    ]


def compute_in_simulation(my_module, list_patterns, mapping_to_int):
    """Compare, for each pair of strings, the simulated distance with the clear one.

    Raises:
        AssertionError: if the two distances differ for some pair.
    """
    print("Computations in simulation\n")

    def encode(text):
        # Integer-encoding of a string, char by char
        return tuple(mapping_to_int[char] for char in text)

    for a, b in list_patterns:
        print(f" Computing Levenshtein between strings '{a}' and '{b}'", end="")

        a_as_int, b_as_int = encode(a), encode(b)

        l1_simulate = levenshtein_simulate(my_module, a_as_int, b_as_int)
        l1_clear = levenshtein_clear(a_as_int, b_as_int)

        assert l1_simulate == l1_clear, f" {l1_simulate=} and {l1_clear=} are different"
        print(" - OK")


def compute_in_fhe(my_module, list_patterns, mapping_to_int, verbose=True, show_distance=False):
    """Compare, for each pair of strings, the distance computed in FHE with the clear one.

    Args:
        my_module: compiled module, on which keys are generated first.
        list_patterns: iterable of (string, string) pairs.
        mapping_to_int: char-to-int mapping of the alphabet.
        verbose: whether to print the section header.
        show_distance: whether to print the distance instead of a plain OK.

    Raises:
        AssertionError: if the two distances differ for some pair.
    """
    my_module.keygen()

    if verbose:
        print("\nComputations in FHE\n")

    for a, b in list_patterns:
        print(f" Computing Levenshtein between strings '{a}' and '{b}'", end="")

        # Chars of `a` go in the first slot of `equal`, chars of `b` in the second one
        a_enc = tuple(
            my_module.equal.encrypt(ai, None)[0] for ai in [mapping_to_int[char] for char in a]
        )
        b_enc = tuple(
            my_module.equal.encrypt(None, bi)[1] for bi in [mapping_to_int[char] for char in b]
        )

        # Timing covers the distance computation only
        time_begin = time.time()
        l1_fhe_enc = levenshtein_fhe(my_module, a_enc, b_enc)
        time_end = time.time()

        l1_fhe = my_module.mix.decrypt(l1_fhe_enc)
        l1_clear = levenshtein_clear(a, b)

        assert l1_fhe == l1_clear, f" {l1_fhe=} and {l1_clear=} are different"

        if show_distance:
            print(f" - distance is {l1_fhe}, computed in {time_end - time_begin:.2f} seconds")
        else:
            print(f" - OK in {time_end - time_begin:.2f} seconds")


def main():
"""Main function."""
print()
Expand All @@ -322,22 +340,28 @@ def main():

# Do what the user requested
if args.autotest:
mapping_to_int = prepare_alphabet_mapping(args.alphabet)
my_module = compile_module(mapping_to_int, args)
list_patterns = prepare_random_patterns(mapping_to_int, 0, args.max_string_length, 1)
compute_in_simulation(my_module, list_patterns, mapping_to_int)
compute_in_fhe(my_module, list_patterns, mapping_to_int)
myalphabet = Alphabet()
myalphabet.set_alphabet(alphabet)

myalphabet.compile_module(args)
list_patterns = myalphabet.prepare_random_patterns(0, args.max_string_length, 1)
myalphabet.compute_in_simulation(list_patterns)
myalphabet.compute_in_fhe(list_patterns)
print("")

if args.autoperf:
myalphabet = Alphabet()

for alphabet in ["ACTG", "string", "STRING", "StRiNg"]:
print(f"Typical performances for alphabet {alphabet}, with string of maximal length:\n")
mapping_to_int = prepare_alphabet_mapping(alphabet, verbose=False)
my_module = compile_module(mapping_to_int, args)
list_patterns = prepare_random_patterns(
mapping_to_int, args.max_string_length, args.max_string_length, 3

myalphabet.set_alphabet(alphabet, verbose=False)

myalphabet.compile_module(args)
list_patterns = myalphabet.prepare_random_patterns(
args.max_string_length, args.max_string_length, 3
)
compute_in_fhe(my_module, list_patterns, mapping_to_int, verbose=False)
myalphabet.compute_in_fhe(list_patterns, verbose=False)
print("")

if args.distance != None:
Expand All @@ -351,12 +375,14 @@ def main():
"Warning, --max_string_length was smaller than lengths of the input strings, fixing it"
)

mapping_to_int = prepare_alphabet_mapping(args.alphabet, verbose=False)
my_module = compile_module(mapping_to_int, args)
check_string_is_in_alphabet(args.distance[0], mapping_to_int)
check_string_is_in_alphabet(args.distance[1], mapping_to_int)
myalphabet = Alphabet()
myalphabet.set_alphabet(alphabet, verbose=False)

myalphabet.compile_module(args)
myalphabet.check_string_is_in_alphabet(args.distance[0])
myalphabet.check_string_is_in_alphabet(args.distance[1])
list_patterns = [args.distance]
compute_in_fhe(my_module, list_patterns, mapping_to_int, verbose=False, show_distance=True)
myalphabet.compute_in_fhe(list_patterns, verbose=False, show_distance=True)
print("")

print("Successful end\n")
Expand Down

0 comments on commit e6ff264

Please sign in to comment.