-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerateanagram.py
152 lines (130 loc) · 5.26 KB
/
generateanagram.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
hasanelement = False
english = True
wordlist = []


def _read_wordlist(path, encoding=None):
    """Read a one-word-per-line text file and return its words lower-cased.

    Only the first whitespace-separated token of each line is used. Blank
    lines are skipped. A word equal to the word kept immediately before it
    is dropped, so only *consecutive* duplicates are removed.

    Args:
        path: Path of the word list file.
        encoding: Text encoding passed to ``open``; ``None`` uses the
            platform default.

    Returns:
        The words in file order.
    """
    words = []
    with open(path, 'r', encoding=encoding) as f:
        for line in f:
            tokens = line.split()
            if not tokens:
                # A blank line has no first token; indexing it would raise.
                continue
            entry = tokens[0].lower()
            if not words or entry != words[-1]:
                words.append(entry)
    return words


# Read in the word list for a specific language (txt file separates words with
# each new line; it is recommended to reduce the number of small words, len <= 3).
if english:
    # Longest words first.
    wordlist = sorted(_read_wordlist('popular.txt'), key=len, reverse=True)
else:
    # The German list is kept in file order (it is not sorted by length).
    wordlist = _read_wordlist('wortliste.txt', encoding='utf-8')
# True once at least one word has been read.
hasanelement = bool(wordlist)
word = "johndoe" # all lower case
length = len(word)
# Letter inventory of `word`: every supported letter (a-z plus ä, ö, ü, ß, in
# that order) maps to the number of times it occurs in `word`.
composition = dict.fromkeys('abcdefghijklmnopqrstuvwxyzäöüß', 0)
# Tally the letters; a character outside the supported set raises KeyError.
for letter in word:
    composition[letter] = composition[letter] + 1
# how many letters are in a composition
def evaluate(composition):
    """Return the total number of letters held by *composition*.

    Each letter contributes its own count exactly once, so the result does
    not depend on which letters repeat in the word being anagrammed.

    Args:
        composition: Mapping of letter -> number of occurrences.

    Returns:
        The sum of all counts (0 for an empty or all-zero composition).
    """
    return sum(composition.values())
# remove all words that are not possible with a letter composition
def sortout(i_wordlist, composition):
    """Return the words of *i_wordlist* that can be spelled from *composition*.

    A word is kept when, for every letter, it uses no more occurrences than
    *composition* provides. A character that is not a key of *composition*
    counts as unavailable, so the word is dropped instead of raising
    KeyError. Neither argument is modified.

    Args:
        i_wordlist: Iterable of candidate words.
        composition: Mapping of letter -> number of available occurrences.

    Returns:
        A new list with the surviving words in their original order.
    """
    # A word longer than the number of available letters can never fit;
    # checking this first avoids copying the composition for such words.
    budget = sum(count for count in composition.values() if count > 0)

    def fits(candidate):
        if len(candidate) > budget:
            return False
        remaining = dict(composition)
        for letter in candidate:
            left = remaining.get(letter, 0) - 1
            if left < 0:
                # Letter is used up (or was never available).
                return False
            remaining[letter] = left
        return True

    return [candidate for candidate in i_wordlist if fits(candidate)]
# Pre-filter the word list once against the letters of `word`, so the search
# below starts from words that sortout() considers possible.
wordlist = sortout(wordlist, composition)
# recursively reduce the letter composition with possible words to arrive at a finished anagram
def _leftover_letters(composition):
    """Return the letters still available in *composition* as one string."""
    return "".join(letter * number for letter, number in composition.items() if number > 0)


def _tagged(prefix, cache, letters):
    """Build a partial-result line: prefix, the words so far (if any), then the leftover letters."""
    if cache == "":
        return prefix + letters
    return prefix + cache + "+" + letters


def narrowdown(wordlist, composition, cache = "", hit = 0, maxlet = 0, lastword = "", depth = 0):
    """Recursively search for anagrams of the letters in *composition*.

    Each candidate word is subtracted from the composition and the search
    continues on the remainder, until no letters are left (a finished
    anagram) or no candidate fits.

    Args:
        wordlist: Candidate words; expected to be spellable from
            *composition* (e.g. pre-filtered with ``sortout``).
        composition: Mapping of letter -> number of remaining occurrences.
        cache: Words chosen so far, joined with "+".
        hit: Threshold controlling whether one or two leftover letters are
            reported as a partial result when no words remain. It is not
            forwarded to recursive calls.
        maxlet: When more than the number of remaining letters, the
            remainder is reported as a "~~ " result instead of searching on.
            It is not forwarded to recursive calls.
        lastword: Previously chosen word; the next word may not be longer.
            When empty, the remaining letters themselves set the limit.
        depth: Recursion depth; at depth 0, words shorter than four letters
            are not used as the first word.

    Returns:
        A tuple ``(match, output)``: the number of results counted as
        matches and the list of result strings. Finished anagrams are the
        "+"-joined words; partial results are prefixed with "~~ ", "- " or
        "-- ".
    """
    if lastword == "":
        lastword = _leftover_letters(composition)
    n = evaluate(composition)
    max_match = 50000 # do not bother finding more than 50k anagrams
    if n == 0:
        # All letters used: the chosen words form a complete anagram.
        return 1, [str(cache)]
    if n < maxlet:
        if n == 1 and hit < 1:
            return 0, []
        return 1, [_tagged("~~ ", cache, _leftover_letters(composition))]
    if not wordlist:
        # Dead end: optionally report the one or two letters left over.
        if n == 1 and hit >= 1:
            return 0, [_tagged("- ", cache, _leftover_letters(composition))]
        if n == 2 and hit >= 2:
            return 0, [_tagged("-- ", cache, _leftover_letters(composition))]
        return 0, []

    match = 0
    output = []
    for testword in wordlist:
        if match >= max_match:
            # The count only grows, so no later word can be accepted either.
            break
        if depth == 0 and len(testword) < 4:
            continue
        if len(testword) > len(lastword):
            continue
        new_composition = composition.copy()
        for letter in testword:
            new_composition[letter] -= 1
        nextstep = sortout(wordlist, new_composition)
        path = str(testword) if cache == "" else str(cache + "+" + testword)
        newmatch, newoutput = narrowdown(nextstep, new_composition, path, lastword=testword, depth=depth+1)
        output += newoutput
        match += newmatch
    # `match` already includes every child's count; adding the last child's
    # count again would inflate the total.
    return match, output
# Run the search and report how many matches it counted.
matches, anagrams = narrowdown(wordlist, composition)
print(matches, " matches found.")
# Write one result per line to a file named after the word and the language.
# The encoding is explicit because the German word list is read as UTF-8 and
# may contain ä/ö/ü/ß, which not every platform default encoding can write.
output_name = '{}-anagrams-eng.txt' if english else '{}-anagramme.txt'
with open(output_name.format(word), 'w', encoding='utf-8') as f:
    f.writelines("%s\n" % item for item in anagrams)