-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcandidateGen.py
executable file
·106 lines (97 loc) · 2.51 KB
/
candidateGen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import sys
import os
import itertools
from collections import defaultdict
class candidateGen(object):
    """Generate candidate itemsets by joining itemsets that share a prefix.

    Itemsets are stored in ``self.data`` as a prefix -> list-of-last-items
    mapping. Every item is written with a single leading space, and an
    itemset of size one is stored under the prefix ``" "``.  Example:

        transactions:
            a b c
            a b e
            a b c d
        data:
            data[" a b"]   = [" c", " e"]
            data[" a b c"] = [" d"]

    ``self.candidate`` accumulates the itemsets produced by ``gen()``.
    """

    def __init__(self):
        self.data = defaultdict(list)
        self.candidate = []

    def get(self, inputfile, limit):
        """Load whitespace-separated itemsets from ``inputfile`` into ``data``.

        Each non-blank line is split on whitespace and sorted (as strings);
        the last item becomes the value and the remaining items the key.

        inputfile -- path of the text file to read, one itemset per line.
        limit     -- maximum number of lines to read; -1 reads the whole file.

        Returns None.  An I/O failure is reported on stdout and swallowed.
        """
        count = 0
        line = ""  # keeps the error report below safe if open() itself fails
        try:
            with open(inputfile, "rt") as source:
                for line in source:
                    # ">=" so that exactly `limit` lines are consumed.
                    if limit != -1 and count >= limit:
                        return
                    count += 1
                    items = sorted(line.split())
                    if not items:
                        # A blank line would otherwise register an empty item
                        # under " " and make gen() fail on an empty subset.
                        continue
                    key = " " + " ".join(items[:-1])  # " " for a single item
                    value = " " + items[-1]
                    if value not in self.data[key]:
                        self.data[key].append(value)
        except (IOError, OSError):
            # open()/read failures raise IOError/OSError, never RuntimeError.
            print(count)
            print(line)
            print("Something wrong when reading file")

    def push(self, transactions, k):
        """Add itemsets of size ``k`` directly to ``data``.

        transactions -- for k == 1, a list of item strings (each with its
                        leading space); otherwise a list of itemsets, each a
                        list of such item strings.
        k            -- size of the itemsets being pushed.
        """
        if k == 1:
            self.data[' '] += transactions
            return
        for itemset in transactions:
            self.data[''.join(itemset[:-1])].append(itemset[-1])

    def gen(self):
        """Build candidates one item longer than the stored itemsets.

        Two itemsets sharing the same prefix are joined; the joined set is
        kept only if every subset obtained by dropping one item is itself
        present in ``data``.  Candidates are appended to ``self.candidate``
        (results of earlier calls are retained), which is then sorted.

        Returns the sorted ``self.candidate`` list; each candidate is a list
        of item strings with a leading space.
        """
        for key, value in self.data.items():
            if len(value) < 2:
                continue
            for t1, t2 in itertools.combinations(sorted(value), 2):
                new_set = (key + t1 + t2).split()
                # Pairs built from single items have no smaller stored
                # subsets to verify, so they are always accepted.
                if len(new_set) == 2 or self._subsets_present(new_set):
                    self.candidate.append([' {}'.format(x) for x in new_set])
        self.candidate = sorted(self.candidate)
        return self.candidate

    def _subsets_present(self, new_set):
        """Return True if every (len-1)-subset of ``new_set`` is in ``data``."""
        for subset in itertools.combinations(new_set, len(new_set) - 1):
            prefix = " " + ' '.join(subset[:-1])
            # Test membership first: indexing the defaultdict would insert
            # the key while gen() is iterating over it.
            if prefix not in self.data:
                return False
            if (" " + subset[-1]) not in self.data[prefix]:
                return False
        return True

    def write(self, outputfile):
        """Write each candidate on its own line, items separated by spaces.

        outputfile -- path of the file to create or overwrite.

        Returns None.  An I/O failure is reported on stdout and swallowed.
        """
        try:
            with open(outputfile, "wt") as sink:
                for c in self.candidate:
                    sink.write(''.join(c).strip())
                    sink.write("\n")
        except (IOError, OSError):
            # Only I/O problems are expected here; anything else is a bug
            # and should surface instead of being hidden.
            print("Cannot write file")
if __name__ == "__main__":
    # Usage:   candidateGen.py <inputfile> <outputfile>
    # Example: candidateGen.py retail1.dat output.dat
    if len(sys.argv) < 3:
        # Fail with a readable message instead of an IndexError traceback.
        sys.stderr.write(
            "Usage: {0} <inputfile> <outputfile>\n".format(sys.argv[0]))
        sys.exit(1)
    spawner = candidateGen()
    spawner.get(sys.argv[1], -1)  # -1: read every line of the input
    spawner.gen()
    spawner.write(sys.argv[2])