-
Notifications
You must be signed in to change notification settings - Fork 31
/
Copy pathApriori.py
71 lines (70 loc) · 2.3 KB
/
Apriori.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env python
#The dict {} in python such as:my_information = {'name': 'Pusheen the Cat', 'country': 'USA', 'favorite_numbers': [42, 105]}
#name -> Pusheen the Cat (Key is name , value is Pusheen the cat)
def Apriori_gen(Itemset, lenght):
"""Too generate new (k+1)-itemsets can see README Join Stage"""
canditate = []
canditate_index = 0
for i in range (0,lenght):
element = str(Itemset[i])
for j in range (i+1,lenght):
element1 = str(Itemset[j])
if element[0:(len(element)-1)] == element1[0:(len(element1)-1)]:
unionset = element[0:(len(element)-1)]+element1[len(element1)-1]+element[len(element)-1] #Combine (k-1)-Itemset to k-Itemset
unionset = ''.join(sorted(unionset)) #Sort itemset by dict order
canditate.append(unionset)
return canditate
def Apriori_prune(Ck,MinSupport):
L = []
for i in Ck:
if Ck[i] >= minsupport:
L.append(i)
return sorted(L)
def Apriori_count_subset(Canditate,Canditate_len):
""" Use bool to know is subset or not """
Lk = dict()
file = open('example.txt')
for l in file:
l = str(l.split())
count = 0
for i in range (0,Canditate_len):
key = str(Canditate[i])
if key not in Lk:
Lk[key] = 0
flag = True
for k in key:
if k not in l:
flag = False
if flag:
Lk[key] += 1
file.close()
return Lk
minsupport = 3
C1={}
file = open('example.txt')
"""Count one canditate"""
for line in file:
for item in line.split():
if item in C1:
C1[item] +=1
else:
C1[item] = 1
file.close()
C1.keys().sort()
L = []
L1 = Apriori_prune(C1,minsupport)
L = Apriori_gen(L1,len(L1))
print '===================================='
print 'Frequent 1-itemset is',L1
print '===================================='
k=2
while L != []:
C = dict()
C = Apriori_count_subset(L,len(L))
fruquent_itemset = []
fruquent_itemset = Apriori_prune(C,minsupport)
print '===================================='
print 'Frequent',k,'-itemset is',fruquent_itemset
print '===================================='
L = Apriori_gen(fruquent_itemset,len(fruquent_itemset))
k += 1