-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmongo_wordcloud.py
69 lines (52 loc) · 1.88 KB
/
mongo_wordcloud.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import mongo_functions
import constants
import helpers
import time
# Open the database connection through the project's mongo_functions helper
# before any aggregation is issued.
mongo_functions.mongo_connect()
# Start the wall-clock timer only after connecting, so the connection setup
# itself is not included in the measured time.
time_start = time.time()
# Word totals per sentiment, taken from each tweet's `word_count` object
for sentiment in constants.TWITTER_SENTIMENTS:
    print("GENERATING WORDS %s WORDCLOUD" % sentiment)
    # Turn every `word_count` object into one row per (word, count) pair,
    # then add the counts up per word across all matching tweets.
    word_pipeline = [
        {'$match': {'sentiment': sentiment}},
        {'$project': {'word_count': 1}},
        {'$project': {'words': {'$objectToArray': '$word_count'}}},
        {'$unwind': '$words'},
        {'$project': {'word': '$words.k', 'count': '$words.v'}},
        {'$group': {'_id': '$word', 'count': {'$sum': '$count'}}},
    ]
    words_count_sum = mongo_functions.aggregate('tweet', word_pipeline)
    # Map each word to its summed count.
    w_count = {row['_id']: row['count'] for row in words_count_sum}
    # Rendering for this category is currently switched off:
    # helpers.word_cloud(w_count, sentiment + "_words")
# Emoji frequencies per sentiment, rendered as a word cloud
for sentiment in constants.TWITTER_SENTIMENTS:
    print("GENERATING EMOJI %s WORDCLOUD" % sentiment)
    # After the $project/$unwind stages each row carries a single `emojis`
    # value and no `count` field, so occurrences must be tallied with
    # {"$sum": 1}. Summing the non-existent "$count" field would make every
    # total 0.
    emoji_count_sum = mongo_functions.aggregate('tweet', [
        {"$match": {"sentiment": sentiment}},
        {"$project": {"emojis": 1}},
        {"$unwind": "$emojis"},
        {"$group": {"_id": "$emojis", "count": {"$sum": 1}}},
    ])
    # Map each emoji to the number of times it occurs.
    emj_count = {row['_id']: row['count'] for row in emoji_count_sum}
    # NOTE(review): the third positional argument presumably switches the
    # helper to emoji-aware rendering -- confirm in helpers.word_cloud.
    helpers.word_cloud(emj_count, sentiment + "_emojis", True)
# Emoticon frequencies per sentiment
for sentiment in constants.TWITTER_SENTIMENTS:
    print("GENERATING EMOTICON %s WORDCLOUD" % sentiment)
    # One row per emoticon occurrence after $unwind; $group counts the rows.
    emoticon_pipeline = [
        {"$match": {"sentiment": sentiment}},
        {"$project": {"emoticons": 1}},
        {"$unwind": "$emoticons"},
        {"$group": {"_id": "$emoticons", "count": {"$sum": 1}}},
    ]
    emoticon_count_sum = mongo_functions.aggregate('tweet', emoticon_pipeline)
    # Map each emoticon to the number of times it occurs.
    emtc_count = {row['_id']: row['count'] for row in emoticon_count_sum}
    # Rendering for this category is currently switched off:
    # helpers.word_cloud(emtc_count, sentiment + "_emoticons")
# Report the wall-clock seconds elapsed since `time_start`, then release the
# database connection.
print(f"TIME MONGO WORDCLOUDS {time.time() - time_start}")
mongo_functions.mongo_disconnect()