forked from adi0509/Python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsmartbot.py
169 lines (125 loc) · 4.71 KB
/
smartbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# -*- coding: utf-8 -*-
"""smartbot.py -- a 'self learning' retrieval chatbot.

Automatically generated by Colaboratory.
Original file is located at
    https://colab.research.google.com/drive/1GioqwcU64Ca1F3x6IE7-HGDzCrJ0pNti
"""
#Description: This is a 'self learning' chatbot program
#Resources:
#(1) https://www.freecodecamp.org/news/how-to-process-textual-data-using-tf-idf-in-python-cd2bbc0a94a3/
#(2) https://github.com/randerson112358/Building-a-Simple-Chatbot-in-Python-using-NLTK
#(3) http://www.tfidf.com/
#(4) https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.cosine_similarity.html
# FIX: `pip install ...` is shell syntax, not Python -- left as bare
# statements (as in the notebook) it is a SyntaxError when this file is
# run as a plain .py script.  Install the dependencies from a shell:
#   pip install nltk
#   pip install newspaper3k
#Import libraries
from newspaper import Article
import random
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
import numpy as np
import warnings
#Ignore any warning messages
warnings.filterwarnings('ignore')
#Fetch the NLTK data files used below (tokenizer models and WordNet),
#suppressing download progress output.
nltk.download('punkt', quiet=True)
nltk.download('wordnet', quiet=True)
#Download and parse the article that serves as the bot's knowledge base.
article = Article('https://www.mayoclinic.org/diseases-conditions/chronic-kidney-disease/symptoms-causes/syc-20354521')
article.download()
article.parse()
article.nlp()
corpus = article.text
#Show the raw article text
print(corpus)
#Split the article into sentences -- these are the bot's candidate answers.
text = corpus
sent_tokens = nltk.sent_tokenize(text)
print(sent_tokens)
#Translation table that maps every punctuation character to None
#(str.maketrans with a third argument builds exactly the
#{ord(char): None} dict the original built by hand).
remove_punct_dict = str.maketrans('', '', string.punctuation)
#Show the punctuation set and the resulting table
print(string.punctuation)
print(remove_punct_dict)
#Create a function to return a list of lemmatized lower case words after removing punctuations
def LemNormalize(text):
    """Lower-case *text*, strip punctuation, tokenize, and lemmatize.

    FIX: despite its name and comment, the original only tokenized --
    the 'wordnet' data downloaded above was never used.  Each token is
    now reduced to its WordNet lemma, so inflected forms (e.g.
    'kidneys' vs 'kidney') match during TF-IDF comparison.
    """
    lemmer = nltk.stem.WordNetLemmatizer()
    tokens = nltk.word_tokenize(text.lower().translate(remove_punct_dict))
    return [lemmer.lemmatize(token) for token in tokens]
#Print the tokenization text
print(LemNormalize(text))
#Keyword Matching
#Phrases recognised as a greeting from the user
GREETING_INPUTS = ["hi", "hello", "hola", "greetings", "wassup", "hey"]
#Greetings the bot may answer with
GREETING_RESPONSES = ["howdy", "hi", "hey", "what's good", "hello", "hey there"]

def greeting(sentence):
    """Return a random greeting if *sentence* contains one, else None.

    Words are compared case-insensitively against GREETING_INPUTS.
    """
    if any(word.lower() in GREETING_INPUTS for word in sentence.split()):
        return random.choice(GREETING_RESPONSES)
    return None
#Generate the response
def response(user_response):
    """Return the corpus sentence most similar to the user's query.

    The lower-cased query is temporarily appended to the global
    ``sent_tokens`` list, the whole list is vectorised with TF-IDF, and
    cosine similarity ranks every sentence against the query (the last
    row).  The best match other than the query itself is returned; if
    nothing is similar at all (score 0) an apology is returned instead.
    The temporary append is undone before returning.
    """
    query = user_response.lower()
    #Temporarily add the query so it is vectorised alongside the corpus
    sent_tokens.append(query)
    #TF-IDF features for every sentence plus the query
    vectorizer = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = vectorizer.fit_transform(sent_tokens)
    #Similarity of the query (last row) against every row
    vals = cosine_similarity(tfidf[-1], tfidf)
    #Second-best index: the best ([-1]) is the query matching itself
    idx = vals.argsort()[0][-2]
    #Second-best score, via a flattened ascending sort
    flat = vals.flatten()
    flat.sort()
    score = flat[-2]
    #Capture the answer BEFORE removing the query, exactly as the
    #original did -- removal can shift indices if the query duplicates
    #an earlier sentence.
    best_match = sent_tokens[idx]
    #Undo the temporary append
    sent_tokens.remove(query)
    if score == 0:
        return "I apologize, I don't understand."
    return best_match
#Interactive chat loop.  'bye' exits; 'thanks'/'thank you' also exits;
#greetings get a canned reply; anything else is answered from the corpus.
flag = True
print("DOCBot: I am Doctor Bot or DOCBot for short. I will answer your queries about Chronic Kidney Disease. If you want to exit, type Bye!")
while flag:
    user_response = input().lower()
    if user_response == 'bye':
        flag = False
        print("DOCBot: Chat with you later !")
    elif user_response in ('thanks', 'thank you'):
        flag = False
        print("DOCBot: You are welcome !")
    else:
        #FIX: call greeting() exactly once.  The original called it
        #twice -- the None-checked draw and the printed draw were two
        #independent random choices, one of them wasted.
        greet = greeting(user_response)
        if greet is not None:
            print("DOCBot: " + greet)
        else:
            print("DOCBot: " + response(user_response))