Skip to content

Commit

Permalink
Add specs for google parser
Browse files Browse the repository at this point in the history
  • Loading branch information
CoralineAda committed Sep 16, 2018
1 parent 98f739e commit 771cae0
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 42 deletions.
12 changes: 2 additions & 10 deletions alice/handlers/alpha.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ def answer_question
private

def answer
# Try Wolfram Alpha first, then fall back to Google
if results = Parser::Alpha.new(sentence).answer
results.first
elsif answers.any?
Expand All @@ -19,16 +20,7 @@ def answer
end

def answers
@answers ||= Parser::Google.new(sentence).all_answers.map do |answer|
if answer.include?("...")
answer = answer.split("...")[1] || ""
end
if answer.scan(/\. [a-zA-Z]+/).any?
answer.split('.')[0..-2].join(' ')
else
answer
end
end.compact
@answers ||= Parser::Google.new(sentence).all_answers
end

def sentence
Expand Down
57 changes: 25 additions & 32 deletions alice/parser/google.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,59 +6,52 @@ class Google
attr_accessor :question

def self.fetch(topic)
new(topic).answer
new(topic).sorted_answers.first
end

def self.fetch_all(topic)
new(topic).all_answers
new(topic).sorted_answers
end

# Stores the question in the form used inside the search URL: literal "+"
# signs are spelled out as "plus", spaces become "+", and any character
# that cannot be represented in ASCII is dropped.
def initialize(question)
  spelled_out = question.gsub("+", "plus")
  plus_joined = spelled_out.tr(" ", "+")
  @question = plus_joined.encode("ASCII", invalid: :replace, undef: :replace, replace: '')
end

def answer
results.first
end

def all_answers
answers
def sorted_answers
answers = sanitized_answers(full_search + reductivist_search)
Grammar::DeclarativeSorter.sort(query: question, corpus: answers)
end

private

# Gathers the snippets from both searches, re-encodes each one as UTF-8
# (dropping bytes that cannot be converted) and discards every snippet that
# still contains an ellipsis.
#
# Returns an Array of Strings. If anything raises, the error is logged and a
# one-element Array holding an apology message is returned instead.
def answers
  snippets = (full_search + reductivist_search).compact.map do |snippet|
    snippet.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
  end
  # Non-destructive reject: reject! returns nil when nothing was removed,
  # which made this method return nil for ellipsis-free result sets.
  snippets.reject { |snippet| snippet.include?("...") }
rescue StandardError => e
  # StandardError rather than Exception so interrupts and exits are not swallowed.
  Alice::Util::Logger.info "*** Parser::Google: Unable to process \"#{self.question}\": #{e}"
  Alice::Util::Logger.info e.backtrace
  return ["Hmm, that part of my brain is returning a #{e}. Google is getting suspicious. You should probably rotate my IP address again."]
end

def results
best_answer = answers.any? ? Grammar::DeclarativeSorter.sort(query: question, corpus: answers).first : ""
Alice::Util::Logger.info "*** Parser::Google: Answered \"#{self.question}\" with #{best_answer}"
return best_answer
rescue Exception => e
Alice::Util::Logger.info "*** Parser::Google: Unable to process \"#{self.question}\": #{e}"
Alice::Util::Logger.info e.backtrace
return ["Hmm, that part of my brain is returning a #{e}"]
# Cleans raw search snippets into candidate answers.
#
# answer_array - Array of snippet Strings.
#
# Each snippet is re-encoded to UTF-8 from a binary view (bytes that cannot
# be converted are dropped) and has its newlines removed. When it contains
# "...", only the first fragment that ends in a period is kept. A single
# leading space is trimmed.
#
# Returns an Array of unique, non-empty Strings.
def sanitized_answers(answer_array)
  cleaned = answer_array.map do |answer|
    answer = answer.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
    answer.delete!("\n")
    if answer.include?("...")
      # First fragment that finishes a sentence; "" when every fragment is truncated.
      answer = answer.split("...").find { |fragment| fragment =~ /\.$/ }.to_s
    end
    answer.gsub(/^ /, '')
  end
  # Snippets with no complete sentence collapse to "". The previous
  # `compact` only removed nil, so those blanks leaked into the corpus.
  cleaned.reject(&:empty?).uniq
end

def full_search
doc = Nokogiri::HTML(open("https://www.google.com/search?q=#{question}&hl=lang_en"))
doc.css("div span.st").map(&:text)
search(question)
end

def reductivist_search
doc = Nokogiri::HTML(open("https://www.google.com/search?q=#{simplified_question}&hl=lang_en"))
doc.css("div span.st").map(&:text)
parsed_question = Grammar::SentenceParser.parse(question)
query = "wikipedia what is #{(parsed_question.nouns + parsed_question.adjectives).join(' ')}"
search(query)
end

def simplified_question
parsed_question = Grammar::SentenceParser.parse(question)
"wikipedia what is #{(parsed_question.nouns + parsed_question.adjectives).join(' ')}"
# Runs a Google web search for +query+ and returns the result snippets.
#
# query - search terms; words may be separated by spaces or by "+".
#
# Returns an Array with the text of every "div span.st" node in the response.
# If the request or parsing raises, the error is logged and a one-element
# Array holding an apology message is returned instead.
def search(query)
  # Turn "+" separators back into spaces and form-encode the whole query, so
  # raw spaces and reserved characters (&, #, ?) cannot produce an invalid URL.
  encoded_query = URI.encode_www_form_component(query.tr("+", " "))
  url = "https://www.google.com/search?q=#{encoded_query}&hl=lang_en"
  # URI#open comes from open-uri, which the previous bare open(url) call also
  # relied on; Kernel#open no longer accepts URLs on Ruby 3.0+.
  doc = Nokogiri::HTML(URI.parse(url).open)
  doc.css("div span.st").map(&:text)
rescue StandardError => e
  # StandardError rather than Exception so interrupts and exits are not swallowed.
  Alice::Util::Logger.info "*** Parser::Google: Unable to process \"#{self.question}\": #{e}"
  Alice::Util::Logger.info e.backtrace
  return ["Hmm, I can't really answer that since Google is getting suspicious of me. You should probably rotate my IP address again."]
end

end
Expand Down
45 changes: 45 additions & 0 deletions spec/parsers/google_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
require 'spec_helper'

# Checks Parser::Google's snippet clean-up against canned search snippets.
describe "Parser::Google" do

  # Ten canned snippets: the first five stand in for the full search,
  # the last five for the reductivist search.
  let(:results) do
    [
      "The latest Tweets from Coraline Ada Ehmke (@CoralineAda). Code witch. Ruby \nHero. Speaker, writer, podcaster, activist. Creator of the Contributor Covenant.",
      "Coraline Ada Ehmke. Coraline Ada Ehmke is a software developer and open source advocate based in Chicago, Illinois. She began her career as a web developer in 1994 and has worked in a variety of industries, including engineering, consulting, education, advertising, healthcare, and software development infrastructure.",
      "Coraline Ada Ehmke. Notable Rubyist, Nefarious Code Witch, And Notorious \nSocial Justice Warrior. Coraline Ada Ehmke. I am a well-known speaker, writer, ...",
      "Coraline Ada Ehmke | November 30, 2015. On June 18 of this year a friend on \nIRC expressed his frustration with tweets by a person named Elia (@elia).",
      "Code witch. Ruby Hero. Speaker, writer, activist. @CoralineAda on Twitter. - \nCoralineAda.",
      "Jul 6, 2017 ... Well-known programmer Coraline Ada Ehmke was fired from her job at GitHub \nand she had a lot to say about how, and why, she was let go.",
      "Feb 26, 2016 ... Deconstructing Coraline Ada Ehmke's “Contributor Covenant”, And Why It's \nFoolish. Recently, this happened, where the subject of this article ...",
      "May 17, 2018 ... CORALINE ADA EHMKE. Principal Engineer, Stitch Fix. Coraline Ada Ehmke is \nan open-source advocate and developer with over 20 years of ...",
      "Apr 14, 2016 ... Coraline Ada Ehmke is known for the creation of the “Contributor Covenant,” an \nSJW Code ... GITHUB hires Ehmke to work on anti-harassment.",
      "Dec 29, 2016 - 27 min - Uploaded by Trans HackathonCoraline keynote's Trans*H4CK's first online conference with a talk on diversity in tech and ..."
    ]
  end
  let(:parser) { Parser::Google.new("who is coraline ehmke") }

  # Runs the private sanitizer over a single canned snippet and returns
  # the one cleaned string it produces.
  def sanitized(index)
    parser.send(:sanitized_answers, [results[index]]).first
  end

  before do
    first_half, second_half = results.each_slice(5).to_a
    allow(Grammar::DeclarativeSorter).to receive(:sort).and_return(results)
    allow(parser).to receive(:full_search).and_return(first_half)
    allow(parser).to receive(:reductivist_search).and_return(second_half)
  end

  describe "#sanitized_answers" do

    it "strips out newlines" do
      expect(sanitized(0)).to eq("The latest Tweets from Coraline Ada Ehmke (@CoralineAda). Code witch. Ruby Hero. Speaker, writer, podcaster, activist. Creator of the Contributor Covenant.")
    end

    it "splits at ellipses" do
      expect(sanitized(5)).to eq("Well-known programmer Coraline Ada Ehmke was fired from her job at GitHub and she had a lot to say about how, and why, she was let go.")
    end

    it "grabs a complete sentence when there are ellipses" do
      expect(sanitized(8)).to eq("GITHUB hires Ehmke to work on anti-harassment.")
    end

  end
end

0 comments on commit 771cae0

Please sign in to comment.