Skip to content

Commit

Permalink
Fix dammit
Browse files Browse the repository at this point in the history
  • Loading branch information
CoralineAda committed Dec 16, 2014
1 parent 399e8b8 commit 5b4c135
Showing 1 changed file with 11 additions and 14 deletions.
25 changes: 11 additions & 14 deletions alice/parser/url.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,31 +13,28 @@ def initialize(url)

# Returns the parsed page with nodes matching //script and //css removed,
# memoized in @content.
#
# Returns nil when +source+ yields nothing to parse. +source+ is consulted on
# every call (before the memoized value) so a missing source always wins.
def content
  return unless (document_body = source)

  # Parse exactly once. The previous body also built a second, throwaway
  # document and ran it through Sanitize without using the result.
  @content ||= begin
    document = Nokogiri::HTML(document_body.to_s)
    document.search("//script").remove
    document.search("//css").remove
    document
  end
end

# Fetches +url+ and returns the parsed document, memoized in @source.
#
# The URL is opened once and handed to Nokogiri::HTML. A document with no
# <html> node is discarded; otherwise nodes matching //script and //css are
# removed before the document is cached.
#
# Returns the cleaned document, or nil when the document has no <html> node
# or when fetching/parsing raises (the failure is logged, not re-raised).
#
# NOTE(review): "//css" selects <css> elements; if inline stylesheets were
# the target, "//style" is probably what was meant -- confirm before changing.
# NOTE(review): bare `open(url)` presumably relies on open-uri extending
# Kernel#open; verify this against the Ruby version the bot runs on.
def source
  @source ||= begin
    document = Nokogiri::HTML(open(url))
    # Only keep (and clean) documents that actually contain an <html> node;
    # a falsy result leaves @source unset so a later call can retry.
    if document.search("//html").any?
      document.search("//script").remove
      document.search("//css").remove
      document
    end
  end
rescue StandardError => e
  # StandardError covers network and parsing failures without swallowing
  # interrupts or SystemExit the way `rescue Exception` did.
  Alice::Util::Logger.info("*** Couldn't process URL for #{url}")
  Alice::Util::Logger.info e.backtrace
  nil
end

def preview
return unless source
title_node = source.search("//title")
title_node ||= source.search("//h1")
title_node ||= source.search("//h2")
snippet = source.xpath("//p").map(&:content).detect do |content|
return unless content.present?
title_node = content.search("//title")
title_node ||= content.search("//h1")
title_node ||= content.search("//h2")
snippet = content.xpath("//p").map(&:content).detect do |content|
content.length > 25
end
snippet = truncate(snippet.to_s.strip.gsub(/[\n\r ]+/," ")).split('|')[0]
Expand Down

0 comments on commit 5b4c135

Please sign in to comment.