Skip to content

Commit

Permalink
Fixed search failure due to unexpected parser state
Browse files: browse the repository at this point in the history
  • Loading branch information
ducalex authored Oct 1, 2024
1 parent 9363598 commit 40d7c52
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 67 deletions.
20 changes: 9 additions & 11 deletions nova3/engines/limetorrents.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#VERSION: 4.8
#VERSION: 4.9
# AUTHORS: Lima66
# CONTRIBUTORS: Diego de las Heras ([email protected])

Expand Down Expand Up @@ -38,7 +38,7 @@ def __init__(self, url):
HTMLParser.__init__(self)
self.url = url
self.current_item = {} # dict for found item
self.page_empty = 22000
self.page_items = 0
self.inside_table = False
self.inside_tr = False
self.column_index = -1
Expand Down Expand Up @@ -112,6 +112,7 @@ def handle_endtag(self, tag):
self.column_name = None
if "link" in self.current_item:
prettyPrinter(self.current_item)
self.page_items += 1

def download_torrent(self, info):
# since limetorrents provides torrent links in itorrent (cloudflare protected),
Expand All @@ -128,14 +129,11 @@ def search(self, query, cat='all'):
query = query.replace("%20", "-")
category = self.supported_categories[cat]

parser = self.MyHtmlParser(self.url)
page = 1
while True:
page_url = "{0}/search/{1}/{2}/seeds/{3}/".format(self.url, category, query, page)
for page in range(1, 5):
page_url = f"{self.url}/search/{category}/{query}/seeds/{page}/"
html = retrieve_url(page_url)
lunghezza_html = len(html)
if page > 6 or lunghezza_html <= parser.page_empty:
return
parser = self.MyHtmlParser(self.url)
parser.feed(html)
page += 1
parser.close()
parser.close()
if parser.page_items < 20:
break
30 changes: 7 additions & 23 deletions nova3/engines/solidtorrents.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# VERSION: 2.3
# VERSION: 2.4
# AUTHORS: nKlido

# LICENSING INFORMATION
Expand All @@ -24,7 +24,6 @@
from novaprinter import prettyPrinter
from html.parser import HTMLParser
from datetime import datetime
import math


class solidtorrents(object):
Expand All @@ -47,8 +46,6 @@ def __init__(self, url):
self.parseDate = False
self.column = 0
self.torrentReady = False
self.foundSearchStats = False
self.parseTotalResults = False
self.totalResults = 0

self.torrent_info = self.empty_torrent_info()
Expand All @@ -68,13 +65,6 @@ def empty_torrent_info(self):
def handle_starttag(self, tag, attrs):
params = dict(attrs)

if 'search-stats' in params.get('class', ''):
self.foundSearchStats = True

if (self.foundSearchStats and tag == 'b'):
self.parseTotalResults = True
self.foundSearchStats = False

if 'search-result' in params.get('class', ''):
self.foundResult = True
return
Expand Down Expand Up @@ -115,13 +105,10 @@ def handle_endtag(self, tag):
prettyPrinter(self.torrent_info)
self.torrentReady = False
self.torrent_info = self.empty_torrent_info()
self.totalResults += 1

def handle_data(self, data):

if (self.parseTotalResults):
self.totalResults = int(data.strip())
self.parseTotalResults = False

if (self.parseTitle):
if (bool(data.strip()) and data != '\n'):
self.torrent_info['name'] = data
Expand Down Expand Up @@ -161,12 +148,9 @@ def request(self, searchTerm, category, page=1):
def search(self, what, cat='all'):
category = self.supported_categories[cat]

parser = self.TorrentInfoParser(self.url)
parser.feed(self.request(what, category, 1))

totalPages = min(math.ceil(parser.totalResults / 20), 5)

for page in range(2, totalPages + 1):
for page in range(1, 5):
parser = self.TorrentInfoParser(self.url)
parser.feed(self.request(what, category, page))

parser.close()
parser.close()
if parser.totalResults < 15:
break
27 changes: 10 additions & 17 deletions nova3/engines/torlock.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#VERSION: 2.23
#VERSION: 2.24
# AUTHORS: Douman ([email protected])
# CONTRIBUTORS: Diego de las Heras ([email protected])

from re import compile as re_compile
from html.parser import HTMLParser
from datetime import datetime, timedelta

Expand Down Expand Up @@ -35,6 +34,7 @@ def __init__(self, url):
self.item_bad = False # set to True for malicious links
self.current_item = None # dict for found item
self.item_name = None # key's name in current_item dict
self.page_items = 0
self.parser_class = {"td": "pub_date",
"ts": "size",
"tul": "seeds",
Expand Down Expand Up @@ -91,26 +91,19 @@ def handle_endtag(self, tag):
except Exception:
self.current_item["pub_date"] = -1
prettyPrinter(self.current_item)
self.page_items += 1
self.current_item = {}

def search(self, query, cat='all'):
""" Performs search """
query = query.replace("%20", "-")
category = self.supported_categories[cat]

parser = self.MyHtmlParser(self.url)
page = "".join((self.url, "/", self.supported_categories[cat],
"/torrents/", query, ".html?sort=seeds&page=1"))
html = retrieve_url(page)
parser.feed(html)

counter = 1
additional_pages = re_compile(r"/{0}/torrents/{1}.html\?sort=seeds&page=[0-9]+"
.format(self.supported_categories[cat], query))
list_searches = additional_pages.findall(html)[:-1] # last link is next(i.e. second)
for page in map(lambda link: "".join((self.url, link)), list_searches):
html = retrieve_url(page)
for page in range(1, 5):
parser = self.MyHtmlParser(self.url)
page_url = f"{self.url}/{category}/torrents/{query}.html?sort=seeds&page={page}"
html = retrieve_url(page_url)
parser.feed(html)
counter += 1
if counter > 3:
parser.close()
if parser.page_items < 20:
break
parser.close()
16 changes: 4 additions & 12 deletions nova3/engines/torrentproject.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#VERSION: 1.4
#VERSION: 1.5
#AUTHORS: mauricci

from helpers import retrieve_url
Expand Down Expand Up @@ -102,26 +102,18 @@ def handle_data(self, data):
elif curr_key != 'name':
self.singleResData[curr_key] += data.strip()

def feed(self, html):
HTMLParser.feed(self, html)
self.pageComplete = False
self.insideResults = False
self.insideDataDiv = False
self.spanCount = -1

def search(self, what, cat='all'):
# curr_cat = self.supported_categories[cat]
parser = self.MyHTMLParser(self.url)
what = what.replace('%20', '+')
# analyze first 5 pages of results
for currPage in range(0, 5):
url = self.url + '/browse?t={0}&p={1}'.format(what, currPage)
html = retrieve_url(url)
parser = self.MyHTMLParser(self.url)
parser.feed(html)
if len(parser.pageRes) <= 0:
parser.close()
if len(parser.pageRes) < 20:
break
del parser.pageRes[:]
parser.close()

def download_torrent(self, info):
""" Downloader """
Expand Down
8 changes: 4 additions & 4 deletions nova3/engines/versions.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
eztv: 1.16
jackett: 4.0
limetorrents: 4.8
limetorrents: 4.9
piratebay: 3.3
solidtorrents: 2.3
torlock: 2.23
torrentproject: 1.4
solidtorrents: 2.4
torlock: 2.24
torrentproject: 1.5
torrentscsv: 1.4

0 comments on commit 40d7c52

Please sign in to comment.