Fixed search failure due to unexpected parser state

qbittorrent · Oct 1, 2024 · 40d7c52
1 parent 9363598
commit 40d7c52
Show file tree

Hide file tree

Showing 5 changed files with 34 additions and 67 deletions.
diff --git a/nova3/engines/limetorrents.py b/nova3/engines/limetorrents.py
@@ -1,4 +1,4 @@
-#VERSION: 4.8
+#VERSION: 4.9
 # AUTHORS: Lima66
 # CONTRIBUTORS: Diego de las Heras ([email protected])
 
@@ -38,7 +38,7 @@ def __init__(self, url):
             HTMLParser.__init__(self)
             self.url = url
             self.current_item = {}  # dict for found item
-            self.page_empty = 22000
+            self.page_items = 0
             self.inside_table = False
             self.inside_tr = False
             self.column_index = -1
@@ -112,6 +112,7 @@ def handle_endtag(self, tag):
                 self.column_name = None
                 if "link" in self.current_item:
                     prettyPrinter(self.current_item)
+                    self.page_items += 1
 
     def download_torrent(self, info):
         # since limetorrents provides torrent links in itorrent (cloudflare protected),
@@ -128,14 +129,11 @@ def search(self, query, cat='all'):
         query = query.replace("%20", "-")
         category = self.supported_categories[cat]
 
-        parser = self.MyHtmlParser(self.url)
-        page = 1
-        while True:
-            page_url = "{0}/search/{1}/{2}/seeds/{3}/".format(self.url, category, query, page)
+        for page in range(1, 5):
+            page_url = f"{self.url}/search/{category}/{query}/seeds/{page}/"
             html = retrieve_url(page_url)
-            lunghezza_html = len(html)
-            if page > 6 or lunghezza_html <= parser.page_empty:
-                return
+            parser = self.MyHtmlParser(self.url)
             parser.feed(html)
-            page += 1
-        parser.close()
+            parser.close()
+            if parser.page_items < 20:
+                break
diff --git a/nova3/engines/solidtorrents.py b/nova3/engines/solidtorrents.py
@@ -1,4 +1,4 @@
-# VERSION: 2.3
+# VERSION: 2.4
 # AUTHORS: nKlido
 
 # LICENSING INFORMATION
@@ -24,7 +24,6 @@
 from novaprinter import prettyPrinter
 from html.parser import HTMLParser
 from datetime import datetime
-import math
 
 
 class solidtorrents(object):
@@ -47,8 +46,6 @@ def __init__(self, url):
             self.parseDate = False
             self.column = 0
             self.torrentReady = False
-            self.foundSearchStats = False
-            self.parseTotalResults = False
             self.totalResults = 0
 
             self.torrent_info = self.empty_torrent_info()
@@ -68,13 +65,6 @@ def empty_torrent_info(self):
         def handle_starttag(self, tag, attrs):
             params = dict(attrs)
 
-            if 'search-stats' in params.get('class', ''):
-                self.foundSearchStats = True
-
-            if (self.foundSearchStats and tag == 'b'):
-                self.parseTotalResults = True
-                self.foundSearchStats = False
-
             if 'search-result' in params.get('class', ''):
                 self.foundResult = True
                 return
@@ -115,13 +105,10 @@ def handle_endtag(self, tag):
                 prettyPrinter(self.torrent_info)
                 self.torrentReady = False
                 self.torrent_info = self.empty_torrent_info()
+                self.totalResults += 1
 
         def handle_data(self, data):
 
-            if (self.parseTotalResults):
-                self.totalResults = int(data.strip())
-                self.parseTotalResults = False
-
             if (self.parseTitle):
                 if (bool(data.strip()) and data != '\n'):
                     self.torrent_info['name'] = data
@@ -161,12 +148,9 @@ def request(self, searchTerm, category, page=1):
     def search(self, what, cat='all'):
         category = self.supported_categories[cat]
 
-        parser = self.TorrentInfoParser(self.url)
-        parser.feed(self.request(what, category, 1))
-
-        totalPages = min(math.ceil(parser.totalResults / 20), 5)
-
-        for page in range(2, totalPages + 1):
+        for page in range(1, 5):
+            parser = self.TorrentInfoParser(self.url)
             parser.feed(self.request(what, category, page))
-
-        parser.close()
+            parser.close()
+            if parser.totalResults < 15:
+                break
diff --git a/nova3/engines/torlock.py b/nova3/engines/torlock.py
@@ -1,8 +1,7 @@
-#VERSION: 2.23
+#VERSION: 2.24
 # AUTHORS: Douman ([email protected])
 # CONTRIBUTORS: Diego de las Heras ([email protected])
 
-from re import compile as re_compile
 from html.parser import HTMLParser
 from datetime import datetime, timedelta
 
@@ -35,6 +34,7 @@ def __init__(self, url):
             self.item_bad = False  # set to True for malicious links
             self.current_item = None  # dict for found item
             self.item_name = None  # key's name in current_item dict
+            self.page_items = 0
             self.parser_class = {"td": "pub_date",
                                  "ts": "size",
                                  "tul": "seeds",
@@ -91,26 +91,19 @@ def handle_endtag(self, tag):
                     except Exception:
                         self.current_item["pub_date"] = -1
                     prettyPrinter(self.current_item)
+                    self.page_items += 1
                 self.current_item = {}
 
     def search(self, query, cat='all'):
         """ Performs search """
         query = query.replace("%20", "-")
+        category = self.supported_categories[cat]
 
-        parser = self.MyHtmlParser(self.url)
-        page = "".join((self.url, "/", self.supported_categories[cat],
-                        "/torrents/", query, ".html?sort=seeds&page=1"))
-        html = retrieve_url(page)
-        parser.feed(html)
-
-        counter = 1
-        additional_pages = re_compile(r"/{0}/torrents/{1}.html\?sort=seeds&page=[0-9]+"
-                                      .format(self.supported_categories[cat], query))
-        list_searches = additional_pages.findall(html)[:-1]  # last link is next(i.e. second)
-        for page in map(lambda link: "".join((self.url, link)), list_searches):
-            html = retrieve_url(page)
+        for page in range(1, 5):
+            parser = self.MyHtmlParser(self.url)
+            page_url = f"{self.url}/{category}/torrents/{query}.html?sort=seeds&page={page}"
+            html = retrieve_url(page_url)
             parser.feed(html)
-            counter += 1
-            if counter > 3:
+            parser.close()
+            if parser.page_items < 20:
                 break
-        parser.close()
diff --git a/nova3/engines/torrentproject.py b/nova3/engines/torrentproject.py
@@ -1,4 +1,4 @@
-#VERSION: 1.4
+#VERSION: 1.5
 #AUTHORS: mauricci
 
 from helpers import retrieve_url
@@ -102,26 +102,18 @@ def handle_data(self, data):
                             elif curr_key != 'name':
                                 self.singleResData[curr_key] += data.strip()
 
-        def feed(self, html):
-            HTMLParser.feed(self, html)
-            self.pageComplete = False
-            self.insideResults = False
-            self.insideDataDiv = False
-            self.spanCount = -1
-
     def search(self, what, cat='all'):
         # curr_cat = self.supported_categories[cat]
-        parser = self.MyHTMLParser(self.url)
         what = what.replace('%20', '+')
         # analyze first 5 pages of results
         for currPage in range(0, 5):
             url = self.url + '/browse?t={0}&p={1}'.format(what, currPage)
             html = retrieve_url(url)
+            parser = self.MyHTMLParser(self.url)
             parser.feed(html)
-            if len(parser.pageRes) <= 0:
+            parser.close()
+            if len(parser.pageRes) < 20:
                 break
-            del parser.pageRes[:]
-        parser.close()
 
     def download_torrent(self, info):
         """ Downloader """

diff --git a/nova3/engines/versions.txt b/nova3/engines/versions.txt
@@ -1,8 +1,8 @@
 eztv: 1.16
 jackett: 4.0
-limetorrents: 4.8
+limetorrents: 4.9
 piratebay: 3.3
-solidtorrents: 2.3
-torlock: 2.23
-torrentproject: 1.4
+solidtorrents: 2.4
+torlock: 2.24
+torrentproject: 1.5
 torrentscsv: 1.4