From 360bb867b22a2bbafd2260ceff1a206ffb643cfe Mon Sep 17 00:00:00 2001
From: Alex Duchesne <ducalex007@gmail.com>
Date: Thu, 12 Sep 2024 12:45:14 -0400
Subject: [PATCH] Fixed search failure due to unexpected parser state

In many plugins the parser's state wasn't reset between pages.

This meant that if a page left the parser in an unexpected state (truncated response, temporary error, or unexpected HTML), all following pages would fail to yield results.

The torrentproject plugin had already worked around the issue by overriding feed() to reset some of its state between pages.

However, creating a new parser for each page is simpler, so all plugins affected by this issue have been updated to do that.
---
 nova3/engines/limetorrents.py   | 19 ++++++++-----------
 nova3/engines/solidtorrents.py  | 30 +++++++-----------------------
 nova3/engines/torlock.py        | 27 ++++++++++-----------------
 nova3/engines/torrentproject.py | 16 ++++------------
 nova3/engines/versions.txt      |  8 ++++----
 5 files changed, 33 insertions(+), 67 deletions(-)

diff --git a/nova3/engines/limetorrents.py b/nova3/engines/limetorrents.py
index 37d8c5a..248aeda 100644
--- a/nova3/engines/limetorrents.py
+++ b/nova3/engines/limetorrents.py
@@ -1,4 +1,4 @@
-#VERSION: 4.7
+#VERSION: 4.8
 # AUTHORS: Lima66
 # CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
 
@@ -38,7 +38,7 @@ class limetorrents(object):
             self.url = url
             self.current_item = {}  # dict for found item
             self.item_name = None  # key's name in current_item dict
-            self.page_empty = 22000
+            self.page_items = 0
             self.inside_tr = False
             self.findTable = False
             self.parser_class = {"tdnormal": "size",  # class
@@ -113,14 +113,11 @@ class limetorrents(object):
         query = query.replace("%20", "-")
         category = self.supported_categories[cat]
 
-        parser = self.MyHtmlParser(self.url)
-        page = 1
-        while True:
-            page_url = "{0}/search/{1}/{2}/seeds/{3}/".format(self.url, category, query, page)
+        for page in range(1, 5):
+            page_url = f"{self.url}/search/{category}/{query}/seeds/{page}/"
             html = retrieve_url(page_url)
-            lunghezza_html = len(html)
-            if page > 6 or lunghezza_html <= parser.page_empty:
-                return
+            parser = self.MyHtmlParser(self.url)
             parser.feed(html)
-            page += 1
-        parser.close()
+            parser.close()
+            if parser.page_items < 20:
+                break
diff --git a/nova3/engines/solidtorrents.py b/nova3/engines/solidtorrents.py
index 5dfccd6..3a46f6a 100644
--- a/nova3/engines/solidtorrents.py
+++ b/nova3/engines/solidtorrents.py
@@ -1,4 +1,4 @@
-# VERSION: 2.3
+# VERSION: 2.4
 # AUTHORS: nKlido
 
 # LICENSING INFORMATION
@@ -24,7 +24,6 @@ from helpers import retrieve_url
 from novaprinter import prettyPrinter
 from html.parser import HTMLParser
 from datetime import datetime
-import math
 
 
 class solidtorrents(object):
@@ -47,8 +46,6 @@ class solidtorrents(object):
             self.parseDate = False
             self.column = 0
             self.torrentReady = False
-            self.foundSearchStats = False
-            self.parseTotalResults = False
             self.totalResults = 0
 
             self.torrent_info = self.empty_torrent_info()
@@ -68,13 +65,6 @@ class solidtorrents(object):
         def handle_starttag(self, tag, attrs):
             params = dict(attrs)
 
-            if 'search-stats' in params.get('class', ''):
-                self.foundSearchStats = True
-
-            if (self.foundSearchStats and tag == 'b'):
-                self.parseTotalResults = True
-                self.foundSearchStats = False
-
             if 'search-result' in params.get('class', ''):
                 self.foundResult = True
                 return
@@ -115,13 +105,10 @@ class solidtorrents(object):
                 prettyPrinter(self.torrent_info)
                 self.torrentReady = False
                 self.torrent_info = self.empty_torrent_info()
+                self.totalResults += 1
 
         def handle_data(self, data):
 
-            if (self.parseTotalResults):
-                self.totalResults = int(data.strip())
-                self.parseTotalResults = False
-
             if (self.parseTitle):
                 if (bool(data.strip()) and data != '\n'):
                     self.torrent_info['name'] = data
@@ -161,12 +148,9 @@ class solidtorrents(object):
     def search(self, what, cat='all'):
         category = self.supported_categories[cat]
 
-        parser = self.TorrentInfoParser(self.url)
-        parser.feed(self.request(what, category, 1))
-
-        totalPages = min(math.ceil(parser.totalResults / 20), 5)
-
-        for page in range(2, totalPages + 1):
+        for page in range(1, 5):
+            parser = self.TorrentInfoParser(self.url)
             parser.feed(self.request(what, category, page))
-
-        parser.close()
+            parser.close()
+            if parser.totalResults < 15:
+                break
diff --git a/nova3/engines/torlock.py b/nova3/engines/torlock.py
index 7b60263..6aa6a9d 100644
--- a/nova3/engines/torlock.py
+++ b/nova3/engines/torlock.py
@@ -1,8 +1,7 @@
-#VERSION: 2.23
+#VERSION: 2.24
 # AUTHORS: Douman (custparasite@gmx.se)
 # CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
 
-from re import compile as re_compile
 from html.parser import HTMLParser
 from datetime import datetime, timedelta
 
@@ -35,6 +34,7 @@ class torlock(object):
             self.item_bad = False  # set to True for malicious links
             self.current_item = None  # dict for found item
             self.item_name = None  # key's name in current_item dict
+            self.page_items = 0
             self.parser_class = {"td": "pub_date",
                                  "ts": "size",
                                  "tul": "seeds",
@@ -91,26 +91,19 @@ class torlock(object):
                     except Exception:
                         self.current_item["pub_date"] = -1
                     prettyPrinter(self.current_item)
+                    self.page_items += 1
                 self.current_item = {}
 
     def search(self, query, cat='all'):
         """ Performs search """
         query = query.replace("%20", "-")
+        category = self.supported_categories[cat]
 
-        parser = self.MyHtmlParser(self.url)
-        page = "".join((self.url, "/", self.supported_categories[cat],
-                        "/torrents/", query, ".html?sort=seeds&page=1"))
-        html = retrieve_url(page)
-        parser.feed(html)
-
-        counter = 1
-        additional_pages = re_compile(r"/{0}/torrents/{1}.html\?sort=seeds&page=[0-9]+"
-                                      .format(self.supported_categories[cat], query))
-        list_searches = additional_pages.findall(html)[:-1]  # last link is next(i.e. second)
-        for page in map(lambda link: "".join((self.url, link)), list_searches):
-            html = retrieve_url(page)
+        for page in range(1, 5):
+            parser = self.MyHtmlParser(self.url)
+            page_url = f"{self.url}/{category}/torrents/{query}.html?sort=seeds&page={page}"
+            html = retrieve_url(page_url)
             parser.feed(html)
-            counter += 1
-            if counter > 3:
+            parser.close()
+            if parser.page_items < 20:
                 break
-        parser.close()
diff --git a/nova3/engines/torrentproject.py b/nova3/engines/torrentproject.py
index e736871..2db3b8d 100644
--- a/nova3/engines/torrentproject.py
+++ b/nova3/engines/torrentproject.py
@@ -1,4 +1,4 @@
-#VERSION: 1.4
+#VERSION: 1.5
 #AUTHORS: mauricci
 
 from helpers import retrieve_url
@@ -102,26 +102,18 @@ class torrentproject(object):
                             elif curr_key != 'name':
                                 self.singleResData[curr_key] += data.strip()
 
-        def feed(self, html):
-            HTMLParser.feed(self, html)
-            self.pageComplete = False
-            self.insideResults = False
-            self.insideDataDiv = False
-            self.spanCount = -1
-
     def search(self, what, cat='all'):
         # curr_cat = self.supported_categories[cat]
-        parser = self.MyHTMLParser(self.url)
         what = what.replace('%20', '+')
         # analyze first 5 pages of results
         for currPage in range(0, 5):
             url = self.url + '/browse?t={0}&p={1}'.format(what, currPage)
             html = retrieve_url(url)
+            parser = self.MyHTMLParser(self.url)
             parser.feed(html)
-            if len(parser.pageRes) <= 0:
+            parser.close()
+            if len(parser.pageRes) < 20:
                 break
-            del parser.pageRes[:]
-        parser.close()
 
     def download_torrent(self, info):
         """ Downloader """
diff --git a/nova3/engines/versions.txt b/nova3/engines/versions.txt
index 672def0..65fc148 100644
--- a/nova3/engines/versions.txt
+++ b/nova3/engines/versions.txt
@@ -1,8 +1,8 @@
 eztv: 1.16
 jackett: 4.0
-limetorrents: 4.7
+limetorrents: 4.8
 piratebay: 3.3
-solidtorrents: 2.3
-torlock: 2.23
-torrentproject: 1.4
+solidtorrents: 2.4
+torlock: 2.24
+torrentproject: 1.5
 torrentscsv: 1.4