Fixed search failure due to unexpected parser state

2024-10-01 18:05:38 -04:00 · 2024-10-01 18:05:38 -04:00 · 40d7c52d6e
parent 93635981e8
commit 40d7c52d6e
5 changed files with 34 additions and 67 deletions
--- a/nova3/engines/limetorrents.py
+++ b/nova3/engines/limetorrents.py
@@ -1,4 +1,4 @@
-#VERSION: 4.8
+#VERSION: 4.9
 # AUTHORS: Lima66
 # CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
@@ -38,7 +38,7 @@ class limetorrents(object):
            HTMLParser.__init__(self)
            self.url = url
            self.current_item = {}  # dict for found item
-            self.page_empty = 22000
+            self.page_items = 0
            self.inside_table = False
            self.inside_tr = False
            self.column_index = -1
@@ -112,6 +112,7 @@ class limetorrents(object):
                self.column_name = None
                if "link" in self.current_item:
                    prettyPrinter(self.current_item)
+                    self.page_items += 1
    def download_torrent(self, info):
        # since limetorrents provides torrent links in itorrent (cloudflare protected),
@@ -128,14 +129,11 @@ class limetorrents(object):
        query = query.replace("%20", "-")
        category = self.supported_categories[cat]
-        parser = self.MyHtmlParser(self.url)
+        for page in range(1, 5):
-        page = 1
+            page_url = f"{self.url}/search/{category}/{query}/seeds/{page}/"
-        while True:
-            page_url = "{0}/search/{1}/{2}/seeds/{3}/".format(self.url, category, query, page)
            html = retrieve_url(page_url)
-            lunghezza_html = len(html)
+            parser = self.MyHtmlParser(self.url)
-            if page > 6 or lunghezza_html <= parser.page_empty:
-                return
            parser.feed(html)
-            page += 1
+            parser.close()
-        parser.close()
+            if parser.page_items < 20:
+                break
--- a/nova3/engines/solidtorrents.py
+++ b/nova3/engines/solidtorrents.py
@@ -1,4 +1,4 @@
-# VERSION: 2.3
+# VERSION: 2.4
 # AUTHORS: nKlido
 # LICENSING INFORMATION
@@ -24,7 +24,6 @@ from helpers import retrieve_url
 from novaprinter import prettyPrinter
 from html.parser import HTMLParser
 from datetime import datetime
-import math
 class solidtorrents(object):
@@ -47,8 +46,6 @@ class solidtorrents(object):
            self.parseDate = False
            self.column = 0
            self.torrentReady = False
-            self.foundSearchStats = False
-            self.parseTotalResults = False
            self.totalResults = 0
            self.torrent_info = self.empty_torrent_info()
@@ -68,13 +65,6 @@ class solidtorrents(object):
        def handle_starttag(self, tag, attrs):
            params = dict(attrs)
-            if 'search-stats' in params.get('class', ''):
-                self.foundSearchStats = True
-            if (self.foundSearchStats and tag == 'b'):
-                self.parseTotalResults = True
-                self.foundSearchStats = False
            if 'search-result' in params.get('class', ''):
                self.foundResult = True
                return
@@ -115,13 +105,10 @@ class solidtorrents(object):
                prettyPrinter(self.torrent_info)
                self.torrentReady = False
                self.torrent_info = self.empty_torrent_info()
+                self.totalResults += 1
        def handle_data(self, data):
-            if (self.parseTotalResults):
-                self.totalResults = int(data.strip())
-                self.parseTotalResults = False
            if (self.parseTitle):
                if (bool(data.strip()) and data != '\n'):
                    self.torrent_info['name'] = data
@@ -161,12 +148,9 @@ class solidtorrents(object):
    def search(self, what, cat='all'):
        category = self.supported_categories[cat]
-        parser = self.TorrentInfoParser(self.url)
+        for page in range(1, 5):
-        parser.feed(self.request(what, category, 1))
+            parser = self.TorrentInfoParser(self.url)
-        totalPages = min(math.ceil(parser.totalResults / 20), 5)
-        for page in range(2, totalPages + 1):
            parser.feed(self.request(what, category, page))
-
+            parser.close()
-        parser.close()
+            if parser.totalResults < 15:
+                break
--- a/nova3/engines/torlock.py
+++ b/nova3/engines/torlock.py
@@ -1,8 +1,7 @@
-#VERSION: 2.23
+#VERSION: 2.24
 # AUTHORS: Douman (custparasite@gmx.se)
 # CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
-from re import compile as re_compile
 from html.parser import HTMLParser
 from datetime import datetime, timedelta
@@ -35,6 +34,7 @@ class torlock(object):
            self.item_bad = False  # set to True for malicious links
            self.current_item = None  # dict for found item
            self.item_name = None  # key's name in current_item dict
+            self.page_items = 0
            self.parser_class = {"td": "pub_date",
                                 "ts": "size",
                                 "tul": "seeds",
@@ -91,26 +91,19 @@ class torlock(object):
                    except Exception:
                        self.current_item["pub_date"] = -1
                    prettyPrinter(self.current_item)
+                    self.page_items += 1
                self.current_item = {}
    def search(self, query, cat='all'):
        """ Performs search """
        query = query.replace("%20", "-")
+        category = self.supported_categories[cat]
-        parser = self.MyHtmlParser(self.url)
+        for page in range(1, 5):
-        page = "".join((self.url, "/", self.supported_categories[cat],
+            parser = self.MyHtmlParser(self.url)
-                        "/torrents/", query, ".html?sort=seeds&page=1"))
+            page_url = f"{self.url}/{category}/torrents/{query}.html?sort=seeds&page={page}"
-        html = retrieve_url(page)
+            html = retrieve_url(page_url)
-        parser.feed(html)
-        counter = 1
-        additional_pages = re_compile(r"/{0}/torrents/{1}.html\?sort=seeds&page=[0-9]+"
-                                      .format(self.supported_categories[cat], query))
-        list_searches = additional_pages.findall(html)[:-1]  # last link is next(i.e. second)
-        for page in map(lambda link: "".join((self.url, link)), list_searches):
-            html = retrieve_url(page)
            parser.feed(html)
-            counter += 1
+            parser.close()
-            if counter > 3:
+            if parser.page_items < 20:
                break
-        parser.close()
--- a/nova3/engines/torrentproject.py
+++ b/nova3/engines/torrentproject.py
@@ -1,4 +1,4 @@
-#VERSION: 1.4
+#VERSION: 1.5
 #AUTHORS: mauricci
 from helpers import retrieve_url
@@ -102,26 +102,18 @@ class torrentproject(object):
                            elif curr_key != 'name':
                                self.singleResData[curr_key] += data.strip()
-        def feed(self, html):
-            HTMLParser.feed(self, html)
-            self.pageComplete = False
-            self.insideResults = False
-            self.insideDataDiv = False
-            self.spanCount = -1
    def search(self, what, cat='all'):
        # curr_cat = self.supported_categories[cat]
-        parser = self.MyHTMLParser(self.url)
        what = what.replace('%20', '+')
        # analyze first 5 pages of results
        for currPage in range(0, 5):
            url = self.url + '/browse?t={0}&p={1}'.format(what, currPage)
            html = retrieve_url(url)
+            parser = self.MyHTMLParser(self.url)
            parser.feed(html)
-            if len(parser.pageRes) <= 0:
+            parser.close()
+            if len(parser.pageRes) < 20:
                break
-            del parser.pageRes[:]
-        parser.close()
    def download_torrent(self, info):
        """ Downloader """
--- a/nova3/engines/versions.txt
+++ b/nova3/engines/versions.txt
@@ -1,8 +1,8 @@
 eztv: 1.16
 jackett: 4.0
-limetorrents: 4.8
+limetorrents: 4.9
 piratebay: 3.3
-solidtorrents: 2.3
+solidtorrents: 2.4
-torlock: 2.23
+torlock: 2.24
-torrentproject: 1.4
+torrentproject: 1.5
 torrentscsv: 1.4