Fixed search failure due to unexpected parser state
This commit is contained in:
parent
93635981e8
commit
40d7c52d6e
|
|
@ -1,4 +1,4 @@
|
||||||
#VERSION: 4.8
|
#VERSION: 4.9
|
||||||
# AUTHORS: Lima66
|
# AUTHORS: Lima66
|
||||||
# CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
|
# CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
|
||||||
|
|
||||||
|
|
@ -38,7 +38,7 @@ class limetorrents(object):
|
||||||
HTMLParser.__init__(self)
|
HTMLParser.__init__(self)
|
||||||
self.url = url
|
self.url = url
|
||||||
self.current_item = {} # dict for found item
|
self.current_item = {} # dict for found item
|
||||||
self.page_empty = 22000
|
self.page_items = 0
|
||||||
self.inside_table = False
|
self.inside_table = False
|
||||||
self.inside_tr = False
|
self.inside_tr = False
|
||||||
self.column_index = -1
|
self.column_index = -1
|
||||||
|
|
@ -112,6 +112,7 @@ class limetorrents(object):
|
||||||
self.column_name = None
|
self.column_name = None
|
||||||
if "link" in self.current_item:
|
if "link" in self.current_item:
|
||||||
prettyPrinter(self.current_item)
|
prettyPrinter(self.current_item)
|
||||||
|
self.page_items += 1
|
||||||
|
|
||||||
def download_torrent(self, info):
|
def download_torrent(self, info):
|
||||||
# since limetorrents provides torrent links in itorrent (cloudflare protected),
|
# since limetorrents provides torrent links in itorrent (cloudflare protected),
|
||||||
|
|
@ -128,14 +129,11 @@ class limetorrents(object):
|
||||||
query = query.replace("%20", "-")
|
query = query.replace("%20", "-")
|
||||||
category = self.supported_categories[cat]
|
category = self.supported_categories[cat]
|
||||||
|
|
||||||
parser = self.MyHtmlParser(self.url)
|
for page in range(1, 5):
|
||||||
page = 1
|
page_url = f"{self.url}/search/{category}/{query}/seeds/{page}/"
|
||||||
while True:
|
|
||||||
page_url = "{0}/search/{1}/{2}/seeds/{3}/".format(self.url, category, query, page)
|
|
||||||
html = retrieve_url(page_url)
|
html = retrieve_url(page_url)
|
||||||
lunghezza_html = len(html)
|
parser = self.MyHtmlParser(self.url)
|
||||||
if page > 6 or lunghezza_html <= parser.page_empty:
|
|
||||||
return
|
|
||||||
parser.feed(html)
|
parser.feed(html)
|
||||||
page += 1
|
parser.close()
|
||||||
parser.close()
|
if parser.page_items < 20:
|
||||||
|
break
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
# VERSION: 2.3
|
# VERSION: 2.4
|
||||||
# AUTHORS: nKlido
|
# AUTHORS: nKlido
|
||||||
|
|
||||||
# LICENSING INFORMATION
|
# LICENSING INFORMATION
|
||||||
|
|
@ -24,7 +24,6 @@ from helpers import retrieve_url
|
||||||
from novaprinter import prettyPrinter
|
from novaprinter import prettyPrinter
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import math
|
|
||||||
|
|
||||||
|
|
||||||
class solidtorrents(object):
|
class solidtorrents(object):
|
||||||
|
|
@ -47,8 +46,6 @@ class solidtorrents(object):
|
||||||
self.parseDate = False
|
self.parseDate = False
|
||||||
self.column = 0
|
self.column = 0
|
||||||
self.torrentReady = False
|
self.torrentReady = False
|
||||||
self.foundSearchStats = False
|
|
||||||
self.parseTotalResults = False
|
|
||||||
self.totalResults = 0
|
self.totalResults = 0
|
||||||
|
|
||||||
self.torrent_info = self.empty_torrent_info()
|
self.torrent_info = self.empty_torrent_info()
|
||||||
|
|
@ -68,13 +65,6 @@ class solidtorrents(object):
|
||||||
def handle_starttag(self, tag, attrs):
|
def handle_starttag(self, tag, attrs):
|
||||||
params = dict(attrs)
|
params = dict(attrs)
|
||||||
|
|
||||||
if 'search-stats' in params.get('class', ''):
|
|
||||||
self.foundSearchStats = True
|
|
||||||
|
|
||||||
if (self.foundSearchStats and tag == 'b'):
|
|
||||||
self.parseTotalResults = True
|
|
||||||
self.foundSearchStats = False
|
|
||||||
|
|
||||||
if 'search-result' in params.get('class', ''):
|
if 'search-result' in params.get('class', ''):
|
||||||
self.foundResult = True
|
self.foundResult = True
|
||||||
return
|
return
|
||||||
|
|
@ -115,13 +105,10 @@ class solidtorrents(object):
|
||||||
prettyPrinter(self.torrent_info)
|
prettyPrinter(self.torrent_info)
|
||||||
self.torrentReady = False
|
self.torrentReady = False
|
||||||
self.torrent_info = self.empty_torrent_info()
|
self.torrent_info = self.empty_torrent_info()
|
||||||
|
self.totalResults += 1
|
||||||
|
|
||||||
def handle_data(self, data):
|
def handle_data(self, data):
|
||||||
|
|
||||||
if (self.parseTotalResults):
|
|
||||||
self.totalResults = int(data.strip())
|
|
||||||
self.parseTotalResults = False
|
|
||||||
|
|
||||||
if (self.parseTitle):
|
if (self.parseTitle):
|
||||||
if (bool(data.strip()) and data != '\n'):
|
if (bool(data.strip()) and data != '\n'):
|
||||||
self.torrent_info['name'] = data
|
self.torrent_info['name'] = data
|
||||||
|
|
@ -161,12 +148,9 @@ class solidtorrents(object):
|
||||||
def search(self, what, cat='all'):
|
def search(self, what, cat='all'):
|
||||||
category = self.supported_categories[cat]
|
category = self.supported_categories[cat]
|
||||||
|
|
||||||
parser = self.TorrentInfoParser(self.url)
|
for page in range(1, 5):
|
||||||
parser.feed(self.request(what, category, 1))
|
parser = self.TorrentInfoParser(self.url)
|
||||||
|
|
||||||
totalPages = min(math.ceil(parser.totalResults / 20), 5)
|
|
||||||
|
|
||||||
for page in range(2, totalPages + 1):
|
|
||||||
parser.feed(self.request(what, category, page))
|
parser.feed(self.request(what, category, page))
|
||||||
|
parser.close()
|
||||||
parser.close()
|
if parser.totalResults < 15:
|
||||||
|
break
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,7 @@
|
||||||
#VERSION: 2.23
|
#VERSION: 2.24
|
||||||
# AUTHORS: Douman (custparasite@gmx.se)
|
# AUTHORS: Douman (custparasite@gmx.se)
|
||||||
# CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
|
# CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
|
||||||
|
|
||||||
from re import compile as re_compile
|
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
|
@ -35,6 +34,7 @@ class torlock(object):
|
||||||
self.item_bad = False # set to True for malicious links
|
self.item_bad = False # set to True for malicious links
|
||||||
self.current_item = None # dict for found item
|
self.current_item = None # dict for found item
|
||||||
self.item_name = None # key's name in current_item dict
|
self.item_name = None # key's name in current_item dict
|
||||||
|
self.page_items = 0
|
||||||
self.parser_class = {"td": "pub_date",
|
self.parser_class = {"td": "pub_date",
|
||||||
"ts": "size",
|
"ts": "size",
|
||||||
"tul": "seeds",
|
"tul": "seeds",
|
||||||
|
|
@ -91,26 +91,19 @@ class torlock(object):
|
||||||
except Exception:
|
except Exception:
|
||||||
self.current_item["pub_date"] = -1
|
self.current_item["pub_date"] = -1
|
||||||
prettyPrinter(self.current_item)
|
prettyPrinter(self.current_item)
|
||||||
|
self.page_items += 1
|
||||||
self.current_item = {}
|
self.current_item = {}
|
||||||
|
|
||||||
def search(self, query, cat='all'):
|
def search(self, query, cat='all'):
|
||||||
""" Performs search """
|
""" Performs search """
|
||||||
query = query.replace("%20", "-")
|
query = query.replace("%20", "-")
|
||||||
|
category = self.supported_categories[cat]
|
||||||
|
|
||||||
parser = self.MyHtmlParser(self.url)
|
for page in range(1, 5):
|
||||||
page = "".join((self.url, "/", self.supported_categories[cat],
|
parser = self.MyHtmlParser(self.url)
|
||||||
"/torrents/", query, ".html?sort=seeds&page=1"))
|
page_url = f"{self.url}/{category}/torrents/{query}.html?sort=seeds&page={page}"
|
||||||
html = retrieve_url(page)
|
html = retrieve_url(page_url)
|
||||||
parser.feed(html)
|
|
||||||
|
|
||||||
counter = 1
|
|
||||||
additional_pages = re_compile(r"/{0}/torrents/{1}.html\?sort=seeds&page=[0-9]+"
|
|
||||||
.format(self.supported_categories[cat], query))
|
|
||||||
list_searches = additional_pages.findall(html)[:-1] # last link is next(i.e. second)
|
|
||||||
for page in map(lambda link: "".join((self.url, link)), list_searches):
|
|
||||||
html = retrieve_url(page)
|
|
||||||
parser.feed(html)
|
parser.feed(html)
|
||||||
counter += 1
|
parser.close()
|
||||||
if counter > 3:
|
if parser.page_items < 20:
|
||||||
break
|
break
|
||||||
parser.close()
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
#VERSION: 1.4
|
#VERSION: 1.5
|
||||||
#AUTHORS: mauricci
|
#AUTHORS: mauricci
|
||||||
|
|
||||||
from helpers import retrieve_url
|
from helpers import retrieve_url
|
||||||
|
|
@ -102,26 +102,18 @@ class torrentproject(object):
|
||||||
elif curr_key != 'name':
|
elif curr_key != 'name':
|
||||||
self.singleResData[curr_key] += data.strip()
|
self.singleResData[curr_key] += data.strip()
|
||||||
|
|
||||||
def feed(self, html):
|
|
||||||
HTMLParser.feed(self, html)
|
|
||||||
self.pageComplete = False
|
|
||||||
self.insideResults = False
|
|
||||||
self.insideDataDiv = False
|
|
||||||
self.spanCount = -1
|
|
||||||
|
|
||||||
def search(self, what, cat='all'):
|
def search(self, what, cat='all'):
|
||||||
# curr_cat = self.supported_categories[cat]
|
# curr_cat = self.supported_categories[cat]
|
||||||
parser = self.MyHTMLParser(self.url)
|
|
||||||
what = what.replace('%20', '+')
|
what = what.replace('%20', '+')
|
||||||
# analyze first 5 pages of results
|
# analyze first 5 pages of results
|
||||||
for currPage in range(0, 5):
|
for currPage in range(0, 5):
|
||||||
url = self.url + '/browse?t={0}&p={1}'.format(what, currPage)
|
url = self.url + '/browse?t={0}&p={1}'.format(what, currPage)
|
||||||
html = retrieve_url(url)
|
html = retrieve_url(url)
|
||||||
|
parser = self.MyHTMLParser(self.url)
|
||||||
parser.feed(html)
|
parser.feed(html)
|
||||||
if len(parser.pageRes) <= 0:
|
parser.close()
|
||||||
|
if len(parser.pageRes) < 20:
|
||||||
break
|
break
|
||||||
del parser.pageRes[:]
|
|
||||||
parser.close()
|
|
||||||
|
|
||||||
def download_torrent(self, info):
|
def download_torrent(self, info):
|
||||||
""" Downloader """
|
""" Downloader """
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
eztv: 1.16
|
eztv: 1.16
|
||||||
jackett: 4.0
|
jackett: 4.0
|
||||||
limetorrents: 4.8
|
limetorrents: 4.9
|
||||||
piratebay: 3.3
|
piratebay: 3.3
|
||||||
solidtorrents: 2.3
|
solidtorrents: 2.4
|
||||||
torlock: 2.23
|
torlock: 2.24
|
||||||
torrentproject: 1.4
|
torrentproject: 1.5
|
||||||
torrentscsv: 1.4
|
torrentscsv: 1.4
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue