Add pub_date support to limetorrents
I had to overhaul the parser a little because the old way of using td classes didn't work for our purpose.
This commit is contained in:
parent
3a88c6fb10
commit
93635981e8
|
|
@ -1,8 +1,9 @@
|
||||||
#VERSION: 4.7
|
#VERSION: 4.8
|
||||||
# AUTHORS: Lima66
|
# AUTHORS: Lima66
|
||||||
# CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
|
# CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
from datetime import datetime, timedelta
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
|
||||||
|
|
@ -37,38 +38,49 @@ class limetorrents(object):
|
||||||
HTMLParser.__init__(self)
|
HTMLParser.__init__(self)
|
||||||
self.url = url
|
self.url = url
|
||||||
self.current_item = {} # dict for found item
|
self.current_item = {} # dict for found item
|
||||||
self.item_name = None # key's name in current_item dict
|
|
||||||
self.page_empty = 22000
|
self.page_empty = 22000
|
||||||
|
self.inside_table = False
|
||||||
self.inside_tr = False
|
self.inside_tr = False
|
||||||
self.findTable = False
|
self.column_index = -1
|
||||||
self.parser_class = {"tdnormal": "size", # class
|
self.column_name = None # key's name in current_item dict
|
||||||
"tdseed": "seeds",
|
self.columns = ["name", "pub_date", "size", "seeds", "leech"]
|
||||||
"tdleech": "leech"}
|
|
||||||
|
now = datetime.now()
|
||||||
|
self.date_parsers = {
|
||||||
|
r"yesterday": lambda m: now - timedelta(days=1),
|
||||||
|
r"last\s+month": lambda m: now - timedelta(days=30),
|
||||||
|
r"(\d+)\s+years?": lambda m: now - timedelta(days=int(m[1]) * 365),
|
||||||
|
r"(\d+)\s+months?": lambda m: now - timedelta(days=int(m[1]) * 30),
|
||||||
|
r"(\d+)\s+days?": lambda m: now - timedelta(days=int(m[1])),
|
||||||
|
r"(\d+)\s+hours?": lambda m: now - timedelta(hours=int(m[1])),
|
||||||
|
r"(\d+)\s+minutes?": lambda m: now - timedelta(minutes=int(m[1])),
|
||||||
|
}
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
|
def handle_starttag(self, tag, attrs):
|
||||||
|
|
||||||
params = dict(attrs)
|
params = dict(attrs)
|
||||||
if params.get('class') == 'table2':
|
|
||||||
self.findTable = True
|
|
||||||
|
|
||||||
if tag == self.TR and self.findTable and (params.get('bgcolor') == '#F4F4F4' or params.get('bgcolor') == '#FFFFFF'): # noqa
|
if params.get('class') == 'table2':
|
||||||
self.inside_tr = True
|
self.inside_table = True
|
||||||
self.current_item = {}
|
elif not self.inside_table:
|
||||||
if not self.inside_tr:
|
|
||||||
return
|
return
|
||||||
|
|
||||||
if self.inside_tr and tag == self.TD:
|
if tag == self.TR and (params.get('bgcolor') == '#F4F4F4' or params.get('bgcolor') == '#FFFFFF'): # noqa
|
||||||
if "class" in params:
|
self.inside_tr = True
|
||||||
self.item_name = self.parser_class.get(params["class"], None)
|
self.column_index = -1
|
||||||
if self.item_name:
|
self.current_item = {"engine_url": self.url}
|
||||||
self.current_item[self.item_name] = -1
|
elif not self.inside_tr:
|
||||||
|
return
|
||||||
|
|
||||||
if self.inside_tr and tag == self.A and self.HREF in params:
|
if tag == self.TD:
|
||||||
|
self.column_index += 1
|
||||||
|
if self.column_index < len(self.columns):
|
||||||
|
self.column_name = self.columns[self.column_index]
|
||||||
|
else:
|
||||||
|
self.column_name = None
|
||||||
|
|
||||||
|
if self.column_name == "name" and tag == self.A and self.HREF in params:
|
||||||
link = params["href"]
|
link = params["href"]
|
||||||
if link.startswith("http://itorrents.org/torrent/"):
|
if link.endswith(".html"):
|
||||||
self.current_item["engine_url"] = self.url
|
|
||||||
self.item_name = "name"
|
|
||||||
elif link.endswith(".html"):
|
|
||||||
try:
|
try:
|
||||||
safe_link = quote(self.url + link, safe='/:')
|
safe_link = quote(self.url + link, safe='/:')
|
||||||
except KeyError:
|
except KeyError:
|
||||||
|
|
@ -77,26 +89,29 @@ class limetorrents(object):
|
||||||
self.current_item["desc_link"] = safe_link
|
self.current_item["desc_link"] = safe_link
|
||||||
|
|
||||||
def handle_data(self, data):
|
def handle_data(self, data):
|
||||||
if self.inside_tr and self.item_name:
|
if self.column_name:
|
||||||
if self.item_name == 'size' and (data.endswith('MB') or data.endswith('GB')):
|
if self.column_name in ["size", "seeds", "leech"]:
|
||||||
self.current_item[self.item_name] = data.strip().replace(',', '')
|
data = data.replace(',', '')
|
||||||
elif not self.item_name == 'size':
|
elif self.column_name == "pub_date":
|
||||||
self.current_item[self.item_name] = data.strip().replace(',', '')
|
timestamp = -1
|
||||||
|
for pattern, calc in self.date_parsers.items():
|
||||||
self.item_name = None
|
m = re.match(pattern, data, re.IGNORECASE)
|
||||||
|
if m:
|
||||||
|
timestamp = int(calc(m).timestamp())
|
||||||
|
break
|
||||||
|
data = str(timestamp)
|
||||||
|
self.current_item[self.column_name] = data.strip()
|
||||||
|
self.column_name = None
|
||||||
|
|
||||||
def handle_endtag(self, tag):
|
def handle_endtag(self, tag):
|
||||||
if tag == 'table':
|
if tag == 'table':
|
||||||
self.findTable = False
|
self.inside_table = False
|
||||||
|
|
||||||
if self.inside_tr and tag == self.TR:
|
if self.inside_tr and tag == self.TR:
|
||||||
self.inside_tr = False
|
self.inside_tr = False
|
||||||
self.item_name = None
|
self.column_name = None
|
||||||
array_length = len(self.current_item)
|
if "link" in self.current_item:
|
||||||
if array_length < 1:
|
prettyPrinter(self.current_item)
|
||||||
return
|
|
||||||
prettyPrinter(self.current_item)
|
|
||||||
self.current_item = {}
|
|
||||||
|
|
||||||
def download_torrent(self, info):
|
def download_torrent(self, info):
|
||||||
# since limetorrents provides torrent links in itorrent (cloudflare protected),
|
# since limetorrents provides torrent links in itorrent (cloudflare protected),
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
eztv: 1.16
|
eztv: 1.16
|
||||||
jackett: 4.0
|
jackett: 4.0
|
||||||
limetorrents: 4.7
|
limetorrents: 4.8
|
||||||
piratebay: 3.3
|
piratebay: 3.3
|
||||||
solidtorrents: 2.3
|
solidtorrents: 2.3
|
||||||
torlock: 2.23
|
torlock: 2.23
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue