111 lines
4.1 KiB
Python
111 lines
4.1 KiB
Python
# VERSION: 1.16
|
|
# AUTHORS: nindogo
|
|
# CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
|
|
|
|
import re
|
|
import urllib.error
|
|
import urllib.parse
|
|
import urllib.request
|
|
from datetime import datetime, timedelta
|
|
from html.parser import HTMLParser
|
|
|
|
from novaprinter import prettyPrinter
|
|
from helpers import retrieve_url
|
|
|
|
|
|
class eztv(object):
    """Search engine plugin for the EZTV torrent index.

    ``search`` downloads the result page for a query and feeds it to
    ``MyHtmlParser``; every complete result row is passed to
    ``prettyPrinter`` as a dict. Nothing is returned to the caller.
    """

    name = "EZTV"
    url = 'https://eztvx.to/'
    supported_categories = {'all': 'all', 'tv': 'tv'}

    class MyHtmlParser(HTMLParser):
        """Sub-class for parsing results."""

        A, TD, TR, TABLE = ('a', 'td', 'tr', 'table')

        # A text node ending with one of these suffixes is taken as the size.
        SIZE_SUFFIXES = (' KB', ' MB', ' GB')

        def __init__(self, url):
            """Create a parser whose results reference the site at *url*.

            *url* is stored as ``engine_url`` on every result and is the base
            against which description links are resolved.
            """
            HTMLParser.__init__(self)
            self.url = url

            # Relative ages ("3h 5m", "2 weeks", ...) are converted against a
            # single reference time captured when the parser is created.
            # Months and years are approximated as 30 and 365 days.
            now = datetime.now()
            self.date_parsers = {
                r"(\d+)h\s+(\d+)m": lambda m: now - timedelta(hours=int(m[1]), minutes=int(m[2])),
                r"(\d+)d\s+(\d+)h": lambda m: now - timedelta(days=int(m[1]), hours=int(m[2])),
                r"(\d+)\s+weeks?": lambda m: now - timedelta(weeks=int(m[1])),
                r"(\d+)\s+mo": lambda m: now - timedelta(days=int(m[1]) * 30),
                r"(\d+)\s+years?": lambda m: now - timedelta(days=int(m[1]) * 365),
            }
            self.in_table_row = False
            self.current_item = {}

        def handle_starttag(self, tag, attrs):
            """Start a new result row or collect the links found inside one."""
            params = dict(attrs)
            css_class = params.get('class')

            if css_class == 'forum_header_border' and params.get('name') == 'hover':
                # A result row begins: reset the item to its "unknown" defaults.
                self.in_table_row = True
                self.current_item = {
                    'seeds': -1,
                    'leech': -1,
                    'size': -1,
                    'engine_url': self.url,
                    'pub_date': -1,
                }

            if tag != self.A or not self.in_table_row:
                return

            href = params.get('href')
            if css_class == 'magnet':
                if href:
                    self.current_item['link'] = href
            elif css_class == 'epinfo':
                # Missing attributes are skipped instead of raising, so one
                # malformed anchor cannot abort parsing of the whole page.
                if href:
                    # urljoin avoids the "//" produced by plain concatenation
                    # of a base ending in "/" and a path starting with "/".
                    self.current_item['desc_link'] = urllib.parse.urljoin(self.url, href)
                title = params.get('title')
                if title:
                    # Keep only the text before the first " (".
                    self.current_item['name'] = title.split(' (')[0]

        def handle_data(self, data):
            """Classify a text node of the current row as size, seeds or age."""
            if not self.in_table_row:
                return

            data = data.replace(',', '')  # drop thousands separators
            if data.endswith(self.SIZE_SUFFIXES):
                self.current_item['size'] = data
            elif data.isdecimal():
                # isdecimal() (unlike isnumeric()) only accepts text that
                # int() can actually convert.
                self.current_item['seeds'] = int(data)
            else:
                # Check for a relative time
                for pattern, calc in self.date_parsers.items():
                    m = re.match(pattern, data)
                    if not m:
                        continue
                    try:
                        self.current_item["pub_date"] = int(calc(m).timestamp())
                    except (OverflowError, ValueError, OSError):
                        # Absurdly large ages cannot be represented as a
                        # date; keep the current pub_date value.
                        pass
                    break

        def handle_endtag(self, tag):
            """Emit the collected item when the current result row closes."""
            if not (self.in_table_row and tag == self.TR):
                return
            self.in_table_row = False
            # Rows without a download link or a name are dropped rather than
            # handed to prettyPrinter half-filled.
            if 'link' in self.current_item and 'name' in self.current_item:
                prettyPrinter(self.current_item)

    def do_query(self, what):
        """Fetch the search result page for *what* and return its HTML.

        *what* is inserted into the URL as-is except that every ``%20`` is
        replaced by ``-`` (presumably the caller passes it URL-encoded; verify
        against caller). Returns an empty string when the fallback request
        fails with a ``URLError``.
        """
        url = f"{self.url.rstrip('/')}/search/{what.replace('%20', '-')}"
        data = b"layout=def_wlinks"
        try:
            return retrieve_url(url, request_data=data)
        except TypeError:
            # Older versions of retrieve_url did not support request_data/POST, so we must do the
            # request ourselves...
            user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:125.0) Gecko/20100101 Firefox/125.0'
            req = urllib.request.Request(url, data, {'User-Agent': user_agent})
            try:
                # The context manager closes the connection once it is read.
                with urllib.request.urlopen(req) as response:  # nosec B310
                    return response.read().decode('utf-8', errors='replace')
            except urllib.error.URLError as err:
                print(f"Connection error: {err.reason}")
                return ""

    def search(self, what, cat='all'):
        """Run a search for *what* and print every result found.

        *cat* is accepted for interface compatibility but is not used.
        """
        eztv_html = self.do_query(what)
        if not eztv_html:
            # Nothing was downloaded, so there is nothing to parse.
            return

        eztv_parser = self.MyHtmlParser(self.url)
        eztv_parser.feed(eztv_html)
        eztv_parser.close()
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: running this file directly performs one sample
    # search across all categories.
    engine = eztv()
    engine.search('Acre', cat='all')
|