Add date field to torrentproject
This changes the search URL from `/?t=abc` to `/browse?t=abc` because the latter provides a date. The search results are identical for most queries I've tried, but they sometimes differ, so maybe using `/browse` isn't acceptable?
This commit is contained in:
parent
b7c497003e
commit
3a88c6fb10
|
|
@ -1,10 +1,11 @@
|
||||||
#VERSION: 1.3
|
#VERSION: 1.4
|
||||||
#AUTHORS: mauricci
|
#AUTHORS: mauricci
|
||||||
|
|
||||||
from helpers import retrieve_url
|
from helpers import retrieve_url
|
||||||
from novaprinter import prettyPrinter
|
from novaprinter import prettyPrinter
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
from urllib.parse import unquote
|
from urllib.parse import unquote
|
||||||
|
|
||||||
|
|
@ -23,7 +24,14 @@ class torrentproject(object):
|
||||||
self.insideDataDiv = False
|
self.insideDataDiv = False
|
||||||
self.pageComplete = False
|
self.pageComplete = False
|
||||||
self.spanCount = -1
|
self.spanCount = -1
|
||||||
self.infoMap = {'name': 0, 'torrLink': 0, 'size': 5, 'seeds': 2, 'leech': 3}
|
self.infoMap = {
|
||||||
|
"name": 0,
|
||||||
|
"torrLink": 0,
|
||||||
|
"seeds": 2,
|
||||||
|
"leech": 3,
|
||||||
|
"pub_date": 4,
|
||||||
|
"size": 5,
|
||||||
|
}
|
||||||
self.fullResData = []
|
self.fullResData = []
|
||||||
self.pageRes = []
|
self.pageRes = []
|
||||||
self.singleResData = self.get_single_data()
|
self.singleResData = self.get_single_data()
|
||||||
|
|
@ -36,7 +44,8 @@ class torrentproject(object):
|
||||||
'size': '-1',
|
'size': '-1',
|
||||||
'link': '-1',
|
'link': '-1',
|
||||||
'desc_link': '-1',
|
'desc_link': '-1',
|
||||||
'engine_url': self.url
|
'engine_url': self.url,
|
||||||
|
'pub_date': '-1',
|
||||||
}
|
}
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
|
def handle_starttag(self, tag, attrs):
|
||||||
|
|
@ -68,6 +77,12 @@ class torrentproject(object):
|
||||||
# ignore those with link and desc_link equals to -1
|
# ignore those with link and desc_link equals to -1
|
||||||
if self.singleResData['desc_link'] != '-1' \
|
if self.singleResData['desc_link'] != '-1' \
|
||||||
or self.singleResData['link'] != '-1':
|
or self.singleResData['link'] != '-1':
|
||||||
|
try:
|
||||||
|
date_string = self.singleResData['pub_date']
|
||||||
|
date = datetime.strptime(date_string, '%Y-%m-%d %H:%M:%S')
|
||||||
|
self.singleResData['pub_date'] = int(date.timestamp())
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
try:
|
try:
|
||||||
prettyPrinter(self.singleResData)
|
prettyPrinter(self.singleResData)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
@ -100,7 +115,7 @@ class torrentproject(object):
|
||||||
what = what.replace('%20', '+')
|
what = what.replace('%20', '+')
|
||||||
# analyze first 5 pages of results
|
# analyze first 5 pages of results
|
||||||
for currPage in range(0, 5):
|
for currPage in range(0, 5):
|
||||||
url = self.url + '?t={0}&p={1}'.format(what, currPage)
|
url = self.url + '/browse?t={0}&p={1}'.format(what, currPage)
|
||||||
html = retrieve_url(url)
|
html = retrieve_url(url)
|
||||||
parser.feed(html)
|
parser.feed(html)
|
||||||
if len(parser.pageRes) <= 0:
|
if len(parser.pageRes) <= 0:
|
||||||
|
|
|
||||||
|
|
@ -4,5 +4,5 @@ limetorrents: 4.7
|
||||||
piratebay: 3.3
|
piratebay: 3.3
|
||||||
solidtorrents: 2.3
|
solidtorrents: 2.3
|
||||||
torlock: 2.23
|
torlock: 2.23
|
||||||
torrentproject: 1.3
|
torrentproject: 1.4
|
||||||
torrentscsv: 1.4
|
torrentscsv: 1.4
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue