summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Luo Jinghua <sunmoon1997@gmail.com> 2010-07-18 13:37:55 +0800
committer Luo Jinghua <sunmoon1997@gmail.com> 2010-07-18 13:37:55 +0800
commit f1e5f25a9f1bf44ac097e4b231c4e36e7031e85e (patch)
tree a6d990cc2320d119f43206b6029cd279fe600e1c
parent ac4d94561c0463c8ab3673d1e480c7b05d424e91 (diff)
pplist: track changes in kan.pps.tv
-rw-r--r-- totem/plugin/ppslist.py 50
1 files changed, 35 insertions, 15 deletions
diff --git a/totem/plugin/ppslist.py b/totem/plugin/ppslist.py
index 843cd55..8c6abbc 100644
--- a/totem/plugin/ppslist.py
+++ b/totem/plugin/ppslist.py
@@ -23,6 +23,7 @@ MOVIE_LIST_URL = KANPPS + MOVIE_LIST_PATH
class PPSClass:
def __init__ (self):
+ self.has_id = True
self.id = 0
self.title = ''
self.url = ''
@@ -41,6 +42,7 @@ class PPSClass:
d['id'] = self.id
d['url'] = self.url
d['title'] = self.title
+ d['has_id'] = self.has_id
return d
def parseid (self, url):
@@ -49,7 +51,12 @@ class PPSClass:
def parse (self, node):
self.title = unicode(node.contents[0]).encode('utf-8')
self.url = KANPPS + node['href'].encode('utf-8')
- self.id = self.parseid(self.url)
+ try:
+ self.id = self.parseid(self.url)
+ self.has_id = True
+ except:
+ self.has_id = False
+ self.id = (self.title + self.url).__hash__() & 0x7fffffff
return self
def load(self, d):
@@ -313,18 +320,20 @@ def extractNavigableStrings(node):
def parseMovieClassList(res):
ppslist = gbk2utf8(res)
- client_list = '<dt>客户端列表</dt>'
- startpos = ppslist.find(client_list)
- if startpos < 0:
+ if not ppslist:
return []
- endpos = ppslist[startpos:].find('</dd>')
- if endpos < 0:
+ soup = BeautifulSoup.BeautifulSoup(ppslist)
+ classification = soup.findAll('div', { 'id': 'classification' })
+ if not classification:
return []
- soup = BeautifulSoup.BeautifulSoup(ppslist[startpos:startpos + endpos + 5])
+ clses = classification[0]
+ clsnames = clses.findAll('dt')
+ lists = clses.findAll('dd')
result = []
- for l in soup.findAll('li'):
- cls = PPSClass()
- result.append(cls.parse(l.next))
+ for i, name in enumerate(clsnames):
+ for l in lists[i].findAll('li'):
+ cls = PPSClass()
+ result.append(cls.parse(l.next))
return result
def getMovieClassList():
@@ -475,11 +484,22 @@ class PPSList:
url = movie_class.url
if page_id != 0:
assert (page_id < movie_class.max_page)
+ actor = url.rfind('movie_actor')
pos = url.rfind('/')
- baseurl = url[:pos + 1]
- pos = url.rfind('.')
- suffix = url[pos:]
- url = '%s%d%s' % (baseurl, page_id + 2, suffix)
+ if actor > 0:
+ path = '/index.php?pageID=%d' % (page_id + 1)
+ path += '&act=front%2Fhome%2Fseek_by_actor&actor='
+ path += url[url.rfind('/') + 1:]
+ url = KANPPS + path
+ else:
+ numpos = url.rfind('_')
+ if numpos > pos:
+ baseurl = url[:numpos + 1]
+ else:
+ baseurl = url[:pos + 1]
+ pos = url.rfind('.')
+ suffix = url[pos:]
+ url = '%s%d%s' % (baseurl, page_id + 1, suffix)
s = download(url)
return s
@@ -649,7 +669,7 @@ if __name__ == '__main__':
ppsfile1.load(d)
print ppsfile1, ppsfile == ppsfile1
- #test_ppslist()
+ test_ppslist()
#test_search()
#test_get_file_list()
#test_get_file_list_by_iframe()