diff options
author | Luo Jinghua <sunmoon1997@gmail.com> | 2010-07-18 13:37:55 +0800 |
---|---|---|
committer | Luo Jinghua <sunmoon1997@gmail.com> | 2010-07-18 13:37:55 +0800 |
commit | f1e5f25a9f1bf44ac097e4b231c4e36e7031e85e (patch) | |
tree | a6d990cc2320d119f43206b6029cd279fe600e1c | |
parent | ac4d94561c0463c8ab3673d1e480c7b05d424e91 (diff) |
ppslist: track changes in kan.pps.tv
-rw-r--r-- | totem/plugin/ppslist.py | 50 |
1 file changed, 35 insertions, 15 deletions
```diff
diff --git a/totem/plugin/ppslist.py b/totem/plugin/ppslist.py
index 843cd55..8c6abbc 100644
--- a/totem/plugin/ppslist.py
+++ b/totem/plugin/ppslist.py
@@ -23,6 +23,7 @@ MOVIE_LIST_URL = KANPPS + MOVIE_LIST_PATH
 
 class PPSClass:
     def __init__ (self):
+        self.has_id = True
         self.id = 0
         self.title = ''
         self.url = ''
@@ -41,6 +42,7 @@ class PPSClass:
         d['id'] = self.id
         d['url'] = self.url
         d['title'] = self.title
+        d['has_id'] = self.has_id
         return d
 
     def parseid (self, url):
@@ -49,7 +51,12 @@ class PPSClass:
     def parse (self, node):
         self.title = unicode(node.contents[0]).encode('utf-8')
         self.url = KANPPS + node['href'].encode('utf-8')
-        self.id = self.parseid(self.url)
+        try:
+            self.id = self.parseid(self.url)
+            self.has_id = True
+        except:
+            self.has_id = False
+            self.id = (self.title + self.url).__hash__() & 0x7fffffff
         return self
 
     def load(self, d):
@@ -313,18 +320,20 @@ def extractNavigableStrings(node):
 
 def parseMovieClassList(res):
     ppslist = gbk2utf8(res)
-    client_list = '<dt>客户端列表</dt>'
-    startpos = ppslist.find(client_list)
-    if startpos < 0:
+    if not ppslist:
         return []
-    endpos = ppslist[startpos:].find('</dd>')
-    if endpos < 0:
+    soup = BeautifulSoup.BeautifulSoup(ppslist)
+    classification = soup.findAll('div', { 'id': 'classification' })
+    if not classification:
         return []
-    soup = BeautifulSoup.BeautifulSoup(ppslist[startpos:startpos + endpos + 5])
+    clses = classification[0]
+    clsnames = clses.findAll('dt')
+    lists = clses.findAll('dd')
     result = []
-    for l in soup.findAll('li'):
-        cls = PPSClass()
-        result.append(cls.parse(l.next))
+    for i, name in enumerate(clsnames):
+        for l in lists[i].findAll('li'):
+            cls = PPSClass()
+            result.append(cls.parse(l.next))
     return result
 
 def getMovieClassList():
@@ -475,11 +484,22 @@ class PPSList:
         url = movie_class.url
         if page_id != 0:
             assert (page_id < movie_class.max_page)
+            actor = url.rfind('movie_actor')
             pos = url.rfind('/')
-            baseurl = url[:pos + 1]
-            pos = url.rfind('.')
-            suffix = url[pos:]
-            url = '%s%d%s' % (baseurl, page_id + 2, suffix)
+            if actor > 0:
+                path = '/index.php?pageID=%d' % (page_id + 1)
+                path += '&act=front%2Fhome%2Fseek_by_actor&actor='
+                path += url[url.rfind('/') + 1:]
+                url = KANPPS + path
+            else:
+                numpos = url.rfind('_')
+                if numpos > pos:
+                    baseurl = url[:numpos + 1]
+                else:
+                    baseurl = url[:pos + 1]
+                pos = url.rfind('.')
+                suffix = url[pos:]
+                url = '%s%d%s' % (baseurl, page_id + 1, suffix)
         s = download(url)
         return s
 
@@ -649,7 +669,7 @@ if __name__ == '__main__':
     ppsfile1.load(d)
     print ppsfile1, ppsfile == ppsfile1
 
-    #test_ppslist()
+    test_ppslist()
     #test_search()
     #test_get_file_list()
     #test_get_file_list_by_iframe()
```