ppslist: track changes in kan.pps.tv

author: Luo Jinghua <sunmoon1997@gmail.com> 2010-07-18 13:37:55 +0800
committer: Luo Jinghua <sunmoon1997@gmail.com> 2010-07-18 13:37:55 +0800
commit: f1e5f25a9f1bf44ac097e4b231c4e36e7031e85e (patch)
tree: a6d990cc2320d119f43206b6029cd279fe600e1c
parent: ac4d94561c0463c8ab3673d1e480c7b05d424e91 (diff)
1 file changed, 35 insertions, 15 deletions
diff --git a/totem/plugin/ppslist.py b/totem/plugin/ppslist.py
index 843cd55..8c6abbc 100644
--- a/totem/plugin/ppslist.py
+++ b/totem/plugin/ppslist.py
@@ -23,6 +23,7 @@ MOVIE_LIST_URL = KANPPS + MOVIE_LIST_PATH
 
 class PPSClass:
     def __init__ (self):
+        self.has_id = True
         self.id = 0
         self.title = ''
         self.url = ''
@@ -41,6 +42,7 @@ class PPSClass:
         d['id'] = self.id
         d['url'] = self.url
         d['title'] = self.title
+        d['has_id'] = self.has_id
         return d
 
     def parseid (self, url):
@@ -49,7 +51,12 @@ class PPSClass:
     def parse (self, node):
         self.title = unicode(node.contents[0]).encode('utf-8')
         self.url = KANPPS + node['href'].encode('utf-8')
-        self.id = self.parseid(self.url)
+        try:
+            self.id = self.parseid(self.url)
+            self.has_id = True
+        except:
+            self.has_id = False
+            self.id = (self.title + self.url).__hash__() & 0x7fffffff
         return self
 
     def load(self, d):
@@ -313,18 +320,20 @@ def extractNavigableStrings(node):
 
 def parseMovieClassList(res):
     ppslist = gbk2utf8(res)
-    client_list = '<dt>客户端列表</dt>'
-    startpos = ppslist.find(client_list)
-    if startpos < 0:
+    if not ppslist:
         return []
-    endpos = ppslist[startpos:].find('</dd>')
-    if endpos < 0:
+    soup = BeautifulSoup.BeautifulSoup(ppslist)
+    classification = soup.findAll('div', { 'id': 'classification' })
+    if not classification:
         return []
-    soup = BeautifulSoup.BeautifulSoup(ppslist[startpos:startpos + endpos + 5])
+    clses = classification[0]
+    clsnames = clses.findAll('dt')
+    lists = clses.findAll('dd')
     result = []
-    for l in soup.findAll('li'):
-        cls = PPSClass()
-        result.append(cls.parse(l.next))
+    for i, name in enumerate(clsnames):
+        for l in lists[i].findAll('li'):
+            cls = PPSClass()
+            result.append(cls.parse(l.next))
     return result
 
 def getMovieClassList():
@@ -475,11 +484,22 @@ class PPSList:
         url = movie_class.url
         if page_id != 0:
             assert (page_id < movie_class.max_page)
+            actor = url.rfind('movie_actor')
             pos = url.rfind('/')
-            baseurl = url[:pos + 1]
-            pos = url.rfind('.')
-            suffix = url[pos:]
-            url = '%s%d%s' % (baseurl, page_id + 2, suffix)
+            if actor > 0:
+                path = '/index.php?pageID=%d' % (page_id + 1)
+                path += '&act=front%2Fhome%2Fseek_by_actor&actor='
+                path += url[url.rfind('/') + 1:]
+                url = KANPPS + path
+            else:
+                numpos = url.rfind('_')
+                if numpos > pos:
+                    baseurl = url[:numpos + 1]
+                else:
+                    baseurl = url[:pos + 1]
+                pos = url.rfind('.')
+                suffix = url[pos:]
+                url = '%s%d%s' % (baseurl, page_id + 1, suffix)
         s = download(url)
         return s
 
@@ -649,7 +669,7 @@ if __name__ == '__main__':
         ppsfile1.load(d)
         print ppsfile1, ppsfile == ppsfile1
 
-    #test_ppslist()
+    test_ppslist()
     #test_search()
     #test_get_file_list()
     #test_get_file_list_by_iframe()
author	Luo Jinghua <sunmoon1997@gmail.com>	2010-07-18 13:37:55 +0800
committer	Luo Jinghua <sunmoon1997@gmail.com>	2010-07-18 13:37:55 +0800
commit	f1e5f25a9f1bf44ac097e4b231c4e36e7031e85e (patch)
tree	a6d990cc2320d119f43206b6029cd279fe600e1c
parent	ac4d94561c0463c8ab3673d1e480c7b05d424e91 (diff)