diff options
author | Luo Jinghua <sunmoon1997@gmail.com> | 2010-01-21 23:19:36 +0800 |
---|---|---|
committer | Luo Jinghua <sunmoon1997@gmail.com> | 2010-01-21 23:19:36 +0800 |
commit | 71cb1f2636a27479d378da957a2129d98657f753 (patch) | |
tree | 58ad2ddb65512586c44386d6e472289f83f36187 | |
parent | dbd3bd13da70d22c1e3e965aa9270522e9d63884 (diff) |
ppslist: add methods for parsing long play lists
-rw-r--r-- | totem/plugin/ppslist.py | 90 |
1 files changed, 90 insertions, 0 deletions
```diff
diff --git a/totem/plugin/ppslist.py b/totem/plugin/ppslist.py
index b507801..8510a5c 100644
--- a/totem/plugin/ppslist.py
+++ b/totem/plugin/ppslist.py
@@ -367,6 +367,58 @@ def parseMovieFileList(movie, s):
         ci += 1
     return result
 
+def getMovieFileListFrameUrl(s):
+    s = gbk2utf8(s)
+    soup = BeautifulSoup.BeautifulSoup(s)
+    result = []
+    play_iframe = soup.findAll('iframe', { 'id': 'play_iframe' })
+    if not play_iframe:
+        return ''
+    return play_iframe[0]['src']
+
+def getMovieFileListFrameBaseUrl(s):
+    url = getMovieFileListFrameUrl(s)
+    if not url:
+        return url
+    pos = url.rfind('/')
+    if pos > 0:
+        return url[0:pos]
+    return url
+
+def parseMoviePlayList(s):
+    s = gbk2utf8(s)
+    soup = BeautifulSoup.BeautifulSoup(s)
+    seein = soup.findAll('div', { 'class': 'onseeIn' })
+    if not seein:
+        return ()
+    uls = seein[0].findAll('ul')
+    if not uls:
+        return ()
+    items = uls[0].findAll('li')
+    if not items:
+        return ()
+
+    files = []
+    for item in items:
+        f = PPSFile()
+        fid = extractNumber(item['id'])
+        refs = item.findAll('a')
+        title = refs[0].contents[-1]
+        url = refs[1]['href']
+        f.id = fid
+        f.title = unicode(title).encode('utf-8')
+        f.url = unicode(url).encode('utf-8')
+        files.append(f)
+
+    pagenav = soup.findAll('div', { 'class': 'pageNav' })
+    if not pagenav:
+        return ()
+    refs = pagenav[0].findAll('a')
+    maxpage = int(refs[-1].contents[-1])
+    spans = pagenav[0].findAll('span')
+    curpage = int(spans[0].contents[-1])
+    return (files, max(maxpage, curpage))
+
 def getMovieFileList(movie):
     s = download(movie.playerurl)
     return parseMovieFileList(movie, s)
@@ -467,6 +519,43 @@ if __name__ == '__main__':
 
         sys.exit(0)
 
+    def test_get_file_list_by_iframe():
+        url = 'http://kan.pps.tv/play/298356.html'
+        s = download(url)
+        movie = PPSMovie()
+        file_list = parseMovieFileList(movie, s)
+        for f in file_list:
+            print f
+
+        if file_list:
+            return
+        baseurl = getMovieFileListFrameBaseUrl(s)
+        print 'baseurl = ', baseurl
+
+        first = baseurl + '/0.html'
+        s = download(first)
+        if not s:
+            return
+
+        last_maxpage = 0
+        cur_page = 0
+        while True:
+            url = baseurl + '/%d.html' % cur_page
+            s = download(url)
+            if not s:
+                break
+            result = parseMoviePlayList(s)
+            if not result:
+                break
+            files, max_page = result
+            print 'maxpage = ', max_page, cur_page
+            for f in files:
+                print f
+
+            if max_page == cur_page:
+                break
+            cur_page += 1
+
     def test_search ():
         ppslist = PPSList()
         s = ppslist.searchMovieList('周星驰')
@@ -520,3 +609,4 @@ if __name__ == '__main__':
     #test_ppslist()
     #test_search()
     #test_get_file_list()
+    #test_get_file_list_by_iframe()
```