📄 baidump3.py
字号:
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# author: watermoon
# date: 02/17/2009
"""Search mp3.baidu.com for a keyword and download the first decoded result.

This is a Python 2 script (it relies on ``urllib2``, ``urllib.urlretrieve``
and ``raw_input``).  Two equivalent entry points are provided:

* ``urllib_one`` goes through a temporary file using ``urlretrieve``.
* ``urllib_two`` keeps every page in memory using ``urllib2``.

The final download link is decoded by the project-local ``urldecode`` module.
"""
import os
import re
import string
import sys
import urllib2
from urllib import urlretrieve as URT

import urldecode

# Hex digits used to percent-encode a single byte value.
CODE = '0123456789ABCDEF'
# Groups: (1) URL prefix, (2) part that is re-encoded with format(), (3) suffix.
QUERY = r'(http:.*baidusg,)(.*)(&word=mp3.*sgid=1)'
# Groups: (1) integer handed to de_baidump3, (2) encoded link handed to it.
MP3 = r'F=(\d+).*I="(\w{1}\w{2}\w{1}://\w+\..*\w{3})",J'

_SEARCH_URL = ('http://mp3.baidu.com/m?f=ms&rn=&tn=baidump3'
               '&ct=134217728&word=%s&lm=0')
# Scratch file shared by getWeb() (default target) and parseWeb().
_TMP_FILE = '.tmp'


def gb2312(ch):
    """Return *ch* as two uppercase hexadecimal digits.

    ch -- integer in the range 0..255; a larger value raises IndexError
          because the high digit is looked up directly in CODE.
    """
    # Floor division keeps the index an integer on every Python version.
    return CODE[ch // 16] + CODE[ch % 16]


def format(word):
    """Encode *word* for use inside the search URL.

    A space becomes '+', a character whose ordinal is 127 or above becomes
    '%XX' (see gb2312), and every other character is copied unchanged.

    NOTE(review): presumably *word* is a Python 2 byte string already in the
    encoding the site expects -- confirm against the caller's terminal
    encoding.  The name shadows the ``format`` builtin; it is kept because it
    is this module's existing public name.
    """
    pieces = []
    for ch in word:
        value = ord(ch)
        if value >= 127:
            pieces.append('%' + gb2312(value))
        elif ch == ' ':
            pieces.append('+')
        else:
            pieces.append(ch)
    return ''.join(pieces)


def getWeb(url, tmp='.tmp'):
    """Download *url* into the file *tmp*.

    On IOError a message is printed and the process exits with status 1.
    """
    try:
        URT(url, tmp)
    except IOError:
        print('Cannot fectch web page')
        # Non-zero status so that a calling shell can detect the failure.
        sys.exit(1)


def getResource(url):
    """Open *url* and return the response object, or None on URLError.

    The failure reason is printed before None is returned.
    """
    req = urllib2.Request(url)
    try:
        return urllib2.urlopen(req)
    except urllib2.URLError as e:
        # Fall back to the exception itself in case it carries no ``reason``.
        print(getattr(e, 'reason', e))
        return None


def parseWeb(pat):
    """Search the temporary page for *pat* and return the first match or None.

    The temporary file is removed whether or not the read succeeds.
    """
    try:
        with open(_TMP_FILE, 'r') as tmpfile:
            # re.search needs one string, so read the page as a whole.
            content = tmpfile.read()
    finally:
        if os.path.exists(_TMP_FILE):
            os.remove(_TMP_FILE)
    return re.search(pat, content)


def _search_url(word):
    """Return the search URL for the keyword *word*."""
    return _SEARCH_URL % format(word)


def _result_url(match):
    """Rebuild the result-page URL from a QUERY match, re-encoding group 2."""
    return match.group(1) + format(match.group(2)) + match.group(3)


def _fetch(url):
    """Return the body of *url*, or None when it could not be opened."""
    res = getResource(url)
    if res is None:
        return None
    try:
        return res.read()
    finally:
        res.close()


def urllib_one():
    """Prompt for a keyword and download the result via a temporary file.

    The mp3 is saved as '<keyword>_.mp3' in the current directory.
    """
    word = raw_input('Search> ')
    getWeb(_search_url(word))
    r = parseWeb(QUERY)
    if not r:
        print('Fail to distill query url!')
        return
    print('Succeed in distilling query result url...')

    # Fetch the last page, the one that shows the encoded link.
    getWeb(_result_url(r))
    r = parseWeb(MP3)
    if not r:
        return
    print('Succeed in distilling url!')
    baidu = urldecode.codec()
    url = baidu.de_baidump3(int(r.group(1)), r.group(2))
    print('Downloading mp3 file from: %s' % url)
    getWeb(url, word + '_.mp3')


def urllib_two():
    """Prompt for a keyword and download the result entirely in memory.

    The mp3 is saved under the last path component of the decoded URL, or as
    '<keyword>_.mp3' when that component is empty.  The function returns
    early when any of the pages cannot be opened or does not match.
    """
    word = raw_input('Search> ')
    url = _search_url(word)
    print(url)
    data = _fetch(url)
    if data is None:
        return

    r = re.search(QUERY, data)
    if not r:
        print('Fail to distill query url!')
        return
    print('Succeed in distilling query result url...')
    url = _result_url(r)

    print(url)
    data = _fetch(url)
    if data is None:
        return
    r = re.search(MP3, data)
    if not r:
        return

    print('Succeed in distilling url!')
    baidu = urldecode.codec()
    print(url)
    url = baidu.de_baidump3(int(r.group(1)), r.group(2))
    print('Downloading mp3 file from: %s' % url)

    # Download the mp3 itself.
    data = _fetch(url)
    if data is None:
        return
    print('')
    filename = url[url.rfind('/') + 1:] or word + '_.mp3'
    with open(filename, 'wb') as f:
        f.write(data)


if __name__ == '__main__':
    # urllib_one() is the alternative implementation based on a temp file.
    urllib_two()
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -