#!/usr/bin/env python
#encoding=utf-8
#Using GPL v2
#Author: cocobear.cn@gmail.com
"""Export a user's cang.baidu.com bookmarks to an HTML file and an Opera .adr file.

The script downloads the user's bookmark pages, extracts (url, name,
description) triples with a regular expression, and appends each one to
``<user>.html`` and ``<user>.adr`` in the current directory.

NOTE(review): in the reviewed copy of this file the markup inside the HTML
string literals was missing (apparently stripped by an extraction step, which
also left the literals split across lines).  The tags written below are a
reconstruction in the style of the Netscape bookmark file format -- confirm
against the original source before relying on the exact output.
"""
import urllib2
import urllib
import cookielib
import httplib
import sys
import re
import gzip
import StringIO

user = "cocobearc"  # user whose bookmarks you want to get
tag = "from cang2html"  # name written as the folder/heading of the export
id = 0  # next ID written to the .adr file (shadows the builtin; name kept)

# Number of bookmarks per listing page, as assumed by the paging logic in main().
_PAGE_SIZE = 10

# One bookmark entry: the URL, the link text after the lnk<N> attribute and the
# text after the dc<N> attribute.
# NOTE(review): the terminators of the last two groups were missing from the
# reviewed source, which made the lazy groups capture a single character and
# an empty string.  They are anchored on the next "<" here -- confirm against
# the real page markup.
_BOOKMARK_RE = re.compile(
    r'((?:http|ftp|https|file)://.*)" target.*lnk\d+">(.+?)<.*dc\d+">(.*?)<'
)


def cang2html(name, url, description):
    """Append one bookmark to ``<user>.html``.

    name, url and description are written verbatim (no escaping is applied);
    the description line is skipped when description is empty.
    """
    with open(user + ".html", "a") as f:
        # NOTE(review): reconstructed markup; the original never emitted url.
        f.write('\t<DT><A HREF="' + url + '">' + name + "</A>\n")
        if description:
            f.write("\t<DD>" + description + "</DD>\n")


def cang2adr(name, url, description):
    """Append one ``#URL`` record to ``<user>.adr`` and advance the global id."""
    global id
    with open(user + ".adr", "a") as f:
        f.write("#URL\n")
        f.write("\tID=" + str(id) + "\n")
        f.write("NAME=" + name + "\n\t" + "URL=" + url + "\n")
        if description:
            f.write("\tDESCRIPTION=" + description + "\n")
    id += 1


def process_data(data):
    """Extract every bookmark found in one page of data and write it out.

    Each match is appended to both the HTML and the .adr output files.
    """
    for url, name, description in _BOOKMARK_RE.findall(data):
        cang2html(name, url, description)
        cang2adr(name, url, description)


def get_data(user, opener, page):
    """Download listing page number ``page`` of ``user`` and return it as UTF-8.

    The body is decompressed when it starts with the gzip magic bytes and is
    then transcoded from GBK to UTF-8.
    """
    url = "http://cang.baidu.com/" + user + "/page/" + str(page)
    response = opener.open(url)
    try:
        raw = response.read()
    finally:
        response.close()
    # The request headers built in init() also accept non-gzip encodings, so
    # only gunzip when the payload really is gzip instead of failing on a
    # plain response.
    if raw[:2] == "\x1f\x8b":
        raw = gzip.GzipFile(fileobj=StringIO.StringIO(raw)).read()
    return raw.decode('gbk').encode('utf-8')


def init():
    """Build, install and return a cookie-aware urllib2 opener.

    Also sets httplib's connection debug level to 1 and attaches a fixed set
    of request headers to the opener.
    """
    httplib.HTTPConnection.debuglevel = 1
    cookie = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
    exheaders = [
        ("User-Agent", "Opera/9.27 (X11; Linux x86_64; U; en)"),
        ("Connection", "Keep-Alive"),
        ("Referer", "http://cang.baidu.com"),
        ("Accept", "text/html, application/xml;q=0.9, application/xhtml+xml, */*;q=0.1"),
        ("Accept-Charset", "iso-8859-1, utf-8, utf-16, *;q=0.1"),
        ("Cookie2", "$Version=1"),
        ("Accept-Encoding", "deflate, gzip, x-gzip, identity, *;q=0"),
    ]
    opener.addheaders = exheaders
    urllib2.install_opener(opener)
    return opener


def create_adr_file(name):
    """Create (or truncate) the .adr file and write its header and folder record.

    The folder is named after the global ``tag``; the global id is advanced.
    """
    global id
    with open(name, "w") as f:
        # The line breaks of this header were collapsed in the reviewed
        # source; they are restored here.
        f.write("Opera Hotlist version 2.0\n"
                "Options: encoding = utf8, version=3\n\n")
        f.write("#FOLDER\n")
        f.write("\tID=" + str(id) + "\n")
        id += 1
        f.write("\tNAME=" + tag + "\n")


def create_html_file(name):
    """Create (or truncate) the HTML file and write its header and folder heading."""
    with open(name, "w") as f:
        # NOTE(review): reconstructed markup; only the text
        # "Generated by cang2html" and the tag survived in the reviewed source.
        f.write('<!DOCTYPE NETSCAPE-Bookmark-file-1>\n'
                '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">\n'
                '<TITLE>Generated by cang2html</TITLE>\n'
                '<H1>Generated by cang2html</H1>\n')
        f.write("<DT><H3>" + tag + "</H3>\n")
        f.write("<DL><p>\n")


def end_html_file(name):
    """Append the closing markup of the bookmark list to the HTML file."""
    with open(name, "a") as f:
        # NOTE(review): reconstructed markup, closes the list opened in
        # create_html_file().
        f.write("</DL><p>\n")


def main(argv=None):
    """Fetch all bookmark pages of ``user`` and write both export files.

    Returns 1 when the first page contains no bookmark count, 0 otherwise.
    ``argv`` is accepted but not used.
    """
    opener = init()
    data = get_data(user, opener, 1)
    match = re.search(r"(共(\d+)条)", data)
    if match is None:
        print("User has no bookmars!")
        return 1
    # Group 1 is the whole "共N条" text; the digits are in group 2.
    total = int(match.group(2))
    print("Total %d bookmarks" % total)
    print("Start getting ......")
    create_html_file(user + ".html")
    create_adr_file(user + ".adr")
    process_data(data)
    # Ceiling division: number of listing pages; page 1 is already processed.
    pages = (total + _PAGE_SIZE - 1) // _PAGE_SIZE
    for page in range(2, pages + 1):
        process_data(get_data(user, opener, page))
    end_html_file(user + ".html")
    # NOTE: no end_adr_file() exists; nothing further is written to the .adr file.
    print("Success!\nLook %s.html for your bookmarks" % user)
    print("Success!\nLook %s.adr for your bookmarks" % user)
    return 0


if __name__ == "__main__":
    sys.exit(main())