#!/usr/bin/env python
#encoding=utf-8
#Using GPL v2
#Author: cocobear.cn@gmail.com
import urllib2,urllib,cookielib,httplib
import sys,re
import gzip,StringIO
# Module-level settings shared by the functions below.
user = "cocobearc" # account name: used in the cang.baidu.com URL and as the output file stem
tag = "from cang2html" # written as the heading / folder NAME at the top of both output files
# Running counter for the "ID=" fields of the .adr file; incremented by
# create_adr_file() and cang2adr().  NOTE: the name shadows the builtin id().
id = 0
def cang2html(name, url, description):
    """Append one bookmark entry to the ``<user>.html`` bookmark file.

    name        -- link text of the bookmark
    url         -- address the bookmark points to
    description -- optional note; a false value writes no description line

    The entry is written as a Netscape-bookmark item: a ``<DT><A HREF=...>``
    line followed, when a description is given, by a ``<DD>`` line.  The
    values are written verbatim; process_data() cuts them straight out of
    the page's HTML, so they are presumably already HTML-escaped.
    """
    file_name = user + ".html"
    # "with" guarantees the handle is closed even if a write raises.
    with open(file_name, "a") as f:
        f.write("\t<DT><A HREF=\"" + url + "\">" + name + "</A>\n")
        if description:
            f.write("\t<DD>" + description + "\n")
def cang2adr(name, url, description):
    """Append one ``#URL`` record to ``<user>.adr`` and advance the ID counter.

    name        -- bookmark title, written as the NAME field
    url         -- bookmark address, written as the URL field
    description -- optional note; a false value writes no DESCRIPTION field

    Every field line is indented with one tab, matching the ``#FOLDER``
    record written by create_adr_file().  The module-level ``id`` counter is
    incremented only after the record has been written successfully.
    """
    global id
    file_name = user + ".adr"
    # "with" guarantees the handle is closed even if a write raises.
    with open(file_name, "a") as f:
        f.write("#URL\n")
        f.write("\tID=" + str(id) + "\n")
        f.write("\tNAME=" + name + "\n")
        f.write("\tURL=" + url + "\n")
        if description:
            f.write("\tDESCRIPTION=" + description + "\n")
    id += 1
# One bookmark on a listing page: the link address (up to the closing quote
# before " target"), the link text that follows the lnk<N> marker, and the
# text that follows the dc<N> marker.  The two text groups stop at the next
# "<" so that they capture the whole text node; an unbounded lazy group
# would capture a single character (name) or nothing at all (description).
# "." does not match newlines, so one bookmark must sit on a single line.
_BOOKMARK_RE = re.compile(
    r'((?:http|ftp|https|file)://.*)" target.*lnk\d+">([^<]+).*dc\d+">([^<]*)'
)


def process_data(data):
    """Extract every bookmark from one page and append it to both outputs.

    data -- the page source as returned by get_data()

    For each (url, name, description) match the bookmark is appended to the
    HTML file via cang2html() and to the Opera hotlist via cang2adr().
    """
    for url, name, description in _BOOKMARK_RE.findall(data):
        cang2html(name, url, description)
        cang2adr(name, url, description)
def get_data(user, opener, page):
    """Download one bookmark listing page and return it re-encoded as UTF-8.

    user   -- account name inserted into the URL path
    opener -- object whose ``open(url)`` returns a response with ``read()``
              and ``close()`` (main() passes the opener built by init())
    page   -- page number appended to the URL (main() starts at 1)

    Returns the page body as a UTF-8 encoded byte string.
    Raises UnicodeDecodeError if the body is not valid GBK; errors raised by
    ``opener.open`` propagate unchanged.
    """
    # Local import: the file-level imports only provide the StringIO module.
    import io

    url = "http://cang.baidu.com/" + user + "/page/" + str(page)
    response = opener.open(url)
    try:
        payload = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()
    # init() advertises several encodings, so the reply is not guaranteed to
    # be gzip; only gunzip when the gzip magic number is present.
    if payload[:2] == b"\x1f\x8b":
        payload = gzip.GzipFile(fileobj=io.BytesIO(payload)).read()
    return payload.decode("gbk").encode("utf-8")
def init():
    """Build, install and return the urllib2 opener used for all requests.

    The opener keeps cookies in an in-memory CookieJar and sends a fixed set
    of browser-like request headers.  It is also installed as urllib2's
    global default opener.
    """
    # Class-level switch: turns on httplib's debug output for connections.
    httplib.HTTPConnection.debuglevel = 1

    jar = cookielib.CookieJar()
    cookie_handler = urllib2.HTTPCookieProcessor(jar)
    browser = urllib2.build_opener(cookie_handler)

    # Replaces the opener's default header list.
    browser.addheaders = [
        ("User-Agent", "Opera/9.27 (X11; Linux x86_64; U; en)"),
        ("Connection", "Keep-Alive"),
        ("Referer", "http://cang.baidu.com"),
        ("Accept", "text/html, application/xml;q=0.9, application/xhtml+xml, */*;q=0.1"),
        ("Accept-Charset", "iso-8859-1, utf-8, utf-16, *;q=0.1"),
        ("Cookie2", "$Version=1"),
        ("Accept-Encoding", "deflate, gzip, x-gzip, identity, *;q=0"),
    ]

    urllib2.install_opener(browser)
    return browser
def create_adr_file(name):
    """Create (or truncate) the Opera hotlist *name* and write its preamble.

    name -- path of the .adr file to create

    Writes the two hotlist header lines followed by a ``#FOLDER`` record
    whose NAME is the module-level ``tag``.  The folder takes the current
    value of the module-level ``id`` counter, which is then incremented.
    """
    global id
    # "with" guarantees the handle is closed even if a write raises.
    with open(name, "w") as f:
        f.write("Opera Hotlist version 2.0\n"
                "Options: encoding = utf8, version=3\n")
        f.write("#FOLDER\n")
        f.write("\tID=" + str(id) + "\n")
        id += 1
        f.write("\tNAME=" + tag + "\n")
def create_html_file(name):
    """Create (or truncate) the HTML bookmark file *name* and write its head.

    name -- path of the .html file to create

    Writes a Netscape-bookmark style header, a folder heading carrying the
    module-level ``tag``, and the opening ``<DL><p>`` of the list that the
    bookmark entries are appended to.  The charset is declared as UTF-8
    because get_data() re-encodes every page to UTF-8.
    """
    # "with" guarantees the handle is closed even if a write raises.
    with open(name, "w") as f:
        f.write("<!DOCTYPE NETSCAPE-Bookmark-file-1>\n"
                "<META HTTP-EQUIV=\"Content-Type\" "
                "CONTENT=\"text/html; charset=UTF-8\">\n"
                "<TITLE>Bookmarks</TITLE>\n"
                "<H1>Generated by cang2html</H1>\n")
        f.write("<DT><H3>" + tag + "</H3>\n")
        f.write("<DL><p>\n")
def end_html_file(name):
    """Append the closing ``</DL><p>`` line of the bookmark list to *name*.

    name -- path of the .html file to finish
    """
    # "with" guarantees the handle is closed even if the write raises.
    with open(name, "a") as f:
        f.write("</DL><p>\n")
def main(argv=None):
    """Export all bookmarks of the configured user to HTML and Opera files.

    argv -- accepted for the conventional main() signature; not used

    Fetches page 1, reads the bookmark total from its "共N条" counter,
    creates ``<user>.html`` and ``<user>.adr``, then processes every listing
    page in turn.

    Returns 1 when the first page carries no bookmark counter; otherwise
    falls off the end (None), which sys.exit() treats as success.
    """
    opener = init()
    data = get_data(user, opener, 1)

    # Group 1 is the whole "共N条" text; the digits alone are group 2.
    match = re.search(r"(共(\d+)条)", data)
    if not match:
        print("User has no bookmarks!")
        return 1
    total = int(match.group(2))

    # Single-argument print(...) behaves the same as the print statement.
    print("Total %d bookmarks" % total)
    print("Start getting ......")

    html_name = user + ".html"
    adr_name = user + ".adr"
    create_html_file(html_name)
    create_adr_file(adr_name)

    # Page 1 has already been downloaded for the counter.
    process_data(data)

    # The page arithmetic assumes 10 bookmarks per listing page; round the
    # page count up with explicit integer division.
    per_page = 10
    pages = (total + per_page - 1) // per_page
    for page in range(2, pages + 1):
        process_data(get_data(user, opener, page))

    end_html_file(html_name)
    # NOTE: the .adr file gets no closing step; a call to end_adr_file() was
    # already commented out here.

    print("Success!\nLook %s.html for your bookmarks" % user)
    print("Success!\nLook %s.adr for your bookmarks" % user)
# Run the export only when executed as a script; main()'s return value is
# passed to sys.exit() as the process exit status.
if __name__ == "__main__":
    sys.exit(main())