Python语言: 校内网发帖机(Python 版),请勿滥用
#!/usr/bin/python
# encoding=utf-8
"""Xiaonei (校内网) blog auto-poster -- please do not abuse it.

Logs in to Xiaonei, downloads stories from Qiushibaike (糗事百科) and
publishes each story as a separate blog entry.

This is a Python 2 script: the networking code relies on ``cookielib``,
``urllib2`` and the ``BeautifulSoup`` module.  Before running it, change
EMAIL and PASSWORD below.
"""

import sys
import time  # kept from the original script; currently unused
import urllib
from xml.sax.saxutils import unescape  # kept from the original; currently unused

# Placeholder credentials shipped with the script.
_PLACEHOLDER_EMAIL = 'xxxxx@gmail.com'
_PLACEHOLDER_PASSWORD = '*********'

# TODO: change the login name and password before use.
EMAIL = _PLACEHOLDER_EMAIL
PASSWORD = _PLACEHOLDER_PASSWORD

USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.1; Windows NT 5.1; SV1)"

# NOTE(review): the host names appear to be missing from these URLs (e.g.
# 'http:///Login.do').  They are kept exactly as found -- fill in the real
# hosts before running the script.
URL_LOGIN = 'http:///Login.do'
URL_POST = 'http://blog./NewEntry.do'
URL_STORIES = "http:///qiushi/best/all/page/%d"

# Form fields sent with every blog entry, in addition to title and body.
POST_FIELDS = {'relative_optype': 'publisher', 'blogControl': '1'}

# Attribution footer appended to every posted story.
ATTRIBUTION = '<br/><a href="http://www.">来自糗事百科</a><br/>'


def formalize(text):
    """Return *text* with every non-blank line stripped and followed by a blank line.

    The text is split on newlines, each line is stripped of surrounding
    whitespace, empty lines are dropped, and every remaining line is
    terminated with two newlines.  An input with no non-blank lines gives
    an empty string.
    """
    stripped = (line.strip() for line in text.split(u'\n'))
    return u''.join(line + u'\n\n' for line in stripped if line)


def _build_opener():
    """Return a urllib2 opener that keeps cookies and sends USER_AGENT."""
    # Imported here so that importing this module (e.g. to reuse
    # formalize) does not require the Python 2-only networking modules.
    import cookielib
    import urllib2

    opener = urllib2.build_opener(
        urllib2.HTTPCookieProcessor(cookielib.CookieJar()))
    opener.addheaders = [("User-Agent", USER_AGENT)]
    return opener


def _submit(opener, url, fields):
    """POST the url-encoded *fields* to *url* and close the response."""
    response = opener.open(url, urllib.urlencode(fields))
    # The response body is not needed; close it so the connection is
    # released instead of lingering until garbage collection.
    response.close()


def _fetch_stories(page):
    """Return the HTML of every ``<div class="content">`` on story page *page*."""
    import urllib2
    from BeautifulSoup import BeautifulSoup  # For processing HTML

    response = urllib2.urlopen(URL_STORIES % page)
    try:
        html = response.read()
    finally:
        response.close()
    contents = BeautifulSoup(html).findAll('div', "content")
    # Each fragment is re-parsed and re-serialised, exactly as the original
    # script did, so the posted markup is unchanged.
    return [str(BeautifulSoup(str(node))) for node in contents]


def main(first_page=11, last_page=11):
    """Log in and post every story from pages first_page..last_page (inclusive).

    Returns 1 without touching the network when EMAIL and PASSWORD are
    still the shipped placeholders, and 0 after all stories were posted.
    """
    if (EMAIL, PASSWORD) == (_PLACEHOLDER_EMAIL, _PLACEHOLDER_PASSWORD):
        print("ERROR! you need to update the password to be successful!")
        return 1

    # Log in to Xiaonei; afterwards the session cookie is in the opener.
    opener = _build_opener()
    _submit(opener, URL_LOGIN, (('email', EMAIL), ('password', PASSWORD)))

    # Download the Qiushibaike stories and post them one by one.
    count = 0
    for page in range(first_page, last_page + 1):
        for story in _fetch_stories(page):
            count += 1
            print("processing page %d, %d items added" % (page, count))
            fields = dict(POST_FIELDS)
            fields['title'] = '糗事-%d' % count
            fields['body'] = story + ATTRIBUTION
            # Barring surprises, the entry is published once this returns.
            _submit(opener, URL_POST, fields)
    return 0


if __name__ == '__main__':
    sys.exit(main())
|
|