熊迪们大家好啊,过完了六一儿童节,星光俺又死回来了。 嗯~照例听首歌先。 今天和大家分享的内容是………………………………… 如何用VBA代码,爬取指定QQ账号在空间里的说说数据? 坦白的说,这事二步就可以解决了。 第一步,登录QQ或者TIM软件。 第二步,也就是最重要的一步,关注微信公众号:VBA编程学习与实践。后台回复关键词:说说 皮一下……开森…… 获取Excel模版后,打开,点击按钮,运行宏……
一个重要的说明: 代码使用了IE浏览器获取QQ空间的Cookie,并计算关键参数g_tk的值,因此需要先将IE浏览器设置为默认浏览器,否则运行代码会出现错误的提示信息。 …… ……
嗯,是时候回顾下过往的青春了。搞个小图表,可视化一下过去的时光里哪个月发的说说条数最多?哪个词出现的频率最高?再扩展下代码,看看哪个魂淡最爱到你的空间里扯淡?是基情四射还是情愫暗涌…… …… 往期内容推荐: 需要批量下载网上的图片,Excel行不行?当然行! 为了让您买到最廉价的图书,我们用VBA干了这件事…… …… 打个响指,好吧,本期代码如下所示。 Sub WebCrawlerQzone() Dim strURL As String Dim strCookie As String Dim strText As String Dim strGTK As String Dim strKey As String Dim strUserName As String Dim strMsg As String Dim intPageNum As Long Dim lngCreateTime As Long Dim k As Long Dim i As Long Dim blnClick As Boolean Dim objIE As Object Dim objWINHTTP As Object Dim objDIC As Object Dim objDOM As Object Dim objTagA As Object Dim objList As Object Dim objWindow As Object Dim vntTime As Variant Dim vntQQNum As Variant Set objDIC = CreateObject('scripting.dictionary') Set objIE = CreateObject('InternetExplorer.Application') Set objWINHTTP = CreateObject('WinHttp.WinHttpRequest.5.1') Set objDOM = CreateObject('htmlfile') Set objWindow = objDOM.parentWindow strURL = 'https://xui.ptlogin2.qq.com/cgi-bin/xlogin?' strURL = strURL & 'proxy_url=https%3A//qzs.qq.com/' strURL = strURL & 'qzone/v6/portal/proxy.html' strURL = strURL & '&appid=549000912' strURL = strURL & '&s_url=https%3A%2F%2Fqzs.qzone.qq.com' _ & '%2Fqzone%2Fv5%2Floginsucc.html%3Fpara%3Dizone' With objIE .navigate strURL .Visible = False vntTime = Timer Do While Timer < vnttime="" +=""> Loop Do Until .readyState = 4 DoEvents Loop For Each objTagA In .document.getElementsByTagName('a') If objTagA.TabIndex = 2 Then strUserName = objTagA.innerText objTagA.Click blnClick = True Exit For End If Next If Not blnClick Then MsgBox strUserName & '您的QQ软件未登录或QQ空间未开通。' Exit Sub End If vntTime = Timer Do While Timer < vnttime="" +=""> Loop strCookie = .document.cookie .Quit End With strKey = Split(Split(strCookie, 'p_skey=')(1), ';')(0) strGTK = strGetGTK(strKey) vntQQNum = [b1].Value strURL = 'https://user.qzone.qq.com/' strURL = strURL & 'proxy/domain/taotao.qq.com/' strURL = strURL & 'cgi-bin/emotion_cgi_msglist_v6?' strURL = strURL & 'num=20' strURL = strURL & '&callback=_preloadCallback' strURL = strURL & '&format=jsonp' strURL = strURL & '&uin=' & vntQQNum strURL = strURL & '&g_tk=' & strGTK ActiveSheet.UsedRange.Offset(2).ClearContents k = 3 On Error Resume Next Application.ScreenUpdating = False Do While 1 = 1 intPageNum = intPageNum + 20 With objWINHTTP .Open 'GET', strURL & '&pos=' & intPageNum - 20, False .setRequestHeader 'Cookie', strCookie .send strText = .responseText End With strText = Split(strText, '_preloadCallback(')(1) strText = Left(strText, InStrRev(strText, ')') - 1) objDOM.write '' For i = 0 To objWindow.eval('data.msglist.length') - 1 k = k + 1 Set objList = objWindow.eval('data.msglist[' & i & ']') lngCreateTime = CallByName(objList, 'created_time', VbGet) If Not objDIC.exists(lngCreateTime) Then objDIC(lngCreateTime) = '' Else Exit Do End If Cells(k, 1) = CallByName(objList, 'createTime', VbGet) Cells(k, 2) = CallByName(objList, 'content', VbGet) Cells(k, 3) = CallByName(objList, 'cmtnum', VbGet) Next i Loop [A3:C3] = Array('日期', '说说', '评论人数') Application.ScreenUpdating = True strMsg = '用户:' & strUserName & vbCrLf & '您好!' strMsg = strMsg & '目标QQ' & vntQQNum strMsg = strMsg & '的说说数据已抓取完成。' MsgBox strMsg Set objIE = Nothing Set objWINHTTP = Nothing Set objDOM = Nothing Set objWindow = Nothing Set objDIC = Nothing Set objList = Nothing End Sub Function strGetGTK(ByVal strKey As String) As String Dim objNewDom As Object Dim objNewWindow As Object Dim strJSON As String Set objNewDom = CreateObject('htmlfile') Set objNewWindow = objNewDom.parentWindow With objNewWindow strJSON = 'gtk=function(skey)' strJSON = strJSON & '{for(var hash=5381,i=0,' strJSON = strJSON & 'len=skey.length;i<> strJSON = strJSON & 'hash+=(hash<> strJSON = strJSON & '+skey.charAt(i).charCodeAt();' strJSON = strJSON & 'return hash&2147483647}' strJSON = strJSON & '('' & strKey & '');' .execScript strJSON strGetGTK = .gtk End With Set objNewWindow = Nothing Set objNewDom = Nothing End Function
|