2014/06/28修改
之前的写得实在太烂了, 所以重写了代码
”
流程:
1. 访问一次登录框架的源码(地址见下方代码中的URL_LOAD_INFO), 获取login_sig, appid, pt_version, mibao, pt_lang等参数;
2. 访问检查验证码的地址(https://ssl.ptlogin2.qq.com/check): 如果返回的第一个参数为0, 则不需要输入验证码, 返回的第二个参数就是验证码; 否则需要再访问验证码图片的地址(https://ssl.captcha.qq.com/getimage), 下载图片后手动输入验证码;
3. 根据QQ号, QQ密码, 验证码的值计算得出p的值(第一次登录需要用到, 计算方法在PSWEncrypt模块中);
4. 构建相关的Form, 并对地址(https://ssl.ptlogin2.qq.com/login)发送请求(必须使用Get方法, Post会出错), 登录成功后会返回一些参数, 例如二次登录需要访问的地址等, 其中ptwebqq在cookie中, 需要另外获取;
5. 访问一次第一次登录时返回的地址, 构建相关的Form(参数大多在之前已经得到了, 其中clientid参数在数字10000000-99999999中随便选一个就行了), 并向地址(http://d.web2.qq.com/channel/login2)发送请求, 登录成功后会返回相关的参数, 如uin, status, vfwebqq, psessionid等, 保存下来, 后面其他操作会用到的。
6. 至此, webqq登录已经完成。
相关代码:
WebQQLogin.py
- #coding=utf-8
-
- import re;
- import json;
- import http;
- import urllib;
- import random;
- import http.cookiejar;
- import urllib.request;
- from urllib.parse import urlencode;
- from PSWEncrypt import PSWEncrypt;
- from Queryable import *;
-
# Endpoint of the first login step; the query string is appended directly
# after the trailing "?".
URL_LOGIN = "https://ssl.ptlogin2.qq.com/login?"
# Endpoint of the second login step (receives a POSTed form).
URL_LOGIN2 = "http://d.web2.qq.com/channel/login2"

# Open w.qq.com in Chrome and view the login frame's source to find this URL;
# the page behind it contains most of the values needed for logging in.
URL_LOAD_INFO = "https://ui.ptlogin2.qq.com/cgi-bin/login?daid=164&target=self&style=16&mibao_css=m_webqq&appid=501004106&enable_qlogin=0&no_verifyimg=1&s_url=http%3A%2F%2Fw.qq.com%2Fproxy.html&f_url=loginerroralert&strong_login=1&login_state=10&t=20131024001"
# Endpoint that reports whether a captcha is required for an account.
URL_VERYCODE = "https://ssl.ptlogin2.qq.com/check?"
# Endpoint that serves the captcha image.
URL_VERYCODE_IMG = "https://ssl.captcha.qq.com/getimage?"

# Headers used when loading the login frame and for the first login request.
webQQHeader = {
    "Host": "ui.ptlogin2.qq.com",
    "Referer": "http://w.qq.com/",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.1916.153 Safari/537.36",
}

# Most of the later modules need this header as well.
webQQHeader2 = {
    "Referer": "http://d.web2.qq.com/proxy.html?v=20130916001&callback=1&id=2",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36",
}
-
class WebQQLogin(Queryable):
    """Two-stage WebQQ login client.

    Every value collected along the way is stored through the ``Queryable``
    base class (``setQuery`` / ``queryInfo``). ``login`` returns the whole
    dictionary on success and ``None`` on failure.
    """

    # Patterns are compiled once, as raw strings, instead of on every call.
    _RE_LOGIN_SIG = re.compile(r'g_login_sig=encodeURIComponent\("([^"]+)"\)')
    _RE_APPID = re.compile(r'g_appid =encodeURIComponent\("(\d+)"\)')
    _RE_PT_VERSION = re.compile(r'g_pt_version=encodeURIComponent\("(\d+)"\)')
    _RE_MIBAO = re.compile(r'mibao_css=encodeURIComponent\("(\w+)"\)')
    _RE_PT_LANG = re.compile(r'g_lang="([^"]+)"')
    _RE_HIDDEN_INPUT = re.compile(
        r'<input\s+type="hidden"\s+name="([^"]*)"\s+value="([^"]*)"[^/]+/>')
    _RE_CHECK_VC = re.compile(
        r"ptui_checkVC\('(\d+)','([^']*)','([^']+)', '([^']*)'\);")
    _RE_PTUI_CB = re.compile(
        r"ptuiCB\('([^']*)',\s*'([^']*)',\s*'([^']*)',"
        r"\s*'([^']*)',\s*'([^']*)',\s*'([^']*)'\);")

    def __init__(self):
        """Create the cookie jar, the cookie-aware opener and a random clientid."""
        # Store an 8-digit clientid up front; it is uploaded during the
        # second login.
        Queryable.__init__(
            self, {"clientid": str(random.randint(10000000, 99999999))})
        self._cookie = http.cookiejar.LWPCookieJar()
        self._opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(self._cookie))

    @staticmethod
    def _firstMatch(pattern, text, what):
        """Return the first ``findall`` result of *pattern* in *text*.

        Raises:
            IndexError: when nothing matches. This is the exception type the
                bare ``findall(...)[0]`` raised before; it now carries a
                message naming *what* was missing.
        """
        found = pattern.findall(text)
        if not found:
            raise IndexError(
                "could not find {0} in the server response".format(what))
        return found[0]

    def _fetch(self, request, data=None):
        """Open *request* through the shared opener and return the raw body.

        The response is closed as soon as the body has been read.
        """
        with self._opener.open(request, data) as response:
            return response.read()

    def _readInfo(self, url):
        """Download the login frame at *url* and store the values it embeds.

        Stores every hidden ``<input>`` name/value pair, plus ``login_sig``,
        ``appid``, ``js_ver``, ``mibao`` and ``pt_lang``.

        Raises:
            IndexError: if one of the expected script variables is missing.
        """
        request = urllib.request.Request(url, headers=webQQHeader)
        page = self._fetch(request).decode("utf-8")

        # All scalar values are extracted before anything is stored, so a
        # missing value leaves the stored state untouched.
        login_sig = self._firstMatch(self._RE_LOGIN_SIG, page, "g_login_sig")
        appid = self._firstMatch(self._RE_APPID, page, "g_appid")
        pt_version = self._firstMatch(self._RE_PT_VERSION, page, "g_pt_version")
        mibao = self._firstMatch(self._RE_MIBAO, page, "mibao_css")
        pt_lang = self._firstMatch(self._RE_PT_LANG, page, "g_lang")

        for name, value in self._RE_HIDDEN_INPUT.findall(page):
            self.setQuery(name, value)
        self.setQueryEx({
            "login_sig": login_sig,
            "appid": appid,
            "js_ver": pt_version,
            "mibao": mibao,
            "pt_lang": pt_lang,
        })

    def _requestVerifyCode(self, param):
        """Download the captcha image and ask the user to type it in.

        The image is written to ``verifyCode.jpg`` in the current directory.

        Args:
            param: second field of the ``ptui_checkVC`` reply; currently
                unused.

        Returns:
            The text entered by the user.
        """
        refererQuery = {
            "daid": self.queryInfo("daid"),
            "target": "self",
            "style": "16",
            "mibao_css": self.queryInfo("mibao"),
            "appid": self.queryInfo("appid"),
            "enable_qlogin": "0",
            "no_verifyimg": "1",
            "s_url": "http://w.qq.com/proxy.html",
            "f_url": "loginerroralert",
            "strong_login": "1",
            "login_state": "10",
            "t": "20131024001",
        }
        header = {
            # The value must be the bare host name; it previously contained
            # a duplicated "Host:" prefix.
            "Host": "ssl.captcha.qq.com",
            "Referer": "https://ui.ptlogin2.qq.com/cgi-bin/login?" + urlencode(refererQuery),
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36",
        }
        data = {
            "aid": self.queryInfo("appid"),
            "r": "0.1642276826314628",  # presumably a cache-busting value; sent as a constant
            "uin": self.queryInfo("uin"),
        }
        request = urllib.request.Request(
            URL_VERYCODE_IMG + urlencode(data), headers=header)
        with open("verifyCode.jpg", "wb") as image:
            image.write(self._fetch(request))
        return input("输入验证码:")

    def _getVerycode(self, id):
        """Ask the server whether account *id* needs a captcha.

        Returns:
            The second field of the ``ptui_checkVC`` reply when its first
            field is ``"0"``; otherwise the captcha typed in by the user.

        Raises:
            IndexError: if the reply is not a ``ptui_checkVC(...)`` call.
        """
        form = {
            "login_sig": self.queryInfo("login_sig"),
            "r": "0.3272944095078856",
            "js_type": "0",
            "appid": self.queryInfo("appid"),
            "js_ver": self.queryInfo("js_ver"),
            "u1": "http://w.qq.com/proxy.html",
            "uin": id,
        }
        request = urllib.request.Request(URL_VERYCODE + urlencode(form))
        reply = self._fetch(request).decode("utf-8")
        fields = self._firstMatch(self._RE_CHECK_VC, reply, "ptui_checkVC(...)")
        if fields[0] == "0":
            return fields[1]
        return self._requestVerifyCode(fields[1])

    def _firstLogin(self, id, psw):
        """Perform the first login step.

        On success stores ``nick`` and ``ptwebqq``.

        Returns:
            The address that must be visited before the second login, or
            ``None`` when the server reports a failure. In that case the
            parsed reply is printed for debugging.

        Raises:
            IndexError: if the reply is not a ``ptuiCB(...)`` call, or the
                ``ptwebqq`` cookie was not set.
        """
        verifycode = self._getVerycode(id)
        # Most values come from the login frame fetched earlier, to avoid
        # hard-coding; some hard-coded values remain.
        form = {
            "u": id,
            "p": PSWEncrypt.encrypt(id, psw, verifycode),
            "verifycode": verifycode,
            "webqq_type": self.queryInfo("webqq_type"),
            "remember_uin": self.queryInfo("remember_uin"),
            "login2qq": self.queryInfo("login2qq"),
            "aid": self.queryInfo("appid"),
            "u1": "{0}?login2qq={1}&webqq_type={2}".format(
                self.queryInfo("u1"),
                self.queryInfo("login2qq"),
                self.queryInfo("webqq_type")),
            "h": self.queryInfo("h"),
            "ptredirect": self.queryInfo("ptredirect"),
            "ptlang": self.queryInfo("pt_lang"),
            "daid": self.queryInfo("daid"),
            "from_ui": self.queryInfo("from_ui"),
            "pttype": self.queryInfo("pttype"),
            "dumy": self.queryInfo("dumy"),
            "fp": self.queryInfo("fp"),
            "action": "0-23-34008",
            "mibao_css": self.queryInfo("mibao"),
            "t": "1",
            "g": "1",
            "js_type": "0",
            "js_ver": self.queryInfo("js_ver"),
            "login_sig": self.queryInfo("login_sig"),
        }
        # The form goes into the query string: per the original author's
        # note, POSTing to this endpoint fails.
        request = urllib.request.Request(
            URL_LOGIN + urlencode(form), headers=webQQHeader)
        reply = self._fetch(request).decode("utf-8")
        fields = self._firstMatch(self._RE_PTUI_CB, reply, "ptuiCB(...)")
        if not (fields[0] == "0" and fields[1] == "0"):
            print(fields)  # debug
            return None

        # First login succeeded: keep the returned values.
        self.setQuery("nick", fields[-1])
        # ptwebqq is only available as a cookie; read it from the jar rather
        # than from the jar's string representation.
        ptwebqq = next(
            (cookie.value for cookie in self._cookie
             if cookie.name == "ptwebqq" and cookie.value is not None),
            None)
        if ptwebqq is None:
            # IndexError keeps the exception type the earlier lookup raised.
            raise IndexError("ptwebqq cookie was not set by the login response")
        self.setQuery("ptwebqq", ptwebqq)
        return fields[2]

    def _secLogin(self, url):
        """Perform the second login step.

        Visits *url* (the address returned by the first login), then POSTs
        the session form to ``URL_LOGIN2``. On success stores ``status``,
        ``vfwebqq`` and ``psessionid``.

        Returns:
            ``True`` when the reply's ``retcode`` is 0. Otherwise ``False``,
            after printing the reply for debugging.
        """
        # Visit the address returned by the first login; its body is not used.
        self._opener.open(url).close()
        payload = json.dumps(
            {
                "ptwebqq": self.queryInfo("ptwebqq"),
                # Sent as a JSON number, as before.
                "clientid": int(self.queryInfo("clientid")),
                "psessionid": "",
                "status": "online",
            },
            separators=(",", ":"))
        request = urllib.request.Request(URL_LOGIN2, headers=webQQHeader2)
        body = urlencode({"r": payload}).encode("utf-8")
        ret = json.loads(self._fetch(request, body).decode("utf-8"))
        if ret["retcode"] != 0:
            print(ret)  # debug
            return False

        result = ret["result"]
        self.setQuery("status", result["status"])
        self.setQuery("vfwebqq", result["vfwebqq"])
        self.setQuery("psessionid", result["psessionid"])
        return True

    def login(self, id, psw):
        """Log in with account *id* and password *psw*.

        Returns:
            The dictionary of all collected values, including the opener
            under ``"opener"`` and ``webQQHeader2`` under ``"header"``.
            ``None`` if either login step fails.
        """
        self.setQuery("uin", id)
        self._readInfo(URL_LOAD_INFO)
        secLoginAddr = self._firstLogin(id, psw)
        if secLoginAddr is None:
            return None
        if not self._secLogin(secLoginAddr):
            return None

        self.setQuery("opener", self._opener)
        self.setQuery("header", webQQHeader2)
        # Hand back everything collected so far.
        return self.getDict()
-
def main():
    """Log in with the sample account and print what ``login`` returns."""
    qq = WebQQLogin()
    print(qq.login("397828451", "xxxx"))


if __name__ == "__main__":
    main()
Queryable.py
- #coding=utf-8
-
class Queryable:
    """Thin wrapper around a dictionary of named values.

    The dictionary passed to the constructor is stored by reference, not
    copied, and ``getDict`` returns that same object.
    """

    def __init__(self, dict):
        """Use *dict* as the backing store."""
        self.__dict = dict

    def queryInfo(self, key):
        """Return the value stored under *key*, or ``None`` if it is absent."""
        return self.__dict.get(key)

    def setQuery(self, key, value):
        """Store *value* under *key*, replacing any previous value."""
        self.__dict[key] = value

    def setQueryEx(self, dict):
        """Merge every entry of *dict* into the store."""
        self.__dict.update(dict)

    def find(self, key):
        """Return ``True`` if *key* is present in the store, else ``False``."""
        return key in self.__dict

    def getDict(self):
        """Return the backing dictionary itself."""
        return self.__dict