爬虫破解知乎登录(不使用Selenium模块)

前两步简单稍微细心寻找规律即可

其中最难的是第三步,应该是前端对提交的数据进行了js加密

这里没什么技巧,全凭感觉:因为登录提交的url是https://www.zhihu.com/api/v3/oauth/sign_in

我们进行search搜索sign_in,这时候我们会搜索到一条jsmai......js

然后凭身为程序猿的直觉,在获取到的js代码里搜索encrypt。为什么搜这个?因为一般程序猿不会瞎命名

然后看着看着我们会发现一条return __g._encrypt(encodeURIComponent(e))

凭感觉这条就可能是,打断点!走起

然后我们随意登录,会发现这里的值是

client_id=c3cef7c66a1843f8b3a9e6a1e3160e20&grant_type=password&timestamp=1571641341225&source=com.zhihu.web&signature=628b0f124304c7eb3d7bb12f884666f24fba099c&username=%2B8615757876283&password=11223344&captcha=e7e6&lang=en&utm_source=&ref_source=other_https%3A%2F%2

我的天,这个不就是我们要的嘛~~

重复几次失败的登录操作

我们发现client_id参数是不变的;timestamp是时间戳,猜一猜、尝试一下,其实它就是str(int(time.time() * 1000))

唯一难点就是signature

这里还是凭我们程序员的直觉,搜索signature

我们会发现一段js

//这不是告诉我们加密规则了吗
var n = Date.now()
  , r = new i.a("SHA-1","TEXT"); //sha1加密方式
return r.setHMACKey("d1b964811afb40118a12068ff74a12f4", "TEXT"), //盐d1b964811afb40118a12068ff74a12f4
r.update(e), //提交的参数e,a,n,"com.zhihu.web"
r.update(a),
r.update("com.zhihu.web"),
r.update(String(n)),
Object.assign({
    clientId: a, //赤裸裸的告诉我们a是clientId
    grantType: e, //打断点我们知道他是'password'
    timestamp: n, //时间戳
    source: "com.zhihu.web",
    signature: r.getHMAC("HEX") //这里呢可以打断点验证我们想法
}, t)

分析完毕代码走起

二.推理后编写的代码 代码.py import base64 import hmac import time from hashlib import sha1 from urllib.parse import urlencode import execjs from PIL import Image from urllib import parse from requests_html import HTMLSession class ZhiHu: def __init__(self): self.session = HTMLSession() self.get_code_api = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en' self.get_cookies_api = 'https://www.zhihu.com/signin?next=%2F' self.login_api = 'https://www.zhihu.com/api/v3/oauth/sign_in' self.captcha = '' self.login_form_data = '' self.code = '' self.signature='' def get_cookies(self): self.session.get(self.get_cookies_api) def get_code(self): # 发起获取验证码类型 res = self.session.get(self.get_code_api) show_captcha = res.json()['show_captcha'] if show_captcha: # 获取验证的二进制 res = self.session.put(self.get_code_api) img_base64 = res.json().get('img_base64') img_content = base64.b64decode(img_base64) # 保存 with open('captcha.png', 'wb') as f: f.write(img_content) # 手动处理验证码,不手动可以去打码平台找 img_obj = Image.open('captcha.png') img_obj.show() self.captcha = input('输入验证码:') r = self.session.post(url=self.get_code_api, data={'input_text': self.captcha}) # 验证是否通过 susssion_msg = r.json().get('success') if susssion_msg: print('验证通过') else: self.get_code() # 加密的签名 def get_signature(self): r = hmac.new(b'd1b964811afb40118a12068ff74a12f4', digestmod=sha1) r.update(b'password') r.update(b'c3cef7c66a1843f8b3a9e6a1e3160e20') r.update(b'com.zhihu.web') r.update(str(int(time.time() * 1000)).encode('utf-8')) self.signature = r.hexdigest() def login(self): self.login_form_data = { 'client_id': 'c3cef7c66a1843f8b3a9e6a1e3160e20', 'grant_type': 'password', 'timestamp': str(int(time.time() * 1000)), 'source': 'com.zhihu.web', 'signature': self.signature, 'username': '+8615757876283', 'password': 'qwe16745', 'captcha': self.captcha, 'lang': 'en', 'utm_source': '', 'ref_source': 'other_https://www.zhihu.com/signin?next=%2F' } headers = { 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, 
like Gecko) Chrome/75.0.3770.142 Safari/537.36', 'x-zse-83':'3_2.0', 'content-type':'application/x-www-form-urlencoded', } with open('Js_encryption.js','rt',encoding='utf-8') as f: js_code = f.read() # 调用js js_obj = execjs.compile(js_code) #js代码中是 return __g._encrypt(encodeURIComponent(e)) ,所有我们要对login_form_data进行url编码 # js_obj.call('函数名',参数) res = js_obj.call('b',urlencode(self.login_form_data )) print(res) if __name__ == '__main__': zhihu_obj = ZhiHu() zhihu_obj.get_cookies() zhihu_obj.get_code() zhihu_obj.get_signature() zhihu_obj.login() Js_encryption.js function t(e) { return (t = "function" == typeof Symbol && "symbol" == typeof Symbol.A ? function(e) { return typeof e } : function(e) { return e && "function" == typeof Symbol && e.constructor === Symbol && e !== Symbol.prototype ? "symbol" : typeof e } )(e) } Object.defineProperty(exports, "__esModule", { value: !0 }); var A = "2.0" , __g = {}; function s() {} function i(e) { this.t = (2048 & e) >> 11, this.s = (1536 & e) >> 9, this.i = 511 & e, this.h = 511 & e } function h(e) { this.s = (3072 & e) >> 10, this.h = 1023 & e } function a(e) { this.a = (3072 & e) >> 10, this.c = (768 & e) >> 8, this.n = (192 & e) >> 6, this.t = 63 & e } function c(e) { this.s = e >> 10 & 3, this.i = 1023 & e } function n() {} function e(e) { this.a = (3072 & e) >> 10, this.c = (768 & e) >> 8, this.n = (192 & e) >> 6, this.t = 63 & e } function o(e) { this.h = (4095 & e) >> 2, this.t = 3 & e } function r(e) { this.s = e >> 10 & 3, this.i = e >> 2 & 255, this.t = 3 & e } s.prototype.e = function(e) { e.o = !1 } , i.prototype.e = function(e) { switch (this.t) { case 0: e.r[this.s] = this.i; break; case 1: e.r[this.s] = e.k[this.h] } } , h.prototype.e = function(e) { e.k[this.h] = e.r[this.s] } , a.prototype.e = function(e) { switch (this.t) { case 0: e.r[this.a] = e.r[this.c] + e.r[this.n]; break; case 1: e.r[this.a] = e.r[this.c] - e.r[this.n]; break; case 2: e.r[this.a] = e.r[this.c] * e.r[this.n]; break; case 3: e.r[this.a] = 
e.r[this.c] / e.r[this.n]; break; case 4: e.r[this.a] = e.r[this.c] % e.r[this.n]; break; case 5: e.r[this.a] = e.r[this.c] == e.r[this.n]; break; case 6: e.r[this.a] = e.r[this.c] >= e.r[this.n]; break; case 7: e.r[this.a] = e.r[this.c] || e.r[this.n]; break; case 8: e.r[this.a] = e.r[this.c] && e.r[this.n]; break; case 9: e.r[this.a] = e.r[this.c] !== e.r[this.n]; break; case 10: e.r[this.a] = t(e.r[this.c]); break; case 11: e.r[this.a] = e.r[this.c]in e.r[this.n]; break; case 12: e.r[this.a] = e.r[this.c] > e.r[this.n]; break; case 13: e.r[this.a] = -e.r[this.c]; break; case 14: e.r[this.a] = e.r[this.c] < e.r[this.n]; break; case 15: e.r[this.a] = e.r[this.c] & e.r[this.n]; break; case 16: e.r[this.a] = e.r[this.c] ^ e.r[this.n]; break; case 17: e.r[this.a] = e.r[this.c] << e.r[this.n]; break; case 18: e.r[this.a] = e.r[this.c] >>> e.r[this.n]; break; case 19: e.r[this.a] = e.r[this.c] | e.r[this.n]; break; case 20: e.r[this.a] = !e.r[this.c] } } , c.prototype.e = function(e) { e.Q.push(e.C), e.B.push(e.k), e.C = e.r[this.s], e.k = []; for (var t = 0; t < this.i; t++) e.k.unshift(e.f.pop()); e.g.push(e.f), e.f = [] } , n.prototype.e = function(e) { e.C = e.Q.pop(), e.k = e.B.pop(), e.f = e.g.pop() } , e.prototype.e = function(e) { switch (this.t) { case 0: e.u = e.r[this.a] >= e.r[this.c]; break; case 1: e.u = e.r[this.a] <= e.r[this.c]; break; case 2: e.u = e.r[this.a] > e.r[this.c]; break; case 3: e.u = e.r[this.a] < e.r[this.c]; break; case 4: e.u = e.r[this.a] == e.r[this.c]; break; case 5: e.u = e.r[this.a] != e.r[this.c]; break; case 6: e.u = e.r[this.a]; break; case 7: e.u = !e.r[this.a] } } , o.prototype.e = function(e) { switch (this.t) { case 0: e.C = this.h; break; case 1: e.u && (e.C = this.h); break; case 2: e.u || (e.C = this.h); break; case 3: e.C = this.h, e.w = null } e.u = !1 } , r.prototype.e = function(e) { switch (this.t) { case 0: for (var t = [], n = 0; n < this.i; n++) t.unshift(e.f.pop()); e.r[3] = e.r[this.s](t[0], t[1]); break; case 
1: for (var r = e.f.pop(), i = [], o = 0; o < this.i; o++) i.unshift(e.f.pop()); e.r[3] = e.r[this.s][r](i[0], i[1]); break; case 2: for (var a = [], s = 0; s < this.i; s++) a.unshift(e.f.pop()); e.r[3] = new e.r[this.s](a[0],a[1]) } } ; var k = function(e) { for (var t = 66, n = [], r = 0; r < e.length; r++) { var i = 24 ^ e.charCodeAt(r) ^ t; n.push(String.fromCharCode(i)), t = i } return n.join("") }; function Q(e) { this.t = (4095 & e) >> 10, this.s = (1023 & e) >> 8, this.i = 1023 & e, this.h = 63 & e } function C(e) { this.t = (4095 & e) >> 10, this.a = (1023 & e) >> 8, this.c = (255 & e) >> 6 } function B(e) { this.s = (3072 & e) >> 10, this.h = 1023 & e } function f(e) { this.h = 4095 & e } function g(e) { this.s = (3072 & e) >> 10 } function u(e) { this.h = 4095 & e } function w(e) { this.t = (3840 & e) >> 8, this.s = (192 & e) >> 6, this.i = 63 & e } function G() { this.r = [0, 0, 0, 0], this.C = 0, this.Q = [], this.k = [], this.B = [], this.f = [], this.g = [], this.u = !1, this.G = [], this.b = [], this.o = !1, this.w = null, this.U = null, this.F = [], this.R = 0, this.J = { 0: s, 1: i, 2: h, 3: a, 4: c, 5: n, 6: e, 7: o, 8: r, 9: Q, 10: C, 11: B, 12: f, 13: g, 14: u, 15: w } } Q.prototype.e = function(e) { switch (this.t) { case 0: e.f.push(e.r[this.s]); break; case 1: e.f.push(this.i); break; case 2: e.f.push(e.k[this.h]); break; case 3: e.f.push(k(e.b[this.h])) } } , C.prototype.e = function(A) { switch (this.t) { case 0: var t = A.f.pop(); A.r[this.a] = A.r[this.c][t]; break; case 1: var s = A.f.pop() , i = A.f.pop(); A.r[this.c][s] = i; break; case 2: var h = A.f.pop(); A.r[this.a] = eval(h) } } , B.prototype.e = function(e) { e.r[this.s] = k(e.b[this.h]) } , f.prototype.e = function(e) { e.w = this.h } , g.prototype.e = function(e) { throw e.r[this.s] } , u.prototype.e = function(e) { var t = this , n = [0]; e.k.forEach(function(e) { n.push(e) }); var r = function(r) { var i = new G; return i.k = n, i.k[0] = r, i.v(e.G, t.h, e.b, e.F), i.r[3] }; 
r.toString = function() { return "() { [native code] }" } , e.r[3] = r } , w.prototype.e = function(e) { switch (this.t) { case 0: for (var t = {}, n = 0; n < this.i; n++) { var r = e.f.pop(); t[e.f.pop()] = r } e.r[this.s] = t; break; case 1: for (var i = [], o = 0; o < this.i; o++) i.unshift(e.f.pop()); e.r[this.s] = i } } , G.prototype.D = function(e) { for (var t = atob(e), n = t.charCodeAt(0) << 8 | t.charCodeAt(1), r = [], i = 2; i < n + 2; i += 2) r.push(t.charCodeAt(i) << 8 | t.charCodeAt(i + 1)); this.G = r; for (var o = [], a = n + 2; a < t.length; ) { var s = t.charCodeAt(a) << 8 | t.charCodeAt(a + 1) , c = t.slice(a + 2, a + 2 + s); o.push(c), a += s + 2 } this.b = o } , G.prototype.v = function(e, t, n) { for (t = t || 0, n = n || [], this.C = t, "string" == typeof e ? this.D(e) : (this.G = e, this.b = n), this.o = !0, this.R = Date.now(); this.o; ) { var r = this.G[this.C++]; if ("number" != typeof r) break; var i = Date.now(); if (500 < i - this.R) return; this.R = i; try { this.e(r) } catch (e) { this.U = e, this.w && (this.C = this.w) } } } , G.prototype.e = function(e) { var t = (61440 & e) >> 12; new this.J[t](e).e(this) } , "undefined" != typeof window && (new 
G).v("AxjgB5MAnACoAJwBpAAAABAAIAKcAqgAMAq0AzRJZAZwUpwCqACQACACGAKcBKAAIAOcBagAIAQYAjAUGgKcBqFAuAc5hTSHZAZwqrAIGgA0QJEAJAAYAzAUGgOcCaFANRQ0R2QGcOKwChoANECRACQAsAuQABgDnAmgAJwMgAGcDYwFEAAzBmAGcSqwDhoANECRACQAGAKcD6AAGgKcEKFANEcYApwRoAAxB2AGcXKwEhoANECRACQAGAKcE6AAGgKcFKFANEdkBnGqsBUaADRAkQAkABgCnBagAGAGcdKwFxoANECRACQAGAKcGKAAYAZx+rAZGgA0QJEAJAAYA5waoABgBnIisBsaADRAkQAkABgCnBygABoCnB2hQDRHZAZyWrAeGgA0QJEAJAAYBJwfoAAwFGAGcoawIBoANECRACQAGAOQALAJkAAYBJwfgAlsBnK+sCEaADRAkQAkABgDkACwGpAAGAScH4AJbAZy9rAiGgA0QJEAJACwI5AAGAScH6AAkACcJKgAnCWgAJwmoACcJ4AFnA2MBRAAMw5gBnNasCgaADRAkQAkABgBEio0R5EAJAGwKSAFGACcKqAAEgM0RCQGGAYSATRFZAZzshgAtCs0QCQAGAYSAjRFZAZz1hgAtCw0QCQAEAAgB7AtIAgYAJwqoAASATRBJAkYCRIANEZkBnYqEAgaBxQBOYAoBxQEOYQ0giQKGAmQABgAnC6ABRgBGgo0UhD/MQ8zECALEAgaBxQBOYAoBxQEOYQ0gpEAJAoYARoKNFIQ/zEPkAAgChgLGgkUATmBkgAaAJwuhAUaCjdQFAg5kTSTJAsQCBoHFAE5gCgHFAQ5hDSCkQAkChgBGgo0UhD/MQ+QACAKGAsaCRQCOYGSABoAnC6EBRoKN1AUEDmRNJMkCxgFGgsUPzmPkgAaCJwvhAU0wCQFGAUaCxQGOZISPzZPkQAaCJwvhAU0wCQFGAUaCxQMOZISPzZPkQAaCJwvhAU0wCQFGAUaCxQSOZISPzZPkQAaCJwvhAU0wCQFGAkSAzRBJAlz/B4FUAAAAwUYIAAIBSITFQkTERwABi0GHxITAAAJLwMSGRsXHxMZAAk0Fw8HFh4NAwUABhU1EBceDwAENBcUEAAGNBkTGRcBAAFKAAkvHg4PKz4aEwIAAUsACDIVHB0QEQ4YAAsuAzs7AAoPKToKDgAHMx8SGQUvMQABSAALORoVGCQgERcCAxoACAU3ABEXAgMaAAsFGDcAERcCAxoUCgABSQAGOA8LGBsPAAYYLwsYGw8AAU4ABD8QHAUAAU8ABSkbCQ4BAAFMAAktCh8eDgMHCw8AAU0ADT4TGjQsGQMaFA0FHhkAFz4TGjQsGQMaFA0FHhk1NBkCHgUbGBEPAAFCABg9GgkjIAEmOgUHDQ8eFSU5DggJAwEcAwUAAUMAAUAAAUEADQEtFw0FBwtdWxQTGSAACBwrAxUPBR4ZAAkqGgUDAwMVEQ0ACC4DJD8eAx8RAAQ5GhUYAAFGAAAABjYRExELBAACWhgAAVoAQAg/PTw0NxcQPCQ5C3JZEBs9fkcnDRcUAXZia0Q4EhQgXHojMBY3MWVCNT0uDhMXcGQ7AUFPHigkQUwQFkhaAkEACjkTEQspNBMZPC0ABjkTEQsrLQ=="); var b = function(e) { return __g._encrypt(encodeURIComponent(e)) }; exports.ENCRYPT_VERSION = A, exports.default = b 三.调试时候发生的错误 1.可能会导致错误一(我电脑有环境所有我不清楚) #这是execjs模块他执行js我们必须给他js能执行的环境,电脑缺环境 #execjs会自动使用当前电脑上的运行时环境(建议用nodejs,与Phantomjs) 2.错误信息二(肯定会报错的哦) execjs._exceptions.ProgramError: TypeError: __g._encrypt is not a function

我们execjs除了nodejs,还需要浏览器环境:浏览器上还需要document以及window对象,所以呢我们要安装环境

移动至项目目录

执行npm install jsdom

代码.py中我们要修改内容

#js_obj = execjs.compile(js_code)修改成 js_obj = execjs.compile(js_code,cwd='node_modules') #也就是导入我们下载完成后的node_modules的文件

然后呢我们在Js_encryption.js

内容版权声明:除非注明,否则皆为本站原创文章。

转载注明出处:https://www.heiqu.com/wpdgww.html