py文件:
import base64
import hashlib
import hmac
import json
import time
from http import cookiejar
from urllib import parse

import execjs
import requests
from fake_useragent import UserAgent
from PIL import Image
# Shared fake_useragent generator; `ua.random` is read wherever a User-Agent header is built.
ua = UserAgent()
class MyException(Exception):
    """Error carrying a status code and a human-readable message.

    Attributes:
        status: Status value supplied by the raiser.
        msg: Description of what went wrong.
    """

    def __init__(self, status, msg):
        # Forward both values to Exception so str(exc), repr(exc) and
        # exc.args are populated instead of being empty.
        super().__init__(status, msg)
        self.status = status
        self.msg = msg
class ZhiHu:
    """Zhihu web login client that persists its session cookies to disk.

    The login form is signed with HMAC-SHA1 (``get_signature``) and then
    encrypted by the JavaScript routine ``Q`` in ``./01.js`` (``__encrypt``)
    before it is posted to the sign-in endpoint.
    """

    def __init__(self, username=None, password=None):
        """Create the HTTP session and the login form template.

        :param username: phone number; prompted for at login time when empty
        :param password: account password; prompted for at login time when empty
        """
        self.username = username
        self.password = password
        self.session = requests.Session()
        self.session.headers = {
            "user-agent": ua.random,
            "referer": "https://www.zhihu.com/",
            'host': 'www.zhihu.com',
        }
        self.session.cookies = cookiejar.LWPCookieJar(filename="./cookies.txt")
        self.login_param = {
            "client_id": "c3cef7c66a1843f8b3a9e6a1e3160e20",
            "grant_type": "password",
            "source": "com.zhihu.WEB",
            "username": "",
            "password": "",
            "ref_source": "homepage",
            "utm_source": "baidu",
        }

    def load_cookies(self):
        """Load cookies from ./cookies.txt into the session.

        :return: True when the file was loaded, False when it is missing or unreadable
        """
        try:
            self.session.cookies.load(ignore_discard=True, ignore_expires=True)
        except (FileNotFoundError, cookiejar.LoadError):
            return False
        return True

    def login(self, captcha_lang: str = "en", is_load_cookies: bool = True):
        """Log in, reusing saved cookies when allowed.

        :param captcha_lang: captcha style; "en" is a typed code, any other
            value selects the click-the-upside-down-characters captcha
        :param is_load_cookies: whether to try the saved cookies first
        :return: True when the session ends up logged in, False otherwise
        """
        # Test the flag first so the cookie file is not loaded into the
        # session when the caller asked for a fresh login.
        if is_load_cookies and self.load_cookies():
            print("读取cookies文件")
            if self.check__login():
                print("登陆成功")
                return True
            print("cookies已经失效")

        # Not logged in yet: make sure credentials are available.
        self.check_user_input()

        # Fetch the xsrf token and build the request headers.
        headers = self.session.headers.copy()
        xsrf = self.get_xsrf()
        headers.update({
            "content-type": "application/x-www-form-urlencoded",
            "x-xsrftoken": xsrf,
            "x-zse-83": "3_1.1",
        })
        self.login_param.update({
            "username": self.username,
            "password": self.password,
            "lang": captcha_lang,
        })

        # Build the form data; the captcha is resolved before signing.
        timestamp = int(time.time() * 1000)
        self.login_param.update({
            "timestamp": timestamp,
            "captcha": self.get_captcha() or "",
            "signature": self.get_signature(timestamp),
        })
        formdata = self.__encrypt(self.login_param)
        url = "https://www.zhihu.com/api/v3/oauth/sign_in"
        self.session.post(url=url, headers=headers, data=formdata)

        if self.check__login():
            # Mirror the flags used by load_cookies so cookies marked
            # "discard" are actually written to the file.
            self.session.cookies.save(ignore_discard=True, ignore_expires=True)
            print("cookies以写入文件")
            print("登录成功")
            return True
        print("登录失败")
        return False

    def check__login(self):
        """Return True when the home page answers 200 without redirecting."""
        url = "https://www.zhihu.com/"
        response = self.session.get(url=url, allow_redirects=False)
        # A 302 (or any other non-200 status) is treated as not logged in.
        return response.status_code == 200

    def check_user_input(self):
        """Prompt for missing credentials and prefix digit-only numbers with +86."""
        if not self.username:
            self.username = input("请输入手机号>>:").strip()
        if self.username.isdigit() and not self.username.startswith("+86"):
            self.username = "+86" + self.username
        if not self.password:
            self.password = input("请输入密码>>:").strip()

    def _captcha_api(self):
        """Return the captcha endpoint matching the "lang" login parameter."""
        if self.login_param.get("lang") == "en":
            return "https://www.zhihu.com/api/v3/oauth/captcha?lang=en"
        return "https://www.zhihu.com/api/v3/oauth/captcha?lang=cn"

    def get_captcha(self):
        """Solve the captcha interactively when the server asks for one.

        The endpoint is called with GET, then PUT and POST when a captcha is
        required.

        :return: the captcha answer, or "" when no captcha is required
        """
        lang = self.login_param.get("lang")
        captcha_api = self._captcha_api()
        response = self.session.get(captcha_api)
        if not response.json().get("show_captcha", False):
            return ""

        # PUT returns the captcha image as base64.
        response = self.session.put(captcha_api)
        # Strip real newlines as well as literal backslash-n sequences; the
        # original only removed the latter.
        base64_img = (
            response.json()['img_base64'].replace(r'\n', '').replace('\n', '')
        )
        with open("./captcha.png", "wb") as f:
            f.write(base64.b64decode(base64_img))

        with Image.open("./captcha.png") as img:
            if lang == "en":
                img.show()
                code = input("请输入图片中的验证码>>:").strip()
            else:
                import matplotlib.pyplot as plt
                plt.imshow(img)
                print('点击所有倒立的汉字,在命令行中按回车提交')
                points = plt.ginput(7)
                code = json.dumps({
                    'img_size': [200, 44],
                    'input_points': [[i[0] / 2, i[1] / 2] for i in points],
                })

        # The answer is posted to the captcha endpoint before the login request.
        self.session.post(
            captcha_api,
            data={"input_text": code},
            headers={"user-agent": ua.random, },
        )
        return code

    def get_no_captch(self):
        """Poll the captcha endpoint until it reports no captcha is needed.

        :return: "" once the server stops asking for a captcha
        """
        captcha_api = self._captcha_api()
        while True:
            print("正在请求验证码....")
            time.sleep(0.5)
            response = self.session.get(captcha_api)
            # Test the decoded JSON value directly: str(False) is 'False',
            # which never equalled 'false', so the old loop never ended.
            if not response.json().get("show_captcha"):
                return ""
            print("继续...")

    def get_signature(self, timestamp):
        """Return the HMAC-SHA1 hex digest used as the "signature" field.

        :param timestamp: millisecond timestamp sent with the login form
        :return: hex digest over grant_type + client_id + source + timestamp
        """
        ha = hmac.new(
            key=b"d1b964811afb40118a12068ff74a12f4", digestmod=hashlib.sha1
        )
        client_id = self.login_param.get("client_id")
        grant_type = self.login_param.get("grant_type")
        source = self.login_param.get("source")
        message = grant_type + client_id + source + str(timestamp)
        ha.update(message.encode("utf-8"))
        return ha.hexdigest()

    def get_xsrf(self):
        """Request the sign-in page and return its "_xsrf" cookie (None if absent)."""
        url = "https://www.zhihu.com/signin"
        response = self.session.get(
            url=url, headers=self.session.headers, allow_redirects=False
        )
        return response.cookies.get("_xsrf")

    def __encrypt(self, data: dict):
        """URL-encode ``data`` and pass it through ``Q`` in ./01.js.

        :param data: login form fields
        :return: whatever the JavaScript function ``Q`` returns
        """
        encoded = parse.urlencode(data)
        with open("./01.js", "r", encoding="utf-8") as f:
            js_code = f.read()
        ctx = execjs.compile(js_code)
        return ctx.call("Q", encoded)
if __name__ == '__main__':
    # Script entry point: with no credentials given, login() prompts for them
    # unless the saved cookies are still valid.
    zhihu = ZhiHu()
    zhihu.login()
js文件:
// Minimal stand-in for the browser `window` object, exposing only encodeURIComponent.
// Assigned without `var`, so it becomes an implicit global.
window = {
"encodeURIComponent": encodeURIComponent
}
// Minimal stand-in for the browser `navigator` object (implicit global).
// NOTE(review): the string looks case-mangled ("windows", "Khtml") and lacks the usual
// "Mozilla/" prefix; whether the encrypted output depends on its exact value is not
// visible here, so confirm against a real browser before changing it.
navigator = {
"userAgent": "5.0 (windows NT 10.0; WOW64) AppleWebKit/537.36 (Khtml, like Gecko) Chrome/72.0.3626.121 Safari/537.36"
}
/**
 * typeof helper that reports "symbol" for Symbol values.
 * On first use it picks an implementation, stores it back into `s`, and
 * delegates to it, so later calls go straight to the chosen function.
 */
function s(e) {
    var hasTypeofSymbol = "function" == typeof Symbol && "symbol" == typeof Symbol.t;
    if (hasTypeofSymbol) {
        s = function (value) {
            return typeof value
        };
    } else {
        s = function (value) {
            var isSymbol = value && "function" == typeof Symbol &&
                value.constructor === Symbol && value !== Symbol.prototype;
            return isSymbol ? "symbol" : typeof value
        };
    }
    return s(e)
}
// t: version-like string, not referenced by any other plain-text code in this file.
// __g: container object; Q() below calls __g._encrypt, which is not assigned anywhere in
// plain text (presumably installed by the bytecode program run at start-up; confirm).
var t = "1.1"
, __g = {};
// Instruction decoder registered as opcode 0 in G's dispatch table; it decodes no operands.
function i() {
}
/**
 * Decoder for opcode 1 (load a value into a register).
 * Fields: s = source selector (bit 11), i = register index (bits 9-10),
 * h and A = the same low 9 bits, read as an immediate or a local-slot index.
 */
function h(e) {
    var low9 = 511 & e;
    this.s = (2048 & e) >> 11;
    this.i = (1536 & e) >> 9;
    this.h = low9;
    this.A = low9;
}
/**
 * Decoder for opcode 2 (store a register into a local slot).
 * Fields: i = register index (bits 10-11), A = local-slot index (low 10 bits).
 */
function A(e) {
    this.i = (3072 & e) >> 10;
    this.A = 1023 & e;
}
/**
 * Decoder for opcode 3 (arithmetic/logic between registers).
 * Fields: n = destination register, e = left operand register,
 * a = right operand register, s = operation selector (low 6 bits).
 */
function n(e) {
    this.n = (3072 & e) >> 10;
    this.e = (768 & e) >> 8;
    this.a = (192 & e) >> 6;
    this.s = 63 & e;
}
/**
 * Decoder for opcode 4 (call into bytecode).
 * Fields: i = register holding the target address (bits 10-11),
 * h = number of arguments to move from the operand stack (low 10 bits).
 */
function e(e) {
    this.i = (e >> 10) & 3;
    this.h = 1023 & e;
}
// Instruction decoder registered as opcode 5 in G's dispatch table; it decodes no operands.
function a() {
}
/**
 * Decoder for opcode 6 (compare two registers and set the condition flag).
 * Fields: n and e = registers compared by the handler, a = decoded but not
 * read by the handler, s = comparison selector (low 6 bits).
 */
function c(e) {
    this.n = (3072 & e) >> 10;
    this.e = (768 & e) >> 8;
    this.a = (192 & e) >> 6;
    this.s = 63 & e;
}
/**
 * Decoder for opcode 7 (jump).
 * Fields: A = target address (bits 2-11), s = jump mode (low 2 bits).
 */
function o(e) {
    this.A = (4095 & e) >> 2;
    this.s = 3 & e;
}
/**
 * Decoder for opcode 8 (invoke a JavaScript function, method or constructor).
 * Fields: i = register holding the callee or receiver (bits 10-11),
 * h = number of stack arguments (bits 2-9), s = call kind (low 2 bits).
 */
function r(e) {
    this.i = (e >> 10) & 3;
    this.h = (e >> 2) & 255;
    this.s = 3 & e;
}
/**
 * Decoder for opcode 9 (push onto the operand stack).
 * Fields: s = source kind (bits 10-11), i = register index (bits 8-9),
 * h = low 10 bits used as an immediate, A = low 6 bits used as an index.
 */
function k(e) {
    var low10 = 1023 & e;
    this.s = (4095 & e) >> 10;
    this.i = low10 >> 8;
    this.h = low10;
    this.A = 63 & e;
}
/**
 * Decoder for opcode 10 (property read, property write, or eval).
 * Fields: s = operation kind (bits 10-11), n = destination register
 * (bits 8-9), e = object register (bits 6-7).
 */
function B(e) {
    this.s = (4095 & e) >> 10;
    this.n = (1023 & e) >> 8;
    this.e = (255 & e) >> 6;
}
/**
 * Decoder for opcode 11 (load a constant-pool entry into a register).
 * Fields: i = register index (bits 10-11), A = constant index (low 10 bits).
 */
function f(e) {
    this.i = (3072 & e) >> 10;
    this.A = 1023 & e;
}
/**
 * Decoder for opcode 12 (register an exception-handler address).
 * Field: A = handler address (low 12 bits).
 */
function u(e) {
    var address = 4095 & e;
    this.A = address;
}
/**
 * Decoder for opcode 13 (throw the value held in a register).
 * Field: i = register index (bits 10-11).
 */
function C(e) {
    var registerIndex = (3072 & e) >> 10;
    this.i = registerIndex;
}
/**
 * Decoder for opcode 14 (create a callable that runs bytecode).
 * Field: A = entry address of the bytecode routine (low 12 bits).
 */
function b(e) {
    var entry = 4095 & e;
    this.A = entry;
}
/**
 * Decoder for opcode 15 (build an object or array from the operand stack).
 * Fields: s = literal kind (bits 8-11), i = destination register (bits 6-7),
 * h = number of entries (low 6 bits).
 */
function g(e) {
    this.s = (3840 & e) >> 8;
    this.i = (192 & e) >> 6;
    this.h = 63 & e;
}
/**
 * State of one bytecode interpreter instance.
 * Roles below are those visible in the instruction handlers in this file.
 */
function G() {
    this.c = [0, 0, 0, 0];   // four registers; call results are left in c[3]
    this.o = 0;              // index of the next instruction word in b
    this.r = [];             // saved instruction indexes (pushed by opcode 4)
    this.k = [];             // current local slots
    this.B = [];             // saved local-slot arrays
    this.f = [];             // operand stack
    this.u = [];             // saved operand stacks
    this.C = false;          // condition flag set by opcode 6, cleared by opcode 7
    this.b = [];             // instruction words
    this.g = [];             // constant pool
    this.G = false;          // true while the run loop should continue
    this.Q = null;           // exception-handler address, if any
    this.R = null;           // last exception caught by the run loop
    this.w = [];             // never written by the plain-text code in this file
    this.x = 0;              // timestamp of the previous instruction
    // Decoder constructor for each value of the top four bits of an instruction word.
    var dispatch = {};
    dispatch[0] = i;
    dispatch[1] = h;
    dispatch[2] = A;
    dispatch[3] = n;
    dispatch[4] = e;
    dispatch[5] = a;
    dispatch[6] = c;
    dispatch[7] = o;
    dispatch[8] = r;
    dispatch[9] = k;
    dispatch[10] = B;
    dispatch[11] = f;
    dispatch[12] = u;
    dispatch[13] = C;
    dispatch[14] = b;
    dispatch[15] = g;
    this.D = dispatch;
}
// Opcode 0: stop the interpreter by clearing its running flag.
i.prototype.M = function (vm) {
    vm.G = false
}
,
// Opcode 1: load register i from the immediate (s = 0) or from local slot A (s = 1).
h.prototype.M = function (vm) {
    if (this.s === 0) {
        vm.c[this.i] = this.h;
    } else if (this.s === 1) {
        vm.c[this.i] = vm.k[this.A];
    }
}
,
// Opcode 2: copy register i into local slot A.
A.prototype.M = function (vm) {
    var value = vm.c[this.i];
    vm.k[this.A] = value
}
,
// Opcode 3: apply the operation selected by s to registers e (left) and a (right)
// and store the result in register n. Selectors 10 and 13 use the left operand only;
// selectors outside 0-19 leave the registers untouched.
n.prototype.M = function (vm) {
    var regs = vm.c,
        dst = this.n,
        lhs = regs[this.e],
        rhs = regs[this.a];
    switch (this.s) {
        case 0: regs[dst] = lhs + rhs; break;
        case 1: regs[dst] = lhs - rhs; break;
        case 2: regs[dst] = lhs * rhs; break;
        case 3: regs[dst] = lhs / rhs; break;
        case 4: regs[dst] = lhs % rhs; break;
        case 5: regs[dst] = lhs == rhs; break;
        case 6: regs[dst] = lhs >= rhs; break;
        case 7: regs[dst] = lhs || rhs; break;
        case 8: regs[dst] = lhs && rhs; break;
        case 9: regs[dst] = lhs !== rhs; break;
        case 10: regs[dst] = s(lhs); break;
        case 11: regs[dst] = lhs in rhs; break;
        case 12: regs[dst] = lhs > rhs; break;
        case 13: regs[dst] = -lhs; break;
        case 14: regs[dst] = lhs < rhs; break;
        case 15: regs[dst] = lhs & rhs; break;
        case 16: regs[dst] = lhs ^ rhs; break;
        case 17: regs[dst] = lhs << rhs; break;
        case 18: regs[dst] = lhs >>> rhs; break;
        case 19: regs[dst] = lhs | rhs; break;
    }
}
,
// Opcode 4: call into bytecode. Saves the current instruction index and local slots,
// jumps to the address in register i, moves h values from the operand stack into a
// fresh local-slot array (preserving their order), then saves and resets the stack.
e.prototype.M = function (vm) {
    vm.r.push(vm.o);
    vm.B.push(vm.k);
    vm.o = vm.c[this.i];
    var locals = [];
    for (var left = this.h; left > 0; left--) {
        locals.unshift(vm.f.pop());
    }
    vm.k = locals;
    vm.u.push(vm.f);
    vm.f = []
}
,
// Opcode 5: return from a bytecode call by restoring the saved instruction index,
// local slots and operand stack.
a.prototype.M = function (vm) {
    vm.o = vm.r.pop();
    vm.k = vm.B.pop();
    vm.f = vm.u.pop()
}
,
// Opcode 6: set the condition flag C from registers n (left) and e (right).
// Selectors 6 and 7 test the left register alone; other selectors above 7 do nothing.
c.prototype.M = function (vm) {
    var left = vm.c[this.n],
        right = vm.c[this.e];
    switch (this.s) {
        case 0: vm.C = left >= right; break;
        case 1: vm.C = left <= right; break;
        case 2: vm.C = left > right; break;
        case 3: vm.C = left < right; break;
        case 4: vm.C = left == right; break;
        case 5: vm.C = left != right; break;
        case 6: vm.C = left; break;
        case 7: vm.C = !left; break;
    }
}
,
// Opcode 7: jump to address A. Mode 0 always jumps, mode 1 jumps when the condition
// flag is set, mode 2 when it is clear, and mode 3 jumps and also clears the
// exception-handler address. The condition flag is reset afterwards in every mode.
o.prototype.M = function (vm) {
    var mode = this.s,
        target = this.A;
    if (mode === 0) {
        vm.o = target;
    } else if (mode === 1) {
        if (vm.C) vm.o = target;
    } else if (mode === 2) {
        if (!vm.C) vm.o = target;
    } else if (mode === 3) {
        vm.o = target;
        vm.Q = null;
    }
    vm.C = false
}
,
// Opcode 8: invoke JavaScript code with arguments taken from the operand stack and
// store the result in register 3. Kind 0 calls the value in register i, kind 1 pops
// a method name and calls that method on register i, kind 2 constructs with `new`.
// h values are popped, but only the first two are passed on.
r.prototype.M = function (vm) {
    var argc = this.h;

    // Pop argc values, restoring the order in which they were pushed.
    function popArgs() {
        var list = [];
        for (var j = 0; j < argc; j++) {
            list.unshift(vm.f.pop());
        }
        return list;
    }

    var args;
    if (this.s === 0) {
        args = popArgs();
        // Kept as a member call on vm.c so the callee's `this` is unchanged.
        vm.c[3] = vm.c[this.i](args[0], args[1]);
    } else if (this.s === 1) {
        var method = vm.f.pop();
        args = popArgs();
        vm.c[3] = vm.c[this.i][method](args[0], args[1]);
    } else if (this.s === 2) {
        args = popArgs();
        vm.c[3] = new vm.c[this.i](args[0], args[1]);
    }
}
,
// Opcode 9: push one value onto the operand stack: register i (s = 0), the
// immediate h (s = 1), local slot A (s = 2) or constant-pool entry A (s = 3).
k.prototype.M = function (vm) {
    var value;
    if (this.s === 0) {
        value = vm.c[this.i];
    } else if (this.s === 1) {
        value = this.h;
    } else if (this.s === 2) {
        value = vm.k[this.A];
    } else if (this.s === 3) {
        value = vm.g[this.A];
    } else {
        return;
    }
    vm.f.push(value)
}
,
// Opcode 10: property read, property write, or eval, selected by this.s.
// The eval in case 2 is a direct eval, so the evaluated string resolves names in this
// function's scope first (including the locals s, i, h, A and the parameter t);
// the names here are therefore left exactly as they are.
B.prototype.M = function (t) {
switch (this.s) {
case 0:
// Pop a key and load register[e][key] into register n.
var s = t.f.pop();
t.c[this.n] = t.c[this.e][s];
break;
case 1:
// Pop a key, then a value, and store the value at register[e][key].
var i = t.f.pop()
, h = t.f.pop();
t.c[this.e][i] = h;
break;
case 2:
// Pop a source string and store the result of evaluating it in register n.
var A = t.f.pop();
t.c[this.n] = eval(A)
}
}
,
// Opcode 11: load constant-pool entry A into register i.
f.prototype.M = function (vm) {
    var constant = vm.g[this.A];
    vm.c[this.i] = constant
}
,
// Opcode 12: record A as the address the run loop jumps to when an instruction throws.
u.prototype.M = function (vm) {
    var handlerAddress = this.A;
    vm.Q = handlerAddress
}
,
// Opcode 13: throw the value currently held in register i.
C.prototype.M = function (vm) {
    var thrown = vm.c[this.i];
    throw thrown
}
,
// Opcode 14: create a JavaScript function that runs the bytecode routine at address
// this.A, and leave that function in register 3.
b.prototype.M = function (e) {
var t = this
, n = [0];
// Copy the current local slots after a placeholder; slot 0 receives the call argument.
e.k.forEach(function (e) {
n.push(e)
});
var r = function (r) {
// Each call runs a fresh interpreter over the same instruction words and constants.
// The slot array n is shared by every call of this function, not copied per call.
var o = new G;
return o.k = n,
o.k[0] = r,
o.J(e.b, t.A, e.g, e.w),
o.c[3]
};
// Make the wrapper stringify like a built-in function.
r.toString = function () {
return "() { [native code] }"
}
,
e.c[3] = r
}
,
// Opcode 15: build a literal from the operand stack into register i.
// s = 0 builds an object from h (value, key) pops; s = 1 builds an array of h values
// in the order they were pushed.
g.prototype.M = function (vm) {
    var remaining = this.h;
    if (this.s === 0) {
        var obj = {};
        while (remaining-- > 0) {
            // The value sits on top of its key, so it is popped first.
            var value = vm.f.pop();
            var key = vm.f.pop();
            obj[key] = value;
        }
        vm.c[this.i] = obj;
    } else if (this.s === 1) {
        var list = [];
        while (remaining-- > 0) {
            list.unshift(vm.f.pop());
        }
        vm.c[this.i] = list;
    }
}
,
// Decode a base64 program into 16-bit instruction words (big-endian byte pairs) and
// store them in this.b. A trailing odd byte is ignored.
// Uses Buffer.from: the `new Buffer(string, encoding)` constructor is deprecated in Node.js.
G.prototype.v = function (e) {
    var bytes = Buffer.from(e, "base64").toString("binary"),
        words = [];
    for (var r = 0; r < bytes.length - 1; r += 2)
        words.push(bytes.charCodeAt(r) << 8 | bytes.charCodeAt(r + 1));
    this.b = words
}
,
// Decode one obfuscated constant: base64-decode it, then XOR every byte with 24 and
// with the previously decoded character code (the chain starts at 66).
// Uses Buffer.from: the `new Buffer(string, encoding)` constructor is deprecated in Node.js.
G.prototype.y = function (e) {
    var bytes = Buffer.from(e, "base64").toString("binary"),
        previous = 66,
        chars = [];
    for (var o = 0; o < bytes.length; o++) {
        var code = 24 ^ bytes.charCodeAt(o) ^ previous;
        chars.push(String.fromCharCode(code));
        previous = code
    }
    return chars.join("")
}
,
// Install the constant pool: string entries are decoded with y(), anything else is
// kept as it is.
G.prototype.F = function (pool) {
    var self = this;
    this.g = pool.map(function (entry) {
        if ("string" == typeof entry) {
            return self.y(entry);
        }
        return entry;
    })
}
,
// Run loop of the interpreter.
//   e: the program, either a base64 string (decoded with v(), and n decoded with F())
//      or an array of instruction words (used directly, with n used as-is)
//   t: index of the first instruction (defaults to 0)
//   n: constant pool (defaults to [])
// A fourth argument passed by callers is ignored. Results are left in the registers.
G.prototype.J = function (e, t, n) {
for (t = t || 0,
n = n || [],
this.o = t,
"string" == typeof e ? (this.F(n),
this.v(e)) : (this.b = e,
this.g = n),
this.G = !0,
this.x = Date.now(); this.G;) {
var r = this.b[this.o++];
// Anything that is not a number, including running past the end, stops the loop.
if ("number" != typeof r)
break;
var o = Date.now();
// Give up silently when more than 500 ms passed since the previous instruction.
if (500 < o - this.x)
return;
this.x = o;
try {
this.M(r)
} catch (e) {
// Keep the error in R, then jump to the handler address Q, or rethrow as a string
// when no handler is registered.
if (this.R = e,
!this.Q)
throw "execption at " + this.o + ": " + e;
this.o = this.Q
}
}
}
,
// Execute one instruction word: the top four bits select the decoder constructor,
// and the decoded instruction is then applied to this interpreter.
G.prototype.M = function (word) {
    var opcode = (61440 & word) >> 12;
    var Decoder = this.D[opcode];
    var instruction = new Decoder(word);
    instruction.M(this)
}
,
// Start-up: run the embedded program (base64 instruction words) from index 0 with the
// obfuscated constant pool that follows it. Presumably this is what installs
// __g._encrypt, which Q() relies on; that is not visible in plain text, so confirm.
1 && (new G).J("4AeTAJwAqACcAaQAAAAYAJAAnAKoAJwDgAWTACwAnAKoACACGAESOTRHkQAkAbAEIAMYAJwFoAASAzREJAQYBBIBNEVkBnCiGAC0BjRAJAAYBBICNEVkBnDGGAC0BzRAJACwCJAAnAmoAJwKoACcC4ABnAyMBRAAMwZgBnESsA0aADRAkQAkABGCnA6gABoCnA+hQDRHGAKcEKAAMQdgBnFasBEaADRAkQAkABgCnBKgABoCnBOhQDRHZAZxkrAUGgA0QJEAJAAYApwVoABgBnG6sBYaADRAkQAkABgCnBegAGAGceKwGBoANECRACQAnAmoAJwZoABgBNIOsBoaADRAkQAkABgCnBugABoCnByhQDRHZAZyRrAdGgA0QJEAJAAQACAFsB4gBhgAnAWgABIBNEEkBxgHEgA0RmQGdJoQCBoFFAE5gCgFFAQ5hDSCJAgYB5AAGACcH4AFGAEaCDRSEP8xDzMQIAkQCBoFFAE5gCgFFAQ5hDSCkQAkCBgBGgg0UhD/MQ+QACAIGAkaBxQBOYGSABoAnB+EBRoIN1AUCDmRNJMkCRAIGgUUATmAKAUUBDmENIKRACQIGAEaCDRSEP8xD5AAIAgYCRoHFAI5gZIAGgCcH4QFGgg3UBQQOZE0kyQJGAMaCRQ/OY+SABoGnCCEBTTAJAMYAxoJFAY5khI/Nk+RABoGnCCEBTTAJAMYAxoJFAw5khI/Nk+RABoGnCCEBTTAJAMYAxoJFBI5khI/Nk+RABoGnCCEBTTAJAMYBxIDNEEkB3JsHgNQAA==", 0, ["BRgg", "BSITFQkTERw=", "LQYfEhMA", "PxMVFBMZKB8DEjQaBQcZExMC", "", "NhETEQsE", "Whg=", "Wg==", "MhUcHRARDhg=", "NBcPBxYeDQMF", "Lx4ODys+GhMC", "LgM7OwAKDyk6Cg4=", "Mx8SGQUvMQ==", "SA==", "ORoVGCQgERcCAxo=", "BTcAERcCAxo=", "BRg3ABEXAgMaFAo=", "SQ==", "OA8LGBsP", "GC8LGBsP", "Tg==", "PxAcBQ==", "Tw==", "KRsJDgE=", "TA==", "LQofHg4DBwsP", "TQ==", "PhMaNCwZAxoUDQUeGQ==", "PhMaNCwZAxoUDQUeGTU0GQIeBRsYEQ8=", "Qg==", "BWpUGxkfGRsZFxkbGR8ZGxkHGRsZHxkbGRcZG1MbGR8ZGxkXGRFpGxkfGRsZFxkbGR8ZGxkHGRsZHxkbGRcZGw==", "ORMRCyk0Exk8LQ==", "ORMRCyst"]);
// Entry point called from Python through execjs: passes the URL-encoded form string to
// __g._encrypt and returns its result.
var Q = function (formData) {
    var encrypted = __g._encrypt(formData);
    return encrypted
};
参考的是这位博主的博客:https://home.cnblogs.com/u/zkqiang