Python爬虫最强项目案例之——JS逆向。这波学到就是赚到。
前言
前段时间看到有人js逆向了某手机的反馈专区,我也第一时间学习了一下,学完后一直想着凭借自己的能力,看能不能单独完成一次,拿下js逆向真正第一血,所以就有了今天的受害者,某蓝厂手机圈子的逆向。
网站分析
既然选定了目标,那就开始抓包,分析网页。
这是抓包后的情况,通过对多个包进行比较发现,实际变化的参数只有lastId、nonce、timestamp、pageNum。具体分析了一下,lastId:前一页最后一个发言用户的tid;pageNum:当前页码;timestamp:13位时间戳;nonce:不知道是什么,但是看它长了一副加密的脸。至此,目标就很清晰了,重点针对nonce。
js逆向分析
通过全局搜索、断点定位,最终将目标锁定在这一行代码上。
= Object(u["md5"])(t + "" + parseInt(1e7 * Math.random(), 10) + 1, 32)
通过观察发现,这一行代码的最终输出结果正是我们今天的目标。
分析一下代码:
“t”:13位的时间戳
“+ "" +”:为将时间戳转为字符串
“1e7” :10000000,固定值
“Math.random()”:随机数
“parseInt”:取整
“t + "" + parseInt(1e7 * Math.random(), 10) + 1”,这里的意思就很明显了:时间戳字符串 + 取整后的随机数 + 末尾的字符“1”(左侧已经是字符串,所以这里的“+ 1”是字符串拼接,而不是数值加1),
最终的结果是一个最长21位的数字字符串(13位时间戳 + 最多7位随机数 + 1位“1”;随机数不足7位时结果会更短)。
继续分析Object(u["md5"])和32发现,这里是调用了u的md5方法,将前面拼接出的字符串和32作为参数,传给了MD5,网页源代码如下:
e.md5 = function(e, t) {
function n(e, t) {
return e << t | e >>> 32 - t
}
function i(e, t) {
var n, i, a, r, o;
return a = 2147483648 & e,
r = 2147483648 & t,
n = 1073741824 & e,
i = 1073741824 & t,
o = (1073741823 & e) + (1073741823 & t),
n & i ? 2147483648 ^ o ^ a ^ r : n | i ? 1073741824 & o ? 3221225472 ^ o ^ a ^ r : 1073741824 ^ o ^ a ^ r : o ^ a ^ r
}
…………//此处省略
目前为止,思路已经很清晰了,这里我们可以通过Python实现MD5加密,也可以抠源代码改写。为了保证百分百不出错,这里我选择了抠代码。
js代码改写
首先是源代码:
e.md5 = function(e, t) {
function n(e, t) {
return e << t | e >>> 32 - t
}
function i(e, t) {
var n, i, a, r, o;
return a = 2147483648 & e,
r = 2147483648 & t,
n = 1073741824 & e,
i = 1073741824 & t,
o = (1073741823 & e) + (1073741823 & t),
n & i ? 2147483648 ^ o ^ a ^ r : n | i ? 1073741824 & o ? 3221225472 ^ o ^ a ^ r : 1073741824 ^ o ^ a ^ r : o ^ a ^ r
}
function a(e, t, a, r, o, s, l) {
return e = i(e, i(i(function(e, t, n) {
return e & t | ~e & n
}(t, a, r), o), l)),
i(n(e, s), t)
}
function r(e, t, a, r, o, s, l) {
return e = i(e, i(i(function(e, t, n) {
return e & n | t & ~n
}(t, a, r), o), l)),
i(n(e, s), t)
}
function o(e, t, a, r, o, s, l) {
return e = i(e, i(i(function(e, t, n) {
return e ^ t ^ n
}(t, a, r), o), l)),
i(n(e, s), t)
}
function s(e, t, a, r, o, s, l) {
return e = i(e, i(i(function(e, t, n) {
return t ^ (e | ~n)
}(t, a, r), o), l)),
i(n(e, s), t)
}
function l(e) {
var t, n = "", i = "";
for (t = 0; t <= 3; t++)
n += (i = "0" + (e >>> 8 * t & 255).toString(16)).substr(i.length - 2, 2);
return n
}
var c, u, p, m, d, h, f, v, y, g = e, b = Array();
for (b = function(e) {
for (var t, n = e.length, i = n + 8, a = 16 * ((i - i % 64) / 64 + 1), r = Array(a - 1), o = 0, s = 0; s < n; )
o = s % 4 * 8,
r[t = (s - s % 4) / 4] = r[t] | e.charCodeAt(s) << o,
s++;
return t = (s - s % 4) / 4,
o = s % 4 * 8,
r[t] = r[t] | 128 << o,
r[a - 2] = n << 3,
r[a - 1] = n >>> 29,
r
}(g),
h = 1732584193,
f = 4023233417,
v = 2562383102,
y = 271733878,
c = 0; c < b.length; c += 16)
u = h,
p = f,
m = v,
d = y,
f = s(f = s(f = s(f = s(f = o(f = o(f = o(f = o(f = r(f = r(f = r(f = r(f = a(f = a(f = a(f = a(f, v = a(v, y = a(y, h = a(h, f, v, y, b[c + 0], 7, 3614090360), f, v, b[c +                    h = i(h, u),
f = i(f, p),
v = i(v, m),
y = i(y, d);
return 32 == t ? l(h) + l(f) + l(v) + l(y) : l(f) + l(v)
}
其次是改写后的代码,这里遵循的是改得越少越好的原则:
/**
 * Compute the MD5 digest of a string, mirroring the site's `md5(e, t)` helper
 * that produces the `nonce` request parameter.
 *
 * Characters are read with `charCodeAt` and packed one per byte, exactly as
 * the site's code does; no UTF-8 encoding is performed. The result therefore
 * equals standard MD5 only when every char code is <= 255 (the nonce input is
 * plain decimal digits, so this always holds there).
 *
 * Written with ES5 syntax only (`var`, function declarations) because the
 * file is meant to be loaded from Python through execjs, whose JavaScript
 * runtime may be an older engine.
 *
 * @param {string} e - Text to hash. Non-string values are converted with
 *     String(); pass long digit sequences as strings to avoid losing
 *     precision in a JavaScript number.
 * @param {number} t - 32 selects the full 32-character hex digest; any other
 *     value selects the 16-character form (hex characters 8..23 of the full
 *     digest).
 * @returns {string} Lowercase hexadecimal digest.
 */
function MD5(e, t) {
    // Per-round left-rotation amounts; each round cycles through its four values.
    var SHIFTS = [
        7, 12, 17, 22,
        5, 9, 14, 20,
        4, 11, 16, 23,
        6, 10, 15, 21
    ];

    // Additive constants from RFC 1321: floor(abs(sin(i + 1)) * 2^32), i = 0..63.
    var SINE_TABLE = [
        0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
        0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
        0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
        0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
        0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
        0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
        0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
        0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
        0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
        0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
        0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
        0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
        0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
        0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
        0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
        0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
    ];

    // Rotate a 32-bit word left by `bits` positions.
    function rotateLeft(word, bits) {
        return word << bits | word >>> 32 - bits;
    }

    // Pack the text into little-endian 32-bit words and apply MD5 padding:
    // a single 0x80 byte, zero fill, then the 64-bit message length in bits.
    function toPaddedWords(text) {
        var length = text.length;
        var wordCount = 16 * (Math.floor((length + 8) / 64) + 1);
        var words = [];
        var index;

        // Zero-fill up front so every slot is a number, never undefined.
        for (index = 0; index < wordCount; index++) {
            words[index] = 0;
        }
        for (index = 0; index < length; index++) {
            words[index >> 2] |= text.charCodeAt(index) << index % 4 * 8;
        }
        words[length >> 2] |= 128 << length % 4 * 8;
        words[wordCount - 2] = length << 3;
        words[wordCount - 1] = length >>> 29;
        return words;
    }

    // Render a 32-bit word as 8 hex characters, least-significant byte first.
    function toHexLE(word) {
        var hex = "";
        var byteIndex;
        var byteHex;

        for (byteIndex = 0; byteIndex < 4; byteIndex++) {
            byteHex = (word >>> 8 * byteIndex & 255).toString(16);
            hex += byteHex.length < 2 ? "0" + byteHex : byteHex;
        }
        return hex;
    }

    var words = toPaddedWords(String(e));

    // Standard MD5 initial chaining values.
    var h0 = 1732584193;
    var h1 = 4023233417;
    var h2 = 2562383102;
    var h3 = 271733878;

    var offset, step, round, a, b, c, d, mix, wordIndex, previousD;

    for (offset = 0; offset < words.length; offset += 16) {
        a = h0;
        b = h1;
        c = h2;
        d = h3;

        for (step = 0; step < 64; step++) {
            round = step >> 4;
            if (round === 0) {
                mix = b & c | ~b & d;
                wordIndex = step;
            } else if (round === 1) {
                mix = b & d | c & ~d;
                wordIndex = (5 * step + 1) % 16;
            } else if (round === 2) {
                mix = b ^ c ^ d;
                wordIndex = (3 * step + 5) % 16;
            } else {
                mix = c ^ (b | ~d);
                wordIndex = 7 * step % 16;
            }

            // Every operand is below 2^32 in magnitude, so the plain sums are
            // exact in a double; `| 0` then reduces them modulo 2^32.
            mix = a + mix + words[offset + wordIndex] + SINE_TABLE[step] | 0;

            previousD = d;
            d = c;
            c = b;
            b = b + rotateLeft(mix, SHIFTS[round * 4 + step % 4]) | 0;
            a = previousD;
        }

        h0 = h0 + a | 0;
        h1 = h1 + b | 0;
        h2 = h2 + c | 0;
        h3 = h3 + d | 0;
    }

    // Number() keeps the original loose `32 == t` acceptance of "32".
    return Number(t) === 32
        ? toHexLE(h0) + toHexLE(h1) + toHexLE(h2) + toHexLE(h3)
        : toHexLE(h1) + toHexLE(h2);
}
经过测试,代码能完美实现我想要的功能,然后将代码保存为.js文件。
Python代码编写
常规操作
import requests
import random
import execjs
import json
import pandas as pd
import time
url = 'bbs.vivo/api/community/forum/threads'
headers = {
'accept': 'application/json, text/plain, */*',
'content-type': 'application/json;charset=UTF-8',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4621.0 Safari/537.36',
'sec-ch-ua': '"Chromium";v="21", " Not;A Brand";v="99"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'origin': 'bbs.vivo',
'referer': 'bbs.vivo/newbbs/forum/9',
'cookie': 'cookieId=e1c6727a-9b29-1c13-a417-1b74440b9d521639290997482; KL9d_2132_saltkey=pU2Rr4AV; KL9d_2132_lastvisit=1639287439; Hm_lvt_9e }
这里虽然导入了好多包,但其实都是根据使用需要一个个导入的。
Python生成js需要的参数