在网上看到有人分享抓取的谷歌翻译api,一次闲来无聊,就在网上查了查关于抓取百度翻译的api的文章,自己一番钻研,本次就来一步一步分析一下百度翻译请求的玄机。
抓包
通过浏览器调试,我们很轻松就找到了百度翻译的api请求url。
https://fanyi.baidu.com/v2transapi?from=zh&to=en
请求参数
在请求url的最下边,就看到了翻译的一些请求参数:
from: zh
to: en
query: 你好。
transtype: realtime
simple_means_flag: 3
sign: 660863.980558
token: aebb5eff8a16122ae30f938fa142b7f7
前三个不用多解释,大家一看就知道参数的意思,我会吧重点放在后两个参数分析:
参数名 | 参数解释 |
---|---|
from | 原文语种 |
to | 译文语种 |
query | 原文内容 |
transtype | 翻译类型 |
simple_means_flag | 未知 |
sign | 签名 |
token | 客户端请求标识 |
注意:transtype,这个参数一般不用更改,网上说有realtime 和 translang 两种
关键参分析
接下来我们就来分析一下sign和token两个参数吧。
通过分析翻译时候所调用的js文件,我终于找到了关键代码:
langIsDeteced: function(t, a, n, r) {
if (null !== t) {
var s = $(".select-from-language .language-selected").attr("data-lang"),
o = $(".select-to-language .language-selected").attr("data-lang"),
l = !1;
c.get("langChangedByUser") && t === o && (l = !0);
var g = null;
r && !c.get("fromLangIsAuto") && s !== t ? g = i.processOcrLang(t, s, o) : (e.show(t, s), g = i.getLang(t, s, o)), u.show();
var d = this,
a = this.processQuery(a),
p = {
from: g.fromLang,
to: g.toLang,
query: a,
transtype: n,
simple_means_flag: 3,
sign: y(a),
token: window.common.token
};
this.translateXHR && 4 !== this.translateXHR.readyState && this.translateXHR.abort(), this.translateXHR = $.ajax({
type: "POST",
url: "/v2transapi?from=" + encodeURIComponent(g.fromLang) + "&to=" + encodeURIComponent(g.toLang),
cache: !1,
data: p
}).done(function(t) {
c.set("isInRtTransState", !0), d.translateSuccess(t, g.fromLang, g.toLang, a, l)
})
}
}
以上就是我们需要分析的重要代码啦,里面有我的各个参数。
首先看到token
只需通过window.common.token
就能直接获取。
sign
参数则又调用了y()
方法,传的参数就是我要翻译的原文。这个函数会返回 666666.233384
类似这样的结果。
由于js代码被加密了。没办法直接跳转方法,这咋整?!没捷径,只能凭借经验找了,我最后确定了一段关键代码:
define("translation:widget/translate/input/pGrab", function(r, o, t) {
"use strict";
function a(r) {
if (Array.isArray(r)) {
for (var o = 0, t = Array(r.length); o < r.length; o++) t[o] = r[o];
return t
}
return Array.from(r)
}
function n(r, o) {
for (var t = 0; t < o.length - 2; t += 3) {
var a = o.charAt(t + 2);
a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a), a = "+" === o.charAt(t + 1) ? r >>> a : r << a, r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a
}
return r
}
function e(r) {
var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
if (null === o) {
var t = r.length;
t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10))
} else {
for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++) "" !== e[C] && f.push.apply(f, a(e[C].split(""))), C !== h - 1 && f.push(o[C]);
var g = f.length;
g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""))
}
var u = void 0,
l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
u = null !== i ? i : (i = window[l] || "") || "";
for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
var A = r.charCodeAt(v);
128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128)
}
for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) p += S[b], p = n(p, F);
return p = n(p, D), p ^= s, 0 > p && (p = (2147483647 & p) + 2147483648), p %= 1e6, p.toString() + "." + (p ^ m)
}
var i = null;
t.exports = e
});;
对,没错,return p
就是我们要得到的sign
值,继续优化代码,我们只要我们想要的代码,去掉其他没用的。
一下是我最终整理的获取sign
参数的js代码:
var i= null;
function n(r, o) {
for (var t = 0; t < o.length - 2; t += 3) {
var a = o.charAt(t + 2);
a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a), a = "+" === o.charAt(t + 1) ? r >>> a : r << a, r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a
}
return r
}
function sign(r) {
var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
if (null === o) {
var t = r.length;
t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10))
} else {
for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++) "" !== e[C] && f.push.apply(f, a(e[C].split(""))), C !== h - 1 && f.push(o[C]);
var g = f.length;
g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""))
}
var u = void 0,
l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
u = null !== i ? i : (i = window[l] || "") || "";
for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
var A = r.charCodeAt(v);
128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128)
}
for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) p += S[b], p = n(p, F);
return p = n(p, D), p ^= s, 0 > p && (p = (2147483647 & p) + 2147483648), p %= 1e6, p.toString() + "." + (p ^ m)
}
复制以上整理的代码,直接调用sign('你好')
即可得到sign值。
语言标识代码
最后整理一下各类语言代码标识:
本文隐藏内容 登陆 后才可以浏览