专注于经验分享
   工具共享。

抓包逆向分析百度翻译接口API+语言种类代码

1213.png

在网上看到有人分享抓取的谷歌翻译api,一次闲来无聊,就在网上查了查关于抓取百度翻译的api的文章,自己一番钻研,本次就来一步一步分析一下百度翻译请求的玄机。

抓包

通过浏览器调试,我们很轻松就找到了百度翻译的api请求url。

https://fanyi.baidu.com/v2transapi?from=zh&to=en

fy1.png

请求参数

在请求url的最下边,就看到了翻译的一些请求参数:

from: zh
to: en
query: 你好。
transtype: realtime
simple_means_flag: 3
sign: 660863.980558
token: aebb5eff8a16122ae30f938fa142b7f7

前三个不用多解释,大家一看就知道参数的意思,我会把重点放在最后两个参数(sign 和 token)的分析上:

参数名 参数解释
from 原文语种
to 译文语种
query 原文内容
transtype 翻译类型
simple_means_flag 未知
sign 签名
token 客户端请求标识

注意:transtype,这个参数一般不用更改,网上说有realtime 和 translang 两种

关键参数分析

接下来我们就来分析一下sign和token两个参数吧。

通过分析翻译时候所调用的js文件,我终于找到了关键代码:

            // Handler run once the source language is known: builds the translate
            // request payload (including `sign` and `token`) and POSTs it to /v2transapi.
            //   t - detected language code; when null the handler does nothing
            //   a - raw query text (normalised below via this.processQuery)
            //   n - value sent as the `transtype` field
            //   r - flag; when truthy (and the other conditions below hold) the
            //       language pair is resolved via i.processOcrLang instead of i.getLang
            // NOTE(review): c, i, e, u and y come from the enclosing scope, which is
            // not part of this excerpt; their exact semantics cannot be confirmed here.
            langIsDeteced: function(t, a, n, r) {
                if (null !== t) {
                    // s / o: currently selected "from" / "to" languages, read from the DOM.
                    var s = $(".select-from-language .language-selected").attr("data-lang"),
                        o = $(".select-to-language .language-selected").attr("data-lang"),
                        l = !1;
                    // l becomes true only when "langChangedByUser" is set and the
                    // detected language equals the selected target language.
                    c.get("langChangedByUser") && t === o && (l = !0);
                    var g = null;
                    // Resolve the effective language pair g ({fromLang, toLang}); u.show() runs in both cases.
                    r && !c.get("fromLangIsAuto") && s !== t ? g = i.processOcrLang(t, s, o) : (e.show(t, s), g = i.getLang(t, s, o)), u.show();
                    var d = this,
                        a = this.processQuery(a),
                        // p: form body of the request. `sign` is computed from the processed
                        // query by y(); `token` is read straight from window.common.token.
                        p = {
                            from: g.fromLang,
                            to: g.toLang,
                            query: a,
                            transtype: n,
                            simple_means_flag: 3,
                            sign: y(a),
                            token: window.common.token
                        };
                    // Abort a still-running previous request (readyState !== 4) before issuing a new one.
                    this.translateXHR && 4 !== this.translateXHR.readyState && this.translateXHR.abort(), this.translateXHR = $.ajax({
                        type: "POST",
                        url: "/v2transapi?from=" + encodeURIComponent(g.fromLang) + "&to=" + encodeURIComponent(g.toLang),
                        cache: !1,
                        data: p
                    }).done(function(t) {
                        // On success: set the "isInRtTransState" flag and hand the response to translateSuccess.
                        c.set("isInRtTransState", !0), d.translateSuccess(t, g.fromLang, g.toLang, a, l)
                    })
                }
            }

以上就是我们需要分析的重要代码啦,里面有我的各个参数。

首先看到token只需通过window.common.token就能直接获取。

fy2.png

sign参数则又调用了y()方法,传的参数就是我要翻译的原文。这个函数会返回 666666.233384 类似这样的结果。

由于js代码被压缩混淆了,没办法直接跳转到方法定义,这咋整?!没捷径,只能凭借经验找了,我最后确定了一段关键代码:


// Module "translation:widget/translate/input/pGrab": exports the function `e`,
// which turns the query text into the `sign` request parameter.
// NOTE(review): the factory arguments (r, o, t) are presumably require/exports/module
// of an AMD-like loader; only `t.exports` is used in this excerpt.
define("translation:widget/translate/input/pGrab", function(r, o, t) {
    "use strict";

    // Returns a shallow copy of `r` as a plain array (Array.from for non-arrays).
    function a(r) {
        if (Array.isArray(r)) {
            for (var o = 0, t = Array(r.length); o < r.length; o++) t[o] = r[o];
            return t
        }
        return Array.from(r)
    }

    // Mixes the number `r` according to the recipe string `o`, read in groups of
    // three characters [op, dir, amt]:
    //   amt - shift amount: a digit, or a letter where "a" means 10 (charCode - 87)
    //   dir - "+" shifts `r` right (unsigned, >>>), anything else shifts left (<<)
    //   op  - "+" adds the shifted value to `r` (result passed through & 4294967295),
    //         anything else XORs it into `r`
    function n(r, o) {
        for (var t = 0; t < o.length - 2; t += 3) {
            var a = o.charAt(t + 2);
            a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a), a = "+" === o.charAt(t + 1) ? r >>> a : r << a, r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a
        }
        return r
    }

    // Computes the sign string for query text `r`; returns "<p>.<p ^ m>" where
    // p is the final hash reduced modulo 1e6 and m is the first number in the seed.
    function e(r) {
        // Step 1: texts longer than 30 symbols are sampled down to 30
        // (first 10 + 10 around the middle + last 10).
        var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
        if (null === o) {
            // No surrogate pairs: sample by UTF-16 code units.
            var t = r.length;
            t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10))
        } else {
            // Surrogate pairs present: build `f`, where every pair is a single element,
            // so that sampling never cuts a pair in half.
            for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++) "" !== e[C] && f.push.apply(f, a(e[C].split(""))), C !== h - 1 && f.push(o[C]);
            var g = f.length;
            g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""))
        }
        // Step 2: read the seed. `l` spells "gtk" (char codes 103, 116, 107), so the
        // seed is window.gtk, cached in the module-level `i` after the first read.
        var u = void 0,
            l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
        u = null !== i ? i : (i = window[l] || "") || "";
        // The seed is split on "." into two numbers m and s (0 when missing or not numeric).
        // Step 3: encode `r` as UTF-8 bytes into S (1-4 bytes per code point).
        for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
            var A = r.charCodeAt(v);
            128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128)
        }
        // Step 4: fold the bytes into p, starting from m. The recipes decode to
        // F = "+-a^+6" (applied after every byte) and D = "+-3^+b+-f" (applied once at the end).
        for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) p += S[b], p = n(p, F);
        // Step 5: final mix, XOR with s, map a negative result to its unsigned 32-bit
        // value, reduce modulo 1e6 and format as "<p>.<p ^ m>".
        return p = n(p, D), p ^= s, 0 > p && (p = (2147483647 & p) + 2147483648), p %= 1e6, p.toString() + "." + (p ^ m)
    }
    // Cache for the seed string; null until e() reads window.gtk for the first time.
    var i = null;
    t.exports = e
});;

对,没错,函数最后 return 语句返回的字符串(p.toString() + "." + (p ^ m))就是我们要得到的sign值。接下来继续精简代码,只保留我们需要的部分,去掉其他没用的。

以下是我最终整理的获取sign参数的js代码:


// Cached seed string (the page's window.gtk, formatted "<int>.<int>").
// Stays null until the first sign() call that has to look it up.
var i = null;

/**
 * Mixes a number according to a recipe string read in groups of three
 * characters [op, dir, amt].
 *
 *   amt - shift amount: a digit, or a letter where "a" means 10 (charCode - 87)
 *   dir - "+" shifts right (unsigned, >>>); anything else shifts left (<<)
 *   op  - "+" adds the shifted value (truncated to 32 bits); anything else XORs it
 *
 * @param {number} r - Current hash value.
 * @param {string} o - Recipe, e.g. "+-a^+6".
 * @returns {number} The mixed value.
 */
function n(r, o) {
    for (var t = 0; t < o.length - 2; t += 3) {
        var op = o.charAt(t);
        var direction = o.charAt(t + 1);
        var amountChar = o.charAt(t + 2);
        var amount = amountChar >= "a" ? amountChar.charCodeAt(0) - 87 : Number(amountChar);
        var shifted = "+" === direction ? r >>> amount : r << amount;
        r = "+" === op ? (r + shifted) & 4294967295 : r ^ shifted;
    }
    return r;
}

/**
 * Computes the `sign` request parameter of the Baidu Translate web API.
 *
 * @param {string} r - The query text to be translated.
 * @param {string} [gtk] - Optional seed ("<int>.<int>"). When omitted, the seed
 *     is read once from window.gtk (if a `window` global exists) and cached in
 *     `i`; an empty seed is used when neither is available.
 * @returns {string} The signature, formatted "<p>.<p ^ m>".
 */
function sign(r, gtk) {
    // Step 1: texts longer than 30 symbols are sampled down to 30
    // (first 10 + 10 around the middle + last 10).
    var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
    if (null === o) {
        // No surrogate pairs: sample by UTF-16 code units.
        var t = r.length;
        if (t > 30) {
            r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10);
        }
    } else {
        // Surrogate pairs present: build a symbol list in which every pair is a
        // single element, so sampling never cuts a pair in half.
        var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/);
        var f = [];
        for (var C = 0, h = e.length; C < h; C++) {
            if ("" !== e[C]) {
                // The vendor code copied this array through a helper `a()` that
                // is not part of this snippet; split() already returns a fresh
                // array, so it is pushed directly.
                f.push.apply(f, e[C].split(""));
            }
            if (C !== h - 1) {
                f.push(o[C]);
            }
        }
        var g = f.length;
        if (g > 30) {
            r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join("");
        }
    }

    // Step 2: resolve the seed. An explicit argument wins; otherwise use the
    // cached page value. `typeof window` keeps this usable outside a browser.
    var u;
    if (gtk !== undefined && gtk !== null) {
        u = String(gtk);
    } else {
        if (null === i) {
            i = (typeof window !== "undefined" && window.gtk) || "";
        }
        u = i;
    }
    var d = u.split(".");
    var m = Number(d[0]) || 0;
    var s = Number(d[1]) || 0;

    // Step 3: encode the (possibly sampled) text as UTF-8 bytes.
    var S = [];
    for (var v = 0; v < r.length; v++) {
        var A = r.charCodeAt(v);
        if (A < 128) {
            S.push(A);
        } else if (A < 2048) {
            S.push(A >> 6 | 192, 63 & A | 128);
        } else if (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1))) {
            // High + low surrogate: combine into one code point, emit 4 bytes.
            A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v));
            S.push(A >> 18 | 240, A >> 12 & 63 | 128, A >> 6 & 63 | 128, 63 & A | 128);
        } else {
            S.push(A >> 12 | 224, A >> 6 & 63 | 128, 63 & A | 128);
        }
    }

    // Step 4: fold every byte into the hash, then apply the final mix.
    // (The vendor code builds these two recipes via String.fromCharCode.)
    var F = "+-a^+6";
    var D = "+-3^+b+-f";
    var p = m;
    for (var b = 0; b < S.length; b++) {
        p = n(p + S[b], F);
    }
    p = n(p, D);
    p ^= s;
    if (p < 0) {
        // Reinterpret the negative int32 as its unsigned 32-bit value.
        p = (2147483647 & p) + 2147483648;
    }
    p %= 1e6;
    return p.toString() + "." + (p ^ m);
}

复制以上整理的代码,直接调用sign('你好')即可得到sign值。注意:代码会读取页面中的 window.gtk 作为计算种子,因此需要在百度翻译页面的控制台中运行,或者事先自行提供该值。

语言标识代码

最后整理一下各类语言代码标识:

本文隐藏内容 登陆 后才可以浏览

赞(2) 打赏
转载请注明文章地址:Hellohao » 抓包逆向分析百度翻译接口API+语言种类代码
分享到: 更多 (0)

评论 抢沙发

评论前必须登录!

 

觉得文章有用就打赏一下文章作者

支付宝扫一扫打赏

微信扫一扫打赏