1 '''
2 i: 你好
3 from: AUTO
4 to: AUTO
5 smartresult: dict
6 client: fanyideskweb
7 salt: 16643765479061 //毫秒级别的时间戳后面加上个0-9之间的随机数,js代码:r + parseInt(10 * Math.random(), 10);这里的r表示时间戳字符串
8 sign: 1d69ce8f7c6258243e573e31e29e0012 //签名,下面找到了
9 lts: 1664376547906 //毫秒级别的时间戳
10 bv: 42c8b36dd7d61c619e7b1dc11e44d870 //同设备相同,使用md5加密的(方法是:md5(User-Agent)==>md5("5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.53")
11 doctype: json
12 version: 2.1
13 keyfrom: fanyi.web
14 action: FY_BY_REALTlME
15
16 /*
17 解密出来了:sign的计算如下
18 define("newweb/common/service", ["./utils", "./md5", "./jquery-1.7"], function(e, t) {
19 var n = e("./jquery-1.7");
20 e("./utils");
21 e("./md5");
22 var r = function(e) {
23 var t = n.md5(navigator.appVersion)
24 , r = "" + (new Date).getTime()
25 , i = r + parseInt(10 * Math.random(), 10);
26 return {
27 ts: r,
28 bv: t,
29 salt: i,
30 sign: n.md5("fanyideskweb" + e + i + "Ygy_4c=r#e#4EX^NUGUc5") //在这里,e是要翻译的内容,i是毫秒级别的时间戳后面加上个0-9之间的随机数;后面这串字符串估计是服务器那边随机生成的,应该会变化。每次抓取的时候,可以查看下js代码
31 }
32 };
33
34 总体来说,data数据由函数“generateSaltSign”计算出来
35 */
36 '''
37
38 '''
39 获取翻译结果
40 The_translated_string:被翻译的字符串
41 由于翻译是以行为单位,所以一行一个结果,函数将解析The_translated_string参数,并以字符串形式返回所有翻译结果
42 '''
def youdao_translate(The_translated_string: str, timeout: float = 10.0):
    """Translate text through the Youdao web endpoint ``translate_o``.

    The request mimics the browser client: the form carries a ``salt``
    (millisecond timestamp plus one random digit), an MD5 ``sign`` over
    ``"fanyideskweb" + text + salt + <site secret>`` and ``bv``, the MD5 of
    the browser's ``navigator.appVersion`` string.

    Args:
        The_translated_string: Text to translate. Source and target
            languages are both sent as ``"AUTO"``.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``.

    Returns:
        * ``{"": ""}`` when ``The_translated_string`` is the empty string
          (kept as-is for existing callers, even though every other path
          returns a ``str``).
        * On success (falsy ``errorCode``), one string in which the ``tgt``
          pieces of each entry of ``translateResult`` are concatenated and
          every entry is terminated by ``"\\n"``.
        * Otherwise the string ``"errorCode = <code>"``.

    Raises:
        json.JSONDecodeError: If the response body is not valid JSON.
        KeyError: If the JSON lacks ``errorCode`` / ``translateResult`` /
            ``tgt``.
        Network-level exceptions from ``requests`` propagate unchanged.
    """
    # Function-scope imports keep the block self-contained; they are no-ops
    # if the module already imported these names.
    import hashlib
    import json
    import random
    import time
    from urllib.parse import urlencode

    if The_translated_string == "":
        return {"": ""}

    import requests  # third-party; only needed once a request is really made

    url = r'https://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'

    # NOTE(review): this is the navigator.appVersion form (no "Mozilla/"
    # prefix) because that is what the site's JS hashes for ``bv``; the same
    # string is also sent verbatim as the User-Agent header.
    User_Agent = "5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36 Edg/106.0.1370.37"
    header = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        # "br" is intentionally not advertised: Brotli decoding needs an
        # optional package, and without it the body would reach json.loads
        # still compressed.
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        # NOTE(review): cookies are supplied both in this header and through
        # the ``cookies=`` argument below; confirm which one is needed.
        "Cookie": "OUTFOX_SEARCH_USER_ID=1135160796@10.108.162.134; OUTFOX_SEARCH_USER_ID_NCOO=775555146.507473; JSESSIONID=aaaQ2GYK5N-ozb24rKNcy; SESSION_FROM_COOKIE=unknown; DICT_UGC=be3af0da19b5c5e6aa4e17bd8d90b28a|; JSESSIONID=abcPzon0RcZqc7GltuAgy; ___rl__test__cookies=1665366515354",
        "Host": "fanyi.youdao.com",
        "Origin": "https://fanyi.youdao.com",
        "Referer": "https://fanyi.youdao.com/",
        # NOTE(review): the value ends with a doubled quote; kept unchanged.
        "sec-ch-ua": """\"Google Chrome";v="105", "Not)A;Brand";v="8", "Chromium";v="105"\"""",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "Windows",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": User_Agent,
        "X-Requested-With": "XMLHttpRequest",
    }

    # Millisecond timestamp; the salt is that timestamp plus one random digit.
    timestamp = str(round(time.time() * 1000))
    salt = timestamp + str(random.randint(0, 9))

    # sign = md5("fanyideskweb" + text + salt + secret). The trailing secret
    # comes from the site's JS and may change; re-check the JS if signing
    # starts failing.
    sign_str = "fanyideskweb" + The_translated_string + salt + "Ygy_4c=r#e#4EX^NUGUc5"
    sign = hashlib.md5(sign_str.encode("utf-8")).hexdigest()

    # bv = md5(appVersion); constant for a given browser string.
    bv = hashlib.md5(User_Agent.encode("utf-8")).hexdigest()

    # The original literal listed "JSESSIONID" twice, so the first value was
    # silently overwritten; only the effective value is kept, in place.
    cookies = {
        "OUTFOX_SEARCH_USER_ID": "1135160796@10.108.162.134",
        "OUTFOX_SEARCH_USER_ID_NCOO": "775555146.507473",
        "JSESSIONID": "abcPzon0RcZqc7GltuAgy",
        "SESSION_FROM_COOKIE": "unknown",
        "DICT_UGC": "be3af0da19b5c5e6aa4e17bd8d90b28a|",
        "___rl__test__cookies": "1665366515354",
    }

    form = {
        "i": The_translated_string,
        # AUTO lets the server detect the direction (typically zh-CHS -> en).
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": salt,
        "sign": sign,
        "lts": timestamp,
        "bv": bv,
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_CLICKBUTTION",
    }

    response = requests.post(
        url,
        data=urlencode(form),
        cookies=cookies,
        headers=header,
        timeout=timeout,  # never block forever on a stalled connection
    )
    payload = json.loads(response.text)

    if payload["errorCode"]:
        return "errorCode = " + str(payload["errorCode"])

    # translateResult has one entry per source paragraph; each entry is a
    # list of segments the server split that paragraph into.
    return "".join(
        "".join(segment['tgt'] for segment in paragraph) + "\n"
        for paragraph in payload["translateResult"]
    )