Python函数:使用谷歌翻译翻译英语字符串

代码是同事写的,我把它单独抠出来,可以作为工具函数使用。当然,性能还是个问题,有待解决。

import random
import cookielib
import urllib
import urllib2
import HTMLParser
import re


# Pool of browser User-Agent strings; one is picked at random below, once,
# when this module is loaded.
user_agents = [
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
    'Opera/9.25 (Windows NT 5.1; U; en)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
    'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
    'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
    "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7",
    "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0 ",
]
agent = random.choice(user_agents)

# Opener that keeps cookies in an in-memory jar and sends the headers below
# with every request it makes.
cookie_support = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler)
opener.addheaders = [
    ("User-agent", agent),
    ("Accept", "*/*"),
    ('Referer', 'http://www.google.com'),
]

# Make it the process-wide default so plain urllib2.urlopen() calls use it.
urllib2.install_opener(opener)

def unescape(text):
    """Return *text* with HTML character references decoded.

    Named references (``&amp;``, ``&lt;`` ...) and numeric references
    (``&#20320;``, ``&#x597D;``) are replaced by the characters they denote.
    Text without references is returned as-is.

    On Python 2 this is the same ``HTMLParser.HTMLParser().unescape`` call the
    helper has always made. That method does not exist on current Python 3
    releases, so when the ``HTMLParser`` module is unavailable the standard
    ``html.unescape`` function is used instead.
    """
    try:
        # Python 2 module; imported locally so the fallback below can work.
        from HTMLParser import HTMLParser as _Parser
    except ImportError:
        # Python 3: the HTMLParser module is gone; use the stdlib replacement.
        from html import unescape as _html_unescape
        return _html_unescape(text)
    return _Parser().unescape(text)

def TranslateByGoogle(text="", fromLang="en", toLang="zh-CN"):
    """Translate *text* by scraping the Google Translate mobile web page.

    The request goes to ``https://translate.google.cn/m`` with *toLang* sent
    as the ``hl`` query parameter, *fromLang* as ``sl`` and the URL-quoted
    text as ``q``. The translation is taken from the first element in the
    response whose markup matches ``class="t0">...<``.

    NOTE(review): no explicit target-language parameter is sent; presumably
    the page translates into the ``hl`` language -- confirm against the
    current service before relying on it.

    Args:
        text: Text to translate. A text (unicode) string is encoded as UTF-8;
            a byte string is used unchanged and is assumed to be UTF-8
            already.
        fromLang: Source language code (default ``"en"``).
        toLang: Target language code (default ``"zh-CN"``).

    Returns:
        The HTML-unescaped translation, ``""`` when the page contains no
        matching element, or ``None`` when the request or parsing fails (the
        error is printed, not raised).
    """
    base_link = "https://translate.google.cn/m?hl=%s&sl=%s&q=%s"
    # Only text strings need encoding. Calling .encode() on a Python 2 byte
    # string triggers an implicit ASCII decode first, which raises
    # UnicodeDecodeError for any non-ASCII input.
    if not isinstance(text, bytes):
        text = text.encode('utf8')
    link = base_link % (toLang, fromLang, urllib.quote_plus(text))
    try:
        response = urllib2.urlopen(link)
        try:
            raw_data = response.read()
        finally:
            # Release the connection promptly instead of waiting for GC.
            response.close()
        data = raw_data.decode("utf-8")
        match = re.search(r'class="t0">(.*?)<', data)
        if match is None:
            return ""
        return unescape(match.group(1))
    except Exception as e:
        # Deliberately best-effort: report the failure and signal it with
        # None rather than propagating network or decoding errors.
        print(e)
        return None

 

posted @ 2018-01-03 11:07  Endoresu  阅读(617)  评论(0)  编辑  收藏  举报