使用Go语言编写邮件内容解析功能
保存为readmsg.go
package main import ( "bytes" "database/sql" "encoding/base64" "encoding/json" "fmt" "io" "io/ioutil" "log" "mime" "mime/multipart" "net/mail" "regexp" "strings" "time" _ "github.com/go-sql-driver/mysql" "golang.org/x/net/html/charset" ) type Email struct { Id int Created string RemoteAddr string MailFrom string RcptTo string Data []byte Header string Subject string BodyPlain []byte BodyHtml []byte } func main() { for { mysql_email_update() time.Sleep(time.Second * 10) } } func mysql_email_update() { log.Print("mysql connecting...") db, err := sql.Open("mysql", "firadio_mail:firadio_mail@tcp(39.101.248.243:3306)/firadio_mail") if err != nil { panic(err.Error()) } log.Print("mysql connected!") defer db.Close() rows, err := db.Query("SELECT id,created,remote_addr,mail_from,rcpt_to,data FROM emails WHERE 0 OR ISNULL(subject)") if err != nil { panic(err.Error()) } defer rows.Close() for rows.Next() { var email Email err := rows.Scan(&email.Id, &email.Created, &email.RemoteAddr, &email.MailFrom, &email.RcptTo, &email.Data) if err != nil { panic(err.Error()) } //log.Println(email) err = saveMail(db, email) if err != nil { log.Panicln(err) } } if err := rows.Err(); err != nil { panic(err.Error()) } log.Print("mysql updated!") } func saveMail(db *sql.DB, email Email) error { // 解析并存储邮件 log.Println("updating...", email.Id) // 准备插入语句 stmt, err := db.Prepare("UPDATE emails SET header=?, subject=?, body_plain=?, body_html=? WHERE id=?") if err != nil { return fmt.Errorf("db.Prepare: %w", err) } defer stmt.Close() data := email.Data msg, err := mail.ReadMessage(bytes.NewReader(data)) if err != nil { return fmt.Errorf("ReadMessage: %w", err) } if true { jsonHeader, err := GetJsonByMsg(msg) if err != nil { return fmt.Errorf("GetJsonByMsg: %w", err) } //log.Println(jsonHeader) email.Header = jsonHeader } if true { subject, err := GetSubjectByMsg(msg) if err != nil { return fmt.Errorf("GetSubjectByMsg: %w", err) } //log.Println(subject) email.Subject = subject } // 一次性把MultipartBody部分全部读过来 multipartBody, err := GetMultipartBody(msg) if err != nil { return fmt.Errorf("GetMultipartBody: %w", err) } if true { msg_body_plain, err := GetBodyByMsg(msg, multipartBody, "text/plain") if err != nil { return fmt.Errorf("GetBodyByMsg.text/plain: %w", err) } //log.Println("text/plain", string(msg_body_plain)) email.BodyPlain = msg_body_plain } if true { msg_body_html, err := GetBodyByMsg(msg, multipartBody, "text/html") if err != nil { return fmt.Errorf("GetBodyByMsg.text/html: %w", err) } //log.Println("text/html", string(msg_body_html)) email.BodyHtml = msg_body_html } // 执行插入语句 _, err = stmt.Exec(email.Header, email.Subject, email.BodyPlain, email.BodyHtml, email.Id) if err != nil { return err } return nil } func DecodeRFC2047String(str string) (string, error) { // 通过匹配正则的方式来处理 re := regexp.MustCompile("[\\s]?=\\?([\\w\\-]+)\\?([A-Z])\\?([\\w+/=]+={0,2})\\?=") // 定义一个替换函数 replaceFunc := func(match string, submatches []string) string { // 调用你的方法来处理子匹配项并返回结果 s, err := DecodeRFC2047String_one(match, submatches[1], submatches[2], submatches[3]) if err != nil { return "" } return s } // 使用正则表达式和替换函数来替换字符串 newStr := re.ReplaceAllStringFunc(str, func(match string) string { submatches := re.FindStringSubmatch(match) return replaceFunc(match, submatches) }) return newStr, nil } func DecodeRFC2047String_one(_match string, _charset string, _type string, _encoded string) (string, error) { var sDecoded string var err error if _type == "Q" { dec := new(mime.WordDecoder) sDecoded, err = dec.DecodeHeader(_match) if err != nil { return "", err } return sDecoded, nil } if _type == "B" { // 将编码后的字符串解码为字节数组 bDecoded, err := base64.StdEncoding.DecodeString(_encoded) if err != nil { return "", err } sDecoded = string(bDecoded) } // 将字节数组转换为UTF-8编码的字符串 reader := strings.NewReader(sDecoded) utf8Reader, err := charset.NewReaderLabel(_charset, reader) if err != nil { return "", err } utf8Bytes, err := ioutil.ReadAll(utf8Reader) if err != nil { return "", err } return string(utf8Bytes), nil } func GetSubjectByMsg(msg *mail.Message) (string, error) { // 读取Header中的邮件主题 subject, err := DecodeRFC2047String(msg.Header.Get("Subject")) if err != nil { return "", err } return subject, nil } func GetJsonByMsg(msg *mail.Message) (string, error) { // 读取Header并转换为JSON for k, v := range msg.Header { for kk, vv := range v { vvv, err := DecodeRFC2047String(vv) if err != nil { continue } msg.Header[k][kk] = vvv } } sJson, err := json.Marshal(msg.Header) if err != nil { return "", err } return string(sJson), nil } func GetBodyByMsg(msg *mail.Message, multipartBody []MsgPartBody, _ContentType string) ([]byte, error) { // 读取Data中的指定格式的内容,如果没有就返回空 ContentType := msg.Header.Get("Content-Type") var err error var params map[string]string mediaType := "" if ContentType != "" { mediaType, params, err = mime.ParseMediaType(ContentType) if err != nil { return nil, err } } if mediaType == "" || mediaType == _ContentType { bodyBytes, err := ioutil.ReadAll(msg.Body) if err != nil { return nil, err } if encoding := msg.Header.Get("Content-Transfer-Encoding"); encoding == "base64" { dst := make([]byte, base64.StdEncoding.DecodedLen(len(bodyBytes))) _, err := base64.StdEncoding.Decode(dst, bodyBytes) if err != nil { return nil, err } return dst, nil } if params["charset"] != "" { //log.Println(params["charset"]) // 将字节数组转换为UTF-8编码的字符串 reader := strings.NewReader(string(bodyBytes)) utf8Reader, err := charset.NewReaderLabel(params["charset"], reader) if err != nil { return nil, err } utf8Bytes, err := ioutil.ReadAll(utf8Reader) if err != nil { return nil, err } return utf8Bytes, nil } return bodyBytes, nil } body, err := GetBodyByMsgPartBodys(multipartBody, _ContentType) if err != nil { return nil, err } return body, nil } type MsgPartBody struct { ContentType string Body []byte } func GetMultipartBody(msg *mail.Message) ([]MsgPartBody, error) { // 一次性取得需要的MultipartBody // 解析Multipart格式的邮件,并存入[]MsgPartBody数组 bodys := []MsgPartBody{} mediaType, params, err := mime.ParseMediaType(msg.Header.Get("Content-Type")) if err != nil { return bodys, nil } if !strings.HasPrefix(mediaType, "multipart/") { return bodys, nil } mr := multipart.NewReader(msg.Body, params["boundary"]) for { part, err := mr.NextPart() if err == io.EOF { break } if err != nil { return nil, err } partMediaType, _, err := mime.ParseMediaType(part.Header.Get("Content-Type")) if err != nil { continue } partBytes, err := ioutil.ReadAll(part) if err != nil { return nil, err } if encoding := part.Header.Get("Content-Transfer-Encoding"); encoding == "base64" { dst := make([]byte, base64.StdEncoding.DecodedLen(len(partBytes))) _, err := base64.StdEncoding.Decode(dst, partBytes) if err != nil { return nil, err } bodys = append(bodys, MsgPartBody{ContentType: partMediaType, Body: dst}) } bodys = append(bodys, MsgPartBody{ContentType: partMediaType, Body: partBytes}) } return bodys, nil } func GetBodyByMsgPartBodys(bodys []MsgPartBody, ContentType string) ([]byte, error) { // 从一次性取得并解析好的[]MsgPartBody获取邮件内容 for _, v := range bodys { if v.ContentType == ContentType { return v.Body, nil } } return nil, nil }
然后编译
本文来自博客园,作者:项希盛,转载请注明原文链接:https://www.cnblogs.com/xiangxisheng/p/17155055.html
浙公网安备 33010602011771号