使用Go语言编写邮件内容解析功能

保存为readmsg.go

 

package main

import (
    "bytes"
    "database/sql"
    "encoding/base64"
    "encoding/json"
    "fmt"
    "io"
    "io/ioutil"
    "log"
    "mime"
    "mime/multipart"
    "net/mail"
    "regexp"
    "strings"
    "time"

    _ "github.com/go-sql-driver/mysql"
    "golang.org/x/net/html/charset"
)

// Email is one row of the emails table as this program reads and updates it.
type Email struct {
    // Fields below are filled by rows.Scan in mysql_email_update from the
    // columns id, created, remote_addr, mail_from, rcpt_to and data.
    Id         int
    Created    string
    RemoteAddr string
    MailFrom   string
    RcptTo     string
    // Data is the raw message; saveMail parses it with mail.ReadMessage.
    Data       []byte
    // Fields below are computed by saveMail and written back to the columns
    // header, subject, body_plain and body_html.
    // Header is the JSON string returned by GetJsonByMsg.
    Header     string
    Subject    string
    BodyPlain  []byte
    BodyHtml   []byte
}

// main polls forever: it runs one mysql_email_update pass, sleeps for ten
// seconds, and repeats. It never returns on its own.
func main() {
    const pollInterval = 10 * time.Second
    for {
        mysql_email_update()
        time.Sleep(pollInterval)
    }
}
// mysql_email_update performs one processing pass: it opens a database
// handle, selects every emails row whose subject is NULL, and hands each row
// to saveMail. Any failure (open, query, scan, save, or row iteration) panics.
//
// NOTE(review): "mysql connected!" is logged as soon as sql.Open returns
// without error; nothing here pings the server, so the first real network
// failure may only surface at db.Query.
func mysql_email_update() {
    const (
        dsn          = "firadio_mail:firadio_mail@tcp(39.101.248.243:3306)/firadio_mail"
        pendingQuery = "SELECT id,created,remote_addr,mail_from,rcpt_to,data FROM emails WHERE 0 OR ISNULL(subject)"
    )

    log.Print("mysql connecting...")
    db, openErr := sql.Open("mysql", dsn)
    if openErr != nil {
        panic(openErr.Error())
    }
    log.Print("mysql connected!")
    defer db.Close()

    rows, queryErr := db.Query(pendingQuery)
    if queryErr != nil {
        panic(queryErr.Error())
    }
    defer rows.Close()

    for rows.Next() {
        var email Email
        scanErr := rows.Scan(
            &email.Id,
            &email.Created,
            &email.RemoteAddr,
            &email.MailFrom,
            &email.RcptTo,
            &email.Data,
        )
        if scanErr != nil {
            panic(scanErr.Error())
        }
        if saveErr := saveMail(db, email); saveErr != nil {
            log.Panicln(saveErr)
        }
    }
    // rows.Next returning false can mean "done" or "failed"; check which.
    if iterErr := rows.Err(); iterErr != nil {
        panic(iterErr.Error())
    }
    log.Print("mysql updated!")
}

// saveMail parses the raw message in email.Data and writes the derived
// header JSON, subject, plain-text body and HTML body back to the emails row
// identified by email.Id.
//
// The message is parsed completely before the database is touched, so a
// malformed message costs no database round trip. Every returned error is
// wrapped with the name of the step that failed.
func saveMail(db *sql.DB, email Email) error {
    log.Println("updating...", email.Id)

    msg, err := mail.ReadMessage(bytes.NewReader(email.Data))
    if err != nil {
        return fmt.Errorf("ReadMessage: %w", err)
    }

    if email.Header, err = GetJsonByMsg(msg); err != nil {
        return fmt.Errorf("GetJsonByMsg: %w", err)
    }

    if email.Subject, err = GetSubjectByMsg(msg); err != nil {
        return fmt.Errorf("GetSubjectByMsg: %w", err)
    }

    // Read all multipart parts in one go; msg.Body is a stream, so the parts
    // must be collected once and then looked up per content type.
    multipartBody, err := GetMultipartBody(msg)
    if err != nil {
        return fmt.Errorf("GetMultipartBody: %w", err)
    }

    if email.BodyPlain, err = GetBodyByMsg(msg, multipartBody, "text/plain"); err != nil {
        return fmt.Errorf("GetBodyByMsg.text/plain: %w", err)
    }

    if email.BodyHtml, err = GetBodyByMsg(msg, multipartBody, "text/html"); err != nil {
        return fmt.Errorf("GetBodyByMsg.text/html: %w", err)
    }

    // Prepare and run the UPDATE only after parsing has succeeded.
    stmt, err := db.Prepare("UPDATE emails SET header=?, subject=?, body_plain=?, body_html=? WHERE id=?")
    if err != nil {
        return fmt.Errorf("db.Prepare: %w", err)
    }
    defer stmt.Close()

    if _, err := stmt.Exec(email.Header, email.Subject, email.BodyPlain, email.BodyHtml, email.Id); err != nil {
        return fmt.Errorf("stmt.Exec: %w", err)
    }
    return nil
}

// rfc2047EncodedWord matches one RFC 2047 encoded-word,
// "=?charset?encoding?encoded-text?=". Submatches are charset, encoding
// (B or Q, either case) and encoded-text. It is compiled once at package
// level rather than on every call.
var rfc2047EncodedWord = regexp.MustCompile(`=\?([\w\-]+)\?([BbQq])\?([^?\s]*)\?=`)

// DecodeRFC2047String replaces every RFC 2047 encoded-word in str with its
// decoded text, using DecodeRFC2047String_one for each word.
//
// Text outside encoded-words is copied unchanged, except that whitespace
// standing alone between two successfully decoded encoded-words is dropped
// (such whitespace only separates the words and is not part of the text).
// A word that fails to decode is kept verbatim instead of being deleted.
//
// The returned error is always nil; it is kept for interface compatibility.
func DecodeRFC2047String(str string) (string, error) {
    matches := rfc2047EncodedWord.FindAllStringSubmatchIndex(str, -1)
    if len(matches) == 0 {
        return str, nil
    }

    var out strings.Builder
    prevEnd := 0         // index just past the previous encoded-word
    prevDecoded := false // whether the previous encoded-word decoded cleanly
    for _, m := range matches {
        gap := str[prevEnd:m[0]]
        // Skip the gap only when it is pure whitespace between two decoded
        // words; otherwise it is real text and must be preserved.
        if !(prevDecoded && strings.TrimSpace(gap) == "") {
            out.WriteString(gap)
        }

        word := str[m[0]:m[1]]
        decoded, err := DecodeRFC2047String_one(
            word,
            str[m[2]:m[3]],
            strings.ToUpper(str[m[4]:m[5]]), // the helper compares against "B"/"Q"
            str[m[6]:m[7]],
        )
        if err != nil {
            out.WriteString(word)
            prevDecoded = false
        } else {
            out.WriteString(decoded)
            prevDecoded = true
        }
        prevEnd = m[1]
    }
    out.WriteString(str[prevEnd:])

    return out.String(), nil
}

// DecodeRFC2047String_one decodes a single RFC 2047 encoded-word to UTF-8.
//
// _match is the full encoded-word text, _charset its charset label, _type
// the encoding letter ("B" or "Q", matched case-insensitively) and _encoded
// the encoded payload.
//
// Q words are decoded with mime.WordDecoder. Its CharsetReader is wired to
// charset.NewReaderLabel, so charsets beyond the ones the mime package
// handles natively are converted instead of failing with an
// unhandled-charset error. B words are base64-decoded and then converted
// from _charset to UTF-8.
//
// An error is returned for invalid base64, an unknown charset label, or an
// encoding letter other than B/Q.
func DecodeRFC2047String_one(_match string, _charset string, _type string, _encoded string) (string, error) {
    switch strings.ToUpper(_type) {
    case "Q":
        dec := mime.WordDecoder{
            CharsetReader: func(label string, input io.Reader) (io.Reader, error) {
                return charset.NewReaderLabel(label, input)
            },
        }
        sDecoded, err := dec.DecodeHeader(_match)
        if err != nil {
            return "", err
        }
        return sDecoded, nil

    case "B":
        // Decode the payload into raw bytes in the declared charset.
        bDecoded, err := base64.StdEncoding.DecodeString(_encoded)
        if err != nil {
            return "", err
        }
        // Convert those bytes to UTF-8.
        utf8Reader, err := charset.NewReaderLabel(_charset, bytes.NewReader(bDecoded))
        if err != nil {
            return "", err
        }
        utf8Bytes, err := ioutil.ReadAll(utf8Reader)
        if err != nil {
            return "", err
        }
        return string(utf8Bytes), nil

    default:
        return "", fmt.Errorf("unsupported RFC 2047 encoding %q", _type)
    }
}

// GetSubjectByMsg returns the message's Subject header after passing it
// through DecodeRFC2047String. If decoding reports an error, the subject is
// returned as "" together with that error.
func GetSubjectByMsg(msg *mail.Message) (string, error) {
    rawSubject := msg.Header.Get("Subject")
    decoded, decodeErr := DecodeRFC2047String(rawSubject)
    if decodeErr != nil {
        return "", decodeErr
    }
    return decoded, nil
}

// GetJsonByMsg returns the message headers as a JSON object mapping each
// header name to its list of values, with every value passed through
// DecodeRFC2047String.
//
// The decoded values are written to a copy; msg.Header itself is left
// untouched, so later reads of the same message (Subject, Content-Type, ...)
// still see the original header text and are not decoded a second time.
// A value whose decoding reports an error is emitted unchanged.
func GetJsonByMsg(msg *mail.Message) (string, error) {
    var decoded map[string][]string
    if msg.Header != nil {
        decoded = make(map[string][]string, len(msg.Header))
    }
    for name, values := range msg.Header {
        out := make([]string, len(values))
        for i, raw := range values {
            out[i] = raw
            if s, err := DecodeRFC2047String(raw); err == nil {
                out[i] = s
            }
        }
        decoded[name] = out
    }

    sJson, err := json.Marshal(decoded)
    if err != nil {
        return "", err
    }
    return string(sJson), nil
}

// GetBodyByMsg returns the message content of media type _ContentType
// (for example "text/plain" or "text/html").
//
// If the message has no Content-Type header, or its media type equals
// _ContentType, the body is read directly from msg.Body: a base64
// Content-Transfer-Encoding (matched case-insensitively) is decoded first,
// and then, if the Content-Type carries a charset parameter, the bytes are
// converted from that charset to UTF-8. Other transfer encodings, such as
// quoted-printable, are not decoded on this path.
//
// Otherwise the content is looked up in multipartBody, the parts previously
// collected by GetMultipartBody; the result is nil if no part matches.
//
// msg.Body is a stream, so the direct path consumes it: a second call that
// takes the direct path for the same message reads an already-drained body.
func GetBodyByMsg(msg *mail.Message, multipartBody []MsgPartBody, _ContentType string) ([]byte, error) {
    var params map[string]string
    mediaType := ""
    if ContentType := msg.Header.Get("Content-Type"); ContentType != "" {
        var err error
        mediaType, params, err = mime.ParseMediaType(ContentType)
        if err != nil {
            return nil, err
        }
    }

    // A different top-level type means the wanted content can only live in
    // one of the multipart parts.
    if mediaType != "" && mediaType != _ContentType {
        return GetBodyByMsgPartBodys(multipartBody, _ContentType)
    }

    bodyBytes, err := ioutil.ReadAll(msg.Body)
    if err != nil {
        return nil, err
    }

    encoding := strings.TrimSpace(msg.Header.Get("Content-Transfer-Encoding"))
    if strings.EqualFold(encoding, "base64") {
        // DecodedLen is only an upper bound (line breaks and padding inflate
        // it); keep just the n bytes actually produced so the result has no
        // trailing zero bytes.
        dst := make([]byte, base64.StdEncoding.DecodedLen(len(bodyBytes)))
        n, err := base64.StdEncoding.Decode(dst, bodyBytes)
        if err != nil {
            return nil, err
        }
        bodyBytes = dst[:n]
    }

    if label := params["charset"]; label != "" {
        // Convert the (transfer-decoded) bytes to UTF-8.
        utf8Reader, err := charset.NewReaderLabel(label, bytes.NewReader(bodyBytes))
        if err != nil {
            return nil, err
        }
        utf8Bytes, err := ioutil.ReadAll(utf8Reader)
        if err != nil {
            return nil, err
        }
        return utf8Bytes, nil
    }

    return bodyBytes, nil
}

// MsgPartBody is one part of a multipart message as collected by
// GetMultipartBody.
type MsgPartBody struct {
    // ContentType is the part's media type without parameters.
    ContentType string
    // Body is the part's content after transfer decoding. No charset
    // conversion is applied.
    Body        []byte
}

// GetMultipartBody reads every part of a multipart message in one pass and
// returns them in document order.
//
// If the message's Content-Type is missing, unparsable, or not multipart/*,
// an empty slice and a nil error are returned and msg.Body is not read.
// Nested multipart containers are flattened: their parts are returned in
// place of the container. Base64 parts (encoding name matched
// case-insensitively) are decoded, and each part appears exactly once.
// Parts whose Content-Type cannot be parsed are skipped.
func GetMultipartBody(msg *mail.Message) ([]MsgPartBody, error) {
    bodys := []MsgPartBody{}
    mediaType, params, err := mime.ParseMediaType(msg.Header.Get("Content-Type"))
    if err != nil || !strings.HasPrefix(mediaType, "multipart/") {
        return bodys, nil
    }
    return appendMultipartBodys(bodys, msg.Body, params["boundary"])
}

// appendMultipartBodys parses r as a multipart body delimited by boundary
// and appends its parts to bodys, descending into nested multipart parts.
// It returns a nil slice and the error if the multipart stream is malformed
// or a base64 part cannot be decoded.
func appendMultipartBodys(bodys []MsgPartBody, r io.Reader, boundary string) ([]MsgPartBody, error) {
    mr := multipart.NewReader(r, boundary)
    for {
        part, err := mr.NextPart()
        if err == io.EOF {
            return bodys, nil
        }
        if err != nil {
            return nil, err
        }
        partMediaType, partParams, err := mime.ParseMediaType(part.Header.Get("Content-Type"))
        if err != nil {
            continue
        }
        partBytes, err := ioutil.ReadAll(part)
        if err != nil {
            return nil, err
        }

        if nestedBoundary := partParams["boundary"]; nestedBoundary != "" && strings.HasPrefix(partMediaType, "multipart/") {
            nested, err := appendMultipartBodys(nil, bytes.NewReader(partBytes), nestedBoundary)
            if err == nil {
                bodys = append(bodys, nested...)
                continue
            }
            // A malformed nested container is kept as a single opaque part
            // rather than failing the whole message.
        }

        encoding := strings.TrimSpace(part.Header.Get("Content-Transfer-Encoding"))
        if strings.EqualFold(encoding, "base64") {
            // DecodedLen is only an upper bound; keep just the n bytes
            // actually written so the body has no trailing zero bytes.
            dst := make([]byte, base64.StdEncoding.DecodedLen(len(partBytes)))
            n, err := base64.StdEncoding.Decode(dst, partBytes)
            if err != nil {
                return nil, err
            }
            partBytes = dst[:n]
        }
        bodys = append(bodys, MsgPartBody{ContentType: partMediaType, Body: partBytes})
    }
}

// GetBodyByMsgPartBodys returns the Body of the first entry in bodys whose
// ContentType equals ContentType exactly. It returns nil when no entry
// matches. The error result is always nil.
func GetBodyByMsgPartBodys(bodys []MsgPartBody, ContentType string) ([]byte, error) {
    for idx := 0; idx < len(bodys); idx++ {
        if candidate := bodys[idx]; candidate.ContentType == ContentType {
            return candidate.Body, nil
        }
    }
    return nil, nil
}

 

 

然后编译

posted on 2023-02-25 18:48  项希盛  阅读(376)  评论(0)    收藏  举报