Go 语言构建全文检索(基于 bleve,无外部服务依赖)

go.mod

module ftsdemo

go 1.23.6

require (
	github.com/blevesearch/bleve/v2 v2.5.7
	github.com/gogf/gf v1.16.9
)

require (
	github.com/BurntSushi/toml v0.3.1 // indirect
	github.com/RoaringBitmap/roaring/v2 v2.4.5 // indirect
	github.com/bits-and-blooms/bitset v1.22.0 // indirect
	github.com/blevesearch/bleve_index_api v1.2.11 // indirect
	github.com/blevesearch/geo v0.2.4 // indirect
	github.com/blevesearch/go-faiss v1.0.26 // indirect
	github.com/blevesearch/go-porterstemmer v1.0.3 // indirect
	github.com/blevesearch/gtreap v0.1.1 // indirect
	github.com/blevesearch/mmap-go v1.0.4 // indirect
	github.com/blevesearch/scorch_segment_api/v2 v2.3.13 // indirect
	github.com/blevesearch/segment v0.9.1 // indirect
	github.com/blevesearch/snowballstem v0.9.0 // indirect
	github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect
	github.com/blevesearch/vellum v1.1.0 // indirect
	github.com/blevesearch/zapx/v11 v11.4.2 // indirect
	github.com/blevesearch/zapx/v12 v12.4.2 // indirect
	github.com/blevesearch/zapx/v13 v13.4.2 // indirect
	github.com/blevesearch/zapx/v14 v14.4.2 // indirect
	github.com/blevesearch/zapx/v15 v15.4.2 // indirect
	github.com/blevesearch/zapx/v16 v16.2.8 // indirect
	github.com/clbanning/mxj v1.8.5-0.20200714211355-ff02cfb8ea28 // indirect
	github.com/fatih/color v1.12.0 // indirect
	github.com/fsnotify/fsnotify v1.4.9 // indirect
	github.com/go-sql-driver/mysql v1.6.0 // indirect
	github.com/golang/snappy v0.0.4 // indirect
	github.com/gomodule/redigo v1.8.5 // indirect
	github.com/gorilla/websocket v1.4.2 // indirect
	github.com/grokify/html-strip-tags-go v0.0.1 // indirect
	github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede // indirect
	github.com/mattn/go-colorable v0.1.8 // indirect
	github.com/mattn/go-isatty v0.0.12 // indirect
	github.com/mattn/go-runewidth v0.0.9 // indirect
	github.com/mschoch/smat v0.2.0 // indirect
	github.com/olekukonko/tablewriter v0.0.5 // indirect
	go.etcd.io/bbolt v1.4.0 // indirect
	go.opentelemetry.io/otel v1.0.0 // indirect
	go.opentelemetry.io/otel/trace v1.0.0 // indirect
	golang.org/x/net v0.0.0-20210520170846-37e1c6afe023 // indirect
	golang.org/x/sys v0.29.0 // indirect
	golang.org/x/text v0.8.0 // indirect
	google.golang.org/protobuf v1.36.6 // indirect
	gopkg.in/yaml.v3 v3.0.1 // indirect
)

main.go

package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"strings"
	"unicode/utf8"

	"github.com/blevesearch/bleve/v2"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/cjk" // registers the cjk analyzer
	"github.com/gogf/gf/frame/g"
)

// Document is the record that buildIndex stores in the bleve index for a
// single file found under the documents directory.
type Document struct {
	ID      string `json:"id"`      // identifier of the document; buildIndex sets it to the file path
	Path    string `json:"path"`    // path of the source file as reported by the directory walk
	Content string `json:"content"` // full text of the file
}

// main opens the on-disk bleve index (creating it on first run), populates it
// from the docs directory when it holds no documents, and then serves an
// interactive search prompt until the user types "quit" or input ends.
func main() {
	indexName := "my_index.bleve"
	docsDir := "docs"

	index := openOrCreateIndex(indexName)
	// Close the index on every normal exit path, whether it was freshly
	// created or opened from an existing directory.
	defer closeIndex(index)

	// Build the index only when it is empty.
	count, err := index.DocCount()
	if err != nil {
		// log.Fatal exits without running deferred calls, so close explicitly.
		closeIndex(index)
		log.Fatal("Count documents error:", err)
	}
	if count == 0 {
		fmt.Println("Building index...")
		if err := buildIndex(index, docsDir); err != nil {
			closeIndex(index)
			log.Fatal("Build index error:", err)
		}
		fmt.Println("Index built.")
	}

	searchLoop(index)
}

// openOrCreateIndex opens the bleve index stored at indexName, or creates it
// with the cjk-analyzed mapping when nothing exists at that path yet. Any
// failure terminates the program.
func openOrCreateIndex(indexName string) bleve.Index {
	if _, err := os.Stat(indexName); !os.IsNotExist(err) {
		index, err := bleve.Open(indexName)
		if err != nil {
			log.Fatal("Open index error:", err)
		}
		return index
	}

	// Custom document mapping.
	docMapping := bleve.NewDocumentMapping()

	// content field: indexed and stored. Storing the original text is what
	// makes it available in search results.
	contentField := bleve.NewTextFieldMapping()
	contentField.Store = true
	contentField.Analyzer = "cjk" // tokenize with the cjk analyzer
	docMapping.AddFieldMappingsAt("content", contentField)

	// path field: stored as well so it can be displayed.
	pathField := bleve.NewTextFieldMapping()
	pathField.Store = true
	docMapping.AddFieldMappingsAt("path", pathField)

	// Install the mapping as the default for every document.
	indexMapping := bleve.NewIndexMapping()
	indexMapping.DefaultMapping = docMapping
	indexMapping.DefaultAnalyzer = "cjk"

	index, err := bleve.New(indexName, indexMapping)
	if err != nil {
		log.Fatal("Create index error:", err)
	}
	return index
}

// closeIndex closes index and logs, rather than aborts on, a close failure.
func closeIndex(index bleve.Index) {
	if err := index.Close(); err != nil {
		log.Print("Close index error:", err)
	}
}

// searchLoop reads one query per line from standard input, runs it against
// index and prints the hits. It returns on "quit" (any letter case), on end of
// input, or on a read error.
func searchLoop(index bleve.Index) {
	// Read whole lines: a query may contain spaces, and a line scanner reports
	// end of input instead of yielding empty queries forever.
	stdin := bufio.NewScanner(os.Stdin)
	for {
		fmt.Print("\nSearch (or 'quit'): ")
		if !stdin.Scan() {
			if err := stdin.Err(); err != nil {
				fmt.Printf("Read input error: %v\n", err)
			}
			return
		}
		q := strings.TrimSpace(stdin.Text())
		if strings.EqualFold(q, "quit") {
			return
		}
		if q == "" {
			continue
		}

		query := bleve.NewQueryStringQuery(q)
		searchReq := bleve.NewSearchRequest(query)
		searchReq.Size = 10
		searchReq.Highlight = bleve.NewHighlight()
		searchReq.Fields = []string{"path", "content"}

		fmt.Println("Searching...", searchReq)
		g.Dump(searchReq)
		result, err := index.Search(searchReq)
		if err != nil {
			fmt.Printf("Search error: %v\n", err)
			continue
		}
		if bytJSON, err := json.Marshal(result); err != nil {
			fmt.Printf("Marshal result error: %v\n", err)
		} else {
			fmt.Println(string(bytJSON))
		}

		// len(result.Hits) counts the hits returned for this request, which
		// searchReq.Size limits to 10.
		fmt.Printf("Found %d results, cost: %v\n", len(result.Hits), result.Cost)
		for _, hit := range result.Hits {
			fmt.Printf("ID: %v, Score: %v, Content: %v\n", hit.ID, hit.Score, hitSnippet(hit.Fragments["content"], hit.Fields["content"]))
		}
	}
}

// hitSnippet picks the text shown for one hit: the first highlighted content
// fragment when there is one, otherwise the stored content field if it is a
// string, otherwise the empty string. The result is shortened to at most 150
// bytes plus a trailing "...".
func hitSnippet(fragments []string, stored any) string {
	const maxSnippetBytes = 150

	// A hit that matched only outside the content field carries no content
	// fragments, so indexing fragments[0] unconditionally would panic.
	if len(fragments) > 0 {
		return truncateUTF8(fragments[0], maxSnippetBytes)
	}
	if text, ok := stored.(string); ok {
		return truncateUTF8(text, maxSnippetBytes)
	}
	return ""
}

// truncateUTF8 returns s unchanged when it is at most limit bytes long.
// Otherwise it cuts s at the last rune boundary at or before limit and appends
// "...", so a multi-byte character is never split.
func truncateUTF8(s string, limit int) string {
	if limit < 0 {
		limit = 0
	}
	if len(s) <= limit {
		return s
	}
	cut := limit
	for cut > 0 && !utf8.RuneStart(s[cut]) {
		cut--
	}
	return s[:cut] + "..."
}

// buildIndex walks dir recursively and adds every regular entry whose name
// ends in ".md" (compared case-insensitively) to index. Each file becomes one
// Document whose ID and Path are the path reported by the walk and whose
// Content is the file's text.
//
// The walk stops at the first failure. The returned error wraps the underlying
// cause (so errors.Is / errors.As still work) and names the step and path that
// failed.
func buildIndex(index bleve.Index, dir string) error {
	return filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return fmt.Errorf("walking %q: %w", path, err)
		}
		if info.IsDir() || !strings.EqualFold(filepath.Ext(info.Name()), ".md") {
			return nil
		}

		// NOTE(review): ioutil.ReadFile is deprecated in favor of os.ReadFile;
		// it is kept here so the file's io/ioutil import stays in use.
		content, err := ioutil.ReadFile(path)
		if err != nil {
			return fmt.Errorf("reading %q: %w", path, err)
		}

		doc := Document{
			ID:      path,
			Path:    path,
			Content: string(content),
		}
		if err := index.Index(path, doc); err != nil {
			return fmt.Errorf("indexing %q: %w", path, err)
		}
		return nil
	})
}
posted @ 2026-01-09 18:38  jiftle  阅读(1)  评论(0)    收藏  举报