GodoOS 知识库实现文件系统监控与同步机制
引言
在软件开发中，文件系统的实时监控和同步是一项关键任务。无论是为了实现增量备份、日志分析还是数据同步，都需要一种高效且可靠的方法来跟踪文件的更改。本文将深入探讨如何使用 Go 语言和第三方库 fsnotify 实现一个强大的文件系统监控和同步系统，并进一步优化其性能和可靠性。
先上完整代码：
package vector
import (
	"fmt"
	"godo/libs"
	"log"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"
	"github.com/fsnotify/fsnotify"
)
// Package-level state shared by the event loop (startWatching), the worker
// goroutines and the exported watch-folder API.
var (
	// MapFilePathMonitors maps a cleaned folder path to the knowledgeId it was
	// registered with in AddWatchFolder.
	MapFilePathMonitors = map[string]uint{}
	watcher             *fsnotify.Watcher
	fileQueue           = make(chan string, 100) // buffered queue of file paths; the capacity can be tuned as needed
	numWorkers          = 3                      // number of worker goroutines
	wg                  sync.WaitGroup
	syncingKnowledgeIds = make(map[uint]syncingStats) // knowledgeIds currently syncing and their progress
	syncMutex           sync.Mutex                    // guards syncingKnowledgeIds
	renameMap           = make(map[string]string)     // temporary map holding the paths of Remove events
	renameMutex         sync.Mutex                    // guards renameMap
	watcherMutex        sync.Mutex                    // guards watcher
)
// syncingStats tracks the sync progress recorded for one knowledgeId in
// syncingKnowledgeIds.
type syncingStats struct {
	totalFiles     int // file count returned by countFiles when the folder is registered
	processedFiles int // incremented by worker for each queued file it picks up
}
// InitMonitor creates the package-level fsnotify watcher and launches the
// background goroutines: FolderMonitor, the startWatching event loop, and
// numWorkers queue workers. If the watcher cannot be created the process
// exits via log.Fatalf.
func InitMonitor() {
	watcherMutex.Lock()
	created, createErr := fsnotify.NewWatcher()
	watcher = created
	if createErr != nil {
		log.Fatalf("Error creating watcher: %s", createErr.Error())
	}
	watcherMutex.Unlock()

	go FolderMonitor()
	go startWatching()

	// Start the worker pool; every worker calls wg.Done when it returns.
	for started := 0; started < numWorkers; started++ {
		wg.Add(1)
		go worker()
	}
}
// startWatching is the fsnotify event loop. It runs until either the Events
// or the Errors channel of the package-level watcher is closed.
//
// For every event whose cleaned path shouldProcess maps to a non-zero id:
//   - Write/Create: the path is pushed onto fileQueue once isFileComplete
//     reports true (the send blocks while the queue is full).
//   - Create: a newly created directory is added to the watcher recursively,
//     and the event is passed to handleRenameCreateEvent.
//   - Remove: the watch is dropped for directories, the matching vector data
//     is deleted, and the event is passed to handleRenameRemoveEvent.
//
// Watcher errors are only logged.
func startWatching() {
	for {
		select {
		case watchErr, open := <-watcher.Errors:
			if !open {
				return
			}
			log.Println("error:", watchErr)

		case ev, open := <-watcher.Events:
			if !open {
				log.Println("error event")
				return
			}

			filePath := filepath.Clean(ev.Name)
			knowledgeID, scope := shouldProcess(filePath)
			if knowledgeID == 0 {
				// Path does not belong to any monitored knowledge base.
				continue
			}

			isCreate := ev.Has(fsnotify.Create)
			if isCreate || ev.Has(fsnotify.Write) {
				log.Printf("Event: %v, File: %s", ev.Op, filePath)
				if isFileComplete(filePath) {
					// Hand the file over to the worker pool.
					fileQueue <- filePath
				}
			}

			if isCreate {
				if info, statErr := os.Stat(filePath); statErr == nil && info.IsDir() {
					addRecursive(filePath, watcher)
				}
				// A Create may be the second half of a rename.
				handleRenameCreateEvent(ev)
			}

			if !ev.Has(fsnotify.Remove) {
				continue
			}

			// The removed path can no longer be stat'ed, so the presence of a
			// ".godoos.<id>.<name>.json" file next to it is used to decide that
			// the removed entry was a file rather than a directory.
			metaPath := filepath.Join(
				filepath.Dir(filePath),
				fmt.Sprintf(".godoos.%d.%s.json", knowledgeID, filepath.Base(filePath)),
			)
			isDir := !libs.PathExists(metaPath)
			if isDir {
				watcherMutex.Lock()
				if watcher != nil {
					watcher.Remove(filePath)
				}
				watcherMutex.Unlock()
			}

			// NOTE(review): the meaning of scope values 1 and 2 comes from
			// shouldProcess, which is not visible here — confirm there.
			switch {
			case scope == 1:
				if err := DeleteVector(knowledgeID); err != nil {
					log.Printf("Error deleting vector %d: %v", knowledgeID, err)
				}
			case scope == 2 && !isDir:
				if err := DeleteVectorFile(knowledgeID, filePath); err != nil {
					log.Printf("Error deleting vector file %d: %v", knowledgeID, err)
				}
			}

			// Remember the removed path for a possible follow-up Create.
			handleRenameRemoveEvent(ev)
		}
	}
}
// handleRenameRemoveEvent records the path of a Remove event in renameMap
// (the entry maps event.Name to itself) so that handleRenameCreateEvent can
// later match a Create event against it.
func handleRenameRemoveEvent(event fsnotify.Event) {
	removedPath := event.Name

	renameMutex.Lock()
	renameMap[removedPath] = removedPath
	renameMutex.Unlock()
}
// handleRenameCreateEvent checks whether a Create event corresponds to a path
// previously recorded by handleRenameRemoveEvent. A match requires the cleaned
// recorded path to equal the cleaned path of the Create event. On the first
// match it re-registers the path in MapFilePathMonitors (if present), removes
// and re-adds it on the watcher, re-adds sub-directories when the path is a
// directory, and drops the entry from renameMap.
func handleRenameCreateEvent(event fsnotify.Event) {
	renameMutex.Lock()
	defer renameMutex.Unlock()

	newPath := filepath.Clean(event.Name)

	for recorded := range renameMap {
		if recorded == "" {
			continue
		}
		oldPathClean := filepath.Clean(recorded)
		if oldPathClean != newPath {
			continue
		}

		// Move the monitor entry from the old key to the new one.
		if id, tracked := MapFilePathMonitors[oldPathClean]; tracked {
			delete(MapFilePathMonitors, oldPathClean)
			MapFilePathMonitors[newPath] = id
			log.Printf("Updated MapFilePathMonitors: %s -> %s", oldPathClean, newPath)
		}

		// Swap the watch over to the new path.
		watcherMutex.Lock()
		if watcher != nil {
			if rmErr := watcher.Remove(oldPathClean); rmErr != nil {
				log.Printf("Error removing old path %s from watcher: %v", oldPathClean, rmErr)
			}
			if addErr := watcher.Add(newPath); addErr != nil {
				log.Printf("Error adding new path %s to watcher: %v", newPath, addErr)
			}
		}
		watcherMutex.Unlock()

		// Directories also need their sub-directories watched again.
		if info, statErr := os.Stat(newPath); statErr == nil && info.IsDir() {
			addRecursive(newPath, watcher)
		}

		// The recorded Remove has been consumed; only the first match is handled.
		delete(renameMap, recorded)
		return
	}
}
// worker drains fileQueue until the channel is closed, then signals wg.
// For each queued path that shouldProcess still reports as monitored, it
// bumps the processed-file counter of the owning knowledgeId (when that id
// has an entry in syncingKnowledgeIds) and passes the file to
// handleGodoosFile. Handling errors are logged and do not stop the worker.
func worker() {
	defer wg.Done()

	for queued := range fileQueue {
		id, scope := shouldProcess(queued)
		if scope == 0 {
			log.Printf("File path %s is not being monitored", queued)
			continue
		}

		// Count the file as processed before handling it.
		syncMutex.Lock()
		stats, syncing := syncingKnowledgeIds[id]
		if syncing {
			stats.processedFiles++
			syncingKnowledgeIds[id] = stats
		}
		syncMutex.Unlock()

		if handleErr := handleGodoosFile(queued, id); handleErr != nil {
			log.Printf("Error handling file %s: %v", queued, handleErr)
		}
	}
}
// FolderMonitor adds the directory returned by libs.GetOsDir, together with
// its sub-directories, to the package-level watcher and then blocks the
// calling goroutine forever. If the base directory cannot be determined it
// logs the error and returns; if the watcher rejects the base directory the
// process exits via log.Fatal.
func FolderMonitor() {
	basePath, err := libs.GetOsDir()
	if err != nil {
		log.Printf("Error getting base path: %s", err.Error())
		return
	}

	// Watch every sub-directory first, then the base directory itself.
	addRecursive(basePath, watcher)

	watcherMutex.Lock()
	if watcher != nil {
		if addErr := watcher.Add(basePath); addErr != nil {
			log.Fatal(addErr)
		}
	}
	watcherMutex.Unlock()

	// Park this goroutine indefinitely.
	select {}
}
// AddWatchFolder starts monitoring folderPath on behalf of knowledgeId.
//
// The watcher is initialised on first use. The path is cleaned, checked for
// existence and rejected if it is already registered. Its sub-directories are
// added to the watcher, the number of files is counted and stored as the sync
// total for knowledgeId, and the folder is registered in MapFilePathMonitors.
// Finally the folder itself is added to the watcher and callback, if non-nil,
// is invoked.
//
// It returns an error when the folder does not exist, is already monitored,
// its files cannot be counted, or the watcher rejects the path. When the
// watcher rejects the path, the registration made by this call is rolled back
// so that a later retry is not refused as "already being monitored".
func AddWatchFolder(folderPath string, knowledgeId uint, callback func()) error {
	if watcher == nil {
		InitMonitor()
	}

	// Normalise the path so map lookups are consistent.
	folderPath = filepath.Clean(folderPath)

	if !libs.PathExists(folderPath) {
		return fmt.Errorf("folder path does not exist: %s", folderPath)
	}
	if _, exists := MapFilePathMonitors[folderPath]; exists {
		return fmt.Errorf("folder path is already being monitored: %s", folderPath)
	}

	// Watch all sub-directories.
	addRecursive(folderPath, watcher)

	totalFiles, err := countFiles(folderPath)
	if err != nil {
		return fmt.Errorf("failed to count files in folder path: %w", err)
	}

	// Record sync progress, remembering any previous entry for this id so it
	// can be restored if the registration has to be undone.
	syncMutex.Lock()
	prevStats, hadPrev := syncingKnowledgeIds[knowledgeId]
	syncingKnowledgeIds[knowledgeId] = syncingStats{
		totalFiles:     totalFiles,
		processedFiles: 0,
	}
	syncMutex.Unlock()

	// Register before adding the watch so events for the folder can already
	// be resolved to knowledgeId.
	MapFilePathMonitors[folderPath] = knowledgeId

	// Guard the watcher the same way the other call sites in this file do.
	watcherMutex.Lock()
	err = watcher.Add(folderPath)
	watcherMutex.Unlock()
	if err != nil {
		// Undo the registration; otherwise the stale entries would make every
		// retry fail with "already being monitored".
		delete(MapFilePathMonitors, folderPath)
		syncMutex.Lock()
		if hadPrev {
			syncingKnowledgeIds[knowledgeId] = prevStats
		} else {
			delete(syncingKnowledgeIds, knowledgeId)
		}
		syncMutex.Unlock()
		return fmt.Errorf("failed to add folder path to watcher: %w", err)
	}

	if callback != nil {
		callback()
	}
	log.Printf("Added folder path %s to watcher with knowledgeId %d", folderPath, knowledgeId)
	return nil
}
// RemoveWatchFolder 根据路径删除观察文件夹
func RemoveWatchFolder(folderPath string) error {
	// 规范化路径
	folderPath = filepath.Clean(folderPath)
	// 检查文件夹是否存在于监视器中
	knowledgeId, exists := MapFilePathMonitors[folderPath]
	if !exists {
		return fmt.Errorf("folder path is not being monitored: %s", folderPath)
	}
	// 从 watcher 中移除路径
	watcherMutex.Lock()
	if watcher != nil {
		err := watcher.Remove(folderPath)
		if err != nil {
			return fmt.Errorf("failed to remove folder path from watcher: %w", err)
		}
	}
	watcherMutex.Unlock()
	// 递归移除所有子目录
	err := filepath.Walk(folderPath, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			log.Printf("Error walking path %s: %v", path, err)
			return err
		}
		
                    
                