GodoOS 知识库实现文件系统监控与同步机制
引言
在软件开发中,文件系统的实时监控和同步是一项关键任务。无论是为了实现增量备份、日志分析还是数据同步,都需要一种高效且可靠的方法来跟踪文件的更改。本文将深入探讨如何使用 Go 语言及第三方库 fsnotify 实现一个强大的文件系统监控和同步系统,并进一步优化其性能和可靠性。
先上完整代码:
package vector
import (
"fmt"
"godo/libs"
"log"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/fsnotify/fsnotify"
)
// Package-level state used by the file-system monitor.
var (
// MapFilePathMonitors maps a monitored folder path to its knowledge ID.
MapFilePathMonitors = map[string]uint{}
watcher *fsnotify.Watcher
fileQueue = make(chan string, 100) // Queue capacity; adjust as needed.
numWorkers = 3 // Number of worker goroutines.
wg sync.WaitGroup
syncingKnowledgeIds = make(map[uint]syncingStats) // Sync progress for each knowledgeId currently being synced.
syncMutex sync.Mutex // Guards syncingKnowledgeIds.
renameMap = make(map[string]string) // Temporary map holding the paths of Remove events.
renameMutex sync.Mutex // Guards renameMap.
watcherMutex sync.Mutex // Guards watcher.
)
// syncingStats holds the synchronisation counters kept per knowledge ID.
type syncingStats struct {
totalFiles int // File count returned by countFiles when the folder is registered.
processedFiles int // Incremented by a worker for each queued file it picks up.
}
// InitMonitor creates the package-level fsnotify watcher and starts the
// background goroutines: FolderMonitor, the startWatching event loop and
// numWorkers queue workers. The process exits via log.Fatalf if the watcher
// cannot be created.
func InitMonitor() {
	watcherMutex.Lock()
	created, err := fsnotify.NewWatcher()
	watcher = created
	if err != nil {
		log.Fatalf("Error creating watcher: %s", err.Error())
	}
	watcherMutex.Unlock()

	go FolderMonitor()
	go startWatching()

	// Spin up the worker pool; each worker calls wg.Done when it returns.
	for remaining := numWorkers; remaining > 0; remaining-- {
		wg.Add(1)
		go worker()
	}
}
// startWatching is the watcher event loop. It runs until either the Events or
// the Errors channel of the watcher is closed.
//
// For every event whose path shouldProcess maps to a non-zero knowledge ID:
//   - Write/Create: the path is queued on fileQueue once isFileComplete
//     reports true.
//   - Create: new directories are added via addRecursive and the event is
//     passed to handleRenameCreateEvent.
//   - Remove: the path is dropped from the watcher when it is considered a
//     directory, the matching vector data is deleted, and the event is passed
//     to handleRenameRemoveEvent.
func startWatching() {
	for {
		select {
		case ev, open := <-watcher.Events:
			if !open {
				log.Println("error event")
				return
			}

			cleaned := filepath.Clean(ev.Name)
			id, status := shouldProcess(cleaned)
			if id == 0 {
				// Path does not belong to any monitored knowledge base.
				continue
			}

			created := ev.Has(fsnotify.Create)

			if ev.Has(fsnotify.Write) || created {
				log.Printf("Event: %v, File: %s", ev.Op, cleaned)
				if isFileComplete(cleaned) {
					// Hand the path over to the worker pool.
					fileQueue <- cleaned
				}
			}

			if created {
				if info, statErr := os.Stat(cleaned); statErr == nil && info.IsDir() {
					addRecursive(cleaned, watcher)
				}
				// Check whether this Create pairs with an earlier Remove.
				handleRenameCreateEvent(ev)
			}

			if !ev.Has(fsnotify.Remove) {
				continue
			}

			// The removed path can no longer be stat'ed, so it is treated as a
			// directory unless a ".godoos.<id>.<name>.json" file sits next to it.
			sidecar := filepath.Join(
				filepath.Dir(cleaned),
				fmt.Sprintf(".godoos.%d.%s.json", id, filepath.Base(cleaned)),
			)
			isDir := !libs.PathExists(sidecar)

			if isDir {
				watcherMutex.Lock()
				if watcher != nil {
					watcher.Remove(cleaned)
				}
				watcherMutex.Unlock()
			}

			// NOTE(review): status 1 presumably means the monitored folder
			// itself and 2 a path inside it — confirm against shouldProcess.
			switch {
			case status == 1:
				if err := DeleteVector(id); err != nil {
					log.Printf("Error deleting vector %d: %v", id, err)
				}
			case status == 2 && !isDir:
				if err := DeleteVectorFile(id, cleaned); err != nil {
					log.Printf("Error deleting vector file %d: %v", id, err)
				}
			}

			// Remember the removed path for a possible follow-up Create.
			handleRenameRemoveEvent(ev)

		case watchErr, open := <-watcher.Errors:
			if !open {
				return
			}
			log.Println("error:", watchErr)
		}
	}
}
// handleRenameRemoveEvent records the path of a Remove event in renameMap so
// that a later Create event can be compared against it.
func handleRenameRemoveEvent(event fsnotify.Event) {
	removedPath := event.Name

	renameMutex.Lock()
	renameMap[removedPath] = removedPath
	renameMutex.Unlock()
}
// handleRenameCreateEvent checks whether a Create event matches a path that
// handleRenameRemoveEvent recorded earlier and, if so, refreshes the
// bookkeeping for that path.
//
// On the first recorded path that equals the created path (both cleaned) it:
//   - re-keys the entry in MapFilePathMonitors, if there is one,
//   - removes and re-adds the path on the watcher,
//   - calls addRecursive when the path is a directory,
//   - deletes the entry from renameMap.
//
// NOTE(review): a match requires the removed and the created path to be
// equal, so this only fires when the same path reappears — confirm that this
// is the intended notion of "rename".
func handleRenameCreateEvent(event fsnotify.Event) {
	renameMutex.Lock()
	defer renameMutex.Unlock()

	// Normalise the created path before comparing.
	newPath := filepath.Clean(event.Name)

	for oldPath := range renameMap {
		if oldPath == "" {
			continue
		}
		oldPathClean := filepath.Clean(oldPath)
		if oldPathClean != newPath {
			continue
		}

		// Direct map lookup instead of scanning every key for equality.
		if id, ok := MapFilePathMonitors[oldPathClean]; ok {
			delete(MapFilePathMonitors, oldPathClean)
			MapFilePathMonitors[newPath] = id
			log.Printf("Updated MapFilePathMonitors: %s -> %s", oldPathClean, newPath)
		}

		// Refresh the watch on the path.
		watcherMutex.Lock()
		if watcher != nil {
			if err := watcher.Remove(oldPathClean); err != nil {
				log.Printf("Error removing old path %s from watcher: %v", oldPathClean, err)
			}
			if err := watcher.Add(newPath); err != nil {
				log.Printf("Error adding new path %s to watcher: %v", newPath, err)
			}
		}
		watcherMutex.Unlock()

		// Directories also need their sub-directories watched.
		if info, err := os.Stat(newPath); err == nil && info.IsDir() {
			addRecursive(newPath, watcher)
		}

		// The pending Remove has been consumed.
		delete(renameMap, oldPath)
		break
	}
}
// worker consumes paths from fileQueue until the channel is closed. For each
// path that shouldProcess still reports as monitored it bumps the
// processedFiles counter of the owning knowledge ID (when one is tracked) and
// then passes the file to handleGodoosFile. It calls wg.Done on return.
func worker() {
	defer wg.Done()

	for queuedPath := range fileQueue {
		id, status := shouldProcess(queuedPath)
		if status == 0 {
			log.Printf("File path %s is not being monitored", queuedPath)
			continue
		}

		// The counter is advanced before the file is handled, so it counts
		// files picked up rather than files handled successfully.
		syncMutex.Lock()
		stats, tracked := syncingKnowledgeIds[id]
		if tracked {
			stats.processedFiles++
			syncingKnowledgeIds[id] = stats
		}
		syncMutex.Unlock()

		if err := handleGodoosFile(queuedPath, id); err != nil {
			log.Printf("Error handling file %s: %v", queuedPath, err)
		}
	}
}
// FolderMonitor registers the directory returned by libs.GetOsDir, together
// with its sub-directories via addRecursive, on the watcher and then blocks
// forever. It returns early only when the base directory cannot be
// determined; a failure to add the base directory terminates the process via
// log.Fatal.
func FolderMonitor() {
	rootDir, err := libs.GetOsDir()
	if err != nil {
		log.Printf("Error getting base path: %s", err.Error())
		return
	}

	// Watch every sub-directory first, then the root itself.
	addRecursive(rootDir, watcher)

	watcherMutex.Lock()
	if watcher != nil {
		if err = watcher.Add(rootDir); err != nil {
			log.Fatal(err)
		}
	}
	watcherMutex.Unlock()

	// Park the calling goroutine forever.
	select {}
}
// AddWatchFolder starts monitoring folderPath on behalf of knowledgeId.
//
// The monitor is initialised on first use. The folder must exist and must not
// already be present in MapFilePathMonitors. Its sub-directories are added
// via addRecursive, its files are counted with countFiles, and the folder is
// registered on the watcher. Only after the watcher has accepted the folder
// are syncingKnowledgeIds and MapFilePathMonitors updated, so a failed
// registration leaves no stale entry that would make a retry fail with
// "already being monitored".
//
// callback, when non-nil, is invoked once after the folder has been
// registered successfully.
//
// It returns an error when the folder does not exist, is already monitored,
// its files cannot be counted, or the watcher rejects the path.
func AddWatchFolder(folderPath string, knowledgeId uint, callback func()) error {
	if watcher == nil {
		InitMonitor()
	}

	// Normalise the path so map keys are comparable.
	folderPath = filepath.Clean(folderPath)

	if !libs.PathExists(folderPath) {
		return fmt.Errorf("folder path does not exist: %s", folderPath)
	}
	if _, exists := MapFilePathMonitors[folderPath]; exists {
		return fmt.Errorf("folder path is already being monitored: %s", folderPath)
	}

	// Watch all sub-directories.
	addRecursive(folderPath, watcher)

	totalFiles, err := countFiles(folderPath)
	if err != nil {
		return fmt.Errorf("failed to count files in folder path: %w", err)
	}

	// Register with the watcher before recording any state: if this fails the
	// folder must not appear as monitored.
	if err = watcher.Add(folderPath); err != nil {
		return fmt.Errorf("failed to add folder path to watcher: %w", err)
	}

	syncMutex.Lock()
	syncingKnowledgeIds[knowledgeId] = syncingStats{
		totalFiles:     totalFiles,
		processedFiles: 0,
	}
	syncMutex.Unlock()

	MapFilePathMonitors[folderPath] = knowledgeId

	if callback != nil {
		callback()
	}
	log.Printf("Added folder path %s to watcher with knowledgeId %d", folderPath, knowledgeId)
	return nil
}
// RemoveWatchFolder 根据路径删除观察文件夹
func RemoveWatchFolder(folderPath string) error {
// 规范化路径
folderPath = filepath.Clean(folderPath)
// 检查文件夹是否存在于监视器中
knowledgeId, exists := MapFilePathMonitors[folderPath]
if !exists {
return fmt.Errorf("folder path is not being monitored: %s", folderPath)
}
// 从 watcher 中移除路径
watcherMutex.Lock()
if watcher != nil {
err := watcher.Remove(folderPath)
if err != nil {
return fmt.Errorf("failed to remove folder path from watcher: %w", err)
}
}
watcherMutex.Unlock()
// 递归移除所有子目录
err := filepath.Walk(folderPath, func(path string, info os.FileInfo, err error) error {
if err != nil {
log.Printf("Error walking path %s: %v", path, err)
return err
}
