JuiceFS 缓存预热 warmup

本文内容

怎么使用缓存预热我们不讲,主要讲JuiceFS的实现。

预热命令

我们从头到尾去看这个warmup。warmup是主动触发的,可以直接通过命令行(cmd)执行。

// cmdWarmup returns the CLI definition of the "warmup" subcommand: its help
// texts, the flags it accepts, and warmup as the action to run.
func cmdWarmup() *cli.Command {
	// Help text shown for the command; kept verbatim, including its leading newline.
	const description = `
This command provides a faster way to actively build cache for the target files. It reads all objects
of the files and then write them into local cache directory.

Examples:
# Warm all files in datadir
$ juicefs warmup /mnt/jfs/datadir

# Warm only three files in datadir
$ cat /tmp/filelist
/mnt/jfs/datadir/f1
/mnt/jfs/datadir/f2
/mnt/jfs/datadir/f3
$ juicefs warmup -f /tmp/filelist`

	flags := []cli.Flag{
		// Where the paths come from and how the work is run.
		&cli.StringFlag{
			Name:    "file",
			Aliases: []string{"f"},
			Usage:   "file containing a list of paths",
		},
		&cli.UintFlag{
			Name:    "threads",
			Aliases: []string{"p"},
			Value:   50,
			Usage:   "number of concurrent workers",
		},
		&cli.BoolFlag{
			Name:    "background",
			Aliases: []string{"b"},
			Usage:   "run in background",
		},
		// Alternative actions to perform instead of building the cache.
		&cli.BoolFlag{
			Name:  "evict",
			Usage: "evict cached blocks",
		},
		&cli.BoolFlag{
			Name:  "check",
			Usage: "check whether the data blocks are cached or not",
		},
	}

	return &cli.Command{
		Name:        "warmup",
		Category:    "TOOL",
		Usage:       "Build cache for target directories/files",
		ArgsUsage:   "[PATH ...]",
		Description: description,
		Flags:       flags,
		Action:      warmup,
	}
}

evict和check两个都是布尔选项,这两个不能共存,也跟我们预热关系不大,我们下面看代码默认这两个值是false。我们看一下warmup这个函数,这个函数有点大,分成几部分来看。

	setup(ctx, 0)

	// --evict and --check are rejected when given together.
	evict, check := ctx.Bool("evict"), ctx.Bool("check")
	if evict && check {
		logger.Fatalf("--check and --evict can't be used together")
	}

	// Collect every positional argument as an absolute path.
	var paths []string
	for _, arg := range ctx.Args().Slice() {
		abs, err := filepath.Abs(arg)
		if err != nil {
			logger.Fatalf("Failed to get absolute path of %s: %s", arg, err)
			// Nothing is appended for this argument (only reachable if Fatalf returns).
			continue
		}
		paths = append(paths, abs)
	}

evict和check不能同时为true,这个和我们没有关系。
接着对所有参数进行检查:获取除了选项(flag)之外的命令行参数,将每个参数转换为绝对路径并存储在 paths 切片中。如果无法获取绝对路径,则记录致命错误并退出。

	// Paths can also be supplied through a list file, one path per line.
	if listFile := ctx.String("file"); listFile != "" {
		f, err := os.Open(listFile)
		if err != nil {
			logger.Fatalf("Failed to open file %s: %s", listFile, err)
		}
		defer f.Close()

		lines := bufio.NewScanner(f)
		for lines.Scan() {
			line := strings.TrimSpace(lines.Text())
			if line == "" {
				continue // blank lines are ignored
			}
			abs, absErr := filepath.Abs(line)
			if absErr != nil {
				logger.Warnf("Skipped path %s because it fails to get absolute path: %s", line, absErr)
				continue
			}
			paths = append(paths, abs)
		}
		// A scanning failure is reported only after the loop has ended.
		if err = lines.Err(); err != nil {
			logger.Fatalf("Reading file %s failed with error: %s", listFile, err)
		}
	}

	// Neither the arguments nor the list file produced a path: nothing to do.
	if len(paths) == 0 {
		logger.Infof("no path")
		return nil
	}

这一步是读取文件,这个文件包含了要预热的文件路径。这些路径连同命令行参数里给出的路径一起,添加到paths里;若paths为空则直接返回。

// openController opens, for reading and writing, the control file that lives
// in dpath (or in dpath's parent directory when dpath is not a directory).
// It tries ".jfs.control" first and falls back to ".control" only when the
// former does not exist. An error from os.Stat on dpath is returned as is.
func openController(dpath string) (*os.File, error) {
	info, err := os.Stat(dpath)
	if err != nil {
		return nil, err
	}

	dir := dpath
	if !info.IsDir() {
		dir = filepath.Dir(dpath)
	}

	fp, err := os.OpenFile(filepath.Join(dir, ".jfs.control"), os.O_RDWR, 0)
	if !os.IsNotExist(err) {
		// Either success or a failure other than "not exist": no fallback.
		return fp, err
	}
	return os.OpenFile(filepath.Join(dir, ".control"), os.O_RDWR, 0)
}

这是一个打开控制文件的函数:如果传入的路径不是目录,就取它所在的目录,然后打开该目录下的 .jfs.control,不存在时再尝试 .control。逻辑很简单,不需要过多讲解。

	// The control file is opened for the first collected path.
	first := paths[0]
	controller, err := openController(first)
	if err != nil {
		return fmt.Errorf("open control file for %s: %s", first, err)
	}
	defer controller.Close()

	// Climb from the first path towards "/" and stop at the directory whose
	// inode equals meta.RootInode; mp ends up as "/" when none is found.
	mp := first
	for mp != "/" {
		ino, lookupErr := utils.GetFileInode(mp)
		if lookupErr != nil {
			logger.Fatalf("lookup inode for %s: %s", mp, lookupErr)
		}
		if ino == uint64(meta.RootInode) {
			break
		}
		mp = filepath.Dir(mp)
	}

预热只针对已经挂载的文件系统。这一步是为了获取挂载点:从第一个路径开始逐级向上查找,直到找到inode等于根inode的目录。

	// A worker count of zero is replaced by one.
	threads := ctx.Uint("threads")
	if threads == 0 {
		logger.Warnf("threads should be larger than 0, reset it to 1")
		threads = 1
	}

	// Warming up is the default action; --evict is tested before --check.
	action := vfs.WarmupCache
	switch {
	case evict:
		action = vfs.EvictCache
	case check:
		action = vfs.CheckCache
	}

另一个参数threads的检测:如果为0则重置为1。action默认设置为WarmupCache。

	background := ctx.Bool("background")
	start := len(mp) // length of the mount-point prefix stripped from each path
	batch := make([]string, 0, batchMax)
	progress := utils.NewProgress(background)
	dspin := progress.AddDoubleSpinner(action.String())
	total := &vfs.CacheResponse{}
	for _, path := range paths {
		if mp == "/" {
			// mp is "/": address the path by inode number instead of by a
			// mount-relative path.
			inode, err := utils.GetFileInode(path)
			if err != nil {
				// Report the path whose lookup failed, not the mount point.
				logger.Errorf("lookup inode for %s: %s", path, err)
				continue
			}
			batch = append(batch, fmt.Sprintf("inode:%d", inode))
		} else if strings.HasPrefix(path, mp) {
			// NOTE(review): a plain prefix match also accepts a sibling such
			// as mp+"2/..." — confirm whether that can occur here.
			batch = append(batch, path[start:])
		} else {
			logger.Errorf("Path %s is not under mount point %s", path, mp)
			continue
		}
		if len(batch) >= batchMax {
			resp := sendCommand(controller, action, batch, threads, background, dspin)
			total.Add(resp)
			// Empty the batch while keeping its capacity. Re-slicing with
			// [0:] would keep every element, so each following path would
			// trigger another send that repeats all earlier paths.
			batch = batch[:0]
		}
	}
	// Send whatever is left over after the last full batch.
	if len(batch) > 0 {
		resp := sendCommand(controller, action, batch, threads, background, dspin)
		total.Add(resp)
	}

background这个参数决定预热是否要在后台静默进行,dspin也是相关功能(进度显示),没必要看。sendCommand就是发送预缓存请求。后面的代码不用看了。
我们直接看vfs的cache函数。func (v *VFS) cache(ctx meta.Context, action CacheAction, paths []string, concurrent int, resp *CacheResponse) 它接受ctx、action(这里就是WarmupCache)、paths(要预热的路径)以及concurrent(并发的worker数量)。resp是要填的回报,不用管,我们只看逻辑。

// cache applies action to every entry of paths using concurrent worker
// goroutines and returns once all queued files have been processed.
//
// Each path is resolved with v.resolve; paths that fail to resolve are logged
// and skipped. Directories are handed to v.walkDir together with the work
// queue, regular files are queued directly. Every worker iterates over the
// slices of each queued file and runs the handler selected by action on them;
// errors are logged and do not stop the run. When resp is non-nil, its
// FileCount, SliceCount and TotalBytes counters are updated atomically.
func (v *VFS) cache(ctx meta.Context, action CacheAction, paths []string, concurrent int, resp *CacheResponse) {
	logger.Infof("start to %s %d paths with %d workers", action, len(paths), concurrent)

	start := time.Now()
	// Work queue shared by all workers, buffered to 10 entries per worker.
	todo := make(chan _file, 10*concurrent)
	wg := sync.WaitGroup{}
	for i := 0; i < concurrent; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for f := range todo {
				// Stop this worker as soon as the context is cancelled.
				if ctx.Canceled() {
					return
				}

				if f.ino == 0 {
					logger.Warnf("%s got inode 0", action)
					continue
				}

				iter := newSliceIterator(ctx, v.Meta, f.ino, f.size)
				var handler sliceHandler
				switch action {
				case WarmupCache:
					// Warming up a slice means filling the store's cache with it.
					handler = func(s meta.Slice) error {
						return v.Store.FillCache(s.Id, s.Size)
					}

					// With the open cache enabled, the inode is also opened
					// and closed again once per file.
					// NOTE(review): Close is called even when Open failed —
					// confirm this is intended.
					if v.Conf.Meta.OpenCache > 0 {
						if err := v.Meta.Open(ctx, f.ino, syscall.O_RDONLY, &meta.Attr{}); err != 0 {
							logger.Errorf("Inode %d could not be opened: %s", f.ino, err)
						}
						_ = v.Meta.Close(ctx, f.ino)
					}
				case EvictCache:
					// omitted in this excerpt
				case CheckCache:
					// omitted in this excerpt
				}

				// log and skip error
				err := iter.Iterate(handler)
				if err != nil {
					logger.Errorf("%s error : %s", action, err)
				}

				// Workers share resp, hence the atomic updates.
				if resp != nil {
					atomic.AddUint64(&resp.FileCount, 1)
					atomic.AddUint64(&resp.SliceCount, iter.stat.count)
					atomic.AddUint64(&resp.TotalBytes, iter.stat.bytes)
				}
			}
		}()
	}

	// Producer side: resolve each path and feed the queue.
	var inode Ino
	var attr = &Attr{}
	for _, p := range paths {
		if st := v.resolve(ctx, p, &inode, attr); st != 0 {
			logger.Warnf("Failed to resolve path %s: %s", p, st)
			continue
		}
		logger.Debugf("Warming up path %s", p)
		if attr.Typ == meta.TypeDirectory {
			v.walkDir(ctx, inode, todo)
		} else if attr.Typ == meta.TypeFile {
			_ = sendFile(ctx, todo, _file{inode, attr.Length})
		}
		if ctx.Canceled() {
			break
		}
	}
	// Closing the queue ends the workers' range loops; then wait for them.
	close(todo)
	wg.Wait()

	if ctx.Canceled() {
		logger.Infof("%s cancelled", action)
	}
	logger.Infof("%s %d paths in %s", action, len(paths), time.Since(start))
}

这是一个并发处理的函数。如果看过之前readAhead的文章,应该对slice有一些概念,slice就是file的更小单位。这个函数对文件的slice进行遍历,并对每个slice调用FillCache。再往下看,这其实就很简单了,就是把这些东西加载到缓存里。

// FillCache goes through the keys of the slice identified by id and length
// and loads every one that store.bcache cannot already serve.
//
// Keys whose parsed original size is zero or larger than the configured block
// size are logged and skipped. A failed load is logged and iteration
// continues; the error of the last failed load (or nil) is returned.
func (store *cachedStore) FillCache(id uint64, length uint32) error {
	r := sliceForRead(id, int(length), store)
	keys := r.keys()
	var err error
	for _, k := range keys {
		f, e := store.bcache.load(k)
		if e == nil { // already cached
			_ = f.Close()
			continue
		}
		size := parseObjOrigSize(k)
		if size == 0 || size > store.conf.BlockSize {
			logger.Warnf("Invalid size: %s %d", k, size)
			continue
		}
		p := NewOffPage(size)
		loadErr := store.load(k, p, true, true)
		// Release the page as soon as its load has returned. A defer here
		// would only run when FillCache returns, keeping the page of every
		// key of the slice alive at the same time. The page of the last key
		// was already released right after its load, so this adds no new
		// lifetime requirement.
		p.Release()
		if loadErr != nil {
			logger.Warnf("Failed to load key: %s %s", k, loadErr)
			err = loadErr
		}
	}
	return err
}

所以JuiceFS的缓存预热很简单,也没有什么智能的因素。它首先要求文件系统已经挂载,接着由用户指定要预热的目录,接着将对应的内容加载到缓存里。

posted @ 2024-03-29 11:06  SimonJM  阅读(116)  评论(0)    收藏  举报