JuiceFS 缓存预热 warmup
本文内容
怎么使用缓存预热我们不讲,主要讲JuiceFS的实现。
预热命令
我们从头到尾去看这个warmup。warmup是主动触发的,可以直接通过命令行(cmd)执行。
// cmdWarmup returns the CLI definition of the "warmup" sub-command: its name,
// category, help text, accepted flags, and warmup as the action to run.
func cmdWarmup() *cli.Command {
	// Help text shown for the command; kept verbatim.
	const description = `
This command provides a faster way to actively build cache for the target files. It reads all objects
of the files and then write them into local cache directory.
Examples:
# Warm all files in datadir
$ juicefs warmup /mnt/jfs/datadir
# Warm only three files in datadir
$ cat /tmp/filelist
/mnt/jfs/datadir/f1
/mnt/jfs/datadir/f2
/mnt/jfs/datadir/f3
$ juicefs warmup -f /tmp/filelist`

	// Flags are declared separately so the command literal below stays short.
	flags := []cli.Flag{
		&cli.StringFlag{
			Name:    "file",
			Aliases: []string{"f"},
			Usage:   "file containing a list of paths",
		},
		&cli.UintFlag{
			Name:    "threads",
			Aliases: []string{"p"},
			Value:   50,
			Usage:   "number of concurrent workers",
		},
		&cli.BoolFlag{
			Name:    "background",
			Aliases: []string{"b"},
			Usage:   "run in background",
		},
		// --evict and --check switch the command to a different action;
		// both are off unless given explicitly.
		&cli.BoolFlag{
			Name:  "evict",
			Usage: "evict cached blocks",
		},
		&cli.BoolFlag{
			Name:  "check",
			Usage: "check whether the data blocks are cached or not",
		},
	}

	return &cli.Command{
		Name:        "warmup",
		Category:    "TOOL",
		Usage:       "Build cache for target directories/files",
		ArgsUsage:   "[PATH ...]",
		Description: description,
		Action:      warmup,
		Flags:       flags,
	}
}
evict和check两个都是布尔选项,这两个不能共存,也跟我们预热关系不大,我们下面看代码默认这两个值是false。我们看一下warmup这个函数,这个函数有点大,分成几部分来看。
// setup is defined elsewhere in the package; it runs before any flag is read.
setup(ctx, 0)

// --evict and --check select different actions and must not be combined.
evict, check := ctx.Bool("evict"), ctx.Bool("check")
if check && evict {
	logger.Fatalf("--check and --evict can't be used together")
}

// Collect every positional argument as an absolute path.
var paths []string
for _, arg := range ctx.Args().Slice() {
	abs, err := filepath.Abs(arg)
	if err != nil {
		logger.Fatalf("Failed to get absolute path of %s: %s", arg, err)
		// Only reached if Fatalf returns; keeps the failed argument out of paths.
		continue
	}
	paths = append(paths, abs)
}
evict和check不能同时为true,这个和我们没有关系。
下面会处理所有位置参数:获取除选项(flag)之外的命令行参数,将每个参数转换为绝对路径并存储在 paths 切片中。如果无法获取绝对路径,则记录致命错误并退出。
// Optionally read more targets from the file named by --file, one path per line.
if listFile := ctx.String("file"); listFile != "" {
	fd, err := os.Open(listFile)
	if err != nil {
		logger.Fatalf("Failed to open file %s: %s", listFile, err)
	}
	// Deferred to the end of the enclosing function, not the end of this if-block.
	defer fd.Close()

	lines := bufio.NewScanner(fd)
	for lines.Scan() {
		entry := strings.TrimSpace(lines.Text())
		if entry == "" {
			// Blank lines are ignored.
			continue
		}
		abs, absErr := filepath.Abs(entry)
		if absErr != nil {
			// A bad line only skips that entry, unlike a bad positional argument.
			logger.Warnf("Skipped path %s because it fails to get absolute path: %s", entry, absErr)
			continue
		}
		paths = append(paths, abs)
	}
	if err = lines.Err(); err != nil {
		logger.Fatalf("Reading file %s failed with error: %s", listFile, err)
	}
}

// Nothing to do when neither the arguments nor the list file produced a path.
if len(paths) == 0 {
	logger.Infof("no path")
	return nil
}
这一步是读取文件,这个文件包含了要预热的文件路径(每行一个)。这些路径连同命令行参数中给出的路径一起添加到paths里;若paths为空则直接退出。
// openController opens the control file located in the directory of dpath.
//
// If dpath is not a directory, its parent directory is used. The file
// ".jfs.control" is tried first; only when it does not exist is ".control"
// tried instead. The file is opened read-write, and any error from os.Stat or
// os.OpenFile is returned unchanged.
func openController(dpath string) (*os.File, error) {
	info, err := os.Stat(dpath)
	if err != nil {
		return nil, err
	}

	dir := dpath
	if !info.IsDir() {
		dir = filepath.Dir(dpath)
	}

	ctl, err := os.OpenFile(filepath.Join(dir, ".jfs.control"), os.O_RDWR, 0)
	if !os.IsNotExist(err) {
		// Either success or an error other than "not exist": no fallback.
		return ctl, err
	}
	return os.OpenFile(filepath.Join(dir, ".control"), os.O_RDWR, 0)
}
这是一个打开控制文件的函数:在目标路径所在的目录下打开.jfs.control,若该文件不存在则回退到.control,不需要过多讲解。
// The control file is located through the first path only.
first := paths[0]
controller, err := openController(first)
if err != nil {
	return fmt.Errorf("open control file for %s: %s", first, err)
}
defer controller.Close()

// Walk upward from the first path until a directory whose inode equals
// meta.RootInode is found; mp ends up as "/" when no such directory exists.
mp := first
for mp != "/" {
	inode, err := utils.GetFileInode(mp)
	if err != nil {
		logger.Fatalf("lookup inode for %s: %s", mp, err)
	}
	if inode == uint64(meta.RootInode) {
		break
	}
	mp = filepath.Dir(mp)
}
预热只针对已经挂载的文件系统。这一步是为了获取挂载点:从第一个路径开始逐级向上查找,直到找到inode等于根inode的目录。
// At least one worker is required; a zero value is corrected instead of rejected.
threads := ctx.Uint("threads")
if threads == 0 {
	logger.Warnf("threads should be larger than 0, reset it to 1")
	threads = 1
}

// Warmup is the default action; --evict takes precedence over --check.
action := vfs.WarmupCache
switch {
case evict:
	action = vfs.EvictCache
case check:
	action = vfs.CheckCache
}
接着检查另一个参数threads:若为0则重置为1。action默认设置为WarmupCache,只有指定了evict或check时才会改成对应的操作。
background := ctx.Bool("background")
// Paths under the mount point are sent relative to it, so strip the first len(mp) bytes.
start := len(mp)
batch := make([]string, 0, batchMax)
progress := utils.NewProgress(background)
dspin := progress.AddDoubleSpinner(action.String())
// total accumulates the responses of all batches.
total := &vfs.CacheResponse{}
for _, path := range paths {
	if mp == "/" {
		// No mount point was found while walking up from the first path,
		// so the target is identified by its inode number instead.
		inode, err := utils.GetFileInode(path)
		if err != nil {
			// Report the path that actually failed (the original logged mp here).
			logger.Errorf("lookup inode for %s: %s", path, err)
			continue
		}
		batch = append(batch, fmt.Sprintf("inode:%d", inode))
	} else if strings.HasPrefix(path, mp) {
		batch = append(batch, path[start:])
	} else {
		logger.Errorf("Path %s is not under mount point %s", path, mp)
		continue
	}
	if len(batch) >= batchMax {
		resp := sendCommand(controller, action, batch, threads, background, dspin)
		total.Add(resp)
		// Empty the batch while keeping its backing array. The original
		// `batch[0:]` left the batch unchanged, so every later send repeated
		// all earlier paths. Reuse is safe as long as sendCommand is done with
		// the slice once it has returned its response.
		batch = batch[:0]
	}
}
// Flush the final, partially filled batch.
if len(batch) > 0 {
	resp := sendCommand(controller, action, batch, threads, background, dspin)
	total.Add(resp)
}
background这个参数决定预热是否要在后台静默进行,dspin是进度显示相关的功能,没必要看。sendCommand就是把一批路径作为预热请求发送出去。后面的代码不用看了。
我们直接看vfs的cache函数:func (v *VFS) cache(ctx meta.Context, action CacheAction, paths []string, concurrent int, resp *CacheResponse)。它接受ctx和action(这里就是WarmupCache),paths就是要预热的路径,concurrent是并发worker的数量,resp是需要填充的响应结果,不用管,我们只看逻辑。
// cache applies action to every file reachable from paths.
//
// It starts `concurrent` worker goroutines that receive files from a buffered
// channel, then resolves each path and feeds the channel: a regular file is
// sent directly, a directory is handed to walkDir together with the channel.
// For every processed file the worker iterates over its slices and runs the
// handler selected by action. When resp is non-nil, its file, slice and byte
// counters are updated atomically. Errors are logged and skipped; nothing is
// returned to the caller.
func (v *VFS) cache(ctx meta.Context, action CacheAction, paths []string, concurrent int, resp *CacheResponse) {
	logger.Infof("start to %s %d paths with %d workers", action, len(paths), concurrent)
	start := time.Now()
	// Work queue shared by the producer loop below and the workers.
	todo := make(chan _file, 10*concurrent)
	wg := sync.WaitGroup{}
	for i := 0; i < concurrent; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for f := range todo {
				// Stop this worker as soon as the request is cancelled.
				if ctx.Canceled() {
					return
				}
				if f.ino == 0 {
					logger.Warnf("%s got inode 0", action)
					continue
				}
				iter := newSliceIterator(ctx, v.Meta, f.ino, f.size)
				var handler sliceHandler
				switch action {
				case WarmupCache:
					// Each slice of the file is loaded into the local cache.
					handler = func(s meta.Slice) error {
						return v.Store.FillCache(s.Id, s.Size)
					}
					// With OpenCache enabled the inode is opened and closed once
					// during warmup. NOTE(review): presumably to populate the
					// open-file cache on the meta side; confirm.
					if v.Conf.Meta.OpenCache > 0 {
						if err := v.Meta.Open(ctx, f.ino, syscall.O_RDONLY, &meta.Attr{}); err != 0 {
							// Message fixed: this branch is the failure path.
							logger.Errorf("Inode %d could not be opened: %s", f.ino, err)
						}
						_ = v.Meta.Close(ctx, f.ino)
					}
				case EvictCache:
					// omitted
				case CheckCache:
					// omitted
				}
				// log and skip error
				err := iter.Iterate(handler)
				if err != nil {
					logger.Errorf("%s error : %s", action, err)
				}
				if resp != nil {
					atomic.AddUint64(&resp.FileCount, 1)
					atomic.AddUint64(&resp.SliceCount, iter.stat.count)
					atomic.AddUint64(&resp.TotalBytes, iter.stat.bytes)
				}
			}
		}()
	}
	// inode and attr are reused as output arguments for every resolved path.
	var inode Ino
	var attr = &Attr{}
	for _, p := range paths {
		if st := v.resolve(ctx, p, &inode, attr); st != 0 {
			logger.Warnf("Failed to resolve path %s: %s", p, st)
			continue
		}
		logger.Debugf("Warming up path %s", p)
		if attr.Typ == meta.TypeDirectory {
			v.walkDir(ctx, inode, todo)
		} else if attr.Typ == meta.TypeFile {
			_ = sendFile(ctx, todo, _file{inode, attr.Length})
		}
		if ctx.Canceled() {
			break
		}
	}
	// Closing the channel lets the workers' range loops end once it is drained.
	close(todo)
	wg.Wait()
	if ctx.Canceled() {
		logger.Infof("%s cancelled", action)
	}
	logger.Infof("%s %d paths in %s", action, len(paths), time.Since(start))
}
这是一个并发处理的函数:先启动concurrent个worker从todo通道里取文件,再把解析出的文件送入通道(目录则交给walkDir处理)。如果看过之前readAhead的文章,应该对slice有一些概念,slice就是file的更小单位。这个函数对文件的slice进行遍历。再往下看,这其实就很简单了,就是把这些东西加载到缓存里。
// FillCache makes sure every block of slice id (length bytes long) is present
// in the local block cache.
//
// Blocks that can already be loaded from the block cache are skipped. A key
// whose parsed size is zero or larger than the configured block size is logged
// and skipped. Every other block is loaded through store.load into a temporary
// page. A failed load is logged and the remaining keys are still processed;
// the error of the last failed load (or nil) is returned.
func (store *cachedStore) FillCache(id uint64, length uint32) error {
	r := sliceForRead(id, int(length), store)
	keys := r.keys()
	var err error
	for _, k := range keys {
		f, e := store.bcache.load(k)
		if e == nil { // already cached
			_ = f.Close()
			continue
		}
		size := parseObjOrigSize(k)
		if size == 0 || size > store.conf.BlockSize {
			logger.Warnf("Invalid size: %s %d", k, size)
			continue
		}
		p := NewOffPage(size)
		loadErr := store.load(k, p, true, true)
		// Release the page now rather than via defer: a defer inside the loop
		// would keep every page of the slice until the function returns.
		// NOTE(review): assumes store.load holds its own reference to p for any
		// work that outlives the call — the original's deferred release relied
		// on the same thing at function return; confirm.
		p.Release()
		if loadErr != nil {
			logger.Warnf("Failed to load key: %s %s", k, loadErr)
			err = loadErr
		}
	}
	return err
}
所以JuiceFS的缓存预热很简单,也没有什么智能的因素。它首先要求文件系统已经挂载,接着由用户指定要预热的目录,接着将对应的内容加载到缓存里。

浙公网安备 33010602011771号