2. Lab syscall: system calls

https://pdos.csail.mit.edu/6.S081/2021/labs/syscall.html

1. System call tracing

1.1 要求

In this assignment you will add a system call tracing feature that may help you when debugging later labs. You'll create a new trace system call that will control tracing. It should take one argument, an integer "mask", whose bits specify which system calls to trace. For example, to trace the fork system call, a program calls trace(1 << SYS_fork), where SYS_fork is a syscall number from kernel/syscall.h. You have to modify the xv6 kernel to print out a line when each system call is about to return, if the system call's number is set in the mask. The line should contain the process id, the name of the system call and the return value; you don't need to print the system call arguments. The trace system call should enable tracing for the process that calls it and any children that it subsequently forks, but should not affect other processes.

实现系统调用及命令 trace,格式如:

trace [mask] [command] [command args]
...
output:
${pid}: syscall ${syscall name} -> ${return value} 

效果如下:

# 32 即 1 << SYS_read(SYS_read 的值为 5)
$ trace 32 grep hello README
3: syscall read -> 1023
3: syscall read -> 966
3: syscall read -> 70
3: syscall read -> 0
$
$ trace 2147483647 grep hello README
4: syscall trace -> 0
4: syscall exec -> 3
4: syscall open -> 3
4: syscall read -> 1023
4: syscall read -> 966
4: syscall read -> 70
4: syscall read -> 0
4: syscall close -> 0
$
$ grep hello README
$
$ trace 2 usertests forkforkfork
usertests starting
test forkforkfork: 407: syscall fork -> 408
408: syscall fork -> 409
409: syscall fork -> 410
410: syscall fork -> 411
409: syscall fork -> 412
410: syscall fork -> 413
409: syscall fork -> 414
411: syscall fork -> 415
...
$   

1.2 分析

首先我们需要根据 trace 的参数掩码,判断需要打印哪些系统调用,其次,trace 的输出主要关注三个变量:

  • 进程 pid
  • 系统调用名
  • 系统调用返回值

参考执行系统调用的文件 syscall.c,如下:

// Dispatch table: maps each system call number (SYS_*) to the kernel
// function that implements it. Slots with no initializer are zero,
// which syscall() treats as "no such call".
static uint64 (*syscalls[])(void) = {
  [SYS_fork]   = sys_fork,
  [SYS_exit]   = sys_exit,
  [SYS_wait]   = sys_wait,
  [SYS_pipe]   = sys_pipe,
  [SYS_read]   = sys_read,
  [SYS_kill]   = sys_kill,
  [SYS_exec]   = sys_exec,
  [SYS_fstat]  = sys_fstat,
  [SYS_chdir]  = sys_chdir,
  [SYS_dup]    = sys_dup,
  [SYS_getpid] = sys_getpid,
  [SYS_sbrk]   = sys_sbrk,
  [SYS_sleep]  = sys_sleep,
  [SYS_uptime] = sys_uptime,
  [SYS_open]   = sys_open,
  [SYS_write]  = sys_write,
  [SYS_mknod]  = sys_mknod,
  [SYS_unlink] = sys_unlink,
  [SYS_link]   = sys_link,
  [SYS_mkdir]  = sys_mkdir,
  [SYS_close]  = sys_close,
};

// Dispatch the system call requested by the current process.
// The call number is read from trapframe->a7 and the handler's return
// value is written to trapframe->a0. A number that is out of range or
// has no handler is reported on the console and yields -1.
void syscall(void)
{
  struct proc *cur = myproc();
  int callno = cur->trapframe->a7;

  // Reject anything that does not name a registered handler.
  if(callno <= 0 || callno >= NELEM(syscalls) || syscalls[callno] == 0) {
    printf("%d %s: unknown sys call %d\n",
            cur->pid, cur->name, callno);
    cur->trapframe->a0 = -1;
    return;
  }

  cur->trapframe->a0 = syscalls[callno]();
}
  • 保存掩码

trace 系统调用把参数中的掩码保存到当前进程中,随后 trace 命令在同一个进程里通过 exec 执行后面的 command(从上面的输出可以看到 trace 和 exec 的 pid 相同),因此掩码对 command 依然有效。按照要求,被跟踪的进程之后 fork 出的子进程也要继承该掩码,这样子进程才能根据掩码打印需要打印的系统调用。因此在 proc 中增加字段 tracemask,然后修改 fork 系统调用,把父进程的 tracemask 复制给子进程。

  • 解析掩码

判断是否需要打印该系统调用,只需 proc.tracemask & (1<<syscall_num)条件为 True 即可。

  • 获取系统调用返回值

可以看到,系统调用注册在一个数组当中,执行系统调用时,通过进程 proc 结构中的 trapframe 获取系统调用号(trapframe.a7),执行结果保存在 trapframe.a0。

  • 获取 pid

通过 myproc() 可以获取当前进程的 proc 结构,从而拿到当前进程的 pid。

  • 获取系统调用名

新增一个数组映射,系统调用号 - 系统调用名 的映射,通过系统调用号获取系统调用名。

1.3 实现

// Create a new process, copying the parent.
// Sets up child kernel stack to return as if from fork() system call.
// Excerpt only: the parts of fork() unrelated to tracing are elided, and
// only the line added for the trace system call is shown.
int
fork(void)
{
  struct proc *np;
  struct proc *p = myproc();
  // ... code elided in this excerpt ...

  // Inherit the parent's trace mask so that system calls made by the
  // child are traced with the same mask as the parent's.
  np->tracemask = p->tracemask;
  // ... code elided in this excerpt ...
}

// Maps a system call number (SYS_*) to the name printed in trace output.
// Excerpt only: presumably one entry per system call; only the entry for
// the new trace call is shown here.
static char* syscall_name[] = {
// ... entries elided in this excerpt ...
[SYS_trace]   "trace",
// ... entries elided in this excerpt ...
};

void
syscall(void)
{
  int num;
  struct proc *p = myproc();

  num = p->trapframe->a7;
  if(num > 0 && num < NELEM(syscalls) && syscalls[num]) {
    p->trapframe->a0 = syscalls[num]();
    if (p->tracemask && ( (1 << num) & p->tracemask))
    {
      printf("%d: syscall %s -> %d\n", p->pid, syscall_name[num], p->trapframe->a0);
    }
  } else {
    printf("%d %s: unknown sys call %d\n",
            p->pid, p->name, num);
    p->trapframe->a0 = -1;
  }
}

2. Sysinfo

2.1 要求

In this assignment you will add a system call, sysinfo, that collects information about the running system. The system call takes one argument: a pointer to a struct sysinfo (see kernel/sysinfo.h). The kernel should fill out the fields of this struct: the freemem field should be set to the number of bytes of free memory, and the nproc field should be set to the number of processes whose state is not UNUSED. We provide a test program sysinfotest; you pass this assignment if it prints "sysinfotest: OK".

执行 sysinfo,能够获取当前 kernel 空余内存及进程数量。

2.2 分析

sysinfo的结构如下:

// Result structure of the sysinfo system call; the kernel fills in both
// fields and copies the structure to the caller.
struct sysinfo {
  uint64 freemem;   // amount of free memory (bytes)
  uint64 nproc;     // number of processes whose state is not UNUSED
};

目的主要填充这两个字段。

  • freemem

内存容量,主要参考 kalloc.c 文件中的 kalloc、kfree 函数

// Physical page allocator state: the list of free pages and the lock
// that kalloc() and kfree() hold while updating that list.
struct {
  struct spinlock lock;   // held around every freelist update
  struct run *freelist;   // head of the linked list of free pages
} kmem;

void kfree(void *pa)
{
  struct run *r;

  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
    panic("kfree");

  // Fill with junk to catch dangling refs.
  memset(pa, 1, PGSIZE);

  r = (struct run*)pa;

  acquire(&kmem.lock);
  r->next = kmem.freelist;
  kmem.freelist = r;
  release(&kmem.lock);
}

// Take one page (PGSIZE bytes) of physical memory off the free list.
// Returns a pointer the kernel can use, or 0 when the free list is empty.
// A returned page is filled with junk bytes, not zeroed.
void * kalloc(void)
{
  struct run *page;

  // Unlink the head of the free list, if there is one.
  acquire(&kmem.lock);
  page = kmem.freelist;
  if(page != 0)
    kmem.freelist = page->next;
  release(&kmem.lock);

  if(page == 0)
    return 0;

  memset((char*)page, 5, PGSIZE); // fill with junk
  return (void*)page;
}

可以看到,内核的内存管理主要通过 kmem结构,kmem.freelist为当前空闲的内存页链表,每个元素的内存为 1 页,因此只需遍历该链表,每次 freemem += PGSIZE即可。

  • nproc

进程数量,需要剔除 UNUSED 状态的进程,参考 proc.c 中的 procdump 函数

// Print a process listing to the console, for debugging: one line per
// process slot that is not UNUSED, giving pid, state and name.
// A state with no entry in the name table is printed as "???".
// Runs when the user types ^P on the console.
// Takes no lock, to avoid wedging a stuck machine further.
void procdump(void)
{
  static char *states[] = {
    [UNUSED]   = "unused",
    [SLEEPING] = "sleep ",
    [RUNNABLE] = "runble",
    [RUNNING]  = "run   ",
    [ZOMBIE]   = "zombie",
  };

  printf("\n");
  for(int slot = 0; slot < NPROC; slot++){
    struct proc *p = &proc[slot];

    if(p->state != UNUSED){
      // Default label, replaced when the state has a known name.
      char *label = "???";
      if(p->state >= 0 && p->state < NELEM(states) && states[p->state])
        label = states[p->state];

      printf("%d %s %s", p->pid, label, p->name);
      printf("\n");
    }
  }
}

可以看到进程都存放在 proc 数组中,其定义为 struct proc proc[NPROC]; 只需遍历该数组即可。

  • sysinfo 数据传回 user space

由于系统调用的执行在内核态,而用户传入的指针是用户地址空间中的虚拟地址,因此还需要将 sysinfo 收集的数据复制到用户空间。copyout 会先通过进程的页表把用户虚拟地址转换成物理地址,然后再逐页拷贝,所以操作相对简单,复制参考 copyout 函数。

// Copy len bytes from kernel memory at src to the user virtual address
// dstva, translating through the given page table one page at a time.
// Returns 0 on success, or -1 as soon as a destination page has no
// translation (bytes copied before that point stay copied).
int copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
  while(len > 0){
    // Locate the physical page backing the current destination address.
    uint64 page_va = PGROUNDDOWN(dstva);
    uint64 page_pa = walkaddr(pagetable, page_va);
    if(page_pa == 0)
      return -1;

    // Copy no further than the end of this page, or len if that is less.
    uint64 offset = dstva - page_va;
    uint64 chunk = PGSIZE - offset;
    if(chunk > len)
      chunk = len;
    memmove((void *)(page_pa + offset), src, chunk);

    // Continue from the start of the next page.
    len -= chunk;
    src += chunk;
    dstva = page_va + PGSIZE;
  }
  return 0;
}

copyout 的使用参考 fstat:通过 argaddr 函数接收用户态传入的 stat* 参数,然后局部定义一个 stat 结构,收集好数据后,通过 copyout 将数据复制到目标指针地址。

// fstat system call: argument 0 is a file descriptor, argument 1 is a
// user pointer to a struct stat. Returns -1 if either argument cannot be
// fetched, otherwise the result of filestat().
uint64 sys_fstat(void)
{
  struct file *fp;
  uint64 user_st; // user pointer to struct stat

  if(argfd(0, 0, &fp) < 0)
    return -1;
  if(argaddr(1, &user_st) < 0)
    return -1;

  return filestat(fp, user_st);
}

// Get metadata about file f.
// addr is a user virtual address, pointing to a struct stat.
// Returns 0 on success. Returns -1 if f is neither an inode-backed file
// nor a device, or if copying the result out to addr fails.
int filestat(struct file *f, uint64 addr)
{
  struct proc *p = myproc();
  struct stat st;

  if(f->type == FD_INODE || f->type == FD_DEVICE){
    // Fill a kernel-side struct stat while holding the inode lock.
    ilock(f->ip);
    stati(f->ip, &st);
    iunlock(f->ip);
    // Then copy it to the caller through the process's page table.
    if(copyout(p->pagetable, addr, (char *)&st, sizeof(st)) < 0)
      return -1;
    return 0;
  }
  return -1;
}

2.3 实现

uint64 sys_sysinfo(void)
{
  struct proc *p = myproc();
  uint64 dst_addr;

  if(argaddr(0, &dst_addr) < 0)
  {
    printf("sysinfo error \n");
    return -1;
  }
  
  struct sysinfo srcinfo;
  srcinfo.freemem = kcalcfreememory();
  srcinfo.nproc = calcprocnum();

  // 数据返回用户态
  if(copyout(p->pagetable, dst_addr, (char *)&srcinfo, sizeof(srcinfo)) < 0)
    return -1;
    
  return 0;
}

  • 统计内存
// Return the number of bytes of free physical memory.
// Every node on kmem.freelist is one free page, so the result is the
// list length times PGSIZE.
//
// The list is walked with kmem.lock held. kalloc() unlinks a page under
// that lock and then fills it with junk, so an unlocked walker could be
// standing on a page whose `next` field has just been overwritten.
int kcalcfreememory(void)
{
  int freemem = 0;
  struct run *r;

  acquire(&kmem.lock);
  for(r = kmem.freelist; r; r = r->next)
    freemem += PGSIZE;
  release(&kmem.lock);

  return freemem;
}

  • 统计进程
// Return the number of slots in the process table whose state is not
// UNUSED. Reads the table without taking any lock.
int calcprocnum()
{
  int count = 0;

  for(int slot = 0; slot < NPROC; slot++){
    if(proc[slot].state != UNUSED)
      count++;
  }
  return count;
}
posted @ 2022-04-05 09:50  lawliet9  阅读(9)  评论(0编辑  收藏  举报