lab5 copy on write

copy on write

实验结果

实验

1:./kernel/kalloc.c文件

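用一个全局数组记录每一个物理页的引用次数,那么数组的元素个数应该是多少?能否以 end 为起点来确定?观察 kernel.ld 文件可知,0x80000000 到 end 之间是 kernel 的 .text 段和 .data 段,其大小每次编译后都不一定相同,不适合用来确定数组大小。而直接用 PHYSTOP 减去 0x80000000 再除以 PGSIZE,所有 page 肯定都可以记录(下标为 (pa - 0x80000000)/PGSIZE),所以最后取 ((PHYSTOP - 0x80000000)/PGSIZE)+1。多加的 1 只是留出余量(对应 pa 恰好等于 PHYSTOP 的情况),严格来说并非必需;数组只要不少于实际页数就可以了,多加几个元素也无所谓。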

多个cpu都操作这一个全局变量数组,应该用自旋锁。

// Physical memory allocator, for user processes,
// kernel stacks, page-table pages,
// and pipe buffers. Allocates whole 4096-byte pages.

#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "spinlock.h"
#include "riscv.h"
#include "defs.h"

// Forward declaration: freerange() is defined below but used by kinit().
void freerange(void *pa_start, void *pa_end);

// Per-page reference counts for copy-on-write sharing.
// Indexed by (pa - 0x80000000) / PGSIZE, so it has one slot for every
// page from 0x80000000 up to PHYSTOP, plus one spare slot.
int CowArray[((PHYSTOP - 0x80000000)/PGSIZE)+1] = {0};

extern char end[]; // first address after kernel.
                   // defined by kernel.ld.

// A free page stores the link to the next free page in its own first bytes.
struct run {
  struct run *next;
};

// The list of free pages and the spinlock that protects it.
struct {
  struct spinlock lock;
  struct run *freelist;
} kmem;

// Spinlock held around every read or write of CowArray.
struct spinlock RefLock;

// Bring up the physical page allocator: initialise both spinlocks, then
// give every page between the end of the kernel image and PHYSTOP to
// the free list.
void
kinit()
{
  void *first_free = end;
  void *limit = (void*)PHYSTOP;

  initlock(&kmem.lock, "kmem");
  initlock(&RefLock, "ref");
  freerange(first_free, limit);
}

void
freerange(void *pa_start, void *pa_end)
{
  char *p;
  p = (char*)PGROUNDUP((uint64)pa_start);
  for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE)
  {
    CowArrayAddNum((uint64)p);
    kfree(p);
  }
    
}

// Free the page of physical memory pointed at by v,
// which normally should have been returned by a
// call to kalloc().  (The exception is when
// initializing the allocator; see kinit above.)
void
kfree(void *pa)
{
  struct run *r;
  uint8 flag = 0;

  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
    panic("kfree");

  CowArrayDelNum((uint64)pa);
  acquire(&RefLock);
  if (CowArray[((((uint64)pa)- 0x80000000)/PGSIZE)] > 0)
  {
    flag = 1;
  }
  release(&RefLock);
  if (1 == flag)
  {
    goto nofree;
  }

  // Fill with junk to catch dangling refs.
  memset(pa, 1, PGSIZE);

  r = (struct run*)pa;

  acquire(&kmem.lock);
  r->next = kmem.freelist;
  kmem.freelist = r;
  release(&kmem.lock);

  return;

nofree:

    return;
}

// Allocate one 4096-byte page of physical memory.
// Returns a pointer that the kernel can use.
// Returns 0 if the memory cannot be allocated.
void *
kalloc(void)
{
  struct run *r;

  acquire(&kmem.lock);
  r = kmem.freelist;
  if(r)
    kmem.freelist = r->next;
  release(&kmem.lock);

  if(r)
  {
    CowArrayAddNum((uint64)r);
    memset((char*)r, 5, PGSIZE); // fill with junk
  }
  return (void*)r;
}

// Add one reference to the physical page that contains pa.
// pa may point anywhere inside the page; it is rounded down first.
// Returns 0 on success, or -1 (leaving the table untouched) when pa
// lies outside [end, PHYSTOP), i.e. is not a page the allocator manages.
int CowArrayAddNum(uint64 pa)
{
    uint64 Index = 0;

    // Reject bad addresses BEFORE computing an index: an address below
    // 0x80000000 or at/after PHYSTOP would index outside CowArray.
    if ((pa >= PHYSTOP) || (pa < (uint64)end))
    {
      return -1;
    }

    Index = (PGROUNDDOWN(pa) - 0x80000000)/PGSIZE;

    acquire(&RefLock);
    if (CowArray[Index] > 0)
    {
      CowArray[Index] += 1;
    }
    else
    {
      // First reference (also resets a count that is not positive).
      CowArray[Index] = 1;
    }
    release(&RefLock);

    return 0;
}

// Remove one reference from the physical page that contains pa.
// pa may point anywhere inside the page; it is rounded down first.
// The count never drops below zero.
// Returns 0 on success, or -1 (leaving the table untouched) when pa
// lies outside [end, PHYSTOP), i.e. is not a page the allocator manages.
int CowArrayDelNum(uint64 pa)
{
    uint64 Index = 0;

    // Reject bad addresses BEFORE computing an index: an address below
    // 0x80000000 or at/after PHYSTOP would index outside CowArray.
    if ((pa >= PHYSTOP) || (pa < (uint64)end))
    {
      return -1;
    }

    Index = (PGROUNDDOWN(pa) - 0x80000000)/PGSIZE;

    acquire(&RefLock);
    if (CowArray[Index] > 0)
    {
      CowArray[Index] -= 1;
    }
    else
    {
      // Already unreferenced: clamp at zero.
      CowArray[Index] = 0;
    }
    release(&RefLock);

    return 0;
}

2:./kernel/trap.c文件

从RISCV(参考玄铁c906用户手册)知道store page fault(即写引起的page fault)为15,所以SCAUSE为15,也从用户手册中知道STVAL是引起page fault的虚拟地址。

但引起store page fault的不一定都是cow page,例如每个process的.text page权限是r-x,往这样的page写,也会造成store page fault。所以要通过PTE_COW来判断是不是cow引起的page fault;不是的话,就不分配新页。这种情况具体怎么处理当时没想好(应该把进程killed掉),不过这样也通过了测试。

walk可能返回0,注意这个条件

在trap中换一个新page,对旧的page 的物理page那个引用减掉1就可以了吗?注意,你无法知道这个phy page的引用次数还剩多少,如果只剩下1,你只是简单的-1,不free,由于你后面va是重映射另一个进程,你再也找不到那个page,所以调用kfree(),在kfree()里减1,然后判断

void
usertrap(void)
{

  int which_dev = 0;

  if((r_sstatus() & SSTATUS_SPP) != 0)
    panic("usertrap: not from user mode");

  // send interrupts and exceptions to kerneltrap(),
  // since we're now in the kernel.
  w_stvec((uint64)kernelvec);

  struct proc *p = myproc();
  
  // save user program counter.
  p->trapframe->epc = r_sepc();
  
  if(r_scause() == 8){
    // system call

    if(p->killed)
      exit(-1);

    // sepc points to the ecall instruction,
    // but we want to return to the next instruction.
    p->trapframe->epc += 4;

    // an interrupt will change sstatus &c registers,
    // so don't enable until done with those registers.
    intr_on();

    syscall();
  }
  else if(r_scause() == 15)//store page fault
  {
    uint64 The_Pte = 0;
    pte_t* Pte;
    uint64 Va11 = r_stval();

    if (Va11 >= MAXVA)
    {
      p->killed = 1;
      exit(-1);
    }
    else
    {
      Pte = walk(p->pagetable, Va11, 0);
      The_Pte = (uint64)(*Pte);
      if ((The_Pte != 0) && ((PTE_COW & The_Pte) != 0))//是因为cow 引起的pagefault
      {
        CowPageFaultProcess(Va11);
      }
    }
  }
  else if((which_dev = devintr()) != 0){
    // ok
  } else {
    printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid);
    printf("            sepc=%p stval=%p\n", r_sepc(), r_stval());
    p->killed = 1;
  }

  if(p->killed)
    exit(-1);

  // give up the CPU if this is a timer interrupt.
  if(which_dev == 2)
    yield();

  usertrapret();
}

//
// return to user space
//
// Re-arms the trap vector for user mode, stores in the trapframe the
// kernel values needed on the next trap, prepares sstatus and sepc,
// then jumps to the userret code in the trampoline page. The call
// through fn at the end is the last statement of this function.
//
void
usertrapret(void)
{
  struct proc *p = myproc();

  // we're about to switch the destination of traps from
  // kerneltrap() to usertrap(), so turn off interrupts until
  // we're back in user space, where usertrap() is correct.
  intr_off();

  // send syscalls, interrupts, and exceptions to trampoline.S
  w_stvec(TRAMPOLINE + (uservec - trampoline));

  // set up trapframe values that uservec will need when
  // the process next re-enters the kernel.
  p->trapframe->kernel_satp = r_satp();         // kernel page table
  p->trapframe->kernel_sp = p->kstack + PGSIZE; // process's kernel stack
  p->trapframe->kernel_trap = (uint64)usertrap;
  p->trapframe->kernel_hartid = r_tp();         // hartid for cpuid()

  // set up the registers that trampoline.S's sret will use
  // to get to user space.
  
  // set S Previous Privilege mode to User.
  unsigned long x = r_sstatus();
  x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode
  x |= SSTATUS_SPIE; // enable interrupts in user mode
  w_sstatus(x);

  // set S Exception Program Counter to the saved user pc.
  w_sepc(p->trapframe->epc);

  // tell trampoline.S the user page table to switch to.
  uint64 satp = MAKE_SATP(p->pagetable);

  // jump to trampoline.S at the top of memory, which 
  // switches to the user page table, restores user registers,
  // and switches to user mode with sret.
  // fn is the address of userret as mapped at TRAMPOLINE; it is called
  // with the trapframe address and the user satp value as arguments.
  uint64 fn = TRAMPOLINE + (userret - trampoline);
  ((void (*)(uint64,uint64))fn)(TRAPFRAME, satp);
}

// interrupts and exceptions from kernel code go here via kernelvec,
// on whatever the current kernel stack is.
// Only device interrupts are tolerated here: anything devintr() does
// not recognise panics the kernel.
void 
kerneltrap()
{
  int which_dev = 0;
  // snapshot the trap registers up front; they are written back at the
  // end because yield() may let other traps overwrite them.
  uint64 sepc = r_sepc();
  uint64 sstatus = r_sstatus();
  uint64 scause = r_scause();
  
  // sanity checks: the trap must come from supervisor mode and must
  // have been taken with interrupts disabled.
  if((sstatus & SSTATUS_SPP) == 0)
    panic("kerneltrap: not from supervisor mode");
  if(intr_get() != 0)
    panic("kerneltrap: interrupts enabled");

  // devintr() returns 0 for anything that is not a known interrupt.
  if((which_dev = devintr()) == 0){
    printf("scause %p\n", scause);
    printf("sepc=%p stval=%p\n", r_sepc(), r_stval());
    panic("kerneltrap");
  }

  // give up the CPU if this is a timer interrupt.
  if(which_dev == 2 && myproc() != 0 && myproc()->state == RUNNING)
    yield();

  // the yield() may have caused some traps to occur,
  // so restore trap registers for use by kernelvec.S's sepc instruction.
  w_sepc(sepc);
  w_sstatus(sstatus);
}

// Timer tick: increment the global tick counter under tickslock and
// wake up anything sleeping on &ticks. Called from devintr() on CPU 0.
void
clockintr()
{
  acquire(&tickslock);
  ticks++;
  wakeup(&ticks);
  release(&tickslock);
}

// Classify the pending trap by reading scause and service it if it is
// a device or timer interrupt.
// returns 2 if timer interrupt,
// 1 if other device,
// 0 if not recognized.
int
devintr()
{
  const uint64 cause = r_scause();
  const int external = (cause & 0x8000000000000000L) &&
                       (cause & 0xff) == 9;

  if(external){
    // supervisor external interrupt, delivered through the PLIC;
    // ask the PLIC which device raised it.
    int irq = plic_claim();

    if(irq == UART0_IRQ){
      uartintr();
    } else if(irq == VIRTIO0_IRQ){
      virtio_disk_intr();
    } else if(irq){
      printf("unexpected interrupt irq=%d\n", irq);
    }

    // a device may have only one interrupt outstanding at the PLIC;
    // completing the claim lets it interrupt again.
    if(irq)
      plic_complete(irq);

    return 1;
  }

  if(cause != 0x8000000000000001L)
    return 0;

  // software interrupt from a machine-mode timer interrupt,
  // forwarded by timervec in kernelvec.S.
  if(cpuid() == 0){
    clockintr();
  }

  // acknowledge it by clearing the SSIP bit in sip.
  w_sip(r_sip() & ~2);

  return 2;
}

/**
 * @brief Resolve a store page fault on a copy-on-write page.
 *
 * Allocates a private page for the current process, copies the shared
 * page into it, repoints the PTE at the copy with PTE_W set and PTE_COW
 * cleared, and finally drops this process's reference to the old page
 * through kfree() (which frees it only if nobody else still uses it).
 *
 * The caller is expected to have verified that Va maps a COW page; if
 * that does not hold, or if no memory is available, the process is
 * marked killed and the page table is left untouched.
 *
 * @param Va faulting virtual address (need not be page aligned)
 */
static void CowPageFaultProcess(uint64 Va)
{
    struct proc *p = myproc();
    pte_t *Pte;
    uint64 OldPa;   // physical address of the shared page
    char *NewPa;    // physical address of the private copy
    uint64 Flag;

    Pte = walk(p->pagetable, Va, 0);
    if ((Pte == 0) || ((*Pte & PTE_V) == 0) || ((*Pte & PTE_COW) == 0))
    {
      p->killed = 1;
      return;
    }

    NewPa = kalloc();
    if (NewPa == 0)
    {
      p->killed = 1;
      return;
    }

    OldPa = PTE2PA(*Pte);

    // Work out the new permission bits on a local copy, so the live PTE
    // never points at the shared page with write permission.
    Flag = (PTE_FLAGS(*Pte) & ~PTE_COW) | PTE_W | PTE_V;

    memmove(NewPa, (const void*)OldPa, PGSIZE);

    // The leaf PTE already exists, so rewrite it in place: nothing has
    // to be allocated and there is no failure path to handle.
    *Pte = PA2PTE((uint64)NewPa) | Flag;

    // kfree() rather than a bare decrement: once the PTE is repointed
    // this process can no longer find the old page, so if this was the
    // last reference the page has to be released here.
    kfree((void*)OldPa);
}

3. ./kernel/vm.c文件 uvmcopy函数

// Given a parent process's page table, map the parent's memory into
// a child's page table.
// No physical memory is copied: parent and child share every page.
// Pages that were writable are turned into copy-on-write pages in BOTH
// page tables (PTE_W cleared, PTE_COW set); pages that were not
// writable are shared with their permissions unchanged, so a later
// store fault can never grant write access to a read-only page.
// Each shared page gets one extra reference.
// returns 0 on success, -1 on failure.
// on failure the child's mappings made so far are removed again.
int
uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{
  pte_t *pte;
  uint64 pa, i;
  uint flags;

  for(i = 0; i < sz; i += PGSIZE)
  {
    if((pte = walk(old, i, 0)) == 0)
      panic("uvmcopy: pte should exist");
    if((*pte & PTE_V) == 0)
      panic("uvmcopy: page not present");

    pa = PTE2PA(*pte);

    // Only a page that is writable right now needs copy-on-write.
    // A page that is already COW keeps its PTE_COW bit untouched.
    if((*pte & PTE_W) != 0)
      *pte = (*pte & ~PTE_W) | PTE_COW;
    flags = PTE_FLAGS(*pte);

    // The child's mapping is one more reference to the page.
    CowArrayAddNum(pa);
    if(mappages(new, i, PGSIZE, (uint64)pa, flags) != 0){
      // undo the reference taken just above.
      kfree((void*)pa);
      goto err;
    }
  }

  return 0;

 err:
  uvmunmap(new, 0, i / PGSIZE, 1);
  return -1;
}

4. ./kernel/vm.c文件 copyout函数

/ Copy from kernel to user.
// Copy len bytes from src to virtual address dstva in a given page table.
// Return 0 on success, -1 on error.
int
copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
  uint64 n, va0, pa0;
  pte_t* PteP;
  uint64 PteFlag;
  uint64* Pa0P;
  uint64 OldPa;


  while(len > 0)
  {
    
    va0 = PGROUNDDOWN(dstva);
    if (va0 >= MAXVA)
    {
      return -1;
    }
   
    PteP = walk(pagetable, va0, 0);
    OldPa = walkaddr(pagetable, va0);
    pa0 = walkaddr(pagetable, va0);

    if (PteP == 0)//walk失败
    {
      return -1;
    }
    if ((((*PteP) & PTE_COW) != 0) )
    {
      
      Pa0P = kalloc();
      if (Pa0P == 0)
      {
        return -1;
      }
      
      
      PteFlag = PTE_FLAGS(*PteP);
      PteFlag &= ~(PTE_COW);
      PteFlag |= PTE_W;
      *PteP = 0;
      mappages(pagetable, va0, PGSIZE, (uint64)Pa0P, PteFlag);
      memmove(Pa0P, (const void*)OldPa, PGSIZE);
      kfree((void*)pa0);//不是用CowArrayDelNum(),这个都不map了,不free()一下都找不到了

      pa0 = (uint64)Pa0P;
    }
   
    if(pa0 == 0)
      return -1;
    n = PGSIZE - (dstva - va0);
    if(n > len)
      n = len;
    memmove((void *)(pa0 + (dstva - va0)), src, n);

    len -= n;
    src += n;
    dstva = va0 + PGSIZE;
  }
  return 0;
}

5. ./kernel/riscv.h文件

#define PGSIZE 4096 // bytes per page
#define PGSHIFT 12  // bits of offset within a page

// round an address/size up or down to a page boundary.
#define PGROUNDUP(sz)  (((sz)+PGSIZE-1) & ~(PGSIZE-1))
#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1))

#define PTE_V (1L << 0) // valid
#define PTE_R (1L << 1) // readable
#define PTE_W (1L << 2) // writable
#define PTE_X (1L << 3) // executable
#define PTE_U (1L << 4) // 1 -> user can access
// software flag kept in bit 8 of the PTE: the page is shared
// copy-on-write and must be copied before it is written.
#define PTE_COW  (1L << 8)

// shift a physical address to the right place for a PTE.
// the argument is parenthesised so that the cast applies to the whole
// expression passed in, not just to its first operand.
#define PA2PTE(pa) ((((uint64)(pa)) >> 12) << 10)

// extract the physical address a PTE points at.
#define PTE2PA(pte) (((pte) >> 10) << 12)

// the low 10 bits of a PTE are its flag bits.
#define PTE_FLAGS(pte) ((pte) & 0x3FF)

// extract the three 9-bit page table indices from a virtual address.
#define PXMASK          0x1FF // 9 bits
#define PXSHIFT(level)  (PGSHIFT+(9*(level)))
#define PX(level, va) ((((uint64) (va)) >> PXSHIFT(level)) & PXMASK)

// one beyond the highest possible virtual address.
// MAXVA is actually one bit less than the max allowed by
// Sv39, to avoid having to sign-extend virtual addresses
// that have the high bit set.
#define MAXVA (1L << (9 + 9 + 9 + 12 - 1))

typedef uint64 pte_t;        // one page-table entry
typedef uint64 *pagetable_t; // 512 PTEs

6. 记得在defs.h文件中添加新增函数的声明

调试过程

panic mappages remap

如下面两张图

根据错误的提示信息,应该是在调用mappages的时候产生的错误,于是用vscode的find all references,在所有可能调用mappages函数的地方前后都打上log,最后确定是在上面第一张图的地方产生的错误。

remap的意思是我在那个3级页表项的地方有valid标志位,注意walk只是返回3级页表项的值的指针,不会对3级页表项做啥改变。而且我前面不是*PTE = 0了吗?怎么还会valid呢?

观察mappages函数发现,如果va地址不是page对齐的,它不止会检查这个page的PTE是否有valid标志位,还会检查下一个虚拟页对应的PTE是否有valid标志位,所以传给mappages的va应该要page对齐,最后用PGROUNDDOWN解决。

改变如图所示

结果如图所示

引用次数想错导致的错误

还是打log和应用相关知识,这个错误应该可以用gdb解决。

刚开始的时候,并没有在任何地方调用kfree(),而是在copyout、trap以及进程增加或减少一个page的地方直接加1减1。当时只是觉得这个做法不通用:每多一处就要在对应地方调用CowArrayDelNum、CowArrayAddNum,很容易漏掉。但后面认真想了想,发现还有下面这个更严重的问题。

在trap中换一个新page,对旧的page 的物理page那个引用减掉1就可以了吗?注意,你无法知道这个phy page的引用次数还剩多少,如果只剩下1,你只是简单的-1,不free,由于你后面va是重映射另一个进程,你再也找不到那个page,所以调用kfree(),在kfree()里减1,然后判断

就是上面那个意思,挺烦的,想不出来要怎么办?后面简单看了一下别人的代码,发现别人在kfree()减,突然有了思路。
在kfree()里减1,但kinit()也调用了,所以初始化为1(或者初始化+1),在uvmcopy里加1,trap,copyout调用kfree(如果为0了,那么就释放,没有就设计代码,让这个page 不free),kfree减1

结果如图所示

panic walk

usertests没通过,第一次遇见。

如图是usertests的结果。

是walk函数引起的,发现是va>=MAXVA,导致panic。

用vscode的find all references,发现有些代码在调用walk前,会先判断va是否>=MAXVA,是的话就返回-1,把错误交给上层程序处理。

panic相当于windows的蓝屏,我不应该让系统蓝屏,而应该让程序自己处理这个错误。最后的解决方案是:在trap里,如果va>=MAXVA,由于trap没有返回值,就参考已有错误处理的写法,设置p->killed = 1并调用exit(-1);在copyout里则返回-1(把错误交给上一级去处理)。

copyout usertests不通过

如图

引起的是kernel trap,scause等于13,load page fault,可以看做读错误。kerneltrap,代表是kernel space区的代码引起的,查看8000d9c的汇编代码,把s2寄存器的值偏移2的地址加载到a5中,这个引起的错误。这个应该是获取pte的值,但pte这个地方没有读权限。为啥会没有读权限,pte是walk得到的,walk返回的正常都是有读权限的(.text段和.data段都有可读权限,page table表应该也有读权限设置),除非返回0,于是要做walk的错误处理。解决。

posted @ 2025-01-17 22:17  我们的歌谣  阅读(46)  评论(0)    收藏  举报