lab5 copy on write
copy on write
实验结果

实验
1:./kernel/kalloc.c文件
用一个全局数组记录每一个页的引用次数,那么数组的元素个数应该是多少?end的大小能确定吗?观察ld文件,0x80000000到end是kernel .text和kernel .data段,大小每次编译后都不一定相同。而直接用PHYSTOP减0x80000000,所有page肯定都可以记录,所以最后取((PHYSTOP - 0x80000000)/PGSIZE)+1。多加的1大概是为了让下标可以取到PHYSTOP对应的那一项(没有认真推敲);只要元素个数不少就可以,多加一些(比如加5、加10)也无所谓,但不要加太多。
多个cpu都操作这一个全局变量数组,应该用自旋锁。
// Physical memory allocator, for user processes,
// kernel stacks, page-table pages,
// and pipe buffers. Allocates whole 4096-byte pages.
#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "spinlock.h"
#include "riscv.h"
#include "defs.h"
// Hands every whole page in [pa_start, pa_end) to the allocator.
void freerange(void *pa_start, void *pa_end);
// Per-page reference counts. The slot for physical address pa is
// (pa - 0x80000000) / PGSIZE. Read and written while holding RefLock.
int CowArray[((PHYSTOP - 0x80000000)/PGSIZE)+1] = {0};
extern char end[]; // first address after kernel.
// defined by kernel.ld.
// A free page keeps its free-list link in its own first bytes.
struct run {
struct run *next;
};
// The list of free physical pages and the spinlock that guards it.
struct {
struct spinlock lock;
struct run *freelist;
} kmem;
// Serializes accesses to CowArray.
struct spinlock RefLock;
// Prepare the allocator: initialize both spinlocks, then put every page
// between the end of the kernel image and PHYSTOP on the free list.
void
kinit()
{
  initlock(&RefLock, "ref");
  initlock(&kmem.lock, "kmem");

  freerange((void*)end, (void*)PHYSTOP);
}
void
freerange(void *pa_start, void *pa_end)
{
char *p;
p = (char*)PGROUNDUP((uint64)pa_start);
for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE)
{
CowArrayAddNum((uint64)p);
kfree(p);
}
}
// Free the page of physical memory pointed at by v,
// which normally should have been returned by a
// call to kalloc(). (The exception is when
// initializing the allocator; see kinit above.)
void
kfree(void *pa)
{
struct run *r;
uint8 flag = 0;
if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
panic("kfree");
CowArrayDelNum((uint64)pa);
acquire(&RefLock);
if (CowArray[((((uint64)pa)- 0x80000000)/PGSIZE)] > 0)
{
flag = 1;
}
release(&RefLock);
if (1 == flag)
{
goto nofree;
}
// Fill with junk to catch dangling refs.
memset(pa, 1, PGSIZE);
r = (struct run*)pa;
acquire(&kmem.lock);
r->next = kmem.freelist;
kmem.freelist = r;
release(&kmem.lock);
return;
nofree:
return;
}
// Allocate one 4096-byte page of physical memory.
// Returns a pointer that the kernel can use.
// Returns 0 if the memory cannot be allocated.
void *
kalloc(void)
{
struct run *r;
acquire(&kmem.lock);
r = kmem.freelist;
if(r)
kmem.freelist = r->next;
release(&kmem.lock);
if(r)
{
CowArrayAddNum((uint64)r);
memset((char*)r, 5, PGSIZE); // fill with junk
}
return (void*)r;
}
// Add one reference to the physical page containing pa.
//
// pa: any address inside the page; it is rounded down to the page start.
// Returns 0 on success, or -1 (leaving every count untouched) when pa
// lies outside [end, PHYSTOP), the same range kfree() accepts.
int CowArrayAddNum(uint64 pa)
{
  uint64 Index;

  // Reject bad addresses BEFORE computing an index: an address below
  // 0x80000000 would wrap around and one past PHYSTOP would run off the
  // end of CowArray.
  if ((pa >= PHYSTOP) || (pa < (uint64)end))
  {
    return -1;
  }

  Index = (PGROUNDDOWN(pa) - 0x80000000)/PGSIZE;

  acquire(&RefLock);
  if (CowArray[Index] > 0)
  {
    CowArray[Index] = CowArray[Index] + 1;
  }
  else
  {
    // Covers a zero count (and clamps any unexpected negative value).
    CowArray[Index] = 1;
  }
  release(&RefLock);

  return 0;
}
// Remove one reference from the physical page containing pa.
// The count never goes below zero. This only adjusts the count; it does
// not put the page on the free list.
//
// pa: any address inside the page; it is rounded down to the page start.
// Returns 0 on success, or -1 (leaving every count untouched) when pa
// lies outside [end, PHYSTOP), the same range kfree() accepts.
int CowArrayDelNum(uint64 pa)
{
  uint64 Index;

  // Reject bad addresses BEFORE computing an index: an address below
  // 0x80000000 would wrap around and one past PHYSTOP would run off the
  // end of CowArray.
  if ((pa >= PHYSTOP) || (pa < (uint64)end))
  {
    return -1;
  }

  Index = (PGROUNDDOWN(pa) - 0x80000000)/PGSIZE;

  acquire(&RefLock);
  if (CowArray[Index] > 0)
  {
    CowArray[Index] = CowArray[Index] - 1;
  }
  else
  {
    CowArray[Index] = 0;
  }
  release(&RefLock);

  return 0;
}
2:./kernel/trap.c文件
从RISCV(参考玄铁c906用户手册)知道store page fault(即写引起的page fault)为15,所以SCAUSE为15,也从用户手册中知道STVAL是引起page fault的虚拟地址。
但引起store page fault的不一定都是cow page,例如每个process的.text page权限是rx,往那个page写,也会造成store page fault。所以通过PTE_COW来判断是不是cow引起的page fault;不是的话,不分配新page,至于该怎么处理(还没想好,可能应该把进程killed掉;目前的写法也通过了测试)。
walk可能返回0,注意这个条件。
在trap中换一个新page,对旧的page 的物理page那个引用减掉1就可以了吗?注意,你无法知道这个phy page的引用次数还剩多少,如果只剩下1,你只是简单的-1,不free,由于你后面va是重映射另一个进程,你再也找不到那个page,所以调用kfree(),在kfree()里减1,然后判断
void
usertrap(void)
{
int which_dev = 0;
if((r_sstatus() & SSTATUS_SPP) != 0)
panic("usertrap: not from user mode");
// send interrupts and exceptions to kerneltrap(),
// since we're now in the kernel.
w_stvec((uint64)kernelvec);
struct proc *p = myproc();
// save user program counter.
p->trapframe->epc = r_sepc();
if(r_scause() == 8){
// system call
if(p->killed)
exit(-1);
// sepc points to the ecall instruction,
// but we want to return to the next instruction.
p->trapframe->epc += 4;
// an interrupt will change sstatus &c registers,
// so don't enable until done with those registers.
intr_on();
syscall();
}
else if(r_scause() == 15)//store page fault
{
uint64 The_Pte = 0;
pte_t* Pte;
uint64 Va11 = r_stval();
if (Va11 >= MAXVA)
{
p->killed = 1;
exit(-1);
}
else
{
Pte = walk(p->pagetable, Va11, 0);
The_Pte = (uint64)(*Pte);
if ((The_Pte != 0) && ((PTE_COW & The_Pte) != 0))//是因为cow 引起的pagefault
{
CowPageFaultProcess(Va11);
}
}
}
else if((which_dev = devintr()) != 0){
// ok
} else {
printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid);
printf(" sepc=%p stval=%p\n", r_sepc(), r_stval());
p->killed = 1;
}
if(p->killed)
exit(-1);
// give up the CPU if this is a timer interrupt.
if(which_dev == 2)
yield();
usertrapret();
}
//
// return to user space
//
void
usertrapret(void)
{
struct proc *p = myproc();
// we're about to switch the destination of traps from
// kerneltrap() to usertrap(), so turn off interrupts until
// we're back in user space, where usertrap() is correct.
intr_off();
// send syscalls, interrupts, and exceptions to trampoline.S
w_stvec(TRAMPOLINE + (uservec - trampoline));
// set up trapframe values that uservec will need when
// the process next re-enters the kernel.
p->trapframe->kernel_satp = r_satp(); // kernel page table
p->trapframe->kernel_sp = p->kstack + PGSIZE; // process's kernel stack
p->trapframe->kernel_trap = (uint64)usertrap;
p->trapframe->kernel_hartid = r_tp(); // hartid for cpuid()
// set up the registers that trampoline.S's sret will use
// to get to user space.
// set S Previous Privilege mode to User.
unsigned long x = r_sstatus();
x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode
x |= SSTATUS_SPIE; // enable interrupts in user mode
w_sstatus(x);
// set S Exception Program Counter to the saved user pc.
w_sepc(p->trapframe->epc);
// tell trampoline.S the user page table to switch to.
uint64 satp = MAKE_SATP(p->pagetable);
// jump to trampoline.S at the top of memory, which
// switches to the user page table, restores user registers,
// and switches to user mode with sret.
uint64 fn = TRAMPOLINE + (userret - trampoline);
((void (*)(uint64,uint64))fn)(TRAPFRAME, satp);
}
// Traps taken while executing kernel code arrive here through kernelvec,
// on whichever kernel stack is current. Only device interrupts are
// expected; anything else is fatal.
void
kerneltrap()
{
  // Snapshot the trap registers up front: yield() below may lead to
  // further traps that overwrite them.
  uint64 saved_sepc = r_sepc();
  uint64 saved_sstatus = r_sstatus();
  uint64 cause = r_scause();
  int dev;

  if((saved_sstatus & SSTATUS_SPP) == 0)
    panic("kerneltrap: not from supervisor mode");
  if(intr_get() != 0)
    panic("kerneltrap: interrupts enabled");

  dev = devintr();
  if(dev == 0){
    // Not a device interrupt: report and stop.
    printf("scause %p\n", cause);
    printf("sepc=%p stval=%p\n", r_sepc(), r_stval());
    panic("kerneltrap");
  }

  // give up the CPU if this is a timer interrupt.
  if(dev == 2 && myproc() != 0 && myproc()->state == RUNNING)
    yield();

  // Put the trap registers back for kernelvec.S's sepc instruction.
  w_sepc(saved_sepc);
  w_sstatus(saved_sstatus);
}
// Advance the global tick counter by one and wake everything sleeping
// on &ticks. tickslock is held across both steps.
void
clockintr()
{
acquire(&tickslock);
ticks++;
wakeup(&ticks);
release(&tickslock);
}
// Identify and service an external or software interrupt.
// Return value:
//   2  timer interrupt
//   1  some other device
//   0  not recognized
int
devintr()
{
  uint64 cause = r_scause();

  if((cause & 0x8000000000000000L) && (cause & 0xff) == 9){
    // Supervisor external interrupt delivered through the PLIC;
    // the claimed irq says which device raised it.
    int irq = plic_claim();

    if(irq == UART0_IRQ){
      uartintr();
    } else if(irq == VIRTIO0_IRQ){
      virtio_disk_intr();
    } else if(irq){
      printf("unexpected interrupt irq=%d\n", irq);
    }

    // The PLIC lets each device raise at most one interrupt at a time;
    // completing the irq allows the device to interrupt again.
    if(irq)
      plic_complete(irq);

    return 1;
  }

  if(cause == 0x8000000000000001L){
    // Software interrupt from a machine-mode timer interrupt,
    // forwarded by timervec in kernelvec.S.
    if(cpuid() == 0){
      clockintr();
    }

    // Acknowledge it by clearing the SSIP bit in sip.
    w_sip(r_sip() & ~2);

    return 2;
  }

  return 0;
}
/**
 * @brief Give the current process a private, writable copy of the
 *        copy-on-write page that contains Va.
 *
 * The caller is expected to pass the faulting address of a store page
 * fault on a COW page, with a current process present. If the address
 * turns out not to map a valid COW page, or no memory is available for
 * the copy, the process is marked killed and the mapping is left
 * unchanged.
 *
 * @param Va faulting virtual address (need not be page-aligned)
 */
static void CowPageFaultProcess(uint64 Va)
{
  struct proc *p = myproc();
  pte_t *Pte;
  uint64 OldPa;   // physical address of the start of the shared page
  char *NewPage;  // start of the freshly allocated private page
  uint64 Flag;

  // walk() must not be handed an address >= MAXVA and may return 0.
  if (Va >= MAXVA)
  {
    p->killed = 1;
    return;
  }
  Pte = walk(p->pagetable, PGROUNDDOWN(Va), 0);
  if ((Pte == 0) || ((*Pte & PTE_V) == 0) || ((*Pte & PTE_COW) == 0))
  {
    p->killed = 1;
    return;
  }

  OldPa = PTE2PA(*Pte);
  // The private copy is an ordinary writable page: a later store fault
  // on it has some other cause.
  Flag = (PTE_FLAGS(*Pte) & ~PTE_COW) | PTE_W;

  NewPage = kalloc();
  if (NewPage == 0)
  {
    p->killed = 1;
    return;
  }

  memmove(NewPage, (const void*)OldPa, PGSIZE);

  // Rewrite the existing leaf PTE in place. Zeroing it and calling
  // mappages() with the result ignored could leave the page unmapped and
  // leak NewPage if the remap failed.
  *Pte = PA2PTE((uint64)NewPage) | Flag | PTE_V;

  // Use kfree(), not a bare count decrement: this process no longer maps
  // OldPa, so if it held the last reference nobody could free it later.
  kfree((void*)OldPa);
}
3. ./kernel/vm.c文件 uvmcopy函数
// Given a parent process's page table, share its memory with a
// child's page table.
// No physical page is copied here: every page of the parent is mapped
// into the child at the same address, and its reference count is
// raised. Pages that were writable lose PTE_W and gain PTE_COW in BOTH
// page tables, so the first store to one of them faults.
// returns 0 on success, -1 on failure.
// on failure, unmaps (and drops the references of) the pages already
// mapped into the child.
int
uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{
  pte_t *pte;
  uint64 pa, i;
  uint flags;

  for(i = 0; i < sz; i += PGSIZE){
    if((pte = walk(old, i, 0)) == 0)
      panic("uvmcopy: pte should exist");
    if((*pte & PTE_V) == 0)
      panic("uvmcopy: page not present");

    pa = PTE2PA(*pte);

    // Only a page that is currently writable becomes copy-on-write.
    // Marking a read-only page COW would let the fault handler hand out
    // a writable copy of it later. A page that is already COW (PTE_W
    // clear, PTE_COW set) keeps its flags and is shared as-is.
    if((*pte & PTE_W) != 0)
      *pte = (*pte & ~PTE_W) | PTE_COW;
    flags = PTE_FLAGS(*pte);

    // The child's mapping is one more reference to the page.
    if(CowArrayAddNum(pa) != 0)
      goto err;
    if(mappages(new, i, PGSIZE, (uint64)pa, flags) != 0){
      // Undo the reference just taken for the mapping that failed.
      kfree((void*)pa);
      goto err;
    }
  }
  return 0;

 err:
  uvmunmap(new, 0, i / PGSIZE, 1);
  return -1;
}
4. ./kernel/vm.c文件 copyout函数
/ Copy from kernel to user.
// Copy len bytes from src to virtual address dstva in a given page table.
// Return 0 on success, -1 on error.
int
copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
uint64 n, va0, pa0;
pte_t* PteP;
uint64 PteFlag;
uint64* Pa0P;
uint64 OldPa;
while(len > 0)
{
va0 = PGROUNDDOWN(dstva);
if (va0 >= MAXVA)
{
return -1;
}
PteP = walk(pagetable, va0, 0);
OldPa = walkaddr(pagetable, va0);
pa0 = walkaddr(pagetable, va0);
if (PteP == 0)//walk失败
{
return -1;
}
if ((((*PteP) & PTE_COW) != 0) )
{
Pa0P = kalloc();
if (Pa0P == 0)
{
return -1;
}
PteFlag = PTE_FLAGS(*PteP);
PteFlag &= ~(PTE_COW);
PteFlag |= PTE_W;
*PteP = 0;
mappages(pagetable, va0, PGSIZE, (uint64)Pa0P, PteFlag);
memmove(Pa0P, (const void*)OldPa, PGSIZE);
kfree((void*)pa0);//不是用CowArrayDelNum(),这个都不map了,不free()一下都找不到了
pa0 = (uint64)Pa0P;
}
if(pa0 == 0)
return -1;
n = PGSIZE - (dstva - va0);
if(n > len)
n = len;
memmove((void *)(pa0 + (dstva - va0)), src, n);
len -= n;
src += n;
dstva = va0 + PGSIZE;
}
return 0;
}
5. ./kernel/riscv.h文件
#define PGSIZE 4096 // bytes per page
#define PGSHIFT 12  // bits of offset within a page

// Round sz up / a down to a multiple of PGSIZE.
#define PGROUNDUP(sz)  (((sz)+PGSIZE-1) & ~(PGSIZE-1))
#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1))

// Page-table entry flag bits.
#define PTE_V (1L << 0) // valid
#define PTE_R (1L << 1) // readable
#define PTE_W (1L << 2) // writable
#define PTE_X (1L << 3) // executable
#define PTE_U (1L << 4) // 1 -> user can access
// Software-only marker for a copy-on-write page. Bit 8 lies inside the
// 10 flag bits kept by PTE_FLAGS and collides with none of the bits
// above. NOTE(review): presumably one of the Sv39 bits reserved for
// software - confirm against the privileged spec.
#define PTE_COW (1L << 8)

// shift a physical address to the right place for a PTE.
// (the argument is parenthesized so an expression can be passed safely.)
#define PA2PTE(pa) ((((uint64)(pa)) >> 12) << 10)

// recover the physical page address / the low 10 flag bits from a PTE.
#define PTE2PA(pte) (((pte) >> 10) << 12)
#define PTE_FLAGS(pte) ((pte) & 0x3FF)

// extract the three 9-bit page table indices from a virtual address.
#define PXMASK          0x1FF // 9 bits
#define PXSHIFT(level)  (PGSHIFT+(9*(level)))
#define PX(level, va) ((((uint64) (va)) >> PXSHIFT(level)) & PXMASK)

// one beyond the highest possible virtual address.
// MAXVA is actually one bit less than the max allowed by
// Sv39, to avoid having to sign-extend virtual addresses
// that have the high bit set.
#define MAXVA (1L << (9 + 9 + 9 + 12 - 1))
// One page-table entry, and a pointer to an array of them.
typedef uint64 pte_t;
typedef uint64 *pagetable_t; // 512 PTEs
6. 记得在defs.h文件添加使用的函数
调试过程
panic mappages remap
如下面两张图


根据错误的提示信息,应该是在调用mappages的时候产生的错误,于是用vscode的find all references,分别在所有可能调用mappages函数的地方前后都打上log,最后确定是在上面第一张图的地方产生错误。

remap的意思是我在那个3级页表项的地方有valid标志位,注意walk只是返回3级页表项的值的指针,不会对3级页表项做啥改变。而且我前面不是*PTE = 0了吗?怎么还会valid呢?
观察mappages函数发现,如果va地址不是page对齐,它不止会检查这个page的PTE是否有valid标志位,还会检查下一个虚拟页对应的PTE是否有valid标志位,所以传给mappages的va应该page对齐,最后用PGROUNDDOWN解决。
改变如图所示

结果如图所示

引用次数想错导致的错误
还是打log和应用相关知识,这个错误应该可以用gdb解决。
刚开始的时候,并没有在任何地方调用free(),而是在copyout、trap以及进程增加或减少一个page的地方分别加1减1。当时只是觉得这种做法不通用:每多一处改动,就要在对应的地方调用CowArrayDelNum或CowArrayAddNum,很容易漏掉;后来认真想了想,发现还有下面这个问题。
在trap中换一个新page,对旧的page 的物理page那个引用减掉1就可以了吗?注意,你无法知道这个phy page的引用次数还剩多少,如果只剩下1,你只是简单的-1,不free,由于你后面va是重映射另一个进程,你再也找不到那个page,所以调用kfree(),在kfree()里减1,然后判断
就是上面那个意思,挺烦的,想不出来要怎么办?后面简单看了一下别人的代码,发现别人在kfree()减,突然有了思路。
在kfree()里减1,但kinit()也调用了,所以初始化为1(或者初始化+1),在uvmcopy里加1,trap,copyout调用kfree(如果为0了,那么就释放,没有就设计代码,让这个page 不free),kfree减1
结果如图所示

panic walk
usertests没通过,第一次遇见。
如图usertests的结果。


是walk函数引起的,发现是va>=MAXVA,导致panic。
用vscode的find all references,发现有些代码在调用walk前,会先判断va是否>=MAXVA,是的话就返回-1,交给调用者处理。
panic相当于windows的蓝屏,我不应该让系统蓝屏,而应该让程序自己处理这个错误。最后的解决方案是:在trap里,如果va>=MAXVA,由于trap没有返回值,就参考已有错误处理的写法,设置p->killed = 1并调用exit(-1);在copyout里则返回-1(把错误交给上一级去处理)。
copyout usertests不通过
如图


引起的是kernel trap,scause等于13,load page fault,可以看做读错误。kerneltrap,代表是kernel space区的代码引起的,查看8000d9c的汇编代码,把s2寄存器的值偏移2的地址加载到a5中,这个引起的错误。这个应该是获取pte的值,但pte这个地方没有读权限。为啥会没有读权限,pte是walk得到的,walk返回的正常都是有读权限的(.text段和.data段都有可读权限,page table表应该也有读权限设置),除非返回0,于是要做walk的错误处理。解决。

浙公网安备 33010602011771号