go语言的GC

mark&sweep, 2分钟保证至少一次GC过程，如果分配的总内存超过上次分配的总内存一定比例(默认100%)后进行一次GC
进行mark的过程中，会停止一切G的运行，mark的过程是多任务并发的
sweep的过程是分散的

mark过程

整个程序内存块包括 .data， .bss，每个G的stack， SpecialFinalizer
每段内存都有其相应的bitmap，用来表示每个word（8BYTE）的标志位，每word需要4bit的标志位
mark的过程就是递归遍历内存块bitmap的过程
word标志位有如下3种类型:

标量
指针
连续两个word表示一个iface或eface

// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left. Instead of using an explicit recursion, it keeps
// a work list in the Workbuf* structures and loops in the main function
// body. Keeping an explicit work list is easier on the stack allocator and
// more efficient.
static void
scanblock(byte *b, uintptr n, byte *ptrmask)
{
byte *obj, *obj0, *p, *arena_start, *arena_used, **wp, *scanbuf[8], *ptrbitp, *bitp;
uintptr i, j, nobj, size, idx, x, off, scanbufpos, bits, xbits, shift;
Workbuf *wbuf;
Iface *iface;
Eface *eface;
Type *typ;
MSpan *s;
pageID k;
bool keepworking;

// Cache memory arena parameters in local vars.
arena_start = runtime·mheap.arena_start;
arena_used = runtime·mheap.arena_used;

wbuf = getempty(nil);
nobj = wbuf->nobj;
wp = &wbuf->obj[nobj];
keepworking = b == nil;
scanbufpos = 0;
for(i = 0; i < nelem(scanbuf); i++)
scanbuf[i] = nil;

ptrbitp = nil;

// ptrmask can have 2 possible values:
// 1. nil - obtain pointer mask from GC bitmap.
// 2. pointer to a compact mask (for stacks and data).
if(b != nil)
goto scanobj;
for(;😉 {
if(nobj == 0) {
// Out of work in workbuf.
// First, see is there is any work in scanbuf.
for(i = 0; i < nelem(scanbuf); i++) {
b = scanbuf[scanbufpos];
scanbuf[scanbufpos++] = nil;
scanbufpos %= nelem(scanbuf);
if(b != nil) {
n = arena_used - b; // scan until bitBoundary or BitsDead
ptrmask = nil; // use GC bitmap for pointer info
goto scanobj;
}
}
if(!keepworking) {
putempty(wbuf);
return;
}
// Refill workbuf from global queue.
wbuf = getfull(wbuf);
if(wbuf == nil)
return;
nobj = wbuf->nobj;
wp = &wbuf->obj[nobj];
}

// If another proc wants a pointer, give it some.
if(runtime·work.nwait > 0 && nobj > 4 && runtime·work.full == 0) {
wbuf->nobj = nobj;
wbuf = handoff(wbuf);
nobj = wbuf->nobj;
wp = &wbuf->obj[nobj];
}

wp--;
nobj--;
b = *wp;
n = arena_used - b; // scan until next bitBoundary or BitsDead
ptrmask = nil; // use GC bitmap for pointer info

scanobj:
if(DebugPtrs)
runtime·printf("scanblock %p +%p %p\n", b, n, ptrmask);
// Find bits of the beginning of the object.
if(ptrmask == nil) {
off = (uintptr)b - (uintptr)arena_start;
ptrbitp = arena_start - off/wordsPerBitmapByte - 1;
}
for(i = 0; i < n; i += PtrSize) {
obj = nil;
// Find bits for this word.
......

if(bits <= BitsScalar) // BitsScalar || BitsDead
continue;
if(bits == BitsPointer) {
obj = *(byte**)(b+i);
obj0 = obj;
goto markobj;
}

// With those three out of the way, must be multi-word.
if(Debug && bits != BitsMultiWord)
runtime·throw("unexpected garbage collection bits");
// Find the next pair of bits.
if(ptrmask == nil) {
bits = ptrbitp;
j = ((uintptr)b+i+PtrSize)/PtrSize & 1;
ptrbitp -= j;
bits >>= gcBitsj;
bits = (bits>>2)&BitsMask;
} else
bits = (ptrmask[((i+PtrSize)/PtrSize)/4]>>((((i+PtrSize)/PtrSize)%4)*BitsPerPointer))&BitsMask;

if(Debug && bits != BitsIface && bits != BitsEface)
runtime·throw("unexpected garbage collection bits");

if(bits == BitsIface) {
iface = (Iface)(b+i);
if(iface->tab != nil) {
typ = iface->tab->type;
if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
obj = iface->data;
}
} else {
eface = (Eface)(b+i);
typ = eface->type;
if(typ != nil) {
if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
obj = eface->data;
}
}

i += PtrSize;

obj0 = obj;
markobj:
// At this point we have extracted the next potential pointer.
// Check if it points into heap.
if(obj == nil)
continue;
if(obj < arena_start || obj >= arena_used) {
if((uintptr)obj < PhysPageSize && runtime·invalidptr) {
s = nil;
goto badobj;
}
continue;
}
// Mark the object.
obj = (byte)((uintptr)obj & ~(PtrSize-1));
off = (uintptr)obj - (uintptr*)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapByte) * gcBits;
xbits = *bitp;
bits = (xbits >> shift) & bitMask;
if((bits&bitBoundary) == 0) {
// Not a beginning of a block, consult span table to find the block beginning.
......

obj = p;
goto markobj;
}
if(DebugPtrs)
runtime·printf("scan *%p = %p => base %p\n", b+i, obj0, obj);

if(nbadblock > 0 && (uintptr)obj == badblock[nbadblock-1]) {
// Running garbage collection again because
// we want to find the path from a root to a bad pointer.
// Found possible next step; extend or finish path.
for(j=0; j<nbadblock; j++)
if(badblock[j] == (uintptr)b)
goto AlreadyBad;
runtime·printf("runtime: found *(%p+%p) = %p+%p\n", b, i, obj0, (uintptr)(obj-obj0));
if(ptrmask != nil)
runtime·throw("bad pointer");
if(nbadblock >= nelem(badblock))
runtime·throw("badblock trace too long");
badblock[nbadblock++] = (uintptr)b;
AlreadyBad:;
}

// Now we have bits, bitp, and shift correct for
// obj pointing at the base of the object.
// Only care about not marked objects.
if((bits&bitMarked) != 0)
continue;
// If obj size is greater than 8, then each byte of GC bitmap
// contains info for at most one object. In such case we use
// non-atomic byte store to mark the object. This can lead
// to double enqueue of the object for scanning, but scanning
// is an idempotent operation, so it is OK. This cannot lead
// to bitmap corruption because the single marked bit is the
// only thing that can change in the byte.
// For 8-byte objects we use non-atomic store, if the other
// quadruple is already marked. Otherwise we resort to CAS
// loop for marking.
if((xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) ||
runtime·work.nproc == 1)
*bitp = xbits | (bitMarked<<shift);
else
runtime·atomicor8(bitp, bitMarked<<shift);

if(((xbits>>(shift+2))&BitsMask) == BitsDead)
continue; // noscan object

// Queue the obj for scanning.
PREFETCH(obj);
p = scanbuf[scanbufpos];
scanbuf[scanbufpos++] = obj;
scanbufpos %= nelem(scanbuf);
if(p == nil)
continue;

// If workbuf is full, obtain an empty one.
if(nobj >= nelem(wbuf->obj)) {
wbuf->nobj = nobj;
wbuf = getempty(wbuf);
nobj = wbuf->nobj;
wp = &wbuf->obj[nobj];
}
*wp = p;
wp++;
nobj++;
}
............
}
}

static void
markroot(ParFor *desc, uint32 i)
{
FinBlock *fb;
MSpan *s;
uint32 spanidx, sg;
G *gp;
void *p;
uint32 status;
bool restart;

USED(&desc);
// Note: if you add a case here, please also update heapdump.c:dumproots.
switch(i) {
case RootData:
scanblock(runtime·data, runtime·edata - runtime·data, runtime·gcdatamask.bytedata);
break;

case RootBss:
scanblock(runtime·bss, runtime·ebss - runtime·bss, runtime·gcbssmask.bytedata);
break;

case RootFinalizers:
for(fb=runtime·allfin; fb; fb=fb->alllink)
scanblock((byte)fb->fin, fb->cntsizeof(fb->fin[0]), finptrmask);
break;

case RootSpans:
// mark MSpan.specials
......
break;

case RootFlushCaches:
flushallmcaches();
break;

default:
// the rest is scanning goroutine stacks
if(i - RootCount >= runtime·allglen)
runtime·throw("markroot: bad index");
gp = runtime·allg[i - RootCount];
// remember when we've first observed the G blocked
// needed only to output in traceback
status = runtime·readgstatus(gp);
if((status == Gwaiting || status == Gsyscall) && gp->waitsince == 0)
gp->waitsince = runtime·work.tstart;
// Shrink a stack if not much of it is being used.
runtime·shrinkstack(gp);
if(runtime·readgstatus(gp) == Gdead)
gp->gcworkdone = true;
else
gp->gcworkdone = false;
restart = runtime·stopg(gp);
scanstack(gp);
if(restart)
runtime·restartg(gp);
break;
}
}

sweep过程

sweep扫描span里面每一个对象是否marked，将未marked的对象放入span的freelist中
如果span中的所有对象都进入了freelist，那么会将span的内存释放到heap中。

// sweeps one span
// returns number of pages returned to heap, or -1 if there is nothing to sweep
uintptr
runtime·sweepone(void)
{
MSpan *s;
uint32 idx, sg;
uintptr npages;

// increment locks to ensure that the goroutine is not preempted
// in the middle of sweep thus leaving the span in an inconsistent state for next GC
g->m->locks++;
sg = runtime·mheap.sweepgen;
for(;😉 {
idx = runtime·xadd(&runtime·sweep.spanidx, 1) - 1;
if(idx >= runtime·work.nspan) {
runtime·mheap.sweepdone = true;
g->m->locks--;
return -1;
}
s = runtime·work.spans[idx];
if(s->state != MSpanInUse) {
s->sweepgen = sg;
continue;
}
if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
continue;
npages = s->npages;
if(!runtime·MSpan_Sweep(s, false))
npages = 0;
g->m->locks--;
return npages;
}
}

// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
// Returns true if the span was returned to heap.
// If preserve=true, don't return it to heap nor relink in MCentral lists;
// caller takes care of it.
bool
runtime·MSpan_Sweep(MSpan *s, bool preserve)
{
int32 cl, n, npages, nfree;
uintptr size, off, step;
uint32 sweepgen;
byte *p, *bitp, shift, xbits, bits;
MCache *c;
byte *arena_start;
MLink head, *end, *link;
Special *special, **specialp, *y;
bool res, sweepgenset;

// It's critical that we enter this function with preemption disabled,
// GC must not start while we are in the middle of this function.
if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
runtime·throw("MSpan_Sweep: m is not locked");
sweepgen = runtime·mheap.sweepgen;
if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
s->state, s->sweepgen, sweepgen);
runtime·throw("MSpan_Sweep: bad span state");
}
arena_start = runtime·mheap.arena_start;
cl = s->sizeclass;
size = s->elemsize;
if(cl == 0) {
n = 1;
} else {
// Chunk full of small blocks.
npages = runtime·class_to_allocnpages[cl];
n = (npages << PageShift) / size;
}
res = false;
nfree = 0;
end = &head;
c = g->m->mcache;
sweepgenset = false;

// Mark any free objects in this span so we don't collect them.
for(link = s->freelist; link != nil; link = link->next) {
off = (uintptr)link - (uintptr)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapByte) * gcBits;
*bitp |= bitMarked<<shift;
}

// Unlink & free special records for any objects we're about to free.
specialp = &s->specials;
special = specialp;
while(special != nil) {
// A finalizer can be set for an inner byte of an object, find object beginning.
p = (byte)(s->start << PageShift) + special->offset/sizesize;
off = (uintptr)p - (uintptr)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapByte) * gcBits;
bits = (bitp>>shift) & bitMask;
if((bits&bitMarked) == 0) {
// Find the exact byte for which the special was setup
// (as opposed to object beginning).
p = (byte*)(s->start << PageShift) + special->offset;
// about to free object: splice out special record
y = special;
special = special->next;
*specialp = special;
if(!runtime·freespecial(y, p, size, false)) {
// stop freeing of object if it has a finalizer
*bitp |= bitMarked << shift;
}
} else {
// object is still live: keep special record
specialp = &special->next;
special = *specialp;
}
}

// Sweep through n objects of given size starting at p.
// This thread owns the span now, so it can manipulate
// the block bitmap without atomic operations.
p = (byte)(s->start << PageShift);
// Find bits for the beginning of the span.
off = (uintptr)p - (uintptr)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = 0;
step = size/(PtrSizewordsPerBitmapByte);
// Rewind to the previous quadruple as we move to the next
// in the beginning of the loop.
bitp += step;
if(step == 0) {
// 8-byte objects.
bitp++;
shift = gcBits;
}
for(; n > 0; n--, p += size) {
bitp -= step;
if(step == 0) {
if(shift != 0)
bitp--;
shift = gcBits - shift;
}

xbits = *bitp;
bits = (xbits>>shift) & bitMask;

// Allocated and marked object, reset bits to allocated.
if((bits&bitMarked) != 0) {
bitp &= ~(bitMarked<<shift);
continue;
}
// At this point we know that we are looking at garbage object
// that needs to be collected.
if(runtime·debug.allocfreetrace)
runtime·tracefree(p, size);
// Reset to allocated+noscan.
bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
if(cl == 0) {
// Free large span.
if(preserve)
runtime·throw("can't preserve large span");
runtime·unmarkspan(p, s->npages<<PageShift);
s->needzero = 1;
// important to set sweepgen before returning it to heap
runtime·atomicstore(&s->sweepgen, sweepgen);
sweepgenset = true;
// NOTE(rsc,dvyukov): The original implementation of efence
// in CL 22060046 used SysFree instead of SysFault, so that
// the operating system would eventually give the memory
// back to us again, so that an efence program could run
// longer without running out of memory. Unfortunately,
// calling SysFree here without any kind of adjustment of the
// heap data structures means that when the memory does
// come back to us, we have the wrong metadata for it, either in
// the MSpan structures or in the garbage collection bitmap.
// Using SysFault here means that the program will run out of
// memory fairly quickly in efence mode, but at least it won't
// have mysterious crashes due to confused memory reuse.
// It should be possible to switch back to SysFree if we also
// implement and then call some kind of MHeap_DeleteSpan.
if(runtime·debug.efence) {
s->limit = nil; // prevent mlookup from finding this span
runtime·SysFault(p, size);
} else
runtime·MHeap_Free(&runtime·mheap, s, 1);
c->local_nlargefree++;
c->local_largefree += size;
runtime·xadd64(&mstats.next_gc, -(uint64)(size * (runtime·gcpercent + 100)/100));
res = true;
} else {
// Free small object.
if(size > 2sizeof(uintptr))
((uintptr)p)[1] = (uintptr)0xdeaddeaddeaddeadll; // mark as "needs to be zeroed"
else if(size > sizeof(uintptr))
((uintptr*)p)[1] = 0;

end->next = (MLink)p;
end = (MLink)p;
nfree++;
}
}

// We need to set s->sweepgen = h->sweepgen only when all blocks are swept,
// because of the potential for a concurrent free/SetFinalizer.
// But we need to set it before we make the span available for allocation
// (return it to heap or mcentral), because allocation code assumes that a
// span is already swept if available for allocation.

if(!sweepgenset && nfree == 0) {
// The span must be in our exclusive ownership until we update sweepgen,
// check for potential races.
if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
s->state, s->sweepgen, sweepgen);
runtime·throw("MSpan_Sweep: bad span state after sweep");
}
runtime·atomicstore(&s->sweepgen, sweepgen);
}
if(nfree > 0) {
c->local_nsmallfree[cl] += nfree;
c->local_cachealloc -= nfree * size;
runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (runtime·gcpercent + 100)/100));
res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl].mcentral, s, nfree, head.next, end, preserve);
// MCentral_FreeSpan updates sweepgen
}
return res;
}

posted on 2015-05-17 14:28 richmonkey 阅读(671) 评论(0) 收藏举报

刷新页面返回顶部

go语言的GC

mark过程

sweep过程

导航

公告