go语言的GC

采用mark&sweep算法:保证至少每2分钟进行一次GC;此外,当分配的总内存超过上次GC时总内存的一定比例(默认100%)时,也会进行一次GC
进行mark的过程中,会停止一切G的运行,mark的过程是多任务并发的
sweep的过程是分散的

mark过程

整个程序内存块包括 .data, .bss, 每个G的stack, SpecialFinalizer
每段内存都有其相应的bitmap,用来表示每个word(8BYTE)的标志位,每word需要4bit的标志位
mark的过程就是递归遍历内存块bitmap的过程
word标志位有如下3种类型:

  1. 标量

  2. 指针

  3. 连续两个word表示一个iface或eface

    // scanblock scans a block of n bytes starting at pointer b for references
    // to other objects, scanning any it finds recursively until there are no
    // unscanned objects left. Instead of using an explicit recursion, it keeps
    // a work list in the Workbuf* structures and loops in the main function
    // body. Keeping an explicit work list is easier on the stack allocator and
    // more efficient.
    //
    // Parameters:
    //   b       - start of the block to scan. If nil, the function does not
    //             start from a caller-supplied block; it pulls work from the
    //             work buffers and keeps refilling from the global queue
    //             (keepworking mode) until getfull returns nil.
    //   n       - number of bytes to scan starting at b.
    //   ptrmask - nil to take pointer info from the GC bitmap, otherwise a
    //             compact pointer mask (used for stacks and data).
    //
    // NOTE: lines marked "......" were omitted by the author of this excerpt;
    // some locals (size, idx, x, k) and the badobj label are only used there.
    static void
    scanblock(byte *b, uintptr n, byte *ptrmask)
    {
        byte *obj, *obj0, *p, *arena_start, *arena_used, **wp, *scanbuf[8], *ptrbitp, *bitp;
        uintptr i, j, nobj, size, idx, x, off, scanbufpos, bits, xbits, shift;
        Workbuf *wbuf;
        Iface *iface;
        Eface *eface;
        Type *typ;
        MSpan *s;
        pageID k;
        bool keepworking;

        // Cache memory arena parameters in local vars.
        arena_start = runtime·mheap.arena_start;
        arena_used = runtime·mheap.arena_used;

        wbuf = getempty(nil);
        nobj = wbuf->nobj;
        wp = &wbuf->obj[nobj];
        // With no explicit block, keep draining the global work queue.
        keepworking = b == nil;
        // scanbuf is a small ring buffer of objects queued for scanning;
        // an entry is moved to the work buffer only when its slot is reused.
        scanbufpos = 0;
        for(i = 0; i < nelem(scanbuf); i++)
            scanbuf[i] = nil;

        ptrbitp = nil;

        // ptrmask can have 2 possible values:
        // 1. nil - obtain pointer mask from GC bitmap.
        // 2. pointer to a compact mask (for stacks and data).
        if(b != nil)
            goto scanobj;
        for(;;) {
            if(nobj == 0) {
                // Out of work in workbuf.
                // First, see is there is any work in scanbuf.
                for(i = 0; i < nelem(scanbuf); i++) {
                    b = scanbuf[scanbufpos];
                    scanbuf[scanbufpos++] = nil;
                    scanbufpos %= nelem(scanbuf);
                    if(b != nil) {
                        n = arena_used - b; // scan until bitBoundary or BitsDead
                        ptrmask = nil; // use GC bitmap for pointer info
                        goto scanobj;
                    }
                }
                if(!keepworking) {
                    putempty(wbuf);
                    return;
                }
                // Refill workbuf from global queue.
                wbuf = getfull(wbuf);
                if(wbuf == nil)
                    return;
                nobj = wbuf->nobj;
                wp = &wbuf->obj[nobj];
            }

            // If another proc wants a pointer, give it some.
            if(runtime·work.nwait > 0 && nobj > 4 && runtime·work.full == 0) {
                wbuf->nobj = nobj;
                wbuf = handoff(wbuf);
                nobj = wbuf->nobj;
                wp = &wbuf->obj[nobj];
            }

            // Pop the next object off the work buffer.
            wp--;
            nobj--;
            b = *wp;
            n = arena_used - b; // scan until next bitBoundary or BitsDead
            ptrmask = nil; // use GC bitmap for pointer info

        scanobj:
            if(DebugPtrs)
                runtime·printf("scanblock %p +%p %p\n", b, n, ptrmask);
            // Find bits of the beginning of the object.
            if(ptrmask == nil) {
                // Pointer subtraction on uintptr* yields a word index (not a
                // byte offset); the bitmap is addressed backwards from arena_start.
                off = (uintptr*)b - (uintptr*)arena_start;
                ptrbitp = arena_start - off/wordsPerBitmapByte - 1;
            }
            for(i = 0; i < n; i += PtrSize) {
                obj = nil;
                // Find bits for this word.
                // ...... (code omitted in this excerpt; it sets `bits` for the word at b+i)

                if(bits <= BitsScalar) // BitsScalar || BitsDead
                    continue;
                if(bits == BitsPointer) {
                    obj = *(byte**)(b+i);
                    obj0 = obj;
                    goto markobj;
                }

                // With those three out of the way, must be multi-word.
                if(Debug && bits != BitsMultiWord)
                    runtime·throw("unexpected garbage collection bits");
                // Find the next pair of bits.
                if(ptrmask == nil) {
                    bits = *ptrbitp;
                    // j selects which half of the bitmap byte describes the next word.
                    j = ((uintptr)b+i+PtrSize)/PtrSize & 1;
                    ptrbitp -= j;
                    bits >>= gcBits*j;
                    bits = (bits>>2)&BitsMask;
                } else
                    bits = (ptrmask[((i+PtrSize)/PtrSize)/4]>>((((i+PtrSize)/PtrSize)%4)*BitsPerPointer))&BitsMask;

                if(Debug && bits != BitsIface && bits != BitsEface)
                    runtime·throw("unexpected garbage collection bits");

                // The two-word value is an interface; its data word is taken as
                // a potential pointer unless the type is both KindDirectIface
                // and KindNoPointers.
                if(bits == BitsIface) {
                    iface = (Iface*)(b+i);
                    if(iface->tab != nil) {
                        typ = iface->tab->type;
                        if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
                            obj = iface->data;
                    }
                } else {
                    eface = (Eface*)(b+i);
                    typ = eface->type;
                    if(typ != nil) {
                        if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
                            obj = eface->data;
                    }
                }

                // Skip the second word of the interface.
                i += PtrSize;

                obj0 = obj;
            markobj:
                // At this point we have extracted the next potential pointer.
                // Check if it points into heap.
                if(obj == nil)
                    continue;
                if(obj < arena_start || obj >= arena_used) {
                    if((uintptr)obj < PhysPageSize && runtime·invalidptr) {
                        s = nil;
                        goto badobj; // label lives in the omitted code
                    }
                    continue;
                }
                // Mark the object.
                // Round obj down to a word boundary, then locate its bitmap bits.
                obj = (byte*)((uintptr)obj & ~(PtrSize-1));
                off = (uintptr*)obj - (uintptr*)arena_start;
                bitp = arena_start - off/wordsPerBitmapByte - 1;
                shift = (off % wordsPerBitmapByte) * gcBits;
                xbits = *bitp;
                bits = (xbits >> shift) & bitMask;
                if((bits&bitBoundary) == 0) {
                    // Not a beginning of a block, consult span table to find the block beginning.
                    // ...... (code omitted in this excerpt; it computes p, the block start)

                    obj = p;
                    goto markobj;
                }
                if(DebugPtrs)
                    runtime·printf("scan *%p = %p => base %p\n", b+i, obj0, obj);

                if(nbadblock > 0 && (uintptr)obj == badblock[nbadblock-1]) {
                    // Running garbage collection again because
                    // we want to find the path from a root to a bad pointer.
                    // Found possible next step; extend or finish path.
                    for(j=0; j<nbadblock; j++)
                        if(badblock[j] == (uintptr)b)
                            goto AlreadyBad;
                    runtime·printf("runtime: found *(%p+%p) = %p+%p\n", b, i, obj0, (uintptr)(obj-obj0));
                    if(ptrmask != nil)
                        runtime·throw("bad pointer");
                    if(nbadblock >= nelem(badblock))
                        runtime·throw("badblock trace too long");
                    badblock[nbadblock++] = (uintptr)b;
                AlreadyBad:;
                }

                // Now we have bits, bitp, and shift correct for
                // obj pointing at the base of the object.
                // Only care about not marked objects.
                if((bits&bitMarked) != 0)
                    continue;
                // If obj size is greater than 8, then each byte of GC bitmap
                // contains info for at most one object. In such case we use
                // non-atomic byte store to mark the object. This can lead
                // to double enqueue of the object for scanning, but scanning
                // is an idempotent operation, so it is OK. This cannot lead
                // to bitmap corruption because the single marked bit is the
                // only thing that can change in the byte.
                // For 8-byte objects we use non-atomic store, if the other
                // quadruple is already marked. Otherwise we resort to CAS
                // loop for marking.
                if((xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) ||
                    runtime·work.nproc == 1)
                    *bitp = xbits | (bitMarked<<shift);
                else
                    runtime·atomicor8(bitp, bitMarked<<shift);

                if(((xbits>>(shift+2))&BitsMask) == BitsDead)
                    continue; // noscan object

                // Queue the obj for scanning.
                // The slot's previous occupant (if any) is evicted to the work buffer.
                PREFETCH(obj);
                p = scanbuf[scanbufpos];
                scanbuf[scanbufpos++] = obj;
                scanbufpos %= nelem(scanbuf);
                if(p == nil)
                    continue;

                // If workbuf is full, obtain an empty one.
                if(nobj >= nelem(wbuf->obj)) {
                    wbuf->nobj = nobj;
                    wbuf = getempty(wbuf);
                    nobj = wbuf->nobj;
                    wp = &wbuf->obj[nobj];
                }
                *wp = p;
                wp++;
                nobj++;
            }
            // ............ (code omitted in this excerpt)
        }
    }

    // markroot scans one GC root, selected by index i:
    //   RootData / RootBss  - the data and bss segments, using their pointer masks;
    //   RootFinalizers      - every FinBlock on the runtime·allfin list;
    //   RootSpans           - MSpan.specials (body omitted in this excerpt);
    //   RootFlushCaches     - flushes all mcaches;
    //   any other value     - the stack of goroutine runtime·allg[i - RootCount].
    // desc is unused. Throws if i does not map to an existing goroutine.
    static void
    markroot(ParFor *desc, uint32 i)
    {
        FinBlock *fb;
        MSpan *s;
        uint32 spanidx, sg;
        G *gp;
        void *p;
        uint32 status;
        bool restart;

        USED(&desc);
        // Note: if you add a case here, please also update heapdump.c:dumproots.
        switch(i) {
        case RootData:
            scanblock(runtime·data, runtime·edata - runtime·data, runtime·gcdatamask.bytedata);
            break;

        case RootBss:
            scanblock(runtime·bss, runtime·ebss - runtime·bss, runtime·gcbssmask.bytedata);
            break;

        case RootFinalizers:
            // Scan the fin array of each block: cnt entries of sizeof(fin[0]) bytes.
            for(fb=runtime·allfin; fb; fb=fb->alllink)
                scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), finptrmask);
            break;

        case RootSpans:
            // mark MSpan.specials
            // ...... (code omitted in this excerpt)
            break;

        case RootFlushCaches:
            flushallmcaches();
            break;

        default:
            // the rest is scanning goroutine stacks
            if(i - RootCount >= runtime·allglen)
                runtime·throw("markroot: bad index");
            gp = runtime·allg[i - RootCount];
            // remember when we've first observed the G blocked
            // needed only to output in traceback
            status = runtime·readgstatus(gp);
            if((status == Gwaiting || status == Gsyscall) && gp->waitsince == 0)
                gp->waitsince = runtime·work.tstart;
            // Shrink a stack if not much of it is being used.
            runtime·shrinkstack(gp);
            if(runtime·readgstatus(gp) == Gdead)
                gp->gcworkdone = true;
            else
                gp->gcworkdone = false;
            // Stop the goroutine while its stack is scanned; restart it
            // afterwards only if stopg reported that a restart is needed.
            restart = runtime·stopg(gp);
            scanstack(gp);
            if(restart)
                runtime·restartg(gp);
            break;
        }
    }

sweep过程

sweep扫描span里面每一个对象是否marked,将未marked的对象放入span的freelist中
如果span中的所有对象都进入了freelist,那么会将span的内存释放到heap中。

// sweeps one span
// returns number of pages returned to heap, or -1 if there is nothing to sweep
uintptr
runtime·sweepone(void)
{
MSpan *s;
uint32 idx, sg;
uintptr npages;

// increment locks to ensure that the goroutine is not preempted
// in the middle of sweep thus leaving the span in an inconsistent state for next GC
g->m->locks++;
sg = runtime·mheap.sweepgen;
for(;😉 {
idx = runtime·xadd(&runtime·sweep.spanidx, 1) - 1;
if(idx >= runtime·work.nspan) {
runtime·mheap.sweepdone = true;
g->m->locks--;
return -1;
}
s = runtime·work.spans[idx];
if(s->state != MSpanInUse) {
s->sweepgen = sg;
continue;
}
if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
continue;
npages = s->npages;
if(!runtime·MSpan_Sweep(s, false))
npages = 0;
g->m->locks--;
return npages;
}
}

// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
// Returns true if the span was returned to heap.
// If preserve=true, don't return it to heap nor relink in MCentral lists;
// caller takes care of it.
//
// The function throws if called without preemption disabled, or if the span
// is not in use with sweepgen == mheap.sweepgen-1.
bool
runtime·MSpan_Sweep(MSpan *s, bool preserve)
{
    int32 cl, n, npages, nfree;
    uintptr size, off, step;
    uint32 sweepgen;
    byte *p, *bitp, shift, xbits, bits;
    MCache *c;
    byte *arena_start;
    MLink head, *end, *link;
    Special *special, **specialp, *y;
    bool res, sweepgenset;

    // It's critical that we enter this function with preemption disabled,
    // GC must not start while we are in the middle of this function.
    if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
        runtime·throw("MSpan_Sweep: m is not locked");
    sweepgen = runtime·mheap.sweepgen;
    if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
        runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
            s->state, s->sweepgen, sweepgen);
        runtime·throw("MSpan_Sweep: bad span state");
    }
    arena_start = runtime·mheap.arena_start;
    cl = s->sizeclass;
    size = s->elemsize;
    // n is the number of objects in the span: one for size class 0.
    if(cl == 0) {
        n = 1;
    } else {
        // Chunk full of small blocks.
        npages = runtime·class_to_allocnpages[cl];
        n = (npages << PageShift) / size;
    }
    res = false;
    nfree = 0;
    // head/end form a local singly linked list of the objects freed below.
    end = &head;
    c = g->m->mcache;
    sweepgenset = false;

    // Mark any free objects in this span so we don't collect them.
    for(link = s->freelist; link != nil; link = link->next) {
        // Pointer subtraction on uintptr* yields a word index into the arena.
        off = (uintptr*)link - (uintptr*)arena_start;
        bitp = arena_start - off/wordsPerBitmapByte - 1;
        shift = (off % wordsPerBitmapByte) * gcBits;
        *bitp |= bitMarked<<shift;
    }

    // Unlink & free special records for any objects we're about to free.
    specialp = &s->specials;
    special = *specialp;
    while(special != nil) {
        // A finalizer can be set for an inner byte of an object, find object beginning.
        p = (byte*)(s->start << PageShift) + special->offset/size*size;
        off = (uintptr*)p - (uintptr*)arena_start;
        bitp = arena_start - off/wordsPerBitmapByte - 1;
        shift = (off % wordsPerBitmapByte) * gcBits;
        bits = (*bitp>>shift) & bitMask;
        if((bits&bitMarked) == 0) {
            // Find the exact byte for which the special was setup
            // (as opposed to object beginning).
            p = (byte*)(s->start << PageShift) + special->offset;
            // about to free object: splice out special record
            y = special;
            special = special->next;
            *specialp = special;
            if(!runtime·freespecial(y, p, size, false)) {
                // stop freeing of object if it has a finalizer
                *bitp |= bitMarked << shift;
            }
        } else {
            // object is still live: keep special record
            specialp = &special->next;
            special = *specialp;
        }
    }

    // Sweep through n objects of given size starting at p.
    // This thread owns the span now, so it can manipulate
    // the block bitmap without atomic operations.
    p = (byte*)(s->start << PageShift);
    // Find bits for the beginning of the span.
    off = (uintptr*)p - (uintptr*)arena_start;
    bitp = arena_start - off/wordsPerBitmapByte - 1;
    shift = 0;
    // step is the number of bitmap bytes per object; it is 0 when an object
    // is smaller than the memory covered by one bitmap byte.
    step = size/(PtrSize*wordsPerBitmapByte);
    // Rewind to the previous quadruple as we move to the next
    // in the beginning of the loop.
    bitp += step;
    if(step == 0) {
        // 8-byte objects.
        bitp++;
        shift = gcBits;
    }
    for(; n > 0; n--, p += size) {
        bitp -= step;
        if(step == 0) {
            // Alternate between the two halves of the bitmap byte, moving to
            // the next byte every second object.
            if(shift != 0)
                bitp--;
            shift = gcBits - shift;
        }

        xbits = *bitp;
        bits = (xbits>>shift) & bitMask;

        // Allocated and marked object, reset bits to allocated.
        if((bits&bitMarked) != 0) {
            *bitp &= ~(bitMarked<<shift);
            continue;
        }
        // At this point we know that we are looking at garbage object
        // that needs to be collected.
        if(runtime·debug.allocfreetrace)
            runtime·tracefree(p, size);
        // Reset to allocated+noscan.
        *bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
        if(cl == 0) {
            // Free large span.
            if(preserve)
                runtime·throw("can't preserve large span");
            runtime·unmarkspan(p, s->npages<<PageShift);
            s->needzero = 1;
            // important to set sweepgen before returning it to heap
            runtime·atomicstore(&s->sweepgen, sweepgen);
            sweepgenset = true;
            // NOTE(rsc,dvyukov): The original implementation of efence
            // in CL 22060046 used SysFree instead of SysFault, so that
            // the operating system would eventually give the memory
            // back to us again, so that an efence program could run
            // longer without running out of memory. Unfortunately,
            // calling SysFree here without any kind of adjustment of the
            // heap data structures means that when the memory does
            // come back to us, we have the wrong metadata for it, either in
            // the MSpan structures or in the garbage collection bitmap.
            // Using SysFault here means that the program will run out of
            // memory fairly quickly in efence mode, but at least it won't
            // have mysterious crashes due to confused memory reuse.
            // It should be possible to switch back to SysFree if we also
            // implement and then call some kind of MHeap_DeleteSpan.
            if(runtime·debug.efence) {
                s->limit = nil; // prevent mlookup from finding this span
                runtime·SysFault(p, size);
            } else
                runtime·MHeap_Free(&runtime·mheap, s, 1);
            c->local_nlargefree++;
            c->local_largefree += size;
            runtime·xadd64(&mstats.next_gc, -(uint64)(size * (runtime·gcpercent + 100)/100));
            res = true;
        } else {
            // Free small object.
            if(size > 2*sizeof(uintptr))
                ((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll; // mark as "needs to be zeroed"
            else if(size > sizeof(uintptr))
                ((uintptr*)p)[1] = 0;

            // Append the object to the local list of freed objects.
            end->next = (MLink*)p;
            end = (MLink*)p;
            nfree++;
        }
    }

    // We need to set s->sweepgen = h->sweepgen only when all blocks are swept,
    // because of the potential for a concurrent free/SetFinalizer.
    // But we need to set it before we make the span available for allocation
    // (return it to heap or mcentral), because allocation code assumes that a
    // span is already swept if available for allocation.

    if(!sweepgenset && nfree == 0) {
        // The span must be in our exclusive ownership until we update sweepgen,
        // check for potential races.
        if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
            runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
                s->state, s->sweepgen, sweepgen);
            runtime·throw("MSpan_Sweep: bad span state after sweep");
        }
        runtime·atomicstore(&s->sweepgen, sweepgen);
    }
    if(nfree > 0) {
        // Account for the freed small objects and hand the list to MCentral.
        c->local_nsmallfree[cl] += nfree;
        c->local_cachealloc -= nfree * size;
        runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (runtime·gcpercent + 100)/100));
        res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl].mcentral, s, nfree, head.next, end, preserve);
        // MCentral_FreeSpan updates sweepgen
    }
    return res;
}

posted on 2015-05-17 14:28  richmonkey  阅读(671)  评论(0)    收藏  举报

导航