最近在看左神新書《Go 語言設計與實現》的垃圾收集器章節時產生了一個疑惑,花了點時間搞清楚,在此記錄一下。
Go 語言垃圾回收的實現使用了標記清除算法,將對象的狀態抽象成黑色(活躍對象)、灰色(活躍對象中間狀態)、白色(潛在垃圾對象也是所有對象的默認狀態)三種,注意沒有具體的字段標記顏色。
整個標記過程就是把白色對象標黑的過程:
1.首先將 ROOT 根對象(包括全局變量、goroutine 棧上的對象等)放入到灰色集合
2.選一個灰色對象,標成黑色,將所有可達的子對象放入到灰色集合
3.重複步驟 2,直到灰色集合為空
下圖是書上的插圖,看上去是一個典型的深度優先搜索的算法。
下圖是劉丹冰寫的《Golang 修養之路》的插圖,看上去是一個典型的廣度優先搜索的算法。
我疑惑的點在於這個標記過程到底是深度優先還是廣度優先,因為很多文章、博客對此都沒有清楚的說明。作為學習者,這種細節其實並不影響對整個 GC 流程的理解,但是這種細節我非常喜歡摳:)
對著書和源碼摸索,大致得出的結論是深度優先。下面看一下大致的過程,源碼基於 1.15.2 版本:
gcStart 是 Go 語言三種條件觸發 GC 的共同入口
// gcStart is the GC entry point. Only the step that launches the
// background mark workers is shown here; everything else is elided.
func gcStart(trigger gcTrigger) {
......
// Start the background mark workers.
gcBgMarkStartWorkers()
......
}
啟動後臺標記任務
// gcBgMarkStartWorkers walks allp and starts a gcBgMarkWorker goroutine
// for every P whose gcBgMarkWorker field is still zero. Part of the loop
// body is elided in this excerpt.
func gcBgMarkStartWorkers() {
// Background marking is performed by per-P G's. Ensure that
// each P has a background GC G.
for _, p := range allp {
if p.gcBgMarkWorker == 0 {
// Create the goroutine that runs background marking for this P.
go gcBgMarkWorker(p)
......
}
}
}
為每個處理器(P)創建用於執行後臺標記任務的 Goroutine
// gcBgMarkWorker is the body of the per-P background mark worker
// goroutine. In the visible excerpt it loops forever: it parks until
// woken, then runs gcDrain on _p_.gcw with flags chosen by
// _p_.gcMarkWorkerMode. Setup and bookkeeping code is elided.
func gcBgMarkWorker(_p_ *p) {
......
for {
// Go to sleep until woken by gcController.findRunnable.
// We can't releasem yet since even the call to gopark
// may be preempted.
// Put the current G to sleep.
gopark(func(g *g, parkp unsafe.Pointer) bool {
park := (*parkInfo)(parkp)
// The worker G is no longer running, so it's
// now safe to allow preemption.
releasem(park.m.ptr())
// If the worker isn't attached to its P,
// attach now. During initialization and after
// a phase change, the worker may have been
// running on a different P. As soon as we
// attach, the owner P may schedule the
// worker, so this must be done after the G is
// stopped.
if park.attach != 0 {
p := park.attach.ptr()
park.attach.set(nil)
// cas the worker because we may be
// racing with a new worker starting
// on this P.
// Install the current G into the P's gcBgMarkWorker field.
if !p.gcBgMarkWorker.cas(0, guintptr(unsafe.Pointer(g))) {
// The P got a new worker.
// Exit this worker.
return false
}
}
return true
}, unsafe.Pointer(park), waitReasonGCWorkerIdle, traceEvGoBlock, 0)
......
systemstack(func() {
// Mark our goroutine preemptible so its stack
// can be scanned. This lets two mark workers
// scan each other (otherwise, they would
// deadlock). We must not modify anything on
// the G stack. However, stack shrinking is
// disabled for mark workers, so it is safe to
// read from the G stack.
// Switch the G to _Gwaiting so that its stack can be scanned.
casgstatus(gp, _Grunning, _Gwaiting)
switch _p_.gcMarkWorkerMode {
default:
throw("gcBgMarkWorker: unexpected gcMarkWorkerMode")
case gcMarkWorkerDedicatedMode:
// In dedicated mode the P does nothing but mark.
gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
if gp.preempt {
// We were preempted. This is
// a useful signal to kick
// everything out of the run
// queue so it can run
// somewhere else.
// Move every G from this P's local run queue to the
// global run queue (under sched.lock).
lock(&sched.lock)
for {
gp, _ := runqget(_p_)
if gp == nil {
break
}
globrunqput(gp)
}
unlock(&sched.lock)
}
// Go back to draining, this time
// without preemption.
// Resume marking.
gcDrain(&_p_.gcw, gcDrainFlushBgCredit)
case gcMarkWorkerFractionalMode:
// Run marking.
gcDrain(&_p_.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit)
case gcMarkWorkerIdleMode:
// Run marking until preempted or until a certain amount of work is done.
gcDrain(&_p_.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit)
}
// Restore the G to _Grunning.
casgstatus(gp, _Gwaiting, _Grunning)
})
......
}
}
上面休眠的 G 會在調度循環 schedule() 中被檢查並喚醒執行
// schedule is the scheduler loop. This excerpt shows only the step that
// picks a GC worker G while marking is enabled, and the final hand-off to
// execute; the rest is elided.
func schedule() {
......
// Marking is enabled (gcBlackenEnabled != 0) and no G has been chosen
// yet: ask the GC controller for a runnable GC worker G.
if gp == nil && gcBlackenEnabled != 0 {
gp = gcController.findRunnableGCWorker(_g_.m.p.ptr())
tryWakeP = tryWakeP || gp != nil
}
......
// Run the chosen G.
execute(gp, inheritTime)
}
執行標記
// gcDrain repeatedly takes an object from gcw and scans it with
// scanobject. The loop ends when no work can be obtained or when the loop
// condition (preemption / pending STW) becomes false. Only the
// heap-draining loop is shown; the code before and after it is elided.
func gcDrain(gcw *gcWork, flags gcDrainFlags) {
.......
// Drain heap marking jobs.
// Stop if we're preemptible or if someone wants to STW.
for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
// Try to keep work available on the global queue. We used to
// check if there were waiting workers, but it's better to
// just keep work available than to make workers wait. In the
// worst case, we'll do O(log(_WorkbufSize)) unnecessary
// balances.
// Hand part of the local work back to the global queue.
if work.full == 0 {
gcw.balance()
}
// Fetch an object to scan: fast path first, slow path if that
// yields nothing.
b := gcw.tryGetFast()
if b == 0 {
b = gcw.tryGet()
if b == 0 {
// Flush the write barrier
// buffer; this may create
// more work.
wbBufFlush(nil, 0)
b = gcw.tryGet()
}
}
if b == 0 {
// Unable to get work.
break
}
// Scan the object we obtained.
scanobject(b, gcw)
......
}
gcw 是每個 P 獨有的,所以不用擔心並發的問題;和 GMP、mcache 的設計思路一樣,都是為了減少鎖競爭
// tryGetFast is the fast path for taking one object pointer out of w.
// It only looks at w.wbuf1 and returns 0 when that buffer is nil or empty.
func (w *gcWork) tryGetFast() uintptr {
wbuf := w.wbuf1
if wbuf == nil {
return 0
}
if wbuf.nobj == 0 {
return 0
}
// Pop from the TAIL: decrement nobj and return the slot it now indexes.
// Taking from the tail is the key point: the buffer is consumed in
// last-in, first-out order.
wbuf.nobj--
return wbuf.obj[wbuf.nobj]
}
// tryGet is the slow path for taking one object pointer out of w.
// If wbuf1 is empty it swaps wbuf1 and wbuf2; if both are empty it tries
// to obtain a non-empty buffer from the global list via trygetfull.
// It returns 0 when no work is available anywhere.
func (w *gcWork) tryGet() uintptr {
wbuf := w.wbuf1
if wbuf == nil {
w.init()
wbuf = w.wbuf1
// wbuf is empty at this point.
}
// The first buffer is empty.
if wbuf.nobj == 0 {
// Swap the first and second buffers.
w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
wbuf = w.wbuf1
// Both buffers are empty.
if wbuf.nobj == 0 {
owbuf := wbuf
// Try to take a non-empty buffer from the global list.
wbuf = trygetfull()
// Nothing on the global list either.
if wbuf == nil {
return 0
}
// Hand the old empty buffer back to the global list.
putempty(owbuf)
w.wbuf1 = wbuf
}
}
// Return the LAST object in the buffer (pop from the tail).
wbuf.nobj--
return wbuf.obj[wbuf.nobj]
}
嘗試在全局列表中獲取一個不為空的 buf
// trygetfull tries to get a full or partially empty workbuffer.
// If one is not immediately available return nil
// The buffer is popped from work.full and passed through checknonempty
// before being returned.
//go:nowritebarrier
func trygetfull() *workbuf {
b := (*workbuf)(work.full.pop())
if b != nil {
b.checknonempty()
return b
}
// b is nil here: nothing was available on work.full.
return b
}
這是官方實現的無鎖棧(lock-free stack,注意是棧而不是隊列):)漲見識了,用 for 循環加原子操作(CAS)實現棧的 pop
// lfstack is the head of a lock-free stack.
//
// pop removes the node at the head of the stack and returns it, or
// returns nil when the head is 0 (empty stack). It retries the
// load/compare-and-swap sequence until the swap of the head succeeds.
func (head *lfstack) pop() unsafe.Pointer {
for {
// Snapshot the current packed head value.
old := atomic.Load64((*uint64)(head))
if old == 0 {
return nil
}
node := lfstackUnpack(old)
next := atomic.Load64(&node.next)
// Install next as the new head only if the head is still old;
// otherwise loop and try again with a fresh snapshot.
if atomic.Cas64((*uint64)(head), old, next) {
return unsafe.Pointer(node)
}
}
}
到這裡,從灰色集合中獲取待掃描對象的邏輯就說完了。找到對象之後接著就是 scanobject(b, gcw),裡面有兩段邏輯要注意
// scanobject scans the object (or oblet) starting at b. It walks the
// object one pointer-sized word at a time and calls greyobject on every
// heap pointer that findObject resolves. Objects larger than
// maxObletBytes are split into oblets that are queued on gcw.
// Parts of the function are elided in this excerpt.
func scanobject(b uintptr, gcw *gcWork) {
// Find the bits for b and the size of the object at b.
//
// b is either the beginning of an object, in which case this
// is the size of the object to scan, or it points to an
// oblet, in which case we compute the size to scan below.
// Get the heapBits for b.
hbits := heapBitsForAddr(b)
// Look up the span containing b.
s := spanOfUnchecked(b)
// Element size of that span.
n := s.elemsize
if n == 0 {
throw("scanobject n == 0")
}
// Large objects (over maxObletBytes; 128KB per the original annotation)
// are split into oblets, which are added to the grey set to be scanned
// later.
if n > maxObletBytes {
......
// Enqueue the other oblets to scan later.
// Some oblets may be in b's scalar tail, but
// these will be marked as "no more pointers",
// so we'll drop out immediately when we go to
// scan those.
for oblet := b + maxObletBytes; oblet < s.base()+s.elemsize; oblet += maxObletBytes {
if !gcw.putFast(oblet) {
gcw.put(oblet)
}
}
}
// Compute the size of the oblet. Since this object
// must be a large object, s.base() is the beginning
// of the object.
n = s.base() + s.elemsize - b
if n > maxObletBytes {
n = maxObletBytes
}
}
// Scan the object one pointer-sized word at a time.
var i uintptr
for i = 0; i < n; i += sys.PtrSize {
// Find bits for this word.
if i != 0 {
// Avoid needless hbits.next() on last iteration.
hbits = hbits.next()
}
// Load bits once. See CL 22712 and issue 16973 for discussion.
bits := hbits.bits()
// During checkmarking, 1-word objects store the checkmark
// in the type bit for the one word. The only one-word objects
// are pointers, or else they'd be merged with other non-pointer
// data into larger allocations.
if i != 1*sys.PtrSize && bits&bitScan == 0 {
break // no more pointers in this object (decided from the bitScan bit)
}
if bits&bitPointer == 0 {
continue // not a pointer
}
// Work here is duplicated in scanblock and above.
// If you make changes here, make changes there too.
// Load the word at offset i; it is a candidate pointer.
obj := *(*uintptr)(unsafe.Pointer(b + i))
// At this point we have extracted the next potential pointer.
// Quickly filter out nil and pointers back to the current object.
if obj != 0 && obj-b >= n {
// Test if obj points into the Go heap and, if so,
// mark the object.
//
// Note that it's possible for findObject to
// fail if obj points to a just-allocated heap
// object because of a race with growing the
// heap. In this case, we know the object was
// just allocated and hence will be marked by
// allocation itself.
// (Annotation: objects allocated during marking are marked black
// immediately at allocation time.)
// Resolve the pointer to its object and shade it.
if obj, span, objIndex := findObject(obj, b, i); obj != 0 {
greyobject(obj, b, i, span, gcw, objIndex)
}
}
}
......
}
根據索引位置找到對象進行標色
// greyobject shades obj: if obj's mark bit is not yet set, it sets the
// bit, records the span's page as marked, and queues obj on gcw so its
// fields are scanned later. Objects in noscan spans are not queued; only
// gcw.bytesMarked is increased for them.
//
// base and off say where the pointer to obj was found (*(base+off)) and
// are used only in diagnostic output. span and objIndex identify obj's
// span and its index within that span.
func greyobject(obj, base, off uintptr, span *mspan, gcw *gcWork, objIndex uintptr) {
// obj should be start of allocation, and so must be at least pointer-aligned.
if obj&(sys.PtrSize-1) != 0 {
throw("greyobject: obj not pointer-aligned")
}
mbits := span.markBitsForIndex(objIndex)
// Checkmark mode: a debugging-only mechanism that verifies every
// reachable object was marked correctly.
if useCheckmark {
if !mbits.isMarked() {
printlock()
print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")
// Dump the source (base) object
gcDumpObject("base", base, off)
// Dump the object
gcDumpObject("obj", obj, ^uintptr(0))
getg().m.traceback = 2
throw("checkmark found unmarked object")
}
hbits := heapBitsForAddr(obj)
if hbits.isCheckmarked(span.elemsize) {
return
}
hbits.setCheckmarked(span.elemsize)
if !hbits.isCheckmarked(span.elemsize) {
throw("setCheckmarked and isCheckmarked disagree")
}
} else {
if debug.gccheckmark > 0 && span.isFree(objIndex) {
print("runtime: marking free object ", hex(obj), " found at *(", hex(base), "+", hex(off), ")\n")
gcDumpObject("base", base, off)
gcDumpObject("obj", obj, ^uintptr(0))
getg().m.traceback = 2
throw("marking free object")
}
// If marked we have nothing to do.
if mbits.isMarked() {
return
}
// Set the mark bit: the object is now known to be live.
mbits.setMarked()
// Mark span.
arena, pageIdx, pageMask := pageIndexOf(span.base())
if arena.pageMarks[pageIdx]&pageMask == 0 {
atomic.Or8(&arena.pageMarks[pageIdx], pageMask)
}
// If this is a noscan object, fast-track it to black
// instead of greying it.
if span.spanclass.noscan() {
gcw.bytesMarked += uint64(span.elemsize)
return
}
}
// Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
// seems like a nice optimization that can be added back in.
// There needs to be time between the PREFETCH and the use.
// Previously we put the obj in an 8 element buffer that is drained at a rate
// to give the PREFETCH time to do its work.
// Use of PREFETCHNTA might be more appropriate than PREFETCH
// Try the fast path into the gcWork buffer first and fall back to put;
// being queued here is what makes the object "grey".
if !gcw.putFast(obj) {
gcw.put(obj)
}
}
這裡有一點要特別說明,我思考了好久才想明白(菜是真菜):greyobject() 這個方法名很迷惑,標灰對象?其實 mspan 中使用 gcmarkBits 位圖記錄對象是否已被垃圾回收標記,位圖本身只有「已標記(1)」和「未標記(0)」兩種狀態,mbits.setMarked() 做的就是把 gcmarkBits 中對應 index 的位設為 1,代表該對象存活。灰色是抽象出來的中間狀態,沒有專門的標灰字段:已標記且被放入 gcw 等待掃描的對象就是灰色;已標記且已經被 scanobject 掃描完、不在 gcw 中的對象就是黑色(noscan 對象不需要掃描,所以不入隊,直接變黑)。所以 greyobject() 做的事情就是:先把指針所在 位置 指向的 對象 的標記位置 1,代表它存活;然後把這個 對象 放入灰色集合,是為了後續掃描它引用的其他對象。這裡 位置 和 對象 的關係有點繞,需要細品。
嘗試存入 gcwork 的緩存中,或全局隊列中
// putFast is the fast path for queuing obj on w. It only uses w.wbuf1
// and reports false, without storing obj, when that buffer is nil or full.
// It reports true after obj has been stored.
func (w *gcWork) putFast(obj uintptr) bool {
w.checkPut(obj, nil)
wbuf := w.wbuf1
if wbuf == nil {
return false
} else if wbuf.nobj == len(wbuf.obj) {
return false
}
// Note: obj is appended at the TAIL of the buffer (index nobj).
wbuf.obj[wbuf.nobj] = obj
wbuf.nobj++
return true
}
// put is the slow path for queuing obj on w. If wbuf1 is full it swaps
// wbuf1 and wbuf2; if both are full it hands the full buffer to the
// global list with putfull and continues with an empty buffer from
// getempty. The tail of the function is elided in this excerpt.
func (w *gcWork) put(obj uintptr) {
w.checkPut(obj, nil)
flushed := false
wbuf := w.wbuf1
// Record that this may acquire the wbufSpans or heap lock to
// allocate a workbuf.
lockWithRankMayAcquire(&work.wbufSpans.lock, lockRankWbufSpans)
lockWithRankMayAcquire(&mheap_.lock, lockRankMheap)
if wbuf == nil {
w.init()
wbuf = w.wbuf1
// wbuf is empty at this point.
} else if wbuf.nobj == len(wbuf.obj) {
w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
wbuf = w.wbuf1
if wbuf.nobj == len(wbuf.obj) {
putfull(wbuf)
w.flushedWork = true
wbuf = getempty()
w.wbuf1 = wbuf
flushed = true
}
}
// Note: obj is appended at the TAIL of the buffer (index nobj).
wbuf.obj[wbuf.nobj] = obj
wbuf.nobj++
......
}
// putfull checks that b is non-empty and then pushes it onto the global
// work.full list.
func putfull(b *workbuf) {
b.checknonempty()
work.full.push(&b.node)
}
無鎖棧(lfstack),用 for 循環加原子操作(CAS)實現棧的 push
// push adds node at the head of the lock-free stack. It packs the node
// pointer together with the node's push counter into one 64-bit value,
// verifies that the value unpacks to the same node, and then retries a
// compare-and-swap on the head until it succeeds.
func (head *lfstack) push(node *lfnode) {
node.pushcnt++
new := lfstackPack(node, node.pushcnt)
// Sanity check: unpacking must give back the original node pointer.
if node1 := lfstackUnpack(new); node1 != node {
print("runtime: lfstack.push invalid packing: node=", node, " cnt=", hex(node.pushcnt), " packed=", hex(new), " -> node=", node1, "\n")
throw("lfstack.push")
}
for {
// Link the node in front of the current head, then try to publish
// it; if the head changed in the meantime, loop and retry.
old := atomic.Load64((*uint64)(head))
node.next = old
if atomic.Cas64((*uint64)(head), old, new) {
break
}
}
}
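到這裡,把一個灰色對象標黑的過程就完成了:掃描過程中發現的子對象被放回灰色集合,接著繼續掃描下一個指針。由於 gcWork 的取出(tryGetFast/tryGet)和放入(putFast/put)都發生在 wbuf 的尾部,整體是後進先出(LIFO)的棧結構,最後放入的子對象會最先被取出掃描,所以標記過程近似於深度優先。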