When the runtime creates a new G, runtime.malg calls runtime.stackalloc to allocate the stack memory; afterwards, the compiler-inserted function prologue checks on each call whether the remaining stack space is sufficient and invokes runtime.morestack when it is not.
// runtime/runtime2.go
// Stack describes a Go execution stack.
// The bounds of the stack are exactly [lo, hi),
// with no implicit data structures on either side.
type stack struct {
lo uintptr
hi uintptr
}
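A new G starts on the smallest stack (fixedStack, 2 KiB on common platforms). That figure can be approximated from user code by parking many goroutines and dividing the growth of MemStats.StackInuse by their count; a minimal sketch, assuming a recent Go toolchain and an otherwise idle program:
// Standalone demo: approximate the per-goroutine stack footprint by
// parking many goroutines and measuring the growth of StackInuse.
package main

import (
	"fmt"
	"runtime"
	"sync"
)

func main() {
	var before, after runtime.MemStats
	runtime.ReadMemStats(&before)

	const n = 10000
	stop := make(chan struct{})
	var wg sync.WaitGroup
	for i := 0; i < n; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			<-stop // park so the stack stays allocated
		}()
	}
	// newproc allocates each g and its stack synchronously at the go
	// statement, so all n stacks exist by this point.
	runtime.ReadMemStats(&after)
	fmt.Printf("~%d bytes of stack per goroutine\n",
		(after.StackInuse-before.StackInuse)/n)

	close(stop)
	wg.Wait()
}
On a typical recent build this prints a value near 2048, modulo span-granularity rounding.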
// runtime/stack.go
//
// Stack frame layout
//
// (x86)
// +------------------+
// | args from caller |
// +------------------+ <- frame->argp
// | return address |
// +------------------+
// | caller's BP (*) | (*) if framepointer_enabled && varp > sp
// +------------------+ <- frame->varp
// | locals |
// +------------------+
// | args to callee |
// +------------------+ <- frame->sp
//
// (arm)
// +------------------+
// | args from caller |
// +------------------+ <- frame->argp
// | caller's retaddr |
// +------------------+
// | caller's FP (*) | (*) on ARM64, if framepointer_enabled && varp > sp
// +------------------+ <- frame->varp
// | locals |
// +------------------+
// | args to callee |
// +------------------+
// | return address |
// +------------------+ <- frame->sp
//
// varp > sp means that the function has a frame;
// varp == sp means frameless function.
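The layout above is what the traceback machinery walks. From user code the same frames are visible through the public runtime.Callers / runtime.CallersFrames API; a minimal sketch:
// Standalone demo: walk the current goroutine's frames; each Frame
// returned here corresponds to one stack frame in the diagram above.
package main

import (
	"fmt"
	"runtime"
)

//go:noinline
func leaf() {
	pcs := make([]uintptr, 16)
	n := runtime.Callers(1, pcs) // skip runtime.Callers itself
	frames := runtime.CallersFrames(pcs[:n])
	for {
		f, more := frames.Next()
		fmt.Println(f.Function)
		if !more {
			break
		}
	}
}

//go:noinline
func middle() { leaf() }

func main() { middle() }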
Initialization
// runtime/stack.go
// stackpool is the global stack cache: it serves stacks smaller than 32 KB
// stackLarge is the large-stack cache: it serves stacks of 32 KB and above
// Global pool of spans that have free stacks.
// Stacks are assigned an order according to size.
//
// order = log_2(size/FixedStack)
//
// There is a free list for each order.
var stackpool [_NumStackOrders]struct {
item stackpoolItem
_ [(cpu.CacheLinePadSize - unsafe.Sizeof(stackpoolItem{})%cpu.CacheLinePadSize) % cpu.CacheLinePadSize]byte
}
type stackpoolItem struct {
_ sys.NotInHeap
mu mutex
span mSpanList
}
// Global pool of large stack spans.
var stackLarge struct {
lock mutex
free [heapAddrBits - pageShift]mSpanList // free lists by log_2(s.npages)
}
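The index math above is easy to verify standalone. A sketch assuming fixedStack is 2 KiB and the runtime page size is 8 KiB (the common values; not stated in this excerpt):
// Standalone sketch of the two free-list index computations.
package main

import (
	"fmt"
	"math/bits"
)

const (
	fixedStack = 2 << 10 // assumed minimum stack size
	pageSize   = 8 << 10 // assumed runtime page size
)

func main() {
	// Small stacks: order = log2(size/fixedStack), one stackpool
	// free list per order.
	for _, n := range []uint{2048, 4096, 8192, 16384} {
		fmt.Printf("size %6d -> stackpool order %d\n",
			n, bits.Len(n/fixedStack)-1)
	}
	// Large stacks: stackLarge.free is indexed by log2 of the
	// span's page count.
	for _, n := range []uint{32768, 65536, 131072} {
		fmt.Printf("size %6d -> stackLarge index %d\n",
			n, bits.Len(n/pageSize)-1)
	}
}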
// runtime/stack.go
// Initialize stackpool and stackLarge.
// Both are built on `mSpanList`, i.e. on `mspan`, so Go stack memory can be viewed as carved out of the same spans the heap allocator manages.
func stackinit() {
if _StackCacheSize&_PageMask != 0 {
throw("cache size must be a multiple of page size")
}
for i := range stackpool {
stackpool[i].item.span.init()
lockInit(&stackpool[i].item.mu, lockRankStackpool)
}
for i := range stackLarge.free {
stackLarge.free[i].init()
lockInit(&stackLarge.lock, lockRankStackLarge)
}
}
// Allocating only from the global pool would cause lock contention.
// Stacks are closely tied to Ps, so each P carries its own stack cache.
type p struct {
// ...
mcache *mcache
// ...
}
type mcache struct {
// ...
stackcache [_NumStackOrders]stackfreelist
// ...
}
type stackfreelist struct {
list gclinkptr // linked list of free stacks
size uintptr // total size of stacks in list
}
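The shape of this optimization is the classic cache-in-front-of-a-locked-pool pattern: the owner P pops from its local list without locking, and only batch refills touch the global mutex. A toy sketch of the pattern, with made-up names rather than runtime APIs:
// Toy illustration (not runtime code): the fast path touches only
// P-local state; the global lock is taken once per batch refill,
// amortizing contention over many allocations.
package main

import "sync"

type globalPool struct {
	mu   sync.Mutex
	free []int
}

type perPCache struct {
	local []int // owned by one P, no lock needed
}

func (c *perPCache) get(g *globalPool) (int, bool) {
	if len(c.local) == 0 {
		// Slow path: refill a batch under the global lock.
		g.mu.Lock()
		n := len(g.free)
		if n > 16 {
			n = 16
		}
		c.local = append(c.local, g.free[:n]...)
		g.free = g.free[n:]
		g.mu.Unlock()
	}
	if len(c.local) == 0 {
		return 0, false // global pool exhausted as well
	}
	x := c.local[len(c.local)-1]
	c.local = c.local[:len(c.local)-1]
	return x, true
}

func main() {
	g := &globalPool{free: []int{1, 2, 3, 4, 5}}
	var c perPCache
	for {
		if _, ok := c.get(g); !ok {
			break
		}
	}
}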
Allocation
// runtime/stack.go
// Allocate an n-byte stack:
// 1. small stacks: served from the per-P cache or the global stackpool
// 2. large stacks: served from the large-stack cache stackLarge
// 3. large stacks when stackLarge has nothing suitable: allocated straight from the heap
//
// stackalloc allocates an n byte stack.
//
// stackalloc must run on the system stack because it uses per-P
// resources and must not split the stack.
//
//go:systemstack
func stackalloc(n uint32) stack {
// Stackalloc must be called on scheduler stack, so that we
// never try to grow the stack during the code that stackalloc runs.
// Doing so would cause a deadlock (issue 1547).
thisg := getg()
// ...
// Small stacks are allocated with a fixed-size free-list allocator.
// If we need a stack of a bigger size, we fall back on allocating
// a dedicated span.
var v unsafe.Pointer
if n < fixedStack<<_NumStackOrders && n < _StackCacheSize { // n < 32768 /// 1. smaller than 32 KB
order := uint8(0)
n2 := n
for n2 > fixedStack {
order++
n2 >>= 1
}
var x gclinkptr
if stackNoCache != 0 || thisg.m.p == 0 || thisg.m.preemptoff != "" { /// per-P cache disabled, no P attached, or preemption off (e.g. during GC)
// thisg.m.p == 0 can happen in the guts of exitsyscall
// or procresize. Just get a stack from the global pool.
// Also don't touch stackcache during gc
// as it's flushed concurrently.
lock(&stackpool[order].item.mu)
x = stackpoolalloc(order) /// allocate from the global stackpool
unlock(&stackpool[order].item.mu)
} else { /// allocate from the P's stack cache
c := thisg.m.p.ptr().mcache
x = c.stackcache[order].list
if x.ptr() == nil {
stackcacherefill(c, order) /// pull a batch from stackpool into the P's cache
x = c.stackcache[order].list
}
c.stackcache[order].list = x.ptr().next
c.stackcache[order].size -= uintptr(n)
}
v = unsafe.Pointer(x)
} else { /// 2. 32 KB or larger
var s *mspan
npage := uintptr(n) >> _PageShift
log2npage := stacklog2(npage)
// Try to get a stack from the large stack cache.
lock(&stackLarge.lock)
if !stackLarge.free[log2npage].isEmpty() { /// serve from the large-stack cache if it has a span of this size
s = stackLarge.free[log2npage].first
stackLarge.free[log2npage].remove(s)
}
unlock(&stackLarge.lock)
lockWithRankMayAcquire(&mheap_.lock, lockRankMheap)
if s == nil { /// the large-stack cache had nothing suitable
// Allocate a new stack from the heap.
s = mheap_.allocManual(npage, spanAllocStack) /// allocate a fresh span straight from the heap
if s == nil {
throw("out of memory")
}
osStackAlloc(s) /// OS-specific preparation of the new stack memory
s.elemsize = uintptr(n)
}
v = unsafe.Pointer(s.base())
}
// ...
return stack{uintptr(v), uintptr(v) + uintptr(n)}
}
Growth
// runtime/asm_amd64.s
// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
// ...
CALL runtime·newstack(SB)
// ...
// runtime/stack.go
// Called when more stack space is needed:
// 1. allocate a larger stack
// 2. copy the current stack contents over to it
//
// Called from runtime·morestack when more stack is needed.
// Allocate larger stack and relocate to new stack.
// Stack growth is multiplicative, for constant amortized cost.
//
// g->atomicstatus will be Grunning or Gscanrunning upon entry.
// If the scheduler is trying to stop this g, then it will set preemptStop.
//
// This must be nowritebarrierrec because it can be called as part of
// stack growth from other nowritebarrierrec functions, but the
// compiler doesn't check this.
//
//go:nowritebarrierrec
func newstack() {
thisg := getg()
// ...
gp := thisg.m.curg
// ...
morebuf := thisg.m.morebuf /// save morebuf, then clear its fields below
thisg.m.morebuf.pc = 0
thisg.m.morebuf.lr = 0
thisg.m.morebuf.sp = 0
thisg.m.morebuf.g = 0
// NOTE: stackguard0 may change underfoot, if another thread
// is about to try to preempt gp. Read it just once and use that same
// value now and below.
stackguard0 := atomic.Loaduintptr(&gp.stackguard0) /// load the g's stackguard0 exactly once
// Be conservative about where we preempt.
// We are interested in preempting user Go code, not runtime code.
// If we're holding locks, mallocing, or preemption is disabled, don't
// preempt.
// This check is very early in newstack so that even the status change
// from Grunning to Gwaiting and back doesn't happen in this case.
// That status change by itself can be viewed as a small preemption,
// because the GC might change Gwaiting to Gscanwaiting, and then
// this goroutine has to wait for the GC to finish before continuing.
// If the GC is in some way dependent on this goroutine (for example,
// it needs a lock held by the goroutine), that small preemption turns
// into a real deadlock.
preempt := stackguard0 == stackPreempt /// is this a preemption request rather than a real overflow?
if preempt {
if !canPreemptM(thisg.m) { /// cannot preempt right now; let the goroutine keep running
// Let the goroutine keep running for now.
// gp->preempt is set, so it will be preempted next time.
gp.stackguard0 = gp.stack.lo + stackGuard
gogo(&gp.sched) // never return
}
}
// ...
// Allocate a bigger segment and move the stack.
oldsize := gp.stack.hi - gp.stack.lo
newsize := oldsize * 2 /// the new stack is twice the old size
// Make sure we grow at least as much as needed to fit the new frame.
// (This is just an optimization - the caller of morestack will
// recheck the bounds on return.)
if f := findfunc(gp.sched.pc); f.valid() {
max := uintptr(funcMaxSPDelta(f))
needed := max + stackGuard
used := gp.stack.hi - gp.sched.sp
for newsize-used < needed {
newsize *= 2
}
}
if stackguard0 == stackForceMove {
// Forced stack movement used for debugging.
// Don't double the stack (or we may quickly run out
// if this is done repeatedly).
newsize = oldsize
}
// ...
// The goroutine must be executing in order to call newstack,
// so it must be Grunning (or Gscanrunning).
casgstatus(gp, _Grunning, _Gcopystack)
// The concurrent GC will not scan the stack while we are doing the copy since
// the gp is in a Gcopystack status.
copystack(gp, newsize) /// copy the old stack contents to the new stack
if stackDebug >= 1 {
print("stack grow done\n")
}
casgstatus(gp, _Gcopystack, _Grunning)
gogo(&gp.sched) /// resume execution on the new stack
}
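The move performed by newstack/copystack can be observed from user code: snapshot a local variable's address as a plain integer (which copystack will not adjust), force growth with deep recursion, and compare. A sketch; whether growth actually fires depends on frame sizes and the initial stack size:
// Standalone demo: a local's address, snapshotted as an integer,
// changes after deep recursion forces the stack to be copied.
package main

import (
	"fmt"
	"unsafe"
)

//go:noinline
func grow(n int) {
	if n > 0 {
		grow(n - 1) // Go has no tail-call elimination: each call adds a frame
	}
}

func main() {
	var x byte
	// An integer snapshot: copystack adjusts pointers, not integers,
	// so p1 keeps recording the old location.
	p1 := uintptr(unsafe.Pointer(&x))
	grow(100000) // morestack -> newstack -> copystack, several times
	p2 := uintptr(unsafe.Pointer(&x))
	fmt.Printf("before=%#x after=%#x moved=%v\n", p1, p2, p1 != p2)
}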
// Copies gp's stack to a new stack of a different size.
// Caller must have changed gp status to Gcopystack.
func copystack(gp *g, newsize uintptr) {
old := gp.stack /// the old stack
used := old.hi - gp.sched.sp /// bytes currently in use on the old stack
// Add just the difference to gcController.addScannableStack.
// g0 stacks never move, so this will never account for them.
// It's also fine if we have no P, addScannableStack can deal with
// that case.
gcController.addScannableStack(getg().m.p.ptr(), int64(newsize)-int64(old.hi-old.lo)) /// update the amount of stack the GC may scan
// allocate new stack
new := stackalloc(uint32(newsize)) /// allocate the new stack
// ...
// Compute adjustment.
var adjinfo adjustinfo /// records how pointers into the old stack must be shifted
adjinfo.old = old
adjinfo.delta = new.hi - old.hi
// Adjust sudogs, synchronizing with channel ops if necessary.
ncopy := used
if !gp.activeStackChans { /// no unlocked channels point into this stack
if newsize < old.hi-old.lo && gp.parkingOnChan.Load() {
// It's not safe for someone to shrink this stack while we're actively
// parking on a channel, but it is safe to grow since we do that
// ourselves and explicitly don't want to synchronize with channels
// since we could self-deadlock.
throw("racy sudog adjustment due to parking on channel")
}
adjustsudogs(gp, &adjinfo) /// adjust sudogs; no synchronization needed
} else {
// sudogs may be pointing in to the stack and gp has
// released channel locks, so other goroutines could
// be writing to gp's stack. Find the highest such
// pointer so we can handle everything there and below
// carefully. (This shouldn't be far from the bottom
// of the stack, so there's little cost in handling
// everything below it carefully.)
adjinfo.sghi = findsghi(gp, old)
// Synchronize with channel ops and copy the part of
// the stack they may interact with.
ncopy -= syncadjustsudogs(gp, used, &adjinfo) /// adjust sudogs, synchronizing with the channels involved
}
// Copy the stack (or the rest of it) to the new location
memmove(unsafe.Pointer(new.hi-ncopy), unsafe.Pointer(old.hi-ncopy), ncopy) /// copy the stack contents
// Adjust remaining structures that have pointers into stacks.
// We have to do most of these before we traceback the new
// stack because gentraceback uses them.
adjustctxt(gp, &adjinfo) /// adjust pointers into the stack: scheduling context
adjustdefers(gp, &adjinfo) /// adjust pointers into the stack: deferred calls
adjustpanics(gp, &adjinfo) /// adjust pointers into the stack: pending panics
if adjinfo.sghi != 0 {
adjinfo.sghi += adjinfo.delta
}
// Swap out old stack for new one
gp.stack = new /// point g at the new stack
gp.stackguard0 = new.lo + stackGuard // NOTE: might clobber a preempt request /// reset the stack guard
gp.sched.sp = new.hi - used /// rebase the saved SP
gp.stktopsp += adjinfo.delta /// rebase the expected SP at the top of the stack
// Adjust pointers in the new stack.
var u unwinder
for u.init(gp, 0); u.valid(); u.next() { /// walk every frame on the new stack
adjustframe(&u.frame, &adjinfo) /// adjust the pointers within this frame
}
// free old stack
if stackPoisonCopy != 0 {
fillstack(old, 0xfc)
}
stackfree(old) /// free the old stack
}
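By contrast, a real pointer into the stack survives the copy, because adjustframe rewrites it to the new location. A sketch, assuming escape analysis keeps x on the stack (which holds for current compilers with this code):
// Standalone demo: a pointer into main's frame still works after the
// stack has been moved, because copystack adjusted it.
package main

import "fmt"

//go:noinline
func grow(n int) {
	if n > 0 {
		grow(n - 1) // each call adds a frame; deep enough to force growth
	}
}

func main() {
	x := 42
	p := &x      // pointer into main's stack frame
	grow(100000) // the stack is copied; p is adjusted along with it
	fmt.Println(*p == 42, p == &x) // prints: true true
}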
Shrinking
// runtime/stack.go
// Maybe shrink the stack being used by gp.
//
// gp must be stopped and we must own its stack. It may be in
// _Grunning, but only if this is our own user G.
func shrinkstack(gp *g) {
//...
oldsize := gp.stack.hi - gp.stack.lo
newsize := oldsize / 2 /// the new stack is half the current size
// Don't shrink the allocation below the minimum-sized stack
// allocation.
if newsize < fixedStack { /// never shrink below the 2 KB minimum
return
}
// Compute how much of the stack is currently in use and only
// shrink the stack if gp is using less than a quarter of its
// current stack. The currently used stack includes everything
// down to the SP plus the stack guard space that ensures
// there's room for nosplit functions.
avail := gp.stack.hi - gp.stack.lo
if used := gp.stack.hi - gp.sched.sp + stackNosplit; used >= avail/4 { /// skip shrinking while more than a quarter of the stack is in use
return
}
// ...
copystack(gp, newsize) /// copy to the smaller stack
}
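The two early returns above are the entire policy: halve the stack, but never below the minimum allocation, and only when less than a quarter of it is in use. A standalone sketch of the arithmetic, with illustrative constants (stackNosplit here is a stand-in value, not the runtime's exact constant):
// Standalone sketch of shrinkstack's policy.
package main

import "fmt"

const (
	fixedStack   = 2 << 10 // assumed minimum stack size
	stackNosplit = 800     // stand-in for the nosplit guard headroom
)

// shrinkSize mirrors the two early returns: report the halved size,
// or ok=false when the heuristics say not to shrink.
func shrinkSize(oldsize, used uintptr) (newsize uintptr, ok bool) {
	newsize = oldsize / 2
	if newsize < fixedStack {
		return 0, false // already at the minimum allocation
	}
	if used+stackNosplit >= oldsize/4 {
		return 0, false // too much of the stack is in use
	}
	return newsize, true
}

func main() {
	fmt.Println(shrinkSize(32<<10, 1<<10))  // lightly used: 16384 true
	fmt.Println(shrinkSize(32<<10, 16<<10)) // heavily used: 0 false
	fmt.Println(shrinkSize(2<<10, 256))     // minimal already: 0 false
}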