Index: src/pkg/runtime/stack.c
===================================================================
--- a/src/pkg/runtime/stack.c
+++ b/src/pkg/runtime/stack.c
@@ -21,163 +21,76 @@
 	StackDebug = 0,
 	StackFromSystem = 0, // allocate stacks from system memory instead of the heap
 	StackFaultOnFree = 0, // old stacks are mapped noaccess to detect use after free
-
-	StackCache = 1,
 };
 
-// Global pool of spans that have free stacks.
-// Stacks are assigned an order according to size.
-// order = log_2(size/FixedStack)
-// There is a free list for each order.
-static MSpan stackpool[NumStackOrders];
-static Lock stackpoolmu;
-// TODO: one lock per order?
+typedef struct StackCacheNode StackCacheNode;
+struct StackCacheNode
+{
+	StackCacheNode *next;
+	void* batch[StackCacheBatch-1];
+};
 
-void
-runtime·stackinit(void)
+static StackCacheNode *stackcache;
+static Lock stackcachemu;
+
+// stackcacherefill/stackcacherelease implement a global cache of stack segments.
+// The cache is required to prevent unlimited growth of per-thread caches.
+static void
+stackcacherefill(void)
 {
-	int32 i;
+	StackCacheNode *n;
+	int32 i, pos;
 
-	for(i = 0; i < NumStackOrders; i++)
-		runtime·MSpanList_Init(&stackpool[i]);
-}
-
-// Allocates a stack from the free pool. Must be called with
-// stackpoolmu held.
-static MLink*
-poolalloc(uint8 order)
-{
-	MSpan *list;
-	MSpan *s;
-	MLink *x;
-	uintptr i;
-
-	list = &stackpool[order];
-	s = list->next;
-	if(s == list) {
-		// no free stacks. Allocate another span worth.
-		s = runtime·MHeap_AllocStack(&runtime·mheap, StackCacheSize >> PageShift);
-		if(s == nil)
-			runtime·throw("out of memory");
-		for(i = 0; i < StackCacheSize; i += FixedStack << order) {
-			x = (MLink*)((s->start << PageShift) + i);
-			x->next = s->freelist;
-			s->freelist = x;
-		}
+	runtime·lock(&stackcachemu);
+	n = stackcache;
+	if(n)
+		stackcache = n->next;
+	runtime·unlock(&stackcachemu);
+	if(n == nil) {
+		n = (StackCacheNode*)runtime·SysAlloc(FixedStack*StackCacheBatch, &mstats.stacks_sys);
+		if(n == nil)
+			runtime·throw("out of memory (stackcacherefill)");
+		for(i = 0; i < StackCacheBatch-1; i++)
+			n->batch[i] = (byte*)n + (i+1)*FixedStack;
 	}
-	x = s->freelist;
-	s->freelist = x->next;
-	s->ref--;
-	if(s->ref == 0) {
-		// all stacks in s are allocated.
-		runtime·MSpanList_Remove(s);
+	pos = g->m->stackcachepos;
+	for(i = 0; i < StackCacheBatch-1; i++) {
+		g->m->stackcache[pos] = n->batch[i];
+		pos = (pos + 1) % StackCacheSize;
 	}
-	return x;
-}
-
-// Adds stack x to the free pool. Must be called with stackpoolmu held.
-static void
-poolfree(MLink *x, uint8 order)
-{
-	MSpan *s;
-
-	s = runtime·MHeap_Lookup(&runtime·mheap, x);
-	x->next = s->freelist;
-	s->freelist = x;
-	if(s->ref == 0) {
-		// s now has a free stack
-		runtime·MSpanList_Insert(&stackpool[order], s);
-	}
-	s->ref++;
-	if(s->ref == (StackCacheSize / FixedStack) >> order) {
-		// span is completely free - return to heap
-		runtime·MSpanList_Remove(s);
-		runtime·MHeap_FreeStack(&runtime·mheap, s);
-	}
-}
-
-// stackcacherefill/stackcacherelease implement a global pool of stack segments.
-// The pool is required to prevent unlimited growth of per-thread caches.
-static void
-stackcacherefill(MCache *c, uint8 order)
-{
-	MLink *x, *list;
-	uintptr size;
-
-	if(StackDebug >= 1)
-		runtime·printf("stackcacherefill order=%d\n", order);
-
-	// Grab some stacks from the global cache.
-	// Grab half of the allowed capacity (to prevent thrashing).
-	list = nil;
-	size = 0;
-	runtime·lock(&stackpoolmu);
-	while(size < StackCacheSize/2) {
-		x = poolalloc(order);
-		x->next = list;
-		list = x;
-		size += FixedStack << order;
-	}
-	runtime·unlock(&stackpoolmu);
-
-	c->stackcache[order].list = list;
-	c->stackcache[order].size = size;
+	g->m->stackcache[pos] = n;
+	pos = (pos + 1) % StackCacheSize;
+	g->m->stackcachepos = pos;
+	g->m->stackcachecnt += StackCacheBatch;
 }
 
 static void
-stackcacherelease(MCache *c, uint8 order)
+stackcacherelease(void)
 {
-	MLink *x, *y;
-	uintptr size;
+	StackCacheNode *n;
+	uint32 i, pos;
 
-	if(StackDebug >= 1)
-		runtime·printf("stackcacherelease order=%d\n", order);
-	x = c->stackcache[order].list;
-	size = c->stackcache[order].size;
-	runtime·lock(&stackpoolmu);
-	while(size > StackCacheSize/2) {
-		y = x->next;
-		poolfree(x, order);
-		x = y;
-		size -= FixedStack << order;
+	pos = (g->m->stackcachepos - g->m->stackcachecnt) % StackCacheSize;
+	n = (StackCacheNode*)g->m->stackcache[pos];
+	pos = (pos + 1) % StackCacheSize;
+	for(i = 0; i < StackCacheBatch-1; i++) {
+		n->batch[i] = g->m->stackcache[pos];
+		pos = (pos + 1) % StackCacheSize;
 	}
-	runtime·unlock(&stackpoolmu);
-	c->stackcache[order].list = x;
-	c->stackcache[order].size = size;
-}
-
-void
-runtime·stackcache_clear(MCache *c)
-{
-	uint8 order;
-	MLink *x, *y;
-
-	if(StackDebug >= 1)
-		runtime·printf("stackcache clear\n");
-	runtime·lock(&stackpoolmu);
-	for(order = 0; order < NumStackOrders; order++) {
-		x = c->stackcache[order].list;
-		while(x != nil) {
-			y = x->next;
-			poolfree(x, order);
-			x = y;
-		}
-		c->stackcache[order].list = nil;
-		c->stackcache[order].size = 0;
-	}
-	runtime·unlock(&stackpoolmu);
+	g->m->stackcachecnt -= StackCacheBatch;
+	runtime·lock(&stackcachemu);
+	n->next = stackcache;
+	stackcache = n;
+	runtime·unlock(&stackcachemu);
 }
 
 void*
 runtime·stackalloc(G *gp, uint32 n)
 {
-	uint8 order;
-	uint32 n2;
+	uint32 pos;
 	void *v;
+	bool malloced;
 	Stktop *top;
-	MLink *x;
-	MSpan *s;
-	MCache *c;
 
 	// Stackalloc must be called on scheduler stack, so that we
 	// never try to grow the stack during the code that stackalloc runs.
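
The hunk above reintroduces a two-level stack cache: each M keeps a small ring buffer (stackcache, indexed by stackcachepos/stackcachecnt) of FixedStack-sized segments, and refills or drains it StackCacheBatch segments at a time through a global linked list, so no per-thread cache can grow without bound. Below is a minimal, self-contained sketch of that batching scheme in portable C; the type and constant names mirror the patch, but the sizes, the ThreadCache struct, and the use of malloc in place of runtime·SysAlloc are illustrative assumptions, not runtime code.

#include <stdlib.h>

enum { FixedStack = 8192, StackCacheSize = 32, StackCacheBatch = 16 };

typedef struct CacheNode CacheNode;
struct CacheNode {
	CacheNode *next;
	void *batch[StackCacheBatch-1];	/* the node itself serves as the last segment */
};

static CacheNode *globalcache;	/* guarded by a lock in the real runtime */

struct ThreadCache {
	void *slot[StackCacheSize];	/* ring buffer of free segments */
	unsigned pos;			/* next slot to fill */
	unsigned cnt;			/* segments currently cached */
};

/* Move one batch from the global list (or fresh memory) into the ring. */
static void
refill(struct ThreadCache *tc)
{
	CacheNode *n;
	int i;

	n = globalcache;
	if(n != NULL)
		globalcache = n->next;
	else {
		/* carve StackCacheBatch segments out of one big allocation */
		char *mem = malloc((size_t)FixedStack * StackCacheBatch);
		if(mem == NULL)
			abort();
		n = (CacheNode*)mem;
		for(i = 0; i < StackCacheBatch-1; i++)
			n->batch[i] = mem + (size_t)(i+1)*FixedStack;
	}
	for(i = 0; i < StackCacheBatch-1; i++) {
		tc->slot[tc->pos] = n->batch[i];
		tc->pos = (tc->pos + 1) % StackCacheSize;
	}
	tc->slot[tc->pos] = n;	/* the node header doubles as a segment */
	tc->pos = (tc->pos + 1) % StackCacheSize;
	tc->cnt += StackCacheBatch;
}

/* Move the oldest batch from the ring back to the global list. */
static void
release(struct ThreadCache *tc)
{
	CacheNode *n;
	unsigned i, pos;

	pos = (tc->pos - tc->cnt) % StackCacheSize;
	n = (CacheNode*)tc->slot[pos];	/* reuse one free segment as the node header */
	pos = (pos + 1) % StackCacheSize;
	for(i = 0; i < StackCacheBatch-1; i++) {
		n->batch[i] = tc->slot[pos];
		pos = (pos + 1) % StackCacheSize;
	}
	tc->cnt -= StackCacheBatch;
	n->next = globalcache;
	globalcache = n;
}

int
main(void)
{
	struct ThreadCache tc = {0};

	refill(&tc);	/* tc.cnt == StackCacheBatch segments ready to hand out */
	release(&tc);	/* hand the whole batch back to the global list */
	return 0;
}

Reusing a free segment as the list node is the same trick the patch plays with StackCacheNode: the metadata for a batch lives inside one of the cached stacks, so the global list needs no extra allocations.
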
@@ -197,58 +110,41 @@
 		return v;
 	}
 
-	// Small stacks are allocated with a fixed-size free-list allocator.
-	// If we need a stack of a bigger size, we fall back on allocating
-	// a dedicated span.
-	if(StackCache && n < FixedStack << NumStackOrders) {
-		order = 0;
-		n2 = n;
-		while(n2 > FixedStack) {
-			order++;
-			n2 >>= 1;
+	// Minimum-sized stacks are allocated with a fixed-size free-list allocator,
+	// but if we need a stack of a bigger size, we fall back on malloc
+	// (assuming that inside malloc all the stack frames are small,
+	// so that we do not deadlock).
+	malloced = true;
+	if(n == FixedStack || g->m->mallocing) {
+		if(n != FixedStack) {
+			runtime·printf("stackalloc: in malloc, size=%d want %d\n", FixedStack, n);
+			runtime·throw("stackalloc");
 		}
-		c = g->m->mcache;
-		if(c == nil) {
-			// This can happen in the guts of exitsyscall or
-			// procresize. Just get a stack from the global pool.
-			runtime·lock(&stackpoolmu);
-			x = poolalloc(order);
-			runtime·unlock(&stackpoolmu);
-		} else {
-			x = c->stackcache[order].list;
-			if(x == nil) {
-				stackcacherefill(c, order);
-				x = c->stackcache[order].list;
-			}
-			c->stackcache[order].list = x->next;
-			c->stackcache[order].size -= n;
-		}
-		v = (byte*)x;
-	} else {
-		s = runtime·MHeap_AllocStack(&runtime·mheap, (n+PageSize-1) >> PageShift);
-		if(s == nil)
-			runtime·throw("out of memory");
-		v = (byte*)(s->start<<PageShift);
-	}
+		if(g->m->stackcachecnt == 0)
+			stackcacherefill();
+		pos = g->m->stackcachepos;
+		pos = (pos - 1) % StackCacheSize;
+		v = g->m->stackcache[pos];
+		g->m->stackcachepos = pos;
+		g->m->stackcachecnt--;
+		g->m->stackinuse++;
+		malloced = false;
+	} else
+		v = runtime·mallocgc(n, 0, FlagNoProfiling|FlagNoGC|FlagNoZero|FlagNoInvokeGC);
+
 	top = (Stktop*)((byte*)v+n-sizeof(Stktop));
 	runtime·memclr((byte*)top, sizeof(*top));
-	if(StackDebug >= 1)
-		runtime·printf(" allocated %p\n", v);
+	top->malloced = malloced;
 	return v;
 }
 
 void
 runtime·stackfree(G *gp, void *v, Stktop *top)
 {
-	uint8 order;
-	uintptr n, n2;
-	MSpan *s;
-	MLink *x;
-	MCache *c;
+	uint32 pos;
+	uintptr n;
 
 	n = (uintptr)(top+1) - (uintptr)v;
-	if(n & (n-1))
-		runtime·throw("stack not a power of 2");
 	if(StackDebug >= 1)
 		runtime·printf("stackfree %p %d\n", v, (int32)n);
 	gp->stacksize -= n;
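
One detail of the stackalloc hunk above is worth noting: the LIFO pop computes pos = (pos - 1) % StackCacheSize on an unsigned index. That lands on the last ring slot when pos is 0 only because unsigned subtraction wraps around and StackCacheSize is a power of two (32 in the runtime headers of this era; treat the exact value as an assumption here). A stand-alone check:

#include <stdio.h>

enum { StackCacheSize = 32 };	/* assumed power-of-two value */

int
main(void)
{
	unsigned pos = 0;

	/* unsigned wraparound: 0 - 1 == UINT_MAX, and UINT_MAX % 32 == 31
	   on the 32-bit-unsigned targets the runtime supports */
	pos = (pos - 1) % StackCacheSize;
	printf("%u\n", pos);			/* prints 31, the last ring slot */

	/* with a non-power-of-two size the same trick would be wrong */
	printf("%u\n", (0u - 1u) % 24u);	/* prints 15, not 23 */
	return 0;
}
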
@@ -259,34 +155,19 @@
 		runtime·SysFree(v, n, &mstats.stacks_sys);
 		return;
 	}
-	if(StackCache && n < FixedStack << NumStackOrders) {
-		order = 0;
-		n2 = n;
-		while(n2 > FixedStack) {
-			order++;
-			n2 >>= 1;
-		}
-		x = (MLink*)v;
-		c = g->m->mcache;
-		if(c == nil) {
-			runtime·lock(&stackpoolmu);
-			poolfree(x, order);
-			runtime·unlock(&stackpoolmu);
-		} else {
-			if(c->stackcache[order].size >= StackCacheSize)
-				stackcacherelease(c, order);
-			x->next = c->stackcache[order].list;
-			c->stackcache[order].list = x;
-			c->stackcache[order].size += n;
-		}
-	} else {
-		s = runtime·MHeap_Lookup(&runtime·mheap, v);
-		if(s->state != MSpanStack) {
-			runtime·printf("%p %p\n", s->start<<PageShift, v);
-			runtime·throw("bad span state");
-		}
-		runtime·MHeap_FreeStack(&runtime·mheap, s);
+	if(top->malloced) {
+		runtime·free(v);
+		return;
 	}
+	if(n != FixedStack)
+		runtime·throw("stackfree: bad fixed size");
+	if(g->m->stackcachecnt == StackCacheSize)
+		stackcacherelease();
+	pos = g->m->stackcachepos;
+	g->m->stackcache[pos] = v;
+	g->m->stackcachepos = (pos + 1) % StackCacheSize;
+	g->m->stackcachecnt++;
+	g->m->stackinuse--;
 }
 
 // Called from runtime·lessstack when returning from a function which
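
Taken together, the two hunks above pair each allocation with its free path: stackalloc records in Stktop whether a segment came from malloc or from the fixed-size cache, stackfree uses that bit to send it back the same way, and a later hunk has copystack carry the bit across the memmove. A toy illustration of that pairing follows; the Top struct, the single-segment free list, and the toy_ names are invented for the example and only mimic the shape of the runtime code.

#include <stdlib.h>
#include <assert.h>

enum { FixedStack = 8192 };

typedef struct Top Top;
struct Top { int malloced; };	/* stand-in for Stktop's malloced bit */

static void *freelist;		/* toy stand-in for the per-M ring */

static void*
toy_stackalloc(unsigned n)
{
	void *v;
	Top *top;

	if(n == FixedStack) {
		if(freelist == NULL) {	/* "refill" the cache with one fresh segment */
			freelist = malloc(FixedStack);
			if(freelist == NULL)
				abort();
			*(void**)freelist = NULL;
		}
		v = freelist;		/* pop a fixed-size segment */
		freelist = *(void**)v;
		top = (Top*)((char*)v + n - sizeof(Top));
		top->malloced = 0;
	} else {
		v = malloc(n);		/* bigger stack: fall back on malloc */
		if(v == NULL)
			abort();
		top = (Top*)((char*)v + n - sizeof(Top));
		top->malloced = 1;
	}
	return v;
}

static void
toy_stackfree(void *v, unsigned n)
{
	Top *top = (Top*)((char*)v + n - sizeof(Top));

	if(top->malloced) {		/* malloc'ed stacks go back to malloc */
		free(v);
		return;
	}
	assert(n == FixedStack);	/* only fixed-size segments are cached */
	*(void**)v = freelist;		/* push back onto the toy cache */
	freelist = v;
}

int
main(void)
{
	void *small = toy_stackalloc(FixedStack);
	void *big = toy_stackalloc(4*FixedStack);

	toy_stackfree(big, 4*FixedStack);	/* routed to free() by the flag */
	toy_stackfree(small, FixedStack);	/* routed back to the cache */
	free(freelist);				/* drain the single cached segment */
	return 0;
}
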
@@ -718,6 +599,7 @@
 	uintptr oldsize, used;
 	AdjustInfo adjinfo;
 	Stktop *oldtop, *newtop;
+	bool malloced;
 
 	if(gp->syscallstack != 0)
 		runtime·throw("can't handle stack copy in syscall yet");
@@ -731,9 +613,10 @@
 	newstk = runtime·stackalloc(gp, newsize);
 	newbase = newstk + newsize;
 	newtop = (Stktop*)(newbase - sizeof(Stktop));
+	malloced = newtop->malloced;
 
 	if(StackDebug >= 1)
-		runtime·printf("copystack gp=%p [%p %p]/%d -> [%p %p]/%d\n", gp, oldstk, oldbase, (int32)oldsize, newstk, newbase, (int32)newsize);
+		runtime·printf("copystack [%p %p]/%d -> [%p %p]/%d\n", oldstk, oldbase, (int32)oldsize, newstk, newbase, (int32)newsize);
 	USED(oldsize);
 
 	// adjust pointers in the to-be-copied frames
@@ -748,6 +631,7 @@
 
 	// copy the stack (including Stktop) to the new location
 	runtime·memmove(newbase - used, oldbase - used, used);
+	newtop->malloced = malloced;
 
 	// Swap out old stack for new one
 	gp->stackbase = (uintptr)newtop;
@@ -908,7 +792,7 @@
 	top = (Stktop*)(stk+framesize-sizeof(*top));
 
 	if(StackDebug >= 1) {
-		runtime·printf("\t-> new stack gp=%p [%p, %p]\n", gp, stk, top);
+		runtime·printf("\t-> new stack [%p, %p]\n", stk, top);
 	}
 
 	top->stackbase = gp->stackbase;
@@ -997,6 +881,7 @@
 	int32 nframes;
 	byte *oldstk, *oldbase;
 	uintptr used, oldsize, newsize;
+	MSpan *span;
 
 	if(!runtime·copystack)
 		return;
@@ -1010,14 +895,53 @@
 	if(used >= oldsize / 4)
 		return; // still using at least 1/4 of the segment.
 
-	if(gp->syscallstack != (uintptr)nil) // TODO: can we handle this case?
+	// To shrink to less than 1/2 a page, we need to copy.
+	if(newsize < PageSize/2) {
+		if(gp->syscallstack != (uintptr)nil) // TODO: can we handle this case?
+			return;
+#ifdef GOOS_windows
+		if(gp->m != nil && gp->m->libcallsp != 0)
+			return;
+#endif
+		nframes = copyabletopsegment(gp);
+		if(nframes == -1)
+			return;
+		copystack(gp, nframes, newsize);
 		return;
-#ifdef GOOS_windows
-	if(gp->m != nil && gp->m->libcallsp != 0)
+	}
+
+	// To shrink a stack of one page size or more, we can shrink it
+	// without copying. Just deallocate the lower half.
+	span = runtime·MHeap_LookupMaybe(&runtime·mheap, oldstk);
+	if(span == nil)
+		return; // stack allocated outside heap. Can't shrink it. Can happen if stack is allocated while inside malloc. TODO: shrink by copying?
+	if(span->elemsize != oldsize)
+		runtime·throw("span element size doesn't match stack size");
+	if((uintptr)oldstk != span->start << PageShift)
+		runtime·throw("stack not at start of span");
+
+	if(StackDebug)
+		runtime·printf("shrinking stack in place %p %X->%X\n", oldstk, oldsize, newsize);
+
+	// new stack guard for smaller stack
+	gp->stackguard = (uintptr)oldstk + newsize + StackGuard;
+	gp->stackguard0 = (uintptr)oldstk + newsize + StackGuard;
+	if(gp->stack0 == (uintptr)oldstk)
+		gp->stack0 = (uintptr)oldstk + newsize;
+	gp->stacksize -= oldsize - newsize;
+
+	// Free bottom half of the stack.
+	if(runtime·debug.efence || StackFromSystem) {
+		if(runtime·debug.efence || StackFaultOnFree)
+			runtime·SysFault(oldstk, newsize);
+		else
+			runtime·SysFree(oldstk, newsize, &mstats.stacks_sys);
 		return;
-#endif
-	nframes = copyabletopsegment(gp);
-	if(nframes == -1)
-		return;
-	copystack(gp, nframes, newsize);
+	}
+	// First, we trick malloc into thinking
+	// we allocated the stack as two separate half-size allocs. Then the
+	// free() call does the rest of the work for us.
+	runtime·MSpan_EnsureSwept(span);
+	runtime·MHeap_SplitSpan(&runtime·mheap, span);
+	runtime·free(oldstk);
 }
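
The in-place path in this last hunk keeps the top half of the segment: the guard and stack0 move up by newsize, the span backing the stack is split in two, and runtime·free releases the lower piece, so nothing has to be copied. The fragment below is only a rough analogue using plain POSIX mappings (MAP_ANONYMOUS as on Linux/BSD), not what the runtime does with MHeap_SplitSpan, but it shows the same address arithmetic: the live frames sit near the high end, so the low half can be dropped in place.

#include <sys/mman.h>
#include <stdio.h>

int
main(void)
{
	size_t oldsize = 2*1024*1024;	/* pretend this is a stack segment */
	size_t newsize = oldsize/2;
	char *oldstk, *newstk, *used_from;

	oldstk = mmap(NULL, oldsize, PROT_READ|PROT_WRITE,
		MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if(oldstk == MAP_FAILED)
		return 1;

	/* the live frames occupy the top quarter, so the bottom half is unused */
	used_from = oldstk + oldsize - oldsize/4;
	used_from[0] = 1;		/* touch the live region */

	/* "shrink": move the base up (the runtime also moves stackguard/stack0) */
	newstk = oldstk + newsize;
	munmap(oldstk, newsize);	/* free the lower half in place */

	printf("stack now [%p, %p)\n", (void*)newstk, (void*)(oldstk + oldsize));
	munmap(newstk, newsize);
	return 0;
}
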