Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(49)

Delta Between Two Patch Sets: src/pkg/runtime/proc.c

Issue 46970043: code review 46970043: runtime: allocate goroutine ids in batches (Closed)
Left Patch Set: diff -r d5dbdcc7f614 https://dvyukov%40google.com@code.google.com/p/go/ Created 11 years, 2 months ago
Right Patch Set: diff -r 72c0dfd50949 https://dvyukov%40google.com@code.google.com/p/go/ Created 11 years, 1 month ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | src/pkg/runtime/runtime.h » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 // Copyright 2009 The Go Authors. All rights reserved. 1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 #include "runtime.h" 5 #include "runtime.h"
6 #include "arch_GOARCH.h" 6 #include "arch_GOARCH.h"
7 #include "zaexperiment.h" 7 #include "zaexperiment.h"
8 #include "malloc.h" 8 #include "malloc.h"
9 #include "stack.h" 9 #include "stack.h"
10 #include "race.h" 10 #include "race.h"
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
67 // Number of goroutine ids to grab from runtime·sched.goidgen to local per-P cache at once. 67 // Number of goroutine ids to grab from runtime·sched.goidgen to local per-P cache at once.
68 // 16 seems to provide enough amortization, but other than that it's mostly arbitrary number. 68 // 16 seems to provide enough amortization, but other than that it's mostly arbitrary number.
69 GoidCacheBatch = 16, 69 GoidCacheBatch = 16,
70 }; 70 };
71 71
72 Sched runtime·sched; 72 Sched runtime·sched;
73 int32 runtime·gomaxprocs; 73 int32 runtime·gomaxprocs;
74 uint32 runtime·needextram; 74 uint32 runtime·needextram;
75 bool runtime·iscgo; 75 bool runtime·iscgo;
76 M runtime·m0; 76 M runtime·m0;
77 G» runtime·g0;» // idle goroutine for m0 77 G» runtime·g0;» // idle goroutine for m0
78 G*» runtime·allg;
79 G* runtime·lastg; 78 G* runtime·lastg;
80 M* runtime·allm; 79 M* runtime·allm;
81 M* runtime·extram; 80 M* runtime·extram;
82 int8* runtime·goos; 81 int8* runtime·goos;
83 int32 runtime·ncpu; 82 int32 runtime·ncpu;
84 static int32 newprocs; 83 static int32 newprocs;
85 84
85 static Lock allglock; // the following vars are protected by this lock or by s toptheworld
86 G** runtime·allg;
87 uintptr runtime·allglen;
88 static uintptr allgcap;
89
86 void runtime·mstart(void); 90 void runtime·mstart(void);
87 static void runqput(P*, G*); 91 static void runqput(P*, G*);
88 static G* runqget(P*); 92 static G* runqget(P*);
89 static void runqgrow(P*); 93 static bool runqputslow(P*, G*, uint32, uint32);
90 static G* runqsteal(P*, P*); 94 static G* runqsteal(P*, P*);
91 static void mput(M*); 95 static void mput(M*);
92 static M* mget(void); 96 static M* mget(void);
93 static void mcommoninit(M*); 97 static void mcommoninit(M*);
94 static void schedule(void); 98 static void schedule(void);
95 static void procresize(int32); 99 static void procresize(int32);
96 static void acquirep(P*); 100 static void acquirep(P*);
97 static P* releasep(void); 101 static P* releasep(void);
98 static void newm(void(*)(void), P*); 102 static void newm(void(*)(void), P*);
99 static void stopm(void); 103 static void stopm(void);
100 static void startm(P*, bool); 104 static void startm(P*, bool);
101 static void handoffp(P*); 105 static void handoffp(P*);
102 static void wakep(void); 106 static void wakep(void);
103 static void stoplockedm(void); 107 static void stoplockedm(void);
104 static void startlockedm(G*); 108 static void startlockedm(G*);
105 static void sysmon(void); 109 static void sysmon(void);
106 static uint32 retake(int64); 110 static uint32 retake(int64);
107 static void incidlelocked(int32); 111 static void incidlelocked(int32);
108 static void checkdead(void); 112 static void checkdead(void);
109 static void exitsyscall0(G*); 113 static void exitsyscall0(G*);
110 static void park0(G*); 114 static void park0(G*);
111 static void goexit0(G*); 115 static void goexit0(G*);
112 static void gfput(P*, G*); 116 static void gfput(P*, G*);
113 static G* gfget(P*); 117 static G* gfget(P*);
114 static void gfpurge(P*); 118 static void gfpurge(P*);
115 static void globrunqput(G*); 119 static void globrunqput(G*);
120 static void globrunqputbatch(G*, G*, int32);
116 static G* globrunqget(P*, int32); 121 static G* globrunqget(P*, int32);
117 static P* pidleget(void); 122 static P* pidleget(void);
118 static void pidleput(P*); 123 static void pidleput(P*);
119 static void injectglist(G*); 124 static void injectglist(G*);
120 static bool preemptall(void); 125 static bool preemptall(void);
121 static bool preemptone(P*); 126 static bool preemptone(P*);
122 static bool exitsyscallfast(void); 127 static bool exitsyscallfast(void);
123 static bool haveexperiment(int8*); 128 static bool haveexperiment(int8*);
129 static void allgadd(G*);
124 130
125 // The bootstrap sequence is: 131 // The bootstrap sequence is:
126 // 132 //
127 // call osinit 133 // call osinit
128 // call schedinit 134 // call schedinit
129 // make & queue new G 135 // make & queue new G
130 // call runtime·mstart 136 // call runtime·mstart
131 // 137 //
132 // The new G calls runtime·main. 138 // The new G calls runtime·main.
133 void 139 void
134 runtime·schedinit(void) 140 runtime·schedinit(void)
135 { 141 {
136 int32 n, procs; 142 int32 n, procs;
137 byte *p; 143 byte *p;
138 Eface i; 144 Eface i;
139 145
140 runtime·sched.maxmcount = 10000; 146 runtime·sched.maxmcount = 10000;
141 runtime·precisestack = haveexperiment("precisestack"); 147 runtime·precisestack = haveexperiment("precisestack");
142 148
143 runtime·mprofinit();
144 runtime·mallocinit(); 149 runtime·mallocinit();
145 mcommoninit(m); 150 mcommoninit(m);
146 ········ 151 ········
147 // Initialize the itable value for newErrorCString, 152 // Initialize the itable value for newErrorCString,
148 // so that the next time it gets called, possibly 153 // so that the next time it gets called, possibly
149 // in a fault during a garbage collection, it will not 154 // in a fault during a garbage collection, it will not
150 // need to allocated memory. 155 // need to allocated memory.
151 runtime·newErrorCString(0, &i); 156 runtime·newErrorCString(0, &i);
152 157
153 runtime·goargs(); 158 runtime·goargs();
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
204 // by calling runtime.LockOSThread during initialization 209 // by calling runtime.LockOSThread during initialization
205 // to preserve the lock. 210 // to preserve the lock.
206 runtime·lockOSThread(); 211 runtime·lockOSThread();
207 ········ 212 ········
208 // Defer unlock so that runtime.Goexit during init does the unlock too. 213 // Defer unlock so that runtime.Goexit during init does the unlock too.
209 d.fn = &initDone; 214 d.fn = &initDone;
210 d.siz = 0; 215 d.siz = 0;
211 d.link = g->defer; 216 d.link = g->defer;
212 d.argp = (void*)-1; 217 d.argp = (void*)-1;
213 d.special = true; 218 d.special = true;
214 d.free = false;
215 g->defer = &d; 219 g->defer = &d;
216 220
217 if(m != &runtime·m0) 221 if(m != &runtime·m0)
218 runtime·throw("runtime·main not on m0"); 222 runtime·throw("runtime·main not on m0");
219 runtime·newproc1(&scavenger, nil, 0, 0, runtime·main); 223 runtime·newproc1(&scavenger, nil, 0, 0, runtime·main);
220 main·init(); 224 main·init();
221 225
222 if(g->defer != &d || d.fn != &initDone) 226 if(g->defer != &d || d.fn != &initDone)
223 runtime·throw("runtime: bad defer entry after init"); 227 runtime·throw("runtime: bad defer entry after init");
224 g->defer = d.link; 228 g->defer = d.link;
(...skipping 12 matching lines...) Expand all
237 241
238 runtime·exit(0); 242 runtime·exit(0);
239 for(;;) 243 for(;;)
240 *(int32*)runtime·main = 0; 244 *(int32*)runtime·main = 0;
241 } 245 }
242 246
243 void 247 void
244 runtime·goroutineheader(G *gp) 248 runtime·goroutineheader(G *gp)
245 { 249 {
246 int8 *status; 250 int8 *status;
251 int64 waitfor;
247 252
248 switch(gp->status) { 253 switch(gp->status) {
249 case Gidle: 254 case Gidle:
250 status = "idle"; 255 status = "idle";
251 break; 256 break;
252 case Grunnable: 257 case Grunnable:
253 status = "runnable"; 258 status = "runnable";
254 break; 259 break;
255 case Grunning: 260 case Grunning:
256 status = "running"; 261 status = "running";
257 break; 262 break;
258 case Gsyscall: 263 case Gsyscall:
259 status = "syscall"; 264 status = "syscall";
260 break; 265 break;
261 case Gwaiting: 266 case Gwaiting:
262 if(gp->waitreason) 267 if(gp->waitreason)
263 status = gp->waitreason; 268 status = gp->waitreason;
264 else 269 else
265 status = "waiting"; 270 status = "waiting";
266 break; 271 break;
267 default: 272 default:
268 status = "???"; 273 status = "???";
269 break; 274 break;
270 } 275 }
271 » runtime·printf("goroutine %D [%s]:\n", gp->goid, status); 276
277 » // approx time the G is blocked, in minutes
278 » waitfor = 0;
279 » if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince != 0)
280 » » waitfor = (runtime·nanotime() - gp->waitsince) / (60LL*1000*1000*1000);
281
282 » if(waitfor < 1)
283 » » runtime·printf("goroutine %D [%s]:\n", gp->goid, status);
284 » else
285 » » runtime·printf("goroutine %D [%s, %D minutes]:\n", gp->goid, status, waitfor);
272 } 286 }
273 287
274 void 288 void
275 runtime·tracebackothers(G *me) 289 runtime·tracebackothers(G *me)
276 { 290 {
277 G *gp; 291 G *gp;
278 int32 traceback; 292 int32 traceback;
293 uintptr i;
279 294
280 traceback = runtime·gotraceback(nil); 295 traceback = runtime·gotraceback(nil);
281 ········ 296 ········
282 // Show the current goroutine first, if we haven't already. 297 // Show the current goroutine first, if we haven't already.
283 if((gp = m->curg) != nil && gp != me) { 298 if((gp = m->curg) != nil && gp != me) {
284 runtime·printf("\n"); 299 runtime·printf("\n");
285 runtime·goroutineheader(gp); 300 runtime·goroutineheader(gp);
286 runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp); 301 runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);
287 } 302 }
288 303
289 » for(gp = runtime·allg; gp != nil; gp = gp->alllink) { 304 » runtime·lock(&allglock);
305 » for(i = 0; i < runtime·allglen; i++) {
306 » » gp = runtime·allg[i];
290 if(gp == me || gp == m->curg || gp->status == Gdead) 307 if(gp == me || gp == m->curg || gp->status == Gdead)
291 continue; 308 continue;
292 if(gp->issystem && traceback < 2) 309 if(gp->issystem && traceback < 2)
293 continue; 310 continue;
294 runtime·printf("\n"); 311 runtime·printf("\n");
295 runtime·goroutineheader(gp); 312 runtime·goroutineheader(gp);
296 if(gp->status == Grunning) { 313 if(gp->status == Grunning) {
297 runtime·printf("\tgoroutine running on other thread; stack unavailable\n"); 314 runtime·printf("\tgoroutine running on other thread; stack unavailable\n");
298 runtime·printcreatedby(gp); 315 runtime·printcreatedby(gp);
299 } else 316 } else
300 runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp); 317 runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);
301 } 318 }
319 runtime·unlock(&allglock);
302 } 320 }
303 321
304 static void 322 static void
305 checkmcount(void) 323 checkmcount(void)
306 { 324 {
307 // sched lock is held 325 // sched lock is held
308 if(runtime·sched.mcount > runtime·sched.maxmcount) { 326 if(runtime·sched.mcount > runtime·sched.maxmcount) {
309 runtime·printf("runtime: program exceeds %d-thread limit\n", runtime·sched.maxmcount); 327 runtime·printf("runtime: program exceeds %d-thread limit\n", runtime·sched.maxmcount);
310 runtime·throw("thread exhaustion"); 328 runtime·throw("thread exhaustion");
311 } 329 }
(...skipping 308 matching lines...) Expand 10 before | Expand all | Expand 10 after
620 // When running with cgo, we call _cgo_thread_start 638 // When running with cgo, we call _cgo_thread_start
621 // to start threads for us so that we can play nicely with 639 // to start threads for us so that we can play nicely with
622 // foreign code. 640 // foreign code.
623 void (*_cgo_thread_start)(void*); 641 void (*_cgo_thread_start)(void*);
624 642
625 typedef struct CgoThreadStart CgoThreadStart; 643 typedef struct CgoThreadStart CgoThreadStart;
626 struct CgoThreadStart 644 struct CgoThreadStart
627 { 645 {
628 M *m; 646 M *m;
629 G *g; 647 G *g;
648 uintptr *tls;
630 void (*fn)(void); 649 void (*fn)(void);
631 }; 650 };
632 651
633 // Allocate a new m unassociated with any thread. 652 // Allocate a new m unassociated with any thread.
634 // Can use p for allocation context if needed. 653 // Can use p for allocation context if needed.
635 M* 654 M*
636 runtime·allocm(P *p) 655 runtime·allocm(P *p)
637 { 656 {
638 M *mp; 657 M *mp;
639 static Type *mtype; // The Go type M 658 static Type *mtype; // The Go type M
640 659
641 m->locks++; // disable GC because it can be called from sysmon 660 m->locks++; // disable GC because it can be called from sysmon
642 if(m->p == nil) 661 if(m->p == nil)
643 acquirep(p); // temporarily borrow p for mallocs in this function 662 acquirep(p); // temporarily borrow p for mallocs in this function
644 if(mtype == nil) { 663 if(mtype == nil) {
645 Eface e; 664 Eface e;
646 runtime·gc_m_ptr(&e); 665 runtime·gc_m_ptr(&e);
647 mtype = ((PtrType*)e.type)->elem; 666 mtype = ((PtrType*)e.type)->elem;
648 } 667 }
649 668
650 mp = runtime·cnew(mtype); 669 mp = runtime·cnew(mtype);
651 mcommoninit(mp); 670 mcommoninit(mp);
652 671
653 » // In case of cgo, pthread_create will make us a stack. 672 » // In case of cgo or Solaris, pthread_create will make us a stack.
654 // Windows will layout sched stack on OS stack. 673 // Windows will layout sched stack on OS stack.
655 » if(runtime·iscgo || Windows) 674 » if(runtime·iscgo || Solaris || Windows)
656 mp->g0 = runtime·malg(-1); 675 mp->g0 = runtime·malg(-1);
657 else 676 else
658 mp->g0 = runtime·malg(8192); 677 mp->g0 = runtime·malg(8192);
659 678
660 if(p == m->p) 679 if(p == m->p)
661 releasep(); 680 releasep();
662 m->locks--; 681 m->locks--;
663 if(m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack 682 if(m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack
664 g->stackguard0 = StackPreempt; 683 g->stackguard0 = StackPreempt;
665 684
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
783 gp->syscallguard = gp->stackguard; 802 gp->syscallguard = gp->stackguard;
784 gp->status = Gsyscall; 803 gp->status = Gsyscall;
785 mp->curg = gp; 804 mp->curg = gp;
786 mp->locked = LockInternal; 805 mp->locked = LockInternal;
787 mp->lockedg = gp; 806 mp->lockedg = gp;
788 gp->lockedm = mp; 807 gp->lockedm = mp;
789 gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1); 808 gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
790 if(raceenabled) 809 if(raceenabled)
791 gp->racectx = runtime·racegostart(runtime·newextram); 810 gp->racectx = runtime·racegostart(runtime·newextram);
792 // put on allg for garbage collector 811 // put on allg for garbage collector
793 » runtime·lock(&runtime·sched); 812 » allgadd(gp);
794 » if(runtime·lastg == nil)
795 » » runtime·allg = gp;
796 » else
797 » » runtime·lastg->alllink = gp;
798 » runtime·lastg = gp;
799 » runtime·unlock(&runtime·sched);
800 813
801 // Add m to the extra list. 814 // Add m to the extra list.
802 mnext = lockextra(true); 815 mnext = lockextra(true);
803 mp->schedlink = mnext; 816 mp->schedlink = mnext;
804 unlockextra(mp); 817 unlockextra(mp);
805 } 818 }
806 819
807 // dropm is called when a cgo callback has called needm but is now 820 // dropm is called when a cgo callback has called needm but is now
808 // done with the callback and returning back into the non-Go thread. 821 // done with the callback and returning back into the non-Go thread.
809 // It puts the current m back onto the extra list. 822 // It puts the current m back onto the extra list.
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
904 mp->nextp = p; 917 mp->nextp = p;
905 mp->mstartfn = fn; 918 mp->mstartfn = fn;
906 919
907 if(runtime·iscgo) { 920 if(runtime·iscgo) {
908 CgoThreadStart ts; 921 CgoThreadStart ts;
909 922
910 if(_cgo_thread_start == nil) 923 if(_cgo_thread_start == nil)
911 runtime·throw("_cgo_thread_start missing"); 924 runtime·throw("_cgo_thread_start missing");
912 ts.m = mp; 925 ts.m = mp;
913 ts.g = mp->g0; 926 ts.g = mp->g0;
927 ts.tls = mp->tls;
914 ts.fn = runtime·mstart; 928 ts.fn = runtime·mstart;
915 runtime·asmcgocall(_cgo_thread_start, &ts); 929 runtime·asmcgocall(_cgo_thread_start, &ts);
916 return; 930 return;
917 } 931 }
918 runtime·newosproc(mp, (byte*)mp->g0->stackbase); 932 runtime·newosproc(mp, (byte*)mp->g0->stackbase);
919 } 933 }
920 934
921 // Stops execution of the current m until new work is available. 935 // Stops execution of the current m until new work is available.
922 // Returns with acquired P. 936 // Returns with acquired P.
923 static void 937 static void
(...skipping 24 matching lines...) Expand all
948 m->nextp = nil; 962 m->nextp = nil;
949 } 963 }
950 964
951 static void 965 static void
952 mspinning(void) 966 mspinning(void)
953 { 967 {
954 m->spinning = true; 968 m->spinning = true;
955 } 969 }
956 970
957 // Schedules some M to run the p (creates an M if necessary). 971 // Schedules some M to run the p (creates an M if necessary).
958 // If p==nil, tries to get an idle P, if no idle P's returns false. 972 // If p==nil, tries to get an idle P, if no idle P's does nothing.
959 static void 973 static void
960 startm(P *p, bool spinning) 974 startm(P *p, bool spinning)
961 { 975 {
962 M *mp; 976 M *mp;
963 void (*fn)(void); 977 void (*fn)(void);
964 978
965 runtime·lock(&runtime·sched); 979 runtime·lock(&runtime·sched);
966 if(p == nil) { 980 if(p == nil) {
967 p = pidleget(); 981 p = pidleget();
968 if(p == nil) { 982 if(p == nil) {
(...skipping 143 matching lines...) Expand 10 before | Expand all | Expand 10 after
1112 static void 1126 static void
1113 execute(G *gp) 1127 execute(G *gp)
1114 { 1128 {
1115 int32 hz; 1129 int32 hz;
1116 1130
1117 if(gp->status != Grunnable) { 1131 if(gp->status != Grunnable) {
1118 runtime·printf("execute: bad g status %d\n", gp->status); 1132 runtime·printf("execute: bad g status %d\n", gp->status);
1119 runtime·throw("execute: bad g status"); 1133 runtime·throw("execute: bad g status");
1120 } 1134 }
1121 gp->status = Grunning; 1135 gp->status = Grunning;
1136 gp->waitsince = 0;
1122 gp->preempt = false; 1137 gp->preempt = false;
1123 gp->stackguard0 = gp->stackguard; 1138 gp->stackguard0 = gp->stackguard;
1124 m->p->schedtick++; 1139 m->p->schedtick++;
1125 m->curg = gp; 1140 m->curg = gp;
1126 gp->m = m; 1141 gp->m = m;
1127 1142
1128 // Check whether the profiler needs to be turned on or off. 1143 // Check whether the profiler needs to be turned on or off.
1129 hz = runtime·sched.profilehz; 1144 hz = runtime·sched.profilehz;
1130 if(m->profilehz != hz) 1145 if(m->profilehz != hz)
1131 runtime·resetcpuprofiler(hz); 1146 runtime·resetcpuprofiler(hz);
(...skipping 403 matching lines...) Expand 10 before | Expand all | Expand 10 after
1535 // from the low-level system calls used by the runtime. 1550 // from the low-level system calls used by the runtime.
1536 #pragma textflag NOSPLIT 1551 #pragma textflag NOSPLIT
1537 void 1552 void
1538 runtime·exitsyscall(void) 1553 runtime·exitsyscall(void)
1539 { 1554 {
1540 m->locks++; // see comment in entersyscall 1555 m->locks++; // see comment in entersyscall
1541 1556
1542 if(g->isbackground) // do not consider blocked scavenger for deadlock d etection 1557 if(g->isbackground) // do not consider blocked scavenger for deadlock d etection
1543 incidlelocked(-1); 1558 incidlelocked(-1);
1544 1559
1560 g->waitsince = 0;
1545 if(exitsyscallfast()) { 1561 if(exitsyscallfast()) {
1546 // There's a cpu for us, so we can run. 1562 // There's a cpu for us, so we can run.
1547 m->p->syscalltick++; 1563 m->p->syscalltick++;
1548 g->status = Grunning; 1564 g->status = Grunning;
1549 // Garbage collector isn't running (since we are), 1565 // Garbage collector isn't running (since we are),
1550 // so okay to clear gcstack and gcsp. 1566 // so okay to clear gcstack and gcsp.
1551 g->syscallstack = (uintptr)nil; 1567 g->syscallstack = (uintptr)nil;
1552 g->syscallsp = (uintptr)nil; 1568 g->syscallsp = (uintptr)nil;
1553 m->locks--; 1569 m->locks--;
1554 if(g->preempt) { 1570 if(g->preempt) {
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after
1757 // Not worth it: this is almost always an error. 1773 // Not worth it: this is almost always an error.
1758 if(siz > StackMin - 1024) 1774 if(siz > StackMin - 1024)
1759 runtime·throw("runtime.newproc: function arguments too large for new goroutine"); 1775 runtime·throw("runtime.newproc: function arguments too large for new goroutine");
1760 1776
1761 p = m->p; 1777 p = m->p;
1762 if((newg = gfget(p)) != nil) { 1778 if((newg = gfget(p)) != nil) {
1763 if(newg->stackguard - StackGuard != newg->stack0) 1779 if(newg->stackguard - StackGuard != newg->stack0)
1764 runtime·throw("invalid stack in newg"); 1780 runtime·throw("invalid stack in newg");
1765 } else { 1781 } else {
1766 newg = runtime·malg(StackMin); 1782 newg = runtime·malg(StackMin);
1767 » » runtime·lock(&runtime·sched); 1783 » » allgadd(newg);
1768 » » if(runtime·lastg == nil)
1769 » » » runtime·allg = newg;
1770 » » else
1771 » » » runtime·lastg->alllink = newg;
1772 » » runtime·lastg = newg;
1773 » » runtime·unlock(&runtime·sched);
1774 } 1784 }
1775 1785
1776 sp = (byte*)newg->stackbase; 1786 sp = (byte*)newg->stackbase;
1777 sp -= siz; 1787 sp -= siz;
1778 runtime·memmove(sp, argp, narg); 1788 runtime·memmove(sp, argp, narg);
1779 if(thechar == '5') { 1789 if(thechar == '5') {
1780 // caller's LR 1790 // caller's LR
1781 sp -= sizeof(void*); 1791 sp -= sizeof(void*);
1782 *(void**)sp = nil; 1792 *(void**)sp = nil;
1783 } 1793 }
(...skipping 14 matching lines...) Expand all
1798 if(raceenabled) 1808 if(raceenabled)
1799 newg->racectx = runtime·racegostart((void*)callerpc); 1809 newg->racectx = runtime·racegostart((void*)callerpc);
1800 runqput(p, newg); 1810 runqput(p, newg);
1801 1811
1802 if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload( &runtime·sched.nmspinning) == 0 && fn->fn != runtime·main) // TODO: fast atomic 1812 if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload( &runtime·sched.nmspinning) == 0 && fn->fn != runtime·main) // TODO: fast atomic
1803 wakep(); 1813 wakep();
1804 m->locks--; 1814 m->locks--;
1805 if(m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack 1815 if(m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack
1806 g->stackguard0 = StackPreempt; 1816 g->stackguard0 = StackPreempt;
1807 return newg; 1817 return newg;
1818 }
1819
1820 static void
1821 allgadd(G *gp)
1822 {
1823 G **new;
1824 uintptr cap;
1825
1826 runtime·lock(&allglock);
1827 if(runtime·allglen >= allgcap) {
1828 cap = 4096/sizeof(new[0]);
1829 if(cap < 2*allgcap)
1830 cap = 2*allgcap;
1831 new = runtime·malloc(cap*sizeof(new[0]));
1832 if(new == nil)
1833 runtime·throw("runtime: cannot allocate memory");
1834 if(runtime·allg != nil) {
1835 runtime·memmove(new, runtime·allg, runtime·allglen*sizeof(new[0]));
1836 runtime·free(runtime·allg);
1837 }
1838 runtime·allg = new;
1839 allgcap = cap;
1840 }
1841 runtime·allg[runtime·allglen++] = gp;
1842 runtime·unlock(&allglock);
1808 } 1843 }
1809 1844
1810 // Put on gfree list. 1845 // Put on gfree list.
1811 // If local list is too long, transfer a batch to the global list. 1846 // If local list is too long, transfer a batch to the global list.
1812 static void 1847 static void
1813 gfput(P *p, G *gp) 1848 gfput(P *p, G *gp)
1814 { 1849 {
1815 if(gp->stackguard - StackGuard != gp->stack0) 1850 if(gp->stackguard - StackGuard != gp->stack0)
1816 runtime·throw("invalid stack in gfput"); 1851 runtime·throw("invalid stack in gfput");
1817 gp->schedlink = p->gfree; 1852 gp->schedlink = p->gfree;
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after
1989 { 2024 {
1990 ret = runtime·gcount(); 2025 ret = runtime·gcount();
1991 FLUSH(&ret); 2026 FLUSH(&ret);
1992 } 2027 }
1993 2028
1994 int32 2029 int32
1995 runtime·gcount(void) 2030 runtime·gcount(void)
1996 { 2031 {
1997 G *gp; 2032 G *gp;
1998 int32 n, s; 2033 int32 n, s;
2034 uintptr i;
1999 2035
2000 n = 0; 2036 n = 0;
2001 » runtime·lock(&runtime·sched); 2037 » runtime·lock(&allglock);
2002 // TODO(dvyukov): runtime.NumGoroutine() is O(N). 2038 // TODO(dvyukov): runtime.NumGoroutine() is O(N).
2003 // We do not want to increment/decrement centralized counter in newproc/goexit, 2039 // We do not want to increment/decrement centralized counter in newproc/goexit,
2004 // just to make runtime.NumGoroutine() faster. 2040 // just to make runtime.NumGoroutine() faster.
2005 // Compromise solution is to introduce per-P counters of active goroutines. 2041 // Compromise solution is to introduce per-P counters of active goroutines.
2006 » for(gp = runtime·allg; gp; gp = gp->alllink) { 2042 » for(i = 0; i < runtime·allglen; i++) {
2043 » » gp = runtime·allg[i];
2007 s = gp->status; 2044 s = gp->status;
2008 if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting) 2045 if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
2009 n++; 2046 n++;
2010 } 2047 }
2011 » runtime·unlock(&runtime·sched); 2048 » runtime·unlock(&allglock);
2012 return n; 2049 return n;
2013 } 2050 }
2014 2051
2015 int32 2052 int32
2016 runtime·mcount(void) 2053 runtime·mcount(void)
2017 { 2054 {
2018 return runtime·sched.mcount; 2055 return runtime·sched.mcount;
2019 } 2056 }
2020 2057
2021 void 2058 void
(...skipping 23 matching lines...) Expand all
2045 uintptr pcbuf[100]; 2082 uintptr pcbuf[100];
2046 } prof; 2083 } prof;
2047 2084
2048 static void 2085 static void
2049 System(void) 2086 System(void)
2050 { 2087 {
2051 } 2088 }
2052 2089
2053 // Called if we receive a SIGPROF signal. 2090 // Called if we receive a SIGPROF signal.
2054 void 2091 void
2055 runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp) 2092 runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp, M *mp)
2056 { 2093 {
2057 int32 n; 2094 int32 n;
2058 bool traceback; 2095 bool traceback;
2096 MCache *mcache;
2097 // Do not use global m in this function, use mp instead.
2098 // On windows one m is sending reports about all the g's, so m means a wrong thing.
2099 byte m;
2100
2101 m = 0;
2102 USED(m);
2059 2103
2060 if(prof.fn == nil || prof.hz == 0) 2104 if(prof.fn == nil || prof.hz == 0)
2061 return; 2105 return;
2062 » traceback = true; 2106
2063 » // Windows does profiling in a dedicated thread w/o m. 2107 » // Profiling runs concurrently with GC, so it must not allocate.
2064 » if(!Windows && (m == nil || m->mcache == nil)) 2108 » mcache = mp->mcache;
2065 » » traceback = false; 2109 » mp->mcache = nil;
2066 »······· 2110
2067 // Define that a "user g" is a user-created goroutine, and a "system g" 2111 // Define that a "user g" is a user-created goroutine, and a "system g"
2068 // is one that is m->g0 or m->gsignal. We've only made sure that we 2112 // is one that is m->g0 or m->gsignal. We've only made sure that we
2069 // can unwind user g's, so exclude the system g's. 2113 // can unwind user g's, so exclude the system g's.
2070 // 2114 //
2071 // It is not quite as easy as testing gp == m->curg (the current user g) 2115 // It is not quite as easy as testing gp == m->curg (the current user g)
2072 // because we might be interrupted for profiling halfway through a 2116 // because we might be interrupted for profiling halfway through a
2073 // goroutine switch. The switch involves updating three (or four) values : 2117 // goroutine switch. The switch involves updating three (or four) values :
2074 // g, PC, SP, and (on arm) LR. The PC must be the last to be updated, 2118 // g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
2075 // because once it gets updated the new g is running. 2119 // because once it gets updated the new g is running.
2076 // 2120 //
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
2129 // The biggest drawback to this solution is that it requires that we can tell 2173 // The biggest drawback to this solution is that it requires that we can tell
2130 // whether it's safe to read from the memory pointed at by PC. 2174 // whether it's safe to read from the memory pointed at by PC.
2131 // In a correct program, we can test PC == nil and otherwise read, 2175 // In a correct program, we can test PC == nil and otherwise read,
2132 // but if a profiling signal happens at the instant that a program execu tes 2176 // but if a profiling signal happens at the instant that a program execu tes
2133 // a bad jump (before the program manages to handle the resulting fault) 2177 // a bad jump (before the program manages to handle the resulting fault)
2134 // the profiling handler could fault trying to read nonexistent memory. 2178 // the profiling handler could fault trying to read nonexistent memory.
2135 // 2179 //
2136 // To recap, there are no constraints on the assembly being used for the 2180 // To recap, there are no constraints on the assembly being used for the
2137 // transition. We simply require that g and SP match and that the PC is not 2181 // transition. We simply require that g and SP match and that the PC is not
2138 // in runtime.gogo. 2182 // in runtime.gogo.
2139 » // 2183 » traceback = true;
2140 » // On Windows, one m is sending reports about all the g's, so gp == m->c urg 2184 » if(gp == nil || gp != mp->curg ||
2141 » // is not a useful comparison. The profilem function in os_windows.c has
2142 » // already checked that gp is a user g.
2143 » if(gp == nil ||
2144 » (!Windows && gp != m->curg) ||
2145 (uintptr)sp < gp->stackguard - StackGuard || gp->stackbase < (uintptr )sp || 2185 (uintptr)sp < gp->stackguard - StackGuard || gp->stackbase < (uintptr )sp ||
2146 ((uint8*)runtime·gogo <= pc && pc < (uint8*)runtime·gogo + RuntimeGog oBytes)) 2186 ((uint8*)runtime·gogo <= pc && pc < (uint8*)runtime·gogo + RuntimeGog oBytes))
2147 traceback = false; 2187 traceback = false;
2148 2188
2149 // Race detector calls asmcgocall w/o entersyscall/exitsyscall, 2189 // Race detector calls asmcgocall w/o entersyscall/exitsyscall,
2150 // we can not currently unwind through asmcgocall. 2190 // we can not currently unwind through asmcgocall.
2151 » if(m != nil && m->racecall) 2191 » if(mp != nil && mp->racecall)
2152 traceback = false; 2192 traceback = false;
2153 2193
2154 runtime·lock(&prof); 2194 runtime·lock(&prof);
2155 if(prof.fn == nil) { 2195 if(prof.fn == nil) {
2156 runtime·unlock(&prof); 2196 runtime·unlock(&prof);
2197 mp->mcache = mcache;
2157 return; 2198 return;
2158 } 2199 }
2159 n = 0; 2200 n = 0;
2160 if(traceback) 2201 if(traceback)
2161 n = runtime·gentraceback((uintptr)pc, (uintptr)sp, (uintptr)lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false); 2202 n = runtime·gentraceback((uintptr)pc, (uintptr)sp, (uintptr)lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);
2162 if(!traceback || n <= 0) { 2203 if(!traceback || n <= 0) {
2163 n = 2; 2204 n = 2;
2164 prof.pcbuf[0] = (uintptr)pc; 2205 prof.pcbuf[0] = (uintptr)pc;
2165 prof.pcbuf[1] = (uintptr)System + 1; 2206 prof.pcbuf[1] = (uintptr)System + 1;
2166 } 2207 }
2167 prof.fn(prof.pcbuf, n); 2208 prof.fn(prof.pcbuf, n);
2168 runtime·unlock(&prof); 2209 runtime·unlock(&prof);
2210 mp->mcache = mcache;
2169 } 2211 }
2170 2212
2171 // Arrange to call fn with a traceback hz times a second. 2213 // Arrange to call fn with a traceback hz times a second.
2172 void 2214 void
2173 runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz) 2215 runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
2174 { 2216 {
2175 // Force sane arguments. 2217 // Force sane arguments.
2176 if(hz < 0) 2218 if(hz < 0)
2177 hz = 0; 2219 hz = 0;
2178 if(hz == 0) 2220 if(hz == 0)
(...skipping 22 matching lines...) Expand all
2201 runtime·resetcpuprofiler(hz); 2243 runtime·resetcpuprofiler(hz);
2202 2244
2203 m->locks--; 2245 m->locks--;
2204 } 2246 }
2205 2247
2206 // Change number of processors. The world is stopped, sched is locked. 2248 // Change number of processors. The world is stopped, sched is locked.
2207 static void 2249 static void
2208 procresize(int32 new) 2250 procresize(int32 new)
2209 { 2251 {
2210 int32 i, old; 2252 int32 i, old;
2253 bool empty;
2211 G *gp; 2254 G *gp;
2212 P *p; 2255 P *p;
2213 2256
2214 old = runtime·gomaxprocs; 2257 old = runtime·gomaxprocs;
2215 if(old < 0 || old > MaxGomaxprocs || new <= 0 || new >MaxGomaxprocs) 2258 if(old < 0 || old > MaxGomaxprocs || new <= 0 || new >MaxGomaxprocs)
2216 runtime·throw("procresize: invalid arg"); 2259 runtime·throw("procresize: invalid arg");
2217 // initialize new P's 2260 // initialize new P's
2218 for(i = 0; i < new; i++) { 2261 for(i = 0; i < new; i++) {
2219 p = runtime·allp[i]; 2262 p = runtime·allp[i];
2220 if(p == nil) { 2263 if(p == nil) {
2221 p = (P*)runtime·mallocgc(sizeof(*p), 0, FlagNoInvokeGC); 2264 p = (P*)runtime·mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
2222 p->id = i; 2265 p->id = i;
2223 p->status = Pgcstop; 2266 p->status = Pgcstop;
2224 runtime·atomicstorep(&runtime·allp[i], p); 2267 runtime·atomicstorep(&runtime·allp[i], p);
2225 } 2268 }
2226 if(p->mcache == nil) { 2269 if(p->mcache == nil) {
2227 if(old==0 && i==0) 2270 if(old==0 && i==0)
2228 p->mcache = m->mcache; // bootstrap 2271 p->mcache = m->mcache; // bootstrap
2229 else 2272 else
2230 p->mcache = runtime·allocmcache(); 2273 p->mcache = runtime·allocmcache();
2231 } 2274 }
2232 if(p->runq == nil) {
2233 p->runqsize = 128;
2234 p->runq = (G**)runtime·mallocgc(p->runqsize*sizeof(G*), 0, FlagNoInvokeGC);
2235 }
2236 } 2275 }
2237 2276
2238 // redistribute runnable G's evenly 2277 // redistribute runnable G's evenly
2239 » for(i = 0; i < old; i++) { 2278 » // collect all runnable goroutines in global queue preserving FIFO order
2240 » » p = runtime·allp[i]; 2279 » // FIFO order is required to ensure fairness even during frequent GCs
2241 » » while(gp = runqget(p)) 2280 » // see http://golang.org/issue/7126
2242 » » » globrunqput(gp); 2281 » empty = false;
2243 » } 2282 » while(!empty) {
2283 » » empty = true;
2284 » » for(i = 0; i < old; i++) {
2285 » » » p = runtime·allp[i];
2286 » » » if(p->runqhead == p->runqtail)
2287 » » » » continue;
2288 » » » empty = false;
2289 » » » // pop from tail of local queue
2290 » » » p->runqtail--;
2291 » » » gp = p->runq[p->runqtail%nelem(p->runq)];
2292 » » » // push onto head of global queue
2293 » » » gp->schedlink = runtime·sched.runqhead;
2294 » » » runtime·sched.runqhead = gp;
2295 » » » if(runtime·sched.runqtail == nil)
2296 » » » » runtime·sched.runqtail = gp;
2297 » » » runtime·sched.runqsize++;
2298 » » }
2299 » }
2300 » // fill local queues with at most nelem(p->runq)/2 goroutines
2244 // start at 1 because current M already executes some G and will acquire allp[0] below, 2301 // start at 1 because current M already executes some G and will acquire allp[0] below,
2245 // so if we have a spare G we want to put it into allp[1]. 2302 // so if we have a spare G we want to put it into allp[1].
2246 » for(i = 1; runtime·sched.runqhead; i++) { 2303 » for(i = 1; i < new * nelem(p->runq)/2 && runtime·sched.runqsize > 0; i++ ) {
2247 gp = runtime·sched.runqhead; 2304 gp = runtime·sched.runqhead;
2248 runtime·sched.runqhead = gp->schedlink; 2305 runtime·sched.runqhead = gp->schedlink;
2306 if(runtime·sched.runqhead == nil)
2307 runtime·sched.runqtail = nil;
2308 runtime·sched.runqsize--;
2249 runqput(runtime·allp[i%new], gp); 2309 runqput(runtime·allp[i%new], gp);
2250 } 2310 }
2251 runtime·sched.runqtail = nil;
2252 runtime·sched.runqsize = 0;
2253 2311
2254 // free unused P's 2312 // free unused P's
2255 for(i = new; i < old; i++) { 2313 for(i = new; i < old; i++) {
2256 p = runtime·allp[i]; 2314 p = runtime·allp[i];
2257 runtime·freemcache(p->mcache); 2315 runtime·freemcache(p->mcache);
2258 p->mcache = nil; 2316 p->mcache = nil;
2259 gfpurge(p); 2317 gfpurge(p);
2260 p->status = Pdead; 2318 p->status = Pdead;
2261 // can't free P itself because it can be referenced by an M in s yscall 2319 // can't free P itself because it can be referenced by an M in s yscall
2262 } 2320 }
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
2324 runtime·unlock(&runtime·sched); 2382 runtime·unlock(&runtime·sched);
2325 } 2383 }
2326 2384
2327 // Check for deadlock situation. 2385 // Check for deadlock situation.
2328 // The check is based on number of running M's, if 0 -> deadlock. 2386 // The check is based on number of running M's, if 0 -> deadlock.
2329 static void 2387 static void
2330 checkdead(void) 2388 checkdead(void)
2331 { 2389 {
2332 G *gp; 2390 G *gp;
2333 int32 run, grunning, s; 2391 int32 run, grunning, s;
2392 uintptr i;
2334 2393
2335 // -1 for sysmon 2394 // -1 for sysmon
2336 run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.nmidle locked - 1; 2395 run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.nmidle locked - 1;
2337 if(run > 0) 2396 if(run > 0)
2338 return; 2397 return;
2339 if(run < 0) { 2398 if(run < 0) {
2340 runtime·printf("checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n ", 2399 runtime·printf("checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n ",
2341 runtime·sched.nmidle, runtime·sched.nmidlelocked, runtim e·sched.mcount); 2400 runtime·sched.nmidle, runtime·sched.nmidlelocked, runtim e·sched.mcount);
2342 runtime·throw("checkdead: inconsistent counts"); 2401 runtime·throw("checkdead: inconsistent counts");
2343 } 2402 }
2344 grunning = 0; 2403 grunning = 0;
2345 » for(gp = runtime·allg; gp; gp = gp->alllink) { 2404 » runtime·lock(&allglock);
2405 » for(i = 0; i < runtime·allglen; i++) {
2406 » » gp = runtime·allg[i];
2346 if(gp->isbackground) 2407 if(gp->isbackground)
2347 continue; 2408 continue;
2348 s = gp->status; 2409 s = gp->status;
2349 if(s == Gwaiting) 2410 if(s == Gwaiting)
2350 grunning++; 2411 grunning++;
2351 else if(s == Grunnable || s == Grunning || s == Gsyscall) { 2412 else if(s == Grunnable || s == Grunning || s == Gsyscall) {
2413 runtime·unlock(&allglock);
2352 runtime·printf("checkdead: find g %D in status %d\n", gp ->goid, s); 2414 runtime·printf("checkdead: find g %D in status %d\n", gp ->goid, s);
2353 runtime·throw("checkdead: runnable g"); 2415 runtime·throw("checkdead: runnable g");
2354 } 2416 }
2355 } 2417 }
2418 runtime·unlock(&allglock);
2356 if(grunning == 0) // possible if main goroutine calls runtime·Goexit() 2419 if(grunning == 0) // possible if main goroutine calls runtime·Goexit()
2357 runtime·exit(0); 2420 runtime·exit(0);
2358 m->throwing = -1; // do not dump full stacks 2421 m->throwing = -1; // do not dump full stacks
2359 runtime·throw("all goroutines are asleep - deadlock!"); 2422 runtime·throw("all goroutines are asleep - deadlock!");
2360 } 2423 }
2361 2424
2362 static void 2425 static void
2363 sysmon(void) 2426 sysmon(void)
2364 { 2427 {
2365 uint32 idle, delay; 2428 uint32 idle, delay;
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after
2531 gp->stackguard0 = StackPreempt; 2594 gp->stackguard0 = StackPreempt;
2532 return true; 2595 return true;
2533 } 2596 }
2534 2597
2535 void 2598 void
2536 runtime·schedtrace(bool detailed) 2599 runtime·schedtrace(bool detailed)
2537 { 2600 {
2538 static int64 starttime; 2601 static int64 starttime;
2539 int64 now; 2602 int64 now;
2540 int64 id1, id2, id3; 2603 int64 id1, id2, id3;
2541 » int32 i, q, t, h, s; 2604 » int32 i, t, h;
2605 » uintptr gi;
2542 int8 *fmt; 2606 int8 *fmt;
2543 M *mp, *lockedm; 2607 M *mp, *lockedm;
2544 G *gp, *lockedg; 2608 G *gp, *lockedg;
2545 P *p; 2609 P *p;
2546 2610
2547 now = runtime·nanotime(); 2611 now = runtime·nanotime();
2548 if(starttime == 0) 2612 if(starttime == 0)
2549 starttime = now; 2613 starttime = now;
2550 2614
2551 runtime·lock(&runtime·sched); 2615 runtime·lock(&runtime·sched);
2552 runtime·printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idleth reads=%d runqueue=%d", 2616 runtime·printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idleth reads=%d runqueue=%d",
2553 (now-starttime)/1000000, runtime·gomaxprocs, runtime·sched.npidl e, runtime·sched.mcount, 2617 (now-starttime)/1000000, runtime·gomaxprocs, runtime·sched.npidl e, runtime·sched.mcount,
2554 runtime·sched.nmidle, runtime·sched.runqsize); 2618 runtime·sched.nmidle, runtime·sched.runqsize);
2555 if(detailed) { 2619 if(detailed) {
2556 runtime·printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stop wait=%d sysmonwait=%d\n", 2620 runtime·printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stop wait=%d sysmonwait=%d\n",
2557 runtime·sched.gcwaiting, runtime·sched.nmidlelocked, run time·sched.nmspinning, 2621 runtime·sched.gcwaiting, runtime·sched.nmidlelocked, run time·sched.nmspinning,
2558 runtime·sched.stopwait, runtime·sched.sysmonwait); 2622 runtime·sched.stopwait, runtime·sched.sysmonwait);
2559 } 2623 }
2560 // We must be careful while reading data from P's, M's and G's. 2624 // We must be careful while reading data from P's, M's and G's.
2561 // Even if we hold schedlock, most data can be changed concurrently. 2625 // Even if we hold schedlock, most data can be changed concurrently.
2562 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil. 2626 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
2563 for(i = 0; i < runtime·gomaxprocs; i++) { 2627 for(i = 0; i < runtime·gomaxprocs; i++) {
2564 p = runtime·allp[i]; 2628 p = runtime·allp[i];
2565 if(p == nil) 2629 if(p == nil)
2566 continue; 2630 continue;
2567 mp = p->m; 2631 mp = p->m;
2568 » » t = p->runqtail; 2632 » » h = runtime·atomicload(&p->runqhead);
2569 » » h = p->runqhead; 2633 » » t = runtime·atomicload(&p->runqtail);
2570 » » s = p->runqsize;
2571 » » q = t - h;
2572 » » if(q < 0)
2573 » » » q += s;
2574 if(detailed) 2634 if(detailed)
2575 » » » runtime·printf(" P%d: status=%d schedtick=%d syscalltic k=%d m=%d runqsize=%d/%d gfreecnt=%d\n", 2635 » » » runtime·printf(" P%d: status=%d schedtick=%d syscalltic k=%d m=%d runqsize=%d gfreecnt=%d\n",
2576 » » » » i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, q, s, p->gfreecnt); 2636 » » » » i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
2577 else { 2637 else {
2578 // In non-detailed mode format lengths of per-P run queu es as: 2638 // In non-detailed mode format lengths of per-P run queu es as:
2579 // [len1 len2 len3 len4] 2639 // [len1 len2 len3 len4]
2580 fmt = " %d"; 2640 fmt = " %d";
2581 if(runtime·gomaxprocs == 1) 2641 if(runtime·gomaxprocs == 1)
2582 fmt = " [%d]\n"; 2642 fmt = " [%d]\n";
2583 else if(i == 0) 2643 else if(i == 0)
2584 fmt = " [%d"; 2644 fmt = " [%d";
2585 else if(i == runtime·gomaxprocs-1) 2645 else if(i == runtime·gomaxprocs-1)
2586 fmt = " %d]\n"; 2646 fmt = " %d]\n";
2587 » » » runtime·printf(fmt, q); 2647 » » » runtime·printf(fmt, t-h);
2588 } 2648 }
2589 } 2649 }
2590 if(!detailed) { 2650 if(!detailed) {
2591 runtime·unlock(&runtime·sched); 2651 runtime·unlock(&runtime·sched);
2592 return; 2652 return;
2593 } 2653 }
2594 for(mp = runtime·allm; mp; mp = mp->alllink) { 2654 for(mp = runtime·allm; mp; mp = mp->alllink) {
2595 p = mp->p; 2655 p = mp->p;
2596 gp = mp->curg; 2656 gp = mp->curg;
2597 lockedg = mp->lockedg; 2657 lockedg = mp->lockedg;
2598 id1 = -1; 2658 id1 = -1;
2599 if(p) 2659 if(p)
2600 id1 = p->id; 2660 id1 = p->id;
2601 id2 = -1; 2661 id2 = -1;
2602 if(gp) 2662 if(gp)
2603 id2 = gp->goid; 2663 id2 = gp->goid;
2604 id3 = -1; 2664 id3 = -1;
2605 if(lockedg) 2665 if(lockedg)
2606 id3 = lockedg->goid; 2666 id3 = lockedg->goid;
2607 runtime·printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gci ng=%d" 2667 runtime·printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gci ng=%d"
2608 " locks=%d dying=%d helpgc=%d spinning=%d lockedg=%D\n", 2668 " locks=%d dying=%d helpgc=%d spinning=%d lockedg=%D\n",
2609 mp->id, id1, id2, 2669 mp->id, id1, id2,
2610 mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->d ying, mp->helpgc, 2670 mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->d ying, mp->helpgc,
2611 mp->spinning, id3); 2671 mp->spinning, id3);
2612 } 2672 }
2613 » for(gp = runtime·allg; gp; gp = gp->alllink) { 2673 » runtime·lock(&allglock);
2674 » for(gi = 0; gi < runtime·allglen; gi++) {
2675 » » gp = runtime·allg[gi];
2614 mp = gp->m; 2676 mp = gp->m;
2615 lockedm = gp->lockedm; 2677 lockedm = gp->lockedm;
2616 runtime·printf(" G%D: status=%d(%s) m=%d lockedm=%d\n", 2678 runtime·printf(" G%D: status=%d(%s) m=%d lockedm=%d\n",
2617 gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1, 2679 gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1,
2618 lockedm ? lockedm->id : -1); 2680 lockedm ? lockedm->id : -1);
2619 } 2681 }
2682 runtime·unlock(&allglock);
2620 runtime·unlock(&runtime·sched); 2683 runtime·unlock(&runtime·sched);
2621 } 2684 }
2622 2685
2623 // Put mp on midle list. 2686 // Put mp on midle list.
2624 // Sched must be locked. 2687 // Sched must be locked.
2625 static void 2688 static void
2626 mput(M *mp) 2689 mput(M *mp)
2627 { 2690 {
2628 mp->schedlink = runtime·sched.midle; 2691 mp->schedlink = runtime·sched.midle;
2629 runtime·sched.midle = mp; 2692 runtime·sched.midle = mp;
(...skipping 22 matching lines...) Expand all
2652 { 2715 {
2653 gp->schedlink = nil; 2716 gp->schedlink = nil;
2654 if(runtime·sched.runqtail) 2717 if(runtime·sched.runqtail)
2655 runtime·sched.runqtail->schedlink = gp; 2718 runtime·sched.runqtail->schedlink = gp;
2656 else 2719 else
2657 runtime·sched.runqhead = gp; 2720 runtime·sched.runqhead = gp;
2658 runtime·sched.runqtail = gp; 2721 runtime·sched.runqtail = gp;
2659 runtime·sched.runqsize++; 2722 runtime·sched.runqsize++;
2660 } 2723 }
2661 2724
2725 // Put a batch of runnable goroutines on the global runnable queue.
2726 // Sched must be locked.
2727 static void
2728 globrunqputbatch(G *ghead, G *gtail, int32 n)
2729 {
2730 gtail->schedlink = nil;
2731 if(runtime·sched.runqtail)
2732 runtime·sched.runqtail->schedlink = ghead;
2733 else
2734 runtime·sched.runqhead = ghead;
2735 runtime·sched.runqtail = gtail;
2736 runtime·sched.runqsize += n;
2737 }
2738
2662 // Try get a batch of G's from the global runnable queue. 2739 // Try get a batch of G's from the global runnable queue.
2663 // Sched must be locked. 2740 // Sched must be locked.
2664 static G* 2741 static G*
2665 globrunqget(P *p, int32 max) 2742 globrunqget(P *p, int32 max)
2666 { 2743 {
2667 G *gp, *gp1; 2744 G *gp, *gp1;
2668 int32 n; 2745 int32 n;
2669 2746
2670 if(runtime·sched.runqsize == 0) 2747 if(runtime·sched.runqsize == 0)
2671 return nil; 2748 return nil;
2672 n = runtime·sched.runqsize/runtime·gomaxprocs+1; 2749 n = runtime·sched.runqsize/runtime·gomaxprocs+1;
2673 if(n > runtime·sched.runqsize) 2750 if(n > runtime·sched.runqsize)
2674 n = runtime·sched.runqsize; 2751 n = runtime·sched.runqsize;
2675 if(max > 0 && n > max) 2752 if(max > 0 && n > max)
2676 n = max; 2753 n = max;
2754 if(n > nelem(p->runq)/2)
2755 n = nelem(p->runq)/2;
2677 runtime·sched.runqsize -= n; 2756 runtime·sched.runqsize -= n;
2678 if(runtime·sched.runqsize == 0) 2757 if(runtime·sched.runqsize == 0)
2679 runtime·sched.runqtail = nil; 2758 runtime·sched.runqtail = nil;
2680 gp = runtime·sched.runqhead; 2759 gp = runtime·sched.runqhead;
2681 runtime·sched.runqhead = gp->schedlink; 2760 runtime·sched.runqhead = gp->schedlink;
2682 n--; 2761 n--;
2683 while(n--) { 2762 while(n--) {
2684 gp1 = runtime·sched.runqhead; 2763 gp1 = runtime·sched.runqhead;
2685 runtime·sched.runqhead = gp1->schedlink; 2764 runtime·sched.runqhead = gp1->schedlink;
2686 runqput(p, gp1); 2765 runqput(p, gp1);
(...skipping 19 matching lines...) Expand all
2706 P *p; 2785 P *p;
2707 2786
2708 p = runtime·sched.pidle; 2787 p = runtime·sched.pidle;
2709 if(p) { 2788 if(p) {
2710 runtime·sched.pidle = p->link; 2789 runtime·sched.pidle = p->link;
2711 runtime·xadd(&runtime·sched.npidle, -1); // TODO: fast atomic 2790 runtime·xadd(&runtime·sched.npidle, -1); // TODO: fast atomic
2712 } 2791 }
2713 return p; 2792 return p;
2714 } 2793 }
2715 2794
2716 // Put g on local runnable queue. 2795 // Try to put g on local runnable queue.
2717 // TODO(dvyukov): consider using lock-free queue. 2796 // If it's full, put onto global queue.
2797 // Executed only by the owner P.
2718 static void 2798 static void
2719 runqput(P *p, G *gp) 2799 runqput(P *p, G *gp)
2720 { 2800 {
2721 » int32 h, t, s; 2801 » uint32 h, t;
2722 2802
2723 » runtime·lock(p);
2724 retry: 2803 retry:
2725 » h = p->runqhead; 2804 » h = runtime·atomicload(&p->runqhead); // load-acquire, synchronize with consumers
2726 t = p->runqtail; 2805 t = p->runqtail;
2727 » s = p->runqsize; 2806 » if(t - h < nelem(p->runq)) {
2728 » if(t == h-1 || (h == 0 && t == s-1)) { 2807 » » p->runq[t%nelem(p->runq)] = gp;
2729 » » runqgrow(p); 2808 » » runtime·atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
2730 » » goto retry; 2809 » » return;
2731 » } 2810 » }
2732 » p->runq[t++] = gp; 2811 » if(runqputslow(p, gp, h, t))
2733 » if(t == s) 2812 » » return;
2734 » » t = 0; 2813 » // the queue is not full, now the put above must suceed
2735 » p->runqtail = t; 2814 » goto retry;
2736 » runtime·unlock(p); 2815 }
2816
2817 // Put g and a batch of work from local runnable queue on global queue.
2818 // Executed only by the owner P.
2819 static bool
2820 runqputslow(P *p, G *gp, uint32 h, uint32 t)
2821 {
2822 » G *batch[nelem(p->runq)/2+1];
2823 » uint32 n, i;
2824
2825 » // First, grab a batch from local queue.
2826 » n = t-h;
2827 » n = n/2;
2828 » if(n != nelem(p->runq)/2)
2829 » » runtime·throw("runqputslow: queue is not full");
2830 » for(i=0; i<n; i++)
2831 » » batch[i] = p->runq[(h+i)%nelem(p->runq)];
2832 » if(!runtime·cas(&p->runqhead, h, h+n)) // cas-release, commits consume
2833 » » return false;
2834 » batch[n] = gp;
2835 » // Link the goroutines.
2836 » for(i=0; i<n; i++)
2837 » » batch[i]->schedlink = batch[i+1];
2838 » // Now put the batch on global queue.
2839 » runtime·lock(&runtime·sched);
2840 » globrunqputbatch(batch[0], batch[n], n+1);
2841 » runtime·unlock(&runtime·sched);
2842 » return true;
2737 } 2843 }
2738 2844
2739 // Get g from local runnable queue. 2845 // Get g from local runnable queue.
2846 // Executed only by the owner P.
2740 static G* 2847 static G*
2741 runqget(P *p) 2848 runqget(P *p)
2742 { 2849 {
2743 G *gp; 2850 G *gp;
2744 » int32 t, h, s; 2851 » uint32 t, h;
2745 2852
2746 » if(p->runqhead == p->runqtail) 2853 » for(;;) {
2747 » » return nil; 2854 » » h = runtime·atomicload(&p->runqhead); // load-acquire, synchron ize with other consumers
2748 » runtime·lock(p); 2855 » » t = p->runqtail;
2749 » h = p->runqhead; 2856 » » if(t == h)
2750 » t = p->runqtail; 2857 » » » return nil;
2751 » s = p->runqsize; 2858 » » gp = p->runq[h%nelem(p->runq)];
2752 » if(t == h) { 2859 » » if(runtime·cas(&p->runqhead, h, h+1)) // cas-release, commits c onsume
2753 » » runtime·unlock(p); 2860 » » » return gp;
2754 » » return nil; 2861 » }
2755 » } 2862 }
2756 » gp = p->runq[h++]; 2863
2757 » if(h == s) 2864 // Grabs a batch of goroutines from local runnable queue.
2758 » » h = 0; 2865 // batch array must be of size nelem(p->runq)/2. Returns number of grabbed gorou tines.
2759 » p->runqhead = h; 2866 // Can be executed by any P.
2760 » runtime·unlock(p); 2867 static uint32
2761 » return gp; 2868 runqgrab(P *p, G **batch)
2762 } 2869 {
2763 2870 » uint32 t, h, n, i;
2764 // Grow local runnable queue. 2871
2765 // TODO(dvyukov): consider using fixed-size array 2872 » for(;;) {
2766 // and transfer excess to the global list (local queue can grow way too big). 2873 » » h = runtime·atomicload(&p->runqhead); // load-acquire, synchron ize with other consumers
2767 static void 2874 » » t = runtime·atomicload(&p->runqtail); // load-acquire, synchron ize with the producer
2768 runqgrow(P *p) 2875 » » n = t-h;
2769 { 2876 » » n = n - n/2;
2770 » G **q; 2877 » » if(n == 0)
2771 » int32 s, t, h, t2; 2878 » » » break;
2772 2879 » » if(n > nelem(p->runq)/2) // read inconsistent h and t
2773 » h = p->runqhead; 2880 » » » continue;
2774 » t = p->runqtail; 2881 » » for(i=0; i<n; i++)
2775 » s = p->runqsize; 2882 » » » batch[i] = p->runq[(h+i)%nelem(p->runq)];
2776 » t2 = 0; 2883 » » if(runtime·cas(&p->runqhead, h, h+n)) // cas-release, commits c onsume
2777 » q = runtime·malloc(2*s*sizeof(*q)); 2884 » » » break;
2778 » while(t != h) { 2885 » }
2779 » » q[t2++] = p->runq[h++]; 2886 » return n;
2780 » » if(h == s)
2781 » » » h = 0;
2782 » }
2783 » runtime·free(p->runq);
2784 » p->runq = q;
2785 » p->runqhead = 0;
2786 » p->runqtail = t2;
2787 » p->runqsize = 2*s;
2788 } 2887 }
2789 2888
2790 // Steal half of elements from local runnable queue of p2 2889 // Steal half of elements from local runnable queue of p2
2791 // and put onto local runnable queue of p. 2890 // and put onto local runnable queue of p.
2792 // Returns one of the stolen elements (or nil if failed). 2891 // Returns one of the stolen elements (or nil if failed).
2793 static G* 2892 static G*
2794 runqsteal(P *p, P *p2) 2893 runqsteal(P *p, P *p2)
2795 { 2894 {
2796 » G *gp, *gp1; 2895 » G *gp;
2797 » int32 t, h, s, t2, h2, s2, c, i; 2896 » G *batch[nelem(p->runq)/2];
2798 2897 » uint32 t, h, n, i;
2799 » if(p2->runqhead == p2->runqtail) 2898
2899 » n = runqgrab(p2, batch);
2900 » if(n == 0)
2800 return nil; 2901 return nil;
2801 » // sort locks to prevent deadlocks 2902 » n--;
2802 » if(p < p2) 2903 » gp = batch[n];
2803 » » runtime·lock(p); 2904 » if(n == 0)
2804 » runtime·lock(p2); 2905 » » return gp;
2805 » if(p2->runqhead == p2->runqtail) { 2906 » h = runtime·atomicload(&p->runqhead); // load-acquire, synchronize with consumers
2806 » » runtime·unlock(p2);
2807 » » if(p < p2)
2808 » » » runtime·unlock(p);
2809 » » return nil;
2810 » }
2811 » if(p >= p2)
2812 » » runtime·lock(p);
2813 » // now we've locked both queues and know the victim is not empty
2814 » h = p->runqhead;
2815 t = p->runqtail; 2907 t = p->runqtail;
2816 » s = p->runqsize; 2908 » if(t - h + n >= nelem(p->runq))
2817 » h2 = p2->runqhead; 2909 » » runtime·throw("runqsteal: runq overflow");
2818 » t2 = p2->runqtail; 2910 » for(i=0; i<n; i++, t++)
2819 » s2 = p2->runqsize; 2911 » » p->runq[t%nelem(p->runq)] = batch[i];
2820 » gp = p2->runq[h2++]; // return value 2912 » runtime·atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption
2821 » if(h2 == s2)
2822 » » h2 = 0;
2823 » // steal roughly half
2824 » if(t2 > h2)
2825 » » c = (t2 - h2) / 2;
2826 » else
2827 » » c = (s2 - h2 + t2) / 2;
2828 » // copy
2829 » for(i = 0; i != c; i++) {
2830 » » // the target queue is full?
2831 » » if(t == h-1 || (h == 0 && t == s-1))
2832 » » » break;
2833 » » // the victim queue is empty?
2834 » » if(t2 == h2)
2835 » » » break;
2836 » » gp1 = p2->runq[h2++];
2837 » » if(h2 == s2)
2838 » » » h2 = 0;
2839 » » p->runq[t++] = gp1;
2840 » » if(t == s)
2841 » » » t = 0;
2842 » }
2843 » p->runqtail = t;
2844 » p2->runqhead = h2;
2845 » runtime·unlock(p2);
2846 » runtime·unlock(p);
2847 return gp; 2913 return gp;
2848 } 2914 }
2849 2915
2850 void 2916 void
2851 runtime·testSchedLocalQueue(void) 2917 runtime·testSchedLocalQueue(void)
2852 { 2918 {
2853 P p; 2919 P p;
2854 » G gs[1000]; 2920 » G gs[nelem(p.runq)];
2855 int32 i, j; 2921 int32 i, j;
2856 2922
2857 runtime·memclr((byte*)&p, sizeof(p)); 2923 runtime·memclr((byte*)&p, sizeof(p));
2858 p.runqsize = 1;
2859 p.runqhead = 0;
2860 p.runqtail = 0;
2861 p.runq = runtime·malloc(p.runqsize*sizeof(*p.runq));
2862 2924
2863 for(i = 0; i < nelem(gs); i++) { 2925 for(i = 0; i < nelem(gs); i++) {
2864 if(runqget(&p) != nil) 2926 if(runqget(&p) != nil)
2865 runtime·throw("runq is not empty initially"); 2927 runtime·throw("runq is not empty initially");
2866 for(j = 0; j < i; j++) 2928 for(j = 0; j < i; j++)
2867 runqput(&p, &gs[i]); 2929 runqput(&p, &gs[i]);
2868 for(j = 0; j < i; j++) { 2930 for(j = 0; j < i; j++) {
2869 if(runqget(&p) != &gs[i]) { 2931 if(runqget(&p) != &gs[i]) {
2870 runtime·printf("bad element at iter %d/%d\n", i, j); 2932 runtime·printf("bad element at iter %d/%d\n", i, j);
2871 runtime·throw("bad element"); 2933 runtime·throw("bad element");
2872 } 2934 }
2873 } 2935 }
2874 if(runqget(&p) != nil) 2936 if(runqget(&p) != nil)
2875 runtime·throw("runq is not empty afterwards"); 2937 runtime·throw("runq is not empty afterwards");
2876 } 2938 }
2877 } 2939 }
2878 2940
2879 void 2941 void
2880 runtime·testSchedLocalQueueSteal(void) 2942 runtime·testSchedLocalQueueSteal(void)
2881 { 2943 {
2882 P p1, p2; 2944 P p1, p2;
2883 » G gs[1000], *gp; 2945 » G gs[nelem(p1.runq)], *gp;
2884 int32 i, j, s; 2946 int32 i, j, s;
2885 2947
2886 runtime·memclr((byte*)&p1, sizeof(p1)); 2948 runtime·memclr((byte*)&p1, sizeof(p1));
2887 p1.runqsize = 1;
2888 p1.runqhead = 0;
2889 p1.runqtail = 0;
2890 p1.runq = runtime·malloc(p1.runqsize*sizeof(*p1.runq));
2891
2892 runtime·memclr((byte*)&p2, sizeof(p2)); 2949 runtime·memclr((byte*)&p2, sizeof(p2));
2893 p2.runqsize = nelem(gs);
2894 p2.runqhead = 0;
2895 p2.runqtail = 0;
2896 p2.runq = runtime·malloc(p2.runqsize*sizeof(*p2.runq));
2897 2950
2898 for(i = 0; i < nelem(gs); i++) { 2951 for(i = 0; i < nelem(gs); i++) {
2899 for(j = 0; j < i; j++) { 2952 for(j = 0; j < i; j++) {
2900 gs[j].sig = 0; 2953 gs[j].sig = 0;
2901 runqput(&p1, &gs[j]); 2954 runqput(&p1, &gs[j]);
2902 } 2955 }
2903 gp = runqsteal(&p2, &p1); 2956 gp = runqsteal(&p2, &p1);
2904 s = 0; 2957 s = 0;
2905 if(gp) { 2958 if(gp) {
2906 s++; 2959 s++;
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
2964 if(experiment[i+j] != name[j]) 3017 if(experiment[i+j] != name[j])
2965 goto nomatch; 3018 goto nomatch;
2966 if(experiment[i+j] != '\0' && experiment[i+j] != ',') 3019 if(experiment[i+j] != '\0' && experiment[i+j] != ',')
2967 goto nomatch; 3020 goto nomatch;
2968 return 1; 3021 return 1;
2969 } 3022 }
2970 nomatch:; 3023 nomatch:;
2971 } 3024 }
2972 return 0; 3025 return 0;
2973 } 3026 }
LEFTRIGHT

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b