LEFT | RIGHT |
1 // Copyright 2009 The Go Authors. All rights reserved. | 1 // Copyright 2009 The Go Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 #include "runtime.h" | 5 #include "runtime.h" |
6 #include "arch_GOARCH.h" | 6 #include "arch_GOARCH.h" |
7 #include "zaexperiment.h" | 7 #include "zaexperiment.h" |
8 #include "malloc.h" | 8 #include "malloc.h" |
9 #include "stack.h" | 9 #include "stack.h" |
10 #include "race.h" | 10 #include "race.h" |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
72 G* runtime·lastg; | 72 G* runtime·lastg; |
73 M* runtime·allm; | 73 M* runtime·allm; |
74 M* runtime·extram; | 74 M* runtime·extram; |
75 int8* runtime·goos; | 75 int8* runtime·goos; |
76 int32 runtime·ncpu; | 76 int32 runtime·ncpu; |
77 static int32 newprocs; | 77 static int32 newprocs; |
78 | 78 |
79 void runtime·mstart(void); | 79 void runtime·mstart(void); |
80 static void runqput(P*, G*); | 80 static void runqput(P*, G*); |
81 static G* runqget(P*); | 81 static G* runqget(P*); |
82 static void runqgrow(P*); | 82 static bool runqputslow(P*, G*, uint32, uint32); |
83 static G* runqsteal(P*, P*); | 83 static G* runqsteal(P*, P*); |
84 static void mput(M*); | 84 static void mput(M*); |
85 static M* mget(void); | 85 static M* mget(void); |
86 static void mcommoninit(M*); | 86 static void mcommoninit(M*); |
87 static void schedule(void); | 87 static void schedule(void); |
88 static void procresize(int32); | 88 static void procresize(int32); |
89 static void acquirep(P*); | 89 static void acquirep(P*); |
90 static P* releasep(void); | 90 static P* releasep(void); |
91 static void newm(void(*)(void), P*); | 91 static void newm(void(*)(void), P*); |
92 static void stopm(void); | 92 static void stopm(void); |
93 static void startm(P*, bool); | 93 static void startm(P*, bool); |
94 static void handoffp(P*); | 94 static void handoffp(P*); |
95 static void wakep(void); | 95 static void wakep(void); |
96 static void stoplockedm(void); | 96 static void stoplockedm(void); |
97 static void startlockedm(G*); | 97 static void startlockedm(G*); |
98 static void sysmon(void); | 98 static void sysmon(void); |
99 static uint32 retake(int64); | 99 static uint32 retake(int64); |
100 static void incidlelocked(int32); | 100 static void incidlelocked(int32); |
101 static void checkdead(void); | 101 static void checkdead(void); |
102 static void exitsyscall0(G*); | 102 static void exitsyscall0(G*); |
103 static void park0(G*); | 103 static void park0(G*); |
104 static void goexit0(G*); | 104 static void goexit0(G*); |
105 static void gfput(P*, G*); | 105 static void gfput(P*, G*); |
106 static G* gfget(P*); | 106 static G* gfget(P*); |
107 static void gfpurge(P*); | 107 static void gfpurge(P*); |
108 static void globrunqput(G*); | 108 static void globrunqput(G*); |
| 109 static void globrunqputbatch(G*, G*, int32); |
109 static G* globrunqget(P*, int32); | 110 static G* globrunqget(P*, int32); |
110 static P* pidleget(void); | 111 static P* pidleget(void); |
111 static void pidleput(P*); | 112 static void pidleput(P*); |
112 static void injectglist(G*); | 113 static void injectglist(G*); |
113 static bool preemptall(void); | 114 static bool preemptall(void); |
114 static bool preemptone(P*); | 115 static bool preemptone(P*); |
115 static bool exitsyscallfast(void); | 116 static bool exitsyscallfast(void); |
116 static bool haveexperiment(int8*); | 117 static bool haveexperiment(int8*); |
117 | 118 |
118 // The bootstrap sequence is: | 119 // The bootstrap sequence is: |
119 // | 120 // |
120 // call osinit | 121 // call osinit |
121 // call schedinit | 122 // call schedinit |
122 // make & queue new G | 123 // make & queue new G |
123 // call runtime·mstart | 124 // call runtime·mstart |
124 // | 125 // |
125 // The new G calls runtime·main. | 126 // The new G calls runtime·main. |
126 void | 127 void |
127 runtime·schedinit(void) | 128 runtime·schedinit(void) |
128 { | 129 { |
129 int32 n, procs; | 130 int32 n, procs; |
130 byte *p; | 131 byte *p; |
131 Eface i; | 132 Eface i; |
132 | 133 |
133 runtime·sched.maxmcount = 10000; | 134 runtime·sched.maxmcount = 10000; |
134 runtime·precisestack = haveexperiment("precisestack"); | 135 runtime·precisestack = haveexperiment("precisestack"); |
135 | 136 |
136 runtime·mprofinit(); | |
137 runtime·mallocinit(); | 137 runtime·mallocinit(); |
138 mcommoninit(m); | 138 mcommoninit(m); |
139 ········ | 139 ········ |
140 // Initialize the itable value for newErrorCString, | 140 // Initialize the itable value for newErrorCString, |
141 // so that the next time it gets called, possibly | 141 // so that the next time it gets called, possibly |
142 // in a fault during a garbage collection, it will not | 142 // in a fault during a garbage collection, it will not |
143 // need to allocated memory. | 143 // need to allocated memory. |
144 runtime·newErrorCString(0, &i); | 144 runtime·newErrorCString(0, &i); |
145 | 145 |
146 runtime·goargs(); | 146 runtime·goargs(); |
(...skipping 794 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
941 m->nextp = nil; | 941 m->nextp = nil; |
942 } | 942 } |
943 | 943 |
944 static void | 944 static void |
945 mspinning(void) | 945 mspinning(void) |
946 { | 946 { |
947 m->spinning = true; | 947 m->spinning = true; |
948 } | 948 } |
949 | 949 |
950 // Schedules some M to run the p (creates an M if necessary). | 950 // Schedules some M to run the p (creates an M if necessary). |
951 // If p==nil, tries to get an idle P, if no idle P's returns false. | 951 // If p==nil, tries to get an idle P, if no idle P's does nothing. |
952 static void | 952 static void |
953 startm(P *p, bool spinning) | 953 startm(P *p, bool spinning) |
954 { | 954 { |
955 M *mp; | 955 M *mp; |
956 void (*fn)(void); | 956 void (*fn)(void); |
957 | 957 |
958 runtime·lock(&runtime·sched); | 958 runtime·lock(&runtime·sched); |
959 if(p == nil) { | 959 if(p == nil) { |
960 p = pidleget(); | 960 p = pidleget(); |
961 if(p == nil) { | 961 if(p == nil) { |
(...skipping 1247 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2209 p->id = i; | 2209 p->id = i; |
2210 p->status = Pgcstop; | 2210 p->status = Pgcstop; |
2211 runtime·atomicstorep(&runtime·allp[i], p); | 2211 runtime·atomicstorep(&runtime·allp[i], p); |
2212 } | 2212 } |
2213 if(p->mcache == nil) { | 2213 if(p->mcache == nil) { |
2214 if(old==0 && i==0) | 2214 if(old==0 && i==0) |
2215 p->mcache = m->mcache; // bootstrap | 2215 p->mcache = m->mcache; // bootstrap |
2216 else | 2216 else |
2217 p->mcache = runtime·allocmcache(); | 2217 p->mcache = runtime·allocmcache(); |
2218 } | 2218 } |
2219 if(p->runq == nil) { | |
2220 p->runqsize = 128; | |
2221 p->runq = (G**)runtime·mallocgc(p->runqsize*sizeof(G*),
0, FlagNoInvokeGC); | |
2222 } | |
2223 } | 2219 } |
2224 | 2220 |
2225 // redistribute runnable G's evenly | 2221 // redistribute runnable G's evenly |
| 2222 // collect all runnable goroutines in global queue |
2226 for(i = 0; i < old; i++) { | 2223 for(i = 0; i < old; i++) { |
2227 p = runtime·allp[i]; | 2224 p = runtime·allp[i]; |
2228 while(gp = runqget(p)) | 2225 while(gp = runqget(p)) |
2229 globrunqput(gp); | 2226 globrunqput(gp); |
2230 } | 2227 } |
| 2228 // fill local queues with at most nelem(p->runq)/2 goroutines |
2231 // start at 1 because current M already executes some G and will acquire
allp[0] below, | 2229 // start at 1 because current M already executes some G and will acquire
allp[0] below, |
2232 // so if we have a spare G we want to put it into allp[1]. | 2230 // so if we have a spare G we want to put it into allp[1]. |
2233 » for(i = 1; runtime·sched.runqhead; i++) { | 2231 » for(i = 1; i < new * nelem(p->runq)/2 && runtime·sched.runqsize > 0; i++
) { |
2234 gp = runtime·sched.runqhead; | 2232 gp = runtime·sched.runqhead; |
2235 runtime·sched.runqhead = gp->schedlink; | 2233 runtime·sched.runqhead = gp->schedlink; |
| 2234 if(runtime·sched.runqhead == nil) |
| 2235 runtime·sched.runqtail = nil; |
| 2236 runtime·sched.runqsize--; |
2236 runqput(runtime·allp[i%new], gp); | 2237 runqput(runtime·allp[i%new], gp); |
2237 } | 2238 } |
2238 runtime·sched.runqtail = nil; | |
2239 runtime·sched.runqsize = 0; | |
2240 | 2239 |
2241 // free unused P's | 2240 // free unused P's |
2242 for(i = new; i < old; i++) { | 2241 for(i = new; i < old; i++) { |
2243 p = runtime·allp[i]; | 2242 p = runtime·allp[i]; |
2244 runtime·freemcache(p->mcache); | 2243 runtime·freemcache(p->mcache); |
2245 p->mcache = nil; | 2244 p->mcache = nil; |
2246 gfpurge(p); | 2245 gfpurge(p); |
2247 p->status = Pdead; | 2246 p->status = Pdead; |
2248 // can't free P itself because it can be referenced by an M in s
yscall | 2247 // can't free P itself because it can be referenced by an M in s
yscall |
2249 } | 2248 } |
(...skipping 268 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2518 gp->stackguard0 = StackPreempt; | 2517 gp->stackguard0 = StackPreempt; |
2519 return true; | 2518 return true; |
2520 } | 2519 } |
2521 | 2520 |
2522 void | 2521 void |
2523 runtime·schedtrace(bool detailed) | 2522 runtime·schedtrace(bool detailed) |
2524 { | 2523 { |
2525 static int64 starttime; | 2524 static int64 starttime; |
2526 int64 now; | 2525 int64 now; |
2527 int64 id1, id2, id3; | 2526 int64 id1, id2, id3; |
2528 » int32 i, q, t, h, s; | 2527 » int32 i, t, h; |
2529 int8 *fmt; | 2528 int8 *fmt; |
2530 M *mp, *lockedm; | 2529 M *mp, *lockedm; |
2531 G *gp, *lockedg; | 2530 G *gp, *lockedg; |
2532 P *p; | 2531 P *p; |
2533 | 2532 |
2534 now = runtime·nanotime(); | 2533 now = runtime·nanotime(); |
2535 if(starttime == 0) | 2534 if(starttime == 0) |
2536 starttime = now; | 2535 starttime = now; |
2537 | 2536 |
2538 runtime·lock(&runtime·sched); | 2537 runtime·lock(&runtime·sched); |
2539 runtime·printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idleth
reads=%d runqueue=%d", | 2538 runtime·printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idleth
reads=%d runqueue=%d", |
2540 (now-starttime)/1000000, runtime·gomaxprocs, runtime·sched.npidl
e, runtime·sched.mcount, | 2539 (now-starttime)/1000000, runtime·gomaxprocs, runtime·sched.npidl
e, runtime·sched.mcount, |
2541 runtime·sched.nmidle, runtime·sched.runqsize); | 2540 runtime·sched.nmidle, runtime·sched.runqsize); |
2542 if(detailed) { | 2541 if(detailed) { |
2543 runtime·printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stop
wait=%d sysmonwait=%d\n", | 2542 runtime·printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stop
wait=%d sysmonwait=%d\n", |
2544 runtime·sched.gcwaiting, runtime·sched.nmidlelocked, run
time·sched.nmspinning, | 2543 runtime·sched.gcwaiting, runtime·sched.nmidlelocked, run
time·sched.nmspinning, |
2545 runtime·sched.stopwait, runtime·sched.sysmonwait); | 2544 runtime·sched.stopwait, runtime·sched.sysmonwait); |
2546 } | 2545 } |
2547 // We must be careful while reading data from P's, M's and G's. | 2546 // We must be careful while reading data from P's, M's and G's. |
2548 // Even if we hold schedlock, most data can be changed concurrently. | 2547 // Even if we hold schedlock, most data can be changed concurrently. |
2549 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to
nil. | 2548 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to
nil. |
2550 for(i = 0; i < runtime·gomaxprocs; i++) { | 2549 for(i = 0; i < runtime·gomaxprocs; i++) { |
2551 p = runtime·allp[i]; | 2550 p = runtime·allp[i]; |
2552 if(p == nil) | 2551 if(p == nil) |
2553 continue; | 2552 continue; |
2554 mp = p->m; | 2553 mp = p->m; |
2555 » » t = p->runqtail; | 2554 » » h = runtime·atomicload(&p->runqhead); |
2556 » » h = p->runqhead; | 2555 » » t = runtime·atomicload(&p->runqtail); |
2557 » » s = p->runqsize; | |
2558 » » q = t - h; | |
2559 » » if(q < 0) | |
2560 » » » q += s; | |
2561 if(detailed) | 2556 if(detailed) |
2562 » » » runtime·printf(" P%d: status=%d schedtick=%d syscalltic
k=%d m=%d runqsize=%d/%d gfreecnt=%d\n", | 2557 » » » runtime·printf(" P%d: status=%d schedtick=%d syscalltic
k=%d m=%d runqsize=%d gfreecnt=%d\n", |
2563 » » » » i, p->status, p->schedtick, p->syscalltick, mp ?
mp->id : -1, q, s, p->gfreecnt); | 2558 » » » » i, p->status, p->schedtick, p->syscalltick, mp ?
mp->id : -1, t-h, p->gfreecnt); |
2564 else { | 2559 else { |
2565 // In non-detailed mode format lengths of per-P run queu
es as: | 2560 // In non-detailed mode format lengths of per-P run queu
es as: |
2566 // [len1 len2 len3 len4] | 2561 // [len1 len2 len3 len4] |
2567 fmt = " %d"; | 2562 fmt = " %d"; |
2568 if(runtime·gomaxprocs == 1) | 2563 if(runtime·gomaxprocs == 1) |
2569 fmt = " [%d]\n"; | 2564 fmt = " [%d]\n"; |
2570 else if(i == 0) | 2565 else if(i == 0) |
2571 fmt = " [%d"; | 2566 fmt = " [%d"; |
2572 else if(i == runtime·gomaxprocs-1) | 2567 else if(i == runtime·gomaxprocs-1) |
2573 fmt = " %d]\n"; | 2568 fmt = " %d]\n"; |
2574 » » » runtime·printf(fmt, q); | 2569 » » » runtime·printf(fmt, t-h); |
2575 } | 2570 } |
2576 } | 2571 } |
2577 if(!detailed) { | 2572 if(!detailed) { |
2578 runtime·unlock(&runtime·sched); | 2573 runtime·unlock(&runtime·sched); |
2579 return; | 2574 return; |
2580 } | 2575 } |
2581 for(mp = runtime·allm; mp; mp = mp->alllink) { | 2576 for(mp = runtime·allm; mp; mp = mp->alllink) { |
2582 p = mp->p; | 2577 p = mp->p; |
2583 gp = mp->curg; | 2578 gp = mp->curg; |
2584 lockedg = mp->lockedg; | 2579 lockedg = mp->lockedg; |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2639 { | 2634 { |
2640 gp->schedlink = nil; | 2635 gp->schedlink = nil; |
2641 if(runtime·sched.runqtail) | 2636 if(runtime·sched.runqtail) |
2642 runtime·sched.runqtail->schedlink = gp; | 2637 runtime·sched.runqtail->schedlink = gp; |
2643 else | 2638 else |
2644 runtime·sched.runqhead = gp; | 2639 runtime·sched.runqhead = gp; |
2645 runtime·sched.runqtail = gp; | 2640 runtime·sched.runqtail = gp; |
2646 runtime·sched.runqsize++; | 2641 runtime·sched.runqsize++; |
2647 } | 2642 } |
2648 | 2643 |
| 2644 // Put a batch of runnable goroutines on the global runnable queue. |
| 2645 // Sched must be locked. |
| 2646 static void |
| 2647 globrunqputbatch(G *ghead, G *gtail, int32 n) |
| 2648 { |
| 2649 gtail->schedlink = nil; |
| 2650 if(runtime·sched.runqtail) |
| 2651 runtime·sched.runqtail->schedlink = ghead; |
| 2652 else |
| 2653 runtime·sched.runqhead = ghead; |
| 2654 runtime·sched.runqtail = gtail; |
| 2655 runtime·sched.runqsize += n; |
| 2656 } |
| 2657 |
2649 // Try get a batch of G's from the global runnable queue. | 2658 // Try get a batch of G's from the global runnable queue. |
2650 // Sched must be locked. | 2659 // Sched must be locked. |
2651 static G* | 2660 static G* |
2652 globrunqget(P *p, int32 max) | 2661 globrunqget(P *p, int32 max) |
2653 { | 2662 { |
2654 G *gp, *gp1; | 2663 G *gp, *gp1; |
2655 int32 n; | 2664 int32 n; |
2656 | 2665 |
2657 if(runtime·sched.runqsize == 0) | 2666 if(runtime·sched.runqsize == 0) |
2658 return nil; | 2667 return nil; |
2659 n = runtime·sched.runqsize/runtime·gomaxprocs+1; | 2668 n = runtime·sched.runqsize/runtime·gomaxprocs+1; |
2660 if(n > runtime·sched.runqsize) | 2669 if(n > runtime·sched.runqsize) |
2661 n = runtime·sched.runqsize; | 2670 n = runtime·sched.runqsize; |
2662 if(max > 0 && n > max) | 2671 if(max > 0 && n > max) |
2663 n = max; | 2672 n = max; |
| 2673 if(n > nelem(p->runq)/2) |
| 2674 n = nelem(p->runq)/2; |
2664 runtime·sched.runqsize -= n; | 2675 runtime·sched.runqsize -= n; |
2665 if(runtime·sched.runqsize == 0) | 2676 if(runtime·sched.runqsize == 0) |
2666 runtime·sched.runqtail = nil; | 2677 runtime·sched.runqtail = nil; |
2667 gp = runtime·sched.runqhead; | 2678 gp = runtime·sched.runqhead; |
2668 runtime·sched.runqhead = gp->schedlink; | 2679 runtime·sched.runqhead = gp->schedlink; |
2669 n--; | 2680 n--; |
2670 while(n--) { | 2681 while(n--) { |
2671 gp1 = runtime·sched.runqhead; | 2682 gp1 = runtime·sched.runqhead; |
2672 runtime·sched.runqhead = gp1->schedlink; | 2683 runtime·sched.runqhead = gp1->schedlink; |
2673 runqput(p, gp1); | 2684 runqput(p, gp1); |
(...skipping 19 matching lines...) Expand all Loading... |
2693 P *p; | 2704 P *p; |
2694 | 2705 |
2695 p = runtime·sched.pidle; | 2706 p = runtime·sched.pidle; |
2696 if(p) { | 2707 if(p) { |
2697 runtime·sched.pidle = p->link; | 2708 runtime·sched.pidle = p->link; |
2698 runtime·xadd(&runtime·sched.npidle, -1); // TODO: fast atomic | 2709 runtime·xadd(&runtime·sched.npidle, -1); // TODO: fast atomic |
2699 } | 2710 } |
2700 return p; | 2711 return p; |
2701 } | 2712 } |
2702 | 2713 |
2703 // Put g on local runnable queue. | |
2704 static | |
2705 runqputslow(P *p, G *gp) | |
2706 { | |
2707 | |
2708 } | |
2709 | |
2710 // Try to put g on local runnable queue. | 2714 // Try to put g on local runnable queue. |
2711 // If it's full, put onto global queue. | 2715 // If it's full, put onto global queue. |
| 2716 // Executed only by the owner P. |
2712 static void | 2717 static void |
2713 runqput(P *p, G *gp) | 2718 runqput(P *p, G *gp) |
2714 { | 2719 { |
2715 » int32 h, t, s; | 2720 » uint32 h, t; |
2716 | 2721 |
| 2722 retry: |
2717 h = runtime·atomicload(&p->runqhead); // load-acquire, synchronize with
consumers | 2723 h = runtime·atomicload(&p->runqhead); // load-acquire, synchronize with
consumers |
2718 t = p->runqtail; | 2724 t = p->runqtail; |
2719 » if(t - h == nelem(p->runq)) { | 2725 » if(t - h < nelem(p->runq)) { |
2720 » » runqputslow(p, gp); | 2726 » » p->runq[t%nelem(p->runq)] = gp; |
| 2727 » » runtime·atomicstore(&p->runqtail, t+1); // store-release, makes
the item available for consumption |
2721 return; | 2728 return; |
2722 } | 2729 } |
2723 » p->runq[t%nelem(p->runq)] = gp; | 2730 » if(runqputslow(p, gp, h, t)) |
2724 » runtime·atomicstore(&p->runqtail, t+1); // store-release, makes the ite
m available for consumption | 2731 » » return; |
| 2732 » // the queue is not full, now the put above must suceed |
| 2733 » goto retry; |
| 2734 } |
| 2735 |
| 2736 // Put g and a batch of work from local runnable queue on global queue. |
| 2737 // Executed only by the owner P. |
| 2738 static bool |
| 2739 runqputslow(P *p, G *gp, uint32 h, uint32 t) |
| 2740 { |
| 2741 » G *batch[nelem(p->runq)/2+1]; |
| 2742 » uint32 n, i; |
| 2743 |
| 2744 » // First, grab a batch from local queue. |
| 2745 » n = t-h; |
| 2746 » n = n/2; |
| 2747 » if(n != nelem(p->runq)/2) |
| 2748 » » runtime·throw("runqputslow: queue is not full"); |
| 2749 » for(i=0; i<n; i++) |
| 2750 » » batch[i] = p->runq[(h+i)%nelem(p->runq)]; |
| 2751 » if(!runtime·cas(&p->runqhead, h, h+n)) // cas-release, commits consume |
| 2752 » » return false; |
| 2753 » batch[n] = gp; |
| 2754 » // Link the goroutines. |
| 2755 » for(i=0; i<n; i++) |
| 2756 » » batch[i]->schedlink = batch[i+1]; |
| 2757 » // Now put the batch on global queue. |
| 2758 » runtime·lock(&runtime·sched); |
| 2759 » globrunqputbatch(batch[0], batch[n], n+1); |
| 2760 » runtime·unlock(&runtime·sched); |
| 2761 » return true; |
2725 } | 2762 } |
2726 | 2763 |
2727 // Get g from local runnable queue. | 2764 // Get g from local runnable queue. |
| 2765 // Executed only by the owner P. |
2728 static G* | 2766 static G* |
2729 runqget(P *p) | 2767 runqget(P *p) |
2730 { | 2768 { |
2731 G *gp; | 2769 G *gp; |
2732 » int32 t, h, s; | 2770 » uint32 t, h; |
2733 | 2771 |
2734 for(;;) { | 2772 for(;;) { |
2735 h = runtime·atomicload(&p->runqhead); // load-acquire, synchron
ize with other consumers | 2773 h = runtime·atomicload(&p->runqhead); // load-acquire, synchron
ize with other consumers |
2736 » » t = runtime·atomicload(&p->runqtail); // load-acquire, synchron
ize with producer | 2774 » » t = p->runqtail; |
2737 if(t == h) | 2775 if(t == h) |
2738 return nil; | 2776 return nil; |
2739 » runtime·lock(p); | 2777 » » gp = p->runq[h%nelem(p->runq)]; |
2740 » h = p->runqhead; | 2778 » » if(runtime·cas(&p->runqhead, h, h+1)) // cas-release, commits c
onsume |
2741 » t = p->runqtail; | 2779 » » » return gp; |
2742 » s = p->runqsize; | 2780 » } |
2743 » if(t == h) { | 2781 } |
2744 » » runtime·unlock(p); | 2782 |
2745 » » return nil; | 2783 // Grabs a batch of goroutines from local runnable queue. |
2746 » } | 2784 // batch array must be of size nelem(p->runq)/2. Returns number of grabbed gorou
tines. |
2747 » gp = p->runq[h++]; | 2785 // Can be executed by any P. |
2748 » if(h == s) | 2786 static uint32 |
2749 » » h = 0; | 2787 runqgrab(P *p, G **batch) |
2750 » p->runqhead = h; | 2788 { |
2751 » runtime·unlock(p); | 2789 » uint32 t, h, n, i; |
2752 » return gp; | 2790 |
2753 } | 2791 » for(;;) { |
2754 | 2792 » » h = runtime·atomicload(&p->runqhead); // load-acquire, synchron
ize with other consumers |
2755 // Grow local runnable queue. | 2793 » » t = runtime·atomicload(&p->runqtail); // load-acquire, synchron
ize with the producer |
2756 // TODO(dvyukov): consider using fixed-size array | 2794 » » n = t-h; |
2757 // and transfer excess to the global list (local queue can grow way too big). | 2795 » » n = n - n/2; |
2758 static void | 2796 » » if(n == 0) |
2759 runqgrow(P *p) | 2797 » » » break; |
2760 { | 2798 » » if(n > nelem(p->runq)/2) // read inconsistent h and t |
2761 » G **q; | 2799 » » » continue; |
2762 » int32 s, t, h, t2; | 2800 » » for(i=0; i<n; i++) |
2763 | 2801 » » » batch[i] = p->runq[(h+i)%nelem(p->runq)]; |
2764 » h = p->runqhead; | 2802 » » if(runtime·cas(&p->runqhead, h, h+n)) // cas-release, commits c
onsume |
2765 » t = p->runqtail; | 2803 » » » break; |
2766 » s = p->runqsize; | 2804 » } |
2767 » t2 = 0; | 2805 » return n; |
2768 » q = runtime·malloc(2*s*sizeof(*q)); | |
2769 » while(t != h) { | |
2770 » » q[t2++] = p->runq[h++]; | |
2771 » » if(h == s) | |
2772 » » » h = 0; | |
2773 » } | |
2774 » runtime·free(p->runq); | |
2775 » p->runq = q; | |
2776 » p->runqhead = 0; | |
2777 » p->runqtail = t2; | |
2778 » p->runqsize = 2*s; | |
2779 } | 2806 } |
2780 | 2807 |
2781 // Steal half of elements from local runnable queue of p2 | 2808 // Steal half of elements from local runnable queue of p2 |
2782 // and put onto local runnable queue of p. | 2809 // and put onto local runnable queue of p. |
2783 // Returns one of the stolen elements (or nil if failed). | 2810 // Returns one of the stolen elements (or nil if failed). |
2784 static G* | 2811 static G* |
2785 runqsteal(P *p, P *p2) | 2812 runqsteal(P *p, P *p2) |
2786 { | 2813 { |
2787 » G *gp, *gp1; | 2814 » G *gp; |
2788 » int32 t, h, s, t2, h2, s2, c, i; | 2815 » G *batch[nelem(p->runq)/2]; |
2789 | 2816 » uint32 t, h, n, i; |
2790 » if(p2->runqhead == p2->runqtail) | 2817 |
| 2818 » n = runqgrab(p2, batch); |
| 2819 » if(n == 0) |
2791 return nil; | 2820 return nil; |
2792 » // sort locks to prevent deadlocks | 2821 » n--; |
2793 » if(p < p2) | 2822 » gp = batch[n]; |
2794 » » runtime·lock(p); | 2823 » if(n == 0) |
2795 » runtime·lock(p2); | 2824 » » return gp; |
2796 » if(p2->runqhead == p2->runqtail) { | 2825 » h = runtime·atomicload(&p->runqhead); // load-acquire, synchronize with
consumers |
2797 » » runtime·unlock(p2); | |
2798 » » if(p < p2) | |
2799 » » » runtime·unlock(p); | |
2800 » » return nil; | |
2801 » } | |
2802 » if(p >= p2) | |
2803 » » runtime·lock(p); | |
2804 » // now we've locked both queues and know the victim is not empty | |
2805 » h = p->runqhead; | |
2806 t = p->runqtail; | 2826 t = p->runqtail; |
2807 » s = p->runqsize; | 2827 » if(t - h + n >= nelem(p->runq)) |
2808 » h2 = p2->runqhead; | 2828 » » runtime·throw("runqsteal: runq overflow"); |
2809 » t2 = p2->runqtail; | 2829 » for(i=0; i<n; i++, t++) |
2810 » s2 = p2->runqsize; | 2830 » » p->runq[t%nelem(p->runq)] = batch[i]; |
2811 » gp = p2->runq[h2++]; // return value | 2831 » runtime·atomicstore(&p->runqtail, t); // store-release, makes the item
available for consumption |
2812 » if(h2 == s2) | |
2813 » » h2 = 0; | |
2814 » // steal roughly half | |
2815 » if(t2 > h2) | |
2816 » » c = (t2 - h2) / 2; | |
2817 » else | |
2818 » » c = (s2 - h2 + t2) / 2; | |
2819 » // copy | |
2820 » for(i = 0; i != c; i++) { | |
2821 » » // the target queue is full? | |
2822 » » if(t == h-1 || (h == 0 && t == s-1)) | |
2823 » » » break; | |
2824 » » // the victim queue is empty? | |
2825 » » if(t2 == h2) | |
2826 » » » break; | |
2827 » » gp1 = p2->runq[h2++]; | |
2828 » » if(h2 == s2) | |
2829 » » » h2 = 0; | |
2830 » » p->runq[t++] = gp1; | |
2831 » » if(t == s) | |
2832 » » » t = 0; | |
2833 » } | |
2834 » p->runqtail = t; | |
2835 » p2->runqhead = h2; | |
2836 » runtime·unlock(p2); | |
2837 » runtime·unlock(p); | |
2838 return gp; | 2832 return gp; |
2839 } | 2833 } |
2840 | 2834 |
2841 void | 2835 void |
2842 runtime·testSchedLocalQueue(void) | 2836 runtime·testSchedLocalQueue(void) |
2843 { | 2837 { |
2844 P p; | 2838 P p; |
2845 » G gs[1000]; | 2839 » G gs[nelem(p.runq)]; |
2846 int32 i, j; | 2840 int32 i, j; |
2847 | 2841 |
2848 runtime·memclr((byte*)&p, sizeof(p)); | 2842 runtime·memclr((byte*)&p, sizeof(p)); |
2849 p.runqsize = 1; | |
2850 p.runqhead = 0; | |
2851 p.runqtail = 0; | |
2852 p.runq = runtime·malloc(p.runqsize*sizeof(*p.runq)); | |
2853 | 2843 |
2854 for(i = 0; i < nelem(gs); i++) { | 2844 for(i = 0; i < nelem(gs); i++) { |
2855 if(runqget(&p) != nil) | 2845 if(runqget(&p) != nil) |
2856 runtime·throw("runq is not empty initially"); | 2846 runtime·throw("runq is not empty initially"); |
2857 for(j = 0; j < i; j++) | 2847 for(j = 0; j < i; j++) |
2858 runqput(&p, &gs[i]); | 2848 runqput(&p, &gs[i]); |
2859 for(j = 0; j < i; j++) { | 2849 for(j = 0; j < i; j++) { |
2860 if(runqget(&p) != &gs[i]) { | 2850 if(runqget(&p) != &gs[i]) { |
2861 runtime·printf("bad element at iter %d/%d\n", i,
j); | 2851 runtime·printf("bad element at iter %d/%d\n", i,
j); |
2862 runtime·throw("bad element"); | 2852 runtime·throw("bad element"); |
2863 } | 2853 } |
2864 } | 2854 } |
2865 if(runqget(&p) != nil) | 2855 if(runqget(&p) != nil) |
2866 runtime·throw("runq is not empty afterwards"); | 2856 runtime·throw("runq is not empty afterwards"); |
2867 } | 2857 } |
2868 } | 2858 } |
2869 | 2859 |
2870 void | 2860 void |
2871 runtime·testSchedLocalQueueSteal(void) | 2861 runtime·testSchedLocalQueueSteal(void) |
2872 { | 2862 { |
2873 P p1, p2; | 2863 P p1, p2; |
2874 » G gs[1000], *gp; | 2864 » G gs[nelem(p1.runq)], *gp; |
2875 int32 i, j, s; | 2865 int32 i, j, s; |
2876 | 2866 |
2877 runtime·memclr((byte*)&p1, sizeof(p1)); | 2867 runtime·memclr((byte*)&p1, sizeof(p1)); |
2878 p1.runqsize = 1; | |
2879 p1.runqhead = 0; | |
2880 p1.runqtail = 0; | |
2881 p1.runq = runtime·malloc(p1.runqsize*sizeof(*p1.runq)); | |
2882 | |
2883 runtime·memclr((byte*)&p2, sizeof(p2)); | 2868 runtime·memclr((byte*)&p2, sizeof(p2)); |
2884 p2.runqsize = nelem(gs); | |
2885 p2.runqhead = 0; | |
2886 p2.runqtail = 0; | |
2887 p2.runq = runtime·malloc(p2.runqsize*sizeof(*p2.runq)); | |
2888 | 2869 |
2889 for(i = 0; i < nelem(gs); i++) { | 2870 for(i = 0; i < nelem(gs); i++) { |
2890 for(j = 0; j < i; j++) { | 2871 for(j = 0; j < i; j++) { |
2891 gs[j].sig = 0; | 2872 gs[j].sig = 0; |
2892 runqput(&p1, &gs[j]); | 2873 runqput(&p1, &gs[j]); |
2893 } | 2874 } |
2894 gp = runqsteal(&p2, &p1); | 2875 gp = runqsteal(&p2, &p1); |
2895 s = 0; | 2876 s = 0; |
2896 if(gp) { | 2877 if(gp) { |
2897 s++; | 2878 s++; |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2955 if(experiment[i+j] != name[j]) | 2936 if(experiment[i+j] != name[j]) |
2956 goto nomatch; | 2937 goto nomatch; |
2957 if(experiment[i+j] != '\0' && experiment[i+j] != ',') | 2938 if(experiment[i+j] != '\0' && experiment[i+j] != ',') |
2958 goto nomatch; | 2939 goto nomatch; |
2959 return 1; | 2940 return 1; |
2960 } | 2941 } |
2961 nomatch:; | 2942 nomatch:; |
2962 } | 2943 } |
2963 return 0; | 2944 return 0; |
2964 } | 2945 } |
LEFT | RIGHT |