Rietveld Code Review Tool

Delta Between Two Patch Sets: src/pkg/runtime/mgc0.c

Issue 5279048: code review 5279048: runtime: faster and more scalable GC (Closed)
Left Patch Set: diff -r da9e7548e6ef https://go.googlecode.com/hg/ Created 13 years, 4 months ago
Right Patch Set: diff -r f44057cc01b2 https://go.googlecode.com/hg/ Created 12 years, 11 months ago
LEFT | RIGHT
1 // Copyright 2009 The Go Authors. All rights reserved. 1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 // Garbage collector. 5 // Garbage collector.
6 6
7 #include "runtime.h" 7 #include "runtime.h"
8 #include "arch.h" 8 #include "arch_GOARCH.h"
9 #include "malloc.h" 9 #include "malloc.h"
10 #include "stack.h" 10 #include "stack.h"
11 11
12 enum { 12 enum {
13 Debug = 0, 13 Debug = 0,
14 PtrSize = sizeof(void*), 14 PtrSize = sizeof(void*),
15 DebugMark = 0, // run second pass to check mark 15 DebugMark = 0, // run second pass to check mark
16 DataBlock = 8*1024,
16 17
17 // Four bits per word (see #defines below). 18 // Four bits per word (see #defines below).
18 wordsPerBitmapWord = sizeof(void*)*8/4, 19 wordsPerBitmapWord = sizeof(void*)*8/4,
19 bitShift = sizeof(void*)*8/4, 20 bitShift = sizeof(void*)*8/4,
20 }; 21 };
21 22
22 // Bits in per-word bitmap. 23 // Bits in per-word bitmap.
23 // #defines because enum might not be able to hold the values. 24 // #defines because enum might not be able to hold the values.
24 // 25 //
25 // Each word in the bitmap describes wordsPerBitmapWord words 26 // Each word in the bitmap describes wordsPerBitmapWord words
(...skipping 19 matching lines...)
45 // /* then test bits & bitAllocated, bits & bitMarked, etc. */ 46 // /* then test bits & bitAllocated, bits & bitMarked, etc. */
46 // 47 //
47 #define bitAllocated ((uintptr)1<<(bitShift*0)) 48 #define bitAllocated ((uintptr)1<<(bitShift*0))
48 #define bitNoPointers ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */ 49 #define bitNoPointers ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */
49 #define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */ 50 #define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */
50 #define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */ 51 #define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */
51 #define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */ 52 #define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */
52 53
53 #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial) 54 #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
54 55
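The bitmap lookup implied by these definitions is repeated throughout the file (scanblock, debug_scanblock, sweepspan). As a minimal sketch, using only the names defined above, finding the four bits that describe a word-aligned pointer obj in [arena_start, arena_used) looks like:

    uintptr off, *bitp, shift, bits;

    off = (uintptr*)obj - (uintptr*)arena_start;                // word index into the arena
    bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;  // bitmap grows down from arena_start
    shift = off % wordsPerBitmapWord;                           // this word's position in the bitmap word
    bits = *bitp >> shift;
    // then test bits & bitAllocated, bits & bitMarked, etc.
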
56 // Holding worldsema grants an M the right to try to stop the world.
57 // The procedure is:
58 //
59 // runtime·semacquire(&runtime·worldsema);
60 // m->gcing = 1;
61 // runtime·stoptheworld();
62 //
63 // ... do stuff ...
64 //
65 // m->gcing = 0;
66 // runtime·semrelease(&runtime·worldsema);
67 // runtime·starttheworld();
68 //
69 uint32 runtime·worldsema = 1;
55 static int32 gctrace; 70 static int32 gctrace;
56 71
57 typedef struct Workbuf Workbuf; 72 typedef struct Workbuf Workbuf;
58 struct Workbuf 73 struct Workbuf
59 { 74 {
60 Workbuf *next; 75 Workbuf *next;
76 uintptr pushcnt;
61 uintptr nobj; 77 uintptr nobj;
62 » byte *obj[512-2]; 78 » byte *obj[512-3];
63 }; 79 };
64 80
65 typedef struct Finalizer Finalizer; 81 typedef struct Finalizer Finalizer;
66 struct Finalizer 82 struct Finalizer
67 { 83 {
68 void (*fn)(void*); 84 void (*fn)(void*);
69 void *arg; 85 void *arg;
70 int32 nret; 86 int32 nret;
71 }; 87 };
72 88
73 typedef struct FinBlock FinBlock; 89 typedef struct FinBlock FinBlock;
74 struct FinBlock 90 struct FinBlock
75 { 91 {
76 FinBlock *alllink; 92 FinBlock *alllink;
77 FinBlock *next; 93 FinBlock *next;
78 int32 cnt; 94 int32 cnt;
79 int32 cap; 95 int32 cap;
80 Finalizer fin[1]; 96 Finalizer fin[1];
81 }; 97 };
82 98
83 extern byte data[]; 99 extern byte data[];
84 extern byte end[]; 100 extern byte etext[];
101 extern byte ebss[];
85 102
86 static G *fing; 103 static G *fing;
87 static FinBlock *finq; // list of finalizers that are to be executed 104 static FinBlock *finq; // list of finalizers that are to be executed
88 static FinBlock *finc; // cache of free blocks 105 static FinBlock *finc; // cache of free blocks
89 static FinBlock *allfin; // list of all blocks 106 static FinBlock *allfin; // list of all blocks
90 static Lock finlock; 107 static Lock finlock;
91 static int32 fingwait; 108 static int32 fingwait;
92 109
93 static void runfinq(void); 110 static void runfinq(void);
94 static Workbuf* getempty(Workbuf*); 111 static Workbuf* getempty(Workbuf*);
95 static Workbuf* getfull(Workbuf*); 112 static Workbuf* getfull(Workbuf*);
96 static void putempty(Workbuf*); 113 static void putempty(Workbuf*);
97 static Workbuf* handoff(Workbuf*); 114 static Workbuf* handoff(Workbuf*);
98 115
99 /* 116 // parallel for descriptor
100 typedef struct GcThread GcThread;
101 struct GcThread
102 {
103 » uint64 spanpos;
104 };
105 */
106
107 static struct {
108 » Lock fmu;
109 » Workbuf»*full;
110 » Lock emu;
111 » Workbuf»*empty;
112 » uint32» nproc;
113 » volatile uint32»nwait;
114 » volatile uint32»ndone;
115 » volatile uint32 markdone;
116 » //volatile uint32 sweepdone;
117 » Note» alldone;
118 » uint32» tidseq;
119
120 » Lock;
121 » byte» *chunk;
122 » uintptr»nchunk;
123 »·······
124 » struct
125 » {
126 » » byte *p;
127 » » uintptr n;
128 » } roots[64*1024];
129 » uintptr nroots;
130 » /*
131 » union
132 » {
133 » » GcThread;
134 » » byte pad[CacheLineSize];
135 » } thr[MaxGcproc];
136 » */
137 } work;
138
139 typedef struct Parfor Parfor; 117 typedef struct Parfor Parfor;
140 struct Parfor 118 struct Parfor
141 { 119 {
120 // executed for each element
121 void (*body)(Parfor*, uint32);
122 // number of idle threads
142 uint32 done; 123 uint32 done;
124 // total number of threads
125 uint32 nthr;
126 // thread id sequencer
127 uint32 thrseq;
128 // iteration space [0, cnt)
129 uint32 cnt;
130 // arbitrary user context
131 void *ctx;
132 // if true, wait while all threads finish processing,
133 // otherwise parfor may return while other threads still working
134 bool wait;
143 byte pad[CacheLineSize]; 135 byte pad[CacheLineSize];
144 struct 136 struct
145 { 137 {
138 // the thread's iteration space [32lsb, 32msb)
146 uint64 pos; 139 uint64 pos;
147 byte pad[CacheLineSize-sizeof(uint64)]; 140 byte pad[CacheLineSize-sizeof(uint64)];
148 } thr[MaxGcproc]; 141 } thr[MaxGcproc];
149 }; 142 };
150 143
151 Parfor sweepfor; 144 typedef struct GcRoot GcRoot;
152 Parfor markfor; 145 struct GcRoot
146 {
147 » byte *p;
148 » uintptr n;
149 };
150
151 static struct
152 {
153 » uint64» full; // Workbuf* + ABA counter in 17msb
154 » uint64» empty; // Workbuf* + ABA counter in 17msb
155 » uint32» nproc;
156 » byte pad0[CacheLineSize];
157 » volatile uint32»nwait;
158 » byte pad1[CacheLineSize];
159 » volatile uint32 ndone;
160 » volatile uint32 debugmarkdone;
161 » Note» alldone;
162 » Parfor» sweepfor;
163 » Parfor» markfor;
164
165 » Lock;
166 » byte» *chunk;
167 » uintptr»nchunk;
168
169 » GcRoot» *roots;
170 » uint32» nroot;
171 » uint32» rootcap;
172 } work;
173
174 // lock-free stack
175 // used to manage empty and free Workbuf's during mark phase
176 static void» lifopush(uint64 *a, Workbuf *b);
177 static Workbuf*»lifopop(uint64 *a);
178
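The lifopush/lifopop bodies are not part of this delta. Judging only from these declarations, the "Workbuf* + ABA counter in 17msb" head words in the work struct, and the pushcnt field added to Workbuf, the intended shape is presumably a Treiber stack whose 64-bit head tags the buffer pointer with a push counter. A rough sketch under those assumptions (not the patch's actual code):

    // Assumed sketch only: Workbuf* in the low 47 bits, ABA counter in the top 17.
    static void
    lifopush(uint64 *a, Workbuf *b)
    {
        uint64 old, new;

        b->pushcnt++;
        new = (uint64)(uintptr)b | ((uint64)b->pushcnt<<47);
        for(;;) {
            old = runtime·atomicload64(a);
            // link b on top of the current head, then swing the head to b
            b->next = (Workbuf*)(uintptr)(old & (((uint64)1<<47)-1));
            if(runtime·cas64(a, &old, new))
                break;
        }
    }

    static Workbuf*
    lifopop(uint64 *a)
    {
        uint64 old, new;
        Workbuf *b, *next;

        for(;;) {
            old = runtime·atomicload64(a);
            b = (Workbuf*)(uintptr)(old & (((uint64)1<<47)-1));
            if(b == nil)
                return nil;
            next = b->next;
            new = 0;
            if(next != nil)
                new = (uint64)(uintptr)next | ((uint64)next->pushcnt<<47);
            // the counter keeps a pop/reuse/push of the same buffer from passing this CAS
            if(runtime·cas64(a, &old, new))
                return b;
        }
    }
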
179 // parallel for over [0, n).
180 // body() is executed for each iteration.
181 // nthr - total number of worker threads.
182 // ctx - arbitrary user context.
183 // if wait=true, threads return from parfor() when all work is done;
184 // otherwise, threads can return while other threads still finishing processing. ·
185 static void» parforsetup(Parfor *desc, void (*body)(Parfor*, uint32),
186 » » » uint32 nthr, uint32 n, void *ctx, bool wait);
187 static void» parfor(Parfor *desc);
153 188
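These descriptors are driven later in the file: runtime·gc configures them after stopping the world, and then the main GC thread and every runtime·gchelper run the same sequence. Mirroring those calls:

    // one thread sets up the iteration spaces...
    parforsetup(&work.markfor, markroot, work.nproc, work.nroot, nil, false);
    parforsetup(&work.sweepfor, sweepspan, work.nproc, runtime·mheap.nspan, nil, true);

    // ...and each participating thread then executes:
    parfor(&work.markfor);   // markroot(&work.markfor, i) over a share of [0, work.nroot)
    scanblock(nil, 0);       // help drain the shared Workbuf queues
    parfor(&work.sweepfor);  // sweepspan(&work.sweepfor, i) over [0, runtime·mheap.nspan)

Each thread's remaining share is kept in thr[tid].pos as a packed [begin, end) pair ((uint64)begin | (uint64)end<<32); in the earlier patch set's parallelfor (left column below) an idle thread steals half of a victim's remaining range with cas64, and the Parfor fields above suggest the new parfor behaves the same way.
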
154 // scanblock scans a block of n bytes starting at pointer b for references 189 // scanblock scans a block of n bytes starting at pointer b for references
155 // to other objects, scanning any it finds recursively until there are no 190 // to other objects, scanning any it finds recursively until there are no
156 // unscanned objects left. Instead of using an explicit recursion, it keeps 191 // unscanned objects left. Instead of using an explicit recursion, it keeps
157 // a work list in the Workbuf* structures and loops in the main function 192 // a work list in the Workbuf* structures and loops in the main function
158 // body. Keeping an explicit work list is easier on the stack allocator and 193 // body. Keeping an explicit work list is easier on the stack allocator and
159 // more efficient. 194 // more efficient.
160 static void 195 static void
161 scanblock(byte *b, int64 n) 196 scanblock(byte *b, int64 n)
162 { 197 {
(...skipping 14 matching lines...)
177 // Memory arena parameters. 212 // Memory arena parameters.
178 arena_start = runtime·mheap.arena_start; 213 arena_start = runtime·mheap.arena_start;
179 arena_used = runtime·mheap.arena_used; 214 arena_used = runtime·mheap.arena_used;
180 nproc = work.nproc; 215 nproc = work.nproc;
181 216
182 wbuf = nil; // current work buffer 217 wbuf = nil; // current work buffer
183 wp = nil; // storage for next queued pointer (write pointer) 218 wp = nil; // storage for next queued pointer (write pointer)
184 nobj = 0; // number of queued objects 219 nobj = 0; // number of queued objects
185 220
186 // Scanblock helpers pass b==nil. 221 // Scanblock helpers pass b==nil.
187 » // The main proc needs to return to make more 222 » // Procs need to return to make more
188 // calls to scanblock. But if work.nproc==1 then 223 // calls to scanblock. But if work.nproc==1 then
189 // might as well process blocks as soon as we 224 // might as well process blocks as soon as we
190 // have them. 225 // have them.
191 keepworking = b == nil || work.nproc == 1; 226 keepworking = b == nil || work.nproc == 1;
192 227
193 // Align b to a word boundary. 228 // Align b to a word boundary.
194 off = (uintptr)b & (PtrSize-1); 229 off = (uintptr)b & (PtrSize-1);
195 if(off != 0) { 230 if(off != 0) {
196 b += PtrSize - off; 231 b += PtrSize - off;
197 n -= PtrSize - off; 232 n -= PtrSize - off;
198 } 233 }
199 234
200 for(;;) { 235 for(;;) {
201 m->gcstats.nscanblocks++;
202 ················
203 // Each iteration scans the block b of length n, queueing pointers in 236
204 // the work buffer. 237 // the work buffer.
205 if(Debug > 1) 238 if(Debug > 1)
206 runtime·printf("scanblock %p %D\n", b, n); 239 runtime·printf("scanblock %p %D\n", b, n);
207 240
208 vp = (void**)b; 241 vp = (void**)b;
209 n >>= (2+PtrSize/8); /* n /= PtrSize (4 or 8) */ 242 n >>= (2+PtrSize/8); /* n /= PtrSize (4 or 8) */
210 for(i=0; i<n; i++) { 243 for(i=0; i<n; i++) {
211 m->gcstats.nscanwords++;
212 obj = (byte*)vp[i]; 244 obj = (byte*)vp[i];
213 » » » 245
214 // Words outside the arena cannot be pointers. 246 // Words outside the arena cannot be pointers.
215 if((byte*)obj < arena_start || (byte*)obj >= arena_used) 247 if((byte*)obj < arena_start || (byte*)obj >= arena_used)
216 continue; 248 continue;
217 249
218 m->gcstats.nscanptr++;
219 // obj may be a pointer to a live object. 250 // obj may be a pointer to a live object.
220 // Try to find the beginning of the object. 251 // Try to find the beginning of the object.
221 252
222 // Round down to word boundary. 253 // Round down to word boundary.
223 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); 254 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
224 255
225 // Find bits for this word. 256 // Find bits for this word.
226 off = (uintptr*)obj - (uintptr*)arena_start; 257 off = (uintptr*)obj - (uintptr*)arena_start;
227 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; 258 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
228 shift = off % wordsPerBitmapWord; 259 shift = off % wordsPerBitmapWord;
(...skipping 10 matching lines...)
239 if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) { 270 if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
240 obj = (byte*)obj - (shift-j)*PtrSize; 271 obj = (byte*)obj - (shift-j)*PtrSize;
241 shift = j; 272 shift = j;
242 bits = xbits>>shift; 273 bits = xbits>>shift;
243 goto found; 274 goto found;
244 } 275 }
245 } 276 }
246 277
247 // Otherwise consult span table to find beginning. 278 // Otherwise consult span table to find beginning.
248 // (Manually inlined copy of MHeap_LookupMaybe.) 279 // (Manually inlined copy of MHeap_LookupMaybe.)
249 m->gcstats.naddrlookup++;
250 k = (uintptr)obj>>PageShift; 280 k = (uintptr)obj>>PageShift;
251 x = k; 281 x = k;
252 if(sizeof(void*) == 8) 282 if(sizeof(void*) == 8)
253 x -= (uintptr)arena_start>>PageShift; 283 x -= (uintptr)arena_start>>PageShift;
254 s = runtime·mheap.map[x]; 284 s = runtime·mheap.map[x];
255 if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse) 285 if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
256 continue; 286 continue;
257 p = (byte*)((uintptr)s->start<<PageShift); 287 p = (byte*)((uintptr)s->start<<PageShift);
258 if(s->sizeclass == 0) { 288 if(s->sizeclass == 0) {
259 obj = p; 289 obj = p;
260 } else { 290 } else {
261 if((byte*)obj >= (byte*)s->limit) 291 if((byte*)obj >= (byte*)s->limit)
262 continue; 292 continue;
263 size = runtime·class_to_size[s->sizeclass]; 293 size = runtime·class_to_size[s->sizeclass];
264 int32 i = ((byte*)obj - p)/size; 294 int32 i = ((byte*)obj - p)/size;
265 obj = p+i*size; 295 obj = p+i*size;
266 } 296 }
267 297
268 // Now that we know the object header, reload bits. 298 // Now that we know the object header, reload bits.
269 off = (uintptr*)obj - (uintptr*)arena_start; 299 off = (uintptr*)obj - (uintptr*)arena_start;
270 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; 300 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
271 shift = off % wordsPerBitmapWord; 301 shift = off % wordsPerBitmapWord;
272 xbits = *bitp; 302 xbits = *bitp;
273 bits = xbits >> shift; 303 bits = xbits >> shift;
274 304
275 found: 305 found:
306 // If another proc wants a pointer, give it some.
307 if(work.nwait > 0 && nobj > 4 && work.full == 0) {
308 wbuf->nobj = nobj;
309 wbuf = handoff(wbuf);
310 nobj = wbuf->nobj;
311 wp = wbuf->obj + nobj;
312 }
313
276 // Now we have bits, bitp, and shift correct for 314 // Now we have bits, bitp, and shift correct for
277 // obj pointing at the base of the object. 315 // obj pointing at the base of the object.
278 // Only care about allocated and not marked. 316 // Only care about allocated and not marked.
279 if((bits & (bitAllocated|bitMarked)) != bitAllocated) 317 if((bits & (bitAllocated|bitMarked)) != bitAllocated)
280 continue; 318 continue;
281 if(nproc == 1) 319 if(nproc == 1)
282 *bitp |= bitMarked<<shift; 320 *bitp |= bitMarked<<shift;
283 else { 321 else {
284 for(;;) { 322 for(;;) {
285 x = *bitp; 323 x = *bitp;
286 if(x & (bitMarked<<shift)) 324 if(x & (bitMarked<<shift))
287 goto continue_obj; 325 goto continue_obj;
288 if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift)))) 326 if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
289 break; 327 break;
290 } 328 }
291 } 329 }
292 330
293 // If object has no pointers, don't need to scan further. 331 // If object has no pointers, don't need to scan further.
294 if((bits & bitNoPointers) != 0) 332 if((bits & bitNoPointers) != 0)
295 continue; 333 continue;
296 334
297 » » » // If another proc wants a pointer, give it some. 335 » » » PREFETCH(obj);
298 » » » if(nobj > 4 && work.nwait > 0 && work.full == nil) {
299 » » » » wbuf->nobj = nobj;
300 » » » » wbuf = handoff(wbuf);
301 » » » » nobj = wbuf->nobj;
302 » » » » wp = wbuf->obj + nobj;
303 » » » }
304 336
305 // If buffer is full, get a new one. 337 // If buffer is full, get a new one.
306 if(wbuf == nil || nobj >= nelem(wbuf->obj)) { 338 if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
307 if(wbuf != nil) 339 if(wbuf != nil)
308 wbuf->nobj = nobj; 340 wbuf->nobj = nobj;
309 wbuf = getempty(wbuf); 341 wbuf = getempty(wbuf);
310 wp = wbuf->obj; 342 wp = wbuf->obj;
311 nobj = 0; 343 nobj = 0;
312 } 344 }
313 *wp++ = obj; 345 *wp++ = obj;
314 nobj++; 346 nobj++;
315 continue_obj:; 347 continue_obj:;
316 } 348 }
317 349
318 // Done scanning [b, b+n). Prepare for the next iteration of 350 // Done scanning [b, b+n). Prepare for the next iteration of
319 // the loop by setting b and n to the parameters for the next block. 351 // the loop by setting b and n to the parameters for the next block.
320 352
321 // Fetch b from the work buffer. 353 // Fetch b from the work buffer.
322 if(nobj == 0) { 354 if(nobj == 0) {
323 //runtime·printf("nobj==0\n");
324 if(!keepworking) { 355 if(!keepworking) {
325 » » » » putempty(wbuf); 356 » » » » if(wbuf != nil)
357 » » » » » putempty(wbuf);
326 return; 358 return;
327 } 359 }
328 // Emptied our buffer: refill. 360 // Emptied our buffer: refill.
329 wbuf = getfull(wbuf); 361 wbuf = getfull(wbuf);
330 if(wbuf == nil) 362 if(wbuf == nil)
331 return; 363 return;
332 nobj = wbuf->nobj; 364 nobj = wbuf->nobj;
333 wp = wbuf->obj + wbuf->nobj; 365 wp = wbuf->obj + wbuf->nobj;
334 //runtime·printf("getfull(%p) %d\n", wbuf, (uint32)wbuf->nobj);
335 } 366 }
336 b = *--wp; 367 b = *--wp;
337 nobj--; 368 nobj--;
338 369
339 » » // Figure out n = size of b. Start by loading bits for b. 370 » » // Ask span about size class.
340 » » off = (uintptr*)b - (uintptr*)arena_start;
341 » » bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
342 » » shift = off % wordsPerBitmapWord;
343 » » xbits = *bitp;
344 » » bits = xbits >> shift;
345
346 » » // Might be small; look for nearby block boundary.
347 » » // A block boundary is marked by either bitBlockBoundary
348 » » // or bitAllocated being set (see notes near their definition).
349 » » enum {
350 » » » boundary = bitBlockBoundary|bitAllocated
351 » » };
352 » » // Look for a block boundary both after and before b
353 » » // in the same bitmap word.
354 » » //
355 » » // A block boundary j words after b is indicated by
356 » » //» bits>>j & boundary
357 » » // assuming shift+j < bitShift. (If shift+j >= bitShift then
358 » » // we'll be bleeding other bit types like bitMarked into our test.)
359 » » // Instead of inserting the conditional shift+j < bitShift into the loop,
360 » » // we can let j range from 1 to bitShift as long as we first
361 » » // apply a mask to keep only the bits corresponding
362 » » // to shift+j < bitShift aka j < bitShift-shift.
363 » » bits &= (boundary<<(bitShift-shift)) - boundary;
364
365 » » // A block boundary j words before b is indicated by
366 » » //» xbits>>(shift-j) & boundary
367 » » // (assuming shift >= j). There is no cleverness here
368 » » // avoid the test, because when j gets too large the shift
369 » » // turns negative, which is undefined in C.
370
371 » » for(j=1; j<bitShift; j++) {
372 » » » if(((bits>>j)&boundary) != 0 || shift>=j && ((xbits>>(shift-j))&boundary) != 0) {
373 » » » » n = j*PtrSize;
374 » » » » goto scan;
375 » » » }
376 » » }
377
378 » » // Fall back to asking span about size class.
379 // (Manually inlined copy of MHeap_Lookup.) 371 // (Manually inlined copy of MHeap_Lookup.)
380 m->gcstats.nsizelookup++;
381 x = (uintptr)b>>PageShift; 372 x = (uintptr)b>>PageShift;
382 if(sizeof(void*) == 8) 373 if(sizeof(void*) == 8)
383 x -= (uintptr)arena_start>>PageShift; 374 x -= (uintptr)arena_start>>PageShift;
384 s = runtime·mheap.map[x]; 375 s = runtime·mheap.map[x];
385 if(s->sizeclass == 0) 376 if(s->sizeclass == 0)
386 n = s->npages<<PageShift; 377 n = s->npages<<PageShift;
387 else 378 else
388 n = runtime·class_to_size[s->sizeclass]; 379 n = runtime·class_to_size[s->sizeclass];
389 scan:;
390 } 380 }
391 } 381 }
392 382
393 // debug_scanblock is the debug copy of scanblock. 383 // debug_scanblock is the debug copy of scanblock.
394 // it is simpler, slower, single-threaded, recursive, 384 // it is simpler, slower, single-threaded, recursive,
395 // and uses bitSpecial as the mark bit. 385 // and uses bitSpecial as the mark bit.
396 static void 386 static void
397 debug_scanblock(byte *b, int64 n) 387 debug_scanblock(byte *b, int64 n)
398 { 388 {
399 byte *obj, *p; 389 byte *obj, *p;
(...skipping 25 matching lines...)
425 if((byte*)obj < runtime·mheap.arena_start || (byte*)obj >= runtime·mheap.arena_used) 415 if((byte*)obj < runtime·mheap.arena_start || (byte*)obj >= runtime·mheap.arena_used)
426 continue; 416 continue;
427 417
428 // Round down to word boundary. 418 // Round down to word boundary.
429 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); 419 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
430 420
431 // Consult span table to find beginning. 421 // Consult span table to find beginning.
432 s = runtime·MHeap_LookupMaybe(&runtime·mheap, obj); 422 s = runtime·MHeap_LookupMaybe(&runtime·mheap, obj);
433 if(s == nil) 423 if(s == nil)
434 continue; 424 continue;
435
436 425
437 p = (byte*)((uintptr)s->start<<PageShift); 426 p = (byte*)((uintptr)s->start<<PageShift);
438 if(s->sizeclass == 0) { 427 if(s->sizeclass == 0) {
439 obj = p; 428 obj = p;
440 size = (uintptr)s->npages<<PageShift; 429 size = (uintptr)s->npages<<PageShift;
441 } else { 430 } else {
442 if((byte*)obj >= (byte*)s->limit) 431 if((byte*)obj >= (byte*)s->limit)
443 continue; 432 continue;
444 size = runtime·class_to_size[s->sizeclass]; 433 size = runtime·class_to_size[s->sizeclass];
445 int32 i = ((byte*)obj - p)/size; 434 int32 i = ((byte*)obj - p)/size;
(...skipping 17 matching lines...)
463 runtime·printf("found unmarked block %p in %p\n", obj, vp+i); 452 runtime·printf("found unmarked block %p in %p\n", obj, vp+i);
464 453
465 // If object has no pointers, don't need to scan further. 454 // If object has no pointers, don't need to scan further.
466 if((bits & bitNoPointers) != 0) 455 if((bits & bitNoPointers) != 0)
467 continue; 456 continue;
468 457
469 debug_scanblock(obj, size); 458 debug_scanblock(obj, size);
470 } 459 }
471 } 460 }
472 461
462 static void
463 markroot(Parfor *desc, uint32 i)
464 {
465 USED(desc);
466 scanblock(work.roots[i].p, work.roots[i].n);
467 }
468
473 // Get an empty work buffer off the work.empty list, 469 // Get an empty work buffer off the work.empty list,
474 // allocating new buffers as needed. 470 // allocating new buffers as needed.
475 static Workbuf* 471 static Workbuf*
476 getempty(Workbuf *b) 472 getempty(Workbuf *b)
477 { 473 {
478 » if(work.nproc == 1) { 474 » if(b != nil)
479 » » // Put b on full list. 475 » » lifopush(&work.full, b);
480 » » if(b != nil) { 476 » b = lifopop(&work.empty);
481 » » » b->next = work.full; 477 » if(b == nil) {
482 » » » work.full = b; 478 » » // Need to allocate.
483 » » } 479 » » runtime·lock(&work);
484 » » // Grab from empty list if possible. 480 » » if(work.nchunk < sizeof *b) {
485 » » b = work.empty; 481 » » » work.nchunk = 1<<20;
486 » » if(b != nil) { 482 » » » work.chunk = runtime·SysAlloc(work.nchunk);
487 » » » work.empty = b->next; 483 » » }
488 » » » goto haveb; 484 » » b = (Workbuf*)work.chunk;
489 » » } 485 » » work.chunk += sizeof *b;
490 » } else { 486 » » work.nchunk -= sizeof *b;
491 » » // Put b on full list. 487 » » runtime·unlock(&work);
492 » » if(b != nil) { 488 » }
493 » » » runtime·lock(&work.fmu);
494 » » » b->next = work.full;
495 » » » work.full = b;
496 » » » runtime·unlock(&work.fmu);
497 » » }
498 » » // Grab from empty list if possible.
499 » » runtime·lock(&work.emu);
500 » » b = work.empty;
501 » » if(b != nil)
502 » » » work.empty = b->next;
503 » » runtime·unlock(&work.emu);
504 » » if(b != nil)
505 » » » goto haveb;
506 » }
507
508 » // Need to allocate.
509 » runtime·lock(&work);
510 » if(work.nchunk < sizeof *b) {
511 » » work.nchunk = 1<<20;
512 » » work.chunk = runtime·SysAlloc(work.nchunk);
513 » }
514 » b = (Workbuf*)work.chunk;
515 » work.chunk += sizeof *b;
516 » work.nchunk -= sizeof *b;
517 » runtime·unlock(&work);
518
519 haveb:
520 b->nobj = 0; 489 b->nobj = 0;
521 return b; 490 return b;
522 } 491 }
523 492
524 static void 493 static void
525 putempty(Workbuf *b) 494 putempty(Workbuf *b)
526 { 495 {
527 » if(b == nil) 496 » lifopush(&work.empty, b);
528 » » return;
529
530 » if(work.nproc == 1) {
531 » » b->next = work.empty;
532 » » work.empty = b;
533 » » return;
534 » }
535
536 » runtime·lock(&work.emu);
537 » b->next = work.empty;
538 » work.empty = b;
539 » runtime·unlock(&work.emu);
540 } 497 }
541 498
542 // Get a full work buffer off the work.full list, or return nil. 499 // Get a full work buffer off the work.full list, or return nil.
543 static Workbuf* 500 static Workbuf*
544 getfull(Workbuf *b) 501 getfull(Workbuf *b)
545 { 502 {
546 int32 i; 503 int32 i;
547 » Workbuf *b1; 504
548 505 » if(b != nil)
549 //runtime·printf("getfull nproc=%d b=%p(%d)\n", work.nproc, b, (uint32)(b?b->nobj:0)); 506 » » lifopush(&work.empty, b);
550 » if(work.nproc == 1) { 507 » b = lifopop(&work.full);
551 » » // Put b on empty list. 508 » if(b != nil || work.nproc == 1)
552 » » if(b != nil) {
553 » » » b->next = work.empty;
554 » » » work.empty = b;
555 » » }
556 » » // Grab from full list if possible.
557 » » // Since work.nproc==1, no one else is
558 » » // going to give us work.
559 » » b = work.full;
560 » » if(b != nil)
561 » » » work.full = b->next;
562 return b; 509 return b;
563 }
564
565 putempty(b);
566
567 // Grab buffer from full list if possible.
568 for(;;) {
569 b1 = work.full;
570 if(b1 == nil)
571 break;
572 runtime·lock(&work.fmu);
573 if(work.full != nil) {
574 b1 = work.full;
575 work.full = b1->next;
576 runtime·unlock(&work.fmu);
577 return b1;
578 }
579 runtime·unlock(&work.fmu);
580 }
581 510
582 runtime·xadd(&work.nwait, +1); 511 runtime·xadd(&work.nwait, +1);
583 for(i=0;; i++) { 512 for(i=0;; i++) {
584 » » b1 = work.full; 513 » » if(work.full != 0) {
585 » » if(b1 != nil) { 514 » » » runtime·xadd(&work.nwait, -1);
586 » » » runtime·lock(&work.fmu); 515 » » » b = lifopop(&work.full);
587 » » » if(work.full != nil) { 516 » » » if(b != nil)
588 » » » » runtime·xadd(&work.nwait, -1); 517 » » » » return b;
589 » » » » b1 = work.full; 518 » » » runtime·xadd(&work.nwait, +1);
590 » » » » work.full = b1->next;
591 » » » » runtime·unlock(&work.fmu);
592 » » » » return b1;
593 » » » }
594 » » » runtime·unlock(&work.fmu);
595 » » » continue;
596 } 519 }
597 if(work.nwait == work.nproc) 520 if(work.nwait == work.nproc)
598 return nil; 521 return nil;
599 » » if(i < 10) 522 » » if(i < 10) {
523 » » » m->gcstats.nprocyield++;
600 runtime·procyield(20); 524 runtime·procyield(20);
601 » » else if(i < 20) 525 » » } else if(i < 20) {
526 » » » m->gcstats.nosyield++;
602 runtime·osyield(); 527 runtime·osyield();
603 » » else 528 » » } else {
529 » » » m->gcstats.nsleep++;
604 runtime·usleep(100); 530 runtime·usleep(100);
531 }
605 } 532 }
606 } 533 }
607 534
608 static Workbuf* 535 static Workbuf*
609 handoff(Workbuf *b) 536 handoff(Workbuf *b)
610 { 537 {
611 int32 n; 538 int32 n;
612 Workbuf *b1; 539 Workbuf *b1;
613 540
614 // Make new buffer with half of b's pointers. 541 // Make new buffer with half of b's pointers.
615 b1 = getempty(nil); 542 b1 = getempty(nil);
616 n = b->nobj/2; 543 n = b->nobj/2;
617 b->nobj -= n; 544 b->nobj -= n;
618 b1->nobj = n; 545 b1->nobj = n;
619 runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]); 546 runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
620 m->gcstats.nhandoff += 1; 547 m->gcstats.nhandoff += 1;
621 m->gcstats.nhandoffcnt += n; 548 m->gcstats.nhandoffcnt += n;
622 549
623 // Put b on full list - let first half of b get stolen. 550 // Put b on full list - let first half of b get stolen.
624 » runtime·lock(&work.fmu); 551 » lifopush(&work.full, b);
625 » b->next = work.full;
626 » work.full = b;
627 » runtime·unlock(&work.fmu);
628
629 return b1; 552 return b1;
630 } 553 }
631 554
632 // Scanstack calls scanblock on each of gp's stack segments.
633 static void 555 static void
634 scanstack(void (*scanblock)(byte*, int64), G *gp) 556 addroot(byte *p, uintptr n)
557 {
558 » uint32 cap;
559 » GcRoot *new;
560
561 » if(work.nroot >= work.rootcap) {
562 » » cap = PageSize/sizeof(GcRoot);
563 » » if(cap < 2*work.rootcap)
564 » » » cap = 2*work.rootcap;
565 » » new = (GcRoot*)runtime·SysAlloc(cap*sizeof(GcRoot));
566 » » if(work.roots != nil) {
567 » » » runtime·memmove(new, work.roots, work.rootcap*sizeof(GcRoot));
568 » » » runtime·SysFree(work.roots, work.rootcap*sizeof(GcRoot));
569 » » }
570 » » work.roots = new;
571 » » work.rootcap = cap;
572 » }
573 » work.roots[work.nroot].p = p;
574 » work.roots[work.nroot].n = n;
575 » work.nroot++;
576 }
577
578 static void
579 addstackroots(G *gp)
635 { 580 {
636 M *mp; 581 M *mp;
637 int32 n; 582 int32 n;
638 Stktop *stk; 583 Stktop *stk;
639 byte *sp, *guard; 584 byte *sp, *guard;
640 585
641 stk = (Stktop*)gp->stackbase; 586 stk = (Stktop*)gp->stackbase;
642 guard = gp->stackguard; 587 guard = gp->stackguard;
643 588
644 if(gp == g) { 589 if(gp == g) {
(...skipping 12 matching lines...)
657 // as schedlock and may have needed to start a new stack segment. 602 // as schedlock and may have needed to start a new stack segment.
658 // Use the stack segment and stack pointer at the time of 603 // Use the stack segment and stack pointer at the time of
659 // the system call instead, since that won't change underfoot. 604 // the system call instead, since that won't change underfoot.
660 if(gp->gcstack != nil) { 605 if(gp->gcstack != nil) {
661 stk = (Stktop*)gp->gcstack; 606 stk = (Stktop*)gp->gcstack;
662 sp = gp->gcsp; 607 sp = gp->gcsp;
663 guard = gp->gcguard; 608 guard = gp->gcguard;
664 } 609 }
665 } 610 }
666 611
667 if(Debug > 1)
668 runtime·printf("scanstack %d %p\n", gp->goid, sp);
669 n = 0; 612 n = 0;
670 while(stk) { 613 while(stk) {
671 if(sp < guard-StackGuard || (byte*)stk < sp) { 614 if(sp < guard-StackGuard || (byte*)stk < sp) {
672 runtime·printf("scanstack inconsistent: g%d#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk); 615 runtime·printf("scanstack inconsistent: g%d#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk);
673 runtime·throw("scanstack"); 616 runtime·throw("scanstack");
674 } 617 }
675 scanblock(sp, (byte*)stk - sp);
676 sp = stk->gobuf.sp;
677 guard = stk->stackguard;
678 stk = (Stktop*)stk->stackbase;
679 n++;
680 }
681 }
682
683 // Markfin calls scanblock on the blocks that have finalizers:
684 // the things pointed at cannot be freed until the finalizers have run.
685 static void
686 markfin(void *v)
687 {
688 uintptr size;
689
690 size = 0;
691 if(!runtime·mlookup(v, &v, &size, nil) || !runtime·blockspecial(v))
692 runtime·throw("mark - finalizer inconsistency");
693
694 // do not mark the finalizer block itself. just mark the things it points at.
695 scanblock(v, size);
696 }
697
698 static void
699 debug_markfin(void *v)
700 {
701 uintptr size;
702
703 if(!runtime·mlookup(v, &v, &size, nil))
704 runtime·throw("debug_mark - finalizer inconsistency");
705 debug_scanblock(v, size);
706 }
707
708 // Mark
709 static void
710 mark(void (*scan)(byte*, int64))
711 {
712 G *gp;
713 FinBlock *fb;
714
715 // mark data+bss.
716 // skip runtime·mheap itself, which has no interesting pointers
717 // and is mostly zeroed and would not otherwise be paged in.
718 //runtime·printf("mark globals\n");
719 scan(data, (byte*)&runtime·mheap - data);
720 scan((byte*)(&runtime·mheap+1), end - (byte*)(&runtime·mheap+1));
721
722 // mark stacks
723 //runtime·printf("mark stacks\n");
724 for(gp=runtime·allg; gp!=nil; gp=gp->alllink) {
725 switch(gp->status){
726 default:
727 runtime·printf("unexpected G.status %d\n", gp->status);
728 runtime·throw("mark - bad status");
729 case Gdead:
730 break;
731 case Grunning:
732 if(gp != g)
733 runtime·throw("mark - world not stopped");
734 scanstack(scan, gp);
735 break;
736 case Grunnable:
737 case Gsyscall:
738 case Gwaiting:
739 scanstack(scan, gp);
740 break;
741 }
742 }
743
744 // mark things pointed at by objects with finalizers
745 //runtime·printf("mark fin\n");
746 if(scan == debug_scanblock)
747 runtime·walkfintab(debug_markfin);
748 else
749 runtime·walkfintab(markfin);
750
751 //runtime·printf("mark fin blocks\n");
752 for(fb=allfin; fb; fb=fb->alllink)
753 scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]));
754
755 // in multiproc mode, join in the queued work.
756 //runtime·printf("mark help\n");
757 scan(nil, 0);
758 }
759
760 static void
761 addroot(byte *p, uintptr n)
762 {
763 if(work.nroots == nelem(work.roots))
764 runtime·throw("gc: root overflow");
765 work.roots[work.nroots].p = p;
766 work.roots[work.nroots].n = n;
767 work.nroots++;
768 }
769
770 static void
771 collectstackroots(G *gp)
772 {
773 M *mp;
774 int32 n;
775 Stktop *stk;
776 byte *sp, *guard;
777 ········
778 stk = (Stktop*)gp->stackbase;
779 guard = gp->stackguard;
780 ········
781 if(gp == g) {
782 // Scanning our own stack: start at &gp.
783 sp = (byte*)&gp;
784 } else if((mp = gp->m) != nil && mp->helpgc) {
785 » » // gchelper's stack is in active use and has no interesting pointers.
786 return;
787 } else {
788 // Scanning another goroutine's stack.
789 // The goroutine is usually asleep (the world is stopped).
790 sp = gp->sched.sp;
791 ················
792 » » // The exception is that if the goroutine is about to enter or might
793 » » // have just exited a system call, it may be executing code such
794 » » // as schedlock and may have needed to start a new stack segment.
795 // Use the stack segment and stack pointer at the time of
796 // the system call instead, since that won't change underfoot.
797 if(gp->gcstack != nil) {
798 stk = (Stktop*)gp->gcstack;
799 sp = gp->gcsp;
800 guard = gp->gcguard;
801 }
802 }
803 ········
804 n = 0;
805 while(stk) {
806 if(sp < guard-StackGuard || (byte*)stk < sp) {
807 runtime·printf("scanstack inconsistent: g%d#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk);
808 runtime·throw("scanstack");
809 }
810 addroot(sp, (byte*)stk - sp); 618 addroot(sp, (byte*)stk - sp);
811 sp = stk->gobuf.sp; 619 sp = stk->gobuf.sp;
812 guard = stk->stackguard; 620 guard = stk->stackguard;
813 stk = (Stktop*)stk->stackbase; 621 stk = (Stktop*)stk->stackbase;
814 n++; 622 n++;
815 } 623 }
816 } 624 }
817 625
818 static void 626 static void
819 collectfinroots(void *v) 627 addfinroots(void *v)
820 { 628 {
821 uintptr size; 629 uintptr size;
822 » 630
823 size = 0; 631 size = 0;
824 if(!runtime·mlookup(v, &v, &size, nil) || !runtime·blockspecial(v)) 632 if(!runtime·mlookup(v, &v, &size, nil) || !runtime·blockspecial(v))
825 runtime·throw("mark - finalizer inconsistency"); 633 runtime·throw("mark - finalizer inconsistency");
826 » 634
827 // do not mark the finalizer block itself. just mark the things it points at. 635 // do not mark the finalizer block itself. just mark the things it points at.
828 addroot(v, size); 636 addroot(v, size);
829 } 637 }
830 638
831 static void 639 static void
832 collectroots(void) 640 addroots(void)
833 { 641 {
834 G *gp; 642 G *gp;
835 FinBlock *fb; 643 FinBlock *fb;
836 » byte *p, *e; 644 » byte *p;
837 645
838 » work.nroots = 0; 646 » work.nroot = 0;
839 »······· 647
840 » e = (byte*)&runtime·mheap; 648 » // mark data+bss.
841 » for(p=data; p<e; p+=4*1024) 649 » for(p=data; p<ebss; p+=DataBlock)
842 » » addroot(p, p+4*1024<e?4*1024:e-p); 650 » » addroot(p, p+DataBlock<ebss?DataBlock:ebss-p);
843 651
844 » e = end; 652 » runtime·walkfintab(addfinroots);
845 » for(p=(byte*)(&runtime·mheap+1); p<e; p+=4*1024) 653
846 » » addroot(p, p+4*1024<e?4*1024:e-p);
847 »·······
848 » runtime·walkfintab(collectfinroots);
849 »·······
850 for(fb=allfin; fb; fb=fb->alllink) 654 for(fb=allfin; fb; fb=fb->alllink)
851 addroot((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0])); 655 addroot((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]));
852 » 656
853 for(gp=runtime·allg; gp!=nil; gp=gp->alllink) { 657 for(gp=runtime·allg; gp!=nil; gp=gp->alllink) {
854 switch(gp->status){ 658 switch(gp->status){
855 default: 659 default:
856 runtime·printf("unexpected G.status %d\n", gp->status); 660 runtime·printf("unexpected G.status %d\n", gp->status);
857 runtime·throw("mark - bad status"); 661 runtime·throw("mark - bad status");
858 case Gdead: 662 case Gdead:
859 break; 663 break;
860 case Grunning: 664 case Grunning:
861 if(gp != g) 665 if(gp != g)
862 runtime·throw("mark - world not stopped"); 666 runtime·throw("mark - world not stopped");
863 » » » » collectstackroots(gp); 667 » » » » addstackroots(gp);
864 break; 668 break;
865 case Grunnable: 669 case Grunnable:
866 case Gsyscall: 670 case Gsyscall:
867 case Gwaiting: 671 case Gwaiting:
868 » » » » collectstackroots(gp); 672 » » » » addstackroots(gp);
869 break; 673 break;
870 } 674 }
871 } 675 }
872 ········
873 //runtime·printf("collected %d gc roots\n", (uint32)work.nroots);
874 } 676 }
875 677
876 static bool 678 static bool
877 handlespecial(byte *p, uintptr size) 679 handlespecial(byte *p, uintptr size)
878 { 680 {
879 void (*fn)(void*); 681 void (*fn)(void*);
880 int32 nret; 682 int32 nret;
881 FinBlock *block; 683 FinBlock *block;
882 Finalizer *f; 684 Finalizer *f;
883 » 685
884 if(!runtime·getfinalizer(p, true, &fn, &nret)) { 686 if(!runtime·getfinalizer(p, true, &fn, &nret)) {
885 runtime·setblockspecial(p, false); 687 runtime·setblockspecial(p, false);
886 runtime·MProf_Free(p, size); 688 runtime·MProf_Free(p, size);
887 return false; 689 return false;
888 } 690 }
889 691
890 runtime·lock(&finlock); 692 runtime·lock(&finlock);
891 if(finq == nil || finq->cnt == finq->cap) { 693 if(finq == nil || finq->cnt == finq->cap) {
892 if(finc == nil) { 694 if(finc == nil) {
893 finc = runtime·SysAlloc(PageSize); 695 finc = runtime·SysAlloc(PageSize);
894 finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1; 696 finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
895 finc->alllink = allfin; 697 finc->alllink = allfin;
896 allfin = finc; 698 allfin = finc;
897 } 699 }
898 block = finc; 700 block = finc;
899 finc = block->next; 701 finc = block->next;
900 block->next = finq; 702 block->next = finq;
901 finq = block; 703 finq = block;
902 } 704 }
903 f = &finq->fin[finq->cnt]; 705 f = &finq->fin[finq->cnt];
904 finq->cnt++; 706 finq->cnt++;
905 f->fn = fn; 707 f->fn = fn;
906 f->nret = nret; 708 f->nret = nret;
907 f->arg = p; 709 f->arg = p;
908 runtime·unlock(&finlock); 710 runtime·unlock(&finlock);
909 return true; 711 return true;
910 } 712 }
911 713
912 static void sweepspan(uint32 sid) 714 // Sweep frees or collects finalizers for blocks not marked in the mark phase.
715 // It clears the mark bits in preparation for the next GC round.
716 static void
717 sweepspan(Parfor *desc, uint32 spanidx)
913 { 718 {
914 MSpan *s; 719 MSpan *s;
915 int32 cl, n, npages; 720 int32 cl, n, npages;
916 uintptr size; 721 uintptr size;
917 byte *p; 722 byte *p;
918 MCache *c; 723 MCache *c;
919 byte *arena_start; 724 byte *arena_start;
920 725 » MLink *start, *end;
921 » s = runtime·mheap.allspans[sid]; 726 » int32 nfree;
727
728 » USED(desc);
729
730 » s = runtime·mheap.allspans[spanidx];
731 » // Stamp newly unused spans. The scavenger will use that
732 » // info to potentially give back some pages to the OS.
733 » if(s->state == MSpanFree && s->unusedsince == 0)
734 » » s->unusedsince = runtime·nanotime();
922 if(s->state != MSpanInUse) 735 if(s->state != MSpanInUse)
923 return; 736 return;
924 737
925 arena_start = runtime·mheap.arena_start; 738 arena_start = runtime·mheap.arena_start;
926 739
927 m->gcstats.nspanfull++;
928 p = (byte*)(s->start << PageShift); 740 p = (byte*)(s->start << PageShift);
929 cl = s->sizeclass; 741 cl = s->sizeclass;
930 if(cl == 0) { 742 if(cl == 0) {
931 m->gcstats.nspanlarge++;
932 size = s->npages<<PageShift; 743 size = s->npages<<PageShift;
933 n = 1; 744 n = 1;
934 } else { 745 } else {
935 // Chunk full of small blocks. 746 // Chunk full of small blocks.
936 size = runtime·class_to_size[cl]; 747 size = runtime·class_to_size[cl];
937 npages = runtime·class_to_allocnpages[cl]; 748 npages = runtime·class_to_allocnpages[cl];
938 n = (npages << PageShift) / size; 749 n = (npages << PageShift) / size;
939 } 750 }
940 c = m->mcache; 751 c = m->mcache;
752 nfree = 0;
753 start = end = nil;
941 754
942 // Sweep through n objects of given size starting at p. 755 // Sweep through n objects of given size starting at p.
943 // This thread owns the span now, so it can manipulate 756 // This thread owns the span now, so it can manipulate
944 // the block bitmap without atomic operations. 757 // the block bitmap without atomic operations.
945 for(; n > 0; n--, p += size) { 758 for(; n > 0; n--, p += size) {
946 uintptr off, *bitp, shift, bits; 759 uintptr off, *bitp, shift, bits;
947 760
948 off = (uintptr*)p - (uintptr*)arena_start; 761 off = (uintptr*)p - (uintptr*)arena_start;
949 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; 762 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
763 PREFETCH((byte*)bitp - size);
950 shift = off % wordsPerBitmapWord; 764 shift = off % wordsPerBitmapWord;
951 bits = *bitp>>shift; 765 bits = *bitp>>shift;
952 766
953 if((bits & bitAllocated) == 0) 767 if((bits & bitAllocated) == 0)
954 continue; 768 continue;
955 769
956 if((bits & bitMarked) != 0) { 770 if((bits & bitMarked) != 0) {
957 if(DebugMark) { 771 if(DebugMark) {
958 if(!(bits & bitSpecial)) 772 if(!(bits & bitSpecial))
959 runtime·printf("found spurious mark on %p\n", p); 773 runtime·printf("found spurious mark on %p\n", p);
960 *bitp &= ~(bitSpecial<<shift); 774 *bitp &= ~(bitSpecial<<shift);
961 } 775 }
962 *bitp &= ~(bitMarked<<shift); 776 *bitp &= ~(bitMarked<<shift);
963 continue; 777 continue;
964 } 778 }
965 779
966 // Special means it has a finalizer or is being profiled. 780 // Special means it has a finalizer or is being profiled.
967 // In DebugMark mode, the bit has been coopted so 781 // In DebugMark mode, the bit has been coopted so
968 // we have to assume all blocks are special. 782 // we have to assume all blocks are special.
969 if(DebugMark || (bits & bitSpecial) != 0) { 783 if(DebugMark || (bits & bitSpecial) != 0) {
970 if(handlespecial(p, size)) 784 if(handlespecial(p, size))
971 continue; 785 continue;
972 } 786 }
973 787
974 // Mark freed; restore block boundary bit. 788 // Mark freed; restore block boundary bit.
975 *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); 789 *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
976 790
977 » » if(s->sizeclass == 0) { 791 » » if(cl == 0) {
978 // Free large span. 792 // Free large span.
979 runtime·unmarkspan(p, 1<<PageShift); 793 runtime·unmarkspan(p, 1<<PageShift);
980 *(uintptr*)p = 1; // needs zeroing 794 *(uintptr*)p = 1; // needs zeroing
981 runtime·MHeap_Free(&runtime·mheap, s, 1); 795 runtime·MHeap_Free(&runtime·mheap, s, 1);
796 c->local_alloc -= size;
797 c->local_nfree++;
982 } else { 798 } else {
983 // Free small object. 799 // Free small object.
984 » » » if(size > sizeof(uintptr)) 800 » » » if(nfree) {
985 » » » » ((uintptr*)p)[1] = 1;» // mark as "needs to be zeroed" 801 » » » » PREFETCH(p);
986 » » » c->local_by_size[s->sizeclass].nfree++; 802 » » » » if(size > sizeof(uintptr))
987 » » » runtime·MCache_Free(c, p, s->sizeclass, size); 803 » » » » » ((uintptr*)end)[1] = 1; // mark as "needs to be zeroed"
988 » » } 804 » » » » end->next = (MLink*)p;
989 » » c->local_alloc -= size; 805 » » » » end = (MLink*)p;
990 » » c->local_nfree++; 806 » » » } else {
991 » } 807 » » » » start = (MLink*)p;
992 } 808 » » » » end = (MLink*)p;
993
994 static void
995 parallelfor(Parfor *desc, uint32 nthr, uint32 tid, uint32 count, void (*body)(uint32 i))
996 {
997 » uint32 begin, end, begin2, try, victim;
998 » uint64 *mypos, pos;
999 » bool idle;
1000
1001 » if(nthr > MaxGcproc || tid > nthr || body == nil) {
1002 » » runtime·printf("nthr=%d tid=%d count=%d body=%p\n",
1003 » » » nthr, tid, count, body);
1004 » » runtime·throw("parallel for: invalid args");
1005 » }
1006
1007 » if(nthr==1 || count<=1) {
1008 » » for(pos=0; pos<count; pos++)
1009 » » » body(pos);
1010 » » return;
1011 » }
1012
1013 » begin = count / nthr * tid;
1014 » end = count / nthr * (tid+1);
1015 » if(tid==nthr-1)
1016 » » end = count;
1017 » mypos = &desc->thr[tid].pos;
1018 » runtime·atomicstore64(mypos, (uint64)begin | ((uint64)end)<<32);
1019 » //runtime·printf("thr #%d(%d): %d-%d (%d)\n", tid, work.nproc, begin, end, runtime·mheap.nspan);
1020
1021 » for(;;) {
1022 » » //spanpos = runtime·atomicload64(&thr->spanpos);
1023 » » //begin = (uint32)spanpos;
1024 » » //end = (uint32)(spanpos>>32);
1025 » » //runtime·printf("thr #%d(%d): starting with %d-%d:%d\n", tid, work.nproc, begin, end, end-begin);
1026
1027 » » for(;;) {
1028 » » » pos = runtime·xadd64(mypos, 1);
1029 » » » begin = (uint32)pos-1;
1030 » » » end = (uint32)(pos>>32);
1031 » » » if(begin>=end)
1032 » » » » break;
1033 » » » body(begin);
1034 » » }
1035
1036 » » idle = false;
1037 » » for(try=0;; try++) {
1038 » » » if(try > nthr*4) {
1039 » » » » if(idle == false) {
1040 » » » » » runtime·xadd(&desc->done, 1);
1041 » » » » » idle = true;
1042 » » » » }
1043 » » » » if(desc->done == nthr)
1044 » » » » » return;
1045 } 809 }
1046 » » » victim = runtime·fastrand1() % (nthr-1); 810 » » » nfree++;
1047 » » » if(victim >= tid) 811 » » }
1048 » » » » victim++; 812 » }
1049 » » » pos = runtime·atomicload64(&desc->thr[victim].pos); 813
1050 » » » //begin = (uint32)spanpos; 814 » if(nfree) {
1051 » » » //end = (uint32)(spanpos>>32); 815 » » if(size > sizeof(uintptr))
1052 » » » //runtime·printf("thr #%d(%d): stealing from %d (%d-%d:%d)\n", tid, work.nproc, victim, begin, end, end-begin); 816 » » » ((uintptr*)end)[1] = 1; // mark as "needs to be zeroed"
1053 » » » for(;;) { 817 » » c->local_by_size[s->sizeclass].nfree += nfree;
1054 » » » » begin = (uint32)pos; 818 » » c->local_alloc -= size * nfree;
1055 » » » » end = (uint32)(pos>>32); 819 » » c->local_nfree += nfree;
1056 » » » » if(begin >= end-1) { 820 » » c->local_cachealloc -= nfree * size;
1057 » » » » » begin = end = 0; 821 » » c->local_objects -= nfree;
1058 » » » » » break; 822 » » runtime·MCentral_FreeSpan(&runtime·mheap.central[cl],
1059 » » » » } 823 » » » » » s, nfree, start, end);
1060 » » » » if(idle) { 824 » }
1061 » » » » » runtime·xadd(&desc->done, -1);
1062 » » » » » idle = false;
1063 » » » » }
1064 » » » » begin2 = begin + (end-begin)/2;
1065 » » » » if(runtime·cas64(&desc->thr[victim].pos, &pos, (uint64)begin | (((uint64)begin2)<<32))) {
1066 » » » » » begin = begin2;
1067 » » » » » break;
1068 » » » » }
1069 » » » }
1070 » » » if(begin < end) {
1071 » » » » if(idle)
1072 » » » » » runtime·xadd(&desc->done, -1);
1073 » » » » runtime·atomicstore64(mypos, (uint64)begin | ((u int64)end)<<32);
1074 » » » » m->gcstats.nspansteals += 1;
1075 » » » » m->gcstats.nspanstolen += end-begin;
1076 » » » » break;
1077 » » » }
1078 » » » if(try<nthr) {
1079 » » » } else if (try<nthr*2) {
1080 » » » » runtime·procyield(20);
1081 » » » } else if (try<nthr*4) {
1082 » » » » runtime·osyield();
1083 » » » } else {
1084 » » » » runtime·usleep(1);
1085 » » » }
1086 » » }
1087 » }
1088 }
1089
1090 static void
1091 markcb(uint32 i)
1092 {
1093 » scanblock(work.roots[i].p, work.roots[i].n);
1094 }
1095
1096 static void
1097 parmark(uint32 tid)
1098 {
1099 » //uintptr i;
1100
1101 » parallelfor(&markfor, work.nproc, tid, work.nroots, markcb);
1102 » //» for(i=0; i<work.nroots; i++)
1103 » //» » scanblock(work.roots[i].p, work.roots[i].n);
1104 » scanblock(nil, 0);
1105 }
1106
1107 // Sweep frees or collects finalizers for blocks not marked in the mark phase.
1108 // It clears the mark bits in preparation for the next GC round.
1109 static void
1110 sweep(uint32 tid)
1111 {
1112 » //GcThread *thr;
1113 » //MSpan *s;
1114 » //uint64 spanpos;
1115
1116 » parallelfor(&sweepfor, work.nproc, tid, runtime·mheap.nspan, sweepspan);
1117
1118 /*
1119 » uint32 begin = runtime·mheap.nspan / work.nproc * tid;
1120 » uint32 end = runtime·mheap.nspan / work.nproc * (tid+1);
1121 » if(tid==work.nproc-1)
1122 » » end = runtime·mheap.nspan;
1123 » thr = &work.thr[tid];
1124 » runtime·atomicstore64(&thr->spanpos, (uint64)begin | ((uint64)end)<<32);
1125 //runtime·printf("thr #%d(%d): %d-%d (%d)\n", tid, work.nproc, begin, end, runtime·mheap.nspan);
1126
1127 » for(;;) {
1128 » » //spanpos = runtime·atomicload64(&thr->spanpos);
1129 » » //begin = (uint32)spanpos;
1130 » » //end = (uint32)(spanpos>>32);
1131 » » //runtime·printf("thr #%d(%d): starting with %d-%d:%d\n", tid, work.nproc, begin, end, end-begin);
1132
1133 » » for(;;) {
1134 » » » spanpos = runtime·xadd64(&thr->spanpos, 1);
1135 » » » begin = (uint32)spanpos-1;
1136 » » » end = (uint32)(spanpos>>32);
1137 » » » if(begin>=end)
1138 » » » » break;
1139
1140 » » » sweepspan(begin);
1141 » » }
1142
1143 » » if(work.nproc==1)
1144 » » » return;
1145
1146 » » uint32 try;
1147 » » bool idle = false;
1148 » » for(try=0;; try++) {
1149 » » » if(try > work.nproc*4) {
1150 » » » » if(idle == false) {
1151 » » » » » runtime·xadd(&work.sweepdone, 1);
1152 » » » » » idle = true;
1153 » » » » }
1154 » » » » if(work.sweepdone == work.nproc)
1155 » » » » » return;
1156 » » » }
1157 » » » uint32 victim = runtime·fastrand1() % (work.nproc-1);
1158 » » » if(victim >= tid)
1159 » » » » victim++;
1160 » » » spanpos = runtime·atomicload64(&work.thr[victim].spanpos);
1161 » » » //begin = (uint32)spanpos;
1162 » » » //end = (uint32)(spanpos>>32);
1163 » » » //runtime·printf("thr #%d(%d): stealing from %d (%d-%d:%d)\n", tid, work.nproc, victim, begin, end, end-begin);
1164 » » » for(;;) {
1165 » » » » begin = (uint32)spanpos;
1166 » » » » end = (uint32)(spanpos>>32);
1167 » » » » if(begin >= end-1) {
1168 » » » » » begin = end = 0;
1169 » » » » » break;
1170 » » » » }
1171 » » » » if(idle) {
1172 » » » » » runtime·xadd(&work.sweepdone, -1);
1173 » » » » » idle = false;
1174 » » » » }
1175 » » » » uint32 mybegin = begin + (end-begin)/2;
1176 » » » » if(runtime·cas64(&work.thr[victim].spanpos, &spanpos, (uint64)begin | (((uint64)mybegin)<<32))) {
1177 » » » » » begin = mybegin;
1178 » » » » » break;
1179 » » » » }
1180 » » » }
1181 » » » if(begin < end) {
1182 » » » » if(idle)
1183 » » » » » runtime·xadd(&work.sweepdone, -1);
1184 » » » » runtime·atomicstore64(&thr->spanpos, (uint64)begin | ((uint64)end)<<32);
1185 » » » » m->gcstats.nspansteals += 1;
1186 » » » » m->gcstats.nspanstolen += end-begin;
1187 » » » » break;
1188 » » » }
1189 » » » if(try<work.nproc) {
1190 » » » } else if (try<work.nproc*2) {
1191 » » » » runtime·procyield(20);
1192 » » » } else if (try<work.nproc*4) {
1193 » » » » runtime·osyield();
1194 » » » } else {
1195 » » » » runtime·usleep(1);
1196 » » » }
1197 » » }
1198 » }
1199 » */
1200 } 825 }
1201 826
1202 void 827 void
1203 runtime·gchelper(void) 828 runtime·gchelper(void)
1204 { 829 {
1205 » uint32 tid; 830 » // parallel mark for over gc roots
1206 831 » parfor(&work.markfor);
1207 » tid = runtime·xadd(&work.tidseq, 1); 832 » // help other threads scan secondary blocks
1208 833 » scanblock(nil, 0);
1209 » parmark(tid); 834
1210 » //scanblock(nil, 0);
1211 if(DebugMark) { 835 if(DebugMark) {
1212 » » while(runtime·atomicload(&work.markdone) == 0) 836 » » // wait while the main thread executes mark(debug_scanblock)
837 » » while(runtime·atomicload(&work.debugmarkdone) == 0)
1213 runtime·usleep(10); 838 runtime·usleep(10);
1214 } 839 }
1215 840
1216 » sweep(tid); 841 » // parallel sweep for over all spans
842 » parfor(&work.sweepfor);
1217 843
1218 if(runtime·xadd(&work.ndone, +1) == work.nproc-1) 844 if(runtime·xadd(&work.ndone, +1) == work.nproc-1)
1219 runtime·notewakeup(&work.alldone); 845 runtime·notewakeup(&work.alldone);
1220 } 846 }
1221
1222 // Semaphore, not Lock, so that the goroutine
1223 // reschedules when there is contention rather
1224 // than spinning.
1225 static uint32 gcsema = 1;
1226 847
1227 // Initialized from $GOGC. GOGC=off means no gc. 848 // Initialized from $GOGC. GOGC=off means no gc.
1228 // 849 //
1229 // Next gc is after we've allocated an extra amount of 850 // Next gc is after we've allocated an extra amount of
1230 // memory proportional to the amount already in use. 851 // memory proportional to the amount already in use.
1231 // If gcpercent=100 and we're using 4M, we'll gc again 852 // If gcpercent=100 and we're using 4M, we'll gc again
1232 // when we get to 8M. This keeps the gc cost in linear 853 // when we get to 8M. This keeps the gc cost in linear
1233 // proportion to the allocation cost. Adjusting gcpercent 854 // proportion to the allocation cost. Adjusting gcpercent
1234 // just changes the linear constant (and also the amount of 855 // just changes the linear constant (and also the amount of
1235 // extra memory used). 856 // extra memory used).
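
Concretely, this is the trigger computed near the end of runtime·gc below:

    // next trigger = live heap * (1 + gcpercent/100)
    mstats.next_gc = mstats.heap_alloc + mstats.heap_alloc*gcpercent/100;

so with GOGC=100 and 4M live after a collection the next one starts around 8M, and GOGC=off makes gcpercent negative, which disables collection entirely.
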
(...skipping 43 matching lines...)
1279 } 900 }
1280 mstats.stacks_inuse = stacks_inuse; 901 mstats.stacks_inuse = stacks_inuse;
1281 mstats.stacks_sys = stacks_sys; 902 mstats.stacks_sys = stacks_sys;
1282 } 903 }
1283 904
1284 void 905 void
1285 runtime·gc(int32 force) 906 runtime·gc(int32 force)
1286 { 907 {
1287 int64 t0, t1, t2, t3; 908 int64 t0, t1, t2, t3;
1288 uint64 heap0, heap1, obj0, obj1; 909 uint64 heap0, heap1, obj0, obj1;
910 GCStats stats;
911 uint32 i;
1289 byte *p; 912 byte *p;
1290 GCStats stats;
1291 913
1292 // The gc is turned off (via enablegc) until 914 // The gc is turned off (via enablegc) until
1293 // the bootstrap has completed. 915 // the bootstrap has completed.
1294 // Also, malloc gets called in the guts 916 // Also, malloc gets called in the guts
1295 // of a number of libraries that might be 917 // of a number of libraries that might be
1296 // holding locks. To avoid priority inversion 918 // holding locks. To avoid priority inversion
1297 // problems, don't bother trying to run gc 919 // problems, don't bother trying to run gc
1298 // while holding a lock. The next mallocgc 920 // while holding a lock. The next mallocgc
1299 // without a lock will do the gc instead. 921 // without a lock will do the gc instead.
1300 if(!mstats.enablegc || m->locks > 0 || runtime·panicking) 922 if(!mstats.enablegc || m->locks > 0 || runtime·panicking)
1301 return; 923 return;
1302 924
1303 if(gcpercent == -2) { // first time through 925 if(gcpercent == -2) { // first time through
1304 p = runtime·getenv("GOGC"); 926 p = runtime·getenv("GOGC");
1305 if(p == nil || p[0] == '\0') 927 if(p == nil || p[0] == '\0')
1306 gcpercent = 100; 928 gcpercent = 100;
1307 else if(runtime·strcmp(p, (byte*)"off") == 0) 929 else if(runtime·strcmp(p, (byte*)"off") == 0)
1308 gcpercent = -1; 930 gcpercent = -1;
1309 else 931 else
1310 gcpercent = runtime·atoi(p); 932 gcpercent = runtime·atoi(p);
1311 933
1312 p = runtime·getenv("GOGCTRACE"); 934 p = runtime·getenv("GOGCTRACE");
1313 if(p != nil) 935 if(p != nil)
1314 gctrace = runtime·atoi(p); 936 gctrace = runtime·atoi(p);
1315 } 937 }
1316 if(gcpercent < 0) 938 if(gcpercent < 0)
1317 return; 939 return;
1318 940
1319 » runtime·semacquire(&gcsema); 941 » runtime·semacquire(&runtime·worldsema);
1320 if(!force && mstats.heap_alloc < mstats.next_gc) { 942 if(!force && mstats.heap_alloc < mstats.next_gc) {
1321 » » runtime·semrelease(&gcsema); 943 » » runtime·semrelease(&runtime·worldsema);
1322 return; 944 return;
1323 } 945 }
1324 946
1325 t0 = runtime·nanotime(); 947 t0 = runtime·nanotime();
1326 948
1327 m->gcing = 1; 949 m->gcing = 1;
1328 runtime·stoptheworld(); 950 runtime·stoptheworld();
1329 951
1330 heap0 = 0; 952 heap0 = 0;
1331 obj0 = 0; 953 obj0 = 0;
1332 if(gctrace) { 954 if(gctrace) {
1333 cachestats(0); 955 cachestats(0);
1334 heap0 = mstats.heap_alloc; 956 heap0 = mstats.heap_alloc;
1335 obj0 = mstats.nmalloc - mstats.nfree; 957 obj0 = mstats.nmalloc - mstats.nfree;
1336 } 958 }
1337 959
1338 work.nproc = runtime·gcprocs(); 960 work.nproc = runtime·gcprocs();
1339 sweepfor.done = 0;
1340 markfor.done = 0;
1341
1342 work.nwait = 0; 961 work.nwait = 0;
1343 work.ndone = 0; 962 work.ndone = 0;
1344 » work.tidseq = 0; 963 » work.debugmarkdone = 0;
1345 » work.markdone = 0; 964 » addroots();
1346 »       //work.sweepdone = 0;                                            965 »       parforsetup(&work.markfor, markroot, work.nproc, work.nroot, nil, false);
1347 » collectroots(); 966 » parforsetup(&work.sweepfor, sweepspan, work.nproc, runtime·mheap.nspan, nil, true);
1348 if(work.nproc > 1) { 967 if(work.nproc > 1) {
1349 runtime·noteclear(&work.alldone); 968 runtime·noteclear(&work.alldone);
1350 » » runtime·helpgc(); 969 » » runtime·helpgc(work.nproc);
1351 » } 970 » }
1352 971
1353 //runtime·printf("mark\n"); 972 » parfor(&work.markfor);
1354 » parmark(0); 973 » scanblock(nil, 0);
1355 974
1356 » //mark(scanblock);
1357 if(DebugMark) { 975 if(DebugMark) {
1358 » » mark(debug_scanblock); 976 » » for(i=0; i<work.nroot; i++)
1359 » » runtime·xchg(&work.markdone, 1); 977 » » » debug_scanblock(work.roots[i].p, work.roots[i].n);
978 » » runtime·xchg(&work.debugmarkdone, 1);
1360 } 979 }
1361 t1 = runtime·nanotime(); 980 t1 = runtime·nanotime();
1362 981 » // parallel sweep for over all spans
1363 //runtime·printf("sweep\n"); 982 » parfor(&work.sweepfor);
1364 » sweep(0);
1365 » if(work.nproc > 1)
1366 » » runtime·notesleep(&work.alldone);
1367 t2 = runtime·nanotime(); 983 t2 = runtime·nanotime();
1368 984
1369 stealcache(); 985 stealcache();
1370 cachestats(&stats); 986 cachestats(&stats);
1371 987
1372 mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; 988 mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
1373 m->gcing = 0; 989 m->gcing = 0;
1374 990
1375 m->locks++; // disable gc during the mallocs in newproc
1376 if(finq != nil) { 991 if(finq != nil) {
992 m->locks++; // disable gc during the mallocs in newproc
1377 // kick off or wake up goroutine to run queued finalizers 993 // kick off or wake up goroutine to run queued finalizers
1378 if(fing == nil) 994 if(fing == nil)
1379                         fing = runtime·newproc1((byte*)runfinq, nil, 0, 0, runtime·gc);  995                         fing = runtime·newproc1((byte*)runfinq, nil, 0, 0, runtime·gc);
1380 else if(fingwait) { 996 else if(fingwait) {
1381 fingwait = 0; 997 fingwait = 0;
1382 runtime·ready(fing); 998 runtime·ready(fing);
1383 } 999 }
1384 » } 1000 » » m->locks--;
1385 » m->locks--; 1001 » }
1002 »·······
1003 » if(work.nproc > 1)
1004 » » runtime·notesleep(&work.alldone);
1386 1005
1387 heap1 = mstats.heap_alloc; 1006 heap1 = mstats.heap_alloc;
1388 obj1 = mstats.nmalloc - mstats.nfree; 1007 obj1 = mstats.nmalloc - mstats.nfree;
1389 1008
1390 t3 = runtime·nanotime(); 1009 t3 = runtime·nanotime();
1010 mstats.last_gc = t3;
1391 mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0; 1011 mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0;
1392 mstats.pause_total_ns += t3 - t0; 1012 mstats.pause_total_ns += t3 - t0;
1393 mstats.numgc++; 1013 mstats.numgc++;
1394 if(mstats.debuggc) 1014 if(mstats.debuggc)
1395 runtime·printf("pause %D\n", t3-t0); 1015 runtime·printf("pause %D\n", t3-t0);
1396 1016
1397 if(gctrace) { 1017 if(gctrace) {
1398 »       »       runtime·printf("gc%d(%d): %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D pointer lookups (%D size, %D addr) %D(%D) handoff, scan block/word/ptr: %D/%D/%D\n", 1018 »       »       runtime·printf("gc%d(%d): %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects, %D(%D) steals, %D(%D) handoffs, %D/%D/%D yields\n",
1399                         mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000, 1019                         mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000,
1400 heap0>>20, heap1>>20, obj0, obj1, 1020 heap0>>20, heap1>>20, obj0, obj1,
1401 mstats.nmalloc, mstats.nfree, 1021 mstats.nmalloc, mstats.nfree,
1402 » » » stats.nsizelookup+stats.naddrlookup, stats.nsizelookup, stats.naddrlookup, 1022 » » » stats.nsteal, stats.nstealcnt,
1403 stats.nhandoff, stats.nhandoffcnt, 1023 stats.nhandoff, stats.nhandoffcnt,
1404 » » » stats.nscanblocks, stats.nscanwords, stats.nscanptr); 1024 » » » stats.nprocyield, stats.nosyield, stats.nsleep);
1405 » » runtime·printf(" sweep: span total/full/large %D/%D/%D steal count/amount %D/%D\n", 1025 » }
1406 » » » runtime·mheap.nspan, stats.nspanfull, stats.nspanlarge, stats.nspansteals, stats.nspanstolen); 1026 »·······
1407 » } 1027 » runtime·MProf_GC();
1408 1028 » runtime·semrelease(&runtime·worldsema);
1409 » runtime·semrelease(&gcsema);
1410 runtime·starttheworld(); 1029 runtime·starttheworld();
1411 1030
1412 // give the queued finalizers, if any, a chance to run 1031 // give the queued finalizers, if any, a chance to run
1413 if(finq != nil) 1032 if(finq != nil)
1414 runtime·gosched(); 1033 runtime·gosched();
1415 1034
1416 if(gctrace > 1 && !force) 1035 if(gctrace > 1 && !force)
1417 runtime·gc(1); 1036 runtime·gc(1);
1418 } 1037 }
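The right-hand side replaces the pointer-lookup and scan counters in the GOGCTRACE line with steal, handoff and yield counters. A hypothetical annotated sample of the new format (all numbers invented; the annotations name the printf arguments visible above):

	/*
	 * gc5(2): 1+11+0 ms 4 -> 3 MB 61000 -> 42000 (250000-208000) objects, 14(3600) steals, 2(40) handoffs, 6/1/0 yields
	 *
	 * gc5(2)            fifth collection, work.nproc == 2 GC threads
	 * 1+11+0 ms         t1-t0 (mark), t2-t1 (sweep), t3-t2 (finish), in milliseconds
	 * 4 -> 3 MB         heap0>>20 and heap1>>20: heap_alloc before and after
	 * 61000 -> 42000    obj0 and obj1: live objects before and after
	 * (250000-208000)   cumulative mstats.nmalloc and mstats.nfree
	 * 14(3600) steals   parfor range steals (and total iterations stolen)
	 * 2(40) handoffs    work-buffer handoffs (and objects handed off)
	 * 6/1/0 yields      procyield / osyield / usleep backoffs while idle
	 */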
1419 1038
1420 void 1039 void
1421 runtime·UpdateMemStats(void) 1040 runtime·ReadMemStats(MStats *stats)
1422 { 1041 {
1423 » // Have to acquire gcsema to stop the world, 1042 » // Have to acquire worldsema to stop the world,
1424 // because stoptheworld can only be used by 1043 // because stoptheworld can only be used by
1425 // one goroutine at a time, and there might be 1044 // one goroutine at a time, and there might be
1426 // a pending garbage collection already calling it. 1045 // a pending garbage collection already calling it.
1427 » runtime·semacquire(&gcsema); 1046 » runtime·semacquire(&runtime·worldsema);
1428 m->gcing = 1; 1047 m->gcing = 1;
1429 runtime·stoptheworld(); 1048 runtime·stoptheworld();
1430 cachestats(0); 1049 cachestats(0);
1050 *stats = mstats;
1431 m->gcing = 0; 1051 m->gcing = 0;
1432 » runtime·semrelease(&gcsema); 1052 » runtime·semrelease(&runtime·worldsema);
1433 runtime·starttheworld(); 1053 runtime·starttheworld();
1434 } 1054 }
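Both runtime·gc and the renamed runtime·ReadMemStats now bracket their work with the same worldsema protocol, and both release the semaphore before restarting the world. Pulled out as a sketch (with_world_stopped is a hypothetical helper, not a function in the patch):

	// Sketch of the shared stop-the-world protocol.
	static void
	with_world_stopped(void (*do_quiescent_work)(void))
	{
		runtime·semacquire(&runtime·worldsema);  // one stopper at a time
		m->gcing = 1;
		runtime·stoptheworld();
		do_quiescent_work();                     // heap and stacks are quiescent here
		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);  // release before restarting
		runtime·starttheworld();
	}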
1435 1055
1436 static void 1056 static void
1437 runfinq(void) 1057 runfinq(void)
1438 { 1058 {
1439 Finalizer *f; 1059 Finalizer *f;
1440 FinBlock *fb, *next; 1060 FinBlock *fb, *next;
1441 byte *frame; 1061 byte *frame;
1442 uint32 framesz, framecap, i; 1062 uint32 framesz, framecap, i;
(...skipping 235 matching lines...)
1678 uintptr n; 1298 uintptr n;
1679 1299
1680 n = (h->arena_used - h->arena_start) / wordsPerBitmapWord; 1300 n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
1681 n = (n+bitmapChunk-1) & ~(bitmapChunk-1); 1301 n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
1682 if(h->bitmap_mapped >= n) 1302 if(h->bitmap_mapped >= n)
1683 return; 1303 return;
1684 1304
1685 runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped); 1305 runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped);
1686 h->bitmap_mapped = n; 1306 h->bitmap_mapped = n;
1687 } 1307 }
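The second assignment in the function above is the usual round-up-to-a-power-of-two idiom. A tiny standalone example of the same arithmetic (the bitmapChunk value is assumed here purely for illustration):

	#include <assert.h>
	#include <stdint.h>

	enum { bitmapChunk = 8192 };  // must be a power of two for the mask trick

	static uintptr_t
	round_up(uintptr_t n)
	{
		return (n + bitmapChunk - 1) & ~(uintptr_t)(bitmapChunk - 1);
	}

	int
	main(void)
	{
		assert(round_up(1) == 8192);      // anything in (0, 8192] rounds to 8192
		assert(round_up(8192) == 8192);   // already-aligned values are unchanged
		assert(round_up(8193) == 16384);  // otherwise, up to the next boundary
		return 0;
	}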
1308
1309 #ifdef _64BIT
1310 // Amd64 uses 48-bit virtual addresses; the 47th bit distinguishes kernel from user space.
1311 // So we use the 17 most significant bits of pointers as an ABA counter.
1312 # define PTR_BITS 47
1313 #else
1314 # define PTR_BITS 32
1315 #endif
1316 #define PTR_MASK ((1ull<<PTR_BITS)-1)
1317
1318 static void
1319 lifopush(uint64 *a, Workbuf *b)
1320 {
1321 uint64 old, new;
1322
1323 if((uint64)b != ((uint64)b&PTR_MASK)) {
1324 runtime·printf("p=%p\n", b);
1325 runtime·throw("lifopush: invalid pointer");
1326 }
1327
1328 if(work.nproc == 1) {
1329 b->next = (Workbuf*)(*a&PTR_MASK);
1330 *a = (uint64)b;
1331 return;
1332 }
1333
1334 b->pushcnt++;
1335 new = (uint64)b|(((uint64)b->pushcnt)<<PTR_BITS);
1336 old = runtime·atomicload64(a);
1337 for(;;) {
1338 b->next = (Workbuf*)(old&PTR_MASK);
1339 if(runtime·cas64(a, &old, new))
1340 break;
1341 }
1342 }
1343
1344 static Workbuf*
1345 lifopop(uint64 *a)
1346 {
1347 Workbuf *b, *b2;
1348 uint64 old, new;
1349
1350 if(work.nproc == 1) {
1351 b = (Workbuf*)(*a&PTR_MASK);
1352 if(b == nil)
1353 return nil;
1354 *a = (uint64)b->next;
1355 return b;
1356 }
1357
1358 old = runtime·atomicload64(a);
1359 for(;;) {
1360 if(old == 0)
1361 return nil;
1362 b = (Workbuf*)(old&PTR_MASK);
1363 b2 = runtime·atomicloadp(&b->next);
1364 new = 0;
1365 if(b2 != nil)
1366 new = (uint64)b2|(((uint64)b2->pushcnt)<<PTR_BITS);
1367 if(runtime·cas64(a, &old, new))
1368 return b;
1369 }
1370 }
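lifopush and lifopop above form a Treiber-style lock-free stack; the spare bits above PTR_BITS carry a per-buffer push counter so that a CAS holding a stale head cannot succeed after the same Workbuf has been popped and re-pushed (the classic ABA hazard). A standalone sketch of the 64-bit packing with hypothetical helper names (the patch open-codes these expressions):

	#include <stdint.h>

	#define PTR_BITS 47
	#define PTR_MASK ((1ull<<PTR_BITS)-1)

	static uint64_t
	pack(void *p, uint32_t pushcnt)
	{
		// low 47 bits: pointer; high 17 bits: push counter (it simply wraps)
		return ((uint64_t)(uintptr_t)p & PTR_MASK) | ((uint64_t)pushcnt << PTR_BITS);
	}

	static void*
	unpack_ptr(uint64_t word)
	{
		return (void*)(uintptr_t)(word & PTR_MASK);
	}

	static uint32_t
	unpack_cnt(uint64_t word)
	{
		return (uint32_t)(word >> PTR_BITS);
	}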
1371
1372 void
1373 runtime·CTestLockFreeStack(bool isShort)
1374 {
1375 uint64 stack;
1376 Workbuf *b;
1377
1378 USED(isShort);
1379
1380 for(work.nproc=1; work.nproc<=2; work.nproc++) {
1381 // check the stack is initially empty
1382 stack = 0;
1383 if(lifopop(&stack) != nil)
1384 runtime·panicstring("stack is not empty");
1385
1386 // push one element
1387 b = (Workbuf*)runtime·mallocgc(sizeof(Workbuf), FlagNoGC, 0, 0);
1388 b->nobj = 42;
1389 lifopush(&stack, b);
1390
1391 // push another
1392 b = (Workbuf*)runtime·mallocgc(sizeof(Workbuf), FlagNoGC, 0, 0);
1393 b->nobj = 43;
1394 lifopush(&stack, b);
1395
1396 // pop one element
1397 b = lifopop(&stack);
1398 if(b == nil)
1399 runtime·panicstring("stack is empty");
1400 if(b->nobj != 43)
1401 runtime·panicstring("no lifo");
1402 runtime·free(b);
1403
1404 // pop another
1405 b = lifopop(&stack);
1406 if(b == nil)
1407 runtime·panicstring("stack is empty");
1408 if(b->nobj != 42)
1409 runtime·panicstring("no lifo");
1410 runtime·free(b);
1411
1412 // check the stack is empty again
1413 if(stack != 0)
1414 runtime·panicstring("stack is not empty");
1415 if(lifopop(&stack) != nil)
1416 runtime·panicstring("stack is not empty");
1417 }
1418 }
1419
1420 typedef struct StackTestCtx StackTestCtx;
1421 struct StackTestCtx
1422 {
1423 uint32 niter;
1424 uint32 waitsema;
1425 uint64 stack[2];
1426 };
1427
1428 static void
1429 stackTestProc(StackTestCtx *ctx)
1430 {
1431 int32 i, n;
1432 Workbuf *b;
1433
1434 for(i=0; i<ctx->niter; i++) {
1435 n = runtime·fastrand1()%2;
1436 b = lifopop(&ctx->stack[n]);
1437 if(b==nil)
1438 continue;
1439 n = runtime·fastrand1()%2;
1440 lifopush(&ctx->stack[n], b);
1441 }
1442 runtime·semrelease(&ctx->waitsema);
1443 }
1444
1445 void
1446 runtime·CTestLockFreeStackStress(bool isShort)
1447 {
1448 StackTestCtx ctx, *arg;
1449 Workbuf *b;
1450 int32 i, sum, sum2, cnt, procs;
1451 const int32 N = 100;
1452 const int32 P = 8;
1453 const int32 G = 16;
1454
1455 procs = runtime·gomaxprocsfunc(P);
1456 work.nproc = P;
1457 ctx.niter = isShort ? 10000 : 100000;
1458 ctx.waitsema = 0;
1459 ctx.stack[0] = 0;
1460 ctx.stack[1] = 0;
1461 arg = &ctx;
1462 sum = 0;
1463 for(i=0; i<N; i++) {
1464 b = (Workbuf*)runtime·mallocgc(sizeof(Workbuf), FlagNoGC, 0, 0);
1465 b->nobj = i;
1466 sum += i;
1467 lifopush(&ctx.stack[i%2], b);
1468 }
1469 m->locks++; // disable gc during the mallocs (gc resets work.nproc)
1470 for(i=0; i<G; i++)
1471 runtime·newproc1((byte*)stackTestProc, (byte*)&arg, sizeof(arg), 0, runtime·CTestLockFreeStackStress);
1472 m->locks--;
1473 for(i=0; i<G; i++)
1474 runtime·semacquire(&ctx.waitsema);
1475 sum2 = 0;
1476 cnt = 0;
1477 for(i=0; i<2; i++) {
1478 while((b=lifopop(&ctx.stack[i])) != nil) {
1479 cnt++;
1480 sum2 += b->nobj;
1481 b->next = (Workbuf*)123;
1482 runtime·free(b);
1483 }
1484 }
1485 if(cnt != N || sum2 != sum)
1486 runtime·panicstring("stack corruption");
1487 runtime·gomaxprocsfunc(procs);
1488 }
1489
1490 static void
1491 parforsetup(Parfor *desc, void (*body)(Parfor*, uint32), uint32 nthr, uint32 n, void *ctx, bool wait)
1492 {
1493 uint32 i, d, begin, end;
1494
1495 if(nthr == 0 || nthr > MaxGcproc || body == nil) {
1496 runtime·printf("nthr=%d count=%d body=%p\n",
1497 nthr, n, body);
1498 runtime·throw("parallel for: invalid args");
1499 }
1500
1501 desc->body = body;
1502 desc->done = 0;
1503 desc->nthr = nthr;
1504 desc->thrseq = 0;
1505 desc->cnt = n;
1506 desc->ctx = ctx;
1507 desc->wait = wait;
1508 for(i=0; i<nthr; i++) {
1509 begin = n / nthr * i;
1510 end = n / nthr * (i+1);
1511 d = n % nthr;
1512 if(i < d) {
1513 begin += i;
1514 end += (i+1);
1515 } else {
1516 begin += d;
1517 end += d;
1518 }
1519 desc->thr[i].pos = (uint64)begin | (((uint64)end)<<32);
1520 }
1521 }
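parforsetup splits [0, n) into nthr contiguous ranges whose sizes differ by at most one, spreading the remainder over the first n%nthr threads, and packs each range as begin in the low 32 bits and end in the high 32 bits of thr[i].pos. A standalone worked example of the same arithmetic (illustrative only):

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		uint32_t n = 10, nthr = 3, i;

		for(i = 0; i < nthr; i++) {
			uint32_t begin = n/nthr*i, end = n/nthr*(i+1), d = n%nthr;
			if(i < d) { begin += i; end += i+1; } else { begin += d; end += d; }
			// prints [0,4) [4,7) [7,10): the first n%nthr threads get one extra iteration
			printf("thr %u: [%u,%u) pos=%#llx\n", i, begin, end,
			       (unsigned long long)((uint64_t)begin | ((uint64_t)end<<32)));
		}
		return 0;
	}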
1522
1523 static void
1524 parfor(Parfor *desc)
1525 {
1526 uint32 tid, begin, end, begin2, try, victim, i;
1527 uint64 *mypos, pos;
1528 bool idle;
1529
1530 // obtain 0-based thread index
1531 tid = runtime·xadd(&desc->thrseq, 1) - 1;
1532 if(tid >= desc->nthr) {
1533 runtime·throw("parallel for: invalid tid");
1534 }
1535
1536 // if single-threaded, just execute the for serially
1537 if(desc->nthr==1) {
1538 for(i=0; i<desc->cnt; i++)
1539 desc->body(desc, i);
1540 return;
1541 }
1542
1543 mypos = &desc->thr[tid].pos;
1544 for(;;) {
1545 for(;;) {
1546                         // while we have work, bump the low index and execute the iteration
1547 pos = runtime·xadd64(mypos, 1);
1548 begin = (uint32)pos-1;
1549 end = (uint32)(pos>>32);
1550 if(begin>=end)
1551 break;
1552 desc->body(desc, begin);
1553 }
1554
1555 // out of work, need to steal something
1556 idle = false;
1557 for(try=0;; try++) {
1558 // if we don't see any work for long enough,
1559 // increment the done counter...
1560 if(try > desc->nthr*4 && !idle) {
1561 idle = true;
1562 runtime·xadd(&desc->done, 1);
1563 }
1564 // ...if all threads have incremented the counter,
1565 // we are done
1566 if(desc->done + !idle == desc->nthr) {
1567 if(!idle)
1568 runtime·xadd(&desc->done, 1);
1569 return;
1570 }
1571 // choose a random victim for stealing
1572 victim = runtime·fastrand1() % (desc->nthr-1);
1573 if(victim >= tid)
1574 victim++;
1575 pos = runtime·atomicload64(&desc->thr[victim].pos);
1576 for(;;) {
1577 // see if it has any work
1578 begin = (uint32)pos;
1579 end = (uint32)(pos>>32);
1580 if(begin >= end-1) {
1581 begin = end = 0;
1582 break;
1583 }
1584 if(idle) {
1585 runtime·xadd(&desc->done, -1);
1586 idle = false;
1587 }
1588 begin2 = begin + (end-begin)/2;
1589                                 if(runtime·cas64(&desc->thr[victim].pos, &pos, (uint64)begin | (((uint64)begin2)<<32))) {
1590 begin = begin2;
1591 break;
1592 }
1593 }
1594 if(begin < end) {
1595 // has successfully stolen some work
1596 if(idle)
1597                                         runtime·throw("parfor: should not be idle");
1598                                 runtime·atomicstore64(mypos, (uint64)begin | ((uint64)end)<<32);
1599 m->gcstats.nsteal += 1;
1600 m->gcstats.nstealcnt += end-begin;
1601 break;
1602 }
1603 // backoff
1604 if(try < desc->nthr) {
1605 } else if (try < desc->nthr*4) {
1606 m->gcstats.nprocyield++;
1607 runtime·procyield(20);
1608 // if a user asked to not wait for others, exit now
1609 // (assume that most work is already done)
1610 } else if (!desc->wait) {
1611 if(!idle)
1612 runtime·xadd(&desc->done, 1);
1613 return;
1614 } else if (try < desc->nthr*6) {
1615 m->gcstats.nosyield++;
1616 runtime·osyield();
1617 } else {
1618 m->gcstats.nsleep++;
1619 runtime·usleep(1);
1620 }
1621 }
1622 }
1623 }
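Two details of parfor are easy to miss in the diff. First, because begin sits in the low 32 bits of pos, the xadd64 at the top of the loop claims one iteration and fetches the current end in a single atomic operation. Second, a thief halves the victim's remaining range with cas64: the victim keeps [begin, begin2) and the thief takes [begin2, end). A sketch of that split with a hypothetical helper (the patch open-codes it inside the retry loop):

	#include <stdint.h>

	// Returns the victim's new packed pos; the thief's share comes back
	// through stolen_begin/stolen_end.
	static uint64_t
	steal_split(uint64_t pos, uint32_t *stolen_begin, uint32_t *stolen_end)
	{
		uint32_t begin  = (uint32_t)pos;
		uint32_t end    = (uint32_t)(pos>>32);
		uint32_t begin2 = begin + (end-begin)/2;  // victim keeps the first half

		*stolen_begin = begin2;
		*stolen_end   = end;
		return (uint64_t)begin | ((uint64_t)begin2<<32);
	}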
1624
1625 static void
1626 testParforBody(Parfor *desc, uint32 i)
1627 {
1628 uint64 *data;
1629
1630 data = (uint64*)desc->ctx;
1631 data[i] *= data[i];
1632 }
1633
1634 static void
1635 testParforProc(Parfor *desc)
1636 {
1637 parfor(desc);
1638 }
1639
1640 // Simple serial sanity test for parallelfor.
1641 void
1642 runtime·CTestParfor(bool isShort)
1643 {
1644 Parfor desc;
1645 uint64 *data;
1646 uint32 i, N;
1647
1648 USED(isShort);
1649
1650 N = 1000;
1651 data = (uint64*)runtime·mal(N*sizeof(uint64));
1652 for(i=0; i<N; i++)
1653 data[i] = i;
1654 parforsetup(&desc, testParforBody, 1, N, data, true);
1655 parfor(&desc);
1656 for(i=0; i<N; i++) {
1657 if(data[i] != i*i)
1658 runtime·panicstring("incorrect result");
1659 }
1660 runtime·free(data);
1661 }
1662
1663 // Test that iterations are properly distributed.
1664 void
1665 runtime·CTestParforSetup(bool isShort)
1666 {
1667 Parfor desc;
1668 uint32 n, t, i, begin, end, size, end0, size0, sum;
1669
1670 USED(isShort);
1671
1672 for(n=0; n<100; n++) {
1673 for(t=1; t<=MaxGcproc; t++) {
1674 parforsetup(&desc, testParforBody, t, n, 0, true);
1675 sum = 0;
1676 size0 = 0;
1677 end0 = 0;
1678 for(i=0; i<t; i++) {
1679 begin = (uint32)desc.thr[i].pos;
1680 end = (uint32)(desc.thr[i].pos>>32);
1681 size = end - begin;
1682 sum += size;
1683 if(i==0) {
1684 size0 = size;
1685 if(begin != 0)
1686                                                 runtime·panicstring("incorrect begin");
1687 } else {
1688 if(size != size0 && size != size0-1)
1689                                                 runtime·panicstring("incorrect size");
1690 if(begin != end0)
1691                                                 runtime·panicstring("incorrect begin/end");
1692 }
1693 end0 = end;
1694 }
1695 if(sum != n)
1696 runtime·panicstring("incorrect sum");
1697 }
1698 }
1699 }
1700
1701 // Test that nonblocking parallelfor does not block.
1702 void
1703 runtime·CTestParforNonblock(bool isShort)
1704 {
1705 Parfor desc;
1706 uint64 *data;
1707 uint32 i, N;
1708
1709 USED(isShort);
1710
1711 N = 1000;
1712 data = (uint64*)runtime·mal(N*sizeof(uint64));
1713 for(i=0; i<N; i++)
1714 data[i] = i;
1715 parforsetup(&desc, testParforBody, MaxGcproc, N, data, false);
1716 for(i=0; i<MaxGcproc; i++)
1717 parfor(&desc);
1718 for(i=0; i<N; i++) {
1719 if(data[i] != i*i)
1720 runtime·panicstring("incorrect result");
1721 }
1722 runtime·free(data);
1723 }
1724
1725 // Test parallel parallelfor.
1726 void
1727 runtime·CTestParforParallel(bool isShort)
1728 {
1729 Parfor desc, *arg;
1730 uint64 *data;
1731 uint32 i, N;
1732 int32 procs;
1733
1734 procs = runtime·gomaxprocsfunc(MaxGcproc+1);
1735 N = 10000;
1736 if(isShort)
1737 N /= 10;
1738 data = (uint64*)runtime·mal(N*sizeof(uint64));
1739 for(i=0; i<N; i++)
1740 data[i] = i;
1741 parforsetup(&desc, testParforBody, MaxGcproc, N, data, true);
1742 arg = &desc;
1743 m->locks++; // disable gc during the mallocs in newproc
1744 for(i=1; i<MaxGcproc; i++)
1745                 runtime·newproc1((byte*)testParforProc, (byte*)&arg, sizeof(arg), 0, runtime·CTestParforParallel);
1746 m->locks--;
1747 parfor(&desc);
1748 for(i=0; i<N; i++) {
1749 if(data[i] != i*i)
1750 runtime·panicstring("incorrect result");
1751 }
1752 runtime·free(data);
1753 runtime·gomaxprocsfunc(procs);
1754 }