LEFT | RIGHT |
1 // Copyright 2009 The Go Authors. All rights reserved. | 1 // Copyright 2009 The Go Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 // Garbage collector. | 5 // Garbage collector. |
6 | 6 |
7 #include "runtime.h" | 7 #include "runtime.h" |
8 #include "arch.h" | 8 #include "arch_GOARCH.h" |
9 #include "malloc.h" | 9 #include "malloc.h" |
10 #include "stack.h" | 10 #include "stack.h" |
11 | 11 |
12 enum { | 12 enum { |
13 Debug = 0, | 13 Debug = 0, |
14 PtrSize = sizeof(void*), | 14 PtrSize = sizeof(void*), |
15 DebugMark = 0, // run second pass to check mark | 15 DebugMark = 0, // run second pass to check mark |
| 16 DataBlock = 8*1024, |
16 | 17 |
17 // Four bits per word (see #defines below). | 18 // Four bits per word (see #defines below). |
18 wordsPerBitmapWord = sizeof(void*)*8/4, | 19 wordsPerBitmapWord = sizeof(void*)*8/4, |
19 bitShift = sizeof(void*)*8/4, | 20 bitShift = sizeof(void*)*8/4, |
20 }; | 21 }; |
21 | 22 |
22 // Bits in per-word bitmap. | 23 // Bits in per-word bitmap. |
23 // #defines because enum might not be able to hold the values. | 24 // #defines because enum might not be able to hold the values. |
24 // | 25 // |
25 // Each word in the bitmap describes wordsPerBitmapWord words | 26 // Each word in the bitmap describes wordsPerBitmapWord words |
(...skipping 19 matching lines...)
45 // /* then test bits & bitAllocated, bits & bitMarked, etc. */ | 46 // /* then test bits & bitAllocated, bits & bitMarked, etc. */ |
46 // | 47 // |
47 #define bitAllocated ((uintptr)1<<(bitShift*0)) | 48 #define bitAllocated ((uintptr)1<<(bitShift*0)) |
48 #define bitNoPointers ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */ | 49 #define bitNoPointers ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */ |
49 #define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */ | 50 #define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */ |
50 #define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */ | 51 #define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */ |
51 #define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */ | 52 #define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */ |
52 | 53 |
53 #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial) | 54 #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial) |
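The bitmap sits just below arena_start and grows downward; the same three-line lookup recurs throughout this file. A minimal sketch of the addressing rule (variable names follow scanblock below; illustrative, not a new API):

	// Given the address obj of a heap word, find its descriptor bits.
	uintptr off, *bitp, shift, bits;

	off = (uintptr*)obj - (uintptr*)arena_start;                // word index into the arena
	bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;  // bitmap word, below arena_start
	shift = off % wordsPerBitmapWord;
	bits = (*bitp >> shift) & bitMask;   // then test bits & bitAllocated, bits & bitMarked, etc.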
54 | 55 |
| 56 // Holding worldsema grants an M the right to try to stop the world. |
| 57 // The procedure is: |
| 58 // |
| 59 // runtime·semacquire(&runtime·worldsema); |
| 60 // m->gcing = 1; |
| 61 // runtime·stoptheworld(); |
| 62 // |
| 63 // ... do stuff ... |
| 64 // |
| 65 // m->gcing = 0; |
| 66 // runtime·semrelease(&runtime·worldsema); |
| 67 // runtime·starttheworld(); |
| 68 // |
| 69 uint32 runtime·worldsema = 1; |
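The comment above is the whole contract; a caller that needs the world stopped brackets its work exactly that way. A sketch only (hypothetical function name, no new runtime calls beyond those listed in the comment):

	static void
	do_with_world_stopped(void)
	{
		runtime·semacquire(&runtime·worldsema);
		m->gcing = 1;
		runtime·stoptheworld();

		// ... do stuff while every other M is parked ...

		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}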
55 static int32 gctrace; | 70 static int32 gctrace; |
56 | 71 |
57 typedef struct Workbuf Workbuf; | 72 typedef struct Workbuf Workbuf; |
58 struct Workbuf | 73 struct Workbuf |
59 { | 74 { |
60 Workbuf *next; | 75 Workbuf *next; |
| 76 uintptr pushcnt; |
61 uintptr nobj; | 77 uintptr nobj; |
62 » byte *obj[512-2]; | 78 » byte *obj[512-3]; |
63 }; | 79 }; |
64 | 80 |
65 typedef struct Finalizer Finalizer; | 81 typedef struct Finalizer Finalizer; |
66 struct Finalizer | 82 struct Finalizer |
67 { | 83 { |
68 void (*fn)(void*); | 84 void (*fn)(void*); |
69 void *arg; | 85 void *arg; |
70 int32 nret; | 86 int32 nret; |
71 }; | 87 }; |
72 | 88 |
73 typedef struct FinBlock FinBlock; | 89 typedef struct FinBlock FinBlock; |
74 struct FinBlock | 90 struct FinBlock |
75 { | 91 { |
76 FinBlock *alllink; | 92 FinBlock *alllink; |
77 FinBlock *next; | 93 FinBlock *next; |
78 int32 cnt; | 94 int32 cnt; |
79 int32 cap; | 95 int32 cap; |
80 Finalizer fin[1]; | 96 Finalizer fin[1]; |
81 }; | 97 }; |
82 | 98 |
83 extern byte data[]; | 99 extern byte data[]; |
84 extern byte end[]; | 100 extern byte etext[]; |
| 101 extern byte ebss[]; |
85 | 102 |
86 static G *fing; | 103 static G *fing; |
87 static FinBlock *finq; // list of finalizers that are to be executed | 104 static FinBlock *finq; // list of finalizers that are to be executed |
88 static FinBlock *finc; // cache of free blocks | 105 static FinBlock *finc; // cache of free blocks |
89 static FinBlock *allfin; // list of all blocks | 106 static FinBlock *allfin; // list of all blocks |
90 static Lock finlock; | 107 static Lock finlock; |
91 static int32 fingwait; | 108 static int32 fingwait; |
92 | 109 |
93 static void runfinq(void); | 110 static void runfinq(void); |
94 static Workbuf* getempty(Workbuf*); | 111 static Workbuf* getempty(Workbuf*); |
95 static Workbuf* getfull(Workbuf*); | 112 static Workbuf* getfull(Workbuf*); |
96 static void putempty(Workbuf*); | 113 static void putempty(Workbuf*); |
97 static Workbuf* handoff(Workbuf*); | 114 static Workbuf* handoff(Workbuf*); |
98 | 115 |
99 /* | 116 // parallel for descriptor |
100 typedef struct GcThread GcThread; | |
101 struct GcThread | |
102 { | |
103 » uint64 spanpos; | |
104 }; | |
105 */ | |
106 | |
107 static struct { | |
108 » Lock fmu; | |
109 » Workbuf»*full; | |
110 » Lock emu; | |
111 » Workbuf»*empty; | |
112 » uint32» nproc; | |
113 » volatile uint32»nwait; | |
114 » volatile uint32»ndone; | |
115 » volatile uint32 markdone; | |
116 » //volatile uint32 sweepdone; | |
117 » Note» alldone; | |
118 » uint32» tidseq; | |
119 | |
120 » Lock; | |
121 » byte» *chunk; | |
122 » uintptr»nchunk; | |
123 »······· | |
124 » struct | |
125 » { | |
126 » » byte *p; | |
127 » » uintptr n; | |
128 » } roots[64*1024]; | |
129 » uintptr nroots; | |
130 » /* | |
131 » union | |
132 » { | |
133 » » GcThread; | |
134 » » byte pad[CacheLineSize]; | |
135 » } thr[MaxGcproc]; | |
136 » */ | |
137 } work; | |
138 | |
139 typedef struct Parfor Parfor; | 117 typedef struct Parfor Parfor; |
140 struct Parfor | 118 struct Parfor |
141 { | 119 { |
| 120 // executed for each element |
| 121 void (*body)(Parfor*, uint32); |
| 122 // number of idle threads |
142 uint32 done; | 123 uint32 done; |
| 124 // total number of threads |
| 125 uint32 nthr; |
| 126 // thread id sequencer |
| 127 uint32 thrseq; |
| 128 // iteration space [0, cnt) |
| 129 uint32 cnt; |
| 130 // arbitrary user context |
| 131 void *ctx; |
| 132 // if true, wait while all threads finish processing, |
| 133 // otherwise parfor may return while other threads still working |
| 134 bool wait; |
143 byte pad[CacheLineSize]; | 135 byte pad[CacheLineSize]; |
144 struct | 136 struct |
145 { | 137 { |
| 138 // the thread's iteration space [32lsb, 32msb) |
146 uint64 pos; | 139 uint64 pos; |
147 byte pad[CacheLineSize-sizeof(uint64)]; | 140 byte pad[CacheLineSize-sizeof(uint64)]; |
148 } thr[MaxGcproc]; | 141 } thr[MaxGcproc]; |
149 }; | 142 }; |
150 | 143 |
151 Parfor sweepfor; | 144 typedef struct GcRoot GcRoot; |
152 Parfor markfor; | 145 struct GcRoot |
| 146 { |
| 147 » byte *p; |
| 148 » uintptr n; |
| 149 }; |
| 150 |
| 151 static struct |
| 152 { |
| 153 » uint64» full; // Workbuf* + ABA counter in 17msb |
| 154 » uint64» empty; // Workbuf* + ABA counter in 17msb |
| 155 » uint32» nproc; |
| 156 » byte pad0[CacheLineSize]; |
| 157 » volatile uint32»nwait; |
| 158 » byte pad1[CacheLineSize]; |
| 159 » volatile uint32 ndone; |
| 160 » volatile uint32 debugmarkdone; |
| 161 » Note» alldone; |
| 162 » Parfor» sweepfor; |
| 163 » Parfor» markfor; |
| 164 |
| 165 » Lock; |
| 166 » byte» *chunk; |
| 167 » uintptr»nchunk; |
| 168 |
| 169 » GcRoot» *roots; |
| 170 » uint32» nroot; |
| 171 » uint32» rootcap; |
| 172 } work; |
| 173 |
| 174 // lock-free stack |
| 175 // used to manage empty and free Workbuf's during mark phase |
| 176 static void» lifopush(uint64 *a, Workbuf *b); |
| 177 static Workbuf*»lifopop(uint64 *a); |
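work.full and work.empty (declared above as uint64) pack a Workbuf* in the low 47 bits and an ABA counter in the 17 most significant bits. A sketch of a push in that style, not necessarily this CL's exact lifopush, assuming cas64 refreshes *old on failure as its callers elsewhere in this file do:

	static void
	lifopush(uint64 *a, Workbuf *b)
	{
		uint64 old, new;

		b->pushcnt++;    // counter carried in the top 17 bits defeats ABA
		new = (uint64)(uintptr)b | ((uint64)b->pushcnt<<47);
		old = runtime·atomicload64(a);
		for(;;) {
			// link b to the current head, then try to swing the head to b
			b->next = (Workbuf*)(uintptr)(old & (((uint64)1<<47)-1));
			if(runtime·cas64(a, &old, new))
				break;
		}
	}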
| 178 |
| 179 // parallel for over [0, n). |
| 180 // body() is executed for each iteration. |
| 181 // nthr - total number of worker threads. |
| 182 // ctx - arbitrary user context. |
| 183 // if wait=true, threads return from parfor() when all work is done; |
| 184 // otherwise, threads can return while other threads still finishing processing. |
| 185 static void» parforsetup(Parfor *desc, void (*body)(Parfor*, uint32), |
| 186 » » » uint32 nthr, uint32 n, void *ctx, bool wait); |
| 187 static void» parfor(Parfor *desc); |
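Each desc->thr entry packs that thread's remaining range as begin in the low 32 bits and end in the high 32 bits, so claiming one index is a single xadd64. A sketch of the claim step, mirroring the parallelfor body removed on the left side further down (helper name is hypothetical):

	// Sketch: one worker claims and runs a single iteration of its own range.
	static bool
	parfor_try_one(Parfor *desc, uint32 tid)
	{
		uint64 pos;
		uint32 begin, end;

		// pos packs [begin, end) as (uint64)begin | ((uint64)end)<<32.
		pos = runtime·xadd64(&desc->thr[tid].pos, 1);
		begin = (uint32)pos - 1;   // index just claimed
		end = (uint32)(pos>>32);
		if(begin >= end)
			return false;      // range drained; caller should go steal
		desc->body(desc, begin);
		return true;
	}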
153 | 188 |
154 // scanblock scans a block of n bytes starting at pointer b for references | 189 // scanblock scans a block of n bytes starting at pointer b for references |
155 // to other objects, scanning any it finds recursively until there are no | 190 // to other objects, scanning any it finds recursively until there are no |
156 // unscanned objects left. Instead of using an explicit recursion, it keeps | 191 // unscanned objects left. Instead of using an explicit recursion, it keeps |
157 // a work list in the Workbuf* structures and loops in the main function | 192 // a work list in the Workbuf* structures and loops in the main function |
158 // body. Keeping an explicit work list is easier on the stack allocator and | 193 // body. Keeping an explicit work list is easier on the stack allocator and |
159 // more efficient. | 194 // more efficient. |
160 static void | 195 static void |
161 scanblock(byte *b, int64 n) | 196 scanblock(byte *b, int64 n) |
162 { | 197 { |
(...skipping 14 matching lines...)
177 // Memory arena parameters. | 212 // Memory arena parameters. |
178 arena_start = runtime·mheap.arena_start; | 213 arena_start = runtime·mheap.arena_start; |
179 arena_used = runtime·mheap.arena_used; | 214 arena_used = runtime·mheap.arena_used; |
180 nproc = work.nproc; | 215 nproc = work.nproc; |
181 | 216 |
182 wbuf = nil; // current work buffer | 217 wbuf = nil; // current work buffer |
183 wp = nil; // storage for next queued pointer (write pointer) | 218 wp = nil; // storage for next queued pointer (write pointer) |
184 nobj = 0; // number of queued objects | 219 nobj = 0; // number of queued objects |
185 | 220 |
186 // Scanblock helpers pass b==nil. | 221 // Scanblock helpers pass b==nil. |
187 » // The main proc needs to return to make more | 222 » // Procs need to return to make more |
188 // calls to scanblock. But if work.nproc==1 then | 223 // calls to scanblock. But if work.nproc==1 then |
189 // might as well process blocks as soon as we | 224 // might as well process blocks as soon as we |
190 // have them. | 225 // have them. |
191 keepworking = b == nil || work.nproc == 1; | 226 keepworking = b == nil || work.nproc == 1; |
192 | 227 |
193 // Align b to a word boundary. | 228 // Align b to a word boundary. |
194 off = (uintptr)b & (PtrSize-1); | 229 off = (uintptr)b & (PtrSize-1); |
195 if(off != 0) { | 230 if(off != 0) { |
196 b += PtrSize - off; | 231 b += PtrSize - off; |
197 n -= PtrSize - off; | 232 n -= PtrSize - off; |
198 } | 233 } |
199 | 234 |
200 for(;;) { | 235 for(;;) { |
201 m->gcstats.nscanblocks++; | |
202 ················ | |
203 // Each iteration scans the block b of length n, queueing pointers in | 236 // Each iteration scans the block b of length n, queueing pointers in |
204 // the work buffer. | 237 // the work buffer. |
205 if(Debug > 1) | 238 if(Debug > 1) |
206 runtime·printf("scanblock %p %D\n", b, n); | 239 runtime·printf("scanblock %p %D\n", b, n); |
207 | 240 |
208 vp = (void**)b; | 241 vp = (void**)b; |
209 n >>= (2+PtrSize/8); /* n /= PtrSize (4 or 8) */ | 242 n >>= (2+PtrSize/8); /* n /= PtrSize (4 or 8) */ |
210 for(i=0; i<n; i++) { | 243 for(i=0; i<n; i++) { |
211 m->gcstats.nscanwords++; | |
212 obj = (byte*)vp[i]; | 244 obj = (byte*)vp[i]; |
213 » » » | 245 |
214 // Words outside the arena cannot be pointers. | 246 // Words outside the arena cannot be pointers. |
215 if((byte*)obj < arena_start || (byte*)obj >= arena_used) | 247 if((byte*)obj < arena_start || (byte*)obj >= arena_used) |
216 continue; | 248 continue; |
217 | 249 |
218 m->gcstats.nscanptr++; | |
219 // obj may be a pointer to a live object. | 250 // obj may be a pointer to a live object. |
220 // Try to find the beginning of the object. | 251 // Try to find the beginning of the object. |
221 | 252 |
222 // Round down to word boundary. | 253 // Round down to word boundary. |
223 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); | 254 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); |
224 | 255 |
225 // Find bits for this word. | 256 // Find bits for this word. |
226 off = (uintptr*)obj - (uintptr*)arena_start; | 257 off = (uintptr*)obj - (uintptr*)arena_start; |
227 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; | 258 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; |
228 shift = off % wordsPerBitmapWord; | 259 shift = off % wordsPerBitmapWord; |
(...skipping 10 matching lines...)
239 if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) { | 270 if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) { |
240 obj = (byte*)obj - (shift-j)*PtrSize; | 271 obj = (byte*)obj - (shift-j)*PtrSize; |
241 shift = j; | 272 shift = j; |
242 bits = xbits>>shift; | 273 bits = xbits>>shift; |
243 goto found; | 274 goto found; |
244 } | 275 } |
245 } | 276 } |
246 | 277 |
247 // Otherwise consult span table to find beginning. | 278 // Otherwise consult span table to find beginning. |
248 // (Manually inlined copy of MHeap_LookupMaybe.) | 279 // (Manually inlined copy of MHeap_LookupMaybe.) |
249 m->gcstats.naddrlookup++; | |
250 k = (uintptr)obj>>PageShift; | 280 k = (uintptr)obj>>PageShift; |
251 x = k; | 281 x = k; |
252 if(sizeof(void*) == 8) | 282 if(sizeof(void*) == 8) |
253 x -= (uintptr)arena_start>>PageShift; | 283 x -= (uintptr)arena_start>>PageShift; |
254 s = runtime·mheap.map[x]; | 284 s = runtime·mheap.map[x]; |
255 if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse) | 285 if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse) |
256 continue; | 286 continue; |
257 p = (byte*)((uintptr)s->start<<PageShift); | 287 p = (byte*)((uintptr)s->start<<PageShift); |
258 if(s->sizeclass == 0) { | 288 if(s->sizeclass == 0) { |
259 obj = p; | 289 obj = p; |
260 } else { | 290 } else { |
261 if((byte*)obj >= (byte*)s->limit) | 291 if((byte*)obj >= (byte*)s->limit) |
262 continue; | 292 continue; |
263 size = runtime·class_to_size[s->sizeclass]; | 293 size = runtime·class_to_size[s->sizeclass]; |
264 int32 i = ((byte*)obj - p)/size; | 294 int32 i = ((byte*)obj - p)/size; |
265 obj = p+i*size; | 295 obj = p+i*size; |
266 } | 296 } |
267 | 297 |
268 // Now that we know the object header, reload bits. | 298 // Now that we know the object header, reload bits. |
269 off = (uintptr*)obj - (uintptr*)arena_start; | 299 off = (uintptr*)obj - (uintptr*)arena_start; |
270 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; | 300 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; |
271 shift = off % wordsPerBitmapWord; | 301 shift = off % wordsPerBitmapWord; |
272 xbits = *bitp; | 302 xbits = *bitp; |
273 bits = xbits >> shift; | 303 bits = xbits >> shift; |
274 | 304 |
275 found: | 305 found: |
| 306 // If another proc wants a pointer, give it some. |
| 307 if(work.nwait > 0 && nobj > 4 && work.full == 0) { |
| 308 wbuf->nobj = nobj; |
| 309 wbuf = handoff(wbuf); |
| 310 nobj = wbuf->nobj; |
| 311 wp = wbuf->obj + nobj; |
| 312 } |
| 313 |
276 // Now we have bits, bitp, and shift correct for | 314 // Now we have bits, bitp, and shift correct for |
277 // obj pointing at the base of the object. | 315 // obj pointing at the base of the object. |
278 // Only care about allocated and not marked. | 316 // Only care about allocated and not marked. |
279 if((bits & (bitAllocated|bitMarked)) != bitAllocated) | 317 if((bits & (bitAllocated|bitMarked)) != bitAllocated) |
280 continue; | 318 continue; |
281 if(nproc == 1) | 319 if(nproc == 1) |
282 *bitp |= bitMarked<<shift; | 320 *bitp |= bitMarked<<shift; |
283 else { | 321 else { |
284 for(;;) { | 322 for(;;) { |
285 x = *bitp; | 323 x = *bitp; |
286 if(x & (bitMarked<<shift)) | 324 if(x & (bitMarked<<shift)) |
287 goto continue_obj; | 325 goto continue_obj; |
288 if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift)))) | 326 if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift)))) |
289 break; | 327 break; |
290 } | 328 } |
291 } | 329 } |
292 | 330 |
293 // If object has no pointers, don't need to scan further. | 331 // If object has no pointers, don't need to scan further. |
294 if((bits & bitNoPointers) != 0) | 332 if((bits & bitNoPointers) != 0) |
295 continue; | 333 continue; |
296 | 334 |
297 » » » // If another proc wants a pointer, give it some. | 335 » » » PREFETCH(obj); |
298 » » » if(nobj > 4 && work.nwait > 0 && work.full == nil) { | |
299 » » » » wbuf->nobj = nobj; | |
300 » » » » wbuf = handoff(wbuf); | |
301 » » » » nobj = wbuf->nobj; | |
302 » » » » wp = wbuf->obj + nobj; | |
303 » » » } | |
304 | 336 |
305 // If buffer is full, get a new one. | 337 // If buffer is full, get a new one. |
306 if(wbuf == nil || nobj >= nelem(wbuf->obj)) { | 338 if(wbuf == nil || nobj >= nelem(wbuf->obj)) { |
307 if(wbuf != nil) | 339 if(wbuf != nil) |
308 wbuf->nobj = nobj; | 340 wbuf->nobj = nobj; |
309 wbuf = getempty(wbuf); | 341 wbuf = getempty(wbuf); |
310 wp = wbuf->obj; | 342 wp = wbuf->obj; |
311 nobj = 0; | 343 nobj = 0; |
312 } | 344 } |
313 *wp++ = obj; | 345 *wp++ = obj; |
314 nobj++; | 346 nobj++; |
315 continue_obj:; | 347 continue_obj:; |
316 } | 348 } |
317 | 349 |
318 // Done scanning [b, b+n). Prepare for the next iteration of | 350 // Done scanning [b, b+n). Prepare for the next iteration of |
319 // the loop by setting b and n to the parameters for the next block. | 351 // Done scanning [b, b+n). Prepare for the next iteration of the loop by setting b and n to the parameters for the next block. |
320 | 352 |
321 // Fetch b from the work buffer. | 353 // Fetch b from the work buffer. |
322 if(nobj == 0) { | 354 if(nobj == 0) { |
323 //runtime·printf("nobj==0\n"); | |
324 if(!keepworking) { | 355 if(!keepworking) { |
325 » » » » putempty(wbuf); | 356 » » » » if(wbuf != nil) |
| 357 » » » » » putempty(wbuf); |
326 return; | 358 return; |
327 } | 359 } |
328 // Emptied our buffer: refill. | 360 // Emptied our buffer: refill. |
329 wbuf = getfull(wbuf); | 361 wbuf = getfull(wbuf); |
330 if(wbuf == nil) | 362 if(wbuf == nil) |
331 return; | 363 return; |
332 nobj = wbuf->nobj; | 364 nobj = wbuf->nobj; |
333 wp = wbuf->obj + wbuf->nobj; | 365 wp = wbuf->obj + wbuf->nobj; |
334 //runtime·printf("getfull(%p) %d\n", wbuf, (uint32)wbuf->nobj); | |
335 } | 366 } |
336 b = *--wp; | 367 b = *--wp; |
337 nobj--; | 368 nobj--; |
338 | 369 |
339 » » // Figure out n = size of b. Start by loading bits for b. | 370 » » // Ask span about size class. |
340 » » off = (uintptr*)b - (uintptr*)arena_start; | |
341 » » bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; | |
342 » » shift = off % wordsPerBitmapWord; | |
343 » » xbits = *bitp; | |
344 » » bits = xbits >> shift; | |
345 | |
346 » » // Might be small; look for nearby block boundary. | |
347 » » // A block boundary is marked by either bitBlockBoundary | |
348 » » // or bitAllocated being set (see notes near their definition). | |
349 » » enum { | |
350 » » » boundary = bitBlockBoundary|bitAllocated | |
351 » » }; | |
352 » » // Look for a block boundary both after and before b | |
353 » » // in the same bitmap word. | |
354 » » // | |
355 » » // A block boundary j words after b is indicated by | |
356 » » //» bits>>j & boundary | |
357 » » // assuming shift+j < bitShift. (If shift+j >= bitShift then | |
358 // we'll be bleeding other bit types like bitMarked into our test.) | |
359 // Instead of inserting the conditional shift+j < bitShift into the loop, | |
360 » » // we can let j range from 1 to bitShift as long as we first | |
361 » » // apply a mask to keep only the bits corresponding | |
362 » » // to shift+j < bitShift aka j < bitShift-shift. | |
363 » » bits &= (boundary<<(bitShift-shift)) - boundary; | |
364 | |
365 » » // A block boundary j words before b is indicated by | |
366 » » //» xbits>>(shift-j) & boundary | |
367 » » // (assuming shift >= j). There is no cleverness here | |
368 » » // avoid the test, because when j gets too large the shift | |
369 » » // turns negative, which is undefined in C. | |
370 | |
371 » » for(j=1; j<bitShift; j++) { | |
372 if(((bits>>j)&boundary) != 0 || shift>=j && ((xbits>>(shift-j))&boundary) != 0) { | |
373 » » » » n = j*PtrSize; | |
374 » » » » goto scan; | |
375 » » » } | |
376 » » } | |
377 | |
378 » » // Fall back to asking span about size class. | |
379 // (Manually inlined copy of MHeap_Lookup.) | 371 // (Manually inlined copy of MHeap_Lookup.) |
380 m->gcstats.nsizelookup++; | |
381 x = (uintptr)b>>PageShift; | 372 x = (uintptr)b>>PageShift; |
382 if(sizeof(void*) == 8) | 373 if(sizeof(void*) == 8) |
383 x -= (uintptr)arena_start>>PageShift; | 374 x -= (uintptr)arena_start>>PageShift; |
384 s = runtime·mheap.map[x]; | 375 s = runtime·mheap.map[x]; |
385 if(s->sizeclass == 0) | 376 if(s->sizeclass == 0) |
386 n = s->npages<<PageShift; | 377 n = s->npages<<PageShift; |
387 else | 378 else |
388 n = runtime·class_to_size[s->sizeclass]; | 379 n = runtime·class_to_size[s->sizeclass]; |
389 scan:; | |
390 } | 380 } |
391 } | 381 } |
392 | 382 |
393 // debug_scanblock is the debug copy of scanblock. | 383 // debug_scanblock is the debug copy of scanblock. |
394 // it is simpler, slower, single-threaded, recursive, | 384 // it is simpler, slower, single-threaded, recursive, |
395 // and uses bitSpecial as the mark bit. | 385 // and uses bitSpecial as the mark bit. |
396 static void | 386 static void |
397 debug_scanblock(byte *b, int64 n) | 387 debug_scanblock(byte *b, int64 n) |
398 { | 388 { |
399 byte *obj, *p; | 389 byte *obj, *p; |
(...skipping 25 matching lines...)
425 if((byte*)obj < runtime·mheap.arena_start || (byte*)obj >= runtime·mheap.arena_used) | 415 if((byte*)obj < runtime·mheap.arena_start || (byte*)obj >= runtime·mheap.arena_used) |
426 continue; | 416 continue; |
427 | 417 |
428 // Round down to word boundary. | 418 // Round down to word boundary. |
429 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); | 419 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); |
430 | 420 |
431 // Consult span table to find beginning. | 421 // Consult span table to find beginning. |
432 s = runtime·MHeap_LookupMaybe(&runtime·mheap, obj); | 422 s = runtime·MHeap_LookupMaybe(&runtime·mheap, obj); |
433 if(s == nil) | 423 if(s == nil) |
434 continue; | 424 continue; |
435 | |
436 | 425 |
437 p = (byte*)((uintptr)s->start<<PageShift); | 426 p = (byte*)((uintptr)s->start<<PageShift); |
438 if(s->sizeclass == 0) { | 427 if(s->sizeclass == 0) { |
439 obj = p; | 428 obj = p; |
440 size = (uintptr)s->npages<<PageShift; | 429 size = (uintptr)s->npages<<PageShift; |
441 } else { | 430 } else { |
442 if((byte*)obj >= (byte*)s->limit) | 431 if((byte*)obj >= (byte*)s->limit) |
443 continue; | 432 continue; |
444 size = runtime·class_to_size[s->sizeclass]; | 433 size = runtime·class_to_size[s->sizeclass]; |
445 int32 i = ((byte*)obj - p)/size; | 434 int32 i = ((byte*)obj - p)/size; |
(...skipping 17 matching lines...)
463 runtime·printf("found unmarked block %p in %p\n", obj, vp+i); | 452 runtime·printf("found unmarked block %p in %p\n", obj, vp+i); |
464 | 453 |
465 // If object has no pointers, don't need to scan further. | 454 // If object has no pointers, don't need to scan further. |
466 if((bits & bitNoPointers) != 0) | 455 if((bits & bitNoPointers) != 0) |
467 continue; | 456 continue; |
468 | 457 |
469 debug_scanblock(obj, size); | 458 debug_scanblock(obj, size); |
470 } | 459 } |
471 } | 460 } |
472 | 461 |
| 462 static void |
| 463 markroot(Parfor *desc, uint32 i) |
| 464 { |
| 465 USED(desc); |
| 466 scanblock(work.roots[i].p, work.roots[i].n); |
| 467 } |
| 468 |
473 // Get an empty work buffer off the work.empty list, | 469 // Get an empty work buffer off the work.empty list, |
474 // allocating new buffers as needed. | 470 // allocating new buffers as needed. |
475 static Workbuf* | 471 static Workbuf* |
476 getempty(Workbuf *b) | 472 getempty(Workbuf *b) |
477 { | 473 { |
478 » if(work.nproc == 1) { | 474 » if(b != nil) |
479 » » // Put b on full list. | 475 » » lifopush(&work.full, b); |
480 » » if(b != nil) { | 476 » b = lifopop(&work.empty); |
481 » » » b->next = work.full; | 477 » if(b == nil) { |
482 » » » work.full = b; | 478 » » // Need to allocate. |
483 » » } | 479 » » runtime·lock(&work); |
484 » » // Grab from empty list if possible. | 480 » » if(work.nchunk < sizeof *b) { |
485 » » b = work.empty; | 481 » » » work.nchunk = 1<<20; |
486 » » if(b != nil) { | 482 » » » work.chunk = runtime·SysAlloc(work.nchunk); |
487 » » » work.empty = b->next; | 483 » » } |
488 » » » goto haveb; | 484 » » b = (Workbuf*)work.chunk; |
489 » » } | 485 » » work.chunk += sizeof *b; |
490 » } else { | 486 » » work.nchunk -= sizeof *b; |
491 » » // Put b on full list. | 487 » » runtime·unlock(&work); |
492 » » if(b != nil) { | 488 » } |
493 » » » runtime·lock(&work.fmu); | |
494 » » » b->next = work.full; | |
495 » » » work.full = b; | |
496 » » » runtime·unlock(&work.fmu); | |
497 » » } | |
498 » » // Grab from empty list if possible. | |
499 » » runtime·lock(&work.emu); | |
500 » » b = work.empty; | |
501 » » if(b != nil) | |
502 » » » work.empty = b->next; | |
503 » » runtime·unlock(&work.emu); | |
504 » » if(b != nil) | |
505 » » » goto haveb; | |
506 » } | |
507 | |
508 » // Need to allocate. | |
509 » runtime·lock(&work); | |
510 » if(work.nchunk < sizeof *b) { | |
511 » » work.nchunk = 1<<20; | |
512 » » work.chunk = runtime·SysAlloc(work.nchunk); | |
513 » } | |
514 » b = (Workbuf*)work.chunk; | |
515 » work.chunk += sizeof *b; | |
516 » work.nchunk -= sizeof *b; | |
517 » runtime·unlock(&work); | |
518 | |
519 haveb: | |
520 b->nobj = 0; | 489 b->nobj = 0; |
521 return b; | 490 return b; |
522 } | 491 } |
523 | 492 |
524 static void | 493 static void |
525 putempty(Workbuf *b) | 494 putempty(Workbuf *b) |
526 { | 495 { |
527 » if(b == nil) | 496 » lifopush(&work.empty, b); |
528 » » return; | |
529 | |
530 » if(work.nproc == 1) { | |
531 » » b->next = work.empty; | |
532 » » work.empty = b; | |
533 » » return; | |
534 » } | |
535 | |
536 » runtime·lock(&work.emu); | |
537 » b->next = work.empty; | |
538 » work.empty = b; | |
539 » runtime·unlock(&work.emu); | |
540 } | 497 } |
541 | 498 |
542 // Get a full work buffer off the work.full list, or return nil. | 499 // Get a full work buffer off the work.full list, or return nil. |
543 static Workbuf* | 500 static Workbuf* |
544 getfull(Workbuf *b) | 501 getfull(Workbuf *b) |
545 { | 502 { |
546 int32 i; | 503 int32 i; |
547 » Workbuf *b1; | 504 |
548 | 505 » if(b != nil) |
549 //runtime·printf("getfull nproc=%d b=%p(%d)\n", work.nproc, b, (uint32)(b?b->nobj:0)); | 506 » » lifopush(&work.empty, b); |
550 » if(work.nproc == 1) { | 507 » b = lifopop(&work.full); |
551 » » // Put b on empty list. | 508 » if(b != nil || work.nproc == 1) |
552 » » if(b != nil) { | |
553 » » » b->next = work.empty; | |
554 » » » work.empty = b; | |
555 » » } | |
556 » » // Grab from full list if possible. | |
557 » » // Since work.nproc==1, no one else is | |
558 » » // going to give us work. | |
559 » » b = work.full; | |
560 » » if(b != nil) | |
561 » » » work.full = b->next; | |
562 return b; | 509 return b; |
563 } | |
564 | |
565 putempty(b); | |
566 | |
567 // Grab buffer from full list if possible. | |
568 for(;;) { | |
569 b1 = work.full; | |
570 if(b1 == nil) | |
571 break; | |
572 runtime·lock(&work.fmu); | |
573 if(work.full != nil) { | |
574 b1 = work.full; | |
575 work.full = b1->next; | |
576 runtime·unlock(&work.fmu); | |
577 return b1; | |
578 } | |
579 runtime·unlock(&work.fmu); | |
580 } | |
581 | 510 |
582 runtime·xadd(&work.nwait, +1); | 511 runtime·xadd(&work.nwait, +1); |
583 for(i=0;; i++) { | 512 for(i=0;; i++) { |
584 » » b1 = work.full; | 513 » » if(work.full != 0) { |
585 » » if(b1 != nil) { | 514 » » » runtime·xadd(&work.nwait, -1); |
586 » » » runtime·lock(&work.fmu); | 515 » » » b = lifopop(&work.full); |
587 » » » if(work.full != nil) { | 516 » » » if(b != nil) |
588 » » » » runtime·xadd(&work.nwait, -1); | 517 » » » » return b; |
589 » » » » b1 = work.full; | 518 » » » runtime·xadd(&work.nwait, +1); |
590 » » » » work.full = b1->next; | |
591 » » » » runtime·unlock(&work.fmu); | |
592 » » » » return b1; | |
593 » » » } | |
594 » » » runtime·unlock(&work.fmu); | |
595 » » » continue; | |
596 } | 519 } |
597 if(work.nwait == work.nproc) | 520 if(work.nwait == work.nproc) |
598 return nil; | 521 return nil; |
599 » » if(i < 10) | 522 » » if(i < 10) { |
| 523 » » » m->gcstats.nprocyield++; |
600 runtime·procyield(20); | 524 runtime·procyield(20); |
601 » » else if(i < 20) | 525 » » } else if(i < 20) { |
| 526 » » » m->gcstats.nosyield++; |
602 runtime·osyield(); | 527 runtime·osyield(); |
603 » » else | 528 » » } else { |
| 529 » » » m->gcstats.nsleep++; |
604 runtime·usleep(100); | 530 runtime·usleep(100); |
| 531 } |
605 } | 532 } |
606 } | 533 } |
607 | 534 |
608 static Workbuf* | 535 static Workbuf* |
609 handoff(Workbuf *b) | 536 handoff(Workbuf *b) |
610 { | 537 { |
611 int32 n; | 538 int32 n; |
612 Workbuf *b1; | 539 Workbuf *b1; |
613 | 540 |
614 // Make new buffer with half of b's pointers. | 541 // Make new buffer with half of b's pointers. |
615 b1 = getempty(nil); | 542 b1 = getempty(nil); |
616 n = b->nobj/2; | 543 n = b->nobj/2; |
617 b->nobj -= n; | 544 b->nobj -= n; |
618 b1->nobj = n; | 545 b1->nobj = n; |
619 runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]); | 546 runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]); |
620 m->gcstats.nhandoff += 1; | 547 m->gcstats.nhandoff += 1; |
621 m->gcstats.nhandoffcnt += n; | 548 m->gcstats.nhandoffcnt += n; |
622 | 549 |
623 // Put b on full list - let first half of b get stolen. | 550 // Put b on full list - let first half of b get stolen. |
624 » runtime·lock(&work.fmu); | 551 » lifopush(&work.full, b); |
625 » b->next = work.full; | |
626 » work.full = b; | |
627 » runtime·unlock(&work.fmu); | |
628 | |
629 return b1; | 552 return b1; |
630 } | 553 } |
631 | 554 |
632 // Scanstack calls scanblock on each of gp's stack segments. | |
633 static void | 555 static void |
634 scanstack(void (*scanblock)(byte*, int64), G *gp) | 556 addroot(byte *p, uintptr n) |
| 557 { |
| 558 » uint32 cap; |
| 559 » GcRoot *new; |
| 560 |
| 561 » if(work.nroot >= work.rootcap) { |
| 562 » » cap = PageSize/sizeof(GcRoot); |
| 563 » » if(cap < 2*work.rootcap) |
| 564 » » » cap = 2*work.rootcap; |
| 565 » » new = (GcRoot*)runtime·SysAlloc(cap*sizeof(GcRoot)); |
| 566 » » if(work.roots != nil) { |
| 567 » » » runtime·memmove(new, work.roots, work.rootcap*sizeof(GcRoot)); |
| 568 » » » runtime·SysFree(work.roots, work.rootcap*sizeof(GcRoot)); |
| 569 » » } |
| 570 » » work.roots = new; |
| 571 » » work.rootcap = cap; |
| 572 » } |
| 573 » work.roots[work.nroot].p = p; |
| 574 » work.roots[work.nroot].n = n; |
| 575 » work.nroot++; |
| 576 } |
| 577 |
| 578 static void |
| 579 addstackroots(G *gp) |
635 { | 580 { |
636 M *mp; | 581 M *mp; |
637 int32 n; | 582 int32 n; |
638 Stktop *stk; | 583 Stktop *stk; |
639 byte *sp, *guard; | 584 byte *sp, *guard; |
640 | 585 |
641 stk = (Stktop*)gp->stackbase; | 586 stk = (Stktop*)gp->stackbase; |
642 guard = gp->stackguard; | 587 guard = gp->stackguard; |
643 | 588 |
644 if(gp == g) { | 589 if(gp == g) { |
(...skipping 12 matching lines...)
657 // as schedlock and may have needed to start a new stack segment. | 602 // as schedlock and may have needed to start a new stack segment. |
658 // Use the stack segment and stack pointer at the time of | 603 // Use the stack segment and stack pointer at the time of |
659 // the system call instead, since that won't change underfoot. | 604 // the system call instead, since that won't change underfoot. |
660 if(gp->gcstack != nil) { | 605 if(gp->gcstack != nil) { |
661 stk = (Stktop*)gp->gcstack; | 606 stk = (Stktop*)gp->gcstack; |
662 sp = gp->gcsp; | 607 sp = gp->gcsp; |
663 guard = gp->gcguard; | 608 guard = gp->gcguard; |
664 } | 609 } |
665 } | 610 } |
666 | 611 |
667 if(Debug > 1) | |
668 runtime·printf("scanstack %d %p\n", gp->goid, sp); | |
669 n = 0; | 612 n = 0; |
670 while(stk) { | 613 while(stk) { |
671 if(sp < guard-StackGuard || (byte*)stk < sp) { | 614 if(sp < guard-StackGuard || (byte*)stk < sp) { |
672 runtime·printf("scanstack inconsistent: g%d#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk); | 615 runtime·printf("scanstack inconsistent: g%d#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk); |
673 runtime·throw("scanstack"); | 616 runtime·throw("scanstack"); |
674 } | 617 } |
675 scanblock(sp, (byte*)stk - sp); | |
676 sp = stk->gobuf.sp; | |
677 guard = stk->stackguard; | |
678 stk = (Stktop*)stk->stackbase; | |
679 n++; | |
680 } | |
681 } | |
682 | |
683 // Markfin calls scanblock on the blocks that have finalizers: | |
684 // the things pointed at cannot be freed until the finalizers have run. | |
685 static void | |
686 markfin(void *v) | |
687 { | |
688 uintptr size; | |
689 | |
690 size = 0; | |
691 if(!runtime·mlookup(v, &v, &size, nil) || !runtime·blockspecial(v)) | |
692 runtime·throw("mark - finalizer inconsistency"); | |
693 | |
694 // do not mark the finalizer block itself. just mark the things it points at. | |
695 scanblock(v, size); | |
696 } | |
697 | |
698 static void | |
699 debug_markfin(void *v) | |
700 { | |
701 uintptr size; | |
702 | |
703 if(!runtime·mlookup(v, &v, &size, nil)) | |
704 runtime·throw("debug_mark - finalizer inconsistency"); | |
705 debug_scanblock(v, size); | |
706 } | |
707 | |
708 // Mark | |
709 static void | |
710 mark(void (*scan)(byte*, int64)) | |
711 { | |
712 G *gp; | |
713 FinBlock *fb; | |
714 | |
715 // mark data+bss. | |
716 // skip runtime·mheap itself, which has no interesting pointers | |
717 // and is mostly zeroed and would not otherwise be paged in. | |
718 //runtime·printf("mark globals\n"); | |
719 scan(data, (byte*)&runtime·mheap - data); | |
720 scan((byte*)(&runtime·mheap+1), end - (byte*)(&runtime·mheap+1)); | |
721 | |
722 // mark stacks | |
723 //runtime·printf("mark stacks\n"); | |
724 for(gp=runtime·allg; gp!=nil; gp=gp->alllink) { | |
725 switch(gp->status){ | |
726 default: | |
727 runtime·printf("unexpected G.status %d\n", gp->status); | |
728 runtime·throw("mark - bad status"); | |
729 case Gdead: | |
730 break; | |
731 case Grunning: | |
732 if(gp != g) | |
733 runtime·throw("mark - world not stopped"); | |
734 scanstack(scan, gp); | |
735 break; | |
736 case Grunnable: | |
737 case Gsyscall: | |
738 case Gwaiting: | |
739 scanstack(scan, gp); | |
740 break; | |
741 } | |
742 } | |
743 | |
744 // mark things pointed at by objects with finalizers | |
745 //runtime·printf("mark fin\n"); | |
746 if(scan == debug_scanblock) | |
747 runtime·walkfintab(debug_markfin); | |
748 else | |
749 runtime·walkfintab(markfin); | |
750 | |
751 //runtime·printf("mark fin blocks\n"); | |
752 for(fb=allfin; fb; fb=fb->alllink) | |
753 scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0])); | |
754 | |
755 // in multiproc mode, join in the queued work. | |
756 //runtime·printf("mark help\n"); | |
757 scan(nil, 0); | |
758 } | |
759 | |
760 static void | |
761 addroot(byte *p, uintptr n) | |
762 { | |
763 if(work.nroots == nelem(work.roots)) | |
764 runtime·throw("gc: root overflow"); | |
765 work.roots[work.nroots].p = p; | |
766 work.roots[work.nroots].n = n; | |
767 work.nroots++; | |
768 } | |
769 | |
770 static void | |
771 collectstackroots(G *gp) | |
772 { | |
773 M *mp; | |
774 int32 n; | |
775 Stktop *stk; | |
776 byte *sp, *guard; | |
777 ········ | |
778 stk = (Stktop*)gp->stackbase; | |
779 guard = gp->stackguard; | |
780 ········ | |
781 if(gp == g) { | |
782 // Scanning our own stack: start at &gp. | |
783 sp = (byte*)&gp; | |
784 } else if((mp = gp->m) != nil && mp->helpgc) { | |
785 // gchelper's stack is in active use and has no interesting pointers. | |
786 return; | |
787 } else { | |
788 // Scanning another goroutine's stack. | |
789 // The goroutine is usually asleep (the world is stopped). | |
790 sp = gp->sched.sp; | |
791 ················ | |
792 // The exception is that if the goroutine is about to enter or might | |
793 // have just exited a system call, it may be executing code such | |
794 // as schedlock and may have needed to start a new stack segment. | |
795 // Use the stack segment and stack pointer at the time of | |
796 // the system call instead, since that won't change underfoot. | |
797 if(gp->gcstack != nil) { | |
798 stk = (Stktop*)gp->gcstack; | |
799 sp = gp->gcsp; | |
800 guard = gp->gcguard; | |
801 } | |
802 } | |
803 ········ | |
804 n = 0; | |
805 while(stk) { | |
806 if(sp < guard-StackGuard || (byte*)stk < sp) { | |
807 runtime·printf("scanstack inconsistent: g%d#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk); | |
808 runtime·throw("scanstack"); | |
809 } | |
810 addroot(sp, (byte*)stk - sp); | 618 addroot(sp, (byte*)stk - sp); |
811 sp = stk->gobuf.sp; | 619 sp = stk->gobuf.sp; |
812 guard = stk->stackguard; | 620 guard = stk->stackguard; |
813 stk = (Stktop*)stk->stackbase; | 621 stk = (Stktop*)stk->stackbase; |
814 n++; | 622 n++; |
815 } | 623 } |
816 } | 624 } |
817 | 625 |
818 static void | 626 static void |
819 collectfinroots(void *v) | 627 addfinroots(void *v) |
820 { | 628 { |
821 uintptr size; | 629 uintptr size; |
822 » | 630 |
823 size = 0; | 631 size = 0; |
824 if(!runtime·mlookup(v, &v, &size, nil) || !runtime·blockspecial(v)) | 632 if(!runtime·mlookup(v, &v, &size, nil) || !runtime·blockspecial(v)) |
825 runtime·throw("mark - finalizer inconsistency"); | 633 runtime·throw("mark - finalizer inconsistency"); |
826 » | 634 |
827 // do not mark the finalizer block itself. just mark the things it points at. | 635 // do not mark the finalizer block itself. just mark the things it points at. |
828 addroot(v, size); | 636 addroot(v, size); |
829 } | 637 } |
830 | 638 |
831 static void | 639 static void |
832 collectroots(void) | 640 addroots(void) |
833 { | 641 { |
834 G *gp; | 642 G *gp; |
835 FinBlock *fb; | 643 FinBlock *fb; |
836 » byte *p, *e; | 644 » byte *p; |
837 | 645 |
838 » work.nroots = 0; | 646 » work.nroot = 0; |
839 »······· | 647 |
840 » e = (byte*)&runtime·mheap; | 648 » // mark data+bss. |
841 » for(p=data; p<e; p+=4*1024) | 649 » for(p=data; p<ebss; p+=DataBlock) |
842 » » addroot(p, p+4*1024<e?4*1024:e-p); | 650 » » addroot(p, p+DataBlock<ebss?DataBlock:ebss-p); |
843 | 651 |
844 » e = end; | 652 » runtime·walkfintab(addfinroots); |
845 » for(p=(byte*)(&runtime·mheap+1); p<e; p+=4*1024) | 653 |
846 » » addroot(p, p+4*1024<e?4*1024:e-p); | |
847 »······· | |
848 » runtime·walkfintab(collectfinroots); | |
849 »······· | |
850 for(fb=allfin; fb; fb=fb->alllink) | 654 for(fb=allfin; fb; fb=fb->alllink) |
851 addroot((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0])); | 655 addroot((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0])); |
852 » | 656 |
853 for(gp=runtime·allg; gp!=nil; gp=gp->alllink) { | 657 for(gp=runtime·allg; gp!=nil; gp=gp->alllink) { |
854 switch(gp->status){ | 658 switch(gp->status){ |
855 default: | 659 default: |
856 runtime·printf("unexpected G.status %d\n", gp->status); | 660 runtime·printf("unexpected G.status %d\n", gp->status); |
857 runtime·throw("mark - bad status"); | 661 runtime·throw("mark - bad status"); |
858 case Gdead: | 662 case Gdead: |
859 break; | 663 break; |
860 case Grunning: | 664 case Grunning: |
861 if(gp != g) | 665 if(gp != g) |
862 runtime·throw("mark - world not stopped"); | 666 runtime·throw("mark - world not stopped"); |
863 » » » » collectstackroots(gp); | 667 » » » » addstackroots(gp); |
864 break; | 668 break; |
865 case Grunnable: | 669 case Grunnable: |
866 case Gsyscall: | 670 case Gsyscall: |
867 case Gwaiting: | 671 case Gwaiting: |
868 » » » » collectstackroots(gp); | 672 » » » » addstackroots(gp); |
869 break; | 673 break; |
870 } | 674 } |
871 } | 675 } |
872 ········ | |
873 //runtime·printf("collected %d gc roots\n", (uint32)work.nroots); | |
874 } | 676 } |
875 | 677 |
876 static bool | 678 static bool |
877 handlespecial(byte *p, uintptr size) | 679 handlespecial(byte *p, uintptr size) |
878 { | 680 { |
879 void (*fn)(void*); | 681 void (*fn)(void*); |
880 int32 nret; | 682 int32 nret; |
881 FinBlock *block; | 683 FinBlock *block; |
882 Finalizer *f; | 684 Finalizer *f; |
883 » | 685 |
884 if(!runtime·getfinalizer(p, true, &fn, &nret)) { | 686 if(!runtime·getfinalizer(p, true, &fn, &nret)) { |
885 runtime·setblockspecial(p, false); | 687 runtime·setblockspecial(p, false); |
886 runtime·MProf_Free(p, size); | 688 runtime·MProf_Free(p, size); |
887 return false; | 689 return false; |
888 } | 690 } |
889 | 691 |
890 runtime·lock(&finlock); | 692 runtime·lock(&finlock); |
891 if(finq == nil || finq->cnt == finq->cap) { | 693 if(finq == nil || finq->cnt == finq->cap) { |
892 if(finc == nil) { | 694 if(finc == nil) { |
893 finc = runtime·SysAlloc(PageSize); | 695 finc = runtime·SysAlloc(PageSize); |
894 finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1; | 696 finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1; |
895 finc->alllink = allfin; | 697 finc->alllink = allfin; |
896 allfin = finc; | 698 allfin = finc; |
897 } | 699 } |
898 block = finc; | 700 block = finc; |
899 finc = block->next; | 701 finc = block->next; |
900 block->next = finq; | 702 block->next = finq; |
901 finq = block; | 703 finq = block; |
902 } | 704 } |
903 f = &finq->fin[finq->cnt]; | 705 f = &finq->fin[finq->cnt]; |
904 finq->cnt++; | 706 finq->cnt++; |
905 f->fn = fn; | 707 f->fn = fn; |
906 f->nret = nret; | 708 f->nret = nret; |
907 f->arg = p; | 709 f->arg = p; |
908 runtime·unlock(&finlock); | 710 runtime·unlock(&finlock); |
909 return true; | 711 return true; |
910 } | 712 } |
911 | 713 |
912 static void sweepspan(uint32 sid) | 714 // Sweep frees or collects finalizers for blocks not marked in the mark phase. |
| 715 // It clears the mark bits in preparation for the next GC round. |
| 716 static void |
| 717 sweepspan(Parfor *desc, uint32 spanidx) |
913 { | 718 { |
914 MSpan *s; | 719 MSpan *s; |
915 int32 cl, n, npages; | 720 int32 cl, n, npages; |
916 uintptr size; | 721 uintptr size; |
917 byte *p; | 722 byte *p; |
918 MCache *c; | 723 MCache *c; |
919 byte *arena_start; | 724 byte *arena_start; |
920 | 725 » MLink *start, *end; |
921 » s = runtime·mheap.allspans[sid]; | 726 » int32 nfree; |
| 727 |
| 728 » USED(desc); |
| 729 |
| 730 » s = runtime·mheap.allspans[spanidx]; |
| 731 » // Stamp newly unused spans. The scavenger will use that |
| 732 » // info to potentially give back some pages to the OS. |
| 733 » if(s->state == MSpanFree && s->unusedsince == 0) |
| 734 » » s->unusedsince = runtime·nanotime(); |
922 if(s->state != MSpanInUse) | 735 if(s->state != MSpanInUse) |
923 return; | 736 return; |
924 | 737 |
925 arena_start = runtime·mheap.arena_start; | 738 arena_start = runtime·mheap.arena_start; |
926 | 739 |
927 m->gcstats.nspanfull++; | |
928 p = (byte*)(s->start << PageShift); | 740 p = (byte*)(s->start << PageShift); |
929 cl = s->sizeclass; | 741 cl = s->sizeclass; |
930 if(cl == 0) { | 742 if(cl == 0) { |
931 m->gcstats.nspanlarge++; | |
932 size = s->npages<<PageShift; | 743 size = s->npages<<PageShift; |
933 n = 1; | 744 n = 1; |
934 } else { | 745 } else { |
935 // Chunk full of small blocks. | 746 // Chunk full of small blocks. |
936 size = runtime·class_to_size[cl]; | 747 size = runtime·class_to_size[cl]; |
937 npages = runtime·class_to_allocnpages[cl]; | 748 npages = runtime·class_to_allocnpages[cl]; |
938 n = (npages << PageShift) / size; | 749 n = (npages << PageShift) / size; |
939 } | 750 } |
940 c = m->mcache; | 751 c = m->mcache; |
| 752 nfree = 0; |
| 753 start = end = nil; |
941 | 754 |
942 // Sweep through n objects of given size starting at p. | 755 // Sweep through n objects of given size starting at p. |
943 // This thread owns the span now, so it can manipulate | 756 // This thread owns the span now, so it can manipulate |
944 // the block bitmap without atomic operations. | 757 // the block bitmap without atomic operations. |
945 for(; n > 0; n--, p += size) { | 758 for(; n > 0; n--, p += size) { |
946 uintptr off, *bitp, shift, bits; | 759 uintptr off, *bitp, shift, bits; |
947 | 760 |
948 off = (uintptr*)p - (uintptr*)arena_start; | 761 off = (uintptr*)p - (uintptr*)arena_start; |
949 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; | 762 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; |
| 763 PREFETCH((byte*)bitp - size); |
950 shift = off % wordsPerBitmapWord; | 764 shift = off % wordsPerBitmapWord; |
951 bits = *bitp>>shift; | 765 bits = *bitp>>shift; |
952 | 766 |
953 if((bits & bitAllocated) == 0) | 767 if((bits & bitAllocated) == 0) |
954 continue; | 768 continue; |
955 | 769 |
956 if((bits & bitMarked) != 0) { | 770 if((bits & bitMarked) != 0) { |
957 if(DebugMark) { | 771 if(DebugMark) { |
958 if(!(bits & bitSpecial)) | 772 if(!(bits & bitSpecial)) |
959 runtime·printf("found spurious mark on %p\n", p); | 773 runtime·printf("found spurious mark on %p\n", p); |
960 *bitp &= ~(bitSpecial<<shift); | 774 *bitp &= ~(bitSpecial<<shift); |
961 } | 775 } |
962 *bitp &= ~(bitMarked<<shift); | 776 *bitp &= ~(bitMarked<<shift); |
963 continue; | 777 continue; |
964 } | 778 } |
965 | 779 |
966 // Special means it has a finalizer or is being profiled. | 780 // Special means it has a finalizer or is being profiled. |
967 // In DebugMark mode, the bit has been coopted so | 781 // In DebugMark mode, the bit has been coopted so |
968 // we have to assume all blocks are special. | 782 // we have to assume all blocks are special. |
969 if(DebugMark || (bits & bitSpecial) != 0) { | 783 if(DebugMark || (bits & bitSpecial) != 0) { |
970 if(handlespecial(p, size)) | 784 if(handlespecial(p, size)) |
971 continue; | 785 continue; |
972 } | 786 } |
973 | 787 |
974 // Mark freed; restore block boundary bit. | 788 // Mark freed; restore block boundary bit. |
975 *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); | 789 *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); |
976 | 790 |
977 » » if(s->sizeclass == 0) { | 791 » » if(cl == 0) { |
978 // Free large span. | 792 // Free large span. |
979 runtime·unmarkspan(p, 1<<PageShift); | 793 runtime·unmarkspan(p, 1<<PageShift); |
980 *(uintptr*)p = 1; // needs zeroing | 794 *(uintptr*)p = 1; // needs zeroing |
981 runtime·MHeap_Free(&runtime·mheap, s, 1); | 795 runtime·MHeap_Free(&runtime·mheap, s, 1); |
| 796 c->local_alloc -= size; |
| 797 c->local_nfree++; |
982 } else { | 798 } else { |
983 // Free small object. | 799 // Free small object. |
984 » » » if(size > sizeof(uintptr)) | 800 » » » if(nfree) { |
985 » » » » ((uintptr*)p)[1] = 1;» // mark as "needs to be zeroed" | 801 » » » » PREFETCH(p); |
986 » » » c->local_by_size[s->sizeclass].nfree++; | 802 » » » » if(size > sizeof(uintptr)) |
987 » » » runtime·MCache_Free(c, p, s->sizeclass, size); | 803 » » » » » ((uintptr*)end)[1] = 1; // mark as "needs to be zeroed" |
988 » » } | 804 » » » » end->next = (MLink*)p; |
989 » » c->local_alloc -= size; | 805 » » » » end = (MLink*)p; |
990 » » c->local_nfree++; | 806 » » » } else { |
991 » } | 807 » » » » start = (MLink*)p; |
992 } | 808 » » » » end = (MLink*)p; |
993 | |
994 static void | |
995 parallelfor(Parfor *desc, uint32 nthr, uint32 tid, uint32 count, void (*body)(ui
nt32 i)) | |
996 { | |
997 » uint32 begin, end, begin2, try, victim; | |
998 » uint64 *mypos, pos; | |
999 » bool idle; | |
1000 | |
1001 » if(nthr > MaxGcproc || tid > nthr || body == nil) { | |
1002 » » runtime·printf("nthr=%d tid=%d count=%d body=%p\n", | |
1003 » » » nthr, tid, count, body); | |
1004 » » runtime·throw("parallel for: invalid args"); | |
1005 » } | |
1006 | |
1007 » if(nthr==1 || count<=1) { | |
1008 » » for(pos=0; pos<count; pos++) | |
1009 » » » body(pos); | |
1010 » » return; | |
1011 » } | |
1012 | |
1013 » begin = count / nthr * tid; | |
1014 » end = count / nthr * (tid+1); | |
1015 » if(tid==nthr-1) | |
1016 » » end = count; | |
1017 » mypos = &desc->thr[tid].pos; | |
1018 » runtime·atomicstore64(mypos, (uint64)begin | ((uint64)end)<<32); | |
1019 » //runtime·printf("thr #%d(%d): %d-%d (%d)\n", tid, work.nproc, begin, end, runtime·mheap.nspan); | |
1020 | |
1021 » for(;;) { | |
1022 » » //spanpos = runtime·atomicload64(&thr->spanpos); | |
1023 » » //begin = (uint32)spanpos; | |
1024 » » //end = (uint32)(spanpos>>32); | |
1025 » » //runtime·printf("thr #%d(%d): starting with %d-%d:%d\n", tid, work.nproc, begin, end, end-begin); | |
1026 | |
1027 » » for(;;) { | |
1028 » » » pos = runtime·xadd64(mypos, 1); | |
1029 » » » begin = (uint32)pos-1; | |
1030 » » » end = (uint32)(pos>>32); | |
1031 » » » if(begin>=end) | |
1032 » » » » break; | |
1033 » » » body(begin); | |
1034 » » } | |
1035 | |
1036 » » idle = false; | |
1037 » » for(try=0;; try++) { | |
1038 » » » if(try > nthr*4) { | |
1039 » » » » if(idle == false) { | |
1040 » » » » » runtime·xadd(&desc->done, 1); | |
1041 » » » » » idle = true; | |
1042 » » » » } | |
1043 » » » » if(desc->done == nthr) | |
1044 » » » » » return; | |
1045 } | 809 } |
1046 » » » victim = runtime·fastrand1() % (nthr-1); | 810 » » » nfree++; |
1047 » » » if(victim >= tid) | 811 » » } |
1048 » » » » victim++; | 812 » } |
1049 » » » pos = runtime·atomicload64(&desc->thr[victim].pos); | 813 |
1050 » » » //begin = (uint32)spanpos; | 814 » if(nfree) { |
1051 » » » //end = (uint32)(spanpos>>32); | 815 » » if(size > sizeof(uintptr)) |
1052 » » » //runtime·printf("thr #%d(%d): stealing from %d (%d-%d:%d)\n", tid, work.nproc, victim, begin, end, end-begin); | 816 » » » ((uintptr*)end)[1] = 1; // mark as "needs to be zeroed" |
1053 » » » for(;;) { | 817 » » c->local_by_size[s->sizeclass].nfree += nfree; |
1054 » » » » begin = (uint32)pos; | 818 » » c->local_alloc -= size * nfree; |
1055 » » » » end = (uint32)(pos>>32); | 819 » » c->local_nfree += nfree; |
1056 » » » » if(begin >= end-1) { | 820 » » c->local_cachealloc -= nfree * size; |
1057 » » » » » begin = end = 0; | 821 » » c->local_objects -= nfree; |
1058 » » » » » break; | 822 » » runtime·MCentral_FreeSpan(&runtime·mheap.central[cl], |
1059 » » » » } | 823 » » » » » s, nfree, start, end); |
1060 » » » » if(idle) { | 824 » } |
1061 » » » » » runtime·xadd(&desc->done, -1); | |
1062 » » » » » idle = false; | |
1063 » » » » } | |
1064 » » » » begin2 = begin + (end-begin)/2; | |
1065 » » » » if(runtime·cas64(&desc->thr[victim].pos, &pos, (uint64)begin | (((uint64)begin2)<<32))) { | |
1066 » » » » » begin = begin2; | |
1067 » » » » » break; | |
1068 » » » » } | |
1069 » » » } | |
1070 » » » if(begin < end) { | |
1071 » » » » if(idle) | |
1072 » » » » » runtime·xadd(&desc->done, -1); | |
1073 » » » » runtime·atomicstore64(mypos, (uint64)begin | ((uint64)end)<<32); | |
1074 » » » » m->gcstats.nspansteals += 1; | |
1075 » » » » m->gcstats.nspanstolen += end-begin; | |
1076 » » » » break; | |
1077 » » » } | |
1078 » » » if(try<nthr) { | |
1079 » » » } else if (try<nthr*2) { | |
1080 » » » » runtime·procyield(20); | |
1081 » » » } else if (try<nthr*4) { | |
1082 » » » » runtime·osyield(); | |
1083 » » » } else { | |
1084 » » » » runtime·usleep(1); | |
1085 » » » } | |
1086 » » } | |
1087 » } | |
1088 } | |
1089 | |
1090 static void | |
1091 markcb(uint32 i) | |
1092 { | |
1093 » scanblock(work.roots[i].p, work.roots[i].n); | |
1094 } | |
1095 | |
1096 static void | |
1097 parmark(uint32 tid) | |
1098 { | |
1099 » //uintptr i; | |
1100 | |
1101 » parallelfor(&markfor, work.nproc, tid, work.nroots, markcb); | |
1102 » //» for(i=0; i<work.nroots; i++) | |
1103 » //» » scanblock(work.roots[i].p, work.roots[i].n); | |
1104 » scanblock(nil, 0); | |
1105 } | |
1106 | |
1107 // Sweep frees or collects finalizers for blocks not marked in the mark phase. | |
1108 // It clears the mark bits in preparation for the next GC round. | |
1109 static void | |
1110 sweep(uint32 tid) | |
1111 { | |
1112 » //GcThread *thr; | |
1113 » //MSpan *s; | |
1114 » //uint64 spanpos; | |
1115 | |
1116 » parallelfor(&sweepfor, work.nproc, tid, runtime·mheap.nspan, sweepspan); | |
1117 | |
1118 /* | |
1119 » uint32 begin = runtime·mheap.nspan / work.nproc * tid; | |
1120 » uint32 end = runtime·mheap.nspan / work.nproc * (tid+1); | |
1121 » if(tid==work.nproc-1) | |
1122 » » end = runtime·mheap.nspan; | |
1123 » thr = &work.thr[tid]; | |
1124 » runtime·atomicstore64(&thr->spanpos, (uint64)begin | ((uint64)end)<<32); | |
1125 //runtime·printf("thr #%d(%d): %d-%d (%d)\n", tid, work.nproc, begin, end, runtime·mheap.nspan); | |
1126 | |
1127 » for(;;) { | |
1128 » » //spanpos = runtime·atomicload64(&thr->spanpos); | |
1129 » » //begin = (uint32)spanpos; | |
1130 » » //end = (uint32)(spanpos>>32); | |
1131 » » //runtime·printf("thr #%d(%d): starting with %d-%d:%d\n", tid, work.nproc, begin, end, end-begin); | |
1132 | |
1133 » » for(;;) { | |
1134 » » » spanpos = runtime·xadd64(&thr->spanpos, 1); | |
1135 » » » begin = (uint32)spanpos-1; | |
1136 » » » end = (uint32)(spanpos>>32); | |
1137 » » » if(begin>=end) | |
1138 » » » » break; | |
1139 | |
1140 » » » sweepspan(begin); | |
1141 » » } | |
1142 | |
1143 » » if(work.nproc==1) | |
1144 » » » return; | |
1145 | |
1146 » » uint32 try; | |
1147 » » bool idle = false; | |
1148 » » for(try=0;; try++) { | |
1149 » » » if(try > work.nproc*4) { | |
1150 » » » » if(idle == false) { | |
1151 » » » » » runtime·xadd(&work.sweepdone, 1); | |
1152 » » » » » idle = true; | |
1153 » » » » } | |
1154 » » » » if(work.sweepdone == work.nproc) | |
1155 » » » » » return; | |
1156 » » » } | |
1157 » » » uint32 victim = runtime·fastrand1() % (work.nproc-1); | |
1158 » » » if(victim >= tid) | |
1159 » » » » victim++; | |
1160 » » » spanpos = runtime·atomicload64(&work.thr[victim].spanpos); | |
1161 » » » //begin = (uint32)spanpos; | |
1162 » » » //end = (uint32)(spanpos>>32); | |
1163 » » » //runtime·printf("thr #%d(%d): stealing from %d (%d-%d:%d)\n", tid, work.nproc, victim, begin, end, end-begin); | |
1164 » » » for(;;) { | |
1165 » » » » begin = (uint32)spanpos; | |
1166 » » » » end = (uint32)(spanpos>>32); | |
1167 » » » » if(begin >= end-1) { | |
1168 » » » » » begin = end = 0; | |
1169 » » » » » break; | |
1170 » » » » } | |
1171 » » » » if(idle) { | |
1172 » » » » » runtime·xadd(&work.sweepdone, -1); | |
1173 » » » » » idle = false; | |
1174 » » » » } | |
1175 » » » » uint32 mybegin = begin + (end-begin)/2; | |
1176 » » » » if(runtime·cas64(&work.thr[victim].spanpos, &spanpos, (uint64)begin | (((uint64)mybegin)<<32))) { | |
1177 » » » » » begin = mybegin; | |
1178 » » » » » break; | |
1179 » » » » } | |
1180 » » » } | |
1181 » » » if(begin < end) { | |
1182 » » » » if(idle) | |
1183 » » » » » runtime·xadd(&work.sweepdone, -1); | |
1184 » » » » runtime·atomicstore64(&thr->spanpos, (uint64)begin | ((uint64)end)<<32); | |
1185 » » » » m->gcstats.nspansteals += 1; | |
1186 » » » » m->gcstats.nspanstolen += end-begin; | |
1187 » » » » break; | |
1188 » » » } | |
1189 » » » if(try<work.nproc) { | |
1190 » » » } else if (try<work.nproc*2) { | |
1191 » » » » runtime·procyield(20); | |
1192 » » » } else if (try<work.nproc*4) { | |
1193 » » » » runtime·osyield(); | |
1194 » » » } else { | |
1195 » » » » runtime·usleep(1); | |
1196 » » » } | |
1197 » » } | |
1198 » } | |
1199 » */ | |
1200 } | 825 } |
1201 | 826 |
1202 void | 827 void |
1203 runtime·gchelper(void) | 828 runtime·gchelper(void) |
1204 { | 829 { |
1205 » uint32 tid; | 830 » // parallel mark for over gc roots |
1206 | 831 » parfor(&work.markfor); |
1207 » tid = runtime·xadd(&work.tidseq, 1); | 832 » // help other threads scan secondary blocks |
1208 | 833 » scanblock(nil, 0); |
1209 » parmark(tid); | 834 |
1210 » //scanblock(nil, 0); | |
1211 if(DebugMark) { | 835 if(DebugMark) { |
1212 » » while(runtime·atomicload(&work.markdone) == 0) | 836 » » // wait while the main thread executes mark(debug_scanblock) |
| 837 » » while(runtime·atomicload(&work.debugmarkdone) == 0) |
1213 runtime·usleep(10); | 838 runtime·usleep(10); |
1214 } | 839 } |
1215 | 840 |
1216 » sweep(tid); | 841 » // parallel sweep for over all spans |
| 842 » parfor(&work.sweepfor); |
1217 | 843 |
1218 if(runtime·xadd(&work.ndone, +1) == work.nproc-1) | 844 if(runtime·xadd(&work.ndone, +1) == work.nproc-1) |
1219 runtime·notewakeup(&work.alldone); | 845 runtime·notewakeup(&work.alldone); |
1220 } | 846 } |
1221 | |
1222 // Semaphore, not Lock, so that the goroutine | |
1223 // reschedules when there is contention rather | |
1224 // than spinning. | |
1225 static uint32 gcsema = 1; | |
1226 | 847 |
1227 // Initialized from $GOGC. GOGC=off means no gc. | 848 // Initialized from $GOGC. GOGC=off means no gc. |
1228 // | 849 // |
1229 // Next gc is after we've allocated an extra amount of | 850 // Next gc is after we've allocated an extra amount of |
1230 // memory proportional to the amount already in use. | 851 // memory proportional to the amount already in use. |
1231 // If gcpercent=100 and we're using 4M, we'll gc again | 852 // If gcpercent=100 and we're using 4M, we'll gc again |
1232 // when we get to 8M. This keeps the gc cost in linear | 853 // when we get to 8M. This keeps the gc cost in linear |
1233 // proportion to the allocation cost. Adjusting gcpercent | 854 // proportion to the allocation cost. Adjusting gcpercent |
1234 // just changes the linear constant (and also the amount of | 855 // just changes the linear constant (and also the amount of |
1235 // extra memory used). | 856 // extra memory used). |
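As a rough illustration of the pacing rule described in the comment above (a sketch only; the same expression appears later in runtime·gc, and the numbers here are purely illustrative):

// Sketch of the GOGC pacing rule, not part of the CL.
// With gcpercent = 100 and 4 MB of live heap, the next collection
// triggers once heap_alloc reaches 8 MB.
static uint64
nextgc(uint64 heap_alloc, int32 gcpercent)
{
	return heap_alloc + heap_alloc*gcpercent/100;
}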
(...skipping 43 matching lines...)
1279 } | 900 } |
1280 mstats.stacks_inuse = stacks_inuse; | 901 mstats.stacks_inuse = stacks_inuse; |
1281 mstats.stacks_sys = stacks_sys; | 902 mstats.stacks_sys = stacks_sys; |
1282 } | 903 } |
1283 | 904 |
1284 void | 905 void |
1285 runtime·gc(int32 force) | 906 runtime·gc(int32 force) |
1286 { | 907 { |
1287 int64 t0, t1, t2, t3; | 908 int64 t0, t1, t2, t3; |
1288 uint64 heap0, heap1, obj0, obj1; | 909 uint64 heap0, heap1, obj0, obj1; |
| 910 GCStats stats; |
| 911 uint32 i; |
1289 byte *p; | 912 byte *p; |
1290 GCStats stats; | |
1291 | 913 |
1292 // The gc is turned off (via enablegc) until | 914 // The gc is turned off (via enablegc) until |
1293 // the bootstrap has completed. | 915 // the bootstrap has completed. |
1294 // Also, malloc gets called in the guts | 916 // Also, malloc gets called in the guts |
1295 // of a number of libraries that might be | 917 // of a number of libraries that might be |
1296 // holding locks. To avoid priority inversion | 918 // holding locks. To avoid priority inversion |
1297 // problems, don't bother trying to run gc | 919 // problems, don't bother trying to run gc |
1298 // while holding a lock. The next mallocgc | 920 // while holding a lock. The next mallocgc |
1299 // without a lock will do the gc instead. | 921 // without a lock will do the gc instead. |
1300 if(!mstats.enablegc || m->locks > 0 || runtime·panicking) | 922 if(!mstats.enablegc || m->locks > 0 || runtime·panicking) |
1301 return; | 923 return; |
1302 | 924 |
1303 if(gcpercent == -2) { // first time through | 925 if(gcpercent == -2) { // first time through |
1304 p = runtime·getenv("GOGC"); | 926 p = runtime·getenv("GOGC"); |
1305 if(p == nil || p[0] == '\0') | 927 if(p == nil || p[0] == '\0') |
1306 gcpercent = 100; | 928 gcpercent = 100; |
1307 else if(runtime·strcmp(p, (byte*)"off") == 0) | 929 else if(runtime·strcmp(p, (byte*)"off") == 0) |
1308 gcpercent = -1; | 930 gcpercent = -1; |
1309 else | 931 else |
1310 gcpercent = runtime·atoi(p); | 932 gcpercent = runtime·atoi(p); |
1311 | 933 |
1312 p = runtime·getenv("GOGCTRACE"); | 934 p = runtime·getenv("GOGCTRACE"); |
1313 if(p != nil) | 935 if(p != nil) |
1314 gctrace = runtime·atoi(p); | 936 gctrace = runtime·atoi(p); |
1315 } | 937 } |
1316 if(gcpercent < 0) | 938 if(gcpercent < 0) |
1317 return; | 939 return; |
1318 | 940 |
1319 » runtime·semacquire(&gcsema); | 941 » runtime·semacquire(&runtime·worldsema); |
1320 if(!force && mstats.heap_alloc < mstats.next_gc) { | 942 if(!force && mstats.heap_alloc < mstats.next_gc) { |
1321 » » runtime·semrelease(&gcsema); | 943 » » runtime·semrelease(&runtime·worldsema); |
1322 return; | 944 return; |
1323 } | 945 } |
1324 | 946 |
1325 t0 = runtime·nanotime(); | 947 t0 = runtime·nanotime(); |
1326 | 948 |
1327 m->gcing = 1; | 949 m->gcing = 1; |
1328 runtime·stoptheworld(); | 950 runtime·stoptheworld(); |
1329 | 951 |
1330 heap0 = 0; | 952 heap0 = 0; |
1331 obj0 = 0; | 953 obj0 = 0; |
1332 if(gctrace) { | 954 if(gctrace) { |
1333 cachestats(0); | 955 cachestats(0); |
1334 heap0 = mstats.heap_alloc; | 956 heap0 = mstats.heap_alloc; |
1335 obj0 = mstats.nmalloc - mstats.nfree; | 957 obj0 = mstats.nmalloc - mstats.nfree; |
1336 } | 958 } |
1337 | 959 |
1338 work.nproc = runtime·gcprocs(); | 960 work.nproc = runtime·gcprocs(); |
1339 sweepfor.done = 0; | |
1340 markfor.done = 0; | |
1341 | |
1342 work.nwait = 0; | 961 work.nwait = 0; |
1343 work.ndone = 0; | 962 work.ndone = 0; |
1344 » work.tidseq = 0; | 963 » work.debugmarkdone = 0; |
1345 » work.markdone = 0; | 964 » addroots(); |
1346 » //work.sweepdone = 0; | 965 » parforsetup(&work.markfor, markroot, work.nproc, work.nroot, nil, false); |
1347 » collectroots(); | 966 » parforsetup(&work.sweepfor, sweepspan, work.nproc, runtime·mheap.nspan, nil, true); |
1348 if(work.nproc > 1) { | 967 if(work.nproc > 1) { |
1349 runtime·noteclear(&work.alldone); | 968 runtime·noteclear(&work.alldone); |
1350 » » runtime·helpgc(); | 969 » » runtime·helpgc(work.nproc); |
1351 » } | 970 » } |
1352 | 971 |
1353 //runtime·printf("mark\n"); | 972 » parfor(&work.markfor); |
1354 » parmark(0); | 973 » scanblock(nil, 0); |
1355 | 974 |
1356 » //mark(scanblock); | |
1357 if(DebugMark) { | 975 if(DebugMark) { |
1358 » » mark(debug_scanblock); | 976 » » for(i=0; i<work.nroot; i++) |
1359 » » runtime·xchg(&work.markdone, 1); | 977 » » » debug_scanblock(work.roots[i].p, work.roots[i].n); |
| 978 » » runtime·xchg(&work.debugmarkdone, 1); |
1360 } | 979 } |
1361 t1 = runtime·nanotime(); | 980 t1 = runtime·nanotime(); |
1362 | 981 » // parallel sweep for over all spans |
1363 //runtime·printf("sweep\n"); | 982 » parfor(&work.sweepfor); |
1364 » sweep(0); | |
1365 » if(work.nproc > 1) | |
1366 » » runtime·notesleep(&work.alldone); | |
1367 t2 = runtime·nanotime(); | 983 t2 = runtime·nanotime(); |
1368 | 984 |
1369 stealcache(); | 985 stealcache(); |
1370 cachestats(&stats); | 986 cachestats(&stats); |
1371 | 987 |
1372 mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; | 988 mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; |
1373 m->gcing = 0; | 989 m->gcing = 0; |
1374 | 990 |
1375 m->locks++; // disable gc during the mallocs in newproc | |
1376 if(finq != nil) { | 991 if(finq != nil) { |
| 992 m->locks++; // disable gc during the mallocs in newproc |
1377 // kick off or wake up goroutine to run queued finalizers | 993 // kick off or wake up goroutine to run queued finalizers |
1378 if(fing == nil) | 994 if(fing == nil) |
1379 » » » fing = runtime·newproc1((byte*)runfinq, nil, 0, 0, runtime·gc); | 995 » » » fing = runtime·newproc1((byte*)runfinq, nil, 0, 0, runtime·gc); |
1380 else if(fingwait) { | 996 else if(fingwait) { |
1381 fingwait = 0; | 997 fingwait = 0; |
1382 runtime·ready(fing); | 998 runtime·ready(fing); |
1383 } | 999 } |
1384 » } | 1000 » » m->locks--; |
1385 » m->locks--; | 1001 » } |
| 1002 »······· |
| 1003 » if(work.nproc > 1) |
| 1004 » » runtime·notesleep(&work.alldone); |
1386 | 1005 |
1387 heap1 = mstats.heap_alloc; | 1006 heap1 = mstats.heap_alloc; |
1388 obj1 = mstats.nmalloc - mstats.nfree; | 1007 obj1 = mstats.nmalloc - mstats.nfree; |
1389 | 1008 |
1390 t3 = runtime·nanotime(); | 1009 t3 = runtime·nanotime(); |
| 1010 mstats.last_gc = t3; |
1391 mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0; | 1011 mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0; |
1392 mstats.pause_total_ns += t3 - t0; | 1012 mstats.pause_total_ns += t3 - t0; |
1393 mstats.numgc++; | 1013 mstats.numgc++; |
1394 if(mstats.debuggc) | 1014 if(mstats.debuggc) |
1395 runtime·printf("pause %D\n", t3-t0); | 1015 runtime·printf("pause %D\n", t3-t0); |
1396 | 1016 |
1397 if(gctrace) { | 1017 if(gctrace) { |
1398 » » runtime·printf("gc%d(%d): %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D pointer lookups (%D size, %D addr) %D(%D) handoff, scan block/word/ptr: %D/%D/%D\n", | 1018 » » runtime·printf("gc%d(%d): %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects, %D(%D) steals, %D(%D) handoffs, %D/%D/%D yields\n", |
1399 » » » mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000, | 1019 » » » mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000, |
1400 heap0>>20, heap1>>20, obj0, obj1, | 1020 heap0>>20, heap1>>20, obj0, obj1, |
1401 mstats.nmalloc, mstats.nfree, | 1021 mstats.nmalloc, mstats.nfree, |
1402 » » » stats.nsizelookup+stats.naddrlookup, stats.nsizelookup, stats.naddrlookup, | 1022 » » » stats.nsteal, stats.nstealcnt, |
1403 stats.nhandoff, stats.nhandoffcnt, | 1023 stats.nhandoff, stats.nhandoffcnt, |
1404 » » » stats.nscanblocks, stats.nscanwords, stats.nscanptr); | 1024 » » » stats.nprocyield, stats.nosyield, stats.nsleep); |
1405 » » runtime·printf(" sweep: span total/full/large %D/%D/%D steal count/amount %D/%D\n", | 1025 » } |
1406 » » » runtime·mheap.nspan, stats.nspanfull, stats.nspanlarge, stats.nspansteals, stats.nspanstolen); | 1026 »······· |
1407 » } | 1027 » runtime·MProf_GC(); |
1408 | 1028 » runtime·semrelease(&runtime·worldsema); |
1409 » runtime·semrelease(&gcsema); | |
1410 runtime·starttheworld(); | 1029 runtime·starttheworld(); |
1411 | 1030 |
1412 // give the queued finalizers, if any, a chance to run | 1031 // give the queued finalizers, if any, a chance to run |
1413 if(finq != nil) | 1032 if(finq != nil) |
1414 runtime·gosched(); | 1033 runtime·gosched(); |
1415 | 1034 |
1416 if(gctrace > 1 && !force) | 1035 if(gctrace > 1 && !force) |
1417 runtime·gc(1); | 1036 runtime·gc(1); |
1418 } | 1037 } |
1419 | 1038 |
1420 void | 1039 void |
1421 runtime·UpdateMemStats(void) | 1040 runtime·ReadMemStats(MStats *stats) |
1422 { | 1041 { |
1423 » // Have to acquire gcsema to stop the world, | 1042 » // Have to acquire worldsema to stop the world, |
1424 // because stoptheworld can only be used by | 1043 // because stoptheworld can only be used by |
1425 // one goroutine at a time, and there might be | 1044 // one goroutine at a time, and there might be |
1426 // a pending garbage collection already calling it. | 1045 // a pending garbage collection already calling it. |
1427 » runtime·semacquire(&gcsema); | 1046 » runtime·semacquire(&runtime·worldsema); |
1428 m->gcing = 1; | 1047 m->gcing = 1; |
1429 runtime·stoptheworld(); | 1048 runtime·stoptheworld(); |
1430 cachestats(0); | 1049 cachestats(0); |
| 1050 *stats = mstats; |
1431 m->gcing = 0; | 1051 m->gcing = 0; |
1432 » runtime·semrelease(&gcsema); | 1052 » runtime·semrelease(&runtime·worldsema); |
1433 runtime·starttheworld(); | 1053 runtime·starttheworld(); |
1434 } | 1054 } |
1435 | 1055 |
1436 static void | 1056 static void |
1437 runfinq(void) | 1057 runfinq(void) |
1438 { | 1058 { |
1439 Finalizer *f; | 1059 Finalizer *f; |
1440 FinBlock *fb, *next; | 1060 FinBlock *fb, *next; |
1441 byte *frame; | 1061 byte *frame; |
1442 uint32 framesz, framecap, i; | 1062 uint32 framesz, framecap, i; |
(...skipping 235 matching lines...)
1678 uintptr n; | 1298 uintptr n; |
1679 | 1299 |
1680 n = (h->arena_used - h->arena_start) / wordsPerBitmapWord; | 1300 n = (h->arena_used - h->arena_start) / wordsPerBitmapWord; |
1681 n = (n+bitmapChunk-1) & ~(bitmapChunk-1); | 1301 n = (n+bitmapChunk-1) & ~(bitmapChunk-1); |
1682 if(h->bitmap_mapped >= n) | 1302 if(h->bitmap_mapped >= n) |
1683 return; | 1303 return; |
1684 | 1304 |
1685 runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped); | 1305 runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped); |
1686 h->bitmap_mapped = n; | 1306 h->bitmap_mapped = n; |
1687 } | 1307 } |
| 1308 |
| 1309 #ifdef _64BIT |
| 1310 // Amd64 uses 48-bit virtual addresses; the 47th bit is the kernel/user flag, |
| 1311 // so we use the 17 most significant bits of pointers as an ABA counter. |
| 1312 # define PTR_BITS 47 |
| 1313 #else |
| 1314 # define PTR_BITS 32 |
| 1315 #endif |
| 1316 #define PTR_MASK ((1ull<<PTR_BITS)-1) |
| 1317 |
| 1318 static void |
| 1319 lifopush(uint64 *a, Workbuf *b) |
| 1320 { |
| 1321 uint64 old, new; |
| 1322 |
| 1323 if((uint64)b != ((uint64)b&PTR_MASK)) { |
| 1324 runtime·printf("p=%p\n", b); |
| 1325 runtime·throw("lifopush: invalid pointer"); |
| 1326 } |
| 1327 |
| 1328 if(work.nproc == 1) { |
| 1329 b->next = (Workbuf*)(*a&PTR_MASK); |
| 1330 *a = (uint64)b; |
| 1331 return; |
| 1332 } |
| 1333 |
| 1334 b->pushcnt++; |
| 1335 new = (uint64)b|(((uint64)b->pushcnt)<<PTR_BITS); |
| 1336 old = runtime·atomicload64(a); |
| 1337 for(;;) { |
| 1338 b->next = (Workbuf*)(old&PTR_MASK); |
| 1339 if(runtime·cas64(a, &old, new)) |
| 1340 break; |
| 1341 } |
| 1342 } |
| 1343 |
| 1344 static Workbuf* |
| 1345 lifopop(uint64 *a) |
| 1346 { |
| 1347 Workbuf *b, *b2; |
| 1348 uint64 old, new; |
| 1349 |
| 1350 if(work.nproc == 1) { |
| 1351 b = (Workbuf*)(*a&PTR_MASK); |
| 1352 if(b == nil) |
| 1353 return nil; |
| 1354 *a = (uint64)b->next; |
| 1355 return b; |
| 1356 } |
| 1357 |
| 1358 old = runtime·atomicload64(a); |
| 1359 for(;;) { |
| 1360 if(old == 0) |
| 1361 return nil; |
| 1362 b = (Workbuf*)(old&PTR_MASK); |
| 1363 b2 = runtime·atomicloadp(&b->next); |
| 1364 new = 0; |
| 1365 if(b2 != nil) |
| 1366 new = (uint64)b2|(((uint64)b2->pushcnt)<<PTR_BITS); |
| 1367 if(runtime·cas64(a, &old, new)) |
| 1368 return b; |
| 1369 } |
| 1370 } |
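A minimal sketch of the pointer/counter packing that lifopush and lifopop above rely on (illustrative helpers, not part of the CL; they assume the Workbuf, PTR_BITS, and PTR_MASK definitions above):

// The low PTR_BITS bits hold the Workbuf pointer and the high bits hold
// that buffer's push counter, so re-pushing a previously popped buffer
// produces a different 64-bit word and a stale CAS fails (ABA protection).
static uint64
lifopack(Workbuf *b)
{
	return (uint64)b | (((uint64)b->pushcnt)<<PTR_BITS);
}

static Workbuf*
lifounpack(uint64 v)
{
	return (Workbuf*)(v&PTR_MASK);
}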
| 1371 |
| 1372 void |
| 1373 runtime·CTestLockFreeStack(bool isShort) |
| 1374 { |
| 1375 uint64 stack; |
| 1376 Workbuf *b; |
| 1377 |
| 1378 USED(isShort); |
| 1379 |
| 1380 for(work.nproc=1; work.nproc<=2; work.nproc++) { |
| 1381 // check the stack is initially empty |
| 1382 stack = 0; |
| 1383 if(lifopop(&stack) != nil) |
| 1384 runtime·panicstring("stack is not empty"); |
| 1385 |
| 1386 // push one element |
| 1387 b = (Workbuf*)runtime·mallocgc(sizeof(Workbuf), FlagNoGC, 0, 0); |
| 1388 b->nobj = 42; |
| 1389 lifopush(&stack, b); |
| 1390 |
| 1391 // push another |
| 1392 b = (Workbuf*)runtime·mallocgc(sizeof(Workbuf), FlagNoGC, 0, 0); |
| 1393 b->nobj = 43; |
| 1394 lifopush(&stack, b); |
| 1395 |
| 1396 // pop one element |
| 1397 b = lifopop(&stack); |
| 1398 if(b == nil) |
| 1399 runtime·panicstring("stack is empty"); |
| 1400 if(b->nobj != 43) |
| 1401 runtime·panicstring("no lifo"); |
| 1402 runtime·free(b); |
| 1403 |
| 1404 // pop another |
| 1405 b = lifopop(&stack); |
| 1406 if(b == nil) |
| 1407 runtime·panicstring("stack is empty"); |
| 1408 if(b->nobj != 42) |
| 1409 runtime·panicstring("no lifo"); |
| 1410 runtime·free(b); |
| 1411 |
| 1412 // check the stack is empty again |
| 1413 if(stack != 0) |
| 1414 runtime·panicstring("stack is not empty"); |
| 1415 if(lifopop(&stack) != nil) |
| 1416 runtime·panicstring("stack is not empty"); |
| 1417 } |
| 1418 } |
| 1419 |
| 1420 typedef struct StackTestCtx StackTestCtx; |
| 1421 struct StackTestCtx |
| 1422 { |
| 1423 uint32 niter; |
| 1424 uint32 waitsema; |
| 1425 uint64 stack[2]; |
| 1426 }; |
| 1427 |
| 1428 static void |
| 1429 stackTestProc(StackTestCtx *ctx) |
| 1430 { |
| 1431 int32 i, n; |
| 1432 Workbuf *b; |
| 1433 |
| 1434 for(i=0; i<ctx->niter; i++) { |
| 1435 n = runtime·fastrand1()%2; |
| 1436 b = lifopop(&ctx->stack[n]); |
| 1437 if(b==nil) |
| 1438 continue; |
| 1439 n = runtime·fastrand1()%2; |
| 1440 lifopush(&ctx->stack[n], b); |
| 1441 } |
| 1442 runtime·semrelease(&ctx->waitsema); |
| 1443 } |
| 1444 |
| 1445 void |
| 1446 runtime·CTestLockFreeStackStress(bool isShort) |
| 1447 { |
| 1448 StackTestCtx ctx, *arg; |
| 1449 Workbuf *b; |
| 1450 int32 i, sum, sum2, cnt, procs; |
| 1451 const int32 N = 100; |
| 1452 const int32 P = 8; |
| 1453 const int32 G = 16; |
| 1454 |
| 1455 procs = runtime·gomaxprocsfunc(P); |
| 1456 work.nproc = P; |
| 1457 ctx.niter = isShort ? 10000 : 100000; |
| 1458 ctx.waitsema = 0; |
| 1459 ctx.stack[0] = 0; |
| 1460 ctx.stack[1] = 0; |
| 1461 arg = &ctx; |
| 1462 sum = 0; |
| 1463 for(i=0; i<N; i++) { |
| 1464 b = (Workbuf*)runtime·mallocgc(sizeof(Workbuf), FlagNoGC, 0, 0); |
| 1465 b->nobj = i; |
| 1466 sum += i; |
| 1467 lifopush(&ctx.stack[i%2], b); |
| 1468 } |
| 1469 m->locks++; // disable gc during the mallocs (gc resets work.nproc) |
| 1470 for(i=0; i<G; i++) |
| 1471 » » runtime·newproc1((byte*)stackTestProc, (byte*)&arg, sizeof(arg), 0, runtime·CTestLockFreeStackStress); |
| 1472 m->locks--; |
| 1473 for(i=0; i<G; i++) |
| 1474 runtime·semacquire(&ctx.waitsema); |
| 1475 sum2 = 0; |
| 1476 cnt = 0; |
| 1477 for(i=0; i<2; i++) { |
| 1478 while((b=lifopop(&ctx.stack[i])) != nil) { |
| 1479 cnt++; |
| 1480 sum2 += b->nobj; |
| 1481 b->next = (Workbuf*)123; |
| 1482 runtime·free(b); |
| 1483 } |
| 1484 } |
| 1485 if(cnt != N || sum2 != sum) |
| 1486 runtime·panicstring("stack corruption"); |
| 1487 runtime·gomaxprocsfunc(procs); |
| 1488 } |
| 1489 |
| 1490 static void |
| 1491 parforsetup(Parfor *desc, void (*body)(Parfor*, uint32), uint32 nthr, uint32 n, void *ctx, bool wait) |
| 1492 { |
| 1493 uint32 i, d, begin, end; |
| 1494 |
| 1495 if(nthr == 0 || nthr > MaxGcproc || body == nil) { |
| 1496 runtime·printf("nthr=%d count=%d body=%p\n", |
| 1497 nthr, n, body); |
| 1498 runtime·throw("parallel for: invalid args"); |
| 1499 } |
| 1500 |
| 1501 desc->body = body; |
| 1502 desc->done = 0; |
| 1503 desc->nthr = nthr; |
| 1504 desc->thrseq = 0; |
| 1505 desc->cnt = n; |
| 1506 desc->ctx = ctx; |
| 1507 desc->wait = wait; |
| 1508 for(i=0; i<nthr; i++) { |
| 1509 begin = n / nthr * i; |
| 1510 end = n / nthr * (i+1); |
| 1511 d = n % nthr; |
| 1512 if(i < d) { |
| 1513 begin += i; |
| 1514 end += (i+1); |
| 1515 } else { |
| 1516 begin += d; |
| 1517 end += d; |
| 1518 } |
| 1519 desc->thr[i].pos = (uint64)begin | (((uint64)end)<<32); |
| 1520 } |
| 1521 } |
| 1522 |
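For a concrete view of the split computed by parforsetup above (illustrative values only): with n=10 iterations and nthr=3 threads, the packed ranges are [0,4), [4,7), [7,10); the first n%nthr threads receive one extra iteration. A sketch that decodes a thread's packed position word (a hypothetical helper, not part of the CL):

// Illustrative helper: decode thr[i].pos into the half-open interval
// [begin, end) assigned to thread i by parforsetup.
static void
parforrange(Parfor *desc, uint32 i, uint32 *begin, uint32 *end)
{
	uint64 pos;

	pos = desc->thr[i].pos;
	*begin = (uint32)pos;
	*end = (uint32)(pos>>32);
}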
| 1523 static void |
| 1524 parfor(Parfor *desc) |
| 1525 { |
| 1526 uint32 tid, begin, end, begin2, try, victim, i; |
| 1527 uint64 *mypos, pos; |
| 1528 bool idle; |
| 1529 |
| 1530 // obtain 0-based thread index |
| 1531 tid = runtime·xadd(&desc->thrseq, 1) - 1; |
| 1532 if(tid >= desc->nthr) { |
| 1533 runtime·throw("parallel for: invalid tid"); |
| 1534 } |
| 1535 |
| 1536 // if single-threaded, just execute the for serially |
| 1537 if(desc->nthr==1) { |
| 1538 for(i=0; i<desc->cnt; i++) |
| 1539 desc->body(desc, i); |
| 1540 return; |
| 1541 } |
| 1542 |
| 1543 mypos = &desc->thr[tid].pos; |
| 1544 for(;;) { |
| 1545 for(;;) { |
| 1546 » » » // while have work, bump low index and execute the iteration |
| 1547 pos = runtime·xadd64(mypos, 1); |
| 1548 begin = (uint32)pos-1; |
| 1549 end = (uint32)(pos>>32); |
| 1550 if(begin>=end) |
| 1551 break; |
| 1552 desc->body(desc, begin); |
| 1553 } |
| 1554 |
| 1555 // out of work, need to steal something |
| 1556 idle = false; |
| 1557 for(try=0;; try++) { |
| 1558 // if we don't see any work for long enough, |
| 1559 // increment the done counter... |
| 1560 if(try > desc->nthr*4 && !idle) { |
| 1561 idle = true; |
| 1562 runtime·xadd(&desc->done, 1); |
| 1563 } |
| 1564 // ...if all threads have incremented the counter, |
| 1565 // we are done |
| 1566 if(desc->done + !idle == desc->nthr) { |
| 1567 if(!idle) |
| 1568 runtime·xadd(&desc->done, 1); |
| 1569 return; |
| 1570 } |
| 1571 // choose a random victim for stealing |
| 1572 victim = runtime·fastrand1() % (desc->nthr-1); |
| 1573 if(victim >= tid) |
| 1574 victim++; |
| 1575 pos = runtime·atomicload64(&desc->thr[victim].pos); |
| 1576 for(;;) { |
| 1577 // see if it has any work |
| 1578 begin = (uint32)pos; |
| 1579 end = (uint32)(pos>>32); |
| 1580 if(begin >= end-1) { |
| 1581 begin = end = 0; |
| 1582 break; |
| 1583 } |
| 1584 if(idle) { |
| 1585 runtime·xadd(&desc->done, -1); |
| 1586 idle = false; |
| 1587 } |
| 1588 begin2 = begin + (end-begin)/2; |
| 1589 » » » » if(runtime·cas64(&desc->thr[victim].pos, &pos, (uint64)begin | (((uint64)begin2)<<32))) { |
| 1590 begin = begin2; |
| 1591 break; |
| 1592 } |
| 1593 } |
| 1594 if(begin < end) { |
| 1595 // has successfully stolen some work |
| 1596 if(idle) |
| 1597 » » » » » runtime·throw("parfor: should not be idle"); |
| 1598 » » » » runtime·atomicstore64(mypos, (uint64)begin | ((uint64)end)<<32); |
| 1599 m->gcstats.nsteal += 1; |
| 1600 m->gcstats.nstealcnt += end-begin; |
| 1601 break; |
| 1602 } |
| 1603 // backoff |
| 1604 if(try < desc->nthr) { |
| 1605 } else if (try < desc->nthr*4) { |
| 1606 m->gcstats.nprocyield++; |
| 1607 runtime·procyield(20); |
| 1608 // if a user asked to not wait for others, exit now |
| 1609 // (assume that most work is already done) |
| 1610 } else if (!desc->wait) { |
| 1611 if(!idle) |
| 1612 runtime·xadd(&desc->done, 1); |
| 1613 return; |
| 1614 } else if (try < desc->nthr*6) { |
| 1615 m->gcstats.nosyield++; |
| 1616 runtime·osyield(); |
| 1617 } else { |
| 1618 m->gcstats.nsleep++; |
| 1619 runtime·usleep(1); |
| 1620 } |
| 1621 } |
| 1622 } |
| 1623 } |
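To make the stealing arithmetic in parfor above concrete (illustrative values, not from the CL): if a victim's remaining range is [begin,end) = [8,16), the thief computes begin2 = 8 + (16-8)/2 = 12, CASes the victim's word down to [8,12), and takes [12,16) for itself. A sketch of that split, using a hypothetical helper name:

// Illustrative helper: compute the victim's new packed range when a thief
// steals the upper half of [begin, end); the thief then works [*mybegin, end).
static uint64
stealsplit(uint32 begin, uint32 end, uint32 *mybegin)
{
	*mybegin = begin + (end-begin)/2;
	return (uint64)begin | (((uint64)*mybegin)<<32);
}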
| 1624 |
| 1625 static void |
| 1626 testParforBody(Parfor *desc, uint32 i) |
| 1627 { |
| 1628 uint64 *data; |
| 1629 |
| 1630 data = (uint64*)desc->ctx; |
| 1631 data[i] *= data[i]; |
| 1632 } |
| 1633 |
| 1634 static void |
| 1635 testParforProc(Parfor *desc) |
| 1636 { |
| 1637 parfor(desc); |
| 1638 } |
| 1639 |
| 1640 // Simple serial sanity test for parallelfor. |
| 1641 void |
| 1642 runtime·CTestParfor(bool isShort) |
| 1643 { |
| 1644 Parfor desc; |
| 1645 uint64 *data; |
| 1646 uint32 i, N; |
| 1647 |
| 1648 USED(isShort); |
| 1649 |
| 1650 N = 1000; |
| 1651 data = (uint64*)runtime·mal(N*sizeof(uint64)); |
| 1652 for(i=0; i<N; i++) |
| 1653 data[i] = i; |
| 1654 parforsetup(&desc, testParforBody, 1, N, data, true); |
| 1655 parfor(&desc); |
| 1656 for(i=0; i<N; i++) { |
| 1657 if(data[i] != i*i) |
| 1658 runtime·panicstring("incorrect result"); |
| 1659 } |
| 1660 runtime·free(data); |
| 1661 } |
| 1662 |
| 1663 // Test that iterations are properly distributed. |
| 1664 void |
| 1665 runtime·CTestParforSetup(bool isShort) |
| 1666 { |
| 1667 Parfor desc; |
| 1668 uint32 n, t, i, begin, end, size, end0, size0, sum; |
| 1669 |
| 1670 USED(isShort); |
| 1671 |
| 1672 for(n=0; n<100; n++) { |
| 1673 for(t=1; t<=MaxGcproc; t++) { |
| 1674 parforsetup(&desc, testParforBody, t, n, 0, true); |
| 1675 sum = 0; |
| 1676 size0 = 0; |
| 1677 end0 = 0; |
| 1678 for(i=0; i<t; i++) { |
| 1679 begin = (uint32)desc.thr[i].pos; |
| 1680 end = (uint32)(desc.thr[i].pos>>32); |
| 1681 size = end - begin; |
| 1682 sum += size; |
| 1683 if(i==0) { |
| 1684 size0 = size; |
| 1685 if(begin != 0) |
| 1686 » » » » » » runtime·panicstring("incorrect begin"); |
| 1687 } else { |
| 1688 if(size != size0 && size != size0-1) |
| 1689 » » » » » » runtime·panicstring("incorrect size"); |
| 1690 if(begin != end0) |
| 1691 » » » » » » runtime·panicstring("incorrect begin/end"); |
| 1692 } |
| 1693 end0 = end; |
| 1694 } |
| 1695 if(sum != n) |
| 1696 runtime·panicstring("incorrect sum"); |
| 1697 } |
| 1698 } |
| 1699 } |
| 1700 |
| 1701 // Test that nonblocking parallelfor does not block. |
| 1702 void |
| 1703 runtime·CTestParforNonblock(bool isShort) |
| 1704 { |
| 1705 Parfor desc; |
| 1706 uint64 *data; |
| 1707 uint32 i, N; |
| 1708 |
| 1709 USED(isShort); |
| 1710 |
| 1711 N = 1000; |
| 1712 data = (uint64*)runtime·mal(N*sizeof(uint64)); |
| 1713 for(i=0; i<N; i++) |
| 1714 data[i] = i; |
| 1715 parforsetup(&desc, testParforBody, MaxGcproc, N, data, false); |
| 1716 for(i=0; i<MaxGcproc; i++) |
| 1717 parfor(&desc); |
| 1718 for(i=0; i<N; i++) { |
| 1719 if(data[i] != i*i) |
| 1720 runtime·panicstring("incorrect result"); |
| 1721 } |
| 1722 runtime·free(data); |
| 1723 } |
| 1724 |
| 1725 // Test parallel parallelfor. |
| 1726 void |
| 1727 runtime·CTestParforParallel(bool isShort) |
| 1728 { |
| 1729 Parfor desc, *arg; |
| 1730 uint64 *data; |
| 1731 uint32 i, N; |
| 1732 int32 procs; |
| 1733 |
| 1734 procs = runtime·gomaxprocsfunc(MaxGcproc+1); |
| 1735 N = 10000; |
| 1736 if(isShort) |
| 1737 N /= 10; |
| 1738 data = (uint64*)runtime·mal(N*sizeof(uint64)); |
| 1739 for(i=0; i<N; i++) |
| 1740 data[i] = i; |
| 1741 parforsetup(&desc, testParforBody, MaxGcproc, N, data, true); |
| 1742 arg = &desc; |
| 1743 m->locks++; // disable gc during the mallocs in newproc |
| 1744 for(i=1; i<MaxGcproc; i++) |
| 1745 » » runtime·newproc1((byte*)testParforProc, (byte*)&arg, sizeof(arg), 0, runtime·CTestParforParallel); |
| 1746 m->locks--; |
| 1747 parfor(&desc); |
| 1748 for(i=0; i<N; i++) { |
| 1749 if(data[i] != i*i) |
| 1750 runtime·panicstring("incorrect result"); |
| 1751 } |
| 1752 runtime·free(data); |
| 1753 runtime·gomaxprocsfunc(procs); |
| 1754 } |