LEFT | RIGHT |
1 // Copyright 2009 The Go Authors. All rights reserved. | 1 // Copyright 2009 The Go Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 // Garbage collector. | 5 // Garbage collector. |
6 | 6 |
7 #include "runtime.h" | 7 #include "runtime.h" |
8 #include "arch_GOARCH.h" | 8 #include "arch_GOARCH.h" |
9 #include "malloc.h" | 9 #include "malloc.h" |
10 #include "stack.h" | 10 #include "stack.h" |
(...skipping 35 matching lines...)
46 // | 46 // |
47 #define bitAllocated ((uintptr)1<<(bitShift*0)) | 47 #define bitAllocated ((uintptr)1<<(bitShift*0)) |
48 #define bitNoPointers ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */ | 48 #define bitNoPointers ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */ |
49 #define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */ | 49 #define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */ |
50 #define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */ | 50 #define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */ |
51 #define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */ | 51 #define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */ |
52 | 52 |
53 #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial) | 53 #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial) |
54 | 54 |
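For reference while reviewing: the four flag planes above are packed several-to-a-word and read with the off/bitp/shift arithmetic that appears in scanblock further down. A minimal standalone sketch of that read, in portable C rather than the runtime's Plan 9 dialect, assuming 64-bit words (bitShift = wordsPerBitmapWord = 16 as in malloc.h); the helper name getbits is invented for illustration:

#include <stdint.h>

enum {
        bitShift = 16,             // assumption: 64-bit words, 4 flag planes
        wordsPerBitmapWord = 16,   // heap words described per bitmap word
};
#define bitAllocated  ((uintptr_t)1<<(bitShift*0))
#define bitNoPointers ((uintptr_t)1<<(bitShift*1))
#define bitMarked     ((uintptr_t)1<<(bitShift*2))
#define bitSpecial    ((uintptr_t)1<<(bitShift*3))

// getbits (hypothetical name): return the flag bits for the heap word
// at obj, shifted down so the bit* masks above can be tested directly.
// The bitmap grows down from arena_start, so bitmap word k describes
// heap words [k*wordsPerBitmapWord, (k+1)*wordsPerBitmapWord).
static uintptr_t
getbits(uintptr_t *arena_start, void *obj)
{
        uintptr_t off, *bitp, shift;

        off = (uintptr_t*)obj - arena_start;             // word offset into arena
        bitp = arena_start - off/wordsPerBitmapWord - 1; // bitmap word, below arena
        shift = off % wordsPerBitmapWord;
        return *bitp >> shift;
}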
55 // TODO: Make these per-M. | 55 // TODO: Make these per-M. |
56 static uint64 nlookup; | |
57 static uint64 nsizelookup; | |
58 static uint64 naddrlookup; | |
59 static uint64 nhandoff; | 56 static uint64 nhandoff; |
60 | 57 |
61 static int32 gctrace; | 58 static int32 gctrace; |
62 | 59 |
63 typedef struct Workbuf Workbuf; | 60 typedef struct Workbuf Workbuf; |
64 struct Workbuf | 61 struct Workbuf |
65 { | 62 { |
66 Workbuf *next; | 63 Workbuf *next; |
67 uintptr nobj; | 64 uintptr nobj; |
68 byte *obj[512-2]; | 65 byte *obj[512-2]; |
(...skipping 136 matching lines...)
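The Workbuf's closing brace falls in the elided region above. The 512-2 appears to be chosen so that next and nobj plus the obj array fill exactly 512 pointer-sized slots (4KB on 64-bit) — that reading is an inference, but it can be checked at compile time; a quick C11 sketch:

#include <stdint.h>
#include <assert.h>

typedef struct Workbuf Workbuf;
struct Workbuf {
        Workbuf   *next;
        uintptr_t  nobj;
        uint8_t   *obj[512-2];   // 510 object pointers
};

// next and nobj occupy the two slots subtracted from 512.
static_assert(sizeof(Workbuf) == 512*sizeof(void*),
              "Workbuf fills exactly 512 pointer slots");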
205 if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) { | 202 if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) { |
206 obj = (byte*)obj - (shift-j)*PtrSize; | 203 obj = (byte*)obj - (shift-j)*PtrSize; |
207 shift = j; | 204 shift = j; |
208 bits = xbits>>shift; | 205 bits = xbits>>shift; |
209 goto found; | 206 goto found; |
210 } | 207 } |
211 } | 208 } |
212 | 209 |
213 // Otherwise consult span table to find beginning. | 210 // Otherwise consult span table to find beginning. |
214 // (Manually inlined copy of MHeap_LookupMaybe.) | 211 // (Manually inlined copy of MHeap_LookupMaybe.) |
215 nlookup++; | |
216 naddrlookup++; | |
217 k = (uintptr)obj>>PageShift; | 212 k = (uintptr)obj>>PageShift; |
218 x = k; | 213 x = k; |
219 if(sizeof(void*) == 8) | 214 if(sizeof(void*) == 8) |
220 x -= (uintptr)arena_start>>PageShift; | 215 x -= (uintptr)arena_start>>PageShift; |
221 s = runtime·mheap.map[x]; | 216 s = runtime·mheap.map[x]; |
222 if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse) | 217 if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse) |
223 continue; | 218 continue; |
224 p = (byte*)((uintptr)s->start<<PageShift); | 219 p = (byte*)((uintptr)s->start<<PageShift); |
225 if(s->sizeclass == 0) { | 220 if(s->sizeclass == 0) { |
226 obj = p; | 221 obj = p; |
(...skipping 67 matching lines...)
294 // Emptied our buffer: refill. | 289 // Emptied our buffer: refill. |
295 wbuf = getfull(wbuf); | 290 wbuf = getfull(wbuf); |
296 if(wbuf == nil) | 291 if(wbuf == nil) |
297 return; | 292 return; |
298 nobj = wbuf->nobj; | 293 nobj = wbuf->nobj; |
299 wp = wbuf->obj + wbuf->nobj; | 294 wp = wbuf->obj + wbuf->nobj; |
300 } | 295 } |
301 b = *--wp; | 296 b = *--wp; |
302 nobj--; | 297 nobj--; |
303 | 298 |
304 » » // Figure out n = size of b. Start by loading bits for b. | 299 » » // Ask span about size class. |
305 » » off = (uintptr*)b - (uintptr*)arena_start; | |
306 » » bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; | |
307 » » shift = off % wordsPerBitmapWord; | |
308 » » xbits = *bitp; | |
309 » » bits = xbits >> shift; | |
310 | |
311 » » // Might be small; look for nearby block boundary. | |
312 » » // A block boundary is marked by either bitBlockBoundary | |
313 » » // or bitAllocated being set (see notes near their definition). | |
314 » » enum { | |
315 » » » boundary = bitBlockBoundary|bitAllocated | |
316 » » }; | |
317 » » // Look for a block boundary both after and before b | |
318 » » // in the same bitmap word. | |
319 » » // | |
320 » » // A block boundary j words after b is indicated by | |
321 » » //» bits>>j & boundary | |
322 » » // assuming shift+j < bitShift. (If shift+j >= bitShift then | |
323 » » // we'll be bleeding other bit types like bitMarked into our test.) | |
324 » » // Instead of inserting the conditional shift+j < bitShift into the loop, | |
325 » » // we can let j range from 1 to bitShift as long as we first | |
326 » » // apply a mask to keep only the bits corresponding | |
327 » » // to shift+j < bitShift aka j < bitShift-shift. | |
328 » » bits &= (boundary<<(bitShift-shift)) - boundary; | |
329 | |
330 » » // A block boundary j words before b is indicated by | |
331 » » //» xbits>>(shift-j) & boundary | |
332 » » // (assuming shift >= j). There is no cleverness here to | |
333 » » // avoid the test, because when j gets too large the shift | |
334 » » // turns negative, which is undefined in C. | |
335 | |
336 » » for(j=1; j<bitShift; j++) { | |
337 » » » if(((bits>>j)&boundary) != 0 || shift>=j && ((xbits>>(shift-j))&boundary) != 0) { | |
338 » » » » n = j*PtrSize; | |
339 » » » » goto scan; | |
340 » » » } | |
341 » » } | |
342 | |
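A worked check of the masking trick in the deleted code above may help review: with boundary carrying one bit in each of the two low planes, (boundary<<(bitShift-shift)) - boundary keeps exactly the positions j with shift+j < bitShift in both planes. A standalone test, assuming bitShift = 16:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
        enum { bitShift = 16 };   // assumption: 64-bit word, 4 flag planes
        uintptr_t boundary = ((uintptr_t)1<<0) | ((uintptr_t)1<<bitShift);
        uintptr_t shift = 13;     // so only j = 1,2 satisfy shift+j < bitShift
        uintptr_t mask = (boundary<<(bitShift-shift)) - boundary;

        assert((mask & (boundary<<1)) != 0);   // j=1 kept
        assert((mask & (boundary<<2)) != 0);   // j=2 kept
        assert((mask & (boundary<<3)) == 0);   // j=3 masked out: would bleed bitMarked
        return 0;
}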
343 » » // Fall back to asking span about size class. | |
344 // (Manually inlined copy of MHeap_Lookup.) | 300 // (Manually inlined copy of MHeap_Lookup.) |
345 nlookup++; | |
346 nsizelookup++; | |
347 x = (uintptr)b>>PageShift; | 301 x = (uintptr)b>>PageShift; |
348 if(sizeof(void*) == 8) | 302 if(sizeof(void*) == 8) |
349 x -= (uintptr)arena_start>>PageShift; | 303 x -= (uintptr)arena_start>>PageShift; |
350 s = runtime·mheap.map[x]; | 304 s = runtime·mheap.map[x]; |
351 if(s->sizeclass == 0) | 305 if(s->sizeclass == 0) |
352 n = s->npages<<PageShift; | 306 n = s->npages<<PageShift; |
353 else | 307 else |
354 n = runtime·class_to_size[s->sizeclass]; | 308 n = runtime·class_to_size[s->sizeclass]; |
355 scan:; | |
356 } | 309 } |
357 } | 310 } |
358 | 311 |
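The span-table math that both hunks above inline (MHeap_LookupMaybe to find an object's beginning, MHeap_Lookup to find its size) reduces to indexing a page-granular map. A simplified sketch with stand-in types, assuming 4KB pages; objectsize is a hypothetical helper, not a runtime function:

#include <stdint.h>

enum { PageShift = 12 };   // assumption: 4KB pages

typedef struct MSpan {
        uintptr_t start;       // first page number of the span
        uintptr_t npages;
        int32_t   sizeclass;   // 0 = large object occupying whole pages
} MSpan;

extern MSpan     *span_map[];        // stand-in for runtime·mheap.map
extern uintptr_t  class_to_size[];   // stand-in for runtime·class_to_size

// objectsize: page-index the address, then read the size off the span,
// as the manually inlined MHeap_Lookup above does.
static uintptr_t
objectsize(uint8_t *arena_start, void *b)
{
        uintptr_t x;
        MSpan *s;

        x = (uintptr_t)b >> PageShift;
        if(sizeof(void*) == 8)
                x -= (uintptr_t)arena_start >> PageShift;  // map is arena-relative on 64-bit
        s = span_map[x];
        if(s->sizeclass == 0)
                return s->npages << PageShift;
        return class_to_size[s->sizeclass];
}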
359 // debug_scanblock is the debug copy of scanblock. | 312 // debug_scanblock is the debug copy of scanblock. |
360 // it is simpler, slower, single-threaded, recursive, | 313 // it is simpler, slower, single-threaded, recursive, |
361 // and uses bitSpecial as the mark bit. | 314 // and uses bitSpecial as the mark bit. |
362 static void | 315 static void |
363 debug_scanblock(byte *b, int64 n) | 316 debug_scanblock(byte *b, int64 n) |
364 { | 317 { |
365 byte *obj, *p; | 318 byte *obj, *p; |
(...skipping 390 matching lines...)
756 // It clears the mark bits in preparation for the next GC round. | 709 // It clears the mark bits in preparation for the next GC round. |
757 static void | 710 static void |
758 sweep(void) | 711 sweep(void) |
759 { | 712 { |
760 MSpan *s; | 713 MSpan *s; |
761 int32 cl, n, npages; | 714 int32 cl, n, npages; |
762 uintptr size; | 715 uintptr size; |
763 byte *p; | 716 byte *p; |
764 MCache *c; | 717 MCache *c; |
765 byte *arena_start; | 718 byte *arena_start; |
| 719 int64 now; |
766 | 720 |
767 arena_start = runtime·mheap.arena_start; | 721 arena_start = runtime·mheap.arena_start; |
| 722 now = runtime·nanotime(); |
768 | 723 |
769 for(;;) { | 724 for(;;) { |
770 s = work.spans; | 725 s = work.spans; |
771 if(s == nil) | 726 if(s == nil) |
772 break; | 727 break; |
773 if(!runtime·casp(&work.spans, s, s->allnext)) | 728 if(!runtime·casp(&work.spans, s, s->allnext)) |
774 continue; | 729 continue; |
| 730 |
| 731 // Stamp newly unused spans. The scavenger will use that |
| 732 // info to potentially give back some pages to the OS. |
| 733 if(s->state == MSpanFree && s->unusedsince == 0) |
| 734 s->unusedsince = now; |
775 | 735 |
776 if(s->state != MSpanInUse) | 736 if(s->state != MSpanInUse) |
777 continue; | 737 continue; |
778 | 738 |
779 p = (byte*)(s->start << PageShift); | 739 p = (byte*)(s->start << PageShift); |
780 cl = s->sizeclass; | 740 cl = s->sizeclass; |
781 if(cl == 0) { | 741 if(cl == 0) { |
782 size = s->npages<<PageShift; | 742 size = s->npages<<PageShift; |
783 n = 1; | 743 n = 1; |
784 } else { | 744 } else { |
(...skipping 126 matching lines...)
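Moving the unusedsince stamp into sweep (above) replaces the post-GC loop over the free lists that this CL deletes further down. A sketch of how a scavenger pass might consume the stamp — the real policy lives in the heap scavenger, not in this file, and the names and idle limit here are illustrative:

#include <stdint.h>
#include <stddef.h>

enum { MSpanFree = 1, PageShift = 12 };   // illustrative values

typedef struct MSpan MSpan;
struct MSpan {
        MSpan     *next;
        int32_t    state;
        int64_t    unusedsince;   // stamped by sweep when a span turns up free
        uintptr_t  start;         // first page number
        uintptr_t  npages;
};

// scavenge (hypothetical): release the pages of spans idle longer than
// limit ns; sys_unused stands in for returning memory to the OS.
static void
scavenge(MSpan *list, int64_t now, int64_t limit,
         void (*sys_unused)(void *addr, uintptr_t nbytes))
{
        MSpan *s;

        for(s = list; s != NULL; s = s->next) {
                if(s->state == MSpanFree && s->unusedsince != 0
                                && now - s->unusedsince > limit)
                        sys_unused((void*)(s->start << PageShift),
                                   s->npages << PageShift);
        }
}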
911 mstats.stacks_sys = stacks_sys; | 871 mstats.stacks_sys = stacks_sys; |
912 } | 872 } |
913 | 873 |
914 void | 874 void |
915 runtime·gc(int32 force) | 875 runtime·gc(int32 force) |
916 { | 876 { |
917 int64 t0, t1, t2, t3; | 877 int64 t0, t1, t2, t3; |
918 uint64 heap0, heap1, obj0, obj1; | 878 uint64 heap0, heap1, obj0, obj1; |
919 byte *p; | 879 byte *p; |
920 bool extra; | 880 bool extra; |
921 uint32 i; | |
922 MSpan *s, *list; | |
923 MHeap *h = &runtime·mheap; | |
924 | 881 |
925 // The gc is turned off (via enablegc) until | 882 // The gc is turned off (via enablegc) until |
926 // the bootstrap has completed. | 883 // the bootstrap has completed. |
927 // Also, malloc gets called in the guts | 884 // Also, malloc gets called in the guts |
928 // of a number of libraries that might be | 885 // of a number of libraries that might be |
929 // holding locks. To avoid priority inversion | 886 // holding locks. To avoid priority inversion |
930 // problems, don't bother trying to run gc | 887 // problems, don't bother trying to run gc |
931 // while holding a lock. The next mallocgc | 888 // while holding a lock. The next mallocgc |
932 // without a lock will do the gc instead. | 889 // without a lock will do the gc instead. |
933 if(!mstats.enablegc || m->locks > 0 || runtime·panicking) | 890 if(!mstats.enablegc || m->locks > 0 || runtime·panicking) |
(...skipping 15 matching lines...)
949 if(gcpercent < 0) | 906 if(gcpercent < 0) |
950 return; | 907 return; |
951 | 908 |
952 runtime·semacquire(&gcsema); | 909 runtime·semacquire(&gcsema); |
953 if(!force && mstats.heap_alloc < mstats.next_gc) { | 910 if(!force && mstats.heap_alloc < mstats.next_gc) { |
954 runtime·semrelease(&gcsema); | 911 runtime·semrelease(&gcsema); |
955 return; | 912 return; |
956 } | 913 } |
957 | 914 |
958 t0 = runtime·nanotime(); | 915 t0 = runtime·nanotime(); |
959 nlookup = 0; | |
960 nsizelookup = 0; | |
961 naddrlookup = 0; | |
962 nhandoff = 0; | 916 nhandoff = 0; |
963 | 917 |
964 m->gcing = 1; | 918 m->gcing = 1; |
965 runtime·stoptheworld(); | 919 runtime·stoptheworld(); |
966 | 920 |
967 cachestats(); | 921 cachestats(); |
968 heap0 = mstats.heap_alloc; | 922 heap0 = mstats.heap_alloc; |
969 obj0 = mstats.nmalloc - mstats.nfree; | 923 obj0 = mstats.nmalloc - mstats.nfree; |
970 | 924 |
971 runtime·lock(&work.markgate); | 925 runtime·lock(&work.markgate); |
(...skipping 37 matching lines...)
1009 runtime·ready(fing); | 963 runtime·ready(fing); |
1010 } | 964 } |
1011 } | 965 } |
1012 m->locks--; | 966 m->locks--; |
1013 | 967 |
1014 cachestats(); | 968 cachestats(); |
1015 heap1 = mstats.heap_alloc; | 969 heap1 = mstats.heap_alloc; |
1016 obj1 = mstats.nmalloc - mstats.nfree; | 970 obj1 = mstats.nmalloc - mstats.nfree; |
1017 | 971 |
1018 t3 = runtime·nanotime(); | 972 t3 = runtime·nanotime(); |
| 973 mstats.last_gc = t3; |
1019 mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0; | 974 mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0; |
1020 mstats.pause_total_ns += t3 - t0; | 975 mstats.pause_total_ns += t3 - t0; |
1021 mstats.numgc++; | 976 mstats.numgc++; |
1022 if(mstats.debuggc) | 977 if(mstats.debuggc) |
1023 runtime·printf("pause %D\n", t3-t0); | 978 runtime·printf("pause %D\n", t3-t0); |
1024 | 979 |
1025 if(gctrace) { | 980 if(gctrace) { |
1026 » » runtime·printf("gc%d(%d): %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D pointer lookups (%D size, %D addr) %D handoff\n", | 981 » » runtime·printf("gc%d(%d): %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D handoff\n", |
1027 mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000, | 982 mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000, |
1028 heap0>>20, heap1>>20, obj0, obj1, | 983 heap0>>20, heap1>>20, obj0, obj1, |
1029 mstats.nmalloc, mstats.nfree, | 984 mstats.nmalloc, mstats.nfree, |
1030 » » » nlookup, nsizelookup, naddrlookup, nhandoff); | 985 » » » nhandoff); |
1031 » } | 986 » } |
1032 | |
1033 » // Stamp newly unused spans. The scavenger will use that | |
1034 » // info to potentially give back some pages to the OS. | |
1035 » for(i=0; i < nelem(h->free)+1; i++) { | |
1036 » » if(i < nelem(h->free)) | |
1037 » » » list = &h->free[i]; | |
1038 » » else | |
1039 » » » list = &h->large; | |
1040 » » if(runtime·MSpanList_IsEmpty(list)) | |
1041 » » » continue; | |
1042 » » for(s=list->next; s != list; s=s->next) { | |
1043 » » » if(s->state == MSpanFree && s->unusedsince == 0) | |
1044 » » » » s->unusedsince = t3; | |
1045 » » } | |
1046 » } | |
1047 » mstats.last_gc = t3; | |
1048 | 987 |
1049 runtime·semrelease(&gcsema); | 988 runtime·semrelease(&gcsema); |
1050 | 989 |
1051 // If we could have used another helper proc, start one now, | 990 // If we could have used another helper proc, start one now, |
1052 // in the hope that it will be available next time. | 991 // in the hope that it will be available next time. |
1053 // It would have been even better to start it before the collection, | 992 // It would have been even better to start it before the collection, |
1054 // but doing so requires allocating memory, so it's tricky to | 993 // but doing so requires allocating memory, so it's tricky to |
1055 // coordinate. This lazy approach works out in practice: | 994 // coordinate. This lazy approach works out in practice: |
1056 // we don't mind if the first couple gc rounds don't have quite | 995 // we don't mind if the first couple gc rounds don't have quite |
1057 // the maximum number of procs. | 996 // the maximum number of procs. |
1058 runtime·starttheworld(extra); | 997 runtime·starttheworld(extra); |
1059 | 998 |
1060 // give the queued finalizers, if any, a chance to run·· | 999 // give the queued finalizers, if any, a chance to run·· |
1061 if(finq != nil)· | 1000 if(finq != nil)· |
1062 runtime·gosched(); | 1001 runtime·gosched(); |
1063 | 1002 |
1064 if(gctrace > 1 && !force) | 1003 if(gctrace > 1 && !force) |
1065 runtime·gc(1); | 1004 runtime·gc(1); |
1066 } | 1005 } |
1067 | 1006 |
1068 void | 1007 void |
1069 runtime·UpdateMemStats(void) | 1008 runtime·ReadMemStats(MStats *stats) |
1070 { | 1009 { |
1071 // Have to acquire gcsema to stop the world, | 1010 // Have to acquire gcsema to stop the world, |
1072 // because stoptheworld can only be used by | 1011 // because stoptheworld can only be used by |
1073 // one goroutine at a time, and there might be | 1012 // one goroutine at a time, and there might be |
1074 // a pending garbage collection already calling it. | 1013 // a pending garbage collection already calling it. |
1075 runtime·semacquire(&gcsema); | 1014 runtime·semacquire(&gcsema); |
1076 m->gcing = 1; | 1015 m->gcing = 1; |
1077 runtime·stoptheworld(); | 1016 runtime·stoptheworld(); |
1078 cachestats(); | 1017 cachestats(); |
| 1018 *stats = mstats; |
1079 m->gcing = 0; | 1019 m->gcing = 0; |
1080 runtime·semrelease(&gcsema); | 1020 runtime·semrelease(&gcsema); |
1081 runtime·starttheworld(false); | 1021 runtime·starttheworld(false); |
1082 } | 1022 } |
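The rename from UpdateMemStats to ReadMemStats also changes the contract: the caller gets a consistent snapshot copied into its own MStats while the world is stopped, rather than reading a global that may be mutated again once the world restarts. The pattern as a standalone sketch, with stand-in names for the runtime's primitives:

#include <stdint.h>

typedef struct Stats { uint64_t heap_alloc, nmalloc, nfree; } Stats;

static Stats live_stats;             // stand-in for the runtime's mstats

static void stop_the_world(void)  { /* park all other threads */ }
static void start_the_world(void) { /* resume them */ }
static void cache_stats(void)     { /* fold per-thread caches into live_stats */ }

// read_mem_stats (hypothetical): snapshot while nothing else runs, so
// the copy is internally consistent, then let the world go again.
static void
read_mem_stats(Stats *out)
{
        stop_the_world();
        cache_stats();
        *out = live_stats;
        start_the_world();
}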
1083 | 1023 |
1084 static void | 1024 static void |
1085 runfinq(void) | 1025 runfinq(void) |
1086 { | 1026 { |
1087 Finalizer *f; | 1027 Finalizer *f; |
1088 FinBlock *fb, *next; | 1028 FinBlock *fb, *next; |
(...skipping 237 matching lines...)
1326 uintptr n; | 1266 uintptr n; |
1327 | 1267 |
1328 n = (h->arena_used - h->arena_start) / wordsPerBitmapWord; | 1268 n = (h->arena_used - h->arena_start) / wordsPerBitmapWord; |
1329 n = (n+bitmapChunk-1) & ~(bitmapChunk-1); | 1269 n = (n+bitmapChunk-1) & ~(bitmapChunk-1); |
1330 if(h->bitmap_mapped >= n) | 1270 if(h->bitmap_mapped >= n) |
1331 return; | 1271 return; |
1332 | 1272 |
1333 runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped); | 1273 runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped); |
1334 h->bitmap_mapped = n; | 1274 h->bitmap_mapped = n; |
1335 } | 1275 } |
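The (n+bitmapChunk-1) & ~(bitmapChunk-1) above is the standard power-of-two round-up idiom; a quick worked check, assuming bitmapChunk is 8192 (an assumption — only the power-of-two property matters):

#include <assert.h>
#include <stdint.h>

static uintptr_t
roundup(uintptr_t n, uintptr_t chunk)   // chunk must be a power of two
{
        return (n + chunk - 1) & ~(chunk - 1);
}

int
main(void)
{
        assert(roundup(1, 8192) == 8192);       // partial chunk -> whole chunk
        assert(roundup(8192, 8192) == 8192);    // exact multiple unchanged
        assert(roundup(8193, 8192) == 16384);
        return 0;
}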