LEFT | RIGHT |
1 // Copyright 2009 The Go Authors. All rights reserved. | 1 // Copyright 2009 The Go Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 // Garbage collector. | 5 // Garbage collector. |
6 | 6 |
7 #include "runtime.h" | 7 #include "runtime.h" |
8 #include "arch_GOARCH.h" | 8 #include "arch_GOARCH.h" |
9 #include "malloc.h" | 9 #include "malloc.h" |
10 #include "stack.h" | 10 #include "stack.h" |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
60 // runtime·stoptheworld(); | 60 // runtime·stoptheworld(); |
61 // | 61 // |
62 // ... do stuff ... | 62 // ... do stuff ... |
63 // | 63 // |
64 // m->gcing = 0; | 64 // m->gcing = 0; |
65 // runtime·semrelease(&runtime·worldsema); | 65 // runtime·semrelease(&runtime·worldsema); |
66 // runtime·starttheworld(); | 66 // runtime·starttheworld(); |
67 // | 67 // |
68 uint32 runtime·worldsema = 1; | 68 uint32 runtime·worldsema = 1; |
69 | 69 |
70 // TODO: Make these per-M. | |
71 static uint64 nhandoff; | |
72 | |
73 static int32 gctrace; | 70 static int32 gctrace; |
74 | 71 |
75 typedef struct Workbuf Workbuf; | 72 typedef struct Workbuf Workbuf; |
76 struct Workbuf | 73 struct Workbuf |
77 { | 74 { |
78 Workbuf *next; | 75 Workbuf *next; |
79 uintptr nobj; | 76 uintptr nobj; |
80 byte *obj[512-2]; | 77 byte *obj[512-2]; |
81 }; | 78 }; |
82 | 79 |
(...skipping 439 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
522 b1 = work.full; | 519 b1 = work.full; |
523 work.full = b1->next; | 520 work.full = b1->next; |
524 runtime·unlock(&work.fmu); | 521 runtime·unlock(&work.fmu); |
525 return b1; | 522 return b1; |
526 } | 523 } |
527 runtime·unlock(&work.fmu); | 524 runtime·unlock(&work.fmu); |
528 continue; | 525 continue; |
529 } | 526 } |
530 if(work.nwait == work.nproc) | 527 if(work.nwait == work.nproc) |
531 return nil; | 528 return nil; |
532 » » if(i < 10) | 529 » » if(i < 10) { |
| 530 » » » m->gcstats.nprocyield++; |
533 runtime·procyield(20); | 531 runtime·procyield(20); |
534 » » else if(i < 20) | 532 » » } else if(i < 20) { |
| 533 » » » m->gcstats.nosyield++; |
535 runtime·osyield(); | 534 runtime·osyield(); |
536 » » else | 535 » » } else { |
| 536 » » » m->gcstats.nsleep++; |
537 runtime·usleep(100); | 537 runtime·usleep(100); |
| 538 } |
538 } | 539 } |
539 } | 540 } |
540 | 541 |
541 static Workbuf* | 542 static Workbuf* |
542 handoff(Workbuf *b) | 543 handoff(Workbuf *b) |
543 { | 544 { |
544 int32 n; | 545 int32 n; |
545 Workbuf *b1; | 546 Workbuf *b1; |
546 | 547 |
547 // Make new buffer with half of b's pointers. | 548 // Make new buffer with half of b's pointers. |
548 b1 = getempty(nil); | 549 b1 = getempty(nil); |
549 n = b->nobj/2; | 550 n = b->nobj/2; |
550 b->nobj -= n; | 551 b->nobj -= n; |
551 b1->nobj = n; | 552 b1->nobj = n; |
552 runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]); | 553 runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]); |
553 » nhandoff += n; | 554 » m->gcstats.nhandoff++; |
| 555 » m->gcstats.nhandoffcnt += n; |
554 | 556 |
555 // Put b on full list - let first half of b get stolen. | 557 // Put b on full list - let first half of b get stolen. |
556 runtime·lock(&work.fmu); | 558 runtime·lock(&work.fmu); |
557 b->next = work.full; | 559 b->next = work.full; |
558 work.full = b; | 560 work.full = b; |
559 runtime·unlock(&work.fmu); | 561 runtime·unlock(&work.fmu); |
560 | 562 |
561 return b1; | 563 return b1; |
562 } | 564 } |
563 | 565 |
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
710 } | 712 } |
711 f = &finq->fin[finq->cnt]; | 713 f = &finq->fin[finq->cnt]; |
712 finq->cnt++; | 714 finq->cnt++; |
713 f->fn = fn; | 715 f->fn = fn; |
714 f->nret = nret; | 716 f->nret = nret; |
715 f->arg = p; | 717 f->arg = p; |
716 runtime·unlock(&finlock);· | 718 runtime·unlock(&finlock);· |
717 return true; | 719 return true; |
718 } | 720 } |
719 | 721 |
| 722 static void sweepspan(MSpan *s); |
| 723 |
| 724 // Sweep frees or collects finalizers for blocks not marked in the mark phase. |
| 725 // It clears the mark bits in preparation for the next GC round. |
| 726 static void |
| 727 sweep(void) |
| 728 { |
| 729 MSpan *s; |
| 730 int64 now; |
| 731 |
| 732 now = runtime·nanotime(); |
| 733 for(;;) { |
| 734 s = work.spans; |
| 735 if(s == nil) |
| 736 break; |
| 737 if(!runtime·casp(&work.spans, s, s->allnext)) |
| 738 continue; |
| 739 |
| 740 // Stamp newly unused spans. The scavenger will use that |
| 741 // info to potentially give back some pages to the OS. |
| 742 if(s->state == MSpanFree && s->unusedsince == 0) |
| 743 s->unusedsince = now; |
| 744 |
| 745 if(s->state != MSpanInUse) |
| 746 continue; |
| 747 |
| 748 sweepspan(s); |
| 749 } |
| 750 } |
| 751 |
720 static void | 752 static void |
721 sweepspan(MSpan *s) | 753 sweepspan(MSpan *s) |
722 { | 754 { |
723 int32 cl, n, npages; | 755 int32 cl, n, npages; |
724 uintptr size; | 756 uintptr size; |
725 byte *p; | 757 byte *p; |
726 MCache *c; | 758 MCache *c; |
727 byte *arena_start; | 759 byte *arena_start; |
728 | 760 |
729 arena_start = runtime·mheap.arena_start; | 761 arena_start = runtime·mheap.arena_start; |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
785 if(size > sizeof(uintptr)) | 817 if(size > sizeof(uintptr)) |
786 ((uintptr*)p)[1] = 1; // mark as "needs to be
zeroed" | 818 ((uintptr*)p)[1] = 1; // mark as "needs to be
zeroed" |
787 c->local_by_size[s->sizeclass].nfree++; | 819 c->local_by_size[s->sizeclass].nfree++; |
788 runtime·MCache_Free(c, p, s->sizeclass, size); | 820 runtime·MCache_Free(c, p, s->sizeclass, size); |
789 } | 821 } |
790 c->local_alloc -= size; | 822 c->local_alloc -= size; |
791 c->local_nfree++; | 823 c->local_nfree++; |
792 } | 824 } |
793 } | 825 } |
794 | 826 |
795 // Sweep frees or collects finalizers for blocks not marked in the mark phase. | |
796 // It clears the mark bits in preparation for the next GC round. | |
797 static void | |
798 sweep(void) | |
799 { | |
800 MSpan *s; | |
801 int64 now; | |
802 | |
803 now = runtime·nanotime(); | |
804 for(;;) { | |
805 s = work.spans; | |
806 if(s == nil) | |
807 break; | |
808 if(!runtime·casp(&work.spans, s, s->allnext)) | |
809 continue; | |
810 | |
811 // Stamp newly unused spans. The scavenger will use that | |
812 // info to potentially give back some pages to the OS. | |
813 if(s->state == MSpanFree && s->unusedsince == 0) | |
814 s->unusedsince = now; | |
815 | |
816 if(s->state != MSpanInUse) | |
817 continue; | |
818 | |
819 sweepspan(s); | |
820 } | |
821 } | |
822 | |
823 void | 827 void |
824 runtime·gchelper(void) | 828 runtime·gchelper(void) |
825 { | 829 { |
826 // Wait until main proc is ready for mark help. | 830 // Wait until main proc is ready for mark help. |
827 runtime·lock(&work.markgate); | 831 runtime·lock(&work.markgate); |
828 runtime·unlock(&work.markgate); | 832 runtime·unlock(&work.markgate); |
829 scanblock(nil, 0); | 833 scanblock(nil, 0); |
830 | 834 |
831 // Wait until main proc is ready for sweep help. | 835 // Wait until main proc is ready for sweep help. |
832 runtime·lock(&work.sweepgate); | 836 runtime·lock(&work.sweepgate); |
(...skipping 18 matching lines...) Expand all Loading... |
851 static void | 855 static void |
852 stealcache(void) | 856 stealcache(void) |
853 { | 857 { |
854 M *m; | 858 M *m; |
855 | 859 |
856 for(m=runtime·allm; m; m=m->alllink) | 860 for(m=runtime·allm; m; m=m->alllink) |
857 runtime·MCache_ReleaseAll(m->mcache); | 861 runtime·MCache_ReleaseAll(m->mcache); |
858 } | 862 } |
859 | 863 |
860 static void | 864 static void |
861 cachestats(void) | 865 cachestats(GCStats *stats) |
862 { | 866 { |
863 M *m; | 867 M *m; |
864 MCache *c; | 868 MCache *c; |
865 int32 i; | 869 int32 i; |
866 uint64 stacks_inuse; | 870 uint64 stacks_inuse; |
867 uint64 stacks_sys; | 871 uint64 stacks_sys; |
868 | 872 » uint64 *src, *dst; |
| 873 |
| 874 » if(stats) |
| 875 » » runtime·memclr((byte*)stats, sizeof(*stats)); |
869 stacks_inuse = 0; | 876 stacks_inuse = 0; |
870 stacks_sys = 0; | 877 stacks_sys = 0; |
871 for(m=runtime·allm; m; m=m->alllink) { | 878 for(m=runtime·allm; m; m=m->alllink) { |
872 runtime·purgecachedstats(m); | 879 runtime·purgecachedstats(m); |
873 stacks_inuse += m->stackalloc->inuse; | 880 stacks_inuse += m->stackalloc->inuse; |
874 stacks_sys += m->stackalloc->sys; | 881 stacks_sys += m->stackalloc->sys; |
| 882 if(stats) { |
| 883 src = (uint64*)&m->gcstats; |
| 884 dst = (uint64*)stats; |
| 885 for(i=0; i<sizeof(*stats)/sizeof(uint64); i++) |
| 886 dst[i] += src[i]; |
| 887 runtime·memclr((byte*)&m->gcstats, sizeof(m->gcstats)); |
| 888 } |
875 c = m->mcache; | 889 c = m->mcache; |
876 for(i=0; i<nelem(c->local_by_size); i++) { | 890 for(i=0; i<nelem(c->local_by_size); i++) { |
877 mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc
; | 891 mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc
; |
878 c->local_by_size[i].nmalloc = 0; | 892 c->local_by_size[i].nmalloc = 0; |
879 mstats.by_size[i].nfree += c->local_by_size[i].nfree; | 893 mstats.by_size[i].nfree += c->local_by_size[i].nfree; |
880 c->local_by_size[i].nfree = 0; | 894 c->local_by_size[i].nfree = 0; |
881 } | 895 } |
882 } | 896 } |
883 mstats.stacks_inuse = stacks_inuse; | 897 mstats.stacks_inuse = stacks_inuse; |
884 mstats.stacks_sys = stacks_sys; | 898 mstats.stacks_sys = stacks_sys; |
885 } | 899 } |
886 | 900 |
887 void | 901 void |
888 runtime·gc(int32 force) | 902 runtime·gc(int32 force) |
889 { | 903 { |
890 int64 t0, t1, t2, t3; | 904 int64 t0, t1, t2, t3; |
891 uint64 heap0, heap1, obj0, obj1; | 905 uint64 heap0, heap1, obj0, obj1; |
892 byte *p; | 906 byte *p; |
893 bool extra; | 907 bool extra; |
| 908 GCStats stats; |
894 | 909 |
895 // The gc is turned off (via enablegc) until | 910 // The gc is turned off (via enablegc) until |
896 // the bootstrap has completed. | 911 // the bootstrap has completed. |
897 // Also, malloc gets called in the guts | 912 // Also, malloc gets called in the guts |
898 // of a number of libraries that might be | 913 // of a number of libraries that might be |
899 // holding locks. To avoid priority inversion | 914 // holding locks. To avoid priority inversion |
900 // problems, don't bother trying to run gc | 915 // problems, don't bother trying to run gc |
901 // while holding a lock. The next mallocgc | 916 // while holding a lock. The next mallocgc |
902 // without a lock will do the gc instead. | 917 // without a lock will do the gc instead. |
903 if(!mstats.enablegc || m->locks > 0 || runtime·panicking) | 918 if(!mstats.enablegc || m->locks > 0 || runtime·panicking) |
(...skipping 15 matching lines...) Expand all Loading... |
919 if(gcpercent < 0) | 934 if(gcpercent < 0) |
920 return; | 935 return; |
921 | 936 |
922 runtime·semacquire(&runtime·worldsema); | 937 runtime·semacquire(&runtime·worldsema); |
923 if(!force && mstats.heap_alloc < mstats.next_gc) { | 938 if(!force && mstats.heap_alloc < mstats.next_gc) { |
924 runtime·semrelease(&runtime·worldsema); | 939 runtime·semrelease(&runtime·worldsema); |
925 return; | 940 return; |
926 } | 941 } |
927 | 942 |
928 t0 = runtime·nanotime(); | 943 t0 = runtime·nanotime(); |
929 nhandoff = 0; | |
930 | 944 |
931 m->gcing = 1; | 945 m->gcing = 1; |
932 runtime·stoptheworld(); | 946 runtime·stoptheworld(); |
933 | 947 |
934 » cachestats(); | 948 » cachestats(nil); |
935 heap0 = mstats.heap_alloc; | 949 heap0 = mstats.heap_alloc; |
936 obj0 = mstats.nmalloc - mstats.nfree; | 950 obj0 = mstats.nmalloc - mstats.nfree; |
937 | 951 |
938 runtime·lock(&work.markgate); | 952 runtime·lock(&work.markgate); |
939 runtime·lock(&work.sweepgate); | 953 runtime·lock(&work.sweepgate); |
940 | 954 |
941 extra = false; | 955 extra = false; |
942 work.nproc = 1; | 956 work.nproc = 1; |
943 if(runtime·gomaxprocs > 1 && runtime·ncpu > 1) { | 957 if(runtime·gomaxprocs > 1 && runtime·ncpu > 1) { |
944 runtime·noteclear(&work.alldone); | 958 runtime·noteclear(&work.alldone); |
945 work.nproc += runtime·helpgc(&extra); | 959 work.nproc += runtime·helpgc(&extra); |
946 } | 960 } |
947 work.nwait = 0; | 961 work.nwait = 0; |
948 work.ndone = 0; | 962 work.ndone = 0; |
949 | 963 |
950 runtime·unlock(&work.markgate); // let the helpers in | 964 runtime·unlock(&work.markgate); // let the helpers in |
951 mark(scanblock); | 965 mark(scanblock); |
952 if(DebugMark) | 966 if(DebugMark) |
953 mark(debug_scanblock); | 967 mark(debug_scanblock); |
954 t1 = runtime·nanotime(); | 968 t1 = runtime·nanotime(); |
955 | 969 |
956 work.spans = runtime·mheap.allspans; | 970 work.spans = runtime·mheap.allspans; |
957 runtime·unlock(&work.sweepgate); // let the helpers in | 971 runtime·unlock(&work.sweepgate); // let the helpers in |
958 sweep(); | 972 sweep(); |
959 if(work.nproc > 1) | 973 if(work.nproc > 1) |
960 runtime·notesleep(&work.alldone); | 974 runtime·notesleep(&work.alldone); |
961 t2 = runtime·nanotime(); | 975 t2 = runtime·nanotime(); |
962 | 976 |
963 stealcache(); | 977 stealcache(); |
964 » cachestats(); | 978 » cachestats(&stats); |
965 | 979 |
966 mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; | 980 mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; |
967 m->gcing = 0; | 981 m->gcing = 0; |
968 | 982 |
969 m->locks++; // disable gc during the mallocs in newproc | |
970 if(finq != nil) { | 983 if(finq != nil) { |
| 984 m->locks++; // disable gc during the mallocs in newproc |
971 // kick off or wake up goroutine to run queued finalizers | 985 // kick off or wake up goroutine to run queued finalizers |
972 if(fing == nil) | 986 if(fing == nil) |
973 fing = runtime·newproc1((byte*)runfinq, nil, 0, 0, runti
me·gc); | 987 fing = runtime·newproc1((byte*)runfinq, nil, 0, 0, runti
me·gc); |
974 else if(fingwait) { | 988 else if(fingwait) { |
975 fingwait = 0; | 989 fingwait = 0; |
976 runtime·ready(fing); | 990 runtime·ready(fing); |
977 } | 991 } |
978 » } | 992 » » m->locks--; |
979 » m->locks--; | 993 » } |
980 | 994 |
981 » cachestats(); | |
982 heap1 = mstats.heap_alloc; | 995 heap1 = mstats.heap_alloc; |
983 obj1 = mstats.nmalloc - mstats.nfree; | 996 obj1 = mstats.nmalloc - mstats.nfree; |
984 | 997 |
985 t3 = runtime·nanotime(); | 998 t3 = runtime·nanotime(); |
986 mstats.last_gc = t3; | 999 mstats.last_gc = t3; |
987 mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0; | 1000 mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0; |
988 mstats.pause_total_ns += t3 - t0; | 1001 mstats.pause_total_ns += t3 - t0; |
989 mstats.numgc++; | 1002 mstats.numgc++; |
990 if(mstats.debuggc) | 1003 if(mstats.debuggc) |
991 runtime·printf("pause %D\n", t3-t0); | 1004 runtime·printf("pause %D\n", t3-t0); |
992 | 1005 |
993 if(gctrace) { | 1006 if(gctrace) { |
994 » » runtime·printf("gc%d(%d): %D+%D+%D ms %D -> %D MB %D -> %D (%D-%
D) objects %D handoff\n", | 1007 » » runtime·printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-
%D) objects," |
| 1008 » » » » " %D(%D) handoff, %D/%D/%D yields\n", |
995 mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/10000
00, (t3-t2)/1000000, | 1009 mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/10000
00, (t3-t2)/1000000, |
996 heap0>>20, heap1>>20, obj0, obj1, | 1010 heap0>>20, heap1>>20, obj0, obj1, |
997 mstats.nmalloc, mstats.nfree, | 1011 mstats.nmalloc, mstats.nfree, |
998 » » » nhandoff); | 1012 » » » stats.nhandoff, stats.nhandoffcnt, |
| 1013 » » » stats.nprocyield, stats.nosyield, stats.nsleep); |
999 } | 1014 } |
1000 ········ | 1015 ········ |
1001 runtime·MProf_GC(); | 1016 runtime·MProf_GC(); |
1002 runtime·semrelease(&runtime·worldsema); | 1017 runtime·semrelease(&runtime·worldsema); |
1003 | 1018 |
1004 // If we could have used another helper proc, start one now, | 1019 // If we could have used another helper proc, start one now, |
1005 // in the hope that it will be available next time. | 1020 // in the hope that it will be available next time. |
1006 // It would have been even better to start it before the collection, | 1021 // It would have been even better to start it before the collection, |
1007 // but doing so requires allocating memory, so it's tricky to | 1022 // but doing so requires allocating memory, so it's tricky to |
1008 // coordinate. This lazy approach works out in practice: | 1023 // coordinate. This lazy approach works out in practice: |
(...skipping 12 matching lines...) Expand all Loading... |
1021 void | 1036 void |
1022 runtime·ReadMemStats(MStats *stats) | 1037 runtime·ReadMemStats(MStats *stats) |
1023 { | 1038 { |
1024 // Have to acquire worldsema to stop the world, | 1039 // Have to acquire worldsema to stop the world, |
1025 // because stoptheworld can only be used by | 1040 // because stoptheworld can only be used by |
1026 // one goroutine at a time, and there might be | 1041 // one goroutine at a time, and there might be |
1027 // a pending garbage collection already calling it. | 1042 // a pending garbage collection already calling it. |
1028 runtime·semacquire(&runtime·worldsema); | 1043 runtime·semacquire(&runtime·worldsema); |
1029 m->gcing = 1; | 1044 m->gcing = 1; |
1030 runtime·stoptheworld(); | 1045 runtime·stoptheworld(); |
1031 » cachestats(); | 1046 » cachestats(nil); |
1032 *stats = mstats; | 1047 *stats = mstats; |
1033 m->gcing = 0; | 1048 m->gcing = 0; |
1034 runtime·semrelease(&runtime·worldsema); | 1049 runtime·semrelease(&runtime·worldsema); |
1035 runtime·starttheworld(false); | 1050 runtime·starttheworld(false); |
1036 } | 1051 } |
1037 | 1052 |
1038 static void | 1053 static void |
1039 runfinq(void) | 1054 runfinq(void) |
1040 { | 1055 { |
1041 Finalizer *f; | 1056 Finalizer *f; |
(...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1280 uintptr n; | 1295 uintptr n; |
1281 | 1296 |
1282 n = (h->arena_used - h->arena_start) / wordsPerBitmapWord; | 1297 n = (h->arena_used - h->arena_start) / wordsPerBitmapWord; |
1283 n = (n+bitmapChunk-1) & ~(bitmapChunk-1); | 1298 n = (n+bitmapChunk-1) & ~(bitmapChunk-1); |
1284 if(h->bitmap_mapped >= n) | 1299 if(h->bitmap_mapped >= n) |
1285 return; | 1300 return; |
1286 | 1301 |
1287 runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped); | 1302 runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped); |
1288 h->bitmap_mapped = n; | 1303 h->bitmap_mapped = n; |
1289 } | 1304 } |
LEFT | RIGHT |