OLD | NEW |
1 // Copyright 2011 The Go Authors. All rights reserved. | 1 // Copyright 2011 The Go Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 // CPU profiling. | 5 // CPU profiling. |
6 // Based on algorithms and data structures used in | 6 // Based on algorithms and data structures used in |
7 // http://code.google.com/p/google-perftools/. | 7 // http://code.google.com/p/google-perftools/. |
8 // | 8 // |
9 // The main difference between this code and the google-perftools | 9 // The main difference between this code and the google-perftools |
10 // code is that this code is written to allow copying the profile data | 10 // code is that this code is written to allow copying the profile data |
11 // to an arbitrary io.Writer, while the google-perftools code always | 11 // to an arbitrary io.Writer, while the google-perftools code always |
12 // writes to an operating system file. | 12 // writes to an operating system file. |
13 // | 13 // |
14 // The signal handler for the profiling clock tick adds a new stack trace | 14 // The signal handler for the profiling clock tick adds a new stack trace |
15 // to a hash table tracking counts for recent traces. Most clock ticks | 15 // to a hash table tracking counts for recent traces. Most clock ticks |
16 // hit in the cache. In the event of a cache miss, an entry must be | 16 // hit in the cache. In the event of a cache miss, an entry must be |
17 // evicted from the hash table, copied to a log that will eventually be | 17 // evicted from the hash table, copied to a log that will eventually be |
18 // written as profile data. The google-perftools code flushed the | 18 // written as profile data. The google-perftools code flushed the |
19 // log itself during the signal handler. This code cannot do that, because | 19 // log itself during the signal handler. This code cannot do that, because |
20 // the io.Writer might block or need system calls or locks that are not | 20 // the io.Writer might block or need system calls or locks that are not |
21 // safe to use from within the signal handler. Instead, we split the log | 21 // safe to use from within the signal handler. Instead, we split the log |
22 // into two halves and let the signal handler fill one half while a goroutine | 22 // into two halves and let the signal handler fill one half while a goroutine |
23 // is writing out the other half. When the signal handler fills its half, it | 23 // is writing out the other half. When the signal handler fills its half, it |
24 // offers to swap with the goroutine. If the writer is not done with its half, | 24 // offers to swap with the goroutine. If the writer is not done with its half, |
25 // we lose the stack trace for this clock tick (and record that loss). | 25 // we lose the stack trace for this clock tick (and record that loss). |
26 // The goroutine interacts with the signal handler by calling getprofile() to | 26 // The goroutine interacts with the signal handler by calling getprofile() to |
27 // get the next log piece to write, implicitly handing back the last log | 27 // get the next log piece to write, implicitly handing back the last log |
28 // piece it obtained. | 28 // piece it obtained. |
29 // | 29 // |
30 // The state of this dance between the signal handler and the goroutine | 30 // The state of this dance between the signal handler and the goroutine |
31 // is encoded in the Profile.handoff field. If handoff == 0, then the goroutine | 31 // is encoded in the Profile.handoff field. If handoff == 0, then the goroutine |
32 // is not using either log half and is waiting (or will soon be waiting) for | 32 // is not using either log half and is waiting (or will soon be waiting) for |
33 // a new piece by calling notesleep(&p->wait). If the signal handler | 33 // a new piece by calling notesleep(&p->wait). If the signal handler |
34 // changes handoff from 0 to non-zero, it must call notewakeup(&p->wait) | 34 // changes handoff from 0 to non-zero, it must call notewakeup(&p->wait) |
35 // to wake the goroutine. The value indicates the number of entries in the | 35 // to wake the goroutine. The value indicates the number of entries in the |
36 // log half being handed off. The goroutine leaves the non-zero value in | 36 // log half being handed off. The goroutine leaves the non-zero value in |
37 // place until it has finished processing the log half and then flips the number | 37 // place until it has finished processing the log half and then flips the number |
38 // back to zero. Setting the high bit in handoff means that the profiling is over, | 38 // back to zero. Setting the high bit in handoff means that the profiling is over, |
39 // and the goroutine is now in charge of flushing the data left in the hash table | 39 // and the goroutine is now in charge of flushing the data left in the hash table |
40 // to the log and returning that data. | 40 // to the log and returning that data. |
41 // | 41 // |
42 // The handoff field is manipulated using atomic operations. | 42 // The handoff field is manipulated using atomic operations. |
43 // For the most part, the manipulation of handoff is orderly: if handoff == 0 | 43 // For the most part, the manipulation of handoff is orderly: if handoff == 0 |
44 // then the signal handler owns it and can change it to non-zero. | 44 // then the signal handler owns it and can change it to non-zero. |
45 // If handoff != 0 then the goroutine owns it and can change it to zero. | 45 // If handoff != 0 then the goroutine owns it and can change it to zero. |
46 // If that were the end of the story then we would not need to manipulate | 46 // If that were the end of the story then we would not need to manipulate |
47 // handoff using atomic operations. The operations are needed, however, | 47 // handoff using atomic operations. The operations are needed, however, |
48 // in order to let the log closer set the high bit to indicate "EOF" safely | 48 // in order to let the log closer set the high bit to indicate "EOF" safely |
49 // in the situation when normally the goroutine "owns" handoff. | 49 // in the situation when normally the goroutine "owns" handoff. |
50 | 50 |
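To make the handoff scheme described above concrete, here is a minimal stand-alone sketch of the same protocol written with sync/atomic in ordinary Go. The runtime itself uses its internal cas, notesleep, and notewakeup primitives instead; the names below (handoff, signalSide, writerSide, profileOver) are illustrative only and not part of this file.

package main

import (
	"fmt"
	"sync/atomic"
)

const profileOver = 0x80000000 // high bit set in handoff: profiling is over

// handoff mirrors the Profile.handoff word described in the comment above.
var handoff uint32

// signalSide is what the signal handler does when its log half fills:
// it hands off n entries, but only if the writer has returned the other half.
func signalSide(n uint32) bool {
	// 0 -> n transition; on success the real code also calls notewakeup.
	return atomic.CompareAndSwapUint32(&handoff, 0, n)
}

// writerSide consumes the handed-off count and hands the half back,
// unless the closer has set the high bit to signal end of profiling.
func writerSide() (n uint32, over bool) {
	for {
		n = atomic.LoadUint32(&handoff)
		if n&profileOver != 0 {
			return n &^ profileOver, true
		}
		// n -> 0 transition; a CAS because the closer may set the high bit concurrently.
		if atomic.CompareAndSwapUint32(&handoff, n, 0) {
			return n, false
		}
	}
}

func main() {
	fmt.Println(signalSide(42)) // true: 42 log entries handed to the writer
	fmt.Println(writerSide())   // 42 false: writer took them and returned the half
}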
51 package runtime | 51 package runtime |
52 #include "runtime.h" | |
53 #include "arch_GOARCH.h" | |
54 #include "malloc.h" | |
55 | 52 |
56 enum | 53 import "unsafe" |
57 { | |
58 » HashSize = 1<<10, | |
59 » LogSize = 1<<17, | |
60 » Assoc = 4, | |
61 » MaxStack = 64, | |
62 }; | |
63 | 54 |
64 typedef struct Profile Profile; | 55 const ( |
65 typedef struct Bucket Bucket; | 56 » numBuckets = 1 << 10 |
66 typedef struct Entry Entry; | 57 » logSize = 1 << 17 |
| 58 » assoc = 4 |
| 59 » maxStack = 64 |
| 60 ) |
67 | 61 |
68 struct Entry { | 62 type cpuprofEntry struct { |
69 » uintptr count; | 63 » count uintptr |
70 » uintptr depth; | 64 » depth uintptr |
71 » uintptr stack[MaxStack]; | 65 » stack [maxStack]uintptr |
72 }; | 66 } |
73 | 67 |
74 struct Bucket { | 68 type cpuProfile struct { |
75 » Entry entry[Assoc]; | 69 » on bool // profiling is on |
76 }; | 70 » wait note // goroutine waits here |
77 | 71 » count uintptr // tick count |
78 struct Profile { | 72 » evicts uintptr // eviction count |
79 » bool on;» » // profiling is on | 73 » lost uintptr // lost ticks that need to be logged |
80 » Note wait;» » // goroutine waits here | |
81 » uintptr count;» » // tick count | |
82 » uintptr evicts;»» // eviction count | |
83 » uintptr lost;» » // lost ticks that need to be logged | |
84 | 74 |
85 // Active recent stack traces. | 75 // Active recent stack traces. |
86 » Bucket hash[HashSize]; | 76 » hash [numBuckets]struct { |
| 77 » » entry [assoc]cpuprofEntry |
| 78 » } |
87 | 79 |
88 // Log of traces evicted from hash. | 80 // Log of traces evicted from hash. |
89 // Signal handler has filled log[toggle][:nlog]. | 81 // Signal handler has filled log[toggle][:nlog]. |
90 // Goroutine is writing log[1-toggle][:handoff]. | 82 // Goroutine is writing log[1-toggle][:handoff]. |
91 » uintptr log[2][LogSize/2]; | 83 » log [2][logSize / 2]uintptr |
92 » uintptr nlog; | 84 » nlog uintptr |
93 » int32 toggle; | 85 » toggle int32 |
94 » uint32 handoff; | 86 » handoff uint32 |
95 »······· | 87 |
96 // Writer state. | 88 // Writer state. |
97 // Writer maintains its own toggle to avoid races | 89 // Writer maintains its own toggle to avoid races |
98 // looking at signal handler's toggle. | 90 // looking at signal handler's toggle. |
99 » uint32 wtoggle; | 91 » wtoggle uint32 |
100 » bool wholding;» // holding & need to release a log half | 92 » wholding bool // holding & need to release a log half |
101 » bool flushing;» // flushing hash table - profile is over | 93 » flushing bool // flushing hash table - profile is over |
102 » bool eod_sent; // special end-of-data record sent; => flushing | 94 » eodSent bool // special end-of-data record sent; => flushing |
103 }; | 95 } |
104 | 96 |
105 static Mutex lk; | 97 var ( |
106 static Profile *prof; | 98 » cpuprofLock mutex |
| 99 » cpuprof *cpuProfile |
107 | 100 |
108 static void tick(uintptr*, int32); | 101 » eod = [3]uintptr{0, 1, 0} |
109 static void add(Profile*, uintptr*, int32); | 102 ) |
110 static bool evict(Profile*, Entry*); | |
111 static bool flushlog(Profile*); | |
112 | 103 |
113 static uintptr eod[3] = {0, 1, 0}; | 104 func setcpuprofilerate(int32) |
114 | 105 |
115 // LostProfileData is a no-op function used in profiles | 106 // lostProfileData is a no-op function used in profiles |
116 // to mark the number of profiling stack traces that were | 107 // to mark the number of profiling stack traces that were |
117 // discarded due to slow data writers. | 108 // discarded due to slow data writers. |
118 static void | 109 func lostProfileData() {} |
119 LostProfileData(void) | |
120 { | |
121 } | |
122 | 110 |
123 // SetCPUProfileRate sets the CPU profiling rate. | 111 // SetCPUProfileRate sets the CPU profiling rate to hz samples per second. |
124 // The user documentation is in debug.go. | 112 // If hz <= 0, SetCPUProfileRate turns off profiling. |
125 void | 113 // If the profiler is on, the rate cannot be changed without first turning it off. |
126 runtime·SetCPUProfileRate(intgo hz) | 114 // |
127 { | 115 // Most clients should use the runtime/pprof package or |
128 » uintptr *p; | 116 // the testing package's -test.cpuprofile flag instead of calling |
129 » uintptr n; | 117 // SetCPUProfileRate directly. |
| 118 func SetCPUProfileRate(hz int) { |
| 119 » // Clamp hz to something reasonable. |
| 120 » if hz < 0 { |
| 121 » » hz = 0 |
| 122 » } |
| 123 » if hz > 1000000 { |
| 124 » » hz = 1000000 |
| 125 » } |
130 | 126 |
131 » // Clamp hz to something reasonable. | 127 » lock(&cpuprofLock) |
132 » if(hz < 0) | 128 » if hz > 0 { |
133 » » hz = 0; | 129 » » if cpuprof == nil { |
134 » if(hz > 1000000) | 130 » » » cpuprof = &cpuProfile{} |
135 » » hz = 1000000; | |
136 | |
137 » runtime·lock(&lk); | |
138 » if(hz > 0) { | |
139 » » if(prof == nil) { | |
140 » » » prof = runtime·SysAlloc(sizeof *prof, &mstats.other_sys); | |
141 » » » if(prof == nil) { | |
142 » » » » runtime·printf("runtime: cpu profiling cannot allocate memory\n"); | |
143 » » » » runtime·unlock(&lk); | |
144 » » » » return; | |
145 » » » } | |
146 } | 131 } |
147 » » if(prof->on || prof->handoff != 0) { | 132 » » if cpuprof.on || cpuprof.handoff != 0 { |
148 » » » runtime·printf("runtime: cannot set cpu profile rate until previous profile has finished.\n"); | 133 » » » printstring("runtime: cannot set cpu profile rate until previous profile has finished.\n") |
149 » » » runtime·unlock(&lk); | 134 » » » unlock(&cpuprofLock) |
150 » » » return; | 135 » » » return |
151 } | 136 } |
152 | 137 |
153 » » prof->on = true; | 138 » » cpuprof.on = true |
154 » » p = prof->log[0]; | |
155 // pprof binary header format. | 139 // pprof binary header format. |
156 // http://code.google.com/p/google-perftools/source/browse/trunk/src/profiledata.cc#117 | 140 // http://code.google.com/p/google-perftools/source/browse/trunk/src/profiledata.cc#117 |
157 » » *p++ = 0; // count for header | 141 » » p := &cpuprof.log[0] |
158 » » *p++ = 3; // depth for header | 142 » » p[0] = 0 // count for header |
159 » » *p++ = 0; // version number | 143 » » p[1] = 3 // depth for header |
160 » » *p++ = 1000000 / hz; // period (microseconds) | 144 » » p[2] = 0 // version number |
161 » » *p++ = 0; | 145 » » p[3] = uintptr(1e6 / hz) // period (microseconds) |
162 » » prof->nlog = p - prof->log[0]; | 146 » » p[4] = 0 |
163 » » prof->toggle = 0; | 147 » » cpuprof.nlog = 5 |
164 » » prof->wholding = false; | 148 » » cpuprof.toggle = 0 |
165 » » prof->wtoggle = 0; | 149 » » cpuprof.wholding = false |
166 » » prof->flushing = false; | 150 » » cpuprof.wtoggle = 0 |
167 » » prof->eod_sent = false; | 151 » » cpuprof.flushing = false |
168 » » runtime·noteclear(&prof->wait); | 152 » » cpuprof.eodSent = false |
| 153 » » noteclear(&cpuprof.wait) |
169 | 154 |
170 » » runtime·setcpuprofilerate(tick, hz); | 155 » » setcpuprofilerate(int32(hz)) |
171 » } else if(prof != nil && prof->on) { | 156 » } else if cpuprof != nil && cpuprof.on { |
172 » » runtime·setcpuprofilerate(nil, 0); | 157 » » setcpuprofilerate(0) |
173 » » prof->on = false; | 158 » » cpuprof.on = false |
174 | 159 |
175 // Now add is not running anymore, and getprofile owns the entire log. | 160 // Now add is not running anymore, and getprofile owns the entire log. |
176 // Set the high bit in prof->handoff to tell getprofile. | 161 // Set the high bit in prof->handoff to tell getprofile. |
177 » » for(;;) { | 162 » » for { |
178 » » » n = prof->handoff; | 163 » » » n := cpuprof.handoff |
179 » » » if(n&0x80000000) | 164 » » » if n&0x80000000 != 0 { |
180 » » » » runtime·printf("runtime: setcpuprofile(off) twice"); | 165 » » » » printstring("runtime: setcpuprofile(off) twice") |
181 » » » if(runtime·cas(&prof->handoff, n, n|0x80000000)) | 166 » » » } |
182 » » » » break; | 167 » » » if cas(&cpuprof.handoff, n, n|0x80000000) { |
183 » » } | 168 » » » » if n == 0 { |
184 » » if(n == 0) { | 169 » » » » » // we did the transition from 0 -> nonzero so we wake getprofile |
185 » » » // we did the transition from 0 -> nonzero so we wake getprofile | 170 » » » » » notewakeup(&cpuprof.wait) |
186 » » » runtime·notewakeup(&prof->wait); | 171 » » » » } |
| 172 » » » » break |
| 173 » » » } |
187 } | 174 } |
188 } | 175 } |
189 » runtime·unlock(&lk); | 176 » unlock(&cpuprofLock) |
190 } | 177 } |
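For reference, the five words written into log[0] above form the legacy pprof binary header: a record with count 0, "depth" 3, and a body consisting of the version number, the sampling period in microseconds, and a zero pad. A minimal sketch of how a reader might check that header, assuming the producing and consuming machines share a word size; decodeHeader is a hypothetical name, not part of the runtime.

package main

import "fmt"

// decodeHeader validates the legacy pprof CPU profile header laid out above
// and returns the sampling period in microseconds. Illustrative sketch only,
// not a full profile parser.
func decodeHeader(words []uintptr) (periodMicros uintptr, ok bool) {
	if len(words) < 5 {
		return 0, false
	}
	// Header record: count=0, depth=3, version=0, period, padding=0.
	if words[0] != 0 || words[1] != 3 || words[2] != 0 || words[4] != 0 {
		return 0, false
	}
	return words[3], true
}

func main() {
	hdr := []uintptr{0, 3, 0, 10000, 0} // a 100 Hz profile: 10000 µs period
	if period, ok := decodeHeader(hdr); ok {
		fmt.Printf("sampling period: %d µs\n", period)
	}
}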
191 | 178 |
192 static void | 179 func cpuproftick(pc *uintptr, n int32) { |
193 tick(uintptr *pc, int32 n) | 180 » if n > maxStack { |
194 { | 181 » » n = maxStack |
195 » add(prof, pc, n); | 182 » } |
| 183 » s := (*[maxStack]uintptr)(unsafe.Pointer(pc))[:n] |
| 184 » cpuprof.add(s) |
196 } | 185 } |
197 | 186 |
198 // add adds the stack trace to the profile. | 187 // add adds the stack trace to the profile. |
199 // It is called from signal handlers and other limited environments | 188 // It is called from signal handlers and other limited environments |
200 // and cannot allocate memory or acquire locks that might be | 189 // and cannot allocate memory or acquire locks that might be |
201 // held at the time of the signal, nor can it use substantial amounts | 190 // held at the time of the signal, nor can it use substantial amounts |
202 // of stack. It is allowed to call evict. | 191 // of stack. It is allowed to call evict. |
203 static void | 192 func (p *cpuProfile) add(pc []uintptr) { |
204 add(Profile *p, uintptr *pc, int32 n) | |
205 { | |
206 » int32 i, j; | |
207 » uintptr h, x; | |
208 » Bucket *b; | |
209 » Entry *e; | |
210 | |
211 » if(n > MaxStack) | |
212 » » n = MaxStack; | |
213 »······· | |
214 // Compute hash. | 193 // Compute hash. |
215 » h = 0; | 194 » h := uintptr(0) |
216 » for(i=0; i<n; i++) { | 195 » for _, x := range pc { |
217 » » h = h<<8 | (h>>(8*(sizeof(h)-1))); | 196 » » h = h<<8 | (h >> (8 * (unsafe.Sizeof(h) - 1))) |
218 » » x = pc[i]; | 197 » » h += x*31 + x*7 + x*3 |
219 » » h += x*31 + x*7 + x*3; | 198 » } |
220 » } | 199 » p.count++ |
221 » p->count++; | |
222 | 200 |
223 // Add to entry count if already present in table. | 201 // Add to entry count if already present in table. |
224 » b = &p->hash[h%HashSize]; | 202 » b := &p.hash[h%numBuckets] |
225 » for(i=0; i<Assoc; i++) { | 203 Assoc: |
226 » » e = &b->entry[i]; | 204 » for i := range b.entry { |
227 » » if(e->depth != n)» | 205 » » e := &b.entry[i] |
228 » » » continue; | 206 » » if e.depth != uintptr(len(pc)) { |
229 » » for(j=0; j<n; j++) | 207 » » » continue |
230 » » » if(e->stack[j] != pc[j]) | 208 » » } |
231 » » » » goto ContinueAssoc; | 209 » » for j := range pc { |
232 » » e->count++; | 210 » » » if e.stack[j] != pc[j] { |
233 » » return; | 211 » » » » continue Assoc |
234 » ContinueAssoc:; | 212 » » » } |
| 213 » » } |
| 214 » » e.count++ |
| 215 » » return |
235 } | 216 } |
236 | 217 |
237 // Evict entry with smallest count. | 218 // Evict entry with smallest count. |
238 » e = &b->entry[0]; | 219 » var e *cpuprofEntry |
239 » for(i=1; i<Assoc; i++) | 220 » for i := range b.entry { |
240 » » if(b->entry[i].count < e->count) | 221 » » if e == nil || b.entry[i].count < e.count { |
241 » » » e = &b->entry[i]; | 222 » » » e = &b.entry[i] |
242 » if(e->count > 0) { | 223 » » } |
243 » » if(!evict(p, e)) { | 224 » } |
| 225 » if e.count > 0 { |
| 226 » » if !p.evict(e) { |
244 // Could not evict entry. Record lost stack. | 227 // Could not evict entry. Record lost stack. |
245 » » » p->lost++; | 228 » » » p.lost++ |
246 » » » return; | 229 » » » return |
247 » » } | 230 » » } |
248 » » p->evicts++; | 231 » » p.evicts++ |
249 » } | 232 » } |
250 » | 233 |
251 // Reuse the newly evicted entry. | 234 // Reuse the newly evicted entry. |
252 » e->depth = n; | 235 » e.depth = uintptr(len(pc)) |
253 » e->count = 1; | 236 » e.count = 1 |
254 » for(i=0; i<n; i++) | 237 » copy(e.stack[:], pc) |
255 » » e->stack[i] = pc[i]; | |
256 } | 238 } |
257 | 239 |
258 // evict copies the given entry's data into the log, so that | 240 // evict copies the given entry's data into the log, so that |
259 // the entry can be reused. evict is called from add, which | 241 // the entry can be reused. evict is called from add, which |
260 // is called from the profiling signal handler, so it must not | 242 // is called from the profiling signal handler, so it must not |
261 // allocate memory or block. It is safe to call flushLog. | 243 // allocate memory or block. It is safe to call flushlog. |
262 // evict returns true if the entry was copied to the log, | 244 // evict returns true if the entry was copied to the log, |
263 // false if there was no room available. | 245 // false if there was no room available. |
264 static bool | 246 func (p *cpuProfile) evict(e *cpuprofEntry) bool { |
265 evict(Profile *p, Entry *e) | 247 » d := e.depth |
266 { | 248 » nslot := d + 2 |
267 » int32 i, d, nslot; | 249 » log := &p.log[p.toggle] |
268 » uintptr *log, *q; | 250 » if p.nlog+nslot > uintptr(len(p.log[0])) { |
269 »······· | 251 » » if !p.flushlog() { |
270 » d = e->depth; | 252 » » » return false |
271 » nslot = d+2; | 253 » » } |
272 » log = p->log[p->toggle]; | 254 » » log = &p.log[p.toggle] |
273 » if(p->nlog+nslot > nelem(p->log[0])) { | 255 » } |
274 » » if(!flushlog(p)) | 256 |
275 » » » return false; | 257 » q := p.nlog |
276 » » log = p->log[p->toggle]; | 258 » log[q] = e.count |
277 » } | 259 » log[q+1] = d |
278 »······· | 260 » for i := uintptr(0); i < d; i++ { |
279 » q = log+p->nlog; | 261 » » log[q+2+i] = e.stack[i] |
280 » *q++ = e->count; | 262 » } |
281 » *q++ = d; | 263 » p.nlog = q + 2 + d |
282 » for(i=0; i<d; i++) | 264 » e.count = 0 |
283 » » *q++ = e->stack[i]; | 265 » return true |
284 » p->nlog = q - log; | |
285 » e->count = 0; | |
286 » return true; | |
287 } | 266 } |
288 | 267 |
289 // flushlog tries to flush the current log and switch to the other one. | 268 // flushlog tries to flush the current log and switch to the other one. |
290 // flushlog is called from evict, called from add, called from the signal handler, | 269 // flushlog is called from evict, called from add, called from the signal handler, |
291 // so it cannot allocate memory or block. It can try to swap logs with | 270 // so it cannot allocate memory or block. It can try to swap logs with |
292 // the writing goroutine, as explained in the comment at the top of this file. | 271 // the writing goroutine, as explained in the comment at the top of this file. |
293 static bool | 272 func (p *cpuProfile) flushlog() bool { |
294 flushlog(Profile *p) | 273 » if !cas(&p.handoff, 0, uint32(p.nlog)) { |
295 { | 274 » » return false |
296 » uintptr *log, *q; | 275 » } |
297 | 276 » notewakeup(&p.wait) |
298 » if(!runtime·cas(&p->handoff, 0, p->nlog)) | 277 |
299 » » return false; | 278 » p.toggle = 1 - p.toggle |
300 » runtime·notewakeup(&p->wait); | 279 » log := &p.log[p.toggle] |
301 | 280 » q := uintptr(0) |
302 » p->toggle = 1 - p->toggle; | 281 » if p.lost > 0 { |
303 » log = p->log[p->toggle]; | 282 » » f := lostProfileData |
304 » q = log; | 283 » » lostPC := **(**uintptr)(unsafe.Pointer(&f)) |
305 » if(p->lost > 0) { | 284 » » log[0] = p.lost |
306 » » *q++ = p->lost; | 285 » » log[1] = 1 |
307 » » *q++ = 1; | 286 » » log[2] = lostPC |
308 » » *q++ = (uintptr)LostProfileData; | 287 » » q = 3 |
309 » » p->lost = 0; | 288 » » p.lost = 0 |
310 » } | 289 » } |
311 » p->nlog = q - log; | 290 » p.nlog = q |
312 » return true; | 291 » return true |
313 } | 292 } |
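The lostPC expression in flushlog above leans on a Go implementation detail: a func value is a pointer to a funcval structure whose first word is the function's entry PC, so double-dereferencing the address of a func-typed variable recovers that PC. A small stand-alone sketch of the same trick follows; it depends on runtime internals and is not guaranteed by the language spec.

package main

import (
	"fmt"
	"unsafe"
)

func lostProfileData() {}

func main() {
	// A func value points at a funcval struct; its first word is the entry PC.
	f := lostProfileData
	pc := **(**uintptr)(unsafe.Pointer(&f))
	fmt.Printf("entry PC of lostProfileData: %#x\n", pc)
}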
314 | 293 |
315 // getprofile blocks until the next block of profiling data is available | 294 // getprofile blocks until the next block of profiling data is available |
316 // and returns it as a []byte. It is called from the writing goroutine. | 295 // and returns it as a []byte. It is called from the writing goroutine. |
317 static Slice | 296 func (p *cpuProfile) getprofile() []byte { |
318 getprofile(Profile *p) | 297 » if p == nil { |
319 { | 298 » » return nil |
320 » uint32 i, j, n; | 299 » } |
321 » Slice ret; | 300 |
322 » Bucket *b; | 301 » if p.wholding { |
323 » Entry *e; | |
324 | |
325 » ret.array = nil; | |
326 » ret.len = 0; | |
327 » ret.cap = 0; | |
328 »······· | |
329 » if(p == nil)»··· | |
330 » » return ret; | |
331 | |
332 » if(p->wholding) { | |
333 // Release previous log to signal handling side. | 302 // Release previous log to signal handling side. |
334 // Loop because we are racing against SetCPUProfileRate(0). | 303 // Loop because we are racing against SetCPUProfileRate(0). |
335 » » for(;;) { | 304 » » for { |
336 » » » n = p->handoff; | 305 » » » n := p.handoff |
337 » » » if(n == 0) { | 306 » » » if n == 0 { |
338 » » » » runtime·printf("runtime: phase error during cpu profile handoff\n"); | 307 » » » » printstring("runtime: phase error during cpu profile handoff\n") |
339 » » » » return ret; | 308 » » » » return nil |
340 » » » } | 309 » » » } |
341 » » » if(n & 0x80000000) { | 310 » » » if n&0x80000000 != 0 { |
342 » » » » p->wtoggle = 1 - p->wtoggle; | 311 » » » » p.wtoggle = 1 - p.wtoggle |
343 » » » » p->wholding = false; | 312 » » » » p.wholding = false |
344 » » » » p->flushing = true; | 313 » » » » p.flushing = true |
345 » » » » goto flush; | 314 » » » » goto flush |
346 » » » } | 315 » » » } |
347 » » » if(runtime·cas(&p->handoff, n, 0)) | 316 » » » if cas(&p.handoff, n, 0) { |
348 » » » » break; | 317 » » » » break |
349 » » } | 318 » » » } |
350 » » p->wtoggle = 1 - p->wtoggle; | 319 » » } |
351 » » p->wholding = false; | 320 » » p.wtoggle = 1 - p.wtoggle |
352 » } | 321 » » p.wholding = false |
353 »······· | 322 » } |
354 » if(p->flushing) | 323 |
355 » » goto flush; | 324 » if p.flushing { |
356 »······· | 325 » » goto flush |
357 » if(!p->on && p->handoff == 0) | 326 » } |
358 » » return ret; | 327 |
| 328 » if !p.on && p.handoff == 0 { |
| 329 » » return nil |
| 330 » } |
359 | 331 |
360 // Wait for new log. | 332 // Wait for new log. |
361 » runtime·notetsleepg(&p->wait, -1); | 333 » notetsleepg(&p.wait, -1) |
362 » runtime·noteclear(&p->wait); | 334 » noteclear(&p.wait) |
363 | 335 |
364 » n = p->handoff; | 336 » { |
365 » if(n == 0) { | 337 » » n := p.handoff |
366 » » runtime·printf("runtime: phase error during cpu profile wait\n"); | 338 » » if n == 0 { |
367 » » return ret; | 339 » » » printstring("runtime: phase error during cpu profile wait\n") |
368 » } | 340 » » » return nil |
369 » if(n == 0x80000000) { | 341 » » } |
370 » » p->flushing = true; | 342 » » if n == 0x80000000 { |
371 » » goto flush; | 343 » » » p.flushing = true |
372 » } | 344 » » » goto flush |
373 » n &= ~0x80000000; | 345 » » } |
374 | 346 » » n &^= 0x80000000 |
375 » // Return new log to caller. | 347 |
376 » p->wholding = true; | 348 » » // Return new log to caller. |
377 | 349 » » p.wholding = true |
378 » ret.array = (byte*)p->log[p->wtoggle]; | 350 |
379 » ret.len = n*sizeof(uintptr); | 351 » » return asByteSlice(p.log[p.wtoggle][:n]) |
380 » ret.cap = ret.len; | 352 » } |
381 » return ret; | 353 |
382 | |
383 flush: | |
384 // In flush mode. | 354 // In flush mode. |
385 // Add is no longer being called. We own the log. | 355 // Add is no longer being called. We own the log. |
386 // Also, p->handoff is non-zero, so flushlog will return false. | 356 // Also, p->handoff is non-zero, so flushlog will return false. |
387 // Evict the hash table into the log and return it. | 357 // Evict the hash table into the log and return it. |
388 » for(i=0; i<HashSize; i++) { | 358 flush: |
389 » » b = &p->hash[i]; | 359 » for i := range p.hash { |
390 » » for(j=0; j<Assoc; j++) { | 360 » » b := &p.hash[i] |
391 » » » e = &b->entry[j]; | 361 » » for j := range b.entry { |
392 » » » if(e->count > 0 && !evict(p, e)) { | 362 » » » e := &b.entry[j] |
| 363 » » » if e.count > 0 && !p.evict(e) { |
393 // Filled the log. Stop the loop and return what we've got. | 364 // Filled the log. Stop the loop and return what we've got. |
394 » » » » goto breakflush; | 365 » » » » break flush |
395 » » » } | 366 » » » } |
396 » » } | 367 » » } |
397 » } | 368 » } |
398 breakflush: | |
399 | 369 |
400 // Return pending log data. | 370 // Return pending log data. |
401 » if(p->nlog > 0) { | 371 » if p.nlog > 0 { |
402 // Note that we're using toggle now, not wtoggle, | 372 // Note that we're using toggle now, not wtoggle, |
403 // because we're working on the log directly. | 373 // because we're working on the log directly. |
404 » » ret.array = (byte*)p->log[p->toggle]; | 374 » » n := p.nlog |
405 » » ret.len = p->nlog*sizeof(uintptr); | 375 » » p.nlog = 0 |
406 » » ret.cap = ret.len; | 376 » » return asByteSlice(p.log[p.toggle][:n]) |
407 » » p->nlog = 0; | |
408 » » return ret; | |
409 } | 377 } |
410 | 378 |
411 // Made it through the table without finding anything to log. | 379 // Made it through the table without finding anything to log. |
412 » if(!p->eod_sent) { | 380 » if !p.eodSent { |
413 // We may not have space to append this to the partial log buf, | 381 // We may not have space to append this to the partial log buf, |
414 // so we always return a new slice for the end-of-data marker. | 382 // so we always return a new slice for the end-of-data marker. |
415 » » p->eod_sent = true; | 383 » » p.eodSent = true |
416 » » ret.array = (byte*)eod; | 384 » » return asByteSlice(eod[:]) |
417 » » ret.len = sizeof eod; | |
418 » » ret.cap = ret.len; | |
419 » » return ret; | |
420 } | 385 } |
421 | 386 |
422 // Finally done. Clean up and return nil. | 387 // Finally done. Clean up and return nil. |
423 » p->flushing = false; | 388 » p.flushing = false |
424 » if(!runtime·cas(&p->handoff, p->handoff, 0)) | 389 » if !cas(&p.handoff, p.handoff, 0) { |
425 » » runtime·printf("runtime: profile flush racing with something\n"); | 390 » » printstring("runtime: profile flush racing with something\n") |
426 » return ret; // set to nil at top of function | 391 » } |
427 } | 392 » return nil |
428 | 393 } |
429 // CPUProfile returns the next cpu profile block as a []byte. | 394 |
430 // The user documentation is in debug.go. | 395 func asByteSlice(p []uintptr) (ret []byte) { |
431 func CPUProfile() (ret Slice) { | 396 » pp := (*sliceStruct)(unsafe.Pointer(&p)) |
432 » ret = getprofile(prof); | 397 » rp := (*sliceStruct)(unsafe.Pointer(&ret)) |
433 } | 398 |
| 399 » rp.array = pp.array |
| 400 » rp.len = pp.len * int(unsafe.Sizeof(p[0])) |
| 401 » rp.cap = rp.len |
| 402 |
| 403 » return |
| 404 } |
| 405 |
| 406 // CPUProfile returns the next chunk of binary CPU profiling stack trace data, |
| 407 // blocking until data is available. If profiling is turned off and all the profile |
| 408 // data accumulated while it was on has been returned, CPUProfile returns nil. |
| 409 // The caller must save the returned data before calling CPUProfile again. |
| 410 // |
| 411 // Most clients should use the runtime/pprof package or |
| 412 // the testing package's -test.cpuprofile flag instead of calling |
| 413 // CPUProfile directly. |
| 414 func CPUProfile() []byte { |
| 415 » return cpuprof.getprofile() |
| 416 } |
OLD | NEW |
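As a usage note, a rough sketch of the writer-goroutine loop this API expects, approximately what the runtime/pprof package does internally when a CPU profile is started and stopped. The output file name and the sleep standing in for the profiled workload are placeholders, and error handling is elided.

package main

import (
	"os"
	"runtime"
	"time"
)

func main() {
	f, _ := os.Create("cpu.prof") // placeholder output file
	defer f.Close()

	runtime.SetCPUProfileRate(100) // 100 Hz sampling

	done := make(chan struct{})
	go func() {
		for {
			data := runtime.CPUProfile() // blocks until a chunk is ready
			if data == nil {
				close(done) // profiling is off and all data has been returned
				return
			}
			f.Write(data)
		}
	}()

	time.Sleep(2 * time.Second)  // the workload being profiled
	runtime.SetCPUProfileRate(0) // stop profiling
	<-done                       // wait for the final chunks and the end-of-data record
}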