LEFT | RIGHT |
1 // Copyright 2011 The Go Authors. All rights reserved. | 1 // Copyright 2011 The Go Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 // CPU profiling. | 5 // CPU profiling. |
6 // Based on algorithms and data structures used in | 6 // Based on algorithms and data structures used in |
7 // http://code.google.com/p/google-perftools/. | 7 // http://code.google.com/p/google-perftools/. |
8 // | 8 // |
9 // The main difference between this code and the google-perftools | 9 // The main difference between this code and the google-perftools |
10 // code is that this code is written to allow copying the profile data | 10 // code is that this code is written to allow copying the profile data |
(...skipping 35 matching lines...)
46 // If that were the end of the story then we would not need to manipulate | 46 // If that were the end of the story then we would not need to manipulate |
47 // handoff using atomic operations. The operations are needed, however, | 47 // handoff using atomic operations. The operations are needed, however, |
48 // in order to let the log closer set the high bit to indicate "EOF" safely | 48 // in order to let the log closer set the high bit to indicate "EOF" safely |
49 // in the situation when normally the goroutine "owns" handoff. | 49 // in the situation when normally the goroutine "owns" handoff. |
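
Note: the handoff word described above packs two things into one uint32: the low 31 bits carry the number of log words being handed to the reader, and the high bit (0x80000000) is the EOF flag set by the log closer. A minimal standalone sketch of the pattern, assuming sync/atomic in place of the runtime's own cas and notewakeup primitives (names here are hypothetical):

    import "sync/atomic"

    const eofBit = 0x80000000

    var handoff uint32 // 0: nothing pending; low 31 bits: words handed off

    // Producer side: hand n words to the reader. Fails (caller retries
    // later) if the previous batch is still unconsumed.
    func publish(n uint32) bool {
        return atomic.CompareAndSwapUint32(&handoff, 0, n)
    }

    // Closer side: set the EOF bit regardless of who currently owns the
    // word -- this is why plain stores are not enough here.
    func closeProfile() {
        for {
            n := atomic.LoadUint32(&handoff)
            if atomic.CompareAndSwapUint32(&handoff, n, n|eofBit) {
                return
            }
        }
    }
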
50 | 50 |
51 package runtime | 51 package runtime |
52 | 52 |
53 import "unsafe" | 53 import "unsafe" |
54 | 54 |
55 const ( | 55 const ( |
56 » numBuckets = 1 << 10 | 56 » numBuckets = 1 << 10 |
57 » logSize = 1 << 17 | 57 » logSize = 1 << 17 |
58 » assoc = 4 | 58 » assoc = 4 |
59 » maxStack = 64 | 59 » maxCPUProfStack = 64 |
60 ) | 60 ) |
61 | 61 |
62 type cpuprofEntry struct { | 62 type cpuprofEntry struct { |
63 count uintptr | 63 count uintptr |
64 depth uintptr | 64 depth uintptr |
65 » stack [maxStack]uintptr | 65 » stack [maxCPUProfStack]uintptr |
66 } | 66 } |
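
Note: the hash-table declaration itself falls in a skipped region below; in the released Go source it is roughly a numBuckets-entry, assoc-way-associative table built from the entry type above:

    // From the constants above: 1<<10 buckets of 4 entries each, so at
    // most 4096 distinct stacks are cached before evictions to the log.
    hash [numBuckets]struct {
        entry [assoc]cpuprofEntry
    }
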
67 | 67 |
68 type cpuProfile struct { | 68 type cpuProfile struct { |
69 on bool // profiling is on | 69 on bool // profiling is on |
70 wait note // goroutine waits here | 70 wait note // goroutine waits here |
71 count uintptr // tick count | 71 count uintptr // tick count |
72 evicts uintptr // eviction count | 72 evicts uintptr // eviction count |
73 lost uintptr // lost ticks that need to be logged | 73 lost uintptr // lost ticks that need to be logged |
74 | 74 |
75 // Active recent stack traces. | 75 // Active recent stack traces. |
(...skipping 18 matching lines...)
94 eodSent bool // special end-of-data record sent; => flushing | 94 eodSent bool // special end-of-data record sent; => flushing |
95 } | 95 } |
96 | 96 |
97 var ( | 97 var ( |
98 cpuprofLock mutex | 98 cpuprofLock mutex |
99 cpuprof *cpuProfile | 99 cpuprof *cpuProfile |
100 | 100 |
101 eod = [3]uintptr{0, 1, 0} | 101 eod = [3]uintptr{0, 1, 0} |
102 ) | 102 ) |
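
Note: eod is the end-of-data marker in the profile's word encoding. Every record in the log, as evict below makes explicit, uses the same layout:

    // Record encoding used throughout the log:
    //   count, depth, pc[0], ..., pc[depth-1]
    // so eod = {0, 1, 0} decodes as "count 0, depth 1, one nil pc":
    // the terminator the pprof reader looks for.
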
103 | 103 |
104 func setcpuprofilerate(int32) | 104 func setcpuprofilerate(int32) // proc.c |
105 | 105 |
106 // lostProfileData is a no-op function used in profiles | 106 // lostProfileData is a no-op function used in profiles |
107 // to mark the number of profiling stack traces that were | 107 // to mark the number of profiling stack traces that were |
108 // discarded due to slow data writers. | 108 // discarded due to slow data writers. |
109 func lostProfileData() {} | 109 func lostProfileData() {} |
110 | 110 |
111 // SetCPUProfileRate sets the CPU profiling rate to hz samples per second. | 111 // SetCPUProfileRate sets the CPU profiling rate to hz samples per second. |
112 // If hz <= 0, SetCPUProfileRate turns off profiling. | 112 // If hz <= 0, SetCPUProfileRate turns off profiling. |
113 // If the profiler is on, the rate cannot be changed without first turning it off. | 113 // If the profiler is on, the rate cannot be changed without first turning it off. |
114 // | 114 // |
115 // Most clients should use the runtime/cpuprof package or | 115 // Most clients should use the runtime/pprof package or |
116 // the testing package's -test.cpuprofile flag instead of calling | 116 // the testing package's -test.cpuprofile flag instead of calling |
117 // SetCPUProfileRate directly. | 117 // SetCPUProfileRate directly. |
118 func SetCPUProfileRate(hz int) { | 118 func SetCPUProfileRate(hz int) { |
119 // Clamp hz to something reasonable. | 119 // Clamp hz to something reasonable. |
120 if hz < 0 { | 120 if hz < 0 { |
121 hz = 0 | 121 hz = 0 |
122 } | 122 } |
123 if hz > 1000000 { | 123 if hz > 1000000 { |
124 hz = 1000000 | 124 hz = 1000000 |
125 } | 125 } |
126 | 126 |
127 lock(&cpuprofLock) | 127 lock(&cpuprofLock) |
128 if hz > 0 { | 128 if hz > 0 { |
129 if cpuprof == nil { | 129 if cpuprof == nil { |
130 » » » cpuprof = &cpuProfile{} | 130 » » » cpuprof = (*cpuProfile)(sysAlloc(unsafe.Sizeof(cpuProfile{}), &memstats.other_sys)) |
 | 131 » » » if cpuprof == nil { |
 | 132 » » » » print("runtime: cpu profiling cannot allocate memory\n") |
 | 133 » » » » unlock(&cpuprofLock) |
 | 134 » » » » return |
 | 135 » » » } |
131 } | 136 } |
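
Note on the right-hand change from &cpuProfile{} to sysAlloc: the struct is multi-megabyte, and sysAlloc takes it off the garbage-collected heap and accounts it under memstats.other_sys; unlike new, sysAlloc can fail, hence the added nil check. A rough 64-bit size estimate from the constants above (the log declaration is in a skipped region; this assumes the released layout of two logSize/2-word halves):

    // hash: (1<<10 buckets) * 4 entries * (2+64 words) * 8 bytes ≈ 2.1 MB
    // log:  2 halves * (1<<17)/2 words * 8 bytes               = 1.0 MB
    // total ≈ 3 MB: too large to quietly put on the Go heap here.
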
132 if cpuprof.on || cpuprof.handoff != 0 { | 137 if cpuprof.on || cpuprof.handoff != 0 { |
133 » » » printstring("runtime: cannot set cpu profile rate until
previous profile has finished.\n") | 138 » » » print("runtime: cannot set cpu profile rate until previo
us profile has finished.\n") |
134 unlock(&cpuprofLock) | 139 unlock(&cpuprofLock) |
135 return | 140 return |
136 } | 141 } |
137 | 142 |
138 cpuprof.on = true | 143 cpuprof.on = true |
139 // pprof binary header format. | 144 // pprof binary header format. |
140 // http://code.google.com/p/google-perftools/source/browse/trunk/src/profiledata.cc#117 | 145 // http://code.google.com/p/google-perftools/source/browse/trunk/src/profiledata.cc#117 |
141 p := &cpuprof.log[0] | 146 p := &cpuprof.log[0] |
142 p[0] = 0 // count for header | 147 p[0] = 0 // count for header |
143 p[1] = 3 // depth for header | 148 p[1] = 3 // depth for header |
(...skipping 11 matching lines...)
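
The skipped lines finish the five-word pprof header started at p[0] and p[1]. A hedged reconstruction from the google-perftools format referenced above (those lines are elided here, so this is inferred, not quoted):

    p[2] = 0                 // format version
    p[3] = uintptr(1e6 / hz) // sampling period, in microseconds
    p[4] = 0                 // padding
    cpuprof.nlog = 5
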
155 setcpuprofilerate(int32(hz)) | 160 setcpuprofilerate(int32(hz)) |
156 } else if cpuprof != nil && cpuprof.on { | 161 } else if cpuprof != nil && cpuprof.on { |
157 setcpuprofilerate(0) | 162 setcpuprofilerate(0) |
158 cpuprof.on = false | 163 cpuprof.on = false |
159 | 164 |
160 // Now add is not running anymore, and getprofile owns the entire log. | 165 // Now add is not running anymore, and getprofile owns the entire log. |
161 // Set the high bit in prof->handoff to tell getprofile. | 166 // Set the high bit in prof->handoff to tell getprofile. |
162 for { | 167 for { |
163 n := cpuprof.handoff | 168 n := cpuprof.handoff |
164 if n&0x80000000 != 0 { | 169 if n&0x80000000 != 0 { |
165 » » » » printstring("runtime: setcpuprofile(off) twice") | 170 » » » » print("runtime: setcpuprofile(off) twice\n") |
166 } | 171 } |
167 if cas(&cpuprof.handoff, n, n|0x80000000) { | 172 if cas(&cpuprof.handoff, n, n|0x80000000) { |
168 if n == 0 { | 173 if n == 0 { |
169 // we did the transition from 0 -> nonze
ro so we wake getprofile | 174 // we did the transition from 0 -> nonze
ro so we wake getprofile |
170 notewakeup(&cpuprof.wait) | 175 notewakeup(&cpuprof.wait) |
171 } | 176 } |
172 break | 177 break |
173 } | 178 } |
174 } | 179 } |
175 } | 180 } |
176 unlock(&cpuprofLock) | 181 unlock(&cpuprofLock) |
177 } | 182 } |
178 | 183 |
179 func cpuproftick(pc *uintptr, n int32) { | 184 func cpuproftick(pc *uintptr, n int32) { |
180 » if n > maxStack { | 185 » if n > maxCPUProfStack { |
181 » » n = maxStack | 186 » » n = maxCPUProfStack |
182 » } | 187 » } |
183 » s := (*[maxStack]uintptr)(unsafe.Pointer(pc))[:n] | 188 » s := (*[maxCPUProfStack]uintptr)(unsafe.Pointer(pc))[:n] |
184 cpuprof.add(s) | 189 cpuprof.add(s) |
185 } | 190 } |
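
Note: cpuproftick receives a raw *uintptr from the signal-handling C side; the conversion above is the standard no-copy trick for viewing a C array pointer as a Go slice, valid only while the underlying buffer is live and n is within bounds. In isolation the pattern looks like this (hypothetical standalone values):

    var buf [4]uintptr                              // stand-in for the C buffer
    s := (*[4]uintptr)(unsafe.Pointer(&buf[0]))[:2] // no allocation, no copy
    _ = s
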
186 | 191 |
187 // add adds the stack trace to the profile. | 192 // add adds the stack trace to the profile. |
188 // It is called from signal handlers and other limited environments | 193 // It is called from signal handlers and other limited environments |
189 // and cannot allocate memory or acquire locks that might be | 194 // and cannot allocate memory or acquire locks that might be |
190 // held at the time of the signal, nor can it use substantial amounts | 195 // held at the time of the signal, nor can it use substantial amounts |
191 // of stack. It is allowed to call evict. | 196 // of stack. It is allowed to call evict. |
192 func (p *cpuProfile) add(pc []uintptr) { | 197 func (p *cpuProfile) add(pc []uintptr) { |
193 // Compute hash. | 198 // Compute hash. |
(...skipping 33 matching lines...)
227 // Could not evict entry. Record lost stack. | 232 // Could not evict entry. Record lost stack. |
228 p.lost++ | 233 p.lost++ |
229 return | 234 return |
230 } | 235 } |
231 p.evicts++ | 236 p.evicts++ |
232 } | 237 } |
233 | 238 |
234 // Reuse the newly evicted entry. | 239 // Reuse the newly evicted entry. |
235 e.depth = uintptr(len(pc)) | 240 e.depth = uintptr(len(pc)) |
236 e.count = 1 | 241 e.count = 1 |
237 » copy(e.stack[:], pc) | 242 » for i := range pc { |
| 243 » » e.stack[i] = pc[i] |
| 244 » } |
238 } | 245 } |
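
The hash computation and bucket scan at the heart of add fall in the skipped region above. A hedged reconstruction of their shape, following the google-perftools scheme the file header cites (h is the stack hash computed in the skipped lines; stacksEqual is a hypothetical helper, not in the source):

    b := &p.hash[h%numBuckets]
    for i := range b.entry {
        e := &b.entry[i]
        if e.depth == uintptr(len(pc)) && stacksEqual(e.stack[:e.depth], pc) {
            e.count++ // stack already cached: just bump its count
            return
        }
    }
    // No match: pick the bucket entry with the smallest count as the
    // victim; the visible code above evicts it to the log and reuses it.
    victim := &b.entry[0]
    for i := range b.entry {
        if b.entry[i].count < victim.count {
            victim = &b.entry[i]
        }
    }
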
239 | 246 |
240 // evict copies the given entry's data into the log, so that | 247 // evict copies the given entry's data into the log, so that |
241 // the entry can be reused. evict is called from add, which | 248 // the entry can be reused. evict is called from add, which |
242 // is called from the profiling signal handler, so it must not | 249 // is called from the profiling signal handler, so it must not |
243 // allocate memory or block. It is safe to call flushlog. | 250 // allocate memory or block. It is safe to call flushlog. |
244 // evict returns true if the entry was copied to the log, | 251 // evict returns true if the entry was copied to the log, |
245 // false if there was no room available. | 252 // false if there was no room available. |
246 func (p *cpuProfile) evict(e *cpuprofEntry) bool { | 253 func (p *cpuProfile) evict(e *cpuprofEntry) bool { |
247 d := e.depth | 254 d := e.depth |
248 nslot := d + 2 | 255 nslot := d + 2 |
249 log := &p.log[p.toggle] | 256 log := &p.log[p.toggle] |
250 if p.nlog+nslot > uintptr(len(p.log[0])) { | 257 if p.nlog+nslot > uintptr(len(p.log[0])) { |
251 if !p.flushlog() { | 258 if !p.flushlog() { |
252 return false | 259 return false |
253 } | 260 } |
254 log = &p.log[p.toggle] | 261 log = &p.log[p.toggle] |
255 } | 262 } |
256 | 263 |
257 q := p.nlog | 264 q := p.nlog |
258 log[q] = e.count | 265 log[q] = e.count |
259 » log[q+1] = d | 266 » q++ |
| 267 » log[q] = d |
| 268 » q++ |
260 for i := uintptr(0); i < d; i++ { | 269 for i := uintptr(0); i < d; i++ { |
261 » » log[q+2+i] = e.stack[i] | 270 » » log[q] = e.stack[i] |
262 » } | 271 » » q++ |
263 » p.nlog = q + 2 + d | 272 » } |
| 273 » p.nlog = q |
264 e.count = 0 | 274 e.count = 0 |
265 return true | 275 return true |
266 } | 276 } |
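
A worked example of the encoding evict emits: a two-frame stack sampled three times occupies four words of the log.

    // log after evicting {count: 3, depth: 2, stack: [pcA, pcB]}:
    //   log[q+0] = 3   // count
    //   log[q+1] = 2   // depth
    //   log[q+2] = pcA
    //   log[q+3] = pcB
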
267 | 277 |
268 // flushlog tries to flush the current log and switch to the other one. | 278 // flushlog tries to flush the current log and switch to the other one. |
269 // flushlog is called from evict, called from add, called from the signal handler, | 279 // flushlog is called from evict, called from add, called from the signal handler, |
270 // so it cannot allocate memory or block. It can try to swap logs with | 280 // so it cannot allocate memory or block. It can try to swap logs with |
271 // the writing goroutine, as explained in the comment at the top of this file. | 281 // the writing goroutine, as explained in the comment at the top of this file. |
272 func (p *cpuProfile) flushlog() bool { | 282 func (p *cpuProfile) flushlog() bool { |
273 if !cas(&p.handoff, 0, uint32(p.nlog)) { | 283 if !cas(&p.handoff, 0, uint32(p.nlog)) { |
(...skipping 23 matching lines...)
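
The remainder of flushlog is skipped above. A hedged reconstruction from the surrounding protocol: once the cas succeeds the signal side owns the other log half, so it wakes the reader, flips toggle, and seeds the fresh half with a lostProfileData record if any ticks were dropped:

    notewakeup(&p.wait)      // reader may be parked in getprofile
    p.toggle = 1 - p.toggle  // switch to the half the reader gave back
    log := &p.log[p.toggle]
    q := uintptr(0)
    if p.lost > 0 {
        log[0] = p.lost                  // count: ticks dropped
        log[1] = 1                       // depth 1...
        log[2] = funcPC(lostProfileData) // ...pointing at the marker func
        q = 3
        p.lost = 0
    }
    p.nlog = q
    return true
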
297 if p == nil { | 307 if p == nil { |
298 return nil | 308 return nil |
299 } | 309 } |
300 | 310 |
301 if p.wholding { | 311 if p.wholding { |
302 // Release previous log to signal handling side. | 312 // Release previous log to signal handling side. |
303 // Loop because we are racing against SetCPUProfileRate(0). | 313 // Loop because we are racing against SetCPUProfileRate(0). |
304 for { | 314 for { |
305 n := p.handoff | 315 n := p.handoff |
306 if n == 0 { | 316 if n == 0 { |
307 » » » » printstring("runtime: phase error during cpu profile handoff\n") | 317 » » » » print("runtime: phase error during cpu profile handoff\n") |
308 return nil | 318 return nil |
309 } | 319 } |
310 if n&0x80000000 != 0 { | 320 if n&0x80000000 != 0 { |
311 p.wtoggle = 1 - p.wtoggle | 321 p.wtoggle = 1 - p.wtoggle |
312 p.wholding = false | 322 p.wholding = false |
313 p.flushing = true | 323 p.flushing = true |
314 » » » » goto flush | 324 » » » » goto Flush |
315 } | 325 } |
316 if cas(&p.handoff, n, 0) { | 326 if cas(&p.handoff, n, 0) { |
317 break | 327 break |
318 } | 328 } |
319 } | 329 } |
320 p.wtoggle = 1 - p.wtoggle | 330 p.wtoggle = 1 - p.wtoggle |
321 p.wholding = false | 331 p.wholding = false |
322 } | 332 } |
323 | 333 |
324 if p.flushing { | 334 if p.flushing { |
325 » » goto flush | 335 » » goto Flush |
326 } | 336 } |
327 | 337 |
328 if !p.on && p.handoff == 0 { | 338 if !p.on && p.handoff == 0 { |
329 return nil | 339 return nil |
330 } | 340 } |
331 | 341 |
332 // Wait for new log. | 342 // Wait for new log. |
333 notetsleepg(&p.wait, -1) | 343 notetsleepg(&p.wait, -1) |
334 noteclear(&p.wait) | 344 noteclear(&p.wait) |
335 | 345 |
336 » { | 346 » switch n := p.handoff; { |
337 » » n := p.handoff | 347 » case n == 0: |
338 » » if n == 0 { | 348 » » print("runtime: phase error during cpu profile wait\n") |
339 » » » printstring("runtime: phase error during cpu profile wait\n") | 349 » » return nil |
340 » » » return nil | 350 » case n == 0x80000000: |
341 » » } | 351 » » p.flushing = true |
342 » » if n == 0x80000000 { | 352 » » goto Flush |
343 » » » p.flushing = true | 353 » default: |
344 » » » goto flush | |
345 » » } | |
346 n &^= 0x80000000 | 354 n &^= 0x80000000 |
347 | 355 |
348 // Return new log to caller. | 356 // Return new log to caller. |
349 p.wholding = true | 357 p.wholding = true |
350 | 358 |
351 » » return asByteSlice(p.log[p.wtoggle][:n]) | 359 » » return uintptrBytes(p.log[p.wtoggle][:n]) |
352 } | 360 } |
353 | 361 |
354 // In flush mode. | 362 // In flush mode. |
355 // Add is no longer being called. We own the log. | 363 // Add is no longer being called. We own the log. |
356 // Also, p->handoff is non-zero, so flushlog will return false. | 364 // Also, p->handoff is non-zero, so flushlog will return false. |
357 // Evict the hash table into the log and return it. | 365 // Evict the hash table into the log and return it. |
358 flush: | 366 Flush: |
359 for i := range p.hash { | 367 for i := range p.hash { |
360 b := &p.hash[i] | 368 b := &p.hash[i] |
361 for j := range b.entry { | 369 for j := range b.entry { |
362 e := &b.entry[j] | 370 e := &b.entry[j] |
363 if e.count > 0 && !p.evict(e) { | 371 if e.count > 0 && !p.evict(e) { |
364 // Filled the log. Stop the loop and return what we've got. | 372 // Filled the log. Stop the loop and return what we've got. |
365 » » » » break flush | 373 » » » » break Flush |
366 } | 374 } |
367 } | 375 } |
368 } | 376 } |
369 | 377 |
370 // Return pending log data. | 378 // Return pending log data. |
371 if p.nlog > 0 { | 379 if p.nlog > 0 { |
372 // Note that we're using toggle now, not wtoggle, | 380 // Note that we're using toggle now, not wtoggle, |
373 // because we're working on the log directly. | 381 // because we're working on the log directly. |
374 n := p.nlog | 382 n := p.nlog |
375 p.nlog = 0 | 383 p.nlog = 0 |
376 » » return asByteSlice(p.log[p.toggle][:n]) | 384 » » return uintptrBytes(p.log[p.toggle][:n]) |
377 } | 385 } |
378 | 386 |
379 // Made it through the table without finding anything to log. | 387 // Made it through the table without finding anything to log. |
380 if !p.eodSent { | 388 if !p.eodSent { |
381 // We may not have space to append this to the partial log buf, | 389 // We may not have space to append this to the partial log buf, |
382 // so we always return a new slice for the end-of-data marker. | 390 // so we always return a new slice for the end-of-data marker. |
383 p.eodSent = true | 391 p.eodSent = true |
384 » » return asByteSlice(eod[:]) | 392 » » return uintptrBytes(eod[:]) |
385 } | 393 } |
386 | 394 |
387 // Finally done. Clean up and return nil. | 395 // Finally done. Clean up and return nil. |
388 p.flushing = false | 396 p.flushing = false |
389 if !cas(&p.handoff, p.handoff, 0) { | 397 if !cas(&p.handoff, p.handoff, 0) { |
390 » » printstring("runtime: profile flush racing with something\n") | 398 » » print("runtime: profile flush racing with something\n") |
391 } | 399 } |
392 return nil | 400 return nil |
393 } | 401 } |
394 | 402 |
395 func asByteSlice(p []uintptr) (ret []byte) { | 403 func uintptrBytes(p []uintptr) (ret []byte) { |
396 pp := (*sliceStruct)(unsafe.Pointer(&p)) | 404 pp := (*sliceStruct)(unsafe.Pointer(&p)) |
397 rp := (*sliceStruct)(unsafe.Pointer(&ret)) | 405 rp := (*sliceStruct)(unsafe.Pointer(&ret)) |
398 | 406 |
399 rp.array = pp.array | 407 rp.array = pp.array |
400 rp.len = pp.len * int(unsafe.Sizeof(p[0])) | 408 rp.len = pp.len * int(unsafe.Sizeof(p[0])) |
401 rp.cap = rp.len | 409 rp.cap = rp.len |
402 | 410 |
403 return | 411 return |
404 } | 412 } |
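
Note: uintptrBytes reinterprets the log's backing array in place, so the returned bytes are machine words in host byte order, which is what the legacy pprof format carries. Outside the runtime, where sliceStruct is unavailable, a sketch of the same trick in that era's standard library would use reflect.SliceHeader plus unsafe:

    // Hedged standalone equivalent; imports "reflect" and "unsafe".
    func uintptrBytes(p []uintptr) []byte {
        hdr := *(*reflect.SliceHeader)(unsafe.Pointer(&p))
        hdr.Len *= int(unsafe.Sizeof(uintptr(0))) // words -> bytes
        hdr.Cap = hdr.Len
        return *(*[]byte)(unsafe.Pointer(&hdr))
    }
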
405 | 413 |
406 // CPUProfile returns the next chunk of binary CPU profiling stack trace data, | 414 // CPUProfile returns the next chunk of binary CPU profiling stack trace data, |
407 // blocking until data is available. If profiling is turned off and all the profile | 415 // blocking until data is available. If profiling is turned off and all the profile |
408 // data accumulated while it was on has been returned, CPUProfile returns nil. | 416 // data accumulated while it was on has been returned, CPUProfile returns nil. |
409 // The caller must save the returned data before calling CPUProfile again. | 417 // The caller must save the returned data before calling CPUProfile again. |
410 // | 418 // |
411 // Most clients should use the runtime/cpuprof package or | 419 // Most clients should use the runtime/pprof package or |
412 // the testing package's -test.cpuprofile flag instead of calling | 420 // the testing package's -test.cpuprofile flag instead of calling |
413 // CPUProfile directly. | 421 // CPUProfile directly. |
414 func CPUProfile() []byte { | 422 func CPUProfile() []byte { |
415 return cpuprof.getprofile() | 423 return cpuprof.getprofile() |
416 } | 424 } |
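
A typical consumer of CPUProfile, roughly what runtime/pprof does internally: drain chunks in a loop until nil signals end of data. Sketch only; w is an assumed io.Writer such as an *os.File:

    go func() {
        for {
            data := runtime.CPUProfile()
            if data == nil {
                return // profiling stopped and fully drained
            }
            w.Write(data) // must save each chunk before the next call
        }
    }()
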