Rietveld Code Review Tool

Side by Side Diff: src/pkg/runtime/cpuprof.goc

Issue 160200044: [dev.power64] code review 160200044: build: merge default into dev.power64 (Closed)
Patch Set: diff -r be0c14f62257b42485019e9e1db23cf40d2e249f https://code.google.com/p/go Created 10 years, 4 months ago
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// CPU profiling.
// Based on algorithms and data structures used in
// http://code.google.com/p/google-perftools/.
//
// The main difference between this code and the google-perftools
// code is that this code is written to allow copying the profile data
// to an arbitrary io.Writer, while the google-perftools code always
// writes to an operating system file.
//
// The signal handler for the profiling clock tick adds a new stack trace
// to a hash table tracking counts for recent traces. Most clock ticks
// hit in the cache. In the event of a cache miss, an entry must be
// evicted from the hash table, copied to a log that will eventually be
// written as profile data. The google-perftools code flushed the
// log itself during the signal handler. This code cannot do that, because
// the io.Writer might block or need system calls or locks that are not
// safe to use from within the signal handler. Instead, we split the log
// into two halves and let the signal handler fill one half while a goroutine
// is writing out the other half. When the signal handler fills its half, it
// offers to swap with the goroutine. If the writer is not done with its half,
// we lose the stack trace for this clock tick (and record that loss).
// The goroutine interacts with the signal handler by calling getprofile() to
// get the next log piece to write, implicitly handing back the last log
// piece it obtained.
//
// The state of this dance between the signal handler and the goroutine
// is encoded in the Profile.handoff field. If handoff == 0, then the goroutine
// is not using either log half and is waiting (or will soon be waiting) for
// a new piece by calling notesleep(&p->wait). If the signal handler
// changes handoff from 0 to non-zero, it must call notewakeup(&p->wait)
// to wake the goroutine. The value indicates the number of entries in the
// log half being handed off. The goroutine leaves the non-zero value in
// place until it has finished processing the log half and then flips the number
// back to zero. Setting the high bit in handoff means that the profiling is over,
// and the goroutine is now in charge of flushing the data left in the hash table
// to the log and returning that data.
//
// The handoff field is manipulated using atomic operations.
// For the most part, the manipulation of handoff is orderly: if handoff == 0
// then the signal handler owns it and can change it to non-zero.
// If handoff != 0 then the goroutine owns it and can change it to zero.
// If that were the end of the story then we would not need to manipulate
// handoff using atomic operations. The operations are needed, however,
// in order to let the log closer set the high bit to indicate "EOF" safely
// in the situation when normally the goroutine "owns" handoff.

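// A minimal sketch of the legal handoff transitions described above
// (illustrative only, not part of the original file; runtime·cas and
// runtime·notewakeup are the primitives used in the code below):
//
//	signal handler, in flushlog (owns handoff while it is 0):
//		if(runtime·cas(&p->handoff, 0, p->nlog))	// 0 -> n: offer the full half
//			runtime·notewakeup(&p->wait);		// wake the writing goroutine
//
//	writer goroutine, in getprofile (owns handoff while it is non-zero):
//		n = p->handoff;
//		// ... hand log[wtoggle][0:n] to the caller, then later ...
//		runtime·cas(&p->handoff, n, 0);			// n -> 0: hand the half back
//
//	closer, in SetCPUProfileRate(0) (may race with the writer, hence CAS):
//		for(;;) {
//			n = p->handoff;
//			if(runtime·cas(&p->handoff, n, n|0x80000000))	// set the EOF bit
//				break;
//		}
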
package runtime
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"

enum
{
	HashSize = 1<<10,
	LogSize = 1<<17,
	Assoc = 4,
	MaxStack = 64,
};
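
// Rough capacities implied by these constants (added for orientation, not
// in the original file): the table tracks up to HashSize*Assoc = 4096
// recent traces, and each log half holds LogSize/2 = 65536 uintptr words,
// i.e. roughly 65536/(depth+2) evicted records per half.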

typedef struct Profile Profile;
typedef struct Bucket Bucket;
typedef struct Entry Entry;

struct Entry {
	uintptr count;
	uintptr depth;
	uintptr stack[MaxStack];
};

struct Bucket {
	Entry entry[Assoc];
};

struct Profile {
	bool on;		// profiling is on
	Note wait;		// goroutine waits here
	uintptr count;		// tick count
	uintptr evicts;		// eviction count
	uintptr lost;		// lost ticks that need to be logged

	// Active recent stack traces.
	Bucket hash[HashSize];

	// Log of traces evicted from hash.
	// Signal handler has filled log[toggle][:nlog].
	// Goroutine is writing log[1-toggle][:handoff].
	uintptr log[2][LogSize/2];
	uintptr nlog;
	int32 toggle;
	uint32 handoff;

	// Writer state.
	// Writer maintains its own toggle to avoid races
	// looking at signal handler's toggle.
	uint32 wtoggle;
	bool wholding;		// holding & need to release a log half
	bool flushing;		// flushing hash table - profile is over
	bool eod_sent;		// special end-of-data record sent; => flushing
};

static Lock lk;
static Profile *prof;

static void tick(uintptr*, int32);
static void add(Profile*, uintptr*, int32);
static bool evict(Profile*, Entry*);
static bool flushlog(Profile*);

static uintptr eod[3] = {0, 1, 0};
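// (Added note, not in the original file: eod is the end-of-data trailer of
// the pprof legacy binary format - a record with count 0, depth 1, and a
// single zero "pc" word - returned to the reader once the hash table has
// been fully flushed.)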

// LostProfileData is a no-op function used in profiles
// to mark the number of profiling stack traces that were
// discarded due to slow data writers.
static void
LostProfileData(void)
{
}

// SetCPUProfileRate sets the CPU profiling rate.
// The user documentation is in debug.go.
void
runtime·SetCPUProfileRate(intgo hz)
{
	uintptr *p;
	uintptr n;

	// Clamp hz to something reasonable.
	if(hz < 0)
		hz = 0;
	if(hz > 1000000)
		hz = 1000000;

	runtime·lock(&lk);
	if(hz > 0) {
		if(prof == nil) {
			prof = runtime·SysAlloc(sizeof *prof, &mstats.other_sys);
			if(prof == nil) {
				runtime·printf("runtime: cpu profiling cannot allocate memory\n");
				runtime·unlock(&lk);
				return;
			}
		}
		if(prof->on || prof->handoff != 0) {
			runtime·printf("runtime: cannot set cpu profile rate until previous profile has finished.\n");
			runtime·unlock(&lk);
			return;
		}

		prof->on = true;
		p = prof->log[0];
		// pprof binary header format.
		// http://code.google.com/p/google-perftools/source/browse/trunk/src/profiledata.cc#117
		*p++ = 0;  // count for header
		*p++ = 3;  // depth for header
		*p++ = 0;  // version number
		*p++ = 1000000 / hz;  // period (microseconds)
		*p++ = 0;
		prof->nlog = p - prof->log[0];
		prof->toggle = 0;
		prof->wholding = false;
		prof->wtoggle = 0;
		prof->flushing = false;
		prof->eod_sent = false;
		runtime·noteclear(&prof->wait);

		runtime·setcpuprofilerate(tick, hz);
	} else if(prof != nil && prof->on) {
		runtime·setcpuprofilerate(nil, 0);
		prof->on = false;

		// Now add is not running anymore, and getprofile owns the entire log.
		// Set the high bit in prof->handoff to tell getprofile.
		for(;;) {
			n = prof->handoff;
			if(n&0x80000000)
				runtime·printf("runtime: setcpuprofile(off) twice");
			if(runtime·cas(&prof->handoff, n, n|0x80000000))
				break;
		}
		if(n == 0) {
			// we did the transition from 0 -> nonzero so we wake getprofile
			runtime·notewakeup(&prof->wait);
		}
	}
	runtime·unlock(&lk);
}
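
// For example (illustrative, assuming hz = 100, the default used by
// runtime/pprof), the five header words written above would be
//	0, 3, 0, 10000, 0
// i.e. a zero-count record of depth 3 whose slots carry the version (0),
// the sampling period in microseconds (1000000/100 = 10000), and a
// trailing zero.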

static void
tick(uintptr *pc, int32 n)
{
	add(prof, pc, n);
}

// add adds the stack trace to the profile.
// It is called from signal handlers and other limited environments
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack. It is allowed to call evict.
static void
add(Profile *p, uintptr *pc, int32 n)
{
	int32 i, j;
	uintptr h, x;
	Bucket *b;
	Entry *e;

	if(n > MaxStack)
		n = MaxStack;

	// Compute hash.
	h = 0;
	for(i=0; i<n; i++) {
		h = h<<8 | (h>>(8*(sizeof(h)-1)));
		x = pc[i];
		h += x*31 + x*7 + x*3;
	}
	p->count++;

	// Add to entry count if already present in table.
	b = &p->hash[h%HashSize];
	for(i=0; i<Assoc; i++) {
		e = &b->entry[i];
		if(e->depth != n)
			continue;
		for(j=0; j<n; j++)
			if(e->stack[j] != pc[j])
				goto ContinueAssoc;
		e->count++;
		return;
	ContinueAssoc:;
	}

	// Evict entry with smallest count.
	e = &b->entry[0];
	for(i=1; i<Assoc; i++)
		if(b->entry[i].count < e->count)
			e = &b->entry[i];
	if(e->count > 0) {
		if(!evict(p, e)) {
			// Could not evict entry. Record lost stack.
			p->lost++;
			return;
		}
		p->evicts++;
	}

	// Reuse the newly evicted entry.
	e->depth = n;
	e->count = 1;
	for(i=0; i<n; i++)
		e->stack[i] = pc[i];
}
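
// A standalone restatement of the hash computed in add (illustrative only;
// hashstack is a hypothetical name, not part of the original file): the
// shift pair is a rotate-left by 8 bits and the three multiplies collapse
// into a single x*41.
static uintptr
hashstack(uintptr *pc, int32 n)
{
	uintptr h;
	int32 i;

	h = 0;
	for(i=0; i<n; i++) {
		h = h<<8 | (h>>(8*(sizeof(h)-1)));	// rotate left by 8 bits
		h += pc[i]*41;				// x*31 + x*7 + x*3 == x*41
	}
	return h;
}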

// evict copies the given entry's data into the log, so that
// the entry can be reused. evict is called from add, which
// is called from the profiling signal handler, so it must not
// allocate memory or block. It is safe to call flushlog.
// evict returns true if the entry was copied to the log,
// false if there was no room available.
static bool
evict(Profile *p, Entry *e)
{
	int32 i, d, nslot;
	uintptr *log, *q;

	d = e->depth;
	nslot = d+2;
	log = p->log[p->toggle];
	if(p->nlog+nslot > nelem(p->log[0])) {
		if(!flushlog(p))
			return false;
		log = p->log[p->toggle];
	}

	q = log+p->nlog;
	*q++ = e->count;
	*q++ = d;
	for(i=0; i<d; i++)
		*q++ = e->stack[i];
	p->nlog = q - log;
	e->count = 0;
	return true;
}
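
// Layout of one record written by evict, for orientation (this matches the
// pprof legacy binary format linked above):
//
//	count | depth d | pc[0] | pc[1] | ... | pc[d-1]
//
// nslot = d+2 above is exactly the length of one such record in words.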

// flushlog tries to flush the current log and switch to the other one.
// flushlog is called from evict, called from add, called from the signal handler,
// so it cannot allocate memory or block. It can try to swap logs with
// the writing goroutine, as explained in the comment at the top of this file.
static bool
flushlog(Profile *p)
{
	uintptr *log, *q;

	if(!runtime·cas(&p->handoff, 0, p->nlog))
		return false;
	runtime·notewakeup(&p->wait);

	p->toggle = 1 - p->toggle;
	log = p->log[p->toggle];
	q = log;
	if(p->lost > 0) {
		*q++ = p->lost;
		*q++ = 1;
		*q++ = (uintptr)LostProfileData;
		p->lost = 0;
	}
	p->nlog = q - log;
	return true;
}
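
// (Added note, not in the original file: the lost-tick record written above
// has the same shape as any other record - count = p->lost, depth = 1, one
// "pc" pointing at LostProfileData - so a slow reader shows up in the final
// profile as an ordinary function named LostProfileData.)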

// getprofile blocks until the next block of profiling data is available
// and returns it as a []byte. It is called from the writing goroutine.
Slice
getprofile(Profile *p)
{
	uint32 i, j, n;
	Slice ret;
	Bucket *b;
	Entry *e;

	ret.array = nil;
	ret.len = 0;
	ret.cap = 0;

	if(p == nil)
		return ret;

	if(p->wholding) {
		// Release previous log to signal handling side.
		// Loop because we are racing against SetCPUProfileRate(0).
		for(;;) {
			n = p->handoff;
			if(n == 0) {
				runtime·printf("runtime: phase error during cpu profile handoff\n");
				return ret;
			}
			if(n & 0x80000000) {
				p->wtoggle = 1 - p->wtoggle;
				p->wholding = false;
				p->flushing = true;
				goto flush;
			}
			if(runtime·cas(&p->handoff, n, 0))
				break;
		}
		p->wtoggle = 1 - p->wtoggle;
		p->wholding = false;
	}

	if(p->flushing)
		goto flush;

	if(!p->on && p->handoff == 0)
		return ret;

	// Wait for new log.
	runtime·notetsleepg(&p->wait, -1);
	runtime·noteclear(&p->wait);

	n = p->handoff;
	if(n == 0) {
		runtime·printf("runtime: phase error during cpu profile wait\n");
		return ret;
	}
	if(n == 0x80000000) {
		p->flushing = true;
		goto flush;
	}
	n &= ~0x80000000;

	// Return new log to caller.
	p->wholding = true;

	ret.array = (byte*)p->log[p->wtoggle];
	ret.len = n*sizeof(uintptr);
	ret.cap = ret.len;
	return ret;

flush:
	// In flush mode.
	// Add is no longer being called. We own the log.
	// Also, p->handoff is non-zero, so flushlog will return false.
	// Evict the hash table into the log and return it.
	for(i=0; i<HashSize; i++) {
		b = &p->hash[i];
		for(j=0; j<Assoc; j++) {
			e = &b->entry[j];
			if(e->count > 0 && !evict(p, e)) {
				// Filled the log. Stop the loop and return what we've got.
				goto breakflush;
			}
		}
	}
breakflush:

	// Return pending log data.
	if(p->nlog > 0) {
		// Note that we're using toggle now, not wtoggle,
		// because we're working on the log directly.
		ret.array = (byte*)p->log[p->toggle];
		ret.len = p->nlog*sizeof(uintptr);
		ret.cap = ret.len;
		p->nlog = 0;
		return ret;
	}

	// Made it through the table without finding anything to log.
	if(!p->eod_sent) {
		// We may not have space to append this to the partial log buf,
		// so we always return a new slice for the end-of-data marker.
		p->eod_sent = true;
		ret.array = (byte*)eod;
		ret.len = sizeof eod;
		ret.cap = ret.len;
		return ret;
	}

	// Finally done. Clean up and return nil.
	p->flushing = false;
	if(!runtime·cas(&p->handoff, p->handoff, 0))
		runtime·printf("runtime: profile flush racing with something\n");
	return ret;  // set to nil at top of function
}

// CPUProfile returns the next cpu profile block as a []byte.
// The user documentation is in debug.go.
func CPUProfile() (ret Slice) {
	ret = getprofile(prof);
}
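
// Sketch of the intended consumer loop (illustrative only; in practice this
// loop lives on the Go side, in runtime/pprof, which calls runtime.CPUProfile
// until it returns an empty slice and writes each chunk to the user's
// io.Writer):
//
//	for(;;) {
//		Slice b = getprofile(prof);
//		if(b.len == 0)
//			break;	// EOF: profiling stopped and the log is drained
//		// hand b to the io.Writer on the Go side
//	}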