Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(619)

Delta Between Two Patch Sets: src/pkg/exp/ssa/lift.go

Issue 7229074: code review 7229074: exp/ssa: build fully pruned SSA form. (Closed)
Left Patch Set: diff -r dd18b993ba95 https://go.googlecode.com/hg/ Created 11 years, 1 month ago
Right Patch Set: diff -r 734059df2768 https://code.google.com/p/go/ Created 11 years, 1 month ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « src/pkg/exp/ssa/func.go ('k') | src/pkg/exp/ssa/literal.go » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 package ssa 1 package ssa
2 2
3 // This file defines the lifting pass which tries to "lift" Alloc 3 // This file defines the lifting pass which tries to "lift" Alloc
4 // cells (new/local variables) into SSA registers, replacing loads 4 // cells (new/local variables) into SSA registers, replacing loads
5 // with the dominating stored value, eliminating loads and stores, and 5 // with the dominating stored value, eliminating loads and stores, and
6 // inserting φ-nodes as needed. 6 // inserting φ-nodes as needed.
7 7
8 // Cited papers and resources: 8 // Cited papers and resources:
9 // 9 //
10 // Ron Cytron et al. 1991. Efficiently computing SSA form... 10 // Ron Cytron et al. 1991. Efficiently computing SSA form...
11 // http://doi.acm.org/10.1145/115372.115320 11 // http://doi.acm.org/10.1145/115372.115320
12 // 12 //
13 // Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm. 13 // Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm.
14 // Software Practice and Experience 2001, 4:1-10. 14 // Software Practice and Experience 2001, 4:1-10.
15 // http://www.hipersoft.rice.edu/grads/publications/dom14.pdf 15 // http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
16 // 16 //
17 // Daniel Berlin, llvmdev mailing list, 2012. 17 // Daniel Berlin, llvmdev mailing list, 2012.
18 // http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html 18 // http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
19 // (Be sure to expand the whole thread.) 19 // (Be sure to expand the whole thread.)
20 20
21 // TODO(adonovan): there are many optimisations worth evaluating, and 21 // TODO(adonovan): there are many optimizations worth evaluating, and
22 // the conventional wisdom for SSA construction is that a simple 22 // the conventional wisdom for SSA construction is that a simple
23 // algorithm well engineered often beats those of better asymptotic 23 // algorithm well engineered often beats those of better asymptotic
24 // complexity on all but the most egregious inputs. 24 // complexity on all but the most egregious inputs.
25 // 25 //
26 // Danny Berlin suggests that the Cooper et al. algorithm for 26 // Danny Berlin suggests that the Cooper et al. algorithm for
27 // computing the dominance frontier is superior to Cytron et al. 27 // computing the dominance frontier is superior to Cytron et al.
28 // Furthermore he recommends that rather than computing the DF for the 28 // Furthermore he recommends that rather than computing the DF for the
29 // whole function then renaming all alloc cells, it may be cheaper to 29 // whole function then renaming all alloc cells, it may be cheaper to
30 // compute the DF for each alloc cell separately and throw it away. 30 // compute the DF for each alloc cell separately and throw it away.
31 // 31 //
(...skipping 24 matching lines...) Expand all
56 // 56 //
57 // TODO(adonovan): opt: measure impact of dups; consider a packed bit 57 // TODO(adonovan): opt: measure impact of dups; consider a packed bit
58 // representation, e.g. big.Int, and bitwise parallel operations for 58 // representation, e.g. big.Int, and bitwise parallel operations for
59 // the union step in the Children loop. 59 // the union step in the Children loop.
60 // 60 //
61 // domFrontier's methods mutate the slice's elements but not its 61 // domFrontier's methods mutate the slice's elements but not its
62 // length, so their receivers needn't be pointers. 62 // length, so their receivers needn't be pointers.
63 // 63 //
64 type domFrontier [][]*BasicBlock 64 type domFrontier [][]*BasicBlock
65 65
66 func (df domFrontier) add(u, v *domTree) { 66 func (df domFrontier) add(u, v *domNode) {
67 p := &df[u.Block.Index] 67 p := &df[u.Block.Index]
68 *p = append(*p, v.Block) 68 *p = append(*p, v.Block)
69 } 69 }
70 70
71 // build builds the dominance frontier df for the dominator (sub)tree 71 // build builds the dominance frontier df for the dominator (sub)tree
72 // rooted at u, using the Cytron et al. algorithm. 72 // rooted at u, using the Cytron et al. algorithm.
73 // 73 //
74 // TODO(adonovan): opt: consider Berlin approach, computing pruned SSA 74 // TODO(adonovan): opt: consider Berlin approach, computing pruned SSA
75 // by pruning the entire IDF computation, rather than merely pruning 75 // by pruning the entire IDF computation, rather than merely pruning
76 // the DF -> IDF step. 76 // the DF -> IDF step.
77 func (df domFrontier) build(u *domTree) { 77 func (df domFrontier) build(u *domNode) {
78 // Encounter each node u in postorder of dom tree. 78 // Encounter each node u in postorder of dom tree.
79 for _, child := range u.Children { 79 for _, child := range u.Children {
80 df.build(child) 80 df.build(child)
81 } 81 }
82 for _, vb := range u.Block.Succs { 82 for _, vb := range u.Block.Succs {
83 if v := vb.dom; v.Idom != u { 83 if v := vb.dom; v.Idom != u {
84 df.add(u, v) 84 df.add(u, v)
85 } 85 }
86 } 86 }
87 for _, w := range u.Children { 87 for _, w := range u.Children {
(...skipping 14 matching lines...) Expand all
102 102
103 // lift attempts to replace local and new Allocs accessed only with 103 // lift attempts to replace local and new Allocs accessed only with
104 // load/store by SSA registers, inserting φ-nodes where necessary. 104 // load/store by SSA registers, inserting φ-nodes where necessary.
105 // The result is a program in classical pruned SSA form. 105 // The result is a program in classical pruned SSA form.
106 // 106 //
107 // Preconditions: 107 // Preconditions:
108 // - fn has no dead blocks (blockopt has run). 108 // - fn has no dead blocks (blockopt has run).
109 // - Def/use info (Operands and Referrers) is up-to-date. 109 // - Def/use info (Operands and Referrers) is up-to-date.
110 // 110 //
111 func lift(fn *Function) { 111 func lift(fn *Function) {
112 » // TODO(adonovan): opt: lots of little optimisations may be 112 » // TODO(adonovan): opt: lots of little optimizations may be
113 // worthwhile here, especially if they cause us to avoid 113 // worthwhile here, especially if they cause us to avoid
114 // buildDomTree. For example: 114 // buildDomTree. For example:
115 // 115 //
116 // - Alloc never loaded? Eliminate. 116 // - Alloc never loaded? Eliminate.
117 // - Alloc never stored? Replace all loads with a zero literal. 117 // - Alloc never stored? Replace all loads with a zero literal.
118 // - Alloc stored once? Replace loads with dominating store; 118 // - Alloc stored once? Replace loads with dominating store;
119 // don't forget that an Alloc is itself an effective store 119 // don't forget that an Alloc is itself an effective store
120 // of zero. 120 // of zero.
121 // - Alloc used only within a single block? 121 // - Alloc used only within a single block?
122 // Use degenerate algorithm avoiding φ-nodes. 122 // Use degenerate algorithm avoiding φ-nodes.
(...skipping 18 matching lines...) Expand all
141 fmt.Fprintln(os.Stderr, "Dominance front ier:") 141 fmt.Fprintln(os.Stderr, "Dominance front ier:")
142 title = true 142 title = true
143 } 143 }
144 fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i ], blocks) 144 fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i ], blocks)
145 } 145 }
146 } 146 }
147 } 147 }
148 148
149 newPhis := make(newPhiMap) 149 newPhis := make(newPhiMap)
150 150
151 » // During this pass we will replace deleted some 151 » // During this pass we will replace some BasicBlock.Instrs
152 » // BasicBlock.Instrs (allocs, loads and stores) with nil, 152 » // (allocs, loads and stores) with nil, keeping a count in
153 » // keeping a count in BasicBlock.gaps. 153 » // BasicBlock.gaps. At the end we will reset Instrs to the
154 » // At the end we will reset Instrs to the concatenation of all 154 » // concatenation of all non-dead newPhis and non-nil Instrs
155 » // non-dead newPhis and non-nil Instrs for the block, reusing 155 » // for the block, reusing the original array if space permits.
156 » // the original array if space permits.
157 156
158 // Determine which allocs we can lift and number them densely. 157 // Determine which allocs we can lift and number them densely.
159 // The renaming phase uses this numbering for compact maps. 158 // The renaming phase uses this numbering for compact maps.
160 numAllocs := 0 159 numAllocs := 0
161 for _, b := range fn.Blocks { 160 for _, b := range fn.Blocks {
162 b.gaps = 0 161 b.gaps = 0
163 for i, instr := range b.Instrs { 162 for i, instr := range b.Instrs {
164 if alloc, ok := instr.(*Alloc); ok { 163 if alloc, ok := instr.(*Alloc); ok {
165 if liftAlloc(df, alloc, newPhis) { 164 if liftAlloc(df, alloc, newPhis) {
166 alloc.index = numAllocs 165 alloc.index = numAllocs
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
249 if s.Bit(i) != 0 { 248 if s.Bit(i) != 0 {
250 return false 249 return false
251 } 250 }
252 s.SetBit(&s.Int, i, 1) 251 s.SetBit(&s.Int, i, 1)
253 return true 252 return true
254 } 253 }
255 254
256 // take removes an arbitrary element from a set s and 255 // take removes an arbitrary element from a set s and
257 // returns its index, or returns -1 if empty. 256 // returns its index, or returns -1 if empty.
258 // 257 //
259 // TODO(adonovan): add this method (optimised) to big.Int. 258 // TODO(adonovan): add this method (optimized) to big.Int.
260 func (s *blockSet) take() int { 259 func (s *blockSet) take() int {
261 l := s.BitLen() 260 l := s.BitLen()
262 for i := 0; i < l; i++ { 261 for i := 0; i < l; i++ {
263 if s.Bit(i) == 1 { 262 if s.Bit(i) == 1 {
264 s.SetBit(&s.Int, i, 0) 263 s.SetBit(&s.Int, i, 0)
265 return i 264 return i
266 } 265 }
267 } 266 }
268 return -1 267 return -1
269 } 268 }
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after
389 } 388 }
390 } 389 }
391 if pyrefs != nil { 390 if pyrefs != nil {
392 *pyrefs = append(*pyrefs, instr) // dups ok 391 *pyrefs = append(*pyrefs, instr) // dups ok
393 } 392 }
394 } 393 }
395 *pxrefs = nil // x is now unreferenced 394 *pxrefs = nil // x is now unreferenced
396 } 395 }
397 396
398 // renamed returns the value to which alloc is being renamed, 397 // renamed returns the value to which alloc is being renamed,
399 // constructing it lazily if it's the implicit zero initialization. 398 // constructing it lazily if it's the implicit zero initialization.
400 // 399 //
401 func renamed(renaming []Value, alloc *Alloc) Value { 400 func renamed(renaming []Value, alloc *Alloc) Value {
402 v := renaming[alloc.index] 401 v := renaming[alloc.index]
403 if v == nil { 402 if v == nil {
404 v = zeroLiteral(indirectType(alloc.Type())) 403 v = zeroLiteral(indirectType(alloc.Type()))
405 renaming[alloc.index] = v 404 renaming[alloc.index] = v
406 } 405 }
407 return v 406 return v
408 } 407 }
409 408
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
478 phi.Edges[i] = newval 477 phi.Edges[i] = newval
479 if prefs := newval.Referrers(); prefs != nil { 478 if prefs := newval.Referrers(); prefs != nil {
480 *prefs = append(*prefs, phi) 479 *prefs = append(*prefs, phi)
481 } 480 }
482 } 481 }
483 } 482 }
484 483
485 // Continue depth-first recursion over domtree, pushing a 484 // Continue depth-first recursion over domtree, pushing a
486 // fresh copy of the renaming map for each subtree. 485 // fresh copy of the renaming map for each subtree.
487 for _, v := range u.dom.Children { 486 for _, v := range u.dom.Children {
488 » » // TODO(adonovan): opt: avoid copy if len(u.dom.Children) < 2. 487 » » // TODO(adonovan): opt: avoid copy on final iteration; use destr uctive update.
489 r := make([]Value, len(renaming)) 488 r := make([]Value, len(renaming))
490 copy(r, renaming) 489 copy(r, renaming)
491 rename(v.Block, r, newPhis) 490 rename(v.Block, r, newPhis)
492 } 491 }
493 } 492 }
LEFTRIGHT

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b