LEFT | RIGHT |
1 package ssa | 1 package ssa |
2 | 2 |
3 // This file defines the lifting pass which tries to "lift" Alloc | 3 // This file defines the lifting pass which tries to "lift" Alloc |
4 // cells (new/local variables) into SSA registers, replacing loads | 4 // cells (new/local variables) into SSA registers, replacing loads |
5 // with the dominating stored value, eliminating loads and stores, and | 5 // with the dominating stored value, eliminating loads and stores, and |
6 // inserting φ-nodes as needed. | 6 // inserting φ-nodes as needed. |
7 | 7 |
8 // Cited papers and resources: | 8 // Cited papers and resources: |
9 // | 9 // |
10 // Ron Cytron et al. 1991. Efficiently computing SSA form... | 10 // Ron Cytron et al. 1991. Efficiently computing SSA form... |
11 // http://doi.acm.org/10.1145/115372.115320 | 11 // http://doi.acm.org/10.1145/115372.115320 |
12 // | 12 // |
13 // Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm. | 13 // Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm. |
14 // Software Practice and Experience 2001, 4:1-10. | 14 // Software Practice and Experience 2001, 4:1-10. |
15 // http://www.hipersoft.rice.edu/grads/publications/dom14.pdf | 15 // http://www.hipersoft.rice.edu/grads/publications/dom14.pdf |
16 // | 16 // |
17 // Daniel Berlin, llvmdev mailing list, 2012. | 17 // Daniel Berlin, llvmdev mailing list, 2012. |
18 // http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html | 18 // http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html |
19 // (Be sure to expand the whole thread.) | 19 // (Be sure to expand the whole thread.) |
20 | 20 |
21 // TODO(adonovan): there are many optimisations worth evaluating, and | 21 // TODO(adonovan): there are many optimizations worth evaluating, and |
22 // the conventional wisdom for SSA construction is that a simple | 22 // the conventional wisdom for SSA construction is that a simple |
23 // algorithm well engineered often beats those of better asymptotic | 23 // algorithm well engineered often beats those of better asymptotic |
24 // complexity on all but the most egregious inputs. | 24 // complexity on all but the most egregious inputs. |
25 // | 25 // |
26 // Danny Berlin suggests that the Cooper et al. algorithm for | 26 // Danny Berlin suggests that the Cooper et al. algorithm for |
27 // computing the dominance frontier is superior to Cytron et al. | 27 // computing the dominance frontier is superior to Cytron et al. |
28 // Furthermore he recommends that rather than computing the DF for the | 28 // Furthermore he recommends that rather than computing the DF for the |
29 // whole function then renaming all alloc cells, it may be cheaper to | 29 // whole function then renaming all alloc cells, it may be cheaper to |
30 // compute the DF for each alloc cell separately and throw it away. | 30 // compute the DF for each alloc cell separately and throw it away. |
31 // | 31 // |
(...skipping 24 matching lines...) Expand all Loading... |
56 // | 56 // |
57 // TODO(adonovan): opt: measure impact of dups; consider a packed bit | 57 // TODO(adonovan): opt: measure impact of dups; consider a packed bit |
58 // representation, e.g. big.Int, and bitwise parallel operations for | 58 // representation, e.g. big.Int, and bitwise parallel operations for |
59 // the union step in the Children loop. | 59 // the union step in the Children loop. |
60 // | 60 // |
61 // domFrontier's methods mutate the slice's elements but not its | 61 // domFrontier's methods mutate the slice's elements but not its |
62 // length, so their receivers needn't be pointers. | 62 // length, so their receivers needn't be pointers. |
63 // | 63 // |
64 type domFrontier [][]*BasicBlock | 64 type domFrontier [][]*BasicBlock |
65 | 65 |
66 func (df domFrontier) add(u, v *domTree) { | 66 func (df domFrontier) add(u, v *domNode) { |
67 p := &df[u.Block.Index] | 67 p := &df[u.Block.Index] |
68 *p = append(*p, v.Block) | 68 *p = append(*p, v.Block) |
69 } | 69 } |
70 | 70 |
71 // build builds the dominance frontier df for the dominator (sub)tree | 71 // build builds the dominance frontier df for the dominator (sub)tree |
72 // rooted at u, using the Cytron et al. algorithm. | 72 // rooted at u, using the Cytron et al. algorithm. |
73 // | 73 // |
74 // TODO(adonovan): opt: consider Berlin approach, computing pruned SSA | 74 // TODO(adonovan): opt: consider Berlin approach, computing pruned SSA |
75 // by pruning the entire IDF computation, rather than merely pruning | 75 // by pruning the entire IDF computation, rather than merely pruning |
76 // the DF -> IDF step. | 76 // the DF -> IDF step. |
77 func (df domFrontier) build(u *domTree) { | 77 func (df domFrontier) build(u *domNode) { |
78 // Encounter each node u in postorder of dom tree. | 78 // Encounter each node u in postorder of dom tree. |
79 for _, child := range u.Children { | 79 for _, child := range u.Children { |
80 df.build(child) | 80 df.build(child) |
81 } | 81 } |
82 for _, vb := range u.Block.Succs { | 82 for _, vb := range u.Block.Succs { |
83 if v := vb.dom; v.Idom != u { | 83 if v := vb.dom; v.Idom != u { |
84 df.add(u, v) | 84 df.add(u, v) |
85 } | 85 } |
86 } | 86 } |
87 for _, w := range u.Children { | 87 for _, w := range u.Children { |
(...skipping 14 matching lines...) Expand all Loading... |
102 | 102 |
103 // lift attempts to replace local and new Allocs accessed only with | 103 // lift attempts to replace local and new Allocs accessed only with |
104 // load/store by SSA registers, inserting φ-nodes where necessary. | 104 // load/store by SSA registers, inserting φ-nodes where necessary. |
105 // The result is a program in classical pruned SSA form. | 105 // The result is a program in classical pruned SSA form. |
106 // | 106 // |
107 // Preconditions: | 107 // Preconditions: |
108 // - fn has no dead blocks (blockopt has run). | 108 // - fn has no dead blocks (blockopt has run). |
109 // - Def/use info (Operands and Referrers) is up-to-date. | 109 // - Def/use info (Operands and Referrers) is up-to-date. |
110 // | 110 // |
111 func lift(fn *Function) { | 111 func lift(fn *Function) { |
112 » // TODO(adonovan): opt: lots of little optimisations may be | 112 » // TODO(adonovan): opt: lots of little optimizations may be |
113 // worthwhile here, especially if they cause us to avoid | 113 // worthwhile here, especially if they cause us to avoid |
114 // buildDomTree. For example: | 114 // buildDomTree. For example: |
115 // | 115 // |
116 // - Alloc never loaded? Eliminate. | 116 // - Alloc never loaded? Eliminate. |
117 // - Alloc never stored? Replace all loads with a zero literal. | 117 // - Alloc never stored? Replace all loads with a zero literal. |
118 // - Alloc stored once? Replace loads with dominating store; | 118 // - Alloc stored once? Replace loads with dominating store; |
119 // don't forget that an Alloc is itself an effective store | 119 // don't forget that an Alloc is itself an effective store |
120 // of zero. | 120 // of zero. |
121 // - Alloc used only within a single block? | 121 // - Alloc used only within a single block? |
122 // Use degenerate algorithm avoiding φ-nodes. | 122 // Use degenerate algorithm avoiding φ-nodes. |
(...skipping 18 matching lines...) Expand all Loading... |
141 fmt.Fprintln(os.Stderr, "Dominance front
ier:") | 141 fmt.Fprintln(os.Stderr, "Dominance front
ier:") |
142 title = true | 142 title = true |
143 } | 143 } |
144 fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i
], blocks) | 144 fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i
], blocks) |
145 } | 145 } |
146 } | 146 } |
147 } | 147 } |
148 | 148 |
149 newPhis := make(newPhiMap) | 149 newPhis := make(newPhiMap) |
150 | 150 |
151 » // During this pass we will replace deleted some | 151 » // During this pass we will replace some BasicBlock.Instrs |
152 » // BasicBlock.Instrs (allocs, loads and stores) with nil, | 152 » // (allocs, loads and stores) with nil, keeping a count in |
153 » // keeping a count in BasicBlock.gaps. | 153 » // BasicBlock.gaps. At the end we will reset Instrs to the |
154 » // At the end we will reset Instrs to the concatenation of all | 154 » // concatenation of all non-dead newPhis and non-nil Instrs |
155 » // non-dead newPhis and non-nil Instrs for the block, reusing | 155 » // for the block, reusing the original array if space permits. |
156 » // the original array if space permits. | |
157 | 156 |
158 // Determine which allocs we can lift and number them densely. | 157 // Determine which allocs we can lift and number them densely. |
159 // The renaming phase uses this numbering for compact maps. | 158 // The renaming phase uses this numbering for compact maps. |
160 numAllocs := 0 | 159 numAllocs := 0 |
161 for _, b := range fn.Blocks { | 160 for _, b := range fn.Blocks { |
162 b.gaps = 0 | 161 b.gaps = 0 |
163 for i, instr := range b.Instrs { | 162 for i, instr := range b.Instrs { |
164 if alloc, ok := instr.(*Alloc); ok { | 163 if alloc, ok := instr.(*Alloc); ok { |
165 if liftAlloc(df, alloc, newPhis) { | 164 if liftAlloc(df, alloc, newPhis) { |
166 alloc.index = numAllocs | 165 alloc.index = numAllocs |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
249 if s.Bit(i) != 0 { | 248 if s.Bit(i) != 0 { |
250 return false | 249 return false |
251 } | 250 } |
252 s.SetBit(&s.Int, i, 1) | 251 s.SetBit(&s.Int, i, 1) |
253 return true | 252 return true |
254 } | 253 } |
255 | 254 |
256 // take removes an arbitrary element from a set s and | 255 // take removes an arbitrary element from a set s and |
257 // returns its index, or returns -1 if empty. | 256 // returns its index, or returns -1 if empty. |
258 // | 257 // |
259 // TODO(adonovan): add this method (optimised) to big.Int. | 258 // TODO(adonovan): add this method (optimized) to big.Int. |
260 func (s *blockSet) take() int { | 259 func (s *blockSet) take() int { |
261 l := s.BitLen() | 260 l := s.BitLen() |
262 for i := 0; i < l; i++ { | 261 for i := 0; i < l; i++ { |
263 if s.Bit(i) == 1 { | 262 if s.Bit(i) == 1 { |
264 s.SetBit(&s.Int, i, 0) | 263 s.SetBit(&s.Int, i, 0) |
265 return i | 264 return i |
266 } | 265 } |
267 } | 266 } |
268 return -1 | 267 return -1 |
269 } | 268 } |
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
389 } | 388 } |
390 } | 389 } |
391 if pyrefs != nil { | 390 if pyrefs != nil { |
392 *pyrefs = append(*pyrefs, instr) // dups ok | 391 *pyrefs = append(*pyrefs, instr) // dups ok |
393 } | 392 } |
394 } | 393 } |
395 *pxrefs = nil // x is now unreferenced | 394 *pxrefs = nil // x is now unreferenced |
396 } | 395 } |
397 | 396 |
398 // renamed returns the value to which alloc is being renamed, | 397 // renamed returns the value to which alloc is being renamed, |
399 // constructing it lazily if it's the implicit zero initialization. | 398 // constructing it lazily if it's the implicit zero initialization. |
400 // | 399 // |
401 func renamed(renaming []Value, alloc *Alloc) Value { | 400 func renamed(renaming []Value, alloc *Alloc) Value { |
402 v := renaming[alloc.index] | 401 v := renaming[alloc.index] |
403 if v == nil { | 402 if v == nil { |
404 v = zeroLiteral(indirectType(alloc.Type())) | 403 v = zeroLiteral(indirectType(alloc.Type())) |
405 renaming[alloc.index] = v | 404 renaming[alloc.index] = v |
406 } | 405 } |
407 return v | 406 return v |
408 } | 407 } |
409 | 408 |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
478 phi.Edges[i] = newval | 477 phi.Edges[i] = newval |
479 if prefs := newval.Referrers(); prefs != nil { | 478 if prefs := newval.Referrers(); prefs != nil { |
480 *prefs = append(*prefs, phi) | 479 *prefs = append(*prefs, phi) |
481 } | 480 } |
482 } | 481 } |
483 } | 482 } |
484 | 483 |
485 // Continue depth-first recursion over domtree, pushing a | 484 // Continue depth-first recursion over domtree, pushing a |
486 // fresh copy of the renaming map for each subtree. | 485 // fresh copy of the renaming map for each subtree. |
487 for _, v := range u.dom.Children { | 486 for _, v := range u.dom.Children { |
488 » » // TODO(adonovan): opt: avoid copy if len(u.dom.Children) < 2. | 487 » » // TODO(adonovan): opt: avoid copy on final iteration; use destr
uctive update. |
489 r := make([]Value, len(renaming)) | 488 r := make([]Value, len(renaming)) |
490 copy(r, renaming) | 489 copy(r, renaming) |
491 rename(v.Block, r, newPhis) | 490 rename(v.Block, r, newPhis) |
492 } | 491 } |
493 } | 492 } |
LEFT | RIGHT |