OLD | NEW |
1 // Inferno utils/6l/span.c | 1 // Inferno utils/6l/span.c |
2 // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c | 2 // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c |
3 // | 3 // |
4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. | 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. |
5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) | 5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) |
6 // Portions Copyright © 1997-1999 Vita Nuova Limited | 6 // Portions Copyright © 1997-1999 Vita Nuova Limited |
7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuov
a.com) | 7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuov
a.com) |
8 // Portions Copyright © 2004,2006 Bruce Ellis | 8 // Portions Copyright © 2004,2006 Bruce Ellis |
9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) | 9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) |
10 //» Revisions Copyright © 2000-2007 Lucent Technologies Inc. ctxt->and other
s | 10 //» Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others |
11 // Portions Copyright © 2009 The Go Authors. All rights reserved. | 11 // Portions Copyright © 2009 The Go Authors. All rights reserved. |
12 // | 12 // |
13 // Permission is hereby granted, free of charge, to any person obtaining a copy | 13 // Permission is hereby granted, free of charge, to any person obtaining a copy |
14 // of this software ctxt->and associated documentation files (the "Software"), t
o deal | 14 // of this software and associated documentation files (the "Software"), to deal |
15 // in the Software without restriction, including without limitation the rights | 15 // in the Software without restriction, including without limitation the rights |
16 // to use, copy, modify, merge, publish, distribute, sublicense, ctxt->and/or se
ll | 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
17 // copies of the Software, ctxt->and to permit persons to whom the Software is | 17 // copies of the Software, and to permit persons to whom the Software is |
18 // furnished to do so, subject to the following conditions: | 18 // furnished to do so, subject to the following conditions: |
19 // | 19 // |
20 // The above copyright notice ctxt->and this permission notice shall be included
in | 20 // The above copyright notice and this permission notice shall be included in |
21 // all copies or substantial portions of the Software. | 21 // all copies or substantial portions of the Software. |
22 // | 22 // |
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
29 // THE SOFTWARE. | 29 // THE SOFTWARE. |
30 | 30 |
31 // Instruction layout. | 31 // Instruction layout. |
32 | 32 |
33 #include <u.h> | 33 #include <u.h> |
34 #include <libc.h> | 34 #include <libc.h> |
35 #include <bio.h> | 35 #include <bio.h> |
36 #include <link.h> | 36 #include <link.h> |
37 #include "../cmd/6l/6.out.h" | 37 #include "../cmd/6l/6.out.h" |
38 #include "../pkg/runtime/stack.h" | 38 #include "../pkg/runtime/stack.h" |
39 | 39 |
40 enum | 40 enum |
41 { | 41 { |
42 MaxAlign = 32, // max data alignment | 42 MaxAlign = 32, // max data alignment |
43 ········ | 43 ········ |
44 // Loop alignment constants: | 44 // Loop alignment constants: |
45 // want to align loop entry to LoopAlign-byte boundary, | 45 // want to align loop entry to LoopAlign-byte boundary, |
46 » // ctxt->and willing to insert at most MaxLoopPad bytes of NOP to do so. | 46 » // and willing to insert at most MaxLoopPad bytes of NOP to do so. |
47 // We define a loop entry as the target of a backward jump. | 47 // We define a loop entry as the target of a backward jump. |
48 // | 48 // |
49 // gcc uses MaxLoopPad = 10 for its 'generic x86-64' config, | 49 // gcc uses MaxLoopPad = 10 for its 'generic x86-64' config, |
50 » // ctxt->and it aligns all jump targets, not just backward jump targets. | 50 » // and it aligns all jump targets, not just backward jump targets. |
51 // | 51 // |
52 // As of 6/1/2012, the effect of setting MaxLoopPad = 10 here | 52 // As of 6/1/2012, the effect of setting MaxLoopPad = 10 here |
53 // is very slight but negative, so the alignment is disabled by | 53 // is very slight but negative, so the alignment is disabled by |
54 » // setting MaxLoopPad = 0. The code is here for reference ctxt->and | 54 » // setting MaxLoopPad = 0. The code is here for reference and |
55 // for future experiments. | 55 // for future experiments. |
56 //· | 56 //· |
57 LoopAlign = 16, | 57 LoopAlign = 16, |
58 MaxLoopPad = 0, | 58 MaxLoopPad = 0, |
59 | 59 |
60 FuncAlign = 16 | 60 FuncAlign = 16 |
61 }; | 61 }; |
62 | 62 |
63 extern char *anames6[]; | 63 extern char *anames6[]; |
64 | 64 |
(...skipping 686 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
751 0 | 751 0 |
752 }; | 752 }; |
753 static uchar yaes2[] = | 753 static uchar yaes2[] = |
754 { | 754 { |
755 Yxm, Yxr, Zibm_r, 2, | 755 Yxm, Yxr, Zibm_r, 2, |
756 0 | 756 0 |
757 }; | 757 }; |
758 | 758 |
759 /* | 759 /* |
760 * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32, | 760 * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32, |
761 * ctxt->and p->from ctxt->and p->to as operands (Addr*). The linker scans opta
b to find | 761 * and p->from and p->to as operands (Addr*). The linker scans optab to find |
762 * the entry with the given p->as ctxt->and then looks through the ytable for th
at | 762 * the entry with the given p->as and then looks through the ytable for that |
763 * instruction (the second field in the optab struct) for a line whose first | 763 * instruction (the second field in the optab struct) for a line whose first |
764 * two values match the Ytypes of the p->from ctxt->and p->to operands. The fun
ction | 764 * two values match the Ytypes of the p->from and p->to operands. The function |
765 * oclass in span.c computes the specific Ytype of an operand ctxt->and then the
set | 765 * oclass in span.c computes the specific Ytype of an operand and then the set |
766 * of more general Ytypes that it satisfies is implied by the ycover table, set | 766 * of more general Ytypes that it satisfies is implied by the ycover table, set |
767 * up in instinit. For example, oclass distinguishes the constants 0 ctxt->and
1 | 767 * up in instinit. For example, oclass distinguishes the constants 0 and 1 |
768 * from the more general 8-bit constants, but instinit says | 768 * from the more general 8-bit constants, but instinit says |
769 * | 769 * |
770 * ycover[Yi0*Ymax + Ys32] = 1; | 770 * ycover[Yi0*Ymax + Ys32] = 1; |
771 * ycover[Yi1*Ymax + Ys32] = 1; | 771 * ycover[Yi1*Ymax + Ys32] = 1; |
772 * ycover[Yi8*Ymax + Ys32] = 1; | 772 * ycover[Yi8*Ymax + Ys32] = 1; |
773 * | 773 * |
774 * which means that Yi0, Yi1, ctxt->and Yi8 all count as Ys32 (signed 32) | 774 * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32) |
775 * if that's what an instruction can handle. | 775 * if that's what an instruction can handle. |
776 * | 776 * |
777 * In parallel with the scan through the ytable for the appropriate line, there | 777 * In parallel with the scan through the ytable for the appropriate line, there |
778 * is a z pointer that starts out pointing at the strange magic byte list in | 778 * is a z pointer that starts out pointing at the strange magic byte list in |
779 * the Optab struct. With each step past a non-matching ytable line, z | 779 * the Optab struct. With each step past a non-matching ytable line, z |
780 * advances by the 4th entry in the line. When a matching line is found, that | 780 * advances by the 4th entry in the line. When a matching line is found, that |
781 * z pointer has the extra data to use in laying down the instruction bytes. | 781 * z pointer has the extra data to use in laying down the instruction bytes. |
782 * The actual bytes laid down are a function of the 3rd entry in the line (that | 782 * The actual bytes laid down are a function of the 3rd entry in the line (that |
783 * is, the Ztype) ctxt->and the z bytes. | 783 * is, the Ztype) and the z bytes. |
784 * | 784 * |
785 * For example, let's look at AADDL. The optab line says: | 785 * For example, let's look at AADDL. The optab line says: |
786 * { AADDL, yaddl, Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 }, | 786 * { AADDL, yaddl, Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 }, |
787 * | 787 * |
788 * ctxt->and yaddl says | 788 * and yaddl says |
789 * uchar yaddl[] = | 789 * uchar yaddl[] = |
790 * { | 790 * { |
791 * Yi8, Yml, Zibo_m, 2, | 791 * Yi8, Yml, Zibo_m, 2, |
792 * Yi32, Yax, Zil_, 1, | 792 * Yi32, Yax, Zil_, 1, |
793 * Yi32, Yml, Zilo_m, 2, | 793 * Yi32, Yml, Zilo_m, 2, |
794 * Yrl, Yml, Zr_m, 1, | 794 * Yrl, Yml, Zr_m, 1, |
795 * Yml, Yrl, Zm_r, 1, | 795 * Yml, Yrl, Zm_r, 1, |
796 * 0 | 796 * 0 |
797 * }; | 797 * }; |
798 * | 798 * |
799 * so there are 5 possible types of ADDL instruction that can be laid down, ctxt
->and | 799 * so there are 5 possible types of ADDL instruction that can be laid down, and |
800 * possible states used to lay them down (Ztype ctxt->and z pointer, assuming z | 800 * possible states used to lay them down (Ztype and z pointer, assuming z |
801 * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are: | 801 * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are: |
802 * | 802 * |
803 * Yi8, Yml -> Zibo_m, z (0x83, 00) | 803 * Yi8, Yml -> Zibo_m, z (0x83, 00) |
804 * Yi32, Yax -> Zil_, z+2 (0x05) | 804 * Yi32, Yax -> Zil_, z+2 (0x05) |
805 * Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00) | 805 * Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00) |
806 * Yrl, Yml -> Zr_m, z+2+1+2 (0x01) | 806 * Yrl, Yml -> Zr_m, z+2+1+2 (0x01) |
807 * Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03) | 807 * Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03) |
808 * | 808 * |
809 * The Pconstant in the optab line controls the prefix bytes to emit. That's | 809 * The Pconstant in the optab line controls the prefix bytes to emit. That's |
810 * relatively straightforward as this program goes. | 810 * relatively straightforward as this program goes. |
811 * | 811 * |
812 * The switch on t[2] in doasm implements the various Z cases. Zibo_m, for | 812 * The switch on t[2] in doasm implements the various Z cases. Zibo_m, for |
813 * example, is an opcode byte (z[0]) then an asmando (which is some kind of | 813 * example, is an opcode byte (z[0]) then an asmando (which is some kind of |
814 * encoded addressing mode for the Yml arg), ctxt->and then a single immediate b
yte. | 814 * encoded addressing mode for the Yml arg), and then a single immediate byte. |
815 * Zilo_m is the same but a long (32-bit) immediate. | 815 * Zilo_m is the same but a long (32-bit) immediate. |
816 */ | 816 */ |
817 Optab optab[] = | 817 Optab optab[] = |
818 /* as, ytab, andproto, opcode */ | 818 /* as, ytab, andproto, opcode */ |
819 { | 819 { |
820 { AXXX }, | 820 { AXXX }, |
821 { AAAA, ynone, P32, 0x37 }, | 821 { AAAA, ynone, P32, 0x37 }, |
822 { AAAD, ynone, P32, 0xd5,0x0a }, | 822 { AAAD, ynone, P32, 0xd5,0x0a }, |
823 { AAAM, ynone, P32, 0xd4,0x0a }, | 823 { AAAM, ynone, P32, 0xd4,0x0a }, |
824 { AAAS, ynone, P32, 0x3f }, | 824 { AAAS, ynone, P32, 0x3f }, |
(...skipping 696 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1521 { AFATVARDEF }, | 1521 { AFATVARDEF }, |
1522 | 1522 |
1523 { AEND }, | 1523 { AEND }, |
1524 0 | 1524 0 |
1525 }; | 1525 }; |
1526 | 1526 |
1527 static Optab* opindex[ALAST+1]; | 1527 static Optab* opindex[ALAST+1]; |
1528 static vlong vaddr(Link*, Addr*, Reloc*); | 1528 static vlong vaddr(Link*, Addr*, Reloc*); |
1529 | 1529 |
1530 // single-instruction no-ops of various lengths. | 1530 // single-instruction no-ops of various lengths. |
1531 // constructed by hand ctxt->and disassembled with gdb to verify. | 1531 // constructed by hand and disassembled with gdb to verify. |
1532 // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion. | 1532 // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion. |
1533 static uchar nop[][16] = { | 1533 static uchar nop[][16] = { |
1534 {0x90}, | 1534 {0x90}, |
1535 {0x66, 0x90}, | 1535 {0x66, 0x90}, |
1536 {0x0F, 0x1F, 0x00}, | 1536 {0x0F, 0x1F, 0x00}, |
1537 {0x0F, 0x1F, 0x40, 0x00}, | 1537 {0x0F, 0x1F, 0x40, 0x00}, |
1538 {0x0F, 0x1F, 0x44, 0x00, 0x00}, | 1538 {0x0F, 0x1F, 0x44, 0x00, 0x00}, |
1539 {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}, | 1539 {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}, |
1540 {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, | 1540 {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, |
1541 {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, | 1541 {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, |
(...skipping 1094 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2636 } | 2636 } |
2637 xo = o->op[0] == 0x0f; | 2637 xo = o->op[0] == 0x0f; |
2638 for(z=0; *t; z+=t[3]+xo,t+=4) | 2638 for(z=0; *t; z+=t[3]+xo,t+=4) |
2639 if(ycover[ft+t[0]]) | 2639 if(ycover[ft+t[0]]) |
2640 if(ycover[tt+t[1]]) | 2640 if(ycover[tt+t[1]]) |
2641 goto found; | 2641 goto found; |
2642 goto domov; | 2642 goto domov; |
2643 | 2643 |
2644 found: | 2644 found: |
2645 switch(o->prefix) { | 2645 switch(o->prefix) { |
2646 » case Pq:» /* 16 bit escape ctxt->and opcode escape */ | 2646 » case Pq:» /* 16 bit escape and opcode escape */ |
2647 *ctxt->andptr++ = Pe; | 2647 *ctxt->andptr++ = Pe; |
2648 *ctxt->andptr++ = Pm; | 2648 *ctxt->andptr++ = Pm; |
2649 break; | 2649 break; |
2650 » case Pq3:» /* 16 bit escape, Rex.w, ctxt->and opcode escape */ | 2650 » case Pq3:» /* 16 bit escape, Rex.w, and opcode escape */ |
2651 *ctxt->andptr++ = Pe; | 2651 *ctxt->andptr++ = Pe; |
2652 *ctxt->andptr++ = Pw; | 2652 *ctxt->andptr++ = Pw; |
2653 *ctxt->andptr++ = Pm; | 2653 *ctxt->andptr++ = Pm; |
2654 break; | 2654 break; |
2655 | 2655 |
2656 case Pf2: /* xmm opcode escape */ | 2656 case Pf2: /* xmm opcode escape */ |
2657 case Pf3: | 2657 case Pf3: |
2658 *ctxt->andptr++ = o->prefix; | 2658 *ctxt->andptr++ = o->prefix; |
2659 *ctxt->andptr++ = Pm; | 2659 *ctxt->andptr++ = Pm; |
2660 break; | 2660 break; |
(...skipping 442 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3103 if(ycover[tt+mo->tt]){ | 3103 if(ycover[tt+mo->tt]){ |
3104 t = mo->op; | 3104 t = mo->op; |
3105 goto mfound; | 3105 goto mfound; |
3106 } | 3106 } |
3107 bad: | 3107 bad: |
3108 if(p->mode != 64){ | 3108 if(p->mode != 64){ |
3109 /* | 3109 /* |
3110 * here, the assembly has failed. | 3110 * here, the assembly has failed. |
3111 * if its a byte instruction that has | 3111 * if its a byte instruction that has |
3112 * unaddressable registers, try to | 3112 * unaddressable registers, try to |
3113 » » * exchange registers ctxt->and reissue the | 3113 » » * exchange registers and reissue the |
3114 * instruction with the operands renamed. | 3114 * instruction with the operands renamed. |
3115 */ | 3115 */ |
3116 pp = *p; | 3116 pp = *p; |
3117 z = p->from.type; | 3117 z = p->from.type; |
3118 if(z >= D_BP && z <= D_DI) { | 3118 if(z >= D_BP && z <= D_DI) { |
3119 if(isax(&p->to) || p->to.type == D_NONE) { | 3119 if(isax(&p->to) || p->to.type == D_NONE) { |
3120 // We certainly don't want to exchange | 3120 // We certainly don't want to exchange |
3121 // with AX if the op is MUL or DIV. | 3121 // with AX if the op is MUL or DIV. |
3122 *ctxt->andptr++ = 0x87; /* xchg
lhs,bx */ | 3122 *ctxt->andptr++ = 0x87; /* xchg
lhs,bx */ |
3123 asmando(ctxt, &p->from, reg[D_BX]); | 3123 asmando(ctxt, &p->from, reg[D_BX]); |
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3255 Reloc *r; | 3255 Reloc *r; |
3256 | 3256 |
3257 ctxt->rexflag = 0; | 3257 ctxt->rexflag = 0; |
3258 ctxt->andptr = ctxt->and; | 3258 ctxt->andptr = ctxt->and; |
3259 ctxt->asmode = p->mode; | 3259 ctxt->asmode = p->mode; |
3260 doasm(ctxt, p); | 3260 doasm(ctxt, p); |
3261 if(ctxt->rexflag){ | 3261 if(ctxt->rexflag){ |
3262 /* | 3262 /* |
3263 * as befits the whole approach of the architecture, | 3263 * as befits the whole approach of the architecture, |
3264 * the rex prefix must appear before the first opcode byte | 3264 * the rex prefix must appear before the first opcode byte |
3265 » » * (ctxt->and thus after any 66/67/f2/f3/26/2e/3e prefix bytes,
but | 3265 » » * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but |
3266 * before the 0f opcode escape!), or it might be ignored. | 3266 * before the 0f opcode escape!), or it might be ignored. |
3267 * note that the handbook often misleadingly shows 66/f2/f3 in `
opcode'. | 3267 * note that the handbook often misleadingly shows 66/f2/f3 in `
opcode'. |
3268 */ | 3268 */ |
3269 if(p->mode != 64) | 3269 if(p->mode != 64) |
3270 ctxt->diag("asmins: illegal in mode %d: %P", p->mode, p)
; | 3270 ctxt->diag("asmins: illegal in mode %d: %P", p->mode, p)
; |
3271 n = ctxt->andptr - ctxt->and; | 3271 n = ctxt->andptr - ctxt->and; |
3272 for(np = 0; np < n; np++) { | 3272 for(np = 0; np < n; np++) { |
3273 c = ctxt->and[np]; | 3273 c = ctxt->and[np]; |
3274 if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c
!= 0x2e && c != 0x3e && c != 0x26) | 3274 if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c
!= 0x2e && c != 0x3e && c != 0x26) |
3275 break; | 3275 break; |
3276 } | 3276 } |
3277 memmove(ctxt->and+np+1, ctxt->and+np, n-np); | 3277 memmove(ctxt->and+np+1, ctxt->and+np, n-np); |
3278 ctxt->and[np] = 0x40 | ctxt->rexflag; | 3278 ctxt->and[np] = 0x40 | ctxt->rexflag; |
3279 ctxt->andptr++; | 3279 ctxt->andptr++; |
3280 } | 3280 } |
3281 n = ctxt->andptr - ctxt->and; | 3281 n = ctxt->andptr - ctxt->and; |
3282 for(r=ctxt->cursym->r+ctxt->cursym->nr; r-- > ctxt->cursym->r; ) { | 3282 for(r=ctxt->cursym->r+ctxt->cursym->nr; r-- > ctxt->cursym->r; ) { |
3283 if(r->off < p->pc) | 3283 if(r->off < p->pc) |
3284 break; | 3284 break; |
3285 if(ctxt->rexflag) | 3285 if(ctxt->rexflag) |
3286 r->off++; | 3286 r->off++; |
3287 if(r->type == D_PCREL) | 3287 if(r->type == D_PCREL) |
3288 r->add -= p->pc + n - (r->off + r->siz); | 3288 r->add -= p->pc + n - (r->off + r->siz); |
3289 } | 3289 } |
3290 } | 3290 } |
OLD | NEW |