LEFT | RIGHT |
1 // Copyright 2009 The Go Authors. All rights reserved. | 1 // Copyright 2009 The Go Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 #include "zasm_GOOS_GOARCH.h" | 5 #include "zasm_GOOS_GOARCH.h" |
6 | 6 |
7 TEXT _rt0_amd64(SB),7,$-8 | 7 TEXT _rt0_amd64(SB),7,$-8 |
8 // copy arguments forward on an even stack | 8 // copy arguments forward on an even stack |
9 MOVQ DI, AX // argc | 9 MOVQ DI, AX // argc |
10 MOVQ SI, BX // argv | 10 MOVQ SI, BX // argv |
(...skipping 916 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
927 eqret: | 927 eqret: |
928 MOVB AX, ret+48(FP) | 928 MOVB AX, ret+48(FP) |
929 RET | 929 RET |
930 | 930 |
931 // a in SI | 931 // a in SI |
932 // b in DI | 932 // b in DI |
933 // count in BX | 933 // count in BX |
934 TEXT runtime·memeqbody(SB),7,$0 | 934 TEXT runtime·memeqbody(SB),7,$0 |
935 XORQ AX, AX | 935 XORQ AX, AX |
936 | 936 |
| 937 CMPQ BX, $8 |
| 938 JB small |
| 939 ········ |
937 // 64 bytes at a time using xmm registers | 940 // 64 bytes at a time using xmm registers |
938 hugeloop: | 941 hugeloop: |
939 CMPQ BX, $64 | 942 CMPQ BX, $64 |
940 JB bigloop | 943 JB bigloop |
941 MOVOU (SI), X0 | 944 MOVOU (SI), X0 |
942 MOVOU (DI), X1 | 945 MOVOU (DI), X1 |
943 MOVOU 16(SI), X2 | 946 MOVOU 16(SI), X2 |
944 MOVOU 16(DI), X3 | 947 MOVOU 16(DI), X3 |
945 MOVOU 32(SI), X4 | 948 MOVOU 32(SI), X4 |
946 MOVOU 32(DI), X5 | 949 MOVOU 32(DI), X5 |
(...skipping 10 matching lines...) Expand all Loading... |
957 ADDQ $64, SI | 960 ADDQ $64, SI |
958 ADDQ $64, DI | 961 ADDQ $64, DI |
959 SUBQ $64, BX | 962 SUBQ $64, BX |
960 CMPL DX, $0xffff | 963 CMPL DX, $0xffff |
961 JEQ hugeloop | 964 JEQ hugeloop |
962 RET | 965 RET |
963 | 966 |
964 // 8 bytes at a time using 64-bit register | 967 // 8 bytes at a time using 64-bit register |
965 bigloop: | 968 bigloop: |
966 CMPQ BX, $8 | 969 CMPQ BX, $8 |
967 » JB» leftover | 970 » JBE» leftover |
968 MOVQ (SI), CX | 971 MOVQ (SI), CX |
969 MOVQ (DI), DX | 972 MOVQ (DI), DX |
970 ADDQ $8, SI | 973 ADDQ $8, SI |
971 ADDQ $8, DI | 974 ADDQ $8, DI |
972 SUBQ $8, BX | 975 SUBQ $8, BX |
973 CMPQ CX, DX | 976 CMPQ CX, DX |
974 JEQ bigloop | 977 JEQ bigloop |
975 RET | 978 RET |
976 | 979 |
977 » // remaining 0-7 bytes | 980 » // remaining 0-8 bytes |
978 leftover: | 981 leftover: |
| 982 MOVQ -8(SI)(BX*1), CX |
| 983 MOVQ -8(DI)(BX*1), DX |
| 984 CMPQ CX, DX |
| 985 SETEQ AX |
| 986 RET |
| 987 |
| 988 small: |
979 CMPQ BX, $0 | 989 CMPQ BX, $0 |
980 JEQ equal | 990 JEQ equal |
981 | 991 |
982 LEAQ 0(BX*8), CX | 992 LEAQ 0(BX*8), CX |
983 NEGQ CX | 993 NEGQ CX |
984 | 994 |
985 » TESTB» $8, SI | 995 » CMPB» SI, $0xf8 |
986 » JNE» si_high | 996 » JA» si_high |
987 | 997 |
988 » // address ends in 0xxx. Load from bytes we want onwards. | 998 » // load at SI won't cross a page boundary. |
989 MOVQ (SI), SI | 999 MOVQ (SI), SI |
990 JMP si_finish | 1000 JMP si_finish |
991 si_high: | 1001 si_high: |
992 » // address ends in 1xxx. Load up to bytes we want, move to correct position. | 1002 » // address ends in 11111xxx. Load up to bytes we want, move to correct position. |
993 MOVQ -8(SI)(BX*1), SI | 1003 MOVQ -8(SI)(BX*1), SI |
994 SHRQ CX, SI | 1004 SHRQ CX, SI |
995 si_finish: | 1005 si_finish: |
996 | 1006 |
997 // same for DI. | 1007 // same for DI. |
998 » TESTB» $8, DI | 1008 » CMPB» DI, $0xf8 |
999 » JNE» di_high | 1009 » JA» di_high |
1000 MOVQ (DI), DI | 1010 MOVQ (DI), DI |
1001 JMP di_finish | 1011 JMP di_finish |
1002 di_high: | 1012 di_high: |
1003 MOVQ -8(DI)(BX*1), DI | 1013 MOVQ -8(DI)(BX*1), DI |
1004 SHRQ CX, DI | 1014 SHRQ CX, DI |
1005 di_finish: | 1015 di_finish: |
1006 | 1016 |
1007 SUBQ SI, DI | 1017 SUBQ SI, DI |
1008 SHLQ CX, DI | 1018 SHLQ CX, DI |
1009 equal: | 1019 equal: |
1010 SETEQ AX | 1020 SETEQ AX |
1011 RET | 1021 RET |
LEFT | RIGHT |