Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(303)

Delta Between Two Patch Sets: src/pkg/runtime/asm_386.s

Issue 8056043: code review 8056043: runtime: Implement faster equals for strings and bytes. (Closed)
Left Patch Set: diff -r 7505bf6d8988 https://khr%40golang.org@code.google.com/p/go/ Created 11 years ago
Right Patch Set: diff -r 52e3407d249f https://khr%40golang.org@code.google.com/p/go/ Created 10 years, 12 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « src/pkg/runtime/alg.c ('k') | src/pkg/runtime/asm_amd64.s » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT RIGHT
1 // Copyright 2009 The Go Authors. All rights reserved. 1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 #include "zasm_GOOS_GOARCH.h" 5 #include "zasm_GOOS_GOARCH.h"
6 6
7 TEXT _rt0_386(SB),7,$0 7 TEXT _rt0_386(SB),7,$0
8 // copy arguments forward on an even stack 8 // copy arguments forward on an even stack
9 MOVL argc+0(FP), AX 9 MOVL argc+0(FP), AX
10 MOVL argv+4(FP), BX 10 MOVL argv+4(FP), BX
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
101 RET 101 RET
102 102
103 DATA runtime·main·f+0(SB)/4,$runtime·main(SB) 103 DATA runtime·main·f+0(SB)/4,$runtime·main(SB)
104 GLOBL runtime·main·f(SB),8,$4 104 GLOBL runtime·main·f(SB),8,$4
105 105
106 TEXT runtime·breakpoint(SB),7,$0 106 TEXT runtime·breakpoint(SB),7,$0
107 INT $3 107 INT $3
108 RET 108 RET
109 109
110 TEXT runtime·asminit(SB),7,$0 110 TEXT runtime·asminit(SB),7,$0
111 » // Linux, Windows start the FPU in extended double precision. 111 » // Linux and MinGW start the FPU in extended double precision.
112 // Other operating systems use double precision. 112 // Other operating systems use double precision.
113 // Change to double precision to match them, 113 // Change to double precision to match them,
114 // and to match other hardware that only has double. 114 // and to match other hardware that only has double.
115 PUSHL $0x27F 115 PUSHL $0x27F
116 FLDCW 0(SP) 116 FLDCW 0(SP)
117 POPL AX 117 POPL AX
118 RET 118 RET
119 119
120 /* 120 /*
121 * go-routine 121 * go-routine
(...skipping 882 matching lines...) Expand 10 before | Expand all | Expand 10 after
1004 CALL runtime·memeqbody(SB) 1004 CALL runtime·memeqbody(SB)
1005 eqret: 1005 eqret:
1006 MOVB AX, ret+24(FP) 1006 MOVB AX, ret+24(FP)
1007 RET 1007 RET
1008 1008
1009 // a in SI 1009 // a in SI
1010 // b in DI 1010 // b in DI
1011 // count in BX 1011 // count in BX
1012 TEXT runtime·memeqbody(SB),7,$0 1012 TEXT runtime·memeqbody(SB),7,$0
1013 XORL AX, AX 1013 XORL AX, AX
1014
1015 CMPL BX, $4
1016 JB small
1014 1017
1015 // 64 bytes at a time using xmm registers 1018 // 64 bytes at a time using xmm registers
1016 hugeloop: 1019 hugeloop:
1017 CMPL BX, $64 1020 CMPL BX, $64
1018 JB bigloop 1021 JB bigloop
1019 TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2 1022 TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
1020 JE bigloop 1023 JE bigloop
1021 MOVOU (SI), X0 1024 MOVOU (SI), X0
1022 MOVOU (DI), X1 1025 MOVOU (DI), X1
1023 MOVOU 16(SI), X2 1026 MOVOU 16(SI), X2
(...skipping 13 matching lines...) Expand all
1037 ADDL $64, SI 1040 ADDL $64, SI
1038 ADDL $64, DI 1041 ADDL $64, DI
1039 SUBL $64, BX 1042 SUBL $64, BX
1040 CMPL DX, $0xffff 1043 CMPL DX, $0xffff
1041 JEQ hugeloop 1044 JEQ hugeloop
1042 RET 1045 RET
1043 1046
1044 // 4 bytes at a time using 32-bit register 1047 // 4 bytes at a time using 32-bit register
1045 bigloop: 1048 bigloop:
1046 CMPL BX, $4 1049 CMPL BX, $4
1047 » JB» leftover 1050 » JBE» leftover
1048 MOVL (SI), CX 1051 MOVL (SI), CX
1049 MOVL (DI), DX 1052 MOVL (DI), DX
1050 ADDL $4, SI 1053 ADDL $4, SI
1051 ADDL $4, DI 1054 ADDL $4, DI
1052 SUBL $4, BX 1055 SUBL $4, BX
1053 CMPL CX, DX 1056 CMPL CX, DX
1054 JEQ bigloop 1057 JEQ bigloop
1055 RET 1058 RET
1056 1059
1057 » // remaining 0-3 bytes 1060 » // remaining 0-4 bytes
1058 leftover: 1061 leftover:
1062 MOVL -4(SI)(BX*1), CX
1063 MOVL -4(DI)(BX*1), DX
1064 CMPL CX, DX
1065 SETEQ AX
1066 RET
1067
1068 small:
1059 CMPL BX, $0 1069 CMPL BX, $0
1060 JEQ equal 1070 JEQ equal
1061 1071
1062 LEAL 0(BX*8), CX 1072 LEAL 0(BX*8), CX
1063 NEGL CX 1073 NEGL CX
1064 1074
1065 » TESTL» $4, SI 1075 » MOVL» SI, DX
1066 » JNE» si_high 1076 » CMPB» DX, $0xfc
1067 1077 » JA» si_high
1068 » // address ends in 0xx. Load from bytes we want onwards. 1078
1079 » // load at SI won't cross a page boundary.
1069 MOVL (SI), SI 1080 MOVL (SI), SI
1070 JMP si_finish 1081 JMP si_finish
1071 si_high: 1082 si_high:
1072 » // address ends in 1xx. Load up to bytes we want, move to correct position. 1083 » // address ends in 111111xx. Load up to bytes we want, move to correct position.
1073 MOVL -4(SI)(BX*1), SI 1084 MOVL -4(SI)(BX*1), SI
1074 SHRL CX, SI 1085 SHRL CX, SI
1075 si_finish: 1086 si_finish:
1076 1087
1077 // same for DI. 1088 // same for DI.
1078 » TESTL» $4, DI 1089 » MOVL» DI, DX
1079 » JNE» di_high 1090 » CMPB» DX, $0xfc
1091 » JA» di_high
1080 MOVL (DI), DI 1092 MOVL (DI), DI
1081 JMP di_finish 1093 JMP di_finish
1082 di_high: 1094 di_high:
1083 MOVL -4(DI)(BX*1), DI 1095 MOVL -4(DI)(BX*1), DI
1084 SHRL CX, DI 1096 SHRL CX, DI
1085 di_finish: 1097 di_finish:
1086 1098
1087 SUBL SI, DI 1099 SUBL SI, DI
1088 SHLL CX, DI 1100 SHLL CX, DI
1089 equal: 1101 equal:
1090 SETEQ AX 1102 SETEQ AX
1091 RET 1103 RET
LEFT RIGHT

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b