Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(2179)

Side by Side Diff: src/pkg/runtime/asm_386.s

Issue 8853048: code review 8853048: runtime/bytes: fast Compare for byte arrays and strings. (Closed)
Patch Set: diff -r 00d69aa6619e https://khr%40golang.org@code.google.com/p/go/ Created 11 years, 11 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/pkg/bytes/compare_test.go ('k') | src/pkg/runtime/asm_amd64.s » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2009 The Go Authors. All rights reserved. 1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 #include "zasm_GOOS_GOARCH.h" 5 #include "zasm_GOOS_GOARCH.h"
6 6
7 TEXT _rt0_386(SB),7,$0 7 TEXT _rt0_386(SB),7,$0
8 // copy arguments forward on an even stack 8 // copy arguments forward on an even stack
9 MOVL argc+0(FP), AX 9 MOVL argc+0(FP), AX
10 MOVL argv+4(FP), BX 10 MOVL argv+4(FP), BX
(...skipping 1083 matching lines...) Expand 10 before | Expand all | Expand 10 after
1094 di_high: 1094 di_high:
1095 MOVL -4(DI)(BX*1), DI 1095 MOVL -4(DI)(BX*1), DI
1096 SHRL CX, DI 1096 SHRL CX, DI
1097 di_finish: 1097 di_finish:
1098 1098
1099 SUBL SI, DI 1099 SUBL SI, DI
1100 SHLL CX, DI 1100 SHLL CX, DI
1101 equal: 1101 equal:
1102 SETEQ AX 1102 SETEQ AX
1103 RET 1103 RET
1104
1105 TEXT runtime·cmpstring(SB),7,$0 // three-way compare of s1 and s2: loads both operands into cmpbody's input registers and stores its AX result in res+16(FP)
1106 MOVL s1+0(FP), SI // SI = first word of s1 (cmpbody's a)
1107 MOVL s1+4(FP), BX // BX = second word of s1 (cmpbody's alen)
1108 MOVL s2+8(FP), DI // DI = first word of s2 (cmpbody's b)
1109 MOVL s2+12(FP), DX // DX = second word of s2 (cmpbody's blen)
1110 CALL runtime·cmpbody(SB) // leaves 1/0/-1 in AX
1111 MOVL AX, res+16(FP) // store the result in the return slot
1112 RET
1113
1114 TEXT bytes·Compare(SB),7,$0 // three-way compare of s1 and s2: same shape as runtime·cmpstring, but s2 starts at +12 and the result lives at +24
1115 MOVL s1+0(FP), SI // SI = first word of s1 (cmpbody's a)
1116 MOVL s1+4(FP), BX // BX = second word of s1 (cmpbody's alen)
1117 MOVL s2+12(FP), DI // DI = first word of s2 (cmpbody's b); the word at s1+8 is not read here - presumably the slice capacity, TODO confirm
1118 MOVL s2+16(FP), DX // DX = second word of s2 (cmpbody's blen)
1119 CALL runtime·cmpbody(SB) // leaves 1/0/-1 in AX
1120 MOVL AX, res+24(FP) // store the result in the return slot
1121 RET
1122
1123 // input:
1124 // SI = a (address of the first operand's bytes)
1125 // DI = b (address of the second operand's bytes)
1126 // BX = alen
1127 // DX = blen
1128 // output:
1129 // AX = 1/0/-1 (a > b / a == b / a < b); BP and CX are overwritten, and SI, DI, BX, X0, X1 on some paths
1130 TEXT runtime·cmpbody(SB),7,$0 // compares the first min(alen, blen) bytes of a and b; if they all match, the lengths decide
1131 CMPL SI, DI // same address: the common bytes are trivially equal
1132 JEQ cmp_allsame
1133 CMPL BX, DX // sets the flags consumed by the CMOV below (the MOVL in between leaves them alone)
1134 MOVL DX, BP
1135 CMOVLLT BX, BP // BP = min(alen, blen)
1136 CMPL BP, $4
1137 JB cmp_small // fewer than 4 bytes to compare
1138 TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
1139 JE cmp_mediumloop // no sse2: compare 4 bytes at a time instead
1140 cmp_largeloop: // compares 16 bytes per iteration while at least 16 remain
1141 CMPL BP, $16
1142 JB cmp_mediumloop // fewer than 16 left: finish with the 4-byte loop
1143 MOVOU (SI), X0 // next 16 bytes of a
1144 MOVOU (DI), X1 // next 16 bytes of b
1145 PCMPEQB X0, X1 // per byte of X1: all ones where a and b match, zero where they differ
1146 PMOVMSKB X1, AX // AX bit i = 1 if byte i matched
1147 XORL $0xffff, AX // convert EQ to NE; result is zero when all 16 bytes matched
1148 JNE cmp_diff16 // branch if at least one byte is not equal
1149 ADDL $16, SI // advance both pointers past the equal bytes
1150 ADDL $16, DI
1151 SUBL $16, BP // 16 fewer bytes remaining
1152 JMP cmp_largeloop
1153
1154 cmp_diff16: // AX holds a 16-bit mask of differing byte positions
1155 BSFL AX, BX // index of first byte that differs (overwrites alen, which this path no longer needs)
1156 XORL AX, AX // clear AX before the compare so the SETHI below yields a clean 0/1
1157 MOVB (SI)(BX*1), CX // a's differing byte
1158 CMPB CX, (DI)(BX*1) // compare it against b's byte at the same index
1159 SETHI AX // 1 if a's byte is higher (unsigned), else 0
1160 LEAL -1(AX*2), AX // convert 1/0 to +1/-1
1161 RET
1162
1163 cmp_mediumloop: // compares 4 bytes per iteration while more than 4 remain
1164 CMPL BP, $4
1165 JBE cmp_0through4 // 4 or fewer left: handle the tail with one overlapping load
1166 MOVL (SI), AX // next 4 bytes of a
1167 MOVL (DI), CX // next 4 bytes of b
1168 CMPL AX, CX
1169 JNE cmp_diff4 // AX/CX hold the differing words
1170 ADDL $4, SI // advance both pointers past the equal bytes
1171 ADDL $4, DI
1172 SUBL $4, BP // 4 fewer bytes remaining
1173 JMP cmp_mediumloop
1174
1175 cmp_0through4: // only reached when the original min length was >= 4, so reading 4 bytes back from the end stays inside the operands
1176 MOVL -4(SI)(BP*1), AX // last 4 bytes of a's common part; may re-read bytes already found equal
1177 MOVL -4(DI)(BP*1), CX // last 4 bytes of b's common part
1178 CMPL AX, CX
1179 JEQ cmp_allsame // all common bytes equal: lengths decide
1180
1181 cmp_diff4: // AX = word from a, CX = word from b, known to differ
1182 BSWAPL AX // reverse order of bytes so the byte at the lowest address becomes most significant
1183 BSWAPL CX
1184 XORL AX, CX // find bit differences
1185 BSRL CX, CX // index of highest bit difference
1186 SHRL CX, AX // move a's bit to bottom
1187 ANDL $1, AX // mask bit: 1 means a has the 1 at the first difference, so a is larger
1188 LEAL -1(AX*2), AX // 1/0 => +1/-1
1189 RET
1190
1191 // 0-3 bytes in common
1192 cmp_small:
1193 LEAL (BP*8), CX // CX = 8*BP (bits to compare)
1194 NEGL CX // CX = -8*BP, used below as a shift count; sets ZF when BP == 0
1195 JEQ cmp_allsame // no bytes in common: lengths decide
1196
1197 // load si
1198 CMPB SI, $0xfc // inspect the low byte of a's address
1199 JA cmp_si_high // NOTE(review): presumably avoids a 4-byte load that could run past a page boundary - confirm
1200 MOVL (SI), SI // load 4 bytes starting at a; SI now holds data, not the address
1201 JMP cmp_si_finish
1202 cmp_si_high:
1203 MOVL -4(SI)(BP*1), SI // load the 4 bytes that end where a's BP bytes end
1204 SHRL CX, SI // bring a's BP bytes down to the bottom of SI
1205 cmp_si_finish:
1206 SHLL CX, SI // a's BP bytes now sit at the top of SI with zeros below
1207
1208 // same for di
1209 CMPB DI, $0xfc // inspect the low byte of b's address
1210 JA cmp_di_high
1211 MOVL (DI), DI // load 4 bytes starting at b; DI now holds data, not the address
1212 JMP cmp_di_finish
1213 cmp_di_high:
1214 MOVL -4(DI)(BP*1), DI // load the 4 bytes that end where b's BP bytes end
1215 SHRL CX, DI // bring b's BP bytes down to the bottom of DI
1216 cmp_di_finish:
1217 SHLL CX, DI // b's BP bytes now sit at the top of DI with zeros below
1218
1219 BSWAPL SI // reverse order of bytes
1220 BSWAPL DI
1221 XORL SI, DI // find bit differences
1222 JEQ cmp_allsame // common bytes are identical: lengths decide
1223 BSRL DI, CX // index of highest bit difference
1224 SHRL CX, SI // move a's bit to bottom
1225 ANDL $1, SI // mask bit
1226 LEAL -1(SI*2), AX // 1/0 => +1/-1
1227 RET
1228
1229 // all the bytes in common are the same, so we just need
1230 // to compare the lengths.
1231 cmp_allsame:
1232 XORL AX, AX // zero both result registers first; the SETcc instructions below fill in a 0/1
1233 XORL CX, CX
1234 CMPL BX, DX // alen vs blen
1235 SETGT AX // 1 if alen > blen
1236 SETEQ CX // 1 if alen == blen
1237 LEAL -1(CX)(AX*2), AX // 1,0,-1 result: 2*AX + CX - 1
1238 RET
OLDNEW
« no previous file with comments | « src/pkg/bytes/compare_test.go ('k') | src/pkg/runtime/asm_amd64.s » ('j') | no next file with comments »

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b