Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(206)

Side by Side Diff: src/pkg/crypto/sha1/sha1block_amd64.s

Issue 7763049: code review 7763049: crypto/sha1: faster amd64, 386 implementations (Closed)
Patch Set: diff -r e5620fd3ba5f https://code.google.com/p/go/ Created 12 years ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/pkg/crypto/sha1/sha1block_386.s ('k') | src/pkg/crypto/sha1/sha1block_decl.go » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // SHA1 block routine. See sha1block.go for Go equivalent.
6 //
7 // There are 80 rounds of 4 types:
8 // - rounds 0-15 are type 1 and load data (ROUND1 macro).
9 // - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
10 // - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
11 // - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
12 // - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
13 //
14 // Each round loads or shuffles the data, then computes a per-round
15 // function of b, c, d, and then mixes the result into and rotates the
16 // five registers a, b, c, d, e holding the intermediate results.
17 //
18 // The register rotation is implemented by rotating the arguments to
19 // the round macros instead of by explicit move instructions.
20
// LOAD(index) reads the 32-bit word at byte offset index*4 from SI,
// reverses its byte order with BSWAPL, and stores it at byte offset
// index*4 from SP. The swapped word is also left in R10.
21 #define LOAD(index) \
22 MOVL (index*4)(SI), R10; \
23 BSWAPL R10; \
24 MOVL R10, (index*4)(SP)
25
// SHUFFLE(index) updates one slot of the 16-word table at (SP), which is
// indexed modulo 16 (the &0xf masks):
//   w[index] = rotl1(w[index] ^ w[index-3] ^ w[index-8] ^ w[index-14])
// The new word is written back to slot index&0xf and is also left in R10.
26 #define SHUFFLE(index) \
27 MOVL (((index)&0xf)*4)(SP), R10; \
28 XORL (((index-3)&0xf)*4)(SP), R10; \
29 XORL (((index-8)&0xf)*4)(SP), R10; \
30 XORL (((index-14)&0xf)*4)(SP), R10; \
31 ROLL $1, R10; \
32 MOVL R10, (((index)&0xf)*4)(SP)
33
// FUNC1 computes R9 = (b & c) | (^b & d), using R8 as scratch.
// The a and e arguments are accepted but not used; b, c, d are not modified.
34 #define FUNC1(a, b, c, d, e) \
35 MOVL b, R8; \
36 ANDL c, R8; \
37 MOVL b, R9; \
38 NOTL R9; \
39 ANDL d, R9; \
40 ORL R8, R9
41
// FUNC2 computes R9 = b ^ c ^ d.
// The a and e arguments are accepted but not used; b, c, d are not modified.
42 #define FUNC2(a, b, c, d, e) \
43 MOVL b, R9; \
44 XORL c, R9; \
45 XORL d, R9
46
// FUNC3 computes R9 = ((b | c) & d) | (b & c), using R8 as scratch.
// Each result bit is set when at least two of the corresponding bits of
// b, c, d are set. The a and e arguments are accepted but not used.
47 #define FUNC3(a, b, c, d, e) \
48 MOVL b, R8; \
49 ORL c, R8; \
50 ANDL d, R8; \
51 MOVL b, R9; \
52 ANDL c, R9; \
53 ORL R8, R9
54 ········
// FUNC4 is the same computation as FUNC2 (R9 = b ^ c ^ d); the alias lets
// ROUND4 name its own function like the other round macros do.
55 #define FUNC4 FUNC2
56
// MIX folds one round's values into the state registers:
//   b = rotl30(b)
//   e = e + R9 + const + R10 + rotl5(a)
// It expects R9 and R10 to have been set beforehand; in the ROUND macros
// R9 comes from FUNCn and R10 from LOAD or SHUFFLE. R8 is used as scratch.
// The c and d arguments are accepted but not used.
57 #define MIX(a, b, c, d, e, const) \
58 ROLL $30, b; \
59 ADDL R9, e; \
60 MOVL a, R8; \
61 ROLL $5, R8; \
62 LEAL const(e)(R10*1), e; \
63 ADDL R8, e
64
// ROUND1 is one round that takes its word from the input: LOAD fetches
// word `index` into R10 and the table at (SP), FUNC1 leaves its result in
// R9, and MIX combines both with the constant 0x5A827999.
65 #define ROUND1(a, b, c, d, e, index) \
66 LOAD(index); \
67 FUNC1(a, b, c, d, e); \
68 MIX(a, b, c, d, e, 0x5A827999)
69
// ROUND1x is ROUND1 without the input load: the word comes from SHUFFLE
// (derived from the table at (SP)) instead of LOAD. The function (FUNC1)
// and the constant 0x5A827999 are the same as in ROUND1.
70 #define ROUND1x(a, b, c, d, e, index) \
71 SHUFFLE(index); \
72 FUNC1(a, b, c, d, e); \
73 MIX(a, b, c, d, e, 0x5A827999)
74
// ROUND2 is one round built from SHUFFLE (word in R10), FUNC2 (result in
// R9) and MIX with the constant 0x6ED9EBA1.
75 #define ROUND2(a, b, c, d, e, index) \
76 SHUFFLE(index); \
77 FUNC2(a, b, c, d, e); \
78 MIX(a, b, c, d, e, 0x6ED9EBA1)
79
// ROUND3 is one round built from SHUFFLE (word in R10), FUNC3 (result in
// R9) and MIX with the constant 0x8F1BBCDC.
80 #define ROUND3(a, b, c, d, e, index) \
81 SHUFFLE(index); \
82 FUNC3(a, b, c, d, e); \
83 MIX(a, b, c, d, e, 0x8F1BBCDC)
84
// ROUND4 is one round built from SHUFFLE (word in R10), FUNC4 (an alias
// of FUNC2, result in R9) and MIX with the constant 0xCA62C1D6.
85 #define ROUND4(a, b, c, d, e, index) \
86 SHUFFLE(index); \
87 FUNC4(a, b, c, d, e); \
88 MIX(a, b, c, d, e, 0xCA62C1D6)
89
// ·block folds every complete 64-byte chunk of the input into the five
// 32-bit state words stored at the address held in dig+0(FP).
//
// It reads three values from the argument frame: dig+0(FP) (pointer to the
// state words), p+8(FP) (pointer to the input bytes) and n+16(FP) (byte
// count). Trailing bytes that do not fill a 64-byte chunk are ignored.
// NOTE(review): presumably declared in sha1block_decl.go as
// block(dig *digest, p []byte) — confirm against that file.
//
// The 64-byte local frame is the 16-word table at (SP) that the LOAD and
// SHUFFLE macros read and write.
90 TEXT ·block(SB),7,$64-32
91 MOVQ dig+0(FP), BP
92 MOVQ p+8(FP), SI
93 MOVQ n+16(FP), DX
// Clear the low 6 bits of the length: DX = n rounded down to a multiple of 64.
94 SHRQ $6, DX
95 SHLQ $6, DX
96 ········
// DI = SI + DX, the address just past the last complete chunk.
97 LEAQ (SI)(DX*1), DI
// Load the five state words into AX, BX, CX, DX, BP. BP is loaded last
// because that load overwrites the state pointer BP held until now.
98 MOVL (0*4)(BP), AX
99 MOVL (1*4)(BP), BX
100 MOVL (2*4)(BP), CX
101 MOVL (3*4)(BP), DX
102 MOVL (4*4)(BP), BP
103
// No complete chunk (SI == DI): skip the loop and store the state unchanged.
104 CMPQ SI, DI
105 JEQ end
106
107 loop:
// Keep a copy of the incoming state in R11-R15; it is added back after
// the 80 rounds below.
108 MOVL AX, R11
109 MOVL BX, R12
110 MOVL CX, R13
111 MOVL DX, R14
112 MOVL BP, R15
113
// Rounds 0-15: each round loads one input word. From one round to the next
// the register arguments shift by one position (AX,BX,CX,DX,BP then
// BP,AX,BX,CX,DX, ...), repeating every five rounds, so the a..e roles
// rotate without any register-to-register moves.
114 ROUND1(AX, BX, CX, DX, BP, 0)
115 ROUND1(BP, AX, BX, CX, DX, 1)
116 ROUND1(DX, BP, AX, BX, CX, 2)
117 ROUND1(CX, DX, BP, AX, BX, 3)
118 ROUND1(BX, CX, DX, BP, AX, 4)
119 ROUND1(AX, BX, CX, DX, BP, 5)
120 ROUND1(BP, AX, BX, CX, DX, 6)
121 ROUND1(DX, BP, AX, BX, CX, 7)
122 ROUND1(CX, DX, BP, AX, BX, 8)
123 ROUND1(BX, CX, DX, BP, AX, 9)
124 ROUND1(AX, BX, CX, DX, BP, 10)
125 ROUND1(BP, AX, BX, CX, DX, 11)
126 ROUND1(DX, BP, AX, BX, CX, 12)
127 ROUND1(CX, DX, BP, AX, BX, 13)
128 ROUND1(BX, CX, DX, BP, AX, 14)
129 ROUND1(AX, BX, CX, DX, BP, 15)
130
// Rounds 16-19: same function and constant as above, but the word now
// comes from SHUFFLE over the table at (SP) instead of from the input.
131 ROUND1x(BP, AX, BX, CX, DX, 16)
132 ROUND1x(DX, BP, AX, BX, CX, 17)
133 ROUND1x(CX, DX, BP, AX, BX, 18)
134 ROUND1x(BX, CX, DX, BP, AX, 19)
135 ········
// Rounds 20-39.
136 ROUND2(AX, BX, CX, DX, BP, 20)
137 ROUND2(BP, AX, BX, CX, DX, 21)
138 ROUND2(DX, BP, AX, BX, CX, 22)
139 ROUND2(CX, DX, BP, AX, BX, 23)
140 ROUND2(BX, CX, DX, BP, AX, 24)
141 ROUND2(AX, BX, CX, DX, BP, 25)
142 ROUND2(BP, AX, BX, CX, DX, 26)
143 ROUND2(DX, BP, AX, BX, CX, 27)
144 ROUND2(CX, DX, BP, AX, BX, 28)
145 ROUND2(BX, CX, DX, BP, AX, 29)
146 ROUND2(AX, BX, CX, DX, BP, 30)
147 ROUND2(BP, AX, BX, CX, DX, 31)
148 ROUND2(DX, BP, AX, BX, CX, 32)
149 ROUND2(CX, DX, BP, AX, BX, 33)
150 ROUND2(BX, CX, DX, BP, AX, 34)
151 ROUND2(AX, BX, CX, DX, BP, 35)
152 ROUND2(BP, AX, BX, CX, DX, 36)
153 ROUND2(DX, BP, AX, BX, CX, 37)
154 ROUND2(CX, DX, BP, AX, BX, 38)
155 ROUND2(BX, CX, DX, BP, AX, 39)
156 ········
// Rounds 40-59.
157 ROUND3(AX, BX, CX, DX, BP, 40)
158 ROUND3(BP, AX, BX, CX, DX, 41)
159 ROUND3(DX, BP, AX, BX, CX, 42)
160 ROUND3(CX, DX, BP, AX, BX, 43)
161 ROUND3(BX, CX, DX, BP, AX, 44)
162 ROUND3(AX, BX, CX, DX, BP, 45)
163 ROUND3(BP, AX, BX, CX, DX, 46)
164 ROUND3(DX, BP, AX, BX, CX, 47)
165 ROUND3(CX, DX, BP, AX, BX, 48)
166 ROUND3(BX, CX, DX, BP, AX, 49)
167 ROUND3(AX, BX, CX, DX, BP, 50)
168 ROUND3(BP, AX, BX, CX, DX, 51)
169 ROUND3(DX, BP, AX, BX, CX, 52)
170 ROUND3(CX, DX, BP, AX, BX, 53)
171 ROUND3(BX, CX, DX, BP, AX, 54)
172 ROUND3(AX, BX, CX, DX, BP, 55)
173 ROUND3(BP, AX, BX, CX, DX, 56)
174 ROUND3(DX, BP, AX, BX, CX, 57)
175 ROUND3(CX, DX, BP, AX, BX, 58)
176 ROUND3(BX, CX, DX, BP, AX, 59)
177 ········
// Rounds 60-79. Because 80 is a multiple of five, the register roles are
// back to a..e = AX,BX,CX,DX,BP once the last round has run.
178 ROUND4(AX, BX, CX, DX, BP, 60)
179 ROUND4(BP, AX, BX, CX, DX, 61)
180 ROUND4(DX, BP, AX, BX, CX, 62)
181 ROUND4(CX, DX, BP, AX, BX, 63)
182 ROUND4(BX, CX, DX, BP, AX, 64)
183 ROUND4(AX, BX, CX, DX, BP, 65)
184 ROUND4(BP, AX, BX, CX, DX, 66)
185 ROUND4(DX, BP, AX, BX, CX, 67)
186 ROUND4(CX, DX, BP, AX, BX, 68)
187 ROUND4(BX, CX, DX, BP, AX, 69)
188 ROUND4(AX, BX, CX, DX, BP, 70)
189 ROUND4(BP, AX, BX, CX, DX, 71)
190 ROUND4(DX, BP, AX, BX, CX, 72)
191 ROUND4(CX, DX, BP, AX, BX, 73)
192 ROUND4(BX, CX, DX, BP, AX, 74)
193 ROUND4(AX, BX, CX, DX, BP, 75)
194 ROUND4(BP, AX, BX, CX, DX, 76)
195 ROUND4(DX, BP, AX, BX, CX, 77)
196 ROUND4(CX, DX, BP, AX, BX, 78)
197 ROUND4(BX, CX, DX, BP, AX, 79)
198
// Add the state saved at the top of the loop to the result of the rounds.
199 ADDL R11, AX
200 ADDL R12, BX
201 ADDL R13, CX
202 ADDL R14, DX
203 ADDL R15, BP
204
// Advance to the next 64-byte chunk and repeat while SI is still below DI.
205 ADDQ $64, SI
206 CMPQ SI, DI
207 JB loop
208
209 end:
// BP now holds a state word rather than the state pointer, so the pointer
// is reloaded from the frame before the five words are written back.
210 MOVQ dig+0(FP), DI
211 MOVL AX, (0*4)(DI)
212 MOVL BX, (1*4)(DI)
213 MOVL CX, (2*4)(DI)
214 MOVL DX, (3*4)(DI)
215 MOVL BP, (4*4)(DI)
216 RET
OLDNEW
« no previous file with comments | « src/pkg/crypto/sha1/sha1block_386.s ('k') | src/pkg/crypto/sha1/sha1block_decl.go » ('j') | no next file with comments »

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b