Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(2259)

Side by Side Diff: src/pkg/crypto/sha1/sha1block_arm.s

Issue 56900043: crypto/sha1: native ARM assembler version (Closed)
Patch Set: diff -r 54a5513d9d6a https://code.google.com/p/go Created 10 years, 2 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/pkg/crypto/sha1/sha1block.go ('k') | src/pkg/crypto/sha1/sha1block_decl.go » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "../../../cmd/ld/textflag.h"
6
7 // SHA1 block routine. See sha1block.go for Go equivalent.
8 //
9 // There are 80 rounds of 4 types:
10 // - rounds 0-15 are type 1 and load data (ROUND1 macro).
11 // - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
12 // - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
13 // - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
14 // - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
15 //
16 // Each round loads or shuffles the data, then computes a per-round
17 // function of b, c, d, and then mixes the result into and rotates the
18 // five registers a, b, c, d, e holding the intermediate results.
19 //
20 // The register rotation is implemented by rotating the arguments to
21 // the round macros instead of by explicit move instructions.
22
23 // Register definitions: symbolic names for register numbers, used below as R(name)
24 data = 0 // Pointer to incoming data (advanced by 64 after each pass of the loop)
25 const = 1 // Current constant for SHA round, reloaded before each group of 20 rounds
26 a = 2 // SHA1 accumulator
27 b = 3 // SHA1 accumulator
28 c = 4 // SHA1 accumulator
29 d = 5 // SHA1 accumulator
30 e = 6 // SHA1 accumulator
31 t0 = 7 // Temporary; LOAD and SHUFFLE leave the message word w[i] here
32 t1 = 8 // Temporary; the FUNCn macros leave their result here for MIX
33 // r9, r10 are forbidden
34 // r11 is OK provided you check the assembler that no synthetic instructions use it
35 t2 = 11 // Temporary
36 t3 = 12 // Temporary
37 t4 = 14 // Temporary; only used when reloading the saved accumulators after round 79
38
39 // func block(dig *digest, p []byte)
40 // 0(FP) is *digest
41 // 4(FP) is p.array (struct Slice)
42 // 8(FP) is p.len
43 // 12(FP) is p.cap
44 //
45 // Stack frame
46 p_end = -4 // -4(SP) pointer to the end of data
47 p_data = p_end - 4 // -8(SP) current data pointer (slot reserved; never accessed as p_data(SP) in the code shown)
48 w = p_data - 4*16 // -72(SP) 16 words temporary buffer w uint32[16]
49 saved = w - 4*5 // -92(SP) saved sha1 registers a,b,c,d,e - these must be last (accessed via MOVM.IB on R13, not via this symbol)
50 // Total size is 4+4+64+20 = 92 bytes, +4 for saved LR is 96
51
52 // w[i] = p[j]<<24 | p[j+1]<<16 | p[j+2]<<8 | p[j+3] (big-endian word assembled from four byte loads)
53 // e += w[i]; clobbers t0-t3
54 #define LOAD(index, e) \
55 MOVBU (index*4+0)(R(data)), R(t0) ; \
56 MOVBU (index*4+1)(R(data)), R(t1) ; \
57 MOVBU (index*4+2)(R(data)), R(t2) ; \
58 MOVBU (index*4+3)(R(data)), R(t3) ; \
59 ORR R(t0)<<8, R(t1), R(t0) ; \
60 ORR R(t2)<<8, R(t3), R(t2) ; \
61 ORR R(t0)<<16, R(t2), R(t0) ; \
62 MOVW R(t0), (index*4+w)(SP) ; \
63 ADD R(t0), R(e), R(e)
64
65 // tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf]
66 // w[i&0xf] = tmp<<1 | tmp>>(32-1) (rotate left by 1, coded as a rotate right by 32-1)
67 // e += w[i&0xf]; clobbers t0-t3
68 #define SHUFFLE(index, e) \
69 MOVW (((index)&0xf)*4+w)(SP), R(t0) ; \
70 MOVW (((index-3)&0xf)*4+w)(SP), R(t1) ; \
71 MOVW (((index-8)&0xf)*4+w)(SP), R(t2) ; \
72 MOVW (((index-14)&0xf)*4+w)(SP), R(t3) ; \
73 EOR R(t0), R(t1), R(t0) ; \
74 EOR R(t2), R(t3), R(t2) ; \
75 EOR R(t0), R(t2), R(t0) ; \
76 MOVW R(t0)@>(32-1), R(t0) ; \
77 MOVW R(t0), (((index)&0xf)*4+w)(SP) ; \
78 ADD R(t0), R(e), R(e)
79
80 // t1 = (b & c) | ((~b) & d) =
81 // t1 = d ^ (b & (c ^ d)) (equivalent form, and the one the three instructions compute; a and e are unused)
82 #define FUNC1(a, b, c, d, e) \
83 EOR R(c), R(d), R(t1) ; \
84 AND R(b), R(t1), R(t1) ; \
85 EOR R(d), R(t1), R(t1)
86
87 // t1 = b ^ c ^ d (a and e are accepted but unused)
88 #define FUNC2(a, b, c, d, e) \
89 EOR R(b), R(c), R(t1) ; \
90 EOR R(d), R(t1), R(t1)
91
92 // t1 = (b & c) | (b & d) | (c & d) =
93 // t1 = (b & c) | ((b | c) & d) (uses t0 as scratch; a and e are unused)
94 #define FUNC3(a, b, c, d, e) \
95 ORR R(b), R(c), R(t0) ; \
96 AND R(b), R(c), R(t1) ; \
97 AND R(d), R(t0), R(t0) ; \
98 ORR R(t0), R(t1), R(t1)
99 // FUNC4 (used by ROUND4, rounds 60-79) is the same b ^ c ^ d function as FUNC2
100 #define FUNC4 FUNC2
101
102 // a5 := a<<5 | a>>(32-5) (formed inline as the rotated operand a@>(32-5); register a itself is unchanged)
103 // b = b<<30 | b>>(32-30) (rotate left by 30, coded as a rotate right by 32-30)
104 // e = a5 + t1 + e + const (t1 is the FUNCn result; c and d are unused here)
105 #define MIX(a, b, c, d, e) \
106 ADD R(t1), R(e), R(e) ; \
107 MOVW R(b)@>(32-30), R(b) ; \
108 ADD R(a)@>(32-5), R(e), R(e) ; \
109 ADD R(const), R(e), R(e)
110 // ROUND1 (rounds 0-15): load data word index into w and e, apply FUNC1, then MIX
111 #define ROUND1(a, b, c, d, e, index) \
112 LOAD(index, e) ; \
113 FUNC1(a, b, c, d, e) ; \
114 MIX(a, b, c, d, e)
115 // ROUND1x (rounds 16-19): like ROUND1 but derives w[index&0xf] with SHUFFLE instead of loading data
116 #define ROUND1x(a, b, c, d, e, index) \
117 SHUFFLE(index, e) ; \
118 FUNC1(a, b, c, d, e) ; \
119 MIX(a, b, c, d, e)
120 // ROUND2 (rounds 20-39): SHUFFLE the message word, apply FUNC2, then MIX
121 #define ROUND2(a, b, c, d, e, index) \
122 SHUFFLE(index, e) ; \
123 FUNC2(a, b, c, d, e) ; \
124 MIX(a, b, c, d, e)
125 // ROUND3 (rounds 40-59): SHUFFLE the message word, apply FUNC3, then MIX
126 #define ROUND3(a, b, c, d, e, index) \
127 SHUFFLE(index, e) ; \
128 FUNC3(a, b, c, d, e) ; \
129 MIX(a, b, c, d, e)
130 // ROUND4 (rounds 60-79): SHUFFLE the message word, apply FUNC4 (same as FUNC2), then MIX
131 #define ROUND4(a, b, c, d, e, index) \
132 SHUFFLE(index, e) ; \
133 FUNC4(a, b, c, d, e) ; \
134 MIX(a, b, c, d, e)
135
136 // func block(dig *digest, p []byte): runs the 80 rounds over each 64-byte chunk of p and adds the result into the five words at dig
137 TEXT ·block(SB), NOSPLIT, $96-16
138 MOVW p+4(FP), R(data) // pointer to the data
139 MOVW p_len+8(FP), R(t0) // number of bytes
140 ADD R(data), R(t0) // t0 = data + number of bytes
141 MOVW R(t0), p_end(SP) // pointer to end of data
142
143 // Load up initial SHA1 accumulator
144 MOVW dig+0(FP), R(t0)
145 MOVM.IA (R(t0)), [R(a),R(b),R(c),R(d),R(e)]
146
147 loop: // one pass per 64-byte chunk
148 // Save registers at SP+4 onwards
149 MOVM.IB [R(a),R(b),R(c),R(d),R(e)], (R13)
150
151 MOVW $0x5A827999, R(const) // round constant for rounds 0-19
152 ROUND1(a, b, c, d, e, 0)
153 ROUND1(e, a, b, c, d, 1)
154 ROUND1(d, e, a, b, c, 2)
155 ROUND1(c, d, e, a, b, 3)
156 ROUND1(b, c, d, e, a, 4)
157 ROUND1(a, b, c, d, e, 5)
158 ROUND1(e, a, b, c, d, 6)
159 ROUND1(d, e, a, b, c, 7)
160 ROUND1(c, d, e, a, b, 8)
161 ROUND1(b, c, d, e, a, 9)
162 ROUND1(a, b, c, d, e, 10)
163 ROUND1(e, a, b, c, d, 11)
164 ROUND1(d, e, a, b, c, 12)
165 ROUND1(c, d, e, a, b, 13)
166 ROUND1(b, c, d, e, a, 14)
167 ROUND1(a, b, c, d, e, 15)
168 // Rounds 16-19 keep the same round function and constant but no longer load data
169 ROUND1x(e, a, b, c, d, 16)
170 ROUND1x(d, e, a, b, c, 17)
171 ROUND1x(c, d, e, a, b, 18)
172 ROUND1x(b, c, d, e, a, 19)
173 ········
174 MOVW $0x6ED9EBA1, R(const) // round constant for rounds 20-39
175 ROUND2(a, b, c, d, e, 20)
176 ROUND2(e, a, b, c, d, 21)
177 ROUND2(d, e, a, b, c, 22)
178 ROUND2(c, d, e, a, b, 23)
179 ROUND2(b, c, d, e, a, 24)
180 ROUND2(a, b, c, d, e, 25)
181 ROUND2(e, a, b, c, d, 26)
182 ROUND2(d, e, a, b, c, 27)
183 ROUND2(c, d, e, a, b, 28)
184 ROUND2(b, c, d, e, a, 29)
185 ROUND2(a, b, c, d, e, 30)
186 ROUND2(e, a, b, c, d, 31)
187 ROUND2(d, e, a, b, c, 32)
188 ROUND2(c, d, e, a, b, 33)
189 ROUND2(b, c, d, e, a, 34)
190 ROUND2(a, b, c, d, e, 35)
191 ROUND2(e, a, b, c, d, 36)
192 ROUND2(d, e, a, b, c, 37)
193 ROUND2(c, d, e, a, b, 38)
194 ROUND2(b, c, d, e, a, 39)
195 ········
196 MOVW $0x8F1BBCDC, R(const) // round constant for rounds 40-59
197 ROUND3(a, b, c, d, e, 40)
198 ROUND3(e, a, b, c, d, 41)
199 ROUND3(d, e, a, b, c, 42)
200 ROUND3(c, d, e, a, b, 43)
201 ROUND3(b, c, d, e, a, 44)
202 ROUND3(a, b, c, d, e, 45)
203 ROUND3(e, a, b, c, d, 46)
204 ROUND3(d, e, a, b, c, 47)
205 ROUND3(c, d, e, a, b, 48)
206 ROUND3(b, c, d, e, a, 49)
207 ROUND3(a, b, c, d, e, 50)
208 ROUND3(e, a, b, c, d, 51)
209 ROUND3(d, e, a, b, c, 52)
210 ROUND3(c, d, e, a, b, 53)
211 ROUND3(b, c, d, e, a, 54)
212 ROUND3(a, b, c, d, e, 55)
213 ROUND3(e, a, b, c, d, 56)
214 ROUND3(d, e, a, b, c, 57)
215 ROUND3(c, d, e, a, b, 58)
216 ROUND3(b, c, d, e, a, 59)
217 ········
218 MOVW $0xCA62C1D6, R(const) // round constant for rounds 60-79
219 ROUND4(a, b, c, d, e, 60)
220 ROUND4(e, a, b, c, d, 61)
221 ROUND4(d, e, a, b, c, 62)
222 ROUND4(c, d, e, a, b, 63)
223 ROUND4(b, c, d, e, a, 64)
224 ROUND4(a, b, c, d, e, 65)
225 ROUND4(e, a, b, c, d, 66)
226 ROUND4(d, e, a, b, c, 67)
227 ROUND4(c, d, e, a, b, 68)
228 ROUND4(b, c, d, e, a, 69)
229 ROUND4(a, b, c, d, e, 70)
230 ROUND4(e, a, b, c, d, 71)
231 ROUND4(d, e, a, b, c, 72)
232 ROUND4(c, d, e, a, b, 73)
233 ROUND4(b, c, d, e, a, 74)
234 ROUND4(a, b, c, d, e, 75)
235 ROUND4(e, a, b, c, d, 76)
236 ROUND4(d, e, a, b, c, 77)
237 ROUND4(c, d, e, a, b, 78)
238 ROUND4(b, c, d, e, a, 79)
239
240 // Accumulate - reload the values saved at SP+4 into temporaries and add them to the new a..e
241 MOVM.IB (R13), [R(t0),R(t1),R(t2),R(t3),R(t4)]
242 ADD R(t0), R(a)
243 ADD R(t1), R(b)
244 ADD R(t2), R(c)
245 ADD R(t3), R(d)
246 ADD R(t4), R(e)
247 // Advance to the next chunk. The test is at the bottom, so the body always runs at least once: p is assumed to hold at least one full 64-byte chunk
248 MOVW p_end(SP), R(t0)
249 ADD $64, R(data)
250 CMP R(t0), R(data)
251 BLO loop // repeat while data < p_end (unsigned compare)
252
253 // Save final SHA1 accumulator
254 MOVW dig+0(FP), R(t0)
255 MOVM.IA [R(a),R(b),R(c),R(d),R(e)], (R(t0))
256
257 RET
OLDNEW
« no previous file with comments | « src/pkg/crypto/sha1/sha1block.go ('k') | src/pkg/crypto/sha1/sha1block_decl.go » ('j') | no next file with comments »

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b