Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(1437)

Side by Side Diff: src/pkg/crypto/md5/md5block_arm.s

Issue 11648043: code review 11648043: crypto/md5: native arm assembler version (Closed)
Patch Set: diff -r 3bf9ffdcca1f https://code.google.com/p/go Created 10 years, 8 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/pkg/crypto/md5/md5block.go ('k') | src/pkg/crypto/md5/md5block_decl.go » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 //
5 // ARM version of md5block.go by Nick Craig-Wood
rsc 2013/07/22 18:41:51 Delete. There are no author lines in the Go tree.
6 ········
7 // Register definitions: each symbolic name is a register number, used below as R(name)
8 data = 0 // Pointer to the current 64-byte block of data to hash
9 table = 1 // Pointer into the MD5 constants table (advanced as constants are loaded)
10 a = 2 // MD5 accumulator (first digest word)
11 b = 3 // MD5 accumulator (second digest word)
12 c = 4 // MD5 accumulator (third digest word)
13 d = 5 // MD5 accumulator (fourth digest word)
14 c0 = 6 // MD5 constant 1 of 4; also briefly holds the digest pointer when the accumulators are loaded
15 c1 = 7 // MD5 constant 2 of 4
16 c2 = 8 // MD5 constant 3 of 4
17 // r9, r10 are forbidden, so they are skipped here (presumably reserved by the Go toolchain - TODO confirm)
18 // r11 is OK provided you check the generated assembler output to confirm that no synthetic instructions use it
19 c3 = 11 // MD5 constant 4 of 4
20 t0 = 12 // temporary: round function value and general scratch
21 t1 = 14 // temporary: holds the X[index] data word in each round
22
23 // func block(dig *digest, p []byte)
24 // 0(FP) is *digest
25 // 4(FP) is p.array (struct Slice)
26 // 8(FP) is p.len
27 //12(FP) is p.cap
28 //
29 // Stack frame: byte offsets from R13 of the locals used by block
30 // 0(R13) is saved LR
31 m_to = 4 // 4(R13) memmove to address
32 m_from = 8 // 8(R13) memmove from address; also keeps the original data pointer while a block is processed
33 m_n = 12 // 12(R13) memmove count
34 p_end = 16 // 16(R13) pointer to the end of data
35 buf = 20 // 20(R13) 16 words (64 bytes) temporary buffer that receives a copy of unaligned input
36
37 TEXT ·block(SB), 7, $(20*4)-16 // 20 words (80 bytes) of frame, 16 bytes of arguments
dfc 2013/07/22 04:19:53 I've never seen this 20*4 form before, does it act
Nick Craig-Wood 2013/07/22 15:37:40 The assembler generated is correct str r14, [
rsc 2013/07/22 18:41:51 Yes, it works, but please just write 80. To allow
38 MOVW p+4(FP), R(data) // pointer to the data
39 MOVW p_len+8(FP), R(t0) // number of bytes
40 BIC.S $63, R(t0) // round the length down to a multiple of 64, setting the flags
rsc 2013/07/22 18:41:51 You can assume the length is a multiple of 64. The
41 BEQ end // nothing to do if there is no whole 64-byte block
rsc 2013/07/22 18:41:51 You can also assume the length is nonzero. The oth
42 ADD R(data), R(t0) // t0 = data + rounded length
43 MOVW R(t0), p_end(R13) // pointer to end of data
44
45 loop:
46 MOVW R(data), m_from(R13) // save the data pointer: it is the memmove source and is reloaded at the end of the block
47 AND.S $3, R(data), R(t0) // test the low two bits of data; TST $3, R(data) is not working, see issue 5921
48 BEQ aligned // data is word aligned - skip the copy
49
50 // Copy the unaligned source data into the aligned temporary buffer
51 // memmove(to=4(R13), from=8(R13), n=12(R13)) - Corrupts all registers
52 MOVW $buf(R13), R(t0) // t0 = address of the temporary buffer
53 MOVW R(t0), m_to(R13) // memmove destination
54 MOVW $64, R(t0)
55 MOVW R(t0), m_n(R13) // memmove count: one 64-byte block
56 BL runtime·memmove(SB)
57
58 // Point to the local aligned copy of the data
59 MOVW $buf(R13), R(data)
60
61 aligned:
62 // Point to the table of constants
63 // A PC relative add would be cheaper than this
64 MOVW $_md5_block_table(SB), R(table) // reset to the start of the table for every block
65
66 // Load up initial MD5 accumulator
67 MOVW dig+0(FP), R(c0) // c0 temporarily holds the digest pointer
68 MOVM.IA (R(c0)), [R(a),R(b),R(c),R(d)] // a, b, c, d = the four digest words
69
70 // Round 1 step: a += (((c^d)&b)^d) + X[index] + const
71 // a = a<<shift | a>>(32-shift) + b (the rotate left is done as a rotate right by 32-shift)
72 #define ROUND1(a, b, c, d, index, shift, const) \
73 EOR R(c), R(d), R(t0) ; \
74 AND R(b), R(t0) ; \
75 EOR R(d), R(t0) ; \
76 MOVW (index<<2)(R(data)), R(t1) ; \
77 ADD R(t1), R(t0) ; \
78 ADD R(const), R(t0) ; \
rsc 2013/07/22 18:41:51 If you do ADD $0x######, R(t0) or MOVW $0x#####
Nick Craig-Wood 2013/07/23 08:25:14 This generates LDR r11, 0xxxx(PC) ADD R(t0),
79 ADD R(t0), R(a) ; \
80 ADD R(a)@>(32-shift), R(b), R(a) ;
81
82 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] // load the next four constants and advance table; repeated before every group of four steps
83 ROUND1(a, b, c, d, 0, 7, c0)
84 ROUND1(d, a, b, c, 1, 12, c1)
85 ROUND1(c, d, a, b, 2, 17, c2)
86 ROUND1(b, c, d, a, 3, 22, c3)
87
88 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)]
89 ROUND1(a, b, c, d, 4, 7, c0)
90 ROUND1(d, a, b, c, 5, 12, c1)
91 ROUND1(c, d, a, b, 6, 17, c2)
92 ROUND1(b, c, d, a, 7, 22, c3)
93
94 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)]
95 ROUND1(a, b, c, d, 8, 7, c0)
96 ROUND1(d, a, b, c, 9, 12, c1)
97 ROUND1(c, d, a, b, 10, 17, c2)
98 ROUND1(b, c, d, a, 11, 22, c3)
99
100 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)]
101 ROUND1(a, b, c, d, 12, 7, c0)
102 ROUND1(d, a, b, c, 13, 12, c1)
103 ROUND1(c, d, a, b, 14, 17, c2)
104 ROUND1(b, c, d, a, 15, 22, c3)
105 ········
106 // Round 2 step: a += (((b^c)&d)^c) + X[index] + const
107 // a = a<<shift | a>>(32-shift) + b (the rotate left is done as a rotate right by 32-shift)
108 #define ROUND2(a, b, c, d, index, shift, const) \
109 EOR R(b), R(c), R(t0) ; \
110 AND R(d), R(t0) ; \
111 EOR R(c), R(t0) ; \
112 MOVW (index<<2)(R(data)), R(t1) ; \
113 ADD R(t1), R(t0) ; \
114 ADD R(const), R(t0) ; \
115 ADD R(t0), R(a) ; \
116 ADD R(a)@>(32-shift), R(b), R(a) ;
117
118 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)]
119 ROUND2(a, b, c, d, 1, 5, c0)
120 ROUND2(d, a, b, c, 6, 9, c1)
121 ROUND2(c, d, a, b, 11, 14, c2)
122 ROUND2(b, c, d, a, 0, 20, c3)
123
124 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)]
125 ROUND2(a, b, c, d, 5, 5, c0)
126 ROUND2(d, a, b, c, 10, 9, c1)
127 ROUND2(c, d, a, b, 15, 14, c2)
128 ROUND2(b, c, d, a, 4, 20, c3)
129
130 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)]
131 ROUND2(a, b, c, d, 9, 5, c0)
132 ROUND2(d, a, b, c, 14, 9, c1)
133 ROUND2(c, d, a, b, 3, 14, c2)
134 ROUND2(b, c, d, a, 8, 20, c3)
135
136 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)]
137 ROUND2(a, b, c, d, 13, 5, c0)
138 ROUND2(d, a, b, c, 2, 9, c1)
139 ROUND2(c, d, a, b, 7, 14, c2)
140 ROUND2(b, c, d, a, 12, 20, c3)
141 ········
142 // Round 3 step: a += (b^c^d) + X[index] + const
143 // a = a<<shift | a>>(32-shift) + b (the rotate left is done as a rotate right by 32-shift)
144 #define ROUND3(a, b, c, d, index, shift, const) \
145 EOR R(b), R(c), R(t0) ; \
146 EOR R(d), R(t0) ; \
147 MOVW (index<<2)(R(data)), R(t1) ; \
148 ADD R(t1), R(t0) ; \
149 ADD R(const), R(t0) ; \
150 ADD R(t0), R(a) ; \
151 ADD R(a)@>(32-shift), R(b), R(a) ;
152
153 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)]
154 ROUND3(a, b, c, d, 5, 4, c0)
155 ROUND3(d, a, b, c, 8, 11, c1)
156 ROUND3(c, d, a, b, 11, 16, c2)
157 ROUND3(b, c, d, a, 14, 23, c3)
158
159 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)]
160 ROUND3(a, b, c, d, 1, 4, c0)
161 ROUND3(d, a, b, c, 4, 11, c1)
162 ROUND3(c, d, a, b, 7, 16, c2)
163 ROUND3(b, c, d, a, 10, 23, c3)
164
165 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)]
166 ROUND3(a, b, c, d, 13, 4, c0)
167 ROUND3(d, a, b, c, 0, 11, c1)
168 ROUND3(c, d, a, b, 3, 16, c2)
169 ROUND3(b, c, d, a, 6, 23, c3)
170
171 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)]
172 ROUND3(a, b, c, d, 9, 4, c0)
173 ROUND3(d, a, b, c, 12, 11, c1)
174 ROUND3(c, d, a, b, 15, 16, c2)
175 ROUND3(b, c, d, a, 2, 23, c3)
176 ········
177 // Round 4 step: a += (c^(b|^d)) + X[index] + const
178 // a = a<<shift | a>>(32-shift) + b (the rotate left is done as a rotate right by 32-shift)
179 #define ROUND4(a, b, c, d, index, shift, const) \
180 MVN R(d), R(t0) ; \
181 ORR R(b), R(t0) ; \
182 EOR R(c), R(t0) ; \
183 MOVW (index<<2)(R(data)), R(t1) ; \
184 ADD R(t1), R(t0) ; \
185 ADD R(const), R(t0) ; \
186 ADD R(t0), R(a) ; \
187 ADD R(a)@>(32-shift), R(b), R(a) ;
188 ········
189 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)]
190 ROUND4(a, b, c, d, 0, 6, c0)
191 ROUND4(d, a, b, c, 7, 10, c1)
192 ROUND4(c, d, a, b, 14, 15, c2)
193 ROUND4(b, c, d, a, 5, 21, c3)
194
195 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)]
196 ROUND4(a, b, c, d, 12, 6, c0)
197 ROUND4(d, a, b, c, 3, 10, c1)
198 ROUND4(c, d, a, b, 10, 15, c2)
199 ROUND4(b, c, d, a, 1, 21, c3)
200
201 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)]
202 ROUND4(a, b, c, d, 8, 6, c0)
203 ROUND4(d, a, b, c, 15, 10, c1)
204 ROUND4(c, d, a, b, 6, 15, c2)
205 ROUND4(b, c, d, a, 13, 21, c3)
206
207 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)]
208 ROUND4(a, b, c, d, 4, 6, c0)
209 ROUND4(d, a, b, c, 11, 10, c1)
210 ROUND4(c, d, a, b, 2, 15, c2)
211 ROUND4(b, c, d, a, 9, 21, c3)
212 ········
213 MOVW dig+0(FP), R(t0) // t0 = digest pointer
214 MOVM.IA (R(t0)), [R(c0),R(c1),R(c2),R(c3)] // c0-c3 = digest words as they were before this block
215 ········
216 ADD R(c0), R(a) // add the previous digest words to the new accumulators
217 ADD R(c1), R(b)
218 ADD R(c2), R(c)
219 ADD R(c3), R(d)
220
221 MOVM.IA [R(a),R(b),R(c),R(d)], (R(t0)) // store the updated digest words
222
223 MOVW m_from(R13), R(data) // restore the saved data pointer (data may have been pointing at buf)
224 ADD $64, R(data) // advance to the next 64-byte block
225 MOVW p_end(R13), R(t0)
226 CMP R(t0), R(data)
227 BLO loop // loop while data is below the end pointer
228
229 end:
230 RET
231
232 // MD5 constants table: 64 words, 16 per round, in the order block consumes them (four at a time)
rsc 2013/07/22 18:41:51 MD5
233 TEXT _md5_block_table(SB),7,$-4 // the constants are emitted as WORDs inside a TEXT symbol
rsc 2013/07/22 18:41:51 Please make this (read-only) data. See the masks a
Nick Craig-Wood 2013/07/23 08:25:14 OK I'll have a go with that. In an ideal world on
234 // Round 1 constants (16 words)
235 WORD $0xd76aa478
236 WORD $0xe8c7b756
237 WORD $0x242070db
238 WORD $0xc1bdceee
239 WORD $0xf57c0faf
240 WORD $0x4787c62a
241 WORD $0xa8304613
242 WORD $0xfd469501
243 WORD $0x698098d8
244 WORD $0x8b44f7af
245 WORD $0xffff5bb1
246 WORD $0x895cd7be
247 WORD $0x6b901122
248 WORD $0xfd987193
249 WORD $0xa679438e
250 WORD $0x49b40821
251 // Round 2 constants (16 words)
252 WORD $0xf61e2562
253 WORD $0xc040b340
254 WORD $0x265e5a51
255 WORD $0xe9b6c7aa
256 WORD $0xd62f105d
257 WORD $0x02441453
258 WORD $0xd8a1e681
259 WORD $0xe7d3fbc8
260 WORD $0x21e1cde6
261 WORD $0xc33707d6
262 WORD $0xf4d50d87
263 WORD $0x455a14ed
264 WORD $0xa9e3e905
265 WORD $0xfcefa3f8
266 WORD $0x676f02d9
267 WORD $0x8d2a4c8a
268 // Round 3 constants (16 words)
269 WORD $0xfffa3942
270 WORD $0x8771f681
271 WORD $0x6d9d6122
272 WORD $0xfde5380c
273 WORD $0xa4beea44
274 WORD $0x4bdecfa9
275 WORD $0xf6bb4b60
276 WORD $0xbebfbc70
277 WORD $0x289b7ec6
278 WORD $0xeaa127fa
279 WORD $0xd4ef3085
280 WORD $0x04881d05
281 WORD $0xd9d4d039
282 WORD $0xe6db99e5
283 WORD $0x1fa27cf8
284 WORD $0xc4ac5665
285 // Round 4 constants (16 words)
286 WORD $0xf4292244
287 WORD $0x432aff97
288 WORD $0xab9423a7
289 WORD $0xfc93a039
290 WORD $0x655b59c3
291 WORD $0x8f0ccc92
292 WORD $0xffeff47d
293 WORD $0x85845dd1
294 WORD $0x6fa87e4f
295 WORD $0xfe2ce6e0
296 WORD $0xa3014314
297 WORD $0x4e0811a1
298 WORD $0xf7537e82
299 WORD $0xbd3af235
300 WORD $0x2ad7d2bb
301 WORD $0xeb86d391
OLDNEW
« no previous file with comments | « src/pkg/crypto/md5/md5block.go ('k') | src/pkg/crypto/md5/md5block_decl.go » ('j') | no next file with comments »

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b