Left: | ||
Right: |
OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2013 The Go Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style | |
3 // license that can be found in the LICENSE file. | |
4 // | |
5 // ARM version of md5block.go by Nick Craig-Wood | |
rsc
2013/07/22 18:41:51
Delete. There are no author lines in the Go tree.
| |
6 ········ | |
7 // Register definitions | |
// Each name below is a register number; the code refers to the register as R(name), e.g. R(data). | |
8 data = 0 // Pointer to data to hash | |
9 table = 1 // Pointer to MD5 constants table | |
10 a = 2 // MD5 accumulator | |
11 b = 3 // MD5 accumulator | |
12 c = 4 // MD5 accumulator | |
13 d = 5 // MD5 accumulator | |
14 c0 = 6 // MD5 constant | |
15 c1 = 7 // MD5 constant | |
16 c2 = 8 // MD5 constant | |
17 // r9, r10 are forbidden | |
18 // r11 is OK provided you check that the assembler generates no synthetic instructions that use it | |
19 c3 = 11 // MD5 constant | |
20 t0 = 12 // temporary | |
21 t1 = 14 // temporary | |
22 | |
23 // func block(dig *digest, p []byte) | |
24 // 0(FP) is *digest | |
25 // 4(FP) is p.array (struct Slice) | |
26 // 8(FP) is p.len | |
27 //12(FP) is p.cap | |
28 // | |
29 // Stack frame | |
// The names below are byte offsets from R13, used as name(R13). | |
// m_to, m_from and m_n are the argument slots read by runtime·memmove. | |
// m_from is also written at the top of every loop pass and reloaded at the | |
// bottom, so it doubles as the saved source address of the current block. | |
30 // 0(R13) is saved LR | |
31 m_to = 4 // 4(R13) memmove to address | |
32 m_from = 8 // 8(R13) memmove from address | |
33 m_n = 12 //12(R13) memmove count | |
34 p_end = 16 //16(R13) pointer to the end of data | |
35 buf = 20 //20(R13) 16-word (64-byte) temporary buffer holding an aligned copy of one block | |
36 | |
// block folds the data in p into the MD5 state at *dig, 64 bytes per pass of loop. | |
// Entry: compute the end-of-data pointer once and keep it in the frame at p_end. | |
37 TEXT ·block(SB), 7, $(20*4)-16 | |
dfc
2013/07/22 04:19:53
I've never seen this 20*4 form before, does it act
Nick Craig-Wood
2013/07/22 15:37:40
The assembler generated is correct
str r14, [
rsc
2013/07/22 18:41:51
Yes, it works, but please just write 80.
To allow
| |
38 MOVW p+4(FP), R(data) // pointer to the data | |
39 MOVW p_len+8(FP), R(t0) // number of bytes | |
40 BIC.S $63, R(t0) // round down to a multiple of 64; .S sets the flags for the BEQ below | |
rsc
2013/07/22 18:41:51
You can assume the length is a multiple of 64. The
| |
41 BEQ end // return if there is no whole 64-byte block | |
rsc
2013/07/22 18:41:51
You can also assume the length is nonzero. The oth
| |
// t0 = data + rounded length | |
42 ADD R(data), R(t0) | |
43 MOVW R(t0), p_end(R13) // pointer to end of data | |
44 | |
45 loop: | |
// Save the source address of this block: it is the memmove source argument | |
// and is reloaded at the bottom of the loop to step to the next block. | |
46 MOVW R(data), m_from(R13) | |
47 AND.S $3, R(data), R(t0) // test the low two address bits; TST $3, R(data) does not work, see issue 5921 | |
48 BEQ aligned // data is word-aligned - skip the copy | |
49 | |
50 // Copy the unaligned source data into the aligned temporary buffer | |
51 // memmove(to=4(R13), from=8(R13), n=12(R13)) - corrupts all registers | |
52 MOVW $buf(R13), R(t0) | |
53 MOVW R(t0), m_to(R13) | |
54 MOVW $64, R(t0) | |
55 MOVW R(t0), m_n(R13) | |
56 BL runtime·memmove(SB) | |
57 | |
58 // Point to the local aligned copy of the data | |
59 MOVW $buf(R13), R(data) | |
60 | |
61 aligned: | |
62 // Point to the table of constants | |
63 // A PC relative add would be cheaper than this | |
// table is reloaded on every pass because the rounds below advance it | |
// (MOVM.IA.W) and the memmove call above may have overwritten it. | |
64 MOVW $_md5_block_table(SB), R(table) | |
65 | |
66 // Load up initial MD5 accumulator | |
// c0 is used here only as a scratch pointer to *dig. | |
67 MOVW dig+0(FP), R(c0) | |
68 MOVM.IA (R(c0)), [R(a),R(b),R(c),R(d)] | |
69 | |
// Round 1 step. X[index] is the 32-bit word at byte offset index<<2 from data. | |
// const names the register that holds the step's constant. Clobbers t0 and t1. | |
70 // a += (((c^d)&b)^d) + X[index] + const | |
71 // a = a<<shift | a>>(32-shift) + b | |
72 #define ROUND1(a, b, c, d, index, shift, const) \ | |
73 EOR R(c), R(d), R(t0) ; \ | |
74 AND R(b), R(t0) ; \ | |
75 EOR R(d), R(t0) ; \ | |
76 MOVW (index<<2)(R(data)), R(t1) ; \ | |
77 ADD R(t1), R(t0) ; \ | |
78 ADD R(const), R(t0) ; \ | |
rsc
2013/07/22 18:41:51
If you do
ADD $0x######, R(t0)
or
MOVW $0x#####
Nick Craig-Wood
2013/07/23 08:25:14
This generates
LDR r11, 0xxxx(PC)
ADD R(t0),
| |
79 ADD R(t0), R(a) ; \ | |
80 ADD R(a)@>(32-shift), R(b), R(a) ; | |
81 | |
// Round 1: sixteen steps using X[0..15] in order. Each MOVM.IA.W loads the | |
// next four table constants into c0-c3 and advances table past them. | |
82 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] | |
83 ROUND1(a, b, c, d, 0, 7, c0) | |
84 ROUND1(d, a, b, c, 1, 12, c1) | |
85 ROUND1(c, d, a, b, 2, 17, c2) | |
86 ROUND1(b, c, d, a, 3, 22, c3) | |
87 | |
88 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] | |
89 ROUND1(a, b, c, d, 4, 7, c0) | |
90 ROUND1(d, a, b, c, 5, 12, c1) | |
91 ROUND1(c, d, a, b, 6, 17, c2) | |
92 ROUND1(b, c, d, a, 7, 22, c3) | |
93 | |
94 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] | |
95 ROUND1(a, b, c, d, 8, 7, c0) | |
96 ROUND1(d, a, b, c, 9, 12, c1) | |
97 ROUND1(c, d, a, b, 10, 17, c2) | |
98 ROUND1(b, c, d, a, 11, 22, c3) | |
99 | |
100 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] | |
101 ROUND1(a, b, c, d, 12, 7, c0) | |
102 ROUND1(d, a, b, c, 13, 12, c1) | |
103 ROUND1(c, d, a, b, 14, 17, c2) | |
104 ROUND1(b, c, d, a, 15, 22, c3) | |
105 ········ | |
// Round 2 step. X[index] is the 32-bit word at byte offset index<<2 from data. | |
// const names the register that holds the step's constant. Clobbers t0 and t1. | |
106 // a += (((b^c)&d)^c) + X[index] + const | |
107 // a = a<<shift | a>>(32-shift) + b | |
108 #define ROUND2(a, b, c, d, index, shift, const) \ | |
109 EOR R(b), R(c), R(t0) ; \ | |
110 AND R(d), R(t0) ; \ | |
111 EOR R(c), R(t0) ; \ | |
112 MOVW (index<<2)(R(data)), R(t1) ; \ | |
113 ADD R(t1), R(t0) ; \ | |
114 ADD R(const), R(t0) ; \ | |
115 ADD R(t0), R(a) ; \ | |
116 ADD R(a)@>(32-shift), R(b), R(a) ; | |
117 | |
// Round 2: sixteen steps; step i reads X[(1 + 5*i) mod 16]. Each MOVM.IA.W | |
// loads the next four table constants into c0-c3 and advances table past them. | |
118 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] | |
119 ROUND2(a, b, c, d, 1, 5, c0) | |
120 ROUND2(d, a, b, c, 6, 9, c1) | |
121 ROUND2(c, d, a, b, 11, 14, c2) | |
122 ROUND2(b, c, d, a, 0, 20, c3) | |
123 | |
124 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] | |
125 ROUND2(a, b, c, d, 5, 5, c0) | |
126 ROUND2(d, a, b, c, 10, 9, c1) | |
127 ROUND2(c, d, a, b, 15, 14, c2) | |
128 ROUND2(b, c, d, a, 4, 20, c3) | |
129 | |
130 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] | |
131 ROUND2(a, b, c, d, 9, 5, c0) | |
132 ROUND2(d, a, b, c, 14, 9, c1) | |
133 ROUND2(c, d, a, b, 3, 14, c2) | |
134 ROUND2(b, c, d, a, 8, 20, c3) | |
135 | |
136 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] | |
137 ROUND2(a, b, c, d, 13, 5, c0) | |
138 ROUND2(d, a, b, c, 2, 9, c1) | |
139 ROUND2(c, d, a, b, 7, 14, c2) | |
140 ROUND2(b, c, d, a, 12, 20, c3) | |
141 ········ | |
// Round 3 step. X[index] is the 32-bit word at byte offset index<<2 from data. | |
// const names the register that holds the step's constant. Clobbers t0 and t1. | |
142 // a += (b^c^d) + X[index] + const | |
143 // a = a<<shift | a>>(32-shift) + b | |
144 #define ROUND3(a, b, c, d, index, shift, const) \ | |
145 EOR R(b), R(c), R(t0) ; \ | |
146 EOR R(d), R(t0) ; \ | |
147 MOVW (index<<2)(R(data)), R(t1) ; \ | |
148 ADD R(t1), R(t0) ; \ | |
149 ADD R(const), R(t0) ; \ | |
150 ADD R(t0), R(a) ; \ | |
151 ADD R(a)@>(32-shift), R(b), R(a) ; | |
152 | |
// Round 3: sixteen steps; step i reads X[(5 + 3*i) mod 16]. Each MOVM.IA.W | |
// loads the next four table constants into c0-c3 and advances table past them. | |
153 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] | |
154 ROUND3(a, b, c, d, 5, 4, c0) | |
155 ROUND3(d, a, b, c, 8, 11, c1) | |
156 ROUND3(c, d, a, b, 11, 16, c2) | |
157 ROUND3(b, c, d, a, 14, 23, c3) | |
158 | |
159 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] | |
160 ROUND3(a, b, c, d, 1, 4, c0) | |
161 ROUND3(d, a, b, c, 4, 11, c1) | |
162 ROUND3(c, d, a, b, 7, 16, c2) | |
163 ROUND3(b, c, d, a, 10, 23, c3) | |
164 | |
165 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] | |
166 ROUND3(a, b, c, d, 13, 4, c0) | |
167 ROUND3(d, a, b, c, 0, 11, c1) | |
168 ROUND3(c, d, a, b, 3, 16, c2) | |
169 ROUND3(b, c, d, a, 6, 23, c3) | |
170 | |
171 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] | |
172 ROUND3(a, b, c, d, 9, 4, c0) | |
173 ROUND3(d, a, b, c, 12, 11, c1) | |
174 ROUND3(c, d, a, b, 15, 16, c2) | |
175 ROUND3(b, c, d, a, 2, 23, c3) | |
176 ········ | |
// Round 4 step. X[index] is the 32-bit word at byte offset index<<2 from data. | |
// const names the register that holds the step's constant. Clobbers t0 and t1. | |
// In the formula below ^d is the bitwise complement of d (the MVN instruction). | |
177 // a += (c^(b|^d)) + X[index] + const | |
178 // a = a<<shift | a>>(32-shift) + b | |
179 #define ROUND4(a, b, c, d, index, shift, const) \ | |
180 MVN R(d), R(t0) ; \ | |
181 ORR R(b), R(t0) ; \ | |
182 EOR R(c), R(t0) ; \ | |
183 MOVW (index<<2)(R(data)), R(t1) ; \ | |
184 ADD R(t1), R(t0) ; \ | |
185 ADD R(const), R(t0) ; \ | |
186 ADD R(t0), R(a) ; \ | |
187 ADD R(a)@>(32-shift), R(b), R(a) ; | |
188 ········ | |
// Round 4: sixteen steps; step i reads X[(7*i) mod 16]. Each MOVM.IA.W | |
// loads the next four table constants into c0-c3 and advances table past them. | |
189 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] | |
190 ROUND4(a, b, c, d, 0, 6, c0) | |
191 ROUND4(d, a, b, c, 7, 10, c1) | |
192 ROUND4(c, d, a, b, 14, 15, c2) | |
193 ROUND4(b, c, d, a, 5, 21, c3) | |
194 | |
195 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] | |
196 ROUND4(a, b, c, d, 12, 6, c0) | |
197 ROUND4(d, a, b, c, 3, 10, c1) | |
198 ROUND4(c, d, a, b, 10, 15, c2) | |
199 ROUND4(b, c, d, a, 1, 21, c3) | |
200 | |
201 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] | |
202 ROUND4(a, b, c, d, 8, 6, c0) | |
203 ROUND4(d, a, b, c, 15, 10, c1) | |
204 ROUND4(c, d, a, b, 6, 15, c2) | |
205 ROUND4(b, c, d, a, 13, 21, c3) | |
206 | |
207 MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] | |
208 ROUND4(a, b, c, d, 4, 6, c0) | |
209 ROUND4(d, a, b, c, 11, 10, c1) | |
210 ROUND4(c, d, a, b, 2, 15, c2) | |
211 ROUND4(b, c, d, a, 9, 21, c3) | |
212 ········ | |
// Reload the four state words still stored in *dig (the values from before | |
// this block) into c0-c3, add them to a-d, and write the sums back to *dig. | |
213 MOVW dig+0(FP), R(t0) | |
214 MOVM.IA (R(t0)), [R(c0),R(c1),R(c2),R(c3)] | |
215 ········ | |
216 ADD R(c0), R(a) | |
217 ADD R(c1), R(b) | |
218 ADD R(c2), R(c) | |
219 ADD R(c3), R(d) | |
220 | |
221 MOVM.IA [R(a),R(b),R(c),R(d)], (R(t0)) | |
222 | |
// Step to the next block from the saved source address rather than from | |
// data, which may point at buf; loop again while input remains before p_end. | |
223 MOVW m_from(R13), R(data) | |
224 ADD $64, R(data) | |
225 MOVW p_end(R13), R(t0) | |
226 CMP R(t0), R(data) | |
227 BLO loop | |
228 | |
229 end: | |
230 RET | |
231 | |
232 // MD5 constants table | |
// 64 words, 16 per round, stored in the order block consumes them: | |
// four at a time through MOVM.IA.W (R(table)). | |
rsc
2013/07/22 18:41:51
MD5
| |
233 TEXT _md5_block_table(SB),7,$-4 | |
rsc
2013/07/22 18:41:51
Please make this (read-only) data. See the masks a
Nick Craig-Wood
2013/07/23 08:25:14
OK I'll have a go with that.
In an ideal world on
| |
234 // Round 1 | |
235 WORD $0xd76aa478 | |
236 WORD $0xe8c7b756 | |
237 WORD $0x242070db | |
238 WORD $0xc1bdceee | |
239 WORD $0xf57c0faf | |
240 WORD $0x4787c62a | |
241 WORD $0xa8304613 | |
242 WORD $0xfd469501 | |
243 WORD $0x698098d8 | |
244 WORD $0x8b44f7af | |
245 WORD $0xffff5bb1 | |
246 WORD $0x895cd7be | |
247 WORD $0x6b901122 | |
248 WORD $0xfd987193 | |
249 WORD $0xa679438e | |
250 WORD $0x49b40821 | |
251 // Round 2 | |
252 WORD $0xf61e2562 | |
253 WORD $0xc040b340 | |
254 WORD $0x265e5a51 | |
255 WORD $0xe9b6c7aa | |
256 WORD $0xd62f105d | |
257 WORD $0x02441453 | |
258 WORD $0xd8a1e681 | |
259 WORD $0xe7d3fbc8 | |
260 WORD $0x21e1cde6 | |
261 WORD $0xc33707d6 | |
262 WORD $0xf4d50d87 | |
263 WORD $0x455a14ed | |
264 WORD $0xa9e3e905 | |
265 WORD $0xfcefa3f8 | |
266 WORD $0x676f02d9 | |
267 WORD $0x8d2a4c8a | |
268 // Round 3 | |
269 WORD $0xfffa3942 | |
270 WORD $0x8771f681 | |
271 WORD $0x6d9d6122 | |
272 WORD $0xfde5380c | |
273 WORD $0xa4beea44 | |
274 WORD $0x4bdecfa9 | |
275 WORD $0xf6bb4b60 | |
276 WORD $0xbebfbc70 | |
277 WORD $0x289b7ec6 | |
278 WORD $0xeaa127fa | |
279 WORD $0xd4ef3085 | |
280 WORD $0x04881d05 | |
281 WORD $0xd9d4d039 | |
282 WORD $0xe6db99e5 | |
283 WORD $0x1fa27cf8 | |
284 WORD $0xc4ac5665 | |
285 // Round 4 | |
286 WORD $0xf4292244 | |
287 WORD $0x432aff97 | |
288 WORD $0xab9423a7 | |
289 WORD $0xfc93a039 | |
290 WORD $0x655b59c3 | |
291 WORD $0x8f0ccc92 | |
292 WORD $0xffeff47d | |
293 WORD $0x85845dd1 | |
294 WORD $0x6fa87e4f | |
295 WORD $0xfe2ce6e0 | |
296 WORD $0xa3014314 | |
297 WORD $0x4e0811a1 | |
298 WORD $0xf7537e82 | |
299 WORD $0xbd3af235 | |
300 WORD $0x2ad7d2bb | |
301 WORD $0xeb86d391 | |
OLD | NEW |