Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(1027)

Delta Between Two Patch Sets: src/pkg/crypto/aes/asm_amd64.s

Issue 6549055: code review 6549055: crypto/aes: speed up using AES-NI on amd64 (Closed)
Left Patch Set: diff -r 29cff1e8de4e https://code.google.com/p/go Created 11 years, 6 months ago
Right Patch Set: diff -r c2719ae32b09 https://code.google.com/p/go/ Created 11 years, 6 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Right: Side by side diff | Download
« no previous file with change/comment | « src/pkg/crypto/aes/aes_test.go ('k') | src/pkg/crypto/aes/block.go » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
(no file at all)
1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // func hasAsm() bool
6 // returns whether AES-NI is supported
// Executes CPUID with AX = 1 and returns bit 25 of the resulting CX as the
// one-byte result (CPUID leaf 1 reports the AES feature flag in ECX bit 25).
7 TEXT ·hasAsm(SB),7,$0
8 XORQ AX, AX
9 INCL AX // AX = 1 selects the CPUID leaf to query
10 CPUID // overwrites AX, BX, CX and DX
11 SHRQ $25, CX // bring bit 25 of CX down to bit 0
12 ANDQ $1, CX // discard every other bit
13 MOVB CX, ret+0(FP) // result is 0 or 1
14 RET
15
16 // func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
// Encrypts the 16 bytes at src with the round keys at xk and writes the
// 16-byte result to dst.
// nr picks the entry point into the unrolled round sequence:
//   nr == 12 -> Lenc196 (12 AES steps in total)
//   nr <  12 -> Lenc128 (10 AES steps in total)
//   otherwise falls through Lenc256 (14 AES steps in total)
// Registers: CX = nr, AX = cursor into xk, DX = dst, BX = src,
// X0 = block state, X1 = round key currently in use.
// NOTE(review): the "196" in the label names corresponds to the nr == 12
// case, which is the round count normally associated with 192-bit keys.
17 TEXT ·encryptBlockAsm(SB),7,$0
18 MOVQ nr+0(FP), CX
19 MOVQ xk+8(FP), AX
20 MOVQ dst+16(FP), DX
21 MOVQ src+24(FP), BX
22 MOVUPS 0(AX), X1 // X1 = first round key
23 MOVUPS 0(BX), X0 // X0 = input block
24 ADDQ $16, AX // AX now points at the second round key
25 PXOR X1, X0 // initial key addition
26 SUBQ $12, CX // sets the flags tested by the two jumps below
27 JE Lenc196 // nr == 12
28 JB Lenc128 // nr < 12 (unsigned compare)
29 Lenc256: // reached by fall-through: two extra rounds, then continue below
30 MOVUPS 0(AX), X1
31 AESENC X1, X0
32 MOVUPS 16(AX), X1
33 AESENC X1, X0
34 ADDQ $32, AX // step past the two round keys just consumed
35 Lenc196: // two extra rounds, then the common tail
36 MOVUPS 0(AX), X1
37 AESENC X1, X0
38 MOVUPS 16(AX), X1
39 AESENC X1, X0
40 ADDQ $32, AX // step past the two round keys just consumed
41 Lenc128: // common tail: nine AESENC rounds followed by AESENCLAST
42 MOVUPS 0(AX), X1
43 AESENC X1, X0
44 MOVUPS 16(AX), X1
45 AESENC X1, X0
46 MOVUPS 32(AX), X1
47 AESENC X1, X0
48 MOVUPS 48(AX), X1
49 AESENC X1, X0
50 MOVUPS 64(AX), X1
51 AESENC X1, X0
52 MOVUPS 80(AX), X1
53 AESENC X1, X0
54 MOVUPS 96(AX), X1
55 AESENC X1, X0
56 MOVUPS 112(AX), X1
57 AESENC X1, X0
58 MOVUPS 128(AX), X1
59 AESENC X1, X0
60 MOVUPS 144(AX), X1
61 AESENCLAST X1, X0 // final round uses the last round key
62 MOVUPS X0, 0(DX) // store the result block at dst
63 RET
64
65 // func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
// Decrypts the 16 bytes at src with the round keys at xk and writes the
// 16-byte result to dst. The round keys are consumed front to back.
// NOTE(review): xk is presumably the "dec" schedule written by
// expandKeyAsm - confirm against the Go caller.
// nr picks the entry point into the unrolled round sequence:
//   nr == 12 -> Ldec196 (12 AES steps in total)
//   nr <  12 -> Ldec128 (10 AES steps in total)
//   otherwise falls through Ldec256 (14 AES steps in total)
// Registers: CX = nr, AX = cursor into xk, DX = dst, BX = src,
// X0 = block state, X1 = round key currently in use.
66 TEXT ·decryptBlockAsm(SB),7,$0
67 MOVQ nr+0(FP), CX
68 MOVQ xk+8(FP), AX
69 MOVQ dst+16(FP), DX
70 MOVQ src+24(FP), BX
71 MOVUPS 0(AX), X1 // X1 = first round key of the schedule at xk
72 MOVUPS 0(BX), X0 // X0 = input block
73 ADDQ $16, AX // AX now points at the second round key
74 PXOR X1, X0 // initial key addition
75 SUBQ $12, CX // sets the flags tested by the two jumps below
76 JE Ldec196 // nr == 12
77 JB Ldec128 // nr < 12 (unsigned compare)
78 Ldec256: // reached by fall-through: two extra rounds, then continue below
79 MOVUPS 0(AX), X1
80 AESDEC X1, X0
81 MOVUPS 16(AX), X1
82 AESDEC X1, X0
83 ADDQ $32, AX // step past the two round keys just consumed
84 Ldec196: // two extra rounds, then the common tail
85 MOVUPS 0(AX), X1
86 AESDEC X1, X0
87 MOVUPS 16(AX), X1
88 AESDEC X1, X0
89 ADDQ $32, AX // step past the two round keys just consumed
90 Ldec128: // common tail: nine AESDEC rounds followed by AESDECLAST
91 MOVUPS 0(AX), X1
92 AESDEC X1, X0
93 MOVUPS 16(AX), X1
94 AESDEC X1, X0
95 MOVUPS 32(AX), X1
96 AESDEC X1, X0
97 MOVUPS 48(AX), X1
98 AESDEC X1, X0
99 MOVUPS 64(AX), X1
100 AESDEC X1, X0
101 MOVUPS 80(AX), X1
102 AESDEC X1, X0
103 MOVUPS 96(AX), X1
104 AESDEC X1, X0
105 MOVUPS 112(AX), X1
106 AESDEC X1, X0
107 MOVUPS 128(AX), X1
108 AESDEC X1, X0
109 MOVUPS 144(AX), X1
110 AESDECLAST X1, X0 // final round uses the last round key
111 MOVUPS X0, 0(DX) // store the result block at dst
112 RET
113
114 // func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
115 // Note that round keys are stored in uint128 format, not uint32
// Writes the encryption round keys to enc, then fills dec from them: the
// enc round keys are copied in reverse order, and every key except the
// first and last one written is passed through AESIMC on the way.
// nr selects the expansion path:
//   nr == 12 -> Lexp_enc196 (13 round keys, 208 bytes written to enc)
//   nr <  12 -> Lexp_enc128 (11 round keys, 176 bytes written to enc)
//   otherwise falls through Lexp_enc256 (15 round keys, 240 bytes)
// Registers: CX = nr, AX = key, BX = write cursor into enc (advanced here
// and by the _expand_key_* helpers), DX = write cursor into dec,
// X0 and X2 = key material, X1 = AESKEYGENASSIST result, X4 = scratch.
// The AESKEYGENASSIST immediates are the per-step round constants.
116 TEXT ·expandKeyAsm(SB),7,$0
117 MOVQ nr+0(FP), CX
118 MOVQ key+8(FP), AX
119 MOVQ enc+16(FP), BX
120 MOVQ dec+24(FP), DX
121 MOVUPS (AX), X0 // X0 = first 16 bytes of the key
122 // enc
123 MOVUPS X0, (BX) // the first round key is the first 16 key bytes
124 ADDQ $16, BX
125 PXOR X4, X4 // _expand_key_* expect X4 to be zero
126 CMPL CX, $12 // NOTE(review): tests only the low 32 bits of CX, which was loaded with MOVQ
127 JE Lexp_enc196 // nr == 12
128 JB Lexp_enc128 // nr < 12 (unsigned compare)
129 Lexp_enc256: // reached by fall-through
130 MOVUPS 16(AX), X2 // X2 = second 16 bytes of the key
131 MOVUPS X2, (BX) // stored unchanged as the second round key
132 ADDQ $16, BX
// Alternate between updating X0 (256a, driven by X2) and X2 (256b, driven
// by X0). Each call stores one round key and advances BX by 16.
133 AESKEYGENASSIST $0x01, X2, X1
134 CALL _expand_key_256a<>(SB)
135 AESKEYGENASSIST $0x01, X0, X1
136 CALL _expand_key_256b<>(SB)
137 AESKEYGENASSIST $0x02, X2, X1
138 CALL _expand_key_256a<>(SB)
139 AESKEYGENASSIST $0x02, X0, X1
140 CALL _expand_key_256b<>(SB)
141 AESKEYGENASSIST $0x04, X2, X1
142 CALL _expand_key_256a<>(SB)
143 AESKEYGENASSIST $0x04, X0, X1
144 CALL _expand_key_256b<>(SB)
145 AESKEYGENASSIST $0x08, X2, X1
146 CALL _expand_key_256a<>(SB)
147 AESKEYGENASSIST $0x08, X0, X1
148 CALL _expand_key_256b<>(SB)
149 AESKEYGENASSIST $0x10, X2, X1
150 CALL _expand_key_256a<>(SB)
151 AESKEYGENASSIST $0x10, X0, X1
152 CALL _expand_key_256b<>(SB)
153 AESKEYGENASSIST $0x20, X2, X1
154 CALL _expand_key_256a<>(SB)
155 AESKEYGENASSIST $0x20, X0, X1
156 CALL _expand_key_256b<>(SB)
157 AESKEYGENASSIST $0x40, X2, X1
158 CALL _expand_key_256a<>(SB) // last round key: no matching 256b call
159 JMP Lexp_dec
160 Lexp_enc196:
161 MOVQ 16(AX), X2 // X2 low 8 bytes = key bytes 16..23
// _expand_key_192a stores 32 bytes and _expand_key_192b stores 16 bytes, so
// each a/b pair below emits three round keys.
162 AESKEYGENASSIST $0x01, X2, X1
163 CALL _expand_key_192a<>(SB)
164 AESKEYGENASSIST $0x02, X2, X1
165 CALL _expand_key_192b<>(SB)
166 AESKEYGENASSIST $0x04, X2, X1
167 CALL _expand_key_192a<>(SB)
168 AESKEYGENASSIST $0x08, X2, X1
169 CALL _expand_key_192b<>(SB)
170 AESKEYGENASSIST $0x10, X2, X1
171 CALL _expand_key_192a<>(SB)
172 AESKEYGENASSIST $0x20, X2, X1
173 CALL _expand_key_192b<>(SB)
174 AESKEYGENASSIST $0x40, X2, X1
175 CALL _expand_key_192a<>(SB)
176 AESKEYGENASSIST $0x80, X2, X1
177 CALL _expand_key_192b<>(SB)
178 JMP Lexp_dec
179 Lexp_enc128:
// Ten steps, each deriving the next round key from X0 and storing it.
180 AESKEYGENASSIST $0x01, X0, X1
181 CALL _expand_key_128<>(SB)
182 AESKEYGENASSIST $0x02, X0, X1
183 CALL _expand_key_128<>(SB)
184 AESKEYGENASSIST $0x04, X0, X1
185 CALL _expand_key_128<>(SB)
186 AESKEYGENASSIST $0x08, X0, X1
187 CALL _expand_key_128<>(SB)
188 AESKEYGENASSIST $0x10, X0, X1
189 CALL _expand_key_128<>(SB)
190 AESKEYGENASSIST $0x20, X0, X1
191 CALL _expand_key_128<>(SB)
192 AESKEYGENASSIST $0x40, X0, X1
193 CALL _expand_key_128<>(SB)
194 AESKEYGENASSIST $0x80, X0, X1
195 CALL _expand_key_128<>(SB)
196 AESKEYGENASSIST $0x1b, X0, X1
197 CALL _expand_key_128<>(SB)
198 AESKEYGENASSIST $0x36, X0, X1
199 CALL _expand_key_128<>(SB)
200 Lexp_dec:
201 // dec
202 SUBQ $16, BX // BX was one past the end of enc, now at the last enc round key
203 MOVUPS (BX), X1
204 MOVUPS X1, (DX) // first dec key = last enc key, copied unchanged
205 DECQ CX // the loop below runs nr-1 times
206 Lexp_dec_loop:
207 MOVUPS -16(BX), X1 // next enc round key, walking backwards
208 AESIMC X1, X0 // X0 = InvMixColumns of that key
209 MOVUPS X0, 16(DX) // append it to dec
210 SUBQ $16, BX
211 ADDQ $16, DX
212 DECQ CX
213 JNZ Lexp_dec_loop
214 MOVUPS -16(BX), X0 // one enc key is left: the first one
215 MOVUPS X0, 16(DX) // last dec key = first enc key, copied unchanged
216 RET
217
// Hand-encoded PSHUFD opcodes (66 0F 70 /r). The ModRM byte 0xc0 selects
// X0, X0 and 0xc9 selects X1, X1. A use must be followed by a BYTE holding
// the shuffle immediate.
// NOTE(review): neither macro is referenced in the visible source.
218 #define PSHUFD_X0_X0_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xc0
219 #define PSHUFD_X1_X1_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xc9
// _expand_key_128 derives the next round key in X0 and appends it at (BX).
// In:  X0 = previous round key, X1 = AESKEYGENASSIST result,
//      X4 = scratch whose low dword is zero, BX = output cursor.
// Out: X0 = new round key (also stored at the old BX), BX advanced by 16,
//      X1 and X4 overwritten (the low dword of X4 is zero again on return).
220 TEXT _expand_key_128<>(SB),7,$0
221 PSHUFD $0xff, X1, X1 // broadcast dword 3 of X1 to all four dwords
222 SHUFPS $0x10, X0, X4 // X4 = {X4[0], X4[0], X0[1], X0[0]}
223 PXOR X4, X0
224 SHUFPS $0x8c, X0, X4 // X4 = {X4[0], X4[3], X0[0], X0[2]}
225 PXOR X4, X0 // each dword of X0 is now XORed with all lower dwords
226 PXOR X1, X0 // mix in the broadcast AESKEYGENASSIST dword
227 MOVUPS X0, (BX) // store the new round key
228 ADDQ $16, BX
229 RET
230
// Hand-encoded opcodes. PSLLDQ_X5_ is 66 0F 73 /7 with X5 as the operand
// and must be followed by a BYTE holding the shift count in bytes.
// PSHUFD_X0_X3_ is 66 0F 70 /r with source X0 and destination X3 and must
// be followed by a BYTE holding the shuffle immediate.
// NOTE(review): PSHUFD_X0_X3_ is not referenced in the visible source.
231 #define PSLLDQ_X5_ BYTE $0x66; BYTE $0x0f; BYTE $0x73; BYTE $0xfd
232 #define PSHUFD_X0_X3_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xd8
// _expand_key_192a advances the key state held in X0 and X2 by one step and
// stores 32 bytes at (BX).
// In:  X0, X2 = current key state, X1 = AESKEYGENASSIST result,
//      X4 = scratch whose low dword is zero, BX = output cursor.
// Out: X0, X2 updated, BX advanced by 32. X1, X3, X4, X5, X6 overwritten.
233 TEXT _expand_key_192a<>(SB),7,$0
234 PSHUFD $0x55, X1, X1 // broadcast dword 1 of X1 to all four dwords
235 SHUFPS $0x10, X0, X4 // X4 = {X4[0], X4[0], X0[1], X0[0]}
236 PXOR X4, X0
237 SHUFPS $0x8c, X0, X4 // X4 = {X4[0], X4[3], X0[0], X0[2]}
238 PXOR X4, X0 // each dword of X0 is now XORed with all lower dwords
239 PXOR X1, X0 // mix in the broadcast AESKEYGENASSIST dword
240
241 MOVAPS X2, X5
242 MOVAPS X2, X6 // X6 keeps the old X2 for the first store below
243 PSLLDQ_X5_; BYTE $0x4 // shift X5 left by 4 bytes (one dword)
244 PSHUFD $0xff, X0, X3 // X3 = dword 3 of the new X0, broadcast
245 PXOR X3, X2
246 PXOR X5, X2 // X2 ^= X3 ^ (old X2 shifted up one dword)
247
248 MOVAPS X0, X1
249 SHUFPS $0x44, X0, X6 // X6 = {old X2[0], old X2[1], X0[0], X0[1]}
250 MOVUPS X6, (BX)
251 SHUFPS $0x4e, X2, X1 // X1 = {X0[2], X0[3], X2[0], X2[1]}
252 MOVUPS X1, 16(BX)
253 ADDQ $32, BX
254 RET
255
// _expand_key_192b advances the key state held in X0 and X2 by one step,
// using the same update as _expand_key_192a, and stores the new X0 (16
// bytes) at (BX).
// In:  X0, X2 = current key state, X1 = AESKEYGENASSIST result,
//      X4 = scratch whose low dword is zero, BX = output cursor.
// Out: X0, X2 updated, BX advanced by 16. X1, X3, X4, X5 overwritten.
256 TEXT _expand_key_192b<>(SB),7,$0
257 PSHUFD $0x55, X1, X1 // broadcast dword 1 of X1 to all four dwords
258 SHUFPS $0x10, X0, X4 // X4 = {X4[0], X4[0], X0[1], X0[0]}
259 PXOR X4, X0
260 SHUFPS $0x8c, X0, X4 // X4 = {X4[0], X4[3], X0[0], X0[2]}
261 PXOR X4, X0 // each dword of X0 is now XORed with all lower dwords
262 PXOR X1, X0 // mix in the broadcast AESKEYGENASSIST dword
263
264 MOVAPS X2, X5
265 PSLLDQ_X5_; BYTE $0x4 // shift X5 left by 4 bytes (one dword)
266 PSHUFD $0xff, X0, X3 // X3 = dword 3 of the new X0, broadcast
267 PXOR X3, X2
268 PXOR X5, X2 // X2 ^= X3 ^ (old X2 shifted up one dword)
269
270 MOVUPS X0, (BX) // store the new X0 as a round key
271 ADDQ $16, BX
272 RET
273
// _expand_key_256a is the X0 half of the 256-bit path. It is a tail jump to
// _expand_key_128, whose RET returns directly to this routine's caller.
274 TEXT _expand_key_256a<>(SB),7,$0
275 JMP _expand_key_128<>(SB)
276
// _expand_key_256b is the X2 half of the 256-bit path: it derives the next
// round key in X2 and appends it at (BX).
// In:  X2 = key half to update, X1 = AESKEYGENASSIST result,
//      X4 = scratch whose low dword is zero, BX = output cursor.
// Out: X2 = new round key (also stored at the old BX), BX advanced by 16,
//      X1 and X4 overwritten (the low dword of X4 is zero again on return).
277 TEXT _expand_key_256b<>(SB),7,$0
278 PSHUFD $0xaa, X1, X1 // broadcast dword 2 of X1 to all four dwords
279 SHUFPS $0x10, X2, X4 // X4 = {X4[0], X4[0], X2[1], X2[0]}
280 PXOR X4, X2
281 SHUFPS $0x8c, X2, X4 // X4 = {X4[0], X4[3], X2[0], X2[2]}
282 PXOR X4, X2 // each dword of X2 is now XORed with all lower dwords
283 PXOR X1, X2 // mix in the broadcast AESKEYGENASSIST dword
284
285 MOVUPS X2, (BX) // store the new round key
286 ADDQ $16, BX
287 RET
LEFTRIGHT

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b