LEFT | RIGHT |
(no file at all) | |
| 1 // Copyright 2012 The Go Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style |
| 3 // license that can be found in the LICENSE file. |
| 4 |
// func hasAsm() bool
// Reports whether the processor advertises the AES-NI instructions.
// Runs CPUID with AX = 1 and returns bit 25 of the resulting CX
// as a 0/1 byte. CPUID also overwrites AX, BX and DX.
TEXT ·hasAsm(SB),7,$0
	MOVQ	$1, AX			// CPUID leaf 1
	CPUID
	SHRQ	$25, CX			// move bit 25 down to bit 0
	ANDQ	$1, CX			// discard every other bit
	MOVB	CX, ret+0(FP)
	RET
| 15 |
// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
// Encrypts the 16-byte block at src with the expanded key at xk and
// writes the 16-byte result to dst.
//
// xk is read as consecutive 16-byte round keys. The number of rounds
// is selected from nr: nr < 12 runs 10 rounds, nr == 12 runs 12 rounds,
// and nr > 12 runs 14 rounds.
//
// Register use:
//	AX = cursor into the round keys	BX = src	DX = dst
//	CX = nr				X0 = state	X1 = current round key
TEXT ·encryptBlockAsm(SB),7,$0
	MOVQ	src+24(FP), BX
	MOVQ	dst+16(FP), DX
	MOVQ	xk+8(FP), AX
	MOVQ	nr+0(FP), CX
	MOVUPS	(BX), X0		// state = input block
	MOVUPS	(AX), X1		// round key 0
	PXOR	X1, X0			// initial key whitening
	ADDQ	$16, AX			// AX -> round key 1
	CMPQ	CX, $12
	JE	Lenc192
	JB	Lenc128
	// nr > 12: two extra rounds, then fall through.
Lenc256:
	MOVUPS	(AX), X1
	AESENC	X1, X0
	MOVUPS	16(AX), X1
	AESENC	X1, X0
	ADDQ	$32, AX
	// nr >= 12: two extra rounds, then fall through.
Lenc192:
	MOVUPS	(AX), X1
	AESENC	X1, X0
	MOVUPS	16(AX), X1
	AESENC	X1, X0
	ADDQ	$32, AX
	// Common tail: nine full rounds followed by the final round.
Lenc128:
	MOVUPS	(AX), X1
	AESENC	X1, X0
	MOVUPS	16(AX), X1
	AESENC	X1, X0
	MOVUPS	32(AX), X1
	AESENC	X1, X0
	MOVUPS	48(AX), X1
	AESENC	X1, X0
	MOVUPS	64(AX), X1
	AESENC	X1, X0
	MOVUPS	80(AX), X1
	AESENC	X1, X0
	MOVUPS	96(AX), X1
	AESENC	X1, X0
	MOVUPS	112(AX), X1
	AESENC	X1, X0
	MOVUPS	128(AX), X1
	AESENC	X1, X0
	MOVUPS	144(AX), X1
	AESENCLAST	X1, X0
	MOVUPS	X0, (DX)		// store the result block
	RET
| 64 |
// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
// Decrypts the 16-byte block at src with the expanded key at xk and
// writes the 16-byte result to dst.
//
// xk is read front to back as consecutive 16-byte round keys, so the
// caller must supply them already in the order the AESDEC rounds consume
// them. The number of rounds is selected from nr: nr < 12 runs 10 rounds,
// nr == 12 runs 12 rounds, and nr > 12 runs 14 rounds.
//
// Register use:
//	AX = cursor into the round keys	BX = src	DX = dst
//	CX = nr				X0 = state	X1 = current round key
TEXT ·decryptBlockAsm(SB),7,$0
	MOVQ	src+24(FP), BX
	MOVQ	dst+16(FP), DX
	MOVQ	xk+8(FP), AX
	MOVQ	nr+0(FP), CX
	MOVUPS	(BX), X0		// state = input block
	MOVUPS	(AX), X1		// first round key
	PXOR	X1, X0			// initial key whitening
	ADDQ	$16, AX			// AX -> next round key
	CMPQ	CX, $12
	JE	Ldec192
	JB	Ldec128
	// nr > 12: two extra rounds, then fall through.
Ldec256:
	MOVUPS	(AX), X1
	AESDEC	X1, X0
	MOVUPS	16(AX), X1
	AESDEC	X1, X0
	ADDQ	$32, AX
	// nr >= 12: two extra rounds, then fall through.
Ldec192:
	MOVUPS	(AX), X1
	AESDEC	X1, X0
	MOVUPS	16(AX), X1
	AESDEC	X1, X0
	ADDQ	$32, AX
	// Common tail: nine full rounds followed by the final round.
Ldec128:
	MOVUPS	(AX), X1
	AESDEC	X1, X0
	MOVUPS	16(AX), X1
	AESDEC	X1, X0
	MOVUPS	32(AX), X1
	AESDEC	X1, X0
	MOVUPS	48(AX), X1
	AESDEC	X1, X0
	MOVUPS	64(AX), X1
	AESDEC	X1, X0
	MOVUPS	80(AX), X1
	AESDEC	X1, X0
	MOVUPS	96(AX), X1
	AESDEC	X1, X0
	MOVUPS	112(AX), X1
	AESDEC	X1, X0
	MOVUPS	128(AX), X1
	AESDEC	X1, X0
	MOVUPS	144(AX), X1
	AESDECLAST	X1, X0
	MOVUPS	X0, (DX)		// store the result block
	RET
| 113 |
// func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
// Expands the user key at key into two key schedules.
//
// enc receives nr+1 round keys, each written as one 16-byte (uint128)
// value rather than as four separately ordered uint32 words.
// dec receives the same round keys in reverse order; every key except
// the first and the last is passed through AESIMC on the way.
//
// The key size is selected from the low 32 bits of nr: below 12 takes
// the 128-bit path, exactly 12 the 192-bit path, above 12 the 256-bit path.
//
// Register use shared with the _expand_key_* helpers:
//	BX = write cursor into enc (each helper advances it)
//	X0 = most recent 128 bits of key material
//	X2 = second key half (192- and 256-bit keys only)
//	X1 = AESKEYGENASSIST result handed to the helper
//	X4 = scratch that must be zero before the first helper call
TEXT ·expandKeyAsm(SB),7,$0
	MOVQ	nr+0(FP), CX
	MOVQ	key+8(FP), AX
	MOVQ	enc+16(FP), BX
	MOVQ	dec+24(FP), DX

	// Encryption schedule. Round key 0 is the first 16 key bytes.
	MOVUPS	(AX), X0
	MOVUPS	X0, (BX)
	ADDQ	$16, BX
	PXOR	X4, X4			// the _expand_key_* helpers expect X4 to be zero
	CMPL	CX, $12
	JE	Lexp_enc192
	JB	Lexp_enc128

	// 256-bit key: the second 16 key bytes are round key 1. The a/b helpers
	// then alternate, each appending one round key.
Lexp_enc256:
	MOVUPS	16(AX), X2
	MOVUPS	X2, (BX)
	ADDQ	$16, BX
	AESKEYGENASSIST	$0x01, X2, X1
	CALL	_expand_key_256a<>(SB)
	AESKEYGENASSIST	$0x01, X0, X1
	CALL	_expand_key_256b<>(SB)
	AESKEYGENASSIST	$0x02, X2, X1
	CALL	_expand_key_256a<>(SB)
	AESKEYGENASSIST	$0x02, X0, X1
	CALL	_expand_key_256b<>(SB)
	AESKEYGENASSIST	$0x04, X2, X1
	CALL	_expand_key_256a<>(SB)
	AESKEYGENASSIST	$0x04, X0, X1
	CALL	_expand_key_256b<>(SB)
	AESKEYGENASSIST	$0x08, X2, X1
	CALL	_expand_key_256a<>(SB)
	AESKEYGENASSIST	$0x08, X0, X1
	CALL	_expand_key_256b<>(SB)
	AESKEYGENASSIST	$0x10, X2, X1
	CALL	_expand_key_256a<>(SB)
	AESKEYGENASSIST	$0x10, X0, X1
	CALL	_expand_key_256b<>(SB)
	AESKEYGENASSIST	$0x20, X2, X1
	CALL	_expand_key_256a<>(SB)
	AESKEYGENASSIST	$0x20, X0, X1
	CALL	_expand_key_256b<>(SB)
	AESKEYGENASSIST	$0x40, X2, X1
	CALL	_expand_key_256a<>(SB)
	JMP	Lexp_dec

	// 192-bit key: the remaining 8 key bytes go into the low half of X2.
	// The a helper appends 32 bytes to enc, the b helper appends 16.
Lexp_enc192:
	MOVQ	16(AX), X2
	AESKEYGENASSIST	$0x01, X2, X1
	CALL	_expand_key_192a<>(SB)
	AESKEYGENASSIST	$0x02, X2, X1
	CALL	_expand_key_192b<>(SB)
	AESKEYGENASSIST	$0x04, X2, X1
	CALL	_expand_key_192a<>(SB)
	AESKEYGENASSIST	$0x08, X2, X1
	CALL	_expand_key_192b<>(SB)
	AESKEYGENASSIST	$0x10, X2, X1
	CALL	_expand_key_192a<>(SB)
	AESKEYGENASSIST	$0x20, X2, X1
	CALL	_expand_key_192b<>(SB)
	AESKEYGENASSIST	$0x40, X2, X1
	CALL	_expand_key_192a<>(SB)
	AESKEYGENASSIST	$0x80, X2, X1
	CALL	_expand_key_192b<>(SB)
	JMP	Lexp_dec

	// 128-bit key: ten helper calls, each appending one round key.
Lexp_enc128:
	AESKEYGENASSIST	$0x01, X0, X1
	CALL	_expand_key_128<>(SB)
	AESKEYGENASSIST	$0x02, X0, X1
	CALL	_expand_key_128<>(SB)
	AESKEYGENASSIST	$0x04, X0, X1
	CALL	_expand_key_128<>(SB)
	AESKEYGENASSIST	$0x08, X0, X1
	CALL	_expand_key_128<>(SB)
	AESKEYGENASSIST	$0x10, X0, X1
	CALL	_expand_key_128<>(SB)
	AESKEYGENASSIST	$0x20, X0, X1
	CALL	_expand_key_128<>(SB)
	AESKEYGENASSIST	$0x40, X0, X1
	CALL	_expand_key_128<>(SB)
	AESKEYGENASSIST	$0x80, X0, X1
	CALL	_expand_key_128<>(SB)
	AESKEYGENASSIST	$0x1b, X0, X1
	CALL	_expand_key_128<>(SB)
	AESKEYGENASSIST	$0x36, X0, X1
	CALL	_expand_key_128<>(SB)

	// Decryption schedule. BX is one past the last enc round key here.
Lexp_dec:
	SUBQ	$16, BX			// BX -> last enc round key
	MOVUPS	(BX), X1
	MOVUPS	X1, (DX)		// dec[0] = last enc key, copied unchanged
	DECQ	CX			// nr-1 keys need AESIMC
Lexp_dec_loop:
	MOVUPS	-16(BX), X1		// walk enc backwards ...
	AESIMC	X1, X0
	MOVUPS	X0, 16(DX)		// ... while filling dec forwards
	ADDQ	$16, DX
	SUBQ	$16, BX
	DECQ	CX
	JNZ	Lexp_dec_loop
	MOVUPS	-16(BX), X0
	MOVUPS	X0, 16(DX)		// dec[nr] = enc[0], copied unchanged
	RET
| 217 |
// Hand-encoded PSHUFD X0,X0 and PSHUFD X1,X1 opcodes (an immediate byte
// must follow). Neither macro is referenced by the code visible in this
// file; they are kept in case something else relies on them.
#define PSHUFD_X0_X0_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xc0
#define PSHUFD_X1_X1_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xc9

// _expand_key_128 derives the next round key from X0 and appends it to enc.
//
// In:	X0 = previous round key
//	X1 = AESKEYGENASSIST result for this round
//	X4 = scratch whose low dword must be zero
//	BX = write cursor into enc
// Out:	X0 = new round key, also stored at (BX); BX advanced by 16.
// Clobbers X1 and X4; the low dword of X4 is still zero on return.
TEXT _expand_key_128<>(SB),7,$0
	// Two shuffle/xor steps turn X0 into the running XOR of its own dwords.
	SHUFPS	$0x10, X0, X4
	PXOR	X4, X0
	SHUFPS	$0x8c, X0, X4
	PXOR	X4, X0
	// Mix in dword 3 of the key-assist value, broadcast to all four lanes.
	PSHUFD	$0xff, X1, X1
	PXOR	X1, X0
	MOVUPS	X0, (BX)
	ADDQ	$16, BX
	RET
| 230 |
// PSLLDQ_X5_ is a hand-encoded "PSLLDQ X5" opcode; the byte shift count
// must follow it as a separate BYTE. PSHUFD_X0_X3_ is a hand-encoded
// "PSHUFD X0 -> X3" opcode that the code visible in this file does not use.
#define PSLLDQ_X5_ BYTE $0x66; BYTE $0x0f; BYTE $0x73; BYTE $0xfd
#define PSHUFD_X0_X3_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xd8

// _expand_key_192a advances the 192-bit key state held in X0 (four dwords)
// and X2 (two dwords, low half) and appends 32 bytes to enc.
//
// In:	X0, X2 = current key state
//	X1 = AESKEYGENASSIST result for this round
//	X4 = scratch whose low dword must be zero
//	BX = write cursor into enc
// Out:	X0, X2 = next key state; BX advanced by 32.
// Clobbers X1, X3, X4, X5 and X6.
TEXT _expand_key_192a<>(SB),7,$0
	// X0: running XOR of its dwords, then mix in dword 1 of the assist value.
	SHUFPS	$0x10, X0, X4
	PXOR	X4, X0
	SHUFPS	$0x8c, X0, X4
	PXOR	X4, X0
	PSHUFD	$0x55, X1, X1
	PXOR	X1, X0

	// X2 ^= broadcast(dword 3 of new X0) ^ (old X2 shifted left 4 bytes).
	// X6 keeps the old X2 for the store below.
	MOVAPS	X2, X5
	MOVAPS	X2, X6
	PSLLDQ_X5_; BYTE $0x4
	PSHUFD	$0xff, X0, X3
	PXOR	X3, X2
	PXOR	X5, X2

	// Repack into two 16-byte round keys:
	//   X6 = old X2 low half  | new X0 low half
	//   X1 = new X0 high half | new X2 low half
	SHUFPS	$0x44, X0, X6
	MOVAPS	X0, X1
	SHUFPS	$0x4e, X2, X1
	MOVUPS	X6, (BX)
	MOVUPS	X1, 16(BX)
	ADDQ	$32, BX
	RET
| 255 |
// _expand_key_192b advances the 192-bit key state held in X0 (four dwords)
// and X2 (two dwords, low half) and appends 16 bytes to enc.
//
// In:	X0, X2 = current key state
//	X1 = AESKEYGENASSIST result for this round
//	X4 = scratch whose low dword must be zero
//	BX = write cursor into enc
// Out:	X0, X2 = next key state; new X0 stored at (BX); BX advanced by 16.
// Clobbers X1, X3, X4 and X5.
TEXT _expand_key_192b<>(SB),7,$0
	// X0: running XOR of its dwords, then mix in dword 1 of the assist value.
	SHUFPS	$0x10, X0, X4
	PXOR	X4, X0
	SHUFPS	$0x8c, X0, X4
	PXOR	X4, X0
	PSHUFD	$0x55, X1, X1
	PXOR	X1, X0

	// X2 ^= broadcast(dword 3 of new X0) ^ (old X2 shifted left 4 bytes).
	MOVAPS	X2, X5
	PSLLDQ_X5_; BYTE $0x4
	PSHUFD	$0xff, X0, X3
	PXOR	X3, X2
	PXOR	X5, X2

	MOVUPS	X0, (BX)
	ADDQ	$16, BX
	RET
| 273 |
// _expand_key_256a is the X0 half-step of the 256-bit schedule. It performs
// exactly the same work as _expand_key_128, so it tail-jumps there; that
// routine's RET returns straight to this routine's caller.
TEXT _expand_key_256a<>(SB),7,$0
	JMP	_expand_key_128<>(SB)
| 276 |
// _expand_key_256b is the X2 half-step of the 256-bit schedule: it derives
// the next round key from X2 and appends it to enc.
//
// In:	X2 = round key produced two steps earlier
//	X1 = AESKEYGENASSIST result for this round
//	X4 = scratch whose low dword must be zero
//	BX = write cursor into enc
// Out:	X2 = new round key, also stored at (BX); BX advanced by 16.
// Clobbers X1 and X4; the low dword of X4 is still zero on return.
TEXT _expand_key_256b<>(SB),7,$0
	// Two shuffle/xor steps turn X2 into the running XOR of its own dwords.
	SHUFPS	$0x10, X2, X4
	PXOR	X4, X2
	SHUFPS	$0x8c, X2, X4
	PXOR	X4, X2
	// Mix in dword 2 of the key-assist value, broadcast to all four lanes.
	PSHUFD	$0xaa, X1, X1
	PXOR	X1, X2

	MOVUPS	X2, (BX)
	ADDQ	$16, BX
	RET
LEFT | RIGHT |