OLD | NEW |
(Empty) | |
| 1 // Copyright 2013 The Go Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style |
| 3 // license that can be found in the LICENSE file. |
| 4 |
| 5 #include "../../../cmd/ld/textflag.h" |
| 6 |
| 7 // SHA1 block routine. See sha1block.go for Go equivalent. |
| 8 // |
| 9 // There are 80 rounds of 4 types: |
| 10 // - rounds 0-15 are type 1 and load data (ROUND1 macro). |
| 11 // - rounds 16-19 are type 1 and do not load data (ROUND1x macro). |
| 12 // - rounds 20-39 are type 2 and do not load data (ROUND2 macro). |
| 13 // - rounds 40-59 are type 3 and do not load data (ROUND3 macro). |
| 14 // - rounds 60-79 are type 4 and do not load data (ROUND4 macro). |
| 15 // |
| 16 // Each round loads or shuffles the data, then computes a per-round |
| 17 // function of b, c, d, and then mixes the result into and rotates the |
| 18 // five registers a, b, c, d, e holding the intermediate results. |
| 19 // |
| 20 // The register rotation is implemented by rotating the arguments to |
| 21 // the round macros instead of by explicit move instructions. |
| 22 |
// Symbolic register numbers. The code below always wraps these names as
// R(name), e.g. R(data), so each value selects one ARM register.
// a..e hold the five SHA1 state words: block loads them from *dig, runs the
// rounds on them, and stores them back at the end.
// t0 and t1 are scratch: LOAD/SHUFFLE leave the current message word in t0,
// and the FUNCn macros leave the round-function result in t1.
| 23 // Register definitions |
| 24 data = 0 // Pointer to incoming data (advanced by 64 after each pass of the loop in block) |
| 25 const = 1 // Current constant for SHA round (reloaded before each group of 20 rounds) |
| 26 a = 2 // SHA1 accumulator |
| 27 b = 3 // SHA1 accumulator |
| 28 c = 4 // SHA1 accumulator |
| 29 d = 5 // SHA1 accumulator |
| 30 e = 6 // SHA1 accumulator |
| 31 t0 = 7 // Temporary (message word in LOAD/SHUFFLE; extra scratch in FUNC3) |
| 32 t1 = 8 // Temporary (round-function result consumed by MIX) |
| 33 // r9, r10 are forbidden (NOTE(review): presumably reserved by the Go ARM toolchain - confirm) |
| 34 // r11 is OK provided you check the assembler that no synthetic instructions use it |
// Remaining scratch registers, used as R(t2), R(t3), R(t4).
// NOTE(review): register 14 is the ARM link register; using it as scratch
// presumably relies on LR being saved in the stack frame - confirm.
| 35 t2 = 11 // Temporary (LOAD, SHUFFLE, and the end-of-loop accumulate) |
| 36 t3 = 12 // Temporary (LOAD, SHUFFLE, and the end-of-loop accumulate) |
| 37 t4 = 14 // Temporary (only used by the end-of-loop accumulate) |
| 38 |
// Argument and local-variable layout for block.
// The symbols defined here are byte offsets that the code applies to SP,
// e.g. p_end(SP) and (index*4+w)(SP). Each offset is derived from the
// previous one by subtracting the size of the new item, so the items are
// laid out back to back below -4(SP).
// Of these, only p_end and w are referenced by name in the code below:
// p_data is used only to derive w, and the saved state is accessed through
// MOVM.IB on R13 rather than through the saved symbol.
| 39 // func block(dig *digest, p []byte) |
| 40 // 0(FP) is *digest |
| 41 // 4(FP) is p.array (struct Slice) |
| 42 // 8(FP) is p.len |
| 43 //12(FP) is p.cap |
| 44 // |
| 45 // Stack frame |
| 46 p_end = -4 // -4(SP) pointer to the end of data |
| 47 p_data = p_end - 4 // -8(SP) current data pointer |
| 48 w = p_data - 4*16 // -72(SP) 16 words temporary buffer w uint32[16] |
| 49 saved = w - 4*5 // -92(SP) saved sha1 registers a,b,c,d,e - these must b
e last |
| 50 // Total size +4 for saved LR is 96: 92 bytes of locals plus 4, matching the $96 frame size on the TEXT line |
| 51 |
// LOAD(index, e): fetch message word `index` from the input and add it to e.
// Reads the four bytes at R(data)+index*4 one at a time (MOVBU), combines
// them with the first byte most significant, stores the word in slot `index`
// of the w buffer on the stack, and adds it to register e.
// Clobbers t0, t1, t2, t3; on exit t0 holds the assembled word.
// NOTE(review): byte-wise loads presumably avoid any alignment requirement
// on p - confirm.
| 52 // w[i] = p[j]<<24 | p[j+1]<<16 | p[j+2]<<8 | p[j+3] (here i = index, j = index*4) |
| 53 // e += w[i] |
| 54 #define LOAD(index, e) \ |
| 55 MOVBU (index*4+0)(R(data)), R(t0) ; \ |
| 56 MOVBU (index*4+1)(R(data)), R(t1) ; \ |
| 57 MOVBU (index*4+2)(R(data)), R(t2) ; \ |
| 58 MOVBU (index*4+3)(R(data)), R(t3) ; \ |
| 59 ORR R(t0)<<8, R(t1), R(t0) ; \ |
| 60 ORR R(t2)<<8, R(t3), R(t2) ; \ |
| 61 ORR R(t0)<<16, R(t2), R(t0) ; \ |
| 62 MOVW R(t0), (index*4+w)(SP) ; \ |
| 63 ADD R(t0), R(e), R(e) |
| 64 |
// SHUFFLE(index, e): compute message word `index` for the rounds that do not
// load data (16-79) and add it to e.
// w is used as a 16-entry circular buffer: every slot number is reduced
// with &0xf. The words in slots index, index-3, index-8 and index-14 are
// XORed together, the result is rotated left by one bit (written as a rotate
// right by 32-1), stored back into slot index&0xf, and added to register e.
// Clobbers t0, t1, t2, t3; on exit t0 holds the new word.
| 65 // tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf] |
| 66 // w[i&0xf] = tmp<<1 | tmp>>(32-1) |
| 67 // e += w[i&0xf] |
| 68 #define SHUFFLE(index, e) \ |
| 69 MOVW (((index)&0xf)*4+w)(SP), R(t0) ; \ |
| 70 MOVW (((index-3)&0xf)*4+w)(SP), R(t1) ; \ |
| 71 MOVW (((index-8)&0xf)*4+w)(SP), R(t2) ; \ |
| 72 MOVW (((index-14)&0xf)*4+w)(SP), R(t3) ; \ |
| 73 EOR R(t0), R(t1), R(t0) ; \ |
| 74 EOR R(t2), R(t3), R(t2) ; \ |
| 75 EOR R(t0), R(t2), R(t0) ; \ |
| 76 MOVW R(t0)@>(32-1), R(t0) ; \ |
| 77 MOVW R(t0), (((index)&0xf)*4+w)(SP) ; \ |
| 78 ADD R(t0), R(e), R(e) |
| 79 |
// FUNC1(a, b, c, d, e): round function used by ROUND1 and ROUND1x.
// Selects bits of c where b is set and bits of d where b is clear, using the
// three-instruction XOR/AND/XOR form shown on the second line below.
// Only b, c and d are read; a and e are accepted so that all FUNCn macros
// share one signature. The result is left in t1; no other register changes.
| 80 // t1 = (b & c) | ((~b) & d) = |
| 81 // t1 = d ^ (b & (c ^ d)) |
| 82 #define FUNC1(a, b, c, d, e) \ |
| 83 EOR R(c), R(d), R(t1) ; \ |
| 84 AND R(b), R(t1), R(t1) ; \ |
| 85 EOR R(d), R(t1), R(t1) |
| 86 |
// FUNC2(a, b, c, d, e): round function used by ROUND2 and, through the FUNC4
// alias, by ROUND4. XORs b, c and d together.
// Only b, c and d are read; a and e are unused. The result is left in t1; no
// other register changes.
| 87 // t1 = b ^ c ^ d |
| 88 #define FUNC2(a, b, c, d, e) \ |
| 89 EOR R(b), R(c), R(t1) ; \ |
| 90 EOR R(d), R(t1), R(t1) |
| 91 |
// FUNC3(a, b, c, d, e): round function used by ROUND3 (bitwise majority of
// b, c and d), computed in the four-instruction form on the second line below.
// Only b, c and d are read; a and e are unused. The result is left in t1.
// Unlike FUNC1/FUNC2 this also overwrites t0; that is safe in ROUND3 because
// SHUFFLE has already added the message word held in t0 to e.
| 92 // t1 = (b & c) | (b & d) | (c & d) = |
| 93 // t1 = (b & c) | ((b | c) & d) |
| 94 #define FUNC3(a, b, c, d, e) \ |
| 95 ORR R(b), R(c), R(t0) ; \ |
| 96 AND R(b), R(c), R(t1) ; \ |
| 97 AND R(d), R(t0), R(t0) ; \ |
| 98 ORR R(t0), R(t1), R(t1) |
| 99 |
// FUNC4: the round function for ROUND4 is the same as FUNC2 (t1 = b ^ c ^ d),
// so it is defined as an alias rather than repeated.
| 100 #define FUNC4 FUNC2 |
| 101 |
// MIX(a, b, c, d, e): final step of every round.
// Adds into e the round-function result (t1), a rotated left by 5, and the
// current round constant, and rotates b left by 30 in place. Both rotations
// are written as rotate-right by (32-n). a5 is never stored in a register:
// the rotation of a is applied as part of the ADD operand, so a itself is
// unchanged. c and d are unused. On entry e already includes the message
// word added by LOAD or SHUFFLE.
| 102 // a5 := a<<5 | a>>(32-5) |
| 103 // b = b<<30 | b>>(32-30) |
| 104 // e = a5 + t1 + e + const |
| 105 #define MIX(a, b, c, d, e) \ |
| 106 ADD R(t1), R(e), R(e) ; \ |
| 107 MOVW R(b)@>(32-30), R(b) ; \ |
| 108 ADD R(a)@>(32-5), R(e), R(e) ; \ |
| 109 ADD R(const), R(e), R(e) |
| 110 |
// ROUND1(a, b, c, d, e, index): one type-1 round that loads data (used for
// rounds 0-15). LOAD reads message word `index` from the input, saves it in
// w and adds it to e; FUNC1 puts the round function of b, c, d in t1; MIX
// folds t1, the rotated a and the round constant into e and rotates b.
// Call sites rotate the five register arguments from one round to the next
// instead of moving values between registers.
| 111 #define ROUND1(a, b, c, d, e, index) \ |
| 112 LOAD(index, e) ; \ |
| 113 FUNC1(a, b, c, d, e) ; \ |
| 114 MIX(a, b, c, d, e) |
| 115 |
// ROUND1x(a, b, c, d, e, index): one type-1 round that does not load data
// (used for rounds 16-19). Identical to ROUND1 except that the message word
// comes from SHUFFLE (derived from earlier words in w) instead of LOAD.
| 116 #define ROUND1x(a, b, c, d, e, index) \ |
| 117 SHUFFLE(index, e) ; \ |
| 118 FUNC1(a, b, c, d, e) ; \ |
| 119 MIX(a, b, c, d, e) |
| 120 |
// ROUND2(a, b, c, d, e, index): one type-2 round (used for rounds 20-39).
// SHUFFLE produces message word `index` and adds it to e, FUNC2 puts
// b ^ c ^ d in t1, and MIX folds t1, the rotated a and the round constant
// into e and rotates b.
| 121 #define ROUND2(a, b, c, d, e, index) \ |
| 122 SHUFFLE(index, e) ; \ |
| 123 FUNC2(a, b, c, d, e) ; \ |
| 124 MIX(a, b, c, d, e) |
| 125 |
// ROUND3(a, b, c, d, e, index): one type-3 round (used for rounds 40-59).
// SHUFFLE produces message word `index` and adds it to e, FUNC3 puts the
// majority function of b, c, d in t1 (also overwriting t0), and MIX folds
// t1, the rotated a and the round constant into e and rotates b.
| 126 #define ROUND3(a, b, c, d, e, index) \ |
| 127 SHUFFLE(index, e) ; \ |
| 128 FUNC3(a, b, c, d, e) ; \ |
| 129 MIX(a, b, c, d, e) |
| 130 |
// ROUND4(a, b, c, d, e, index): one type-4 round (used for rounds 60-79).
// Same structure as ROUND2; FUNC4 is an alias for FUNC2, so the only
// difference at run time is the round constant loaded by the caller.
| 131 #define ROUND4(a, b, c, d, e, index) \ |
| 132 SHUFFLE(index, e) ; \ |
| 133 FUNC4(a, b, c, d, e) ; \ |
| 134 MIX(a, b, c, d, e) |
| 135 |
// block folds the bytes of p into the SHA1 state at *dig, consuming 64 bytes
// (80 rounds) per pass of the loop.
//
// Arguments, as read below:
//   dig+0(FP)    address of the five state words; loaded into a..e on entry
//                and written back from a..e before returning
//   p+4(FP)      address of the first input byte
//   p_len+8(FP)  number of input bytes
//
// Register/stack use: data walks through p; the end pointer is kept in the
// p_end stack slot because every register temporary is clobbered by the
// round macros; the state at the start of each pass is saved just above R13
// and added back in after the 80 rounds.
//
// NOTE(review): the loop condition is tested at the bottom, so the body runs
// at least once and always consumes a whole 64-byte block. This assumes
// len(p) is a non-zero multiple of 64 - confirm against the callers.
| 136 // func block(dig *digest, p []byte) |
| 137 TEXT ·block(SB), NOSPLIT, $96-16 |
| 138 MOVW p+4(FP), R(data) // pointer to the data |
| 139 MOVW p_len+8(FP), R(t0) // number of bytes |
| 140 ADD R(data), R(t0) |
| 141 MOVW R(t0), p_end(SP) // pointer to end of data |
| 142 |
| 143 // Load up initial SHA1 accumulator |
| 144 MOVW dig+0(FP), R(t0) |
| 145 MOVM.IA (R(t0)), [R(a),R(b),R(c),R(d),R(e)] |
| 146 |
| 147 loop: |
| 148 // Save registers at SP+4 onwards |
| 149 MOVM.IB [R(a),R(b),R(c),R(d),R(e)], (R13) |
| 150 |
// Rounds 0-19 (type 1): the first 16 load the message words from the input,
// the last 4 derive them from w. The register arguments rotate by one
// position per round, returning to (a, b, c, d, e) every fifth round.
| 151 MOVW $0x5A827999, R(const) |
| 152 ROUND1(a, b, c, d, e, 0) |
| 153 ROUND1(e, a, b, c, d, 1) |
| 154 ROUND1(d, e, a, b, c, 2) |
| 155 ROUND1(c, d, e, a, b, 3) |
| 156 ROUND1(b, c, d, e, a, 4) |
| 157 ROUND1(a, b, c, d, e, 5) |
| 158 ROUND1(e, a, b, c, d, 6) |
| 159 ROUND1(d, e, a, b, c, 7) |
| 160 ROUND1(c, d, e, a, b, 8) |
| 161 ROUND1(b, c, d, e, a, 9) |
| 162 ROUND1(a, b, c, d, e, 10) |
| 163 ROUND1(e, a, b, c, d, 11) |
| 164 ROUND1(d, e, a, b, c, 12) |
| 165 ROUND1(c, d, e, a, b, 13) |
| 166 ROUND1(b, c, d, e, a, 14) |
| 167 ROUND1(a, b, c, d, e, 15) |
| 168 |
| 169 ROUND1x(e, a, b, c, d, 16) |
| 170 ROUND1x(d, e, a, b, c, 17) |
| 171 ROUND1x(c, d, e, a, b, 18) |
| 172 ROUND1x(b, c, d, e, a, 19) |
| 173 ········ |
// Rounds 20-39 (type 2), with their own round constant.
| 174 MOVW $0x6ED9EBA1, R(const) |
| 175 ROUND2(a, b, c, d, e, 20) |
| 176 ROUND2(e, a, b, c, d, 21) |
| 177 ROUND2(d, e, a, b, c, 22) |
| 178 ROUND2(c, d, e, a, b, 23) |
| 179 ROUND2(b, c, d, e, a, 24) |
| 180 ROUND2(a, b, c, d, e, 25) |
| 181 ROUND2(e, a, b, c, d, 26) |
| 182 ROUND2(d, e, a, b, c, 27) |
| 183 ROUND2(c, d, e, a, b, 28) |
| 184 ROUND2(b, c, d, e, a, 29) |
| 185 ROUND2(a, b, c, d, e, 30) |
| 186 ROUND2(e, a, b, c, d, 31) |
| 187 ROUND2(d, e, a, b, c, 32) |
| 188 ROUND2(c, d, e, a, b, 33) |
| 189 ROUND2(b, c, d, e, a, 34) |
| 190 ROUND2(a, b, c, d, e, 35) |
| 191 ROUND2(e, a, b, c, d, 36) |
| 192 ROUND2(d, e, a, b, c, 37) |
| 193 ROUND2(c, d, e, a, b, 38) |
| 194 ROUND2(b, c, d, e, a, 39) |
| 195 ········ |
// Rounds 40-59 (type 3), with their own round constant.
| 196 MOVW $0x8F1BBCDC, R(const) |
| 197 ROUND3(a, b, c, d, e, 40) |
| 198 ROUND3(e, a, b, c, d, 41) |
| 199 ROUND3(d, e, a, b, c, 42) |
| 200 ROUND3(c, d, e, a, b, 43) |
| 201 ROUND3(b, c, d, e, a, 44) |
| 202 ROUND3(a, b, c, d, e, 45) |
| 203 ROUND3(e, a, b, c, d, 46) |
| 204 ROUND3(d, e, a, b, c, 47) |
| 205 ROUND3(c, d, e, a, b, 48) |
| 206 ROUND3(b, c, d, e, a, 49) |
| 207 ROUND3(a, b, c, d, e, 50) |
| 208 ROUND3(e, a, b, c, d, 51) |
| 209 ROUND3(d, e, a, b, c, 52) |
| 210 ROUND3(c, d, e, a, b, 53) |
| 211 ROUND3(b, c, d, e, a, 54) |
| 212 ROUND3(a, b, c, d, e, 55) |
| 213 ROUND3(e, a, b, c, d, 56) |
| 214 ROUND3(d, e, a, b, c, 57) |
| 215 ROUND3(c, d, e, a, b, 58) |
| 216 ROUND3(b, c, d, e, a, 59) |
| 217 ········ |
// Rounds 60-79 (type 4), with their own round constant.
| 218 MOVW $0xCA62C1D6, R(const) |
| 219 ROUND4(a, b, c, d, e, 60) |
| 220 ROUND4(e, a, b, c, d, 61) |
| 221 ROUND4(d, e, a, b, c, 62) |
| 222 ROUND4(c, d, e, a, b, 63) |
| 223 ROUND4(b, c, d, e, a, 64) |
| 224 ROUND4(a, b, c, d, e, 65) |
| 225 ROUND4(e, a, b, c, d, 66) |
| 226 ROUND4(d, e, a, b, c, 67) |
| 227 ROUND4(c, d, e, a, b, 68) |
| 228 ROUND4(b, c, d, e, a, 69) |
| 229 ROUND4(a, b, c, d, e, 70) |
| 230 ROUND4(e, a, b, c, d, 71) |
| 231 ROUND4(d, e, a, b, c, 72) |
| 232 ROUND4(c, d, e, a, b, 73) |
| 233 ROUND4(b, c, d, e, a, 74) |
| 234 ROUND4(a, b, c, d, e, 75) |
| 235 ROUND4(e, a, b, c, d, 76) |
| 236 ROUND4(d, e, a, b, c, 77) |
| 237 ROUND4(c, d, e, a, b, 78) |
| 238 ROUND4(b, c, d, e, a, 79) |
| 239 |
| 240 // Accumulate - restoring registers from SP+4 |
// The state saved at the top of this pass is reloaded into t0..t4 and added
// to the freshly computed a..e, so a..e carry the running state forward.
| 241 MOVM.IB (R13), [R(t0),R(t1),R(t2),R(t3),R(t4)] |
| 242 ADD R(t0), R(a) |
| 243 ADD R(t1), R(b) |
| 244 ADD R(t2), R(c) |
| 245 ADD R(t3), R(d) |
| 246 ADD R(t4), R(e) |
| 247 |
// Advance to the next 64-byte block and repeat while data is still below
// the end pointer stored in p_end (BLO: unsigned "lower than").
| 248 MOVW p_end(SP), R(t0) |
| 249 ADD $64, R(data) |
| 250 CMP R(t0), R(data) |
| 251 BLO loop |
| 252 |
| 253 // Save final SHA1 accumulator |
| 254 MOVW dig+0(FP), R(t0) |
| 255 MOVM.IA [R(a),R(b),R(c),R(d),R(e)], (R(t0)) |
| 256 |
| 257 RET |
OLD | NEW |