LEFT | RIGHT |
(no file at all) | |
1 // Inferno's libkern/memmove-arm.s | 1 // Inferno's libkern/memmove-arm.s |
2 // http://code.google.com/p/inferno-os/source/browse/libkern/memmove-arm.s | 2 // http://code.google.com/p/inferno-os/source/browse/libkern/memmove-arm.s |
3 // | 3 // |
4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. | 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. |
5 // Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved. | 5 // Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved. |
6 // Portions Copyright 2009 The Go Authors. All rights reserved. | 6 // Portions Copyright 2009 The Go Authors. All rights reserved. |
7 // | 7 // |
8 // Permission is hereby granted, free of charge, to any person obtaining a copy | 8 // Permission is hereby granted, free of charge, to any person obtaining a copy |
9 // of this software and associated documentation files (the "Software"), to deal | 9 // of this software and associated documentation files (the "Software"), to deal |
10 // in the Software without restriction, including without limitation the rights | 10 // in the Software without restriction, including without limitation the rights |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
78 | 78 |
79 MOVBU.W -1(R(FROM)), R(TMP) /* pre-indexed */ | 79 MOVBU.W -1(R(FROM)), R(TMP) /* pre-indexed */ |
80 MOVBU.W R(TMP), -1(R(TE)) /* pre-indexed */ | 80 MOVBU.W R(TMP), -1(R(TE)) /* pre-indexed */ |
81 B _b4align | 81 B _b4align |
82 | 82 |
83 _b4aligned: /* is source now aligned? */ | 83 _b4aligned: /* is source now aligned? */ |
84 AND.S $3, R(FROM), R(TMP) | 84 AND.S $3, R(FROM), R(TMP) |
85 BNE _bunaligned | 85 BNE _bunaligned |
86 | 86 |
87 ADD $31, R(TS), R(TMP) /* do 32-byte chunks if possible */ | 87 ADD $31, R(TS), R(TMP) /* do 32-byte chunks if possible */ |
88 » MOVW» R(TS), savedts+4(SP) | 88 » MOVW» R(TS), savedts-4(SP) |
89 _b32loop: | 89 _b32loop: |
90 CMP R(TMP), R(TE) | 90 CMP R(TMP), R(TE) |
91 BLS _b4tail | 91 BLS _b4tail |
92 | 92 |
93 MOVM.DB.W (R(FROM)), [R0-R7] | 93 MOVM.DB.W (R(FROM)), [R0-R7] |
94 MOVM.DB.W [R0-R7], (R(TE)) | 94 MOVM.DB.W [R0-R7], (R(TE)) |
95 B _b32loop | 95 B _b32loop |
96 | 96 |
97 _b4tail: /* do remaining words if possible */ | 97 _b4tail: /* do remaining words if possible */ |
98 » MOVW» savedts+4(SP), R(TS) | 98 » MOVW» savedts-4(SP), R(TS) |
99 ADD $3, R(TS), R(TMP) | 99 ADD $3, R(TS), R(TMP) |
100 _b4loop: | 100 _b4loop: |
101 CMP R(TMP), R(TE) | 101 CMP R(TMP), R(TE) |
102 BLS _b1tail | 102 BLS _b1tail |
103 | 103 |
104 MOVW.W -4(R(FROM)), R(TMP1) /* pre-indexed */ | 104 MOVW.W -4(R(FROM)), R(TMP1) /* pre-indexed */ |
105 MOVW.W R(TMP1), -4(R(TE)) /* pre-indexed */ | 105 MOVW.W R(TMP1), -4(R(TE)) /* pre-indexed */ |
106 B _b4loop | 106 B _b4loop |
107 | 107 |
108 _b1tail: /* remaining bytes */ | 108 _b1tail: /* remaining bytes */ |
(...skipping 14 matching lines...) Expand all Loading... |
123 | 123 |
124 MOVBU.P 1(R(FROM)), R(TMP) /* implicit write back */ | 124 MOVBU.P 1(R(FROM)), R(TMP) /* implicit write back */ |
125 MOVBU.P R(TMP), 1(R(TS)) /* implicit write back */ | 125 MOVBU.P R(TMP), 1(R(TS)) /* implicit write back */ |
126 B _f4align | 126 B _f4align |
127 | 127 |
128 _f4aligned: /* is source now aligned? */ | 128 _f4aligned: /* is source now aligned? */ |
129 AND.S $3, R(FROM), R(TMP) | 129 AND.S $3, R(FROM), R(TMP) |
130 BNE _funaligned | 130 BNE _funaligned |
131 | 131 |
132 SUB $31, R(TE), R(TMP) /* do 32-byte chunks if possible */ | 132 SUB $31, R(TE), R(TMP) /* do 32-byte chunks if possible */ |
133 » MOVW» R(TE), savedte+4(SP) | 133 » MOVW» R(TE), savedte-4(SP) |
134 _f32loop: | 134 _f32loop: |
135 CMP R(TMP), R(TS) | 135 CMP R(TMP), R(TS) |
136 BHS _f4tail | 136 BHS _f4tail |
137 | 137 |
138 MOVM.IA.W (R(FROM)), [R1-R8]· | 138 MOVM.IA.W (R(FROM)), [R1-R8]· |
139 MOVM.IA.W [R1-R8], (R(TS)) | 139 MOVM.IA.W [R1-R8], (R(TS)) |
140 B _f32loop | 140 B _f32loop |
141 | 141 |
142 _f4tail: | 142 _f4tail: |
143 » MOVW» savedte+4(SP), R(TE) | 143 » MOVW» savedte-4(SP), R(TE) |
144 SUB $3, R(TE), R(TMP) /* do remaining words if possible */ | 144 SUB $3, R(TE), R(TMP) /* do remaining words if possible */ |
145 _f4loop: | 145 _f4loop: |
146 CMP R(TMP), R(TS) | 146 CMP R(TMP), R(TS) |
147 BHS _f1tail | 147 BHS _f1tail |
148 | 148 |
149 MOVW.P 4(R(FROM)), R(TMP1) /* implicit write back */ | 149 MOVW.P 4(R(FROM)), R(TMP1) /* implicit write back */ |
150 MOVW.P R(TMP1), 4(R(TS)) /* implicit write back */ | 150 MOVW.P R(TMP1), 4(R(TS)) /* implicit write back */ |
151 B _f4loop | 151 B _f4loop |
152 | 152 |
153 _f1tail: | 153 _f1tail: |
(...skipping 21 matching lines...) Expand all Loading... |
175 | 175 |
176 MOVW.GT $24, R(RSHIFT) /* (R(n)<<8)|(R(n-1)>>24) */ | 176 MOVW.GT $24, R(RSHIFT) /* (R(n)<<8)|(R(n-1)>>24) */ |
177 MOVW.GT $8, R(LSHIFT) | 177 MOVW.GT $8, R(LSHIFT) |
178 MOVW.GT $3, R(OFFSET) | 178 MOVW.GT $3, R(OFFSET) |
179 | 179 |
180 ADD $16, R(TS), R(TMP) /* do 16-byte chunks if possible */ | 180 ADD $16, R(TS), R(TMP) /* do 16-byte chunks if possible */ |
181 CMP R(TMP), R(TE) | 181 CMP R(TMP), R(TE) |
182 BLS _b1tail | 182 BLS _b1tail |
183 | 183 |
184 BIC $3, R(FROM) /* align source */ | 184 BIC $3, R(FROM) /* align source */ |
185 » MOVW» R(TS), savedts+4(SP) | 185 » MOVW» R(TS), savedts-4(SP) |
186 MOVW (R(FROM)), R(BR0) /* prime first block register */ | 186 MOVW (R(FROM)), R(BR0) /* prime first block register */ |
187 | 187 |
188 _bu16loop: | 188 _bu16loop: |
189 CMP R(TMP), R(TE) | 189 CMP R(TMP), R(TE) |
190 BLS _bu1tail | 190 BLS _bu1tail |
191 | 191 |
192 MOVW R(BR0)<<R(LSHIFT), R(BW3) | 192 MOVW R(BR0)<<R(LSHIFT), R(BW3) |
193 MOVM.DB.W (R(FROM)), [R(BR0)-R(BR3)] | 193 MOVM.DB.W (R(FROM)), [R(BR0)-R(BR3)] |
194 ORR R(BR3)>>R(RSHIFT), R(BW3) | 194 ORR R(BR3)>>R(RSHIFT), R(BW3) |
195 | 195 |
196 MOVW R(BR3)<<R(LSHIFT), R(BW2) | 196 MOVW R(BR3)<<R(LSHIFT), R(BW2) |
197 ORR R(BR2)>>R(RSHIFT), R(BW2) | 197 ORR R(BR2)>>R(RSHIFT), R(BW2) |
198 | 198 |
199 MOVW R(BR2)<<R(LSHIFT), R(BW1) | 199 MOVW R(BR2)<<R(LSHIFT), R(BW1) |
200 ORR R(BR1)>>R(RSHIFT), R(BW1) | 200 ORR R(BR1)>>R(RSHIFT), R(BW1) |
201 | 201 |
202 MOVW R(BR1)<<R(LSHIFT), R(BW0) | 202 MOVW R(BR1)<<R(LSHIFT), R(BW0) |
203 ORR R(BR0)>>R(RSHIFT), R(BW0) | 203 ORR R(BR0)>>R(RSHIFT), R(BW0) |
204 | 204 |
205 MOVM.DB.W [R(BW0)-R(BW3)], (R(TE)) | 205 MOVM.DB.W [R(BW0)-R(BW3)], (R(TE)) |
206 B _bu16loop | 206 B _bu16loop |
207 | 207 |
208 _bu1tail: | 208 _bu1tail: |
209 » MOVW» savedts+4(SP), R(TS) | 209 » MOVW» savedts-4(SP), R(TS) |
210 ADD R(OFFSET), R(FROM) | 210 ADD R(OFFSET), R(FROM) |
211 B _b1tail | 211 B _b1tail |
212 | 212 |
213 _funaligned: | 213 _funaligned: |
214 CMP $2, R(TMP) | 214 CMP $2, R(TMP) |
215 | 215 |
216 MOVW.LT $8, R(RSHIFT) /* (R(n+1)<<24)|(R(n)>>8) */ | 216 MOVW.LT $8, R(RSHIFT) /* (R(n+1)<<24)|(R(n)>>8) */ |
217 MOVW.LT $24, R(LSHIFT) | 217 MOVW.LT $24, R(LSHIFT) |
218 MOVW.LT $3, R(OFFSET) | 218 MOVW.LT $3, R(OFFSET) |
219 | 219 |
220 MOVW.EQ $16, R(RSHIFT) /* (R(n+1)<<16)|(R(n)>>16) */ | 220 MOVW.EQ $16, R(RSHIFT) /* (R(n+1)<<16)|(R(n)>>16) */ |
221 MOVW.EQ $16, R(LSHIFT) | 221 MOVW.EQ $16, R(LSHIFT) |
222 MOVW.EQ $2, R(OFFSET) | 222 MOVW.EQ $2, R(OFFSET) |
223 | 223 |
224 MOVW.GT $24, R(RSHIFT) /* (R(n+1)<<8)|(R(n)>>24) */ | 224 MOVW.GT $24, R(RSHIFT) /* (R(n+1)<<8)|(R(n)>>24) */ |
225 MOVW.GT $8, R(LSHIFT) | 225 MOVW.GT $8, R(LSHIFT) |
226 MOVW.GT $1, R(OFFSET) | 226 MOVW.GT $1, R(OFFSET) |
227 | 227 |
228 SUB $16, R(TE), R(TMP) /* do 16-byte chunks if possible */ | 228 SUB $16, R(TE), R(TMP) /* do 16-byte chunks if possible */ |
229 CMP R(TMP), R(TS) | 229 CMP R(TMP), R(TS) |
230 BHS _f1tail | 230 BHS _f1tail |
231 | 231 |
232 BIC $3, R(FROM) /* align source */ | 232 BIC $3, R(FROM) /* align source */ |
233 » MOVW» R(TE), savedte+4(SP) | 233 » MOVW» R(TE), savedte-4(SP) |
234 MOVW.P 4(R(FROM)), R(FR3) /* prime last block register, implicit write back */ | 234 MOVW.P 4(R(FROM)), R(FR3) /* prime last block register, implicit write back */ |
235 | 235 |
236 _fu16loop: | 236 _fu16loop: |
237 CMP R(TMP), R(TS) | 237 CMP R(TMP), R(TS) |
238 BHS _fu1tail | 238 BHS _fu1tail |
239 | 239 |
240 MOVW R(FR3)>>R(RSHIFT), R(FW0) | 240 MOVW R(FR3)>>R(RSHIFT), R(FW0) |
241 MOVM.IA.W (R(FROM)), [R(FR0),R(FR1),R(FR2),R(FR3)] | 241 MOVM.IA.W (R(FROM)), [R(FR0),R(FR1),R(FR2),R(FR3)] |
242 ORR R(FR0)<<R(LSHIFT), R(FW0) | 242 ORR R(FR0)<<R(LSHIFT), R(FW0) |
243 | 243 |
244 MOVW R(FR0)>>R(RSHIFT), R(FW1) | 244 MOVW R(FR0)>>R(RSHIFT), R(FW1) |
245 ORR R(FR1)<<R(LSHIFT), R(FW1) | 245 ORR R(FR1)<<R(LSHIFT), R(FW1) |
246 | 246 |
247 MOVW R(FR1)>>R(RSHIFT), R(FW2) | 247 MOVW R(FR1)>>R(RSHIFT), R(FW2) |
248 ORR R(FR2)<<R(LSHIFT), R(FW2) | 248 ORR R(FR2)<<R(LSHIFT), R(FW2) |
249 | 249 |
250 MOVW R(FR2)>>R(RSHIFT), R(FW3) | 250 MOVW R(FR2)>>R(RSHIFT), R(FW3) |
251 ORR R(FR3)<<R(LSHIFT), R(FW3) | 251 ORR R(FR3)<<R(LSHIFT), R(FW3) |
252 | 252 |
253 MOVM.IA.W [R(FW0),R(FW1),R(FW2),R(FW3)], (R(TS)) | 253 MOVM.IA.W [R(FW0),R(FW1),R(FW2),R(FW3)], (R(TS)) |
254 B _fu16loop | 254 B _fu16loop |
255 | 255 |
256 _fu1tail: | 256 _fu1tail: |
257 » MOVW» savedte+4(SP), R(TE) | 257 » MOVW» savedte-4(SP), R(TE) |
258 SUB R(OFFSET), R(FROM) | 258 SUB R(OFFSET), R(FROM) |
259 B _f1tail | 259 B _f1tail |
LEFT | RIGHT |