src/pkg/runtime/asm_386.s - Issue 5279048: code review 5279048: runtime: faster and more scalable GC

Delta Between Two Patch Sets: src/pkg/runtime/asm_386.s

Issue 5279048: code review 5279048: runtime: faster and more scalable GC (Closed)

Left Patch Set: diff -r da9e7548e6ef https://go.googlecode.com/hg/ Created 13 years, 4 months ago

Right Patch Set: diff -r f44057cc01b2 https://go.googlecode.com/hg/ Created 12 years, 11 months ago

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.

Jump to:

Right: Side by side diff | Download

LEFT	RIGHT
(no file at all)
1 // Copyright 2009 The Go Authors. All rights reserved.	1 // Copyright 2009 The Go Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style	2 // Use of this source code is governed by a BSD-style

3 // license that can be found in the LICENSE file.	3 // license that can be found in the LICENSE file.

4	4

5 #include "zasm_GOOS_GOARCH.h"	5 #include "zasm_GOOS_GOARCH.h"

6	6

7 TEXT _rt0_386(SB),7,$0	7 TEXT _rt0_386(SB),7,$0

8 // copy arguments forward on an even stack	8 // copy arguments forward on an even stack

9 MOVL 0(SP), AX // argc	9 MOVL 0(SP), AX // argc

10 LEAL 4(SP), BX // argv	10 LEAL 4(SP), BX // argv

(...skipping 281 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
292 MOVL 8(SP), AX	292 MOVL 8(SP), AX

293 MOVL 12(SP), CX	293 MOVL 12(SP), CX

294 LOCK	294 LOCK

295 CMPXCHGL CX, 0(BX)	295 CMPXCHGL CX, 0(BX)

296 JZ 3(PC)	296 JZ 3(PC)

297 MOVL $0, AX	297 MOVL $0, AX

298 RET	298 RET

299 MOVL $1, AX	299 MOVL $1, AX

300 RET	300 RET

301	301

	302 // bool runtime·cas64(uint64 *val, uint64 *old, uint64 new)

	303 // Atomically:

	304 // if(*val == *old){

	305 // *val = new;

	306 // return 1;

	307 // } else {

	308 // *old = *val

	309 // return 0;

	310 // }

	311 TEXT runtime·cas64(SB), 7, $0

	312 MOVL 4(SP), BP

	313 MOVL 8(SP), SI

	314 MOVL 0(SI), AX

	315 MOVL 4(SI), DX

	316 MOVL 12(SP), BX

	317 MOVL 16(SP), CX

	318 LOCK

	319 CMPXCHG8B 0(BP)

	320 JNZ cas64_fail

	321 MOVL $1, AX

	322 RET

	323 cas64_fail:

	324 MOVL AX, 0(SI)

	325 MOVL DX, 4(SI)

	326 XORL AX, AX

	327 RET

	328

302 // bool casp(void **p, void *old, void *new)	329 // bool casp(void **p, void *old, void *new)

303 // Atomically:	330 // Atomically:

304 // if(*p == old){	331 // if(*p == old){

305 // *p = new;	332 // *p = new;

306 // return 1;	333 // return 1;

307 // }else	334 // }else

308 // return 0;	335 // return 0;

309 TEXT runtime·casp(SB), 7, $0	336 TEXT runtime·casp(SB), 7, $0

310 MOVL 4(SP), BX	337 MOVL 4(SP), BX

311 MOVL 8(SP), AX	338 MOVL 8(SP), AX

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
348 TEXT runtime·atomicstorep(SB), 7, $0	375 TEXT runtime·atomicstorep(SB), 7, $0

349 MOVL 4(SP), BX	376 MOVL 4(SP), BX

350 MOVL 8(SP), AX	377 MOVL 8(SP), AX

351 XCHGL AX, 0(BX)	378 XCHGL AX, 0(BX)

352 RET	379 RET

353	380

354 TEXT runtime·atomicstore(SB), 7, $0	381 TEXT runtime·atomicstore(SB), 7, $0

355 MOVL 4(SP), BX	382 MOVL 4(SP), BX

356 MOVL 8(SP), AX	383 MOVL 8(SP), AX

357 XCHGL AX, 0(BX)	384 XCHGL AX, 0(BX)

	385 RET

	386

	387 // uint64 atomicload64(uint64 volatile* addr);

	388 // so actually

	389 // void atomicload64(uint64 *res, uint64 volatile *addr);

	390 TEXT runtime·atomicload64(SB), 7, $0

	391 MOVL 4(SP), BX

	392 MOVL 8(SP), AX

	393 // MOVQ (%EAX), %MM0

	394 BYTE $0x0f; BYTE $0x6f; BYTE $0x00

	395 // MOVQ %MM0, 0(%EBX)

	396 BYTE $0x0f; BYTE $0x7f; BYTE $0x03

	397 // EMMS

	398 BYTE $0x0F; BYTE $0x77

	399 RET

	400

	401 // void runtime·atomicstore64(uint64 volatile* addr, uint64 v);

	402 TEXT runtime·atomicstore64(SB), 7, $0

	403 MOVL 4(SP), AX

	404 // MOVQ and EMMS were introduced on the Pentium MMX.

	405 // MOVQ 0x8(%ESP), %MM0

	406 BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08

	407 // MOVQ %MM0, (%EAX)

	408 BYTE $0x0f; BYTE $0x7f; BYTE $0x00

	409 // EMMS

	410 BYTE $0x0F; BYTE $0x77

	411 // This is essentially a no-op, but it provides required memory fencing.

	412 // It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).

	413 XORL AX, AX

	414 LOCK

	415 XADDL AX, (SP)

	416 RET

	417

	418 TEXT runtime·prefetch(SB), 7, $0

	419 MOVL 4(SP), AX

	420 // PREFETCHNTA (AX)

	421 BYTE $0x0f; BYTE $0x18; BYTE $0x00

358 RET	422 RET

359	423

360 // void jmpdefer(fn, sp);	424 // void jmpdefer(fn, sp);

361 // called from deferreturn.	425 // called from deferreturn.

362 // 1. pop the caller	426 // 1. pop the caller

363 // 2. sub 5 bytes from the callers return	427 // 2. sub 5 bytes from the callers return

364 // 3. jmp to the argument	428 // 3. jmp to the argument

365 TEXT runtime·jmpdefer(SB), 7, $0	429 TEXT runtime·jmpdefer(SB), 7, $0

366 MOVL 4(SP), AX // fn	430 MOVL 4(SP), AX // fn

367 MOVL 8(SP), BX // caller sp	431 MOVL 8(SP), BX // caller sp

(...skipping 195 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
563 TEXT runtime·stackguard(SB),7,$0	627 TEXT runtime·stackguard(SB),7,$0

564 MOVL SP, DX	628 MOVL SP, DX

565 MOVL DX, sp+0(FP)	629 MOVL DX, sp+0(FP)

566 get_tls(CX)	630 get_tls(CX)

567 MOVL g(CX), BX	631 MOVL g(CX), BX

568 MOVL g_stackguard(BX), DX	632 MOVL g_stackguard(BX), DX

569 MOVL DX, guard+4(FP)	633 MOVL DX, guard+4(FP)

570 RET	634 RET

571	635

572 GLOBL runtime·tls0(SB), $32	636 GLOBL runtime·tls0(SB), $32

LEFT	RIGHT