|
|
Created:
11 years, 8 months ago by rsc Modified:
11 years, 4 months ago CC:
golang-dev Visibility:
Public. |
Descriptionruntime: fix cgo callbacks on windows
Fixes issue 4955.
Patch Set 1 #Patch Set 2 : diff -r 622045a2f25a https://code.google.com/p/go/ #Patch Set 3 : diff -r 622045a2f25a https://code.google.com/p/go/ #Patch Set 4 : diff -r 622045a2f25a https://code.google.com/p/go/ #
Total comments: 3
Patch Set 5 : diff -r 622045a2f25a https://code.google.com/p/go/ #Patch Set 6 : diff -r 622045a2f25a https://code.google.com/p/go/ #Patch Set 7 : diff -r 790eddf30d5d https://go.googlecode.com/hg/ #
MessagesTotal messages: 27
Hello golang-dev@googlegroups.com, I'd like you to review this change to https://code.google.com/p/go/
Sign in to reply to this message.
It fails on my 386: ... # runtime -cpu=1,2,4 panic: test timed out goroutine 255 [running]: testing.alarm() C:/go/root/src/pkg/testing/testing.go:526 +0x43 created by time.goFunc C:/go/root/src/pkg/time/sleep.go:122 +0x44 goroutine 1 [chan receive]: testing.RunTests(0x596424, 0x690278, 0x2b, 0x2b, 0x1, ...) C:/go/root/src/pkg/testing/testing.go:427 +0x69e testing.Main(0x596424, 0x690278, 0x2b, 0x2b, 0x690e08, ...) C:/go/root/src/pkg/testing/testing.go:358 +0x68 main.main() C:/DOCUME~1/brainman/LOCALS~1/Temp/go-build240754935/runtime/_test/_testmain.go:269 +0x80 goroutine 254 [syscall]: syscall.Syscall6(0x7c801812, 0x5, 0x768, 0x11094c00, 0x200, ...) C:/go/root/src/pkg/runtime/zsyscall_windows_windows_386.c:97 +0x49 syscall.ReadFile(0x768, 0x11094c00, 0x200, 0x200, 0x10fe0550, ...) C:/go/root/src/pkg/syscall/zsyscall_windows_386.go:264 +0xa0 syscall.Read(0x768, 0x11094c00, 0x200, 0x200, 0x480d35, ...) C:/go/root/src/pkg/syscall/syscall_windows.go:266 +0x70 os.(*File).read(0x10fe0a58, 0x11094c00, 0x200, 0x200, 0x0, ...) C:/go/root/src/pkg/os/file_windows.go:288 +0xd2 os.(*File).Read(0x10fe0a58, 0x11094c00, 0x200, 0x200, 0x0, ...) C:/go/root/src/pkg/os/file.go:95 +0x76 bytes.(*Buffer).ReadFrom(0x11084000, 0x11052d80, 0x10fe0a58, 0x0, 0x0, ...) C:/go/root/src/pkg/bytes/buffer.go:166 +0x194 io.Copy(0x11052560, 0x11084000, 0x11052d80, 0x10fe0a58, 0x0, ...) C:/go/root/src/pkg/io/io.go:328 +0x82 os/exec.func·003(0x5999f0, 0x597b40) C:/go/root/src/pkg/os/exec/exec.go:207 +0x53 os/exec.func·004(0x1106ddc0) C:/go/root/src/pkg/os/exec/exec.go:274 +0x2b created by os/exec.(*Cmd).Start C:/go/root/src/pkg/os/exec/exec.go:275 +0x510 goroutine 253 [syscall]: syscall.Syscall(0x7c802530, 0x2, 0x730, 0xffffffff, 0x0, ...) C:/go/root/src/pkg/runtime/zsyscall_windows_windows_386.c:74 +0x49 syscall.WaitForSingleObject(0x730, 0xffffffff, 0x0, 0x0, 0x0, ...) C:/go/root/src/pkg/syscall/zsyscall_windows_386.go:671 +0x66 os.(*Process).wait(0x1106dff0, 0x0, 0x0, 0x0) C:/go/root/src/pkg/os/exec_windows.go:16 +0x4b os.(*Process).Wait(0x1106dff0, 0x1106ddd0, 0x3, 0x4) C:/go/root/src/pkg/os/doc.go:43 +0x28 os/exec.(*Cmd).Wait(0x1103d5a0, 0x0, 0x0) C:/go/root/src/pkg/os/exec/exec.go:308 +0x125 os/exec.(*Cmd).Run(0x1103d5a0, 0x11084000, 0x4) C:/go/root/src/pkg/os/exec/exec.go:232 +0x4d os/exec.(*Cmd).CombinedOutput(0x1103d5a0, 0x2, 0x31421648, 0x4, 0x4, ...) C:/go/root/src/pkg/os/exec/exec.go:352 +0x1a0 runtime_test.checkStaleRuntime(0x1106eb40) C:/go/root/src/pkg/runtime/crash_test.go:46 +0x82 runtime_test.executeTest(0x1106eb40, 0x599cc8, 0x358, 0x0, 0x0, ...) C:/go/root/src/pkg/runtime/crash_test.go:18 +0x48 runtime_test.TestCgoSignalDeadlock(0x1106eb40) C:/go/root/src/pkg/runtime/crash_cgo_test.go:18 +0x3d testing.tRunner(0x1106eb40, 0x6902c0) C:/go/root/src/pkg/testing/testing.go:346 +0x86 created by testing.RunTests C:/go/root/src/pkg/testing/testing.go:426 +0x683 FAIL runtime 120.095s C:\go\root\src> I think this is because you have bug in your code. But even, if you fix it, your new version is slower. But we will find out. Alex https://codereview.appspot.com/7563043/diff/8001/src/pkg/runtime/sys_windows_... File src/pkg/runtime/sys_windows_386.s (right): https://codereview.appspot.com/7563043/diff/8001/src/pkg/runtime/sys_windows_... src/pkg/runtime/sys_windows_386.s:324: MOVL $lo-8(SP), BX Are you sure about that line? See my gdb output: Breakpoint 1, runtime.usleep (usec=void) at C:/go/root/src/pkg/runtime/sys_windows_386.s:334 334 TEXT runtime-+usleep(SB),7,$20 (gdb) disas Dump of assembler code for function runtime.usleep: => 0x0041e880 <+0>: sub $0x14,%esp 0x0041e883 <+3>: mov 0x568478,%eax 0x0041e889 <+9>: mov 0x18(%esp),%ebx 0x0041e88d <+13>: movl $0x0,0x10(%esp) 0x0041e895 <+21>: mov %ebx,0xc(%esp) 0x0041e899 <+25>: lea -0x8(%esp),%ebx 0x0041e89d <+29>: mov %ebx,0x8(%esp) 0x0041e8a1 <+33>: movl $0x0,0x4(%esp) 0x0041e8a9 <+41>: movl $0x0,(%esp) 0x0041e8b0 <+48>: mov %esp,%ebp 0x0041e8b2 <+50>: call 0x41e8c0 <checkstack4> 0x0041e8b7 <+55>: call *%eax 0x0041e8b9 <+57>: mov %ebp,%esp 0x0041e8bb <+59>: add $0x14,%esp 0x0041e8be <+62>: ret End of assembler dump. (gdb) https://codereview.appspot.com/7563043/diff/8001/src/pkg/runtime/sys_windows_... src/pkg/runtime/sys_windows_386.s:339: MOVL $lo-8(SP), BX As above https://codereview.appspot.com/7563043/diff/8001/src/pkg/runtime/sys_windows_... File src/pkg/runtime/sys_windows_amd64.s (right): https://codereview.appspot.com/7563043/diff/8001/src/pkg/runtime/sys_windows_... src/pkg/runtime/sys_windows_amd64.s:355: // a SYSENTER and a RET. Should we allow for stack alignment and room for first 4 parameters as per http://blogs.msdn.com/b/oldnewthing/archive/2004/01/14/58579.aspx ?
Sign in to reply to this message.
Thanks. In addition to MOVL not working the way I thought it did, I found a few other bugs too (wrong handle, wrong time units). Please try the latest patch set. We don't need the stack alignment and first 4 parameters on amd64 because it's just a system call, not an ordinary function call.
Sign in to reply to this message.
LGTM Thank you. Alex
Sign in to reply to this message.
Thanks for pushing back. The old sleep and yield were doing neither, which was making various pieces of Go spin instead of schedule nicely.
Sign in to reply to this message.
*** Submitted as https://code.google.com/p/go/source/detail?r=f800157ce425 *** runtime: fix cgo callbacks on windows Fixes issue 4955. R=golang-dev, alex.brainman CC=golang-dev https://codereview.appspot.com/7563043
Sign in to reply to this message.
Message was sent while issue was closed.
On 2013/03/07 14:18:52, rsc wrote: > *** Submitted as https://code.google.com/p/go/source/detail?r=f800157ce425 *** > > runtime: fix cgo callbacks on windows > > Fixes issue 4955. > > R=golang-dev, alex.brainman > CC=golang-dev > https://codereview.appspot.com/7563043 It looks like the change to yield causes breakage when running under wine (see https://code.google.com/p/go/issues/detail?id=5831). Wine does the windows emulation in user space, so some of the assumption made here may not hold? http://blogs.msdn.com/b/oldnewthing/archive/2004/01/14/58579.aspx mentions that there should be space on the stack reserved for register spills. Could that happen when the NT call is implemented in user space? How could I test if this is the case?
Sign in to reply to this message.
On Wed, Jul 3, 2013 at 4:14 PM, <hanwen@google.com> wrote: > On 2013/03/07 14:18:52, rsc wrote: > >> *** Submitted as >> > https://code.google.com/p/go/**source/detail?r=f800157ce425<https://code.goog... > > runtime: fix cgo callbacks on windows >> > > Fixes issue 4955. >> > > R=golang-dev, alex.brainman >> CC=golang-dev >> https://codereview.appspot.**com/7563043<https://codereview.appspot.com/7563043> >> > > It looks like the change to yield causes breakage when running under > wine (see https://code.google.com/p/go/**issues/detail?id=5831<https://code.google.com/... > ). > > Wine does the windows emulation in user space, so some of the assumption > made here may not hold? > yes, very possible. windows abi mandates 4 uintptr space below sp, and in fact we did have a cgo bug in the past that resulted from violating this. if we need to support where NtWaitForSingleObject is implemented in user space, i'm afraid we have to resort to using normal cgocall/stdcall mechanism to call it and thus incur big overhead for real windows machines. if we want to fix this problem, perhaps we'd better detect wine and use alternative call paths. > > http://blogs.msdn.com/b/**oldnewthing/archive/2004/01/**14/58579.aspx<http://... > mentions that there should be space on the stack reserved for register > spills. Could that happen when the NT call is implemented in user space? > How could I test if this is the case? >
Sign in to reply to this message.
What is the problem with allocating the extra space? (sorry for the stupid question; I'm not very familiar with either windows or the Go runtime.) On Wed, Jul 3, 2013 at 10:27 AM, minux <minux.ma@gmail.com> wrote: > > On Wed, Jul 3, 2013 at 4:14 PM, <hanwen@google.com> wrote: >> >> On 2013/03/07 14:18:52, rsc wrote: >>> >>> *** Submitted as >> >> https://code.google.com/p/go/source/detail?r=f800157ce425 *** >> >>> runtime: fix cgo callbacks on windows >> >> >>> Fixes issue 4955. >> >> >>> R=golang-dev, alex.brainman >>> CC=golang-dev >>> https://codereview.appspot.com/7563043 >> >> >> It looks like the change to yield causes breakage when running under >> wine (see https://code.google.com/p/go/issues/detail?id=5831). >> >> Wine does the windows emulation in user space, so some of the assumption >> made here may not hold? > > yes, very possible. > windows abi mandates 4 uintptr space below sp, and in fact we did have a cgo > bug in the past that resulted from violating this. > > if we need to support where NtWaitForSingleObject is implemented in user > space, > i'm afraid we have to resort to using normal cgocall/stdcall mechanism to > call it and > thus incur big overhead for real windows machines. > > if we want to fix this problem, perhaps we'd better detect wine and use > alternative > call paths. >> >> >> http://blogs.msdn.com/b/oldnewthing/archive/2004/01/14/58579.aspx >> mentions that there should be space on the stack reserved for register >> spills. Could that happen when the NT call is implemented in user space? >> How could I test if this is the case? -- Han-Wen Nienhuys Google Munich hanwen@google.com
Sign in to reply to this message.
Message was sent while issue was closed.
On 2013/07/03 08:14:11, hanwen wrote: > > Wine does the windows emulation in user space, so some of the assumption made > here may not hold? It is possible. The api we use is undocumented / non-official. > http://blogs.msdn.com/b/oldnewthing/archive/2004/01/14/58579.aspx mentions that > there should be space on the stack reserved for register spills. Could that > happen when the NT call is implemented in user space? How could I test if this > is the case? Just change asm to leave room for 4 registers. You might discover that you won't be able to do that (linker will complain) because this function is marked as "no split stack". You might run out of stack. Alex PS: I am not convinced your problem is in this function. Your test has a few questions. But lets test your theory first.
Sign in to reply to this message.
Message was sent while issue was closed.
On 2013/07/03 08:46:16, brainman wrote: > On 2013/07/03 08:14:11, hanwen wrote: > ... > Just change asm to leave room for 4 registers. ... hanwen, I will try to do it myself tomorrow, if you get nowhere. Alex
Sign in to reply to this message.
On Wed, Jul 3, 2013 at 4:43 PM, Han-Wen Nienhuys <hanwen@google.com> wrote: > What is the problem with allocating the extra space? > we're calling osyield on segmented goroutine stack, where there might not be enough stack space for an arbitrary dll functions to run. so if the NtWaitForSingleObject is implemented as userspace function, we have no choice but to switch back to the OS stack to call it safely. > > (sorry for the stupid question; I'm not very familiar with either > windows or the Go runtime.) apply a change like this: diff -r bb98e4c9b6b3 src/pkg/runtime/sys_windows_386.s --- a/src/pkg/runtime/sys_windows_386.s Tue Jul 02 22:37:19 2013 -0700 +++ b/src/pkg/runtime/sys_windows_386.s Wed Jul 03 16:56:47 2013 +0800 @@ -367,5 +367,5 @@ // (It is just a CALL to the system call dispatch.) // If the linker okays the call to checkstack4 (a NOSPLIT function) // then the call to NtWaitForSingleObject is okay too. -TEXT checkstack4<>(SB),7,$4 +TEXT checkstack4<>(SB),7,$20 RET and see if the linker complains that nonsplit stack overflow, if does complain, it's very probably this is indeed (or at least, part of) the root cause. but if it doesn't complain, it's probably not the root cause (of course, it depends on how much stack space does the NtWaitForSingleObject call need). i just realized that you found problem on windows/386 port, it seems the extra register spill area rule doesn't apply to 386 as all arguments are always passed on stack. so i think the cause might be that NtWaitForSingleObject requires more stack than available, so it overflows the goroutine stack and causes memory corruption. if my hypothesis is correct, i think we have to detect wine and use different call path on wine.
Sign in to reply to this message.
Message was sent while issue was closed.
On 2013/07/03 09:08:04, minux wrote: > On Wed, Jul 3, 2013 at 4:43 PM, Han-Wen Nienhuys <mailto:hanwen@google.com> wrote: > > > apply a change like this: > diff -r bb98e4c9b6b3 src/pkg/runtime/sys_windows_386.s This cannot be right. The problem is on windows/amd64 (not windows/386). Isn't it? Alex
Sign in to reply to this message.
Message was sent while issue was closed.
On 2013/07/03 09:08:04, minux wrote: > > so i think the cause might be that NtWaitForSingleObject requires more > stack than > available, so it overflows the goroutine stack and causes memory corruption. That could well be a problem too. But, then it could be happening on Windows too (not just on wine). :-( But I haven't heard anyone complain yet. If that is the case, we should go back to switching stacks. Alex
Sign in to reply to this message.
I applied this, $ hg diff diff -r 09e39a9ce38e src/pkg/runtime/sys_windows_amd64.s --- a/src/pkg/runtime/sys_windows_amd64.s Wed Jul 03 00:33:38 2013 +0800 +++ b/src/pkg/runtime/sys_windows_amd64.s Wed Jul 03 11:09:29 2013 +0200 @@ -335,7 +335,10 @@ MOVQ BX, (R8) MOVQ $-1, CX // handle MOVQ $0, DX // alertable + + SUBQ $32, SP CALL AX + ADDQ $32, SP RET but see the same problem as before. Another thing to note is that the crash always says [signal 0xc0000005 code=0x0 addr=0x30 pc=0x41f530] if this was a routine overwriting the stack, wouldn't the fault addr change depending on how I change the repro case? I've trimmed it down from its original significantly, but addr is always 0x30. On Wed, Jul 3, 2013 at 11:10 AM, <alex.brainman@gmail.com> wrote: > On 2013/07/03 09:08:04, minux wrote: >> >> On Wed, Jul 3, 2013 at 4:43 PM, Han-Wen Nienhuys > > <mailto:hanwen@google.com> wrote: > > >> apply a change like this: >> diff -r bb98e4c9b6b3 src/pkg/runtime/sys_windows_386.s > > > This cannot be right. The problem is on windows/amd64 (not windows/386). > Isn't it? > > Alex > > https://codereview.appspot.com/7563043/ -- Han-Wen Nienhuys Google Munich hanwen@google.com
Sign in to reply to this message.
On Wed, Jul 3, 2013 at 11:15 AM, <alex.brainman@gmail.com> wrote: >> so i think the cause might be that NtWaitForSingleObject requires more >> stack than >> available, so it overflows the goroutine stack and causes memory > > corruption. > > That could well be a problem too. But, then it could be happening on > Windows too (not just on wine). :-( But I haven't heard anyone complain > yet. If that is the case, we should go back to switching stacks. Wouldn't NtWaitForSingleObject execute in the windows kernel address space, and hence have a different memory layout, with likely more stack available? -- Han-Wen Nienhuys Google Munich hanwen@google.com
Sign in to reply to this message.
On Wed, Jul 3, 2013 at 5:10 PM, <alex.brainman@gmail.com> wrote: > On 2013/07/03 09:08:04, minux wrote: > >> On Wed, Jul 3, 2013 at 4:43 PM, Han-Wen Nienhuys >> > <mailto:hanwen@google.com> wrote: > > > apply a change like this: >> diff -r bb98e4c9b6b3 src/pkg/runtime/sys_windows_**386.s >> > > This cannot be right. The problem is on windows/amd64 (not windows/386). > Isn't it? > i looked at the original bug report ( https://code.google.com/p/go/issues/detail?id=5831) and i was misled by the small pc values... yes, it should be a problem of the windows/amd64. On Wed, Jul 3, 2013 at 5:15 PM, <alex.brainman@gmail.com> wrote: > On 2013/07/03 09:08:04, minux wrote: > > so i think the cause might be that NtWaitForSingleObject requires more >> stack than >> available, so it overflows the goroutine stack and causes memory >> > corruption. > > That could well be a problem too. But, then it could be happening on > Windows too (not just on wine). :-( But I haven't heard anyone complain > yet. If that is the case, we should go back to switching stacks. > i suspect that is the real cause now given hanwen's experiments. the reason why it doesn't happen on real windows, is that on real windows systems, NtWaitForSingleObject in ntdll.ddl is just a syscall (the real syscall in the sense of int 0x2E/sysenter) wrapper, so it won't use arbitrary user space stack space. (in fact, as Russ checks in the code, it requires precisely 4 bytes of stack on i386 and no extra stack on amd64) wine implements ntdll.dll in user space, so it can use arbitrary amount of user space stack space, so if we want to support wine, either we switch stacks and accept the overhead, or use different call path for wine.
Sign in to reply to this message.
According to the wine FAQ we can look for wine_get_version in ntdll. If it's there, we're on Wine. How would I go about implementing a different call path? Check the wine_get_version address in the asm code, write a C wrapper that calls to the asm code, or make runtime.osyield be something that can be set at runtime? On Wed, Jul 3, 2013 at 11:25 AM, minux <minux.ma@gmail.com> wrote: > > On Wed, Jul 3, 2013 at 5:10 PM, <alex.brainman@gmail.com> wrote: >> >> On 2013/07/03 09:08:04, minux wrote: >>> >>> On Wed, Jul 3, 2013 at 4:43 PM, Han-Wen Nienhuys >> >> <mailto:hanwen@google.com> wrote: >> >> >>> apply a change like this: >>> diff -r bb98e4c9b6b3 src/pkg/runtime/sys_windows_386.s >> >> >> This cannot be right. The problem is on windows/amd64 (not windows/386). >> Isn't it? > > i looked at the original bug report > (https://code.google.com/p/go/issues/detail?id=5831) > and i was misled by the small pc values... > yes, it should be a problem of the windows/amd64. > > On Wed, Jul 3, 2013 at 5:15 PM, <alex.brainman@gmail.com> wrote: >> >> On 2013/07/03 09:08:04, minux wrote: >> >>> so i think the cause might be that NtWaitForSingleObject requires more >>> stack than >>> available, so it overflows the goroutine stack and causes memory >> >> corruption. >> >> That could well be a problem too. But, then it could be happening on >> Windows too (not just on wine). :-( But I haven't heard anyone complain >> yet. If that is the case, we should go back to switching stacks. > > i suspect that is the real cause now given hanwen's experiments. > > the reason why it doesn't happen on real windows, is that on real windows > systems, NtWaitForSingleObject in ntdll.ddl is just a syscall (the real > syscall in > the sense of int 0x2E/sysenter) wrapper, so it won't use arbitrary user > space > stack space. (in fact, as Russ checks in the code, it requires precisely 4 > bytes > of stack on i386 and no extra stack on amd64) > > wine implements ntdll.dll in user space, so it can use arbitrary amount of > user space stack space, so if we want to support wine, either we > switch stacks and accept the overhead, > or use different call path for wine. -- Han-Wen Nienhuys Google Munich hanwen@google.com
Sign in to reply to this message.
Message was sent while issue was closed.
On 2013/07/03 09:18:03, hanwen wrote: > I applied this, > > $ hg diff > diff -r 09e39a9ce38e src/pkg/runtime/sys_windows_amd64.s > --- a/src/pkg/runtime/sys_windows_amd64.s Wed Jul 03 00:33:38 2013 +0800 > +++ b/src/pkg/runtime/sys_windows_amd64.s Wed Jul 03 11:09:29 2013 +0200 > @@ -335,7 +335,10 @@ > MOVQ BX, (R8) > MOVQ $-1, CX // handle > MOVQ $0, DX // alertable > + > + SUBQ $32, SP > CALL AX > + ADDQ $32, SP > RET > > but see the same problem as before. > This is wrong. Firstly, if you look at what this function does, you will see that it has local variable that would be overwritten still if the callee is to use these 4 stack places. And secondly go linker does not see stack growth when SP is manipulated directly. You must use either PUSH/PULL commands or you could change local space size ($8) at TEXT runtime.osyield. If you do this, you might discover, as minux said, that you will run out of stack. As far as I remember, limited stack room is the reason why rsc used NtWaitForSingleObject here. It does not use stack itself, and we don't need to switch stacks to call it. I think, if we try to switch stack here, we will run out of room. So our idea of switching stacks here conditionally or not will not fly. runtime.osyield is marked as "no stack split", because it is called by "no split stack" function. There might be a few "no stack split" functions on the call stack - that is why we are so short of stack space. I am not sure what to do. Perhaps we should wait for Russ. While we wait you could confirm minux's idea - see how much stack does NtWaitForSingleObject use on Wine. Just use gdb to see where it takes you. If the stack usage is not very large, perhaps we can do something ... Alex
Sign in to reply to this message.
Message was sent while issue was closed.
On 2013/07/03 09:29:53, hanwen wrote: > According to the wine FAQ we can look for wine_get_version in ntdll. > If it's there, we're on Wine. That should be doable. > How would I go about implementing a different call path? ... Like I said before, switching stacks might not be an option here. :-( Alex
Sign in to reply to this message.
On Wed, Jul 3, 2013 at 1:43 PM, <alex.brainman@gmail.com> wrote: > On 2013/07/03 09:18:03, hanwen wrote: >> >> I applied this, > > >> $ hg diff >> diff -r 09e39a9ce38e src/pkg/runtime/sys_windows_amd64.s >> --- a/src/pkg/runtime/sys_windows_amd64.s Wed Jul 03 00:33:38 2013 > > +0800 >> >> +++ b/src/pkg/runtime/sys_windows_amd64.s Wed Jul 03 11:09:29 2013 > > +0200 >> >> @@ -335,7 +335,10 @@ >> MOVQ BX, (R8) >> MOVQ $-1, CX // handle >> MOVQ $0, DX // alertable >> + >> + SUBQ $32, SP >> CALL AX >> + ADDQ $32, SP >> RET > > >> but see the same problem as before. > > > > This is wrong. Firstly, if you look at what this function does, you will > see that it has local variable that would be overwritten still if the > callee is to use these 4 stack places. And secondly go linker does not > see stack growth when SP is manipulated directly. You must use either > PUSH/PULL commands or you could change local space size ($8) at TEXT > runtime.osyield. > > If you do this, you might discover, as minux said, that you will run out > of stack. As far as I remember, limited stack room is the reason why rsc > used NtWaitForSingleObject here. It does not use stack itself, and we > don't need to switch stacks to call it. I think, if we try to switch > stack here, we will run out of room. So our idea of switching stacks > here conditionally or not will not fly. runtime.osyield is marked as "no > stack split", because it is called by "no split stack" function. There > might be a few "no stack split" functions on the call stack - that is > why we are so short of stack space. > > I am not sure what to do. Perhaps we should wait for Russ. While we wait > you could confirm minux's idea - see how much stack does > NtWaitForSingleObject use on Wine. Just use gdb to see where it takes > you. If the stack usage is not very large, perhaps we can do something How big is 'not very large' in this context? WaitForSingleObject doesn't look too good, http://source.winehq.org/source/dlls/ntdll/sync.c#L1090 it has an array of 64 int32s for starters, to store local handles. However, the point is to finally yield the CPU, right? at the bottom, it calls NtSchedYield, which is an alias for sched_yield(2), see http://source.winehq.org/source/dlls/ntdll/sync.c#L1191 could we conditionally call NtSchedYield instead? > ... > > Alex > > https://codereview.appspot.com/7563043/ -- Han-Wen Nienhuys Google Munich hanwen@google.com
Sign in to reply to this message.
On Wed, Jul 3, 2013 at 8:26 PM, Han-Wen Nienhuys <hanwen@google.com> wrote: > > How big is 'not very large' in this context? WaitForSingleObject > i'd say a few uintptrs. because the linker will check that any chain of non-split stack calls will use at most 120 bytes of stack. > doesn't look too good, > > http://source.winehq.org/source/dlls/ntdll/sync.c#L1090 > > it has an array of 64 int32s for starters, to store local handles. > ok, then it's a non-starter. > > However, the point is to finally yield the CPU, right? at the bottom, > it calls NtSchedYield, which is an alias for sched_yield(2), see > > http://source.winehq.org/source/dlls/ntdll/sync.c#L1191 > > could we conditionally call NtSchedYield instead? > calling NtSchedYield on normal windows doesn't yield hard enough, but i think it's good enough for wine. have you changed experimented with replacing NtWaitForSingleObject with NtSchedYield in runtime.osyield? does it run well on wine?
Sign in to reply to this message.
Message was sent while issue was closed.
On 2013/07/03 12:26:45, hanwen wrote: > > How big is 'not very large' in this context? ... 20-40 bytes. You could try and increase runtime.osyield frame size and see when the linker complains. > ... WaitForSingleObject > doesn't look too good, Yes. > However, the point is to finally yield the CPU, right? at the bottom, > it calls NtSchedYield, which is an alias for sched_yield(2), see > > http://source.winehq.org/source/dlls/ntdll/sync.c#L1191 > > could we conditionally call NtSchedYield instead? Sure, we could try. It might even work for Windows, then we can use it everywhere. There is also similar function runtime.usleep that is in the same boat. You didn't have problem with runtime.usleep? Alex
Sign in to reply to this message.
On Wed, Jul 3, 2013 at 8:38 PM, <alex.brainman@gmail.com> wrote: > Sure, we could try. It might even work for Windows, then we can use it everywhere. There is also similar function runtime.usleep that is in the > as discussed on the issue tracker, https://code.google.com/p/go/issues/detail?id=4955, NtYieldExecution is not good enough for normal windows use, so i'm afraid we have to detect wine and only let wine use that call path. > same boat. You didn't have problem with runtime.usleep? > yes, i'd expect runtime.usleep will cause the same problem on wine. the major uses of runtime.usleep are from GC, parfor (also used by GC) and sysmon, and cgo callback in proc.c. sysmon always runs on its own M, so nothing to worry about. as the GC is now happening on g0 (OS thread stack), so i'd expect a lot of calls to usleep will actually be made from OS stack already, and it might explain why it doesn't surface. another use of runtime.usleep is in external cgo callback path. @hanwen, have you tried to run misc/cgo/test inside wine? it might expose other problems (you won't be able to cross compile that test, so you will have to build it from a windows machine).
Sign in to reply to this message.
I tried TEXT runtime·osyield(SB),7,$0 MOVQ runtime·NtYieldExecution(SB), AX CALL AX RET TEXT runtime·usleep(SB),7,$8 MOVQ runtime·NtDelayExecution(SB), AX MOVL usec+0(FP), BX IMULQ $10, BX NEGQ BX MOVQ SP, DX // ptime MOVQ BX, (DX) MOVQ $0, CX // alertable CALL AX RET this improved things a little bit, in that it sometimes exits successfully, but 1 in 3 times, it still crashes. When it crashes, the program completes more loop iterations successfully. Is there a way to ensure that NtYieldExecution has enough stack? I tried adding a random amount to the last argument of TEXT, but it made the crashiness worse. On Wed, Jul 3, 2013 at 2:53 PM, minux <minux.ma@gmail.com> wrote: > > On Wed, Jul 3, 2013 at 8:38 PM, <alex.brainman@gmail.com> wrote: >> >> Sure, we could try. It might even work for Windows, then we can use it >> >> everywhere. There is also similar function runtime.usleep that is in the > > as discussed on the issue tracker, > https://code.google.com/p/go/issues/detail?id=4955, > NtYieldExecution is not good enough for normal windows use, so i'm afraid > we have to detect wine and only let wine use that call path. >> >> same boat. You didn't have problem with runtime.usleep? > > yes, i'd expect runtime.usleep will cause the same problem on wine. > > the major uses of runtime.usleep are from GC, parfor (also used by GC) and > sysmon, and cgo callback in proc.c. > > sysmon always runs on its own M, so nothing to worry about. > > as the GC is now happening on g0 (OS thread stack), so i'd expect a lot of > calls > to usleep will actually be made from OS stack already, and it might explain > why it > doesn't surface. > > another use of runtime.usleep is in external cgo callback path. > @hanwen, have you tried to run misc/cgo/test inside wine? it might expose > other problems (you won't be able to cross compile that test, so you will > have > to build it from a windows machine). -- Han-Wen Nienhuys Google Munich hanwen@google.com
Sign in to reply to this message.
Message was sent while issue was closed.
On 2013/07/03 14:40:15, hanwen wrote: > I tried > > TEXT runtime·osyield(SB),7,$0 > MOVQ runtime·NtYieldExecution(SB), AX > CALL AX > RET > > TEXT runtime·usleep(SB),7,$8 > MOVQ runtime·NtDelayExecution(SB), AX > MOVL usec+0(FP), BX > IMULQ $10, BX > NEGQ BX > MOVQ SP, DX // ptime > > MOVQ BX, (DX) > MOVQ $0, CX // alertable > CALL AX > RET > > this improved things a little bit, in that it sometimes exits > successfully, but 1 in 3 times, it still crashes. When it crashes, the > program completes more loop iterations successfully. > > Is there a way to ensure that NtYieldExecution has enough stack? Not realy. You are running on Go stack here. The stack is allocated from heap as any other memory. It is quite small at the start. Every Go function extends it, if need be. Once you are in Wine, no one will extend that stack. So you have to do it somehow here, and have enough to last the distance, since all normal programs (non-Go) assume that stack is unlimited. This situation is worse here, because we have few special functions on our stack that "do not extend stack", so linker makes sure that they have enough room to run. Here I have changed source to demonstrate: # hg diff -g diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c --- a/src/pkg/runtime/proc.c +++ b/src/pkg/runtime/proc.c @@ -752,13 +752,11 @@ lockextra(bool nilokay) { M *mp; - void (*yield)(void); for(;;) { mp = runtime·atomicloadp(&runtime·extram); if(mp == MLOCKED) { - yield = runtime·osyield; - yield(); + runtime·osyield(); continue; } if(mp == nil && !nilokay) { @@ -766,8 +764,7 @@ continue; } if(!runtime·casp(&runtime·extram, mp, MLOCKED)) { - yield = runtime·osyield; - yield(); + runtime·osyield(); continue; } break; diff --git a/src/pkg/runtime/sys_windows_amd64.s b/src/pkg/runtime/sys_windows_amd64.s --- a/src/pkg/runtime/sys_windows_amd64.s +++ b/src/pkg/runtime/sys_windows_amd64.s @@ -322,7 +322,7 @@ TEXT runtime·remove_exception_handler(SB),7,$0 RET -TEXT runtime·osyield(SB),7,$8 +TEXT runtime·osyield(SB),7,$100 // Tried NtYieldExecution but it doesn't yield hard enough. // NtWaitForSingleObject being used here as Sleep(0). // The CALL is safe because NtXxx is a system call wrapper: # GOOS=windows GOARCH=amd64 go test -c # testmain runtime.needm: nosplit stack overflow 120 assumed on entry to runtime.needm 88 after runtime.needm uses 32 80 on entry to lockextra 48 after lockextra uses 32 40 on entry to runtime.osyield -60 after runtime.osyield uses 100 lockextra: nosplit stack overflow 120 assumed on entry to lockextra 88 after lockextra uses 32 80 on entry to runtime.osyield -20 after runtime.osyield uses 100 # So, assuming runtime·osyield call requires 100 bytes of stack, the compiler says we are 60 bytes short. So, very little room. > ... I > tried adding a random amount to the last argument of TEXT, but it made > the crashiness worse. > Sure, you increased runtime·osyield stack size. More chance for it to blow away someone elses data. We could switch onto OS thread here, but standard switching code takes more stack room than we have here. And as far as I remember it uses functions that split stack (once you are in no-split-stack world, you have to stay that way). Perhaps we can write some custom asm code to do it, but my asm is not good. :-) You could just try and do nothing in osyield. Just return from there. You might discover that runtime will start hogging cpu, but. I really don't know what to do. Perhaps Russ will help. Alex
Sign in to reply to this message.
It should work to switch to the m stack, which was allocated by CreateThread and should be "big enough". Untested: TEXT runtime·osyield(SB),7,$8 // Tried NtYieldExecution but it doesn't yield hard enough. // NtWaitForSingleObject being used here as Sleep(0). // The CALL is safe because NtXxx is a system call wrapper: // it puts the right system call number in AX, then does // a SYSENTER and a RET. MOVQ runtime·NtWaitForSingleObject(SB), AX MOVQ $1, BX NEGQ BX MOVQ SP, R8 // ptime MOVQ BX, (R8) MOVQ $-1, CX // handle MOVQ $0, DX // alertable // Execute call on m->g0 stack, in case we are not actually // calling a system call wrapper, like when running under WINE. get_tls(R15) MOVQ m(R15), R14 CMPQ g(R15), m_g0(R14) JNE 3(PC) // executing on m->g0 already CALL AX RET // Switch to m->g0 stack and back. MOVQ m_g0(R14), R14 MOVQ (g_sched+gobuf_sp)(R14), R14 MOVQ SP, -8(R14) LEAQ -8(R14), SP CALL AX MOVQ 0(SP), SP RET
Sign in to reply to this message.
|