N
NewsBot
I hand compiled the InvSqrt() function that I have been studying. Here is the function:
/*
 * Fast approximation of 1/sqrt(x), intended for x > 0.
 *
 * The bits of the float are reinterpreted as an integer to build an initial
 * estimate, which is then refined by two Newton steps of the form
 *     y' = (3 - x*y*y) * 0.5 * y
 *
 * NOTE(review): assumes int and float are both 32 bits wide.
 */
float InvSqrt ( float x )
{
    /* Both members share storage, so writing one and reading the other
       reinterprets the same bits. */
    union bitConverter {
        int intPart;
        float floatPart;
    } bitC;

    /* Initial estimate from the bit pattern (magic constant minus half the
       bits); this is a bit-level guess, not a Taylor series. */
    bitC.floatPart = x;
    bitC.intPart = 0x5f375a86 - (bitC.intPart >> 1);

    /* Do First Newton Approximation.
       0.5 is a double constant, so the trailing multiplications are carried
       out in double before the result is narrowed back to float. */
    bitC.floatPart = ( 3.0f - x * bitC.floatPart * bitC.floatPart ) * 0.5 * bitC.floatPart;

    /* Do Second Newton Approximation and Return */
    return ( 3.0f - x * bitC.floatPart * bitC.floatPart ) * 0.5 * bitC.floatPart;
}
Hand compiling it, I get this:
/*
 * Hand-compiled fast approximation of 1/sqrt(x), intended for x > 0.
 *
 * On 32-bit MSVC the work is done in an inline x87 assembly block: an
 * integer bit trick produces the initial estimate y, then two Newton steps
 * y' = y * (a - x*y*y) * b are evaluated on the x87 register stack with
 * a = 3.0 and b = 0.5.  Other compilers get an equivalent C fallback.
 */
float InvSqrt ( float x )
{
#if defined(_MSC_VER) && defined(_M_IX86)
    float bitC, a = 3.0, b = 0.5;
    __asm {
        fld     dword ptr [b]           // st0=b
        fld     dword ptr [x]           // st0=x  st1=b
        fld     dword ptr [a]           // st0=a  st1=x  st2=b
        mov     eax,dword ptr [x]       // raw bits of x
        sar     eax,1                   // bits >> 1 (arithmetic shift)
        mov     ecx,5F375A86h
        sub     ecx,eax                 // ecx = magic - (bits >> 1)
        mov     dword ptr [bitC],ecx
        fld     dword ptr [bitC]        // st0=y  st1=a  st2=x  st3=b
        fld     st                      // st0=y  st1=y  st2=a  st3=x  st4=b
        fmul    st,st(3)                // st0 = y*x
        fmul    st,st(1)                // st0 = x*y*y
        fsubr   st,st(2)                // st0 = a - x*y*y
        fmul    st,st(4)                // st0 = (a - x*y*y)*b
        fmulp   st(1),st                // pop: st0=y1  st1=a  st2=x  st3=b
        fld     st                      // st0=y1 st1=y1 st2=a  st3=x  st4=b
        fmul    st,st(3)                // st0 = y1*x
        fmul    st,st(1)                // st0 = x*y1*y1
        fsubp   st(2),st                // pop: st0=y1  st1=a - x*y1*y1  st2=x  st3=b
        fmulp   st(1),st                // pop: st0=y1*(a - x*y1*y1)  st1=x  st2=b
        fmulp   st(2),st                // pop: st0=x  st1=result
        fstp    dword ptr [x]           // pop the leftover copy of x: st0=result
    }
    /* No return statement on purpose: the result is left in st(0), which is
       where the caller picks up a float return value. */
#else
    /* Portable equivalent for compilers without MSVC x86 inline assembly. */
    union {
        int intPart;
        float floatPart;
    } bitC;
    const float a = 3.0f, b = 0.5f;

    bitC.floatPart = x;
    bitC.intPart = 0x5f375a86 - (bitC.intPart >> 1);
    bitC.floatPart = ( a - x * bitC.floatPart * bitC.floatPart ) * b * bitC.floatPart;
    return ( a - x * bitC.floatPart * bitC.floatPart ) * b * bitC.floatPart;
#endif
}
When I view the disassembly in Visual Studio, that becomes:
//This Visual Studio wrote
00401035* fld******** dword ptr [__real@40400000 (4021A4h)]
0040103B* fstp******* dword ptr [esp+34h]
0040103F* fld******** dword ptr [__real@3f000000 (4021A0h)]
00401045* fstp******* dword ptr [esp+38h]
00401049* lea******** esp,[esp]
00401050* fld******** dword ptr [esp+28h]
00401054* fstp******* dword ptr [esp+2Ch]
//This I wrote
00401058* fld******** dword ptr [esp+38h]
0040105C* fld******** dword ptr [esp+2Ch]
00401060* fld******** dword ptr [esp+34h]
00401064* mov******** eax,dword ptr [esp+2Ch]
00401068* sar******** eax,1
0040106A* mov******** ecx,5F375A86h
0040106F* sub******** ecx,eax
00401071* mov******** dword ptr [esp+30h],ecx
00401075* fld******** dword ptr [esp+30h]
00401079* fld******** st(0)
0040107B* fmul******* st,st(3)
0040107D* fmul******* st,st(1)
0040107F* fsubr****** st,st(2)
00401081* fmul******* st,st(4)
00401083* fmulp****** st(1),st
00401085* fld******** st(0)
00401087* fmul******* st,st(3)
00401089* fmul******* st,st(1)
0040108B* fsubp****** st(2),st
0040108D* fmulp****** st(1),st
0040108F* fmulp****** st(2),st
Here is what Visual Studio generates when it compiles the code:
00401035* fld******** dword ptr [__real@40400000 (4021A8h)]
0040103B* mov******** edx,5F375A86h
00401040* fld******** qword ptr [__real@3fe0000000000000 (4021A0h)]
00401046* sub******** esp,30h
00401049* fld******** dword ptr [esp+6Ch]
0040104D* fst******** dword ptr [esp+68h]
00401051* mov******** ecx,dword ptr [esp+68h]
00401055* sar******** ecx,1
00401057* sub******** edx,ecx
00401059* mov******** dword ptr [esp+68h],edx
0040105D* fld******** dword ptr [esp+68h]
00401061* fld******** st(0)
00401063* fmul******* st,st(1)
00401065* fmul******* st,st(2)
00401067* fsubr****** st,st(4)
00401069* fmulp****** st(1),st
0040106B* fmul******* st,st(2)
0040106D* fld******** st(0)
0040106F* fmul******* st,st(1)
00401071* fmul******* st,st(2)
00401073* fsubp****** st(4),st
00401075* fmulp****** st(3),st
00401077* fxch******* st(2)
00401079* fmulp****** st(1),st
The assembly that Visual Studio generates is obviously more efficient, and I probably should leave well enough alone, but for educational purposes I want to learn how to hand compile code as efficiently as Visual Studio does.
I have no clue how I thought of the correct words to get any useful information from Google, but from what I can gather through searching, I believe that the inefficiency comes from the fact that I do not know how to take advantage of function calling conventions. I only know that you can return a float by leaving it on the stack when your function terminates, and I learned that by studying the assembly that Visual Studio creates. There are plenty of websites with information on the x86 function call conventions, but I cannot seem to find any with information on the x87 function call conventions, which is what I expect my function uses, because it receives and returns floating point numbers.
Does anyone know what the x87 Function-call Conventions are?
Of course, I could be completely off track: my problem may be that I do not know how to get constants into assembly code, rather than that I am not handling the calling conventions properly, or perhaps it is a combination of the two. I am an undergraduate student and I have not taken any courses on assembly, so I am not really qualified to diagnose what I am doing wrong.
More...
View All Our Microsoft Related Feeds
/*
 * Fast approximation of 1/sqrt(x), intended for x > 0.
 *
 * The bits of the float are reinterpreted as an integer to build an initial
 * estimate, which is then refined by two Newton steps of the form
 *     y' = (3 - x*y*y) * 0.5 * y
 *
 * NOTE(review): assumes int and float are both 32 bits wide.
 */
float InvSqrt ( float x )
{
    /* Both members share storage, so writing one and reading the other
       reinterprets the same bits. */
    union bitConverter {
        int intPart;
        float floatPart;
    } bitC;

    /* Initial estimate from the bit pattern (magic constant minus half the
       bits); this is a bit-level guess, not a Taylor series. */
    bitC.floatPart = x;
    bitC.intPart = 0x5f375a86 - (bitC.intPart >> 1);

    /* Do First Newton Approximation.
       0.5 is a double constant, so the trailing multiplications are carried
       out in double before the result is narrowed back to float. */
    bitC.floatPart = ( 3.0f - x * bitC.floatPart * bitC.floatPart ) * 0.5 * bitC.floatPart;

    /* Do Second Newton Approximation and Return */
    return ( 3.0f - x * bitC.floatPart * bitC.floatPart ) * 0.5 * bitC.floatPart;
}
Hand compiling it, I get this:
/*
 * Hand-compiled fast approximation of 1/sqrt(x), intended for x > 0.
 *
 * On 32-bit MSVC the work is done in an inline x87 assembly block: an
 * integer bit trick produces the initial estimate y, then two Newton steps
 * y' = y * (a - x*y*y) * b are evaluated on the x87 register stack with
 * a = 3.0 and b = 0.5.  Other compilers get an equivalent C fallback.
 */
float InvSqrt ( float x )
{
#if defined(_MSC_VER) && defined(_M_IX86)
    float bitC, a = 3.0, b = 0.5;
    __asm {
        fld     dword ptr [b]           // st0=b
        fld     dword ptr [x]           // st0=x  st1=b
        fld     dword ptr [a]           // st0=a  st1=x  st2=b
        mov     eax,dword ptr [x]       // raw bits of x
        sar     eax,1                   // bits >> 1 (arithmetic shift)
        mov     ecx,5F375A86h
        sub     ecx,eax                 // ecx = magic - (bits >> 1)
        mov     dword ptr [bitC],ecx
        fld     dword ptr [bitC]        // st0=y  st1=a  st2=x  st3=b
        fld     st                      // st0=y  st1=y  st2=a  st3=x  st4=b
        fmul    st,st(3)                // st0 = y*x
        fmul    st,st(1)                // st0 = x*y*y
        fsubr   st,st(2)                // st0 = a - x*y*y
        fmul    st,st(4)                // st0 = (a - x*y*y)*b
        fmulp   st(1),st                // pop: st0=y1  st1=a  st2=x  st3=b
        fld     st                      // st0=y1 st1=y1 st2=a  st3=x  st4=b
        fmul    st,st(3)                // st0 = y1*x
        fmul    st,st(1)                // st0 = x*y1*y1
        fsubp   st(2),st                // pop: st0=y1  st1=a - x*y1*y1  st2=x  st3=b
        fmulp   st(1),st                // pop: st0=y1*(a - x*y1*y1)  st1=x  st2=b
        fmulp   st(2),st                // pop: st0=x  st1=result
        fstp    dword ptr [x]           // pop the leftover copy of x: st0=result
    }
    /* No return statement on purpose: the result is left in st(0), which is
       where the caller picks up a float return value. */
#else
    /* Portable equivalent for compilers without MSVC x86 inline assembly. */
    union {
        int intPart;
        float floatPart;
    } bitC;
    const float a = 3.0f, b = 0.5f;

    bitC.floatPart = x;
    bitC.intPart = 0x5f375a86 - (bitC.intPart >> 1);
    bitC.floatPart = ( a - x * bitC.floatPart * bitC.floatPart ) * b * bitC.floatPart;
    return ( a - x * bitC.floatPart * bitC.floatPart ) * b * bitC.floatPart;
#endif
}
When I view the disassembly in Visual Studio, that becomes:
//This Visual Studio wrote
00401035* fld******** dword ptr [__real@40400000 (4021A4h)]
0040103B* fstp******* dword ptr [esp+34h]
0040103F* fld******** dword ptr [__real@3f000000 (4021A0h)]
00401045* fstp******* dword ptr [esp+38h]
00401049* lea******** esp,[esp]
00401050* fld******** dword ptr [esp+28h]
00401054* fstp******* dword ptr [esp+2Ch]
//This I wrote
00401058* fld******** dword ptr [esp+38h]
0040105C* fld******** dword ptr [esp+2Ch]
00401060* fld******** dword ptr [esp+34h]
00401064* mov******** eax,dword ptr [esp+2Ch]
00401068* sar******** eax,1
0040106A* mov******** ecx,5F375A86h
0040106F* sub******** ecx,eax
00401071* mov******** dword ptr [esp+30h],ecx
00401075* fld******** dword ptr [esp+30h]
00401079* fld******** st(0)
0040107B* fmul******* st,st(3)
0040107D* fmul******* st,st(1)
0040107F* fsubr****** st,st(2)
00401081* fmul******* st,st(4)
00401083* fmulp****** st(1),st
00401085* fld******** st(0)
00401087* fmul******* st,st(3)
00401089* fmul******* st,st(1)
0040108B* fsubp****** st(2),st
0040108D* fmulp****** st(1),st
0040108F* fmulp****** st(2),st
Here is what Visual Studio generates when it compiles the code:
00401035* fld******** dword ptr [__real@40400000 (4021A8h)]
0040103B* mov******** edx,5F375A86h
00401040* fld******** qword ptr [__real@3fe0000000000000 (4021A0h)]
00401046* sub******** esp,30h
00401049* fld******** dword ptr [esp+6Ch]
0040104D* fst******** dword ptr [esp+68h]
00401051* mov******** ecx,dword ptr [esp+68h]
00401055* sar******** ecx,1
00401057* sub******** edx,ecx
00401059* mov******** dword ptr [esp+68h],edx
0040105D* fld******** dword ptr [esp+68h]
00401061* fld******** st(0)
00401063* fmul******* st,st(1)
00401065* fmul******* st,st(2)
00401067* fsubr****** st,st(4)
00401069* fmulp****** st(1),st
0040106B* fmul******* st,st(2)
0040106D* fld******** st(0)
0040106F* fmul******* st,st(1)
00401071* fmul******* st,st(2)
00401073* fsubp****** st(4),st
00401075* fmulp****** st(3),st
00401077* fxch******* st(2)
00401079* fmulp****** st(1),st
The assembly that Visual Studio generates is obviously more efficient, and I probably should leave well enough alone, but for educational purposes I want to learn how to hand compile code as efficiently as Visual Studio does.
I have no clue how I thought of the correct words to get any useful information from Google, but from what I can gather through searching, I believe that the inefficiency comes from the fact that I do not know how to take advantage of function calling conventions. I only know that you can return a float by leaving it on the stack when your function terminates, and I learned that by studying the assembly that Visual Studio creates. There are plenty of websites with information on the x86 function call conventions, but I cannot seem to find any with information on the x87 function call conventions, which is what I expect my function uses, because it receives and returns floating point numbers.
Does anyone know what the x87 Function-call Conventions are?
Of course, I could be completely off track: my problem may be that I do not know how to get constants into assembly code, rather than that I am not handling the calling conventions properly, or perhaps it is a combination of the two. I am an undergraduate student and I have not taken any courses on assembly, so I am not really qualified to diagnose what I am doing wrong.
More...
View All Our Microsoft Related Feeds