gcc

All posts tagged gcc

For development of SPARCSIM, recently I had to look into GCC implementation of methods defined in C99 for handling of floating point rounding and exceptions. My intention was to perform the floating point math on host processor and retrieve the exception bits set according to IEEE-754 standard to display it in simulator. Below is a code snippet which tries to compute 1050, which is, of course, beyond the limit of float-32. Hence, it is “expected” to raise OF (Overflow) and INX (Inexact) exceptions.

#include
#include #include

int main()
{
int exponent = 50;
float result = pow(10.0, exponent);

if (fetestexcept(FE_INEXACT))
puts("Inexact");

if (fetestexcept(FE_UNDERFLOW))
puts("Underflow");

if (fetestexcept(FE_OVERFLOW))
puts("Overflow");

if (fetestexcept(FE_DIVBYZERO))
puts("Division by zero");

if (fetestexcept(FE_INVALID))
puts("Invalid");

return 0;
}

Let’s try to compile and run on gcc-4.4.3 on Ubuntu 10.04.

gcc demo.c -o demo -lm && ./demo
Inexact
Overflow

Pretty good, isn’t it? Let’s try to be a bit cleverer.  We’ll modify line#9 with the actual value of the exponent, i.e. 50. There is no point in using a redundant variable. So, the modified program looks like

#include
#include #include

int main()
{
int exponent = 50;
float result = pow(10.0, 50);

if (fetestexcept(FE_INEXACT))
puts("Inexact");

if (fetestexcept(FE_UNDERFLOW))
puts("Underflow");

if (fetestexcept(FE_OVERFLOW))
puts("Overflow");

if (fetestexcept(FE_DIVBYZERO))
puts("Division by zero");

if (fetestexcept(FE_INVALID))
puts("Invalid");

return 0;
}

Again, compile and run, BUT…what is it?

gcc demo.c -o demo -lm && ./demo

Means, no exception is being raised. How come? What’s happening behind the scene? To attempt to drill down, we’ll have a look into the assembly listing of both the code. It is generated using

gcc demo.c -o demoasm -S

Here are the respective listings

.file "demo.c"
.section .rodata
.LC1:
.string "Inexact"
.LC2:
.string "Underflow"
.LC3:
.string "Overflow"
.LC4:
.string "Division by zero"
.LC5:
.string "Invalid"
.text
.globl main
.type main, @function
main:
pushl %ebp
movl %esp, %ebp
andl $-16, %esp
subl $48, %esp
movl $50, 44(%esp)
fildl 44(%esp)
fstpl 8(%esp)
fldl .LC0
fstpl (%esp)
call pow
fstps 40(%esp)
movl $32, (%esp)
call fetestexcept
testl %eax, %eax
je .L2
movl $.LC1, (%esp)
call puts
.L2:
movl $16, (%esp)
call fetestexcept
testl %eax, %eax
je .L3
movl $.LC2, (%esp)
call puts
.L3:
movl $8, (%esp)
call fetestexcept
testl %eax, %eax
je .L4
movl $.LC3, (%esp)
call puts
.L4:
movl $4, (%esp)
call fetestexcept
testl %eax, %eax
je .L5
movl $.LC4, (%esp)
call puts
.L5:
movl $1, (%esp)
call fetestexcept
testl %eax, %eax
je .L6
movl $.LC5, (%esp)
call puts
.L6:
movl $0, %eax
leave
ret
.size main, .-main
.section .rodata
.align 8
.LC0:
.long 0
.long 1076101120
.ident "GCC: (Ubuntu 4.4.3-4ubuntu5.1) 4.4.3"
.section .note.GNU-stack,"",@progbits

.file "demo.c"
.section .rodata
.LC1:
.string "Inexact"
.LC2:
.string "Underflow"
.LC3:
.string "Overflow"
.LC4:
.string "Division by zero"
.LC5:
.string "Invalid"
.text
.globl main
.type main, @function
main:
pushl %ebp
movl %esp, %ebp
andl $-16, %esp
subl $32, %esp
movl $50, 28(%esp)
movl $0x7f800000, %eax
movl %eax, 24(%esp)
movl $32, (%esp)
call fetestexcept
testl %eax, %eax
je .L2
movl $.LC1, (%esp)
call puts
.L2:
movl $16, (%esp)
call fetestexcept
testl %eax, %eax
je .L3
movl $.LC2, (%esp)
call puts
.L3:
movl $8, (%esp)
call fetestexcept
testl %eax, %eax
je .L4
movl $.LC3, (%esp)
call puts
.L4:
movl $4, (%esp)
call fetestexcept
testl %eax, %eax
je .L5
movl $.LC4, (%esp)
call puts
.L5:
movl $1, (%esp)
call fetestexcept
testl %eax, %eax
je .L6
movl $.LC5, (%esp)
call puts
.L6:
movl $0, %eax
leave
ret
.size main, .-main
.ident "GCC: (Ubuntu 4.4.3-4ubuntu5.1) 4.4.3"
.section .note.GNU-stack,"",@progbits

Have a closer look at line#71 in the first disassembly and line#22 of the second one. According to IEEE definition, 0x7f800000 represents positive infinity. Clearly, the compiler pre-calculates constant expression and sets the value to be infinity. This optimization is formally called “constant folding”. Since, the operation, in the second case, is being performed during compile time, appropriate exceptions are not being raised during execution.

Again, let’s try disabling compiler optimizations explicitly and re-compile and re-run the second (one having unexpected result) C source code.

gcc demo.c -o demo -lm -O0 && ./demo

The situation doesn’t improve. Our final try will be to enable most of the compiler optimizations and re-compile and re-run the first (one producing correct result so far) C source code.

gcc demo.c -o demo -lm -O3 && ./demo

As you can see, maximizing the optimization level makes GCC behave apparently clever, thereby doing some sort of value propagation. It results both “pow(10.0, exponent)” and “pow(10.0, 50)” to be evaluated compile time rather than run time.

Hope I have been able to highlight how gcc optimizations can a nightmare be.