Hey,
I'm having trouble converting my code to long double.
- Some compiler flag in Visual Studio seems to reduce the precision whenever I use an arithmetic operator (e.g. "*=" or "*").
  It seems that I can fix it with the compiler argument "/Qpc80".
  How does "/Qpc80" interact with the Floating Point Model "Precise (/fp:precise)"?
- The overloaded long double version of log() doesn't seem to be available, so the double version is used. I have to call logl() directly.
  Isn't the overloaded version of log(long double) supposed to be provided by mathimf.h?
- In a larger project, Microsoft's long double version of log() is used although I didn't include "math.h".
  It seems that some other std headers include math.h if I don't include mathimf.h first in each c/cpp file. Either I get linker errors, or I can see in debug mode that the long double version from Microsoft's math.h is used, which calls the double version.
  Should I include mathimf.h in each file before including any other headers?
I wrote a test program to narrow down the problems. Basically, I'm using quad precision (which seems to work) as a reference to check the other data types:
long double b = 0.333333333333333333333333333333333333333333333q;
printf("\n%20s = ", "long double(1/3)");
myDebugPrintDigits(b, 45);
b *= 2;
printf("\n%20s = ", "long double 2*(1/3)");
myDebugPrintDigits(b, 45);
...
b = log(2.0L);
printf("\n%20s = ", "long double(log(2))");
myDebugPrintDigits(b, 45);
b = logl(2.0L);
printf("\n%20s = ", "long double(logl(2))");
myDebugPrintDigits(b, 45);
The result looks like this in Visual Studio 2015 (added compiler flags "/Qoption,cpp,--extended_float_type /Qlong-double"):
double(1/3) = 3.33333333333333314829616256247390992939472198e-1
double 2*(1/3) = 6.66666666666666629659232512494781985878944396e-1
long double(1/3) = 3.33333333333333333342368351437379203616728773e-1
long double 2*(1/3) = 6.66666666666666629659232512494781985878944396e-1
_Quad(1/3) = 3.33333333333333333333333333333333307654267408e-1
_Quad 2*(1/3) = 6.66666666666666666666666666666666615308534816e-1
double(log(2)) = 6.93147180559945286226763982995180413126945495e-1
long double(log(2)) = 6.93147180559945286226763982995180413126945495e-1
long double(logl(2)) = 6.93147180559945309428690474184975300886435434e-1
_Quad(__logq(2)) = 6.93147180559945309417232121458176613602238496e-1
correct log(2) = 6.93147180559945309417232121458176568075500134e-1
sizeof(double) = 8
sizeof(long double) = 16
sizeof(_Quad) = 16
__IMFLONGDOUBLE = 80
The list of compiler flags used by Visual Studio is "/GS /W3 /Zc:wchar_t /ZI /Od /Fd"x64\Debug\vc140.pdb" /D "_MBCS" /Zc:forScope /RTC1 /MDd /Fa"x64\Debug\" /EHsc /nologo /Fo"x64\Debug\" /Qprof-dir "x64\Debug\" /Fp"x64\Debug\Projekt1.pch" + "/Qoption,cpp,--extended_float_type /Qlong-double".
The long double precision is reduced to double precision when I multiply it with 2 (see "long double 2*(1/3)" compared to "long double(1/3)")!
If I compile the source directly with icl ("icl main.cpp /Qoption,cpp,--extended_float_type /Qlong-double"), then I get:
double(1/3) = 3.33333333333333314829616256247390992939472198e-1
double 2*(1/3) = 6.66666666666666629659232512494781985878944396e-1
long double(1/3) = 3.33333333333333333342368351437379203616728773e-1
long double 2*(1/3) = 6.66666666666666666684736702874758407233457546e-1
_Quad(1/3) = 3.33333333333333333333333333333333307654267408e-1
_Quad 2*(1/3) = 6.66666666666666666666666666666666615308534816e-1
double(log(2)) = 6.93147180559945286226763982995180413126945495e-1
long double(log(2)) = 6.93147180559945286226763982995180413126945495e-1
long double(logl(2)) = 6.93147180559945309428690474184975300886435434e-1
_Quad(__logq(2)) = 6.93147180559945309417232121458176613602238496e-1
correct log(2) = 6.93147180559945309417232121458176568075500134e-1
sizeof(double) = 8
sizeof(long double) = 16
sizeof(_Quad) = 16
__IMFLONGDOUBLE = 80
Compiling manually with icl or adding "/Qpc80" in Visual Studio seems to solve the multiply precision issue, but the log(long double) function is still not using the logl() method. Is this intended behavior?
Thanks,
Christian
The full code is:
#include <mathimf.h>
#include <stdio.h>
/* Alias for the compiler's 128-bit floating-point type. */
typedef _Quad float128_type;

/*
 * Prototypes for the quad-precision math routines, declared by hand with
 * C linkage. NOTE(review): the names suggest these are the _Quad
 * counterparts of the <math.h> functions with the same base name, and the
 * parameter names below follow that convention -- confirm against the
 * compiler's math library documentation.
 */
extern "C" {
    /* Exponent / mantissa manipulation */
    _Quad __ldexpq(_Quad x, int exponent);
    _Quad __frexpq(_Quad x, int *exponent);

    /* Absolute value, rounding, square root */
    _Quad __fabsq(_Quad x);
    _Quad __floorq(_Quad x);
    _Quad __ceilq(_Quad x);
    _Quad __sqrtq(_Quad x);
    _Quad __truncq(_Quad x);

    /* Exponentials and logarithms */
    _Quad __expq(_Quad x);
    _Quad __powq(_Quad x, _Quad y);
    _Quad __logq(_Quad x);
    _Quad __log10q(_Quad x);

    /* Trigonometric functions and their inverses */
    _Quad __sinq(_Quad x);
    _Quad __cosq(_Quad x);
    _Quad __tanq(_Quad x);
    _Quad __asinq(_Quad x);
    _Quad __acosq(_Quad x);
    _Quad __atanq(_Quad x);

    /* Hyperbolic functions */
    _Quad __sinhq(_Quad x);
    _Quad __coshq(_Quad x);
    _Quad __tanhq(_Quad x);

    /* Two-argument helpers */
    _Quad __fmodq(_Quad x, _Quad y);
    _Quad __atan2q(_Quad y, _Quad x);
}
/*
 * Print a value in scientific notation ("d.ddd...e<exp>") to stdout using
 * only _Quad arithmetic.
 *
 * q           value to print; double and long double arguments are
 *             converted to _Quad at the call site
 * noOfDigits  maximum number of significant digits to emit
 *
 * Digit generation stops early once the remainder becomes exactly zero, so
 * trailing zeros are not printed. Negative values are printed with a leading
 * '-', zero as "0.e0", and non-finite values as "nan" / "inf".
 */
void myDebugPrintDigits(_Quad q, int noOfDigits) {
    int exponent = 0;
    int digit;
    int printed = 0;

    /* NaN is the only value that compares unequal to itself. */
    if (q != q) {
        printf("nan");
        return;
    }

    /* The normalisation loops below only terminate for positive values. */
    if (q < 0) {
        printf("-");
        q = -q;
    }
    if (q == 0) {
        printf("0.e0");
        return;
    }

    /* q - q is zero for every finite value but NaN for infinity; without
     * this guard the "divide by 10" loop would never finish. */
    if (q - q != 0) {
        printf("inf");
        return;
    }

    /* Scale q into [1, 10) and remember the decimal exponent. */
    while (q < 1) {
        q *= 10;
        exponent--;
    }
    while (q >= 10) {
        q /= 10;
        exponent++;
    }

    /* Take the leading digit in quad precision: going through double could
     * round a value just below 10 up to 10.0 and yield a two-digit "digit". */
    digit = (int)__floorq(q);

    while (q > 0 && printed < noOfDigits) {
        q -= digit;
        printf("%d", digit);
        q *= 10;
        digit = (int)__floorq(q);
        if (printed == 0) {
            /* decimal point goes right after the first digit */
            printf(".");
        }
        printed++;
    }
    printf("e%d", exponent);
}
/*
 * Precision test driver.
 *
 * Stores 1/3 (written as a quad literal) in a double, a long double and a
 * _Quad, doubles each value, and prints the digits before and after so that
 * any loss of precision in the arithmetic becomes visible. It then prints
 * log(2) computed through log(), logl() and __logq() next to a reference
 * string, followed by the sizes of the three types and __IMFLONGDOUBLE.
 *
 * Returns 0.
 */
int main() {
    /* --- double --- */
    double a = 0.333333333333333333333333333333333333333333333q;
    printf("\n%20s = ","double(1/3)");
    myDebugPrintDigits(a, 45);
    a *= 2;
    printf("\n%20s = ", "double 2*(1/3)");
    myDebugPrintDigits(a, 45);

    /* --- long double --- */
    long double b = 0.333333333333333333333333333333333333333333333q;
    printf("\n%20s = ", "long double(1/3)");
    myDebugPrintDigits(b, 45);
    b *= 2;
    printf("\n%20s = ", "long double 2*(1/3)");
    myDebugPrintDigits(b, 45);

    /* --- _Quad (reference) --- */
    _Quad c = 0.333333333333333333333333333333333333333333333q;
    printf("\n%20s = ", "_Quad(1/3)");
    myDebugPrintDigits(c, 45);
    c *= 2;
    printf("\n%20s = ", "_Quad 2*(1/3)");
    myDebugPrintDigits(c, 45);

    /* --- log(2) through each entry point --- */
    /* Pass a double literal: a float argument would select a float overload
     * of log() wherever one is visible, and this line is the double test. */
    a = log(2.0);
    printf("\n%20s = ", "double(log(2))");
    myDebugPrintDigits(a, 45);

    /* Deliberately the unsuffixed name with a long double argument: this
     * checks whether a long double overload of log() gets picked up. */
    b = log(2.0L);
    printf("\n%20s = ", "long double(log(2))");
    myDebugPrintDigits(b, 45);

    b = logl(2.0L);
    printf("\n%20s = ", "long double(logl(2))");
    myDebugPrintDigits(b, 45);

    c = __logq(2.0q);
    printf("\n%20s = ", "_Quad(__logq(2))");
    myDebugPrintDigits(c, 45);

    printf("\n%20s = %s", "correct log(2)","6.93147180559945309417232121458176568075500134e-1");
    printf("\n");

    /* sizeof yields size_t, which needs %zu; %d is a type mismatch (and
     * size_t is wider than int on x64). */
    printf("\nsizeof(double) = %zu", sizeof(double));
    printf("\nsizeof(long double) = %zu", sizeof(long double));
    printf("\nsizeof(_Quad) = %zu", sizeof(_Quad));
    printf("\n__IMFLONGDOUBLE = %d", __IMFLONGDOUBLE);
    return 0;
}