Dear all,
the following code:
#include <iostream>
#include <unistd.h>
#include <pthread.h>
class Test {
public:
Test() : fDone(false) { }
~Test() {}
bool IsDone() const { return fDone; }
void SetDone() { fDone = true; }
private:
bool fDone;
};
void* run(void* data) {
// input data
Test* t = static_cast<Test*>(data);
// sleep for 1 second
usleep(1000*1000);
// thread is done
t->SetDone();
std::cout << t->IsDone() << std::endl;
std::cout << "Thread finished"<< std::endl;
return 0;
}
int main(int argc, char** argv) {
Test t;
pthread_t thread;
pthread_create(&thread, NULL, &run, &t);
while (!t.IsDone()) {
usleep(100*1000);
}
std::cout << "never reached"<< std::endl;
void* status;
pthread_join(thread, &status);
return 0;
}
Does not run as expected with Intel Compiler 2019, Update 1 on Linux x86_64. The loop in the main function never finishes, even though the auxiliary thread sets 't.IsDone()' to true after 1 second.
The code is compiled using:
icpc -O2 -g -fPIC -pthread -o bug bug.cc
The bug does not occur if compiled with -00 or -O1. It also doesn't occur when the loop is modified as follows:
while (!t.IsDone()) {
std::cout << "..."<< std::endl;
usleep(100*1000);
}
The loop in question is actually translated by the compiler into this piece of assembly:
Dump of assembler code for function main:
0x0000000000401280 <+0>: push %rbp
0x0000000000401281 <+1>: mov %rsp,%rbp
0x0000000000401284 <+4>: and $0xffffffffffffff80,%rsp
0x0000000000401288 <+8>: sub $0x80,%rsp
0x000000000040128f <+15>: xor %esi,%esi
0x0000000000401291 <+17>: mov $0x3,%edi
0x0000000000401296 <+22>: callq 0x4013d0 <__intel_new_feature_proc_init>
0x000000000040129b <+27>: stmxcsr 0x8(%rsp)
0x00000000004012a0 <+32>: xor %esi,%esi
0x00000000004012a2 <+34>: lea (%rsp),%rdi
0x00000000004012a6 <+38>: orl $0x8040,0x8(%rdi)
0x00000000004012ad <+45>: lea 0x10(%rsp),%rcx
0x00000000004012b2 <+50>: lea 0x57(%rip),%rdx # 0x401310 <run(void*)>
0x00000000004012b9 <+57>: ldmxcsr -0x8(%rcx)
0x00000000004012bd <+61>: movb $0x0,(%rcx)
0x00000000004012c0 <+64>: callq 0x4010a0 <pthread_create@plt>
0x00000000004012c5 <+69>: cmpb $0x0,0x10(%rsp)
0x00000000004012ca <+74>: jne 0x4012d8 <main+88>
0x00000000004012cc <+76>: mov $0x186a0,%edi
0x00000000004012d1 <+81>: callq 0x401130 <usleep@plt>
=> 0x00000000004012d6 <+86>: jmp 0x4012cc <main+76>
0x00000000004012d8 <+88>: lea 0x1d25(%rip),%rsi # 0x403004
0x00000000004012df <+95>: mov 0x4cd2(%rip),%rdi # 0x405fb8
0x00000000004012e6 <+102>: callq 0x4010c0 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@plt>
0x00000000004012eb <+107>: mov %rax,%rdi
0x00000000004012ee <+110>: mov 0x4cbb(%rip),%rsi # 0x405fb0
0x00000000004012f5 <+117>: callq 0x4010d0 <_ZNSolsEPFRSoS_E@plt>
0x00000000004012fa <+122>: mov (%rsp),%rdi
0x00000000004012fe <+126>: lea 0x8(%rsp),%rsi
0x0000000000401303 <+131>: callq 0x401070 <pthread_join@plt>
0x0000000000401308 <+136>: xor %eax,%eax
0x000000000040130a <+138>: mov %rbp,%rsp
0x000000000040130d <+141>: pop %rbp
0x000000000040130e <+142>: retq
0x000000000040130f <+143>: nop
In particular, note the infinite loop marked in bold, which never checks the status of IsDone() at all and just runs usleep forever.
This bug was not present in older Intel Compiler releases, I think 2019 is the first release that introduced this issue, but I am not 100% sure.