#include <stdio.h>

// Minimal demonstration of a shared-memory data race.
//
// Every thread of the block stores its own thread index into the same
// __shared__ variable without any synchronization, then reads the variable
// back. The race is INTENTIONAL: do not "fix" it with __syncthreads() or
// atomics, since showing the race is the purpose of this program (e.g. as an
// input for a race-detection tool).
//
// Launch layout: 1D block; main() below uses 1 block of 2 threads.
// Shared memory: one statically allocated int.
__global__ void race()
{
    // "__shared__": all threads of the block use the same memory location for x.
    // "volatile":   the compiler is prevented from optimizing the variable away
    //               or caching it in a register, so each access below is a
    //               real shared-memory access.
    volatile __shared__ int x;

    int tid = threadIdx.x;

    // All threads write the shared variable with no ordering between them.
    // With the 2-thread launch used here, the value of x afterwards is
    // undefined (either 0 or 1).
    x = tid;

    // A thread that reads back something other than its own index has
    // observed the write of another thread.
    if (x != tid)
        printf("shared variable was modified by other thread!\n");
}

// Launches the racy kernel once and reports any CUDA error.
//
// Returns 0 on success and -1 if the launch or the kernel execution failed.
int main()
{
    // Launch kernel with 1 block and 2 threads.
    race<<<1, 2>>>();

    // A kernel launch returns no status itself: launch/configuration errors
    // are picked up with cudaGetLastError() right after the launch ...
    cudaError_t err = cudaGetLastError();

    // ... while errors raised during execution are returned by the next
    // synchronizing call. Synchronizing also flushes the device-side printf
    // buffer before the program exits.
    if (err == cudaSuccess)
        err = cudaDeviceSynchronize();

    if (err != cudaSuccess) {
        fprintf(stderr, "%s: %s\n", cudaGetErrorName(err), cudaGetErrorString(err));
        return -1;
    }

    return 0;
}