#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
 * NOTE(review): this file was recovered from a copy in which every "<...>"
 * span had been stripped and all line breaks removed. The header names, the
 * bodies of the loops in median() and main(), the launch configuration and
 * the tail of main() were rebuilt from the surviving text. Spots where the
 * original could not be recovered are marked NOTE(review) below.
 */

/* Abort with a readable message when a CUDA runtime call fails. */
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

/*
 * In-place 3-point median filter over arraydevice[0..length-1].
 *
 * Launch requirements:
 *   - Indexing uses threadIdx.x / blockDim.x only, so the kernel is meant to
 *     be launched with a single 1D block; any thread count works because
 *     each thread strides over the array by blockDim.x.
 *   - Dynamic shared memory of at least length * sizeof(int) bytes must be
 *     passed as the third launch parameter.
 *
 * Every interior element i (0 < i < length-1) is replaced by the median of
 * the original values at i-1, i and i+1.
 * NOTE(review): the first and last element are left unchanged here; the
 * original boundary handling was lost and should be confirmed.
 */
__global__ void median(int* arraydevice, int length)
{
    // sharedmem points to a memory block shared by all threads of the block.
    // Its size in bytes is not fixed at compile time (dynamic).
    extern __shared__ int sharedmem[];

    int nthreads = blockDim.x;
    int tid = threadIdx.x;
    int i, a, b, c, tmp;

    // Copy arraydevice to sharedmem and synchronize, so that the filter
    // below reads only unfiltered input even though it writes in place.
    for (i = tid; i < length; i += nthreads) {
        sharedmem[i] = arraydevice[i];
    }
    __syncthreads();

    // Each thread filters the interior elements tid+1, tid+1+nthreads, ...
    for (i = tid + 1; i < length - 1; i += nthreads) {
        a = sharedmem[i - 1];
        b = sharedmem[i];
        c = sharedmem[i + 1];

        // Order a <= b, then clamp b to max(a, c): b ends up as the median.
        if (a > b) { tmp = a; a = b; b = tmp; }
        if (a > c) { c = a; }
        if (b > c) { b = c; }

        arraydevice[i] = b;
    }
}

/*
 * Fills a small host array with random values, runs the median filter on
 * the device with one block of nthreads threads, and prints the result.
 */
int main()
{
    int *arrayhost, *arraydevice;
    int length = 10;
    int nthreads = 3;
    size_t size = (size_t)length * sizeof(int);
    int i;

    arrayhost = (int*)malloc(size);
    if (arrayhost == NULL) {
        fprintf(stderr, "host allocation of %lu bytes failed\n",
                (unsigned long)size);
        return EXIT_FAILURE;
    }
    CUDA_CHECK(cudaMalloc(&arraydevice, size));

    srand((unsigned int)clock());

    // NOTE(review): value range, the "before" message and the print format
    // are reconstructed; only the "after" message survived in the source.
    for (i = 0; i < length; i++) {
        arrayhost[i] = rand() % 100;
    }
    printf("array before filter\n");
    for (i = 0; i < length; i++) {
        printf("%d ", arrayhost[i]);
    }

    CUDA_CHECK(cudaMemcpy(arraydevice, arrayhost, size, cudaMemcpyHostToDevice));

    // One block; the third launch parameter sizes the kernel's dynamic
    // shared-memory copy of the whole array.
    median<<<1, nthreads, size>>>(arraydevice, length);
    CUDA_CHECK(cudaGetLastError());        // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize());   // errors raised while executing

    CUDA_CHECK(cudaMemcpy(arrayhost, arraydevice, size, cudaMemcpyDeviceToHost));

    printf("\narray after filter\n");
    for (int i = 0; i < length; i++) {
        printf("%d ", arrayhost[i]);
    }
    printf("\n");

    CUDA_CHECK(cudaFree(arraydevice));
    free(arrayhost);
    return 0;
}