I have a problem related to CUDA. I have to pass Two string arrays to cuda kernel for boyer moore string matching to be done in parallel so I copied them to a 2d char array(with fix length) and pass them to kernel for matching but my code doesn't work correctly
please help me.I don't know how to pass string array as text and pattern to the kernel(pattern match) to work correct. I realy nead your help.thank you
__device__ int max_(int a, int b)
{
return (a > b) ? a : b;
}
//boyer moore pattern matching
_global__ void search(char* txt, char *pat,int*c)
{
int col = blockIdx.x*blockDim.x+threadIdx.x;
int row = blockIdx.y*blockDim.y+threadIdx.y;
for(int r[row+col*Mrow]=0,r[row+col*Mrow]<4001,r[row+col*Mrow]++)
int m[row+col*Mrow] = pat[row+col*Mrow].length();
int n[r[row+col*Mrow]] = txt[r[row+col*Mrow]].length();
int badchar[NO_OF_CHARS];
badCharHeuristic(pat[row+col*Mrow], m[row+col*Mrow], badchar);
int s[row+col*Mrow] = 0;
while (s <= (n[r[row+col*Mrow]] - m[row+col*Mrow]))
{
int j[row+col*Mrow] = m[row+col*Mrow]- 1;
while (j >= 0 && pat[j[row+col*Mrow]] == txt[s[row+col*Mrow] + j[row+col*Mrow]])
j[row+col*Mrow]--;
if (j[row+col*Mrow] < 0)
{
c[row+col*Mrow] =s[row+col*Mrow];
s[row+col*Mrow] += (s[row+col*Mrow] + m[row+col*Mrow] < n[r[row+col*Mrow]]) ? m[row+col*Mrow] - badchar[txt[s[row+col*Mrow] + m[row+col*Mrow]]] : 1;
}
else
s[row+col*Mrow] += max_(1, j[row+col*Mrow] - badchar[txt[s[row+col*Mrow] + j[row+col*Mrow]]]) ;
}
c[row+col*Mrow]= -1;
}
__device__ void badCharHeuristic(char pat[j] , int m[j], int badchar[NO_OF_CHARS])
{
int i[j];
for (i[j] = 0; i[j] < NO_OF_CHARS; i[j]++)
badchar[i[j]] = -1;
for (i[j] = 0; i[j] < m[j]; i[j]++)
badchar[(int)pat[i[j]]] = i[j];
}
define block_size 20
int main(){
cudaEvent_t start,stop;
read_txt();
read_pattern();
//1010:txt length
//800:pattern length
char (*str)[1010] = new char[21][1010];
char (*p)[800]=new char[4000][800];
for (int j = 2; j<21; j++)
strncpy(str[j], all_txt[j].c_str(), 1010);
for (int j = 2; j<4000; j++)
strncpy(str[j], all_pattern[j].c_str(), 800);
const int csize = 21*1010*sizeof(char);
const int csize2 = 4000*800*sizeof(char);
const int size = 4000 * sizeof(int);
char *d_all_txt, *d_all_pattern;
int *d_c;
// Alloc space for device copies of all_txt, all_pattern
cudaMalloc((void **)&d_all_txt, csize);
cudaMalloc((void **)&d_all_pattern, csize1);
cudaMalloc((void **)&d_c, size);
// Copy inputs to device
cudaMemcpy(d_all_txt, &(all_txt), csize, cudaMemcpyHostToDevice);
cudaMemcpy(d_all_pattern, &(all_pattern), csize1, cudaMemcpyHostToDevice);
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start,0);
dim3 dimGrid(Mcol/block_size,Mrow/block_size);
dim3 dimBlock(block_size,block_size);
// Launch search() kernel on GPU with N blocks
search<<<dimGrid, dimBlock>>>(d_all_txt, d_all_pattern, d_c);
cudaEventRecord(stop,0);
cudaEventSynchronize(stop);
float elapsedTime;
cudaEventElapsedTime(&elapsedTime,start,stop);
//printf("time to generate:%3.5f ms\n",elapsedTime);
// Copy result back to host
cudaMemcpy(c,d_c, size, cudaMemcpyDeviceToHost);
// Cleanup
cudaFree(d_all_txt); cudaFree(d_all_pattern); cudaFree(d_c);
//disply answer
printf("c[%d]=%d\n",1,c[1]);
getchar();
return(0);
}
Aucun commentaire:
Enregistrer un commentaire