jeudi 17 août 2017

how to pass string array to Cuda kernel for string matching

I have a problem related to CUDA. I have to pass Two string arrays to cuda kernel for boyer moore string matching to be done in parallel so I copied them to a 2d char array(with fix length) and pass them to kernel for matching but my code doesn't work correctly

please help me.I don't know how to pass string array as text and pattern to the kernel(pattern match) to work correct. I realy nead your help.thank you

    __device__ int max_(int a, int b)
{ 
    return (a > b) ? a : b; 
}


//boyer moore pattern matching
 _global__  void search(char* txt, char *pat,int*c)
{
int col = blockIdx.x*blockDim.x+threadIdx.x;
int row = blockIdx.y*blockDim.y+threadIdx.y;
    for(int r[row+col*Mrow]=0,r[row+col*Mrow]<4001,r[row+col*Mrow]++)
int m[row+col*Mrow] = pat[row+col*Mrow].length();
    int n[r[row+col*Mrow]] = txt[r[row+col*Mrow]].length();

    int badchar[NO_OF_CHARS];

    badCharHeuristic(pat[row+col*Mrow], m[row+col*Mrow], badchar);

    int s[row+col*Mrow] = 0;
    while (s <= (n[r[row+col*Mrow]] - m[row+col*Mrow]))
    {
        int j[row+col*Mrow] = m[row+col*Mrow]- 1;

        while (j >= 0 && pat[j[row+col*Mrow]] == txt[s[row+col*Mrow] + j[row+col*Mrow]])
            j[row+col*Mrow]--;

        if (j[row+col*Mrow] < 0)
        {
            c[row+col*Mrow] =s[row+col*Mrow];

            s[row+col*Mrow] += (s[row+col*Mrow] + m[row+col*Mrow] < n[r[row+col*Mrow]]) ? m[row+col*Mrow] - badchar[txt[s[row+col*Mrow] + m[row+col*Mrow]]] : 1;
        }

        else
    s[row+col*Mrow] += max_(1, j[row+col*Mrow] - badchar[txt[s[row+col*Mrow] + j[row+col*Mrow]]]) ;
    }
    c[row+col*Mrow]= -1;
}



__device__ void badCharHeuristic(char pat[j] , int m[j], int badchar[NO_OF_CHARS])
{
    int i[j];


    for (i[j] = 0; i[j] < NO_OF_CHARS; i[j]++)
        badchar[i[j]] = -1;


    for (i[j] = 0; i[j] < m[j]; i[j]++)
        badchar[(int)pat[i[j]]] = i[j];
}




    define block_size 20



     int main(){
    cudaEvent_t start,stop;
    read_txt();
    read_pattern();
//1010:txt length
//800:pattern length
    char (*str)[1010] = new char[21][1010];
    char (*p)[800]=new char[4000][800];

    for (int j = 2; j<21; j++)
     strncpy(str[j], all_txt[j].c_str(), 1010);
     for (int j = 2; j<4000; j++)
     strncpy(str[j], all_pattern[j].c_str(), 800);
const int csize = 21*1010*sizeof(char);
const int csize2 = 4000*800*sizeof(char);
const int size = 4000 * sizeof(int);

    char *d_all_txt, *d_all_pattern;
int *d_c;

    // Alloc space for device copies of all_txt, all_pattern

    cudaMalloc((void **)&d_all_txt, csize);
    cudaMalloc((void **)&d_all_pattern, csize1);
     cudaMalloc((void **)&d_c, size);


    // Copy inputs to device

    cudaMemcpy(d_all_txt, &(all_txt), csize, cudaMemcpyHostToDevice);

    cudaMemcpy(d_all_pattern, &(all_pattern), csize1, cudaMemcpyHostToDevice);

    cudaEventCreate(&start);
    cudaEventCreate(&stop);
    cudaEventRecord(start,0);

    dim3 dimGrid(Mcol/block_size,Mrow/block_size);
    dim3 dimBlock(block_size,block_size);

    // Launch search() kernel on GPU with N blocks

    search<<<dimGrid, dimBlock>>>(d_all_txt, d_all_pattern, d_c);

    cudaEventRecord(stop,0);
    cudaEventSynchronize(stop);
    float elapsedTime;
    cudaEventElapsedTime(&elapsedTime,start,stop);

    //printf("time to generate:%3.5f ms\n",elapsedTime);
    // Copy result back to host

    cudaMemcpy(c,d_c, size, cudaMemcpyDeviceToHost);
    // Cleanup
    cudaFree(d_all_txt); cudaFree(d_all_pattern); cudaFree(d_c);

    //disply answer

    printf("c[%d]=%d\n",1,c[1]);
    getchar();
    return(0);
    }

Aucun commentaire:

Enregistrer un commentaire