From 4ea1156e99ec2f37e64569a524593e8b71be954e Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Fri, 16 Sep 2016 17:05:51 +0200
Subject: [PATCH] try to use parallelism

---
Review revision of gArgMax2: rows are bounds-checked and handled by one
thread each, and the maximum is seeded from column 0 instead of -99999.
The blob hashes on the index line below predate this revision.

 src/test.cu | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/src/test.cu b/src/test.cu
index 4ab86eb3..03a91abb 100644
--- a/src/test.cu
+++ b/src/test.cu
@@ -25,6 +25,43 @@ __global__ void gArgMax(float* arr, size_t rows, size_t cols) {
   }
 }
 
+// Row-wise argmax, computed in place.
+//
+// Treats arr as a row-major rows x cols matrix and overwrites the first
+// element of each row, arr[row * cols], with the column index of that row's
+// largest value (stored as a float). Ties keep the lowest column index.
+//
+// Launch: any 1D grid/block shape works. Rows are spread over all launched
+// threads with a grid-stride loop, so each row is read and written by exactly
+// one thread and surplus threads simply exit. No shared memory is used.
+__global__ void gArgMax2(float* arr, size_t rows, size_t cols) {
+  // An empty row has no maximum and no element to hold the result.
+  if (cols == 0) {
+    return;
+  }
+
+  const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
+  const size_t first =
+      static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
+  for (size_t row = first; row < rows; row += stride) {
+    const size_t startInd = row * cols;
+
+    // Seed from column 0 instead of a sentinel such as -99999, so rows whose
+    // values are all below the sentinel still yield a valid index.
+    float maxScore = arr[startInd];
+    size_t maxInd = 0;
+    for (size_t col = 1; col < cols; ++col) {
+      const float score = arr[startInd + col];
+      if (score > maxScore) {
+        maxScore = score;
+        maxInd = col;
+      }
+    }
+
+    arr[startInd] = static_cast<float>(maxInd);
+  }
+}
+
 string output(const std::vector &vec)
 {
   stringstream strm;
@@ -46,7 +83,8 @@ void temp()
   thrust::copy(hVec.begin(), hVec.end(), dVec.begin());
   float *data = thrust::raw_pointer_cast(dVec.data());
 
-  gArgMax<<<10, 20, sizeof(float)>>>(data, 4, 2);
+  //gArgMax<<<10, 20, sizeof(float)>>>(data, 4, 2);
+  gArgMax2<<<10, 20, sizeof(float)>>>(data, 4, 2);
 
   std::vector hVec2(8);
   thrust::copy(dVec.begin(), dVec.end(), hVec2.begin());