Merge branch 'master' of github.com:emjotde/marian

2024-11-04 14:04:24 +03:00 · 2016-09-16 18:34:45 +02:00 · 2016-09-16 18:34:45 +02:00 · 3f74962df7
commit 3f74962df7
parent 257b621db9 6b7ced99d3
6 changed files with 2398 additions and 1 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -20,3 +20,14 @@ endif(Boost_FOUND)

 include_directories(${marian_SOURCE_DIR}/src)
 add_subdirectory(src)
+
+# Optional target that generates the API documentation with Doxygen.
+# The target only exists when Doxygen is found on the system. It is not part
+# of the default build (no ALL), so a plain `make` does not re-run Doxygen;
+# documentation is generated on demand with `make doc`, as described in
+# README.md.
+find_package(Doxygen)
+if(DOXYGEN_FOUND)
+    # Instantiate the Doxyfile template in the build tree; @ONLY restricts
+    # substitution to @VAR@ placeholders and leaves ${...} text untouched.
+    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)
+    add_custom_target(doc
+        ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+        COMMENT "Generating API documentation with Doxygen" VERBATIM
+    )
+endif(DOXYGEN_FOUND)
--- a/Doxyfile.in
+++ b/Doxyfile.in
--- a/README.md
+++ b/README.md
@ -29,3 +29,6 @@ Compilation with `cmake > 3.5`:
    cmake ..
    make -j

+To compile API documentation using Doxygen, first cd to the build directory, and then:
+
+    make doc
--- a/src/tensor_operators.cu
+++ b/src/tensor_operators.cu
@ -1,5 +1,7 @@
 #include "tensor_operators.h"

+using namespace std;
+
 namespace marian {

 __global__ void gSubtractMean(float* out, float* weights,
@ -53,6 +55,7 @@ void SubtractMean(Tensor* Out, Tensor &Weights) {
  cudaStreamSynchronize(0);
 }

+///////////////////////////////////////////////////////
 __global__ void gSoftMax(float* softMaxP, size_t rows, size_t cols) {
  for(int bid = 0; bid < rows; bid += gridDim.x) {
    int j = bid + blockIdx.x;
@ -97,6 +100,35 @@ void Softmax(Tensor* Out) {
  gSoftMax<<<blocks, threads, shared>>>(Out->data(), m, k);
  cudaStreamSynchronize(0);
 }
+///////////////////////////////////////////////////////
+/**
+ * Row-wise argmax kernel.
+ *
+ * Treats `data` as a row-major matrix of `rows` x `cols` floats and writes,
+ * for every row, the column index of its largest element to `out[row]`.
+ * Ties resolve to the lowest column index. The index is stored as a float
+ * because `out` is a float buffer (exact only for indices up to 2^24).
+ *
+ * Launch layout: rows are distributed over blocks with a stride of
+ * gridDim.x, so any grid size >= 1 is valid. Each row is scanned
+ * sequentially by thread 0 of its block; additional threads in a block
+ * return immediately, so one thread per block is sufficient. No shared
+ * memory is used.
+ *
+ * @param out  output buffer with at least `rows` elements
+ * @param data input matrix, `rows * cols` elements, row-major
+ * @param rows number of rows
+ * @param cols number of columns; for cols == 0 every out[row] is set to 0
+ */
+__global__ void gArgMax(float *out, const float *data, size_t rows, size_t cols) {
+  // All threads of a block would compute the identical result for the same
+  // row, so only one of them does the work and the write.
+  if (threadIdx.x != 0)
+    return;
+
+  for (size_t row = blockIdx.x; row < rows; row += gridDim.x) {
+    const float *rowData = data + row * cols;
+    // Start below every finite value so that rows containing only very
+    // negative scores are handled; maxInd is defined even if nothing wins.
+    float maxScore = -INFINITY;
+    size_t maxInd = 0;
+    for (size_t col = 0; col < cols; ++col) {
+      float score = rowData[col];
+      if (score > maxScore) {
+        maxScore = score;
+        maxInd = col;
+      }
+    }
+    out[row] = static_cast<float>(maxInd);
+  }
+}
+
+/**
+ * Host wrapper that computes the row-wise argmax of `In` into `Out`.
+ *
+ * The first two entries of In->shape() are passed to gArgMax as the row and
+ * column counts. One block is launched per row and the call blocks until
+ * the default stream has finished.
+ *
+ * @param Out destination tensor; assumed to hold at least one float per row
+ *            of `In` (not checked here)
+ * @param In  source tensor, read as a rows x cols matrix
+ */
+void Argmax(Tensor* Out, const Tensor* In) {
+  size_t m = In->shape()[0];
+  size_t k = In->shape()[1];
+
+  // An empty input has no argmax, and a launch with zero blocks would be an
+  // invalid configuration anyway.
+  if (m == 0 || k == 0)
+    return;
+
+  // gArgMax scans a whole row sequentially and does not index by threadIdx,
+  // so a single thread per block is enough. Launching k threads only
+  // repeated the same work and made the launch fail once k exceeded the
+  // per-block thread limit.
+  int blocks = m;
+  int threads = 1;
+  gArgMax<<<blocks, threads>>>(Out->data(), In->data(), m, k);
+  cudaStreamSynchronize(0);
+}
+
+///////////////////////////////////////////////////////

 Tensor Prod(cublasHandle_t handle, Tensor C, const Tensor A, const Tensor B,
             bool transA, bool transB, Float beta) {
--- a/src/tensor_operators.h
+++ b/src/tensor_operators.h
@ -151,6 +151,10 @@ __global__ void gSoftMax(float* softMaxP, size_t rows, size_t cols);

 void Softmax(Tensor* Out);

+__global__ void gArgMax(float *out, const float *data, size_t rows, size_t cols);
+
+void Argmax(Tensor* Out, const Tensor* In);
+
 Tensor Prod(cublasHandle_t handle, Tensor C, const Tensor A, const Tensor B,
             bool transA, bool transB, Float beta);

--- a/src/test.cu
+++ b/src/test.cu
@ -3,7 +3,51 @@
 #include "mnist.h"
 #include "vocab.h"

+#include "tensor_operators.h"
+
+using namespace std;
+
+///////////////////////////////////////////////////////
+// Formats the values of `vec` in order, each one followed by a single
+// space (a non-empty result therefore ends with a space; an empty vector
+// yields an empty string).
+string output(const std::vector<float> &vec)
+{
+  stringstream joined;
+  for (float value : vec) {
+    joined << value << " ";
+  }
+  return joined.str();
+}
+
+/**
+ * Stand-alone check of the gArgMax kernel.
+ *
+ * Uploads a fixed 4 x 2 matrix, runs gArgMax with one block per row, prints
+ * the input, the device copy of the input and the resulting labels to
+ * stderr, and compares the labels with the expected column indices.
+ *
+ * This function never returns: it terminates the process with exit code 0
+ * on success and 1 if the kernel fails or produces unexpected labels.
+ */
+void testArgMax()
+{
+  using namespace marian;
+
+  const int rows = 4;
+  const int cols = 2;
+
+  std::vector<float> hVec({29,19,  49,39,  79,99,  79,39});
+  std::cerr << "hVec =" << output(hVec) << std::endl;
+
+  thrust::device_vector<float> dVec(hVec.size());
+  thrust::copy(hVec.begin(), hVec.end(), dVec.begin());
+  float *data = thrust::raw_pointer_cast(dVec.data());
+
+  thrust::device_vector<float> dLabel(rows);
+  float *labelPtr = thrust::raw_pointer_cast(dLabel.data());
+
+  // The kernel uses no shared memory, so none is requested.
+  gArgMax<<<rows, 1>>>(labelPtr, data, rows, cols);
+
+  // Launch errors are only visible through cudaGetLastError(); errors raised
+  // while the kernel runs surface at the next synchronization.
+  cudaError_t status = cudaGetLastError();
+  if (status == cudaSuccess)
+    status = cudaDeviceSynchronize();
+  if (status != cudaSuccess) {
+    std::cerr << "gArgMax failed: " << cudaGetErrorString(status) << std::endl;
+    exit(1);
+  }
+
+  // Read the input back to show the kernel left it untouched.
+  std::vector<float> hVec2(hVec.size());
+  thrust::copy(dVec.begin(), dVec.end(), hVec2.begin());
+  std::cerr << "hVec2=" << output(hVec2) << std::endl;
+
+  std::vector<float> hLabel(rows);
+  thrust::copy(dLabel.begin(), dLabel.end(), hLabel.begin());
+  std::cerr << "hLabel=" << output(hLabel) << std::endl;
+
+  // Row maxima are 29, 49, 99 and 79, i.e. columns 0, 0, 1 and 0.
+  const float expected[rows] = {0, 0, 1, 0};
+  for (int row = 0; row < rows; ++row) {
+    if (hLabel[row] != expected[row]) {
+      std::cerr << "testArgMax: unexpected label in row " << row << std::endl;
+      exit(1);
+    }
+  }
+
+  exit(0);
+}
+
+///////////////////////////////////////////////////////
 int main(int argc, char** argv) {
+  //testArgMax();

  using namespace std;
  using namespace marian;