mirror of
https://github.com/marian-nmt/marian.git
synced 2024-09-17 09:47:34 +03:00
Merge branch 'master' of https://github.com/emjotde/Marian
This commit is contained in:
commit
9015577101
@ -11,14 +11,15 @@ Installation
|
||||
|
||||
Requirements:
|
||||
|
||||
* g++ with C++14
|
||||
* g++ with C++11
|
||||
* CUDA and CuDNN
|
||||
* Boost (>= 1.56)
|
||||
|
||||
Exporting some paths for CuDNN may be required (put it, for example, in your `.bashrc` file):
|
||||
|
||||
export PATH=$PATH:$HOME/.local/bin:/usr/local/cuda/bin
|
||||
export LIBRARY_PATH=$LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cudnn-5/lib64
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cudnn-5/lib64
|
||||
export LIBRARY_PATH=$LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/local/cudnn-5/lib64
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/local/cudnn-5/lib64
|
||||
export CPATH=$CPATH:/usr/local/cudnn-5/include
|
||||
|
||||
Compilation with `cmake > 3.5`:
|
||||
|
@ -56,11 +56,11 @@
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
<fileInfo id="com.nvidia.cuda.ide.seven_five.configuration.debug.1479727693.924444438" name="train_mnist.cu" rcbsApplicability="disable" resourcePath="src/train_mnist.cu" toolsToInvoke="nvcc.compiler.base.1979453423.2078504098">
|
||||
<tool id="nvcc.compiler.base.1979453423.2078504098" name="NVCC Compiler" superClass="nvcc.compiler.base.1979453423"/>
|
||||
<fileInfo id="com.nvidia.cuda.ide.seven_five.configuration.debug.1479727693.843925199" name="validate_mnist_batch.cu" rcbsApplicability="disable" resourcePath="src/validate_mnist_batch.cu" toolsToInvoke="nvcc.compiler.base.1979453423.378728796">
|
||||
<tool id="nvcc.compiler.base.1979453423.378728796" name="NVCC Compiler" superClass="nvcc.compiler.base.1979453423"/>
|
||||
</fileInfo>
|
||||
<sourceEntries>
|
||||
<entry excluding="src/train_mnist.cu|src/validate_mnist.cu|src/npz_converter.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
|
||||
<entry excluding="src/validate_mnist_batch.cu|src/train_mnist.cu|src/validate_mnist.cu|src/npz_converter.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
|
||||
</sourceEntries>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
|
@ -5,6 +5,7 @@ cuda_add_library(marian_lib
|
||||
cnpy/cnpy.cpp
|
||||
exception.cpp
|
||||
expressions.cu
|
||||
sgd.cu
|
||||
tensor.cu
|
||||
tensor_operators.cu
|
||||
)
|
||||
|
143
src/sgd.cu
Normal file
143
src/sgd.cu
Normal file
@ -0,0 +1,143 @@
|
||||
#include <algorithm>
#include <ctime>
#include <numeric>
#include <random>
#include <vector>

#include "sgd.h"
#include "thrust_functions.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace marian {
|
||||
/**
 * Builds an SGD trainer.
 *
 * The addresses of cost_func, inX and inY are stored, and xData / yData are
 * kept by reference rather than copied, so all of them must outlive this
 * object.
 *
 * @param cost_func    expression that Run() calls forward()/backward() on
 * @param inX          expression that receives each input batch
 * @param inY          expression that receives each label batch
 * @param params       expressions whose values UpdateModel() adjusts
 * @param eta          step size applied to every gradient
 * @param xData        flat input data, numFeatures values per example
 * @param numFeatures  number of values per example in xData
 * @param yData        flat label data, numClasses values per example
 * @param numClasses   number of values per example in yData
 * @param epochs       number of passes over the data
 * @param batchSize    number of examples per batch
 */
SGD::SGD(Expr& cost_func,
         Expr& inX,
         Expr& inY,
         const std::vector<Expr*> params,
         float eta,
         std::vector<float>& xData,
         size_t numFeatures,
         std::vector<float>& yData,
         size_t numClasses,
         size_t epochs,
         size_t batchSize)
  : cost_function_(&cost_func)
  , inX_(&inX)
  , inY_(&inY)
  , params_(params)
  , eta_(eta)
  , xData_(xData)
  , numFeatures_(numFeatures)
  , yData_(yData)
  , numClasses_(numClasses)
  , epochs_(epochs)
  , maxBatchSize_(batchSize)
{
}
|
||||
|
||||
void SGD::Run()
|
||||
{
|
||||
std::srand ( unsigned ( std::time(0) ) );
|
||||
|
||||
size_t numExamples = xData_.size()/ numFeatures_;
|
||||
Tensor xt({(int)maxBatchSize_, (int)numExamples}, 0.0f);
|
||||
Tensor yt({(int)maxBatchSize_, (int)numClasses_}, 0.0f);
|
||||
|
||||
vector<size_t> shuffle = CreateShuffle(numExamples);
|
||||
//vector<size_t> shuffle;
|
||||
|
||||
for (size_t numEpoch = 0; numEpoch < epochs_; ++numEpoch) {
|
||||
std::cerr << "Starting epoch #" << numEpoch << std::endl;
|
||||
size_t startId = 0;
|
||||
size_t endId = startId + maxBatchSize_;
|
||||
|
||||
while (endId < numExamples) {
|
||||
PrepareBatch(startId, endId, maxBatchSize_, shuffle, xt, yt);
|
||||
*inX_ = xt;
|
||||
*inY_ = yt;
|
||||
|
||||
cost_function_->forward(maxBatchSize_);
|
||||
cost_function_->backward();
|
||||
|
||||
UpdateModel();
|
||||
|
||||
startId += maxBatchSize_;
|
||||
endId += maxBatchSize_;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Produces a random permutation of the indices 0 .. numExamples-1.
 *
 * Uses std::shuffle with a std::mt19937 seeded from std::random_device;
 * std::random_shuffle is deprecated in C++14 and removed in C++17.
 *
 * @param numExamples  number of indices to permute
 * @return vector of size numExamples holding each index exactly once
 */
std::vector<size_t> SGD::CreateShuffle(size_t numExamples) const {
  std::vector<size_t> order(numExamples);

  // Start from a size_t zero so the sequence is generated in the element
  // type rather than in int.
  std::iota(order.begin(), order.end(), static_cast<size_t>(0));

  std::random_device seedSource;
  std::mt19937 engine(seedSource());
  std::shuffle(order.begin(), order.end(), engine);

  return order;
}
|
||||
|
||||
void SGD::PrepareBatch(
|
||||
size_t startId,
|
||||
size_t endId,
|
||||
size_t batchSize,
|
||||
const std::vector<size_t> &shuffle,
|
||||
Tensor& xt,
|
||||
Tensor& yt) {
|
||||
/*
|
||||
std::vector<float> x(xData_.begin() + startId * numFeatures_,
|
||||
xData_.begin() + endId * numFeatures_);
|
||||
std::vector<float> y(yData_.begin() + startId * numClasses_,
|
||||
yData_.begin() + endId * numClasses_);
|
||||
*/
|
||||
std::vector<float> x(batchSize * numFeatures_);
|
||||
std::vector<float> y(batchSize * numClasses_);
|
||||
|
||||
/*
|
||||
cerr << "startId=" << startId
|
||||
<< " " << endId
|
||||
<< " " << batchSize
|
||||
<< endl;
|
||||
cerr << "numExamples=" << shuffle.size() << endl;
|
||||
cerr << "numFeatures_=" << numFeatures_ << " " << numClasses_ << endl;
|
||||
cerr << "sizes=" << x.size()
|
||||
<< " " << y.size()
|
||||
<< " " << xData_.size()
|
||||
<< " " << yData_.size()
|
||||
<< endl;
|
||||
*/
|
||||
size_t startXId = 0;
|
||||
size_t startYId = 0;
|
||||
|
||||
for (size_t i = startId; i < endId; ++i) {
|
||||
size_t ind = shuffle[i];
|
||||
size_t startXDataId = ind * numFeatures_;
|
||||
size_t startYDataId = ind * numClasses_;
|
||||
|
||||
size_t endXDataId = startXDataId + numFeatures_;
|
||||
size_t endYDataId = startYDataId + numClasses_;
|
||||
/*
|
||||
cerr << "i=" << i
|
||||
<< " " << ind
|
||||
<< " " << startXDataId << "-" << endXDataId
|
||||
<< " " << startYDataId << "-" << endYDataId
|
||||
<< endl;
|
||||
*/
|
||||
std::copy(xData_.begin() + startXDataId,
|
||||
xData_.begin() + endXDataId,
|
||||
x.begin() + startXId);
|
||||
|
||||
std::copy(yData_.begin() + startYDataId,
|
||||
yData_.begin() + endYDataId,
|
||||
y.begin() + startYId);
|
||||
|
||||
startXId += numFeatures_;
|
||||
startYId += numClasses_;
|
||||
}
|
||||
|
||||
xt.set(x);
|
||||
yt.set(y);
|
||||
}
|
||||
|
||||
void SGD::UpdateModel() {
|
||||
for (auto& param : params_) {
|
||||
using namespace thrust::placeholders;
|
||||
Element(_1 = _1 - eta_ * _2, param->val(), param->grad());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
77
src/sgd.h
77
src/sgd.h
@ -5,6 +5,7 @@
|
||||
|
||||
#include "expressions.h"
|
||||
#include "thrust_functions.h"
|
||||
#include "tensor_operators.h"
|
||||
|
||||
namespace marian {
|
||||
|
||||
@ -14,67 +15,14 @@ class SGD {
|
||||
const std::vector<Expr*> params, float eta,
|
||||
std::vector<float>& xData, size_t numFeatures,
|
||||
std::vector<float>& yData, size_t numClasses,
|
||||
size_t epochs, size_t batchSize)
|
||||
: cost_function_(&cost_func),
|
||||
inX_(&inX),
|
||||
inY_(&inY),
|
||||
params_(params),
|
||||
eta_(eta),
|
||||
xData_(xData),
|
||||
numFeatures_(numFeatures),
|
||||
yData_(yData),
|
||||
numClasses_(numClasses),
|
||||
epochs_(epochs),
|
||||
batchSize_(batchSize)
|
||||
{}
|
||||
size_t epochs, size_t batchSize);
|
||||
|
||||
void Run() {
|
||||
size_t numExamples = xData_.size()/ numFeatures_;
|
||||
Tensor xt({(int)batchSize_, (int)numExamples}, 0.0f);
|
||||
Tensor yt({(int)batchSize_, (int)numClasses_}, 0.0f);
|
||||
|
||||
for (size_t numEpoch = 0; numEpoch < epochs_; ++numEpoch) {
|
||||
std::cerr << "Starting epoch #" << numEpoch << std::endl;
|
||||
size_t startId = 0;
|
||||
size_t endId = startId + batchSize_;
|
||||
|
||||
while (endId < numExamples) {
|
||||
PrepareBatch(startId, endId, xt, yt);
|
||||
*inX_ = xt;
|
||||
*inY_ = yt;
|
||||
|
||||
cost_function_->forward(batchSize_);
|
||||
cost_function_->backward();
|
||||
|
||||
UpdateModel();
|
||||
|
||||
startId += batchSize_;
|
||||
endId += batchSize_;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PrepareBatch(size_t startId, size_t endId, Tensor& xt, Tensor& yt) {
|
||||
std::vector<float> x(xData_.begin() + startId * numFeatures_,
|
||||
xData_.begin() + endId * numFeatures_);
|
||||
std::vector<float> y(yData_.begin() + startId * numClasses_,
|
||||
yData_.begin() + endId * numClasses_);
|
||||
|
||||
xt.set(x);
|
||||
yt.set(y);
|
||||
}
|
||||
|
||||
void UpdateModel() {
|
||||
for (auto& param : params_) {
|
||||
using namespace thrust::placeholders;
|
||||
Element(_1 = _1 - eta_ * _2, param->val(), param->grad());
|
||||
}
|
||||
}
|
||||
void Run();
|
||||
|
||||
private:
|
||||
std::shared_ptr<Expr> cost_function_;
|
||||
std::shared_ptr<Expr> inX_;
|
||||
std::shared_ptr<Expr> inY_;
|
||||
Expr *cost_function_;
|
||||
Expr *inX_;
|
||||
Expr *inY_;
|
||||
std::vector<Expr*> params_;
|
||||
const float eta_;
|
||||
std::vector<float>& xData_;
|
||||
@ -82,7 +30,18 @@ class SGD {
|
||||
std::vector<float>& yData_;
|
||||
const size_t numClasses_;
|
||||
const size_t epochs_;
|
||||
const size_t batchSize_;
|
||||
const size_t maxBatchSize_;
|
||||
|
||||
std::vector<size_t> CreateShuffle(size_t numExamples) const;
|
||||
void PrepareBatch(
|
||||
size_t startId,
|
||||
size_t endId,
|
||||
size_t batchSize,
|
||||
const std::vector<size_t> &shuffle,
|
||||
Tensor& xt,
|
||||
Tensor& yt);
|
||||
|
||||
void UpdateModel();
|
||||
};
|
||||
|
||||
} // namespace marian
|
||||
|
@ -21,7 +21,7 @@ int main(int argc, char** argv) {
|
||||
std::cerr << "Done." << std::endl;
|
||||
|
||||
std::cerr << "Loading model params...";
|
||||
NpzConverter converter("../scripts/test_model/model.npz");
|
||||
NpzConverter converter("../scripts/test_model_single/model.npz");
|
||||
|
||||
std::vector<float> wData, bData;
|
||||
Shape wShape, bShape;
|
||||
|
@ -21,22 +21,39 @@ int main(int argc, char** argv) {
|
||||
std::cerr << "\tDone." << std::endl;
|
||||
|
||||
std::cerr << "Loading model params...";
|
||||
NpzConverter converter("../scripts/test_model/model.npz");
|
||||
|
||||
std::vector<float> wData;
|
||||
Shape wShape;
|
||||
converter.Load("weights", wData, wShape);
|
||||
NpzConverter converter("../scripts/test_model_single/model.npz");
|
||||
|
||||
std::vector<float> bData;
|
||||
Shape bShape;
|
||||
converter.Load("bias", bData, bShape);
|
||||
std::vector<float> wData1;
|
||||
Shape wShape1;
|
||||
converter.Load("weights1", wData1, wShape1);
|
||||
|
||||
std::vector<float> bData1;
|
||||
Shape bShape1;
|
||||
converter.Load("bias1", bData1, bShape1);
|
||||
|
||||
std::vector<float> wData2;
|
||||
Shape wShape2;
|
||||
converter.Load("weights2", wData2, wShape2);
|
||||
|
||||
std::vector<float> bData2;
|
||||
Shape bShape2;
|
||||
converter.Load("bias2", bData2, bShape2);
|
||||
|
||||
auto initW = [wData](Tensor t) {
|
||||
t.set(wData);
|
||||
auto initW1 = [wData1](Tensor t) {
|
||||
t.set(wData1);
|
||||
};
|
||||
|
||||
auto initB = [bData](Tensor t) {
|
||||
t.set(bData);
|
||||
auto initB1 = [bData1](Tensor t) {
|
||||
t.set(bData1);
|
||||
};
|
||||
|
||||
auto initW2 = [wData2](Tensor t) {
|
||||
t.set(wData2);
|
||||
};
|
||||
|
||||
auto initB2 = [bData2](Tensor t) {
|
||||
t.set(bData2);
|
||||
};
|
||||
|
||||
std::cerr << "\tDone." << std::endl;
|
||||
@ -45,11 +62,15 @@ int main(int argc, char** argv) {
|
||||
auto x = input(shape={whatevs, IMAGE_SIZE}, name="X");
|
||||
auto y = input(shape={whatevs, LABEL_SIZE}, name="Y");
|
||||
|
||||
auto w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0", init=initW);
|
||||
auto b = param(shape={1, LABEL_SIZE}, name="b0", init=initB);
|
||||
auto w1 = param(shape={IMAGE_SIZE, 100}, name="W0", init=initW1);
|
||||
auto b1 = param(shape={1, 100}, name="b0", init=initB1);
|
||||
auto w2 = param(shape={100, LABEL_SIZE}, name="W1", init=initW2);
|
||||
auto b2 = param(shape={1, LABEL_SIZE}, name="b1", init=initB2);
|
||||
|
||||
std::cerr << "Building model...";
|
||||
auto predict = softmax(dot(x, w) + b, axis=1, name="pred");
|
||||
auto layer1 = tanh(dot(x, w1) + b1);
|
||||
auto layer2 = softmax(dot(layer1, w2) + b2, axis=1, name="layer2");
|
||||
auto predict = layer2;
|
||||
|
||||
std::cerr << "Done." << std::endl;
|
||||
|
||||
@ -77,6 +98,7 @@ int main(int argc, char** argv) {
|
||||
if (testLabels[startId * LABEL_SIZE + i + j]) correct = j;
|
||||
if (results[i + j] > results[i + predicted]) predicted = j;
|
||||
}
|
||||
/*std::cerr << "CORRECT: " << correct << " PREDICTED: " << predicted << std::endl;*/
|
||||
acc += (correct == predicted);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user