From 7333366551583fdac66bc5f7bed182d82df1c7f1 Mon Sep 17 00:00:00 2001 From: Marcin Junczys-Dowmunt Date: Wed, 20 Apr 2016 12:44:59 +0200 Subject: [PATCH] reorganized files, first attempt at logging --- CMakeLists.txt | 4 ++- src/CMakeLists.txt | 2 +- src/cnpy/CMakeLists.txt | 24 --------------- src/cnpy/README | 37 ---------------------- src/cnpy/example1.cpp | 61 ------------------------------------- src/common/logging.h | 5 +++ src/{ => common}/utf8.h | 0 src/decoder/decoder_main.cu | 32 +++++++++++++------ src/decoder/god.cu | 6 ++-- src/decoder/god.h | 2 ++ src/{ => dl4mt}/dl4mt.h | 0 11 files changed, 35 insertions(+), 138 deletions(-) delete mode 100644 src/cnpy/CMakeLists.txt delete mode 100644 src/cnpy/README delete mode 100644 src/cnpy/example1.cpp create mode 100644 src/common/logging.h rename src/{ => common}/utf8.h (100%) rename src/{ => dl4mt}/dl4mt.h (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6009bcc4..04e0631f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,10 +7,12 @@ LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -g; -O3; -a add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM) SET(CUDA_PROPAGATE_HOST_FLAGS OFF) +add_definitions(-DBOOST_LOG_DYN_LINK) + include_directories(${amunn_SOURCE_DIR}) find_package(CUDA REQUIRED) -find_package(Boost COMPONENTS system filesystem program_options timer) +find_package(Boost COMPONENTS system filesystem program_options timer log) if(Boost_FOUND) include_directories(${Boost_INCLUDE_DIRS}) set(EXT_LIBS ${EXT_LIBS} ${Boost_LIBRARIES}) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cf035996..dbdea096 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,9 +1,9 @@ include_directories(.) +include_directories(dl4mt) include_directories(common) include_directories(decoder) include_directories(mblas) -include_directories(dl4mt) add_library(libamunn OBJECT cnpy/cnpy.cpp diff --git a/src/cnpy/CMakeLists.txt b/src/cnpy/CMakeLists.txt deleted file mode 100644 index 5a7cdd30..00000000 --- a/src/cnpy/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR) -if(COMMAND cmake_policy) - cmake_policy(SET CMP0003 NEW) -endif(COMMAND cmake_policy) - -project(CNPY) - -option(ENABLE_STATIC "Build static (.a) library" ON) - -add_library(cnpy SHARED "cnpy.cpp") -target_link_libraries(cnpy z) -install(TARGETS "cnpy" LIBRARY DESTINATION lib PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) - -if(ENABLE_STATIC) - add_library(cnpy-static STATIC "cnpy.cpp") - set_target_properties(cnpy-static PROPERTIES OUTPUT_NAME "cnpy") - install(TARGETS "cnpy-static" ARCHIVE DESTINATION lib) -endif(ENABLE_STATIC) - -install(FILES "cnpy.h" DESTINATION include) -install(FILES "mat2npz" "npy2mat" "npz2mat" DESTINATION bin PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) - -add_executable(example1 example1.cpp) -target_link_libraries(example1 cnpy) diff --git a/src/cnpy/README b/src/cnpy/README deleted file mode 100644 index 117ca564..00000000 --- a/src/cnpy/README +++ /dev/null @@ -1,37 +0,0 @@ -Purpose: - -Numpy offers the save method for easy saving of arrays into .npy and savez for zipping multiple .npy arrays together into a .npz file. cnpy lets you read and write to these formats in C++. The motivation comes from scientific programming where large amounts of data are generated in C++ and analyzed in Python. Writing to .npy has the advantage of using low-level C++ I/O (fread and fwrite) for speed and binary format for size. The .npy file header takes care of specifying the size, shape, and data type of the array, so specifying the format of the data is unnecessary. Loading data written in numpy formats into C++ is equally simple, but requires you to type-cast the loaded data to the type of your choice. - -Installation: - -Default installation directory is /usr/local. To specify a different directory, add -DCMAKE_INSTALL_PREFIX=/path/to/install/dir to the cmake invocation in step 4. - -1. get cmake at www.cmake.org -2. create a build directory, say $HOME/build -3. cd $HOME/build -4. cmake /path/to/cnpy -5. make -6. make install - -Using: - -To use, #include"cnpy.h" in your source code. Compile the source code mycode.cpp as - -g++ -o mycode mycode.cpp -L/path/to/install/dir -lcnpy - -Description: - -There are two functions for writing data: npy_save, npz_save. - -There are 3 functions for reading. npy_load will load a .npy file. npz_load(fname) will load a .npz and return a dictionary of NpyArray structues. npz_load(fname,varname) will load and return the NpyArray for data varname from the specified .npz file. -Note that NpyArray allocates char* data using new[] and *will not* delete the data upon the NpyArray destruction. You are responsible for delete the data yourself. - -The data structure for loaded data is below. Data is loaded into a a raw byte array. The array shape and word size are read from the npy header. You are responsible for casting/copying the data to its intended data type. - -struct NpyArray { - char* data; - std::vector shape; - unsigned int word_size; -}; - -See example1.cpp for examples of how to use the library. example1 will also be build during cmake installation. diff --git a/src/cnpy/example1.cpp b/src/cnpy/example1.cpp deleted file mode 100644 index 7f1ab5d6..00000000 --- a/src/cnpy/example1.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include"cnpy.h" -#include -#include -#include -#include -#include - -const int Nx = 128; -const int Ny = 64; -const int Nz = 32; - -int main() -{ - //create random data - std::complex* data = new std::complex[Nx*Ny*Nz]; - for(int i = 0;i < Nx*Ny*Nz;i++) data[i] = std::complex(rand(),rand()); - - //save it to file - const unsigned int shape[] = {Nz,Ny,Nx}; - cnpy::npy_save("arr1.npy",data,shape,3,"w"); - - //load it into a new array - cnpy::NpyArray arr = cnpy::npy_load("arr1.npy"); - std::complex* loaded_data = reinterpret_cast*>(arr.data); - - //make sure the loaded data matches the saved data - assert(arr.word_size == sizeof(std::complex)); - assert(arr.shape.size() == 3 && arr.shape[0] == Nz && arr.shape[1] == Ny && arr.shape[2] == Nx); - for(int i = 0; i < Nx*Ny*Nz;i++) assert(data[i] == loaded_data[i]); - - //append the same data to file - //npy array on file now has shape (Nz+Nz,Ny,Nx) - cnpy::npy_save("arr1.npy",data,shape,3,"a"); - - //now write to an npz file - //non-array variables are treated as 1D arrays with 1 element - double myVar1 = 1.2; - char myVar2 = 'a'; - unsigned int shape2[] = {1}; - cnpy::npz_save("out.npz","myVar1",&myVar1,shape2,1,"w"); //"w" overwrites any existing file - cnpy::npz_save("out.npz","myVar2",&myVar2,shape2,1,"a"); //"a" appends to the file we created above - cnpy::npz_save("out.npz","arr1",data,shape,3,"a"); //"a" appends to the file we created above - - //load a single var from the npz file - cnpy::NpyArray arr2 = cnpy::npz_load("out.npz","arr1"); - - //load the entire npz file - cnpy::npz_t my_npz = cnpy::npz_load("out.npz"); - - //check that the loaded myVar1 matches myVar1 - cnpy::NpyArray arr_mv1 = my_npz["myVar1"]; - double* mv1 = reinterpret_cast(arr_mv1.data); - assert(arr_mv1.shape.size() == 1 && arr_mv1.shape[0] == 1); - assert(mv1[0] == myVar1); - - //cleanup: note that we are responsible for deleting all loaded data - delete[] data; - delete[] loaded_data; - arr2.destruct(); - my_npz.destruct(); -} diff --git a/src/common/logging.h b/src/common/logging.h new file mode 100644 index 00000000..3c6b7d14 --- /dev/null +++ b/src/common/logging.h @@ -0,0 +1,5 @@ +#pragma once + +#include + +#define LOG BOOST_LOG_TRIVIAL diff --git a/src/utf8.h b/src/common/utf8.h similarity index 100% rename from src/utf8.h rename to src/common/utf8.h diff --git a/src/decoder/decoder_main.cu b/src/decoder/decoder_main.cu index aa195471..cded42ac 100644 --- a/src/decoder/decoder_main.cu +++ b/src/decoder/decoder_main.cu @@ -4,16 +4,31 @@ #include #include "god.h" +#include "logging.h" #include "search.h" #include "threadpool.h" #include "printer.h" +History TranslationTask(const std::string& in, size_t taskCounter) { + LOG(info) << "Line " << taskCounter + << " (thread " << std::this_thread::get_id() << "): " + << in; + + thread_local std::unique_ptr search; + if(!search) { + LOG(info) << "Created Search for thread " << std::this_thread::get_id(); + search.reset(new Search(taskCounter)); + } + + return search->Decode(God::GetSourceVocab()(in)); +} + int main(int argc, char* argv[]) { God::Init(argc, argv); std::ios_base::sync_with_stdio(false); boost::timer::cpu_timer timer; - std::cerr << "Translating...\n"; + LOG(info) << "Reading input"; std::string in; std::size_t taskCounter = 0; @@ -21,15 +36,12 @@ int main(int argc, char* argv[]) { ThreadPool pool(God::Get("threads")); std::vector> results; while(std::getline(std::cin, in)) { - - auto translationTask = [in, taskCounter] { - thread_local std::unique_ptr search; - if(!search) - search.reset(new Search(taskCounter)); - return search->Decode(God::GetSourceVocab()(in)); - }; - results.emplace_back(pool.enqueue(translationTask)); + results.emplace_back( + pool.enqueue( + [=]{ return TranslationTask(in, taskCounter); } + ) + ); taskCounter++; } @@ -38,7 +50,7 @@ int main(int argc, char* argv[]) { for(auto&& result : results) Printer(result.get(), lineCounter++, std::cout); - std::cerr << timer.format() << std::endl; + LOG(info) << timer.format(); God::CleanUp(); return 0; diff --git a/src/decoder/god.cu b/src/decoder/god.cu index 0c777fdd..2aa0d358 100644 --- a/src/decoder/god.cu +++ b/src/decoder/god.cu @@ -104,7 +104,7 @@ God& God::NonStaticInit(int argc, char** argv) { ThreadPool devicePool(devices.size()); for(auto& modelPath : modelPaths) { for(size_t i = 0; i < devices.size(); ++i) { - std::cerr << "Loading model " << modelPath << " onto gpu" << devices[i] << std::endl; + LOG(info) << "Loading model " << modelPath << " onto gpu" << devices[i]; devicePool.enqueue([i, &devices, &modelPath, this]{ cudaSetDevice(devices[i]); modelsPerDevice_[i].emplace_back(new Weights(modelPath, devices[i])); @@ -114,7 +114,7 @@ God& God::NonStaticInit(int argc, char** argv) { } for(auto& lmPath : lmPaths) { - std::cerr << "Loading lm " << lmPath << std::endl; + LOG(info) << "Loading lm " << lmPath; lms_.emplace_back(lmPath, *targetVocab_); } @@ -124,8 +124,6 @@ God& God::NonStaticInit(int argc, char** argv) { if(weights_.size() < lmPaths.size()) weights_.resize(weights_.size() + lmPaths.size(), 0.0); - std::cerr << "done." << std::endl; - return *this; } diff --git a/src/decoder/god.h b/src/decoder/god.h index 2825abcc..d4f9508c 100644 --- a/src/decoder/god.h +++ b/src/decoder/god.h @@ -7,11 +7,13 @@ #include "dl4mt.h" #include "vocab.h" #include "kenlm.h" +#include "logging.h" namespace po = boost::program_options; class God { public: + static God& Init(const std::string&); static God& Init(int argc, char** argv); diff --git a/src/dl4mt.h b/src/dl4mt/dl4mt.h similarity index 100% rename from src/dl4mt.h rename to src/dl4mt/dl4mt.h