mirror of
https://github.com/marian-nmt/marian.git
synced 2024-11-03 20:13:47 +03:00
Fix to resolve run time failures for FastOpt enabled WASM builds (#779)
* copy changes from commit 4df92f2 * add comments for better understanding * restore the newline at the end of file and add this changes in changelog.md
This commit is contained in:
parent
c1c4af08a9
commit
737f43014a
@ -53,6 +53,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
||||
- Added default "none" for option shuffle in BatchGenerator, so that it works in executables where shuffle is not an option.
|
||||
- Added a few missing header files in shortlist.h and beam_search.h.
|
||||
- Improved handling for receiving SIGTERM during training. By default, SIGTERM triggers 'save (now) and exit'. Prior to this fix, batch pre-fetching did not check for this sigal, potentially delaying exit considerably. It now pays attention to that. Also, the default behaviour of save-and-exit can now be disabled on the command line with --sigterm exit-immediately.
|
||||
- Fix the runtime failures for FASTOPT on 32-bit builds (wasm just happens to be 32-bit) because it uses hashing with an inconsistent mix of uint64_t and size_t.
|
||||
|
||||
### Changed
|
||||
- Updated SentencePiece repository to version 8336bbd0c1cfba02a879afe625bf1ddaf7cd93c5 from https://github.com/google/sentencepiece.
|
||||
|
@ -38,9 +38,10 @@ inline constexpr uint64_t crc(const char* const str) noexcept {
|
||||
/*****************************************************************************/
|
||||
|
||||
// PerfectHash constructs a perfect hash for a set K of n numeric keys. The size of
|
||||
// the hash is m > n (not much larger) and n << max(K) (much smaller). If I am not wrong m
|
||||
// is the next power of 2 larger than n? We then build an array of size m with n fields defined.
|
||||
// m - n fields stay undefined (a bit of waste).
|
||||
// the hash is m > n (not much larger) and n << max(K) (much smaller). The output array size is
|
||||
// determined by PHF::init in "src/3rd_party/phf/phf.h". m - n fields stay undefined (a bit of waste).
|
||||
|
||||
// Wrapper class for the 3rd-party library in "src/3rd_party/phf"
|
||||
class PerfectHash {
|
||||
private:
|
||||
phf phf_;
|
||||
@ -62,10 +63,12 @@ public:
|
||||
PHF::destroy(&phf_);
|
||||
}
|
||||
|
||||
// subscript operator [] overloading: if the key is uint64_t, return the hash code directly
|
||||
uint32_t operator[](const uint64_t& key) const {
|
||||
return PHF::hash<uint64_t>(const_cast<phf*>(&phf_), key);
|
||||
}
|
||||
|
||||
// If the key is a string, return the hash code for the string's CRC code
|
||||
uint32_t operator[](const char* const keyStr) const {
|
||||
return (*this)[crc::crc(keyStr)];
|
||||
}
|
||||
@ -109,6 +112,9 @@ private:
|
||||
|
||||
public:
|
||||
// Node types for FastOpt, seem to be enough to cover YAML:NodeType
|
||||
// Multi-element types include "Sequence" and "Map"
|
||||
// "Sequence" is implemented with STL vectors
|
||||
// "Map" is implemented with a 3rd-party PHF library (see the PerfectHash class)
|
||||
enum struct NodeType {
|
||||
Null, Bool, Int64, Float64, String, Sequence, Map
|
||||
};
|
||||
@ -126,6 +132,7 @@ private:
|
||||
size_t elements_{0}; // Number of elements if isMap or isSequence is true, 0 otherwise.
|
||||
|
||||
// Used to find elements if isSequence() is true.
|
||||
// Retrieve the entry using array indexing.
|
||||
inline const std::unique_ptr<const FastOpt>& arrayLookup(size_t keyId) const {
|
||||
if(keyId < array_.size())
|
||||
return array_[keyId];
|
||||
@ -134,13 +141,15 @@ private:
|
||||
}
|
||||
|
||||
// Used to find elements if isMap() is true.
|
||||
inline const std::unique_ptr<const FastOpt>& phLookup(size_t keyId) const {
|
||||
// Retrieve the entry from the hash table.
|
||||
inline const std::unique_ptr<const FastOpt>& phLookup(uint64_t keyId) const {
|
||||
if(ph_)
|
||||
return array_[(*ph_)[keyId]];
|
||||
else
|
||||
return uniqueNullPtr;
|
||||
}
|
||||
|
||||
// Builders for different types of nodes.
|
||||
// Build Null node.
|
||||
void makeNull() {
|
||||
elements_ = 0;
|
||||
@ -221,7 +230,7 @@ private:
|
||||
type_ = NodeType::Map;
|
||||
}
|
||||
|
||||
// Build a Map node, uses std::string as key, which gets hashed to size_t and used in the function above.
|
||||
// Build a Map node, uses std::string as key, which gets hashed to uint64_t and used in the function above.
|
||||
void makeMap(const std::map<std::string, YAML::Node>& m) {
|
||||
std::map<uint64_t, YAML::Node> mi;
|
||||
for(const auto& it : m) {
|
||||
@ -263,13 +272,14 @@ private:
|
||||
|
||||
public:
|
||||
// Constructor to recursively create a FastOpt object from a YAML::Node following the yaml structure.
|
||||
FastOpt(const YAML::Node& node)
|
||||
FastOpt(const YAML::Node& node)
|
||||
{ construct(node); }
|
||||
|
||||
FastOpt(const YAML::Node& node, uint64_t fingerprint)
|
||||
FastOpt(const YAML::Node& node, uint64_t fingerprint)
|
||||
: fingerprint_{fingerprint}
|
||||
{ construct(node); }
|
||||
|
||||
// Predicates for node types
|
||||
bool isSequence() const {
|
||||
return type_ == NodeType::Sequence;
|
||||
}
|
||||
@ -279,20 +289,20 @@ public:
|
||||
}
|
||||
|
||||
bool isScalar() const {
|
||||
return type_ == NodeType::Bool
|
||||
|| type_ == NodeType::Float64
|
||||
|| type_ == NodeType::Int64
|
||||
return type_ == NodeType::Bool
|
||||
|| type_ == NodeType::Float64
|
||||
|| type_ == NodeType::Int64
|
||||
|| type_ == NodeType::String;
|
||||
}
|
||||
|
||||
bool isNull() const {
|
||||
return type_ == NodeType::Null;
|
||||
}
|
||||
}
|
||||
|
||||
bool isInt() const {
|
||||
return type_ == NodeType::Int64;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
bool isBool() const {
|
||||
return type_ == NodeType::Bool;
|
||||
}
|
||||
@ -318,11 +328,11 @@ public:
|
||||
std::swap(array_, other.array_);
|
||||
std::swap(type_, other.type_);
|
||||
std::swap(elements_, other.elements_);
|
||||
// leave fingerprint alone as it needed by parent node.
|
||||
// leave fingerprint alone as it needed by parent node.
|
||||
}
|
||||
|
||||
// Is the hashed key in a map?
|
||||
bool has(size_t keyId) const {
|
||||
// Is the hashed key in a map?
|
||||
bool has(uint64_t keyId) const {
|
||||
if(isMap() && elements_ > 0) {
|
||||
const auto& ptr = phLookup(keyId);
|
||||
return ptr ? ptr->fingerprint_ == keyId : false;
|
||||
@ -346,27 +356,27 @@ public:
|
||||
}
|
||||
|
||||
// access sequence or map element
|
||||
const FastOpt& operator[](size_t keyId) const {
|
||||
const FastOpt& operator[](uint64_t keyId) const {
|
||||
if(isSequence()) {
|
||||
const auto& ptr = arrayLookup(keyId);
|
||||
const auto& ptr = arrayLookup((size_t)keyId);
|
||||
ABORT_IF(!ptr, "Unseen key {}" , keyId);
|
||||
return *ptr;
|
||||
} else if(isMap()) {
|
||||
const auto& ptr = phLookup(keyId);
|
||||
ABORT_IF(!ptr || ptr->fingerprint_ != keyId, "Unseen key {}", keyId);
|
||||
return *ptr;
|
||||
return *ptr;
|
||||
} else {
|
||||
ABORT("Not a sequence or map node");
|
||||
}
|
||||
}
|
||||
|
||||
// operator [] overloading for non-uint64_t keys
|
||||
const FastOpt& operator[](int key) const {
|
||||
return operator[]((size_t)key);
|
||||
return operator[]((uint64_t)key);
|
||||
}
|
||||
|
||||
const FastOpt& operator[](const char* const key) const {
|
||||
// MacOS requires explicit cast to size_t before we can use it.
|
||||
return operator[]((size_t)crc::crc(key));
|
||||
return operator[](crc::crc(key));
|
||||
}
|
||||
|
||||
const FastOpt& operator[](const std::string& key) const {
|
||||
|
Loading…
Reference in New Issue
Block a user