From 7d9013a85b39b9f2c66ba8fae24dd67ccb5ee2c2 Mon Sep 17 00:00:00 2001 From: Marcin Junczys-Dowmunt Date: Mon, 19 Jan 2015 23:15:08 +0100 Subject: [PATCH] Work-around for temporary translation option collection size during phrase table binarization --- .../CompactPT/PhraseTableCreator.cpp | 6 +- .../CompactPT/PhraseTableCreator.h | 7 +- .../CompactPT/StringVectorTemp.h | 430 ++++++++++++++++++ 3 files changed, 437 insertions(+), 6 deletions(-) create mode 100644 moses/TranslationModel/CompactPT/StringVectorTemp.h diff --git a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp index b3f3c995b..6de3340d1 100644 --- a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp +++ b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp @@ -112,9 +112,9 @@ PhraseTableCreator::PhraseTableCreator(std::string inPath, if(tempfilePath.size()) { MmapAllocator allocEncoded(util::FMakeTemp(tempfilePath)); - m_encodedTargetPhrases = new StringVector(allocEncoded); + m_encodedTargetPhrases = new StringVectorTemp(allocEncoded); } else { - m_encodedTargetPhrases = new StringVector(); + m_encodedTargetPhrases = new StringVectorTemp(); } EncodeTargetPhrases(); @@ -1210,7 +1210,7 @@ size_t CompressionTask::m_collectionNum = 0; boost::mutex CompressionTask::m_mutex; #endif -CompressionTask::CompressionTask(StringVector& encodedCollections, PhraseTableCreator& creator) : m_encodedCollections(encodedCollections), m_creator(creator) {} diff --git a/moses/TranslationModel/CompactPT/PhraseTableCreator.h b/moses/TranslationModel/CompactPT/PhraseTableCreator.h index a8aa0e81a..f63a4f61f 100644 --- a/moses/TranslationModel/CompactPT/PhraseTableCreator.h +++ b/moses/TranslationModel/CompactPT/PhraseTableCreator.h @@ -35,6 +35,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "BlockHashIndex.h" #include "StringVector.h" +#include "StringVectorTemp.h" #include "CanonicalHuffman.h" namespace Moses @@ -237,7 +238,7 @@ private: std::vector m_lexicalTableIndex; std::vector m_lexicalTable; - StringVector* + StringVectorTemp* m_encodedTargetPhrases; StringVector* @@ -396,12 +397,12 @@ private: static boost::mutex m_mutex; #endif static size_t m_collectionNum; - StringVector& + StringVectorTemp& m_encodedCollections; PhraseTableCreator& m_creator; public: - CompressionTask(StringVector& + CompressionTask(StringVectorTemp& encodedCollections, PhraseTableCreator& creator); void operator()(); }; diff --git a/moses/TranslationModel/CompactPT/StringVectorTemp.h b/moses/TranslationModel/CompactPT/StringVectorTemp.h new file mode 100644 index 000000000..ffac0b718 --- /dev/null +++ b/moses/TranslationModel/CompactPT/StringVectorTemp.h @@ -0,0 +1,430 @@ +// $Id$ +// vim:tabstop=2 +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2006 University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + +#ifndef moses_StringVectorTemp_h +#define moses_StringVectorTemp_h + +#include +#include +#include +#include +#include +#include + +#include + +#include "ThrowingFwrite.h" +#include "StringVector.h" + +#include "MmapAllocator.h" + +namespace Moses +{ + + +// ********** StringVectorTemp ********** + +template class Allocator = std::allocator> +class StringVectorTemp +{ +protected: + bool m_sorted; + bool m_memoryMapped; + + std::vector >* m_charArray; + std::vector m_positions; + + virtual const ValueT* value_ptr(PosT i) const; + +public: + //typedef ValueIteratorRange >::const_iterator> range; + typedef ValueIteratorRange range; + + // ********** RangeIterator ********** + + class RangeIterator : public boost::iterator_facade + { + + private: + PosT m_index; + StringVectorTemp* m_container; + + public: + RangeIterator(); + RangeIterator(StringVectorTemp &sv, PosT index=0); + + PosT get_index(); + + private: + friend class boost::iterator_core_access; + + range dereference() const; + bool equal(RangeIterator const& other) const; + void increment(); + void decrement(); + void advance(PosT n); + + PosT distance_to(RangeIterator const& other) const; + }; + + // ********** StringIterator ********** + + class StringIterator : public boost::iterator_facade + { + + private: + PosT m_index; + StringVectorTemp* m_container; + + public: + StringIterator(); + StringIterator(StringVectorTemp &sv, PosT index=0); + + PosT get_index(); + + private: + friend class boost::iterator_core_access; + + const std::string dereference() const; + bool equal(StringIterator const& other) const; + void increment(); + void decrement(); + void advance(PosT n); + PosT distance_to(StringIterator const& other) const; + }; + + typedef RangeIterator iterator; + typedef StringIterator string_iterator; + + StringVectorTemp(); + StringVectorTemp(Allocator alloc); + + virtual ~StringVectorTemp() { + delete m_charArray; + } + + void swap(StringVectorTemp &c) { + m_positions.swap(c.m_positions); + m_charArray->swap(*c.m_charArray); + + bool temp = m_sorted; + m_sorted = c.m_sorted; + c.m_sorted = temp; + } + + bool is_sorted() const; + PosT size() const; + virtual PosT size2() const; + + template Iterator begin() const; + template Iterator end() const; + + iterator begin() const; + iterator end() const; + + PosT length(PosT i) const; + //typename std::vector >::const_iterator begin(PosT i) const; + //typename std::vector >::const_iterator end(PosT i) const; + const ValueT* begin(PosT i) const; + const ValueT* end(PosT i) const; + + void clear() { + m_charArray->clear(); + m_sorted = true; + m_positions.clear(); + } + + range at(PosT i) const; + range operator[](PosT i) const; + range back() const; + + template + void push_back(StringT s); + void push_back(const char* c); + + template + PosT find(StringT &s) const; + PosT find(const char* c) const; +}; + +// ********** Implementation ********** + +// StringVectorTemp + +template class Allocator> +StringVectorTemp::StringVectorTemp() + : m_sorted(true), m_memoryMapped(false), m_charArray(new std::vector >()) { } + +template class Allocator> +StringVectorTemp::StringVectorTemp(Allocator alloc) + : m_sorted(true), m_memoryMapped(false), m_charArray(new std::vector >(alloc)) { } + +template class Allocator> +template +void StringVectorTemp::push_back(StringT s) +{ + if(is_sorted() && size() && !(back() < s)) + m_sorted = false; + + m_positions.push_back(size2()); + std::copy(s.begin(), s.end(), std::back_inserter(*m_charArray)); +} + +template class Allocator> +void StringVectorTemp::push_back(const char* c) +{ + std::string dummy(c); + push_back(dummy); +} + +template class Allocator> +template +Iterator StringVectorTemp::begin() const +{ + return Iterator(const_cast&>(*this), 0); +} + +template class Allocator> +template +Iterator StringVectorTemp::end() const +{ + return Iterator(const_cast&>(*this), size()); +} + +template class Allocator> +typename StringVectorTemp::iterator StringVectorTemp::begin() const +{ + return begin(); +}; + +template class Allocator> +typename StringVectorTemp::iterator StringVectorTemp::end() const +{ + return end(); +}; + +template class Allocator> +bool StringVectorTemp::is_sorted() const +{ + return m_sorted; +} + +template class Allocator> +PosT StringVectorTemp::size() const +{ + return m_positions.size(); +} + +template class Allocator> +PosT StringVectorTemp::size2() const +{ + return m_charArray->size(); +} + +template class Allocator> +typename StringVectorTemp::range StringVectorTemp::at(PosT i) const +{ + return range(begin(i), end(i)); +} + +template class Allocator> +typename StringVectorTemp::range StringVectorTemp::operator[](PosT i) const +{ + return at(i); +} + +template class Allocator> +typename StringVectorTemp::range StringVectorTemp::back() const +{ + return at(size()-1); +} + +template class Allocator> +PosT StringVectorTemp::length(PosT i) const +{ + if(i+1 < size()) + return m_positions[i+1] - m_positions[i]; + else + return size2() - m_positions[i]; +} + +template class Allocator> +const ValueT* StringVectorTemp::value_ptr(PosT i) const +{ + return &(*m_charArray)[m_positions[i]]; +} + +template class Allocator> +//typename std::vector >::const_iterator StringVectorTemp::begin(PosT i) const +const ValueT* StringVectorTemp::begin(PosT i) const +{ + //return typename std::vector >::const_iterator(value_ptr(i)); + return value_ptr(i); +} + +template class Allocator> +//typename std::vector >::const_iterator StringVectorTemp::end(PosT i) const +const ValueT* StringVectorTemp::end(PosT i) const +{ + //return typename std::vector >::const_iterator(value_ptr(i) + length(i)); + return value_ptr(i) + length(i); +} + +template class Allocator> +template +PosT StringVectorTemp::find(StringT &s) const +{ + if(m_sorted) + return std::distance(begin(), std::lower_bound(begin(), end(), s)); + return std::distance(begin(), std::find(begin(), end(), s)); +} + +template class Allocator> +PosT StringVectorTemp::find(const char* c) const +{ + std::string s(c); + return find(s); +} + +// RangeIterator + +template class Allocator> +StringVectorTemp::RangeIterator::RangeIterator() : m_index(0), m_container(0) { } + +template class Allocator> +StringVectorTemp::RangeIterator::RangeIterator(StringVectorTemp &sv, PosT index) + : m_index(index), m_container(&sv) { } + +template class Allocator> +PosT StringVectorTemp::RangeIterator::get_index() +{ + return m_index; +} + +template class Allocator> +typename StringVectorTemp::range +StringVectorTemp::RangeIterator::dereference() const +{ + return typename StringVectorTemp::range( + m_container->begin(m_index), + m_container->end(m_index) + ); +} + +template class Allocator> +bool StringVectorTemp::RangeIterator::equal( + StringVectorTemp::RangeIterator const& other) const +{ + return m_index == other.m_index && m_container == other.m_container; +} + +template class Allocator> +void StringVectorTemp::RangeIterator::increment() +{ + m_index++; +} + +template class Allocator> +void StringVectorTemp::RangeIterator::decrement() +{ + m_index--; +} + +template class Allocator> +void StringVectorTemp::RangeIterator::advance(PosT n) +{ + m_index += n; +} + +template class Allocator> +PosT StringVectorTemp::RangeIterator::distance_to( + StringVectorTemp::RangeIterator const& other) const +{ + return other.m_index - m_index; +} + +// StringIterator + +template class Allocator> +StringVectorTemp::StringIterator::StringIterator() + : m_index(0), m_container(0) { } + +template class Allocator> +StringVectorTemp::StringIterator::StringIterator( + StringVectorTemp &sv, PosT index) : m_index(index), + m_container(&sv) { } + +template class Allocator> +PosT StringVectorTemp::StringIterator::get_index() +{ + return m_index; +} + +template class Allocator> +const std::string StringVectorTemp::StringIterator::dereference() const +{ + return StringVectorTemp::range(m_container->begin(m_index), + m_container->end(m_index)).str(); +} + +template class Allocator> +bool StringVectorTemp::StringIterator::equal( + StringVectorTemp::StringIterator const& other) const +{ + return m_index == other.m_index && m_container == other.m_container; +} + +template class Allocator> +void StringVectorTemp::StringIterator::increment() +{ + m_index++; +} + +template class Allocator> +void StringVectorTemp::StringIterator::decrement() +{ + m_index--; +} + +template class Allocator> +void StringVectorTemp::StringIterator::advance(PosT n) +{ + m_index += n; +} + +template class Allocator> +PosT StringVectorTemp::StringIterator::distance_to( + StringVectorTemp::StringIterator const& other) const +{ + return other.m_index - m_index; +} + +// ********** Some typedefs ********** + +typedef StringVectorTemp MediumStringVectorTemp; +typedef StringVectorTemp LongStringVectorTemp; + +} + +#endif