// mosesdecoder/phrase-extract/extract-lex.h

#pragma once
#include <cstddef>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>
// 2012-06-30 18:43:47 +04:00
namespace MosesTraining
{
/**
 * A count plus a nested table of further WordCount entries.
 *
 * The nested table is keyed by `const std::string*` with the default
 * ordering, so keys are compared by pointer value, not by string contents.
 * No copy constructor or assignment operator is declared; the
 * compiler-generated ones are used.
 */
class WordCount
{
  // Stream output operator; only declared in the visible part of this header.
  friend std::ostream& operator<<(std::ostream&, const WordCount&);

public:
  // Both data members are public, so callers may read or write them directly.
  float m_count;
  std::map<const std::string*, WordCount> m_coll;

  /** Creates an entry with a count of zero and an empty nested table. */
  WordCount()
    : m_count(0) {
  }

  /**
   * Creates an entry with the given starting count and an empty nested table.
   * Deliberately left non-explicit so existing implicit float conversions
   * keep compiling.
   */
  WordCount(float count)
    : m_count(count) {
  }

  /**
   * Only declared in the visible part of this header.
   * NOTE(review): presumably adds incr to m_count - confirm against the definition.
   */
  void AddCount(float incr);

  /** Mutable access to the nested table. */
  std::map<const std::string*, WordCount> &GetColl() {
    return m_coll;
  }

  /** Read-only access to the nested table. */
  const std::map<const std::string*, WordCount> &GetColl() const {
    return m_coll;
  }

  /**
   * Returns the stored count by value. The return type carries no top-level
   * const: it has no effect on a float returned by value.
   */
  float GetCount() const {
    return m_count;
  }
};
/**
 * Vocabulary whose only state is a std::set of strings.
 */
class Vocab
{
public:
  /**
   * Only declared in the visible part of this header.
   * NOTE(review): presumably inserts `word` into the set when absent and
   * returns a pointer to the stored copy - confirm against the definition.
   */
  const std::string *GetOrAdd(const std::string &word);

private:
  // The stored words.
  std::set<std::string> m_coll;
};
/**
 * Owns a Vocab and two WordCount tables, and declares the operations that
 * fill them (Process) and write them to file streams (Output).
 *
 * Every member function is only declared in the visible part of this header.
 */
class ExtractLex
{
  Vocab m_vocab;

  // Count tables keyed by std::string pointers (compared by pointer value).
  // NOTE(review): the names suggest source-to-target and target-to-source
  // directions - confirm against the implementation.
  std::map<const std::string*, WordCount> m_collS2T;
  std::map<const std::string*, WordCount> m_collT2S;

  void Process(const std::string *target, const std::string *source);
  void Process(WordCount &wcIn, const std::string *out);

  // NOTE(review): m_sourceAligned / m_targetAligned are parameters, not data
  // members, despite the m_ prefix; the names are kept unchanged here.
  void ProcessUnaligned(std::vector<std::string> &toksTarget, std::vector<std::string> &toksSource,
                        const std::vector<bool> &m_sourceAligned, const std::vector<bool> &m_targetAligned);

  void Output(const std::map<const std::string*, WordCount> &coll, std::ofstream &outStream);

public:
  /**
   * Takes three token vectors (target, source, alignment) and a line count.
   * NOTE(review): presumably handles one sentence pair per call - confirm
   * against the implementation.
   */
  void Process(std::vector<std::string> &toksTarget, std::vector<std::string> &toksSource, std::vector<std::string> &toksAlign, size_t lineCount);

  /** Takes the two output file streams, one per table direction by name. */
  void Output(std::ofstream &streamLexS2T, std::ofstream &streamLexT2S);
};
// 2012-06-30 18:43:47 +04:00
} // namespace