// NOTE(review): this copy of the file appears damaged and is not compilable as it stands.
//  - All line breaks were collapsed, so every `//` comment swallows the rest of its
//    physical line (the first physical line below is lexically one long comment).
//  - Text that sat between angle brackets appears to have been stripped, together with
//    whatever code lay in between: the second #include has no header name, std::vector
//    and std::map appear without template arguments, several loop conditions stop at a
//    bare `i`, and some stream insertions stop after a single '<'. Function heads and
//    bodies are missing in those gaps. Restore the file from version control instead of
//    trying to patch the gaps by hand.
//
// What is still visible: a CNStats struct holding the counters created/destr/read/colls/
// words (its destructor prints to std::cerr), and ConfusionNet members ReadF, String2Word,
// ReadFormat1, Print, GetSubString, GetStringRep, GetWord and
// CreateTranslationOptionCollection, plus operator<< which forwards to Print.
// GetSubString and GetWord log an error and call abort(); GetStringRep logs an error and
// returns an empty string.
//
// NOTE(review), defects visible in the surviving text (confirm against an intact copy):
//  - ReadFormat1 assigns denseScores a one-element vector and then push_back()s
//    log(prob), so the following `denseScores[0] < 0` check and reset act on the initial
//    element (presumably 0), not on the parsed score, which ends up at index 1.
//  - GetStringRep's error message says "GeStringRep" and GetWord's says "GetFactorArray".
//  - In the column-reading loop, `word!=""` is always true once `is>>word` has succeeded.
// $Id$ #include "ConfusionNet.h" #include #include "FactorCollection.h" #include "Util.h" #include "TranslationOptionCollectionConfusionNet.h" #include "StaticData.h" #include "Sentence.h" #include "UserMessage.h" #include "moses/FF/InputFeature.h" #include "util/exception.hh" namespace Moses { struct CNStats { size_t created,destr,read,colls,words; CNStats() : created(0),destr(0),read(0),colls(0),words(0) {} ~CNStats() { print(std::cerr); } void createOne() { ++created; } void destroyOne() { ++destr; } void collect(const ConfusionNet& cn) { ++read; colls+=cn.GetSize(); for(size_t i=0; i0) { out<<"confusion net statistics:\n" " created:\t"< temp = std::make_pair(s.GetWord(i), scorePair); data[i].push_back(temp); } } bool ConfusionNet::ReadF(std::istream& in, const std::vector& factorOrder, int format) { VERBOSE(1, "read confusion net with format "<& factorOrder) { int rv=ReadF(in,factorOrder,0); if(rv) stats.collect(*this); return rv; } void ConfusionNet::String2Word(const std::string& s,Word& w, const std::vector& factorOrder) { std::vector factorStrVector = Tokenize(s, "|"); for(size_t i=0; i& factorOrder) { Clear(); const StaticData &staticData = StaticData::Instance(); const InputFeature *inputFeature = staticData.GetInputFeature(); size_t numInputScores = inputFeature->GetNumInputScores(); size_t numRealWordCount = inputFeature->GetNumRealWordsInInput(); size_t totalCount = numInputScores + numRealWordCount; bool addRealWordCount = (numRealWordCount > 0); std::string line; while(getline(in,line)) { std::istringstream is(line); std::string word; Column col; while(is>>word) { Word w; String2Word(word,w,factorOrder); std::vector probs(totalCount, 0.0); for(size_t i=0; i < numInputScores; i++) { double prob; if (!(is>>prob)) { TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n"); return false; } if(prob<0.0) { VERBOSE(1, "WARN: negative prob: "<set to 0.0\n"); prob=0.0; } else if (prob>1.0) { VERBOSE(1, "WARN: prob > 
1.0 : "< set to 1.0\n"); prob=1.0; } probs[i] = (std::max(static_cast(log(prob)),LOWEST_SCORE)); } //store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon if (addRealWordCount && word!=EPSILON && word!="") probs.back() = -1.0; ScorePair scorePair(probs); col.push_back(std::make_pair(w,scorePair)); } if(col.size()) { data.push_back(col); ShrinkToFit(data.back()); } else break; } return !data.empty(); } bool ConfusionNet::ReadFormat1(std::istream& in, const std::vector& factorOrder) { Clear(); std::string line; if(!getline(in,line)) return 0; size_t s; if(getline(in,line)) s=atoi(line.c_str()); else return 0; data.resize(s); for(size_t i=0; i>s)) return 0; std::string word; double prob; data[i].resize(s); for(size_t j=0; j>word>>prob) { //TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. -JS data[i][j].second.denseScores = std::vector (1); data[i][j].second.denseScores.push_back((float) log(prob)); if(data[i][j].second.denseScores[0]<0) { VERBOSE(1, "WARN: neg costs: "< set to 0\n"); data[i][j].second.denseScores[0]=0.0; } String2Word(word,data[i][j].first,factorOrder); } else return 0; } return !data.empty(); } void ConfusionNet::Print(std::ostream& out) const { out<<"conf net: "<::const_iterator iterDense; for(iterDense = data[i][j].second.denseScores.begin(); iterDense < data[i][j].second.denseScores.end(); ++iterDense) { out<<", "<<*iterDense; } // sparse std::map::const_iterator iterSparse; for(iterSparse = data[i][j].second.sparseScores.begin(); iterSparse != data[i][j].second.sparseScores.end(); ++iterSparse) { out << ", " << iterSparse->first << "=" << iterSparse->second; } out<<") "; } out<<"\n"; } out<<"\n\n"; } #ifdef _WIN32 #pragma warning(disable:4716) #endif Phrase ConfusionNet::GetSubString(const WordsRange&) const { TRACE_ERR("ERROR: call to ConfusionNet::GetSubString\n"); abort(); //return Phrase(Input); } std::string 
ConfusionNet::GetStringRep(const std::vector /* factorsToPrint */) const //not well defined yet { TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n"); return ""; } #ifdef _WIN32 #pragma warning(disable:4716) #endif const Word& ConfusionNet::GetWord(size_t) const { TRACE_ERR("ERROR: call to ConfusionNet::GetFactorArray\n"); abort(); } #ifdef _WIN32 #pragma warning(default:4716) #endif std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn) { cn.Print(out); return out; } TranslationOptionCollection* ConfusionNet::CreateTranslationOptionCollection() const { size_t maxNoTransOptPerCoverage = StaticData::Instance().GetMaxNoTransOptPerCoverage(); float translationOptionThreshold = StaticData::Instance().GetTranslationOptionThreshold(); TranslationOptionCollection *rv= new TranslationOptionCollectionConfusionNet(*this, maxNoTransOptPerCoverage, translationOptionThreshold); assert(rv); return rv; } }