Removing alignments and quality-scores test-code (#196)

* Removing alignments and quality-scores test-code
* BRT: Update to main
This commit is contained in:
Jerin Philip 2021-06-14 18:40:41 +01:00 committed by GitHub
parent e9e5ac6782
commit 4b014665ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 2 additions and 78 deletions

@ -1 +1 @@
Subproject commit b0ba62eade4af7752c65c76cb17eab421ea02445
Subproject commit e1ae3b58a6e6c25856a5f279fedb567bbe695c49

View File

@ -28,67 +28,6 @@ Response translateFromStdin(Ptr<Options> options, ResponseOptions responseOption
return response;
}
// Reads text from stdin, translates it with quality scores enabled and prints the scores to stdout.
//
// For every sentence two lines are written: first the sequence-level score, then the word-level scores separated by
// single spaces. The output for consecutive sentences is separated by one empty line.
void qualityScores(Ptr<Options> options) {
  ResponseOptions responseOptions;
  responseOptions.qualityScores = true;
  Response response = translateFromStdin(options, responseOptions);

  // Index with size_t, as alignmentAggregatedToSource does for the same response.size() call, so that a signed int is
  // not compared against a size.
  for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
    auto &quality = response.qualityScores[sentenceIdx];

    // An empty line precedes every sentence except the first one.
    std::cout << ((sentenceIdx == 0) ? "" : "\n") << quality.sequence << '\n';

    for (size_t wordIdx = 0; wordIdx < quality.word.size(); wordIdx++) {
      // Separator goes before every word score except the first, so the line has no trailing space.
      std::cout << ((wordIdx == 0) ? "" : " ");
      std::cout << quality.word[wordIdx];
    }
    std::cout << '\n';
  }
}
// Reads text from stdin, translates it with alignments enabled (alignmentThreshold = 0.2) and prints the alignments
// grouped by source token.
//
// For every sentence one line is written per source token; sentences are separated by an empty line.
//   numeric == true : the line holds the alignment probabilities for that source token, separated by single spaces.
//   numeric == false: the line holds the source word followed by ": " and then each aligned target word, every one of
//                     them preceded by a space.
// Within a line, entries are ordered by target token index.
void alignmentAggregatedToSource(Ptr<Options> options, bool numeric) {
  // (target token index, alignment probability)
  typedef std::pair<size_t, float> Point;

  ResponseOptions responseOptions;
  responseOptions.alignment = true;
  responseOptions.alignmentThreshold = 0.2f;
  Response response = translateFromStdin(options, responseOptions);

  // Orders the points gathered for one source token by their target token index.
  auto byTargetIndex = [](const Point &lhs, const Point &rhs) { return lhs.first < rhs.first; };

  for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
    if (sentenceIdx != 0) {
      std::cout << "\n";
    }

    // Aggregation happens on the source side, which does not depend on matrix-multiplications; only target-side
    // information is printed per source token so that BLEU-style comparisons can be run on the text.
    std::vector<std::vector<Point>> bySource(response.source.numWords(sentenceIdx));
    for (auto &point : response.alignments[sentenceIdx]) {
      bySource[point.src].emplace_back(point.tgt, point.prob);
    }

    for (size_t sourceIdx = 0; sourceIdx < bySource.size(); sourceIdx++) {
      std::vector<Point> &targets = bySource[sourceIdx];
      std::sort(targets.begin(), targets.end(), byTargetIndex);

      if (numeric) {
        // Probabilities only, space-separated without a leading or trailing space.
        const char *separator = "";
        for (const Point &target : targets) {
          std::cout << separator << target.second;
          separator = " ";
        }
      } else {
        // Source word, then every aligned target word prefixed by a space.
        std::cout << response.source.word(sentenceIdx, sourceIdx) << ": ";
        for (const Point &target : targets) {
          std::cout << " " << response.target.word(sentenceIdx, target.first);
        }
      }
      std::cout << '\n';
    }
  }
}
void annotatedTextWords(Ptr<Options> options, bool source) {
ResponseOptions responseOptions;
Response response = translateFromStdin(options, responseOptions);

View File

@ -25,15 +25,6 @@ namespace testapp {
// a response containing translation data according to responseOptions.
Response translateFromStdin(Ptr<Options> options, ResponseOptions responseOptions);
// Reads from stdin and translates. The quality scores of each translated sentence are printed, with the entries of
// consecutive sentences separated by an empty line. For each entry, the first line contains the sequence-level quality
// score and the second line contains the word-level quality scores, separated by spaces.
void qualityScores(Ptr<Options> options);
// Reads from stdin and translates. Alignments are printed aggregated by source token, one line per source token, with
// sentences separated by an empty line. If numeric is false, each line follows the format
// `src-token: [possible-target-alignments]`, listing the aligned target tokens. If numeric is true, only the alignment
// probabilities are printed (space-separated) instead of the tokens.
void alignmentAggregatedToSource(Ptr<Options> options, bool numeric = false);
// Reads from stdin and translates. For each sentence, prints its tokens separated by spaces. The words are taken from
// the source-side text annotation if source is true, and from the target-side annotation otherwise.
void annotatedTextWords(Ptr<Options> options, bool source = true);

View File

@ -6,13 +6,7 @@ int main(int argc, char *argv[]) {
auto options = cp.parseOptions(argc, argv, true);
const std::string mode = options->get<std::string>("bergamot-mode");
using namespace marian::bergamot;
if (mode == "test-quality-scores") {
testapp::qualityScores(options);
} else if (mode == "test-alignment-scores") {
testapp::alignmentAggregatedToSource(options, /*numeric=*/true);
} else if (mode == "test-alignment-words") {
testapp::alignmentAggregatedToSource(options, /*numeric=*/false);
} else if (mode == "test-response-source-sentences") {
if (mode == "test-response-source-sentences") {
testapp::annotatedTextSentences(options, /*source=*/true);
} else if (mode == "test-response-target-sentences") {
testapp::annotatedTextSentences(options, /*source=*/false);