From 4b014665ba54eef8f8ab68a1929d2ede9a012902 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Mon, 14 Jun 2021 18:40:41 +0100 Subject: [PATCH] Removing alignments and quality-scores test-code (#196) * Removing alignments and quality-scores test-code * BRT: Update to main --- bergamot-translator-tests | 2 +- src/tests/apps.cpp | 61 --------------------------------------- src/tests/apps.h | 9 ------ src/tests/cli.cpp | 8 +---- 4 files changed, 2 insertions(+), 78 deletions(-) diff --git a/bergamot-translator-tests b/bergamot-translator-tests index b0ba62e..e1ae3b5 160000 --- a/bergamot-translator-tests +++ b/bergamot-translator-tests @@ -1 +1 @@ -Subproject commit b0ba62eade4af7752c65c76cb17eab421ea02445 +Subproject commit e1ae3b58a6e6c25856a5f279fedb567bbe695c49 diff --git a/src/tests/apps.cpp b/src/tests/apps.cpp index 9c00bff..c57f5f5 100644 --- a/src/tests/apps.cpp +++ b/src/tests/apps.cpp @@ -28,67 +28,6 @@ Response translateFromStdin(Ptr options, ResponseOptions responseOption return response; } -void qualityScores(Ptr options) { - ResponseOptions responseOptions; - responseOptions.qualityScores = true; - - Response response = translateFromStdin(options, responseOptions); - for (int sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) { - auto &quality = response.qualityScores[sentenceIdx]; - std::cout << ((sentenceIdx == 0) ? "" : "\n") << quality.sequence << '\n'; - for (int wordIdx = 0; wordIdx < quality.word.size(); wordIdx++) { - std::cout << ((wordIdx == 0) ? "" : " "); - std::cout << quality.word[wordIdx]; - } - std::cout << '\n'; - } -} - -void alignmentAggregatedToSource(Ptr options, bool numeric) { - ResponseOptions responseOptions; - responseOptions.alignment = true; - responseOptions.alignmentThreshold = 0.2f; - Response response = translateFromStdin(options, responseOptions); - - for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) { - std::cout << (sentenceIdx == 0 ? "" : "\n"); - - // We are aggregating at source, which does not depend on matrix-multiplications and printing only target so we can - // do BLEU based stuff on the text. - // - typedef std::pair Point; - - std::vector> aggregate(response.source.numWords(sentenceIdx)); - auto &alignments = response.alignments[sentenceIdx]; - for (auto &p : alignments) { - aggregate[p.src].emplace_back(p.tgt, p.prob); - } - - for (size_t sourceIdx = 0; sourceIdx < aggregate.size(); sourceIdx++) { - // Sort in order of target tokens. - auto cmp = [](const Point &p, const Point &q) { return p.first < q.first; }; - std::sort(aggregate[sourceIdx].begin(), aggregate[sourceIdx].end(), cmp); - - if (!numeric) { - std::cout << response.source.word(sentenceIdx, sourceIdx) << ": "; - } - - for (size_t j = 0; j < aggregate[sourceIdx].size(); j++) { - if (numeric) { - float alignmentScore = aggregate[sourceIdx][j].second; - std::cout << (j == 0 ? "" : " "); - std::cout << alignmentScore; - } else { - std::cout << " "; - size_t targetIdx = aggregate[sourceIdx][j].first; - std::cout << response.target.word(sentenceIdx, targetIdx); - } - } - std::cout << '\n'; - } - } -} - void annotatedTextWords(Ptr options, bool source) { ResponseOptions responseOptions; Response response = translateFromStdin(options, responseOptions); diff --git a/src/tests/apps.h b/src/tests/apps.h index 2ccf2c4..b380b57 100644 --- a/src/tests/apps.h +++ b/src/tests/apps.h @@ -25,15 +25,6 @@ namespace testapp { // a response containing translation data according responseOptions. Response translateFromStdin(Ptr options, ResponseOptions responseOptions); -// Reads from stdin and translates. The quality score for the translations (each sentence) are printed separated by -// empty-lines. The first line contains whole quality scores and the second line word quality scores, for each entry. -void qualityScores(Ptr options); - -// Reads from stdin and translates. Alignments are printed aligned to the source-tokens, following format src-token: -// [possible-target-alignments], if numeric is false. If numeric is true, only alignment probabilities are printed -// instead of the tokens. -void alignmentAggregatedToSource(Ptr options, bool numeric = false); - // Reads from stdin and translates. Prints the tokens separated by space for each sentence. Prints words from source // side text annotation if source=true, target annotation otherwise. void annotatedTextWords(Ptr options, bool source = true); diff --git a/src/tests/cli.cpp b/src/tests/cli.cpp index f2f0218..4ecb24e 100644 --- a/src/tests/cli.cpp +++ b/src/tests/cli.cpp @@ -6,13 +6,7 @@ int main(int argc, char *argv[]) { auto options = cp.parseOptions(argc, argv, true); const std::string mode = options->get("bergamot-mode"); using namespace marian::bergamot; - if (mode == "test-quality-scores") { - testapp::qualityScores(options); - } else if (mode == "test-alignment-scores") { - testapp::alignmentAggregatedToSource(options, /*numeric=*/true); - } else if (mode == "test-alignment-words") { - testapp::alignmentAggregatedToSource(options, /*numeric=*/false); - } else if (mode == "test-response-source-sentences") { + if (mode == "test-response-source-sentences") { testapp::annotatedTextSentences(options, /*source=*/true); } else if (mode == "test-response-target-sentences") { testapp::annotatedTextSentences(options, /*source=*/false);