From 4d3702c4ec3d3fd8efd1c40a8746f39f446d9980 Mon Sep 17 00:00:00 2001
From: Marcin Junczys-Dowmunt <Marcin.JunczysDowmunt@microsoft.com>
Date: Thu, 6 Oct 2022 05:53:16 +0000
Subject: [PATCH 01/10] Merged PR 25950: Add missing defaults for concatenated
 factors

This PR adds missing default values for concatenated factors.
---
 CHANGELOG.md             |  1 +
 VERSION                  |  2 +-
 src/layers/embedding.cpp | 11 +++++------
 3 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f93148e8..c46df0f2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 - `--output-sampling` now works with ensembles (requires proper normalization via e.g `--weights 0.5 0.5`)
 
 ### Fixed
+- Make concat factors not break old vector implementation
 - Use allocator in hashing
 - Read/restore checkpoints from main process only when training with MPI
 - Multi-loss casts type to first loss-type before accumulation (aborted before due to missing cast)
diff --git a/VERSION b/VERSION
index 9ec46594..daf48f91 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-v1.11.13
+v1.11.14
diff --git a/src/layers/embedding.cpp b/src/layers/embedding.cpp
index d6768fdb..b60f6cc1 100644
--- a/src/layers/embedding.cpp
+++ b/src/layers/embedding.cpp
@@ -6,10 +6,8 @@ namespace marian {
 Embedding::Embedding(Ptr<ExpressionGraph> graph, Ptr<Options> options)
     : LayerBase(graph, options), inference_(opt<bool>("inference")) {
   std::string name = opt<std::string>("prefix");
-  int dimVoc       = opt<int>("dimVocab");
-  int dimEmb       = opt<int>("dimEmb");
-  int dimFactorEmb = opt<int>("dimFactorEmb");
-
+  int dimVoc = opt<int>("dimVocab");
+  int dimEmb = opt<int>("dimEmb");
   bool fixed = opt<bool>("fixed", false);
 
   // Embedding layer initialization should depend only on embedding size, hence fanIn=false
@@ -21,6 +19,7 @@ Embedding::Embedding(Ptr<ExpressionGraph> graph, Ptr<Options> options)
     dimVoc = (int)factoredVocab_->factorVocabSize();
     LOG_ONCE(info, "[embedding] Factored embeddings enabled");
     if(opt<std::string>("factorsCombine") == "concat") {
+      int dimFactorEmb = opt<int>("dimFactorEmb", 0);
       ABORT_IF(dimFactorEmb == 0,
                "Embedding: If concatenation is chosen to combine the factor embeddings, a factor "
                "embedding size must be specified.");
@@ -179,8 +178,8 @@ Expr Embedding::applyIndices(const std::vector<WordIndex>& embIdx, const Shape&
       "prefix",         (opt<bool>("tied-embeddings-src") || opt<bool>("tied-embeddings-all")) ? "Wemb"
                                                                                           : prefix_ + "_Wemb",
       "fixed",          embeddingFix_,
-      "dimFactorEmb",   opt<int>("factors-dim-emb"),  // for factored embeddings
-      "factorsCombine", opt<std::string>("factors-combine"),  // for factored embeddings
+      "dimFactorEmb",   opt<int>("factors-dim-emb", 0),  // for factored embeddings
+      "factorsCombine", opt<std::string>("factors-combine", ""),  // for factored embeddings
       "vocab",     opt<std::vector<std::string>>("vocabs")[batchIndex_]);  // for factored embeddings
   // clang-format on
   if(options_->hasAndNotEmpty("embedding-vectors")) {

From a6de1b781c61f129f77fabf5f75a9a3b60c55913 Mon Sep 17 00:00:00 2001
From: Roman Grundkiewicz <rogrundk@microsoft.com>
Date: Tue, 1 Nov 2022 06:26:56 +0000
Subject: [PATCH 02/10] Merged PR 26271: Update CI pipeline triggers

Updates to the CI triggers:
- Stop running parallel CI runs, i.e. if a pipeline is running, it must finish before new runs are started.
- Exclude paths to files, which are not related to/critical the codebase
- Downloading MKL from a mirror hosting server
---
 azure-pipelines.yml        | 31 +++++++++++++++++++++++++++----
 azure-regression-tests.yml |  3 +++
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 0c7bd9c7..5a989db4 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -16,7 +16,24 @@ parameters:
 # The pipeline CI trigger is set on the branch master only and PR trigger on a
 # (non-draft) pull request to any branch
 trigger:
-- master
+  # This minimizes the number of parallel pipeline runs. When a pipeline is
+  # running, the CI waits until it is completed before starting another one.
+  batch: true
+  branches:
+    include:
+    - master
+  paths:
+    exclude:
+    - azure-regression-tests.yml
+    - contrib
+    - doc
+    - examples
+    - regression-tests
+    - scripts
+    - VERSION
+    - vs
+    - '**/*.md'
+    - '**/*.txt'
 
 pool:
   name: Azure Pipelines
@@ -32,7 +49,7 @@ variables:
   - name: MKL_DIR
     value: "$(Build.SourcesDirectory)/mkl"
   - name: MKL_URL
-    value: "https://romang.blob.core.windows.net/mariandev/ci/mkl-2020.1-windows-static.zip"
+    value: "https://data.statmt.org/romang/marian-regression-tests/ci/mkl-2020.1-windows-static.zip"
   - name: VCPKG_COMMIT
     value: 2022.03.10
   - name: VCPKG_DIR
@@ -52,6 +69,7 @@ stages:
 
   ######################################################################
   - job: BuildWindows
+    cancelTimeoutInMinutes: 1
     condition: eq(${{ parameters.runBuilds }}, true)
     displayName: Windows
 
@@ -188,6 +206,7 @@ stages:
 
   ######################################################################
   - job: BuildUbuntu
+    cancelTimeoutInMinutes: 1
     condition: eq(${{ parameters.runBuilds }}, true)
     displayName: Ubuntu
     timeoutInMinutes: 120
@@ -324,6 +343,7 @@ stages:
 
   ######################################################################
   - job: BuildMacOS
+    cancelTimeoutInMinutes: 1
     condition: eq(${{ parameters.runBuilds }}, true)
     displayName: macOS CPU clang
 
@@ -373,6 +393,7 @@ stages:
 
   ######################################################################
   - job: BuildInstall
+    cancelTimeoutInMinutes: 1
     condition: eq(${{ parameters.runBuilds }}, true)
     displayName: Linux CPU library install
 
@@ -435,6 +456,7 @@ stages:
 
   ######################################################################
   - job: TestWindows
+    cancelTimeoutInMinutes: 1
     displayName: Windows CPU+FBGEMM
 
     pool:
@@ -535,7 +557,7 @@ stages:
         ls
       displayName: Prepare tests
       env:
-          AWS_SECRET_SAS_TOKEN: $(blob-sas-token)
+          SAS_TOKEN: $(blob-sas-token)
       workingDirectory: marian-prod-tests
 
     # Avoid using $(Build.SourcesDirectory) in bash tasks because on Windows pools it uses '\'
@@ -560,6 +582,7 @@ stages:
 
   ######################################################################
   - job: TestLinux
+    cancelTimeoutInMinutes: 1
     displayName: Linux CPU+FBGEMM
 
     pool:
@@ -636,7 +659,7 @@ stages:
         ls
       displayName: Prepare tests
       env:
-          AWS_SECRET_SAS_TOKEN: $(blob-sas-token)
+        AZURE_STORAGE_SAS_TOKEN: $(blob-sas-token)
       workingDirectory: marian-prod-tests
 
     - bash: MARIAN=../marian-dev/build bash ./run_mrt.sh '#cpu' '#basics' '#devops'
diff --git a/azure-regression-tests.yml b/azure-regression-tests.yml
index c849b59d..a56c9dce 100644
--- a/azure-regression-tests.yml
+++ b/azure-regression-tests.yml
@@ -20,6 +20,7 @@ stages:
 
   ######################################################################
   - job: TestsGPULinux
+    cancelTimeoutInMinutes: 1
     displayName: Linux GPU tests
     timeoutInMinutes: 120
 
@@ -108,6 +109,8 @@ stages:
         git pull origin master
         make install
       displayName: Prepare regression tests
+      env:
+        AZURE_STORAGE_SAS_TOKEN: $(blob-sas-token)
       workingDirectory: regression-tests
 
     # Continue on error to be able to collect outputs and publish them as an artifact

From be1ee3fa944c6e49f61d0e5e1153333b49dd936a Mon Sep 17 00:00:00 2001
From: Roman Grundkiewicz <rogrundk@microsoft.com>
Date: Tue, 1 Nov 2022 10:07:40 +0000
Subject: [PATCH 03/10] Merged PR 26318: Fix incorrect envvar name in Azure
 Pipeline

Fix incorrect environment variable name for SAS token in Windows tests
---
 azure-pipelines.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 5a989db4..bef33aec 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -557,7 +557,7 @@ stages:
         ls
       displayName: Prepare tests
       env:
-          SAS_TOKEN: $(blob-sas-token)
+          AZURE_STORAGE_SAS_TOKEN: $(blob-sas-token)
       workingDirectory: marian-prod-tests
 
     # Avoid using $(Build.SourcesDirectory) in bash tasks because on Windows pools it uses '\'

From c79dc80a2fc114b083e01387f3503da680a3541a Mon Sep 17 00:00:00 2001
From: Roman Grundkiewicz <rogrundk@microsoft.com>
Date: Sun, 20 Nov 2022 13:31:10 +0000
Subject: [PATCH 04/10] Merged PR 26617: Update regression-tests & fix CI
 pipelines

Update regression-tests & fix CI pipelines
---
 azure-pipelines.yml        | 10 +++++-----
 azure-regression-tests.yml |  6 +++++-
 regression-tests           |  2 +-
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index bef33aec..faa61900 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -39,7 +39,7 @@ pool:
   name: Azure Pipelines
 
 variables:
-  - group: marian-prod-tests
+  - group: marian-regression-tests
   - name: BOOST_ROOT_WINDOWS
     value: "C:/hostedtoolcache/windows/Boost/1.72.0/x86_64"
   - name: BOOST_URL
@@ -550,14 +550,14 @@ stages:
       displayName: Machine statistics
       workingDirectory: marian-prod-tests
 
-    # The current SAS token will expire on 8/30/2023 and a new one will need to be set in Marian > Pipelines > Library
+    # The current SAS token will expire on 12/31/2023 and a new one will need to be set in Marian > Pipelines > Library
     - bash: |
         cd models
         bash download-models.sh
         ls
       displayName: Prepare tests
       env:
-          AZURE_STORAGE_SAS_TOKEN: $(blob-sas-token)
+        AZURE_STORAGE_SAS_TOKEN: $(marian-prod-tests-blob-sas-token)
       workingDirectory: marian-prod-tests
 
     # Avoid using $(Build.SourcesDirectory) in bash tasks because on Windows pools it uses '\'
@@ -652,14 +652,14 @@ stages:
       displayName: Machine statistics
       workingDirectory: marian-prod-tests
 
-    # The current SAS token will expire on 8/30/2023 and a new one will need to be set in Marian > Pipelines > Library
+    # The current SAS token will expire on 12/31/2023 and a new one will need to be set in Marian > Pipelines > Library
     - bash: |
         cd models
         bash download-models.sh
         ls
       displayName: Prepare tests
       env:
-        AZURE_STORAGE_SAS_TOKEN: $(blob-sas-token)
+        AZURE_STORAGE_SAS_TOKEN: $(marian-prod-tests-blob-sas-token)
       workingDirectory: marian-prod-tests
 
     - bash: MARIAN=../marian-dev/build bash ./run_mrt.sh '#cpu' '#basics' '#devops'
diff --git a/azure-regression-tests.yml b/azure-regression-tests.yml
index a56c9dce..cb3730c1 100644
--- a/azure-regression-tests.yml
+++ b/azure-regression-tests.yml
@@ -14,6 +14,9 @@ trigger: none
 # Hosted Azure DevOps Pool determining OS, CUDA version and available GPUs
 pool: mariandevops-pool-m60-eus
 
+variables:
+  - group: marian-regression-tests
+
 stages:
 - stage: TestsGPU
   jobs:
@@ -104,13 +107,14 @@ stages:
       workingDirectory: build
 
     # Always run regression tests from the master branch
+    # The current SAS token will expire on 12/31/2023 and a new one will need to be set in Marian > Pipelines > Library
     - bash: |
         git checkout master
         git pull origin master
         make install
       displayName: Prepare regression tests
       env:
-        AZURE_STORAGE_SAS_TOKEN: $(blob-sas-token)
+        AZURE_STORAGE_SAS_TOKEN: $(marian-pub-tests-blob-sas-token)
       workingDirectory: regression-tests
 
     # Continue on error to be able to collect outputs and publish them as an artifact
diff --git a/regression-tests b/regression-tests
index 92e116ef..488d454a 160000
--- a/regression-tests
+++ b/regression-tests
@@ -1 +1 @@
-Subproject commit 92e116efa369d6ed848c8eb19dfcef8bf7245d71
+Subproject commit 488d454a0177ef300eab91ab813e485d420dc38d

From b6581c4c44147f92ad8febe0e331bd68a8bda23f Mon Sep 17 00:00:00 2001
From: Roman Grundkiewicz <rogrundk@microsoft.com>
Date: Wed, 23 Nov 2022 19:16:44 +0000
Subject: [PATCH 05/10] Merged PR 26667: Update examples submodule to fix
 vulnerability issues

Updating examples submodule using [protobuf 3.20.2](https://github.com/marian-nmt/marian-examples/pull/29) to fix recent [vulnerability issues](https://machinetranslation.visualstudio.com/MachineTranslation/_componentGovernance/mtmain/alert/8035094?typeId=14698327&pipelinesTrackingFilter=0).

Related work items: #134319
---
 examples | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples b/examples
index 25e84383..58f48a06 160000
--- a/examples
+++ b/examples
@@ -1 +1 @@
-Subproject commit 25e84383225a29f769e362250654ddf256d06261
+Subproject commit 58f48a06756c623fe799613134810322e061863f

From b7205fc0b029efe5ea62fd3e35ac2a2227ea641f Mon Sep 17 00:00:00 2001
From: Alex Muzio <Alex.Muzio@microsoft.com>
Date: Wed, 30 Nov 2022 12:23:38 +0000
Subject: [PATCH 06/10] Merged PR 25220: Add extra model information to
 model_info.py script

Adding model shapes flag to model_info.py script: dtype and total number of model parameters.
Example: `python model_info.py -m ~/model.npz -mi`
---
 regression-tests              |  2 +-
 scripts/contrib/model_info.py | 12 ++++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/regression-tests b/regression-tests
index 488d454a..2a8bed3f 160000
--- a/regression-tests
+++ b/regression-tests
@@ -1 +1 @@
-Subproject commit 488d454a0177ef300eab91ab813e485d420dc38d
+Subproject commit 2a8bed3f0e937a9de2d6fa92dee3bcf482d3d47b
diff --git a/scripts/contrib/model_info.py b/scripts/contrib/model_info.py
index 3c573084..9e2a0263 100755
--- a/scripts/contrib/model_info.py
+++ b/scripts/contrib/model_info.py
@@ -42,11 +42,15 @@ def main():
             else:
                 print(model[args.key])
         else:
+            total_nb_of_parameters = 0
             for key in model:
-                if args.matrix_shapes:
-                    print(key, model[key].shape)
+                if not key == S2S_SPECIAL_NODE:
+                    total_nb_of_parameters += np.prod(model[key].shape)
+                if args.matrix_info:
+                    print(key, model[key].shape, model[key].dtype)
                 else:
                     print(key)
+            print('Total number of parameters:', total_nb_of_parameters)
 
 
 def parse_args():
@@ -57,8 +61,8 @@ def parse_args():
                         help="print values from special:model.yml node")
     parser.add_argument("-f", "--full-matrix", action="store_true",
                         help="force numpy to print full arrays for single key")
-    parser.add_argument("-ms", "--matrix-shapes", action="store_true",
-                        help="print shapes of all arrays in the model")
+    parser.add_argument("-mi", "--matrix-info", action="store_true",
+                        help="print full matrix info for all keys. Includes shape and dtype")
     return parser.parse_args()
 
 

From ee50d4aaeabbec3a82628d0804b0e078b04b84d4 Mon Sep 17 00:00:00 2001
From: Roman Grundkiewicz <rogrundk@microsoft.com>
Date: Tue, 20 Dec 2022 17:56:10 +0000
Subject: [PATCH 07/10] Merged PR 27051: Add an option for completely resetting
 validation metrics

Added `--valid-reset-all` that works as `--valid-reset-stalled` but it also resets last best saved validation metrics, which is useful for when the validation sets change for continued training.

Added new regression test: https://github.com/marian-nmt/marian-regression-tests/pull/89
---
 CHANGELOG.md                 |  1 +
 VERSION                      |  2 +-
 azure-pipelines.yml          |  5 ++++-
 src/common/config_parser.cpp |  6 ++++--
 src/training/scheduler.h     | 13 +++++++++----
 5 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c46df0f2..53f81397 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 - Fused inplace-dropout in FFN layer in Transformer
 - `--force-decode` option for marian-decoder
 - `--output-sampling` now works with ensembles (requires proper normalization via e.g `--weights 0.5 0.5`)
+- `--valid-reset-all` option
 
 ### Fixed
 - Make concat factors not break old vector implementation
diff --git a/VERSION b/VERSION
index daf48f91..2eac760f 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-v1.11.14
+v1.11.15
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index faa61900..3b1bfff3 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -595,7 +595,10 @@ stages:
 
     # The following packages are already installed on Azure-hosted runners: build-essential openssl libssl-dev
     # No need to install libprotobuf{17,10,9v5} on Ubuntu {20,18,16}.04 because it is installed together with libprotobuf-dev
-    - bash: sudo apt-get install -y libgoogle-perftools-dev libprotobuf-dev protobuf-compiler gcc-9 g++-9
+    # Installing libunwind-dev fixes a bug in 2204 (the libunwind-14 and libunwind-dev conflict)
+    - bash: |
+        sudo apt-get install -y libunwind-dev
+        sudo apt-get install -y libgoogle-perftools-dev libprotobuf-dev protobuf-compiler gcc-9 g++-9
       displayName: Install packages
 
     # https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html
diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp
index c9ab45f8..4cc23f2c 100644
--- a/src/common/config_parser.cpp
+++ b/src/common/config_parser.cpp
@@ -269,7 +269,7 @@ void ConfigParser::addOptionsModel(cli::CLIWrapper& cli) {
       "Pool encoder states instead of using cross attention (selects first encoder state, best used with special token)");
   cli.add<int>("--transformer-dim-ffn",
       "Size of position-wise feed-forward network (transformer)",
-      2048);  
+      2048);
   cli.add<int>("--transformer-decoder-dim-ffn",
       "Size of position-wise feed-forward network in decoder (transformer). Uses --transformer-dim-ffn if 0.",
       0);
@@ -591,7 +591,9 @@ void ConfigParser::addOptionsValidation(cli::CLIWrapper& cli) {
       "Multiple metrics can be specified",
       {"cross-entropy"});
   cli.add<bool>("--valid-reset-stalled",
-     "Reset all stalled validation metrics when the training is restarted");
+     "Reset stalled validation metrics when the training is restarted");
+  cli.add<bool>("--valid-reset-all",
+     "Reset all validation metrics when the training is restarted");
   cli.add<size_t>("--early-stopping",
      "Stop if the first validation metric does not improve for arg consecutive validation steps",
      10);
diff --git a/src/training/scheduler.h b/src/training/scheduler.h
index 34aa18c2..30f8c8de 100644
--- a/src/training/scheduler.h
+++ b/src/training/scheduler.h
@@ -494,12 +494,17 @@ public:
       state_->wordsDisp    = 0;
     }
 
-    if(options_->get<bool>("valid-reset-stalled")) {
+    if(options_->get<bool>("valid-reset-stalled") || options_->get<bool>("valid-reset-all")) {
       state_->stalled      = 0;
       state_->maxStalled   = 0;
       for(const auto& validator : validators_) {
-        if(state_->validators[validator->type()])
+        if(state_->validators[validator->type()]) {
+          // reset the number of stalled validations, e.g. when the validation set is the same
           state_->validators[validator->type()]["stalled"] = 0;
+          // reset last best results as well, e.g. when the validation set changes
+          if(options_->get<bool>("valid-reset-all"))
+            state_->validators[validator->type()]["last-best"] = validator->initScore();
+        }
       }
     }
 
@@ -512,10 +517,10 @@ public:
     if(mpi_->isMainProcess())
       if(filesystem::exists(nameYaml))
         yamlStr = io::InputFileStream(nameYaml).readToString();
-    
+
     if(mpi_)
       mpi_->bCast(yamlStr);
-    
+
     loadFromString(yamlStr);
   }
 

From 4f145c450f2b4b956d175fbbfe118a90e494acf4 Mon Sep 17 00:00:00 2001
From: Varun Mathur <vamathur@microsoft.com>
Date: Fri, 10 Feb 2023 16:34:37 +0000
Subject: [PATCH 08/10] Merged PR 26311: [FSM] make model loading lock
 non-static

make lock non-static
---
 src/data/factored_vocab.cpp | 3 +--
 src/data/factored_vocab.h   | 1 +
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/data/factored_vocab.cpp b/src/data/factored_vocab.cpp
index caee2e0c..f51869d5 100644
--- a/src/data/factored_vocab.cpp
+++ b/src/data/factored_vocab.cpp
@@ -21,8 +21,7 @@ namespace marian {
   maxSizeUnused;
   // If model has already been loaded, then assume this is a shared object, and skip loading it again.
   // This can be multi-threaded, so must run under lock.
-  static std::mutex s_mtx;
-  std::lock_guard<std::mutex> criticalSection(s_mtx);
+  std::lock_guard<std::mutex> criticalSection(loadMtx_);
   if (size() != 0) {
     //LOG(info, "[vocab] Attempting to load model a second time; skipping (assuming shared vocab)");
     return size();
diff --git a/src/data/factored_vocab.h b/src/data/factored_vocab.h
index b644ce4c..edbee154 100644
--- a/src/data/factored_vocab.h
+++ b/src/data/factored_vocab.h
@@ -110,6 +110,7 @@ private:
   Word unkId_{};
   WordLUT vocab_;
   size_t lemmaSize_;
+  std::mutex loadMtx_;
 
   // factors
   char factorSeparator_ = '|';                         // separator symbol for parsing factored words

From 9ad5203ca228fb63cba5147ebf566849945a4919 Mon Sep 17 00:00:00 2001
From: Marcin Junczys-Dowmunt <Marcin.JunczysDowmunt@microsoft.com>
Date: Sat, 11 Feb 2023 16:35:29 +0000
Subject: [PATCH 09/10] Merged PR 26476: Sanitize guided-alignment with
 case-augmentation (still somewhat wonky)

This fixes the blow-ups of using case-augmentation with guided-alignment. However, it's still not recommended to use this particular combination, results will be unreliable.
---
 src/data/corpus.cpp                |  2 +-
 src/data/corpus_base.cpp           |  5 +++-
 src/data/corpus_base.h             |  8 +++++--
 src/graph/expression_graph.cpp     |  8 +++++++
 src/graph/expression_graph.h       |  6 +++++
 src/graph/expression_operators.cpp |  5 ++++
 src/graph/expression_operators.h   |  5 ++++
 src/layers/guided_alignment.h      | 38 +++++++++++++++++-------------
 8 files changed, 57 insertions(+), 20 deletions(-)

diff --git a/src/data/corpus.cpp b/src/data/corpus.cpp
index 835d9d76..b36d42ac 100644
--- a/src/data/corpus.cpp
+++ b/src/data/corpus.cpp
@@ -128,7 +128,7 @@ SentenceTuple Corpus::next() {
           size_t vocabId = i - shift;
           bool altered;
           preprocessLine(fields[i], vocabId, curId, /*out=*/altered);
-          if (altered)
+          if(altered)
             tup.markAltered();
           addWordsToSentenceTuple(fields[i], vocabId, tup);
         }
diff --git a/src/data/corpus_base.cpp b/src/data/corpus_base.cpp
index addcc3bf..d276ca6b 100644
--- a/src/data/corpus_base.cpp
+++ b/src/data/corpus_base.cpp
@@ -476,7 +476,10 @@ void CorpusBase::addAlignmentsToBatch(Ptr<CorpusBatch> batch,
     // If the batch vector is altered within marian by, for example, case augmentation,
     // the guided alignments we received for this tuple cease to be valid.
     // Hence skip setting alignments for that sentence tuple..
-    if (!batchVector[b].isAltered()) {
+    if (batchVector[b].isAltered()) {
+      LOG_ONCE(info, "Using guided-alignment with case-augmentation is not recommended and can result in unexpected behavior");
+      aligns.push_back(WordAlignment());
+    } else {
       aligns.push_back(std::move(batchVector[b].getAlignment()));
     }
   }
diff --git a/src/data/corpus_base.h b/src/data/corpus_base.h
index 4e6d923e..2e572ebd 100644
--- a/src/data/corpus_base.h
+++ b/src/data/corpus_base.h
@@ -56,12 +56,16 @@ public:
    * @brief Returns whether this Tuple was altered or augmented from what
    * was provided to Marian in input.
    */
-  bool isAltered() const { return altered_; }
+  bool isAltered() const { 
+    return altered_; 
+  }
 
   /**
    * @brief Mark that this Tuple was internally altered or augmented by Marian
    */
-  void markAltered() { altered_ = true; }
+  void markAltered() { 
+    altered_ = true; 
+  }
 
   /**
    * @brief Adds a new sentence at the end of the tuple.
diff --git a/src/graph/expression_graph.cpp b/src/graph/expression_graph.cpp
index 146f7c4c..9e90b541 100644
--- a/src/graph/expression_graph.cpp
+++ b/src/graph/expression_graph.cpp
@@ -64,6 +64,14 @@ Expr ExpressionGraph::add(Expr node) {
   }
 }
 
+/**
+ * Removes the node from the set of roots (will not be initialized during back propagation)
+ * @param node a pointer to a expression node
+ */
+void ExpressionGraph::removeAsRoot(Expr node) {
+  topNodes_.erase(node);
+}
+
 // Call on every checkpoint in backwards order
 void createSubtape(Expr node) {
   auto subtape = New<std::list<Expr>>();
diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h
index 9272e42a..da69af09 100644
--- a/src/graph/expression_graph.h
+++ b/src/graph/expression_graph.h
@@ -676,6 +676,12 @@ public:
    * @param node a pointer to a expression node
    */
   Expr add(Expr node);
+  
+  /**
+   * Removes the node from the set of roots (will not be initialized during back propagation)
+   * @param node a pointer to a expression node
+   */
+  void removeAsRoot(Expr node);
 
   /**
    * Allocate memory for the forward pass of the given node.
diff --git a/src/graph/expression_operators.cpp b/src/graph/expression_operators.cpp
index 09049f98..b0d40949 100644
--- a/src/graph/expression_operators.cpp
+++ b/src/graph/expression_operators.cpp
@@ -27,6 +27,11 @@ Expr checkpoint(Expr a) {
   return a;
 }
 
+Expr removeAsRoot(Expr a) {
+  a->graph()->removeAsRoot(a); // ugly, hence why hidden here
+  return a;
+}
+
 Expr lambda(const std::vector<Expr>& nodes, Shape shape, Type type,
             LambdaNodeFunctor fwd, size_t hash) {
   return Expression<LambdaNodeOp>(nodes, shape, type, fwd, hash);
diff --git a/src/graph/expression_operators.h b/src/graph/expression_operators.h
index 5d9ceab3..cc3e6028 100644
--- a/src/graph/expression_operators.h
+++ b/src/graph/expression_operators.h
@@ -16,6 +16,11 @@ Expr debug(Expr a, const std::string& message = "");
  */
 Expr checkpoint(Expr a);
 
+/**
+ * Removes the node from the set of root nodes, no-op otherwise
+ */
+Expr removeAsRoot(Expr node);
+
 typedef Expr(ActivationFunction)(Expr);  ///< ActivationFunction has signature Expr(Expr)
 
 /**
diff --git a/src/layers/guided_alignment.h b/src/layers/guided_alignment.h
index d5929a6d..50a78557 100644
--- a/src/layers/guided_alignment.h
+++ b/src/layers/guided_alignment.h
@@ -26,7 +26,8 @@ guidedAlignmentToSparse(Ptr<data::CorpusBatch> batch) {
 
   std::sort(byIndex.begin(), byIndex.end(), [](const BiPoint& a, const BiPoint& b) { return std::get<0>(a) < std::get<0>(b); });
   std::vector<IndexType> indices; std::vector<float> valuesFwd; 
-  indices.reserve(byIndex.size()); valuesFwd.reserve(byIndex.size()); 
+  indices.reserve(byIndex.size()); 
+  valuesFwd.reserve(byIndex.size()); 
   for(auto& p : byIndex) {
     indices.push_back((IndexType)std::get<0>(p));
     valuesFwd.push_back(std::get<1>(p));
@@ -40,28 +41,33 @@ static inline RationalLoss guidedAlignmentCost(Ptr<ExpressionGraph> graph,
                                                Ptr<Options> options,
                                                Expr attention) { // [beam depth=1, max src length, batch size, tgt length]
   std::string guidedLossType = options->get<std::string>("guided-alignment-cost");  // @TODO: change "cost" to "loss"
-
+  // @TODO: It is ugly to check the multi-loss type here, but doing this right requires
+  // a substantial rewrite of the multi-loss architecture, which is planned anyways.
+  std::string multiLossType = options->get<std::string>("multi-loss-type", "sum");
+  
   // We dropped support for other losses which are not possible to implement with sparse labels.
   // They were most likely not used anyway.
   ABORT_IF(guidedLossType != "ce", "Only alignment loss type 'ce' is supported");
 
   float guidedLossWeight = options->get<float>("guided-alignment-weight");
-
-  auto [indices, values]  = guidedAlignmentToSparse(batch);
-  auto alignmentIndices   = graph->indices(indices);
-  auto alignmentValues    = graph->constant({(int)values.size()}, inits::fromVector(values));
-  auto attentionAtAligned = cols(flatten(attention), alignmentIndices);
-
-  float epsilon           = 1e-6f;
-  Expr alignmentLoss      = -sum(cast(alignmentValues * log(attentionAtAligned + epsilon), Type::float32));
-  size_t numLabels        = alignmentIndices->shape().elements();
-
+  const auto& [indices, values] = guidedAlignmentToSparse(batch);
+  
+  Expr alignmentLoss;
+  size_t numLabels = indices.size(); // can be zero
+  if(indices.empty()) {
+    removeAsRoot(stopGradient(attention)); // unused, hence make sure we don't polute the backwards operations
+    alignmentLoss = graph->zeros({1});
+    numLabels = multiLossType == "sum" ? 0 : 1;
+  } else {
+    float epsilon           = 1e-6f;
+    auto alignmentIndices   = graph->indices(indices);
+    auto alignmentValues    = graph->constant({(int)values.size()}, inits::fromVector(values));
+    auto attentionAtAligned = cols(flatten(attention), alignmentIndices);
+    alignmentLoss           = -sum(cast(alignmentValues * log(attentionAtAligned + epsilon), Type::float32));
+  }
   // Create label node, also weigh by scalar so labels and cost are in the same domain.
   // Fractional label counts are OK. But only if combined as "sum".
-  // @TODO: It is ugly to check the multi-loss type here, but doing this right requires
-  // a substantial rewrite of the multi-loss architecture, which is planned anyways.
-  std::string multiLossType = options->get<std::string>("multi-loss-type", "sum");
-  if (multiLossType == "sum")         // sum of sums
+  if (multiLossType == "sum") // sum of sums
     return RationalLoss(guidedLossWeight * alignmentLoss, guidedLossWeight * numLabels);
   else
     return RationalLoss(guidedLossWeight * alignmentLoss, (float)numLabels);

From 031dbb32668cf82f767524394f8dd500f6227b0f Mon Sep 17 00:00:00 2001
From: Marcin Junczys-Dowmunt <Marcin.JunczysDowmunt@microsoft.com>
Date: Mon, 13 Feb 2023 15:44:19 +0000
Subject: [PATCH 10/10] Merged PR 27804: Fallback to old LSH code for MSVC due
 to bad loop unrolling

The Visual Studio compiler has inferior optimization and loop unrolling to gcc which results in much slower LSH code that was written to explicitly take advantage of loop unrolling at compile time.

Added an #ifdef to fall back to old LSH code on MSVC.
---
 src/layers/lsh.cpp | 43 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 36 insertions(+), 7 deletions(-)

diff --git a/src/layers/lsh.cpp b/src/layers/lsh.cpp
index 73d45fc7..eedf227e 100644
--- a/src/layers/lsh.cpp
+++ b/src/layers/lsh.cpp
@@ -1,6 +1,12 @@
-#include "layers/lsh.h"
-#include "tensors/tensor_operators.h"
+#include "common/timer.h"
 #include "common/utils.h"
+#include "layers/lsh.h"
+#include "layers/lsh_impl.h"
+#include "tensors/tensor_operators.h"
+
+#if _MSC_VER
+#include "3rd_party/faiss/Index.h"
+#endif
 
 #include "3rd_party/faiss/utils/hamming.h"
 
@@ -8,10 +14,6 @@
 #include "3rd_party/faiss/VectorTransform.h"
 #endif
 
-#include "common/timer.h"
-
-#include "layers/lsh_impl.h"
-
 namespace marian {
 namespace lsh {
 
@@ -116,7 +118,7 @@ Expr searchEncoded(Expr encodedQuery, Expr encodedWeights, int dimK, int firstNR
 
   int currBeamSize = encodedQuery->shape()[0];
   int batchSize    = encodedQuery->shape()[2];
-
+  
   auto search = [=](Expr out, const std::vector<Expr>& inputs) {
     Expr encodedQuery   = inputs[0];
     Expr encodedWeights = inputs[1];
@@ -130,6 +132,32 @@ Expr searchEncoded(Expr encodedQuery, Expr encodedWeights, int dimK, int firstNR
 
     ABORT_IF(dimK > wRows, "k is larger than number of candidate values?"); // @TODO: use min(k, wRows) silently?
 
+#if _MSC_VER // unfortunately MSVC is horrible at loop unrolling, so we fall back to the old code (hrmph!) @TODO: figure this out one day
+    int qRows = encodedQuery->shape().elements() / bytesPerVector;
+
+    uint8_t* qCodes = encodedQuery->val()->data<uint8_t>();
+    uint8_t* wCodes = encodedWeights->val()->data<uint8_t>();
+
+    // use actual faiss code for performing the hamming search. 
+    std::vector<int> distances(qRows * dimK);
+    std::vector<faiss::Index::idx_t> ids(qRows * dimK);
+    faiss::int_maxheap_array_t res = {(size_t)qRows, (size_t)dimK, ids.data(), distances.data()};
+    faiss::hammings_knn_hc(&res, qCodes, wCodes, (size_t)wRows, (size_t)bytesPerVector, 0);
+
+    // Copy int64_t indices to Marian index type and sort by increasing index value per hypothesis.
+    // The sorting is required as we later do a binary search on those values for reverse look-up.
+    uint32_t* outData = out->val()->data<uint32_t>();
+    
+    int numHypos = out->shape().elements() / dimK;
+    for (size_t hypoIdx = 0; hypoIdx < numHypos; ++hypoIdx) {
+      size_t startIdx = dimK * hypoIdx;
+      size_t endIdx = startIdx + dimK;
+      for(size_t i = startIdx; i < endIdx; ++i)
+        outData[i] = (uint32_t)ids[i];
+      if(!noSort)
+        std::sort(outData + startIdx, outData + endIdx);
+    }
+#else // this is using the new code for search, other parts of the code, like conversion are fine.
     IndexType* outData = out->val()->data<IndexType>();
     auto gather = [outData, dimK](IndexType rowId, IndexType k, IndexType kthColId, DistType /*dist*/) { 
       outData[rowId * dimK + k] = kthColId; 
@@ -144,6 +172,7 @@ Expr searchEncoded(Expr encodedQuery, Expr encodedWeights, int dimK, int firstNR
     params.bytesPerVector = bytesPerVector;
 
     hammingTopK(params, gather);
+#endif
   };
 
   Shape kShape({currBeamSize, batchSize, dimK});